From 0e182a384597ecd7009b73f4167e3b06ef76fc83 Mon Sep 17 00:00:00 2001
From: root <tangcheng231@mails.ucas.edu.cn>
Date: Fri, 9 May 2025 08:07:02 +0000
Subject: [PATCH] Support all categories of SciEval (datasets + configs +
 loader + dataset-index.yml)

---
 dataset-index.yml                              |  4 ++--
 .../SciEval_0shot_gen_4043d4.py}               |  6 +++++-
 .../SciEval_0shot_llmjudge_gen_7cc41c.py}      |  5 +++++
 .../SciEval_lifescience_sets.py                |  3 +++
 .../{SciEval_lifescience.py => SciEval.py}     | 18 ++++++++++++------
 opencompass/datasets/__init__.py               |  2 +-
 6 files changed, 28 insertions(+), 10 deletions(-)
 rename opencompass/configs/datasets/{SciEval_lifscience/SciEval_lifescience_0shot_gen_4043d4.py => SciEval/SciEval_0shot_gen_4043d4.py} (94%)
 rename opencompass/configs/datasets/{SciEval_lifscience/SciEval_lifescience_0shot_llmjudge_gen_012dd1.py => SciEval/SciEval_0shot_llmjudge_gen_7cc41c.py} (98%)
 rename opencompass/configs/datasets/{SciEval_lifscience => SciEval}/SciEval_lifescience_sets.py (69%)
 rename opencompass/datasets/{SciEval_lifescience.py => SciEval.py} (72%)

diff --git a/dataset-index.yml b/dataset-index.yml
index cd9e02e6..d3415c53 100644
--- a/dataset-index.yml
+++ b/dataset-index.yml
@@ -675,8 +675,8 @@
     name: SciEval
     category: Understanding
     paper: https://arxiv.org/pdf/2308.13149
-    configpath: opencompass/configs/datasets/SciEval_lifscience/SciEval_lifscience_gen.py
-    configpath_llmjudge: opencompass/configs/datasets/SciEval_lifscience/SciEval_lifscience_llm_judge_gen.py
+    configpath: opencompass/configs/datasets/SciEval/SciEval_gen.py
+    configpath_llmjudge: opencompass/configs/datasets/SciEval/SciEval_llm_judge_gen.py
 - mmlu_cf:
     name: MMLU-CF
     category: Understanding
diff --git a/opencompass/configs/datasets/SciEval_lifscience/SciEval_lifescience_0shot_gen_4043d4.py b/opencompass/configs/datasets/SciEval/SciEval_0shot_gen_4043d4.py
similarity index 94%
rename from opencompass/configs/datasets/SciEval_lifscience/SciEval_lifescience_0shot_gen_4043d4.py
rename to opencompass/configs/datasets/SciEval/SciEval_0shot_gen_4043d4.py
index 5381abcf..645e744b 100644
--- a/opencompass/configs/datasets/SciEval_lifscience/SciEval_lifescience_0shot_gen_4043d4.py
+++ b/opencompass/configs/datasets/SciEval/SciEval_0shot_gen_4043d4.py
@@ -3,11 +3,14 @@ from opencompass.openicl.icl_retriever import FixKRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccwithDetailsEvaluator
 from opencompass.utils.text_postprocessors import first_option_postprocess
-from opencompass.datasets import SciEvalDataset  # 你自己实现的类
+from opencompass.datasets import SciEvalDataset  
 
 # 只评测 biology + multiple-choice 的 test split
 _hint = ('Given a question and four options, please select the right answer. '
          "Your answer should be 'A', 'B', 'C' or 'D'.")
+category = [
+    'biology',
+]
 
 scieval_reader_cfg = dict(
     input_columns=['input', 'A', 'B', 'C', 'D'],
@@ -54,6 +57,7 @@ scieval_datasets = [
         type=SciEvalDataset,
         path='OpenDFM/SciEval',
         name='default',
+        category=category, 
         reader_cfg=scieval_reader_cfg,
         infer_cfg=scieval_infer_cfg,
         eval_cfg=scieval_eval_cfg,
diff --git a/opencompass/configs/datasets/SciEval_lifscience/SciEval_lifescience_0shot_llmjudge_gen_012dd1.py b/opencompass/configs/datasets/SciEval/SciEval_0shot_llmjudge_gen_7cc41c.py
similarity index 98%
rename from opencompass/configs/datasets/SciEval_lifscience/SciEval_lifescience_0shot_llmjudge_gen_012dd1.py
rename to opencompass/configs/datasets/SciEval/SciEval_0shot_llmjudge_gen_7cc41c.py
index 26af5cd3..f52aec91 100644
--- a/opencompass/configs/datasets/SciEval_lifscience/SciEval_lifescience_0shot_llmjudge_gen_012dd1.py
+++ b/opencompass/configs/datasets/SciEval/SciEval_0shot_llmjudge_gen_7cc41c.py
@@ -11,6 +11,10 @@ from opencompass.datasets import SciEvalDataset
 
 with read_base():
     from .SciEval_lifescience_sets import SciEval_lifescience_subsets
+    
+category = [
+    'biology',
+]
 
 QUERY_TEMPLATE = """
 Answer the following multiple choice question. The last line of your response should be of the following format: 'ANSWER: $LETTER' (without quotes) where LETTER is one of ABCD. 
@@ -117,6 +121,7 @@ for name in SciEval_lifescience_subsets:
             type=SciEvalDataset,
             path='OpenDFM/SciEval',
             name='default',
+            category=category, 
             reader_cfg=scieval_reader_cfg,
             infer_cfg=scieval_infer_cfg,
             eval_cfg=scieval_eval_cfg,
diff --git a/opencompass/configs/datasets/SciEval_lifscience/SciEval_lifescience_sets.py b/opencompass/configs/datasets/SciEval/SciEval_lifescience_sets.py
similarity index 69%
rename from opencompass/configs/datasets/SciEval_lifscience/SciEval_lifescience_sets.py
rename to opencompass/configs/datasets/SciEval/SciEval_lifescience_sets.py
index 8d0a0a83..8cf9e540 100644
--- a/opencompass/configs/datasets/SciEval_lifscience/SciEval_lifescience_sets.py
+++ b/opencompass/configs/datasets/SciEval/SciEval_lifescience_sets.py
@@ -1,3 +1,6 @@
 SciEval_lifescience_subsets = [
     'biology',        # 大学生物学
+    'physics',
+    'chemistry'
+
 ]
diff --git a/opencompass/datasets/SciEval_lifescience.py b/opencompass/datasets/SciEval.py
similarity index 72%
rename from opencompass/datasets/SciEval_lifescience.py
rename to opencompass/datasets/SciEval.py
index af93e496..593e3183 100644
--- a/opencompass/datasets/SciEval_lifescience.py
+++ b/opencompass/datasets/SciEval.py
@@ -19,11 +19,13 @@ _PATTERN_MC = (
 
 @LOAD_DATASET.register_module()
 class SciEvalDataset(BaseDataset):
-    """Biology multiple-choice subset of SciEval."""
+    """Multiple-choice SciEval subset, optionally filtered by category."""
 
     @staticmethod
     def load(path: str, name: str, **kwargs) -> DatasetDict:
-        dataset = DatasetDict()
+        # Keep only the given category if one is passed; otherwise keep all.
+        category = kwargs.get('category')
+        dataset: DatasetDict = DatasetDict()
 
         for split in ('test', ):
             raw_iter = load_dataset(
@@ -32,14 +34,18 @@ class SciEvalDataset(BaseDataset):
                 split=split,
                 streaming=True,
             )
-
             examples: List[dict] = []
+
             for ex in raw_iter:
-                if (ex.get('category') != 'biology'
-                        or ex.get('type') != 'multiple-choice'):
+                # Keep multiple-choice questions only.
+                if ex.get('type') != 'multiple-choice':
+                    continue
+                # Filter by category (NOTE: a list value never matches).
+                if category is not None \
+                   and ex.get('category') != category:
                     continue
 
-                ans_list = ex.get('answer') or ex.get('answers') or []
+                ans_list = (ex.get('answer') or ex.get('answers') or [])
                 if not ans_list:
                     continue
                 target = ans_list[0]
diff --git a/opencompass/datasets/__init__.py b/opencompass/datasets/__init__.py
index c005eb78..74a1f4bd 100644
--- a/opencompass/datasets/__init__.py
+++ b/opencompass/datasets/__init__.py
@@ -127,7 +127,7 @@ from .ruler import *  # noqa: F401, F403
 from .safety import *  # noqa: F401, F403
 from .scibench import ScibenchDataset, scibench_postprocess  # noqa: F401, F403
 from .scicode import *  # noqa: F401, F403
-from .SciEval_lifescience import SciEvalDataset  # noqa: F401
+from .SciEval import SciEvalDataset  # noqa: F401
 from .simpleqa import *  # noqa: F401, F403
 from .siqa import *  # noqa: F401, F403
 from .smolinstruct import *  # noqa: F401, F403