all categories of SciEval (datasets + configs + loader + dataset-index.yml)

2025-05-30 16:03:24 +08:00 · 2025-05-09 08:07:02 +00:00 · 2025-05-09 08:07:02 +00:00 · 0e182a3845
commit 0e182a3845
parent 9c8244aa44
6 changed files with 28 additions and 10 deletions
--- a/dataset-index.yml
+++ b/dataset-index.yml
@@ -675,8 +675,8 @@
    name: SciEval
    category: Understanding
    paper: https://arxiv.org/pdf/2308.13149
-    configpath: opencompass/configs/datasets/SciEval_lifscience/SciEval_lifscience_gen.py
+    configpath: opencompass/configs/datasets/SciEval/SciEval_gen.py
-    configpath_llmjudge: opencompass/configs/datasets/SciEval_lifscience/SciEval_lifscience_llm_judge_gen.py
+    configpath_llmjudge: opencompass/configs/datasets/SciEval/SciEval_llm_judge_gen.py
 - mmlu_cf:
    name: MMLU-CF
    category: Understanding
--- a/opencompass/configs/datasets/SciEval_lifscience/SciEval_lifescience_0shot_gen_4043d4.py
+++ b/opencompass/configs/datasets/SciEval_lifscience/SciEval_lifescience_0shot_gen_4043d4.py
@@ -3,11 +3,14 @@ from opencompass.openicl.icl_retriever import FixKRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccwithDetailsEvaluator
 from opencompass.utils.text_postprocessors import first_option_postprocess
-from opencompass.datasets import SciEvalDataset  # 你自己实现的类
+from opencompass.datasets import SciEvalDataset  
 # 只评测 biology + multiple-choice 的 test split
 _hint = ('Given a question and four options, please select the right answer. '
         "Your answer should be 'A', 'B', 'C' or 'D'.")
 category = [
    'biology',
 ]
 scieval_reader_cfg = dict(
    input_columns=['input', 'A', 'B', 'C', 'D'],
@@ -54,6 +57,7 @@ scieval_datasets = [
        type=SciEvalDataset,
        path='OpenDFM/SciEval',
        name='default',
        category=category, 
        reader_cfg=scieval_reader_cfg,
        infer_cfg=scieval_infer_cfg,
        eval_cfg=scieval_eval_cfg,
--- a/opencompass/configs/datasets/SciEval_lifscience/SciEval_lifescience_0shot_llmjudge_gen_012dd1.py
+++ b/opencompass/configs/datasets/SciEval_lifscience/SciEval_lifescience_0shot_llmjudge_gen_012dd1.py
@@ -11,6 +11,10 @@ from opencompass.datasets import SciEvalDataset
 with read_base():
    from .SciEval_lifescience_sets import SciEval_lifescience_subsets
 category = [
    'biology',
 ]
 QUERY_TEMPLATE = """
 Answer the following multiple choice question. The last line of your response should be of the following format: 'ANSWER: $LETTER' (without quotes) where LETTER is one of ABCD. 
@@ -117,6 +121,7 @@ for name in SciEval_lifescience_subsets:
            type=SciEvalDataset,
            path='OpenDFM/SciEval',
            name='default',
            category=category, 
            reader_cfg=scieval_reader_cfg,
            infer_cfg=scieval_infer_cfg,
            eval_cfg=scieval_eval_cfg,
--- a/opencompass/configs/datasets/SciEval_lifscience/SciEval_lifescience_sets.py
+++ b/opencompass/configs/datasets/SciEval_lifscience/SciEval_lifescience_sets.py
@@ -1,3 +1,6 @@
 SciEval_lifescience_subsets = [
    'biology',        # 大学生物学
    'physics',
    'chemistry'
 ]
--- a/opencompass/datasets/SciEval_lifescience.py
+++ b/opencompass/datasets/SciEval_lifescience.py
@@ -19,11 +19,13 @@ _PATTERN_MC = (
@LOAD_DATASET.register_module()
 class SciEvalDataset(BaseDataset):
-    """Biology multiple-choice subset of SciEval."""
+    """多选题子集，支持所有类别（可选指定 category 过滤）"""
    @staticmethod
    def load(path: str, name: str, **kwargs) -> DatasetDict:
-        dataset = DatasetDict()
+        # 如果传入 category，则仅保留该类别，否则包含所有类别
        category = kwargs.get('category')
        dataset: DatasetDict = DatasetDict()
        for split in ('test', ):
            raw_iter = load_dataset(
@@ -32,14 +34,18 @@ class SciEvalDataset(BaseDataset):
                split=split,
                streaming=True,
            )
            examples: List[dict] = []
            for ex in raw_iter:
-                if (ex.get('category') != 'biology'
+                # 仅保留多选题
-                        or ex.get('type') != 'multiple-choice'):
+                if ex.get('type') != 'multiple-choice':
                    continue
                # 如指定了 category，则进行过滤
                if category is not None \
                   and ex.get('category') != category:
                    continue
-                ans_list = ex.get('answer') or ex.get('answers') or []
+                ans_list = (ex.get('answer') or ex.get('answers') or [])
                if not ans_list:
                    continue
                target = ans_list[0]
--- a/opencompass/datasets/__init__.py
+++ b/opencompass/datasets/__init__.py
@@ -127,7 +127,7 @@ from .ruler import *  # noqa: F401, F403
 from .safety import *  # noqa: F401, F403
 from .scibench import ScibenchDataset, scibench_postprocess  # noqa: F401, F403
 from .scicode import *  # noqa: F401, F403
-from .SciEval_lifescience import SciEvalDataset  # noqa: F401
+from .SciEval import SciEvalDataset  # noqa: F401
 from .simpleqa import *  # noqa: F401, F403
 from .siqa import *  # noqa: F401, F403
 from .smolinstruct import *  # noqa: F401, F403