From 0e182a384597ecd7009b73f4167e3b06ef76fc83 Mon Sep 17 00:00:00 2001 From: root Date: Fri, 9 May 2025 08:07:02 +0000 Subject: [PATCH] all categories of SciEval (datasets + configs + loader+dataset-index.yml) --- dataset-index.yml | 4 ++-- .../SciEval_0shot_gen_4043d4.py} | 6 +++++- .../SciEval_0shot_llmjudge_gen_7cc41c.py} | 5 +++++ .../SciEval_lifescience_sets.py | 3 +++ .../{SciEval_lifescience.py => SciEval.py} | 18 ++++++++++++------ opencompass/datasets/__init__.py | 2 +- 6 files changed, 28 insertions(+), 10 deletions(-) rename opencompass/configs/datasets/{SciEval_lifscience/SciEval_lifescience_0shot_gen_4043d4.py => SciEval/SciEval_0shot_gen_4043d4.py} (94%) rename opencompass/configs/datasets/{SciEval_lifscience/SciEval_lifescience_0shot_llmjudge_gen_012dd1.py => SciEval/SciEval_0shot_llmjudge_gen_7cc41c.py} (98%) rename opencompass/configs/datasets/{SciEval_lifscience => SciEval}/SciEval_lifescience_sets.py (69%) rename opencompass/datasets/{SciEval_lifescience.py => SciEval.py} (72%) diff --git a/dataset-index.yml b/dataset-index.yml index cd9e02e6..d3415c53 100644 --- a/dataset-index.yml +++ b/dataset-index.yml @@ -675,8 +675,8 @@ name: SciEval category: Understanding paper: https://arxiv.org/pdf/2308.13149 - configpath: opencompass/configs/datasets/SciEval_lifscience/SciEval_lifscience_gen.py - configpath_llmjudge: opencompass/configs/datasets/SciEval_lifscience/SciEval_lifscience_llm_judge_gen.py + configpath: opencompass/configs/datasets/SciEval/SciEval_gen.py + configpath_llmjudge: opencompass/configs/datasets/SciEval/SciEval_llm_judge_gen.py - mmlu_cf: name: MMLU-CF category: Understanding diff --git a/opencompass/configs/datasets/SciEval_lifscience/SciEval_lifescience_0shot_gen_4043d4.py b/opencompass/configs/datasets/SciEval/SciEval_0shot_gen_4043d4.py similarity index 94% rename from opencompass/configs/datasets/SciEval_lifscience/SciEval_lifescience_0shot_gen_4043d4.py rename to opencompass/configs/datasets/SciEval/SciEval_0shot_gen_4043d4.py index 5381abcf..645e744b 100644 --- a/opencompass/configs/datasets/SciEval_lifscience/SciEval_lifescience_0shot_gen_4043d4.py +++ b/opencompass/configs/datasets/SciEval/SciEval_0shot_gen_4043d4.py @@ -3,11 +3,14 @@ from opencompass.openicl.icl_retriever import FixKRetriever from opencompass.openicl.icl_inferencer import GenInferencer from opencompass.openicl.icl_evaluator import AccwithDetailsEvaluator from opencompass.utils.text_postprocessors import first_option_postprocess -from opencompass.datasets import SciEvalDataset # 你自己实现的类 +from opencompass.datasets import SciEvalDataset # 只评测 biology + multiple-choice 的 test split _hint = ('Given a question and four options, please select the right answer. ' "Your answer should be 'A', 'B', 'C' or 'D'.") +category = [ + 'biology', +] scieval_reader_cfg = dict( input_columns=['input', 'A', 'B', 'C', 'D'], @@ -54,6 +57,7 @@ scieval_datasets = [ type=SciEvalDataset, path='OpenDFM/SciEval', name='default', + category=category, reader_cfg=scieval_reader_cfg, infer_cfg=scieval_infer_cfg, eval_cfg=scieval_eval_cfg, diff --git a/opencompass/configs/datasets/SciEval_lifscience/SciEval_lifescience_0shot_llmjudge_gen_012dd1.py b/opencompass/configs/datasets/SciEval/SciEval_0shot_llmjudge_gen_7cc41c.py similarity index 98% rename from opencompass/configs/datasets/SciEval_lifscience/SciEval_lifescience_0shot_llmjudge_gen_012dd1.py rename to opencompass/configs/datasets/SciEval/SciEval_0shot_llmjudge_gen_7cc41c.py index 26af5cd3..f52aec91 100644 --- a/opencompass/configs/datasets/SciEval_lifscience/SciEval_lifescience_0shot_llmjudge_gen_012dd1.py +++ b/opencompass/configs/datasets/SciEval/SciEval_0shot_llmjudge_gen_7cc41c.py @@ -11,6 +11,10 @@ from opencompass.datasets import SciEvalDataset with read_base(): from .SciEval_lifescience_sets import SciEval_lifescience_subsets + +category = [ + 'biology', +] QUERY_TEMPLATE = """ Answer the following multiple choice question. The last line of your response should be of the following format: 'ANSWER: $LETTER' (without quotes) where LETTER is one of ABCD. @@ -117,6 +121,7 @@ for name in SciEval_lifescience_subsets: type=SciEvalDataset, path='OpenDFM/SciEval', name='default', + category=category, reader_cfg=scieval_reader_cfg, infer_cfg=scieval_infer_cfg, eval_cfg=scieval_eval_cfg, diff --git a/opencompass/configs/datasets/SciEval_lifscience/SciEval_lifescience_sets.py b/opencompass/configs/datasets/SciEval/SciEval_lifescience_sets.py similarity index 69% rename from opencompass/configs/datasets/SciEval_lifscience/SciEval_lifescience_sets.py rename to opencompass/configs/datasets/SciEval/SciEval_lifescience_sets.py index 8d0a0a83..8cf9e540 100644 --- a/opencompass/configs/datasets/SciEval_lifscience/SciEval_lifescience_sets.py +++ b/opencompass/configs/datasets/SciEval/SciEval_lifescience_sets.py @@ -1,3 +1,6 @@ SciEval_lifescience_subsets = [ 'biology', # 大学生物学 + 'physics', + 'chemistry' + ] diff --git a/opencompass/datasets/SciEval_lifescience.py b/opencompass/datasets/SciEval.py similarity index 72% rename from opencompass/datasets/SciEval_lifescience.py rename to opencompass/datasets/SciEval.py index af93e496..593e3183 100644 --- a/opencompass/datasets/SciEval_lifescience.py +++ b/opencompass/datasets/SciEval.py @@ -19,11 +19,13 @@ _PATTERN_MC = ( @LOAD_DATASET.register_module() class SciEvalDataset(BaseDataset): - """Biology multiple-choice subset of SciEval.""" + """多选题子集,支持所有类别(可选指定 category 过滤)""" @staticmethod def load(path: str, name: str, **kwargs) -> DatasetDict: - dataset = DatasetDict() + # 如果传入 category,则仅保留该类别,否则包含所有类别 + category = kwargs.get('category') + dataset: DatasetDict = DatasetDict() for split in ('test', ): raw_iter = load_dataset( @@ -32,14 +34,18 @@ class SciEvalDataset(BaseDataset): split=split, streaming=True, ) - examples: List[dict] = [] + for ex in raw_iter: - if (ex.get('category') != 'biology' - or ex.get('type') != 'multiple-choice'): + # 仅保留多选题 + if ex.get('type') != 'multiple-choice': + continue + # 如指定了 category,则进行过滤 + if category is not None \ + and ex.get('category') != category: continue - ans_list = ex.get('answer') or ex.get('answers') or [] + ans_list = (ex.get('answer') or ex.get('answers') or []) if not ans_list: continue target = ans_list[0] diff --git a/opencompass/datasets/__init__.py b/opencompass/datasets/__init__.py index c005eb78..74a1f4bd 100644 --- a/opencompass/datasets/__init__.py +++ b/opencompass/datasets/__init__.py @@ -127,7 +127,7 @@ from .ruler import * # noqa: F401, F403 from .safety import * # noqa: F401, F403 from .scibench import ScibenchDataset, scibench_postprocess # noqa: F401, F403 from .scicode import * # noqa: F401, F403 -from .SciEval_lifescience import SciEvalDataset # noqa: F401 +from .SciEval import SciEvalDataset # noqa: F401 from .simpleqa import * # noqa: F401, F403 from .siqa import * # noqa: F401, F403 from .smolinstruct import * # noqa: F401, F403