Support all categories of SciEval (datasets + configs + loader + dataset-index.yml)

This commit is contained in:
root 2025-05-09 08:07:02 +00:00
parent 9c8244aa44
commit 0e182a3845
6 changed files with 28 additions and 10 deletions

View File

@ -675,8 +675,8 @@
name: SciEval name: SciEval
category: Understanding category: Understanding
paper: https://arxiv.org/pdf/2308.13149 paper: https://arxiv.org/pdf/2308.13149
configpath: opencompass/configs/datasets/SciEval_lifscience/SciEval_lifscience_gen.py configpath: opencompass/configs/datasets/SciEval/SciEval_gen.py
configpath_llmjudge: opencompass/configs/datasets/SciEval_lifscience/SciEval_lifscience_llm_judge_gen.py configpath_llmjudge: opencompass/configs/datasets/SciEval/SciEval_llm_judge_gen.py
- mmlu_cf: - mmlu_cf:
name: MMLU-CF name: MMLU-CF
category: Understanding category: Understanding

View File

@ -3,11 +3,14 @@ from opencompass.openicl.icl_retriever import FixKRetriever
from opencompass.openicl.icl_inferencer import GenInferencer from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccwithDetailsEvaluator from opencompass.openicl.icl_evaluator import AccwithDetailsEvaluator
from opencompass.utils.text_postprocessors import first_option_postprocess from opencompass.utils.text_postprocessors import first_option_postprocess
from opencompass.datasets import SciEvalDataset # 你自己实现的类 from opencompass.datasets import SciEvalDataset
# Evaluate only the biology + multiple-choice test split # Evaluate only the biology + multiple-choice test split
_hint = ('Given a question and four options, please select the right answer. ' _hint = ('Given a question and four options, please select the right answer. '
"Your answer should be 'A', 'B', 'C' or 'D'.") "Your answer should be 'A', 'B', 'C' or 'D'.")
category = [
'biology',
]
scieval_reader_cfg = dict( scieval_reader_cfg = dict(
input_columns=['input', 'A', 'B', 'C', 'D'], input_columns=['input', 'A', 'B', 'C', 'D'],
@ -54,6 +57,7 @@ scieval_datasets = [
type=SciEvalDataset, type=SciEvalDataset,
path='OpenDFM/SciEval', path='OpenDFM/SciEval',
name='default', name='default',
category=category,
reader_cfg=scieval_reader_cfg, reader_cfg=scieval_reader_cfg,
infer_cfg=scieval_infer_cfg, infer_cfg=scieval_infer_cfg,
eval_cfg=scieval_eval_cfg, eval_cfg=scieval_eval_cfg,

View File

@ -11,6 +11,10 @@ from opencompass.datasets import SciEvalDataset
with read_base(): with read_base():
from .SciEval_lifescience_sets import SciEval_lifescience_subsets from .SciEval_lifescience_sets import SciEval_lifescience_subsets
category = [
'biology',
]
QUERY_TEMPLATE = """ QUERY_TEMPLATE = """
Answer the following multiple choice question. The last line of your response should be of the following format: 'ANSWER: $LETTER' (without quotes) where LETTER is one of ABCD. Answer the following multiple choice question. The last line of your response should be of the following format: 'ANSWER: $LETTER' (without quotes) where LETTER is one of ABCD.
@ -117,6 +121,7 @@ for name in SciEval_lifescience_subsets:
type=SciEvalDataset, type=SciEvalDataset,
path='OpenDFM/SciEval', path='OpenDFM/SciEval',
name='default', name='default',
category=category,
reader_cfg=scieval_reader_cfg, reader_cfg=scieval_reader_cfg,
infer_cfg=scieval_infer_cfg, infer_cfg=scieval_infer_cfg,
eval_cfg=scieval_eval_cfg, eval_cfg=scieval_eval_cfg,

View File

@ -1,3 +1,6 @@
SciEval_lifescience_subsets = [ SciEval_lifescience_subsets = [
'biology', # 大学生物学 'biology', # 大学生物学
'physics',
'chemistry'
] ]

View File

@ -19,11 +19,13 @@ _PATTERN_MC = (
@LOAD_DATASET.register_module() @LOAD_DATASET.register_module()
class SciEvalDataset(BaseDataset): class SciEvalDataset(BaseDataset):
"""Biology multiple-choice subset of SciEval.""" """多选题子集,支持所有类别(可选指定 category 过滤)"""
@staticmethod @staticmethod
def load(path: str, name: str, **kwargs) -> DatasetDict: def load(path: str, name: str, **kwargs) -> DatasetDict:
dataset = DatasetDict() # 如果传入 category则仅保留该类别否则包含所有类别
category = kwargs.get('category')
dataset: DatasetDict = DatasetDict()
for split in ('test', ): for split in ('test', ):
raw_iter = load_dataset( raw_iter = load_dataset(
@ -32,14 +34,18 @@ class SciEvalDataset(BaseDataset):
split=split, split=split,
streaming=True, streaming=True,
) )
examples: List[dict] = [] examples: List[dict] = []
for ex in raw_iter: for ex in raw_iter:
if (ex.get('category') != 'biology' # 仅保留多选题
or ex.get('type') != 'multiple-choice'): if ex.get('type') != 'multiple-choice':
continue
# 如指定了 category则进行过滤
if category is not None \
and ex.get('category') != category:
continue continue
ans_list = ex.get('answer') or ex.get('answers') or [] ans_list = (ex.get('answer') or ex.get('answers') or [])
if not ans_list: if not ans_list:
continue continue
target = ans_list[0] target = ans_list[0]

View File

@ -127,7 +127,7 @@ from .ruler import * # noqa: F401, F403
from .safety import * # noqa: F401, F403 from .safety import * # noqa: F401, F403
from .scibench import ScibenchDataset, scibench_postprocess # noqa: F401, F403 from .scibench import ScibenchDataset, scibench_postprocess # noqa: F401, F403
from .scicode import * # noqa: F401, F403 from .scicode import * # noqa: F401, F403
from .SciEval_lifescience import SciEvalDataset # noqa: F401 from .SciEval import SciEvalDataset # noqa: F401
from .simpleqa import * # noqa: F401, F403 from .simpleqa import * # noqa: F401, F403
from .siqa import * # noqa: F401, F403 from .siqa import * # noqa: F401, F403
from .smolinstruct import * # noqa: F401, F403 from .smolinstruct import * # noqa: F401, F403