mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
Support all categories of SciEval (datasets + configs + loader + dataset-index.yml)
This commit is contained in:
parent
9c8244aa44
commit
0e182a3845
@ -675,8 +675,8 @@
|
||||
name: SciEval
|
||||
category: Understanding
|
||||
paper: https://arxiv.org/pdf/2308.13149
|
||||
configpath: opencompass/configs/datasets/SciEval_lifscience/SciEval_lifscience_gen.py
|
||||
configpath_llmjudge: opencompass/configs/datasets/SciEval_lifscience/SciEval_lifscience_llm_judge_gen.py
|
||||
configpath: opencompass/configs/datasets/SciEval/SciEval_gen.py
|
||||
configpath_llmjudge: opencompass/configs/datasets/SciEval/SciEval_llm_judge_gen.py
|
||||
- mmlu_cf:
|
||||
name: MMLU-CF
|
||||
category: Understanding
|
||||
|
@ -3,11 +3,14 @@ from opencompass.openicl.icl_retriever import FixKRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccwithDetailsEvaluator
|
||||
from opencompass.utils.text_postprocessors import first_option_postprocess
|
||||
from opencompass.datasets import SciEvalDataset # 你自己实现的类
|
||||
from opencompass.datasets import SciEvalDataset
|
||||
|
||||
# Evaluate only the test split of biology + multiple-choice questions
|
||||
_hint = ('Given a question and four options, please select the right answer. '
|
||||
"Your answer should be 'A', 'B', 'C' or 'D'.")
|
||||
category = [
|
||||
'biology',
|
||||
]
|
||||
|
||||
scieval_reader_cfg = dict(
|
||||
input_columns=['input', 'A', 'B', 'C', 'D'],
|
||||
@ -54,6 +57,7 @@ scieval_datasets = [
|
||||
type=SciEvalDataset,
|
||||
path='OpenDFM/SciEval',
|
||||
name='default',
|
||||
category=category,
|
||||
reader_cfg=scieval_reader_cfg,
|
||||
infer_cfg=scieval_infer_cfg,
|
||||
eval_cfg=scieval_eval_cfg,
|
@ -12,6 +12,10 @@ from opencompass.datasets import SciEvalDataset
|
||||
with read_base():
|
||||
from .SciEval_lifescience_sets import SciEval_lifescience_subsets
|
||||
|
||||
category = [
|
||||
'biology',
|
||||
]
|
||||
|
||||
QUERY_TEMPLATE = """
|
||||
Answer the following multiple choice question. The last line of your response should be of the following format: 'ANSWER: $LETTER' (without quotes) where LETTER is one of ABCD.
|
||||
|
||||
@ -117,6 +121,7 @@ for name in SciEval_lifescience_subsets:
|
||||
type=SciEvalDataset,
|
||||
path='OpenDFM/SciEval',
|
||||
name='default',
|
||||
category=category,
|
||||
reader_cfg=scieval_reader_cfg,
|
||||
infer_cfg=scieval_infer_cfg,
|
||||
eval_cfg=scieval_eval_cfg,
|
@ -1,3 +1,6 @@
|
||||
SciEval_lifescience_subsets = [
|
||||
'biology', # 大学生物学
|
||||
'physics',
|
||||
'chemistry'
|
||||
|
||||
]
|
@ -19,11 +19,13 @@ _PATTERN_MC = (
|
||||
|
||||
@LOAD_DATASET.register_module()
|
||||
class SciEvalDataset(BaseDataset):
|
||||
"""Biology multiple-choice subset of SciEval."""
|
||||
"""Multiple-choice subset covering all categories (optionally filtered by category)."""
|
||||
|
||||
@staticmethod
|
||||
def load(path: str, name: str, **kwargs) -> DatasetDict:
|
||||
dataset = DatasetDict()
|
||||
# If category is passed, keep only that category; otherwise include all categories
|
||||
category = kwargs.get('category')
|
||||
dataset: DatasetDict = DatasetDict()
|
||||
|
||||
for split in ('test', ):
|
||||
raw_iter = load_dataset(
|
||||
@ -32,14 +34,18 @@ class SciEvalDataset(BaseDataset):
|
||||
split=split,
|
||||
streaming=True,
|
||||
)
|
||||
|
||||
examples: List[dict] = []
|
||||
|
||||
for ex in raw_iter:
|
||||
if (ex.get('category') != 'biology'
|
||||
or ex.get('type') != 'multiple-choice'):
|
||||
# Keep only multiple-choice questions
|
||||
if ex.get('type') != 'multiple-choice':
|
||||
continue
|
||||
# If a category is specified, filter by it
|
||||
if category is not None \
|
||||
and ex.get('category') != category:
|
||||
continue
|
||||
|
||||
ans_list = ex.get('answer') or ex.get('answers') or []
|
||||
ans_list = (ex.get('answer') or ex.get('answers') or [])
|
||||
if not ans_list:
|
||||
continue
|
||||
target = ans_list[0]
|
@ -127,7 +127,7 @@ from .ruler import * # noqa: F401, F403
|
||||
from .safety import * # noqa: F401, F403
|
||||
from .scibench import ScibenchDataset, scibench_postprocess # noqa: F401, F403
|
||||
from .scicode import * # noqa: F401, F403
|
||||
from .SciEval_lifescience import SciEvalDataset # noqa: F401
|
||||
from .SciEval import SciEvalDataset # noqa: F401
|
||||
from .simpleqa import * # noqa: F401, F403
|
||||
from .siqa import * # noqa: F401, F403
|
||||
from .smolinstruct import * # noqa: F401, F403
|
||||
|
Loading…
Reference in New Issue
Block a user