mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
Support all categories of SciEval (datasets + configs + loader + dataset-index.yml)
This commit is contained in:
parent
9c8244aa44
commit
0e182a3845
@ -675,8 +675,8 @@
|
|||||||
name: SciEval
|
name: SciEval
|
||||||
category: Understanding
|
category: Understanding
|
||||||
paper: https://arxiv.org/pdf/2308.13149
|
paper: https://arxiv.org/pdf/2308.13149
|
||||||
configpath: opencompass/configs/datasets/SciEval_lifscience/SciEval_lifscience_gen.py
|
configpath: opencompass/configs/datasets/SciEval/SciEval_gen.py
|
||||||
configpath_llmjudge: opencompass/configs/datasets/SciEval_lifscience/SciEval_lifscience_llm_judge_gen.py
|
configpath_llmjudge: opencompass/configs/datasets/SciEval/SciEval_llm_judge_gen.py
|
||||||
- mmlu_cf:
|
- mmlu_cf:
|
||||||
name: MMLU-CF
|
name: MMLU-CF
|
||||||
category: Understanding
|
category: Understanding
|
||||||
|
@ -3,11 +3,14 @@ from opencompass.openicl.icl_retriever import FixKRetriever
|
|||||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||||
from opencompass.openicl.icl_evaluator import AccwithDetailsEvaluator
|
from opencompass.openicl.icl_evaluator import AccwithDetailsEvaluator
|
||||||
from opencompass.utils.text_postprocessors import first_option_postprocess
|
from opencompass.utils.text_postprocessors import first_option_postprocess
|
||||||
from opencompass.datasets import SciEvalDataset # 你自己实现的类
|
from opencompass.datasets import SciEvalDataset
|
||||||
|
|
||||||
# 只评测 biology + multiple-choice 的 test split
|
# 只评测 biology + multiple-choice 的 test split
|
||||||
_hint = ('Given a question and four options, please select the right answer. '
|
_hint = ('Given a question and four options, please select the right answer. '
|
||||||
"Your answer should be 'A', 'B', 'C' or 'D'.")
|
"Your answer should be 'A', 'B', 'C' or 'D'.")
|
||||||
|
category = [
|
||||||
|
'biology',
|
||||||
|
]
|
||||||
|
|
||||||
scieval_reader_cfg = dict(
|
scieval_reader_cfg = dict(
|
||||||
input_columns=['input', 'A', 'B', 'C', 'D'],
|
input_columns=['input', 'A', 'B', 'C', 'D'],
|
||||||
@ -54,6 +57,7 @@ scieval_datasets = [
|
|||||||
type=SciEvalDataset,
|
type=SciEvalDataset,
|
||||||
path='OpenDFM/SciEval',
|
path='OpenDFM/SciEval',
|
||||||
name='default',
|
name='default',
|
||||||
|
category=category,
|
||||||
reader_cfg=scieval_reader_cfg,
|
reader_cfg=scieval_reader_cfg,
|
||||||
infer_cfg=scieval_infer_cfg,
|
infer_cfg=scieval_infer_cfg,
|
||||||
eval_cfg=scieval_eval_cfg,
|
eval_cfg=scieval_eval_cfg,
|
@ -11,6 +11,10 @@ from opencompass.datasets import SciEvalDataset
|
|||||||
|
|
||||||
with read_base():
|
with read_base():
|
||||||
from .SciEval_lifescience_sets import SciEval_lifescience_subsets
|
from .SciEval_lifescience_sets import SciEval_lifescience_subsets
|
||||||
|
|
||||||
|
category = [
|
||||||
|
'biology',
|
||||||
|
]
|
||||||
|
|
||||||
QUERY_TEMPLATE = """
|
QUERY_TEMPLATE = """
|
||||||
Answer the following multiple choice question. The last line of your response should be of the following format: 'ANSWER: $LETTER' (without quotes) where LETTER is one of ABCD.
|
Answer the following multiple choice question. The last line of your response should be of the following format: 'ANSWER: $LETTER' (without quotes) where LETTER is one of ABCD.
|
||||||
@ -117,6 +121,7 @@ for name in SciEval_lifescience_subsets:
|
|||||||
type=SciEvalDataset,
|
type=SciEvalDataset,
|
||||||
path='OpenDFM/SciEval',
|
path='OpenDFM/SciEval',
|
||||||
name='default',
|
name='default',
|
||||||
|
category=category,
|
||||||
reader_cfg=scieval_reader_cfg,
|
reader_cfg=scieval_reader_cfg,
|
||||||
infer_cfg=scieval_infer_cfg,
|
infer_cfg=scieval_infer_cfg,
|
||||||
eval_cfg=scieval_eval_cfg,
|
eval_cfg=scieval_eval_cfg,
|
@ -1,3 +1,6 @@
|
|||||||
SciEval_lifescience_subsets = [
|
SciEval_lifescience_subsets = [
|
||||||
'biology', # 大学生物学
|
'biology', # 大学生物学
|
||||||
|
'physics',
|
||||||
|
'chemistry'
|
||||||
|
|
||||||
]
|
]
|
@ -19,11 +19,13 @@ _PATTERN_MC = (
|
|||||||
|
|
||||||
@LOAD_DATASET.register_module()
|
@LOAD_DATASET.register_module()
|
||||||
class SciEvalDataset(BaseDataset):
|
class SciEvalDataset(BaseDataset):
|
||||||
"""Biology multiple-choice subset of SciEval."""
|
"""多选题子集,支持所有类别(可选指定 category 过滤)"""
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def load(path: str, name: str, **kwargs) -> DatasetDict:
|
def load(path: str, name: str, **kwargs) -> DatasetDict:
|
||||||
dataset = DatasetDict()
|
# If `category` is passed in, keep only that category; otherwise include all categories.
|
||||||
|
category = kwargs.get('category')
|
||||||
|
dataset: DatasetDict = DatasetDict()
|
||||||
|
|
||||||
for split in ('test', ):
|
for split in ('test', ):
|
||||||
raw_iter = load_dataset(
|
raw_iter = load_dataset(
|
||||||
@ -32,14 +34,18 @@ class SciEvalDataset(BaseDataset):
|
|||||||
split=split,
|
split=split,
|
||||||
streaming=True,
|
streaming=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
examples: List[dict] = []
|
examples: List[dict] = []
|
||||||
|
|
||||||
for ex in raw_iter:
|
for ex in raw_iter:
|
||||||
if (ex.get('category') != 'biology'
|
# Keep only multiple-choice questions.
|
||||||
or ex.get('type') != 'multiple-choice'):
|
if ex.get('type') != 'multiple-choice':
|
||||||
|
continue
|
||||||
|
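# If a category was specified, filter on it. NOTE(review): the configs in this commit pass a list (category = ['biology']), and a str never compares equal to a list, so this check would skip every example — confirm whether a membership test (`not in`) was intended.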
|
||||||
|
if category is not None \
|
||||||
|
and ex.get('category') != category:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
ans_list = ex.get('answer') or ex.get('answers') or []
|
ans_list = (ex.get('answer') or ex.get('answers') or [])
|
||||||
if not ans_list:
|
if not ans_list:
|
||||||
continue
|
continue
|
||||||
target = ans_list[0]
|
target = ans_list[0]
|
@ -127,7 +127,7 @@ from .ruler import * # noqa: F401, F403
|
|||||||
from .safety import * # noqa: F401, F403
|
from .safety import * # noqa: F401, F403
|
||||||
from .scibench import ScibenchDataset, scibench_postprocess # noqa: F401, F403
|
from .scibench import ScibenchDataset, scibench_postprocess # noqa: F401, F403
|
||||||
from .scicode import * # noqa: F401, F403
|
from .scicode import * # noqa: F401, F403
|
||||||
from .SciEval_lifescience import SciEvalDataset # noqa: F401
|
from .SciEval import SciEvalDataset # noqa: F401
|
||||||
from .simpleqa import * # noqa: F401, F403
|
from .simpleqa import * # noqa: F401, F403
|
||||||
from .siqa import * # noqa: F401, F403
|
from .siqa import * # noqa: F401, F403
|
||||||
from .smolinstruct import * # noqa: F401, F403
|
from .smolinstruct import * # noqa: F401, F403
|
||||||
|
Loading…
Reference in New Issue
Block a user