diff --git a/opencompass/configs/datasets/Earth_Silver/Earth_Silver_gen.py b/opencompass/configs/datasets/Earth_Silver/Earth_Silver_gen.py
new file mode 100644
index 00000000..6624ee03
--- /dev/null
+++ b/opencompass/configs/datasets/Earth_Silver/Earth_Silver_gen.py
@@ -0,0 +1,63 @@
+# Zero-shot generation config for the Earth-Silver multiple-choice dataset.
+from opencompass.datasets import Earth_Silver_MCQDataset
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_evaluator import AccEvaluator
+
+
+SYSTEM_PROMPT = 'You are a helpful assistant for answering earth science multiple-choice questions.\n\n'
+
+
+# NOTE(review): the prompt says "the options above" but only {question} is
+# interpolated; presumably the options are part of the question text --
+# verify against the dataset.
+ZERO_SHOT_PROMPT = 'Q: {question}\nPlease select the correct answer from the options above and output only the corresponding letter (A, B, C, or D) without any explanation or additional text.\n'
+
+
+reader_cfg = dict(
+    input_columns=['question'],
+    output_column='answer',
+)
+
+
+infer_cfg = dict(
+    prompt_template=dict(
+        type=PromptTemplate,
+        template=dict(
+            begin=[
+                dict(role='SYSTEM', fallback_role='HUMAN', prompt=SYSTEM_PROMPT),
+            ],
+            round=[
+                dict(
+                    role='HUMAN',
+                    prompt=ZERO_SHOT_PROMPT,
+                ),
+            ],
+        ),
+    ),
+    retriever=dict(type=ZeroRetriever),
+    inferencer=dict(type=GenInferencer),
+)
+
+
+# NOTE(review): no pred_postprocessor is configured, so the raw generation
+# is presumably what AccEvaluator compares with `answer` -- confirm.
+eval_cfg = dict(
+    evaluator=dict(type=AccEvaluator),
+    pred_role='BOT',
+)
+
+
+earth_silver_mcq_dataset = dict(
+    type=Earth_Silver_MCQDataset,
+    abbr='earth_silver_mcq',
+    path='ai-earth/Earth-Silver',
+    prompt_mode='zero-shot',
+    reader_cfg=reader_cfg,
+    infer_cfg=infer_cfg,
+    eval_cfg=eval_cfg,
+)
+
+
+earth_silver_mcq_datasets = [earth_silver_mcq_dataset]
diff --git a/opencompass/datasets/Earth_Silver.py b/opencompass/datasets/Earth_Silver.py
new file mode 100644
index 00000000..e1d4763c
--- /dev/null
+++ b/opencompass/datasets/Earth_Silver.py
@@ -0,0 +1,51 @@
+from datasets import load_dataset
+
+from opencompass.registry import LOAD_DATASET
+
+from .base import BaseDataset
+
+
+@LOAD_DATASET.register_module()
+class Earth_Silver_MCQDataset(BaseDataset):
+    """Earth-Silver multiple-choice QA dataset loaded via ``datasets``."""
+
+    name = 'earth_silver_mcq'
+
+    @staticmethod
+    def load(path: str, prompt_mode: str = 'zero-shot', **kwargs):
+        """Load the dataset for the requested prompt mode.
+
+        Args:
+            path: Dataset identifier passed to
+                ``datasets.load_dataset``; the bundled config uses
+                ``'ai-earth/Earth-Silver'``.
+            prompt_mode: ``'zero-shot'`` or ``'few-shot'``. Only
+                ``'zero-shot'`` is implemented.
+            **kwargs: Accepted and ignored.
+
+        Returns:
+            The object returned by ``load_dataset(path=path)``, unchanged.
+
+        Raises:
+            NotImplementedError: If ``prompt_mode`` is ``'few-shot'``.
+            ValueError: If ``prompt_mode`` is any other unknown value.
+            KeyError: If a split lacks ``question`` or ``answer``.
+        """
+        # Reject a bad mode before load_dataset does any I/O.
+        if prompt_mode == 'few-shot':
+            raise NotImplementedError('few-shot prompt 尚未实现')
+        if prompt_mode != 'zero-shot':
+            raise ValueError(f'Unsupported prompt_mode: {prompt_mode}')
+
+        dataset = load_dataset(path=path)
+
+        # Both columns are consumed as-is, so a schema check is enough;
+        # no per-row map over the data is needed.
+        required = ('question', 'answer')
+        for split, columns in dataset.column_names.items():
+            missing = [col for col in required if col not in columns]
+            if missing:
+                raise KeyError(
+                    f'split {split!r} of {path!r} lacks columns {missing}')
+
+        return dataset
diff --git a/opencompass/datasets/__init__.py b/opencompass/datasets/__init__.py
index b1753221..90b391ea 100644
--- a/opencompass/datasets/__init__.py
+++ b/opencompass/datasets/__init__.py
@@ -48,6 +48,7 @@ from .drop import *  # noqa: F401, F403
 from .drop_simple_eval import *  # noqa: F401, F403
 from .ds1000 import *  # noqa: F401, F403
 from .ds1000_interpreter import *  # noqa: F401, F403
+from .Earth_Silver import *  # noqa: F401, F403
 from .eprstmt import *  # noqa: F401, F403
 from .FinanceIQ import *  # noqa: F401, F403
 from .flores import *  # noqa: F401, F403