mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
update earth silver benchmark
This commit is contained in:
parent
8c0ccf9a6b
commit
d9de21a8c3
@ -0,0 +1,57 @@
|
|||||||
|
from opencompass.datasets import Earth_Silver_MCQDataset
|
||||||
|
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||||
|
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||||
|
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||||
|
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||||
|
|
||||||
|
|
||||||
|
# System-level instruction placed at the start of every prompt (see the
# ``begin`` section of ``infer_cfg``). Ends with a blank line.
SYSTEM_PROMPT = (
    'You are a helpful assistant for answering earth science '
    'multiple-choice questions.\n\n'
)
|
||||||
|
|
||||||
|
|
||||||
|
# Per-sample user prompt; ``{question}`` is the template placeholder for the
# dataset's ``question`` column.
ZERO_SHOT_PROMPT = (
    'Q: {question}\n'
    'Please select the correct answer from the options above and output '
    'only the corresponding letter (A, B, C, or D) without any explanation '
    'or additional text.\n'
)
|
||||||
|
|
||||||
|
|
||||||
|
# Reader settings: which dataset column feeds the prompt and which column
# holds the reference answer.
reader_cfg = {
    'input_columns': ['question'],
    'output_column': 'answer',
}
|
||||||
|
|
||||||
|
|
||||||
|
# Inference settings: a zero-shot, generation-based setup.
#   * ``begin`` carries the system instruction (with HUMAN as fallback role).
#   * ``round`` carries the single HUMAN turn containing the question.
infer_cfg = {
    'prompt_template': {
        'type': PromptTemplate,
        'template': {
            'begin': [
                {
                    'role': 'SYSTEM',
                    'fallback_role': 'HUMAN',
                    'prompt': SYSTEM_PROMPT,
                },
            ],
            'round': [
                {'role': 'HUMAN', 'prompt': ZERO_SHOT_PROMPT},
            ],
        },
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer},
}
|
||||||
|
|
||||||
|
|
||||||
|
# Evaluation settings: predictions taken from the BOT role are scored with
# AccEvaluator.
# NOTE(review): no ``pred_postprocessor`` is configured here, so the evaluator
# receives the raw model output -- confirm that is intended for letter answers.
eval_cfg = {
    'evaluator': {'type': AccEvaluator},
    'pred_role': 'BOT',
}
|
||||||
|
|
||||||
|
|
||||||
|
# Full dataset entry tying together the loader class, its load arguments
# (``path`` and ``prompt_mode``) and the reader / inference / evaluation
# configurations.
earth_silver_mcq_dataset = {
    'type': Earth_Silver_MCQDataset,
    'abbr': 'earth_silver_mcq',
    'path': 'ai-earth/Earth-Silver',
    'prompt_mode': 'zero-shot',
    'reader_cfg': reader_cfg,
    'infer_cfg': infer_cfg,
    'eval_cfg': eval_cfg,
}

# List form of the dataset entry above.
earth_silver_mcq_datasets = [
    earth_silver_mcq_dataset,
]
|
33
opencompass/datasets/Earth_Silver.py
Normal file
33
opencompass/datasets/Earth_Silver.py
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
from datasets import load_dataset
|
||||||
|
|
||||||
|
from opencompass.registry import LOAD_DATASET
|
||||||
|
|
||||||
|
from .base import BaseDataset
|
||||||
|
|
||||||
|
|
||||||
|
@LOAD_DATASET.register_module()
class Earth_Silver_MCQDataset(BaseDataset):
    """Dataset loader for the Earth-Silver multiple-choice questions."""

    name = 'msearth_mcq'

    @staticmethod
    def load(path: str, prompt_mode: str = 'zero-shot', **kwargs):
        """Load the dataset and expose its ``question`` / ``answer`` columns.

        Args:
            path: Hugging Face dataset identifier, forwarded unchanged to
                ``datasets.load_dataset``.
            prompt_mode: Prompting scheme. Only ``'zero-shot'`` is
                implemented; ``'few-shot'`` is reserved for future use.
            **kwargs: Accepted for call compatibility; not used by this
                loader.

        Returns:
            The object produced by ``load_dataset(path=path)`` after a
            ``map`` pass. No split is requested, so whatever splits the
            hub dataset defines are all returned.

        Raises:
            NotImplementedError: If ``prompt_mode`` is ``'few-shot'``.
            ValueError: If ``prompt_mode`` is any other unsupported value.
        """
        # Validate the mode up front so a misconfigured run fails before any
        # download or per-row processing takes place.
        if prompt_mode == 'few-shot':
            raise NotImplementedError('few-shot prompt 尚未实现')
        if prompt_mode != 'zero-shot':
            raise ValueError(f'Unsupported prompt_mode: {prompt_mode}')

        dataset = load_dataset(path=path)

        # Re-emit the two columns the reader config relies on; a row lacking
        # either key raises here rather than later during inference.
        return dataset.map(lambda item: {
            'question': item['question'],
            'answer': item['answer'],
        })
|
@ -48,6 +48,7 @@ from .drop import * # noqa: F401, F403
|
|||||||
from .drop_simple_eval import * # noqa: F401, F403
|
from .drop_simple_eval import * # noqa: F401, F403
|
||||||
from .ds1000 import * # noqa: F401, F403
|
from .ds1000 import * # noqa: F401, F403
|
||||||
from .ds1000_interpreter import * # noqa: F401, F403
|
from .ds1000_interpreter import * # noqa: F401, F403
|
||||||
|
from .Earth_Silver import * # noqa: F401, F403
|
||||||
from .eprstmt import * # noqa: F401, F403
|
from .eprstmt import * # noqa: F401, F403
|
||||||
from .FinanceIQ import * # noqa: F401, F403
|
from .FinanceIQ import * # noqa: F401, F403
|
||||||
from .flores import * # noqa: F401, F403
|
from .flores import * # noqa: F401, F403
|
||||||
|
Loading…
Reference in New Issue
Block a user