mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
[Dataset] Add CMB (#376)
* Add CMB * modify CMB --------- Co-authored-by: wangxidong <xidongw@163.com>
This commit is contained in:
parent
4d89533fbc
commit
47a752cd56
4
configs/datasets/cmb/cmb_gen.py
Normal file
4
configs/datasets/cmb/cmb_gen.py
Normal file
@ -0,0 +1,4 @@
|
||||
from mmengine.config import read_base
|
||||
|
||||
with read_base():
|
||||
from .cmb_gen_72cbb7 import cmb_datasets # noqa: F401, F403
|
43
configs/datasets/cmb/cmb_gen_72cbb7.py
Normal file
43
configs/datasets/cmb/cmb_gen_72cbb7.py
Normal file
@ -0,0 +1,43 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import FixKRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.datasets import CMBDataset
|
||||
|
||||
|
||||
# Reader settings: the columns exposed to the prompt template and the
# split names used for in-context examples ("val") and evaluation ("test").
# No output column is declared for this dataset.
cmb_reader_cfg = {
    "input_columns": [
        "exam_type",
        "exam_class",
        "question_type",
        "question",
        "option_str",
    ],
    "output_column": None,
    "train_split": "val",
    "test_split": "test",
}

# Inference settings: a two-turn (HUMAN/BOT) template whose beginning is the
# "</E>" token, which is also declared as the in-context-example token.
cmb_infer_cfg = {
    "ice_template": {
        "type": PromptTemplate,
        "template": {
            "begin": "</E>",
            "round": [
                {
                    "role": "HUMAN",
                    # The doubled braces inside the f-string evaluate to
                    # literal `{name}` placeholders. Rough translation:
                    # "The following is a {question_type} from the
                    # {exam_class} exam in China's {exam_type}; output the
                    # answer option directly, without analysis or
                    # explanation."
                    "prompt": f"以下是中国{{exam_type}}中{{exam_class}}考试的一道{{question_type}},不需要做任何分析和解释,直接输出答案选项。\n{{question}}\n{{option_str}} \n 答案: ",
                },
                {"role": "BOT", "prompt": "{answer}"},
            ],
        },
        "ice_token": "</E>",
    },
    "retriever": {"type": FixKRetriever},
    # NOTE(review): fix_id_list presumably selects which examples are used
    # as the fixed in-context demonstrations -- confirm against the
    # inferencer/retriever implementation.
    "inferencer": {"type": GenInferencer, "fix_id_list": [0, 1, 2, 3, 4]},
}

# A single CMB dataset entry, read from ./data/CMB/.
cmb_datasets = [
    {
        "type": CMBDataset,
        "path": "./data/CMB/",
        "abbr": "cmb",
        "reader_cfg": cmb_reader_cfg,
        "infer_cfg": cmb_infer_cfg,
    },
]
|
@ -13,6 +13,7 @@ from .ceval import * # noqa: F401, F403
|
||||
from .chid import * # noqa: F401, F403
|
||||
from .civilcomments import * # noqa: F401, F403
|
||||
from .cluewsc import * # noqa: F401, F403
|
||||
from .cmb import * # noqa: F401, F403
|
||||
from .cmmlu import * # noqa: F401, F403
|
||||
from .cmnli import * # noqa: F401, F403
|
||||
from .cmrc import * # noqa: F401, F403
|
||||
|
30
opencompass/datasets/cmb.py
Normal file
30
opencompass/datasets/cmb.py
Normal file
@ -0,0 +1,30 @@
|
||||
import json
|
||||
import os.path as osp
|
||||
|
||||
from datasets import Dataset, DatasetDict
|
||||
|
||||
from opencompass.registry import LOAD_DATASET
|
||||
|
||||
from .base import BaseDataset
|
||||
|
||||
|
||||
@LOAD_DATASET.register_module()
class CMBDataset(BaseDataset):
    """Loader for the CMB dataset stored as ``test.json`` and ``val.json``."""

    @staticmethod
    def load(path: str):
        """Read both splits from ``path`` and add an ``option_str`` column.

        Each JSON file is expected to hold a list of records, each with an
        ``option`` mapping of option label to option text. For every record
        a new ``option_str`` field is built by joining ``"<label>. <text>"``
        lines with newlines.

        Args:
            path: Directory containing ``test.json`` and ``val.json``.

        Returns:
            A ``DatasetDict`` with the keys ``'test'`` and ``'val'``.

        Raises:
            OSError: If either file cannot be opened.
            json.JSONDecodeError: If either file is not valid JSON.
        """
        splits = {}
        # Keep the original order: 'test' is read (and can fail) first.
        for split in ('test', 'val'):
            # Decode explicitly as UTF-8; relying on the locale default
            # breaks on platforms whose default encoding is not UTF-8.
            with open(osp.join(path, f'{split}.json'), 'r',
                      encoding='utf-8') as f:
                records = json.load(f)

            for record in records:
                # Options whose text is at most one character long are
                # skipped (presumably blank placeholders -- confirm against
                # the data files).
                record['option_str'] = '\n'.join(
                    f'{label}. {text}'
                    for label, text in record['option'].items()
                    if len(text) > 1)

            splits[split] = Dataset.from_list(records)

        return DatasetDict(splits)
|
Loading…
Reference in New Issue
Block a user