mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
[Dataset] Add CMB (#376)
* Add CMB * modify CMB --------- Co-authored-by: wangxidong <xidongw@163.com>
This commit is contained in:
parent
4d89533fbc
commit
47a752cd56
4
configs/datasets/cmb/cmb_gen.py
Normal file
4
configs/datasets/cmb/cmb_gen.py
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
from mmengine.config import read_base
|
||||||
|
|
||||||
|
# Pull `cmb_datasets` in from the `cmb_gen_72cbb7` config module; the import
# runs inside the `read_base()` context manager imported from mmengine.config.
with read_base():
|
||||||
|
from .cmb_gen_72cbb7 import cmb_datasets # noqa: F401, F403
|
43
configs/datasets/cmb/cmb_gen_72cbb7.py
Normal file
43
configs/datasets/cmb/cmb_gen_72cbb7.py
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||||
|
from opencompass.openicl.icl_retriever import FixKRetriever
|
||||||
|
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||||
|
from opencompass.datasets import CMBDataset
|
||||||
|
|
||||||
|
|
||||||
|
# List of dataset config dicts defined by this file; the CMB entry is appended
# to it at the bottom of the file.
cmb_datasets = []
|
||||||
|
|
||||||
|
# Reader settings for CMB: the five listed columns are the input columns, no
# output column is declared, "val" is registered as the train split and "test"
# as the test split.
cmb_reader_cfg = {
    "input_columns": [
        "exam_type",
        "exam_class",
        "question_type",
        "question",
        "option_str",
    ],
    "output_column": None,
    "train_split": "val",
    "test_split": "test",
}
|
||||||
|
|
||||||
|
# Inference settings for CMB: a two-turn (HUMAN question / BOT answer) prompt
# template whose "</E>" token marks where in-context examples are inserted,
# a FixKRetriever, and a GenInferencer configured with ids 0-4.
cmb_infer_cfg = dict(
    ice_template=dict(
        type=PromptTemplate,
        template=dict(
            begin="</E>",
            round=[
                dict(
                    role="HUMAN",
                    # Deliberately a plain string, not an f-string: the
                    # {field} markers must reach the template verbatim and
                    # nothing is interpolated at config-load time.
                    # English gloss: "The following is a {question_type} from
                    # the {exam_class} exam of China's {exam_type}; no analysis
                    # or explanation is needed, output the answer option
                    # directly."
                    prompt="以下是中国{exam_type}中{exam_class}考试的一道{question_type},不需要做任何分析和解释,直接输出答案选项。\n{question}\n{option_str} \n 答案: ",
                ),
                dict(role="BOT", prompt="{answer}"),
            ],
        ),
        ice_token="</E>",
    ),
    retriever=dict(type=FixKRetriever),
    inferencer=dict(type=GenInferencer, fix_id_list=[0, 1, 2, 3, 4]),
)
|
||||||
|
|
||||||
|
# Add the CMB dataset entry: loader class, data directory, short name, and the
# reader / inference settings defined earlier in this file.
cmb_datasets.append({
    "type": CMBDataset,
    "path": "./data/CMB/",
    "abbr": "cmb",
    "reader_cfg": cmb_reader_cfg,
    "infer_cfg": cmb_infer_cfg,
})
|
@ -13,6 +13,7 @@ from .ceval import * # noqa: F401, F403
|
|||||||
from .chid import * # noqa: F401, F403
|
from .chid import * # noqa: F401, F403
|
||||||
from .civilcomments import * # noqa: F401, F403
|
from .civilcomments import * # noqa: F401, F403
|
||||||
from .cluewsc import * # noqa: F401, F403
|
from .cluewsc import * # noqa: F401, F403
|
||||||
|
from .cmb import * # noqa: F401, F403
|
||||||
from .cmmlu import * # noqa: F401, F403
|
from .cmmlu import * # noqa: F401, F403
|
||||||
from .cmnli import * # noqa: F401, F403
|
from .cmnli import * # noqa: F401, F403
|
||||||
from .cmrc import * # noqa: F401, F403
|
from .cmrc import * # noqa: F401, F403
|
||||||
|
30
opencompass/datasets/cmb.py
Normal file
30
opencompass/datasets/cmb.py
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
import json
|
||||||
|
import os.path as osp
|
||||||
|
|
||||||
|
from datasets import Dataset, DatasetDict
|
||||||
|
|
||||||
|
from opencompass.registry import LOAD_DATASET
|
||||||
|
|
||||||
|
from .base import BaseDataset
|
||||||
|
|
||||||
|
|
||||||
|
@LOAD_DATASET.register_module()
class CMBDataset(BaseDataset):
    """Loader for the CMB dataset stored as ``test.json`` and ``val.json``."""

    @staticmethod
    def load(path: str) -> DatasetDict:
        """Read both CMB splits and attach a rendered option string to each.

        Every record gets a new ``option_str`` field: the entries of its
        ``option`` mapping rendered as ``"<key>. <text>"`` lines joined by
        newlines.

        Args:
            path: Directory containing ``test.json`` and ``val.json``.

        Returns:
            A ``DatasetDict`` with the keys ``'test'`` and ``'val'``.

        Raises:
            FileNotFoundError: If either JSON file is missing from ``path``.
            KeyError: If a record has no ``option`` field.
        """
        splits = {}
        for split in ('test', 'val'):
            # Read explicitly as UTF-8 so the result does not depend on the
            # platform's default locale encoding.
            with open(osp.join(path, f'{split}.json'), 'r',
                      encoding='utf-8') as f:
                records = json.load(f)

            for record in records:
                # NOTE(review): `len(text) > 1` drops empty options but also
                # any one-character option text -- presumably meant to skip
                # blank placeholders only; confirm against the data.
                record['option_str'] = '\n'.join(
                    f'{key}. {text}'
                    for key, text in record['option'].items()
                    if len(text) > 1)

            splits[split] = Dataset.from_list(records)

        return DatasetDict(splits)
|
Loading…
Reference in New Issue
Block a user