[Feat] Support CValues Responsibility dataset (#78)

* [Feat] support CValues

* minor fix
This commit is contained in:
Hubert 2023-07-18 18:45:15 +08:00 committed by GitHub
parent 26e2f171f4
commit f83e125e5a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 67 additions and 0 deletions

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base

# Pull the concrete dataset definition into this config. The import is done
# for its side effect of exposing `cvalues_datasets` here, hence the noqa.
with read_base():
    from .cvalues_responsibility_gen_4aec9f import cvalues_datasets  # noqa: F401, F403

View File

@ -0,0 +1,37 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CValuesDataset
from opencompass.utils.text_postprocessors import first_capital_postprocess
# Reader: the model sees the `prompt` column and is scored against `label`.
# Both splits point at 'train'.
# NOTE(review): presumably because the data comes from a single JSON file that
# only yields a 'train' split -- confirm against the loader.
cvalues_reader_cfg = dict(
    input_columns=['prompt'],
    output_column='label',
    train_split='train',
    test_split='train',
)

# Inference: zero-shot generation with a single HUMAN turn; the dataset prompt
# is followed by a fixed instruction suffix.
cvalues_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(role='HUMAN', prompt='{prompt}请直接给出答案:\n'),
            ],
        ),
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer),
)

# Evaluation: the BOT reply is post-processed before being handed to the
# evaluator.
# NOTE(review): `first_capital_postprocess` presumably reduces the reply to its
# first capital letter so it can match the 'A'/'B' labels -- confirm.
cvalues_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_role='BOT',
    pred_postprocessor=dict(type=first_capital_postprocess),
)

# Dataset entries exported by this config.
cvalues_datasets = [
    dict(
        abbr='CValues-Responsibility',
        type=CValuesDataset,
        path='data/cvalues_responsibility_mc.jsonl',
        reader_cfg=cvalues_reader_cfg,
        infer_cfg=cvalues_infer_cfg,
        eval_cfg=cvalues_eval_cfg,
    ),
]

View File

@ -17,6 +17,7 @@ from .commonsenseqa import * # noqa: F401, F403
from .copa import * # noqa: F401, F403
from .crowspairs import * # noqa: F401, F403
from .csl import * # noqa: F401, F403
from .cvalues import * # noqa: F401, F403
from .drcd import * # noqa: F401, F403
from .drop import * # noqa: F401, F403
from .eprstmt import * # noqa: F401, F403

View File

@ -0,0 +1,25 @@
import re
from datasets import load_dataset
from opencompass.registry import LOAD_DATASET
from .base import BaseDataset
@LOAD_DATASET.register_module()
class CValuesDataset(BaseDataset):
    """Loader for the CValues data stored as a JSON file.

    Rewrites the numbered reply markers in each record to letter options.
    """

    @staticmethod
    def load(path):
        """Load the JSON data at ``path`` and relabel the reply markers.

        Args:
            path: Location passed to ``load_dataset`` as ``data_files``.

        Returns:
            The result of mapping the relabelling step over the loaded data.
        """
        # (column, pattern, replacement), applied in this exact order.
        rewrites = (
            ('prompt', '回复1', '回复A'),
            ('prompt', '回复2', '回复B'),
            ('label', '回复1', 'A'),
            ('label', '回复2', 'B'),
        )

        def relabel(record):
            # Each step reads the column as left by the previous step.
            for column, pattern, replacement in rewrites:
                record[column] = re.sub(pattern, replacement, record[column])
            return record

        loaded = load_dataset('json', data_files=path)
        return loaded.map(relabel)