[Feature] Add CoderEval inference

jingmingzhuo 2023-12-01 21:45:43 +08:00
parent e019c831fe
commit c958d1cbc0
3 changed files with 68 additions and 0 deletions


@@ -0,0 +1,47 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CoderEvalDataset
from opencompass.utils.text_postprocessors import first_capital_postprocess

CoderEval_reader_cfg = dict(
    input_columns="input",
    output_column=None,
)

CoderEval_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(role="HUMAN", prompt="Please help me complete the following function.\n**Note: only return the function to me, no other description.**\n```python\n{input}\n```"),
                dict(role="BOT", prompt="{answer}"),
            ]
        ),
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer),
)

CoderEval_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_role="BOT",
    pred_postprocessor=dict(type=first_capital_postprocess),
)

files = ['CEPythonHumanLabel', 'CEPythonRaw']
CoderEval_datasets = []
for _file in files:
    CoderEval_datasets.append(
        dict(
            type=CoderEvalDataset,
            abbr=_file,
            test_path=f"data/CoderEval/{_file}.jsonl",
            reader_cfg=CoderEval_reader_cfg,
            infer_cfg=CoderEval_infer_cfg,
            eval_cfg=CoderEval_eval_cfg,
        )
    )

del _file
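
For context, a dataset config like the one above is normally pulled into a run config and launched with run.py. The sketch below shows one way to wire it up; the config path (configs/datasets/CoderEval/CoderEval_gen.py) and the model config are illustrative assumptions, not part of this commit.

# Minimal run-config sketch (paths and model choice are illustrative).
from mmengine.config import read_base

with read_base():
    # Assumed location of the config added above; adjust to the actual filename.
    from .datasets.CoderEval.CoderEval_gen import CoderEval_datasets
    # Any model config works here; this one is only an example.
    from .models.hf_internlm.hf_internlm_chat_7b import models

datasets = CoderEval_datasets

Saved, for example, as configs/eval_codereval.py (a hypothetical name), it would run generation over both splits declared above (CEPythonHumanLabel and CEPythonRaw) via python run.py configs/eval_codereval.py.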

opencompass/datasets/__init__.py

@@ -21,6 +21,7 @@ from .cmb import * # noqa: F401, F403
from .cmmlu import * # noqa: F401, F403
from .cmnli import * # noqa: F401, F403
from .cmrc import * # noqa: F401, F403
from .codereval import * # noqa: F401, F403
from .commonsenseqa import * # noqa: F401, F403
from .commonsenseqa_cn import * # noqa: F401, F403
from .copa import * # noqa: F401, F403

opencompass/datasets/codereval.py

@@ -0,0 +1,20 @@
import json

from datasets import Dataset

from opencompass.registry import LOAD_DATASET

from .base import BaseDataset


@LOAD_DATASET.register_module()
class CoderEvalDataset(BaseDataset):

    @staticmethod
    def load(test_path):
        datasets = []
        with open(test_path, 'r', encoding='utf-8') as file:
            for line in file:
                dataset = json.loads(line.strip())
                datasets.append(dataset)
        return Dataset.from_list(datasets)
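
The loader simply reads one JSON object per line into a HuggingFace Dataset, so each record is expected to carry at least the "input" field that the reader config selects. A minimal standalone sketch, using a hypothetical one-record JSONL file (the real CEPythonHumanLabel/CEPythonRaw files contain the actual CoderEval tasks and may carry extra fields):

import json

from opencompass.datasets import CoderEvalDataset

# Hypothetical sample record, only for illustrating the expected JSONL shape.
sample = {"input": 'def add(a, b):\n    """Return the sum of a and b."""\n'}
with open("/tmp/CEPythonHumanLabel.jsonl", "w", encoding="utf-8") as f:
    f.write(json.dumps(sample) + "\n")

ds = CoderEvalDataset.load(test_path="/tmp/CEPythonHumanLabel.jsonl")
print(len(ds), ds[0]["input"].splitlines()[0])  # -> 1 def add(a, b):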