OpenCompass/opencompass/configs/datasets/PMMEval/humanevalxl_gen_bdec92.py
wanyu2018umac 90efcf2216
[Feature] Add P-MMEval (#1714)
* Update with PMMEval

* Update

* Update __init__.py

* Fix Bugs

* Delete .pre-commit-config.yaml

* Pull merge

---------

Co-authored-by: liushz <qq1791167085@163.com>
2024-11-27 21:26:18 +08:00

50 lines
1.8 KiB
Python
Executable File

from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets.PMMEval import PMMEvalHumanEvalXLDataset, PMMEvalHumanEvalXLEvaluator
# Natural languages covered by this config; a dataset entry is generated for
# each of them in combination with every programming language.
NATURAL_LANGUAGE_FULLNAMES = [
    'English',
    'Chinese',
    'Arabic',
    'Spanish',
    'French',
    'Japanese',
    'Korean',
    'Portuguese',
    'Thai',
    'Vietnamese',
]

# Reader configuration shared by every dataset entry: the columns read as
# inputs, the column read as the output, and the split used for testing.
PMMEval_HumanEvalXL_reader_cfg = dict(
    input_columns=[
        'task_id',
        'prompt',
        'entry_point',
        'test',
        'language',
        'description',
        'natural_language',
    ],
    output_column='declaration',
    test_split='test',
)
# Inference configuration shared by every dataset entry. The prompt template
# is just the '{prompt}' placeholder, so the dataset's `prompt` column is used
# as the model input without any extra wrapping text.
PMMEval_HumanEvalXL_infer_cfg = {
    'prompt_template': {
        'type': PromptTemplate,
        'template': '{prompt}',
    },
    # NOTE(review): ZeroRetriever presumably supplies no in-context
    # examples (zero-shot) -- confirm against its implementation.
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer},
}
# Build one dataset entry for every (natural language, programming language)
# pair: len(NATURAL_LANGUAGE_FULLNAMES) x 3 entries in total.
PMMEval_HumanEvalXL_datasets = []
for lang_fullname in NATURAL_LANGUAGE_FULLNAMES:
    for program_lang in ('python', 'java', 'javascript'):
        # A fresh eval config is created on each iteration because the
        # evaluator is parameterised by both languages; the reader and
        # inference configs below are shared objects.
        PMMEval_HumanEvalXL_eval_cfg = {
            'evaluator': {
                'type': PMMEvalHumanEvalXLEvaluator,
                'language': program_lang,
                'text_language': lang_fullname,
                # NOTE(review): presumably the address of a locally running
                # code-evaluation service -- confirm against the evaluator.
                'ip_address': 'localhost',
                'port': 5001,
            },
            'pred_role': 'BOT',
        }

        PMMEval_HumanEvalXL_datasets.append({
            'abbr': f'humanevalxl-{program_lang}-{lang_fullname}',
            'type': PMMEvalHumanEvalXLDataset,
            'path': 'P-MMEval',
            'lang': lang_fullname,
            'program_lang': program_lang,
            'reader_cfg': PMMEval_HumanEvalXL_reader_cfg,
            'infer_cfg': PMMEval_HumanEvalXL_infer_cfg,
            'eval_cfg': PMMEval_HumanEvalXL_eval_cfg,
        })