mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
57 lines
1.8 KiB
Python
57 lines
1.8 KiB
Python
# Select the 10 most popular programming languages from MultiPL-E to compose the test set.
|
|
|
|
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
|
from opencompass.openicl.icl_retriever import ZeroRetriever
|
|
from opencompass.openicl.icl_inferencer import GenInferencer
|
|
from opencompass.datasets import MultiplEDataset, MultiplEEvaluator
|
|
|
|
|
|
_TOP_TEN_LANGUAGE_ = ['cpp', 'cs', 'go', 'java', 'rb', 'js', 'php', 'r', 'rs', 'sh']
|
|
|
|
# Reader settings reused by every dataset entry: `language` and `prompt` are
# the input columns, `tests` is the output column.
multiple_reader_cfg = {
    'input_columns': ['language', 'prompt'],
    'output_column': 'tests',
}
|
|
|
|
# Inference settings reused by every dataset entry. The template's
# `{language}` and `{prompt}` placeholders carry the same names as the
# reader's input columns.
multiple_infer_cfg = {
    'prompt_template': {
        'type': PromptTemplate,
        'template': 'Based on the provided {language} code snippet, complete the subsequent content. The initial part of the completed code must match the provided code snippet exactly:\n{prompt}',
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer},
}
|
|
|
|
# Evaluation settings keyed by language code. Every entry uses
# MultiplEEvaluator configured with that language and the same evaluator
# service address.
multiple_eval_cfg = {
    lang_code: {
        'evaluator': {
            'type': MultiplEEvaluator,
            'language': lang_code,
            'ip_address': 'https://opencompass-multiple-evaluator.hf.space',
        },
        'pred_role': 'BOT',
    }
    for lang_code in _TOP_TEN_LANGUAGE_
}
|
|
|
|
# Dataset entries tagged 'humaneval': one per language code, each paired with
# the shared reader/infer configs and that language's evaluation config.
multiple_datasets = [
    {
        'type': MultiplEDataset,
        'abbr': f'humaneval-multiple-{lang_code}',
        'language': lang_code,
        'num_repeats': 1,
        'path': 'opencompass/multipl_e',
        'tag': 'humaneval',
        'reader_cfg': multiple_reader_cfg,
        'infer_cfg': multiple_infer_cfg,
        'eval_cfg': multiple_eval_cfg[lang_code],
    }
    for lang_code in _TOP_TEN_LANGUAGE_
]
|
|
|
|
# Append the entries tagged 'mbpp' (one per language code) to the existing
# list in place, after the entries already present.
multiple_datasets.extend(
    {
        'type': MultiplEDataset,
        'abbr': f'mbpp-multiple-{lang_code}',
        'language': lang_code,
        'num_repeats': 1,
        'path': 'opencompass/multipl_e',
        'tag': 'mbpp',
        'reader_cfg': multiple_reader_cfg,
        'infer_cfg': multiple_infer_cfg,
        'eval_cfg': multiple_eval_cfg[lang_code],
    }
    for lang_code in _TOP_TEN_LANGUAGE_
)
|