OpenCompass/opencompass/configs/datasets/multipl_e/multiple_top_ten_gen.py

# Select the 10 most popular programming languages from MultiPL-E to compose the test set.

from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import MultiplEDataset, MultiplEEvaluator


# MultiPL-E language codes covered by this config. Every code listed here is
# iterated over when the per-language evaluation and dataset entries are built.
_TOP_TEN_LANGUAGE_ = [
    'cpp',
    'cs',
    'go',
    'java',
    'rb',
    'js',
    'php',
    'r',
    'rs',
    'sh',
]

# Reader settings shared by every dataset entry in this file: the 'language'
# and 'prompt' columns are the inputs, the 'tests' column is the output.
multiple_reader_cfg = {
    'input_columns': ['language', 'prompt'],
    'output_column': 'tests',
}

# Inference settings shared by every dataset entry in this file: a single
# prompt template filled with the '{language}' and '{prompt}' fields, the
# ZeroRetriever retriever, and the GenInferencer inferencer.
multiple_infer_cfg = {
    'prompt_template': {
        'type': PromptTemplate,
        # The pieces below are joined by implicit string concatenation into
        # one template; the text itself must stay exactly as written.
        'template': (
            'Based on the provided {language} code snippet, '
            'complete the subsequent content. '
            'The initial part of the completed code must match '
            'the provided code snippet exactly:\n{prompt}'
        ),
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer},
}

# Evaluation settings keyed by language code. Each entry configures a
# MultiplEEvaluator for that language with the same `ip_address` endpoint,
# and sets `pred_role` to 'BOT'.
multiple_eval_cfg = {
    language_code: {
        'evaluator': {
            'type': MultiplEEvaluator,
            'language': language_code,
            'ip_address': 'https://opencompass-multiple-evaluator.hf.space',
        },
        'pred_role': 'BOT',
    }
    for language_code in _TOP_TEN_LANGUAGE_
}

# Dataset entries for this config: one per (tag, language) pair. The outer
# loop runs over the tags so that all 'humaneval' entries come first,
# followed by all 'mbpp' entries, each group in `_TOP_TEN_LANGUAGE_` order.
# Every entry shares the same reader/infer settings and picks the evaluation
# settings matching its language.
multiple_datasets = [
    {
        'type': MultiplEDataset,
        'abbr': f'{tag}-multiple-{lang}',
        'language': lang,
        'num_repeats': 1,
        'path': 'opencompass/multipl_e',
        'tag': tag,
        'reader_cfg': multiple_reader_cfg,
        'infer_cfg': multiple_infer_cfg,
        'eval_cfg': multiple_eval_cfg[lang],
    }
    for tag in ('humaneval', 'mbpp')
    for lang in _TOP_TEN_LANGUAGE_
]
multiple_code develop 2025-03-20 13:56:27 +08:00			`# Select the 10 most popular programming languages from MultiPL-E to compose the test set.`

			`from opencompass.openicl.icl_prompt_template import PromptTemplate`
			`from opencompass.openicl.icl_retriever import ZeroRetriever`
			`from opencompass.openicl.icl_inferencer import GenInferencer`
			`from opencompass.datasets import MultiplEDataset, MultiplEEvaluator`


			`_TOP_TEN_LANGUAGE_ = ['cpp', 'cs', 'go', 'java', 'rb', 'js', 'php', 'r', 'rs', 'sh']`

			`multiple_reader_cfg = dict(input_columns=['language', 'prompt'], output_column='tests')`

			`multiple_infer_cfg = dict(`
			`prompt_template=dict(type=PromptTemplate, template='Based on the provided {language} code snippet, complete the subsequent content. The initial part of the completed code must match the provided code snippet exactly:\n{prompt}'),`
			`retriever=dict(type=ZeroRetriever),`
comments upadate 2025-03-21 11:49:54 +08:00			`inferencer=dict(type=GenInferencer),`
multiple_code develop 2025-03-20 13:56:27 +08:00			`)`

			`multiple_eval_cfg = {`
			`lang: dict(`
			`evaluator=dict(`
			`type=MultiplEEvaluator,`
			`language=lang,`
multiple_code update 2025-03-20 14:02:13 +08:00			`ip_address='https://opencompass-multiple-evaluator.hf.space',`
multiple_code develop 2025-03-20 13:56:27 +08:00			`),`
			`pred_role='BOT',`
			`) for lang in _TOP_TEN_LANGUAGE_`
			`}`

			`multiple_datasets = [`
			`dict(`
			`type=MultiplEDataset,`
			`abbr=f'humaneval-multiple-{lang}',`
			`language=lang,`
			`num_repeats=1,`
			`path='opencompass/multipl_e',`
			`tag='humaneval',`
			`reader_cfg=multiple_reader_cfg,`
			`infer_cfg=multiple_infer_cfg,`
			`eval_cfg=multiple_eval_cfg[lang],`
			`) for lang in _TOP_TEN_LANGUAGE_`
			`]`

			`multiple_datasets += [`
			`dict(`
			`type=MultiplEDataset,`
			`abbr=f'mbpp-multiple-{lang}',`
			`language=lang,`
			`num_repeats=1,`
			`path='opencompass/multipl_e',`
			`tag='mbpp',`
			`reader_cfg=multiple_reader_cfg,`
			`infer_cfg=multiple_infer_cfg,`
			`eval_cfg=multiple_eval_cfg[lang],`
			`) for lang in _TOP_TEN_LANGUAGE_`
			`]`