# Select the 10 most popular programming languages from MultiPL-E to compose the test set.
from opencompass . openicl . icl_prompt_template import PromptTemplate
from opencompass . openicl . icl_retriever import ZeroRetriever
from opencompass . openicl . icl_inferencer import GenInferencer
from opencompass . datasets import MultiplEDataset , MultiplEEvaluator
_TOP_TEN_LANGUAGE_ = [ ' cpp ' , ' cs ' , ' go ' , ' java ' , ' rb ' , ' js ' , ' php ' , ' r ' , ' rs ' , ' sh ' ]
# Reader configuration shared by every dataset entry in this file.
# The input column names must match the `{language}` and `{prompt}`
# placeholders used by the prompt template exactly (no padding).
multiple_reader_cfg = dict(
    input_columns=['language', 'prompt'],
    output_column='tests',
)
# Inference configuration shared by every dataset entry in this file:
# a single prompt template, a ZeroRetriever and a GenInferencer.
multiple_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        # The placeholder names match the reader's input columns
        # ('language' and 'prompt'). The literal is split across lines only
        # for readability; adjacent literals concatenate into one string.
        template=(
            'Based on the provided {language} code snippet, complete the '
            'subsequent content. The initial part of the completed code must '
            'match the provided code snippet exactly:\n{prompt}'
        ),
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer),
)
# Per-language evaluation configuration, keyed by language code so that each
# dataset entry can look up its own config with `multiple_eval_cfg[lang]`.
multiple_eval_cfg = {
    lang: dict(
        evaluator=dict(
            type=MultiplEEvaluator,
            language=lang,
            # NOTE(review): presumably the endpoint of the remote evaluation
            # service used by MultiplEEvaluator -- confirm against its docs.
            ip_address='https://opencompass-multiple-evaluator.hf.space',
        ),
        pred_role='BOT',
    )
    for lang in _TOP_TEN_LANGUAGE_
}
# One dataset entry per language for the 'humaneval' tag.
# All entries share the same reader/infer configs; only the language, the
# abbreviation derived from it and the per-language eval config differ.
multiple_datasets = [
    dict(
        type=MultiplEDataset,
        abbr=f'humaneval-multiple-{lang}',
        language=lang,
        num_repeats=1,
        path='opencompass/multipl_e',
        tag='humaneval',
        reader_cfg=multiple_reader_cfg,
        infer_cfg=multiple_infer_cfg,
        eval_cfg=multiple_eval_cfg[lang],
    )
    for lang in _TOP_TEN_LANGUAGE_
]
# Append one dataset entry per language for the 'mbpp' tag, after the entries
# already present in `multiple_datasets`. Same shared reader/infer configs and
# per-language eval config lookup; only `abbr` and `tag` differ.
multiple_datasets += [
    dict(
        type=MultiplEDataset,
        abbr=f'mbpp-multiple-{lang}',
        language=lang,
        num_repeats=1,
        path='opencompass/multipl_e',
        tag='mbpp',
        reader_cfg=multiple_reader_cfg,
        infer_cfg=multiple_infer_cfg,
        eval_cfg=multiple_eval_cfg[lang],
    )
    for lang in _TOP_TEN_LANGUAGE_
]