# OpenCompass dataset configuration: BigCodeBench, "hard" dataset version,
# evaluated with eval_type='instruct'.
#
# The module exposes four names:
#   * bigcodebench_hard_reader_cfg        - which columns are read
#   * bigcodebench_hard_infer_cfg         - prompt template / retriever / inferencer
#   * bigcodebench_hard_eval_cfg          - evaluator settings
#   * bigcodebench_hard_instruct_datasets - the dataset list tying them together

from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import (
    BigCodeBenchDataset,
    BigCodeBenchEvaluator,
)

# Reader: 'instruct_prompt' is the only input column; 'test' is the output
# column.
bigcodebench_hard_reader_cfg = dict(
    input_columns=['instruct_prompt'],
    output_column='test',
)

# Inference: a single HUMAN turn containing the instruct prompt, preceded by
# an empty system message that falls back to the HUMAN role.
bigcodebench_hard_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            begin=[
                dict(role='system', fallback_role='HUMAN', prompt=''),
            ],
            round=[
                dict(role='HUMAN', prompt='{instruct_prompt}'),
            ],
        ),
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=8192),
)

# Evaluation: the evaluator is given a remote execution endpoint.
# release_version and dataset_version here carry the same values as the
# dataset entry below; keep the two in sync when changing either.
bigcodebench_hard_eval_cfg = dict(
    evaluator=dict(
        type=BigCodeBenchEvaluator,
        release_version='v0.1.2',
        eval_type='instruct',
        remote_execute_api='https://bigcode-bigcodebench-evaluator.hf.space/',
        dataset_version='hard',
    ),
    pred_role='BOT',
)

# Dataset list containing the single "hard instruct" entry, wired to the
# reader / infer / eval configs defined above.
bigcodebench_hard_instruct_datasets = [
    dict(
        abbr='bigcodebench_hard_instruct',
        type=BigCodeBenchDataset,
        path='opencompass/bigcodebench',
        reader_cfg=bigcodebench_hard_reader_cfg,
        infer_cfg=bigcodebench_hard_infer_cfg,
        eval_cfg=bigcodebench_hard_eval_cfg,
        release_version='v0.1.2',
        dataset_version='hard',
    ),
]