from opencompass.openicl.icl_prompt_template import PromptTemplate from opencompass.openicl.icl_retriever import ZeroRetriever from opencompass.openicl.icl_inferencer import GenInferencer from opencompass.datasets import (LCBCodeGenerationDataset, LCBCodeExecutionDataset, LCBTestOutputPredictionDataset, LCBCodeGenerationEvaluator, LCBCodeExecutionEvaluator, LCBTestOutputEvaluator) lcb_code_generation_reader_cfg = dict( input_columns=[ 'question_content', 'format_prompt', ], # output_column='evaluation_sample', output_column='question_id', ) SYSTEM_MESSAGE_GENERIC = 'You are an expert Python programmer. You will be given a question (problem specification) and will generate a correct Python program that matches the specification and passes all tests. You will NOT return anything except for the program.' # noqa: E501 prompt_template = '### Question:\n{question_content}\n\n{format_prompt}' + \ '### Answer: (use the provided format with backticks)\n\n' # Code Generation Tasks lcb_code_generation_infer_cfg = dict(prompt_template=dict( type=PromptTemplate, template=dict(round=[dict(role='HUMAN', prompt=prompt_template)])), retriever=dict(type=ZeroRetriever), inferencer=dict(type=GenInferencer)) lcb_code_generation_eval_cfg = dict( evaluator=dict(type=LCBCodeGenerationEvaluator, num_process_evaluate=4, timeout=6, release_version='release_v5', start_date='2024-08-01', end_date='2025-02-01'), pred_role='BOT', ) LCBCodeGeneration_dataset = dict( type=LCBCodeGenerationDataset, abbr='lcb_code_generation', path='opencompass/code_generation_lite', reader_cfg=lcb_code_generation_reader_cfg, infer_cfg=lcb_code_generation_infer_cfg, eval_cfg=lcb_code_generation_eval_cfg, release_version='release_v5', ) # Code Execution Dataset lcb_code_execution_reader_cfg = dict( input_columns=[ 'prompt', ], output_column='evaluation_sample', ) lcb_code_execution_infer_cfg = dict( prompt_template=dict( type=PromptTemplate, template=dict( begin=[ dict( role='SYSTEM', fallback_role='HUMAN', prompt= 'You are an expert at Python programming, code execution, test case generation, and fuzzing.' # noqa: E501 ), ], round=[dict(role='HUMAN', prompt='{prompt}')])), retriever=dict(type=ZeroRetriever), inferencer=dict(type=GenInferencer)) lcb_code_execution_eval_cfg = dict( evaluator=dict(type=LCBCodeExecutionEvaluator, ), pred_role='BOT', ) LCBCodeExecution_dataset = dict( type=LCBCodeExecutionDataset, abbr='lcb_code_execution', path='opencompass/execution-v2', reader_cfg=lcb_code_execution_reader_cfg, infer_cfg=lcb_code_execution_infer_cfg, eval_cfg=lcb_code_execution_eval_cfg, ) # TestOuputput Dataset lcb_test_output_reader_cfg = dict( input_columns=[ 'prompt', ], output_column='evaluation_sample', ) system_prompt = 'You are an expert Python programmer. You will be given a question (problem specification) and will generate a correct Python program that matches the specification and passes all tests. You will NOT return anything except for the program.' # noqa: E501 lcb_test_output_infer_cfg = dict( prompt_template=dict( type=PromptTemplate, template=dict( # begin=[ # dict( # role='SYSTEM', # prompt=system_prompt # ), # ], round=[dict(role='HUMAN', prompt='{prompt}')])), retriever=dict(type=ZeroRetriever), inferencer=dict(type=GenInferencer)) lcb_test_output_eval_cfg = dict( evaluator=dict(type=LCBTestOutputEvaluator, ), pred_role='BOT', ) LCBTestOutput_dataset = dict( type=LCBTestOutputPredictionDataset, abbr='lcb_test_output', path='opencompass/test_generation', reader_cfg=lcb_test_output_reader_cfg, infer_cfg=lcb_test_output_infer_cfg, eval_cfg=lcb_test_output_eval_cfg, ) LCB_datasets = [ LCBCodeGeneration_dataset, LCBCodeExecution_dataset, LCBTestOutput_dataset, ]