humaneval

This commit is contained in:
Dongsheng Zhu 2025-05-14 03:47:10 +00:00
parent febd188403
commit 3d477dd265
4 changed files with 43 additions and 6 deletions

View File

@ -40,6 +40,6 @@ bigcodebench_full_instruct_datasets = [
infer_cfg=bigcodebench_full_infer_cfg,
eval_cfg=bigcodebench_full_eval_cfg,
release_version='v0.1.2',
n=3,
k=2)
n=5,
k=3)
]

View File

@ -42,7 +42,7 @@ bigcodebench_hard_instruct_datasets = [
eval_cfg=bigcodebench_hard_eval_cfg,
release_version='v0.1.2',
dataset_version='hard',
n=3,
k=2
n=5,
k=3
)
]

View File

@ -0,0 +1,37 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import HumanevalDataset, HumanEvalEvaluator, humaneval_postprocess_v2
# Reader settings for the HumanEval config: `prompt` is the only input column
# and `task_id` is named as the output column; the train split is set to
# 'test'.
humaneval_reader_cfg = {
    'input_columns': ['prompt'],
    'output_column': 'task_id',
    'train_split': 'test',
}
# TODO: allow empty output-column
# Inference settings: a prompt template with a single HUMAN-role turn that
# carries the instruction text followed by the `{prompt}` placeholder, paired
# with ZeroRetriever as the retriever and GenInferencer as the inferencer.
humaneval_infer_cfg = {
    'prompt_template': {
        'type': PromptTemplate,
        'template': {
            'round': [
                {
                    'role': 'HUMAN',
                    'prompt': 'Read the following function signature and docstring, and fully implement the function described. Your response should only contain the code for this function.\n{prompt}',
                },
            ],
        },
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer},
}
# Evaluation settings: predictions are taken from the 'BOT' role, passed
# through `humaneval_postprocess_v2`, and scored by HumanEvalEvaluator.
humaneval_eval_cfg = {
    'evaluator': {'type': HumanEvalEvaluator},
    'pred_role': 'BOT',
    'pred_postprocessor': {'type': humaneval_postprocess_v2},
}
# Dataset list exported by this config: a single HumanEval entry wiring
# together the reader, inference and evaluation settings defined in this file.
humaneval_datasets = [
    {
        'abbr': 'openai_humaneval',
        'type': HumanevalDataset,
        'path': 'opencompass/humaneval',
        'reader_cfg': humaneval_reader_cfg,
        'infer_cfg': humaneval_infer_cfg,
        'eval_cfg': humaneval_eval_cfg,
        # NOTE(review): presumably `n` is the number of samples generated per
        # problem and `k` the pass@k cutoff -- confirm against the evaluator.
        'n': 3,
        'k': 2,
    },
]

View File

@ -183,13 +183,13 @@ def humaneval_postprocess_v2(text: str) -> str:
blocks = re.findall(r'```\w*\n(.*?)```', text, re.DOTALL)
if len(blocks) >= 1:
text = blocks[0]
return text
return text.lstrip()
def humaneval_postprocess_v3(text: str) -> str:
    """Return the last fenced code block in *text*, left-stripped.

    The text is searched for Markdown-style fenced blocks (three backticks,
    an optional language tag, a newline, the body, three backticks). When at
    least one block is found, the body of the last one is used; otherwise
    the input text is used unchanged. Leading whitespace is then removed
    from the chosen text.

    Args:
        text: Raw model output that may contain fenced code blocks.

    Returns:
        The selected text with leading whitespace stripped.
    """
    blocks = re.findall(r'```\w*\n(.*?)```', text, re.DOTALL)
    if blocks:
        # Prefer the final block when several are present.
        text = blocks[-1]
    # Single exit point: the stripped variant is the behavior introduced by
    # this change; the earlier bare `return text` would have shadowed it.
    return text.lstrip()
def humaneval_internal_v2_postprocess(text: str):
if text.startswith(' ') and not text.startswith(' '):