mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
humaneval
This commit is contained in:
parent
febd188403
commit
3d477dd265
@ -40,6 +40,6 @@ bigcodebench_full_instruct_datasets = [
|
|||||||
infer_cfg=bigcodebench_full_infer_cfg,
|
infer_cfg=bigcodebench_full_infer_cfg,
|
||||||
eval_cfg=bigcodebench_full_eval_cfg,
|
eval_cfg=bigcodebench_full_eval_cfg,
|
||||||
release_version='v0.1.2',
|
release_version='v0.1.2',
|
||||||
n=3,
|
n=5,
|
||||||
k=2)
|
k=3)
|
||||||
]
|
]
|
||||||
|
@ -42,7 +42,7 @@ bigcodebench_hard_instruct_datasets = [
|
|||||||
eval_cfg=bigcodebench_hard_eval_cfg,
|
eval_cfg=bigcodebench_hard_eval_cfg,
|
||||||
release_version='v0.1.2',
|
release_version='v0.1.2',
|
||||||
dataset_version='hard',
|
dataset_version='hard',
|
||||||
n=3,
|
n=5,
|
||||||
k=2
|
k=3
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
@ -0,0 +1,37 @@
|
|||||||
|
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||||
|
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||||
|
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||||
|
from opencompass.datasets import HumanevalDataset, HumanEvalEvaluator, humaneval_postprocess_v2
|
||||||
|
|
||||||
|
humaneval_reader_cfg = dict(
|
||||||
|
input_columns=['prompt'], output_column='task_id', train_split='test')
|
||||||
|
|
||||||
|
# TODO: allow empty output-column
|
||||||
|
humaneval_infer_cfg = dict(
|
||||||
|
prompt_template=dict(
|
||||||
|
type=PromptTemplate,
|
||||||
|
template=dict(round=[
|
||||||
|
dict(
|
||||||
|
role='HUMAN',
|
||||||
|
prompt='Read the following function signature and docstring, and fully implement the function described. Your response should only contain the code for this function.\n{prompt}'),
|
||||||
|
])),
|
||||||
|
retriever=dict(type=ZeroRetriever),
|
||||||
|
inferencer=dict(type=GenInferencer))
|
||||||
|
|
||||||
|
humaneval_eval_cfg = dict(
|
||||||
|
evaluator=dict(type=HumanEvalEvaluator),
|
||||||
|
pred_role='BOT',
|
||||||
|
pred_postprocessor=dict(type=humaneval_postprocess_v2),
|
||||||
|
)
|
||||||
|
|
||||||
|
humaneval_datasets = [
|
||||||
|
dict(
|
||||||
|
abbr='openai_humaneval',
|
||||||
|
type=HumanevalDataset,
|
||||||
|
path='opencompass/humaneval',
|
||||||
|
reader_cfg=humaneval_reader_cfg,
|
||||||
|
infer_cfg=humaneval_infer_cfg,
|
||||||
|
eval_cfg=humaneval_eval_cfg,
|
||||||
|
n=3,
|
||||||
|
k=2)
|
||||||
|
]
|
@ -183,13 +183,13 @@ def humaneval_postprocess_v2(text: str) -> str:
|
|||||||
blocks = re.findall(r'```\w*\n(.*?)```', text, re.DOTALL)
|
blocks = re.findall(r'```\w*\n(.*?)```', text, re.DOTALL)
|
||||||
if len(blocks) >= 1:
|
if len(blocks) >= 1:
|
||||||
text = blocks[0]
|
text = blocks[0]
|
||||||
return text
|
return text.lstrip()
|
||||||
|
|
||||||
def humaneval_postprocess_v3(text: str) -> str:
|
def humaneval_postprocess_v3(text: str) -> str:
|
||||||
blocks = re.findall(r'```\w*\n(.*?)```', text, re.DOTALL)
|
blocks = re.findall(r'```\w*\n(.*?)```', text, re.DOTALL)
|
||||||
if len(blocks) >= 1:
|
if len(blocks) >= 1:
|
||||||
text = blocks[-1]
|
text = blocks[-1]
|
||||||
return text
|
return text.lstrip()
|
||||||
|
|
||||||
def humaneval_internal_v2_postprocess(text: str):
|
def humaneval_internal_v2_postprocess(text: str):
|
||||||
if text.startswith('   ') and not text.startswith('    '):
|
if text.startswith('   ') and not text.startswith('    '):
|
||||||
|
Loading…
Reference in New Issue
Block a user