OpenCompass/opencompass/configs/datasets/bigcodebench/bigcodebench_hard_instruct_gen_8815eb.py
Dongsheng Zhu fff2d51440
[Update] Code evaluation alignment (#1909)
* code alignment

* update oss md5

* bigcodebench update

* lint

* lint_

* lint yapf
2025-03-04 18:49:38 +08:00

47 lines
1.6 KiB
Python

from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import (BigCodeBenchDataset, BigCodeBenchEvaluator)
# Reader config: names the single input column and the output column.
bigcodebench_hard_reader_cfg = {
    'input_columns': ['instruct_prompt'],
    'output_column': 'test',
}
# Inference config with three parts:
#   * prompt_template: a PromptTemplate whose `begin` holds one empty
#     'system' entry (fallback_role 'HUMAN') and whose `round` holds one
#     'HUMAN' entry carrying the '{instruct_prompt}' placeholder;
#   * retriever: ZeroRetriever;
#   * inferencer: GenInferencer with max_out_len=8192.
bigcodebench_hard_infer_cfg = {
    'prompt_template': {
        'type': PromptTemplate,
        'template': {
            'begin': [
                {'role': 'system', 'fallback_role': 'HUMAN', 'prompt': ''},
            ],
            'round': [
                {'role': 'HUMAN', 'prompt': '{instruct_prompt}'},
            ],
        },
    },
    'retriever': {'type': ZeroRetriever},
    'inferencer': {'type': GenInferencer, 'max_out_len': 8192},
}
# Evaluation config: the evaluator spec (type, release version, eval type,
# remote execution API URL, dataset version) plus the prediction role.
bigcodebench_hard_eval_cfg = {
    'evaluator': {
        'type': BigCodeBenchEvaluator,
        'release_version': 'v0.1.2',
        'eval_type': 'instruct',
        # Alternative endpoint, currently disabled:
        #   'https://bigcode-bigcodebench-evaluator.hf.space/'
        'remote_execute_api':
        'https://opencompass-opencompass-bigcodebench-evaluator.hf.space',  # noqa: E501
        'dataset_version': 'hard',
    },
    'pred_role': 'BOT',
}
# Dataset list with a single entry: the 'hard' BigCodeBench split at release
# 'v0.1.2', wired to the reader / inference / evaluation configs
# (bigcodebench_hard_reader_cfg, _infer_cfg, _eval_cfg) of this module.
bigcodebench_hard_instruct_datasets = [
    {
        'abbr': 'bigcodebench_hard_instruct',
        'type': BigCodeBenchDataset,
        'path': 'opencompass/bigcodebench',
        'reader_cfg': bigcodebench_hard_reader_cfg,
        'infer_cfg': bigcodebench_hard_infer_cfg,
        'eval_cfg': bigcodebench_hard_eval_cfg,
        'release_version': 'v0.1.2',
        'dataset_version': 'hard',
    },
]