OpenCompass/opencompass/configs/datasets/bbeh/bbeh_gen.py

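# OpenCompass config for BBEH (BIG-Bench Extra Hard), generation-style evaluation.
# It builds one dataset entry per BBEH subtask and collects them in `bbeh_datasets`:
# multiple-choice subtasks are scored with BBEHEvaluator_mcq, free-form subtasks
# with the general BBEHEvaluator.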
import os
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import BBEHDataset, BBEHEvaluator, bbeh_mcq_postprocess, BBEHEvaluator_mcq
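
# Each BBEH example provides the question in the 'input' column and the
# reference answer in the 'target' column.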
bbeh_reader_cfg = dict(input_columns=['input'], output_column='target')
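
# BBEH subtasks whose reference answer is a single option letter such as '(a)'.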
bbeh_multiple_choice_sets = [
    'bbeh_boolean_expressions',
    'bbeh_disambiguation_qa',
    'bbeh_geometric_shapes',
    'bbeh_hyperbaton',
    'bbeh_movie_recommendation',
    'bbeh_nycc',
    'bbeh_shuffled_objects',
]
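
# BBEH subtasks whose reference answer is a free-form string or number.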
bbeh_free_form_sets = [
    'bbeh_boardgame_qa',
    'bbeh_buggy_tables',
    'bbeh_causal_understanding',
    'bbeh_dyck_languages',
    'bbeh_linguini',
    'bbeh_multistep_arithmetic',
    'bbeh_object_counting',
    'bbeh_object_properties',
    'bbeh_sarc_triples',
    'bbeh_spatial_reasoning',
    'bbeh_sportqa',
    'bbeh_temporal_sequence',
    'bbeh_time_arithmetic',
    'bbeh_web_of_lies',
    'bbeh_word_sorting',
    'bbeh_zebra_puzzles',
]

bbeh_datasets = []
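
# One zero-shot, generation-based dataset entry per multiple-choice subtask:
# no in-context examples (ZeroRetriever), free generation of up to 8192 output
# tokens, and the multiple-choice evaluator with answer-letter post-processing.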
for _name in bbeh_multiple_choice_sets:
    bbeh_infer_cfg = dict(
        prompt_template=dict(
            type=PromptTemplate,
            template=dict(round=[
                dict(
                    role='HUMAN',
                    prompt=
                    f"Think step by step, and when you provide the final answer, please use the prefix \"The answer is:\" without any modification, and provide the answer directly, with no formatting, no bolding, and no markup. For instance: \"The answer is: 42\" or \"The answer is: yes\". If the question is multiple choice with a single correct answer, the final answer must only be the letter corresponding to the correct answer. For example, \"The answer is: (a)\"\n\nQ: {{input}}\nA: "
                )
            ])),
        retriever=dict(type=ZeroRetriever),
        inferencer=dict(type=GenInferencer, max_out_len=8192))
    bbeh_eval_cfg = dict(
        evaluator=dict(type=BBEHEvaluator_mcq),
        pred_role='BOT',
        pred_postprocessor=dict(type=bbeh_mcq_postprocess),
        dataset_postprocessor=dict(type=bbeh_mcq_postprocess))
    bbeh_datasets.append(
        dict(
            type=BBEHDataset,
            path='opencompass/bbeh',
            name=_name,
            abbr=_name,
            reader_cfg=bbeh_reader_cfg,
            infer_cfg=bbeh_infer_cfg.copy(),
            eval_cfg=bbeh_eval_cfg.copy()))
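
# Free-form subtasks share the same prompt and inference settings but are
# scored with the general BBEHEvaluator instead of the multiple-choice one.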
for _name in bbeh_free_form_sets:
    bbeh_infer_cfg = dict(
        prompt_template=dict(
            type=PromptTemplate,
            template=dict(round=[
                dict(
                    role='HUMAN',
                    prompt=
                    f"Think step by step, and when you provide the final answer, please use the prefix \"The answer is:\" without any modification, and provide the answer directly, with no formatting, no bolding, and no markup. For instance: \"The answer is: 42\" or \"The answer is: yes\". If the question is multiple choice with a single correct answer, the final answer must only be the letter corresponding to the correct answer. For example, \"The answer is: (a)\"\n\nQ: {{input}}\nA: "
                )
            ])),
        retriever=dict(type=ZeroRetriever),
        inferencer=dict(type=GenInferencer, max_out_len=8192))
    bbeh_eval_cfg = dict(
        evaluator=dict(type=BBEHEvaluator),
        pred_role='BOT',
        pred_postprocessor=dict(type=bbeh_mcq_postprocess),
        dataset_postprocessor=dict(type=bbeh_mcq_postprocess))
    bbeh_datasets.append(
        dict(
            type=BBEHDataset,
            path='opencompass/bbeh',
            name=_name,
            abbr=_name,
            reader_cfg=bbeh_reader_cfg,
            infer_cfg=bbeh_infer_cfg.copy(),
            eval_cfg=bbeh_eval_cfg.copy()))
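
# Usage sketch (illustrative, not part of the original file): a run config
# would typically pull these datasets in via mmengine's `read_base`, e.g.
#
#     from mmengine.config import read_base
#     with read_base():
#         from opencompass.configs.datasets.bbeh.bbeh_gen import bbeh_datasets
#     datasets = [*bbeh_datasets]
#
# The exact import path depends on how the config tree is laid out; model
# configs are supplied separately, nothing in this file references a model.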