mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00

* bbeh * bbeh * fix_smallbugs_bbeh * removeprint * results --------- Co-authored-by: yufeng zhao <zhaoyufeng@pjlab.org.cn>
93 lines
3.8 KiB
Python
93 lines
3.8 KiB
Python
import os
|
|
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
|
from opencompass.openicl.icl_retriever import ZeroRetriever
|
|
from opencompass.openicl.icl_inferencer import GenInferencer
|
|
from opencompass.openicl.icl_evaluator import AccEvaluator
|
|
from opencompass.datasets import BBEHDataset, BBEHEvaluator, bbeh_mcq_postprocess, BBEHEvaluator_mcq
|
|
|
|
# Reader configuration shared by every BBEH subset below: 'input' is declared
# as the only input column and 'target' as the output column.
bbeh_reader_cfg = dict(input_columns=['input'], output_column='target')

# Subset names that the multiple-choice loop in this file iterates over; each
# one is evaluated with BBEHEvaluator_mcq.
bbeh_multiple_choice_sets = [
    'bbeh_boolean_expressions',
    'bbeh_disambiguation_qa',
    'bbeh_geometric_shapes',
    'bbeh_hyperbaton',
    'bbeh_movie_recommendation',
    'bbeh_nycc',
    'bbeh_shuffled_objects',
]

# Subset names that the free-form loop in this file iterates over; each one is
# evaluated with BBEHEvaluator.
bbeh_free_form_sets = [
    'bbeh_boardgame_qa',
    'bbeh_buggy_tables',
    'bbeh_causal_understanding',
    'bbeh_dyck_languages',
    'bbeh_linguini',
    'bbeh_multistep_arithmetic',
    'bbeh_object_counting',
    'bbeh_object_properties',
    'bbeh_sarc_triples',
    'bbeh_spatial_reasoning',
    'bbeh_sportqa',
    'bbeh_temporal_sequence',
    'bbeh_time_arithmetic',
    'bbeh_web_of_lies',
    'bbeh_word_sorting',
    'bbeh_zebra_puzzles',
]

# Collects one dataset config dict per BBEH subset; both loops in this file
# append to it.
bbeh_datasets = []

# Multiple-choice subsets: generation with ZeroRetriever + GenInferencer,
# evaluated by BBEHEvaluator_mcq.
for _name in bbeh_multiple_choice_sets:
    bbeh_infer_cfg = dict(
        prompt_template=dict(
            type=PromptTemplate,
            template=dict(round=[
                dict(
                    role='HUMAN',
                    # The f-string performs no substitution; its only effect
                    # is collapsing '{{input}}' to the literal '{input}'.
                    # NOTE(review): the missing space in `is:\"without` is
                    # reproduced verbatim -- changing it alters the text sent
                    # to the model, so confirm against the reference prompt
                    # before touching it.
                    prompt=
                    f"Think step by step, and when you provide the final answer, please use the prefix \"The answer is:\"without any modification, and provide the answer directly, with no formatting, no bolding, and no markup. For instance: \"The answer is: 42\" or \"The answer is: yes\". If the question is multiple choice with a single correct answer, the final answer must only be the letter corresponding to the correct answer. For example, \"The answer is: (a)\"\n\nQ: {{input}}\nA: "
                )
            ])),
        retriever=dict(type=ZeroRetriever),
        inferencer=dict(type=GenInferencer, max_out_len=8192))
    # The same post-processor is applied to predictions and to the dataset
    # side before evaluation.
    bbeh_eval_cfg = dict(
        evaluator=dict(type=BBEHEvaluator_mcq),
        pred_role='BOT',
        pred_postprocessor=dict(type=bbeh_mcq_postprocess),
        dataset_postprocessor=dict(type=bbeh_mcq_postprocess))

    # Shallow copies keep each appended entry's top-level dict distinct from
    # the loop variables that are rebound on the next iteration.
    bbeh_datasets.append(
        dict(
            type=BBEHDataset,
            path='opencompass/bbeh',
            name=_name,
            abbr=_name,
            reader_cfg=bbeh_reader_cfg,
            infer_cfg=bbeh_infer_cfg.copy(),
            eval_cfg=bbeh_eval_cfg.copy()))

# Free-form subsets: generation with ZeroRetriever + GenInferencer, evaluated
# by BBEHEvaluator.
for _name in bbeh_free_form_sets:
    bbeh_infer_cfg = dict(
        prompt_template=dict(
            type=PromptTemplate,
            template=dict(round=[
                dict(
                    role='HUMAN',
                    # The f-string performs no substitution; its only effect
                    # is collapsing '{{input}}' to the literal '{input}'.
                    # NOTE(review): the missing space in `is:\"without` is
                    # reproduced verbatim -- changing it alters the text sent
                    # to the model, so confirm against the reference prompt
                    # before touching it.
                    prompt=
                    f"Think step by step, and when you provide the final answer, please use the prefix \"The answer is:\"without any modification, and provide the answer directly, with no formatting, no bolding, and no markup. For instance: \"The answer is: 42\" or \"The answer is: yes\". If the question is multiple choice with a single correct answer, the final answer must only be the letter corresponding to the correct answer. For example, \"The answer is: (a)\"\n\nQ: {{input}}\nA: "
                )
            ])),
        retriever=dict(type=ZeroRetriever),
        inferencer=dict(type=GenInferencer, max_out_len=8192))
    # NOTE(review): free-form subsets reuse bbeh_mcq_postprocess for both the
    # prediction and the dataset side; presumably intentional, but confirm
    # that a multiple-choice post-processor is what BBEHEvaluator expects.
    bbeh_eval_cfg = dict(
        evaluator=dict(type=BBEHEvaluator),
        pred_role='BOT',
        pred_postprocessor=dict(type=bbeh_mcq_postprocess),
        dataset_postprocessor=dict(type=bbeh_mcq_postprocess))

    # Shallow copies keep each appended entry's top-level dict distinct from
    # the loop variables that are rebound on the next iteration.
    bbeh_datasets.append(
        dict(
            type=BBEHDataset,
            path='opencompass/bbeh',
            name=_name,
            abbr=_name,
            reader_cfg=bbeh_reader_cfg,
            infer_cfg=bbeh_infer_cfg.copy(),
            eval_cfg=bbeh_eval_cfg.copy()))