OpenCompass/opencompass/configs/datasets/bbeh/bbeh_gen.py

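# OpenCompass config for BBEH (BIG-Bench Extra Hard), generation-style evaluation.
# It builds one dataset entry per BBEH subtask and collects them in `bbeh_datasets`:
# multiple-choice subtasks are scored with BBEHEvaluator_mcq, free-form subtasks
# with the general BBEHEvaluator.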
import os
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import BBEHDataset, BBEHEvaluator, bbeh_mcq_postprocess, BBEHEvaluator_mcq
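
# Each BBEH example provides the question in the 'input' column and the
# reference answer in the 'target' column.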
bbeh_reader_cfg = dict(input_columns=['input'], output_column='target')
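
# BBEH subtasks whose reference answer is a single option letter such as '(a)'.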
bbeh_multiple_choice_sets = [
    'bbeh_boolean_expressions',
    'bbeh_disambiguation_qa',
    'bbeh_geometric_shapes',
    'bbeh_hyperbaton',
    'bbeh_movie_recommendation',
    'bbeh_nycc',
    'bbeh_shuffled_objects',
]
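
# BBEH subtasks whose reference answer is a free-form string or number.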
bbeh_free_form_sets = [
    'bbeh_boardgame_qa',
    'bbeh_buggy_tables',
    'bbeh_causal_understanding',
    'bbeh_dyck_languages',
    'bbeh_linguini',
    'bbeh_multistep_arithmetic',
    'bbeh_object_counting',
    'bbeh_object_properties',
    'bbeh_sarc_triples',
    'bbeh_spatial_reasoning',
    'bbeh_sportqa',
    'bbeh_temporal_sequence',
    'bbeh_time_arithmetic',
    'bbeh_web_of_lies',
    'bbeh_word_sorting',
    'bbeh_zebra_puzzles',
]

bbeh_datasets = []
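
# One zero-shot, generation-based dataset entry per multiple-choice subtask:
# no in-context examples (ZeroRetriever), free generation of up to 8192 output
# tokens, and the multiple-choice evaluator with answer-letter post-processing.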
for _name in bbeh_multiple_choice_sets:
    bbeh_infer_cfg = dict(
        prompt_template=dict(
            type=PromptTemplate,
            template=dict(round=[
                dict(
                    role='HUMAN',
                    prompt=
                    f"Think step by step, and when you provide the final answer, please use the prefix \"The answer is:\" without any modification, and provide the answer directly, with no formatting, no bolding, and no markup. For instance: \"The answer is: 42\" or \"The answer is: yes\". If the question is multiple choice with a single correct answer, the final answer must only be the letter corresponding to the correct answer. For example, \"The answer is: (a)\"\n\nQ: {{input}}\nA: "
                )
            ])),
        retriever=dict(type=ZeroRetriever),
        inferencer=dict(type=GenInferencer, max_out_len=8192))
    bbeh_eval_cfg = dict(
        evaluator=dict(type=BBEHEvaluator_mcq),
        pred_role='BOT',
        pred_postprocessor=dict(type=bbeh_mcq_postprocess),
        dataset_postprocessor=dict(type=bbeh_mcq_postprocess))
    bbeh_datasets.append(
        dict(
            type=BBEHDataset,
            path='opencompass/bbeh',
            name=_name,
            abbr=_name,
            reader_cfg=bbeh_reader_cfg,
            infer_cfg=bbeh_infer_cfg.copy(),
            eval_cfg=bbeh_eval_cfg.copy()))
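
# Free-form subtasks share the same prompt and inference settings but are
# scored with the general BBEHEvaluator instead of the multiple-choice one.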
for _name in bbeh_free_form_sets:
    bbeh_infer_cfg = dict(
        prompt_template=dict(
            type=PromptTemplate,
            template=dict(round=[
                dict(
                    role='HUMAN',
                    prompt=
                    f"Think step by step, and when you provide the final answer, please use the prefix \"The answer is:\" without any modification, and provide the answer directly, with no formatting, no bolding, and no markup. For instance: \"The answer is: 42\" or \"The answer is: yes\". If the question is multiple choice with a single correct answer, the final answer must only be the letter corresponding to the correct answer. For example, \"The answer is: (a)\"\n\nQ: {{input}}\nA: "
                )
            ])),
        retriever=dict(type=ZeroRetriever),
        inferencer=dict(type=GenInferencer, max_out_len=8192))
    bbeh_eval_cfg = dict(
        evaluator=dict(type=BBEHEvaluator),
        pred_role='BOT',
        pred_postprocessor=dict(type=bbeh_mcq_postprocess),
        dataset_postprocessor=dict(type=bbeh_mcq_postprocess))
    bbeh_datasets.append(
        dict(
            type=BBEHDataset,
            path='opencompass/bbeh',
            name=_name,
            abbr=_name,
            reader_cfg=bbeh_reader_cfg,
            infer_cfg=bbeh_infer_cfg.copy(),
            eval_cfg=bbeh_eval_cfg.copy()))
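
# Usage sketch (illustrative, not part of the original file): a run config
# would typically pull these datasets in via mmengine's `read_base`, e.g.
#
#     from mmengine.config import read_base
#     with read_base():
#         from opencompass.configs.datasets.bbeh.bbeh_gen import bbeh_datasets
#     datasets = [*bbeh_datasets]
#
# The exact import path depends on how the config tree is laid out; model
# configs are supplied separately, nothing in this file references a model.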