Myhs-phz 2025-03-24 08:27:45 +00:00
parent b1c2f60d32
commit 2639d113d8
8 changed files with 18 additions and 11 deletions


@@ -313,6 +313,12 @@
     paper: https://arxiv.org/pdf/2210.09261
     configpath: opencompass/configs/datasets/bbh/bbh_gen.py
     configpath_llmjudge: opencompass/configs/datasets/bbh/bbh_llm_judge_gen.py
+- bbeh:
+    name: BIG-Bench Extra Hard
+    category: Reasoning
+    paper: https://arxiv.org/abs/2502.19187
+    configpath: opencompass/configs/datasets/bbeh
+    configpath_llmjudge: ''
 - BoolQ:
     name: SuperGLUE / BoolQ
     category: Knowledge
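The new index entry points at a config directory rather than a single _gen.py file, so the exact dataset module name is not visible in this diff. As a rough sketch (assuming the usual bbeh_gen module and bbeh_datasets list names, which this commit does not confirm), the indexed configs would be pulled into an evaluation config like this:

# Hedged sketch: module and variable names under configs/datasets/bbeh are
# assumed (bbeh_gen / bbeh_datasets follow the usual OpenCompass naming); the
# model config imported below is purely illustrative.
from mmengine.config import read_base

with read_base():
    from opencompass.configs.datasets.bbeh.bbeh_gen import bbeh_datasets  # assumed name
    from opencompass.configs.models.hf_internlm.hf_internlm2_5_7b_chat import models  # illustrative model

datasets = [*bbeh_datasets]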


@@ -2,6 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.datasets import Aime2024Dataset, MATHEvaluator, math_postprocess_v2
+from opencompass.datasets import CustomDataset
 aime2024_reader_cfg = dict(
@@ -20,7 +21,7 @@ aime2024_infer_cfg = dict(
         )
     ),
     retriever=dict(type=ZeroRetriever),
-    inferencer=dict(type=GenInferencer, max_out_len=2048)
+    inferencer=dict(type=GenInferencer)
 )
 aime2024_eval_cfg = dict(
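This and the following hunks all drop the per-dataset max_out_len from GenInferencer, leaving the generation cap to be set outside the dataset config. A minimal sketch of the usual place for it, the model config, is below; the model name and values are illustrative and not part of this commit:

# Hedged sketch: with max_out_len removed from the dataset-side inferencer,
# the cap would typically come from the model definition. Values illustrative.
from opencompass.models import HuggingFacewithChatTemplate

models = [
    dict(
        type=HuggingFacewithChatTemplate,
        abbr='qwen2.5-7b-instruct-hf',    # illustrative
        path='Qwen/Qwen2.5-7B-Instruct',  # illustrative
        max_out_len=2048,                 # per-model generation cap
        batch_size=8,
        run_cfg=dict(num_gpus=1),
    )
]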


@@ -54,7 +54,7 @@ for _name in bbh_multiple_choice_sets:
                 )
             ])),
         retriever=dict(type=ZeroRetriever),
-        inferencer=dict(type=GenInferencer, max_out_len=512))
+        inferencer=dict(type=GenInferencer))
     bbh_eval_cfg = dict(
         evaluator=dict(type=BBHEvaluator_mcq),
         pred_role='BOT',
@@ -85,7 +85,7 @@ for _name in bbh_free_form_sets:
                 )
             ])),
         retriever=dict(type=ZeroRetriever),
-        inferencer=dict(type=GenInferencer, max_out_len=512))
+        inferencer=dict(type=GenInferencer))
     bbh_eval_cfg = dict(evaluator=dict(type=BBHEvaluator), pred_role='BOT')
     bbh_datasets.append(


@@ -81,7 +81,7 @@ for _name in bbh_multiple_choice_sets:
                 )
             ])),
         retriever=dict(type=ZeroRetriever),
-        inferencer=dict(type=GenInferencer, max_out_len=512))
+        inferencer=dict(type=GenInferencer))
     bbh_eval_cfg = dict(
         evaluator=dict(
             type=GenericLLMEvaluator,
@@ -137,7 +137,7 @@ for _name in bbh_free_form_sets:
                 )
             ])),
         retriever=dict(type=ZeroRetriever),
-        inferencer=dict(type=GenInferencer, max_out_len=512))
+        inferencer=dict(type=GenInferencer))
     bbh_eval_cfg = dict(
         evaluator=dict(
             type=GenericLLMEvaluator,
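The two bbh hunks just above touch configs whose evaluator is GenericLLMEvaluator, i.e. an LLM-as-judge setup. A rough sketch of how such an evaluator block is usually filled in follows; the import paths, the judge_cfg and dict_postprocessor fields, and the GRADER_TEMPLATE wiring are assumptions based on other OpenCompass llm-judge configs, not something this diff shows:

# Hedged sketch of a GenericLLMEvaluator (LLM-as-judge) eval config. Import
# paths and field names beyond `type`/`prompt_template` are assumptions.
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.evaluator import GenericLLMEvaluator
from opencompass.datasets import generic_llmjudge_postprocess

# Illustrative grading prompt; real configs define a much longer template.
GRADER_TEMPLATE = 'Judge whether the prediction matches the reference.\n{prediction}\n{reference}'

bbh_eval_cfg = dict(
    evaluator=dict(
        type=GenericLLMEvaluator,
        prompt_template=dict(
            type=PromptTemplate,
            template=dict(round=[
                dict(role='HUMAN', prompt=GRADER_TEMPLATE),
            ]),
        ),
        judge_cfg=dict(),  # assumed: filled with, or overridden by, the judge model config
        dict_postprocessor=dict(type=generic_llmjudge_postprocess),  # assumed helper
    ),
    pred_role='BOT',
)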


@@ -15,9 +15,9 @@ bigcodebench_hard_infer_cfg = dict(prompt_template=dict(
         round=[
             dict(role='HUMAN', prompt='{instruct_prompt}'),
         ])),
-    retriever=dict(type=ZeroRetriever),
-    inferencer=dict(type=GenInferencer,
-                    max_out_len=8192))
+    retriever=dict(type=ZeroRetriever),
+    inferencer=dict(type=GenInferencer,)
+)
 bigcodebench_hard_eval_cfg = dict(
     evaluator=dict(


@@ -37,7 +37,7 @@ for category in categories:
     infer_cfg = dict(
         prompt_template=prompt_template,
         retriever=dict(type=ZeroRetriever),
-        inferencer=dict(type=GenInferencer, max_out_len=1024),
+        inferencer=dict(type=GenInferencer),
     )
     # Evaluation configuration


@@ -63,7 +63,7 @@ for category in categories:
     infer_cfg = dict(
         prompt_template=prompt_template,
         retriever=dict(type=ZeroRetriever),
-        inferencer=dict(type=GenInferencer, max_out_len=1024),
+        inferencer=dict(type=GenInferencer),
     )
     # Evaluation configuration


@@ -17,7 +17,7 @@ math_infer_cfg = dict(
         ),
     ),
     retriever=dict(type=ZeroRetriever),
-    inferencer=dict(type=GenInferencer, max_out_len=8192),
+    inferencer=dict(type=GenInferencer),
 )
 GRADER_TEMPLATE = """