mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
fix
This commit is contained in:
parent
b1c2f60d32
commit
2639d113d8
@ -313,6 +313,12 @@
|
||||
paper: https://arxiv.org/pdf/2210.09261
|
||||
configpath: opencompass/configs/datasets/bbh/bbh_gen.py
|
||||
configpath_llmjudge: opencompass/configs/datasets/bbh/bbh_llm_judge_gen.py
|
||||
- bbeh:
|
||||
name: BIG-Bench Extra Hard
|
||||
category: Reasoning
|
||||
paper: https://arxiv.org/abs/2502.19187
|
||||
configpath: opencompass/configs/datasets/bbeh
|
||||
configpath_llmjudge: ''
|
||||
- BoolQ:
|
||||
name: SuperGLUE / BoolQ
|
||||
category: Knowledge
|
||||
|
@ -2,6 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.datasets import Aime2024Dataset, MATHEvaluator, math_postprocess_v2
|
||||
from opencompass.datasets import CustomDataset
|
||||
|
||||
|
||||
aime2024_reader_cfg = dict(
|
||||
@ -20,7 +21,7 @@ aime2024_infer_cfg = dict(
|
||||
)
|
||||
),
|
||||
retriever=dict(type=ZeroRetriever),
|
||||
inferencer=dict(type=GenInferencer, max_out_len=2048)
|
||||
inferencer=dict(type=GenInferencer)
|
||||
)
|
||||
|
||||
aime2024_eval_cfg = dict(
|
||||
|
@ -54,7 +54,7 @@ for _name in bbh_multiple_choice_sets:
|
||||
)
|
||||
])),
|
||||
retriever=dict(type=ZeroRetriever),
|
||||
inferencer=dict(type=GenInferencer, max_out_len=512))
|
||||
inferencer=dict(type=GenInferencer))
|
||||
bbh_eval_cfg = dict(
|
||||
evaluator=dict(type=BBHEvaluator_mcq),
|
||||
pred_role='BOT',
|
||||
@ -85,7 +85,7 @@ for _name in bbh_free_form_sets:
|
||||
)
|
||||
])),
|
||||
retriever=dict(type=ZeroRetriever),
|
||||
inferencer=dict(type=GenInferencer, max_out_len=512))
|
||||
inferencer=dict(type=GenInferencer))
|
||||
bbh_eval_cfg = dict(evaluator=dict(type=BBHEvaluator), pred_role='BOT')
|
||||
|
||||
bbh_datasets.append(
|
||||
|
@ -81,7 +81,7 @@ for _name in bbh_multiple_choice_sets:
|
||||
)
|
||||
])),
|
||||
retriever=dict(type=ZeroRetriever),
|
||||
inferencer=dict(type=GenInferencer, max_out_len=512))
|
||||
inferencer=dict(type=GenInferencer))
|
||||
bbh_eval_cfg = dict(
|
||||
evaluator=dict(
|
||||
type=GenericLLMEvaluator,
|
||||
@ -137,7 +137,7 @@ for _name in bbh_free_form_sets:
|
||||
)
|
||||
])),
|
||||
retriever=dict(type=ZeroRetriever),
|
||||
inferencer=dict(type=GenInferencer, max_out_len=512))
|
||||
inferencer=dict(type=GenInferencer))
|
||||
bbh_eval_cfg = dict(
|
||||
evaluator=dict(
|
||||
type=GenericLLMEvaluator,
|
||||
|
@ -15,9 +15,9 @@ bigcodebench_hard_infer_cfg = dict(prompt_template=dict(
|
||||
round=[
|
||||
dict(role='HUMAN', prompt='{instruct_prompt}'),
|
||||
])),
|
||||
retriever=dict(type=ZeroRetriever),
|
||||
inferencer=dict(type=GenInferencer,
|
||||
max_out_len=8192))
|
||||
retriever=dict(type=ZeroRetriever),
|
||||
inferencer=dict(type=GenInferencer,)
|
||||
)
|
||||
|
||||
bigcodebench_hard_eval_cfg = dict(
|
||||
evaluator=dict(
|
||||
|
@ -37,7 +37,7 @@ for category in categories:
|
||||
infer_cfg = dict(
|
||||
prompt_template=prompt_template,
|
||||
retriever=dict(type=ZeroRetriever),
|
||||
inferencer=dict(type=GenInferencer, max_out_len=1024),
|
||||
inferencer=dict(type=GenInferencer),
|
||||
)
|
||||
|
||||
# Evaluation configuration
|
||||
|
@ -63,7 +63,7 @@ for category in categories:
|
||||
infer_cfg = dict(
|
||||
prompt_template=prompt_template,
|
||||
retriever=dict(type=ZeroRetriever),
|
||||
inferencer=dict(type=GenInferencer, max_out_len=1024),
|
||||
inferencer=dict(type=GenInferencer),
|
||||
)
|
||||
|
||||
# Evaluation configuration
|
||||
|
@ -17,7 +17,7 @@ math_infer_cfg = dict(
|
||||
),
|
||||
),
|
||||
retriever=dict(type=ZeroRetriever),
|
||||
inferencer=dict(type=GenInferencer, max_out_len=8192),
|
||||
inferencer=dict(type=GenInferencer),
|
||||
)
|
||||
|
||||
GRADER_TEMPLATE = """
|
||||
|
Loading…
Reference in New Issue
Block a user