Mirror of https://github.com/open-compass/opencompass.git, synced 2025-05-30 16:03:24 +08:00.
commit 2639d113d8
parent b1c2f60d32

    fix
@@ -313,6 +313,12 @@
     paper: https://arxiv.org/pdf/2210.09261
     configpath: opencompass/configs/datasets/bbh/bbh_gen.py
     configpath_llmjudge: opencompass/configs/datasets/bbh/bbh_llm_judge_gen.py
+- bbeh:
+    name: BIG-Bench Extra Hard
+    category: Reasoning
+    paper: https://arxiv.org/abs/2502.19187
+    configpath: opencompass/configs/datasets/bbeh
+    configpath_llmjudge: ''
 - BoolQ:
     name: SuperGLUE / BoolQ
     category: Knowledge
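For context, not part of the commit: a `configpath` entry in this index points at the config that defines the dataset list. A minimal sketch of how the new bbeh entry would typically be consumed, assuming the directory exposes a `bbeh_gen` module defining `bbeh_datasets` (both names are assumptions based on OpenCompass conventions):

```python
# Minimal consumption sketch; the module and variable names are assumptions
# based on OpenCompass conventions, not taken from this commit.
from mmengine.config import read_base

with read_base():
    # configpath: opencompass/configs/datasets/bbeh
    from opencompass.configs.datasets.bbeh.bbeh_gen import bbeh_datasets

datasets = [*bbeh_datasets]
```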
@@ -2,6 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.datasets import Aime2024Dataset, MATHEvaluator, math_postprocess_v2
+from opencompass.datasets import CustomDataset


 aime2024_reader_cfg = dict(
@@ -20,7 +21,7 @@ aime2024_infer_cfg = dict(
             )
         ),
     retriever=dict(type=ZeroRetriever),
-    inferencer=dict(type=GenInferencer, max_out_len=2048)
+    inferencer=dict(type=GenInferencer)
 )

 aime2024_eval_cfg = dict(
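For context, not part of the commit: with `max_out_len` removed from the dataset-side inferencer here (and in the hunks below), the generation-length cap is governed by the model config instead. A rough sketch of where that value would now live; the model class follows OpenCompass conventions, and the checkpoint and numbers are illustrative assumptions:

```python
# Illustrative model config; the checkpoint and values are assumptions,
# not taken from this commit.
from opencompass.models import HuggingFacewithChatTemplate

models = [
    dict(
        type=HuggingFacewithChatTemplate,
        abbr='demo-chat-7b',              # assumed abbreviation
        path='Qwen/Qwen2.5-7B-Instruct',  # assumed checkpoint
        max_out_len=2048,                 # the cap now lives here, once,
        batch_size=8,                     # rather than in every dataset cfg
        run_cfg=dict(num_gpus=1),
    )
]
```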
@@ -54,7 +54,7 @@ for _name in bbh_multiple_choice_sets:
                 )
             ])),
         retriever=dict(type=ZeroRetriever),
-        inferencer=dict(type=GenInferencer, max_out_len=512))
+        inferencer=dict(type=GenInferencer))
     bbh_eval_cfg = dict(
         evaluator=dict(type=BBHEvaluator_mcq),
         pred_role='BOT',
@@ -85,7 +85,7 @@ for _name in bbh_free_form_sets:
                 )
             ])),
         retriever=dict(type=ZeroRetriever),
-        inferencer=dict(type=GenInferencer, max_out_len=512))
+        inferencer=dict(type=GenInferencer))
     bbh_eval_cfg = dict(evaluator=dict(type=BBHEvaluator), pred_role='BOT')

     bbh_datasets.append(
@@ -81,7 +81,7 @@ for _name in bbh_multiple_choice_sets:
                 )
             ])),
         retriever=dict(type=ZeroRetriever),
-        inferencer=dict(type=GenInferencer, max_out_len=512))
+        inferencer=dict(type=GenInferencer))
     bbh_eval_cfg = dict(
         evaluator=dict(
             type=GenericLLMEvaluator,
@@ -137,7 +137,7 @@ for _name in bbh_free_form_sets:
                 )
             ])),
         retriever=dict(type=ZeroRetriever),
-        inferencer=dict(type=GenInferencer, max_out_len=512))
+        inferencer=dict(type=GenInferencer))
     bbh_eval_cfg = dict(
         evaluator=dict(
             type=GenericLLMEvaluator,
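For context, not part of the commit: `GenericLLMEvaluator` scores predictions with a judge model rather than string matching. A rough sketch of what such an evaluator config typically looks like; the import path, prompt wording, placeholders, and the empty `judge_cfg` are assumptions based on OpenCompass conventions:

```python
# Rough sketch of an LLM-judge evaluator config; field values are
# assumptions based on OpenCompass conventions, not this commit's code.
from opencompass.evaluator import GenericLLMEvaluator
from opencompass.openicl.icl_prompt_template import PromptTemplate

bbh_eval_cfg = dict(
    evaluator=dict(
        type=GenericLLMEvaluator,
        prompt_template=dict(
            type=PromptTemplate,
            template=dict(round=[
                # assumed judge prompt; real configs use a grader template
                dict(role='HUMAN', prompt='{question}\n{prediction}'),
            ]),
        ),
        judge_cfg=dict(),  # typically filled in by the runner at eval time
    ),
)
```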
@@ -15,9 +15,9 @@ bigcodebench_hard_infer_cfg = dict(prompt_template=dict(
         round=[
             dict(role='HUMAN', prompt='{instruct_prompt}'),
         ])),
     retriever=dict(type=ZeroRetriever),
-    inferencer=dict(type=GenInferencer,
-                    max_out_len=8192))
+    inferencer=dict(type=GenInferencer,)
+    )

 bigcodebench_hard_eval_cfg = dict(
     evaluator=dict(
@@ -37,7 +37,7 @@ for category in categories:
     infer_cfg = dict(
         prompt_template=prompt_template,
         retriever=dict(type=ZeroRetriever),
-        inferencer=dict(type=GenInferencer, max_out_len=1024),
+        inferencer=dict(type=GenInferencer),
     )

     # Evaluation configuration
@@ -63,7 +63,7 @@ for category in categories:
     infer_cfg = dict(
         prompt_template=prompt_template,
         retriever=dict(type=ZeroRetriever),
-        inferencer=dict(type=GenInferencer, max_out_len=1024),
+        inferencer=dict(type=GenInferencer),
     )

     # Evaluation configuration
@@ -17,7 +17,7 @@ math_infer_cfg = dict(
         ),
     ),
     retriever=dict(type=ZeroRetriever),
-    inferencer=dict(type=GenInferencer, max_out_len=8192),
+    inferencer=dict(type=GenInferencer),
 )

 GRADER_TEMPLATE = """
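For context, not part of the commit: a grader template of the kind started in the last context line above typically asks the judge to compare the gold answer with the prediction and emit a single parseable verdict token. An illustrative body; the wording and placeholder names are assumptions, not the commit's actual template:

```python
# Illustrative grader template; the wording and placeholder names
# ({answer}, {prediction}) are assumptions, not the commit's text.
GRADER_TEMPLATE = """
Judge whether the prediction matches the gold answer exactly.
Gold answer: {answer}
Prediction: {prediction}
Reply with exactly one word: CORRECT or WRONG.
""".strip()
```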