mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
31 lines
1.0 KiB
Python
31 lines
1.0 KiB
Python
![]() |
from mmengine.config import read_base
|
||
|
|
||
|
with read_base():
|
||
|
from .groups.mmlu import mmlu_summary_groups
|
||
|
from .groups.ceval import ceval_summary_groups
|
||
|
from .groups.bbh import bbh_summary_groups
|
||
|
from .groups.GaokaoBench import GaokaoBench_summary_groups
|
||
|
from .groups.cmmlu import cmmlu_summary_groups
|
||
|
|
||
|
summarizer = dict(
|
||
|
dataset_abbrs=[
|
||
|
['mmlu', 'naive_average'],
|
||
|
['cmmlu', 'naive_average'],
|
||
|
['ceval-test', 'naive_average'],
|
||
|
['GaokaoBench', 'weighted_average'],
|
||
|
['triviaqa_wiki_1shot', 'score'],
|
||
|
['nq_open_1shot', 'score'],
|
||
|
['race-high', 'accuracy'],
|
||
|
['winogrande', 'accuracy'],
|
||
|
['hellaswag', 'accuracy'],
|
||
|
['bbh', 'naive_average'],
|
||
|
['gsm8k', 'accuracy'],
|
||
|
['math', 'accuracy'],
|
||
|
['TheoremQA', 'accuracy'],
|
||
|
['openai_humaneval', 'humaneval_pass@1'],
|
||
|
['sanitized_mbpp', 'score'],
|
||
|
],
|
||
|
summary_groups=sum(
|
||
|
[v for k, v in locals().items() if k.endswith("_summary_groups")], []),
|
||
|
)
|