# Summarizer configuration for MathBench results.
#
# `dataset_abbrs` is the ordered list of rows to report. A bare string is a
# section header (tagged `# category` below); a two-item list looks like a
# `[dataset abbreviation, metric name]` pair -- NOTE(review): confirm against
# the summarizer implementation that consumes this config.
#
# `summary_groups` is built by concatenating every list bound to a name ending
# in `_summary_groups` in the namespace this file is executed in. No such name
# is defined in this file itself, so it is an empty list unless the names are
# provided by whatever loads or precedes this code.
summarizer = dict(
    dataset_abbrs=[
        '######## MathBench Application Accuracy ########',  # category
        ['mathbench-college-single_choice_cn', 'acc_1'],
        ['mathbench-college-single_choice_en', 'acc_1'],
        ['mathbench-high-single_choice_cn', 'acc_1'],
        ['mathbench-high-single_choice_en', 'acc_1'],
        ['mathbench-middle-single_choice_cn', 'acc_1'],
        ['mathbench-middle-single_choice_en', 'acc_1'],
        ['mathbench-primary-cloze_cn', 'accuracy'],
        ['mathbench-primary-cloze_en', 'accuracy'],
        ['mathbench-arithmetic-cloze_en', 'accuracy'],
        '######## MathBench Application CircularEval ########',  # category
        ['mathbench-college-single_choice_cn', 'perf_4'],
        ['mathbench-college-single_choice_en', 'perf_4'],
        ['mathbench-high-single_choice_cn', 'perf_4'],
        ['mathbench-high-single_choice_en', 'perf_4'],
        ['mathbench-middle-single_choice_cn', 'perf_4'],
        ['mathbench-middle-single_choice_en', 'perf_4'],
        '######## MathBench Knowledge CircularEval ########',  # category
        ['mathbench-college_knowledge-single_choice_cn', 'perf_4'],
        ['mathbench-college_knowledge-single_choice_en', 'perf_4'],
        ['mathbench-high_knowledge-single_choice_cn', 'perf_4'],
        ['mathbench-high_knowledge-single_choice_en', 'perf_4'],
        ['mathbench-middle_knowledge-single_choice_cn', 'perf_4'],
        ['mathbench-middle_knowledge-single_choice_en', 'perf_4'],
        ['mathbench-primary_knowledge-single_choice_cn', 'perf_4'],
        ['mathbench-primary_knowledge-single_choice_en', 'perf_4'],
        '######## MathBench Knowledge Accuracy ########',  # category
        ['mathbench-college_knowledge-single_choice_cn', 'acc_1'],
        ['mathbench-college_knowledge-single_choice_en', 'acc_1'],
        ['mathbench-high_knowledge-single_choice_cn', 'acc_1'],
        ['mathbench-high_knowledge-single_choice_en', 'acc_1'],
        ['mathbench-middle_knowledge-single_choice_cn', 'acc_1'],
        ['mathbench-middle_knowledge-single_choice_en', 'acc_1'],
        ['mathbench-primary_knowledge-single_choice_cn', 'acc_1'],
        ['mathbench-primary_knowledge-single_choice_en', 'acc_1'],
    ],
    summary_groups=sum(
        # Snapshot the namespace with list() before filtering: iterating the
        # live locals() view inside a comprehension can raise "dictionary
        # changed size during iteration" on Python 3.12+ when tracing is on.
        [
            group
            for name, group in list(locals().items())
            if name.endswith('_summary_groups')
        ],
        [],
    ),
)