OpenCompass/configs/summarizers/math_agent.py
2023-12-11 17:42:53 +08:00

29 lines
1.3 KiB
Python

summarizer = dict(
dataset_abbrs=[
'######## GSM8K-Agent Accuracy ########', # category
['gsm8k-agent', 'follow_acc'],
['gsm8k-agent', 'reasoning_acc'],
['gsm8k-agent', 'code_acc'],
['gsm8k-agent', 'action_pct'],
'######## MATH-Agent Accuracy ########', # category
['math-agent', 'follow_acc'],
['math-agent', 'reasoning_acc'],
['math-agent', 'code_acc'],
['math-agent', 'action_pct'],
'######## MathBench-Agent Accuracy ########', # category
['mathbench-college-single_choice_cn-agent', 'acc_1'],
['mathbench-college-cloze_en-agent', 'accuracy'],
['mathbench-high-single_choice_cn-agent', 'acc_1'],
['mathbench-high-single_choice_en-agent', 'acc_1'],
['mathbench-middle-single_choice_cn-agent', 'acc_1'],
['mathbench-primary-cloze_cn-agent', 'accuracy'],
'######## MathBench-Agent CircularEval ########', # category
['mathbench-college-single_choice_cn-agent', 'perf_4'],
['mathbench-high-single_choice_cn-agent', 'perf_4'],
['mathbench-high-single_choice_en-agent', 'perf_4'],
['mathbench-middle-single_choice_cn-agent', 'perf_4'],
],
summary_groups=sum(
[v for k, v in locals().items() if k.endswith("_summary_groups")], [])
)