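# Fullbench metric scores for internlm2_5-7b variants (HF and TurboMind backends).
# Presumably these serve as expected baseline values for OpenCompass regression
# checks; that purpose is an assumption, not stated in the file itself.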
internlm2_5-7b-chat-hf_fullbench:
  race-high: 93.75
  ARC-c: 93.75
  BoolQ: 81.25
  triviaqa_wiki_1shot: 50
  nq_open_1shot: 25
  IFEval: 50
  drop: 81.25
  GPQA_diamond: 25
  hellaswag: 87.5
  TheoremQA: 18.75
  musr_average: 39.58
  gsm8k: 56.25
  math: 75
  cmo_fib: 6.25
  aime2024: 6.25
  wikibench-wiki-single_choice_cncircular: 50
  sanitized_mbpp: 68.75
  ds1000: 16.96
  lcb_code_generation: 12.5
  lcb_code_execution: 43.75
  lcb_test_output: 18.75
  bbh-logical_deduction_seven_objects: 50
  bbh-multistep_arithmetic_two: 68.75
  mmlu-other: 72.6
  cmmlu-china-specific: 76.25
  mmlu_pro_math: 25
  ds1000_Pandas: 12.5
  ds1000_Numpy: 0
  ds1000_Tensorflow: 12.5
  ds1000_Scipy: 18.75
  ds1000_Sklearn: 18.75
  ds1000_Pytorch: 12.5
  ds1000_Matplotlib: 43.75
  openai_mmmlu_lite_AR-XY: 37.5
  college: 12.5
  college_knowledge: 87.5
  Alignbench总分: 0.65
  Alignbench专业能力: 7.83
  AlpacaEvaltotal: 0
  AlpacaEvalhelpful_base: 0
  CompassArenacompassarena_language: 60
  CompassArenacompassarena_knowledge: 56
  CompassArenacompassarena_reason_v2: 50
  CompassArenacompassarena_math_v2: 53.5
  CompassArenacompassarena_creationv2_zh: 48.75
  Fofofofo_test_prompts: 1
  followbenchHSR_AVG: 1
  followbenchSSR_AVG: 1
  followbenchHSR_L1: 1
  followbenchHSR_L2: 1
  followbenchHSR_L3: 1
  followbenchHSR_L4: 1
  followbenchHSR_L5: 1
  followbenchSSR_L1: 1
  followbenchSSR_L2: 1
  followbenchSSR_L3: 1
  followbenchSSR_L4: 1
  followbenchSSR_L5: 1
  MTBench101average: 8.1
  Wildbenchscore: -3.3333333333333335

internlm2_5-7b-chat-turbomind_fullbench:
  race-high: 93.75
  ARC-c: 87.5
  BoolQ: 68.75
  triviaqa_wiki_1shot: 50
  nq_open_1shot: 25
  IFEval: 50
  drop: 75
  hellaswag: 81.25
  TheoremQA: 6.25
  musr_average: 39.58
  gsm8k: 68.75
  math: 75
  GPQA_diamond: 25
  cmo_fib: 6.25
  aime2024: 6.25
  wikibench-wiki-single_choice_cncircular: 25
  sanitized_mbpp: 68.75
  ds1000: 13.39
  lcb_code_generation: 12.5
  lcb_code_execution: 43.75
  lcb_test_output: 12.5
  bbh-logical_deduction_seven_objects: 56.25
  bbh-multistep_arithmetic_two: 68.75
  mmlu-other: 74.04
  cmmlu-china-specific: 76.25
  mmlu_pro_math: 25
  ds1000_Pandas: 0
  ds1000_Numpy: 0
  ds1000_Tensorflow: 12.5
  ds1000_Scipy: 18.75
  ds1000_Sklearn: 18.75
  ds1000_Pytorch: 6.25
  ds1000_Matplotlib: 37.5
  openai_mmmlu_lite_AR-XY: 37.5
  college: 0
  college_knowledge: 87.5
  Alignbench总分: 0.64
  Alignbench专业能力: 7.6
  AlpacaEvaltotal: 10
  AlpacaEvalhelpful_base: 10
  CompassArenacompassarena_language: 59
  CompassArenacompassarena_knowledge: 57
  CompassArenacompassarena_reason_v2: 49.5
  CompassArenacompassarena_math_v2: 51
  CompassArenacompassarena_creationv2_zh: 43.75
  Fofofofo_test_prompts: 1
  followbenchHSR_AVG: 1
  followbenchSSR_AVG: 1
  followbenchHSR_L1: 1
  followbenchHSR_L2: 1
  followbenchHSR_L3: 1
  followbenchHSR_L4: 1
  followbenchHSR_L5: 1
  followbenchSSR_L1: 1
  followbenchSSR_L2: 1
  followbenchSSR_L3: 1
  followbenchSSR_L4: 1
  followbenchSSR_L5: 1
  MTBench101average: 8.1
  Wildbenchscore: -8.333333333333334

internlm2_5-7b-hf_fullbench:
  race-high: 100
  ARC-c: 68.75
  BoolQ: 87.5
  GPQA_diamond: 62.5
  drop: 62.5
  math: 12.5
  wikibench-wiki-single_choice_cncircular: 25
  sanitized_mbpp: 56.25
  gsm8k: 37.5
  triviaqa_wiki_1shot: 43.75
  nq_open_1shot: 43.75
  winogrande: 75
  hellaswag: 93.75
  TheoremQA: 25
  dingo_en_192: 37.5
  dingo_zh_170: 100
  college: 12.5
  college_knowledge: 87.5
  bbh-logical_deduction_seven_objects: 43.75
  bbh-multistep_arithmetic_two: 56.25
  mmlu-other: 76.92
  cmmlu-china-specific: 84.17
  mmlu_pro_math: 18.75

internlm2_5-7b-turbomind_fullbench:
  race-high: 100
  ARC-c: 68.75
  BoolQ: 87.5
  GPQA_diamond: 62.5
  drop: 62.5
  math: 18.75
  wikibench-wiki-single_choice_cncircular: 25
  sanitized_mbpp: 56.25
  gsm8k: 68.75
  triviaqa_wiki_1shot: 43.75
  nq_open_1shot: 43.75
  winogrande: 87.5
  hellaswag: 93.75
  TheoremQA: 31.25
  dingo_en_192: 43.75
  dingo_zh_170: 100
  college: 12.5
  college_knowledge: 87.5
  bbh-logical_deduction_seven_objects: 50
  bbh-multistep_arithmetic_two: 56.25
  mmlu-other: 76.92
  cmmlu-china-specific: 84.17
  mmlu_pro_math: 18.75