This commit is contained in:
zhulinJulia24 2025-04-01 13:05:24 +08:00
parent f71eb78c72
commit b42b83ac60

View File

@ -234,15 +234,15 @@ internlm2_5-7b-turbomind:
sanitized_mbpp_score: 55.25
dingo_en_192_score: 60.94
dingo_zh_170_score: 67.65
mmlu-stem_naive_average: 63.72
mmlu-social-science_naive_average: 80.15
mmlu-humanities_naive_average: 74.27
mmlu-other_naive_average: 71.85
cmmlu-stem_naive_average: 67.07
cmmlu-social-science_naive_average: 81.49
cmmlu-humanities_naive_average: 85.84
cmmlu-other_naive_average: 82.69
cmmlu-china-specific_naive_average: 79.88
mmlu-stem_accuracy: 63.72
mmlu-social-science_accuracy: 80.15
mmlu-humanities_accuracy: 74.27
mmlu-other_accuracy: 71.85
cmmlu-stem_accuracy: 67.07
cmmlu-social-science_accuracy: 81.49
cmmlu-humanities_accuracy: 85.84
cmmlu-other_accuracy: 82.69
cmmlu-china-specific_accuracy: 79.88
mmlu_pro_biology_accuracy: 58.58
mmlu_pro_business_accuracy: 28.01
mmlu_pro_chemistry_accuracy: 22.79
@ -281,12 +281,12 @@ internlm2_5-7b-turbomind:
longbench_naive_average: 46.19
longbench_zh_naive_average: 49.3
longbench_en_naive_average: 43.97
longbench_single-document-qa_naive_average: 42.84
longbench_multi-document-qa_naive_average: 37.29
longbench_summarization_naive_average: 23.21
longbench_few-shot-learning_naive_average: 61.67
longbench_synthetic-tasks_naive_average: 60.05
longbench_code-completion_naive_average: 52.09
longbench_single-document-qa_score: 42.84
longbench_multi-document-qa_score: 37.29
longbench_summarization_score: 23.21
longbench_few-shot-learning_score: 61.67
longbench_synthetic-tasks_score: 60.05
longbench_code-completion_score: 52.09
internlm2_5-7b-chat-turbomind:
objective:
@ -327,15 +327,15 @@ internlm2_5-7b-chat-turbomind:
teval_naive_average: 80
SciCode_sub_accuracy: 5.56
qa_dingo_cn_score: 99.01
mmlu-stem_naive_average: 68.2
mmlu-social-science_naive_average: 75.8
mmlu-humanities_naive_average: 69.3
mmlu-other_naive_average: 71.3
cmmlu-stem_naive_average: 66.64
cmmlu-social-science_naive_average: 76
cmmlu-humanities_naive_average: 77.9
cmmlu-other_naive_average: 77.25
cmmlu-china-specific_naive_average: 73.6
mmlu-stem_accuracy: 68.2
mmlu-social-science_accuracy: 75.8
mmlu-humanities_accuracy: 69.3
mmlu-other_accuracy: 71.3
cmmlu-stem_accuracy: 66.64
cmmlu-social-science_accuracy: 76
cmmlu-humanities_accuracy: 77.9
cmmlu-other_accuracy: 77.25
cmmlu-china-specific_accuracy: 73.6
mmlu_pro_biology_accuracy: 66.67
mmlu_pro_business_accuracy: 47.91
mmlu_pro_chemistry_accuracy: 35
@ -448,9 +448,9 @@ internlm2_5-7b-chat-1m-turbomind:
babilong_32k_naive_average: 48.9
babilong_128k_naive_average: 40.8
babilong_256k_naive_average: 23.5
longbench_single-document-qa_naive_average: 43.56
longbench_multi-document-qa_naive_average: 46.24
longbench_summarization_naive_average: 24.32
longbench_few-shot-learning_naive_average: 51.67
longbench_synthetic-tasks_naive_average: 66.83
longbench_code-completion_naive_average: 45.99
longbench_single-document-qa_score: 43.56
longbench_multi-document-qa_score: 46.24
longbench_summarization_score: 24.32
longbench_few-shot-learning_score: 51.67
longbench_synthetic-tasks_score: 66.83
longbench_code-completion_score: 45.99