This commit is contained in:
zhulinJulia24 2025-04-03 14:32:49 +08:00
parent e3c2521df5
commit ba99868c77

View File

@ -78,38 +78,38 @@ internlm2_5-7b-chat-hf_fullbench:
internlm2_5-7b-chat-turbomind_fullbench:
objective:
race-high_accuracy: 93.75
ARC-c_accuracy: 93.75
ARC-c_accuracy: 87.50
BoolQ_accuracy: 68.75
triviaqa_wiki_1shot_score: 50
nq_open_1shot_score: 25
IFEval_Prompt-level-strict-accuracy: 56.25
drop_accuracy: 81.25
drop_accuracy: 75
GPQA_diamond_accuracy: 31.25
hellaswag_accuracy: 81.25
TheoremQA_score: 6.25
hellaswag_accuracy: 87.5
TheoremQA_score: 12.5
musr_average_naive_average: 39.58
korbench_single_naive_average: 37.50
gsm8k_accuracy: 68.75
math_accuracy: 68.75
korbench_single_naive_average: 40
gsm8k_accuracy: 62.5
math_accuracy: 75
cmo_fib_accuracy: 6.25
aime2024_accuracy: 6.25
wikibench-wiki-single_choice_cncircular_perf_4: 50.00
wikibench-wiki-single_choice_cncircular_perf_4: 25
sanitized_mbpp_score: 68.75
ds1000_naive_average: 16.96
ds1000_naive_average: 17.86
lcb_code_generation_pass@1: 12.5
lcb_code_execution_pass@1: 43.75
lcb_test_output_pass@1: 25.00
bbh-logical_deduction_seven_objects_score: 50.00
bbh-multistep_arithmetic_two_score: 68.75
mmlu-other_accuracy: 69.71
cmmlu-china-specific_accuracy: 75.83
lcb_test_output_pass@1: 18.75
bbh-logical_deduction_seven_objects_score: 56.25
bbh-multistep_arithmetic_two_score: 75
mmlu-other_accuracy: 72.6
cmmlu-china-specific_accuracy: 78.33
mmlu_pro_math_accuracy: 31.25
ds1000_Pandas_accuracy: 0
ds1000_Pandas_accuracy: 12.5
ds1000_Numpy_accuracy: 0
ds1000_Tensorflow_accuracy: 12.5
ds1000_Scipy_accuracy: 18.75
ds1000_Scipy_accuracy: 25
ds1000_Sklearn_accuracy: 18.75
ds1000_Pytorch_accuracy: 18.75
ds1000_Pytorch_accuracy: 6.25
ds1000_Matplotlib_accuracy: 50.00
openai_mmmlu_lite_AR-XY_accuracy: 37.5
college_naive_average: 12.50