internlm2_5-7b-chat-hf_fullbench:
  race-high: 93.75
  ARC-c: 93.75
  BoolQ: 81.25
  triviaqa_wiki_1shot: 50
  nq_open_1shot: 25
  IFEval: 50
  drop: 81.25
  GPQA_diamond: 25
  hellaswag: 87.5
  TheoremQA: 18.75
  musr_average: 39.58
  gsm8k: 56.25
  math: 75
  cmo_fib: 6.25
  aime2024: 6.25
  wikibench-wiki-single_choice_cncircular: 50
  sanitized_mbpp: 68.75
  ds1000: 16.96
  lcb_code_generation: 12.5
  lcb_code_execution: 43.75
  lcb_test_output: 18.75
  bbh-logical_deduction_seven_objects: 50
  bbh-multistep_arithmetic_two: 68.75
  mmlu-other: 72.6
  cmmlu-china-specific: 76.25
  mmlu_pro_math: 25
  ds1000_Pandas: 12.5
  ds1000_Numpy: 0
  ds1000_Tensorflow: 12.5
  ds1000_Scipy: 18.75
  ds1000_Sklearn: 18.75
  ds1000_Pytorch: 12.5
  ds1000_Matplotlib: 43.75
  openai_mmmlu_lite_AR-XY: 37.5
  college: 12.5
  college_knowledge: 87.5
  Alignbench总分: 0.65
  Alignbench专业能力: 7.83
  AlpacaEvaltotal: 0
  AlpacaEvalhelpful_base: 0
  CompassArenacompassarena_language: 60
  CompassArenacompassarena_knowledge: 56
  CompassArenacompassarena_reason_v2: 50
  CompassArenacompassarena_math_v2: 53.5
  CompassArenacompassarena_creationv2_zh: 48.75
  Fofofofo_test_prompts: 1
  followbenchHSR_AVG: 1
  followbenchSSR_AVG: 1
  followbenchHSR_L1: 1
  followbenchHSR_L2: 1
  followbenchHSR_L3: 1
  followbenchHSR_L4: 1
  followbenchHSR_L5: 1
  followbenchSSR_L1: 1
  followbenchSSR_L2: 1
  followbenchSSR_L3: 1
  followbenchSSR_L4: 1
  followbenchSSR_L5: 1
  MTBench101average: 8.1
  Wildbenchscore: -3.3333333333333335

internlm2_5-7b-chat-turbomind_fullbench:
  race-high: 93.75
  ARC-c: 87.5
  BoolQ: 68.75
  triviaqa_wiki_1shot: 50
  nq_open_1shot: 25
  IFEval: 50
  drop: 75
  hellaswag: 81.25
  TheoremQA: 6.25
  musr_average: 37.5
  gsm8k: 68.75
  math: 75
  GPQA_diamond: 25
  cmo_fib: 6.25
  aime2024: 6.25
  wikibench-wiki-single_choice_cncircular: 25
  sanitized_mbpp: 68.75
  ds1000: 13.39
  lcb_code_generation: 12.5
  lcb_code_execution: 43.75
  lcb_test_output: 12.5
  bbh-logical_deduction_seven_objects: 56.25
  bbh-multistep_arithmetic_two: 68.75
  mmlu-other: 74.04
  cmmlu-china-specific: 76.25
  mmlu_pro_math: 25
  ds1000_Pandas: 0
  ds1000_Numpy: 0
  ds1000_Tensorflow: 12.5
  ds1000_Scipy: 18.75
  ds1000_Sklearn: 18.75
  ds1000_Pytorch: 6.25
  ds1000_Matplotlib: 37.5
  openai_mmmlu_lite_AR-XY: 37.5
  college: 0
  college_knowledge: 87.5
  Alignbench总分: 0.64
  Alignbench专业能力: 7.6
  AlpacaEvaltotal: 10
  AlpacaEvalhelpful_base: 10
  CompassArenacompassarena_language: 59
  CompassArenacompassarena_knowledge: 57
  CompassArenacompassarena_reason_v2: 49.5
  CompassArenacompassarena_math_v2: 51
  CompassArenacompassarena_creationv2_zh: 43.75
  Fofofofo_test_prompts: 1
  followbenchHSR_AVG: 1
  followbenchSSR_AVG: 1
  followbenchHSR_L1: 1
  followbenchHSR_L2: 1
  followbenchHSR_L3: 1
  followbenchHSR_L4: 1
  followbenchHSR_L5: 1
  followbenchSSR_L1: 1
  followbenchSSR_L2: 1
  followbenchSSR_L3: 1
  followbenchSSR_L4: 1
  followbenchSSR_L5: 1
  MTBench101average: 8.1
  Wildbenchscore: -8.333333333333334

internlm2_5-7b-hf_fullbench:
  race-high: 100
  ARC-c: 68.75
  BoolQ: 87.5
  GPQA_diamond: 62.5
  drop: 62.5
  math: 12.5
  wikibench-wiki-single_choice_cncircular: 25
  sanitized_mbpp: 56.25
  gsm8k: 37.5
  triviaqa_wiki_1shot: 43.75
  nq_open_1shot: 43.75
  winogrande: 75
  hellaswag: 93.75
  TheoremQA: 25
  dingo_en_192: 37.5
  dingo_zh_170: 100
  college: 12.5
  college_knowledge: 87.5
  bbh-logical_deduction_seven_objects: 43.75
  bbh-multistep_arithmetic_two: 56.25
  mmlu-other: 76.92
  cmmlu-china-specific: 84.17
  mmlu_pro_math: 18.75

internlm2_5-7b-turbomind_fullbench:
  race-high: 100
  ARC-c: 68.75
  BoolQ: 87.5
  GPQA_diamond: 62.5
  drop: 62.5
  math: 18.75
  wikibench-wiki-single_choice_cncircular: 25
  sanitized_mbpp: 56.25
  gsm8k: 68.75
  triviaqa_wiki_1shot: 43.75
  nq_open_1shot: 43.75
  winogrande: 87.5
  hellaswag: 93.75
  TheoremQA: 31.25
  dingo_en_192: 43.75
  dingo_zh_170: 100
  college: 12.5
  college_knowledge: 87.5
  bbh-logical_deduction_seven_objects: 50
  bbh-multistep_arithmetic_two: 56.25
  mmlu-other: 76.92
  cmmlu-china-specific: 84.17
  mmlu_pro_math: 18.75