From a603625f1907f4fc18dd2ead87d01d12f65695ba Mon Sep 17 00:00:00 2001 From: zhulinJulia24 Date: Tue, 1 Apr 2025 13:30:45 +0800 Subject: [PATCH] update --- .github/scripts/oc_score_baseline_fullbench.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/scripts/oc_score_baseline_fullbench.yaml b/.github/scripts/oc_score_baseline_fullbench.yaml index b90daa5e..c0e735fb 100644 --- a/.github/scripts/oc_score_baseline_fullbench.yaml +++ b/.github/scripts/oc_score_baseline_fullbench.yaml @@ -24,8 +24,8 @@ internlm2_5-7b-chat-hf_fullbench: lcb_test_output_pass@1: 18.75 bbh-logical_deduction_seven_objects_score: 50 bbh-multistep_arithmetic_two_score: 68.75 - mmlu-other_naive_average: 72.6 - cmmlu-china-specific_naive_average: 76.25 + mmlu-other_accuracy: 72.6 + cmmlu-china-specific_accuracy: 76.25 mmlu_pro_math_accuracy: 25 ds1000_Pandas_accuracy: 12.5 ds1000_Numpy_accuracy: 0 @@ -101,8 +101,8 @@ internlm2_5-7b-chat-turbomind_fullbench: lcb_test_output_pass@1: 25.00 bbh-logical_deduction_seven_objects_score: 50.00 bbh-multistep_arithmetic_two_score: 68.75 - mmlu-other_naive_average: 69.71 - cmmlu-china-specific_naive_average: 75.83 + mmlu-other_accuracy: 69.71 + cmmlu-china-specific_accuracy: 75.83 mmlu_pro_math_accuracy: 31.25 ds1000_Pandas_accuracy: 0 ds1000_Numpy_accuracy: 0