mirror of https://github.com/open-compass/opencompass.git (synced 2025-05-30 16:03:24 +08:00)
update
parent e3c2521df5
commit ba99868c77
.github/scripts/oc_score_baseline_fullbench.yaml (vendored): 34 changes
@@ -78,38 +78,38 @@ internlm2_5-7b-chat-hf_fullbench:
 internlm2_5-7b-chat-turbomind_fullbench:
   objective:
     race-high_accuracy: 93.75
-    ARC-c_accuracy: 93.75
+    ARC-c_accuracy: 87.50
     BoolQ_accuracy: 68.75
     triviaqa_wiki_1shot_score: 50
     nq_open_1shot_score: 25
     IFEval_Prompt-level-strict-accuracy: 56.25
-    drop_accuracy: 81.25
+    drop_accuracy: 75
     GPQA_diamond_accuracy: 31.25
-    hellaswag_accuracy: 81.25
-    TheoremQA_score: 6.25
+    hellaswag_accuracy: 87.5
+    TheoremQA_score: 12.5
     musr_average_naive_average: 39.58
-    korbench_single_naive_average: 37.50
-    gsm8k_accuracy: 68.75
-    math_accuracy: 68.75
+    korbench_single_naive_average: 40
+    gsm8k_accuracy: 62.5
+    math_accuracy: 75
     cmo_fib_accuracy: 6.25
     aime2024_accuracy: 6.25
-    wikibench-wiki-single_choice_cncircular_perf_4: 50.00
+    wikibench-wiki-single_choice_cncircular_perf_4: 25
     sanitized_mbpp_score: 68.75
-    ds1000_naive_average: 16.96
+    ds1000_naive_average: 17.86
     lcb_code_generation_pass@1: 12.5
     lcb_code_execution_pass@1: 43.75
-    lcb_test_output_pass@1: 25.00
-    bbh-logical_deduction_seven_objects_score: 50.00
-    bbh-multistep_arithmetic_two_score: 68.75
-    mmlu-other_accuracy: 69.71
-    cmmlu-china-specific_accuracy: 75.83
+    lcb_test_output_pass@1: 18.75
+    bbh-logical_deduction_seven_objects_score: 56.25
+    bbh-multistep_arithmetic_two_score: 75
+    mmlu-other_accuracy: 72.6
+    cmmlu-china-specific_accuracy: 78.33
     mmlu_pro_math_accuracy: 31.25
-    ds1000_Pandas_accuracy: 0
+    ds1000_Pandas_accuracy: 12.5
     ds1000_Numpy_accuracy: 0
     ds1000_Tensorflow_accuracy: 12.5
-    ds1000_Scipy_accuracy: 18.75
+    ds1000_Scipy_accuracy: 25
     ds1000_Sklearn_accuracy: 18.75
-    ds1000_Pytorch_accuracy: 18.75
+    ds1000_Pytorch_accuracy: 6.25
     ds1000_Matplotlib_accuracy: 50.00
     openai_mmmlu_lite_AR-XY_accuracy: 37.5
     college_naive_average: 12.50
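Baseline files like this one are normally consumed by a CI score-check step that compares the scores from a regression run against the expected values and flags drift. The snippet below is a minimal sketch of that pattern, assuming a flat results dict keyed by the same metric names as the YAML; it is not the actual .github/scripts checker, and the tolerance, function name, and result shape are illustrative assumptions.

# Minimal sketch of consuming a baseline like oc_score_baseline_fullbench.yaml.
# NOT the actual OpenCompass CI checker; tolerance, function name, and the
# shape of the results dict are assumptions for illustration.
import yaml

BASELINE_PATH = ".github/scripts/oc_score_baseline_fullbench.yaml"
TOLERANCE = 0.5  # assumed allowed drift, in score points


def check_against_baseline(model_key: str, results: dict) -> list:
    """Return mismatch messages for metrics that drift past TOLERANCE or are missing."""
    with open(BASELINE_PATH) as f:
        baseline = yaml.safe_load(f)
    expected = baseline[model_key]["objective"]
    failures = []
    for metric, want in expected.items():
        got = results.get(metric)
        if got is None:
            failures.append(f"{metric}: missing from results")
        elif abs(float(got) - float(want)) > TOLERANCE:
            failures.append(f"{metric}: got {got}, expected {want}")
    return failures


if __name__ == "__main__":
    # Hypothetical partial run output; any baseline metric not present here
    # is reported as missing.
    run = {"race-high_accuracy": 93.75, "ARC-c_accuracy": 87.50}
    for msg in check_against_baseline("internlm2_5-7b-chat-turbomind_fullbench", run):
        print(msg)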