From ba99868c77154282976ed368dd558a18d1dba975 Mon Sep 17 00:00:00 2001 From: zhulinJulia24 Date: Thu, 3 Apr 2025 14:32:49 +0800 Subject: [PATCH] update --- .../scripts/oc_score_baseline_fullbench.yaml | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/.github/scripts/oc_score_baseline_fullbench.yaml b/.github/scripts/oc_score_baseline_fullbench.yaml index baf469d5..0c32331d 100644 --- a/.github/scripts/oc_score_baseline_fullbench.yaml +++ b/.github/scripts/oc_score_baseline_fullbench.yaml @@ -78,38 +78,38 @@ internlm2_5-7b-chat-hf_fullbench: internlm2_5-7b-chat-turbomind_fullbench: objective: race-high_accuracy: 93.75 - ARC-c_accuracy: 93.75 + ARC-c_accuracy: 87.50 BoolQ_accuracy: 68.75 triviaqa_wiki_1shot_score: 50 nq_open_1shot_score: 25 IFEval_Prompt-level-strict-accuracy: 56.25 - drop_accuracy: 81.25 + drop_accuracy: 75 GPQA_diamond_accuracy: 31.25 - hellaswag_accuracy: 81.25 - TheoremQA_score: 6.25 + hellaswag_accuracy: 87.5 + TheoremQA_score: 12.5 musr_average_naive_average: 39.58 - korbench_single_naive_average: 37.50 - gsm8k_accuracy: 68.75 - math_accuracy: 68.75 + korbench_single_naive_average: 40 + gsm8k_accuracy: 62.5 + math_accuracy: 75 cmo_fib_accuracy: 6.25 aime2024_accuracy: 6.25 - wikibench-wiki-single_choice_cncircular_perf_4: 50.00 + wikibench-wiki-single_choice_cncircular_perf_4: 25 sanitized_mbpp_score: 68.75 - ds1000_naive_average: 16.96 + ds1000_naive_average: 17.86 lcb_code_generation_pass@1: 12.5 lcb_code_execution_pass@1: 43.75 - lcb_test_output_pass@1: 25.00 - bbh-logical_deduction_seven_objects_score: 50.00 - bbh-multistep_arithmetic_two_score: 68.75 - mmlu-other_accuracy: 69.71 - cmmlu-china-specific_accuracy: 75.83 + lcb_test_output_pass@1: 18.75 + bbh-logical_deduction_seven_objects_score: 56.25 + bbh-multistep_arithmetic_two_score: 75 + mmlu-other_accuracy: 72.6 + cmmlu-china-specific_accuracy: 78.33 mmlu_pro_math_accuracy: 31.25 - ds1000_Pandas_accuracy: 0 + ds1000_Pandas_accuracy: 12.5 ds1000_Numpy_accuracy: 0 ds1000_Tensorflow_accuracy: 12.5 - ds1000_Scipy_accuracy: 18.75 + ds1000_Scipy_accuracy: 25 ds1000_Sklearn_accuracy: 18.75 - ds1000_Pytorch_accuracy: 18.75 + ds1000_Pytorch_accuracy: 6.25 ds1000_Matplotlib_accuracy: 50.00 openai_mmmlu_lite_AR-XY_accuracy: 37.5 college_naive_average: 12.50