mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
update
This commit is contained in:
parent
e3c2521df5
commit
ba99868c77
34
.github/scripts/oc_score_baseline_fullbench.yaml
vendored
34
.github/scripts/oc_score_baseline_fullbench.yaml
vendored
@ -78,38 +78,38 @@ internlm2_5-7b-chat-hf_fullbench:
|
||||
internlm2_5-7b-chat-turbomind_fullbench:
|
||||
objective:
|
||||
race-high_accuracy: 93.75
|
||||
ARC-c_accuracy: 93.75
|
||||
ARC-c_accuracy: 87.50
|
||||
BoolQ_accuracy: 68.75
|
||||
triviaqa_wiki_1shot_score: 50
|
||||
nq_open_1shot_score: 25
|
||||
IFEval_Prompt-level-strict-accuracy: 56.25
|
||||
drop_accuracy: 81.25
|
||||
drop_accuracy: 75
|
||||
GPQA_diamond_accuracy: 31.25
|
||||
hellaswag_accuracy: 81.25
|
||||
TheoremQA_score: 6.25
|
||||
hellaswag_accuracy: 87.5
|
||||
TheoremQA_score: 12.5
|
||||
musr_average_naive_average: 39.58
|
||||
korbench_single_naive_average: 37.50
|
||||
gsm8k_accuracy: 68.75
|
||||
math_accuracy: 68.75
|
||||
korbench_single_naive_average: 40
|
||||
gsm8k_accuracy: 62.5
|
||||
math_accuracy: 75
|
||||
cmo_fib_accuracy: 6.25
|
||||
aime2024_accuracy: 6.25
|
||||
wikibench-wiki-single_choice_cncircular_perf_4: 50.00
|
||||
wikibench-wiki-single_choice_cncircular_perf_4: 25
|
||||
sanitized_mbpp_score: 68.75
|
||||
ds1000_naive_average: 16.96
|
||||
ds1000_naive_average: 17.86
|
||||
lcb_code_generation_pass@1: 12.5
|
||||
lcb_code_execution_pass@1: 43.75
|
||||
lcb_test_output_pass@1: 25.00
|
||||
bbh-logical_deduction_seven_objects_score: 50.00
|
||||
bbh-multistep_arithmetic_two_score: 68.75
|
||||
mmlu-other_accuracy: 69.71
|
||||
cmmlu-china-specific_accuracy: 75.83
|
||||
lcb_test_output_pass@1: 18.75
|
||||
bbh-logical_deduction_seven_objects_score: 56.25
|
||||
bbh-multistep_arithmetic_two_score: 75
|
||||
mmlu-other_accuracy: 72.6
|
||||
cmmlu-china-specific_accuracy: 78.33
|
||||
mmlu_pro_math_accuracy: 31.25
|
||||
ds1000_Pandas_accuracy: 0
|
||||
ds1000_Pandas_accuracy: 12.5
|
||||
ds1000_Numpy_accuracy: 0
|
||||
ds1000_Tensorflow_accuracy: 12.5
|
||||
ds1000_Scipy_accuracy: 18.75
|
||||
ds1000_Scipy_accuracy: 25
|
||||
ds1000_Sklearn_accuracy: 18.75
|
||||
ds1000_Pytorch_accuracy: 18.75
|
||||
ds1000_Pytorch_accuracy: 6.25
|
||||
ds1000_Matplotlib_accuracy: 50.00
|
||||
openai_mmmlu_lite_AR-XY_accuracy: 37.5
|
||||
college_naive_average: 12.50
|
||||
|
Loading…
Reference in New Issue
Block a user