mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
update
This commit is contained in:
parent
7157b8911d
commit
9d63fdd616
26
.github/scripts/oc_score_baseline_fullbench.yaml
vendored
26
.github/scripts/oc_score_baseline_fullbench.yaml
vendored
@ -490,8 +490,8 @@ qwen2.5-7b-instruct-turbomind:
|
||||
lcb_code_generation_pass@1: 39.5
|
||||
lcb_code_execution_pass@1: 42.38
|
||||
lcb_test_output_pass@1: 50.68
|
||||
bigcodebench_hard_instruct_pass@1: 100
|
||||
bigcodebench_hard_complete_pass@1: 100
|
||||
bigcodebench_hard_instruct_pass@1: 16.22
|
||||
bigcodebench_hard_complete_pass@1: 11.49
|
||||
teval_naive_average: 79.72
|
||||
SciCode_sub_accuracy: 100
|
||||
qa_dingo_cn_score: 99.01
|
||||
@ -598,8 +598,8 @@ internlm2_5-7b-chat-pytorch:
|
||||
lcb_code_execution_pass@1: 33.82
|
||||
lcb_test_output_pass@1: 22.62
|
||||
bigcodebench_hard_instruct_pass@1: 6.08
|
||||
bigcodebench_hard_complete_pass@1: 100
|
||||
teval_naive_average: 100
|
||||
bigcodebench_hard_complete_pass@1: 6.76
|
||||
teval_naive_average: 79.73
|
||||
SciCode_sub_accuracy: 100
|
||||
qa_dingo_cn_score: 100
|
||||
mmlu_accuracy: 70.2
|
||||
@ -702,9 +702,9 @@ qwen2.5-7b-instruct-pytorch:
|
||||
lcb_code_generation_pass@1: 38.75
|
||||
lcb_code_execution_pass@1: 42.38
|
||||
lcb_test_output_pass@1: 50.45
|
||||
bigcodebench_hard_instruct_pass@1: 100
|
||||
bigcodebench_hard_complete_pass@1: 100
|
||||
teval_naive_average: 100
|
||||
bigcodebench_hard_instruct_pass@1: 16.89
|
||||
bigcodebench_hard_complete_pass@1: 12.16
|
||||
teval_naive_average: 79.46
|
||||
SciCode_sub_accuracy: 100
|
||||
qa_dingo_cn_score: 100
|
||||
mmlu_accuracy: 76.27
|
||||
@ -807,9 +807,9 @@ internlm3-8b-instruct-turbomind:
|
||||
lcb_code_generation_pass@1: 34.75
|
||||
lcb_code_execution_pass@1: 49.9
|
||||
lcb_test_output_pass@1: 48.19
|
||||
bigcodebench_hard_instruct_pass@1: 100
|
||||
bigcodebench_hard_complete_pass@1: 100
|
||||
teval_naive_average: 100
|
||||
bigcodebench_hard_instruct_pass@1: 13.51
|
||||
bigcodebench_hard_complete_pass@1: 15.54
|
||||
teval_naive_average: 82.86
|
||||
SciCode_sub_accuracy: 100
|
||||
qa_dingo_cn_score: 100
|
||||
mmlu_accuracy: 76.21
|
||||
@ -912,9 +912,9 @@ internlm3-8b-instruct-pytorch:
|
||||
lcb_code_generation_pass@1: 34.5
|
||||
lcb_code_execution_pass@1: 48.02
|
||||
lcb_test_output_pass@1: 47.74
|
||||
bigcodebench_hard_instruct_pass@1: 100
|
||||
bigcodebench_hard_complete_pass@1: 100
|
||||
teval_naive_average: 100
|
||||
bigcodebench_hard_instruct_pass@1: 12.84
|
||||
bigcodebench_hard_complete_pass@1: 15.54
|
||||
teval_naive_average: 82.86
|
||||
SciCode_sub_accuracy: 100
|
||||
qa_dingo_cn_score: 100
|
||||
mmlu_accuracy: 76.23
|
||||
|
Loading…
Reference in New Issue
Block a user