mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
update
This commit is contained in:
parent
3472ed113d
commit
6dbbd80af0
126
.github/scripts/oc_score_baseline_testrange.yaml
vendored
126
.github/scripts/oc_score_baseline_testrange.yaml
vendored
@ -18,7 +18,7 @@ chat:
|
|||||||
gsm8k_accuracy: 28.12
|
gsm8k_accuracy: 28.12
|
||||||
race-high_accuracy: 53.12
|
race-high_accuracy: 53.12
|
||||||
deepseek-7b-chat-vllm:
|
deepseek-7b-chat-vllm:
|
||||||
gsm8k_accuracy: 43.75
|
gsm8k_accuracy: 56.25
|
||||||
race-high_accuracy: 78.12
|
race-high_accuracy: 78.12
|
||||||
gemma2-2b-it-hf:
|
gemma2-2b-it-hf:
|
||||||
gsm8k_accuracy: 50
|
gsm8k_accuracy: 50
|
||||||
@ -33,13 +33,13 @@ chat:
|
|||||||
gsm8k_accuracy: 40.62
|
gsm8k_accuracy: 40.62
|
||||||
race-high_accuracy: 68.75
|
race-high_accuracy: 68.75
|
||||||
gemma-2-9b-it-turbomind:
|
gemma-2-9b-it-turbomind:
|
||||||
gsm8k_accuracy: 71.88
|
gsm8k_accuracy: 68.75
|
||||||
race-high_accuracy: 84.38
|
race-high_accuracy: 84.38
|
||||||
gemma-2-27b-it-turbomind:
|
gemma-2-27b-it-turbomind:
|
||||||
gsm8k_accuracy: 78.12
|
gsm8k_accuracy: 78.12
|
||||||
race-high_accuracy: 93.75
|
race-high_accuracy: 93.75
|
||||||
gemma-7b-it-vllm:
|
gemma-7b-it-vllm:
|
||||||
gsm8k_accuracy: 31.25
|
gsm8k_accuracy: 28.12
|
||||||
race-high_accuracy: 68.75
|
race-high_accuracy: 68.75
|
||||||
internlm2_5-7b-chat-hf:
|
internlm2_5-7b-chat-hf:
|
||||||
gsm8k_accuracy: 84.38
|
gsm8k_accuracy: 84.38
|
||||||
@ -48,25 +48,25 @@ chat:
|
|||||||
gsm8k_accuracy: 65.62
|
gsm8k_accuracy: 65.62
|
||||||
race-high_accuracy: 87.5
|
race-high_accuracy: 87.5
|
||||||
internlm2_5-7b-chat-turbomind:
|
internlm2_5-7b-chat-turbomind:
|
||||||
gsm8k_accuracy: 84.38
|
gsm8k_accuracy: 81.25
|
||||||
race-high_accuracy: 90.62
|
race-high_accuracy: 90.62
|
||||||
internlm2-chat-1.8b-turbomind:
|
internlm2-chat-1.8b-turbomind:
|
||||||
gsm8k_accuracy: 28.12
|
gsm8k_accuracy: 25.00
|
||||||
race-high_accuracy: 84.38
|
race-high_accuracy: 84.38
|
||||||
internlm2-chat-1.8b-sft-turbomind:
|
internlm2-chat-1.8b-sft-turbomind:
|
||||||
gsm8k_accuracy: 31.25
|
gsm8k_accuracy: 34.38
|
||||||
race-high_accuracy: 84.38
|
race-high_accuracy: 84.38
|
||||||
internlm2-chat-7b-lmdeploy:
|
internlm2-chat-7b-lmdeploy:
|
||||||
gsm8k_accuracy: 59.38
|
gsm8k_accuracy: 59.38
|
||||||
race-high_accuracy: 84.38
|
race-high_accuracy: 87.50
|
||||||
internlm2-chat-7b-sft-turbomind:
|
internlm2-chat-7b-sft-turbomind:
|
||||||
gsm8k_accuracy: 56.25
|
gsm8k_accuracy: 56.25
|
||||||
race-high_accuracy: 90.62
|
race-high_accuracy: 87.50
|
||||||
internlm3-8b-instruct-turbomind:
|
internlm3-8b-instruct-turbomind:
|
||||||
gsm8k_accuracy: 68.75
|
gsm8k_accuracy: 65.62
|
||||||
race-high_accuracy: 87.5
|
race-high_accuracy: 87.5
|
||||||
internlm2-chat-7b-vllm:
|
internlm2-chat-7b-vllm:
|
||||||
gsm8k_accuracy: 59.38
|
gsm8k_accuracy: 53.12
|
||||||
race-high_accuracy: 87.50
|
race-high_accuracy: 87.50
|
||||||
llama-3_1-8b-instruct-hf:
|
llama-3_1-8b-instruct-hf:
|
||||||
gsm8k_accuracy: 84.38
|
gsm8k_accuracy: 84.38
|
||||||
@ -81,13 +81,13 @@ chat:
|
|||||||
gsm8k_accuracy: 18.75
|
gsm8k_accuracy: 18.75
|
||||||
race-high_accuracy: 46.88
|
race-high_accuracy: 46.88
|
||||||
llama-3_1-8b-instruct-turbomind:
|
llama-3_1-8b-instruct-turbomind:
|
||||||
gsm8k_accuracy: 81.25
|
gsm8k_accuracy: 84.38
|
||||||
race-high_accuracy: 90.62
|
race-high_accuracy: 90.62
|
||||||
llama-3_2-3b-instruct-turbomind:
|
llama-3_2-3b-instruct-turbomind:
|
||||||
gsm8k_accuracy: 68.75
|
gsm8k_accuracy: 65.62
|
||||||
race-high_accuracy: 81.25
|
race-high_accuracy: 81.25
|
||||||
llama-3-8b-instruct-turbomind:
|
llama-3-8b-instruct-turbomind:
|
||||||
gsm8k_accuracy: 68.75
|
gsm8k_accuracy: 65.62
|
||||||
race-high_accuracy: 84.38
|
race-high_accuracy: 84.38
|
||||||
mistral-7b-instruct-v0.2-hf:
|
mistral-7b-instruct-v0.2-hf:
|
||||||
gsm8k_accuracy: 40.62
|
gsm8k_accuracy: 40.62
|
||||||
@ -100,12 +100,12 @@ chat:
|
|||||||
race-high_accuracy: 81.25
|
race-high_accuracy: 81.25
|
||||||
mistral-nemo-instruct-2407-turbomind:
|
mistral-nemo-instruct-2407-turbomind:
|
||||||
gsm8k_accuracy: 71.88
|
gsm8k_accuracy: 71.88
|
||||||
race-high_accuracy: 78.12
|
race-high_accuracy: 75
|
||||||
mistral-7b-instruct-v0.1-vllm:
|
mistral-7b-instruct-v0.1-vllm:
|
||||||
gsm8k_accuracy: 34.38
|
gsm8k_accuracy: 34.38
|
||||||
race-high_accuracy: 65.62
|
race-high_accuracy: 65.62
|
||||||
mistral-7b-instruct-v0.2-vllm:
|
mistral-7b-instruct-v0.2-vllm:
|
||||||
gsm8k_accuracy: 21.88
|
gsm8k_accuracy: 28.12
|
||||||
race-high_accuracy: 78.12
|
race-high_accuracy: 78.12
|
||||||
qwen2.5-0.5b-instruct-hf:
|
qwen2.5-0.5b-instruct-hf:
|
||||||
gsm8k_accuracy: 34.38
|
gsm8k_accuracy: 34.38
|
||||||
@ -114,7 +114,7 @@ chat:
|
|||||||
gsm8k_accuracy: 53.12
|
gsm8k_accuracy: 53.12
|
||||||
race-high_accuracy: 90.62
|
race-high_accuracy: 90.62
|
||||||
qwen2.5-0.5b-instruct-turbomind:
|
qwen2.5-0.5b-instruct-turbomind:
|
||||||
gsm8k_accuracy: 31.25
|
gsm8k_accuracy: 28.12
|
||||||
race-high_accuracy: 43.75
|
race-high_accuracy: 43.75
|
||||||
qwen2.5-3b-instruct-turbomind:
|
qwen2.5-3b-instruct-turbomind:
|
||||||
gsm8k_accuracy: 56.25
|
gsm8k_accuracy: 56.25
|
||||||
@ -132,10 +132,10 @@ chat:
|
|||||||
gsm8k_accuracy: 56.25
|
gsm8k_accuracy: 56.25
|
||||||
race-high_accuracy: 84.38
|
race-high_accuracy: 84.38
|
||||||
qwen2-7b-instruct-turbomind:
|
qwen2-7b-instruct-turbomind:
|
||||||
gsm8k_accuracy: 81.25
|
gsm8k_accuracy: 75.00
|
||||||
race-high_accuracy: 87.50
|
race-high_accuracy: 87.50
|
||||||
qwen1.5-0.5b-chat-vllm:
|
qwen1.5-0.5b-chat-vllm:
|
||||||
gsm8k_accuracy: 3.12
|
gsm8k_accuracy: 6.25
|
||||||
race-high_accuracy: 53.12
|
race-high_accuracy: 53.12
|
||||||
yi-1.5-6b-chat-hf:
|
yi-1.5-6b-chat-hf:
|
||||||
gsm8k_accuracy: 65.62
|
gsm8k_accuracy: 65.62
|
||||||
@ -144,13 +144,13 @@ chat:
|
|||||||
gsm8k_accuracy: 75
|
gsm8k_accuracy: 75
|
||||||
race-high_accuracy: 93.75
|
race-high_accuracy: 93.75
|
||||||
yi-1.5-6b-chat-turbomind:
|
yi-1.5-6b-chat-turbomind:
|
||||||
gsm8k_accuracy: 62.5
|
gsm8k_accuracy: 59.38
|
||||||
race-high_accuracy: 84.38
|
race-high_accuracy: 84.38
|
||||||
yi-1.5-9b-chat-turbomind:
|
yi-1.5-9b-chat-turbomind:
|
||||||
gsm8k_accuracy: 71.88
|
gsm8k_accuracy: 78.12
|
||||||
race-high_accuracy: 93.75
|
race-high_accuracy: 93.75
|
||||||
deepseek-v2_lite-chat-turbomind:
|
deepseek-v2_lite-chat-turbomind:
|
||||||
gsm8k_accuracy: 37.5
|
gsm8k_accuracy: 43.75
|
||||||
race-high_accuracy: 71.88
|
race-high_accuracy: 71.88
|
||||||
gemma2-27b-it-hf:
|
gemma2-27b-it-hf:
|
||||||
gsm8k_accuracy: 71.88
|
gsm8k_accuracy: 71.88
|
||||||
@ -165,7 +165,7 @@ chat:
|
|||||||
gsm8k_accuracy: 81.25
|
gsm8k_accuracy: 81.25
|
||||||
race-high_accuracy: 87.50
|
race-high_accuracy: 87.50
|
||||||
mistral-small-instruct-2409-turbomind:
|
mistral-small-instruct-2409-turbomind:
|
||||||
gsm8k_accuracy: 81.25
|
gsm8k_accuracy: 78.12
|
||||||
race-high_accuracy: 87.50
|
race-high_accuracy: 87.50
|
||||||
phi-4:
|
phi-4:
|
||||||
gsm8k_accuracy: 81.25
|
gsm8k_accuracy: 81.25
|
||||||
@ -174,16 +174,16 @@ chat:
|
|||||||
gsm8k_accuracy: 71.88
|
gsm8k_accuracy: 71.88
|
||||||
race-high_accuracy: 96.88
|
race-high_accuracy: 96.88
|
||||||
qwen2.5-14b-instruct-turbomind:
|
qwen2.5-14b-instruct-turbomind:
|
||||||
gsm8k_accuracy: 68.75
|
gsm8k_accuracy: 71.88
|
||||||
race-high_accuracy: 93.75
|
race-high_accuracy: 96.88
|
||||||
yi-1.5-34b-chat-turbomind:
|
yi-1.5-34b-chat-turbomind:
|
||||||
gsm8k_accuracy: 75.00
|
gsm8k_accuracy: 71.88
|
||||||
race-high_accuracy: 93.75
|
race-high_accuracy: 93.75
|
||||||
deepseek-67b-chat-turbomind:
|
deepseek-67b-chat-turbomind:
|
||||||
gsm8k_accuracy: 75.00
|
gsm8k_accuracy: 71.88
|
||||||
race-high_accuracy: 78.12
|
race-high_accuracy: 75.00
|
||||||
deepseek-r1-distill-qwen-32b-turbomind:
|
deepseek-r1-distill-qwen-32b-turbomind:
|
||||||
gsm8k_accuracy: 25
|
gsm8k_accuracy: 31.25
|
||||||
race-high_accuracy: 90.62
|
race-high_accuracy: 90.62
|
||||||
llama-3_3-70b-instruct-turbomind:
|
llama-3_3-70b-instruct-turbomind:
|
||||||
gsm8k_accuracy: 93.75
|
gsm8k_accuracy: 93.75
|
||||||
@ -192,19 +192,19 @@ chat:
|
|||||||
gsm8k_accuracy: 87.50
|
gsm8k_accuracy: 87.50
|
||||||
race-high_accuracy: 93.75
|
race-high_accuracy: 93.75
|
||||||
nvidia-3_1-Nemotron-70b-instruct-HF-turbomind:
|
nvidia-3_1-Nemotron-70b-instruct-HF-turbomind:
|
||||||
gsm8k_accuracy: 93.75
|
gsm8k_accuracy: 90.62
|
||||||
race-high_accuracy: 50.00
|
race-high_accuracy: 53.12
|
||||||
qwen2.5-72b-instruct-turbomind:
|
qwen2.5-72b-instruct-turbomind:
|
||||||
gsm8k_accuracy: 81.25
|
gsm8k_accuracy: 78.12
|
||||||
race-high_accuracy: 90.62
|
race-high_accuracy: 90.62
|
||||||
deepseek-r1-distill-llama-70b-turbomind:
|
deepseek-r1-distill-llama-70b-turbomind:
|
||||||
gsm8k_accuracy: 40.62
|
gsm8k_accuracy: 50.00
|
||||||
race-high_accuracy: 90.62
|
race-high_accuracy: 87.50
|
||||||
deepseek-v2_5-1210-turbomind:
|
deepseek-v2_5-1210-turbomind:
|
||||||
gsm8k_accuracy: 90.62
|
gsm8k_accuracy: 90.62
|
||||||
race-high_accuracy: 84.38
|
race-high_accuracy: 84.38
|
||||||
mixtral-8x22b-instruct-v0.1-turbomind:
|
mixtral-8x22b-instruct-v0.1-turbomind:
|
||||||
gsm8k_accuracy: 78.12
|
gsm8k_accuracy: 75.00
|
||||||
race-high_accuracy: 78.12
|
race-high_accuracy: 78.12
|
||||||
mixtral-8x22b-instruct-v0.1-vllm:
|
mixtral-8x22b-instruct-v0.1-vllm:
|
||||||
gsm8k_accuracy: 78.12
|
gsm8k_accuracy: 78.12
|
||||||
@ -222,11 +222,11 @@ base:
|
|||||||
winogrande_accuracy: 71.88
|
winogrande_accuracy: 71.88
|
||||||
deepseek-7b-base-turbomind:
|
deepseek-7b-base-turbomind:
|
||||||
gsm8k_accuracy: 18.75
|
gsm8k_accuracy: 18.75
|
||||||
GPQA_diamond_accuracy: 0
|
GPQA_diamond_accuracy: 3.12
|
||||||
race-high_accuracy: 43.75
|
race-high_accuracy: 50.00
|
||||||
winogrande_accuracy: 84.38
|
winogrande_accuracy: 84.38
|
||||||
deepseek-moe-16b-base-vllm:
|
deepseek-moe-16b-base-vllm:
|
||||||
gsm8k_accuracy: 21.88
|
gsm8k_accuracy: 25.00
|
||||||
GPQA_diamond_accuracy: 0
|
GPQA_diamond_accuracy: 0
|
||||||
race-high_accuracy: 25
|
race-high_accuracy: 25
|
||||||
winogrande_accuracy: 68.75
|
winogrande_accuracy: 68.75
|
||||||
@ -253,15 +253,15 @@ base:
|
|||||||
gemma-2-9b-turbomind:
|
gemma-2-9b-turbomind:
|
||||||
gsm8k_accuracy: 68.75
|
gsm8k_accuracy: 68.75
|
||||||
GPQA_diamond_accuracy: 0
|
GPQA_diamond_accuracy: 0
|
||||||
race-high_accuracy: 18.75
|
race-high_accuracy: 84.38
|
||||||
winogrande_accuracy: 46.88
|
winogrande_accuracy: 81.25
|
||||||
gemma-2b-vllm:
|
gemma-2b-vllm:
|
||||||
gsm8k_accuracy: 15.62
|
gsm8k_accuracy: 15.62
|
||||||
GPQA_diamond_accuracy: 3.12
|
GPQA_diamond_accuracy: 3.12
|
||||||
race-high_accuracy: 28.12
|
race-high_accuracy: 28.12
|
||||||
winogrande_accuracy: 68.75
|
winogrande_accuracy: 68.75
|
||||||
gemma-7b-vllm:
|
gemma-7b-vllm:
|
||||||
gsm8k_accuracy: 43.75
|
gsm8k_accuracy: 59.38
|
||||||
GPQA_diamond_accuracy: 6.25
|
GPQA_diamond_accuracy: 6.25
|
||||||
race-high_accuracy: 81.25
|
race-high_accuracy: 81.25
|
||||||
winogrande_accuracy: 81.25
|
winogrande_accuracy: 81.25
|
||||||
@ -276,8 +276,8 @@ base:
|
|||||||
race-high_accuracy: 62.5
|
race-high_accuracy: 62.5
|
||||||
winogrande_accuracy: 78.12
|
winogrande_accuracy: 78.12
|
||||||
internlm2-1.8b-turbomind:
|
internlm2-1.8b-turbomind:
|
||||||
gsm8k_accuracy: 6.25
|
gsm8k_accuracy: 12.50
|
||||||
GPQA_diamond_accuracy: 12.5
|
GPQA_diamond_accuracy: 9.38
|
||||||
race-high_accuracy: 71.88
|
race-high_accuracy: 71.88
|
||||||
winogrande_accuracy: 75
|
winogrande_accuracy: 75
|
||||||
internlm2_5-7b-turbomind:
|
internlm2_5-7b-turbomind:
|
||||||
@ -286,13 +286,13 @@ base:
|
|||||||
race-high_accuracy: 93.75
|
race-high_accuracy: 93.75
|
||||||
winogrande_accuracy: 87.5
|
winogrande_accuracy: 87.5
|
||||||
internlm2-7b-turbomind:
|
internlm2-7b-turbomind:
|
||||||
gsm8k_accuracy: 59.38
|
gsm8k_accuracy: 53.12
|
||||||
GPQA_diamond_accuracy: 34.38
|
GPQA_diamond_accuracy: 25.00
|
||||||
race-high_accuracy: 78.12
|
race-high_accuracy: 78.12
|
||||||
winogrande_accuracy: 71.88
|
winogrande_accuracy: 71.88
|
||||||
internlm2-base-7b-turbomind:
|
internlm2-base-7b-turbomind:
|
||||||
gsm8k_accuracy: 28.12
|
gsm8k_accuracy: 25.00
|
||||||
GPQA_diamond_accuracy: 31.25
|
GPQA_diamond_accuracy: 34.38
|
||||||
race-high_accuracy: 71.88
|
race-high_accuracy: 71.88
|
||||||
winogrande_accuracy: 62.50
|
winogrande_accuracy: 62.50
|
||||||
llama-2-7b-hf:
|
llama-2-7b-hf:
|
||||||
@ -311,8 +311,8 @@ base:
|
|||||||
race-high_accuracy: 65.62
|
race-high_accuracy: 65.62
|
||||||
winogrande_accuracy: 65.62
|
winogrande_accuracy: 65.62
|
||||||
llama-3.1-8b-turbomind:
|
llama-3.1-8b-turbomind:
|
||||||
gsm8k_accuracy: 59.38
|
gsm8k_accuracy: 56.25
|
||||||
GPQA_diamond_accuracy: 15.62
|
GPQA_diamond_accuracy: 9.38
|
||||||
race-high_accuracy: 78.12
|
race-high_accuracy: 78.12
|
||||||
winogrande_accuracy: 78.12
|
winogrande_accuracy: 78.12
|
||||||
llama-3-8b-turbomind:
|
llama-3-8b-turbomind:
|
||||||
@ -332,12 +332,12 @@ base:
|
|||||||
winogrande_accuracy: 71.88
|
winogrande_accuracy: 71.88
|
||||||
qwen2.5-1.5b-turbomind:
|
qwen2.5-1.5b-turbomind:
|
||||||
gsm8k_accuracy: 59.38
|
gsm8k_accuracy: 59.38
|
||||||
GPQA_diamond_accuracy: 18.75
|
GPQA_diamond_accuracy: 21.88
|
||||||
race-high_accuracy: 75
|
race-high_accuracy: 78.12
|
||||||
winogrande_accuracy: 71.88
|
winogrande_accuracy: 71.88
|
||||||
qwen2.5-7b-turbomind:
|
qwen2.5-7b-turbomind:
|
||||||
gsm8k_accuracy: 71.88
|
gsm8k_accuracy: 78.12
|
||||||
GPQA_diamond_accuracy: 18.75
|
GPQA_diamond_accuracy: 21.88
|
||||||
race-high_accuracy: 87.5
|
race-high_accuracy: 87.5
|
||||||
winogrande_accuracy: 75.00
|
winogrande_accuracy: 75.00
|
||||||
qwen1.5-moe-a2.7b-hf:
|
qwen1.5-moe-a2.7b-hf:
|
||||||
@ -361,18 +361,18 @@ base:
|
|||||||
race-high_accuracy: 87.5
|
race-high_accuracy: 87.5
|
||||||
winogrande_accuracy: 68.75
|
winogrande_accuracy: 68.75
|
||||||
qwen2-1.5b-turbomind:
|
qwen2-1.5b-turbomind:
|
||||||
gsm8k_accuracy: 59.38
|
gsm8k_accuracy: 56.25
|
||||||
GPQA_diamond_accuracy: 6.25
|
GPQA_diamond_accuracy: 12.50
|
||||||
race-high_accuracy: 81.25
|
race-high_accuracy: 81.25
|
||||||
winogrande_accuracy: 75
|
winogrande_accuracy: 75
|
||||||
qwen2-7b-turbomind:
|
qwen2-7b-turbomind:
|
||||||
gsm8k_accuracy: 62.5
|
gsm8k_accuracy: 65.62
|
||||||
GPQA_diamond_accuracy: 12.5
|
GPQA_diamond_accuracy: 12.5
|
||||||
race-high_accuracy: 87.5
|
race-high_accuracy: 87.5
|
||||||
winogrande_accuracy: 75
|
winogrande_accuracy: 75
|
||||||
qwen1.5-0.5b-vllm:
|
qwen1.5-0.5b-vllm:
|
||||||
gsm8k_accuracy: 9.38
|
gsm8k_accuracy: 9.38
|
||||||
GPQA_diamond_accuracy: 0
|
GPQA_diamond_accuracy: 3.12
|
||||||
race-high_accuracy: 56.25
|
race-high_accuracy: 56.25
|
||||||
winogrande_accuracy: 59.38
|
winogrande_accuracy: 59.38
|
||||||
yi-1.5-6b-hf:
|
yi-1.5-6b-hf:
|
||||||
@ -386,7 +386,7 @@ base:
|
|||||||
race-high_accuracy: 87.5
|
race-high_accuracy: 87.5
|
||||||
winogrande_accuracy: 59.38
|
winogrande_accuracy: 59.38
|
||||||
yi-1.5-9b-turbomind:
|
yi-1.5-9b-turbomind:
|
||||||
gsm8k_accuracy: 78.12
|
gsm8k_accuracy: 75.00
|
||||||
GPQA_diamond_accuracy: 40.62
|
GPQA_diamond_accuracy: 40.62
|
||||||
race-high_accuracy: 87.5
|
race-high_accuracy: 87.5
|
||||||
winogrande_accuracy: 65.62
|
winogrande_accuracy: 65.62
|
||||||
@ -406,13 +406,13 @@ base:
|
|||||||
race-high_accuracy: 93.75
|
race-high_accuracy: 93.75
|
||||||
winogrande_accuracy: 78.12
|
winogrande_accuracy: 78.12
|
||||||
qwen2.5-32b-turbomind:
|
qwen2.5-32b-turbomind:
|
||||||
gsm8k_accuracy: 84.38
|
gsm8k_accuracy: 90.62
|
||||||
GPQA_diamond_accuracy: 28.12
|
GPQA_diamond_accuracy: 31.25
|
||||||
race-high_accuracy: 93.75
|
race-high_accuracy: 93.75
|
||||||
winogrande_accuracy: 81.25
|
winogrande_accuracy: 81.25
|
||||||
deepseek-67b-base-turbomind:
|
deepseek-67b-base-turbomind:
|
||||||
gsm8k_accuracy: 59.38
|
gsm8k_accuracy: 62.50
|
||||||
GPQA_diamond_accuracy: 34.38
|
GPQA_diamond_accuracy: 31.25
|
||||||
race-high_accuracy: 78.12
|
race-high_accuracy: 78.12
|
||||||
winogrande_accuracy: 81.25
|
winogrande_accuracy: 81.25
|
||||||
llama-3-70b-turbomind:
|
llama-3-70b-turbomind:
|
||||||
@ -422,11 +422,11 @@ base:
|
|||||||
winogrande_accuracy: 84.38
|
winogrande_accuracy: 84.38
|
||||||
qwen2.5-72b-turbomind:
|
qwen2.5-72b-turbomind:
|
||||||
gsm8k_accuracy: 84.38
|
gsm8k_accuracy: 84.38
|
||||||
GPQA_diamond_accuracy: 31.25
|
GPQA_diamond_accuracy: 40.62
|
||||||
race-high_accuracy: 93.75
|
race-high_accuracy: 93.75
|
||||||
winogrande_accuracy: 87.5
|
winogrande_accuracy: 87.5
|
||||||
deepseek-v2-turbomind:
|
deepseek-v2-turbomind:
|
||||||
gsm8k_accuracy: 65.62
|
gsm8k_accuracy: 65.62
|
||||||
GPQA_diamond_accuracy: 9.38
|
GPQA_diamond_accuracy: 3.12
|
||||||
race-high_accuracy: 93.75
|
race-high_accuracy: 93.75
|
||||||
winogrande_accuracy: 81.25
|
winogrande_accuracy: 81.25
|
||||||
|
Loading…
Reference in New Issue
Block a user