OpenCompass/.github/scripts/oc_score_baseline.yaml
zhulinJulia24 fb69ba5eb8
[CI] add command testcase into daily testcase (#1447)
* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

---------

Co-authored-by: zhulin1 <zhulin1@pjlab.org.cn>
2024-08-23 01:49:17 +08:00

215 lines
3.2 KiB
YAML

# Score baselines, one entry per model.
# Each top-level key is a model identifier; its nested mapping holds one
# baseline value per dataset (gsm8k, race-middle, race-high).
# NOTE(review): values are presumably accuracy percentages compared against
# CI results by a consuming script -- confirm units and tolerance there.
baichuan2-7b-chat-hf:
  gsm8k: 30
  race-middle: 74
  race-high: 79
glm-4-9b-chat-hf:
  gsm8k: 75
  race-middle: 88
  race-high: 88
deepseek-7b-chat-hf:
  gsm8k: 60
  race-middle: 74
  race-high: 80
deepseek-moe-16b-chat-hf:
  gsm8k: 62
  race-middle: 62
  race-high: 70
gemma-2b-it-hf:
  gsm8k: 14
  race-middle: 62
  race-high: 52
gemma-7b-it-hf:
  gsm8k: 39
  race-middle: 74
  race-high: 71
internlm2_5-7b-chat-hf:
  gsm8k: 86
  race-middle: 92
  race-high: 93
internlm2_5-7b-chat-turbomind:
  gsm8k: 87
  race-middle: 92
  race-high: 93
internlm2-chat-1.8b-turbomind:
  gsm8k: 40
  race-middle: 82
  race-high: 83
internlm2-chat-1.8b-sft-turbomind:
  gsm8k: 32
  race-middle: 81
  race-high: 83
internlm2-chat-7b-turbomind:
  gsm8k: 69
  race-middle: 90
  race-high: 88
internlm2-chat-7b-sft-turbomind:
  gsm8k: 71
  race-middle: 91
  race-high: 92
llama-3-8b-instruct-hf:
  gsm8k: 77
  race-middle: 85
  race-high: 87
llama-3-8b-instruct-turbomind:
  gsm8k: 77
  race-middle: 85
  race-high: 89
mistral-7b-instruct-v0.2-hf:
  gsm8k: 48
  race-middle: 82
  race-high: 78
minicpm-2b-dpo-fp32-hf:
  gsm8k: 58
  race-middle: 66
  race-high: 74
minicpm-2b-sft-bf16-hf:
  gsm8k: 58
  race-middle: 75
  race-high: 81
minicpm-2b-sft-fp32-hf:
  gsm8k: 58
  race-middle: 75
  race-high: 81
phi-3-mini-4k-instruct-hf:
  gsm8k: 67
  race-middle: 81
  race-high: 84
qwen1.5-0.5b-chat-hf:
  gsm8k: 5
  race-middle: 55
  race-high: 50
qwen2-1.5b-instruct-turbomind:
  gsm8k: 60
  race-middle: 77
  race-high: 86
qwen2-7b-instruct-turbomind:
  gsm8k: 88
  race-middle: 87
  race-high: 89
yi-1.5-6b-chat-hf:
  gsm8k: 72
  race-middle: 88
  race-high: 86
yi-1.5-9b-chat-hf:
  gsm8k: 81
  race-middle: 89
  race-high: 91
deepseek-moe-16b-base-hf:
  gsm8k: 25
  race-middle: 35
  race-high: 23
lmdeploy-api-test:
  gsm8k: 90
  race-middle: 95
  race-high: 96
deepseek-7b-base-turbomind:
  gsm8k: 21
  race-middle: 42
  race-high: 42
gemma-2b-hf:
  gsm8k: 19
  race-middle: 33
  race-high: 26
gemma-7b-hf:
  gsm8k: 65
  race-middle: 59
  race-high: 66
internlm2_5-7b-hf:
  gsm8k: 46
  race-middle: 92
  race-high: 91
internlm2_5-7b-turbomind:
  gsm8k: 73
  race-middle: 90
  race-high: 91
internlm2-1.8b-turbomind:
  gsm8k: 25
  race-middle: 75
  race-high: 72
internlm2-7b-turbomind:
  gsm8k: 67
  race-middle: 78
  race-high: 76
internlm2-base-7b-hf:
  gsm8k: 2
  race-middle: 71
  race-high: 74
internlm2-base-7b-turbomind:
  gsm8k: 39
  race-middle: 75
  race-high: 81
llama-3-8b-turbomind:
  gsm8k: 52
  race-middle: 63
  race-high: 70
mistral-7b-v0.2-hf:
  gsm8k: 43
  race-middle: 42
  race-high: 60
qwen1.5-moe-a2.7b-hf:
  gsm8k: 64
  race-middle: 78
  race-high: 90
qwen2-0.5b-hf:
  gsm8k: 35
  race-middle: 52
  race-high: 48
qwen2-1.5b-turbomind:
  gsm8k: 57
  race-middle: 64
  race-high: 78
qwen2-7b-turbomind:
  gsm8k: 83
  race-middle: 88
  race-high: 88
yi-1.5-6b-hf:
  gsm8k: 59
  race-middle: 81
  race-high: 89
yi-1.5-9b-hf:
  gsm8k: 77
  race-middle: 90
  race-high: 90