OpenCompass/.github/scripts/oc_score_baseline.yaml
zhulinJulia24 f982d6278e
[CI] fix baseline score (#2000)
* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update
2025-04-03 19:32:36 +08:00

40 lines
955 B
YAML

# Baseline scores, grouped by model / backend configuration.
#
# Each top-level key names one configuration; the keys nested under it map a
# metric name to its baseline value. The metric keys repeat across
# configurations, so they MUST stay nested: at top level they would be
# duplicate mapping keys, which YAML forbids.
#
# NOTE(review): presumably consumed by a CI script that compares fresh
# evaluation results against these numbers — confirm against the consumer
# (including whether it expects this exact indentation width).

# NOTE(review): the next two entries differ only in "-hf" vs "_hf" and carry
# identical scores; both are kept as-is — verify that both names are still
# looked up before removing either.
internlm2_5-7b-hf:
    demo_gsm8k_accuracy: 42.19
    race-middle_accuracy: 91.78
    race-high_accuracy: 90.02

internlm2_5-7b_hf:
    demo_gsm8k_accuracy: 42.19
    race-middle_accuracy: 91.78
    race-high_accuracy: 90.02

internlm2_5-7b-chat-lmdeploy:
    demo_gsm8k_accuracy: 87.50
    race-middle_accuracy: 92.76
    race-high_accuracy: 90.54

internlm3-8b-instruct-lmdeploy:
    demo_gsm8k_accuracy: 73.44
    race-middle_accuracy: 93.38
    race-high_accuracy: 90.34

internlm3-8b-instruct_hf-lmdeploy:
    demo_gsm8k_accuracy: 73.44
    race-middle_accuracy: 93.38
    race-high_accuracy: 90.34

internlm3-8b-instruct_hf-vllm:
    demo_gsm8k_accuracy: 81.25
    race-middle_accuracy: 92.20
    race-high_accuracy: 89.88

internlm2_5-7b-chat_hf:
    demo_gsm8k_accuracy: 87.50
    race-middle_accuracy: 92.76
    race-high_accuracy: 90.48

# This entry uses "gsm8k_accuracy" (no "demo_" prefix), unlike the entries above.
lmdeploy-api-test:
    gsm8k_accuracy: 56.25
    race-middle_accuracy: 93.75
    race-high_accuracy: 93.75