Mirror of https://github.com/open-compass/opencompass.git (synced 2025-05-30 16:03:24 +08:00)
- `..`
- `race_cot_gen_d95929.py`
- `race_few_shot_gen_a498ed.py`
- `race_few_shot_ppl.py`
- `race_gen_69ee4f.py`
- `race_gen_9302a5.py`
- `race_gen.py`
- `race_ppl_5831a0.py`
- `race_ppl_a138cd.py`
- `race_ppl_abed12.py`
- `race_ppl.py`
- `README.md`

# RACE

```bash
python3 run.py --models hf_internlm2_7b --datasets race_ppl_abed12 --debug
python3 run.py --models hf_internlm2_chat_7b --datasets race_gen_69ee4f --debug
```

## Base Models

model | race-high | race-middle |
---|---|---|
llama-7b-turbomind | 31.30 | 29.53 |
llama-13b-turbomind | 35.56 | 40.25 |
llama-30b-turbomind | 57.35 | 55.78 |
llama-65b-turbomind | 70.21 | 75.35 |
llama-2-7b-turbomind | 39.74 | 46.73 |
llama-2-13b-turbomind | 57.06 | 60.52 |
llama-2-70b-turbomind | 79.02 | 82.17 |
llama-3-8b-turbomind | 67.75 | 73.61 |
llama-3-70b-turbomind | 85.79 | 90.25 |
internlm2-1.8b-turbomind | 64.72 | 70.40 |
internlm2-7b-turbomind | 72.56 | 74.16 |
internlm2-20b-turbomind | 72.90 | 74.03 |
qwen-1.8b-turbomind | 63.09 | 69.29 |
qwen-7b-turbomind | 80.30 | 85.38 |
qwen-14b-turbomind | 88.11 | 92.06 |
qwen-72b-turbomind | 90.62 | 93.59 |
qwen1.5-0.5b-hf | 54.66 | 60.38 |
qwen1.5-1.8b-hf | 67.27 | 73.33 |
qwen1.5-4b-hf | 78.50 | 83.29 |
qwen1.5-7b-hf | 82.73 | 86.70 |
qwen1.5-14b-hf | 87.99 | 91.85 |
qwen1.5-32b-hf | 90.57 | 93.25 |
qwen1.5-72b-hf | 90.45 | 93.87 |
qwen1.5-moe-a2-7b-hf | 79.56 | 83.57 |
mistral-7b-v0.1-hf | 73.58 | 76.25 |
mistral-7b-v0.2-hf | 73.67 | 77.09 |
mixtral-8x7b-v0.1-hf | 80.13 | 84.61 |
mixtral-8x22b-v0.1-hf | 86.56 | 89.62 |
yi-6b-hf | 82.93 | 85.72 |
yi-34b-hf | 90.94 | 92.76 |
deepseek-7b-base-hf | 50.91 | 56.82 |
deepseek-67b-base-hf | 83.53 | 88.23 |

## Chat Models

model | race-high | race-middle |
---|---|---|
qwen1.5-0.5b-chat-hf | 49.03 | 52.79 |
qwen1.5-1.8b-chat-hf | 66.24 | 72.91 |
qwen1.5-4b-chat-hf | 73.53 | 80.29 |
qwen1.5-7b-chat-hf | 83.28 | 88.09 |
qwen1.5-14b-chat-hf | 87.51 | 91.36 |
qwen1.5-32b-chat-hf | 91.22 | 93.52 |
qwen1.5-72b-chat-hf | 91.11 | 93.38 |
qwen1.5-110b-chat-hf | 92.31 | 93.66 |
internlm2-chat-1.8b-hf | 73.87 | 81.13 |
internlm2-chat-1.8b-sft-hf | 73.81 | 81.69 |
internlm2-chat-7b-hf | 84.51 | 88.72 |
internlm2-chat-7b-sft-hf | 84.88 | 89.90 |
internlm2-chat-20b-hf | 88.02 | 91.43 |
internlm2-chat-20b-sft-hf | 88.11 | 91.57 |
llama-3-8b-instruct-hf | 81.22 | 86.63 |
llama-3-70b-instruct-hf | 89.57 | 93.45 |
llama-3-8b-instruct-lmdeploy | 81.02 | 86.14 |
llama-3-70b-instruct-lmdeploy | 89.34 | 93.25 |
mistral-7b-instruct-v0.1-hf | 69.75 | 74.72 |
mistral-7b-instruct-v0.2-hf | 73.30 | 77.58 |
mixtral-8x7b-instruct-v0.1-hf | 81.88 | 87.26 |