# Mirror of https://github.com/open-compass/opencompass.git
# Synced 2025-05-30 16:03:24 +08:00
# 59 lines, 4.3 KiB, Python
from mmengine.config import read_base

# Pull the dataset collection, the model definition and the summarizer from
# the shared OpenCompass configs. The imports must live inside the
# `read_base()` context so the config loader treats them as base configs.
with read_base():
    from opencompass.configs.datasets.collections.leaderboard.qwen import \
        datasets
    from opencompass.configs.models.qwen.hf_qwen_7b import models
    from opencompass.configs.summarizers.leaderboard import summarizer

# Results table recorded for the `qwen-7b-hf` column, kept as a bare string
# for reference only; it is not assigned to any name.
# NOTE(review): presumably the output of a previous run of this config --
# confirm before relying on the exact figures.
'''
dataset                                 version    metric            mode    qwen-7b-hf
--------------------------------------  ---------  ----------------  ------  ------------
--------- 考试 Exam ---------           -          -                 -       -
ceval                                   -          naive_average     ppl     58.65
agieval                                 -          naive_average     mixed   40.49
mmlu                                    -          naive_average     ppl     57.78
cmmlu                                   -          naive_average     ppl     58.57
GaokaoBench                             -          weighted_average  mixed   51.76
ARC-c                                   72cf91     accuracy          gen     83.73
ARC-e                                   72cf91     accuracy          gen     90.65
--------- 语言 Language ---------       -          -                 -       -
WiC                                     ce62e6     accuracy          ppl     51.10
chid-dev                                25f3d3     accuracy          ppl     86.63
afqmc-dev                               cc328c     accuracy          ppl     69.00
WSC                                     678cb5     accuracy          ppl     63.46
tydiqa-goldp                            -          naive_average     gen     19.98
flores_100                              -          naive_average     gen     3.20
--------- 知识 Knowledge ---------      -          -                 -       -
BoolQ                                   463fee     accuracy          ppl     83.00
commonsense_qa                          0d8e25     accuracy          ppl     67.49
triviaqa                                b6904f     score             gen     40.45
nq                                      b6904f     score             gen     14.16
--------- 理解 Understanding ---------  -          -                 -       -
C3                                      e6778d     accuracy          gen     75.29
race-middle                             73bdec     accuracy          ppl     90.53
race-high                               73bdec     accuracy          ppl     87.71
openbookqa_fact                         fa871c     accuracy          gen     92.20
csl_dev                                 3c4211     accuracy          ppl     56.25
lcsts                                   0b3969     rouge1            gen     12.38
Xsum                                    207e69     rouge1            gen     36.00
eprstmt-dev                             101429     accuracy          gen     89.38
lambada                                 de1af2     accuracy          gen     67.88
--------- 推理 Reasoning ---------      -          -                 -       -
cmnli                                   15e783     accuracy          ppl     54.85
ocnli                                   1471e7     accuracy          gen     42.34
AX_b                                    793c72     accuracy          gen     58.61
AX_g                                    c4c886     accuracy          gen     69.10
RTE                                     c4c886     accuracy          gen     57.76
COPA                                    59f42c     accuracy          gen     88.00
ReCoRD                                  3e0689     score             gen     27.78
hellaswag                               06a1e2     accuracy          gen     92.47
piqa                                    24369d     accuracy          gen     78.02
siqa                                    ea30d1     accuracy          ppl     75.03
math                                    2c0b9e     accuracy          gen     11.06
gsm8k                                   4c7f6e     accuracy          gen     50.87
drop                                    53a0a7     score             gen     44.95
openai_humaneval                        dd0dff     humaneval_pass@1  gen     23.78
mbpp                                    60ca11     score             gen     31.20
bbh                                     -          naive_average     gen     40.03
'''