2025-02-14 16:17:30 +08:00
|
|
|
|
#! /usr/bin/env python
|
|
|
|
|
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
|
|
|
|
import yaml
|
|
|
|
|
from tabulate import tabulate
|
|
|
|
|
|
|
|
|
|
# Repository root: two directory levels above the folder holding this script.
OC_ROOT = Path(__file__).absolute().parents[2]
# Prefix turning a repo-relative config path into a browsable GitHub URL.
GITHUB_PREFIX = 'https://github.com/open-compass/opencompass/tree/main/'
# Introductory Markdown written at the top of the generated statistics page.
DATASETZOO_TEMPLATE = """\
# 数据集统计

在本页面中,我们列举了OpenCompass所支持的所有数据集。

你可以使用排序和搜索功能找到需要的数据集。

我们对每一个数据集都给出了推荐的运行配置,部分数据集中还提供了基于LLM Judge的推荐配置。

你可以基于推荐配置快速启动评测。但请注意,推荐配置可能随时间推移被更新。

"""

# (Re)create the output page. The template is non-ASCII, so the encoding is
# pinned instead of depending on the platform's locale default.
with open('dataset_statistics.md', 'w', encoding='utf-8') as f:
    f.write(DATASETZOO_TEMPLATE)

load_path = str(OC_ROOT / 'dataset-index.yml')

# Load the dataset index kept at the repository root.
with open(load_path, 'r', encoding='utf-8') as f2:
    data_list = yaml.load(f2, Loader=yaml.FullLoader)

# Per-dataset keys read from the index, in output column order.
HEADER = ['name', 'category', 'paper', 'configpath', 'configpath_llmjudge']

# Dataset keys whose config links are labelled as final; every other dataset
# gets a "(TBD)" link label in table_format().
recommanded_dataset_list = [
    'ifeval', 'aime2024', 'bbh', 'bigcodebench', 'cmmlu', 'drop', 'gpqa',
    'hellaswag', 'humaneval', 'korbench', 'livecodebench', 'math', 'mmlu',
    'mmlu_pro', 'musr'
]
|
|
|
|
|
|
2025-02-14 16:17:30 +08:00
|
|
|
|
|
|
|
|
|
def table_format(data_list, *, header=None, github_prefix=None,
                 recommended=None):
    """Turn dataset-index entries into rows of Markdown table cells.

    Args:
        data_list: Iterable of mappings. Each mapping is keyed by a dataset
            identifier whose value is a mapping with one item per column
            key in ``header``.
        header: Column keys to emit, in order. Defaults to the module-level
            ``HEADER``.
        github_prefix: URL prefix put in front of config paths. Defaults to
            the module-level ``GITHUB_PREFIX``.
        recommended: Dataset identifiers whose config links use the plain
            link label; all others get the ``(TBD)`` label. Defaults to the
            module-level ``recommanded_dataset_list``.

    Returns:
        A list with one row (list of cell values) per entry of ``data_list``.

    Raises:
        KeyError: If a dataset mapping lacks one of the ``header`` keys.
    """
    if header is None:
        header = HEADER
    if github_prefix is None:
        github_prefix = GITHUB_PREFIX
    if recommended is None:
        recommended = recommanded_dataset_list

    rows = []
    for entry in data_list:
        row = []
        for dataset_key in entry:
            info = entry[dataset_key]
            if dataset_key in recommended:
                link_token = '[链接]('
            else:
                link_token = '[链接(TBD)]('

            for column in header:
                value = info[column]
                if column == 'paper':
                    # Paper links always use the plain label.
                    row.append('[链接](' + value + ')')
                elif column == 'configpath_llmjudge':
                    # An empty string means no LLM-judge config; keep the
                    # cell empty rather than emitting a dangling link.
                    if value == '':
                        row.append(value)
                    else:
                        row.append(link_token + github_prefix + value + ')')
                elif column == 'configpath':
                    if isinstance(value, list):
                        # Join with the separator instead of appending it
                        # after every item and slicing: the old ``[:-2]``
                        # trim left a trailing space behind the last link.
                        row.append(' / '.join(
                            link_token + github_prefix + path + ')'
                            for path in value))
                    else:
                        row.append(link_token + github_prefix + value + ')')
                else:
                    row.append(value)
        # NOTE(review): one row per entry is assumed (entries are expected
        # to hold a single dataset key) — confirm against the index file.
        rows.append(row)
    return rows
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Convert the loaded dataset index into rows of table cells.
data_format_list = table_format(data_list)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def generate_table(data_list, title=None):
    """Append a fenced ``{table}`` block to ``dataset_statistics.md``.

    Args:
        data_list: Table rows handed to ``tabulate`` together with the
            fixed five-column header defined below.
        title: Optional text written on its own line before the table
            fence. Nothing is written for it when ``None``.
    """
    header = ['数据集名称', '数据集类型', '原文或资源地址', '推荐配置', '推荐配置(基于LLM评估)']
    table_cfg = dict(tablefmt='pipe',
                     floatfmt='.2f',
                     numalign='right',
                     stralign='center')

    # The header and link labels are non-ASCII, so pin the encoding instead
    # of relying on the platform's locale default.
    with open('dataset_statistics.md', 'a', encoding='utf-8') as f:
        if title is not None:
            f.write(f'\n{title}')
        f.write('\n```{table}\n:class: dataset\n')
        f.write(tabulate(data_list, header, **table_cfg))
        f.write('\n```\n')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Append the formatted dataset rows to the page under its section heading.
generate_table(data_list=data_format_list, title='## 支持数据集列表')
|