[Fix] the automatic download for several datasets (#1652)

* [Fix] the automatic download for several datasets

* Update

* Update

* Update CI
This commit is contained in:
Songyang Zhang 2024-11-01 15:57:18 +08:00 committed by GitHub
parent 695738a89b
commit c789ce5698
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
29 changed files with 109 additions and 39 deletions

View File

@ -53,9 +53,7 @@ Just like a compass guides us on our journey, OpenCompass will guide you through
🔥🔥🔥 We are delighted to announce that **the OpenCompass has been recommended by the Meta AI**, click [Get Started](https://ai.meta.com/llama/get-started/#validation) of Llama for more information.
> **Attention**<br />
> We launch the OpenCompass Collaboration project, welcome to support diverse evaluation benchmarks into OpenCompass!
> Click [Issue](https://github.com/open-compass/opencompass/issues/248) for more information.
> Let's work together to build a more powerful OpenCompass toolkit!
> Breaking Change Notice: In version 0.4.0, we are consolidating all AMOTIC configuration files (previously located in ./configs/datasets, ./configs/models, and ./configs/summarizers) into the opencompass package. Users are advised to update their configuration references to reflect this structural change.
## 🚀 What's New <a><img width="35" height="20" src="https://user-images.githubusercontent.com/12782558/212848161-5e783dd6-11e8-4fe0-bbba-39ffb77730be.png"></a>

View File

@ -53,9 +53,7 @@
🔥🔥🔥 祝贺 **OpenCompass 作为大模型标准测试工具被Meta AI官方推荐**, 点击 Llama 的 [入门文档](https://ai.meta.com/llama/get-started/#validation) 获取更多信息。
> **注意**<br />
> 我们正式启动 OpenCompass 共建计划,诚邀社区用户为 OpenCompass 提供更具代表性和可信度的客观评测数据集!
> 点击 [Issue](https://github.com/open-compass/opencompass/issues/248) 获取更多数据集.
> 让我们携手共进,打造功能强大易用的大模型评测平台!
> 重要通知:从 v0.4.0 版本开始,所有位于 ./configs/datasets、./configs/models 和 ./configs/summarizers 目录下的 AMOTIC 配置文件将迁移至 opencompass 包中。请及时更新您的配置文件路径。
## 🚀 最新进展 <a><img width="35" height="20" src="https://user-images.githubusercontent.com/12782558/212848161-5e783dd6-11e8-4fe0-bbba-39ffb77730be.png"></a>

View File

@ -284,11 +284,12 @@ for _folder, _prompts in [
},
'pred_role': 'BOT',
}
_base_path = './data/GAOKAO-BENCH/data'
_base_path = 'opencompass/GAOKAO-BENCH'
_dataset = {
'type': GaokaoBenchDataset,
'abbr': 'GaokaoBench_' + _p['keyword'],
'path': _base_path + '/' + _folder + '/' + _p['keyword'] + '.json',
'path': _base_path,
'filename': '/' + _folder + '/' + _p['keyword'] + '.json',
'name': _p['keyword'],
'reader_cfg': _reader_cfg,
'infer_cfg': _infer_cfg,

View File

@ -288,7 +288,8 @@ for _folder, _prompts in [
_dataset = {
'type': GaokaoBenchDataset,
'abbr': 'GaokaoBench_' + _p['keyword'],
'path': _base_path + '/' + _folder + '/' + _p['keyword'] + '.json',
'path': _base_path,
'filename': '/' + _folder + '/' + _p['keyword'] + '.json',
'name': _p['keyword'],
'reader_cfg': _reader_cfg,
'infer_cfg': _infer_cfg,
@ -335,11 +336,12 @@ for _p in _MCQ_prompts:
},
'pred_role': 'BOT',
}
_base_path = './data/GAOKAO-BENCH/data'
_base_path = 'opencompass/GAOKAO-BENCH'
_dataset = {
'type': GaokaoBenchDataset,
'abbr': 'GaokaoBench_' + _p['keyword'],
'path': _base_path + '/' + _folder + '/' + _p['keyword'] + '.json',
'path': _base_path,
'filename': '/' + _folder + '/' + _p['keyword'] + '.json',
'name': _p['keyword'],
'reader_cfg': _reader_cfg,
'infer_cfg': _infer_cfg,

View File

@ -31,10 +31,12 @@ for folder, prompts in [
'evaluator': {'type': 'GaokaoBenchEvaluator' + '_' + p['type']},
'pred_role': 'BOT',
}
_base_path = 'opencompass/GAOKAO-BENCH'
dataset = {
'type': GaokaoBenchDataset,
'abbr': 'GaokaoBench_' + p['keyword'],
'path': os.path.join('data', 'GAOKAO-BENCH', 'data', folder, p['keyword'] + '.json'),
'path': _base_path,
'filename': '/' + folder + '/' + p['keyword'] + '.json',
'name': p['keyword'],
'reader_cfg': reader_cfg,
'infer_cfg': infer_cfg,

View File

@ -30,10 +30,12 @@ for folder, prompts in [
'evaluator': {'type': 'GaokaoBenchEvaluator' + '_' + p['type']},
'pred_role': 'BOT',
}
_base_path = 'opencompass/GAOKAO-BENCH'
dataset = {
'type': GaokaoBenchDataset,
'abbr': 'GaokaoBench_' + p['keyword'],
'path': os.path.join('data', 'GAOKAO-BENCH', 'data', folder, p['keyword'] + '.json'),
'path': _base_path,
'filename': '/' + folder + '/' + p['keyword'] + '.json',
'name': p['keyword'],
'reader_cfg': reader_cfg,
'infer_cfg': infer_cfg,

View File

@ -54,7 +54,7 @@ for k in [1]:
dict(
type=NQOpenDataset,
abbr=f'nq_open_{k}shot',
path='./data/nq-open/',
path='opencompass/nq_open',
reader_cfg=nq_reader_cfg,
infer_cfg=nq_infer_cfg,
eval_cfg=nq_eval_cfg)

View File

@ -38,7 +38,7 @@ for k in [1]:
dict(
type=NQOpenDataset,
abbr=f'nq_open_{k}shot',
path='./data/nq-open/',
path='opencompass/nq_open',
reader_cfg=nq_reader_cfg,
infer_cfg=nq_infer_cfg,
eval_cfg=nq_eval_cfg)

View File

@ -54,7 +54,7 @@ for k in [1]:
dict(
type=NQOpenDataset,
abbr=f'nq_open_{k}shot',
path='./data/nq-open/',
path='opencompass/nq_open',
reader_cfg=nq_reader_cfg,
infer_cfg=nq_infer_cfg,
eval_cfg=nq_eval_cfg)

View File

@ -54,7 +54,7 @@ for k in [0, 1, 5, 25]:
dict(
type=NQOpenDataset,
abbr=f'nq_open_{k}shot',
path='./data/nq-open/',
path='opencompass/nq_open',
reader_cfg=nq_reader_cfg,
infer_cfg=nq_infer_cfg,
eval_cfg=nq_eval_cfg)

View File

@ -60,7 +60,8 @@ for _split in list(wikibench_sets.keys()):
wikibench_datasets.append(
dict(
type=WikiBenchDataset,
path=f'./data/WikiBench/{_name}.jsonl',
path='opencompass/WikiBench',
filename=f'{_name}.jsonl',
name='circular_' + _name if do_circular else _name,
abbr='wikibench-' + _split + '-' + _name +
'circular' if do_circular else '',

View File

@ -43,7 +43,8 @@ for _split in list(wikibench_sets.keys()):
wikibench_datasets.append(
dict(
type=WikiBenchDataset,
path=f'./data/WikiBench/{_name}.jsonl',
path='opencompass/WikiBench',
filename=f'{_name}.jsonl',
name='circular_' + _name if do_circular else _name,
abbr='wikibench-' + _split + '-' + _name + 'circular' if do_circular else '',
reader_cfg=dict(

View File

@ -43,7 +43,8 @@ for _split in list(wikibench_sets.keys()):
wikibench_datasets.append(
dict(
type=WikiBenchDataset,
path=f'./data/WikiBench/{_name}.jsonl',
path='opencompass/WikiBench',
filename=f'{_name}.jsonl',
name='circular_' + _name if do_circular else _name,
abbr='wikibench-' + _split + '-' + _name + 'circular' if do_circular else '',
reader_cfg=dict(

View File

@ -1 +1,17 @@
__version__ = '0.3.4'
def _warn_about_config_migration():
    """Emit the configuration-migration notice as a ``UserWarning``.

    The message tells users that, starting from v0.4.0, the configuration
    files under ``./configs/datasets``, ``./configs/models`` and
    ``./configs/summarizers`` will be migrated to the
    ``opencompass/configs/`` package, and asks them to update their paths.
    """
    import warnings

    notice = (
        'Starting from v0.4.0, all AMOTIC configuration files currently '
        'located in `./configs/datasets`, `./configs/models`, and '
        '`./configs/summarizers` will be migrated to the '
        '`opencompass/configs/` package. Please update your configuration '
        'file paths accordingly.'
    )
    # stacklevel=2 attributes the warning to the caller of this helper
    # instead of to the ``warnings.warn`` line inside it.
    warnings.warn(notice, category=UserWarning, stacklevel=2)


# Run the helper when this module is executed so the notice is issued.
_warn_about_config_migration()

View File

@ -284,11 +284,12 @@ for _folder, _prompts in [
},
'pred_role': 'BOT',
}
_base_path = './data/GAOKAO-BENCH/data'
_base_path = 'opencompass/GAOKAO-BENCH'
_dataset = {
'type': GaokaoBenchDataset,
'abbr': 'GaokaoBench_' + _p['keyword'],
'path': _base_path + '/' + _folder + '/' + _p['keyword'] + '.json',
'path': _base_path,
'filename': '/' + _folder + '/' + _p['keyword'] + '.json',
'name': _p['keyword'],
'reader_cfg': _reader_cfg,
'infer_cfg': _infer_cfg,

View File

@ -288,7 +288,8 @@ for _folder, _prompts in [
_dataset = {
'type': GaokaoBenchDataset,
'abbr': 'GaokaoBench_' + _p['keyword'],
'path': _base_path + '/' + _folder + '/' + _p['keyword'] + '.json',
'path': _base_path,
'filename': '/' + _folder + '/' + _p['keyword'] + '.json',
'name': _p['keyword'],
'reader_cfg': _reader_cfg,
'infer_cfg': _infer_cfg,
@ -335,11 +336,12 @@ for _p in _MCQ_prompts:
},
'pred_role': 'BOT',
}
_base_path = './data/GAOKAO-BENCH/data'
_base_path = 'opencompass/GAOKAO-BENCH'
_dataset = {
'type': GaokaoBenchDataset,
'abbr': 'GaokaoBench_' + _p['keyword'],
'path': _base_path + '/' + _folder + '/' + _p['keyword'] + '.json',
'path': _base_path,
'filename': '/' + _folder + '/' + _p['keyword'] + '.json',
'name': _p['keyword'],
'reader_cfg': _reader_cfg,
'infer_cfg': _infer_cfg,

View File

@ -31,10 +31,12 @@ for folder, prompts in [
'evaluator': {'type': 'GaokaoBenchEvaluator' + '_' + p['type']},
'pred_role': 'BOT',
}
_base_path = 'opencompass/GAOKAO-BENCH'
dataset = {
'type': GaokaoBenchDataset,
'abbr': 'GaokaoBench_' + p['keyword'],
'path': os.path.join('data', 'GAOKAO-BENCH', 'data', folder, p['keyword'] + '.json'),
'path': _base_path,
'filename': '/' + folder + '/' + p['keyword'] + '.json',
'name': p['keyword'],
'reader_cfg': reader_cfg,
'infer_cfg': infer_cfg,

View File

@ -30,10 +30,12 @@ for folder, prompts in [
'evaluator': {'type': 'GaokaoBenchEvaluator' + '_' + p['type']},
'pred_role': 'BOT',
}
_base_path = 'opencompass/GAOKAO-BENCH'
dataset = {
'type': GaokaoBenchDataset,
'abbr': 'GaokaoBench_' + p['keyword'],
'path': os.path.join('data', 'GAOKAO-BENCH', 'data', folder, p['keyword'] + '.json'),
'path': _base_path,
'filename': '/' + folder + '/' + p['keyword'] + '.json',
'name': p['keyword'],
'reader_cfg': reader_cfg,
'infer_cfg': infer_cfg,

View File

@ -54,7 +54,7 @@ for k in [1]:
dict(
type=NQOpenDataset,
abbr=f'nq_open_{k}shot',
path='./data/nq-open/',
path='opencompass/nq_open',
reader_cfg=nq_reader_cfg,
infer_cfg=nq_infer_cfg,
eval_cfg=nq_eval_cfg)

View File

@ -38,7 +38,7 @@ for k in [1]:
dict(
type=NQOpenDataset,
abbr=f'nq_open_{k}shot',
path='./data/nq-open/',
path='opencompass/nq_open',
reader_cfg=nq_reader_cfg,
infer_cfg=nq_infer_cfg,
eval_cfg=nq_eval_cfg)

View File

@ -54,7 +54,7 @@ for k in [1]:
dict(
type=NQOpenDataset,
abbr=f'nq_open_{k}shot',
path='./data/nq-open/',
path='opencompass/nq_open',
reader_cfg=nq_reader_cfg,
infer_cfg=nq_infer_cfg,
eval_cfg=nq_eval_cfg)

View File

@ -54,7 +54,7 @@ for k in [0, 1, 5, 25]:
dict(
type=NQOpenDataset,
abbr=f'nq_open_{k}shot',
path='./data/nq-open/',
path='opencompass/nq_open',
reader_cfg=nq_reader_cfg,
infer_cfg=nq_infer_cfg,
eval_cfg=nq_eval_cfg)

View File

@ -60,7 +60,8 @@ for _split in list(wikibench_sets.keys()):
wikibench_datasets.append(
dict(
type=WikiBenchDataset,
path=f'./data/WikiBench/{_name}.jsonl',
path='opencompass/WikiBench',
filename=f'{_name}.jsonl',
name='circular_' + _name if do_circular else _name,
abbr='wikibench-' + _split + '-' + _name +
'circular' if do_circular else '',

View File

@ -43,7 +43,8 @@ for _split in list(wikibench_sets.keys()):
wikibench_datasets.append(
dict(
type=WikiBenchDataset,
path=f'./data/WikiBench/{_name}.jsonl',
path='opencompass/WikiBench',
filename=f'{_name}.jsonl',
name='circular_' + _name if do_circular else _name,
abbr='wikibench-' + _split + '-' + _name + 'circular' if do_circular else '',
reader_cfg=dict(

View File

@ -43,7 +43,8 @@ for _split in list(wikibench_sets.keys()):
wikibench_datasets.append(
dict(
type=WikiBenchDataset,
path=f'./data/WikiBench/{_name}.jsonl',
path='opencompass/WikiBench',
filename=f'{_name}.jsonl',
name='circular_' + _name if do_circular else _name,
abbr='wikibench-' + _split + '-' + _name + 'circular' if do_circular else '',
reader_cfg=dict(

View File

@ -15,8 +15,10 @@ from .base import BaseDataset
class GaokaoBenchDataset(BaseDataset):
@staticmethod
def load(path: str, name: str):
path = get_data_path(path, local_mode=True)
def load(path: str, filename: str, name: str):
path = get_data_path(path)
path = path + filename
if environ.get('DATASET_SOURCE') == 'ModelScope':
from modelscope import MsDataset
return MsDataset.load(path, subset_name=name, split='test')

View File

@ -55,7 +55,7 @@ class NQOpenDataset(BaseDataset):
@staticmethod
def load(path: str):
path = get_data_path(path, local_mode=True)
path = get_data_path(path)
dataset = DatasetDict()
for split in ['validation', 'train']:
filename = osp.join(path, f'nq-open-{split}.jsonl')

View File

@ -21,8 +21,9 @@ def get_number(options):
class WikiBenchDataset(BaseDataset):
@staticmethod
def load(path: str, name: str):
path = get_data_path(path, local_mode=True)
def load(path: str, filename: str, name: str):
path = get_data_path(path)
path = path + filename
circular_patterns = ['ABCD', 'BCDA', 'CDAB', 'DABC']

View File

@ -301,6 +301,21 @@ DATASETS_MAPPING = {
"ms_id": "",
"hf_id": "",
"local": "./data/cmo.jsonl",
},
"opencompass/nq_open": {
"ms_id": "",
"hf_id": "",
"local": "./data/nq-open/",
},
"opencompass/GAOKAO-BENCH": {
"ms_id": "",
"hf_id": "",
"local": "./data/GAOKAO-BENCH/data",
},
"opencompass/WikiBench": {
"ms_id": "",
"hf_id": "",
"local": "./data/WikiBench/",
}
}
@ -472,5 +487,25 @@ DATASETS_URL = {
"/cmo": {
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/cmo.zip",
"md5": "fad52c81290506a8ca74f46b5400d8fc"
},
"/nq-open": {
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/nq-open.zip",
"md5": "a340521e5c9ec591227dcb367f718b25",
},
"/winogrande": {
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/winogrande.zip",
"md5": "9e949a75eacc26ed4fd2b9aa870b495b",
},
"/triviaqa": {
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/triviaqa.zip",
"md5": "e6a118d744236814926b2ec7ec66c034",
},
"/GAOKAO-BENCH": {
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/GAOKAO-BENCH.zip",
"md5": "ba3c71b8b9db96d2a0664b977c4f9784",
},
"/WikiBench": {
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/WikiBench.zip",
"md5": "6dac1d1a3133fe1effff185cbf71d928",
}
}