[Update] Update mmmlu_lite dataload (#1658)

* update mmmlu_lite dataload from oss

* update mmmlu_lite dataload from oss
This commit is contained in:
liushz 2024-11-01 17:32:29 +08:00 committed by GitHub
parent c789ce5698
commit f7d899823c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 24 additions and 12 deletions

View File

@ -31,11 +31,8 @@ MMMLU contains the MMLU test set translated into the following locales:
## How to Use
Download the dataset files from [this link](https://hf-mirror.com/datasets/openai/MMMLU):
```python
from datasets import load_dataset
ds = load_dataset("openai/MMMLU", "default")
from datasets import load_dataset
ds = load_dataset("openai/MMMLU", "by_language")
ds = load_dataset("opencompass/mmmlu_lite", "AR_XY")
```

View File

@ -95,8 +95,7 @@ for _name in mmmlu_lite_all_sets:
dict(
abbr=f'openai_m{_name}',
type=MMMLULiteDataset,
# path='opencompass/mmmlu_lite',
path = './data/mmmlu_lite',
path='opencompass/mmmlu_lite',
name=f'openai_m{_name}',
reader_cfg=mmmlu_lite_reader_cfg,
infer_cfg=mmmlu_lite_infer_cfg,

View File

@ -2,7 +2,7 @@
# yapf: disable
import json
import os
import os.path as osp
from datasets import Dataset, DatasetDict, load_dataset
@ -43,10 +43,12 @@ class MMMLULiteDataset(BaseDataset):
@staticmethod
def load(path: str, name: str):
path = get_data_path(path, local_mode=False)
dataset = DatasetDict()
path = os.path.join(path, name + '.jsonl')
dataset_list = []
with open(path, 'r') as f:
dataset_list = [json.loads(line) for line in f.readlines()]
dataset['test'] = Dataset.from_list(dataset_list)
name = name.split('_')[-1]
raw_data = []
filename = osp.join(path, name, 'test.jsonl')
with open(filename, encoding='utf-8') as f:
raw_data = [json.loads(line) for line in f.readlines()]
dataset['test'] = Dataset.from_list(raw_data)
return dataset

View File

@ -316,6 +316,16 @@ DATASETS_MAPPING = {
"ms_id": "",
"hf_id": "",
"local": "./data/WikiBench/",
},
"opencompass/mmmlu_lite": {
"ms_id": "",
"hf_id": "",
"local": "./data/mmmlu_lite",
},
"opencompass/mmmlu_lite": {
"ms_id": "",
"hf_id": "",
"local": "./data/mmmlu_lite",
}
}
@ -324,6 +334,10 @@ DATASETS_URL = {
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/mmlu.zip",
"md5": "761310671509a239e41c4b717f7fab9c",
},
"/mmmlu_lite": {
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/mmmlu_lite.zip",
"md5": "a776af1220e1826fd0608eda1bc4425e",
},
"/gpqa/": {
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/gpqa.zip",
"md5": "2e9657959030a765916f1f2aca29140d",