[Update] Update mmmlu_lite dataload (#1658)

* update mmmlu_lite dataload from oss

* update mmmlu_lite dataload from oss
This commit is contained in:
liushz 2024-11-01 17:32:29 +08:00 committed by GitHub
parent c789ce5698
commit f7d899823c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 24 additions and 12 deletions

View File

@@ -31,11 +31,8 @@ MMMLU contains the MMLU test set translated into the following locales:
## How to Use ## How to Use
Download file from [link](https://hf-mirror.com/datasets/openai/MMMLU)
```python ```python
from datasets import load_dataset from datasets import load_dataset
ds = load_dataset("openai/MMMLU", "default") ds = load_dataset("opencompass/mmmlu_lite", "AR_XY")
from datasets import load_dataset
ds = load_dataset("openai/MMMLU", "by_language")
``` ```

View File

@@ -95,8 +95,7 @@ for _name in mmmlu_lite_all_sets:
dict( dict(
abbr=f'openai_m{_name}', abbr=f'openai_m{_name}',
type=MMMLULiteDataset, type=MMMLULiteDataset,
# path='opencompass/mmmlu_lite', path='opencompass/mmmlu_lite',
path = './data/mmmlu_lite',
name=f'openai_m{_name}', name=f'openai_m{_name}',
reader_cfg=mmmlu_lite_reader_cfg, reader_cfg=mmmlu_lite_reader_cfg,
infer_cfg=mmmlu_lite_infer_cfg, infer_cfg=mmmlu_lite_infer_cfg,

View File

@@ -2,7 +2,7 @@
# yapf: disable # yapf: disable
import json import json
import os import os.path as osp
from datasets import Dataset, DatasetDict, load_dataset from datasets import Dataset, DatasetDict, load_dataset
@@ -43,10 +43,12 @@ class MMMLULiteDataset(BaseDataset):
@staticmethod @staticmethod
def load(path: str, name: str): def load(path: str, name: str):
path = get_data_path(path, local_mode=False)
dataset = DatasetDict() dataset = DatasetDict()
path = os.path.join(path, name + '.jsonl') name = name.split('_')[-1]
dataset_list = [] raw_data = []
with open(path, 'r') as f: filename = osp.join(path, name, 'test.jsonl')
dataset_list = [json.loads(line) for line in f.readlines()] with open(filename, encoding='utf-8') as f:
dataset['test'] = Dataset.from_list(dataset_list) raw_data = [json.loads(line) for line in f.readlines()]
dataset['test'] = Dataset.from_list(raw_data)
return dataset return dataset

View File

@@ -316,6 +316,16 @@ DATASETS_MAPPING = {
"ms_id": "", "ms_id": "",
"hf_id": "", "hf_id": "",
"local": "./data/WikiBench/", "local": "./data/WikiBench/",
},
"opencompass/mmmlu_lite": {
"ms_id": "",
"hf_id": "",
"local": "./data/mmmlu_lite",
},
"opencompass/mmmlu_lite": {
"ms_id": "",
"hf_id": "",
"local": "./data/mmmlu_lite",
} }
} }
@@ -324,6 +334,10 @@ DATASETS_URL = {
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/mmlu.zip", "url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/mmlu.zip",
"md5": "761310671509a239e41c4b717f7fab9c", "md5": "761310671509a239e41c4b717f7fab9c",
}, },
"/mmmlu_lite": {
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/mmmlu_lite.zip",
"md5": "a776af1220e1826fd0608eda1bc4425e",
},
"/gpqa/": { "/gpqa/": {
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/gpqa.zip", "url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/gpqa.zip",
"md5": "2e9657959030a765916f1f2aca29140d", "md5": "2e9657959030a765916f1f2aca29140d",