From f7d899823c3693bbafad785a8a0a61cf5805bab3 Mon Sep 17 00:00:00 2001 From: liushz Date: Fri, 1 Nov 2024 17:32:29 +0800 Subject: [PATCH] [Update] Update mmmlu_lite dataload (#1658) * update mmmlu_lite dataload from oss * update mmmlu_lite dataload from oss --- opencompass/configs/datasets/mmmlu_lite/README.md | 5 +---- .../datasets/mmmlu_lite/mmmlu_lite_gen_c51a84.py | 3 +-- opencompass/datasets/mmmlu.py | 14 ++++++++------ opencompass/utils/datasets_info.py | 14 ++++++++++++++ 4 files changed, 24 insertions(+), 12 deletions(-) diff --git a/opencompass/configs/datasets/mmmlu_lite/README.md b/opencompass/configs/datasets/mmmlu_lite/README.md index d40e901c..f7866a0b 100644 --- a/opencompass/configs/datasets/mmmlu_lite/README.md +++ b/opencompass/configs/datasets/mmmlu_lite/README.md @@ -31,11 +31,8 @@ MMMLU contains the MMLU test set translated into the following locales: ## How to Use -Download file from [link](https://hf-mirror.com/datasets/openai/MMMLU) ```python from datasets import load_dataset -ds = load_dataset("openai/MMMLU", "default") -from datasets import load_dataset -ds = load_dataset("openai/MMMLU", "by_language") +ds = load_dataset("opencompass/mmmlu_lite", "AR_XY") ``` \ No newline at end of file diff --git a/opencompass/configs/datasets/mmmlu_lite/mmmlu_lite_gen_c51a84.py b/opencompass/configs/datasets/mmmlu_lite/mmmlu_lite_gen_c51a84.py index 26794156..9e9a8ab4 100644 --- a/opencompass/configs/datasets/mmmlu_lite/mmmlu_lite_gen_c51a84.py +++ b/opencompass/configs/datasets/mmmlu_lite/mmmlu_lite_gen_c51a84.py @@ -95,8 +95,7 @@ for _name in mmmlu_lite_all_sets: dict( abbr=f'openai_m{_name}', type=MMMLULiteDataset, - # path='opencompass/mmmlu_lite', - path = './data/mmmlu_lite', + path='opencompass/mmmlu_lite', name=f'openai_m{_name}', reader_cfg=mmmlu_lite_reader_cfg, infer_cfg=mmmlu_lite_infer_cfg, diff --git a/opencompass/datasets/mmmlu.py b/opencompass/datasets/mmmlu.py index b5bef0ec..3c641e5c 100644 --- a/opencompass/datasets/mmmlu.py +++ b/opencompass/datasets/mmmlu.py @@ -2,7 +2,7 @@ # yapf: disable import json -import os +import os.path as osp from datasets import Dataset, DatasetDict, load_dataset @@ -43,10 +43,12 @@ class MMMLULiteDataset(BaseDataset): @staticmethod def load(path: str, name: str): + path = get_data_path(path, local_mode=False) dataset = DatasetDict() - path = os.path.join(path, name + '.jsonl') - dataset_list = [] - with open(path, 'r') as f: - dataset_list = [json.loads(line) for line in f.readlines()] - dataset['test'] = Dataset.from_list(dataset_list) + name = name.split('_')[-1] + raw_data = [] + filename = osp.join(path, name, 'test.jsonl') + with open(filename, encoding='utf-8') as f: + raw_data = [json.loads(line) for line in f.readlines()] + dataset['test'] = Dataset.from_list(raw_data) return dataset diff --git a/opencompass/utils/datasets_info.py b/opencompass/utils/datasets_info.py index c877adf9..e896f917 100644 --- a/opencompass/utils/datasets_info.py +++ b/opencompass/utils/datasets_info.py @@ -316,6 +316,16 @@ DATASETS_MAPPING = { "ms_id": "", "hf_id": "", "local": "./data/WikiBench/", + }, + "opencompass/mmmlu_lite": { + "ms_id": "", + "hf_id": "", + "local": "./data/mmmlu_lite", + }, + "opencompass/mmmlu_lite": { + "ms_id": "", + "hf_id": "", + "local": "./data/mmmlu_lite", } } @@ -324,6 +334,10 @@ DATASETS_URL = { "url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/mmlu.zip", "md5": "761310671509a239e41c4b717f7fab9c", }, + "/mmmlu_lite": { + "url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/mmmlu_lite.zip", + "md5": "a776af1220e1826fd0608eda1bc4425e", + }, "/gpqa/": { "url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/gpqa.zip", "md5": "2e9657959030a765916f1f2aca29140d",