Mirror of https://github.com/open-compass/opencompass.git (synced 2025-05-30 16:03:24 +08:00)
[Fix] modelscope dataset load problem (#1406)
* fix modelscope dataset load
* fix lint
This commit is contained in:
parent 264fd23129
commit 818d72a650
@ -20,9 +20,7 @@ class CEvalDataset(BaseDataset):
|
||||
dataset = {}
|
||||
if environ.get('DATASET_SOURCE') == 'ModelScope':
|
||||
from modelscope import MsDataset
|
||||
dataset = MsDataset.load(dataset_name=path,
|
||||
subset_name=name,
|
||||
trust_remote_code=True)
|
||||
dataset = MsDataset.load(dataset_name=path, subset_name=name)
|
||||
else:
|
||||
for split in ['dev', 'val', 'test']:
|
||||
filename = osp.join(path, split, f'{name}_{split}.csv')
|
||||
|
@ -18,9 +18,10 @@ class CMMLUDataset(BaseDataset):
|
||||
path = get_data_path(path)
|
||||
if environ.get('DATASET_SOURCE') == 'ModelScope':
|
||||
from modelscope import MsDataset
|
||||
dataset = MsDataset.load(path,
|
||||
subset_name=name,
|
||||
trust_remote_code=True)
|
||||
dataset = MsDataset.load(
|
||||
path,
|
||||
subset_name=name,
|
||||
)
|
||||
modified_dataset = DatasetDict()
|
||||
for split in dataset.keys():
|
||||
raw_data = []
|
||||
|
@ -20,7 +20,7 @@ class GSM8KDataset(BaseDataset):
|
||||
path = get_data_path(path)
|
||||
if environ.get('DATASET_SOURCE') == 'ModelScope':
|
||||
from modelscope import MsDataset
|
||||
dataset = MsDataset.load(dataset_name=path, trust_remote_code=True)
|
||||
dataset = MsDataset.load(dataset_name=path)
|
||||
else:
|
||||
datasets = {}
|
||||
for split in ['train', 'test']:
|
||||
|
@ -34,9 +34,7 @@ class StrategyQADataset(BaseDataset):
|
||||
|
||||
if environ.get('DATASET_SOURCE') == 'ModelScope':
|
||||
from modelscope import MsDataset
|
||||
dataset = MsDataset.load('opencompass/strategy_qa',
|
||||
split='train',
|
||||
trust_remote_code=True)
|
||||
dataset = MsDataset.load(path, split='train')
|
||||
else:
|
||||
with open(path, 'r', encoding='utf-8') as f:
|
||||
dataset = json.load(f)
|
||||
|
@ -21,8 +21,7 @@ class WinograndeDataset(BaseDataset):
|
||||
from modelscope import MsDataset
|
||||
ms_dataset = MsDataset.load(path,
|
||||
subset_name='winogrande_xs',
|
||||
split='validation',
|
||||
trust_remote_code=True)
|
||||
split='validation')
|
||||
dataset_list = []
|
||||
for line in ms_dataset:
|
||||
prompt = line['sentence']
|
||||
@ -71,8 +70,7 @@ class WinograndeDatasetV2(BaseDataset):
|
||||
from modelscope import MsDataset
|
||||
ms_dataset = MsDataset.load(path,
|
||||
subset_name='winogrande_xs',
|
||||
split='validation',
|
||||
trust_remote_code=True)
|
||||
split='validation')
|
||||
dataset_list = []
|
||||
for line in ms_dataset:
|
||||
prompt = line['sentence']
|
||||
@ -127,8 +125,7 @@ class WinograndeDatasetV3(BaseDataset):
|
||||
for split in ['train', 'validation']:
|
||||
ms_dataset = MsDataset.load(path,
|
||||
subset_name='winogrande_xs',
|
||||
split=split,
|
||||
trust_remote_code=True)
|
||||
split=split)
|
||||
dataset_list = []
|
||||
for line in ms_dataset:
|
||||
prompt = line['sentence']
|
||||
|
@ -34,7 +34,6 @@ def reload_datasets():
|
||||
from configs.datasets.commonsenseqa.commonsenseqa_gen import commonsenseqa_datasets
|
||||
|
||||
from configs.datasets.mmlu.mmlu_gen import mmlu_datasets
|
||||
from configs.datasets.strategyqa.strategyqa_gen import strategyqa_datasets
|
||||
from configs.datasets.bbh.bbh_gen import bbh_datasets
|
||||
from configs.datasets.Xsum.Xsum_gen import Xsum_datasets
|
||||
from configs.datasets.winogrande.winogrande_gen import winogrande_datasets
|
||||
@ -51,15 +50,17 @@ def reload_datasets():
|
||||
from configs.datasets.storycloze.storycloze_ppl import storycloze_datasets as storycloze_ppl_datasets
|
||||
from configs.datasets.summedits.summedits_gen import summedits_datasets as summedits_v2_datasets
|
||||
|
||||
from configs.datasets.strategyqa.strategyqa_gen import strategyqa_datasets
|
||||
from configs.datasets.mbpp.mbpp_gen import mbpp_datasets as mbpp_v1_datasets
|
||||
from configs.datasets.lcsts.lcsts_gen import lcsts_datasets
|
||||
|
||||
from configs.datasets.hellaswag.hellaswag_gen import hellaswag_datasets as hellaswag_v2_datasets
|
||||
from configs.datasets.hellaswag.hellaswag_10shot_gen_e42710 import hellaswag_datasets as hellaswag_ice_datasets
|
||||
from configs.datasets.hellaswag.hellaswag_ppl_9dbb12 import hellaswag_datasets as hellaswag_v1_datasets
|
||||
from configs.datasets.hellaswag.hellaswag_ppl_a6e128 import hellaswag_datasets as hellaswag_v3_datasets
|
||||
from configs.datasets.mbpp.mbpp_gen import mbpp_datasets as mbpp_v1_datasets
|
||||
from configs.datasets.mbpp.mbpp_passk_gen_830460 import mbpp_datasets as mbpp_v2_datasets
|
||||
from configs.datasets.mbpp.sanitized_mbpp_gen_830460 import sanitized_mbpp_datasets
|
||||
from configs.datasets.nq.nq_gen import nq_datasets
|
||||
from configs.datasets.lcsts.lcsts_gen import lcsts_datasets
|
||||
from configs.datasets.math.math_gen import math_datasets
|
||||
from configs.datasets.piqa.piqa_gen import piqa_datasets as piqa_v2_datasets
|
||||
from configs.datasets.piqa.piqa_ppl import piqa_datasets as piqa_v1_datasets
|
||||
@ -105,7 +106,8 @@ def load_datasets(source, conf):
|
||||
return dataset
|
||||
try:
|
||||
dataset = conf['type'].load(path=conf['path'])
|
||||
except Exception:
|
||||
except Exception as e:
|
||||
print(e)
|
||||
dataset = conf['type'].load(**conf)
|
||||
return dataset
|
||||
|
||||
@ -147,7 +149,7 @@ class TestingMsDatasets(unittest.TestCase):
|
||||
print(exception)
|
||||
return 'failure', f'{modelscope_path_name} is not the same as {local_path_name}'
|
||||
|
||||
with ThreadPoolExecutor(16) as executor:
|
||||
with ThreadPoolExecutor(thread) as executor:
|
||||
futures = {
|
||||
executor.submit(compare_datasets, ms_conf, local_conf): (ms_conf, local_conf)
|
||||
for ms_conf, local_conf in zip(ms_datasets_conf, local_datasets_conf)
|
||||
@ -220,4 +222,5 @@ def _check_data(ms_dataset: Dataset | DatasetDict,
|
||||
|
||||
if __name__ == '__main__':
|
||||
sample_size = 100
|
||||
thread = 1
|
||||
unittest.main()
|
||||
|
Loading…
Reference in New Issue
Block a user