OpenCompass/opencompass/datasets/jigsawmultilingual.py
2023-07-04 21:34:55 +08:00

36 lines
1.0 KiB
Python

import csv
from datasets import Dataset, DatasetDict
from opencompass.registry import LOAD_DATASET
from .base import BaseDataset
@LOAD_DATASET.register_module()
class JigsawMultilingualDataset(BaseDataset):
    """Dataset loader that pairs a text CSV with a label CSV for one language.

    Registered with ``LOAD_DATASET`` so it can be looked up by name.
    """

    @staticmethod
    def load(path, label, lang):
        """Build a ``DatasetDict`` with a single ``'test'`` split.

        The two CSV files are read in lockstep, row by row. A row is kept
        only when the third column of the text file equals ``lang``.

        Args:
            path: Path to the text CSV. Column 0 is compared against column 0
                of the label file (presumably a shared row id), column 1 is
                stored as ``'text'`` and column 2 is the language code.
            label: Path to the label CSV. Column 1 is parsed with ``int`` and
                stored as ``'label'``.
            lang: Language code to keep; one of
                ``'es'``, ``'fr'``, ``'it'``, ``'pt'``, ``'ru'``, ``'tr'``.

        Returns:
            A ``DatasetDict`` whose ``'test'`` entry holds records with the
            keys ``'idx'`` (0-based position among the kept rows),
            ``'text'``, ``'label'`` and ``'choices'`` (always
            ``['no', 'yes']``).

        Raises:
            AssertionError: If ``lang`` is unsupported, or if a kept row's
                first column differs between the two files.
        """
        supported_langs = ['es', 'fr', 'it', 'pt', 'ru', 'tr']
        # Kept as ``assert`` (not ``raise ValueError``) so the exception type
        # seen by existing callers does not change.
        assert lang in supported_langs, (
            f'unsupported lang {lang!r}; expected one of {supported_langs}')

        data_list = []
        # The corpus is multilingual, so decode explicitly as UTF-8 instead of
        # relying on the platform default. ``newline=''`` is what the csv
        # module requires to handle newlines embedded in quoted fields.
        # The handles get their own names so the ``label`` argument is not
        # shadowed.
        with open(path, encoding='utf-8', newline='') as text_file, \
                open(label, encoding='utf-8', newline='') as label_file:
            text_reader = csv.reader(text_file)
            label_reader = csv.reader(label_file)
            # NOTE(review): ``zip`` stops at the shorter file, so a length
            # mismatch between the two CSVs is silently ignored.
            for text, target in zip(text_reader, label_reader):
                # Rows in other languages are skipped; a header row (if the
                # file has one) is skipped too, as long as its third cell is
                # not a language code.
                if text[2] != lang:
                    continue
                assert text[0] == target[0], (
                    f'row mismatch between text and label files: '
                    f'{text[0]!r} != {target[0]!r}')
                data_list.append({
                    # Index counts only the rows kept for this language.
                    'idx': len(data_list),
                    'text': text[1],
                    'label': int(target[1]),
                    'choices': ['no', 'yes'],
                })

        dataset = DatasetDict()
        dataset['test'] = Dataset.from_list(data_list)
        return dataset