mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
36 lines
1.0 KiB
Python
36 lines
1.0 KiB
Python
import csv
|
|
|
|
from datasets import Dataset, DatasetDict
|
|
|
|
from opencompass.registry import LOAD_DATASET
|
|
|
|
from .base import BaseDataset
|
|
|
|
|
|
@LOAD_DATASET.register_module()
class JigsawMultilingualDataset(BaseDataset):
    """Loader for a two-file CSV dataset filtered down to one language.

    The text CSV and the label CSV are read in lockstep; only rows whose
    language column matches the requested language are kept.
    """

    @staticmethod
    def load(path, label, lang):
        """Build a ``DatasetDict`` with a single ``'test'`` split.

        Args:
            path: Path to the text CSV. Column 0 is compared against column
                0 of the label CSV (presumably a row id), column 1 is the
                text and column 2 is the language code.
            label: Path to the label CSV. Column 1 is parsed with ``int()``
                and stored as the label.
            lang: Language code to keep; must be one of ``'es'``, ``'fr'``,
                ``'it'``, ``'pt'``, ``'ru'`` or ``'tr'``.

        Returns:
            A ``DatasetDict`` whose ``'test'`` split holds one record per
            matching row with the keys ``'idx'`` (0-based position among
            the kept rows), ``'text'``, ``'label'`` and ``'choices'``.

        Raises:
            AssertionError: If ``lang`` is not supported, or if a kept text
                row and its paired label row disagree on column 0.
        """
        supported_langs = ['es', 'fr', 'it', 'pt', 'ru', 'tr']
        assert lang in supported_langs, (
            f'unsupported lang {lang!r}; expected one of {supported_langs}')

        data_list = []
        # Explicit UTF-8 keeps decoding of the multilingual text independent
        # of the platform locale; newline='' is what the csv module requires
        # so that quoted fields containing line breaks are parsed correctly.
        # The label handle gets its own name so the ``label`` path argument
        # is not rebound to a file object.
        with open(path, encoding='utf-8', newline='') as text_file, \
                open(label, encoding='utf-8', newline='') as label_file:
            text_reader = csv.reader(text_file)
            label_reader = csv.reader(label_file)
            # Rows are paired purely by position; zip() stops at the end of
            # the shorter file.
            for text, target in zip(text_reader, label_reader):
                if text[2] != lang:
                    continue
                # Guard against the two files drifting out of alignment.
                assert text[0] == target[0], (
                    f'text row {text[0]!r} is paired with '
                    f'label row {target[0]!r}')
                data_list.append({
                    # Running index over the kept rows only.
                    'idx': len(data_list),
                    'text': text[1],
                    'label': int(target[1]),
                    'choices': ['no', 'yes']
                })

        dataset = DatasetDict()
        dataset['test'] = Dataset.from_list(data_list)
        return dataset
|