mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00

* add Chinese version: csqa crowspairs nq * Update cn_data * Update cn_data * update format --------- Co-authored-by: liuhongwei <liuhongwei@pjlab.org.cn> Co-authored-by: Leymore <zfz-960727@163.com>
31 lines
845 B
Python
31 lines
845 B
Python
import json
|
|
|
|
from datasets import Dataset, DatasetDict
|
|
|
|
from .base import BaseDataset
|
|
|
|
|
|
class CommonsenseQADataset_CN(BaseDataset):
    """Loader for the CommonsenseQA (CN) data stored as a JSON-lines file."""

    @staticmethod
    def load(path):
        """Read a JSON-lines file and expose it as a ``DatasetDict``.

        Each non-blank line of ``path`` is parsed as one JSON object. Every
        record must provide ``choices['text']`` with at least five entries;
        these are copied into top-level columns ``A`` .. ``E``. The columns
        ``question_concept``, ``id`` and ``choices`` are then removed.

        Args:
            path: Location of the JSON-lines file to read.

        Returns:
            A ``DatasetDict`` with the keys ``'train'`` and ``'validation'``.
            Both splits are built from the same file.

        Raises:
            OSError: If ``path`` cannot be opened.
            json.JSONDecodeError: If a non-blank line is not valid JSON.
        """
        # Decode explicitly as UTF-8: the platform default encoding depends
        # on the locale and can fail on non-ASCII (e.g. Chinese) text.
        # The file is parsed once up front because both splits use it.
        with open(path, 'r', encoding='utf-8') as f:
            # Skip blank lines (such as a trailing newline) that would
            # otherwise make json.loads raise.
            records = [json.loads(line) for line in f if line.strip()]

        def pre_process(example):
            # Flatten the first five answer texts into columns 'A'..'E'.
            texts = example['choices']['text']
            for i in range(5):
                example[chr(ord('A') + i)] = texts[i]
            return example

        datasetdict = DatasetDict()
        # NOTE(review): both splits come from the same path; presumably the
        # CN data ships as a single file -- confirm with the dataset config.
        for split in ['train', 'validation']:
            dataset = Dataset.from_list(records)
            dataset = dataset.map(pre_process).remove_columns(
                ['question_concept', 'id', 'choices'])
            datasetdict[split] = dataset

        return datasetdict
|