mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
40 lines
1.2 KiB
Python
40 lines
1.2 KiB
Python
![]() |
from datasets import Dataset, DatasetDict, load_dataset
|
||
|
|
||
|
from opencompass.registry import LOAD_DATASET
|
||
|
|
||
|
from .base import BaseDataset
|
||
|
|
||
|
|
||
|
@LOAD_DATASET.register_module()
|
||
|
class MedQADataset(BaseDataset):
|
||
|
|
||
|
@staticmethod
|
||
|
def load_single(path):
|
||
|
dataset = []
|
||
|
data_lines = load_dataset(path, 'test') # "data/MedQA"
|
||
|
num = 0
|
||
|
for data in data_lines:
|
||
|
num += 1
|
||
|
choices = ''
|
||
|
for i in range(4):
|
||
|
data[chr(65 + i)] = data['ending' + str(i)]
|
||
|
choices += chr(65 + i) + '. ' + data['ending' + str(i)] + '\n'
|
||
|
data['question'] = data['sent1']
|
||
|
data['choices'] = choices
|
||
|
data['label'] = chr(65 + int(data['label'])) + '. ' + data[
|
||
|
'ending' + str(data['label'])]
|
||
|
|
||
|
dataset.append(data)
|
||
|
|
||
|
return Dataset.from_list(dataset)
|
||
|
|
||
|
@staticmethod
|
||
|
def load(path):
|
||
|
train_dataset = Dataset.from_list([])
|
||
|
val_dataset = MedQADataset.load_single(path) # "data/MedQA/test.json"
|
||
|
dataset = DatasetDict({
|
||
|
'train': train_dataset,
|
||
|
'validation': val_dataset
|
||
|
})
|
||
|
return dataset
|