mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
51 lines
1.4 KiB
Python
51 lines
1.4 KiB
Python
![]() |
import json
|
||
|
|
||
|
from datasets import Dataset, DatasetDict
|
||
|
|
||
|
from opencompass.registry import LOAD_DATASET
|
||
|
|
||
|
from .base import BaseDataset
|
||
|
|
||
|
|
||
|
@LOAD_DATASET.register_module()
class PubMedQADataset(BaseDataset):
    """Loader that turns a PubMedQA JSON file into a yes/no/maybe choice set.

    The result of :meth:`load` is a ``DatasetDict`` with an empty ``train``
    split and a ``validation`` split built by :meth:`load_single`.
    """

    # Legacy shared-filesystem location; used whenever ``load`` is not given
    # a path that resolves to an existing file.
    _DEFAULT_VAL_FILE = ('/fs-computility/ai4sData/shared/'
                         'lifescience/benchmark/raw/PubMedQA/ori_pqal.json')

    @staticmethod
    def load_single(file_path):
        """Read one PubMedQA JSON file and build a ``Dataset`` from it.

        The file must hold a JSON object whose values are records with at
        least the keys ``CONTEXTS``, ``QUESTION`` and ``final_decision``.
        Each record keeps its original fields and gains three more:

        * ``question``: the contexts and the question merged into one prompt.
        * ``choices``: the fixed option list ``A. yes / B. no / C. maybe``.
        * ``label``: the option matching ``final_decision``; any value other
          than ``'yes'`` or ``'no'`` is labelled ``'C. maybe'``.

        Args:
            file_path: Path of the JSON file to read.

        Returns:
            A ``Dataset`` with one row per record, in file order.
        """
        # JSON is UTF-8 by specification, so do not rely on the locale default.
        with open(file_path, 'r', encoding='utf-8') as file:
            records = json.load(file)

        choices = 'A. yes\nB. no\nC. maybe'
        dataset = []
        for data in records.values():
            data['question'] = (f"CONTEXTS: {data['CONTEXTS']}\n"
                                f"QUESTION: {data['QUESTION']}")
            data['choices'] = choices
            if data['final_decision'] == 'yes':
                data['label'] = 'A. yes'
            elif data['final_decision'] == 'no':
                data['label'] = 'B. no'
            else:
                # Deliberate catch-all: everything else counts as "maybe".
                data['label'] = 'C. maybe'
            dataset.append(data)

        return Dataset.from_list(dataset)

    @staticmethod
    def load(path):
        """Build the ``train``/``validation`` splits for PubMedQA.

        Args:
            path: Either the JSON file itself or a directory that contains
                ``ori_pqal.json``. When it resolves to no existing file
                (including ``None`` or an empty string) the legacy default
                location is used, which keeps older configs working.

        Returns:
            A ``DatasetDict`` with an empty ``train`` split and the parsed
            records under ``validation``.
        """
        import os

        val_file = PubMedQADataset._DEFAULT_VAL_FILE
        if path:
            for candidate in (path, os.path.join(path, 'ori_pqal.json')):
                if os.path.isfile(candidate):
                    val_file = candidate
                    break

        train_dataset = Dataset.from_list([])
        val_dataset = PubMedQADataset.load_single(val_file)
        return DatasetDict({
            'train': train_dataset,
            'validation': val_dataset,
        })
|