OpenCompass/opencompass/datasets/PubMedQA.py

37 lines
1.0 KiB
Python
Raw Normal View History

2025-05-07 23:05:36 +08:00
from datasets import Dataset, load_dataset
2025-05-07 01:55:18 +08:00
from opencompass.registry import LOAD_DATASET
2025-05-07 23:05:36 +08:00
from opencompass.utils import get_data_path
2025-05-07 01:55:18 +08:00
from .base import BaseDataset
@LOAD_DATASET.register_module()
class PubMedQADataset(BaseDataset):
    """Loader that reshapes PubMedQA records into a three-way choice task.

    Each record taken from the ``train`` split gets its ``question`` field
    rewritten to include the record's ``context``, plus two new fields:
    ``choices`` (the fixed option list) and ``label`` (the option string
    that corresponds to ``final_decision``).
    """

    @staticmethod
    def _decision_to_label(decision):
        """Translate a ``final_decision`` value into its option string.

        Args:
            decision: The record's ``final_decision`` value.

        Returns:
            ``'A. yes'`` for ``'yes'``, ``'B. no'`` for ``'no'``, and
            ``'C. maybe'`` for every other value.
        """
        if decision == 'yes':
            return 'A. yes'
        if decision == 'no':
            return 'B. no'
        # Anything that is neither 'yes' nor 'no' falls through to 'maybe'.
        return 'C. maybe'

    @staticmethod
    def load_single(path):
        """Build the processed dataset from the source located at ``path``.

        ``path`` is handed to ``load_dataset`` together with
        ``'pqa_labeled'`` as the second argument, and only the ``train``
        split of the result is read.

        Args:
            path: Dataset location or identifier accepted by
                ``load_dataset``.

        Returns:
            A ``Dataset`` built with ``Dataset.from_list`` from the
            augmented records.
        """
        option_text = 'A. yes\nB. no\nC. maybe'
        records = []
        for row in load_dataset(path, 'pqa_labeled')['train']:
            # Capture both fields before 'question' is overwritten below.
            # NOTE(review): 'context' is interpolated with default string
            # formatting; if it is a structured value (dict/list) its repr
            # ends up in the prompt -- confirm this is intended.
            context, question = row['context'], row['question']
            row['question'] = f'CONTEXTS: {context}\nQUESTION: {question}'
            row['choices'] = option_text
            row['label'] = PubMedQADataset._decision_to_label(
                row['final_decision'])
            records.append(row)
        return Dataset.from_list(records)

    @staticmethod
    def load(path):
        """Resolve ``path`` with ``get_data_path`` and load the dataset.

        Args:
            path: Path or identifier passed to ``get_data_path`` before
                loading.

        Returns:
            The ``Dataset`` produced by ``load_single`` for the resolved
            path.
        """
        return PubMedQADataset.load_single(get_data_path(path))