PubMedQA & ScienceQA

This commit is contained in:
xuxuxuxuxuxjh 2025-04-25 14:47:20 +08:00
parent 7cffdf1cfb
commit 14311ec0b7
3 changed files with 8 additions and 27 deletions

View File

@@ -1,6 +1,4 @@
import json from datasets import Dataset, DatasetDict, load_dataset
from datasets import Dataset, DatasetDict
from opencompass.registry import LOAD_DATASET from opencompass.registry import LOAD_DATASET
@@ -11,18 +9,12 @@ from .base import BaseDataset
class PubMedQADataset(BaseDataset): class PubMedQADataset(BaseDataset):
@staticmethod @staticmethod
def load_single(file_path): def load_single():
dataset = [] dataset = []
with open(file_path, 'r') as file: ds = load_dataset('qiaojin/PubMedQA', 'pqa_labeled')
data_lines = json.load(file) for data in ds['train']:
num = 0 data['question'] = (f"CONTEXTS: {data['context']}\n"
for name in data_lines: f"QUESTION: {data['question']}")
data = data_lines[name]
num += 1
# if num > 10:
# break
data['question'] = (f"CONTEXTS: {data['CONTEXTS']}\n"
f"QUESTION: {data['QUESTION']}")
choices = 'A. yes\nB. no\nC. maybe' choices = 'A. yes\nB. no\nC. maybe'
data['choices'] = choices data['choices'] = choices
if data['final_decision'] == 'yes': if data['final_decision'] == 'yes':
@@ -31,7 +23,6 @@ class PubMedQADataset(BaseDataset):
data['label'] = 'B. no' data['label'] = 'B. no'
else: else:
data['label'] = 'C. maybe' data['label'] = 'C. maybe'
# print(data)
dataset.append(data) dataset.append(data)
@@ -40,9 +31,7 @@ class PubMedQADataset(BaseDataset):
@staticmethod @staticmethod
def load(path): def load(path):
train_dataset = Dataset.from_list([]) train_dataset = Dataset.from_list([])
val_dataset = PubMedQADataset.load_single( val_dataset = PubMedQADataset.load_single()
'/fs-computility/ai4sData/shared/'
'lifescience/benchmark/raw/PubMedQA/ori_pqal.json')
dataset = DatasetDict({ dataset = DatasetDict({
'train': train_dataset, 'train': train_dataset,
'validation': val_dataset 'validation': val_dataset

View File

@@ -12,12 +12,8 @@ class ScienceQADataset(BaseDataset):
def load_single(): def load_single():
dataset = [] dataset = []
ds = load_dataset('derek-thomas/ScienceQA') ds = load_dataset('derek-thomas/ScienceQA')
num = 0
for data in ds['test']: for data in ds['test']:
if data['image'] is None and data['topic'] == 'biology': if data['image'] is None:
num += 1
# if num > 10:
# break
data['label'] = chr(65 + data['answer'] data['label'] = chr(65 + data['answer']
) + '. ' + data['choices'][data['answer']] ) + '. ' + data['choices'][data['answer']]
choices = '' choices = ''

4
run.py
View File

@@ -1,4 +0,0 @@
# Script entry point: hands control to the OpenCompass CLI `main` function.
from opencompass.cli.main import main

# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()