mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
PubMedQA & ScienceQA
This commit is contained in:
parent
7cffdf1cfb
commit
14311ec0b7
@ -1,6 +1,4 @@
|
|||||||
import json
|
from datasets import Dataset, DatasetDict, load_dataset
|
||||||
|
|
||||||
from datasets import Dataset, DatasetDict
|
|
||||||
|
|
||||||
from opencompass.registry import LOAD_DATASET
|
from opencompass.registry import LOAD_DATASET
|
||||||
|
|
||||||
@ -11,18 +9,12 @@ from .base import BaseDataset
|
|||||||
class PubMedQADataset(BaseDataset):
|
class PubMedQADataset(BaseDataset):
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def load_single(file_path):
|
def load_single():
|
||||||
dataset = []
|
dataset = []
|
||||||
with open(file_path, 'r') as file:
|
ds = load_dataset('qiaojin/PubMedQA', 'pqa_labeled')
|
||||||
data_lines = json.load(file)
|
for data in ds['train']:
|
||||||
num = 0
|
data['question'] = (f"CONTEXTS: {data['context']}\n"
|
||||||
for name in data_lines:
|
f"QUESTION: {data['question']}")
|
||||||
data = data_lines[name]
|
|
||||||
num += 1
|
|
||||||
# if num > 10:
|
|
||||||
# break
|
|
||||||
data['question'] = (f"CONTEXTS: {data['CONTEXTS']}\n"
|
|
||||||
f"QUESTION: {data['QUESTION']}")
|
|
||||||
choices = 'A. yes\nB. no\nC. maybe'
|
choices = 'A. yes\nB. no\nC. maybe'
|
||||||
data['choices'] = choices
|
data['choices'] = choices
|
||||||
if data['final_decision'] == 'yes':
|
if data['final_decision'] == 'yes':
|
||||||
@ -31,7 +23,6 @@ class PubMedQADataset(BaseDataset):
|
|||||||
data['label'] = 'B. no'
|
data['label'] = 'B. no'
|
||||||
else:
|
else:
|
||||||
data['label'] = 'C. maybe'
|
data['label'] = 'C. maybe'
|
||||||
# print(data)
|
|
||||||
|
|
||||||
dataset.append(data)
|
dataset.append(data)
|
||||||
|
|
||||||
@ -40,9 +31,7 @@ class PubMedQADataset(BaseDataset):
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
def load(path):
|
def load(path):
|
||||||
train_dataset = Dataset.from_list([])
|
train_dataset = Dataset.from_list([])
|
||||||
val_dataset = PubMedQADataset.load_single(
|
val_dataset = PubMedQADataset.load_single()
|
||||||
'/fs-computility/ai4sData/shared/'
|
|
||||||
'lifescience/benchmark/raw/PubMedQA/ori_pqal.json')
|
|
||||||
dataset = DatasetDict({
|
dataset = DatasetDict({
|
||||||
'train': train_dataset,
|
'train': train_dataset,
|
||||||
'validation': val_dataset
|
'validation': val_dataset
|
||||||
|
@ -12,12 +12,8 @@ class ScienceQADataset(BaseDataset):
|
|||||||
def load_single():
|
def load_single():
|
||||||
dataset = []
|
dataset = []
|
||||||
ds = load_dataset('derek-thomas/ScienceQA')
|
ds = load_dataset('derek-thomas/ScienceQA')
|
||||||
num = 0
|
|
||||||
for data in ds['test']:
|
for data in ds['test']:
|
||||||
if data['image'] is None and data['topic'] == 'biology':
|
if data['image'] is None:
|
||||||
num += 1
|
|
||||||
# if num > 10:
|
|
||||||
# break
|
|
||||||
data['label'] = chr(65 + data['answer']
|
data['label'] = chr(65 + data['answer']
|
||||||
) + '. ' + data['choices'][data['answer']]
|
) + '. ' + data['choices'][data['answer']]
|
||||||
choices = ''
|
choices = ''
|
||||||
|
Loading…
Reference in New Issue
Block a user