mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
38 lines
1.1 KiB
Python
38 lines
1.1 KiB
Python
from datasets import Dataset, DatasetDict, load_dataset
|
|
|
|
from opencompass.registry import LOAD_DATASET
|
|
|
|
from .base import BaseDataset
|
|
|
|
|
|
@LOAD_DATASET.register_module()
class ScienceQADataset(BaseDataset):
    """Text-only view of the ``derek-thomas/ScienceQA`` test split.

    Rows that carry an image are dropped; the remaining rows get a
    lettered ``choices`` string and a ``label`` holding the lettered
    correct option.
    """

    @staticmethod
    def load_single():
        """Build the filtered and reformatted test split.

        For every row of the ``test`` split whose ``image`` field is
        ``None``:

        * ``label`` is set to ``'<letter>. <answer text>'``, where the
          letter is ``chr(65 + answer)`` (``'A'`` for index 0).
        * ``choices`` is replaced by a single string with one
          ``'<letter>. <choice text>\\n'`` line per original choice.

        Returns:
            Dataset: the rewritten rows, in their original order.
        """
        raw = load_dataset('derek-thomas/ScienceQA')

        records = []
        for sample in raw['test']:
            # Only questions without an accompanying image are kept.
            if sample['image'] is not None:
                continue

            options = sample['choices']
            answer_idx = sample['answer']

            sample['label'] = chr(65 + answer_idx) + '. ' + options[answer_idx]
            # Letter each option ('A. ...', 'B. ...') on its own line.
            sample['choices'] = ''.join(
                chr(65 + idx) + '. ' + text + '\n'
                for idx, text in enumerate(options))

            records.append(sample)

        return Dataset.from_list(records)

    @staticmethod
    def load(path):
        """Return a ``DatasetDict`` with ``train`` and ``validation`` splits.

        ``train`` is an empty dataset and ``validation`` is the result of
        :meth:`load_single`.

        Args:
            path: Accepted as part of the signature but not read by this
                method; the data source is fixed inside ``load_single``.

        Returns:
            DatasetDict: mapping with keys ``'train'`` and ``'validation'``.
        """
        return DatasetDict({
            'train': Dataset.from_list([]),
            'validation': ScienceQADataset.load_single(),
        })
|