OpenCompass/opencompass/datasets/obqa.py

57 lines
1.9 KiB
Python
Raw Normal View History

import json
from datasets import Dataset
2023-07-05 09:01:25 +08:00
from opencompass.registry import LOAD_DATASET
from .base import BaseDataset
@LOAD_DATASET.register_module()
class OBQADataset(BaseDataset):
    """Loader for OpenBookQA-style multiple-choice data in JSON Lines form."""

    @staticmethod
    def load(path):
        """Read a JSON Lines file and build a ``Dataset`` of flat records.

        Every non-blank line of the file must be a JSON object providing
        ``question.stem``, ``question.choices`` (at least four entries,
        each with a ``text`` field) and ``answerKey``. A top-level
        ``fact1`` field is copied through when present.

        Args:
            path: Path of the JSON Lines file to read.

        Returns:
            The result of ``Dataset.from_list`` over one dict per record,
            with keys ``A``, ``B``, ``C``, ``D``, ``question_stem``,
            ``answerKey`` and, where available, ``fact1``.

        Raises:
            json.JSONDecodeError: If a non-blank line is not valid JSON.
            KeyError: If a required field is missing from a record.
            IndexError: If a record has fewer than four choices.
        """
        dataset_list = []
        # Decode explicitly as UTF-8 instead of relying on the platform's
        # locale encoding, which is not UTF-8 on every system.
        with open(path, 'r', encoding='utf-8') as f:
            for raw_line in f:
                # Skip blank lines; json.loads would raise on them.
                if not raw_line.strip():
                    continue
                record = json.loads(raw_line)
                # Options are mapped to A-D purely by position.
                # NOTE(review): assumes the file lists choices in A-D
                # order; the per-choice label is not consulted here.
                choices = record['question']['choices']
                item = {
                    'A': choices[0]['text'],
                    'B': choices[1]['text'],
                    'C': choices[2]['text'],
                    'D': choices[3]['text'],
                    'question_stem': record['question']['stem'],
                    'answerKey': record['answerKey'],
                }
                if 'fact1' in record:
                    item['fact1'] = record['fact1']
                dataset_list.append(item)
        return Dataset.from_list(dataset_list)
@LOAD_DATASET.register_module()
class OBQADataset_V2(BaseDataset):
    """OpenBookQA-style loader that rewrites non-question stems.

    Produces the same record layout as a plain JSON Lines read of the
    data, except that a stem which does not end with ``?`` gets the
    suffix ``' what?'`` appended.
    """

    @staticmethod
    def load(path):
        """Read a JSON Lines file and build a ``Dataset`` of flat records.

        Every non-blank line of the file must be a JSON object providing
        ``question.stem``, ``question.choices`` (at least four entries,
        each with a ``text`` field) and ``answerKey``. A top-level
        ``fact1`` field is copied through when present.

        Args:
            path: Path of the JSON Lines file to read.

        Returns:
            The result of ``Dataset.from_list`` over one dict per record,
            with keys ``A``, ``B``, ``C``, ``D``, ``question_stem``
            (possibly suffixed with ``' what?'``), ``answerKey`` and,
            where available, ``fact1``.

        Raises:
            json.JSONDecodeError: If a non-blank line is not valid JSON.
            KeyError: If a required field is missing from a record.
            IndexError: If a record has fewer than four choices.
        """
        dataset_list = []
        # Decode explicitly as UTF-8 instead of relying on the platform's
        # locale encoding, which is not UTF-8 on every system.
        with open(path, 'r', encoding='utf-8') as f:
            for raw_line in f:
                # Skip blank lines; json.loads would raise on them.
                if not raw_line.strip():
                    continue
                record = json.loads(raw_line)
                question = record['question']['stem']
                # Stems that are not already phrased as a question are
                # completed with a trailing " what?".
                if not question.endswith('?'):
                    question += ' what?'
                # Options are mapped to A-D purely by position.
                # NOTE(review): assumes the file lists choices in A-D
                # order; the per-choice label is not consulted here.
                choices = record['question']['choices']
                item = {
                    'A': choices[0]['text'],
                    'B': choices[1]['text'],
                    'C': choices[2]['text'],
                    'D': choices[3]['text'],
                    'question_stem': question,
                    'answerKey': record['answerKey'],
                }
                if 'fact1' in record:
                    item['fact1'] = record['fact1']
                dataset_list.append(item)
        return Dataset.from_list(dataset_list)