OpenCompass/opencompass/datasets/arc.py

33 lines
1.0 KiB
Python
Raw Normal View History

2023-07-05 10:22:40 +08:00
import json
from datasets import Dataset
from opencompass.registry import LOAD_DATASET
from .base import BaseDataset
@LOAD_DATASET.register_module()
class ARCDataset(BaseDataset):
    """Loader for ARC multiple-choice questions stored as JSON Lines."""

    @staticmethod
    def load(path: str):
        """Read an ARC JSONL file and build a dataset of four-choice items.

        Args:
            path: Path to a text file holding one JSON object per line. Each
                object must provide ``answerKey`` and a ``question`` mapping
                with ``stem`` and ``choices`` (a list of mappings that each
                carry ``label`` and ``text``).

        Returns:
            The result of ``Dataset.from_list`` over one row per kept item,
            with the keys ``question``, ``answerKey``, ``textA``, ``textB``,
            ``textC`` and ``textD``.

        Raises:
            json.JSONDecodeError: If a non-blank line is not valid JSON.
            KeyError: If an item lacks one of the required fields.
            ValueError: If ``answerKey`` matches none of the choice labels.
        """
        rows = []
        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform locale; undecodable bytes are still dropped, as before.
        with open(path, 'r', encoding='utf-8', errors='ignore') as in_f:
            for line in in_f:
                line = line.strip()
                if not line:
                    # Blank or trailing empty lines carry no record and
                    # would otherwise make json.loads raise.
                    continue
                item = json.loads(line)
                question = item['question']
                choices = question['choices']
                if len(choices) != 4:
                    # Only items with exactly four options fit the
                    # textA..textD row layout.
                    continue
                labels = [choice['label'] for choice in choices]
                # Re-express the answer as a positional letter, whatever
                # labelling scheme the item itself uses.
                answer_key = 'ABCD'[labels.index(item['answerKey'])]
                rows.append({
                    'question': question['stem'],
                    'answerKey': answer_key,
                    'textA': choices[0]['text'],
                    'textB': choices[1]['text'],
                    'textC': choices[2]['text'],
                    'textD': choices[3]['text'],
                })
        return Dataset.from_list(rows)