OpenCompass/opencompass/datasets/arc.py
2023-07-05 02:22:40 +00:00

46 lines
1.5 KiB
Python

import json
from datasets import Dataset
from opencompass.registry import LOAD_DATASET
from .base import BaseDataset
@LOAD_DATASET.register_module()
class ARCDataset(BaseDataset):
@staticmethod
def load(path: str):
with open(path, 'r', errors='ignore') as in_f:
rows = []
for i, line in enumerate(in_f):
sample = json.loads(line.strip())
answerKey = sample['answerKey']
sample = sample['question']
question = sample['stem']
choices = sample['choices']
if len(choices) != 4:
continue
textA = choices[0]['text']
textB = choices[1]['text']
textC = choices[2]['text']
textD = choices[3]['text']
rows.append({
'question': question,
'answerKey': answerKey,
'textA': textA,
'textB': textB,
'textC': textC,
'textD': textD
})
dataset = Dataset.from_dict({
'question': [row['question'] for row in rows],
'answerKey': [row['answerKey'] for row in rows],
'textA': [row['textA'] for row in rows],
'textB': [row['textB'] for row in rows],
'textC': [row['textC'] for row in rows],
'textD': [row['textD'] for row in rows]
})
return dataset