mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
46 lines
1.5 KiB
Python
46 lines
1.5 KiB
Python
import json
|
|
|
|
from datasets import Dataset
|
|
|
|
from opencompass.registry import LOAD_DATASET
|
|
|
|
from .base import BaseDataset
|
|
|
|
|
|
@LOAD_DATASET.register_module()
class ARCDataset(BaseDataset):
    """Loader for four-choice questions stored one JSON object per line."""

    @staticmethod
    def load(path: str) -> Dataset:
        """Read a JSON Lines file and return its four-choice questions.

        Each non-blank line is parsed as a JSON object from which
        ``answerKey``, ``question.stem`` and the ``text`` of each entry in
        ``question.choices`` are read. Records whose ``choices`` list does
        not hold exactly four entries are skipped.

        Args:
            path: Location of the JSON Lines file.

        Returns:
            A ``Dataset`` with the columns ``question``, ``answerKey``,
            ``textA``, ``textB``, ``textC`` and ``textD``.

        Raises:
            json.JSONDecodeError: If a non-blank line is not valid JSON.
            KeyError: If a record lacks one of the keys read above.
        """
        columns = ('question', 'answerKey', 'textA', 'textB', 'textC',
                   'textD')
        rows = []
        # Pin the encoding so decoding does not depend on the platform
        # locale; undecodable bytes are still dropped, as before.
        with open(path, 'r', encoding='utf-8', errors='ignore') as in_f:
            for line in in_f:
                line = line.strip()
                if not line:
                    # Blank lines (e.g. a trailing newline) are not records.
                    continue
                sample = json.loads(line)
                answer_key = sample['answerKey']
                question = sample['question']
                choices = question['choices']
                if len(choices) != 4:
                    # Only four-option questions fit the A-D columns.
                    continue
                text_a, text_b, text_c, text_d = (choice['text']
                                                  for choice in choices)
                rows.append({
                    'question': question['stem'],
                    'answerKey': answer_key,
                    'textA': text_a,
                    'textB': text_b,
                    'textC': text_c,
                    'textD': text_d,
                })
        # Build column-wise so every column exists even when no row was kept.
        return Dataset.from_dict(
            {column: [row[column] for row in rows]
             for column in columns})
|