mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00

* [Feature] Add reasonbench dataset * Add configs to support generative inference and merge datasets in the same category * Rename config files to reflect the prompt version * Fix code to meet pre-commit requirements * Lint the code to meet pre-commit requirements * Briefly align the load_data source code * Fix bugs * Reduce code redundancy
40 lines
1.2 KiB
Python
40 lines
1.2 KiB
Python
import json
|
|
|
|
from datasets import Dataset
|
|
|
|
from opencompass.registry import LOAD_DATASET
|
|
|
|
from ..base import BaseDataset
|
|
|
|
|
|
@LOAD_DATASET.register_module()
class ReasonBenchDataset(BaseDataset):
    """Dataset loader for ReasonBench samples stored as JSON Lines."""

    @staticmethod
    def load(path: str):
        """Read a JSON Lines file and build a ``Dataset`` from its records.

        Every non-blank line of the file is parsed as one JSON object. The
        object must provide the keys ``prompt``, ``prompt_ppl``, ``label``,
        ``label_ppl``, ``choices``, ``tag`` and ``source``, plus one extra
        key for every entry listed in ``choices`` (the content of that
        option).

        Args:
            path: Path to the UTF-8 encoded JSON Lines file.

        Returns:
            The result of ``Dataset.from_list`` over one dict per record,
            containing the fields above and one column per choice.

        Raises:
            KeyError: If a record lacks one of the required keys or a key
                named in its ``choices``.
            json.JSONDecodeError: If a non-blank line is not valid JSON.
        """
        raw_data = []
        with open(path, 'r', encoding='utf-8') as f:
            for raw_line in f:
                # Blank lines (for example a trailing newline at the end of
                # the file) carry no record; skip them instead of letting
                # json.loads fail on an empty document.
                if not raw_line.strip():
                    continue
                record = json.loads(raw_line)
                data = {
                    'prompt': record['prompt'],
                    'label': record['label'],
                    'prompt_ppl': record['prompt_ppl'],
                    # Only the first character of the stringified value is
                    # kept for the PPL label.
                    'label_ppl': str(record['label_ppl'])[0],
                    'choices': record['choices'],
                    'tag': record['tag'],
                    'source': record['source'],
                }
                # Each choice name is itself a key of the record holding the
                # option's content; expose it as its own column.
                data.update(
                    {choice: record[choice] for choice in data['choices']})
                raw_data.append(data)
        return Dataset.from_list(raw_data)
|