OpenCompass/opencompass/datasets/summscreen.py

45 lines
1.4 KiB
Python
Raw Normal View History

2023-07-05 09:01:25 +08:00
from datasets import Dataset, DatasetDict
from opencompass.registry import LOAD_DATASET
from .base import BaseDataset
@LOAD_DATASET.register_module()
class SummScreenDataset(BaseDataset):
    """Loader for the raw SummScreen transcript/recap JSON files."""

    @staticmethod
    def load(path: str) -> DatasetDict:
        """Read every SummScreen JSON file and return it as a ``'dev'`` split.

        Files are read from ``<path>/SummScreen_raw/fd`` first and then from
        ``<path>/SummScreen_raw/tms``. Each file becomes one row with two
        string fields:

        * ``content`` -- the ``'Transcript'`` entries joined with newlines.
        * ``summary`` -- the ``'Recap'`` entries concatenated with no
          separator.

        Args:
            path: Directory that contains the ``SummScreen_raw`` folder.

        Returns:
            A ``DatasetDict`` whose only split, ``'dev'``, holds all rows
            from both sub-folders.

        Raises:
            FileNotFoundError: If either sub-folder does not exist.
            KeyError: If a file lacks the ``'Recap'`` or ``'Transcript'``
                key.
        """
        import json
        import os

        rows = []
        for subset in ('fd', 'tms'):
            folder = os.path.join(path, 'SummScreen_raw', subset)
            # os.listdir() yields entries in arbitrary, platform-dependent
            # order; sort so the row order is reproducible across machines.
            for name in sorted(os.listdir(folder)):
                filename = os.path.join(folder, name)
                # JSON text is UTF-8; do not rely on the locale default.
                with open(filename, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                # NOTE(review): both values are presumably lists of strings;
                # the recap pieces are joined without any separator --
                # confirm that this is intended.
                summary = ''.join(data['Recap'])
                content = '\n'.join(data['Transcript'])
                rows.append({
                    'content': content,
                    'summary': summary,
                })

        dataset_dict = DatasetDict()
        dataset_dict['dev'] = Dataset.from_list(rows)
        return dataset_dict