mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00

Added MaritimeBench dataset, including dataset metadata, configuration files, data processing logic, and a text post-processing function. This dataset is designed to evaluate AI models' domain knowledge and reasoning ability in the maritime field.
65 lines
2.1 KiB
Python
65 lines
2.1 KiB
Python
import json
|
|
import os.path as osp
|
|
from os import environ
|
|
|
|
import datasets
|
|
from datasets import Dataset, DatasetDict
|
|
|
|
from opencompass.registry import LOAD_DATASET
|
|
from opencompass.utils import get_data_path
|
|
|
|
from .base import BaseDataset
|
|
|
|
|
|
@LOAD_DATASET.register_module()
class MaritimeBenchDataset(BaseDataset):
    """Loader for the MaritimeBench multiple-choice dataset.

    Every record keeps exactly six fields: ``question``, the four options
    ``A``-``D`` and the reference ``answer``. Only the ``test`` split is
    loaded.
    """

    @staticmethod
    def load(path: str, name: str) -> datasets.DatasetDict:
        """Load one MaritimeBench subset as a ``DatasetDict``.

        When the ``DATASET_SOURCE`` environment variable equals
        ``'ModelScope'`` the records are fetched with ``MsDataset.load``;
        otherwise they are read from the local JSON Lines file
        ``<path>/<split>/<name>_<split>.jsonl``.

        Args:
            path: Dataset location, resolved through ``get_data_path``.
            name: Subset name. It is passed as ``subset_name`` to
                ModelScope, or used as the file-name prefix locally.

        Returns:
            A ``DatasetDict`` holding one ``Dataset`` per loaded split
            (currently only ``'test'``).

        Raises:
            KeyError: If a local record lacks one of the six fields.
            json.JSONDecodeError: If a non-blank local line is not valid
                JSON.
        """
        path = get_data_path(path)
        fields = ('question', 'A', 'B', 'C', 'D', 'answer')
        splits = ('test', )
        dataset = DatasetDict()

        use_modelscope = environ.get('DATASET_SOURCE') == 'ModelScope'
        if use_modelscope:
            # Imported lazily so that modelscope is only needed when this
            # source is actually selected.
            from modelscope import MsDataset

        for split in splits:
            # A fresh list per split keeps records of different splits from
            # being mixed together.
            records = []
            if use_modelscope:
                # Load the data from ModelScope.
                ms_dataset = MsDataset.load(path,
                                            subset_name=name,
                                            split=split)
                for row in ms_dataset:
                    records.append({key: row[key] for key in fields})
            else:
                filename = osp.join(path, split, f'{name}_{split}.jsonl')
                with open(filename, encoding='utf-8') as f:
                    for raw_line in f:
                        # Tolerate blank / trailing empty lines, which
                        # json.loads would otherwise reject.
                        if not raw_line.strip():
                            continue
                        row = json.loads(raw_line)
                        records.append({key: row[key] for key in fields})

            # Register the split inside the loop instead of relying on the
            # loop variable still being bound after the loop ends.
            dataset[split] = Dataset.from_list(records)

        return dataset
|