From f4bf034532e20f5e04d0c3ecf133e166805eac35 Mon Sep 17 00:00:00 2001
From: "jimmy.xj"
Date: Wed, 3 Jan 2024 10:58:57 +0800
Subject: [PATCH] Support devops-eval

---
 configs/datasets/devops_eval/__init__.py      |  0
 .../datasets/devops_eval/devops_eval_gen.py   |  4 +++
 .../datasets/devops_eval/devops_eval_ppl.py   |  4 +++
 opencompass/datasets/devops_eval.py           | 51 +++++++++++++++++++
 4 files changed, 59 insertions(+)
 create mode 100644 configs/datasets/devops_eval/__init__.py
 create mode 100644 configs/datasets/devops_eval/devops_eval_gen.py
 create mode 100644 configs/datasets/devops_eval/devops_eval_ppl.py
 create mode 100644 opencompass/datasets/devops_eval.py

diff --git a/configs/datasets/devops_eval/__init__.py b/configs/datasets/devops_eval/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/configs/datasets/devops_eval/devops_eval_gen.py b/configs/datasets/devops_eval/devops_eval_gen.py
new file mode 100644
index 00000000..9c5d449a
--- /dev/null
+++ b/configs/datasets/devops_eval/devops_eval_gen.py
@@ -0,0 +1,4 @@
+from mmengine.config import read_base
+
+with read_base():
+    from .devops_eval_gen_8df36d import devops_eval_datasets  # noqa: F401, F403
diff --git a/configs/datasets/devops_eval/devops_eval_ppl.py b/configs/datasets/devops_eval/devops_eval_ppl.py
new file mode 100644
index 00000000..06900f10
--- /dev/null
+++ b/configs/datasets/devops_eval/devops_eval_ppl.py
@@ -0,0 +1,4 @@
+from mmengine.config import read_base
+
+with read_base():
+    from .devops_eval_ppl_8b3a0d import devops_eval_datasets  # noqa: F401, F403
diff --git a/opencompass/datasets/devops_eval.py b/opencompass/datasets/devops_eval.py
new file mode 100644
index 00000000..f644d38b
--- /dev/null
+++ b/opencompass/datasets/devops_eval.py
@@ -0,0 +1,51 @@
+import csv
+import os.path as osp
+
+from datasets import Dataset, DatasetDict
+
+from opencompass.registry import LOAD_DATASET
+
+from .base import BaseDataset
+
+
+@LOAD_DATASET.register_module()
+class DevOpsEvalDataset(BaseDataset):
+    """Load DevOps-Eval splits stored as CSV files."""
+
+    @staticmethod
+    def load(path: str, name: str, language: str):
+        """Read the ``dev`` and ``test`` CSV splits into a ``DatasetDict``.
+
+        Each split is read from ``<path>/<language>/<split>/<name>.csv``.
+        The first CSV row is the header; every later row becomes one
+        record keyed by the header names. ``explanation`` and ``answer``
+        default to ``''`` when a row has no such field.
+
+        Args:
+            path: Root directory of the dataset.
+            name: File name of the CSV without the ``.csv`` suffix.
+            language: Directory under ``path`` holding the split folders.
+
+        Returns:
+            DatasetDict: One ``Dataset`` per split; a split whose CSV has
+            no data rows is present but empty.
+        """
+        dataset = {}
+        for split in ['dev', 'test']:
+            filename = osp.join(path, language, split, f'{name}.csv')
+            # newline='' lets the csv module handle line endings itself,
+            # so newlines embedded in quoted fields are parsed correctly.
+            with open(filename, encoding='utf-8', newline='') as f:
+                reader = csv.reader(f)
+                # A completely empty file has no header row to consume.
+                header = next(reader, [])
+                rows = []
+                for row in reader:
+                    item = dict(zip(header, row))
+                    item.setdefault('explanation', '')
+                    item.setdefault('answer', '')
+                    rows.append(item)
+            # Register the split even when it has no rows, so callers
+            # can always index both 'dev' and 'test'.
+            dataset[split] = Dataset.from_list(rows)
+        return DatasetDict(dataset)