from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import MastermathDatasetv1, MastermathDatasetv1Evaluator
from opencompass.utils import first_option_postprocess

# Reader: the question plus the four option texts go in; the gold option
# letter is the column scored against.
mastermath2024v1_reader_cfg = dict(
    input_columns=['question', 'A', 'B', 'C', 'D'],
    output_column='answer')

# Zero-shot generation: one HUMAN turn presenting the question and the
# (A)-(D) options, with an instruction fixing the answer format.
mastermath2024v1_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(
                    role='HUMAN',
                    prompt=('{question}\n选项:\n(A){A}\n(B){B}\n(C){C}\n'
                            '(D){D}\n你的回答格式如下: "正确答案是 (在这里插入你的答案)"'),
                ),
            ],
        ),
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer),
)

# Evaluation: extract the first option letter from the raw completion,
# then exact-match against the gold letter.
mastermath2024v1_eval_cfg = dict(
    evaluator=dict(type=MastermathDatasetv1Evaluator),
    pred_postprocessor=dict(type=first_option_postprocess, options='ABCD'),
)

mastermath2024v1_datasets = [
    dict(
        abbr='Mastermath2024v1',
        type=MastermathDatasetv1,
        path='./data/mastermath2024v1/',
        name='kaoyan_math_1_mcq_Sheet1.csv',
        reader_cfg=mastermath2024v1_reader_cfg,
        infer_cfg=mastermath2024v1_infer_cfg,
        eval_cfg=mastermath2024v1_eval_cfg,
    )
]
import csv
import os

from datasets import Dataset

from opencompass.openicl import BaseEvaluator
from opencompass.registry import LOAD_DATASET

from .base import BaseDataset


@LOAD_DATASET.register_module()
class MastermathDatasetv1(BaseDataset):
    """Multiple-choice math dataset loaded from a CSV file.

    Expected row layout: index, question, A, B, C, D, answer.
    """

    @staticmethod
    def load(path: str, name: str) -> Dataset:
        """Read ``name`` under ``path`` and return a HuggingFace ``Dataset``.

        Args:
            path: Directory containing the CSV file.
            name: CSV filename (e.g. ``kaoyan_math_1_mcq_Sheet1.csv``).

        Returns:
            A ``Dataset`` with columns question/A/B/C/D/answer.
        """
        data = []
        with open(os.path.join(path, name), 'r', encoding='utf-8') as f:
            reader = csv.reader(f, delimiter=',')
            for row in reader:
                # Skip the header row; in data rows column 1 holds the
                # actual question text, not the literal 'question'.
                if row[1] == 'question':
                    continue
                data.append({
                    'question': row[1],
                    'A': row[2],
                    'B': row[3],
                    'C': row[4],
                    'D': row[5],
                    'answer': row[6],
                })

        return Dataset.from_list(data)


class MastermathDatasetv1Evaluator(BaseEvaluator):
    """Exact-match accuracy over single-letter option predictions."""

    def score(self, predictions, references):
        """Compute accuracy (percentage) plus per-sample details.

        Args:
            predictions: Postprocessed model answers (option letters).
            references: Gold option letters, same length as predictions.

        Returns:
            dict with 'accuracy' (0-100) and 'details', or an 'error'
            entry when the two lists differ in length.
        """
        if len(predictions) != len(references):
            return {
                'error': 'predictions and references have different length'
            }
        # Guard the division below: with no samples accuracy is undefined,
        # so report 0 instead of raising ZeroDivisionError.
        if not predictions:
            return {'accuracy': 0, 'details': []}
        correct = 0
        details = []
        for pred, ref in zip(predictions, references):
            is_correct = pred == ref
            correct += is_correct
            details.append({
                'pred': pred,
                'answer': ref,
                'correct': is_correct,
            })
        return {
            'accuracy': 100 * correct / len(predictions),
            'details': details,
        }