From b35d991786b6d1af45edf7f1f2d73834e5212e02 Mon Sep 17 00:00:00 2001
From: Skyfall-xzz <48122345+Skyfall-xzz@users.noreply.github.com>
Date: Wed, 20 Dec 2023 17:57:42 +0800
Subject: [PATCH] [Feature] Add ReasonBench(Internal) dataset (#577)

* [Feature] Add reasonbench dataset

* add configs for supporting generative inference & merge datasets in the same category

* modify config filename to prompt version

* fix codes to meet pre-commit requirements

* lint the code to meet pre-commit requirements

* Align Load_data Sourcecode Briefly

* fix bugs

* reduce code redundancy
---
 .../datasets/ReasonBench/reasonbench_gen.py  |   4 +
 .../ReasonBench/reasonbench_gen_d15233.py    | 140 ++++++++++++++++++
 .../datasets/ReasonBench/reasonbench_ppl.py  |   4 +
 .../ReasonBench/reasonbench_ppl_b4a005.py    | 136 +++++++++++++++++
 opencompass/datasets/__init__.py             |   1 +
 .../reasonbench/ReasonBenchDataset.py        |  39 +++++
 opencompass/datasets/reasonbench/__init__.py |   1 +
 7 files changed, 325 insertions(+)
 create mode 100644 configs/datasets/ReasonBench/reasonbench_gen.py
 create mode 100644 configs/datasets/ReasonBench/reasonbench_gen_d15233.py
 create mode 100644 configs/datasets/ReasonBench/reasonbench_ppl.py
 create mode 100644 configs/datasets/ReasonBench/reasonbench_ppl_b4a005.py
 create mode 100644 opencompass/datasets/reasonbench/ReasonBenchDataset.py
 create mode 100644 opencompass/datasets/reasonbench/__init__.py

diff --git a/configs/datasets/ReasonBench/reasonbench_gen.py b/configs/datasets/ReasonBench/reasonbench_gen.py
new file mode 100644
index 00000000..ae65e25a
--- /dev/null
+++ b/configs/datasets/ReasonBench/reasonbench_gen.py
@@ -0,0 +1,4 @@
+from mmengine.config import read_base
+
+with read_base():
+    from .reasonbench_gen_d15233 import reasonbench_datasets
diff --git a/configs/datasets/ReasonBench/reasonbench_gen_d15233.py b/configs/datasets/ReasonBench/reasonbench_gen_d15233.py
new file mode 100644
index 00000000..4b52c414
--- /dev/null
+++ b/configs/datasets/ReasonBench/reasonbench_gen_d15233.py
@@ -0,0 +1,140 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import FixKRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.openicl.icl_evaluator import AccEvaluator
+from opencompass.utils.text_postprocessors import first_capital_postprocess
+from opencompass.datasets.reasonbench import ReasonBenchDataset
+
+reasonbench_eval_cfg = dict(
+    evaluator=dict(type=AccEvaluator),
+    pred_postprocessor=dict(type=first_capital_postprocess)
+)
+
+reader_cfgs = []
+for i in range(2, 5):
+    choices = ["A", "B", "C", "D"][:i]
+
+    reader_cfgs.append(dict(
+        input_columns=["prompt_ppl"],
+        output_column="label_ppl")
+    )
+
+infer_cfg=dict(
+    ice_template=dict(
+        type=PromptTemplate,
+        template=dict(
+            begin="",
+            round=[
+                dict(
+                    role="HUMAN",
+                    prompt="{prompt_ppl}"
+                ),
+                dict(role="BOT", prompt="Answer: {label_ppl}"),
+            ]),
+        ice_token="",
+    ),
+    retriever=dict(type=FixKRetriever, fix_id_list=[]),
+    inferencer=dict(type=GenInferencer)
+)
+
+
+CausalReasoningDataset = [
+    dict(
+        abbr="reasonbench-causal",
+        type=ReasonBenchDataset,
+        path="data/reasonbench/causal.jsonl",
+        reader_cfg=reader_cfgs[0],
+        infer_cfg=infer_cfg,
+        eval_cfg=reasonbench_eval_cfg),
+]
+
+CommonsenseReasoningDataset = [
+    dict(
+        abbr="reasonbench-commonsense",
+        type=ReasonBenchDataset,
+        path="data/reasonbench/commonsense.jsonl",
+        reader_cfg=reader_cfgs[1],
+        infer_cfg=infer_cfg,
+        eval_cfg=reasonbench_eval_cfg),
+]
+
+AbductiveReasoningDataset = [
+    dict(
+        abbr="reasonbench-abductive",
+        type=ReasonBenchDataset,
+        path="data/reasonbench/abductive.jsonl",
+        reader_cfg=reader_cfgs[0],
+        infer_cfg=infer_cfg,
+        eval_cfg=reasonbench_eval_cfg),
+]
+
+DeductiveReasoningDataset = [
+    dict(
+        abbr="reasonbench-deductive",
+        type=ReasonBenchDataset,
+        path="data/reasonbench/deductive.jsonl",
+        reader_cfg=reader_cfgs[1],
+        infer_cfg=infer_cfg,
+        eval_cfg=reasonbench_eval_cfg),
+]
+
+InductiveReasoningDataset = [
+    dict(
+        abbr="reasonbench-inductive",
+        type=ReasonBenchDataset,
+        path="data/reasonbench/inductive.jsonl",
+        reader_cfg=reader_cfgs[0],
+        infer_cfg=infer_cfg,
+        eval_cfg=reasonbench_eval_cfg),
+]
+
+SymbolicReasoningDataset = [
+    dict(
+        abbr="reasonbench-symbolic",
+        type=ReasonBenchDataset,
+        path="data/reasonbench/symbolic.jsonl",
+        reader_cfg=reader_cfgs[2],
+        infer_cfg=infer_cfg,
+        eval_cfg=reasonbench_eval_cfg),
+]
+
+CLEVA_CommonsenseReasoningDataset = [
+    dict(
+        abbr="reasonbench-cleva_commonsense",
+        type=ReasonBenchDataset,
+        path="data/reasonbench/cleva_commonsense.jsonl",
+        reader_cfg=reader_cfgs[1],
+        infer_cfg=infer_cfg,
+        eval_cfg=reasonbench_eval_cfg),
+]
+
+CLEVA_DeductiveReasoningDataset = [
+    dict(
+        abbr="reasonbench-cleva_deductive",
+        type=ReasonBenchDataset,
+        path="data/reasonbench/cleva_deductive.jsonl",
+        reader_cfg=reader_cfgs[1],
+        infer_cfg=infer_cfg,
+        eval_cfg=reasonbench_eval_cfg),
+]
+
+CLEVA_InductiveReasoningDataset = [
+    dict(
+        abbr="reasonbench-cleva_inductive",
+        type=ReasonBenchDataset,
+        path="data/reasonbench/cleva_inductive.jsonl",
+        reader_cfg=reader_cfgs[0],
+        infer_cfg=infer_cfg,
+        eval_cfg=reasonbench_eval_cfg),
+]
+
+reasonbench_datasets = \
+    CLEVA_CommonsenseReasoningDataset + \
+    CLEVA_DeductiveReasoningDataset + \
+    CLEVA_InductiveReasoningDataset + \
+    CausalReasoningDataset + \
+    CommonsenseReasoningDataset + \
+    AbductiveReasoningDataset + \
+    DeductiveReasoningDataset + \
+    InductiveReasoningDataset + \
+    SymbolicReasoningDataset
diff --git a/configs/datasets/ReasonBench/reasonbench_ppl.py b/configs/datasets/ReasonBench/reasonbench_ppl.py
new file mode 100644
index 00000000..6bc2b05f
--- /dev/null
+++ b/configs/datasets/ReasonBench/reasonbench_ppl.py
@@ -0,0 +1,4 @@
+from mmengine.config import read_base
+
+with read_base():
+    from .reasonbench_ppl_b4a005 import reasonbench_datasets
diff --git a/configs/datasets/ReasonBench/reasonbench_ppl_b4a005.py b/configs/datasets/ReasonBench/reasonbench_ppl_b4a005.py
new file mode 100644
index 00000000..02bcebc0
--- /dev/null
+++ b/configs/datasets/ReasonBench/reasonbench_ppl_b4a005.py
@@ -0,0 +1,136 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import PPLInferencer
+from opencompass.openicl.icl_evaluator import AccEvaluator
+from opencompass.datasets.reasonbench import ReasonBenchDataset
+
+reasonbench_eval_cfg = dict(
+    evaluator=dict(type=AccEvaluator),
+    pred_role="BOT",
+)
+
+reader_cfgs, infer_cfgs = [], []
+for i in range(2, 5):
+    choices = ["A", "B", "C", "D"][:i]
+
+    reader_cfgs.append(dict(
+        input_columns=["prompt_ppl"] + choices + ["choices"],
+        output_column="label")
+    )
+
+    infer_cfgs.append(dict(
+        prompt_template=dict(
+            type=PromptTemplate,
+            template={
+                str(id):
+                dict(
+                    round=[
+                        dict(role="HUMAN", prompt="{prompt_ppl}Answer:"),
+                        dict(role="BOT", prompt=f"{choice}")
+                    ], )
+                for id, choice in enumerate(choices)
+            }),
+        retriever=dict(type=ZeroRetriever),
+        inferencer=dict(type=PPLInferencer)
+    ))
+
+CausalReasoningDataset = [
+    dict(
+        abbr="reasonbench-causal",
+        type=ReasonBenchDataset,
+        path="data/reasonbench/causal.jsonl",
+        reader_cfg=reader_cfgs[0],
+        infer_cfg=infer_cfgs[0],
+        eval_cfg=reasonbench_eval_cfg),
+]
+
+CommonsenseReasoningDataset = [
+    dict(
+        abbr="reasonbench-commonsense",
+        type=ReasonBenchDataset,
+        path="data/reasonbench/commonsense.jsonl",
+        reader_cfg=reader_cfgs[1],
+        infer_cfg=infer_cfgs[1],
+        eval_cfg=reasonbench_eval_cfg),
+]
+
+AbductiveReasoningDataset = [
+    dict(
+        abbr="reasonbench-abductive",
+        type=ReasonBenchDataset,
+        path="data/reasonbench/abductive.jsonl",
+        reader_cfg=reader_cfgs[0],
+        infer_cfg=infer_cfgs[0],
+        eval_cfg=reasonbench_eval_cfg),
+]
+
+DeductiveReasoningDataset = [
+    dict(
+        abbr="reasonbench-deductive",
+        type=ReasonBenchDataset,
+        path="data/reasonbench/deductive.jsonl",
+        reader_cfg=reader_cfgs[1],
+        infer_cfg=infer_cfgs[1],
+        eval_cfg=reasonbench_eval_cfg),
+]
+
+InductiveReasoningDataset = [
+    dict(
+        abbr="reasonbench-inductive",
+        type=ReasonBenchDataset,
+        path="data/reasonbench/inductive.jsonl",
+        reader_cfg=reader_cfgs[0],
+        infer_cfg=infer_cfgs[0],
+        eval_cfg=reasonbench_eval_cfg),
+]
+
+SymbolicReasoningDataset = [
+    dict(
+        abbr="reasonbench-symbolic",
+        type=ReasonBenchDataset,
+        path="data/reasonbench/symbolic.jsonl",
+        reader_cfg=reader_cfgs[2],
+        infer_cfg=infer_cfgs[2],
+        eval_cfg=reasonbench_eval_cfg),
+]
+
+CLEVA_CommonsenseReasoningDataset = [
+    dict(
+        abbr="reasonbench-cleva_commonsense",
+        type=ReasonBenchDataset,
+        path="data/reasonbench/cleva_commonsense.jsonl",
+        reader_cfg=reader_cfgs[1],
+        infer_cfg=infer_cfgs[1],
+        eval_cfg=reasonbench_eval_cfg),
+]
+
+CLEVA_DeductiveReasoningDataset = [
+    dict(
+        abbr="reasonbench-cleva_deductive",
+        type=ReasonBenchDataset,
+        path="data/reasonbench/cleva_deductive.jsonl",
+        reader_cfg=reader_cfgs[1],
+        infer_cfg=infer_cfgs[1],
+        eval_cfg=reasonbench_eval_cfg),
+]
+
+CLEVA_InductiveReasoningDataset = [
+    dict(
+        abbr="reasonbench-cleva_inductive",
+        type=ReasonBenchDataset,
+        path="data/reasonbench/cleva_inductive.jsonl",
+        reader_cfg=reader_cfgs[0],
+        infer_cfg=infer_cfgs[0],
+        eval_cfg=reasonbench_eval_cfg),
+]
+
+reasonbench_datasets = \
+    CLEVA_CommonsenseReasoningDataset + \
+    CLEVA_DeductiveReasoningDataset + \
+    CLEVA_InductiveReasoningDataset + \
+    CausalReasoningDataset + \
+    CommonsenseReasoningDataset + \
+    AbductiveReasoningDataset + \
+    DeductiveReasoningDataset + \
+    InductiveReasoningDataset + \
+    SymbolicReasoningDataset
diff --git a/opencompass/datasets/__init__.py b/opencompass/datasets/__init__.py
index 2764a191..43d9dfdf 100644
--- a/opencompass/datasets/__init__.py
+++ b/opencompass/datasets/__init__.py
@@ -68,6 +68,7 @@ from .qasper import *  # noqa: F401, F403
 from .qaspercut import *  # noqa: F401, F403
 from .race import *  # noqa: F401, F403
 from .realtoxicprompts import *  # noqa: F401, F403
+from .reasonbench import ReasonBenchDataset  # noqa: F401, F403
 from .record import *  # noqa: F401, F403
 from .safety import *  # noqa: F401, F403
 from .scibench import ScibenchDataset, scibench_postprocess  # noqa: F401, F403
diff --git a/opencompass/datasets/reasonbench/ReasonBenchDataset.py b/opencompass/datasets/reasonbench/ReasonBenchDataset.py
new file mode 100644
index 00000000..f5c5b7d3
--- /dev/null
+++ b/opencompass/datasets/reasonbench/ReasonBenchDataset.py
@@ -0,0 +1,39 @@
+import json
+
+from datasets import Dataset
+
+from opencompass.registry import LOAD_DATASET
+
+from ..base import BaseDataset
+
+
+@LOAD_DATASET.register_module()
+class ReasonBenchDataset(BaseDataset):
+
+    @staticmethod
+    def load(path: str):
+        raw_data = []
+        with open(path, 'r', encoding='utf-8') as f:
+            for line in f:
+                line = json.loads(line)
+                prompt = line['prompt']
+                prompt_ppl = line['prompt_ppl']
+                label = line['label']
+                label_ppl = line['label_ppl']
+                choices = line['choices']
+                tag = line['tag']
+                source = line['source']
+                option_content = {choice: line[choice] for choice in choices}
+                data = {
+                    'prompt': prompt,
+                    'label': label,
+                    'prompt_ppl': prompt_ppl,
+                    'label_ppl': str(label_ppl)[0],
+                    'choices': choices,
+                    'tag': tag,
+                    'source': source,
+                }
+                data.update(option_content)
+                raw_data.append(data)
+        dataset = Dataset.from_list(raw_data)
+        return dataset
diff --git a/opencompass/datasets/reasonbench/__init__.py b/opencompass/datasets/reasonbench/__init__.py
new file mode 100644
index 00000000..792b6e1a
--- /dev/null
+++ b/opencompass/datasets/reasonbench/__init__.py
@@ -0,0 +1 @@
+from .ReasonBenchDataset import *  # noqa: F401, F403