[Feature] Add ReasonBench(Internal) dataset (#577)

* [Feature] Add reasonbench dataset

* add configs for supporting generative inference & merge datasets in the same category

* modify config filename to prompt version

* fix codes to meet pre-commit requirements

* lint the code to meet pre-commit requirements

* Briefly align the load_data source code with other dataset loaders

* fix bugs

* reduce code redundancy
This commit is contained in:
Skyfall-xzz 2023-12-20 17:57:42 +08:00 committed by GitHub
parent 76a95e9e81
commit b35d991786
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 325 additions and 0 deletions

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
# mmengine's read_base() context executes the relative import at
# config-parse time, re-exporting `reasonbench_datasets` from the pinned
# prompt-version file so callers can import this stable entry point.
with read_base():
    from .reasonbench_gen_d15233 import reasonbench_datasets

View File

@ -0,0 +1,140 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import FixKRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.utils.text_postprocessors import first_capital_postprocess
from opencompass.datasets.reasonbench import ReasonBenchDataset
# Shared evaluation config for every generative ReasonBench subset:
# plain accuracy after extracting the first capital letter (the option id)
# from the model output.
reasonbench_eval_cfg = {
    "evaluator": dict(type=AccEvaluator),
    "pred_postprocessor": dict(type=first_capital_postprocess),
}
# One reader config per option count (2, 3, 4 choices). In the generative
# setting the reader is identical for all option counts, so build three
# independent copies directly; the old loop also computed a `choices` slice
# that was never used (dead code carried over from the PPL config).
reader_cfgs = [
    dict(input_columns=["prompt_ppl"], output_column="label_ppl")
    for _ in range(3)
]
# Shared inference config for every generative subset: zero in-context
# examples (FixKRetriever with an empty id list) and free-form generation.
# NOTE(review): "</E>" appears both as `begin` and inside the HUMAN round
# prompt — looks duplicated compared to other gen configs; confirm intended.
_round_template = [
    dict(
        role="HUMAN",
        prompt="</E>{prompt_ppl}"
    ),
    dict(role="BOT", prompt="Answer: {label_ppl}"),
]
infer_cfg = dict(
    ice_template=dict(
        type=PromptTemplate,
        template=dict(begin="</E>", round=_round_template),
        ice_token="</E>",
    ),
    retriever=dict(type=FixKRetriever, fix_id_list=[]),
    inferencer=dict(type=GenInferencer),
)
def _reasonbench_cfg(abbr, path, reader_idx):
    """Return a one-element dataset list for one ReasonBench subset.

    Every subset shares ``infer_cfg`` and ``reasonbench_eval_cfg``; only the
    abbreviation, the jsonl path, and the reader config (indexed by number of
    answer options minus two) differ.
    """
    return [
        dict(
            abbr=abbr,
            type=ReasonBenchDataset,
            path=path,
            reader_cfg=reader_cfgs[reader_idx],
            infer_cfg=infer_cfg,
            eval_cfg=reasonbench_eval_cfg,
        )
    ]


CausalReasoningDataset = _reasonbench_cfg(
    "reasonbench-causal", "data/reasonbench/causal.jsonl", 0)
CommonsenseReasoningDataset = _reasonbench_cfg(
    "reasonbench-commonsense", "data/reasonbench/commonsense.jsonl", 1)
AbductiveReasoningDataset = _reasonbench_cfg(
    "reasonbench-abductive", "data/reasonbench/abductive.jsonl", 0)
DeductiveReasoningDataset = _reasonbench_cfg(
    "reasonbench-deductive", "data/reasonbench/deductive.jsonl", 1)
InductiveReasoningDataset = _reasonbench_cfg(
    "reasonbench-inductive", "data/reasonbench/inductive.jsonl", 0)
SymbolicReasoningDataset = _reasonbench_cfg(
    "reasonbench-symbolic", "data/reasonbench/symbolic.jsonl", 2)
CLEVA_CommonsenseReasoningDataset = _reasonbench_cfg(
    "reasonbench-cleva_commonsense",
    "data/reasonbench/cleva_commonsense.jsonl", 1)
CLEVA_DeductiveReasoningDataset = _reasonbench_cfg(
    "reasonbench-cleva_deductive",
    "data/reasonbench/cleva_deductive.jsonl", 1)
CLEVA_InductiveReasoningDataset = _reasonbench_cfg(
    "reasonbench-cleva_inductive",
    "data/reasonbench/cleva_inductive.jsonl", 0)

# Aggregate in the original order: CLEVA subsets first, then the rest.
reasonbench_datasets = (
    CLEVA_CommonsenseReasoningDataset
    + CLEVA_DeductiveReasoningDataset
    + CLEVA_InductiveReasoningDataset
    + CausalReasoningDataset
    + CommonsenseReasoningDataset
    + AbductiveReasoningDataset
    + DeductiveReasoningDataset
    + InductiveReasoningDataset
    + SymbolicReasoningDataset
)

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
# mmengine's read_base() context executes the relative import at
# config-parse time, re-exporting `reasonbench_datasets` from the pinned
# prompt-version file so callers can import this stable entry point.
with read_base():
    from .reasonbench_ppl_b4a005 import reasonbench_datasets

View File

@ -0,0 +1,136 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets.reasonbench import ReasonBenchDataset
# Shared evaluation config for every PPL ReasonBench subset: plain accuracy
# on the BOT-role continuation chosen by perplexity.
reasonbench_eval_cfg = {
    "evaluator": dict(type=AccEvaluator),
    "pred_role": "BOT",
}
# Build one (reader_cfg, infer_cfg) pair per option count: subsets have
# 2, 3, or 4 answer options, so index k in each list serves the subsets
# with k + 2 options.
reader_cfgs, infer_cfgs = [], []
for i in range(2, 5):
    choices = ["A", "B", "C", "D"][:i]
    reader_cfgs.append(dict(
        # Per-option text columns plus the list of option letters.
        input_columns=["prompt_ppl"] + choices + ["choices"],
        output_column="label")
    )
    infer_cfgs.append(dict(
        prompt_template=dict(
            type=PromptTemplate,
            # One candidate continuation per option; keys are the option
            # indices as strings, values append the option letter after
            # "Answer:" so PPL can rank the candidates.
            template={
                str(idx):  # renamed from `id`, which shadowed the builtin
                dict(
                    round=[
                        dict(role="HUMAN", prompt="{prompt_ppl}Answer:"),
                        dict(role="BOT", prompt=choice)
                    ], )
                for idx, choice in enumerate(choices)
            }),
        retriever=dict(type=ZeroRetriever),
        inferencer=dict(type=PPLInferencer)
    ))
def _reasonbench_cfg(abbr, path, idx):
    """Return a one-element dataset list for one ReasonBench subset.

    ``idx`` selects the matching reader/infer config pair for the subset's
    option count (0 -> 2 options, 1 -> 3 options, 2 -> 4 options); the
    evaluation config is shared by all subsets.
    """
    return [
        dict(
            abbr=abbr,
            type=ReasonBenchDataset,
            path=path,
            reader_cfg=reader_cfgs[idx],
            infer_cfg=infer_cfgs[idx],
            eval_cfg=reasonbench_eval_cfg,
        )
    ]


CausalReasoningDataset = _reasonbench_cfg(
    "reasonbench-causal", "data/reasonbench/causal.jsonl", 0)
CommonsenseReasoningDataset = _reasonbench_cfg(
    "reasonbench-commonsense", "data/reasonbench/commonsense.jsonl", 1)
AbductiveReasoningDataset = _reasonbench_cfg(
    "reasonbench-abductive", "data/reasonbench/abductive.jsonl", 0)
DeductiveReasoningDataset = _reasonbench_cfg(
    "reasonbench-deductive", "data/reasonbench/deductive.jsonl", 1)
InductiveReasoningDataset = _reasonbench_cfg(
    "reasonbench-inductive", "data/reasonbench/inductive.jsonl", 0)
SymbolicReasoningDataset = _reasonbench_cfg(
    "reasonbench-symbolic", "data/reasonbench/symbolic.jsonl", 2)
CLEVA_CommonsenseReasoningDataset = _reasonbench_cfg(
    "reasonbench-cleva_commonsense",
    "data/reasonbench/cleva_commonsense.jsonl", 1)
CLEVA_DeductiveReasoningDataset = _reasonbench_cfg(
    "reasonbench-cleva_deductive",
    "data/reasonbench/cleva_deductive.jsonl", 1)
CLEVA_InductiveReasoningDataset = _reasonbench_cfg(
    "reasonbench-cleva_inductive",
    "data/reasonbench/cleva_inductive.jsonl", 0)

# Aggregate in the original order: CLEVA subsets first, then the rest.
reasonbench_datasets = (
    CLEVA_CommonsenseReasoningDataset
    + CLEVA_DeductiveReasoningDataset
    + CLEVA_InductiveReasoningDataset
    + CausalReasoningDataset
    + CommonsenseReasoningDataset
    + AbductiveReasoningDataset
    + DeductiveReasoningDataset
    + InductiveReasoningDataset
    + SymbolicReasoningDataset
)

View File

@ -68,6 +68,7 @@ from .qasper import * # noqa: F401, F403
from .qaspercut import *  # noqa: F401, F403
from .race import *  # noqa: F401, F403
from .realtoxicprompts import *  # noqa: F401, F403
from .reasonbench import ReasonBenchDataset  # noqa: F401, F403
from .record import *  # noqa: F401, F403
from .safety import *  # noqa: F401, F403
from .scibench import ScibenchDataset, scibench_postprocess  # noqa: F401, F403

View File

@ -0,0 +1,39 @@
import json
from datasets import Dataset
from opencompass.registry import LOAD_DATASET
from ..base import BaseDataset
@LOAD_DATASET.register_module()
class ReasonBenchDataset(BaseDataset):
    """Loader for ReasonBench jsonl files (one JSON record per line)."""

    @staticmethod
    def load(path: str):
        """Read *path* and return a ``datasets.Dataset``.

        Each record keeps its prompt/label fields plus one column per answer
        choice; ``label_ppl`` is normalized to its first character (the
        option letter).
        """
        rows = []
        with open(path, 'r', encoding='utf-8') as f:
            for raw in f:
                record = json.loads(raw)
                row = {
                    'prompt': record['prompt'],
                    'label': record['label'],
                    'prompt_ppl': record['prompt_ppl'],
                    # e.g. "A) ..." -> "A"; keep only the option letter.
                    'label_ppl': str(record['label_ppl'])[0],
                    'choices': record['choices'],
                    'tag': record['tag'],
                    'source': record['source'],
                }
                # Add one column per choice letter with that option's text.
                for choice in record['choices']:
                    row[choice] = record[choice]
                rows.append(row)
        return Dataset.from_list(rows)

View File

@ -0,0 +1 @@
from .ReasonBenchDataset import * # noqa: F401, F403