mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
[Feature] Add ReasonBench(Internal) dataset (#577)
* [Feature] Add reasonbench dataset * add configs for supporting generative inference & merge datasets in the same category * modify config filename to prompt version * fix codes to meet pre-commit requirements * lint the code to meet pre-commit requirements * Align Load_data Sourcecode Briefly * fix bugs * reduce code redundancy
This commit is contained in:
parent
76a95e9e81
commit
b35d991786
4
configs/datasets/ReasonBench/reasonbench_gen.py
Normal file
4
configs/datasets/ReasonBench/reasonbench_gen.py
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
# Entry-point config for ReasonBench generative (gen) evaluation: re-exports
# the dataset list from the concrete prompt-version config.
from mmengine.config import read_base

with read_base():
    from .reasonbench_gen_d15233 import reasonbench_datasets
|
140
configs/datasets/ReasonBench/reasonbench_gen_d15233.py
Normal file
140
configs/datasets/ReasonBench/reasonbench_gen_d15233.py
Normal file
@ -0,0 +1,140 @@
|
|||||||
|
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import FixKRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.utils.text_postprocessors import first_capital_postprocess
from opencompass.datasets.reasonbench import ReasonBenchDataset

# Generative (gen) evaluation: score accuracy on the first capital letter
# extracted from the model's free-form answer.
reasonbench_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_postprocessor=dict(type=first_capital_postprocess),
)

# One reader config per choice count (2, 3 and 4 options).  For generative
# inference the reader is identical in all three cases; the indexed entries
# are kept so the dataset definitions below stay aligned with the PPL
# variant of this config, which does differ per choice count.
reader_cfgs = [
    dict(input_columns=["prompt_ppl"], output_column="label_ppl")
    for _ in range(2, 5)
]

infer_cfg = dict(
    ice_template=dict(
        type=PromptTemplate,
        # NOTE(review): "</E>" appears both as `begin` and inside the HUMAN
        # prompt while also being the ice_token; with the empty fix_id_list
        # below no in-context examples are inserted, so the token expands to
        # nothing either way — confirm the duplication is intentional.
        template=dict(
            begin="</E>",
            round=[
                dict(
                    role="HUMAN",
                    prompt="</E>{prompt_ppl}"
                ),
                dict(role="BOT", prompt="Answer: {label_ppl}"),
            ]),
        ice_token="</E>",
    ),
    # fix_id_list=[] -> zero-shot: no fixed in-context examples retrieved.
    retriever=dict(type=FixKRetriever, fix_id_list=[]),
    inferencer=dict(type=GenInferencer),
)

# Each category below is a one-element dataset list; reader_cfgs[i] selects
# the reader for datasets whose questions have i + 2 answer options.
CausalReasoningDataset = [
    dict(
        abbr="reasonbench-causal",
        type=ReasonBenchDataset,
        path="data/reasonbench/causal.jsonl",
        reader_cfg=reader_cfgs[0],
        infer_cfg=infer_cfg,
        eval_cfg=reasonbench_eval_cfg),
]

CommonsenseReasoningDataset = [
    dict(
        abbr="reasonbench-commonsense",
        type=ReasonBenchDataset,
        path="data/reasonbench/commonsense.jsonl",
        reader_cfg=reader_cfgs[1],
        infer_cfg=infer_cfg,
        eval_cfg=reasonbench_eval_cfg),
]

AbductiveReasoningDataset = [
    dict(
        abbr="reasonbench-abductive",
        type=ReasonBenchDataset,
        path="data/reasonbench/abductive.jsonl",
        reader_cfg=reader_cfgs[0],
        infer_cfg=infer_cfg,
        eval_cfg=reasonbench_eval_cfg),
]

DeductiveReasoningDataset = [
    dict(
        abbr="reasonbench-deductive",
        type=ReasonBenchDataset,
        path="data/reasonbench/deductive.jsonl",
        reader_cfg=reader_cfgs[1],
        infer_cfg=infer_cfg,
        eval_cfg=reasonbench_eval_cfg),
]

InductiveReasoningDataset = [
    dict(
        abbr="reasonbench-inductive",
        type=ReasonBenchDataset,
        path="data/reasonbench/inductive.jsonl",
        reader_cfg=reader_cfgs[0],
        infer_cfg=infer_cfg,
        eval_cfg=reasonbench_eval_cfg),
]

SymbolicReasoningDataset = [
    dict(
        abbr="reasonbench-symbolic",
        type=ReasonBenchDataset,
        path="data/reasonbench/symbolic.jsonl",
        reader_cfg=reader_cfgs[2],
        infer_cfg=infer_cfg,
        eval_cfg=reasonbench_eval_cfg),
]

CLEVA_CommonsenseReasoningDataset = [
    dict(
        abbr="reasonbench-cleva_commonsense",
        type=ReasonBenchDataset,
        path="data/reasonbench/cleva_commonsense.jsonl",
        reader_cfg=reader_cfgs[1],
        infer_cfg=infer_cfg,
        eval_cfg=reasonbench_eval_cfg),
]

CLEVA_DeductiveReasoningDataset = [
    dict(
        abbr="reasonbench-cleva_deductive",
        type=ReasonBenchDataset,
        path="data/reasonbench/cleva_deductive.jsonl",
        reader_cfg=reader_cfgs[1],
        infer_cfg=infer_cfg,
        eval_cfg=reasonbench_eval_cfg),
]

CLEVA_InductiveReasoningDataset = [
    dict(
        abbr="reasonbench-cleva_inductive",
        type=ReasonBenchDataset,
        path="data/reasonbench/cleva_inductive.jsonl",
        reader_cfg=reader_cfgs[0],
        infer_cfg=infer_cfg,
        eval_cfg=reasonbench_eval_cfg),
]

# The aggregate list imported by reasonbench_gen.py.
reasonbench_datasets = \
    CLEVA_CommonsenseReasoningDataset + \
    CLEVA_DeductiveReasoningDataset + \
    CLEVA_InductiveReasoningDataset + \
    CausalReasoningDataset + \
    CommonsenseReasoningDataset + \
    AbductiveReasoningDataset + \
    DeductiveReasoningDataset + \
    InductiveReasoningDataset + \
    SymbolicReasoningDataset
|
4
configs/datasets/ReasonBench/reasonbench_ppl.py
Normal file
4
configs/datasets/ReasonBench/reasonbench_ppl.py
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
# Entry-point config for ReasonBench perplexity (ppl) evaluation: re-exports
# the dataset list from the concrete prompt-version config.
from mmengine.config import read_base

with read_base():
    from .reasonbench_ppl_b4a005 import reasonbench_datasets
|
136
configs/datasets/ReasonBench/reasonbench_ppl_b4a005.py
Normal file
136
configs/datasets/ReasonBench/reasonbench_ppl_b4a005.py
Normal file
@ -0,0 +1,136 @@
|
|||||||
|
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets.reasonbench import ReasonBenchDataset

# Perplexity (ppl) evaluation: the candidate continuation with the lowest
# perplexity is taken as the prediction and scored with accuracy.
reasonbench_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_role="BOT",
)

# Build one (reader_cfg, infer_cfg) pair per choice count:
#   index 0 -> 2 options (A/B), 1 -> 3 options (A/B/C), 2 -> 4 (A/B/C/D).
reader_cfgs, infer_cfgs = [], []
for i in range(2, 5):
    choices = ["A", "B", "C", "D"][:i]

    reader_cfgs.append(dict(
        # the per-letter columns carry each option's text (flattened into the
        # row by ReasonBenchDataset.load)
        input_columns=["prompt_ppl"] + choices + ["choices"],
        output_column="label")
    )

    infer_cfgs.append(dict(
        prompt_template=dict(
            type=PromptTemplate,
            # One template per candidate, keyed by the stringified label
            # index ("0", "1", ...) so PPLInferencer can match it against
            # the `label` column.  `idx` (not `id`) avoids shadowing the
            # builtin.
            template={
                str(idx):
                dict(
                    round=[
                        dict(role="HUMAN", prompt="{prompt_ppl}Answer:"),
                        dict(role="BOT", prompt=choice)
                    ], )
                for idx, choice in enumerate(choices)
            }),
        retriever=dict(type=ZeroRetriever),
        inferencer=dict(type=PPLInferencer)
    ))

# Each category below is a one-element dataset list; index i into
# reader_cfgs/infer_cfgs selects the configs for i + 2 answer options.
CausalReasoningDataset = [
    dict(
        abbr="reasonbench-causal",
        type=ReasonBenchDataset,
        path="data/reasonbench/causal.jsonl",
        reader_cfg=reader_cfgs[0],
        infer_cfg=infer_cfgs[0],
        eval_cfg=reasonbench_eval_cfg),
]

CommonsenseReasoningDataset = [
    dict(
        abbr="reasonbench-commonsense",
        type=ReasonBenchDataset,
        path="data/reasonbench/commonsense.jsonl",
        reader_cfg=reader_cfgs[1],
        infer_cfg=infer_cfgs[1],
        eval_cfg=reasonbench_eval_cfg),
]

AbductiveReasoningDataset = [
    dict(
        abbr="reasonbench-abductive",
        type=ReasonBenchDataset,
        path="data/reasonbench/abductive.jsonl",
        reader_cfg=reader_cfgs[0],
        infer_cfg=infer_cfgs[0],
        eval_cfg=reasonbench_eval_cfg),
]

DeductiveReasoningDataset = [
    dict(
        abbr="reasonbench-deductive",
        type=ReasonBenchDataset,
        path="data/reasonbench/deductive.jsonl",
        reader_cfg=reader_cfgs[1],
        infer_cfg=infer_cfgs[1],
        eval_cfg=reasonbench_eval_cfg),
]

InductiveReasoningDataset = [
    dict(
        abbr="reasonbench-inductive",
        type=ReasonBenchDataset,
        path="data/reasonbench/inductive.jsonl",
        reader_cfg=reader_cfgs[0],
        infer_cfg=infer_cfgs[0],
        eval_cfg=reasonbench_eval_cfg),
]

SymbolicReasoningDataset = [
    dict(
        abbr="reasonbench-symbolic",
        type=ReasonBenchDataset,
        path="data/reasonbench/symbolic.jsonl",
        reader_cfg=reader_cfgs[2],
        infer_cfg=infer_cfgs[2],
        eval_cfg=reasonbench_eval_cfg),
]

CLEVA_CommonsenseReasoningDataset = [
    dict(
        abbr="reasonbench-cleva_commonsense",
        type=ReasonBenchDataset,
        path="data/reasonbench/cleva_commonsense.jsonl",
        reader_cfg=reader_cfgs[1],
        infer_cfg=infer_cfgs[1],
        eval_cfg=reasonbench_eval_cfg),
]

CLEVA_DeductiveReasoningDataset = [
    dict(
        abbr="reasonbench-cleva_deductive",
        type=ReasonBenchDataset,
        path="data/reasonbench/cleva_deductive.jsonl",
        reader_cfg=reader_cfgs[1],
        infer_cfg=infer_cfgs[1],
        eval_cfg=reasonbench_eval_cfg),
]

CLEVA_InductiveReasoningDataset = [
    dict(
        abbr="reasonbench-cleva_inductive",
        type=ReasonBenchDataset,
        path="data/reasonbench/cleva_inductive.jsonl",
        reader_cfg=reader_cfgs[0],
        infer_cfg=infer_cfgs[0],
        eval_cfg=reasonbench_eval_cfg),
]

# The aggregate list imported by reasonbench_ppl.py.
reasonbench_datasets = \
    CLEVA_CommonsenseReasoningDataset + \
    CLEVA_DeductiveReasoningDataset + \
    CLEVA_InductiveReasoningDataset + \
    CausalReasoningDataset + \
    CommonsenseReasoningDataset + \
    AbductiveReasoningDataset + \
    DeductiveReasoningDataset + \
    InductiveReasoningDataset + \
    SymbolicReasoningDataset
|
@ -68,6 +68,7 @@ from .qasper import * # noqa: F401, F403
|
|||||||
from .qaspercut import * # noqa: F401, F403
|
from .qaspercut import * # noqa: F401, F403
|
||||||
from .race import * # noqa: F401, F403
|
from .race import * # noqa: F401, F403
|
||||||
from .realtoxicprompts import * # noqa: F401, F403
|
from .realtoxicprompts import * # noqa: F401, F403
|
||||||
|
from .reasonbench import ReasonBenchDataset # noqa: F401, F403
|
||||||
from .record import * # noqa: F401, F403
|
from .record import * # noqa: F401, F403
|
||||||
from .safety import * # noqa: F401, F403
|
from .safety import * # noqa: F401, F403
|
||||||
from .scibench import ScibenchDataset, scibench_postprocess # noqa: F401, F403
|
from .scibench import ScibenchDataset, scibench_postprocess # noqa: F401, F403
|
||||||
|
39
opencompass/datasets/reasonbench/ReasonBenchDataset.py
Normal file
39
opencompass/datasets/reasonbench/ReasonBenchDataset.py
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
import json

from datasets import Dataset

from opencompass.registry import LOAD_DATASET

from ..base import BaseDataset


@LOAD_DATASET.register_module()
class ReasonBenchDataset(BaseDataset):
    """Loader for ReasonBench jsonl files (one JSON object per line)."""

    @staticmethod
    def load(path: str):
        """Read the jsonl file at *path* into a ``datasets.Dataset``.

        Each input record must provide ``prompt``, ``prompt_ppl``, ``label``,
        ``label_ppl``, ``choices``, ``tag``, ``source`` and, for every letter
        listed in ``choices``, a key holding that option's text.  The option
        texts are flattened into top-level columns so configs can reference
        them directly as input columns.
        """
        raw_data = []
        with open(path, 'r', encoding='utf-8') as f:
            for line in f:
                line = line.strip()
                if not line:
                    # tolerate blank/trailing lines in hand-edited jsonl
                    continue
                item = json.loads(line)
                data = {
                    'prompt': item['prompt'],
                    'label': item['label'],
                    'prompt_ppl': item['prompt_ppl'],
                    # keep only the leading character so values like "A." or
                    # "A) ..." normalize to the bare option letter
                    'label_ppl': str(item['label_ppl'])[0],
                    'choices': item['choices'],
                    'tag': item['tag'],
                    'source': item['source'],
                }
                # flatten option texts: one column per choice letter
                data.update({c: item[c] for c in item['choices']})
                raw_data.append(data)
        return Dataset.from_list(raw_data)
|
1
opencompass/datasets/reasonbench/__init__.py
Normal file
1
opencompass/datasets/reasonbench/__init__.py
Normal file
@ -0,0 +1 @@
|
|||||||
|
from .ReasonBenchDataset import * # noqa: F401, F403
|
Loading…
Reference in New Issue
Block a user