Mirror of https://github.com/open-compass/opencompass.git
Synced 2025-05-30 16:03:24 +08:00
[Feature] Add ReasonBench(Internal) dataset (#577)
* [Feature] Add reasonbench dataset
* add configs for supporting generative inference & merge datasets in the same category
* modify config filename to prompt version
* fix codes to meet pre-commit requirements
* lint the code to meet pre-commit requirements
* Align Load_data Sourcecode Briefly
* fix bugs
* reduce code redundancy
This commit is contained in: parent 76a95e9e81, commit b35d991786
configs/datasets/ReasonBench/reasonbench_gen.py (new file, 4 lines)
from mmengine.config import read_base

with read_base():
    from .reasonbench_gen_d15233 import reasonbench_datasets
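For context, a config like this is normally consumed from a top-level OpenCompass entry config through the same read_base() mechanism. A minimal sketch, assuming the standard configs/ layout; the entry-config filename is illustrative, not part of this commit:

    # eval_reasonbench.py (hypothetical entry config)
    from mmengine.config import read_base

    with read_base():
        from .datasets.ReasonBench.reasonbench_gen import reasonbench_datasets
        # model configs would be imported here as well

    datasets = [*reasonbench_datasets]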
configs/datasets/ReasonBench/reasonbench_gen_d15233.py (new file, 140 lines)
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import FixKRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.utils.text_postprocessors import first_capital_postprocess
from opencompass.datasets.reasonbench import ReasonBenchDataset

reasonbench_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_postprocessor=dict(type=first_capital_postprocess)
)

# One reader config per choice count (2-, 3- and 4-way multiple choice).
reader_cfgs = []
for i in range(2, 5):
    # choices is unused here; the loop mirrors the ppl config's structure.
    choices = ["A", "B", "C", "D"][:i]

    reader_cfgs.append(dict(
        input_columns=["prompt_ppl"],
        output_column="label_ppl")
    )

infer_cfg = dict(
    ice_template=dict(
        type=PromptTemplate,
        template=dict(
            begin="</E>",
            round=[
                dict(
                    role="HUMAN",
                    prompt="</E>{prompt_ppl}"
                ),
                dict(role="BOT", prompt="Answer: {label_ppl}"),
            ]),
        ice_token="</E>",
    ),
    # An empty fix_id_list makes FixKRetriever supply no in-context
    # examples, i.e. zero-shot generation.
    retriever=dict(type=FixKRetriever, fix_id_list=[]),
    inferencer=dict(type=GenInferencer)
)

CausalReasoningDataset = [
    dict(
        abbr="reasonbench-causal",
        type=ReasonBenchDataset,
        path="data/reasonbench/causal.jsonl",
        reader_cfg=reader_cfgs[0],
        infer_cfg=infer_cfg,
        eval_cfg=reasonbench_eval_cfg),
]

CommonsenseReasoningDataset = [
    dict(
        abbr="reasonbench-commonsense",
        type=ReasonBenchDataset,
        path="data/reasonbench/commonsense.jsonl",
        reader_cfg=reader_cfgs[1],
        infer_cfg=infer_cfg,
        eval_cfg=reasonbench_eval_cfg),
]

AbductiveReasoningDataset = [
    dict(
        abbr="reasonbench-abductive",
        type=ReasonBenchDataset,
        path="data/reasonbench/abductive.jsonl",
        reader_cfg=reader_cfgs[0],
        infer_cfg=infer_cfg,
        eval_cfg=reasonbench_eval_cfg),
]

DeductiveReasoningDataset = [
    dict(
        abbr="reasonbench-deductive",
        type=ReasonBenchDataset,
        path="data/reasonbench/deductive.jsonl",
        reader_cfg=reader_cfgs[1],
        infer_cfg=infer_cfg,
        eval_cfg=reasonbench_eval_cfg),
]

InductiveReasoningDataset = [
    dict(
        abbr="reasonbench-inductive",
        type=ReasonBenchDataset,
        path="data/reasonbench/inductive.jsonl",
        reader_cfg=reader_cfgs[0],
        infer_cfg=infer_cfg,
        eval_cfg=reasonbench_eval_cfg),
]

SymbolicReasoningDataset = [
    dict(
        abbr="reasonbench-symbolic",
        type=ReasonBenchDataset,
        path="data/reasonbench/symbolic.jsonl",
        reader_cfg=reader_cfgs[2],
        infer_cfg=infer_cfg,
        eval_cfg=reasonbench_eval_cfg),
]

CLEVA_CommonsenseReasoningDataset = [
    dict(
        abbr="reasonbench-cleva_commonsense",
        type=ReasonBenchDataset,
        path="data/reasonbench/cleva_commonsense.jsonl",
        reader_cfg=reader_cfgs[1],
        infer_cfg=infer_cfg,
        eval_cfg=reasonbench_eval_cfg),
]

CLEVA_DeductiveReasoningDataset = [
    dict(
        abbr="reasonbench-cleva_deductive",
        type=ReasonBenchDataset,
        path="data/reasonbench/cleva_deductive.jsonl",
        reader_cfg=reader_cfgs[1],
        infer_cfg=infer_cfg,
        eval_cfg=reasonbench_eval_cfg),
]

CLEVA_InductiveReasoningDataset = [
    dict(
        abbr="reasonbench-cleva_inductive",
        type=ReasonBenchDataset,
        path="data/reasonbench/cleva_inductive.jsonl",
        reader_cfg=reader_cfgs[0],
        infer_cfg=infer_cfg,
        eval_cfg=reasonbench_eval_cfg),
]

reasonbench_datasets = \
    CLEVA_CommonsenseReasoningDataset + \
    CLEVA_DeductiveReasoningDataset + \
    CLEVA_InductiveReasoningDataset + \
    CausalReasoningDataset + \
    CommonsenseReasoningDataset + \
    AbductiveReasoningDataset + \
    DeductiveReasoningDataset + \
    InductiveReasoningDataset + \
    SymbolicReasoningDataset
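How the pieces interact at eval time: GenInferencer produces free-form text, the pred_postprocessor reduces it to a letter, and AccEvaluator compares that letter to label_ppl. A minimal sketch of the postprocessing step, re-implemented here for illustration (not the OpenCompass source):

    import re

    def first_capital_sketch(text: str) -> str:
        # Keep only the first capital letter of the generation, mirroring
        # what first_capital_postprocess is used for in this config.
        match = re.search(r"[A-Z]", text)
        return match.group(0) if match else ""

    assert first_capital_sketch("B. Because the ground is wet.") == "B"
    assert first_capital_sketch("the answer is (C)") == "C"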
configs/datasets/ReasonBench/reasonbench_ppl.py (new file, 4 lines)
from mmengine.config import read_base

with read_base():
    from .reasonbench_ppl_b4a005 import reasonbench_datasets
configs/datasets/ReasonBench/reasonbench_ppl_b4a005.py (new file, 136 lines)
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets.reasonbench import ReasonBenchDataset

reasonbench_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_role="BOT",
)

# Reader and inference configs for 2-, 3- and 4-way multiple choice.
reader_cfgs, infer_cfgs = [], []
for i in range(2, 5):
    choices = ["A", "B", "C", "D"][:i]

    reader_cfgs.append(dict(
        input_columns=["prompt_ppl"] + choices + ["choices"],
        output_column="label")
    )

    infer_cfgs.append(dict(
        prompt_template=dict(
            type=PromptTemplate,
            # One candidate template per answer choice, keyed by its index;
            # PPLInferencer scores each rendered candidate by LM loss.
            template={
                str(id): dict(round=[
                    dict(role="HUMAN", prompt="{prompt_ppl}Answer:"),
                    dict(role="BOT", prompt=f"{choice}"),
                ])
                for id, choice in enumerate(choices)
            }),
        retriever=dict(type=ZeroRetriever),
        inferencer=dict(type=PPLInferencer)
    ))

CausalReasoningDataset = [
    dict(
        abbr="reasonbench-causal",
        type=ReasonBenchDataset,
        path="data/reasonbench/causal.jsonl",
        reader_cfg=reader_cfgs[0],
        infer_cfg=infer_cfgs[0],
        eval_cfg=reasonbench_eval_cfg),
]

CommonsenseReasoningDataset = [
    dict(
        abbr="reasonbench-commonsense",
        type=ReasonBenchDataset,
        path="data/reasonbench/commonsense.jsonl",
        reader_cfg=reader_cfgs[1],
        infer_cfg=infer_cfgs[1],
        eval_cfg=reasonbench_eval_cfg),
]

AbductiveReasoningDataset = [
    dict(
        abbr="reasonbench-abductive",
        type=ReasonBenchDataset,
        path="data/reasonbench/abductive.jsonl",
        reader_cfg=reader_cfgs[0],
        infer_cfg=infer_cfgs[0],
        eval_cfg=reasonbench_eval_cfg),
]

DeductiveReasoningDataset = [
    dict(
        abbr="reasonbench-deductive",
        type=ReasonBenchDataset,
        path="data/reasonbench/deductive.jsonl",
        reader_cfg=reader_cfgs[1],
        infer_cfg=infer_cfgs[1],
        eval_cfg=reasonbench_eval_cfg),
]

InductiveReasoningDataset = [
    dict(
        abbr="reasonbench-inductive",
        type=ReasonBenchDataset,
        path="data/reasonbench/inductive.jsonl",
        reader_cfg=reader_cfgs[0],
        infer_cfg=infer_cfgs[0],
        eval_cfg=reasonbench_eval_cfg),
]

SymbolicReasoningDataset = [
    dict(
        abbr="reasonbench-symbolic",
        type=ReasonBenchDataset,
        path="data/reasonbench/symbolic.jsonl",
        reader_cfg=reader_cfgs[2],
        infer_cfg=infer_cfgs[2],
        eval_cfg=reasonbench_eval_cfg),
]

CLEVA_CommonsenseReasoningDataset = [
    dict(
        abbr="reasonbench-cleva_commonsense",
        type=ReasonBenchDataset,
        path="data/reasonbench/cleva_commonsense.jsonl",
        reader_cfg=reader_cfgs[1],
        infer_cfg=infer_cfgs[1],
        eval_cfg=reasonbench_eval_cfg),
]

CLEVA_DeductiveReasoningDataset = [
    dict(
        abbr="reasonbench-cleva_deductive",
        type=ReasonBenchDataset,
        path="data/reasonbench/cleva_deductive.jsonl",
        reader_cfg=reader_cfgs[1],
        infer_cfg=infer_cfgs[1],
        eval_cfg=reasonbench_eval_cfg),
]

CLEVA_InductiveReasoningDataset = [
    dict(
        abbr="reasonbench-cleva_inductive",
        type=ReasonBenchDataset,
        path="data/reasonbench/cleva_inductive.jsonl",
        reader_cfg=reader_cfgs[0],
        infer_cfg=infer_cfgs[0],
        eval_cfg=reasonbench_eval_cfg),
]

reasonbench_datasets = \
    CLEVA_CommonsenseReasoningDataset + \
    CLEVA_DeductiveReasoningDataset + \
    CLEVA_InductiveReasoningDataset + \
    CausalReasoningDataset + \
    CommonsenseReasoningDataset + \
    AbductiveReasoningDataset + \
    DeductiveReasoningDataset + \
    InductiveReasoningDataset + \
    SymbolicReasoningDataset
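The selection rule this template sets up, sketched in plain Python for illustration (PPLInferencer's actual implementation lives in OpenCompass; here losses is a hypothetical mapping from the template keys "0", "1", ... to the model's loss on the corresponding rendered candidate):

    def pick_by_ppl(losses: dict) -> str:
        # The candidate whose completion the model finds least
        # surprising (lowest loss) becomes the prediction.
        return min(losses, key=losses.get)

    assert pick_by_ppl({"0": 2.31, "1": 1.87, "2": 2.95}) == "1"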
opencompass/datasets/__init__.py (1 line added)
@@ -68,6 +68,7 @@ from .qasper import *  # noqa: F401, F403
 from .qaspercut import *  # noqa: F401, F403
 from .race import *  # noqa: F401, F403
 from .realtoxicprompts import *  # noqa: F401, F403
+from .reasonbench import ReasonBenchDataset  # noqa: F401, F403
 from .record import *  # noqa: F401, F403
 from .safety import *  # noqa: F401, F403
 from .scibench import ScibenchDataset, scibench_postprocess  # noqa: F401, F403
opencompass/datasets/reasonbench/ReasonBenchDataset.py (new file, 39 lines)
import json

from datasets import Dataset

from opencompass.registry import LOAD_DATASET

from ..base import BaseDataset


@LOAD_DATASET.register_module()
class ReasonBenchDataset(BaseDataset):

    @staticmethod
    def load(path: str):
        raw_data = []
        with open(path, 'r', encoding='utf-8') as f:
            for line in f:
                line = json.loads(line)
                prompt = line['prompt']
                prompt_ppl = line['prompt_ppl']
                label = line['label']
                label_ppl = line['label_ppl']
                choices = line['choices']
                tag = line['tag']
                source = line['source']
                # Each choice letter is also a top-level key holding its text.
                option_content = {choice: line[choice] for choice in choices}
                data = {
                    'prompt': prompt,
                    'label': label,
                    'prompt_ppl': prompt_ppl,
                    # Keep only the leading character, e.g. "A. ..." -> "A".
                    'label_ppl': str(label_ppl)[0],
                    'choices': choices,
                    'tag': tag,
                    'source': source,
                }
                data.update(option_content)
                raw_data.append(data)
        dataset = Dataset.from_list(raw_data)
        return dataset
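The keys read by load() imply the following record shape for each JSONL line; this example is invented for illustration (only the key set is dictated by the code):

    example_line = {
        "prompt": "It rained all night. What happened to the ground?",
        "prompt_ppl": "It rained all night. What happened to the ground?\nA. It is wet.\nB. It is dry.\n",
        "label": 0,
        "label_ppl": "A",
        "choices": ["A", "B"],
        "A": "It is wet.",
        "B": "It is dry.",
        "tag": "causal",
        "source": "example",
    }

    # Round-tripping through the loader (sketch):
    # with open("demo.jsonl", "w", encoding="utf-8") as f:
    #     f.write(json.dumps(example_line) + "\n")
    # ds = ReasonBenchDataset.load("demo.jsonl")
    # assert ds[0]["label_ppl"] == "A"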
opencompass/datasets/reasonbench/__init__.py (new file, 1 line)
from .ReasonBenchDataset import *  # noqa: F401, F403