mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
[Feature] Dataset prompts update for ARC, BoolQ, Race (#1527)
This commit is contained in:
parent
6997990c93
commit
90279b6461
63
configs/datasets/ARC_c/ARC_c_few_shot_ppl.py
Normal file
63
configs/datasets/ARC_c/ARC_c_few_shot_ppl.py
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||||
|
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever
|
||||||
|
from opencompass.openicl.icl_inferencer import PPLInferencer
|
||||||
|
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||||
|
from opencompass.datasets import ARCDataset
|
||||||
|
|
||||||
|
# Reader settings for ARC-Challenge. The input columns provide the
# `{question}` / `{textA}`..`{textD}` placeholders used by the prompt
# templates in this file; `answerKey` is the output column.
ARC_c_reader_cfg = dict(
    input_columns=['question', 'textA', 'textB', 'textC', 'textD'],
    output_column='answerKey',
)
|
||||||
|
|
||||||
|
# Few-shot perplexity inference settings for ARC-Challenge.
#
# The template maps each option label ('A'..'D') to a two-turn prompt: the
# HUMAN turn asks the question and the BOT turn is that option's text.
# `</E>` is declared as the ice_token and sits at `begin` of every template.
# The four templates differ only in the `{textX}` placeholder, so they are
# generated instead of being written out four times.
ARC_c_infer_cfg = dict(
    ice_template=dict(
        type=PromptTemplate,
        template={
            option: dict(
                begin='</E>',
                round=[
                    dict(role='HUMAN', prompt='Question: {question}\nAnswer: '),
                    # Doubled braces emit a literal placeholder such as
                    # '{textA}' for the template engine to fill in later.
                    dict(role='BOT', prompt=f'{{text{option}}}'),
                ],
            )
            for option in ['A', 'B', 'C', 'D']
        },
        ice_token='</E>',
    ),
    # Fixed set of example indices used as in-context examples.
    retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4, 6, 8]),
    inferencer=dict(type=PPLInferencer),
)
|
||||||
|
|
||||||
|
# Evaluation settings: ARC-Challenge results are scored with AccEvaluator.
ARC_c_eval_cfg = {'evaluator': {'type': AccEvaluator}}
|
||||||
|
|
||||||
|
# Dataset entries exported by this config: a single ARC-Challenge entry that
# ties together the reader, inference and evaluation settings defined above.
ARC_c_datasets = [
    dict(
        type=ARCDataset,
        abbr='ARC-c',
        path='opencompass/ai2_arc-dev',
        name='ARC-Challenge',
        reader_cfg=ARC_c_reader_cfg,
        infer_cfg=ARC_c_infer_cfg,
        eval_cfg=ARC_c_eval_cfg,
    ),
]
|
@ -0,0 +1,47 @@
|
|||||||
|
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||||
|
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever
|
||||||
|
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||||
|
from opencompass.datasets import BoolQDatasetV2
|
||||||
|
from opencompass.openicl.icl_inferencer import PPLInferencer
|
||||||
|
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||||
|
|
||||||
|
# Reader settings for BoolQ. The input columns provide the `{question}` and
# `{passage}` placeholders used by the prompt templates in this file;
# `label` is the output column.
BoolQ_reader_cfg = dict(
    input_columns=['question', 'passage'],
    output_column='label',
)
|
||||||
|
|
||||||
|
# Few-shot perplexity inference settings for BoolQ.
#
# The template maps label 'B' to the answer 'No' and label 'A' to the answer
# 'Yes'; both share the same passage + question HUMAN turn.
#
# `begin='</E>'` puts the declared ice_token into each template. Without the
# token in the template there is no placeholder for the retrieved examples.
# NOTE(review): presumably the examples picked by FixKRetriever are
# substituted at the ice_token -- confirm against PromptTemplate.
BoolQ_infer_cfg = dict(
    ice_template=dict(
        type=PromptTemplate,
        template={
            'B': dict(
                begin='</E>',
                round=[
                    dict(role='HUMAN', prompt='{passage}\nQuestion: {question}?'),
                    dict(role='BOT', prompt='No'),
                ],
            ),
            'A': dict(
                begin='</E>',
                round=[
                    dict(role='HUMAN', prompt='{passage}\nQuestion: {question}?'),
                    dict(role='BOT', prompt='Yes'),
                ],
            ),
        },
        ice_token='</E>',
    ),
    # Fixed set of example indices used as in-context examples.
    retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4, 6, 8]),
    # NOTE(review): max_out_len looks like a generation-length option; verify
    # that PPLInferencer actually consumes it.
    inferencer=dict(type=PPLInferencer, max_out_len=50),
)
|
||||||
|
|
||||||
|
# Evaluation settings: BoolQ results are scored with AccEvaluator.
BoolQ_eval_cfg = {'evaluator': {'type': AccEvaluator}}
|
||||||
|
|
||||||
|
# Dataset entries exported by this config: a single BoolQ entry that ties
# together the reader, inference and evaluation settings defined above.
BoolQ_datasets = [
    dict(
        abbr='BoolQ',
        type=BoolQDatasetV2,
        path='opencompass/boolq',
        reader_cfg=BoolQ_reader_cfg,
        infer_cfg=BoolQ_infer_cfg,
        eval_cfg=BoolQ_eval_cfg,
    ),
]
|
57
configs/datasets/race/race_few_shot_ppl.py
Normal file
57
configs/datasets/race/race_few_shot_ppl.py
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||||
|
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever
|
||||||
|
from opencompass.openicl.icl_inferencer import PPLInferencer
|
||||||
|
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||||
|
from opencompass.datasets import RaceDataset
|
||||||
|
|
||||||
|
# Reader settings for RACE. The input columns provide the `{article}`,
# `{question}` and `{A}`..`{D}` placeholders used by the prompt template in
# this file; `answer` is the output column. The 'validation' split is
# configured as the train split and 'test' as the test split.
race_reader_cfg = dict(
    input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
    output_column='answer',
    train_split='validation',
    test_split='test',
)
|
||||||
|
|
||||||
|
# Few-shot perplexity inference settings for RACE.
#
# One template is generated per answer letter. Every template shares the same
# HUMAN turn (article, question and the four options) and differs only in the
# BOT turn, 'Answer: <letter>'. `</E>` is declared as the ice_token and sits
# at `begin` of every template.
race_infer_cfg = dict(
    ice_template=dict(
        type=PromptTemplate,
        template={
            ans: dict(
                begin='</E>',
                round=[
                    dict(
                        role='HUMAN',
                        prompt='Article:\n{article}\nQuestion:\n{question}\nA. {A}\nB. {B}\nC. {C}\nD. {D}',
                    ),
                    # f-string: `ans` is filled in here, at config load time.
                    dict(role='BOT', prompt=f'Answer: {ans}'),
                ],
            )
            for ans in ['A', 'B', 'C', 'D']
        },
        ice_token='</E>',
    ),
    # Fixed set of example indices used as in-context examples.
    retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4]),
    inferencer=dict(type=PPLInferencer),
)
|
||||||
|
|
||||||
|
# Evaluation settings: RACE results are scored with AccEvaluator.
race_eval_cfg = {'evaluator': {'type': AccEvaluator}}
|
||||||
|
|
||||||
|
# Dataset entries exported by this config: the 'middle' and 'high' RACE
# subsets, both sharing the reader, inference and evaluation settings above.
race_datasets = [
    dict(
        abbr='race-middle',
        type=RaceDataset,
        path='opencompass/race',
        name='middle',
        reader_cfg=race_reader_cfg,
        infer_cfg=race_infer_cfg,
        eval_cfg=race_eval_cfg,
    ),
    dict(
        abbr='race-high',
        type=RaceDataset,
        path='opencompass/race',
        name='high',
        reader_cfg=race_reader_cfg,
        infer_cfg=race_infer_cfg,
        eval_cfg=race_eval_cfg,
    ),
]
|
63
opencompass/configs/datasets/ARC_c/ARC_c_few_shot_ppl.py
Normal file
63
opencompass/configs/datasets/ARC_c/ARC_c_few_shot_ppl.py
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||||
|
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever
|
||||||
|
from opencompass.openicl.icl_inferencer import PPLInferencer
|
||||||
|
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||||
|
from opencompass.datasets import ARCDataset
|
||||||
|
|
||||||
|
# Reader settings for ARC-Challenge. The input columns provide the
# `{question}` / `{textA}`..`{textD}` placeholders used by the prompt
# templates in this file; `answerKey` is the output column.
ARC_c_reader_cfg = dict(
    input_columns=['question', 'textA', 'textB', 'textC', 'textD'],
    output_column='answerKey',
)
|
||||||
|
|
||||||
|
# Few-shot perplexity inference settings for ARC-Challenge.
#
# The template maps each option label ('A'..'D') to a two-turn prompt: the
# HUMAN turn asks the question and the BOT turn is that option's text.
# `</E>` is declared as the ice_token and sits at `begin` of every template.
# The four templates differ only in the `{textX}` placeholder, so they are
# generated instead of being written out four times.
ARC_c_infer_cfg = dict(
    ice_template=dict(
        type=PromptTemplate,
        template={
            option: dict(
                begin='</E>',
                round=[
                    dict(role='HUMAN', prompt='Question: {question}\nAnswer: '),
                    # Doubled braces emit a literal placeholder such as
                    # '{textA}' for the template engine to fill in later.
                    dict(role='BOT', prompt=f'{{text{option}}}'),
                ],
            )
            for option in ['A', 'B', 'C', 'D']
        },
        ice_token='</E>',
    ),
    # Fixed set of example indices used as in-context examples.
    retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4, 6, 8]),
    inferencer=dict(type=PPLInferencer),
)
|
||||||
|
|
||||||
|
# Evaluation settings: ARC-Challenge results are scored with AccEvaluator.
ARC_c_eval_cfg = {'evaluator': {'type': AccEvaluator}}
|
||||||
|
|
||||||
|
# Dataset entries exported by this config: a single ARC-Challenge entry that
# ties together the reader, inference and evaluation settings defined above.
ARC_c_datasets = [
    dict(
        type=ARCDataset,
        abbr='ARC-c',
        path='opencompass/ai2_arc-dev',
        name='ARC-Challenge',
        reader_cfg=ARC_c_reader_cfg,
        infer_cfg=ARC_c_infer_cfg,
        eval_cfg=ARC_c_eval_cfg,
    ),
]
|
@ -0,0 +1,47 @@
|
|||||||
|
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||||
|
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever
|
||||||
|
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||||
|
from opencompass.datasets import BoolQDatasetV2
|
||||||
|
from opencompass.openicl.icl_inferencer import PPLInferencer
|
||||||
|
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||||
|
|
||||||
|
# Reader settings for BoolQ. The input columns provide the `{question}` and
# `{passage}` placeholders used by the prompt templates in this file;
# `label` is the output column.
BoolQ_reader_cfg = dict(
    input_columns=['question', 'passage'],
    output_column='label',
)
|
||||||
|
|
||||||
|
# Few-shot perplexity inference settings for BoolQ.
#
# The template maps label 'B' to the answer 'No' and label 'A' to the answer
# 'Yes'; both share the same passage + question HUMAN turn.
#
# `begin='</E>'` puts the declared ice_token into each template. Without the
# token in the template there is no placeholder for the retrieved examples.
# NOTE(review): presumably the examples picked by FixKRetriever are
# substituted at the ice_token -- confirm against PromptTemplate.
BoolQ_infer_cfg = dict(
    ice_template=dict(
        type=PromptTemplate,
        template={
            'B': dict(
                begin='</E>',
                round=[
                    dict(role='HUMAN', prompt='{passage}\nQuestion: {question}?'),
                    dict(role='BOT', prompt='No'),
                ],
            ),
            'A': dict(
                begin='</E>',
                round=[
                    dict(role='HUMAN', prompt='{passage}\nQuestion: {question}?'),
                    dict(role='BOT', prompt='Yes'),
                ],
            ),
        },
        ice_token='</E>',
    ),
    # Fixed set of example indices used as in-context examples.
    retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4, 6, 8]),
    # NOTE(review): max_out_len looks like a generation-length option; verify
    # that PPLInferencer actually consumes it.
    inferencer=dict(type=PPLInferencer, max_out_len=50),
)
|
||||||
|
|
||||||
|
# Evaluation settings: BoolQ results are scored with AccEvaluator.
BoolQ_eval_cfg = {'evaluator': {'type': AccEvaluator}}
|
||||||
|
|
||||||
|
# Dataset entries exported by this config: a single BoolQ entry that ties
# together the reader, inference and evaluation settings defined above.
BoolQ_datasets = [
    dict(
        abbr='BoolQ',
        type=BoolQDatasetV2,
        path='opencompass/boolq',
        reader_cfg=BoolQ_reader_cfg,
        infer_cfg=BoolQ_infer_cfg,
        eval_cfg=BoolQ_eval_cfg,
    ),
]
|
57
opencompass/configs/datasets/race/race_few_shot_ppl.py
Normal file
57
opencompass/configs/datasets/race/race_few_shot_ppl.py
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||||
|
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever
|
||||||
|
from opencompass.openicl.icl_inferencer import PPLInferencer
|
||||||
|
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||||
|
from opencompass.datasets import RaceDataset
|
||||||
|
|
||||||
|
# Reader settings for RACE. The input columns provide the `{article}`,
# `{question}` and `{A}`..`{D}` placeholders used by the prompt template in
# this file; `answer` is the output column. The 'validation' split is
# configured as the train split and 'test' as the test split.
race_reader_cfg = dict(
    input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
    output_column='answer',
    train_split='validation',
    test_split='test',
)
|
||||||
|
|
||||||
|
# Few-shot perplexity inference settings for RACE.
#
# One template is generated per answer letter. Every template shares the same
# HUMAN turn (article, question and the four options) and differs only in the
# BOT turn, 'Answer: <letter>'. `</E>` is declared as the ice_token and sits
# at `begin` of every template.
race_infer_cfg = dict(
    ice_template=dict(
        type=PromptTemplate,
        template={
            ans: dict(
                begin='</E>',
                round=[
                    dict(
                        role='HUMAN',
                        prompt='Article:\n{article}\nQuestion:\n{question}\nA. {A}\nB. {B}\nC. {C}\nD. {D}',
                    ),
                    # f-string: `ans` is filled in here, at config load time.
                    dict(role='BOT', prompt=f'Answer: {ans}'),
                ],
            )
            for ans in ['A', 'B', 'C', 'D']
        },
        ice_token='</E>',
    ),
    # Fixed set of example indices used as in-context examples.
    retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4]),
    inferencer=dict(type=PPLInferencer),
)
|
||||||
|
|
||||||
|
# Evaluation settings: RACE results are scored with AccEvaluator.
race_eval_cfg = {'evaluator': {'type': AccEvaluator}}
|
||||||
|
|
||||||
|
# Dataset entries exported by this config: the 'middle' and 'high' RACE
# subsets, both sharing the reader, inference and evaluation settings above.
race_datasets = [
    dict(
        abbr='race-middle',
        type=RaceDataset,
        path='opencompass/race',
        name='middle',
        reader_cfg=race_reader_cfg,
        infer_cfg=race_infer_cfg,
        eval_cfg=race_eval_cfg,
    ),
    dict(
        abbr='race-high',
        type=RaceDataset,
        path='opencompass/race',
        name='high',
        reader_cfg=race_reader_cfg,
        infer_cfg=race_infer_cfg,
        eval_cfg=race_eval_cfg,
    ),
]
|
Loading…
Reference in New Issue
Block a user