[Feature] Dataset prompts update for ARC, BoolQ, Race (#1527)

Linchen Xiao 2024-09-13 10:30:43 +08:00 committed by GitHub
parent 6997990c93
commit 90279b6461
6 changed files with 334 additions and 0 deletions

View File

@@ -0,0 +1,63 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import ARCDataset

ARC_c_reader_cfg = dict(
input_columns=['question', 'textA', 'textB', 'textC', 'textD'],
output_column='answerKey',
)

ARC_c_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template={
'A': dict(
begin='</E>',
round=[
dict(role='HUMAN', prompt='Question: {question}\nAnswer: '),
dict(role='BOT', prompt='{textA}'),
],
),
'B': dict(
begin='</E>',
round=[
dict(role='HUMAN', prompt='Question: {question}\nAnswer: '),
dict(role='BOT', prompt='{textB}'),
],
),
'C': dict(
begin='</E>',
round=[
dict(role='HUMAN', prompt='Question: {question}\nAnswer: '),
dict(role='BOT', prompt='{textC}'),
],
),
'D': dict(
begin='</E>',
round=[
dict(role='HUMAN', prompt='Question: {question}\nAnswer: '),
dict(role='BOT', prompt='{textD}'),
],
),
},
ice_token='</E>',
),
retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4, 6, 8]),
inferencer=dict(type=PPLInferencer),
)

ARC_c_eval_cfg = dict(evaluator=dict(type=AccEvaluator))

ARC_c_datasets = [
dict(
type=ARCDataset,
abbr='ARC-c',
path='opencompass/ai2_arc-dev',
name='ARC-Challenge',
reader_cfg=ARC_c_reader_cfg,
infer_cfg=ARC_c_infer_cfg,
eval_cfg=ARC_c_eval_cfg,
)
]
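Note: the config above only defines ARC_c_datasets; an evaluation config usually pulls it in through mmengine's read_base mechanism. The sketch below shows that wiring under assumed import paths (the diff does not show where this file lives, and the model config name is likewise an assumption, not part of this commit).

# Minimal sketch of an eval config consuming ARC_c_datasets.
# Both import paths below are assumptions; adjust them to the actual file locations.
from mmengine.config import read_base

with read_base():
    from .datasets.ARC_c.ARC_c_few_shot_ppl import ARC_c_datasets
    from .models.hf_internlm.hf_internlm2_7b import models

datasets = ARC_c_datasets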

View File

@@ -0,0 +1,47 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import BoolQDatasetV2

BoolQ_reader_cfg = dict(
input_columns=['question', 'passage'],
output_column='label',
)

BoolQ_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template={
            'B': dict(
                begin='</E>',
                round=[
dict(role='HUMAN', prompt='{passage}\nQuestion: {question}?'),
dict(role='BOT', prompt='No'),
]
),
            'A': dict(
                begin='</E>',
                round=[
dict(role='HUMAN', prompt='{passage}\nQuestion: {question}?'),
dict(role='BOT', prompt='Yes'),
]
),
},
ice_token='</E>',
),
retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4, 6, 8]),
inferencer=dict(type=PPLInferencer, max_out_len=50),
)

BoolQ_eval_cfg = dict(evaluator=dict(type=AccEvaluator))

BoolQ_datasets = [
dict(
abbr='BoolQ',
type=BoolQDatasetV2,
path='opencompass/boolq',
reader_cfg=BoolQ_reader_cfg,
infer_cfg=BoolQ_infer_cfg,
eval_cfg=BoolQ_eval_cfg,
)
]
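For context on the template layout: each label key ('A' for Yes, 'B' for No) renders into a complete prompt ending with that answer, and PPLInferencer scores every rendering by perplexity, predicting the label whose rendering the model finds most likely. The snippet below is a framework-independent sketch of that selection rule; render and score_perplexity are hypothetical stand-ins, not OpenCompass APIs.

# Illustrative sketch of PPL-based answer selection, not OpenCompass internals.
def pick_label(render, score_perplexity, item, labels=('A', 'B')):
    # render(label, item) -> the full prompt text for that candidate answer
    # score_perplexity(text) -> perplexity of the text under the model (lower is better)
    scores = {label: score_perplexity(render(label, item)) for label in labels}
    return min(scores, key=scores.get)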

View File

@@ -0,0 +1,57 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import RaceDataset

race_reader_cfg = dict(
input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
output_column='answer',
train_split='validation',
test_split='test',
)

race_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template={
ans: dict(
begin='</E>',
round=[
dict(
role='HUMAN',
prompt='Article:\n{article}\nQuestion:\n{question}\nA. {A}\nB. {B}\nC. {C}\nD. {D}',
),
dict(role='BOT', prompt=f'Answer: {ans}'),
],
)
for ans in ['A', 'B', 'C', 'D']
},
ice_token='</E>',
),
retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4]),
inferencer=dict(type=PPLInferencer),
)

race_eval_cfg = dict(evaluator=dict(type=AccEvaluator))

race_datasets = [
dict(
abbr='race-middle',
type=RaceDataset,
path='opencompass/race',
name='middle',
reader_cfg=race_reader_cfg,
infer_cfg=race_infer_cfg,
eval_cfg=race_eval_cfg,
),
dict(
abbr='race-high',
type=RaceDataset,
path='opencompass/race',
name='high',
reader_cfg=race_reader_cfg,
infer_cfg=race_infer_cfg,
eval_cfg=race_eval_cfg,
),
]
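Unlike the ARC config, the Race template is built with a dict comprehension rather than four literal entries. For illustration only, the expanded form of the comprehension's 'A' entry is shown below; it mirrors the structure used in the ARC config above.

# Expanded form of the comprehension's 'A' entry, for illustration only.
example_entry = {
    'A': dict(
        begin='</E>',
        round=[
            dict(
                role='HUMAN',
                prompt='Article:\n{article}\nQuestion:\n{question}\nA. {A}\nB. {B}\nC. {C}\nD. {D}',
            ),
            dict(role='BOT', prompt='Answer: A'),
        ],
    )
}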
