From 90279b646158087c34e33c7dfa600e9534b143da Mon Sep 17 00:00:00 2001
From: Linchen Xiao
Date: Fri, 13 Sep 2024 10:30:43 +0800
Subject: [PATCH] [Feature] Dataset prompts update for ARC, BoolQ, Race (#1527)

---
 configs/datasets/ARC_c/ARC_c_few_shot_ppl.py  | 63 +++++++++++++++++++
 .../SuperGLUE_BoolQ_few_shot_ppl.py           | 46 ++++++++++++++
 configs/datasets/race/race_few_shot_ppl.py    | 57 +++++++++++++++++
 .../datasets/ARC_c/ARC_c_few_shot_ppl.py      | 63 +++++++++++++++++++
 .../SuperGLUE_BoolQ_few_shot_ppl.py           | 46 ++++++++++++++
 .../datasets/race/race_few_shot_ppl.py        | 57 +++++++++++++++++
 6 files changed, 332 insertions(+)
 create mode 100644 configs/datasets/ARC_c/ARC_c_few_shot_ppl.py
 create mode 100644 configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_few_shot_ppl.py
 create mode 100644 configs/datasets/race/race_few_shot_ppl.py
 create mode 100644 opencompass/configs/datasets/ARC_c/ARC_c_few_shot_ppl.py
 create mode 100644 opencompass/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_few_shot_ppl.py
 create mode 100644 opencompass/configs/datasets/race/race_few_shot_ppl.py

diff --git a/configs/datasets/ARC_c/ARC_c_few_shot_ppl.py b/configs/datasets/ARC_c/ARC_c_few_shot_ppl.py
new file mode 100644
index 00000000..31087ce8
--- /dev/null
+++ b/configs/datasets/ARC_c/ARC_c_few_shot_ppl.py
@@ -0,0 +1,63 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import FixKRetriever
+from opencompass.openicl.icl_inferencer import PPLInferencer
+from opencompass.openicl.icl_evaluator import AccEvaluator
+from opencompass.datasets import ARCDataset
+
+ARC_c_reader_cfg = dict(
+    input_columns=['question', 'textA', 'textB', 'textC', 'textD'],
+    output_column='answerKey',
+)
+
+ARC_c_infer_cfg = dict(
+    ice_template=dict(
+        type=PromptTemplate,
+        template={
+            'A': dict(
+                begin='</E>',
+                round=[
+                    dict(role='HUMAN', prompt='Question: {question}\nAnswer: '),
+                    dict(role='BOT', prompt='{textA}'),
+                ],
+            ),
+            'B': dict(
+                begin='</E>',
+                round=[
+                    dict(role='HUMAN', prompt='Question: {question}\nAnswer: '),
+                    dict(role='BOT', prompt='{textB}'),
+                ],
+            ),
+            'C': dict(
+                begin='</E>',
+                round=[
+                    dict(role='HUMAN', prompt='Question: {question}\nAnswer: '),
+                    dict(role='BOT', prompt='{textC}'),
+                ],
+            ),
+            'D': dict(
+                begin='</E>',
+                round=[
+                    dict(role='HUMAN', prompt='Question: {question}\nAnswer: '),
+                    dict(role='BOT', prompt='{textD}'),
+                ],
+            ),
+        },
+        ice_token='</E>',
+    ),
+    retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4, 6, 8]),
+    inferencer=dict(type=PPLInferencer),
+)
+
+ARC_c_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
+
+ARC_c_datasets = [
+    dict(
+        type=ARCDataset,
+        abbr='ARC-c',
+        path='opencompass/ai2_arc-dev',
+        name='ARC-Challenge',
+        reader_cfg=ARC_c_reader_cfg,
+        infer_cfg=ARC_c_infer_cfg,
+        eval_cfg=ARC_c_eval_cfg,
+    )
+]
diff --git a/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_few_shot_ppl.py b/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_few_shot_ppl.py
new file mode 100644
index 00000000..66528942
--- /dev/null
+++ b/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_few_shot_ppl.py
@@ -0,0 +1,46 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import FixKRetriever
+from opencompass.openicl.icl_inferencer import PPLInferencer
+from opencompass.openicl.icl_evaluator import AccEvaluator
+from opencompass.datasets import BoolQDatasetV2
+
+BoolQ_reader_cfg = dict(
+    input_columns=['question', 'passage'],
+    output_column='label',
+)
+
+BoolQ_infer_cfg = dict(
+    ice_template=dict(
+        type=PromptTemplate,
+        template={
+            'B': dict(
+                round=[
+                    dict(role='HUMAN', prompt='</E>{passage}\nQuestion: {question}?'),
+                    dict(role='BOT', prompt='No'),
+                ]
+            ),
+            'A': dict(
+                round=[
+                    dict(role='HUMAN', prompt='</E>{passage}\nQuestion: {question}?'),
+                    dict(role='BOT', prompt='Yes'),
+                ]
+            ),
+        },
+        ice_token='</E>',
+    ),
+    retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4, 6, 8]),
+    inferencer=dict(type=PPLInferencer, max_out_len=50),
+)
+
+BoolQ_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
+
+BoolQ_datasets = [
+    dict(
+        abbr='BoolQ',
+        type=BoolQDatasetV2,
+        path='opencompass/boolq',
+        reader_cfg=BoolQ_reader_cfg,
+        infer_cfg=BoolQ_infer_cfg,
+        eval_cfg=BoolQ_eval_cfg,
+    )
+]
diff --git a/configs/datasets/race/race_few_shot_ppl.py b/configs/datasets/race/race_few_shot_ppl.py
new file mode 100644
index 00000000..2fa9cd1d
--- /dev/null
+++ b/configs/datasets/race/race_few_shot_ppl.py
@@ -0,0 +1,57 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import FixKRetriever
+from opencompass.openicl.icl_inferencer import PPLInferencer
+from opencompass.openicl.icl_evaluator import AccEvaluator
+from opencompass.datasets import RaceDataset
+
+race_reader_cfg = dict(
+    input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
+    output_column='answer',
+    train_split='validation',
+    test_split='test',
+)
+
+race_infer_cfg = dict(
+    ice_template=dict(
+        type=PromptTemplate,
+        template={
+            ans: dict(
+                begin='</E>',
+                round=[
+                    dict(
+                        role='HUMAN',
+                        prompt='Article:\n{article}\nQuestion:\n{question}\nA. {A}\nB. {B}\nC. {C}\nD. {D}',
+                    ),
+                    dict(role='BOT', prompt=f'Answer: {ans}'),
+                ],
+            )
+            for ans in ['A', 'B', 'C', 'D']
+        },
+        ice_token='</E>',
+    ),
+    retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4]),
+    inferencer=dict(type=PPLInferencer),
+)
+
+race_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
+
+race_datasets = [
+    dict(
+        abbr='race-middle',
+        type=RaceDataset,
+        path='opencompass/race',
+        name='middle',
+        reader_cfg=race_reader_cfg,
+        infer_cfg=race_infer_cfg,
+        eval_cfg=race_eval_cfg,
+    ),
+    dict(
+        abbr='race-high',
+        type=RaceDataset,
+        path='opencompass/race',
+        name='high',
+        reader_cfg=race_reader_cfg,
+        infer_cfg=race_infer_cfg,
+        eval_cfg=race_eval_cfg,
+    ),
+]
diff --git a/opencompass/configs/datasets/ARC_c/ARC_c_few_shot_ppl.py b/opencompass/configs/datasets/ARC_c/ARC_c_few_shot_ppl.py
new file mode 100644
index 00000000..31087ce8
--- /dev/null
+++ b/opencompass/configs/datasets/ARC_c/ARC_c_few_shot_ppl.py
@@ -0,0 +1,63 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import FixKRetriever
+from opencompass.openicl.icl_inferencer import PPLInferencer
+from opencompass.openicl.icl_evaluator import AccEvaluator
+from opencompass.datasets import ARCDataset
+
+ARC_c_reader_cfg = dict(
+    input_columns=['question', 'textA', 'textB', 'textC', 'textD'],
+    output_column='answerKey',
+)
+
+ARC_c_infer_cfg = dict(
+    ice_template=dict(
+        type=PromptTemplate,
+        template={
+            'A': dict(
+                begin='</E>',
+                round=[
+                    dict(role='HUMAN', prompt='Question: {question}\nAnswer: '),
+                    dict(role='BOT', prompt='{textA}'),
+                ],
+            ),
+            'B': dict(
+                begin='</E>',
+                round=[
+                    dict(role='HUMAN', prompt='Question: {question}\nAnswer: '),
+                    dict(role='BOT', prompt='{textB}'),
+                ],
+            ),
+            'C': dict(
+                begin='</E>',
+                round=[
+                    dict(role='HUMAN', prompt='Question: {question}\nAnswer: '),
+                    dict(role='BOT', prompt='{textC}'),
+                ],
+            ),
+            'D': dict(
+                begin='</E>',
+                round=[
+                    dict(role='HUMAN', prompt='Question: {question}\nAnswer: '),
+                    dict(role='BOT', prompt='{textD}'),
+                ],
+            ),
+        },
+        ice_token='</E>',
+    ),
+    retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4, 6, 8]),
+    inferencer=dict(type=PPLInferencer),
+)
+
+ARC_c_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
+
+ARC_c_datasets = [
+    dict(
+        type=ARCDataset,
+        abbr='ARC-c',
+        path='opencompass/ai2_arc-dev',
+        name='ARC-Challenge',
+        reader_cfg=ARC_c_reader_cfg,
+        infer_cfg=ARC_c_infer_cfg,
+        eval_cfg=ARC_c_eval_cfg,
+    )
+]
diff --git a/opencompass/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_few_shot_ppl.py b/opencompass/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_few_shot_ppl.py
new file mode 100644
index 00000000..66528942
--- /dev/null
+++ b/opencompass/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_few_shot_ppl.py
@@ -0,0 +1,46 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import FixKRetriever
+from opencompass.openicl.icl_inferencer import PPLInferencer
+from opencompass.openicl.icl_evaluator import AccEvaluator
+from opencompass.datasets import BoolQDatasetV2
+
+BoolQ_reader_cfg = dict(
+    input_columns=['question', 'passage'],
+    output_column='label',
+)
+
+BoolQ_infer_cfg = dict(
+    ice_template=dict(
+        type=PromptTemplate,
+        template={
+            'B': dict(
+                round=[
+                    dict(role='HUMAN', prompt='</E>{passage}\nQuestion: {question}?'),
+                    dict(role='BOT', prompt='No'),
+                ]
+            ),
+            'A': dict(
+                round=[
+                    dict(role='HUMAN', prompt='</E>{passage}\nQuestion: {question}?'),
+                    dict(role='BOT', prompt='Yes'),
+                ]
+            ),
+        },
+        ice_token='</E>',
+    ),
+    retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4, 6, 8]),
+    inferencer=dict(type=PPLInferencer, max_out_len=50),
+)
+
+BoolQ_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
+
+BoolQ_datasets = [
+    dict(
+        abbr='BoolQ',
+        type=BoolQDatasetV2,
+        path='opencompass/boolq',
+        reader_cfg=BoolQ_reader_cfg,
+        infer_cfg=BoolQ_infer_cfg,
+        eval_cfg=BoolQ_eval_cfg,
+    )
+]
diff --git a/opencompass/configs/datasets/race/race_few_shot_ppl.py b/opencompass/configs/datasets/race/race_few_shot_ppl.py
new file mode 100644
index 00000000..2fa9cd1d
--- /dev/null
+++ b/opencompass/configs/datasets/race/race_few_shot_ppl.py
@@ -0,0 +1,57 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import FixKRetriever
+from opencompass.openicl.icl_inferencer import PPLInferencer
+from opencompass.openicl.icl_evaluator import AccEvaluator
+from opencompass.datasets import RaceDataset
+
+race_reader_cfg = dict(
+    input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
+    output_column='answer',
+    train_split='validation',
+    test_split='test',
+)
+
+race_infer_cfg = dict(
+    ice_template=dict(
+        type=PromptTemplate,
+        template={
+            ans: dict(
+                begin='</E>',
+                round=[
+                    dict(
+                        role='HUMAN',
+                        prompt='Article:\n{article}\nQuestion:\n{question}\nA. {A}\nB. {B}\nC. {C}\nD. {D}',
+                    ),
+                    dict(role='BOT', prompt=f'Answer: {ans}'),
+                ],
+            )
+            for ans in ['A', 'B', 'C', 'D']
+        },
+        ice_token='</E>',
+    ),
+    retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4]),
+    inferencer=dict(type=PPLInferencer),
+)
+
+race_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
+
+race_datasets = [
+    dict(
+        abbr='race-middle',
+        type=RaceDataset,
+        path='opencompass/race',
+        name='middle',
+        reader_cfg=race_reader_cfg,
+        infer_cfg=race_infer_cfg,
+        eval_cfg=race_eval_cfg,
+    ),
+    dict(
+        abbr='race-high',
+        type=RaceDataset,
+        path='opencompass/race',
+        name='high',
+        reader_cfg=race_reader_cfg,
+        infer_cfg=race_infer_cfg,
+        eval_cfg=race_eval_cfg,
+    ),
+]
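
Each new file exports a `*_datasets` list (`ARC_c_datasets`, `BoolQ_datasets`, `race_datasets`). A minimal sketch of how these could be pulled into an evaluation run via OpenCompass's standard `read_base` mechanism follows; the file name `configs/eval_few_shot_ppl.py` is hypothetical and not part of this patch, the relative import paths assume the consuming config sits under `configs/`, and models are supplied separately (via CLI flags or another config import):

    # Hypothetical eval config, e.g. configs/eval_few_shot_ppl.py (not part of this patch).
    from mmengine.config import read_base

    with read_base():
        # Few-shot PPL configs added by this patch; paths are relative to configs/.
        from .datasets.ARC_c.ARC_c_few_shot_ppl import ARC_c_datasets
        from .datasets.SuperGLUE_BoolQ.SuperGLUE_BoolQ_few_shot_ppl import BoolQ_datasets
        from .datasets.race.race_few_shot_ppl import race_datasets

    # Concatenate the three suites into the `datasets` list OpenCompass expects.
    datasets = [*ARC_c_datasets, *BoolQ_datasets, *race_datasets]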