From 8d9cee060f87aafccf3cede68d6d802a3a60def2 Mon Sep 17 00:00:00 2001 From: Hubert <42952108+yingfhu@users.noreply.github.com> Date: Fri, 11 Aug 2023 17:33:00 +0800 Subject: [PATCH] [Feat] update postprocessor to get first option more accurately (#193) * [Feat] update postprocessor to get first option * minor fix * minor fix --- configs/datasets/ARC_c/ARC_c_gen_1e0de5.py | 4 +-- configs/datasets/ARC_e/ARC_e_gen_1e0de5.py | 4 +-- .../SuperGLUE_AX_b_gen_4dfefa.py | 4 +-- .../SuperGLUE_AX_g_gen_68aac7.py | 4 +-- .../SuperGLUE_CB/SuperGLUE_CB_gen_854c6c.py | 4 +-- .../SuperGLUE_COPA_gen_91ca53.py | 4 +-- .../SuperGLUE_MultiRC_gen_27071f.py | 4 +-- .../SuperGLUE_RTE/SuperGLUE_RTE_gen_68aac7.py | 4 +-- .../datasets/agieval/agieval_gen_397d81.py | 11 +++++--- .../hellaswag/hellaswag_gen_6faab5.py | 13 +++++----- configs/datasets/obqa/obqa_gen_9069e4.py | 14 +++++------ configs/datasets/piqa/piqa_gen_1194eb.py | 8 +++--- configs/datasets/race/race_gen_69ee4f.py | 4 +-- .../storycloze/storycloze_gen_7f656a.py | 4 +-- .../winogrande/winogrande_gen_a9ede5.py | 4 +-- opencompass/utils/text_postprocessors.py | 25 +++++++++++++++++++ 16 files changed, 72 insertions(+), 43 deletions(-) diff --git a/configs/datasets/ARC_c/ARC_c_gen_1e0de5.py b/configs/datasets/ARC_c/ARC_c_gen_1e0de5.py index b09470fb..2f6fc37e 100644 --- a/configs/datasets/ARC_c/ARC_c_gen_1e0de5.py +++ b/configs/datasets/ARC_c/ARC_c_gen_1e0de5.py @@ -3,7 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever from opencompass.openicl.icl_inferencer import GenInferencer from opencompass.openicl.icl_evaluator import AccEvaluator from opencompass.datasets import ARCDataset -from opencompass.utils.text_postprocessors import first_capital_postprocess +from opencompass.utils.text_postprocessors import first_option_postprocess ARC_c_reader_cfg = dict( input_columns=["question", "textA", "textB", "textC", "textD"], @@ -28,7 +28,7 @@ ARC_c_infer_cfg = dict( ARC_c_eval_cfg = dict( 
evaluator=dict(type=AccEvaluator), pred_role="BOT", - pred_postprocessor=dict(type=first_capital_postprocess), + pred_postprocessor=dict(type=first_option_postprocess, options='ABCD'), ) ARC_c_datasets = [ diff --git a/configs/datasets/ARC_e/ARC_e_gen_1e0de5.py b/configs/datasets/ARC_e/ARC_e_gen_1e0de5.py index 5af17e4d..f17065e5 100644 --- a/configs/datasets/ARC_e/ARC_e_gen_1e0de5.py +++ b/configs/datasets/ARC_e/ARC_e_gen_1e0de5.py @@ -3,7 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever from opencompass.openicl.icl_inferencer import GenInferencer from opencompass.openicl.icl_evaluator import AccEvaluator from opencompass.datasets import ARCDataset -from opencompass.utils.text_postprocessors import first_capital_postprocess +from opencompass.utils.text_postprocessors import first_option_postprocess ARC_e_reader_cfg = dict( input_columns=["question", "textA", "textB", "textC", "textD"], @@ -28,7 +28,7 @@ ARC_e_infer_cfg = dict( ARC_e_eval_cfg = dict( evaluator=dict(type=AccEvaluator), pred_role="BOT", - pred_postprocessor=dict(type=first_capital_postprocess), + pred_postprocessor=dict(type=first_option_postprocess, options='ABCD'), ) ARC_e_datasets = [ diff --git a/configs/datasets/SuperGLUE_AX_b/SuperGLUE_AX_b_gen_4dfefa.py b/configs/datasets/SuperGLUE_AX_b/SuperGLUE_AX_b_gen_4dfefa.py index ab687c35..43824171 100644 --- a/configs/datasets/SuperGLUE_AX_b/SuperGLUE_AX_b_gen_4dfefa.py +++ b/configs/datasets/SuperGLUE_AX_b/SuperGLUE_AX_b_gen_4dfefa.py @@ -3,7 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever from opencompass.openicl.icl_inferencer import GenInferencer from opencompass.openicl.icl_evaluator import AccEvaluator from opencompass.datasets import AXDataset_V2 -from opencompass.utils.text_postprocessors import first_capital_postprocess +from opencompass.utils.text_postprocessors import first_option_postprocess AX_b_reader_cfg = dict( input_columns=["sentence1", "sentence2"], @@ -28,7 +28,7 @@ AX_b_infer_cfg = dict( 
AX_b_eval_cfg = dict( evaluator=dict(type=AccEvaluator), pred_role="BOT", - pred_postprocessor=dict(type=first_capital_postprocess), + pred_postprocessor=dict(type=first_option_postprocess, options='AB'), ) AX_b_datasets = [ diff --git a/configs/datasets/SuperGLUE_AX_g/SuperGLUE_AX_g_gen_68aac7.py b/configs/datasets/SuperGLUE_AX_g/SuperGLUE_AX_g_gen_68aac7.py index e057f277..168946c8 100644 --- a/configs/datasets/SuperGLUE_AX_g/SuperGLUE_AX_g_gen_68aac7.py +++ b/configs/datasets/SuperGLUE_AX_g/SuperGLUE_AX_g_gen_68aac7.py @@ -3,7 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever from opencompass.openicl.icl_inferencer import GenInferencer from opencompass.openicl.icl_evaluator import AccEvaluator from opencompass.datasets import AXDataset_V2 -from opencompass.utils.text_postprocessors import first_capital_postprocess +from opencompass.utils.text_postprocessors import first_option_postprocess AX_g_reader_cfg = dict( input_columns=["hypothesis", "premise"], @@ -28,7 +28,7 @@ AX_g_infer_cfg = dict( AX_g_eval_cfg = dict( evaluator=dict(type=AccEvaluator), pred_role="BOT", - pred_postprocessor=dict(type=first_capital_postprocess), + pred_postprocessor=dict(type=first_option_postprocess, options='AB'), ) AX_g_datasets = [ diff --git a/configs/datasets/SuperGLUE_CB/SuperGLUE_CB_gen_854c6c.py b/configs/datasets/SuperGLUE_CB/SuperGLUE_CB_gen_854c6c.py index 4501ecc1..206d9078 100644 --- a/configs/datasets/SuperGLUE_CB/SuperGLUE_CB_gen_854c6c.py +++ b/configs/datasets/SuperGLUE_CB/SuperGLUE_CB_gen_854c6c.py @@ -3,7 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever from opencompass.openicl.icl_inferencer import GenInferencer from opencompass.openicl.icl_evaluator import AccEvaluator from opencompass.datasets import CBDataset_V2 -from opencompass.utils.text_postprocessors import first_capital_postprocess +from opencompass.utils.text_postprocessors import first_option_postprocess CB_reader_cfg = dict( input_columns=["premise", "hypothesis"], 
@@ -29,7 +29,7 @@ CB_infer_cfg = dict( CB_eval_cfg = dict( evaluator=dict(type=AccEvaluator), pred_role="BOT", - pred_postprocessor=dict(type=first_capital_postprocess), + pred_postprocessor=dict(type=first_option_postprocess, options='ABC'), ) CB_datasets = [ diff --git a/configs/datasets/SuperGLUE_COPA/SuperGLUE_COPA_gen_91ca53.py b/configs/datasets/SuperGLUE_COPA/SuperGLUE_COPA_gen_91ca53.py index 7beb22da..3f0420a9 100644 --- a/configs/datasets/SuperGLUE_COPA/SuperGLUE_COPA_gen_91ca53.py +++ b/configs/datasets/SuperGLUE_COPA/SuperGLUE_COPA_gen_91ca53.py @@ -3,7 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever from opencompass.openicl.icl_inferencer import GenInferencer from opencompass.openicl.icl_evaluator import AccEvaluator from opencompass.datasets import COPADataset_V2 -from opencompass.utils.text_postprocessors import first_capital_postprocess +from opencompass.utils.text_postprocessors import first_option_postprocess COPA_reader_cfg = dict( input_columns=["question", "premise", "choice1", "choice2"], @@ -29,7 +29,7 @@ COPA_infer_cfg = dict( COPA_eval_cfg = dict( evaluator=dict(type=AccEvaluator), pred_role="BOT", - pred_postprocessor=dict(type=first_capital_postprocess), + pred_postprocessor=dict(type=first_option_postprocess, options='AB'), ) COPA_datasets = [ diff --git a/configs/datasets/SuperGLUE_MultiRC/SuperGLUE_MultiRC_gen_27071f.py b/configs/datasets/SuperGLUE_MultiRC/SuperGLUE_MultiRC_gen_27071f.py index f69ad70b..e579a69a 100644 --- a/configs/datasets/SuperGLUE_MultiRC/SuperGLUE_MultiRC_gen_27071f.py +++ b/configs/datasets/SuperGLUE_MultiRC/SuperGLUE_MultiRC_gen_27071f.py @@ -3,7 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever from opencompass.openicl.icl_inferencer import GenInferencer from opencompass.openicl.icl_evaluator import AccEvaluator from opencompass.datasets import MultiRCDataset_V2 -from opencompass.utils.text_postprocessors import first_capital_postprocess +from 
opencompass.utils.text_postprocessors import first_option_postprocess MultiRC_reader_cfg = dict( input_columns=["question", "text", "answer"], @@ -28,7 +28,7 @@ MultiRC_infer_cfg = dict( MultiRC_eval_cfg = dict( evaluator=dict(type=AccEvaluator), pred_role="BOT", - pred_postprocessor=dict(type=first_capital_postprocess), + pred_postprocessor=dict(type=first_option_postprocess, options='AB'), ) MultiRC_datasets = [ diff --git a/configs/datasets/SuperGLUE_RTE/SuperGLUE_RTE_gen_68aac7.py b/configs/datasets/SuperGLUE_RTE/SuperGLUE_RTE_gen_68aac7.py index 7fdc39f3..aabbd95f 100644 --- a/configs/datasets/SuperGLUE_RTE/SuperGLUE_RTE_gen_68aac7.py +++ b/configs/datasets/SuperGLUE_RTE/SuperGLUE_RTE_gen_68aac7.py @@ -3,7 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever from opencompass.openicl.icl_inferencer import GenInferencer from opencompass.openicl.icl_evaluator import AccEvaluator from opencompass.datasets import AXDataset_V2 -from opencompass.utils.text_postprocessors import first_capital_postprocess +from opencompass.utils.text_postprocessors import first_option_postprocess RTE_reader_cfg = dict( input_columns=["hypothesis", "premise"], @@ -28,7 +28,7 @@ RTE_infer_cfg = dict( RTE_eval_cfg = dict( evaluator=dict(type=AccEvaluator), pred_role="BOT", - pred_postprocessor=dict(type=first_capital_postprocess), + pred_postprocessor=dict(type=first_option_postprocess, options='AB'), ) RTE_datasets = [ diff --git a/configs/datasets/agieval/agieval_gen_397d81.py b/configs/datasets/agieval/agieval_gen_397d81.py index 2e1461df..2bb73f24 100644 --- a/configs/datasets/agieval/agieval_gen_397d81.py +++ b/configs/datasets/agieval/agieval_gen_397d81.py @@ -3,7 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever from opencompass.openicl.icl_inferencer import GenInferencer from opencompass.openicl.icl_evaluator import AccEvaluator from opencompass.datasets import AGIEvalDataset_v2, AGIEvalEvaluator -from opencompass.utils.text_postprocessors import 
first_capital_postprocess, first_capital_postprocess_multi +from opencompass.utils.text_postprocessors import first_option_postprocess, first_capital_postprocess_multi agieval_reader_cfg = dict( input_columns=['question', 'options'], output_column='label') @@ -76,14 +76,16 @@ for _name in agieval_single_choice_sets: prompt_template=dict( type=PromptTemplate, template=dict(round=[ - dict(role='HUMAN', prompt=f'{{question}}\n{{options}}\n{_hint}') + dict( + role='HUMAN', prompt=f'{{question}}\n{{options}}\n{_hint}') ])), retriever=dict(type=ZeroRetriever), inferencer=dict(type=GenInferencer, max_out_len=1024)) agieval_eval_cfg = dict( evaluator=dict(type=AccEvaluator), - pred_postprocessor=dict(type=first_capital_postprocess)) + pred_postprocessor=dict( + type=first_option_postprocess, options='ABCDE')) agieval_datasets.append( dict( @@ -105,7 +107,8 @@ for _name in agieval_multiple_choices_sets: prompt_template=dict( type=PromptTemplate, template=dict(round=[ - dict(role='HUMAN', prompt=f'{{question}}\n{{options}}\n{_hint}') + dict( + role='HUMAN', prompt=f'{{question}}\n{{options}}\n{_hint}') ])), retriever=dict(type=ZeroRetriever), inferencer=dict(type=GenInferencer, max_out_len=1024)) diff --git a/configs/datasets/hellaswag/hellaswag_gen_6faab5.py b/configs/datasets/hellaswag/hellaswag_gen_6faab5.py index 18f76f04..48f0fe91 100644 --- a/configs/datasets/hellaswag/hellaswag_gen_6faab5.py +++ b/configs/datasets/hellaswag/hellaswag_gen_6faab5.py @@ -3,7 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever from opencompass.openicl.icl_inferencer import GenInferencer from opencompass.openicl.icl_evaluator import AccEvaluator from opencompass.datasets import hellaswagDataset_V2 -from opencompass.utils.text_postprocessors import first_capital_postprocess +from opencompass.utils.text_postprocessors import first_option_postprocess hellaswag_reader_cfg = dict( input_columns=["ctx", "A", "B", "C", "D"], @@ -16,11 +16,10 @@ hellaswag_infer_cfg = dict( 
template=dict(round=[ dict( role="HUMAN", - prompt=( - "{ctx}\nQuestion: Which ending makes the most sense?\n" - "A. {A}\nB. {B}\nC. {C}\nD. {D}\n" - "You may choose from 'A', 'B', 'C', 'D'.\n" - "Answer:"), + prompt=("{ctx}\nQuestion: Which ending makes the most sense?\n" + "A. {A}\nB. {B}\nC. {C}\nD. {D}\n" + "You may choose from 'A', 'B', 'C', 'D'.\n" + "Answer:"), ), ]), ), @@ -31,7 +30,7 @@ hellaswag_infer_cfg = dict( hellaswag_eval_cfg = dict( evaluator=dict(type=AccEvaluator), pred_role="BOT", - pred_postprocessor=dict(type=first_capital_postprocess), + pred_postprocessor=dict(type=first_option_postprocess, options='ABCD'), ) hellaswag_datasets = [ diff --git a/configs/datasets/obqa/obqa_gen_9069e4.py b/configs/datasets/obqa/obqa_gen_9069e4.py index 5183c3d7..352d9ebd 100644 --- a/configs/datasets/obqa/obqa_gen_9069e4.py +++ b/configs/datasets/obqa/obqa_gen_9069e4.py @@ -3,7 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever from opencompass.openicl.icl_inferencer import GenInferencer from opencompass.openicl.icl_evaluator import AccEvaluator from opencompass.datasets import OBQADataset -from opencompass.utils.text_postprocessors import first_capital_postprocess +from opencompass.utils.text_postprocessors import first_option_postprocess _input_columns = [ ["question_stem", "A", "B", "C", "D"], @@ -14,14 +14,16 @@ _template = [ round=[ dict( role="HUMAN", - prompt="Question: {question_stem}\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nAnswer:" + prompt= + "Question: {question_stem}\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nAnswer:" ), ], ), dict( round=[ dict( role="HUMAN", - prompt="Given the fact: {fact1}\nQuestion: {question_stem}\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nAnswer:", + prompt= + "Given the fact: {fact1}\nQuestion: {question_stem}\nA. {A}\nB. {B}\nC. {C}\nD. 
{D}\nAnswer:", ), ], ), ] @@ -46,16 +48,14 @@ for _i in range(2): obqa_reader_cfg = dict( input_columns=_input_columns[_i], output_column="answerKey") obqa_infer_cfg = dict( - prompt_template=dict( - type=PromptTemplate, - template=_template[_i]), + prompt_template=dict(type=PromptTemplate, template=_template[_i]), retriever=dict(type=ZeroRetriever), inferencer=dict(type=GenInferencer), ) obqa_eval_cfg = dict( evaluator=dict(type=AccEvaluator), pred_role="BOT", - pred_postprocessor=dict(type=first_capital_postprocess), + pred_postprocessor=dict(type=first_option_postprocess, options='ABCD'), ) obqa_datasets[_i]["reader_cfg"] = obqa_reader_cfg diff --git a/configs/datasets/piqa/piqa_gen_1194eb.py b/configs/datasets/piqa/piqa_gen_1194eb.py index e4ba2257..69488edd 100644 --- a/configs/datasets/piqa/piqa_gen_1194eb.py +++ b/configs/datasets/piqa/piqa_gen_1194eb.py @@ -3,7 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever from opencompass.openicl.icl_inferencer import GenInferencer from opencompass.openicl.icl_evaluator import AccEvaluator from opencompass.datasets import piqaDataset_V2 -from opencompass.utils.text_postprocessors import first_capital_postprocess +from opencompass.utils.text_postprocessors import first_option_postprocess piqa_reader_cfg = dict( input_columns=["goal", "sol1", "sol2"], @@ -15,7 +15,9 @@ piqa_infer_cfg = dict( type=PromptTemplate, template=dict( round=[ - dict(role="HUMAN", prompt="{goal}\nA. {sol1}\nB. {sol2}\nAnswer:") + dict( + role="HUMAN", + prompt="{goal}\nA. {sol1}\nB. 
{sol2}\nAnswer:") ], ), ), retriever=dict(type=ZeroRetriever), @@ -25,7 +27,7 @@ piqa_infer_cfg = dict( piqa_eval_cfg = dict( evaluator=dict(type=AccEvaluator), pred_role="BOT", - pred_postprocessor=dict(type=first_capital_postprocess), + pred_postprocessor=dict(type=first_option_postprocess, options='AB'), ) piqa_datasets = [ diff --git a/configs/datasets/race/race_gen_69ee4f.py b/configs/datasets/race/race_gen_69ee4f.py index 607672d7..6ffd013f 100644 --- a/configs/datasets/race/race_gen_69ee4f.py +++ b/configs/datasets/race/race_gen_69ee4f.py @@ -3,7 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever from opencompass.openicl.icl_inferencer import GenInferencer from opencompass.openicl.icl_evaluator import AccEvaluator from opencompass.datasets import RaceDataset -from opencompass.utils.text_postprocessors import first_capital_postprocess +from opencompass.utils.text_postprocessors import first_option_postprocess race_reader_cfg = dict( input_columns=['article', 'question', 'A', 'B', 'C', 'D'], @@ -24,7 +24,7 @@ race_infer_cfg = dict( race_eval_cfg = dict( evaluator=dict(type=AccEvaluator), - pred_postprocessor=dict(type=first_capital_postprocess), + pred_postprocessor=dict(type=first_option_postprocess, options='ABCD'), pred_role='BOT') race_datasets = [ diff --git a/configs/datasets/storycloze/storycloze_gen_7f656a.py b/configs/datasets/storycloze/storycloze_gen_7f656a.py index 77e03d7f..ae141378 100644 --- a/configs/datasets/storycloze/storycloze_gen_7f656a.py +++ b/configs/datasets/storycloze/storycloze_gen_7f656a.py @@ -3,7 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever from opencompass.openicl.icl_inferencer import GenInferencer from opencompass.openicl.icl_evaluator import AccEvaluator from opencompass.datasets import storyclozeDataset_V2 -from opencompass.utils.text_postprocessors import first_capital_postprocess +from opencompass.utils.text_postprocessors import first_option_postprocess storycloze_reader_cfg = dict( 
def first_option_postprocess(text: str, options: str) -> str:
    """Return the first answer option found in ``text``.

    The text is searched with a list of patterns ordered from most to least
    specific: explicit answer cues in English and Chinese first, then an
    option character standing on its own, and finally any occurrence of an
    option character. The option matched by the first pattern that hits is
    returned.

    Args:
        text: The raw prediction text to search.
        options: The valid option characters, e.g. ``'ABCD'``. Each
            character is treated literally, so regex metacharacters are safe.

    Returns:
        The matched option character, or ``''`` if ``options`` is empty or
        no option is found in ``text``.
    """
    if not options:
        # An empty character class ('[]') is not a valid regex.
        return ''

    # Escape every option so characters such as '-', '^' or ']' cannot
    # change the meaning of the character class.
    option_class = '[' + ''.join(re.escape(ch) for ch in options) + ']'

    # Cue phrases that introduce an answer. The lazy '.*?' lets a few filler
    # characters (colon, space, "option", ...) sit between cue and option.
    cue_prefixes = [
        r'[Tt]he answer is ',
        r'[Tt]he correct answer is ',
        r'答案是.*?',
        r'答案为.*?',
        # '故选' is the intended phrase; the historical misspelling '固选'
        # is still accepted so previously matching texts keep matching.
        r'[故固]选.*?',
        r'答案应该是.*?',
    ]
    patterns = [f'{prefix}({option_class})' for prefix in cue_prefixes]
    # A standalone option: preceded by whitespace or start of text, followed
    # by whitespace, punctuation or end of text. '$' must be an anchor
    # outside the character class; the literal '$' inside the class is kept
    # only for backward compatibility.
    patterns.append(rf'(?:\s|^)({option_class})(?:[\s。,,.$]|$)')
    # Last resort: the first option character anywhere in the text.
    patterns.append(f'({option_class})')

    for pattern in patterns:
        match = re.search(pattern, text)
        if match:
            # Return the captured option itself rather than scanning the
            # whole match, whose cue phrase may contain other option letters.
            return match.group(1)
    return ''