From a31a77c5c124ca029bfaab1e3250a7f5ae549260 Mon Sep 17 00:00:00 2001 From: Alexander Lam Date: Tue, 10 Sep 2024 16:06:02 +0800 Subject: [PATCH] [Feature] Add SciCode summarizer config (#1514) * [Feature] added SciCode summarizer config and dataset config for with background evaluation * fix lint issues * removed unnecessary type in summarizer group --- .../scicode/scicode_wbg_gen_085b98.py | 36 +++++++++++++++++++ configs/summarizers/groups/scicode.py | 23 ++++++++++++ configs/summarizers/scicode.py | 15 ++++++++ .../scicode/scicode_wbg_gen_085b98.py | 36 +++++++++++++++++++ .../configs/summarizers/groups/scicode.py | 23 ++++++++++++ opencompass/configs/summarizers/scicode.py | 15 ++++++++ 6 files changed, 148 insertions(+) create mode 100644 configs/datasets/scicode/scicode_wbg_gen_085b98.py create mode 100644 configs/summarizers/groups/scicode.py create mode 100644 configs/summarizers/scicode.py create mode 100644 opencompass/configs/datasets/scicode/scicode_wbg_gen_085b98.py create mode 100644 opencompass/configs/summarizers/groups/scicode.py create mode 100644 opencompass/configs/summarizers/scicode.py diff --git a/configs/datasets/scicode/scicode_wbg_gen_085b98.py b/configs/datasets/scicode/scicode_wbg_gen_085b98.py new file mode 100644 index 00000000..f593edcd --- /dev/null +++ b/configs/datasets/scicode/scicode_wbg_gen_085b98.py @@ -0,0 +1,36 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import ChatInferencer +from opencompass.datasets import SciCodeDataset, SciCodeEvaluator + + +SciCode_reader_cfg = dict(input_columns=['prompt'], output_column=None) + +SciCode_infer_cfg = dict( + ice_template=dict( + type=PromptTemplate, + template='', + ), + + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=ChatInferencer, infer_mode='every', max_out_len=4096)) + +SciCode_eval_cfg = dict( + evaluator=dict( + type=SciCodeEvaluator, + dataset_path='./data/scicode', + with_bg=True + ) +) + +SciCode_datasets = [ + dict( + abbr='SciCode_with_background', + type=SciCodeDataset, + path='./data/scicode', + with_bg=True, + reader_cfg=SciCode_reader_cfg, + infer_cfg=SciCode_infer_cfg, + eval_cfg=SciCode_eval_cfg + ) +] diff --git a/configs/summarizers/groups/scicode.py b/configs/summarizers/groups/scicode.py new file mode 100644 index 00000000..757b8953 --- /dev/null +++ b/configs/summarizers/groups/scicode.py @@ -0,0 +1,23 @@ +scicode_summary_groups = [ + { + 'name': 'SciCode', + 'subsets': [ + ['SciCode', 'accuracy'], + ['SciCode', 'sub_accuracy'], + ] + }, + { + 'name': 'SciCode_with_background', + 'subsets': [ + ['SciCode_with_background', 'accuracy'], + ['SciCode_with_background', 'sub_accuracy'], + ] + }, + { + 'name': 'SciCode_wo_background', + 'subsets': [ + ['SciCode_wo_background', 'accuracy'], + ['SciCode_wo_background', 'sub_accuracy'], + ] + } +] diff --git a/configs/summarizers/scicode.py b/configs/summarizers/scicode.py new file mode 100644 index 00000000..fcf691f6 --- /dev/null +++ b/configs/summarizers/scicode.py @@ -0,0 +1,15 @@ +from mmengine.config import read_base + +with read_base(): + from .groups.scicode import scicode_summary_groups + +summarizer = dict( + dataset_abbrs=[ + ['SciCode_with_background', 'accuracy'], + ['SciCode_with_background', 'sub_accuracy'], + ['SciCode_wo_background', 'accuracy'], + ['SciCode_wo_background', 'sub_accuracy'], + ], + summary_groups=sum( + [v for k, v in locals().items() if k.endswith('_summary_groups')], []) +) diff --git a/opencompass/configs/datasets/scicode/scicode_wbg_gen_085b98.py b/opencompass/configs/datasets/scicode/scicode_wbg_gen_085b98.py new file mode 100644 index 00000000..f593edcd --- /dev/null +++ b/opencompass/configs/datasets/scicode/scicode_wbg_gen_085b98.py @@ -0,0 +1,36 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import ChatInferencer +from opencompass.datasets import SciCodeDataset, SciCodeEvaluator + + +SciCode_reader_cfg = dict(input_columns=['prompt'], output_column=None) + +SciCode_infer_cfg = dict( + ice_template=dict( + type=PromptTemplate, + template='', + ), + + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=ChatInferencer, infer_mode='every', max_out_len=4096)) + +SciCode_eval_cfg = dict( + evaluator=dict( + type=SciCodeEvaluator, + dataset_path='./data/scicode', + with_bg=True + ) +) + +SciCode_datasets = [ + dict( + abbr='SciCode_with_background', + type=SciCodeDataset, + path='./data/scicode', + with_bg=True, + reader_cfg=SciCode_reader_cfg, + infer_cfg=SciCode_infer_cfg, + eval_cfg=SciCode_eval_cfg + ) +] diff --git a/opencompass/configs/summarizers/groups/scicode.py b/opencompass/configs/summarizers/groups/scicode.py new file mode 100644 index 00000000..757b8953 --- /dev/null +++ b/opencompass/configs/summarizers/groups/scicode.py @@ -0,0 +1,23 @@ +scicode_summary_groups = [ + { + 'name': 'SciCode', + 'subsets': [ + ['SciCode', 'accuracy'], + ['SciCode', 'sub_accuracy'], + ] + }, + { + 'name': 'SciCode_with_background', + 'subsets': [ + ['SciCode_with_background', 'accuracy'], + ['SciCode_with_background', 'sub_accuracy'], + ] + }, + { + 'name': 'SciCode_wo_background', + 'subsets': [ + ['SciCode_wo_background', 'accuracy'], + ['SciCode_wo_background', 'sub_accuracy'], + ] + } +] diff --git a/opencompass/configs/summarizers/scicode.py b/opencompass/configs/summarizers/scicode.py new file mode 100644 index 00000000..fcf691f6 --- /dev/null +++ b/opencompass/configs/summarizers/scicode.py @@ -0,0 +1,15 @@ +from mmengine.config import read_base + +with read_base(): + from .groups.scicode import scicode_summary_groups + +summarizer = dict( + dataset_abbrs=[ + ['SciCode_with_background', 'accuracy'], + ['SciCode_with_background', 'sub_accuracy'], + ['SciCode_wo_background', 'accuracy'], + ['SciCode_wo_background', 'sub_accuracy'], + ], + summary_groups=sum( + [v for k, v in locals().items() if k.endswith('_summary_groups')], []) +)