From fb11108723e76ab05df1df56d30901dc7b41ffa3 Mon Sep 17 00:00:00 2001 From: yingfhu Date: Tue, 4 Jul 2023 22:11:33 +0800 Subject: [PATCH] [Feat] support opencompass --- configs/datasets/ARC_c/ARC_c_ppl_2b1755.py | 33 ++++++++++++ configs/datasets/CLUE_C3/CLUE_C3_gen.py | 4 ++ .../datasets/CLUE_C3/CLUE_C3_gen_9e3de9.py | 50 +++++++++++++++++ configs/datasets/CLUE_DRCD/CLUE_DRCD_gen.py | 4 ++ .../CLUE_afqmc/CLUE_afqmc_gen_db509b.py | 42 +++++++++++++++ .../CLUE_afqmc/CLUE_afqmc_ppl_00b348.py | 34 ++++++++++++ .../CLUE_afqmc/CLUE_afqmc_ppl_2313cf.py | 44 +++++++++++++++ configs/datasets/CLUE_cmnli/CLUE_cmnli_gen.py | 4 ++ configs/datasets/CLUE_ocnli/CLUE_ocnli_gen.py | 4 ++ .../datasets/FewCLUE_chid/FewCLUE_chid_ppl.py | 4 ++ .../FewCLUE_cluewsc/FewCLUE_cluewsc_gen.py | 4 ++ .../FewCLUE_cluewsc_gen_276956.py | 50 +++++++++++++++++ .../datasets/FewCLUE_csl/FewCLUE_csl_gen.py | 4 ++ .../FewCLUE_csl/FewCLUE_csl_ppl_8eee08.py | 41 ++++++++++++++ .../FewCLUE_ocnli_fc_gen_bef37f.py | 49 +++++++++++++++++ .../FewCLUE_tnews/FewCLUE_tnews_ppl_33cc73.py | 48 +++++++++++++++++ .../SuperGLUE_AX_b_ppl_4bd960.py | 53 +++++++++++++++++++ .../SuperGLUE_BoolQ_ppl_f80fb0.py | 45 ++++++++++++++++ .../SuperGLUE_CB/SuperGLUE_CB_ppl_012063.py | 33 ++++++++++++ .../SuperGLUE_COPA/SuperGLUE_COPA_gen.py | 4 ++ .../SuperGLUE_COPA/SuperGLUE_COPA_ppl.py | 4 ++ .../SuperGLUE_COPA_ppl_ed59be.py | 34 ++++++++++++ .../SuperGLUE_MultiRC_gen.py | 4 ++ .../SuperGLUE_MultiRC_ppl_1123bd.py | 30 +++++++++++ .../SuperGLUE_RTE/SuperGLUE_RTE_ppl.py | 4 ++ .../SuperGLUE_ReCoRD/SuperGLUE_ReCoRD_gen.py | 4 ++ .../SuperGLUE_ReCoRD_gen_d8f19c.py | 42 +++++++++++++++ .../SuperGLUE_WSC/SuperGLUE_WSC_ppl_85f45f.py | 51 ++++++++++++++++++ configs/datasets/XLSum/XLSum_gen.py | 4 ++ configs/datasets/bbh/bbh_gen.py | 4 ++ configs/datasets/collections/base_small.py | 39 ++++++++++++++ configs/datasets/glm/nq.py | 38 +++++++++++++ configs/datasets/glm/triviaqa.py | 41 ++++++++++++++ .../humaneval/humaneval_gen_581044.py | 40 ++++++++++++++ .../jigsawmultilingual_ppl.py | 4 ++ .../jigsawmultilingual_ppl_640128.py | 47 ++++++++++++++++ .../jigsawmultilingual_ppl_da5d28.py | 43 +++++++++++++++ configs/datasets/lambada/lambada_gen.py | 4 ++ configs/datasets/math/math_gen.py | 4 ++ configs/datasets/math/math_gen_b4c82a.py | 53 +++++++++++++++++++ .../datasets/narrativeqa/narrativeqa_gen.py | 4 ++ configs/datasets/nq/nq_gen_c00b89.py | 29 ++++++++++ configs/datasets/piqa/piqa_gen.py | 4 ++ configs/datasets/piqa/piqa_ppl_788dbe.py | 31 +++++++++++ configs/datasets/qabench/qabench_gen.py | 4 ++ configs/datasets/race/race_gen_12de48.py | 46 ++++++++++++++++ configs/datasets/race/race_gen_d18b89.py | 40 ++++++++++++++ configs/datasets/race/race_ppl.py | 4 ++ .../realtoxicprompts/realtoxicprompts_gen.py | 4 ++ configs/datasets/safety/safety_gen.py | 4 ++ configs/datasets/siqa/siqa_gen_a3c714.py | 42 +++++++++++++++ configs/datasets/siqa/siqa_ppl.py | 4 ++ configs/datasets/storycloze/storycloze_ppl.py | 4 ++ .../storycloze/storycloze_ppl_7f4c64.py | 36 +++++++++++++ configs/datasets/summedits/summedits_gen.py | 4 ++ .../summedits/summedits_gen_4f35b5.py | 37 +++++++++++++ .../triviaqarc/triviaqarc_gen_a02306.py | 30 +++++++++++ configs/models/classic/tigerbot-7b-sft.py | 29 ++++++++++ configs/summarizers/groups/bbh.py | 6 +++ docs/en/_templates/404.html | 18 +++++++ docs/en/advanced_guides/new_dataset.md | 1 + docs/zh_cn/prompt/few_shot.md | 1 + docs/zh_cn/user_guides/config.md | 2 + docs/zh_cn/user_guides/framework_overview.md | 1 + 
opencompass/datasets/TheoremQA.py | 27 ++++++++++ opencompass/datasets/cb.py | 25 +++++++++ opencompass/datasets/chid.py | 43 +++++++++++++++ opencompass/datasets/civilcomments.py | 36 +++++++++++++ opencompass/datasets/commonsenseqa.py | 22 ++++++++ opencompass/datasets/crowspairs.py | 34 ++++++++++++ opencompass/datasets/eprstmt.py | 27 ++++++++++ opencompass/datasets/huggingface.py | 13 +++++ opencompass/datasets/piqa.py | 25 +++++++++ opencompass/datasets/realtoxicprompts.py | 30 +++++++++++ opencompass/datasets/siqa.py | 20 +++++++ opencompass/openicl/icl_evaluator/__init__.py | 5 ++ opencompass/openicl/utils/logging.py | 40 ++++++++++++++ opencompass/partitioners/__init__.py | 2 + opencompass/utils/__init__.py | 10 ++++ opencompass/utils/build.py | 22 ++++++++ opencompass/utils/types.py | 45 ++++++++++++++++ 81 files changed, 1859 insertions(+) create mode 100644 configs/datasets/ARC_c/ARC_c_ppl_2b1755.py create mode 100644 configs/datasets/CLUE_C3/CLUE_C3_gen.py create mode 100644 configs/datasets/CLUE_C3/CLUE_C3_gen_9e3de9.py create mode 100644 configs/datasets/CLUE_DRCD/CLUE_DRCD_gen.py create mode 100644 configs/datasets/CLUE_afqmc/CLUE_afqmc_gen_db509b.py create mode 100644 configs/datasets/CLUE_afqmc/CLUE_afqmc_ppl_00b348.py create mode 100644 configs/datasets/CLUE_afqmc/CLUE_afqmc_ppl_2313cf.py create mode 100644 configs/datasets/CLUE_cmnli/CLUE_cmnli_gen.py create mode 100644 configs/datasets/CLUE_ocnli/CLUE_ocnli_gen.py create mode 100644 configs/datasets/FewCLUE_chid/FewCLUE_chid_ppl.py create mode 100644 configs/datasets/FewCLUE_cluewsc/FewCLUE_cluewsc_gen.py create mode 100644 configs/datasets/FewCLUE_cluewsc/FewCLUE_cluewsc_gen_276956.py create mode 100644 configs/datasets/FewCLUE_csl/FewCLUE_csl_gen.py create mode 100644 configs/datasets/FewCLUE_csl/FewCLUE_csl_ppl_8eee08.py create mode 100644 configs/datasets/FewCLUE_ocnli_fc/FewCLUE_ocnli_fc_gen_bef37f.py create mode 100644 configs/datasets/FewCLUE_tnews/FewCLUE_tnews_ppl_33cc73.py create mode 100644 configs/datasets/SuperGLUE_AX_b/SuperGLUE_AX_b_ppl_4bd960.py create mode 100644 configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_ppl_f80fb0.py create mode 100644 configs/datasets/SuperGLUE_CB/SuperGLUE_CB_ppl_012063.py create mode 100644 configs/datasets/SuperGLUE_COPA/SuperGLUE_COPA_gen.py create mode 100644 configs/datasets/SuperGLUE_COPA/SuperGLUE_COPA_ppl.py create mode 100644 configs/datasets/SuperGLUE_COPA/SuperGLUE_COPA_ppl_ed59be.py create mode 100644 configs/datasets/SuperGLUE_MultiRC/SuperGLUE_MultiRC_gen.py create mode 100644 configs/datasets/SuperGLUE_MultiRC/SuperGLUE_MultiRC_ppl_1123bd.py create mode 100644 configs/datasets/SuperGLUE_RTE/SuperGLUE_RTE_ppl.py create mode 100644 configs/datasets/SuperGLUE_ReCoRD/SuperGLUE_ReCoRD_gen.py create mode 100644 configs/datasets/SuperGLUE_ReCoRD/SuperGLUE_ReCoRD_gen_d8f19c.py create mode 100644 configs/datasets/SuperGLUE_WSC/SuperGLUE_WSC_ppl_85f45f.py create mode 100644 configs/datasets/XLSum/XLSum_gen.py create mode 100644 configs/datasets/bbh/bbh_gen.py create mode 100644 configs/datasets/collections/base_small.py create mode 100644 configs/datasets/glm/nq.py create mode 100644 configs/datasets/glm/triviaqa.py create mode 100644 configs/datasets/humaneval/humaneval_gen_581044.py create mode 100644 configs/datasets/jigsawmultilingual/jigsawmultilingual_ppl.py create mode 100644 configs/datasets/jigsawmultilingual/jigsawmultilingual_ppl_640128.py create mode 100644 configs/datasets/jigsawmultilingual/jigsawmultilingual_ppl_da5d28.py create mode 100644 
configs/datasets/lambada/lambada_gen.py create mode 100644 configs/datasets/math/math_gen.py create mode 100644 configs/datasets/math/math_gen_b4c82a.py create mode 100644 configs/datasets/narrativeqa/narrativeqa_gen.py create mode 100644 configs/datasets/nq/nq_gen_c00b89.py create mode 100644 configs/datasets/piqa/piqa_gen.py create mode 100644 configs/datasets/piqa/piqa_ppl_788dbe.py create mode 100644 configs/datasets/qabench/qabench_gen.py create mode 100644 configs/datasets/race/race_gen_12de48.py create mode 100644 configs/datasets/race/race_gen_d18b89.py create mode 100644 configs/datasets/race/race_ppl.py create mode 100644 configs/datasets/realtoxicprompts/realtoxicprompts_gen.py create mode 100644 configs/datasets/safety/safety_gen.py create mode 100644 configs/datasets/siqa/siqa_gen_a3c714.py create mode 100644 configs/datasets/siqa/siqa_ppl.py create mode 100644 configs/datasets/storycloze/storycloze_ppl.py create mode 100644 configs/datasets/storycloze/storycloze_ppl_7f4c64.py create mode 100644 configs/datasets/summedits/summedits_gen.py create mode 100644 configs/datasets/summedits/summedits_gen_4f35b5.py create mode 100644 configs/datasets/triviaqarc/triviaqarc_gen_a02306.py create mode 100644 configs/models/classic/tigerbot-7b-sft.py create mode 100644 configs/summarizers/groups/bbh.py create mode 100644 docs/en/_templates/404.html create mode 100644 docs/en/advanced_guides/new_dataset.md create mode 100644 docs/zh_cn/prompt/few_shot.md create mode 100644 docs/zh_cn/user_guides/config.md create mode 100644 docs/zh_cn/user_guides/framework_overview.md create mode 100644 opencompass/datasets/TheoremQA.py create mode 100644 opencompass/datasets/cb.py create mode 100644 opencompass/datasets/chid.py create mode 100644 opencompass/datasets/civilcomments.py create mode 100644 opencompass/datasets/commonsenseqa.py create mode 100644 opencompass/datasets/crowspairs.py create mode 100644 opencompass/datasets/eprstmt.py create mode 100644 opencompass/datasets/huggingface.py create mode 100644 opencompass/datasets/piqa.py create mode 100644 opencompass/datasets/realtoxicprompts.py create mode 100644 opencompass/datasets/siqa.py create mode 100644 opencompass/openicl/icl_evaluator/__init__.py create mode 100644 opencompass/openicl/utils/logging.py create mode 100644 opencompass/partitioners/__init__.py create mode 100644 opencompass/utils/__init__.py create mode 100644 opencompass/utils/build.py create mode 100644 opencompass/utils/types.py diff --git a/configs/datasets/ARC_c/ARC_c_ppl_2b1755.py b/configs/datasets/ARC_c/ARC_c_ppl_2b1755.py new file mode 100644 index 00000000..f0351b49 --- /dev/null +++ b/configs/datasets/ARC_c/ARC_c_ppl_2b1755.py @@ -0,0 +1,33 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import PPLInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import ARCDataset + +ARC_c_reader_cfg = dict( + input_columns=['question', 'textA', 'textB', 'textC', 'textD'], + output_column='answerKey') + +ARC_c_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template={ + "A": "Question: {question}\nAnswer: {textA}", + "B": "Question: {question}\nAnswer: {textB}", + "C": "Question: {question}\nAnswer: {textC}", + "D": "Question: {question}\nAnswer: {textD}" + }), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=PPLInferencer)) + +ARC_c_eval_cfg = 
dict(evaluator=dict(type=AccEvaluator)) + +ARC_c_datasets = [ + dict( + type=ARCDataset, + abbr='ARC-c', + path='./data/ARC/ARC-c/ARC-Challenge-Dev.jsonl', + reader_cfg=ARC_c_reader_cfg, + infer_cfg=ARC_c_infer_cfg, + eval_cfg=ARC_c_eval_cfg) +] diff --git a/configs/datasets/CLUE_C3/CLUE_C3_gen.py b/configs/datasets/CLUE_C3/CLUE_C3_gen.py new file mode 100644 index 00000000..6231c46d --- /dev/null +++ b/configs/datasets/CLUE_C3/CLUE_C3_gen.py @@ -0,0 +1,4 @@ +from mmengine.config import read_base + +with read_base(): + from .CLUE_C3_gen_9e3de9 import C3_datasets # noqa: F401, F403 diff --git a/configs/datasets/CLUE_C3/CLUE_C3_gen_9e3de9.py b/configs/datasets/CLUE_C3/CLUE_C3_gen_9e3de9.py new file mode 100644 index 00000000..af5cb16f --- /dev/null +++ b/configs/datasets/CLUE_C3/CLUE_C3_gen_9e3de9.py @@ -0,0 +1,50 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import GenInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import C3Dataset_V2 + +C3_reader_cfg = dict( + input_columns=[ + "question", + "content", + "choice0", + "choice1", + "choice2", + "choice3", + "choices", + ], + output_column="label", +) + +C3_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template=dict(round=[ + dict( + role="HUMAN", + prompt= + "{content}\n问:{question}\nA. {choice0}\nB. {choice1}\nC. {choice2}\nD. {choice3}\n请从“A”,“B”,“C”,“D”中进行选择。\n答:", + ), + ]), + ), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=GenInferencer), +) + +C3_eval_cfg = dict( + evaluator=dict(type=AccEvaluator), + pred_role="BOT", + pred_postprocessor=dict(type="first-capital"), +) + +C3_datasets = [ + dict( + abbr="C3", + type=C3Dataset_V2, + path="./data/CLUE/C3/dev_0.json", + reader_cfg=C3_reader_cfg, + infer_cfg=C3_infer_cfg, + eval_cfg=C3_eval_cfg, + ) +] diff --git a/configs/datasets/CLUE_DRCD/CLUE_DRCD_gen.py b/configs/datasets/CLUE_DRCD/CLUE_DRCD_gen.py new file mode 100644 index 00000000..bcd8fac0 --- /dev/null +++ b/configs/datasets/CLUE_DRCD/CLUE_DRCD_gen.py @@ -0,0 +1,4 @@ +from mmengine.config import read_base + +with read_base(): + from .CLUE_DRCD_gen_03b96b import DRCD_datasets # noqa: F401, F403 diff --git a/configs/datasets/CLUE_afqmc/CLUE_afqmc_gen_db509b.py b/configs/datasets/CLUE_afqmc/CLUE_afqmc_gen_db509b.py new file mode 100644 index 00000000..7591d29c --- /dev/null +++ b/configs/datasets/CLUE_afqmc/CLUE_afqmc_gen_db509b.py @@ -0,0 +1,42 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import GenInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import AFQMCDataset_V2 + +afqmc_reader_cfg = dict( + input_columns=["sentence1", "sentence2"], + output_column="label", + test_split="train") + +afqmc_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template=dict(round=[ + dict( + role="HUMAN", + prompt= + "语句一:“{sentence1}”\n语句二:“{sentence2}”\n语句一与语句二是关于蚂蚁金融产品的疑问,两者所询问的内容是否完全一致?\nA. 不完全一致\nB. 
完全一致\n请从“A”,“B”中进行选择。\n答:", + ), + ]), + ), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=GenInferencer), +) + +afqmc_eval_cfg = dict( + evaluator=dict(type=AccEvaluator), + pred_role="BOT", + pred_postprocessor=dict(type="first-capital"), +) + +afqmc_datasets = [ + dict( + abbr="afqmc-dev", + type=AFQMCDataset_V2, + path="./data/CLUE/AFQMC/dev.json", + reader_cfg=afqmc_reader_cfg, + infer_cfg=afqmc_infer_cfg, + eval_cfg=afqmc_eval_cfg, + ), +] diff --git a/configs/datasets/CLUE_afqmc/CLUE_afqmc_ppl_00b348.py b/configs/datasets/CLUE_afqmc/CLUE_afqmc_ppl_00b348.py new file mode 100644 index 00000000..57bb8d8b --- /dev/null +++ b/configs/datasets/CLUE_afqmc/CLUE_afqmc_ppl_00b348.py @@ -0,0 +1,34 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import PPLInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import HFDataset + +afqmc_reader_cfg = dict( + input_columns=['sentence1', 'sentence2'], + output_column='label', + test_split='train') + +afqmc_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template={ + 0: "{sentence1},{sentence2}不同。", + 1: "{sentence1},{sentence2}相似。" + }), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=PPLInferencer)) + +afqmc_eval_cfg = dict(evaluator=dict(type=AccEvaluator)) + +afqmc_datasets = [ + dict( + type=HFDataset, + abbr='afqmc-dev', + path='json', + data_files='./data/CLUE/AFQMC/dev.json', + split='train', + reader_cfg=afqmc_reader_cfg, + infer_cfg=afqmc_infer_cfg, + eval_cfg=afqmc_eval_cfg), +] diff --git a/configs/datasets/CLUE_afqmc/CLUE_afqmc_ppl_2313cf.py b/configs/datasets/CLUE_afqmc/CLUE_afqmc_ppl_2313cf.py new file mode 100644 index 00000000..fc329e1a --- /dev/null +++ b/configs/datasets/CLUE_afqmc/CLUE_afqmc_ppl_2313cf.py @@ -0,0 +1,44 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import PPLInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import HFDataset + +afqmc_reader_cfg = dict( + input_columns=['sentence1', 'sentence2'], + output_column='label', + test_split='train') + +afqmc_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template={ + 0: + dict(round=[ + dict( + role="HUMAN", prompt="“{sentence1}”与“{sentence2}”不同还是相似?"), + dict(role="BOT", prompt="不同。") + ]), + 1: + dict(round=[ + dict( + role="HUMAN", prompt="“{sentence1}”与“{sentence2}”不同还是相似?"), + dict(role="BOT", prompt="相似") + ]), + }), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=PPLInferencer)) + +afqmc_eval_cfg = dict(evaluator=dict(type=AccEvaluator)) + +afqmc_datasets = [ + dict( + type=HFDataset, + abbr='afqmc-dev', + path='json', + data_files='./data/CLUE/AFQMC/dev.json', + split='train', + reader_cfg=afqmc_reader_cfg, + infer_cfg=afqmc_infer_cfg, + eval_cfg=afqmc_eval_cfg), +] diff --git a/configs/datasets/CLUE_cmnli/CLUE_cmnli_gen.py b/configs/datasets/CLUE_cmnli/CLUE_cmnli_gen.py new file mode 100644 index 00000000..2ff9e91a --- /dev/null +++ b/configs/datasets/CLUE_cmnli/CLUE_cmnli_gen.py @@ -0,0 +1,4 @@ +from mmengine.config import read_base + +with read_base(): + from .CLUE_cmnli_gen_316313 import cmnli_datasets # noqa: F401, F403 diff --git a/configs/datasets/CLUE_ocnli/CLUE_ocnli_gen.py b/configs/datasets/CLUE_ocnli/CLUE_ocnli_gen.py new file mode 100644 
index 00000000..e27d25f2 --- /dev/null +++ b/configs/datasets/CLUE_ocnli/CLUE_ocnli_gen.py @@ -0,0 +1,4 @@ +from mmengine.config import read_base + +with read_base(): + from .CLUE_ocnli_gen_7c44b0 import ocnli_datasets # noqa: F401, F403 diff --git a/configs/datasets/FewCLUE_chid/FewCLUE_chid_ppl.py b/configs/datasets/FewCLUE_chid/FewCLUE_chid_ppl.py new file mode 100644 index 00000000..0845bf3e --- /dev/null +++ b/configs/datasets/FewCLUE_chid/FewCLUE_chid_ppl.py @@ -0,0 +1,4 @@ +from mmengine.config import read_base + +with read_base(): + from .FewCLUE_chid_ppl_b6cd88 import chid_datasets # noqa: F401, F403 diff --git a/configs/datasets/FewCLUE_cluewsc/FewCLUE_cluewsc_gen.py b/configs/datasets/FewCLUE_cluewsc/FewCLUE_cluewsc_gen.py new file mode 100644 index 00000000..4b77bf5e --- /dev/null +++ b/configs/datasets/FewCLUE_cluewsc/FewCLUE_cluewsc_gen.py @@ -0,0 +1,4 @@ +from mmengine.config import read_base + +with read_base(): + from .FewCLUE_cluewsc_gen_276956 import cluewsc_datasets # noqa: F401, F403 diff --git a/configs/datasets/FewCLUE_cluewsc/FewCLUE_cluewsc_gen_276956.py b/configs/datasets/FewCLUE_cluewsc/FewCLUE_cluewsc_gen_276956.py new file mode 100644 index 00000000..fd9fbc00 --- /dev/null +++ b/configs/datasets/FewCLUE_cluewsc/FewCLUE_cluewsc_gen_276956.py @@ -0,0 +1,50 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import GenInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import CluewscDataset_V2 + +cluewsc_reader_cfg = dict( + input_columns=["span1", "span2", "text", "new_text"], + output_column="label", +) + +cluewsc_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template=dict(round=[ + dict( + role="HUMAN", + prompt= + "{text}\n此处,“{span2}”是否指代“{span1}“?\nA. 是\nB. 
否\n请从”A“,”B“中进行选择。\n答:", + ), + ]), + ), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=GenInferencer), +) + +cluewsc_eval_cfg = dict( + evaluator=dict(type=AccEvaluator), + pred_role="BOT", + pred_postprocessor=dict(type="first-capital"), +) + +cluewsc_datasets = [ + dict( + abbr="cluewsc-dev", + type=CluewscDataset_V2, + path="./data/FewCLUE/cluewsc/dev_few_all.json", + reader_cfg=cluewsc_reader_cfg, + infer_cfg=cluewsc_infer_cfg, + eval_cfg=cluewsc_eval_cfg, + ), + dict( + abbr="cluewsc-test", + type=CluewscDataset_V2, + path="./data/FewCLUE/cluewsc/test_public.json", + reader_cfg=cluewsc_reader_cfg, + infer_cfg=cluewsc_infer_cfg, + eval_cfg=cluewsc_eval_cfg, + ), +] diff --git a/configs/datasets/FewCLUE_csl/FewCLUE_csl_gen.py b/configs/datasets/FewCLUE_csl/FewCLUE_csl_gen.py new file mode 100644 index 00000000..0f62a452 --- /dev/null +++ b/configs/datasets/FewCLUE_csl/FewCLUE_csl_gen.py @@ -0,0 +1,4 @@ +from mmengine.config import read_base + +with read_base(): + from .FewCLUE_csl_gen_1b0c02 import csl_datasets # noqa: F401, F403 diff --git a/configs/datasets/FewCLUE_csl/FewCLUE_csl_ppl_8eee08.py b/configs/datasets/FewCLUE_csl/FewCLUE_csl_ppl_8eee08.py new file mode 100644 index 00000000..da875b07 --- /dev/null +++ b/configs/datasets/FewCLUE_csl/FewCLUE_csl_ppl_8eee08.py @@ -0,0 +1,41 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import PPLInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import CslDataset + +csl_reader_cfg = dict( + input_columns=["abst", "keywords"], output_column='label') + +csl_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template={ + 0: "摘要:{abst}", + 1: "摘要:{abst}\n关键词:{keywords}" + }), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=PPLInferencer)) + +csl_eval_cfg = dict(evaluator=dict(type=AccEvaluator)) + +csl_datasets = [ + dict( + type=CslDataset, + path='json', + abbr='csl_dev', + data_files='./data/FewCLUE/csl/dev_few_all.json', + split='train', + reader_cfg=csl_reader_cfg, + infer_cfg=csl_infer_cfg, + eval_cfg=csl_eval_cfg), + dict( + type=CslDataset, + path='json', + abbr='csl_test', + data_files='./data/FewCLUE/csl/test_public.json', + split='train', + reader_cfg=csl_reader_cfg, + infer_cfg=csl_infer_cfg, + eval_cfg=csl_eval_cfg) +] diff --git a/configs/datasets/FewCLUE_ocnli_fc/FewCLUE_ocnli_fc_gen_bef37f.py b/configs/datasets/FewCLUE_ocnli_fc/FewCLUE_ocnli_fc_gen_bef37f.py new file mode 100644 index 00000000..2caa8888 --- /dev/null +++ b/configs/datasets/FewCLUE_ocnli_fc/FewCLUE_ocnli_fc_gen_bef37f.py @@ -0,0 +1,49 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import GenInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import cmnliDataset_V2 + +ocnli_fc_reader_cfg = dict( + input_columns=["sentence1", "sentence2"], + output_column="label", + test_split="train") + +ocnli_fc_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template=dict(round=[ + dict( + role="HUMAN", + prompt= + "阅读文章:{sentence1}\n根据上文,回答如下问题:{sentence2}\nA. 对\nB. 错\nC. 
可能\n请从“A”,“B”,“C”中进行选择。\n答:" + ), + ]), + ), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=GenInferencer), +) +ocnli_fc_eval_cfg = dict( + evaluator=dict(type=AccEvaluator), + pred_role="BOT", + pred_postprocessor=dict(type="first-capital"), +) + +ocnli_fc_datasets = [ + dict( + abbr="ocnli_fc-dev", + type=cmnliDataset_V2, # ocnli_fc share the same format with cmnli + path="./data/FewCLUE/ocnli/dev_few_all.json", + reader_cfg=ocnli_fc_reader_cfg, + infer_cfg=ocnli_fc_infer_cfg, + eval_cfg=ocnli_fc_eval_cfg, + ), + dict( + abbr="ocnli_fc-test", + type=cmnliDataset_V2, # ocnli_fc share the same format with cmnli + path="./data/FewCLUE/ocnli/test_public.json", + reader_cfg=ocnli_fc_reader_cfg, + infer_cfg=ocnli_fc_infer_cfg, + eval_cfg=ocnli_fc_eval_cfg, + ), +] diff --git a/configs/datasets/FewCLUE_tnews/FewCLUE_tnews_ppl_33cc73.py b/configs/datasets/FewCLUE_tnews/FewCLUE_tnews_ppl_33cc73.py new file mode 100644 index 00000000..7496759c --- /dev/null +++ b/configs/datasets/FewCLUE_tnews/FewCLUE_tnews_ppl_33cc73.py @@ -0,0 +1,48 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import PPLInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import TNewsDataset + +tnews_reader_cfg = dict(input_columns='sentence', output_column='label_desc2') + +tnews_labels = [ + '农业新闻', '旅游新闻', '游戏新闻', '科技类别公司新闻', '体育类别新闻', '初升高教育新闻', '娱乐圈新闻', '投资资讯', + '军事类别常识', '车辆新闻', '楼市新闻', '环球不含中国类别新闻', '书籍文化历史类别新闻', '故事类别新闻', '股票市场类别新闻' +] + +tnews_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template={ + lb: dict(round=[ + dict(role='HUMAN', prompt='以下内容属于什么新闻:{sentence}。'), + dict(role='BOT', prompt=lb) + ]) + for lb in tnews_labels + }), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=PPLInferencer)) + +tnews_eval_cfg = dict(evaluator=dict(type=AccEvaluator)) + +tnews_datasets = [ + dict( + type=TNewsDataset, + path='json', + abbr='tnews-dev', + data_files='./data/FewCLUE/tnews/dev_few_all.json', + split='train', + reader_cfg=tnews_reader_cfg, + infer_cfg=tnews_infer_cfg, + eval_cfg=tnews_eval_cfg), + dict( + type=TNewsDataset, + path='json', + abbr='tnews-test', + data_files='./data/FewCLUE/tnews/test_public.json', + split='train', + reader_cfg=tnews_reader_cfg, + infer_cfg=tnews_infer_cfg, + eval_cfg=tnews_eval_cfg) +] diff --git a/configs/datasets/SuperGLUE_AX_b/SuperGLUE_AX_b_ppl_4bd960.py b/configs/datasets/SuperGLUE_AX_b/SuperGLUE_AX_b_ppl_4bd960.py new file mode 100644 index 00000000..f1b2891b --- /dev/null +++ b/configs/datasets/SuperGLUE_AX_b/SuperGLUE_AX_b_ppl_4bd960.py @@ -0,0 +1,53 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import PPLInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import HFDataset + +AX_b_reader_cfg = dict( + input_columns=["sentence1", "sentence2"], + output_column="label", + test_split="train") + +AX_b_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template={ + "entailment": + dict(round=[ + dict( + role="HUMAN", + prompt= + "{sentence1}\n{sentence2}\nIs the sentence below entailed by the sentence above?" 
+ ), + dict(role="BOT", prompt="Yes"), + ]), + "not_entailment": + dict(round=[ + dict( + role="HUMAN", + prompt= + "{sentence1}\n{sentence2}\nIs the sentence below entailed by the sentence above?" + ), + dict(role="BOT", prompt="No"), + ]) + }, + ), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=PPLInferencer), +) + +AX_b_eval_cfg = dict(evaluator=dict(type=AccEvaluator)) + +AX_b_datasets = [ + dict( + type=HFDataset, + abbr="AX_b", + path="json", + data_files="./data/SuperGLUE/AX-b/AX-b.jsonl", + split="train", + reader_cfg=AX_b_reader_cfg, + infer_cfg=AX_b_infer_cfg, + eval_cfg=AX_b_eval_cfg, + ) +] diff --git a/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_ppl_f80fb0.py b/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_ppl_f80fb0.py new file mode 100644 index 00000000..a9fe02cf --- /dev/null +++ b/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_ppl_f80fb0.py @@ -0,0 +1,45 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import PPLInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import BoolQDataset + +BoolQ_reader_cfg = dict( + input_columns=["question", "passage"], + output_column="answer", + test_split="train") + +BoolQ_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template={ + 0: + dict(round=[ + dict(role="HUMAN", prompt="{passage}\nQuestion: {question}?"), + dict(role="BOT", prompt="No"), + ]), + 1: + dict(round=[ + dict(role="HUMAN", prompt="{passage}\nQuestion: {question}?"), + dict(role="BOT", prompt="Yes"), + ]), + }, + ), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=PPLInferencer), +) + +BoolQ_eval_cfg = dict(evaluator=dict(type=AccEvaluator)) + +BoolQ_datasets = [ + dict( + type=BoolQDataset, + abbr="BoolQ", + path="json", + data_files="./data/SuperGLUE/BoolQ/val.jsonl", + split="train", + reader_cfg=BoolQ_reader_cfg, + infer_cfg=BoolQ_infer_cfg, + eval_cfg=BoolQ_eval_cfg, + ) +] diff --git a/configs/datasets/SuperGLUE_CB/SuperGLUE_CB_ppl_012063.py b/configs/datasets/SuperGLUE_CB/SuperGLUE_CB_ppl_012063.py new file mode 100644 index 00000000..9ee3007d --- /dev/null +++ b/configs/datasets/SuperGLUE_CB/SuperGLUE_CB_ppl_012063.py @@ -0,0 +1,33 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import PPLInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import HFDataset + +CB_reader_cfg = dict( + input_columns=['premise', 'hypothesis'], output_column='label') + +CB_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template={ + 'contradiction': '{premise}?contradiction, {hypothesis}', + 'entailment': '{premise}?entailment, {hypothesis}', + 'neutral': '{premise}?neutral, {hypothesis}' + }), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=PPLInferencer)) + +CB_eval_cfg = dict(evaluator=dict(type=AccEvaluator), ) + +CB_datasets = [ + dict( + type=HFDataset, + abbr='CB', + path='json', + split='train', + data_files='./data/SuperGLUE/CB/val.jsonl', + reader_cfg=CB_reader_cfg, + infer_cfg=CB_infer_cfg, + eval_cfg=CB_eval_cfg) +] diff --git a/configs/datasets/SuperGLUE_COPA/SuperGLUE_COPA_gen.py b/configs/datasets/SuperGLUE_COPA/SuperGLUE_COPA_gen.py new file mode 100644 index 00000000..3224b3da --- /dev/null +++ 
b/configs/datasets/SuperGLUE_COPA/SuperGLUE_COPA_gen.py @@ -0,0 +1,4 @@ +from mmengine.config import read_base + +with read_base(): + from .SuperGLUE_COPA_gen_6d5e67 import COPA_datasets # noqa: F401, F403 diff --git a/configs/datasets/SuperGLUE_COPA/SuperGLUE_COPA_ppl.py b/configs/datasets/SuperGLUE_COPA/SuperGLUE_COPA_ppl.py new file mode 100644 index 00000000..998dcd1d --- /dev/null +++ b/configs/datasets/SuperGLUE_COPA/SuperGLUE_COPA_ppl.py @@ -0,0 +1,4 @@ +from mmengine.config import read_base + +with read_base(): + from .SuperGLUE_COPA_ppl_ddb78c import COPA_datasets # noqa: F401, F403 diff --git a/configs/datasets/SuperGLUE_COPA/SuperGLUE_COPA_ppl_ed59be.py b/configs/datasets/SuperGLUE_COPA/SuperGLUE_COPA_ppl_ed59be.py new file mode 100644 index 00000000..1f4d4a48 --- /dev/null +++ b/configs/datasets/SuperGLUE_COPA/SuperGLUE_COPA_ppl_ed59be.py @@ -0,0 +1,34 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import PPLInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import HFDataset + +COPA_reader_cfg = dict( + input_columns=['question', 'premise', 'choice1', 'choice2'], + output_column='label', + test_split='train') + +COPA_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template={ + 0: "Premise:{premise}。\nQuestion:{question}。\nAnswer: {choice1}.", + 1: "Passage:{premise}。\nQuestion:{question}。\nAnswer: {choice2}.", + }), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=PPLInferencer)) + +COPA_eval_cfg = dict(evaluator=dict(type=AccEvaluator)) + +COPA_datasets = [ + dict( + type=HFDataset, + abbr='COPA', + path='json', + data_files='./data/SuperGLUE/COPA/val.jsonl', + split='train', + reader_cfg=COPA_reader_cfg, + infer_cfg=COPA_infer_cfg, + eval_cfg=COPA_eval_cfg) +] diff --git a/configs/datasets/SuperGLUE_MultiRC/SuperGLUE_MultiRC_gen.py b/configs/datasets/SuperGLUE_MultiRC/SuperGLUE_MultiRC_gen.py new file mode 100644 index 00000000..01f9940e --- /dev/null +++ b/configs/datasets/SuperGLUE_MultiRC/SuperGLUE_MultiRC_gen.py @@ -0,0 +1,4 @@ +from mmengine.config import read_base + +with read_base(): + from .SuperGLUE_MultiRC_gen_26c9dc import MultiRC_datasets # noqa: F401, F403 diff --git a/configs/datasets/SuperGLUE_MultiRC/SuperGLUE_MultiRC_ppl_1123bd.py b/configs/datasets/SuperGLUE_MultiRC/SuperGLUE_MultiRC_ppl_1123bd.py new file mode 100644 index 00000000..153e02cc --- /dev/null +++ b/configs/datasets/SuperGLUE_MultiRC/SuperGLUE_MultiRC_ppl_1123bd.py @@ -0,0 +1,30 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import PPLInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import MultiRCDataset + +MultiRC_reader_cfg = dict( + input_columns=['question', 'text', 'answer'], output_column='label') + +MultiRC_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template={ + 0: "Passage:{text}。\nQuestion:{question}。\nAnswer: {answer}. It is false.", + 1: "Passage:
{text}
。\nQuestion:{question}。\nAnswer: {answer}. It is true.", + }), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=PPLInferencer)) + +MultiRC_eval_cfg = dict(evaluator=dict(type=AccEvaluator)) + +MultiRC_datasets = [ + dict( + type=MultiRCDataset, + abbr='MultiRC', + path='./data/SuperGLUE/MultiRC/val.jsonl', + reader_cfg=MultiRC_reader_cfg, + infer_cfg=MultiRC_infer_cfg, + eval_cfg=MultiRC_eval_cfg) +] diff --git a/configs/datasets/SuperGLUE_RTE/SuperGLUE_RTE_ppl.py b/configs/datasets/SuperGLUE_RTE/SuperGLUE_RTE_ppl.py new file mode 100644 index 00000000..1f83906f --- /dev/null +++ b/configs/datasets/SuperGLUE_RTE/SuperGLUE_RTE_ppl.py @@ -0,0 +1,4 @@ +from mmengine.config import read_base + +with read_base(): + from .SuperGLUE_RTE_ppl_29a22c import RTE_datasets # noqa: F401, F403 diff --git a/configs/datasets/SuperGLUE_ReCoRD/SuperGLUE_ReCoRD_gen.py b/configs/datasets/SuperGLUE_ReCoRD/SuperGLUE_ReCoRD_gen.py new file mode 100644 index 00000000..e5f0e8b0 --- /dev/null +++ b/configs/datasets/SuperGLUE_ReCoRD/SuperGLUE_ReCoRD_gen.py @@ -0,0 +1,4 @@ +from mmengine.config import read_base + +with read_base(): + from .SuperGLUE_ReCoRD_gen_d8f19c import ReCoRD_datasets # noqa: F401, F403 diff --git a/configs/datasets/SuperGLUE_ReCoRD/SuperGLUE_ReCoRD_gen_d8f19c.py b/configs/datasets/SuperGLUE_ReCoRD/SuperGLUE_ReCoRD_gen_d8f19c.py new file mode 100644 index 00000000..0d1f7abd --- /dev/null +++ b/configs/datasets/SuperGLUE_ReCoRD/SuperGLUE_ReCoRD_gen_d8f19c.py @@ -0,0 +1,42 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import GenInferencer +from opencompass.openicl.icl_evaluator import EMEvaluator +from opencompass.datasets import ReCoRDDataset + +ReCoRD_reader_cfg = dict( + input_columns=["question", "text"], + output_column="answers", +) + +ReCoRD_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template=dict(round=[ + dict( + role="HUMAN", + prompt= + "Passage: {text}\nResult: {question}\nQuestion: What entity does ____ refer to in the result? Give me the entity name:" + ), + ]), + ), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=GenInferencer), +) + +ReCoRD_eval_cfg = dict( + evaluator=dict(type=EMEvaluator), + pred_role='BOT', + pred_postprocessor=dict(type="ReCoRD"), +) + +ReCoRD_datasets = [ + dict( + type=ReCoRDDataset, + abbr="ReCoRD", + path="./data/SuperGLUE/ReCoRD/val.jsonl", + reader_cfg=ReCoRD_reader_cfg, + infer_cfg=ReCoRD_infer_cfg, + eval_cfg=ReCoRD_eval_cfg, + ) +] diff --git a/configs/datasets/SuperGLUE_WSC/SuperGLUE_WSC_ppl_85f45f.py b/configs/datasets/SuperGLUE_WSC/SuperGLUE_WSC_ppl_85f45f.py new file mode 100644 index 00000000..eda1fad5 --- /dev/null +++ b/configs/datasets/SuperGLUE_WSC/SuperGLUE_WSC_ppl_85f45f.py @@ -0,0 +1,51 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import PPLInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import WSCDataset_V2 + +WSC_reader_cfg = dict( + input_columns=["span1", "span2", "text"], + output_column="label", +) + +WSC_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template={ + 'A': + dict(round=[ + dict( + role="HUMAN", + prompt= + "{text}\nIs '{span1}' and '{span2}' refers to the same entity in the above sentence?" 
+ ), + dict(role='BOT', prompt='Yes'), + ]), + 'B': + dict(round=[ + dict( + role="HUMAN", + prompt= + "{text}\nIs '{span1}' and '{span2}' refers to the same entity in the above sentence?" + ), + dict(role='BOT', prompt='No'), + ]), + }, + ), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=PPLInferencer), +) + +WSC_eval_cfg = dict(evaluator=dict(type=AccEvaluator), ) + +WSC_datasets = [ + dict( + abbr="WSC", + type=WSCDataset_V2, + path="./data/SuperGLUE/WSC/val.jsonl", + reader_cfg=WSC_reader_cfg, + infer_cfg=WSC_infer_cfg, + eval_cfg=WSC_eval_cfg, + ) +] diff --git a/configs/datasets/XLSum/XLSum_gen.py b/configs/datasets/XLSum/XLSum_gen.py new file mode 100644 index 00000000..f09668cc --- /dev/null +++ b/configs/datasets/XLSum/XLSum_gen.py @@ -0,0 +1,4 @@ +from mmengine.config import read_base + +with read_base(): + from .XLSum_gen_1cc5f6 import XLSum_datasets # noqa: F401, F403 diff --git a/configs/datasets/bbh/bbh_gen.py b/configs/datasets/bbh/bbh_gen.py new file mode 100644 index 00000000..f0cee254 --- /dev/null +++ b/configs/datasets/bbh/bbh_gen.py @@ -0,0 +1,4 @@ +from mmengine.config import read_base + +with read_base(): + from .bbh_gen_58abc3 import bbh_datasets # noqa: F401, F403 diff --git a/configs/datasets/collections/base_small.py b/configs/datasets/collections/base_small.py new file mode 100644 index 00000000..b469bb88 --- /dev/null +++ b/configs/datasets/collections/base_small.py @@ -0,0 +1,39 @@ +from mmengine.config import read_base + +with read_base(): + from ..ceval.ceval_ppl_275812 import ceval_datasets + from ..bbh.bbh_gen_58abc3 import bbh_datasets + from ..CLUE_CMRC.CLUE_CMRC_gen_72a8d5 import CMRC_datasets + from ..CLUE_DRCD.CLUE_DRCD_gen_03b96b import DRCD_datasets + from ..CLUE_afqmc.CLUE_afqmc_ppl_c83c36 import afqmc_datasets + from ..FewCLUE_bustm.FewCLUE_bustm_ppl_47f2ab import bustm_datasets + from ..FewCLUE_chid.FewCLUE_chid_ppl_b6cd88 import chid_datasets + from ..FewCLUE_cluewsc.FewCLUE_cluewsc_ppl_2a9e61 import cluewsc_datasets + from ..FewCLUE_eprstmt.FewCLUE_eprstmt_ppl_d3c387 import eprstmt_datasets + from ..humaneval.humaneval_gen_d428f1 import humaneval_datasets + from ..mbpp.mbpp_gen_4104e4 import mbpp_datasets + from ..lambada.lambada_gen_7ffe3d import lambada_datasets + from ..storycloze.storycloze_ppl_c1912d import storycloze_datasets + from ..SuperGLUE_AX_b.SuperGLUE_AX_b_ppl_4bd960 import AX_b_datasets + from ..SuperGLUE_AX_g.SuperGLUE_AX_g_ppl_8d9bf9 import AX_g_datasets + from ..SuperGLUE_BoolQ.SuperGLUE_BoolQ_ppl_f80fb0 import BoolQ_datasets + from ..SuperGLUE_CB.SuperGLUE_CB_ppl_32adbb import CB_datasets + from ..SuperGLUE_COPA.SuperGLUE_COPA_ppl_ddb78c import COPA_datasets + from ..SuperGLUE_MultiRC.SuperGLUE_MultiRC_ppl_83a304 import MultiRC_datasets + from ..SuperGLUE_RTE.SuperGLUE_RTE_ppl_29a22c import RTE_datasets + from ..SuperGLUE_ReCoRD.SuperGLUE_ReCoRD_gen_d8f19c import ReCoRD_datasets + from ..SuperGLUE_WiC.SuperGLUE_WiC_ppl_4118db import WiC_datasets + from ..SuperGLUE_WSC.SuperGLUE_WSC_ppl_85f45f import WSC_datasets + from ..race.race_ppl_04e06a import race_datasets + from ..math.math_gen_78bcba import math_datasets + from ..gsm8k.gsm8k_gen_2dd372 import gsm8k_datasets + from ..summedits.summedits_ppl_163352 import summedits_datasets + from ..hellaswag.hellaswag_ppl_8e07d6 import hellaswag_datasets + from ..piqa.piqa_ppl_788dbe import piqa_datasets + from ..winogrande.winogrande_ppl_00f8ad import winogrande_datasets + from ..obqa.obqa_ppl_2b5b12 import obqa_datasets + from ..nq.nq_gen_c00b89 import nq_datasets + 
from ..triviaqa.triviaqa_gen_cc3cbf import triviaqa_datasets + from ..crowspairs.crowspairs_ppl_f60797 import crowspairs_datasets + +datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), []) diff --git a/configs/datasets/glm/nq.py b/configs/datasets/glm/nq.py new file mode 100644 index 00000000..35919338 --- /dev/null +++ b/configs/datasets/glm/nq.py @@ -0,0 +1,38 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import GenInferencer +from opencompass.datasets import NaturalQuestionDataset, NQEvaluator + +nq_reader_cfg = dict( + input_columns=['question'], output_column='answer', train_split='test') + +nq_infer_cfg = dict( + ice_template=dict( + type=PromptTemplate, + template="Q: ?\nA: ", + column_token_map={ + 'question': '', + 'answer': '' + }), + prompt_template=dict( + type=PromptTemplate, + template="Question: ? Answer: ", + column_token_map={ + 'question': '', + 'answer': '' + }, + ice_token=''), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=GenInferencer)) + +nq_eval_cfg = dict(evaluator=dict(type=NQEvaluator)) + +nq_datasets = [ + dict( + type=NaturalQuestionDataset, + abbr='nq', + path='/mnt/petrelfs/wuzhiyong/datasets/nq/', + reader_cfg=nq_reader_cfg, + infer_cfg=nq_infer_cfg, + eval_cfg=nq_eval_cfg) +] diff --git a/configs/datasets/glm/triviaqa.py b/configs/datasets/glm/triviaqa.py new file mode 100644 index 00000000..6071fd10 --- /dev/null +++ b/configs/datasets/glm/triviaqa.py @@ -0,0 +1,41 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import GenInferencer +from opencompass.datasets import TriviaQADataset, TriviaQAEvaluator + +triviaqa_reader_cfg = dict( + input_columns=['question'], + output_column='answer', + train_split='dev', + test_split='dev') + +triviaqa_infer_cfg = dict( + ice_template=dict( + type=PromptTemplate, + template='Q: \nA: ', + column_token_map={ + 'question': '', + 'answer': '' + }), + prompt_template=dict( + type=PromptTemplate, + template='Question: Answer:', + column_token_map={ + 'question': '', + 'answer': '' + }, + ice_token=''), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=GenInferencer, max_out_len=50)) + +triviaqa_eval_cfg = dict(evaluator=dict(type=TriviaQAEvaluator)) + +triviaqa_datasets = [ + dict( + type=TriviaQADataset, + abbr='triviaqa', + path='./data/triviaqa/', + reader_cfg=triviaqa_reader_cfg, + infer_cfg=triviaqa_infer_cfg, + eval_cfg=triviaqa_eval_cfg) +] diff --git a/configs/datasets/humaneval/humaneval_gen_581044.py b/configs/datasets/humaneval/humaneval_gen_581044.py new file mode 100644 index 00000000..4e3e6d78 --- /dev/null +++ b/configs/datasets/humaneval/humaneval_gen_581044.py @@ -0,0 +1,40 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import GenInferencer +from opencompass.datasets import HFDataset, HumanEvaluator + +humaneval_reader_cfg = dict( + input_columns=['prompt'], output_column='task_id', train_split='test') + +# TODO: allow empty output-column +humaneval_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template=dict( + begin=[ + dict( + role='SYSTEM', + fallback_role='HUMAN', + prompt='Complete the following python code:'), + ], + round=[ + dict(role='HUMAN', 
prompt='{prompt}'), + ])), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=GenInferencer)) + +humaneval_eval_cfg = dict( + evaluator=dict(type=HumanEvaluator), + pred_role='BOT', + k=[1, 10, 100], # this parameter is only used by HumanEval + pred_postprocessor=dict(type='humaneval'), +) + +humaneval_datasets = [ + dict( + type=HFDataset, + path='openai_humaneval', + reader_cfg=humaneval_reader_cfg, + infer_cfg=humaneval_infer_cfg, + eval_cfg=humaneval_eval_cfg) +] diff --git a/configs/datasets/jigsawmultilingual/jigsawmultilingual_ppl.py b/configs/datasets/jigsawmultilingual/jigsawmultilingual_ppl.py new file mode 100644 index 00000000..4dd90d37 --- /dev/null +++ b/configs/datasets/jigsawmultilingual/jigsawmultilingual_ppl.py @@ -0,0 +1,4 @@ +from mmengine.config import read_base + +with read_base(): + from .jigsawmultilingual_ppl_640128 import jigsawmultilingual_datasets # noqa: F401, F403 diff --git a/configs/datasets/jigsawmultilingual/jigsawmultilingual_ppl_640128.py b/configs/datasets/jigsawmultilingual/jigsawmultilingual_ppl_640128.py new file mode 100644 index 00000000..d441c7dd --- /dev/null +++ b/configs/datasets/jigsawmultilingual/jigsawmultilingual_ppl_640128.py @@ -0,0 +1,47 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import CLPInferencer +from opencompass.openicl.icl_evaluator import AUCROCEvaluator +from opencompass.datasets import JigsawMultilingualDataset + +jigsawmultilingual_reader_cfg = dict( + input_columns=['text'], + output_column='label', + train_split='test', + test_split='test') + +# The Jigsaw multilingual toxic dataset is not available on Hugging Face. +# Please download the test dataset files `test.csv` and `test_labels.csv` from +# the link below and move them to the data/ folder to perform evaluation.
+# https://www.kaggle.com/competitions/jigsaw-multilingual-toxic-comment-classification/data +jigsawmultilingual_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template=dict(round=[ + dict( + role="HUMAN", + prompt="Text: {text}\nQuestion: Does the above text contain " + "rude, hateful, aggressive, disrespectful or unreasonable " + "language?\nAnswer:") + ])), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=CLPInferencer)) + +jigsawmultilingual_eval_cfg = dict(evaluator=dict(type=AUCROCEvaluator), ) + +lang = ['es', 'fr', 'it', 'pt', 'ru', 'tr'] +jigsawmultilingual_datasets = [] + +for _l in lang: + jigsawmultilingual_datasets.append( + dict( + abbr=f'jigsaw_multilingual_{_l}', + type=JigsawMultilingualDataset, + path='data/test.csv', + label='data/test_labels.csv', + lang=_l, + reader_cfg=jigsawmultilingual_reader_cfg, + infer_cfg=jigsawmultilingual_infer_cfg, + eval_cfg=jigsawmultilingual_eval_cfg)) + +del lang, _l diff --git a/configs/datasets/jigsawmultilingual/jigsawmultilingual_ppl_da5d28.py b/configs/datasets/jigsawmultilingual/jigsawmultilingual_ppl_da5d28.py new file mode 100644 index 00000000..62a2d727 --- /dev/null +++ b/configs/datasets/jigsawmultilingual/jigsawmultilingual_ppl_da5d28.py @@ -0,0 +1,43 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import CLPInferencer +from opencompass.openicl.icl_evaluator import AUCROCEvaluator +from opencompass.datasets import JigsawMultilingualDataset + +jigsawmultilingual_reader_cfg = dict( + input_columns=['text'], + output_column='label', + train_split='test', + test_split='test') + +# The Jigsaw multilingual toxic dataset is not available on Hugging Face. +# Please download the test dataset files `test.csv` and `test_labels.csv` from +# the link below and move them to the data/ folder to perform evaluation.
+# https://www.kaggle.com/competitions/jigsaw-multilingual-toxic-comment-classification/data +jigsawmultilingual_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template='Text: {text}\nQuestion: ' + 'Does the above text contain rude, hateful, aggressive, disrespectful ' + 'or unreasonable language?\nAnswer:'), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=CLPInferencer)) + +jigsawmultilingual_eval_cfg = dict(evaluator=dict(type=AUCROCEvaluator), ) + +lang = ['es', 'fr', 'it', 'pt', 'ru', 'tr'] +jigsawmultilingual_datasets = [] + +for _l in lang: + jigsawmultilingual_datasets.append( + dict( + abbr=f'jigsaw_multilingual_{_l}', + type=JigsawMultilingualDataset, + path='data/test.csv', + label='data/test_labels.csv', + lang=_l, + reader_cfg=jigsawmultilingual_reader_cfg, + infer_cfg=jigsawmultilingual_infer_cfg, + eval_cfg=jigsawmultilingual_eval_cfg)) + +del lang, _l diff --git a/configs/datasets/lambada/lambada_gen.py b/configs/datasets/lambada/lambada_gen.py new file mode 100644 index 00000000..e27c8689 --- /dev/null +++ b/configs/datasets/lambada/lambada_gen.py @@ -0,0 +1,4 @@ +from mmengine.config import read_base + +with read_base(): + from .lambada_gen_7ffe3d import lambada_datasets # noqa: F401, F403 diff --git a/configs/datasets/math/math_gen.py b/configs/datasets/math/math_gen.py new file mode 100644 index 00000000..dec061c2 --- /dev/null +++ b/configs/datasets/math/math_gen.py @@ -0,0 +1,4 @@ +from mmengine.config import read_base + +with read_base(): + from .math_gen_78bcba import math_datasets # noqa: F401, F403 diff --git a/configs/datasets/math/math_gen_b4c82a.py b/configs/datasets/math/math_gen_b4c82a.py new file mode 100644 index 00000000..ddd8bae6 --- /dev/null +++ b/configs/datasets/math/math_gen_b4c82a.py @@ -0,0 +1,53 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import GenInferencer +from opencompass.datasets import MATHDataset, MATHEvaluator + +math_reader_cfg = dict(input_columns=['problem'], output_column='solution') + +math_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template='''Problem: +Find the domain of the expression $\frac{{\sqrt{{x-2}}}}{{\sqrt{{5-x}}}}$.}} +Solution: +The expressions inside each square root must be non-negative. Therefore, $x-2 \ge 0$, so $x\ge2$, and $5 - x \ge 0$, so $x \le 5$. Also, the denominator cannot be equal to zero, so $5-x>0$, which gives $x<5$. Therefore, the domain of the expression is $\boxed{{[2,5)}}$. +Final Answer: The final answer is $[2,5)$. I hope it is correct. + +Problem: +If $\det \mathbf{{A}} = 2$ and $\det \mathbf{{B}} = 12,$ then find $\det (\mathbf{{A}} \mathbf{{B}}).$ +Solution: +We have that $\det (\mathbf{{A}} \mathbf{{B}}) = (\det \mathbf{{A}})(\det \mathbf{{B}}) = (2)(12) = \boxed{{24}}.$ +Final Answer: The final answer is $24$. I hope it is correct. + +Problem: +Terrell usually lifts two 20-pound weights 12 times. If he uses two 15-pound weights instead, how many times must Terrell lift them in order to lift the same total weight? +Solution: +If Terrell lifts two 20-pound weights 12 times, he lifts a total of $2\cdot 12\cdot20=480$ pounds of weight. If he lifts two 15-pound weights instead for $n$ times, he will lift a total of $2\cdot15\cdot n=30n$ pounds of weight. 
Equating this to 480 pounds, we can solve for $n$: \begin{{align*}} 30n&=480\\ \Rightarrow\qquad n&=480/30=\boxed{{16}} \end{{align*}} +Final Answer: The final answer is $16$. I hope it is correct. + +Problem: +If the system of equations: \begin{{align*}} 6x-4y&=a,\\ 6y-9x &=b. \end{{align*}}has a solution $(x, y)$ where $x$ and $y$ are both nonzero, find $\frac{{a}}{{b}},$ assuming $b$ is nonzero. +Solution: +If we multiply the first equation by $-\frac{{3}}{{2}}$, we obtain $$6y-9x=-\frac{{3}}{{2}}a.$$Since we also know that $6y-9x=b$, we have $$-\frac{{3}}{{2}}a=b\Rightarrow\frac{{a}}{{b}}=\boxed{{-\frac{{2}}{{3}}}}.$$ +Final Answer: The final answer is $-\frac{{2}}{{3}}$. I hope it is correct. + +Problem: +{problem} +Solution: +{solution}'''), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=GenInferencer, max_out_len=512)) + +math_eval_cfg = dict( + evaluator=dict(type=MATHEvaluator), pred_postprocessor=dict(type='math')) + +math_datasets = [ + dict( + type=MATHDataset, + abbr='math', + path='./data/math/math.json', + reader_cfg=math_reader_cfg, + infer_cfg=math_infer_cfg, + eval_cfg=math_eval_cfg) +] diff --git a/configs/datasets/narrativeqa/narrativeqa_gen.py b/configs/datasets/narrativeqa/narrativeqa_gen.py new file mode 100644 index 00000000..d03cafa3 --- /dev/null +++ b/configs/datasets/narrativeqa/narrativeqa_gen.py @@ -0,0 +1,4 @@ +from mmengine.config import read_base + +with read_base(): + from .narrativeqa_gen_5786a7 import narrativeqa_datasets # noqa: F401, F403 diff --git a/configs/datasets/nq/nq_gen_c00b89.py b/configs/datasets/nq/nq_gen_c00b89.py new file mode 100644 index 00000000..003ccb69 --- /dev/null +++ b/configs/datasets/nq/nq_gen_c00b89.py @@ -0,0 +1,29 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import GenInferencer +from opencompass.datasets import NaturalQuestionDataset, NQEvaluator + +nq_reader_cfg = dict( + input_columns=['question'], output_column='answer', train_split='test') + +nq_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template=dict( + round=[ + dict(role='HUMAN', prompt='Question: {question}?\nAnswer: '), + ], )), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=GenInferencer)) + +nq_eval_cfg = dict(evaluator=dict(type=NQEvaluator), pred_role="BOT") + +nq_datasets = [ + dict( + type=NaturalQuestionDataset, + abbr='nq', + path='./data/nq/', + reader_cfg=nq_reader_cfg, + infer_cfg=nq_infer_cfg, + eval_cfg=nq_eval_cfg) +] diff --git a/configs/datasets/piqa/piqa_gen.py b/configs/datasets/piqa/piqa_gen.py new file mode 100644 index 00000000..14fe3d92 --- /dev/null +++ b/configs/datasets/piqa/piqa_gen.py @@ -0,0 +1,4 @@ +from mmengine.config import read_base + +with read_base(): + from .piqa_gen_8287ae import piqa_datasets # noqa: F401, F403 diff --git a/configs/datasets/piqa/piqa_ppl_788dbe.py b/configs/datasets/piqa/piqa_ppl_788dbe.py new file mode 100644 index 00000000..7c43bf6b --- /dev/null +++ b/configs/datasets/piqa/piqa_ppl_788dbe.py @@ -0,0 +1,31 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import PPLInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import HFDataset + +piqa_reader_cfg = dict( + input_columns=['goal', 'sol1', 'sol2'], + output_column='label', + test_split='validation') + 
+piqa_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template={ + 0: 'The following makes sense: \nQ: {goal}\nA: {sol1}\n', + 1: 'The following makes sense: \nQ: {goal}\nA: {sol2}\n' + }), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=PPLInferencer)) + +piqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator)) + +piqa_datasets = [ + dict( + type=HFDataset, + path='piqa', + reader_cfg=piqa_reader_cfg, + infer_cfg=piqa_infer_cfg, + eval_cfg=piqa_eval_cfg) +] diff --git a/configs/datasets/qabench/qabench_gen.py b/configs/datasets/qabench/qabench_gen.py new file mode 100644 index 00000000..478fff1e --- /dev/null +++ b/configs/datasets/qabench/qabench_gen.py @@ -0,0 +1,4 @@ +from mmengine.config import read_base + +with read_base(): + from .qabench_gen_0d5967 import qabench_datasets # noqa: F401, F403 diff --git a/configs/datasets/race/race_gen_12de48.py b/configs/datasets/race/race_gen_12de48.py new file mode 100644 index 00000000..f2b218db --- /dev/null +++ b/configs/datasets/race/race_gen_12de48.py @@ -0,0 +1,46 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import GenInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import RaceDataset + +race_reader_cfg = dict( + input_columns=['article', 'question', 'A', 'B', 'C', 'D'], + output_column='answer') + +race_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template=dict(round=[ + dict( + role="HUMAN", + prompt= + "Read the article, and answer the question by replying A, B, C or D.\n\nArticle:\n{article}\n\nQ: {question}\n\nA. {A}\nB. {B}\nC. {C}\nD. {D}" + ), + ])), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=GenInferencer)) + +race_eval_cfg = dict( + evaluator=dict(type=AccEvaluator), + pred_postprocessor=dict(type='first-capital'), + pred_role='BOT') + +race_datasets = [ + dict( + type=RaceDataset, + abbr='race-middle', + path='race', + name='middle', + reader_cfg=race_reader_cfg, + infer_cfg=race_infer_cfg, + eval_cfg=race_eval_cfg), + dict( + type=RaceDataset, + abbr='race-high', + path='race', + name='high', + reader_cfg=race_reader_cfg, + infer_cfg=race_infer_cfg, + eval_cfg=race_eval_cfg) +] diff --git a/configs/datasets/race/race_gen_d18b89.py b/configs/datasets/race/race_gen_d18b89.py new file mode 100644 index 00000000..f0f764ea --- /dev/null +++ b/configs/datasets/race/race_gen_d18b89.py @@ -0,0 +1,40 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import GenInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import RaceDataset + +race_reader_cfg = dict( + input_columns=['article', 'question', 'A', 'B', 'C', 'D'], + output_column='answer') + +race_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template= + 'Read the article, and answer the question by replying A, B, C or D.\n\n{article}\n\nQ: {question}\n\nA. {A}\nB. {B}\nC. {C}\nD. 
{D}'), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=GenInferencer)) + +race_eval_cfg = dict( + evaluator=dict(type=AccEvaluator), + pred_postprocessor=dict(type='first-capital')) + +race_datasets = [ + dict( + type=RaceDataset, + abbr='race-middle', + path='race', + name='middle', + reader_cfg=race_reader_cfg, + infer_cfg=race_infer_cfg, + eval_cfg=race_eval_cfg), + dict( + type=RaceDataset, + abbr='race-high', + path='race', + name='high', + reader_cfg=race_reader_cfg, + infer_cfg=race_infer_cfg, + eval_cfg=race_eval_cfg) +] diff --git a/configs/datasets/race/race_ppl.py b/configs/datasets/race/race_ppl.py new file mode 100644 index 00000000..4e905733 --- /dev/null +++ b/configs/datasets/race/race_ppl.py @@ -0,0 +1,4 @@ +from mmengine.config import read_base + +with read_base(): + from .race_ppl_04e06a import race_datasets # noqa: F401, F403 diff --git a/configs/datasets/realtoxicprompts/realtoxicprompts_gen.py b/configs/datasets/realtoxicprompts/realtoxicprompts_gen.py new file mode 100644 index 00000000..5f316e93 --- /dev/null +++ b/configs/datasets/realtoxicprompts/realtoxicprompts_gen.py @@ -0,0 +1,4 @@ +from mmengine.config import read_base + +with read_base(): + from .realtoxicprompts_gen_3ea730 import realtoxicprompts_datasets # noqa: F401, F403 diff --git a/configs/datasets/safety/safety_gen.py b/configs/datasets/safety/safety_gen.py new file mode 100644 index 00000000..8ee8572c --- /dev/null +++ b/configs/datasets/safety/safety_gen.py @@ -0,0 +1,4 @@ +from mmengine.config import read_base + +with read_base(): + from .safety_gen_c0a5b8 import safety_datasets # noqa: F401, F403 diff --git a/configs/datasets/siqa/siqa_gen_a3c714.py b/configs/datasets/siqa/siqa_gen_a3c714.py new file mode 100644 index 00000000..9da5de36 --- /dev/null +++ b/configs/datasets/siqa/siqa_gen_a3c714.py @@ -0,0 +1,42 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import GenInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import siqaDataset_V2 + +siqa_reader_cfg = dict( + input_columns=["context", "question", "answerA", "answerB", "answerC"], + output_column="label", + test_split="validation") + +siqa_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template=dict( + round=[ + dict( + role="HUMAN", + prompt= + "{context}\nQuestion: {question}\nA. {answerA}\nB. {answerB}\nC. 
{answerC}\nAnswer:" + ) + ], ), + ), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=GenInferencer), +) + +siqa_eval_cfg = dict( + evaluator=dict(type=AccEvaluator), + pred_role="BOT", + pred_postprocessor=dict(type="first-capital"), +) + +siqa_datasets = [ + dict( + abbr="siqa", + type=siqaDataset_V2, + path="social_i_qa", + reader_cfg=siqa_reader_cfg, + infer_cfg=siqa_infer_cfg, + eval_cfg=siqa_eval_cfg) +] diff --git a/configs/datasets/siqa/siqa_ppl.py b/configs/datasets/siqa/siqa_ppl.py new file mode 100644 index 00000000..3dfdc224 --- /dev/null +++ b/configs/datasets/siqa/siqa_ppl.py @@ -0,0 +1,4 @@ +from mmengine.config import read_base + +with read_base(): + from .siqa_ppl_049da0 import siqa_datasets # noqa: F401, F403 diff --git a/configs/datasets/storycloze/storycloze_ppl.py b/configs/datasets/storycloze/storycloze_ppl.py new file mode 100644 index 00000000..5be70eef --- /dev/null +++ b/configs/datasets/storycloze/storycloze_ppl.py @@ -0,0 +1,4 @@ +from mmengine.config import read_base + +with read_base(): + from .storycloze_ppl_c1912d import storycloze_datasets # noqa: F401, F403 diff --git a/configs/datasets/storycloze/storycloze_ppl_7f4c64.py b/configs/datasets/storycloze/storycloze_ppl_7f4c64.py new file mode 100644 index 00000000..e33bfe38 --- /dev/null +++ b/configs/datasets/storycloze/storycloze_ppl_7f4c64.py @@ -0,0 +1,36 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import PPLInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import storyclozeDataset + +storycloze_reader_cfg = dict( + input_columns=['context', 'sentence_quiz1', 'sentence_quiz2'], + output_column='answer_right_ending', + train_split='test', + test_split='test') + +storycloze_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template={ + 1: "{context}{sentence_quiz1}", + 2: "{context}{sentence_quiz2}", + }), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=PPLInferencer)) + +storycloze_eval_cfg = dict(evaluator=dict(type=AccEvaluator)) + +# The original Story Cloze dataset and its repo are no longer maintained. +# We use the multilingual version of this dataset instead.
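+# The template keys (1 and 2) intentionally match the values taken by
+# `answer_right_ending`, so the key of the lower-perplexity continuation
+# is scored directly against that column by AccEvaluator.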
+storycloze_datasets = [ + dict( + abbr='story_cloze', + type=storyclozeDataset, + path='juletxara/xstory_cloze', + name='en', + reader_cfg=storycloze_reader_cfg, + infer_cfg=storycloze_infer_cfg, + eval_cfg=storycloze_eval_cfg) +] diff --git a/configs/datasets/summedits/summedits_gen.py b/configs/datasets/summedits/summedits_gen.py new file mode 100644 index 00000000..d99f3c17 --- /dev/null +++ b/configs/datasets/summedits/summedits_gen.py @@ -0,0 +1,4 @@ +from mmengine.config import read_base + +with read_base(): + from .summedits_gen_4f35b5 import summedits_datasets # noqa: F401, F403 diff --git a/configs/datasets/summedits/summedits_gen_4f35b5.py b/configs/datasets/summedits/summedits_gen_4f35b5.py new file mode 100644 index 00000000..dd74c417 --- /dev/null +++ b/configs/datasets/summedits/summedits_gen_4f35b5.py @@ -0,0 +1,37 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import GenInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import SummeditsDataset_V2 + +summedits_reader_cfg = dict( + input_columns=['doc', 'summary'], output_column='label') + +summedits_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template=dict(round=[ + dict( + role="HUMAN", + prompt= + 'Document:\n{doc}Summary:\n{summary}\nQuestion:\nIs the summary factually consistent with the document?\nA. Yes\nB. No\nAnswer:' + ), + ])), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=GenInferencer)) + +summedits_eval_cfg = dict( + evaluator=dict(type=AccEvaluator), + pred_role="BOT", + pred_postprocessor=dict(type="first-capital"), +) + +summedits_datasets = [ + dict( + abbr='summedits', + type=SummeditsDataset_V2, + path='./data/summedits/summedits.jsonl', + reader_cfg=summedits_reader_cfg, + infer_cfg=summedits_infer_cfg, + eval_cfg=summedits_eval_cfg) +] diff --git a/configs/datasets/triviaqarc/triviaqarc_gen_a02306.py b/configs/datasets/triviaqarc/triviaqarc_gen_a02306.py new file mode 100644 index 00000000..e14be1f0 --- /dev/null +++ b/configs/datasets/triviaqarc/triviaqarc_gen_a02306.py @@ -0,0 +1,30 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import GenInferencer +from opencompass.datasets import TriviaQArcDataset, TriviaQAEvaluator + +triviaqarc_reader_cfg = dict( + input_columns=['question', 'evidence'], + output_column='answer', + train_split='dev', + test_split='dev') + +triviaqarc_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template="{evidence}\nAnswer these questions:\nQ: {question}?\nA:"), + retriever=dict(type=ZeroRetriever), + inferencer=dict( + type=GenInferencer, max_out_len=50, max_seq_len=8192, batch_size=4)) + +triviaqarc_eval_cfg = dict(evaluator=dict(type=TriviaQAEvaluator)) + +triviaqarc_datasets = [ + dict( + type=TriviaQArcDataset, + abbr='triviaqarc', + path='./data/triviaqa-rc/', + reader_cfg=triviaqarc_reader_cfg, + infer_cfg=triviaqarc_infer_cfg, + eval_cfg=triviaqarc_eval_cfg) +] diff --git a/configs/models/classic/tigerbot-7b-sft.py b/configs/models/classic/tigerbot-7b-sft.py new file mode 100644 index 00000000..b5ea2860 --- /dev/null +++ b/configs/models/classic/tigerbot-7b-sft.py @@ -0,0 +1,29 @@ +from opencompass.models import HuggingFaceCausalLM + +_meta_template = dict( + round=[ + dict(role='HUMAN', begin='\n\n### 
Instruction:\n'), + dict(role='BOT', begin='\n\n### Response:\n', generate=True), + ], +) + + +models = [ + dict( + type=HuggingFaceCausalLM, + abbr='TigerBot-SFT', + path="TigerResearch/tigerbot-7b-sft", + tokenizer_path='TigerResearch/tigerbot-7b-sft', + tokenizer_kwargs=dict( + padding_side='left', + truncation_side='left', + trust_remote_code=True, + ), + max_out_len=100, + max_seq_len=2048, + batch_size=8, + meta_template=_meta_template, + model_kwargs=dict(trust_remote_code=True, device_map='auto', revision='0ba4d6fc479bdedd6a3f8d4d3425025c5f501800'), + run_cfg=dict(num_gpus=1, num_procs=1), + ) +] diff --git a/configs/summarizers/groups/bbh.py b/configs/summarizers/groups/bbh.py new file mode 100644 index 00000000..8286c5c1 --- /dev/null +++ b/configs/summarizers/groups/bbh.py @@ -0,0 +1,6 @@ +bbh_summary_groups = [] + +# bbh +_bbh = ['temporal_sequences', 'disambiguation_qa', 'date_understanding', 'tracking_shuffled_objects_three_objects', 'penguins_in_a_table', 'geometric_shapes', 'snarks', 'ruin_names', 'tracking_shuffled_objects_seven_objects', 'tracking_shuffled_objects_five_objects', 'logical_deduction_three_objects', 'hyperbaton', 'logical_deduction_five_objects', 'logical_deduction_seven_objects', 'movie_recommendation', 'salient_translation_error_detection', 'reasoning_about_colored_objects', 'multistep_arithmetic_two', 'navigate', 'dyck_languages', 'word_sorting', 'sports_understanding', 'boolean_expressions', 'object_counting', 'formal_fallacies', 'causal_judgement', 'web_of_lies'] +_bbh = ['bbh-' + s for s in _bbh] +bbh_summary_groups.append({'name': 'bbh', 'subsets': _bbh}) diff --git a/docs/en/_templates/404.html b/docs/en/_templates/404.html new file mode 100644 index 00000000..64910175 --- /dev/null +++ b/docs/en/_templates/404.html @@ -0,0 +1,18 @@ +{% extends "layout.html" %} + +{% block body %} + +

+<h1>Page Not Found</h1>
+
+<p>
+  The page you are looking for cannot be found.
+</p>
+
+<p>
+  If you just switched documentation versions, it is likely that the page you were on is moved. You can look for it in
+  the content table left, or go to the homepage.
+</p>
+ + +{% endblock %} diff --git a/docs/en/advanced_guides/new_dataset.md b/docs/en/advanced_guides/new_dataset.md new file mode 100644 index 00000000..2d1cb0cb --- /dev/null +++ b/docs/en/advanced_guides/new_dataset.md @@ -0,0 +1 @@ +# New Dataset diff --git a/docs/zh_cn/prompt/few_shot.md b/docs/zh_cn/prompt/few_shot.md new file mode 100644 index 00000000..0539a2eb --- /dev/null +++ b/docs/zh_cn/prompt/few_shot.md @@ -0,0 +1 @@ +# Few-shot \ No newline at end of file diff --git a/docs/zh_cn/user_guides/config.md b/docs/zh_cn/user_guides/config.md new file mode 100644 index 00000000..fa4e6641 --- /dev/null +++ b/docs/zh_cn/user_guides/config.md @@ -0,0 +1,2 @@ +# 学习配置文件 + diff --git a/docs/zh_cn/user_guides/framework_overview.md b/docs/zh_cn/user_guides/framework_overview.md new file mode 100644 index 00000000..ead5053b --- /dev/null +++ b/docs/zh_cn/user_guides/framework_overview.md @@ -0,0 +1 @@ +# 整体概括 diff --git a/opencompass/datasets/TheoremQA.py b/opencompass/datasets/TheoremQA.py new file mode 100644 index 00000000..fc529a61 --- /dev/null +++ b/opencompass/datasets/TheoremQA.py @@ -0,0 +1,27 @@ +import re + +from datasets import load_dataset + +from opencompass.registry import LOAD_DATASET, TEXT_POSTPROCESSORS + +from .base import BaseDataset + + +@LOAD_DATASET.register_module() +class TheoremQADataset(BaseDataset): + + @staticmethod + def load(path: str): + return load_dataset('csv', data_files={'test': path}) + + +@TEXT_POSTPROCESSORS.register_module('TheoremQA') +def TheoremQA_postprocess(text: str) -> str: + + text = text.strip().split('\n')[0].strip() + matches = re.findall(r'answer is (.*)', text) + if len(matches) == 0: + return text + else: + text = matches[0].strip()[:-1] + return text diff --git a/opencompass/datasets/cb.py b/opencompass/datasets/cb.py new file mode 100644 index 00000000..3027183d --- /dev/null +++ b/opencompass/datasets/cb.py @@ -0,0 +1,25 @@ +import json + +from datasets import Dataset + +from opencompass.registry import LOAD_DATASET + +from .base import BaseDataset + + +@LOAD_DATASET.register_module() +class CBDataset_V2(BaseDataset): + + @staticmethod + def load(path): + dataset = [] + with open(path, 'r') as f: + for line in f: + line = json.loads(line) + line['label'] = { + 'contradiction': 'A', + 'entailment': 'B', + 'neutral': 'C' + }[line['label']] + dataset.append(line) + return Dataset.from_list(dataset) diff --git a/opencompass/datasets/chid.py b/opencompass/datasets/chid.py new file mode 100644 index 00000000..6c218edc --- /dev/null +++ b/opencompass/datasets/chid.py @@ -0,0 +1,43 @@ +import json + +from datasets import Dataset, load_dataset + +from opencompass.registry import LOAD_DATASET + +from .base import BaseDataset + + +@LOAD_DATASET.register_module() +class CHIDDataset(BaseDataset): + + @staticmethod + def load(**kwargs): + + dataset = load_dataset(**kwargs) + + def preprocess(example): + content = example['content'] + for i, c in enumerate(example['candidates']): + example[f'content{i}'] = content.replace('#idiom#', c) + return example + + dataset = dataset.map(preprocess) + return dataset + + +@LOAD_DATASET.register_module() +class CHIDDataset_V2(BaseDataset): + + @staticmethod + def load(path): + data = [] + with open(path, 'r') as f: + for line in f: + line = json.loads(line) + item = {} + item['content'] = line['content'].replace('#idiom#', '______') + for i, c in enumerate(line['candidates']): + item[chr(ord('A') + i)] = c + item['answer'] = 'ABCDEFG'[line['answer']] + data.append(item) + return Dataset.from_list(data) diff --git 
a/opencompass/datasets/civilcomments.py b/opencompass/datasets/civilcomments.py new file mode 100644 index 00000000..61dae8c4 --- /dev/null +++ b/opencompass/datasets/civilcomments.py @@ -0,0 +1,36 @@ +from datasets import DatasetDict, load_dataset + +from opencompass.registry import LOAD_DATASET + +from .base import BaseDataset + + +@LOAD_DATASET.register_module() +class CivilCommentsDataset(BaseDataset): + + @staticmethod + def load(**kwargs): + train_dataset = load_dataset(**kwargs, split='train') + test_dataset = load_dataset(**kwargs, split='test') + + def pre_process(example): + example['label'] = int(example['toxicity'] >= 0.5) + example['choices'] = ['no', 'yes'] + return example + + def remove_columns(dataset): + return dataset.remove_columns([ + 'severe_toxicity', 'obscene', 'threat', 'insult', + 'identity_attack', 'sexual_explicit' + ]) + + train_dataset = remove_columns(train_dataset) + test_dataset = remove_columns(test_dataset) + test_dataset = test_dataset.shuffle(seed=42) + test_dataset = test_dataset.select(list(range(10000))) + test_dataset = test_dataset.map(pre_process) + + return DatasetDict({ + 'train': train_dataset, + 'test': test_dataset, + }) diff --git a/opencompass/datasets/commonsenseqa.py b/opencompass/datasets/commonsenseqa.py new file mode 100644 index 00000000..17b836d0 --- /dev/null +++ b/opencompass/datasets/commonsenseqa.py @@ -0,0 +1,22 @@ +from datasets import load_dataset + +from opencompass.registry import LOAD_DATASET + +from .base import BaseDataset + + +@LOAD_DATASET.register_module() +class commonsenseqaDataset(BaseDataset): + + @staticmethod + def load(**kwargs): + dataset = load_dataset(**kwargs) + + def pre_process(example): + for i in range(5): + example[chr(ord('A') + i)] = example['choices']['text'][i] + return example + + dataset = dataset.map(pre_process).remove_columns( + ['question_concept', 'id', 'choices']) + return dataset diff --git a/opencompass/datasets/crowspairs.py b/opencompass/datasets/crowspairs.py new file mode 100644 index 00000000..c498099f --- /dev/null +++ b/opencompass/datasets/crowspairs.py @@ -0,0 +1,34 @@ +from datasets import load_dataset + +from opencompass.registry import LOAD_DATASET + +from .base import BaseDataset + + +@LOAD_DATASET.register_module() +class crowspairsDataset(BaseDataset): + + @staticmethod + def load(**kwargs): + + dataset = load_dataset(**kwargs) + + def preprocess(example): + example['label'] = 0 + return example + + return dataset.map(preprocess) + + +@LOAD_DATASET.register_module() +class crowspairsDataset_V2(BaseDataset): + + @staticmethod + def load(**kwargs): + dataset = load_dataset(**kwargs) + + def preprocess(example): + example['label'] = 'A' + return example + + return dataset.map(preprocess) diff --git a/opencompass/datasets/eprstmt.py b/opencompass/datasets/eprstmt.py new file mode 100644 index 00000000..dd14b960 --- /dev/null +++ b/opencompass/datasets/eprstmt.py @@ -0,0 +1,27 @@ +import json + +from datasets import Dataset + +from opencompass.registry import LOAD_DATASET + +from .base import BaseDataset + + +@LOAD_DATASET.register_module() +class eprstmtDataset_V2(BaseDataset): + + @staticmethod + def load(path): + data = [] + with open(path, 'r') as f: + for line in f: + line = json.loads(line) + item = { + 'sentence': line['sentence'], + 'label': { + 'Positive': 'A', + 'Negative': 'B', + }[line['label']], + } + data.append(item) + return Dataset.from_list(data) diff --git a/opencompass/datasets/huggingface.py b/opencompass/datasets/huggingface.py new file mode 100644 index 
00000000..2ae23e3f --- /dev/null +++ b/opencompass/datasets/huggingface.py @@ -0,0 +1,13 @@ +from datasets import load_dataset + +from opencompass.registry import LOAD_DATASET + +from .base import BaseDataset + + +@LOAD_DATASET.register_module() +class HFDataset(BaseDataset): + + @staticmethod + def load(**kwargs): + return load_dataset(**kwargs) diff --git a/opencompass/datasets/piqa.py b/opencompass/datasets/piqa.py new file mode 100644 index 00000000..f0bd4dcc --- /dev/null +++ b/opencompass/datasets/piqa.py @@ -0,0 +1,25 @@ +from datasets import load_dataset + +from opencompass.registry import LOAD_DATASET + +from .base import BaseDataset + + +@LOAD_DATASET.register_module() +class piqaDataset_V2(BaseDataset): + + @staticmethod + def load(**kwargs): + dataset = load_dataset(**kwargs) + + def preprocess(example): + assert isinstance(example['label'], int) + if example['label'] < 0: + example['answer'] = 'NULL' + else: + example['answer'] = 'AB'[example['label']] + example.pop('label') + return example + + dataset = dataset.map(preprocess) + return dataset diff --git a/opencompass/datasets/realtoxicprompts.py b/opencompass/datasets/realtoxicprompts.py new file mode 100644 index 00000000..4098bb3a --- /dev/null +++ b/opencompass/datasets/realtoxicprompts.py @@ -0,0 +1,30 @@ +from datasets import load_dataset + +from opencompass.registry import LOAD_DATASET + +from .base import BaseDataset + + +@LOAD_DATASET.register_module() +class RealToxicPromptsDataset(BaseDataset): + + @staticmethod + def load(**kwargs): + challenging_subset = kwargs.pop('challenging_subset', False) + dataset = load_dataset(**kwargs) + + def preprocess(example): + + for k, v in example['prompt'].items(): + k = 'prompt_' + k + example[k] = v + del example['prompt'] + + return example + + dataset = dataset.map(preprocess) + + # return challenging subset if necessary + if challenging_subset: + return dataset.filter(lambda example: example['challenging']) + return dataset diff --git a/opencompass/datasets/siqa.py b/opencompass/datasets/siqa.py new file mode 100644 index 00000000..5091ccd0 --- /dev/null +++ b/opencompass/datasets/siqa.py @@ -0,0 +1,20 @@ +from datasets import load_dataset + +from opencompass.registry import LOAD_DATASET + +from .base import BaseDataset + + +@LOAD_DATASET.register_module() +class siqaDataset_V2(BaseDataset): + + @staticmethod + def load(**kwargs): + dataset = load_dataset(**kwargs) + + def preprocess(example): + example['label'] = ' ABC'[int(example['label'])] + return example + + dataset = dataset.map(preprocess) + return dataset diff --git a/opencompass/openicl/icl_evaluator/__init__.py b/opencompass/openicl/icl_evaluator/__init__.py new file mode 100644 index 00000000..fc74ccac --- /dev/null +++ b/opencompass/openicl/icl_evaluator/__init__.py @@ -0,0 +1,5 @@ +from .icl_aucroc_evaluator import AUCROCEvaluator +from .icl_base_evaluator import BaseEvaluator +from .icl_em_evaluator import EMEvaluator +from .icl_hf_evaluator import * # noqa +from .icl_toxic_evaluator import ToxicEvaluator diff --git a/opencompass/openicl/utils/logging.py b/opencompass/openicl/utils/logging.py new file mode 100644 index 00000000..daa792ec --- /dev/null +++ b/opencompass/openicl/utils/logging.py @@ -0,0 +1,40 @@ +import logging + +import torch.distributed as dist + +LOG_LEVEL = logging.INFO +SUBPROCESS_LOG_LEVEL = logging.ERROR +LOG_FORMATTER = '[%(asctime)s] [%(name)s] [%(levelname)s] %(message)s' + + +def get_logger(name, level=LOG_LEVEL, log_file=None, file_mode='w'): + formatter = 
logging.Formatter(LOG_FORMATTER) + + logger = logging.getLogger(name) + + for handler in logger.root.handlers: + if type(handler) is logging.StreamHandler: + handler.setLevel(logging.ERROR) + + if dist.is_available() and dist.is_initialized(): + rank = dist.get_rank() + else: + rank = 0 + + if rank == 0 and log_file is not None: + file_handler = logging.FileHandler(log_file, file_mode) + file_handler.setFormatter(formatter) + file_handler.setLevel(level) + logger.addHandler(file_handler) + + if rank == 0: + logger.setLevel(level) + else: + logger.setLevel(SUBPROCESS_LOG_LEVEL) + + stream_handler = logging.StreamHandler() + stream_handler.setFormatter(formatter) + stream_handler.setLevel(level) + logger.addHandler(stream_handler) + + return logger diff --git a/opencompass/partitioners/__init__.py b/opencompass/partitioners/__init__.py new file mode 100644 index 00000000..836081fb --- /dev/null +++ b/opencompass/partitioners/__init__.py @@ -0,0 +1,2 @@ +from .naive import * # noqa: F401, F403 +from .size import * # noqa: F401, F403 diff --git a/opencompass/utils/__init__.py b/opencompass/utils/__init__.py new file mode 100644 index 00000000..c52f215d --- /dev/null +++ b/opencompass/utils/__init__.py @@ -0,0 +1,10 @@ +from .abbr import * # noqa +from .build import * # noqa +from .fileio import * # noqa +from .git import * # noqa +from .lark import * # noqa +from .logging import * # noqa +from .menu import * # noqa +from .prompt import * # noqa +from .summarizer import * # noqa +from .text_postprocessors import * # noqa diff --git a/opencompass/utils/build.py b/opencompass/utils/build.py new file mode 100644 index 00000000..a4e50a36 --- /dev/null +++ b/opencompass/utils/build.py @@ -0,0 +1,22 @@ +import copy + +from mmengine.config import ConfigDict + +from opencompass.registry import LOAD_DATASET, MODELS + + +def build_dataset_from_cfg(dataset_cfg: ConfigDict) -> ConfigDict: + dataset_cfg = copy.deepcopy(dataset_cfg) + dataset_cfg.pop('infer_cfg', None) + dataset_cfg.pop('eval_cfg', None) + dataset_cfg.pop('abbr', None) + return LOAD_DATASET.build(dataset_cfg) + + +def build_model_from_cfg(model_cfg: ConfigDict) -> ConfigDict: + model_cfg = copy.deepcopy(model_cfg) + model_cfg.pop('run_cfg', None) + model_cfg.pop('max_out_len', None) + model_cfg.pop('batch_size', None) + model_cfg.pop('abbr', None) + return MODELS.build(model_cfg) diff --git a/opencompass/utils/types.py b/opencompass/utils/types.py new file mode 100644 index 00000000..914213c9 --- /dev/null +++ b/opencompass/utils/types.py @@ -0,0 +1,45 @@ +from typing import Dict, List, Union + +from datasets import Dataset, DatasetDict + + +def _check_type_list(obj, typelist: List): + for _type in typelist: + if _type is None: + if obj is None: + return obj + elif isinstance(obj, _type): + return obj + raise TypeError( + f'Expected an object in {[_.__name__ if _ is not None else None for _ in typelist]} type, but got {obj}' + ) + + +def _check_dataset(obj) -> Union[Dataset, DatasetDict]: + if isinstance(obj, Dataset) or isinstance(obj, DatasetDict): + return obj + else: + raise TypeError( + f'Expected a datasets.Dataset or a datasets.DatasetDict object, but got {obj}' + ) + + +def _check_list(obj) -> List: + if isinstance(obj, List): + return obj + else: + raise TypeError(f'Expected a List object, but got {obj}') + + +def _check_str(obj) -> str: + if isinstance(obj, str): + return obj + else: + raise TypeError(f'Expected a str object, but got {obj}') + + +def _check_dict(obj) -> Dict: + if isinstance(obj, Dict): + return obj + else: + raise 
TypeError(f'Expected a Dict object, but got {obj}')
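
As a rough usage sketch (not part of the patch, and assuming `opencompass.utils.types` is importable as laid out above): these guards return the object untouched when its type matches and raise `TypeError` otherwise.

from datasets import Dataset

from opencompass.utils.types import (_check_dataset, _check_dict,
                                     _check_type_list)

# Including None in the type list makes None an accepted value.
assert _check_type_list(None, [None, str]) is None
assert _check_type_list('abc', [None, str]) == 'abc'

# Dataset / DatasetDict objects pass through unchanged.
ds = Dataset.from_list([{'question': '1+1', 'answer': '2'}])
assert _check_dataset(ds) is ds

# Anything else raises with a descriptive message.
try:
    _check_dict(['not', 'a', 'dict'])
except TypeError as err:
    print(err)  # Expected a Dict object, but got ['not', 'a', 'dict']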