From fb11108723e76ab05df1df56d30901dc7b41ffa3 Mon Sep 17 00:00:00 2001
From: yingfhu
Date: Tue, 4 Jul 2023 22:11:33 +0800
Subject: [PATCH] [Feat] support opencompass
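
Add initial OpenCompass support: dataset configs (CLUE, FewCLUE, SuperGLUE,
BBH, math, humaneval and more), a base_small dataset collection, model and
summarizer configs, documentation pages, and core opencompass modules
(datasets, openicl evaluators, partitioners and utils).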
---
configs/datasets/ARC_c/ARC_c_ppl_2b1755.py | 33 ++++++++++++
configs/datasets/CLUE_C3/CLUE_C3_gen.py | 4 ++
.../datasets/CLUE_C3/CLUE_C3_gen_9e3de9.py | 50 +++++++++++++++++
configs/datasets/CLUE_DRCD/CLUE_DRCD_gen.py | 4 ++
.../CLUE_afqmc/CLUE_afqmc_gen_db509b.py | 42 +++++++++++++++
.../CLUE_afqmc/CLUE_afqmc_ppl_00b348.py | 34 ++++++++++++
.../CLUE_afqmc/CLUE_afqmc_ppl_2313cf.py | 44 +++++++++++++++
configs/datasets/CLUE_cmnli/CLUE_cmnli_gen.py | 4 ++
configs/datasets/CLUE_ocnli/CLUE_ocnli_gen.py | 4 ++
.../datasets/FewCLUE_chid/FewCLUE_chid_ppl.py | 4 ++
.../FewCLUE_cluewsc/FewCLUE_cluewsc_gen.py | 4 ++
.../FewCLUE_cluewsc_gen_276956.py | 50 +++++++++++++++++
.../datasets/FewCLUE_csl/FewCLUE_csl_gen.py | 4 ++
.../FewCLUE_csl/FewCLUE_csl_ppl_8eee08.py | 41 ++++++++++++++
.../FewCLUE_ocnli_fc_gen_bef37f.py | 49 +++++++++++++++++
.../FewCLUE_tnews/FewCLUE_tnews_ppl_33cc73.py | 48 +++++++++++++++++
.../SuperGLUE_AX_b_ppl_4bd960.py | 53 +++++++++++++++++++
.../SuperGLUE_BoolQ_ppl_f80fb0.py | 45 ++++++++++++++++
.../SuperGLUE_CB/SuperGLUE_CB_ppl_012063.py | 33 ++++++++++++
.../SuperGLUE_COPA/SuperGLUE_COPA_gen.py | 4 ++
.../SuperGLUE_COPA/SuperGLUE_COPA_ppl.py | 4 ++
.../SuperGLUE_COPA_ppl_ed59be.py | 34 ++++++++++++
.../SuperGLUE_MultiRC_gen.py | 4 ++
.../SuperGLUE_MultiRC_ppl_1123bd.py | 30 +++++++++++
.../SuperGLUE_RTE/SuperGLUE_RTE_ppl.py | 4 ++
.../SuperGLUE_ReCoRD/SuperGLUE_ReCoRD_gen.py | 4 ++
.../SuperGLUE_ReCoRD_gen_d8f19c.py | 42 +++++++++++++++
.../SuperGLUE_WSC/SuperGLUE_WSC_ppl_85f45f.py | 51 ++++++++++++++++++
configs/datasets/XLSum/XLSum_gen.py | 4 ++
configs/datasets/bbh/bbh_gen.py | 4 ++
configs/datasets/collections/base_small.py | 39 ++++++++++++++
configs/datasets/glm/nq.py | 38 +++++++++++++
configs/datasets/glm/triviaqa.py | 41 ++++++++++++++
.../humaneval/humaneval_gen_581044.py | 40 ++++++++++++++
.../jigsawmultilingual_ppl.py | 4 ++
.../jigsawmultilingual_ppl_640128.py | 47 ++++++++++++++++
.../jigsawmultilingual_ppl_da5d28.py | 43 +++++++++++++++
configs/datasets/lambada/lambada_gen.py | 4 ++
configs/datasets/math/math_gen.py | 4 ++
configs/datasets/math/math_gen_b4c82a.py | 53 +++++++++++++++++++
.../datasets/narrativeqa/narrativeqa_gen.py | 4 ++
configs/datasets/nq/nq_gen_c00b89.py | 29 ++++++++++
configs/datasets/piqa/piqa_gen.py | 4 ++
configs/datasets/piqa/piqa_ppl_788dbe.py | 31 +++++++++++
configs/datasets/qabench/qabench_gen.py | 4 ++
configs/datasets/race/race_gen_12de48.py | 46 ++++++++++++++++
configs/datasets/race/race_gen_d18b89.py | 40 ++++++++++++++
configs/datasets/race/race_ppl.py | 4 ++
.../realtoxicprompts/realtoxicprompts_gen.py | 4 ++
configs/datasets/safety/safety_gen.py | 4 ++
configs/datasets/siqa/siqa_gen_a3c714.py | 42 +++++++++++++++
configs/datasets/siqa/siqa_ppl.py | 4 ++
configs/datasets/storycloze/storycloze_ppl.py | 4 ++
.../storycloze/storycloze_ppl_7f4c64.py | 36 +++++++++++++
configs/datasets/summedits/summedits_gen.py | 4 ++
.../summedits/summedits_gen_4f35b5.py | 37 +++++++++++++
.../triviaqarc/triviaqarc_gen_a02306.py | 30 +++++++++++
configs/models/classic/tigerbot-7b-sft.py | 29 ++++++++++
configs/summarizers/groups/bbh.py | 6 +++
docs/en/_templates/404.html | 18 +++++++
docs/en/advanced_guides/new_dataset.md | 1 +
docs/zh_cn/prompt/few_shot.md | 1 +
docs/zh_cn/user_guides/config.md | 2 +
docs/zh_cn/user_guides/framework_overview.md | 1 +
opencompass/datasets/TheoremQA.py | 27 ++++++++++
opencompass/datasets/cb.py | 25 +++++++++
opencompass/datasets/chid.py | 43 +++++++++++++++
opencompass/datasets/civilcomments.py | 36 +++++++++++++
opencompass/datasets/commonsenseqa.py | 22 ++++++++
opencompass/datasets/crowspairs.py | 34 ++++++++++++
opencompass/datasets/eprstmt.py | 27 ++++++++++
opencompass/datasets/huggingface.py | 13 +++++
opencompass/datasets/piqa.py | 25 +++++++++
opencompass/datasets/realtoxicprompts.py | 30 +++++++++++
opencompass/datasets/siqa.py | 20 +++++++
opencompass/openicl/icl_evaluator/__init__.py | 5 ++
opencompass/openicl/utils/logging.py | 40 ++++++++++++++
opencompass/partitioners/__init__.py | 2 +
opencompass/utils/__init__.py | 10 ++++
opencompass/utils/build.py | 22 ++++++++
opencompass/utils/types.py | 45 ++++++++++++++++
81 files changed, 1859 insertions(+)
create mode 100644 configs/datasets/ARC_c/ARC_c_ppl_2b1755.py
create mode 100644 configs/datasets/CLUE_C3/CLUE_C3_gen.py
create mode 100644 configs/datasets/CLUE_C3/CLUE_C3_gen_9e3de9.py
create mode 100644 configs/datasets/CLUE_DRCD/CLUE_DRCD_gen.py
create mode 100644 configs/datasets/CLUE_afqmc/CLUE_afqmc_gen_db509b.py
create mode 100644 configs/datasets/CLUE_afqmc/CLUE_afqmc_ppl_00b348.py
create mode 100644 configs/datasets/CLUE_afqmc/CLUE_afqmc_ppl_2313cf.py
create mode 100644 configs/datasets/CLUE_cmnli/CLUE_cmnli_gen.py
create mode 100644 configs/datasets/CLUE_ocnli/CLUE_ocnli_gen.py
create mode 100644 configs/datasets/FewCLUE_chid/FewCLUE_chid_ppl.py
create mode 100644 configs/datasets/FewCLUE_cluewsc/FewCLUE_cluewsc_gen.py
create mode 100644 configs/datasets/FewCLUE_cluewsc/FewCLUE_cluewsc_gen_276956.py
create mode 100644 configs/datasets/FewCLUE_csl/FewCLUE_csl_gen.py
create mode 100644 configs/datasets/FewCLUE_csl/FewCLUE_csl_ppl_8eee08.py
create mode 100644 configs/datasets/FewCLUE_ocnli_fc/FewCLUE_ocnli_fc_gen_bef37f.py
create mode 100644 configs/datasets/FewCLUE_tnews/FewCLUE_tnews_ppl_33cc73.py
create mode 100644 configs/datasets/SuperGLUE_AX_b/SuperGLUE_AX_b_ppl_4bd960.py
create mode 100644 configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_ppl_f80fb0.py
create mode 100644 configs/datasets/SuperGLUE_CB/SuperGLUE_CB_ppl_012063.py
create mode 100644 configs/datasets/SuperGLUE_COPA/SuperGLUE_COPA_gen.py
create mode 100644 configs/datasets/SuperGLUE_COPA/SuperGLUE_COPA_ppl.py
create mode 100644 configs/datasets/SuperGLUE_COPA/SuperGLUE_COPA_ppl_ed59be.py
create mode 100644 configs/datasets/SuperGLUE_MultiRC/SuperGLUE_MultiRC_gen.py
create mode 100644 configs/datasets/SuperGLUE_MultiRC/SuperGLUE_MultiRC_ppl_1123bd.py
create mode 100644 configs/datasets/SuperGLUE_RTE/SuperGLUE_RTE_ppl.py
create mode 100644 configs/datasets/SuperGLUE_ReCoRD/SuperGLUE_ReCoRD_gen.py
create mode 100644 configs/datasets/SuperGLUE_ReCoRD/SuperGLUE_ReCoRD_gen_d8f19c.py
create mode 100644 configs/datasets/SuperGLUE_WSC/SuperGLUE_WSC_ppl_85f45f.py
create mode 100644 configs/datasets/XLSum/XLSum_gen.py
create mode 100644 configs/datasets/bbh/bbh_gen.py
create mode 100644 configs/datasets/collections/base_small.py
create mode 100644 configs/datasets/glm/nq.py
create mode 100644 configs/datasets/glm/triviaqa.py
create mode 100644 configs/datasets/humaneval/humaneval_gen_581044.py
create mode 100644 configs/datasets/jigsawmultilingual/jigsawmultilingual_ppl.py
create mode 100644 configs/datasets/jigsawmultilingual/jigsawmultilingual_ppl_640128.py
create mode 100644 configs/datasets/jigsawmultilingual/jigsawmultilingual_ppl_da5d28.py
create mode 100644 configs/datasets/lambada/lambada_gen.py
create mode 100644 configs/datasets/math/math_gen.py
create mode 100644 configs/datasets/math/math_gen_b4c82a.py
create mode 100644 configs/datasets/narrativeqa/narrativeqa_gen.py
create mode 100644 configs/datasets/nq/nq_gen_c00b89.py
create mode 100644 configs/datasets/piqa/piqa_gen.py
create mode 100644 configs/datasets/piqa/piqa_ppl_788dbe.py
create mode 100644 configs/datasets/qabench/qabench_gen.py
create mode 100644 configs/datasets/race/race_gen_12de48.py
create mode 100644 configs/datasets/race/race_gen_d18b89.py
create mode 100644 configs/datasets/race/race_ppl.py
create mode 100644 configs/datasets/realtoxicprompts/realtoxicprompts_gen.py
create mode 100644 configs/datasets/safety/safety_gen.py
create mode 100644 configs/datasets/siqa/siqa_gen_a3c714.py
create mode 100644 configs/datasets/siqa/siqa_ppl.py
create mode 100644 configs/datasets/storycloze/storycloze_ppl.py
create mode 100644 configs/datasets/storycloze/storycloze_ppl_7f4c64.py
create mode 100644 configs/datasets/summedits/summedits_gen.py
create mode 100644 configs/datasets/summedits/summedits_gen_4f35b5.py
create mode 100644 configs/datasets/triviaqarc/triviaqarc_gen_a02306.py
create mode 100644 configs/models/classic/tigerbot-7b-sft.py
create mode 100644 configs/summarizers/groups/bbh.py
create mode 100644 docs/en/_templates/404.html
create mode 100644 docs/en/advanced_guides/new_dataset.md
create mode 100644 docs/zh_cn/prompt/few_shot.md
create mode 100644 docs/zh_cn/user_guides/config.md
create mode 100644 docs/zh_cn/user_guides/framework_overview.md
create mode 100644 opencompass/datasets/TheoremQA.py
create mode 100644 opencompass/datasets/cb.py
create mode 100644 opencompass/datasets/chid.py
create mode 100644 opencompass/datasets/civilcomments.py
create mode 100644 opencompass/datasets/commonsenseqa.py
create mode 100644 opencompass/datasets/crowspairs.py
create mode 100644 opencompass/datasets/eprstmt.py
create mode 100644 opencompass/datasets/huggingface.py
create mode 100644 opencompass/datasets/piqa.py
create mode 100644 opencompass/datasets/realtoxicprompts.py
create mode 100644 opencompass/datasets/siqa.py
create mode 100644 opencompass/openicl/icl_evaluator/__init__.py
create mode 100644 opencompass/openicl/utils/logging.py
create mode 100644 opencompass/partitioners/__init__.py
create mode 100644 opencompass/utils/__init__.py
create mode 100644 opencompass/utils/build.py
create mode 100644 opencompass/utils/types.py
diff --git a/configs/datasets/ARC_c/ARC_c_ppl_2b1755.py b/configs/datasets/ARC_c/ARC_c_ppl_2b1755.py
new file mode 100644
index 00000000..f0351b49
--- /dev/null
+++ b/configs/datasets/ARC_c/ARC_c_ppl_2b1755.py
@@ -0,0 +1,33 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import PPLInferencer
+from opencompass.openicl.icl_evaluator import AccEvaluator
+from opencompass.datasets import ARCDataset
+
+ARC_c_reader_cfg = dict(
+ input_columns=['question', 'textA', 'textB', 'textC', 'textD'],
+ output_column='answerKey')
+
+ARC_c_infer_cfg = dict(
+ prompt_template=dict(
+ type=PromptTemplate,
+ template={
+ "A": "Question: {question}\nAnswer: {textA}",
+ "B": "Question: {question}\nAnswer: {textB}",
+ "C": "Question: {question}\nAnswer: {textC}",
+ "D": "Question: {question}\nAnswer: {textD}"
+ }),
+ retriever=dict(type=ZeroRetriever),
+ inferencer=dict(type=PPLInferencer))
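+# Each answer option above has its own template: PPLInferencer scores the
+# perplexity of every rendered candidate and takes the lowest-perplexity key
+# ("A"-"D") as the prediction, which AccEvaluator compares against `answerKey`.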
+
+ARC_c_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
+
+ARC_c_datasets = [
+ dict(
+ type=ARCDataset,
+ abbr='ARC-c',
+ path='./data/ARC/ARC-c/ARC-Challenge-Dev.jsonl',
+ reader_cfg=ARC_c_reader_cfg,
+ infer_cfg=ARC_c_infer_cfg,
+ eval_cfg=ARC_c_eval_cfg)
+]
diff --git a/configs/datasets/CLUE_C3/CLUE_C3_gen.py b/configs/datasets/CLUE_C3/CLUE_C3_gen.py
new file mode 100644
index 00000000..6231c46d
--- /dev/null
+++ b/configs/datasets/CLUE_C3/CLUE_C3_gen.py
@@ -0,0 +1,4 @@
+from mmengine.config import read_base
+
+with read_base():
+ from .CLUE_C3_gen_9e3de9 import C3_datasets # noqa: F401, F403
diff --git a/configs/datasets/CLUE_C3/CLUE_C3_gen_9e3de9.py b/configs/datasets/CLUE_C3/CLUE_C3_gen_9e3de9.py
new file mode 100644
index 00000000..af5cb16f
--- /dev/null
+++ b/configs/datasets/CLUE_C3/CLUE_C3_gen_9e3de9.py
@@ -0,0 +1,50 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.openicl.icl_evaluator import AccEvaluator
+from opencompass.datasets import C3Dataset_V2
+
+C3_reader_cfg = dict(
+ input_columns=[
+ "question",
+ "content",
+ "choice0",
+ "choice1",
+ "choice2",
+ "choice3",
+ "choices",
+ ],
+ output_column="label",
+)
+
+C3_infer_cfg = dict(
+ prompt_template=dict(
+ type=PromptTemplate,
+ template=dict(round=[
+ dict(
+ role="HUMAN",
+ prompt=
+ "{content}\n问:{question}\nA. {choice0}\nB. {choice1}\nC. {choice2}\nD. {choice3}\n请从“A”,“B”,“C”,“D”中进行选择。\n答:",
+ ),
+ ]),
+ ),
+ retriever=dict(type=ZeroRetriever),
+ inferencer=dict(type=GenInferencer),
+)
+
+C3_eval_cfg = dict(
+ evaluator=dict(type=AccEvaluator),
+ pred_role="BOT",
+ pred_postprocessor=dict(type="first-capital"),
+)
+
+C3_datasets = [
+ dict(
+ abbr="C3",
+ type=C3Dataset_V2,
+ path="./data/CLUE/C3/dev_0.json",
+ reader_cfg=C3_reader_cfg,
+ infer_cfg=C3_infer_cfg,
+ eval_cfg=C3_eval_cfg,
+ )
+]
diff --git a/configs/datasets/CLUE_DRCD/CLUE_DRCD_gen.py b/configs/datasets/CLUE_DRCD/CLUE_DRCD_gen.py
new file mode 100644
index 00000000..bcd8fac0
--- /dev/null
+++ b/configs/datasets/CLUE_DRCD/CLUE_DRCD_gen.py
@@ -0,0 +1,4 @@
+from mmengine.config import read_base
+
+with read_base():
+ from .CLUE_DRCD_gen_03b96b import DRCD_datasets # noqa: F401, F403
diff --git a/configs/datasets/CLUE_afqmc/CLUE_afqmc_gen_db509b.py b/configs/datasets/CLUE_afqmc/CLUE_afqmc_gen_db509b.py
new file mode 100644
index 00000000..7591d29c
--- /dev/null
+++ b/configs/datasets/CLUE_afqmc/CLUE_afqmc_gen_db509b.py
@@ -0,0 +1,42 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.openicl.icl_evaluator import AccEvaluator
+from opencompass.datasets import AFQMCDataset_V2
+
+afqmc_reader_cfg = dict(
+ input_columns=["sentence1", "sentence2"],
+ output_column="label",
+ test_split="train")
+
+afqmc_infer_cfg = dict(
+ prompt_template=dict(
+ type=PromptTemplate,
+ template=dict(round=[
+ dict(
+ role="HUMAN",
+ prompt=
+ "语句一:“{sentence1}”\n语句二:“{sentence2}”\n语句一与语句二是关于蚂蚁金融产品的疑问,两者所询问的内容是否完全一致?\nA. 不完全一致\nB. 完全一致\n请从“A”,“B”中进行选择。\n答:",
+ ),
+ ]),
+ ),
+ retriever=dict(type=ZeroRetriever),
+ inferencer=dict(type=GenInferencer),
+)
+
+afqmc_eval_cfg = dict(
+ evaluator=dict(type=AccEvaluator),
+ pred_role="BOT",
+ pred_postprocessor=dict(type="first-capital"),
+)
+
+afqmc_datasets = [
+ dict(
+ abbr="afqmc-dev",
+ type=AFQMCDataset_V2,
+ path="./data/CLUE/AFQMC/dev.json",
+ reader_cfg=afqmc_reader_cfg,
+ infer_cfg=afqmc_infer_cfg,
+ eval_cfg=afqmc_eval_cfg,
+ ),
+]
diff --git a/configs/datasets/CLUE_afqmc/CLUE_afqmc_ppl_00b348.py b/configs/datasets/CLUE_afqmc/CLUE_afqmc_ppl_00b348.py
new file mode 100644
index 00000000..57bb8d8b
--- /dev/null
+++ b/configs/datasets/CLUE_afqmc/CLUE_afqmc_ppl_00b348.py
@@ -0,0 +1,34 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import PPLInferencer
+from opencompass.openicl.icl_evaluator import AccEvaluator
+from opencompass.datasets import HFDataset
+
+afqmc_reader_cfg = dict(
+ input_columns=['sentence1', 'sentence2'],
+ output_column='label',
+ test_split='train')
+
+afqmc_infer_cfg = dict(
+ prompt_template=dict(
+ type=PromptTemplate,
+ template={
+ 0: "{sentence1},{sentence2}不同。",
+ 1: "{sentence1},{sentence2}相似。"
+ }),
+ retriever=dict(type=ZeroRetriever),
+ inferencer=dict(type=PPLInferencer))
+
+afqmc_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
+
+afqmc_datasets = [
+ dict(
+ type=HFDataset,
+ abbr='afqmc-dev',
+ path='json',
+ data_files='./data/CLUE/AFQMC/dev.json',
+ split='train',
+ reader_cfg=afqmc_reader_cfg,
+ infer_cfg=afqmc_infer_cfg,
+ eval_cfg=afqmc_eval_cfg),
+]
diff --git a/configs/datasets/CLUE_afqmc/CLUE_afqmc_ppl_2313cf.py b/configs/datasets/CLUE_afqmc/CLUE_afqmc_ppl_2313cf.py
new file mode 100644
index 00000000..fc329e1a
--- /dev/null
+++ b/configs/datasets/CLUE_afqmc/CLUE_afqmc_ppl_2313cf.py
@@ -0,0 +1,44 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import PPLInferencer
+from opencompass.openicl.icl_evaluator import AccEvaluator
+from opencompass.datasets import HFDataset
+
+afqmc_reader_cfg = dict(
+ input_columns=['sentence1', 'sentence2'],
+ output_column='label',
+ test_split='train')
+
+afqmc_infer_cfg = dict(
+ prompt_template=dict(
+ type=PromptTemplate,
+ template={
+ 0:
+ dict(round=[
+ dict(
+ role="HUMAN", prompt="“{sentence1}”与“{sentence2}”不同还是相似?"),
+ dict(role="BOT", prompt="不同。")
+ ]),
+ 1:
+ dict(round=[
+ dict(
+ role="HUMAN", prompt="“{sentence1}”与“{sentence2}”不同还是相似?"),
+ dict(role="BOT", prompt="相似")
+ ]),
+ }),
+ retriever=dict(type=ZeroRetriever),
+ inferencer=dict(type=PPLInferencer))
+
+afqmc_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
+
+afqmc_datasets = [
+ dict(
+ type=HFDataset,
+ abbr='afqmc-dev',
+ path='json',
+ data_files='./data/CLUE/AFQMC/dev.json',
+ split='train',
+ reader_cfg=afqmc_reader_cfg,
+ infer_cfg=afqmc_infer_cfg,
+ eval_cfg=afqmc_eval_cfg),
+]
diff --git a/configs/datasets/CLUE_cmnli/CLUE_cmnli_gen.py b/configs/datasets/CLUE_cmnli/CLUE_cmnli_gen.py
new file mode 100644
index 00000000..2ff9e91a
--- /dev/null
+++ b/configs/datasets/CLUE_cmnli/CLUE_cmnli_gen.py
@@ -0,0 +1,4 @@
+from mmengine.config import read_base
+
+with read_base():
+ from .CLUE_cmnli_gen_316313 import cmnli_datasets # noqa: F401, F403
diff --git a/configs/datasets/CLUE_ocnli/CLUE_ocnli_gen.py b/configs/datasets/CLUE_ocnli/CLUE_ocnli_gen.py
new file mode 100644
index 00000000..e27d25f2
--- /dev/null
+++ b/configs/datasets/CLUE_ocnli/CLUE_ocnli_gen.py
@@ -0,0 +1,4 @@
+from mmengine.config import read_base
+
+with read_base():
+ from .CLUE_ocnli_gen_7c44b0 import ocnli_datasets # noqa: F401, F403
diff --git a/configs/datasets/FewCLUE_chid/FewCLUE_chid_ppl.py b/configs/datasets/FewCLUE_chid/FewCLUE_chid_ppl.py
new file mode 100644
index 00000000..0845bf3e
--- /dev/null
+++ b/configs/datasets/FewCLUE_chid/FewCLUE_chid_ppl.py
@@ -0,0 +1,4 @@
+from mmengine.config import read_base
+
+with read_base():
+ from .FewCLUE_chid_ppl_b6cd88 import chid_datasets # noqa: F401, F403
diff --git a/configs/datasets/FewCLUE_cluewsc/FewCLUE_cluewsc_gen.py b/configs/datasets/FewCLUE_cluewsc/FewCLUE_cluewsc_gen.py
new file mode 100644
index 00000000..4b77bf5e
--- /dev/null
+++ b/configs/datasets/FewCLUE_cluewsc/FewCLUE_cluewsc_gen.py
@@ -0,0 +1,4 @@
+from mmengine.config import read_base
+
+with read_base():
+ from .FewCLUE_cluewsc_gen_276956 import cluewsc_datasets # noqa: F401, F403
diff --git a/configs/datasets/FewCLUE_cluewsc/FewCLUE_cluewsc_gen_276956.py b/configs/datasets/FewCLUE_cluewsc/FewCLUE_cluewsc_gen_276956.py
new file mode 100644
index 00000000..fd9fbc00
--- /dev/null
+++ b/configs/datasets/FewCLUE_cluewsc/FewCLUE_cluewsc_gen_276956.py
@@ -0,0 +1,50 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.openicl.icl_evaluator import AccEvaluator
+from opencompass.datasets import CluewscDataset_V2
+
+cluewsc_reader_cfg = dict(
+ input_columns=["span1", "span2", "text", "new_text"],
+ output_column="label",
+)
+
+cluewsc_infer_cfg = dict(
+ prompt_template=dict(
+ type=PromptTemplate,
+ template=dict(round=[
+ dict(
+ role="HUMAN",
+ prompt=
+ "{text}\n此处,“{span2}”是否指代“{span1}“?\nA. 是\nB. 否\n请从”A“,”B“中进行选择。\n答:",
+ ),
+ ]),
+ ),
+ retriever=dict(type=ZeroRetriever),
+ inferencer=dict(type=GenInferencer),
+)
+
+cluewsc_eval_cfg = dict(
+ evaluator=dict(type=AccEvaluator),
+ pred_role="BOT",
+ pred_postprocessor=dict(type="first-capital"),
+)
+
+cluewsc_datasets = [
+ dict(
+ abbr="cluewsc-dev",
+ type=CluewscDataset_V2,
+ path="./data/FewCLUE/cluewsc/dev_few_all.json",
+ reader_cfg=cluewsc_reader_cfg,
+ infer_cfg=cluewsc_infer_cfg,
+ eval_cfg=cluewsc_eval_cfg,
+ ),
+ dict(
+ abbr="cluewsc-test",
+ type=CluewscDataset_V2,
+ path="./data/FewCLUE/cluewsc/test_public.json",
+ reader_cfg=cluewsc_reader_cfg,
+ infer_cfg=cluewsc_infer_cfg,
+ eval_cfg=cluewsc_eval_cfg,
+ ),
+]
diff --git a/configs/datasets/FewCLUE_csl/FewCLUE_csl_gen.py b/configs/datasets/FewCLUE_csl/FewCLUE_csl_gen.py
new file mode 100644
index 00000000..0f62a452
--- /dev/null
+++ b/configs/datasets/FewCLUE_csl/FewCLUE_csl_gen.py
@@ -0,0 +1,4 @@
+from mmengine.config import read_base
+
+with read_base():
+ from .FewCLUE_csl_gen_1b0c02 import csl_datasets # noqa: F401, F403
diff --git a/configs/datasets/FewCLUE_csl/FewCLUE_csl_ppl_8eee08.py b/configs/datasets/FewCLUE_csl/FewCLUE_csl_ppl_8eee08.py
new file mode 100644
index 00000000..da875b07
--- /dev/null
+++ b/configs/datasets/FewCLUE_csl/FewCLUE_csl_ppl_8eee08.py
@@ -0,0 +1,41 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import PPLInferencer
+from opencompass.openicl.icl_evaluator import AccEvaluator
+from opencompass.datasets import CslDataset
+
+csl_reader_cfg = dict(
+ input_columns=["abst", "keywords"], output_column='label')
+
+csl_infer_cfg = dict(
+ prompt_template=dict(
+ type=PromptTemplate,
+ template={
+ 0: "摘要:{abst}",
+ 1: "摘要:{abst}\n关键词:{keywords}"
+ }),
+ retriever=dict(type=ZeroRetriever),
+ inferencer=dict(type=PPLInferencer))
+
+csl_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
+
+csl_datasets = [
+ dict(
+ type=CslDataset,
+ path='json',
+ abbr='csl_dev',
+ data_files='./data/FewCLUE/csl/dev_few_all.json',
+ split='train',
+ reader_cfg=csl_reader_cfg,
+ infer_cfg=csl_infer_cfg,
+ eval_cfg=csl_eval_cfg),
+ dict(
+ type=CslDataset,
+ path='json',
+ abbr='csl_test',
+ data_files='./data/FewCLUE/csl/test_public.json',
+ split='train',
+ reader_cfg=csl_reader_cfg,
+ infer_cfg=csl_infer_cfg,
+ eval_cfg=csl_eval_cfg)
+]
diff --git a/configs/datasets/FewCLUE_ocnli_fc/FewCLUE_ocnli_fc_gen_bef37f.py b/configs/datasets/FewCLUE_ocnli_fc/FewCLUE_ocnli_fc_gen_bef37f.py
new file mode 100644
index 00000000..2caa8888
--- /dev/null
+++ b/configs/datasets/FewCLUE_ocnli_fc/FewCLUE_ocnli_fc_gen_bef37f.py
@@ -0,0 +1,49 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.openicl.icl_evaluator import AccEvaluator
+from opencompass.datasets import cmnliDataset_V2
+
+ocnli_fc_reader_cfg = dict(
+ input_columns=["sentence1", "sentence2"],
+ output_column="label",
+ test_split="train")
+
+ocnli_fc_infer_cfg = dict(
+ prompt_template=dict(
+ type=PromptTemplate,
+ template=dict(round=[
+ dict(
+ role="HUMAN",
+ prompt=
+ "阅读文章:{sentence1}\n根据上文,回答如下问题:{sentence2}\nA. 对\nB. 错\nC. 可能\n请从“A”,“B”,“C”中进行选择。\n答:"
+ ),
+ ]),
+ ),
+ retriever=dict(type=ZeroRetriever),
+ inferencer=dict(type=GenInferencer),
+)
+ocnli_fc_eval_cfg = dict(
+ evaluator=dict(type=AccEvaluator),
+ pred_role="BOT",
+ pred_postprocessor=dict(type="first-capital"),
+)
+
+ocnli_fc_datasets = [
+ dict(
+ abbr="ocnli_fc-dev",
+ type=cmnliDataset_V2, # ocnli_fc share the same format with cmnli
+ path="./data/FewCLUE/ocnli/dev_few_all.json",
+ reader_cfg=ocnli_fc_reader_cfg,
+ infer_cfg=ocnli_fc_infer_cfg,
+ eval_cfg=ocnli_fc_eval_cfg,
+ ),
+ dict(
+ abbr="ocnli_fc-test",
+ type=cmnliDataset_V2, # ocnli_fc share the same format with cmnli
+ path="./data/FewCLUE/ocnli/test_public.json",
+ reader_cfg=ocnli_fc_reader_cfg,
+ infer_cfg=ocnli_fc_infer_cfg,
+ eval_cfg=ocnli_fc_eval_cfg,
+ ),
+]
diff --git a/configs/datasets/FewCLUE_tnews/FewCLUE_tnews_ppl_33cc73.py b/configs/datasets/FewCLUE_tnews/FewCLUE_tnews_ppl_33cc73.py
new file mode 100644
index 00000000..7496759c
--- /dev/null
+++ b/configs/datasets/FewCLUE_tnews/FewCLUE_tnews_ppl_33cc73.py
@@ -0,0 +1,48 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import PPLInferencer
+from opencompass.openicl.icl_evaluator import AccEvaluator
+from opencompass.datasets import TNewsDataset
+
+tnews_reader_cfg = dict(input_columns='sentence', output_column='label_desc2')
+
+tnews_labels = [
+ '农业新闻', '旅游新闻', '游戏新闻', '科技类别公司新闻', '体育类别新闻', '初升高教育新闻', '娱乐圈新闻', '投资资讯',
+ '军事类别常识', '车辆新闻', '楼市新闻', '环球不含中国类别新闻', '书籍文化历史类别新闻', '故事类别新闻', '股票市场类别新闻'
+]
+
+tnews_infer_cfg = dict(
+ prompt_template=dict(
+ type=PromptTemplate,
+ template={
+ lb: dict(round=[
+ dict(role='HUMAN', prompt='以下内容属于什么新闻:{sentence}。'),
+ dict(role='BOT', prompt=lb)
+ ])
+ for lb in tnews_labels
+ }),
+ retriever=dict(type=ZeroRetriever),
+ inferencer=dict(type=PPLInferencer))
+
+tnews_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
+
+tnews_datasets = [
+ dict(
+ type=TNewsDataset,
+ path='json',
+ abbr='tnews-dev',
+ data_files='./data/FewCLUE/tnews/dev_few_all.json',
+ split='train',
+ reader_cfg=tnews_reader_cfg,
+ infer_cfg=tnews_infer_cfg,
+ eval_cfg=tnews_eval_cfg),
+ dict(
+ type=TNewsDataset,
+ path='json',
+ abbr='tnews-test',
+ data_files='./data/FewCLUE/tnews/test_public.json',
+ split='train',
+ reader_cfg=tnews_reader_cfg,
+ infer_cfg=tnews_infer_cfg,
+ eval_cfg=tnews_eval_cfg)
+]
diff --git a/configs/datasets/SuperGLUE_AX_b/SuperGLUE_AX_b_ppl_4bd960.py b/configs/datasets/SuperGLUE_AX_b/SuperGLUE_AX_b_ppl_4bd960.py
new file mode 100644
index 00000000..f1b2891b
--- /dev/null
+++ b/configs/datasets/SuperGLUE_AX_b/SuperGLUE_AX_b_ppl_4bd960.py
@@ -0,0 +1,53 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import PPLInferencer
+from opencompass.openicl.icl_evaluator import AccEvaluator
+from opencompass.datasets import HFDataset
+
+AX_b_reader_cfg = dict(
+ input_columns=["sentence1", "sentence2"],
+ output_column="label",
+ test_split="train")
+
+AX_b_infer_cfg = dict(
+ prompt_template=dict(
+ type=PromptTemplate,
+ template={
+ "entailment":
+ dict(round=[
+ dict(
+ role="HUMAN",
+ prompt=
+ "{sentence1}\n{sentence2}\nIs the sentence below entailed by the sentence above?"
+ ),
+ dict(role="BOT", prompt="Yes"),
+ ]),
+ "not_entailment":
+ dict(round=[
+ dict(
+ role="HUMAN",
+ prompt=
+ "{sentence1}\n{sentence2}\nIs the sentence below entailed by the sentence above?"
+ ),
+ dict(role="BOT", prompt="No"),
+ ])
+ },
+ ),
+ retriever=dict(type=ZeroRetriever),
+ inferencer=dict(type=PPLInferencer),
+)
+
+AX_b_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
+
+AX_b_datasets = [
+ dict(
+ type=HFDataset,
+ abbr="AX_b",
+ path="json",
+ data_files="./data/SuperGLUE/AX-b/AX-b.jsonl",
+ split="train",
+ reader_cfg=AX_b_reader_cfg,
+ infer_cfg=AX_b_infer_cfg,
+ eval_cfg=AX_b_eval_cfg,
+ )
+]
diff --git a/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_ppl_f80fb0.py b/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_ppl_f80fb0.py
new file mode 100644
index 00000000..a9fe02cf
--- /dev/null
+++ b/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_ppl_f80fb0.py
@@ -0,0 +1,45 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import PPLInferencer
+from opencompass.openicl.icl_evaluator import AccEvaluator
+from opencompass.datasets import BoolQDataset
+
+BoolQ_reader_cfg = dict(
+ input_columns=["question", "passage"],
+ output_column="answer",
+ test_split="train")
+
+BoolQ_infer_cfg = dict(
+ prompt_template=dict(
+ type=PromptTemplate,
+ template={
+ 0:
+ dict(round=[
+ dict(role="HUMAN", prompt="{passage}\nQuestion: {question}?"),
+ dict(role="BOT", prompt="No"),
+ ]),
+ 1:
+ dict(round=[
+ dict(role="HUMAN", prompt="{passage}\nQuestion: {question}?"),
+ dict(role="BOT", prompt="Yes"),
+ ]),
+ },
+ ),
+ retriever=dict(type=ZeroRetriever),
+ inferencer=dict(type=PPLInferencer),
+)
+
+BoolQ_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
+
+BoolQ_datasets = [
+ dict(
+ type=BoolQDataset,
+ abbr="BoolQ",
+ path="json",
+ data_files="./data/SuperGLUE/BoolQ/val.jsonl",
+ split="train",
+ reader_cfg=BoolQ_reader_cfg,
+ infer_cfg=BoolQ_infer_cfg,
+ eval_cfg=BoolQ_eval_cfg,
+ )
+]
diff --git a/configs/datasets/SuperGLUE_CB/SuperGLUE_CB_ppl_012063.py b/configs/datasets/SuperGLUE_CB/SuperGLUE_CB_ppl_012063.py
new file mode 100644
index 00000000..9ee3007d
--- /dev/null
+++ b/configs/datasets/SuperGLUE_CB/SuperGLUE_CB_ppl_012063.py
@@ -0,0 +1,33 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import PPLInferencer
+from opencompass.openicl.icl_evaluator import AccEvaluator
+from opencompass.datasets import HFDataset
+
+CB_reader_cfg = dict(
+ input_columns=['premise', 'hypothesis'], output_column='label')
+
+CB_infer_cfg = dict(
+ prompt_template=dict(
+ type=PromptTemplate,
+ template={
+ 'contradiction': '{premise}?contradiction, {hypothesis}',
+ 'entailment': '{premise}?entailment, {hypothesis}',
+ 'neutral': '{premise}?neutral, {hypothesis}'
+ }),
+ retriever=dict(type=ZeroRetriever),
+ inferencer=dict(type=PPLInferencer))
+
+CB_eval_cfg = dict(evaluator=dict(type=AccEvaluator), )
+
+CB_datasets = [
+ dict(
+ type=HFDataset,
+ abbr='CB',
+ path='json',
+ split='train',
+ data_files='./data/SuperGLUE/CB/val.jsonl',
+ reader_cfg=CB_reader_cfg,
+ infer_cfg=CB_infer_cfg,
+ eval_cfg=CB_eval_cfg)
+]
diff --git a/configs/datasets/SuperGLUE_COPA/SuperGLUE_COPA_gen.py b/configs/datasets/SuperGLUE_COPA/SuperGLUE_COPA_gen.py
new file mode 100644
index 00000000..3224b3da
--- /dev/null
+++ b/configs/datasets/SuperGLUE_COPA/SuperGLUE_COPA_gen.py
@@ -0,0 +1,4 @@
+from mmengine.config import read_base
+
+with read_base():
+ from .SuperGLUE_COPA_gen_6d5e67 import COPA_datasets # noqa: F401, F403
diff --git a/configs/datasets/SuperGLUE_COPA/SuperGLUE_COPA_ppl.py b/configs/datasets/SuperGLUE_COPA/SuperGLUE_COPA_ppl.py
new file mode 100644
index 00000000..998dcd1d
--- /dev/null
+++ b/configs/datasets/SuperGLUE_COPA/SuperGLUE_COPA_ppl.py
@@ -0,0 +1,4 @@
+from mmengine.config import read_base
+
+with read_base():
+ from .SuperGLUE_COPA_ppl_ddb78c import COPA_datasets # noqa: F401, F403
diff --git a/configs/datasets/SuperGLUE_COPA/SuperGLUE_COPA_ppl_ed59be.py b/configs/datasets/SuperGLUE_COPA/SuperGLUE_COPA_ppl_ed59be.py
new file mode 100644
index 00000000..1f4d4a48
--- /dev/null
+++ b/configs/datasets/SuperGLUE_COPA/SuperGLUE_COPA_ppl_ed59be.py
@@ -0,0 +1,34 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import PPLInferencer
+from opencompass.openicl.icl_evaluator import AccEvaluator
+from opencompass.datasets import HFDataset
+
+COPA_reader_cfg = dict(
+ input_columns=['question', 'premise', 'choice1', 'choice2'],
+ output_column='label',
+ test_split='train')
+
+COPA_infer_cfg = dict(
+ prompt_template=dict(
+ type=PromptTemplate,
+ template={
+ 0: "Premise:{premise}。\nQuestion:{question}。\nAnswer: {choice1}.",
+ 1: "Passage:{premise}。\nQuestion:{question}。\nAnswer: {choice2}.",
+ }),
+ retriever=dict(type=ZeroRetriever),
+ inferencer=dict(type=PPLInferencer))
+
+COPA_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
+
+COPA_datasets = [
+ dict(
+ type=HFDataset,
+ abbr='COPA',
+ path='json',
+ data_files='./data/SuperGLUE/COPA/val.jsonl',
+ split='train',
+ reader_cfg=COPA_reader_cfg,
+ infer_cfg=COPA_infer_cfg,
+ eval_cfg=COPA_eval_cfg)
+]
diff --git a/configs/datasets/SuperGLUE_MultiRC/SuperGLUE_MultiRC_gen.py b/configs/datasets/SuperGLUE_MultiRC/SuperGLUE_MultiRC_gen.py
new file mode 100644
index 00000000..01f9940e
--- /dev/null
+++ b/configs/datasets/SuperGLUE_MultiRC/SuperGLUE_MultiRC_gen.py
@@ -0,0 +1,4 @@
+from mmengine.config import read_base
+
+with read_base():
+ from .SuperGLUE_MultiRC_gen_26c9dc import MultiRC_datasets # noqa: F401, F403
diff --git a/configs/datasets/SuperGLUE_MultiRC/SuperGLUE_MultiRC_ppl_1123bd.py b/configs/datasets/SuperGLUE_MultiRC/SuperGLUE_MultiRC_ppl_1123bd.py
new file mode 100644
index 00000000..153e02cc
--- /dev/null
+++ b/configs/datasets/SuperGLUE_MultiRC/SuperGLUE_MultiRC_ppl_1123bd.py
@@ -0,0 +1,30 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import PPLInferencer
+from opencompass.openicl.icl_evaluator import AccEvaluator
+from opencompass.datasets import MultiRCDataset
+
+MultiRC_reader_cfg = dict(
+ input_columns=['question', 'text', 'answer'], output_column='label')
+
+MultiRC_infer_cfg = dict(
+ prompt_template=dict(
+ type=PromptTemplate,
+ template={
+ 0: "Passage:{text}。\nQuestion:{question}。\nAnswer: {answer}. It is false.",
+ 1: "Passage:
。\nQuestion:{question}。\nAnswer: {answer}. It is true.",
+ }),
+ retriever=dict(type=ZeroRetriever),
+ inferencer=dict(type=PPLInferencer))
+
+MultiRC_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
+
+MultiRC_datasets = [
+ dict(
+ type=MultiRCDataset,
+ abbr='MultiRC',
+ path='./data/SuperGLUE/MultiRC/val.jsonl',
+ reader_cfg=MultiRC_reader_cfg,
+ infer_cfg=MultiRC_infer_cfg,
+ eval_cfg=MultiRC_eval_cfg)
+]
diff --git a/configs/datasets/SuperGLUE_RTE/SuperGLUE_RTE_ppl.py b/configs/datasets/SuperGLUE_RTE/SuperGLUE_RTE_ppl.py
new file mode 100644
index 00000000..1f83906f
--- /dev/null
+++ b/configs/datasets/SuperGLUE_RTE/SuperGLUE_RTE_ppl.py
@@ -0,0 +1,4 @@
+from mmengine.config import read_base
+
+with read_base():
+ from .SuperGLUE_RTE_ppl_29a22c import RTE_datasets # noqa: F401, F403
diff --git a/configs/datasets/SuperGLUE_ReCoRD/SuperGLUE_ReCoRD_gen.py b/configs/datasets/SuperGLUE_ReCoRD/SuperGLUE_ReCoRD_gen.py
new file mode 100644
index 00000000..e5f0e8b0
--- /dev/null
+++ b/configs/datasets/SuperGLUE_ReCoRD/SuperGLUE_ReCoRD_gen.py
@@ -0,0 +1,4 @@
+from mmengine.config import read_base
+
+with read_base():
+ from .SuperGLUE_ReCoRD_gen_d8f19c import ReCoRD_datasets # noqa: F401, F403
diff --git a/configs/datasets/SuperGLUE_ReCoRD/SuperGLUE_ReCoRD_gen_d8f19c.py b/configs/datasets/SuperGLUE_ReCoRD/SuperGLUE_ReCoRD_gen_d8f19c.py
new file mode 100644
index 00000000..0d1f7abd
--- /dev/null
+++ b/configs/datasets/SuperGLUE_ReCoRD/SuperGLUE_ReCoRD_gen_d8f19c.py
@@ -0,0 +1,42 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.openicl.icl_evaluator import EMEvaluator
+from opencompass.datasets import ReCoRDDataset
+
+ReCoRD_reader_cfg = dict(
+ input_columns=["question", "text"],
+ output_column="answers",
+)
+
+ReCoRD_infer_cfg = dict(
+ prompt_template=dict(
+ type=PromptTemplate,
+ template=dict(round=[
+ dict(
+ role="HUMAN",
+ prompt=
+ "Passage: {text}\nResult: {question}\nQuestion: What entity does ____ refer to in the result? Give me the entity name:"
+ ),
+ ]),
+ ),
+ retriever=dict(type=ZeroRetriever),
+ inferencer=dict(type=GenInferencer),
+)
+
+ReCoRD_eval_cfg = dict(
+ evaluator=dict(type=EMEvaluator),
+ pred_role='BOT',
+ pred_postprocessor=dict(type="ReCoRD"),
+)
+
+ReCoRD_datasets = [
+ dict(
+ type=ReCoRDDataset,
+ abbr="ReCoRD",
+ path="./data/SuperGLUE/ReCoRD/val.jsonl",
+ reader_cfg=ReCoRD_reader_cfg,
+ infer_cfg=ReCoRD_infer_cfg,
+ eval_cfg=ReCoRD_eval_cfg,
+ )
+]
diff --git a/configs/datasets/SuperGLUE_WSC/SuperGLUE_WSC_ppl_85f45f.py b/configs/datasets/SuperGLUE_WSC/SuperGLUE_WSC_ppl_85f45f.py
new file mode 100644
index 00000000..eda1fad5
--- /dev/null
+++ b/configs/datasets/SuperGLUE_WSC/SuperGLUE_WSC_ppl_85f45f.py
@@ -0,0 +1,51 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import PPLInferencer
+from opencompass.openicl.icl_evaluator import AccEvaluator
+from opencompass.datasets import WSCDataset_V2
+
+WSC_reader_cfg = dict(
+ input_columns=["span1", "span2", "text"],
+ output_column="label",
+)
+
+WSC_infer_cfg = dict(
+ prompt_template=dict(
+ type=PromptTemplate,
+ template={
+ 'A':
+ dict(round=[
+ dict(
+ role="HUMAN",
+ prompt=
+ "{text}\nIs '{span1}' and '{span2}' refers to the same entity in the above sentence?"
+ ),
+ dict(role='BOT', prompt='Yes'),
+ ]),
+ 'B':
+ dict(round=[
+ dict(
+ role="HUMAN",
+ prompt=
+ "{text}\nIs '{span1}' and '{span2}' refers to the same entity in the above sentence?"
+ ),
+ dict(role='BOT', prompt='No'),
+ ]),
+ },
+ ),
+ retriever=dict(type=ZeroRetriever),
+ inferencer=dict(type=PPLInferencer),
+)
+
+WSC_eval_cfg = dict(evaluator=dict(type=AccEvaluator), )
+
+WSC_datasets = [
+ dict(
+ abbr="WSC",
+ type=WSCDataset_V2,
+ path="./data/SuperGLUE/WSC/val.jsonl",
+ reader_cfg=WSC_reader_cfg,
+ infer_cfg=WSC_infer_cfg,
+ eval_cfg=WSC_eval_cfg,
+ )
+]
diff --git a/configs/datasets/XLSum/XLSum_gen.py b/configs/datasets/XLSum/XLSum_gen.py
new file mode 100644
index 00000000..f09668cc
--- /dev/null
+++ b/configs/datasets/XLSum/XLSum_gen.py
@@ -0,0 +1,4 @@
+from mmengine.config import read_base
+
+with read_base():
+ from .XLSum_gen_1cc5f6 import XLSum_datasets # noqa: F401, F403
diff --git a/configs/datasets/bbh/bbh_gen.py b/configs/datasets/bbh/bbh_gen.py
new file mode 100644
index 00000000..f0cee254
--- /dev/null
+++ b/configs/datasets/bbh/bbh_gen.py
@@ -0,0 +1,4 @@
+from mmengine.config import read_base
+
+with read_base():
+ from .bbh_gen_58abc3 import bbh_datasets # noqa: F401, F403
diff --git a/configs/datasets/collections/base_small.py b/configs/datasets/collections/base_small.py
new file mode 100644
index 00000000..b469bb88
--- /dev/null
+++ b/configs/datasets/collections/base_small.py
@@ -0,0 +1,39 @@
+from mmengine.config import read_base
+
+with read_base():
+ from ..ceval.ceval_ppl_275812 import ceval_datasets
+ from ..bbh.bbh_gen_58abc3 import bbh_datasets
+ from ..CLUE_CMRC.CLUE_CMRC_gen_72a8d5 import CMRC_datasets
+ from ..CLUE_DRCD.CLUE_DRCD_gen_03b96b import DRCD_datasets
+ from ..CLUE_afqmc.CLUE_afqmc_ppl_c83c36 import afqmc_datasets
+ from ..FewCLUE_bustm.FewCLUE_bustm_ppl_47f2ab import bustm_datasets
+ from ..FewCLUE_chid.FewCLUE_chid_ppl_b6cd88 import chid_datasets
+ from ..FewCLUE_cluewsc.FewCLUE_cluewsc_ppl_2a9e61 import cluewsc_datasets
+ from ..FewCLUE_eprstmt.FewCLUE_eprstmt_ppl_d3c387 import eprstmt_datasets
+ from ..humaneval.humaneval_gen_d428f1 import humaneval_datasets
+ from ..mbpp.mbpp_gen_4104e4 import mbpp_datasets
+ from ..lambada.lambada_gen_7ffe3d import lambada_datasets
+ from ..storycloze.storycloze_ppl_c1912d import storycloze_datasets
+ from ..SuperGLUE_AX_b.SuperGLUE_AX_b_ppl_4bd960 import AX_b_datasets
+ from ..SuperGLUE_AX_g.SuperGLUE_AX_g_ppl_8d9bf9 import AX_g_datasets
+ from ..SuperGLUE_BoolQ.SuperGLUE_BoolQ_ppl_f80fb0 import BoolQ_datasets
+ from ..SuperGLUE_CB.SuperGLUE_CB_ppl_32adbb import CB_datasets
+ from ..SuperGLUE_COPA.SuperGLUE_COPA_ppl_ddb78c import COPA_datasets
+ from ..SuperGLUE_MultiRC.SuperGLUE_MultiRC_ppl_83a304 import MultiRC_datasets
+ from ..SuperGLUE_RTE.SuperGLUE_RTE_ppl_29a22c import RTE_datasets
+ from ..SuperGLUE_ReCoRD.SuperGLUE_ReCoRD_gen_d8f19c import ReCoRD_datasets
+ from ..SuperGLUE_WiC.SuperGLUE_WiC_ppl_4118db import WiC_datasets
+ from ..SuperGLUE_WSC.SuperGLUE_WSC_ppl_85f45f import WSC_datasets
+ from ..race.race_ppl_04e06a import race_datasets
+ from ..math.math_gen_78bcba import math_datasets
+ from ..gsm8k.gsm8k_gen_2dd372 import gsm8k_datasets
+ from ..summedits.summedits_ppl_163352 import summedits_datasets
+ from ..hellaswag.hellaswag_ppl_8e07d6 import hellaswag_datasets
+ from ..piqa.piqa_ppl_788dbe import piqa_datasets
+ from ..winogrande.winogrande_ppl_00f8ad import winogrande_datasets
+ from ..obqa.obqa_ppl_2b5b12 import obqa_datasets
+ from ..nq.nq_gen_c00b89 import nq_datasets
+ from ..triviaqa.triviaqa_gen_cc3cbf import triviaqa_datasets
+ from ..crowspairs.crowspairs_ppl_f60797 import crowspairs_datasets
+
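+# Flatten every `*_datasets` list imported above into a single `datasets` list.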
+datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
diff --git a/configs/datasets/glm/nq.py b/configs/datasets/glm/nq.py
new file mode 100644
index 00000000..35919338
--- /dev/null
+++ b/configs/datasets/glm/nq.py
@@ -0,0 +1,38 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.datasets import NaturalQuestionDataset, NQEvaluator
+
+nq_reader_cfg = dict(
+ input_columns=['question'], output_column='answer', train_split='test')
+
+nq_infer_cfg = dict(
+ ice_template=dict(
+ type=PromptTemplate,
+ template="Q: ?\nA: ",
+ column_token_map={
+ 'question': '',
+ 'answer': ''
+ }),
+ prompt_template=dict(
+ type=PromptTemplate,
+ template="Question: ? Answer: ",
+ column_token_map={
+ 'question': '',
+ 'answer': ''
+ },
+ ice_token='</E>'),
+ retriever=dict(type=ZeroRetriever),
+ inferencer=dict(type=GenInferencer))
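+
+# `column_token_map` maps each dataset column onto its placeholder token in the
+# template (e.g. '</Q>' is replaced by the question text), and `ice_token`
+# marks where retrieved in-context examples are spliced into the prompt.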
+
+nq_eval_cfg = dict(evaluator=dict(type=NQEvaluator))
+
+nq_datasets = [
+ dict(
+ type=NaturalQuestionDataset,
+ abbr='nq',
+ path='/mnt/petrelfs/wuzhiyong/datasets/nq/',
+ reader_cfg=nq_reader_cfg,
+ infer_cfg=nq_infer_cfg,
+ eval_cfg=nq_eval_cfg)
+]
diff --git a/configs/datasets/glm/triviaqa.py b/configs/datasets/glm/triviaqa.py
new file mode 100644
index 00000000..6071fd10
--- /dev/null
+++ b/configs/datasets/glm/triviaqa.py
@@ -0,0 +1,41 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.datasets import TriviaQADataset, TriviaQAEvaluator
+
+triviaqa_reader_cfg = dict(
+ input_columns=['question'],
+ output_column='answer',
+ train_split='dev',
+ test_split='dev')
+
+triviaqa_infer_cfg = dict(
+ ice_template=dict(
+ type=PromptTemplate,
+ template='Q: </Q>\nA: </A>',
+ column_token_map={
+ 'question': '</Q>',
+ 'answer': '</A>'
+ }),
+ prompt_template=dict(
+ type=PromptTemplate,
+ template='</E>Question: </Q> Answer: </A>',
+ column_token_map={
+ 'question': '</Q>',
+ 'answer': '</A>'
+ },
+ ice_token='</E>'),
+ retriever=dict(type=ZeroRetriever),
+ inferencer=dict(type=GenInferencer, max_out_len=50))
+
+triviaqa_eval_cfg = dict(evaluator=dict(type=TriviaQAEvaluator))
+
+triviaqa_datasets = [
+ dict(
+ type=TriviaQADataset,
+ abbr='triviaqa',
+ path='./data/triviaqa/',
+ reader_cfg=triviaqa_reader_cfg,
+ infer_cfg=triviaqa_infer_cfg,
+ eval_cfg=triviaqa_eval_cfg)
+]
diff --git a/configs/datasets/humaneval/humaneval_gen_581044.py b/configs/datasets/humaneval/humaneval_gen_581044.py
new file mode 100644
index 00000000..4e3e6d78
--- /dev/null
+++ b/configs/datasets/humaneval/humaneval_gen_581044.py
@@ -0,0 +1,40 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.datasets import HFDataset, HumanEvaluator
+
+humaneval_reader_cfg = dict(
+ input_columns=['prompt'], output_column='task_id', train_split='test')
+
+# TODO: allow empty output-column
+humaneval_infer_cfg = dict(
+ prompt_template=dict(
+ type=PromptTemplate,
+ template=dict(
+ begin=[
+ dict(
+ role='SYSTEM',
+ fallback_role='HUMAN',
+ prompt='Complete the following python code:'),
+ ],
+ round=[
+ dict(role='HUMAN', prompt='{prompt}'),
+ ])),
+ retriever=dict(type=ZeroRetriever),
+ inferencer=dict(type=GenInferencer))
+
+humaneval_eval_cfg = dict(
+ evaluator=dict(type=HumanEvaluator),
+ pred_role='BOT',
+ k=[1, 10, 100], # the parameter only for humaneval
+ pred_postprocessor=dict(type='humaneval'),
+)
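+# `k` above selects the pass@k metrics HumanEvaluator reports; pass@10 and
+# pass@100 are only informative when multiple completions are sampled per task.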
+
+humaneval_datasets = [
+ dict(
+ type=HFDataset,
+ path='openai_humaneval',
+ reader_cfg=humaneval_reader_cfg,
+ infer_cfg=humaneval_infer_cfg,
+ eval_cfg=humaneval_eval_cfg)
+]
diff --git a/configs/datasets/jigsawmultilingual/jigsawmultilingual_ppl.py b/configs/datasets/jigsawmultilingual/jigsawmultilingual_ppl.py
new file mode 100644
index 00000000..4dd90d37
--- /dev/null
+++ b/configs/datasets/jigsawmultilingual/jigsawmultilingual_ppl.py
@@ -0,0 +1,4 @@
+from mmengine.config import read_base
+
+with read_base():
+ from .jigsawmultilingual_ppl_640128 import jigsawmultilingual_datasets # noqa: F401, F403
diff --git a/configs/datasets/jigsawmultilingual/jigsawmultilingual_ppl_640128.py b/configs/datasets/jigsawmultilingual/jigsawmultilingual_ppl_640128.py
new file mode 100644
index 00000000..d441c7dd
--- /dev/null
+++ b/configs/datasets/jigsawmultilingual/jigsawmultilingual_ppl_640128.py
@@ -0,0 +1,47 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import CLPInferencer
+from opencompass.openicl.icl_evaluator import AUCROCEvaluator
+from opencompass.datasets import JigsawMultilingualDataset
+
+jigsawmultilingual_reader_cfg = dict(
+ input_columns=['text'],
+ output_column='label',
+ train_split='test',
+ test_split='test')
+
+# The Jigsaw multilingual toxic dataset is not available on Hugging Face.
+# Please download the test set files `test.csv` and `test_labels.csv` from the
+# link below and move them into the data/ folder to perform the evaluation.
+# https://www.kaggle.com/competitions/jigsaw-multilingual-toxic-comment-classification/data
+jigsawmultilingual_infer_cfg = dict(
+ prompt_template=dict(
+ type=PromptTemplate,
+ template=dict(round=[
+ dict(
+ role="HUMAN",
+ prompt="Text: {text}\nQuestion: Does the above text contain "
+ "rude, hateful, aggressive, disrespectful or unreasonable "
+ "language?\nAnswer:")
+ ])),
+ retriever=dict(type=ZeroRetriever),
+ inferencer=dict(type=CLPInferencer))
+
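+# CLPInferencer returns a conditional label probability for each example rather
+# than generated text, giving AUCROCEvaluator the continuous score it needs to
+# compute the area under the ROC curve against the binary toxicity labels.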
+jigsawmultilingual_eval_cfg = dict(evaluator=dict(type=AUCROCEvaluator), )
+
+lang = ['es', 'fr', 'it', 'pt', 'ru', 'tr']
+jigsawmultilingual_datasets = []
+
+for _l in lang:
+ jigsawmultilingual_datasets.append(
+ dict(
+ abbr=f'jigsaw_multilingual_{_l}',
+ type=JigsawMultilingualDataset,
+ path='data/test.csv',
+ label='data/test_labels.csv',
+ lang=_l,
+ reader_cfg=jigsawmultilingual_reader_cfg,
+ infer_cfg=jigsawmultilingual_infer_cfg,
+ eval_cfg=jigsawmultilingual_eval_cfg))
+
+del lang, _l
diff --git a/configs/datasets/jigsawmultilingual/jigsawmultilingual_ppl_da5d28.py b/configs/datasets/jigsawmultilingual/jigsawmultilingual_ppl_da5d28.py
new file mode 100644
index 00000000..62a2d727
--- /dev/null
+++ b/configs/datasets/jigsawmultilingual/jigsawmultilingual_ppl_da5d28.py
@@ -0,0 +1,43 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import CLPInferencer
+from opencompass.openicl.icl_evaluator import AUCROCEvaluator
+from opencompass.datasets import JigsawMultilingualDataset
+
+jigsawmultilingual_reader_cfg = dict(
+ input_columns=['text'],
+ output_column='label',
+ train_split='test',
+ test_split='test')
+
+# The Jigsaw multilingual toxic dataset is not available on Hugging Face.
+# Please download the test set files `test.csv` and `test_labels.csv` from the
+# link below and move them into the data/ folder to perform the evaluation.
+# https://www.kaggle.com/competitions/jigsaw-multilingual-toxic-comment-classification/data
+jigsawmultilingual_infer_cfg = dict(
+ prompt_template=dict(
+ type=PromptTemplate,
+ template='Text: {text}\nQuestion: '
+ 'Does the above text contain rude, hateful, aggressive, disrespectful '
+ 'or unreasonable language?\nAnswer:'),
+ retriever=dict(type=ZeroRetriever),
+ inferencer=dict(type=CLPInferencer))
+
+jigsawmultilingual_eval_cfg = dict(evaluator=dict(type=AUCROCEvaluator), )
+
+lang = ['es', 'fr', 'it', 'pt', 'ru', 'tr']
+jigsawmultilingual_datasets = []
+
+for _l in lang:
+ jigsawmultilingual_datasets.append(
+ dict(
+ abbr=f'jigsaw_multilingual_{_l}',
+ type=JigsawMultilingualDataset,
+ path='data/test.csv',
+ label='data/test_labels.csv',
+ lang=_l,
+ reader_cfg=jigsawmultilingual_reader_cfg,
+ infer_cfg=jigsawmultilingual_infer_cfg,
+ eval_cfg=jigsawmultilingual_eval_cfg))
+
+del lang, _l
diff --git a/configs/datasets/lambada/lambada_gen.py b/configs/datasets/lambada/lambada_gen.py
new file mode 100644
index 00000000..e27c8689
--- /dev/null
+++ b/configs/datasets/lambada/lambada_gen.py
@@ -0,0 +1,4 @@
+from mmengine.config import read_base
+
+with read_base():
+ from .lambada_gen_7ffe3d import lambada_datasets # noqa: F401, F403
diff --git a/configs/datasets/math/math_gen.py b/configs/datasets/math/math_gen.py
new file mode 100644
index 00000000..dec061c2
--- /dev/null
+++ b/configs/datasets/math/math_gen.py
@@ -0,0 +1,4 @@
+from mmengine.config import read_base
+
+with read_base():
+ from .math_gen_78bcba import math_datasets # noqa: F401, F403
diff --git a/configs/datasets/math/math_gen_b4c82a.py b/configs/datasets/math/math_gen_b4c82a.py
new file mode 100644
index 00000000..ddd8bae6
--- /dev/null
+++ b/configs/datasets/math/math_gen_b4c82a.py
@@ -0,0 +1,53 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.datasets import MATHDataset, MATHEvaluator
+
+math_reader_cfg = dict(input_columns=['problem'], output_column='solution')
+
+math_infer_cfg = dict(
+ prompt_template=dict(
+ type=PromptTemplate,
+ template=r'''Problem:
+Find the domain of the expression $\frac{{\sqrt{{x-2}}}}{{\sqrt{{5-x}}}}$.}}
+Solution:
+The expressions inside each square root must be non-negative. Therefore, $x-2 \ge 0$, so $x\ge2$, and $5 - x \ge 0$, so $x \le 5$. Also, the denominator cannot be equal to zero, so $5-x>0$, which gives $x<5$. Therefore, the domain of the expression is $\boxed{{[2,5)}}$.
+Final Answer: The final answer is $[2,5)$. I hope it is correct.
+
+Problem:
+If $\det \mathbf{{A}} = 2$ and $\det \mathbf{{B}} = 12,$ then find $\det (\mathbf{{A}} \mathbf{{B}}).$
+Solution:
+We have that $\det (\mathbf{{A}} \mathbf{{B}}) = (\det \mathbf{{A}})(\det \mathbf{{B}}) = (2)(12) = \boxed{{24}}.$
+Final Answer: The final answer is $24$. I hope it is correct.
+
+Problem:
+Terrell usually lifts two 20-pound weights 12 times. If he uses two 15-pound weights instead, how many times must Terrell lift them in order to lift the same total weight?
+Solution:
+If Terrell lifts two 20-pound weights 12 times, he lifts a total of $2\cdot 12\cdot20=480$ pounds of weight. If he lifts two 15-pound weights instead for $n$ times, he will lift a total of $2\cdot15\cdot n=30n$ pounds of weight. Equating this to 480 pounds, we can solve for $n$: \begin{{align*}} 30n&=480\\ \Rightarrow\qquad n&=480/30=\boxed{{16}} \end{{align*}}
+Final Answer: The final answer is $16$. I hope it is correct.
+
+Problem:
+If the system of equations: \begin{{align*}} 6x-4y&=a,\\ 6y-9x &=b. \end{{align*}}has a solution $(x, y)$ where $x$ and $y$ are both nonzero, find $\frac{{a}}{{b}},$ assuming $b$ is nonzero.
+Solution:
+If we multiply the first equation by $-\frac{{3}}{{2}}$, we obtain $$6y-9x=-\frac{{3}}{{2}}a.$$Since we also know that $6y-9x=b$, we have $$-\frac{{3}}{{2}}a=b\Rightarrow\frac{{a}}{{b}}=\boxed{{-\frac{{2}}{{3}}}}.$$
+Final Answer: The final answer is $-\frac{{2}}{{3}}$. I hope it is correct.
+
+Problem:
+{problem}
+Solution:
+{solution}'''),
+ retriever=dict(type=ZeroRetriever),
+ inferencer=dict(type=GenInferencer, max_out_len=512))
+
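+# The four worked examples above follow the Minerva-style 4-shot MATH prompt;
+# the 'math' postprocessor extracts the final answer from the generation before
+# MATHEvaluator checks it for mathematical equivalence.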
+math_eval_cfg = dict(
+ evaluator=dict(type=MATHEvaluator), pred_postprocessor=dict(type='math'))
+
+math_datasets = [
+ dict(
+ type=MATHDataset,
+ abbr='math',
+ path='./data/math/math.json',
+ reader_cfg=math_reader_cfg,
+ infer_cfg=math_infer_cfg,
+ eval_cfg=math_eval_cfg)
+]
diff --git a/configs/datasets/narrativeqa/narrativeqa_gen.py b/configs/datasets/narrativeqa/narrativeqa_gen.py
new file mode 100644
index 00000000..d03cafa3
--- /dev/null
+++ b/configs/datasets/narrativeqa/narrativeqa_gen.py
@@ -0,0 +1,4 @@
+from mmengine.config import read_base
+
+with read_base():
+ from .narrativeqa_gen_5786a7 import narrativeqa_datasets # noqa: F401, F403
diff --git a/configs/datasets/nq/nq_gen_c00b89.py b/configs/datasets/nq/nq_gen_c00b89.py
new file mode 100644
index 00000000..003ccb69
--- /dev/null
+++ b/configs/datasets/nq/nq_gen_c00b89.py
@@ -0,0 +1,29 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.datasets import NaturalQuestionDataset, NQEvaluator
+
+nq_reader_cfg = dict(
+ input_columns=['question'], output_column='answer', train_split='test')
+
+nq_infer_cfg = dict(
+ prompt_template=dict(
+ type=PromptTemplate,
+ template=dict(
+ round=[
+ dict(role='HUMAN', prompt='Question: {question}?\nAnswer: '),
+ ], )),
+ retriever=dict(type=ZeroRetriever),
+ inferencer=dict(type=GenInferencer))
+
+nq_eval_cfg = dict(evaluator=dict(type=NQEvaluator), pred_role="BOT")
+
+nq_datasets = [
+ dict(
+ type=NaturalQuestionDataset,
+ abbr='nq',
+ path='./data/nq/',
+ reader_cfg=nq_reader_cfg,
+ infer_cfg=nq_infer_cfg,
+ eval_cfg=nq_eval_cfg)
+]
diff --git a/configs/datasets/piqa/piqa_gen.py b/configs/datasets/piqa/piqa_gen.py
new file mode 100644
index 00000000..14fe3d92
--- /dev/null
+++ b/configs/datasets/piqa/piqa_gen.py
@@ -0,0 +1,4 @@
+from mmengine.config import read_base
+
+with read_base():
+ from .piqa_gen_8287ae import piqa_datasets # noqa: F401, F403
diff --git a/configs/datasets/piqa/piqa_ppl_788dbe.py b/configs/datasets/piqa/piqa_ppl_788dbe.py
new file mode 100644
index 00000000..7c43bf6b
--- /dev/null
+++ b/configs/datasets/piqa/piqa_ppl_788dbe.py
@@ -0,0 +1,31 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import PPLInferencer
+from opencompass.openicl.icl_evaluator import AccEvaluator
+from opencompass.datasets import HFDataset
+
+piqa_reader_cfg = dict(
+ input_columns=['goal', 'sol1', 'sol2'],
+ output_column='label',
+ test_split='validation')
+
+piqa_infer_cfg = dict(
+ prompt_template=dict(
+ type=PromptTemplate,
+ template={
+ 0: 'The following makes sense: \nQ: {goal}\nA: {sol1}\n',
+ 1: 'The following makes sense: \nQ: {goal}\nA: {sol2}\n'
+ }),
+ retriever=dict(type=ZeroRetriever),
+ inferencer=dict(type=PPLInferencer))
+
+piqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
+
+piqa_datasets = [
+ dict(
+ type=HFDataset,
+ path='piqa',
+ reader_cfg=piqa_reader_cfg,
+ infer_cfg=piqa_infer_cfg,
+ eval_cfg=piqa_eval_cfg)
+]
diff --git a/configs/datasets/qabench/qabench_gen.py b/configs/datasets/qabench/qabench_gen.py
new file mode 100644
index 00000000..478fff1e
--- /dev/null
+++ b/configs/datasets/qabench/qabench_gen.py
@@ -0,0 +1,4 @@
+from mmengine.config import read_base
+
+with read_base():
+ from .qabench_gen_0d5967 import qabench_datasets # noqa: F401, F403
diff --git a/configs/datasets/race/race_gen_12de48.py b/configs/datasets/race/race_gen_12de48.py
new file mode 100644
index 00000000..f2b218db
--- /dev/null
+++ b/configs/datasets/race/race_gen_12de48.py
@@ -0,0 +1,46 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.openicl.icl_evaluator import AccEvaluator
+from opencompass.datasets import RaceDataset
+
+race_reader_cfg = dict(
+ input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
+ output_column='answer')
+
+race_infer_cfg = dict(
+ prompt_template=dict(
+ type=PromptTemplate,
+ template=dict(round=[
+ dict(
+ role="HUMAN",
+ prompt=
+ "Read the article, and answer the question by replying A, B, C or D.\n\nArticle:\n{article}\n\nQ: {question}\n\nA. {A}\nB. {B}\nC. {C}\nD. {D}"
+ ),
+ ])),
+ retriever=dict(type=ZeroRetriever),
+ inferencer=dict(type=GenInferencer))
+
+race_eval_cfg = dict(
+ evaluator=dict(type=AccEvaluator),
+ pred_postprocessor=dict(type='first-capital'),
+ pred_role='BOT')
+
+race_datasets = [
+ dict(
+ type=RaceDataset,
+ abbr='race-middle',
+ path='race',
+ name='middle',
+ reader_cfg=race_reader_cfg,
+ infer_cfg=race_infer_cfg,
+ eval_cfg=race_eval_cfg),
+ dict(
+ type=RaceDataset,
+ abbr='race-high',
+ path='race',
+ name='high',
+ reader_cfg=race_reader_cfg,
+ infer_cfg=race_infer_cfg,
+ eval_cfg=race_eval_cfg)
+]
diff --git a/configs/datasets/race/race_gen_d18b89.py b/configs/datasets/race/race_gen_d18b89.py
new file mode 100644
index 00000000..f0f764ea
--- /dev/null
+++ b/configs/datasets/race/race_gen_d18b89.py
@@ -0,0 +1,40 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.openicl.icl_evaluator import AccEvaluator
+from opencompass.datasets import RaceDataset
+
+race_reader_cfg = dict(
+ input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
+ output_column='answer')
+
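+# Unlike the dialogue-style race_gen_12de48 config, this variant uses a plain
+# string template with no roles, suited to base models without a chat meta
+# template; for the same reason no pred_role is set in the eval config.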
+race_infer_cfg = dict(
+ prompt_template=dict(
+ type=PromptTemplate,
+ template=
+ 'Read the article, and answer the question by replying A, B, C or D.\n\n{article}\n\nQ: {question}\n\nA. {A}\nB. {B}\nC. {C}\nD. {D}'),
+ retriever=dict(type=ZeroRetriever),
+ inferencer=dict(type=GenInferencer))
+
+race_eval_cfg = dict(
+ evaluator=dict(type=AccEvaluator),
+ pred_postprocessor=dict(type='first-capital'))
+
+race_datasets = [
+ dict(
+ type=RaceDataset,
+ abbr='race-middle',
+ path='race',
+ name='middle',
+ reader_cfg=race_reader_cfg,
+ infer_cfg=race_infer_cfg,
+ eval_cfg=race_eval_cfg),
+ dict(
+ type=RaceDataset,
+ abbr='race-high',
+ path='race',
+ name='high',
+ reader_cfg=race_reader_cfg,
+ infer_cfg=race_infer_cfg,
+ eval_cfg=race_eval_cfg)
+]
diff --git a/configs/datasets/race/race_ppl.py b/configs/datasets/race/race_ppl.py
new file mode 100644
index 00000000..4e905733
--- /dev/null
+++ b/configs/datasets/race/race_ppl.py
@@ -0,0 +1,4 @@
+from mmengine.config import read_base
+
+with read_base():
+ from .race_ppl_04e06a import race_datasets # noqa: F401, F403
diff --git a/configs/datasets/realtoxicprompts/realtoxicprompts_gen.py b/configs/datasets/realtoxicprompts/realtoxicprompts_gen.py
new file mode 100644
index 00000000..5f316e93
--- /dev/null
+++ b/configs/datasets/realtoxicprompts/realtoxicprompts_gen.py
@@ -0,0 +1,4 @@
+from mmengine.config import read_base
+
+with read_base():
+ from .realtoxicprompts_gen_3ea730 import realtoxicprompts_datasets # noqa: F401, F403
diff --git a/configs/datasets/safety/safety_gen.py b/configs/datasets/safety/safety_gen.py
new file mode 100644
index 00000000..8ee8572c
--- /dev/null
+++ b/configs/datasets/safety/safety_gen.py
@@ -0,0 +1,4 @@
+from mmengine.config import read_base
+
+with read_base():
+ from .safety_gen_c0a5b8 import safety_datasets # noqa: F401, F403
diff --git a/configs/datasets/siqa/siqa_gen_a3c714.py b/configs/datasets/siqa/siqa_gen_a3c714.py
new file mode 100644
index 00000000..9da5de36
--- /dev/null
+++ b/configs/datasets/siqa/siqa_gen_a3c714.py
@@ -0,0 +1,42 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.openicl.icl_evaluator import AccEvaluator
+from opencompass.datasets import siqaDataset_V2
+
+siqa_reader_cfg = dict(
+ input_columns=["context", "question", "answerA", "answerB", "answerC"],
+ output_column="label",
+ test_split="validation")
+
+siqa_infer_cfg = dict(
+ prompt_template=dict(
+ type=PromptTemplate,
+ template=dict(
+ round=[
+ dict(
+ role="HUMAN",
+ prompt=
+ "{context}\nQuestion: {question}\nA. {answerA}\nB. {answerB}\nC. {answerC}\nAnswer:"
+ )
+ ], ),
+ ),
+ retriever=dict(type=ZeroRetriever),
+ inferencer=dict(type=GenInferencer),
+)
+
+siqa_eval_cfg = dict(
+ evaluator=dict(type=AccEvaluator),
+ pred_role="BOT",
+ pred_postprocessor=dict(type="first-capital"),
+)
+
+siqa_datasets = [
+ dict(
+ abbr="siqa",
+ type=siqaDataset_V2,
+ path="social_i_qa",
+ reader_cfg=siqa_reader_cfg,
+ infer_cfg=siqa_infer_cfg,
+ eval_cfg=siqa_eval_cfg)
+]
diff --git a/configs/datasets/siqa/siqa_ppl.py b/configs/datasets/siqa/siqa_ppl.py
new file mode 100644
index 00000000..3dfdc224
--- /dev/null
+++ b/configs/datasets/siqa/siqa_ppl.py
@@ -0,0 +1,4 @@
+from mmengine.config import read_base
+
+with read_base():
+ from .siqa_ppl_049da0 import siqa_datasets # noqa: F401, F403
diff --git a/configs/datasets/storycloze/storycloze_ppl.py b/configs/datasets/storycloze/storycloze_ppl.py
new file mode 100644
index 00000000..5be70eef
--- /dev/null
+++ b/configs/datasets/storycloze/storycloze_ppl.py
@@ -0,0 +1,4 @@
+from mmengine.config import read_base
+
+with read_base():
+ from .storycloze_ppl_c1912d import storycloze_datasets # noqa: F401, F403
diff --git a/configs/datasets/storycloze/storycloze_ppl_7f4c64.py b/configs/datasets/storycloze/storycloze_ppl_7f4c64.py
new file mode 100644
index 00000000..e33bfe38
--- /dev/null
+++ b/configs/datasets/storycloze/storycloze_ppl_7f4c64.py
@@ -0,0 +1,38 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import PPLInferencer
+from opencompass.openicl.icl_evaluator import AccEvaluator
+from opencompass.datasets import storyclozeDataset
+
+storycloze_reader_cfg = dict(
+ input_columns=['context', 'sentence_quiz1', 'sentence_quiz2'],
+ output_column='answer_right_ending',
+ train_split='test',
+ test_split='test')
+
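+# Template keys 1 and 2 mirror the values of 'answer_right_ending', so the
+# perplexity argmin maps directly onto the gold label.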
+storycloze_infer_cfg = dict(
+ prompt_template=dict(
+ type=PromptTemplate,
+ template={
+ 1: "{context}{sentence_quiz1}",
+ 2: "{context}{sentence_quiz2}",
+ }),
+ retriever=dict(type=ZeroRetriever),
+ inferencer=dict(type=PPLInferencer))
+
+storycloze_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
+
+# The original Story Cloze dataset and repo are no longer maintained,
+# so we use the multilingual version of this dataset instead.
+storycloze_datasets = [
+ dict(
+ abbr='story_cloze',
+ type=storyclozeDataset,
+ path='juletxara/xstory_cloze',
+ name='en',
+ reader_cfg=storycloze_reader_cfg,
+ infer_cfg=storycloze_infer_cfg,
+ eval_cfg=storycloze_eval_cfg)
+]
diff --git a/configs/datasets/summedits/summedits_gen.py b/configs/datasets/summedits/summedits_gen.py
new file mode 100644
index 00000000..d99f3c17
--- /dev/null
+++ b/configs/datasets/summedits/summedits_gen.py
@@ -0,0 +1,4 @@
+from mmengine.config import read_base
+
+with read_base():
+ from .summedits_gen_4f35b5 import summedits_datasets # noqa: F401, F403
diff --git a/configs/datasets/summedits/summedits_gen_4f35b5.py b/configs/datasets/summedits/summedits_gen_4f35b5.py
new file mode 100644
index 00000000..dd74c417
--- /dev/null
+++ b/configs/datasets/summedits/summedits_gen_4f35b5.py
@@ -0,0 +1,37 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.openicl.icl_evaluator import AccEvaluator
+from opencompass.datasets import SummeditsDataset_V2
+
+summedits_reader_cfg = dict(
+ input_columns=['doc', 'summary'], output_column='label')
+
+summedits_infer_cfg = dict(
+ prompt_template=dict(
+ type=PromptTemplate,
+ template=dict(round=[
+ dict(
+ role="HUMAN",
+ prompt=
+ 'Document:\n{doc}\nSummary:\n{summary}\nQuestion:\nIs the summary factually consistent with the document?\nA. Yes\nB. No\nAnswer:'
+ ),
+ ])),
+ retriever=dict(type=ZeroRetriever),
+ inferencer=dict(type=GenInferencer))
+
+summedits_eval_cfg = dict(
+ evaluator=dict(type=AccEvaluator),
+ pred_role="BOT",
+ pred_postprocessor=dict(type="first-capital"),
+)
+
+summedits_datasets = [
+ dict(
+ abbr='summedits',
+ type=SummeditsDataset_V2,
+ path='./data/summedits/summedits.jsonl',
+ reader_cfg=summedits_reader_cfg,
+ infer_cfg=summedits_infer_cfg,
+ eval_cfg=summedits_eval_cfg)
+]
diff --git a/configs/datasets/triviaqarc/triviaqarc_gen_a02306.py b/configs/datasets/triviaqarc/triviaqarc_gen_a02306.py
new file mode 100644
index 00000000..e14be1f0
--- /dev/null
+++ b/configs/datasets/triviaqarc/triviaqarc_gen_a02306.py
@@ -0,0 +1,32 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.datasets import TriviaQArcDataset, TriviaQAEvaluator
+
+triviaqarc_reader_cfg = dict(
+ input_columns=['question', 'evidence'],
+ output_column='answer',
+ train_split='dev',
+ test_split='dev')
+
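+# Evidence passages are long: max_seq_len is raised to 8192 and batch_size
+# lowered to 4, while max_out_len=50 suffices for short factoid answers.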
+triviaqarc_infer_cfg = dict(
+ prompt_template=dict(
+ type=PromptTemplate,
+ template="{evidence}\nAnswer these questions:\nQ: {question}?\nA:"),
+ retriever=dict(type=ZeroRetriever),
+ inferencer=dict(
+ type=GenInferencer, max_out_len=50, max_seq_len=8192, batch_size=4))
+
+triviaqarc_eval_cfg = dict(evaluator=dict(type=TriviaQAEvaluator))
+
+triviaqarc_datasets = [
+ dict(
+ type=TriviaQArcDataset,
+ abbr='triviaqarc',
+ path='./data/triviaqa-rc/',
+ reader_cfg=triviaqarc_reader_cfg,
+ infer_cfg=triviaqarc_infer_cfg,
+ eval_cfg=triviaqarc_eval_cfg)
+]
diff --git a/configs/models/classic/tigerbot-7b-sft.py b/configs/models/classic/tigerbot-7b-sft.py
new file mode 100644
index 00000000..b5ea2860
--- /dev/null
+++ b/configs/models/classic/tigerbot-7b-sft.py
@@ -0,0 +1,31 @@
+from opencompass.models import HuggingFaceCausalLM
+
+_meta_template = dict(
+ round=[
+ dict(role='HUMAN', begin='\n\n### Instruction:\n'),
+ dict(role='BOT', begin='\n\n### Response:\n', generate=True),
+ ],
+)
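+# With this template a single-turn query is rendered roughly as
+# '\n\n### Instruction:\n{user input}\n\n### Response:\n' + completion.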
+
+
+models = [
+ dict(
+ type=HuggingFaceCausalLM,
+ abbr='TigerBot-SFT',
+ path="TigerResearch/tigerbot-7b-sft",
+ tokenizer_path='TigerResearch/tigerbot-7b-sft',
+ tokenizer_kwargs=dict(
+ padding_side='left',
+ truncation_side='left',
+ trust_remote_code=True,
+ ),
+ max_out_len=100,
+ max_seq_len=2048,
+ batch_size=8,
+ meta_template=_meta_template,
+ model_kwargs=dict(trust_remote_code=True, device_map='auto', revision='0ba4d6fc479bdedd6a3f8d4d3425025c5f501800'),
+ run_cfg=dict(num_gpus=1, num_procs=1),
+ )
+]
diff --git a/configs/summarizers/groups/bbh.py b/configs/summarizers/groups/bbh.py
new file mode 100644
index 00000000..8286c5c1
--- /dev/null
+++ b/configs/summarizers/groups/bbh.py
@@ -0,0 +1,8 @@
+bbh_summary_groups = []
+
+# bbh
+_bbh = ['temporal_sequences', 'disambiguation_qa', 'date_understanding', 'tracking_shuffled_objects_three_objects', 'penguins_in_a_table', 'geometric_shapes', 'snarks', 'ruin_names', 'tracking_shuffled_objects_seven_objects', 'tracking_shuffled_objects_five_objects', 'logical_deduction_three_objects', 'hyperbaton', 'logical_deduction_five_objects', 'logical_deduction_seven_objects', 'movie_recommendation', 'salient_translation_error_detection', 'reasoning_about_colored_objects', 'multistep_arithmetic_two', 'navigate', 'dyck_languages', 'word_sorting', 'sports_understanding', 'boolean_expressions', 'object_counting', 'formal_fallacies', 'causal_judgement', 'web_of_lies']
+_bbh = ['bbh-' + s for s in _bbh]
+bbh_summary_groups.append({'name': 'bbh', 'subsets': _bbh})
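+# The summarizer then reports a single aggregate 'bbh' score, by default the
+# mean over the 27 'bbh-*' subsets listed above.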
diff --git a/docs/en/_templates/404.html b/docs/en/_templates/404.html
new file mode 100644
index 00000000..64910175
--- /dev/null
+++ b/docs/en/_templates/404.html
@@ -0,0 +1,18 @@
+{% extends "layout.html" %}
+
+{% block body %}
+
+<h1>Page Not Found</h1>
+
+<p>
+  The page you are looking for cannot be found.
+</p>
+
+<p>
+  If you just switched documentation versions, it is likely that the page you were on has moved. You can look for it in
+  the table of contents on the left, or go to the homepage.
+</p>
+
+
+
+{% endblock %}
diff --git a/docs/en/advanced_guides/new_dataset.md b/docs/en/advanced_guides/new_dataset.md
new file mode 100644
index 00000000..2d1cb0cb
--- /dev/null
+++ b/docs/en/advanced_guides/new_dataset.md
@@ -0,0 +1 @@
+# New Dataset
diff --git a/docs/zh_cn/prompt/few_shot.md b/docs/zh_cn/prompt/few_shot.md
new file mode 100644
index 00000000..0539a2eb
--- /dev/null
+++ b/docs/zh_cn/prompt/few_shot.md
@@ -0,0 +1 @@
+# Few-shot
\ No newline at end of file
diff --git a/docs/zh_cn/user_guides/config.md b/docs/zh_cn/user_guides/config.md
new file mode 100644
index 00000000..fa4e6641
--- /dev/null
+++ b/docs/zh_cn/user_guides/config.md
@@ -0,0 +1,2 @@
+# 学习配置文件
+
diff --git a/docs/zh_cn/user_guides/framework_overview.md b/docs/zh_cn/user_guides/framework_overview.md
new file mode 100644
index 00000000..ead5053b
--- /dev/null
+++ b/docs/zh_cn/user_guides/framework_overview.md
@@ -0,0 +1 @@
+# 整体概括
diff --git a/opencompass/datasets/TheoremQA.py b/opencompass/datasets/TheoremQA.py
new file mode 100644
index 00000000..fc529a61
--- /dev/null
+++ b/opencompass/datasets/TheoremQA.py
@@ -0,0 +1,30 @@
+import re
+
+from datasets import load_dataset
+
+from opencompass.registry import LOAD_DATASET, TEXT_POSTPROCESSORS
+
+from .base import BaseDataset
+
+
+@LOAD_DATASET.register_module()
+class TheoremQADataset(BaseDataset):
+
+ @staticmethod
+ def load(path: str):
+ return load_dataset('csv', data_files={'test': path})
+
+
+@TEXT_POSTPROCESSORS.register_module('TheoremQA')
+def TheoremQA_postprocess(text: str) -> str:
+ # Keep only the first line, then pull out the span after 'answer is'.
+ text = text.strip().split('\n')[0].strip()
+ matches = re.findall(r'answer is (.*)', text)
+ if len(matches) == 0:
+ return text
+ else:
+ text = matches[0].strip()[:-1]  # drop the trailing character (assumed to be punctuation such as '.')
+ return text
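+
+# Hypothetical examples: 'The answer is 42.' -> '42'; text with no
+# 'answer is' pattern is truncated to its first line and returned as-is.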
diff --git a/opencompass/datasets/cb.py b/opencompass/datasets/cb.py
new file mode 100644
index 00000000..3027183d
--- /dev/null
+++ b/opencompass/datasets/cb.py
@@ -0,0 +1,25 @@
+import json
+
+from datasets import Dataset
+
+from opencompass.registry import LOAD_DATASET
+
+from .base import BaseDataset
+
+
+@LOAD_DATASET.register_module()
+class CBDataset_V2(BaseDataset):
+
+ @staticmethod
+ def load(path):
+ dataset = []
+ with open(path, 'r') as f:
+ for line in f:
+ line = json.loads(line)
+ line['label'] = {
+ 'contradiction': 'A',
+ 'entailment': 'B',
+ 'neutral': 'C'
+ }[line['label']]
+ dataset.append(line)
+ return Dataset.from_list(dataset)
diff --git a/opencompass/datasets/chid.py b/opencompass/datasets/chid.py
new file mode 100644
index 00000000..6c218edc
--- /dev/null
+++ b/opencompass/datasets/chid.py
@@ -0,0 +1,46 @@
+import json
+
+from datasets import Dataset, load_dataset
+
+from opencompass.registry import LOAD_DATASET
+
+from .base import BaseDataset
+
+
+@LOAD_DATASET.register_module()
+class CHIDDataset(BaseDataset):
+
+ @staticmethod
+ def load(**kwargs):
+
+ dataset = load_dataset(**kwargs)
+
+ def preprocess(example):
+ content = example['content']
+ for i, c in enumerate(example['candidates']):
+ example[f'content{i}'] = content.replace('#idiom#', c)
+ return example
+
+ dataset = dataset.map(preprocess)
+ return dataset
+
+
+@LOAD_DATASET.register_module()
+class CHIDDataset_V2(BaseDataset):
+
+ @staticmethod
+ def load(path):
+ data = []
+ with open(path, 'r') as f:
+ for line in f:
+ line = json.loads(line)
+ item = {}
+ item['content'] = line['content'].replace('#idiom#', '______')
+ for i, c in enumerate(line['candidates']):
+ item[chr(ord('A') + i)] = c
+ item['answer'] = 'ABCDEFG'[line['answer']]
+ data.append(item)
+ return Dataset.from_list(data)
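+
+# Sketch of a V2 row: a source line with seven candidates and answer index 3
+# yields columns 'A'..'G' plus answer == 'D'.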
diff --git a/opencompass/datasets/civilcomments.py b/opencompass/datasets/civilcomments.py
new file mode 100644
index 00000000..61dae8c4
--- /dev/null
+++ b/opencompass/datasets/civilcomments.py
@@ -0,0 +1,36 @@
+from datasets import DatasetDict, load_dataset
+
+from opencompass.registry import LOAD_DATASET
+
+from .base import BaseDataset
+
+
+@LOAD_DATASET.register_module()
+class CivilCommentsDataset(BaseDataset):
+
+ @staticmethod
+ def load(**kwargs):
+ train_dataset = load_dataset(**kwargs, split='train')
+ test_dataset = load_dataset(**kwargs, split='test')
+
+ def pre_process(example):
+ example['label'] = int(example['toxicity'] >= 0.5)
+ example['choices'] = ['no', 'yes']
+ return example
+
+ def remove_columns(dataset):
+ return dataset.remove_columns([
+ 'severe_toxicity', 'obscene', 'threat', 'insult',
+ 'identity_attack', 'sexual_explicit'
+ ])
+
+ train_dataset = remove_columns(train_dataset)
+ test_dataset = remove_columns(test_dataset)
+ test_dataset = test_dataset.shuffle(seed=42)
+ test_dataset = test_dataset.select(list(range(10000)))
+ test_dataset = test_dataset.map(pre_process)
+
+ return DatasetDict({
+ 'train': train_dataset,
+ 'test': test_dataset,
+ })
diff --git a/opencompass/datasets/commonsenseqa.py b/opencompass/datasets/commonsenseqa.py
new file mode 100644
index 00000000..17b836d0
--- /dev/null
+++ b/opencompass/datasets/commonsenseqa.py
@@ -0,0 +1,22 @@
+from datasets import load_dataset
+
+from opencompass.registry import LOAD_DATASET
+
+from .base import BaseDataset
+
+
+@LOAD_DATASET.register_module()
+class commonsenseqaDataset(BaseDataset):
+
+ @staticmethod
+ def load(**kwargs):
+ dataset = load_dataset(**kwargs)
+
+ def pre_process(example):
+ for i in range(5):
+ example[chr(ord('A') + i)] = example['choices']['text'][i]
+ return example
+
+ dataset = dataset.map(pre_process).remove_columns(
+ ['question_concept', 'id', 'choices'])
+ return dataset
diff --git a/opencompass/datasets/crowspairs.py b/opencompass/datasets/crowspairs.py
new file mode 100644
index 00000000..c498099f
--- /dev/null
+++ b/opencompass/datasets/crowspairs.py
@@ -0,0 +1,34 @@
+from datasets import load_dataset
+
+from opencompass.registry import LOAD_DATASET
+
+from .base import BaseDataset
+
+
+@LOAD_DATASET.register_module()
+class crowspairsDataset(BaseDataset):
+
+ @staticmethod
+ def load(**kwargs):
+
+ dataset = load_dataset(**kwargs)
+
+ def preprocess(example):
+ example['label'] = 0
+ return example
+
+ return dataset.map(preprocess)
+
+
+@LOAD_DATASET.register_module()
+class crowspairsDataset_V2(BaseDataset):
+
+ @staticmethod
+ def load(**kwargs):
+ dataset = load_dataset(**kwargs)
+
+ def preprocess(example):
+ example['label'] = 'A'
+ return example
+
+ return dataset.map(preprocess)
diff --git a/opencompass/datasets/eprstmt.py b/opencompass/datasets/eprstmt.py
new file mode 100644
index 00000000..dd14b960
--- /dev/null
+++ b/opencompass/datasets/eprstmt.py
@@ -0,0 +1,27 @@
+import json
+
+from datasets import Dataset
+
+from opencompass.registry import LOAD_DATASET
+
+from .base import BaseDataset
+
+
+@LOAD_DATASET.register_module()
+class eprstmtDataset_V2(BaseDataset):
+
+ @staticmethod
+ def load(path):
+ data = []
+ with open(path, 'r') as f:
+ for line in f:
+ line = json.loads(line)
+ item = {
+ 'sentence': line['sentence'],
+ 'label': {
+ 'Positive': 'A',
+ 'Negative': 'B',
+ }[line['label']],
+ }
+ data.append(item)
+ return Dataset.from_list(data)
diff --git a/opencompass/datasets/huggingface.py b/opencompass/datasets/huggingface.py
new file mode 100644
index 00000000..2ae23e3f
--- /dev/null
+++ b/opencompass/datasets/huggingface.py
@@ -0,0 +1,13 @@
+from datasets import load_dataset
+
+from opencompass.registry import LOAD_DATASET
+
+from .base import BaseDataset
+
+
+@LOAD_DATASET.register_module()
+class HFDataset(BaseDataset):
+
+ @staticmethod
+ def load(**kwargs):
+ return load_dataset(**kwargs)
diff --git a/opencompass/datasets/piqa.py b/opencompass/datasets/piqa.py
new file mode 100644
index 00000000..f0bd4dcc
--- /dev/null
+++ b/opencompass/datasets/piqa.py
@@ -0,0 +1,25 @@
+from datasets import load_dataset
+
+from opencompass.registry import LOAD_DATASET
+
+from .base import BaseDataset
+
+
+@LOAD_DATASET.register_module()
+class piqaDataset_V2(BaseDataset):
+
+ @staticmethod
+ def load(**kwargs):
+ dataset = load_dataset(**kwargs)
+
+ def preprocess(example):
+ assert isinstance(example['label'], int)
+ if example['label'] < 0:
+ example['answer'] = 'NULL'
+ else:
+ example['answer'] = 'AB'[example['label']]
+ example.pop('label')
+ return example
+
+ dataset = dataset.map(preprocess)
+ return dataset
diff --git a/opencompass/datasets/realtoxicprompts.py b/opencompass/datasets/realtoxicprompts.py
new file mode 100644
index 00000000..4098bb3a
--- /dev/null
+++ b/opencompass/datasets/realtoxicprompts.py
@@ -0,0 +1,30 @@
+from datasets import load_dataset
+
+from opencompass.registry import LOAD_DATASET
+
+from .base import BaseDataset
+
+
+@LOAD_DATASET.register_module()
+class RealToxicPromptsDataset(BaseDataset):
+
+ @staticmethod
+ def load(**kwargs):
+ challenging_subset = kwargs.pop('challenging_subset', False)
+ dataset = load_dataset(**kwargs)
+
+ def preprocess(example):
+ # Flatten the nested 'prompt' dict into top-level prompt_* columns.
+ for k, v in example['prompt'].items():
+ k = 'prompt_' + k
+ example[k] = v
+ del example['prompt']
+
+ return example
+
+ dataset = dataset.map(preprocess)
+
+ # return challenging subset if necessary
+ if challenging_subset:
+ return dataset.filter(lambda example: example['challenging'])
+ return dataset
diff --git a/opencompass/datasets/siqa.py b/opencompass/datasets/siqa.py
new file mode 100644
index 00000000..5091ccd0
--- /dev/null
+++ b/opencompass/datasets/siqa.py
@@ -0,0 +1,20 @@
+from datasets import load_dataset
+
+from opencompass.registry import LOAD_DATASET
+
+from .base import BaseDataset
+
+
+@LOAD_DATASET.register_module()
+class siqaDataset_V2(BaseDataset):
+
+ @staticmethod
+ def load(**kwargs):
+ dataset = load_dataset(**kwargs)
+
+ def preprocess(example):
+ example['label'] = ' ABC'[int(example['label'])]  # labels '1'/'2'/'3' -> 'A'/'B'/'C'
+ return example
+
+ dataset = dataset.map(preprocess)
+ return dataset
diff --git a/opencompass/openicl/icl_evaluator/__init__.py b/opencompass/openicl/icl_evaluator/__init__.py
new file mode 100644
index 00000000..fc74ccac
--- /dev/null
+++ b/opencompass/openicl/icl_evaluator/__init__.py
@@ -0,0 +1,5 @@
+from .icl_aucroc_evaluator import AUCROCEvaluator
+from .icl_base_evaluator import BaseEvaluator
+from .icl_em_evaluator import EMEvaluator
+from .icl_hf_evaluator import * # noqa
+from .icl_toxic_evaluator import ToxicEvaluator
diff --git a/opencompass/openicl/utils/logging.py b/opencompass/openicl/utils/logging.py
new file mode 100644
index 00000000..daa792ec
--- /dev/null
+++ b/opencompass/openicl/utils/logging.py
@@ -0,0 +1,44 @@
+import logging
+
+import torch.distributed as dist
+
+LOG_LEVEL = logging.INFO
+SUBPROCESS_LOG_LEVEL = logging.ERROR
+LOG_FORMATTER = '[%(asctime)s] [%(name)s] [%(levelname)s] %(message)s'
+
+
+def get_logger(name, level=LOG_LEVEL, log_file=None, file_mode='w'):
+ formatter = logging.Formatter(LOG_FORMATTER)
+
+ logger = logging.getLogger(name)
+
+ for handler in logger.root.handlers:
+ if type(handler) is logging.StreamHandler:
+ handler.setLevel(logging.ERROR)
+
+ if dist.is_available() and dist.is_initialized():
+ rank = dist.get_rank()
+ else:
+ rank = 0
+
+ if rank == 0 and log_file is not None:
+ file_handler = logging.FileHandler(log_file, file_mode)
+ file_handler.setFormatter(formatter)
+ file_handler.setLevel(level)
+ logger.addHandler(file_handler)
+
+ if rank == 0:
+ logger.setLevel(level)
+ else:
+ logger.setLevel(SUBPROCESS_LOG_LEVEL)
+
+ stream_handler = logging.StreamHandler()
+ stream_handler.setFormatter(formatter)
+ stream_handler.setLevel(level)
+ logger.addHandler(stream_handler)
+
+ return logger
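+
+# Usage sketch: logger = get_logger(__name__, log_file='run.log') logs at
+# INFO on rank 0 (to stream and file) and at ERROR on other ranks, so
+# distributed runs do not emit duplicated messages.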
diff --git a/opencompass/partitioners/__init__.py b/opencompass/partitioners/__init__.py
new file mode 100644
index 00000000..836081fb
--- /dev/null
+++ b/opencompass/partitioners/__init__.py
@@ -0,0 +1,2 @@
+from .naive import * # noqa: F401, F403
+from .size import * # noqa: F401, F403
diff --git a/opencompass/utils/__init__.py b/opencompass/utils/__init__.py
new file mode 100644
index 00000000..c52f215d
--- /dev/null
+++ b/opencompass/utils/__init__.py
@@ -0,0 +1,10 @@
+from .abbr import * # noqa
+from .build import * # noqa
+from .fileio import * # noqa
+from .git import * # noqa
+from .lark import * # noqa
+from .logging import * # noqa
+from .menu import * # noqa
+from .prompt import * # noqa
+from .summarizer import * # noqa
+from .text_postprocessors import * # noqa
diff --git a/opencompass/utils/build.py b/opencompass/utils/build.py
new file mode 100644
index 00000000..a4e50a36
--- /dev/null
+++ b/opencompass/utils/build.py
@@ -0,0 +1,26 @@
+import copy
+
+from mmengine.config import ConfigDict
+
+from opencompass.registry import LOAD_DATASET, MODELS
+
+
+def build_dataset_from_cfg(dataset_cfg: ConfigDict):
+ dataset_cfg = copy.deepcopy(dataset_cfg)
+ dataset_cfg.pop('infer_cfg', None)
+ dataset_cfg.pop('eval_cfg', None)
+ dataset_cfg.pop('abbr', None)
+ return LOAD_DATASET.build(dataset_cfg)
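+
+# Usage sketch: dataset = build_dataset_from_cfg(cfg) for one entry of a
+# *_datasets list; runner-only keys (infer_cfg, eval_cfg, abbr) are dropped
+# and the registered dataset class is built from the remaining fields.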
+
+
+def build_model_from_cfg(model_cfg: ConfigDict):
+ model_cfg = copy.deepcopy(model_cfg)
+ model_cfg.pop('run_cfg', None)
+ model_cfg.pop('max_out_len', None)
+ model_cfg.pop('batch_size', None)
+ model_cfg.pop('abbr', None)
+ return MODELS.build(model_cfg)
diff --git a/opencompass/utils/types.py b/opencompass/utils/types.py
new file mode 100644
index 00000000..914213c9
--- /dev/null
+++ b/opencompass/utils/types.py
@@ -0,0 +1,48 @@
+from typing import Dict, List, Union
+
+from datasets import Dataset, DatasetDict
+
+
+def _check_type_list(obj, typelist: List):
+ for _type in typelist:
+ if _type is None:
+ if obj is None:
+ return obj
+ elif isinstance(obj, _type):
+ return obj
+ raise TypeError(
+ f'Expected an object in {[_.__name__ if _ is not None else None for _ in typelist]} type, but got {obj}'
+ )
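+
+# e.g. _check_type_list(3, [None, int]) -> 3; _check_type_list(None,
+# [None, int]) -> None; _check_type_list('x', [int]) raises TypeError.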
+
+
+def _check_dataset(obj) -> Union[Dataset, DatasetDict]:
+ if isinstance(obj, (Dataset, DatasetDict)):
+ return obj
+ else:
+ raise TypeError(
+ f'Expected a datasets.Dataset or a datasets.DatasetDict object, but got {obj}'
+ )
+
+
+def _check_list(obj) -> List:
+ if isinstance(obj, list):
+ return obj
+ else:
+ raise TypeError(f'Expected a List object, but got {obj}')
+
+
+def _check_str(obj) -> str:
+ if isinstance(obj, str):
+ return obj
+ else:
+ raise TypeError(f'Expected a str object, but got {obj}')
+
+
+def _check_dict(obj) -> Dict:
+ if isinstance(obj, dict):
+ return obj
+ else:
+ raise TypeError(f'Expected a Dict object, but got {obj}')