initial commit

gaotongxiao 2023-07-04 21:34:55 +08:00
commit 7d346000bb
188 changed files with 17029 additions and 0 deletions

README_zh-CN.md (66 lines added) · Normal file
View File

@ -0,0 +1,66 @@
<div align="center">
<img src="https://user-images.githubusercontent.com/22607038/250798681-b52045d2-cedd-4070-84e2-410903ac404f.png" width="500px"/>
[![docs](https://readthedocs.org/projects/opencompass/badge/?version=dev-1.x)](https://opencompass.readthedocs.io/en/dev-1.x/?badge=dev-1.x)
[![license](https://img.shields.io/github/license/InternLM/opencompass.svg)](https://github.com/InternLM/opencompass/blob/main/LICENSE)
[![PyPI](https://badge.fury.io/py/opencompass.svg)](https://pypi.org/project/opencompass/)
[📘Documentation](https://opencompass.readthedocs.io/en/latest/) |
[🛠Installation](https://opencompass.readthedocs.io/en/latest/get_started/install.html) |
[🤔Reporting Issues](https://github.com/InternLM/opencompass/issues/new/choose)
[English](/README.md) | 简体中文
</div>
## Introduction
OpenCompass is a one-stop platform for large language model evaluation, aiming to provide a fair, open, and reproducible benchmarking framework. Its main features are:
- **Comprehensive support for models and datasets**: out-of-the-box support for 20+ HuggingFace and API models, plus an evaluation scheme covering 50+ datasets with roughly 300K questions, assessing capabilities across 6 dimensions.
- **Efficient distributed evaluation**: a single command handles task partitioning and distributed execution, so a full evaluation of a 100B-parameter model can finish within a few hours\*.
- **Diverse evaluation paradigms**: supports zero-shot, few-shot, and chain-of-thought evaluation, combined with standard or dialogue-style prompt templates, to easily elicit the best performance from all kinds of models.
- **Extensible modular design**: want to add a new model or dataset, customize a more advanced task-partitioning strategy, or even plug in a new cluster management system? Everything in OpenCompass can be extended with ease!
- **Complete experiment recording and reporting**: every experiment is fully captured by its config file so key information stays traceable; results can be reported to a Lark (Feishu) bot in real time so you learn the outcome immediately.
## Leaderboard
## Capability Dimensions & Supported Models
## Installation
The quick installation steps are shown below. Some third-party features may require extra steps to work; for details, please refer to the [installation guide](https://opencompass.readthedocs.io/zh_cn/latest/get_started.html).
```bash
conda create --name opencompass python=3.8 pytorch torchvision -c pytorch -y
conda activate opencompass
git clone https://github.com/InternLM/opencompass opencompass
cd opencompass
pip install -r requirements/runtime.txt
pip install -e .
# Download the datasets into data/
# TODO: ....
```
## Evaluation
Please read the [Quick Start](https://opencompass.readthedocs.io/zh_cn/latest/get_started.html) to learn how to run an evaluation task.
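A minimal sketch of what launching a task looks like (the `run.py` entry script and the demo config name are assumptions for illustration; the Quick Start guide gives the exact command):

```bash
# Launch the evaluation described by a config file (hypothetical demo config);
# the config declares which models and datasets to evaluate.
python run.py configs/eval_demo.py
```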
## Acknowledgements
Some of the code in this project is adapted from [OpenICL](https://github.com/Shark-NLP/OpenICL).
## Citation
```bibtex
@misc{2023opencompass,
title={OpenCompass: A Universal Evaluation Platform for Foundation Models},
author={OpenCompass Contributors},
howpublished = {\url{https://github.com/InternLM/OpenCompass}},
year={2023}
}
```

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .ARC_c_ppl_ba951c import ARC_c_datasets # noqa: F401, F403

View File

@ -0,0 +1,53 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import ARCDataset
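# A dataset config has three parts: reader_cfg maps dataset columns to prompt
# inputs and the gold answer; infer_cfg builds one prompt per candidate answer
# (A-D) and ranks them with PPLInferencer (lowest perplexity wins); eval_cfg
# scores the selected choice against 'answerKey' with AccEvaluator.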
ARC_c_reader_cfg = dict(
input_columns=['question', 'textA', 'textB', 'textC', 'textD'],
output_column='answerKey')
ARC_c_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
"A":
dict(
round=[
dict(role="HUMAN", prompt="Question: {question}\nAnswer: "),
dict(role="BOT", prompt="{textA}")
], ),
"B":
dict(
round=[
dict(role="HUMAN", prompt="Question: {question}\nAnswer: "),
dict(role="BOT", prompt="{textB}")
], ),
"C":
dict(
round=[
dict(role="HUMAN", prompt="Question: {question}\nAnswer: "),
dict(role="BOT", prompt="{textC}")
], ),
"D":
dict(
round=[
dict(role="HUMAN", prompt="Question: {question}\nAnswer: "),
dict(role="BOT", prompt="{textD}")
], ),
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
ARC_c_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
ARC_c_datasets = [
dict(
type=ARCDataset,
abbr='ARC-c',
path='./data/ARC/ARC-c/ARC-Challenge-Dev.jsonl',
reader_cfg=ARC_c_reader_cfg,
infer_cfg=ARC_c_infer_cfg,
eval_cfg=ARC_c_eval_cfg)
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .ARC_e_gen_0a29bf import ARC_e_datasets # noqa: F401, F403

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .CLUE_C3_ppl_588820 import C3_datasets # noqa: F401, F403

View File

@ -0,0 +1,36 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import C3Dataset
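# Perplexity-style multiple choice: each label index (0-3) gets a fully filled
# prompt string, PPLInferencer picks the completion the model finds most
# likely, and AccEvaluator compares that index against the 'label' column.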
C3_reader_cfg = dict(
input_columns=[
'question', 'content', 'choice0', 'choice1', 'choice2', 'choice3',
'choices'
],
output_column='label')
C3_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0: "文章:{content}\n问题:{question}\n答案:{choice0}",
1: "文章:{content}\n问题:{question}\n答案:{choice1}",
2: "文章:{content}\n问题:{question}\n答案:{choice2}",
3: "文章:{content}\n问题:{question}\n答案:{choice3}"
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
C3_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
C3_datasets = [
dict(
type=C3Dataset,
abbr='C3',
path='./data/CLUE/C3/dev_0.json',
reader_cfg=C3_reader_cfg,
infer_cfg=C3_infer_cfg,
eval_cfg=C3_eval_cfg)
]

View File

@ -0,0 +1,27 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator
from opencompass.datasets import DRCDDataset
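# Extractive QA evaluated generatively: GenInferencer lets the model answer
# freely, and EMEvaluator checks the generation against 'answers' by exact match.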
DRCD_reader_cfg = dict(
input_columns=['question', 'context'], output_column='answers')
DRCD_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template="文章:{context}\n根据上文,回答如下问题: {question}\n答:"),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
DRCD_eval_cfg = dict(evaluator=dict(type=EMEvaluator), )
DRCD_datasets = [
dict(
type=DRCDDataset,
abbr='DRCD_dev',
path='./data/CLUE/DRCD/dev.json',
reader_cfg=DRCD_reader_cfg,
infer_cfg=DRCD_infer_cfg,
eval_cfg=DRCD_eval_cfg),
]

View File

@ -0,0 +1,33 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator
from opencompass.datasets import DRCDDataset
DRCD_reader_cfg = dict(
input_columns=['question', 'context'], output_column='answers')
DRCD_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(role="HUMAN", prompt="文章:{context}\n根据上文,回答如下问题:{question}"),
dict(role="BOT", prompt="答:"),
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
DRCD_eval_cfg = dict(
evaluator=dict(type=EMEvaluator),
pred_role="BOT",
)
DRCD_datasets = [
dict(
type=DRCDDataset,
abbr='DRCD_dev',
path='./data/CLUE/DRCD/dev.json',
reader_cfg=DRCD_reader_cfg,
infer_cfg=DRCD_infer_cfg,
eval_cfg=DRCD_eval_cfg),
]

View File

@ -0,0 +1,42 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import cmnliDataset_V2
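# Generation-style NLI: the model is asked to reply with an option letter; the
# 'first-capital' post-processor extracts the first capital letter from the
# generation before AccEvaluator compares it with the gold label.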
cmnli_reader_cfg = dict(
input_columns=["sentence1", "sentence2"],
output_column="label",
test_split="train")
cmnli_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt=
"阅读文章:{sentence1}\n根据上文,回答如下问题:{sentence2}\nA. 对\nB. 错\nC. 可能\n请从“A”“B”“C”中进行选择。\n答:"
),
]),
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer),
)
cmnli_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type="first-capital"),
)
cmnli_datasets = [
dict(
abbr="cmnli",
type=cmnliDataset_V2,
path="./data/CLUE/cmnli/cmnli_public/dev.json",
reader_cfg=cmnli_reader_cfg,
infer_cfg=cmnli_infer_cfg,
eval_cfg=cmnli_eval_cfg,
)
]

View File

@ -0,0 +1,42 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import cmnliDataset_V2
cmnli_reader_cfg = dict(
input_columns=["sentence1", "sentence2"],
output_column="label",
test_split="train")
cmnli_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt=
"语句一:“{sentence1}\n语句二:“{sentence2}\n请问这两句话是什么关系?\nA. 蕴含\nB. 矛盾\nC. 无关\n请从“A”“B”“C”中进行选择。\n答:"
),
]),
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer),
)
cmnli_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type="first-capital"),
)
cmnli_datasets = [
dict(
abbr="cmnli",
type=cmnliDataset_V2,
path="./data/CLUE/cmnli/cmnli_public/dev.json",
reader_cfg=cmnli_reader_cfg,
infer_cfg=cmnli_infer_cfg,
eval_cfg=cmnli_eval_cfg,
)
]

View File

@ -0,0 +1,56 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
cmnli_reader_cfg = dict(
input_columns=['sentence1', 'sentence2'],
output_column='label',
test_split='train')
cmnli_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
'contradiction':
dict(round=[
dict(
role="HUMAN",
prompt="语句一:“{sentence1}\n语句二:“{sentence2}\n请问这两句话是什么关系?"
),
dict(role="BOT", prompt="矛盾")
]),
'entailment':
dict(round=[
dict(
role="HUMAN",
prompt="语句一:“{sentence1}\n语句二:“{sentence2}\n请问这两句话是什么关系?"
),
dict(role="BOT", prompt="蕴含")
]),
'neutral':
dict(round=[
dict(
role="HUMAN",
prompt="语句一:“{sentence1}\n语句二:“{sentence2}\n请问这两句话是什么关系?"
),
dict(role="BOT", prompt="无关")
]),
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
cmnli_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
cmnli_datasets = [
dict(
type=HFDataset,
abbr='cmnli',
path='json',
split='train',
data_files='./data/CLUE/cmnli/cmnli_public/dev.json',
reader_cfg=cmnli_reader_cfg,
infer_cfg=cmnli_infer_cfg,
eval_cfg=cmnli_eval_cfg)
]

View File

@ -0,0 +1,43 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import cmnliDataset_V2
ocnli_reader_cfg = dict(
input_columns=["sentence1", "sentence2"],
output_column="label",
)
# TODO: two prompt templates for ocnli
ocnli_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt=
"语句一:“{sentence1}\n语句二:“{sentence2}\n请问这两句话是什么关系?\nA. 蕴含\n B. 矛盾\n C. 无关\n请从“A”“B”“C”中进行选择。\n答:"
),
]),
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer),
)
ocnli_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type="first-capital"),
)
ocnli_datasets = [
dict(
abbr="ocnli",
type=cmnliDataset_V2, # ocnli share the same format with cmnli
path="./data/CLUE/OCNLI/dev.json",
reader_cfg=ocnli_reader_cfg,
infer_cfg=ocnli_infer_cfg,
eval_cfg=ocnli_eval_cfg,
)
]

View File

@ -0,0 +1,51 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
ocnli_reader_cfg = dict(
input_columns=['sentence1', 'sentence2'], output_column='label')
# TODO: two prompt templates for ocnli
ocnli_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
'contradiction':
dict(round=[
dict(
role="HUMAN",
prompt="阅读文章:{sentence1}\n根据上文,回答如下问题:{sentence2}"),
dict(role="BOT", prompt="")
]),
'entailment':
dict(round=[
dict(
role="HUMAN",
prompt="阅读文章:{sentence1}\n根据上文,回答如下问题:{sentence2}"),
dict(role="BOT", prompt="")
]),
'neutral':
dict(round=[
dict(
role="HUMAN", prompt="如果{sentence1}为真,那么{sentence2}也为真吗?"),
dict(role="BOT", prompt="可能")
]),
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
ocnli_eval_cfg = dict(evaluator=dict(type=AccEvaluator), )
ocnli_datasets = [
dict(
type=HFDataset,
abbr='ocnli',
path='json',
split='train',
data_files='./data/CLUE/OCNLI/dev.json',
reader_cfg=ocnli_reader_cfg,
infer_cfg=ocnli_infer_cfg,
eval_cfg=ocnli_eval_cfg)
]

View File

@ -0,0 +1,55 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
ocnli_reader_cfg = dict(
input_columns=['sentence1', 'sentence2'], output_column='label')
# TODO: two prompt templates for ocnli
ocnli_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
'contradiction':
dict(round=[
dict(
role="HUMAN",
prompt="语句一:“{sentence1}\n语句二:“{sentence2}\n请问这两句话是什么关系?"
),
dict(role="BOT", prompt="矛盾")
]),
'entailment':
dict(round=[
dict(
role="HUMAN",
prompt="语句一:“{sentence1}\n语句二:“{sentence2}\n请问这两句话是什么关系?"
),
dict(role="BOT", prompt="蕴含")
]),
'neutral':
dict(round=[
dict(
role="HUMAN",
prompt="语句一:“{sentence1}\n语句二:“{sentence2}\n请问这两句话是什么关系?"
),
dict(role="BOT", prompt="无关")
]),
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
ocnli_eval_cfg = dict(evaluator=dict(type=AccEvaluator), )
ocnli_datasets = [
dict(
type=HFDataset,
abbr='ocnli',
path='json',
split='train',
data_files='./data/CLUE/OCNLI/dev.json',
reader_cfg=ocnli_reader_cfg,
infer_cfg=ocnli_infer_cfg,
eval_cfg=ocnli_eval_cfg)
]

View File

@ -0,0 +1,65 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
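# Each PPL template opens with a 'begin' section carrying a system-style
# instruction; fallback_role='HUMAN' delivers that instruction as a HUMAN turn
# when the model's meta template defines no SYSTEM role.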
bustm_reader_cfg = dict(
input_columns=['sentence1', 'sentence2'],
output_column='label',
test_split='train')
bustm_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0:
dict(
begin=[
dict(
role='SYSTEM',
fallback_role='HUMAN',
prompt="请判断以下两句话说的是否是一个意思:")
],
round=[
dict(role="HUMAN", prompt="{sentence1}{sentence2}"),
dict(role="BOT", prompt="两句话说的毫不相关。")
]),
1:
dict(
begin=[
dict(
role='SYSTEM',
fallback_role='HUMAN',
prompt="请判断以下两句话说的是否是一个意思:")
],
round=[
dict(role="HUMAN", prompt="{sentence1}{sentence2}"),
dict(role="BOT", prompt="两句话说是的一个意思。")
]),
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
bustm_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
bustm_datasets = [
dict(
type=HFDataset,
abbr='bustm-dev',
path='json',
data_files='./data/FewCLUE/bustm/dev_few_all.json',
split='train',
reader_cfg=bustm_reader_cfg,
infer_cfg=bustm_infer_cfg,
eval_cfg=bustm_eval_cfg),
dict(
type=HFDataset,
abbr='bustm-test',
path='json',
data_files='./data/FewCLUE/bustm/test_public.json',
split='train',
reader_cfg=bustm_reader_cfg,
infer_cfg=bustm_infer_cfg,
eval_cfg=bustm_eval_cfg)
]

View File

@ -0,0 +1,59 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
bustm_reader_cfg = dict(
input_columns=['sentence1', 'sentence2'],
output_column='label',
test_split='train')
bustm_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0:
dict(round=[
dict(
role="HUMAN",
prompt=
"语句一:“{sentence1}\n语句二:“{sentence2}\n请判断语句一和语句二说的是否是一个意思?"
),
dict(role="BOT", prompt="两句话说的毫不相关。")
]),
1:
dict(round=[
dict(
role="HUMAN",
prompt=
"语句一:“{sentence1}\n语句二:“{sentence2}\n请判断语句一和语句二说的是否是一个意思?"
),
dict(role="BOT", prompt="两句话说是的一个意思。")
]),
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
bustm_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
bustm_datasets = [
dict(
type=HFDataset,
abbr='bustm-dev',
path='json',
data_files='./data/FewCLUE/bustm/dev_few_all.json',
split='train',
reader_cfg=bustm_reader_cfg,
infer_cfg=bustm_infer_cfg,
eval_cfg=bustm_eval_cfg),
dict(
type=HFDataset,
abbr='bustm-test',
path='json',
data_files='./data/FewCLUE/bustm/test_public.json',
split='train',
reader_cfg=bustm_reader_cfg,
infer_cfg=bustm_infer_cfg,
eval_cfg=bustm_eval_cfg)
]

View File

@ -0,0 +1,50 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CHIDDataset_V2
chid_reader_cfg = dict(
input_columns=["content","A","B","C","D","E","F","G"],
output_column="answer",
)
chid_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
dict(
role="HUMAN",
prompt=
"{content}\n请选择______处所填的词\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nE. {E}\nF. {F}\nG. {G}\n请从”A“”B“”C“”D“”E“”F“”G“中进行选择。答",
),
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer),
)
chid_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type="first-capital"),
)
chid_datasets = [
dict(
abbr="chid-dev",
type=CHIDDataset_V2,
path="./data/FewCLUE/chid/dev_few_all.json",
reader_cfg=chid_reader_cfg,
infer_cfg=chid_infer_cfg,
eval_cfg=chid_eval_cfg,
),
dict(
abbr="chid-test",
type=CHIDDataset_V2,
path="./data/FewCLUE/chid/test_public.json",
reader_cfg=chid_reader_cfg,
infer_cfg=chid_infer_cfg,
eval_cfg=chid_eval_cfg,
),
]

View File

@ -0,0 +1,54 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CluewscDataset
cluewsc_reader_cfg = dict(
input_columns=['span1', 'span2', 'text', 'new_text'],
output_column='answer')
cluewsc_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0:
dict(round=[
dict(
role="HUMAN",
prompt="{text}\n此处,代词“{span2}“被用于指代“{span1}“吗?"),
dict(role="BOT", prompt="")
]),
1:
dict(round=[
dict(
role="HUMAN",
prompt="{text}\n此处,代词“{span2}“被用于指代“{span1}“吗?"),
dict(role="BOT", prompt="")
]),
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
cluewsc_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
cluewsc_datasets = [
dict(
type=CluewscDataset,
path='json',
abbr='cluewsc-dev',
data_files='./data/FewCLUE/cluewsc/dev_few_all.json',
split='train',
reader_cfg=cluewsc_reader_cfg,
infer_cfg=cluewsc_infer_cfg,
eval_cfg=cluewsc_eval_cfg),
dict(
type=CluewscDataset,
path='json',
abbr='cluewsc-test',
data_files='./data/FewCLUE/cluewsc/test_public.json',
split='train',
reader_cfg=cluewsc_reader_cfg,
infer_cfg=cluewsc_infer_cfg,
eval_cfg=cluewsc_eval_cfg),
]

View File

@ -0,0 +1,44 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CluewscDataset
cluewsc_reader_cfg = dict(
input_columns=['span1', 'span2', 'text', 'new_text'],
output_column='answer')
cluewsc_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0:
"{text}\nHere, is the pronoun \"{span2}\" used to mean \"{span1}\"? No.",
1:
"{text}\nHere, is the pronoun \"{span2}\" used to mean \"{span1}\"? Yes.",
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
cluewsc_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
cluewsc_datasets = [
dict(
type=CluewscDataset,
path='json',
abbr='cluewsc-dev',
data_files='./data/FewCLUE/cluewsc/dev_few_all.json',
split='train',
reader_cfg=cluewsc_reader_cfg,
infer_cfg=cluewsc_infer_cfg,
eval_cfg=cluewsc_eval_cfg),
dict(
type=CluewscDataset,
path='json',
abbr='cluewsc-test',
data_files='./data/FewCLUE/cluewsc/test_public.json',
split='train',
reader_cfg=cluewsc_reader_cfg,
infer_cfg=cluewsc_infer_cfg,
eval_cfg=cluewsc_eval_cfg),
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .FewCLUE_csl_ppl_8eee08 import csl_datasets # noqa: F401, F403

View File

@ -0,0 +1,45 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CslDataset
csl_reader_cfg = dict(
input_columns=["abst", "keywords"], output_column='label')
csl_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0:
dict(round=[dict(role="HUMAN", prompt="摘要:{abst}")]),
1:
dict(
round=[dict(role="HUMAN", prompt="摘要:{abst}\n关键词:{keywords}")
]),
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
csl_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
csl_datasets = [
dict(
type=CslDataset,
path='json',
abbr='csl_dev',
data_files='./data/FewCLUE/csl/dev_few_all.json',
split='train',
reader_cfg=csl_reader_cfg,
infer_cfg=csl_infer_cfg,
eval_cfg=csl_eval_cfg),
dict(
type=CslDataset,
path='json',
abbr='csl_test',
data_files='./data/FewCLUE/csl/test_public.json',
split='train',
reader_cfg=csl_reader_cfg,
infer_cfg=csl_infer_cfg,
eval_cfg=csl_eval_cfg)
]

View File

@ -0,0 +1,41 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
eprstmt_reader_cfg = dict(
input_columns=['sentence'], output_column='label', test_split='train')
eprstmt_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
'Negative': ' 内容: "{sentence}"。情绪分类:消极。',
'Positive': ' 内容: "{sentence}"。情绪分类:积极。',
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
eprstmt_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
eprstmt_datasets = [
dict(
type=HFDataset,
abbr='eprstmt-dev',
path='json',
data_files='./data/FewCLUE/eprstmt/dev_few_all.json',
split='train',
reader_cfg=eprstmt_reader_cfg,
infer_cfg=eprstmt_infer_cfg,
eval_cfg=eprstmt_eval_cfg),
dict(
type=HFDataset,
abbr='eprstmt-test',
path='json',
data_files='./data/FewCLUE/eprstmt/test_public.json',
split='train',
reader_cfg=eprstmt_reader_cfg,
infer_cfg=eprstmt_infer_cfg,
eval_cfg=eprstmt_eval_cfg)
]

View File

@ -0,0 +1,60 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
ocnli_fc_reader_cfg = dict(
input_columns=['sentence1', 'sentence2'],
output_column='label',
test_split='train')
ocnli_fc_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
'contradiction':
dict(round=[
dict(
role="HUMAN",
prompt="阅读文章:{sentence1}\n根据上文,回答如下问题:{sentence2}"),
dict(role="BOT", prompt="")
]),
'entailment':
dict(round=[
dict(
role="HUMAN",
prompt="阅读文章:{sentence1}\n根据上文,回答如下问题:{sentence2}"),
dict(role="BOT", prompt="")
]),
'neutral':
dict(round=[
dict(
role="HUMAN", prompt="如果{sentence1}为真,那么{sentence2}也为真吗?"),
dict(role="BOT", prompt="可能")
]),
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
ocnli_fc_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
ocnli_fc_datasets = [
dict(
type=HFDataset,
abbr='ocnli_fc-dev',
path='json',
split='train',
data_files='./data/FewCLUE/ocnli/dev_few_all.json',
reader_cfg=ocnli_fc_reader_cfg,
infer_cfg=ocnli_fc_infer_cfg,
eval_cfg=ocnli_fc_eval_cfg),
dict(
type=HFDataset,
abbr='ocnli_fc-test',
path='json',
split='train',
data_files='./data/FewCLUE/ocnli/test_public.json',
reader_cfg=ocnli_fc_reader_cfg,
infer_cfg=ocnli_fc_infer_cfg,
eval_cfg=ocnli_fc_eval_cfg)
]

View File

@ -0,0 +1,44 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
ocnli_fc_reader_cfg = dict(
input_columns=['sentence1', 'sentence2'],
output_column='label',
test_split='train')
ocnli_fc_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
'contradiction':
'阅读文章:{sentence1}\n根据上文,回答如下问题: {sentence2}\n答:错',
'entailment': '阅读文章:{sentence1}\n根据上文,回答如下问题: {sentence2}\n答:对',
'neutral': '如果{sentence1}为真,那么{sentence2}也为真吗?可能'
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
ocnli_fc_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
ocnli_fc_datasets = [
dict(
type=HFDataset,
abbr='ocnli_fc-dev',
path='json',
split='train',
data_files='./data/FewCLUE/ocnli/dev_few_all.json',
reader_cfg=ocnli_fc_reader_cfg,
infer_cfg=ocnli_fc_infer_cfg,
eval_cfg=ocnli_fc_eval_cfg),
dict(
type=HFDataset,
abbr='ocnli_fc-test',
path='json',
split='train',
data_files='./data/FewCLUE/ocnli/test_public.json',
reader_cfg=ocnli_fc_reader_cfg,
infer_cfg=ocnli_fc_infer_cfg,
eval_cfg=ocnli_fc_eval_cfg)
]

View File

@ -0,0 +1,74 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import TNewsDataset_V2
tnews_reader_cfg = dict(
input_columns="sentence",
output_column="label_desc2",
)
tnews_labels = [
"农业新闻", # news_agriculture
"旅游新闻", # news_travel
"游戏新闻", # news_game
"科技类别公司新闻", # news_tech
"体育类别新闻", # news_sports
"初升高教育新闻", # news_edu
"娱乐圈新闻", # news_entertainment
"投资资讯", # news_finance
"军事类别常识", # news_military
"车辆新闻", # news_car
"楼市新闻", # news_house
"环球不含中国类别新闻", # news_world
"书籍文化历史类别新闻", # news_culture
"故事类别新闻", # news_story
"股票市场类别新闻", # news_stock
]
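# The option block ("A. 农业新闻\nB. ...") and the letter range ("ABC...") are
# built from tnews_labels below, interpolated into the prompt via f-string, and
# deleted at the end of the file so they do not leak into the config namespace.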
_tnews_options_list_str = "\n".join(f'{chr(ord("A") + i)}. {tnews_labels[i]}'
for i in range(len(tnews_labels)))
_tnews_options_range_str = "".join(f'{chr(ord("A") + i)}'
for i in range(len(tnews_labels)))
tnews_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt=
f"{{sentence}}\n请判断上述内容属于什么新闻?\n{_tnews_options_list_str}\n请从{_tnews_options_range_str}中进行选择。\n答:",
),
]),
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer),
)
tnews_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type="first-capital"),
)
tnews_datasets = [
dict(
abbr="tnews-dev",
type=TNewsDataset_V2,
path="./data/FewCLUE/tnews/dev_few_all.json",
reader_cfg=tnews_reader_cfg,
infer_cfg=tnews_infer_cfg,
eval_cfg=tnews_eval_cfg,
),
dict(
abbr="tnews-test",
type=TNewsDataset_V2,
path="./data/FewCLUE/tnews/test_public.json",
reader_cfg=tnews_reader_cfg,
infer_cfg=tnews_infer_cfg,
eval_cfg=tnews_eval_cfg,
),
]
del _tnews_options_list_str, _tnews_options_range_str

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .FewCLUE_tnews_ppl_784b9e import tnews_datasets # noqa: F401, F403

View File

@ -0,0 +1,301 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import GaokaoBenchDataset
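# GAOKAO-Bench: prompt prefixes are defined per question type below (MCQ,
# fill-in-the-blank, open-ended); the loop at the end of the file turns each
# (folder, prompt) pair into a GaokaoBenchDataset entry using GenInferencer.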
_MCQ_prompts = [
{
"type": "single_choice",
"keyword": "2010-2022_Math_II_MCQs",
"prefix_prompt":
"请你做一道数学选择题\n请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从ABCD中选出正确的答案并写在【答案】和<eoa>之间。\n例如:【答案】: A <eoa>\n完整的题目回答的格式如下:\n【解析】 ... <eoe>\n【答案】 ... <eoa>\n请你严格按照上述格式作答。\n题目如下:",
"comment": ""
},
{
"type": "single_choice",
"keyword": "2010-2022_Math_I_MCQs",
"prefix_prompt":
"请你做一道数学选择题\n请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从ABCD中选出正确的答案并写在【答案】和<eoa>之间。\n例如:【答案】: A <eoa>\n完整的题目回答的格式如下:\n【解析】 ... <eoe>\n【答案】 ... <eoa>\n请你严格按照上述格式作答。\n题目如下:",
"comment": ""
},
{
"type":
"single_choice",
"keyword":
"2010-2022_History_MCQs",
"prefix_prompt":
"请你做一道历史选择题\n请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从ABCD中选出正确的答案并写在【答案】和<eoa>之间。\n例如:【答案】: A <eoa>\n完整的题目回答的格式如下:\n【解析】 ... <eoe>\n【答案】 ... <eoa>\n请你严格按照上述格式作答。\n题目如下:"
},
{
"type":
"single_choice",
"keyword":
"2010-2022_Biology_MCQs",
"prefix_prompt":
"请你做一道生物选择题\n请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从ABCD中选出正确的答案并写在【答案】和<eoa>之间。\n例如:【答案】: A <eoa>\n完整的题目回答的格式如下:\n【解析】 ... <eoe>\n【答案】 ... <eoa>\n请你严格按照上述格式作答。\n题目如下:"
},
{
"type":
"single_choice",
"keyword":
"2010-2022_Political_Science_MCQs",
"prefix_prompt":
"请你做一道政治选择题\n请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从ABCD中选出正确的答案并写在【答案】和<eoa>之间。\n例如:【答案】: A <eoa>\n完整的题目回答的格式如下:\n【解析】 ... <eoe>\n【答案】 ... <eoa>\n请你严格按照上述格式作答。\n题目如下:"
},
{
"type":
"multi_choice",
"keyword":
"2010-2022_Physics_MCQs",
"prefix_prompt":
"请你做一道物理选择题。\n请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从ABCD中选出所有符合题意的答案并写在【答案】和<eoa>之间。\n例如:【答案】 AB <eoa>\n完整的题目回答的格式如下:\n【解析】 ... <eoe>\n【答案】... <eoa>\n请你严格按照上述格式作答。\n"
},
{
"type":
"single_choice",
"keyword":
"2010-2022_Chemistry_MCQs",
"prefix_prompt":
"请你做一道化学选择题\n请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从ABCD中选出正确的答案并写在【答案】和<eoa>之间。\n例如:【答案】: A <eoa>\n完整的题目回答的格式如下:\n【解析】 ... <eoe>\n【答案】 ... <eoa>\n请你严格按照上述格式作答。\n题目如下:"
},
{
"type":
"single_choice",
"keyword":
"2010-2013_English_MCQs",
"prefix_prompt":
"请你做一道英语选择题\n请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从ABCD中选出正确的答案并写在【答案】和<eoa>之间。\n例如:【答案】: A <eoa>\n完整的题目回答的格式如下:\n【解析】 ... <eoe>\n【答案】 ... <eoa>\n请你严格按照上述格式作答。\n题目如下:"
},
{
"type":
"multi_question_choice",
"keyword":
"2010-2022_Chinese_Modern_Lit",
"prefix_prompt":
"请你做一道语文阅读理解题,其中包含三个小题。\n请你一步一步思考。每一题你将从ABCD中选出正确的答案并写在【答案】和<eoa>之间。\n例如1【答案】 A <eoa>\n2【答案】 B <eoa>\n请你严格按照上述格式作答。\n"
},
{
"type":
"multi_question_choice",
"keyword":
"2010-2022_English_Fill_in_Blanks",
"prefix_prompt":
"请你做一道英语完形填空题,其中包含二十个小题。\n请你一步一步思考。每一题你将从ABCD中选出正确的答案并写在【答案】和<eoa>之间。\n例如1【答案】 A <eoa>\n2【答案】 B <eoa>\n请你严格按照上述格式作答。\n"
},
{
"type":
"five_out_of_seven",
"keyword":
"2012-2022_English_Cloze_Test",
"prefix_prompt":
"请回答下面的问题,将符合题意的五个选项的字母写在【答案】和<eoa>之间,例如“【答案】 A B C D E <eoa>\n请严格按照上述格式作答。\n"
},
{
"type":
"multi_question_choice",
"keyword":
"2010-2022_Geography_MCQs",
"prefix_prompt":
"请你做一道地理选择题,其中包含两到三个小题。\n请你一步一步思考。每一题你将从ABCD中选出正确的答案并写在【答案】和<eoa>之间。\n例如1【答案】 A <eoa>\n2【答案】 B <eoa>\n请你严格按照上述格式作答。\n"
},
{
"type":
"multi_question_choice",
"keyword":
"2010-2022_English_Reading_Comp",
"prefix_prompt":
"请你做一道英语阅读理解题,其中包含三到五个小题。\n请你一步一步思考。每一题你将从ABCD中选出正确的答案并写在【答案】和<eoa>之间。\n例如1【答案】 A <eoa>\n2【答案】 B <eoa>\n请你严格按照上述格式作答。\n"
},
{
"type":
"multi_question_choice",
"keyword":
"2010-2022_Chinese_Lang_and_Usage_MCQs",
"prefix_prompt":
"请你做一道语文选择题\n请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从ABCD中选出正确的答案并写在【答案】和<eoa>之间。\n例如:【答案】: A <eoa>\n完整的题目回答的格式如下:\n1【解析】 ... <eoe>\n【答案】 ... <eoa>\n2【解析】 ... <eoe>\n【答案】 ... <eoa>\n请你严格按照上述格式作答。如果不止一道题,请分别作答\n题目如下:"
},
]
_FBQ_prompts = [{
"type": "cloze",
"keyword": "2010-2022_Math_I_Fill-in-the-Blank",
"prefix_prompt":
"请解答下面的数学填空题\n仔细阅读题目,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间。\n完整的题目回答格式如下:\n【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答。\n题目如下:",
"comment": ""
}, {
"type": "cloze",
"keyword": "2010-2022_Math_II_Fill-in-the-Blank",
"prefix_prompt":
"请解答下面的数学填空题\n仔细阅读题目,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间。\n完整的题目回答格式如下:\n【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答。\n题目如下:",
"comment": ""
}, {
"type": "cloze",
"keyword":
"2010-2022_Chinese_Language_Famous_Passages_and_Sentences_Dictation",
"prefix_prompt":
"请回答下面的语文填空题\n请你仔细阅读题目,先找到题目对应的中国名篇,再从名篇中找到合适的句子填写到题目的空白处。请你将思考过程写在【解析】和<eoe>之间,将最终答案写在【答案】和<eoa>之间。\n完整的题目回答格式如下:\n1【解析】 ...<eoe>\n【答案】...<eoa>\n2【解析】 ...<eoe>\n【答案】...<eoa>\n请严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
}, {
"type": "cloze",
"keyword": "2014-2022_English_Language_Cloze_Passage",
"prefix_prompt":
"请回答下面的英语短文填词题\n仔细阅读题目,空白处请填入一个适当单词或者括号内单词的正确形式。请你一步步思考,将思考过程写在【解析】和<eoe>之间,将最终答案写在【答案】和<eoa>之间。\n完整的题目回答格式如下:\n1【解析】 ...<eoe>\n【答案】...<eoa>\n2【解析】 ...<eoe>\n【答案】...<eoa>\n请严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
}]
_OEQ_prompts = [
{
"type": "subjective",
"keyword": "2010-2022_Geography_Open-ended_Questions",
"prefix_prompt":
"请解答下面的地理解答题\n仔细阅读题目并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。你的答案请写在【答案】和<eoa>之间\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Chemistry_Open-ended_Questions",
"prefix_prompt":
"请解答下面的化学解答题\n仔细阅读题目并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Math_I_Open-ended_Questions",
"prefix_prompt":
"请解答下面的数学解答题\n仔细阅读题目并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间,答案需要有完整的解题步骤。\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_History_Open-ended_Questions",
"prefix_prompt":
"请解答下面的历史解答题\n仔细阅读材料和题目,并充分结合你已有的知识,解答其中的问题。请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Biology_Open-ended_Questions",
"prefix_prompt":
"请解答下面的生物解答题\n仔细阅读题目并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间,同一小题的答案用\t分隔开。\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...\t...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...\t...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Math_II_Open-ended_Questions",
"prefix_prompt":
"请解答下面的数学解答题\n仔细阅读题目并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间,答案需要有完整的解题步骤。\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Physics_Open-ended_Questions",
"prefix_prompt":
"请解答下面的物理解答题,仔细阅读题目,注意其中可能含有单选题和多选题。请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案例如“【答案】A <eoa>”。\n完整的题目回答格式如下1【解析】 ...<eoe>\n【答案】 ...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答。如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Political_Science_Open-ended_Questions",
"prefix_prompt":
"请解答下面的政治解答题\n仔细阅读材料和题目,并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "correction",
"keyword": "2012-2022_English_Language_Error_Correction",
"prefix_prompt":
"请解答下面的英语短文改错题仔细阅读题目并充分结合你你已有的知识找出其中10处需要改动的地方。请你一步步思考把修改后的短文写在【答案】和<eoa>之间。\n完整的题目回答格式如下:【答案】 ...<eoa>\n 请你严格按照上述格式作答。\n题目如下:",
# "prefix_prompt": [
# "请解答下面的英语短文改错题仔细阅读题目并充分结合你你已有的知识找出其中10处需要改动的地方。请你一步步思考把修改后的短文写在【答案】和<eoa>之间。\n完整的题目回答格式如下【答案】 ...<eoa>\n 请你严格按照上述格式作答。\n题目如下:",
# "请比较下面两篇短文找到第二篇和第一篇的10处不同每处不同只涉及一个单词请将结果写在【答案】和<eoa>之间。例如【答案】1. 将play改为plays\n 2.增加了the\n ... <eoa>\n 完整的题目回答格式如下:【答案】(1) ... \n (2) ...\n ...(10) ...\n<eoa>\n请你严格按照上述格式作答。\n短文如下:"
# ],
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Chinese_Language_Ancient_Poetry_Reading",
"prefix_prompt":
"请解答下面的语文古代诗歌阅读题,仔细阅读题目,注意其中可能含有单选题和多选题。请你一步步思考并将最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案例如“【答案】A <eoa>”。\n完整的题目回答格式如下1【答案】 ...<eoa>\n (2)【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Chinese_Language_Practical_Text_Reading",
"prefix_prompt":
"请解答下面的语文实用类文本阅读,仔细阅读题目,注意其中可能含有单选题和多选题。请你一步步思考并将最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案例如“【答案】A <eoa>”。\n完整的题目回答格式如下1[答案】 ...<eoa>\n (2)【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Chinese_Language_Literary_Text_Reading",
"prefix_prompt":
"请解答下面的语文文学类文本阅读,仔细阅读题目,注意其中可能含有单选题和多选题。请你一步步思考并将最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案例如“【答案】A <eoa>”。\n完整的题目回答格式如下1[答案】 ...<eoa>\n (2)【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Chinese_Language_Classical_Chinese_Reading",
"prefix_prompt":
"请解答下面的语文文言文阅读,仔细阅读题目,前三题是单选题,最后一题要将文言文翻译为现代汉语。请你一步步思考并把最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案例如“【答案】A <eoa>”。翻译题把翻译后的现代汉语句子写在【答案】后面,例如”【答案】今天天气很好 <eoa>”\n完整的题目回答格式如下1[答案】 ...<eoa>\n (2)【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword":
"2010-2022_Chinese_Language_Language_and_Writing_Skills_Open-ended_Questions",
"prefix_prompt":
"请解答下面的语文解答题,仔细阅读题目,注意其中可能含有选择题。请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案例如“【答案】A <eoa>”。\n完整的题目回答格式如下1【解析】 ...<eoe>\n【答案】 ...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答。如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
}
]
GaokaoBench_datasets = []
for _folder, _prompts in [
("Multiple-choice_Questions", _MCQ_prompts),
("Fill-in-the-blank_Questions", _FBQ_prompts),
("Open-ended_Questions", _OEQ_prompts),
]:
for _p in _prompts:
_reader_cfg = {
"input_columns": ['question'],
"output_column": 'answer',
}
_infer_cfg = {
"ice_template": {
"type": PromptTemplate,
"template": {
"round": [{
"role": "HUMAN",
"prompt": _p['prefix_prompt'] + '{question}'
}]
},
"ice_token": "</E>"
},
"retriever": {
"type": ZeroRetriever
},
"inferencer": {
"type": GenInferencer,
"max_out_len": 1024,
}
}
_eval_cfg = {
"evaluator": {
"type": "GaokaoBenchEvaluator" + "_" + _p['type'],
},
"pred_role": "BOT",
}
_base_path = './data/GAOKAO-BENCH/data'
_dataset = {
"type": GaokaoBenchDataset,
"abbr": "GaokaoBench_" + _p['keyword'],
"path": _base_path + '/' + _folder + '/' + _p['keyword'] + ".json",
"reader_cfg": _reader_cfg,
"infer_cfg": _infer_cfg,
"eval_cfg": _eval_cfg,
}
GaokaoBench_datasets.append(_dataset)
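# Clean-up: drop every module-level helper prefixed with '_' so that only
# GaokaoBench_datasets is exported from this config.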
_temporary_variables = [k for k in globals() if k.startswith('_')]
for _t in _temporary_variables:
del globals()[_t]
del _temporary_variables, _t

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .GaokaoBench_mixed_f2038e import GaokaoBench_datasets # noqa: F401, F403

View File

@ -0,0 +1,353 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer, PPLInferencer
from opencompass.datasets import GaokaoBenchDataset
_MCQ_prompts = [
{
"type": "single_choice",
"keyword": "2010-2022_Math_II_MCQs",
"prefix_prompt":
"请你做一道数学选择题\n请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从ABCD中选出正确的答案并写在【答案】和<eoa>之间。\n例如:【答案】: A <eoa>\n完整的题目回答的格式如下:\n【解析】 ... <eoe>\n【答案】 ... <eoa>\n请你严格按照上述格式作答。\n题目如下:",
"comment": ""
},
{
"type": "single_choice",
"keyword": "2010-2022_Math_I_MCQs",
"prefix_prompt":
"请你做一道数学选择题\n请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从ABCD中选出正确的答案并写在【答案】和<eoa>之间。\n例如:【答案】: A <eoa>\n完整的题目回答的格式如下:\n【解析】 ... <eoe>\n【答案】 ... <eoa>\n请你严格按照上述格式作答。\n题目如下:",
"comment": ""
},
{
"type":
"single_choice",
"keyword":
"2010-2022_History_MCQs",
"prefix_prompt":
"请你做一道历史选择题\n请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从ABCD中选出正确的答案并写在【答案】和<eoa>之间。\n例如:【答案】: A <eoa>\n完整的题目回答的格式如下:\n【解析】 ... <eoe>\n【答案】 ... <eoa>\n请你严格按照上述格式作答。\n题目如下:"
},
{
"type":
"single_choice",
"keyword":
"2010-2022_Biology_MCQs",
"prefix_prompt":
"请你做一道生物选择题\n请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从ABCD中选出正确的答案并写在【答案】和<eoa>之间。\n例如:【答案】: A <eoa>\n完整的题目回答的格式如下:\n【解析】 ... <eoe>\n【答案】 ... <eoa>\n请你严格按照上述格式作答。\n题目如下:"
},
{
"type":
"single_choice",
"keyword":
"2010-2022_Political_Science_MCQs",
"prefix_prompt":
"请你做一道政治选择题\n请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从ABCD中选出正确的答案并写在【答案】和<eoa>之间。\n例如:【答案】: A <eoa>\n完整的题目回答的格式如下:\n【解析】 ... <eoe>\n【答案】 ... <eoa>\n请你严格按照上述格式作答。\n题目如下:"
},
{
"type":
"multi_choice",
"keyword":
"2010-2022_Physics_MCQs",
"prefix_prompt":
"请你做一道物理选择题。\n请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从ABCD中选出所有符合题意的答案并写在【答案】和<eoa>之间。\n例如:【答案】 AB <eoa>\n完整的题目回答的格式如下:\n【解析】 ... <eoe>\n【答案】... <eoa>\n请你严格按照上述格式作答。\n"
},
{
"type":
"single_choice",
"keyword":
"2010-2022_Chemistry_MCQs",
"prefix_prompt":
"请你做一道化学选择题\n请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从ABCD中选出正确的答案并写在【答案】和<eoa>之间。\n例如:【答案】: A <eoa>\n完整的题目回答的格式如下:\n【解析】 ... <eoe>\n【答案】 ... <eoa>\n请你严格按照上述格式作答。\n题目如下:"
},
{
"type":
"single_choice",
"keyword":
"2010-2013_English_MCQs",
"prefix_prompt":
"请你做一道英语选择题\n请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从ABCD中选出正确的答案并写在【答案】和<eoa>之间。\n例如:【答案】: A <eoa>\n完整的题目回答的格式如下:\n【解析】 ... <eoe>\n【答案】 ... <eoa>\n请你严格按照上述格式作答。\n题目如下:"
},
{
"type":
"multi_question_choice",
"keyword":
"2010-2022_Chinese_Modern_Lit",
"prefix_prompt":
"请你做一道语文阅读理解题,其中包含三个小题。\n请你一步一步思考。每一题你将从ABCD中选出正确的答案并写在【答案】和<eoa>之间。\n例如1【答案】 A <eoa>\n2【答案】 B <eoa>\n请你严格按照上述格式作答。\n"
},
{
"type":
"multi_question_choice",
"keyword":
"2010-2022_English_Fill_in_Blanks",
"prefix_prompt":
"请你做一道英语完形填空题,其中包含二十个小题。\n请你一步一步思考。每一题你将从ABCD中选出正确的答案并写在【答案】和<eoa>之间。\n例如1【答案】 A <eoa>\n2【答案】 B <eoa>\n请你严格按照上述格式作答。\n"
},
{
"type":
"five_out_of_seven",
"keyword":
"2012-2022_English_Cloze_Test",
"prefix_prompt":
"请回答下面的问题,将符合题意的五个选项的字母写在【答案】和<eoa>之间,例如“【答案】 A B C D E <eoa>\n请严格按照上述格式作答。\n"
},
{
"type":
"multi_question_choice",
"keyword":
"2010-2022_Geography_MCQs",
"prefix_prompt":
"请你做一道地理选择题,其中包含两到三个小题。\n请你一步一步思考。每一题你将从ABCD中选出正确的答案并写在【答案】和<eoa>之间。\n例如1【答案】 A <eoa>\n2【答案】 B <eoa>\n请你严格按照上述格式作答。\n"
},
{
"type":
"multi_question_choice",
"keyword":
"2010-2022_English_Reading_Comp",
"prefix_prompt":
"请你做一道英语阅读理解题,其中包含三到五个小题。\n请你一步一步思考。每一题你将从ABCD中选出正确的答案并写在【答案】和<eoa>之间。\n例如1【答案】 A <eoa>\n2【答案】 B <eoa>\n请你严格按照上述格式作答。\n"
},
{
"type":
"multi_question_choice",
"keyword":
"2010-2022_Chinese_Lang_and_Usage_MCQs",
"prefix_prompt":
"请你做一道语文选择题\n请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从ABCD中选出正确的答案并写在【答案】和<eoa>之间。\n例如:【答案】: A <eoa>\n完整的题目回答的格式如下:\n1【解析】 ... <eoe>\n【答案】 ... <eoa>\n2【解析】 ... <eoe>\n【答案】 ... <eoa>\n请你严格按照上述格式作答。如果不止一道题,请分别作答\n题目如下:"
},
]
_FBQ_prompts = [{
"type": "cloze",
"keyword": "2010-2022_Math_I_Fill-in-the-Blank",
"prefix_prompt":
"请解答下面的数学填空题\n仔细阅读题目,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间。\n完整的题目回答格式如下:\n【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答。\n题目如下:",
"comment": ""
}, {
"type": "cloze",
"keyword": "2010-2022_Math_II_Fill-in-the-Blank",
"prefix_prompt":
"请解答下面的数学填空题\n仔细阅读题目,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间。\n完整的题目回答格式如下:\n【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答。\n题目如下:",
"comment": ""
}, {
"type": "cloze",
"keyword":
"2010-2022_Chinese_Language_Famous_Passages_and_Sentences_Dictation",
"prefix_prompt":
"请回答下面的语文填空题\n请你仔细阅读题目,先找到题目对应的中国名篇,再从名篇中找到合适的句子填写到题目的空白处。请你将思考过程写在【解析】和<eoe>之间,将最终答案写在【答案】和<eoa>之间。\n完整的题目回答格式如下:\n1【解析】 ...<eoe>\n【答案】...<eoa>\n2【解析】 ...<eoe>\n【答案】...<eoa>\n请严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
}, {
"type": "cloze",
"keyword": "2014-2022_English_Language_Cloze_Passage",
"prefix_prompt":
"请回答下面的英语短文填词题\n仔细阅读题目,空白处请填入一个适当单词或者括号内单词的正确形式。请你一步步思考,将思考过程写在【解析】和<eoe>之间,将最终答案写在【答案】和<eoa>之间。\n完整的题目回答格式如下:\n1【解析】 ...<eoe>\n【答案】...<eoa>\n2【解析】 ...<eoe>\n【答案】...<eoa>\n请严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
}]
_OEQ_prompts = [
{
"type": "subjective",
"keyword": "2010-2022_Geography_Open-ended_Questions",
"prefix_prompt":
"请解答下面的地理解答题\n仔细阅读题目并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。你的答案请写在【答案】和<eoa>之间\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Chemistry_Open-ended_Questions",
"prefix_prompt":
"请解答下面的化学解答题\n仔细阅读题目并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Math_I_Open-ended_Questions",
"prefix_prompt":
"请解答下面的数学解答题\n仔细阅读题目并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间,答案需要有完整的解题步骤。\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_History_Open-ended_Questions",
"prefix_prompt":
"请解答下面的历史解答题\n仔细阅读材料和题目,并充分结合你已有的知识,解答其中的问题。请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Biology_Open-ended_Questions",
"prefix_prompt":
"请解答下面的生物解答题\n仔细阅读题目并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间,同一小题的答案用\t分隔开。\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...\t...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...\t...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Math_II_Open-ended_Questions",
"prefix_prompt":
"请解答下面的数学解答题\n仔细阅读题目并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间,答案需要有完整的解题步骤。\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Physics_Open-ended_Questions",
"prefix_prompt":
"请解答下面的物理解答题,仔细阅读题目,注意其中可能含有单选题和多选题。请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案例如“【答案】A <eoa>”。\n完整的题目回答格式如下1【解析】 ...<eoe>\n【答案】 ...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答。如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Political_Science_Open-ended_Questions",
"prefix_prompt":
"请解答下面的政治解答题\n仔细阅读材料和题目,并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "correction",
"keyword": "2012-2022_English_Language_Error_Correction",
"prefix_prompt":
"请解答下面的英语短文改错题仔细阅读题目并充分结合你你已有的知识找出其中10处需要改动的地方。请你一步步思考把修改后的短文写在【答案】和<eoa>之间。\n完整的题目回答格式如下:【答案】 ...<eoa>\n 请你严格按照上述格式作答。\n题目如下:",
# "prefix_prompt": [
# "请解答下面的英语短文改错题仔细阅读题目并充分结合你你已有的知识找出其中10处需要改动的地方。请你一步步思考把修改后的短文写在【答案】和<eoa>之间。\n完整的题目回答格式如下【答案】 ...<eoa>\n 请你严格按照上述格式作答。\n题目如下:",
# "请比较下面两篇短文找到第二篇和第一篇的10处不同每处不同只涉及一个单词请将结果写在【答案】和<eoa>之间。例如【答案】1. 将play改为plays\n 2.增加了the\n ... <eoa>\n 完整的题目回答格式如下:【答案】(1) ... \n (2) ...\n ...(10) ...\n<eoa>\n请你严格按照上述格式作答。\n短文如下:"
# ],
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Chinese_Language_Ancient_Poetry_Reading",
"prefix_prompt":
"请解答下面的语文古代诗歌阅读题,仔细阅读题目,注意其中可能含有单选题和多选题。请你一步步思考并将最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案例如“【答案】A <eoa>”。\n完整的题目回答格式如下1【答案】 ...<eoa>\n (2)【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Chinese_Language_Practical_Text_Reading",
"prefix_prompt":
"请解答下面的语文实用类文本阅读,仔细阅读题目,注意其中可能含有单选题和多选题。请你一步步思考并将最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案例如“【答案】A <eoa>”。\n完整的题目回答格式如下1[答案】 ...<eoa>\n (2)【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Chinese_Language_Literary_Text_Reading",
"prefix_prompt":
"请解答下面的语文文学类文本阅读,仔细阅读题目,注意其中可能含有单选题和多选题。请你一步步思考并将最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案例如“【答案】A <eoa>”。\n完整的题目回答格式如下1[答案】 ...<eoa>\n (2)【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Chinese_Language_Classical_Chinese_Reading",
"prefix_prompt":
"请解答下面的语文文言文阅读,仔细阅读题目,前三题是单选题,最后一题要将文言文翻译为现代汉语。请你一步步思考并把最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案例如“【答案】A <eoa>”。翻译题把翻译后的现代汉语句子写在【答案】后面,例如”【答案】今天天气很好 <eoa>”\n完整的题目回答格式如下1[答案】 ...<eoa>\n (2)【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword":
"2010-2022_Chinese_Language_Language_and_Writing_Skills_Open-ended_Questions",
"prefix_prompt":
"请解答下面的语文解答题,仔细阅读题目,注意其中可能含有选择题。请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案例如“【答案】A <eoa>”。\n完整的题目回答格式如下1【解析】 ...<eoe>\n【答案】 ...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答。如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
}
]
GaokaoBench_datasets = []
for _folder, _prompts in [
("Multiple-choice_Questions", _MCQ_prompts),
("Fill-in-the-blank_Questions", _FBQ_prompts),
("Open-ended_Questions", _OEQ_prompts),
]:
for _p in _prompts:
if _p['type'] == "single_choice":
continue
_reader_cfg = {
"input_columns": ['question'],
"output_column": 'answer',
}
_infer_cfg = {
"ice_template": {
"type": PromptTemplate,
"template": {
"round": [{
"role": "HUMAN",
"prompt": _p['prefix_prompt'] + '{question}'
}]
},
"ice_token": "</E>"
},
"retriever": {
"type": ZeroRetriever
},
"inferencer": {
"type": GenInferencer,
"max_out_len": 1024,
}
}
_eval_cfg = {
"evaluator": {
"type": "GaokaoBenchEvaluator" + "_" + _p['type'],
},
"pred_role": "BOT",
}
_base_path = './data/GAOKAO-BENCH/data'
_dataset = {
"type": GaokaoBenchDataset,
"abbr": "GaokaoBench_" + _p['keyword'],
"path": _base_path + '/' + _folder + '/' + _p['keyword'] + ".json",
"reader_cfg": _reader_cfg,
"infer_cfg": _infer_cfg,
"eval_cfg": _eval_cfg,
}
GaokaoBench_datasets.append(_dataset)
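# single_choice MCQs (skipped above) are evaluated by perplexity instead of
# generation: each option A-D gets a template ending with a BOT turn
# "【答案】X <eoa>", and PPLInferencer selects the lowest-perplexity option.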
_folder = "Multiple-choice_Questions"
for _p in _MCQ_prompts:
if _p['type'] != "single_choice":
continue
_reader_cfg = {
"input_columns": ['question'],
"output_column": 'answer',
}
_infer_cfg = {
"ice_template": {
"type": PromptTemplate,
"template": {
answer: {
"round": [{
"role": "HUMAN",
"prompt": _p['prefix_prompt'] + '{question}'
}, {
"role": "BOT",
"prompt": f"【答案】{answer} <eoa>"
}]
}
for answer in ['A', 'B', 'C', 'D']
},
"ice_token": "</E>"
},
"retriever": {
"type": ZeroRetriever
},
"inferencer": {
"type": PPLInferencer
}
}
_eval_cfg = {
"evaluator": {
"type": "GaokaoBenchEvaluator" + "_" + _p['type'],
},
"pred_role": "BOT",
}
_base_path = './data/GAOKAO-BENCH/data'
_dataset = {
"type": GaokaoBenchDataset,
"abbr": "GaokaoBench_" + _p['keyword'],
"path": _base_path + '/' + _folder + '/' + _p['keyword'] + ".json",
"reader_cfg": _reader_cfg,
"infer_cfg": _infer_cfg,
"eval_cfg": _eval_cfg,
}
GaokaoBench_datasets.append(_dataset)
_temporary_variables = [k for k in globals() if k.startswith('_')]
for _t in _temporary_variables:
del globals()[_t]
del _temporary_variables, _t

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .PJExam_gen_785c37 import PJExam_datasets # noqa: F401, F403

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .SuperGLUE_AX_g_gen_7a5dee import AX_g_datasets # noqa: F401, F403

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .SuperGLUE_BoolQ_gen_8525d1 import BoolQ_datasets # noqa: F401, F403

View File

@ -0,0 +1,45 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import BoolQDataset
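# Yes/no QA via perplexity: label 0 is scored with a "No." completion and
# label 1 with "Yes."; the more likely continuation is taken as the prediction.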
BoolQ_reader_cfg = dict(
input_columns=["question", "passage"],
output_column="answer",
test_split="train")
BoolQ_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0:
dict(round=[
dict(role="HUMAN", prompt="{passage}\nQuestion: {question}"),
dict(role="BOT", prompt="No."),
]),
1:
dict(round=[
dict(role="HUMAN", prompt="{passage}\nQuestion: {question}"),
dict(role="BOT", prompt="Yes."),
]),
},
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer),
)
BoolQ_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
BoolQ_datasets = [
dict(
type=BoolQDataset,
abbr="BoolQ",
path="json",
data_files="./data/SuperGLUE/BoolQ/val.jsonl",
split="train",
reader_cfg=BoolQ_reader_cfg,
infer_cfg=BoolQ_infer_cfg,
eval_cfg=BoolQ_eval_cfg,
)
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .SuperGLUE_CB_ppl_32adbb import CB_datasets # noqa: F401, F403

View File

@ -0,0 +1,62 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
CB_reader_cfg = dict(
input_columns=["premise", "hypothesis"],
output_column="label",
)
CB_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
"contradiction":
dict(round=[
dict(
role="HUMAN",
prompt=
"{premise}\n{hypothesis}\nWhat is the relation between the two sentences?"
),
dict(role="BOT", prompt="Contradiction"),
]),
"entailment":
dict(round=[
dict(
role="HUMAN",
prompt=
"{premise}\n{hypothesis}\nWhat is the relation between the two sentences?"
),
dict(role="BOT", prompt="Entailment"),
]),
"neutral":
dict(round=[
dict(
role="HUMAN",
prompt=
"{premise}\n{hypothesis}\nWhat is the relation between the two sentences?"
),
dict(role="BOT", prompt="Neutral"),
]),
},
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer),
)
CB_eval_cfg = dict(evaluator=dict(type=AccEvaluator), )
CB_datasets = [
dict(
type=HFDataset,
abbr="CB",
path="json",
split="train",
data_files="./data/SuperGLUE/CB/val.jsonl",
reader_cfg=CB_reader_cfg,
infer_cfg=CB_infer_cfg,
eval_cfg=CB_eval_cfg,
)
]

View File

@ -0,0 +1,49 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
COPA_reader_cfg = dict(
input_columns=["question", "premise", "choice1", "choice2"],
output_column="label",
test_split="train")
COPA_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0:
dict(round=[
dict(
role="HUMAN",
prompt="{premise}\nQuestion: What may be the {question}?\nAnswer:"),
dict(role="BOT", prompt="{choice1}"),
]),
1:
dict(round=[
dict(
role="HUMAN",
prompt="{premise}\nQuestion: What may be the {question}?\nAnswer:"),
dict(role="BOT", prompt="{choice2}"),
]),
},
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer),
)
COPA_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
COPA_datasets = [
dict(
type=HFDataset,
abbr="COPA",
path="json",
data_files="./data/SuperGLUE/COPA/val.jsonl",
split="train",
reader_cfg=COPA_reader_cfg,
infer_cfg=COPA_infer_cfg,
eval_cfg=COPA_eval_cfg,
)
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .SuperGLUE_MultiRC_ppl_83a304 import MultiRC_datasets # noqa: F401, F403

View File

@ -0,0 +1,42 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import AXDataset_V2
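# RTE reuses the AX dataset loader (same JSONL format); the model answers with
# A/B and the first capital letter is extracted before accuracy is computed.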
RTE_reader_cfg = dict(
input_columns=["hypothesis", "premise"],
output_column="label",
)
RTE_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt=
"{premise}\n{hypothesis}\nIs the sentence below entailed by the sentence above?\nA. Yes\nB. No\nAnswer:"
),
]),
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer),
)
RTE_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type="first-capital"),
)
RTE_datasets = [
dict(
abbr="RTE",
type=AXDataset_V2, # rte share the same format with ax
path="./data/SuperGLUE/RTE/val.jsonl",
reader_cfg=RTE_reader_cfg,
infer_cfg=RTE_infer_cfg,
eval_cfg=RTE_eval_cfg,
)
]

View File

@ -0,0 +1,53 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
RTE_reader_cfg = dict(
input_columns=["hypothesis", "premise"],
output_column="label",
test_split="train")
RTE_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
"entailment":
dict(round=[
dict(
role="HUMAN",
prompt=
"{premise}\n{hypothesis}\nIs the sentence below entailed by the sentence above?"
),
dict(role="BOT", prompt="Yes"),
]),
"not_entailment":
dict(round=[
dict(
role="HUMAN",
prompt=
"{premise}\n{hypothesis}\nIs the sentence below entailed by the sentence above?"
),
dict(role="BOT", prompt="No"),
])
},
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer),
)
RTE_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
RTE_datasets = [
dict(
type=HFDataset,
abbr="RTE",
path="json",
data_files="./data/SuperGLUE/RTE/val.jsonl",
split="train",
reader_cfg=RTE_reader_cfg,
infer_cfg=RTE_infer_cfg,
eval_cfg=RTE_eval_cfg,
)
]

View File

@ -0,0 +1,29 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator
from opencompass.datasets import ReCoRDDataset
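# ReCoRD, zero-shot generative setup: the model is asked to name the entity
# behind the "____" placeholder in the query; EMEvaluator computes exact
# match after the ReCoRD postprocessor cleans the prediction.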
ReCoRD_reader_cfg = dict(
input_columns=['question', 'text'], output_column='answers')
ReCoRD_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=
"Passage:{text}\nResult:{question}\nQuestion: What entity does ____ refer to in the result?Give me the entity name:"),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
ReCoRD_eval_cfg = dict(
evaluator=dict(type=EMEvaluator), pred_postprocessor=dict(type='ReCoRD'))
ReCoRD_datasets = [
dict(
type=ReCoRDDataset,
abbr='ReCoRD',
path='./data/SuperGLUE/ReCoRD/val.jsonl',
reader_cfg=ReCoRD_reader_cfg,
infer_cfg=ReCoRD_infer_cfg,
eval_cfg=ReCoRD_eval_cfg)
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .SuperGLUE_WSC_ppl_85f45f import WSC_datasets # noqa: F401, F403

View File

@ -0,0 +1,34 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import WSCDataset
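# WSC, zero-shot PPL setup: the original sentence and the rewritten variant
# ('new_text', presumably with the candidate span substituted for the
# pronoun) are scored as two plain-string templates, and the lower-perplexity
# variant decides the coreference answer.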
WSC_reader_cfg = dict(
input_columns=['span1', 'span2', 'text', 'new_text'],
output_column='answer')
WSC_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0: "{text}",
1: "{new_text}"
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
WSC_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
WSC_datasets = [
dict(
type=WSCDataset,
path='json',
abbr='WSC',
data_files='./data/SuperGLUE/WSC/val.jsonl',
split='train',
reader_cfg=WSC_reader_cfg,
infer_cfg=WSC_infer_cfg,
eval_cfg=WSC_eval_cfg,
)
]

View File

@ -0,0 +1,41 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import WSCDataset
WSC_reader_cfg = dict(
input_columns=["span1", "span2", "text", "new_text"],
output_column="answer",
)
WSC_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0: dict(round=[
dict(role="HUMAN", prompt="{text}"),
]),
1: dict(round=[
dict(role="HUMAN", prompt="{new_text}"),
]),
},
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer),
)
WSC_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
WSC_datasets = [
dict(
type=WSCDataset,
path="json",
abbr="WSC",
data_files="./data/SuperGLUE/WSC/val.jsonl",
split="train",
reader_cfg=WSC_reader_cfg,
infer_cfg=WSC_infer_cfg,
eval_cfg=WSC_eval_cfg,
)
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .TheoremQA_gen_891fcf import TheoremQA_datasets # noqa: F401, F403

View File

@ -0,0 +1,37 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import TheoremQADataset
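# TheoremQA, zero-shot generative setup: the answer-format constraints and
# the question are sent together as a single HUMAN turn; the TheoremQA
# postprocessor extracts the final answer before accuracy scoring.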
TheoremQA_reader_cfg = dict(
input_columns=['Question', 'Answer_type'],
output_column='Answer',
train_split='test')
TheoremQA_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role='HUMAN',
prompt=
"""You are a mathematician, you are supposed to answer the given question. You need to output the answer in your final sentence like "Therefore, the answer is ...". The answer can only be one of the following forms:\n1. a numerical value like 0.1, no symbol and no unit at all.\n2. a list of number like [2, 3, 4].\n3. True/False.\n4. an option like (a), (b), (c), (d)\nQuestion: {Question}\nLet\'s think step by step."""
),
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
TheoremQA_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='TheoremQA'))
TheoremQA_datasets = [
dict(
abbr='TheoremQA',
type=TheoremQADataset,
path="./data/TheoremQA/test.csv",
reader_cfg=TheoremQA_reader_cfg,
infer_cfg=TheoremQA_infer_cfg,
eval_cfg=TheoremQA_eval_cfg)
]

View File

@ -0,0 +1,48 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import TheoremQADataset
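# TheoremQA, zero-shot generative setup (system-prompt variant): the
# answer-format instruction is placed in a SYSTEM turn (falling back to
# HUMAN for models without a system role), and only the question itself goes
# into the conversation round.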
TheoremQA_reader_cfg = dict(
input_columns=['Question', 'Answer_type'],
output_column='Answer',
train_split='test')
TheoremQA_prompt1 = """You are a mathematician, you are supposed to answer the given question. You need to output the answer in your final sentence like "Therefore, the answer is ...". The answer can only be one of the following forms:
1. a numerical value like 0.1, no symbol and no unit at all.
2. a list of number like [2, 3, 4].
3. True/False.
4. an option like (a), (b), (c), (d)
"""
TheoremQA_prompt2 = 'Question: {Question}\nLet\'s think step by step.'
TheoremQA_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
begin=[
dict(
role='SYSTEM',
fallback_role='HUMAN',
prompt=TheoremQA_prompt1),
],
round=[
dict(role='HUMAN', prompt=TheoremQA_prompt2),
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
TheoremQA_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='TheoremQA'))
TheoremQA_datasets = [
dict(
abbr='TheoremQA',
type=TheoremQADataset,
path="./data/TheoremQA/test.csv",
reader_cfg=TheoremQA_reader_cfg,
infer_cfg=TheoremQA_infer_cfg,
eval_cfg=TheoremQA_eval_cfg)
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .Xsum_gen_d2126e import Xsum_datasets # noqa: F401, F403

View File

@ -0,0 +1,30 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import RougeEvaluator
from opencompass.datasets import XsumDataset
Xsum_reader_cfg = dict(input_columns=['dialogue'], output_column='summary')
Xsum_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
        template='Document: {dialogue}\n'
'Based on the previous text, provide a brief single summary:'),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
Xsum_eval_cfg = dict(
evaluator=dict(type=RougeEvaluator),
pred_postprocessor=dict(type='Xsum'),
)
Xsum_datasets = [
dict(
type=XsumDataset,
abbr='Xsum',
path='./data/Xsum/dev.jsonl',
reader_cfg=Xsum_reader_cfg,
infer_cfg=Xsum_infer_cfg,
eval_cfg=Xsum_eval_cfg)
]

View File

@ -0,0 +1,39 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import RougeEvaluator
from opencompass.datasets import XsumDataset
Xsum_reader_cfg = dict(input_columns=["dialogue"], output_column="summary")
Xsum_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt=
"Document{dialogue}\nBased on the previous text, provide a brief single summary:"
),
]),
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer),
)
Xsum_eval_cfg = dict(
evaluator=dict(type=RougeEvaluator),
pred_role='BOT',
pred_postprocessor=dict(type="Xsum"),
)
Xsum_datasets = [
dict(
type=XsumDataset,
abbr="Xsum",
path="./data/Xsum/dev.jsonl",
reader_cfg=Xsum_reader_cfg,
infer_cfg=Xsum_infer_cfg,
eval_cfg=Xsum_eval_cfg,
)
]

View File

@ -0,0 +1,84 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import AGIEvalDataset, AGIEvalEvaluator
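# AGIEval (v1 dataset class), zero-shot generative setup: single-choice sets
# are scored by accuracy after first-capital postprocessing, cloze sets use
# the dedicated AGIEvalEvaluator, and the multiple-choice sets are disabled
# pending extra data preprocessing.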
agieval_reader_cfg = dict(
input_columns=['problem_input'], output_column='label')
agieval_single_choice_sets = [
'gaokao-chinese',
'gaokao-english',
'gaokao-geography',
'gaokao-history',
'gaokao-biology',
'gaokao-chemistry',
'gaokao-physics',
'gaokao-mathqa',
'logiqa-zh',
'lsat-ar',
'lsat-lr',
'lsat-rc',
'logiqa-en',
'sat-math',
'sat-en',
'sat-en-without-passage',
'aqua-rat',
]
agieval_multiple_choices_sets = [
    # 'jec-qa-kd',  # requires extra data preprocessing
    # 'jec-qa-ca',  # requires extra data preprocessing
]
agieval_cloze_sets = ['gaokao-mathcloze', 'math']
agieval_datasets = []
for name in agieval_single_choice_sets:
agieval_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[dict(role='HUMAN', prompt='{problem_input}')])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, labels=['A', 'B', 'C', 'D']))
agieval_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='first-capital'))
agieval_datasets.append(
dict(
type=AGIEvalDataset,
path='./data/AGIEval/data/v1/',
name=name,
abbr='agieval-' + name,
setting_name='zero-shot',
reader_cfg=agieval_reader_cfg,
infer_cfg=agieval_infer_cfg.copy(),
eval_cfg=agieval_eval_cfg.copy()))
for name in agieval_cloze_sets:
agieval_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[dict(role="HUMAN", prompt='{problem_input}')])),
retriever=dict(type=ZeroRetriever
                   ),  # the retriever has no effect here; setting_name (zero-shot / few-shot) decides the format
inferencer=dict(type=GenInferencer))
agieval_eval_cfg = dict(
evaluator=dict(type=AGIEvalEvaluator), pred_role="BOT")
agieval_datasets.append(
dict(
type=AGIEvalDataset,
path='./data/AGIEval/data/v1/',
name=name,
abbr='agieval-' + name,
setting_name='zero-shot',
reader_cfg=agieval_reader_cfg,
infer_cfg=agieval_infer_cfg.copy(),
eval_cfg=agieval_eval_cfg.copy()))
del name, agieval_infer_cfg, agieval_eval_cfg

View File

@ -0,0 +1,203 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import AGIEvalDataset_v2, AGIEvalEvaluator
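# AGIEval (v2 dataset class), zero-shot generative setup: every subset gets a
# language-matched answer hint ("答案是: " / "The answer is "), predictions are
# postprocessed with first-capital / first-capital-multi or handed to
# AGIEvalEvaluator depending on the task type, and a per-subset instruction
# is prepended to the prompt in the loop at the bottom of this file.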
agieval_reader_cfg = dict(
input_columns=['question', 'options'], output_column='label')
agieval_single_choice_sets = [
'gaokao-chinese',
'gaokao-english',
'gaokao-geography',
'gaokao-history',
'gaokao-biology',
'gaokao-chemistry',
'gaokao-physics',
'gaokao-mathqa',
'logiqa-zh',
'lsat-ar',
'lsat-lr',
'lsat-rc',
'logiqa-en',
'sat-math',
'sat-en',
'sat-en-without-passage',
'aqua-rat',
]
agieval_multiple_choices_sets = [
'jec-qa-kd',
'jec-qa-ca',
]
agieval_cloze_sets = ['gaokao-mathcloze', 'math']
agieval_chinese_sets = [
'gaokao-chinese',
'gaokao-english',
'gaokao-geography',
'gaokao-history',
'gaokao-biology',
'gaokao-chemistry',
'gaokao-physics',
'gaokao-mathqa',
'logiqa-zh',
'gaokao-mathcloze',
]
agieval_english_sets = [
'lsat-ar',
'lsat-lr',
'lsat-rc',
'logiqa-en',
'sat-math',
'sat-en',
'sat-en-without-passage',
'aqua-rat',
'math',
]
agieval_gaokao_sets = [
'gaokao-chinese',
'gaokao-english',
'gaokao-geography',
'gaokao-history',
'gaokao-biology',
'gaokao-chemistry',
'gaokao-physics',
'gaokao-mathqa',
]
agieval_datasets = []
for _name in agieval_single_choice_sets:
if _name in agieval_chinese_sets:
_hint = '答案是: '
else:
_hint = 'The answer is '
agieval_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(role='HUMAN', prompt=f'{{question}}\n{{options}}\n{_hint}')
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=1024))
agieval_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='first-capital'))
agieval_datasets.append(
dict(
type=AGIEvalDataset_v2,
path='./data/AGIEval/data/v1/',
name=_name,
abbr='agieval-' + _name,
setting_name='zero-shot',
reader_cfg=agieval_reader_cfg,
infer_cfg=agieval_infer_cfg.copy(),
eval_cfg=agieval_eval_cfg.copy()))
for _name in agieval_multiple_choices_sets:
if _name in agieval_chinese_sets:
_hint = '答案是: '
else:
_hint = 'The answer is '
agieval_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(role='HUMAN', prompt=f'{{question}}\n{{options}}\n{_hint}')
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=1024))
agieval_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='first-capital-multi'))
agieval_datasets.append(
dict(
type=AGIEvalDataset_v2,
path='./data/AGIEval/data/v1/',
name=_name,
abbr='agieval-' + _name,
setting_name='zero-shot',
reader_cfg=agieval_reader_cfg,
infer_cfg=agieval_infer_cfg.copy(),
eval_cfg=agieval_eval_cfg.copy()))
for _name in agieval_cloze_sets:
if _name in agieval_chinese_sets:
_hint = '答案是: '
else:
_hint = 'The answer is '
agieval_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[dict(role='HUMAN', prompt=f'{{question}}\n{_hint}')])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=1024))
agieval_eval_cfg = dict(evaluator=dict(type=AGIEvalEvaluator))
agieval_datasets.append(
dict(
type=AGIEvalDataset_v2,
path='./data/AGIEval/data/v1/',
name=_name,
abbr='agieval-' + _name,
setting_name='zero-shot',
reader_cfg=agieval_reader_cfg,
infer_cfg=agieval_infer_cfg.copy(),
eval_cfg=agieval_eval_cfg.copy()))
for _item in agieval_datasets:
_name = _item['name']
_intro = {
'gaokao-chinese':
'以下是一道中国高考语文选择题,请选择正确的答案。',
'gaokao-english':
'以下是一道中国高考英语选择题,请选择正确的答案。',
'gaokao-geography':
'以下是一道中国高考地理选择题,请选择正确的答案。',
'gaokao-history':
'以下是一道中国高考历史选择题,请选择正确的答案。',
'gaokao-biology':
'以下是一道中国高考生物选择题,请选择正确的答案。',
'gaokao-chemistry':
'以下是一道中国高考化学选择题,请选择正确的答案。',
'gaokao-physics':
'以下是一道中国高考物理选择题,请选择正确的答案。',
'gaokao-mathqa':
'以下是一道中国高考数学选择题,请选择正确的答案。',
'logiqa-zh':
'以下是一道中国公务员考试题,请选择正确的答案。',
'lsat-ar':
'The following is a LSAT Analytical Reasoning question. Please select the correct answer.',
'lsat-lr':
'The following is a LSAT Logical Reasoning question. Please select the correct answer.',
'lsat-rc':
'The following is a LSAT Reading Comprehension question. Please select the correct answer.',
'logiqa-en':
'The following is a Logic Reasoning question. Please select the correct answer.',
'sat-math':
'The following is a SAT Math question. Please select the correct answer.',
'sat-en':
'The following is a SAT English question. Please select the correct answer.',
'sat-en-without-passage':
'The following is a SAT English question. Please select the correct answer.',
'aqua-rat':
'The following is a AQUA-RAT question. Please select the correct answer.',
'jec-qa-kd':
'以下是一道中国司法考试基础知识题,请选择正确的答案。',
'jec-qa-ca':
'以下是一道中国司法考试案例分析题,请选择正确的答案。',
'gaokao-mathcloze':
'以下是一道中国高考数学填空题,请填入正确的答案。',
'math':
'The following is a Math question. Please select the correct answer.',
}[_name]
_templates = _item['infer_cfg']['prompt_template']['template']
_templates['round'][0][
'prompt'] = _intro + '\n' + _templates['round'][0]['prompt']
del _item, _intro, _templates, _name, _hint, agieval_infer_cfg, agieval_eval_cfg

View File

@ -0,0 +1,217 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer, GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import AGIEvalDataset_v2, AGIEvalEvaluator
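# AGIEval (v2 dataset class), mixed setup: single-choice sets are scored with
# the PPL inferencer over one template per option (A-E where applicable),
# while multiple-choice and cloze sets fall back to generation; the per-subset
# instruction is again prepended in the final loop, handling both the PPL and
# the generative template shapes.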
agieval_single_choice_sets = [
'gaokao-chinese',
'gaokao-english',
'gaokao-geography',
'gaokao-history',
'gaokao-biology',
'gaokao-chemistry',
'gaokao-physics',
'gaokao-mathqa',
'logiqa-zh',
'lsat-ar',
'lsat-lr',
'lsat-rc',
'logiqa-en',
'sat-math',
'sat-en',
'sat-en-without-passage',
'aqua-rat',
]
agieval_multiple_choices_sets = [
'jec-qa-kd',
'jec-qa-ca',
]
agieval_cloze_sets = ['gaokao-mathcloze', 'math']
agieval_chinese_sets = [
'gaokao-chinese',
'gaokao-english',
'gaokao-geography',
'gaokao-history',
'gaokao-biology',
'gaokao-chemistry',
'gaokao-physics',
'gaokao-mathqa',
'logiqa-zh',
'gaokao-mathcloze',
]
agieval_english_sets = [
'lsat-ar',
'lsat-lr',
'lsat-rc',
'logiqa-en',
'sat-math',
'sat-en',
'sat-en-without-passage',
'aqua-rat',
'math',
]
agieval_gaokao_sets = [
'gaokao-chinese',
'gaokao-english',
'gaokao-geography',
'gaokao-history',
'gaokao-biology',
'gaokao-chemistry',
'gaokao-physics',
'gaokao-mathqa',
]
agieval_datasets = []
for _name in agieval_single_choice_sets:
if _name in ['lsat-ar', 'lsat-lr', 'lsat-rc', 'aqua-rat']:
_options = ['A', 'B', 'C', 'D', 'E']
else:
_options = ['A', 'B', 'C', 'D']
if _name in agieval_chinese_sets:
_hint = '答案是:'
else:
_hint = 'The answer is '
agieval_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
label: dict(round=[
dict(role='HUMAN', prompt='{question}\n{options}'),
dict(role='BOT', prompt=f'{_hint}{label}')
])
for label in _options
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer, labels=_options))
agieval_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
agieval_datasets.append(
dict(
type=AGIEvalDataset_v2,
path='./data/AGIEval/data/v1/',
name=_name,
abbr='agieval-' + _name,
setting_name='zero-shot',
reader_cfg=dict(
input_columns=['question', 'options'] + _options,
output_column='label'),
infer_cfg=agieval_infer_cfg.copy(),
eval_cfg=agieval_eval_cfg.copy()))
for _name in agieval_multiple_choices_sets:
if _name in agieval_chinese_sets:
_hint = '答案是: '
else:
_hint = 'The answer is '
agieval_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(role='HUMAN', prompt=f'{{question}}\n{{options}}\n{_hint}')
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=1024))
agieval_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='first-capital-multi'))
agieval_datasets.append(
dict(
type=AGIEvalDataset_v2,
path='./data/AGIEval/data/v1/',
name=_name,
abbr='agieval-' + _name,
setting_name='zero-shot',
reader_cfg=dict(
input_columns=['question', 'options'], output_column='label'),
infer_cfg=agieval_infer_cfg.copy(),
eval_cfg=agieval_eval_cfg.copy()))
for _name in agieval_cloze_sets:
if _name in agieval_chinese_sets:
_hint = '答案是:'
else:
_hint = 'The answer is '
agieval_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[dict(role='HUMAN', prompt=f'{{question}}{_hint}')])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=1024))
agieval_eval_cfg = dict(evaluator=dict(type=AGIEvalEvaluator))
agieval_datasets.append(
dict(
type=AGIEvalDataset_v2,
path='./data/AGIEval/data/v1/',
name=_name,
abbr='agieval-' + _name,
setting_name='zero-shot',
reader_cfg=dict(
input_columns=['question', 'options'], output_column='label'),
infer_cfg=agieval_infer_cfg.copy(),
eval_cfg=agieval_eval_cfg.copy()))
for _item in agieval_datasets:
_name = _item['name']
_intro = {
'gaokao-chinese':
'以下是一道中国高考语文选择题,请选择正确的答案。',
'gaokao-english':
'以下是一道中国高考英语选择题,请选择正确的答案。',
'gaokao-geography':
'以下是一道中国高考地理选择题,请选择正确的答案。',
'gaokao-history':
'以下是一道中国高考历史选择题,请选择正确的答案。',
'gaokao-biology':
'以下是一道中国高考生物选择题,请选择正确的答案。',
'gaokao-chemistry':
'以下是一道中国高考化学选择题,请选择正确的答案。',
'gaokao-physics':
'以下是一道中国高考物理选择题,请选择正确的答案。',
'gaokao-mathqa':
'以下是一道中国高考数学选择题,请选择正确的答案。',
'logiqa-zh':
'以下是一道中国公务员考试题,请选择正确的答案。',
'lsat-ar':
'The following is a LSAT Analytical Reasoning question. Please select the correct answer.',
'lsat-lr':
'The following is a LSAT Logical Reasoning question. Please select the correct answer.',
'lsat-rc':
'The following is a LSAT Reading Comprehension question. Please select the correct answer.',
'logiqa-en':
'The following is a Logic Reasoning question. Please select the correct answer.',
'sat-math':
'The following is a SAT Math question. Please select the correct answer.',
'sat-en':
'The following is a SAT English question. Please select the correct answer.',
'sat-en-without-passage':
'The following is a SAT English question. Please select the correct answer.',
'aqua-rat':
'The following is a AQUA-RAT question. Please select the correct answer.',
'jec-qa-kd':
'以下是一道中国司法考试基础知识题,请选择正确的答案。',
'jec-qa-ca':
'以下是一道中国司法考试案例分析题,请选择正确的答案。',
'gaokao-mathcloze':
'以下是一道中国高考数学填空题,请填入正确的答案。',
'math':
'The following is a Math question. Please select the correct answer.',
}[_name]
_templates = _item['infer_cfg']['prompt_template']['template']
if _item['infer_cfg']['inferencer']['type'] == PPLInferencer:
for _label in _templates:
_templates[_label]['round'][0][
'prompt'] = _intro + '\n' + _templates[_label]['round'][0][
'prompt']
else:
_templates['round'][0][
'prompt'] = _intro + '\n' + _templates['round'][0]['prompt']
del _item, _intro, _templates, _label, _name, _options, _hint, agieval_infer_cfg, agieval_eval_cfg

View File

@ -0,0 +1,40 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import HFDataset, HumanEvaluator
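# APPS, zero-shot generative setup: the instruction "Write a python program:"
# is issued as a SYSTEM prompt (falling back to HUMAN), and the generated
# program is scored with the humaneval-style evaluator at k = 1/10/100.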
apps_reader_cfg = dict(
input_columns=['question'], output_column='problem_id', train_split='test')
# TODO: allow empty output-column
apps_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
begin=[
dict(
role='SYSTEM',
fallback_role='HUMAN',
prompt='Write a python program:'),
],
round=[
dict(role='HUMAN', prompt='{question}'),
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
apps_eval_cfg = dict(
evaluator=dict(type=HumanEvaluator),
pred_role='BOT',
    k=[1, 10, 100],  # this parameter is only used by the humaneval evaluator
pred_postprocessor=dict(type='humaneval'),
)
apps_datasets = [
dict(
type=HFDataset,
path='codeparrot/apps',
reader_cfg=apps_reader_cfg,
infer_cfg=apps_infer_cfg,
eval_cfg=apps_eval_cfg)
]

View File

@ -0,0 +1,33 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import HFDataset, HumanEvaluator
apps_reader_cfg = dict(
input_columns=['question'], output_column='problem_id', train_split='test')
# TODO: allow empty output-column
apps_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(role='HUMAN', prompt='Write a python program:\n{question}'),
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
apps_eval_cfg = dict(
evaluator=dict(type=HumanEvaluator),
pred_role='BOT',
    k=[1, 10, 100],  # this parameter is only used by the humaneval evaluator
pred_postprocessor=dict(type='humaneval'),
)
apps_datasets = [
dict(
type=HFDataset,
path='codeparrot/apps',
reader_cfg=apps_reader_cfg,
infer_cfg=apps_infer_cfg,
eval_cfg=apps_eval_cfg)
]

View File

@ -0,0 +1,30 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import HFDataset, HumanEvaluator
apps_reader_cfg = dict(
input_columns=['question'], output_column='problem_id', train_split='test')
# TODO: allow empty output-column
apps_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template='{question}'),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512))
apps_eval_cfg = dict(
evaluator=dict(type=HumanEvaluator),
k=[1, 10, 100],
pred_postprocessor=dict(type='humaneval'),
)
apps_datasets = [
dict(
type=HFDataset,
path='codeparrot/apps',
reader_cfg=apps_reader_cfg,
infer_cfg=apps_infer_cfg,
eval_cfg=apps_eval_cfg)
]

View File

@ -0,0 +1,100 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import BBHDataset, BBHEvaluator
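# BBH, few-shot chain-of-thought setup: the per-task example file from
# lib_prompt is embedded in the prompt, multiple-choice tasks are scored by
# accuracy after bbh-mcq postprocessing of both prediction and reference, and
# free-form tasks use BBHEvaluator.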
bbh_reader_cfg = dict(input_columns=["input"], output_column="target")
_path_prefix = "./data/BBH"
bbh_multiple_choice_sets = [
'temporal_sequences',
'disambiguation_qa',
'date_understanding',
'tracking_shuffled_objects_three_objects',
'penguins_in_a_table',
'geometric_shapes',
'snarks',
'ruin_names',
'tracking_shuffled_objects_seven_objects',
'tracking_shuffled_objects_five_objects',
'logical_deduction_three_objects',
'hyperbaton',
'logical_deduction_five_objects',
'logical_deduction_seven_objects',
'movie_recommendation',
'salient_translation_error_detection',
'reasoning_about_colored_objects',
]
bbh_free_form_sets = [
'multistep_arithmetic_two',
'navigate',
'dyck_languages',
'word_sorting',
'sports_understanding',
'boolean_expressions',
'object_counting',
'formal_fallacies',
'causal_judgement',
'web_of_lies',
]
bbh_datasets = []
for _name in bbh_multiple_choice_sets:
_hint = open(f"{_path_prefix}/lib_prompt/{_name}.txt", 'r').read()
bbh_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt=
f"Follow the given examples and answer the question.\n{_hint}\n\nQ: {{input}}\nA: Let's think step by step."
)
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512))
bbh_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type='bbh-mcq'),
dataset_postprocessor=dict(type='bbh-mcq'))
bbh_datasets.append(
dict(
type=BBHDataset,
path=f"{_path_prefix}/data",
name=_name,
abbr='bbh-' + _name,
reader_cfg=bbh_reader_cfg,
infer_cfg=bbh_infer_cfg.copy(),
eval_cfg=bbh_eval_cfg.copy()))
for _name in bbh_free_form_sets:
_hint = open(f"{_path_prefix}/lib_prompt/{_name}.txt", 'r').read()
bbh_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt=
f"Follow the given examples and answer the question.\n{_hint}\n\nQ: {{input}}\nA: Let's think step by step."
)
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512))
bbh_eval_cfg = dict(evaluator=dict(type=BBHEvaluator), pred_role="BOT")
bbh_datasets.append(
dict(
type=BBHDataset,
path=f"{_path_prefix}/data",
name=_name,
abbr='bbh-' + _name,
reader_cfg=bbh_reader_cfg,
infer_cfg=bbh_infer_cfg.copy(),
eval_cfg=bbh_eval_cfg.copy()))
del _name, _hint, _path_prefix

View File

@ -0,0 +1,187 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import FixKRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CEvalDataset
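# C-Eval, 5-shot generative setup over the val and test splits: five dev-set
# exemplars are injected through the ice_template / FixKRetriever mechanism
# (fix_id_list), and predictions are reduced to their first capital letter
# before accuracy scoring.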
ceval_subject_mapping = {
"computer_network":
["Computer Network", "\u8ba1\u7b97\u673a\u7f51\u7edc", "STEM"],
"operating_system":
["Operating System", "\u64cd\u4f5c\u7cfb\u7edf", "STEM"],
"computer_architecture":
["Computer Architecture", "\u8ba1\u7b97\u673a\u7ec4\u6210", "STEM"],
"college_programming":
["College Programming", "\u5927\u5b66\u7f16\u7a0b", "STEM"],
"college_physics": ["College Physics", "\u5927\u5b66\u7269\u7406", "STEM"],
"college_chemistry":
["College Chemistry", "\u5927\u5b66\u5316\u5b66", "STEM"],
"advanced_mathematics":
["Advanced Mathematics", "\u9ad8\u7b49\u6570\u5b66", "STEM"],
"probability_and_statistics":
["Probability and Statistics", "\u6982\u7387\u7edf\u8ba1", "STEM"],
"discrete_mathematics":
["Discrete Mathematics", "\u79bb\u6563\u6570\u5b66", "STEM"],
"electrical_engineer": [
"Electrical Engineer", "\u6ce8\u518c\u7535\u6c14\u5de5\u7a0b\u5e08",
"STEM"
],
"metrology_engineer":
["Metrology Engineer", "\u6ce8\u518c\u8ba1\u91cf\u5e08", "STEM"],
"high_school_mathematics":
["High School Mathematics", "\u9ad8\u4e2d\u6570\u5b66", "STEM"],
"high_school_physics":
["High School Physics", "\u9ad8\u4e2d\u7269\u7406", "STEM"],
"high_school_chemistry":
["High School Chemistry", "\u9ad8\u4e2d\u5316\u5b66", "STEM"],
"high_school_biology": [
"High School Biology", "\u9ad8\u4e2d\u751f\u7269", "STEM"
],
"middle_school_mathematics": [
"Middle School Mathematics", "\u521d\u4e2d\u6570\u5b66", "STEM"
],
"middle_school_biology": [
"Middle School Biology", "\u521d\u4e2d\u751f\u7269", "STEM"
],
"middle_school_physics": [
"Middle School Physics", "\u521d\u4e2d\u7269\u7406", "STEM"
],
"middle_school_chemistry": [
"Middle School Chemistry", "\u521d\u4e2d\u5316\u5b66", "STEM"
],
"veterinary_medicine": [
"Veterinary Medicine", "\u517d\u533b\u5b66", "STEM"
],
"college_economics": [
"College Economics", "\u5927\u5b66\u7ecf\u6d4e\u5b66", "Social Science"
],
"business_administration": [
"Business Administration", "\u5de5\u5546\u7ba1\u7406", "Social Science"
],
"marxism": [
"Marxism", "\u9a6c\u514b\u601d\u4e3b\u4e49\u57fa\u672c\u539f\u7406",
"Social Science"
],
"mao_zedong_thought": [
"Mao Zedong Thought",
"\u6bdb\u6cfd\u4e1c\u601d\u60f3\u548c\u4e2d\u56fd\u7279\u8272\u793e\u4f1a\u4e3b\u4e49\u7406\u8bba\u4f53\u7cfb\u6982\u8bba",
"Social Science"
],
"education_science": [
"Education Science", "\u6559\u80b2\u5b66", "Social Science"
],
"teacher_qualification": [
"Teacher Qualification", "\u6559\u5e08\u8d44\u683c", "Social Science"
],
"high_school_politics": [
"High School Politics", "\u9ad8\u4e2d\u653f\u6cbb", "Social Science"
],
"high_school_geography": [
"High School Geography", "\u9ad8\u4e2d\u5730\u7406", "Social Science"
],
"middle_school_politics": [
"Middle School Politics", "\u521d\u4e2d\u653f\u6cbb", "Social Science"
],
"middle_school_geography": [
"Middle School Geography", "\u521d\u4e2d\u5730\u7406", "Social Science"
],
"modern_chinese_history":
["Modern Chinese History", "\u8fd1\u4ee3\u53f2\u7eb2\u8981", "Humanities"],
"ideological_and_moral_cultivation": [
"Ideological and Moral Cultivation",
"\u601d\u60f3\u9053\u5fb7\u4fee\u517b\u4e0e\u6cd5\u5f8b\u57fa\u7840",
"Humanities"
],
"logic": ["Logic", "\u903b\u8f91\u5b66", "Humanities"],
"law": ["Law", "\u6cd5\u5b66", "Humanities"],
"chinese_language_and_literature": [
"Chinese Language and Literature",
"\u4e2d\u56fd\u8bed\u8a00\u6587\u5b66", "Humanities"
],
"art_studies": ["Art Studies", "\u827a\u672f\u5b66", "Humanities"],
"professional_tour_guide": [
"Professional Tour Guide", "\u5bfc\u6e38\u8d44\u683c", "Humanities"
],
"legal_professional": [
"Legal Professional", "\u6cd5\u5f8b\u804c\u4e1a\u8d44\u683c",
"Humanities"
],
"high_school_chinese": [
"High School Chinese", "\u9ad8\u4e2d\u8bed\u6587", "Humanities"
],
"high_school_history": [
"High School History", "\u9ad8\u4e2d\u5386\u53f2", "Humanities"
],
"middle_school_history": [
"Middle School History", "\u521d\u4e2d\u5386\u53f2", "Humanities"
],
"civil_servant": ["Civil Servant", "\u516c\u52a1\u5458", "Other"],
"sports_science": ["Sports Science", "\u4f53\u80b2\u5b66", "Other"],
"plant_protection": [
"Plant Protection", "\u690d\u7269\u4fdd\u62a4", "Other"
],
"basic_medicine": ["Basic Medicine", "\u57fa\u7840\u533b\u5b66", "Other"],
"clinical_medicine": [
"Clinical Medicine", "\u4e34\u5e8a\u533b\u5b66", "Other"
],
"urban_and_rural_planner": [
"Urban and Rural Planner",
"\u6ce8\u518c\u57ce\u4e61\u89c4\u5212\u5e08", "Other"
],
"accountant": ["Accountant", "\u6ce8\u518c\u4f1a\u8ba1\u5e08", "Other"],
"fire_engineer": [
"Fire Engineer", "\u6ce8\u518c\u6d88\u9632\u5de5\u7a0b\u5e08", "Other"
],
"environmental_impact_assessment_engineer": [
"Environmental Impact Assessment Engineer",
"\u73af\u5883\u5f71\u54cd\u8bc4\u4ef7\u5de5\u7a0b\u5e08", "Other"
],
"tax_accountant": ["Tax Accountant", "\u7a0e\u52a1\u5e08", "Other"],
"physician": ["Physician", "\u533b\u5e08\u8d44\u683c", "Other"]
}
ceval_all_sets = list(ceval_subject_mapping.keys())
ceval_datasets = []
for _split in ["val", "test"]:
for _name in ceval_all_sets:
_ch_name = ceval_subject_mapping[_name][1]
ceval_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template=dict(
begin="</E>",
round=[
dict(
role="HUMAN",
prompt=
f"以下是中国关于{_ch_name}考试的单项选择题,请选出其中的正确答案。\n{{question}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n答案: "
),
dict(role="BOT", prompt="{answer}"),
]),
ice_token="</E>",
),
retriever=dict(type=FixKRetriever),
inferencer=dict(type=GenInferencer, fix_id_list=[0, 1, 2, 3, 4]),
)
ceval_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='first-capital'))
ceval_datasets.append(
dict(
type=CEvalDataset,
path="./data/ceval/formal_ceval",
name=_name,
abbr="ceval-" + _name if _split == "val" else "ceval-test-" +
_name,
reader_cfg=dict(
input_columns=["question", "A", "B", "C", "D"],
output_column="answer",
train_split="dev",
test_split=_split),
infer_cfg=ceval_infer_cfg,
eval_cfg=ceval_eval_cfg,
))
del _split, _name, _ch_name

View File

@ -0,0 +1,185 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import FixKRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CEvalDataset
ceval_subject_mapping = {
"computer_network":
["Computer Network", "\u8ba1\u7b97\u673a\u7f51\u7edc", "STEM"],
"operating_system":
["Operating System", "\u64cd\u4f5c\u7cfb\u7edf", "STEM"],
"computer_architecture":
["Computer Architecture", "\u8ba1\u7b97\u673a\u7ec4\u6210", "STEM"],
"college_programming":
["College Programming", "\u5927\u5b66\u7f16\u7a0b", "STEM"],
"college_physics": ["College Physics", "\u5927\u5b66\u7269\u7406", "STEM"],
"college_chemistry":
["College Chemistry", "\u5927\u5b66\u5316\u5b66", "STEM"],
"advanced_mathematics":
["Advanced Mathematics", "\u9ad8\u7b49\u6570\u5b66", "STEM"],
"probability_and_statistics":
["Probability and Statistics", "\u6982\u7387\u7edf\u8ba1", "STEM"],
"discrete_mathematics":
["Discrete Mathematics", "\u79bb\u6563\u6570\u5b66", "STEM"],
"electrical_engineer": [
"Electrical Engineer", "\u6ce8\u518c\u7535\u6c14\u5de5\u7a0b\u5e08",
"STEM"
],
"metrology_engineer":
["Metrology Engineer", "\u6ce8\u518c\u8ba1\u91cf\u5e08", "STEM"],
"high_school_mathematics":
["High School Mathematics", "\u9ad8\u4e2d\u6570\u5b66", "STEM"],
"high_school_physics":
["High School Physics", "\u9ad8\u4e2d\u7269\u7406", "STEM"],
"high_school_chemistry":
["High School Chemistry", "\u9ad8\u4e2d\u5316\u5b66", "STEM"],
"high_school_biology": [
"High School Biology", "\u9ad8\u4e2d\u751f\u7269", "STEM"
],
"middle_school_mathematics": [
"Middle School Mathematics", "\u521d\u4e2d\u6570\u5b66", "STEM"
],
"middle_school_biology": [
"Middle School Biology", "\u521d\u4e2d\u751f\u7269", "STEM"
],
"middle_school_physics": [
"Middle School Physics", "\u521d\u4e2d\u7269\u7406", "STEM"
],
"middle_school_chemistry": [
"Middle School Chemistry", "\u521d\u4e2d\u5316\u5b66", "STEM"
],
"veterinary_medicine": [
"Veterinary Medicine", "\u517d\u533b\u5b66", "STEM"
],
"college_economics": [
"College Economics", "\u5927\u5b66\u7ecf\u6d4e\u5b66", "Social Science"
],
"business_administration": [
"Business Administration", "\u5de5\u5546\u7ba1\u7406", "Social Science"
],
"marxism": [
"Marxism", "\u9a6c\u514b\u601d\u4e3b\u4e49\u57fa\u672c\u539f\u7406",
"Social Science"
],
"mao_zedong_thought": [
"Mao Zedong Thought",
"\u6bdb\u6cfd\u4e1c\u601d\u60f3\u548c\u4e2d\u56fd\u7279\u8272\u793e\u4f1a\u4e3b\u4e49\u7406\u8bba\u4f53\u7cfb\u6982\u8bba",
"Social Science"
],
"education_science": [
"Education Science", "\u6559\u80b2\u5b66", "Social Science"
],
"teacher_qualification": [
"Teacher Qualification", "\u6559\u5e08\u8d44\u683c", "Social Science"
],
"high_school_politics": [
"High School Politics", "\u9ad8\u4e2d\u653f\u6cbb", "Social Science"
],
"high_school_geography": [
"High School Geography", "\u9ad8\u4e2d\u5730\u7406", "Social Science"
],
"middle_school_politics": [
"Middle School Politics", "\u521d\u4e2d\u653f\u6cbb", "Social Science"
],
"middle_school_geography": [
"Middle School Geography", "\u521d\u4e2d\u5730\u7406", "Social Science"
],
"modern_chinese_history":
["Modern Chinese History", "\u8fd1\u4ee3\u53f2\u7eb2\u8981", "Humanities"],
"ideological_and_moral_cultivation": [
"Ideological and Moral Cultivation",
"\u601d\u60f3\u9053\u5fb7\u4fee\u517b\u4e0e\u6cd5\u5f8b\u57fa\u7840",
"Humanities"
],
"logic": ["Logic", "\u903b\u8f91\u5b66", "Humanities"],
"law": ["Law", "\u6cd5\u5b66", "Humanities"],
"chinese_language_and_literature": [
"Chinese Language and Literature",
"\u4e2d\u56fd\u8bed\u8a00\u6587\u5b66", "Humanities"
],
"art_studies": ["Art Studies", "\u827a\u672f\u5b66", "Humanities"],
"professional_tour_guide": [
"Professional Tour Guide", "\u5bfc\u6e38\u8d44\u683c", "Humanities"
],
"legal_professional": [
"Legal Professional", "\u6cd5\u5f8b\u804c\u4e1a\u8d44\u683c",
"Humanities"
],
"high_school_chinese": [
"High School Chinese", "\u9ad8\u4e2d\u8bed\u6587", "Humanities"
],
"high_school_history": [
"High School History", "\u9ad8\u4e2d\u5386\u53f2", "Humanities"
],
"middle_school_history": [
"Middle School History", "\u521d\u4e2d\u5386\u53f2", "Humanities"
],
"civil_servant": ["Civil Servant", "\u516c\u52a1\u5458", "Other"],
"sports_science": ["Sports Science", "\u4f53\u80b2\u5b66", "Other"],
"plant_protection": [
"Plant Protection", "\u690d\u7269\u4fdd\u62a4", "Other"
],
"basic_medicine": ["Basic Medicine", "\u57fa\u7840\u533b\u5b66", "Other"],
"clinical_medicine": [
"Clinical Medicine", "\u4e34\u5e8a\u533b\u5b66", "Other"
],
"urban_and_rural_planner": [
"Urban and Rural Planner",
"\u6ce8\u518c\u57ce\u4e61\u89c4\u5212\u5e08", "Other"
],
"accountant": ["Accountant", "\u6ce8\u518c\u4f1a\u8ba1\u5e08", "Other"],
"fire_engineer": [
"Fire Engineer", "\u6ce8\u518c\u6d88\u9632\u5de5\u7a0b\u5e08", "Other"
],
"environmental_impact_assessment_engineer": [
"Environmental Impact Assessment Engineer",
"\u73af\u5883\u5f71\u54cd\u8bc4\u4ef7\u5de5\u7a0b\u5e08", "Other"
],
"tax_accountant": ["Tax Accountant", "\u7a0e\u52a1\u5e08", "Other"],
"physician": ["Physician", "\u533b\u5e08\u8d44\u683c", "Other"]
}
ceval_all_sets = list(ceval_subject_mapping.keys())
ceval_datasets = []
for _split in ["val"]:
for _name in ceval_all_sets:
_ch_name = ceval_subject_mapping[_name][1]
ceval_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template=dict(
begin="</E>",
round=[
dict(
role="HUMAN",
prompt=
f"以下是中国关于{_ch_name}考试的单项选择题,请选出其中的正确答案。\n{{question}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n答案: "
),
dict(role="BOT", prompt="{answer}"),
]),
ice_token="</E>",
),
retriever=dict(type=FixKRetriever),
inferencer=dict(type=GenInferencer, fix_id_list=[0, 1, 2, 3, 4]),
)
ceval_eval_cfg = dict(evaluator=dict(type=AccEvaluator), )
ceval_datasets.append(
dict(
type=CEvalDataset,
path="./data/ceval/formal_ceval",
name=_name,
abbr="ceval-" + _name if _split == "val" else "ceval-test-" +
_name,
reader_cfg=dict(
input_columns=["question", "A", "B", "C", "D"],
output_column="answer",
train_split="dev",
test_split=_split),
infer_cfg=ceval_infer_cfg,
eval_cfg=ceval_eval_cfg,
))
del _split, _name, _ch_name

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .ceval_ppl_275812 import ceval_datasets # noqa: F401, F403

View File

@ -0,0 +1,188 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import FixKRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CEvalDataset
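# C-Eval, 5-shot PPL setup over the val and test splits: each option A-D is
# rendered as a separate template ending in the candidate letter, the
# lowest-perplexity option is taken as the prediction, and the five exemplars
# again come from the dev split via FixKRetriever.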
ceval_subject_mapping = {
"computer_network":
["Computer Network", "\u8ba1\u7b97\u673a\u7f51\u7edc", "STEM"],
"operating_system":
["Operating System", "\u64cd\u4f5c\u7cfb\u7edf", "STEM"],
"computer_architecture":
["Computer Architecture", "\u8ba1\u7b97\u673a\u7ec4\u6210", "STEM"],
"college_programming":
["College Programming", "\u5927\u5b66\u7f16\u7a0b", "STEM"],
"college_physics": ["College Physics", "\u5927\u5b66\u7269\u7406", "STEM"],
"college_chemistry":
["College Chemistry", "\u5927\u5b66\u5316\u5b66", "STEM"],
"advanced_mathematics":
["Advanced Mathematics", "\u9ad8\u7b49\u6570\u5b66", "STEM"],
"probability_and_statistics":
["Probability and Statistics", "\u6982\u7387\u7edf\u8ba1", "STEM"],
"discrete_mathematics":
["Discrete Mathematics", "\u79bb\u6563\u6570\u5b66", "STEM"],
"electrical_engineer": [
"Electrical Engineer", "\u6ce8\u518c\u7535\u6c14\u5de5\u7a0b\u5e08",
"STEM"
],
"metrology_engineer":
["Metrology Engineer", "\u6ce8\u518c\u8ba1\u91cf\u5e08", "STEM"],
"high_school_mathematics":
["High School Mathematics", "\u9ad8\u4e2d\u6570\u5b66", "STEM"],
"high_school_physics":
["High School Physics", "\u9ad8\u4e2d\u7269\u7406", "STEM"],
"high_school_chemistry":
["High School Chemistry", "\u9ad8\u4e2d\u5316\u5b66", "STEM"],
"high_school_biology": [
"High School Biology", "\u9ad8\u4e2d\u751f\u7269", "STEM"
],
"middle_school_mathematics": [
"Middle School Mathematics", "\u521d\u4e2d\u6570\u5b66", "STEM"
],
"middle_school_biology": [
"Middle School Biology", "\u521d\u4e2d\u751f\u7269", "STEM"
],
"middle_school_physics": [
"Middle School Physics", "\u521d\u4e2d\u7269\u7406", "STEM"
],
"middle_school_chemistry": [
"Middle School Chemistry", "\u521d\u4e2d\u5316\u5b66", "STEM"
],
"veterinary_medicine": [
"Veterinary Medicine", "\u517d\u533b\u5b66", "STEM"
],
"college_economics": [
"College Economics", "\u5927\u5b66\u7ecf\u6d4e\u5b66", "Social Science"
],
"business_administration": [
"Business Administration", "\u5de5\u5546\u7ba1\u7406", "Social Science"
],
"marxism": [
"Marxism", "\u9a6c\u514b\u601d\u4e3b\u4e49\u57fa\u672c\u539f\u7406",
"Social Science"
],
"mao_zedong_thought": [
"Mao Zedong Thought",
"\u6bdb\u6cfd\u4e1c\u601d\u60f3\u548c\u4e2d\u56fd\u7279\u8272\u793e\u4f1a\u4e3b\u4e49\u7406\u8bba\u4f53\u7cfb\u6982\u8bba",
"Social Science"
],
"education_science": [
"Education Science", "\u6559\u80b2\u5b66", "Social Science"
],
"teacher_qualification": [
"Teacher Qualification", "\u6559\u5e08\u8d44\u683c", "Social Science"
],
"high_school_politics": [
"High School Politics", "\u9ad8\u4e2d\u653f\u6cbb", "Social Science"
],
"high_school_geography": [
"High School Geography", "\u9ad8\u4e2d\u5730\u7406", "Social Science"
],
"middle_school_politics": [
"Middle School Politics", "\u521d\u4e2d\u653f\u6cbb", "Social Science"
],
"middle_school_geography": [
"Middle School Geography", "\u521d\u4e2d\u5730\u7406", "Social Science"
],
"modern_chinese_history":
["Modern Chinese History", "\u8fd1\u4ee3\u53f2\u7eb2\u8981", "Humanities"],
"ideological_and_moral_cultivation": [
"Ideological and Moral Cultivation",
"\u601d\u60f3\u9053\u5fb7\u4fee\u517b\u4e0e\u6cd5\u5f8b\u57fa\u7840",
"Humanities"
],
"logic": ["Logic", "\u903b\u8f91\u5b66", "Humanities"],
"law": ["Law", "\u6cd5\u5b66", "Humanities"],
"chinese_language_and_literature": [
"Chinese Language and Literature",
"\u4e2d\u56fd\u8bed\u8a00\u6587\u5b66", "Humanities"
],
"art_studies": ["Art Studies", "\u827a\u672f\u5b66", "Humanities"],
"professional_tour_guide": [
"Professional Tour Guide", "\u5bfc\u6e38\u8d44\u683c", "Humanities"
],
"legal_professional": [
"Legal Professional", "\u6cd5\u5f8b\u804c\u4e1a\u8d44\u683c",
"Humanities"
],
"high_school_chinese": [
"High School Chinese", "\u9ad8\u4e2d\u8bed\u6587", "Humanities"
],
"high_school_history": [
"High School History", "\u9ad8\u4e2d\u5386\u53f2", "Humanities"
],
"middle_school_history": [
"Middle School History", "\u521d\u4e2d\u5386\u53f2", "Humanities"
],
"civil_servant": ["Civil Servant", "\u516c\u52a1\u5458", "Other"],
"sports_science": ["Sports Science", "\u4f53\u80b2\u5b66", "Other"],
"plant_protection": [
"Plant Protection", "\u690d\u7269\u4fdd\u62a4", "Other"
],
"basic_medicine": ["Basic Medicine", "\u57fa\u7840\u533b\u5b66", "Other"],
"clinical_medicine": [
"Clinical Medicine", "\u4e34\u5e8a\u533b\u5b66", "Other"
],
"urban_and_rural_planner": [
"Urban and Rural Planner",
"\u6ce8\u518c\u57ce\u4e61\u89c4\u5212\u5e08", "Other"
],
"accountant": ["Accountant", "\u6ce8\u518c\u4f1a\u8ba1\u5e08", "Other"],
"fire_engineer": [
"Fire Engineer", "\u6ce8\u518c\u6d88\u9632\u5de5\u7a0b\u5e08", "Other"
],
"environmental_impact_assessment_engineer": [
"Environmental Impact Assessment Engineer",
"\u73af\u5883\u5f71\u54cd\u8bc4\u4ef7\u5de5\u7a0b\u5e08", "Other"
],
"tax_accountant": ["Tax Accountant", "\u7a0e\u52a1\u5e08", "Other"],
"physician": ["Physician", "\u533b\u5e08\u8d44\u683c", "Other"]
}
ceval_all_sets = list(ceval_subject_mapping.keys())
ceval_datasets = []
for _split in ["val", "test"]:
for _name in ceval_all_sets:
_ch_name = ceval_subject_mapping[_name][1]
ceval_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template={
answer: dict(
begin="</E>",
round=[
dict(
role="HUMAN",
prompt=
f"以下是中国关于{_ch_name}考试的单项选择题,请选出其中的正确答案。\n{{question}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n答案: "
),
dict(role="BOT", prompt=answer),
])
for answer in ["A", "B", "C", "D"]
},
ice_token="</E>",
),
retriever=dict(type=FixKRetriever),
inferencer=dict(type=PPLInferencer, fix_id_list=[0, 1, 2, 3, 4]),
)
ceval_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
ceval_datasets.append(
dict(
type=CEvalDataset,
path="./data/ceval/formal_ceval",
name=_name,
abbr="ceval-" + _name if _split == "val" else "ceval-test-" +
_name,
reader_cfg=dict(
input_columns=["question", "A", "B", "C", "D"],
output_column="answer",
train_split="dev",
test_split=_split),
infer_cfg=ceval_infer_cfg,
eval_cfg=ceval_eval_cfg,
))
del _split, _name, _ch_name

View File

@ -0,0 +1,188 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import FixKRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CEvalDataset
ceval_subject_mapping = {
"computer_network":
["Computer Network", "\u8ba1\u7b97\u673a\u7f51\u7edc", "STEM"],
"operating_system":
["Operating System", "\u64cd\u4f5c\u7cfb\u7edf", "STEM"],
"computer_architecture":
["Computer Architecture", "\u8ba1\u7b97\u673a\u7ec4\u6210", "STEM"],
"college_programming":
["College Programming", "\u5927\u5b66\u7f16\u7a0b", "STEM"],
"college_physics": ["College Physics", "\u5927\u5b66\u7269\u7406", "STEM"],
"college_chemistry":
["College Chemistry", "\u5927\u5b66\u5316\u5b66", "STEM"],
"advanced_mathematics":
["Advanced Mathematics", "\u9ad8\u7b49\u6570\u5b66", "STEM"],
"probability_and_statistics":
["Probability and Statistics", "\u6982\u7387\u7edf\u8ba1", "STEM"],
"discrete_mathematics":
["Discrete Mathematics", "\u79bb\u6563\u6570\u5b66", "STEM"],
"electrical_engineer": [
"Electrical Engineer", "\u6ce8\u518c\u7535\u6c14\u5de5\u7a0b\u5e08",
"STEM"
],
"metrology_engineer":
["Metrology Engineer", "\u6ce8\u518c\u8ba1\u91cf\u5e08", "STEM"],
"high_school_mathematics":
["High School Mathematics", "\u9ad8\u4e2d\u6570\u5b66", "STEM"],
"high_school_physics":
["High School Physics", "\u9ad8\u4e2d\u7269\u7406", "STEM"],
"high_school_chemistry":
["High School Chemistry", "\u9ad8\u4e2d\u5316\u5b66", "STEM"],
"high_school_biology": [
"High School Biology", "\u9ad8\u4e2d\u751f\u7269", "STEM"
],
"middle_school_mathematics": [
"Middle School Mathematics", "\u521d\u4e2d\u6570\u5b66", "STEM"
],
"middle_school_biology": [
"Middle School Biology", "\u521d\u4e2d\u751f\u7269", "STEM"
],
"middle_school_physics": [
"Middle School Physics", "\u521d\u4e2d\u7269\u7406", "STEM"
],
"middle_school_chemistry": [
"Middle School Chemistry", "\u521d\u4e2d\u5316\u5b66", "STEM"
],
"veterinary_medicine": [
"Veterinary Medicine", "\u517d\u533b\u5b66", "STEM"
],
"college_economics": [
"College Economics", "\u5927\u5b66\u7ecf\u6d4e\u5b66", "Social Science"
],
"business_administration": [
"Business Administration", "\u5de5\u5546\u7ba1\u7406", "Social Science"
],
"marxism": [
"Marxism", "\u9a6c\u514b\u601d\u4e3b\u4e49\u57fa\u672c\u539f\u7406",
"Social Science"
],
"mao_zedong_thought": [
"Mao Zedong Thought",
"\u6bdb\u6cfd\u4e1c\u601d\u60f3\u548c\u4e2d\u56fd\u7279\u8272\u793e\u4f1a\u4e3b\u4e49\u7406\u8bba\u4f53\u7cfb\u6982\u8bba",
"Social Science"
],
"education_science": [
"Education Science", "\u6559\u80b2\u5b66", "Social Science"
],
"teacher_qualification": [
"Teacher Qualification", "\u6559\u5e08\u8d44\u683c", "Social Science"
],
"high_school_politics": [
"High School Politics", "\u9ad8\u4e2d\u653f\u6cbb", "Social Science"
],
"high_school_geography": [
"High School Geography", "\u9ad8\u4e2d\u5730\u7406", "Social Science"
],
"middle_school_politics": [
"Middle School Politics", "\u521d\u4e2d\u653f\u6cbb", "Social Science"
],
"middle_school_geography": [
"Middle School Geography", "\u521d\u4e2d\u5730\u7406", "Social Science"
],
"modern_chinese_history":
["Modern Chinese History", "\u8fd1\u4ee3\u53f2\u7eb2\u8981", "Humanities"],
"ideological_and_moral_cultivation": [
"Ideological and Moral Cultivation",
"\u601d\u60f3\u9053\u5fb7\u4fee\u517b\u4e0e\u6cd5\u5f8b\u57fa\u7840",
"Humanities"
],
"logic": ["Logic", "\u903b\u8f91\u5b66", "Humanities"],
"law": ["Law", "\u6cd5\u5b66", "Humanities"],
"chinese_language_and_literature": [
"Chinese Language and Literature",
"\u4e2d\u56fd\u8bed\u8a00\u6587\u5b66", "Humanities"
],
"art_studies": ["Art Studies", "\u827a\u672f\u5b66", "Humanities"],
"professional_tour_guide": [
"Professional Tour Guide", "\u5bfc\u6e38\u8d44\u683c", "Humanities"
],
"legal_professional": [
"Legal Professional", "\u6cd5\u5f8b\u804c\u4e1a\u8d44\u683c",
"Humanities"
],
"high_school_chinese": [
"High School Chinese", "\u9ad8\u4e2d\u8bed\u6587", "Humanities"
],
"high_school_history": [
"High School History", "\u9ad8\u4e2d\u5386\u53f2", "Humanities"
],
"middle_school_history": [
"Middle School History", "\u521d\u4e2d\u5386\u53f2", "Humanities"
],
"civil_servant": ["Civil Servant", "\u516c\u52a1\u5458", "Other"],
"sports_science": ["Sports Science", "\u4f53\u80b2\u5b66", "Other"],
"plant_protection": [
"Plant Protection", "\u690d\u7269\u4fdd\u62a4", "Other"
],
"basic_medicine": ["Basic Medicine", "\u57fa\u7840\u533b\u5b66", "Other"],
"clinical_medicine": [
"Clinical Medicine", "\u4e34\u5e8a\u533b\u5b66", "Other"
],
"urban_and_rural_planner": [
"Urban and Rural Planner",
"\u6ce8\u518c\u57ce\u4e61\u89c4\u5212\u5e08", "Other"
],
"accountant": ["Accountant", "\u6ce8\u518c\u4f1a\u8ba1\u5e08", "Other"],
"fire_engineer": [
"Fire Engineer", "\u6ce8\u518c\u6d88\u9632\u5de5\u7a0b\u5e08", "Other"
],
"environmental_impact_assessment_engineer": [
"Environmental Impact Assessment Engineer",
"\u73af\u5883\u5f71\u54cd\u8bc4\u4ef7\u5de5\u7a0b\u5e08", "Other"
],
"tax_accountant": ["Tax Accountant", "\u7a0e\u52a1\u5e08", "Other"],
"physician": ["Physician", "\u533b\u5e08\u8d44\u683c", "Other"]
}
ceval_all_sets = list(ceval_subject_mapping.keys())
ceval_datasets = []
for _split in ["val"]:
for _name in ceval_all_sets:
_ch_name = ceval_subject_mapping[_name][1]
ceval_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template={
answer: dict(
begin="</E>",
round=[
dict(
role="HUMAN",
prompt=
f"以下是中国关于{_ch_name}考试的单项选择题,请选出其中的正确答案。\n{{question}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n答案: "
),
dict(role="BOT", prompt=answer),
])
for answer in ["A", "B", "C", "D"]
},
ice_token="</E>",
),
retriever=dict(type=FixKRetriever),
inferencer=dict(type=PPLInferencer, fix_id_list=[0, 1, 2, 3, 4]),
)
ceval_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
ceval_datasets.append(
dict(
type=CEvalDataset,
path="./data/ceval/formal_ceval",
name=_name,
abbr="ceval-" + _name if _split == "val" else "ceval-test-" +
_name,
reader_cfg=dict(
input_columns=["question", "A", "B", "C", "D"],
output_column="answer",
train_split="dev",
test_split=_split),
infer_cfg=ceval_infer_cfg,
eval_cfg=ceval_eval_cfg,
))
del _split, _name, _ch_name

View File

@ -0,0 +1,50 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import MDLRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import commonsenseqaDataset
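# CommonsenseQA, PPL setup with retrieved in-context examples: the MDL
# retriever selects 8 exemplars out of 30 candidates per test question, and
# each of the five options A-E is scored as a separate template.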
commonsenseqa_reader_cfg = dict(
input_columns=['question', 'A', 'B', 'C', 'D', 'E'],
output_column='answerKey',
test_split='validation')
_ice_template = dict(
type=PromptTemplate,
template={
ans: dict(
begin='</E>',
round=[
dict(role="HUMAN", prompt="Question: {question}\nAnswer: "),
dict(role="BOT", prompt=ans_token),
])
for ans, ans_token in [["A", "{A}"], ["B", "{B}"],
["C", "{C}"], ["D", "{D}"],
["E", "{E}"]]
},
ice_token='</E>')
commonsenseqa_infer_cfg = dict(
ice_template=_ice_template,
retriever=dict(
type=MDLRetriever,
ice_num=8,
candidate_num=30,
select_time=10,
seed=1,
batch_size=12,
ice_template=_ice_template),
inferencer=dict(type=PPLInferencer))
commonsenseqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
commonsenseqa_datasets = [
dict(
type=commonsenseqaDataset,
path='commonsense_qa',
reader_cfg=commonsenseqa_reader_cfg,
infer_cfg=commonsenseqa_infer_cfg,
eval_cfg=commonsenseqa_eval_cfg)
]
del _ice_template

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .crowspairs_ppl_f60797 import crowspairs_datasets # noqa: F401, F403

View File

@ -0,0 +1,42 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator
from opencompass.datasets import dropDataset
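# DROP, 2-shot generative setup: two worked examples are hard-coded in the
# prompt, and exact match is computed after the gsm8k postprocessor pulls out
# the final number.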
drop_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=
'''Text: In the county, the population was spread out with 23.50% under the age of 18, 8.70% from 18 to 24, 29.70% from 25 to 44, 24.70% from 45 to 64, and 13.30% who were 65 years of age or older.
Question: How many more percent are under the age of 18 compared to the 18 to 24 group?
Answer: According to the text, 23.5% are under the age of 18, and 8.7% are from ages 18 to 24. 23.5%-8.7%=14.8%. So the answer is 14.8.
Text: Playing in their second straight Thanksgiving game, the Eagles struggled especially on defense, where they were unable to stop the much-hyped Lions offense. The worst of it all was how unproven rookie Eric Rowe was tasked with covering wide receiver Calvin Johnson, leading to Johnson catching 3 touchdowns. Staffords five passing touchdowns, including three of them to Johnson was too much for the Eagles to overcome and for the second consecutive time this season, the Eagles gave up 45 points in a game. With the loss, the Eagles drop to 4-7 on the season and 6-1 when playing on Thanksgiving.
Question: How many TD passes did Stafford throw other than to Johnson?
Answer: According to the text, Stafford threw 5 TD passes, 3 of which were to Johnson. 5-3=2. So the answer is 2.
Text: {prompt}
Question: {question}
Answer:'''),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
drop_eval_cfg = dict(
evaluator=dict(type=EMEvaluator), pred_postprocessor=dict(
        type='gsm8k'))  # reuse the gsm8k postprocessor to extract the final number
drop_datasets = [
dict(
abbr='drop',
type=dropDataset,
path='drop',
reader_cfg=dict(
input_columns=['prompt'],
output_column='answers',
train_split='validation',
test_split='validation',
),
infer_cfg=drop_infer_cfg,
eval_cfg=drop_eval_cfg)
]

View File

@ -0,0 +1,154 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import TopkRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import BleuEvaluator
from opencompass.datasets import FloresFirst100Dataset
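# FLORES (first 100 samples), translation setup: the table below maps
# [ISO code, FLORES code, English name, language family]; generations use
# in-context examples selected by TopkRetriever and are scored with BLEU.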
_flores_lang_map = [
["eng", "eng_Latn", "English", "Indo-European-Germanic"],
["afr", "afr_Latn", "Afrikaans", "Indo-European-Germanic"],
["dan", "dan_Latn", "Danish", "Indo-European-Germanic"],
["deu", "deu_Latn", "German", "Indo-European-Germanic"],
["isl", "isl_Latn", "Icelandic", "Indo-European-Germanic"],
["ltz", "ltz_Latn", "Luxembourgish", "Indo-European-Germanic"],
["nld", "nld_Latn", "Dutch", "Indo-European-Germanic"],
["nob", "nob_Latn", "Norwegian", "Indo-European-Germanic"],
["swe", "swe_Latn", "Swedish", "Indo-European-Germanic"],
["ast", "ast_Latn", "Asturian", "Indo-European-Romance"],
["cat", "cat_Latn", "Catalan", "Indo-European-Romance"],
["fra", "fra_Latn", "French", "Indo-European-Romance"],
["glg", "glg_Latn", "Galician", "Indo-European-Romance"],
["oci", "oci_Latn", "Occitan", "Indo-European-Romance"],
["por", "por_Latn", "Portuguese", "Indo-European-Romance"],
["ron", "ron_Latn", "Romanian", "Indo-European-Romance"],
["spa", "spa_Latn", "Spanish", "Indo-European-Romance"],
["bel", "bel_Cyrl", "Belarusian", "Indo-European-Slavic"],
["bos", "bos_Latn", "Bosnian", "Indo-European-Slavic"],
["bul", "bul_Cyrl", "Bulgarian", "Indo-European-Slavic"],
["ces", "ces_Latn", "Czech", "Indo-European-Slavic"],
["hrv", "hrv_Latn", "Croatian", "Indo-European-Slavic"],
["mkd", "mkd_Cyrl", "Macedonian", "Indo-European-Slavic"],
["pol", "pol_Latn", "Polish", "Indo-European-Slavic"],
["rus", "rus_Cyrl", "Russian", "Indo-European-Slavic"],
["slk", "slk_Latn", "Slovak", "Indo-European-Slavic"],
["slv", "slv_Latn", "Slovenian", "Indo-European-Slavic"],
["srp", "srp_Cyrl", "Serbian", "Indo-European-Slavic"],
["ukr", "ukr_Cyrl", "Ukrainian", "Indo-European-Slavic"],
["asm", "asm_Beng", "Assamese", "Indo-European-Indo-Aryan"],
["ben", "ben_Beng", "Bengali", "Indo-European-Indo-Aryan"],
["guj", "guj_Gujr", "Gujarati", "Indo-European-Indo-Aryan"],
["hin", "hin_Deva", "Hindi", "Indo-European-Indo-Aryan"],
["mar", "mar_Deva", "Marathi", "Indo-European-Indo-Aryan"],
["npi", "npi_Deva", "Nepali", "Indo-European-Indo-Aryan"],
["ory", "ory_Orya", "Oriya", "Indo-European-Indo-Aryan"],
["pan", "pan_Guru", "Punjabi", "Indo-European-Indo-Aryan"],
["snd", "snd_Arab", "Sindhi", "Indo-European-Indo-Aryan"],
["urd", "urd_Arab", "Urdu", "Indo-European-Indo-Aryan"],
["ckb", "ckb_Arab", "Kurdish", "Indo-European-Other"],
["cym", "cym_Latn", "Welsh", "Indo-European-Other"],
["ell", "ell_Grek", "Greek", "Indo-European-Other"],
["fas", "pes_Arab", "Persian", "Indo-European-Other"],
["gle", "gle_Latn", "Irish", "Indo-European-Other"],
["hye", "hye_Armn", "Armenian", "Indo-European-Other"],
["ita", "ita_Latn", "Italian", "Indo-European-Other"],
["lav", "lvs_Latn", "Latvian", "Indo-European-Other"],
["lit", "lit_Latn", "Lithuanian", "Indo-European-Other"],
["pus", "pbt_Arab", "Pashto", "Indo-European-Other"],
["tgk", "tgk_Cyrl", "Tajik", "Indo-European-Other"],
["ceb", "ceb_Latn", "Cebuano", "Austronesian"],
["ind", "ind_Latn", "Indonesian", "Austronesian"],
["jav", "jav_Latn", "Javanese", "Austronesian"],
["mri", "mri_Latn", "Maori", "Austronesian"],
["msa", "zsm_Latn", "Malay", "Austronesian"],
["tgl", "tgl_Latn", "Tagalog", "Austronesian"],
["ibo", "ibo_Latn", "Igbo", "Atlantic-Congo"],
["kam", "kam_Latn", "Kamba", "Atlantic-Congo"],
["kea", "kea_Latn", "Kabuverdianu", "Atlantic-Congo"],
["lin", "lin_Latn", "Lingala", "Atlantic-Congo"],
["lug", "lug_Latn", "Luganda", "Atlantic-Congo"],
["nso", "nso_Latn", "Northern Sotho", "Atlantic-Congo"],
["nya", "nya_Latn", "Nyanja", "Atlantic-Congo"],
["sna", "sna_Latn", "Shona", "Atlantic-Congo"],
["swh", "swh_Latn", "Swahili", "Atlantic-Congo"],
["umb", "umb_Latn", "Umbundu", "Atlantic-Congo"],
["wol", "wol_Latn", "Wolof", "Atlantic-Congo"],
["xho", "xho_Latn", "Xhosa", "Atlantic-Congo"],
["yor", "yor_Latn", "Yoruba", "Atlantic-Congo"],
["zul", "zul_Latn", "Zulu", "Atlantic-Congo"],
["amh", "amh_Ethi", "Amharic", "Afro-Asiatic"],
["ara", "arb_Arab", "Arabic", "Afro-Asiatic"],
["ful", "fuv_Latn", "Fulah", "Afro-Asiatic"],
["mlt", "mlt_Latn", "Maltese", "Afro-Asiatic"],
["orm", "gaz_Latn", "Oromo", "Afro-Asiatic"],
["som", "som_Latn", "Somali", "Afro-Asiatic"],
["azj", "azj_Latn", "Azerbaijani", "Turkic"],
["kaz", "kaz_Cyrl", "Kazakh", "Turkic"],
["kir", "kir_Cyrl", "Kyrgyz", "Turkic"],
["tur", "tur_Latn", "Turkish", "Turkic"],
["uzb", "uzn_Latn", "Uzbek", "Turkic"],
["kan", "kan_Knda", "Kannada", "Dravidian"],
["mal", "mal_Mlym", "Malayalam", "Dravidian"],
["tam", "tam_Taml", "Tamil", "Dravidian"],
["tel", "tel_Telu", "Telugu", "Dravidian"],
["mya", "mya_Mymr", "Burmese", "Sino-Tibetan"],
["zho_simpl", "zho_Hans", "Chinese (Simpl)", "Sino-Tibetan"],
["zho_trad", "zho_Hant", "Chinese (Trad)", "Sino-Tibetan"],
["est", "est_Latn", "Estonian", "Other"],
["fin", "fin_Latn", "Finnish", "Other"],
["hau", "hau_Latn", "Hausa", "Other"],
["heb", "heb_Hebr", "Hebrew", "Other"],
["hun", "hun_Latn", "Hungarian", "Other"],
["jpn", "jpn_Jpan", "Japanese", "Other"],
["kat", "kat_Geor", "Georgian", "Other"],
["khm", "khm_Khmr", "Khmer", "Other"],
["kor", "kor_Hang", "Korean", "Other"],
["lao", "lao_Laoo", "Lao", "Other"],
["luo", "luo_Latn", "Luo", "Other"],
["mon", "khk_Cyrl", "Mongolian", "Other"],
["tha", "tha_Thai", "Thai", "Other"],
["vie", "vie_Latn", "Vietnamese", "Other"],
]
flores_lang_map = {i[0]: i for i in _flores_lang_map}
_flores_subtasks = [f"eng-{i}" for i in flores_lang_map if i != "eng"
] + [f"{i}-eng" for i in flores_lang_map if i != "eng"]
flores_datasets = []
for _flores_subtask in _flores_subtasks:
_src, _tgt = _flores_subtask.split("-")
_, _flores_source, _src_inst, _ = flores_lang_map[_src]
_, _flores_target, _tgt_inst, _ = flores_lang_map[_tgt]
flores_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template=f"</E>{{sentence_{_flores_source}}} = {{sentence_{_flores_target}}}" if _flores_subtask != "zho_simpl-eng"
else f"</E>Chinese: {{sentence_{_flores_source}}}\nEnglish: {{sentence_{_flores_target}}}",
ice_token="</E>",
),
retriever=dict(type=TopkRetriever, ice_num=8),
inferencer=dict(type=GenInferencer),
)
flores_eval_cfg = dict(
evaluator=dict(type=BleuEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type="flores"),
dataset_postprocessor=dict(type="flores"),
)
if _tgt == "zho_simpl":
flores_eval_cfg["pred_postprocessor"] = dict(type="flores-chinese")
flores_eval_cfg["dataset_postprocessor"] = dict(type="flores-chinese")
flores_datasets.append(
dict(
type=FloresFirst100Dataset,
abbr=f"flores_100_{_src}-{_tgt}",
name=f"{_flores_source}-{_flores_target}",
reader_cfg=dict(
input_columns=f"sentence_{_flores_source}",
output_column=f"sentence_{_flores_target}",
train_split="dev",
test_split="devtest"),
infer_cfg=flores_infer_cfg.copy(),
eval_cfg=flores_eval_cfg.copy(),
))
del _flores_lang_map, _flores_subtask, _src, _tgt, _, _flores_source, _src_inst, _flores_target, _tgt_inst

View File

@ -0,0 +1,163 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import TopkRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import BleuEvaluator
from opencompass.datasets import FloresFirst100Dataset
_flores_lang_map = [
["eng", "eng_Latn", "English", "Indo-European-Germanic"],
["afr", "afr_Latn", "Afrikaans", "Indo-European-Germanic"],
["dan", "dan_Latn", "Danish", "Indo-European-Germanic"],
["deu", "deu_Latn", "German", "Indo-European-Germanic"],
["isl", "isl_Latn", "Icelandic", "Indo-European-Germanic"],
["ltz", "ltz_Latn", "Luxembourgish", "Indo-European-Germanic"],
["nld", "nld_Latn", "Dutch", "Indo-European-Germanic"],
["nob", "nob_Latn", "Norwegian", "Indo-European-Germanic"],
["swe", "swe_Latn", "Swedish", "Indo-European-Germanic"],
["ast", "ast_Latn", "Asturian", "Indo-European-Romance"],
["cat", "cat_Latn", "Catalan", "Indo-European-Romance"],
["fra", "fra_Latn", "French", "Indo-European-Romance"],
["glg", "glg_Latn", "Galician", "Indo-European-Romance"],
["oci", "oci_Latn", "Occitan", "Indo-European-Romance"],
["por", "por_Latn", "Portuguese", "Indo-European-Romance"],
["ron", "ron_Latn", "Romanian", "Indo-European-Romance"],
["spa", "spa_Latn", "Spanish", "Indo-European-Romance"],
["bel", "bel_Cyrl", "Belarusian", "Indo-European-Slavic"],
["bos", "bos_Latn", "Bosnian", "Indo-European-Slavic"],
["bul", "bul_Cyrl", "Bulgarian", "Indo-European-Slavic"],
["ces", "ces_Latn", "Czech", "Indo-European-Slavic"],
["hrv", "hrv_Latn", "Croatian", "Indo-European-Slavic"],
["mkd", "mkd_Cyrl", "Macedonian", "Indo-European-Slavic"],
["pol", "pol_Latn", "Polish", "Indo-European-Slavic"],
["rus", "rus_Cyrl", "Russian", "Indo-European-Slavic"],
["slk", "slk_Latn", "Slovak", "Indo-European-Slavic"],
["slv", "slv_Latn", "Slovenian", "Indo-European-Slavic"],
["srp", "srp_Cyrl", "Serbian", "Indo-European-Slavic"],
["ukr", "ukr_Cyrl", "Ukrainian", "Indo-European-Slavic"],
["asm", "asm_Beng", "Assamese", "Indo-European-Indo-Aryan"],
["ben", "ben_Beng", "Bengali", "Indo-European-Indo-Aryan"],
["guj", "guj_Gujr", "Gujarati", "Indo-European-Indo-Aryan"],
["hin", "hin_Deva", "Hindi", "Indo-European-Indo-Aryan"],
["mar", "mar_Deva", "Marathi", "Indo-European-Indo-Aryan"],
["npi", "npi_Deva", "Nepali", "Indo-European-Indo-Aryan"],
["ory", "ory_Orya", "Oriya", "Indo-European-Indo-Aryan"],
["pan", "pan_Guru", "Punjabi", "Indo-European-Indo-Aryan"],
["snd", "snd_Arab", "Sindhi", "Indo-European-Indo-Aryan"],
["urd", "urd_Arab", "Urdu", "Indo-European-Indo-Aryan"],
["ckb", "ckb_Arab", "Kurdish", "Indo-European-Other"],
["cym", "cym_Latn", "Welsh", "Indo-European-Other"],
["ell", "ell_Grek", "Greek", "Indo-European-Other"],
["fas", "pes_Arab", "Persian", "Indo-European-Other"],
["gle", "gle_Latn", "Irish", "Indo-European-Other"],
["hye", "hye_Armn", "Armenian", "Indo-European-Other"],
["ita", "ita_Latn", "Italian", "Indo-European-Other"],
["lav", "lvs_Latn", "Latvian", "Indo-European-Other"],
["lit", "lit_Latn", "Lithuanian", "Indo-European-Other"],
["pus", "pbt_Arab", "Pashto", "Indo-European-Other"],
["tgk", "tgk_Cyrl", "Tajik", "Indo-European-Other"],
["ceb", "ceb_Latn", "Cebuano", "Austronesian"],
["ind", "ind_Latn", "Indonesian", "Austronesian"],
["jav", "jav_Latn", "Javanese", "Austronesian"],
["mri", "mri_Latn", "Maori", "Austronesian"],
["msa", "zsm_Latn", "Malay", "Austronesian"],
["tgl", "tgl_Latn", "Tagalog", "Austronesian"],
["ibo", "ibo_Latn", "Igbo", "Atlantic-Congo"],
["kam", "kam_Latn", "Kamba", "Atlantic-Congo"],
["kea", "kea_Latn", "Kabuverdianu", "Atlantic-Congo"],
["lin", "lin_Latn", "Lingala", "Atlantic-Congo"],
["lug", "lug_Latn", "Luganda", "Atlantic-Congo"],
["nso", "nso_Latn", "Northern Sotho", "Atlantic-Congo"],
["nya", "nya_Latn", "Nyanja", "Atlantic-Congo"],
["sna", "sna_Latn", "Shona", "Atlantic-Congo"],
["swh", "swh_Latn", "Swahili", "Atlantic-Congo"],
["umb", "umb_Latn", "Umbundu", "Atlantic-Congo"],
["wol", "wol_Latn", "Wolof", "Atlantic-Congo"],
["xho", "xho_Latn", "Xhosa", "Atlantic-Congo"],
["yor", "yor_Latn", "Yoruba", "Atlantic-Congo"],
["zul", "zul_Latn", "Zulu", "Atlantic-Congo"],
["amh", "amh_Ethi", "Amharic", "Afro-Asiatic"],
["ara", "arb_Arab", "Arabic", "Afro-Asiatic"],
["ful", "fuv_Latn", "Fulah", "Afro-Asiatic"],
["mlt", "mlt_Latn", "Maltese", "Afro-Asiatic"],
["orm", "gaz_Latn", "Oromo", "Afro-Asiatic"],
["som", "som_Latn", "Somali", "Afro-Asiatic"],
["azj", "azj_Latn", "Azerbaijani", "Turkic"],
["kaz", "kaz_Cyrl", "Kazakh", "Turkic"],
["kir", "kir_Cyrl", "Kyrgyz", "Turkic"],
["tur", "tur_Latn", "Turkish", "Turkic"],
["uzb", "uzn_Latn", "Uzbek", "Turkic"],
["kan", "kan_Knda", "Kannada", "Dravidian"],
["mal", "mal_Mlym", "Malayalam", "Dravidian"],
["tam", "tam_Taml", "Tamil", "Dravidian"],
["tel", "tel_Telu", "Telugu", "Dravidian"],
["mya", "mya_Mymr", "Burmese", "Sino-Tibetan"],
["zho_simpl", "zho_Hans", "Chinese (Simpl)", "Sino-Tibetan"],
["zho_trad", "zho_Hant", "Chinese (Trad)", "Sino-Tibetan"],
["est", "est_Latn", "Estonian", "Other"],
["fin", "fin_Latn", "Finnish", "Other"],
["hau", "hau_Latn", "Hausa", "Other"],
["heb", "heb_Hebr", "Hebrew", "Other"],
["hun", "hun_Latn", "Hungarian", "Other"],
["jpn", "jpn_Jpan", "Japanese", "Other"],
["kat", "kat_Geor", "Georgian", "Other"],
["khm", "khm_Khmr", "Khmer", "Other"],
["kor", "kor_Hang", "Korean", "Other"],
["lao", "lao_Laoo", "Lao", "Other"],
["luo", "luo_Latn", "Luo", "Other"],
["mon", "khk_Cyrl", "Mongolian", "Other"],
["tha", "tha_Thai", "Thai", "Other"],
["vie", "vie_Latn", "Vietnamese", "Other"],
]
flores_lang_map = {i[0]: i for i in _flores_lang_map}
_flores_subtasks = [f"eng-{i}" for i in flores_lang_map if i != "eng"
] + [f"{i}-eng" for i in flores_lang_map if i != "eng"]
flores_datasets = []
for _flores_subtask in _flores_subtasks:
_src, _tgt = _flores_subtask.split("-")
_, _flores_source, _src_inst, _ = flores_lang_map[_src]
_, _flores_target, _tgt_inst, _ = flores_lang_map[_tgt]
flores_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template=dict(
begin="</E>",
round=[
dict(
role="HUMAN",
prompt=
f"Translate the following {_src_inst} statements to {_tgt_inst}.\n{{sentence_{_flores_source}}}"
),
dict(role="BOT", prompt=f"{{sentence_{_flores_target}}}"),
],
),
ice_token="</E>",
),
retriever=dict(type=TopkRetriever, ice_num=8),
inferencer=dict(type=GenInferencer),
)
flores_eval_cfg = dict(
evaluator=dict(type=BleuEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type="flores"),
dataset_postprocessor=dict(type="flores"),
)
if _tgt == "zho_simpl":
flores_eval_cfg["pred_postprocessor"] = dict(type="flores-chinese")
flores_eval_cfg["dataset_postprocessor"] = dict(type="flores-chinese")
flores_datasets.append(
dict(
type=FloresFirst100Dataset,
abbr=f"flores_100_{_src}-{_tgt}",
name=f"{_flores_source}-{_flores_target}",
reader_cfg=dict(
input_columns=f"sentence_{_flores_source}",
output_column=f"sentence_{_flores_target}",
train_split="dev",
test_split="devtest"),
infer_cfg=flores_infer_cfg.copy(),
eval_cfg=flores_eval_cfg.copy(),
))
del _flores_lang_map, _flores_subtask, _src, _tgt, _, _flores_source, _src_inst, _flores_target, _tgt_inst

View File

@ -0,0 +1,368 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer, GLMChoiceInferencer
from opencompass.datasets import GaokaoBenchDataset
MCQ_TMPL = """\
请你做一道{type}
请你一步一步思考并将思考过程写在[解析]<eoe>之间你将从ABCD中选出正确的答案并写在[答案]<eoa>之间
例如[答案]: A <eoa>
完整的题目回答的格式如下
回答[解析] ... <eoe>
[答案] ... <eoa>
请你严格按照上述格式作答
题目如下{{question}}
回答"""
MULTI_MCQ_TMPL = """\
请你做一道{type}
请你一步一步思考每一题你将从ABCD中选出正确的答案并写在[答案]<eoa>之间
例如1[答案] A <eoa>
(2)[答案] B <eoa>
请你严格按照上述格式作答
题目如下{{question}}
回答"""
CLOZE_TMPL = """\
请你做一道{type}
请你一步一步思考将符合题意的五个选项的字母写在[答案]<eoa>之间
例如[答案] A B C D E <eoa>
请严格按照上述格式作答
题目如下{{question}}
回答"""
_MCQ_prompts = [
{
"type": "single_choice",
"keyword": "2010-2022_Math_II_MCQs",
"prefix_prompt": MCQ_TMPL.format(type='数学选择题'),
"comment": ""
},
{
"type": "single_choice",
"keyword": "2010-2022_Math_I_MCQs",
"prefix_prompt": MCQ_TMPL.format(type='数学选择题'),
"comment": ""
},
{
"type": "single_choice",
"keyword": "2010-2022_History_MCQs",
"prefix_prompt": MCQ_TMPL.format(type='历史选择题'),
},
{
"type": "single_choice",
"keyword": "2010-2022_Biology_MCQs",
"prefix_prompt": MCQ_TMPL.format(type='生物选择题'),
},
{
"type": "single_choice",
"keyword": "2010-2022_Political_Science_MCQs",
"prefix_prompt": MCQ_TMPL.format(type='政治选择题'),
},
{
"type": "multi_choice",
"keyword": "2010-2022_Physics_MCQs",
"prefix_prompt": MCQ_TMPL.format(type='物理选择题'),
},
{
"type": "single_choice",
"keyword": "2010-2022_Chemistry_MCQs",
"prefix_prompt": MCQ_TMPL.format(type='化学选择题'),
},
{
"type": "single_choice",
"keyword": "2010-2013_English_MCQs",
"prefix_prompt": MCQ_TMPL.format(type='英语选择题'),
},
{
"type": "multi_question_choice",
"keyword": "2010-2022_Chinese_Modern_Lit",
"prefix_prompt": MULTI_MCQ_TMPL.format(type='语文阅读理解题,其中包含三个小题'),
},
{
"type": "multi_question_choice",
"keyword": "2010-2022_English_Fill_in_Blanks",
"prefix_prompt": MULTI_MCQ_TMPL.format(type='英语完形填空题,其中包含二十个小题'),
},
{
"type": "five_out_of_seven",
"keyword": "2012-2022_English_Cloze_Test",
"prefix_prompt": CLOZE_TMPL.format(type='英语完形填空题'),
},
{
"type": "multi_question_choice",
"keyword": "2010-2022_Geography_MCQs",
"prefix_prompt": MULTI_MCQ_TMPL.format(type='地理选择题'),
},
{
"type": "multi_question_choice",
"keyword": "2010-2022_English_Reading_Comp",
"prefix_prompt": MULTI_MCQ_TMPL.format(type='英语阅读理解题,其中包含三到五个小题。'),
},
{
"type": "multi_question_choice",
"keyword": "2010-2022_Chinese_Lang_and_Usage_MCQs",
"prefix_prompt": MCQ_TMPL.format(type='语文选择题'),
},
]
_FBQ_prompts = [{
"type": "cloze",
"keyword": "2010-2022_Math_I_Fill-in-the-Blank",
"prefix_prompt":
"请解答下面的数学填空题\n仔细阅读题目,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间。\n完整的题目回答格式如下:\n【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答。\n题目如下:",
"comment": ""
}, {
"type": "cloze",
"keyword": "2010-2022_Math_II_Fill-in-the-Blank",
"prefix_prompt":
"请解答下面的数学填空题\n仔细阅读题目,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间。\n完整的题目回答格式如下:\n【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答。\n题目如下:",
"comment": ""
}, {
"type": "cloze",
"keyword":
"2010-2022_Chinese_Language_Famous_Passages_and_Sentences_Dictation",
"prefix_prompt":
"请回答下面的语文填空题\n请你仔细阅读题目,先找到题目对应的中国名篇,再从名篇中找到合适的句子填写到题目的空白处。请你将思考过程写在【解析】和<eoe>之间,将最终答案写在【答案】和<eoa>之间。\n完整的题目回答格式如下:\n1【解析】 ...<eoe>\n【答案】...<eoa>\n2【解析】 ...<eoe>\n【答案】...<eoa>\n请严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
}, {
"type": "cloze",
"keyword": "2014-2022_English_Language_Cloze_Passage",
"prefix_prompt":
"请回答下面的英语短文填词题\n仔细阅读题目,空白处请填入一个适当单词或者括号内单词的正确形式。请你一步步思考,将思考过程写在【解析】和<eoe>之间,将最终答案写在【答案】和<eoa>之间。\n完整的题目回答格式如下:\n1【解析】 ...<eoe>\n【答案】...<eoa>\n2【解析】 ...<eoe>\n【答案】...<eoa>\n请严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
}]
_OEQ_prompts = [
{
"type": "subjective",
"keyword": "2010-2022_Geography_Open-ended_Questions",
"prefix_prompt":
"请解答下面的地理解答题\n仔细阅读题目并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。你的答案请写在【答案】和<eoa>之间\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Chemistry_Open-ended_Questions",
"prefix_prompt":
"请解答下面的化学解答题\n仔细阅读题目并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Math_I_Open-ended_Questions",
"prefix_prompt":
"请解答下面的数学解答题\n仔细阅读题目并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间,答案需要有完整的解题步骤。\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_History_Open-ended_Questions",
"prefix_prompt":
"请解答下面的历史解答题\n仔细阅读材料和题目,并充分结合你已有的知识,解答其中的问题。请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Biology_Open-ended_Questions",
"prefix_prompt":
"请解答下面的生物解答题\n仔细阅读题目并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间,同一小题的答案用\t分隔开。\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...\t...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...\t...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Math_II_Open-ended_Questions",
"prefix_prompt":
"请解答下面的数学解答题\n仔细阅读题目并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间,答案需要有完整的解题步骤。\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Physics_Open-ended_Questions",
"prefix_prompt":
"请解答下面的物理解答题,仔细阅读题目,注意其中可能含有单选题和多选题。请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案例如“【答案】A <eoa>”。\n完整的题目回答格式如下1【解析】 ...<eoe>\n【答案】 ...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答。如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Political_Science_Open-ended_Questions",
"prefix_prompt":
"请解答下面的政治解答题\n仔细阅读材料和题目,并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "correction",
"keyword": "2012-2022_English_Language_Error_Correction",
"prefix_prompt":
"请解答下面的英语短文改错题仔细阅读题目并充分结合你你已有的知识找出其中10处需要改动的地方。请你一步步思考把修改后的短文写在【答案】和<eoa>之间。\n完整的题目回答格式如下:【答案】 ...<eoa>\n 请你严格按照上述格式作答。\n题目如下:",
# "prefix_prompt": [
# "请解答下面的英语短文改错题仔细阅读题目并充分结合你你已有的知识找出其中10处需要改动的地方。请你一步步思考把修改后的短文写在【答案】和<eoa>之间。\n完整的题目回答格式如下【答案】 ...<eoa>\n 请你严格按照上述格式作答。\n题目如下:",
# "请比较下面两篇短文找到第二篇和第一篇的10处不同每处不同只涉及一个单词请将结果写在【答案】和<eoa>之间。例如【答案】1. 将play改为plays\n 2.增加了the\n ... <eoa>\n 完整的题目回答格式如下:【答案】(1) ... \n (2) ...\n ...(10) ...\n<eoa>\n请你严格按照上述格式作答。\n短文如下:"
# ],
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Chinese_Language_Ancient_Poetry_Reading",
"prefix_prompt":
"请解答下面的语文古代诗歌阅读题,仔细阅读题目,注意其中可能含有单选题和多选题。请你一步步思考并将最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案例如“【答案】A <eoa>”。\n完整的题目回答格式如下1【答案】 ...<eoa>\n (2)【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Chinese_Language_Practical_Text_Reading",
"prefix_prompt":
"请解答下面的语文实用类文本阅读,仔细阅读题目,注意其中可能含有单选题和多选题。请你一步步思考并将最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案例如“【答案】A <eoa>”。\n完整的题目回答格式如下1[答案】 ...<eoa>\n (2)【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Chinese_Language_Literary_Text_Reading",
"prefix_prompt":
"请解答下面的语文文学类文本阅读,仔细阅读题目,注意其中可能含有单选题和多选题。请你一步步思考并将最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案例如“【答案】A <eoa>”。\n完整的题目回答格式如下1[答案】 ...<eoa>\n (2)【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Chinese_Language_Classical_Chinese_Reading",
"prefix_prompt":
"请解答下面的语文文言文阅读,仔细阅读题目,前三题是单选题,最后一题要将文言文翻译为现代汉语。请你一步步思考并把最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案例如“【答案】A <eoa>”。翻译题把翻译后的现代汉语句子写在【答案】后面,例如”【答案】今天天气很好 <eoa>”\n完整的题目回答格式如下1[答案】 ...<eoa>\n (2)【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword":
"2010-2022_Chinese_Language_Language_and_Writing_Skills_Open-ended_Questions",
"prefix_prompt":
"请解答下面的语文解答题,仔细阅读题目,注意其中可能含有选择题。请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案例如“【答案】A <eoa>”。\n完整的题目回答格式如下1【解析】 ...<eoe>\n【答案】 ...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答。如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
}
]
gaokao_bench_datasets = []
for _folder, _prompts in [
("Multiple-choice_Questions", _MCQ_prompts),
("Fill-in-the-blank_Questions", _FBQ_prompts),
("Open-ended_Questions", _OEQ_prompts),
]:
for _p in _prompts:
if _p['type'] == "single_choice":
continue
_reader_cfg = {
"input_columns": ['question'],
"output_column": 'answer',
}
_infer_cfg = {
"ice_template": {
"type": PromptTemplate,
"template": {
"round": [{
"role":
"HUMAN",
"prompt":
# the FBQ / OEQ prefix prompts carry no {question} placeholder, so fall back to appending the token
(_p['prefix_prompt'].format(question='</question>')
 if '{question}' in _p['prefix_prompt'] else
 _p['prefix_prompt'] + '</question>')
}]
},
"column_token_map": {
"question": "</question>"
},
"ice_token": "</E>"
},
"retriever": {
"type": ZeroRetriever
},
"inferencer": {
"type": GenInferencer
}
}
_eval_cfg = {
"evaluator": {
"type": "GaokaoBenchEvaluator" + "_" + _p['type'],
},
"pred_role": "BOT",
}
_base_path = './data/GAOKAO-BENCH/data'
_dataset = {
"type": GaokaoBenchDataset,
"abbr": "GaokaoBench_" + _p['keyword'],
"path": _base_path + '/' + _folder + '/' + _p['keyword'] + ".json",
"reader_cfg": _reader_cfg,
"infer_cfg": _infer_cfg,
"eval_cfg": _eval_cfg,
}
gaokao_bench_datasets.append(_dataset)
# Single choice dataset
_folder = "Multiple-choice_Questions"
for _p in _MCQ_prompts:
if _p['type'] != "single_choice":
continue
_reader_cfg = {
"input_columns": ['question'],
"output_column": 'answer',
}
_infer_cfg = {
"ice_template": {
"type": PromptTemplate,
"template": {
answer: {
"round": [{
"role": "HUMAN",
"prompt": _p['prefix_prompt'] + '</question>'
}, {
"role": "BOT",
"prompt": f"【答案】{answer} <eoa>"
}]
}
for answer in ['A', 'B', 'C', 'D']
},
"column_token_map": {
"question": "</question>"
},
"ice_token": "</E>"
},
"prompt_template": {
"type": PromptTemplate,
"template": {
"round": [{
"role": "HUMAN",
"prompt": _p['prefix_prompt'] + '</question>'
}, {
"role": "BOT",
"prompt": "【答案】("
}]
},
"column_token_map": {
"question": "</question>"
},
},
"retriever": {
"type": ZeroRetriever
},
"inferencer": {
"type": GLMChoiceInferencer,
"choices": ['A', 'B', 'C', 'D'],
}
}
_eval_cfg = {
"evaluator": {
"type": "GaokaoBenchEvaluator" + "_" + _p['type'],
},
"pred_role": "BOT",
}
_base_path = './data/GAOKAO-BENCH/data'
_dataset = {
"type": GaokaoBenchDataset,
"abbr": "GaokaoBench_" + _p['keyword'],
"path": _base_path + '/' + _folder + '/' + _p['keyword'] + ".json",
"reader_cfg": _reader_cfg,
"infer_cfg": _infer_cfg,
"eval_cfg": _eval_cfg,
}
gaokao_bench_datasets.append(_dataset)
_temporary_variables = [k for k in globals() if k.startswith('_')]
for _t in _temporary_variables:
del globals()[_t]
del _temporary_variables, _t

View File

@ -0,0 +1,32 @@
from mmengine.config import read_base

# Gather every `*_datasets` list exported by the individual dataset configs.
# The variable names below follow the `<config>_datasets` convention used
# throughout these configs.
with read_base():
    from .bustm import bustm_datasets  # noqa: F401, F403
    from .afqmc import afqmc_datasets  # noqa: F401, F403
    from .eprstmt import eprstmt_datasets  # noqa: F401, F403
    from .ocnli_fc import ocnli_fc_datasets  # noqa: F401, F403
    from .ocnli import ocnli_datasets  # noqa: F401, F403
    from .cmnli import cmnli_datasets  # noqa: F401, F403
    from .csl import csl_datasets  # noqa: F401, F403
    from .chid import chid_datasets  # noqa: F401, F403
    from .cluewsc import cluewsc_datasets  # noqa: F401, F403
    from .tnews import tnews_datasets  # noqa: F401, F403
    from .C3 import C3_datasets  # noqa: F401, F403
    from .CMRC import CMRC_datasets  # noqa: F401, F403
    from .DRCD import DRCD_datasets  # noqa: F401, F403
    from .lcsts import lcsts_datasets  # noqa: F401, F403
    from .piqa import piqa_datasets  # noqa: F401, F403
    from .commonsenseqa import commonsenseqa_datasets  # noqa: F401, F403
    from .gsm8k import gsm8k_datasets  # noqa: F401, F403
    from .flores import flores_datasets  # noqa: F401, F403
    from .humaneval import humaneval_datasets  # noqa: F401, F403
    from .mbpp import mbpp_datasets  # noqa: F401, F403
    from .triviaqa import triviaqa_datasets  # noqa: F401, F403
    from .nq import nq_datasets  # noqa: F401, F403
    from .agieval import agieval_datasets  # noqa: F401, F403
    from .mmlu import mmlu_datasets  # noqa: F401, F403
    from .ceval import ceval_datasets  # noqa: F401, F403

datasets = []
for k, v in list(locals().items()):
    if k.endswith("_datasets"):
        datasets += v

View File

@ -0,0 +1,38 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
afqmc_reader_cfg = dict(
input_columns=['sentence1', 'sentence2'],
output_column='label',
test_split='train')
afqmc_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
# 0: "{sentence1},{sentence2}不同。",
# 1: "{sentence1},{sentence2}相似。"
0:
"I received the questions \"{sentence1}\" and \"{sentence2}\". Are they duplicates?[MASK]no",
1:
"I received the questions \"{sentence1}\" and \"{sentence2}\". Are they duplicates?[MASK]yes",
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
afqmc_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
afqmc_datasets = [
dict(
type=HFDataset,
abbr='afqmc',
path='json',
data_files='./data/CLUE/AFQMC/test_public.json',
split='train',
reader_cfg=afqmc_reader_cfg,
infer_cfg=afqmc_infer_cfg,
eval_cfg=afqmc_eval_cfg)
]

View File

@ -0,0 +1,110 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GLMChoiceInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import AGIEvalDataset
agieval_reader_cfg = dict(
input_columns=['problem_input'], output_column='label')
agieval_single_choice_sets = [
'gaokao-chinese',
'gaokao-english',
'gaokao-geography',
'gaokao-history',
'gaokao-biology',
'gaokao-chemistry',
'gaokao-physics',
'gaokao-mathqa',
'logiqa-zh',
'lsat-ar',
'lsat-lr',
'lsat-rc',
'logiqa-en',
'sat-math',
'sat-en',
'sat-en-without-passage',
'aqua-rat',
]
agieval_multiple_choices_sets = [
'jec-qa-kd',  # requires extra data preprocessing
'jec-qa-ca',  # requires extra data preprocessing
]
agieval_cloze_sets = ['gaokao-mathcloze', 'math']
agieval_datasets = []
for name in agieval_single_choice_sets:
agieval_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template={
label: f'{{problem_input}} {label}'
for label in ['A', 'B', 'C', 'D']
}),
retriever=dict(type=ZeroRetriever
), # the retriever has no effect here; zero-shot / few-shot is decided by the setting_name argument
inferencer=dict(
type=GLMChoiceInferencer, choices=['A', 'B', 'C', 'D']))
agieval_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
agieval_datasets.append(
dict(
type=AGIEvalDataset,
path='./data/AGIEval/data/v1/',
name=name,
abbr='agieval-' + name,
setting_name='zero-shot',
reader_cfg=agieval_reader_cfg,
infer_cfg=agieval_infer_cfg.copy(),
eval_cfg=agieval_eval_cfg.copy()))
for name in agieval_multiple_choices_sets:
_hint = '答案是: '
agieval_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(role='HUMAN', prompt=f'{{question}}\n{{options}}\n{_hint}')
]),
ice_token='</E>'),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type='GenInferencer'))
agieval_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='first-capital-multi'))
agieval_datasets.append(
dict(
type='AGIEvalDataset_v2',
path='./data/AGIEval/data/v1/',
name=name,
abbr='agieval-' + name,
setting_name='zero-shot',
reader_cfg=agieval_reader_cfg,
infer_cfg=agieval_infer_cfg.copy(),
eval_cfg=agieval_eval_cfg.copy()))
for name in agieval_cloze_sets:
agieval_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template='</E>{problem_input}',
ice_token='</E>'),
retriever=dict(type=ZeroRetriever
), # the retriever has no effect here; zero-shot / few-shot is decided by the setting_name argument
inferencer=dict(type='GenInferencer'))
agieval_eval_cfg = dict(evaluator=dict(type='AGIEvalEvaluator'))
agieval_datasets.append(
dict(
type=AGIEvalDataset,
path='./data/AGIEval/data/v1/',
name=name,
abbr='agieval-' + name,
setting_name='zero-shot',
reader_cfg=agieval_reader_cfg,
infer_cfg=agieval_infer_cfg.copy(),
eval_cfg=agieval_eval_cfg.copy()))
del name, agieval_infer_cfg, agieval_eval_cfg

View File

@ -0,0 +1,38 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
cmnli_reader_cfg = dict(
input_columns=['sentence1', 'sentence2'],
output_column='label',
test_split='train')
cmnli_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
'contradiction':
'{sentence1}\nKeeping in mind the above text, consider: {sentence2}\nIs this "always", "sometimes", or "never" correct? [MASK]never',
'entailment':
'{sentence1}\nKeeping in mind the above text, consider: {sentence2}\nIs this "always", "sometimes", or "never" correct? [MASK]always',
'neutral':
'{sentence1}\nKeeping in mind the above text, consider: {sentence2}\nIs this "always", "sometimes", or "never" correct? [MASK]sometimes'
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
cmnli_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
cmnli_datasets = [
dict(
type=HFDataset,
abbr='cmnli',
path='json',
split='train',
data_files='./data/CLUE/cmnli/cmnli_public/dev.json',
reader_cfg=cmnli_reader_cfg,
infer_cfg=cmnli_infer_cfg,
eval_cfg=cmnli_eval_cfg)
]

View File

@ -0,0 +1,46 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GLMChoiceInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CslDataset
csl_reader_cfg = dict(
input_columns=["abst", "keywords"], output_column='label')
csl_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template={
0: "</E>摘要:</A>",
1: "</E>摘要:</A>关键词:</K>"
},
column_token_map={
"abst": '</A>',
'keywords': '</K>'
},
ice_token='</E>'),
prompt_template=dict(
type=PromptTemplate,
template=
'</E>Abstract: </A>\nKeyword: </K>\n Do all keywords come from the given abstract? (Yes or No)',
column_token_map={
"abst": '</A>',
'keywords': '</K>'
},
ice_token='</E>'),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GLMChoiceInferencer, choices=['No', 'Yes']))
csl_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
csl_datasets = [
dict(
type=CslDataset,
path='json',
abbr='csl',
data_files='./data/FewCLUE/csl/test_public.json',
split='train',
reader_cfg=csl_reader_cfg,
infer_cfg=csl_infer_cfg,
eval_cfg=csl_eval_cfg)
]

View File

@ -0,0 +1,30 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import HFDataset, HumanEvaluator
humaneval_reader_cfg = dict(
input_columns=['prompt'], output_column='task_id', train_split='test')
# TODO: allow empty output-column
humaneval_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template='{prompt}'),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
humaneval_eval_cfg = dict(
evaluator=dict(type=HumanEvaluator),
k=[1, 10, 100],  # pass@k values, only used by the HumanEval evaluator
pred_postprocessor=dict(type='humaneval'),
)
humaneval_datasets = [
dict(
type=HFDataset,
path='openai_humaneval',
reader_cfg=humaneval_reader_cfg,
infer_cfg=humaneval_infer_cfg,
eval_cfg=humaneval_eval_cfg)
]

View File

@ -0,0 +1,37 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
ocnli_reader_cfg = dict(
input_columns=['sentence1', 'sentence2'], output_column='label')
# TODO: two prompt templates for ocnli
ocnli_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
'contradiction':
'{sentence1}\nKeeping in mind the above text, consider: {sentence2}\nIs this "always", "sometimes", or "never" correct? [MASK]never',
'entailment':
'{sentence1}\nKeeping in mind the above text, consider: {sentence2}\nIs this "always", "sometimes", or "never" correct? [MASK]always',
'neutral':
'{sentence1}\nKeeping in mind the above text, consider: {sentence2}\nIs this "always", "sometimes", or "never" correct? [MASK]sometimes'
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
ocnli_eval_cfg = dict(evaluator=dict(type=AccEvaluator), )
ocnli_datasets = [
dict(
type=HFDataset,
abbr='ocnli',
path='json',
split='train',
data_files='./data/CLUE/OCNLI/dev.json',
reader_cfg=ocnli_reader_cfg,
infer_cfg=ocnli_infer_cfg,
eval_cfg=ocnli_eval_cfg)
]

View File

@ -0,0 +1,50 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
gsm8k_reader_cfg = dict(input_columns=['question'], output_column='answer')
gsm8k_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
dict(role='HUMAN', prompt='Q: There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done, there will be 21 trees. How many trees did the grove workers plant today?'),
dict(role='BOT', prompt='A: We start with 15 trees. Later we have 21 trees. The difference must be the number of trees they planted. So, they must have planted 21 - 15 = 6 trees. The answer is 6.\n'),
dict(role='HUMAN', prompt='Q: If there are 3 cars in the parking lot and 2 more cars arrive, how many cars are in the parking lot?'),
dict(role='BOT', prompt='A: There are 3 cars in the parking lot already. 2 more arrive. Now there are 3 + 2 = 5 cars. The answer is 5.\n'),
dict(role='HUMAN', prompt='Q: Leah had 32 chocolates and her sister had 42. If they ate 35, how many pieces do they have left in total?'),
dict(role='BOT', prompt="A: Leah had 32 chocolates and Leah's sister had 42. That means there were originally 32 + 42 = 74 chocolates. 35 have been eaten. So in total they still have 74 - 35 = 39 chocolates. The answer is 39.\n"),
dict(role='HUMAN', prompt='Q: Jason had 20 lollipops. He gave Denny some lollipops. Now Jason has 12 lollipops. How many lollipops did Jason give to Denny?'),
dict(role='BOT', prompt='A: Jason had 20 lollipops. Since he only has 12 now, he must have given the rest to Denny. The number of lollipops he has given to Denny must have been 20 - 12 = 8 lollipops. The answer is 8.\n'),
dict(role='HUMAN', prompt='Q: Shawn has five toys. For Christmas, he got two toys each from his mom and dad. How many toys does he have now?'),
dict(role='BOT', prompt='A: He has 5 toys. He got 2 from mom, so after that he has 5 + 2 = 7 toys. Then he got 2 more from dad, so in total he has 7 + 2 = 9 toys. The answer is 9.\n'),
dict(role='HUMAN', prompt='Q: There were nine computers in the server room. Five more computers were installed each day, from monday to thursday. How many computers are now in the server room?'),
dict(role='BOT', prompt='A: There are 4 days from monday to thursday. 5 computers were added each day. That means in total 4 * 5 = 20 computers were added. There were 9 computers in the beginning, so now there are 9 + 20 = 29 computers. The answer is 29.\n'),
dict(role='HUMAN', prompt='Q: Michael had 58 golf balls. On tuesday, he lost 23 golf balls. On wednesday, he lost 2 more. How many golf balls did he have at the end of wednesday?'),
dict(role='BOT', prompt='A: Michael initially had 58 balls. He lost 23 on Tuesday, so after that he has 58 - 23 = 35 balls. On Wednesday he lost 2 more so now he has 35 - 2 = 33 balls. The answer is 33.\n'),
dict(role='HUMAN', prompt='Q: Olivia has $23. She bought five bagels for $3 each. How much money does she have left?'),
dict(role='BOT', prompt='A: She bought 5 bagels for $3 each. This means she spent 5 * $3 = $15 on the bagels. She had $23 in beginning, so now she has $23 - $15 = $8. The answer is 8.\n'),
dict(role='HUMAN', prompt='Q: {question}'),
dict(role='BOT', prompt='A: {answer}\n'),
],
)),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
gsm8k_eval_cfg = dict(evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type='gsm8k'),
dataset_postprocessor=dict(type='gsm8k_dataset'))
gsm8k_datasets = [
dict(
type=HFDataset,
path='gsm8k',
name='main',
reader_cfg=gsm8k_reader_cfg,
infer_cfg=gsm8k_infer_cfg,
eval_cfg=gsm8k_eval_cfg)
]
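# The 'gsm8k_dataset' dataset_postprocessor above normalises the references,
# which in the raw GSM8K release end with a "#### <number>" line. A minimal
# sketch of that normalisation (hypothetical helper, not the actual
# opencompass implementation):
def _extract_gold_answer(answer: str) -> str:
    # "Natalia sold ... #### 72" -> "72"
    return answer.split('####')[-1].strip().replace(',', '')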

View File

@ -0,0 +1,51 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
gsm8k_reader_cfg = dict(input_columns=['question'], output_column='answer')
gsm8k_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
dict(role='HUMAN', prompt='Q: There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done, there will be 21 trees. How many trees did the grove workers plant today?'),
dict(role='BOT', prompt='A: We start with 15 trees. Later we have 21 trees. The difference must be the number of trees they planted. So, they must have planted 21 - 15 = 6 trees. The answer is 6.\n'),
dict(role='HUMAN', prompt='Q: If there are 3 cars in the parking lot and 2 more cars arrive, how many cars are in the parking lot?'),
dict(role='BOT', prompt='A: There are 3 cars in the parking lot already. 2 more arrive. Now there are 3 + 2 = 5 cars. The answer is 5.\n'),
dict(role='HUMAN', prompt='Q: Leah had 32 chocolates and her sister had 42. If they ate 35, how many pieces do they have left in total?'),
dict(role='BOT', prompt="A: Leah had 32 chocolates and Leah's sister had 42. That means there were originally 32 + 42 = 74 chocolates. 35 have been eaten. So in total they still have 74 - 35 = 39 chocolates. The answer is 39.\n"),
dict(role='HUMAN', prompt='Q: Jason had 20 lollipops. He gave Denny some lollipops. Now Jason has 12 lollipops. How many lollipops did Jason give to Denny?'),
dict(role='BOT', prompt='A: Jason had 20 lollipops. Since he only has 12 now, he must have given the rest to Denny. The number of lollipops he has given to Denny must have been 20 - 12 = 8 lollipops. The answer is 8.\n'),
dict(role='HUMAN', prompt='Q: Shawn has five toys. For Christmas, he got two toys each from his mom and dad. How many toys does he have now?'),
dict(role='BOT', prompt='A: He has 5 toys. He got 2 from mom, so after that he has 5 + 2 = 7 toys. Then he got 2 more from dad, so in total he has 7 + 2 = 9 toys. The answer is 9.\n'),
dict(role='HUMAN', prompt='Q: There were nine computers in the server room. Five more computers were installed each day, from monday to thursday. How many computers are now in the server room?'),
dict(role='BOT', prompt='A: There are 4 days from monday to thursday. 5 computers were added each day. That means in total 4 * 5 = 20 computers were added. There were 9 computers in the beginning, so now there are 9 + 20 = 29 computers. The answer is 29.\n'),
dict(role='HUMAN', prompt='Q: Michael had 58 golf balls. On tuesday, he lost 23 golf balls. On wednesday, he lost 2 more. How many golf balls did he have at the end of wednesday?'),
dict(role='BOT', prompt='A: Michael initially had 58 balls. He lost 23 on Tuesday, so after that he has 58 - 23 = 35 balls. On Wednesday he lost 2 more so now he has 35 - 2 = 33 balls. The answer is 33.\n'),
dict(role='HUMAN', prompt='Q: Olivia has $23. She bought five bagels for $3 each. How much money does she have left?'),
dict(role='BOT', prompt='A: She bought 5 bagels for $3 each. This means she spent 5 * $3 = $15 on the bagels. She had $23 in beginning, so now she has $23 - $15 = $8. The answer is 8.\n'),
dict(role='HUMAN', prompt='Q: {question}'),
dict(role='BOT', prompt='A: {answer}\n'),
],
)),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
gsm8k_eval_cfg = dict(evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type='gsm8k'),
dataset_postprocessor=dict(type='gsm8k_dataset'))
gsm8k_datasets = [
dict(
abbr='gsm8k',
type=HFDataset,
path='gsm8k',
name='main',
reader_cfg=gsm8k_reader_cfg,
infer_cfg=gsm8k_infer_cfg,
eval_cfg=gsm8k_eval_cfg)
]

View File

@ -0,0 +1,88 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
gsm8k_reader_cfg = dict(input_columns=['question'], output_column='answer')
gsm8k_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=
'''Question: Angelo and Melanie want to plan how many hours over the next week they should study together for their test next week. They have 2 chapters of their textbook to study and 4 worksheets to memorize. They figure out that they should dedicate 3 hours to each chapter of their textbook and 1.5 hours for each worksheet. If they plan to study no more than 4 hours each day, how many days should they plan to study total over the next week if they take a 10-minute break every hour, include 3 10-minute snack breaks each day, and 30 minutes for lunch each day?
Let's think step by step
Answer:
Angelo and Melanie think they should dedicate 3 hours to each of the 2 chapters, 3 hours x 2 chapters = 6 hours total.
For the worksheets they plan to dedicate 1.5 hours for each worksheet, 1.5 hours x 4 worksheets = 6 hours total.
Angelo and Melanie need to start with planning 12 hours to study, at 4 hours a day, 12 / 4 = 3 days.
However, they need to include time for breaks and lunch. Every hour they want to include a 10-minute break, so 12 total hours x 10 minutes = 120 extra minutes for breaks.
They also want to include 3 10-minute snack breaks, 3 x 10 minutes = 30 minutes.
And they want to include 30 minutes for lunch each day, so 120 minutes for breaks + 30 minutes for snack breaks + 30 minutes for lunch = 180 minutes, or 180 / 60 minutes per hour = 3 extra hours.
So Angelo and Melanie want to plan 12 hours to study + 3 hours of breaks = 15 hours total.
They want to study no more than 4 hours each day, 15 hours / 4 hours each day = 3.75
They will need to plan to study 4 days to allow for all the time they need.
The answer is 4
Question: Mark's basketball team scores 25 2 pointers, 8 3 pointers and 10 free throws. Their opponents score double the 2 pointers but half the 3 pointers and free throws. What's the total number of points scored by both teams added together?
Let's think step by step
Answer:
Mark's team scores 25 2 pointers, meaning they scored 25*2= 50 points in 2 pointers.
His team also scores 8 3 pointers, meaning they scored 8*3= 24 points in 3 pointers
They scored 10 free throws, and free throws count as one point so they scored 10*1=10 points in free throws.
All together his team scored 50+24+10= 84 points
Mark's opponents scored double his team's number of 2 pointers, meaning they scored 50*2=100 points in 2 pointers.
His opponents scored half his team's number of 3 pointers, meaning they scored 24/2= 12 points in 3 pointers.
They also scored half Mark's team's points in free throws, meaning they scored 10/2=5 points in free throws.
All together Mark's opponents scored 100+12+5=117 points
The total score for the game is both team's scores added together, so it is 84+117=201 points
The answer is 201
Question: Bella has two times as many marbles as frisbees. She also has 20 more frisbees than deck cards. If she buys 2/5 times more of each item, what would be the total number of the items she will have if she currently has 60 marbles?
Let's think step by step
Answer:
When Bella buys 2/5 times more marbles, she'll have increased the number of marbles by 2/5*60 = 24
The total number of marbles she'll have is 60+24 = 84
If Bella currently has 60 marbles, and she has two times as many marbles as frisbees, she has 60/2 = 30 frisbees.
If Bella buys 2/5 times more frisbees, she'll have 2/5*30 = 12 more frisbees.
The total number of frisbees she'll have will increase to 30+12 = 42
Bella also has 20 more frisbees than deck cards, meaning she has 30-20 = 10 deck cards
If she buys 2/5 times more deck cards, she'll have 2/5*10 = 4 more deck cards.
The total number of deck cards she'll have is 10+4 = 14
Together, Bella will have a total of 14+42+84 = 140 items
The answer is 140
Question: A group of 4 fruit baskets contains 9 apples, 15 oranges, and 14 bananas in the first three baskets and 2 less of each fruit in the fourth basket. How many fruits are there?
Let's think step by step
Answer:
For the first three baskets, the number of apples and oranges in one basket is 9+15=24
In total, together with bananas, the number of fruits in one basket is 24+14=38 for the first three baskets.
Since there are three baskets each having 38 fruits, there are 3*38=114 fruits in the first three baskets.
The number of apples in the fourth basket is 9-2=7
There are also 15-2=13 oranges in the fourth basket
The combined number of oranges and apples in the fourth basket is 13+7=20
The fourth basket also contains 14-2=12 bananas.
In total, the fourth basket has 20+12=32 fruits.
The four baskets together have 32+114=146 fruits.
The answer is 146
Question: {question}{answer}
'''),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512))
gsm8k_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='gsm8k'),
dataset_postprocessor=dict(type='gsm8k_dataset'))
gsm8k_datasets = [
dict(
abbr='gsm8k',
type=HFDataset,
path='gsm8k',
name='main',
reader_cfg=gsm8k_reader_cfg,
infer_cfg=gsm8k_infer_cfg,
eval_cfg=gsm8k_eval_cfg)
]

View File

@ -0,0 +1,87 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
gsm8k_reader_cfg = dict(input_columns=['question'], output_column='answer')
gsm8k_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=
'''Question: Angelo and Melanie want to plan how many hours over the next week they should study together for their test next week. They have 2 chapters of their textbook to study and 4 worksheets to memorize. They figure out that they should dedicate 3 hours to each chapter of their textbook and 1.5 hours for each worksheet. If they plan to study no more than 4 hours each day, how many days should they plan to study total over the next week if they take a 10-minute break every hour, include 3 10-minute snack breaks each day, and 30 minutes for lunch each day?
Let's think step by step
Answer:
Angelo and Melanie think they should dedicate 3 hours to each of the 2 chapters, 3 hours x 2 chapters = 6 hours total.
For the worksheets they plan to dedicate 1.5 hours for each worksheet, 1.5 hours x 4 worksheets = 6 hours total.
Angelo and Melanie need to start with planning 12 hours to study, at 4 hours a day, 12 / 4 = 3 days.
However, they need to include time for breaks and lunch. Every hour they want to include a 10-minute break, so 12 total hours x 10 minutes = 120 extra minutes for breaks.
They also want to include 3 10-minute snack breaks, 3 x 10 minutes = 30 minutes.
And they want to include 30 minutes for lunch each day, so 120 minutes for breaks + 30 minutes for snack breaks + 30 minutes for lunch = 180 minutes, or 180 / 60 minutes per hour = 3 extra hours.
So Angelo and Melanie want to plan 12 hours to study + 3 hours of breaks = 15 hours total.
They want to study no more than 4 hours each day, 15 hours / 4 hours each day = 3.75
They will need to plan to study 4 days to allow for all the time they need.
The answer is 4
Question: Mark's basketball team scores 25 2 pointers, 8 3 pointers and 10 free throws. Their opponents score double the 2 pointers but half the 3 pointers and free throws. What's the total number of points scored by both teams added together?
Let's think step by step
Answer:
Mark's team scores 25 2 pointers, meaning they scored 25*2= 50 points in 2 pointers.
His team also scores 8 3 pointers, meaning they scored 8*3= 24 points in 3 pointers
They scored 10 free throws, and free throws count as one point so they scored 10*1=10 points in free throws.
All together his team scored 50+24+10= 84 points
Mark's opponents scored double his team's number of 2 pointers, meaning they scored 50*2=100 points in 2 pointers.
His opponents scored half his team's number of 3 pointers, meaning they scored 24/2= 12 points in 3 pointers.
They also scored half Mark's team's points in free throws, meaning they scored 10/2=5 points in free throws.
All together Mark's opponents scored 100+12+5=117 points
The total score for the game is both team's scores added together, so it is 84+117=201 points
The answer is 201
Question: Bella has two times as many marbles as frisbees. She also has 20 more frisbees than deck cards. If she buys 2/5 times more of each item, what would be the total number of the items she will have if she currently has 60 marbles?
Let's think step by step
Answer:
When Bella buys 2/5 times more marbles, she'll have increased the number of marbles by 2/5*60 = 24
The total number of marbles she'll have is 60+24 = 84
If Bella currently has 60 marbles, and she has two times as many marbles as frisbees, she has 60/2 = 30 frisbees.
If Bella buys 2/5 times more frisbees, she'll have 2/5*30 = 12 more frisbees.
The total number of frisbees she'll have will increase to 30+12 = 42
Bella also has 20 more frisbees than deck cards, meaning she has 30-20 = 10 deck cards
If she buys 2/5 times more deck cards, she'll have 2/5*10 = 4 more deck cards.
The total number of deck cards she'll have is 10+4 = 14
Together, Bella will have a total of 14+42+84 = 140 items
The answer is 140
Question: A group of 4 fruit baskets contains 9 apples, 15 oranges, and 14 bananas in the first three baskets and 2 less of each fruit in the fourth basket. How many fruits are there?
Let's think step by step
Answer:
For the first three baskets, the number of apples and oranges in one basket is 9+15=24
In total, together with bananas, the number of fruits in one basket is 24+14=38 for the first three baskets.
Since there are three baskets each having 38 fruits, there are 3*38=114 fruits in the first three baskets.
The number of apples in the fourth basket is 9-2=7
There are also 15-2=13 oranges in the fourth basket
The combined number of oranges and apples in the fourth basket is 13+7=20
The fourth basket also contains 14-2=12 bananas.
In total, the fourth basket has 20+12=32 fruits.
The four baskets together have 32+114=146 fruits.
The answer is 146
Question: {question}{answer}
'''),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512))
gsm8k_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='gsm8k'),
dataset_postprocessor=dict(type='gsm8k_dataset'))
gsm8k_datasets = [
dict(
type=HFDataset,
path='gsm8k',
name='main',
reader_cfg=gsm8k_reader_cfg,
infer_cfg=gsm8k_infer_cfg,
eval_cfg=gsm8k_eval_cfg)
]

View File

@ -0,0 +1,35 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import hellaswagDataset
hellaswag_reader_cfg = dict(
input_columns=['ctx', 'A', 'B', 'C', 'D'],
output_column='label',
train_split='validation',
test_split='validation')
hellaswag_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
i: dict(round=[
dict(role="HUMAN", prompt="{ctx}"),
dict(role="BOT", prompt=f"{{{chr(ord('A') + i)}}}"),
])
for i in range(4)
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
hellaswag_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
hellaswag_datasets = [
dict(
type=hellaswagDataset,
path='hellaswag',
reader_cfg=hellaswag_reader_cfg,
infer_cfg=hellaswag_infer_cfg,
eval_cfg=hellaswag_eval_cfg)
]

View File

@ -0,0 +1,34 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import hellaswagDataset
hellaswag_reader_cfg = dict(
input_columns=['ctx', 'A', 'B', 'C', 'D'],
output_column='label',
train_split='validation',
test_split='validation')
hellaswag_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0: "{ctx} {A}",
1: "{ctx} {B}",
2: "{ctx} {C}",
3: "{ctx} {D}",
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
hellaswag_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
hellaswag_datasets = [
dict(
type=hellaswagDataset,
path='hellaswag',
reader_cfg=hellaswag_reader_cfg,
infer_cfg=hellaswag_infer_cfg,
eval_cfg=hellaswag_eval_cfg)
]

View File

@ -0,0 +1,40 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import HFDataset, HumanEvaluator
humaneval_reader_cfg = dict(
input_columns=['prompt'], output_column='task_id', train_split='test')
# TODO: allow empty output-column
humaneval_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
begin=[
dict(
role='SYSTEM',
fallback_role='HUMAN',
prompt='Complete the following python code:'),
],
round=[
dict(role='HUMAN', prompt='{prompt}'),
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512))
humaneval_eval_cfg = dict(
evaluator=dict(type=HumanEvaluator),
pred_role='BOT',
k=[1, 10, 100],  # pass@k values, only used by the HumanEval evaluator
pred_postprocessor=dict(type='humaneval'),
)
humaneval_datasets = [
dict(
type=HFDataset,
path='openai_humaneval',
reader_cfg=humaneval_reader_cfg,
infer_cfg=humaneval_infer_cfg,
eval_cfg=humaneval_eval_cfg)
]
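# The 'humaneval' pred_postprocessor trims the raw completion before the code
# is executed against the unit tests. A simplified sketch of the kind of
# cleanup involved (hypothetical; the actual opencompass implementation may
# handle more cases):
def _clean_completion(text: str) -> str:
    # Drop a leading markdown fence if the model wrapped its code in one,
    # then cut the completion at the first unrelated top-level statement.
    if text.startswith('```'):
        text = text.split('\n', 1)[-1]
    for stop in ('\nclass ', '\ndef ', '\nif __name__', '\nprint('):
        idx = text.find(stop)
        if idx != -1:
            text = text[:idx]
    return text.rstrip().rstrip('`')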

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .iwslt2017_gen_02ea0b import iwslt2017_datasets # noqa: F401, F403

View File

@ -0,0 +1,40 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import BM25Retriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import BleuEvaluator
from opencompass.datasets import IWSLT2017Dataset
iwslt2017_reader_cfg = dict(
input_columns='en', output_column='de', train_split='validation')
iwslt2017_infer_cfg = dict(
ice_template=dict(type=PromptTemplate,
template=dict(
begin=[
dict(role='SYSTEM', fallback_role="HUMAN", prompt='Please translate the following English statements to German:'),
'</E>',
],
round=[
dict(role='HUMAN', prompt='{en}'),
dict(role='BOT', prompt='{de}'),
]
),
ice_token='</E>'),
retriever=dict(type=BM25Retriever, ice_num=1),
inferencer=dict(type=GenInferencer))
iwslt2017_eval_cfg = dict(
evaluator=dict(type=BleuEvaluator),
pred_role='BOT',
pred_postprocessor=dict(type='general_cn'),
dataset_postprocessor=dict(type='general_cn'))
iwslt2017_datasets = [
dict(
type=IWSLT2017Dataset,
path='iwslt2017',
name='iwslt2017-en-de',
reader_cfg=iwslt2017_reader_cfg,
infer_cfg=iwslt2017_infer_cfg,
eval_cfg=iwslt2017_eval_cfg)
]

View File

@ -0,0 +1,29 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import lambadaDataset, LambadaEvaluator
lambada_reader_cfg = dict(
input_columns=['prompt'],
output_column='label',
train_split='test',
test_split='test')
lambada_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template='Please complete the following sentence: {prompt}'),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=5))
lambada_eval_cfg = dict(evaluator=dict(type=LambadaEvaluator))
lambada_datasets = [
dict(
abbr='lambada',
type=lambadaDataset,
path='craffel/openai_lambada',
reader_cfg=lambada_reader_cfg,
infer_cfg=lambada_infer_cfg,
eval_cfg=lambada_eval_cfg)
]

View File

@ -0,0 +1,53 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import MATHDataset, MATHEvaluator
math_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template='''Problem:
Find the domain of the expression $\frac{{\sqrt{{x-2}}}}{{\sqrt{{5-x}}}}$.}}
Solution:
The expressions inside each square root must be non-negative. Therefore, $x-2 \ge 0$, so $x\ge2$, and $5 - x \ge 0$, so $x \le 5$. Also, the denominator cannot be equal to zero, so $5-x>0$, which gives $x<5$. Therefore, the domain of the expression is $\boxed{{[2,5)}}$.
Final Answer: The final answer is $[2,5)$. I hope it is correct.
Problem:
If $\det \mathbf{{A}} = 2$ and $\det \mathbf{{B}} = 12,$ then find $\det (\mathbf{{A}} \mathbf{{B}}).$
Solution:
We have that $\det (\mathbf{{A}} \mathbf{{B}}) = (\det \mathbf{{A}})(\det \mathbf{{B}}) = (2)(12) = \boxed{{24}}.$
Final Answer: The final answer is $24$. I hope it is correct.
Problem:
Terrell usually lifts two 20-pound weights 12 times. If he uses two 15-pound weights instead, how many times must Terrell lift them in order to lift the same total weight?
Solution:
If Terrell lifts two 20-pound weights 12 times, he lifts a total of $2\cdot 12\cdot20=480$ pounds of weight. If he lifts two 15-pound weights instead for $n$ times, he will lift a total of $2\cdot15\cdot n=30n$ pounds of weight. Equating this to 480 pounds, we can solve for $n$: \begin{{align*}} 30n&=480\\ \Rightarrow\qquad n&=480/30=\boxed{{16}} \end{{align*}}
Final Answer: The final answer is $16$. I hope it is correct.
Problem:
If the system of equations: \begin{{align*}} 6x-4y&=a,\\ 6y-9x &=b. \end{{align*}}has a solution $(x, y)$ where $x$ and $y$ are both nonzero, find $\frac{{a}}{{b}},$ assuming $b$ is nonzero.
Solution:
If we multiply the first equation by $-\frac{{3}}{{2}}$, we obtain $$6y-9x=-\frac{{3}}{{2}}a.$$Since we also know that $6y-9x=b$, we have $$-\frac{{3}}{{2}}a=b\Rightarrow\frac{{a}}{{b}}=\boxed{{-\frac{{2}}{{3}}}}.$$
Final Answer: The final answer is $-\frac{{2}}{{3}}$. I hope it is correct.
Problem:
{problem}
Solution:
{solution}'''),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512))
math_eval_cfg = dict(
evaluator=dict(type=MATHEvaluator), pred_postprocessor=dict(type='math'))
math_datasets = [
dict(
type=MATHDataset,
abbr='math',
path='./data/math/math.json',
reader_cfg=dict(
input_columns=['problem'],
output_column='solution',
),
infer_cfg=math_infer_cfg,
eval_cfg=math_eval_cfg)
]
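Every worked solution in the prompt above ends with a fixed `Final Answer:` sentence, which is what the `math` post-processor keys on before `MATHEvaluator` compares answers. A hedged sketch of that extraction step (the regex and the `\boxed{}` fallback are assumptions for illustration, not the actual post-processor):

```python
# Hedged sketch: recover the final answer from a MATH-style completion that
# follows the "Final Answer: The final answer is $...$." convention above.
import re

def extract_final_answer(completion: str) -> str:
    m = re.search(r"Final Answer: The final answer is \$?(.+?)\$?\. I hope",
                  completion)
    if m:
        return m.group(1).strip()
    # fallback: last \boxed{...} (naive, does not handle nested braces)
    boxed = re.findall(r"\\boxed\{(.+?)\}", completion)
    return boxed[-1].strip() if boxed else completion.strip()

sample = ("Equating 30n with 480 gives n=\\boxed{16}.\n"
          "Final Answer: The final answer is $16$. I hope it is correct.")
print(extract_final_answer(sample))   # -> 16
```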

View File

@ -0,0 +1,64 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import MBPPDataset, MBPPEvaluator
mbpp_reader_cfg = dict(
input_columns=['text', 'test_list'], output_column='code')
mbpp_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
dict(
role="HUMAN",
prompt=
"You are an expert Python programmer, and here is your task: Write a function to find the similar elements from the given two tuple lists. Your code should pass these tests:\n\n assert similar_elements((3, 4, 5, 6),(5, 7, 4, 10)) == (4, 5)\n assert similar_elements((1, 2, 3, 4),(5, 4, 3, 7)) == (3, 4) \n assert similar_elements((11, 12, 14, 13),(17, 15, 14, 13)) == (13, 14) \n"
),
dict(
role="BOT",
prompt=
"[BEGIN]\n 'def similar_elements(test_tup1, test_tup2):\r\n res = tuple(set(test_tup1) & set(test_tup2))\r\n return (res)' \n[DONE] \n\n "
),
dict(
role="HUMAN",
prompt=
"You are an expert Python programmer, and here is your task: Write a python function to identify non-prime numbers. Your code should pass these tests:\n\n assert is_not_prime(2) == False \n assert is_not_prime(10) == True \n assert is_not_prime(35) == True \n"
),
dict(
role="BOT",
prompt=
"[BEGIN]\n 'import math\r\ndef is_not_prime(n):\r\n result = False\r\n for i in range(2,int(math.sqrt(n)) + 1):\r\n if n % i == 0:\r\n result = True\r\n return result' \n[DONE] \n\n "
),
dict(
role="HUMAN",
prompt=
"You are an expert Python programmer, and here is your task: Write a function to find the largest integers from a given list of numbers using heap queue algorithm. Your code should pass these tests:\n\n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],3)==[85, 75, 65] \n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],2)==[85, 75] \n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],5)==[85, 75, 65, 58, 35] \n"
),
dict(
role="BOT",
prompt=
"[BEGIN]\n 'import heapq as hq\r\ndef heap_queue_largest(nums,n):\r\n largest_nums = hq.nlargest(n, nums)\r\n return largest_nums' \n[DONE] \n\n "
),
dict(
role="HUMAN",
prompt=
"You are an expert Python programmer, and here is your task: {text} Your code should pass these tests:\n\n {test_list} \n"
),
dict(role="BOT", prompt="[BEGIN]\n"),
], )),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512))
mbpp_eval_cfg = dict(evaluator=dict(type=MBPPEvaluator), pred_role="BOT")
mbpp_datasets = [
dict(
type=MBPPDataset,
abbr='mbpp',
path='./data/mbpp/mbpp.jsonl',
reader_cfg=mbpp_reader_cfg,
infer_cfg=mbpp_infer_cfg,
eval_cfg=mbpp_eval_cfg)
]
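`MBPPEvaluator` judges each completion by whether it satisfies the `test_list` assertions shown in the prompt. A rough, hedged sketch of that check (trusted-code assumption, no sandboxing; the real evaluator is more involved):

```python
# Hedged sketch of an MBPP-style functional check: execute the candidate code
# and then every assert from test_list; any exception counts as a failure.
from typing import Dict, List

def passes_tests(candidate_code: str, test_list: List[str]) -> bool:
    namespace: Dict = {}
    try:
        exec(candidate_code, namespace)      # define the candidate function
        for test in test_list:
            exec(test, namespace)            # each entry is an assert line
        return True
    except Exception:
        return False

print(passes_tests(
    "def is_not_prime(n):\n    return any(n % i == 0 for i in range(2, n))",
    ["assert is_not_prime(10) == True", "assert is_not_prime(2) == False"],
))  # -> True
```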

View File

@ -0,0 +1,123 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import FixKRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import MMLUDataset
# None of the MMLU datasets on HuggingFace are parsed correctly, so we use our own dataset reader
# Please download the dataset from https://people.eecs.berkeley.edu/~hendrycks/data.tar
mmlu_reader_cfg = dict(
input_columns=["input", "A", "B", "C", "D"],
output_column="target",
train_split='dev')
mmlu_prompt_template = dict(
    type=PromptTemplate,
template=None,
ice_token='</E>')
mmlu_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role='HUMAN',
prompt='{input}\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nAnswer: '
),
dict(role='BOT', prompt='{target}\n')
])),
prompt_template=mmlu_prompt_template,
retriever=dict(type=FixKRetriever),
inferencer=dict(type=GenInferencer, fix_id_list=[0, 1, 2, 3, 4]))
mmlu_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='first-capital'))
mmlu_all_sets = [
"college_biology",
"college_chemistry",
"college_computer_science",
"college_mathematics",
"college_physics",
"electrical_engineering",
"astronomy",
"anatomy",
"abstract_algebra",
"machine_learning",
"clinical_knowledge",
"global_facts",
"management",
"nutrition",
"marketing",
"professional_accounting",
"high_school_geography",
"international_law",
"moral_scenarios",
"computer_security",
"high_school_microeconomics",
"professional_law",
"medical_genetics",
"professional_psychology",
"jurisprudence",
"world_religions",
"philosophy",
"virology",
"high_school_chemistry",
"public_relations",
"high_school_macroeconomics",
"human_sexuality",
"elementary_mathematics",
"high_school_physics",
"high_school_computer_science",
"high_school_european_history",
"business_ethics",
"moral_disputes",
"high_school_statistics",
"miscellaneous",
"formal_logic",
"high_school_government_and_politics",
"prehistory",
"security_studies",
"high_school_biology",
"logical_fallacies",
"high_school_world_history",
"professional_medicine",
"high_school_mathematics",
"college_medicine",
"high_school_us_history",
"sociology",
"econometrics",
"high_school_psychology",
"human_aging",
"us_foreign_policy",
"conceptual_physics",
]
mmlu_datasets = []
for _name in mmlu_all_sets:
mmlu_datasets.append(
dict(
abbr=f"lukaemon_mmlu_{_name}",
type=MMLUDataset,
path="./data/mmlu/",
name=_name,
reader_cfg=mmlu_reader_cfg,
infer_cfg=mmlu_infer_cfg.copy(),
eval_cfg=mmlu_eval_cfg))
mmlu_datasets[-1]['infer_cfg'][
'prompt_template'] = mmlu_prompt_template.copy()
mmlu_datasets[-1]['infer_cfg']['prompt_template']['template'] = \
dict(
begin=[
dict(role='SYSTEM', fallback_role='HUMAN', prompt=f'The following are multiple choice questions (with answers) about {_name.replace("_", " ")}.'),
'</E>',
],
round=[
dict(role='HUMAN', prompt='{input}\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nAnswer: '),
]
)
del _name
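The comment at the top of this file points at the Hendrycks data tarball rather than the HuggingFace hub. A hedged sketch of fetching and unpacking it so that `path="./data/mmlu/"` resolves (the layout inside the archive is an assumption, so verify the extracted folders before running):

```python
# Hedged helper for the download step mentioned in the comment above.
# Assumption: the tar extracts CSV splits (dev/val/test); check the layout
# and adjust the MMLUDataset path accordingly.
import tarfile
import urllib.request
from pathlib import Path

URL = "https://people.eecs.berkeley.edu/~hendrycks/data.tar"
dest = Path("./data/mmlu")
dest.mkdir(parents=True, exist_ok=True)

archive = dest / "data.tar"
urllib.request.urlretrieve(URL, archive)
with tarfile.open(archive) as tar:
    tar.extractall(dest)
print("extracted under", dest)
```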

View File

@ -0,0 +1,113 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import FixKRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import MMLUDataset
# None of the MMLU datasets on HuggingFace are parsed correctly, so we use our own dataset reader
# Please download the dataset from https://people.eecs.berkeley.edu/~hendrycks/data.tar
mmlu_reader_cfg = dict(
input_columns=["input", "A", "B", "C", "D"],
output_column="target",
train_split='dev')
mmlu_all_sets = [
"college_biology",
"college_chemistry",
"college_computer_science",
"college_mathematics",
"college_physics",
"electrical_engineering",
"astronomy",
"anatomy",
"abstract_algebra",
"machine_learning",
"clinical_knowledge",
"global_facts",
"management",
"nutrition",
"marketing",
"professional_accounting",
"high_school_geography",
"international_law",
"moral_scenarios",
"computer_security",
"high_school_microeconomics",
"professional_law",
"medical_genetics",
"professional_psychology",
"jurisprudence",
"world_religions",
"philosophy",
"virology",
"high_school_chemistry",
"public_relations",
"high_school_macroeconomics",
"human_sexuality",
"elementary_mathematics",
"high_school_physics",
"high_school_computer_science",
"high_school_european_history",
"business_ethics",
"moral_disputes",
"high_school_statistics",
"miscellaneous",
"formal_logic",
"high_school_government_and_politics",
"prehistory",
"security_studies",
"high_school_biology",
"logical_fallacies",
"high_school_world_history",
"professional_medicine",
"high_school_mathematics",
"college_medicine",
"high_school_us_history",
"sociology",
"econometrics",
"high_school_psychology",
"human_aging",
"us_foreign_policy",
"conceptual_physics",
]
mmlu_datasets = []
for _name in mmlu_all_sets:
_hint = f'The following are multiple choice questions (with answers) about {_name.replace("_", " ")}.\n\n'
mmlu_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template={
opt:
f"{{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nAnswer: {opt}\n"
for opt in ["A", "B", "C", "D"]
},
),
prompt_template=dict(
type=PromptTemplate,
template={
opt:
f"{_hint}</E>{{input}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\nAnswer: {opt}"
for opt in ["A", "B", "C", "D"]
},
ice_token="</E>",
),
retriever=dict(type=FixKRetriever),
inferencer=dict(type=PPLInferencer, fix_id_list=[0, 1, 2, 3, 4]),
)
mmlu_eval_cfg = dict(evaluator=dict(type=AccEvaluator), )
mmlu_datasets.append(
dict(
abbr=f"lukaemon_mmlu_{_name}",
type=MMLUDataset,
path="./data/mmlu/",
name=_name,
reader_cfg=mmlu_reader_cfg,
infer_cfg=mmlu_infer_cfg,
eval_cfg=mmlu_eval_cfg,
))
del _name, _hint

View File

@ -0,0 +1,27 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import NaturalQuestionDataset, NQEvaluator
nq_reader_cfg = dict(
input_columns=['question'], output_column='answer', train_split='test')
nq_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template="Answer these questions:\nQ: {question}?\nA:{answer}",
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
nq_eval_cfg = dict(evaluator=dict(type=NQEvaluator), pred_role="BOT")
nq_datasets = [
dict(
type=NaturalQuestionDataset,
abbr='nq',
path='./data/nq/',
reader_cfg=nq_reader_cfg,
infer_cfg=nq_infer_cfg,
eval_cfg=nq_eval_cfg)
]

View File

@ -0,0 +1,4 @@
from mmengine.config import read_base
with read_base():
from .obqa_gen_b2cde9 import obqa_datasets # noqa: F401, F403

View File

@ -0,0 +1,62 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import OBQADataset
_input_columns = [
["question_stem", "A", "B", "C", "D"],
["question_stem", "A", "B", "C", "D", "fact1"],
]
_template = [
dict(
round=[
dict(
role="HUMAN",
prompt="Question: {question_stem}\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nAnswer:"
),
], ),
dict(
round=[
dict(
role="HUMAN",
prompt="Given the fact: {fact1}\nQuestion: {question_stem}\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nAnswer:",
),
], ),
]
obqa_datasets = [
dict(
abbr="openbookqa",
type=OBQADataset,
path="openbookqa",
split="test",
),
dict(
abbr="openbookqa_fact",
type=OBQADataset,
path="openbookqa",
name="additional",
split="test",
),
]
for _i in range(2):
obqa_reader_cfg = dict(
input_columns=_input_columns[_i], output_column="answerKey")
obqa_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=_template[_i]),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer),
)
obqa_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type="first-capital"),
)
obqa_datasets[_i]["reader_cfg"] = obqa_reader_cfg
obqa_datasets[_i]["infer_cfg"] = obqa_infer_cfg
obqa_datasets[_i]["eval_cfg"] = obqa_eval_cfg

View File

@ -0,0 +1,36 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import QASPERDataset, TriviaQAEvaluator
qasper_reader_cfg = dict(
input_columns=['question', 'evidence'],
output_column='answer',
train_split='dev',
test_split='dev')
qasper_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
dict(
role='HUMAN',
                prompt='{evidence}\nAnswer these questions:\nQ: {question}?\nA:'),
dict(role='BOT', prompt=''),
], )),
retriever=dict(type=ZeroRetriever),
inferencer=dict(
type=GenInferencer, max_out_len=50, max_seq_len=8192, batch_size=4))
qasper_eval_cfg = dict(evaluator=dict(type=TriviaQAEvaluator), pred_role='BOT')
qasper_datasets = [
dict(
type=QASPERDataset,
abbr='QASPER',
path='./data/QASPER/',
reader_cfg=qasper_reader_cfg,
infer_cfg=qasper_infer_cfg,
eval_cfg=qasper_eval_cfg)
]

View File

@ -0,0 +1,30 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import QASPERDataset, TriviaQAEvaluator
qasper_reader_cfg = dict(
input_columns=['question', 'evidence'],
output_column='answer',
train_split='dev',
test_split='dev')
qasper_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template="{evidence}\nAnswer these questions:\nQ: {question}?\nA:"),
retriever=dict(type=ZeroRetriever),
inferencer=dict(
type=GenInferencer, max_out_len=50, max_seq_len=8192, batch_size=4))
qasper_eval_cfg = dict(evaluator=dict(type=TriviaQAEvaluator))
qasper_datasets = [
dict(
type=QASPERDataset,
abbr='QASPER',
path='./data/QASPER/',
reader_cfg=qasper_reader_cfg,
infer_cfg=qasper_infer_cfg,
eval_cfg=qasper_eval_cfg)
]

View File

@ -0,0 +1,48 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import RaceDataset
race_reader_cfg = dict(
input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
output_column='answer')
race_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
ans: dict(round=[
dict(
role="HUMAN",
prompt=
"Read the article, and answer the question by replying A, B, C or D.\n\nArticle:\n{article}\n\nQ: {question}\n\nA. {A}\nB. {B}\nC. {C}\nD. {D}"
),
dict(role="BOT", prompt=ans_token),
])
for ans, ans_token in [["A", "{A}"], ["B", "{B}"], ["C", "{C}"],
["D", "{D}"]]
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
race_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
race_datasets = [
dict(
type=RaceDataset,
abbr='race-middle',
path='race',
name='middle',
reader_cfg=race_reader_cfg,
infer_cfg=race_infer_cfg,
eval_cfg=race_eval_cfg),
dict(
type=RaceDataset,
abbr='race-high',
path='race',
name='high',
reader_cfg=race_reader_cfg,
infer_cfg=race_infer_cfg,
eval_cfg=race_eval_cfg)
]

View File

@ -0,0 +1,46 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import RaceDataset
race_reader_cfg = dict(
input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
output_column='answer')
race_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
'A':
'Read the article, and answer the question by replying A, B, C or D.\n\n{article}\n\nQ: {question}\n\nA. {A}\nB. {B}\nC. {C}\nD. {D}\n\nAnswer: A',
'B':
'Read the article, and answer the question by replying A, B, C or D.\n\n{article}\n\nQ: {question}\n\nA. {A}\nB. {B}\nC. {C}\nD. {D}\n\nAnswer: B',
'C':
'Read the article, and answer the question by replying A, B, C or D.\n\n{article}\n\nQ: {question}\n\nA. {A}\nB. {B}\nC. {C}\nD. {D}\n\nAnswer: C',
'D':
'Read the article, and answer the question by replying A, B, C or D.\n\n{article}\n\nQ: {question}\n\nA. {A}\nB. {B}\nC. {C}\nD. {D}\n\nAnswer: D',
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
race_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
race_datasets = [
dict(
type=RaceDataset,
abbr='race-middle',
path='race',
name='middle',
reader_cfg=race_reader_cfg,
infer_cfg=race_infer_cfg,
eval_cfg=race_eval_cfg),
dict(
type=RaceDataset,
abbr='race-high',
path='race',
name='high',
reader_cfg=race_reader_cfg,
infer_cfg=race_infer_cfg,
eval_cfg=race_eval_cfg)
]

View File

@ -0,0 +1,34 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import ToxicEvaluator
from opencompass.datasets import RealToxicPromptsDataset
realtoxicprompts_reader_cfg = dict(
input_columns=['prompt_text'],
output_column='filename',
train_split='train',
test_split='train')
# TODO: allow empty output-column
realtoxicprompts_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template='{prompt_text}'),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
# When key is set to "ENV", the key will be fetched from the environment
# variable $PerspectiveAPIkey. Otherwise, set the key here directly.
realtoxicprompts_eval_cfg = dict(
evaluator=dict(type=ToxicEvaluator, key='ENV'), )
realtoxicprompts_datasets = [
dict(
type=RealToxicPromptsDataset,
path='allenai/real-toxicity-prompts',
challenging_subset=True,
reader_cfg=realtoxicprompts_reader_cfg,
infer_cfg=realtoxicprompts_infer_cfg,
eval_cfg=realtoxicprompts_eval_cfg)
]
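As noted in the comment above, `key='ENV'` defers the Perspective API key to an environment variable. A minimal sketch of that lookup convention (the evaluator's own handling may differ in its details):

```python
# Minimal sketch of the key='ENV' convention described above.
import os

def resolve_perspective_key(key: str) -> str:
    if key == "ENV":
        value = os.environ.get("PerspectiveAPIkey")
        if not value:
            raise ValueError("Set the PerspectiveAPIkey environment variable "
                             "or put the key directly in the config.")
        return value
    return key
```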

View File

@ -0,0 +1,45 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
siqa_reader_cfg = dict(
input_columns=['context', 'question', 'answerA', 'answerB', 'answerC'],
output_column='label',
test_split='validation')
siqa_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
1:
dict(round=[
dict(role='HUMAN', prompt="{context}\nQuestion: {question}\nAnswer:"),
dict(role='BOT', prompt="{answerA}")
]),
2:
dict(round=[
dict(role='HUMAN', prompt="{context}\nQuestion: {question}\nAnswer:"),
dict(role='BOT', prompt="{answerB}")
]),
3:
dict(round=[
dict(role='HUMAN', prompt="{context}\nQuestion: {question}\nAnswer:"),
dict(role='BOT', prompt="{answerC}")
]),
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
siqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
siqa_datasets = [
dict(
abbr="siqa",
type=HFDataset,
path='social_i_qa',
reader_cfg=siqa_reader_cfg,
infer_cfg=siqa_infer_cfg,
eval_cfg=siqa_eval_cfg)
]

View File

@ -0,0 +1,45 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import storyclozeDataset_V2
storycloze_reader_cfg = dict(
input_columns=["context", "sentence_quiz1", "sentence_quiz2"],
output_column="answer_right_ending",
)
storycloze_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role="HUMAN",
prompt=
"{context}\nQuestion: Which ending makes the most sense?\nA. {sentence_quiz1}\nB. {sentence_quiz2}\nYou may choose between 'A' and 'B'.\nAnswer:",
),
]),
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer),
)
storycloze_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_role="BOT",
pred_postprocessor=dict(type="first-capital"),
)
# The original Story Cloze dataset and its repo are no longer maintained,
# so we use the multilingual version of this dataset instead.
storycloze_datasets = [
dict(
abbr="story_cloze",
type=storyclozeDataset_V2,
path="juletxara/xstory_cloze",
name="en",
reader_cfg=storycloze_reader_cfg,
infer_cfg=storycloze_infer_cfg,
eval_cfg=storycloze_eval_cfg,
)
]

View File

@ -0,0 +1,58 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
strategyqa_reader_cfg = dict(
input_columns=['question'],
output_column='answer',
train_split='test',
test_split='test')
strategyqa_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template='''Yes or no: Q: Do hamsters provide food for any animals?
A: Hamsters are prey animals. Prey are food for predators. Thus, hamsters provide food for some animals.
So the answer is yes.
Q: Yes or no: Could Brooke Shields succeed at University of Pennsylvania?
A: Brooke Shields went to Princeton University. Princeton University is about as academically rigorous as the University of Pennsylvania. Thus, Brooke Shields could also succeed at the University of Pennsylvania.
So the answer is yes.
Q: Yes or no: Hydrogen's atomic number squared exceeds number of Spice Girls?
A: Hydrogen has an atomic number of 1. 1 squared is 1. There are 5 Spice Girls. Thus, Hydrogen's atomic number squared is less than 5.
So the answer is no.
Q: Yes or no: Is it common to see frost during some college commencements?
A: College commencement ceremonies can happen in December, May, and June. December is in the winter, so there can be frost. Thus, there could be frost at some commencements.
So the answer is yes.
Q: Yes or no: Could a llama birth twice during War in Vietnam (1945-46)?
A: The War in Vietnam was 6 months. The gestation period for a llama is 11 months, which is more than 6 months. Thus, a llama could not give birth twice during the War in Vietnam.
So the answer is no.
Q: Yes or no: Would a pear sink in water?
A: The density of a pear is about 0.6g/cm3, which is less than water. Objects less dense than water float. Thus, a pear would float.
So the answer is no.
Q: {question}{answer}
'''),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512))
strategyqa_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='strategyqa'),
dataset_postprocessor=dict(type='strategyqa_dataset'))
strategyqa_datasets = [
dict(
abbr='strategyqa',
type=HFDataset,
path='wics/strategy-qa',
reader_cfg=strategyqa_reader_cfg,
infer_cfg=strategyqa_infer_cfg,
eval_cfg=strategyqa_eval_cfg)
]
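Both StrategyQA configs rely on a `strategyqa` prediction post-processor that reduces a chain of thought ending in "So the answer is yes/no" to a comparable label. A hedged sketch of that reduction (the real post-processor may normalize differently):

```python
# Hedged sketch: map a chain-of-thought completion to 'yes'/'no' by reading
# the closing "So the answer is ..." sentence used in the few-shot prompt.
import re

def strategyqa_postprocess(completion: str) -> str:
    m = re.search(r"So the answer is (yes|no)", completion, flags=re.IGNORECASE)
    return m.group(1).lower() if m else ""

print(strategyqa_postprocess(
    "A pear is less dense than water, so it floats.\nSo the answer is no"
))   # -> no
```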

View File

@ -0,0 +1,94 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
strategyqa_reader_cfg = dict(
input_columns=['question'],
output_column='answer',
train_split='test',
test_split='test')
strategyqa_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
round=[
dict(
role='HUMAN',
prompt=
'Question: Do hamsters provide food for any animals?\nAnswer:'
),
dict(
role='BOT',
prompt=
'Hamsters are prey animals. Prey are food for predators. Thus, hamsters provide food for some animals.\nSo the answer is yes'
),
dict(
role='HUMAN',
prompt=
'Question: Could Brooke Shields succeed at University of Pennsylvania?\nAnswer:'
),
dict(
role='BOT',
prompt=
'Brooke Shields went to Princeton University. Princeton University is about as academically rigorous as the University of Pennsylvania. Thus, Brooke Shields could also succeed at the University of Pennsylvania.\nSo the answer is yes'
),
dict(
role='HUMAN',
prompt=
'Question: Hydrogen\'s atomic number squared exceeds number of Spice Girls?\nAnswer:'
),
dict(
role='BOT',
prompt=
'Hydrogen has an atomic number of 1. 1 squared is 1. There are 5 Spice Girls. Thus, Hydrogen\'s atomic number squared is less than 5.\nSo the answer is no'
),
dict(
role='HUMAN',
prompt=
'Question: Is it common to see frost during some college commencements?\nAnswer:'
),
dict(
role='BOT',
prompt=
'College commencement ceremonies can happen in December, May, and June. December is in the winter, so there can be frost. Thus, there could be frost at some commencements.\nSo the answer is yes'
),
dict(
role='HUMAN',
prompt=
'Question: Yes or no: Could a llama birth twice during War in Vietnam (1945-46)?\nAnswer:'
),
dict(
role='BOT',
prompt=
'The War in Vietnam was 6 months. The gestation period for a llama is 11 months, which is more than 6 months. Thus, a llama could not give birth twice during the War in Vietnam.\nSo the answer is no'
),
dict(
role='HUMAN',
prompt='Question: Would a pear sink in water?\nAnswer:'),
dict(
role='BOT',
prompt=
'The density of a pear is about 0.6g/cm3, which is less than water. Objects less dense than water float. Thus, a pear would float.\nSo the answer is no'
),
dict(role='HUMAN', prompt='Question: {question}\nAnswer:'),
], )),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512))
strategyqa_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='strategyqa'),
dataset_postprocessor=dict(type='strategyqa_dataset'))
strategyqa_datasets = [
dict(
abbr='strategyqa',
type=HFDataset,
path='wics/strategy-qa',
reader_cfg=strategyqa_reader_cfg,
infer_cfg=strategyqa_infer_cfg,
eval_cfg=strategyqa_eval_cfg)
]

Some files were not shown because too many files have changed in this diff.