Update configs (#9)

* Update implements * Update
2025-05-30 16:03:24 +08:00 · 2023-07-06 12:27:41 +08:00 · 2023-07-06 12:27:41 +08:00 · 86d5ec3d0f
commit 86d5ec3d0f
parent 2d0b184bb6
138 changed files with 807 additions and 371 deletions
--- a/configs/datasets/ARC_c/ARC_c_gen_1e0de5.py
+++ b/configs/datasets/ARC_c/ARC_c_gen_1e0de5.py
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
 from opencompass.datasets import ARCDataset
+from opencompass.utils.text_postprocessors import first_capital_postprocess

 ARC_c_reader_cfg = dict(
    input_columns=["question", "textA", "textB", "textC", "textD"],
@ -27,7 +28,7 @@ ARC_c_infer_cfg = dict(
 ARC_c_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_role="BOT",
-    pred_postprocessor=dict(type="first-capital"),
+    pred_postprocessor=dict(type=first_capital_postprocess),
 )

 ARC_c_datasets = [
--- a/configs/datasets/ARC_e/ARC_e_gen_1e0de5.py
+++ b/configs/datasets/ARC_e/ARC_e_gen_1e0de5.py
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
 from opencompass.datasets import ARCDataset
+from opencompass.utils.text_postprocessors import first_capital_postprocess

 ARC_e_reader_cfg = dict(
    input_columns=["question", "textA", "textB", "textC", "textD"],
@ -27,7 +28,7 @@ ARC_e_infer_cfg = dict(
 ARC_e_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_role="BOT",
-    pred_postprocessor=dict(type="first-capital"),
+    pred_postprocessor=dict(type=first_capital_postprocess),
 )

 ARC_e_datasets = [
--- a/configs/datasets/CLUE_C3/CLUE_C3_gen_8c358f.py
+++ b/configs/datasets/CLUE_C3/CLUE_C3_gen_8c358f.py
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
 from opencompass.datasets import C3Dataset_V2
+from opencompass.utils.text_postprocessors import first_capital_postprocess

 C3_reader_cfg = dict(
    input_columns=[
@ -35,7 +36,7 @@ C3_infer_cfg = dict(
 C3_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_role="BOT",
-    pred_postprocessor=dict(type="first-capital"),
+    pred_postprocessor=dict(type=first_capital_postprocess),
 )

 C3_datasets = [
--- a/configs/datasets/CLUE_CMRC/CLUE_CMRC_gen.py
+++ b/configs/datasets/CLUE_CMRC/CLUE_CMRC_gen.py
@ -1,4 +1,4 @@
 from mmengine.config import read_base

 with read_base():
-    from .CLUE_CMRC_gen_941108 import CMRC_datasets  # noqa: F401, F403
+    from .CLUE_CMRC_gen_1bd3c8 import CMRC_datasets  # noqa: F401, F403
--- a/configs/datasets/CLUE_CMRC/CLUE_CMRC_gen_1bd3c8.py
+++ b/configs/datasets/CLUE_CMRC/CLUE_CMRC_gen_1bd3c8.py
@ -0,0 +1,35 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.openicl.icl_evaluator import EMEvaluator
+from opencompass.datasets import CMRCDataset, cmrc_postprocess
+
+CMRC_reader_cfg = dict(
+    input_columns=['question', 'context'], output_column='answers')
+
+CMRC_infer_cfg = dict(
+    prompt_template=dict(
+        type=PromptTemplate,
+        template=dict(round=[
+            dict(
+                role="HUMAN",
+                prompt="根据文章回答问题。你的答案应该尽可能简练，请以 ‘答案是’ 开头的句式作答。\n文章：{context}\n问：{question}\n答："),
+        ])),
+    retriever=dict(type=ZeroRetriever),
+    inferencer=dict(type=GenInferencer))
+
+CMRC_eval_cfg = dict(
+    evaluator=dict(type=EMEvaluator),
+    pred_role="BOT",
+    pred_postprocessor=dict(type=cmrc_postprocess),
+)
+
+CMRC_datasets = [
+    dict(
+        type=CMRCDataset,
+        abbr='CMRC_dev',
+        path='./data/CLUE/CMRC/dev.json',
+        reader_cfg=CMRC_reader_cfg,
+        infer_cfg=CMRC_infer_cfg,
+        eval_cfg=CMRC_eval_cfg),
+]
--- a/configs/datasets/CLUE_DRCD/CLUE_DRCD_gen.py
+++ b/configs/datasets/CLUE_DRCD/CLUE_DRCD_gen.py
@ -1,4 +1,4 @@
 from mmengine.config import read_base

 with read_base():
-    from .CLUE_DRCD_gen_941108 import DRCD_datasets  # noqa: F401, F403
+    from .CLUE_DRCD_gen_1bd3c8 import DRCD_datasets  # noqa: F401, F403
--- a/configs/datasets/CLUE_DRCD/CLUE_DRCD_gen_1bd3c8.py
+++ b/configs/datasets/CLUE_DRCD/CLUE_DRCD_gen_1bd3c8.py
@ -0,0 +1,36 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.openicl.icl_evaluator import EMEvaluator
+from opencompass.datasets import DRCDDataset, drcd_postprocess
+
+DRCD_reader_cfg = dict(
+    input_columns=['question', 'context'], output_column='answers')
+
+DRCD_infer_cfg = dict(
+    prompt_template=dict(
+        type=PromptTemplate,
+        template=dict(round=[
+            dict(
+                role="HUMAN",
+                prompt="根据文章回答问题。你的答案应该尽可能简练，请以 ‘答案是’ 开头的句式作答。\n文章：{context}\n问：{question}\n答："),
+        ])),
+    retriever=dict(type=ZeroRetriever),
+    inferencer=dict(type=GenInferencer))
+
+DRCD_eval_cfg = dict(
+    evaluator=dict(type=EMEvaluator),
+    pred_role="BOT",
+    pred_postprocessor=dict(type=drcd_postprocess),
+
+)
+
+DRCD_datasets = [
+    dict(
+        type=DRCDDataset,
+        abbr='DRCD_dev',
+        path='./data/CLUE/DRCD/dev.json',
+        reader_cfg=DRCD_reader_cfg,
+        infer_cfg=DRCD_infer_cfg,
+        eval_cfg=DRCD_eval_cfg),
+]
--- a/configs/datasets/CLUE_afqmc/CLUE_afqmc_gen_901306.py
+++ b/configs/datasets/CLUE_afqmc/CLUE_afqmc_gen_901306.py
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
 from opencompass.datasets import AFQMCDataset_V2
+from opencompass.utils.text_postprocessors import first_capital_postprocess

 afqmc_reader_cfg = dict(
    input_columns=["sentence1", "sentence2"],
@ -27,7 +28,7 @@ afqmc_infer_cfg = dict(
 afqmc_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_role="BOT",
-    pred_postprocessor=dict(type="first-capital"),
+    pred_postprocessor=dict(type=first_capital_postprocess),
 )

 afqmc_datasets = [
--- a/configs/datasets/CLUE_cmnli/CLUE_cmnli_gen_1abf97.py
+++ b/configs/datasets/CLUE_cmnli/CLUE_cmnli_gen_1abf97.py
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
 from opencompass.datasets import cmnliDataset_V2
+from opencompass.utils.text_postprocessors import first_capital_postprocess

 cmnli_reader_cfg = dict(
    input_columns=["sentence1", "sentence2"],
@ -27,7 +28,7 @@ cmnli_infer_cfg = dict(
 cmnli_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_role="BOT",
-    pred_postprocessor=dict(type="first-capital"),
+    pred_postprocessor=dict(type=first_capital_postprocess),
 )

 cmnli_datasets = [
--- a/configs/datasets/CLUE_cmnli/CLUE_cmnli_gen_51e956.py
+++ b/configs/datasets/CLUE_cmnli/CLUE_cmnli_gen_51e956.py
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
 from opencompass.datasets import cmnliDataset_V2
+from opencompass.utils.text_postprocessors import first_capital_postprocess

 cmnli_reader_cfg = dict(
    input_columns=["sentence1", "sentence2"],
@ -27,7 +28,7 @@ cmnli_infer_cfg = dict(
 cmnli_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_role="BOT",
-    pred_postprocessor=dict(type="first-capital"),
+    pred_postprocessor=dict(type=first_capital_postprocess),
 )

 cmnli_datasets = [
--- a/configs/datasets/CLUE_ocnli/CLUE_ocnli_gen_51e956.py
+++ b/configs/datasets/CLUE_ocnli/CLUE_ocnli_gen_51e956.py
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
 from opencompass.datasets import cmnliDataset_V2
+from opencompass.utils.text_postprocessors import first_capital_postprocess

 ocnli_reader_cfg = dict(
    input_columns=["sentence1", "sentence2"],
@ -28,7 +29,7 @@ ocnli_infer_cfg = dict(
 ocnli_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_role="BOT",
-    pred_postprocessor=dict(type="first-capital"),
+    pred_postprocessor=dict(type=first_capital_postprocess),
 )

 ocnli_datasets = [
--- a/configs/datasets/CLUE_ocnli/CLUE_ocnli_gen_c4cb6c.py
+++ b/configs/datasets/CLUE_ocnli/CLUE_ocnli_gen_c4cb6c.py
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
 from opencompass.datasets import cmnliDataset_V2
+from opencompass.utils.text_postprocessors import first_capital_postprocess

 ocnli_reader_cfg = dict(
    input_columns=["sentence1", "sentence2"],
@ -28,7 +29,7 @@ ocnli_infer_cfg = dict(
 ocnli_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_role="BOT",
-    pred_postprocessor=dict(type="first-capital"),
+    pred_postprocessor=dict(type=first_capital_postprocess),
 )

 ocnli_datasets = [
--- a/configs/datasets/FewCLUE_bustm/FewCLUE_bustm_gen_634f41.py
+++ b/configs/datasets/FewCLUE_bustm/FewCLUE_bustm_gen_634f41.py
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
 from opencompass.datasets import AFQMCDataset_V2
+from opencompass.utils.text_postprocessors import first_capital_postprocess

 bustm_reader_cfg = dict(
    input_columns=["sentence1", "sentence2"],
@ -27,7 +28,7 @@ bustm_infer_cfg = dict(
 bustm_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_role="BOT",
-    pred_postprocessor=dict(type="first-capital"),
+    pred_postprocessor=dict(type=first_capital_postprocess),
 )

 bustm_datasets = [
--- a/configs/datasets/FewCLUE_chid/FewCLUE_chid_gen_0a29a2.py
+++ b/configs/datasets/FewCLUE_chid/FewCLUE_chid_gen_0a29a2.py
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
 from opencompass.datasets import CHIDDataset_V2
+from opencompass.utils.text_postprocessors import first_capital_postprocess

 chid_reader_cfg = dict(
    input_columns=["content","A","B","C","D","E","F","G"],
@ -27,7 +28,7 @@ chid_infer_cfg = dict(
 chid_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_role="BOT",
-    pred_postprocessor=dict(type="first-capital"),
+    pred_postprocessor=dict(type=first_capital_postprocess),
 )

 chid_datasets = [
--- a/configs/datasets/FewCLUE_cluewsc/FewCLUE_cluewsc_gen_c68933.py
+++ b/configs/datasets/FewCLUE_cluewsc/FewCLUE_cluewsc_gen_c68933.py
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
 from opencompass.datasets import CluewscDataset_V2
+from opencompass.utils.text_postprocessors import first_capital_postprocess

 cluewsc_reader_cfg = dict(
    input_columns=["span1", "span2", "text", "new_text"],
@ -27,7 +28,7 @@ cluewsc_infer_cfg = dict(
 cluewsc_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_role="BOT",
-    pred_postprocessor=dict(type="first-capital"),
+    pred_postprocessor=dict(type=first_capital_postprocess),
 )

 cluewsc_datasets = [
--- a/configs/datasets/FewCLUE_csl/FewCLUE_csl_gen.py
+++ b/configs/datasets/FewCLUE_csl/FewCLUE_csl_gen.py
@ -1,4 +1,4 @@
 from mmengine.config import read_base

 with read_base():
-    from .FewCLUE_csl_gen_87f4a8 import csl_datasets  # noqa: F401, F403
+    from .FewCLUE_csl_gen_28b223 import csl_datasets  # noqa: F401, F403
--- a/configs/datasets/FewCLUE_csl/FewCLUE_csl_gen_28b223.py
+++ b/configs/datasets/FewCLUE_csl/FewCLUE_csl_gen_28b223.py
@ -0,0 +1,51 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.openicl.icl_evaluator import AccEvaluator
+from opencompass.datasets import CslDataset_V2
+from opencompass.utils.text_postprocessors import first_capital_postprocess
+
+csl_reader_cfg = dict(
+    input_columns=["abst", "keywords"],
+    output_column="label",
+)
+
+csl_infer_cfg = dict(
+    prompt_template=dict(
+        type=PromptTemplate,
+        template=dict(round=[
+            dict(
+                role="HUMAN",
+                prompt=
+                "摘要是对论文内容不加注释和评论的简短陈述，要求扼要地说明研究工作的目的、研究方法和最终结论等。\n关键词是一篇学术论文的核心词汇，一般由一系列名词组成。关键词在全文中应有较高出现频率，且能起到帮助文献检索的作用。\n摘要：{abst}\n关键词：{keywords}\n请问上述关键词是否匹配摘要且符合要求？\nA. 否\nB. 是\n请从”A“，”B“中进行选择。\n答："
+            )
+        ]),
+    ),
+    retriever=dict(type=ZeroRetriever),
+    inferencer=dict(type=GenInferencer),
+)
+
+csl_eval_cfg = dict(
+    evaluator=dict(type=AccEvaluator),
+    pred_role="BOT",
+    pred_postprocessor=dict(type=first_capital_postprocess),
+)
+
+csl_datasets = [
+    dict(
+        abbr="csl_dev",
+        type=CslDataset_V2,
+        path="./data/FewCLUE/csl/dev_few_all.json",
+        reader_cfg=csl_reader_cfg,
+        infer_cfg=csl_infer_cfg,
+        eval_cfg=csl_eval_cfg,
+    ),
+    dict(
+        abbr="csl_test",
+        type=CslDataset_V2,
+        path="./data/FewCLUE/csl/test_public.json",
+        reader_cfg=csl_reader_cfg,
+        infer_cfg=csl_infer_cfg,
+        eval_cfg=csl_eval_cfg,
+    ),
+]
--- a/configs/datasets/FewCLUE_csl/FewCLUE_csl_gen_87f4a8.py
+++ b/configs/datasets/FewCLUE_csl/FewCLUE_csl_gen_87f4a8.py
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
 from opencompass.datasets import CslDataset_V2
+from opencompass.utils.text_postprocessors import first_capital_postprocess

 csl_reader_cfg = dict(
    input_columns=["abst", "keywords"],
@ -27,7 +28,7 @@ csl_infer_cfg = dict(
 csl_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_role="BOT",
-    pred_postprocessor=dict(type="first-capital"),
+    pred_postprocessor=dict(type=first_capital_postprocess),
 )

 csl_datasets = [
--- a/configs/datasets/FewCLUE_eprstmt/FewCLUE_eprstmt_gen_740ea0.py
+++ b/configs/datasets/FewCLUE_eprstmt/FewCLUE_eprstmt_gen_740ea0.py
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
 from opencompass.datasets import eprstmtDataset_V2
+from opencompass.utils.text_postprocessors import first_capital_postprocess

 eprstmt_reader_cfg = dict(
    input_columns=["sentence"], output_column="label", test_split="train")
@ -25,7 +26,7 @@ eprstmt_infer_cfg = dict(
 eprstmt_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_role="BOT",
-    pred_postprocessor=dict(type="first-capital"),
+    pred_postprocessor=dict(type=first_capital_postprocess),
 )

 eprstmt_datasets = [
--- a/configs/datasets/FewCLUE_ocnli_fc/FewCLUE_ocnli_fc_gen_f97a97.py
+++ b/configs/datasets/FewCLUE_ocnli_fc/FewCLUE_ocnli_fc_gen_f97a97.py
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
 from opencompass.datasets import cmnliDataset_V2
+from opencompass.utils.text_postprocessors import first_capital_postprocess

 ocnli_fc_reader_cfg = dict(
    input_columns=["sentence1", "sentence2"],
@ -26,7 +27,7 @@ ocnli_fc_infer_cfg = dict(
 ocnli_fc_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_role="BOT",
-    pred_postprocessor=dict(type="first-capital"),
+    pred_postprocessor=dict(type=first_capital_postprocess),
 )

 ocnli_fc_datasets = [
--- a/configs/datasets/FewCLUE_tnews/FewCLUE_tnews_gen_b90e4a.py
+++ b/configs/datasets/FewCLUE_tnews/FewCLUE_tnews_gen_b90e4a.py
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
 from opencompass.datasets import TNewsDataset_V2
+from opencompass.utils.text_postprocessors import first_capital_postprocess

 tnews_reader_cfg = dict(
    input_columns="sentence",
@ -49,7 +50,7 @@ tnews_infer_cfg = dict(
 tnews_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_role="BOT",
-    pred_postprocessor=dict(type="first-capital"),
+    pred_postprocessor=dict(type=first_capital_postprocess),
 )

 tnews_datasets = [
--- a/configs/datasets/SuperGLUE_AX_b/SuperGLUE_AX_b_gen_4dfefa.py
+++ b/configs/datasets/SuperGLUE_AX_b/SuperGLUE_AX_b_gen_4dfefa.py
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
 from opencompass.datasets import AXDataset_V2
+from opencompass.utils.text_postprocessors import first_capital_postprocess

 AX_b_reader_cfg = dict(
    input_columns=["sentence1", "sentence2"],
@ -27,7 +28,7 @@ AX_b_infer_cfg = dict(
 AX_b_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_role="BOT",
-    pred_postprocessor=dict(type="first-capital"),
+    pred_postprocessor=dict(type=first_capital_postprocess),
 )

 AX_b_datasets = [
--- a/configs/datasets/SuperGLUE_AX_g/SuperGLUE_AX_g_gen_68aac7.py
+++ b/configs/datasets/SuperGLUE_AX_g/SuperGLUE_AX_g_gen_68aac7.py
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
 from opencompass.datasets import AXDataset_V2
+from opencompass.utils.text_postprocessors import first_capital_postprocess

 AX_g_reader_cfg = dict(
    input_columns=["hypothesis", "premise"],
@ -27,7 +28,7 @@ AX_g_infer_cfg = dict(
 AX_g_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_role="BOT",
-    pred_postprocessor=dict(type="first-capital"),
+    pred_postprocessor=dict(type=first_capital_postprocess),
 )

 AX_g_datasets = [
--- a/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_gen_883d50.py
+++ b/configs/datasets/SuperGLUE_BoolQ/SuperGLUE_BoolQ_gen_883d50.py
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
 from opencompass.datasets import BoolQDataset_V2
+from opencompass.utils.text_postprocessors import first_capital_postprocess

 BoolQ_reader_cfg = dict(
    input_columns=["question", "passage"],
@ -25,7 +26,7 @@ BoolQ_infer_cfg = dict(
 BoolQ_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_role="BOT",
-    pred_postprocessor=dict(type="first-capital"),
+    pred_postprocessor=dict(type=first_capital_postprocess),
 )

 BoolQ_datasets = [
--- a/configs/datasets/SuperGLUE_CB/SuperGLUE_CB_gen_854c6c.py
+++ b/configs/datasets/SuperGLUE_CB/SuperGLUE_CB_gen_854c6c.py
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
 from opencompass.datasets import CBDataset_V2
+from opencompass.utils.text_postprocessors import first_capital_postprocess

 CB_reader_cfg = dict(
    input_columns=["premise", "hypothesis"],
@ -28,7 +29,7 @@ CB_infer_cfg = dict(
 CB_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_role="BOT",
-    pred_postprocessor=dict(type="first-capital"),
+    pred_postprocessor=dict(type=first_capital_postprocess),
 )

 CB_datasets = [
--- a/configs/datasets/SuperGLUE_COPA/SuperGLUE_COPA_gen_91ca53.py
+++ b/configs/datasets/SuperGLUE_COPA/SuperGLUE_COPA_gen_91ca53.py
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
 from opencompass.datasets import COPADataset_V2
+from opencompass.utils.text_postprocessors import first_capital_postprocess

 COPA_reader_cfg = dict(
    input_columns=["question", "premise", "choice1", "choice2"],
@ -28,7 +29,7 @@ COPA_infer_cfg = dict(
 COPA_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_role="BOT",
-    pred_postprocessor=dict(type="first-capital"),
+    pred_postprocessor=dict(type=first_capital_postprocess),
 )

 COPA_datasets = [
--- a/configs/datasets/SuperGLUE_MultiRC/SuperGLUE_MultiRC_gen_27071f.py
+++ b/configs/datasets/SuperGLUE_MultiRC/SuperGLUE_MultiRC_gen_27071f.py
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
 from opencompass.datasets import MultiRCDataset_V2
+from opencompass.utils.text_postprocessors import first_capital_postprocess

 MultiRC_reader_cfg = dict(
    input_columns=["question", "text", "answer"],
@ -27,7 +28,7 @@ MultiRC_infer_cfg = dict(
 MultiRC_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_role="BOT",
-    pred_postprocessor=dict(type="first-capital"),
+    pred_postprocessor=dict(type=first_capital_postprocess),
 )

 MultiRC_datasets = [
--- a/configs/datasets/SuperGLUE_RTE/SuperGLUE_RTE_gen_68aac7.py
+++ b/configs/datasets/SuperGLUE_RTE/SuperGLUE_RTE_gen_68aac7.py
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
 from opencompass.datasets import AXDataset_V2
+from opencompass.utils.text_postprocessors import first_capital_postprocess

 RTE_reader_cfg = dict(
    input_columns=["hypothesis", "premise"],
@ -27,7 +28,7 @@ RTE_infer_cfg = dict(
 RTE_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_role="BOT",
-    pred_postprocessor=dict(type="first-capital"),
+    pred_postprocessor=dict(type=first_capital_postprocess),
 )

 RTE_datasets = [
--- a/configs/datasets/SuperGLUE_ReCoRD/SuperGLUE_ReCoRD_gen_0f7784.py
+++ b/configs/datasets/SuperGLUE_ReCoRD/SuperGLUE_ReCoRD_gen_0f7784.py
@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import EMEvaluator
-from opencompass.datasets import ReCoRDDataset
+from opencompass.datasets import ReCoRDDataset, ReCoRD_postprocess

 ReCoRD_reader_cfg = dict(
    input_columns=['question', 'text'], output_column='answers')
@ -16,7 +16,7 @@ ReCoRD_infer_cfg = dict(
    inferencer=dict(type=GenInferencer))

 ReCoRD_eval_cfg = dict(
-    evaluator=dict(type=EMEvaluator), pred_postprocessor=dict(type='ReCoRD'))
+    evaluator=dict(type=EMEvaluator), pred_postprocessor=dict(type=ReCoRD_postprocess))

 ReCoRD_datasets = [
    dict(
--- a/configs/datasets/SuperGLUE_WSC/SuperGLUE_WSC_gen.py
+++ b/configs/datasets/SuperGLUE_WSC/SuperGLUE_WSC_gen.py
@ -1,4 +1,4 @@
 from mmengine.config import read_base

 with read_base():
-    from .SuperGLUE_WSC_gen_6dc406 import WSC_datasets  # noqa: F401, F403
+    from .SuperGLUE_WSC_gen_8a881c import WSC_datasets  # noqa: F401, F403
--- a/configs/datasets/SuperGLUE_WSC/SuperGLUE_WSC_gen_6dc406.py
+++ b/configs/datasets/SuperGLUE_WSC/SuperGLUE_WSC_gen_6dc406.py
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
 from opencompass.datasets import WSCDataset_V2
+from opencompass.utils.text_postprocessors import first_capital_postprocess

 WSC_reader_cfg = dict(
    input_columns=["span1", "span2", "text"],
@ -27,7 +28,7 @@ WSC_infer_cfg = dict(
 WSC_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_role="BOT",
-    pred_postprocessor=dict(type="first-capital"),
+    pred_postprocessor=dict(type=first_capital_postprocess),
 )

 WSC_datasets = [
--- a/configs/datasets/SuperGLUE_WSC/SuperGLUE_WSC_gen_8a881c.py
+++ b/configs/datasets/SuperGLUE_WSC/SuperGLUE_WSC_gen_8a881c.py
@ -0,0 +1,43 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.openicl.icl_evaluator import AccEvaluator
+from opencompass.datasets import WSCDataset_V3
+from opencompass.utils.text_postprocessors import first_capital_postprocess
+
+WSC_reader_cfg = dict(
+    input_columns=["span1", "span2", "text"],
+    output_column="label",
+)
+
+WSC_infer_cfg = dict(  # single HUMAN-turn prompt; uses ZeroRetriever + GenInferencer
+    prompt_template=dict(
+        type=PromptTemplate,
+        template=dict(round=[
+            dict(
+                role="HUMAN",
+                prompt=  # NOTE(review): answer cue corrected from "Anseer:"; the file's hash suffix (8a881c) may be derived from the prompt text - confirm whether it must be regenerated
+                "Passage: {text}\nDoes the pronoun # {span2} # refer to * {span1} *?\nA. Yes\nB. No\nAnswer:"
+            ),
+        ]),
+    ),
+    retriever=dict(type=ZeroRetriever),
+    inferencer=dict(type=GenInferencer),
+)
+
+WSC_eval_cfg = dict(
+    evaluator=dict(type=AccEvaluator),
+    pred_role="BOT",
+    pred_postprocessor=dict(type=first_capital_postprocess),
+)
+
+WSC_datasets = [
+    dict(
+        abbr="WSC",
+        type=WSCDataset_V3,
+        path="./data/SuperGLUE/WSC/val.jsonl",
+        reader_cfg=WSC_reader_cfg,
+        infer_cfg=WSC_infer_cfg,
+        eval_cfg=WSC_eval_cfg,
+    )
+]
--- a/configs/datasets/SuperGLUE_WiC/SuperGLUE_WiC_gen_d06864.py
+++ b/configs/datasets/SuperGLUE_WiC/SuperGLUE_WiC_gen_d06864.py
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
 from opencompass.datasets import WiCDataset_V2
+from opencompass.utils.text_postprocessors import first_capital_postprocess

 WiC_reader_cfg = dict(
    input_columns=[
@ -31,7 +32,7 @@ WiC_infer_cfg = dict(
 WiC_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_role="BOT",
-    pred_postprocessor=dict(type="first-capital"),
+    pred_postprocessor=dict(type=first_capital_postprocess),
 )

 WiC_datasets = [
--- a/configs/datasets/TheoremQA/TheoremQA_gen.py
+++ b/configs/datasets/TheoremQA/TheoremQA_gen.py
@ -1,4 +1,4 @@
 from mmengine.config import read_base

 with read_base():
-    from .TheoremQA_gen_a27a10 import TheoremQA_datasets  # noqa: F401, F403
+    from .TheoremQA_gen_7009de import TheoremQA_datasets  # noqa: F401, F403
--- a/configs/datasets/TheoremQA/TheoremQA_gen_424e0a.py
+++ b/configs/datasets/TheoremQA/TheoremQA_gen_424e0a.py
@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
-from opencompass.datasets import TheoremQADataset
+from opencompass.datasets import TheoremQADataset, TheoremQA_postprocess

 TheoremQA_reader_cfg = dict(
    input_columns=['Question', 'Answer_type'],
@ -23,11 +23,11 @@ TheoremQA_infer_cfg = dict(
        type=PromptTemplate,
        template=TheoremQA_prompt2),
    retriever=dict(type=ZeroRetriever),
-    inferencer=dict(type=GenInferencer))
+    inferencer=dict(type=GenInferencer, max_out_len=512))

 TheoremQA_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
-    pred_postprocessor=dict(type='TheoremQA'))
+    pred_postprocessor=dict(type=TheoremQA_postprocess))

 TheoremQA_datasets = [
    dict(
--- a/configs/datasets/TheoremQA/TheoremQA_gen_7009de.py
+++ b/configs/datasets/TheoremQA/TheoremQA_gen_7009de.py
@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
-from opencompass.datasets import TheoremQADataset
+from opencompass.datasets import TheoremQADataset, TheoremQA_postprocess

 TheoremQA_reader_cfg = dict(
    input_columns=['Question', 'Answer_type'],
@ -31,11 +31,11 @@ TheoremQA_infer_cfg = dict(
                dict(role='HUMAN', prompt=TheoremQA_prompt2),
            ])),
    retriever=dict(type=ZeroRetriever),
-    inferencer=dict(type=GenInferencer))
+    inferencer=dict(type=GenInferencer, max_out_len=512))

 TheoremQA_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
-    pred_postprocessor=dict(type='TheoremQA'))
+    pred_postprocessor=dict(type=TheoremQA_postprocess))

 TheoremQA_datasets = [
    dict(
--- a/configs/datasets/TheoremQA/TheoremQA_gen_ef26ca.py
+++ b/configs/datasets/TheoremQA/TheoremQA_gen_ef26ca.py
@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
-from opencompass.datasets import TheoremQADataset
+from opencompass.datasets import TheoremQADataset, TheoremQA_postprocess

 TheoremQA_reader_cfg = dict(
    input_columns=['Question', 'Answer_type'],
@ -20,11 +20,11 @@ TheoremQA_infer_cfg = dict(
            ),
        ])),
    retriever=dict(type=ZeroRetriever),
-    inferencer=dict(type=GenInferencer))
+    inferencer=dict(type=GenInferencer, max_out_len=512))

 TheoremQA_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
-    pred_postprocessor=dict(type='TheoremQA'))
+    pred_postprocessor=dict(type=TheoremQA_postprocess))

 TheoremQA_datasets = [
    dict(
--- a/configs/datasets/XLSum/XLSum_gen_2bb71c.py
+++ b/configs/datasets/XLSum/XLSum_gen_2bb71c.py
@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import RougeEvaluator
-from opencompass.datasets import XLSUMDataset
+from opencompass.datasets import XLSUMDataset, Xsum_postprocess

 XLSum_reader_cfg = dict(input_columns=['text'], output_column='summary')

@ -16,7 +16,7 @@ XLSum_infer_cfg = dict(

 XLSum_eval_cfg = dict(
    evaluator=dict(type=RougeEvaluator),
-    pred_postprocessor=dict(type='Xsum'),
+    pred_postprocessor=dict(type=Xsum_postprocess),
 )

 XLSum_datasets = [
--- a/configs/datasets/Xsum/Xsum_gen_8ea5f8.py
+++ b/configs/datasets/Xsum/Xsum_gen_8ea5f8.py
@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import RougeEvaluator
-from opencompass.datasets import XsumDataset
+from opencompass.datasets import XsumDataset, Xsum_postprocess

 Xsum_reader_cfg = dict(input_columns=['dialogue'], output_column='summary')

@ -16,7 +16,7 @@ Xsum_infer_cfg = dict(

 Xsum_eval_cfg = dict(
    evaluator=dict(type=RougeEvaluator),
-    pred_postprocessor=dict(type='Xsum'),
+    pred_postprocessor=dict(type=Xsum_postprocess),
 )

 Xsum_datasets = [
--- a/configs/datasets/agieval/agieval_gen_0a9ace.py
+++ b/configs/datasets/agieval/agieval_gen_0a9ace.py
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
 from opencompass.datasets import AGIEvalDataset, AGIEvalEvaluator
+from opencompass.utils.text_postprocessors import first_capital_postprocess

 agieval_reader_cfg = dict(
    input_columns=['problem_input'], output_column='label')
@ -44,7 +45,7 @@ for name in agieval_single_choice_sets:

    agieval_eval_cfg = dict(
        evaluator=dict(type=AccEvaluator),
-        pred_postprocessor=dict(type='first-capital'))
+        pred_postprocessor=dict(type=first_capital_postprocess))

    agieval_datasets.append(
        dict(
--- a/configs/datasets/agieval/agieval_gen_397d81.py
+++ b/configs/datasets/agieval/agieval_gen_397d81.py
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
 from opencompass.datasets import AGIEvalDataset_v2, AGIEvalEvaluator
+from opencompass.utils.text_postprocessors import first_capital_postprocess, first_capital_postprocess_multi

 agieval_reader_cfg = dict(
    input_columns=['question', 'options'], output_column='label')
@ -82,7 +83,7 @@ for _name in agieval_single_choice_sets:

    agieval_eval_cfg = dict(
        evaluator=dict(type=AccEvaluator),
-        pred_postprocessor=dict(type='first-capital'))
+        pred_postprocessor=dict(type=first_capital_postprocess))

    agieval_datasets.append(
        dict(
@ -111,7 +112,7 @@ for _name in agieval_multiple_choices_sets:

    agieval_eval_cfg = dict(
        evaluator=dict(type=AccEvaluator),
-        pred_postprocessor=dict(type='first-capital-multi'))
+        pred_postprocessor=dict(type=first_capital_postprocess_multi))

    agieval_datasets.append(
        dict(
--- a/configs/datasets/agieval/agieval_mixed_2f14ad.py
+++ b/configs/datasets/agieval/agieval_mixed_2f14ad.py
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import PPLInferencer, GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
 from opencompass.datasets import AGIEvalDataset_v2, AGIEvalEvaluator
+from opencompass.utils.text_postprocessors import first_capital_postprocess_multi

 agieval_single_choice_sets = [
    'gaokao-chinese',
@ -116,7 +117,7 @@ for _name in agieval_multiple_choices_sets:

    agieval_eval_cfg = dict(
        evaluator=dict(type=AccEvaluator),
-        pred_postprocessor=dict(type='first-capital-multi'))
+        pred_postprocessor=dict(type=first_capital_postprocess_multi))

    agieval_datasets.append(
        dict(
--- a/configs/datasets/apps/apps_gen_5b4254.py
+++ b/configs/datasets/apps/apps_gen_5b4254.py
@ -1,7 +1,7 @@
 from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
-from opencompass.datasets import HFDataset, HumanEvaluator
+from opencompass.datasets import HFDataset, HumanEvaluator, humaneval_postprocess

 apps_reader_cfg = dict(
    input_columns=['question'], output_column='problem_id', train_split='test')
@ -20,7 +20,7 @@ apps_eval_cfg = dict(
    evaluator=dict(type=HumanEvaluator),
    pred_role='BOT',
    k=[1, 10, 100],  # the parameter only for humaneval
-    pred_postprocessor=dict(type='humaneval'),
+    pred_postprocessor=dict(type=humaneval_postprocess),
 )

 apps_datasets = [
--- a/configs/datasets/apps/apps_gen_7fbb95.py
+++ b/configs/datasets/apps/apps_gen_7fbb95.py
@ -1,7 +1,7 @@
 from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
-from opencompass.datasets import HFDataset, HumanEvaluator
+from opencompass.datasets import HFDataset, HumanEvaluator, humaneval_postprocess

 apps_reader_cfg = dict(
    input_columns=['question'], output_column='problem_id', train_split='test')
@ -27,7 +27,7 @@ apps_eval_cfg = dict(
    evaluator=dict(type=HumanEvaluator),
    pred_role='BOT',
    k=[1, 10, 100],  # the parameter only for humaneval
-    pred_postprocessor=dict(type='humaneval'),
+    pred_postprocessor=dict(type=humaneval_postprocess),
 )

 apps_datasets = [
--- a/configs/datasets/apps/apps_gen_b4dee3.py
+++ b/configs/datasets/apps/apps_gen_b4dee3.py
@ -1,7 +1,7 @@
 from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
-from opencompass.datasets import HFDataset, HumanEvaluator
+from opencompass.datasets import HFDataset, HumanEvaluator, humaneval_postprocess

 apps_reader_cfg = dict(
    input_columns=['question'], output_column='problem_id', train_split='test')
@ -17,7 +17,7 @@ apps_infer_cfg = dict(
 apps_eval_cfg = dict(
    evaluator=dict(type=HumanEvaluator),
    k=[1, 10, 100],
-    pred_postprocessor=dict(type='humaneval'),
+    pred_postprocessor=dict(type=humaneval_postprocess),
 )

 apps_datasets = [
--- a/configs/datasets/bbh/bbh_gen.py
+++ b/configs/datasets/bbh/bbh_gen.py
@ -1,4 +1,4 @@
 from mmengine.config import read_base

 with read_base():
-    from .bbh_gen_6bd693 import bbh_datasets  # noqa: F401, F403
+    from .bbh_gen_5b92b0 import bbh_datasets  # noqa: F401, F403
--- a/configs/datasets/bbh/bbh_gen_5b92b0.py
+++ b/configs/datasets/bbh/bbh_gen_5b92b0.py
@ -3,7 +3,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
-from opencompass.datasets import BBHDataset, BBHEvaluator
+from opencompass.datasets import BBHDataset, BBHEvaluator, bbh_mcq_postprocess

 bbh_reader_cfg = dict(input_columns=["input"], output_column="target")

@ -61,8 +61,8 @@ for _name in bbh_multiple_choice_sets:
    bbh_eval_cfg = dict(
        evaluator=dict(type=AccEvaluator),
        pred_role="BOT",
-        pred_postprocessor=dict(type='bbh-mcq'),
-        dataset_postprocessor=dict(type='bbh-mcq'))
+        pred_postprocessor=dict(type=bbh_mcq_postprocess),
+        dataset_postprocessor=dict(type=bbh_mcq_postprocess))

    bbh_datasets.append(
        dict(
--- a/configs/datasets/ceval/ceval_gen_2daf24.py
+++ b/configs/datasets/ceval/ceval_gen_2daf24.py
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import FixKRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
 from opencompass.datasets import CEvalDataset
+from opencompass.utils.text_postprocessors import first_capital_postprocess

 ceval_subject_mapping = {
    "computer_network":
@ -166,7 +167,7 @@ for _split in ["val", "test"]:

        ceval_eval_cfg = dict(
            evaluator=dict(type=AccEvaluator),
-            pred_postprocessor=dict(type='first-capital'))
+            pred_postprocessor=dict(type=first_capital_postprocess))

        ceval_datasets.append(
            dict(
--- a/configs/datasets/ceval/ceval_gen_5f30c7.py
+++ b/configs/datasets/ceval/ceval_gen_5f30c7.py
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import FixKRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
 from opencompass.datasets import CEvalDataset
+from opencompass.utils.text_postprocessors import first_capital_postprocess

 ceval_subject_mapping = {
    "computer_network":
@ -164,7 +165,9 @@ for _split in ["val"]:
            inferencer=dict(type=GenInferencer, fix_id_list=[0, 1, 2, 3, 4]),
        )

-        ceval_eval_cfg = dict(evaluator=dict(type=AccEvaluator), )
+        ceval_eval_cfg = dict(
+            evaluator=dict(type=AccEvaluator),
+            pred_postprocessor=dict(type=first_capital_postprocess))

        ceval_datasets.append(
            dict(
--- a/configs/datasets/collections/base_medium.py
+++ b/configs/datasets/collections/base_medium.py
@ -5,18 +5,18 @@ with read_base():
    from ..ceval.ceval_ppl_578f8d import ceval_datasets
    from ..agieval.agieval_mixed_2f14ad import agieval_datasets
    from ..GaokaoBench.GaokaoBench_mixed_f2038e import GaokaoBench_datasets
-    from ..bbh.bbh_gen_6bd693 import bbh_datasets
+    from ..bbh.bbh_gen_5b92b0 import bbh_datasets
    from ..humaneval.humaneval_gen_8e312c import humaneval_datasets
    from ..mbpp.mbpp_gen_1e1056 import mbpp_datasets
    from ..CLUE_C3.CLUE_C3_ppl_e24a31 import C3_datasets
-    from ..CLUE_CMRC.CLUE_CMRC_gen_941108 import CMRC_datasets
-    from ..CLUE_DRCD.CLUE_DRCD_gen_941108 import DRCD_datasets
+    from ..CLUE_CMRC.CLUE_CMRC_gen_1bd3c8 import CMRC_datasets
+    from ..CLUE_DRCD.CLUE_DRCD_gen_1bd3c8 import DRCD_datasets
    from ..CLUE_afqmc.CLUE_afqmc_ppl_6507d7 import afqmc_datasets
    from ..CLUE_cmnli.CLUE_cmnli_ppl_fdc6de import cmnli_datasets
    from ..CLUE_ocnli.CLUE_ocnli_ppl_fdc6de import ocnli_datasets
    from ..FewCLUE_bustm.FewCLUE_bustm_ppl_e53034 import bustm_datasets
    from ..FewCLUE_chid.FewCLUE_chid_ppl_8f2872 import chid_datasets
-    from ..FewCLUE_cluewsc.FewCLUE_cluewsc_ppl_868415 import cluewsc_datasets
+    from ..FewCLUE_cluewsc.FewCLUE_cluewsc_ppl_4284a0 import cluewsc_datasets
    from ..FewCLUE_csl.FewCLUE_csl_ppl_841b62 import csl_datasets
    from ..FewCLUE_eprstmt.FewCLUE_eprstmt_ppl_f1e631 import eprstmt_datasets
    from ..FewCLUE_ocnli_fc.FewCLUE_ocnli_fc_ppl_c08300 import ocnli_fc_datasets
@ -33,24 +33,24 @@ with read_base():
    from ..SuperGLUE_RTE.SuperGLUE_RTE_ppl_66caf3 import RTE_datasets
    from ..SuperGLUE_ReCoRD.SuperGLUE_ReCoRD_gen_30dea0 import ReCoRD_datasets
    from ..SuperGLUE_WiC.SuperGLUE_WiC_ppl_312de9 import WiC_datasets
-    from ..SuperGLUE_WSC.SuperGLUE_WSC_ppl_d0f531 import WSC_datasets
-    from ..race.race_ppl_ab8734 import race_datasets
+    from ..SuperGLUE_WSC.SuperGLUE_WSC_ppl_003529 import WSC_datasets
+    from ..race.race_ppl_a138cd import race_datasets
    from ..Xsum.Xsum_gen_31397e import Xsum_datasets
    from ..gsm8k.gsm8k_gen_1d7fe4 import gsm8k_datasets
    from ..summedits.summedits_ppl_1fbeb6 import summedits_datasets
-    from ..math.math_gen_3e92f6 import math_datasets
-    from ..TheoremQA.TheoremQA_gen_8acdf7 import TheoremQA_datasets
+    from ..math.math_gen_265cce import math_datasets
+    from ..TheoremQA.TheoremQA_gen_ef26ca import TheoremQA_datasets
    from ..hellaswag.hellaswag_ppl_47bff9 import hellaswag_datasets
    from ..ARC_e.ARC_e_ppl_a450bd import ARC_e_datasets
    from ..ARC_c.ARC_c_ppl_a450bd import ARC_c_datasets
    from ..commonsenseqa.commonsenseqa_ppl_5545e2 import commonsenseqa_datasets
    from ..piqa.piqa_ppl_1cf9f0 import piqa_datasets
    from ..siqa.siqa_ppl_ced5f6 import siqa_datasets
-    from ..strategyqa.strategyqa_gen_b3ff20 import strategyqa_datasets
-    from ..winogrande.winogrande_ppl_18e5de import winogrande_datasets
+    from ..strategyqa.strategyqa_gen_1180a7 import strategyqa_datasets
+    from ..winogrande.winogrande_ppl_55a66e import winogrande_datasets
    from ..obqa.obqa_ppl_c7c154 import obqa_datasets
-    from ..nq.nq_gen_3dcea1 import nq_datasets
-    from ..triviaqa.triviaqa_gen_3e39a5 import triviaqa_datasets
+    from ..nq.nq_gen_c788f6 import nq_datasets
+    from ..triviaqa.triviaqa_gen_2121ce import triviaqa_datasets
    from ..flores.flores_gen_806ede import flores_datasets
    from ..crowspairs.crowspairs_ppl_e811e1 import crowspairs_datasets

--- a/configs/datasets/collections/base_small.py
+++ b/configs/datasets/collections/base_small.py
@ -2,9 +2,9 @@ from mmengine.config import read_base

 with read_base():
    from ..ceval.ceval_ppl_578f8d import ceval_datasets
-    from ..bbh.bbh_gen_6bd693 import bbh_datasets
-    from ..CLUE_CMRC.CLUE_CMRC_gen_941108 import CMRC_datasets
-    from ..CLUE_DRCD.CLUE_DRCD_gen_941108 import DRCD_datasets
+    from ..bbh.bbh_gen_5b92b0 import bbh_datasets
+    from ..CLUE_CMRC.CLUE_CMRC_gen_1bd3c8 import CMRC_datasets
+    from ..CLUE_DRCD.CLUE_DRCD_gen_1bd3c8 import DRCD_datasets
    from ..CLUE_afqmc.CLUE_afqmc_ppl_6507d7 import afqmc_datasets
    from ..FewCLUE_bustm.FewCLUE_bustm_ppl_e53034 import bustm_datasets
    from ..FewCLUE_chid.FewCLUE_chid_ppl_8f2872 import chid_datasets
@ -24,16 +24,16 @@ with read_base():
    from ..SuperGLUE_ReCoRD.SuperGLUE_ReCoRD_gen_30dea0 import ReCoRD_datasets
    from ..SuperGLUE_WiC.SuperGLUE_WiC_ppl_312de9 import WiC_datasets
    from ..SuperGLUE_WSC.SuperGLUE_WSC_ppl_d0f531 import WSC_datasets
-    from ..race.race_ppl_ab8734 import race_datasets
-    from ..math.math_gen_3e92f6 import math_datasets
+    from ..race.race_ppl_a138cd import race_datasets
+    from ..math.math_gen_265cce import math_datasets
    from ..gsm8k.gsm8k_gen_1d7fe4 import gsm8k_datasets
    from ..summedits.summedits_ppl_1fbeb6 import summedits_datasets
    from ..hellaswag.hellaswag_ppl_47bff9 import hellaswag_datasets
    from ..piqa.piqa_ppl_1cf9f0 import piqa_datasets
-    from ..winogrande.winogrande_ppl_18e5de import winogrande_datasets
+    from ..winogrande.winogrande_ppl_55a66e import winogrande_datasets
    from ..obqa.obqa_ppl_c7c154 import obqa_datasets
-    from ..nq.nq_gen_3dcea1 import nq_datasets
-    from ..triviaqa.triviaqa_gen_3e39a5 import triviaqa_datasets
+    from ..nq.nq_gen_c788f6 import nq_datasets
+    from ..triviaqa.triviaqa_gen_2121ce import triviaqa_datasets
    from ..crowspairs.crowspairs_ppl_e811e1 import crowspairs_datasets

 datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
--- a/configs/datasets/collections/chat_medium.py
+++ b/configs/datasets/collections/chat_medium.py
@ -5,19 +5,19 @@ with read_base():
    from ..ceval.ceval_gen_5f30c7 import ceval_datasets
    from ..agieval.agieval_gen_397d81 import agieval_datasets
    from ..GaokaoBench.GaokaoBench_gen_5cfe9e import GaokaoBench_datasets
-    from ..bbh.bbh_gen_6bd693 import bbh_datasets
+    from ..bbh.bbh_gen_5b92b0 import bbh_datasets
    from ..humaneval.humaneval_gen_8e312c import humaneval_datasets
    from ..mbpp.mbpp_gen_1e1056 import mbpp_datasets
    from ..CLUE_C3.CLUE_C3_gen_8c358f import C3_datasets
-    from ..CLUE_CMRC.CLUE_CMRC_gen_941108 import CMRC_datasets
-    from ..CLUE_DRCD.CLUE_DRCD_gen_941108 import DRCD_datasets
+    from ..CLUE_CMRC.CLUE_CMRC_gen_1bd3c8 import CMRC_datasets
+    from ..CLUE_DRCD.CLUE_DRCD_gen_1bd3c8 import DRCD_datasets
    from ..CLUE_afqmc.CLUE_afqmc_gen_901306 import afqmc_datasets
    from ..CLUE_cmnli.CLUE_cmnli_gen_1abf97 import cmnli_datasets
    from ..CLUE_ocnli.CLUE_ocnli_gen_c4cb6c import ocnli_datasets
    from ..FewCLUE_bustm.FewCLUE_bustm_gen_634f41 import bustm_datasets
    from ..FewCLUE_chid.FewCLUE_chid_gen_0a29a2 import chid_datasets
    from ..FewCLUE_cluewsc.FewCLUE_cluewsc_gen_c68933 import cluewsc_datasets
-    from ..FewCLUE_csl.FewCLUE_csl_gen_87f4a8 import csl_datasets
+    from ..FewCLUE_csl.FewCLUE_csl_gen_28b223 import csl_datasets
    from ..FewCLUE_eprstmt.FewCLUE_eprstmt_gen_740ea0 import eprstmt_datasets
    from ..FewCLUE_ocnli_fc.FewCLUE_ocnli_fc_gen_f97a97 import ocnli_fc_datasets
    from ..FewCLUE_tnews.FewCLUE_tnews_gen_b90e4a import tnews_datasets
@ -37,20 +37,20 @@ with read_base():
    from ..race.race_gen_69ee4f import race_datasets
    from ..Xsum.Xsum_gen_31397e import Xsum_datasets
    from ..gsm8k.gsm8k_gen_1d7fe4 import gsm8k_datasets
-    from ..summedits.summedits_gen_4fb38b import summedits_datasets
-    from ..math.math_gen_3e92f6 import math_datasets
-    from ..TheoremQA.TheoremQA_gen_a27a10 import TheoremQA_datasets
+    from ..summedits.summedits_gen_315438 import summedits_datasets
+    from ..math.math_gen_265cce import math_datasets
+    from ..TheoremQA.TheoremQA_gen_7009de import TheoremQA_datasets
    from ..hellaswag.hellaswag_gen_6faab5 import hellaswag_datasets
    from ..ARC_e.ARC_e_gen_1e0de5 import ARC_e_datasets
    from ..ARC_c.ARC_c_gen_1e0de5 import ARC_c_datasets
    from ..commonsenseqa.commonsenseqa_gen_c946f2 import commonsenseqa_datasets
    from ..piqa.piqa_gen_1194eb import piqa_datasets
    from ..siqa.siqa_gen_e78df3 import siqa_datasets
-    from ..strategyqa.strategyqa_gen_b3ff20 import strategyqa_datasets
+    from ..strategyqa.strategyqa_gen_1180a7 import strategyqa_datasets
    from ..winogrande.winogrande_gen_a9ede5 import winogrande_datasets
    from ..obqa.obqa_gen_9069e4 import obqa_datasets
-    from ..nq.nq_gen_68c1c6 import nq_datasets
-    from ..triviaqa.triviaqa_gen_3e39a5 import triviaqa_datasets
+    from ..nq.nq_gen_c788f6 import nq_datasets
+    from ..triviaqa.triviaqa_gen_2121ce import triviaqa_datasets
    from ..flores.flores_gen_806ede import flores_datasets
    from ..crowspairs.crowspairs_gen_02b6c1 import crowspairs_datasets

--- a/configs/datasets/collections/chat_small.py
+++ b/configs/datasets/collections/chat_small.py
@ -3,9 +3,9 @@ from mmengine.config import read_base
 with read_base():
    from ..mmlu.mmlu_gen_a484b3 import mmlu_datasets
    from ..ceval.ceval_gen_5f30c7 import ceval_datasets
-    from ..bbh.bbh_gen_6bd693 import bbh_datasets
-    from ..CLUE_CMRC.CLUE_CMRC_gen_941108 import CMRC_datasets
-    from ..CLUE_DRCD.CLUE_DRCD_gen_941108 import DRCD_datasets
+    from ..bbh.bbh_gen_5b92b0 import bbh_datasets
+    from ..CLUE_CMRC.CLUE_CMRC_gen_1bd3c8 import CMRC_datasets
+    from ..CLUE_DRCD.CLUE_DRCD_gen_1bd3c8 import DRCD_datasets
    from ..CLUE_afqmc.CLUE_afqmc_gen_901306 import afqmc_datasets
    from ..FewCLUE_bustm.FewCLUE_bustm_gen_634f41 import bustm_datasets
    from ..FewCLUE_chid.FewCLUE_chid_gen_0a29a2 import chid_datasets
@ -24,17 +24,17 @@ with read_base():
    from ..SuperGLUE_RTE.SuperGLUE_RTE_gen_68aac7 import RTE_datasets
    from ..SuperGLUE_ReCoRD.SuperGLUE_ReCoRD_gen_30dea0 import ReCoRD_datasets
    from ..SuperGLUE_WiC.SuperGLUE_WiC_gen_d06864 import WiC_datasets
-    from ..SuperGLUE_WSC.SuperGLUE_WSC_gen_6dc406 import WSC_datasets
+    from ..SuperGLUE_WSC.SuperGLUE_WSC_gen_8a881c import WSC_datasets
    from ..race.race_gen_69ee4f import race_datasets
-    from ..math.math_gen_3e92f6 import math_datasets
+    from ..math.math_gen_265cce import math_datasets
    from ..gsm8k.gsm8k_gen_1d7fe4 import gsm8k_datasets
-    from ..summedits.summedits_gen_4fb38b import summedits_datasets
+    from ..summedits.summedits_gen_315438 import summedits_datasets
    from ..hellaswag.hellaswag_gen_6faab5 import hellaswag_datasets
    from ..piqa.piqa_gen_1194eb import piqa_datasets
    from ..winogrande.winogrande_gen_a9ede5 import winogrande_datasets
    from ..obqa.obqa_gen_9069e4 import obqa_datasets
-    from ..nq.nq_gen_68c1c6 import nq_datasets
-    from ..triviaqa.triviaqa_gen_3e39a5 import triviaqa_datasets
+    from ..nq.nq_gen_c788f6 import nq_datasets
+    from ..triviaqa.triviaqa_gen_2121ce import triviaqa_datasets
    from ..crowspairs.crowspairs_gen_02b6c1 import crowspairs_datasets

 datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
--- a/configs/datasets/collections/example.py
+++ b/configs/datasets/collections/example.py
@ -2,6 +2,6 @@ from mmengine.config import read_base

 with read_base():
    from ..piqa.piqa_gen_1194eb import piqa_datasets
-    from ..nq.nq_gen_68c1c6 import nq_datasets
+    from ..nq.nq_gen_c788f6 import nq_datasets

 datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
--- a/configs/datasets/commonsenseqa/commonsenseqa_gen_c946f2.py
+++ b/configs/datasets/commonsenseqa/commonsenseqa_gen_c946f2.py
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import MDLRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
 from opencompass.datasets import commonsenseqaDataset
+from opencompass.utils.text_postprocessors import first_capital_postprocess

 commonsenseqa_reader_cfg = dict(
    input_columns=["question", "A", "B", "C", "D", "E"],
@ -44,7 +45,7 @@ commonsenseqa_infer_cfg = dict(

 commonsenseqa_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
-    pred_postprocessor=dict(type="first-capital"),
+    pred_postprocessor=dict(type=first_capital_postprocess),
 )

 commonsenseqa_datasets = [
--- a/configs/datasets/crowspairs/crowspairs_gen_02b6c1.py
+++ b/configs/datasets/crowspairs/crowspairs_gen_02b6c1.py
@ -3,10 +3,11 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
 from opencompass.datasets import crowspairsDataset_V2
+from opencompass.utils.text_postprocessors import first_capital_postprocess

 crowspairs_reader_cfg = dict(
    input_columns=['sent_more', 'sent_less'],
-    output_column='id',
+    output_column='label',
    train_split='test',
    test_split='test')

@ -26,7 +27,7 @@ crowspairs_infer_cfg = dict(
 crowspairs_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_role="BOT",
-    pred_postprocessor=dict(type="first-capital"),
+    pred_postprocessor=dict(type=first_capital_postprocess),
 )

 crowspairs_datasets = [
--- a/configs/datasets/crowspairs/crowspairs_ppl_47f211.py
+++ b/configs/datasets/crowspairs/crowspairs_ppl_47f211.py
@ -6,7 +6,7 @@ from opencompass.datasets import crowspairsDataset

 crowspairs_reader_cfg = dict(
    input_columns=['sent_more', 'sent_less'],
-    output_column='id',
+    output_column='label',
    train_split='test',
    test_split='test')

--- a/configs/datasets/crowspairs/crowspairs_ppl_e811e1.py
+++ b/configs/datasets/crowspairs/crowspairs_ppl_e811e1.py
@ -6,7 +6,7 @@ from opencompass.datasets import crowspairsDataset

 crowspairs_reader_cfg = dict(
    input_columns=['sent_more', 'sent_less'],
-    output_column='id',
+    output_column='label',
    train_split='test',
    test_split='test')

--- a/configs/datasets/govrepcrs/govrepcrs_gen_aa5eb3.py
+++ b/configs/datasets/govrepcrs/govrepcrs_gen_aa5eb3.py
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import BleuEvaluator
 from opencompass.datasets import GovRepcrsDataset
+from opencompass.utils.text_postprocessors import general_cn_postprocess

 govrepcrs_reader_cfg = dict(
    input_columns='content',
@ -21,8 +22,8 @@ govrepcrs_infer_cfg = dict(

 govrepcrs_eval_cfg = dict(
    evaluator=dict(type=BleuEvaluator),
-    pred_postprocessor=dict(type='general_cn'),
-    dataset_postprocessor=dict(type='general_cn'))
+    pred_postprocessor=dict(type=general_cn_postprocess),
+    dataset_postprocessor=dict(type=general_cn_postprocess))

 govrepcrs_datasets = [
    dict(
--- a/configs/datasets/govrepcrs/govrepcrs_gen_db7930.py
+++ b/configs/datasets/govrepcrs/govrepcrs_gen_db7930.py
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import BleuEvaluator
 from opencompass.datasets import GovRepcrsDataset
+from opencompass.utils.text_postprocessors import general_cn_postprocess

 govrepcrs_reader_cfg = dict(
    input_columns='content',
@ -33,8 +34,8 @@ govrepcrs_infer_cfg = dict(
 govrepcrs_eval_cfg = dict(
    evaluator=dict(type=BleuEvaluator),
    pred_role='BOT',
-    pred_postprocessor=dict(type='general_cn'),
-    dataset_postprocessor=dict(type='general_cn'))
+    pred_postprocessor=dict(type=general_cn_postprocess),
+    dataset_postprocessor=dict(type=general_cn_postprocess))

 govrepcrs_datasets = [
    dict(
--- a/configs/datasets/gsm8k/gsm8k_gen_1d7fe4.py
+++ b/configs/datasets/gsm8k/gsm8k_gen_1d7fe4.py
@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
-from opencompass.datasets import HFDataset
+from opencompass.datasets import HFDataset, gsm8k_postprocess, gsm8k_dataset_postprocess

 gsm8k_reader_cfg = dict(input_columns=['question'], output_column='answer')

@ -26,8 +26,8 @@ gsm8k_infer_cfg = dict(
    inferencer=dict(type=GenInferencer, max_out_len=512))

 gsm8k_eval_cfg = dict(evaluator=dict(type=AccEvaluator),
-                      pred_postprocessor=dict(type='gsm8k'),
-                      dataset_postprocessor=dict(type='gsm8k_dataset'))
+                      pred_postprocessor=dict(type=gsm8k_postprocess),
+                      dataset_postprocessor=dict(type=gsm8k_dataset_postprocess))

 gsm8k_datasets = [
    dict(
--- a/configs/datasets/gsm8k/gsm8k_gen_1dce88.py
+++ b/configs/datasets/gsm8k/gsm8k_gen_1dce88.py
@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
-from opencompass.datasets import HFDataset
+from opencompass.datasets import HFDataset, gsm8k_postprocess, gsm8k_dataset_postprocess

 gsm8k_reader_cfg = dict(input_columns=['question'], output_column='answer')

@ -73,8 +73,8 @@ Question: {question}{answer}

 gsm8k_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
-    pred_postprocessor=dict(type='gsm8k'),
-    dataset_postprocessor=dict(type='gsm8k_dataset'))
+    pred_postprocessor=dict(type=gsm8k_postprocess),
+    dataset_postprocessor=dict(type=gsm8k_dataset_postprocess))

 gsm8k_datasets = [
    dict(
--- a/configs/datasets/gsm8k/gsm8k_gen_e9e91e.py
+++ b/configs/datasets/gsm8k/gsm8k_gen_e9e91e.py
@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
-from opencompass.datasets import HFDataset
+from opencompass.datasets import HFDataset, gsm8k_postprocess, gsm8k_dataset_postprocess

 gsm8k_reader_cfg = dict(input_columns=['question'], output_column='answer')

@ -36,8 +36,8 @@ gsm8k_infer_cfg = dict(

 gsm8k_eval_cfg = dict(evaluator=dict(type=AccEvaluator),
                      pred_role="BOT",
-                      pred_postprocessor=dict(type='gsm8k'),
-                      dataset_postprocessor=dict(type='gsm8k_dataset'))
+                      pred_postprocessor=dict(type=gsm8k_postprocess),
+                      dataset_postprocessor=dict(type=gsm8k_dataset_postprocess))

 gsm8k_datasets = [
    dict(
--- a/configs/datasets/hellaswag/hellaswag_gen_6faab5.py
+++ b/configs/datasets/hellaswag/hellaswag_gen_6faab5.py
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
 from opencompass.datasets import hellaswagDataset_V2
+from opencompass.utils.text_postprocessors import first_capital_postprocess

 hellaswag_reader_cfg = dict(
    input_columns=["ctx", "A", "B", "C", "D"],
@ -30,7 +31,7 @@ hellaswag_infer_cfg = dict(
 hellaswag_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_role="BOT",
-    pred_postprocessor=dict(type="first-capital"),
+    pred_postprocessor=dict(type=first_capital_postprocess),
 )

 hellaswag_datasets = [
--- a/configs/datasets/humaneval/humaneval_gen_6f294d.py
+++ b/configs/datasets/humaneval/humaneval_gen_6f294d.py
@ -1,7 +1,7 @@
 from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
-from opencompass.datasets import HFDataset, HumanEvaluator
+from opencompass.datasets import HFDataset, HumanEvaluator, humaneval_postprocess

 humaneval_reader_cfg = dict(
    input_columns=['prompt'], output_column='task_id', train_split='test')
@ -27,7 +27,7 @@ humaneval_eval_cfg = dict(
    evaluator=dict(type=HumanEvaluator),
    pred_role='BOT',
    k=[1, 10, 100],  # the parameter only for humaneval
-    pred_postprocessor=dict(type='humaneval'),
+    pred_postprocessor=dict(type=humaneval_postprocess),
 )

 humaneval_datasets = [
--- a/configs/datasets/humaneval/humaneval_gen_8e312c.py
+++ b/configs/datasets/humaneval/humaneval_gen_8e312c.py
@ -1,7 +1,7 @@
 from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
-from opencompass.datasets import HFDataset, HumanEvaluator
+from opencompass.datasets import HFDataset, HumanEvaluator, humaneval_postprocess

 humaneval_reader_cfg = dict(
    input_columns=['prompt'], output_column='task_id', train_split='test')
@ -22,7 +22,7 @@ humaneval_eval_cfg = dict(
    evaluator=dict(type=HumanEvaluator),
    pred_role='BOT',
    k=[1, 10, 100],  # the parameter only for humaneval
-    pred_postprocessor=dict(type='humaneval'),
+    pred_postprocessor=dict(type=humaneval_postprocess),
 )

 humaneval_datasets = [
--- a/configs/datasets/humaneval/humaneval_gen_fd5822.py
+++ b/configs/datasets/humaneval/humaneval_gen_fd5822.py
@ -1,7 +1,7 @@
 from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
-from opencompass.datasets import HFDataset, HumanEvaluator
+from opencompass.datasets import HFDataset, HumanEvaluator, humaneval_postprocess

 humaneval_reader_cfg = dict(
    input_columns=['prompt'], output_column='task_id', train_split='test')
@ -17,7 +17,7 @@ humaneval_infer_cfg = dict(
 humaneval_eval_cfg = dict(
    evaluator=dict(type=HumanEvaluator),
    k=[1, 10, 100],  # the parameter only for humaneval
-    pred_postprocessor=dict(type='humaneval'),
+    pred_postprocessor=dict(type=humaneval_postprocess),
 )

 humaneval_datasets = [
--- a/configs/datasets/humaneval/humaneval_gen_ff7054.py
+++ b/configs/datasets/humaneval/humaneval_gen_ff7054.py
@ -1,7 +1,7 @@
 from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
-from opencompass.datasets import HFDataset, HumanEvaluator
+from opencompass.datasets import HFDataset, HumanEvaluator, humaneval_postprocess

 humaneval_reader_cfg = dict(
    input_columns=['prompt'], output_column='task_id', train_split='test')
@ -27,7 +27,7 @@ humaneval_eval_cfg = dict(
    evaluator=dict(type=HumanEvaluator),
    pred_role='BOT',
    k=[1, 10, 100],  # the parameter only for humaneval
-    pred_postprocessor=dict(type='humaneval'),
+    pred_postprocessor=dict(type=humaneval_postprocess),
 )

 humaneval_datasets = [
--- a/configs/datasets/iwslt2017/iwslt2017_gen_69ce16.py
+++ b/configs/datasets/iwslt2017/iwslt2017_gen_69ce16.py
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import BM25Retriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import BleuEvaluator
 from opencompass.datasets import IWSLT2017Dataset
+from opencompass.utils.text_postprocessors import general_cn_postprocess

 iwslt2017_reader_cfg = dict(
    input_columns='en', output_column='de', train_split='validation')
@ -15,10 +16,10 @@ iwslt2017_infer_cfg = dict(
    inferencer=dict(type=GenInferencer))

 iwslt2017_eval_cfg = dict(
-    evaluator=dict(type=BleuEvaluator), 
-    pred_role='BOT', 
-    pred_postprocessor=dict(type='general_cn'),
-    dataset_postprocessor=dict(type='general_cn'))
+    evaluator=dict(type=BleuEvaluator),
+    pred_role='BOT',
+    pred_postprocessor=dict(type=general_cn_postprocess),
+    dataset_postprocessor=dict(type=general_cn_postprocess))

 iwslt2017_datasets = [
    dict(
@ -28,4 +29,4 @@ iwslt2017_datasets = [
        reader_cfg=iwslt2017_reader_cfg,
        infer_cfg=iwslt2017_infer_cfg,
        eval_cfg=iwslt2017_eval_cfg)
-]
+]
--- a/configs/datasets/iwslt2017/iwslt2017_gen_b4a814.py
+++ b/configs/datasets/iwslt2017/iwslt2017_gen_b4a814.py
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import BM25Retriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import BleuEvaluator
 from opencompass.datasets import IWSLT2017Dataset
+from opencompass.utils.text_postprocessors import general_cn_postprocess

 iwslt2017_reader_cfg = dict(
    input_columns='en', output_column='de', train_split='validation')
@ -24,10 +25,10 @@ iwslt2017_infer_cfg = dict(
    inferencer=dict(type=GenInferencer))

 iwslt2017_eval_cfg = dict(
-    evaluator=dict(type=BleuEvaluator), 
-    pred_role='BOT', 
-    pred_postprocessor=dict(type='general_cn'),
-    dataset_postprocessor=dict(type='general_cn'))
+    evaluator=dict(type=BleuEvaluator),
+    pred_role='BOT',
+    pred_postprocessor=dict(type=general_cn_postprocess),
+    dataset_postprocessor=dict(type=general_cn_postprocess))

 iwslt2017_datasets = [
    dict(
@ -37,4 +38,4 @@ iwslt2017_datasets = [
        reader_cfg=iwslt2017_reader_cfg,
        infer_cfg=iwslt2017_infer_cfg,
        eval_cfg=iwslt2017_eval_cfg)
-]
+]
--- a/configs/datasets/iwslt2017/iwslt2017_gen_d0ebd1.py
+++ b/configs/datasets/iwslt2017/iwslt2017_gen_d0ebd1.py
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import BM25Retriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import BleuEvaluator
 from opencompass.datasets import IWSLT2017Dataset
+from opencompass.utils.text_postprocessors import general_cn_postprocess

 iwslt2017_reader_cfg = dict(
    input_columns='en', output_column='de', train_split='validation')
@ -22,10 +23,10 @@ iwslt2017_infer_cfg = dict(
    inferencer=dict(type=GenInferencer))

 iwslt2017_eval_cfg = dict(
-    evaluator=dict(type=BleuEvaluator), 
-    pred_role='BOT', 
-    pred_postprocessor=dict(type='general_cn'),
-    dataset_postprocessor=dict(type='general_cn'))
+    evaluator=dict(type=BleuEvaluator),
+    pred_role='BOT',
+    pred_postprocessor=dict(type=general_cn_postprocess),
+    dataset_postprocessor=dict(type=general_cn_postprocess))

 iwslt2017_datasets = [
    dict(
@ -35,4 +36,4 @@ iwslt2017_datasets = [
        reader_cfg=iwslt2017_reader_cfg,
        infer_cfg=iwslt2017_infer_cfg,
        eval_cfg=iwslt2017_eval_cfg)
-]
+]
--- a/configs/datasets/jigsawmultilingual/jigsawmultilingual_ppl_1af0ae.py
+++ b/configs/datasets/jigsawmultilingual/jigsawmultilingual_ppl_1af0ae.py
@ -33,8 +33,8 @@ for _l in lang:
        dict(
            abbr=f'jigsaw_multilingual_{_l}',
            type=JigsawMultilingualDataset,
-            path='data/test.csv',
-            label='data/test_labels.csv',
+            path='data/jigsawmultilingual/test.csv',
+            label='data/jigsawmultilingual/test_labels.csv',
            lang=_l,
            reader_cfg=jigsawmultilingual_reader_cfg,
            infer_cfg=jigsawmultilingual_infer_cfg,
--- a/configs/datasets/jigsawmultilingual/jigsawmultilingual_ppl_fe50d8.py
+++ b/configs/datasets/jigsawmultilingual/jigsawmultilingual_ppl_fe50d8.py
@ -37,8 +37,8 @@ for _l in lang:
        dict(
            abbr=f'jigsaw_multilingual_{_l}',
            type=JigsawMultilingualDataset,
-            path='data/test.csv',
-            label='data/test_labels.csv',
+            path='data/jigsawmultilingual/test.csv',
+            label='data/jigsawmultilingual/test_labels.csv',
            lang=_l,
            reader_cfg=jigsawmultilingual_reader_cfg,
            infer_cfg=jigsawmultilingual_infer_cfg,
--- a/configs/datasets/lcsts/lcsts_gen_8ee1fe.py
+++ b/configs/datasets/lcsts/lcsts_gen_8ee1fe.py
@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import RougeEvaluator
-from opencompass.datasets import LCSTSDataset
+from opencompass.datasets import LCSTSDataset, lcsts_postprocess

 lcsts_reader_cfg = dict(input_columns=['content'], output_column='abst')

@ -18,7 +18,7 @@ lcsts_infer_cfg = dict(
 lcsts_eval_cfg = dict(
    evaluator=dict(type=RougeEvaluator),
    pred_role='BOT',
-    pred_postprocessor=dict(type='lcsts'),
+    pred_postprocessor=dict(type=lcsts_postprocess),
 )

 lcsts_datasets = [
--- a/configs/datasets/lcsts/lcsts_gen_9b0b89.py
+++ b/configs/datasets/lcsts/lcsts_gen_9b0b89.py
@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import RougeEvaluator
-from opencompass.datasets import LCSTSDataset
+from opencompass.datasets import LCSTSDataset, lcsts_postprocess

 lcsts_reader_cfg = dict(input_columns=['content'], output_column='abst')

@ -14,7 +14,7 @@ lcsts_infer_cfg = dict(

 lcsts_eval_cfg = dict(
    evaluator=dict(type=RougeEvaluator),
-    pred_postprocessor=dict(type='lcsts'),
+    pred_postprocessor=dict(type=lcsts_postprocess),
 )

 lcsts_datasets = [
--- a/configs/datasets/math/math_gen.py
+++ b/configs/datasets/math/math_gen.py
@ -1,4 +1,4 @@
 from mmengine.config import read_base

 with read_base():
-    from .math_gen_3e92f6 import math_datasets  # noqa: F401, F403
+    from .math_gen_265cce import math_datasets  # noqa: F401, F403
--- a/configs/datasets/math/math_gen_265cce.py
+++ b/configs/datasets/math/math_gen_265cce.py
@ -1,7 +1,7 @@
 from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
-from opencompass.datasets import MATHDataset, MATHEvaluator
+from opencompass.datasets import MATHDataset, MATHEvaluator, math_postprocess

 math_reader_cfg = dict(input_columns=['problem'], output_column='solution')

@ -12,12 +12,12 @@ math_infer_cfg = dict(
            dict(
                role="HUMAN",
                prompt=
-                "Problem:\nFind the domain of the expression $\frac{{\sqrt{{x-2}}}}{{\sqrt{{5-x}}}}$.}}\nSolution:"
+                "Problem:\nFind the domain of the expression $\\frac{{\sqrt{{x-2}}}}{{\sqrt{{5-x}}}}$.}}\nSolution:"
            ),
            dict(
                role="BOT",
                prompt=
-                "The expressions inside each square root must be non-negative. Therefore, $x-2 \ge 0$, so $x\ge2$, and $5 - x \ge 0$, so $x \le 5$. Also, the denominator cannot be equal to zero, so $5-x>0$, which gives $x<5$. Therefore, the domain of the expression is $\boxed{{[2,5)}}$.\nFinal Answer: The final answer is $[2,5)$. I hope it is correct."
+                "The expressions inside each square root must be non-negative. Therefore, $x-2 \ge 0$, so $x\ge2$, and $5 - x \ge 0$, so $x \le 5$. Also, the denominator cannot be equal to zero, so $5-x>0$, which gives $x<5$. Therefore, the domain of the expression is $\\boxed{{[2,5)}}$.\nFinal Answer: The final answer is $[2,5)$. I hope it is correct.\n"
            ),
            dict(
                role="HUMAN",
@ -27,7 +27,7 @@ math_infer_cfg = dict(
            dict(
                role="BOT",
                prompt=
-                "We have that $\det (\mathbf{{A}} \mathbf{{B}}) = (\det \mathbf{{A}})(\det \mathbf{{B}}) = (2)(12) = \boxed{{24}}.$\nFinal Answer: The final answer is $24$. I hope it is correct."
+                "We have that $\det (\mathbf{{A}} \mathbf{{B}}) = (\det \mathbf{{A}})(\det \mathbf{{B}}) = (2)(12) = \\boxed{{24}}.$\nFinal Answer: The final answer is $24$. I hope it is correct.\n"
            ),
            dict(
                role="HUMAN",
@ -37,17 +37,17 @@ math_infer_cfg = dict(
            dict(
                role="BOT",
                prompt=
-                "If Terrell lifts two 20-pound weights 12 times, he lifts a total of $2\cdot 12\cdot20=480$ pounds of weight. If he lifts two 15-pound weights instead for $n$ times, he will lift a total of $2\cdot15\cdot n=30n$ pounds of weight. Equating this to 480 pounds, we can solve for $n$: \begin{{align*}} 30n&=480\\ \Rightarrow\qquad n&=480/30=\boxed{{16}} \end{{align*}}\nFinal Answer: The final answer is $16$. I hope it is correct."
+                "If Terrell lifts two 20-pound weights 12 times, he lifts a total of $2\cdot 12\cdot20=480$ pounds of weight. If he lifts two 15-pound weights instead for $n$ times, he will lift a total of $2\cdot15\cdot n=30n$ pounds of weight. Equating this to 480 pounds, we can solve for $n$: \\begin{{align*}} 30n&=480\\\\ \Rightarrow\qquad n&=480/30=\\boxed{{16}} \end{{align*}}\nFinal Answer: The final answer is $16$. I hope it is correct.\n"
            ),
            dict(
                role="HUMAN",
                prompt=
-                "Problem:\nIf the system of equations: \begin{{align*}} 6x-4y&=a,\\ 6y-9x &=b. \end{{align*}}has a solution $(x, y)$ where $x$ and $y$ are both nonzero, find $\frac{{a}}{{b}},$ assuming $b$ is nonzero.\nSolution:"
+                "Problem:\nIf the system of equations: \\begin{{align*}} 6x-4y&=a,\\\\ 6y-9x &=b. \end{{align*}}has a solution $(x, y)$ where $x$ and $y$ are both nonzero, find $\\frac{{a}}{{b}},$ assuming $b$ is nonzero.\nSolution:"
            ),
            dict(
                role="BOT",
                prompt=
-                "If we multiply the first equation by $-\frac{{3}}{{2}}$, we obtain $$6y-9x=-\frac{{3}}{{2}}a.$$Since we also know that $6y-9x=b$, we have $$-\frac{{3}}{{2}}a=b\Rightarrow\frac{{a}}{{b}}=\boxed{{-\frac{{2}}{{3}}}}.$$\nFinal Answer: The final answer is $-\frac{{2}}{{3}}$. I hope it is correct."
+                "If we multiply the first equation by $-\\frac{{3}}{{2}}$, we obtain $$6y-9x=-\\frac{{3}}{{2}}a.$$Since we also know that $6y-9x=b$, we have $$-\\frac{{3}}{{2}}a=b\Rightarrow\\frac{{a}}{{b}}=\\boxed{{-\\frac{{2}}{{3}}}}.$$\nFinal Answer: The final answer is $-\\frac{{2}}{{3}}$. I hope it is correct.\n"
            ),
            dict(role="HUMAN", prompt="Problem:\n{problem}\nSolution:\n"),
        ])),
@ -55,7 +55,7 @@ math_infer_cfg = dict(
    inferencer=dict(type=GenInferencer, max_out_len=512))

 math_eval_cfg = dict(
-    evaluator=dict(type=MATHEvaluator), pred_postprocessor=dict(type='math'))
+    evaluator=dict(type=MATHEvaluator), pred_postprocessor=dict(type=math_postprocess))

 math_datasets = [
    dict(
--- a/configs/datasets/math/math_gen_559593.py
+++ b/configs/datasets/math/math_gen_559593.py
@ -1,7 +1,7 @@
 from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
-from opencompass.datasets import MATHDataset, MATHEvaluator
+from opencompass.datasets import MATHDataset, MATHEvaluator, math_postprocess

 math_reader_cfg = dict(input_columns=['problem'], output_column='solution')

@ -9,28 +9,28 @@ math_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template='''Problem:
-Find the domain of the expression $\frac{{\sqrt{{x-2}}}}{{\sqrt{{5-x}}}}$.}}
+Find the domain of the expression $\\frac{{\sqrt{{x-2}}}}{{\sqrt{{5-x}}}}$.}}
 Solution:
-The expressions inside each square root must be non-negative. Therefore, $x-2 \ge 0$, so $x\ge2$, and $5 - x \ge 0$, so $x \le 5$. Also, the denominator cannot be equal to zero, so $5-x>0$, which gives $x<5$. Therefore, the domain of the expression is $\boxed{{[2,5)}}$.
+The expressions inside each square root must be non-negative. Therefore, $x-2 \ge 0$, so $x\ge2$, and $5 - x \ge 0$, so $x \le 5$. Also, the denominator cannot be equal to zero, so $5-x>0$, which gives $x<5$. Therefore, the domain of the expression is $\\boxed{{[2,5)}}$.
 Final Answer: The final answer is $[2,5)$. I hope it is correct.

 Problem:
 If $\det \mathbf{{A}} = 2$ and $\det \mathbf{{B}} = 12,$ then find $\det (\mathbf{{A}} \mathbf{{B}}).$
 Solution:
-We have that $\det (\mathbf{{A}} \mathbf{{B}}) = (\det \mathbf{{A}})(\det \mathbf{{B}}) = (2)(12) = \boxed{{24}}.$
+We have that $\det (\mathbf{{A}} \mathbf{{B}}) = (\det \mathbf{{A}})(\det \mathbf{{B}}) = (2)(12) = \\boxed{{24}}.$
 Final Answer: The final answer is $24$. I hope it is correct.

 Problem:
 Terrell usually lifts two 20-pound weights 12 times. If he uses two 15-pound weights instead, how many times must Terrell lift them in order to lift the same total weight?
 Solution:
-If Terrell lifts two 20-pound weights 12 times, he lifts a total of $2\cdot 12\cdot20=480$ pounds of weight. If he lifts two 15-pound weights instead for $n$ times, he will lift a total of $2\cdot15\cdot n=30n$ pounds of weight. Equating this to 480 pounds, we can solve for $n$: \begin{{align*}} 30n&=480\\ \Rightarrow\qquad n&=480/30=\boxed{{16}} \end{{align*}}
+If Terrell lifts two 20-pound weights 12 times, he lifts a total of $2\cdot 12\cdot20=480$ pounds of weight. If he lifts two 15-pound weights instead for $n$ times, he will lift a total of $2\cdot15\cdot n=30n$ pounds of weight. Equating this to 480 pounds, we can solve for $n$: \\begin{{align*}} 30n&=480\\\\ \Rightarrow\qquad n&=480/30=\\boxed{{16}} \end{{align*}}
 Final Answer: The final answer is $16$. I hope it is correct.

 Problem:
-If the system of equations: \begin{{align*}} 6x-4y&=a,\\ 6y-9x &=b. \end{{align*}}has a solution $(x, y)$ where $x$ and $y$ are both nonzero, find $\frac{{a}}{{b}},$ assuming $b$ is nonzero.
+If the system of equations: \\begin{{align*}} 6x-4y&=a,\\\\ 6y-9x &=b. \end{{align*}}has a solution $(x, y)$ where $x$ and $y$ are both nonzero, find $\\frac{{a}}{{b}},$ assuming $b$ is nonzero.
 Solution:
-If we multiply the first equation by $-\frac{{3}}{{2}}$, we obtain $$6y-9x=-\frac{{3}}{{2}}a.$$Since we also know that $6y-9x=b$, we have $$-\frac{{3}}{{2}}a=b\Rightarrow\frac{{a}}{{b}}=\boxed{{-\frac{{2}}{{3}}}}.$$
-Final Answer: The final answer is $-\frac{{2}}{{3}}$. I hope it is correct.
+If we multiply the first equation by $-\\frac{{3}}{{2}}$, we obtain $$6y-9x=-\\frac{{3}}{{2}}a.$$Since we also know that $6y-9x=b$, we have $$-\\frac{{3}}{{2}}a=b\Rightarrow\\frac{{a}}{{b}}=\\boxed{{-\\frac{{2}}{{3}}}}.$$
+Final Answer: The final answer is $-\\frac{{2}}{{3}}$. I hope it is correct.

 Problem:
 {problem}
@ -40,7 +40,7 @@ Solution:
    inferencer=dict(type=GenInferencer, max_out_len=512))

 math_eval_cfg = dict(
-    evaluator=dict(type=MATHEvaluator), pred_postprocessor=dict(type='math'))
+    evaluator=dict(type=MATHEvaluator), pred_postprocessor=dict(type=math_postprocess))

 math_datasets = [
    dict(
--- a/configs/datasets/math/math_gen_5e8458.py
+++ b/configs/datasets/math/math_gen_5e8458.py
@ -1,34 +1,34 @@
 from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
-from opencompass.datasets import MATHDataset, MATHEvaluator
+from opencompass.datasets import MATHDataset, MATHEvaluator, math_postprocess

 math_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template='''Problem:
-Find the domain of the expression $\frac{{\sqrt{{x-2}}}}{{\sqrt{{5-x}}}}$.}}
+Find the domain of the expression $\\frac{{\sqrt{{x-2}}}}{{\sqrt{{5-x}}}}$.}}
 Solution:
-The expressions inside each square root must be non-negative. Therefore, $x-2 \ge 0$, so $x\ge2$, and $5 - x \ge 0$, so $x \le 5$. Also, the denominator cannot be equal to zero, so $5-x>0$, which gives $x<5$. Therefore, the domain of the expression is $\boxed{{[2,5)}}$.
+The expressions inside each square root must be non-negative. Therefore, $x-2 \ge 0$, so $x\ge2$, and $5 - x \ge 0$, so $x \le 5$. Also, the denominator cannot be equal to zero, so $5-x>0$, which gives $x<5$. Therefore, the domain of the expression is $\\boxed{{[2,5)}}$.
 Final Answer: The final answer is $[2,5)$. I hope it is correct.

 Problem:
 If $\det \mathbf{{A}} = 2$ and $\det \mathbf{{B}} = 12,$ then find $\det (\mathbf{{A}} \mathbf{{B}}).$
 Solution:
-We have that $\det (\mathbf{{A}} \mathbf{{B}}) = (\det \mathbf{{A}})(\det \mathbf{{B}}) = (2)(12) = \boxed{{24}}.$
+We have that $\det (\mathbf{{A}} \mathbf{{B}}) = (\det \mathbf{{A}})(\det \mathbf{{B}}) = (2)(12) = \\boxed{{24}}.$
 Final Answer: The final answer is $24$. I hope it is correct.

 Problem:
 Terrell usually lifts two 20-pound weights 12 times. If he uses two 15-pound weights instead, how many times must Terrell lift them in order to lift the same total weight?
 Solution:
-If Terrell lifts two 20-pound weights 12 times, he lifts a total of $2\cdot 12\cdot20=480$ pounds of weight. If he lifts two 15-pound weights instead for $n$ times, he will lift a total of $2\cdot15\cdot n=30n$ pounds of weight. Equating this to 480 pounds, we can solve for $n$: \begin{{align*}} 30n&=480\\ \Rightarrow\qquad n&=480/30=\boxed{{16}} \end{{align*}}
+If Terrell lifts two 20-pound weights 12 times, he lifts a total of $2\cdot 12\cdot20=480$ pounds of weight. If he lifts two 15-pound weights instead for $n$ times, he will lift a total of $2\cdot15\cdot n=30n$ pounds of weight. Equating this to 480 pounds, we can solve for $n$: \\begin{{align*}} 30n&=480\\\\ \Rightarrow\qquad n&=480/30=\\boxed{{16}} \end{{align*}}
 Final Answer: The final answer is $16$. I hope it is correct.

 Problem:
-If the system of equations: \begin{{align*}} 6x-4y&=a,\\ 6y-9x &=b. \end{{align*}}has a solution $(x, y)$ where $x$ and $y$ are both nonzero, find $\frac{{a}}{{b}},$ assuming $b$ is nonzero.
+If the system of equations: \\begin{{align*}} 6x-4y&=a,\\\\ 6y-9x &=b. \end{{align*}}has a solution $(x, y)$ where $x$ and $y$ are both nonzero, find $\\frac{{a}}{{b}},$ assuming $b$ is nonzero.
 Solution:
-If we multiply the first equation by $-\frac{{3}}{{2}}$, we obtain $$6y-9x=-\frac{{3}}{{2}}a.$$Since we also know that $6y-9x=b$, we have $$-\frac{{3}}{{2}}a=b\Rightarrow\frac{{a}}{{b}}=\boxed{{-\frac{{2}}{{3}}}}.$$
-Final Answer: The final answer is $-\frac{{2}}{{3}}$. I hope it is correct.
+If we multiply the first equation by $-\\frac{{3}}{{2}}$, we obtain $$6y-9x=-\\frac{{3}}{{2}}a.$$Since we also know that $6y-9x=b$, we have $$-\\frac{{3}}{{2}}a=b\Rightarrow\\frac{{a}}{{b}}=\\boxed{{-\\frac{{2}}{{3}}}}.$$
+Final Answer: The final answer is $-\\frac{{2}}{{3}}$. I hope it is correct.

 Problem:
 {problem}Solution:
@ -37,7 +37,7 @@ Problem:
    inferencer=dict(type=GenInferencer, max_out_len=512))

 math_eval_cfg = dict(
-    evaluator=dict(type=MATHEvaluator), pred_postprocessor=dict(type='math'))
+    evaluator=dict(type=MATHEvaluator), pred_postprocessor=dict(type=math_postprocess))

 math_datasets = [
    dict(
--- a/configs/datasets/mbpp/mbpp_gen_1e1056.py
+++ b/configs/datasets/mbpp/mbpp_gen_1e1056.py
@ -4,7 +4,7 @@ from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.datasets import MBPPDataset, MBPPEvaluator

 mbpp_reader_cfg = dict(
-    input_columns=['text', 'test_list'], output_column='code')
+    input_columns=['text', 'test_list'], output_column='test_list_2')

 mbpp_infer_cfg = dict(
    prompt_template=dict(
--- a/configs/datasets/mbpp/mbpp_gen_6590b0.py
+++ b/configs/datasets/mbpp/mbpp_gen_6590b0.py
@ -4,7 +4,7 @@ from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.datasets import MBPPDataset, MBPPEvaluator

 mbpp_reader_cfg = dict(
-    input_columns=['text', 'test_list'], output_column='code')
+    input_columns=['text', 'test_list'], output_column='test_list_2')

 mbpp_infer_cfg = dict(
    prompt_template=dict(
--- a/configs/datasets/mbpp/mbpp_gen_78c1bc.py
+++ b/configs/datasets/mbpp/mbpp_gen_78c1bc.py
@ -4,7 +4,7 @@ from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.datasets import MBPPDataset, MBPPEvaluator

 mbpp_reader_cfg = dict(
-    input_columns=['text', 'test_list'], output_column='code')
+    input_columns=['text', 'test_list'], output_column='test_list_2')

 mbpp_infer_cfg = dict(
    prompt_template=dict(
--- a/configs/datasets/mmlu/mmlu_gen_23a9a9.py
+++ b/configs/datasets/mmlu/mmlu_gen_23a9a9.py
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import FixKRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
 from opencompass.datasets import MMLUDataset
+from opencompass.utils.text_postprocessors import first_capital_postprocess

 # None of the mmlu dataset in huggingface is correctly parsed, so we use our own dataset reader
 # Please download the dataset from https://people.eecs.berkeley.edu/~hendrycks/data.tar
@ -33,7 +34,7 @@ mmlu_infer_cfg = dict(

 mmlu_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
-    pred_postprocessor=dict(type='first-capital'))
+    pred_postprocessor=dict(type=first_capital_postprocess))

 mmlu_all_sets = [
    "college_biology",
--- a/configs/datasets/mmlu/mmlu_gen_5d1409.py
+++ b/configs/datasets/mmlu/mmlu_gen_5d1409.py
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import FixKRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
 from opencompass.datasets import MMLUDataset
+from opencompass.utils.text_postprocessors import first_capital_postprocess

 # None of the mmlu dataset in huggingface is correctly parsed, so we use our own dataset reader
 # Please download the dataset from https://people.eecs.berkeley.edu/~hendrycks/data.tar
@ -107,7 +108,7 @@ for _name in mmlu_all_sets:

    mmlu_eval_cfg = dict(
        evaluator=dict(type=AccEvaluator),
-        pred_postprocessor=dict(type="first-capital"))
+        pred_postprocessor=dict(type=first_capital_postprocess))

    mmlu_datasets.append(
        dict(
--- a/configs/datasets/mmlu/mmlu_gen_79e572.py
+++ b/configs/datasets/mmlu/mmlu_gen_79e572.py
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import FixKRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
 from opencompass.datasets import MMLUDataset
+from opencompass.utils.text_postprocessors import first_capital_postprocess

 # None of the mmlu dataset in huggingface is correctly parsed, so we use our own dataset reader
 # Please download the dataset from https://people.eecs.berkeley.edu/~hendrycks/data.tar
@ -92,7 +93,7 @@ for _name in mmlu_all_sets:

    mmlu_eval_cfg = dict(
        evaluator=dict(type=AccEvaluator),
-        pred_postprocessor=dict(type="first-capital"),
+        pred_postprocessor=dict(type=first_capital_postprocess),
    )

    mmlu_datasets.append(
--- a/configs/datasets/mmlu/mmlu_gen_a484b3.py
+++ b/configs/datasets/mmlu/mmlu_gen_a484b3.py
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import FixKRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
 from opencompass.datasets import MMLUDataset
+from opencompass.utils.text_postprocessors import first_capital_postprocess

 # None of the mmlu dataset in huggingface is correctly parsed, so we use our own dataset reader
 # Please download the dataset from https://people.eecs.berkeley.edu/~hendrycks/data.tar
@ -107,7 +108,7 @@ for _name in mmlu_all_sets:

    mmlu_eval_cfg = dict(
        evaluator=dict(type=AccEvaluator),
-        pred_postprocessor=dict(type="first-capital"))
+        pred_postprocessor=dict(type=first_capital_postprocess))

    mmlu_datasets.append(
        dict(
--- a/configs/datasets/nq/nq_gen_c788f6.py
+++ b/configs/datasets/nq/nq_gen_c788f6.py
@ -0,0 +1,30 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.datasets import NaturalQuestionDataset, NQEvaluator
+
+nq_reader_cfg = dict(
+    input_columns=['question'], output_column='answer', train_split='test')
+
+nq_infer_cfg = dict(
+    prompt_template=dict(
+        type=PromptTemplate,
+        template=dict(
+            round=[
+                dict(role='HUMAN', prompt='Answer these questions, your answer should be as simple as possible, start your answer with the prompt \'The answer is \'.\nQ: {question}?'),
+                dict(role='BOT', prompt='A:'),
+            ], )),
+    retriever=dict(type=ZeroRetriever),
+    inferencer=dict(type=GenInferencer))
+
+nq_eval_cfg = dict(evaluator=dict(type=NQEvaluator), pred_role="BOT")
+
+nq_datasets = [
+    dict(
+        type=NaturalQuestionDataset,
+        abbr='nq',
+        path='./data/nq/',
+        reader_cfg=nq_reader_cfg,
+        infer_cfg=nq_infer_cfg,
+        eval_cfg=nq_eval_cfg)
+]
--- a/configs/datasets/obqa/obqa_gen_9069e4.py
+++ b/configs/datasets/obqa/obqa_gen_9069e4.py
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
 from opencompass.datasets import OBQADataset
+from opencompass.utils.text_postprocessors import first_capital_postprocess

 _input_columns = [
    ["question_stem", "A", "B", "C", "D"],
@ -54,7 +55,7 @@ for _i in range(2):
    obqa_eval_cfg = dict(
        evaluator=dict(type=AccEvaluator),
        pred_role="BOT",
-        pred_postprocessor=dict(type="first-capital"),
+        pred_postprocessor=dict(type=first_capital_postprocess),
    )

    obqa_datasets[_i]["reader_cfg"] = obqa_reader_cfg
--- a/configs/datasets/piqa/piqa_gen_1194eb.py
+++ b/configs/datasets/piqa/piqa_gen_1194eb.py
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
 from opencompass.datasets import piqaDataset_V2
+from opencompass.utils.text_postprocessors import first_capital_postprocess

 piqa_reader_cfg = dict(
    input_columns=["goal", "sol1", "sol2"],
@ -24,7 +25,7 @@ piqa_infer_cfg = dict(
 piqa_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_role="BOT",
-    pred_postprocessor=dict(type="first-capital"),
+    pred_postprocessor=dict(type=first_capital_postprocess),
 )

 piqa_datasets = [
--- a/configs/datasets/race/race_gen_69ee4f.py
+++ b/configs/datasets/race/race_gen_69ee4f.py
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
 from opencompass.datasets import RaceDataset
+from opencompass.utils.text_postprocessors import first_capital_postprocess

 race_reader_cfg = dict(
    input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
@ -23,7 +24,7 @@ race_infer_cfg = dict(

 race_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
-    pred_postprocessor=dict(type='first-capital'),
+    pred_postprocessor=dict(type=first_capital_postprocess),
    pred_role='BOT')

 race_datasets = [
--- a/configs/datasets/race/race_gen_9302a5.py
+++ b/configs/datasets/race/race_gen_9302a5.py
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
 from opencompass.datasets import RaceDataset
+from opencompass.utils.text_postprocessors import first_capital_postprocess

 race_reader_cfg = dict(
    input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
@ -18,7 +19,7 @@ race_infer_cfg = dict(

 race_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
-    pred_postprocessor=dict(type='first-capital'))
+    pred_postprocessor=dict(type=first_capital_postprocess))

 race_datasets = [
    dict(
--- a/configs/datasets/race/race_ppl.py
+++ b/configs/datasets/race/race_ppl.py
@ -1,4 +1,4 @@
 from mmengine.config import read_base

 with read_base():
-    from .race_ppl_ab8734 import race_datasets  # noqa: F401, F403
+    from .race_ppl_a138cd import race_datasets  # noqa: F401, F403
--- a/configs/datasets/race/race_ppl_a138cd.py
+++ b/configs/datasets/race/race_ppl_a138cd.py
@ -18,10 +18,9 @@ race_infer_cfg = dict(
                    prompt=
                    "Read the article, and answer the question by replying A, B, C or D.\n\nArticle:\n{article}\n\nQ: {question}\n\nA. {A}\nB. {B}\nC. {C}\nD. {D}"
                ),
-                dict(role="BOT", prompt=ans_token),
+                dict(role="BOT", prompt=f'A: {ans}'),
            ])
-            for ans, ans_token in [["A", "{A}"], ["B", "{B}"], ["C", "{C}"],
-                                   ["D", "{D}"]]
+            for ans in ['A', 'B', 'C', 'D']
        }),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=PPLInferencer))
--- a/configs/datasets/realtoxicprompts/realtoxicprompts_gen_7605e4.py
+++ b/configs/datasets/realtoxicprompts/realtoxicprompts_gen_7605e4.py
@ -27,8 +27,9 @@ realtoxicprompts_eval_cfg = dict(

 realtoxicprompts_datasets = [
    dict(
+        abbr='real-toxicity-prompts',
        type=RealToxicPromptsDataset,
-        path='allenai/real-toxicity-prompts',
+        path='data/realtoxicprompts/realtoxicprompts_train.arrow',
        challenging_subset=True,
        reader_cfg=realtoxicprompts_reader_cfg,
        infer_cfg=realtoxicprompts_infer_cfg,
--- a/configs/datasets/realtoxicprompts/realtoxicprompts_gen_ac723c.py
+++ b/configs/datasets/realtoxicprompts/realtoxicprompts_gen_ac723c.py
@ -25,8 +25,9 @@ realtoxicprompts_eval_cfg = dict(

 realtoxicprompts_datasets = [
    dict(
+        abbr='real-toxicity-prompts',
        type=RealToxicPromptsDataset,
-        path='allenai/real-toxicity-prompts',
+        path='data/realtoxicprompts/realtoxicprompts_train.arrow',
        challenging_subset=True,
        reader_cfg=realtoxicprompts_reader_cfg,
        infer_cfg=realtoxicprompts_infer_cfg,
--- a/configs/datasets/siqa/siqa_gen_e78df3.py
+++ b/configs/datasets/siqa/siqa_gen_e78df3.py
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
 from opencompass.datasets import siqaDataset_V2
+from opencompass.utils.text_postprocessors import first_capital_postprocess

 siqa_reader_cfg = dict(
    input_columns=["context", "question", "answerA", "answerB", "answerC"],
@ -28,7 +29,7 @@ siqa_infer_cfg = dict(
 siqa_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_role="BOT",
-    pred_postprocessor=dict(type="first-capital"),
+    pred_postprocessor=dict(type=first_capital_postprocess),
 )

 siqa_datasets = [
--- a/configs/datasets/storycloze/storycloze_gen_7f656a.py
+++ b/configs/datasets/storycloze/storycloze_gen_7f656a.py
@ -3,6 +3,7 @@ from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
 from opencompass.datasets import storyclozeDataset_V2
+from opencompass.utils.text_postprocessors import first_capital_postprocess

 storycloze_reader_cfg = dict(
    input_columns=["context", "sentence_quiz1", "sentence_quiz2"],
@ -27,7 +28,7 @@ storycloze_infer_cfg = dict(
 storycloze_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_role="BOT",
-    pred_postprocessor=dict(type="first-capital"),
+    pred_postprocessor=dict(type=first_capital_postprocess),
 )

 # The original story cloze dataset and repo are not long maintaining.
--- a/configs/datasets/strategyqa/strategyqa_gen.py
+++ b/configs/datasets/strategyqa/strategyqa_gen.py
@ -1,4 +1,4 @@
 from mmengine.config import read_base

 with read_base():
-    from .strategyqa_gen_b3ff20 import strategyqa_datasets  # noqa: F401, F403
+    from .strategyqa_gen_1180a7 import strategyqa_datasets  # noqa: F401, F403
--- a/configs/datasets/strategyqa/strategyqa_gen_1180a7.py
+++ b/configs/datasets/strategyqa/strategyqa_gen_1180a7.py
@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
-from opencompass.datasets import HFDataset
+from opencompass.datasets import HFDataset, strategyqa_pred_postprocess, strategyqa_dataset_postprocess

 strategyqa_reader_cfg = dict(
    input_columns=['question'],
@ -23,7 +23,7 @@ strategyqa_infer_cfg = dict(
                dict(
                    role='BOT',
                    prompt=
-                    'Hamsters are prey animals. Prey are food for predators. Thus, hamsters provide food for some animals.\nSo the answer is yes'
+                    'Hamsters are prey animals. Prey are food for predators. Thus, hamsters provide food for some animals.\nSo the answer is yes\n'
                ),
                dict(
                    role='HUMAN',
@ -33,7 +33,7 @@ strategyqa_infer_cfg = dict(
                dict(
                    role='BOT',
                    prompt=
-                    'Brooke Shields went to Princeton University. Princeton University is about as academically rigorous as the University of Pennsylvania. Thus, Brooke Shields could also succeed at the University of Pennsylvania.\nSo the answer is yes'
+                    'Brooke Shields went to Princeton University. Princeton University is about as academically rigorous as the University of Pennsylvania. Thus, Brooke Shields could also succeed at the University of Pennsylvania.\nSo the answer is yes\n'
                ),
                dict(
                    role='HUMAN',
@ -43,7 +43,7 @@ strategyqa_infer_cfg = dict(
                dict(
                    role='BOT',
                    prompt=
-                    'Hydrogen has an atomic number of 1. 1 squared is 1. There are 5 Spice Girls. Thus, Hydrogen\'s atomic number squared is less than 5.\nSo the answer is no'
+                    'Hydrogen has an atomic number of 1. 1 squared is 1. There are 5 Spice Girls. Thus, Hydrogen\'s atomic number squared is less than 5.\nSo the answer is no\n'
                ),
                dict(
                    role='HUMAN',
@ -53,7 +53,7 @@ strategyqa_infer_cfg = dict(
                dict(
                    role='BOT',
                    prompt=
-                    'College commencement ceremonies can happen in December, May, and June. December is in the winter, so there can be frost. Thus, there could be frost at some commencements.\nSo the answer is yes'
+                    'College commencement ceremonies can happen in December, May, and June. December is in the winter, so there can be frost. Thus, there could be frost at some commencements.\nSo the answer is yes\n'
                ),
                dict(
                    role='HUMAN',
@ -63,7 +63,7 @@ strategyqa_infer_cfg = dict(
                dict(
                    role='BOT',
                    prompt=
-                    'The War in Vietnam was 6 months. The gestation period for a llama is 11 months, which is more than 6 months. Thus, a llama could not give birth twice during the War in Vietnam.\nSo the answer is no'
+                    'The War in Vietnam was 6 months. The gestation period for a llama is 11 months, which is more than 6 months. Thus, a llama could not give birth twice during the War in Vietnam.\nSo the answer is no\n'
                ),
                dict(
                    role='HUMAN',
@ -71,7 +71,7 @@ strategyqa_infer_cfg = dict(
                dict(
                    role='BOT',
                    prompt=
-                    'The density of a pear is about 0.6g/cm3, which is less than water. Objects less dense than water float. Thus, a pear would float.\nSo the answer is no'
+                    'The density of a pear is about 0.6g/cm3, which is less than water. Objects less dense than water float. Thus, a pear would float.\nSo the answer is no\n'
                ),
                dict(role='HUMAN', prompt='Question: {question}\nAnswer:'),
            ], )),
@ -80,8 +80,8 @@ strategyqa_infer_cfg = dict(

 strategyqa_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
-    pred_postprocessor=dict(type='strategyqa'),
-    dataset_postprocessor=dict(type='strategyqa_dataset'))
+    pred_postprocessor=dict(type=strategyqa_pred_postprocess),
+    dataset_postprocessor=dict(type=strategyqa_dataset_postprocess))

 strategyqa_datasets = [
    dict(
--- a/configs/datasets/strategyqa/strategyqa_gen_934441.py
+++ b/configs/datasets/strategyqa/strategyqa_gen_934441.py
@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
 from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_evaluator import AccEvaluator
-from opencompass.datasets import HFDataset
+from opencompass.datasets import HFDataset, strategyqa_pred_postprocess, strategyqa_dataset_postprocess

 strategyqa_reader_cfg = dict(
    input_columns=['question'],
@ -44,8 +44,8 @@ Q: {question}{answer}

 strategyqa_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
-    pred_postprocessor=dict(type='strategyqa'),
-    dataset_postprocessor=dict(type='strategyqa_dataset'))
+    pred_postprocessor=dict(type=strategyqa_pred_postprocess),
+    dataset_postprocessor=dict(type=strategyqa_dataset_postprocess))

 strategyqa_datasets = [
    dict(
--- a/Show More
+++ b/Show More