mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
[Feature] Fullbench v0.1 language update (#1463)
* update * update * update * update
This commit is contained in:
parent
463231c651
commit
245664f4c0
53
configs/datasets/ARC_c/ARC_c_cot_gen_926652.py
Normal file
53
configs/datasets/ARC_c/ARC_c_cot_gen_926652.py
Normal file
@ -0,0 +1,53 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import ARCDataset
|
||||
from opencompass.utils.text_postprocessors import first_option_postprocess, match_answer_pattern
|
||||
|
||||
QUERY_TEMPLATE = """
|
||||
Answer the following multiple choice question. The last line of your response should be of the following format: 'ANSWER: $LETTER' (without quotes) where LETTER is one of ABCD. Think step by step before answering.
|
||||
|
||||
{question}
|
||||
|
||||
A. {textA}
|
||||
B. {textB}
|
||||
C. {textC}
|
||||
D. {textD}
|
||||
""".strip()
|
||||
|
||||
ARC_c_reader_cfg = dict(
|
||||
input_columns=['question', 'textA', 'textB', 'textC', 'textD'],
|
||||
output_column='answerKey')
|
||||
|
||||
ARC_c_infer_cfg = dict(
|
||||
prompt_template=dict(
|
||||
type=PromptTemplate,
|
||||
template=dict(
|
||||
round=[
|
||||
dict(
|
||||
role='HUMAN',
|
||||
prompt=QUERY_TEMPLATE)
|
||||
], ),
|
||||
),
|
||||
retriever=dict(type=ZeroRetriever),
|
||||
inferencer=dict(type=GenInferencer),
|
||||
)
|
||||
|
||||
ARC_c_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_role='BOT',
|
||||
pred_postprocessor=dict(type=first_option_postprocess, options='ABCD'),
|
||||
)
|
||||
|
||||
ARC_c_datasets = [
|
||||
dict(
|
||||
abbr='ARC-c',
|
||||
type=ARCDataset,
|
||||
path='opencompass/ai2_arc-dev',
|
||||
name='ARC-Challenge',
|
||||
reader_cfg=ARC_c_reader_cfg,
|
||||
infer_cfg=ARC_c_infer_cfg,
|
||||
eval_cfg=ARC_c_eval_cfg,
|
||||
)
|
||||
]
|
48
configs/datasets/ARC_c/ARC_c_few_shot_gen_e9b043.py
Normal file
48
configs/datasets/ARC_c/ARC_c_few_shot_gen_e9b043.py
Normal file
@ -0,0 +1,48 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import ARCDataset
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
ARC_c_reader_cfg = dict(
|
||||
input_columns=['question', 'textA', 'textB', 'textC', 'textD'],
|
||||
output_column='answerKey',
|
||||
)
|
||||
|
||||
ARC_c_infer_cfg = dict(
|
||||
ice_template=dict(
|
||||
type=PromptTemplate,
|
||||
template=dict(
|
||||
begin='</E>',
|
||||
round=[
|
||||
dict(
|
||||
role='HUMAN',
|
||||
prompt='Question: {question}\nA. {textA}\nB. {textB}\nC. {textC}\nD. {textD}\nAnswer:',
|
||||
),
|
||||
dict(role='BOT', prompt='{answerKey}'),
|
||||
],
|
||||
),
|
||||
ice_token='</E>',
|
||||
),
|
||||
retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4, 6, 8]),
|
||||
inferencer=dict(type=GenInferencer, max_out_len=50),
|
||||
)
|
||||
|
||||
ARC_c_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_role='BOT',
|
||||
pred_postprocessor=dict(type=first_capital_postprocess),
|
||||
)
|
||||
|
||||
ARC_c_datasets = [
|
||||
dict(
|
||||
abbr='ARC-c',
|
||||
type=ARCDataset,
|
||||
path='opencompass/ai2_arc-dev',
|
||||
name='ARC-Challenge',
|
||||
reader_cfg=ARC_c_reader_cfg,
|
||||
infer_cfg=ARC_c_infer_cfg,
|
||||
eval_cfg=ARC_c_eval_cfg,
|
||||
)
|
||||
]
|
@ -0,0 +1,55 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import BoolQDatasetV2
|
||||
from opencompass.utils.text_postprocessors import (
|
||||
first_option_postprocess,
|
||||
)
|
||||
|
||||
QUERY_TEMPLATE = """
|
||||
Answer the following question. The last line of your response should be of the following format: 'ANSWER: $LETTER' (without quotes) where LETTER is one of AB. Think step by step before answering.
|
||||
|
||||
Passage: {passage}
|
||||
|
||||
Question: {question}
|
||||
|
||||
A. Yes
|
||||
B. No
|
||||
|
||||
""".strip()
|
||||
|
||||
BoolQ_reader_cfg = dict(
|
||||
input_columns=['question', 'passage'],
|
||||
output_column='label',
|
||||
)
|
||||
|
||||
BoolQ_infer_cfg = dict(
|
||||
prompt_template=dict(
|
||||
type=PromptTemplate,
|
||||
template=dict(
|
||||
round=[
|
||||
dict(role='HUMAN', prompt=QUERY_TEMPLATE),
|
||||
]
|
||||
),
|
||||
),
|
||||
retriever=dict(type=ZeroRetriever),
|
||||
inferencer=dict(type=GenInferencer),
|
||||
)
|
||||
|
||||
BoolQ_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_role='BOT',
|
||||
pred_postprocessor=dict(type=first_option_postprocess, options='AB'),
|
||||
)
|
||||
|
||||
BoolQ_datasets = [
|
||||
dict(
|
||||
abbr='BoolQ',
|
||||
type=BoolQDatasetV2,
|
||||
path='opencompass/boolq',
|
||||
reader_cfg=BoolQ_reader_cfg,
|
||||
infer_cfg=BoolQ_infer_cfg,
|
||||
eval_cfg=BoolQ_eval_cfg,
|
||||
)
|
||||
]
|
@ -0,0 +1,47 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import BoolQDatasetV2
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
BoolQ_reader_cfg = dict(
|
||||
input_columns=['question', 'passage'],
|
||||
output_column='label',
|
||||
)
|
||||
|
||||
BoolQ_infer_cfg = dict(
|
||||
ice_template=dict(
|
||||
type=PromptTemplate,
|
||||
template=dict(
|
||||
begin='</E>',
|
||||
round=[
|
||||
dict(
|
||||
role='HUMAN',
|
||||
prompt='{passage}\nQuestion: {question}\nA. Yes\nB. No\nAnswer:',
|
||||
),
|
||||
dict(role='BOT', prompt='{label}'),
|
||||
],
|
||||
),
|
||||
ice_token='</E>',
|
||||
),
|
||||
retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4, 6, 8]),
|
||||
inferencer=dict(type=GenInferencer, max_out_len=50),
|
||||
)
|
||||
|
||||
BoolQ_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_role='BOT',
|
||||
pred_postprocessor=dict(type=first_capital_postprocess),
|
||||
)
|
||||
|
||||
BoolQ_datasets = [
|
||||
dict(
|
||||
abbr='BoolQ',
|
||||
type=BoolQDatasetV2,
|
||||
path='opencompass/boolq',
|
||||
reader_cfg=BoolQ_reader_cfg,
|
||||
infer_cfg=BoolQ_infer_cfg,
|
||||
eval_cfg=BoolQ_eval_cfg,
|
||||
)
|
||||
]
|
@ -33,7 +33,7 @@ BoolQ_datasets = [
|
||||
dict(
|
||||
abbr='BoolQ',
|
||||
type=BoolQDatasetV2,
|
||||
path='./data/SuperGLUE/BoolQ/val.jsonl',
|
||||
path='opencompass/boolq',
|
||||
reader_cfg=BoolQ_reader_cfg,
|
||||
infer_cfg=BoolQ_infer_cfg,
|
||||
eval_cfg=BoolQ_eval_cfg,
|
||||
|
@ -0,0 +1,43 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import PPLInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import BoolQDatasetV2
|
||||
|
||||
BoolQ_reader_cfg = dict(
|
||||
input_columns=['question', 'passage'],
|
||||
output_column='label',
|
||||
)
|
||||
|
||||
BoolQ_infer_cfg = dict(
|
||||
prompt_template=dict(
|
||||
type=PromptTemplate,
|
||||
template={
|
||||
'A':
|
||||
dict(round=[
|
||||
dict(role='HUMAN', prompt='{passage}\nQuestion: {question}?'),
|
||||
dict(role='BOT', prompt='Yes'),
|
||||
]),
|
||||
'B':
|
||||
dict(round=[
|
||||
dict(role='HUMAN', prompt='{passage}\nQuestion: {question}?'),
|
||||
dict(role='BOT', prompt='No'),
|
||||
]),
|
||||
},
|
||||
),
|
||||
retriever=dict(type=ZeroRetriever),
|
||||
inferencer=dict(type=PPLInferencer),
|
||||
)
|
||||
|
||||
BoolQ_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
|
||||
|
||||
BoolQ_datasets = [
|
||||
dict(
|
||||
abbr='BoolQ',
|
||||
type=BoolQDatasetV2,
|
||||
path='opencompass/boolq',
|
||||
reader_cfg=BoolQ_reader_cfg,
|
||||
infer_cfg=BoolQ_infer_cfg,
|
||||
eval_cfg=BoolQ_eval_cfg,
|
||||
)
|
||||
]
|
@ -35,7 +35,7 @@ BoolQ_datasets = [
|
||||
dict(
|
||||
abbr='BoolQ',
|
||||
type=BoolQDatasetV3,
|
||||
path='./data/SuperGLUE/BoolQ/val.jsonl',
|
||||
path='opencompass/boolq',
|
||||
reader_cfg=BoolQ_reader_cfg,
|
||||
infer_cfg=BoolQ_infer_cfg,
|
||||
eval_cfg=BoolQ_eval_cfg,
|
||||
|
@ -36,7 +36,7 @@ BoolQ_datasets = [
|
||||
type=BoolQDataset,
|
||||
abbr='BoolQ',
|
||||
path='json',
|
||||
data_files='./data/SuperGLUE/BoolQ/val.jsonl',
|
||||
data_files='opencompass/boolq',
|
||||
split='train',
|
||||
reader_cfg=BoolQ_reader_cfg,
|
||||
infer_cfg=BoolQ_infer_cfg,
|
||||
|
@ -36,7 +36,7 @@ BoolQ_datasets = [
|
||||
type=BoolQDataset,
|
||||
abbr='BoolQ',
|
||||
path='json',
|
||||
data_files='./data/SuperGLUE/BoolQ/val.jsonl',
|
||||
data_files='opencompass/boolq',
|
||||
split='train',
|
||||
reader_cfg=BoolQ_reader_cfg,
|
||||
infer_cfg=BoolQ_infer_cfg,
|
||||
|
@ -26,7 +26,7 @@ BoolQ_datasets = [
|
||||
type=BoolQDataset,
|
||||
abbr='BoolQ',
|
||||
path='json',
|
||||
data_files='./data/SuperGLUE/BoolQ/val.jsonl',
|
||||
data_files='opencompass/boolq',
|
||||
split='train',
|
||||
reader_cfg=BoolQ_reader_cfg,
|
||||
infer_cfg=BoolQ_infer_cfg,
|
||||
|
68
configs/datasets/race/race_cot_gen_d95929.py
Normal file
68
configs/datasets/race/race_cot_gen_d95929.py
Normal file
@ -0,0 +1,68 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import RaceDataset
|
||||
from opencompass.utils.text_postprocessors import (
|
||||
first_option_postprocess,
|
||||
)
|
||||
|
||||
QUERY_TEMPLATE = """
|
||||
Answer the following multiple choice question. The last line of your response should be of the following format: 'ANSWER: $LETTER' (without quotes) where LETTER is one of ABCD. Think step by step before answering.
|
||||
|
||||
Article: {article}
|
||||
|
||||
Q: {question}
|
||||
|
||||
A. {A}
|
||||
B. {B}
|
||||
C. {C}
|
||||
D. {D}
|
||||
""".strip()
|
||||
|
||||
race_reader_cfg = dict(
|
||||
input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
|
||||
output_column='answer',
|
||||
train_split='validation',
|
||||
test_split='test',
|
||||
)
|
||||
|
||||
race_infer_cfg = dict(
|
||||
prompt_template=dict(
|
||||
type=PromptTemplate,
|
||||
template=dict(
|
||||
round=[
|
||||
dict(role='HUMAN', prompt=QUERY_TEMPLATE),
|
||||
]
|
||||
),
|
||||
),
|
||||
retriever=dict(type=ZeroRetriever),
|
||||
inferencer=dict(type=GenInferencer),
|
||||
)
|
||||
|
||||
race_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_postprocessor=dict(type=first_option_postprocess, options='ABCD'),
|
||||
pred_role='BOT',
|
||||
)
|
||||
|
||||
race_datasets = [
|
||||
dict(
|
||||
abbr='race-middle',
|
||||
type=RaceDataset,
|
||||
path='opencompass/race',
|
||||
name='middle',
|
||||
reader_cfg=race_reader_cfg,
|
||||
infer_cfg=race_infer_cfg,
|
||||
eval_cfg=race_eval_cfg,
|
||||
),
|
||||
dict(
|
||||
abbr='race-high',
|
||||
type=RaceDataset,
|
||||
path='opencompass/race',
|
||||
name='high',
|
||||
reader_cfg=race_reader_cfg,
|
||||
infer_cfg=race_infer_cfg,
|
||||
eval_cfg=race_eval_cfg,
|
||||
),
|
||||
]
|
53
configs/datasets/race/race_few_shot_gen_a498ed.py
Normal file
53
configs/datasets/race/race_few_shot_gen_a498ed.py
Normal file
@ -0,0 +1,53 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import RaceDataset
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
race_reader_cfg = dict(
|
||||
input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
|
||||
output_column='answer',
|
||||
train_split='validation',
|
||||
test_split='test'
|
||||
)
|
||||
|
||||
race_infer_cfg = dict(
|
||||
ice_template=dict(
|
||||
type=PromptTemplate,
|
||||
template=dict(
|
||||
begin='</E>',
|
||||
round=[
|
||||
dict(role='HUMAN', prompt='Read the article, and answer the question by replying A, B, C or D.\n\nArticle:\n{article}\n\nQ: {question}\n\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nAnswer:'),
|
||||
dict(role='BOT', prompt='{answer}'),
|
||||
]
|
||||
),
|
||||
ice_token='</E>',
|
||||
),
|
||||
retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4]),
|
||||
inferencer=dict(type=GenInferencer, max_out_len=50),
|
||||
)
|
||||
|
||||
race_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_postprocessor=dict(type=first_capital_postprocess),
|
||||
pred_role='BOT')
|
||||
|
||||
race_datasets = [
|
||||
dict(
|
||||
abbr='race-middle',
|
||||
type=RaceDataset,
|
||||
path='opencompass/race',
|
||||
name='middle',
|
||||
reader_cfg=race_reader_cfg,
|
||||
infer_cfg=race_infer_cfg,
|
||||
eval_cfg=race_eval_cfg),
|
||||
dict(
|
||||
abbr='race-high',
|
||||
type=RaceDataset,
|
||||
path='opencompass/race',
|
||||
name='high',
|
||||
reader_cfg=race_reader_cfg,
|
||||
infer_cfg=race_infer_cfg,
|
||||
eval_cfg=race_eval_cfg)
|
||||
]
|
15
configs/models/chatglm/lmdeploy_glm4_9b_chat.py
Normal file
15
configs/models/chatglm/lmdeploy_glm4_9b_chat.py
Normal file
@ -0,0 +1,15 @@
|
||||
from opencompass.models import TurboMindModelwithChatTemplate
|
||||
|
||||
models = [
|
||||
dict(
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='glm-4-9b-chat-turbomind',
|
||||
path='THUDM/glm-4-9b-chat',
|
||||
engine_config=dict(max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
max_seq_len=8192,
|
||||
max_out_len=1024,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
)
|
||||
]
|
15
configs/models/hf_llama/lmdeploy_llama3_1_8b.py
Normal file
15
configs/models/hf_llama/lmdeploy_llama3_1_8b.py
Normal file
@ -0,0 +1,15 @@
|
||||
from opencompass.models import TurboMindModel
|
||||
|
||||
models = [
|
||||
dict(
|
||||
type=TurboMindModel,
|
||||
abbr='llama-3.1-8b-turbomind',
|
||||
path='meta-llama/Meta-Llama-3.1-8B',
|
||||
engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
max_seq_len=7168,
|
||||
max_out_len=1024,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
)
|
||||
]
|
16
configs/models/hf_llama/lmdeploy_llama3_1_8b_instruct.py
Normal file
16
configs/models/hf_llama/lmdeploy_llama3_1_8b_instruct.py
Normal file
@ -0,0 +1,16 @@
|
||||
from opencompass.models import TurboMindModelwithChatTemplate
|
||||
|
||||
models = [
|
||||
dict(
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='llama-3.1-8b-instruct-turbomind',
|
||||
path='meta-llama/Meta-Llama-3.1-8B-Instruct',
|
||||
engine_config=dict(max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
max_seq_len=7168,
|
||||
max_out_len=1024,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
stop_words=['<|end_of_text|>', '<|eot_id|>'],
|
||||
)
|
||||
]
|
53
opencompass/configs/datasets/ARC_c/ARC_c_cot_gen_926652.py
Normal file
53
opencompass/configs/datasets/ARC_c/ARC_c_cot_gen_926652.py
Normal file
@ -0,0 +1,53 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import ARCDataset
|
||||
from opencompass.utils.text_postprocessors import first_option_postprocess, match_answer_pattern
|
||||
|
||||
QUERY_TEMPLATE = """
|
||||
Answer the following multiple choice question. The last line of your response should be of the following format: 'ANSWER: $LETTER' (without quotes) where LETTER is one of ABCD. Think step by step before answering.
|
||||
|
||||
{question}
|
||||
|
||||
A. {textA}
|
||||
B. {textB}
|
||||
C. {textC}
|
||||
D. {textD}
|
||||
""".strip()
|
||||
|
||||
ARC_c_reader_cfg = dict(
|
||||
input_columns=['question', 'textA', 'textB', 'textC', 'textD'],
|
||||
output_column='answerKey')
|
||||
|
||||
ARC_c_infer_cfg = dict(
|
||||
prompt_template=dict(
|
||||
type=PromptTemplate,
|
||||
template=dict(
|
||||
round=[
|
||||
dict(
|
||||
role='HUMAN',
|
||||
prompt=QUERY_TEMPLATE)
|
||||
], ),
|
||||
),
|
||||
retriever=dict(type=ZeroRetriever),
|
||||
inferencer=dict(type=GenInferencer),
|
||||
)
|
||||
|
||||
ARC_c_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_role='BOT',
|
||||
pred_postprocessor=dict(type=first_option_postprocess, options='ABCD'),
|
||||
)
|
||||
|
||||
ARC_c_datasets = [
|
||||
dict(
|
||||
abbr='ARC-c',
|
||||
type=ARCDataset,
|
||||
path='opencompass/ai2_arc-dev',
|
||||
name='ARC-Challenge',
|
||||
reader_cfg=ARC_c_reader_cfg,
|
||||
infer_cfg=ARC_c_infer_cfg,
|
||||
eval_cfg=ARC_c_eval_cfg,
|
||||
)
|
||||
]
|
@ -0,0 +1,48 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import ARCDataset
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
ARC_c_reader_cfg = dict(
|
||||
input_columns=['question', 'textA', 'textB', 'textC', 'textD'],
|
||||
output_column='answerKey',
|
||||
)
|
||||
|
||||
ARC_c_infer_cfg = dict(
|
||||
ice_template=dict(
|
||||
type=PromptTemplate,
|
||||
template=dict(
|
||||
begin='</E>',
|
||||
round=[
|
||||
dict(
|
||||
role='HUMAN',
|
||||
prompt='Question: {question}\nA. {textA}\nB. {textB}\nC. {textC}\nD. {textD}\nAnswer:',
|
||||
),
|
||||
dict(role='BOT', prompt='{answerKey}'),
|
||||
],
|
||||
),
|
||||
ice_token='</E>',
|
||||
),
|
||||
retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4, 6, 8]),
|
||||
inferencer=dict(type=GenInferencer, max_out_len=50),
|
||||
)
|
||||
|
||||
ARC_c_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_role='BOT',
|
||||
pred_postprocessor=dict(type=first_capital_postprocess),
|
||||
)
|
||||
|
||||
ARC_c_datasets = [
|
||||
dict(
|
||||
abbr='ARC-c',
|
||||
type=ARCDataset,
|
||||
path='opencompass/ai2_arc-dev',
|
||||
name='ARC-Challenge',
|
||||
reader_cfg=ARC_c_reader_cfg,
|
||||
infer_cfg=ARC_c_infer_cfg,
|
||||
eval_cfg=ARC_c_eval_cfg,
|
||||
)
|
||||
]
|
@ -0,0 +1,55 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import BoolQDatasetV2
|
||||
from opencompass.utils.text_postprocessors import (
|
||||
first_option_postprocess,
|
||||
)
|
||||
|
||||
QUERY_TEMPLATE = """
|
||||
Answer the following question. The last line of your response should be of the following format: 'ANSWER: $LETTER' (without quotes) where LETTER is one of AB. Think step by step before answering.
|
||||
|
||||
Passage: {passage}
|
||||
|
||||
Question: {question}
|
||||
|
||||
A. Yes
|
||||
B. No
|
||||
|
||||
""".strip()
|
||||
|
||||
BoolQ_reader_cfg = dict(
|
||||
input_columns=['question', 'passage'],
|
||||
output_column='label',
|
||||
)
|
||||
|
||||
BoolQ_infer_cfg = dict(
|
||||
prompt_template=dict(
|
||||
type=PromptTemplate,
|
||||
template=dict(
|
||||
round=[
|
||||
dict(role='HUMAN', prompt=QUERY_TEMPLATE),
|
||||
]
|
||||
),
|
||||
),
|
||||
retriever=dict(type=ZeroRetriever),
|
||||
inferencer=dict(type=GenInferencer),
|
||||
)
|
||||
|
||||
BoolQ_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_role='BOT',
|
||||
pred_postprocessor=dict(type=first_option_postprocess, options='AB'),
|
||||
)
|
||||
|
||||
BoolQ_datasets = [
|
||||
dict(
|
||||
abbr='BoolQ',
|
||||
type=BoolQDatasetV2,
|
||||
path='opencompass/boolq',
|
||||
reader_cfg=BoolQ_reader_cfg,
|
||||
infer_cfg=BoolQ_infer_cfg,
|
||||
eval_cfg=BoolQ_eval_cfg,
|
||||
)
|
||||
]
|
@ -0,0 +1,47 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import BoolQDatasetV2
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
BoolQ_reader_cfg = dict(
|
||||
input_columns=['question', 'passage'],
|
||||
output_column='label',
|
||||
)
|
||||
|
||||
BoolQ_infer_cfg = dict(
|
||||
ice_template=dict(
|
||||
type=PromptTemplate,
|
||||
template=dict(
|
||||
begin='</E>',
|
||||
round=[
|
||||
dict(
|
||||
role='HUMAN',
|
||||
prompt='{passage}\nQuestion: {question}\nA. Yes\nB. No\nAnswer:',
|
||||
),
|
||||
dict(role='BOT', prompt='{label}'),
|
||||
],
|
||||
),
|
||||
ice_token='</E>',
|
||||
),
|
||||
retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4, 6, 8]),
|
||||
inferencer=dict(type=GenInferencer, max_out_len=50),
|
||||
)
|
||||
|
||||
BoolQ_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_role='BOT',
|
||||
pred_postprocessor=dict(type=first_capital_postprocess),
|
||||
)
|
||||
|
||||
BoolQ_datasets = [
|
||||
dict(
|
||||
abbr='BoolQ',
|
||||
type=BoolQDatasetV2,
|
||||
path='opencompass/boolq',
|
||||
reader_cfg=BoolQ_reader_cfg,
|
||||
infer_cfg=BoolQ_infer_cfg,
|
||||
eval_cfg=BoolQ_eval_cfg,
|
||||
)
|
||||
]
|
@ -33,7 +33,7 @@ BoolQ_datasets = [
|
||||
dict(
|
||||
abbr='BoolQ',
|
||||
type=BoolQDatasetV2,
|
||||
path='./data/SuperGLUE/BoolQ/val.jsonl',
|
||||
path='opencompass/boolq',
|
||||
reader_cfg=BoolQ_reader_cfg,
|
||||
infer_cfg=BoolQ_infer_cfg,
|
||||
eval_cfg=BoolQ_eval_cfg,
|
||||
|
@ -0,0 +1,43 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import PPLInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import BoolQDatasetV2
|
||||
|
||||
BoolQ_reader_cfg = dict(
|
||||
input_columns=['question', 'passage'],
|
||||
output_column='label',
|
||||
)
|
||||
|
||||
BoolQ_infer_cfg = dict(
|
||||
prompt_template=dict(
|
||||
type=PromptTemplate,
|
||||
template={
|
||||
'A':
|
||||
dict(round=[
|
||||
dict(role='HUMAN', prompt='{passage}\nQuestion: {question}?'),
|
||||
dict(role='BOT', prompt='Yes'),
|
||||
]),
|
||||
'B':
|
||||
dict(round=[
|
||||
dict(role='HUMAN', prompt='{passage}\nQuestion: {question}?'),
|
||||
dict(role='BOT', prompt='No'),
|
||||
]),
|
||||
},
|
||||
),
|
||||
retriever=dict(type=ZeroRetriever),
|
||||
inferencer=dict(type=PPLInferencer),
|
||||
)
|
||||
|
||||
BoolQ_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
|
||||
|
||||
BoolQ_datasets = [
|
||||
dict(
|
||||
abbr='BoolQ',
|
||||
type=BoolQDatasetV2,
|
||||
path='opencompass/boolq',
|
||||
reader_cfg=BoolQ_reader_cfg,
|
||||
infer_cfg=BoolQ_infer_cfg,
|
||||
eval_cfg=BoolQ_eval_cfg,
|
||||
)
|
||||
]
|
@ -35,7 +35,7 @@ BoolQ_datasets = [
|
||||
dict(
|
||||
abbr='BoolQ',
|
||||
type=BoolQDatasetV3,
|
||||
path='./data/SuperGLUE/BoolQ/val.jsonl',
|
||||
path='opencompass/boolq',
|
||||
reader_cfg=BoolQ_reader_cfg,
|
||||
infer_cfg=BoolQ_infer_cfg,
|
||||
eval_cfg=BoolQ_eval_cfg,
|
||||
|
@ -36,7 +36,7 @@ BoolQ_datasets = [
|
||||
type=BoolQDataset,
|
||||
abbr='BoolQ',
|
||||
path='json',
|
||||
data_files='./data/SuperGLUE/BoolQ/val.jsonl',
|
||||
data_files='opencompass/boolq',
|
||||
split='train',
|
||||
reader_cfg=BoolQ_reader_cfg,
|
||||
infer_cfg=BoolQ_infer_cfg,
|
||||
|
@ -36,7 +36,7 @@ BoolQ_datasets = [
|
||||
type=BoolQDataset,
|
||||
abbr='BoolQ',
|
||||
path='json',
|
||||
data_files='./data/SuperGLUE/BoolQ/val.jsonl',
|
||||
data_files='opencompass/boolq',
|
||||
split='train',
|
||||
reader_cfg=BoolQ_reader_cfg,
|
||||
infer_cfg=BoolQ_infer_cfg,
|
||||
|
@ -26,7 +26,7 @@ BoolQ_datasets = [
|
||||
type=BoolQDataset,
|
||||
abbr='BoolQ',
|
||||
path='json',
|
||||
data_files='./data/SuperGLUE/BoolQ/val.jsonl',
|
||||
data_files='opencompass/boolq',
|
||||
split='train',
|
||||
reader_cfg=BoolQ_reader_cfg,
|
||||
infer_cfg=BoolQ_infer_cfg,
|
||||
|
68
opencompass/configs/datasets/race/race_cot_gen_d95929.py
Normal file
68
opencompass/configs/datasets/race/race_cot_gen_d95929.py
Normal file
@ -0,0 +1,68 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import RaceDataset
|
||||
from opencompass.utils.text_postprocessors import (
|
||||
first_option_postprocess,
|
||||
)
|
||||
|
||||
QUERY_TEMPLATE = """
|
||||
Answer the following multiple choice question. The last line of your response should be of the following format: 'ANSWER: $LETTER' (without quotes) where LETTER is one of ABCD. Think step by step before answering.
|
||||
|
||||
Article: {article}
|
||||
|
||||
Q: {question}
|
||||
|
||||
A. {A}
|
||||
B. {B}
|
||||
C. {C}
|
||||
D. {D}
|
||||
""".strip()
|
||||
|
||||
race_reader_cfg = dict(
|
||||
input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
|
||||
output_column='answer',
|
||||
train_split='validation',
|
||||
test_split='test',
|
||||
)
|
||||
|
||||
race_infer_cfg = dict(
|
||||
prompt_template=dict(
|
||||
type=PromptTemplate,
|
||||
template=dict(
|
||||
round=[
|
||||
dict(role='HUMAN', prompt=QUERY_TEMPLATE),
|
||||
]
|
||||
),
|
||||
),
|
||||
retriever=dict(type=ZeroRetriever),
|
||||
inferencer=dict(type=GenInferencer),
|
||||
)
|
||||
|
||||
race_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_postprocessor=dict(type=first_option_postprocess, options='ABCD'),
|
||||
pred_role='BOT',
|
||||
)
|
||||
|
||||
race_datasets = [
|
||||
dict(
|
||||
abbr='race-middle',
|
||||
type=RaceDataset,
|
||||
path='opencompass/race',
|
||||
name='middle',
|
||||
reader_cfg=race_reader_cfg,
|
||||
infer_cfg=race_infer_cfg,
|
||||
eval_cfg=race_eval_cfg,
|
||||
),
|
||||
dict(
|
||||
abbr='race-high',
|
||||
type=RaceDataset,
|
||||
path='opencompass/race',
|
||||
name='high',
|
||||
reader_cfg=race_reader_cfg,
|
||||
infer_cfg=race_infer_cfg,
|
||||
eval_cfg=race_eval_cfg,
|
||||
),
|
||||
]
|
@ -0,0 +1,53 @@
|
||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever
|
||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||
from opencompass.openicl.icl_evaluator import AccEvaluator
|
||||
from opencompass.datasets import RaceDataset
|
||||
from opencompass.utils.text_postprocessors import first_capital_postprocess
|
||||
|
||||
race_reader_cfg = dict(
|
||||
input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
|
||||
output_column='answer',
|
||||
train_split='validation',
|
||||
test_split='test'
|
||||
)
|
||||
|
||||
race_infer_cfg = dict(
|
||||
ice_template=dict(
|
||||
type=PromptTemplate,
|
||||
template=dict(
|
||||
begin='</E>',
|
||||
round=[
|
||||
dict(role='HUMAN', prompt='Read the article, and answer the question by replying A, B, C or D.\n\nArticle:\n{article}\n\nQ: {question}\n\nA. {A}\nB. {B}\nC. {C}\nD. {D}\nAnswer:'),
|
||||
dict(role='BOT', prompt='{answer}'),
|
||||
]
|
||||
),
|
||||
ice_token='</E>',
|
||||
),
|
||||
retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4]),
|
||||
inferencer=dict(type=GenInferencer, max_out_len=50),
|
||||
)
|
||||
|
||||
race_eval_cfg = dict(
|
||||
evaluator=dict(type=AccEvaluator),
|
||||
pred_postprocessor=dict(type=first_capital_postprocess),
|
||||
pred_role='BOT')
|
||||
|
||||
race_datasets = [
|
||||
dict(
|
||||
abbr='race-middle',
|
||||
type=RaceDataset,
|
||||
path='opencompass/race',
|
||||
name='middle',
|
||||
reader_cfg=race_reader_cfg,
|
||||
infer_cfg=race_infer_cfg,
|
||||
eval_cfg=race_eval_cfg),
|
||||
dict(
|
||||
abbr='race-high',
|
||||
type=RaceDataset,
|
||||
path='opencompass/race',
|
||||
name='high',
|
||||
reader_cfg=race_reader_cfg,
|
||||
infer_cfg=race_infer_cfg,
|
||||
eval_cfg=race_eval_cfg)
|
||||
]
|
15
opencompass/configs/models/chatglm/lmdeploy_glm4_9b_chat.py
Normal file
15
opencompass/configs/models/chatglm/lmdeploy_glm4_9b_chat.py
Normal file
@ -0,0 +1,15 @@
|
||||
from opencompass.models import TurboMindModelwithChatTemplate
|
||||
|
||||
models = [
|
||||
dict(
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='glm-4-9b-chat-turbomind',
|
||||
path='THUDM/glm-4-9b-chat',
|
||||
engine_config=dict(max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
max_seq_len=8192,
|
||||
max_out_len=1024,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
)
|
||||
]
|
15
opencompass/configs/models/hf_llama/lmdeploy_llama3_1_8b.py
Normal file
15
opencompass/configs/models/hf_llama/lmdeploy_llama3_1_8b.py
Normal file
@ -0,0 +1,15 @@
|
||||
from opencompass.models import TurboMindModel
|
||||
|
||||
models = [
|
||||
dict(
|
||||
type=TurboMindModel,
|
||||
abbr='llama-3.1-8b-turbomind',
|
||||
path='meta-llama/Meta-Llama-3.1-8B',
|
||||
engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
max_seq_len=7168,
|
||||
max_out_len=1024,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
)
|
||||
]
|
@ -0,0 +1,16 @@
|
||||
from opencompass.models import TurboMindModelwithChatTemplate
|
||||
|
||||
models = [
|
||||
dict(
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='llama-3.1-8b-instruct-turbomind',
|
||||
path='meta-llama/Meta-Llama-3.1-8B-Instruct',
|
||||
engine_config=dict(max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
max_seq_len=7168,
|
||||
max_out_len=1024,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
stop_words=['<|end_of_text|>', '<|eot_id|>'],
|
||||
)
|
||||
]
|
@ -31,7 +31,7 @@ class BoolQDatasetV2(BaseDataset):
|
||||
|
||||
@staticmethod
|
||||
def load(path):
|
||||
path = get_data_path(path, local_mode=True)
|
||||
path = get_data_path(path)
|
||||
dataset = []
|
||||
with open(path, 'r') as f:
|
||||
for line in f:
|
||||
|
@ -4,7 +4,7 @@ from .icl_dpp_retriever import DPPRetriever # noqa
|
||||
from .icl_fix_k_retriever import FixKRetriever # noqa
|
||||
from .icl_mdl_retriever import MDLRetriever # noqa
|
||||
from .icl_random_retriever import RandomRetriever # noqa
|
||||
from .icl_sliding_k_retriever import SlidingWindowRetriever # noqa
|
||||
from .icl_topk_retriever import TopkRetriever # noqa
|
||||
from .icl_votek_retriever import VotekRetriever # noqa
|
||||
from .icl_zero_retriever import ZeroRetriever # noqa
|
||||
from .icl_sliding_k_retriever import SlidingWindowRetriever # noqa
|
||||
|
@ -51,8 +51,8 @@ class SlidingWindowRetriever(BaseRetriever):
|
||||
for current_index in trange(len(self.test_ds),
|
||||
disable=not self.is_main_process):
|
||||
if current_index < self.k:
|
||||
"""For the first few examples,
|
||||
get the previous ones and pad with the last ones"""
|
||||
"""For the first few examples, get the previous ones and pad
|
||||
with the last ones."""
|
||||
start_index = max(0, current_index - self.k)
|
||||
previous_shots = list(range(start_index, current_index))
|
||||
if len(previous_shots) < self.k:
|
||||
|
@ -203,7 +203,7 @@ DATASETS_MAPPING = {
|
||||
"opencompass/race": {
|
||||
"ms_id": "opencompass/race",
|
||||
"hf_id": "opencompass/race",
|
||||
"local": "./data/race",
|
||||
"local": "./data/race/",
|
||||
},
|
||||
# SIQA
|
||||
"opencompass/siqa": {
|
||||
@ -229,6 +229,12 @@ DATASETS_MAPPING = {
|
||||
"hf_id": "opencompass/summedits",
|
||||
"local": "./data/summedits/summedits.jsonl",
|
||||
},
|
||||
# SuperGLUE
|
||||
"opencompass/boolq": {
|
||||
"ms_id": "opencompass/boolq",
|
||||
"hf_id": "opencompass/boolq",
|
||||
"local": "./data/SuperGLUE/BoolQ/val.jsonl",
|
||||
},
|
||||
# TriviaQA
|
||||
"opencompass/trivia_qa": {
|
||||
"ms_id": "opencompass/trivia_qa",
|
||||
@ -292,10 +298,6 @@ DATASETS_URL = {
|
||||
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/BBH.zip",
|
||||
"md5": "60c49f9bef5148aa7e1941328e96a554",
|
||||
},
|
||||
"/mmlu/": {
|
||||
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/mmlu.zip",
|
||||
"md5": "761310671509a239e41c4b717f7fab9c",
|
||||
},
|
||||
"/compass_arena/": {
|
||||
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/compass_arena.zip",
|
||||
"md5": "cd59b54a179d16f2a858b359b60588f6",
|
||||
@ -367,5 +369,17 @@ DATASETS_URL = {
|
||||
"FewCLUE": {
|
||||
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/FewCLUE.zip",
|
||||
"md5": "7976e2bb0e9d885ffd3c55f7c5d4021e",
|
||||
}
|
||||
},
|
||||
"/race": {
|
||||
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/race.zip",
|
||||
"md5": "b758251764a264746cf45749c02363f9",
|
||||
},
|
||||
"/ARC": {
|
||||
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/ARC.zip",
|
||||
"md5": "d720629b69f1a51cfe78bf65b00b44f6",
|
||||
},
|
||||
"/SuperGLUE": {
|
||||
"url": "http://opencompass.oss-cn-shanghai.aliyuncs.com/datasets/data/SuperGLUE.zip",
|
||||
"md5": "b60904915b0b61d1a04ea52280169936",
|
||||
},
|
||||
}
|
||||
|
@ -98,6 +98,7 @@ def first_option_postprocess(text: str, options: str, cushion=True) -> str:
|
||||
f'答案是\s?(\S+)(?:。|$)',
|
||||
f'答案应该是\s?(\S+)(?:。|$)',
|
||||
f'答案为\s?(\S+)(?:。|$)',
|
||||
f'(?i)ANSWER\s*:\s*([{options}])',
|
||||
f'[Tt]he answer is:?\s+\(?([{options}])\)?',
|
||||
f'[Tt]he answer is option:?\s+\(?([{options}])\)?',
|
||||
f'[Tt]he correct answer is:?\s+\(?([{options}])\)?',
|
||||
|
Loading…
Reference in New Issue
Block a user