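Update OpenHuEval examples and dataset configs: rename dataset variables to CamelCase, prefix dataset abbrs with OpenHuEval_, switch DeepSeek API model configs from siliconflow to aliyun, and strip DeepSeek-R1 reasoning content before evaluation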

2025-05-30 16:03:24 +08:00 · 2025-02-10 23:03:50 +08:00 · 2025-02-10 23:03:50 +08:00 · b4ecd718a0
commit b4ecd718a0
parent f55810ae48
10 changed files with 112 additions and 87 deletions
--- a/examples/eval_OpenHuEval_HuMatchingFIB.py
+++ b/examples/eval_OpenHuEval_HuMatchingFIB.py
@ -1,12 +1,11 @@
 from mmengine.config import read_base

 with read_base():
-    from opencompass.configs.datasets.OpenHuEval.HuMatchingFIB.HuMatchingFIB import hu_matching_fib_datasets
+    from opencompass.configs.datasets.OpenHuEval.HuMatchingFIB.HuMatchingFIB import HuMatchingFIB_datasets

    from opencompass.configs.models.openai.gpt_4o_mini_20240718 import models as gpt_4o_mini_20240718_model
    from opencompass.configs.models.openai.gpt_4o_2024_11_20 import models as gpt_4o_2024_11_20_model
-    from opencompass.configs.models.deepseek.deepseek_v3_api_siliconflow import models as deepseek_v3_api_siliconflow_model
-    # from opencompass.configs.models.deepseek.deepseek_v3_api import models as deepseek_v3_api_model
+    from opencompass.configs.models.deepseek.deepseek_v3_api_aliyun import models as deepseek_v3_api_aliyun_model

    from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_7b_instruct import models as lmdeploy_qwen2_5_7b_instruct_model
    from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_72b_instruct import models as lmdeploy_qwen2_5_72b_instruct_model
@ -16,11 +15,21 @@ with read_base():
    from opencompass.configs.models.hf_internlm.lmdeploy_internlm3_8b_instruct import models as lmdeploy_internlm3_8b_instruct_model

    from opencompass.configs.models.qwq.lmdeploy_qwq_32b_preview import models as lmdeploy_qwq_32b_preview_model
-
-    # from opencompass.configs.models.deepseek.deepseek_r1_siliconflow import models as deepseek_r1_siliconflow_model
+    from opencompass.configs.models.deepseek.deepseek_r1_api_aliyun import models as deepseek_r1_api_aliyun_model
    from opencompass.configs.models.openai.o1_mini_2024_09_12 import models as o1_mini_2024_09_12_model
    # from opencompass.configs.models.openai.o3_mini_2025_01_31 import models as o3_mini_2025_01_31_model

-datasets = hu_matching_fib_datasets
+datasets = HuMatchingFIB_datasets
 models = sum([v for k, v in locals().items() if k.endswith('_model')], [])
+
+for model in models:
+    if model['abbr'].startswith('deepseek_r1_api_'):
+        model['return_reasoning_content'] = True
+        model['pred_postprocessor'] = {
+            'OpenHuEval_*': {
+                'type': 'rm_<think>_before_eval'
+            }
+        }
+del model
+
 work_dir = './outputs/' + __file__.split('/')[-1].split('.')[0] + '/' # do NOT modify this line, yapf: disable, pylint: disable
--- a/examples/eval_OpenHuEval_HuProverbRea_2CQ.py
+++ b/examples/eval_OpenHuEval_HuProverbRea_2CQ.py
@ -5,8 +5,7 @@ with read_base():

    from opencompass.configs.models.openai.gpt_4o_mini_20240718 import models as gpt_4o_mini_20240718_model
    from opencompass.configs.models.openai.gpt_4o_2024_11_20 import models as gpt_4o_2024_11_20_model
-    from opencompass.configs.models.deepseek.deepseek_v3_api_siliconflow import models as deepseek_v3_api_siliconflow_model
-    # from opencompass.configs.models.deepseek.deepseek_v3_api import models as deepseek_v3_api_model
+    from opencompass.configs.models.deepseek.deepseek_v3_api_aliyun import models as deepseek_v3_api_aliyun_model

    from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_7b_instruct import models as lmdeploy_qwen2_5_7b_instruct_model
    from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_72b_instruct import models as lmdeploy_qwen2_5_72b_instruct_model
@ -16,11 +15,21 @@ with read_base():
    from opencompass.configs.models.hf_internlm.lmdeploy_internlm3_8b_instruct import models as lmdeploy_internlm3_8b_instruct_model

    from opencompass.configs.models.qwq.lmdeploy_qwq_32b_preview import models as lmdeploy_qwq_32b_preview_model
-
-    # from opencompass.configs.models.deepseek.deepseek_r1_siliconflow import models as deepseek_r1_siliconflow_model
+    from opencompass.configs.models.deepseek.deepseek_r1_api_aliyun import models as deepseek_r1_api_aliyun_model
    from opencompass.configs.models.openai.o1_mini_2024_09_12 import models as o1_mini_2024_09_12_model
    # from opencompass.configs.models.openai.o3_mini_2025_01_31 import models as o3_mini_2025_01_31_model

 datasets = HuProverbRea_datasets
 models = sum([v for k, v in locals().items() if k.endswith('_model')], [])
+
+for model in models:
+    if model['abbr'].startswith('deepseek_r1_api_'):
+        model['return_reasoning_content'] = True
+        model['pred_postprocessor'] = {
+            'OpenHuEval_*': {
+                'type': 'rm_<think>_before_eval'
+            }
+        }
+del model
+
 work_dir = './outputs/' + __file__.split('/')[-1].split('.')[0] + '/' # do NOT modify this line, yapf: disable, pylint: disable
--- a/examples/eval_OpenHuEval_HuProverbRea_OE.py
+++ b/examples/eval_OpenHuEval_HuProverbRea_OE.py
@ -3,10 +3,9 @@ from mmengine.config import read_base
 with read_base():
    from opencompass.configs.datasets.OpenHuEval.HuProverbRea.HuProverbRea_OE import HuProverbRea_datasets

-    from opencompass.configs.models.openai.gpt_4o_mini_20240718 import models as gpt_4o_mini_20240718_model
-    from opencompass.configs.models.openai.gpt_4o_2024_11_20 import models as gpt_4o_2024_11_20_model
-    from opencompass.configs.models.deepseek.deepseek_v3_api_siliconflow import models as deepseek_v3_api_siliconflow_model
-    # from opencompass.configs.models.deepseek.deepseek_v3_api import models as deepseek_v3_api_model
+    # from opencompass.configs.models.openai.gpt_4o_mini_20240718 import models as gpt_4o_mini_20240718_model
+    # from opencompass.configs.models.openai.gpt_4o_2024_11_20 import models as gpt_4o_2024_11_20_model
+    # from opencompass.configs.models.deepseek.deepseek_v3_api_aliyun import models as deepseek_v3_api_aliyun_model

    from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_7b_instruct import models as lmdeploy_qwen2_5_7b_instruct_model
    from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_72b_instruct import models as lmdeploy_qwen2_5_72b_instruct_model
@ -16,11 +15,21 @@ with read_base():
    from opencompass.configs.models.hf_internlm.lmdeploy_internlm3_8b_instruct import models as lmdeploy_internlm3_8b_instruct_model

    from opencompass.configs.models.qwq.lmdeploy_qwq_32b_preview import models as lmdeploy_qwq_32b_preview_model
-
-    # from opencompass.configs.models.deepseek.deepseek_r1_siliconflow import models as deepseek_r1_siliconflow_model
-    from opencompass.configs.models.openai.o1_mini_2024_09_12 import models as o1_mini_2024_09_12_model
+    # from opencompass.configs.models.deepseek.deepseek_r1_api_aliyun import models as deepseek_r1_api_aliyun_model
+    # from opencompass.configs.models.openai.o1_mini_2024_09_12 import models as o1_mini_2024_09_12_model
    # from opencompass.configs.models.openai.o3_mini_2025_01_31 import models as o3_mini_2025_01_31_model

 datasets = HuProverbRea_datasets
 models = sum([v for k, v in locals().items() if k.endswith('_model')], [])
+
+for model in models:
+    if model['abbr'].startswith('deepseek_r1_api_'):
+        model['return_reasoning_content'] = True
+        model['pred_postprocessor'] = {
+            'OpenHuEval_*': {
+                'type': 'rm_<think>_before_eval'
+            }
+        }
+del model
+
 work_dir = './outputs/' + __file__.split('/')[-1].split('.')[0] + '/' # do NOT modify this line, yapf: disable, pylint: disable
--- a/examples/eval_OpenHuEval_HuStandardFIB.py
+++ b/examples/eval_OpenHuEval_HuStandardFIB.py
@ -1,12 +1,11 @@
 from mmengine.config import read_base

 with read_base():
-    from opencompass.configs.datasets.OpenHuEval.HuStandardFIB.HuStandardFIB import hu_standard_fib_datasets
+    from opencompass.configs.datasets.OpenHuEval.HuStandardFIB.HuStandardFIB import HuStandardFIB_datasets

    from opencompass.configs.models.openai.gpt_4o_mini_20240718 import models as gpt_4o_mini_20240718_model
    from opencompass.configs.models.openai.gpt_4o_2024_11_20 import models as gpt_4o_2024_11_20_model
-    from opencompass.configs.models.deepseek.deepseek_v3_api_siliconflow import models as deepseek_v3_api_siliconflow_model
-    # from opencompass.configs.models.deepseek.deepseek_v3_api import models as deepseek_v3_api_model
+    from opencompass.configs.models.deepseek.deepseek_v3_api_aliyun import models as deepseek_v3_api_aliyun_model

    from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_7b_instruct import models as lmdeploy_qwen2_5_7b_instruct_model
    from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_72b_instruct import models as lmdeploy_qwen2_5_72b_instruct_model
@ -16,11 +15,21 @@ with read_base():
    from opencompass.configs.models.hf_internlm.lmdeploy_internlm3_8b_instruct import models as lmdeploy_internlm3_8b_instruct_model

    from opencompass.configs.models.qwq.lmdeploy_qwq_32b_preview import models as lmdeploy_qwq_32b_preview_model
-
-    # from opencompass.configs.models.deepseek.deepseek_r1_siliconflow import models as deepseek_r1_siliconflow_model
+    from opencompass.configs.models.deepseek.deepseek_r1_api_aliyun import models as deepseek_r1_api_aliyun_model
    from opencompass.configs.models.openai.o1_mini_2024_09_12 import models as o1_mini_2024_09_12_model
    # from opencompass.configs.models.openai.o3_mini_2025_01_31 import models as o3_mini_2025_01_31_model

-datasets = hu_standard_fib_datasets
+datasets = HuStandardFIB_datasets
 models = sum([v for k, v in locals().items() if k.endswith('_model')], [])
+
+for model in models:
+    if model['abbr'].startswith('deepseek_r1_api_'):
+        model['return_reasoning_content'] = True
+        model['pred_postprocessor'] = {
+            'OpenHuEval_*': {
+                'type': 'rm_<think>_before_eval'
+            }
+        }
+del model
+
 work_dir = './outputs/' + __file__.split('/')[-1].split('.')[0] + '/' # do NOT modify this line, yapf: disable, pylint: disable
--- a/opencompass/configs/datasets/OpenHuEval/HuMatchingFIB/HuMatchingFIB.py
+++ b/opencompass/configs/datasets/OpenHuEval/HuMatchingFIB/HuMatchingFIB.py
@ -6,40 +6,45 @@ from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.datasets.OpenHuEval.HuMatchingFIB import HuMatchingFIBDataset, HuMatchingFIBEvaluator

 with read_base():
-    from .HuMatchingFIB_setting import INSTRUCTION, DATA_PATH, DATA_VERSION
+    from .HuMatchingFIB_setting import INSTRUCTIONS, DATA_PATH, DATA_VERSION

-instruction = INSTRUCTION['prompt_template']
-prompt_version = INSTRUCTION['version']
+PROMPT_LANGUAGES = [
+    'en',
+    # 'hu',
+]

-hu_matching_fib_reader_cfg = dict(
+HuMatchingFIB_reader_cfg = dict(
    input_columns=['question', 'options', 'hu_specific_dim'],
    output_column='reference')

-hu_matching_fib_datasets = []
+HuMatchingFIB_datasets = []

-hu_matching_fib_infer_cfg = dict(
-    prompt_template=dict(
-        type=PromptTemplate,
-        template=dict(
-            begin='</E>',
-            round=[
-                dict(role='HUMAN', prompt=instruction),
-            ],
+for lang in PROMPT_LANGUAGES:
+    instruction = INSTRUCTIONS[lang]
+
+    HuMatchingFIB_infer_cfg = dict(
+        prompt_template=dict(
+            type=PromptTemplate,
+            template=dict(
+                begin='</E>',
+                round=[
+                    dict(role='HUMAN', prompt=instruction),
+                ],
+            ),
+            ice_token='</E>',
        ),
-        ice_token='</E>',
-    ),
-    retriever=dict(type=ZeroRetriever),
-    inferencer=dict(type=GenInferencer),
-)
+        retriever=dict(type=ZeroRetriever),
+        inferencer=dict(type=GenInferencer),
+    )

-hu_matching_fib_eval_cfg = dict(evaluator=dict(type=HuMatchingFIBEvaluator))
+    HuMatchingFIB_eval_cfg = dict(evaluator=dict(type=HuMatchingFIBEvaluator))

-hu_matching_fib_datasets.append(
-    dict(
-        abbr=f'hu_matching_fib_{DATA_VERSION}-prompt_{prompt_version}',
-        type=HuMatchingFIBDataset,
-        filepath=DATA_PATH,
-        reader_cfg=hu_matching_fib_reader_cfg,
-        infer_cfg=hu_matching_fib_infer_cfg,
-        eval_cfg=hu_matching_fib_eval_cfg,
-    ))
+    HuMatchingFIB_datasets.append(
+        dict(
+            abbr=f'OpenHuEval_HuMatchingFIB_{DATA_VERSION}-prompt_{lang}',
+            type=HuMatchingFIBDataset,
+            filepath=DATA_PATH,
+            reader_cfg=HuMatchingFIB_reader_cfg,
+            infer_cfg=HuMatchingFIB_infer_cfg,
+            eval_cfg=HuMatchingFIB_eval_cfg,
+        ))
--- a/opencompass/configs/datasets/OpenHuEval/HuMatchingFIB/HuMatchingFIB_setting.py
+++ b/opencompass/configs/datasets/OpenHuEval/HuMatchingFIB/HuMatchingFIB_setting.py
@ -1,20 +1,6 @@
-# INSTRUCTIONS = {
-#     'hu': """
-#     The following question is in hungarian language on {subject}, please read the question, and try to fill in the blank by chosing appropriate option from the option list. Please organize the answer in a list. An example:
-#     {
-#         "q_main": "Egészítsd ki a Janus Pannonius életére vonatkozó rövid szöveget! Segítségként használd az internetet! Vigyázz, nem minden szót kell felhasználnod!\nJanus Pannonius nem csupán költőként volt jelentős személyisége kora Magyarországának. #0# unokaöccseként a politikából is hamar kivette a részét. #1# tanulmányai után pécsi #2# lett, majd a királyné mellett #3#. Főkincstartóként és a #4# báni cím elnyerésével komoly politikai karriert futott be Mátyás király udvarában. A királlyal megromló kapcsolata miatt részt vett a #5# elleni összeesküvésben, ezért menekülnie kellett. Ez, és az akkor már súlyosbodó betegsége okozta halálát #6#.",
-#         "options": ["A.érsek", "B.szlavón", "C.Vitéz János", "D.püspök", "E.főpohárnok", "F.Ulászló", "G.1474-ben", "H.főkancellár", "I.Itáliai", "J.Kinizsi Pál", "K.Kálmán", "L.1472-ben", "M.Prágai", "N.Mátyás"],
-#         "std_ans": ["#0#C", "#1#I", "#2#D", "#3#H", "#4#B", "#5#N", "#6#L"],
-#     }
-#     Now try to answer the following question, your response should be in a JSON format. Contain the std_ans like the case given above.
-#     The question is: {question}.
-#     """,
-#     'version':'V1',
-#     'description': 'Initial version, using 1shot, incontext, #0# as place holder, output in JSON format',
-# }
-
-INSTRUCTION = {
-    'prompt_template': """You are a native Hungarian teacher. The following question is in Hungarian language on {hu_specific_dim}. Please read the question, and choose the appropriate option from the provided "options" list to fill in each blanks in the text based on the context. Read the entire text, then fill in the blanks. Some options can be selected repeatedly. Please organize the answer in a list. An example:
+INSTRUCTIONS = {
+    'en':
+    """You are a native Hungarian teacher. The following question is in Hungarian language on {hu_specific_dim}. Please read the question, and choose the appropriate option from the provided "options" list to fill in each blanks in the text based on the context. Read the entire text, then fill in the blanks. Some options can be selected repeatedly. Please organize the answer in a list. An example:
 {
    "question": "Egészítsd ki a Janus Pannonius életére vonatkozó rövid szöveget! Segítségként használd az internetet! Vigyázz, nem minden szót kell felhasználnod!\nJanus Pannonius nem csupán költőként volt jelentős személyisége kora Magyarországának. #0# unokaöccseként a politikából is hamar kivette a részét. #1# tanulmányai után pécsi #2# lett, majd a királyné mellett #3#. Főkincstartóként és a #4# báni cím elnyerésével komoly politikai karriert futott be Mátyás király udvarában. A királlyal megromló kapcsolata miatt részt vett a #5# elleni összeesküvésben, ezért menekülnie kellett. Ez, és az akkor már súlyosbodó betegsége okozta halálát #6#.",
    "options": ["A.érsek", "B.szlavón", "C.Vitéz János", "D.püspök", "E.főpohárnok", "F.Ulászló", "G.1474-ben", "H.főkancellár", "I.Itáliai", "J.Kinizsi Pál", "K.Kálmán", "L.1472-ben", "M.Prágai", "N.Mátyás"],
@ -30,8 +16,6 @@ The question and options are:
    "options": {options},
 }
 """,
-    'version':'V2',
-    'description': 'Version 2, using 1shot, more incontext, "#0#" as place holder, output in JSON format'
 }

 OpenHuEval_Path = '/mnt/hwfile/opendatalab/weixingjian/OpenHuEval'
--- a/opencompass/configs/datasets/OpenHuEval/HuProverbRea/HuProverbRea_2CQ.py
+++ b/opencompass/configs/datasets/OpenHuEval/HuProverbRea/HuProverbRea_2CQ.py
@ -40,7 +40,7 @@ HuProverbRea_eval_cfg = dict(evaluator=dict(type=HuProverb_Evaluator_2CQ))
 HuProverbRea_datasets.append(
    dict(
        abbr=
-        f'HuProverbRea_{DATA_VERSION}_2CQ-prompt_{prompt_template_language}',
+        f'OpenHuEval_HuProverbRea_{DATA_VERSION}_2CQ-prompt_{prompt_template_language}',
        type=HuProverbDataset2CQ,
        filepath=DATA_PATH,
        reader_cfg=HuProverbRea_reader_cfg,
--- a/opencompass/configs/datasets/OpenHuEval/HuProverbRea/HuProverbRea_OE.py
+++ b/opencompass/configs/datasets/OpenHuEval/HuProverbRea/HuProverbRea_OE.py
@ -43,7 +43,7 @@ HuProverbRea_eval_cfg = dict(evaluator=dict(
 HuProverbRea_datasets.append(
    dict(
        abbr=
-        f'HuProverbRea_{DATA_VERSION}_OE-prompt_{prompt_template_language}',
+        f'OpenHuEval_HuProverbRea_{DATA_VERSION}_OE-prompt_{prompt_template_language}',
        type=HuProverbDatasetOE,
        filepath=DATA_PATH,
        reader_cfg=HuProverbRea_reader_cfg,
--- a/opencompass/configs/datasets/OpenHuEval/HuStandardFIB/HuStandardFIB.py
+++ b/opencompass/configs/datasets/OpenHuEval/HuStandardFIB/HuStandardFIB.py
@ -6,17 +6,17 @@ from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.datasets.OpenHuEval.HuStandardFIB import HuStandardFIBDataset, HuStandardFIBEvaluator

 with read_base():
-    from .HuStandardFIB_setting import INSTRUCTION, DATA_PATH, DATA_VERSION
+    from .HuStandardFIB_setting import INSTRUCTIONS, DATA_PATH, DATA_VERSION

-instruction = INSTRUCTION['prompt_template']
-prompt_version = INSTRUCTION['version']
+prompt_lang = 'en'
+instruction = INSTRUCTIONS[prompt_lang]

-hu_standard_fib_reader_cfg = dict(input_columns=['question', 'subject'],
-                                  output_column='reference')
+HuStandardFIB_reader_cfg = dict(input_columns=['question', 'subject'],
+                                output_column='reference')

-hu_standard_fib_datasets = []
+HuStandardFIB_datasets = []

-hu_standard_fib_infer_cfg = dict(
+HuStandardFIB_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
@ -31,14 +31,14 @@ hu_standard_fib_infer_cfg = dict(
    inferencer=dict(type=GenInferencer),
 )

-hu_standard_fib_eval_cfg = dict(evaluator=dict(type=HuStandardFIBEvaluator))
+HuStandardFIB_eval_cfg = dict(evaluator=dict(type=HuStandardFIBEvaluator))

-hu_standard_fib_datasets.append(
+HuStandardFIB_datasets.append(
    dict(
-        abbr=f'hu_standard_fib_{DATA_VERSION}-prompt_{prompt_version}',
+        abbr=f'OpenHuEval_HuStandardFIB_{DATA_VERSION}-prompt_{prompt_lang}',
        type=HuStandardFIBDataset,
        filepath=DATA_PATH,
-        reader_cfg=hu_standard_fib_reader_cfg,
-        infer_cfg=hu_standard_fib_infer_cfg,
-        eval_cfg=hu_standard_fib_eval_cfg,
+        reader_cfg=HuStandardFIB_reader_cfg,
+        infer_cfg=HuStandardFIB_infer_cfg,
+        eval_cfg=HuStandardFIB_eval_cfg,
    ))
--- a/opencompass/configs/datasets/OpenHuEval/HuStandardFIB/HuStandardFIB_setting.py
+++ b/opencompass/configs/datasets/OpenHuEval/HuStandardFIB/HuStandardFIB_setting.py
@ -1,5 +1,5 @@
-INSTRUCTION = {
-    'prompt_template':
+INSTRUCTIONS = {
+    'en':
    """The following questions are in Hungarian language on {hu_specific_dim}, please read the questions, and try to fill in the blanks in the question list. Please organize the answer in a list. An example:
 {
    "instruction": "Írd be a megfelelő meghatározás mellé a fogalmat!",