# OpenCompass/examples/eval_OpenHuEval_HuStandardFIB.py
from mmengine.config import read_base

with read_base():
    # Dataset under evaluation.
    from opencompass.configs.datasets.OpenHuEval.HuStandardFIB.HuStandardFIB import (
        HuStandardFIB_datasets,
    )

    # Model configs. Every list is aliased to a name ending in `_model`;
    # the `models = sum(...)` statement later in this file selects names by
    # that suffix, so the alias suffix must be kept when adding a model.
    from opencompass.configs.models.openai.gpt_4o_mini_20240718 import (
        models as gpt_4o_mini_20240718_model,
    )
    from opencompass.configs.models.openai.gpt_4o_2024_11_20 import (
        models as gpt_4o_2024_11_20_model,
    )
    from opencompass.configs.models.deepseek.deepseek_v3_api_aliyun import (
        models as deepseek_v3_api_aliyun_model,
    )

    from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_7b_instruct import (
        models as lmdeploy_qwen2_5_7b_instruct_model,
    )
    from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_72b_instruct import (
        models as lmdeploy_qwen2_5_72b_instruct_model,
    )
    from opencompass.configs.models.hf_llama.lmdeploy_llama3_1_8b_instruct import (
        models as lmdeploy_llama3_1_8b_instruct_model,
    )
    from opencompass.configs.models.hf_llama.lmdeploy_llama3_1_70b_instruct import (
        models as lmdeploy_llama3_1_70b_instruct_model,
    )

    from opencompass.configs.models.hf_internlm.lmdeploy_internlm3_8b_instruct import (
        models as lmdeploy_internlm3_8b_instruct_model,
    )

    # QwQ, DeepSeek-R1 and o1 model configs.
    from opencompass.configs.models.qwq.lmdeploy_qwq_32b_preview import (
        models as lmdeploy_qwq_32b_preview_model,
    )
    from opencompass.configs.models.qwq.qwq_32b import models as qwq_32b_model
    from opencompass.configs.models.qwq.qwq_plus_2025_03_05 import (
        models as qwq_plus_2025_03_05_model,
    )
    from opencompass.configs.models.deepseek.deepseek_r1_api_aliyun import (
        models as deepseek_r1_api_aliyun_model,
    )
    from opencompass.configs.models.deepseek.deepseek_r1_api_siliconflow import (
        models as deepseek_r1_api_siliconflow_model,
    )
    from opencompass.configs.models.openai.o1_mini_2024_09_12 import (
        models as o1_mini_2024_09_12_model,
    )

    # Disabled: uncomment to include o3-mini in the run.
    # from opencompass.configs.models.openai.o3_mini_2025_01_31 import models as o3_mini_2025_01_31_model

datasets = HuStandardFIB_datasets

# Collect every model list bound to a name ending in `_model` and
# concatenate them into one flat list of model config dicts.
models = sum([v for k, v in locals().items() if k.endswith('_model')], [])

for model in models:
    # DeepSeek-R1 API models and QwQ models other than QwQ-32B-Preview:
    # ask for the reasoning content and attach the 'rm_<think>_before_eval'
    # postprocessor to datasets matching 'OpenHuEval_*'.
    # NOTE(review): presumably this strips the <think> section before
    # scoring -- confirm against the registered postprocessor.
    if model['abbr'].startswith('deepseek_r1_api_') or (
        model['abbr'].startswith('QwQ') and model['abbr'] != 'QwQ-32B-Preview'
    ):
        model['return_reasoning_content'] = True
        model['pred_postprocessor'] = {
            'OpenHuEval_*': {'type': 'rm_<think>_before_eval'}
        }
    # QwQ-32B-Preview gets its own answer-extraction postprocessor instead.
    if model['abbr'] == 'QwQ-32B-Preview':
        model['pred_postprocessor'] = {
            'OpenHuEval_*': {
                'type': 'extract_qwq_answer_before_eval_for_hustandardfib'
            }
        }
# Remove the loop variable so it does not remain as a module-level name.
del model

work_dir = (
    './outputs/' + __file__.split('/')[-1].split('.')[0] + '/'
)  # do NOT modify this line, yapf: disable, pylint: disable