from mmengine.config import read_base

from opencompass.summarizers.subjective.husimpleqa import HuSimpleQASummarizer

with read_base():
    from opencompass.configs.datasets.OpenHuEval.HuSimpleQA.HuSimpleQA import HuSimpleQA_datasets, PROMPT_LANGUAGES

    from opencompass.configs.models.openai.gpt_4o_mini_20240718 import models as gpt_4o_mini_20240718_model
    from opencompass.configs.models.openai.gpt_4o_2024_11_20 import models as gpt_4o_2024_11_20_model
    from opencompass.configs.models.deepseek.deepseek_v3_api_aliyun import models as deepseek_v3_api_aliyun_model

    from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_7b_instruct import models as lmdeploy_qwen2_5_7b_instruct_model
    # from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_32b_instruct import models as lmdeploy_qwen2_5_32b_instruct_model
    # from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_14b_instruct import models as lmdeploy_qwen2_5_14b_instruct_model
    from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_72b_instruct import models as lmdeploy_qwen2_5_72b_instruct_model
    from opencompass.configs.models.hf_llama.lmdeploy_llama3_1_8b_instruct import models as lmdeploy_llama3_1_8b_instruct_model
    from opencompass.configs.models.hf_llama.lmdeploy_llama3_1_70b_instruct import models as lmdeploy_llama3_1_70b_instruct_model
    # from opencompass.configs.models.hf_llama.llama3_3_70b_api_siliconflow import models as llama3_3_70b_api_siliconflow_model

    # from opencompass.configs.models.hf_internlm.lmdeploy_internlm3_8b_instruct import models as lmdeploy_internlm3_8b_instruct_model

    from opencompass.configs.models.qwq.lmdeploy_qwq_32b_preview import models as lmdeploy_qwq_32b_preview_model
    from opencompass.configs.models.deepseek.deepseek_r1_api_aliyun import models as deepseek_r1_api_aliyun_model
    # from opencompass.configs.models.deepseek.deepseek_r1_distill_llama_8b_api_aliyun import models as deepseek_r1_distill_llama_8b_api_aliyun_model
    # from opencompass.configs.models.deepseek.lmdeploy_deepseek_r1_distill_qwen_32b_instruct import models as deepseek_r1_distill_qwen_32b_instruct_model
    # from opencompass.configs.models.deepseek.lmdeploy_deepseek_r1_distill_qwen_14b_instruct import models as deepseek_r1_distill_qwen_14b_instruct_model
    # from opencompass.configs.models.deepseek.lmdeploy_deepseek_r1_distill_llama_70b_instruct import models as deepseek_r1_distill_llama_70b_instruct_model

    from opencompass.configs.models.openai.o1_mini_2024_09_12 import models as o1_mini_2024_09_12_model
    # from opencompass.configs.models.openai.o3_mini_2025_01_31 import models as o3_mini_2025_01_31_model

from opencompass.models import OpenAI
from opencompass.partitioners import (
    NumWorkerPartitioner,
    SubjectiveNumWorkerPartitioner,
)
from opencompass.runners import LocalRunner, SlurmSequentialRunner
from opencompass.tasks import OpenICLInferTask
from opencompass.tasks.subjective_eval import SubjectiveEvalTask
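
# Meta template for API-based chat models: maps the dataset's HUMAN/BOT roles
# onto the corresponding API roles, with the BOT turn being generated.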
api_meta_template = dict(round=[
    dict(role='HUMAN', api_role='HUMAN'),
    dict(role='BOT', api_role='BOT', generate=True),
])
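
# Gather every imported `*_model` list into a single list of candidate models.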
models = sum([v for k, v in locals().items() if k.endswith('_model')], [])
for model in models:
    # DeepSeek-R1 API models return their reasoning content; strip the
    # <think> block before evaluation.
    if model['abbr'].startswith('deepseek_r1_api_'):
        model['return_reasoning_content'] = True
        model['pred_postprocessor'] = {
            'OpenHuEval_*': {
                'type': 'rm_<think>_before_eval'
            }
        }
    # QwQ outputs are post-processed to extract the final answer before
    # HuSimpleQA judging.
    if model['abbr'].startswith('QwQ'):
        model['pred_postprocessor'] = {
            'OpenHuEval_*': {
                'type': 'extract_qwq_answer_before_eval_for_husimpleqa'
            }
        }
del model
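
# LLM judge for subjective evaluation; the API key and proxy URL are expected
# to be supplied via environment variables ('ENV').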
judge_models = [
    dict(
        abbr='GPT-4o-2024-08-06',
        type=OpenAI,
        path='gpt-4o-2024-08-06',
        key='ENV',
        openai_proxy_url='ENV',
        verbose=True,
        meta_template=api_meta_template,
        query_per_second=2,
        max_out_len=8192,
        max_seq_len=16384,
        batch_size=8,
        temperature=0,
    )
]

datasets = HuSimpleQA_datasets
del HuSimpleQA_datasets
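
# Inference stage: each dataset is split across 8 workers and the inference
# tasks are launched through Slurm.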
infer = dict(
    partitioner=dict(
        type=NumWorkerPartitioner,
        num_worker=8,
    ),
    runner=dict(
        type=SlurmSequentialRunner,
        max_num_workers=16,
        task=dict(type=OpenICLInferTask),
    ),
)
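
# Evaluation stage: predictions are scored by the judge model; the subjective
# evaluation tasks run locally.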
eval = dict(
    partitioner=dict(
        type=SubjectiveNumWorkerPartitioner,
        num_worker=8,
        models=models,
        judge_models=judge_models,
    ),
    runner=dict(type=LocalRunner,
                max_num_workers=16,
                task=dict(type=SubjectiveEvalTask)),
)
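
# Summarize HuSimpleQA results for each prompt language defined in the
# dataset config.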
summarizer = dict(type=HuSimpleQASummarizer,
                  prompt_languages=PROMPT_LANGUAGES)

work_dir = (
    './outputs/' + __file__.split('/')[-1].split('.')[0] + '/'
)  # do NOT modify this line, yapf: disable, pylint: disable