diff --git a/configs/models/chatglm/lmdeploy_glm4_9b_chat.py b/configs/models/chatglm/lmdeploy_glm4_9b_chat.py index 2f8218a6..c5cb8c4d 100644 --- a/configs/models/chatglm/lmdeploy_glm4_9b_chat.py +++ b/configs/models/chatglm/lmdeploy_glm4_9b_chat.py @@ -6,9 +6,9 @@ models = [ abbr='glm-4-9b-chat-turbomind', path='THUDM/glm-4-9b-chat', engine_config=dict(max_batch_size=16, tp=1), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048), max_seq_len=8192, - max_out_len=1024, + max_out_len=2048, batch_size=16, run_cfg=dict(num_gpus=1), ) diff --git a/configs/models/deepseek/lmdeploy_deepseek_67b_chat.py b/configs/models/deepseek/lmdeploy_deepseek_67b_chat.py index e369e6e1..67624eb8 100644 --- a/configs/models/deepseek/lmdeploy_deepseek_67b_chat.py +++ b/configs/models/deepseek/lmdeploy_deepseek_67b_chat.py @@ -7,8 +7,8 @@ models = [ path='deepseek-ai/deepseek-llm-67b-chat', engine_config=dict(max_batch_size=16, tp=4), gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9), - max_seq_len=7168, - max_out_len=1024, + max_seq_len=8192, + max_out_len=2048, batch_size=16, run_cfg=dict(num_gpus=4), ) diff --git a/configs/models/deepseek/lmdeploy_deepseek_7b_chat.py b/configs/models/deepseek/lmdeploy_deepseek_7b_chat.py index 26aa2afc..2c108cc1 100644 --- a/configs/models/deepseek/lmdeploy_deepseek_7b_chat.py +++ b/configs/models/deepseek/lmdeploy_deepseek_7b_chat.py @@ -7,8 +7,8 @@ models = [ path='deepseek-ai/deepseek-llm-7b-chat', engine_config=dict(max_batch_size=16, tp=1), gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9), - max_seq_len=7168, - max_out_len=1024, + max_seq_len=8192, + max_out_len=2048, batch_size=16, run_cfg=dict(num_gpus=1), ) diff --git a/configs/models/hf_internlm/lmdeploy_internlm2_5_1_8b_chat.py b/configs/models/hf_internlm/lmdeploy_internlm2_5_1_8b_chat.py index 5d5c257b..cf4691f1 100644 --- 
a/configs/models/hf_internlm/lmdeploy_internlm2_5_1_8b_chat.py +++ b/configs/models/hf_internlm/lmdeploy_internlm2_5_1_8b_chat.py @@ -5,10 +5,10 @@ models = [ type=TurboMindModelwithChatTemplate, abbr='internlm2_5-1_8b-chat-turbomind', path='internlm/internlm2_5-1_8b-chat', - engine_config=dict(session_len=8192, max_batch_size=16, tp=1), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048), - max_seq_len=8192, - max_out_len=2048, + engine_config=dict(session_len=16384, max_batch_size=16, tp=1), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096), + max_seq_len=16384, + max_out_len=4096, batch_size=16, run_cfg=dict(num_gpus=1), ) diff --git a/configs/models/hf_internlm/lmdeploy_internlm2_5_20b_chat.py b/configs/models/hf_internlm/lmdeploy_internlm2_5_20b_chat.py index f1bb1b08..7fb52161 100644 --- a/configs/models/hf_internlm/lmdeploy_internlm2_5_20b_chat.py +++ b/configs/models/hf_internlm/lmdeploy_internlm2_5_20b_chat.py @@ -5,10 +5,10 @@ models = [ type=TurboMindModelwithChatTemplate, abbr='internlm2_5-20b-chat-turbomind', path='internlm/internlm2_5-20b-chat', - engine_config=dict(session_len=8192, max_batch_size=16, tp=2), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048), - max_seq_len=8192, - max_out_len=2048, + engine_config=dict(session_len=16384, max_batch_size=16, tp=2), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096), + max_seq_len=16384, + max_out_len=4096, batch_size=16, run_cfg=dict(num_gpus=2), ) diff --git a/configs/models/hf_internlm/lmdeploy_internlm2_5_7b_chat.py b/configs/models/hf_internlm/lmdeploy_internlm2_5_7b_chat.py index 75fb9371..8dce2684 100644 --- a/configs/models/hf_internlm/lmdeploy_internlm2_5_7b_chat.py +++ b/configs/models/hf_internlm/lmdeploy_internlm2_5_7b_chat.py @@ -5,10 +5,10 @@ models = [ type=TurboMindModelwithChatTemplate, abbr='internlm2_5-7b-chat-turbomind', path='internlm/internlm2_5-7b-chat', - 
engine_config=dict(session_len=7168, max_batch_size=16, tp=1), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), - max_seq_len=7168, - max_out_len=1024, + engine_config=dict(session_len=16384, max_batch_size=16, tp=1), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096), + max_seq_len=16384, + max_out_len=4096, batch_size=16, run_cfg=dict(num_gpus=1), ) diff --git a/configs/models/hf_internlm/lmdeploy_internlm2_chat_1_8b.py b/configs/models/hf_internlm/lmdeploy_internlm2_chat_1_8b.py index 9c358d5a..f5df7926 100644 --- a/configs/models/hf_internlm/lmdeploy_internlm2_chat_1_8b.py +++ b/configs/models/hf_internlm/lmdeploy_internlm2_chat_1_8b.py @@ -5,10 +5,10 @@ models = [ type=TurboMindModelwithChatTemplate, abbr='internlm2-chat-1.8b-turbomind', path='internlm/internlm2-chat-1_8b', - engine_config=dict(session_len=7168, max_batch_size=16, tp=1), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), - max_seq_len=7168, - max_out_len=1024, + engine_config=dict(session_len=8192, max_batch_size=16, tp=1), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096), + max_seq_len=8192, + max_out_len=4096, batch_size=16, run_cfg=dict(num_gpus=1), ) diff --git a/configs/models/hf_internlm/lmdeploy_internlm2_chat_20b.py b/configs/models/hf_internlm/lmdeploy_internlm2_chat_20b.py index 44371549..23f35636 100644 --- a/configs/models/hf_internlm/lmdeploy_internlm2_chat_20b.py +++ b/configs/models/hf_internlm/lmdeploy_internlm2_chat_20b.py @@ -5,10 +5,10 @@ models = [ type=TurboMindModelwithChatTemplate, abbr='internlm2-chat-20b-turbomind', path='internlm/internlm2-chat-20b', - engine_config=dict(session_len=7168, max_batch_size=16, tp=2), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), - max_seq_len=7168, - max_out_len=1024, + engine_config=dict(session_len=8192, max_batch_size=16, tp=2), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, 
max_new_tokens=4096), + max_seq_len=8192, + max_out_len=4096, batch_size=16, run_cfg=dict(num_gpus=2), ) diff --git a/configs/models/hf_internlm/lmdeploy_internlm2_chat_7b.py b/configs/models/hf_internlm/lmdeploy_internlm2_chat_7b.py index 82ad2e46..60097e37 100644 --- a/configs/models/hf_internlm/lmdeploy_internlm2_chat_7b.py +++ b/configs/models/hf_internlm/lmdeploy_internlm2_chat_7b.py @@ -5,10 +5,10 @@ models = [ type=TurboMindModelwithChatTemplate, abbr='internlm2-chat-7b-turbomind', path='internlm/internlm2-chat-7b', - engine_config=dict(session_len=7168, max_batch_size=16, tp=1), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), - max_seq_len=7168, - max_out_len=1024, + engine_config=dict(session_len=8192, max_batch_size=16, tp=1), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096), + max_seq_len=8192, + max_out_len=4096, batch_size=16, run_cfg=dict(num_gpus=1), ) diff --git a/configs/models/hf_internlm/lmdeploy_internlm_chat_20b.py b/configs/models/hf_internlm/lmdeploy_internlm_chat_20b.py index 8718a6cf..e9af5578 100644 --- a/configs/models/hf_internlm/lmdeploy_internlm_chat_20b.py +++ b/configs/models/hf_internlm/lmdeploy_internlm_chat_20b.py @@ -6,9 +6,9 @@ models = [ abbr='internlm-chat-20b-turbomind', path='internlm/internlm-chat-20b', engine_config=dict(session_len=4096, max_batch_size=16, tp=2), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048), max_seq_len=4096, - max_out_len=1024, + max_out_len=2048, batch_size=16, run_cfg=dict(num_gpus=2), ) diff --git a/configs/models/hf_internlm/lmdeploy_internlm_chat_7b.py b/configs/models/hf_internlm/lmdeploy_internlm_chat_7b.py index ea61313a..50656a5f 100644 --- a/configs/models/hf_internlm/lmdeploy_internlm_chat_7b.py +++ b/configs/models/hf_internlm/lmdeploy_internlm_chat_7b.py @@ -6,9 +6,9 @@ models = [ abbr='internlm-chat-7b-turbomind', 
path='internlm/internlm-chat-7b', engine_config=dict(session_len=4096, max_batch_size=16, tp=1), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048), max_seq_len=4096, - max_out_len=1024, + max_out_len=2048, batch_size=16, run_cfg=dict(num_gpus=1), ) diff --git a/configs/models/hf_llama/hf_llama3_1_70b_instruct.py b/configs/models/hf_llama/hf_llama3_1_70b_instruct.py index 4a17de93..c7527bb5 100644 --- a/configs/models/hf_llama/hf_llama3_1_70b_instruct.py +++ b/configs/models/hf_llama/hf_llama3_1_70b_instruct.py @@ -5,7 +5,7 @@ models = [ type=HuggingFacewithChatTemplate, abbr='llama-3_1-70b-instruct-hf', path='meta-llama/Meta-Llama-3.1-70B-Instruct', - max_out_len=1024, + max_out_len=4096, batch_size=8, run_cfg=dict(num_gpus=4), stop_words=['<|end_of_text|>', '<|eot_id|>'], diff --git a/configs/models/hf_llama/hf_llama3_1_8b.py b/configs/models/hf_llama/hf_llama3_1_8b.py new file mode 100644 index 00000000..a41e1ddf --- /dev/null +++ b/configs/models/hf_llama/hf_llama3_1_8b.py @@ -0,0 +1,12 @@ +from opencompass.models import HuggingFaceBaseModel + +models = [ + dict( + type=HuggingFaceBaseModel, + abbr='llama-3_1-8b-hf', + path='meta-llama/Meta-Llama-3.1-8B', + max_out_len=1024, + batch_size=8, + run_cfg=dict(num_gpus=1), + ) +] diff --git a/configs/models/hf_llama/lmdeploy_llama2_13b_chat.py b/configs/models/hf_llama/lmdeploy_llama2_13b_chat.py index cb42cb29..cacdec9a 100644 --- a/configs/models/hf_llama/lmdeploy_llama2_13b_chat.py +++ b/configs/models/hf_llama/lmdeploy_llama2_13b_chat.py @@ -6,9 +6,9 @@ models = [ abbr='llama-2-13b-chat-turbomind', path='meta-llama/Llama-2-13b-chat-hf', engine_config=dict(max_batch_size=16, tp=1), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048), max_seq_len=4096, - max_out_len=1024, + max_out_len=2048, 
batch_size=16, run_cfg=dict(num_gpus=1), ) diff --git a/configs/models/hf_llama/lmdeploy_llama2_70b_chat.py b/configs/models/hf_llama/lmdeploy_llama2_70b_chat.py index d6c69c6f..b850106b 100644 --- a/configs/models/hf_llama/lmdeploy_llama2_70b_chat.py +++ b/configs/models/hf_llama/lmdeploy_llama2_70b_chat.py @@ -6,9 +6,9 @@ models = [ abbr='llama-2-70b-chat-turbomind', path='meta-llama/Llama-2-70b-chat-hf', engine_config=dict(max_batch_size=16, tp=4), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048), max_seq_len=4096, - max_out_len=1024, + max_out_len=2048, batch_size=16, run_cfg=dict(num_gpus=4), ) diff --git a/configs/models/hf_llama/lmdeploy_llama2_7b_chat.py b/configs/models/hf_llama/lmdeploy_llama2_7b_chat.py index f520ce8b..aa345248 100644 --- a/configs/models/hf_llama/lmdeploy_llama2_7b_chat.py +++ b/configs/models/hf_llama/lmdeploy_llama2_7b_chat.py @@ -6,9 +6,9 @@ models = [ abbr='llama-2-7b-chat-turbomind', path='meta-llama/Llama-2-7b-chat-hf', engine_config=dict(max_batch_size=16, tp=1), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048), max_seq_len=4096, - max_out_len=1024, + max_out_len=2048, batch_size=16, run_cfg=dict(num_gpus=1), ) diff --git a/configs/models/hf_llama/lmdeploy_llama3_1_70b_instruct.py b/configs/models/hf_llama/lmdeploy_llama3_1_70b_instruct.py index 23f9bc2a..9674169f 100644 --- a/configs/models/hf_llama/lmdeploy_llama3_1_70b_instruct.py +++ b/configs/models/hf_llama/lmdeploy_llama3_1_70b_instruct.py @@ -6,9 +6,9 @@ models = [ abbr='llama-3_1-70b-instruct-turbomind', path='meta-llama/Meta-Llama-3.1-70B-Instruct', engine_config=dict(max_batch_size=16, tp=4), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), - max_seq_len=7168, - max_out_len=1024, + gen_config=dict(top_k=1, temperature=1e-6, 
top_p=0.9, max_new_tokens=4096), + max_seq_len=16384, + max_out_len=4096, batch_size=16, run_cfg=dict(num_gpus=4), stop_words=['<|end_of_text|>', '<|eot_id|>'], diff --git a/configs/models/hf_llama/lmdeploy_llama3_1_8b_instruct.py b/configs/models/hf_llama/lmdeploy_llama3_1_8b_instruct.py index 429dfec7..2754eb83 100644 --- a/configs/models/hf_llama/lmdeploy_llama3_1_8b_instruct.py +++ b/configs/models/hf_llama/lmdeploy_llama3_1_8b_instruct.py @@ -6,9 +6,9 @@ models = [ abbr='llama-3_1-8b-instruct-turbomind', path='meta-llama/Meta-Llama-3.1-8B-Instruct', engine_config=dict(max_batch_size=16, tp=1), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), - max_seq_len=7168, - max_out_len=1024, + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096), + max_seq_len=16384, + max_out_len=4096, batch_size=16, run_cfg=dict(num_gpus=1), stop_words=['<|end_of_text|>', '<|eot_id|>'], diff --git a/configs/models/hf_llama/lmdeploy_llama3_70b_instruct.py b/configs/models/hf_llama/lmdeploy_llama3_70b_instruct.py index 333dc015..12fc944c 100644 --- a/configs/models/hf_llama/lmdeploy_llama3_70b_instruct.py +++ b/configs/models/hf_llama/lmdeploy_llama3_70b_instruct.py @@ -6,9 +6,9 @@ models = [ abbr='llama-3-70b-instruct-turbomind', path='meta-llama/Meta-Llama-3-70B-Instruct', engine_config=dict(max_batch_size=16, tp=4), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), - max_seq_len=7168, - max_out_len=1024, + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096), + max_seq_len=16384, + max_out_len=4096, batch_size=16, run_cfg=dict(num_gpus=4), stop_words=['<|end_of_text|>', '<|eot_id|>'], diff --git a/configs/models/hf_llama/lmdeploy_llama3_8b_instruct.py b/configs/models/hf_llama/lmdeploy_llama3_8b_instruct.py index cc5b3bd4..5a6431b7 100644 --- a/configs/models/hf_llama/lmdeploy_llama3_8b_instruct.py +++ b/configs/models/hf_llama/lmdeploy_llama3_8b_instruct.py @@ -6,9 +6,9 @@ models = [ 
abbr='llama-3-8b-instruct-turbomind', path='meta-llama/Meta-Llama-3-8B-Instruct', engine_config=dict(max_batch_size=16, tp=1), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), - max_seq_len=7168, - max_out_len=1024, + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096), + max_seq_len=16384, + max_out_len=4096, batch_size=16, run_cfg=dict(num_gpus=1), stop_words=['<|end_of_text|>', '<|eot_id|>'], diff --git a/configs/models/mistral/lmdeploy_mistral_7b_instruct_v0_3.py b/configs/models/mistral/lmdeploy_mistral_7b_instruct_v0_3.py new file mode 100644 index 00000000..4c867b60 --- /dev/null +++ b/configs/models/mistral/lmdeploy_mistral_7b_instruct_v0_3.py @@ -0,0 +1,15 @@ +from opencompass.models import TurboMindModelwithChatTemplate + +models = [ + dict( + type=TurboMindModelwithChatTemplate, + abbr='mistral-7b-instruct-v0.3-turbomind', + path='mistralai/Mistral-7B-Instruct-v0.3', + engine_config=dict(session_len=32768, max_batch_size=16, tp=1), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096), + max_seq_len=32768, + max_out_len=4096, + batch_size=16, + run_cfg=dict(num_gpus=1), + ) +] diff --git a/configs/models/mistral/lmdeploy_mixtral_large_instruct_2407.py b/configs/models/mistral/lmdeploy_mixtral_large_instruct_2407.py new file mode 100644 index 00000000..e79a1f73 --- /dev/null +++ b/configs/models/mistral/lmdeploy_mixtral_large_instruct_2407.py @@ -0,0 +1,15 @@ +from opencompass.models import TurboMindModelwithChatTemplate + +models = [ + dict( + type=TurboMindModelwithChatTemplate, + abbr='mixtral-large-instruct-2407-turbomind', + path='mistralai/Mistral-Large-Instruct-2407', + engine_config=dict(session_len=32768, max_batch_size=16, tp=4), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096), + max_seq_len=32768, + max_out_len=4096, + batch_size=16, + run_cfg=dict(num_gpus=4), + ) +] diff --git a/configs/models/qwen/lmdeploy_qwen1_5_110b_chat.py 
b/configs/models/qwen/lmdeploy_qwen1_5_110b_chat.py index 9b92b814..bc123b40 100644 --- a/configs/models/qwen/lmdeploy_qwen1_5_110b_chat.py +++ b/configs/models/qwen/lmdeploy_qwen1_5_110b_chat.py @@ -5,10 +5,10 @@ models = [ type=TurboMindModelwithChatTemplate, abbr='qwen1.5-110b-chat-turbomind', path='Qwen/Qwen1.5-110B-Chat', - engine_config=dict(session_len=7168, max_batch_size=8, tp=4), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), - max_seq_len=7168, - max_out_len=1024, + engine_config=dict(session_len=16384, max_batch_size=8, tp=4), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096), + max_seq_len=16384, + max_out_len=4096, batch_size=8, run_cfg=dict(num_gpus=4), stop_words=['<|im_end|>', '<|im_start|>'], diff --git a/configs/models/qwen/lmdeploy_qwen1_5_14b_chat.py b/configs/models/qwen/lmdeploy_qwen1_5_14b_chat.py index d2b85c2a..5f0d54b9 100644 --- a/configs/models/qwen/lmdeploy_qwen1_5_14b_chat.py +++ b/configs/models/qwen/lmdeploy_qwen1_5_14b_chat.py @@ -5,10 +5,10 @@ models = [ type=TurboMindModelwithChatTemplate, abbr='qwen1.5-14b-chat-turbomind', path='Qwen/Qwen1.5-14B-Chat', - engine_config=dict(session_len=7168, max_batch_size=16, tp=1), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), - max_seq_len=7168, - max_out_len=1024, + engine_config=dict(session_len=16384, max_batch_size=16, tp=1), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096), + max_seq_len=16384, + max_out_len=4096, batch_size=16, run_cfg=dict(num_gpus=1), stop_words=['<|im_end|>', '<|im_start|>'], diff --git a/configs/models/qwen/lmdeploy_qwen1_5_1_8b_chat.py b/configs/models/qwen/lmdeploy_qwen1_5_1_8b_chat.py index ff28ac0b..803ff333 100644 --- a/configs/models/qwen/lmdeploy_qwen1_5_1_8b_chat.py +++ b/configs/models/qwen/lmdeploy_qwen1_5_1_8b_chat.py @@ -5,10 +5,10 @@ models = [ type=TurboMindModelwithChatTemplate, abbr='qwen1.5-1.8b-chat-turbomind', 
path='Qwen/Qwen1.5-1.8B-Chat', - engine_config=dict(session_len=7168, max_batch_size=16, tp=1), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), - max_seq_len=7168, - max_out_len=1024, + engine_config=dict(session_len=16384, max_batch_size=16, tp=1), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096), + max_seq_len=16384, + max_out_len=4096, batch_size=16, run_cfg=dict(num_gpus=1), stop_words=['<|im_end|>', '<|im_start|>'], diff --git a/configs/models/qwen/lmdeploy_qwen1_5_32b_chat.py b/configs/models/qwen/lmdeploy_qwen1_5_32b_chat.py index 1196548a..96fd1e43 100644 --- a/configs/models/qwen/lmdeploy_qwen1_5_32b_chat.py +++ b/configs/models/qwen/lmdeploy_qwen1_5_32b_chat.py @@ -5,10 +5,10 @@ models = [ type=TurboMindModelwithChatTemplate, abbr='qwen1.5-32b-chat-turbomind', path='Qwen/Qwen1.5-32B-Chat', - engine_config=dict(session_len=7168, max_batch_size=16, tp=2), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), - max_seq_len=7168, - max_out_len=1024, + engine_config=dict(session_len=16384, max_batch_size=16, tp=2), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096), + max_seq_len=16384, + max_out_len=4096, batch_size=16, run_cfg=dict(num_gpus=2), stop_words=['<|im_end|>', '<|im_start|>'], diff --git a/configs/models/qwen/lmdeploy_qwen1_5_4b_chat.py b/configs/models/qwen/lmdeploy_qwen1_5_4b_chat.py index bde14a29..f9fcc3fb 100644 --- a/configs/models/qwen/lmdeploy_qwen1_5_4b_chat.py +++ b/configs/models/qwen/lmdeploy_qwen1_5_4b_chat.py @@ -5,10 +5,10 @@ models = [ type=TurboMindModelwithChatTemplate, abbr='qwen1.5-4b-chat-turbomind', path='Qwen/Qwen1.5-4B-Chat', - engine_config=dict(session_len=7168, max_batch_size=16, tp=1), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), - max_seq_len=7168, - max_out_len=1024, + engine_config=dict(session_len=16384, max_batch_size=16, tp=1), + gen_config=dict(top_k=1, temperature=1e-6, 
top_p=0.9, max_new_tokens=4096), + max_seq_len=16384, + max_out_len=4096, batch_size=16, run_cfg=dict(num_gpus=1), stop_words=['<|im_end|>', '<|im_start|>'], diff --git a/configs/models/qwen/lmdeploy_qwen1_5_72b_chat.py b/configs/models/qwen/lmdeploy_qwen1_5_72b_chat.py index 38175eaf..64a5f7cb 100644 --- a/configs/models/qwen/lmdeploy_qwen1_5_72b_chat.py +++ b/configs/models/qwen/lmdeploy_qwen1_5_72b_chat.py @@ -5,10 +5,10 @@ models = [ type=TurboMindModelwithChatTemplate, abbr='qwen1.5-72b-chat-turbomind', path='Qwen/Qwen1.5-72B-Chat', - engine_config=dict(session_len=7168, max_batch_size=16, tp=4), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), - max_seq_len=7168, - max_out_len=1024, + engine_config=dict(session_len=16384, max_batch_size=16, tp=4), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096), + max_seq_len=16384, + max_out_len=4096, batch_size=16, run_cfg=dict(num_gpus=4), stop_words=['<|im_end|>', '<|im_start|>'], diff --git a/configs/models/qwen/lmdeploy_qwen1_5_7b_chat.py b/configs/models/qwen/lmdeploy_qwen1_5_7b_chat.py index ca733c0b..1ab39303 100644 --- a/configs/models/qwen/lmdeploy_qwen1_5_7b_chat.py +++ b/configs/models/qwen/lmdeploy_qwen1_5_7b_chat.py @@ -5,10 +5,10 @@ models = [ type=TurboMindModelwithChatTemplate, abbr='qwen1.5-7b-chat-turbomind', path='Qwen/Qwen1.5-7B-Chat', - engine_config=dict(session_len=7168, max_batch_size=16, tp=1), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), - max_seq_len=7168, - max_out_len=1024, + engine_config=dict(session_len=16384, max_batch_size=16, tp=1), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096), + max_seq_len=16384, + max_out_len=4096, batch_size=16, run_cfg=dict(num_gpus=1), stop_words=['<|im_end|>', '<|im_start|>'], diff --git a/configs/models/qwen/lmdeploy_qwen2_1_5b_instruct.py b/configs/models/qwen/lmdeploy_qwen2_1_5b_instruct.py index 502de187..f050ca38 100644 --- 
a/configs/models/qwen/lmdeploy_qwen2_1_5b_instruct.py +++ b/configs/models/qwen/lmdeploy_qwen2_1_5b_instruct.py @@ -5,10 +5,10 @@ models = [ type=TurboMindModelwithChatTemplate, abbr='qwen2-1.5b-instruct-turbomind', path='Qwen/Qwen2-1.5B-Instruct', - engine_config=dict(session_len=7168, max_batch_size=16, tp=1), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), - max_seq_len=7168, - max_out_len=1024, + engine_config=dict(session_len=16384, max_batch_size=16, tp=1), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096), + max_seq_len=16384, + max_out_len=4096, batch_size=16, run_cfg=dict(num_gpus=1), ) diff --git a/configs/models/qwen/lmdeploy_qwen2_72b_instruct.py b/configs/models/qwen/lmdeploy_qwen2_72b_instruct.py index 69ecb798..c29482b5 100644 --- a/configs/models/qwen/lmdeploy_qwen2_72b_instruct.py +++ b/configs/models/qwen/lmdeploy_qwen2_72b_instruct.py @@ -5,10 +5,10 @@ models = [ type=TurboMindModelwithChatTemplate, abbr='qwen2-72b-instruct-turbomind', path='Qwen/Qwen2-72B-Instruct', - engine_config=dict(session_len=7168, max_batch_size=16, tp=4), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), - max_seq_len=7168, - max_out_len=1024, + engine_config=dict(session_len=16384, max_batch_size=16, tp=4), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096), + max_seq_len=16384, + max_out_len=4096, batch_size=16, run_cfg=dict(num_gpus=4), ) diff --git a/configs/models/qwen/lmdeploy_qwen2_7b_instruct.py b/configs/models/qwen/lmdeploy_qwen2_7b_instruct.py index 4dff85e0..05fa25c5 100644 --- a/configs/models/qwen/lmdeploy_qwen2_7b_instruct.py +++ b/configs/models/qwen/lmdeploy_qwen2_7b_instruct.py @@ -5,10 +5,10 @@ models = [ type=TurboMindModelwithChatTemplate, abbr='qwen2-7b-instruct-turbomind', path='Qwen/Qwen2-7B-Instruct', - engine_config=dict(session_len=7168, max_batch_size=16, tp=1), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, 
max_new_tokens=1024), - max_seq_len=7168, - max_out_len=1024, + engine_config=dict(session_len=16384, max_batch_size=16, tp=1), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096), + max_seq_len=16384, + max_out_len=4096, batch_size=16, run_cfg=dict(num_gpus=1), ) diff --git a/configs/models/yi/lmdeploy_yi_1_5_34b_chat.py b/configs/models/yi/lmdeploy_yi_1_5_34b_chat.py new file mode 100644 index 00000000..d296a100 --- /dev/null +++ b/configs/models/yi/lmdeploy_yi_1_5_34b_chat.py @@ -0,0 +1,15 @@ +from opencompass.models import TurboMindModelwithChatTemplate + +models = [ + dict( + type=TurboMindModelwithChatTemplate, + abbr='yi-1.5-34b-chat-turbomind', + path='01-ai/Yi-1.5-34B-Chat', + engine_config=dict(session_len=4096, max_batch_size=16, tp=2), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048), + max_seq_len=4096, + max_out_len=2048, + batch_size=16, + run_cfg=dict(num_gpus=2), + ) +] diff --git a/configs/models/yi/lmdeploy_yi_1_5_6b_chat.py b/configs/models/yi/lmdeploy_yi_1_5_6b_chat.py new file mode 100644 index 00000000..eeaf8ea2 --- /dev/null +++ b/configs/models/yi/lmdeploy_yi_1_5_6b_chat.py @@ -0,0 +1,15 @@ +from opencompass.models import TurboMindModelwithChatTemplate + +models = [ + dict( + type=TurboMindModelwithChatTemplate, + abbr='yi-1.5-6b-chat-turbomind', + path='01-ai/Yi-1.5-6B-Chat', + engine_config=dict(session_len=4096, max_batch_size=16, tp=1), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048), + max_seq_len=4096, + max_out_len=2048, + batch_size=16, + run_cfg=dict(num_gpus=1), + ) +] diff --git a/configs/models/yi/lmdeploy_yi_1_5_9b_chat.py b/configs/models/yi/lmdeploy_yi_1_5_9b_chat.py new file mode 100644 index 00000000..4e33ba23 --- /dev/null +++ b/configs/models/yi/lmdeploy_yi_1_5_9b_chat.py @@ -0,0 +1,15 @@ +from opencompass.models import TurboMindModelwithChatTemplate + +models = [ + dict( + type=TurboMindModelwithChatTemplate, + 
abbr='yi-1.5-9b-chat-turbomind', + path='01-ai/Yi-1.5-9B-Chat', + engine_config=dict(session_len=4096, max_batch_size=16, tp=1), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048), + max_seq_len=4096, + max_out_len=2048, + batch_size=16, + run_cfg=dict(num_gpus=1), + ) +] diff --git a/configs/models/yi/lmdeploy_yi_34b_chat.py b/configs/models/yi/lmdeploy_yi_34b_chat.py new file mode 100644 index 00000000..5ed603a6 --- /dev/null +++ b/configs/models/yi/lmdeploy_yi_34b_chat.py @@ -0,0 +1,15 @@ +from opencompass.models import TurboMindModelwithChatTemplate + +models = [ + dict( + type=TurboMindModelwithChatTemplate, + abbr='yi-34b-chat-turbomind', + path='01-ai/Yi-34B-Chat', + engine_config=dict(session_len=4096, max_batch_size=16, tp=2), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048), + max_seq_len=4096, + max_out_len=2048, + batch_size=16, + run_cfg=dict(num_gpus=2), + ) +] diff --git a/configs/models/yi/lmdeploy_yi_6b_chat.py b/configs/models/yi/lmdeploy_yi_6b_chat.py new file mode 100644 index 00000000..5c75bfa5 --- /dev/null +++ b/configs/models/yi/lmdeploy_yi_6b_chat.py @@ -0,0 +1,15 @@ +from opencompass.models import TurboMindModelwithChatTemplate + +models = [ + dict( + type=TurboMindModelwithChatTemplate, + abbr='yi-6b-chat-turbomind', + path='01-ai/Yi-6B-Chat', + engine_config=dict(session_len=4096, max_batch_size=16, tp=1), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048), + max_seq_len=4096, + max_out_len=2048, + batch_size=16, + run_cfg=dict(num_gpus=1), + ) +] diff --git a/opencompass/configs/models/chatglm/lmdeploy_glm4_9b_chat.py b/opencompass/configs/models/chatglm/lmdeploy_glm4_9b_chat.py index 2f8218a6..c5cb8c4d 100644 --- a/opencompass/configs/models/chatglm/lmdeploy_glm4_9b_chat.py +++ b/opencompass/configs/models/chatglm/lmdeploy_glm4_9b_chat.py @@ -6,9 +6,9 @@ models = [ abbr='glm-4-9b-chat-turbomind', path='THUDM/glm-4-9b-chat', 
engine_config=dict(max_batch_size=16, tp=1), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048), max_seq_len=8192, - max_out_len=1024, + max_out_len=2048, batch_size=16, run_cfg=dict(num_gpus=1), ) diff --git a/opencompass/configs/models/deepseek/lmdeploy_deepseek_67b_chat.py b/opencompass/configs/models/deepseek/lmdeploy_deepseek_67b_chat.py index e369e6e1..67624eb8 100644 --- a/opencompass/configs/models/deepseek/lmdeploy_deepseek_67b_chat.py +++ b/opencompass/configs/models/deepseek/lmdeploy_deepseek_67b_chat.py @@ -7,8 +7,8 @@ models = [ path='deepseek-ai/deepseek-llm-67b-chat', engine_config=dict(max_batch_size=16, tp=4), gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9), - max_seq_len=7168, - max_out_len=1024, + max_seq_len=8192, + max_out_len=2048, batch_size=16, run_cfg=dict(num_gpus=4), ) diff --git a/opencompass/configs/models/deepseek/lmdeploy_deepseek_7b_chat.py b/opencompass/configs/models/deepseek/lmdeploy_deepseek_7b_chat.py index 26aa2afc..2c108cc1 100644 --- a/opencompass/configs/models/deepseek/lmdeploy_deepseek_7b_chat.py +++ b/opencompass/configs/models/deepseek/lmdeploy_deepseek_7b_chat.py @@ -7,8 +7,8 @@ models = [ path='deepseek-ai/deepseek-llm-7b-chat', engine_config=dict(max_batch_size=16, tp=1), gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9), - max_seq_len=7168, - max_out_len=1024, + max_seq_len=8192, + max_out_len=2048, batch_size=16, run_cfg=dict(num_gpus=1), ) diff --git a/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_5_1_8b_chat.py b/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_5_1_8b_chat.py index 5d5c257b..cf4691f1 100644 --- a/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_5_1_8b_chat.py +++ b/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_5_1_8b_chat.py @@ -5,10 +5,10 @@ models = [ type=TurboMindModelwithChatTemplate, abbr='internlm2_5-1_8b-chat-turbomind', 
path='internlm/internlm2_5-1_8b-chat', - engine_config=dict(session_len=8192, max_batch_size=16, tp=1), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048), - max_seq_len=8192, - max_out_len=2048, + engine_config=dict(session_len=16384, max_batch_size=16, tp=1), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096), + max_seq_len=16384, + max_out_len=4096, batch_size=16, run_cfg=dict(num_gpus=1), ) diff --git a/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_5_20b_chat.py b/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_5_20b_chat.py index f1bb1b08..7fb52161 100644 --- a/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_5_20b_chat.py +++ b/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_5_20b_chat.py @@ -5,10 +5,10 @@ models = [ type=TurboMindModelwithChatTemplate, abbr='internlm2_5-20b-chat-turbomind', path='internlm/internlm2_5-20b-chat', - engine_config=dict(session_len=8192, max_batch_size=16, tp=2), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048), - max_seq_len=8192, - max_out_len=2048, + engine_config=dict(session_len=16384, max_batch_size=16, tp=2), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096), + max_seq_len=16384, + max_out_len=4096, batch_size=16, run_cfg=dict(num_gpus=2), ) diff --git a/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_5_7b_chat.py b/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_5_7b_chat.py index 75fb9371..8dce2684 100644 --- a/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_5_7b_chat.py +++ b/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_5_7b_chat.py @@ -5,10 +5,10 @@ models = [ type=TurboMindModelwithChatTemplate, abbr='internlm2_5-7b-chat-turbomind', path='internlm/internlm2_5-7b-chat', - engine_config=dict(session_len=7168, max_batch_size=16, tp=1), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), - max_seq_len=7168, - 
max_out_len=1024, + engine_config=dict(session_len=16384, max_batch_size=16, tp=1), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096), + max_seq_len=16384, + max_out_len=4096, batch_size=16, run_cfg=dict(num_gpus=1), ) diff --git a/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_chat_1_8b.py b/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_chat_1_8b.py index 9c358d5a..f5df7926 100644 --- a/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_chat_1_8b.py +++ b/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_chat_1_8b.py @@ -5,10 +5,10 @@ models = [ type=TurboMindModelwithChatTemplate, abbr='internlm2-chat-1.8b-turbomind', path='internlm/internlm2-chat-1_8b', - engine_config=dict(session_len=7168, max_batch_size=16, tp=1), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), - max_seq_len=7168, - max_out_len=1024, + engine_config=dict(session_len=8192, max_batch_size=16, tp=1), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096), + max_seq_len=8192, + max_out_len=4096, batch_size=16, run_cfg=dict(num_gpus=1), ) diff --git a/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_chat_20b.py b/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_chat_20b.py index 44371549..23f35636 100644 --- a/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_chat_20b.py +++ b/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_chat_20b.py @@ -5,10 +5,10 @@ models = [ type=TurboMindModelwithChatTemplate, abbr='internlm2-chat-20b-turbomind', path='internlm/internlm2-chat-20b', - engine_config=dict(session_len=7168, max_batch_size=16, tp=2), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), - max_seq_len=7168, - max_out_len=1024, + engine_config=dict(session_len=8192, max_batch_size=16, tp=2), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096), + max_seq_len=8192, + max_out_len=4096, batch_size=16, 
run_cfg=dict(num_gpus=2), ) diff --git a/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_chat_7b.py b/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_chat_7b.py index 82ad2e46..60097e37 100644 --- a/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_chat_7b.py +++ b/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_chat_7b.py @@ -5,10 +5,10 @@ models = [ type=TurboMindModelwithChatTemplate, abbr='internlm2-chat-7b-turbomind', path='internlm/internlm2-chat-7b', - engine_config=dict(session_len=7168, max_batch_size=16, tp=1), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), - max_seq_len=7168, - max_out_len=1024, + engine_config=dict(session_len=8192, max_batch_size=16, tp=1), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096), + max_seq_len=8192, + max_out_len=4096, batch_size=16, run_cfg=dict(num_gpus=1), ) diff --git a/opencompass/configs/models/hf_internlm/lmdeploy_internlm_chat_20b.py b/opencompass/configs/models/hf_internlm/lmdeploy_internlm_chat_20b.py index 8718a6cf..e9af5578 100644 --- a/opencompass/configs/models/hf_internlm/lmdeploy_internlm_chat_20b.py +++ b/opencompass/configs/models/hf_internlm/lmdeploy_internlm_chat_20b.py @@ -6,9 +6,9 @@ models = [ abbr='internlm-chat-20b-turbomind', path='internlm/internlm-chat-20b', engine_config=dict(session_len=4096, max_batch_size=16, tp=2), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048), max_seq_len=4096, - max_out_len=1024, + max_out_len=2048, batch_size=16, run_cfg=dict(num_gpus=2), ) diff --git a/opencompass/configs/models/hf_internlm/lmdeploy_internlm_chat_7b.py b/opencompass/configs/models/hf_internlm/lmdeploy_internlm_chat_7b.py index ea61313a..50656a5f 100644 --- a/opencompass/configs/models/hf_internlm/lmdeploy_internlm_chat_7b.py +++ b/opencompass/configs/models/hf_internlm/lmdeploy_internlm_chat_7b.py @@ 
-6,9 +6,9 @@ models = [ abbr='internlm-chat-7b-turbomind', path='internlm/internlm-chat-7b', engine_config=dict(session_len=4096, max_batch_size=16, tp=1), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048), max_seq_len=4096, - max_out_len=1024, + max_out_len=2048, batch_size=16, run_cfg=dict(num_gpus=1), ) diff --git a/opencompass/configs/models/hf_llama/hf_llama3_1_70b_instruct.py b/opencompass/configs/models/hf_llama/hf_llama3_1_70b_instruct.py index 4a17de93..c7527bb5 100644 --- a/opencompass/configs/models/hf_llama/hf_llama3_1_70b_instruct.py +++ b/opencompass/configs/models/hf_llama/hf_llama3_1_70b_instruct.py @@ -5,7 +5,7 @@ models = [ type=HuggingFacewithChatTemplate, abbr='llama-3_1-70b-instruct-hf', path='meta-llama/Meta-Llama-3.1-70B-Instruct', - max_out_len=1024, + max_out_len=4096, batch_size=8, run_cfg=dict(num_gpus=4), stop_words=['<|end_of_text|>', '<|eot_id|>'], diff --git a/opencompass/configs/models/hf_llama/hf_llama3_1_8b.py b/opencompass/configs/models/hf_llama/hf_llama3_1_8b.py new file mode 100644 index 00000000..a41e1ddf --- /dev/null +++ b/opencompass/configs/models/hf_llama/hf_llama3_1_8b.py @@ -0,0 +1,12 @@ +from opencompass.models import HuggingFaceBaseModel + +models = [ + dict( + type=HuggingFaceBaseModel, + abbr='llama-3_1-8b-hf', + path='meta-llama/Meta-Llama-3.1-8B', + max_out_len=1024, + batch_size=8, + run_cfg=dict(num_gpus=1), + ) +] diff --git a/opencompass/configs/models/hf_llama/lmdeploy_llama2_13b_chat.py b/opencompass/configs/models/hf_llama/lmdeploy_llama2_13b_chat.py index cb42cb29..cacdec9a 100644 --- a/opencompass/configs/models/hf_llama/lmdeploy_llama2_13b_chat.py +++ b/opencompass/configs/models/hf_llama/lmdeploy_llama2_13b_chat.py @@ -6,9 +6,9 @@ models = [ abbr='llama-2-13b-chat-turbomind', path='meta-llama/Llama-2-13b-chat-hf', engine_config=dict(max_batch_size=16, tp=1), - gen_config=dict(top_k=1, 
temperature=1e-6, top_p=0.9, max_new_tokens=1024), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048), max_seq_len=4096, - max_out_len=1024, + max_out_len=2048, batch_size=16, run_cfg=dict(num_gpus=1), ) diff --git a/opencompass/configs/models/hf_llama/lmdeploy_llama2_70b_chat.py b/opencompass/configs/models/hf_llama/lmdeploy_llama2_70b_chat.py index d6c69c6f..b850106b 100644 --- a/opencompass/configs/models/hf_llama/lmdeploy_llama2_70b_chat.py +++ b/opencompass/configs/models/hf_llama/lmdeploy_llama2_70b_chat.py @@ -6,9 +6,9 @@ models = [ abbr='llama-2-70b-chat-turbomind', path='meta-llama/Llama-2-70b-chat-hf', engine_config=dict(max_batch_size=16, tp=4), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048), max_seq_len=4096, - max_out_len=1024, + max_out_len=2048, batch_size=16, run_cfg=dict(num_gpus=4), ) diff --git a/opencompass/configs/models/hf_llama/lmdeploy_llama2_7b_chat.py b/opencompass/configs/models/hf_llama/lmdeploy_llama2_7b_chat.py index f520ce8b..aa345248 100644 --- a/opencompass/configs/models/hf_llama/lmdeploy_llama2_7b_chat.py +++ b/opencompass/configs/models/hf_llama/lmdeploy_llama2_7b_chat.py @@ -6,9 +6,9 @@ models = [ abbr='llama-2-7b-chat-turbomind', path='meta-llama/Llama-2-7b-chat-hf', engine_config=dict(max_batch_size=16, tp=1), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048), max_seq_len=4096, - max_out_len=1024, + max_out_len=2048, batch_size=16, run_cfg=dict(num_gpus=1), ) diff --git a/opencompass/configs/models/hf_llama/lmdeploy_llama3_1_70b_instruct.py b/opencompass/configs/models/hf_llama/lmdeploy_llama3_1_70b_instruct.py index 23f9bc2a..9674169f 100644 --- a/opencompass/configs/models/hf_llama/lmdeploy_llama3_1_70b_instruct.py +++ b/opencompass/configs/models/hf_llama/lmdeploy_llama3_1_70b_instruct.py 
@@ -6,9 +6,9 @@ models = [ abbr='llama-3_1-70b-instruct-turbomind', path='meta-llama/Meta-Llama-3.1-70B-Instruct', engine_config=dict(max_batch_size=16, tp=4), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), - max_seq_len=7168, - max_out_len=1024, + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096), + max_seq_len=16384, + max_out_len=4096, batch_size=16, run_cfg=dict(num_gpus=4), stop_words=['<|end_of_text|>', '<|eot_id|>'], diff --git a/opencompass/configs/models/hf_llama/lmdeploy_llama3_1_8b_instruct.py b/opencompass/configs/models/hf_llama/lmdeploy_llama3_1_8b_instruct.py index 429dfec7..2754eb83 100644 --- a/opencompass/configs/models/hf_llama/lmdeploy_llama3_1_8b_instruct.py +++ b/opencompass/configs/models/hf_llama/lmdeploy_llama3_1_8b_instruct.py @@ -6,9 +6,9 @@ models = [ abbr='llama-3_1-8b-instruct-turbomind', path='meta-llama/Meta-Llama-3.1-8B-Instruct', engine_config=dict(max_batch_size=16, tp=1), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), - max_seq_len=7168, - max_out_len=1024, + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096), + max_seq_len=16384, + max_out_len=4096, batch_size=16, run_cfg=dict(num_gpus=1), stop_words=['<|end_of_text|>', '<|eot_id|>'], diff --git a/opencompass/configs/models/hf_llama/lmdeploy_llama3_70b_instruct.py b/opencompass/configs/models/hf_llama/lmdeploy_llama3_70b_instruct.py index 333dc015..12fc944c 100644 --- a/opencompass/configs/models/hf_llama/lmdeploy_llama3_70b_instruct.py +++ b/opencompass/configs/models/hf_llama/lmdeploy_llama3_70b_instruct.py @@ -6,9 +6,9 @@ models = [ abbr='llama-3-70b-instruct-turbomind', path='meta-llama/Meta-Llama-3-70B-Instruct', engine_config=dict(max_batch_size=16, tp=4), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), - max_seq_len=7168, - max_out_len=1024, + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096), + 
max_seq_len=16384, + max_out_len=4096, batch_size=16, run_cfg=dict(num_gpus=4), stop_words=['<|end_of_text|>', '<|eot_id|>'], diff --git a/opencompass/configs/models/hf_llama/lmdeploy_llama3_8b_instruct.py b/opencompass/configs/models/hf_llama/lmdeploy_llama3_8b_instruct.py index cc5b3bd4..5a6431b7 100644 --- a/opencompass/configs/models/hf_llama/lmdeploy_llama3_8b_instruct.py +++ b/opencompass/configs/models/hf_llama/lmdeploy_llama3_8b_instruct.py @@ -6,9 +6,9 @@ models = [ abbr='llama-3-8b-instruct-turbomind', path='meta-llama/Meta-Llama-3-8B-Instruct', engine_config=dict(max_batch_size=16, tp=1), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), - max_seq_len=7168, - max_out_len=1024, + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096), + max_seq_len=16384, + max_out_len=4096, batch_size=16, run_cfg=dict(num_gpus=1), stop_words=['<|end_of_text|>', '<|eot_id|>'], diff --git a/opencompass/configs/models/mistral/lmdeploy_mistral_7b_instruct_v0_3.py b/opencompass/configs/models/mistral/lmdeploy_mistral_7b_instruct_v0_3.py new file mode 100644 index 00000000..4c867b60 --- /dev/null +++ b/opencompass/configs/models/mistral/lmdeploy_mistral_7b_instruct_v0_3.py @@ -0,0 +1,15 @@ +from opencompass.models import TurboMindModelwithChatTemplate + +models = [ + dict( + type=TurboMindModelwithChatTemplate, + abbr='mistral-7b-instruct-v0.3-turbomind', + path='mistralai/Mistral-7B-Instruct-v0.3', + engine_config=dict(session_len=32768, max_batch_size=16, tp=1), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096), + max_seq_len=32768, + max_out_len=4096, + batch_size=16, + run_cfg=dict(num_gpus=1), + ) +] diff --git a/opencompass/configs/models/mistral/lmdeploy_mixtral_large_instruct_2407.py b/opencompass/configs/models/mistral/lmdeploy_mixtral_large_instruct_2407.py new file mode 100644 index 00000000..e79a1f73 --- /dev/null +++ 
b/opencompass/configs/models/mistral/lmdeploy_mixtral_large_instruct_2407.py @@ -0,0 +1,15 @@ +from opencompass.models import TurboMindModelwithChatTemplate + +models = [ + dict( + type=TurboMindModelwithChatTemplate, + abbr='mixtral-large-instruct-2407-turbomind', + path='mistralai/Mistral-Large-Instruct-2407', + engine_config=dict(session_len=32768, max_batch_size=16, tp=4), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096), + max_seq_len=32768, + max_out_len=4096, + batch_size=16, + run_cfg=dict(num_gpus=4), + ) +] diff --git a/opencompass/configs/models/qwen/lmdeploy_qwen1_5_110b_chat.py b/opencompass/configs/models/qwen/lmdeploy_qwen1_5_110b_chat.py index 9b92b814..bc123b40 100644 --- a/opencompass/configs/models/qwen/lmdeploy_qwen1_5_110b_chat.py +++ b/opencompass/configs/models/qwen/lmdeploy_qwen1_5_110b_chat.py @@ -5,10 +5,10 @@ models = [ type=TurboMindModelwithChatTemplate, abbr='qwen1.5-110b-chat-turbomind', path='Qwen/Qwen1.5-110B-Chat', - engine_config=dict(session_len=7168, max_batch_size=8, tp=4), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), - max_seq_len=7168, - max_out_len=1024, + engine_config=dict(session_len=16384, max_batch_size=8, tp=4), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096), + max_seq_len=16384, + max_out_len=4096, batch_size=8, run_cfg=dict(num_gpus=4), stop_words=['<|im_end|>', '<|im_start|>'], diff --git a/opencompass/configs/models/qwen/lmdeploy_qwen1_5_14b_chat.py b/opencompass/configs/models/qwen/lmdeploy_qwen1_5_14b_chat.py index d2b85c2a..5f0d54b9 100644 --- a/opencompass/configs/models/qwen/lmdeploy_qwen1_5_14b_chat.py +++ b/opencompass/configs/models/qwen/lmdeploy_qwen1_5_14b_chat.py @@ -5,10 +5,10 @@ models = [ type=TurboMindModelwithChatTemplate, abbr='qwen1.5-14b-chat-turbomind', path='Qwen/Qwen1.5-14B-Chat', - engine_config=dict(session_len=7168, max_batch_size=16, tp=1), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, 
max_new_tokens=1024), - max_seq_len=7168, - max_out_len=1024, + engine_config=dict(session_len=16384, max_batch_size=16, tp=1), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096), + max_seq_len=16384, + max_out_len=4096, batch_size=16, run_cfg=dict(num_gpus=1), stop_words=['<|im_end|>', '<|im_start|>'], diff --git a/opencompass/configs/models/qwen/lmdeploy_qwen1_5_1_8b_chat.py b/opencompass/configs/models/qwen/lmdeploy_qwen1_5_1_8b_chat.py index ff28ac0b..803ff333 100644 --- a/opencompass/configs/models/qwen/lmdeploy_qwen1_5_1_8b_chat.py +++ b/opencompass/configs/models/qwen/lmdeploy_qwen1_5_1_8b_chat.py @@ -5,10 +5,10 @@ models = [ type=TurboMindModelwithChatTemplate, abbr='qwen1.5-1.8b-chat-turbomind', path='Qwen/Qwen1.5-1.8B-Chat', - engine_config=dict(session_len=7168, max_batch_size=16, tp=1), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), - max_seq_len=7168, - max_out_len=1024, + engine_config=dict(session_len=16384, max_batch_size=16, tp=1), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096), + max_seq_len=16384, + max_out_len=4096, batch_size=16, run_cfg=dict(num_gpus=1), stop_words=['<|im_end|>', '<|im_start|>'], diff --git a/opencompass/configs/models/qwen/lmdeploy_qwen1_5_32b_chat.py b/opencompass/configs/models/qwen/lmdeploy_qwen1_5_32b_chat.py index 1196548a..96fd1e43 100644 --- a/opencompass/configs/models/qwen/lmdeploy_qwen1_5_32b_chat.py +++ b/opencompass/configs/models/qwen/lmdeploy_qwen1_5_32b_chat.py @@ -5,10 +5,10 @@ models = [ type=TurboMindModelwithChatTemplate, abbr='qwen1.5-32b-chat-turbomind', path='Qwen/Qwen1.5-32B-Chat', - engine_config=dict(session_len=7168, max_batch_size=16, tp=2), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), - max_seq_len=7168, - max_out_len=1024, + engine_config=dict(session_len=16384, max_batch_size=16, tp=2), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096), + 
max_seq_len=16384, + max_out_len=4096, batch_size=16, run_cfg=dict(num_gpus=2), stop_words=['<|im_end|>', '<|im_start|>'], diff --git a/opencompass/configs/models/qwen/lmdeploy_qwen1_5_4b_chat.py b/opencompass/configs/models/qwen/lmdeploy_qwen1_5_4b_chat.py index bde14a29..f9fcc3fb 100644 --- a/opencompass/configs/models/qwen/lmdeploy_qwen1_5_4b_chat.py +++ b/opencompass/configs/models/qwen/lmdeploy_qwen1_5_4b_chat.py @@ -5,10 +5,10 @@ models = [ type=TurboMindModelwithChatTemplate, abbr='qwen1.5-4b-chat-turbomind', path='Qwen/Qwen1.5-4B-Chat', - engine_config=dict(session_len=7168, max_batch_size=16, tp=1), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), - max_seq_len=7168, - max_out_len=1024, + engine_config=dict(session_len=16384, max_batch_size=16, tp=1), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096), + max_seq_len=16384, + max_out_len=4096, batch_size=16, run_cfg=dict(num_gpus=1), stop_words=['<|im_end|>', '<|im_start|>'], diff --git a/opencompass/configs/models/qwen/lmdeploy_qwen1_5_72b_chat.py b/opencompass/configs/models/qwen/lmdeploy_qwen1_5_72b_chat.py index 38175eaf..64a5f7cb 100644 --- a/opencompass/configs/models/qwen/lmdeploy_qwen1_5_72b_chat.py +++ b/opencompass/configs/models/qwen/lmdeploy_qwen1_5_72b_chat.py @@ -5,10 +5,10 @@ models = [ type=TurboMindModelwithChatTemplate, abbr='qwen1.5-72b-chat-turbomind', path='Qwen/Qwen1.5-72B-Chat', - engine_config=dict(session_len=7168, max_batch_size=16, tp=4), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), - max_seq_len=7168, - max_out_len=1024, + engine_config=dict(session_len=16384, max_batch_size=16, tp=4), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096), + max_seq_len=16384, + max_out_len=4096, batch_size=16, run_cfg=dict(num_gpus=4), stop_words=['<|im_end|>', '<|im_start|>'], diff --git a/opencompass/configs/models/qwen/lmdeploy_qwen1_5_7b_chat.py 
b/opencompass/configs/models/qwen/lmdeploy_qwen1_5_7b_chat.py index ca733c0b..1ab39303 100644 --- a/opencompass/configs/models/qwen/lmdeploy_qwen1_5_7b_chat.py +++ b/opencompass/configs/models/qwen/lmdeploy_qwen1_5_7b_chat.py @@ -5,10 +5,10 @@ models = [ type=TurboMindModelwithChatTemplate, abbr='qwen1.5-7b-chat-turbomind', path='Qwen/Qwen1.5-7B-Chat', - engine_config=dict(session_len=7168, max_batch_size=16, tp=1), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), - max_seq_len=7168, - max_out_len=1024, + engine_config=dict(session_len=16384, max_batch_size=16, tp=1), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096), + max_seq_len=16384, + max_out_len=4096, batch_size=16, run_cfg=dict(num_gpus=1), stop_words=['<|im_end|>', '<|im_start|>'], diff --git a/opencompass/configs/models/qwen/lmdeploy_qwen2_1_5b_instruct.py b/opencompass/configs/models/qwen/lmdeploy_qwen2_1_5b_instruct.py index 502de187..f050ca38 100644 --- a/opencompass/configs/models/qwen/lmdeploy_qwen2_1_5b_instruct.py +++ b/opencompass/configs/models/qwen/lmdeploy_qwen2_1_5b_instruct.py @@ -5,10 +5,10 @@ models = [ type=TurboMindModelwithChatTemplate, abbr='qwen2-1.5b-instruct-turbomind', path='Qwen/Qwen2-1.5B-Instruct', - engine_config=dict(session_len=7168, max_batch_size=16, tp=1), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), - max_seq_len=7168, - max_out_len=1024, + engine_config=dict(session_len=16384, max_batch_size=16, tp=1), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096), + max_seq_len=16384, + max_out_len=4096, batch_size=16, run_cfg=dict(num_gpus=1), ) diff --git a/opencompass/configs/models/qwen/lmdeploy_qwen2_72b_instruct.py b/opencompass/configs/models/qwen/lmdeploy_qwen2_72b_instruct.py index 69ecb798..c29482b5 100644 --- a/opencompass/configs/models/qwen/lmdeploy_qwen2_72b_instruct.py +++ b/opencompass/configs/models/qwen/lmdeploy_qwen2_72b_instruct.py @@ -5,10 +5,10 @@ 
models = [ type=TurboMindModelwithChatTemplate, abbr='qwen2-72b-instruct-turbomind', path='Qwen/Qwen2-72B-Instruct', - engine_config=dict(session_len=7168, max_batch_size=16, tp=4), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), - max_seq_len=7168, - max_out_len=1024, + engine_config=dict(session_len=16384, max_batch_size=16, tp=4), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096), + max_seq_len=16384, + max_out_len=4096, batch_size=16, run_cfg=dict(num_gpus=4), ) diff --git a/opencompass/configs/models/qwen/lmdeploy_qwen2_7b_instruct.py b/opencompass/configs/models/qwen/lmdeploy_qwen2_7b_instruct.py index 4dff85e0..05fa25c5 100644 --- a/opencompass/configs/models/qwen/lmdeploy_qwen2_7b_instruct.py +++ b/opencompass/configs/models/qwen/lmdeploy_qwen2_7b_instruct.py @@ -5,10 +5,10 @@ models = [ type=TurboMindModelwithChatTemplate, abbr='qwen2-7b-instruct-turbomind', path='Qwen/Qwen2-7B-Instruct', - engine_config=dict(session_len=7168, max_batch_size=16, tp=1), - gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024), - max_seq_len=7168, - max_out_len=1024, + engine_config=dict(session_len=16384, max_batch_size=16, tp=1), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096), + max_seq_len=16384, + max_out_len=4096, batch_size=16, run_cfg=dict(num_gpus=1), ) diff --git a/opencompass/configs/models/yi/lmdeploy_yi_1_5_34b_chat.py b/opencompass/configs/models/yi/lmdeploy_yi_1_5_34b_chat.py new file mode 100644 index 00000000..d296a100 --- /dev/null +++ b/opencompass/configs/models/yi/lmdeploy_yi_1_5_34b_chat.py @@ -0,0 +1,15 @@ +from opencompass.models import TurboMindModelwithChatTemplate + +models = [ + dict( + type=TurboMindModelwithChatTemplate, + abbr='yi-1.5-34b-chat-turbomind', + path='01-ai/Yi-1.5-34B-Chat', + engine_config=dict(session_len=4096, max_batch_size=16, tp=2), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048), + 
max_seq_len=4096, + max_out_len=2048, + batch_size=16, + run_cfg=dict(num_gpus=2), + ) +] diff --git a/opencompass/configs/models/yi/lmdeploy_yi_1_5_6b_chat.py b/opencompass/configs/models/yi/lmdeploy_yi_1_5_6b_chat.py new file mode 100644 index 00000000..eeaf8ea2 --- /dev/null +++ b/opencompass/configs/models/yi/lmdeploy_yi_1_5_6b_chat.py @@ -0,0 +1,15 @@ +from opencompass.models import TurboMindModelwithChatTemplate + +models = [ + dict( + type=TurboMindModelwithChatTemplate, + abbr='yi-1.5-6b-chat-turbomind', + path='01-ai/Yi-1.5-6B-Chat', + engine_config=dict(session_len=4096, max_batch_size=16, tp=1), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048), + max_seq_len=4096, + max_out_len=2048, + batch_size=16, + run_cfg=dict(num_gpus=1), + ) +] diff --git a/opencompass/configs/models/yi/lmdeploy_yi_1_5_9b_chat.py b/opencompass/configs/models/yi/lmdeploy_yi_1_5_9b_chat.py new file mode 100644 index 00000000..4e33ba23 --- /dev/null +++ b/opencompass/configs/models/yi/lmdeploy_yi_1_5_9b_chat.py @@ -0,0 +1,15 @@ +from opencompass.models import TurboMindModelwithChatTemplate + +models = [ + dict( + type=TurboMindModelwithChatTemplate, + abbr='yi-1.5-9b-chat-turbomind', + path='01-ai/Yi-1.5-9B-Chat', + engine_config=dict(session_len=4096, max_batch_size=16, tp=1), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048), + max_seq_len=4096, + max_out_len=2048, + batch_size=16, + run_cfg=dict(num_gpus=1), + ) +] diff --git a/opencompass/configs/models/yi/lmdeploy_yi_34b_chat.py b/opencompass/configs/models/yi/lmdeploy_yi_34b_chat.py new file mode 100644 index 00000000..5ed603a6 --- /dev/null +++ b/opencompass/configs/models/yi/lmdeploy_yi_34b_chat.py @@ -0,0 +1,15 @@ +from opencompass.models import TurboMindModelwithChatTemplate + +models = [ + dict( + type=TurboMindModelwithChatTemplate, + abbr='yi-34b-chat-turbomind', + path='01-ai/Yi-34B-Chat', + engine_config=dict(session_len=4096, max_batch_size=16, tp=2), + 
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048), + max_seq_len=4096, + max_out_len=2048, + batch_size=16, + run_cfg=dict(num_gpus=2), + ) +] diff --git a/opencompass/configs/models/yi/lmdeploy_yi_6b_chat.py b/opencompass/configs/models/yi/lmdeploy_yi_6b_chat.py new file mode 100644 index 00000000..5c75bfa5 --- /dev/null +++ b/opencompass/configs/models/yi/lmdeploy_yi_6b_chat.py @@ -0,0 +1,15 @@ +from opencompass.models import TurboMindModelwithChatTemplate + +models = [ + dict( + type=TurboMindModelwithChatTemplate, + abbr='yi-6b-chat-turbomind', + path='01-ai/Yi-6B-Chat', + engine_config=dict(session_len=4096, max_batch_size=16, tp=1), + gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048), + max_seq_len=4096, + max_out_len=2048, + batch_size=16, + run_cfg=dict(num_gpus=1), + ) +]