mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
[Feature] Update the max_out_len for many models (#1559)
This commit is contained in:
parent
a2e9bc0c41
commit
e7681943f3
@ -6,9 +6,9 @@ models = [
|
||||
abbr='glm-4-9b-chat-turbomind',
|
||||
path='THUDM/glm-4-9b-chat',
|
||||
engine_config=dict(max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
|
||||
max_seq_len=8192,
|
||||
max_out_len=1024,
|
||||
max_out_len=2048,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
)
|
||||
|
@ -7,8 +7,8 @@ models = [
|
||||
path='deepseek-ai/deepseek-llm-67b-chat',
|
||||
engine_config=dict(max_batch_size=16, tp=4),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9),
|
||||
max_seq_len=7168,
|
||||
max_out_len=1024,
|
||||
max_seq_len=8192,
|
||||
max_out_len=2048,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=4),
|
||||
)
|
||||
|
@ -7,8 +7,8 @@ models = [
|
||||
path='deepseek-ai/deepseek-llm-7b-chat',
|
||||
engine_config=dict(max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9),
|
||||
max_seq_len=7168,
|
||||
max_out_len=1024,
|
||||
max_seq_len=8192,
|
||||
max_out_len=2048,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
)
|
||||
|
@ -5,10 +5,10 @@ models = [
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='internlm2_5-1_8b-chat-turbomind',
|
||||
path='internlm/internlm2_5-1_8b-chat',
|
||||
engine_config=dict(session_len=8192, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
|
||||
max_seq_len=8192,
|
||||
max_out_len=2048,
|
||||
engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
|
||||
max_seq_len=16384,
|
||||
max_out_len=4096,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
)
|
||||
|
@ -5,10 +5,10 @@ models = [
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='internlm2_5-20b-chat-turbomind',
|
||||
path='internlm/internlm2_5-20b-chat',
|
||||
engine_config=dict(session_len=8192, max_batch_size=16, tp=2),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
|
||||
max_seq_len=8192,
|
||||
max_out_len=2048,
|
||||
engine_config=dict(session_len=16384, max_batch_size=16, tp=2),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
|
||||
max_seq_len=16384,
|
||||
max_out_len=4096,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=2),
|
||||
)
|
||||
|
@ -5,10 +5,10 @@ models = [
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='internlm2_5-7b-chat-turbomind',
|
||||
path='internlm/internlm2_5-7b-chat',
|
||||
engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
max_seq_len=7168,
|
||||
max_out_len=1024,
|
||||
engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
|
||||
max_seq_len=16384,
|
||||
max_out_len=4096,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
)
|
||||
|
@ -5,10 +5,10 @@ models = [
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='internlm2-chat-1.8b-turbomind',
|
||||
path='internlm/internlm2-chat-1_8b',
|
||||
engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
max_seq_len=7168,
|
||||
max_out_len=1024,
|
||||
engine_config=dict(session_len=8192, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
|
||||
max_seq_len=8192,
|
||||
max_out_len=4096,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
)
|
||||
|
@ -5,10 +5,10 @@ models = [
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='internlm2-chat-20b-turbomind',
|
||||
path='internlm/internlm2-chat-20b',
|
||||
engine_config=dict(session_len=7168, max_batch_size=16, tp=2),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
max_seq_len=7168,
|
||||
max_out_len=1024,
|
||||
engine_config=dict(session_len=8192, max_batch_size=16, tp=2),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
|
||||
max_seq_len=8192,
|
||||
max_out_len=4096,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=2),
|
||||
)
|
||||
|
@ -5,10 +5,10 @@ models = [
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='internlm2-chat-7b-turbomind',
|
||||
path='internlm/internlm2-chat-7b',
|
||||
engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
max_seq_len=7168,
|
||||
max_out_len=1024,
|
||||
engine_config=dict(session_len=8192, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
|
||||
max_seq_len=8192,
|
||||
max_out_len=4096,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
)
|
||||
|
@ -6,9 +6,9 @@ models = [
|
||||
abbr='internlm-chat-20b-turbomind',
|
||||
path='internlm/internlm-chat-20b',
|
||||
engine_config=dict(session_len=4096, max_batch_size=16, tp=2),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
|
||||
max_seq_len=4096,
|
||||
max_out_len=1024,
|
||||
max_out_len=2048,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=2),
|
||||
)
|
||||
|
@ -6,9 +6,9 @@ models = [
|
||||
abbr='internlm-chat-7b-turbomind',
|
||||
path='internlm/internlm-chat-7b',
|
||||
engine_config=dict(session_len=4096, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
|
||||
max_seq_len=4096,
|
||||
max_out_len=1024,
|
||||
max_out_len=2048,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
)
|
||||
|
@ -5,7 +5,7 @@ models = [
|
||||
type=HuggingFacewithChatTemplate,
|
||||
abbr='llama-3_1-70b-instruct-hf',
|
||||
path='meta-llama/Meta-Llama-3.1-70B-Instruct',
|
||||
max_out_len=1024,
|
||||
max_out_len=4096,
|
||||
batch_size=8,
|
||||
run_cfg=dict(num_gpus=4),
|
||||
stop_words=['<|end_of_text|>', '<|eot_id|>'],
|
||||
|
12
configs/models/hf_llama/hf_llama3_1_8b.py
Normal file
12
configs/models/hf_llama/hf_llama3_1_8b.py
Normal file
@ -0,0 +1,12 @@
|
||||
from opencompass.models import HuggingFaceBaseModel
|
||||
|
||||
models = [
|
||||
dict(
|
||||
type=HuggingFaceBaseModel,
|
||||
abbr='llama-3_1-8b-hf',
|
||||
path='meta-llama/Meta-Llama-3.1-8B',
|
||||
max_out_len=1024,
|
||||
batch_size=8,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
)
|
||||
]
|
@ -6,9 +6,9 @@ models = [
|
||||
abbr='llama-2-13b-chat-turbomind',
|
||||
path='meta-llama/Llama-2-13b-chat-hf',
|
||||
engine_config=dict(max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
|
||||
max_seq_len=4096,
|
||||
max_out_len=1024,
|
||||
max_out_len=2048,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
)
|
||||
|
@ -6,9 +6,9 @@ models = [
|
||||
abbr='llama-2-70b-chat-turbomind',
|
||||
path='meta-llama/Llama-2-70b-chat-hf',
|
||||
engine_config=dict(max_batch_size=16, tp=4),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
|
||||
max_seq_len=4096,
|
||||
max_out_len=1024,
|
||||
max_out_len=2048,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=4),
|
||||
)
|
||||
|
@ -6,9 +6,9 @@ models = [
|
||||
abbr='llama-2-7b-chat-turbomind',
|
||||
path='meta-llama/Llama-2-7b-chat-hf',
|
||||
engine_config=dict(max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
|
||||
max_seq_len=4096,
|
||||
max_out_len=1024,
|
||||
max_out_len=2048,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
)
|
||||
|
@ -6,9 +6,9 @@ models = [
|
||||
abbr='llama-3_1-70b-instruct-turbomind',
|
||||
path='meta-llama/Meta-Llama-3.1-70B-Instruct',
|
||||
engine_config=dict(max_batch_size=16, tp=4),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
max_seq_len=7168,
|
||||
max_out_len=1024,
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
|
||||
max_seq_len=16384,
|
||||
max_out_len=4096,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=4),
|
||||
stop_words=['<|end_of_text|>', '<|eot_id|>'],
|
||||
|
@ -6,9 +6,9 @@ models = [
|
||||
abbr='llama-3_1-8b-instruct-turbomind',
|
||||
path='meta-llama/Meta-Llama-3.1-8B-Instruct',
|
||||
engine_config=dict(max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
max_seq_len=7168,
|
||||
max_out_len=1024,
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
|
||||
max_seq_len=16384,
|
||||
max_out_len=4096,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
stop_words=['<|end_of_text|>', '<|eot_id|>'],
|
||||
|
@ -6,9 +6,9 @@ models = [
|
||||
abbr='llama-3-70b-instruct-turbomind',
|
||||
path='meta-llama/Meta-Llama-3-70B-Instruct',
|
||||
engine_config=dict(max_batch_size=16, tp=4),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
max_seq_len=7168,
|
||||
max_out_len=1024,
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
|
||||
max_seq_len=16384,
|
||||
max_out_len=4096,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=4),
|
||||
stop_words=['<|end_of_text|>', '<|eot_id|>'],
|
||||
|
@ -6,9 +6,9 @@ models = [
|
||||
abbr='llama-3-8b-instruct-turbomind',
|
||||
path='meta-llama/Meta-Llama-3-8B-Instruct',
|
||||
engine_config=dict(max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
max_seq_len=7168,
|
||||
max_out_len=1024,
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
|
||||
max_seq_len=16384,
|
||||
max_out_len=4096,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
stop_words=['<|end_of_text|>', '<|eot_id|>'],
|
||||
|
15
configs/models/mistral/lmdeploy_mistral_7b_instruct_v0_3.py
Normal file
15
configs/models/mistral/lmdeploy_mistral_7b_instruct_v0_3.py
Normal file
@ -0,0 +1,15 @@
|
||||
from opencompass.models import TurboMindModelwithChatTemplate
|
||||
|
||||
models = [
|
||||
dict(
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='mistral-7b-instruct-v0.3-turbomind',
|
||||
path='mistralai/Mistral-7B-Instruct-v0.3',
|
||||
engine_config=dict(session_len=32768, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
|
||||
max_seq_len=32768,
|
||||
max_out_len=4096,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
)
|
||||
]
|
@ -0,0 +1,15 @@
|
||||
from opencompass.models import TurboMindModelwithChatTemplate
|
||||
|
||||
models = [
|
||||
dict(
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='mistral-large-instruct-2407-turbomind',
|
||||
path='mistralai/Mistral-Large-Instruct-2407',
|
||||
engine_config=dict(session_len=32768, max_batch_size=16, tp=4),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
|
||||
max_seq_len=32768,
|
||||
max_out_len=4096,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=4),
|
||||
)
|
||||
]
|
@ -5,10 +5,10 @@ models = [
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='qwen1.5-110b-chat-turbomind',
|
||||
path='Qwen/Qwen1.5-110B-Chat',
|
||||
engine_config=dict(session_len=7168, max_batch_size=8, tp=4),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
max_seq_len=7168,
|
||||
max_out_len=1024,
|
||||
engine_config=dict(session_len=16384, max_batch_size=8, tp=4),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
|
||||
max_seq_len=16384,
|
||||
max_out_len=4096,
|
||||
batch_size=8,
|
||||
run_cfg=dict(num_gpus=4),
|
||||
stop_words=['<|im_end|>', '<|im_start|>'],
|
||||
|
@ -5,10 +5,10 @@ models = [
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='qwen1.5-14b-chat-turbomind',
|
||||
path='Qwen/Qwen1.5-14B-Chat',
|
||||
engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
max_seq_len=7168,
|
||||
max_out_len=1024,
|
||||
engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
|
||||
max_seq_len=16384,
|
||||
max_out_len=4096,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
stop_words=['<|im_end|>', '<|im_start|>'],
|
||||
|
@ -5,10 +5,10 @@ models = [
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='qwen1.5-1.8b-chat-turbomind',
|
||||
path='Qwen/Qwen1.5-1.8B-Chat',
|
||||
engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
max_seq_len=7168,
|
||||
max_out_len=1024,
|
||||
engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
|
||||
max_seq_len=16384,
|
||||
max_out_len=4096,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
stop_words=['<|im_end|>', '<|im_start|>'],
|
||||
|
@ -5,10 +5,10 @@ models = [
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='qwen1.5-32b-chat-turbomind',
|
||||
path='Qwen/Qwen1.5-32B-Chat',
|
||||
engine_config=dict(session_len=7168, max_batch_size=16, tp=2),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
max_seq_len=7168,
|
||||
max_out_len=1024,
|
||||
engine_config=dict(session_len=16384, max_batch_size=16, tp=2),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
|
||||
max_seq_len=16384,
|
||||
max_out_len=4096,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=2),
|
||||
stop_words=['<|im_end|>', '<|im_start|>'],
|
||||
|
@ -5,10 +5,10 @@ models = [
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='qwen1.5-4b-chat-turbomind',
|
||||
path='Qwen/Qwen1.5-4B-Chat',
|
||||
engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
max_seq_len=7168,
|
||||
max_out_len=1024,
|
||||
engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
|
||||
max_seq_len=16384,
|
||||
max_out_len=4096,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
stop_words=['<|im_end|>', '<|im_start|>'],
|
||||
|
@ -5,10 +5,10 @@ models = [
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='qwen1.5-72b-chat-turbomind',
|
||||
path='Qwen/Qwen1.5-72B-Chat',
|
||||
engine_config=dict(session_len=7168, max_batch_size=16, tp=4),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
max_seq_len=7168,
|
||||
max_out_len=1024,
|
||||
engine_config=dict(session_len=16384, max_batch_size=16, tp=4),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
|
||||
max_seq_len=16384,
|
||||
max_out_len=4096,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=4),
|
||||
stop_words=['<|im_end|>', '<|im_start|>'],
|
||||
|
@ -5,10 +5,10 @@ models = [
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='qwen1.5-7b-chat-turbomind',
|
||||
path='Qwen/Qwen1.5-7B-Chat',
|
||||
engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
max_seq_len=7168,
|
||||
max_out_len=1024,
|
||||
engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
|
||||
max_seq_len=16384,
|
||||
max_out_len=4096,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
stop_words=['<|im_end|>', '<|im_start|>'],
|
||||
|
@ -5,10 +5,10 @@ models = [
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='qwen2-1.5b-instruct-turbomind',
|
||||
path='Qwen/Qwen2-1.5B-Instruct',
|
||||
engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
max_seq_len=7168,
|
||||
max_out_len=1024,
|
||||
engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
|
||||
max_seq_len=16384,
|
||||
max_out_len=4096,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
)
|
||||
|
@ -5,10 +5,10 @@ models = [
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='qwen2-72b-instruct-turbomind',
|
||||
path='Qwen/Qwen2-72B-Instruct',
|
||||
engine_config=dict(session_len=7168, max_batch_size=16, tp=4),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
max_seq_len=7168,
|
||||
max_out_len=1024,
|
||||
engine_config=dict(session_len=16384, max_batch_size=16, tp=4),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
|
||||
max_seq_len=16384,
|
||||
max_out_len=4096,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=4),
|
||||
)
|
||||
|
@ -5,10 +5,10 @@ models = [
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='qwen2-7b-instruct-turbomind',
|
||||
path='Qwen/Qwen2-7B-Instruct',
|
||||
engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
max_seq_len=7168,
|
||||
max_out_len=1024,
|
||||
engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
|
||||
max_seq_len=16384,
|
||||
max_out_len=4096,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
)
|
||||
|
15
configs/models/yi/lmdeploy_yi_1_5_34b_chat.py
Normal file
15
configs/models/yi/lmdeploy_yi_1_5_34b_chat.py
Normal file
@ -0,0 +1,15 @@
|
||||
from opencompass.models import TurboMindModelwithChatTemplate
|
||||
|
||||
models = [
|
||||
dict(
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='yi-1.5-34b-chat-turbomind',
|
||||
path='01-ai/Yi-1.5-34B-Chat',
|
||||
engine_config=dict(session_len=4096, max_batch_size=16, tp=2),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
|
||||
max_seq_len=4096,
|
||||
max_out_len=2048,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=2),
|
||||
)
|
||||
]
|
15
configs/models/yi/lmdeploy_yi_1_5_6b_chat.py
Normal file
15
configs/models/yi/lmdeploy_yi_1_5_6b_chat.py
Normal file
@ -0,0 +1,15 @@
|
||||
from opencompass.models import TurboMindModelwithChatTemplate
|
||||
|
||||
models = [
|
||||
dict(
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='yi-1.5-6b-chat-turbomind',
|
||||
path='01-ai/Yi-1.5-6B-Chat',
|
||||
engine_config=dict(session_len=4096, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
|
||||
max_seq_len=4096,
|
||||
max_out_len=2048,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
)
|
||||
]
|
15
configs/models/yi/lmdeploy_yi_1_5_9b_chat.py
Normal file
15
configs/models/yi/lmdeploy_yi_1_5_9b_chat.py
Normal file
@ -0,0 +1,15 @@
|
||||
from opencompass.models import TurboMindModelwithChatTemplate
|
||||
|
||||
models = [
|
||||
dict(
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='yi-1.5-9b-chat-turbomind',
|
||||
path='01-ai/Yi-1.5-9B-Chat',
|
||||
engine_config=dict(session_len=4096, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
|
||||
max_seq_len=4096,
|
||||
max_out_len=2048,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
)
|
||||
]
|
15
configs/models/yi/lmdeploy_yi_34b_chat.py
Normal file
15
configs/models/yi/lmdeploy_yi_34b_chat.py
Normal file
@ -0,0 +1,15 @@
|
||||
from opencompass.models import TurboMindModelwithChatTemplate
|
||||
|
||||
models = [
|
||||
dict(
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='yi-34b-chat-turbomind',
|
||||
path='01-ai/Yi-34B-Chat',
|
||||
engine_config=dict(session_len=4096, max_batch_size=16, tp=2),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
|
||||
max_seq_len=4096,
|
||||
max_out_len=2048,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=2),
|
||||
)
|
||||
]
|
15
configs/models/yi/lmdeploy_yi_6b_chat.py
Normal file
15
configs/models/yi/lmdeploy_yi_6b_chat.py
Normal file
@ -0,0 +1,15 @@
|
||||
from opencompass.models import TurboMindModelwithChatTemplate
|
||||
|
||||
models = [
|
||||
dict(
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='yi-6b-chat-turbomind',
|
||||
path='01-ai/Yi-6B-Chat',
|
||||
engine_config=dict(session_len=4096, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
|
||||
max_seq_len=4096,
|
||||
max_out_len=2048,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
)
|
||||
]
|
@ -6,9 +6,9 @@ models = [
|
||||
abbr='glm-4-9b-chat-turbomind',
|
||||
path='THUDM/glm-4-9b-chat',
|
||||
engine_config=dict(max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
|
||||
max_seq_len=8192,
|
||||
max_out_len=1024,
|
||||
max_out_len=2048,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
)
|
||||
|
@ -7,8 +7,8 @@ models = [
|
||||
path='deepseek-ai/deepseek-llm-67b-chat',
|
||||
engine_config=dict(max_batch_size=16, tp=4),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9),
|
||||
max_seq_len=7168,
|
||||
max_out_len=1024,
|
||||
max_seq_len=8192,
|
||||
max_out_len=2048,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=4),
|
||||
)
|
||||
|
@ -7,8 +7,8 @@ models = [
|
||||
path='deepseek-ai/deepseek-llm-7b-chat',
|
||||
engine_config=dict(max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9),
|
||||
max_seq_len=7168,
|
||||
max_out_len=1024,
|
||||
max_seq_len=8192,
|
||||
max_out_len=2048,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
)
|
||||
|
@ -5,10 +5,10 @@ models = [
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='internlm2_5-1_8b-chat-turbomind',
|
||||
path='internlm/internlm2_5-1_8b-chat',
|
||||
engine_config=dict(session_len=8192, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
|
||||
max_seq_len=8192,
|
||||
max_out_len=2048,
|
||||
engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
|
||||
max_seq_len=16384,
|
||||
max_out_len=4096,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
)
|
||||
|
@ -5,10 +5,10 @@ models = [
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='internlm2_5-20b-chat-turbomind',
|
||||
path='internlm/internlm2_5-20b-chat',
|
||||
engine_config=dict(session_len=8192, max_batch_size=16, tp=2),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
|
||||
max_seq_len=8192,
|
||||
max_out_len=2048,
|
||||
engine_config=dict(session_len=16384, max_batch_size=16, tp=2),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
|
||||
max_seq_len=16384,
|
||||
max_out_len=4096,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=2),
|
||||
)
|
||||
|
@ -5,10 +5,10 @@ models = [
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='internlm2_5-7b-chat-turbomind',
|
||||
path='internlm/internlm2_5-7b-chat',
|
||||
engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
max_seq_len=7168,
|
||||
max_out_len=1024,
|
||||
engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
|
||||
max_seq_len=16384,
|
||||
max_out_len=4096,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
)
|
||||
|
@ -5,10 +5,10 @@ models = [
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='internlm2-chat-1.8b-turbomind',
|
||||
path='internlm/internlm2-chat-1_8b',
|
||||
engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
max_seq_len=7168,
|
||||
max_out_len=1024,
|
||||
engine_config=dict(session_len=8192, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
|
||||
max_seq_len=8192,
|
||||
max_out_len=4096,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
)
|
||||
|
@ -5,10 +5,10 @@ models = [
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='internlm2-chat-20b-turbomind',
|
||||
path='internlm/internlm2-chat-20b',
|
||||
engine_config=dict(session_len=7168, max_batch_size=16, tp=2),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
max_seq_len=7168,
|
||||
max_out_len=1024,
|
||||
engine_config=dict(session_len=8192, max_batch_size=16, tp=2),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
|
||||
max_seq_len=8192,
|
||||
max_out_len=4096,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=2),
|
||||
)
|
||||
|
@ -5,10 +5,10 @@ models = [
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='internlm2-chat-7b-turbomind',
|
||||
path='internlm/internlm2-chat-7b',
|
||||
engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
max_seq_len=7168,
|
||||
max_out_len=1024,
|
||||
engine_config=dict(session_len=8192, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
|
||||
max_seq_len=8192,
|
||||
max_out_len=4096,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
)
|
||||
|
@ -6,9 +6,9 @@ models = [
|
||||
abbr='internlm-chat-20b-turbomind',
|
||||
path='internlm/internlm-chat-20b',
|
||||
engine_config=dict(session_len=4096, max_batch_size=16, tp=2),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
|
||||
max_seq_len=4096,
|
||||
max_out_len=1024,
|
||||
max_out_len=2048,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=2),
|
||||
)
|
||||
|
@ -6,9 +6,9 @@ models = [
|
||||
abbr='internlm-chat-7b-turbomind',
|
||||
path='internlm/internlm-chat-7b',
|
||||
engine_config=dict(session_len=4096, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
|
||||
max_seq_len=4096,
|
||||
max_out_len=1024,
|
||||
max_out_len=2048,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
)
|
||||
|
@ -5,7 +5,7 @@ models = [
|
||||
type=HuggingFacewithChatTemplate,
|
||||
abbr='llama-3_1-70b-instruct-hf',
|
||||
path='meta-llama/Meta-Llama-3.1-70B-Instruct',
|
||||
max_out_len=1024,
|
||||
max_out_len=4096,
|
||||
batch_size=8,
|
||||
run_cfg=dict(num_gpus=4),
|
||||
stop_words=['<|end_of_text|>', '<|eot_id|>'],
|
||||
|
12
opencompass/configs/models/hf_llama/hf_llama3_1_8b.py
Normal file
12
opencompass/configs/models/hf_llama/hf_llama3_1_8b.py
Normal file
@ -0,0 +1,12 @@
|
||||
from opencompass.models import HuggingFaceBaseModel
|
||||
|
||||
models = [
|
||||
dict(
|
||||
type=HuggingFaceBaseModel,
|
||||
abbr='llama-3_1-8b-hf',
|
||||
path='meta-llama/Meta-Llama-3.1-8B',
|
||||
max_out_len=1024,
|
||||
batch_size=8,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
)
|
||||
]
|
@ -6,9 +6,9 @@ models = [
|
||||
abbr='llama-2-13b-chat-turbomind',
|
||||
path='meta-llama/Llama-2-13b-chat-hf',
|
||||
engine_config=dict(max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
|
||||
max_seq_len=4096,
|
||||
max_out_len=1024,
|
||||
max_out_len=2048,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
)
|
||||
|
@ -6,9 +6,9 @@ models = [
|
||||
abbr='llama-2-70b-chat-turbomind',
|
||||
path='meta-llama/Llama-2-70b-chat-hf',
|
||||
engine_config=dict(max_batch_size=16, tp=4),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
|
||||
max_seq_len=4096,
|
||||
max_out_len=1024,
|
||||
max_out_len=2048,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=4),
|
||||
)
|
||||
|
@ -6,9 +6,9 @@ models = [
|
||||
abbr='llama-2-7b-chat-turbomind',
|
||||
path='meta-llama/Llama-2-7b-chat-hf',
|
||||
engine_config=dict(max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
|
||||
max_seq_len=4096,
|
||||
max_out_len=1024,
|
||||
max_out_len=2048,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
)
|
||||
|
@ -6,9 +6,9 @@ models = [
|
||||
abbr='llama-3_1-70b-instruct-turbomind',
|
||||
path='meta-llama/Meta-Llama-3.1-70B-Instruct',
|
||||
engine_config=dict(max_batch_size=16, tp=4),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
max_seq_len=7168,
|
||||
max_out_len=1024,
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
|
||||
max_seq_len=16384,
|
||||
max_out_len=4096,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=4),
|
||||
stop_words=['<|end_of_text|>', '<|eot_id|>'],
|
||||
|
@ -6,9 +6,9 @@ models = [
|
||||
abbr='llama-3_1-8b-instruct-turbomind',
|
||||
path='meta-llama/Meta-Llama-3.1-8B-Instruct',
|
||||
engine_config=dict(max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
max_seq_len=7168,
|
||||
max_out_len=1024,
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
|
||||
max_seq_len=16384,
|
||||
max_out_len=4096,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
stop_words=['<|end_of_text|>', '<|eot_id|>'],
|
||||
|
@ -6,9 +6,9 @@ models = [
|
||||
abbr='llama-3-70b-instruct-turbomind',
|
||||
path='meta-llama/Meta-Llama-3-70B-Instruct',
|
||||
engine_config=dict(max_batch_size=16, tp=4),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
max_seq_len=7168,
|
||||
max_out_len=1024,
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
|
||||
max_seq_len=16384,
|
||||
max_out_len=4096,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=4),
|
||||
stop_words=['<|end_of_text|>', '<|eot_id|>'],
|
||||
|
@ -6,9 +6,9 @@ models = [
|
||||
abbr='llama-3-8b-instruct-turbomind',
|
||||
path='meta-llama/Meta-Llama-3-8B-Instruct',
|
||||
engine_config=dict(max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
max_seq_len=7168,
|
||||
max_out_len=1024,
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
|
||||
max_seq_len=16384,
|
||||
max_out_len=4096,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
stop_words=['<|end_of_text|>', '<|eot_id|>'],
|
||||
|
@ -0,0 +1,15 @@
|
||||
from opencompass.models import TurboMindModelwithChatTemplate
|
||||
|
||||
models = [
|
||||
dict(
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='mistral-7b-instruct-v0.3-turbomind',
|
||||
path='mistralai/Mistral-7B-Instruct-v0.3',
|
||||
engine_config=dict(session_len=32768, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
|
||||
max_seq_len=32768,
|
||||
max_out_len=4096,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
)
|
||||
]
|
@ -0,0 +1,15 @@
|
||||
from opencompass.models import TurboMindModelwithChatTemplate
|
||||
|
||||
models = [
|
||||
dict(
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='mistral-large-instruct-2407-turbomind',
|
||||
path='mistralai/Mistral-Large-Instruct-2407',
|
||||
engine_config=dict(session_len=32768, max_batch_size=16, tp=4),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
|
||||
max_seq_len=32768,
|
||||
max_out_len=4096,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=4),
|
||||
)
|
||||
]
|
@ -5,10 +5,10 @@ models = [
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='qwen1.5-110b-chat-turbomind',
|
||||
path='Qwen/Qwen1.5-110B-Chat',
|
||||
engine_config=dict(session_len=7168, max_batch_size=8, tp=4),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
max_seq_len=7168,
|
||||
max_out_len=1024,
|
||||
engine_config=dict(session_len=16384, max_batch_size=8, tp=4),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
|
||||
max_seq_len=16384,
|
||||
max_out_len=4096,
|
||||
batch_size=8,
|
||||
run_cfg=dict(num_gpus=4),
|
||||
stop_words=['<|im_end|>', '<|im_start|>'],
|
||||
|
@ -5,10 +5,10 @@ models = [
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='qwen1.5-14b-chat-turbomind',
|
||||
path='Qwen/Qwen1.5-14B-Chat',
|
||||
engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
max_seq_len=7168,
|
||||
max_out_len=1024,
|
||||
engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
|
||||
max_seq_len=16384,
|
||||
max_out_len=4096,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
stop_words=['<|im_end|>', '<|im_start|>'],
|
||||
|
@ -5,10 +5,10 @@ models = [
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='qwen1.5-1.8b-chat-turbomind',
|
||||
path='Qwen/Qwen1.5-1.8B-Chat',
|
||||
engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
max_seq_len=7168,
|
||||
max_out_len=1024,
|
||||
engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
|
||||
max_seq_len=16384,
|
||||
max_out_len=4096,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
stop_words=['<|im_end|>', '<|im_start|>'],
|
||||
|
@ -5,10 +5,10 @@ models = [
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='qwen1.5-32b-chat-turbomind',
|
||||
path='Qwen/Qwen1.5-32B-Chat',
|
||||
engine_config=dict(session_len=7168, max_batch_size=16, tp=2),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
max_seq_len=7168,
|
||||
max_out_len=1024,
|
||||
engine_config=dict(session_len=16384, max_batch_size=16, tp=2),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
|
||||
max_seq_len=16384,
|
||||
max_out_len=4096,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=2),
|
||||
stop_words=['<|im_end|>', '<|im_start|>'],
|
||||
|
@ -5,10 +5,10 @@ models = [
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='qwen1.5-4b-chat-turbomind',
|
||||
path='Qwen/Qwen1.5-4B-Chat',
|
||||
engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
max_seq_len=7168,
|
||||
max_out_len=1024,
|
||||
engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
|
||||
max_seq_len=16384,
|
||||
max_out_len=4096,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
stop_words=['<|im_end|>', '<|im_start|>'],
|
||||
|
@ -5,10 +5,10 @@ models = [
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='qwen1.5-72b-chat-turbomind',
|
||||
path='Qwen/Qwen1.5-72B-Chat',
|
||||
engine_config=dict(session_len=7168, max_batch_size=16, tp=4),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
max_seq_len=7168,
|
||||
max_out_len=1024,
|
||||
engine_config=dict(session_len=16384, max_batch_size=16, tp=4),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
|
||||
max_seq_len=16384,
|
||||
max_out_len=4096,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=4),
|
||||
stop_words=['<|im_end|>', '<|im_start|>'],
|
||||
|
@ -5,10 +5,10 @@ models = [
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='qwen1.5-7b-chat-turbomind',
|
||||
path='Qwen/Qwen1.5-7B-Chat',
|
||||
engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
max_seq_len=7168,
|
||||
max_out_len=1024,
|
||||
engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
|
||||
max_seq_len=16384,
|
||||
max_out_len=4096,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
stop_words=['<|im_end|>', '<|im_start|>'],
|
||||
|
@ -5,10 +5,10 @@ models = [
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='qwen2-1.5b-instruct-turbomind',
|
||||
path='Qwen/Qwen2-1.5B-Instruct',
|
||||
engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
max_seq_len=7168,
|
||||
max_out_len=1024,
|
||||
engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
|
||||
max_seq_len=16384,
|
||||
max_out_len=4096,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
)
|
||||
|
@ -5,10 +5,10 @@ models = [
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='qwen2-72b-instruct-turbomind',
|
||||
path='Qwen/Qwen2-72B-Instruct',
|
||||
engine_config=dict(session_len=7168, max_batch_size=16, tp=4),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
max_seq_len=7168,
|
||||
max_out_len=1024,
|
||||
engine_config=dict(session_len=16384, max_batch_size=16, tp=4),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
|
||||
max_seq_len=16384,
|
||||
max_out_len=4096,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=4),
|
||||
)
|
||||
|
@ -5,10 +5,10 @@ models = [
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='qwen2-7b-instruct-turbomind',
|
||||
path='Qwen/Qwen2-7B-Instruct',
|
||||
engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
|
||||
max_seq_len=7168,
|
||||
max_out_len=1024,
|
||||
engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
|
||||
max_seq_len=16384,
|
||||
max_out_len=4096,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
)
|
||||
|
15
opencompass/configs/models/yi/lmdeploy_yi_1_5_34b_chat.py
Normal file
15
opencompass/configs/models/yi/lmdeploy_yi_1_5_34b_chat.py
Normal file
@ -0,0 +1,15 @@
|
||||
from opencompass.models import TurboMindModelwithChatTemplate
|
||||
|
||||
models = [
|
||||
dict(
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='yi-1.5-34b-chat-turbomind',
|
||||
path='01-ai/Yi-1.5-34B-Chat',
|
||||
engine_config=dict(session_len=4096, max_batch_size=16, tp=2),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
|
||||
max_seq_len=4096,
|
||||
max_out_len=2048,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=2),
|
||||
)
|
||||
]
|
15
opencompass/configs/models/yi/lmdeploy_yi_1_5_6b_chat.py
Normal file
15
opencompass/configs/models/yi/lmdeploy_yi_1_5_6b_chat.py
Normal file
@ -0,0 +1,15 @@
|
||||
from opencompass.models import TurboMindModelwithChatTemplate
|
||||
|
||||
models = [
|
||||
dict(
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='yi-1.5-6b-chat-turbomind',
|
||||
path='01-ai/Yi-1.5-6B-Chat',
|
||||
engine_config=dict(session_len=4096, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
|
||||
max_seq_len=4096,
|
||||
max_out_len=2048,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
)
|
||||
]
|
15
opencompass/configs/models/yi/lmdeploy_yi_1_5_9b_chat.py
Normal file
15
opencompass/configs/models/yi/lmdeploy_yi_1_5_9b_chat.py
Normal file
@ -0,0 +1,15 @@
|
||||
from opencompass.models import TurboMindModelwithChatTemplate
|
||||
|
||||
models = [
|
||||
dict(
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='yi-1.5-9b-chat-turbomind',
|
||||
path='01-ai/Yi-1.5-9B-Chat',
|
||||
engine_config=dict(session_len=4096, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
|
||||
max_seq_len=4096,
|
||||
max_out_len=2048,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
)
|
||||
]
|
15
opencompass/configs/models/yi/lmdeploy_yi_34b_chat.py
Normal file
15
opencompass/configs/models/yi/lmdeploy_yi_34b_chat.py
Normal file
@ -0,0 +1,15 @@
|
||||
from opencompass.models import TurboMindModelwithChatTemplate
|
||||
|
||||
models = [
|
||||
dict(
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='yi-34b-chat-turbomind',
|
||||
path='01-ai/Yi-34B-Chat',
|
||||
engine_config=dict(session_len=4096, max_batch_size=16, tp=2),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
|
||||
max_seq_len=4096,
|
||||
max_out_len=2048,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=2),
|
||||
)
|
||||
]
|
15
opencompass/configs/models/yi/lmdeploy_yi_6b_chat.py
Normal file
15
opencompass/configs/models/yi/lmdeploy_yi_6b_chat.py
Normal file
@ -0,0 +1,15 @@
|
||||
from opencompass.models import TurboMindModelwithChatTemplate
|
||||
|
||||
models = [
|
||||
dict(
|
||||
type=TurboMindModelwithChatTemplate,
|
||||
abbr='yi-6b-chat-turbomind',
|
||||
path='01-ai/Yi-6B-Chat',
|
||||
engine_config=dict(session_len=4096, max_batch_size=16, tp=1),
|
||||
gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
|
||||
max_seq_len=4096,
|
||||
max_out_len=2048,
|
||||
batch_size=16,
|
||||
run_cfg=dict(num_gpus=1),
|
||||
)
|
||||
]
|
Loading…
Reference in New Issue
Block a user