[Feature] Update the max_out_len for many models (#1559)

Songyang Zhang 2024-09-24 21:52:28 +08:00 committed by GitHub
parent a2e9bc0c41
commit e7681943f3
74 changed files with 420 additions and 186 deletions
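Every hunk in this commit follows the same pattern: max_out_len moves in lockstep with max_new_tokens in gen_config, and where the TurboMind engine_config carries a session_len, max_seq_len moves in lockstep with it, keeping the output budget inside the context window. A minimal sketch of that invariant, assuming model entries shaped like the configs below (check_gen_limits is a hypothetical helper, not part of this commit):

# Hypothetical sanity check over the fields this commit touches.
def check_gen_limits(models):
    for m in models:
        gen = m.get('gen_config', {})
        eng = m.get('engine_config', {})
        # max_out_len mirrors the engine-side generation cap, when present.
        if 'max_new_tokens' in gen:
            assert m['max_out_len'] == gen['max_new_tokens'], m['abbr']
        # max_seq_len mirrors the TurboMind session length, when present.
        if 'session_len' in eng:
            assert m['max_seq_len'] == eng['session_len'], m['abbr']
        # The output budget must fit inside the context window.
        if 'max_seq_len' in m:
            assert m['max_out_len'] < m['max_seq_len'], m['abbr']

# Example: the updated glm-4-9b-chat entry from the first hunk below.
check_gen_limits([
    dict(
        abbr='glm-4-9b-chat-turbomind',
        engine_config=dict(max_batch_size=16, tp=1),
        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
        max_seq_len=8192,
        max_out_len=2048,
    ),
])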

@@ -6,9 +6,9 @@ models = [
         abbr='glm-4-9b-chat-turbomind',
         path='THUDM/glm-4-9b-chat',
         engine_config=dict(max_batch_size=16, tp=1),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
         max_seq_len=8192,
-        max_out_len=1024,
+        max_out_len=2048,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
     )

@@ -7,8 +7,8 @@ models = [
         path='deepseek-ai/deepseek-llm-67b-chat',
         engine_config=dict(max_batch_size=16, tp=4),
         gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9),
-        max_seq_len=7168,
-        max_out_len=1024,
+        max_seq_len=8192,
+        max_out_len=2048,
         batch_size=16,
         run_cfg=dict(num_gpus=4),
     )

@@ -7,8 +7,8 @@ models = [
         path='deepseek-ai/deepseek-llm-7b-chat',
         engine_config=dict(max_batch_size=16, tp=1),
         gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9),
-        max_seq_len=7168,
-        max_out_len=1024,
+        max_seq_len=8192,
+        max_out_len=2048,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
     )

@@ -5,10 +5,10 @@ models = [
         type=TurboMindModelwithChatTemplate,
         abbr='internlm2_5-1_8b-chat-turbomind',
         path='internlm/internlm2_5-1_8b-chat',
-        engine_config=dict(session_len=8192, max_batch_size=16, tp=1),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
-        max_seq_len=8192,
-        max_out_len=2048,
+        engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
     )

@@ -5,10 +5,10 @@ models = [
         type=TurboMindModelwithChatTemplate,
         abbr='internlm2_5-20b-chat-turbomind',
         path='internlm/internlm2_5-20b-chat',
-        engine_config=dict(session_len=8192, max_batch_size=16, tp=2),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
-        max_seq_len=8192,
-        max_out_len=2048,
+        engine_config=dict(session_len=16384, max_batch_size=16, tp=2),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=2),
     )

@@ -5,10 +5,10 @@ models = [
         type=TurboMindModelwithChatTemplate,
         abbr='internlm2_5-7b-chat-turbomind',
         path='internlm/internlm2_5-7b-chat',
-        engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
     )

@@ -5,10 +5,10 @@ models = [
         type=TurboMindModelwithChatTemplate,
         abbr='internlm2-chat-1.8b-turbomind',
         path='internlm/internlm2-chat-1_8b',
-        engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        engine_config=dict(session_len=8192, max_batch_size=16, tp=1),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=8192,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
     )

@@ -5,10 +5,10 @@ models = [
         type=TurboMindModelwithChatTemplate,
         abbr='internlm2-chat-20b-turbomind',
         path='internlm/internlm2-chat-20b',
-        engine_config=dict(session_len=7168, max_batch_size=16, tp=2),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        engine_config=dict(session_len=8192, max_batch_size=16, tp=2),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=8192,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=2),
     )

@@ -5,10 +5,10 @@ models = [
         type=TurboMindModelwithChatTemplate,
         abbr='internlm2-chat-7b-turbomind',
         path='internlm/internlm2-chat-7b',
-        engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        engine_config=dict(session_len=8192, max_batch_size=16, tp=1),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=8192,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
     )

@@ -6,9 +6,9 @@ models = [
         abbr='internlm-chat-20b-turbomind',
         path='internlm/internlm-chat-20b',
         engine_config=dict(session_len=4096, max_batch_size=16, tp=2),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
         max_seq_len=4096,
-        max_out_len=1024,
+        max_out_len=2048,
         batch_size=16,
         run_cfg=dict(num_gpus=2),
     )

@@ -6,9 +6,9 @@ models = [
         abbr='internlm-chat-7b-turbomind',
         path='internlm/internlm-chat-7b',
         engine_config=dict(session_len=4096, max_batch_size=16, tp=1),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
         max_seq_len=4096,
-        max_out_len=1024,
+        max_out_len=2048,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
     )

@@ -5,7 +5,7 @@ models = [
         type=HuggingFacewithChatTemplate,
         abbr='llama-3_1-70b-instruct-hf',
         path='meta-llama/Meta-Llama-3.1-70B-Instruct',
-        max_out_len=1024,
+        max_out_len=4096,
         batch_size=8,
         run_cfg=dict(num_gpus=4),
         stop_words=['<|end_of_text|>', '<|eot_id|>'],

@@ -0,0 +1,12 @@
+from opencompass.models import HuggingFaceBaseModel
+
+models = [
+    dict(
+        type=HuggingFaceBaseModel,
+        abbr='llama-3_1-8b-hf',
+        path='meta-llama/Meta-Llama-3.1-8B-Instruct',
+        max_out_len=1024,
+        batch_size=8,
+        run_cfg=dict(num_gpus=1),
+    )
+]

@@ -6,9 +6,9 @@ models = [
         abbr='llama-2-13b-chat-turbomind',
         path='meta-llama/Llama-2-13b-chat-hf',
         engine_config=dict(max_batch_size=16, tp=1),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
         max_seq_len=4096,
-        max_out_len=1024,
+        max_out_len=2048,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
     )

@@ -6,9 +6,9 @@ models = [
         abbr='llama-2-70b-chat-turbomind',
         path='meta-llama/Llama-2-70b-chat-hf',
         engine_config=dict(max_batch_size=16, tp=4),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
         max_seq_len=4096,
-        max_out_len=1024,
+        max_out_len=2048,
         batch_size=16,
         run_cfg=dict(num_gpus=4),
     )

@@ -6,9 +6,9 @@ models = [
         abbr='llama-2-7b-chat-turbomind',
         path='meta-llama/Llama-2-7b-chat-hf',
         engine_config=dict(max_batch_size=16, tp=1),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
         max_seq_len=4096,
-        max_out_len=1024,
+        max_out_len=2048,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
     )

@@ -6,9 +6,9 @@ models = [
         abbr='llama-3_1-70b-instruct-turbomind',
         path='meta-llama/Meta-Llama-3.1-70B-Instruct',
         engine_config=dict(max_batch_size=16, tp=4),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=4),
         stop_words=['<|end_of_text|>', '<|eot_id|>'],

@@ -6,9 +6,9 @@ models = [
         abbr='llama-3_1-8b-instruct-turbomind',
         path='meta-llama/Meta-Llama-3.1-8B-Instruct',
         engine_config=dict(max_batch_size=16, tp=1),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
         stop_words=['<|end_of_text|>', '<|eot_id|>'],

@@ -6,9 +6,9 @@ models = [
         abbr='llama-3-70b-instruct-turbomind',
         path='meta-llama/Meta-Llama-3-70B-Instruct',
         engine_config=dict(max_batch_size=16, tp=4),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=4),
         stop_words=['<|end_of_text|>', '<|eot_id|>'],

@@ -6,9 +6,9 @@ models = [
         abbr='llama-3-8b-instruct-turbomind',
         path='meta-llama/Meta-Llama-3-8B-Instruct',
         engine_config=dict(max_batch_size=16, tp=1),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
         stop_words=['<|end_of_text|>', '<|eot_id|>'],

@@ -0,0 +1,15 @@
+from opencompass.models import TurboMindModelwithChatTemplate
+
+models = [
+    dict(
+        type=TurboMindModelwithChatTemplate,
+        abbr='mistral-7b-instruct-v0.3-turbomind',
+        path='mistralai/Mistral-7B-Instruct-v0.3',
+        engine_config=dict(session_len=32768, max_batch_size=16, tp=1),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=32768,
+        max_out_len=4096,
+        batch_size=16,
+        run_cfg=dict(num_gpus=1),
+    )
+]

@@ -0,0 +1,15 @@
+from opencompass.models import TurboMindModelwithChatTemplate
+
+models = [
+    dict(
+        type=TurboMindModelwithChatTemplate,
+        abbr='mixtral-large-instruct-2407-turbomind',
+        path='mistralai/Mistral-Large-Instruct-2407',
+        engine_config=dict(session_len=32768, max_batch_size=16, tp=4),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=32768,
+        max_out_len=4096,
+        batch_size=16,
+        run_cfg=dict(num_gpus=4),
+    )
+]

@@ -5,10 +5,10 @@ models = [
         type=TurboMindModelwithChatTemplate,
         abbr='qwen1.5-110b-chat-turbomind',
         path='Qwen/Qwen1.5-110B-Chat',
-        engine_config=dict(session_len=7168, max_batch_size=8, tp=4),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        engine_config=dict(session_len=16834, max_batch_size=8, tp=4),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16834,
+        max_out_len=4096,
         batch_size=8,
         run_cfg=dict(num_gpus=4),
         stop_words=['<|im_end|>', '<|im_start|>'],

@@ -5,10 +5,10 @@ models = [
         type=TurboMindModelwithChatTemplate,
         abbr='qwen1.5-14b-chat-turbomind',
         path='Qwen/Qwen1.5-14B-Chat',
-        engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
         stop_words=['<|im_end|>', '<|im_start|>'],

@@ -5,10 +5,10 @@ models = [
         type=TurboMindModelwithChatTemplate,
         abbr='qwen1.5-1.8b-chat-turbomind',
         path='Qwen/Qwen1.5-1.8B-Chat',
-        engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
         stop_words=['<|im_end|>', '<|im_start|>'],

@@ -5,10 +5,10 @@ models = [
         type=TurboMindModelwithChatTemplate,
         abbr='qwen1.5-32b-chat-turbomind',
         path='Qwen/Qwen1.5-32B-Chat',
-        engine_config=dict(session_len=7168, max_batch_size=16, tp=2),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        engine_config=dict(session_len=16384, max_batch_size=16, tp=2),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=2),
         stop_words=['<|im_end|>', '<|im_start|>'],

@@ -5,10 +5,10 @@ models = [
         type=TurboMindModelwithChatTemplate,
         abbr='qwen1.5-4b-chat-turbomind',
         path='Qwen/Qwen1.5-4B-Chat',
-        engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
         stop_words=['<|im_end|>', '<|im_start|>'],

@@ -5,10 +5,10 @@ models = [
         type=TurboMindModelwithChatTemplate,
         abbr='qwen1.5-72b-chat-turbomind',
         path='Qwen/Qwen1.5-72B-Chat',
-        engine_config=dict(session_len=7168, max_batch_size=16, tp=4),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        engine_config=dict(session_len=16384, max_batch_size=16, tp=4),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=4),
         stop_words=['<|im_end|>', '<|im_start|>'],

@@ -5,10 +5,10 @@ models = [
         type=TurboMindModelwithChatTemplate,
         abbr='qwen1.5-7b-chat-turbomind',
         path='Qwen/Qwen1.5-7B-Chat',
-        engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
         stop_words=['<|im_end|>', '<|im_start|>'],

@@ -5,10 +5,10 @@ models = [
         type=TurboMindModelwithChatTemplate,
         abbr='qwen2-1.5b-instruct-turbomind',
         path='Qwen/Qwen2-1.5B-Instruct',
-        engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
     )

@@ -5,10 +5,10 @@ models = [
         type=TurboMindModelwithChatTemplate,
         abbr='qwen2-72b-instruct-turbomind',
         path='Qwen/Qwen2-72B-Instruct',
-        engine_config=dict(session_len=7168, max_batch_size=16, tp=4),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        engine_config=dict(session_len=16384, max_batch_size=16, tp=4),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=4),
     )

@@ -5,10 +5,10 @@ models = [
         type=TurboMindModelwithChatTemplate,
         abbr='qwen2-7b-instruct-turbomind',
         path='Qwen/Qwen2-7B-Instruct',
-        engine_config=dict(session_len=7168, max_batch_size=16, tp=1),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
-        max_seq_len=7168,
-        max_out_len=1024,
+        engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
         batch_size=16,
         run_cfg=dict(num_gpus=1),
     )

@@ -0,0 +1,15 @@
+from opencompass.models import TurboMindModelwithChatTemplate
+
+models = [
+    dict(
+        type=TurboMindModelwithChatTemplate,
+        abbr='yi-1.5-34b-chat-turbomind',
+        path='01-ai/Yi-1.5-34B-Chat',
+        engine_config=dict(session_len=4096, max_batch_size=16, tp=2),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
+        max_seq_len=4096,
+        max_out_len=2048,
+        batch_size=16,
+        run_cfg=dict(num_gpus=2),
+    )
+]

@@ -0,0 +1,15 @@
+from opencompass.models import TurboMindModelwithChatTemplate
+
+models = [
+    dict(
+        type=TurboMindModelwithChatTemplate,
+        abbr='yi-1.5-6b-chat-turbomind',
+        path='01-ai/Yi-1.5-6B-Chat',
+        engine_config=dict(session_len=4096, max_batch_size=16, tp=1),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
+        max_seq_len=4096,
+        max_out_len=2048,
+        batch_size=16,
+        run_cfg=dict(num_gpus=1),
+    )
+]

@@ -0,0 +1,15 @@
+from opencompass.models import TurboMindModelwithChatTemplate
+
+models = [
+    dict(
+        type=TurboMindModelwithChatTemplate,
+        abbr='yi-1.5-9b-chat-turbomind',
+        path='01-ai/Yi-1.5-9B-Chat',
+        engine_config=dict(session_len=4096, max_batch_size=16, tp=1),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
+        max_seq_len=4096,
+        max_out_len=2048,
+        batch_size=16,
+        run_cfg=dict(num_gpus=1),
+    )
+]

@@ -0,0 +1,15 @@
+from opencompass.models import TurboMindModelwithChatTemplate
+
+models = [
+    dict(
+        type=TurboMindModelwithChatTemplate,
+        abbr='yi-34b-chat-turbomind',
+        path='01-ai/Yi-34B-Chat',
+        engine_config=dict(session_len=4096, max_batch_size=16, tp=2),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
+        max_seq_len=4096,
+        max_out_len=2048,
+        batch_size=16,
+        run_cfg=dict(num_gpus=2),
+    )
+]

@@ -0,0 +1,15 @@
+from opencompass.models import TurboMindModelwithChatTemplate
+
+models = [
+    dict(
+        type=TurboMindModelwithChatTemplate,
+        abbr='yi-6b-chat-turbomind',
+        path='01-ai/Yi-6B-Chat',
+        engine_config=dict(session_len=4096, max_batch_size=16, tp=1),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
+        max_seq_len=4096,
+        max_out_len=2048,
+        batch_size=16,
+        run_cfg=dict(num_gpus=1),
+    )
+]
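Downstream, these model lists are pulled into an evaluation config by import. A hedged sketch of the usual OpenCompass pattern, assuming standard module layout under configs/ (this view omits the changed file names, so the import paths below are illustrative):

# Illustrative eval config; the module paths are assumptions, not shown in this diff.
from mmengine.config import read_base

with read_base():
    from .models.qwen.lmdeploy_qwen2_7b_instruct import models as qwen2_7b_model
    from .datasets.gsm8k.gsm8k_gen import gsm8k_datasets

models = [*qwen2_7b_model]
datasets = [*gsm8k_datasets]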
