diff --git a/configs/models/hf_internlm/lmdeploy_internlm2_5_7b_chat_1m.py b/configs/models/hf_internlm/lmdeploy_internlm2_5_7b_chat_1m.py index 09d34dd1..6d96de28 100644 --- a/configs/models/hf_internlm/lmdeploy_internlm2_5_7b_chat_1m.py +++ b/configs/models/hf_internlm/lmdeploy_internlm2_5_7b_chat_1m.py @@ -5,7 +5,7 @@ models = [ type=TurboMindModelwithChatTemplate, abbr='internlm2_5-7b-chat-1m-turbomind', path='internlm/internlm2_5-7b-chat-1m', - engine_config=dict(rope_scaling_factor=2.5, session_len=1048576, max_batch_size=1, tp=4), # 1M context length + engine_config=dict(rope_scaling_factor=2.5, session_len=1048576, max_batch_size=1, cache_max_entry_count=0.7, tp=4), # 1M context length gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048), max_seq_len=1048576, max_out_len=2048,