from opencompass.models import TurboMindModelwithChatTemplate
models = [
    dict(
        type=TurboMindModelwithChatTemplate,
        abbr='internlm2-chat-7b-lmdeploy',
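        # `path` below is the Hugging Face Hub model ID; a local checkpoint
        # directory should also work here.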
        path='internlm/internlm2-chat-7b',
        # Inference backend of LMDeploy. It can be either 'turbomind' or
        # 'pytorch'. If the model is not supported by 'turbomind', it will
        # fall back to 'pytorch'.
        backend='turbomind',
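        # (Illustrative assumption: setting backend='pytorch' here would
        # select the PyTorch engine explicitly instead of relying on the
        # automatic fallback described above.)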
        # For the detailed engine config and generation config, please refer to
        # https://github.com/InternLM/lmdeploy/blob/main/lmdeploy/messages.py
        engine_config=dict(tp=1),
        gen_config=dict(do_sample=False),
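        # A fuller sketch of the two configs above (parameter names taken from
        # TurbomindEngineConfig and GenerationConfig in messages.py; the
        # values are illustrative assumptions, not tuned recommendations):
        # engine_config=dict(tp=1, session_len=8192, max_batch_size=16),
        # gen_config=dict(do_sample=False, top_k=1, max_new_tokens=4096),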
        max_seq_len=8192,
        max_out_len=4096,
        # The max number of prompts that LMDeploy receives in the `generate`
        # function.
        batch_size=5000,
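        # Note: this is the OpenCompass-side batch size. LMDeploy schedules
        # concurrent requests internally (bounded by the engine's
        # max_batch_size), so a large value here is generally safe.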
        run_cfg=dict(num_gpus=1),
    )
]
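
# Hedged usage sketch: a model list like this is normally paired with a
# dataset list and launched through OpenCompass's run.py. The dataset module
# below is an illustrative assumption; check your local configs/ tree for the
# exact import path.
#
#   from mmengine.config import read_base
#
#   with read_base():
#       from opencompass.configs.datasets.gsm8k.gsm8k_gen import gsm8k_datasets
#
#   datasets = gsm8k_datasets
#
# Then run:  python run.py <this_config>.py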