From f7b28e87f37864b7ce109b12248f9ecf39758a6a Mon Sep 17 00:00:00 2001
From: ZJJ <1423115494@qq.com>
Date: Tue, 18 Mar 2025 17:41:30 +0800
Subject: [PATCH] fix the bug that model_kwargs passed in is ignored when the
 accelerator is vllm

---
 opencompass/utils/run.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/opencompass/utils/run.py b/opencompass/utils/run.py
index accd3468..2443b829 100644
--- a/opencompass/utils/run.py
+++ b/opencompass/utils/run.py
@@ -276,13 +276,15 @@ def change_accelerator(models, accelerator):
                     if model.get(item) is not None:
                         acc_model[item] = model[item]
             elif accelerator == 'vllm':
+                model_kwargs = dict(tensor_parallel_size=model['run_cfg']['num_gpus'], max_model_len=model.get('max_seq_len', None))
+                model_kwargs.update(model.get('model_kwargs', {}))
                 logger.info(f'Transforming {model["abbr"]} to {accelerator}')
                 acc_model = dict(
                     type=f'{VLLM.__module__}.{VLLM.__name__}',
                     abbr=model['abbr'].replace('hf', 'vllm') if '-hf' in model['abbr'] else model['abbr'] + '-vllm',
                     path=model['path'],
-                    model_kwargs=dict(tensor_parallel_size=model['run_cfg']['num_gpus'], max_model_len=model.get('max_seq_len', None)),
+                    model_kwargs=model_kwargs,
                     max_out_len=model['max_out_len'],
                     max_seq_len=model.get('max_seq_len', None),
                     batch_size=model['batch_size'],
@@ -296,12 +298,14 @@ def change_accelerator(models, accelerator):
                 raise ValueError(f'Unsupported accelerator {accelerator} for model type {model["type"]}')
         elif model['type'] in [HuggingFacewithChatTemplate, f'{HuggingFacewithChatTemplate.__module__}.{HuggingFacewithChatTemplate.__name__}']:
             if accelerator == 'vllm':
+                model_kwargs = dict(tensor_parallel_size=model['run_cfg']['num_gpus'], max_model_len=model.get('max_seq_len', None))
+                model_kwargs.update(model.get('model_kwargs', {}))
                 mod = VLLMwithChatTemplate
                 acc_model = dict(
                     type=f'{mod.__module__}.{mod.__name__}',
                     abbr=model['abbr'].replace('hf', 'vllm') if '-hf' in model['abbr'] else model['abbr'] + '-vllm',
                     path=model['path'],
-                    model_kwargs=dict(tensor_parallel_size=model['run_cfg']['num_gpus'], max_model_len=model.get('max_seq_len', None)),
+                    model_kwargs=model_kwargs,
                     max_seq_len=model.get('max_seq_len', None),
                     max_out_len=model['max_out_len'],
                     batch_size=16,
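
Note: a minimal sketch of the merge behavior this patch introduces. The model
entry below is hypothetical, not taken from the patch; it only illustrates
that defaults derived from run_cfg and max_seq_len are built first, and any
user-specified model_kwargs are then layered on top instead of being dropped.
The fallback to {} covers model entries that define no model_kwargs at all,
since dict.update(None) would raise a TypeError.

    # Hypothetical model entry; field values are for illustration only.
    model = dict(
        abbr='demo-7b-hf',
        path='org/demo-7b',
        max_seq_len=4096,
        run_cfg=dict(num_gpus=2),
        model_kwargs=dict(gpu_memory_utilization=0.8),  # previously dropped
    )

    # Defaults derived from the config, as in the patch...
    model_kwargs = dict(tensor_parallel_size=model['run_cfg']['num_gpus'],
                        max_model_len=model.get('max_seq_len', None))
    # ...then overlaid with the user-specified kwargs (falling back to {}).
    model_kwargs.update(model.get('model_kwargs', {}))

    assert model_kwargs == dict(tensor_parallel_size=2,
                                max_model_len=4096,
                                gpu_memory_utilization=0.8)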