From 5c8e91f329720971141a6a7ebbb7974f5cccb191 Mon Sep 17 00:00:00 2001
From: liushz
Date: Mon, 16 Dec 2024 21:44:36 +0800
Subject: [PATCH] [Fix] Fix vllm max_seq_len parameter transfer (#1745)

* [Fix] Fix vllm max_seq_len parameter transfer

* [Fix] Fix vllm max_seq_len parameter transfer

* Update pr-run-test.yml

* Update pr-run-test.yml

---------

Co-authored-by: zhulinJulia24 <145004780+zhulinJulia24@users.noreply.github.com>
---
 .github/workflows/pr-run-test.yml |  6 +++---
 opencompass/utils/run.py          | 16 +++++++++++-----
 2 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/pr-run-test.yml b/.github/workflows/pr-run-test.yml
index 77cecb4b..561ef750 100644
--- a/.github/workflows/pr-run-test.yml
+++ b/.github/workflows/pr-run-test.yml
@@ -73,10 +73,10 @@ jobs:
             exit 1
           fi
           score=$(sed -n '$p' regression_result3/*/summary/*.csv | awk -F ',' '{print $NF}')
-          if (( ${score%.*} >= 84 && ${score%.*} <= 87 )); then
-            echo "score is $score between 84 and 87"
+          if (( ${score%.*} >= 87 && ${score%.*} <= 89 )); then
+            echo "score is $score between 87 and 89"
           else
-            echo "score is $score not between 84 and 87"
+            echo "score is $score not between 87 and 89"
             exit 1
           fi
           rm -rf regression_result1 & rm -rf regression_result2 & rm -rf regression_result3
diff --git a/opencompass/utils/run.py b/opencompass/utils/run.py
index 025efc4b..accd3468 100644
--- a/opencompass/utils/run.py
+++ b/opencompass/utils/run.py
@@ -282,9 +282,9 @@ def change_accelerator(models, accelerator):
                 type=f'{VLLM.__module__}.{VLLM.__name__}',
                 abbr=model['abbr'].replace('hf', 'vllm') if '-hf' in model['abbr'] else model['abbr'] + '-vllm',
                 path=model['path'],
-                model_kwargs=dict(tensor_parallel_size=model['run_cfg']['num_gpus']),
+                model_kwargs=dict(tensor_parallel_size=model['run_cfg']['num_gpus'], max_model_len=model.get('max_seq_len', None)),
                 max_out_len=model['max_out_len'],
-                max_seq_len=model['max_seq_len'],
+                max_seq_len=model.get('max_seq_len', None),
                 batch_size=model['batch_size'],
                 generation_kwargs=generation_kwargs,
                 run_cfg=model['run_cfg'],
@@ -301,7 +301,8 @@ def change_accelerator(models, accelerator):
                 type=f'{mod.__module__}.{mod.__name__}',
                 abbr=model['abbr'].replace('hf', 'vllm') if '-hf' in model['abbr'] else model['abbr'] + '-vllm',
                 path=model['path'],
-                model_kwargs=dict(tensor_parallel_size=model['run_cfg']['num_gpus']),
+                model_kwargs=dict(tensor_parallel_size=model['run_cfg']['num_gpus'], max_model_len=model.get('max_seq_len', None)),
+                max_seq_len=model.get('max_seq_len', None),
                 max_out_len=model['max_out_len'],
                 batch_size=16,
                 run_cfg=model['run_cfg'],
@@ -313,9 +314,14 @@ def change_accelerator(models, accelerator):
                 type=f'{mod.__module__}.{mod.__name__}',
                 abbr=model['abbr'].replace('hf', 'lmdeploy') if '-hf' in model['abbr'] else model['abbr'] + '-lmdeploy',
                 path=model['path'],
-                engine_config=dict(max_batch_size=model.get('batch_size', 16), tp=model['run_cfg']['num_gpus']),
+                engine_config=dict(
+                    max_batch_size=model.get('batch_size', 16),
+                    tp=model['run_cfg']['num_gpus'],
+                    session_len=model.get('max_seq_len', None),
+                    max_new_tokens=model['max_out_len']
+                ),
                 gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9),
-                max_seq_len=model.get('max_seq_len', 2048),
+                max_seq_len=model.get('max_seq_len', None),
                 max_out_len=model['max_out_len'],
                 batch_size=16,
                 run_cfg=model['run_cfg'],
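
Note on the behavior change (not part of the patch): the fix makes change_accelerator forward a model's max_seq_len into the backend configs (vLLM's max_model_len, LMDeploy's session_len) instead of dropping it or hard-coding a default. The sketch below mirrors the updated vLLM branch under stated assumptions; the helper to_vllm_cfg and the example model dict are hypothetical and only illustrate the expected mapping.

    # Minimal sketch of the vLLM conversion after this fix (hypothetical helper).
    def to_vllm_cfg(model: dict) -> dict:
        """Mirror of the updated vLLM branch: max_seq_len is forwarded (or None)."""
        return dict(
            path=model['path'],
            model_kwargs=dict(
                tensor_parallel_size=model['run_cfg']['num_gpus'],
                # previously dropped; now passed through so vLLM sizes its context window
                max_model_len=model.get('max_seq_len', None),
            ),
            max_seq_len=model.get('max_seq_len', None),
            max_out_len=model['max_out_len'],
            batch_size=model['batch_size'],
        )

    # Hypothetical HF-style model config, as it might appear in an OpenCompass config.
    hf_model = dict(
        abbr='example-7b-chat-hf',
        path='example/example-7b-chat',
        max_seq_len=4096,
        max_out_len=1024,
        batch_size=8,
        run_cfg=dict(num_gpus=1),
    )

    cfg = to_vllm_cfg(hf_model)
    assert cfg['model_kwargs']['max_model_len'] == 4096  # forwarded, no longer lost
    assert to_vllm_cfg({**hf_model, 'max_seq_len': None} | {})['max_seq_len'] is None  # absent values stay None

A model config without max_seq_len now yields None for both max_model_len and max_seq_len, leaving the backend to apply its own default rather than an arbitrary one such as 2048.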