Mirror of https://github.com/open-compass/opencompass.git
Synced 2025-05-30 16:03:24 +08:00
[Fix] Fix vllm max_seq_len parameter transfer (#1745)
* [Fix] Fix vllm max_seq_len parameter transfer
* [Fix] Fix vllm max_seq_len parameter transfer
* Update pr-run-test.yml
* Update pr-run-test.yml
---------
Co-authored-by: zhulinJulia24 <145004780+zhulinJulia24@users.noreply.github.com>
This commit is contained in:
parent 1bd594fc62
commit 5c8e91f329
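In brief, the change replaces direct `model['max_seq_len']` lookups with `model.get('max_seq_len', None)` and forwards the value to the vLLM backend as `max_model_len` (and to the lmdeploy backend as `session_len`). Below is a minimal sketch of that fallback pattern, not the OpenCompass code itself; the `model` entry is hypothetical, and it assumes vLLM treats `max_model_len=None` as "derive the context length from the model's own config":

# Minimal sketch of the fallback pattern this commit adopts; the `model`
# dict below is a made-up config entry, not taken from the repo.
model = dict(
    abbr='demo-7b-chat-hf',
    path='org/demo-7b-chat',
    run_cfg=dict(num_gpus=1),
    # note: no 'max_seq_len' key in this entry
)

# Old pattern: raises KeyError when a config omits max_seq_len.
# max_seq_len = model['max_seq_len']

# New pattern: falls back to None and forwards it to the engine kwargs.
max_seq_len = model.get('max_seq_len', None)
model_kwargs = dict(
    tensor_parallel_size=model['run_cfg']['num_gpus'],
    max_model_len=max_seq_len,
)
print(model_kwargs)  # {'tensor_parallel_size': 1, 'max_model_len': None}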
.github/workflows/pr-run-test.yml (vendored): 6 changed lines
@@ -73,10 +73,10 @@ jobs:
         exit 1
       fi
       score=$(sed -n '$p' regression_result3/*/summary/*.csv | awk -F ',' '{print $NF}')
-      if (( ${score%.*} >= 84 && ${score%.*} <= 87 )); then
-        echo "score is $score between 84 and 87"
+      if (( ${score%.*} >= 87 && ${score%.*} <= 89 )); then
+        echo "score is $score between 87 and 89"
       else
-        echo "score is $score not between 84 and 87"
+        echo "score is $score not between 87 and 89"
         exit 1
       fi
       rm -rf regression_result1 & rm -rf regression_result2 & rm -rf regression_result3
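For reference, `${score%.*}` strips the fractional part, so the updated gate passes only when the integer part of the last CSV field on the summary's last line falls in the range 87 to 89. A rough Python equivalent of that check, assuming the same CSV layout (score in the final field of the final row):

# Rough Python equivalent of the workflow's score gate (assumed CSV layout:
# the aggregate score sits in the last field of the last row).
import csv

def score_in_range(csv_path, low=87, high=89):
    with open(csv_path, newline='') as f:
        last_row = list(csv.reader(f))[-1]
    score = float(last_row[-1])
    return low <= int(score) <= high  # int() mirrors bash's ${score%.*}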
@@ -282,9 +282,9 @@ def change_accelerator(models, accelerator):
                 type=f'{VLLM.__module__}.{VLLM.__name__}',
                 abbr=model['abbr'].replace('hf', 'vllm') if '-hf' in model['abbr'] else model['abbr'] + '-vllm',
                 path=model['path'],
-                model_kwargs=dict(tensor_parallel_size=model['run_cfg']['num_gpus']),
+                model_kwargs=dict(tensor_parallel_size=model['run_cfg']['num_gpus'], max_model_len=model.get('max_seq_len', None)),
                 max_out_len=model['max_out_len'],
-                max_seq_len=model['max_seq_len'],
+                max_seq_len=model.get('max_seq_len', None),
                 batch_size=model['batch_size'],
                 generation_kwargs=generation_kwargs,
                 run_cfg=model['run_cfg'],
@@ -301,7 +301,8 @@ def change_accelerator(models, accelerator):
                 type=f'{mod.__module__}.{mod.__name__}',
                 abbr=model['abbr'].replace('hf', 'vllm') if '-hf' in model['abbr'] else model['abbr'] + '-vllm',
                 path=model['path'],
-                model_kwargs=dict(tensor_parallel_size=model['run_cfg']['num_gpus']),
+                model_kwargs=dict(tensor_parallel_size=model['run_cfg']['num_gpus'], max_model_len=model.get('max_seq_len', None)),
+                max_seq_len=model.get('max_seq_len', None),
                 max_out_len=model['max_out_len'],
                 batch_size=16,
                 run_cfg=model['run_cfg'],
@@ -313,9 +314,14 @@ def change_accelerator(models, accelerator):
                 type=f'{mod.__module__}.{mod.__name__}',
                 abbr=model['abbr'].replace('hf', 'lmdeploy') if '-hf' in model['abbr'] else model['abbr'] + '-lmdeploy',
                 path=model['path'],
-                engine_config=dict(max_batch_size=model.get('batch_size', 16), tp=model['run_cfg']['num_gpus']),
+                engine_config=dict(
+                    max_batch_size=model.get('batch_size', 16),
+                    tp=model['run_cfg']['num_gpus'],
+                    session_len=model.get('max_seq_len', None),
+                    max_new_tokens=model['max_out_len']
+                ),
                 gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9),
-                max_seq_len=model.get('max_seq_len', 2048),
+                max_seq_len=model.get('max_seq_len', None),
                 max_out_len=model['max_out_len'],
                 batch_size=16,
                 run_cfg=model['run_cfg'],
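For the lmdeploy branch, the net effect is that `session_len` and `max_new_tokens` now come from the model entry (with the same `.get()` fallback) instead of being omitted from the engine config. A small illustration of the assembled `engine_config`, using a made-up model entry that simply mirrors the diff above:

# Illustration of the engine_config assembled after this change; the model
# entry is made up and the keys mirror the diff above.
model = dict(
    abbr='demo-7b-chat-hf',
    path='org/demo-7b-chat',
    max_seq_len=4096,
    max_out_len=1024,
    batch_size=8,
    run_cfg=dict(num_gpus=1),
)

engine_config = dict(
    max_batch_size=model.get('batch_size', 16),
    tp=model['run_cfg']['num_gpus'],
    session_len=model.get('max_seq_len', None),
    max_new_tokens=model['max_out_len'],
)
print(engine_config)
# {'max_batch_size': 8, 'tp': 1, 'session_len': 4096, 'max_new_tokens': 1024}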