Mirror of https://github.com/open-compass/opencompass.git (synced 2025-05-30 16:03:24 +08:00)
[Fix] Fixed repeated loading of VLLM (#1051)
* [fix] Fixed the issue caused by the repeated loading of the VLLM model during task segmentation.
* [fix] Avoid "TypeError: VLLM.__init__() got an unexpected keyword argument 'tokenizer_only'".
* Restore .pre-commit-config.yaml
* Restore opencompass/tasks/openicl_infer.py

Co-authored-by: IcyFeather <mengzhuo.happy@gmail.com>
Co-authored-by: Leymore <zfz-960727@163.com>
parent 629836146a
commit c172401323
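The core idea of the change, summarised from the commit message above: when LocalRunner runs several partitioned inference tasks inside one process, the expensive vLLM engine should be built once and handed to the following tasks rather than rebuilt per partition. Below is a minimal, runnable sketch of that caching pattern, kept deliberately separate from the real code: build_model, InferTask and Runner are hypothetical stand-ins for opencompass's build_model_from_cfg, OpenICLInferTask and LocalRunner, not their actual APIs.

def build_model(cfg):
    # Stand-in for an expensive build such as loading vLLM weights.
    print(f'loading model for {cfg!r}')
    return object()


class InferTask:
    # Hypothetical stand-in for an inference task.
    def __init__(self, cfg):
        self.cfg = cfg
        self.model = None

    def run(self, cur_model=None):
        # Reuse an already-loaded model if the caller provides one;
        # only build (and load weights) when nothing was passed in.
        self.model = cur_model if cur_model is not None else build_model(self.cfg)


class Runner:
    # Hypothetical stand-in for the runner-side caching.
    def launch(self, tasks):
        for task in tasks:
            if hasattr(self, 'cur_model'):
                task.run(self.cur_model)    # later partitions reuse the cached model
            else:
                task.run()                  # first partition builds the model
            self.cur_model = task.model     # remember it for the next task


Runner().launch([InferTask('cfg-part-0'), InferTask('cfg-part-1')])
# 'loading model ...' is printed only once; the second partition reuses the model.

The diff hunks below implement this idea in the VLLM wrapper, LocalRunner, OpenICLInferTask and build_model_from_cfg.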
@@ -49,6 +49,12 @@ class VLLM(BaseModel):
         model_kwargs = DEFAULT_MODEL_KWARGS.copy()
         if add_model_kwargs is not None:
             model_kwargs.update(add_model_kwargs)
+        import ray
+
+        if ray.is_initialized():
+            self.logger.info('shutdown ray instance to avoid '
+                             '"Calling ray.init() again" error.')
+            ray.shutdown()
         self.model = LLM(path, **model_kwargs)

     def generate(self, inputs: List[str], max_out_len: int,
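A note on the hunk above: with tensor-parallel settings vLLM can start a Ray instance in the driver process, and constructing a second LLM in the same process may then hit Ray's "Calling ray.init() again" error. The guard relies on two public Ray calls, ray.is_initialized() and ray.shutdown(); a minimal standalone sketch of the same guard:

import ray  # the same public Ray API used in the hunk above


def reset_ray_if_running():
    # Shut down any Ray instance left over from a previous engine so that
    # building a new vLLM engine in this process does not trip the
    # 'Calling ray.init() again' error.
    if ray.is_initialized():
        ray.shutdown()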
@@ -46,12 +46,14 @@ class LocalRunner(BaseRunner):
         lark_bot_url (str): Lark bot url.
     """

-    def __init__(self,
-                 task: ConfigDict,
-                 max_num_workers: int = 16,
-                 debug: bool = False,
-                 max_workers_per_gpu: int = 1,
-                 lark_bot_url: str = None):
+    def __init__(
+        self,
+        task: ConfigDict,
+        max_num_workers: int = 16,
+        debug: bool = False,
+        max_workers_per_gpu: int = 1,
+        lark_bot_url: str = None,
+    ):
         super().__init__(task=task, debug=debug, lark_bot_url=lark_bot_url)
         self.max_num_workers = max_num_workers
         self.max_workers_per_gpu = max_workers_per_gpu
@@ -69,6 +71,7 @@ class LocalRunner(BaseRunner):

         status = []
         import torch
+
         if 'CUDA_VISIBLE_DEVICES' in os.environ:
             all_gpu_ids = [
                 int(i) for i in re.findall(r'(?<!-)\d+',
@@ -100,7 +103,18 @@ class LocalRunner(BaseRunner):
                     cmd = task.get_command(cfg_path=param_file, template=tmpl)
                     # run in subprocess if starts with torchrun etc.
                     if 'python3 ' in cmd or 'python ' in cmd:
-                        task.run()
+                        # If it is an infer type task do not reload if
+                        # the current model has already been loaded.
+                        if 'infer' in self.task_cfg.type.lower():
+                            # If a model instance already exists,
+                            # do not reload it.
+                            if hasattr(self, 'cur_model'):
+                                task.run(self.cur_model)
+                            else:
+                                task.run()
+                            self.cur_model = task.model
+                        else:
+                            task.run()
                     else:
                         subprocess.run(cmd, shell=True, text=True)
                 finally:
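For the type check in the hunk above, model reuse is applied only to inference tasks; other task types fall through to a plain task.run(). A small illustration, assuming task_cfg.type carries the task class name as a string (e.g. 'OpenICLInferTask'):

task_type = 'OpenICLInferTask'
print('infer' in task_type.lower())   # True  -> reuse / cache the model
task_type = 'OpenICLEvalTask'
print('infer' in task_type.lower())   # False -> plain task.run()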
@@ -59,13 +59,17 @@ class OpenICLInferTask(BaseTask):

         return template.format(task_cmd=command)

-    def run(self):
+    def run(self, cur_model=None):
         self.logger.info(f'Task {task_abbr_from_cfg(self.cfg)}')
         for model_cfg, dataset_cfgs in zip(self.model_cfgs, self.dataset_cfgs):
             self.max_out_len = model_cfg.get('max_out_len', None)
             self.batch_size = model_cfg.get('batch_size', None)
             self.min_out_len = model_cfg.get('min_out_len', None)
-            self.model = build_model_from_cfg(model_cfg)
+            if cur_model:
+                self.model = cur_model
+            else:
+                self.model = build_model_from_cfg(model_cfg)
+                cur_model = self.model

             for dataset_cfg in dataset_cfgs:
                 self.model_cfg = model_cfg
@@ -22,4 +22,5 @@ def build_model_from_cfg(model_cfg: ConfigDict):
     model_cfg.pop('summarizer_abbr', None)
     model_cfg.pop('pred_postprocessor', None)
     model_cfg.pop('min_out_len', None)
+    model_cfg.pop('tokenizer_only', None)
     return MODELS.build(model_cfg)
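The extra pop follows the pattern of the lines above it: tokenizer_only is a flag consumed elsewhere in the framework but not accepted by VLLM.__init__(), so leaving it in the config reproduces the TypeError quoted in the commit message. A self-contained illustration, using a hypothetical stand-in class rather than the real VLLM:

class FakeVLLM:
    # Hypothetical stand-in whose constructor, like VLLM.__init__(),
    # does not accept a 'tokenizer_only' argument.
    def __init__(self, path, model_kwargs=None):
        self.path = path


cfg = {'path': 'some/model', 'tokenizer_only': True}

try:
    FakeVLLM(**cfg)               # TypeError: unexpected keyword argument 'tokenizer_only'
except TypeError as err:
    print(err)

cfg.pop('tokenizer_only', None)   # what build_model_from_cfg now does
model = FakeVLLM(**cfg)           # builds cleanly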