From c1724013233bfb980b3cfa2a12f75d3bea38865a Mon Sep 17 00:00:00 2001
From: Robin Chen <56245435+IcyFeather233@users.noreply.github.com>
Date: Wed, 17 Apr 2024 20:36:08 +0800
Subject: [PATCH] [Fix] Fixed repeated loading of VLLM (#1051)

* [fix] Fixed the issue caused by the repeated loading of the VLLM model
  during task segmentation.

* [fix] Avoid "TypeError: VLLM.__init__() got an unexpected keyword
  argument 'tokenizer_only'".

* restore .pre-commit-config.yaml

* restore opencompass/tasks/openicl_infer.py

---------

Co-authored-by: IcyFeather
Co-authored-by: Leymore
---
 opencompass/models/vllm.py         |  6 ++++++
 opencompass/runners/local.py       | 28 +++++++++++++++++++++-------
 opencompass/tasks/openicl_infer.py |  8 ++++++--
 opencompass/utils/build.py         |  1 +
 4 files changed, 34 insertions(+), 9 deletions(-)

diff --git a/opencompass/models/vllm.py b/opencompass/models/vllm.py
index 3bdffd06..c4d836f1 100644
--- a/opencompass/models/vllm.py
+++ b/opencompass/models/vllm.py
@@ -49,6 +49,12 @@ class VLLM(BaseModel):
         model_kwargs = DEFAULT_MODEL_KWARGS.copy()
         if add_model_kwargs is not None:
             model_kwargs.update(add_model_kwargs)
+        import ray
+
+        if ray.is_initialized():
+            self.logger.info('shutdown ray instance to avoid '
+                             '"Calling ray.init() again" error.')
+            ray.shutdown()
         self.model = LLM(path, **model_kwargs)

     def generate(self, inputs: List[str], max_out_len: int,
diff --git a/opencompass/runners/local.py b/opencompass/runners/local.py
index a3194d5a..c7d3632d 100644
--- a/opencompass/runners/local.py
+++ b/opencompass/runners/local.py
@@ -46,12 +46,14 @@ class LocalRunner(BaseRunner):
         lark_bot_url (str): Lark bot url.
     """

-    def __init__(self,
-                 task: ConfigDict,
-                 max_num_workers: int = 16,
-                 debug: bool = False,
-                 max_workers_per_gpu: int = 1,
-                 lark_bot_url: str = None):
+    def __init__(
+        self,
+        task: ConfigDict,
+        max_num_workers: int = 16,
+        debug: bool = False,
+        max_workers_per_gpu: int = 1,
+        lark_bot_url: str = None,
+    ):
         super().__init__(task=task, debug=debug, lark_bot_url=lark_bot_url)
         self.max_num_workers = max_num_workers
         self.max_workers_per_gpu = max_workers_per_gpu
@@ -69,6 +71,7 @@ class LocalRunner(BaseRunner):
         status = []
         import torch
+
         if 'CUDA_VISIBLE_DEVICES' in os.environ:
             all_gpu_ids = [
                 int(i) for i in re.findall(r'(?
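
The core change is the guard added in VLLM.__init__ above: vLLM initializes
Ray under the hood for tensor-parallel inference, so when a segmented run
constructs a second VLLM model in the same process, the lingering Ray session
makes initialization fail with "Calling ray.init() again". Shutting Ray down
before building the engine avoids that. Below is a minimal standalone sketch
of the same pattern; it is not part of the patch, assumes ray and vllm are
installed, and build_llm plus the model path are illustrative names only.

    # Sketch: the Ray re-initialization guard from opencompass/models/vllm.py,
    # shown in isolation. `build_llm` and the model path are hypothetical.
    import ray
    from vllm import LLM

    def build_llm(model_path: str, **model_kwargs) -> LLM:
        # A Ray session left over from an earlier model load would make
        # vLLM's internal ray.init() raise, so shut it down first.
        if ray.is_initialized():
            ray.shutdown()
        return LLM(model_path, **model_kwargs)

    llm = build_llm('/path/to/model', tensor_parallel_size=2)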