diff --git a/opencompass/models/vllm.py b/opencompass/models/vllm.py
index 3bdffd06..c4d836f1 100644
--- a/opencompass/models/vllm.py
+++ b/opencompass/models/vllm.py
@@ -49,6 +49,12 @@ class VLLM(BaseModel):
         model_kwargs = DEFAULT_MODEL_KWARGS.copy()
         if add_model_kwargs is not None:
             model_kwargs.update(add_model_kwargs)
+        import ray
+
+        if ray.is_initialized():
+            self.logger.info('shutdown ray instance to avoid '
+                             '"Calling ray.init() again" error.')
+            ray.shutdown()
         self.model = LLM(path, **model_kwargs)
 
     def generate(self, inputs: List[str], max_out_len: int,
diff --git a/opencompass/runners/local.py b/opencompass/runners/local.py
index a3194d5a..c7d3632d 100644
--- a/opencompass/runners/local.py
+++ b/opencompass/runners/local.py
@@ -46,12 +46,14 @@ class LocalRunner(BaseRunner):
         lark_bot_url (str): Lark bot url.
     """
 
-    def __init__(self,
-                 task: ConfigDict,
-                 max_num_workers: int = 16,
-                 debug: bool = False,
-                 max_workers_per_gpu: int = 1,
-                 lark_bot_url: str = None):
+    def __init__(
+        self,
+        task: ConfigDict,
+        max_num_workers: int = 16,
+        debug: bool = False,
+        max_workers_per_gpu: int = 1,
+        lark_bot_url: str = None,
+    ):
         super().__init__(task=task, debug=debug, lark_bot_url=lark_bot_url)
         self.max_num_workers = max_num_workers
         self.max_workers_per_gpu = max_workers_per_gpu
@@ -69,6 +71,7 @@ class LocalRunner(BaseRunner):
         status = []
 
         import torch
+
         if 'CUDA_VISIBLE_DEVICES' in os.environ:
             all_gpu_ids = [
                 int(i) for i in re.findall(r'(?<!-)\d+',
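
Note: the vllm.py hunk above boils down to the standalone guard sketched below (the helper name is illustrative, not part of the patch). vLLM's tensor-parallel backend calls ray.init() itself, and Ray errors out if it is initialized a second time in the same process, hence the quoted "Calling ray.init() again" message in the log line:

    import ray

    def _reset_ray() -> None:
        # Sketch only: tear down any live Ray instance so that the
        # ray.init() triggered inside vLLM's LLM(...) constructor succeeds.
        if ray.is_initialized():
            ray.shutdown()

This matters when several VLLM models are constructed in the same process (e.g. sequential eval tasks): the first model leaves a Ray instance behind, and without the shutdown the next LLM(path, ...) call would fail. The local.py changes, by contrast, are formatting-only (signature reflowed, a blank line added after the import).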