From a0e607ade5ecdd4c2d653b524ec1100f686c2fba Mon Sep 17 00:00:00 2001
From: bittersweet1999 <148421775+bittersweet1999@users.noreply.github.com>
Date: Tue, 12 Nov 2024 17:33:09 +0800
Subject: [PATCH] Revert "add single lora adapter support for vLLM inference. (#1679)"

This reverts commit 3ec178f4a99df494ba13407f4f883eec4979aa23.
---
 opencompass/models/vllm.py | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/opencompass/models/vllm.py b/opencompass/models/vllm.py
index fbfaf66e..ab042437 100644
--- a/opencompass/models/vllm.py
+++ b/opencompass/models/vllm.py
@@ -7,7 +7,6 @@ from opencompass.utils import get_logger
 
 try:
     from vllm import LLM, SamplingParams
-    from vllm.lora.request import LoRARequest
 except ImportError:
     LLM, SamplingParams = None, None
 
@@ -26,7 +25,6 @@ class VLLM(BaseModel):
         meta_template: Optional[Dict] = None,
         mode: str = 'none',
         use_fastchat_template: bool = False,
-        lora_path: str = None,
         stop_words: List[str] = [],
     ):
         super().__init__(path=path,
@@ -40,7 +38,7 @@ class VLLM(BaseModel):
         self.tokenizer = self.model.get_tokenizer()
         self.generation_kwargs = generation_kwargs
         self.generation_kwargs.pop('do_sample', None)
-        self.lora_path = lora_path
+
         assert mode in ['none', 'mid']
         self.mode = mode
         self.use_fastchat_template = use_fastchat_template
@@ -98,10 +96,7 @@ class VLLM(BaseModel):
             _stop = list(set(self.stop_words + stopping_criteria))
             generation_kwargs.update({'stop': _stop})
         sampling_kwargs = SamplingParams(**generation_kwargs)
-        if not self.lora_path:
-            outputs = self.model.generate(inputs, sampling_kwargs)
-        else:
-            outputs = self.model.generate(inputs, sampling_kwargs, lora_request=LoRARequest("sql_adapter", 1, self.lora_path))
+        outputs = self.model.generate(inputs, sampling_kwargs)
 
         prompt_list, output_strs = [], []
         for output in outputs: