From 1013dce60c198ff5217a5e4d8c384190f54bdc42 Mon Sep 17 00:00:00 2001 From: Lyu Han Date: Sun, 28 Apr 2024 19:57:40 +0800 Subject: [PATCH] adapt to lmdeploy v0.4.0 (#1073) * adapt to lmdeploy v0.4.0 * compatible --- opencompass/models/lmdeploy_pytorch.py | 15 ++++++++++++--- opencompass/models/turbomind.py | 7 ++++++- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/opencompass/models/lmdeploy_pytorch.py b/opencompass/models/lmdeploy_pytorch.py index 814c3cc6..f9d67da4 100644 --- a/opencompass/models/lmdeploy_pytorch.py +++ b/opencompass/models/lmdeploy_pytorch.py @@ -50,6 +50,7 @@ class LmdeployPytorchModel(BaseModel): max_seq_len=max_seq_len, meta_template=meta_template) from lmdeploy.pytorch import engine as tm + from lmdeploy.version import version_info if engine_config is not None: from lmdeploy.messages import PytorchEngineConfig @@ -71,6 +72,7 @@ class LmdeployPytorchModel(BaseModel): self.generator_ids = [i + 1 for i in range(concurrency)] self.gen_config = gen_config self.end_str = end_str + self.major_version, self.minor_version, _ = version_info def generate( self, @@ -145,9 +147,16 @@ class LmdeployPytorchModel(BaseModel): assert type( prompt) is str, 'We only support string for TurboMind Python API' input_ids = self.tokenizer.encode(prompt) - _, output_ids, _ = generator.infer(session_id, - input_ids, - gen_config=gen_config) + if self.major_version >= 0 and self.minor_version >= 4: + outputs = generator.infer(session_id, + input_ids, + gen_config=gen_config) + output_ids = outputs.token_ids + else: + _, output_ids, _ = generator.infer(session_id, + input_ids, + gen_config=gen_config) + # stop engine if hasattr(generator, 'end'): generator.end(session_id) diff --git a/opencompass/models/turbomind.py b/opencompass/models/turbomind.py index 50c3e5ca..9479f02f 100644 --- a/opencompass/models/turbomind.py +++ b/opencompass/models/turbomind.py @@ -54,6 +54,7 @@ class TurboMindModel(BaseModel): max_seq_len=max_seq_len, meta_template=meta_template) from lmdeploy.turbomind import TurboMind + from lmdeploy.version import version_info if engine_config is not None: from lmdeploy.messages import TurbomindEngineConfig @@ -70,6 +71,7 @@ class TurboMindModel(BaseModel): self.generator_ids = [i + 1 for i in range(concurrency)] self.gen_config = gen_config self.end_str = end_str + self.major_version, self.minor_version, _ = version_info def generate(self, inputs: List[str], @@ -165,7 +167,10 @@ class TurboMindModel(BaseModel): sequence_end=True, step=0, stream_output=False): - _, output_ids, _ = outputs + if self.major_version >= 0 and self.minor_version >= 4: + output_ids = outputs.token_ids + else: + _, output_ids, _ = outputs response = self.tokenizer.decode(output_ids) response = valid_str(response) # used to trim