Mirror of https://github.com/open-compass/opencompass.git (synced 2025-05-30 16:03:24 +08:00)
Support get_ppl for TurbomindModel (#878)
* update ppl for turbomindmodel
* update api_server
* rename config and set thread_safe for pytorch engine if possible
parent caf1cf8a17
commit c54a5d3b0f
@@ -6,9 +6,9 @@ with read_base():
     from .datasets.mmlu.mmlu_gen_a484b3 import mmlu_datasets
     from .datasets.ceval.ceval_gen_5f30c7 import ceval_datasets
     from .datasets.SuperGLUE_WiC.SuperGLUE_WiC_gen_d06864 import WiC_datasets
     from .datasets.SuperGLUE_WSC.SuperGLUE_WSC_gen_7902a7 import WSC_datasets
     from .datasets.triviaqa.triviaqa_gen_2121ce import triviaqa_datasets
     from .datasets.gsm8k.gsm8k_gen_1d7fe4 import gsm8k_datasets
     from .datasets.humaneval.humaneval_gen_8e312c import humaneval_datasets
     from .datasets.race.race_gen_69ee4f import race_datasets
     from .datasets.crowspairs.crowspairs_gen_381af0 import crowspairs_datasets

 # and output the results in a chosen format
@@ -24,16 +24,29 @@ meta_template = dict(
     ],
     eos_token_id=103028)

-models = [
-    dict(
-        type=TurboMindAPIModel,
-        abbr='internlm-chat-20b-turbomind',
-        path="internlm-chat-20b",
-        api_addr='http://0.0.0.0:23333',
-        max_out_len=100,
-        max_seq_len=2048,
-        batch_size=8,
-        meta_template=meta_template,
-        run_cfg=dict(num_gpus=1, num_procs=1),
-    )
-]
+internlm_chat_20b = dict(
+    type=TurboMindAPIModel,
+    abbr='internlm-chat-20b-turbomind',
+    api_addr='http://0.0.0.0:23333',
+    max_out_len=100,
+    max_seq_len=2048,
+    batch_size=8,
+    meta_template=meta_template,
+    run_cfg=dict(num_gpus=1, num_procs=1),
+    end_str='<eoa>',
+)
+
+internlm_chat_7b = dict(
+    type=TurboMindAPIModel,
+    abbr='internlm-chat-7b-turbomind',
+    api_addr='http://0.0.0.0:23333',
+    max_out_len=100,
+    max_seq_len=2048,
+    batch_size=16,
+    meta_template=meta_template,
+    run_cfg=dict(num_gpus=1, num_procs=1),
+    end_str='<eoa>',
+)
+
+models = [internlm_chat_20b]
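Note that the renamed config defines both chat models but only evaluates the 20B one; a run picks up whichever dicts appear in the final models list. A minimal sketch of switching targets, using only names defined in the config above:

# Evaluate the 7B model instead of the 20B one, or both in a single run:
models = [internlm_chat_7b]
# models = [internlm_chat_20b, internlm_chat_7b]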
@@ -14,15 +14,25 @@ with read_base():

 datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])

-models = [
-    dict(
-        type=TurboMindAPIModel,
-        abbr='internlm-chat-20b-turbomind',
-        path="internlm-chat-20b",
-        api_addr='http://0.0.0.0:23333',
-        max_out_len=100,
-        max_seq_len=2048,
-        batch_size=8,
-        run_cfg=dict(num_gpus=1, num_procs=1),
-    )
-]
+internlm_chat_20b = dict(
+    type=TurboMindAPIModel,
+    abbr='internlm-chat-20b-turbomind',
+    api_addr='http://0.0.0.0:23333',
+    max_out_len=100,
+    max_seq_len=2048,
+    batch_size=8,
+    run_cfg=dict(num_gpus=1, num_procs=1),
+)
+
+internlm_chat_7b = dict(
+    type=TurboMindAPIModel,
+    abbr='internlm-chat-7b-turbomind',
+    api_addr='http://0.0.0.0:23333',
+    max_out_len=100,
+    max_seq_len=2048,
+    batch_size=16,
+    run_cfg=dict(num_gpus=1, num_procs=1),
+)
+
+models = [internlm_chat_20b]
@@ -54,6 +54,10 @@ class LmdeployPytorchModel(BaseModel):
         if engine_config is not None:
             from lmdeploy.messages import PytorchEngineConfig
             engine_config = PytorchEngineConfig(**engine_config)
+            # set thread_safe
+            if hasattr(engine_config, 'thread_safe'):
+                engine_config.thread_safe = True
+
         if gen_config is not None:
             from lmdeploy.messages import EngineGenerationConfig
             gen_config = EngineGenerationConfig(**gen_config)
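Because the evaluator can drive the engine from multiple threads, the config is flagged thread-safe whenever the installed lmdeploy exposes that attribute. A minimal standalone sketch of the same pattern, assuming lmdeploy is installed; only the thread_safe attribute comes from the diff above:

from lmdeploy.messages import PytorchEngineConfig

engine_config = PytorchEngineConfig()
# Older lmdeploy releases may lack the flag, hence the hasattr guard.
if hasattr(engine_config, 'thread_safe'):
    engine_config.thread_safe = True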
@@ -1,6 +1,8 @@
 from concurrent.futures import ThreadPoolExecutor
 from typing import Dict, List, Optional, Union

+import numpy as np
+
 from opencompass.models.base import BaseModel
 from opencompass.utils.logging import get_logger
 from opencompass.utils.prompt import PromptList
@@ -161,3 +163,29 @@ class TurboMindModel(BaseModel):
         if end_str:
             response = response.split(end_str)[0]
         return response
+
+    def get_ppl(self,
+                inputs: List[str],
+                mask_length: Optional[List[int]] = None) -> List[float]:
+        """Get perplexity scores given a list of inputs.
+
+        Args:
+            inputs (List[str]): A list of strings.
+            mask_length (Optional[List[int]]): A list of mask lengths. If
+                provided, the perplexity scores will be calculated with the
+                first mask_length[i] tokens masked out. It's okay to skip
+                its implementation if advanced features in PPLInferencer
+                are not needed.
+
+        Returns:
+            np.ndarray: The perplexity scores in shape of (N,)
+        """
+        assert isinstance(
+            inputs, List), f'List(str) is expected, but got {type(inputs)}'
+        results = []
+        for text in inputs:
+            input_ids = self.tokenizer.encode(text)
+            res = self.generators[0].get_ppl(input_ids)
+            results.append(res)
+        results = np.concatenate(results)
+        return results
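A hedged sketch of what get_ppl enables downstream, namely perplexity-based answer ranking; pick_by_ppl and its arguments are hypothetical, while the get_ppl signature and its (N,)-shaped ndarray result come from the method above:

import numpy as np

def pick_by_ppl(model, candidates):
    # Lower perplexity means the model considers the text more likely.
    ppl = model.get_ppl(candidates)  # np.ndarray of shape (len(candidates),)
    return candidates[int(np.argmin(ppl))]

# Usage: pick_by_ppl(turbomind_model, ['2+2=4', '2+2=5'])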
@@ -20,30 +20,31 @@ def valid_str(string, coding='utf-8'):


 class TurboMindAPIModel(BaseModel):
-    """Model wrapper for TurboMind Triton Inference Server gRPC API.
+    """Model wrapper for lmdeploy api server.

     Args:
         path (str): The name of OpenAI's model.
-        tis_addr (str): The address (ip:port format) of turbomind's
-            triton inference server
+        api_addr (str): The address (ip:port format) of lmdeploy's
+            api server.
         max_seq_len (int): The maximum allowed sequence length of a model.
             Note that the length of prompt + generated tokens shall not exceed
             this value. Defaults to 2048.
         meta_template (Dict, optional): The model's meta prompt
             template if needed, in case the requirement of injecting or
             wrapping of any meta instructions.
+        end_str (str, optional): The string used to trim generated output
+            if the model emits special ending strings that are not handled
+            well. Defaults to None.
     """

     is_api: bool = True

-    def __init__(
-        self,
-        path: str,
-        api_addr: str = 'http://0.0.0.0:23333',
-        max_seq_len: int = 2048,
-        meta_template: Optional[Dict] = None,
-    ):
-        super().__init__(path=path,
+    def __init__(self,
+                 api_addr: str = 'http://0.0.0.0:23333',
+                 max_seq_len: int = 2048,
+                 meta_template: Optional[Dict] = None,
+                 end_str: Optional[str] = None,
+                 **kwargs):
+        super().__init__(path='',
                          max_seq_len=max_seq_len,
                          meta_template=meta_template)
         from lmdeploy.serve.openai.api_client import APIClient
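For reference, a hedged instantiation sketch of the renamed wrapper against a running lmdeploy api server; the values mirror the eval configs earlier in this commit, and meta_template is assumed to be defined as in those configs:

model = TurboMindAPIModel(
    api_addr='http://0.0.0.0:23333',
    max_seq_len=2048,
    meta_template=meta_template,  # assumed defined, as in the eval config
    end_str='<eoa>',
)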
@@ -55,6 +56,7 @@ class TurboMindAPIModel(BaseModel):
         if meta_template and 'eos_token_id' in meta_template:
             self.eos_token_id = meta_template['eos_token_id']
         self.api_addr = api_addr
+        self.end_str = end_str

     def generate(
         self,
@@ -73,7 +75,10 @@ class TurboMindAPIModel(BaseModel):
                 between 0 and 2. Higher values like 0.8 will make the output
                 more random, while lower values like 0.2 will make it more
                 focused and deterministic. Defaults to 0.7.
-
+            end_str (str, optional): The string used to trim generated
+                output if the model emits special ending strings that are
+                not handled well. Defaults to None.
         Returns:
             List[str]: A list of generated strings.
         """
@@ -82,7 +87,8 @@ class TurboMindAPIModel(BaseModel):
             results = list(
                 executor.map(self._generate, inputs,
                              [max_out_len] * len(inputs),
-                             [temperature] * len(inputs)))
+                             [temperature] * len(inputs),
+                             [self.end_str] * len(inputs)))
         return results

     def get_token_len(self, prompt: str) -> int:
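executor.map zips the input list with a per-input copy of each scalar argument, so every worker thread receives the same max_out_len, temperature, and end_str. A self-contained sketch of that fan-out pattern; the _gen body is a stand-in, not the real API call:

from concurrent.futures import ThreadPoolExecutor

def _gen(prompt, max_out_len, temperature, end_str):
    return prompt[:max_out_len]  # stand-in for the real request

inputs = ['a', 'b', 'c']
with ThreadPoolExecutor() as executor:
    results = list(
        executor.map(_gen, inputs,
                     [100] * len(inputs),       # broadcast max_out_len
                     [0.7] * len(inputs),       # broadcast temperature
                     ['<eoa>'] * len(inputs)))  # broadcast end_str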
@@ -97,7 +103,7 @@ class TurboMindAPIModel(BaseModel):
         return self.token_bucket.get_token()

     def _generate(self, prompt: str or PromptList, max_out_len: int,
-                  temperature: float) -> str:
+                  temperature: float, end_str: str) -> str:
         """Generate results given a list of inputs.

         Args:
@@ -127,4 +133,6 @@ class TurboMindAPIModel(BaseModel):
                 top_k=1):
             response += output['choices'][0]['text']
         response = valid_str(response)
+        if end_str:
+            response = response.split(end_str)[0]
         return response
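The split keeps only the text before the first occurrence of the ending string. A tiny sketch with an illustrative response value:

response = 'Paris is the capital of France.<eoa>stray tokens'
end_str = '<eoa>'
if end_str:
    response = response.split(end_str)[0]
# response is now 'Paris is the capital of France.'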