Mirror of https://github.com/open-compass/opencompass.git (synced 2025-05-30 16:03:24 +08:00)
Add LightllmApi KeyError log & Update doc (#816)
* Add LightllmApi KeyError log
* Update LightllmApi doc

This commit is contained in: parent 8b5c467cc5, commit f09a2ff418
````diff
@@ -27,6 +27,7 @@ python -m lightllm.server.api_server --model_dir /path/llama2-7B \
 ```
 
 **Note:** tp can be configured to enable TensorParallel inference across several GPUs, which is suitable for inference of very large models.
 
+**Note:** The max_total_token_num in the above command will affect the throughput performance during testing. It can be configured according to the documentation on the [Lightllm homepage](https://github.com/ModelTC/lightllm). As long as it does not run out of memory, it is often better to set it as high as possible.
 
 You can use the following Python script to quickly test whether the current service has been successfully started.
````
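The quick-test script mentioned in the doc lies outside the lines shown in this hunk. As a minimal sketch for convenience, assuming lightllm's api_server exposes a /generate endpoint on localhost:8080 that accepts an 'inputs'/'parameters' JSON payload (adjust host, port, and parameters to match your own deployment):

```python
import json

import requests

# Assumed endpoint and payload schema for the lightllm api_server started above;
# the prompt and generation parameters are illustrative.
url = 'http://localhost:8080/generate'
headers = {'Content-Type': 'application/json'}
data = {
    'inputs': 'What is AI?',
    'parameters': {
        'do_sample': False,
        'ignore_eos': False,
        'max_new_tokens': 128,
    },
}

response = requests.post(url, headers=headers, data=json.dumps(data))
if response.status_code == 200:
    print(response.json())
else:
    print('Error:', response.status_code)
```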
The same note is added to the Chinese documentation (translated here):

````diff
@@ -27,6 +27,7 @@ python -m lightllm.server.api_server --model_dir /path/llama2-7B \
 ```
 
 **Note:** In the command above, tp sets the number of GPUs used for TensorParallel inference, which is suitable for inference of larger models.
 
+**Note:** The max_total_token_num in the command above affects throughput performance during testing and can be set according to the documentation on the [Lightllm homepage](https://github.com/ModelTC/lightllm). As long as it does not run out of GPU memory, larger values are usually better.
 
 You can use the following Python script to quickly check whether the current service has started successfully.
````
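With the service running, OpenCompass reaches it through the LightllmAPI wrapper patched below. As a hedged illustration only (the url value and keyword arguments here are assumptions, not the verified config interface; check the lightllm example config shipped with OpenCompass for the exact fields), a model entry might look roughly like this:

```python
# Hypothetical OpenCompass model entry for the LightllmAPI wrapper; the url
# and keyword arguments below are assumptions, not the verified interface.
from opencompass.models import LightllmAPI

models = [
    dict(
        abbr='LightllmAPI',
        type=LightllmAPI,
        url='http://localhost:8080/generate',
        max_out_len=1024,
        batch_size=8,
    ),
]
```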
```diff
@@ -78,6 +78,8 @@ class LightllmAPI(BaseAPIModel):
             except requests.JSONDecodeError:
                 self.logger.error('JsonDecode error, got',
                                   str(raw_response.content))
+            except KeyError:
+                self.logger.error(f'KeyError. Response: {str(response)}')
             max_num_retries += 1
 
         raise RuntimeError('Calling LightllmAPI failed after retrying for '
```
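To show where the new except branch sits, here is a self-contained sketch of the request-and-retry pattern around the lightllm call. The /generate endpoint, the payload shape, and the 'generated_text' field are assumptions for illustration, not the verified behaviour of the class above.

```python
import time

import requests


def query_with_retry(url: str, prompt: str, max_out_len: int = 100,
                     retries: int = 3, wait: float = 2.0) -> str:
    """Sketch of the retry loop that the KeyError handler slots into."""
    num_retries = 0
    while num_retries < retries:
        raw_response = requests.post(
            url,
            json={'inputs': prompt,
                  'parameters': {'max_new_tokens': max_out_len}})
        try:
            response = raw_response.json()
            # A malformed body raises JSONDecodeError; a well-formed body that
            # lacks the expected field raises KeyError. Both are logged and the
            # request is retried instead of aborting the whole evaluation.
            return response['generated_text'][0]
        except requests.JSONDecodeError:
            print('JsonDecode error, got', str(raw_response.content))
        except KeyError:
            print(f'KeyError. Response: {str(response)}')
        num_retries += 1
        time.sleep(wait)
    raise RuntimeError('Calling LightllmAPI failed after retrying for '
                       f'{retries} times.')
```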
```diff
@@ -123,11 +125,11 @@ class LightllmAPI(BaseAPIModel):
             response = raw_response.json()
 
             assert ('prompt_token_ids' in response and 'prompt_logprobs'
-                    in response), 'prompt_token_ids and prompt_logprobs \
+                    in response), f'prompt_token_ids and prompt_logprobs \
                     must be in the output. \
                     Please consider adding \
                     --return_all_prompt_logprobs argument \
-                    when starting your lightllm service.'
+                    when starting lightllm service. Response: {str(response)}'
 
             prompt_token_ids = response['prompt_token_ids'][1:]
             prompt_logprobs = [
```
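The prompt_token_ids and prompt_logprobs fields checked by this assertion feed the perplexity computation further down. As a rough, hedged illustration of how such fields could be consumed, assuming each prompt_logprobs entry maps a token id to its log-probability (the exact structure lightllm returns with --return_all_prompt_logprobs, and the real get_ppl code in OpenCompass, may differ):

```python
import math
from typing import Dict, List


def ppl_from_prompt_logprobs(prompt_token_ids: List[int],
                             prompt_logprobs: List[Dict[int, float]]) -> float:
    """Illustrative only: derive a perplexity from per-token log-probabilities.

    Assumes prompt_logprobs is aligned with prompt_token_ids and that each
    entry contains the log-probability of the corresponding token.
    """
    logprobs = [entry[token_id]
                for token_id, entry in zip(prompt_token_ids, prompt_logprobs)]
    avg_neg_loglikelihood = -sum(logprobs) / max(len(logprobs), 1)
    return math.exp(avg_neg_loglikelihood)
```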