mirror of https://github.com/open-compass/opencompass.git (synced 2025-05-30 16:03:24 +08:00)
parent 814b3f73bd
commit 0836aec67b
@@ -48,7 +48,7 @@ baichuan2_meta_template = dict(round=[
 internlm_chat_7b = dict(
     type=TurboMindModel,
     abbr='internlm-chat-7b-turbomind',
-    path='./turbomind',
+    path='internlm/internlm-chat-7b',
     max_out_len=100,
     max_seq_len=2048,
     batch_size=32,
@@ -60,7 +60,7 @@ internlm_chat_7b = dict(
 internlm_chat_7b_w4 = dict(
     type=TurboMindModel,
     abbr='internlm-chat-7b-w4-turbomind',
-    path='./turbomind',
+    path='internlm/internlm-chat-7b-w4',
     max_out_len=100,
     max_seq_len=2048,
     batch_size=32,
@@ -73,7 +73,7 @@ internlm_chat_7b_w4 = dict(
 internlm_chat_7b_w4kv8 = dict(
     type=TurboMindModel,
     abbr='internlm-chat-7b-w4kv8-turbomind',
-    path='./turbomind',
+    path='internlm/internlm-chat-7b-w4kv8',
     max_out_len=100,
     max_seq_len=2048,
     batch_size=32,
@@ -86,7 +86,7 @@ internlm_chat_7b_w4kv8 = dict(
 internlm_chat_20b = dict(
     type=TurboMindModel,
     abbr='internlm-chat-20b-turbomind',
-    path='./turbomind',
+    path='internlm/internlm-chat-20b',
     max_out_len=100,
     max_seq_len=2048,
     batch_size=8,
@@ -99,7 +99,7 @@ internlm_chat_20b = dict(
 internlm_chat_20b_w4 = dict(
     type=TurboMindModel,
     abbr='internlm-chat-20b-w4-turbomind',
-    path='./turbomind',
+    path='internlm/internlm-chat-20b-w4',
     max_out_len=100,
     max_seq_len=2048,
     batch_size=16,
@@ -112,7 +112,7 @@ internlm_chat_20b_w4 = dict(
 internlm_chat_20b_w4kv8 = dict(
     type=TurboMindModel,
     abbr='internlm-chat-20b-w4kv8-turbomind',
-    path='./turbomind',
+    path='internlm/internlm-chat-20b-w4kv8',
     max_out_len=100,
     max_seq_len=2048,
     batch_size=16,
@@ -125,7 +125,7 @@ internlm_chat_20b_w4kv8 = dict(
 llama2_chat_7b = dict(
     type=TurboMindModel,
     abbr='llama2-chat-7b-turbomind',
-    path='./turbomind',
+    path='meta-llama/Llama-2-7b-chat-hf',
     max_out_len=100,
     max_seq_len=2048,
     batch_size=16,
@@ -138,7 +138,7 @@ llama2_chat_7b = dict(
 llama2_chat_13b = dict(
     type=TurboMindModel,
     abbr='llama2-chat-13b-turbomind',
-    path='./turbomind',
+    path='meta-llama/Llama-2-13b-chat-hf',
     max_out_len=100,
     max_seq_len=2048,
     batch_size=16,
@@ -151,7 +151,7 @@ llama2_chat_13b = dict(
 llama2_chat_70b = dict(
     type=TurboMindModel,
     abbr='llama2-chat-70b-turbomind',
-    path='./turbomind',
+    path='meta-llama/Llama-2-70b-chat-hf',
     max_out_len=100,
     max_seq_len=2048,
     batch_size=8,
@@ -164,7 +164,7 @@ llama2_chat_70b = dict(
 qwen_chat_7b = dict(
     type=TurboMindModel,
     abbr='qwen-chat-7b-turbomind',
-    path='./turbomind',
+    path='Qwen/Qwen-7B-Chat',
     max_out_len=100,
     max_seq_len=2048,
     batch_size=16,
@@ -177,7 +177,7 @@ qwen_chat_7b = dict(
 qwen_chat_14b = dict(
     type=TurboMindModel,
     abbr='qwen-chat-14b-turbomind',
-    path='./turbomind',
+    path='Qwen/Qwen-14B-Chat',
     max_out_len=100,
     max_seq_len=2048,
     batch_size=16,
@@ -190,7 +190,7 @@ qwen_chat_14b = dict(
 baichuan2_chat_7b = dict(
     type=TurboMindModel,
     abbr='baichuan2-chat-7b-turbomind',
-    path='./turbomind',
+    path='baichuan-inc/Baichuan2-7B-Chat',
     max_out_len=100,
     max_seq_len=2048,
     batch_size=16,
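After this change, each TurboMind entry points `path` directly at a Hugging Face model id instead of a pre-converted local `./turbomind` directory. A representative entry, assembled from the hunks above, might look like the sketch below; the trailing fields and the `models` list are assumptions about parts of the config that this diff does not show.

```python
# Sketch of one updated entry; fields not visible in the hunks above are assumptions.
internlm_chat_7b = dict(
    type=TurboMindModel,
    abbr='internlm-chat-7b-turbomind',
    path='internlm/internlm-chat-7b',       # HF model id replaces the local './turbomind' path
    max_out_len=100,
    max_seq_len=2048,
    batch_size=32,
    concurrency=32,                         # assumed: not shown in this diff
    meta_template=internlm_meta_template,   # assumed: chat configs typically attach one
    run_cfg=dict(num_gpus=1, num_procs=1),  # assumed: not shown in this diff
)

models = [internlm_chat_7b]  # assumed: how the config typically exposes its model list
```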
@@ -19,7 +19,7 @@ datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
 internlm_7b = dict(
     type=TurboMindModel,
     abbr='internlm-7b-turbomind',
-    path="./turbomind",
+    path="internlm/internlm-7b",
     max_out_len=100,
     max_seq_len=2048,
     batch_size=32,
@@ -31,7 +31,7 @@ internlm_7b = dict(
 internlm_7b_w4 = dict(
     type=TurboMindModel,
     abbr='internlm-7b-w4-turbomind',
-    path="./turbomind",
+    path="internlm/internlm-7b-w4",
     max_out_len=100,
     max_seq_len=2048,
     batch_size=32,
@@ -43,7 +43,7 @@ internlm_7b_w4 = dict(
 internlm_7b_w4kv8 = dict(
     type=TurboMindModel,
     abbr='internlm-7b-w4kv8-turbomind',
-    path="./turbomind",
+    path="internlm/internlm-7b-w4kv8",
     max_out_len=100,
     max_seq_len=2048,
     batch_size=32,
@@ -55,7 +55,7 @@ internlm_7b_w4kv8 = dict(
 internlm_20b = dict(
     type=TurboMindModel,
     abbr='internlm-20b-turbomind',
-    path="./turbomind",
+    path="internlm/internlm-20b",
     max_out_len=100,
     max_seq_len=2048,
     batch_size=8,
@@ -67,7 +67,7 @@ internlm_20b = dict(
 internlm_20b_w4 = dict(
     type=TurboMindModel,
     abbr='internlm-20b-w4-turbomind',
-    path="./turbomind",
+    path="internlm/internlm-20b-w4",
     max_out_len=100,
     max_seq_len=2048,
     batch_size=16,
@@ -80,7 +80,7 @@ internlm_20b_w4 = dict(
 internlm_20b_w4kv8 = dict(
     type=TurboMindModel,
     abbr='internlm-20b-w4kv8-turbomind',
-    path="./turbomind",
+    path="internlm/internlm-20b-w4kv8",
     max_out_len=100,
     max_seq_len=2048,
     batch_size=16,
@@ -20,27 +20,14 @@ pip install lmdeploy
 
 OpenCompass integrates both turbomind's Python API and gRPC API for evaluation. The former is highly recommended.
 
-We take InternLM-20B as an example. Please download it from Hugging Face and convert it to turbomind's model format:
+We take InternLM-20B as an example. Please download it from Hugging Face:
 
 ```shell
-# 1. Download the InternLM model (or use the cached model's checkpoint)
+# Download the InternLM model (or use the cached model's checkpoint)
 
 # Make sure you have git-lfs installed (https://git-lfs.com)
 git lfs install
 git clone https://huggingface.co/internlm/internlm-20b /path/to/internlm-20b
-
-# 2. Convert the InternLM model to turbomind format and save it in the home folder of opencompass
-lmdeploy convert internlm /path/to/internlm-20b \
-    --dst-path {/home/folder/of/opencompass}/turbomind
 ```
 
-**Note**:
-
-If evaluating the InternLM Chat model, make sure to pass `internlm-chat` as the model name instead of `internlm` when converting the model format. The specific command is:
-
-```shell
-lmdeploy convert internlm-chat /path/to/internlm-20b-chat \
-    --dst-path {/home/folder/of/opencompass}/turbomind
-```
-
 ### Evaluation with Turbomind Python API (recommended)
@@ -61,6 +48,22 @@ You are expected to get the evaluation results after the inference and evaluation.
 
 ### Evaluation with Turbomind gRPC API (optional)
 
+Convert the model to TurboMind format using lmdeploy:
+
+```shell
+lmdeploy convert internlm /path/to/internlm-20b \
+    --dst-path {/home/folder/of/opencompass}/turbomind
+```
+
+**Note**:
+
+If evaluating the InternLM Chat model, make sure to pass `internlm-chat` as the model name instead of `internlm` when converting the model format. The specific command is:
+
+```shell
+lmdeploy convert internlm-chat /path/to/internlm-20b-chat \
+    --dst-path {/home/folder/of/opencompass}/turbomind
+```
+
 In the home folder of OpenCompass, launch the Triton Inference Server:
 
 ```shell
@@ -20,25 +20,14 @@ pip install lmdeploy
 
 OpenCompass supports evaluating datasets through either the turbomind Python API or the gRPC API. We strongly recommend the former.
 
-The following takes the InternLM-20B model as an example. First, download the InternLM model from Hugging Face and convert it to turbomind's model format:
+The following takes the InternLM-20B model as an example. First, download the InternLM model from Hugging Face:
 
 ```shell
-# 1. Download the InternLM model (or use the cached model's checkpoint)
+# Download the InternLM model (or use the cached model's checkpoint)
 
 # Make sure you have git-lfs installed (https://git-lfs.com)
 git lfs install
 git clone https://huggingface.co/internlm/internlm-20b /path/to/internlm-20b
-
-# 2. Convert the InternLM model to turbomind format and save it in the home folder of opencompass
-lmdeploy convert internlm /path/to/internlm-20b \
-    --dst-path {/home/folder/of/opencompass}/turbomind
 ```
 
-Note: if you are evaluating the InternLM Chat model, pass `internlm-chat` as the model name when converting the model format. The exact command is:
-
-```shell
-lmdeploy convert internlm-chat /path/to/internlm-20b-chat \
-    --dst-path {/home/folder/of/opencompass}/turbomind
-```
-
 ### Evaluation with the TurboMind Python API (recommended)
@@ -57,6 +46,20 @@ python run.py configs/eval_internlm_turbomind.py -w outputs/turbomind/internlm-2
 
 ### Evaluation with the TurboMind gRPC API (optional)
 
+First, convert the model to turbomind format:
+
+```shell
+lmdeploy convert internlm /path/to/internlm-20b \
+    --dst-path {/home/folder/of/opencompass}/turbomind
+```
+
+Note: if you are evaluating the InternLM Chat model, pass `internlm-chat` as the model name when converting the model format. The exact command is:
+
+```shell
+lmdeploy convert internlm-chat /path/to/internlm-20b-chat \
+    --dst-path {/home/folder/of/opencompass}/turbomind
+```
+
 In the home folder of OpenCompass, launch the Triton Inference Server:
 
 ```shell
@@ -1,4 +1,3 @@
-import os.path as osp
 from concurrent.futures import ThreadPoolExecutor
 from typing import Dict, List, Optional, Union
 
@@ -40,16 +39,14 @@ class TurboMindModel(BaseModel):
                  max_seq_len: int = 2048,
                  meta_template: Optional[Dict] = None,
                  ):
-        from lmdeploy import turbomind as tm
-        from lmdeploy.tokenizer import Tokenizer
+        from lmdeploy.turbomind import TurboMind
 
         super().__init__(path=path,
                          max_seq_len=max_seq_len,
                          meta_template=meta_template)
         self.logger = get_logger()
-        tokenizer_model_path = osp.join(path, 'triton_models', 'tokenizer')
-        self.tokenizer = Tokenizer(tokenizer_model_path)
-        tm_model = tm.TurboMind(path)
+        tm_model = TurboMind.from_pretrained(path)
+        self.tokenizer = tm_model.tokenizer
         self.generators = [
             tm_model.create_instance() for i in range(concurrency)
         ]
@@ -134,9 +131,10 @@
                                               sequence_start=True,
                                               sequence_end=True,
                                               top_k=1,
                                               top_p=0.8,
                                               step=0,
                                               stream_output=False):
-            output_ids, _ = outputs[0]
-            response = self.tokenizer.decode(output_ids.tolist())
+            _, output_ids, _ = outputs
+            response = self.tokenizer.decode(output_ids)
+            response = valid_str(response)
         return response
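Read together, the added lines above amount to roughly the following load-and-decode flow. This is a consolidation of the diff for readability, not new behavior; the default `concurrency` value and the helper name are assumptions, and `outputs`/`valid_str` come from the unchanged class code.

```python
from lmdeploy.turbomind import TurboMind

def load_turbomind(path: str, concurrency: int = 8):
    """Mirror of the updated __init__ logic: no standalone convert step and
    no separate triton_models/tokenizer directory."""
    tm_model = TurboMind.from_pretrained(path)   # HF model id or local path
    tokenizer = tm_model.tokenizer               # tokenizer ships with the engine
    generators = [tm_model.create_instance() for _ in range(concurrency)]
    return tm_model, tokenizer, generators

# On the generation side, each yielded `outputs` is now unpacked as a 3-tuple,
# and decode() receives the token ids directly (no .tolist()):
#     _, output_ids, _ = outputs
#     response = valid_str(tokenizer.decode(output_ids))
```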