Mirror of https://github.com/open-compass/opencompass.git (synced 2025-05-30 16:03:24 +08:00)
[Feature] Update configs for evaluating chat models like qwen, baichuan, llama2 using turbomind backend (#721)
* add llama2 test
* fix
* test qwen chat-7b
* test w4
* add baichuan2
* update
* update
* update configs and docs
* update
Parent: fbb912ddf3
Commit: e34c552282
@@ -5,112 +5,198 @@ from opencompass.models.turbomind import TurboMindModel

(This hunk updates the TurboMind chat-model config — per the documentation notes below, `eval_internlm_chat_turbomind.py`. The old version kept all dataset imports active (mmlu, ceval, WiC, triviaqa, gsm8k, humaneval, race, crowspairs), defined a single InternLM `meta_template`, and carried every internlm-chat variant as a commented-out `# models = [dict(...)]` block, with only the internlm-chat-20b-w4 block enabled as `models = [dict(...)]`. The updated section now reads:)

with read_base():
    # choose a list of datasets
    from .datasets.mmlu.mmlu_gen_a484b3 import mmlu_datasets
    # from .datasets.ceval.ceval_gen_5f30c7 import ceval_datasets
    # from .datasets.SuperGLUE_WiC.SuperGLUE_WiC_gen_d06864 import WiC_datasets
    # from .datasets.SuperGLUE_WSC.SuperGLUE_WSC_gen_7902a7 import WSC_datasets
    # from .datasets.triviaqa.triviaqa_gen_2121ce import triviaqa_datasets
    from .datasets.gsm8k.gsm8k_gen_1d7fe4 import gsm8k_datasets
    # from .datasets.race.race_gen_69ee4f import race_datasets
    # from .datasets.crowspairs.crowspairs_gen_381af0 import crowspairs_datasets
    # and output the results in a choosen format
    from .summarizers.medium import summarizer

datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])

internlm_meta_template = dict(round=[
    dict(role='HUMAN', begin='<|User|>:', end='\n'),
    dict(role='BOT', begin='<|Bot|>:', end='<eoa>\n', generate=True),
],
    eos_token_id=103028)

llama2_meta_template = dict(
    round=[
        dict(role='HUMAN', begin='[INST] ', end=' [/INST]'),
        dict(role='BOT', generate=True),
    ],
    eos_token_id=2)

qwen_meta_template = dict(round=[
    dict(role='HUMAN', begin='\n<|im_start|>user\n', end='<|im_end|>'),
    dict(role='BOT',
         begin='\n<|im_start|>assistant\n',
         end='<|im_end|>',
         generate=True)
])

baichuan2_meta_template = dict(round=[
    dict(role='HUMAN', begin='<reserved_106>'),
    dict(role='BOT', begin='<reserved_107>', generate=True)
])

# config for internlm-chat-7b
internlm_chat_7b = dict(
    type=TurboMindModel,
    abbr='internlm-chat-7b-turbomind',
    path='./turbomind',
    max_out_len=100,
    max_seq_len=2048,
    batch_size=32,
    concurrency=32,
    meta_template=internlm_meta_template,
    run_cfg=dict(num_gpus=1, num_procs=1),
)

# config for internlm-chat-7b-w4 model
internlm_chat_7b_w4 = dict(
    type=TurboMindModel,
    abbr='internlm-chat-7b-w4-turbomind',
    path='./turbomind',
    max_out_len=100,
    max_seq_len=2048,
    batch_size=32,
    concurrency=32,
    meta_template=internlm_meta_template,
    run_cfg=dict(num_gpus=1, num_procs=1),
)

# config for internlm-chat-7b-w4kv8 model
internlm_chat_7b_w4kv8 = dict(
    type=TurboMindModel,
    abbr='internlm-chat-7b-w4kv8-turbomind',
    path='./turbomind',
    max_out_len=100,
    max_seq_len=2048,
    batch_size=32,
    concurrency=32,
    meta_template=internlm_meta_template,
    run_cfg=dict(num_gpus=1, num_procs=1),
)

# config for internlm-chat-20b
internlm_chat_20b = dict(
    type=TurboMindModel,
    abbr='internlm-chat-20b-turbomind',
    path='./turbomind',
    max_out_len=100,
    max_seq_len=2048,
    batch_size=8,
    concurrency=8,
    meta_template=internlm_meta_template,
    run_cfg=dict(num_gpus=1, num_procs=1),
)

# config for internlm-chat-20b-w4 model
internlm_chat_20b_w4 = dict(
    type=TurboMindModel,
    abbr='internlm-chat-20b-w4-turbomind',
    path='./turbomind',
    max_out_len=100,
    max_seq_len=2048,
    batch_size=16,
    concurrency=16,
    meta_template=internlm_meta_template,
    run_cfg=dict(num_gpus=1, num_procs=1),
)

# config for internlm-chat-20b-w4kv8 model
internlm_chat_20b_w4kv8 = dict(
    type=TurboMindModel,
    abbr='internlm-chat-20b-w4kv8-turbomind',
    path='./turbomind',
    max_out_len=100,
    max_seq_len=2048,
    batch_size=16,
    concurrency=16,
    meta_template=internlm_meta_template,
    run_cfg=dict(num_gpus=1, num_procs=1),
)

# config for llama2-chat-7b
llama2_chat_7b = dict(
    type=TurboMindModel,
    abbr='llama2-chat-7b-turbomind',
    path='./turbomind',
    max_out_len=100,
    max_seq_len=2048,
    batch_size=16,
    concurrency=32,
    meta_template=llama2_meta_template,
    run_cfg=dict(num_gpus=1, num_procs=1),
)

# config for llama2-chat-13b
llama2_chat_13b = dict(
    type=TurboMindModel,
    abbr='llama2-chat-13b-turbomind',
    path='./turbomind',
    max_out_len=100,
    max_seq_len=2048,
    batch_size=16,
    concurrency=16,
    meta_template=llama2_meta_template,
    run_cfg=dict(num_gpus=1, num_procs=1),
)

# config for llama2-chat-70b
llama2_chat_70b = dict(
    type=TurboMindModel,
    abbr='llama2-chat-70b-turbomind',
    path='./turbomind',
    max_out_len=100,
    max_seq_len=2048,
    batch_size=8,
    concurrency=8,
    meta_template=llama2_meta_template,
    run_cfg=dict(num_gpus=1, num_procs=1),
)

# config for qwen-chat-7b
qwen_chat_7b = dict(
    type=TurboMindModel,
    abbr='qwen-chat-7b-turbomind',
    path='./turbomind',
    max_out_len=100,
    max_seq_len=2048,
    batch_size=16,
    concurrency=32,
    meta_template=qwen_meta_template,
    run_cfg=dict(num_gpus=1, num_procs=1),
)

# config for qwen-chat-7b
qwen_chat_14b = dict(
    type=TurboMindModel,
    abbr='qwen-chat-14b-turbomind',
    path='./turbomind',
    max_out_len=100,
    max_seq_len=2048,
    batch_size=16,
    concurrency=32,
    meta_template=qwen_meta_template,
    run_cfg=dict(num_gpus=1, num_procs=1),
)

# config for baichuan2-chat-7b
baichuan2_chat_7b = dict(
    type=TurboMindModel,
    abbr='baichuan2-chat-7b-turbomind',
    path='./turbomind',
    max_out_len=100,
    max_seq_len=2048,
    batch_size=16,
    concurrency=32,
    meta_template=baichuan2_meta_template,
    run_cfg=dict(num_gpus=1, num_procs=1),
)

models = [internlm_chat_20b]
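The four `*_meta_template` dicts above are what changes per model family: each `round` entry wraps a dialogue turn in model-specific `begin`/`end` markers, and `generate=True` marks the turn the engine is asked to continue. The snippet below is only a rough illustration of that idea — the `render` helper is hypothetical, not OpenCompass's actual template machinery:

```python
# Hypothetical sketch: reading a chat meta_template as a prompt format.
llama2_meta_template = dict(
    round=[
        dict(role='HUMAN', begin='[INST] ', end=' [/INST]'),
        dict(role='BOT', generate=True),
    ],
    eos_token_id=2)


def render(meta_template, turns):
    """Wrap each (role, text) turn in its begin/end markers; stop where generation starts."""
    spec = {d['role']: d for d in meta_template['round']}
    prompt = ''
    for role, text in turns:
        cfg = spec[role]
        prompt += cfg.get('begin', '') + text + cfg.get('end', '')
        if cfg.get('generate'):  # the engine continues from here
            break
    return prompt


print(render(llama2_meta_template, [('HUMAN', 'What is 2+2?'), ('BOT', '')]))
# [INST] What is 2+2? [/INST]
```

Read the same way, the qwen template frames turns with `<|im_start|>user` / `<|im_start|>assistant` and `<|im_end|>`, and the baichuan2 template uses the `<reserved_106>` / `<reserved_107>` markers those chat models were trained on.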
|
@@ -16,50 +16,43 @@ with read_base():

(Same restructuring for the base-model config — per the notes below, `eval_internlm_turbomind.py`. The commented-out `# models = [dict(...)]` blocks for internlm-7b, internlm-7b-w4, internlm-7b-w4kv8, internlm-20b-w4 and internlm-20b-w4kv8 become named dicts, the previously active `models = [dict(...)]` block for internlm-20b becomes `internlm_20b`, and the file now ends with `models = [internlm_20b]`. The updated section reads:)

datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])

# # config for internlm-7b model
internlm_7b = dict(
    type=TurboMindModel,
    abbr='internlm-7b-turbomind',
    path="./turbomind",
    max_out_len=100,
    max_seq_len=2048,
    batch_size=32,
    concurrency=32,
    run_cfg=dict(num_gpus=1, num_procs=1),
)

# # config for internlm-7b-w4 model
internlm_7b_w4 = dict(
    type=TurboMindModel,
    abbr='internlm-7b-w4-turbomind',
    path="./turbomind",
    max_out_len=100,
    max_seq_len=2048,
    batch_size=32,
    concurrency=32,
    run_cfg=dict(num_gpus=1, num_procs=1),
)

# # config for internlm-7b-w4kv8 model
internlm_7b_w4kv8 = dict(
    type=TurboMindModel,
    abbr='internlm-7b-w4kv8-turbomind',
    path="./turbomind",
    max_out_len=100,
    max_seq_len=2048,
    batch_size=32,
    concurrency=32,
    run_cfg=dict(num_gpus=1, num_procs=1),
)

# config for internlm-20b model
internlm_20b = dict(
    type=TurboMindModel,
    abbr='internlm-20b-turbomind',
    path="./turbomind",

@@ -69,33 +62,30 @@ models = [

(This hunk opens inside the `internlm_20b` dict; its unchanged middle lines are not shown by the diff.)

    concurrency=8,
    run_cfg=dict(num_gpus=1, num_procs=1),
)

# config for internlm-20b-w4 model
internlm_20b_w4 = dict(
    type=TurboMindModel,
    abbr='internlm-20b-w4-turbomind',
    path="./turbomind",
    max_out_len=100,
    max_seq_len=2048,
    batch_size=16,
    concurrency=16,
    run_cfg=dict(num_gpus=1, num_procs=1),
)

# config for internlm-20b-w4kv8 model
internlm_20b_w4kv8 = dict(
    type=TurboMindModel,
    abbr='internlm-20b-w4kv8-turbomind',
    path="./turbomind",
    max_out_len=100,
    max_seq_len=2048,
    batch_size=16,
    concurrency=16,
    run_cfg=dict(num_gpus=1, num_procs=1),
)

models = [internlm_20b]
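Both config files collect their evaluation sets the same way: every name imported under `with read_base():` that ends in `_datasets` is a list, and the `sum((...), [])` expression over `locals()` concatenates all of them. A minimal, self-contained illustration of the idiom with made-up entries (the real items are OpenCompass dataset configs):

```python
# Stand-ins for what the `from .datasets...` imports define in a real config.
mmlu_datasets = [dict(abbr='mmlu-demo')]
gsm8k_datasets = [dict(abbr='gsm8k-demo')]
summarizer = dict()  # ignored: the name does not end in '_datasets'

# Same expression as in the configs: gather every *_datasets list into one list.
datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])

print([d['abbr'] for d in datasets])  # ['mmlu-demo', 'gsm8k-demo']
```

Commenting a dataset import in or out is therefore all it takes to change what gets evaluated.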
|
@@ -55,8 +55,9 @@ You are expected to get the evaluation results after the inference and evaluation

 **Note**:

-- If you evaluate theInternLM Chat model, please use configuration file `eval_internlm_chat_turbomind.py`
-- If you evaluate the InternLM 7B model, please modify `eval_internlm_turbomind.py` or `eval_internlm_chat_turbomind.py` by commenting out the configuration for the 20B model and enabling the configuration for the 7B model.
+- If you evaluate the InternLM Chat model, please use configuration file `eval_internlm_chat_turbomind.py`
+- If you evaluate the InternLM 7B model, please modify `eval_internlm_turbomind.py` or `eval_internlm_chat_turbomind.py` by changing to the setting `models = [internlm_7b]` in the last line.
+- If you want to evaluate other chat models like Llama2, QWen-7B, Baichuan2-7B, you could change to the setting of `models` in `eval_internlm_chat_turbomind.py`.

 ### Evaluation with Turbomind gPRC API (optional)

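Per the updated note, selecting what to evaluate comes down to the `models` list at the end of the config. For example, to run one of the newly added chat models instead of the default `internlm_chat_20b`, the last line of `eval_internlm_chat_turbomind.py` could be changed as below (the model choice is just an example, and `./turbomind` must already contain the matching converted model):

```python
# Evaluate Llama-2 chat instead of the default InternLM-20B chat; any of the
# dicts defined above (qwen_chat_7b, baichuan2_chat_7b, ...) works the same way.
models = [llama2_chat_7b]
```

Likewise, as the note says, `eval_internlm_turbomind.py` can end with `models = [internlm_7b]` or another of its named configs.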
@@ -76,4 +77,4 @@ python run.py configs/eval_internlm_turbomind_tis.py -w outputs/turbomind-tis/in

 - If the InternLM Chat model is requested to be evaluated, please use config file `eval_internlm_chat_turbomind_tis.py`
 - In `eval_internlm_turbomind_tis.py`, the configured Triton Inference Server (TIS) address is `tis_addr='0.0.0.0:33337'`. Please modify `tis_addr` to the IP address of the machine where the server is launched.
-- If evaluating the InternLM 7B model, please modify the config file, commenting out the configuration for the 20B model and enabling the configuration for the 7B model
+- If evaluating the InternLM 7B model, please modify the `models` configuration in `eval_internlm_xxx_turbomind_tis.py`.
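For the gRPC/Triton path, the only field that normally needs editing is `tis_addr`. A sketch of that edit, assuming `tis_addr` sits inside the model dict as the keyword form in the note suggests (the IP below is hypothetical; the port comes from the docs):

```python
# In eval_internlm_turbomind_tis.py (or the chat variant): point the config at
# the machine that is actually serving the model.
models = [
    dict(
        # ... type / abbr / other settings as shipped ...
        tis_addr='192.168.0.20:33337',  # hypothetical server address
    )
]
```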
|
(Chinese version of the same guide; shown here in English translation.)

@@ -52,7 +52,8 @@ python run.py configs/eval_internlm_turbomind.py -w outputs/turbomind/internlm-2

 **Note:**

 - If you evaluate the InternLM Chat model, please use the configuration file `eval_internlm_chat_turbomind.py`
-- If you evaluate the InternLM 7B model, please modify `eval_internlm_turbomind.py` or `eval_internlm_chat_turbomind.py`: comment out the configuration for the 20B model and enable the configuration for the 7B model.
+- If you evaluate the InternLM 7B model, please modify `eval_internlm_turbomind.py` or `eval_internlm_chat_turbomind.py`: set the `models` field to `models = [internlm_7b]`.
+- If you evaluate other models such as Llama2, QWen-7B or Baichuan2-7B, please modify the `models` field in `eval_internlm_chat_turbomind.py`.

 ### Evaluation with TurboMind gRPC API (optional)

@@ -72,5 +73,5 @@ python run.py configs/eval_internlm_turbomind_tis.py -w outputs/turbomind-tis/in

 - If you evaluate the InternLM Chat model, please use the configuration file `eval_internlm_chat_turbomind_tis.py`
 - In the configuration file, the Triton Inference Server (TIS) address is `tis_addr='0.0.0.0:33337'`. Please change `tis_addr` to the IP address of the machine where the server is launched.
-- If you evaluate the InternLM 7B model, please modify `eval_internlm_xxx_turbomind_tis.py`: comment out the configuration for the 20B model and enable the configuration for the 7B model.
+- If you evaluate the InternLM 7B model, please modify the `models` field in `eval_internlm_xxx_turbomind_tis.py`.
 ```
|
@@ -126,7 +126,6 @@ class TurboMindModel(BaseModel):
         assert type(
             prompt) is str, 'We only support string for TurboMind Python API'

-        prompt = '<BOS>' + prompt
         input_ids = self.tokenizer.encode(prompt)

         for outputs in generator.stream_infer(session_id=session_id,
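The only change to `TurboMindModel` is dropping the hand-written `'<BOS>'` prefix before tokenization. As a toy illustration of why a literal `'<BOS>'` string in the prompt text generally does not become the BOS token (SentencePiece-style tokenizers add BOS themselves and encode the literal characters as ordinary text) — the tokenizer below is fake and the ids are made up:

```python
BOS_ID = 1

def fake_encode(text):
    """Stand-in for a SentencePiece-style tokenizer: prepends BOS itself and
    treats any literal '<BOS>' characters as ordinary text pieces."""
    return [BOS_ID] + [abs(hash(piece)) % 30000 + 2 for piece in text.split()]

print(fake_encode('hello world'))        # [1, ...]     one BOS, added by the tokenizer
print(fake_encode('<BOS> hello world'))  # [1, x, ...]  '<BOS>' encoded as plain text
```

In the real code path the ids still come from `self.tokenizer.encode(prompt)` and are streamed through `generator.stream_infer(...)` exactly as before.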