From e34c552282e9c8fab5554bcd80fac820e7ddbe62 Mon Sep 17 00:00:00 2001
From: RunningLeon
Date: Thu, 21 Dec 2023 18:22:17 +0800
Subject: [PATCH] [Feature] Update configs for evaluating chat models like
 qwen, baichuan, llama2 using turbomind backend (#721)

* add llama2 test

* fix

* test qwen chat-7b

* test w4

* add baichuan2

* update

* update

* update configs and docs

* update
---
 configs/eval_internlm_chat_turbomind.py      | 264 ++++++++++++------
 configs/eval_internlm_turbomind.py           | 116 ++++----
 .../advanced_guides/evaluation_turbomind.md  |   7 +-
 .../advanced_guides/evaluation_turbomind.md  |   5 +-
 opencompass/models/turbomind.py              |   1 -
 5 files changed, 235 insertions(+), 158 deletions(-)

diff --git a/configs/eval_internlm_chat_turbomind.py b/configs/eval_internlm_chat_turbomind.py
index 73b76447..a09a67da 100644
--- a/configs/eval_internlm_chat_turbomind.py
+++ b/configs/eval_internlm_chat_turbomind.py
@@ -5,112 +5,198 @@ from opencompass.models.turbomind import TurboMindModel
 with read_base():
     # choose a list of datasets
     from .datasets.mmlu.mmlu_gen_a484b3 import mmlu_datasets
-    from .datasets.ceval.ceval_gen_5f30c7 import ceval_datasets
-    from .datasets.SuperGLUE_WiC.SuperGLUE_WiC_gen_d06864 import WiC_datasets
-    from .datasets.triviaqa.triviaqa_gen_2121ce import triviaqa_datasets
+    # from .datasets.ceval.ceval_gen_5f30c7 import ceval_datasets
+    # from .datasets.SuperGLUE_WiC.SuperGLUE_WiC_gen_d06864 import WiC_datasets
+    # from .datasets.SuperGLUE_WSC.SuperGLUE_WSC_gen_7902a7 import WSC_datasets
+    # from .datasets.triviaqa.triviaqa_gen_2121ce import triviaqa_datasets
     from .datasets.gsm8k.gsm8k_gen_1d7fe4 import gsm8k_datasets
-    from .datasets.humaneval.humaneval_gen_8e312c import humaneval_datasets
-    from .datasets.race.race_gen_69ee4f import race_datasets
-    from .datasets.crowspairs.crowspairs_gen_381af0 import crowspairs_datasets
+    # from .datasets.race.race_gen_69ee4f import race_datasets
+    # from .datasets.crowspairs.crowspairs_gen_381af0 import crowspairs_datasets
     # and output the results in a chosen format
     from .summarizers.medium import summarizer
+
 datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
+internlm_meta_template = dict(round=[
+    dict(role='HUMAN', begin='<|User|>:', end='\n'),
+    dict(role='BOT', begin='<|Bot|>:', end='\n', generate=True),
+],
+    eos_token_id=103028)
 
-meta_template = dict(
+llama2_meta_template = dict(
     round=[
-        dict(role='HUMAN', begin='<|User|>:', end='\n'),
-        dict(role='BOT', begin='<|Bot|>:', end='\n', generate=True),
+        dict(role='HUMAN', begin='[INST] ', end=' [/INST]'),
+        dict(role='BOT', generate=True),
     ],
-    eos_token_id=103028)
+    eos_token_id=2)
+
+qwen_meta_template = dict(round=[
+    dict(role='HUMAN', begin='\n<|im_start|>user\n', end='<|im_end|>'),
+    dict(role='BOT',
+         begin='\n<|im_start|>assistant\n',
+         end='<|im_end|>',
+         generate=True)
+    ])
+
+baichuan2_meta_template = dict(round=[
+    dict(role='HUMAN', begin=''),
+    dict(role='BOT', begin='', generate=True)
+    ])
 
 # config for internlm-chat-7b
-# models = [
-#     dict(
-#         type=TurboMindModel,
-#         abbr='internlm-chat-7b-turbomind',
-#         path="./turbomind",
-#         max_out_len=100,
-#         max_seq_len=2048,
-#         batch_size=32,
-#         concurrency=32,
-#         meta_template=meta_template,
-#         run_cfg=dict(num_gpus=1, num_procs=1),
-#     )
-# ]
+internlm_chat_7b = dict(
+    type=TurboMindModel,
+    abbr='internlm-chat-7b-turbomind',
+    path='./turbomind',
+    max_out_len=100,
+    max_seq_len=2048,
+    batch_size=32,
+    concurrency=32,
+    meta_template=internlm_meta_template,
+    run_cfg=dict(num_gpus=1, num_procs=1),
+)
-# config for internlm-chat-7b-w4 model -# models = [ -# dict( -# type=TurboMindModel, -# abbr='internlm-chat-7b-w4-turbomind', -# path="./turbomind", -# max_out_len=100, -# max_seq_len=2048, -# batch_size=32, -# concurrency=32, -# meta_template=meta_template, -# run_cfg=dict(num_gpus=1, num_procs=1), -# ) -# ] +internlm_chat_7b_w4 = dict( + type=TurboMindModel, + abbr='internlm-chat-7b-w4-turbomind', + path='./turbomind', + max_out_len=100, + max_seq_len=2048, + batch_size=32, + concurrency=32, + meta_template=internlm_meta_template, + run_cfg=dict(num_gpus=1, num_procs=1), +) # config for internlm-chat-7b-w4kv8 model -# models = [ -# dict( -# type=TurboMindModel, -# abbr='internlm-chat-7b-w4kv8-turbomind', -# path="./turbomind", -# max_out_len=100, -# max_seq_len=2048, -# batch_size=32, -# concurrency=32, -# meta_template=meta_template, -# run_cfg=dict(num_gpus=1, num_procs=1), -# ) -# ] +internlm_chat_7b_w4kv8 = dict( + type=TurboMindModel, + abbr='internlm-chat-7b-w4kv8-turbomind', + path='./turbomind', + max_out_len=100, + max_seq_len=2048, + batch_size=32, + concurrency=32, + meta_template=internlm_meta_template, + run_cfg=dict(num_gpus=1, num_procs=1), +) # config for internlm-chat-20b -# models = [ -# dict( -# type=TurboMindModel, -# abbr='internlm-chat-20b-turbomind', -# path="./turbomind", -# max_out_len=100, -# max_seq_len=2048, -# batch_size=8, -# concurrency=8, -# meta_template=meta_template, -# run_cfg=dict(num_gpus=1, num_procs=1), -# ) -# ] +internlm_chat_20b = dict( + type=TurboMindModel, + abbr='internlm-chat-20b-turbomind', + path='./turbomind', + max_out_len=100, + max_seq_len=2048, + batch_size=8, + concurrency=8, + meta_template=internlm_meta_template, + run_cfg=dict(num_gpus=1, num_procs=1), +) # config for internlm-chat-20b-w4 model -models = [ - dict( - type=TurboMindModel, - abbr='internlm-chat-20b-w4-turbomind', - path="./turbomind", - max_out_len=100, - max_seq_len=2048, - batch_size=16, - concurrency=16, - meta_template=meta_template, - run_cfg=dict(num_gpus=1, num_procs=1), - ) -] +internlm_chat_20b_w4 = dict( + type=TurboMindModel, + abbr='internlm-chat-20b-w4-turbomind', + path='./turbomind', + max_out_len=100, + max_seq_len=2048, + batch_size=16, + concurrency=16, + meta_template=internlm_meta_template, + run_cfg=dict(num_gpus=1, num_procs=1), +) # config for internlm-chat-20b-w4kv8 model -# models = [ -# dict( -# type=TurboMindModel, -# abbr='internlm-chat-20b-w4kv8-turbomind', -# path="./turbomind", -# max_out_len=100, -# max_seq_len=2048, -# batch_size=16, -# concurrency=16, -# meta_template=meta_template, -# run_cfg=dict(num_gpus=1, num_procs=1), -# ) -# ] +internlm_chat_20b_w4kv8 = dict( + type=TurboMindModel, + abbr='internlm-chat-20b-w4kv8-turbomind', + path='./turbomind', + max_out_len=100, + max_seq_len=2048, + batch_size=16, + concurrency=16, + meta_template=internlm_meta_template, + run_cfg=dict(num_gpus=1, num_procs=1), +) + +# config for llama2-chat-7b +llama2_chat_7b = dict( + type=TurboMindModel, + abbr='llama2-chat-7b-turbomind', + path='./turbomind', + max_out_len=100, + max_seq_len=2048, + batch_size=16, + concurrency=32, + meta_template=llama2_meta_template, + run_cfg=dict(num_gpus=1, num_procs=1), +) + +# config for llama2-chat-13b +llama2_chat_13b = dict( + type=TurboMindModel, + abbr='llama2-chat-13b-turbomind', + path='./turbomind', + max_out_len=100, + max_seq_len=2048, + batch_size=16, + concurrency=16, + meta_template=llama2_meta_template, + run_cfg=dict(num_gpus=1, num_procs=1), +) + +# config for llama2-chat-70b +llama2_chat_70b 
= dict(
+    type=TurboMindModel,
+    abbr='llama2-chat-70b-turbomind',
+    path='./turbomind',
+    max_out_len=100,
+    max_seq_len=2048,
+    batch_size=8,
+    concurrency=8,
+    meta_template=llama2_meta_template,
+    run_cfg=dict(num_gpus=1, num_procs=1),
+)
+
+# config for qwen-chat-7b
+qwen_chat_7b = dict(
+    type=TurboMindModel,
+    abbr='qwen-chat-7b-turbomind',
+    path='./turbomind',
+    max_out_len=100,
+    max_seq_len=2048,
+    batch_size=16,
+    concurrency=32,
+    meta_template=qwen_meta_template,
+    run_cfg=dict(num_gpus=1, num_procs=1),
+)
+
+# config for qwen-chat-14b
+qwen_chat_14b = dict(
+    type=TurboMindModel,
+    abbr='qwen-chat-14b-turbomind',
+    path='./turbomind',
+    max_out_len=100,
+    max_seq_len=2048,
+    batch_size=16,
+    concurrency=32,
+    meta_template=qwen_meta_template,
+    run_cfg=dict(num_gpus=1, num_procs=1),
+)
+
+# config for baichuan2-chat-7b
+baichuan2_chat_7b = dict(
+    type=TurboMindModel,
+    abbr='baichuan2-chat-7b-turbomind',
+    path='./turbomind',
+    max_out_len=100,
+    max_seq_len=2048,
+    batch_size=16,
+    concurrency=32,
+    meta_template=baichuan2_meta_template,
+    run_cfg=dict(num_gpus=1, num_procs=1),
+)
+
+models = [internlm_chat_20b]
diff --git a/configs/eval_internlm_turbomind.py b/configs/eval_internlm_turbomind.py
index 8d43321c..a2396a5b 100644
--- a/configs/eval_internlm_turbomind.py
+++ b/configs/eval_internlm_turbomind.py
@@ -16,50 +16,43 @@ with read_base():
 
 datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
 
 # # config for internlm-7b model
-# models = [
-#     dict(
-#         type=TurboMindModel,
-#         abbr='internlm-7b-turbomind',
-#         path="./turbomind",
-#         max_out_len=100,
-#         max_seq_len=2048,
-#         batch_size=32,
-#         concurrency=32,
-#         run_cfg=dict(num_gpus=1, num_procs=1),
-#     )
-# ]
+internlm_7b = dict(
+    type=TurboMindModel,
+    abbr='internlm-7b-turbomind',
+    path="./turbomind",
+    max_out_len=100,
+    max_seq_len=2048,
+    batch_size=32,
+    concurrency=32,
+    run_cfg=dict(num_gpus=1, num_procs=1),
+    )
 
 # # config for internlm-7b-w4 model
-# models = [
-#     dict(
-#         type=TurboMindModel,
-#         abbr='internlm-7b-w4-turbomind',
-#         path="./turbomind",
-#         max_out_len=100,
-#         max_seq_len=2048,
-#         batch_size=32,
-#         concurrency=32,
-#         run_cfg=dict(num_gpus=1, num_procs=1),
-#     )
-# ]
+internlm_7b_w4 = dict(
+    type=TurboMindModel,
+    abbr='internlm-7b-w4-turbomind',
+    path="./turbomind",
+    max_out_len=100,
+    max_seq_len=2048,
+    batch_size=32,
+    concurrency=32,
+    run_cfg=dict(num_gpus=1, num_procs=1),
+    )
 
 # # config for internlm-7b-w4kv8 model
-# models = [
-#     dict(
-#         type=TurboMindModel,
-#         abbr='internlm-7b-w4kv8-turbomind',
-#         path="./turbomind",
-#         max_out_len=100,
-#         max_seq_len=2048,
-#         batch_size=32,
-#         concurrency=32,
-#         run_cfg=dict(num_gpus=1, num_procs=1),
-#     )
-# ]
+internlm_7b_w4kv8 = dict(
+    type=TurboMindModel,
+    abbr='internlm-7b-w4kv8-turbomind',
+    path="./turbomind",
+    max_out_len=100,
+    max_seq_len=2048,
+    batch_size=32,
+    concurrency=32,
+    run_cfg=dict(num_gpus=1, num_procs=1),
+    )
 
 # config for internlm-20b model
-models = [
-    dict(
+internlm_20b = dict(
         type=TurboMindModel,
         abbr='internlm-20b-turbomind',
         path="./turbomind",
@@ -69,33 +62,30 @@ models = [
         concurrency=8,
         run_cfg=dict(num_gpus=1, num_procs=1),
     )
-]
 
 # config for internlm-20b-w4 model
-# models = [
-#     dict(
-#         type=TurboMindModel,
-#         abbr='internlm-20b-w4-turbomind',
-#         path="./turbomind",
-#         max_out_len=100,
-#         max_seq_len=2048,
-#         batch_size=16,
-#         concurrency=16,
-#         run_cfg=dict(num_gpus=1, num_procs=1),
-#     )
-# ]
+internlm_20b_w4 = dict(
+    type=TurboMindModel,
+    abbr='internlm-20b-w4-turbomind',
+    path="./turbomind",
+    max_out_len=100,
+    max_seq_len=2048,
+    batch_size=16,
+    concurrency=16,
+    run_cfg=dict(num_gpus=1, num_procs=1),
+    )
 
 # config for internlm-20b-w4kv8 model
-# models = [
-#     dict(
-#         type=TurboMindModel,
-#         abbr='internlm-20b-w4kv8-turbomind',
-#         path="./turbomind",
-#         max_out_len=100,
-#         max_seq_len=2048,
-#         batch_size=16,
-#         concurrency=16,
-#         run_cfg=dict(num_gpus=1, num_procs=1),
-#     )
-# ]
+internlm_20b_w4kv8 = dict(
+    type=TurboMindModel,
+    abbr='internlm-20b-w4kv8-turbomind',
+    path="./turbomind",
+    max_out_len=100,
+    max_seq_len=2048,
+    batch_size=16,
+    concurrency=16,
+    run_cfg=dict(num_gpus=1, num_procs=1),
+    )
+
+models = [internlm_20b]
diff --git a/docs/en/advanced_guides/evaluation_turbomind.md b/docs/en/advanced_guides/evaluation_turbomind.md
index 01b0c15f..00b57226 100644
--- a/docs/en/advanced_guides/evaluation_turbomind.md
+++ b/docs/en/advanced_guides/evaluation_turbomind.md
@@ -55,8 +55,9 @@ You are expected to get the evaluation results after the inference and evaluatio
 
 **Note**:
 
-- If you evaluate theInternLM Chat model, please use configuration file `eval_internlm_chat_turbomind.py`
-- If you evaluate the InternLM 7B model, please modify `eval_internlm_turbomind.py` or `eval_internlm_chat_turbomind.py` by commenting out the configuration for the 20B model and enabling the configuration for the 7B model.
+- If you evaluate the InternLM Chat model, please use the configuration file `eval_internlm_chat_turbomind.py`.
+- If you evaluate the InternLM 7B model, please modify `eval_internlm_turbomind.py` or `eval_internlm_chat_turbomind.py` by setting `models = [internlm_7b]` on the last line.
+- If you want to evaluate other chat models such as Llama2, Qwen-7B, or Baichuan2-7B, change the `models` setting in `eval_internlm_chat_turbomind.py` accordingly.
 
 ### Evaluation with Turbomind gRPC API (optional)
 
@@ -76,4 +77,4 @@ python run.py configs/eval_internlm_turbomind_tis.py -w outputs/turbomind-tis/in
 
 - If the InternLM Chat model is requested to be evaluated, please use config file `eval_internlm_chat_turbomind_tis.py`
 - In `eval_internlm_turbomind_tis.py`, the configured Triton Inference Server (TIS) address is `tis_addr='0.0.0.0:33337'`. Please modify `tis_addr` to the IP address of the machine where the server is launched.
-- If evaluating the InternLM 7B model, please modify the config file, commenting out the configuration for the 20B model and enabling the configuration for the 7B model
+- If evaluating the InternLM 7B model, please modify the `models` configuration in `eval_internlm_xxx_turbomind_tis.py`.
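
The doc note above reduces model selection to editing one list at the end of the config file. Below is a minimal sketch of that pattern; the two dicts are abbreviated stand-ins for the full TurboMindModel configs defined earlier in this patch, not the real configs.

```python
# Sketch of the selection pattern used by the updated configs: every model
# is described by a plain dict, and only the entries placed in `models` are
# evaluated. These dicts are abbreviated stand-ins; the real ones also set
# type, path, meta_template, batch_size, concurrency and run_cfg.
internlm_chat_7b = dict(abbr='internlm-chat-7b-turbomind')
internlm_chat_20b = dict(abbr='internlm-chat-20b-turbomind')

# Swap the list contents to switch evaluation targets,
# e.g. models = [internlm_chat_7b] to evaluate the 7B chat model.
models = [internlm_chat_20b]
```
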
diff --git a/docs/zh_cn/advanced_guides/evaluation_turbomind.md b/docs/zh_cn/advanced_guides/evaluation_turbomind.md
index 74da26ea..4c8714e1 100644
--- a/docs/zh_cn/advanced_guides/evaluation_turbomind.md
+++ b/docs/zh_cn/advanced_guides/evaluation_turbomind.md
@@ -52,7 +52,8 @@ python run.py configs/eval_internlm_turbomind.py -w outputs/turbomind/internlm-2
 
 **注:**
 
 - 如果评测 InternLM Chat 模型,请使用配置文件 `eval_internlm_chat_turbomind.py`
-- 如果评测 InternLM 7B 模型,请修改 `eval_internlm_turbomind.py` 或者 `eval_internlm_chat_turbomind.py`。把 20B 模型的配置注释掉,打开 7B 模型的配置。
+- 如果评测 InternLM 7B 模型,请修改 `eval_internlm_turbomind.py` 或者 `eval_internlm_chat_turbomind.py`。将 `models` 字段配置为 `models = [internlm_7b]`。
+- 如果评测其他模型如 Llama2、Qwen-7B、Baichuan2-7B,请修改 `eval_internlm_chat_turbomind.py` 中的 `models` 字段。
 
 ### 通过 TurboMind gRPC API 评测(可选)
 
@@ -72,5 +73,5 @@ python run.py configs/eval_internlm_turbomind_tis.py -w outputs/turbomind-tis/in
 
 - 如果评测 InternLM Chat 模型,请使用配置文件 `eval_internlm_chat_turbomind_tis.py`
 - 在配置文件中,triton inference server(TIS) 地址是 `tis_addr='0.0.0.0:33337'`。请把配置中的`tis_addr`修改为server所在机器的ip地址。
-- 如果评测 InternLM 7B 模型,请修改 `eval_internlm_xxx_turbomind_tis.py`。把其中 20B 模型的配置注释掉,打开 7B 模型的配置。
+- 如果评测 InternLM 7B 模型,请修改 `eval_internlm_xxx_turbomind_tis.py` 中的 `models` 字段。
 ```
diff --git a/opencompass/models/turbomind.py b/opencompass/models/turbomind.py
index 3dd9db0d..b75c3e02 100644
--- a/opencompass/models/turbomind.py
+++ b/opencompass/models/turbomind.py
@@ -126,7 +126,6 @@ class TurboMindModel(BaseModel):
         assert type(
             prompt) is str, 'We only support string for TurboMind Python API'
 
-        prompt = '' + prompt
         input_ids = self.tokenizer.encode(prompt)
 
         for outputs in generator.stream_infer(session_id=session_id,
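
As background for the chat templates added above: a meta template wraps each dialogue turn with per-role `begin`/`end` markers before the prompt reaches TurboMind. The sketch below illustrates the idea with the Llama2 template; `render` is a hypothetical helper written for this illustration, not OpenCompass's actual prompt builder.

```python
# Simplified illustration of how a meta template shapes a prompt.
# llama2_meta_template mirrors the dict added by this patch; render() is a
# hypothetical stand-in for OpenCompass's real template logic.
llama2_meta_template = dict(
    round=[
        dict(role='HUMAN', begin='[INST] ', end=' [/INST]'),
        dict(role='BOT', generate=True),
    ],
    eos_token_id=2)

def render(dialogue, meta_template):
    """Wrap each (role, text) turn with that role's begin/end markers."""
    specs = {spec['role']: spec for spec in meta_template['round']}
    return ''.join(
        specs[role].get('begin', '') + text + specs[role].get('end', '')
        for role, text in dialogue)

print(render([('HUMAN', 'What is 2 + 2?')], llama2_meta_template))
# -> '[INST] What is 2 + 2? [/INST]'
```
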