From 7505b3cadf51ab7c8596c79316c008b7b748f5b4 Mon Sep 17 00:00:00 2001
From: Fengzhe Zhou
Date: Tue, 14 May 2024 14:50:16 +0800
Subject: [PATCH] [Feature] Add huggingface apply_chat_template (#1098)

* add TheoremQA with 5-shot
* add huggingface_above_v4_33 classes
* use num_worker partitioner in cli
* update theoremqa
* update TheoremQA
* add TheoremQA
* rename theoremqa -> TheoremQA
* update TheoremQA output path
* rewrite many model configs
* update huggingface
* further update
* refine configs
* update configs
* update configs
* add configs/eval_llama3_instruct.py
* add summarizer multi faceted
* update bbh datasets
* update configs/models/hf_llama/lmdeploy_llama3_8b_instruct.py
* rename class
* update readme
* update hf above v4.33
---
 README.md | 11 +-
 README_zh-CN.md | 13 +-
 configs/dataset_collections/chat_OC15.py | 22 +
 .../TheoremQA/TheoremQA_5shot_gen_a4f581.py | 46 --
 configs/datasets/bbh/bbh_gen_2879b0.py | 56 +++
 configs/datasets/bbh/bbh_subset_settings.py | 29 ++
 configs/datasets/collections/chat_medium.py | 2 +-
 configs/datasets/collections/chat_small.py | 2 +-
 ...py => deprecated_winogrande_gen_a9ede5.py} | 0
 .../winogrande/winogrande_5shot_gen_b36770.py | 46 ++
 configs/datasets/winogrande/winogrande_gen.py | 2 +-
 .../winogrande/winogrande_gen_458220.py | 41 ++
 configs/eval_llama3_instruct.py | 52 +++
 configs/models/aquila/hf_aquila2_34b.py | 22 +-
 configs/models/aquila/hf_aquila2_7b.py | 22 +-
 configs/models/aquila/hf_aquilachat2_34b.py | 1 -
 .../models/aquila/hf_aquilachat2_34b_16k.py | 1 -
 configs/models/aquila/hf_aquilachat2_7b.py | 1 -
 .../models/aquila/hf_aquilachat2_7b_16k.py | 1 -
 .../models/baichuan/hf_baichuan2_13b_base.py | 19 +-
 .../models/baichuan/hf_baichuan2_7b_base.py | 19 +-
 configs/models/bluelm/hf_bluelm_7b_base.py | 22 +-
 .../models/bluelm/hf_bluelm_7b_base_32k.py | 22 +-
 configs/models/chatglm/hf_chatglm3_6b.py | 31 +-
 configs/models/chatglm/hf_chatglm3_6b_32k.py | 29 +-
 configs/models/chatglm/hf_chatglm3_6b_base.py | 20 +-
 configs/models/codellama/hf_codellama_13b.py | 19 +-
 .../codellama/hf_codellama_13b_instruct.py | 19 +-
 .../codellama/hf_codellama_13b_python.py | 19 +-
 configs/models/codellama/hf_codellama_34b.py | 19 +-
 .../codellama/hf_codellama_34b_instruct.py | 19 +-
 .../codellama/hf_codellama_34b_python.py | 19 +-
 configs/models/codellama/hf_codellama_70b.py | 12 +
 .../codellama/hf_codellama_70b_instruct.py | 12 +
 .../codellama/hf_codellama_70b_python.py | 12 +
 configs/models/codellama/hf_codellama_7b.py | 19 +-
 .../codellama/hf_codellama_7b_instruct.py | 19 +-
 .../codellama/hf_codellama_7b_python.py | 19 +-
 .../models/deepseek/hf_deepseek_67b_base.py | 22 +-
 .../models/deepseek/hf_deepseek_67b_chat.py | 31 +-
 .../models/deepseek/hf_deepseek_7b_base.py | 22 +-
 .../models/deepseek/hf_deepseek_7b_chat.py | 31 +-
 .../hf_deepseek_coder_1_3b_instruct.py | 34 +-
 .../hf_deepseek_coder_33b_instruct.py | 34 +-
 .../hf_deepseek_coder_6_7b_instruct.py | 34 +-
 .../deepseek/hf_deepseek_moe_16b_base.py | 22 +-
 .../deepseek/hf_deepseek_moe_16b_chat.py | 30 +-
 configs/models/falcon/hf_falcon_40b.py | 17 +-
 configs/models/falcon/hf_falcon_7b.py | 17 +-
 configs/models/gemma/hf_gemma_2b.py | 21 +-
 configs/models/gemma/hf_gemma_2b_it.py | 31 +-
 configs/models/gemma/hf_gemma_7b.py | 21 +-
 configs/models/gemma/hf_gemma_7b_it.py | 31 +-
 .../models/hf_internlm/hf_internlm2_1_8b.py | 22 +-
 .../models/hf_internlm/hf_internlm2_20b.py | 21 +-
 configs/models/hf_internlm/hf_internlm2_7b.py | 21 +-
 .../hf_internlm/hf_internlm2_base_20b.py | 21 +-
.../hf_internlm/hf_internlm2_base_7b.py | 21 +- .../hf_internlm/hf_internlm2_chat_1_8b.py | 35 +- .../hf_internlm/hf_internlm2_chat_1_8b_sft.py | 35 +- .../hf_internlm/hf_internlm2_chat_20b.py | 35 +- .../hf_internlm/hf_internlm2_chat_20b_sft.py | 35 +- .../hf_internlm/hf_internlm2_chat_7b.py | 35 +- .../hf_internlm/hf_internlm2_chat_7b_sft.py | 35 +- .../hf_internlm/hf_internlm2_chat_math_20b.py | 34 +- .../hf_internlm2_chat_math_20b_with_system.py | 1 - .../hf_internlm/hf_internlm2_chat_math_7b.py | 34 +- .../hf_internlm2_chat_math_7b_with_system.py | 1 - .../hf_internlm/hf_internlm2_math_20b.py | 13 + .../hf_internlm/hf_internlm2_math_7b.py | 13 + configs/models/hf_internlm/hf_internlm_20b.py | 17 +- configs/models/hf_internlm/hf_internlm_7b.py | 20 +- .../hf_internlm/hf_internlm_chat_7b_8k.py | 34 -- .../hf_internlm/hf_internlm_chat_7b_v1_1.py | 34 -- .../hf_internlm/lmdeploy_internlm2_20b.py | 27 ++ .../lmdeploy_internlm2_chat_20b.py | 9 +- .../hf_internlm/lmdeploy_internlm2_chat_7b.py | 9 +- configs/models/hf_llama/hf_llama2_13b.py | 19 +- configs/models/hf_llama/hf_llama2_13b_chat.py | 30 +- configs/models/hf_llama/hf_llama2_70b.py | 19 +- configs/models/hf_llama/hf_llama2_70b_chat.py | 30 +- configs/models/hf_llama/hf_llama2_7b.py | 19 +- configs/models/hf_llama/hf_llama2_7b_chat.py | 30 +- configs/models/hf_llama/hf_llama3_70b.py | 21 +- .../models/hf_llama/hf_llama3_70b_instruct.py | 30 +- configs/models/hf_llama/hf_llama3_8b.py | 21 +- .../models/hf_llama/hf_llama3_8b_instruct.py | 30 +- configs/models/hf_llama/hf_llama_13b.py | 19 +- configs/models/hf_llama/hf_llama_30b.py | 19 +- configs/models/hf_llama/hf_llama_65b.py | 19 +- configs/models/hf_llama/hf_llama_7b.py | 19 +- .../hf_llama/lmdeploy_llama3_70b_instruct.py | 24 + .../hf_llama/lmdeploy_llama3_8b_instruct.py | 24 + .../mistral/hf_mistral_7b_instruct_v0_1.py | 30 +- .../mistral/hf_mistral_7b_instruct_v0_2.py | 30 +- configs/models/mistral/hf_mistral_7b_v0_1.py | 19 +- configs/models/mistral/hf_mistral_7b_v0_2.py | 20 +- .../mistral/hf_mixtral_8x22b_instruct_v0_1.py | 12 + .../models/mistral/hf_mixtral_8x22b_v0_1.py | 12 + .../mistral/hf_mixtral_8x7b_instruct_v0_1.py | 12 + .../models/mistral/hf_mixtral_8x7b_v0_1.py | 12 + .../{mixtral => mistral}/mixtral_8x7b_32k.py | 0 .../mistral/vllm_mistral_7b_instruct_v0_1.py | 1 - .../mistral/vllm_mistral_7b_instruct_v0_2.py | 1 - .../models/mistral/vllm_mistral_7b_v0_1.py | 17 + .../models/mistral/vllm_mistral_7b_v0_2.py | 17 + .../vllm_mixtral_8x7b_instruct_v0_1.py | 1 - .../mixtral/hf_mixtral_8x22b_instruct_v0_1.py | 34 -- .../models/mixtral/hf_mixtral_8x22b_v0_1.py | 24 - .../mixtral/hf_mixtral_8x7b_instruct_v0_1.py | 34 -- .../models/mixtral/hf_mixtral_8x7b_v0_1.py | 24 - .../models/nanbeige/hf_nanbeige2_8b_chat.py | 34 +- .../models/nanbeige/hf_nanbeige_16b_base.py | 33 -- .../nanbeige/hf_nanbeige_16b_base_32k.py | 34 -- .../nanbeige/hf_nanbeige_16b_chat_32k.py | 34 -- .../models/openbmb/hf_minicpm_2b_dpo_fp32.py | 29 +- .../models/openbmb/hf_minicpm_2b_sft_bf16.py | 12 + .../models/openbmb/hf_minicpm_2b_sft_fp32.py | 29 +- configs/models/opt/hf_opt_125m.py | 31 +- configs/models/opt/hf_opt_350m.py | 31 +- configs/models/others/hf_command_r_plus.py | 23 +- configs/models/others/hf_dbrx_base.py | 12 + configs/models/others/hf_dbrx_instruct.py | 32 +- .../models/others/hf_dolphin_21_mistral_7b.py | 1 - .../models/others/hf_fashiongpt_70b_v11.py | 1 - .../models/others/hf_orionstar_yi_34b_chat.py | 1 - configs/models/others/hf_telechat_7b_chat.py | 1 - 
.../others/vllm_orionstar_14b_longchat.py | 1 - configs/models/qwen/hf_qwen1_5_0_5b.py | 23 +- configs/models/qwen/hf_qwen1_5_0_5b_chat.py | 31 +- configs/models/qwen/hf_qwen1_5_14b.py | 23 +- configs/models/qwen/hf_qwen1_5_14b_chat.py | 31 +- configs/models/qwen/hf_qwen1_5_1_8b.py | 23 +- configs/models/qwen/hf_qwen1_5_1_8b_chat.py | 31 +- configs/models/qwen/hf_qwen1_5_32b.py | 23 +- configs/models/qwen/hf_qwen1_5_32b_chat.py | 31 +- configs/models/qwen/hf_qwen1_5_4b.py | 23 +- configs/models/qwen/hf_qwen1_5_4b_chat.py | 31 +- configs/models/qwen/hf_qwen1_5_72b.py | 23 +- configs/models/qwen/hf_qwen1_5_72b_chat.py | 31 +- configs/models/qwen/hf_qwen1_5_7b.py | 23 +- configs/models/qwen/hf_qwen1_5_7b_chat.py | 31 +- configs/models/qwen/hf_qwen_14b.py | 24 +- configs/models/qwen/hf_qwen_14b_chat.py | 35 +- configs/models/qwen/hf_qwen_1_8b.py | 24 +- configs/models/qwen/hf_qwen_1_8b_chat.py | 33 +- configs/models/qwen/hf_qwen_72b.py | 24 +- configs/models/qwen/hf_qwen_72b_chat.py | 32 +- configs/models/qwen/hf_qwen_7b.py | 24 +- configs/models/qwen/hf_qwen_7b_chat.py | 35 +- configs/models/qwen/vllm_qwen1_5_14b_chat.py | 1 - configs/models/qwen/vllm_qwen1_5_72b_chat.py | 1 - configs/models/skywork/hf_skywork_13b.py | 22 +- configs/models/vicuna/hf_vicuna_13b_v13.py | 22 +- configs/models/vicuna/hf_vicuna_13b_v15.py | 22 +- .../models/vicuna/hf_vicuna_13b_v15_16k.py | 29 +- configs/models/vicuna/hf_vicuna_33b_v13.py | 22 +- configs/models/vicuna/hf_vicuna_7b_v13.py | 22 +- configs/models/vicuna/hf_vicuna_7b_v15.py | 22 +- configs/models/vicuna/hf_vicuna_7b_v15_16k.py | 29 +- configs/models/yi/hf_yi_34b.py | 20 +- configs/models/yi/hf_yi_34b_200k.py | 24 - configs/models/yi/hf_yi_34b_chat.py | 28 +- configs/models/yi/hf_yi_6b.py | 20 +- configs/models/yi/hf_yi_6b_200k.py | 23 - configs/models/yi/hf_yi_6b_chat.py | 29 +- configs/models/zephyr/hf_zephyr_7b_beta.py | 28 +- configs/summarizers/chat_OC15.py | 81 ++++ .../summarizers/chat_OC15_multi_faceted.py | 130 ++++++ docs/en/get_started/quick_start.md | 45 +- docs/zh_cn/get_started/quick_start.md | 45 +- opencompass/cli/main.py | 55 +-- opencompass/datasets/winogrande.py | 9 + opencompass/models/__init__.py | 22 +- opencompass/models/huggingface_above_v4_33.py | 414 ++++++++++++++++++ opencompass/models/turbomind.py | 43 +- opencompass/models/vllm.py | 14 +- .../icl_inferencer/icl_ll_inferencer.py | 76 ++-- .../icl_inferencer/icl_ppl_inferencer.py | 102 ++--- opencompass/partitioners/num_worker.py | 8 +- opencompass/summarizers/__init__.py | 3 +- opencompass/summarizers/default.py | 12 +- opencompass/summarizers/multi_faceted.py | 46 ++ opencompass/utils/build.py | 1 - opencompass/utils/run.py | 65 +-- tools/prompt_viewer.py | 2 +- 186 files changed, 1947 insertions(+), 2910 deletions(-) create mode 100644 configs/dataset_collections/chat_OC15.py delete mode 100644 configs/datasets/TheoremQA/TheoremQA_5shot_gen_a4f581.py create mode 100644 configs/datasets/bbh/bbh_gen_2879b0.py create mode 100644 configs/datasets/bbh/bbh_subset_settings.py rename configs/datasets/winogrande/{winogrande_gen_a9ede5.py => deprecated_winogrande_gen_a9ede5.py} (100%) create mode 100644 configs/datasets/winogrande/winogrande_5shot_gen_b36770.py create mode 100644 configs/datasets/winogrande/winogrande_gen_458220.py create mode 100644 configs/eval_llama3_instruct.py create mode 100644 configs/models/codellama/hf_codellama_70b.py create mode 100644 configs/models/codellama/hf_codellama_70b_instruct.py create mode 100644 
configs/models/codellama/hf_codellama_70b_python.py create mode 100644 configs/models/hf_internlm/hf_internlm2_math_20b.py create mode 100644 configs/models/hf_internlm/hf_internlm2_math_7b.py delete mode 100644 configs/models/hf_internlm/hf_internlm_chat_7b_8k.py delete mode 100644 configs/models/hf_internlm/hf_internlm_chat_7b_v1_1.py create mode 100644 configs/models/hf_internlm/lmdeploy_internlm2_20b.py create mode 100644 configs/models/hf_llama/lmdeploy_llama3_70b_instruct.py create mode 100644 configs/models/hf_llama/lmdeploy_llama3_8b_instruct.py create mode 100644 configs/models/mistral/hf_mixtral_8x22b_instruct_v0_1.py create mode 100644 configs/models/mistral/hf_mixtral_8x22b_v0_1.py create mode 100644 configs/models/mistral/hf_mixtral_8x7b_instruct_v0_1.py create mode 100644 configs/models/mistral/hf_mixtral_8x7b_v0_1.py rename configs/models/{mixtral => mistral}/mixtral_8x7b_32k.py (100%) create mode 100644 configs/models/mistral/vllm_mistral_7b_v0_1.py create mode 100644 configs/models/mistral/vllm_mistral_7b_v0_2.py rename configs/models/{mixtral => mistral}/vllm_mixtral_8x7b_instruct_v0_1.py (97%) delete mode 100644 configs/models/mixtral/hf_mixtral_8x22b_instruct_v0_1.py delete mode 100644 configs/models/mixtral/hf_mixtral_8x22b_v0_1.py delete mode 100644 configs/models/mixtral/hf_mixtral_8x7b_instruct_v0_1.py delete mode 100644 configs/models/mixtral/hf_mixtral_8x7b_v0_1.py delete mode 100644 configs/models/nanbeige/hf_nanbeige_16b_base.py delete mode 100644 configs/models/nanbeige/hf_nanbeige_16b_base_32k.py delete mode 100644 configs/models/nanbeige/hf_nanbeige_16b_chat_32k.py create mode 100644 configs/models/openbmb/hf_minicpm_2b_sft_bf16.py create mode 100644 configs/models/others/hf_dbrx_base.py delete mode 100644 configs/models/yi/hf_yi_34b_200k.py delete mode 100644 configs/models/yi/hf_yi_6b_200k.py create mode 100644 configs/summarizers/chat_OC15.py create mode 100644 configs/summarizers/chat_OC15_multi_faceted.py create mode 100644 opencompass/models/huggingface_above_v4_33.py create mode 100644 opencompass/summarizers/multi_faceted.py diff --git a/README.md b/README.md index eaac33b3..484b49f5 100644 --- a/README.md +++ b/README.md @@ -162,20 +162,11 @@ python tools/list_configs.py llama mmlu You can also evaluate other HuggingFace models via command line. Taking LLaMA-7b as an example: ```bash -python run.py --datasets ceval_ppl mmlu_ppl \ ---hf-path huggyllama/llama-7b \ # HuggingFace model path ---model-kwargs device_map='auto' \ # Arguments for model construction ---tokenizer-kwargs padding_side='left' truncation='left' use_fast=False \ # Arguments for tokenizer construction ---max-out-len 100 \ # Maximum number of tokens generated ---max-seq-len 2048 \ # Maximum sequence length the model can accept ---batch-size 8 \ # Batch size ---no-batch-padding \ # Don't enable batch padding, infer through for loop to avoid performance loss ---num-gpus 1 # Number of minimum required GPUs +python run.py --datasets ceval_ppl mmlu_ppl --hf-type base --hf-path huggyllama/llama-7b ``` > \[!TIP\] > -> To run the command above, you will need to remove the comments starting from `# ` first. > configuration with `_ppl` is designed for base model typically. > configuration with `_gen` can be used for both base model and chat model. 
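For readers comparing the simplified CLI call with the config-file route, the snippet below is a minimal sketch of an equivalent model entry written with the `HuggingFaceBaseModel` class that this PR introduces. The `abbr`, `batch_size`, and `num_gpus` values are illustrative assumptions chosen to mirror the rewritten model configs later in this patch; they are not part of the README change itself.

```python
from opencompass.models import HuggingFaceBaseModel

models = [
    dict(
        type=HuggingFaceBaseModel,   # base-model wrapper added by this PR
        abbr='llama-7b-hf',          # hypothetical run label for this sketch
        path='huggyllama/llama-7b',  # same HuggingFace path as the CLI example
        max_out_len=1024,            # maximum number of generated tokens
        batch_size=8,
        run_cfg=dict(num_gpus=1),
    )
]
```

Saved as a config file and passed to `run.py` in place of the `--hf-type base --hf-path` flags, this selects the same model as the command above.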
diff --git a/README_zh-CN.md b/README_zh-CN.md index 8adb68a0..b53cf89d 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -163,20 +163,9 @@ python tools/list_configs.py llama mmlu 你也可以通过命令行去评测其它 HuggingFace 模型。同样以 LLaMA-7b 为例: ```bash -python run.py --datasets ceval_ppl mmlu_ppl \ ---hf-path huggyllama/llama-7b \ # HuggingFace 模型地址 ---model-kwargs device_map='auto' \ # 构造 model 的参数 ---tokenizer-kwargs padding_side='left' truncation='left' use_fast=False \ # 构造 tokenizer 的参数 ---max-out-len 100 \ # 最长生成 token 数 ---max-seq-len 2048 \ # 模型能接受的最大序列长度 ---batch-size 8 \ # 批次大小 ---no-batch-padding \ # 不打开 batch padding,通过 for loop 推理,避免精度损失 ---num-gpus 1 # 运行该模型所需的最少 gpu 数 +python run.py --datasets ceval_ppl mmlu_ppl --hf-type base --hf-path huggyllama/llama-7b ``` -> **注意**
-> 若需要运行上述命令,你需要删除所有从 `# ` 开始的注释。 - 通过命令行或配置文件,OpenCompass 还支持评测 API 或自定义模型,以及更多样化的评测策略。请阅读[快速开始](https://opencompass.readthedocs.io/zh_CN/latest/get_started/quick_start.html)了解如何运行一个评测任务。 更多教程请查看我们的[文档](https://opencompass.readthedocs.io/zh_CN/latest/index.html)。 diff --git a/configs/dataset_collections/chat_OC15.py b/configs/dataset_collections/chat_OC15.py new file mode 100644 index 00000000..a7ef8085 --- /dev/null +++ b/configs/dataset_collections/chat_OC15.py @@ -0,0 +1,22 @@ +from mmengine.config import read_base + +with read_base(): + from ..datasets.mmlu.mmlu_gen_4d595a import mmlu_datasets + from ..datasets.cmmlu.cmmlu_gen_c13365 import cmmlu_datasets + from ..datasets.ceval.ceval_gen_5f30c7 import ceval_datasets + from ..datasets.GaokaoBench.GaokaoBench_no_subjective_gen_4c31db import GaokaoBench_datasets + from ..datasets.triviaqa.triviaqa_wiki_1shot_gen_eaf81e import triviaqa_datasets + from ..datasets.nq.nq_open_1shot_gen_01cf41 import nq_datasets + from ..datasets.race.race_gen_69ee4f import race_datasets + from ..datasets.winogrande.winogrande_5shot_gen_b36770 import winogrande_datasets + from ..datasets.hellaswag.hellaswag_10shot_gen_e42710 import hellaswag_datasets + from ..datasets.bbh.bbh_gen_2879b0 import bbh_datasets + from ..datasets.gsm8k.gsm8k_gen_1d7fe4 import gsm8k_datasets + from ..datasets.math.math_0shot_gen_393424 import math_datasets + from ..datasets.TheoremQA.TheoremQA_5shot_gen_6f0af8 import TheoremQA_datasets + from ..datasets.humaneval.humaneval_gen_8e312c import humaneval_datasets + from ..datasets.mbpp.sanitized_mbpp_gen_830460 import sanitized_mbpp_datasets + from ..datasets.gpqa.gpqa_gen_4baadb import gpqa_datasets + from ..datasets.IFEval.IFEval_gen_3321a3 import ifeval_datasets + +datasets = sum((v for k, v in locals().items() if k.endswith("_datasets")), []) diff --git a/configs/datasets/TheoremQA/TheoremQA_5shot_gen_a4f581.py b/configs/datasets/TheoremQA/TheoremQA_5shot_gen_a4f581.py deleted file mode 100644 index 00740094..00000000 --- a/configs/datasets/TheoremQA/TheoremQA_5shot_gen_a4f581.py +++ /dev/null @@ -1,46 +0,0 @@ -from opencompass.openicl.icl_prompt_template import PromptTemplate -from opencompass.openicl.icl_retriever import ZeroRetriever -from opencompass.openicl.icl_inferencer import GenInferencer -from opencompass.datasets import HFDataset, TheoremQA_postprocess_v3, TheoremQAEvaluatorV3 - -TheoremQA_reader_cfg = dict(input_columns=["Question", "Answer_type"], output_column="Answer", train_split="test", test_split="test") - -TheoremQA_infer_cfg = dict( - prompt_template=dict( - type=PromptTemplate, - template=dict( - round=[ - dict(role='HUMAN', prompt='You are supposed to provide a solution to a given problem.\n\n\nProblem:\nIn a 10 Gigabit Ethernet network, the average size of a frame is 1500 bytes. 
If a burst of noise lasting 1ms interrupts the network, how many frames are lost?'), - dict(role='BOT', prompt='Solution:\nFirst, calculate the data rate in bytes/s:\n\n10 Gigabit/s * (1 Byte / 8 bits) = 1.25 * 10^9 Bytes/s\n\nNext, calculate the data loss in bytes due to the noise:\n\n1 ms * 1.25 * 10^9 Bytes/s = 1.25 * 10^6 Bytes\n\nFinally, divide the data loss by the average frame size to get the number of frames lost:\n\n1.25 * 10^6 Bytes / 1500 Bytes/frame ≈ 833.33 frames\nThe answer is 833.33'), - dict(role='HUMAN', prompt='\nProblem:\nGiven x = 0.157, what is the value of x \\times \\frac{\\prod_{n=1}^\\infty (1 - \\frac{x^2}{n^2 \\pi^2})}{\\sin(x)}?'), - dict(role='BOT', prompt="Solution:\nTo evaluate the expression $x \\times \\frac{\\prod_{n=1}^{\\infty} (1 - \\frac{x^2}{n^2 \\pi^2})}{\\sin(x)}$ given x = 0.157, we first recognize that the product in the numerator is related to the sine function through the Euler's reflection formula for the sine function, which can be expressed as:\n\n$$\\sin(x) = x \\prod_{n=1}^{\\infty} \\left(1 - \\frac{x^2}{n^2 \\pi^2}\\right)$$\n\nTherefore, the given expression simplifies to: $x \\times \\frac{\\sin(x)}{\\sin(x)}$\n\nBecause sin(x) in the numerator and denominator cancels out, the expression simplifies further to just x.\n\nSo, given x = 0.157, the value of the expression is 0.157. This result is derived from the properties of the sine function and does not require computational evaluation.\nThe answer is 0.157"), - dict(role='HUMAN', prompt='\nProblem:\nConsider the basis C of \\mathbb{R}^2 consisting of vectors u_1 = [2, 4] and u_2 = [1, -1]. If y = [8, 12], find the C-coordinate vector of y.'), - dict(role='BOT', prompt="Solution:\nThe goal is to express y as a linear comPbination of the basis vectors of C, i.e., $y = a\\cdot u_1 + b\\cdot u_2$, where a and b are the scalar coefficients that we want to find. These coefficients will form the C-coordinate vector of y, which we'll denote as $[a, b]_C$.\n\nGiven:\n- $u_1 = [2, 4]$,\n- $u_2 = [1, -1]$,\n- $y = [8, 12]$.\n\nWe need to solve the system of linear equations:\n2a + 1b = 8\n4a - 1b = 12\n\nLet's solve this system of equations to find a and b.\n\nThe solution to the system of equations is $a = \\frac{10}{3} and b = \\frac{4}{3}$. Therefore, the C-coordinate vector of y in the basis consisting of vectors u_1 = [2, 4] and u_2 = [1, -1] is $\\left[\\frac{10}{3}, \\frac{4}{3}\\right]_C$. \nLet's calculate the numerical value of $\\left[\x0crac{10}{3}, \x0crac{4}{3}\right]_C$ as [3.33, 1.33].\nThe answer is [3.33, 1.33]"), - dict(role='HUMAN', prompt='\nProblem:\nOne can draw a simple, connected planar graph with 200 vertices and 397 edges. Is this statement Trur or False?'), - dict(role='BOT', prompt="Solution:\nTo determine the answer, we can use Euler's formula for planar graphs, which states that for any finite, connected, planar graph, $V - E + F = 2$, where V is the number of vertices, E is the number of edges, and F is the number of faces.\n\nGiven the modified question, we have V = 200 vertices and E = 397 edges. We want to find if we can have a graph that satisfies these conditions, adhering to Euler's formula.\n\nFirst, let's rearrange Euler's formula to solve for F: F = E - V + 2\n\nSubstituting the given values: F = 397 - 200 + 2, F = 199\n\nThis means a graph with 200 vertices and 397 edges would have 199 faces. 
However, to determine the truth of this possibility, we should check if this graph doesn't violate any other planar graph constraints, particularly regarding the number of edges.\n\nFor a simple, connected planar graph, there's also a relationship between vertices, edges, and faces given by the inequality: $E \\leq 3V - 6$\n\nSubstituting V = 200 gives: $E \\leq 3*200 - 6 = 594$\n\nWith E = 397, the condition $E \\leq 594$ is satisfied, meaning it's theoretically possible in terms of the edge condition for a planar graph.\n\nTherefore, one can draw a simple, connected planar graph with 200 vertices and 397 edges, resulting in 199 faces, without violating the conditions for it to be planar according to both Euler's formula and the constraint on the maximum number of edges.\nThe answer is True"), - dict(role='HUMAN', prompt='\nProblem:\nGiven a finite group G, and a collection of permutations H on a set. Then (a) there always exists H such that G is isomorphic to H; (b) for any H, G is isomorphic to H; (c) G can never be isomorphic to H; (d) none of the above. Which option is correct?'), - dict(role='BOT', prompt="Solution:\nThis is based on Cayley's theorem, which states that every group G is isomorphic to a subgroup of the symmetric group acting on G. \nIn other words, for every finite group G, there exists a collection of permutations H (which in this context, can be thought of as the set of permutations representing the action of G on itself) such that G is isomorphic to H.\n\nTherefore, there always exists H such that G is isomorphic to H.\nThe answer is (a)"), - dict(role='HUMAN', prompt='\nProblem:\n{Question}'), - dict(role='BOT', prompt='Solution:\n{Answer}'), - ] - ), - ), - retriever=dict(type=ZeroRetriever), - inferencer=dict(type=GenInferencer, max_out_len=1024, stopping_criteria=["USER:", "ASSISTANT:", "### Instruction:", "Response:", "", "[INST]", "Problem:"]), -) - -TheoremQA_eval_cfg = dict( - evaluator=dict(type=TheoremQAEvaluatorV3), - pred_postprocessor=dict(type=TheoremQA_postprocess_v3) -) - -TheoremQA_datasets = [ - dict( - abbr="TheoremQA", - type=HFDataset, - path="TIGER-Lab/TheoremQA", - reader_cfg=TheoremQA_reader_cfg, - infer_cfg=TheoremQA_infer_cfg, - eval_cfg=TheoremQA_eval_cfg, - ) -] diff --git a/configs/datasets/bbh/bbh_gen_2879b0.py b/configs/datasets/bbh/bbh_gen_2879b0.py new file mode 100644 index 00000000..d962939f --- /dev/null +++ b/configs/datasets/bbh/bbh_gen_2879b0.py @@ -0,0 +1,56 @@ +import os +from mmengine.config import read_base +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import GenInferencer +from opencompass.datasets import BBHDataset, bbh_mcq_postprocess, BBHEvaluator, BBHEvaluator_mcq + +with read_base(): + from .bbh_subset_settings import settings + +bbh_datasets = [] +for name, test_type in settings: + with open(os.path.join(os.path.dirname(__file__), 'lib_prompt', f'{name}.txt'), 'r') as f: + hint = f.read() + + task_prompt, body = hint.split('\n\nQ:', 1) + sections = ('Q:' + body).split('\n\n') + prompt_rounds = [] + for index, section in enumerate(sections): + question, answer = section.split('\nA:') + answer = 'A:' + answer + if index == 0: + desc = task_prompt.strip() + '\n' + else: + desc = '' + prompt_rounds.append(dict(role="HUMAN", prompt=f"{desc}{question.strip()}")) + prompt_rounds.append(dict(role="BOT", prompt=answer.strip())) + prompt_rounds.append(dict(role="HUMAN", prompt="Q: {input}")) + + 
bbh_reader_cfg = dict(input_columns=["input"], output_column="target") + + bbh_infer_cfg = dict( + prompt_template=dict(type=PromptTemplate, template=dict(round=prompt_rounds)), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=GenInferencer, max_out_len=512)) + + if test_type == 'mcq': + bbh_eval_cfg = dict( + evaluator=dict(type=BBHEvaluator_mcq), + pred_role="BOT", + pred_postprocessor=dict(type=bbh_mcq_postprocess), + dataset_postprocessor=dict(type=bbh_mcq_postprocess)) + else: + bbh_eval_cfg = dict( + evaluator=dict(type=BBHEvaluator), + pred_role="BOT") + + bbh_datasets.append( + dict( + type=BBHDataset, + path="./data/BBH/data", + name=name, + abbr='bbh-' + name, + reader_cfg=bbh_reader_cfg.copy(), + infer_cfg=bbh_infer_cfg.copy(), + eval_cfg=bbh_eval_cfg.copy())) diff --git a/configs/datasets/bbh/bbh_subset_settings.py b/configs/datasets/bbh/bbh_subset_settings.py new file mode 100644 index 00000000..fbed7511 --- /dev/null +++ b/configs/datasets/bbh/bbh_subset_settings.py @@ -0,0 +1,29 @@ +settings = [ + ('temporal_sequences', 'mcq'), + ('disambiguation_qa', 'mcq'), + ('date_understanding', 'mcq'), + ('tracking_shuffled_objects_three_objects', 'mcq'), + ('penguins_in_a_table', 'mcq'), + ('geometric_shapes', 'mcq'), + ('snarks', 'mcq'), + ('ruin_names', 'mcq'), + ('tracking_shuffled_objects_seven_objects', 'mcq'), + ('tracking_shuffled_objects_five_objects', 'mcq'), + ('logical_deduction_three_objects', 'mcq'), + ('hyperbaton', 'mcq'), + ('logical_deduction_five_objects', 'mcq'), + ('logical_deduction_seven_objects', 'mcq'), + ('movie_recommendation', 'mcq'), + ('salient_translation_error_detection', 'mcq'), + ('reasoning_about_colored_objects', 'mcq'), + ('multistep_arithmetic_two', 'free_form'), + ('navigate', 'free_form'), + ('dyck_languages', 'free_form'), + ('word_sorting', 'free_form'), + ('sports_understanding', 'free_form'), + ('boolean_expressions', 'free_form'), + ('object_counting', 'free_form'), + ('formal_fallacies', 'free_form'), + ('causal_judgement', 'free_form'), + ('web_of_lies', 'free_form'), +] diff --git a/configs/datasets/collections/chat_medium.py b/configs/datasets/collections/chat_medium.py index bf2fef58..c2874bc5 100644 --- a/configs/datasets/collections/chat_medium.py +++ b/configs/datasets/collections/chat_medium.py @@ -47,7 +47,7 @@ with read_base(): from ..piqa.piqa_gen_1194eb import piqa_datasets from ..siqa.siqa_gen_e78df3 import siqa_datasets from ..strategyqa.strategyqa_gen_1180a7 import strategyqa_datasets - from ..winogrande.winogrande_gen_a9ede5 import winogrande_datasets + from ..winogrande.deprecated_winogrande_gen_a9ede5 import winogrande_datasets from ..obqa.obqa_gen_9069e4 import obqa_datasets from ..nq.nq_gen_c788f6 import nq_datasets from ..triviaqa.triviaqa_gen_2121ce import triviaqa_datasets diff --git a/configs/datasets/collections/chat_small.py b/configs/datasets/collections/chat_small.py index dce15420..47648021 100644 --- a/configs/datasets/collections/chat_small.py +++ b/configs/datasets/collections/chat_small.py @@ -31,7 +31,7 @@ with read_base(): from ..summedits.summedits_gen_315438 import summedits_datasets from ..hellaswag.hellaswag_gen_6faab5 import hellaswag_datasets from ..piqa.piqa_gen_1194eb import piqa_datasets - from ..winogrande.winogrande_gen_a9ede5 import winogrande_datasets + from ..winogrande.deprecated_winogrande_gen_a9ede5 import winogrande_datasets from ..obqa.obqa_gen_9069e4 import obqa_datasets from ..nq.nq_gen_c788f6 import nq_datasets from ..triviaqa.triviaqa_gen_2121ce import triviaqa_datasets 
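The new `bbh_gen_2879b0.py` above derives its few-shot dialogue by splitting each `lib_prompt/<task>.txt` hint into alternating question/answer rounds. The standalone sketch below replays that parsing step on a small inline hint string so the resulting round structure is easy to inspect; the two boolean-expression examples are made up for illustration and are not taken from the real BBH prompt files.

```python
# Stand-in for the contents of a lib_prompt/<task>.txt file (illustrative only).
hint = (
    'Evaluate the given boolean expression.\n\n'
    'Q: not ( True ) and ( True ) is\nA: False\n\n'
    'Q: True and not not ( not False ) is\nA: True'
)

# Same splitting logic as bbh_gen_2879b0.py: the task description comes first,
# followed by one "Q: ...\nA: ..." block per few-shot example.
task_prompt, body = hint.split('\n\nQ:', 1)
sections = ('Q:' + body).split('\n\n')

prompt_rounds = []
for index, section in enumerate(sections):
    question, answer = section.split('\nA:')
    answer = 'A:' + answer
    desc = task_prompt.strip() + '\n' if index == 0 else ''
    prompt_rounds.append(dict(role='HUMAN', prompt=f'{desc}{question.strip()}'))
    prompt_rounds.append(dict(role='BOT', prompt=answer.strip()))
prompt_rounds.append(dict(role='HUMAN', prompt='Q: {input}'))

for turn in prompt_rounds:
    print(turn['role'], repr(turn['prompt']))
```

A hint file whose examples do not split cleanly on `\nA:` raises a `ValueError` here, which makes this a quick sanity check for malformed prompt files.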
diff --git a/configs/datasets/winogrande/winogrande_gen_a9ede5.py b/configs/datasets/winogrande/deprecated_winogrande_gen_a9ede5.py similarity index 100% rename from configs/datasets/winogrande/winogrande_gen_a9ede5.py rename to configs/datasets/winogrande/deprecated_winogrande_gen_a9ede5.py diff --git a/configs/datasets/winogrande/winogrande_5shot_gen_b36770.py b/configs/datasets/winogrande/winogrande_5shot_gen_b36770.py new file mode 100644 index 00000000..60dca51e --- /dev/null +++ b/configs/datasets/winogrande/winogrande_5shot_gen_b36770.py @@ -0,0 +1,46 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import FixKRetriever +from opencompass.openicl.icl_inferencer import GenInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import winograndeDataset_V3 +from opencompass.utils.text_postprocessors import first_option_postprocess + +winogrande_reader_cfg = dict( + input_columns=["prompt", "only_option1", "only_option2"], + output_column="answer", + train_split="train_xs", + test_split="dev", +) + +winogrande_infer_cfg = dict( + ice_template=dict( + type=PromptTemplate, + template=dict( + begin="", + round=[ + dict(role="HUMAN", prompt="Question: {prompt}\nA. {only_option1}\nB. {only_option2}\nAnswer:"), + dict(role="BOT", prompt="{answer}"), + ] + ), + ice_token="", + ), + retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4, 6, 8]), + inferencer=dict(type=GenInferencer), +) + +winogrande_eval_cfg = dict( + evaluator=dict(type=AccEvaluator), + pred_role="BOT", + pred_postprocessor=dict(type=first_option_postprocess, options="AB"), +) + +winogrande_datasets = [ + dict( + abbr="winogrande", + type=winograndeDataset_V3, + path="./data/winogrande", + reader_cfg=winogrande_reader_cfg, + infer_cfg=winogrande_infer_cfg, + eval_cfg=winogrande_eval_cfg, + ) +] diff --git a/configs/datasets/winogrande/winogrande_gen.py b/configs/datasets/winogrande/winogrande_gen.py index ddf8330d..0aca8fe9 100644 --- a/configs/datasets/winogrande/winogrande_gen.py +++ b/configs/datasets/winogrande/winogrande_gen.py @@ -1,4 +1,4 @@ from mmengine.config import read_base with read_base(): - from .winogrande_gen_a9ede5 import winogrande_datasets # noqa: F401, F403 + from .winogrande_gen_458220 import winogrande_datasets # noqa: F401, F403 diff --git a/configs/datasets/winogrande/winogrande_gen_458220.py b/configs/datasets/winogrande/winogrande_gen_458220.py new file mode 100644 index 00000000..0a83c029 --- /dev/null +++ b/configs/datasets/winogrande/winogrande_gen_458220.py @@ -0,0 +1,41 @@ +from opencompass.openicl.icl_prompt_template import PromptTemplate +from opencompass.openicl.icl_retriever import ZeroRetriever +from opencompass.openicl.icl_inferencer import GenInferencer +from opencompass.openicl.icl_evaluator import AccEvaluator +from opencompass.datasets import winograndeDataset_V2 +from opencompass.utils.text_postprocessors import first_option_postprocess + +winogrande_reader_cfg = dict( + input_columns=["prompt", "only_option1", "only_option2"], + output_column="answer", +) + +winogrande_infer_cfg = dict( + prompt_template=dict( + type=PromptTemplate, + template=dict( + round=[ + dict(role="HUMAN", prompt="Question: {prompt}\nA. {only_option1}\nB. 
{only_option2}\nAnswer:"), + ] + ), + ), + retriever=dict(type=ZeroRetriever), + inferencer=dict(type=GenInferencer), +) + +winogrande_eval_cfg = dict( + evaluator=dict(type=AccEvaluator), + pred_role="BOT", + pred_postprocessor=dict(type=first_option_postprocess, options='AB'), +) + +winogrande_datasets = [ + dict( + abbr="winogrande", + type=winograndeDataset_V2, + path='./data/winogrande', + reader_cfg=winogrande_reader_cfg, + infer_cfg=winogrande_infer_cfg, + eval_cfg=winogrande_eval_cfg, + ) +] diff --git a/configs/eval_llama3_instruct.py b/configs/eval_llama3_instruct.py new file mode 100644 index 00000000..9645c2b6 --- /dev/null +++ b/configs/eval_llama3_instruct.py @@ -0,0 +1,52 @@ +from mmengine.config import read_base + +with read_base(): + from .dataset_collections.chat_OC15 import datasets + + from .models.hf_llama.hf_llama3_8b_instruct import models as hf_llama3_8b_instruct_model + + from .summarizers.chat_OC15 import summarizer + + +work_dir = 'outputs/debug/llama3-instruct' + +models = sum([v for k, v in locals().items() if k.endswith("_model")], []) + +# dataset version metric mode llama-3-8b-instruct-hf +# -------------------- --------- ---------------------------- ------ ------------------------ +# average - naive_average gen 55.64 +# mmlu - naive_average gen 68.30 +# cmmlu - naive_average gen 53.29 +# ceval - naive_average gen 52.32 +# GaokaoBench - weighted_average gen 45.91 +# triviaqa_wiki_1shot eaf81e score gen 79.01 +# nq_open_1shot 01cf41 score gen 30.25 +# race-high 9a54b6 accuracy gen 81.22 +# winogrande b36770 accuracy gen 66.46 +# hellaswag e42710 accuracy gen 74.33 +# bbh - naive_average gen 67.25 +# gsm8k 1d7fe4 accuracy gen 79.08 +# math 393424 accuracy gen 27.78 +# TheoremQA 6f0af8 score gen 19.50 +# openai_humaneval 8e312c humaneval_pass@1 gen 55.49 +# sanitized_mbpp 830460 score gen 66.54 +# GPQA_diamond 4baadb accuracy gen 25.76 +# IFEval 3321a3 Prompt-level-strict-accuracy gen 67.84 +# - - - - +# mmlu - naive_average gen 68.30 +# mmlu-stem - naive_average gen 57.92 +# mmlu-social-science - naive_average gen 77.83 +# mmlu-humanities - naive_average gen 71.20 +# mmlu-other - naive_average gen 71.79 +# cmmlu - naive_average gen 53.29 +# cmmlu-stem - naive_average gen 45.40 +# cmmlu-social-science - naive_average gen 54.63 +# cmmlu-humanities - naive_average gen 54.14 +# cmmlu-other - naive_average gen 59.52 +# cmmlu-china-specific - naive_average gen 49.33 +# ceval - naive_average gen 52.32 +# ceval-stem - naive_average gen 48.16 +# ceval-social-science - naive_average gen 57.50 +# ceval-humanities - naive_average gen 53.26 +# ceval-other - naive_average gen 54.26 +# ceval-hard - naive_average gen 35.59 diff --git a/configs/models/aquila/hf_aquila2_34b.py b/configs/models/aquila/hf_aquila2_34b.py index e0194a5a..bb84b549 100644 --- a/configs/models/aquila/hf_aquila2_34b.py +++ b/configs/models/aquila/hf_aquila2_34b.py @@ -1,24 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='aquila2-34b-hf', - path="BAAI/Aquila2-34B", - tokenizer_path='BAAI/Aquila2-34B', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - max_out_len=100, - max_seq_len=2048, + path='BAAI/Aquila2-34B', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=2, num_procs=1), + run_cfg=dict(num_gpus=2), ) 
] diff --git a/configs/models/aquila/hf_aquila2_7b.py b/configs/models/aquila/hf_aquila2_7b.py index 95af1f7d..0dab2538 100644 --- a/configs/models/aquila/hf_aquila2_7b.py +++ b/configs/models/aquila/hf_aquila2_7b.py @@ -1,24 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='aquila2-7b-hf', - path="BAAI/Aquila2-7B", - tokenizer_path='BAAI/Aquila2-7B', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - max_out_len=100, - max_seq_len=2048, + path='BAAI/Aquila2-7B', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/aquila/hf_aquilachat2_34b.py b/configs/models/aquila/hf_aquilachat2_34b.py index 112b39df..b096239e 100644 --- a/configs/models/aquila/hf_aquilachat2_34b.py +++ b/configs/models/aquila/hf_aquilachat2_34b.py @@ -5,7 +5,6 @@ _meta_template = dict( dict(role='HUMAN', begin='### Human: ', end='\n'), dict(role='BOT', begin='### Assistant: ', end='', generate=True), ], - eos_token_id=100007, ) models = [ diff --git a/configs/models/aquila/hf_aquilachat2_34b_16k.py b/configs/models/aquila/hf_aquilachat2_34b_16k.py index ccf28dde..b3c6b6c2 100644 --- a/configs/models/aquila/hf_aquilachat2_34b_16k.py +++ b/configs/models/aquila/hf_aquilachat2_34b_16k.py @@ -6,7 +6,6 @@ _meta_template = dict( dict(role='HUMAN', begin='Human: ', end='###'), dict(role='BOT', begin='Assistant: ', end='', generate=True), ], - eos_token_id=100007, ) models = [ diff --git a/configs/models/aquila/hf_aquilachat2_7b.py b/configs/models/aquila/hf_aquilachat2_7b.py index ff964d05..3b318c3c 100644 --- a/configs/models/aquila/hf_aquilachat2_7b.py +++ b/configs/models/aquila/hf_aquilachat2_7b.py @@ -5,7 +5,6 @@ _meta_template = dict( dict(role='HUMAN', begin='<|startofpiece|>', end=''), dict(role='BOT', begin='<|endofpiece|>', end='', generate=True), ], - eos_token_id=2, ) models = [ diff --git a/configs/models/aquila/hf_aquilachat2_7b_16k.py b/configs/models/aquila/hf_aquilachat2_7b_16k.py index 55794259..40b8619e 100644 --- a/configs/models/aquila/hf_aquilachat2_7b_16k.py +++ b/configs/models/aquila/hf_aquilachat2_7b_16k.py @@ -6,7 +6,6 @@ _meta_template = dict( dict(role='HUMAN', begin='Human: ', end='###'), dict(role='BOT', begin='Assistant: ', end='', generate=True), ], - eos_token_id=100007, ) models = [ diff --git a/configs/models/baichuan/hf_baichuan2_13b_base.py b/configs/models/baichuan/hf_baichuan2_13b_base.py index 39651a24..3d00925b 100644 --- a/configs/models/baichuan/hf_baichuan2_13b_base.py +++ b/configs/models/baichuan/hf_baichuan2_13b_base.py @@ -1,21 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='baichuan2-13b-base-hf', - path="baichuan-inc/Baichuan2-13B-Base", - tokenizer_path='baichuan-inc/Baichuan2-13B-Base', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - max_out_len=100, - max_seq_len=2048, + path='baichuan-inc/Baichuan2-13B-Base', + max_out_len=1024, batch_size=8, - model_kwargs=dict(device_map='auto', trust_remote_code=True), - run_cfg=dict(num_gpus=2, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git 
a/configs/models/baichuan/hf_baichuan2_7b_base.py b/configs/models/baichuan/hf_baichuan2_7b_base.py index f351c870..88ebfde4 100644 --- a/configs/models/baichuan/hf_baichuan2_7b_base.py +++ b/configs/models/baichuan/hf_baichuan2_7b_base.py @@ -1,21 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='baichuan2-7b-base-hf', - path="baichuan-inc/Baichuan2-7B-Base", - tokenizer_path='baichuan-inc/Baichuan2-7B-Base', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - max_out_len=100, - max_seq_len=2048, + path='baichuan-inc/Baichuan2-7B-Base', + max_out_len=1024, batch_size=8, - model_kwargs=dict(device_map='auto', trust_remote_code=True), - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/bluelm/hf_bluelm_7b_base.py b/configs/models/bluelm/hf_bluelm_7b_base.py index d7689864..98ff33ba 100644 --- a/configs/models/bluelm/hf_bluelm_7b_base.py +++ b/configs/models/bluelm/hf_bluelm_7b_base.py @@ -1,24 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='bluelm-7b-base-hf', - path="vivo-ai/BlueLM-7B-Base", - tokenizer_path='vivo-ai/BlueLM-7B-Base', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - max_out_len=100, - max_seq_len=2048, + path='vivo-ai/BlueLM-7B-Base', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/bluelm/hf_bluelm_7b_base_32k.py b/configs/models/bluelm/hf_bluelm_7b_base_32k.py index f319456f..755dc25d 100644 --- a/configs/models/bluelm/hf_bluelm_7b_base_32k.py +++ b/configs/models/bluelm/hf_bluelm_7b_base_32k.py @@ -1,24 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='bluelm-7b-base-32k-hf', - path="vivo-ai/BlueLM-7B-Base-32K", - tokenizer_path='vivo-ai/BlueLM-7B-Base-32K', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - max_out_len=100, - max_seq_len=4096, + path='vivo-ai/BlueLM-7B-Base-32K', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/chatglm/hf_chatglm3_6b.py b/configs/models/chatglm/hf_chatglm3_6b.py index c7182e1d..44940963 100644 --- a/configs/models/chatglm/hf_chatglm3_6b.py +++ b/configs/models/chatglm/hf_chatglm3_6b.py @@ -1,31 +1,12 @@ -from opencompass.models import HuggingFaceChatGLM3 - -api_meta_template = dict( - round=[ - dict(role='HUMAN', api_role='HUMAN'), - dict(role='BOT', api_role='BOT', generate=True), - ] -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceChatGLM3, + type=HuggingFacewithChatTemplate, abbr='chatglm3-6b-hf', path='THUDM/chatglm3-6b', - tokenizer_path='THUDM/chatglm3-6b', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - 
truncation_side='left', - trust_remote_code=True, - ), - meta_template=api_meta_template, - max_out_len=100, - max_seq_len=4096, - batch_size=1, - run_cfg=dict(num_gpus=1, num_procs=1) + max_out_len=1024, + batch_size=8, + run_cfg=dict(num_gpus=1), ) -] \ No newline at end of file +] diff --git a/configs/models/chatglm/hf_chatglm3_6b_32k.py b/configs/models/chatglm/hf_chatglm3_6b_32k.py index 26fc9b49..2badff91 100644 --- a/configs/models/chatglm/hf_chatglm3_6b_32k.py +++ b/configs/models/chatglm/hf_chatglm3_6b_32k.py @@ -1,31 +1,12 @@ -from opencompass.models import HuggingFaceChatGLM3 - -api_meta_template = dict( - round=[ - dict(role='HUMAN', api_role='HUMAN'), - dict(role='BOT', api_role='BOT', generate=True), - ] -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceChatGLM3, + type=HuggingFacewithChatTemplate, abbr='chatglm3-6b-32k-hf', path='THUDM/chatglm3-6b-32k', - tokenizer_path='THUDM/chatglm3-6b-32k', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), - meta_template=api_meta_template, - max_out_len=100, - max_seq_len=4096, - batch_size=1, - run_cfg=dict(num_gpus=1, num_procs=1) + max_out_len=1024, + batch_size=8, + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/chatglm/hf_chatglm3_6b_base.py b/configs/models/chatglm/hf_chatglm3_6b_base.py index 17f5d5ba..f88c0aac 100644 --- a/configs/models/chatglm/hf_chatglm3_6b_base.py +++ b/configs/models/chatglm/hf_chatglm3_6b_base.py @@ -1,24 +1,12 @@ -from opencompass.models import HuggingFace - +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFace, + type=HuggingFaceBaseModel, abbr='chatglm3-6b-base-hf', path='THUDM/chatglm3-6b-base', - tokenizer_path='THUDM/chatglm3-6b-base', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), - max_out_len=100, - max_seq_len=4096, + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/codellama/hf_codellama_13b.py b/configs/models/codellama/hf_codellama_13b.py index 2267f923..4596c9dd 100644 --- a/configs/models/codellama/hf_codellama_13b.py +++ b/configs/models/codellama/hf_codellama_13b.py @@ -1,21 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ - # CodeLlama 13B dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='CodeLlama-13b', - path="codellama/CodeLlama-13b-hf", - tokenizer_path='codellama/CodeLlama-13b-hf', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), + path='codellama/CodeLlama-13b-hf', max_out_len=1024, - max_seq_len=2048, batch_size=8, - model_kwargs=dict(trust_remote_code=True, device_map='auto'), - run_cfg=dict(num_gpus=2, num_procs=1), - ), + run_cfg=dict(num_gpus=1), + ) ] diff --git a/configs/models/codellama/hf_codellama_13b_instruct.py b/configs/models/codellama/hf_codellama_13b_instruct.py index 01830015..a636c0fd 100644 --- a/configs/models/codellama/hf_codellama_13b_instruct.py +++ b/configs/models/codellama/hf_codellama_13b_instruct.py @@ -1,21 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFacewithChatTemplate models = [ - # CodeLlama 13B Instruct dict( - 
type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='CodeLlama-13b-Instruct', - path="codellama/CodeLlama-13b-Instruct-hf", - tokenizer_path='codellama/CodeLlama-13b-Instruct-hf', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), + path='codellama/CodeLlama-13b-Instruct-hf', max_out_len=1024, - max_seq_len=2048, batch_size=8, - model_kwargs=dict(trust_remote_code=True, device_map='auto'), - run_cfg=dict(num_gpus=2, num_procs=1), - ), + run_cfg=dict(num_gpus=1), + ) ] diff --git a/configs/models/codellama/hf_codellama_13b_python.py b/configs/models/codellama/hf_codellama_13b_python.py index 4c5ea0d1..07f44aa8 100644 --- a/configs/models/codellama/hf_codellama_13b_python.py +++ b/configs/models/codellama/hf_codellama_13b_python.py @@ -1,21 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ - # CodeLlama 13B Python dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='CodeLlama-13b-Python', - path="codellama/CodeLlama-13b-Python-hf", - tokenizer_path='codellama/CodeLlama-13b-Python-hf', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), + path='codellama/CodeLlama-13b-Python-hf', max_out_len=1024, - max_seq_len=2048, batch_size=8, - model_kwargs=dict(trust_remote_code=True, device_map='auto'), - run_cfg=dict(num_gpus=2, num_procs=1), - ), + run_cfg=dict(num_gpus=1), + ) ] diff --git a/configs/models/codellama/hf_codellama_34b.py b/configs/models/codellama/hf_codellama_34b.py index e6dbef89..599fadda 100644 --- a/configs/models/codellama/hf_codellama_34b.py +++ b/configs/models/codellama/hf_codellama_34b.py @@ -1,21 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ - # CodeLlama 34B dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='CodeLlama-34b', - path="codellama/CodeLlama-34b-hf", - tokenizer_path='codellama/CodeLlama-34b-hf', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), + path='codellama/CodeLlama-34b-hf', max_out_len=1024, - max_seq_len=2048, batch_size=8, - model_kwargs=dict(trust_remote_code=True, device_map='auto'), - run_cfg=dict(num_gpus=4, num_procs=1), - ), + run_cfg=dict(num_gpus=2), + ) ] diff --git a/configs/models/codellama/hf_codellama_34b_instruct.py b/configs/models/codellama/hf_codellama_34b_instruct.py index 63894fd2..ae2ae79b 100644 --- a/configs/models/codellama/hf_codellama_34b_instruct.py +++ b/configs/models/codellama/hf_codellama_34b_instruct.py @@ -1,21 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFacewithChatTemplate models = [ - # CodeLlama 34B Instruct dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='CodeLlama-34b-Instruct', - path="codellama/CodeLlama-34b-Instruct-hf", - tokenizer_path='codellama/CodeLlama-34b-Instruct-hf', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), + path='codellama/CodeLlama-34b-Instruct-hf', max_out_len=1024, - max_seq_len=2048, batch_size=8, - model_kwargs=dict(trust_remote_code=True, device_map='auto'), - run_cfg=dict(num_gpus=4, num_procs=1), - ), + run_cfg=dict(num_gpus=2), + ) ] diff --git a/configs/models/codellama/hf_codellama_34b_python.py b/configs/models/codellama/hf_codellama_34b_python.py index 4ac82de8..a58d61f8 100644 --- 
a/configs/models/codellama/hf_codellama_34b_python.py +++ b/configs/models/codellama/hf_codellama_34b_python.py @@ -1,21 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ - # CodeLlama 34B Python dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='CodeLlama-34b-Python', - path="codellama/CodeLlama-34b-Python-hf", - tokenizer_path='codellama/CodeLlama-34b-Python-hf', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), + path='codellama/CodeLlama-34b-Python-hf', max_out_len=1024, - max_seq_len=2048, batch_size=8, - model_kwargs=dict(trust_remote_code=True, device_map='auto'), - run_cfg=dict(num_gpus=4, num_procs=1), - ), + run_cfg=dict(num_gpus=2), + ) ] diff --git a/configs/models/codellama/hf_codellama_70b.py b/configs/models/codellama/hf_codellama_70b.py new file mode 100644 index 00000000..7fd8e866 --- /dev/null +++ b/configs/models/codellama/hf_codellama_70b.py @@ -0,0 +1,12 @@ +from opencompass.models import HuggingFaceBaseModel + +models = [ + dict( + type=HuggingFaceBaseModel, + abbr='CodeLlama-70b', + path='codellama/CodeLlama-70b-hf', + max_out_len=1024, + batch_size=8, + run_cfg=dict(num_gpus=4), + ) +] diff --git a/configs/models/codellama/hf_codellama_70b_instruct.py b/configs/models/codellama/hf_codellama_70b_instruct.py new file mode 100644 index 00000000..38c15871 --- /dev/null +++ b/configs/models/codellama/hf_codellama_70b_instruct.py @@ -0,0 +1,12 @@ +from opencompass.models import HuggingFacewithChatTemplate + +models = [ + dict( + type=HuggingFacewithChatTemplate, + abbr='CodeLlama-70b-Instruct', + path='codellama/CodeLlama-70b-Instruct-hf', + max_out_len=1024, + batch_size=8, + run_cfg=dict(num_gpus=4), + ) +] diff --git a/configs/models/codellama/hf_codellama_70b_python.py b/configs/models/codellama/hf_codellama_70b_python.py new file mode 100644 index 00000000..83d62f2d --- /dev/null +++ b/configs/models/codellama/hf_codellama_70b_python.py @@ -0,0 +1,12 @@ +from opencompass.models import HuggingFaceBaseModel + +models = [ + dict( + type=HuggingFaceBaseModel, + abbr='CodeLlama-70b-Python', + path='codellama/CodeLlama-70b-Python-hf', + max_out_len=1024, + batch_size=8, + run_cfg=dict(num_gpus=4), + ) +] diff --git a/configs/models/codellama/hf_codellama_7b.py b/configs/models/codellama/hf_codellama_7b.py index b66f5095..a4c57dfd 100644 --- a/configs/models/codellama/hf_codellama_7b.py +++ b/configs/models/codellama/hf_codellama_7b.py @@ -1,21 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ - # CodeLlama 7B dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='CodeLlama-7b', - path="codellama/CodeLlama-7b-hf", - tokenizer_path='codellama/CodeLlama-7b-hf', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), + path='codellama/CodeLlama-7b-hf', max_out_len=1024, - max_seq_len=2048, batch_size=8, - model_kwargs=dict(trust_remote_code=True, device_map='auto'), - run_cfg=dict(num_gpus=1, num_procs=1), - ), + run_cfg=dict(num_gpus=1), + ) ] diff --git a/configs/models/codellama/hf_codellama_7b_instruct.py b/configs/models/codellama/hf_codellama_7b_instruct.py index 1ae4ef84..aadb87e2 100644 --- a/configs/models/codellama/hf_codellama_7b_instruct.py +++ b/configs/models/codellama/hf_codellama_7b_instruct.py @@ -1,21 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from 
opencompass.models import HuggingFacewithChatTemplate models = [ - # CodeLlama 7B Instruct dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='CodeLlama-7b-Instruct', - path="codellama/CodeLlama-7b-Instruct-hf", - tokenizer_path='codellama/CodeLlama-7b-Instruct-hf', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), + path='codellama/CodeLlama-7b-Instruct-hf', max_out_len=1024, - max_seq_len=2048, batch_size=8, - model_kwargs=dict(trust_remote_code=True, device_map='auto'), - run_cfg=dict(num_gpus=1, num_procs=1), - ), + run_cfg=dict(num_gpus=1), + ) ] diff --git a/configs/models/codellama/hf_codellama_7b_python.py b/configs/models/codellama/hf_codellama_7b_python.py index b0cae6da..6504fa36 100644 --- a/configs/models/codellama/hf_codellama_7b_python.py +++ b/configs/models/codellama/hf_codellama_7b_python.py @@ -1,21 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ - # CodeLlama 7B Python dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='CodeLlama-7b-Python', - path="codellama/CodeLlama-7b-Python-hf", - tokenizer_path='codellama/CodeLlama-7b-Python-hf', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), + path='codellama/CodeLlama-7b-Python-hf', max_out_len=1024, - max_seq_len=2048, batch_size=8, - model_kwargs=dict(trust_remote_code=True, device_map='auto'), - run_cfg=dict(num_gpus=1, num_procs=1), - ), + run_cfg=dict(num_gpus=1), + ) ] diff --git a/configs/models/deepseek/hf_deepseek_67b_base.py b/configs/models/deepseek/hf_deepseek_67b_base.py index c7378dae..1ec3e211 100644 --- a/configs/models/deepseek/hf_deepseek_67b_base.py +++ b/configs/models/deepseek/hf_deepseek_67b_base.py @@ -1,24 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='deepseek-67b-base-hf', - path="deepseek-ai/deepseek-llm-67b-base", - tokenizer_path='deepseek-ai/deepseek-llm-67b-base', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - max_out_len=100, - max_seq_len=2048, + path='deepseek-ai/deepseek-llm-67b-base', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=4, num_procs=1), + run_cfg=dict(num_gpus=4), ) ] diff --git a/configs/models/deepseek/hf_deepseek_67b_chat.py b/configs/models/deepseek/hf_deepseek_67b_chat.py index 8eaadfc7..d56d567e 100644 --- a/configs/models/deepseek/hf_deepseek_67b_chat.py +++ b/configs/models/deepseek/hf_deepseek_67b_chat.py @@ -1,33 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - begin='<|begin▁of▁sentence|>', - round=[ - dict(role="HUMAN", begin='User: ', end='\n\n'), - dict(role="BOT", begin="Assistant: ", end='<|end▁of▁sentence|>', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='deepseek-67b-chat-hf', - path="deepseek-ai/deepseek-llm-67b-chat", - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - meta_template=_meta_template, - max_out_len=100, - 
max_seq_len=2048, + path='deepseek-ai/deepseek-llm-67b-chat', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=4, num_procs=1), - batch_padding=True, + run_cfg=dict(num_gpus=4), ) ] diff --git a/configs/models/deepseek/hf_deepseek_7b_base.py b/configs/models/deepseek/hf_deepseek_7b_base.py index 9985932f..0aea3b62 100644 --- a/configs/models/deepseek/hf_deepseek_7b_base.py +++ b/configs/models/deepseek/hf_deepseek_7b_base.py @@ -1,24 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='deepseek-7b-base-hf', - path="deepseek-ai/deepseek-llm-7b-base", - tokenizer_path='deepseek-ai/deepseek-llm-7b-base', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - max_out_len=100, - max_seq_len=2048, + path='deepseek-ai/deepseek-llm-7b-base', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/deepseek/hf_deepseek_7b_chat.py b/configs/models/deepseek/hf_deepseek_7b_chat.py index 2531961b..3ed5044a 100644 --- a/configs/models/deepseek/hf_deepseek_7b_chat.py +++ b/configs/models/deepseek/hf_deepseek_7b_chat.py @@ -1,33 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - begin='<|begin▁of▁sentence|>', - round=[ - dict(role="HUMAN", begin='User: ', end='\n\n'), - dict(role="BOT", begin="Assistant: ", end='<|end▁of▁sentence|>', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='deepseek-7b-chat-hf', - path="deepseek-ai/deepseek-llm-7b-chat", - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, + path='deepseek-ai/deepseek-llm-7b-chat', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), - batch_padding=True, + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/deepseek/hf_deepseek_coder_1_3b_instruct.py b/configs/models/deepseek/hf_deepseek_coder_1_3b_instruct.py index c53b07b9..85cafd9b 100644 --- a/configs/models/deepseek/hf_deepseek_coder_1_3b_instruct.py +++ b/configs/models/deepseek/hf_deepseek_coder_1_3b_instruct.py @@ -1,34 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='### Instruction:\n', end='\n'), - dict(role="BOT", begin="### Response:\n", end='<|EOT|>', generate=True), - ], - eos_token_id=100001, -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='deepseek-coder-1.3b-hf', - path="deepseek-ai/deepseek-coder-1.3b-instruct", - tokenizer_path='deepseek-ai/deepseek-coder-1.3b-instruct', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - meta_template=_meta_template, - max_out_len=2048, - max_seq_len=2048, + path='deepseek-ai/deepseek-coder-1.3b-instruct', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, 
num_procs=1), - end_str='<|EOT|>', + run_cfg=dict(num_gpus=1), ) -] \ No newline at end of file +] diff --git a/configs/models/deepseek/hf_deepseek_coder_33b_instruct.py b/configs/models/deepseek/hf_deepseek_coder_33b_instruct.py index 87bdf3ca..3e3e21dd 100644 --- a/configs/models/deepseek/hf_deepseek_coder_33b_instruct.py +++ b/configs/models/deepseek/hf_deepseek_coder_33b_instruct.py @@ -1,34 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='### Instruction:\n', end='\n'), - dict(role="BOT", begin="### Response:\n", end='<|EOT|>', generate=True), - ], - eos_token_id=100001, -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='deepseek-coder-33b-hf', - path="deepseek-ai/deepseek-coder-33b-instruct", - tokenizer_path='deepseek-ai/deepseek-coder-33b-instruct', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - meta_template=_meta_template, - max_out_len=2048, - max_seq_len=2048, + path='deepseek-ai/deepseek-coder-33b-instruct', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=4, num_procs=1), - end_str='<|EOT|>', + run_cfg=dict(num_gpus=2), ) -] \ No newline at end of file +] diff --git a/configs/models/deepseek/hf_deepseek_coder_6_7b_instruct.py b/configs/models/deepseek/hf_deepseek_coder_6_7b_instruct.py index 59669a9a..110e77ec 100644 --- a/configs/models/deepseek/hf_deepseek_coder_6_7b_instruct.py +++ b/configs/models/deepseek/hf_deepseek_coder_6_7b_instruct.py @@ -1,34 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='### Instruction:\n', end='\n'), - dict(role="BOT", begin="### Response:\n", end='<|EOT|>', generate=True), - ], - eos_token_id=100001, -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='deepseek-coder-6.7b-hf', - path="deepseek-ai/deepseek-coder-6.7b-instruct", - tokenizer_path='deepseek-ai/deepseek-coder-6.7b-instruct', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - meta_template=_meta_template, - max_out_len=2048, - max_seq_len=2048, + path='deepseek-ai/deepseek-coder-6.7b-instruct', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), - end_str='<|EOT|>', + run_cfg=dict(num_gpus=1), ) -] \ No newline at end of file +] diff --git a/configs/models/deepseek/hf_deepseek_moe_16b_base.py b/configs/models/deepseek/hf_deepseek_moe_16b_base.py index 0c7f75c8..4f299d74 100644 --- a/configs/models/deepseek/hf_deepseek_moe_16b_base.py +++ b/configs/models/deepseek/hf_deepseek_moe_16b_base.py @@ -1,24 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='deepseek-moe-16b-base-hf', - path="deepseek-ai/deepseek-moe-16b-base", - tokenizer_path='deepseek-ai/deepseek-moe-16b-base', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), - max_out_len=100, - 
min_out_len=3, - max_seq_len=2048, + path='deepseek-ai/deepseek-moe-16b-base', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=2, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/deepseek/hf_deepseek_moe_16b_chat.py b/configs/models/deepseek/hf_deepseek_moe_16b_chat.py index a009ff0c..85460f65 100644 --- a/configs/models/deepseek/hf_deepseek_moe_16b_chat.py +++ b/configs/models/deepseek/hf_deepseek_moe_16b_chat.py @@ -1,32 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - begin='<|begin▁of▁sentence|>', - round=[ - dict(role="HUMAN", begin='User: ', end='\n\n'), - dict(role="BOT", begin="Assistant: ", end='<|end▁of▁sentence|>', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='deepseek-moe-16b-chat-hf', - path="deepseek-ai/deepseek-moe-16b-chat", - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, + path='deepseek-ai/deepseek-moe-16b-chat', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), - batch_padding=True, + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/falcon/hf_falcon_40b.py b/configs/models/falcon/hf_falcon_40b.py index da089dfc..d5cf827b 100644 --- a/configs/models/falcon/hf_falcon_40b.py +++ b/configs/models/falcon/hf_falcon_40b.py @@ -1,21 +1,12 @@ -# Only torch >=2.0 is supported for falcon-40b -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='falcon-40b-hf', path='tiiuae/falcon-40b', - tokenizer_path='tiiuae/falcon-40b', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), - max_out_len=100, - max_seq_len=2048, + max_out_len=1024, batch_size=8, - model_kwargs=dict(trust_remote_code=True, device_map='auto', revision='561820f7eef0cc56a31ea38af15ca1acb07fab5d'), - run_cfg=dict(num_gpus=4, num_procs=1), + run_cfg=dict(num_gpus=4), ) ] diff --git a/configs/models/falcon/hf_falcon_7b.py b/configs/models/falcon/hf_falcon_7b.py index cab4f61a..6c42355c 100644 --- a/configs/models/falcon/hf_falcon_7b.py +++ b/configs/models/falcon/hf_falcon_7b.py @@ -1,21 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='falcon-7b-hf', path='tiiuae/falcon-7b', - tokenizer_path='tiiuae/falcon-7b', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), - max_out_len=100, - max_seq_len=2048, + max_out_len=1024, batch_size=8, - model_kwargs=dict(trust_remote_code=True, device_map='auto', revision='2f5c3cd4eace6be6c0f12981f377fb35e5bf6ee5'), - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/gemma/hf_gemma_2b.py b/configs/models/gemma/hf_gemma_2b.py index ec731c48..966ff397 100644 --- a/configs/models/gemma/hf_gemma_2b.py +++ b/configs/models/gemma/hf_gemma_2b.py @@ -1,23 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, 
abbr='gemma-2b-hf', - path="google/gemma-2b", - model_kwargs=dict( - device_map='auto', - trust_remote_code=True - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - max_out_len=100, - max_seq_len=2048, + path='google/gemma-2b', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/gemma/hf_gemma_2b_it.py b/configs/models/gemma/hf_gemma_2b_it.py index 0075484b..32f89e7d 100644 --- a/configs/models/gemma/hf_gemma_2b_it.py +++ b/configs/models/gemma/hf_gemma_2b_it.py @@ -1,33 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='user\n', end='\n'), - dict(role="BOT", begin="model\n", end='\n', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='gemma-2b-it-hf', - path="google/gemma-2b-it", - model_kwargs=dict( - device_map='auto', - trust_remote_code=True - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - meta_template=_meta_template, - min_out_len=1, - max_out_len=100, - max_seq_len=2048, + path='google/gemma-2b-it', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), - batch_padding=True, + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/gemma/hf_gemma_7b.py b/configs/models/gemma/hf_gemma_7b.py index 842ea263..d7c3b6d4 100644 --- a/configs/models/gemma/hf_gemma_7b.py +++ b/configs/models/gemma/hf_gemma_7b.py @@ -1,23 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='gemma-7b-hf', - path="google/gemma-7b", - model_kwargs=dict( - device_map='auto', - trust_remote_code=True - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - max_out_len=100, - max_seq_len=2048, + path='google/gemma-7b', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/gemma/hf_gemma_7b_it.py b/configs/models/gemma/hf_gemma_7b_it.py index b913db6e..4699a17b 100644 --- a/configs/models/gemma/hf_gemma_7b_it.py +++ b/configs/models/gemma/hf_gemma_7b_it.py @@ -1,33 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='user\n', end='\n'), - dict(role="BOT", begin="model\n", end='\n', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='gemma-7b-it-hf', - path="google/gemma-7b-it", - model_kwargs=dict( - device_map='auto', - trust_remote_code=True - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - meta_template=_meta_template, - min_out_len=1, - max_out_len=100, - max_seq_len=2048, + path='google/gemma-7b-it', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), - batch_padding=True, + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/hf_internlm/hf_internlm2_1_8b.py b/configs/models/hf_internlm/hf_internlm2_1_8b.py index 7f0ae9cd..3494c8a6 100644 --- 
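Note on the chat-model hunks above (deepseek-chat, gemma-it): the hand-written _meta_template blocks are dropped because HuggingFacewithChatTemplate defers prompt construction to the chat template shipped with each model's tokenizer. A minimal sketch of that mechanism using the standard transformers API follows; the model name is only an example, and any chat model whose tokenizer provides a chat template behaves the same way.

# Illustrative sketch: how a tokenizer-provided chat template renders a
# conversation, which is what HuggingFacewithChatTemplate relies on instead of
# per-model _meta_template definitions.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained('google/gemma-2b-it')  # example model
messages = [{'role': 'user', 'content': 'What is 2 + 2?'}]
prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,              # return the rendered prompt string
    add_generation_prompt=True,  # append the assistant-turn opener
)
print(prompt)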
a/configs/models/hf_internlm/hf_internlm2_1_8b.py +++ b/configs/models/hf_internlm/hf_internlm2_1_8b.py @@ -1,26 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='internlm2-1.8b-hf', path="internlm/internlm2-1_8b", - tokenizer_path='internlm/internlm2-1_8b', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - trust_remote_code=True, - ), - max_out_len=100, - min_out_len=1, - max_seq_len=2048, + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/hf_internlm/hf_internlm2_20b.py b/configs/models/hf_internlm/hf_internlm2_20b.py index 10ec2e41..f99518f9 100644 --- a/configs/models/hf_internlm/hf_internlm2_20b.py +++ b/configs/models/hf_internlm/hf_internlm2_20b.py @@ -1,26 +1,13 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='internlm2-20b-hf', path="internlm/internlm2-20b", - tokenizer_path='internlm/internlm2-20b', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - trust_remote_code=True, - ), - max_out_len=100, - min_out_len=1, - max_seq_len=2048, + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=2, num_procs=1), + run_cfg=dict(num_gpus=2), ) ] diff --git a/configs/models/hf_internlm/hf_internlm2_7b.py b/configs/models/hf_internlm/hf_internlm2_7b.py index 990d48f0..054f5e96 100644 --- a/configs/models/hf_internlm/hf_internlm2_7b.py +++ b/configs/models/hf_internlm/hf_internlm2_7b.py @@ -1,26 +1,13 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='internlm2-7b-hf', path="internlm/internlm2-7b", - tokenizer_path='internlm/internlm2-7b', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - trust_remote_code=True, - ), - max_out_len=100, - min_out_len=1, - max_seq_len=2048, + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/hf_internlm/hf_internlm2_base_20b.py b/configs/models/hf_internlm/hf_internlm2_base_20b.py index 1b825004..cb8ff73e 100644 --- a/configs/models/hf_internlm/hf_internlm2_base_20b.py +++ b/configs/models/hf_internlm/hf_internlm2_base_20b.py @@ -1,26 +1,13 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='internlm2-base-20b-hf', path="internlm/internlm2-base-20b", - tokenizer_path='internlm/internlm2-base-20b', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - trust_remote_code=True, - ), - max_out_len=100, - min_out_len=1, - max_seq_len=2048, + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=2, num_procs=1), + run_cfg=dict(num_gpus=2), ) ] diff --git 
a/configs/models/hf_internlm/hf_internlm2_base_7b.py b/configs/models/hf_internlm/hf_internlm2_base_7b.py index ae43fe84..d47cf3e0 100644 --- a/configs/models/hf_internlm/hf_internlm2_base_7b.py +++ b/configs/models/hf_internlm/hf_internlm2_base_7b.py @@ -1,26 +1,13 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='internlm2-base-7b-hf', path="internlm/internlm2-base-7b", - tokenizer_path='internlm/internlm2-base-7b', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - trust_remote_code=True, - ), - max_out_len=100, - min_out_len=1, - max_seq_len=2048, + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/hf_internlm/hf_internlm2_chat_1_8b.py b/configs/models/hf_internlm/hf_internlm2_chat_1_8b.py index 81c7d35d..18a70714 100644 --- a/configs/models/hf_internlm/hf_internlm2_chat_1_8b.py +++ b/configs/models/hf_internlm/hf_internlm2_chat_1_8b.py @@ -1,36 +1,13 @@ -from opencompass.models import HuggingFaceCausalLM - - -_meta_template = dict( - round=[ - dict(role='HUMAN', begin='<|im_start|>user\n', end='<|im_end|>\n'), - dict(role='BOT', begin='<|im_start|>assistant\n', end='<|im_end|>\n', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='internlm2-chat-1.8b-hf', - path="internlm/internlm2-chat-1_8b", - tokenizer_path='internlm/internlm2-chat-1_8b', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - trust_remote_code=True, - ), - max_out_len=100, - max_seq_len=2048, + path='internlm/internlm2-chat-1_8b', + max_out_len=1024, batch_size=8, - meta_template=_meta_template, - run_cfg=dict(num_gpus=1, num_procs=1), - end_str='<|im_end|>', - generation_kwargs = {"eos_token_id": [2, 92542]}, - batch_padding=True, + run_cfg=dict(num_gpus=1), + stop_words=['', '<|im_end|>'], ) ] diff --git a/configs/models/hf_internlm/hf_internlm2_chat_1_8b_sft.py b/configs/models/hf_internlm/hf_internlm2_chat_1_8b_sft.py index 6228ea57..aee7d21f 100644 --- a/configs/models/hf_internlm/hf_internlm2_chat_1_8b_sft.py +++ b/configs/models/hf_internlm/hf_internlm2_chat_1_8b_sft.py @@ -1,36 +1,13 @@ -from opencompass.models import HuggingFaceCausalLM - - -_meta_template = dict( - round=[ - dict(role='HUMAN', begin='<|im_start|>user\n', end='<|im_end|>\n'), - dict(role='BOT', begin='<|im_start|>assistant\n', end='<|im_end|>\n', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='internlm2-chat-1.8b-sft-hf', - path="internlm/internlm2-chat-1_8b-sft", - tokenizer_path='internlm/internlm2-chat-1_8b-sft', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - trust_remote_code=True, - ), - max_out_len=100, - max_seq_len=2048, + path='internlm/internlm2-chat-1_8b-sft', + max_out_len=1024, batch_size=8, - meta_template=_meta_template, - run_cfg=dict(num_gpus=1, num_procs=1), - end_str='<|im_end|>', - generation_kwargs = {"eos_token_id": [2, 
92542]}, - batch_padding=True, + run_cfg=dict(num_gpus=1), + stop_words=['', '<|im_end|>'], ) ] diff --git a/configs/models/hf_internlm/hf_internlm2_chat_20b.py b/configs/models/hf_internlm/hf_internlm2_chat_20b.py index c35e1701..69af8f88 100644 --- a/configs/models/hf_internlm/hf_internlm2_chat_20b.py +++ b/configs/models/hf_internlm/hf_internlm2_chat_20b.py @@ -1,36 +1,13 @@ -from opencompass.models import HuggingFaceCausalLM - - -_meta_template = dict( - round=[ - dict(role='HUMAN', begin='<|im_start|>user\n', end='<|im_end|>\n'), - dict(role='BOT', begin='<|im_start|>assistant\n', end='<|im_end|>\n', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='internlm2-chat-20b-hf', - path="internlm/internlm2-chat-20b", - tokenizer_path='internlm/internlm2-chat-20b', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - trust_remote_code=True, - ), - max_out_len=100, - max_seq_len=2048, + path='internlm/internlm2-chat-20b', + max_out_len=1024, batch_size=8, - meta_template=_meta_template, - run_cfg=dict(num_gpus=2, num_procs=1), - end_str='<|im_end|>', - generation_kwargs = {"eos_token_id": [2, 92542]}, - batch_padding=True, + run_cfg=dict(num_gpus=2), + stop_words=['', '<|im_end|>'], ) ] diff --git a/configs/models/hf_internlm/hf_internlm2_chat_20b_sft.py b/configs/models/hf_internlm/hf_internlm2_chat_20b_sft.py index 53844f5c..94a0b8e9 100644 --- a/configs/models/hf_internlm/hf_internlm2_chat_20b_sft.py +++ b/configs/models/hf_internlm/hf_internlm2_chat_20b_sft.py @@ -1,36 +1,13 @@ -from opencompass.models import HuggingFaceCausalLM - - -_meta_template = dict( - round=[ - dict(role='HUMAN', begin='<|im_start|>user\n', end='<|im_end|>\n'), - dict(role='BOT', begin='<|im_start|>assistant\n', end='<|im_end|>\n', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='internlm2-chat-20b-sft-hf', - path="internlm/internlm2-chat-20b-sft", - tokenizer_path='internlm/internlm2-chat-20b-sft', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - trust_remote_code=True, - ), - max_out_len=100, - max_seq_len=2048, + path='internlm/internlm2-chat-20b-sft', + max_out_len=1024, batch_size=8, - meta_template=_meta_template, - run_cfg=dict(num_gpus=2, num_procs=1), - end_str='<|im_end|>', - generation_kwargs = {"eos_token_id": [2, 92542]}, - batch_padding=True, + run_cfg=dict(num_gpus=2), + stop_words=['', '<|im_end|>'], ) ] diff --git a/configs/models/hf_internlm/hf_internlm2_chat_7b.py b/configs/models/hf_internlm/hf_internlm2_chat_7b.py index 3e0b349d..0fd75455 100644 --- a/configs/models/hf_internlm/hf_internlm2_chat_7b.py +++ b/configs/models/hf_internlm/hf_internlm2_chat_7b.py @@ -1,36 +1,13 @@ -from opencompass.models import HuggingFaceCausalLM - - -_meta_template = dict( - round=[ - dict(role='HUMAN', begin='<|im_start|>user\n', end='<|im_end|>\n'), - dict(role='BOT', begin='<|im_start|>assistant\n', end='<|im_end|>\n', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='internlm2-chat-7b-hf', - path="internlm/internlm2-chat-7b", 
- tokenizer_path='internlm/internlm2-chat-7b', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - trust_remote_code=True, - ), - max_out_len=100, - max_seq_len=2048, + path='internlm/internlm2-chat-7b', + max_out_len=1024, batch_size=8, - meta_template=_meta_template, - run_cfg=dict(num_gpus=1, num_procs=1), - end_str='<|im_end|>', - generation_kwargs = {"eos_token_id": [2, 92542]}, - batch_padding=True, + run_cfg=dict(num_gpus=1), + stop_words=['', '<|im_end|>'], ) ] diff --git a/configs/models/hf_internlm/hf_internlm2_chat_7b_sft.py b/configs/models/hf_internlm/hf_internlm2_chat_7b_sft.py index 07164a67..242ba360 100644 --- a/configs/models/hf_internlm/hf_internlm2_chat_7b_sft.py +++ b/configs/models/hf_internlm/hf_internlm2_chat_7b_sft.py @@ -1,36 +1,13 @@ -from opencompass.models import HuggingFaceCausalLM - - -_meta_template = dict( - round=[ - dict(role='HUMAN', begin='<|im_start|>user\n', end='<|im_end|>\n'), - dict(role='BOT', begin='<|im_start|>assistant\n', end='<|im_end|>\n', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='internlm2-chat-7b-sft-hf', - path="internlm/internlm2-chat-7b-sft", - tokenizer_path='internlm/internlm2-chat-7b-sft', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - trust_remote_code=True, - ), - max_out_len=100, - max_seq_len=2048, + path='internlm/internlm2-chat-7b-sft', + max_out_len=1024, batch_size=8, - meta_template=_meta_template, - run_cfg=dict(num_gpus=1, num_procs=1), - end_str='<|im_end|>', - generation_kwargs = {"eos_token_id": [2, 92542]}, - batch_padding=True, + run_cfg=dict(num_gpus=1), + stop_words=['', '<|im_end|>'], ) ] diff --git a/configs/models/hf_internlm/hf_internlm2_chat_math_20b.py b/configs/models/hf_internlm/hf_internlm2_chat_math_20b.py index 3e77198d..77a9bd05 100644 --- a/configs/models/hf_internlm/hf_internlm2_chat_math_20b.py +++ b/configs/models/hf_internlm/hf_internlm2_chat_math_20b.py @@ -1,35 +1,13 @@ -from opencompass.models import HuggingFaceCausalLM - - -_meta_template = dict( - round=[ - dict(role='HUMAN', begin='[UNUSED_TOKEN_146]user\n', end='[UNUSED_TOKEN_145]\n'), - dict(role='BOT', begin='[UNUSED_TOKEN_146]assistant\n', end='[UNUSED_TOKEN_145]\n', generate=True), - ], - eos_token_id=92542 -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='internlm2-chat-math-20b-hf', - path="internlm/internlm2-math-20b", - tokenizer_path='internlm/internlm2-math-20b', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - trust_remote_code=True, - ), - max_out_len=100, - max_seq_len=2048, + path='internlm/internlm2-math-20b', + max_out_len=1024, batch_size=8, - meta_template=_meta_template, - run_cfg=dict(num_gpus=2, num_procs=1), - end_str='[UNUSED_TOKEN_145]', + run_cfg=dict(num_gpus=2), + stop_words=['', '<|im_end|>'], ) ] diff --git a/configs/models/hf_internlm/hf_internlm2_chat_math_20b_with_system.py b/configs/models/hf_internlm/hf_internlm2_chat_math_20b_with_system.py index 80e12a18..bd01fffa 100644 --- 
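Note on the internlm2-chat hunks above: the removed end_str and generation_kwargs={'eos_token_id': [...]} pairs are subsumed by the new stop_words list. The sketch below shows the usual semantics of string stop words, i.e. truncating decoded output at the first stop marker; it illustrates the general idea only, not necessarily OpenCompass's exact implementation.

def truncate_at_stop_words(text: str, stop_words: list[str]) -> str:
    """Cut generated text at the earliest occurrence of any stop word."""
    cut = len(text)
    for word in stop_words:
        if not word:  # the configs above include '' as a marker — skip empties
            continue
        idx = text.find(word)
        if idx != -1:
            cut = min(cut, idx)
    return text[:cut]

# Example mirroring stop_words=['', '<|im_end|>'] from the configs above.
print(truncate_at_stop_words('4<|im_end|>\nextra tokens', ['', '<|im_end|>']))  # -> '4'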
a/configs/models/hf_internlm/hf_internlm2_chat_math_20b_with_system.py +++ b/configs/models/hf_internlm/hf_internlm2_chat_math_20b_with_system.py @@ -7,7 +7,6 @@ _meta_template = dict( dict(role='SYSTEM', begin='[UNUSED_TOKEN_146]system\n', end='[UNUSED_TOKEN_145]\n'), dict(role='BOT', begin='[UNUSED_TOKEN_146]assistant\n', end='[UNUSED_TOKEN_145]\n', generate=True), ], - eos_token_id=92542 ) models = [ diff --git a/configs/models/hf_internlm/hf_internlm2_chat_math_7b.py b/configs/models/hf_internlm/hf_internlm2_chat_math_7b.py index afc9a098..181e8e1c 100644 --- a/configs/models/hf_internlm/hf_internlm2_chat_math_7b.py +++ b/configs/models/hf_internlm/hf_internlm2_chat_math_7b.py @@ -1,35 +1,13 @@ -from opencompass.models import HuggingFaceCausalLM - - -_meta_template = dict( - round=[ - dict(role='HUMAN', begin='[UNUSED_TOKEN_146]user\n', end='[UNUSED_TOKEN_145]\n'), - dict(role='BOT', begin='[UNUSED_TOKEN_146]assistant\n', end='[UNUSED_TOKEN_145]\n', generate=True), - ], - eos_token_id=92542 -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='internlm2-chat-math-7b-hf', - path="internlm/internlm2-math-7b", - tokenizer_path='internlm/internlm2-math-7b', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - trust_remote_code=True, - ), - max_out_len=100, - max_seq_len=2048, + path='internlm/internlm2-math-7b', + max_out_len=1024, batch_size=8, - meta_template=_meta_template, - run_cfg=dict(num_gpus=1, num_procs=1), - end_str='[UNUSED_TOKEN_145]', + run_cfg=dict(num_gpus=1), + stop_words=['', '<|im_end|>'], ) ] diff --git a/configs/models/hf_internlm/hf_internlm2_chat_math_7b_with_system.py b/configs/models/hf_internlm/hf_internlm2_chat_math_7b_with_system.py index aa9b5d09..545ab627 100644 --- a/configs/models/hf_internlm/hf_internlm2_chat_math_7b_with_system.py +++ b/configs/models/hf_internlm/hf_internlm2_chat_math_7b_with_system.py @@ -7,7 +7,6 @@ _meta_template = dict( dict(role='SYSTEM', begin='[UNUSED_TOKEN_146]system\n', end='[UNUSED_TOKEN_145]\n'), dict(role='BOT', begin='[UNUSED_TOKEN_146]assistant\n', end='[UNUSED_TOKEN_145]\n', generate=True), ], - eos_token_id=92542 ) models = [ diff --git a/configs/models/hf_internlm/hf_internlm2_math_20b.py b/configs/models/hf_internlm/hf_internlm2_math_20b.py new file mode 100644 index 00000000..c0d68572 --- /dev/null +++ b/configs/models/hf_internlm/hf_internlm2_math_20b.py @@ -0,0 +1,13 @@ +from opencompass.models import HuggingFaceBaseModel + + +models = [ + dict( + type=HuggingFaceBaseModel, + abbr='internlm2-math-20b-hf', + path="internlm/internlm2-math-20b", + max_out_len=1024, + batch_size=8, + run_cfg=dict(num_gpus=2), + ) +] diff --git a/configs/models/hf_internlm/hf_internlm2_math_7b.py b/configs/models/hf_internlm/hf_internlm2_math_7b.py new file mode 100644 index 00000000..bf103d64 --- /dev/null +++ b/configs/models/hf_internlm/hf_internlm2_math_7b.py @@ -0,0 +1,13 @@ +from opencompass.models import HuggingFaceBaseModel + + +models = [ + dict( + type=HuggingFaceBaseModel, + abbr='internlm2-math-7b-hf', + path="internlm/internlm2-math-7b", + max_out_len=1024, + batch_size=8, + run_cfg=dict(num_gpus=1), + ) +] diff --git a/configs/models/hf_internlm/hf_internlm_20b.py b/configs/models/hf_internlm/hf_internlm_20b.py index 9af67533..e112f85b 100644 --- a/configs/models/hf_internlm/hf_internlm_20b.py +++ 
b/configs/models/hf_internlm/hf_internlm_20b.py @@ -1,22 +1,13 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='internlm-20b-hf', path="internlm/internlm-20b", - tokenizer_path='internlm/internlm-20b', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - trust_remote_code=True, - ), - max_out_len=100, - max_seq_len=2048, + max_out_len=1024, batch_size=8, - model_kwargs=dict(trust_remote_code=True, device_map='auto'), - run_cfg=dict(num_gpus=2, num_procs=1), + run_cfg=dict(num_gpus=2), ) ] diff --git a/configs/models/hf_internlm/hf_internlm_7b.py b/configs/models/hf_internlm/hf_internlm_7b.py index 649e0c75..15a2294d 100644 --- a/configs/models/hf_internlm/hf_internlm_7b.py +++ b/configs/models/hf_internlm/hf_internlm_7b.py @@ -1,25 +1,13 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='internlm-7b-hf', path="internlm/internlm-7b", - tokenizer_path='internlm/internlm-7b', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - trust_remote_code=True, - ), - max_out_len=100, - max_seq_len=2048, + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/hf_internlm/hf_internlm_chat_7b_8k.py b/configs/models/hf_internlm/hf_internlm_chat_7b_8k.py deleted file mode 100644 index 5e0152d5..00000000 --- a/configs/models/hf_internlm/hf_internlm_chat_7b_8k.py +++ /dev/null @@ -1,34 +0,0 @@ -from opencompass.models import HuggingFaceCausalLM - - -_meta_template = dict( - round=[ - dict(role='HUMAN', begin='<|User|>:', end='\n'), - dict(role='BOT', begin='<|Bot|>:', end='\n', generate=True), - ], -) - -models = [ - dict( - type=HuggingFaceCausalLM, - abbr='internlm-chat-7b-8k-hf', - path="internlm/internlm-chat-7b-8k", - tokenizer_path='internlm/internlm-chat-7b-8k', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - trust_remote_code=True, - ), - max_out_len=100, - max_seq_len=2048, - batch_size=8, - meta_template=_meta_template, - run_cfg=dict(num_gpus=1, num_procs=1), - end_str='', - ) -] diff --git a/configs/models/hf_internlm/hf_internlm_chat_7b_v1_1.py b/configs/models/hf_internlm/hf_internlm_chat_7b_v1_1.py deleted file mode 100644 index 7471e68c..00000000 --- a/configs/models/hf_internlm/hf_internlm_chat_7b_v1_1.py +++ /dev/null @@ -1,34 +0,0 @@ -from opencompass.models import HuggingFaceCausalLM - - -_meta_template = dict( - round=[ - dict(role='HUMAN', begin='<|User|>:', end='\n'), - dict(role='BOT', begin='<|Bot|>:', end='\n', generate=True), - ], -) - -models = [ - dict( - type=HuggingFaceCausalLM, - abbr='internlm-chat-7b-v1.1-hf', - path="internlm/internlm-chat-7b-v1_1", - tokenizer_path='internlm/internlm-chat-7b-v1_1', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - trust_remote_code=True, - ), - max_out_len=100, - max_seq_len=2048, - batch_size=8, - meta_template=_meta_template, - run_cfg=dict(num_gpus=1, num_procs=1), - end_str='', - ) -] diff --git 
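The new lmdeploy_internlm2_20b.py config added just below pairs TurboMindModel with an engine_config/gen_config block. For orientation, here is a rough standalone equivalent using the lmdeploy Python API; this assumes a recent lmdeploy release, and exact parameter names may vary between versions.

# Rough standalone equivalent of the TurboMind engine/generation settings used
# in the new lmdeploy config (API names assume a recent lmdeploy release).
from lmdeploy import GenerationConfig, TurbomindEngineConfig, pipeline

pipe = pipeline(
    'internlm/internlm2-20b',
    backend_config=TurbomindEngineConfig(session_len=32768, max_batch_size=32, tp=2),
)
outputs = pipe(
    ['Solve 12 * 12 ='],
    gen_config=GenerationConfig(top_k=1, top_p=0.8, temperature=1.0, max_new_tokens=2000),
)
print(outputs[0].text)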
a/configs/models/hf_internlm/lmdeploy_internlm2_20b.py b/configs/models/hf_internlm/lmdeploy_internlm2_20b.py new file mode 100644 index 00000000..730cb764 --- /dev/null +++ b/configs/models/hf_internlm/lmdeploy_internlm2_20b.py @@ -0,0 +1,27 @@ +from opencompass.models.turbomind import TurboMindModel + + +models = [ + dict( + type=TurboMindModel, + abbr="internlm2-20b-turbomind", + path="internlm/internlm2-20b", + engine_config=dict( + session_len=32768, + max_batch_size=32, + model_name="internlm2-20b", + tp=2, + ), + gen_config=dict( + top_k=1, + top_p=0.8, + temperature=1.0, + max_new_tokens=2000, + ), + max_out_len=2000, + max_seq_len=32768, + batch_size=32, + concurrency=8, + run_cfg=dict(num_gpus=2, num_procs=1), + ) +] diff --git a/configs/models/hf_internlm/lmdeploy_internlm2_chat_20b.py b/configs/models/hf_internlm/lmdeploy_internlm2_chat_20b.py index fcad86d9..0e84ff8b 100644 --- a/configs/models/hf_internlm/lmdeploy_internlm2_chat_20b.py +++ b/configs/models/hf_internlm/lmdeploy_internlm2_chat_20b.py @@ -15,9 +15,8 @@ models = [ path="internlm/internlm2-chat-20b", meta_template=_meta_template, engine_config=dict( - session_len=210000, - max_batch_size=8, - rope_scaling_factor=3.0, + session_len=32768, + max_batch_size=32, model_name="internlm2-chat-20b", tp=2, stop_words=[2, 92542], @@ -29,8 +28,8 @@ models = [ max_new_tokens=2000, ), max_out_len=2000, - max_seq_len=210000, - batch_size=1, + max_seq_len=32768, + batch_size=32, concurrency=8, run_cfg=dict(num_gpus=2, num_procs=1), ) diff --git a/configs/models/hf_internlm/lmdeploy_internlm2_chat_7b.py b/configs/models/hf_internlm/lmdeploy_internlm2_chat_7b.py index 424fc1c9..cb192e09 100644 --- a/configs/models/hf_internlm/lmdeploy_internlm2_chat_7b.py +++ b/configs/models/hf_internlm/lmdeploy_internlm2_chat_7b.py @@ -15,9 +15,8 @@ models = [ path="internlm/internlm2-chat-7b", meta_template=_meta_template, engine_config=dict( - session_len=210000, - max_batch_size=8, - rope_scaling_factor=2.0, + session_len=32768, + max_batch_size=32, model_name="internlm2-chat-7b", tp=1, stop_words=[2, 92542], @@ -29,8 +28,8 @@ models = [ max_new_tokens=2000, ), max_out_len=2000, - max_seq_len=210000, - batch_size=1, + max_seq_len=32768, + batch_size=32, concurrency=8, run_cfg=dict(num_gpus=1, num_procs=1), ) diff --git a/configs/models/hf_llama/hf_llama2_13b.py b/configs/models/hf_llama/hf_llama2_13b.py index 4103c874..4044f87e 100644 --- a/configs/models/hf_llama/hf_llama2_13b.py +++ b/configs/models/hf_llama/hf_llama2_13b.py @@ -1,21 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='llama-2-13b-hf', - path="meta-llama/Llama-2-13b-hf", - tokenizer_path='meta-llama/Llama-2-13b-hf', - tokenizer_kwargs=dict(padding_side='left', - truncation_side='left', - use_fast=False, - ), - max_out_len=100, - max_seq_len=2048, + path='meta-llama/Llama-2-13b-hf', + max_out_len=1024, batch_size=8, - model_kwargs=dict(device_map='auto'), - batch_padding=False, # if false, inference with for-loop without batch padding - run_cfg=dict(num_gpus=2, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/hf_llama/hf_llama2_13b_chat.py b/configs/models/hf_llama/hf_llama2_13b_chat.py index ef85562e..8460ad42 100644 --- a/configs/models/hf_llama/hf_llama2_13b_chat.py +++ b/configs/models/hf_llama/hf_llama2_13b_chat.py @@ -1,32 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = 
dict( - round=[ - dict(role="HUMAN", begin='[INST] ', end=' [/INST]'), - dict(role="BOT", begin=' ', end=' ', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='llama-2-13b-chat-hf', - path="meta-llama/Llama-2-13b-chat-hf", - tokenizer_path='meta-llama/Llama-2-13b-chat-hf', - model_kwargs=dict( - device_map='auto' - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, + path='meta-llama/Llama-2-13b-chat-hf', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=2, num_procs=1), - end_str='[INST]', - batch_padding=True, + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/hf_llama/hf_llama2_70b.py b/configs/models/hf_llama/hf_llama2_70b.py index 9bc12a2a..97d28a4b 100644 --- a/configs/models/hf_llama/hf_llama2_70b.py +++ b/configs/models/hf_llama/hf_llama2_70b.py @@ -1,21 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='llama-2-70b-hf', - path="meta-llama/Llama-2-70b-hf", - tokenizer_path='meta-llama/Llama-2-70b-hf', - tokenizer_kwargs=dict(padding_side='left', - truncation_side='left', - use_fast=False, - ), - max_out_len=100, - max_seq_len=2048, + path='meta-llama/Llama-2-70b-hf', + max_out_len=1024, batch_size=8, - model_kwargs=dict(device_map='auto'), - batch_padding=False, # if false, inference with for-loop without batch padding - run_cfg=dict(num_gpus=4, num_procs=1), + run_cfg=dict(num_gpus=4), ) ] diff --git a/configs/models/hf_llama/hf_llama2_70b_chat.py b/configs/models/hf_llama/hf_llama2_70b_chat.py index ff25d27d..6f6351fa 100644 --- a/configs/models/hf_llama/hf_llama2_70b_chat.py +++ b/configs/models/hf_llama/hf_llama2_70b_chat.py @@ -1,32 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='[INST] ', end=' [/INST]'), - dict(role="BOT", begin=' ', end=' ', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='llama-2-70b-chat-hf', - path="meta-llama/Llama-2-70b-chat-hf", - tokenizer_path='meta-llama/Llama-2-70b-chat-hf', - model_kwargs=dict( - device_map='auto' - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, + path='meta-llama/Llama-2-70b-chat-hf', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=4, num_procs=1), - end_str='[INST]', - batch_padding=True, + run_cfg=dict(num_gpus=4), ) ] diff --git a/configs/models/hf_llama/hf_llama2_7b.py b/configs/models/hf_llama/hf_llama2_7b.py index 3d00990e..beb4d667 100644 --- a/configs/models/hf_llama/hf_llama2_7b.py +++ b/configs/models/hf_llama/hf_llama2_7b.py @@ -1,21 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='llama-2-7b-hf', - path="meta-llama/Llama-2-7b-hf", - tokenizer_path='meta-llama/Llama-2-7b-hf', - tokenizer_kwargs=dict(padding_side='left', - truncation_side='left', - use_fast=False, - ), - max_out_len=100, - max_seq_len=2048, + path='meta-llama/Llama-2-7b-hf', + 
max_out_len=1024, batch_size=8, - model_kwargs=dict(device_map='auto'), - batch_padding=False, # if false, inference with for-loop without batch padding - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/hf_llama/hf_llama2_7b_chat.py b/configs/models/hf_llama/hf_llama2_7b_chat.py index 4c880729..e1f95331 100644 --- a/configs/models/hf_llama/hf_llama2_7b_chat.py +++ b/configs/models/hf_llama/hf_llama2_7b_chat.py @@ -1,32 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='[INST] ', end=' [/INST]'), - dict(role="BOT", begin=' ', end=' ', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='llama-2-7b-chat-hf', - path="meta-llama/Llama-2-7b-chat-hf", - tokenizer_path='meta-llama/Llama-2-7b-chat-hf', - model_kwargs=dict( - device_map='auto' - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, + path='meta-llama/Llama-2-7b-chat-hf', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), - end_str='[INST]', - batch_padding=True, + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/hf_llama/hf_llama3_70b.py b/configs/models/hf_llama/hf_llama3_70b.py index f35c18ad..b3cce950 100644 --- a/configs/models/hf_llama/hf_llama3_70b.py +++ b/configs/models/hf_llama/hf_llama3_70b.py @@ -1,21 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, - abbr="llama-3-70b-hf", - path="meta-llama/Meta-Llama-3-70B", - model_kwargs=dict(device_map="auto"), - tokenizer_kwargs=dict( - padding_side="left", - truncation_side="left", - use_fast=False, - ), - max_out_len=100, - max_seq_len=2048, + type=HuggingFaceBaseModel, + abbr='llama-3-70b-hf', + path='meta-llama/Meta-Llama-3-70B', + max_out_len=1024, batch_size=8, - batch_padding=True, - run_cfg=dict(num_gpus=4, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/hf_llama/hf_llama3_70b_instruct.py b/configs/models/hf_llama/hf_llama3_70b_instruct.py index c19c6615..cb7e8554 100644 --- a/configs/models/hf_llama/hf_llama3_70b_instruct.py +++ b/configs/models/hf_llama/hf_llama3_70b_instruct.py @@ -1,29 +1,13 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin="<|start_header_id|>user<|end_header_id|>\n\n", end="<|eot_id|>"), - dict(role="BOT", begin="<|start_header_id|>assistant<|end_header_id|>\n\n", end="<|eot_id|>", generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, - abbr="llama-3-70b-instruct-hf", - path="meta-llama/Meta-Llama-3-70B-Instruct", - model_kwargs=dict(device_map="auto"), - tokenizer_kwargs=dict( - padding_side="left", - truncation_side="left", - use_fast=False, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, + type=HuggingFacewithChatTemplate, + abbr='llama-3-70b-instruct-hf', + path='meta-llama/Meta-Llama-3-70B-Instruct', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=4, num_procs=1), - generation_kwargs={"eos_token_id": [128001, 128009]}, - batch_padding=True, + run_cfg=dict(num_gpus=4), + stop_words=['<|end_of_text|>', '<|eot_id|>'], ) ] diff --git 
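The llama-2 hunks above reduce each model entry to path, max_out_len, batch_size and run_cfg. Such files are meant to be pulled into an evaluation entry point unchanged; a minimal sketch of that pattern follows, using the usual OpenCompass read_base convention. The dataset import is a placeholder, not something this patch adds — substitute whichever dataset config you actually want to run.

# Hypothetical eval config assembled from the rewritten model files.
from mmengine.config import read_base

with read_base():
    from .models.hf_llama.hf_llama2_7b_chat import models as llama2_7b_chat
    from .datasets.gsm8k.gsm8k_gen import gsm8k_datasets  # placeholder dataset

datasets = [*gsm8k_datasets]
models = [*llama2_7b_chat]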
a/configs/models/hf_llama/hf_llama3_8b.py b/configs/models/hf_llama/hf_llama3_8b.py index cbf2a9da..3ae9f2c3 100644 --- a/configs/models/hf_llama/hf_llama3_8b.py +++ b/configs/models/hf_llama/hf_llama3_8b.py @@ -1,21 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, - abbr="llama-3-8b-hf", - path="meta-llama/Meta-Llama-3-8B", - model_kwargs=dict(device_map="auto"), - tokenizer_kwargs=dict( - padding_side="left", - truncation_side="left", - use_fast=False, - ), - max_out_len=100, - max_seq_len=2048, + type=HuggingFaceBaseModel, + abbr='llama-3-8b-hf', + path='meta-llama/Meta-Llama-3-8B', + max_out_len=1024, batch_size=8, - batch_padding=True, - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/hf_llama/hf_llama3_8b_instruct.py b/configs/models/hf_llama/hf_llama3_8b_instruct.py index e0b439d9..1e2fd8f0 100644 --- a/configs/models/hf_llama/hf_llama3_8b_instruct.py +++ b/configs/models/hf_llama/hf_llama3_8b_instruct.py @@ -1,29 +1,13 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin="<|start_header_id|>user<|end_header_id|>\n\n", end="<|eot_id|>"), - dict(role="BOT", begin="<|start_header_id|>assistant<|end_header_id|>\n\n", end="<|eot_id|>", generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, - abbr="llama-3-8b-instruct-hf", - path="meta-llama/Meta-Llama-3-8B-Instruct", - model_kwargs=dict(device_map="auto"), - tokenizer_kwargs=dict( - padding_side="left", - truncation_side="left", - use_fast=False, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, + type=HuggingFacewithChatTemplate, + abbr='llama-3-8b-instruct-hf', + path='meta-llama/Meta-Llama-3-8B-Instruct', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), - generation_kwargs={"eos_token_id": [128001, 128009]}, - batch_padding=True, + run_cfg=dict(num_gpus=1), + stop_words=['<|end_of_text|>', '<|eot_id|>'], ) ] diff --git a/configs/models/hf_llama/hf_llama_13b.py b/configs/models/hf_llama/hf_llama_13b.py index 40389b7c..70d1b9cb 100644 --- a/configs/models/hf_llama/hf_llama_13b.py +++ b/configs/models/hf_llama/hf_llama_13b.py @@ -1,21 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ - # LLaMA 13B dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='llama-13b-hf', - path="huggyllama/llama-13b", - tokenizer_path='huggyllama/llama-13b', - tokenizer_kwargs=dict(padding_side='left', - truncation_side='left', - use_fast=False, - ), - max_out_len=100, - max_seq_len=2048, + path='huggyllama/llama-13b', + max_out_len=1024, batch_size=8, - model_kwargs=dict(device_map='auto'), - batch_padding=False, # if false, inference with for-loop without batch padding - run_cfg=dict(num_gpus=2, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/hf_llama/hf_llama_30b.py b/configs/models/hf_llama/hf_llama_30b.py index 493923bb..063a6927 100644 --- a/configs/models/hf_llama/hf_llama_30b.py +++ b/configs/models/hf_llama/hf_llama_30b.py @@ -1,21 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ - # LLaMA 30B dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='llama-30b-hf', - path="huggyllama/llama-30b", - 
tokenizer_path='huggyllama/llama-30b', - tokenizer_kwargs=dict(padding_side='left', - truncation_side='left', - use_fast=False, - ), - max_out_len=100, - max_seq_len=2048, + path='huggyllama/llama-30b', + max_out_len=1024, batch_size=8, - model_kwargs=dict(device_map='auto'), - batch_padding=False, # if false, inference with for-loop without batch padding - run_cfg=dict(num_gpus=4, num_procs=1), + run_cfg=dict(num_gpus=2), ) ] diff --git a/configs/models/hf_llama/hf_llama_65b.py b/configs/models/hf_llama/hf_llama_65b.py index 1b26f26f..9db5fcc9 100644 --- a/configs/models/hf_llama/hf_llama_65b.py +++ b/configs/models/hf_llama/hf_llama_65b.py @@ -1,21 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ - # LLaMA 65B dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='llama-65b-hf', - path="huggyllama/llama-65b", - tokenizer_path='huggyllama/llama-65b', - tokenizer_kwargs=dict(padding_side='left', - truncation_side='left', - use_fast=False, - ), - max_out_len=100, - max_seq_len=2048, + path='huggyllama/llama-65b', + max_out_len=1024, batch_size=8, - model_kwargs=dict(device_map='auto'), - batch_padding=False, # if false, inference with for-loop without batch padding - run_cfg=dict(num_gpus=4, num_procs=1), + run_cfg=dict(num_gpus=4), ) ] diff --git a/configs/models/hf_llama/hf_llama_7b.py b/configs/models/hf_llama/hf_llama_7b.py index 4e09dd74..1100f119 100644 --- a/configs/models/hf_llama/hf_llama_7b.py +++ b/configs/models/hf_llama/hf_llama_7b.py @@ -1,21 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ - # LLaMA 7B dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='llama-7b-hf', - path="huggyllama/llama-7b", - tokenizer_path='huggyllama/llama-7b', - tokenizer_kwargs=dict(padding_side='left', - truncation_side='left', - use_fast=False, - ), - max_out_len=100, - max_seq_len=2048, + path='huggyllama/llama-7b', + max_out_len=1024, batch_size=8, - model_kwargs=dict(device_map='auto'), - batch_padding=False, # if false, inference with for-loop without batch padding - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/hf_llama/lmdeploy_llama3_70b_instruct.py b/configs/models/hf_llama/lmdeploy_llama3_70b_instruct.py new file mode 100644 index 00000000..4d93d6c8 --- /dev/null +++ b/configs/models/hf_llama/lmdeploy_llama3_70b_instruct.py @@ -0,0 +1,24 @@ +from opencompass.models import TurboMindModel + +_meta_template = dict( + round=[ + dict(role="HUMAN", begin='<|begin_of_text|>user<|end_header_id|>\n\n', end='<|eot_id|>'), + dict(role="BOT", begin='<|begin_of_text|>assistant<|end_header_id|>\n\n', end='<|eot_id|>', generate=True), + ], +) + +models = [ + dict( + type=TurboMindModel, + abbr='llama-3-70b-instruct-lmdeploy', + path='meta-llama/Meta-Llama-3-70B-Instruct', + engine_config=dict(session_len=4096, max_batch_size=16, tp=4), + gen_config=dict(top_k=1, temperature=1, top_p=0.9, max_new_tokens=1024, stop_words=[128001, 128009]), + max_out_len=1024, + max_seq_len=4096, + batch_size=16, + concurrency=16, + meta_template=_meta_template, + run_cfg=dict(num_gpus=4), + ) +] diff --git a/configs/models/hf_llama/lmdeploy_llama3_8b_instruct.py b/configs/models/hf_llama/lmdeploy_llama3_8b_instruct.py new file mode 100644 index 00000000..b393072b --- /dev/null +++ b/configs/models/hf_llama/lmdeploy_llama3_8b_instruct.py @@ -0,0 +1,24 @@ +from opencompass.models import 
TurboMindModel + +_meta_template = dict( + round=[ + dict(role="HUMAN", begin='<|begin_of_text|>user<|end_header_id|>\n\n', end='<|eot_id|>'), + dict(role="BOT", begin='<|begin_of_text|>assistant<|end_header_id|>\n\n', end='<|eot_id|>', generate=True), + ], +) + +models = [ + dict( + type=TurboMindModel, + abbr='llama-3-8b-instruct-lmdeploy', + path='meta-llama/Meta-Llama-3-8B-Instruct', + engine_config=dict(session_len=4096, max_batch_size=16, tp=1), + gen_config=dict(top_k=1, temperature=1, top_p=0.9, max_new_tokens=1024, stop_words=[128001, 128009]), + max_out_len=1024, + max_seq_len=4096, + batch_size=16, + concurrency=16, + meta_template=_meta_template, + run_cfg=dict(num_gpus=1), + ) +] diff --git a/configs/models/mistral/hf_mistral_7b_instruct_v0_1.py b/configs/models/mistral/hf_mistral_7b_instruct_v0_1.py index b8149a51..cb0f1a65 100644 --- a/configs/models/mistral/hf_mistral_7b_instruct_v0_1.py +++ b/configs/models/mistral/hf_mistral_7b_instruct_v0_1.py @@ -1,34 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - - -_meta_template = dict( - begin="", - round=[ - dict(role="HUMAN", begin='[INST] ', end=' [/INST]'), - dict(role="BOT", begin="", end=' ', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( + type=HuggingFacewithChatTemplate, abbr='mistral-7b-instruct-v0.1-hf', - type=HuggingFaceCausalLM, path='mistralai/Mistral-7B-Instruct-v0.1', - tokenizer_path='mistralai/Mistral-7B-Instruct-v0.1', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), - batch_padding=True, + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/mistral/hf_mistral_7b_instruct_v0_2.py b/configs/models/mistral/hf_mistral_7b_instruct_v0_2.py index e109ca58..188698c7 100644 --- a/configs/models/mistral/hf_mistral_7b_instruct_v0_2.py +++ b/configs/models/mistral/hf_mistral_7b_instruct_v0_2.py @@ -1,34 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - - -_meta_template = dict( - begin="", - round=[ - dict(role="HUMAN", begin='[INST] ', end=' [/INST]'), - dict(role="BOT", begin="", end=' ', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( + type=HuggingFacewithChatTemplate, abbr='mistral-7b-instruct-v0.2-hf', - type=HuggingFaceCausalLM, path='mistralai/Mistral-7B-Instruct-v0.2', - tokenizer_path='mistralai/Mistral-7B-Instruct-v0.2', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), - batch_padding=True, + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/mistral/hf_mistral_7b_v0_1.py b/configs/models/mistral/hf_mistral_7b_v0_1.py index bae2ce32..3446cf37 100644 --- a/configs/models/mistral/hf_mistral_7b_v0_1.py +++ b/configs/models/mistral/hf_mistral_7b_v0_1.py @@ -1,24 +1,13 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( + type=HuggingFaceBaseModel, abbr='mistral-7b-v0.1-hf', - type=HuggingFaceCausalLM, path='mistralai/Mistral-7B-v0.1', - 
tokenizer_path='mistralai/Mistral-7B-v0.1', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), - max_out_len=100, - max_seq_len=2048, + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/mistral/hf_mistral_7b_v0_2.py b/configs/models/mistral/hf_mistral_7b_v0_2.py index 02a0a088..df696e42 100644 --- a/configs/models/mistral/hf_mistral_7b_v0_2.py +++ b/configs/models/mistral/hf_mistral_7b_v0_2.py @@ -1,23 +1,13 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( + type=HuggingFaceBaseModel, abbr='mistral-7b-v0.2-hf', - type=HuggingFaceCausalLM, - path='alpindale/Mistral-7B-v0.2-hf', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), - max_out_len=100, - max_seq_len=2048, + path='mistral-community/Mistral-7B-v0.2', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/mistral/hf_mixtral_8x22b_instruct_v0_1.py b/configs/models/mistral/hf_mixtral_8x22b_instruct_v0_1.py new file mode 100644 index 00000000..588c18a7 --- /dev/null +++ b/configs/models/mistral/hf_mixtral_8x22b_instruct_v0_1.py @@ -0,0 +1,12 @@ +from opencompass.models import HuggingFacewithChatTemplate + +models = [ + dict( + type=HuggingFacewithChatTemplate, + abbr='mixtral-8x22b-instruct-v0.1-hf', + path='mistralai/Mixtral-8x22B-Instruct-v0.1', + max_out_len=1024, + batch_size=4, + run_cfg=dict(num_gpus=8), + ) +] diff --git a/configs/models/mistral/hf_mixtral_8x22b_v0_1.py b/configs/models/mistral/hf_mixtral_8x22b_v0_1.py new file mode 100644 index 00000000..d84021ab --- /dev/null +++ b/configs/models/mistral/hf_mixtral_8x22b_v0_1.py @@ -0,0 +1,12 @@ +from opencompass.models import HuggingFaceBaseModel + +models = [ + dict( + type=HuggingFaceBaseModel, + abbr='mixtral-8x22b-v0.1-hf', + path='mistralai/Mixtral-8x22B-v0.1', + max_out_len=1024, + batch_size=4, + run_cfg=dict(num_gpus=8), + ) +] diff --git a/configs/models/mistral/hf_mixtral_8x7b_instruct_v0_1.py b/configs/models/mistral/hf_mixtral_8x7b_instruct_v0_1.py new file mode 100644 index 00000000..c910d210 --- /dev/null +++ b/configs/models/mistral/hf_mixtral_8x7b_instruct_v0_1.py @@ -0,0 +1,12 @@ +from opencompass.models import HuggingFacewithChatTemplate + +models = [ + dict( + type=HuggingFacewithChatTemplate, + abbr='mixtral-8x7b-instruct-v0.1-hf', + path='mistralai/Mixtral-8x7B-Instruct-v0.1', + max_out_len=1024, + batch_size=8, + run_cfg=dict(num_gpus=4), + ) +] diff --git a/configs/models/mistral/hf_mixtral_8x7b_v0_1.py b/configs/models/mistral/hf_mixtral_8x7b_v0_1.py new file mode 100644 index 00000000..25204226 --- /dev/null +++ b/configs/models/mistral/hf_mixtral_8x7b_v0_1.py @@ -0,0 +1,12 @@ +from opencompass.models import HuggingFaceBaseModel + +models = [ + dict( + type=HuggingFaceBaseModel, + abbr='mixtral-8x7b-v0.1-hf', + path='mistralai/Mixtral-8x7B-v0.1', + max_out_len=1024, + batch_size=8, + run_cfg=dict(num_gpus=4), + ) +] diff --git a/configs/models/mixtral/mixtral_8x7b_32k.py b/configs/models/mistral/mixtral_8x7b_32k.py similarity index 100% rename from configs/models/mixtral/mixtral_8x7b_32k.py rename to configs/models/mistral/mixtral_8x7b_32k.py diff --git 
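One detail worth noting from the lmdeploy Llama-3 configs above: their gen_config stops on raw token ids (stop_words=[128001, 128009]) while the HF chat configs stop on the strings '<|end_of_text|>' and '<|eot_id|>'. These are the same markers; the quick check below maps one to the other, assuming you have access to the gated Meta-Llama-3 repository.

# Map the string stop markers used by the HF configs to the numeric ids used by
# the lmdeploy gen_config (expected: 128001 and 128009 for Llama-3).
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained('meta-llama/Meta-Llama-3-8B-Instruct')
for marker in ('<|end_of_text|>', '<|eot_id|>'):
    print(marker, tok.convert_tokens_to_ids(marker))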
a/configs/models/mistral/vllm_mistral_7b_instruct_v0_1.py b/configs/models/mistral/vllm_mistral_7b_instruct_v0_1.py index 9c9ab08f..4454c32f 100644 --- a/configs/models/mistral/vllm_mistral_7b_instruct_v0_1.py +++ b/configs/models/mistral/vllm_mistral_7b_instruct_v0_1.py @@ -7,7 +7,6 @@ _meta_template = dict( dict(role="HUMAN", begin='[INST]', end='[/INST]'), dict(role="BOT", begin="", end='', generate=True), ], - eos_token_id=2 ) models = [ diff --git a/configs/models/mistral/vllm_mistral_7b_instruct_v0_2.py b/configs/models/mistral/vllm_mistral_7b_instruct_v0_2.py index b6c565c2..010f9bf1 100644 --- a/configs/models/mistral/vllm_mistral_7b_instruct_v0_2.py +++ b/configs/models/mistral/vllm_mistral_7b_instruct_v0_2.py @@ -7,7 +7,6 @@ _meta_template = dict( dict(role="HUMAN", begin='[INST]', end='[/INST]'), dict(role="BOT", begin="", end='', generate=True), ], - eos_token_id=2 ) models = [ diff --git a/configs/models/mistral/vllm_mistral_7b_v0_1.py b/configs/models/mistral/vllm_mistral_7b_v0_1.py new file mode 100644 index 00000000..32486ce2 --- /dev/null +++ b/configs/models/mistral/vllm_mistral_7b_v0_1.py @@ -0,0 +1,17 @@ +from opencompass.models import VLLM + + +models = [ + dict( + type=VLLM, + abbr='mistral-7b-v0.1-vllm', + path='mistralai/Mistral-7B-v0.1', + max_out_len=100, + max_seq_len=2048, + batch_size=32, + model_kwargs=dict(dtype='bfloat16'), + generation_kwargs=dict(temperature=0, top_p=1, max_tokens=2048, stop_token_ids=[2]), + run_cfg=dict(num_gpus=1, num_procs=1), + stop_words=['[INST]'], + ) +] diff --git a/configs/models/mistral/vllm_mistral_7b_v0_2.py b/configs/models/mistral/vllm_mistral_7b_v0_2.py new file mode 100644 index 00000000..22931d9a --- /dev/null +++ b/configs/models/mistral/vllm_mistral_7b_v0_2.py @@ -0,0 +1,17 @@ +from opencompass.models import VLLM + + +models = [ + dict( + type=VLLM, + abbr='mistral-7b-v0.2-vllm', + path='mistral-community/Mistral-7B-v0.2', + max_out_len=100, + max_seq_len=2048, + batch_size=32, + model_kwargs=dict(dtype='bfloat16'), + generation_kwargs=dict(temperature=0, top_p=1, max_tokens=2048, stop_token_ids=[2]), + run_cfg=dict(num_gpus=1, num_procs=1), + stop_words=['[INST]'], + ) +] diff --git a/configs/models/mixtral/vllm_mixtral_8x7b_instruct_v0_1.py b/configs/models/mistral/vllm_mixtral_8x7b_instruct_v0_1.py similarity index 97% rename from configs/models/mixtral/vllm_mixtral_8x7b_instruct_v0_1.py rename to configs/models/mistral/vllm_mixtral_8x7b_instruct_v0_1.py index 6f26822f..894be13c 100644 --- a/configs/models/mixtral/vllm_mixtral_8x7b_instruct_v0_1.py +++ b/configs/models/mistral/vllm_mixtral_8x7b_instruct_v0_1.py @@ -7,7 +7,6 @@ _meta_template = dict( dict(role="HUMAN", begin='[INST]', end='[/INST]'), dict(role="BOT", begin="", end='', generate=True), ], - eos_token_id=2 ) models = [ diff --git a/configs/models/mixtral/hf_mixtral_8x22b_instruct_v0_1.py b/configs/models/mixtral/hf_mixtral_8x22b_instruct_v0_1.py deleted file mode 100644 index 89283ef4..00000000 --- a/configs/models/mixtral/hf_mixtral_8x22b_instruct_v0_1.py +++ /dev/null @@ -1,34 +0,0 @@ -from opencompass.models import HuggingFaceCausalLM - - -_meta_template = dict( - begin="", - round=[ - dict(role="HUMAN", begin='[INST] ', end=' [/INST]'), - dict(role="BOT", begin="", end=' ', generate=True), - ], -) - -models = [ - dict( - abbr='mixtral-8x22b-instruct-v0.1', - type=HuggingFaceCausalLM, - path='mistralai/Mixtral-8x22B-Instruct-v0.1', - tokenizer_path='mistralai/Mixtral-8x22B-Instruct-v0.1', - model_kwargs=dict( - device_map='auto', - 
trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, - batch_size=8, - run_cfg=dict(num_gpus=2, num_procs=1), - batch_padding=True, - ) -] diff --git a/configs/models/mixtral/hf_mixtral_8x22b_v0_1.py b/configs/models/mixtral/hf_mixtral_8x22b_v0_1.py deleted file mode 100644 index aa60b408..00000000 --- a/configs/models/mixtral/hf_mixtral_8x22b_v0_1.py +++ /dev/null @@ -1,24 +0,0 @@ -from opencompass.models import HuggingFaceCausalLM - - -models = [ - dict( - abbr='mixtral-8x22b-v0.1', - type=HuggingFaceCausalLM, - path='mistralai/Mixtral-8x22B-v0.1', - tokenizer_path='mistralai/Mixtral-8x22B-v0.1', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), - max_out_len=100, - max_seq_len=2048, - batch_size=8, - run_cfg=dict(num_gpus=2, num_procs=1), - ) -] diff --git a/configs/models/mixtral/hf_mixtral_8x7b_instruct_v0_1.py b/configs/models/mixtral/hf_mixtral_8x7b_instruct_v0_1.py deleted file mode 100644 index 0c31f3c8..00000000 --- a/configs/models/mixtral/hf_mixtral_8x7b_instruct_v0_1.py +++ /dev/null @@ -1,34 +0,0 @@ -from opencompass.models import HuggingFaceCausalLM - - -_meta_template = dict( - begin="", - round=[ - dict(role="HUMAN", begin='[INST] ', end=' [/INST]'), - dict(role="BOT", begin="", end=' ', generate=True), - ], -) - -models = [ - dict( - abbr='mixtral-8x7b-instruct-v0.1', - type=HuggingFaceCausalLM, - path='mistralai/Mixtral-8x7B-Instruct-v0.1', - tokenizer_path='mistralai/Mixtral-8x7B-Instruct-v0.1', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, - batch_size=8, - run_cfg=dict(num_gpus=2, num_procs=1), - batch_padding=True, - ) -] diff --git a/configs/models/mixtral/hf_mixtral_8x7b_v0_1.py b/configs/models/mixtral/hf_mixtral_8x7b_v0_1.py deleted file mode 100644 index 71d6489e..00000000 --- a/configs/models/mixtral/hf_mixtral_8x7b_v0_1.py +++ /dev/null @@ -1,24 +0,0 @@ -from opencompass.models import HuggingFaceCausalLM - - -models = [ - dict( - abbr='mixtral-8x7b-v0.1', - type=HuggingFaceCausalLM, - path='mistralai/Mixtral-8x7B-v0.1', - tokenizer_path='mistralai/Mixtral-8x7B-v0.1', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), - max_out_len=100, - max_seq_len=2048, - batch_size=8, - run_cfg=dict(num_gpus=2, num_procs=1), - ) -] diff --git a/configs/models/nanbeige/hf_nanbeige2_8b_chat.py b/configs/models/nanbeige/hf_nanbeige2_8b_chat.py index a399a5d4..cb9dd464 100644 --- a/configs/models/nanbeige/hf_nanbeige2_8b_chat.py +++ b/configs/models/nanbeige/hf_nanbeige2_8b_chat.py @@ -1,36 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - begin="<|im_start|>system\n你是一个名为\"南北阁\"的人工智能助手,正在与人类用户进行交谈。你的目标是以最有帮助和最逻辑的方式回答问题,同时确保内容的安全性。你的回答中不应包含任何有害、政治化、宗教化、不道德、种族主义、非法的内容。请确保你的回答不带有社会偏见,符合社会主义价值观。如果遇到的问题无意义或事实上不连贯,请不要回答错误的内容,而是解释问题为何无效或不连贯。如果你不知道问题的答案,也请勿提供错误的信息。<|im_end|>\n", - round=[ - dict(role='HUMAN', begin='<|im_start|>user\n', end='<|im_end|>\n'), - dict(role='BOT', begin='<|im_start|>assistant\n', end='<|im_end|>\n', generate=True), - ], 
-) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='nanbeige2-8b-chat-hf', - path="Nanbeige/Nanbeige2-8B-Chat", - tokenizer_path='Nanbeige/Nanbeige2-8B-Chat', - model_kwargs=dict( - device_map='auto', - torch_dtype='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='right', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - meta_template=_meta_template, - batch_padding=False, - max_out_len=100, - max_seq_len=4096, + path='Nanbeige/Nanbeige2-8B-Chat', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), - end_str='<|im_end|>', + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/nanbeige/hf_nanbeige_16b_base.py b/configs/models/nanbeige/hf_nanbeige_16b_base.py deleted file mode 100644 index 322f18a4..00000000 --- a/configs/models/nanbeige/hf_nanbeige_16b_base.py +++ /dev/null @@ -1,33 +0,0 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - round=[ - dict(role='HUMAN', begin='', end=''), - dict(role='BOT', begin='', end='\n\n', generate=True), - ], -) - -models = [ - dict( - abbr='nanbeige-16b-base-hf', - type=HuggingFaceCausalLM, - path='Nanbeige/Nanbeige-16B-Base', - tokenizer_path='Nanbeige/Nanbeige-16B-Base', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - torch_dtype='auto', - ), - tokenizer_kwargs=dict( - padding_side='right', - truncation_side='left', - trust_remote_code=True - ), - meta_template=_meta_template, - batch_padding=False, - max_out_len=1024, - max_seq_len=4096, - batch_size=1, - run_cfg=dict(num_gpus=1, num_procs=1), - ) -] diff --git a/configs/models/nanbeige/hf_nanbeige_16b_base_32k.py b/configs/models/nanbeige/hf_nanbeige_16b_base_32k.py deleted file mode 100644 index d0c1c2ea..00000000 --- a/configs/models/nanbeige/hf_nanbeige_16b_base_32k.py +++ /dev/null @@ -1,34 +0,0 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - round=[ - dict(role='HUMAN', begin='', end=''), - dict(role='BOT', begin='', end='\n\n', generate=True), - ], -) - -models = [ - dict( - type=HuggingFaceCausalLM, - abbr='nanbeige-16b-base-32k-hf', - path="Nanbeige/Nanbeige-16B-Base-32K", - tokenizer_path='Nanbeige/Nanbeige-16B-Base-32K', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - torch_dtype='auto', - ), - tokenizer_kwargs=dict( - padding_side='right', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - meta_template=_meta_template, - batch_padding=False, - max_out_len=1024, - max_seq_len=8192, - batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), - ) -] diff --git a/configs/models/nanbeige/hf_nanbeige_16b_chat_32k.py b/configs/models/nanbeige/hf_nanbeige_16b_chat_32k.py deleted file mode 100644 index 8363ae96..00000000 --- a/configs/models/nanbeige/hf_nanbeige_16b_chat_32k.py +++ /dev/null @@ -1,34 +0,0 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - round=[ - dict(role='HUMAN', begin='### Human: \n', end='\n\n'), - dict(role='BOT', begin='### Assistant: ', end='', generate=True), - ], -) - -models = [ - dict( - type=HuggingFaceCausalLM, - abbr='nanbeige-16b-chat-32k-hf', - path="Nanbeige/Nanbeige-16B-Chat-32K", - tokenizer_path='Nanbeige/Nanbeige-16B-Chat-32K', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - torch_dtype='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - 
trust_remote_code=True, - use_fast=False, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, - batch_size=8, - run_cfg=dict(num_gpus=2, num_procs=1), - end_str='', - ) -] diff --git a/configs/models/openbmb/hf_minicpm_2b_dpo_fp32.py b/configs/models/openbmb/hf_minicpm_2b_dpo_fp32.py index 1b40ef2a..d0969029 100644 --- a/configs/models/openbmb/hf_minicpm_2b_dpo_fp32.py +++ b/configs/models/openbmb/hf_minicpm_2b_dpo_fp32.py @@ -1,31 +1,12 @@ -from opencompass.models import HuggingFace - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='<用户>'), - dict(role="BOT", begin="", generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFace, - abbr='minicpm-2b-dpo-hf', + type=HuggingFacewithChatTemplate, + abbr='minicpm-2b-dpo-fp32-hf', path='openbmb/MiniCPM-2B-dpo-fp32', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), - batch_padding=True, + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/openbmb/hf_minicpm_2b_sft_bf16.py b/configs/models/openbmb/hf_minicpm_2b_sft_bf16.py new file mode 100644 index 00000000..43303b24 --- /dev/null +++ b/configs/models/openbmb/hf_minicpm_2b_sft_bf16.py @@ -0,0 +1,12 @@ +from opencompass.models import HuggingFacewithChatTemplate + +models = [ + dict( + type=HuggingFacewithChatTemplate, + abbr='minicpm-2b-sft-bf16-hf', + path='openbmb/MiniCPM-2B-sft-bf16', + max_out_len=1024, + batch_size=8, + run_cfg=dict(num_gpus=1), + ) +] diff --git a/configs/models/openbmb/hf_minicpm_2b_sft_fp32.py b/configs/models/openbmb/hf_minicpm_2b_sft_fp32.py index b8ea8c32..a13fbcd6 100644 --- a/configs/models/openbmb/hf_minicpm_2b_sft_fp32.py +++ b/configs/models/openbmb/hf_minicpm_2b_sft_fp32.py @@ -1,31 +1,12 @@ -from opencompass.models import HuggingFace - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='<用户>'), - dict(role="BOT", begin="", generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFace, - abbr='minicpm-2b-sft-hf', + type=HuggingFacewithChatTemplate, + abbr='minicpm-2b-sft-fp32-hf', path='openbmb/MiniCPM-2B-sft-fp32', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), - batch_padding=True, + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/opt/hf_opt_125m.py b/configs/models/opt/hf_opt_125m.py index 760e65b0..ec0c68d3 100644 --- a/configs/models/opt/hf_opt_125m.py +++ b/configs/models/opt/hf_opt_125m.py @@ -1,23 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel -# OPT-125M -opt125m = dict( - type=HuggingFaceCausalLM, - # the folowing are HuggingFaceCausalLM init parameters - path='facebook/opt-125m', - tokenizer_path='facebook/opt-125m', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - proxies=None, - trust_remote_code=True), - model_kwargs=dict(device_map='auto'), - max_seq_len=2048, - # the folowing are not HuggingFaceCausalLM init parameters - abbr='opt125m', 
# Model abbreviation - max_out_len=100, # Maximum number of generated tokens - batch_size=128, - run_cfg=dict(num_gpus=1), # Run configuration for specifying resource requirements +models = [ + dict( + type=HuggingFaceBaseModel, + abbr='opt-125m-hf', + path='facebook/opt-125m', + max_out_len=1024, + batch_size=64, + run_cfg=dict(num_gpus=1), ) - -models = [opt125m] +] diff --git a/configs/models/opt/hf_opt_350m.py b/configs/models/opt/hf_opt_350m.py index 33cbacc0..6a25db0a 100644 --- a/configs/models/opt/hf_opt_350m.py +++ b/configs/models/opt/hf_opt_350m.py @@ -1,23 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel -# OPT-350M -opt350m = dict( - type=HuggingFaceCausalLM, - # the folowing are HuggingFaceCausalLM init parameters - path='facebook/opt-350m', - tokenizer_path='facebook/opt-350m', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - proxies=None, - trust_remote_code=True), - model_kwargs=dict(device_map='auto'), - max_seq_len=2048, - # the folowing are not HuggingFaceCausalLM init parameters - abbr='opt350m', # Model abbreviation - max_out_len=100, # Maximum number of generated tokens - batch_size=64, - run_cfg=dict(num_gpus=1), # Run configuration for specifying resource requirements +models = [ + dict( + type=HuggingFaceBaseModel, + abbr='opt-350m-hf', + path='facebook/opt-350m', + max_out_len=1024, + batch_size=32, + run_cfg=dict(num_gpus=1), ) - -models = [opt350m] +] diff --git a/configs/models/others/hf_command_r_plus.py b/configs/models/others/hf_command_r_plus.py index ce41ab3d..bdbf924f 100644 --- a/configs/models/others/hf_command_r_plus.py +++ b/configs/models/others/hf_command_r_plus.py @@ -1,25 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='<|START_OF_TURN_TOKEN|><|USER_TOKEN|>', end='<|END_OF_TURN_TOKEN|>'), - dict(role="BOT", begin="<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>", end='<|END_OF_TURN_TOKEN|>', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='command-r-plus-hf', - path="CohereForAI/c4ai-command-r-plus", - model_kwargs=dict(device_map='auto', trust_remote_code=True), - tokenizer_kwargs=dict(padding_side='left', truncation_side='left', trust_remote_code=True), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, + path='CohereForAI/c4ai-command-r-plus', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=8, num_procs=1), - end_str='<|END_OF_TURN_TOKEN|>', - batch_padding=True, + run_cfg=dict(num_gpus=8), ) ] diff --git a/configs/models/others/hf_dbrx_base.py b/configs/models/others/hf_dbrx_base.py new file mode 100644 index 00000000..985e6add --- /dev/null +++ b/configs/models/others/hf_dbrx_base.py @@ -0,0 +1,12 @@ +from opencompass.models import HuggingFaceBaseModel + +models = [ + dict( + type=HuggingFaceBaseModel, + abbr='dbrx-base-hf', + path='databricks/dbrx-base', + max_out_len=1024, + batch_size=8, + run_cfg=dict(num_gpus=8), + ) +] diff --git a/configs/models/others/hf_dbrx_instruct.py b/configs/models/others/hf_dbrx_instruct.py index af0a54b7..a207f3bb 100644 --- a/configs/models/others/hf_dbrx_instruct.py +++ b/configs/models/others/hf_dbrx_instruct.py @@ -1,34 +1,12 @@ - -from opencompass.models import HuggingFaceCausalLM - - -_meta_template = dict( - round=[ - dict(role='HUMAN', begin='<|im_start|>user\n', end='<|im_end|>\n'), - 
dict(role='BOT', begin='<|im_start|>assistant\n', end='<|im_end|>\n', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='dbrx-instruct-hf', - path="databricks/dbrx-instruct", - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - trust_remote_code=True, - ), - max_out_len=100, - max_seq_len=2048, + path='databricks/dbrx-instruct', + max_out_len=1024, batch_size=8, - meta_template=_meta_template, - run_cfg=dict(num_gpus=8, num_procs=1), - batch_padding=True, + run_cfg=dict(num_gpus=8), ) ] diff --git a/configs/models/others/hf_dolphin_21_mistral_7b.py b/configs/models/others/hf_dolphin_21_mistral_7b.py index ecc0b196..89da9f5f 100644 --- a/configs/models/others/hf_dolphin_21_mistral_7b.py +++ b/configs/models/others/hf_dolphin_21_mistral_7b.py @@ -6,7 +6,6 @@ _meta_template = dict( dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'), dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True), ], - eos_token_id=2 ) models = [ diff --git a/configs/models/others/hf_fashiongpt_70b_v11.py b/configs/models/others/hf_fashiongpt_70b_v11.py index dbb2d7e4..f4ddcaab 100644 --- a/configs/models/others/hf_fashiongpt_70b_v11.py +++ b/configs/models/others/hf_fashiongpt_70b_v11.py @@ -6,7 +6,6 @@ _meta_template = dict( dict(role="HUMAN", begin='### User:\n', end='\n'), dict(role="BOT", begin="### Assistant:\n", generate=True), ], - eos_token_id=2 ) models = [ diff --git a/configs/models/others/hf_orionstar_yi_34b_chat.py b/configs/models/others/hf_orionstar_yi_34b_chat.py index 9fba307b..ab8928db 100644 --- a/configs/models/others/hf_orionstar_yi_34b_chat.py +++ b/configs/models/others/hf_orionstar_yi_34b_chat.py @@ -7,7 +7,6 @@ _meta_template = dict( dict(role="HUMAN", begin='Human: ', end='\n\n'), dict(role="BOT", begin="Assistant: <|endoftext|>", end='<|endoftext|>', generate=True), ], - eos_token_id=2 ) models = [ diff --git a/configs/models/others/hf_telechat_7b_chat.py b/configs/models/others/hf_telechat_7b_chat.py index 58c496e3..60dbe28e 100644 --- a/configs/models/others/hf_telechat_7b_chat.py +++ b/configs/models/others/hf_telechat_7b_chat.py @@ -6,7 +6,6 @@ _meta_template = dict( dict(role="HUMAN", begin='<_user>'), dict(role="BOT", begin="<_bot>", end='<_end>', generate=True), ], - eos_token_id=160133 ) models = [ diff --git a/configs/models/others/vllm_orionstar_14b_longchat.py b/configs/models/others/vllm_orionstar_14b_longchat.py index 67ca61d0..873f31e2 100644 --- a/configs/models/others/vllm_orionstar_14b_longchat.py +++ b/configs/models/others/vllm_orionstar_14b_longchat.py @@ -7,7 +7,6 @@ _meta_template = dict( dict(role="HUMAN", begin='Human: ', end='\n'), dict(role="BOT", begin="Assistant: ", end='', generate=True), ], - eos_token_id=2 ) models = [ diff --git a/configs/models/qwen/hf_qwen1_5_0_5b.py b/configs/models/qwen/hf_qwen1_5_0_5b.py index 62a219f0..60014be6 100644 --- a/configs/models/qwen/hf_qwen1_5_0_5b.py +++ b/configs/models/qwen/hf_qwen1_5_0_5b.py @@ -1,25 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='qwen1.5-0.5b-hf', - path="Qwen/Qwen1.5-0.5B", - tokenizer_path='Qwen/Qwen1.5-0.5B', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True - ), - 
tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - pad_token_id=151645, - max_out_len=100, - max_seq_len=2048, + path='Qwen/Qwen1.5-0.5B', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/qwen/hf_qwen1_5_0_5b_chat.py b/configs/models/qwen/hf_qwen1_5_0_5b_chat.py index c7413332..eb5c22da 100644 --- a/configs/models/qwen/hf_qwen1_5_0_5b_chat.py +++ b/configs/models/qwen/hf_qwen1_5_0_5b_chat.py @@ -1,33 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'), - dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='qwen1.5-0.5b-chat-hf', - path="Qwen/Qwen1.5-0.5B-Chat", - model_kwargs=dict( - device_map='auto', - trust_remote_code=True - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, + path='Qwen/Qwen1.5-0.5B-Chat', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=4, num_procs=1), - end_str='<|im_end|>', - batch_padding=True, + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/qwen/hf_qwen1_5_14b.py b/configs/models/qwen/hf_qwen1_5_14b.py index 1f6d1709..c338d1b9 100644 --- a/configs/models/qwen/hf_qwen1_5_14b.py +++ b/configs/models/qwen/hf_qwen1_5_14b.py @@ -1,25 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='qwen1.5-14b-hf', - path="Qwen/Qwen1.5-14B", - tokenizer_path='Qwen/Qwen1.5-14B', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - pad_token_id=151645, - max_out_len=100, - max_seq_len=2048, + path='Qwen/Qwen1.5-14B', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=2, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/qwen/hf_qwen1_5_14b_chat.py b/configs/models/qwen/hf_qwen1_5_14b_chat.py index f6bff1f9..81efa2a3 100644 --- a/configs/models/qwen/hf_qwen1_5_14b_chat.py +++ b/configs/models/qwen/hf_qwen1_5_14b_chat.py @@ -1,33 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'), - dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='qwen1.5-14b-chat-hf', - path="Qwen/Qwen1.5-14B-Chat", - model_kwargs=dict( - device_map='auto', - trust_remote_code=True - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, + path='Qwen/Qwen1.5-14B-Chat', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=4, num_procs=1), - end_str='<|im_end|>', - batch_padding=True, + run_cfg=dict(num_gpus=1), ) ] diff --git 
a/configs/models/qwen/hf_qwen1_5_1_8b.py b/configs/models/qwen/hf_qwen1_5_1_8b.py index 71492cf8..5caf3efa 100644 --- a/configs/models/qwen/hf_qwen1_5_1_8b.py +++ b/configs/models/qwen/hf_qwen1_5_1_8b.py @@ -1,25 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='qwen1.5-1.8b-hf', - path="Qwen/Qwen1.5-1.8B", - tokenizer_path='Qwen/Qwen1.5-1.8B', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - pad_token_id=151645, - max_out_len=100, - max_seq_len=2048, + path='Qwen/Qwen1.5-1.8B', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/qwen/hf_qwen1_5_1_8b_chat.py b/configs/models/qwen/hf_qwen1_5_1_8b_chat.py index 4e090de0..e1682e07 100644 --- a/configs/models/qwen/hf_qwen1_5_1_8b_chat.py +++ b/configs/models/qwen/hf_qwen1_5_1_8b_chat.py @@ -1,33 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'), - dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='qwen1.5-1.8b-chat-hf', - path="Qwen/Qwen1.5-1.8B-Chat", - model_kwargs=dict( - device_map='auto', - trust_remote_code=True - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, + path='Qwen/Qwen1.5-1.8B-Chat', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=4, num_procs=1), - end_str='<|im_end|>', - batch_padding=True, + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/qwen/hf_qwen1_5_32b.py b/configs/models/qwen/hf_qwen1_5_32b.py index 9ad947af..e886873d 100644 --- a/configs/models/qwen/hf_qwen1_5_32b.py +++ b/configs/models/qwen/hf_qwen1_5_32b.py @@ -1,25 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='qwen1.5-32b-hf', - path="Qwen/Qwen1.5-32B", - tokenizer_path='Qwen/Qwen1.5-32B', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - pad_token_id=151645, - max_out_len=100, - max_seq_len=2048, + path='Qwen/Qwen1.5-32B', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=2, num_procs=1), + run_cfg=dict(num_gpus=2), ) ] diff --git a/configs/models/qwen/hf_qwen1_5_32b_chat.py b/configs/models/qwen/hf_qwen1_5_32b_chat.py index 1e215ff6..03506d13 100644 --- a/configs/models/qwen/hf_qwen1_5_32b_chat.py +++ b/configs/models/qwen/hf_qwen1_5_32b_chat.py @@ -1,33 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'), - dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + 
type=HuggingFacewithChatTemplate, abbr='qwen1.5-32b-chat-hf', - path="Qwen/Qwen1.5-32B-Chat", - model_kwargs=dict( - device_map='auto', - trust_remote_code=True - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, + path='Qwen/Qwen1.5-32B-Chat', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=2, num_procs=1), - end_str='<|im_end|>', - batch_padding=True, + run_cfg=dict(num_gpus=2), ) ] diff --git a/configs/models/qwen/hf_qwen1_5_4b.py b/configs/models/qwen/hf_qwen1_5_4b.py index 6aa57263..e63eaec4 100644 --- a/configs/models/qwen/hf_qwen1_5_4b.py +++ b/configs/models/qwen/hf_qwen1_5_4b.py @@ -1,25 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='qwen1.5-4b-hf', - path="Qwen/Qwen1.5-4B", - tokenizer_path='Qwen/Qwen1.5-4B', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - pad_token_id=151645, - max_out_len=100, - max_seq_len=2048, + path='Qwen/Qwen1.5-4B', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/qwen/hf_qwen1_5_4b_chat.py b/configs/models/qwen/hf_qwen1_5_4b_chat.py index 427c7849..32475a5f 100644 --- a/configs/models/qwen/hf_qwen1_5_4b_chat.py +++ b/configs/models/qwen/hf_qwen1_5_4b_chat.py @@ -1,33 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'), - dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='qwen1.5-4b-chat-hf', - path="Qwen/Qwen1.5-4B-Chat", - model_kwargs=dict( - device_map='auto', - trust_remote_code=True - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, + path='Qwen/Qwen1.5-4B-Chat', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=4, num_procs=1), - end_str='<|im_end|>', - batch_padding=True, + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/qwen/hf_qwen1_5_72b.py b/configs/models/qwen/hf_qwen1_5_72b.py index 3dd6e638..d850b236 100644 --- a/configs/models/qwen/hf_qwen1_5_72b.py +++ b/configs/models/qwen/hf_qwen1_5_72b.py @@ -1,25 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='qwen1.5-72b-hf', - path="Qwen/Qwen1.5-72B", - tokenizer_path='Qwen/Qwen1.5-72B', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - pad_token_id=151645, - max_out_len=100, - max_seq_len=2048, + path='Qwen/Qwen1.5-72B', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=4, num_procs=1), + run_cfg=dict(num_gpus=4), ) ] diff --git a/configs/models/qwen/hf_qwen1_5_72b_chat.py 
b/configs/models/qwen/hf_qwen1_5_72b_chat.py index f0279442..1ff66255 100644 --- a/configs/models/qwen/hf_qwen1_5_72b_chat.py +++ b/configs/models/qwen/hf_qwen1_5_72b_chat.py @@ -1,33 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'), - dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='qwen1.5-72b-chat-hf', - path="Qwen/Qwen1.5-72B-Chat", - model_kwargs=dict( - device_map='auto', - trust_remote_code=True - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, + path='Qwen/Qwen1.5-72B-Chat', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=4, num_procs=1), - end_str='<|im_end|>', - batch_padding=True, + run_cfg=dict(num_gpus=4), ) ] diff --git a/configs/models/qwen/hf_qwen1_5_7b.py b/configs/models/qwen/hf_qwen1_5_7b.py index d9df3031..2649ffff 100644 --- a/configs/models/qwen/hf_qwen1_5_7b.py +++ b/configs/models/qwen/hf_qwen1_5_7b.py @@ -1,25 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='qwen1.5-7b-hf', - path="Qwen/Qwen1.5-7B", - tokenizer_path='Qwen/Qwen1.5-7B', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - pad_token_id=151645, - max_out_len=100, - max_seq_len=2048, + path='Qwen/Qwen1.5-7B', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/qwen/hf_qwen1_5_7b_chat.py b/configs/models/qwen/hf_qwen1_5_7b_chat.py index 43825c22..b62c3bed 100644 --- a/configs/models/qwen/hf_qwen1_5_7b_chat.py +++ b/configs/models/qwen/hf_qwen1_5_7b_chat.py @@ -1,33 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'), - dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='qwen1.5-7b-chat-hf', - path="Qwen/Qwen1.5-7B-Chat", - model_kwargs=dict( - device_map='auto', - trust_remote_code=True - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, + path='Qwen/Qwen1.5-7B-Chat', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=4, num_procs=1), - end_str='<|im_end|>', - batch_padding=True, + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/qwen/hf_qwen_14b.py b/configs/models/qwen/hf_qwen_14b.py index 83c62867..8c15c032 100644 --- a/configs/models/qwen/hf_qwen_14b.py +++ b/configs/models/qwen/hf_qwen_14b.py @@ -1,26 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='qwen-14b-hf', - 
path="Qwen/Qwen-14B", - tokenizer_path='Qwen/Qwen-14B', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - pad_token_id=151643, - min_out_len=1, - max_out_len=100, - max_seq_len=2048, + path='Qwen/Qwen-14B', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/qwen/hf_qwen_14b_chat.py b/configs/models/qwen/hf_qwen_14b_chat.py index 47a609bc..4d9cccee 100644 --- a/configs/models/qwen/hf_qwen_14b_chat.py +++ b/configs/models/qwen/hf_qwen_14b_chat.py @@ -1,35 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='\n<|im_start|>user\n', end='<|im_end|>'), - dict(role="BOT", begin="\n<|im_start|>assistant\n", end='<|im_end|>', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='qwen-14b-chat-hf', - path="Qwen/Qwen-14B-Chat", - tokenizer_path='Qwen/Qwen-14B-Chat', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - pad_token_id=151643, - max_out_len=100, - max_seq_len=2048, + path='Qwen/Qwen-14B-Chat', + max_out_len=1024, batch_size=8, - meta_template=_meta_template, - run_cfg=dict(num_gpus=1, num_procs=1), - end_str='<|im_end|>', + run_cfg=dict(num_gpus=1), ) -] \ No newline at end of file +] diff --git a/configs/models/qwen/hf_qwen_1_8b.py b/configs/models/qwen/hf_qwen_1_8b.py index 7ba7ddba..f82d3db7 100644 --- a/configs/models/qwen/hf_qwen_1_8b.py +++ b/configs/models/qwen/hf_qwen_1_8b.py @@ -1,26 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='qwen-1.8b-hf', - path="Qwen/Qwen-1_8B", - tokenizer_path='Qwen/Qwen-1_8B', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - pad_token_id=151643, - min_out_len=1, - max_out_len=100, - max_seq_len=2048, + path='Qwen/Qwen-1_8B', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/qwen/hf_qwen_1_8b_chat.py b/configs/models/qwen/hf_qwen_1_8b_chat.py index fb4f488a..1838a04a 100644 --- a/configs/models/qwen/hf_qwen_1_8b_chat.py +++ b/configs/models/qwen/hf_qwen_1_8b_chat.py @@ -1,35 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='\n<|im_start|>user\n', end='<|im_end|>'), - dict(role="BOT", begin="\n<|im_start|>assistant\n", end='<|im_end|>', generate=True), - ], - eos_token_id=151645, -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='qwen-1.8b-chat-hf', - path="Qwen/Qwen-1_8B-Chat", - tokenizer_path='Qwen/Qwen-1_8B-Chat', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - pad_token_id=151643, - max_out_len=100, - 
max_seq_len=2048, + path='Qwen/Qwen-1_8B-Chat', + max_out_len=1024, batch_size=8, - meta_template=_meta_template, - run_cfg=dict(num_gpus=1, num_procs=1), - end_str='<|im_end|>', + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/qwen/hf_qwen_72b.py b/configs/models/qwen/hf_qwen_72b.py index 686a435d..325315b8 100644 --- a/configs/models/qwen/hf_qwen_72b.py +++ b/configs/models/qwen/hf_qwen_72b.py @@ -1,26 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='qwen-72b-hf', - path="Qwen/Qwen-72B", - tokenizer_path='Qwen/Qwen-72B', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - pad_token_id=151643, - min_out_len=1, - max_out_len=100, - max_seq_len=2048, + path='Qwen/Qwen-72B', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=4, num_procs=1), + run_cfg=dict(num_gpus=4), ) ] diff --git a/configs/models/qwen/hf_qwen_72b_chat.py b/configs/models/qwen/hf_qwen_72b_chat.py index 83da466f..255aeb5d 100644 --- a/configs/models/qwen/hf_qwen_72b_chat.py +++ b/configs/models/qwen/hf_qwen_72b_chat.py @@ -1,34 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='\n<|im_start|>user\n', end='<|im_end|>'), - dict(role="BOT", begin="\n<|im_start|>assistant\n", end='<|im_end|>', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='qwen-72b-chat-hf', - path="Qwen/Qwen-72B-Chat", - tokenizer_path='Qwen/Qwen-72B-Chat', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False,), - pad_token_id=151643, - max_out_len=100, - max_seq_len=2048, + path='Qwen/Qwen-72B-Chat', + max_out_len=1024, batch_size=8, - meta_template=_meta_template, - run_cfg=dict(num_gpus=4, num_procs=1), - end_str='<|im_end|>', + run_cfg=dict(num_gpus=4), ) ] diff --git a/configs/models/qwen/hf_qwen_7b.py b/configs/models/qwen/hf_qwen_7b.py index cb60c156..17ba5b47 100644 --- a/configs/models/qwen/hf_qwen_7b.py +++ b/configs/models/qwen/hf_qwen_7b.py @@ -1,26 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='qwen-7b-hf', - path="Qwen/Qwen-7B", - tokenizer_path='Qwen/Qwen-7B', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - pad_token_id=151643, - min_out_len=1, - max_out_len=100, - max_seq_len=2048, + path='Qwen/Qwen-7B', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/qwen/hf_qwen_7b_chat.py b/configs/models/qwen/hf_qwen_7b_chat.py index 88dda4f0..e5479fb6 100644 --- a/configs/models/qwen/hf_qwen_7b_chat.py +++ b/configs/models/qwen/hf_qwen_7b_chat.py @@ -1,35 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM - - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='\n<|im_start|>user\n', end='<|im_end|>'), - dict(role="BOT", 
begin="\n<|im_start|>assistant\n", end='<|im_end|>', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='qwen-7b-chat-hf', - path="Qwen/Qwen-7B-Chat", - tokenizer_path='Qwen/Qwen-7B-Chat', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - pad_token_id=151643, - max_out_len=100, - max_seq_len=2048, + path='Qwen/Qwen-7B-Chat', + max_out_len=1024, batch_size=8, - meta_template=_meta_template, - run_cfg=dict(num_gpus=1, num_procs=1), - end_str='<|im_end|>', + run_cfg=dict(num_gpus=1), ) -] \ No newline at end of file +] diff --git a/configs/models/qwen/vllm_qwen1_5_14b_chat.py b/configs/models/qwen/vllm_qwen1_5_14b_chat.py index 15cd97bb..4af72729 100644 --- a/configs/models/qwen/vllm_qwen1_5_14b_chat.py +++ b/configs/models/qwen/vllm_qwen1_5_14b_chat.py @@ -6,7 +6,6 @@ _meta_template = dict( dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'), dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True), ], - eos_token_id=151645, ) models = [ diff --git a/configs/models/qwen/vllm_qwen1_5_72b_chat.py b/configs/models/qwen/vllm_qwen1_5_72b_chat.py index 035c7a8a..68f1e73c 100644 --- a/configs/models/qwen/vllm_qwen1_5_72b_chat.py +++ b/configs/models/qwen/vllm_qwen1_5_72b_chat.py @@ -6,7 +6,6 @@ _meta_template = dict( dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'), dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True), ], - eos_token_id=151645, ) models = [ diff --git a/configs/models/skywork/hf_skywork_13b.py b/configs/models/skywork/hf_skywork_13b.py index 495a3392..1b56c3a6 100644 --- a/configs/models/skywork/hf_skywork_13b.py +++ b/configs/models/skywork/hf_skywork_13b.py @@ -1,24 +1,12 @@ -from opencompass.models import HuggingFaceCausalLM +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFaceBaseModel, abbr='skywork-13b-hf', - path="Skywork/Skywork-13B-base", - tokenizer_path='Skywork/Skywork-13B-base', - model_kwargs=dict( - device_map='auto', - trust_remote_code=True, - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - use_fast=False, - ), - max_out_len=100, - max_seq_len=2048, + path='Skywork/Skywork-13B-base', + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/vicuna/hf_vicuna_13b_v13.py b/configs/models/vicuna/hf_vicuna_13b_v13.py index 6a04a3c4..74f4e147 100644 --- a/configs/models/vicuna/hf_vicuna_13b_v13.py +++ b/configs/models/vicuna/hf_vicuna_13b_v13.py @@ -1,23 +1,13 @@ -from opencompass.models import HuggingFaceCausalLM - +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='vicuna-13b-v1.3-hf', - path="lmsys/vicuna-13b-v1.3", - tokenizer_path='lmsys/vicuna-13b-v1.3', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - ), - max_out_len=100, - max_seq_len=2048, + path='lmsys/vicuna-13b-v1.3', + max_out_len=1024, batch_size=8, - model_kwargs=dict(device_map='auto'), - batch_padding=False, # if false, inference with for-loop without batch padding - use_fastchat_template=True, - run_cfg=dict(num_gpus=2, 
num_procs=1) + run_cfg=dict(num_gpus=1), + fastchat_template='vicuna', ) ] diff --git a/configs/models/vicuna/hf_vicuna_13b_v15.py b/configs/models/vicuna/hf_vicuna_13b_v15.py index c87b9dc7..28366ea9 100644 --- a/configs/models/vicuna/hf_vicuna_13b_v15.py +++ b/configs/models/vicuna/hf_vicuna_13b_v15.py @@ -1,23 +1,13 @@ -from opencompass.models import HuggingFaceCausalLM - +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='vicuna-13b-v1.5-hf', - path="lmsys/vicuna-13b-v1.5", - tokenizer_path='lmsys/vicuna-13b-v1.5', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - ), - max_out_len=100, - max_seq_len=2048, + path='lmsys/vicuna-13b-v1.5', + max_out_len=1024, batch_size=8, - model_kwargs=dict(device_map='auto'), - batch_padding=False, # if false, inference with for-loop without batch padding - use_fastchat_template=True, - run_cfg=dict(num_gpus=1, num_procs=1) + run_cfg=dict(num_gpus=1), + fastchat_template='vicuna', ) ] diff --git a/configs/models/vicuna/hf_vicuna_13b_v15_16k.py b/configs/models/vicuna/hf_vicuna_13b_v15_16k.py index a8e2aa5f..3caf3f57 100644 --- a/configs/models/vicuna/hf_vicuna_13b_v15_16k.py +++ b/configs/models/vicuna/hf_vicuna_13b_v15_16k.py @@ -1,30 +1,13 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='USER: '), - dict(role="BOT", begin=" ASSISTANT:", end='', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='vicuna-13b-v1.5-16k-hf', - path="lmsys/vicuna-13b-v1.5-16k", - tokenizer_path='lmsys/vicuna-13b-v1.5-16k', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=8192, + path='lmsys/vicuna-13b-v1.5-16k', + max_out_len=1024, batch_size=8, - model_kwargs=dict(device_map='auto'), - batch_padding=False, # if false, inference with for-loop without batch padding - run_cfg=dict(num_gpus=2, num_procs=1), - end_str='', + run_cfg=dict(num_gpus=1), + fastchat_template='vicuna', ) ] diff --git a/configs/models/vicuna/hf_vicuna_33b_v13.py b/configs/models/vicuna/hf_vicuna_33b_v13.py index 0f280e63..036cbc63 100644 --- a/configs/models/vicuna/hf_vicuna_33b_v13.py +++ b/configs/models/vicuna/hf_vicuna_33b_v13.py @@ -1,23 +1,13 @@ -from opencompass.models import HuggingFaceCausalLM - +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='vicuna-33b-v1.3-hf', - path="lmsys/vicuna-33b-v1.3", - tokenizer_path='lmsys/vicuna-33b-v1.3', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - ), - max_out_len=100, - max_seq_len=2048, + path='lmsys/vicuna-33b-v1.3', + max_out_len=1024, batch_size=8, - model_kwargs=dict(device_map='auto'), - batch_padding=False, # if false, inference with for-loop without batch padding - use_fastchat_template=True, - run_cfg=dict(num_gpus=4, num_procs=1) + run_cfg=dict(num_gpus=2), + fastchat_template='vicuna', ) ] diff --git a/configs/models/vicuna/hf_vicuna_7b_v13.py b/configs/models/vicuna/hf_vicuna_7b_v13.py index 67e1c79b..39626455 100644 --- a/configs/models/vicuna/hf_vicuna_7b_v13.py +++ b/configs/models/vicuna/hf_vicuna_7b_v13.py @@ -1,23 +1,13 @@ -from opencompass.models import 
HuggingFaceCausalLM - +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='vicuna-7b-v1.3-hf', - path="lmsys/vicuna-7b-v1.3", - tokenizer_path='lmsys/vicuna-7b-v1.3', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - ), - max_out_len=100, - max_seq_len=2048, + path='lmsys/vicuna-7b-v1.3', + max_out_len=1024, batch_size=8, - model_kwargs=dict(device_map='auto'), - batch_padding=False, # if false, inference with for-loop without batch padding - use_fastchat_template=True, - run_cfg=dict(num_gpus=1, num_procs=1) + run_cfg=dict(num_gpus=1), + fastchat_template='vicuna', ) ] diff --git a/configs/models/vicuna/hf_vicuna_7b_v15.py b/configs/models/vicuna/hf_vicuna_7b_v15.py index 06f3ef73..b7888f04 100644 --- a/configs/models/vicuna/hf_vicuna_7b_v15.py +++ b/configs/models/vicuna/hf_vicuna_7b_v15.py @@ -1,23 +1,13 @@ -from opencompass.models import HuggingFaceCausalLM - +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='vicuna-7b-v1.5-hf', - path="lmsys/vicuna-7b-v1.5", - tokenizer_path='lmsys/vicuna-7b-v1.5', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - ), - max_out_len=100, - max_seq_len=2048, + path='lmsys/vicuna-7b-v1.5', + max_out_len=1024, batch_size=8, - model_kwargs=dict(device_map='auto'), - batch_padding=False, # if false, inference with for-loop without batch padding - use_fastchat_template=True, - run_cfg=dict(num_gpus=1, num_procs=1) + run_cfg=dict(num_gpus=1), + fastchat_template='vicuna', ) ] diff --git a/configs/models/vicuna/hf_vicuna_7b_v15_16k.py b/configs/models/vicuna/hf_vicuna_7b_v15_16k.py index e8ad47df..c8b557ab 100644 --- a/configs/models/vicuna/hf_vicuna_7b_v15_16k.py +++ b/configs/models/vicuna/hf_vicuna_7b_v15_16k.py @@ -1,30 +1,13 @@ -from opencompass.models import HuggingFaceCausalLM - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='USER: '), - dict(role="BOT", begin=" ASSISTANT:", end='', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFaceCausalLM, + type=HuggingFacewithChatTemplate, abbr='vicuna-7b-v1.5-16k-hf', - path="lmsys/vicuna-7b-v1.5-16k", - tokenizer_path='lmsys/vicuna-7b-v1.5-16k', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - use_fast=False, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=8192, + path='lmsys/vicuna-7b-v1.5-16k', + max_out_len=1024, batch_size=8, - model_kwargs=dict(device_map='auto'), - batch_padding=False, # if false, inference with for-loop without batch padding - run_cfg=dict(num_gpus=1, num_procs=1), - end_str='', + run_cfg=dict(num_gpus=1), + fastchat_template='vicuna', ) ] diff --git a/configs/models/yi/hf_yi_34b.py b/configs/models/yi/hf_yi_34b.py index 3f20f416..7fc59d46 100644 --- a/configs/models/yi/hf_yi_34b.py +++ b/configs/models/yi/hf_yi_34b.py @@ -1,24 +1,12 @@ -from opencompass.models import HuggingFace - +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFace, + type=HuggingFaceBaseModel, abbr='yi-34b-hf', path='01-ai/Yi-34B', - tokenizer_path='01-ai/Yi-34B', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), - max_out_len=100, - max_seq_len=2048, + 
max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=4, num_procs=1), + run_cfg=dict(num_gpus=2), ) ] diff --git a/configs/models/yi/hf_yi_34b_200k.py b/configs/models/yi/hf_yi_34b_200k.py deleted file mode 100644 index a8e207bf..00000000 --- a/configs/models/yi/hf_yi_34b_200k.py +++ /dev/null @@ -1,24 +0,0 @@ -from opencompass.models import HuggingFace - - -models = [ - dict( - type=HuggingFace, - abbr='yi-34b-200k-hf', - path='01-ai/Yi-34B-200K', - tokenizer_path='01-ai/Yi-34B-200K', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), - max_out_len=100, - max_seq_len=2048, - batch_size=8, - run_cfg=dict(num_gpus=4, num_procs=1), - ) -] diff --git a/configs/models/yi/hf_yi_34b_chat.py b/configs/models/yi/hf_yi_34b_chat.py index 352c58bf..635ee3b8 100644 --- a/configs/models/yi/hf_yi_34b_chat.py +++ b/configs/models/yi/hf_yi_34b_chat.py @@ -1,32 +1,12 @@ -from opencompass.models import HuggingFace - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'), - dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFace, + type=HuggingFacewithChatTemplate, abbr='yi-34b-chat-hf', path='01-ai/Yi-34B-Chat', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=2, num_procs=1), - end_str='<|im_end|>', - batch_padding=True, + run_cfg=dict(num_gpus=2), ) ] diff --git a/configs/models/yi/hf_yi_6b.py b/configs/models/yi/hf_yi_6b.py index c376d868..6ce3b134 100644 --- a/configs/models/yi/hf_yi_6b.py +++ b/configs/models/yi/hf_yi_6b.py @@ -1,24 +1,12 @@ -from opencompass.models import HuggingFace - +from opencompass.models import HuggingFaceBaseModel models = [ dict( - type=HuggingFace, + type=HuggingFaceBaseModel, abbr='yi-6b-hf', path='01-ai/Yi-6B', - tokenizer_path='01-ai/Yi-6B', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), - max_out_len=100, - max_seq_len=2048, + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/yi/hf_yi_6b_200k.py b/configs/models/yi/hf_yi_6b_200k.py deleted file mode 100644 index bc2cd125..00000000 --- a/configs/models/yi/hf_yi_6b_200k.py +++ /dev/null @@ -1,23 +0,0 @@ -from opencompass.models import HuggingFace - -models = [ - dict( - type=HuggingFace, - abbr='yi-6b-200k-hf', - path='01-ai/Yi-6B-200K', - tokenizer_path='01-ai/Yi-6B-200K', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), - max_out_len=100, - max_seq_len=2048, - batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), - ) -] diff --git a/configs/models/yi/hf_yi_6b_chat.py b/configs/models/yi/hf_yi_6b_chat.py index 92a46e69..f04f1102 100644 --- a/configs/models/yi/hf_yi_6b_chat.py +++ b/configs/models/yi/hf_yi_6b_chat.py @@ -1,33 +1,12 @@ -from opencompass.models import HuggingFace - -_meta_template = dict( - round=[ - 
dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'), - dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFace, + type=HuggingFacewithChatTemplate, abbr='yi-6b-chat-hf', path='01-ai/Yi-6B-Chat', - tokenizer_path='01-ai/Yi-6B-Chat', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), - end_str='<|im_end|>', - batch_padding=True, + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/models/zephyr/hf_zephyr_7b_beta.py b/configs/models/zephyr/hf_zephyr_7b_beta.py index 916ebe2c..da58c31e 100644 --- a/configs/models/zephyr/hf_zephyr_7b_beta.py +++ b/configs/models/zephyr/hf_zephyr_7b_beta.py @@ -1,32 +1,12 @@ -from opencompass.models import HuggingFace - -_meta_template = dict( - round=[ - dict(role="HUMAN", begin='<|user|>\n', end=''), - dict(role="BOT", begin="<|assistant|>\n", end='', generate=True), - ], -) +from opencompass.models import HuggingFacewithChatTemplate models = [ dict( - type=HuggingFace, + type=HuggingFacewithChatTemplate, abbr='zephyr-7b-beta-hf', path='HuggingFaceH4/zephyr-7b-beta', - tokenizer_path='HuggingFaceH4/zephyr-7b-beta', - model_kwargs=dict( - trust_remote_code=True, - device_map='auto', - ), - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - trust_remote_code=True, - ), - meta_template=_meta_template, - max_out_len=100, - max_seq_len=2048, + max_out_len=1024, batch_size=8, - run_cfg=dict(num_gpus=1, num_procs=1), - end_str='', + run_cfg=dict(num_gpus=1), ) ] diff --git a/configs/summarizers/chat_OC15.py b/configs/summarizers/chat_OC15.py new file mode 100644 index 00000000..6379a115 --- /dev/null +++ b/configs/summarizers/chat_OC15.py @@ -0,0 +1,81 @@ +from mmengine.config import read_base + +with read_base(): + from .groups.mmlu import mmlu_summary_groups + from .groups.cmmlu import cmmlu_summary_groups + from .groups.ceval import ceval_summary_groups + from .groups.bbh import bbh_summary_groups + from .groups.GaokaoBench import GaokaoBench_summary_groups + from .groups.lcbench import lcbench_summary_groups + +other_summary_groups = [ + { + 'name': 'average', + 'subsets': [ + ['mmlu', 'naive_average'], + ['cmmlu', 'naive_average'], + ['ceval', 'naive_average'], + ['GaokaoBench', 'weighted_average'], + ['triviaqa_wiki_1shot', 'score'], + ['nq_open_1shot', 'score'], + ['race-high', 'accuracy'], + ['winogrande', 'accuracy'], + ['hellaswag', 'accuracy'], + ['bbh', 'naive_average'], + ['gsm8k', 'accuracy'], + ['math', 'accuracy'], + ['TheoremQA', 'score'], + ['openai_humaneval', 'humaneval_pass@1'], + ['sanitized_mbpp', 'score'], + ['GPQA_diamond', 'accuracy'], + ['IFEval', 'Prompt-level-strict-accuracy'], + ], + }, +] + +summarizer = dict( + dataset_abbrs=[ + ['average', 'naive_average'], + ['mmlu', 'naive_average'], + ['cmmlu', 'naive_average'], + ['ceval', 'naive_average'], + ['GaokaoBench', 'weighted_average'], + ['triviaqa_wiki_1shot', 'score'], + ['nq_open_1shot', 'score'], + ['race-high', 'accuracy'], + ['winogrande', 'accuracy'], + ['hellaswag', 'accuracy'], + ['bbh', 'naive_average'], + ['gsm8k', 'accuracy'], + ['math', 'accuracy'], + ['TheoremQA', 'score'], + ['openai_humaneval', 'humaneval_pass@1'], + ['sanitized_mbpp', 
'score'], + ['GPQA_diamond', 'accuracy'], + ['IFEval', 'Prompt-level-strict-accuracy'], + + '', + + 'mmlu', + 'mmlu-stem', + 'mmlu-social-science', + 'mmlu-humanities', + 'mmlu-other', + + 'cmmlu', + 'cmmlu-stem', + 'cmmlu-social-science', + 'cmmlu-humanities', + 'cmmlu-other', + 'cmmlu-china-specific', + + 'ceval', + 'ceval-stem', + 'ceval-social-science', + 'ceval-humanities', + 'ceval-other', + 'ceval-hard', + ], + summary_groups=sum( + [v for k, v in locals().items() if k.endswith("_summary_groups")], []), +) diff --git a/configs/summarizers/chat_OC15_multi_faceted.py b/configs/summarizers/chat_OC15_multi_faceted.py new file mode 100644 index 00000000..badeac51 --- /dev/null +++ b/configs/summarizers/chat_OC15_multi_faceted.py @@ -0,0 +1,130 @@ +from mmengine.config import read_base +from opencompass.summarizers import MultiFacetedSummarizer + +with read_base(): + from .groups.mmlu import mmlu_summary_groups + from .groups.cmmlu import cmmlu_summary_groups + from .groups.ceval import ceval_summary_groups + from .groups.bbh import bbh_summary_groups + from .groups.GaokaoBench import GaokaoBench_summary_groups + +other_summary_groups = [ + { + 'name': 'average', + 'subsets': [ + ['mmlu', 'naive_average'], + ['cmmlu', 'naive_average'], + ['ceval', 'naive_average'], + ['GaokaoBench', 'weighted_average'], + ['triviaqa_wiki_1shot', 'score'], + ['nq_open_1shot', 'score'], + ['race-high', 'accuracy'], + ['winogrande', 'accuracy'], + ['hellaswag', 'accuracy'], + ['bbh', 'naive_average'], + ['gsm8k', 'accuracy'], + ['math', 'accuracy'], + ['TheoremQA', 'score'], + ['openai_humaneval', 'humaneval_pass@1'], + ['sanitized_mbpp', 'score'], + ['GPQA_diamond', 'accuracy'], + ['IFEval', 'Prompt-level-strict-accuracy'], + ], + }, +] + +overall_dataset_abbrs = [ + ['average', 'naive_average'], + ['mmlu', 'naive_average'], + ['cmmlu', 'naive_average'], + ['ceval', 'naive_average'], + ['GaokaoBench', 'weighted_average'], + ['triviaqa_wiki_1shot', 'score'], + ['nq_open_1shot', 'score'], + ['race-high', 'accuracy'], + ['winogrande', 'accuracy'], + ['hellaswag', 'accuracy'], + ['bbh', 'naive_average'], + ['gsm8k', 'accuracy'], + ['math', 'accuracy'], + ['TheoremQA', 'score'], + ['openai_humaneval', 'humaneval_pass@1'], + ['sanitized_mbpp', 'score'], + ['GPQA_diamond', 'accuracy'], + ['IFEval', 'Prompt-level-strict-accuracy'], +] + +mmlu_summary_groups_dict = {g['name']: g['subsets'] for g in mmlu_summary_groups} +mmlu_dataset_abbrs = [ + ['mmlu', 'naive_average'], + ['mmlu-stem', 'naive_average'], + ['mmlu-social-science', 'naive_average'], + ['mmlu-humanities', 'naive_average'], + ['mmlu-other', 'naive_average'], + *mmlu_summary_groups_dict['mmlu-stem'], + *mmlu_summary_groups_dict['mmlu-social-science'], + *mmlu_summary_groups_dict['mmlu-humanities'], + *mmlu_summary_groups_dict['mmlu-other'], +] + +cmmlu_summary_groups_dict = {g['name']: g['subsets'] for g in cmmlu_summary_groups} +cmmlu_dataset_abbrs = [ + ['cmmlu', 'naive_average'], + ['cmmlu-stem', 'naive_average'], + ['cmmlu-social-science', 'naive_average'], + ['cmmlu-humanities', 'naive_average'], + ['cmmlu-other', 'naive_average'], + ['cmmlu-china-specific', 'naive_average'], + *cmmlu_summary_groups_dict['cmmlu-stem'], + *cmmlu_summary_groups_dict['cmmlu-social-science'], + *cmmlu_summary_groups_dict['cmmlu-humanities'], + *cmmlu_summary_groups_dict['cmmlu-other'], +] + +ceval_summary_groups_dict = {g['name']: g['subsets'] for g in ceval_summary_groups} +ceval_dataset_abbrs = [ + ['ceval', 'naive_average'], + ['ceval-stem', 'naive_average'], + 
['ceval-social-science', 'naive_average'], + ['ceval-humanities', 'naive_average'], + ['ceval-other', 'naive_average'], + ['ceval-hard', 'naive_average'], + *ceval_summary_groups_dict['ceval-stem'], + *ceval_summary_groups_dict['ceval-social-science'], + *ceval_summary_groups_dict['ceval-humanities'], + *ceval_summary_groups_dict['ceval-other'], +] + +bbh_summary_groups_dict = {g['name']: g['subsets'] for g in bbh_summary_groups} +bbh_dataset_abbrs = [ + ['bbh', 'naive_average'], + *bbh_summary_groups_dict['bbh'], +] + +GaokaoBench_summary_groups_dict = {g['name']: g['subsets'] for g in GaokaoBench_summary_groups} +GaokaoBench_dataset_abbrs = [ + ['GaokaoBench', 'weighted_average'], + *GaokaoBench_summary_groups_dict['GaokaoBench'], +] + +sanitized_mbpp_dataset_abbrs = [ + ['sanitized_mbpp', 'score'], + ['sanitized_mbpp', 'pass'], + ['sanitized_mbpp', 'failed'], + ['sanitized_mbpp', 'wrong_answer'], + ['sanitized_mbpp', 'timeout'], +] + +summarizer = dict( + type=MultiFacetedSummarizer, + dataset_abbrs_list=[ + {'name': 'mmlu', 'dataset_abbrs': mmlu_dataset_abbrs}, + {'name': 'cmmlu', 'dataset_abbrs': cmmlu_dataset_abbrs}, + {'name': 'ceval', 'dataset_abbrs': ceval_dataset_abbrs}, + {'name': 'bbh', 'dataset_abbrs': bbh_dataset_abbrs}, + {'name': 'GaokaoBench', 'dataset_abbrs': GaokaoBench_dataset_abbrs}, + {'name': 'sanitized_mbpp', 'dataset_abbrs': sanitized_mbpp_dataset_abbrs}, + {'name': 'overall', 'dataset_abbrs': overall_dataset_abbrs}, + ], + summary_groups=sum([v for k, v in locals().items() if k.endswith("_summary_groups")], []), +) diff --git a/docs/en/get_started/quick_start.md b/docs/en/get_started/quick_start.md index d9e5bc07..caba742c 100644 --- a/docs/en/get_started/quick_start.md +++ b/docs/en/get_started/quick_start.md @@ -80,13 +80,8 @@ For HuggingFace models, users can set model parameters directly through the comm ```bash python run.py --datasets siqa_gen winograd_ppl \ ---hf-path facebook/opt-125m \ ---model-kwargs device_map='auto' \ ---tokenizer-kwargs padding_side='left' truncation='left' trust_remote_code=True \ ---max-seq-len 2048 \ ---max-out-len 100 \ ---batch-size 128 \ ---num-gpus 1 # Number of minimum required GPUs +--hf-type base \ +--hf-path facebook/opt-125m ``` Note that in this way, OpenCompass only evaluates one model at a time, while other ways can evaluate multiple models at once. @@ -99,12 +94,14 @@ Note that in this way, OpenCompass only evaluates one model at a time, while oth :animate: fade-in-slide-down ```bash python run.py --datasets siqa_gen winograd_ppl \ +--hf-type base \ # HuggingFace model type, base or chat --hf-path facebook/opt-125m \ # HuggingFace model path --tokenizer-path facebook/opt-125m \ # HuggingFace tokenizer path (if the same as the model path, can be omitted) --tokenizer-kwargs padding_side='left' truncation='left' trust_remote_code=True \ # Arguments to construct the tokenizer --model-kwargs device_map='auto' \ # Arguments to construct the model --max-seq-len 2048 \ # Maximum sequence length the model can accept --max-out-len 100 \ # Maximum number of tokens to generate +--min-out-len 100 \ # Minimum number of tokens to generate --batch-size 64 \ # Batch size --num-gpus 1 # Number of GPUs required to run the model ``` @@ -146,28 +143,22 @@ python run.py configs/eval_demo.py OpenCompass provides a series of pre-defined model configurations under `configs/models`. 
Below is the configuration snippet related to [opt-350m](https://github.com/open-compass/opencompass/blob/main/configs/models/opt/hf_opt_350m.py) (`configs/models/opt/hf_opt_350m.py`): ```python -# Evaluate models supported by HuggingFace's `AutoModelForCausalLM` using `HuggingFaceCausalLM` -from opencompass.models import HuggingFaceCausalLM +# Evaluate models supported by HuggingFace's `AutoModelForCausalLM` using `HuggingFaceBaseModel` +from opencompass.models import HuggingFaceBaseModel -# OPT-350M -opt350m = dict( - type=HuggingFaceCausalLM, - # Initialization parameters for `HuggingFaceCausalLM` - path='facebook/opt-350m', - tokenizer_path='facebook/opt-350m', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - proxies=None, - trust_remote_code=True), - model_kwargs=dict(device_map='auto'), - # Below are common parameters for all models, not specific to HuggingFaceCausalLM - abbr='opt350m', # Model abbreviation for result display - max_seq_len=2048, # The maximum length of the entire sequence - max_out_len=100, # Maximum number of generated tokens - batch_size=64, # batchsize - run_cfg=dict(num_gpus=1), # The required GPU numbers for this model +models = [ + # OPT-350M + dict( + type=HuggingFaceBaseModel, + # Initialization parameters for `HuggingFaceBaseModel` + path='facebook/opt-350m', + # Below are common parameters for all models, not specific to HuggingFaceBaseModel + abbr='opt-350m-hf', # Model abbreviation + max_out_len=1024, # Maximum number of generated tokens + batch_size=32, # Batch size + run_cfg=dict(num_gpus=1), # The required GPU numbers for this model ) +] ``` When using configurations, we can specify the relevant files through the command-line argument ` --models` or import the model configurations into the `models` list in the configuration file using the inheritance mechanism. 
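To make the inheritance route just mentioned concrete, below is a minimal sketch of an evaluation config that pulls the predefined OPT-350M entry in through `read_base`. It assumes the snippet above is saved as `configs/models/opt/hf_opt_350m.py` and exports a `models` list, the dataset imports reuse the `siqa_gen` and `winograd_ppl` configs quoted earlier in this guide, and the file name and the alias `opt350m_models` are illustrative only:

```python
# configs/eval_opt350m_demo.py -- hypothetical file name, for illustration
from mmengine.config import read_base

with read_base():
    # dataset configs already referenced in this quick start
    from .datasets.siqa.siqa_gen import siqa_datasets
    from .datasets.winograd.winograd_ppl import winograd_datasets
    # the predefined OPT-350M entry above exports a `models` list
    from .models.opt.hf_opt_350m import models as opt350m_models

datasets = [*siqa_datasets, *winograd_datasets]
models = [*opt350m_models]
```

Running `python run.py configs/eval_opt350m_demo.py` would then evaluate the imported model on both datasets, equivalent to passing the same files via `--models` and `--datasets` on the command line.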
diff --git a/docs/zh_cn/get_started/quick_start.md b/docs/zh_cn/get_started/quick_start.md index e164349f..2e092b16 100644 --- a/docs/zh_cn/get_started/quick_start.md +++ b/docs/zh_cn/get_started/quick_start.md @@ -79,13 +79,8 @@ python tools/list_configs.py llama mmlu ```bash python run.py --datasets siqa_gen winograd_ppl \ ---hf-path facebook/opt-125m \ ---model-kwargs device_map='auto' \ ---tokenizer-kwargs padding_side='left' truncation='left' trust_remote_code=True \ ---max-seq-len 2048 \ ---max-out-len 100 \ ---batch-size 128 \ ---num-gpus 1 # 最少需要的 GPU 数量 +--hf-type base \ +--hf-path facebook/opt-125m ``` 请注意,通过这种方式,OpenCompass 一次只评估一个模型,而其他方式可以一次评估多个模型。 @@ -100,12 +95,14 @@ python run.py --datasets siqa_gen winograd_ppl \ :animate: fade-in-slide-down ```bash python run.py --datasets siqa_gen winograd_ppl \ +--hf-type base \ # HuggingFace 模型类型, base 或 chat --hf-path facebook/opt-125m \ # HuggingFace 模型路径 --tokenizer-path facebook/opt-125m \ # HuggingFace tokenizer 路径(如果与模型路径相同,可以省略) --tokenizer-kwargs padding_side='left' truncation='left' trust_remote_code=True \ # 构建 tokenizer 的参数 --model-kwargs device_map='auto' \ # 构建模型的参数 --max-seq-len 2048 \ # 模型可以接受的最大序列长度 --max-out-len 100 \ # 生成的最大 token 数 +--min-out-len 100 \ # 生成的最小 token 数 --batch-size 64 \ # 批量大小 --num-gpus 1 # 运行模型所需的 GPU 数量 ``` @@ -147,28 +144,22 @@ python run.py configs/eval_demo.py OpenCompass 提供了一系列预定义的模型配置,位于 `configs/models` 下。以下是与 [opt-350m](https://github.com/open-compass/opencompass/blob/main/configs/models/opt/hf_opt_350m.py)(`configs/models/opt/hf_opt_350m.py`)相关的配置片段: ```python -# 使用 `HuggingFaceCausalLM` 评估由 HuggingFace 的 `AutoModelForCausalLM` 支持的模型 -from opencompass.models import HuggingFaceCausalLM +# 使用 `HuggingFaceBaseModel` 评估由 HuggingFace 的 `AutoModelForCausalLM` 支持的模型 +from opencompass.models import HuggingFaceBaseModel -# OPT-350M -opt350m = dict( - type=HuggingFaceCausalLM, - # `HuggingFaceCausalLM` 的初始化参数 - path='facebook/opt-350m', - tokenizer_path='facebook/opt-350m', - tokenizer_kwargs=dict( - padding_side='left', - truncation_side='left', - proxies=None, - trust_remote_code=True), - model_kwargs=dict(device_map='auto'), - # 下面是所有模型的共同参数,不特定于 HuggingFaceCausalLM - abbr='opt350m', # 结果显示的模型缩写 - max_seq_len=2048, # 整个序列的最大长度 - max_out_len=100, # 生成的最大 token 数 - batch_size=64, # 批量大小 - run_cfg=dict(num_gpus=1), # 该模型所需的 GPU 数量 +models = [ + # OPT-350M + dict( + type=HuggingFaceBaseModel, + # `HuggingFaceBaseModel` 的初始化参数 + path='facebook/opt-350m', + # 下面是所有模型的共同参数,不特定于 HuggingFaceBaseModel + abbr='opt-350m-hf', # 模型的缩写 + max_out_len=1024, # 生成的最大 token 数 + batch_size=32, # 批量大小 + run_cfg=dict(num_gpus=1), # 该模型所需的 GPU 数量 ) +] ``` 使用配置时,我们可以通过命令行参数 `--models` 指定相关文件,或使用继承机制将模型配置导入到配置文件中的 `models` 列表中。 diff --git a/opencompass/cli/main.py b/opencompass/cli/main.py index 2c06b9f3..e9b5abb3 100644 --- a/opencompass/cli/main.py +++ b/opencompass/cli/main.py @@ -1,3 +1,5 @@ +# flake8: noqa +# yapf: disable import argparse import getpass import os @@ -51,7 +53,7 @@ def parse_args(): action='store_true', default=False) parser.add_argument( - '--accelerator', + '-a', '--accelerator', help='Infer accelerator, support vllm and lmdeploy now.', choices=['vllm', 'lmdeploy', 'hf'], default='hf', @@ -81,7 +83,7 @@ def parse_args(): 'saved in this path, including the slurm logs, ' 'the evaluation results, the summary results, etc.' 
'If not specified, the work_dir will be set to ' - './outputs/default.', + 'outputs/default.', default=None, type=str) parser.add_argument( @@ -95,23 +97,12 @@ def parse_args(): help='Report the running status to lark bot', action='store_true', default=False) - parser.add_argument('--max-partition-size', - help='The maximum size of an infer task. Only ' - 'effective when "infer" is missing from the config.', - type=int, - default=40000), - parser.add_argument( - '--gen-task-coef', - help='The dataset cost measurement coefficient for generation tasks, ' - 'Only effective when "infer" is missing from the config.', - type=int, - default=20) parser.add_argument('--max-num-workers', help='Max number of workers to run in parallel. ' 'Will be overrideen by the "max_num_workers" argument ' 'in the config.', type=int, - default=32) + default=1) parser.add_argument('--max-workers-per-gpu', help='Max task to run in parallel on one GPU. ' 'It will only be used in the local runner.', @@ -181,25 +172,21 @@ def parse_dlc_args(dlc_parser): def parse_hf_args(hf_parser): """These args are all for the quick construction of HuggingFace models.""" - hf_parser.add_argument('--hf-path', type=str) - hf_parser.add_argument('--peft-path', type=str) - hf_parser.add_argument('--tokenizer-path', type=str) - hf_parser.add_argument('--model-kwargs', - nargs='+', - action=DictAction, - default={}) - hf_parser.add_argument('--tokenizer-kwargs', - nargs='+', - action=DictAction, - default={}) - hf_parser.add_argument('--max-out-len', type=int) - hf_parser.add_argument('--max-seq-len', type=int) - hf_parser.add_argument('--no-batch-padding', - action='store_true', - default=False) - hf_parser.add_argument('--batch-size', type=int) - hf_parser.add_argument('--num-gpus', type=int) - hf_parser.add_argument('--pad-token-id', type=int) + hf_parser.add_argument('--hf-type', type=str, choices=['base', 'chat'], default='chat', help='The type of the HuggingFace model, base or chat') + hf_parser.add_argument('--hf-path', type=str, help='The path to the HuggingFace model, e.g. 
"facebook/opt-125m", required') + hf_parser.add_argument('--model-kwargs', nargs='+', action=DictAction, default={}, help='The kwargs for the HuggingFace model') + hf_parser.add_argument('--tokenizer-path', type=str, help='The path to the HuggingFace tokenizer, same as --hf-path if not specified') + hf_parser.add_argument('--tokenizer-kwargs', nargs='+', action=DictAction, default={}, help='The kwargs for the tokenizer') + hf_parser.add_argument('--peft-path', type=str, help='The path to the PEFT model') + hf_parser.add_argument('--peft-kwargs', nargs='+', action=DictAction, default={}, help='The kwargs for the PEFT model') + hf_parser.add_argument('--generation-kwargs', nargs='+', action=DictAction, default={}, help='The kwargs for the generation') + hf_parser.add_argument('--max-seq-len', type=int, help='The max sequence length for the HuggingFace model') + hf_parser.add_argument('--max-out-len', type=int, default=256, help='The max output length for the HuggingFace model') + hf_parser.add_argument('--min-out-len', type=int, default=1, help='The min output length for the HuggingFace model') + hf_parser.add_argument('--batch-size', type=int, default=8, help='The batch size for the HuggingFace model') + hf_parser.add_argument('--num-gpus', type=int, default=1, help='The number of GPUs for **the HuggingFace model passed via cli**') + hf_parser.add_argument('--pad-token-id', type=int, help='The pad token id for the HuggingFace model') + hf_parser.add_argument('--stop-words', nargs='+', default=[], help='The stop words for the HuggingFace model') def parse_custom_dataset_args(custom_dataset_parser): @@ -225,7 +212,7 @@ def main(): if args.work_dir is not None: cfg['work_dir'] = args.work_dir else: - cfg.setdefault('work_dir', './outputs/default/') + cfg.setdefault('work_dir', osp.join('outputs', 'default')) # cfg_time_str defaults to the current time cfg_time_str = dir_time_str = datetime.now().strftime('%Y%m%d_%H%M%S') diff --git a/opencompass/datasets/winogrande.py b/opencompass/datasets/winogrande.py index 0e897ee5..8ea8d27b 100644 --- a/opencompass/datasets/winogrande.py +++ b/opencompass/datasets/winogrande.py @@ -22,6 +22,9 @@ class winograndeDataset(BaseDataset): prompt = line['sentence'] continue_prompt = prompt.split('_')[1] data_item = { + 'prompt': prompt, + 'only_option1': line['option1'], + 'only_option2': line['option2'], 'opt1': prompt.replace('_', line['option1']), 'opt2': prompt.replace('_', line['option2']), 'answer': line['answer'], @@ -48,6 +51,9 @@ class winograndeDataset_V2(BaseDataset): answer = line['answer'] answer = ' AB'[int(answer)] if answer != '' else 'NULL' data_item = { + 'prompt': prompt, + 'only_option1': line['option1'], + 'only_option2': line['option2'], 'opt1': prompt.replace('_', line['option1']), 'opt2': prompt.replace('_', line['option2']), 'answer': answer, @@ -76,6 +82,9 @@ class winograndeDataset_V3(BaseDataset): answer = line['answer'] answer = ' AB'[int(answer)] if answer != '' else 'NULL' data_item = { + 'prompt': prompt, + 'only_option1': line['option1'], + 'only_option2': line['option2'], 'opt1': prompt.replace('_', line['option1']), 'opt2': prompt.replace('_', line['option2']), 'answer': answer, diff --git a/opencompass/models/__init__.py b/opencompass/models/__init__.py index 273ae367..fab0824b 100644 --- a/opencompass/models/__init__.py +++ b/opencompass/models/__init__.py @@ -3,26 +3,28 @@ from .ai360_api import AI360GPT # noqa: F401 from .alaya import AlayaLM # noqa: F401 from .baichuan_api import BaiChuan, BaiChuan3 # noqa: F401 from 
.baidu_api import ERNIEBot # noqa: F401 -from .base import BaseModel, LMTemplateParser # noqa -from .base_api import APITemplateParser, BaseAPIModel # noqa +from .base import BaseModel, LMTemplateParser # noqa: F401 +from .base_api import APITemplateParser, BaseAPIModel # noqa: F401 from .bytedance_api import ByteDance # noqa: F401 from .claude_api import Claude # noqa: F401 -from .gemini_api import Gemini, GeminiAllesAPIN # noqa: F401, F403 -from .glm import GLM130B # noqa: F401, F403 -from .huggingface import HuggingFace # noqa: F401, F403 -from .huggingface import HuggingFaceCausalLM # noqa: F401, F403 -from .huggingface import HuggingFaceChatGLM3 # noqa: F401, F403 +from .gemini_api import Gemini, GeminiAllesAPIN # noqa: F401 +from .glm import GLM130B # noqa: F401 +from .huggingface import HuggingFace # noqa: F401 +from .huggingface import HuggingFaceCausalLM # noqa: F401 +from .huggingface import HuggingFaceChatGLM3 # noqa: F401 +from .huggingface_above_v4_33 import HuggingFaceBaseModel # noqa: F401 +from .huggingface_above_v4_33 import HuggingFacewithChatTemplate # noqa: F401 from .hunyuan_api import Hunyuan # noqa: F401 -from .intern_model import InternLM # noqa: F401, F403 +from .intern_model import InternLM # noqa: F401 from .krgpt_api import KrGPT # noqa: F401 from .lightllm_api import LightllmAPI # noqa: F401 -from .llama2 import Llama2, Llama2Chat # noqa: F401, F403 +from .llama2 import Llama2, Llama2Chat # noqa: F401 from .lmdeploy_pytorch import LmdeployPytorchModel # noqa: F401 from .lmdeploy_tis import LmdeployTisModel # noqa: F401 from .minimax_api import MiniMax # noqa: F401 from .mistral_api import Mistral # noqa: F401 from .mixtral import Mixtral # noqa: F401 -from .modelscope import ModelScope, ModelScopeCausalLM # noqa: F401, F403 +from .modelscope import ModelScope, ModelScopeCausalLM # noqa: F401 from .moonshot_api import MoonShot # noqa: F401 from .nanbeige_api import Nanbeige # noqa: F401 from .openai_api import OpenAI # noqa: F401 diff --git a/opencompass/models/huggingface_above_v4_33.py b/opencompass/models/huggingface_above_v4_33.py new file mode 100644 index 00000000..f7ce622b --- /dev/null +++ b/opencompass/models/huggingface_above_v4_33.py @@ -0,0 +1,414 @@ +# flake8: noqa +# yapf: disable +from typing import Dict, List, Optional, Union + +from opencompass.models.base import BaseModel, LMTemplateParser +from opencompass.models.base_api import APITemplateParser +from opencompass.registry import MODELS +from opencompass.utils.logging import get_logger +from opencompass.utils.prompt import PromptList + +PromptType = Union[PromptList, str] + + +def _get_stopping_criteria(stop_words, tokenizer, batch_size): + from transformers import (PreTrainedTokenizer, StoppingCriteria, + StoppingCriteriaList) + + class MultiTokenEOSCriteria(StoppingCriteria): + """Criteria to stop on the specified multi-token sequence.""" + + def __init__(self, sequence: str, tokenizer: PreTrainedTokenizer, batch_size: int): + self.done_tracker = [False] * batch_size + self.sequence = sequence + self.sequence_ids = tokenizer.encode(sequence, add_special_tokens=False) + self.sequence_id_len = len(self.sequence_ids) + self.tokenizer = tokenizer + + def __call__(self, input_ids, scores, **kwargs) -> bool: + # compare the last len(stop) tokens + lookback_ids_batch = input_ids[:, -self.sequence_id_len:] + lookback_tokens_batch = self.tokenizer.batch_decode(lookback_ids_batch) + for i, done in enumerate(self.done_tracker): + if done: + continue + self.done_tracker[i] = self.sequence in 
lookback_tokens_batch[i] + return False not in self.done_tracker + + criteria = [] + for stop_word in stop_words: + c = MultiTokenEOSCriteria(stop_word, tokenizer, batch_size) + criteria.append(c) + criteria = StoppingCriteriaList(criteria) + return criteria + +def _get_possible_max_seq_len(max_seq_len, path): + if max_seq_len is not None: + return max_seq_len + + from transformers import AutoConfig + config = AutoConfig.from_pretrained(path, trust_remote_code=True) + possible_keys = [ + 'max_position_embeddings', + 'seq_length', + 'model_max_length', + ] + for k in possible_keys: + if hasattr(config, k): + return getattr(config, k) + raise ValueError('max_seq_len is not provided and cannot be inferred from the model config.') + + +def _convert_chat_messages(inputs): + outputs = [] + for _input in inputs: + messages = [] + if isinstance(_input, str): + messages.append({'role': 'HUMAN', 'prompt': _input}) + else: + for item in _input: + role = { + 'HUMAN': 'user', + 'BOT': 'assistant', + 'SYSTEM': 'system', + }[item['role']] + messages.append({'role': role, 'content': item['prompt']}) + outputs.append(messages) + return outputs + + +def _format_with_fast_chat_template(inputs: List[str], name: str='vicuna'): + try: + from fastchat.model import get_conversation_template + except ImportError: + raise ModuleNotFoundError('fastchat not found. Please install with\npip install "fschat[model_worker,webui]"') + + outputs = [] + for _input in inputs: + template = get_conversation_template(name) + for item in _input: + if item['role'] == 'user': + template.append_message(template.roles[0], item['content']) + elif item['role'] == 'assistant': + template.append_message(template.roles[1], item['content']) + elif item['role'] == 'system': + continue + else: + raise ValueError(f'Unknown role {item["role"]}') + template.append_message(template.roles[1], None) + outputs.append(template.get_prompt()) + return outputs + + +def _get_meta_template(meta_template): + default_meta_template = dict( + round=[ + dict(role='HUMAN', api_role='HUMAN'), + dict(role='BOT', api_role='BOT', generate=True), + ] + ) + return APITemplateParser(meta_template or default_meta_template) + + +def _set_model_kwargs_torch_dtype(model_kwargs): + import torch + if 'torch_dtype' not in model_kwargs: + torch_dtype = torch.float16 + else: + torch_dtype = { + 'torch.float16': torch.float16, + 'torch.bfloat16': torch.bfloat16, + 'torch.float': torch.float, + 'auto': 'auto', + 'None': None, + }.get(model_kwargs['torch_dtype']) + if torch_dtype is not None: + model_kwargs['torch_dtype'] = torch_dtype + return model_kwargs + + +@MODELS.register_module() +class HuggingFacewithChatTemplate(BaseModel): + + def __init__(self, + path: str, + model_kwargs: dict = dict(), + tokenizer_path: Optional[str] = None, + tokenizer_kwargs: dict = dict(), + peft_path: Optional[str] = None, + peft_kwargs: dict = dict(), + tokenizer_only: bool = False, + generation_kwargs: dict = dict(), + max_seq_len: Optional[int] = None, + meta_template: Optional[Dict] = None, + pad_token_id: Optional[int] = None, + fastchat_template: Optional[str] = None, + stop_words: Optional[str] = [], + **other_kwargs): + + self.logger = get_logger() + self.path = path + self.tokenizer_only = tokenizer_only + self.template_parser = _get_meta_template(meta_template) + self.max_seq_len = _get_possible_max_seq_len(max_seq_len, path) + self._load_tokenizer(tokenizer_path or path, tokenizer_kwargs, pad_token_id) + if not tokenizer_only: + self._load_model(path=path, kwargs=model_kwargs, 
peft_path=peft_path, peft_kwargs=peft_kwargs) + self.generation_kwargs = generation_kwargs + self.fastchat_template = fastchat_template + self.stop_words = stop_words + + for k, v in other_kwargs.items(): + if v is not None: + self.logger.warning(f'Unused argument {k}={v}') + + def _load_tokenizer(self, path: Optional[str], kwargs: dict, pad_token_id: Optional[int] = None): + from transformers import AutoTokenizer, GenerationConfig + + DEFAULT_TOKENIZER_KWARGS = dict(padding_side='left', truncation_side='left', use_fast=False, trust_remote_code=True) + tokenizer_kwargs = DEFAULT_TOKENIZER_KWARGS + tokenizer_kwargs.update(kwargs) + self.tokenizer = AutoTokenizer.from_pretrained(path, **tokenizer_kwargs) + + # A patch for some models without pad_token_id + if pad_token_id is not None: + if self.tokenizer.pad_token_id is None: + self.logger.debug(f'Using {pad_token_id} as pad_token_id') + elif self.tokenizer.pad_token_id != pad_token_id: + self.logger.warning(f'pad_token_id is not consistent. Using {pad_token_id} as pad_token_id') + self.tokenizer.pad_token_id = pad_token_id + return + if self.tokenizer.pad_token_id is not None: + return + self.logger.warning('pad_token_id is not set for the tokenizer.') + generation_config = GenerationConfig.from_pretrained(path) + if generation_config.pad_token_id is not None: + self.logger.warning(f'Using {generation_config.pad_token_id} as pad_token_id.') + self.tokenizer.pad_token_id = generation_config.pad_token_id + return + if self.tokenizer.eos_token_id is not None: + self.logger.warning(f'Using eos_token_id {self.tokenizer.eos_token_id} as pad_token_id.') + self.tokenizer.pad_token_id = self.tokenizer.eos_token_id + return + raise ValueError('pad_token_id is not set for this tokenizer. Please set `pad_token_id={PAD_TOKEN_ID}` in model_cfg.') + + def _load_model(self, path: str, kwargs: dict, peft_path: Optional[str] = None, peft_kwargs: dict = dict()): + from transformers import AutoModel, AutoModelForCausalLM + + DEFAULT_MODEL_KWARGS = dict(device_map='auto', trust_remote_code=True) + model_kwargs = DEFAULT_MODEL_KWARGS + model_kwargs.update(kwargs) + model_kwargs = _set_model_kwargs_torch_dtype(model_kwargs) + + try: + self.model = AutoModelForCausalLM.from_pretrained(path, **model_kwargs) + except ValueError: + self.model = AutoModel.from_pretrained(path, **model_kwargs) + + if peft_path is not None: + from peft import PeftModel + peft_kwargs['is_trainable'] = False + self.model = PeftModel.from_pretrained(self.model, peft_path, **peft_kwargs) + + self.model.eval() + self.model.generation_config.do_sample = False + + def generate(self, + inputs: List[str], + max_out_len: int, + min_out_len: Optional[int] = None, + stopping_criteria: List[str] = [], + **kwargs) -> List[str]: + messages = _convert_chat_messages(inputs) + batch_size = len(messages) + + tokenize_kwargs = dict( + return_tensors='pt', + padding=True, + truncation=True, + add_special_tokens=True, + max_length=self.max_seq_len + ) + if self.fastchat_template: + messages = _format_with_fast_chat_template(messages, self.fastchat_template) + tokens = self.tokenizer.batch_encode_plus(messages, **tokenize_kwargs) + else: + messages = [self.tokenizer.apply_chat_template(m, add_generation_prompt=True, tokenize=False) for m in messages] + tokenize_kwargs['add_special_tokens'] = False + tokens = self.tokenizer.batch_encode_plus(messages, **tokenize_kwargs) + + tokens = {k: v.to(self.model.device) for k, v in tokens.items()} + + generation_kwargs = self.generation_kwargs.copy() + 
generation_kwargs.update(kwargs) + stopping_criteria = list(set(stopping_criteria + self.stop_words)) + if stopping_criteria: + generation_kwargs['stopping_criteria'] = _get_stopping_criteria(stopping_criteria, self.tokenizer, batch_size) + if max_out_len is not None: + generation_kwargs['max_new_tokens'] = max_out_len + if min_out_len is not None: + generation_kwargs['min_new_tokens'] = min_out_len + generation_kwargs['pad_token_id'] = self.tokenizer.pad_token_id + + # step-2: conduct model forward to generate output + outputs = self.model.generate(**tokens, **generation_kwargs) + outputs = outputs[:, tokens['input_ids'].shape[1]:] + + # step-3: decode the output + decodeds = self.tokenizer.batch_decode(outputs) + for stop in stopping_criteria: + decodeds = [t.split(stop)[0] for t in decodeds] + + return decodeds + + def get_token_len(self, prompt: str) -> int: + m = _convert_chat_messages([prompt])[0] + t = self.tokenizer.apply_chat_template(m, add_generation_prompt=True, return_dict=True) + return len(t['input_ids']) + +def _convert_base_messages(inputs): + outputs = [] + for _input in inputs: + if isinstance(_input, str): + outputs.append(_input) + else: + messages = [] + for item in _input: + messages.append(item['prompt']) + outputs.append(''.join(messages)) + return outputs + + +class HuggingFaceBaseModel(HuggingFacewithChatTemplate): + + def __init__(self, + path: str, + model_kwargs: dict = dict(), + tokenizer_path: Optional[str] = None, + tokenizer_kwargs: dict = dict(), + peft_path: Optional[str] = None, + peft_kwargs: dict = dict(), + tokenizer_only: bool = False, + generation_kwargs: dict = dict(), + max_seq_len: Optional[int] = None, + pad_token_id: Optional[int] = None, + stop_words: Optional[str] = [], + **other_kwargs): + + self.logger = get_logger() + self.path = path + self.tokenizer_only = tokenizer_only + self.template_parser = LMTemplateParser() + self.max_seq_len = _get_possible_max_seq_len(max_seq_len, path) + self._load_tokenizer(tokenizer_path or path, tokenizer_kwargs, pad_token_id) + if not tokenizer_only: + self._load_model(path=path, kwargs=model_kwargs, peft_path=peft_path, peft_kwargs=peft_kwargs) + self.generation_kwargs = generation_kwargs + self.stop_words = stop_words + + for k, v in other_kwargs.items(): + if v is not None: + self.logger.warning(f'Unused argument {k}={v}') + + def generate(self, + inputs: List[str], + max_out_len: int, + min_out_len: Optional[int] = None, + stopping_criteria: List[str] = [], + **kwargs) -> List[str]: + messages = _convert_base_messages(inputs) + batch_size = len(messages) + + tokenize_kwargs = dict( + return_tensors='pt', + padding=True, + truncation=True, + add_special_tokens=True, + max_length=self.max_seq_len + ) + tokens = self.tokenizer.batch_encode_plus(messages, **tokenize_kwargs) + tokens = {k: v.to(self.model.device) for k, v in tokens.items()} + + generation_kwargs = self.generation_kwargs.copy() + generation_kwargs.update(kwargs) + stopping_criteria = list(set(stopping_criteria + self.stop_words)) + if stopping_criteria: + generation_kwargs['stopping_criteria'] = _get_stopping_criteria(stopping_criteria, self.tokenizer, batch_size) + if max_out_len is not None: + generation_kwargs['max_new_tokens'] = max_out_len + if min_out_len is not None: + generation_kwargs['min_new_tokens'] = min_out_len + generation_kwargs['pad_token_id'] = self.tokenizer.pad_token_id + + # step-2: conduct model forward to generate output + outputs = self.model.generate(**tokens, **generation_kwargs) + outputs = outputs[:, 
tokens['input_ids'].shape[1]:] + + # step-3: decode the output + decodeds = self.tokenizer.batch_decode(outputs, skip_special_tokens=True) + for stop in stopping_criteria: + decodeds = [token.split(stop)[0] for token in decodeds] + + return decodeds + + def get_ppl(self, inputs: List[str], mask_length: Optional[List[int]] = None) -> List[float]: + """Get perplexity scores given a list of inputs. + + Args: + inputs (List[str]): A list of strings. + mask_length (Optional[List[int]]): A list of mask lengths. If + provided, the perplexity scores will be calculated with the + first mask_length[i] tokens masked out. It's okay to skip + its implementation if advanced features in PPLInferencer are + not needed. + + Returns: + List[float]: A list of perplexity scores. + """ + assert self.tokenizer.pad_token + import torch + import torch.nn.functional as F + pad_token_id = self.tokenizer.pad_token_id + messages = _convert_base_messages(inputs) + + tokenize_kwargs = dict( + return_tensors='pt', + padding=True, + truncation=True, + add_special_tokens=True, + max_length=self.max_seq_len + ) + tokens = self.tokenizer.batch_encode_plus(messages, **tokenize_kwargs) + tokens = {k: v.to(self.model.device) for k, v in tokens.items()} + outputs = self.model(**tokens)[0] + + batch_size, seq_len, vocab_size = outputs.shape + shift_logits = outputs[:, :-1, :].contiguous().float() + shift_labels = tokens['input_ids'][:, 1:].contiguous() + loss = F.cross_entropy( + shift_logits.view(-1, vocab_size), + shift_labels.view(-1), + ignore_index=pad_token_id, + reduction='none').view(batch_size, seq_len - 1) + lens = (tokens['input_ids'] != pad_token_id).sum(-1).cpu().numpy() + + if mask_length is not None: + import numpy as np + mask = torch.zeros_like(shift_labels) # [batch,seqlen] + for i in range(len(mask)): + for j in range(mask_length[i] - 1, len(mask[i])): + mask[i][j] = 1 + loss = loss * mask + lens -= np.array(mask_length) + + ce_loss = loss.float().sum(-1).cpu().detach().numpy() / lens + return ce_loss + + def get_loglikelihood(self, inputs: List[str], conts: List[str]) -> List[float]: + mask_length = [self.get_token_len(c, add_special_tokens=False) for c in conts] + return - self.get_ppl(inputs, mask_length) + + def get_token_len(self, prompt: str, add_special_tokens: bool=True) -> int: + m = _convert_base_messages([prompt])[0] + t = self.tokenizer(m, add_special_tokens=add_special_tokens) + return len(t['input_ids']) diff --git a/opencompass/models/turbomind.py b/opencompass/models/turbomind.py index 9479f02f..f64249bc 100644 --- a/opencompass/models/turbomind.py +++ b/opencompass/models/turbomind.py @@ -37,9 +37,6 @@ class TurboMindModel(BaseModel): arguments like session_len, max_batch_size for TurboMind. gen_config (Dict, optional): Generation config to set arguments like top_k, top_p, temperature. - end_str (str, optional): Whether to trim generated strings with end_str - if the model has special ending strings that are not handled well. - Defaults to None.
""" def __init__(self, @@ -47,9 +44,8 @@ class TurboMindModel(BaseModel): concurrency: int = 8, max_seq_len: int = 2048, meta_template: Optional[Dict] = None, - engine_config: Optional[Dict] = None, - gen_config: Optional[Dict] = None, - end_str: Optional[str] = None): + engine_config: Dict = {}, + gen_config: Dict = {}): super().__init__(path=path, max_seq_len=max_seq_len, meta_template=meta_template) @@ -70,12 +66,14 @@ class TurboMindModel(BaseModel): ] self.generator_ids = [i + 1 for i in range(concurrency)] self.gen_config = gen_config - self.end_str = end_str self.major_version, self.minor_version, _ = version_info def generate(self, inputs: List[str], max_out_len: int = 512, + stopping_criteria: List[str] = [], + do_sample: Optional[bool] = None, + temperature: int = 1, **kwargs) -> List[str]: """Generate results given a list of inputs. @@ -96,13 +94,21 @@ class TurboMindModel(BaseModel): ] gen_config = copy.deepcopy(self.gen_config) - if 'do_sample' in kwargs: - if kwargs['do_sample']: - gen_config.top_k = 1000 - gen_config.temperature = kwargs.get('temperature', 1) + if do_sample is not None: + if do_sample: + gen_config['top_k'] = 1000 + gen_config['temperature'] = temperature else: - gen_config.top_k = 1 - gen_config.temperature = 0.01 + gen_config['top_k'] = 1 + if stopping_criteria: + stop_words = gen_config.get('stop_words', []) + for t in stopping_criteria: + t = self.tokenizer.encode(t, add_bos=False) + stop_words.append(t[0]) + gen_config['stop_words'] = list(set(stop_words)) + + from lmdeploy.messages import EngineGenerationConfig + gen_config = EngineGenerationConfig(**gen_config) results = [] for batch_input in batch_inputs: @@ -115,7 +121,6 @@ class TurboMindModel(BaseModel): batch_input, [max_out_len] * len(batch_input), [gen_config] * len(batch_input), - [self.end_str] * len(batch_input), )) results += _results return results @@ -136,8 +141,7 @@ class TurboMindModel(BaseModel): session_id, prompt: PromptType, max_out_len: int, - gen_config=None, - end_str: Optional[str] = None) -> str: + gen_config=None) -> str: """Generate results given a list of inputs. Args: @@ -147,10 +151,6 @@ class TurboMindModel(BaseModel): max_out_len (int): The maximum length of the output. gen_config (EngineGenerationConfig, optional): Generation config to set arguments like top_k, top_p, temperature. - end_str (str, optional): Whether to trim generated strings - with end_str if the model has special ending strings - that are not handled well. - Defaults to None. Returns: str: The generated string. 
""" @@ -173,9 +173,6 @@ class TurboMindModel(BaseModel): _, output_ids, _ = outputs response = self.tokenizer.decode(output_ids) response = valid_str(response) - # used to trim - if end_str: - response = response.split(end_str)[0] return response def get_ppl(self, diff --git a/opencompass/models/vllm.py b/opencompass/models/vllm.py index 63da7b3f..e204c0c4 100644 --- a/opencompass/models/vllm.py +++ b/opencompass/models/vllm.py @@ -25,7 +25,7 @@ class VLLM(BaseModel): meta_template: Optional[Dict] = None, mode: str = 'none', use_fastchat_template: bool = False, - end_str: Optional[str] = None, + stop_words: List[str] = [], ): super().__init__(path=path, max_seq_len=max_seq_len, @@ -42,7 +42,7 @@ class VLLM(BaseModel): assert mode in ['none', 'mid'] self.mode = mode self.use_fastchat_template = use_fastchat_template - self.end_str = end_str + self.stop_words = stop_words def _load_model(self, path: str, @@ -59,7 +59,10 @@ class VLLM(BaseModel): ray.shutdown() self.model = LLM(path, **model_kwargs) - def generate(self, inputs: List[str], max_out_len: int, + def generate(self, + inputs: List[str], + max_out_len: int, + stopping_criteria: List[str] = [], **kwargs) -> List[str]: """Generate results given a list of inputs. @@ -90,6 +93,8 @@ class VLLM(BaseModel): generation_kwargs = kwargs.copy() generation_kwargs.update(self.generation_kwargs) generation_kwargs.update({'max_tokens': max_out_len}) + _stop = list(set(self.stop_words + stopping_criteria)) + generation_kwargs.update({'stop': _stop}) sampling_kwargs = SamplingParams(**generation_kwargs) outputs = self.model.generate(inputs, sampling_kwargs) @@ -97,9 +102,6 @@ class VLLM(BaseModel): for output in outputs: prompt = output.prompt generated_text = output.outputs[0].text - - if self.end_str: - generated_text = generated_text.split(self.end_str)[0] prompt_list.append(prompt) output_strs.append(generated_text) diff --git a/opencompass/openicl/icl_inferencer/icl_ll_inferencer.py b/opencompass/openicl/icl_inferencer/icl_ll_inferencer.py index ca325186..40367ade 100644 --- a/opencompass/openicl/icl_inferencer/icl_ll_inferencer.py +++ b/opencompass/openicl/icl_inferencer/icl_ll_inferencer.py @@ -1,4 +1,6 @@ -"""PPL Inferencer.""" +# flake8: noqa +# yapf: disable +"""LogLikelihood(LL) Inferencer.""" import os from typing import List, Optional @@ -76,16 +78,13 @@ class LLInferencer(BaseInferencer): # 3. Get labels of all the classes if self.labels is None: - labels = retriever.get_labels(ice_template=ice_template, - prompt_template=prompt_template) + labels = retriever.get_labels(ice_template=ice_template, prompt_template=prompt_template) else: labels = self.labels # 4. Generate in-context examples for testing inputs for idx in range(len(ice_idx_list)): - ice.append( - retriever.generate_ice(ice_idx_list[idx], - ice_template=ice_template)) + ice.append(retriever.generate_ice(ice_idx_list[idx], ice_template=ice_template)) output_handler.save_ice(self.model.parse_template(ice, mode='ppl')) # 5. 
Calculating loglikelihood for prompts in each label's class @@ -99,58 +98,41 @@ class LLInferencer(BaseInferencer): # 5.1 Generate prompts of current label and truncate # TODO: Refactor for idx in range(len(ice_idx_list)): - prompt = retriever.generate_label_prompt( - idx, - ice[idx], - label, - ice_template=ice_template, - prompt_template=prompt_template) + prompt_kwargs = { + 'idx': idx, + 'ice': ice[idx], + 'label': label, + 'ice_template': ice_template, + 'prompt_template': prompt_template, + } + prompt = retriever.generate_label_prompt(**prompt_kwargs) + prompt_token_num = self.model.get_token_len_from_template(prompt, mode='ppl') if self.max_seq_len is not None: - prompt_token_num = self.model.get_token_len_from_template( - prompt, mode='ppl') - while len(ice_idx_list[idx] - ) > 0 and prompt_token_num > self.max_seq_len: + while len(ice_idx_list[idx]) > 0 and prompt_token_num > self.max_seq_len: ice_idx_list[idx] = ice_idx_list[idx][:-1] - ice[idx] = retriever.generate_ice( - ice_idx_list[idx], ice_template=ice_template) - prompt = retriever.generate_label_prompt( - idx, - ice[idx], - label, - ice_template=ice_template, - prompt_template=prompt_template) - prompt_token_num = self.model.get_token_len_from_template( # noqa - prompt, mode='ppl') # noqa + ice[idx] = retriever.generate_ice(ice_idx_list[idx], ice_template=ice_template) + prompt_kwargs['ice'] = ice[idx] + prompt = retriever.generate_label_prompt(**prompt_kwargs) + prompt_token_num = self.model.get_token_len_from_template(prompt, mode='ppl') prompt_list.append(prompt) token_num_list.append(prompt_token_num) cont_list.append(retriever.test_ds[idx]['cont']) # 5.2 Get loglikelihood - logger.info( - f"Calculating Loglikelihood for prompts labeled '{label}'" - ) # noqa - for idx in trange(0, - len(prompt_list), - self.batch_size, - disable=not self.is_main_process): + logger.info(f"Calculating Loglikelihood for prompts labeled '{label}'") + for idx in trange(0, len(prompt_list), self.batch_size, disable=not self.is_main_process): sub_prompt_list = prompt_list[idx:idx + self.batch_size] sub_cont_list = cont_list[idx:idx + self.batch_size] with torch.no_grad(): # mainly modify compared to PPLInferencer - sub_inputs = self.model.parse_template(sub_prompt_list, - mode='ppl') - sub_res = self.model.get_loglikelihood( - sub_inputs, sub_cont_list).tolist() - for res, prompt in zip( - sub_res, - self.model.parse_template(sub_prompt_list, - mode='ppl')): + sub_inputs = self.model.parse_template(sub_prompt_list, mode='ppl') + sub_res = self.model.get_loglikelihood(sub_inputs, sub_cont_list).tolist() + for res, prompt in zip(sub_res, self.model.parse_template(sub_prompt_list, mode='ppl')): sub_ppl_list.append(res) ice_str = self.model.parse_template(ice[idx], mode='ppl') - output_handler.save_prompt_and_loglikelihood( - label, prompt.replace(ice_str, ''), prompt, res, index) + output_handler.save_prompt_and_loglikelihood(label, prompt.replace(ice_str, ''), prompt, res, index) index = index + 1 ppl.append(sub_ppl_list) @@ -169,13 +151,9 @@ class LLInferencer(BaseInferencer): # 8. 
Output if self.is_main_process: os.makedirs(output_json_filepath, exist_ok=True) - output_handler.write_to_json(output_json_filepath, - output_json_filename) + output_handler.write_to_json(output_json_filepath, output_json_filename) - return [ - sample['prediction'] - for sample in output_handler.results_dict.values() - ] + return [sample['prediction'] for sample in output_handler.results_dict.values()] class LLInferencerOutputHandler: diff --git a/opencompass/openicl/icl_inferencer/icl_ppl_inferencer.py b/opencompass/openicl/icl_inferencer/icl_ppl_inferencer.py index e48a8a2f..40a85480 100644 --- a/opencompass/openicl/icl_inferencer/icl_ppl_inferencer.py +++ b/opencompass/openicl/icl_inferencer/icl_ppl_inferencer.py @@ -1,3 +1,5 @@ +# flake8: noqa +# yapf: disable """PPL Inferencer.""" import os @@ -84,9 +86,7 @@ class PPLInferencer(BaseInferencer): # 4. Generate in-context examples for testing inputs for idx in range(len(ice_idx_list)): - ice.append( - retriever.generate_ice(ice_idx_list[idx], - ice_template=ice_template)) + ice.append(retriever.generate_ice(ice_idx_list[idx], ice_template=ice_template)) output_handler.save_ice(self.model.parse_template(ice, mode='ppl')) # 5. Calculating PPL for prompts in each label's class @@ -101,33 +101,26 @@ class PPLInferencer(BaseInferencer): # 5.1 Generate prompts of current label and truncate # TODO: Refactor for idx in range(len(ice_idx_list)): - prompt = retriever.generate_label_prompt( - idx, - ice[idx], - label, - ice_template=ice_template, - prompt_template=prompt_template, - remain_sep=normalizing_str is not None) - prompt_token_num = self.model.get_token_len_from_template( - prompt, mode='ppl') + prompt_kwargs = { + 'idx': idx, + 'ice': ice[idx], + 'label': label, + 'ice_template': ice_template, + 'prompt_template': prompt_template, + 'remain_sep': normalizing_str is not None + } + prompt = retriever.generate_label_prompt(**prompt_kwargs) + prompt_token_num = self.model.get_token_len_from_template(prompt, mode='ppl') if self.max_seq_len is not None: - while len(ice_idx_list[idx] - ) > 0 and prompt_token_num > self.max_seq_len: + while len(ice_idx_list[idx]) > 0 and prompt_token_num > self.max_seq_len: ice_idx_list[idx] = ice_idx_list[idx][:-1] - ice[idx] = retriever.generate_ice( - ice_idx_list[idx], ice_template=ice_template) - prompt = retriever.generate_label_prompt( - idx, - ice[idx], - label, - ice_template=ice_template, - prompt_template=prompt_template) - prompt_token_num = self.model.get_token_len_from_template( # noqa - prompt, mode='ppl') # noqa + ice[idx] = retriever.generate_ice(ice_idx_list[idx], ice_template=ice_template) + prompt_kwargs['ice'] = ice[idx] + prompt = retriever.generate_label_prompt(**prompt_kwargs) + prompt_token_num = self.model.get_token_len_from_template(prompt, mode='ppl') if normalizing_str is not None: - assert isinstance(prompt, str), \ - 'Prompt must be a string when normalizing_str is set.' + assert isinstance(prompt, str), 'Prompt must be a string when normalizing_str is set.' 
prompt_sep = prompt if prompt_template is not None: sep_token = prompt_template.sep_token @@ -140,10 +133,9 @@ class PPLInferencer(BaseInferencer): prompt = context + answer normalizing_prompt = normalizing_str + answer - context_length_list.append( - self.model.get_token_len_from_template(context, - mode='ppl')) + context_length_list.append(self.model.get_token_len_from_template(context, mode='ppl')) normalizing_prompt_list.append(normalizing_prompt) + prompt_list.append(prompt) token_num_list.append(prompt_token_num) @@ -153,45 +145,25 @@ class PPLInferencer(BaseInferencer): # 5.2 Get PPL logger.info(f"Calculating PPL for prompts labeled '{label}'") - for idx in trange(0, - len(prompt_list), - self.batch_size, - disable=not self.is_main_process): + for idx in trange(0, len(prompt_list), self.batch_size, disable=not self.is_main_process): sub_prompt_list = prompt_list[idx:idx + self.batch_size] - if normalizing_str is not None: - sub_context_length_list = context_length_list[idx:idx + - self. - batch_size] - sub_normalizing_prompt_list = normalizing_prompt_list[ - idx:idx + self.batch_size] - with torch.no_grad(): if normalizing_str is not None: - res1 = self.model.get_ppl_from_template( - sub_prompt_list, - mask_length=sub_context_length_list) - res2 = self.model.get_ppl_from_template( - sub_normalizing_prompt_list, - mask_length=[ - normalizing_str_len - for i in range(len(sub_prompt_list)) - ]) + sub_context_length_list = context_length_list[idx:idx + self.batch_size] + sub_normalizing_prompt_list = normalizing_prompt_list[idx:idx + self.batch_size] + res1 = self.model.get_ppl_from_template(sub_prompt_list, mask_length=sub_context_length_list) + sub_normalizing_context_length_list = [normalizing_str_len for _ in range(len(sub_prompt_list))] + res2 = self.model.get_ppl_from_template(sub_normalizing_prompt_list, mask_length=sub_normalizing_context_length_list) sub_res = res1 - res2 else: - sub_res = self.model.get_ppl_from_template( - sub_prompt_list).tolist() - for res, prompt in zip( - sub_res, - self.model.parse_template(sub_prompt_list, - mode='ppl')): + sub_res = self.model.get_ppl_from_template(sub_prompt_list).tolist() + + for res, prompt in zip(sub_res, self.model.parse_template(sub_prompt_list, mode='ppl')): sub_ppl_list.append(res) ice_str = self.model.parse_template(ice[idx], mode='ppl') - output_handler.save_prompt_and_ppl( - label, prompt.replace(ice_str, ''), prompt, res, index) - output_handler.results_dict[str( - index)][f'label: {str(label)}'][ - 'BPB'] = res * token_num_list[index] / len( - prompt.replace(ice_str, '').encode()) + prompt_wo_ice = prompt.replace(ice_str, '') + output_handler.save_prompt_and_ppl(label, prompt_wo_ice, prompt, res, index) + output_handler.results_dict[str(index)][f'label: {str(label)}']['BPB'] = res * token_num_list[index] / len(prompt_wo_ice.encode()) index = index + 1 ppl.append(sub_ppl_list) @@ -210,10 +182,6 @@ class PPLInferencer(BaseInferencer): # 8. 
Output if self.is_main_process: os.makedirs(output_json_filepath, exist_ok=True) - output_handler.write_to_json(output_json_filepath, - output_json_filename) + output_handler.write_to_json(output_json_filepath, output_json_filename) - return [ - sample['prediction'] - for sample in output_handler.results_dict.values() - ] + return [sample['prediction'] for sample in output_handler.results_dict.values()] diff --git a/opencompass/partitioners/num_worker.py b/opencompass/partitioners/num_worker.py index 4e22a5ff..58f35847 100644 --- a/opencompass/partitioners/num_worker.py +++ b/opencompass/partitioners/num_worker.py @@ -60,14 +60,16 @@ class NumWorkerPartitioner(BasePartitioner): if osp.exists(filename): continue dataset_size = self.get_size(dataset) - if dataset_size > self.min_task_size: + if self.num_worker <= 1: + chunks.append(dataset) + elif dataset_size <= self.min_task_size: + chunks.append(dataset) + else: root, ext = osp.splitext(filename) dataset_splits = self.split_dataset(dataset) for i, dataset_split in enumerate(dataset_splits): if not osp.exists(f'{root}_{i}{ext}'): chunks.append(dataset_split) - else: - chunks.append(dataset) if self.strategy == 'heuristic': buckets = [[] for _ in range(self.num_worker)] diff --git a/opencompass/summarizers/__init__.py b/opencompass/summarizers/__init__.py index 1d2d2584..274f3b18 100644 --- a/opencompass/summarizers/__init__.py +++ b/opencompass/summarizers/__init__.py @@ -1,5 +1,6 @@ # flake8: noqa: F401, E501 from .circular import CircularSummarizer # noqa: F401 from .default import DefaultSummarizer # noqa: F401 -from .llm_compression import LLMCompressionSummarizer +from .llm_compression import LLMCompressionSummarizer # noqa: F401 +from .multi_faceted import MultiFacetedSummarizer # noqa: F401 from .subjective import * # noqa: F401 diff --git a/opencompass/summarizers/default.py b/opencompass/summarizers/default.py index e4fe023c..f16b208d 100644 --- a/opencompass/summarizers/default.py +++ b/opencompass/summarizers/default.py @@ -226,12 +226,12 @@ class DefaultSummarizer: return raw_results, parsed_results, dataset_metrics, dataset_eval_mode - def _format_table(self, parsed_results, dataset_metrics, dataset_eval_mode): + def _format_table(self, parsed_results, dataset_metrics, dataset_eval_mode, required_dataset_abbrs=None): dataset_abbrs = [dataset_abbr_from_cfg(dataset) for dataset in self.dataset_cfgs] prompt_version = {dataset_abbr_from_cfg(d): get_prompt_hash(d)[:6] for d in self.dataset_cfgs} summarizer_dataset_abbrs = [] - if self.dataset_abbrs is None: + if required_dataset_abbrs is None: # display all dataset metrics included in the config for dataset_abbr in dataset_abbrs: if dataset_abbr in dataset_metrics: @@ -246,7 +246,7 @@ class DefaultSummarizer: summarizer_dataset_abbrs.append((dataset_abbr, metric)) else: # follow the required order - for item in self.dataset_abbrs: + for item in required_dataset_abbrs: if isinstance(item, str): summarizer_dataset_abbrs.append((item, None)) elif isinstance(item, (list, tuple)): @@ -306,7 +306,7 @@ class DefaultSummarizer: text = f'{time_str}\n' + \ 'tabulate format\n' + \ '^' * 128 + '\n' + \ - tabulate.tabulate(table, headers='firstrow') + '\n' + \ + tabulate.tabulate(table, headers='firstrow', floatfmt='.2f') + '\n' + \ '$' * 128 + '\n\n' + \ '-' * 128 + ' THIS IS A DIVIDER ' + '-' * 128 + '\n\n' + \ 'csv format\n' + \ @@ -338,13 +338,13 @@ class DefaultSummarizer: self._calculate_group_metrics(raw_results, parsed_results, dataset_metrics, dataset_eval_mode) # format table - table 
= self._format_table(parsed_results, dataset_metrics, dataset_eval_mode) + table = self._format_table(parsed_results, dataset_metrics, dataset_eval_mode, required_dataset_abbrs=self.dataset_abbrs) # format raw txt raw_txts = self._format_raw_txt(raw_results) # output to screen - print(tabulate.tabulate(table, headers='firstrow')) + print(tabulate.tabulate(table, headers='firstrow', floatfmt='.2f')) # output to .text / .csv files self._output_to_file(output_path, time_str, table, raw_txts) diff --git a/opencompass/summarizers/multi_faceted.py b/opencompass/summarizers/multi_faceted.py new file mode 100644 index 00000000..27848ba3 --- /dev/null +++ b/opencompass/summarizers/multi_faceted.py @@ -0,0 +1,46 @@ +# flake8: noqa +# yapf: disable +import functools +import getpass +import math +import os +from datetime import datetime +from typing import Any, Dict, List, Optional + +import tabulate +from mmengine import ConfigDict + +from .default import DefaultSummarizer + + +class MultiFacetedSummarizer(DefaultSummarizer): + + def __init__(self, config: ConfigDict, dataset_abbrs_list: Optional[Dict[str, List[str]]] = None, summary_groups: List = []) -> None: + super().__init__(config, dataset_abbrs=None, summary_groups=summary_groups) + self.dataset_abbrs_list = dataset_abbrs_list + + def summarize(self, output_path: str = None, time_str: str = datetime.now().strftime('%Y%m%d_%H%M%S')): + + # pick up results + raw_results, parsed_results, dataset_metrics, dataset_eval_mode = self._pick_up_results() + + # calculate group metrics + raw_results, parsed_results, dataset_metrics, dataset_eval_mode = \ + self._calculate_group_metrics(raw_results, parsed_results, dataset_metrics, dataset_eval_mode) + + for dataset_abbrs_item in self.dataset_abbrs_list: + profile_name = dataset_abbrs_item['name'] + profile_dataset_abbrs = dataset_abbrs_item['dataset_abbrs'] + + # format table + table = self._format_table(parsed_results, dataset_metrics, dataset_eval_mode, required_dataset_abbrs=profile_dataset_abbrs) + + # output to screen + print(tabulate.tabulate(table, headers='firstrow', floatfmt='.2f')) + + # output to .text / .csv files + output_csv_path = os.path.join(self.work_dir, 'summary', f'summary_{time_str}', f'{profile_name}.csv') + os.makedirs(os.path.dirname(output_csv_path), exist_ok=True) + with open(output_csv_path, 'w', encoding='utf-8') as f: + f.write('\n'.join([','.join(row) for row in table]) + '\n') + self.logger.info(f'write csv to {os.path.abspath(output_csv_path)}') diff --git a/opencompass/utils/build.py b/opencompass/utils/build.py index 40e8ae2d..14a66683 100644 --- a/opencompass/utils/build.py +++ b/opencompass/utils/build.py @@ -22,5 +22,4 @@ def build_model_from_cfg(model_cfg: ConfigDict): model_cfg.pop('summarizer_abbr', None) model_cfg.pop('pred_postprocessor', None) model_cfg.pop('min_out_len', None) - model_cfg.pop('tokenizer_only', None) return MODELS.build(model_cfg) diff --git a/opencompass/utils/run.py b/opencompass/utils/run.py index 2a26b6c7..c3b9de81 100644 --- a/opencompass/utils/run.py +++ b/opencompass/utils/run.py @@ -5,8 +5,10 @@ import tabulate from mmengine.config import Config from opencompass.datasets.custom import make_custom_dataset_config -from opencompass.models import VLLM, HuggingFaceCausalLM, TurboMindModel -from opencompass.partitioners import NaivePartitioner, SizePartitioner +from opencompass.models import (VLLM, HuggingFaceBaseModel, + HuggingFaceCausalLM, + HuggingFacewithChatTemplate, TurboMindModel) +from opencompass.partitioners import NaivePartitioner, 
NumWorkerPartitioner from opencompass.runners import DLCRunner, LocalRunner, SlurmRunner from opencompass.tasks import OpenICLEvalTask, OpenICLInferTask from opencompass.utils import get_logger, match_files @@ -71,6 +73,7 @@ def get_config_from_arg(args) -> Config: 2. args.models and args.datasets 3. Huggingface parameter groups and args.datasets """ + logger = get_logger() if args.config: config = Config.fromfile(args.config, format_python_code=False) config = try_fill_in_custom_cfgs(config) @@ -140,19 +143,25 @@ def get_config_from_arg(args) -> Config: f'Config file {model[1]} does not contain "models" field') models += cfg['models'] else: - from opencompass.models import HuggingFace - model = dict(type=f'{HuggingFace.__module__}.{HuggingFace.__name__}', + if args.hf_type == 'chat': + mod = HuggingFacewithChatTemplate + else: + mod = HuggingFaceBaseModel + model = dict(type=f'{mod.__module__}.{mod.__name__}', + abbr=args.hf_path.split('/')[-1] + '_hf', path=args.hf_path, - peft_path=args.peft_path, - tokenizer_path=args.tokenizer_path, model_kwargs=args.model_kwargs, + tokenizer_path=args.tokenizer_path, tokenizer_kwargs=args.tokenizer_kwargs, + peft_path=args.peft_path, + peft_kwargs=args.peft_kwargs, max_seq_len=args.max_seq_len, max_out_len=args.max_out_len, - batch_padding=not args.no_batch_padding, batch_size=args.batch_size, pad_token_id=args.pad_token_id, + stop_words=args.stop_words, run_cfg=dict(num_gpus=args.num_gpus)) + logger.debug(f'Using model: {model}') models.append(model) # set infer accelerator if needed if args.accelerator in ['vllm', 'lmdeploy']: @@ -173,7 +182,7 @@ def get_config_from_arg(args) -> Config: summarizer_file = summarizer_arg s = match_cfg_file(summarizers_dir, [summarizer_file])[0] - get_logger().info(f'Loading {s[0]}: {s[1]}') + logger.info(f'Loading {s[0]}: {s[1]}') cfg = Config.fromfile(s[1]) # Use summarizer_key to retrieve the summarizer definition # from the configuration file @@ -186,28 +195,23 @@ def get_config_from_arg(args) -> Config: def change_accelerator(models, accelerator): models = models.copy() + logger = get_logger() model_accels = [] for model in models: - get_logger().info(f'Transforming {model["abbr"]} to {accelerator}') + logger.info(f'Transforming {model["abbr"]} to {accelerator}') # change HuggingFace model to VLLM or TurboMindModel if model['type'] is HuggingFaceCausalLM: gen_args = dict() if model.get('generation_kwargs') is not None: generation_kwargs = model['generation_kwargs'].copy() - gen_args['temperature'] = 0.001 if generation_kwargs.get( - 'temperature' - ) is None else generation_kwargs['temperature'] - gen_args['top_k'] = 1 if generation_kwargs.get( - 'top_k') is None else generation_kwargs['top_k'] - gen_args['top_p'] = 0.9 if generation_kwargs.get( - 'top_p') is None else generation_kwargs['top_p'] - gen_args['stop_token_ids'] = None if generation_kwargs.get( - 'eos_token_id' - ) is None else generation_kwargs['eos_token_id'] - generation_kwargs[ - 'stop_token_ids'] = None if generation_kwargs.get( - 'eos_token_id' - ) is None else generation_kwargs['eos_token_id'] + gen_args['temperature'] = generation_kwargs.get( + 'temperature', 0.001) + gen_args['top_k'] = generation_kwargs.get('top_k', 1) + gen_args['top_p'] = generation_kwargs.get('top_p', 0.9) + gen_args['stop_token_ids'] = generation_kwargs.get( + 'eos_token_id', None) + generation_kwargs['stop_token_ids'] = generation_kwargs.get( + 'eos_token_id', None) generation_kwargs.pop('eos_token_id') else: # if generation_kwargs is not provided, set default values @@ 
-218,11 +222,10 @@ def change_accelerator(models, accelerator): gen_args['stop_token_ids'] = None if accelerator == 'lmdeploy': - get_logger().info( - f'Transforming {model["abbr"]} to {accelerator}') + logger.info(f'Transforming {model["abbr"]} to {accelerator}') + mod = TurboMindModel acc_model = dict( - type= # noqa E251 - f'{TurboMindModel.__module__}.{TurboMindModel.__name__}', + type=f'{mod.__module__}.{mod.__name__}', abbr=model['abbr'].replace('hf', 'lmdeploy') if '-hf' in model['abbr'] else model['abbr'] + '-lmdeploy', path=model['path'], @@ -244,8 +247,7 @@ def change_accelerator(models, accelerator): if model.get(item) is not None: acc_model[item] = model[item] elif accelerator == 'vllm': - get_logger().info( - f'Transforming {model["abbr"]} to {accelerator}') + logger.info(f'Transforming {model["abbr"]} to {accelerator}') acc_model = dict( type=f'{VLLM.__module__}.{VLLM.__name__}', @@ -275,9 +277,8 @@ def get_config_type(obj) -> str: def fill_infer_cfg(cfg, args): new_cfg = dict(infer=dict( - partitioner=dict(type=get_config_type(SizePartitioner), - max_task_size=args.max_partition_size, - gen_task_coef=args.gen_task_coef), + partitioner=dict(type=get_config_type(NumWorkerPartitioner), + num_worker=args.max_num_workers), runner=dict( max_num_workers=args.max_num_workers, debug=args.debug, diff --git a/tools/prompt_viewer.py b/tools/prompt_viewer.py index ed821c5a..ed7c0c96 100644 --- a/tools/prompt_viewer.py +++ b/tools/prompt_viewer.py @@ -54,7 +54,7 @@ def print_prompts(model_cfg, dataset_cfg, count=1): # extracted and generalized as a static method in these Inferencers # and reused here. if model_cfg: - max_seq_len = model_cfg.max_seq_len + max_seq_len = model_cfg.get('max_seq_len', 32768) if not model_cfg['type'].is_api: model_cfg['tokenizer_only'] = True model = build_model_from_cfg(model_cfg)
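As a closing note for reviewers, the heart of the new `HuggingFacewithChatTemplate` path is that prompts are converted from OpenCompass roles to chat-template roles and then rendered by the tokenizer itself, so the per-model `meta_template`/`end_str` plumbing removed throughout this patch is no longer needed. A standalone sketch of that formatting step follows; it assumes a transformers version new enough to provide `apply_chat_template` (hence the `_above_v4_33` module name), and the zephyr checkpoint is only an example, as any model that ships a chat template behaves the same way:

```python
from transformers import AutoTokenizer

# example checkpoint; any HF model with a bundled chat template works the same way
tok = AutoTokenizer.from_pretrained('HuggingFaceH4/zephyr-7b-beta')

# an OpenCompass-style prompt list, i.e. what generate() receives for one sample
sample = [{'role': 'HUMAN', 'prompt': 'What is 2 + 2?'}]

# role mapping mirrors _convert_chat_messages: HUMAN -> user, BOT -> assistant, SYSTEM -> system
role_map = {'HUMAN': 'user', 'BOT': 'assistant', 'SYSTEM': 'system'}
messages = [{'role': role_map[m['role']], 'content': m['prompt']} for m in sample]

# same call as in generate(): render the prompt as text; batch encoding happens afterwards
text = tok.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
print(text)  # e.g. '<|user|>\nWhat is 2 + 2?</s>\n<|assistant|>\n' with this tokenizer
```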