diff --git a/.github/scripts/eval_regression_base.py b/.github/scripts/eval_regression_base.py
index 8b4c6446..12339ecf 100644
--- a/.github/scripts/eval_regression_base.py
+++ b/.github/scripts/eval_regression_base.py
@@ -8,15 +8,17 @@ with read_base():
         race_datasets  # noqa: F401, E501
     from opencompass.configs.models.deepseek.hf_deepseek_moe_16b_base import \
         models as hf_deepseek_moe_16b_base_model  # noqa: F401, E501
+    from opencompass.configs.models.deepseek.hf_deepseek_v2_lite import \
+        models as hf_deepseek_v2_lite_model  # noqa: F401, E501
     # read hf models - chat models
     from opencompass.configs.models.deepseek.lmdeploy_deepseek_7b_base import \
         models as lmdeploy_deepseek_7b_base_model  # noqa: F401, E501
     from opencompass.configs.models.deepseek.vllm_deepseek_moe_16b_base import \
         models as vllm_deepseek_moe_16b_base_model  # noqa: F401, E501
-    from opencompass.configs.models.gemma.hf_gemma_2b import \
-        models as hf_gemma_2b_model  # noqa: F401, E501
-    from opencompass.configs.models.gemma.hf_gemma_7b import \
-        models as hf_gemma_7b_model  # noqa: F401, E501
+    from opencompass.configs.models.gemma.hf_gemma2_2b import \
+        models as hf_gemma2_2b_model  # noqa: F401, E501
+    from opencompass.configs.models.gemma.hf_gemma2_9b import \
+        models as hf_gemma2_9b_model  # noqa: F401, E501
     from opencompass.configs.models.hf_internlm.hf_internlm2_5_7b import \
         models as hf_internlm2_5_7b_model  # noqa: F401, E501
     from opencompass.configs.models.hf_internlm.hf_internlm2_7b import \
@@ -31,16 +33,28 @@ with read_base():
         models as lmdeploy_internlm2_7b_model  # noqa: F401, E501
     from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_base_7b import \
         models as lmdeploy_internlm2_base_7b_model  # noqa: F401, E501
+    from opencompass.configs.models.hf_llama.hf_llama2_7b import \
+        models as hf_llama2_7b_model  # noqa: F401, E501
+    from opencompass.configs.models.hf_llama.hf_llama3_8b import \
+        models as hf_llama3_8b_model  # noqa: F401, E501
+    from opencompass.configs.models.hf_llama.lmdeploy_llama3_1_8b import \
+        models as lmdeploy_llama3_1_8b_model  # noqa: F401, E501
     from opencompass.configs.models.hf_llama.lmdeploy_llama3_8b import \
         models as lmdeploy_llama3_8b_model  # noqa: F401, E501
-    from opencompass.configs.models.mistral.hf_mistral_7b_v0_2 import \
-        models as hf_mistral_7b_v0_2_model  # noqa: F401, E501
+    from opencompass.configs.models.mistral.hf_mistral_7b_v0_3 import \
+        models as hf_mistral_7b_v0_3_model  # noqa: F401, E501
     from opencompass.configs.models.mistral.vllm_mistral_7b_v0_2 import \
         models as vllm_mistral_7b_v0_2_model  # noqa: F401, E501
+    from opencompass.configs.models.mistral.vllm_mixtral_8x7b_v0_1 import \
+        models as vllm_mixtral_8x7b_v0_1_model  # noqa: F401, E501
     from opencompass.configs.models.qwen.hf_qwen1_5_moe_a2_7b import \
         models as hf_qwen1_5_moe_a2_7b_model  # noqa: F401, E501
     from opencompass.configs.models.qwen.hf_qwen2_0_5b import \
         models as hf_qwen2_0_5b_model  # noqa: F401, E501
+    from opencompass.configs.models.qwen.hf_qwen2_1_5b import \
+        models as hf_qwen2_1_5b_model  # noqa: F401, E501
+    from opencompass.configs.models.qwen.hf_qwen2_7b import \
+        models as hf_qwen2_7b_model  # noqa: F401, E501
     from opencompass.configs.models.qwen.lmdeploy_qwen2_1_5b import \
         models as lmdeploy_qwen2_1_5b_model  # noqa: F401, E501
     from opencompass.configs.models.qwen.lmdeploy_qwen2_7b import \
diff --git a/.github/scripts/eval_regression_chat.py b/.github/scripts/eval_regression_chat.py
index 1ee28e63..fa28562f 100644
--- a/.github/scripts/eval_regression_chat.py
+++ b/.github/scripts/eval_regression_chat.py
@@ -13,20 +13,32 @@ with read_base():
         models as hf_baichuan2_7b_chat_model  # noqa: F401, E501
     from opencompass.configs.models.chatglm.hf_glm4_9b_chat import \
         models as hf_glm4_9b_chat_model  # noqa: F401, E501
+    from opencompass.configs.models.chatglm.lmdeploy_glm4_9b_chat import \
+        models as lmdeploy_glm4_9b_chat_model  # noqa: F401, E501
+    from opencompass.configs.models.chatglm.vllm_glm4_9b_chat import \
+        models as vllm_glm4_9b_chat_model  # noqa: F401, E501
     from opencompass.configs.models.deepseek.hf_deepseek_7b_chat import \
         models as hf_deepseek_7b_chat_model  # noqa: F401, E501
     from opencompass.configs.models.deepseek.hf_deepseek_moe_16b_chat import \
         models as hf_deepseek_moe_16b_chat_model  # noqa: F401, E501
+    from opencompass.configs.models.deepseek.hf_deepseek_v2_lite_chat import \
+        models as hf_deepseek_v2_lite_chat_model  # noqa: F401, E501
     from opencompass.configs.models.deepseek.vllm_deepseek_7b_chat import \
         models as vllm_deepseek_7b_chat_model  # noqa: F401, E501
-    from opencompass.configs.models.gemma.hf_gemma_2b_it import \
-        models as hf_gemma_2b_it_model  # noqa: F401, E501
-    from opencompass.configs.models.gemma.hf_gemma_7b_it import \
-        models as hf_gemma_7b_it_model  # noqa: F401, E501
+    from opencompass.configs.models.gemma.hf_gemma2_2b_it import \
+        models as hf_gemma2_2b_it_model  # noqa: F401, E501
+    from opencompass.configs.models.gemma.hf_gemma2_9b_it import \
+        models as hf_gemma2_9b_it_model  # noqa: F401, E501
+    from opencompass.configs.models.gemma.vllm_gemma_7b_it import \
+        models as vllm_gemma_7b_it_model  # noqa: F401, E501
     from opencompass.configs.models.hf_internlm.hf_internlm2_5_7b_chat import \
         models as hf_internlm2_5_7b_chat_model  # noqa: F401, E501
+    from opencompass.configs.models.hf_internlm.hf_internlm2_5_20b_chat import \
+        models as hf_internlm2_5_20b_chat_model  # noqa: F401, E501
     from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_5_7b_chat import \
         models as lmdeploy_internlm2_5_7b_chat_model  # noqa: F401, E501
+    from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_5_20b_chat import \
+        models as lmdeploy_internlm2_5_20b_chat_model  # noqa: F401, E501
     from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_chat_1_8b import \
         models as lmdeploy_internlm2_chat_1_8b_model  # noqa: F401, E501
     from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_chat_1_8b_sft import \
@@ -37,14 +49,20 @@ with read_base():
         models as lmdeploy_internlm2_chat_7b_sft_model  # noqa: F401, E501
     from opencompass.configs.models.hf_internlm.vllm_internlm2_chat_7b import \
         models as vllm_internlm2_chat_7b_model  # noqa: F401, E501
+    from opencompass.configs.models.hf_llama.hf_llama3_1_8b_instruct import \
+        models as hf_llama3_1_8b_instruct_model  # noqa: F401, E501
     from opencompass.configs.models.hf_llama.hf_llama3_8b_instruct import \
         models as hf_llama3_8b_instruct_model  # noqa: F401, E501
+    from opencompass.configs.models.hf_llama.lmdeploy_llama3_1_8b_instruct import \
+        models as lmdeploy_llama3_1_8b_instruct_model  # noqa: F401, E501
     from opencompass.configs.models.hf_llama.lmdeploy_llama3_8b_instruct import \
         models as lmdeploy_llama3_8b_instruct_model  # noqa: F401, E501
-    from opencompass.configs.models.mistral.hf_mistral_7b_instruct_v0_2 import \
-        models as hf_mistral_7b_instruct_v0_2_model  # noqa: F401, E501
+    from opencompass.configs.models.mistral.hf_mistral_7b_instruct_v0_3 import \
+        models as hf_mistral_7b_instruct_v0_3_model  # noqa: F401, E501
     from opencompass.configs.models.mistral.vllm_mistral_7b_instruct_v0_2 import \
         models as vllm_mistral_7b_instruct_v0_2_model  # noqa: F401, E501
+    from opencompass.configs.models.mistral.vllm_mixtral_8x7b_instruct_v0_1 import \
+        models as vllm_mixtral_8x7b_instruct_v0_1_model  # noqa: F401, E501
     from opencompass.configs.models.openbmb.hf_minicpm_2b_dpo_fp32 import \
         models as hf_minicpm_2b_dpo_fp32_model  # noqa: F401, E501
     from opencompass.configs.models.openbmb.hf_minicpm_2b_sft_bf16 import \
@@ -57,6 +75,10 @@ with read_base():
         models as hf_phi_3_mini_8k_instruct_model  # noqa: F401, E501
     from opencompass.configs.models.qwen.hf_qwen1_5_0_5b_chat import \
         models as hf_qwen1_5_0_5b_chat_model  # noqa: F401, E501
+    from opencompass.configs.models.qwen.hf_qwen2_1_5b_instruct import \
+        models as hf_qwen2_1_5b_instruct_model  # noqa: F401, E501
+    from opencompass.configs.models.qwen.hf_qwen2_7b_instruct import \
+        models as hf_qwen2_7b_instruct_model  # noqa: F401, E501
     from opencompass.configs.models.qwen.lmdeploy_qwen2_1_5b_instruct import \
         models as lmdeploy_qwen2_1_5b_instruct_model  # noqa: F401, E501
     from opencompass.configs.models.qwen.lmdeploy_qwen2_7b_instruct import \
diff --git a/.github/scripts/oc_score_assert.py b/.github/scripts/oc_score_assert.py
index f869b157..6f2c0a11 100644
--- a/.github/scripts/oc_score_assert.py
+++ b/.github/scripts/oc_score_assert.py
@@ -7,30 +7,35 @@ import yaml
 output_path = 'regression_result_daily'
 
 chat_model_list = [
-    'baichuan2-7b-chat-hf', 'deepseek-7b-chat-hf', 'deepseek-moe-16b-chat-hf',
-    'deepseek-7b-chat-vllm', 'gemma-2b-it-hf', 'gemma-7b-it-hf',
-    'internlm2_5-7b-chat-hf', 'internlm2_5-7b-chat-turbomind',
-    'internlm2-chat-1.8b-turbomind', 'internlm2-chat-1.8b-sft-turbomind',
-    'internlm2-chat-7b-turbomind', 'internlm2-chat-7b-sft-turbomind',
-    'internlm2-chat-7b-vllm', 'llama-3-8b-instruct-hf',
-    'llama-3-8b-instruct-turbomind', 'mistral-7b-instruct-v0.2-hf',
-    'mistral-7b-instruct-v0.2-vllm', 'minicpm-2b-dpo-fp32-hf',
-    'minicpm-2b-sft-bf16-hf', 'minicpm-2b-sft-fp32-hf',
-    'phi-3-mini-4k-instruct-hf', 'qwen1.5-0.5b-chat-hf',
+    'baichuan2-7b-chat-hf', 'glm-4-9b-chat-turbomind', 'glm-4-9b-chat-vllm',
+    'deepseek-7b-chat-hf', 'deepseek-moe-16b-chat-hf',
+    'deepseek-v2-lite-chat-hf', 'deepseek-7b-chat-vllm', 'gemma2-2b-it-hf',
+    'gemma2-9b-it-hf', 'gemma-7b-it-vllm', 'internlm2_5-7b-chat-hf',
+    'internlm2_5-20b-chat-hf', 'internlm2_5-7b-chat-turbomind',
+    'internlm2_5-20b-chat-turbomind', 'internlm2-chat-1.8b-turbomind',
+    'internlm2-chat-1.8b-sft-turbomind', 'internlm2-chat-7b-turbomind',
+    'internlm2-chat-7b-sft-turbomind', 'internlm2-chat-7b-vllm',
+    'llama-3_1-8b-instruct-hf', 'llama-3-8b-instruct-hf',
+    'llama-3_1-8b-instruct-turbomind', 'llama-3-8b-instruct-turbomind',
+    'mistral-7b-instruct-v0.3-hf', 'mistral-7b-instruct-v0.2-vllm',
+    'minicpm-2b-dpo-fp32-hf', 'minicpm-2b-sft-bf16-hf',
+    'minicpm-2b-sft-fp32-hf', 'phi-3-mini-4k-instruct-hf',
+    'qwen1.5-0.5b-chat-hf', 'qwen2-1.5b-instruct-hf', 'qwen2-7b-instruct-hf',
     'qwen2-1.5b-instruct-turbomind', 'qwen2-7b-instruct-turbomind',
     'qwen1.5-0.5b-chat-vllm', 'yi-1.5-6b-chat-hf', 'yi-1.5-9b-chat-hf',
     'lmdeploy-api-test'
 ]
 base_model_list = [
-    'deepseek-moe-16b-base-hf', 'deepseek-7b-base-turbomind',
-    'deepseek-moe-16b-base-vllm', 'gemma-2b-hf', 'gemma-7b-hf',
-    'internlm2_5-7b-hf', 'internlm2-7b-hf', 'internlm2-base-7b-hf',
-    'internlm2_5-7b-turbomind', 'internlm2-1.8b-turbomind',
-    'internlm2-7b-turbomind', 'internlm2-base-7b-hf',
-    'internlm2-base-7b-turbomind', 'llama-3-8b-turbomind',
-    'mistral-7b-v0.2-hf', 'mistral-7b-v0.2-vllm', 'qwen1.5-moe-a2.7b-hf',
-    'qwen2-0.5b-hf', 'qwen2-1.5b-turbomind', 'qwen2-7b-turbomind',
-    'qwen1.5-0.5b-vllm', 'yi-1.5-6b-hf', 'yi-1.5-9b-hf'
+    'deepseek-moe-16b-base-hf', 'deepseek-v2-lite-hf',
+    'deepseek-7b-base-turbomind', 'deepseek-moe-16b-base-vllm', 'gemma2-2b-hf',
+    'gemma2-9b-hf', 'internlm2_5-7b-hf', 'internlm2-7b-hf',
+    'internlm2-base-7b-hf', 'internlm2-1.8b-turbomind',
+    'internlm2_5-7b-turbomind', 'internlm2-7b-turbomind',
+    'internlm2-base-7b-turbomind', 'llama-2-7b-hf', 'llama-3-8b-hf',
+    'llama-3.1-8b-turbomind', 'llama-3-8b-turbomind', 'mistral-7b-v0.3-hf',
+    'mistral-7b-v0.2-vllm', 'qwen1.5-moe-a2.7b-hf', 'qwen2-0.5b-hf',
+    'qwen2-1.5b-hf', 'qwen2-7b-hf', 'qwen2-1.5b-turbomind',
+    'qwen2-7b-turbomind', 'qwen1.5-0.5b-vllm', 'yi-1.5-6b-hf', 'yi-1.5-9b-hf'
 ]
 
 dataset_list = ['gsm8k', 'race-middle', 'race-high']
diff --git a/.github/scripts/oc_score_baseline.yaml b/.github/scripts/oc_score_baseline.yaml
index d7e765be..9690aa2c 100644
--- a/.github/scripts/oc_score_baseline.yaml
+++ b/.github/scripts/oc_score_baseline.yaml
@@ -8,6 +8,16 @@ glm-4-9b-chat-hf:
   race-middle: 88
   race-high: 88
 
+glm-4-9b-chat-turbomind:
+  gsm8k: 69
+  race-middle: 82
+  race-high: 77
+
+glm-4-9b-chat-vllm:
+  gsm8k: 73
+  race-middle: 87
+  race-high: 87
+
 deepseek-7b-chat-hf:
   gsm8k: 60
   race-middle: 74
@@ -18,6 +28,11 @@ deepseek-moe-16b-chat-hf:
   race-middle: 62
   race-high: 70
 
+deepseek-v2-lite-chat-hf:
+  gsm8k: 59
+  race-middle: 82
+  race-high: 79
+
 deepseek-7b-chat-vllm:
   gsm8k: 63
   race-middle: 74
@@ -33,23 +48,48 @@ gemma-7b-it-hf:
   race-middle: 74
   race-high: 71
 
+gemma-7b-it-vllm:
+  gsm8k: 38
+  race-middle: 75
+  race-high: 70
+
+gemma2-2b-it-hf:
+  gsm8k: 62
+  race-middle: 75
+  race-high: 67
+
+gemma2-9b-it-hf:
+  gsm8k: 80
+  race-middle: 89
+  race-high: 85
+
 internlm2_5-7b-chat-hf:
   gsm8k: 86
   race-middle: 92
   race-high: 93
 
+internlm2_5-20b-chat-hf:
+  gsm8k: 91
+  race-middle: 95
+  race-high: 91
+
 internlm2_5-7b-chat-turbomind:
   gsm8k: 87
   race-middle: 92
   race-high: 93
 
+internlm2_5-20b-chat-turbomind:
+  gsm8k: 91
+  race-middle: 95
+  race-high: 91
+
 internlm2-chat-1.8b-turbomind:
   gsm8k: 40
   race-middle: 82
   race-high: 83
 
 internlm2-chat-1.8b-sft-turbomind:
-  gsm8k: 32
+  gsm8k: 34
   race-middle: 81
   race-high: 83
 
@@ -68,11 +108,21 @@ internlm2-chat-7b-vllm:
   race-middle: 90
   race-high: 91
 
+llama-3_1-8b-instruct-hf:
+  gsm8k: 82
+  race-middle: 82
+  race-high: 88
+
 llama-3-8b-instruct-hf:
   gsm8k: 77
   race-middle: 85
   race-high: 87
 
+llama-3_1-8b-instruct-turbomind:
+  gsm8k: 79
+  race-middle: 82
+  race-high: 88
+
 llama-3-8b-instruct-turbomind:
   gsm8k: 77
   race-middle: 85
@@ -83,6 +133,11 @@ mistral-7b-instruct-v0.2-hf:
   race-middle: 82
   race-high: 78
 
+mistral-7b-instruct-v0.3-hf:
+  gsm8k: 53
+  race-middle: 80
+  race-high: 78
+
 mistral-7b-instruct-v0.2-vllm:
   gsm8k: 49
   race-middle: 81
@@ -118,6 +173,11 @@ qwen1.5-0.5b-chat-hf:
   race-middle: 55
   race-high: 50
 
+qwen2-1.5b-instruct-hf:
+  gsm8k: 63
+  race-middle: 77
+  race-high: 86
+
 qwen2-1.5b-instruct-turbomind:
   gsm8k: 60
   race-middle: 77
@@ -128,6 +188,11 @@ qwen2-7b-instruct-turbomind:
   race-middle: 87
   race-high: 89
 
+qwen2-7b-instruct-hf:
+  gsm8k: 85
+  race-middle: 87
+  race-high: 91
+
 qwen1.5-0.5b-chat-vllm:
   gsm8k: 5
   race-middle: 57
@@ -153,6 +218,11 @@ deepseek-moe-16b-base-hf:
   race-middle: 35
   race-high: 23
 
+deepseek-v2-lite-hf:
+  gsm8k: 37
+  race-middle: 56
+  race-high: 62
+
 deepseek-7b-base-turbomind:
   gsm8k: 21
   race-middle: 42
@@ -173,8 +243,18 @@ gemma-7b-hf:
   race-middle: 59
   race-high: 66
 
+gemma2-2b-hf:
+  gsm8k: 8
+  race-middle: 31
+  race-high: 30
+
+gemma2-9b-hf:
+  gsm8k: 20
+  race-middle: 42
+  race-high: 35
+
 internlm2_5-7b-hf:
-  gsm8k: 46
+  gsm8k: 47
   race-middle: 92
   race-high: 91
 
@@ -208,6 +288,21 @@ internlm2-base-7b-turbomind:
   race-middle: 75
   race-high: 81
 
+llama-2-7b-hf:
+  gsm8k: 17
+  race-middle: 32
+  race-high: 38
+
+llama-3-8b-hf:
+  gsm8k: 48
+  race-middle: 64
+  race-high: 70
+
+llama-3.1-8b-turbomind:
+  gsm8k: 57
+  race-middle: 67
+  race-high: 75
+
 llama-3-8b-turbomind:
   gsm8k: 52
   race-middle: 63
@@ -218,6 +313,11 @@ mistral-7b-v0.2-hf:
   race-middle: 42
   race-high: 60
 
+mistral-7b-v0.3-hf:
+  gsm8k: 43
+  race-middle: 42
+  race-high: 60
+
 mistral-7b-v0.2-vllm:
   gsm8k: 45
   race-middle: 42
@@ -228,11 +328,21 @@ qwen1.5-moe-a2.7b-hf:
   race-middle: 78
   race-high: 90
 
+qwen2-1.5b-hf:
+  gsm8k: 58
+  race-middle: 65
+  race-high: 78
+
 qwen2-0.5b-hf:
   gsm8k: 35
   race-middle: 52
   race-high: 48
 
+qwen2-7b-hf:
+  gsm8k: 82
+  race-middle: 88
+  race-high: 89
+
 qwen2-1.5b-turbomind:
   gsm8k: 57
   race-middle: 64
diff --git a/.github/workflows/daily-run-test.yml b/.github/workflows/daily-run-test.yml
index 7d7affaf..894b149e 100644
--- a/.github/workflows/daily-run-test.yml
+++ b/.github/workflows/daily-run-test.yml
@@ -14,9 +14,14 @@ env:
   PIP_CACHE_PATH: /cpfs01/user/qa-llm-cicd/.cache/pip
   USERSPACE_PREFIX: /cpfs01/user/qa-llm-cicd
   HF_CACHE_PATH: /cpfs01/shared/public/public_hdd/llmeval/model_weights/hf_hub
+  HUGGINGFACE_HUB_CACHE: /cpfs01/shared/public/public_hdd/llmeval/model_weights/hf_hub
+  HF_HUB_CACHE: /cpfs01/shared/public/public_hdd/llmeval/model_weights/hf_hub
   DATEASET_CACHE_PATH: /cpfs01/shared/public/public_hdd/llmeval/llm-evaluation-datasets
   HF_DATASETS_OFFLINE: 1
+  HF_EVALUATE_OFFLINE: 1
   TRANSFORMERS_OFFLINE: 1
+  VLLM_USE_MODELSCOPE: false
+  LMDEPLOY_USE_MODELSCOPE: false
   HF_HUB_OFFLINE: 1
   TRITON_PTXAS_PATH: /usr/local/cuda/bin/ptxas
 
@@ -43,7 +48,11 @@ jobs:
   daily_run_test:
     needs: build-pypi
-    runs-on: self-hosted
+    strategy:
+      fail-fast: false
+      matrix:
+        cuda_env: [dsw_cu11, dsw_cu12]
+    runs-on: ${{ matrix.cuda_env }}
     environment: 'prod'
     timeout-minutes: 420 #7hours
     steps:
       - name: Clone repository
@@ -53,22 +62,38 @@ jobs:
         uses: actions/download-artifact@v4
         with:
           name: my-artifact-${{ github.run_id }}
-      - name: Prepare - create conda env and install torch
+      - name: Prepare - create conda env and install torch - cu11
+        if: ${{matrix.cuda_env == 'dsw_cu11'}}
         run: |
           . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
-          conda create -y --name ${{env.CONDA_ENV}} python=3.10
-          conda activate ${{env.CONDA_ENV}}
-          pip install opencompass*.whl
-          pip install /cpfs01/user/qa-llm-cicd/packages/lmdeploy-0.5.0+cu118-cp310-cp310-manylinux2014_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
-          pip install /cpfs01/user/qa-llm-cicd/packages/vllm-0.5.5+cu118-cp310-cp310-manylinux1_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
-
-          pip install human_eval transformers protobuf pytest gguf msgspec librosa vllm_flash_attn bitsandbytes --cache-dir ${{env.PIP_CACHE_PATH}}
+          conda create -y --name ${{env.CONDA_ENV}}_${{ matrix.cuda_env }} python=3.10
+          conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
+          pip install opencompass*.whl --cache-dir ${{env.PIP_CACHE_PATH}}
+          pip install /cpfs01/user/qa-llm-cicd/packages/lmdeploy-0.6.0+cu118-cp310-cp310-manylinux2014_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
+          pip install /cpfs01/user/qa-llm-cicd/packages/vllm-0.6.1.post1+cu118-cp310-cp310-manylinux1_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
+          pip install human_eval transformers protobuf pytest gguf msgspec librosa vllm_flash_attn bitsandbytes modelscope --cache-dir ${{env.PIP_CACHE_PATH}}
           pip uninstall torch torchvision torchaudio -y
           pip install torch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0 --cache-dir ${{env.PIP_CACHE_PATH}} --index-url https://download.pytorch.org/whl/cu118
           FLASH_ATTENTION_FORCE_BUILD=TRUE pip install /cpfs01/user/qa-llm-cicd/packages/flash_attn-2.6.3+cu118torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
           pip install /cpfs01/user/qa-llm-cicd/packages/xformers-0.0.27.post2+cu118-cp310-cp310-manylinux2014_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
           conda info --envs
           pip list
+      - name: Prepare - create conda env and install torch - cu12
+        if: ${{matrix.cuda_env == 'dsw_cu12'}}
+        run: |
+          . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
+          conda create -y --name ${{env.CONDA_ENV}}_${{ matrix.cuda_env }} python=3.10
+          conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
+          pip install opencompass*.whl --cache-dir ${{env.PIP_CACHE_PATH}}
+          pip install lmdeploy==0.6.0 --cache-dir ${{env.PIP_CACHE_PATH}} --no-cache-dir
+          pip install opencompass[vllm] --cache-dir ${{env.PIP_CACHE_PATH}}
+          pip install human_eval transformers protobuf pytest gguf msgspec librosa vllm_flash_attn bitsandbytes modelscope --cache-dir ${{env.PIP_CACHE_PATH}}
+          pip uninstall torch torchvision torchaudio -y
+          pip install torch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0 --cache-dir ${{env.PIP_CACHE_PATH}}
+          FLASH_ATTENTION_FORCE_BUILD=TRUE pip install /cpfs01/user/qa-llm-cicd/packages/flash_attn-2.6.3+cu123torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
+          pip install /cpfs01/user/qa-llm-cicd/packages/xformers-0.0.27.post2-cp310-cp310-manylinux2014_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
+          conda info --envs
+          pip list
       - name: Prepare - prepare data and hf model
         run: |
           ln -s ${{env.DATEASET_CACHE_PATH}} data
@@ -77,45 +102,45 @@ jobs:
       - name: Run chat model test
         run: |
           . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
-          conda activate ${{env.CONDA_ENV}}
+          conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
           conda info --envs
           sed -i 's/judgemodel/'$(tail -n 1 /cpfs01/shared/public/llmeval/share_info/compassjuder_ip.txt)'/g' .github/scripts/eval_regression_chat.py
-          python3 run.py .github/scripts/eval_regression_chat.py --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/chat --reuse
-          rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/chat/*/summary regression_result_daily
+          opencompass .github/scripts/eval_regression_chat.py --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/chat_${{ matrix.cuda_env }} --reuse --max-num-workers 2
+          rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/chat_${{ matrix.cuda_env }}/*/summary regression_result_daily
           python -m pytest -m chat -s -v --color=yes .github/scripts/oc_score_assert.py
       - name: Run base model test
         run: |
           . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
-          conda activate ${{env.CONDA_ENV}}
+          conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
           conda info --envs
-          python3 run.py .github/scripts/eval_regression_base.py --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/base --reuse
-          rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/base/*/summary regression_result_daily
+          opencompass .github/scripts/eval_regression_base.py --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/base_${{ matrix.cuda_env }} --reuse --max-num-workers 2
+          rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/base_${{ matrix.cuda_env }}/*/summary regression_result_daily
           python -m pytest -m base -s -v --color=yes .github/scripts/oc_score_assert.py
       - name: Run command testcase
         run: |
           . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
-          conda activate ${{env.CONDA_ENV}}
+          conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
           conda info --envs
           export from_tf=TRUE
           python tools/list_configs.py internlm2_5 mmlu
-          python run.py --models hf_internlm2_5_7b --datasets race_ppl --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd1 --reuse
-          rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd1/*/summary regression_result_daily
+          opencompass --models hf_internlm2_5_7b --datasets race_ppl --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd1_${{ matrix.cuda_env }} --reuse --max-num-workers 2
+          rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd1_${{ matrix.cuda_env }}/*/summary regression_result_daily
           python -m pytest -m case1 -s -v --color=yes .github/scripts/oc_score_assert.py
-          python run.py --models hf_internlm2_5_7b_chat --datasets race_gen -a lmdeploy --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd2 --reuse
-          rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd2/*/summary regression_result_daily
+          opencompass --models hf_internlm2_5_7b_chat hf_internlm2_5_1_8b_chat --datasets race_gen -a lmdeploy --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd2_${{ matrix.cuda_env }} --reuse --max-num-workers 2
+          rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd2_${{ matrix.cuda_env }}/*/summary regression_result_daily
           python -m pytest -m case2 -s -v --color=yes .github/scripts/oc_score_assert.py
-          python run.py --datasets race_ppl --hf-type base --hf-path internlm/internlm2_5-7b --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd3 --reuse
-          rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd3/*/summary regression_result_daily
+          opencompass --datasets race_ppl --hf-type base --hf-path internlm/internlm2_5-7b --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd3_${{ matrix.cuda_env }} --reuse --max-num-workers 2
+          rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd3_${{ matrix.cuda_env }}/*/summary regression_result_daily
           python -m pytest -m case3 -s -v --color=yes .github/scripts/oc_score_assert.py
-          python run.py --datasets race_gen --hf-type chat --hf-path internlm/internlm2_5-7b-chat --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd4 --reuse
-          rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd4/*/summary regression_result_daily
+          opencompass --datasets race_gen --hf-type chat --hf-path internlm/internlm2_5-7b-chat --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd4_${{ matrix.cuda_env }} --reuse --max-num-workers 2
+          rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd4_${{ matrix.cuda_env }}/*/summary regression_result_daily
           python -m pytest -m case4 -s -v --color=yes .github/scripts/oc_score_assert.py
       - name: Remove Conda Env
         if: always()
         run: |
           rm -rf regression_result_daily
           . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
-          conda env remove -y --name ${{env.CONDA_ENV}}
+          conda env remove -y --name ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
           conda info --envs
 
   notify_to_feishu:
diff --git a/.github/workflows/pr-run-test.yml b/.github/workflows/pr-run-test.yml
index 6cab1378..d9fcdc3a 100644
--- a/.github/workflows/pr-run-test.yml
+++ b/.github/workflows/pr-run-test.yml
@@ -51,7 +51,7 @@ jobs:
           conda activate ${{env.CONDA_ENV}}
           conda info --envs
           rm -rf regression_result
-          python3 run.py --models hf_internlm2_chat_7b --datasets siqa_gen --work-dir regression_result --debug
+          opencompass --models hf_internlm2_chat_7b --datasets siqa_gen --work-dir regression_result --debug
       - name: Get result
         run: |
           score=$(sed -n '$p' regression_result/*/summary/*.csv | awk -F ',' '{print $NF}')