Mirror of https://github.com/open-compass/opencompass.git
Synced 2025-05-30 16:03:24 +08:00
[CI] add more models into testcase and test env of cu12 (#1558)
* update
* Update pr-run-test.yml
* Update daily-run-test.yml

---------

Co-authored-by: zhulin1 <zhulin1@pjlab.org.cn>
Parent: 87df8a73a3
Commit: aa43eaf267
.github/scripts/eval_regression_base.py (vendored, 26 lines changed)
@@ -8,15 +8,17 @@ with read_base():
        race_datasets  # noqa: F401, E501
    from opencompass.configs.models.deepseek.hf_deepseek_moe_16b_base import \
        models as hf_deepseek_moe_16b_base_model  # noqa: F401, E501
    from opencompass.configs.models.deepseek.hf_deepseek_v2_lite import \
        models as hf_deepseek_v2_lite_model  # noqa: F401, E501
    # read hf models - chat models
    from opencompass.configs.models.deepseek.lmdeploy_deepseek_7b_base import \
        models as lmdeploy_deepseek_7b_base_model  # noqa: F401, E501
    from opencompass.configs.models.deepseek.vllm_deepseek_moe_16b_base import \
        models as vllm_deepseek_moe_16b_base_model  # noqa: F401, E501
    from opencompass.configs.models.gemma.hf_gemma_2b import \
        models as hf_gemma_2b_model  # noqa: F401, E501
    from opencompass.configs.models.gemma.hf_gemma_7b import \
        models as hf_gemma_7b_model  # noqa: F401, E501
    from opencompass.configs.models.gemma.hf_gemma2_2b import \
        models as hf_gemma2_2b_model  # noqa: F401, E501
    from opencompass.configs.models.gemma.hf_gemma2_9b import \
        models as hf_gemma2_9b_model  # noqa: F401, E501
    from opencompass.configs.models.hf_internlm.hf_internlm2_5_7b import \
        models as hf_internlm2_5_7b_model  # noqa: F401, E501
    from opencompass.configs.models.hf_internlm.hf_internlm2_7b import \
@@ -31,16 +33,28 @@ with read_base():
        models as lmdeploy_internlm2_7b_model  # noqa: F401, E501
    from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_base_7b import \
        models as lmdeploy_internlm2_base_7b_model  # noqa: F401, E501
    from opencompass.configs.models.hf_llama.hf_llama2_7b import \
        models as hf_llama2_7b_model  # noqa: F401, E501
    from opencompass.configs.models.hf_llama.hf_llama3_8b import \
        models as hf_llama3_8b_model  # noqa: F401, E501
    from opencompass.configs.models.hf_llama.lmdeploy_llama3_1_8b import \
        models as lmdeploy_llama3_1_8b_model  # noqa: F401, E501
    from opencompass.configs.models.hf_llama.lmdeploy_llama3_8b import \
        models as lmdeploy_llama3_8b_model  # noqa: F401, E501
    from opencompass.configs.models.mistral.hf_mistral_7b_v0_2 import \
        models as hf_mistral_7b_v0_2_model  # noqa: F401, E501
    from opencompass.configs.models.mistral.hf_mistral_7b_v0_3 import \
        models as hf_mistral_7b_v0_3_model  # noqa: F401, E501
    from opencompass.configs.models.mistral.vllm_mistral_7b_v0_2 import \
        models as vllm_mistral_7b_v0_2_model  # noqa: F401, E501
    from opencompass.configs.models.mistral.vllm_mixtral_8x7b_v0_1 import \
        models as vllm_mixtral_8x7b_v0_1_model  # noqa: F401, E501
    from opencompass.configs.models.qwen.hf_qwen1_5_moe_a2_7b import \
        models as hf_qwen1_5_moe_a2_7b_model  # noqa: F401, E501
    from opencompass.configs.models.qwen.hf_qwen2_0_5b import \
        models as hf_qwen2_0_5b_model  # noqa: F401, E501
    from opencompass.configs.models.qwen.hf_qwen2_1_5b import \
        models as hf_qwen2_1_5b_model  # noqa: F401, E501
    from opencompass.configs.models.qwen.hf_qwen2_7b import \
        models as hf_qwen2_7b_model  # noqa: F401, E501
    from opencompass.configs.models.qwen.lmdeploy_qwen2_1_5b import \
        models as lmdeploy_qwen2_1_5b_model  # noqa: F401, E501
    from opencompass.configs.models.qwen.lmdeploy_qwen2_7b import \
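For context, these regression configs rely on OpenCompass's read_base() context manager, which executes the imported config modules and leaves each one's models list in the local namespace under the *_model alias. A minimal sketch of the pattern, with the final sum(...) collection line shown as an assumption about how such scripts typically gather the groups rather than a quote from this diff:

from mmengine.config import read_base

with read_base():
    # Each imported module defines a `models` list; the alias keeps
    # every model group addressable in the current namespace.
    from opencompass.configs.models.gemma.hf_gemma2_9b import \
        models as hf_gemma2_9b_model  # noqa: F401, E501

# Collect every `*_model` list defined above into one flat models list.
models = sum([v for k, v in locals().items() if k.endswith('_model')], [])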
.github/scripts/eval_regression_chat.py (vendored, 34 lines changed)
@@ -13,20 +13,32 @@ with read_base():
        models as hf_baichuan2_7b_chat_model  # noqa: F401, E501
    from opencompass.configs.models.chatglm.hf_glm4_9b_chat import \
        models as hf_glm4_9b_chat_model  # noqa: F401, E501
    from opencompass.configs.models.chatglm.lmdeploy_glm4_9b_chat import \
        models as lmdeploy_glm4_9b_chat_model  # noqa: F401, E501
    from opencompass.configs.models.chatglm.vllm_glm4_9b_chat import \
        models as vllm_glm4_9b_chat_model  # noqa: F401, E501
    from opencompass.configs.models.deepseek.hf_deepseek_7b_chat import \
        models as hf_deepseek_7b_chat_model  # noqa: F401, E501
    from opencompass.configs.models.deepseek.hf_deepseek_moe_16b_chat import \
        models as hf_deepseek_moe_16b_chat_model  # noqa: F401, E501
    from opencompass.configs.models.deepseek.hf_deepseek_v2_lite_chat import \
        models as hf_deepseek_v2_lite_chat_model  # noqa: F401, E501
    from opencompass.configs.models.deepseek.vllm_deepseek_7b_chat import \
        models as vllm_deepseek_7b_chat_model  # noqa: F401, E501
    from opencompass.configs.models.gemma.hf_gemma_2b_it import \
        models as hf_gemma_2b_it_model  # noqa: F401, E501
    from opencompass.configs.models.gemma.hf_gemma_7b_it import \
        models as hf_gemma_7b_it_model  # noqa: F401, E501
    from opencompass.configs.models.gemma.hf_gemma2_2b_it import \
        models as hf_gemma2_2b_it_model  # noqa: F401, E501
    from opencompass.configs.models.gemma.hf_gemma2_9b_it import \
        models as hf_gemma2_9b_it_model  # noqa: F401, E501
    from opencompass.configs.models.gemma.vllm_gemma_7b_it import \
        models as vllm_gemma_7b_it_model  # noqa: F401, E501
    from opencompass.configs.models.hf_internlm.hf_internlm2_5_7b_chat import \
        models as hf_internlm2_5_7b_chat_model  # noqa: F401, E501
    from opencompass.configs.models.hf_internlm.hf_internlm2_5_20b_chat import \
        models as hf_internlm2_5_20b_chat_model  # noqa: F401, E501
    from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_5_7b_chat import \
        models as lmdeploy_internlm2_5_7b_chat_model  # noqa: F401, E501
    from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_5_20b_chat import \
        models as lmdeploy_internlm2_5_20b_chat_model  # noqa: F401, E501
    from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_chat_1_8b import \
        models as lmdeploy_internlm2_chat_1_8b_model  # noqa: F401, E501
    from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_chat_1_8b_sft import \
@@ -37,14 +49,20 @@ with read_base():
        models as lmdeploy_internlm2_chat_7b_sft_model  # noqa: F401, E501
    from opencompass.configs.models.hf_internlm.vllm_internlm2_chat_7b import \
        models as vllm_internlm2_chat_7b_model  # noqa: F401, E501
    from opencompass.configs.models.hf_llama.hf_llama3_1_8b_instruct import \
        models as hf_llama3_1_8b_instruct_model  # noqa: F401, E501
    from opencompass.configs.models.hf_llama.hf_llama3_8b_instruct import \
        models as hf_llama3_8b_instruct_model  # noqa: F401, E501
    from opencompass.configs.models.hf_llama.lmdeploy_llama3_1_8b_instruct import \
        models as lmdeploy_llama3_1_8b_instruct_model  # noqa: F401, E501
    from opencompass.configs.models.hf_llama.lmdeploy_llama3_8b_instruct import \
        models as lmdeploy_llama3_8b_instruct_model  # noqa: F401, E501
    from opencompass.configs.models.mistral.hf_mistral_7b_instruct_v0_2 import \
        models as hf_mistral_7b_instruct_v0_2_model  # noqa: F401, E501
    from opencompass.configs.models.mistral.hf_mistral_7b_instruct_v0_3 import \
        models as hf_mistral_7b_instruct_v0_3_model  # noqa: F401, E501
    from opencompass.configs.models.mistral.vllm_mistral_7b_instruct_v0_2 import \
        models as vllm_mistral_7b_instruct_v0_2_model  # noqa: F401, E501
    from opencompass.configs.models.mistral.vllm_mixtral_8x7b_instruct_v0_1 import \
        models as vllm_mixtral_8x7b_instruct_v0_1_model  # noqa: F401, E501
    from opencompass.configs.models.openbmb.hf_minicpm_2b_dpo_fp32 import \
        models as hf_minicpm_2b_dpo_fp32_model  # noqa: F401, E501
    from opencompass.configs.models.openbmb.hf_minicpm_2b_sft_bf16 import \
@@ -57,6 +75,10 @@ with read_base():
        models as hf_phi_3_mini_8k_instruct_model  # noqa: F401, E501
    from opencompass.configs.models.qwen.hf_qwen1_5_0_5b_chat import \
        models as hf_qwen1_5_0_5b_chat_model  # noqa: F401, E501
    from opencompass.configs.models.qwen.hf_qwen2_1_5b_instruct import \
        models as hf_qwen2_1_5b_instruct_model  # noqa: F401, E501
    from opencompass.configs.models.qwen.hf_qwen2_7b_instruct import \
        models as hf_qwen2_7b_instruct_model  # noqa: F401, E501
    from opencompass.configs.models.qwen.lmdeploy_qwen2_1_5b_instruct import \
        models as lmdeploy_qwen2_1_5b_instruct_model  # noqa: F401, E501
    from opencompass.configs.models.qwen.lmdeploy_qwen2_7b_instruct import \
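Each of these config modules exports a models list of plain dict entries. For orientation, a representative HuggingFace chat entry is sketched below; the field values are assumptions based on typical upstream configs of the time, not taken from this diff:

from opencompass.models import HuggingFacewithChatTemplate

models = [
    dict(
        type=HuggingFacewithChatTemplate,
        abbr='qwen2-7b-instruct-hf',    # abbreviation later matched by oc_score_assert.py
        path='Qwen/Qwen2-7B-Instruct',  # HF hub id; resolved offline via the cache env vars in CI
        max_out_len=1024,
        batch_size=8,
        run_cfg=dict(num_gpus=1),
    )
]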
.github/scripts/oc_score_assert.py (vendored, 43 lines changed)
@@ -7,30 +7,35 @@ import yaml

 output_path = 'regression_result_daily'

 chat_model_list = [
-    'baichuan2-7b-chat-hf', 'deepseek-7b-chat-hf', 'deepseek-moe-16b-chat-hf',
-    'deepseek-7b-chat-vllm', 'gemma-2b-it-hf', 'gemma-7b-it-hf',
-    'internlm2_5-7b-chat-hf', 'internlm2_5-7b-chat-turbomind',
-    'internlm2-chat-1.8b-turbomind', 'internlm2-chat-1.8b-sft-turbomind',
-    'internlm2-chat-7b-turbomind', 'internlm2-chat-7b-sft-turbomind',
-    'internlm2-chat-7b-vllm', 'llama-3-8b-instruct-hf',
-    'llama-3-8b-instruct-turbomind', 'mistral-7b-instruct-v0.2-hf',
-    'mistral-7b-instruct-v0.2-vllm', 'minicpm-2b-dpo-fp32-hf',
-    'minicpm-2b-sft-bf16-hf', 'minicpm-2b-sft-fp32-hf',
-    'phi-3-mini-4k-instruct-hf', 'qwen1.5-0.5b-chat-hf',
+    'baichuan2-7b-chat-hf', 'glm-4-9b-chat-turbomind', 'glm-4-9b-chat-vllm',
+    'deepseek-7b-chat-hf', 'deepseek-moe-16b-chat-hf',
+    'deepseek-v2-lite-chat-hf', 'deepseek-7b-chat-vllm', 'gemma2-2b-it-hf',
+    'gemma2-9b-it-hf', 'gemma-7b-it-vllm', 'internlm2_5-7b-chat-hf',
+    'internlm2_5-20b-chat-hf', 'internlm2_5-7b-chat-turbomind',
+    'internlm2_5-20b-chat-turbomind', 'internlm2-chat-1.8b-turbomind',
+    'internlm2-chat-1.8b-sft-turbomind', 'internlm2-chat-7b-turbomind',
+    'internlm2-chat-7b-sft-turbomind', 'internlm2-chat-7b-vllm',
+    'llama-3_1-8b-instruct-hf', 'llama-3-8b-instruct-hf',
+    'llama-3_1-8b-instruct-turbomind', 'llama-3-8b-instruct-turbomind',
+    'mistral-7b-instruct-v0.3-hf', 'mistral-7b-instruct-v0.2-vllm',
+    'minicpm-2b-dpo-fp32-hf', 'minicpm-2b-sft-bf16-hf',
+    'minicpm-2b-sft-fp32-hf', 'phi-3-mini-4k-instruct-hf',
+    'qwen1.5-0.5b-chat-hf', 'qwen2-1.5b-instruct-hf', 'qwen2-7b-instruct-hf',
+    'qwen2-1.5b-instruct-turbomind', 'qwen2-7b-instruct-turbomind',
+    'qwen1.5-0.5b-chat-vllm', 'yi-1.5-6b-chat-hf', 'yi-1.5-9b-chat-hf',
+    'lmdeploy-api-test'
 ]
 base_model_list = [
-    'deepseek-moe-16b-base-hf', 'deepseek-7b-base-turbomind',
-    'deepseek-moe-16b-base-vllm', 'gemma-2b-hf', 'gemma-7b-hf',
-    'internlm2_5-7b-hf', 'internlm2-7b-hf', 'internlm2-base-7b-hf',
-    'internlm2_5-7b-turbomind', 'internlm2-1.8b-turbomind',
-    'internlm2-7b-turbomind', 'internlm2-base-7b-hf',
-    'internlm2-base-7b-turbomind', 'llama-3-8b-turbomind',
-    'mistral-7b-v0.2-hf', 'mistral-7b-v0.2-vllm', 'qwen1.5-moe-a2.7b-hf',
-    'qwen2-0.5b-hf', 'qwen2-1.5b-turbomind', 'qwen2-7b-turbomind',
-    'qwen1.5-0.5b-vllm', 'yi-1.5-6b-hf', 'yi-1.5-9b-hf'
+    'deepseek-moe-16b-base-hf', 'deepseek-v2-lite-hf',
+    'deepseek-7b-base-turbomind', 'deepseek-moe-16b-base-vllm', 'gemma2-2b-hf',
+    'gemma2-9b-hf', 'internlm2_5-7b-hf', 'internlm2-7b-hf',
+    'internlm2-base-7b-hf', 'internlm2-1.8b-turbomind',
+    'internlm2_5-7b-turbomind', 'internlm2-7b-turbomind',
+    'internlm2-base-7b-turbomind', 'llama-2-7b-hf', 'llama-3-8b-hf',
+    'llama-3.1-8b-turbomind', 'llama-3-8b-turbomind', 'mistral-7b-v0.3-hf',
+    'mistral-7b-v0.2-vllm', 'qwen1.5-moe-a2.7b-hf', 'qwen2-0.5b-hf',
+    'qwen2-1.5b-hf', 'qwen2-7b-hf', 'qwen2-1.5b-turbomind',
+    'qwen2-7b-turbomind', 'qwen1.5-0.5b-vllm', 'yi-1.5-6b-hf', 'yi-1.5-9b-hf'
 ]
 dataset_list = ['gsm8k', 'race-middle', 'race-high']
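These lists feed the assertions that compare run results against the baseline YAML below. The exact pass criterion is not visible in this hunk, so the following is a minimal sketch, assuming a simple absolute-tolerance check against the recorded baseline score (the tolerance value and function name are illustrative):

import yaml

TOL = 5  # assumed absolute tolerance, in score points

def check_score(model_abbr: str, dataset: str, actual: float,
                baseline_path: str = '.github/scripts/oc_score_baseline.yaml') -> None:
    # Baselines are keyed by model abbreviation, then dataset name.
    with open(baseline_path) as f:
        expected = yaml.safe_load(f)[model_abbr][dataset]
    assert abs(actual - expected) <= TOL, (
        f'{model_abbr}/{dataset}: got {actual}, baseline {expected}')

# e.g. check_score('gemma2-9b-it-hf', 'gsm8k', 78.6)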
.github/scripts/oc_score_baseline.yaml (vendored, 114 lines changed)
@@ -8,6 +8,16 @@ glm-4-9b-chat-hf:
   race-middle: 88
   race-high: 88

+glm-4-9b-chat-turbomind:
+  gsm8k: 69
+  race-middle: 82
+  race-high: 77
+
+glm-4-9b-chat-vllm:
+  gsm8k: 73
+  race-middle: 87
+  race-high: 87
+
 deepseek-7b-chat-hf:
   gsm8k: 60
   race-middle: 74
@@ -18,6 +28,11 @@ deepseek-moe-16b-chat-hf:
   race-middle: 62
   race-high: 70

+deepseek-v2-lite-chat-hf:
+  gsm8k: 59
+  race-middle: 82
+  race-high: 79
+
 deepseek-7b-chat-vllm:
   gsm8k: 63
   race-middle: 74
@@ -33,23 +48,48 @@ gemma-7b-it-hf:
   race-middle: 74
   race-high: 71

+gemma-7b-it-vllm:
+  gsm8k: 38
+  race-middle: 75
+  race-high: 70
+
+gemma2-2b-it-hf:
+  gsm8k: 62
+  race-middle: 75
+  race-high: 67
+
+gemma2-9b-it-hf:
+  gsm8k: 80
+  race-middle: 89
+  race-high: 85
+
 internlm2_5-7b-chat-hf:
   gsm8k: 86
   race-middle: 92
   race-high: 93

+internlm2_5-20b-chat-hf:
+  gsm8k: 91
+  race-middle: 95
+  race-high: 91
+
 internlm2_5-7b-chat-turbomind:
   gsm8k: 87
   race-middle: 92
   race-high: 93

+internlm2_5-20b-chat-turbomind:
+  gsm8k: 91
+  race-middle: 95
+  race-high: 91
+
 internlm2-chat-1.8b-turbomind:
   gsm8k: 40
   race-middle: 82
   race-high: 83

 internlm2-chat-1.8b-sft-turbomind:
-  gsm8k: 32
+  gsm8k: 34
   race-middle: 81
   race-high: 83

@@ -68,11 +108,21 @@ internlm2-chat-7b-vllm:
   race-middle: 90
   race-high: 91

+llama-3_1-8b-instruct-hf:
+  gsm8k: 82
+  race-middle: 82
+  race-high: 88
+
 llama-3-8b-instruct-hf:
   gsm8k: 77
   race-middle: 85
   race-high: 87

+llama-3_1-8b-instruct-turbomind:
+  gsm8k: 79
+  race-middle: 82
+  race-high: 88
+
 llama-3-8b-instruct-turbomind:
   gsm8k: 77
   race-middle: 85
@@ -83,6 +133,11 @@ mistral-7b-instruct-v0.2-hf:
   race-middle: 82
   race-high: 78

+mistral-7b-instruct-v0.3-hf:
+  gsm8k: 53
+  race-middle: 80
+  race-high: 78
+
 mistral-7b-instruct-v0.2-vllm:
   gsm8k: 49
   race-middle: 81
@@ -118,6 +173,11 @@ qwen1.5-0.5b-chat-hf:
   race-middle: 55
   race-high: 50

+qwen2-1.5b-instruct-hf:
+  gsm8k: 63
+  race-middle: 77
+  race-high: 86
+
 qwen2-1.5b-instruct-turbomind:
   gsm8k: 60
   race-middle: 77
@@ -128,6 +188,11 @@ qwen2-7b-instruct-turbomind:
   race-middle: 87
   race-high: 89

+qwen2-7b-instruct-hf:
+  gsm8k: 85
+  race-middle: 87
+  race-high: 91
+
 qwen1.5-0.5b-chat-vllm:
   gsm8k: 5
   race-middle: 57
@@ -153,6 +218,11 @@ deepseek-moe-16b-base-hf:
   race-middle: 35
   race-high: 23

+deepseek-v2-lite-hf:
+  gsm8k: 37
+  race-middle: 56
+  race-high: 62
+
 deepseek-7b-base-turbomind:
   gsm8k: 21
   race-middle: 42
@@ -173,8 +243,18 @@ gemma-7b-hf:
   race-middle: 59
   race-high: 66

+gemma2-2b-hf:
+  gsm8k: 8
+  race-middle: 31
+  race-high: 30
+
+gemma2-9b-hf:
+  gsm8k: 20
+  race-middle: 42
+  race-high: 35
+
 internlm2_5-7b-hf:
-  gsm8k: 46
+  gsm8k: 47
   race-middle: 92
   race-high: 91

@@ -208,6 +288,21 @@ internlm2-base-7b-turbomind:
   race-middle: 75
   race-high: 81

+llama-2-7b-hf:
+  gsm8k: 17
+  race-middle: 32
+  race-high: 38
+
+llama-3-8b-hf:
+  gsm8k: 48
+  race-middle: 64
+  race-high: 70
+
+llama-3.1-8b-turbomind:
+  gsm8k: 57
+  race-middle: 67
+  race-high: 75
+
 llama-3-8b-turbomind:
   gsm8k: 52
   race-middle: 63
@@ -218,6 +313,11 @@ mistral-7b-v0.2-hf:
   race-middle: 42
   race-high: 60

+mistral-7b-v0.3-hf:
+  gsm8k: 43
+  race-middle: 42
+  race-high: 60
+
 mistral-7b-v0.2-vllm:
   gsm8k: 45
   race-middle: 42
@@ -228,11 +328,21 @@ qwen1.5-moe-a2.7b-hf:
   race-middle: 78
   race-high: 90

+qwen2-1.5b-hf:
+  gsm8k: 58
+  race-middle: 65
+  race-high: 78
+
 qwen2-0.5b-hf:
   gsm8k: 35
   race-middle: 52
   race-high: 48

+qwen2-7b-hf:
+  gsm8k: 82
+  race-middle: 88
+  race-high: 89
+
 qwen2-1.5b-turbomind:
   gsm8k: 57
   race-middle: 64
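oc_score_assert.py walks the model lists against dataset_list and looks each pair up in this baseline file. A hedged sketch of how such a parameterized check can be wired up in pytest; the model subset and the result_scores fixture are placeholders, not quoted from the script:

import pytest
import yaml

DATASETS = ['gsm8k', 'race-middle', 'race-high']
MODELS = ['glm-4-9b-chat-turbomind', 'qwen2-7b-instruct-hf']  # illustrative subset

with open('.github/scripts/oc_score_baseline.yaml') as f:
    BASELINE = yaml.safe_load(f)

@pytest.mark.chat
@pytest.mark.parametrize('model', MODELS)
@pytest.mark.parametrize('dataset', DATASETS)
def test_chat_score(model, dataset, result_scores):
    # result_scores: an assumed fixture that parses regression_result_daily.
    assert abs(result_scores[model][dataset] - BASELINE[model][dataset]) <= 5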
.github/workflows/daily-run-test.yml (vendored, 75 lines changed)
@@ -14,9 +14,14 @@ env:
   PIP_CACHE_PATH: /cpfs01/user/qa-llm-cicd/.cache/pip
   USERSPACE_PREFIX: /cpfs01/user/qa-llm-cicd
   HF_CACHE_PATH: /cpfs01/shared/public/public_hdd/llmeval/model_weights/hf_hub
+  HUGGINGFACE_HUB_CACHE: /cpfs01/shared/public/public_hdd/llmeval/model_weights/hf_hub
+  HF_HUB_CACHE: /cpfs01/shared/public/public_hdd/llmeval/model_weights/hf_hub
   DATEASET_CACHE_PATH: /cpfs01/shared/public/public_hdd/llmeval/llm-evaluation-datasets
   HF_DATASETS_OFFLINE: 1
   HF_EVALUATE_OFFLINE: 1
   TRANSFORMERS_OFFLINE: 1
+  VLLM_USE_MODELSCOPE: false
+  LMDEPLOY_USE_MODELSCOPE: false
   HF_HUB_OFFLINE: 1
+  TRITON_PTXAS_PATH: /usr/local/cuda/bin/ptxas

@@ -43,7 +48,11 @@ jobs:

   daily_run_test:
     needs: build-pypi
-    runs-on: self-hosted
+    strategy:
+      fail-fast: false
+      matrix:
+        cuda_env: [dsw_cu11, dsw_cu12]
+    runs-on: ${{ matrix.cuda_env }}
     environment: 'prod'
     timeout-minutes: 420 #7hours
     steps:
@@ -53,22 +62,38 @@ jobs:
         uses: actions/download-artifact@v4
         with:
           name: my-artifact-${{ github.run_id }}
-      - name: Prepare - create conda env and install torch
+      - name: Prepare - create conda env and install torch - cu11
+        if: ${{matrix.cuda_env == 'dsw_cu11'}}
         run: |
           . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
-          conda create -y --name ${{env.CONDA_ENV}} python=3.10
-          conda activate ${{env.CONDA_ENV}}
-          pip install opencompass*.whl
-          pip install /cpfs01/user/qa-llm-cicd/packages/lmdeploy-0.5.0+cu118-cp310-cp310-manylinux2014_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
-          pip install /cpfs01/user/qa-llm-cicd/packages/vllm-0.5.5+cu118-cp310-cp310-manylinux1_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
-
-          pip install human_eval transformers protobuf pytest gguf msgspec librosa vllm_flash_attn bitsandbytes --cache-dir ${{env.PIP_CACHE_PATH}}
+          conda create -y --name ${{env.CONDA_ENV}}_${{ matrix.cuda_env }} python=3.10
+          conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
+          pip install opencompass*.whl --cache-dir ${{env.PIP_CACHE_PATH}}
+          pip install /cpfs01/user/qa-llm-cicd/packages/lmdeploy-0.6.0+cu118-cp310-cp310-manylinux2014_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
+          pip install /cpfs01/user/qa-llm-cicd/packages/vllm-0.6.1.post1+cu118-cp310-cp310-manylinux1_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
+          pip install human_eval transformers protobuf pytest gguf msgspec librosa vllm_flash_attn bitsandbytes modelscope --cache-dir ${{env.PIP_CACHE_PATH}}
          pip uninstall torch torchvision torchaudio -y
          pip install torch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0 --cache-dir ${{env.PIP_CACHE_PATH}} --index-url https://download.pytorch.org/whl/cu118
          FLASH_ATTENTION_FORCE_BUILD=TRUE pip install /cpfs01/user/qa-llm-cicd/packages/flash_attn-2.6.3+cu118torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
          pip install /cpfs01/user/qa-llm-cicd/packages/xformers-0.0.27.post2+cu118-cp310-cp310-manylinux2014_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
          conda info --envs
          pip list
+      - name: Prepare - create conda env and install torch - cu12
+        if: ${{matrix.cuda_env == 'dsw_cu12'}}
+        run: |
+          . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
+          conda create -y --name ${{env.CONDA_ENV}}_${{ matrix.cuda_env }} python=3.10
+          conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
+          pip install opencompass*.whl --cache-dir ${{env.PIP_CACHE_PATH}}
+          pip install lmdeploy==0.6.0 --cache-dir ${{env.PIP_CACHE_PATH}} --no-cache-dir
+          pip install opencompass[vllm] --cache-dir ${{env.PIP_CACHE_PATH}}
+          pip install human_eval transformers protobuf pytest gguf msgspec librosa vllm_flash_attn bitsandbytes modelscope --cache-dir ${{env.PIP_CACHE_PATH}}
+          pip uninstall torch torchvision torchaudio -y
+          pip install torch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0 --cache-dir ${{env.PIP_CACHE_PATH}}
+          FLASH_ATTENTION_FORCE_BUILD=TRUE pip install /cpfs01/user/qa-llm-cicd/packages/flash_attn-2.6.3+cu123torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
+          pip install /cpfs01/user/qa-llm-cicd/packages/xformers-0.0.27.post2-cp310-cp310-manylinux2014_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
+          conda info --envs
+          pip list
      - name: Prepare - prepare data and hf model
        run: |
          ln -s ${{env.DATEASET_CACHE_PATH}} data
@@ -77,45 +102,45 @@ jobs:
      - name: Run chat model test
        run: |
          . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
-         conda activate ${{env.CONDA_ENV}}
+         conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
          conda info --envs
          sed -i 's/judgemodel/'$(tail -n 1 /cpfs01/shared/public/llmeval/share_info/compassjuder_ip.txt)'/g' .github/scripts/eval_regression_chat.py
-         python3 run.py .github/scripts/eval_regression_chat.py --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/chat --reuse
-         rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/chat/*/summary regression_result_daily
+         opencompass .github/scripts/eval_regression_chat.py --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/chat_${{ matrix.cuda_env }} --reuse --max-num-workers 2
+         rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/chat_${{ matrix.cuda_env }}/*/summary regression_result_daily
          python -m pytest -m chat -s -v --color=yes .github/scripts/oc_score_assert.py
      - name: Run base model test
        run: |
          . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
-         conda activate ${{env.CONDA_ENV}}
+         conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
          conda info --envs
-         python3 run.py .github/scripts/eval_regression_base.py --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/base --reuse
-         rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/base/*/summary regression_result_daily
+         opencompass .github/scripts/eval_regression_base.py --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/base_${{ matrix.cuda_env }} --reuse --max-num-workers 2
+         rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/base_${{ matrix.cuda_env }}/*/summary regression_result_daily
          python -m pytest -m base -s -v --color=yes .github/scripts/oc_score_assert.py
      - name: Run command testcase
        run: |
          . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
-         conda activate ${{env.CONDA_ENV}}
+         conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
          conda info --envs
          export from_tf=TRUE
          python tools/list_configs.py internlm2_5 mmlu
-         python run.py --models hf_internlm2_5_7b --datasets race_ppl --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd1 --reuse
-         rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd1/*/summary regression_result_daily
+         opencompass --models hf_internlm2_5_7b --datasets race_ppl --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd1_${{ matrix.cuda_env }} --reuse --max-num-workers 2
+         rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd1_${{ matrix.cuda_env }}/*/summary regression_result_daily
          python -m pytest -m case1 -s -v --color=yes .github/scripts/oc_score_assert.py
-         python run.py --models hf_internlm2_5_7b_chat --datasets race_gen -a lmdeploy --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd2 --reuse
-         rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd2/*/summary regression_result_daily
+         opencompass --models hf_internlm2_5_7b_chat hf_internlm2_5_1_8b_chat --datasets race_gen -a lmdeploy --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd2 --reuse --max-num-workers 2
+         rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd2_${{ matrix.cuda_env }}/*/summary regression_result_daily
          python -m pytest -m case2 -s -v --color=yes .github/scripts/oc_score_assert.py
-         python run.py --datasets race_ppl --hf-type base --hf-path internlm/internlm2_5-7b --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd3 --reuse
-         rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd3/*/summary regression_result_daily
+         opencompass --datasets race_ppl --hf-type base --hf-path internlm/internlm2_5-7b --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd3 --reuse --max-num-workers 2
+         rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd3_${{ matrix.cuda_env }}/*/summary regression_result_daily
          python -m pytest -m case3 -s -v --color=yes .github/scripts/oc_score_assert.py
-         python run.py --datasets race_gen --hf-type chat --hf-path internlm/internlm2_5-7b-chat --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd4 --reuse
-         rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd4/*/summary regression_result_daily
+         opencompass --datasets race_gen --hf-type chat --hf-path internlm/internlm2_5-7b-chat --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd4 --reuse --max-num-workers 2
+         rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd4_${{ matrix.cuda_env }}/*/summary regression_result_daily
          python -m pytest -m case4 -s -v --color=yes .github/scripts/oc_score_assert.py
      - name: Remove Conda Env
        if: always()
        run: |
          rm -rf regression_result_daily
          . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
-         conda env remove -y --name ${{env.CONDA_ENV}}
+         conda env remove -y --name ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
          conda info --envs

  notify_to_feishu:
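Each test step above republishes the newest summary directory as regression_result_daily via the rm/ln -s pair. A rough Python equivalent of that bookkeeping, under the assumption that each run writes a timestamped folder containing a summary directory under the work dir:

import os
from pathlib import Path

def publish_summary(work_dir: str, link_name: str = 'regression_result_daily') -> None:
    # Pick the most recently modified <run>/summary under work_dir.
    runs = sorted(Path(work_dir).glob('*/summary'), key=os.path.getmtime)
    if not runs:
        raise FileNotFoundError(f'no summary directory under {work_dir}')
    link = Path(link_name)
    if link.is_symlink() or link.exists():
        link.unlink()
    link.symlink_to(runs[-1])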
.github/workflows/pr-run-test.yml (vendored, 2 lines changed)
@@ -51,7 +51,7 @@ jobs:
          conda activate ${{env.CONDA_ENV}}
          conda info --envs
          rm -rf regression_result
-         python3 run.py --models hf_internlm2_chat_7b --datasets siqa_gen --work-dir regression_result --debug
+         opencompass --models hf_internlm2_chat_7b --datasets siqa_gen --work-dir regression_result --debug
      - name: Get result
        run: |
          score=$(sed -n '$p' regression_result/*/summary/*.csv | awk -F ',' '{print $NF}')
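The Get result step takes the last field of the last line of the summary CSV. A small Python sketch of the same extraction, with the glob pattern carried over from the shell command:

import csv
from glob import glob

def last_score(pattern: str = 'regression_result/*/summary/*.csv') -> float:
    # Mirrors `sed -n '$p' ... | awk -F ',' '{print $NF}'`:
    # take the final row of the summary CSV and return its last column.
    path = sorted(glob(pattern))[-1]
    with open(path, newline='') as f:
        rows = list(csv.reader(f))
    return float(rows[-1][-1])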