[CI] add more models into testcase and test env of cu12 (#1558)

* update

* update

* Update pr-run-test.yml

* update

* update

* update

* update

* Update daily-run-test.yml

* update

* update

* update

* update

* update

* Update daily-run-test.yml

* update

* update

* Update daily-run-test.yml

* Update daily-run-test.yml

* update

* update

* update

* update

* update

* Update daily-run-test.yml

* update

---------

Co-authored-by: zhulin1 <zhulin1@pjlab.org.cn>
This commit is contained in:
zhulinJulia24 2024-09-25 17:07:27 +08:00 committed by GitHub
parent 87df8a73a3
commit aa43eaf267
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 235 additions and 59 deletions

View File

@ -8,15 +8,17 @@ with read_base():
race_datasets # noqa: F401, E501
from opencompass.configs.models.deepseek.hf_deepseek_moe_16b_base import \
models as hf_deepseek_moe_16b_base_model # noqa: F401, E501
from opencompass.configs.models.deepseek.hf_deepseek_v2_lite import \
models as hf_deepseek_v2_lite_model # noqa: F401, E501
# read hf models - chat models
from opencompass.configs.models.deepseek.lmdeploy_deepseek_7b_base import \
models as lmdeploy_deepseek_7b_base_model # noqa: F401, E501
from opencompass.configs.models.deepseek.vllm_deepseek_moe_16b_base import \
models as vllm_deepseek_moe_16b_base_model # noqa: F401, E501
from opencompass.configs.models.gemma.hf_gemma_2b import \
models as hf_gemma_2b_model # noqa: F401, E501
from opencompass.configs.models.gemma.hf_gemma_7b import \
models as hf_gemma_7b_model # noqa: F401, E501
from opencompass.configs.models.gemma.hf_gemma2_2b import \
models as hf_gemma2_2b_model # noqa: F401, E501
from opencompass.configs.models.gemma.hf_gemma2_9b import \
models as hf_gemma2_9b_model # noqa: F401, E501
from opencompass.configs.models.hf_internlm.hf_internlm2_5_7b import \
models as hf_internlm2_5_7b_model # noqa: F401, E501
from opencompass.configs.models.hf_internlm.hf_internlm2_7b import \
@ -31,16 +33,28 @@ with read_base():
models as lmdeploy_internlm2_7b_model # noqa: F401, E501
from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_base_7b import \
models as lmdeploy_internlm2_base_7b_model # noqa: F401, E501
from opencompass.configs.models.hf_llama.hf_llama2_7b import \
models as hf_llama2_7b_model # noqa: F401, E501
from opencompass.configs.models.hf_llama.hf_llama3_8b import \
models as hf_llama3_8b_model # noqa: F401, E501
from opencompass.configs.models.hf_llama.lmdeploy_llama3_1_8b import \
models as lmdeploy_llama3_1_8b_model # noqa: F401, E501
from opencompass.configs.models.hf_llama.lmdeploy_llama3_8b import \
models as lmdeploy_llama3_8b_model # noqa: F401, E501
from opencompass.configs.models.mistral.hf_mistral_7b_v0_2 import \
models as hf_mistral_7b_v0_2_model # noqa: F401, E501
from opencompass.configs.models.mistral.hf_mistral_7b_v0_3 import \
models as hf_mistral_7b_v0_3_model # noqa: F401, E501
from opencompass.configs.models.mistral.vllm_mistral_7b_v0_2 import \
models as vllm_mistral_7b_v0_2_model # noqa: F401, E501
from opencompass.configs.models.mistral.vllm_mixtral_8x7b_v0_1 import \
models as vllm_mixtral_8x7b_v0_1_model # noqa: F401, E501
from opencompass.configs.models.qwen.hf_qwen1_5_moe_a2_7b import \
models as hf_qwen1_5_moe_a2_7b_model # noqa: F401, E501
from opencompass.configs.models.qwen.hf_qwen2_0_5b import \
models as hf_qwen2_0_5b_model # noqa: F401, E501
from opencompass.configs.models.qwen.hf_qwen2_1_5b import \
models as hf_qwen2_1_5b_model # noqa: F401, E501
from opencompass.configs.models.qwen.hf_qwen2_7b import \
models as hf_qwen2_7b_model # noqa: F401, E501
from opencompass.configs.models.qwen.lmdeploy_qwen2_1_5b import \
models as lmdeploy_qwen2_1_5b_model # noqa: F401, E501
from opencompass.configs.models.qwen.lmdeploy_qwen2_7b import \

View File

@ -13,20 +13,32 @@ with read_base():
models as hf_baichuan2_7b_chat_model # noqa: F401, E501
from opencompass.configs.models.chatglm.hf_glm4_9b_chat import \
models as hf_glm4_9b_chat_model # noqa: F401, E501
from opencompass.configs.models.chatglm.lmdeploy_glm4_9b_chat import \
models as lmdeploy_glm4_9b_chat_model # noqa: F401, E501
from opencompass.configs.models.chatglm.vllm_glm4_9b_chat import \
models as vllm_glm4_9b_chat_model # noqa: F401, E501
from opencompass.configs.models.deepseek.hf_deepseek_7b_chat import \
models as hf_deepseek_7b_chat_model # noqa: F401, E501
from opencompass.configs.models.deepseek.hf_deepseek_moe_16b_chat import \
models as hf_deepseek_moe_16b_chat_model # noqa: F401, E501
from opencompass.configs.models.deepseek.hf_deepseek_v2_lite_chat import \
models as hf_deepseek_v2_lite_chat_model # noqa: F401, E501
from opencompass.configs.models.deepseek.vllm_deepseek_7b_chat import \
models as vllm_deepseek_7b_chat_model # noqa: F401, E501
from opencompass.configs.models.gemma.hf_gemma_2b_it import \
models as hf_gemma_2b_it_model # noqa: F401, E501
from opencompass.configs.models.gemma.hf_gemma_7b_it import \
models as hf_gemma_7b_it_model # noqa: F401, E501
from opencompass.configs.models.gemma.hf_gemma2_2b_it import \
models as hf_gemma2_2b_it_model # noqa: F401, E501
from opencompass.configs.models.gemma.hf_gemma2_9b_it import \
models as hf_gemma2_9b_it_model # noqa: F401, E501
from opencompass.configs.models.gemma.vllm_gemma_7b_it import \
models as vllm_gemma_7b_it_model # noqa: F401, E501
from opencompass.configs.models.hf_internlm.hf_internlm2_5_7b_chat import \
models as hf_internlm2_5_7b_chat_model # noqa: F401, E501
from opencompass.configs.models.hf_internlm.hf_internlm2_5_20b_chat import \
models as hf_internlm2_5_20b_chat_model # noqa: F401, E501
from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_5_7b_chat import \
models as lmdeploy_internlm2_5_7b_chat_model # noqa: F401, E501
from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_5_20b_chat import \
models as lmdeploy_internlm2_5_20b_chat_model # noqa: F401, E501
from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_chat_1_8b import \
models as lmdeploy_internlm2_chat_1_8b_model # noqa: F401, E501
from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_chat_1_8b_sft import \
@ -37,14 +49,20 @@ with read_base():
models as lmdeploy_internlm2_chat_7b_sft_model # noqa: F401, E501
from opencompass.configs.models.hf_internlm.vllm_internlm2_chat_7b import \
models as vllm_internlm2_chat_7b_model # noqa: F401, E501
from opencompass.configs.models.hf_llama.hf_llama3_1_8b_instruct import \
models as hf_llama3_1_8b_instruct_model # noqa: F401, E501
from opencompass.configs.models.hf_llama.hf_llama3_8b_instruct import \
models as hf_llama3_8b_instruct_model # noqa: F401, E501
from opencompass.configs.models.hf_llama.lmdeploy_llama3_1_8b_instruct import \
models as lmdeploy_llama3_1_8b_instruct_model # noqa: F401, E501
from opencompass.configs.models.hf_llama.lmdeploy_llama3_8b_instruct import \
models as lmdeploy_llama3_8b_instruct_model # noqa: F401, E501
from opencompass.configs.models.mistral.hf_mistral_7b_instruct_v0_2 import \
models as hf_mistral_7b_instruct_v0_2_model # noqa: F401, E501
from opencompass.configs.models.mistral.hf_mistral_7b_instruct_v0_3 import \
models as hf_mistral_7b_instruct_v0_3_model # noqa: F401, E501
from opencompass.configs.models.mistral.vllm_mistral_7b_instruct_v0_2 import \
models as vllm_mistral_7b_instruct_v0_2_model # noqa: F401, E501
from opencompass.configs.models.mistral.vllm_mixtral_8x7b_instruct_v0_1 import \
models as vllm_mixtral_8x7b_instruct_v0_1_model # noqa: F401, E501
from opencompass.configs.models.openbmb.hf_minicpm_2b_dpo_fp32 import \
models as hf_minicpm_2b_dpo_fp32_model # noqa: F401, E501
from opencompass.configs.models.openbmb.hf_minicpm_2b_sft_bf16 import \
@ -57,6 +75,10 @@ with read_base():
models as hf_phi_3_mini_8k_instruct_model # noqa: F401, E501
from opencompass.configs.models.qwen.hf_qwen1_5_0_5b_chat import \
models as hf_qwen1_5_0_5b_chat_model # noqa: F401, E501
from opencompass.configs.models.qwen.hf_qwen2_1_5b_instruct import \
models as hf_qwen2_1_5b_instruct_model # noqa: F401, E501
from opencompass.configs.models.qwen.hf_qwen2_7b_instruct import \
models as hf_qwen2_7b_instruct_model # noqa: F401, E501
from opencompass.configs.models.qwen.lmdeploy_qwen2_1_5b_instruct import \
models as lmdeploy_qwen2_1_5b_instruct_model # noqa: F401, E501
from opencompass.configs.models.qwen.lmdeploy_qwen2_7b_instruct import \

View File

@ -7,30 +7,35 @@ import yaml
output_path = 'regression_result_daily'
chat_model_list = [
'baichuan2-7b-chat-hf', 'deepseek-7b-chat-hf', 'deepseek-moe-16b-chat-hf',
'deepseek-7b-chat-vllm', 'gemma-2b-it-hf', 'gemma-7b-it-hf',
'internlm2_5-7b-chat-hf', 'internlm2_5-7b-chat-turbomind',
'internlm2-chat-1.8b-turbomind', 'internlm2-chat-1.8b-sft-turbomind',
'internlm2-chat-7b-turbomind', 'internlm2-chat-7b-sft-turbomind',
'internlm2-chat-7b-vllm', 'llama-3-8b-instruct-hf',
'llama-3-8b-instruct-turbomind', 'mistral-7b-instruct-v0.2-hf',
'mistral-7b-instruct-v0.2-vllm', 'minicpm-2b-dpo-fp32-hf',
'minicpm-2b-sft-bf16-hf', 'minicpm-2b-sft-fp32-hf',
'phi-3-mini-4k-instruct-hf', 'qwen1.5-0.5b-chat-hf',
'baichuan2-7b-chat-hf', 'glm-4-9b-chat-turbomind', 'glm-4-9b-chat-vllm',
'deepseek-7b-chat-hf', 'deepseek-moe-16b-chat-hf',
'deepseek-v2-lite-chat-hf', 'deepseek-7b-chat-vllm', 'gemma2-2b-it-hf',
'gemma2-9b-it-hf', 'gemma-7b-it-vllm', 'internlm2_5-7b-chat-hf',
'internlm2_5-20b-chat-hf', 'internlm2_5-7b-chat-turbomind',
'internlm2_5-20b-chat-turbomind', 'internlm2-chat-1.8b-turbomind',
'internlm2-chat-1.8b-sft-turbomind', 'internlm2-chat-7b-turbomind',
'internlm2-chat-7b-sft-turbomind', 'internlm2-chat-7b-vllm',
'llama-3_1-8b-instruct-hf', 'llama-3-8b-instruct-hf',
'llama-3_1-8b-instruct-turbomind', 'llama-3-8b-instruct-turbomind',
'mistral-7b-instruct-v0.3-hf', 'mistral-7b-instruct-v0.2-vllm',
'minicpm-2b-dpo-fp32-hf', 'minicpm-2b-sft-bf16-hf',
'minicpm-2b-sft-fp32-hf', 'phi-3-mini-4k-instruct-hf',
'qwen1.5-0.5b-chat-hf', 'qwen2-1.5b-instruct-hf', 'qwen2-7b-instruct-hf',
'qwen2-1.5b-instruct-turbomind', 'qwen2-7b-instruct-turbomind',
'qwen1.5-0.5b-chat-vllm', 'yi-1.5-6b-chat-hf', 'yi-1.5-9b-chat-hf',
'lmdeploy-api-test'
]
base_model_list = [
'deepseek-moe-16b-base-hf', 'deepseek-7b-base-turbomind',
'deepseek-moe-16b-base-vllm', 'gemma-2b-hf', 'gemma-7b-hf',
'internlm2_5-7b-hf', 'internlm2-7b-hf', 'internlm2-base-7b-hf',
'internlm2_5-7b-turbomind', 'internlm2-1.8b-turbomind',
'internlm2-7b-turbomind', 'internlm2-base-7b-hf',
'internlm2-base-7b-turbomind', 'llama-3-8b-turbomind',
'mistral-7b-v0.2-hf', 'mistral-7b-v0.2-vllm', 'qwen1.5-moe-a2.7b-hf',
'qwen2-0.5b-hf', 'qwen2-1.5b-turbomind', 'qwen2-7b-turbomind',
'qwen1.5-0.5b-vllm', 'yi-1.5-6b-hf', 'yi-1.5-9b-hf'
'deepseek-moe-16b-base-hf', 'deepseek-v2-lite-hf',
'deepseek-7b-base-turbomind', 'deepseek-moe-16b-base-vllm', 'gemma2-2b-hf',
'gemma2-9b-hf', 'internlm2_5-7b-hf', 'internlm2-7b-hf',
'internlm2-base-7b-hf', 'internlm2-1.8b-turbomind',
'internlm2_5-7b-turbomind', 'internlm2-7b-turbomind',
'internlm2-base-7b-turbomind', 'llama-2-7b-hf', 'llama-3-8b-hf',
'llama-3.1-8b-turbomind', 'llama-3-8b-turbomind', 'mistral-7b-v0.3-hf',
'mistral-7b-v0.2-vllm', 'qwen1.5-moe-a2.7b-hf', 'qwen2-0.5b-hf',
'qwen2-1.5b-hf', 'qwen2-7b-hf', 'qwen2-1.5b-turbomind',
'qwen2-7b-turbomind', 'qwen1.5-0.5b-vllm', 'yi-1.5-6b-hf', 'yi-1.5-9b-hf'
]
dataset_list = ['gsm8k', 'race-middle', 'race-high']

View File

@ -8,6 +8,16 @@ glm-4-9b-chat-hf:
race-middle: 88
race-high: 88
glm-4-9b-chat-turbomind:
gsm8k: 69
race-middle: 82
race-high: 77
glm-4-9b-chat-vllm:
gsm8k: 73
race-middle: 87
race-high: 87
deepseek-7b-chat-hf:
gsm8k: 60
race-middle: 74
@ -18,6 +28,11 @@ deepseek-moe-16b-chat-hf:
race-middle: 62
race-high: 70
deepseek-v2-lite-chat-hf:
gsm8k: 59
race-middle: 82
race-high: 79
deepseek-7b-chat-vllm:
gsm8k: 63
race-middle: 74
@ -33,23 +48,48 @@ gemma-7b-it-hf:
race-middle: 74
race-high: 71
gemma-7b-it-vllm:
gsm8k: 38
race-middle: 75
race-high: 70
gemma2-2b-it-hf:
gsm8k: 62
race-middle: 75
race-high: 67
gemma2-9b-it-hf:
gsm8k: 80
race-middle: 89
race-high: 85
internlm2_5-7b-chat-hf:
gsm8k: 86
race-middle: 92
race-high: 93
internlm2_5-20b-chat-hf:
gsm8k: 91
race-middle: 95
race-high: 91
internlm2_5-7b-chat-turbomind:
gsm8k: 87
race-middle: 92
race-high: 93
internlm2_5-20b-chat-turbomind:
gsm8k: 91
race-middle: 95
race-high: 91
internlm2-chat-1.8b-turbomind:
gsm8k: 40
race-middle: 82
race-high: 83
internlm2-chat-1.8b-sft-turbomind:
gsm8k: 32
gsm8k: 34
race-middle: 81
race-high: 83
@ -68,11 +108,21 @@ internlm2-chat-7b-vllm:
race-middle: 90
race-high: 91
llama-3_1-8b-instruct-hf:
gsm8k: 82
race-middle: 82
race-high: 88
llama-3-8b-instruct-hf:
gsm8k: 77
race-middle: 85
race-high: 87
llama-3_1-8b-instruct-turbomind:
gsm8k: 79
race-middle: 82
race-high: 88
llama-3-8b-instruct-turbomind:
gsm8k: 77
race-middle: 85
@ -83,6 +133,11 @@ mistral-7b-instruct-v0.2-hf:
race-middle: 82
race-high: 78
mistral-7b-instruct-v0.3-hf:
gsm8k: 53
race-middle: 80
race-high: 78
mistral-7b-instruct-v0.2-vllm:
gsm8k: 49
race-middle: 81
@ -118,6 +173,11 @@ qwen1.5-0.5b-chat-hf:
race-middle: 55
race-high: 50
qwen2-1.5b-instruct-hf:
gsm8k: 63
race-middle: 77
race-high: 86
qwen2-1.5b-instruct-turbomind:
gsm8k: 60
race-middle: 77
@ -128,6 +188,11 @@ qwen2-7b-instruct-turbomind:
race-middle: 87
race-high: 89
qwen2-7b-instruct-hf:
gsm8k: 85
race-middle: 87
race-high: 91
qwen1.5-0.5b-chat-vllm:
gsm8k: 5
race-middle: 57
@ -153,6 +218,11 @@ deepseek-moe-16b-base-hf:
race-middle: 35
race-high: 23
deepseek-v2-lite-hf:
gsm8k: 37
race-middle: 56
race-high: 62
deepseek-7b-base-turbomind:
gsm8k: 21
race-middle: 42
@ -173,8 +243,18 @@ gemma-7b-hf:
race-middle: 59
race-high: 66
gemma2-2b-hf:
gsm8k: 8
race-middle: 31
race-high: 30
gemma2-9b-hf:
gsm8k: 20
race-middle: 42
race-high: 35
internlm2_5-7b-hf:
gsm8k: 46
gsm8k: 47
race-middle: 92
race-high: 91
@ -208,6 +288,21 @@ internlm2-base-7b-turbomind:
race-middle: 75
race-high: 81
llama-2-7b-hf:
gsm8k: 17
race-middle: 32
race-high: 38
llama-3-8b-hf:
gsm8k: 48
race-middle: 64
race-high: 70
llama-3.1-8b-turbomind:
gsm8k: 57
race-middle: 67
race-high: 75
llama-3-8b-turbomind:
gsm8k: 52
race-middle: 63
@ -218,6 +313,11 @@ mistral-7b-v0.2-hf:
race-middle: 42
race-high: 60
mistral-7b-v0.3-hf:
gsm8k: 43
race-middle: 42
race-high: 60
mistral-7b-v0.2-vllm:
gsm8k: 45
race-middle: 42
@ -228,11 +328,21 @@ qwen1.5-moe-a2.7b-hf:
race-middle: 78
race-high: 90
qwen2-1.5b-hf:
gsm8k: 58
race-middle: 65
race-high: 78
qwen2-0.5b-hf:
gsm8k: 35
race-middle: 52
race-high: 48
qwen2-7b-hf:
gsm8k: 82
race-middle: 88
race-high: 89
qwen2-1.5b-turbomind:
gsm8k: 57
race-middle: 64

View File

@ -14,9 +14,14 @@ env:
PIP_CACHE_PATH: /cpfs01/user/qa-llm-cicd/.cache/pip
USERSPACE_PREFIX: /cpfs01/user/qa-llm-cicd
HF_CACHE_PATH: /cpfs01/shared/public/public_hdd/llmeval/model_weights/hf_hub
HUGGINGFACE_HUB_CACHE: /cpfs01/shared/public/public_hdd/llmeval/model_weights/hf_hub
HF_HUB_CACHE: /cpfs01/shared/public/public_hdd/llmeval/model_weights/hf_hub
DATEASET_CACHE_PATH: /cpfs01/shared/public/public_hdd/llmeval/llm-evaluation-datasets
HF_DATASETS_OFFLINE: 1
HF_EVALUATE_OFFLINE: 1
TRANSFORMERS_OFFLINE: 1
VLLM_USE_MODELSCOPE: false
LMDEPLOY_USE_MODELSCOPE: false
HF_HUB_OFFLINE: 1
TRITON_PTXAS_PATH: /usr/local/cuda/bin/ptxas
@ -43,7 +48,11 @@ jobs:
daily_run_test:
needs: build-pypi
runs-on: self-hosted
strategy:
fail-fast: false
matrix:
cuda_env: [dsw_cu11, dsw_cu12]
runs-on: ${{ matrix.cuda_env }}
environment: 'prod'
timeout-minutes: 420 #7hours
steps:
@ -53,22 +62,38 @@ jobs:
uses: actions/download-artifact@v4
with:
name: my-artifact-${{ github.run_id }}
- name: Prepare - create conda env and install torch
- name: Prepare - create conda env and install torch - cu11
if: ${{matrix.cuda_env == 'dsw_cu11'}}
run: |
. /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
conda create -y --name ${{env.CONDA_ENV}} python=3.10
conda activate ${{env.CONDA_ENV}}
pip install opencompass*.whl
pip install /cpfs01/user/qa-llm-cicd/packages/lmdeploy-0.5.0+cu118-cp310-cp310-manylinux2014_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
pip install /cpfs01/user/qa-llm-cicd/packages/vllm-0.5.5+cu118-cp310-cp310-manylinux1_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
pip install human_eval transformers protobuf pytest gguf msgspec librosa vllm_flash_attn bitsandbytes --cache-dir ${{env.PIP_CACHE_PATH}}
conda create -y --name ${{env.CONDA_ENV}}_${{ matrix.cuda_env }} python=3.10
conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
pip install opencompass*.whl --cache-dir ${{env.PIP_CACHE_PATH}}
pip install /cpfs01/user/qa-llm-cicd/packages/lmdeploy-0.6.0+cu118-cp310-cp310-manylinux2014_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
pip install /cpfs01/user/qa-llm-cicd/packages/vllm-0.6.1.post1+cu118-cp310-cp310-manylinux1_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
pip install human_eval transformers protobuf pytest gguf msgspec librosa vllm_flash_attn bitsandbytes modelscope --cache-dir ${{env.PIP_CACHE_PATH}}
pip uninstall torch torchvision torchaudio -y
pip install torch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0 --cache-dir ${{env.PIP_CACHE_PATH}} --index-url https://download.pytorch.org/whl/cu118
FLASH_ATTENTION_FORCE_BUILD=TRUE pip install /cpfs01/user/qa-llm-cicd/packages/flash_attn-2.6.3+cu118torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
pip install /cpfs01/user/qa-llm-cicd/packages/xformers-0.0.27.post2+cu118-cp310-cp310-manylinux2014_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
conda info --envs
pip list
- name: Prepare - create conda env and install torch - cu12
if: ${{matrix.cuda_env == 'dsw_cu12'}}
run: |
. /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
conda create -y --name ${{env.CONDA_ENV}}_${{ matrix.cuda_env }} python=3.10
conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
pip install opencompass*.whl --cache-dir ${{env.PIP_CACHE_PATH}}
pip install lmdeploy==0.6.0 --cache-dir ${{env.PIP_CACHE_PATH}} --no-cache-dir
pip install opencompass[vllm] --cache-dir ${{env.PIP_CACHE_PATH}}
pip install human_eval transformers protobuf pytest gguf msgspec librosa vllm_flash_attn bitsandbytes modelscope --cache-dir ${{env.PIP_CACHE_PATH}}
pip uninstall torch torchvision torchaudio -y
pip install torch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0 --cache-dir ${{env.PIP_CACHE_PATH}}
FLASH_ATTENTION_FORCE_BUILD=TRUE pip install /cpfs01/user/qa-llm-cicd/packages/flash_attn-2.6.3+cu123torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
pip install /cpfs01/user/qa-llm-cicd/packages/xformers-0.0.27.post2-cp310-cp310-manylinux2014_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
conda info --envs
pip list
- name: Prepare - prepare data and hf model
run: |
ln -s ${{env.DATEASET_CACHE_PATH}} data
@ -77,45 +102,45 @@ jobs:
- name: Run chat model test
run: |
. /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
conda activate ${{env.CONDA_ENV}}
conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
conda info --envs
sed -i 's/judgemodel/'$(tail -n 1 /cpfs01/shared/public/llmeval/share_info/compassjuder_ip.txt)'/g' .github/scripts/eval_regression_chat.py
python3 run.py .github/scripts/eval_regression_chat.py --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/chat --reuse
rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/chat/*/summary regression_result_daily
opencompass .github/scripts/eval_regression_chat.py --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/chat_${{ matrix.cuda_env }} --reuse --max-num-workers 2
rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/chat_${{ matrix.cuda_env }}/*/summary regression_result_daily
python -m pytest -m chat -s -v --color=yes .github/scripts/oc_score_assert.py
- name: Run base model test
run: |
. /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
conda activate ${{env.CONDA_ENV}}
conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
conda info --envs
python3 run.py .github/scripts/eval_regression_base.py --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/base --reuse
rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/base/*/summary regression_result_daily
opencompass .github/scripts/eval_regression_base.py --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/base_${{ matrix.cuda_env }} --reuse --max-num-workers 2
rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/base_${{ matrix.cuda_env }}/*/summary regression_result_daily
python -m pytest -m base -s -v --color=yes .github/scripts/oc_score_assert.py
- name: Run command testcase
run: |
# Every case writes to a work dir suffixed with the matrix cuda_env; the
# symlink after each run must point at the same dir that run wrote to.
. /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
conda activate ${{env.CONDA_ENV}}
conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
conda info --envs
export from_tf=TRUE
python tools/list_configs.py internlm2_5 mmlu
python run.py --models hf_internlm2_5_7b --datasets race_ppl --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd1 --reuse
rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd1/*/summary regression_result_daily
opencompass --models hf_internlm2_5_7b --datasets race_ppl --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd1_${{ matrix.cuda_env }} --reuse --max-num-workers 2
rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd1_${{ matrix.cuda_env }}/*/summary regression_result_daily
python -m pytest -m case1 -s -v --color=yes .github/scripts/oc_score_assert.py
python run.py --models hf_internlm2_5_7b_chat --datasets race_gen -a lmdeploy --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd2 --reuse
rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd2/*/summary regression_result_daily
opencompass --models hf_internlm2_5_7b_chat hf_internlm2_5_1_8b_chat --datasets race_gen -a lmdeploy --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd2_${{ matrix.cuda_env }} --reuse --max-num-workers 2
rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd2_${{ matrix.cuda_env }}/*/summary regression_result_daily
python -m pytest -m case2 -s -v --color=yes .github/scripts/oc_score_assert.py
python run.py --datasets race_ppl --hf-type base --hf-path internlm/internlm2_5-7b --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd3 --reuse
rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd3/*/summary regression_result_daily
opencompass --datasets race_ppl --hf-type base --hf-path internlm/internlm2_5-7b --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd3_${{ matrix.cuda_env }} --reuse --max-num-workers 2
rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd3_${{ matrix.cuda_env }}/*/summary regression_result_daily
python -m pytest -m case3 -s -v --color=yes .github/scripts/oc_score_assert.py
python run.py --datasets race_gen --hf-type chat --hf-path internlm/internlm2_5-7b-chat --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd4 --reuse
rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd4/*/summary regression_result_daily
opencompass --datasets race_gen --hf-type chat --hf-path internlm/internlm2_5-7b-chat --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd4_${{ matrix.cuda_env }} --reuse --max-num-workers 2
rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd4_${{ matrix.cuda_env }}/*/summary regression_result_daily
python -m pytest -m case4 -s -v --color=yes .github/scripts/oc_score_assert.py
- name: Remove Conda Env
if: always()
run: |
rm -rf regression_result_daily
. /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
conda env remove -y --name ${{env.CONDA_ENV}}
conda env remove -y --name ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
conda info --envs
notify_to_feishu:

View File

@ -51,7 +51,7 @@ jobs:
conda activate ${{env.CONDA_ENV}}
conda info --envs
rm -rf regression_result
python3 run.py --models hf_internlm2_chat_7b --datasets siqa_gen --work-dir regression_result --debug
opencompass --models hf_internlm2_chat_7b --datasets siqa_gen --work-dir regression_result --debug
- name: Get result
run: |
score=$(sed -n '$p' regression_result/*/summary/*.csv | awk -F ',' '{print $NF}')