diff --git a/.github/scripts/eval_regression_base.py b/.github/scripts/eval_regression_base.py
index 8b4c6446..12339ecf 100644
--- a/.github/scripts/eval_regression_base.py
+++ b/.github/scripts/eval_regression_base.py
@@ -8,15 +8,17 @@ with read_base():
         race_datasets  # noqa: F401, E501
     from opencompass.configs.models.deepseek.hf_deepseek_moe_16b_base import \
         models as hf_deepseek_moe_16b_base_model  # noqa: F401, E501
+    from opencompass.configs.models.deepseek.hf_deepseek_v2_lite import \
+        models as hf_deepseek_v2_lite_model  # noqa: F401, E501
     # read hf models - chat models
     from opencompass.configs.models.deepseek.lmdeploy_deepseek_7b_base import \
         models as lmdeploy_deepseek_7b_base_model  # noqa: F401, E501
     from opencompass.configs.models.deepseek.vllm_deepseek_moe_16b_base import \
         models as vllm_deepseek_moe_16b_base_model  # noqa: F401, E501
-    from opencompass.configs.models.gemma.hf_gemma_2b import \
-        models as hf_gemma_2b_model  # noqa: F401, E501
-    from opencompass.configs.models.gemma.hf_gemma_7b import \
-        models as hf_gemma_7b_model  # noqa: F401, E501
+    from opencompass.configs.models.gemma.hf_gemma2_2b import \
+        models as hf_gemma2_2b_model  # noqa: F401, E501
+    from opencompass.configs.models.gemma.hf_gemma2_9b import \
+        models as hf_gemma2_9b_model  # noqa: F401, E501
     from opencompass.configs.models.hf_internlm.hf_internlm2_5_7b import \
         models as hf_internlm2_5_7b_model  # noqa: F401, E501
     from opencompass.configs.models.hf_internlm.hf_internlm2_7b import \
@@ -31,16 +33,28 @@ with read_base():
         models as lmdeploy_internlm2_7b_model  # noqa: F401, E501
     from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_base_7b import \
         models as lmdeploy_internlm2_base_7b_model  # noqa: F401, E501
+    from opencompass.configs.models.hf_llama.hf_llama2_7b import \
+        models as hf_llama2_7b_model  # noqa: F401, E501
+    from opencompass.configs.models.hf_llama.hf_llama3_8b import \
+        models as hf_llama3_8b_model  # noqa: F401, E501
+    from opencompass.configs.models.hf_llama.lmdeploy_llama3_1_8b import \
+        models as lmdeploy_llama3_1_8b_model  # noqa: F401, E501
     from opencompass.configs.models.hf_llama.lmdeploy_llama3_8b import \
         models as lmdeploy_llama3_8b_model  # noqa: F401, E501
-    from opencompass.configs.models.mistral.hf_mistral_7b_v0_2 import \
-        models as hf_mistral_7b_v0_2_model  # noqa: F401, E501
+    from opencompass.configs.models.mistral.hf_mistral_7b_v0_3 import \
+        models as hf_mistral_7b_v0_3_model  # noqa: F401, E501
     from opencompass.configs.models.mistral.vllm_mistral_7b_v0_2 import \
         models as vllm_mistral_7b_v0_2_model  # noqa: F401, E501
+    from opencompass.configs.models.mistral.vllm_mixtral_8x7b_v0_1 import \
+        models as vllm_mixtral_8x7b_v0_1_model  # noqa: F401, E501
     from opencompass.configs.models.qwen.hf_qwen1_5_moe_a2_7b import \
         models as hf_qwen1_5_moe_a2_7b_model  # noqa: F401, E501
     from opencompass.configs.models.qwen.hf_qwen2_0_5b import \
         models as hf_qwen2_0_5b_model  # noqa: F401, E501
+    from opencompass.configs.models.qwen.hf_qwen2_1_5b import \
+        models as hf_qwen2_1_5b_model  # noqa: F401, E501
+    from opencompass.configs.models.qwen.hf_qwen2_7b import \
+        models as hf_qwen2_7b_model  # noqa: F401, E501
     from opencompass.configs.models.qwen.lmdeploy_qwen2_1_5b import \
         models as lmdeploy_qwen2_1_5b_model  # noqa: F401, E501
     from opencompass.configs.models.qwen.lmdeploy_qwen2_7b import \
diff --git a/.github/scripts/eval_regression_chat.py b/.github/scripts/eval_regression_chat.py
index 1ee28e63..fa28562f 100644
--- a/.github/scripts/eval_regression_chat.py
+++ b/.github/scripts/eval_regression_chat.py
@@ -13,20 +13,32 @@ with read_base():
         models as hf_baichuan2_7b_chat_model  # noqa: F401, E501
     from opencompass.configs.models.chatglm.hf_glm4_9b_chat import \
         models as hf_glm4_9b_chat_model  # noqa: F401, E501
+    from opencompass.configs.models.chatglm.lmdeploy_glm4_9b_chat import \
+        models as lmdeploy_glm4_9b_chat_model  # noqa: F401, E501
+    from opencompass.configs.models.chatglm.vllm_glm4_9b_chat import \
+        models as vllm_glm4_9b_chat_model  # noqa: F401, E501
     from opencompass.configs.models.deepseek.hf_deepseek_7b_chat import \
         models as hf_deepseek_7b_chat_model  # noqa: F401, E501
     from opencompass.configs.models.deepseek.hf_deepseek_moe_16b_chat import \
         models as hf_deepseek_moe_16b_chat_model  # noqa: F401, E501
+    from opencompass.configs.models.deepseek.hf_deepseek_v2_lite_chat import \
+        models as hf_deepseek_v2_lite_chat_model  # noqa: F401, E501
     from opencompass.configs.models.deepseek.vllm_deepseek_7b_chat import \
         models as vllm_deepseek_7b_chat_model  # noqa: F401, E501
-    from opencompass.configs.models.gemma.hf_gemma_2b_it import \
-        models as hf_gemma_2b_it_model  # noqa: F401, E501
-    from opencompass.configs.models.gemma.hf_gemma_7b_it import \
-        models as hf_gemma_7b_it_model  # noqa: F401, E501
+    from opencompass.configs.models.gemma.hf_gemma2_2b_it import \
+        models as hf_gemma2_2b_it_model  # noqa: F401, E501
+    from opencompass.configs.models.gemma.hf_gemma2_9b_it import \
+        models as hf_gemma2_9b_it_model  # noqa: F401, E501
+    from opencompass.configs.models.gemma.vllm_gemma_7b_it import \
+        models as vllm_gemma_7b_it_model  # noqa: F401, E501
     from opencompass.configs.models.hf_internlm.hf_internlm2_5_7b_chat import \
         models as hf_internlm2_5_7b_chat_model  # noqa: F401, E501
+    from opencompass.configs.models.hf_internlm.hf_internlm2_5_20b_chat import \
+        models as hf_internlm2_5_20b_chat_model  # noqa: F401, E501
     from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_5_7b_chat import \
         models as lmdeploy_internlm2_5_7b_chat_model  # noqa: F401, E501
+    from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_5_20b_chat import \
+        models as lmdeploy_internlm2_5_20b_chat_model  # noqa: F401, E501
     from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_chat_1_8b import \
         models as lmdeploy_internlm2_chat_1_8b_model  # noqa: F401, E501
     from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_chat_1_8b_sft import \
@@ -37,14 +49,20 @@ with read_base():
         models as lmdeploy_internlm2_chat_7b_sft_model  # noqa: F401, E501
     from opencompass.configs.models.hf_internlm.vllm_internlm2_chat_7b import \
         models as vllm_internlm2_chat_7b_model  # noqa: F401, E501
+    from opencompass.configs.models.hf_llama.hf_llama3_1_8b_instruct import \
+        models as hf_llama3_1_8b_instruct_model  # noqa: F401, E501
     from opencompass.configs.models.hf_llama.hf_llama3_8b_instruct import \
         models as hf_llama3_8b_instruct_model  # noqa: F401, E501
+    from opencompass.configs.models.hf_llama.lmdeploy_llama3_1_8b_instruct import \
+        models as lmdeploy_llama3_1_8b_instruct_model  # noqa: F401, E501
     from opencompass.configs.models.hf_llama.lmdeploy_llama3_8b_instruct import \
         models as lmdeploy_llama3_8b_instruct_model  # noqa: F401, E501
-    from opencompass.configs.models.mistral.hf_mistral_7b_instruct_v0_2 import \
-        models as hf_mistral_7b_instruct_v0_2_model  # noqa: F401, E501
+    from opencompass.configs.models.mistral.hf_mistral_7b_instruct_v0_3 import \
+        models as hf_mistral_7b_instruct_v0_3_model  # noqa: F401, E501
     from opencompass.configs.models.mistral.vllm_mistral_7b_instruct_v0_2 import \
         models as vllm_mistral_7b_instruct_v0_2_model  # noqa: F401, E501
+    from opencompass.configs.models.mistral.vllm_mixtral_8x7b_instruct_v0_1 import \
+        models as vllm_mixtral_8x7b_instruct_v0_1_model  # noqa: F401, E501
     from opencompass.configs.models.openbmb.hf_minicpm_2b_dpo_fp32 import \
         models as hf_minicpm_2b_dpo_fp32_model  # noqa: F401, E501
     from opencompass.configs.models.openbmb.hf_minicpm_2b_sft_bf16 import \
@@ -57,6 +75,10 @@ with read_base():
         models as hf_phi_3_mini_8k_instruct_model  # noqa: F401, E501
     from opencompass.configs.models.qwen.hf_qwen1_5_0_5b_chat import \
         models as hf_qwen1_5_0_5b_chat_model  # noqa: F401, E501
+    from opencompass.configs.models.qwen.hf_qwen2_1_5b_instruct import \
+        models as hf_qwen2_1_5b_instruct_model  # noqa: F401, E501
+    from opencompass.configs.models.qwen.hf_qwen2_7b_instruct import \
+        models as hf_qwen2_7b_instruct_model  # noqa: F401, E501
     from opencompass.configs.models.qwen.lmdeploy_qwen2_1_5b_instruct import \
         models as lmdeploy_qwen2_1_5b_instruct_model  # noqa: F401, E501
     from opencompass.configs.models.qwen.lmdeploy_qwen2_7b_instruct import \
diff --git a/.github/scripts/oc_score_assert.py b/.github/scripts/oc_score_assert.py
index f869b157..6f2c0a11 100644
--- a/.github/scripts/oc_score_assert.py
+++ b/.github/scripts/oc_score_assert.py
@@ -7,30 +7,35 @@ import yaml
 output_path = 'regression_result_daily'
 
 chat_model_list = [
-    'baichuan2-7b-chat-hf', 'deepseek-7b-chat-hf', 'deepseek-moe-16b-chat-hf',
-    'deepseek-7b-chat-vllm', 'gemma-2b-it-hf', 'gemma-7b-it-hf',
-    'internlm2_5-7b-chat-hf', 'internlm2_5-7b-chat-turbomind',
-    'internlm2-chat-1.8b-turbomind', 'internlm2-chat-1.8b-sft-turbomind',
-    'internlm2-chat-7b-turbomind', 'internlm2-chat-7b-sft-turbomind',
-    'internlm2-chat-7b-vllm', 'llama-3-8b-instruct-hf',
-    'llama-3-8b-instruct-turbomind', 'mistral-7b-instruct-v0.2-hf',
-    'mistral-7b-instruct-v0.2-vllm', 'minicpm-2b-dpo-fp32-hf',
-    'minicpm-2b-sft-bf16-hf', 'minicpm-2b-sft-fp32-hf',
-    'phi-3-mini-4k-instruct-hf', 'qwen1.5-0.5b-chat-hf',
+    'baichuan2-7b-chat-hf', 'glm-4-9b-chat-turbomind', 'glm-4-9b-chat-vllm',
+    'deepseek-7b-chat-hf', 'deepseek-moe-16b-chat-hf',
+    'deepseek-v2-lite-chat-hf', 'deepseek-7b-chat-vllm', 'gemma2-2b-it-hf',
+    'gemma2-9b-it-hf', 'gemma-7b-it-vllm', 'internlm2_5-7b-chat-hf',
+    'internlm2_5-20b-chat-hf', 'internlm2_5-7b-chat-turbomind',
+    'internlm2_5-20b-chat-turbomind', 'internlm2-chat-1.8b-turbomind',
+    'internlm2-chat-1.8b-sft-turbomind', 'internlm2-chat-7b-turbomind',
+    'internlm2-chat-7b-sft-turbomind', 'internlm2-chat-7b-vllm',
+    'llama-3_1-8b-instruct-hf', 'llama-3-8b-instruct-hf',
+    'llama-3_1-8b-instruct-turbomind', 'llama-3-8b-instruct-turbomind',
+    'mistral-7b-instruct-v0.3-hf', 'mistral-7b-instruct-v0.2-vllm',
+    'minicpm-2b-dpo-fp32-hf', 'minicpm-2b-sft-bf16-hf',
+    'minicpm-2b-sft-fp32-hf', 'phi-3-mini-4k-instruct-hf',
+    'qwen1.5-0.5b-chat-hf', 'qwen2-1.5b-instruct-hf', 'qwen2-7b-instruct-hf',
     'qwen2-1.5b-instruct-turbomind', 'qwen2-7b-instruct-turbomind',
     'qwen1.5-0.5b-chat-vllm', 'yi-1.5-6b-chat-hf', 'yi-1.5-9b-chat-hf',
     'lmdeploy-api-test'
 ]
 base_model_list = [
-    'deepseek-moe-16b-base-hf', 'deepseek-7b-base-turbomind',
-    'deepseek-moe-16b-base-vllm', 'gemma-2b-hf', 'gemma-7b-hf',
-    'internlm2_5-7b-hf', 'internlm2-7b-hf', 'internlm2-base-7b-hf',
-    'internlm2_5-7b-turbomind', 'internlm2-1.8b-turbomind',
-    'internlm2-7b-turbomind', 'internlm2-base-7b-hf',
-    'internlm2-base-7b-turbomind', 'llama-3-8b-turbomind',
-    'mistral-7b-v0.2-hf', 'mistral-7b-v0.2-vllm', 'qwen1.5-moe-a2.7b-hf',
-    'qwen2-0.5b-hf', 'qwen2-1.5b-turbomind', 'qwen2-7b-turbomind',
-    'qwen1.5-0.5b-vllm', 'yi-1.5-6b-hf', 'yi-1.5-9b-hf'
+    'deepseek-moe-16b-base-hf', 'deepseek-v2-lite-hf',
+    'deepseek-7b-base-turbomind', 'deepseek-moe-16b-base-vllm', 'gemma2-2b-hf',
+    'gemma2-9b-hf', 'internlm2_5-7b-hf', 'internlm2-7b-hf',
+    'internlm2-base-7b-hf', 'internlm2-1.8b-turbomind',
+    'internlm2_5-7b-turbomind', 'internlm2-7b-turbomind',
+    'internlm2-base-7b-turbomind', 'llama-2-7b-hf', 'llama-3-8b-hf',
+    'llama-3.1-8b-turbomind', 'llama-3-8b-turbomind', 'mistral-7b-v0.3-hf',
+    'mistral-7b-v0.2-vllm', 'qwen1.5-moe-a2.7b-hf', 'qwen2-0.5b-hf',
+    'qwen2-1.5b-hf', 'qwen2-7b-hf', 'qwen2-1.5b-turbomind',
+    'qwen2-7b-turbomind', 'qwen1.5-0.5b-vllm', 'yi-1.5-6b-hf', 'yi-1.5-9b-hf'
 ]
 
 dataset_list = ['gsm8k', 'race-middle', 'race-high']
diff --git a/.github/scripts/oc_score_baseline.yaml b/.github/scripts/oc_score_baseline.yaml
index d7e765be..9690aa2c 100644
--- a/.github/scripts/oc_score_baseline.yaml
+++ b/.github/scripts/oc_score_baseline.yaml
@@ -8,6 +8,16 @@ glm-4-9b-chat-hf:
   race-middle: 88
   race-high: 88
 
+glm-4-9b-chat-turbomind:
+  gsm8k: 69
+  race-middle: 82
+  race-high: 77
+
+glm-4-9b-chat-vllm:
+  gsm8k: 73
+  race-middle: 87
+  race-high: 87
+
 deepseek-7b-chat-hf:
   gsm8k: 60
   race-middle: 74
@@ -18,6 +28,11 @@ deepseek-moe-16b-chat-hf:
   race-middle: 62
   race-high: 70
 
+deepseek-v2-lite-chat-hf:
+  gsm8k: 59
+  race-middle: 82
+  race-high: 79
+
 deepseek-7b-chat-vllm:
   gsm8k: 63
   race-middle: 74
@@ -33,23 +48,48 @@ gemma-7b-it-hf:
   race-middle: 74
   race-high: 71
 
+gemma-7b-it-vllm:
+  gsm8k: 38
+  race-middle: 75
+  race-high: 70
+
+gemma2-2b-it-hf:
+  gsm8k: 62
+  race-middle: 75
+  race-high: 67
+
+gemma2-9b-it-hf:
+  gsm8k: 80
+  race-middle: 89
+  race-high: 85
+
 internlm2_5-7b-chat-hf:
   gsm8k: 86
   race-middle: 92
   race-high: 93
 
+internlm2_5-20b-chat-hf:
+  gsm8k: 91
+  race-middle: 95
+  race-high: 91
+
 internlm2_5-7b-chat-turbomind:
   gsm8k: 87
   race-middle: 92
   race-high: 93
 
+internlm2_5-20b-chat-turbomind:
+  gsm8k: 91
+  race-middle: 95
+  race-high: 91
+
 internlm2-chat-1.8b-turbomind:
   gsm8k: 40
   race-middle: 82
   race-high: 83
 
 internlm2-chat-1.8b-sft-turbomind:
-  gsm8k: 32
+  gsm8k: 34
   race-middle: 81
   race-high: 83
 
@@ -68,11 +108,21 @@ internlm2-chat-7b-vllm:
   race-middle: 90
   race-high: 91
 
+llama-3_1-8b-instruct-hf:
+  gsm8k: 82
+  race-middle: 82
+  race-high: 88
+
 llama-3-8b-instruct-hf:
   gsm8k: 77
   race-middle: 85
   race-high: 87
 
+llama-3_1-8b-instruct-turbomind:
+  gsm8k: 79
+  race-middle: 82
+  race-high: 88
+
 llama-3-8b-instruct-turbomind:
   gsm8k: 77
   race-middle: 85
@@ -83,6 +133,11 @@ mistral-7b-instruct-v0.2-hf:
   race-middle: 82
   race-high: 78
 
+mistral-7b-instruct-v0.3-hf:
+  gsm8k: 53
+  race-middle: 80
+  race-high: 78
+
 mistral-7b-instruct-v0.2-vllm:
   gsm8k: 49
   race-middle: 81
@@ -118,6 +173,11 @@ qwen1.5-0.5b-chat-hf:
   race-middle: 55
   race-high: 50
 
+qwen2-1.5b-instruct-hf:
+  gsm8k: 63
+  race-middle: 77
+  race-high: 86
+
 qwen2-1.5b-instruct-turbomind:
   gsm8k: 60
   race-middle: 77
@@ -128,6 +188,11 @@ qwen2-7b-instruct-turbomind:
   race-middle: 87
   race-high: 89
 
+qwen2-7b-instruct-hf:
+  gsm8k: 85
+  race-middle: 87
+  race-high: 91
+
 qwen1.5-0.5b-chat-vllm:
   gsm8k: 5
   race-middle: 57
@@ -153,6 +218,11 @@ deepseek-moe-16b-base-hf:
   race-middle: 35
   race-high: 23
 
+deepseek-v2-lite-hf:
+  gsm8k: 37
+  race-middle: 56
+  race-high: 62
+
 deepseek-7b-base-turbomind:
   gsm8k: 21
   race-middle: 42
@@ -173,8 +243,18 @@ gemma-7b-hf:
   race-middle: 59
   race-high: 66
 
+gemma2-2b-hf:
+  gsm8k: 8
+  race-middle: 31
+  race-high: 30
+
+gemma2-9b-hf:
+  gsm8k: 20
+  race-middle: 42
+  race-high: 35
+
 internlm2_5-7b-hf:
-  gsm8k: 46
+  gsm8k: 47
   race-middle: 92
   race-high: 91
 
@@ -208,6 +288,21 @@ internlm2-base-7b-turbomind:
   race-middle: 75
   race-high: 81
 
+llama-2-7b-hf:
+  gsm8k: 17
+  race-middle: 32
+  race-high: 38
+
+llama-3-8b-hf:
+  gsm8k: 48
+  race-middle: 64
+  race-high: 70
+
+llama-3.1-8b-turbomind:
+  gsm8k: 57
+  race-middle: 67
+  race-high: 75
+
 llama-3-8b-turbomind:
   gsm8k: 52
   race-middle: 63
@@ -218,6 +313,11 @@ mistral-7b-v0.2-hf:
   race-middle: 42
   race-high: 60
 
+mistral-7b-v0.3-hf:
+  gsm8k: 43
+  race-middle: 42
+  race-high: 60
+
 mistral-7b-v0.2-vllm:
   gsm8k: 45
   race-middle: 42
@@ -228,11 +328,21 @@ qwen1.5-moe-a2.7b-hf:
   race-middle: 78
   race-high: 90
 
+qwen2-1.5b-hf:
+  gsm8k: 58
+  race-middle: 65
+  race-high: 78
+
 qwen2-0.5b-hf:
   gsm8k: 35
   race-middle: 52
   race-high: 48
 
+qwen2-7b-hf:
+  gsm8k: 82
+  race-middle: 88
+  race-high: 89
+
 qwen2-1.5b-turbomind:
   gsm8k: 57
   race-middle: 64
diff --git a/.github/workflows/daily-run-test.yml b/.github/workflows/daily-run-test.yml
index 7d7affaf..894b149e 100644
--- a/.github/workflows/daily-run-test.yml
+++ b/.github/workflows/daily-run-test.yml
@@ -14,9 +14,14 @@ env:
   PIP_CACHE_PATH: /cpfs01/user/qa-llm-cicd/.cache/pip
   USERSPACE_PREFIX: /cpfs01/user/qa-llm-cicd
   HF_CACHE_PATH: /cpfs01/shared/public/public_hdd/llmeval/model_weights/hf_hub
+  HUGGINGFACE_HUB_CACHE: /cpfs01/shared/public/public_hdd/llmeval/model_weights/hf_hub
+  HF_HUB_CACHE: /cpfs01/shared/public/public_hdd/llmeval/model_weights/hf_hub
   DATEASET_CACHE_PATH: /cpfs01/shared/public/public_hdd/llmeval/llm-evaluation-datasets
   HF_DATASETS_OFFLINE: 1
+  HF_EVALUATE_OFFLINE: 1
   TRANSFORMERS_OFFLINE: 1
+  VLLM_USE_MODELSCOPE: false
+  LMDEPLOY_USE_MODELSCOPE: false
   HF_HUB_OFFLINE: 1
   TRITON_PTXAS_PATH: /usr/local/cuda/bin/ptxas
 
@@ -43,7 +48,11 @@ jobs:
   daily_run_test:
     needs: build-pypi
-    runs-on: self-hosted
+    strategy:
+      fail-fast: false
+      matrix:
+        cuda_env: [dsw_cu11, dsw_cu12]
+    runs-on: ${{ matrix.cuda_env }}
     environment: 'prod'
     timeout-minutes: 420 #7hours
     steps:
       - name: Clone repository
@@ -53,22 +62,38 @@ jobs:
         uses: actions/download-artifact@v4
         with:
           name: my-artifact-${{ github.run_id }}
-      - name: Prepare - create conda env and install torch
+      - name: Prepare - create conda env and install torch - cu11
+        if: ${{matrix.cuda_env == 'dsw_cu11'}}
         run: |
           . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
-          conda create -y --name ${{env.CONDA_ENV}} python=3.10
-          conda activate ${{env.CONDA_ENV}}
-          pip install opencompass*.whl
-          pip install /cpfs01/user/qa-llm-cicd/packages/lmdeploy-0.5.0+cu118-cp310-cp310-manylinux2014_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
-          pip install /cpfs01/user/qa-llm-cicd/packages/vllm-0.5.5+cu118-cp310-cp310-manylinux1_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
-
-          pip install human_eval transformers protobuf pytest gguf msgspec librosa vllm_flash_attn bitsandbytes --cache-dir ${{env.PIP_CACHE_PATH}}
+          conda create -y --name ${{env.CONDA_ENV}}_${{ matrix.cuda_env }} python=3.10
+          conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
+          pip install opencompass*.whl --cache-dir ${{env.PIP_CACHE_PATH}}
+          pip install /cpfs01/user/qa-llm-cicd/packages/lmdeploy-0.6.0+cu118-cp310-cp310-manylinux2014_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
+          pip install /cpfs01/user/qa-llm-cicd/packages/vllm-0.6.1.post1+cu118-cp310-cp310-manylinux1_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
+          pip install human_eval transformers protobuf pytest gguf msgspec librosa vllm_flash_attn bitsandbytes modelscope --cache-dir ${{env.PIP_CACHE_PATH}}
           pip uninstall torch torchvision torchaudio -y
           pip install torch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0 --cache-dir ${{env.PIP_CACHE_PATH}} --index-url https://download.pytorch.org/whl/cu118
           FLASH_ATTENTION_FORCE_BUILD=TRUE pip install /cpfs01/user/qa-llm-cicd/packages/flash_attn-2.6.3+cu118torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
           pip install /cpfs01/user/qa-llm-cicd/packages/xformers-0.0.27.post2+cu118-cp310-cp310-manylinux2014_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
           conda info --envs
           pip list
+      - name: Prepare - create conda env and install torch - cu12
+        if: ${{matrix.cuda_env == 'dsw_cu12'}}
+        run: |
+          . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
+          conda create -y --name ${{env.CONDA_ENV}}_${{ matrix.cuda_env }} python=3.10
+          conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
+          pip install opencompass*.whl --cache-dir ${{env.PIP_CACHE_PATH}}
+          pip install lmdeploy==0.6.0 --cache-dir ${{env.PIP_CACHE_PATH}} --no-cache-dir
+          pip install opencompass[vllm] --cache-dir ${{env.PIP_CACHE_PATH}}
+          pip install human_eval transformers protobuf pytest gguf msgspec librosa vllm_flash_attn bitsandbytes modelscope --cache-dir ${{env.PIP_CACHE_PATH}}
+          pip uninstall torch torchvision torchaudio -y
+          pip install torch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0 --cache-dir ${{env.PIP_CACHE_PATH}}
+          FLASH_ATTENTION_FORCE_BUILD=TRUE pip install /cpfs01/user/qa-llm-cicd/packages/flash_attn-2.6.3+cu123torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
+          pip install /cpfs01/user/qa-llm-cicd/packages/xformers-0.0.27.post2-cp310-cp310-manylinux2014_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
+          conda info --envs
+          pip list
       - name: Prepare - prepare data and hf model
         run: |
           ln -s ${{env.DATEASET_CACHE_PATH}} data
@@ -77,45 +102,45 @@ jobs:
       - name: Run chat model test
         run: |
           . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
-          conda activate ${{env.CONDA_ENV}}
+          conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
           conda info --envs
           sed -i 's/judgemodel/'$(tail -n 1 /cpfs01/shared/public/llmeval/share_info/compassjuder_ip.txt)'/g' .github/scripts/eval_regression_chat.py
-          python3 run.py .github/scripts/eval_regression_chat.py --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/chat --reuse
-          rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/chat/*/summary regression_result_daily
+          opencompass .github/scripts/eval_regression_chat.py --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/chat_${{ matrix.cuda_env }} --reuse --max-num-workers 2
+          rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/chat_${{ matrix.cuda_env }}/*/summary regression_result_daily
           python -m pytest -m chat -s -v --color=yes .github/scripts/oc_score_assert.py
       - name: Run base model test
         run: |
           . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
-          conda activate ${{env.CONDA_ENV}}
+          conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
           conda info --envs
-          python3 run.py .github/scripts/eval_regression_base.py --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/base --reuse
-          rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/base/*/summary regression_result_daily
+          opencompass .github/scripts/eval_regression_base.py --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/base_${{ matrix.cuda_env }} --reuse --max-num-workers 2
+          rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/base_${{ matrix.cuda_env }}/*/summary regression_result_daily
           python -m pytest -m base -s -v --color=yes .github/scripts/oc_score_assert.py
       - name: Run command testcase
         run: |
           . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
-          conda activate ${{env.CONDA_ENV}}
+          conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
           conda info --envs
           export from_tf=TRUE
           python tools/list_configs.py internlm2_5 mmlu
-          python run.py --models hf_internlm2_5_7b --datasets race_ppl --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd1 --reuse
-          rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd1/*/summary regression_result_daily
+          opencompass --models hf_internlm2_5_7b --datasets race_ppl --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd1_${{ matrix.cuda_env }} --reuse --max-num-workers 2
+          rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd1_${{ matrix.cuda_env }}/*/summary regression_result_daily
           python -m pytest -m case1 -s -v --color=yes .github/scripts/oc_score_assert.py
-          python run.py --models hf_internlm2_5_7b_chat --datasets race_gen -a lmdeploy --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd2 --reuse
-          rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd2/*/summary regression_result_daily
+          opencompass --models hf_internlm2_5_7b_chat hf_internlm2_5_1_8b_chat --datasets race_gen -a lmdeploy --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd2_${{ matrix.cuda_env }} --reuse --max-num-workers 2
+          rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd2_${{ matrix.cuda_env }}/*/summary regression_result_daily
           python -m pytest -m case2 -s -v --color=yes .github/scripts/oc_score_assert.py
-          python run.py --datasets race_ppl --hf-type base --hf-path internlm/internlm2_5-7b --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd3 --reuse
-          rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd3/*/summary regression_result_daily
+          opencompass --datasets race_ppl --hf-type base --hf-path internlm/internlm2_5-7b --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd3_${{ matrix.cuda_env }} --reuse --max-num-workers 2
+          rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd3_${{ matrix.cuda_env }}/*/summary regression_result_daily
           python -m pytest -m case3 -s -v --color=yes .github/scripts/oc_score_assert.py
-          python run.py --datasets race_gen --hf-type chat --hf-path internlm/internlm2_5-7b-chat --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd4 --reuse
-          rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd4/*/summary regression_result_daily
+          opencompass --datasets race_gen --hf-type chat --hf-path internlm/internlm2_5-7b-chat --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd4_${{ matrix.cuda_env }} --reuse --max-num-workers 2
+          rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd4_${{ matrix.cuda_env }}/*/summary regression_result_daily
           python -m pytest -m case4 -s -v --color=yes .github/scripts/oc_score_assert.py
       - name: Remove Conda Env
         if: always()
         run: |
           rm -rf regression_result_daily
           . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
-          conda env remove -y --name ${{env.CONDA_ENV}}
+          conda env remove -y --name ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
           conda info --envs
 
   notify_to_feishu:
diff --git a/.github/workflows/pr-run-test.yml b/.github/workflows/pr-run-test.yml
index 6cab1378..d9fcdc3a 100644
--- a/.github/workflows/pr-run-test.yml
+++ b/.github/workflows/pr-run-test.yml
@@ -51,7 +51,7 @@ jobs:
           conda activate ${{env.CONDA_ENV}}
           conda info --envs
           rm -rf regression_result
-          python3 run.py --models hf_internlm2_chat_7b --datasets siqa_gen --work-dir regression_result --debug
+          opencompass --models hf_internlm2_chat_7b --datasets siqa_gen --work-dir regression_result --debug
       - name: Get result
         run: |
           score=$(sed -n '$p' regression_result/*/summary/*.csv | awk -F ',' '{print $NF}')