# Daily regression workflow for OpenCompass.
#
# Triggers:
#   * manually (workflow_dispatch), optionally selecting the repository, the
#     ref, and which regression suites ('chat', 'base', 'cmd') to run;
#   * on a daily cron schedule, in which case every suite runs.
#
# Jobs:
#   build-pypi        builds the sdist/wheel and uploads it as an artifact
#   daily_run_test    installs the wheel into a fresh conda env on each
#                     self-hosted CUDA runner and runs the regression suites
#   notify_to_feishu  posts a webhook message when daily_run_test failed on
#                     the develop or main branch
name: daily_run_test

on:
  workflow_dispatch:
    inputs:
      repo_org:
        required: false
        description: 'Tested repository organization name. Default is open-compass/opencompass'
        type: string
        default: 'open-compass/opencompass'
      repo_ref:
        required: false
        description: 'Set branch or tag or commit id. Default is "main"'
        type: string
        default: 'main'
      regression_func:
        required: true
        description: 'regression functions'
        type: string
        # Parsed with fromJSON() in the step-level `if:` conditions below.
        default: "['chat','base','cmd']"
  schedule:
    - cron: '56 16 * * *'

# A newer run of this workflow on the same ref cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

env:
  # Base name of the per-runner conda env; the matrix cuda_env is appended.
  CONDA_ENV: opencompass_regression
  PIP_CACHE_PATH: /cpfs01/user/qa-llm-cicd/.cache/pip
  HF_CACHE_PATH: /cpfs01/shared/public/public_hdd/llmeval/model_weights/hf_hub
  HUGGINGFACE_HUB_CACHE: /cpfs01/shared/public/public_hdd/llmeval/model_weights/hf_hub
  HF_HUB_CACHE: /cpfs01/shared/public/public_hdd/llmeval/model_weights/hf_hub
  # NOTE(review): "DATEASET" looks like a typo for "DATASET"; the name is kept
  # because it is exported to every step's environment and may be read there.
  DATEASET_CACHE_PATH: /cpfs01/shared/public/public_hdd/llmeval/llm-evaluation-datasets
  # Environment variables are always strings; quote them so the YAML type
  # matches what the steps actually receive.
  HF_DATASETS_OFFLINE: '1'
  HF_EVALUATE_OFFLINE: '1'
  TRANSFORMERS_OFFLINE: '1'
  VLLM_USE_MODELSCOPE: 'false'
  LMDEPLOY_USE_MODELSCOPE: 'false'
  HF_HUB_OFFLINE: '1'
  TRITON_PTXAS_PATH: /usr/local/cuda/bin/ptxas

jobs:
  # Build the package once and hand it to the test job as an artifact.
  build-pypi:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          # Inputs are empty on scheduled runs, hence the `||` fallbacks.
          repository: ${{ github.event.inputs.repo_org || 'open-compass/opencompass' }}
          ref: ${{ github.event.inputs.repo_ref || 'main' }}
      - name: Set up Python 3.x
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'
      - name: Build opencompass
        run: |
          pip install wheel setuptools
          python setup.py sdist bdist_wheel
      - name: Upload Artifacts
        uses: actions/upload-artifact@v4
        with:
          if-no-files-found: error
          path: dist/*
          retention-days: 1
          name: my-artifact-${{ github.run_id }}

  # Run the regression suites on each self-hosted CUDA runner.
  daily_run_test:
    if: ${{ !cancelled() }}
    needs: build-pypi
    strategy:
      fail-fast: false
      matrix:
        # Each matrix value is also the runner label (see runs-on).
        cuda_env: [dsw_cu11, dsw_cu12]
    runs-on: ${{ matrix.cuda_env }}
    environment: 'prod'
    timeout-minutes: 600  # 10 hours
    steps:
      - name: Clone repository
        uses: actions/checkout@v4
        with:
          repository: ${{ github.event.inputs.repo_org || 'open-compass/opencompass' }}
          ref: ${{ github.event.inputs.repo_ref || 'main' }}
      - name: Download Artifacts
        uses: actions/download-artifact@v4
        with:
          name: my-artifact-${{ github.run_id }}
      - name: Prepare - create conda env and install torch - cu11
        if: ${{ matrix.cuda_env == 'dsw_cu11' }}
        run: |
          . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
          conda create -y --name ${{env.CONDA_ENV}}_${{ matrix.cuda_env }} python=3.10
          conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
          pip install -r /cpfs01/shared/public/qa-llm-cicd/requirements-cu11.txt --cache-dir ${{env.PIP_CACHE_PATH}}
          pip install opencompass*.whl --cache-dir ${{env.PIP_CACHE_PATH}}
          pip install /cpfs01/user/qa-llm-cicd/packages/lmdeploy-0.6.1+cu118-cp310-cp310-manylinux2014_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
          pip install /cpfs01/user/qa-llm-cicd/packages/vllm-0.6.1.post1+cu118-cp310-cp310-manylinux1_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
          # Reinstall a pinned torch stack after the packages above.
          pip uninstall torch torchvision torchaudio -y
          pip install torch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0 --cache-dir ${{env.PIP_CACHE_PATH}} --index-url https://download.pytorch.org/whl/cu118
          FLASH_ATTENTION_FORCE_BUILD=TRUE pip install /cpfs01/user/qa-llm-cicd/packages/flash_attn-2.6.3+cu118torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
          pip install /cpfs01/user/qa-llm-cicd/packages/xformers-0.0.27.post2+cu118-cp310-cp310-manylinux2014_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
          conda info --envs
          pip list
      - name: Prepare - create conda env and install torch - cu12
        if: ${{ matrix.cuda_env == 'dsw_cu12' }}
        run: |
          . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
          conda create -y --name ${{env.CONDA_ENV}}_${{ matrix.cuda_env }} python=3.10
          conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
          pip install -r /cpfs01/shared/public/qa-llm-cicd/requirements-cu12.txt --cache-dir ${{env.PIP_CACHE_PATH}}
          pip install opencompass*.whl --cache-dir ${{env.PIP_CACHE_PATH}}
          # Extras are quoted so the shell never treats [...] as a glob.
          pip install "opencompass[lmdeploy]" --cache-dir ${{env.PIP_CACHE_PATH}}
          pip install "opencompass[vllm]" --cache-dir ${{env.PIP_CACHE_PATH}}
          # Reinstall a pinned torch stack after the packages above.
          pip uninstall torch torchvision torchaudio -y
          pip install torch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0 --cache-dir ${{env.PIP_CACHE_PATH}}
          FLASH_ATTENTION_FORCE_BUILD=TRUE pip install /cpfs01/user/qa-llm-cicd/packages/flash_attn-2.6.3+cu123torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
          pip install /cpfs01/user/qa-llm-cicd/packages/xformers-0.0.27.post2-cp310-cp310-manylinux2014_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
          conda info --envs
          pip list
      - name: Prepare - prepare data and hf model
        run: |
          # Link the shared dataset and model caches into the locations used by the run.
          ln -s ${{env.DATEASET_CACHE_PATH}} data
          rm -rf ~/.cache/huggingface/hub -f && mkdir ~/.cache -p && mkdir ~/.cache/huggingface -p
          ln -s ${{env.HF_CACHE_PATH}} ~/.cache/huggingface/hub
      - name: Run command testcase
        # Scheduled runs execute every suite; manual runs only the selected ones.
        if: github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'cmd')
        run: |
          . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
          conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
          conda info --envs
          export from_tf=TRUE
          python tools/list_configs.py internlm2_5 mmlu
          # Each case: run opencompass, point regression_result_daily at the
          # run's summary directory, then run the pytest case against it.
          opencompass --models hf_internlm2_5_7b --datasets race_ppl --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd1_${{ matrix.cuda_env }} --reuse --max-num-workers 2
          rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd1_${{ matrix.cuda_env }}/*/summary regression_result_daily
          python -m pytest -m case1 -s -v --color=yes .github/scripts/oc_score_assert.py
          opencompass --models hf_internlm2_5_7b_chat --datasets race_gen -a lmdeploy --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd2_${{ matrix.cuda_env }} --reuse --max-num-workers 2
          rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd2_${{ matrix.cuda_env }}/*/summary regression_result_daily
          python -m pytest -m case2 -s -v --color=yes .github/scripts/oc_score_assert.py
          opencompass --datasets race_ppl --hf-type base --hf-path internlm/internlm2_5-7b --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd3_${{ matrix.cuda_env }} --reuse --max-num-workers 2
          rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd3_${{ matrix.cuda_env }}/*/summary regression_result_daily
          python -m pytest -m case3 -s -v --color=yes .github/scripts/oc_score_assert.py
          opencompass --datasets race_gen --hf-type chat --hf-path internlm/internlm2_5-7b-chat --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd4_${{ matrix.cuda_env }} --reuse --max-num-workers 2
          rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd4_${{ matrix.cuda_env }}/*/summary regression_result_daily
          python -m pytest -m case4 -s -v --color=yes .github/scripts/oc_score_assert.py
      - name: Run chat model test
        if: github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'chat')
        run: |
          . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
          conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
          conda info --envs
          # Replace the 'judgemodel' placeholder in the eval config with the
          # last line of the shared compassjuder_ip.txt file.
          sed -i 's/judgemodel/'$(tail -n 1 /cpfs01/shared/public/llmeval/share_info/compassjuder_ip.txt)'/g' .github/scripts/eval_regression_chat.py
          opencompass .github/scripts/eval_regression_chat.py --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/chat_${{ matrix.cuda_env }} --reuse --max-num-workers 2
          rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/chat_${{ matrix.cuda_env }}/*/summary regression_result_daily
          python -m pytest -m chat -s -v --color=yes .github/scripts/oc_score_assert.py
      - name: Run base model test
        if: github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'base')
        run: |
          . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
          conda activate ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
          conda info --envs
          opencompass .github/scripts/eval_regression_base.py --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/base_${{ matrix.cuda_env }} --reuse --max-num-workers 2
          rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/base_${{ matrix.cuda_env }}/*/summary regression_result_daily
          python -m pytest -m base -s -v --color=yes .github/scripts/oc_score_assert.py
      - name: Remove Conda Env
        # Runs even after a failure or cancellation so the env is removed.
        if: always()
        run: |
          rm -rf regression_result_daily
          . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
          conda env remove -y --name ${{env.CONDA_ENV}}_${{ matrix.cuda_env }}
          conda info --envs

  # Post a failure message to the webhook, only for the develop and main branches.
  notify_to_feishu:
    if: ${{ always() && !cancelled() && contains(needs.*.result, 'failure') && (github.ref_name == 'develop' || github.ref_name == 'main') }}
    needs: [daily_run_test]
    environment: 'prod'
    timeout-minutes: 5
    runs-on: self-hosted
    steps:
      - name: notify
        run: |
          curl -X POST -H "Content-Type: application/json" -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"Opencompass- Daily test failed","content":[[{"tag":"text","text":"branch: ${{github.ref_name}}, run action: ${{github.workflow}} failed. "},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"},{"tag":"at","user_id":"'${{ secrets.USER_ID }}'"}]]}}}}' ${{ secrets.WEBHOOK_URL }}