# PR regression workflow: installs opencompass into an existing conda
# environment on a self-hosted runner, runs three small gsm8k evaluations,
# and fails if any resulting score falls outside its expected range.
name: pr_run_test

on:
  pull_request:
    # Changes limited to these paths do not trigger the workflow.
    paths-ignore:
      - 'README.md'
      - 'README_zh-CN.md'
      - 'docs/**'
      - 'configs/**'
      - 'tools/**'
  workflow_dispatch:
  schedule:
    - cron: '56 22 * * *'

# A newer run for the same workflow + ref cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

env:
  CONDA_ENV: pr_test
  # Flag-like values are quoted so they are unambiguously strings.
  HF_DATASETS_OFFLINE: '1'
  HF_EVALUATE_OFFLINE: '1'
  TRANSFORMERS_OFFLINE: '1'
  VLLM_USE_MODELSCOPE: 'false'
  LMDEPLOY_USE_MODELSCOPE: 'false'
  HF_HUB_OFFLINE: '1'
  # Paths on the runner's shared filesystem.
  CONDA_PATH: /fs-computility/llm/qa-llm-cicd/miniconda3
  PIP_CACHE_PATH: /fs-computility/llm/qa-llm-cicd/.cache/pip
  REPORT_ROOT: /fs-computility/llm/qa-llm-cicd/eval_report/prtest
  COMPASS_DATA_CACHE: /fs-computility/llm/shared/llmeval/datasets/compass_data_cache
  HUGGINGFACE_HUB_CACHE: /fs-computility/llm/shared/llmeval/models/opencompass_hf_hub
  HF_HUB_CACHE: /fs-computility/llm/shared/llmeval/models/opencompass_hf_hub

jobs:
  pr_run_test:
    runs-on: volc_cu12_local
    environment: 'prod'
    timeout-minutes: 30
    steps:
      # NOTE(review): checkout@v2 is several major versions behind; confirm
      # the self-hosted runner's Node runtime before upgrading it.
      - name: Checkout repository
        uses: actions/checkout@v2

      # Reinstall opencompass from the checked-out tree in editable mode.
      - name: Prepare - Install opencompass
        run: |
          . ${{env.CONDA_PATH}}/bin/activate
          conda activate ${{env.CONDA_ENV}}
          python3 -m pip uninstall opencompass -y
          python3 -m pip install -e ".[full]" --cache-dir ${{env.PIP_CACHE_PATH}}
          conda info --envs

      # Dump environment details into the job log for debugging.
      - name: conda env
        run: |
          . ${{env.CONDA_PATH}}/bin/activate
          conda activate ${{env.CONDA_ENV}}
          conda info --envs
          pip list
          lmdeploy check_env

      # Three evaluations, each writing to its own directory under
      # REPORT_ROOT/<run_id>. The `rm -rf` only clears a directory named
      # regression_result in the current working directory.
      - name: Run test
        run: |
          . ${{env.CONDA_PATH}}/bin/activate
          conda activate ${{env.CONDA_ENV}}
          conda info --envs
          rm -rf regression_result
          opencompass --models hf_internlm2_5_20b_chat --datasets demo_gsm8k_chat_gen --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/regression_result1 --debug
          opencompass --models hf_internlm2_5_7b_chat --datasets demo_gsm8k_chat_gen --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/regression_result2 --debug --max-num-workers 2
          opencompass --models hf_internlm2_5_7b_chat --datasets demo_gsm8k_chat_gen -a lmdeploy --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/regression_result3 --debug --max-num-workers 2

      # Validate each run's score; the first failing check ends the step.
      - name: Get result
        run: |
          report_dir=${{env.REPORT_ROOT}}/${{ github.run_id }}

          # check_score <result_dir> <min> <max>
          # Takes the last comma-separated field of the last line of the
          # summary CSV(s) under <result_dir>, drops everything from the last
          # '.', and requires min <= integer part <= max.
          check_score() {
            local result_dir=$1 min=$2 max=$3
            local score int_part
            score=$(sed -n '$p' "$result_dir"/*/summary/*.csv | awk -F ',' '{print $NF}')
            int_part=${score%.*}
            # Guard: a missing CSV or non-numeric field would otherwise reach
            # the arithmetic test below and surface as a bash syntax error.
            if [[ ! $int_part =~ ^[0-9]+$ ]]; then
              echo "could not read a numeric score from $result_dir (got '$score')"
              exit 1
            fi
            # 10# forces base 10 so a leading zero is not read as octal.
            if (( 10#$int_part >= min && 10#$int_part <= max )); then
              echo "score is $score between $min and $max"
            else
              echo "score is $score not between $min and $max"
              exit 1
            fi
          }

          check_score "$report_dir/regression_result1" 88 89
          check_score "$report_dir/regression_result2" 87 88
          check_score "$report_dir/regression_result3" 87 91

      # Runs even when earlier steps failed, so the package is removed from
      # the conda environment again.
      - name: Uninstall opencompass
        if: always()
        run: |
          . ${{env.CONDA_PATH}}/bin/activate
          conda activate ${{env.CONDA_ENV}}
          python3 -m pip uninstall opencompass -y
          conda info --envs

  # Posts a message to the configured webhook when a needed job failed and
  # the run's ref is `develop` or `main`.
  notify_to_feishu:
    if: ${{ always() && !cancelled() && contains(needs.*.result, 'failure') && (github.ref_name == 'develop' || github.ref_name == 'main') }}
    needs: [pr_run_test]
    timeout-minutes: 5
    runs-on: self-hosted
    environment: 'prod'
    steps:
      - name: notify
        # Context values and secrets are handed over as environment variables
        # rather than being spliced into the script text, so their contents
        # can never be interpreted as shell syntax.
        env:
          REF_NAME: ${{ github.ref_name }}
          WORKFLOW_NAME: ${{ github.workflow }}
          REPOSITORY: ${{ github.repository }}
          FEISHU_USER_ID: ${{ secrets.USER_ID }}
          WEBHOOK_URL: ${{ secrets.WEBHOOK_URL }}
        run: |
          # The %s slots are, in order: branch, workflow name, repository,
          # run id, user id to mention.
          payload=$(printf '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"Opencompass- pr test failed","content":[[{"tag":"text","text":"branch: %s, run action: %s failed. "},{"tag":"a","text":"Please click here for details ","href":"https://github.com/%s/actions/runs/%s"},{"tag":"at","user_id":"%s"}]]}}}}' "$REF_NAME" "$WORKFLOW_NAME" "$REPOSITORY" "$GITHUB_RUN_ID" "$FEISHU_USER_ID")
          curl -X POST -H "Content-Type: application/json" -d "$payload" "$WEBHOOK_URL"