# Mirror of https://github.com/open-compass/opencompass.git
# (synced 2025-05-30 16:03:24 +08:00; 352 lines, 17 KiB, YAML)
# Regression workflow: builds the package wheels, prepares a conda
# environment on the self-hosted runners, and runs the regression suites.
name: daily_run_test

# Triggers: manual dispatch (with overridable inputs) and a cron schedule.
on:
  workflow_dispatch:
    inputs:
      # Repository and ref of the project under test.
      repo_org:
        required: false
        description: 'Tested repository organization name. Default is open-compass/opencompass'
        type: string
        default: 'open-compass/opencompass'
      repo_ref:
        required: false
        description: 'Set branch or tag or commit id. Default is "main"'
        type: string
        default: 'main'
      # When true, an lmdeploy wheel is built from source and reinstalled
      # into the test environment.
      build_lmdeploy:
        required: false
        description: 'whether to build lmdeploy'
        type: boolean
        default: false
      repo_org_lmdeploy:
        required: false
        description: 'Tested repository organization name. Default is internlm/lmdeploy'
        type: string
        default: 'InternLM/lmdeploy'
      repo_ref_lmdeploy:
        required: false
        description: 'Set branch or tag or commit id. Default is "main"'
        type: string
        default: 'main'
      # The three list inputs below are parsed with fromJSON() to build the
      # job matrices, so their defaults are written as strict JSON
      # (double-quoted strings), matching the fallback literals used in the
      # jobs' matrix expressions.
      regression_func_volc:
        required: true
        description: 'regression functions'
        type: string
        default: '["chat_models","base_models","chat_obj_fullbench","base_fullbench"]'
      regression_func_local:
        required: true
        description: 'regression functions'
        type: string
        default: '["cmd","api","chat_sub_fullbench"]'
      fullbench_eval:
        required: true
        description: 'fullbench volc functions'
        type: string
        default: '["base_objective","chat_objective","chat_subjective","base_long_context","chat_long_context"]'
  schedule:
    # GitHub evaluates cron in UTC: 14:15 UTC on Sundays (0) and Wednesdays (3).
    - cron: '15 14 * * 0,3'

# Workflow-level environment shared by every job.
env:
  # Offline switches for the Hugging Face libraries; quoted because
  # environment values are always strings.
  HF_DATASETS_OFFLINE: '1'
  HF_EVALUATE_OFFLINE: '1'
  TRANSFORMERS_OFFLINE: '1'
  VLLM_USE_MODELSCOPE: 'false'
  LMDEPLOY_USE_MODELSCOPE: 'false'
  HF_HUB_OFFLINE: '1'
  # Output directory name handed to the lmdeploy wheel build script.
  OUTPUT_FOLDER: cuda12.1_dist_${{ github.run_id }}
  # Paths on the self-hosted runners, rooted at secret-provided prefixes.
  CONDA_PATH: ${{ secrets.WORKSPACE_PREFIX }}/miniconda3
  PIP_CACHE_PATH: ${{ secrets.WORKSPACE_PREFIX }}/.cache/pip
  REPORT_ROOT: ${{ secrets.WORKSPACE_PREFIX }}/eval_report/regression
  COMPASS_DATA_CACHE: ${{ secrets.SHARESPACE_PREFIX }}/datasets/compass_data_cache
  HUGGINGFACE_HUB_CACHE: ${{ secrets.SHARESPACE_PREFIX }}/models/opencompass_hf_hub
  HF_HUB_CACHE: ${{ secrets.SHARESPACE_PREFIX }}/models/opencompass_hf_hub
  HF_DATASETS_CACHE: ${{ secrets.SHARESPACE_PREFIX }}/datasets/hf_datasets_cache
  HF_ENDPOINT: https://hf-mirror.com
  # Name of the conda environment created and used by the test jobs.
  CONDA_ENV: regression_test
  # The key previously read "export VLLM_WORKER_MULTIPROC_METHOD", which
  # defines a variable whose name contains "export " and therefore never
  # sets the intended one. Keys here are plain variable names.
  VLLM_WORKER_MULTIPROC_METHOD: spawn

jobs:
  # Build the sdist/wheel of the project under test on a GitHub-hosted runner
  # and publish it as a run-scoped artifact for the self-hosted jobs.
  build-pypi:
    runs-on: ubuntu-latest
    steps:
      # NOTE(review): actions/checkout@v2 is an old major version; consider
      # upgrading once the self-hosted runners are confirmed compatible.
      - uses: actions/checkout@v2
        with:
          repository: ${{ github.event.inputs.repo_org || 'open-compass/opencompass' }}
          ref: ${{github.event.inputs.repo_ref || 'main'}}
      - name: Set up Python 3.10
        uses: actions/setup-python@v4
        with:
          python-version: '3.10'
      - name: Build opencompass
        run: |
          pip install wheel setuptools
          python setup.py sdist bdist_wheel
      - name: Upload Artifacts
        uses: actions/upload-artifact@v4
        with:
          if-no-files-found: error
          path: dist/*
          retention-days: 1
          name: my-artifact-${{ github.run_id }}

  # Build an lmdeploy wheel with the repository's manywheel builder script.
  # Runs on the schedule, or on manual dispatch when build_lmdeploy is set.
  build-pypi-lmdeploy:
    if: ${{!cancelled() && (github.event_name == 'schedule' || inputs.build_lmdeploy)}}
    strategy:
      matrix:
        pyver: [py310]
    runs-on: ubuntu-latest
    env:
      PYTHON_VERSION: ${{ matrix.pyver }}
      PLAT_NAME: manylinux2014_x86_64
      DOCKER_TAG: cuda12.1
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          repository: ${{ github.event.inputs.repo_org_lmdeploy || 'InternLM/lmdeploy' }}
          ref: ${{github.event.inputs.repo_ref_lmdeploy || 'main'}}
      - name: Build
        run: |
          echo ${PYTHON_VERSION}
          echo ${PLAT_NAME}
          echo ${DOCKER_TAG}
          echo ${OUTPUT_FOLDER}
          echo ${GITHUB_RUN_ID}
          # remove -it
          sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh
          bash builder/manywheel/build_wheel.sh ${PYTHON_VERSION} ${PLAT_NAME} ${DOCKER_TAG} ${OUTPUT_FOLDER}
      - name: Upload Artifacts
        uses: actions/upload-artifact@v4
        with:
          if-no-files-found: error
          path: builder/manywheel/${{ env.OUTPUT_FOLDER }}
          retention-days: 1
          name: my-artifact-${{ github.run_id }}-${{ matrix.pyver }}

  # Recreate the conda environment on the self-hosted runner and install the
  # freshly built wheel plus its optional dependency groups.
  prepare_env:
    if: ${{!cancelled()}}
    needs: ['build-pypi', 'build-pypi-lmdeploy']
    runs-on: volc_cu12
    timeout-minutes: 120  # 2 hours
    steps:
      - name: Clone repository
        uses: actions/checkout@v2
        with:
          repository: ${{ github.event.inputs.repo_org || 'open-compass/opencompass' }}
          ref: ${{github.event.inputs.repo_ref || 'main'}}
      - name: Download Artifacts
        uses: actions/download-artifact@v4
        with:
          name: my-artifact-${{ github.run_id }}
      # Uses env.CONDA_PATH like every other step (same location as the
      # previously hard-coded WORKSPACE_PREFIX/miniconda3 path).
      - name: Remove Conda Env
        if: always()
        run: |
          . ${{env.CONDA_PATH}}/bin/activate
          conda env remove -y --name ${{env.CONDA_ENV}}
          conda info --envs
      # NOTE(review): the flash-attn wheel file name says "torch2.5" while
      # torch==2.6.0 is pinned just before it - confirm the wheel matches.
      # FLASH_ATTENTION_FORCE_BUILD presumably has no effect when installing
      # a prebuilt wheel - TODO confirm.
      - name: Prepare - create conda env and install torch - cu12
        uses: nick-fields/retry@v3
        with:
          max_attempts: 3
          timeout_minutes: 120
          command: |
            . ${{env.CONDA_PATH}}/bin/activate
            conda create -y --name ${{env.CONDA_ENV}} python=3.10
            conda activate ${{env.CONDA_ENV}}
            pip install -r ${{ secrets.WORKSPACE_PREFIX }}/config/requirements.txt --cache-dir ${{env.PIP_CACHE_PATH}}
            pip install opencompass*.whl --cache-dir ${{env.PIP_CACHE_PATH}}
            pip install opencompass[lmdeploy] --cache-dir ${{env.PIP_CACHE_PATH}}
            pip install opencompass[vllm] --cache-dir ${{env.PIP_CACHE_PATH}}
            pip install opencompass[full] --cache-dir ${{env.PIP_CACHE_PATH}}
            pip install opencompass[api] --cache-dir ${{env.PIP_CACHE_PATH}}
            pip install torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 --cache-dir ${{env.PIP_CACHE_PATH}}
            FLASH_ATTENTION_FORCE_BUILD=TRUE pip install ${{ secrets.WORKSPACE_PREFIX }}/packages/flash_attn-2.7.0.post2+cu12torch2.5cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
            pip install xformers --index-url https://download.pytorch.org/whl/cu121 --cache-dir ${{env.PIP_CACHE_PATH}}
            cp -r /root/nltk_data ${{env.CONDA_PATH}}/envs/${{env.CONDA_ENV}}/nltk_data
      # Renamed: this step only downloads the wheel; the next one installs it.
      - name: Prepare - download lmdeploy wheel - cu12
        if: ${{github.event_name == 'schedule' || inputs.build_lmdeploy}}
        uses: actions/download-artifact@v4
        with:
          name: my-artifact-${{ github.run_id }}-py310
      - name: Prepare - reinstall lmdeploy - cu12
        if: ${{github.event_name == 'schedule' || inputs.build_lmdeploy}}
        run: |
          . ${{env.CONDA_PATH}}/bin/activate
          conda activate ${{env.CONDA_ENV}}
          pip uninstall -y lmdeploy
          pip install lmdeploy-*.whl --no-deps
      - name: conda env
        run: |
          . ${{env.CONDA_PATH}}/bin/activate
          conda activate ${{env.CONDA_ENV}}
          conda info --envs
          pip list

  # Regression suites driven by .github/scripts/eval_regression_<func>.py,
  # one matrix entry per function, followed by score assertions.
  daily_run_test_volc:
    if: ${{!cancelled() && contains(needs.prepare_env.result, 'success')}}
    needs: prepare_env
    strategy:
      fail-fast: false
      matrix:
        regression_func: ${{fromJSON(github.event.inputs.regression_func_volc || '["chat_models","base_models","chat_obj_fullbench","base_fullbench"]')}}
    runs-on: volc_cu12_daily
    timeout-minutes: 180  # 3 hours
    steps:
      - name: Clone repository
        uses: actions/checkout@v2
        with:
          repository: ${{ github.event.inputs.repo_org || 'open-compass/opencompass' }}
          ref: ${{github.event.inputs.repo_ref || 'main'}}
      - name: conda env
        run: |
          . ${{env.CONDA_PATH}}/bin/activate
          conda activate ${{env.CONDA_ENV}}
          conda info --envs
          pip list
      # Appends the runner-local config snippet to the regression script.
      - name: modify config
        if: matrix.regression_func != 'chat_sub_fullbench'
        run: |
          cp -r ${{ secrets.WORKSPACE_PREFIX }}/ocplayground/template/configs_cluster/volc.py .
          cat ${{ secrets.WORKSPACE_PREFIX }}/config/test_config.txt >> .github/scripts/eval_regression_${{matrix.regression_func}}.py
      - name: Run test
        uses: nick-fields/retry@v3
        with:
          max_attempts: 1
          timeout_minutes: 180
          command: |
            . ${{env.CONDA_PATH}}/bin/activate
            conda activate ${{env.CONDA_ENV}}
            conda info --envs
            opencompass .github/scripts/eval_regression_${{matrix.regression_func}}.py --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/${{matrix.regression_func}} --reuse --dump-eval-details
            rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/${{matrix.regression_func}}/*/summary regression_result_daily
            python -m pytest -m ${{matrix.regression_func}} -s -v --color=yes .github/scripts/oc_score_assert.py

  # Command-line, API-server and subjective-fullbench checks; each matrix
  # entry runs only the steps whose `if` matches its regression_func.
  daily_run_test_local:
    if: ${{!cancelled() && contains(needs.prepare_env.result, 'success')}}
    needs: prepare_env
    strategy:
      fail-fast: false
      matrix:
        regression_func: ${{fromJSON(github.event.inputs.regression_func_local || '["cmd","api","chat_sub_fullbench"]')}}
    runs-on: volc_cu12_local
    timeout-minutes: 480  # 8 hours
    steps:
      - name: Clone repository
        uses: actions/checkout@v2
        with:
          repository: ${{ github.event.inputs.repo_org || 'open-compass/opencompass' }}
          ref: ${{github.event.inputs.repo_ref || 'main'}}
      - name: conda env
        run: |
          . ${{env.CONDA_PATH}}/bin/activate
          conda activate ${{env.CONDA_ENV}}
          conda info --envs
          pip list
      - name: modify config
        if: matrix.regression_func == 'chat_sub_fullbench'
        run: |
          cp -r ${{ secrets.WORKSPACE_PREFIX }}/ocplayground/template/configs_cluster/volc.py .
          cat ${{ secrets.WORKSPACE_PREFIX }}/config/test_config_sub.txt >> .github/scripts/eval_regression_${{matrix.regression_func}}.py
      - name: Run command testcase
        if: matrix.regression_func == 'cmd'
        run: |
          . ${{env.CONDA_PATH}}/bin/activate
          conda activate ${{env.CONDA_ENV}}
          conda info --envs
          export from_tf=TRUE
          python tools/list_configs.py internlm2_5 mmlu
          opencompass --models hf_internlm2_5_7b --datasets race_ppl demo_gsm8k_chat_gen --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd1 --reuse --max-num-workers 2 --dump-eval-details
          rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd1/*/summary regression_result_daily
          python -m pytest -m case1 -s -v --color=yes .github/scripts/oc_score_assert.py
          opencompass --models hf_internlm2_5_7b_chat hf_internlm3_8b_instruct --datasets race_gen demo_gsm8k_chat_gen -a lmdeploy --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd2 --reuse --max-num-workers 2 --dump-eval-details
          rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd2/*/summary regression_result_daily
          python -m pytest -m case2 -s -v --color=yes .github/scripts/oc_score_assert.py
          opencompass --datasets race_ppl demo_gsm8k_chat_gen --hf-type base --hf-path internlm/internlm2_5-7b --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd3 --reuse --max-num-workers 2 --dump-eval-details
          rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd3/*/summary regression_result_daily
          python -m pytest -m case3 -s -v --color=yes .github/scripts/oc_score_assert.py
          opencompass --datasets race_gen demo_gsm8k_chat_gen --hf-type chat --hf-path internlm/internlm3-8b-instruct -a lmdeploy --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd4 --reuse --max-num-workers 2 --dump-eval-details
          rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd4/*/summary regression_result_daily
          python -m pytest -m case4 -s -v --color=yes .github/scripts/oc_score_assert.py
          opencompass --datasets race_gen demo_gsm8k_chat_gen --hf-type chat --hf-path internlm/internlm3-8b-instruct -a vllm --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd5 --reuse --max-num-workers 2 --dump-eval-details
          rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd5/*/summary regression_result_daily
          python -m pytest -m case5 -s -v --color=yes .github/scripts/oc_score_assert.py
      # Starts an lmdeploy API server in the background, records its PID in
      # GITHUB_ENV for the cleanup step, then evaluates against it.
      # The run directory is created first so the log redirection cannot fail
      # when this matrix entry is the first to touch it; the proxy greps are
      # diagnostic only and must not abort the step (the default shell runs
      # with -e, and grep exits 1 on no match).
      - name: Run model test - api
        if: matrix.regression_func == 'api'
        run: |
          . ${{env.CONDA_PATH}}/bin/activate
          conda activate ${{env.CONDA_ENV}}
          conda info --envs
          mkdir -p ${{env.REPORT_ROOT}}/${{ github.run_id }}
          lmdeploy serve api_server internlm/internlm3-8b-instruct --max-batch-size 256 --model-name internlm3 > ${{env.REPORT_ROOT}}/${{ github.run_id }}/restful.log 2>&1 &
          echo "restful_pid=$!" >> "$GITHUB_ENV"
          sleep 180s
          env | grep PROXY || true
          env | grep proxy || true
          unset HTTP_PROXY;unset HTTPS_PROXY;unset http_proxy;unset https_proxy;
          opencompass .github/scripts/eval_regression_api.py --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/api --reuse --max-num-workers 2 --dump-eval-details
          rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/api/*/summary regression_result_daily
          python -m pytest -m api -s -v --color=yes .github/scripts/oc_score_assert.py
      # Best-effort cleanup: tolerate a server that was never started (PID
      # unset) or that has already exited.
      - name: Run model test - api kill
        if: always() && matrix.regression_func == 'api'
        run: |
          if [ -n "${restful_pid:-}" ]; then
            kill -15 "$restful_pid" || true
          fi
      - name: Run testcase
        if: matrix.regression_func == 'chat_sub_fullbench'
        env:
          # Step-level override of the workflow-wide data cache path.
          COMPASS_DATA_CACHE: ${{ secrets.SHARESPACE_PREFIX }}/datasets/compass_data_cache_subset
        run: |
          . ${{env.CONDA_PATH}}/bin/activate
          conda activate ${{env.CONDA_ENV}}
          conda info --envs
          export from_tf=TRUE
          opencompass .github/scripts/eval_regression_${{matrix.regression_func}}.py --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/${{matrix.regression_func}} --reuse --dump-eval-details
          rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/${{matrix.regression_func}}/*/summary regression_result_daily
          python -m pytest -m ${{matrix.regression_func}} -s -v --color=yes .github/scripts/oc_score_assert.py

  # Full benchmark runs using the eval_<function_type>.py templates stored on
  # the runner, one matrix entry per function type.
  fullbench_run_test:
    if: ${{!cancelled() && contains(needs.prepare_env.result, 'success')}}
    needs: prepare_env
    strategy:
      fail-fast: false
      matrix:
        function_type: ${{fromJSON(github.event.inputs.fullbench_eval || '["base_objective","chat_objective","chat_subjective","base_long_context","chat_long_context"]')}}
    runs-on: volc_cu12
    timeout-minutes: 480  # 8 hours
    steps:
      - name: Clone repository
        uses: actions/checkout@v2
        with:
          repository: ${{ github.event.inputs.repo_org || 'open-compass/opencompass' }}
          ref: ${{github.event.inputs.repo_ref || 'main'}}
      - name: conda env
        run: |
          . ${{env.CONDA_PATH}}/bin/activate
          conda activate ${{env.CONDA_ENV}}
          conda info --envs
          pip list
      - name: Run testcase
        uses: nick-fields/retry@v3
        with:
          max_attempts: 1
          timeout_minutes: 480
          command: |
            . ${{env.CONDA_PATH}}/bin/activate
            conda activate ${{env.CONDA_ENV}}
            conda info --envs
            export from_tf=TRUE
            opencompass ${{ secrets.WORKSPACE_PREFIX }}/ocplayground/template/regression/eval_${{ matrix.function_type }}.py --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/${{ matrix.function_type }} --reuse
            rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/${{ matrix.function_type }}/*/summary regression_result_daily
            python -m pytest -m ${{ matrix.function_type }} -s -v --color=yes .github/scripts/oc_score_assert.py

  # Post a failure notice to the webhook for scheduled runs on develop/main.
  # The build and prepare_env jobs are listed in `needs` as well: when one of
  # them fails, the test jobs are skipped rather than failed, so without them
  # `contains(needs.*.result, 'failure')` would be false and no alert sent.
  # `!cancelled()` alone replaces the redundant `always() && !cancelled()`.
  notify_to_feishu:
    if: ${{ !cancelled() && github.event_name == 'schedule' && contains(needs.*.result, 'failure') && (github.ref_name == 'develop' || github.ref_name == 'main') }}
    needs: [build-pypi, build-pypi-lmdeploy, prepare_env, daily_run_test_volc, daily_run_test_local, fullbench_run_test]
    timeout-minutes: 5
    runs-on: self-hosted
    steps:
      - name: notify
        run: |
          curl -X POST -H "Content-Type: application/json" -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"Opencompass- Daily test failed","content":[[{"tag":"text","text":"branch: ${{github.ref_name}}, run action: ${{github.workflow}} failed. "},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"},{"tag":"at","user_id":"'${{ secrets.USER_ID }}'"}]]}}}}' ${{ secrets.WEBHOOK_URL }}