From 89abcba486b8c1e6c6c8b93b6ed856a0d0bb3554 Mon Sep 17 00:00:00 2001 From: zhulinJulia24 <145004780+zhulinJulia24@users.noreply.github.com> Date: Wed, 2 Oct 2024 12:30:38 +0800 Subject: [PATCH] [CI] Fix testcase failure (#1582) * update * Update oc_score_baseline.yaml * Update daily-run-test.yml * Update daily-run-test.yml * Update daily-run-test.yml * Update daily-run-test.yml --------- Co-authored-by: zhulin1 --- .github/scripts/oc_score_assert.py | 3 +-- .github/scripts/oc_score_baseline.yaml | 12 ++++++------ .github/workflows/daily-run-test.yml | 8 ++++---- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/.github/scripts/oc_score_assert.py b/.github/scripts/oc_score_assert.py index 6f2c0a11..c01ef686 100644 --- a/.github/scripts/oc_score_assert.py +++ b/.github/scripts/oc_score_assert.py @@ -7,8 +7,7 @@ import yaml output_path = 'regression_result_daily' chat_model_list = [ - 'baichuan2-7b-chat-hf', 'glm-4-9b-chat-turbomind', 'glm-4-9b-chat-vllm', - 'deepseek-7b-chat-hf', 'deepseek-moe-16b-chat-hf', + 'baichuan2-7b-chat-hf', 'deepseek-7b-chat-hf', 'deepseek-moe-16b-chat-hf', 'deepseek-v2-lite-chat-hf', 'deepseek-7b-chat-vllm', 'gemma2-2b-it-hf', 'gemma2-9b-it-hf', 'gemma-7b-it-vllm', 'internlm2_5-7b-chat-hf', 'internlm2_5-20b-chat-hf', 'internlm2_5-7b-chat-turbomind', diff --git a/.github/scripts/oc_score_baseline.yaml b/.github/scripts/oc_score_baseline.yaml index 9690aa2c..809dfea4 100644 --- a/.github/scripts/oc_score_baseline.yaml +++ b/.github/scripts/oc_score_baseline.yaml @@ -244,14 +244,14 @@ gemma-7b-hf: race-high: 66 gemma2-2b-hf: - gsm8k: 8 - race-middle: 31 - race-high: 30 + gsm8k: 33 + race-middle: 56 + race-high: 58 gemma2-9b-hf: - gsm8k: 20 - race-middle: 42 - race-high: 35 + gsm8k: 70 + race-middle: 82 + race-high: 84 internlm2_5-7b-hf: gsm8k: 47 diff --git a/.github/workflows/daily-run-test.yml b/.github/workflows/daily-run-test.yml index 894b149e..42ada2f0 100644 --- a/.github/workflows/daily-run-test.yml +++ b/.github/workflows/daily-run-test.yml @@ -123,16 +123,16 @@ jobs: conda info --envs export from_tf=TRUE python tools/list_configs.py internlm2_5 mmlu - opencompass --models hf_internlm2_5_7b --datasets race_ppl --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd1_${{ matrix.cuda_env }} --reuse --max-num-workers 2 + opencompass --models hf_internlm2_5_7b --datasets race_ppl --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd1_${{ matrix.cuda_env }} --reuse rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd1_${{ matrix.cuda_env }}/*/summary regression_result_daily python -m pytest -m case1 -s -v --color=yes .github/scripts/oc_score_assert.py - opencompass --models hf_internlm2_5_7b_chat hf_internlm2_5_1_8b_chat --datasets race_gen -a lmdeploy --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd2 --reuse --max-num-workers 2 + opencompass --models hf_internlm2_5_7b_chat --datasets race_gen -a lmdeploy --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd2_${{ matrix.cuda_env }} --reuse rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd2_${{ matrix.cuda_env }}/*/summary regression_result_daily python -m pytest -m case2 -s -v --color=yes .github/scripts/oc_score_assert.py - opencompass --datasets race_ppl --hf-type base --hf-path internlm/internlm2_5-7b --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd3 --reuse --max-num-workers 2 + opencompass --datasets race_ppl --hf-type base --hf-path internlm/internlm2_5-7b --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd3_${{ matrix.cuda_env }} --reuse rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd3_${{ matrix.cuda_env }}/*/summary regression_result_daily python -m pytest -m case3 -s -v --color=yes .github/scripts/oc_score_assert.py - opencompass --datasets race_gen --hf-type chat --hf-path internlm/internlm2_5-7b-chat --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd4 --reuse --max-num-workers 2 + opencompass --datasets race_gen --hf-type chat --hf-path internlm/internlm2_5-7b-chat --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd4_${{ matrix.cuda_env }} --reuse rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd4_${{ matrix.cuda_env }}/*/summary regression_result_daily python -m pytest -m case4 -s -v --color=yes .github/scripts/oc_score_assert.py - name: Remove Conda Env