update test workflow (#1167)

* Update pr-run-test.yml
* Update daily-run-test.yml
* Update daily-run-test.yml
* Update pr-run-test.yml
* Update daily-run-test.yml
* Update daily-run-test.yml
* Update daily-run-test.yml
* Update daily-run-test.yml
* Update oc_score_baseline.yaml
* Update daily-run-test.yml
* Update oc_score_assert.py

---------

Co-authored-by: zhulin1 <zhulin1@pjlab.org.cn>
2025-05-30 16:03:24 +08:00 · 2024-05-16 15:32:57 +08:00 · 2024-05-16 15:32:57 +08:00 · 94eb90569f
commit 94eb90569f
parent 8ea2c404d7
4 changed files with 26 additions and 18 deletions
--- a/.github/scripts/oc_score_assert.py
+++ b/.github/scripts/oc_score_assert.py
@@ -6,7 +6,7 @@ import yaml

 output_path = 'regression_result_daily'

-model_list = ['internlm-7b-hf', 'internlm-chat-7b-hf', 'chatglm3-6b-base-hf']
+model_list = ['internlm2-7b-hf', 'internlm-chat-7b-hf', 'chatglm3-6b-base-hf']
 dataset_list = [
    'ARC-c', 'chid-dev', 'chid-test', 'openai_humaneval', 'openbookqa',
    'openbookqa_fact'
--- a/.github/scripts/oc_score_baseline.yaml
+++ b/.github/scripts/oc_score_baseline.yaml
@@ -1,10 +1,10 @@
 internlm-7b-hf:
-    ARC-c: 36.27
-    chid-dev: 81.68
-    chid-test: 83.67
-    openai_humaneval: 10.37
-    openbookqa: 44.4
-    openbookqa_fact: 73.2
+    ARC-c: 34.24
+    chid-dev: 79.70
+    chid-test: 81.12
+    openai_humaneval: 10.98
+    openbookqa: 47.20
+    openbookqa_fact: 74.00

 internlm-chat-7b-hf:
    ARC-c: 36.95
@@ -15,9 +15,17 @@ internlm-chat-7b-hf:
    openbookqa_fact: 80.4

 chatglm3-6b-base-hf:
-    ARC-c: 43.05
-    chid-dev: 80.2
-    chid-test: 80.77
+    ARC-c: 44.41
+    chid-dev: 78.22
+    chid-test: 78.57
    openai_humaneval: 20.73
-    openbookqa: 79.8
-    openbookqa_fact: 92.2
+    openbookqa: 78.40
+    openbookqa_fact: 92.00
+
+internlm2-7b-hf:
+    ARC-c: 34.92
+    chid-dev: 55.94
+    chid-test: 53.70
+    openai_humaneval: 44.51
+    openbookqa: 83.00
+    openbookqa_fact: 83.00
--- a/.github/workflows/daily-run-test.yml
+++ b/.github/workflows/daily-run-test.yml
@@ -45,7 +45,6 @@ jobs:
          cp -r ${{env.USERSPACE_PREFIX}}/data .
          rm -rf ~/.cache/huggingface/hub -f && mkdir ~/.cache -p && mkdir ~/.cache/huggingface -p
          ln -s ${{env.HF_CACHE_PATH}} ~/.cache/huggingface/hub
-          export HF_DATASETS_OFFLINE=1; export TRANSFORMERS_OFFLINE=1; export HF_HUB_OFFLINE=1;
      - name:  Run test
        run: |
          eval "$(conda shell.bash hook)"
@@ -53,7 +52,7 @@ jobs:
          conda info --envs
          rm -rf regression_result_daily
          export from_tf=TRUE
-          python3 run.py --models hf_internlm_chat_7b hf_internlm_7b hf_chatglm3_6b_base hf_chatglm3_6b hf_qwen_7b_chat hf_qwen_7b --datasets FewCLUE_chid_ppl humaneval_gen ARC_c_ppl obqa_ppl --work-dir regression_result_daily
+          python3 run.py --models hf_internlm_chat_7b hf_internlm2_7b hf_chatglm3_6b_base hf_chatglm3_6b hf_qwen_7b_chat hf_qwen_7b --datasets FewCLUE_chid_ppl humaneval_gen ARC_c_ppl obqa_ppl --work-dir regression_result_daily
      - name:  Get result
        run: |
          eval "$(conda shell.bash hook)"
@@ -62,8 +61,9 @@ jobs:
      - name:  Remove Conda Env
        if: always()
        run: |
+          cp -r regression_result_daily/* /cpfs01/user/qa-llm-cicd/report
          eval "$(conda shell.bash hook)"
-          conda env remove --name ${{env.CONDA_ENV}}
+          conda env remove -y --name ${{env.CONDA_ENV}}
          conda info --envs

  notify_to_feishu:
--- a/.github/workflows/pr-run-test.yml
+++ b/.github/workflows/pr-run-test.yml
@@ -55,10 +55,10 @@ jobs:
      - name:  Get result
        run: |
          score=$(sed -n '$p' regression_result/*/summary/*.csv | awk -F ',' '{print $NF}')
-          if (( ${score%.*} >= 75 && ${score%.*} <= 85 )); then
-             echo "score is $score between 75 and 85"
+          if (( ${score%.*} >= 79 && ${score%.*} <= 81 )); then
+             echo "score is $score between 79 and 81"
          else
-             echo "score is $score not between 75 and 85"
+             echo "score is $score not between 79 and 81"
             exit 1
          fi
          rm -rf regression_result