Update daily test (#871)

* add daily test case
* Update pr-run-test.yml
* Update daily-run-test.yml
* Update daily-run-test.yml
* Update pr-run-test.yml
* Update daily-run-test.yml
* Update oc_score_assert.py
* Update daily-run-test.yml
* Update daily-run-test.yml
* Update daily-run-test.yml
* update testcase baseline
* fix test case name
* add more models into daily test

---------

Co-authored-by: zhulin1 <zhulin1@pjlab.org.cn>
Co-authored-by: Leymore <zfz-960727@163.com>
2025-05-30 16:03:24 +08:00 · 2024-02-05 15:52:00 +08:00 · 2024-02-05 15:52:00 +08:00 · b4a9acd7be
commit b4a9acd7be
parent fc84aff963
3 changed files with 23 additions and 7 deletions
--- a/.github/scripts/oc_score_assert.py
+++ b/.github/scripts/oc_score_assert.py
@@ -6,8 +6,11 @@ import yaml

 output_path = 'regression_result_daily'

-model_list = ['internlm-7b-hf', 'internlm-chat-7b-hf']
-dataset_list = ['ARC-c', 'chid-dev', 'chid-test', 'openai_humaneval']
+model_list = ['internlm-7b-hf', 'internlm-chat-7b-hf', 'chatglm3-6b-base-hf']
+dataset_list = [
+    'ARC-c', 'chid-dev', 'chid-test', 'openai_humaneval', 'openbookqa',
+    'openbookqa_fact'
+]


@pytest.fixture()
@@ -34,8 +37,8 @@ class TestChat:

    @pytest.mark.parametrize('model, dataset', [(p1, p2) for p1 in model_list
                                                for p2 in dataset_list])
-    def test_demo_default(self, baseline_scores, result_scores, model,
-                          dataset):
+    def test_model_dataset_score(self, baseline_scores, result_scores, model,
+                                 dataset):
        base_score = baseline_scores.get(model).get(dataset)
        result_score = result_scores.get(model).get(dataset)
        assert_score(result_score, base_score)
--- a/.github/scripts/oc_score_baseline.yaml
+++ b/.github/scripts/oc_score_baseline.yaml
@@ -3,9 +3,21 @@ internlm-7b-hf:
    chid-dev: 81.68
    chid-test: 83.67
    openai_humaneval: 10.37
+    openbookqa: 44.4
+    openbookqa_fact: 73.2

 internlm-chat-7b-hf:
    ARC-c: 36.95
    chid-dev: 71.78
    chid-test: 76.87
    openai_humaneval: 21.34
+    openbookqa: 66.6
+    openbookqa_fact: 80.4
+
+chatglm3-6b-base-hf:
+    ARC-c: 43.05
+    chid-dev: 80.2
+    chid-test: 80.77
+    openai_humaneval: 20.73
+    openbookqa: 79.8
+    openbookqa_fact: 92.2
--- a/.github/workflows/daily-run-test.yml
+++ b/.github/workflows/daily-run-test.yml
@@ -50,11 +50,12 @@ jobs:
          conda info --envs
          rm -rf regression_result_daily
          export from_tf=TRUE
-          python3 run.py --models hf_internlm_chat_7b hf_internlm_7b --datasets FewCLUE_chid_ppl humaneval_gen ARC_c_ppl --work-dir regression_result_daily --debug
+          python3 run.py --models hf_internlm_chat_7b hf_internlm_7b hf_chatglm3_6b_base hf_chatglm3_6b hf_qwen_7b_chat hf_qwen_7b --datasets FewCLUE_chid_ppl humaneval_gen ARC_c_ppl obqa_ppl --work-dir regression_result_daily
      - name:  Get result
        run: |
+          eval "$(conda shell.bash hook)"
          pip install pytest --cache-dir ${{env.PIP_CACHE_PATH}}
-          pytest -s -v --color=yes .github/scripts/oc_score_assert.py
+          python -m pytest -s -v --color=yes .github/scripts/oc_score_assert.py
      - name:  Remove Conda Env
        if: always()
        run: |
@@ -71,4 +72,4 @@ jobs:
    steps:
      - name: notify
        run: |
-          curl -X POST -H "Content-Type: application/json" -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"Opencompass- pr test failed","content":[[{"tag":"text","text":"branch: ${{github.ref_name}}, run action: ${{github.workflow}} failed. "},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"},{"tag":"at","user_id":"'${{ secrets.USER_ID }}'"}]]}}}}'  ${{ secrets.WEBHOOK_URL }}
+          curl -X POST -H "Content-Type: application/json" -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"Opencompass- Daily test failed","content":[[{"tag":"text","text":"branch: ${{github.ref_name}}, run action: ${{github.workflow}} failed. "},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"},{"tag":"at","user_id":"'${{ secrets.USER_ID }}'"}]]}}}}'  ${{ secrets.WEBHOOK_URL }}