diff --git a/.github/scripts/oc_score_assert.py b/.github/scripts/oc_score_assert.py
index 52897094..9b175daa 100644
--- a/.github/scripts/oc_score_assert.py
+++ b/.github/scripts/oc_score_assert.py
@@ -6,8 +6,11 @@ import yaml
 
 output_path = 'regression_result_daily'
 
-model_list = ['internlm-7b-hf', 'internlm-chat-7b-hf']
-dataset_list = ['ARC-c', 'chid-dev', 'chid-test', 'openai_humaneval']
+model_list = ['internlm-7b-hf', 'internlm-chat-7b-hf', 'chatglm3-6b-base-hf']
+dataset_list = [
+    'ARC-c', 'chid-dev', 'chid-test', 'openai_humaneval', 'openbookqa',
+    'openbookqa_fact'
+]
 
 
 @pytest.fixture()
@@ -34,8 +37,8 @@ class TestChat:
 
     @pytest.mark.parametrize('model, dataset', [(p1, p2) for p1 in model_list
                                                 for p2 in dataset_list])
-    def test_demo_default(self, baseline_scores, result_scores, model,
-                          dataset):
+    def test_model_dataset_score(self, baseline_scores, result_scores, model,
+                                 dataset):
         base_score = baseline_scores.get(model).get(dataset)
         result_score = result_scores.get(model).get(dataset)
         assert_score(result_score, base_score)
diff --git a/.github/scripts/oc_score_baseline.yaml b/.github/scripts/oc_score_baseline.yaml
index e80d2df9..6e249541 100644
--- a/.github/scripts/oc_score_baseline.yaml
+++ b/.github/scripts/oc_score_baseline.yaml
@@ -3,9 +3,21 @@ internlm-7b-hf:
     chid-dev: 81.68
     chid-test: 83.67
     openai_humaneval: 10.37
+    openbookqa: 44.4
+    openbookqa_fact: 73.2
 
 internlm-chat-7b-hf:
     ARC-c: 36.95
     chid-dev: 71.78
     chid-test: 76.87
     openai_humaneval: 21.34
+    openbookqa: 66.6
+    openbookqa_fact: 80.4
+
+chatglm3-6b-base-hf:
+    ARC-c: 43.05
+    chid-dev: 80.2
+    chid-test: 80.77
+    openai_humaneval: 20.73
+    openbookqa: 79.8
+    openbookqa_fact: 92.2
diff --git a/.github/workflows/daily-run-test.yml b/.github/workflows/daily-run-test.yml
index 232852af..922bf433 100644
--- a/.github/workflows/daily-run-test.yml
+++ b/.github/workflows/daily-run-test.yml
@@ -50,11 +50,12 @@ jobs:
           conda info --envs
           rm -rf regression_result_daily
           export from_tf=TRUE
-          python3 run.py --models hf_internlm_chat_7b hf_internlm_7b --datasets FewCLUE_chid_ppl humaneval_gen ARC_c_ppl --work-dir regression_result_daily --debug
+          python3 run.py --models hf_internlm_chat_7b hf_internlm_7b hf_chatglm3_6b_base hf_chatglm3_6b hf_qwen_7b_chat hf_qwen_7b --datasets FewCLUE_chid_ppl humaneval_gen ARC_c_ppl obqa_ppl --work-dir regression_result_daily
       - name:  Get result
         run: |
+          eval "$(conda shell.bash hook)"
           pip install pytest --cache-dir ${{env.PIP_CACHE_PATH}}
-          pytest -s -v --color=yes .github/scripts/oc_score_assert.py
+          python -m pytest -s -v --color=yes .github/scripts/oc_score_assert.py
       - name:  Remove Conda Env
         if: always()
         run: |
@@ -71,4 +72,4 @@ jobs:
     steps:
       - name: notify
         run: |
-          curl -X POST -H "Content-Type: application/json" -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"Opencompass- pr test failed","content":[[{"tag":"text","text":"branch: ${{github.ref_name}}, run action: ${{github.workflow}} failed. "},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"},{"tag":"at","user_id":"'${{ secrets.USER_ID }}'"}]]}}}}'  ${{ secrets.WEBHOOK_URL }}
+          curl -X POST -H "Content-Type: application/json" -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"Opencompass- Daily test failed","content":[[{"tag":"text","text":"branch: ${{github.ref_name}}, run action: ${{github.workflow}} failed. "},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"},{"tag":"at","user_id":"'${{ secrets.USER_ID }}'"}]]}}}}'  ${{ secrets.WEBHOOK_URL }}