From 89abcba486b8c1e6c6c8b93b6ed856a0d0bb3554 Mon Sep 17 00:00:00 2001
From: zhulinJulia24 <145004780+zhulinJulia24@users.noreply.github.com>
Date: Wed, 2 Oct 2024 12:30:38 +0800
Subject: [PATCH] [CI] Fix testcase failure (#1582)

* update

* Update oc_score_baseline.yaml

* Update daily-run-test.yml

* Update daily-run-test.yml

* Update daily-run-test.yml

* Update daily-run-test.yml

---------

Co-authored-by: zhulin1 <zhulin1@pjlab.org.cn>
---
 .github/scripts/oc_score_assert.py     |  3 +--
 .github/scripts/oc_score_baseline.yaml | 12 ++++++------
 .github/workflows/daily-run-test.yml   |  8 ++++----
 3 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/.github/scripts/oc_score_assert.py b/.github/scripts/oc_score_assert.py
index 6f2c0a11..c01ef686 100644
--- a/.github/scripts/oc_score_assert.py
+++ b/.github/scripts/oc_score_assert.py
@@ -7,8 +7,7 @@ import yaml
 output_path = 'regression_result_daily'
 
 chat_model_list = [
-    'baichuan2-7b-chat-hf', 'glm-4-9b-chat-turbomind', 'glm-4-9b-chat-vllm',
-    'deepseek-7b-chat-hf', 'deepseek-moe-16b-chat-hf',
+    'baichuan2-7b-chat-hf', 'deepseek-7b-chat-hf', 'deepseek-moe-16b-chat-hf',
     'deepseek-v2-lite-chat-hf', 'deepseek-7b-chat-vllm', 'gemma2-2b-it-hf',
     'gemma2-9b-it-hf', 'gemma-7b-it-vllm', 'internlm2_5-7b-chat-hf',
     'internlm2_5-20b-chat-hf', 'internlm2_5-7b-chat-turbomind',
diff --git a/.github/scripts/oc_score_baseline.yaml b/.github/scripts/oc_score_baseline.yaml
index 9690aa2c..809dfea4 100644
--- a/.github/scripts/oc_score_baseline.yaml
+++ b/.github/scripts/oc_score_baseline.yaml
@@ -244,14 +244,14 @@ gemma-7b-hf:
     race-high: 66
 
 gemma2-2b-hf:
-    gsm8k: 8
-    race-middle: 31
-    race-high: 30
+    gsm8k: 33
+    race-middle: 56
+    race-high: 58
 
 gemma2-9b-hf:
-    gsm8k: 20
-    race-middle: 42
-    race-high: 35
+    gsm8k: 70
+    race-middle: 82
+    race-high: 84
 
 internlm2_5-7b-hf:
     gsm8k: 47
diff --git a/.github/workflows/daily-run-test.yml b/.github/workflows/daily-run-test.yml
index 894b149e..42ada2f0 100644
--- a/.github/workflows/daily-run-test.yml
+++ b/.github/workflows/daily-run-test.yml
@@ -123,16 +123,16 @@ jobs:
           conda info --envs
           export from_tf=TRUE
           python tools/list_configs.py internlm2_5 mmlu
-          opencompass --models hf_internlm2_5_7b --datasets race_ppl --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd1_${{ matrix.cuda_env }} --reuse --max-num-workers 2
+          opencompass --models hf_internlm2_5_7b --datasets race_ppl --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd1_${{ matrix.cuda_env }} --reuse
           rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd1_${{ matrix.cuda_env }}/*/summary regression_result_daily
           python -m pytest -m case1 -s -v --color=yes .github/scripts/oc_score_assert.py
-          opencompass --models hf_internlm2_5_7b_chat hf_internlm2_5_1_8b_chat --datasets race_gen -a lmdeploy --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd2 --reuse --max-num-workers 2
+          opencompass --models hf_internlm2_5_7b_chat --datasets race_gen -a lmdeploy --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd2_${{ matrix.cuda_env }} --reuse
           rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd2_${{ matrix.cuda_env }}/*/summary regression_result_daily
           python -m pytest -m case2 -s -v --color=yes .github/scripts/oc_score_assert.py
-          opencompass --datasets race_ppl --hf-type base --hf-path internlm/internlm2_5-7b --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd3 --reuse --max-num-workers 2
+          opencompass --datasets race_ppl --hf-type base --hf-path internlm/internlm2_5-7b --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd3_${{ matrix.cuda_env }} --reuse
           rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd3_${{ matrix.cuda_env }}/*/summary regression_result_daily
           python -m pytest -m case3 -s -v --color=yes .github/scripts/oc_score_assert.py
-          opencompass --datasets race_gen --hf-type chat --hf-path internlm/internlm2_5-7b-chat --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd4 --reuse --max-num-workers 2
+          opencompass --datasets race_gen --hf-type chat --hf-path internlm/internlm2_5-7b-chat --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd4_${{ matrix.cuda_env }} --reuse
           rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd4_${{ matrix.cuda_env }}/*/summary regression_result_daily
           python -m pytest -m case4 -s -v --color=yes .github/scripts/oc_score_assert.py
       - name:  Remove Conda Env