From fb69ba5eb8e2197fb836ebdd5bface5337d67ed2 Mon Sep 17 00:00:00 2001 From: zhulinJulia24 <145004780+zhulinJulia24@users.noreply.github.com> Date: Fri, 23 Aug 2024 01:49:17 +0800 Subject: [PATCH] [CI] add commond testcase into daily testcase (#1447) * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update --------- Co-authored-by: zhulin1 --- .github/scripts/eval_regression_base.py | 51 +++++++------ .github/scripts/eval_regression_chat.py | 95 +++++++++++++++++-------- .github/scripts/oc_score_assert.py | 69 +++++++++++++++--- .github/scripts/oc_score_baseline.yaml | 36 +++++++++- .github/workflows/daily-run-test.yml | 40 +++++++---- 5 files changed, 215 insertions(+), 76 deletions(-) diff --git a/.github/scripts/eval_regression_base.py b/.github/scripts/eval_regression_base.py index 0d252380..8b4c6446 100644 --- a/.github/scripts/eval_regression_base.py +++ b/.github/scripts/eval_regression_base.py @@ -2,48 +2,57 @@ from mmengine.config import read_base with read_base(): # choose a list of datasets - from ...configs.datasets.gsm8k.gsm8k_gen_17d0dc import \ + from opencompass.configs.datasets.gsm8k.gsm8k_gen_17d0dc import \ gsm8k_datasets # noqa: F401, E501 - from ...configs.datasets.race.race_ppl import \ + from opencompass.configs.datasets.race.race_ppl import \ race_datasets # noqa: F401, E501 - from ...configs.models.deepseek.hf_deepseek_moe_16b_base import \ + from opencompass.configs.models.deepseek.hf_deepseek_moe_16b_base import \ models as hf_deepseek_moe_16b_base_model # noqa: F401, E501 # read hf models - chat models - from ...configs.models.deepseek.lmdeploy_deepseek_7b_base import \ + from opencompass.configs.models.deepseek.lmdeploy_deepseek_7b_base import \ models as lmdeploy_deepseek_7b_base_model # noqa: F401, E501 - from ...configs.models.deepseek.vllm_deepseek_moe_16b_base import \ + from opencompass.configs.models.deepseek.vllm_deepseek_moe_16b_base import \ models as vllm_deepseek_moe_16b_base_model # noqa: F401, E501 - from ...configs.models.gemma.hf_gemma_2b import \ + from opencompass.configs.models.gemma.hf_gemma_2b import \ models as hf_gemma_2b_model # noqa: F401, E501 - from ...configs.models.gemma.hf_gemma_7b import \ + from opencompass.configs.models.gemma.hf_gemma_7b import \ models as hf_gemma_7b_model # noqa: F401, E501 - from ...configs.models.hf_internlm.lmdeploy_internlm2_1_8b import \ + from opencompass.configs.models.hf_internlm.hf_internlm2_5_7b import \ + models as hf_internlm2_5_7b_model # noqa: F401, E501 + from opencompass.configs.models.hf_internlm.hf_internlm2_7b import \ + models as hf_internlm2_7b_model # noqa: F401, E501 + from opencompass.configs.models.hf_internlm.hf_internlm2_base_7b import \ + models as hf_internlm2_base_7b_model # noqa: F401, E501 + from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_1_8b import \ models as lmdeploy_internlm2_1_8b_model # noqa: F401, E501 - from ...configs.models.hf_internlm.lmdeploy_internlm2_7b import \ + from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_5_7b import \ + models as lmdeploy_internlm2_5_7b_model # noqa: F401, E501 + from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_7b import \ models as lmdeploy_internlm2_7b_model # noqa: F401, E501 - from ...configs.models.hf_internlm.lmdeploy_internlm2_base_7b import \ + from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_base_7b import \ models as lmdeploy_internlm2_base_7b_model # noqa: F401, E501 - from ...configs.models.hf_llama.lmdeploy_llama3_8b import \ + from opencompass.configs.models.hf_llama.lmdeploy_llama3_8b import \ models as lmdeploy_llama3_8b_model # noqa: F401, E501 - from ...configs.models.mistral.hf_mistral_7b_v0_2 import \ + from opencompass.configs.models.mistral.hf_mistral_7b_v0_2 import \ models as hf_mistral_7b_v0_2_model # noqa: F401, E501 - from ...configs.models.mistral.vllm_mistral_7b_v0_2 import \ + from opencompass.configs.models.mistral.vllm_mistral_7b_v0_2 import \ models as vllm_mistral_7b_v0_2_model # noqa: F401, E501 - from ...configs.models.qwen.hf_qwen1_5_moe_a2_7b import \ + from opencompass.configs.models.qwen.hf_qwen1_5_moe_a2_7b import \ models as hf_qwen1_5_moe_a2_7b_model # noqa: F401, E501 - from ...configs.models.qwen.hf_qwen2_0_5b import \ + from opencompass.configs.models.qwen.hf_qwen2_0_5b import \ models as hf_qwen2_0_5b_model # noqa: F401, E501 - from ...configs.models.qwen.lmdeploy_qwen2_1_5b import \ + from opencompass.configs.models.qwen.lmdeploy_qwen2_1_5b import \ models as lmdeploy_qwen2_1_5b_model # noqa: F401, E501 - from ...configs.models.qwen.lmdeploy_qwen2_7b import \ + from opencompass.configs.models.qwen.lmdeploy_qwen2_7b import \ models as lmdeploy_qwen2_7b_model # noqa: F401, E501 - from ...configs.models.qwen.vllm_qwen1_5_0_5b import \ + from opencompass.configs.models.qwen.vllm_qwen1_5_0_5b import \ models as vllm_qwen1_5_0_5b_model # noqa: F401, E501 - from ...configs.models.yi.hf_yi_1_5_6b import \ + from opencompass.configs.models.yi.hf_yi_1_5_6b import \ models as hf_yi_1_5_6b_model # noqa: F401, E501 - from ...configs.models.yi.hf_yi_1_5_9b import \ + from opencompass.configs.models.yi.hf_yi_1_5_9b import \ models as hf_yi_1_5_9b_model # noqa: F401, E501 - from ...configs.summarizers.medium import summarizer # noqa: F401, E501 + from opencompass.configs.summarizers.medium import \ + summarizer # noqa: F401, E501 models = sum([v for k, v in locals().items() if k.endswith('_model')], []) datasets = sum([v for k, v in locals().items() if k.endswith('_datasets')], []) diff --git a/.github/scripts/eval_regression_chat.py b/.github/scripts/eval_regression_chat.py index 8e29834d..8559f8ab 100644 --- a/.github/scripts/eval_regression_chat.py +++ b/.github/scripts/eval_regression_chat.py @@ -1,70 +1,105 @@ from mmengine.config import read_base +from opencompass.models import OpenAISDK + with read_base(): # choose a list of datasets - from ...configs.datasets.gsm8k.gsm8k_gen import \ + from opencompass.configs.datasets.gsm8k.gsm8k_gen import \ gsm8k_datasets # noqa: F401, E501 - from ...configs.datasets.race.race_gen import \ + from opencompass.configs.datasets.race.race_gen import \ race_datasets # noqa: F401, E501 # read hf models - chat models - from ...configs.models.baichuan.hf_baichuan2_7b_chat import \ + from opencompass.configs.models.baichuan.hf_baichuan2_7b_chat import \ models as hf_baichuan2_7b_chat_model # noqa: F401, E501 - from ...configs.models.chatglm.hf_glm4_9b_chat import \ + from opencompass.configs.models.chatglm.hf_glm4_9b_chat import \ models as hf_glm4_9b_chat_model # noqa: F401, E501 - from ...configs.models.deepseek.hf_deepseek_7b_chat import \ + from opencompass.configs.models.deepseek.hf_deepseek_7b_chat import \ models as hf_deepseek_7b_chat_model # noqa: F401, E501 - from ...configs.models.deepseek.hf_deepseek_moe_16b_chat import \ + from opencompass.configs.models.deepseek.hf_deepseek_moe_16b_chat import \ models as hf_deepseek_moe_16b_chat_model # noqa: F401, E501 - from ...configs.models.deepseek.vllm_deepseek_7b_chat import \ + from opencompass.configs.models.deepseek.vllm_deepseek_7b_chat import \ models as vllm_deepseek_7b_chat_model # noqa: F401, E501 - from ...configs.models.gemma.hf_gemma_2b_it import \ + from opencompass.configs.models.gemma.hf_gemma_2b_it import \ models as hf_gemma_2b_it_model # noqa: F401, E501 - from ...configs.models.gemma.hf_gemma_7b_it import \ + from opencompass.configs.models.gemma.hf_gemma_7b_it import \ models as hf_gemma_7b_it_model # noqa: F401, E501 - from ...configs.models.hf_internlm.lmdeploy_internlm2_chat_1_8b import \ + from opencompass.configs.models.hf_internlm.hf_internlm2_5_7b_chat import \ + models as hf_internlm2_5_7b_chat_model # noqa: F401, E501 + from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_5_7b_chat import \ + models as lmdeploy_internlm2_5_7b_chat_model # noqa: F401, E501 + from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_chat_1_8b import \ models as lmdeploy_internlm2_chat_1_8b_model # noqa: F401, E501 - from ...configs.models.hf_internlm.lmdeploy_internlm2_chat_1_8b_sft import \ + from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_chat_1_8b_sft import \ models as lmdeploy_internlm2_chat_1_8b_sft_model # noqa: F401, E501 - from ...configs.models.hf_internlm.lmdeploy_internlm2_chat_7b import \ + from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_chat_7b import \ models as lmdeploy_internlm2_chat_7b_model # noqa: F401, E501 - from ...configs.models.hf_internlm.lmdeploy_internlm2_chat_7b_sft import \ + from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_chat_7b_sft import \ models as lmdeploy_internlm2_chat_7b_sft_model # noqa: F401, E501 - from ...configs.models.hf_internlm.vllm_internlm2_chat_7b import \ + from opencompass.configs.models.hf_internlm.vllm_internlm2_chat_7b import \ models as vllm_internlm2_chat_7b_model # noqa: F401, E501 - from ...configs.models.hf_llama.hf_llama3_8b_instruct import \ + from opencompass.configs.models.hf_llama.hf_llama3_8b_instruct import \ models as hf_llama3_8b_instruct_model # noqa: F401, E501 - from ...configs.models.hf_llama.lmdeploy_llama3_8b_instruct import \ + from opencompass.configs.models.hf_llama.lmdeploy_llama3_8b_instruct import \ models as lmdeploy_llama3_8b_instruct_model # noqa: F401, E501 - from ...configs.models.mistral.hf_mistral_7b_instruct_v0_2 import \ + from opencompass.configs.models.mistral.hf_mistral_7b_instruct_v0_2 import \ models as hf_mistral_7b_instruct_v0_2_model # noqa: F401, E501 - from ...configs.models.mistral.vllm_mistral_7b_instruct_v0_2 import \ + from opencompass.configs.models.mistral.vllm_mistral_7b_instruct_v0_2 import \ models as vllm_mistral_7b_instruct_v0_2_model # noqa: F401, E501 - from ...configs.models.openbmb.hf_minicpm_2b_dpo_fp32 import \ + from opencompass.configs.models.openbmb.hf_minicpm_2b_dpo_fp32 import \ models as hf_minicpm_2b_dpo_fp32_model # noqa: F401, E501 - from ...configs.models.openbmb.hf_minicpm_2b_sft_bf16 import \ + from opencompass.configs.models.openbmb.hf_minicpm_2b_sft_bf16 import \ models as hf_minicpm_2b_sft_bf16_model # noqa: F401, E501 - from ...configs.models.openbmb.hf_minicpm_2b_sft_fp32 import \ + from opencompass.configs.models.openbmb.hf_minicpm_2b_sft_fp32 import \ models as hf_minicpm_2b_sft_fp32_model # noqa: F401, E501 - from ...configs.models.phi.hf_phi_3_mini_4k_instruct import \ + from opencompass.configs.models.phi.hf_phi_3_mini_4k_instruct import \ models as hf_phi_3_mini_4k_instruct_model # noqa: F401, E501 - from ...configs.models.phi.hf_phi_3_small_8k_instruct import \ + from opencompass.configs.models.phi.hf_phi_3_small_8k_instruct import \ models as hf_phi_3_mini_8k_instruct_model # noqa: F401, E501 - from ...configs.models.qwen.hf_qwen1_5_0_5b_chat import \ + from opencompass.configs.models.qwen.hf_qwen1_5_0_5b_chat import \ models as hf_qwen1_5_0_5b_chat_model # noqa: F401, E501 - from ...configs.models.qwen.lmdeploy_qwen2_1_5b_instruct import \ + from opencompass.configs.models.qwen.lmdeploy_qwen2_1_5b_instruct import \ models as lmdeploy_qwen2_1_5b_instruct_model # noqa: F401, E501 - from ...configs.models.qwen.lmdeploy_qwen2_7b_instruct import \ + from opencompass.configs.models.qwen.lmdeploy_qwen2_7b_instruct import \ models as lmdeploy_qwen2_7b_instruct_model # noqa: F401, E501 - from ...configs.models.qwen.vllm_qwen1_5_0_5b_chat import \ + from opencompass.configs.models.qwen.vllm_qwen1_5_0_5b_chat import \ models as vllm_qwen1_5_0_5b_chat_model # noqa: F401, E501 - from ...configs.models.yi.hf_yi_1_5_6b_chat import \ + from opencompass.configs.models.yi.hf_yi_1_5_6b_chat import \ models as hf_yi_1_5_6b_chat_model # noqa: F401, E501 - from ...configs.models.yi.hf_yi_1_5_9b_chat import \ + from opencompass.configs.models.yi.hf_yi_1_5_9b_chat import \ models as hf_yi_1_5_9b_chat_model # noqa: F401, E501 - from ...configs.summarizers.medium import summarizer # noqa: F401, E501 + from opencompass.configs.summarizers.medium import \ + summarizer # noqa: F401, E501 models = sum([v for k, v in locals().items() if k.endswith('_model')], []) datasets = sum([v for k, v in locals().items() if k.endswith('_datasets')], []) +api_meta_template = dict( + round=[ + dict(role='HUMAN', api_role='HUMAN'), + dict(role='BOT', api_role='BOT', generate=True), + ], + reserved_roles=[dict(role='SYSTEM', api_role='SYSTEM')], +) + +model_name = '' + +models.append( + dict( + abbr='lmdeploy-api-test', + type=OpenAISDK, + key='EMPTY', + openai_api_base='http://10.1.9.14:10001/v1', + path='compass_judger_internlm2_102b_0508', + tokenizer_path='internlm/internlm2_5-20b-chat', + rpm_verbose=True, + meta_template=api_meta_template, + query_per_second=50, + max_out_len=1024, + max_seq_len=4096, + temperature=0.01, + batch_size=128, + retry=3, + )) + for d in datasets: d['reader_cfg']['test_range'] = '[0:100]' diff --git a/.github/scripts/oc_score_assert.py b/.github/scripts/oc_score_assert.py index a9b7691b..c454b772 100644 --- a/.github/scripts/oc_score_assert.py +++ b/.github/scripts/oc_score_assert.py @@ -8,22 +8,25 @@ output_path = 'regression_result_daily' chat_model_list = [ 'baichuan2-7b-chat-hf', 'deepseek-7b-chat-hf', 'deepseek-moe-16b-chat-hf', - 'gemma-2b-it-hf', 'gemma-7b-it-hf', 'internlm2-chat-1.8b-turbomind', + 'gemma-2b-it-hf', 'gemma-7b-it-hf', 'internlm2_5-7b-chat-hf', + 'internlm2_5-7b-chat-turbomind', 'internlm2-chat-1.8b-turbomind', 'internlm2-chat-1.8b-sft-turbomind', 'internlm2-chat-7b-turbomind', - 'internlm2-chat-7b-sft-turbomind', 'llama-3-8b-instruct-hf', - 'llama-3-8b-instruct-turbomind', 'mistral-7b-instruct-v0.2-hf', - 'minicpm-2b-dpo-fp32-hf', 'minicpm-2b-sft-bf16-hf', - 'minicpm-2b-sft-fp32-hf', 'phi-3-mini-4k-instruct-hf', - 'qwen1.5-0.5b-chat-hf', 'qwen2-1.5b-instruct-turbomind', - 'qwen2-7b-instruct-turbomind', 'yi-1.5-6b-chat-hf', 'yi-1.5-9b-chat-hf' + 'internlm2-chat-7b-sft-turbomind', 'internlm2_5-7b-chat-turbomind', + 'llama-3-8b-instruct-hf', 'llama-3-8b-instruct-turbomind', + 'mistral-7b-instruct-v0.2-hf', 'minicpm-2b-dpo-fp32-hf', + 'minicpm-2b-sft-bf16-hf', 'minicpm-2b-sft-fp32-hf', + 'phi-3-mini-4k-instruct-hf', 'qwen1.5-0.5b-chat-hf', + 'qwen2-1.5b-instruct-turbomind', 'qwen2-7b-instruct-turbomind', + 'yi-1.5-6b-chat-hf', 'yi-1.5-9b-chat-hf', 'lmdeploy-api-test' ] base_model_list = [ 'deepseek-moe-16b-base-hf', 'deepseek-7b-base-turbomind', 'gemma-2b-hf', 'gemma-7b-hf', 'internlm2-1.8b-turbomind', 'internlm2-7b-turbomind', - 'internlm2-base-7b-turbomind', 'llama-3-8b-turbomind', - 'mistral-7b-v0.2-hf', 'qwen1.5-moe-a2.7b-hf', 'qwen2-0.5b-hf', - 'qwen2-1.5b-turbomind', 'qwen2-7b-turbomind', 'yi-1.5-6b-hf', - 'yi-1.5-9b-hf' + 'internlm2_5-7b-turbomind', 'internlm2_5-7b-hf', + 'internlm2-base-7b-turbomind', 'internlm2-base-7b-hf', + 'llama-3-8b-turbomind', 'mistral-7b-v0.2-hf', 'qwen1.5-moe-a2.7b-hf', + 'qwen2-0.5b-hf', 'qwen2-1.5b-turbomind', 'qwen2-7b-turbomind', + 'yi-1.5-6b-hf', 'yi-1.5-9b-hf' ] dataset_list = ['gsm8k', 'race-middle', 'race-high'] @@ -77,6 +80,50 @@ class TestBase: assert_score(result_score, base_score) +@pytest.mark.usefixtures('result_scores') +class TestCmdCase: + + @pytest.mark.case1 + @pytest.mark.parametrize('model, dataset', + [('internlm2_5-7b-hf', 'race-middle'), + ('internlm2_5-7b-hf', 'race-high')]) + def test_cmd_case1(self, result_scores, model, dataset): + if len(result_scores.keys()) != 1: + assert False, 'result is none' + result_score = result_scores.get(model).get(dataset) + assert_score(result_score, 91) + + @pytest.mark.case2 + @pytest.mark.parametrize('model, dataset', + [('internlm2_5-7b-chat-turbomind', 'race-middle'), + ('internlm2_5-7b-chat-turbomind', 'race-high')]) + def test_cmd_case2(self, result_scores, model, dataset): + if len(result_scores.keys()) != 1: + assert False, 'result is none' + result_score = result_scores.get(model).get(dataset) + assert_score(result_score, 91) + + @pytest.mark.case3 + @pytest.mark.parametrize('model, dataset', + [('internlm2_5-7b_hf', 'race-middle'), + ('internlm2_5-7b_hf', 'race-high')]) + def test_cmd_case3(self, result_scores, model, dataset): + if len(result_scores.keys()) != 1: + assert False, 'result is none' + result_score = result_scores.get(model).get(dataset) + assert_score(result_score, 91) + + @pytest.mark.case4 + @pytest.mark.parametrize('model, dataset', + [('internlm2_5-7b-chat_hf', 'race-middle'), + ('internlm2_5-7b-chat_hf', 'race-high')]) + def test_cmd_case4(self, result_scores, model, dataset): + if len(result_scores.keys()) != 1: + assert False, 'result is none' + result_score = result_scores.get(model).get(dataset) + assert_score(result_score, 91) + + def assert_score(score, baseline): if score is None or score == '-': assert False, 'value is none' diff --git a/.github/scripts/oc_score_baseline.yaml b/.github/scripts/oc_score_baseline.yaml index 14906442..8ec8a5f7 100644 --- a/.github/scripts/oc_score_baseline.yaml +++ b/.github/scripts/oc_score_baseline.yaml @@ -3,6 +3,11 @@ baichuan2-7b-chat-hf: race-middle: 74 race-high: 79 +glm-4-9b-chat-hf: + gsm8k: 75 + race-middle: 88 + race-high: 88 + deepseek-7b-chat-hf: gsm8k: 60 race-middle: 74 @@ -23,6 +28,16 @@ gemma-7b-it-hf: race-middle: 74 race-high: 71 +internlm2_5-7b-chat-hf: + gsm8k: 86 + race-middle: 92 + race-high: 93 + +internlm2_5-7b-chat-turbomind: + gsm8k: 87 + race-middle: 92 + race-high: 93 + internlm2-chat-1.8b-turbomind: gsm8k: 40 race-middle: 82 @@ -108,6 +123,10 @@ deepseek-moe-16b-base-hf: race-middle: 35 race-high: 23 +lmdeploy-api-test: + gsm8k: 90 + race-middle: 95 + race-high: 96 deepseek-7b-base-turbomind: gsm8k: 21 @@ -124,8 +143,18 @@ gemma-7b-hf: race-middle: 59 race-high: 66 +internlm2_5-7b-hf: + gsm8k: 46 + race-middle: 92 + race-high: 91 + +internlm2_5-7b-turbomind: + gsm8k: 73 + race-middle: 90 + race-high: 91 + internlm2-1.8b-turbomind: - gsm8k: 27 + gsm8k: 25 race-middle: 75 race-high: 72 @@ -134,6 +163,11 @@ internlm2-7b-turbomind: race-middle: 78 race-high: 76 +internlm2-base-7b-hf: + gsm8k: 2 + race-middle: 71 + race-high: 74 + internlm2-base-7b-turbomind: gsm8k: 39 race-middle: 75 diff --git a/.github/workflows/daily-run-test.yml b/.github/workflows/daily-run-test.yml index 7e01a3a4..c0884d33 100644 --- a/.github/workflows/daily-run-test.yml +++ b/.github/workflows/daily-run-test.yml @@ -14,6 +14,7 @@ env: PIP_CACHE_PATH: /cpfs01/user/qa-llm-cicd/.cache/pip USERSPACE_PREFIX: /cpfs01/user/qa-llm-cicd HF_CACHE_PATH: /cpfs01/shared/public/public_hdd/llmeval/model_weights/hf_hub + DATEASET_CACHE_PATH: /cpfs01/shared/public/public_hdd/llmeval/llm-evaluation-datasets HF_DATASETS_OFFLINE: 1 TRANSFORMERS_OFFLINE: 1 HF_HUB_OFFLINE: 1 @@ -32,7 +33,7 @@ jobs: conda create -y --name ${{env.CONDA_ENV}} python=3.10 conda activate ${{env.CONDA_ENV}} pip install /cpfs01/user/qa-llm-cicd/packages/lmdeploy-0.5.0+cu118-cp310-cp310-manylinux2014_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}} --index-url https://download.pytorch.org/whl/cu118 - pip install /cpfs01/user/qa-llm-cicd/packages/vllm-0.5.0.post1+cu118-cp310-cp310-manylinux1_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}} --index-url https://download.pytorch.org/whl/cu118 + pip install /cpfs01/user/qa-llm-cicd/packages/vllm-0.5.2+cu118-cp310-cp310-manylinux1_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}} FLASH_ATTENTION_FORCE_BUILD=TRUE pip install /cpfs01/user/qa-llm-cicd/packages/flash_attn-2.5.8+cu118torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl --index-url https://download.pytorch.org/whl/cu118 pip install torch==2.2.2 torchvision==0.17.2 torchaudio==2.2.2 --cache-dir ${{env.PIP_CACHE_PATH}} --index-url https://download.pytorch.org/whl/cu118 pip install xformers==0.0.25.post1 --cache-dir ${{env.PIP_CACHE_PATH}} --extra-index-url https://download.pytorch.org/whl/cu118 @@ -46,7 +47,7 @@ jobs: conda info --envs - name: Prepare - prepare data and hf model run: | - cp -r ${{env.USERSPACE_PREFIX}}/data . + ln -s ${{env.DATEASET_CACHE_PATH}} data rm -rf ~/.cache/huggingface/hub -f && mkdir ~/.cache -p && mkdir ~/.cache/huggingface -p ln -s ${{env.HF_CACHE_PATH}} ~/.cache/huggingface/hub - name: Run chat model test @@ -54,27 +55,40 @@ jobs: eval "$(conda shell.bash hook)" conda activate ${{env.CONDA_ENV}} conda info --envs - rm -rf regression_result_daily - export from_tf=TRUE - rm -rf /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/*/summary - python3 run.py .github/scripts/eval_regression_chat.py --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }} --reuse - cp -r /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/*/summary regression_result_daily + python3 run.py .github/scripts/eval_regression_chat.py --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/chat --reuse + rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/chat/*/summary regression_result_daily python -m pytest -m chat -s -v --color=yes .github/scripts/oc_score_assert.py - name: Run base model test run: | eval "$(conda shell.bash hook)" conda activate ${{env.CONDA_ENV}} conda info --envs - rm -rf regression_result_daily - export from_tf=TRUE - rm -rf /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/*/summary - python3 run.py .github/scripts/eval_regression_base.py --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }} --reuse - cp -r /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/*/summary regression_result_daily + python3 run.py .github/scripts/eval_regression_base.py --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/base --reuse + rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/base/*/summary regression_result_daily python -m pytest -m base -s -v --color=yes .github/scripts/oc_score_assert.py + - name: Run command testcase + run: | + eval "$(conda shell.bash hook)" + conda activate ${{env.CONDA_ENV}} + conda info --envs + export from_tf=TRUE + python tools/list_configs.py internlm2_5 mmlu + python run.py --models hf_internlm2_5_7b --datasets race_ppl --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd1 --reuse + rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd1/*/summary regression_result_daily + python -m pytest -m case1 -s -v --color=yes .github/scripts/oc_score_assert.py + python run.py --models hf_internlm2_5_7b_chat --datasets race_gen -a lmdeploy --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd2 --reuse + rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd2/*/summary regression_result_daily + python -m pytest -m case2 -s -v --color=yes .github/scripts/oc_score_assert.py + python run.py --datasets race_ppl --hf-type base --hf-path internlm/internlm2_5-7b --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd3 --reuse + rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd3/*/summary regression_result_daily + python -m pytest -m case3 -s -v --color=yes .github/scripts/oc_score_assert.py + python run.py --datasets race_gen --hf-type chat --hf-path internlm/internlm2_5-7b-chat --work-dir /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd4 --reuse + rm regression_result_daily -f && ln -s /cpfs01/user/qa-llm-cicd/report/${{ github.run_id }}/cmd4/*/summary regression_result_daily + python -m pytest -m case4 -s -v --color=yes .github/scripts/oc_score_assert.py - name: Remove Conda Env if: always() run: | - cp -r regression_result_daily/* /cpfs01/user/qa-llm-cicd/report + rm -rf regression_result_daily eval "$(conda shell.bash hook)" conda env remove -y --name ${{env.CONDA_ENV}} conda info --envs