update test workflow (#1167)

* Update pr-run-test.yml

* Update daily-run-test.yml

* Update daily-run-test.yml

* Update pr-run-test.yml

* Update daily-run-test.yml

* Update daily-run-test.yml

* Update daily-run-test.yml

* Update daily-run-test.yml

* Update oc_score_baseline.yaml

* Update daily-run-test.yml

* Update oc_score_assert.py

---------

Co-authored-by: zhulin1 <zhulin1@pjlab.org.cn>
This commit is contained in:
zhulinJulia24 2024-05-16 15:32:57 +08:00 committed by GitHub
parent 8ea2c404d7
commit 94eb90569f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 26 additions and 18 deletions

View File

@@ -6,7 +6,7 @@ import yaml
output_path = 'regression_result_daily'
model_list = ['internlm-7b-hf', 'internlm-chat-7b-hf', 'chatglm3-6b-base-hf']
model_list = ['internlm2-7b-hf', 'internlm-chat-7b-hf', 'chatglm3-6b-base-hf']
dataset_list = [
'ARC-c', 'chid-dev', 'chid-test', 'openai_humaneval', 'openbookqa',
'openbookqa_fact'

View File

@@ -1,10 +1,10 @@
internlm-7b-hf:
ARC-c: 36.27
chid-dev: 81.68
chid-test: 83.67
openai_humaneval: 10.37
openbookqa: 44.4
openbookqa_fact: 73.2
ARC-c: 34.24
chid-dev: 79.70
chid-test: 81.12
openai_humaneval: 10.98
openbookqa: 47.20
openbookqa_fact: 74.00
internlm-chat-7b-hf:
ARC-c: 36.95
@@ -15,9 +15,17 @@ internlm-chat-7b-hf:
openbookqa_fact: 80.4
chatglm3-6b-base-hf:
ARC-c: 43.05
chid-dev: 80.2
chid-test: 80.77
ARC-c: 44.41
chid-dev: 78.22
chid-test: 78.57
openai_humaneval: 20.73
openbookqa: 79.8
openbookqa_fact: 92.2
openbookqa: 78.40
openbookqa_fact: 92.00
internlm2-7b-hf:
ARC-c: 34.92
chid-dev: 55.94
chid-test: 53.70
openai_humaneval: 44.51
openbookqa: 83.00
openbookqa_fact: 83.00

View File

@@ -45,7 +45,6 @@ jobs:
cp -r ${{env.USERSPACE_PREFIX}}/data .
rm -rf ~/.cache/huggingface/hub -f && mkdir ~/.cache -p && mkdir ~/.cache/huggingface -p
ln -s ${{env.HF_CACHE_PATH}} ~/.cache/huggingface/hub
export HF_DATASETS_OFFLINE=1; export TRANSFORMERS_OFFLINE=1; export HF_HUB_OFFLINE=1;
- name: Run test
run: |
eval "$(conda shell.bash hook)"
@@ -53,7 +52,7 @@ jobs:
conda info --envs
rm -rf regression_result_daily
export from_tf=TRUE
python3 run.py --models hf_internlm_chat_7b hf_internlm_7b hf_chatglm3_6b_base hf_chatglm3_6b hf_qwen_7b_chat hf_qwen_7b --datasets FewCLUE_chid_ppl humaneval_gen ARC_c_ppl obqa_ppl --work-dir regression_result_daily
python3 run.py --models hf_internlm_chat_7b hf_internlm2_7b hf_chatglm3_6b_base hf_chatglm3_6b hf_qwen_7b_chat hf_qwen_7b --datasets FewCLUE_chid_ppl humaneval_gen ARC_c_ppl obqa_ppl --work-dir regression_result_daily
- name: Get result
run: |
eval "$(conda shell.bash hook)"
@@ -62,8 +61,9 @@ jobs:
- name: Remove Conda Env
if: always()
run: |
cp -r regression_result_daily/* /cpfs01/user/qa-llm-cicd/report
eval "$(conda shell.bash hook)"
conda env remove --name ${{env.CONDA_ENV}}
conda env remove -y --name ${{env.CONDA_ENV}}
conda info --envs
notify_to_feishu:

View File

@@ -55,10 +55,10 @@ jobs:
- name: Get result
run: |
score=$(sed -n '$p' regression_result/*/summary/*.csv | awk -F ',' '{print $NF}')
if (( ${score%.*} >= 75 && ${score%.*} <= 85 )); then
echo "score is $score between 75 and 85"
if (( ${score%.*} >= 79 && ${score%.*} <= 81 )); then
echo "score is $score between 79 and 81"
else
echo "score is $score not between 75 and 85"
echo "score is $score not between 79 and 81"
exit 1
fi
rm -rf regression_result