mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
update test workflow (#1167)
* Update pr-run-test.yml
* Update daily-run-test.yml
* Update daily-run-test.yml
* Update pr-run-test.yml
* Update daily-run-test.yml
* Update daily-run-test.yml
* Update daily-run-test.yml
* Update daily-run-test.yml
* Update oc_score_baseline.yaml
* Update daily-run-test.yml
* Update oc_score_assert.py

---------

Co-authored-by: zhulin1 <zhulin1@pjlab.org.cn>
This commit is contained in:
parent
8ea2c404d7
commit
94eb90569f
2
.github/scripts/oc_score_assert.py
vendored
2
.github/scripts/oc_score_assert.py
vendored
@ -6,7 +6,7 @@ import yaml
|
||||
|
||||
output_path = 'regression_result_daily'
|
||||
|
||||
model_list = ['internlm-7b-hf', 'internlm-chat-7b-hf', 'chatglm3-6b-base-hf']
|
||||
model_list = ['internlm2-7b-hf', 'internlm-chat-7b-hf', 'chatglm3-6b-base-hf']
|
||||
dataset_list = [
|
||||
'ARC-c', 'chid-dev', 'chid-test', 'openai_humaneval', 'openbookqa',
|
||||
'openbookqa_fact'
|
||||
|
30
.github/scripts/oc_score_baseline.yaml
vendored
30
.github/scripts/oc_score_baseline.yaml
vendored
@ -1,10 +1,10 @@
|
||||
internlm-7b-hf:
|
||||
ARC-c: 36.27
|
||||
chid-dev: 81.68
|
||||
chid-test: 83.67
|
||||
openai_humaneval: 10.37
|
||||
openbookqa: 44.4
|
||||
openbookqa_fact: 73.2
|
||||
ARC-c: 34.24
|
||||
chid-dev: 79.70
|
||||
chid-test: 81.12
|
||||
openai_humaneval: 10.98
|
||||
openbookqa: 47.20
|
||||
openbookqa_fact: 74.00
|
||||
|
||||
internlm-chat-7b-hf:
|
||||
ARC-c: 36.95
|
||||
@ -15,9 +15,17 @@ internlm-chat-7b-hf:
|
||||
openbookqa_fact: 80.4
|
||||
|
||||
chatglm3-6b-base-hf:
|
||||
ARC-c: 43.05
|
||||
chid-dev: 80.2
|
||||
chid-test: 80.77
|
||||
ARC-c: 44.41
|
||||
chid-dev: 78.22
|
||||
chid-test: 78.57
|
||||
openai_humaneval: 20.73
|
||||
openbookqa: 79.8
|
||||
openbookqa_fact: 92.2
|
||||
openbookqa: 78.40
|
||||
openbookqa_fact: 92.00
|
||||
|
||||
internlm2-7b-hf:
|
||||
ARC-c: 34.92
|
||||
chid-dev: 55.94
|
||||
chid-test: 53.70
|
||||
openai_humaneval: 44.51
|
||||
openbookqa: 83.00
|
||||
openbookqa_fact: 83.00
|
||||
|
6
.github/workflows/daily-run-test.yml
vendored
6
.github/workflows/daily-run-test.yml
vendored
@ -45,7 +45,6 @@ jobs:
|
||||
cp -r ${{env.USERSPACE_PREFIX}}/data .
|
||||
rm -rf ~/.cache/huggingface/hub -f && mkdir ~/.cache -p && mkdir ~/.cache/huggingface -p
|
||||
ln -s ${{env.HF_CACHE_PATH}} ~/.cache/huggingface/hub
|
||||
export HF_DATASETS_OFFLINE=1; export TRANSFORMERS_OFFLINE=1; export HF_HUB_OFFLINE=1;
|
||||
- name: Run test
|
||||
run: |
|
||||
eval "$(conda shell.bash hook)"
|
||||
@ -53,7 +52,7 @@ jobs:
|
||||
conda info --envs
|
||||
rm -rf regression_result_daily
|
||||
export from_tf=TRUE
|
||||
python3 run.py --models hf_internlm_chat_7b hf_internlm_7b hf_chatglm3_6b_base hf_chatglm3_6b hf_qwen_7b_chat hf_qwen_7b --datasets FewCLUE_chid_ppl humaneval_gen ARC_c_ppl obqa_ppl --work-dir regression_result_daily
|
||||
python3 run.py --models hf_internlm_chat_7b hf_internlm2_7b hf_chatglm3_6b_base hf_chatglm3_6b hf_qwen_7b_chat hf_qwen_7b --datasets FewCLUE_chid_ppl humaneval_gen ARC_c_ppl obqa_ppl --work-dir regression_result_daily
|
||||
- name: Get result
|
||||
run: |
|
||||
eval "$(conda shell.bash hook)"
|
||||
@ -62,8 +61,9 @@ jobs:
|
||||
- name: Remove Conda Env
|
||||
if: always()
|
||||
run: |
|
||||
cp -r regression_result_daily/* /cpfs01/user/qa-llm-cicd/report
|
||||
eval "$(conda shell.bash hook)"
|
||||
conda env remove --name ${{env.CONDA_ENV}}
|
||||
conda env remove -y --name ${{env.CONDA_ENV}}
|
||||
conda info --envs
|
||||
|
||||
notify_to_feishu:
|
||||
|
6
.github/workflows/pr-run-test.yml
vendored
6
.github/workflows/pr-run-test.yml
vendored
@ -55,10 +55,10 @@ jobs:
|
||||
- name: Get result
|
||||
run: |
|
||||
score=$(sed -n '$p' regression_result/*/summary/*.csv | awk -F ',' '{print $NF}')
|
||||
if (( ${score%.*} >= 75 && ${score%.*} <= 85 )); then
|
||||
echo "score is $score between 75 and 85"
|
||||
if (( ${score%.*} >= 79 && ${score%.*} <= 81 )); then
|
||||
echo "score is $score between 79 and 81"
|
||||
else
|
||||
echo "score is $score not between 75 and 85"
|
||||
echo "score is $score not between 79 and 81"
|
||||
exit 1
|
||||
fi
|
||||
rm -rf regression_result
|
||||
|
Loading…
Reference in New Issue
Block a user