mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
Update daily test (#871)
* add daily test case
* Update pr-run-test.yml
* Update daily-run-test.yml
* Update daily-run-test.yml
* Update pr-run-test.yml
* Update daily-run-test.yml
* Update oc_score_assert.py
* Update daily-run-test.yml
* Update daily-run-test.yml
* Update daily-run-test.yml
* update testcase baseline
* fix test case name
* add more models into daily test

---------

Co-authored-by: zhulin1 <zhulin1@pjlab.org.cn>
Co-authored-by: Leymore <zfz-960727@163.com>
This commit is contained in:
parent
fc84aff963
commit
b4a9acd7be
11
.github/scripts/oc_score_assert.py
vendored
11
.github/scripts/oc_score_assert.py
vendored
@ -6,8 +6,11 @@ import yaml
|
||||
|
||||
output_path = 'regression_result_daily'
|
||||
|
||||
model_list = ['internlm-7b-hf', 'internlm-chat-7b-hf']
|
||||
dataset_list = ['ARC-c', 'chid-dev', 'chid-test', 'openai_humaneval']
|
||||
model_list = ['internlm-7b-hf', 'internlm-chat-7b-hf', 'chatglm3-6b-base-hf']
|
||||
dataset_list = [
|
||||
'ARC-c', 'chid-dev', 'chid-test', 'openai_humaneval', 'openbookqa',
|
||||
'openbookqa_fact'
|
||||
]
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
@ -34,8 +37,8 @@ class TestChat:
|
||||
|
||||
@pytest.mark.parametrize('model, dataset', [(p1, p2) for p1 in model_list
|
||||
for p2 in dataset_list])
|
||||
def test_demo_default(self, baseline_scores, result_scores, model,
|
||||
dataset):
|
||||
def test_model_dataset_score(self, baseline_scores, result_scores, model,
|
||||
dataset):
|
||||
base_score = baseline_scores.get(model).get(dataset)
|
||||
result_score = result_scores.get(model).get(dataset)
|
||||
assert_score(result_score, base_score)
|
||||
|
12
.github/scripts/oc_score_baseline.yaml
vendored
12
.github/scripts/oc_score_baseline.yaml
vendored
@ -3,9 +3,21 @@ internlm-7b-hf:
|
||||
chid-dev: 81.68
|
||||
chid-test: 83.67
|
||||
openai_humaneval: 10.37
|
||||
openbookqa: 44.4
|
||||
openbookqa_fact: 73.2
|
||||
|
||||
internlm-chat-7b-hf:
|
||||
ARC-c: 36.95
|
||||
chid-dev: 71.78
|
||||
chid-test: 76.87
|
||||
openai_humaneval: 21.34
|
||||
openbookqa: 66.6
|
||||
openbookqa_fact: 80.4
|
||||
|
||||
chatglm3-6b-base-hf:
|
||||
ARC-c: 43.05
|
||||
chid-dev: 80.2
|
||||
chid-test: 80.77
|
||||
openai_humaneval: 20.73
|
||||
openbookqa: 79.8
|
||||
openbookqa_fact: 92.2
|
||||
|
7
.github/workflows/daily-run-test.yml
vendored
7
.github/workflows/daily-run-test.yml
vendored
@ -50,11 +50,12 @@ jobs:
|
||||
conda info --envs
|
||||
rm -rf regression_result_daily
|
||||
export from_tf=TRUE
|
||||
python3 run.py --models hf_internlm_chat_7b hf_internlm_7b --datasets FewCLUE_chid_ppl humaneval_gen ARC_c_ppl --work-dir regression_result_daily --debug
|
||||
python3 run.py --models hf_internlm_chat_7b hf_internlm_7b hf_chatglm3_6b_base hf_chatglm3_6b hf_qwen_7b_chat hf_qwen_7b --datasets FewCLUE_chid_ppl humaneval_gen ARC_c_ppl obqa_ppl --work-dir regression_result_daily
|
||||
- name: Get result
|
||||
run: |
|
||||
eval "$(conda shell.bash hook)"
|
||||
pip install pytest --cache-dir ${{env.PIP_CACHE_PATH}}
|
||||
pytest -s -v --color=yes .github/scripts/oc_score_assert.py
|
||||
python -m pytest -s -v --color=yes .github/scripts/oc_score_assert.py
|
||||
- name: Remove Conda Env
|
||||
if: always()
|
||||
run: |
|
||||
@ -71,4 +72,4 @@ jobs:
|
||||
steps:
|
||||
- name: notify
|
||||
run: |
|
||||
curl -X POST -H "Content-Type: application/json" -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"Opencompass- pr test failed","content":[[{"tag":"text","text":"branch: ${{github.ref_name}}, run action: ${{github.workflow}} failed. "},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"},{"tag":"at","user_id":"'${{ secrets.USER_ID }}'"}]]}}}}' ${{ secrets.WEBHOOK_URL }}
|
||||
curl -X POST -H "Content-Type: application/json" -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"Opencompass- Daily test failed","content":[[{"tag":"text","text":"branch: ${{github.ref_name}}, run action: ${{github.workflow}} failed. "},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"},{"tag":"at","user_id":"'${{ secrets.USER_ID }}'"}]]}}}}' ${{ secrets.WEBHOOK_URL }}
|
||||
|
Loading…
Reference in New Issue
Block a user