This commit is contained in:
zhulinJulia24 2025-04-03 09:57:01 +08:00
parent 69082bafb8
commit c2cc5f7054
4 changed files with 8 additions and 16 deletions

View File

@@ -116,6 +116,8 @@ with read_base():
from ...volc import infer as volc_infer # noqa: F401, E501
hf_glm4_9b_model[0]['path'] = 'THUDM/glm-4-9b-hf'
race_datasets = [race_datasets[1]]
models = sum([v for k, v in locals().items() if k.endswith('_model')], [])
datasets = sum([v for k, v in locals().items() if k.endswith('_datasets')], [])

View File

@@ -97,8 +97,6 @@ with read_base():
models as hf_mistral_nemo_instruct_2407_model # noqa: F401, E501
from opencompass.configs.models.mistral.hf_mistral_small_instruct_2409 import \
models as hf_mistral_small_instruct_2409_model # noqa: F401, E501
from opencompass.configs.models.mistral.hf_mixtral_8x7b_instruct_v0_1 import \
models as hf_mixtral_8x7b_instruct_v0_1_model # noqa: F401, E501
from opencompass.configs.models.mistral.lmdeploy_mistral_large_instruct_2411 import \
models as \
lmdeploy_mistral_large_instruct_2411_model # noqa: F401, E501

View File

@@ -89,9 +89,6 @@ chat:
llama-3-8b-instruct-turbomind:
gsm8k_accuracy: 68.75
race-high_accuracy: 84.38
internvl2_5-8b-turbomind:
gsm8k_accuracy: 0
race-high_accuracy: 0
mistral-7b-instruct-v0.2-hf:
gsm8k_accuracy: 40.62
race-high_accuracy: 75
@@ -182,15 +179,15 @@ chat:
yi-1.5-34b-chat-turbomind:
gsm8k_accuracy: 75.00
race-high_accuracy: 93.75
deepseek-67b-chat-turbomind:
gsm8k_accuracy: 75.00
race-high_accuracy: 78.12
deepseek-r1-distill-qwen-32b-turbomind:
gsm8k_accuracy: 25
race-high_accuracy: 90.62
llama-3_3-70b-instruct-turbomind:
gsm8k_accuracy: 93.75
race-high_accuracy: 87.5
mixtral-8x7b-instruct-v0.1-hf:
gsm8k_accuracy: 59.38
race-high_accuracy: 81.25
mixtral-large-instruct-2411-turbomind:
gsm8k_accuracy: 87.50
race-high_accuracy: 93.75
@@ -228,15 +225,10 @@ base:
GPQA_diamond_accuracy: 0
race-high_accuracy: 46.88
winogrande_accuracy: 71.88
deepseek-moe-16b-base-hf:
gsm8k_accuracy: 21.88
GPQA_diamond_accuracy: 0
race-high_accuracy: 21.88
winogrande_accuracy: 65.62
deepseek-7b-base-turbomind:
gsm8k_accuracy: 21.88
GPQA_diamond_accuracy: 0
race-high_accuracy: 46.88
race-high_accuracy: 43.75
winogrande_accuracy: 84.38
deepseek-moe-16b-base-vllm:
gsm8k_accuracy: 21.88
@@ -269,7 +261,7 @@ base:
race-high_accuracy:
winogrande_accuracy:
gemma-7b-vllm:
gsm8k_accuracy: 53.12
gsm8k_accuracy: 43.75
GPQA_diamond_accuracy: 9.38
race-high_accuracy:
winogrande_accuracy:

View File

@@ -258,7 +258,7 @@ jobs:
conda info --envs
export from_tf=TRUE
python tools/list_configs.py internlm2_5 mmlu
opencompass --models hf_internlm2_5_7b hf_internlm3_8b_instruct --datasets race_ppl demo_gsm8k_chat_gen --batch-size 1 --max-out-len 256 --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd1 --reuse --max-num-workers 2 --dump-eval-details
opencompass --models hf_internlm2_5_7b --datasets race_ppl demo_gsm8k_chat_gen --batch-size 1 --max-out-len 256 --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd1 --reuse --max-num-workers 2 --dump-eval-details
rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd1/*/summary regression_result_daily
python -m pytest -m case1 -s -v --color=yes .github/scripts/oc_score_assert.py
opencompass --models hf_internlm2_5_7b_chat hf_internlm3_8b_instruct --datasets race_gen demo_gsm8k_chat_gen -a lmdeploy --batch-size 1 --max-out-len 256 --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd2 --reuse --max-num-workers 2 --dump-eval-details