Mirror of https://github.com/open-compass/opencompass.git (synced 2025-05-30 16:03:24 +08:00)

Commit c2cc5f7054 ("update"), parent 69082bafb8.
@@ -116,6 +116,8 @@ with read_base():
    from ...volc import infer as volc_infer  # noqa: F401, E501

hf_glm4_9b_model[0]['path'] = 'THUDM/glm-4-9b-hf'
race_datasets = [race_datasets[1]]
models = sum([v for k, v in locals().items() if k.endswith('_model')], [])
datasets = sum([v for k, v in locals().items() if k.endswith('_datasets')], [])
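The hunk above shows the pattern these CI eval configs follow: model and dataset lists are pulled in inside a read_base() block, tweaked in place (a HuggingFace path override, a dataset subset), and then flattened into the `models` and `datasets` names that OpenCompass consumes. Below is a minimal sketch of a config written in this style; the two module paths inside read_base() are illustrative assumptions, while the override and aggregation lines are taken from the diff.

# Minimal sketch of an eval config in this style. The two module paths in
# the read_base() block are illustrative assumptions; the override and the
# aggregation lines mirror the diff above.
from mmengine.config import read_base

with read_base():
    from opencompass.configs.models.chatglm.hf_glm4_9b import \
        models as hf_glm4_9b_model  # noqa: F401, E501  (assumed module path)
    from opencompass.configs.datasets.race.race_gen import \
        race_datasets  # noqa: F401, E501  (assumed module path)

# Each *_model object is a list of model dicts, so a path can be
# overridden in place.
hf_glm4_9b_model[0]['path'] = 'THUDM/glm-4-9b-hf'

# Keep only one split of the RACE datasets, as the diff does.
race_datasets = [race_datasets[1]]

# Flatten every imported *_model / *_datasets list into the two names
# that OpenCompass reads.
models = sum([v for k, v in locals().items() if k.endswith('_model')], [])
datasets = sum([v for k, v in locals().items() if k.endswith('_datasets')], [])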
@@ -97,8 +97,6 @@ with read_base():
        models as hf_mistral_nemo_instruct_2407_model  # noqa: F401, E501
    from opencompass.configs.models.mistral.hf_mistral_small_instruct_2409 import \
        models as hf_mistral_small_instruct_2409_model  # noqa: F401, E501
    from opencompass.configs.models.mistral.hf_mixtral_8x7b_instruct_v0_1 import \
        models as hf_mixtral_8x7b_instruct_v0_1_model  # noqa: F401, E501
    from opencompass.configs.models.mistral.lmdeploy_mistral_large_instruct_2411 import \
        models as \
        lmdeploy_mistral_large_instruct_2411_model  # noqa: F401, E501
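Each import above binds a list of model dicts under a distinct *_model name, so the aggregated `models` list built the same way as in the previous config can still be trimmed afterwards. A small sketch, assuming each model dict carries an 'abbr' key (an assumption based on common OpenCompass model configs; the key is not shown in this diff):

# Sketch: keep only the Mistral/Mixtral entries from the aggregated list.
# The 'abbr' key on each model dict is an assumption.
models = [m for m in models
          if 'mistral' in m.get('abbr', '').lower()
          or 'mixtral' in m.get('abbr', '').lower()]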
.github/scripts/oc_score_baseline_testrange.yaml (vendored, 18 changed lines)
@@ -89,9 +89,6 @@ chat:
  llama-3-8b-instruct-turbomind:
    gsm8k_accuracy: 68.75
    race-high_accuracy: 84.38
  internvl2_5-8b-turbomind:
    gsm8k_accuracy: 0
    race-high_accuracy: 0
  mistral-7b-instruct-v0.2-hf:
    gsm8k_accuracy: 40.62
    race-high_accuracy: 75
@@ -182,15 +179,15 @@ chat:
  yi-1.5-34b-chat-turbomind:
    gsm8k_accuracy: 75.00
    race-high_accuracy: 93.75
  deepseek-67b-chat-turbomind:
    gsm8k_accuracy: 75.00
    race-high_accuracy: 78.12
  deepseek-r1-distill-qwen-32b-turbomind:
    gsm8k_accuracy: 25
    race-high_accuracy: 90.62
  llama-3_3-70b-instruct-turbomind:
    gsm8k_accuracy: 93.75
    race-high_accuracy: 87.5
  mixtral-8x7b-instruct-v0.1-hf:
    gsm8k_accuracy: 59.38
    race-high_accuracy: 81.25
  mixtral-large-instruct-2411-turbomind:
    gsm8k_accuracy: 87.50
    race-high_accuracy: 93.75
@@ -228,15 +225,10 @@ base:
    GPQA_diamond_accuracy: 0
    race-high_accuracy: 46.88
    winogrande_accuracy: 71.88
  deepseek-moe-16b-base-hf:
    gsm8k_accuracy: 21.88
    GPQA_diamond_accuracy: 0
    race-high_accuracy: 21.88
    winogrande_accuracy: 65.62
  deepseek-7b-base-turbomind:
    gsm8k_accuracy: 21.88
    GPQA_diamond_accuracy: 0
    race-high_accuracy: 46.88
    race-high_accuracy: 43.75
    winogrande_accuracy: 84.38
  deepseek-moe-16b-base-vllm:
    gsm8k_accuracy: 21.88
@@ -269,7 +261,7 @@ base:
    race-high_accuracy:
    winogrande_accuracy:
  gemma-7b-vllm:
    gsm8k_accuracy: 53.12
    gsm8k_accuracy: 43.75
    GPQA_diamond_accuracy: 9.38
    race-high_accuracy:
    winogrande_accuracy:
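The baseline file above records per-model accuracies (gsm8k, race-high, GPQA_diamond, winogrande) under chat: and base: sections; the pytest step in the workflow below presumably compares a fresh run's summary against these values. A minimal sketch of such a check follows; the nesting is inferred from the hunks above, and the tolerance and fresh score are illustrative assumptions, not the actual logic of oc_score_assert.py.

# Minimal sketch of a baseline check; tolerance and fresh_score are
# illustrative assumptions, not the actual logic of oc_score_assert.py.
import yaml

with open('.github/scripts/oc_score_baseline_testrange.yaml') as f:
    baseline = yaml.safe_load(f)

model = 'llama-3-8b-instruct-turbomind'
metric = 'gsm8k_accuracy'
fresh_score = 70.0  # hypothetical value read from the run's summary

expected = baseline['chat'][model][metric]  # nesting inferred from the diff
assert abs(fresh_score - expected) <= 5.0, (
    f'{model} {metric}: got {fresh_score}, baseline {expected}')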
.github/workflows/daily-run-test.yml (vendored, 2 changed lines)
@@ -258,7 +258,7 @@ jobs:
    conda info --envs
    export from_tf=TRUE
    python tools/list_configs.py internlm2_5 mmlu
    opencompass --models hf_internlm2_5_7b hf_internlm3_8b_instruct --datasets race_ppl demo_gsm8k_chat_gen --batch-size 1 --max-out-len 256 --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd1 --reuse --max-num-workers 2 --dump-eval-details
    opencompass --models hf_internlm2_5_7b --datasets race_ppl demo_gsm8k_chat_gen --batch-size 1 --max-out-len 256 --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd1 --reuse --max-num-workers 2 --dump-eval-details
    rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd1/*/summary regression_result_daily
    python -m pytest -m case1 -s -v --color=yes .github/scripts/oc_score_assert.py
    opencompass --models hf_internlm2_5_7b_chat hf_internlm3_8b_instruct --datasets race_gen demo_gsm8k_chat_gen -a lmdeploy --batch-size 1 --max-out-len 256 --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd2 --reuse --max-num-workers 2 --dump-eval-details
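The changed line in this hunk appears to drop hf_internlm3_8b_instruct from the first smoke-test command, leaving a single-model run. For reference, the sketch below shells out to the same command locally with the flags shown in the diff; the work dir is a local stand-in for the CI paths ${REPORT_ROOT}/${run_id}/cmd1.

# Sketch: reproduce the first CI command locally. The work dir is a local
# stand-in for the CI path; all flags come from the diff above.
import subprocess

subprocess.run(
    [
        'opencompass',
        '--models', 'hf_internlm2_5_7b',
        '--datasets', 'race_ppl', 'demo_gsm8k_chat_gen',
        '--batch-size', '1',
        '--max-out-len', '256',
        '--work-dir', 'outputs/cmd1',
        '--reuse',
        '--max-num-workers', '2',
        '--dump-eval-details',
    ],
    check=True,
)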