mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
update
This commit is contained in:
parent
69082bafb8
commit
c2cc5f7054
@ -116,6 +116,8 @@ with read_base():
|
|||||||
|
|
||||||
from ...volc import infer as volc_infer # noqa: F401, E501
|
from ...volc import infer as volc_infer # noqa: F401, E501
|
||||||
|
|
||||||
|
hf_glm4_9b_model[0]['path'] = 'THUDM/glm-4-9b-hf'
|
||||||
|
|
||||||
race_datasets = [race_datasets[1]]
|
race_datasets = [race_datasets[1]]
|
||||||
models = sum([v for k, v in locals().items() if k.endswith('_model')], [])
|
models = sum([v for k, v in locals().items() if k.endswith('_model')], [])
|
||||||
datasets = sum([v for k, v in locals().items() if k.endswith('_datasets')], [])
|
datasets = sum([v for k, v in locals().items() if k.endswith('_datasets')], [])
|
||||||
|
@ -97,8 +97,6 @@ with read_base():
|
|||||||
models as hf_mistral_nemo_instruct_2407_model # noqa: F401, E501
|
models as hf_mistral_nemo_instruct_2407_model # noqa: F401, E501
|
||||||
from opencompass.configs.models.mistral.hf_mistral_small_instruct_2409 import \
|
from opencompass.configs.models.mistral.hf_mistral_small_instruct_2409 import \
|
||||||
models as hf_mistral_small_instruct_2409_model # noqa: F401, E501
|
models as hf_mistral_small_instruct_2409_model # noqa: F401, E501
|
||||||
from opencompass.configs.models.mistral.hf_mixtral_8x7b_instruct_v0_1 import \
|
|
||||||
models as hf_mixtral_8x7b_instruct_v0_1_model # noqa: F401, E501
|
|
||||||
from opencompass.configs.models.mistral.lmdeploy_mistral_large_instruct_2411 import \
|
from opencompass.configs.models.mistral.lmdeploy_mistral_large_instruct_2411 import \
|
||||||
models as \
|
models as \
|
||||||
lmdeploy_mistral_large_instruct_2411_model # noqa: F401, E501
|
lmdeploy_mistral_large_instruct_2411_model # noqa: F401, E501
|
||||||
|
18
.github/scripts/oc_score_baseline_testrange.yaml
vendored
18
.github/scripts/oc_score_baseline_testrange.yaml
vendored
@ -89,9 +89,6 @@ chat:
|
|||||||
llama-3-8b-instruct-turbomind:
|
llama-3-8b-instruct-turbomind:
|
||||||
gsm8k_accuracy: 68.75
|
gsm8k_accuracy: 68.75
|
||||||
race-high_accuracy: 84.38
|
race-high_accuracy: 84.38
|
||||||
internvl2_5-8b-turbomind:
|
|
||||||
gsm8k_accuracy: 0
|
|
||||||
race-high_accuracy: 0
|
|
||||||
mistral-7b-instruct-v0.2-hf:
|
mistral-7b-instruct-v0.2-hf:
|
||||||
gsm8k_accuracy: 40.62
|
gsm8k_accuracy: 40.62
|
||||||
race-high_accuracy: 75
|
race-high_accuracy: 75
|
||||||
@ -182,15 +179,15 @@ chat:
|
|||||||
yi-1.5-34b-chat-turbomind:
|
yi-1.5-34b-chat-turbomind:
|
||||||
gsm8k_accuracy: 75.00
|
gsm8k_accuracy: 75.00
|
||||||
race-high_accuracy: 93.75
|
race-high_accuracy: 93.75
|
||||||
|
deepseek-67b-chat-turbomind:
|
||||||
|
gsm8k_accuracy: 75.00
|
||||||
|
race-high_accuracy: 78.12
|
||||||
deepseek-r1-distill-qwen-32b-turbomind:
|
deepseek-r1-distill-qwen-32b-turbomind:
|
||||||
gsm8k_accuracy: 25
|
gsm8k_accuracy: 25
|
||||||
race-high_accuracy: 90.62
|
race-high_accuracy: 90.62
|
||||||
llama-3_3-70b-instruct-turbomind:
|
llama-3_3-70b-instruct-turbomind:
|
||||||
gsm8k_accuracy: 93.75
|
gsm8k_accuracy: 93.75
|
||||||
race-high_accuracy: 87.5
|
race-high_accuracy: 87.5
|
||||||
mixtral-8x7b-instruct-v0.1-hf:
|
|
||||||
gsm8k_accuracy: 59.38
|
|
||||||
race-high_accuracy: 81.25
|
|
||||||
mixtral-large-instruct-2411-turbomind:
|
mixtral-large-instruct-2411-turbomind:
|
||||||
gsm8k_accuracy: 87.50
|
gsm8k_accuracy: 87.50
|
||||||
race-high_accuracy: 93.75
|
race-high_accuracy: 93.75
|
||||||
@ -228,15 +225,10 @@ base:
|
|||||||
GPQA_diamond_accuracy: 0
|
GPQA_diamond_accuracy: 0
|
||||||
race-high_accuracy: 46.88
|
race-high_accuracy: 46.88
|
||||||
winogrande_accuracy: 71.88
|
winogrande_accuracy: 71.88
|
||||||
deepseek-moe-16b-base-hf:
|
|
||||||
gsm8k_accuracy: 21.88
|
|
||||||
GPQA_diamond_accuracy: 0
|
|
||||||
race-high_accuracy: 21.88
|
|
||||||
winogrande_accuracy: 65.62
|
|
||||||
deepseek-7b-base-turbomind:
|
deepseek-7b-base-turbomind:
|
||||||
gsm8k_accuracy: 21.88
|
gsm8k_accuracy: 21.88
|
||||||
GPQA_diamond_accuracy: 0
|
GPQA_diamond_accuracy: 0
|
||||||
race-high_accuracy: 46.88
|
race-high_accuracy: 43.75
|
||||||
winogrande_accuracy: 84.38
|
winogrande_accuracy: 84.38
|
||||||
deepseek-moe-16b-base-vllm:
|
deepseek-moe-16b-base-vllm:
|
||||||
gsm8k_accuracy: 21.88
|
gsm8k_accuracy: 21.88
|
||||||
@ -269,7 +261,7 @@ base:
|
|||||||
race-high_accuracy:
|
race-high_accuracy:
|
||||||
winogrande_accuracy:
|
winogrande_accuracy:
|
||||||
gemma-7b-vllm:
|
gemma-7b-vllm:
|
||||||
gsm8k_accuracy: 53.12
|
gsm8k_accuracy: 43.75
|
||||||
GPQA_diamond_accuracy: 9.38
|
GPQA_diamond_accuracy: 9.38
|
||||||
race-high_accuracy:
|
race-high_accuracy:
|
||||||
winogrande_accuracy:
|
winogrande_accuracy:
|
||||||
|
2
.github/workflows/daily-run-test.yml
vendored
2
.github/workflows/daily-run-test.yml
vendored
@ -258,7 +258,7 @@ jobs:
|
|||||||
conda info --envs
|
conda info --envs
|
||||||
export from_tf=TRUE
|
export from_tf=TRUE
|
||||||
python tools/list_configs.py internlm2_5 mmlu
|
python tools/list_configs.py internlm2_5 mmlu
|
||||||
opencompass --models hf_internlm2_5_7b hf_internlm3_8b_instruct --datasets race_ppl demo_gsm8k_chat_gen --batch-size 1 --max-out-len 256 --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd1 --reuse --max-num-workers 2 --dump-eval-details
|
opencompass --models hf_internlm2_5_7b --datasets race_ppl demo_gsm8k_chat_gen --batch-size 1 --max-out-len 256 --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd1 --reuse --max-num-workers 2 --dump-eval-details
|
||||||
rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd1/*/summary regression_result_daily
|
rm regression_result_daily -f && ln -s ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd1/*/summary regression_result_daily
|
||||||
python -m pytest -m case1 -s -v --color=yes .github/scripts/oc_score_assert.py
|
python -m pytest -m case1 -s -v --color=yes .github/scripts/oc_score_assert.py
|
||||||
opencompass --models hf_internlm2_5_7b_chat hf_internlm3_8b_instruct --datasets race_gen demo_gsm8k_chat_gen -a lmdeploy --batch-size 1 --max-out-len 256 --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd2 --reuse --max-num-workers 2 --dump-eval-details
|
opencompass --models hf_internlm2_5_7b_chat hf_internlm3_8b_instruct --datasets race_gen demo_gsm8k_chat_gen -a lmdeploy --batch-size 1 --max-out-len 256 --work-dir ${{env.REPORT_ROOT}}/${{ github.run_id }}/cmd2 --reuse --max-num-workers 2 --dump-eval-details
|
||||||
|
Loading…
Reference in New Issue
Block a user