mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
parent
f66b0b347a
commit
97236c8e97
68
.github/scripts/oc_score_baseline_fullbench.yaml
vendored
68
.github/scripts/oc_score_baseline_fullbench.yaml
vendored
@ -24,8 +24,8 @@ internlm2_5-7b-chat-hf_fullbench:
|
||||
lcb_test_output_pass@1: 18.75
|
||||
bbh-logical_deduction_seven_objects_score: 50
|
||||
bbh-multistep_arithmetic_two_score: 68.75
|
||||
mmlu-other_naive_average: 72.6
|
||||
cmmlu-china-specific_naive_average: 76.25
|
||||
mmlu-other_accuracy: 72.6
|
||||
cmmlu-china-specific_accuracy: 76.25
|
||||
mmlu_pro_math_accuracy: 25
|
||||
ds1000_Pandas_accuracy: 12.5
|
||||
ds1000_Numpy_accuracy: 0
|
||||
@ -101,8 +101,8 @@ internlm2_5-7b-chat-turbomind_fullbench:
|
||||
lcb_test_output_pass@1: 25.00
|
||||
bbh-logical_deduction_seven_objects_score: 50.00
|
||||
bbh-multistep_arithmetic_two_score: 68.75
|
||||
mmlu-other_naive_average: 69.71
|
||||
cmmlu-china-specific_naive_average: 75.83
|
||||
mmlu-other_accuracy: 69.71
|
||||
cmmlu-china-specific_accuracy: 75.83
|
||||
mmlu_pro_math_accuracy: 31.25
|
||||
ds1000_Pandas_accuracy: 0
|
||||
ds1000_Numpy_accuracy: 0
|
||||
@ -234,15 +234,15 @@ internlm2_5-7b-turbomind:
|
||||
sanitized_mbpp_score: 55.25
|
||||
dingo_en_192_score: 60.94
|
||||
dingo_zh_170_score: 67.65
|
||||
mmlu-stem_naive_average: 63.72
|
||||
mmlu-social-science_naive_average: 80.15
|
||||
mmlu-humanities_naive_average: 74.27
|
||||
mmlu-other_naive_average: 71.85
|
||||
cmmlu-stem_naive_average: 67.07
|
||||
cmmlu-social-science_naive_average: 81.49
|
||||
cmmlu-humanities_naive_average: 85.84
|
||||
cmmlu-other_naive_average: 82.69
|
||||
cmmlu-china-specific_naive_average: 79.88
|
||||
mmlu-stem_accuracy: 63.72
|
||||
mmlu-social-science_accuracy: 80.15
|
||||
mmlu-humanities_accuracy: 74.27
|
||||
mmlu-other_accuracy: 71.85
|
||||
cmmlu-stem_accuracy: 67.07
|
||||
cmmlu-social-science_accuracy: 81.49
|
||||
cmmlu-humanities_accuracy: 85.84
|
||||
cmmlu-other_accuracy: 82.69
|
||||
cmmlu-china-specific_accuracy: 79.88
|
||||
mmlu_pro_biology_accuracy: 58.58
|
||||
mmlu_pro_business_accuracy: 28.01
|
||||
mmlu_pro_chemistry_accuracy: 22.79
|
||||
@ -281,12 +281,12 @@ internlm2_5-7b-turbomind:
|
||||
longbench_naive_average: 46.19
|
||||
longbench_zh_naive_average: 49.3
|
||||
longbench_en_naive_average: 43.97
|
||||
longbench_single-document-qa_naive_average: 42.84
|
||||
longbench_multi-document-qa_naive_average: 37.29
|
||||
longbench_summarization_naive_average: 23.21
|
||||
longbench_few-shot-learning_naive_average: 61.67
|
||||
longbench_synthetic-tasks_naive_average: 60.05
|
||||
longbench_code-completion_naive_average: 52.09
|
||||
longbench_single-document-qa_score: 42.84
|
||||
longbench_multi-document-qa_score: 41.25
|
||||
longbench_summarization_score: 23.21
|
||||
longbench_few-shot-learning_score: 61.67
|
||||
longbench_synthetic-tasks_score: 60.05
|
||||
longbench_code-completion_score: 52.09
|
||||
|
||||
internlm2_5-7b-chat-turbomind:
|
||||
objective:
|
||||
@ -327,15 +327,15 @@ internlm2_5-7b-chat-turbomind:
|
||||
teval_naive_average: 80
|
||||
SciCode_sub_accuracy: 5.56
|
||||
qa_dingo_cn_score: 99.01
|
||||
mmlu-stem_naive_average: 68.2
|
||||
mmlu-social-science_naive_average: 75.8
|
||||
mmlu-humanities_naive_average: 69.3
|
||||
mmlu-other_naive_average: 71.3
|
||||
cmmlu-stem_naive_average: 66.64
|
||||
cmmlu-social-science_naive_average: 76
|
||||
cmmlu-humanities_naive_average: 77.9
|
||||
cmmlu-other_naive_average: 77.25
|
||||
cmmlu-china-specific_naive_average: 73.6
|
||||
mmlu-stem_accuracy: 68.2
|
||||
mmlu-social-science_accuracy: 75.8
|
||||
mmlu-humanities_accuracy: 69.3
|
||||
mmlu-other_accuracy: 71.3
|
||||
cmmlu-stem_accuracy: 66.64
|
||||
cmmlu-social-science_accuracy: 76
|
||||
cmmlu-humanities_accuracy: 77.9
|
||||
cmmlu-other_accuracy: 77.25
|
||||
cmmlu-china-specific_accuracy: 73.6
|
||||
mmlu_pro_biology_accuracy: 66.67
|
||||
mmlu_pro_business_accuracy: 47.91
|
||||
mmlu_pro_chemistry_accuracy: 35
|
||||
@ -448,9 +448,9 @@ internlm2_5-7b-chat-1m-turbomind:
|
||||
babilong_32k_naive_average: 48.9
|
||||
babilong_128k_naive_average: 40.8
|
||||
babilong_256k_naive_average: 23.5
|
||||
longbench_single-document-qa_naive_average: 43.56
|
||||
longbench_multi-document-qa_naive_average: 46.24
|
||||
longbench_summarization_naive_average: 24.32
|
||||
longbench_few-shot-learning_naive_average: 51.67
|
||||
longbench_synthetic-tasks_naive_average: 66.83
|
||||
longbench_code-completion_naive_average: 45.99
|
||||
longbench_single-document-qa_score: 43.56
|
||||
longbench_multi-document-qa_score: 46.24
|
||||
longbench_summarization_score: 24.32
|
||||
longbench_few-shot-learning_score: 51.67
|
||||
longbench_synthetic-tasks_score: 66.83
|
||||
longbench_code-completion_score: 45.99
|
||||
|
4
.github/workflows/daily-run-test.yml
vendored
4
.github/workflows/daily-run-test.yml
vendored
@ -157,7 +157,9 @@ jobs:
|
||||
pip install opencompass*.whl --cache-dir ${{env.PIP_CACHE_PATH}}
|
||||
pip install opencompass[lmdeploy] --cache-dir ${{env.PIP_CACHE_PATH}}
|
||||
pip install opencompass[vllm] --cache-dir ${{env.PIP_CACHE_PATH}}
|
||||
pip install torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 --cache-dir ${{env.PIP_CACHE_PATH}}
|
||||
pip install opencompass[full] --cache-dir ${{env.PIP_CACHE_PATH}}
|
||||
pip install opencompass[api] --cache-dir ${{env.PIP_CACHE_PATH}}
|
||||
pip install torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 --cache-dir ${{env.PIP_CACHE_PATH}}
|
||||
FLASH_ATTENTION_FORCE_BUILD=TRUE pip install /fs-computility/llm/qa-llm-cicd/packages/flash_attn-2.7.0.post2+cu12torch2.5cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
|
||||
pip install xformers --index-url https://download.pytorch.org/whl/cu121 --cache-dir ${{env.PIP_CACHE_PATH}}
|
||||
cp -r /root/nltk_data ${{env.CONDA_PATH}}/envs/${{env.CONDA_ENV}}/nltk_data
|
||||
|
2
.github/workflows/pr-run-test.yml
vendored
2
.github/workflows/pr-run-test.yml
vendored
@ -45,7 +45,7 @@ jobs:
|
||||
. ${{env.CONDA_PATH}}/bin/activate
|
||||
conda activate ${{env.CONDA_ENV}}
|
||||
python3 -m pip uninstall opencompass -y
|
||||
python3 -m pip install -e . --cache-dir ${{env.PIP_CACHE_PATH}}
|
||||
python3 -m pip install -e ".[full]" --cache-dir ${{env.PIP_CACHE_PATH}}
|
||||
conda info --envs
|
||||
- name: conda env
|
||||
run: |
|
||||
|
Loading…
Reference in New Issue
Block a user