This commit is contained in:
zhulinJulia24 2025-02-26 17:11:00 +08:00
parent bdb2d46f59
commit 45033bd413
4 changed files with 27 additions and 27 deletions

View File

@ -24,7 +24,7 @@ models = [
abbr='lmdeploy-api-test', abbr='lmdeploy-api-test',
type=OpenAISDK, type=OpenAISDK,
key='EMPTY', key='EMPTY',
openai_api_base='http://localhost:23333/v1', openai_api_base='http://0.0.0.0:23333/v1',
path='internlm2', path='internlm2',
tokenizer_path='internlm/internlm2_5-7b-chat', tokenizer_path='internlm/internlm2_5-7b-chat',
rpm_verbose=True, rpm_verbose=True,

View File

@ -42,7 +42,7 @@ internlm2_5-7b-chat-hf_fullbench:
alpaca_eval_total: 20 alpaca_eval_total: 20
arenahard_score: 50 arenahard_score: 50
Followbench_naive_average: 1 Followbench_naive_average: 1
CompassArena_naive_average: 44.00 CompassArena_naive_average: 43
mtbench101_avg: 7.8 mtbench101_avg: 7.8
wildbench_average: -12.78 wildbench_average: -12.78
simpleqa_accuracy_given_attempted: 0 simpleqa_accuracy_given_attempted: 0
@ -58,7 +58,7 @@ internlm2_5-7b-chat-hf_fullbench:
alpaca_eval_helpful_base: 20 alpaca_eval_helpful_base: 20
compassarena_language_naive_average: 35 compassarena_language_naive_average: 35
compassarena_knowledge_naive_average: 55 compassarena_knowledge_naive_average: 55
compassarena_reason_v2_naive_average: 45.00 compassarena_reason_v2_naive_average: 40
compassarena_math_v2_naive_average: 55 compassarena_math_v2_naive_average: 55
compassarena_creationv2_zh_naive_average: 30 compassarena_creationv2_zh_naive_average: 30
followbench_llmeval_en_HSR_AVG: 1 followbench_llmeval_en_HSR_AVG: 1

View File

@ -6,7 +6,7 @@ chat:
gsm8k_accuracy: 71.88 gsm8k_accuracy: 71.88
race-high_accuracy: 90.62 race-high_accuracy: 90.62
glm-4-9b-chat-vllm: glm-4-9b-chat-vllm:
gsm8k_accuracy: 65.62 gsm8k_accuracy: 71.88
race-high_accuracy: 90.62 race-high_accuracy: 90.62
deepseek-7b-chat-hf: deepseek-7b-chat-hf:
gsm8k_accuracy: 46.88 gsm8k_accuracy: 46.88
@ -63,7 +63,7 @@ chat:
gsm8k_accuracy: 84.38 gsm8k_accuracy: 84.38
race-high_accuracy: 90.62 race-high_accuracy: 90.62
llama-3_2-3b-instruct-hf: llama-3_2-3b-instruct-hf:
gsm8k_accuracy: 65.62 gsm8k_accuracy: 68.75
race-high_accuracy: 81.25 race-high_accuracy: 81.25
llama-3-8b-instruct-hf: llama-3-8b-instruct-hf:
gsm8k_accuracy: 68.75 gsm8k_accuracy: 68.75
@ -75,7 +75,7 @@ chat:
gsm8k_accuracy: 78.12 gsm8k_accuracy: 78.12
race-high_accuracy: 90.62 race-high_accuracy: 90.62
llama-3_2-3b-instruct-turbomind: llama-3_2-3b-instruct-turbomind:
gsm8k_accuracy: 62.50 gsm8k_accuracy: 65.62
race-high_accuracy: 81.25 race-high_accuracy: 81.25
llama-3-8b-instruct-turbomind: llama-3-8b-instruct-turbomind:
gsm8k_accuracy: 71.88 gsm8k_accuracy: 71.88
@ -226,25 +226,25 @@ base:
race-high_accuracy: 25 race-high_accuracy: 25
winogrande_accuracy: 68.75 winogrande_accuracy: 68.75
gemma2-2b-hf: gemma2-2b-hf:
gsm8k_accuracy: 28.12 gsm8k_accuracy: 31.25
GPQA_diamond_accuracy: 3.12 GPQA_diamond_accuracy: 3.12
race-high_accuracy: 56.25 race-high_accuracy: 56.25
winogrande_accuracy: 71.88 winogrande_accuracy: 75.00
gemma2-9b-hf: gemma2-9b-hf:
gsm8k_accuracy: 68.75 gsm8k_accuracy: 75.00
GPQA_diamond_accuracy: 0 GPQA_diamond_accuracy: 0
race-high_accuracy: 81.25 race-high_accuracy: 84.38
winogrande_accuracy: 84.38 winogrande_accuracy: 81.25
gemma-2b-hf: gemma-2b-hf:
gsm8k_accuracy: 18.75 gsm8k_accuracy: 21.88
GPQA_diamond_accuracy: 3.12 GPQA_diamond_accuracy: 3.12
race-high_accuracy: 25 race-high_accuracy: 21.88
winogrande_accuracy: 53.12 winogrande_accuracy: 53.12
gemma-7b-hf: gemma-7b-hf:
gsm8k_accuracy: 56.25 gsm8k_accuracy: 56.25
GPQA_diamond_accuracy: 6.25 GPQA_diamond_accuracy: 3.12
race-high_accuracy: 65.62 race-high_accuracy: 65.62
winogrande_accuracy: 78.12 winogrande_accuracy: 71.88
gemma-2b-vllm: gemma-2b-vllm:
gsm8k_accuracy: 15.62 gsm8k_accuracy: 15.62
GPQA_diamond_accuracy: 3.12 GPQA_diamond_accuracy: 3.12
@ -441,10 +441,10 @@ base:
race-high_accuracy: 93.75 race-high_accuracy: 93.75
winogrande_accuracy: 87.5 winogrande_accuracy: 87.5
deepseek-v2-turbomind: deepseek-v2-turbomind:
gsm8k_accuracy: 71.88 gsm8k_accuracy: 65.62
GPQA_diamond_accuracy: 3.12 GPQA_diamond_accuracy: 15.62
race-high_accuracy: 81.25 race-high_accuracy: 93.75
winogrande_accuracy: 75 winogrande_accuracy: 84.38
llama-3-70b-hf: llama-3-70b-hf:
gsm8k_accuracy: 62.5 gsm8k_accuracy: 62.5
GPQA_diamond_accuracy: 3.12 GPQA_diamond_accuracy: 3.12

View File

@ -44,7 +44,7 @@ on:
type: string type: string
default: "['base_objective','chat_objective','chat_subjective','base_long_context','chat_long_context']" default: "['base_objective','chat_objective','chat_subjective','base_long_context','chat_long_context']"
schedule: schedule:
- cron: '15 14 * * *' - cron: '15 14 * * 0,2'
env: env:
HF_DATASETS_OFFLINE: 1 HF_DATASETS_OFFLINE: 1
@ -87,7 +87,7 @@ jobs:
name: my-artifact-${{ github.run_id }} name: my-artifact-${{ github.run_id }}
build-pypi-lmdeploy: build-pypi-lmdeploy:
if: ${{!cancelled() && (github.event_name != 'schedule' && inputs.build_lmdeploy)}} if: ${{!cancelled() && (github.event_name == 'schedule' || inputs.build_lmdeploy)}}
strategy: strategy:
matrix: matrix:
pyver: [py310] pyver: [py310]
@ -127,7 +127,7 @@ jobs:
needs: ['build-pypi', 'build-pypi-lmdeploy'] needs: ['build-pypi', 'build-pypi-lmdeploy']
runs-on: volc_cu12 runs-on: volc_cu12
environment: 'prod' environment: 'prod'
timeout-minutes: 240 #4hours timeout-minutes: 120 #2hours
steps: steps:
- name: Clone repository - name: Clone repository
uses: actions/checkout@v2 uses: actions/checkout@v2
@ -148,7 +148,7 @@ jobs:
uses: nick-fields/retry@v3 uses: nick-fields/retry@v3
with: with:
max_attempts: 1 max_attempts: 1
timeout_minutes: 240 timeout_minutes: 120
command: | command: |
. ${{env.CONDA_PATH}}/bin/activate . ${{env.CONDA_PATH}}/bin/activate
conda create -y --name ${{env.CONDA_ENV}} python=3.10 conda create -y --name ${{env.CONDA_ENV}} python=3.10
@ -211,7 +211,7 @@ jobs:
uses: nick-fields/retry@v3 uses: nick-fields/retry@v3
with: with:
max_attempts: 1 max_attempts: 1
timeout_minutes: 120 timeout_minutes: 180
command: | command: |
. ${{env.CONDA_PATH}}/bin/activate . ${{env.CONDA_PATH}}/bin/activate
conda activate ${{env.CONDA_ENV}} conda activate ${{env.CONDA_ENV}}
@ -230,7 +230,7 @@ jobs:
regression_func: ${{fromJSON(github.event.inputs.regression_func_local || '["cmd","api","chat_sub_fullbench"]')}} regression_func: ${{fromJSON(github.event.inputs.regression_func_local || '["cmd","api","chat_sub_fullbench"]')}}
runs-on: volc_cu12_local runs-on: volc_cu12_local
environment: 'prod' environment: 'prod'
timeout-minutes: 240 #4hours timeout-minutes: 480 #8hours
steps: steps:
- name: Clone repository - name: Clone repository
uses: actions/checkout@v2 uses: actions/checkout@v2
@ -306,7 +306,7 @@ jobs:
function_type: ${{fromJSON(github.event.inputs.fullbench_eval || '["base_objective","chat_objective","chat_subjective","base_long_context","chat_long_context"]')}} function_type: ${{fromJSON(github.event.inputs.fullbench_eval || '["base_objective","chat_objective","chat_subjective","base_long_context","chat_long_context"]')}}
runs-on: volc_cu12 runs-on: volc_cu12
environment: 'prod' environment: 'prod'
timeout-minutes: 360 #6hours timeout-minutes: 480 #8hours
steps: steps:
- name: Clone repository - name: Clone repository
uses: actions/checkout@v2 uses: actions/checkout@v2
@ -323,7 +323,7 @@ jobs:
uses: nick-fields/retry@v3 uses: nick-fields/retry@v3
with: with:
max_attempts: 1 max_attempts: 1
timeout_minutes: 360 timeout_minutes: 480
command: | command: |
. ${{env.CONDA_PATH}}/bin/activate . ${{env.CONDA_PATH}}/bin/activate
conda activate ${{env.CONDA_ENV}} conda activate ${{env.CONDA_ENV}}