mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
[Update] Add Long-Context configs for Gemma, OREAL, and Qwen2.5 models (#2048)
* [Update] Update the Gemma, OREAL, and Qwen2.5 configs; fix lint issues.
This commit is contained in:
parent
a685ed7daf
commit
ff3275edf0
16
opencompass/configs/models/gemma/vllm_gemma_3_12b_it.py
Normal file
16
opencompass/configs/models/gemma/vllm_gemma_3_12b_it.py
Normal file
@ -0,0 +1,16 @@
|
||||
"""vLLM long-context evaluation config for google/gemma-3-12b-it."""
from opencompass.models import VLLMwithChatTemplate

models = [
    dict(
        type=VLLMwithChatTemplate,
        abbr='gemma-3-12b-it-vllm',
        path='google/gemma-3-12b-it',
        model_kwargs=dict(tensor_parallel_size=4,
                          # for long context
                          rope_scaling={'factor': 8.0, 'rope_type': 'linear'}),
        # Cap the evaluated context length, matching the sibling 4B config
        # (vllm_gemma_3_4b_it.py). Without this, vLLM uses the model's default
        # max_model_len and the rope-scaled long-context window is not applied
        # consistently across the Gemma configs in this commit.
        max_seq_len=140000,
        max_out_len=4096,
        batch_size=1,
        # temperature=0 -> deterministic greedy decoding for evaluation
        generation_kwargs=dict(temperature=0),
        run_cfg=dict(num_gpus=4),
    )
]
|
16
opencompass/configs/models/gemma/vllm_gemma_3_27b_it.py
Normal file
16
opencompass/configs/models/gemma/vllm_gemma_3_27b_it.py
Normal file
@ -0,0 +1,16 @@
|
||||
"""vLLM long-context evaluation config for google/gemma-3-27b-it."""
from opencompass.models import VLLMwithChatTemplate

models = [
    dict(
        type=VLLMwithChatTemplate,
        abbr='gemma-3-27b-it-vllm',
        path='google/gemma-3-27b-it',
        model_kwargs=dict(tensor_parallel_size=4,
                          # for long context
                          rope_scaling={'factor': 8.0, 'rope_type': 'linear'}),
        # Cap the evaluated context length, matching the sibling 4B config
        # (vllm_gemma_3_4b_it.py). Without this, vLLM uses the model's default
        # max_model_len and the rope-scaled long-context window is not applied
        # consistently across the Gemma configs in this commit.
        max_seq_len=140000,
        max_out_len=4096,
        batch_size=1,
        # temperature=0 -> deterministic greedy decoding for evaluation
        generation_kwargs=dict(temperature=0),
        run_cfg=dict(num_gpus=4),
    )
]
|
17
opencompass/configs/models/gemma/vllm_gemma_3_4b_it.py
Normal file
17
opencompass/configs/models/gemma/vllm_gemma_3_4b_it.py
Normal file
@ -0,0 +1,17 @@
|
||||
"""vLLM long-context evaluation config for google/gemma-3-4b-it."""
from opencompass.models import VLLMwithChatTemplate

models = [
    dict(
        type=VLLMwithChatTemplate,
        abbr='gemma-3-4b-it-vllm',
        path='google/gemma-3-4b-it',
        model_kwargs=dict(
            tensor_parallel_size=2,
            # Linear RoPE scaling (factor 8) — for long context.
            rope_scaling=dict(factor=8.0, rope_type='linear'),
        ),
        # Evaluated context window (tokens).
        max_seq_len=140000,
        max_out_len=4096,
        batch_size=1,
        # temperature=0 -> deterministic greedy decoding
        generation_kwargs=dict(temperature=0),
        run_cfg=dict(num_gpus=2),
    )
]
|
@ -0,0 +1,19 @@
|
||||
"""LMDeploy (TurboMind) long-context config for internlm3-8b-instruct."""
from opencompass.models import TurboMindModelwithChatTemplate

models = [
    dict(
        type=TurboMindModelwithChatTemplate,
        abbr='internlm3-8b-instruct-turbomind',
        path='internlm/internlm3-8b-instruct',
        engine_config=dict(
            session_len=142000,
            max_batch_size=1,
            tp=2,
            # RoPE scaling — for long context.
            rope_scaling_factor=6.0,
        ),
        # Effectively greedy decoding: top_k=1 with near-zero temperature.
        gen_config=dict(top_k=1,
                        temperature=1e-6,
                        top_p=0.9,
                        max_new_tokens=8192),
        # Keep the framework-side limit in sync with engine session_len.
        max_seq_len=142000,
        max_out_len=8192,
        batch_size=1,
        run_cfg=dict(num_gpus=2),
    )
]
|
20
opencompass/configs/models/hf_internlm/lmdeploy_oreal_32b.py
Normal file
20
opencompass/configs/models/hf_internlm/lmdeploy_oreal_32b.py
Normal file
@ -0,0 +1,20 @@
|
||||
"""LMDeploy (TurboMind) evaluation config for internlm/OREAL-32B."""
from opencompass.models import TurboMindModelwithChatTemplate
from opencompass.utils.text_postprocessors import extract_non_reasoning_content

models = [
    dict(
        type=TurboMindModelwithChatTemplate,
        abbr='OREAL-32B',
        path='internlm/OREAL-32B',
        engine_config=dict(session_len=32768, max_batch_size=16, tp=4),
        # Effectively greedy decoding: top_k=1 with near-zero temperature.
        gen_config=dict(
            top_k=1,
            temperature=1e-6,
            top_p=0.9,
            max_new_tokens=32768,
        ),
        max_seq_len=32768,
        max_out_len=32768,
        batch_size=16,
        run_cfg=dict(num_gpus=4),
        # Post-process predictions to drop reasoning traces before scoring
        # (per the helper's name — behavior defined in opencompass.utils).
        pred_postprocessor=dict(type=extract_non_reasoning_content),
    )
]
|
@ -0,0 +1,21 @@
|
||||
"""vLLM long-context (YaRN) evaluation config for Qwen2.5-14B-Instruct."""
from opencompass.models import VLLMwithChatTemplate

models = [
    dict(
        type=VLLMwithChatTemplate,
        abbr='qwen2.5-14b-instruct-vllm',
        path='Qwen/Qwen2.5-14B-Instruct',
        model_kwargs=dict(
            tensor_parallel_size=4,
            # YaRN scaling: 4x over the 32k base position window.
            rope_scaling=dict(
                factor=4.0,
                original_max_position_embeddings=32768,
                rope_type='yarn',
            ),
        ),
        max_out_len=4096,
        batch_size=1,
        # temperature=0 -> deterministic greedy decoding
        generation_kwargs=dict(temperature=0),
        run_cfg=dict(num_gpus=4),
    )
]
|
@ -0,0 +1,21 @@
|
||||
"""vLLM long-context (YaRN) evaluation config for Qwen2.5-32B-Instruct."""
from opencompass.models import VLLMwithChatTemplate

models = [
    dict(
        type=VLLMwithChatTemplate,
        abbr='qwen2.5-32b-instruct-vllm',
        path='Qwen/Qwen2.5-32B-Instruct',
        model_kwargs=dict(
            tensor_parallel_size=8,
            # YaRN scaling: 4x over the 32k base position window.
            rope_scaling=dict(
                factor=4.0,
                original_max_position_embeddings=32768,
                rope_type='yarn',
            ),
        ),
        max_out_len=4096,
        batch_size=1,
        # temperature=0 -> deterministic greedy decoding
        generation_kwargs=dict(temperature=0),
        run_cfg=dict(num_gpus=8),
    )
]
|
@ -0,0 +1,21 @@
|
||||
"""vLLM long-context (YaRN) evaluation config for Qwen2.5-72B-Instruct."""
from opencompass.models import VLLMwithChatTemplate

models = [
    dict(
        type=VLLMwithChatTemplate,
        # Normalized from 'qwen2_5-72b-instruct-vllm' to match the naming
        # convention of the sibling 7B/14B/32B configs ('qwen2.5-…').
        abbr='qwen2.5-72b-instruct-vllm',
        path='Qwen/Qwen2.5-72B-Instruct',
        model_kwargs=dict(
            tensor_parallel_size=8,
            # YaRN scaling: 4x over the 32k base position window.
            rope_scaling={
                'factor': 4.0,
                'original_max_position_embeddings': 32768,
                'rope_type': 'yarn'
            },
        ),
        max_out_len=4096,
        batch_size=1,
        # temperature=0 -> deterministic greedy decoding
        generation_kwargs=dict(temperature=0),
        run_cfg=dict(num_gpus=8),
    )
]
|
@ -0,0 +1,21 @@
|
||||
"""vLLM long-context (YaRN) evaluation config for Qwen2.5-7B-Instruct."""
from opencompass.models import VLLMwithChatTemplate

models = [
    dict(
        type=VLLMwithChatTemplate,
        abbr='qwen2.5-7b-instruct-vllm',
        path='Qwen/Qwen2.5-7B-Instruct',
        model_kwargs=dict(
            tensor_parallel_size=4,
            # YaRN scaling: 4x over the 32k base position window.
            rope_scaling=dict(
                factor=4.0,
                original_max_position_embeddings=32768,
                rope_type='yarn',
            ),
        ),
        max_out_len=4096,
        batch_size=1,
        # temperature=0 -> deterministic greedy decoding
        generation_kwargs=dict(temperature=0),
        run_cfg=dict(num_gpus=4),
    )
]
|
Loading…
Reference in New Issue
Block a user