Mirror of https://github.com/open-compass/opencompass.git (synced 2025-05-30 16:03:24 +08:00)
[Update] update Subeval demo config (#1358)

* fix pip version
* fix pip version
* update demo config

parent 86b6d18731
commit 8fe75e9937
@@ -9,7 +9,6 @@ with read_base():
     from .datasets.subjective.fofo.fofo_judge import fofo_datasets
     from .datasets.subjective.multiround.mtbench_single_judge_diff_temp import mtbench_datasets
     from .datasets.subjective.multiround.mtbench101_judge import mtbench101_datasets
-    from .models.chatglm.hf_chatglm3_6b import models
 from opencompass.models import HuggingFaceCausalLM, HuggingFace, HuggingFaceChatGLM3, OpenAI
 from opencompass.partitioners import NaivePartitioner, SizePartitioner
 from opencompass.partitioners.sub_naive import SubjectiveNaivePartitioner
@@ -30,9 +29,6 @@ api_meta_template = dict(

 # -------------Inference Stage ----------------------------------------
 # For subjective evaluation, we often set do sample for models
-for model in models:
-    model['generation_kwargs'] = dict(do_sample=True)
-
 models = [
     dict(
         type=HuggingFaceChatGLM3,
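The hunk above drops the loop that patched do_sample onto the imported chatglm3 model configs; with the model now declared inline, sampling is set directly in its own generation_kwargs. A minimal sketch of the two equivalent patterns (the inline fields are abbreviated; the full entry appears in the next hunk):

# Pattern removed by this commit: patch sampling onto configs imported via
# `from .models.chatglm.hf_chatglm3_6b import models`.
for model in models:
    model['generation_kwargs'] = dict(do_sample=True)

# Pattern kept by this commit: declare sampling inline on the model dict.
models = [
    dict(
        type=HuggingFaceChatGLM3,          # imported at the top of the config
        # ... remaining fields as shown in the next hunk ...
        generation_kwargs=dict(do_sample=True),
    ),
]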
@@ -49,51 +45,7 @@ models = [
             trust_remote_code=True,
         ),
         generation_kwargs=dict(
-            do_sample=True,
-        ),
-        meta_template=api_meta_template,
-        max_out_len=2048,
-        max_seq_len=4096,
-        batch_size=8,
-        run_cfg=dict(num_gpus=1, num_procs=1),
-    ),dict(
-        type=HuggingFaceChatGLM3,
-        abbr='chatglm3-6b-hf2',
-        path='THUDM/chatglm3-6b',
-        tokenizer_path='THUDM/chatglm3-6b',
-        model_kwargs=dict(
-            device_map='auto',
-            trust_remote_code=True,
-        ),
-        tokenizer_kwargs=dict(
-            padding_side='left',
-            truncation_side='left',
-            trust_remote_code=True,
-        ),
-        generation_kwargs=dict(
-            do_sample=True,
-        ),
-        meta_template=api_meta_template,
-        max_out_len=2048,
-        max_seq_len=4096,
-        batch_size=8,
-        run_cfg=dict(num_gpus=1, num_procs=1),
-    ),dict(
-        type=HuggingFaceChatGLM3,
-        abbr='chatglm3-6b-hf3',
-        path='THUDM/chatglm3-6b',
-        tokenizer_path='THUDM/chatglm3-6b',
-        model_kwargs=dict(
-            device_map='auto',
-            trust_remote_code=True,
-        ),
-        tokenizer_kwargs=dict(
-            padding_side='left',
-            truncation_side='left',
-            trust_remote_code=True,
-        ),
-        generation_kwargs=dict(
-            do_sample=True,
+            do_sample=True, #For subjective evaluation, we suggest you do set do_sample when running model inference!
         ),
         meta_template=api_meta_template,
         max_out_len=2048,
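After this hunk only one HuggingFaceChatGLM3 entry is left in models. A sketch of the resulting entry, where abbr, path and tokenizer_path are not visible in the hunk and are assumed to match the removed duplicate entries:

models = [
    dict(
        type=HuggingFaceChatGLM3,
        abbr='chatglm3-6b-hf',               # assumed; not shown in this hunk
        path='THUDM/chatglm3-6b',            # assumed from the removed duplicates
        tokenizer_path='THUDM/chatglm3-6b',  # assumed from the removed duplicates
        model_kwargs=dict(device_map='auto', trust_remote_code=True),
        tokenizer_kwargs=dict(padding_side='left',
                              truncation_side='left',
                              trust_remote_code=True),
        # For subjective evaluation, keep do_sample=True at inference time.
        generation_kwargs=dict(do_sample=True),
        meta_template=api_meta_template,
        max_out_len=2048,
        max_seq_len=4096,
        batch_size=8,
        run_cfg=dict(num_gpus=1, num_procs=1),
    )
]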
@@ -103,7 +55,7 @@ models = [
     )
 ]

-datasets = [*alignbench_datasets, *alpacav2_datasets, *arenahard_datasets, *compassarena_datasets, *compassbench_datasets, *fofo_datasets, *mtbench_datasets, *mtbench101_datasets]
+datasets = [*alignbench_datasets, *alpacav2_datasets, *arenahard_datasets, *compassarena_datasets, *compassbench_datasets, *fofo_datasets, *mtbench_datasets, *mtbench101_datasets] # add datasets you want

 infer = dict(
     partitioner=dict(type=NaivePartitioner),
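The hunk shows only the head of the infer block. For orientation, a common shape for the OpenCompass inference stage looks like the sketch below; the runner, worker count and task type are assumptions, not part of this commit:

from opencompass.runners import LocalRunner      # assumed import
from opencompass.tasks import OpenICLInferTask   # assumed import

infer = dict(
    partitioner=dict(type=NaivePartitioner),
    runner=dict(
        type=LocalRunner,
        max_num_workers=16,                      # assumed value
        task=dict(type=OpenICLInferTask),
    ),
)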
@@ -124,7 +76,6 @@ judge_models = [dict(
     batch_size=8,
     temperature=0,
 )]
-judge_models = [models[0]]

 ## ------------- Evaluation Configuration
 eval = dict(
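The diff ends at eval = dict(. As a rough sketch (not shown in this commit), the subjective evaluation stage typically wires the inline models and the dedicated judge_models together through SubjectiveNaivePartitioner; the runner, task type and exact keyword names below are assumptions based on other OpenCompass subjective configs:

from opencompass.runners import LocalRunner                        # assumed import
from opencompass.tasks.subjective_eval import SubjectiveEvalTask   # assumed import

eval = dict(
    partitioner=dict(
        type=SubjectiveNaivePartitioner,
        models=models,              # the single inline model defined above
        judge_models=judge_models,  # the dedicated judge list kept by this commit
    ),
    runner=dict(
        type=LocalRunner,
        max_num_workers=2,          # assumed value
        task=dict(type=SubjectiveEvalTask),
    ),
)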