[Update] update Subeval demo config (#1358)

* fix pip version

* fix pip version

* update demo config
bittersweet1999 2024-07-24 15:48:28 +08:00 committed by GitHub
parent 86b6d18731
commit 8fe75e9937


@@ -9,7 +9,6 @@ with read_base():
     from .datasets.subjective.fofo.fofo_judge import fofo_datasets
     from .datasets.subjective.multiround.mtbench_single_judge_diff_temp import mtbench_datasets
     from .datasets.subjective.multiround.mtbench101_judge import mtbench101_datasets
-    from .models.chatglm.hf_chatglm3_6b import models
 from opencompass.models import HuggingFaceCausalLM, HuggingFace, HuggingFaceChatGLM3, OpenAI
 from opencompass.partitioners import NaivePartitioner, SizePartitioner
 from opencompass.partitioners.sub_naive import SubjectiveNaivePartitioner
@@ -30,9 +29,6 @@ api_meta_template = dict(
 
 # -------------Inference Stage ----------------------------------------
-# For subjective evaluation, we often set do sample for models
-for model in models:
-    model['generation_kwargs'] = dict(do_sample=True)
 
 models = [
     dict(
         type=HuggingFaceChatGLM3,
@@ -49,51 +45,7 @@ models = [
             trust_remote_code=True,
         ),
         generation_kwargs=dict(
-            do_sample=True,
-        ),
-        meta_template=api_meta_template,
-        max_out_len=2048,
-        max_seq_len=4096,
-        batch_size=8,
-        run_cfg=dict(num_gpus=1, num_procs=1),
-    ),dict(
-        type=HuggingFaceChatGLM3,
-        abbr='chatglm3-6b-hf2',
-        path='THUDM/chatglm3-6b',
-        tokenizer_path='THUDM/chatglm3-6b',
-        model_kwargs=dict(
-            device_map='auto',
-            trust_remote_code=True,
-        ),
-        tokenizer_kwargs=dict(
-            padding_side='left',
-            truncation_side='left',
-            trust_remote_code=True,
-        ),
-        generation_kwargs=dict(
-            do_sample=True,
-        ),
-        meta_template=api_meta_template,
-        max_out_len=2048,
-        max_seq_len=4096,
-        batch_size=8,
-        run_cfg=dict(num_gpus=1, num_procs=1),
-    ),dict(
-        type=HuggingFaceChatGLM3,
-        abbr='chatglm3-6b-hf3',
-        path='THUDM/chatglm3-6b',
-        tokenizer_path='THUDM/chatglm3-6b',
-        model_kwargs=dict(
-            device_map='auto',
-            trust_remote_code=True,
-        ),
-        tokenizer_kwargs=dict(
-            padding_side='left',
-            truncation_side='left',
-            trust_remote_code=True,
-        ),
-        generation_kwargs=dict(
-            do_sample=True,
+            do_sample=True,  # For subjective evaluation, we suggest you set do_sample when running model inference!
         ),
         meta_template=api_meta_template,
         max_out_len=2048,
@@ -103,7 +55,7 @@ models = [
     )
 ]
 
-datasets = [*alignbench_datasets, *alpacav2_datasets, *arenahard_datasets, *compassarena_datasets, *compassbench_datasets, *fofo_datasets, *mtbench_datasets, *mtbench101_datasets]
+datasets = [*alignbench_datasets, *alpacav2_datasets, *arenahard_datasets, *compassarena_datasets, *compassbench_datasets, *fofo_datasets, *mtbench_datasets, *mtbench101_datasets]  # add the datasets you want to evaluate
 
 infer = dict(
     partitioner=dict(type=NaivePartitioner),
@@ -124,7 +76,6 @@ judge_models = [dict(
     batch_size=8,
     temperature=0,
 )]
-judge_models = [models[0]]
 
 ## ------------- Evaluation Configuration
 eval = dict(
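
For orientation, here is a minimal sketch of the shape the demo config takes after this change: one inference model with do_sample set inline, plus an annotated dataset list. Names and paths are taken from the diff above; the read_base import is assumed to be the standard mmengine one used by OpenCompass configs, and the meta template, judge model, runner, and evaluation stage are elided. Treat it as an illustrative sketch, not the verbatim file:

from mmengine.config import read_base  # assumed: standard OpenCompass config import

with read_base():
    # Pull in only the subjective datasets you want to evaluate
    from .datasets.subjective.multiround.mtbench101_judge import mtbench101_datasets

from opencompass.models import HuggingFaceChatGLM3
from opencompass.partitioners import NaivePartitioner

# A single inference model; sampling is enabled because subjective
# (judge-based) evaluation expects sampled rather than greedy outputs.
models = [
    dict(
        type=HuggingFaceChatGLM3,
        abbr='chatglm3-6b-hf',
        path='THUDM/chatglm3-6b',
        tokenizer_path='THUDM/chatglm3-6b',
        model_kwargs=dict(device_map='auto', trust_remote_code=True),
        tokenizer_kwargs=dict(
            padding_side='left',
            truncation_side='left',
            trust_remote_code=True,
        ),
        generation_kwargs=dict(
            do_sample=True,  # recommended for subjective evaluation
        ),
        max_out_len=2048,
        max_seq_len=4096,
        batch_size=8,
        run_cfg=dict(num_gpus=1, num_procs=1),
    )
]

datasets = [*mtbench101_datasets]  # add the datasets you want

# Inference partitioning as in the diff; runner and eval stage omitted here
infer = dict(
    partitioner=dict(type=NaivePartitioner),
)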