mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00

* add one script for subjective * add one script for subjective * add one script for subjective * add one script for subjective --------- Co-authored-by: thebestannie <1290646445@qq.com>
26 lines
999 B
Python
26 lines
999 B
Python
from mmengine.config import read_base
|
|
|
|
with read_base():
|
|
from ..datasets.subjective.multiround.mtbench_single_judge_diff_temp import subjective_datasets
|
|
# from .datasets.subjective.multiround.mtbench_pair_judge import subjective_datasets
|
|
from .model_cfg import models, judge_model, given_pred, infer, gpt4, runner
|
|
from opencompass.partitioners.sub_naive import SubjectiveNaivePartitioner
|
|
from opencompass.partitioners.sub_size import SubjectiveSizePartitioner
|
|
from opencompass.summarizers import MTBenchSummarizer
|
|
|
|
# Datasets to evaluate: a fresh list holding every entry of the imported
# `subjective_datasets`.
datasets = list(subjective_datasets)

# Strip any explicit `do_sample` entry from each model's `generation_kwargs`;
# models without `generation_kwargs`, or without that key, are left untouched.
# NOTE(review): presumably this keeps the model config from pinning the
# sampling mode while the dataset config varies temperature -- confirm.
# The loop variable keeps the name `model` and no helper names are added:
# these are module-level names, and the config loader may expose them --
# TODO confirm before renaming.
for model in models:
    if ('generation_kwargs' in model
            and 'do_sample' in model['generation_kwargs']):
        del model['generation_kwargs']['do_sample']
|
|
|
|
# Evaluation-stage settings: how judging work is partitioned and which runner
# executes it.
# NOTE(review): `eval` shadows the Python builtin; it is kept because it is a
# module-level name that other code may read from this config -- presumably
# the key the framework expects; confirm before renaming.
eval = dict(
    partitioner=dict(
        type=SubjectiveSizePartitioner,
        strategy='split',
        max_task_size=10000,
        mode='singlescore',
        models=models,
    ),
    # `runner` is taken as-is from the `.model_cfg` import.
    runner=runner,
)
|
|
|
|
# Result summarizer: MTBenchSummarizer configured with judge_type='single'.
summarizer = dict(
    type=MTBenchSummarizer,
    judge_type='single',
)

# Output directory for this run (relative path).
work_dir = 'outputs/mtbench/'
|