# flake8: noqa: E501
import copy
import os.path as osp
from itertools import combinations, product
from typing import Dict, List, Optional, Tuple

from mmengine.config import ConfigDict

from opencompass.registry import PARTITIONERS
from opencompass.utils import (deal_with_judge_model_abbr,
                               get_infer_output_path, model_abbr_from_cfg)

from .naive import NaivePartitioner
def remove_duplicate_pairs(model_combinations):
    """Drop redundant model pairs that differ only in order.

    For compare mode, ``(A, B)`` and ``(B, A)`` are considered the same
    matchup; only the first occurrence of each unordered pair is kept,
    preserving the original ordering of ``model_combinations``.
    """
    seen_pairs = set()
    deduped = []
    for combo in model_combinations:
        # The unordered identity of a pair is the sorted tuple of abbrs.
        pair_key = tuple(sorted((combo[0]['abbr'], combo[1]['abbr'])))
        if pair_key in seen_pairs:
            continue
        seen_pairs.add(pair_key)
        deduped.append(combo)
    return deduped
def replicate_tasks_with_judge_models(tasks, judge_models, meta_judge_model):
    """Replicate each partitioned task once per judge model.

    When ``meta_judge_model`` is falsy, returns a flat list with one copy of
    every task per judge model, each copy carrying a ``'judge_model'`` entry.
    When a meta judge model is given, returns a two-stage nested list:
    stage 0 holds the per-judge-model copies and stage 1 holds one extra
    copy per task carrying ``'meta_judge_model'`` and ``'judge_models'``.
    """
    if not meta_judge_model:
        flat_tasks = []
        for task in tasks:
            for judge_model in judge_models:
                judged = task.copy()
                judged['judge_model'] = judge_model
                flat_tasks.append(judged)
        return flat_tasks

    judge_stage, meta_stage = [], []
    for task in tasks:
        for judge_model in judge_models:
            judged = task.copy()
            judged['judge_model'] = judge_model
            judge_stage.append(judged)
        # The meta-judge task needs all judge models to aggregate over.
        meta_task = task.copy()
        meta_task['meta_judge_model'] = meta_judge_model
        meta_task['judge_models'] = judge_models
        meta_stage.append(meta_task)
    return [judge_stage, meta_stage]
def _prune_finished_datasets(task, work_dir, judge_model, meta):
    """Return a deep copy of ``task`` whose first dataset list only keeps
    datasets that do not yet have a result file under ``work_dir/results``.

    This handles the non-split partition strategy, where one task contains
    multiple datasets: on restart, the already-finished datasets must be
    removed individually rather than dropping the whole task.
    """
    # Deep copy so the caller's task config is never mutated.
    pruned = copy.deepcopy(task)
    finished = set()
    for idx, dataset in enumerate(task['datasets'][0]):
        filename = get_infer_output_path(
            deal_with_judge_model_abbr(task['models'][0], judge_model, meta),
            dataset, osp.join(work_dir, 'results'))
        if osp.exists(filename):
            finished.add(idx)
    pruned['datasets'][0] = [
        dataset for idx, dataset in enumerate(pruned['datasets'][0])
        if idx not in finished
    ]
    return pruned


def remove_already_tasks(tasks, work_dir, meta_judge_model):
    """Check and remove the already finished subjective evaluation tasks.

    ``tasks`` is either a flat list (judge-only) or a two-stage nested list
    ``[judge_tasks, meta_judge_tasks]`` produced by
    :func:`replicate_tasks_with_judge_models`; the structure of the return
    value mirrors the input. Tasks whose datasets are all finished are
    dropped entirely.

    NOTE(review): ``meta_judge_model`` is unused — the meta stage is detected
    from the nested-list structure instead; kept for interface compatibility.
    """
    if isinstance(tasks, list) and len(tasks) != 0 and isinstance(
            tasks[0], list):
        # Two-stage layout: stage 0 is judged by each task's 'judge_model',
        # stage 1 by its 'meta_judge_model'.
        tasks_to_keep = [[], []]
        for stage in range(2):
            for task in tasks[stage]:
                if stage == 0:
                    pruned = _prune_finished_datasets(task, work_dir,
                                                      task['judge_model'],
                                                      False)
                else:
                    pruned = _prune_finished_datasets(
                        task, work_dir, task['meta_judge_model'], True)
                if len(pruned['datasets'][0]) != 0:
                    tasks_to_keep[stage].append(pruned)
    else:
        # Flat layout: every task carries its own 'judge_model'.
        tasks_to_keep = []
        for task in tasks:
            pruned = _prune_finished_datasets(task, work_dir,
                                              task['judge_model'], False)
            if len(pruned['datasets'][0]) != 0:
                tasks_to_keep.append(pruned)
    return tasks_to_keep
@PARTITIONERS.register_module()
class SubjectiveNaivePartitioner(NaivePartitioner):
    """Naive task partitioner for subjective evaluation. Compared to
    NaivePartitioner, this partitioner squashes multiple models into a task.

    Args:
        mode (str): One of 'singlescore', 'allpair', 'm2n' or 'fixed'.
        out_dir (str): The output directory of tasks.
        models (List[ConfigDict], optional): Models to evaluate; when
            non-empty they override the ``models`` passed to :meth:`partition`.
        base_models (List[ConfigDict], optional): Base side of 'm2n' pairs.
        compare_models (List[ConfigDict], optional): Compare side of 'm2n'
            pairs.
        judge_models (List[ConfigDict], optional): Judge models; each task is
            replicated once per judge model.
        meta_judge_model (ConfigDict, optional): When given, an additional
            meta-review task stage is produced.
        model_pairs (List[Tuple], optional): Fixed model pairs (stored only).
        keep_keys (List[str], optional): The keys to be kept from the
            experiment config to the task config.
        infer_order (str): 'random' or 'double'; propagated to every task.
    """

    def __init__(self,
                 mode: str,
                 out_dir: str,
                 models: Optional[List[ConfigDict]] = None,
                 base_models: Optional[List[ConfigDict]] = None,
                 compare_models: Optional[List[ConfigDict]] = None,
                 judge_models: Optional[List[ConfigDict]] = None,
                 meta_judge_model: Optional[ConfigDict] = None,
                 model_pairs: Optional[List[Tuple]] = None,
                 keep_keys: Optional[List[str]] = None,
                 infer_order: Optional[str] = 'random'):
        super().__init__(out_dir=out_dir, keep_keys=keep_keys)
        assert mode in ['singlescore', 'allpair', 'm2n', 'fixed']
        assert infer_order in ['random', 'double']
        self.mode = mode
        # Normalize the None sentinels to fresh lists; the previous mutable
        # default arguments ([]) were shared across instances.
        self.models = [] if models is None else models
        self.base_models = [] if base_models is None else base_models
        self.compare_models = [] if compare_models is None else compare_models
        self.model_pairs = model_pairs
        self.judge_models = [] if judge_models is None else judge_models
        self.meta_judge_model = meta_judge_model
        self.infer_order = infer_order

    def get_model_combinations(
            self,
            models: List[ConfigDict],
            base_models: Optional[List[ConfigDict]] = None,
            compare_models: Optional[List[ConfigDict]] = None) -> List:
        """Build model pairings according to ``self.mode``.

        'allpair' returns all 2-combinations of ``models``; 'm2n' returns the
        product of ``base_models`` x ``compare_models`` with self-pairs and
        order-duplicates removed.
        """
        base_models = [] if base_models is None else base_models
        compare_models = [] if compare_models is None else compare_models
        if self.mode == 'allpair':
            assert len(models) > 1
            return combinations(models, 2)
        elif self.mode == 'm2n':
            assert len(base_models) > 0 and len(compare_models) > 0
            model_combinations = list(product(base_models, compare_models))
            unique_combinations = remove_duplicate_pairs([
                combo for combo in model_combinations if combo[0] != combo[1]
            ])
            return unique_combinations
        elif self.mode == 'fixed':
            # NOTE(review): 'fixed' mode falls through and returns None —
            # presumably handled by the caller; confirm before relying on it.
            pass

    def partition(self,
                  models: List[ConfigDict],
                  datasets: List[ConfigDict],
                  work_dir: str,
                  out_dir: str,
                  add_cfg: Optional[Dict] = None) -> List[Dict]:
        """Partition model-dataset pairs into tasks. Each task is defined as a
        dict and will run independently as a unit. Its structure is as
        follows:

        .. code-block:: python

            {
                'models': [],  # a list of model configs
                'datasets': [[]],  # a nested list of dataset configs, each
                                   # list corresponds to a model
                'work_dir': '',  # the work dir
            }

        Args:
            models (List[ConfigDict]): A list of model configs.
            datasets (List[ConfigDict]): A list of dataset configs.
            work_dir (str): The work dir for the task.
            out_dir (str): The full output path for the task, intended for
                Partitioners to check whether the task is finished via the
                existency of result file in this directory.
            add_cfg (Dict, optional): Extra config merged into each task.

        Returns:
            List[Dict]: A list of tasks.
        """
        add_cfg = {} if add_cfg is None else add_cfg
        # Instance-level models (when configured) take precedence.
        models = self.models if self.models != [] else models
        base_models, compare_models = self.base_models, self.compare_models
        judge_models = self.judge_models
        meta_judge_model = self.meta_judge_model
        if self.mode != 'singlescore':
            # Pairwise modes evaluate model combinations instead of models.
            models = self.get_model_combinations(models, base_models,
                                                 compare_models)
        model_dataset_combinations = [{'models': models, 'datasets': datasets}]
        tasks = super().partition(
            model_dataset_combinations=model_dataset_combinations,
            work_dir=work_dir,
            out_dir=out_dir,
            add_cfg=add_cfg)
        # We need to add judge models and meta-judge-model as new tasks.
        # Without a meta-judge-model every task is replicated per judge model;
        # with one, an additional meta-review task stage is appended.
        tasks = replicate_tasks_with_judge_models(tasks, judge_models,
                                                  meta_judge_model)
        # Drop (sub)tasks whose results already exist on disk.
        tasks = remove_already_tasks(tasks, work_dir, meta_judge_model)
        if isinstance(tasks, list) and len(tasks) != 0 and isinstance(
                tasks[0], list):
            # Two-stage layout: judge tasks plus meta-review tasks.
            for task_stage in tasks:
                for task in task_stage:
                    task['infer_order'] = self.infer_order
        else:
            # Flat layout: review-judge tasks only.
            for task in tasks:
                task['infer_order'] = self.infer_order
        return tasks