# opencompass/partitioners/sub_naive.py

# flake8: noqa: E501
import copy
import os.path as osp
from itertools import combinations, product
from typing import Dict, List, Optional, Tuple
from mmengine.config import ConfigDict
from opencompass.registry import PARTITIONERS
from opencompass.utils import (deal_with_judge_model_abbr,
                               get_infer_output_path, model_abbr_from_cfg)
from .naive import NaivePartitioner

def remove_duplicate_pairs(model_combinations):
    # For compare mode, we need to remove redundant pairs first
    combo_dict = {}
    for i, combo in enumerate(model_combinations):
        sorted_names = tuple(sorted((combo[0]['abbr'], combo[1]['abbr'])))
        if sorted_names not in combo_dict:
            combo_dict[sorted_names] = i
    new_model_combinations = [
        model_combinations[i] for i in combo_dict.values()
    ]
    return new_model_combinations
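
# A minimal sketch of how remove_duplicate_pairs behaves (the configs are
# hypothetical; only the 'abbr' key matters here):
#
#     >>> gpt4, llama = {'abbr': 'gpt4'}, {'abbr': 'llama'}
#     >>> remove_duplicate_pairs([(gpt4, llama), (llama, gpt4)])
#     [({'abbr': 'gpt4'}, {'abbr': 'llama'})]
#
# Only the first occurrence of each unordered pair survives, so an m2n
# product that yields both (A, B) and (B, A) collapses to one comparison.
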
def replicate_tasks_with_judge_models(tasks, judge_models, meta_judge_model):
    # Once all tasks are partitioned, replicate each task once per judge
    # model; if a meta judge model is given, also build a second stage of
    # meta-review tasks.
    if meta_judge_model:
        replicated_tasks = [[], []]
    else:
        replicated_tasks = []
    for task in tasks:
        replicated_task_dicts = [task.copy() for _ in range(len(judge_models))]
        for idx, replicated_task in enumerate(replicated_task_dicts):
            replicated_task['judge_model'] = judge_models[idx]
        if meta_judge_model:
            meta_task = task.copy()
            meta_task['meta_judge_model'] = meta_judge_model
            meta_task['judge_models'] = judge_models
            replicated_tasks[1].append(meta_task)
            replicated_tasks[0].extend(replicated_task_dicts)
        else:
            replicated_tasks.extend(replicated_task_dicts)
    return replicated_tasks
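
# A minimal sketch of the replication (all configs hypothetical):
#
#     >>> tasks = [{'models': ['m'], 'datasets': [['d']]}]
#     >>> judges = [{'abbr': 'judge_a'}, {'abbr': 'judge_b'}]
#     >>> out = replicate_tasks_with_judge_models(tasks, judges, None)
#     >>> [t['judge_model']['abbr'] for t in out]
#     ['judge_a', 'judge_b']
#
# With a meta judge model, the return value becomes the two-stage list
# [per_judge_tasks, meta_tasks], where each meta task carries both
# 'meta_judge_model' and the full 'judge_models' list so the meta review
# can aggregate the per-judge results.
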
def remove_already_tasks(tasks, work_dir, meta_judge_model):
    # Check for and remove already finished subjective evaluation tasks.
    if isinstance(tasks, list) and len(tasks) != 0 and isinstance(
            tasks[0], list):
        # Two-stage case: tasks[0] holds per-judge tasks, tasks[1] holds
        # meta-review tasks.
        tasks_to_keep = [[], []]
        for i in range(2):
            for task in tasks[i]:
                temp_task = copy.deepcopy(task)
                # When the partition strategy is not 'split', a single task
                # may contain multiple datasets, so on restart we drop only
                # the datasets whose results already exist.
                to_delete_index = []
                for idx, dataset in enumerate(task['datasets'][0]):
                    if i == 0:
                        filename = get_infer_output_path(
                            deal_with_judge_model_abbr(task['models'][0],
                                                       task['judge_model'],
                                                       False), dataset,
                            osp.join(work_dir, 'results'))
                    else:
                        filename = get_infer_output_path(
                            deal_with_judge_model_abbr(
                                task['models'][0], task['meta_judge_model'],
                                True), dataset, osp.join(work_dir, 'results'))
                    if osp.exists(filename):
                        to_delete_index.append(idx)
                temp_task['datasets'][0] = [
                    temp_task['datasets'][0][j]
                    for j in range(len(temp_task['datasets'][0]))
                    if j not in to_delete_index
                ]
                if len(temp_task['datasets'][0]) != 0:
                    tasks_to_keep[i].append(temp_task)
    else:
        tasks_to_keep = []
        for task in tasks:
            temp_task = copy.deepcopy(task)
            # Same restart handling as above: drop only the datasets whose
            # result files already exist.
            to_delete_index = []
            for idx, dataset in enumerate(task['datasets'][0]):
                filename = get_infer_output_path(
                    deal_with_judge_model_abbr(task['models'][0],
                                               task['judge_model']), dataset,
                    osp.join(work_dir, 'results'))
                if osp.exists(filename):
                    to_delete_index.append(idx)
            # Remove the already finished datasets
            temp_task['datasets'][0] = [
                temp_task['datasets'][0][j]
                for j in range(len(temp_task['datasets'][0]))
                if j not in to_delete_index
            ]
            if len(temp_task['datasets'][0]) != 0:
                tasks_to_keep.append(temp_task)
    return tasks_to_keep
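
# A rough sketch of what the restart check probes (layout hypothetical; the
# exact naming is delegated to deal_with_judge_model_abbr and
# get_infer_output_path): for a model abbr 'llama' judged by 'judge_a', a
# per-dataset result file along the lines of
#     {work_dir}/results/llama_judged-by--judge_a/{dataset_abbr}.json
# is checked; datasets whose files already exist are skipped on restart.
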

@PARTITIONERS.register_module()
class SubjectiveNaivePartitioner(NaivePartitioner):
    """Naive task partitioner for subjective evaluation. Compared to
    NaivePartitioner, this partitioner squashes multiple models into one
    task.

    Args:
        mode (str): The evaluation mode, one of 'singlescore', 'allpair',
            'm2n' or 'fixed'.
        out_dir (str): The output directory of tasks.
        keep_keys (List[str]): The keys to be kept from the experiment config
            to the task config.
    """

    def __init__(self,
                 mode: str,
                 out_dir: str,
                 models: Optional[List[ConfigDict]] = [],
                 base_models: Optional[List[ConfigDict]] = [],
                 compare_models: Optional[List[ConfigDict]] = [],
                 judge_models: Optional[List[ConfigDict]] = [],
                 meta_judge_model: Optional[ConfigDict] = None,
                 model_pairs: Optional[List[Tuple]] = None,
                 keep_keys: Optional[List[str]] = None,
                 infer_order: Optional[str] = 'random'):
        super().__init__(out_dir=out_dir, keep_keys=keep_keys)
        assert mode in ['singlescore', 'allpair', 'm2n', 'fixed']
        assert infer_order in ['random', 'double']
        self.mode = mode
        self.models = models
        self.base_models = base_models
        self.compare_models = compare_models
        self.model_pairs = model_pairs
        self.judge_models = judge_models
        self.meta_judge_model = meta_judge_model
        self.infer_order = infer_order
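
    # Mode semantics, as far as this file shows: 'singlescore' keeps the
    # models as-is and scores each one independently; 'allpair' compares
    # every unordered pair of models; 'm2n' compares base_models against
    # compare_models; 'fixed' is accepted by the assertion above but
    # get_model_combinations currently produces no combinations for it
    # (presumably it relies on externally supplied model_pairs).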

    def get_model_combinations(
            self,
            models: List[ConfigDict],
            base_models: Optional[List[ConfigDict]] = [],
            compare_models: Optional[List[ConfigDict]] = []) -> List:
        if self.mode == 'allpair':
            assert len(models) > 1
            return combinations(models, 2)
        elif self.mode == 'm2n':
            assert len(base_models) > 0 and len(compare_models) > 0
            model_combinations = list(product(base_models, compare_models))
            unique_combinations = remove_duplicate_pairs([
                combo for combo in model_combinations if combo[0] != combo[1]
            ])
            return unique_combinations
        elif self.mode == 'fixed':
            pass
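
    # A minimal sketch of the combination logic (hypothetical abbrs): in
    # 'allpair' mode, [a, b, c] yields (a, b), (a, c), (b, c); in 'm2n'
    # mode, base_models=[a] and compare_models=[b, c] yield (a, b) and
    # (a, c), with reversed duplicates removed by remove_duplicate_pairs.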

    def partition(self,
                  models: List[ConfigDict],
                  datasets: List[ConfigDict],
                  work_dir: str,
                  out_dir: str,
                  add_cfg: Dict = {}) -> List[Dict]:
        """Partition model-dataset pairs into tasks. Each task is defined as
        a dict and will run independently as a unit. Its structure is as
        follows:

        .. code-block:: python

            {
                'models': [],  # a list of model configs
                'datasets': [[]],  # a nested list of dataset configs,
                                   # each sublist corresponds to a model
                'work_dir': '',  # the work dir
            }

        Args:
            models (List[ConfigDict]): A list of model configs.
            datasets (List[ConfigDict]): A list of dataset configs.
            work_dir (str): The work dir for the task.
            out_dir (str): The full output path for the task, intended for
                Partitioners to check whether the task is finished via the
                existence of result files in this directory.

        Returns:
            List[Dict]: A list of tasks.
        """
        models = self.models if self.models != [] else models
        base_models, compare_models = self.base_models, self.compare_models
        judge_models, meta_judge_model = self.judge_models, self.meta_judge_model
        if self.mode != 'singlescore':
            models = self.get_model_combinations(models, base_models,
                                                 compare_models)
        model_dataset_combinations = [{'models': models, 'datasets': datasets}]
        tasks = super().partition(
            model_dataset_combinations=model_dataset_combinations,
            work_dir=work_dir,
            out_dir=out_dir,
            add_cfg=add_cfg)
        # Attach judge models and the meta judge model as new tasks: without
        # a meta judge model, every task is replicated once per judge model;
        # with one, an additional meta-review task stage is appended.
        tasks = replicate_tasks_with_judge_models(tasks, judge_models,
                                                  meta_judge_model)
        # Drop the datasets (and tasks) that are already finished.
        tasks = remove_already_tasks(tasks, work_dir, meta_judge_model)
        if isinstance(tasks, list) and len(tasks) != 0 and isinstance(
                tasks[0], list):
            # Two-stage case: per-judge tasks plus meta-review tasks.
            for task_stage in tasks:
                for task in task_stage:
                    task['infer_order'] = self.infer_order
        else:
            # Single-stage case: per-judge tasks only.
            for task in tasks:
                task['infer_order'] = self.infer_order
        return tasks
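
# A minimal usage sketch (all configs hypothetical; in a real run the
# partitioner is instantiated from the experiment config via PARTITIONERS
# rather than constructed by hand):
#
#     partitioner = SubjectiveNaivePartitioner(
#         mode='m2n',
#         out_dir='outputs/default/predictions',
#         base_models=[base_cfg],
#         compare_models=[model_a_cfg, model_b_cfg],
#         judge_models=[judge_cfg],
#         infer_order='double')
#     tasks = partitioner.partition(models=[], datasets=dataset_cfgs,
#                                   work_dir='outputs/default',
#                                   out_dir='outputs/default/results')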