OpenCompass/opencompass/partitioners/base.py

from abc import abstractmethod
from copy import deepcopy
from typing import Dict, List, Optional

from mmengine.config import ConfigDict

from opencompass.utils import get_logger, task_abbr_from_cfg


class BasePartitioner:
    """Common base for all partitioners.

    A partitioner takes an experiment config and splits it into a list of
    task configs.

    Args:
        out_dir (str): The output directory of tasks.
        keep_keys (Optional[List[str]], optional): Dot-separated key paths
            whose values are copied from the experiment config into the
            ``add_cfg`` dict that is handed to :meth:`partition`. Defaults
            to None, in which case the following keys are kept:

            - eval.runner.task.judge_cfg
            - eval.runner.task.dump_details
    """

    def __init__(self, out_dir: str, keep_keys: Optional[List[str]] = None):
        self.logger = get_logger()
        self.out_dir = out_dir
        # A fresh default list is built per instance when none is supplied.
        self.keep_keys = [
            'eval.runner.task.judge_cfg',
            'eval.runner.task.dump_details',
        ] if keep_keys is None else keep_keys

    def __call__(self, cfg: ConfigDict) -> List[Dict]:
        """Turn an experiment config into a list of tasks.

        The config is deep-copied first, so the caller's object is never
        modified. The values named by ``self.keep_keys`` are gathered into
        an ``add_cfg`` dict, and the actual splitting is delegated to
        :meth:`partition`. Each task is a dict that will run independently
        as a unit, structured as follows:

        .. code-block:: python

            {
                'models': [],  # a list of model configs
                'datasets': [[]],  # a nested list of dataset configs,
                                   # each list corresponds to a model
                'work_dir': '',  # the work dir
            }

        Args:
            cfg (ConfigDict): The config dict, containing "models",
                "datasets" and "work_dir" keys.

        Returns:
            List[Dict]: The tasks returned by :meth:`partition`.
        """
        cfg = deepcopy(cfg)
        models, datasets, work_dir = (
            cfg['models'],
            cfg['datasets'],
            cfg['work_dir'],
        )

        add_cfg = {}
        for k in self.keep_keys:
            try:
                *parents, leaf = k.split('.')
                src_node, dst_node = cfg, add_cfg
                for part in parents:
                    # Descend the source before touching the target, so a
                    # missing level raises before its mirror dict is made.
                    # Levels that were already walked stay in add_cfg even
                    # if a deeper lookup fails afterwards.
                    src_node = src_node[part]
                    if part not in dst_node:
                        dst_node[part] = {}
                    dst_node = dst_node[part]
                dst_node[leaf] = src_node[leaf]
            except Exception:
                # Best effort: any failure while resolving the path is
                # treated as "key absent" and only reported at debug level.
                self.logger.debug(f'Key {k} not found in config, ignored.')
        self.logger.debug(f'Additional config: {add_cfg}')

        tasks = self.partition(
            models,
            datasets,
            work_dir,
            self.out_dir,
            add_cfg=add_cfg,
        )

        self.logger.info(f'Partitioned into {len(tasks)} tasks.')
        for i, task in enumerate(tasks):
            self.logger.debug(f'Task {i}: {task_abbr_from_cfg(task)}')

        return tasks

    @abstractmethod
    def partition(
        self,
        models: List[ConfigDict],
        datasets: List[ConfigDict],
        work_dir: str,
        out_dir: str,
        add_cfg: Dict = {},
    ) -> List[Dict]:
        """Split model-dataset pairs into tasks.

        Subclasses provide the implementation; this base method has no
        body. Each task is a dict that will run independently as a unit,
        structured as follows:

        .. code-block:: python

            {
                'models': [],  # a list of model configs
                'datasets': [[]],  # a nested list of dataset configs,
                                   # each list corresponds to a model
                'work_dir': '',  # the work dir
                **add_cfg  # other keys to be added in the config
            }

        Args:
            models (List[ConfigDict]): A list of model configs.
            datasets (List[ConfigDict]): A list of dataset configs.
            work_dir (str): The work dir for the task.
            out_dir (str): The full output path for the task, intended for
                partitioners to check whether the task is finished via the
                existence of a result file in this directory.
            add_cfg (dict): Other common keys to be added in the task
                config, used to share the same config among tasks.
                Defaults to {}. The default is one dict object shared
                across calls, so implementations should not mutate it.

        Returns:
            List[Dict]: A list of tasks.
        """
initial commit 2023-07-04 21:34:55 +08:00			`from abc import abstractmethod`
			`from copy import deepcopy`
[Sync] update (#517) 2023-10-27 20:31:22 +08:00			`from typing import Dict, List, Optional`
initial commit 2023-07-04 21:34:55 +08:00
			`from mmengine.config import ConfigDict`

			`from opencompass.utils import get_logger, task_abbr_from_cfg`


			`class BasePartitioner:`
			`"""Base class for partitioners. A partitioner is responsible for`
			`partitioning the config into tasks.`

			`Args:`
			`out_dir (str): The output directory of tasks.`
[Sync] update (#517) 2023-10-27 20:31:22 +08:00			`keep_keys (Optional[List[str]], optional): The keys to be kept from the`
			`experiment config to the task config. Defaults to None. If None,`
			`the following keys will be kept:`

			`- eval.runner.task.judge_cfg`
			`- eval.runner.task.dump_details`
initial commit 2023-07-04 21:34:55 +08:00			`"""`

[Sync] update (#517) 2023-10-27 20:31:22 +08:00			`def __init__(self, out_dir: str, keep_keys: Optional[List[str]] = None):`
initial commit 2023-07-04 21:34:55 +08:00			`self.logger = get_logger()`
			`self.out_dir = out_dir`
[Sync] update (#517) 2023-10-27 20:31:22 +08:00			`if keep_keys is None:`
			`self.keep_keys = [`
			`'eval.runner.task.judge_cfg',`
			`'eval.runner.task.dump_details',`
			`]`
			`else:`
			`self.keep_keys = keep_keys`
initial commit 2023-07-04 21:34:55 +08:00
			`def __call__(self, cfg: ConfigDict) -> List[Dict]:`
			`"""Generate tasks from config. Each task is defined as a`
			`dict and will run independently as a unit. Its structure is as`
			`follows:`

			`.. code-block:: python`

			`{`
			`'models': [], # a list of model configs`
			`'datasets': [[]], # a nested list of dataset configs, each`
			`list corresponds to a model`
			`'work_dir': '', # the work dir`
			`}`

			`Args:`
			`cfg (ConfigDict): The config dict, containing "models", "dataset"`
			`and "work_dir" keys.`

			`Returns:`
			`List[Dict]: A list of tasks.`
			`"""`
			`cfg = deepcopy(cfg)`
			`models = cfg['models']`
			`datasets = cfg['datasets']`
			`work_dir = cfg['work_dir']`

[Sync] Initial support of subjective evaluation (#421) Co-authored-by: Leymore <zfz-960727@163.com> 2023-09-22 15:42:31 +08:00			`add_cfg = {}`
			`for k in self.keep_keys:`
			`try:`
			`key_chain = k.split('.')`
			`ori_ptr = cfg`
			`tgt_ptr = add_cfg`
			`for key in key_chain[:-1]:`
			`ori_ptr = ori_ptr[key]`
			`if key not in tgt_ptr:`
			`tgt_ptr[key] = {}`
			`tgt_ptr = tgt_ptr[key]`
			`tgt_ptr[key_chain[-1]] = ori_ptr[key_chain[-1]]`
[Fix] keep keys (#431) 2023-09-22 17:30:54 +08:00			`except Exception:`
[Sync] update (#517) 2023-10-27 20:31:22 +08:00			`self.logger.debug(f'Key {k} not found in config, ignored.')`
			`self.logger.debug(f'Additional config: {add_cfg}')`
[Sync] Initial support of subjective evaluation (#421) Co-authored-by: Leymore <zfz-960727@163.com> 2023-09-22 15:42:31 +08:00
			`tasks = self.partition(models,`
			`datasets,`
			`work_dir,`
			`self.out_dir,`
			`add_cfg=add_cfg)`
initial commit 2023-07-04 21:34:55 +08:00
			`self.logger.info(f'Partitioned into {len(tasks)} tasks.')`
			`for i, task in enumerate(tasks):`
			`self.logger.debug(f'Task {i}: {task_abbr_from_cfg(task)}')`

			`return tasks`

			`@abstractmethod`
[Sync] Initial support of subjective evaluation (#421) Co-authored-by: Leymore <zfz-960727@163.com> 2023-09-22 15:42:31 +08:00			`def partition(self,`
			`models: List[ConfigDict],`
			`datasets: List[ConfigDict],`
			`work_dir: str,`
			`out_dir: str,`
			`add_cfg: Dict = {}) -> List[Dict]:`
initial commit 2023-07-04 21:34:55 +08:00			`"""Partition model-dataset pairs into tasks. Each task is defined as a`
			`dict and will run independently as a unit. Its structure is as`
			`follows:`

			`.. code-block:: python`

			`{`
			`'models': [], # a list of model configs`
			`'datasets': [[]], # a nested list of dataset configs, each`
			`list corresponds to a model`
			`'work_dir': '', # the work dir`
[Sync] Initial support of subjective evaluation (#421) Co-authored-by: Leymore <zfz-960727@163.com> 2023-09-22 15:42:31 +08:00			`**add_cfg # other keys to be added in the config`
initial commit 2023-07-04 21:34:55 +08:00			`}`

			`Args:`
			`models (List[ConfigDict]): A list of model configs.`
			`datasets (List[ConfigDict]): A list of dataset configs.`
			`work_dir (str): The work dir for the task.`
			`out_dir (str): The full output path for the task, intended for`
			`Partitioners to check whether the task is finished via the`
			`existency of result file in this directory.`
[Sync] Initial support of subjective evaluation (#421) Co-authored-by: Leymore <zfz-960727@163.com> 2023-09-22 15:42:31 +08:00			`add_cfg (dict): Other common keys to be added in the task config,`
			`used to share the same config among tasks. Defaults to {}.`
initial commit 2023-07-04 21:34:55 +08:00
			`Returns:`
			`List[Dict]: A list of tasks.`
			`"""`