mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
Merge 642cd2839b
into 8c0ccf9a6b
This commit is contained in:
commit
32c1e38207
@ -141,7 +141,8 @@ class NumWorkerPartitioner(BasePartitioner):
|
||||
dataset = build_dataset_from_cfg(dataset)
|
||||
self.dataset_size[dataset_abbr] = len(dataset.test)
|
||||
|
||||
mmengine.mkdir_or_exist('.cache/')
|
||||
tmp_dir_root = osp.dirname(self.dataset_size_path)
|
||||
mmengine.mkdir_or_exist(tmp_dir_root)
|
||||
mmengine.dump(self.dataset_size,
|
||||
self.dataset_size_path,
|
||||
indent=4,
|
||||
|
@ -214,7 +214,8 @@ class SizePartitioner(BasePartitioner):
|
||||
dataset = build_dataset_from_cfg(dataset)
|
||||
self.dataset_size[dataset_abbr] = len(dataset.test)
|
||||
|
||||
mmengine.mkdir_or_exist('.cache/')
|
||||
tmp_dir_root = osp.dirname(self.dataset_size_path)
|
||||
mmengine.mkdir_or_exist(tmp_dir_root)
|
||||
mmengine.dump(self.dataset_size,
|
||||
self.dataset_size_path,
|
||||
indent=4,
|
||||
|
@ -198,7 +198,8 @@ class SubjectiveNumWorkerPartitioner(SubjectiveNaivePartitioner):
|
||||
dataset = build_dataset_from_cfg(dataset)
|
||||
self.dataset_size[dataset_abbr] = len(dataset.test)
|
||||
|
||||
mmengine.mkdir_or_exist('.cache/')
|
||||
tmp_dir_root = osp.dirname(self.dataset_size_path)
|
||||
mmengine.mkdir_or_exist(tmp_dir_root)
|
||||
mmengine.dump(self.dataset_size,
|
||||
self.dataset_size_path,
|
||||
indent=4,
|
||||
|
@ -274,7 +274,8 @@ class SubjectiveSizePartitioner(SubjectiveNaivePartitioner):
|
||||
dataset = build_dataset_from_cfg(dataset)
|
||||
self.dataset_size[dataset_abbr] = len(dataset.test)
|
||||
|
||||
mmengine.mkdir_or_exist('.cache/')
|
||||
tmp_dir_root = osp.dirname(self.dataset_size_path)
|
||||
mmengine.mkdir_or_exist(tmp_dir_root)
|
||||
mmengine.dump(self.dataset_size,
|
||||
self.dataset_size_path,
|
||||
indent=4,
|
||||
|
@ -15,18 +15,21 @@ class BaseRunner:
|
||||
task (ConfigDict): Task type config.
|
||||
debug (bool): Whether to run in debug mode.
|
||||
lark_bot_url (str): Lark bot url.
|
||||
tmp_dir (str): The directory to store temporary files.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
task: ConfigDict,
|
||||
debug: bool = False,
|
||||
lark_bot_url: str = None):
|
||||
lark_bot_url: str = None,
|
||||
tmp_dir: str = 'tmp'):
|
||||
self.task_cfg = Config(task)
|
||||
self.debug = debug
|
||||
if lark_bot_url:
|
||||
self.lark_reporter = LarkReporter(lark_bot_url)
|
||||
else:
|
||||
self.lark_reporter = None
|
||||
self.tmp_dir = tmp_dir
|
||||
|
||||
def __call__(self, tasks: List[Dict[str, Any]]):
|
||||
"""Launch multiple tasks and summarize the results.
|
||||
|
@ -33,6 +33,9 @@ class DLCRunner(BaseRunner):
|
||||
retry (int): Number of retries when job failed. Default: 2.
|
||||
debug (bool): Whether to run in debug mode. Default: False.
|
||||
lark_bot_url (str): Lark bot url. Default: None.
|
||||
keep_tmp_file (bool): Whether to keep the temporary file.
|
||||
Default: True.
|
||||
tmp_dir (str): The directory to store temporary files. Default: 'tmp'.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
@ -45,8 +48,12 @@ class DLCRunner(BaseRunner):
|
||||
debug: bool = False,
|
||||
lark_bot_url: str = None,
|
||||
keep_tmp_file: bool = True,
|
||||
tmp_dir: str = 'tmp',
|
||||
):
|
||||
super().__init__(task=task, debug=debug, lark_bot_url=lark_bot_url)
|
||||
super().__init__(task=task,
|
||||
debug=debug,
|
||||
lark_bot_url=lark_bot_url,
|
||||
tmp_dir=tmp_dir)
|
||||
self.aliyun_cfg = aliyun_cfg
|
||||
self.max_num_workers = max_num_workers
|
||||
self.retry = retry
|
||||
@ -114,12 +121,13 @@ class DLCRunner(BaseRunner):
|
||||
break
|
||||
|
||||
# Dump task config to file
|
||||
mmengine.mkdir_or_exist('tmp/')
|
||||
mmengine.mkdir_or_exist(self.tmp_dir)
|
||||
# Using uuid to avoid filename conflict
|
||||
import uuid
|
||||
|
||||
uuid_str = str(uuid.uuid4())
|
||||
param_file = f'tmp/{uuid_str}_params.py'
|
||||
param_file = f'{uuid_str}_params.py'
|
||||
param_file = osp.join(self.tmp_dir, param_file)
|
||||
pwd = os.getcwd()
|
||||
try:
|
||||
cfg.dump(param_file)
|
||||
|
@ -48,6 +48,10 @@ class LocalRunner(BaseRunner):
|
||||
Defaults to 1.
|
||||
debug (bool): Whether to run in debug mode.
|
||||
lark_bot_url (str): Lark bot url.
|
||||
keep_tmp_file (bool): Whether to keep the temporary file. Defaults to
|
||||
False.
|
||||
tmp_dir (str): The directory to store temporary files.
|
||||
Defaults to 'tmp'.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
@ -57,8 +61,12 @@ class LocalRunner(BaseRunner):
|
||||
max_workers_per_gpu: int = 1,
|
||||
lark_bot_url: str = None,
|
||||
keep_tmp_file: bool = False,
|
||||
tmp_dir: str = 'tmp',
|
||||
**kwargs):
|
||||
super().__init__(task=task, debug=debug, lark_bot_url=lark_bot_url)
|
||||
super().__init__(task=task,
|
||||
debug=debug,
|
||||
lark_bot_url=lark_bot_url,
|
||||
tmp_dir=tmp_dir)
|
||||
self.max_num_workers = max_num_workers
|
||||
self.max_workers_per_gpu = max_workers_per_gpu
|
||||
self.keep_tmp_file = keep_tmp_file
|
||||
@ -101,11 +109,12 @@ class LocalRunner(BaseRunner):
|
||||
num_gpus = task.num_gpus
|
||||
assert len(all_gpu_ids) >= num_gpus
|
||||
# get cmd
|
||||
mmengine.mkdir_or_exist('tmp/')
|
||||
mmengine.mkdir_or_exist(self.tmp_dir)
|
||||
import uuid
|
||||
uuid_str = str(uuid.uuid4())
|
||||
|
||||
param_file = f'tmp/{uuid_str}_params.py'
|
||||
param_file = f'{uuid_str}_params.py'
|
||||
param_file = osp.join(self.tmp_dir, param_file)
|
||||
try:
|
||||
task.cfg.dump(param_file)
|
||||
# if use torchrun, restrict it behaves the same as non
|
||||
@ -135,7 +144,8 @@ class LocalRunner(BaseRunner):
|
||||
else:
|
||||
task.run()
|
||||
else:
|
||||
tmp_logs = f'tmp/{os.getpid()}_debug.log'
|
||||
tmp_logs = f'{os.getpid()}_debug.log'
|
||||
tmp_logs = osp.join(self.tmp_dir, tmp_logs)
|
||||
get_logger().warning(
|
||||
f'Debug mode, log will be saved to {tmp_logs}')
|
||||
with open(tmp_logs, 'a') as log_file:
|
||||
@ -207,14 +217,13 @@ class LocalRunner(BaseRunner):
|
||||
|
||||
task_name = task.name
|
||||
|
||||
pwd = os.getcwd()
|
||||
# Dump task config to file
|
||||
mmengine.mkdir_or_exist('tmp/')
|
||||
mmengine.mkdir_or_exist(self.tmp_dir)
|
||||
# Using uuid to avoid filename conflict
|
||||
import uuid
|
||||
uuid_str = str(uuid.uuid4())
|
||||
param_file = f'{pwd}/tmp/{uuid_str}_params.py'
|
||||
|
||||
param_file = f'{uuid_str}_params.py'
|
||||
param_file = osp.join(self.tmp_dir, param_file)
|
||||
try:
|
||||
task.cfg.dump(param_file)
|
||||
tmpl = get_command_template(gpu_ids)
|
||||
|
@ -161,6 +161,8 @@ class LocalAPIRunner(BaseRunner):
|
||||
Defaults to 16.
|
||||
debug (bool): Whether to run in debug mode.
|
||||
lark_bot_url (str): Lark bot url.
|
||||
tmp_dir (str): The directory to store temporary files.
|
||||
Defaults to 'tmp'.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
@ -168,8 +170,12 @@ class LocalAPIRunner(BaseRunner):
|
||||
concurrent_users: int,
|
||||
max_num_workers: int = 16,
|
||||
debug: bool = False,
|
||||
lark_bot_url: str = None):
|
||||
super().__init__(task=task, debug=debug, lark_bot_url=lark_bot_url)
|
||||
lark_bot_url: str = None,
|
||||
tmp_dir: str = 'tmp'):
|
||||
super().__init__(task=task,
|
||||
debug=debug,
|
||||
lark_bot_url=lark_bot_url,
|
||||
tmp_dir=tmp_dir)
|
||||
self.max_num_workers = max_num_workers
|
||||
self.concurrent_users = concurrent_users
|
||||
assert task['type'] in [
|
||||
@ -194,8 +200,9 @@ class LocalAPIRunner(BaseRunner):
|
||||
task = TASKS.build(dict(cfg=task, type=self.task_cfg['type']))
|
||||
task_name = task.name
|
||||
# get cmd
|
||||
mmengine.mkdir_or_exist('tmp/')
|
||||
param_file = f'tmp/{os.getpid()}_params.py'
|
||||
mmengine.mkdir_or_exist(self.tmp_dir)
|
||||
param_file = f'{os.getpid()}_params.py'
|
||||
param_file = osp.join(self.tmp_dir, param_file)
|
||||
try:
|
||||
task.cfg.dump(param_file)
|
||||
cmd = task.get_command(cfg_path=param_file,
|
||||
|
@ -33,6 +33,8 @@ class SlurmRunner(BaseRunner):
|
||||
lark_bot_url (str): Lark bot url. Defaults to None.
|
||||
extra_command (List, optional): Extra slurm command.
|
||||
For example ['-c 12', '-w node1']. Defaults to None.
|
||||
tmp_dir (str): The directory to store temporary files.
|
||||
Defaults to 'tmp'.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
@ -44,8 +46,12 @@ class SlurmRunner(BaseRunner):
|
||||
qos: str = None,
|
||||
debug: bool = False,
|
||||
lark_bot_url: str = None,
|
||||
extra_command: Optional[List[str]] = None):
|
||||
super().__init__(task=task, debug=debug, lark_bot_url=lark_bot_url)
|
||||
extra_command: Optional[List[str]] = None,
|
||||
tmp_dir: str = 'tmp'):
|
||||
super().__init__(task=task,
|
||||
debug=debug,
|
||||
lark_bot_url=lark_bot_url,
|
||||
tmp_dir=tmp_dir)
|
||||
self.max_num_workers = max_num_workers
|
||||
self.retry = retry
|
||||
self.partition = partition
|
||||
@ -93,8 +99,9 @@ class SlurmRunner(BaseRunner):
|
||||
task_name = task.name
|
||||
|
||||
# Dump task config to file
|
||||
mmengine.mkdir_or_exist('tmp/')
|
||||
param_file = f'tmp/{os.getpid()}_params.py'
|
||||
mmengine.mkdir_or_exist(self.tmp_dir)
|
||||
param_file = f'{os.getpid()}_params.py'
|
||||
param_file = osp.join(self.tmp_dir, param_file)
|
||||
try:
|
||||
cfg.dump(param_file)
|
||||
|
||||
|
@ -47,6 +47,10 @@ class SlurmSequentialRunner(BaseRunner):
|
||||
lark_bot_url (str): Lark bot url. Defaults to None.
|
||||
extra_command (List, optional): Extra slurm command.
|
||||
For example ['-c 12', '-w node1']. Defaults to None.
|
||||
keep_tmp_file (bool): Whether to keep the temporary file. Defaults to
|
||||
False.
|
||||
tmp_dir (str): The directory to store temporary files.
|
||||
Defaults to 'tmp'.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
@ -60,8 +64,12 @@ class SlurmSequentialRunner(BaseRunner):
|
||||
debug: bool = False,
|
||||
lark_bot_url: str = None,
|
||||
extra_command: Optional[List[str]] = None,
|
||||
keep_tmp_file: bool = False):
|
||||
super().__init__(task=task, debug=debug, lark_bot_url=lark_bot_url)
|
||||
keep_tmp_file: bool = False,
|
||||
tmp_dir: str = 'tmp'):
|
||||
super().__init__(task=task,
|
||||
debug=debug,
|
||||
lark_bot_url=lark_bot_url,
|
||||
tmp_dir=tmp_dir)
|
||||
self.max_num_workers = max_num_workers
|
||||
self.retry = retry
|
||||
self.partition = partition
|
||||
@ -172,11 +180,12 @@ class SlurmSequentialRunner(BaseRunner):
|
||||
task_name = self.task_prefix + task_name
|
||||
|
||||
# Dump task config to file
|
||||
mmengine.mkdir_or_exist('tmp/')
|
||||
mmengine.mkdir_or_exist(self.tmp_dir)
|
||||
# Using uuid to avoid filename conflict
|
||||
import uuid
|
||||
uuid_str = str(uuid.uuid4())
|
||||
param_file = f'tmp/{uuid_str}_params.py'
|
||||
param_file = f'{uuid_str}_params.py'
|
||||
param_file = osp.join(self.tmp_dir, param_file)
|
||||
process = None
|
||||
try:
|
||||
cfg.dump(param_file)
|
||||
|
@ -36,6 +36,9 @@ class VOLCRunner(BaseRunner):
|
||||
retry (int): Number of retries when job failed. Default: 2.
|
||||
debug (bool): Whether to run in debug mode. Default: False.
|
||||
lark_bot_url (str): Lark bot url. Default: None.
|
||||
keep_tmp_file (bool): Whether to keep the temporary file. Default:
|
||||
True.
|
||||
tmp_dir (str): The directory to store temporary files. Default: 'tmp'.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
@ -48,8 +51,12 @@ class VOLCRunner(BaseRunner):
|
||||
retry: int = 2,
|
||||
debug: bool = False,
|
||||
lark_bot_url: str = None,
|
||||
keep_tmp_file: bool = True):
|
||||
super().__init__(task=task, debug=debug, lark_bot_url=lark_bot_url)
|
||||
keep_tmp_file: bool = True,
|
||||
tmp_dir: str = 'tmp'):
|
||||
super().__init__(task=task,
|
||||
debug=debug,
|
||||
lark_bot_url=lark_bot_url,
|
||||
tmp_dir=tmp_dir)
|
||||
self.volcano_cfg = volcano_cfg
|
||||
self.max_num_workers = max_num_workers
|
||||
self.retry = retry
|
||||
@ -101,13 +108,15 @@ class VOLCRunner(BaseRunner):
|
||||
# Build up VCC command
|
||||
pwd = os.getcwd()
|
||||
# Dump task config to file
|
||||
mmengine.mkdir_or_exist('tmp/')
|
||||
mmengine.mkdir_or_exist(self.tmp_dir)
|
||||
# Using uuid to avoid filename conflict
|
||||
import uuid
|
||||
uuid_str = str(uuid.uuid4())
|
||||
param_file = f'{pwd}/tmp/{uuid_str}_params.py'
|
||||
param_file = f'{uuid_str}_params.py'
|
||||
param_file = osp.join(self.tmp_dir, param_file)
|
||||
|
||||
volc_cfg_file = f'{pwd}/tmp/{uuid_str}_cfg.yaml'
|
||||
volc_cfg_file = f'{uuid_str}_cfg.yaml'
|
||||
volc_cfg_file = osp.join(self.tmp_dir, volc_cfg_file)
|
||||
volc_cfg = self._choose_flavor(num_gpus)
|
||||
with open(volc_cfg_file, 'w') as fp:
|
||||
yaml.dump(volc_cfg, fp, sort_keys=False)
|
||||
|
Loading…
Reference in New Issue
Block a user