[Sync] Use finally to clean up temp files (#337)

Repository: https://github.com/open-compass/opencompass.git (mirror)
Parent commit: 2cd994c3d1
This commit: ce65d3393b

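Every runner hunk below (DLCRunner, LocalRunner, SlurmRunner) applies the same fix: the temporary parameter file is still created up front, but everything that can fail is now wrapped in a try block, and os.remove moves into finally, so tmp/<pid>_params.py is deleted even when the launched command fails or an exception is raised. The remaining hunks (opencompass.utils.run and run.py) belong to the same sync and replace direct runner construction with config filling. A minimal standalone sketch of the cleanup pattern, not the runners' actual code (run_with_temp_params and run_fn are invented names):

    import os

    import mmengine


    def run_with_temp_params(task_cfg, run_fn):
        """Dump a config to a temp file, run a callable on it, always clean up."""
        mmengine.mkdir_or_exist('tmp/')
        param_file = f'tmp/{os.getpid()}_params.py'
        try:
            task_cfg.dump(param_file)   # may raise
            return run_fn(param_file)   # may fail or raise
        finally:
            # Reached on success, failure and exceptions alike, so the temp
            # file can no longer leak. The commit calls os.remove directly;
            # the exists() guard is only for this sketch.
            if os.path.exists(param_file):
                os.remove(param_file)
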
@@ -86,65 +86,70 @@ class DLCRunner(BaseRunner):
         # Dump task config to file
         mmengine.mkdir_or_exist('tmp/')
         param_file = f'tmp/{os.getpid()}_params.py'
-        task_cfg.dump(param_file)
-
-        # Build up DLC command
-        pwd = os.getcwd()
-        shell_cmd = (f'source {self.aliyun_cfg["bashrc_path"]}; '
-                     f'conda activate {self.aliyun_cfg["conda_env_name"]}; '
-                     f'cd {pwd}; '
-                     '{task_cmd}')
-
-        tmpl = ('dlc create job'
-                f" --command '{shell_cmd}'"
-                f' --name {task_name[:512]}'
-                ' --kind BatchJob'
-                f" -c {self.aliyun_cfg['dlc_config_path']}"
-                f" --workspace_id {self.aliyun_cfg['workspace_id']}"
-                ' --worker_count 1'
-                f' --worker_cpu {max(num_gpus * 6, 8)}'
-                f' --worker_gpu {num_gpus}'
-                f' --worker_memory {max(num_gpus * 32, 48)}'
-                f" --worker_image {self.aliyun_cfg['worker_image']}"
-                ' --interactive')
-        get_cmd = partial(task.get_command, cfg_path=param_file, template=tmpl)
-        cmd = get_cmd()
-
-        logger = get_logger()
-        logger.debug(f'Running command: {cmd}')
-
-        # Run command with retry
-        if self.debug:
-            stdout = None
-        else:
-            out_path = task.get_log_path(file_extension='out')
-            mmengine.mkdir_or_exist(osp.split(out_path)[0])
-            stdout = open(out_path, 'w', encoding='utf-8')
-
-        if random_sleep:
-            time.sleep(random.randint(0, 10))
-        result = subprocess.run(cmd,
-                                shell=True,
-                                text=True,
-                                stdout=stdout,
-                                stderr=stdout)
-
-        retry = self.retry
-        output_paths = task.get_output_paths()
-        while self._job_failed(result.returncode, output_paths) and retry > 0:
-            retry -= 1
-            if random_sleep:
-                time.sleep(random.randint(0, 10))
-            # Re-generate command to refresh ports.
-            cmd = get_cmd()
-            result = subprocess.run(cmd,
-                                    shell=True,
-                                    text=True,
-                                    stdout=stdout,
-                                    stderr=stdout)
-
-        # Clean up
-        os.remove(param_file)
+        try:
+            task_cfg.dump(param_file)
+
+            # Build up DLC command
+            pwd = os.getcwd()
+            shell_cmd = (
+                f'source {self.aliyun_cfg["bashrc_path"]}; '
+                f'conda activate {self.aliyun_cfg["conda_env_name"]}; '
+                f'cd {pwd}; '
+                '{task_cmd}')
+
+            tmpl = ('dlc create job'
+                    f" --command '{shell_cmd}'"
+                    f' --name {task_name[:512]}'
+                    ' --kind BatchJob'
+                    f" -c {self.aliyun_cfg['dlc_config_path']}"
+                    f" --workspace_id {self.aliyun_cfg['workspace_id']}"
+                    ' --worker_count 1'
+                    f' --worker_cpu {max(num_gpus * 6, 8)}'
+                    f' --worker_gpu {num_gpus}'
+                    f' --worker_memory {max(num_gpus * 32, 48)}'
+                    f" --worker_image {self.aliyun_cfg['worker_image']}"
+                    ' --interactive')
+            get_cmd = partial(task.get_command,
+                              cfg_path=param_file,
+                              template=tmpl)
+            cmd = get_cmd()
+
+            logger = get_logger()
+            logger.debug(f'Running command: {cmd}')
+
+            # Run command with retry
+            if self.debug:
+                stdout = None
+            else:
+                out_path = task.get_log_path(file_extension='out')
+                mmengine.mkdir_or_exist(osp.split(out_path)[0])
+                stdout = open(out_path, 'w', encoding='utf-8')
+
+            if random_sleep:
+                time.sleep(random.randint(0, 10))
+            result = subprocess.run(cmd,
+                                    shell=True,
+                                    text=True,
+                                    stdout=stdout,
+                                    stderr=stdout)
+
+            retry = self.retry
+            output_paths = task.get_output_paths()
+            while self._job_failed(result.returncode,
+                                   output_paths) and retry > 0:
+                retry -= 1
+                if random_sleep:
+                    time.sleep(random.randint(0, 10))
+                # Re-generate command to refresh ports.
+                cmd = get_cmd()
+                result = subprocess.run(cmd,
+                                        shell=True,
+                                        text=True,
+                                        stdout=stdout,
+                                        stderr=stdout)
+        finally:
+            # Clean up
+            os.remove(param_file)
         return task_name, result.returncode
 
     def _job_failed(self, return_code: int, output_paths: List[str]) -> bool:

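A reading aid for the nested templating above: shell_cmd keeps the literal '{task_cmd}' placeholder when it is embedded into the dlc command line, so the full command is only completed later by task.get_command(); that is also why get_cmd() can simply be called again inside the retry loop. A small illustration with made-up config values (not the real aliyun_cfg):

    # Illustrative values only; the real ones come from self.aliyun_cfg.
    aliyun_cfg = {'bashrc_path': '~/.bashrc', 'conda_env_name': 'opencompass'}
    pwd = '/workspace/opencompass'

    shell_cmd = (f'source {aliyun_cfg["bashrc_path"]}; '
                 f'conda activate {aliyun_cfg["conda_env_name"]}; '
                 f'cd {pwd}; '
                 '{task_cmd}')              # left unformatted on purpose

    tmpl = ('dlc create job'
            f" --command '{shell_cmd}'"
            ' --kind BatchJob')             # remaining flags omitted here

    # The template still contains the placeholder; task.get_command() fills it.
    print('{task_cmd}' in tmpl)  # True
    print(tmpl.format(task_cmd='python run_task.py tmp/1_params.py'))
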
@@ -62,15 +62,17 @@ class LocalRunner(BaseRunner):
                 # get cmd
                 mmengine.mkdir_or_exist('tmp/')
                 param_file = f'tmp/{os.getpid()}_params.py'
-                task.cfg.dump(param_file)
-                cmd = task.get_command(cfg_path=param_file,
-                                       template='{task_cmd}')
-                # run in subprocess if starts with torchrun etc.
-                if cmd.startswith('python'):
-                    task.run()
-                else:
-                    subprocess.run(cmd, shell=True, text=True)
-                os.remove(param_file)
+                try:
+                    task.cfg.dump(param_file)
+                    cmd = task.get_command(cfg_path=param_file,
+                                           template='{task_cmd}')
+                    # run in subprocess if starts with torchrun etc.
+                    if cmd.startswith('python'):
+                        task.run()
+                    else:
+                        subprocess.run(cmd, shell=True, text=True)
+                finally:
+                    os.remove(param_file)
                 status.append((task_name, 0))
         else:
             import torch

@@ -141,31 +143,34 @@ class LocalRunner(BaseRunner):
         # Dump task config to file
         mmengine.mkdir_or_exist('tmp/')
         param_file = f'tmp/{os.getpid()}_{index}_params.py'
-        task.cfg.dump(param_file)
-
-        # Build up slurm command
-        tmpl = 'CUDA_VISIBLE_DEVICES=' + ','.join(str(i) for i in gpu_ids)
-        tmpl += ' {task_cmd}'
-        get_cmd = partial(task.get_command, cfg_path=param_file, template=tmpl)
-        cmd = get_cmd()
-
-        logger = get_logger()
-        logger.debug(f'Running command: {cmd}')
-
-        # Run command
-        out_path = task.get_log_path(file_extension='out')
-        mmengine.mkdir_or_exist(osp.split(out_path)[0])
-        stdout = open(out_path, 'w', encoding='utf-8')
-
-        result = subprocess.run(cmd,
-                                shell=True,
-                                text=True,
-                                stdout=stdout,
-                                stderr=stdout)
-
-        if result.returncode != 0:
-            logger.warning(f'task {task_name} fail, see\n{out_path}')
-
-        # Clean up
-        os.remove(param_file)
+        try:
+            task.cfg.dump(param_file)
+
+            # Build up slurm command
+            tmpl = 'CUDA_VISIBLE_DEVICES=' + ','.join(str(i) for i in gpu_ids)
+            tmpl += ' {task_cmd}'
+            get_cmd = partial(task.get_command,
+                              cfg_path=param_file,
+                              template=tmpl)
+            cmd = get_cmd()
+
+            logger = get_logger()
+            logger.debug(f'Running command: {cmd}')
+
+            # Run command
+            out_path = task.get_log_path(file_extension='out')
+            mmengine.mkdir_or_exist(osp.split(out_path)[0])
+            stdout = open(out_path, 'w', encoding='utf-8')
+
+            result = subprocess.run(cmd,
+                                    shell=True,
+                                    text=True,
+                                    stdout=stdout,
+                                    stderr=stdout)
+
+            if result.returncode != 0:
+                logger.warning(f'task {task_name} fail, see\n{out_path}')
+        finally:
+            # Clean up
+            os.remove(param_file)
         return task_name, result.returncode

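The template assembled in this hunk pins the subprocess to the GPUs the runner reserved for the task, again through the '{task_cmd}' placeholder. A short illustration with an assumed GPU assignment:

    gpu_ids = [2, 3]  # assumed assignment from the runner's GPU pool

    tmpl = 'CUDA_VISIBLE_DEVICES=' + ','.join(str(i) for i in gpu_ids)
    tmpl += ' {task_cmd}'

    # task.get_command() later substitutes the real launch command; a plain
    # str.format shows the shape of the result:
    print(tmpl.format(task_cmd='python run_task.py tmp/123_0_params.py'))
    # -> CUDA_VISIBLE_DEVICES=2,3 python run_task.py tmp/123_0_params.py
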
@@ -91,60 +91,64 @@ class SlurmRunner(BaseRunner):
         # Dump task config to file
         mmengine.mkdir_or_exist('tmp/')
         param_file = f'tmp/{os.getpid()}_params.py'
-        task_cfg.dump(param_file)
-
-        # Build up slurm command
-        tmpl = 'srun'
-        if self.partition:
-            tmpl += f' -p {self.partition}'
-        if self.quotatype:
-            tmpl += f' --quotatype={self.quotatype}'
-        if self.qos:
-            tmpl += f' --qos={self.qos}'
-        if num_gpus > 0:
-            tmpl += f' --gres=gpu:{num_gpus}'
-        tmpl += f" -N1 -J '{task_name[:512]}'" + ' {task_cmd}'
-        get_cmd = partial(task.get_command, cfg_path=param_file, template=tmpl)
-        cmd = get_cmd()
-
-        logger = get_logger()
-        logger.debug(f'Running command: {cmd}')
-
-        # Run command with retry
-        if self.debug:
-            stdout = None
-        else:
-            out_path = task.get_log_path(file_extension='out')
-            mmengine.mkdir_or_exist(osp.split(out_path)[0])
-            stdout = open(out_path, 'w', encoding='utf-8')
-
-        if random_sleep:
-            time.sleep(random.randint(0, 10))
-        result = subprocess.run(cmd,
-                                shell=True,
-                                text=True,
-                                stdout=stdout,
-                                stderr=stdout)
-
-        retry = self.retry
-        output_paths = task.get_output_paths()
-        while self._job_failed(result.returncode, output_paths) and retry > 0:
-            retry -= 1
-            if random_sleep:
-                time.sleep(random.randint(0, 10))
-            # Re-generate command to refresh ports.
-            cmd = get_cmd()
-            result = subprocess.run(cmd,
-                                    shell=True,
-                                    text=True,
-                                    stdout=stdout,
-                                    stderr=stdout)
-
-        if result.returncode != 0 and not self.debug:
-            logger.warning(f'task {task_name} fail, see\n{out_path}')
-
-        # Clean up
-        os.remove(param_file)
+        try:
+            task_cfg.dump(param_file)
+
+            # Build up slurm command
+            tmpl = 'srun'
+            if self.partition:
+                tmpl += f' -p {self.partition}'
+            if self.quotatype:
+                tmpl += f' --quotatype={self.quotatype}'
+            if self.qos:
+                tmpl += f' --qos={self.qos}'
+            if num_gpus > 0:
+                tmpl += f' --gres=gpu:{num_gpus}'
+            tmpl += f" -N1 -J '{task_name[:512]}'" + ' {task_cmd}'
+            get_cmd = partial(task.get_command,
+                              cfg_path=param_file,
+                              template=tmpl)
+            cmd = get_cmd()
+
+            logger = get_logger()
+            logger.debug(f'Running command: {cmd}')
+
+            # Run command with retry
+            if self.debug:
+                stdout = None
+            else:
+                out_path = task.get_log_path(file_extension='out')
+                mmengine.mkdir_or_exist(osp.split(out_path)[0])
+                stdout = open(out_path, 'w', encoding='utf-8')
+
+            if random_sleep:
+                time.sleep(random.randint(0, 10))
+            result = subprocess.run(cmd,
+                                    shell=True,
+                                    text=True,
+                                    stdout=stdout,
+                                    stderr=stdout)
+
+            retry = self.retry
+            output_paths = task.get_output_paths()
+            while self._job_failed(result.returncode,
+                                   output_paths) and retry > 0:
+                retry -= 1
+                if random_sleep:
+                    time.sleep(random.randint(0, 10))
+                # Re-generate command to refresh ports.
+                cmd = get_cmd()
+                result = subprocess.run(cmd,
+                                        shell=True,
+                                        text=True,
+                                        stdout=stdout,
+                                        stderr=stdout)
+
+            if result.returncode != 0 and not self.debug:
+                logger.warning(f'task {task_name} fail, see\n{out_path}')
+        finally:
+            # Clean up
+            os.remove(param_file)
         return task_name, result.returncode
 
     def _job_failed(self, return_code: int, output_paths: List[str]) -> bool:

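Putting this hunk's pieces together: the srun template grows flag by flag from the runner's settings and ends with the '{task_cmd}' placeholder, and because get_cmd is a functools.partial over task.get_command, the retry loop can rebuild the full command (including any port the task picks) on every attempt. An illustration with assumed settings, not output taken from a real run:

    # Assumed runner settings, for illustration only.
    partition, quotatype, qos, num_gpus = 'gpu-part', 'auto', None, 2
    task_name = 'demo-task'

    tmpl = 'srun'
    if partition:
        tmpl += f' -p {partition}'
    if quotatype:
        tmpl += f' --quotatype={quotatype}'
    if qos:
        tmpl += f' --qos={qos}'
    if num_gpus > 0:
        tmpl += f' --gres=gpu:{num_gpus}'
    tmpl += f" -N1 -J '{task_name[:512]}'" + ' {task_cmd}'

    print(tmpl.format(task_cmd='python run_task.py tmp/123_params.py'))
    # -> srun -p gpu-part --quotatype=auto --gres=gpu:2 -N1 -J 'demo-task'
    #    python run_task.py tmp/123_params.py
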
@@ -3,7 +3,9 @@ from typing import List, Union
 import tabulate
 from mmengine.config import Config
 
+from opencompass.partitioners import NaivePartitioner, SizePartitioner
 from opencompass.runners import DLCRunner, LocalRunner, SlurmRunner
+from opencompass.tasks import OpenICLEvalTask, OpenICLInferTask
 from opencompass.utils import get_logger, match_files
 
 

@@ -118,54 +120,61 @@ def exec_mm_infer_runner(tasks, args, cfg):
     runner(tasks)
 
 
-def exec_infer_runner(tasks, args, cfg):
-    """execute infer runner according to args."""
-    if args.slurm:
-        runner = SlurmRunner(dict(type='OpenICLInferTask'),
-                             max_num_workers=args.max_num_workers,
-                             partition=args.partition,
-                             quotatype=args.quotatype,
-                             qos=args.qos,
-                             retry=args.retry,
-                             debug=args.debug,
-                             lark_bot_url=cfg['lark_bot_url'])
-    elif args.dlc:
-        runner = DLCRunner(dict(type='OpenICLInferTask'),
-                           max_num_workers=args.max_num_workers,
-                           aliyun_cfg=Config.fromfile(args.aliyun_cfg),
-                           retry=args.retry,
-                           debug=args.debug,
-                           lark_bot_url=cfg['lark_bot_url'])
-    else:
-        runner = LocalRunner(task=dict(type='OpenICLInferTask'),
-                             max_num_workers=args.max_num_workers,
-                             max_workers_per_gpu=args.max_workers_per_gpu,
-                             debug=args.debug,
-                             lark_bot_url=cfg['lark_bot_url'])
-    runner(tasks)
-
-
-def exec_eval_runner(tasks, args, cfg):
-    """execute infer runner according to args."""
-    if args.slurm:
-        runner = SlurmRunner(dict(type='OpenICLEvalTask'),
-                             max_num_workers=args.max_num_workers,
-                             partition=args.partition,
-                             quotatype=args.quotatype,
-                             qos=args.qos,
-                             retry=args.retry,
-                             debug=args.debug,
-                             lark_bot_url=cfg['lark_bot_url'])
-    elif args.dlc:
-        runner = DLCRunner(dict(type='OpenICLEvalTask'),
-                           max_num_workers=args.max_num_workers,
-                           aliyun_cfg=Config.fromfile(args.aliyun_cfg),
-                           retry=args.retry,
-                           debug=args.debug,
-                           lark_bot_url=cfg['lark_bot_url'])
-    else:
-        runner = LocalRunner(task=dict(type='OpenICLEvalTask'),
-                             max_num_workers=args.max_num_workers,
-                             debug=args.debug,
-                             lark_bot_url=cfg['lark_bot_url'])
-    runner(tasks)
+def get_config_type(obj) -> str:
+    return f'{obj.__module__}.{obj.__name__}'
+
+
+def fill_infer_cfg(cfg, args):
+    new_cfg = dict(infer=dict(
+        partitioner=dict(type=get_config_type(SizePartitioner),
+                         max_task_size=args.max_partition_size,
+                         gen_task_coef=args.gen_task_coef),
+        runner=dict(
+            max_num_workers=args.max_num_workers,
+            debug=args.debug,
+            task=dict(type=get_config_type(OpenICLInferTask)),
+            lark_bot_url=cfg['lark_bot_url'],
+        )), )
+    if args.slurm:
+        new_cfg['infer']['runner']['type'] = get_config_type(SlurmRunner)
+        new_cfg['infer']['runner']['partition'] = args.partition
+        new_cfg['infer']['runner']['quotatype'] = args.quotatype
+        new_cfg['infer']['runner']['qos'] = args.qos
+        new_cfg['infer']['runner']['retry'] = args.retry
+    elif args.dlc:
+        new_cfg['infer']['runner']['type'] = get_config_type(DLCRunner)
+        new_cfg['infer']['runner']['aliyun_cfg'] = Config.fromfile(
+            args.aliyun_cfg)
+        new_cfg['infer']['runner']['retry'] = args.retry
+    else:
+        new_cfg['infer']['runner']['type'] = get_config_type(LocalRunner)
+        new_cfg['infer']['runner'][
+            'max_workers_per_gpu'] = args.max_workers_per_gpu
+    cfg.merge_from_dict(new_cfg)
+
+
+def fill_eval_cfg(cfg, args):
+    new_cfg = dict(
+        eval=dict(partitioner=dict(type=get_config_type(NaivePartitioner)),
+                  runner=dict(
+                      max_num_workers=args.max_num_workers,
+                      debug=args.debug,
+                      task=dict(type=get_config_type(OpenICLEvalTask)),
+                      lark_bot_url=cfg['lark_bot_url'],
+                  )))
+    if args.slurm:
+        new_cfg['eval']['runner']['type'] = get_config_type(SlurmRunner)
+        new_cfg['eval']['runner']['partition'] = args.partition
+        new_cfg['eval']['runner']['quotatype'] = args.quotatype
+        new_cfg['eval']['runner']['qos'] = args.qos
+        new_cfg['eval']['runner']['retry'] = args.retry
+    elif args.dlc:
+        new_cfg['eval']['runner']['type'] = get_config_type(DLCRunner)
+        new_cfg['eval']['runner']['aliyun_cfg'] = Config.fromfile(
+            args.aliyun_cfg)
+        new_cfg['eval']['runner']['retry'] = args.retry
+    else:
+        new_cfg['eval']['runner']['type'] = get_config_type(LocalRunner)
+        new_cfg['eval']['runner'][
+            'max_workers_per_gpu'] = args.max_workers_per_gpu
+    cfg.merge_from_dict(new_cfg)

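The refactor above replaces direct runner construction with config filling: get_config_type turns a class into the dotted-path string that registry-based configs expect, and fill_infer_cfg / fill_eval_cfg build a plain nested dict from the CLI args and merge it into the loaded Config. A hedged sketch of both halves (the printed dotted path depends on where the class is actually defined and is not verified here):

    from mmengine.config import Config

    from opencompass.runners import SlurmRunner


    def get_config_type(obj) -> str:
        # Same helper as in the hunk: class -> 'module.path.ClassName'.
        return f'{obj.__module__}.{obj.__name__}'


    print(get_config_type(SlurmRunner))
    # e.g. 'opencompass.runners.slurm.SlurmRunner'

    # merge_from_dict folds a nested plain dict into an existing Config, which
    # is how the filled runner/partitioner settings end up under cfg.infer and
    # cfg.eval.
    cfg = Config(dict(lark_bot_url=None))
    cfg.merge_from_dict(dict(infer=dict(runner=dict(max_num_workers=8))))
    print(cfg.infer.runner.max_num_workers)  # 8
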
run.py (112 lines changed)

@@ -6,13 +6,12 @@ from datetime import datetime
 
 from mmengine.config import Config, DictAction
 
-from opencompass.partitioners import (MultimodalNaivePartitioner,
-                                      NaivePartitioner, SizePartitioner)
+from opencompass.partitioners import MultimodalNaivePartitioner
 from opencompass.registry import PARTITIONERS, RUNNERS
 from opencompass.runners import SlurmRunner
 from opencompass.utils import LarkReporter, Summarizer, get_logger
-from opencompass.utils.run import (exec_eval_runner, exec_infer_runner,
-                                   exec_mm_infer_runner, get_config_from_arg)
+from opencompass.utils.run import (exec_mm_infer_runner, fill_eval_cfg,
+                                   fill_infer_cfg, get_config_from_arg)
 
 
 def parse_args():

@@ -245,39 +244,29 @@ def main():
             tasks = partitioner(cfg)
             exec_mm_infer_runner(tasks, args, cfg)
             return
-        elif args.dlc or args.slurm or cfg.get('infer', None) is None:
-            # Use SizePartitioner to split into subtasks
-            partitioner = SizePartitioner(
-                osp.join(cfg['work_dir'], 'predictions/'),
-                max_task_size=args.max_partition_size,
-                gen_task_coef=args.gen_task_coef)
-            tasks = partitioner(cfg)
-            if args.dry_run:
-                return
-            # execute the infer subtasks
-            exec_infer_runner(tasks, args, cfg)
-        # If they have specified "infer" in config and haven't used --slurm
-        # or --dlc, just follow the config
-        else:
-            if args.partition is not None:
-                if RUNNERS.get(cfg.infer.runner.type) == SlurmRunner:
-                    cfg.infer.runner.partition = args.partition
-                    cfg.infer.runner.quotatype = args.quotatype
-                else:
-                    logger.warning('SlurmRunner is not used, so the partition '
-                                   'argument is ignored.')
-            if args.debug:
-                cfg.infer.runner.debug = True
-            if args.lark:
-                cfg.infer.runner.lark_bot_url = cfg['lark_bot_url']
-            cfg.infer.partitioner['out_dir'] = osp.join(
-                cfg['work_dir'], 'predictions/')
-            partitioner = PARTITIONERS.build(cfg.infer.partitioner)
-            tasks = partitioner(cfg)
-            if args.dry_run:
-                return
-            runner = RUNNERS.build(cfg.infer.runner)
-            runner(tasks)
+
+        if args.dlc or args.slurm or cfg.get('infer', None) is None:
+            fill_infer_cfg(cfg, args)
+
+        if args.partition is not None:
+            if RUNNERS.get(cfg.infer.runner.type) == SlurmRunner:
+                cfg.infer.runner.partition = args.partition
+                cfg.infer.runner.quotatype = args.quotatype
+            else:
+                logger.warning('SlurmRunner is not used, so the partition '
+                               'argument is ignored.')
+        if args.debug:
+            cfg.infer.runner.debug = True
+        if args.lark:
+            cfg.infer.runner.lark_bot_url = cfg['lark_bot_url']
+        cfg.infer.partitioner['out_dir'] = osp.join(cfg['work_dir'],
+                                                    'predictions/')
+        partitioner = PARTITIONERS.build(cfg.infer.partitioner)
+        tasks = partitioner(cfg)
+        if args.dry_run:
+            return
+        runner = RUNNERS.build(cfg.infer.runner)
+        runner(tasks)
 
     # evaluate
     if args.mode in ['all', 'eval']:

@@ -289,37 +278,28 @@ def main():
                            'also specified --slurm or --dlc. '
                            'The "eval" configuration will be overridden by '
                            'your runtime arguments.')
 
         if args.dlc or args.slurm or cfg.get('eval', None) is None:
-            # Use NaivePartitioner,not split
-            partitioner = NaivePartitioner(
-                osp.join(cfg['work_dir'], 'results/'))
-            tasks = partitioner(cfg)
-            if args.dry_run:
-                return
-            # execute the eval tasks
-            exec_eval_runner(tasks, args, cfg)
-        # If they have specified "eval" in config and haven't used --slurm
-        # or --dlc, just follow the config
-        else:
-            if args.partition is not None:
-                if RUNNERS.get(cfg.infer.runner.type) == SlurmRunner:
-                    cfg.eval.runner.partition = args.partition
-                    cfg.eval.runner.quotatype = args.quotatype
-                else:
-                    logger.warning('SlurmRunner is not used, so the partition '
-                                   'argument is ignored.')
-            if args.debug:
-                cfg.eval.runner.debug = True
-            if args.lark:
-                cfg.eval.runner.lark_bot_url = cfg['lark_bot_url']
-            cfg.eval.partitioner['out_dir'] = osp.join(cfg['work_dir'],
-                                                       'results/')
-            partitioner = PARTITIONERS.build(cfg.eval.partitioner)
-            tasks = partitioner(cfg)
-            if args.dry_run:
-                return
-            runner = RUNNERS.build(cfg.eval.runner)
-            runner(tasks)
+            fill_eval_cfg(cfg, args)
+
+        if args.partition is not None:
+            if RUNNERS.get(cfg.infer.runner.type) == SlurmRunner:
+                cfg.eval.runner.partition = args.partition
+                cfg.eval.runner.quotatype = args.quotatype
+            else:
+                logger.warning('SlurmRunner is not used, so the partition '
+                               'argument is ignored.')
+        if args.debug:
+            cfg.eval.runner.debug = True
+        if args.lark:
+            cfg.eval.runner.lark_bot_url = cfg['lark_bot_url']
+        cfg.eval.partitioner['out_dir'] = osp.join(cfg['work_dir'], 'results/')
+        partitioner = PARTITIONERS.build(cfg.eval.partitioner)
+        tasks = partitioner(cfg)
+        if args.dry_run:
+            return
+        runner = RUNNERS.build(cfg.eval.runner)
+        runner(tasks)
 
     # visualize
     if args.mode in ['all', 'eval', 'viz']:

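After this change main() never instantiates a runner directly: the CLI flags only fill cfg.infer / cfg.eval (via fill_infer_cfg / fill_eval_cfg or the user's own config), and both stages converge on the registries, where PARTITIONERS.build and RUNNERS.build construct whatever the config names. A condensed sketch of that shared tail, mirroring the lines above rather than adding behaviour (run_stage and its signature are invented for illustration):

    import os.path as osp

    from opencompass.registry import PARTITIONERS, RUNNERS


    def run_stage(cfg, args, stage, out_subdir):
        """stage is 'infer' or 'eval'; cfg[stage] is assumed filled already."""
        stage_cfg = cfg[stage]
        stage_cfg.partitioner['out_dir'] = osp.join(cfg['work_dir'], out_subdir)
        partitioner = PARTITIONERS.build(stage_cfg.partitioner)
        tasks = partitioner(cfg)
        if args.dry_run:
            return
        runner = RUNNERS.build(stage_cfg.runner)
        runner(tasks)
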