From 4b38e64ff4a9dbabe8277aeabfabac1f4c02caaa Mon Sep 17 00:00:00 2001
From: SparkSnail
Date: Mon, 12 Jul 2021 13:09:30 +0800
Subject: [PATCH] Support view/resume experiment from external folder (#3870)

---
 docs/en_US/Tutorial/Nnictl.rst   |  8 ++++++
 nni/tools/nnictl/launcher.py     | 59 ++++++++++++++++++++++++++++++--
 nni/tools/nnictl/nnictl.py       |  4 +++
 nni/tools/nnictl/nnictl_utils.py |  2 +-
 4 files changed, 69 insertions(+), 4 deletions(-)

diff --git a/docs/en_US/Tutorial/Nnictl.rst b/docs/en_US/Tutorial/Nnictl.rst
index 4b3d40f7b2..610c84d86a 100644
--- a/docs/en_US/Tutorial/Nnictl.rst
+++ b/docs/en_US/Tutorial/Nnictl.rst
@@ -166,6 +166,10 @@ nnictl resume
      - False
      -
      - set foreground mode, print log content to terminal
+   * - --experiment_dir, -e
+     - False
+     -
+     - Resume experiment from external folder, specify the full path of experiment folder
 
 
 
@@ -218,6 +222,10 @@ nnictl view
      - False
      -
      - Rest port of the experiment you want to view
+   * - --experiment_dir, -e
+     - False
+     -
+     - View experiment from external folder, specify the full path of experiment folder
 
 
 
diff --git a/nni/tools/nnictl/launcher.py b/nni/tools/nnictl/launcher.py
index ecfcc2c0f2..d8557b24fb 100644
--- a/nni/tools/nnictl/launcher.py
+++ b/nni/tools/nnictl/launcher.py
@@ -539,7 +539,9 @@ def manage_stopped_experiment(args, mode):
     #find the latest stopped experiment
     if not args.id:
         print_error('Please set experiment id! \nYou could use \'nnictl {0} id\' to {0} a stopped experiment!\n' \
-        'You could use \'nnictl experiment list --all\' to show all experiments!'.format(mode))
+        'You could use \'nnictl experiment list --all\' to show all experiments!\n' \
+        'If your experiment is not started in current machine, you could specify experiment folder using ' \
+        '--experiment_dir argument'.format(mode))
         exit(1)
     else:
         if experiments_dict.get(args.id) is None:
@@ -570,8 +572,59 @@ def manage_stopped_experiment(args, mode):
 
 def view_experiment(args):
     '''view a stopped experiment'''
-    manage_stopped_experiment(args, 'view')
+    if args.experiment_dir:
+        manage_external_experiment(args, 'view')
+    else:
+        manage_stopped_experiment(args, 'view')
 
 def resume_experiment(args):
     '''resume an experiment'''
-    manage_stopped_experiment(args, 'resume')
+    if args.experiment_dir:
+        manage_external_experiment(args, 'resume')
+    else:
+        manage_stopped_experiment(args, 'resume')
+
+def manage_external_experiment(args, mode):
+    '''view or resume an experiment stored in an external folder
+
+    If args.id is set, args.experiment_dir is passed to Config unchanged as the
+    log directory; otherwise the last component of args.experiment_dir is used
+    as the experiment id and its parent folder as the log directory.
+    '''
+    # validate arguments
+    if not os.path.exists(args.experiment_dir):
+        print_error('Folder %s does not exist!' % args.experiment_dir)
+        exit(1)
+    if not os.path.isdir(args.experiment_dir):
+        print_error('Path %s is not folder directory!' % args.experiment_dir)
+        exit(1)
+    if args.id:
+        experiment_id = args.id
+        log_dir = args.experiment_dir
+    else:
+        print_normal('NNI can not detect experiment id in argument, will use last folder name as experiment id in experiment_dir argument.')
+        # take id and parent from the same Path object so that a trailing
+        # separator ("/data/exp/abc/") gives id "abc" and parent "/data/exp";
+        # os.path.dirname would return "/data/exp/abc" for that input
+        experiment_path = Path(args.experiment_dir)
+        experiment_id = experiment_path.name
+        log_dir = str(experiment_path.parent)
+        if not experiment_id:
+            print_error("Please set experiment id argument, or add id as the last folder name in experiment_dir argument.")
+            exit(1)
+    args.url_prefix = None
+    experiment_config = Config(experiment_id, log_dir).get_config()
+    # explicit check instead of assert: assert is stripped when python runs with -O
+    if 'trainingService' not in experiment_config and 'trainingServicePlatform' not in experiment_config:
+        print_error('Can not find trainingService or trainingServicePlatform in the config of experiment %s' % experiment_id)
+        exit(1)
+    try:
+        if 'trainingServicePlatform' in experiment_config:
+            experiment_config['logDir'] = log_dir
+            launch_experiment(args, experiment_config, mode, experiment_id, 1)
+        else:
+            experiment_config['experimentWorkingDirectory'] = log_dir
+            launch_experiment(args, experiment_config, mode, experiment_id, 2)
+    except Exception as exception:
+        print_error(exception)
+        exit(1)
diff --git a/nni/tools/nnictl/nnictl.py b/nni/tools/nnictl/nnictl.py
index a5f02c8c9e..962698e7c1 100644
--- a/nni/tools/nnictl/nnictl.py
+++ b/nni/tools/nnictl/nnictl.py
@@ -66,12 +66,16 @@ def parse_args():
     parser_resume.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', type=int, help='the port of restful server')
     parser_resume.add_argument('--debug', '-d', action='store_true', help=' set debug mode')
     parser_resume.add_argument('--foreground', '-f', action='store_true', help=' set foreground mode, print log content to terminal')
+    parser_resume.add_argument('--experiment_dir', '-e', help='resume experiment from external folder, specify the full path of ' \
+                               'experiment folder')
     parser_resume.set_defaults(func=resume_experiment)
 
     # parse view command
     parser_view = subparsers.add_parser('view', help='view a stopped experiment')
     parser_view.add_argument('id', nargs='?', help='The id of the experiment you want to view')
     parser_view.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', type=int, help='the port of restful server')
+    parser_view.add_argument('--experiment_dir', '-e', help='view experiment from external folder, specify the full path of ' \
+                             'experiment folder')
     parser_view.set_defaults(func=view_experiment)
 
     # parse update command
diff --git a/nni/tools/nnictl/nnictl_utils.py b/nni/tools/nnictl/nnictl_utils.py
index 1f23fd451b..52bba2bdf8 100644
--- a/nni/tools/nnictl/nnictl_utils.py
+++ b/nni/tools/nnictl/nnictl_utils.py
@@ -524,7 +524,7 @@ def experiment_clean(args):
     for experiment_id in experiment_id_list:
         experiment_id = get_config_filename(args)
         experiment_config = Config(experiment_id, Experiments().get_all_experiments()[experiment_id]['logDir']).get_config()
-        platform = experiment_config.get('trainingServicePlatform')
+        platform = experiment_config.get('trainingServicePlatform') or experiment_config.get('trainingService', {}).get('platform')
         if platform == 'remote':
             machine_list = experiment_config.get('machineList')
             remote_clean(machine_list, experiment_id)