From 6fb66fa4e9e9796eb3760afabcae1b4abc886e08 Mon Sep 17 00:00:00 2001 From: hougang liu Date: Tue, 9 Apr 2019 11:01:50 +0800 Subject: [PATCH] update katib launcher 1. sync up since katib directory org changed 2. add option deleteafterdone --- components/kubeflow/katib-launcher/Dockerfile | 2 +- components/kubeflow/katib-launcher/OWNERS | 4 ++++ .../kubeflow/katib-launcher/component.yaml | 1 + .../katib-launcher/kubeflow_katib_launcher_op.py | 3 ++- .../katib-launcher/src/launch_study_job.py | 16 +++++++++++++--- 5 files changed, 21 insertions(+), 5 deletions(-) create mode 100644 components/kubeflow/katib-launcher/OWNERS diff --git a/components/kubeflow/katib-launcher/Dockerfile b/components/kubeflow/katib-launcher/Dockerfile index 651eaae16215..81d927e559ac 100644 --- a/components/kubeflow/katib-launcher/Dockerfile +++ b/components/kubeflow/katib-launcher/Dockerfile @@ -19,7 +19,7 @@ RUN apt-get update -y && \ pip install pyyaml==3.12 six==1.11.0 requests==2.18.4 grpcio gcloud google-api-python-client protobuf kubernetes && \ wget https://github.com/kubeflow/katib/archive/master.zip && unzip master.zip -ENV PYTHONPATH $PYTHONPATH:/katib-master/pkg/api/python:/katib-master/py +ENV PYTHONPATH $PYTHONPATH:/katib-master/pkg/api/v1alpha1/python:/katib-master/py ADD build /ml diff --git a/components/kubeflow/katib-launcher/OWNERS b/components/kubeflow/katib-launcher/OWNERS new file mode 100644 index 000000000000..808ae38f7a92 --- /dev/null +++ b/components/kubeflow/katib-launcher/OWNERS @@ -0,0 +1,4 @@ +approvers: + - hougangliu +reviewers: + - hougangliu diff --git a/components/kubeflow/katib-launcher/component.yaml b/components/kubeflow/katib-launcher/component.yaml index 0cbcdf4c88ab..24c8591af472 100644 --- a/components/kubeflow/katib-launcher/component.yaml +++ b/components/kubeflow/katib-launcher/component.yaml @@ -34,5 +34,6 @@ implementation: --mcollectortemplatepath, {inputValue: Metrics collector template path}, --suggestionspec, {inputValue: Suggestion spec}, 
--studyjobtimeoutminutes, {inputValue: StudyJob timeout minutes}, + --deleteafterdone, {inputValue: Delete StudyJob when done if it's True}, --outputfile, {outputPath: Best parameter set}, ] diff --git a/components/kubeflow/katib-launcher/kubeflow_katib_launcher_op.py b/components/kubeflow/katib-launcher/kubeflow_katib_launcher_op.py index 5a0a931d2ee6..ad06675f8ef0 100644 --- a/components/kubeflow/katib-launcher/kubeflow_katib_launcher_op.py +++ b/components/kubeflow/katib-launcher/kubeflow_katib_launcher_op.py @@ -16,7 +16,7 @@ def kubeflow_studyjob_launcher_op(name, namespace, optimizationtype, objectivevaluename, optimizationgoal, requestcount, metricsnames, parameterconfigs, nasConfig, workertemplatepath, mcollectortemplatepath, suggestionspec, - studyjob_timeout_minutes, output_file='/output.txt', step_name='StudyJob-Launcher'): + studyjob_timeout_minutes, delete=True, output_file='/output.txt', step_name='StudyJob-Launcher'): return dsl.ContainerOp( name = step_name, image = 'liuhougangxa/ml-pipeline-kubeflow-studyjob:latest', @@ -34,6 +34,7 @@ def kubeflow_studyjob_launcher_op(name, namespace, optimizationtype, objectiveva "--mcollectortemplatepath", mcollectortemplatepath, "--suggestionspec", suggestionspec, "--outputfile", output_file, + "--deleteafterdone", delete, '--studyjobtimeoutminutes', studyjob_timeout_minutes, ], file_outputs = {'hyperparameter': output_file} diff --git a/components/kubeflow/katib-launcher/src/launch_study_job.py b/components/kubeflow/katib-launcher/src/launch_study_job.py index 1def4b51be22..69cf04280538 100644 --- a/components/kubeflow/katib-launcher/src/launch_study_job.py +++ b/components/kubeflow/katib-launcher/src/launch_study_job.py @@ -39,6 +39,14 @@ def yamlOrJsonStr(str): def strToList(str): return str.split(",") +def str2bool(v): + if v.lower() in ('yes', 'true', 't', 'y', '1'): + return True + elif v.lower() in ('no', 'false', 'f', 'n', '0'): + return False + else: + raise argparse.ArgumentTypeError('Unsupported value 
encountered.') + def _update_or_pop(spec, name, value): if value: spec[name] = value @@ -121,6 +129,9 @@ def main(argv=None): parser.add_argument('--outputfile', type=str, default='/output.txt', help='The file which stores the best trial of the studyJob.') + parser.add_argument('--deleteafterdone', type=str2bool, + default=True, + help='When studyjob done, delete the studyjob automatically if it is True.') parser.add_argument('--studyjobtimeoutminutes', type=int, default=10, help='Time in minutes to wait for the StudyJob to complete') @@ -129,7 +140,6 @@ def main(argv=None): logging.getLogger().setLevel(logging.INFO) - logging.info('Generating studyjob template.') template_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'hp.template.yaml') content_yaml = _generate_studyjob_yaml(template_file, args.name, args.namespace, args.optimizationtype, args.objectivevaluename, @@ -157,8 +167,8 @@ def main(argv=None): f.write(json.dumps(ps_dict)) if succ: logging.info('Study success.') - - study_job_client.delete_study_job(api_client, job_name, job_namespace) + if args.deleteafterdone: + study_job_client.delete_study_job(api_client, job_name, job_namespace) if __name__== "__main__": main()