From 6797a48648faa7f63beb687437e4b6bb9fe99740 Mon Sep 17 00:00:00 2001 From: hougang liu Date: Tue, 9 Apr 2019 11:01:50 +0800 Subject: [PATCH 1/3] update katib launcher 1. sync up since katib directory org changed 2. add option deleteafterdone --- components/kubeflow/katib-launcher/Dockerfile | 2 +- components/kubeflow/katib-launcher/OWNERS | 4 ++++ .../kubeflow/katib-launcher/component.yaml | 6 ++++-- .../katib-launcher/kubeflow_katib_launcher_op.py | 3 ++- .../katib-launcher/src/launch_study_job.py | 16 +++++++++++++--- 5 files changed, 24 insertions(+), 7 deletions(-) create mode 100644 components/kubeflow/katib-launcher/OWNERS diff --git a/components/kubeflow/katib-launcher/Dockerfile b/components/kubeflow/katib-launcher/Dockerfile index 651eaae1621..81d927e559a 100644 --- a/components/kubeflow/katib-launcher/Dockerfile +++ b/components/kubeflow/katib-launcher/Dockerfile @@ -19,7 +19,7 @@ RUN apt-get update -y && \ pip install pyyaml==3.12 six==1.11.0 requests==2.18.4 grpcio gcloud google-api-python-client protobuf kubernetes && \ wget https://github.com/kubeflow/katib/archive/master.zip && unzip master.zip -ENV PYTHONPATH $PYTHONPATH:/katib-master/pkg/api/python:/katib-master/py +ENV PYTHONPATH $PYTHONPATH:/katib-master/pkg/api/v1alpha1/python:/katib-master/py ADD build /ml diff --git a/components/kubeflow/katib-launcher/OWNERS b/components/kubeflow/katib-launcher/OWNERS new file mode 100644 index 00000000000..808ae38f7a9 --- /dev/null +++ b/components/kubeflow/katib-launcher/OWNERS @@ -0,0 +1,4 @@ +approvers: + - hougangliu +reviewers: + - hougangliu diff --git a/components/kubeflow/katib-launcher/component.yaml b/components/kubeflow/katib-launcher/component.yaml index 0cbcdf4c88a..bbedda24936 100644 --- a/components/kubeflow/katib-launcher/component.yaml +++ b/components/kubeflow/katib-launcher/component.yaml @@ -5,7 +5,7 @@ inputs: - {name: Namespace, type: String, default: kubeflow, description: 'Namespace.'} - {name: Optimization type, type: String, 
default: minimize, description: 'Direction of optimization. minimize or maximize.'} - {name: Objective value name, type: String, description: 'Objective value name which trainer optimizes.'} -- {name: Optimization goal, type: Float, description: 'Stop studying once objectivevaluename value exceeds optimizationgoal'} +- {name: Optimization goal, type: Float, description: 'Stop studying once objectivevaluename value exceeds optimizationgoal.'} - {name: Request count, type: Integer, default: 1, description: 'Number of requests to the suggestion service.'} - {name: Metrics names, type: String, description: 'List of metric names (comma-delimited).'} - {name: Parameter configs, type: YAML, default: '', description: 'Parameter configs (YAML/JSON format).'} @@ -13,7 +13,8 @@ inputs: - {name: Worker template path, type: String, default: '', description: 'Worker spec.'} - {name: Metrics collector template path, type: String, default: '', description: 'Metrics collector spec.'} - {name: Suggestion spec, type: YAML, default: '', description: 'Suggestion spec (YAML/JSON format).'} -- {name: StudyJob timeout minutes, type: Integer, default: '10', description: 'Time in minutes to wait for the StudyJob to complete'} +- {name: StudyJob timeout minutes, type: Integer, default: '10', description: 'Time in minutes to wait for the StudyJob to complete.'} +- {name: Delete StudyJob flag, type: String, default: 'True', description: 'When StudyJob done, delete the StudyJob if it is True.'} outputs: - {name: Best parameter set, type: JSON, description: 'The parameter set of the best StudyJob trial.'} implementation: @@ -34,5 +35,6 @@ implementation: --mcollectortemplatepath, {inputValue: Metrics collector template path}, --suggestionspec, {inputValue: Suggestion spec}, --studyjobtimeoutminutes, {inputValue: StudyJob timeout minutes}, + --deleteafterdone, {inputValue: Delete StudyJob flag}, --outputfile, {outputPath: Best parameter set}, ] diff --git 
a/components/kubeflow/katib-launcher/kubeflow_katib_launcher_op.py b/components/kubeflow/katib-launcher/kubeflow_katib_launcher_op.py index 5a0a931d2ee..ad06675f8ef 100644 --- a/components/kubeflow/katib-launcher/kubeflow_katib_launcher_op.py +++ b/components/kubeflow/katib-launcher/kubeflow_katib_launcher_op.py @@ -16,7 +16,7 @@ def kubeflow_studyjob_launcher_op(name, namespace, optimizationtype, objectivevaluename, optimizationgoal, requestcount, metricsnames, parameterconfigs, nasConfig, workertemplatepath, mcollectortemplatepath, suggestionspec, - studyjob_timeout_minutes, output_file='/output.txt', step_name='StudyJob-Launcher'): + studyjob_timeout_minutes, delete=True, output_file='/output.txt', step_name='StudyJob-Launcher'): return dsl.ContainerOp( name = step_name, image = 'liuhougangxa/ml-pipeline-kubeflow-studyjob:latest', @@ -34,6 +34,7 @@ def kubeflow_studyjob_launcher_op(name, namespace, optimizationtype, objectiveva "--mcollectortemplatepath", mcollectortemplatepath, "--suggestionspec", suggestionspec, "--outputfile", output_file, + "--deleteafterdone", delete, '--studyjobtimeoutminutes', studyjob_timeout_minutes, ], file_outputs = {'hyperparameter': output_file} diff --git a/components/kubeflow/katib-launcher/src/launch_study_job.py b/components/kubeflow/katib-launcher/src/launch_study_job.py index 1def4b51be2..69cf0428053 100644 --- a/components/kubeflow/katib-launcher/src/launch_study_job.py +++ b/components/kubeflow/katib-launcher/src/launch_study_job.py @@ -39,6 +39,14 @@ def yamlOrJsonStr(str): def strToList(str): return str.split(",") +def str2bool(v): + if v.lower() in ('yes', 'true', 't', 'y', '1'): + return True + elif v.lower() in ('no', 'false', 'f', 'n', '0'): + return False + else: + raise argparse.ArgumentTypeError('Unsupported value encountered.') + def _update_or_pop(spec, name, value): if value: spec[name] = value @@ -121,6 +129,9 @@ def main(argv=None): parser.add_argument('--outputfile', type=str, default='/output.txt', help='The 
file which stores the best trial of the studyJob.') + parser.add_argument('--deleteafterdone', type=str2bool, + default=True, + help='When studyjob done, delete the studyjob automatically if it is True.') parser.add_argument('--studyjobtimeoutminutes', type=int, default=10, help='Time in minutes to wait for the StudyJob to complete') @@ -129,7 +140,6 @@ def main(argv=None): logging.getLogger().setLevel(logging.INFO) - logging.info('Generating studyjob template.') template_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'hp.template.yaml') content_yaml = _generate_studyjob_yaml(template_file, args.name, args.namespace, args.optimizationtype, args.objectivevaluename, @@ -157,8 +167,8 @@ def main(argv=None): f.write(json.dumps(ps_dict)) if succ: logging.info('Study success.') - - study_job_client.delete_study_job(api_client, job_name, job_namespace) + if args.deleteafterdone: + study_job_client.delete_study_job(api_client, job_name, job_namespace) if __name__== "__main__": main() From 3346d344a0f68f7bd763503c315455a50715b125 Mon Sep 17 00:00:00 2001 From: hougang liu Date: Wed, 10 Apr 2019 08:31:24 +0800 Subject: [PATCH 2/3] fix comment --- components/kubeflow/katib-launcher/component.yaml | 4 ++-- .../katib-launcher/kubeflow_katib_launcher_op.py | 2 +- .../kubeflow/katib-launcher/src/launch_study_job.py | 13 +++---------- 3 files changed, 6 insertions(+), 13 deletions(-) diff --git a/components/kubeflow/katib-launcher/component.yaml b/components/kubeflow/katib-launcher/component.yaml index bbedda24936..fef29547ca2 100644 --- a/components/kubeflow/katib-launcher/component.yaml +++ b/components/kubeflow/katib-launcher/component.yaml @@ -14,7 +14,7 @@ inputs: - {name: Metrics collector template path, type: String, default: '', description: 'Metrics collector spec.'} - {name: Suggestion spec, type: YAML, default: '', description: 'Suggestion spec (YAML/JSON format).'} - {name: StudyJob timeout minutes, type: Integer, default: '10', description: 'Time in 
minutes to wait for the StudyJob to complete.'} -- {name: Delete StudyJob flag, type: String, default: 'True', description: 'When StudyJob done, delete the StudyJob if it is True.'} +- {name: Delete finished job, type: Boolean, default: True, description: 'When StudyJob done, delete the StudyJob if it is True.'} outputs: - {name: Best parameter set, type: JSON, description: 'The parameter set of the best StudyJob trial.'} implementation: @@ -35,6 +35,6 @@ implementation: --mcollectortemplatepath, {inputValue: Metrics collector template path}, --suggestionspec, {inputValue: Suggestion spec}, --studyjobtimeoutminutes, {inputValue: StudyJob timeout minutes}, - --deleteafterdone, {inputValue: Delete StudyJob flag}, + --deleteAfterDone, {inputValue: Delete finished job}, --outputfile, {outputPath: Best parameter set}, ] diff --git a/components/kubeflow/katib-launcher/kubeflow_katib_launcher_op.py b/components/kubeflow/katib-launcher/kubeflow_katib_launcher_op.py index ad06675f8ef..8db12e933ec 100644 --- a/components/kubeflow/katib-launcher/kubeflow_katib_launcher_op.py +++ b/components/kubeflow/katib-launcher/kubeflow_katib_launcher_op.py @@ -34,7 +34,7 @@ def kubeflow_studyjob_launcher_op(name, namespace, optimizationtype, objectiveva "--mcollectortemplatepath", mcollectortemplatepath, "--suggestionspec", suggestionspec, "--outputfile", output_file, - "--deleteafterdone", delete, + "--deleteAfterDone", delete, '--studyjobtimeoutminutes', studyjob_timeout_minutes, ], file_outputs = {'hyperparameter': output_file} diff --git a/components/kubeflow/katib-launcher/src/launch_study_job.py b/components/kubeflow/katib-launcher/src/launch_study_job.py index 69cf0428053..8f901a00d82 100644 --- a/components/kubeflow/katib-launcher/src/launch_study_job.py +++ b/components/kubeflow/katib-launcher/src/launch_study_job.py @@ -13,6 +13,7 @@ # limitations under the License. 
import argparse import datetime +from distutils.util import strtobool import json import os import logging @@ -39,14 +40,6 @@ def yamlOrJsonStr(str): def strToList(str): return str.split(",") -def str2bool(v): - if v.lower() in ('yes', 'true', 't', 'y', '1'): - return True - elif v.lower() in ('no', 'false', 'f', 'n', '0'): - return False - else: - raise argparse.ArgumentTypeError('Unsupported value encountered.') - def _update_or_pop(spec, name, value): if value: spec[name] = value @@ -129,7 +122,7 @@ def main(argv=None): parser.add_argument('--outputfile', type=str, default='/output.txt', help='The file which stores the best trial of the studyJob.') - parser.add_argument('--deleteafterdone', type=str2bool, + parser.add_argument('--deleteAfterDone', type=strtobool, default=True, help='When studyjob done, delete the studyjob automatically if it is True.') parser.add_argument('--studyjobtimeoutminutes', type=int, @@ -167,7 +160,7 @@ def main(argv=None): f.write(json.dumps(ps_dict)) if succ: logging.info('Study success.') - if args.deleteafterdone: + if args.deleteAfterDone: study_job_client.delete_study_job(api_client, job_name, job_namespace) if __name__== "__main__": From d124871c20ce2904ffb4074feafddf2ef185c809 Mon Sep 17 00:00:00 2001 From: hougang liu Date: Wed, 10 Apr 2019 09:09:28 +0800 Subject: [PATCH 3/3] fix comment --- components/kubeflow/katib-launcher/component.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/kubeflow/katib-launcher/component.yaml b/components/kubeflow/katib-launcher/component.yaml index fef29547ca2..49c847f5921 100644 --- a/components/kubeflow/katib-launcher/component.yaml +++ b/components/kubeflow/katib-launcher/component.yaml @@ -14,7 +14,7 @@ inputs: - {name: Metrics collector template path, type: String, default: '', description: 'Metrics collector spec.'} - {name: Suggestion spec, type: YAML, default: '', description: 'Suggestion spec (YAML/JSON format).'} - {name: StudyJob timeout minutes, type: 
Integer, default: '10', description: 'Time in minutes to wait for the StudyJob to complete.'} -- {name: Delete finished job, type: Boolean, default: True, description: 'When StudyJob done, delete the StudyJob if it is True.'} +- {name: Delete finished job, type: Boolean, default: 'True', description: 'Whether to delete the job after it is finished.'} outputs: - {name: Best parameter set, type: JSON, description: 'The parameter set of the best StudyJob trial.'} implementation: