add component yaml for GCP components

kubeflow · Feb 28, 2019 · 6e3054e · 6e3054e
1 parent fa3ebef
commit 6e3054e
Show file tree

Hide file tree

Showing 5 changed files with 247 additions and 0 deletions.
diff --git a/components/gcp/dataflow/launch_python/component.yaml b/components/gcp/dataflow/launch_python/component.yaml
@@ -0,0 +1,44 @@
+# Copyright 2018 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: Launch Python
+description: |
+  Launch a self-executing beam python file.
+inputs:  # Each name below is referenced by an {inputValue: ...} placeholder in args.
+  - {name: python_file_path, description: 'The gcs or local path to the python file to run.'}
+  - {name: project_id, description: 'The ID of the parent project.' }
+  - {name: requirements_file_path, description: 'Optional, the gcs or local path to the pip requirements file', default: '' }
+  - {name: location, description: 'The regional endpoint to which to direct the request.', default: '' }
+  - {name: job_name_prefix, description: 'Optional. The prefix of the generated job name. If not provided, the method will generate a random name.', default: '' }
+  - {name: args, description: 'The list of args to pass to the python file.', default: '[]' }
+  - {name: wait_interval, description: 'Optional wait interval between calls to get job status. Defaults to 30.', default: '30' }
+outputs:
+  - {name: job_id, description: 'The id of the created dataflow job.'}
+implementation:
+  container:
+    image: gcr.io/ml-pipeline-dogfood/ml-pipeline-gcp:latest
+    args: [  # First two items presumably select the module and function run by the image - confirm.
+      kfp_component.google.dataflow, launch_python,
+      --python_file_path, {inputValue: python_file_path},
+      --project_id, {inputValue: project_id},
+      --requirements_file_path, {inputValue: requirements_file_path},
+      --location, {inputValue: location},
+      --job_name_prefix, {inputValue: job_name_prefix},
+      --args, {inputValue: args},
+      --wait_interval, {inputValue: wait_interval}
+    ]
+    env:  # "{{pod.name}}" is a literal placeholder here; presumably substituted by the workflow engine - confirm.
+      KFP_POD_NAME: "{{pod.name}}"
+    fileOutputs:  # Maps the declared job_id output to a file path inside the container.
+      job_id: /tmp/kfp/output/dataflow/job_id.txt
diff --git a/components/gcp/dataflow/launch_template/component.yaml b/components/gcp/dataflow/launch_template/component.yaml
@@ -0,0 +1,44 @@
+# Copyright 2018 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: Launch Dataflow Template
+description: |
+  Launches a dataflow job from template.
+inputs:  # Each name below is referenced by an {inputValue: ...} placeholder in args.
+  - {name: project_id, description: 'Required. The ID of the Cloud Platform project that the job belongs to.'}
+  - {name: gcs_path, description: "Required. A Cloud Storage path to the template from which to create the job. Must be valid Cloud Storage URL, beginning with 'gs://'." }
+  - {name: launch_parameters, description: 'Parameters to provide to the template being launched. Schema defined in https://cloud.google.com/dataflow/docs/reference/rest/v1b3/LaunchTemplateParameters. `jobName` will be replaced by generated name.' }
+  - {name: location, description: 'The regional endpoint to which to direct the request.', optional: true }
+  - {name: job_name_prefix, description: 'Optional. The prefix of the generated job name. If not provided, the method will generate a random name.', optional: true }
+  - {name: validate_only, description: 'If true, the request is validated but not actually executed. Defaults to false.', optional: true, default: 'False' }
+  - {name: wait_interval, description: 'Optional wait interval between calls to get job status. Defaults to 30.', optional: true, default: '30'}
+outputs:
+  - {name: job_id, description: 'The ID of the created dataflow job.'}
+implementation:
+  container:
+    image: gcr.io/ml-pipeline-dogfood/ml-pipeline-gcp:latest
+    args: [  # First two items presumably select the module and function run by the image - confirm.
+      kfp_component.google.dataflow, launch_template,
+      --project_id, {inputValue: project_id},
+      --gcs_path, {inputValue: gcs_path},
+      --launch_parameters, {inputValue: launch_parameters},
+      --location, {inputValue: location},
+      --job_name_prefix, {inputValue: job_name_prefix},
+      --validate_only, {inputValue: validate_only},
+      --wait_interval, {inputValue: wait_interval}
+    ]
+    env:  # "{{pod.name}}" is a literal placeholder here; presumably substituted by the workflow engine - confirm.
+      KFP_POD_NAME: "{{pod.name}}"
+    fileOutputs:  # Maps the declared job_id output to a file path inside the container.
+      job_id: /tmp/kfp/output/dataflow/job_id.txt
diff --git a/components/gcp/ml_engine/batch_predict/component.yaml b/components/gcp/ml_engine/batch_predict/component.yaml
@@ -0,0 +1,50 @@
+# Copyright 2018 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: Batch predict against a model with Cloud ML Engine
+description: |
+  Creates a MLEngine batch prediction job.
+inputs:  # Each name below is referenced by an {inputValue: ...} placeholder in args.
+  - {name: project_id, description: 'Required. The ID of the parent project of the job.'}
+  - {name: model_path, description: 'The path to the model. It can be either: `projects/[PROJECT_ID]/models/[MODEL_ID]` or `projects/[PROJECT_ID]/models/[MODEL_ID]/versions/[VERSION_ID]` or a GCS path of a model file.' }
+  - {name: input_paths, description: 'Required. The Google Cloud Storage location of the input data files. May contain wildcards.' }
+  - {name: input_data_format, description: 'Required. The format of the input data files. See https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs#DataFormat.' }
+  - {name: output_path, description: 'Required. The output Google Cloud Storage location.' }
+  - {name: region, description: 'Required. The Google Compute Engine region to run the prediction job in.' }
+  - {name: output_data_format, description: 'Optional. Format of the output data files, defaults to JSON.', optional: true}
+  - {name: prediction_input, description: 'Input parameters to create a prediction job.', optional: true}
+  - {name: job_id_prefix, description: 'The prefix of the generated job id.', optional: true}
+  - {name: wait_interval, description: 'Optional wait interval between calls to get job status. Defaults to 30.', optional: true, default: '30'}
+outputs:
+  - {name: job_id, description: 'The ID of the created job.'}
+implementation:
+  container:
+    image: gcr.io/ml-pipeline-dogfood/ml-pipeline-gcp:latest
+    args: [  # First two items presumably select the module and function run by the image - confirm.
+      kfp_component.google.ml_engine, batch_predict,
+      --project_id, {inputValue: project_id},
+      --model_path, {inputValue: model_path},
+      --input_paths, {inputValue: input_paths},
+      --input_data_format, {inputValue: input_data_format},
+      --output_path, {inputValue: output_path},
+      --region, {inputValue: region},
+      --output_data_format, {inputValue: output_data_format},
+      --prediction_input, {inputValue: prediction_input},
+      --job_id_prefix, {inputValue: job_id_prefix},
+      --wait_interval, {inputValue: wait_interval}
+    ]
+    env:  # "{{pod.name}}" is a literal placeholder here; presumably substituted by the workflow engine - confirm.
+      KFP_POD_NAME: "{{pod.name}}"
+    fileOutputs:  # Maps the declared job_id output to a file path inside the container.
+      job_id: /tmp/kfp/output/ml_engine/job_id.txt
diff --git a/components/gcp/ml_engine/deploy/component.yaml b/components/gcp/ml_engine/deploy/component.yaml
@@ -0,0 +1,53 @@
+# Copyright 2018 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: Deploy a model to Cloud ML Engine
+description: |
+  Creates a Cloud Machine Learning version and optionally a model if it does not exist.
+inputs:  # Each name below is meant to be referenced by an {inputValue: ...} placeholder in args.
+  - {name: model_uri,                 description: 'Required, the GCS URI which contains a model file. Commonly used TF model search paths (export/exporter) will be used if they exist.'}
+  - {name: project_id,                description: 'Required, the ID of the parent project.'}
+  - {name: model_short_name,          description: 'Optional, the user provided name of the model.', optional: true }
+  - {name: version_short_name,        description: 'Optional, the user provided name of the version. If it is not provided, the operation uses a random name.', optional: true }
+  - {name: runtime_version,           description: 'Optional, the Cloud ML Engine runtime version to use for this deployment. If not set, Cloud ML Engine uses the default stable version, 1.0.', optional: true }
+  - {name: python_version,            description: "Optional, the version of Python used in prediction. If not set, the default version is '2.7'. Python '3.5' is available when runtimeVersion is set to '1.4' and above. Python '2.7' works with all supported runtime versions.", optional: true }
+  - {name: version,                   description: 'Optional, the payload of the new version.', optional: true }
+  - {name: replace_existing_version,  description: 'Boolean flag indicates whether to replace existing version in case of conflict.', optional: true, default: 'False' }
+  - {name: set_default,               description: 'Boolean flag indicates whether to set the new version as default version in the model.', optional: true, default: 'False'}
+  - {name: wait_interval,             description: 'The interval to wait for a long running operation.', optional: true, default: '30'}
+outputs:
+  - {name: model_uri,     description: 'The URI of the model.'}
+  - {name: model_name,    description: 'The name of the deployed model.'}
+  - {name: version_name,  description: 'The name of the deployed version.'}
+implementation:
+  container:
+    image: gcr.io/ml-pipeline-dogfood/ml-pipeline-gcp:latest
+    args: [  # First two items presumably select the module and function run by the image - confirm.
+      kfp_component.google.ml_engine, deploy,
+      --model_uri, {inputValue: model_uri},
+      --project_id, {inputValue: project_id},
+      --model_short_name, {inputValue: model_short_name},
+      --version_short_name, {inputValue: version_short_name},
+      --runtime_version, {inputValue: runtime_version},  # NOTE(review): the python_version input is declared but never forwarded here - confirm whether --python_version should be passed.
+      --version, {inputValue: version},
+      --replace_existing_version, {inputValue: replace_existing_version},
+      --set_default, {inputValue: set_default},
+      --wait_interval, {inputValue: wait_interval}
+    ]
+    env:  # "{{pod.name}}" is a literal placeholder here; presumably substituted by the workflow engine - confirm.
+      KFP_POD_NAME: "{{pod.name}}"
+    fileOutputs:  # Maps each declared output name to a file path inside the container.
+      model_uri: /tmp/kfp/output/ml_engine/model_uri.txt
+      model_name: /tmp/kfp/output/ml_engine/model_name.txt
+      version_name: /tmp/kfp/output/ml_engine/version_name.txt
diff --git a/components/gcp/ml_engine/train/component.yaml b/components/gcp/ml_engine/train/component.yaml
@@ -0,0 +1,56 @@
+# Copyright 2018 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: Train a model with Cloud ML Engine
+description: |
+  Submits a Cloud Machine Learning training job.
+inputs:  # Each name below is referenced by an {inputValue: ...} placeholder in args.
+  - {name: project_id,        description: 'Required. The ID of the parent project of the job.'}
+  - {name: python_module,     description: 'The Python module name to run after installing the packages.', optional: true}
+  - {name: package_uris,      description: 'The Google Cloud Storage location of the packages with the training program and any additional dependencies. The maximum number of package URIs is 100.', optional: true}
+  - {name: region,            description: 'The Google Compute Engine region to run the training job in.', optional: true}
+  - {name: args,              description: 'Command line arguments to pass to the program.', optional: true}
+  - {name: job_dir,           description: 'A Google Cloud Storage path in which to store training outputs and other data needed for training. This path is passed to your TensorFlow program as the "--job-dir" command-line argument. The benefit of specifying this field is that Cloud ML validates the path for use in training.', optional: true}
+  - {name: python_version,    description: "The version of Python used in training. If not set, the default version is '2.7'. Python '3.5' is available when runtimeVersion is set to '1.4' and above.", optional: true}
+  - {name: runtime_version,   description: 'The Cloud ML Engine runtime version to use for training. If not set, Cloud ML Engine uses the default stable version, 1.0. ', optional: true}
+  - {name: master_image_uri,  description: 'The Docker image to run on the master replica. This image must be in Container Registry.', optional: true}
+  - {name: worker_image_uri,  description: 'The Docker image to run on the worker replica. This image must be in Container Registry.', optional: true}
+  - {name: training_input,    description: 'Input parameters to create a training job.', optional: true}
+  - {name: job_id_prefix,     description: 'The prefix of the generated job id.', optional: true}
+  - {name: wait_interval,     description: 'Optional wait interval between calls to get job status. Defaults to 30.', optional: true, default: '30'}
+outputs:
+  - {name: job_id,            description: 'The ID of the created job.'}
+implementation:
+  container:
+    image: gcr.io/ml-pipeline-dogfood/ml-pipeline-gcp:latest
+    args: [  # First two items presumably select the module and function run by the image - confirm.
+      kfp_component.google.ml_engine, train,
+      --project_id, {inputValue: project_id},
+      --python_module, {inputValue: python_module},
+      --package_uris, {inputValue: package_uris},
+      --region, {inputValue: region},
+      --args, {inputValue: args},
+      --job_dir, {inputValue: job_dir},
+      --python_version, {inputValue: python_version},
+      --runtime_version, {inputValue: runtime_version},
+      --master_image_uri, {inputValue: master_image_uri},
+      --worker_image_uri, {inputValue: worker_image_uri},
+      --training_input, {inputValue: training_input},
+      --job_id_prefix, {inputValue: job_id_prefix},
+      --wait_interval, {inputValue: wait_interval}
+    ]
+    env:  # "{{pod.name}}" is a literal placeholder here; presumably substituted by the workflow engine - confirm.
+      KFP_POD_NAME: "{{pod.name}}"
+    fileOutputs:  # Maps the declared job_id output to a file path inside the container.
+      job_id: /tmp/kfp/output/ml_engine/job_id.txt