Skip to content

Commit

Permalink
GCP components yaml spec (kubeflow#887)
Browse files Browse the repository at this point in the history
* add component yaml for GCP components

* Add bigquery component yaml

* Fix typo and set default instead of optional setting.
  • Loading branch information
hongye-sun authored and k8s-ci-robot committed Mar 1, 2019
1 parent 64da26a commit 91d50f6
Show file tree
Hide file tree
Showing 6 changed files with 289 additions and 0 deletions.
42 changes: 42 additions & 0 deletions components/gcp/bigquery/query/component.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Copyright 2018 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Component spec for the BigQuery "query" operation.
# Declares the user-facing inputs/outputs and how they are wired into the
# container's command-line arguments.
name: Bigquery - Query
description: |
  Submit a query to Bigquery service and write outputs to a GCS blob.

# Inputs without a `default` must be supplied by the caller.
inputs:
  - name: query
    description: 'The query used by Bigquery service to fetch the results.'
  - name: project_id
    description: 'The project to execute the query job.'
  - name: dataset_id
    description: 'The ID of the persistent dataset to keep the results of the query.'
  - name: table_id
    description: 'The ID of the table to keep the results of the query. If absent, the operation will generate a random id for the table.'
    default: ''
  - name: output_gcs_path
    description: 'The GCS blob path to dump the query results to.'
    default: ''
  - name: job_config
    description: 'The full config spec for the query job.'
    default: ''

outputs:
  - name: output_gcs_path
    description: 'The GCS blob path to dump the query results to.'

implementation:
  container:
    image: gcr.io/ml-pipeline-dogfood/ml-pipeline-gcp:latest
    # The first two entries name the module and sub-command; the rest are
    # `--flag, value` pairs, one per declared input.
    args:
      - kfp_component.google.bigquery
      - query
      - --query
      - {inputValue: query}
      - --project_id
      - {inputValue: project_id}
      - --dataset_id
      - {inputValue: dataset_id}
      - --table_id
      - {inputValue: table_id}
      - --output_gcs_path
      - {inputValue: output_gcs_path}
      - --job_config
      - {inputValue: job_config}
    env:
      # Quoted so the braces are not parsed as a YAML flow mapping.
      # NOTE(review): presumably expanded by the workflow engine - confirm.
      KFP_POD_NAME: "{{pod.name}}"
    # Output name -> file path inside the container.
    fileOutputs:
      output_gcs_path: /tmp/kfp/output/bigquery/query-output-path.txt
44 changes: 44 additions & 0 deletions components/gcp/dataflow/launch_python/component.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Copyright 2018 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Component spec for launching a Beam Python file on Dataflow.
# Declares the user-facing inputs/outputs and how they are wired into the
# container's command-line arguments.
name: Launch Python
description: |
  Launch a self-executing beam python file.

# Inputs without a `default` must be supplied by the caller.
inputs:
  - name: python_file_path
    description: 'The gcs or local path to the python file to run.'
  - name: project_id
    description: 'The ID of the parent project.'
  - name: requirements_file_path
    description: 'Optional, the gcs or local path to the pip requirements file'
    default: ''
  - name: location
    description: 'The regional endpoint to which to direct the request.'
    default: ''
  - name: job_name_prefix
    description: 'Optional. The prefix of the generated job name. If not provided, the method will generate a random name.'
    default: ''
  - name: args
    description: 'The list of args to pass to the python file.'
    # String form of an empty list; kept quoted so it stays a string.
    default: '[]'
  - name: wait_interval
    description: 'Optional wait interval between calls to get job status. Defaults to 30.'
    # Quoted so the default is the string '30', not an integer.
    default: '30'

outputs:
  - name: job_id
    description: 'The id of the created dataflow job.'

implementation:
  container:
    image: gcr.io/ml-pipeline-dogfood/ml-pipeline-gcp:latest
    # The first two entries name the module and sub-command; the rest are
    # `--flag, value` pairs, one per declared input.
    args:
      - kfp_component.google.dataflow
      - launch_python
      - --python_file_path
      - {inputValue: python_file_path}
      - --project_id
      - {inputValue: project_id}
      - --requirements_file_path
      - {inputValue: requirements_file_path}
      - --location
      - {inputValue: location}
      - --job_name_prefix
      - {inputValue: job_name_prefix}
      - --args
      - {inputValue: args}
      - --wait_interval
      - {inputValue: wait_interval}
    env:
      # Quoted so the braces are not parsed as a YAML flow mapping.
      # NOTE(review): presumably expanded by the workflow engine - confirm.
      KFP_POD_NAME: "{{pod.name}}"
    # Output name -> file path inside the container.
    fileOutputs:
      job_id: /tmp/kfp/output/dataflow/job_id.txt
44 changes: 44 additions & 0 deletions components/gcp/dataflow/launch_template/component.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Copyright 2018 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Component spec for launching a Dataflow job from a template.
# Declares the user-facing inputs/outputs and how they are wired into the
# container's command-line arguments.
name: Launch Dataflow Template
description: |
  Launches a dataflow job from a template.

# Inputs without a `default` must be supplied by the caller.
inputs:
  - name: project_id
    description: 'Required. The ID of the Cloud Platform project that the job belongs to.'
  - name: gcs_path
    description: 'Required. A Cloud Storage path to the template from which to create the job. Must be valid Cloud Storage URL, beginning with `gs://`.'
  - name: launch_parameters
    description: 'Parameters to provide to the template being launched. Schema defined in https://cloud.google.com/dataflow/docs/reference/rest/v1b3/LaunchTemplateParameters. `jobName` will be replaced by generated name.'
  - name: location
    description: 'The regional endpoint to which to direct the request.'
    default: ''
  - name: job_name_prefix
    description: 'Optional. The prefix of the generated job name. If not provided, the method will generate a random name.'
    default: ''
  - name: validate_only
    description: 'If true, the request is validated but not actually executed. Defaults to false.'
    # Quoted so the default is the string 'False', not a YAML boolean.
    default: 'False'
  - name: wait_interval
    description: 'Optional wait interval between calls to get job status. Defaults to 30.'
    # Quoted so the default is the string '30', not an integer.
    default: '30'

outputs:
  - name: job_id
    description: 'The ID of the created dataflow job.'

implementation:
  container:
    image: gcr.io/ml-pipeline-dogfood/ml-pipeline-gcp:latest
    # The first two entries name the module and sub-command; the rest are
    # `--flag, value` pairs, one per declared input.
    args:
      - kfp_component.google.dataflow
      - launch_template
      - --project_id
      - {inputValue: project_id}
      - --gcs_path
      - {inputValue: gcs_path}
      - --launch_parameters
      - {inputValue: launch_parameters}
      - --location
      - {inputValue: location}
      - --job_name_prefix
      - {inputValue: job_name_prefix}
      - --validate_only
      - {inputValue: validate_only}
      - --wait_interval
      - {inputValue: wait_interval}
    env:
      # Quoted so the braces are not parsed as a YAML flow mapping.
      # NOTE(review): presumably expanded by the workflow engine - confirm.
      KFP_POD_NAME: "{{pod.name}}"
    # Output name -> file path inside the container.
    fileOutputs:
      job_id: /tmp/kfp/output/dataflow/job_id.txt
50 changes: 50 additions & 0 deletions components/gcp/ml_engine/batch_predict/component.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Copyright 2018 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Component spec for creating a Cloud ML Engine batch prediction job.
# Declares the user-facing inputs/outputs and how they are wired into the
# container's command-line arguments.
name: Batch predict against a model with Cloud ML Engine
description: |
  Creates a MLEngine batch prediction job.

# Inputs without a `default` must be supplied by the caller.
inputs:
  - name: project_id
    description: 'Required. The ID of the parent project of the job.'
  - name: model_path
    description: 'The path to the model. It can be either: `projects/[PROJECT_ID]/models/[MODEL_ID]` or `projects/[PROJECT_ID]/models/[MODEL_ID]/versions/[VERSION_ID]` or a GCS path of a model file.'
  - name: input_paths
    description: 'Required. The Google Cloud Storage location of the input data files. May contain wildcards.'
  - name: input_data_format
    description: 'Required. The format of the input data files. See https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs#DataFormat.'
  - name: output_path
    description: 'Required. The output Google Cloud Storage location.'
  - name: region
    description: 'Required. The Google Compute Engine region to run the prediction job in.'
  - name: output_data_format
    description: 'Optional. Format of the output data files, defaults to JSON.'
    default: ''
  - name: prediction_input
    description: 'Input parameters to create a prediction job.'
    default: ''
  - name: job_id_prefix
    description: 'The prefix of the generated job id.'
    default: ''
  - name: wait_interval
    description: 'Optional wait interval between calls to get job status. Defaults to 30.'
    # Quoted so the default is the string '30', not an integer.
    default: '30'

outputs:
  - name: job_id
    description: 'The ID of the created job.'

implementation:
  container:
    image: gcr.io/ml-pipeline-dogfood/ml-pipeline-gcp:latest
    # The first two entries name the module and sub-command; the rest are
    # `--flag, value` pairs, one per declared input.
    args:
      - kfp_component.google.ml_engine
      - batch_predict
      - --project_id
      - {inputValue: project_id}
      - --model_path
      - {inputValue: model_path}
      - --input_paths
      - {inputValue: input_paths}
      - --input_data_format
      - {inputValue: input_data_format}
      - --output_path
      - {inputValue: output_path}
      - --region
      - {inputValue: region}
      - --output_data_format
      - {inputValue: output_data_format}
      - --prediction_input
      - {inputValue: prediction_input}
      - --job_id_prefix
      - {inputValue: job_id_prefix}
      - --wait_interval
      - {inputValue: wait_interval}
    env:
      # Quoted so the braces are not parsed as a YAML flow mapping.
      # NOTE(review): presumably expanded by the workflow engine - confirm.
      KFP_POD_NAME: "{{pod.name}}"
    # Output name -> file path inside the container.
    fileOutputs:
      job_id: /tmp/kfp/output/ml_engine/job_id.txt
53 changes: 53 additions & 0 deletions components/gcp/ml_engine/deploy/component.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Copyright 2018 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Component spec for deploying a model version to Cloud ML Engine.
# Declares the user-facing inputs/outputs and how they are wired into the
# container's command-line arguments.
name: Deploy a model to Cloud ML Engine
description: |
  Creates a Cloud Machine Learning version and optionally a model if it does not exist.

# Inputs without a `default` must be supplied by the caller.
inputs:
  - name: model_uri
    description: 'Required, the GCS URI which contains a model file. Common used TF model search path (export/exporter) will be used if exist.'
  - name: project_id
    description: 'Required, the ID of the parent project.'
  - name: model_id
    description: 'Optional, the user provided name of the model.'
    default: ''
  - name: version_id
    description: 'Optional, the user provided name of the version. If it is not provided, the operation uses a random name.'
    default: ''
  - name: runtime_version
    description: 'Optional, the Cloud ML Engine runtime version to use for this deployment. If not set, Cloud ML Engine uses the default stable version, 1.0.'
    default: ''
  - name: python_version
    description: 'Optional, the version of Python used in prediction. If not set, the default version is `2.7`. Python `3.5` is available when runtimeVersion is set to `1.4` and above. Python `2.7` works with all supported runtime versions.'
    default: ''
  - name: version
    description: 'Optional, the payload of the new version.'
    default: ''
  - name: replace_existing_version
    description: 'Boolean flag indicates whether to replace existing version in case of conflict.'
    # Quoted so the default is the string 'False', not a YAML boolean.
    default: 'False'
  - name: set_default
    description: 'Boolean flag indicates whether to set the new version as default version in the model.'
    default: 'False'
  - name: wait_interval
    description: 'The interval to wait for a long running operation.'
    # Quoted so the default is the string '30', not an integer.
    default: '30'

outputs:
  - name: model_uri
    description: 'The URI of the model.'
  - name: model_name
    description: 'The name of the deployed model.'
  - name: version_name
    description: 'The name of the deployed version.'

implementation:
  container:
    image: gcr.io/ml-pipeline-dogfood/ml-pipeline-gcp:latest
    # The first two entries name the module and sub-command; the rest are
    # `--flag, value` pairs. Every `inputValue` must name an entry declared
    # under `inputs`, and every declared input is forwarded exactly once.
    # NOTE(review): flag names mirror the input names, as in the sibling
    # component specs - confirm against the ml_engine deploy CLI.
    args:
      - kfp_component.google.ml_engine
      - deploy
      - --model_uri
      - {inputValue: model_uri}
      - --project_id
      - {inputValue: project_id}
      - --model_id
      - {inputValue: model_id}
      - --version_id
      - {inputValue: version_id}
      - --runtime_version
      - {inputValue: runtime_version}
      - --python_version
      - {inputValue: python_version}
      - --version
      - {inputValue: version}
      - --replace_existing_version
      - {inputValue: replace_existing_version}
      - --set_default
      - {inputValue: set_default}
      - --wait_interval
      - {inputValue: wait_interval}
    env:
      # Quoted so the braces are not parsed as a YAML flow mapping.
      # NOTE(review): presumably expanded by the workflow engine - confirm.
      KFP_POD_NAME: "{{pod.name}}"
    # Output name -> file path inside the container.
    fileOutputs:
      model_uri: /tmp/kfp/output/ml_engine/model_uri.txt
      model_name: /tmp/kfp/output/ml_engine/model_name.txt
      version_name: /tmp/kfp/output/ml_engine/version_name.txt
56 changes: 56 additions & 0 deletions components/gcp/ml_engine/train/component.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Copyright 2018 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Component spec for submitting a Cloud ML Engine training job.
# Declares the user-facing inputs/outputs and how they are wired into the
# container's command-line arguments.
name: Train a model with Cloud ML Engine
description: |
  Submits a Cloud Machine Learning training job.

# Only `project_id` has no default; every other input falls back to its
# declared default value.
inputs:
  - name: project_id
    description: 'Required. The ID of the parent project of the job.'
  - name: python_module
    description: 'The Python module name to run after installing the packages.'
    default: ''
  - name: package_uris
    description: 'The Google Cloud Storage location of the packages with the training program and any additional dependencies. The maximum number of package URIs is 100.'
    default: ''
  - name: region
    description: 'The Google Compute Engine region to run the training job in.'
    default: ''
  - name: args
    description: 'Command line arguments to pass to the program.'
    default: ''
  - name: job_dir
    description: 'A Google Cloud Storage path in which to store training outputs and other data needed for training. This path is passed to your TensorFlow program as the `--job-dir` command-line argument. The benefit of specifying this field is that Cloud ML validates the path for use in training.'
    default: ''
  - name: python_version
    description: 'The version of Python used in training. If not set, the default version is `2.7`. Python `3.5` is available when runtimeVersion is set to `1.4` and above.'
    default: ''
  - name: runtime_version
    description: 'The Cloud ML Engine runtime version to use for training. If not set, Cloud ML Engine uses the default stable version, 1.0. '
    default: ''
  - name: master_image_uri
    description: 'The Docker image to run on the master replica. This image must be in Container Registry.'
    default: ''
  - name: worker_image_uri
    description: 'The Docker image to run on the worker replica. This image must be in Container Registry.'
    default: ''
  - name: training_input
    description: 'Input parameters to create a training job.'
    default: ''
  - name: job_id_prefix
    description: 'The prefix of the generated job id.'
    default: ''
  - name: wait_interval
    description: 'Optional wait interval between calls to get job status. Defaults to 30.'
    # Quoted so the default is the string '30', not an integer.
    default: '30'

outputs:
  - name: job_id
    description: 'The ID of the created job.'

implementation:
  container:
    image: gcr.io/ml-pipeline-dogfood/ml-pipeline-gcp:latest
    # The first two entries name the module and sub-command; the rest are
    # `--flag, value` pairs, one per declared input.
    args:
      - kfp_component.google.ml_engine
      - train
      - --project_id
      - {inputValue: project_id}
      - --python_module
      - {inputValue: python_module}
      - --package_uris
      - {inputValue: package_uris}
      - --region
      - {inputValue: region}
      - --args
      - {inputValue: args}
      - --job_dir
      - {inputValue: job_dir}
      - --python_version
      - {inputValue: python_version}
      - --runtime_version
      - {inputValue: runtime_version}
      - --master_image_uri
      - {inputValue: master_image_uri}
      - --worker_image_uri
      - {inputValue: worker_image_uri}
      - --training_input
      - {inputValue: training_input}
      - --job_id_prefix
      - {inputValue: job_id_prefix}
      - --wait_interval
      - {inputValue: wait_interval}
    env:
      # Quoted so the braces are not parsed as a YAML flow mapping.
      # NOTE(review): presumably expanded by the workflow engine - confirm.
      KFP_POD_NAME: "{{pod.name}}"
    # Output name -> file path inside the container.
    fileOutputs:
      job_id: /tmp/kfp/output/ml_engine/job_id.txt

0 comments on commit 91d50f6

Please sign in to comment.