From 3227325f7d49f28c7f2abe862790c01cf69eef5b Mon Sep 17 00:00:00 2001 From: hongye-sun <43763191+hongye-sun@users.noreply.github.com> Date: Tue, 5 Mar 2019 15:06:20 -0800 Subject: [PATCH] Add sample notebook and readme markdown for GCP components. (#899) * Add sample notebook and readme markdown for GCP components. * Add cloud ml train component notebook * Fix cmle deploy component spec. * Add CMLE deploy notebook. * Add notebook for CMLE batch prediction component. * Add notebook for dataflow launch template component. * Apply AIHub doc template and fix review comments * Updated the image gcr to public repo in component specs. --- components/gcp/bigquery/query/README.md | 112 +++++++++ components/gcp/bigquery/query/component.yaml | 2 +- components/gcp/bigquery/query/sample.ipynb | 207 ++++++++++++++++ .../gcp/dataflow/launch_python/README.md | 117 +++++++++ .../gcp/dataflow/launch_python/component.yaml | 2 +- .../gcp/dataflow/launch_python/sample.ipynb | 212 ++++++++++++++++ .../gcp/dataflow/launch_template/README.md | 118 +++++++++ .../dataflow/launch_template/component.yaml | 2 +- .../gcp/dataflow/launch_template/sample.ipynb | 213 ++++++++++++++++ .../gcp/ml_engine/batch_predict/README.md | 124 ++++++++++ .../ml_engine/batch_predict/component.yaml | 2 +- .../gcp/ml_engine/batch_predict/sample.ipynb | 219 +++++++++++++++++ components/gcp/ml_engine/deploy/README.md | 120 +++++++++ .../gcp/ml_engine/deploy/component.yaml | 6 +- components/gcp/ml_engine/deploy/sample.ipynb | 215 ++++++++++++++++ components/gcp/ml_engine/train/README.md | 136 +++++++++++ components/gcp/ml_engine/train/component.yaml | 2 +- components/gcp/ml_engine/train/sample.ipynb | 231 ++++++++++++++++++ 18 files changed, 2032 insertions(+), 8 deletions(-) create mode 100644 components/gcp/bigquery/query/README.md create mode 100644 components/gcp/bigquery/query/sample.ipynb create mode 100644 components/gcp/dataflow/launch_python/README.md create mode 100644 components/gcp/dataflow/launch_python/sample.ipynb create mode 100644 components/gcp/dataflow/launch_template/README.md create mode 100644 components/gcp/dataflow/launch_template/sample.ipynb create mode 100644 components/gcp/ml_engine/batch_predict/README.md create mode 100644 components/gcp/ml_engine/batch_predict/sample.ipynb create mode 100644 components/gcp/ml_engine/deploy/README.md create mode 100644 components/gcp/ml_engine/deploy/sample.ipynb create mode 100644 components/gcp/ml_engine/train/README.md create mode 100644 components/gcp/ml_engine/train/sample.ipynb diff --git a/components/gcp/bigquery/query/README.md b/components/gcp/bigquery/query/README.md new file mode 100644 index 00000000000..e814c572b89 --- /dev/null +++ b/components/gcp/bigquery/query/README.md @@ -0,0 +1,112 @@ + +# Bigquery - Query + +## Intended Use +A Kubeflow Pipeline component to submit a query to Google Cloud Bigquery service and dump outputs to a Google Cloud Storage blob. + +## Run-Time Parameters: +Name | Description +:--- | :---------- +query | The query used by Bigquery service to fetch the results. +project_id | The project to execute the query job. +dataset_id | The ID of the persistent dataset to keep the results of the query. If the dataset does not exist, the operation will create a new one. +table_id | The ID of the table to keep the results of the query. If absent, the operation will generate a random id for the table. +output_gcs_path | The GCS blob path to dump the query results to. +dataset_location | The location to create the dataset. Defaults to `US`. 
+job_config | The full config spec for the query job. See [QueryJobConfig](https://googleapis.github.io/google-cloud-python/latest/bigquery/generated/google.cloud.bigquery.job.QueryJobConfig.html#google.cloud.bigquery.job.QueryJobConfig) for details. + +## Output: +Name | Description +:--- | :---------- +output_gcs_path | The GCS blob path to dump the query results to. + +## Sample + +Note: the sample code below works in both IPython notebook or python code directly. + +### Set sample parameters + + +```python +# Required Parameters +PROJECT_ID = '' +GCS_WORKING_DIR = 'gs://' # No ending slash + +# Optional Parameters +EXPERIMENT_NAME = 'Bigquery -Query' +COMPONENT_SPEC_URI = 'https://raw.githubusercontent.com/kubeflow/pipelines/master/components/gcp/bigquery/query/component.yaml' +``` + +### Install KFP SDK + + +```python +# Install the SDK (Uncomment the code if the SDK is not installed before) +# KFP_PACKAGE = 'https://storage.googleapis.com/ml-pipeline/release/0.1.11/kfp.tar.gz' +# !pip3 install $KFP_PACKAGE --upgrade +``` + +### Load component definitions + + +```python +import kfp.components as comp + +bigquery_query_op = comp.load_component_from_url(COMPONENT_SPEC_URI) +display(bigquery_query_op) +``` + +### Here is an illustrative pipeline that uses the component + + +```python +import kfp.dsl as dsl +import kfp.gcp as gcp +import json +@dsl.pipeline( + name='Bigquery query pipeline', + description='Bigquery query pipeline' +) +def pipeline( + query, + project_id, + dataset_id='', + table_id='', + output_gcs_path='', + dataset_location='US', + job_config='' +): + bigquery_query_op(query, project_id, dataset_id, table_id, output_gcs_path, dataset_location, + job_config).apply(gcp.use_gcp_secret('user-gcp-sa')) +``` + +### Compile the pipeline + + +```python +pipeline_func = pipeline +pipeline_filename = pipeline_func.__name__ + '.pipeline.tar.gz' +import kfp.compiler as compiler +compiler.Compiler().compile(pipeline_func, pipeline_filename) +``` + +### Submit the pipeline for execution + + +```python +#Specify pipeline argument values +arguments = { + 'query': 'SELECT * FROM `bigquery-public-data.stackoverflow.posts_questions` LIMIT 10', + 'project_id': PROJECT_ID, + 'output_gcs_path': '{}/bigquery/query/questions.csv'.format(GCS_WORKING_DIR) +} + +#Get or create an experiment and submit a pipeline run +import kfp +client = kfp.Client() +experiment = client.create_experiment(EXPERIMENT_NAME) + +#Submit a pipeline run +run_name = pipeline_func.__name__ + ' run' +run_result = client.run_pipeline(experiment.id, run_name, pipeline_filename, arguments) +``` diff --git a/components/gcp/bigquery/query/component.yaml b/components/gcp/bigquery/query/component.yaml index 6e8e7b5b042..19a7fe7c7ae 100644 --- a/components/gcp/bigquery/query/component.yaml +++ b/components/gcp/bigquery/query/component.yaml @@ -26,7 +26,7 @@ outputs: - {name: output_gcs_path, description: 'The GCS blob path to dump the query results to.'} implementation: container: - image: gcr.io/ml-pipeline-dogfood/ml-pipeline-gcp:latest + image: gcr.io/ml-pipeline/ml-pipeline-gcp:latest args: [ kfp_component.google.bigquery, query, --query, {inputValue: query}, diff --git a/components/gcp/bigquery/query/sample.ipynb b/components/gcp/bigquery/query/sample.ipynb new file mode 100644 index 00000000000..e019992ac32 --- /dev/null +++ b/components/gcp/bigquery/query/sample.ipynb @@ -0,0 +1,207 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Bigquery - Query\n", + "\n", + "## Intended Use\n", + 
"A Kubeflow Pipeline component to submit a query to Google Cloud Bigquery service and dump outputs to a Google Cloud Storage blob. \n", + "\n", + "## Input:\n", + "Name | Description\n", + ":--- | :----------\n", + "query | The query used by Bigquery service to fetch the results.\n", + "project_id | The project to execute the query job.\n", + "dataset_id | The ID of the persistent dataset to keep the results of the query. If the dataset does not exist, the operation will create a new one.\n", + "table_id | The ID of the table to keep the results of the query. If absent, the operation will generate a random id for the table.\n", + "output_gcs_path | The GCS blob path to dump the query results to.\n", + "dataset_location | The location to create the dataset. Defaults to `US`.\n", + "job_config | The full config spec for the query job. See [QueryJobConfig](https://googleapis.github.io/google-cloud-python/latest/bigquery/generated/google.cloud.bigquery.job.QueryJobConfig.html#google.cloud.bigquery.job.QueryJobConfig) for details.\n", + "\n", + "## Output:\n", + "Name | Description\n", + ":--- | :----------\n", + "output_gcs_path | The GCS blob path to dump the query results to." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Sample\n", + "\n", + "Note: the sample code below works in both IPython notebook or python code directly." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Set sample parameters" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "# Required Parameters\n", + "PROJECT_ID = ''\n", + "GCS_WORKING_DIR = 'gs://' # No ending slash\n", + "\n", + "# Optional Parameters\n", + "EXPERIMENT_NAME = 'Bigquery -Query'\n", + "COMPONENT_SPEC_URI = 'https://raw.githubusercontent.com/kubeflow/pipelines/master/components/gcp/bigquery/query/component.yaml'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Install KFP SDK" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# Install the SDK (Uncomment the code if the SDK is not installed before)\n", + "# KFP_PACKAGE = 'https://storage.googleapis.com/ml-pipeline/release/0.1.11/kfp.tar.gz'\n", + "# !pip3 install $KFP_PACKAGE --upgrade" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Load component definitions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import kfp.components as comp\n", + "\n", + "bigquery_query_op = comp.load_component_from_url(COMPONENT_SPEC_URI)\n", + "display(bigquery_query_op)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Run the component as a single pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import kfp.dsl as dsl\n", + "import kfp.gcp as gcp\n", + "import json\n", + "@dsl.pipeline(\n", + " name='Bigquery query pipeline',\n", + " description='Bigquery query pipeline'\n", + ")\n", + "def pipeline(\n", + " query, \n", + " project_id, \n", + " dataset_id='', \n", + " table_id='', \n", + " output_gcs_path='', \n", + " dataset_location='US', \n", + " job_config=''\n", + "):\n", + " bigquery_query_op(query, project_id, dataset_id, table_id, output_gcs_path, dataset_location, \n", + " job_config).apply(gcp.use_gcp_secret('user-gcp-sa'))" + ] + }, + 
{
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Compile the pipeline"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pipeline_func = pipeline\n",
+ "pipeline_filename = pipeline_func.__name__ + '.pipeline.tar.gz'\n",
+ "import kfp.compiler as compiler\n",
+ "compiler.Compiler().compile(pipeline_func, pipeline_filename)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Submit the pipeline for execution"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#Specify pipeline argument values\n",
+ "arguments = {\n",
+ " 'query': 'SELECT * FROM `bigquery-public-data.stackoverflow.posts_questions` LIMIT 10',\n",
+ " 'project_id': PROJECT_ID,\n",
+ " 'output_gcs_path': '{}/bigquery/query/questions.csv'.format(GCS_WORKING_DIR)\n",
+ "}\n",
+ "\n",
+ "#Get or create an experiment and submit a pipeline run\n",
+ "import kfp\n",
+ "client = kfp.Client()\n",
+ "experiment = client.create_experiment(EXPERIMENT_NAME)\n",
+ "\n",
+ "#Submit a pipeline run\n",
+ "run_name = pipeline_func.__name__ + ' run'\n",
+ "run_result = client.run_pipeline(experiment.id, run_name, pipeline_filename, arguments)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/components/gcp/dataflow/launch_python/README.md b/components/gcp/dataflow/launch_python/README.md
new file mode 100644
index 00000000000..4dbfb134ad5
--- /dev/null
+++ b/components/gcp/dataflow/launch_python/README.md
@@ -0,0 +1,117 @@
+
# Dataflow - Launch Python

## Intended Use
A Kubeflow Pipeline component to submit an Apache Beam job, authored in Python, to Google Cloud Dataflow for execution. The Python Beam code runs with the Google Cloud Dataflow runner.

## Run-Time Parameters:
Name | Description
:--- | :----------
python_file_path | The GCS or local path to the Python file to run.
project_id | The ID of the parent project.
requirements_file_path | Optional. The GCS or local path to the pip requirements file.
location | Optional. The regional endpoint to which to direct the request.
job_name_prefix | Optional. The prefix of the generated job name. If not provided, the component generates a random name.
args | The list of arguments to pass to the Python file.
wait_interval | Optional wait interval between calls to get job status. Defaults to 30.

## Output:
Name | Description
:--- | :----------
job_id | The ID of the created Dataflow job.

## Sample

Note: the sample code below works both in an IPython notebook and directly in Python code.
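### What a launched Python file can look like

The component runs the file at `python_file_path` as an ordinary Beam program and passes `args` through to it. Below is a minimal, hypothetical word-count sketch written against the standard `apache_beam` API; it only illustrates the expected shape of such a file and is not the contents of the `wc.py` used in the sample run further down.

```python
# wordcount.py - a minimal sketch of a Beam pipeline this component could launch.
import argparse

import apache_beam as beam
from apache_beam.options.pipeline_options import PipelineOptions


def run(argv=None):
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', default='gs://dataflow-samples/shakespeare/kinglear.txt')
    parser.add_argument('--output', required=True)
    known_args, pipeline_args = parser.parse_known_args(argv)

    # Unparsed args (runner, temp/staging locations, etc.) become pipeline options.
    with beam.Pipeline(options=PipelineOptions(pipeline_args)) as p:
        (p
         | 'Read' >> beam.io.ReadFromText(known_args.input)
         | 'Split' >> beam.FlatMap(lambda line: line.split())
         | 'PairWithOne' >> beam.Map(lambda word: (word, 1))
         | 'Count' >> beam.CombinePerKey(sum)
         | 'Format' >> beam.Map(lambda kv: '%s: %d' % kv)
         | 'Write' >> beam.io.WriteToText(known_args.output))


if __name__ == '__main__':
    run()
```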
+ +### Set sample parameters + + +```python +# Required Parameters +PROJECT_ID = '' +GCS_WORKING_DIR = 'gs://' # No ending slash + +# Optional Parameters +EXPERIMENT_NAME = 'Dataflow - Launch Python' +COMPONENT_SPEC_URI = 'https://raw.githubusercontent.com/kubeflow/pipelines/master/components/gcp/dataflow/launch_python/component.yaml' +``` + +### Install KFP SDK + + +```python +# Install the SDK (Uncomment the code if the SDK is not installed before) +# KFP_PACKAGE = 'https://storage.googleapis.com/ml-pipeline/release/0.1.11/kfp.tar.gz' +# !pip3 install $KFP_PACKAGE --upgrade +``` + +### Load component definitions + + +```python +import kfp.components as comp + +dataflow_python_op = comp.load_component_from_url(COMPONENT_SPEC_URI) +display(dataflow_python_op) +``` + +### Here is an illustrative pipeline that uses the component + + +```python +import kfp.dsl as dsl +import kfp.gcp as gcp +import json +@dsl.pipeline( + name='Dataflow launch python pipeline', + description='Dataflow launch python pipeline' +) +def pipeline( + python_file_path, + project_id, + requirements_file_path = '', + location = '', + job_name_prefix = '', + args = '', + wait_interval = 30 +): + dataflow_python_op(python_file_path, project_id, requirements_file_path, location, job_name_prefix, args, + wait_interval).apply(gcp.use_gcp_secret('user-gcp-sa')) +``` + +### Compile the pipeline + + +```python +pipeline_func = pipeline +pipeline_filename = pipeline_func.__name__ + '.pipeline.tar.gz' +import kfp.compiler as compiler +compiler.Compiler().compile(pipeline_func, pipeline_filename) +``` + +### Submit the pipeline for execution + + +```python +#Specify pipeline argument values +arguments = { + 'python_file_path': 'gs://ml-pipeline-playground/samples/dataflow/wc/wc.py', + 'project_id': PROJECT_ID, + 'requirements_file_path': 'gs://ml-pipeline-playground/samples/dataflow/wc/requirements.txt', + 'args': json.dumps([ + '--output', '{}/wc/wordcount.out'.format(GCS_WORKING_DIR), + '--temp_location', '{}/dataflow/wc/tmp'.format(GCS_WORKING_DIR), + '--staging_location', '{}/dataflow/wc/staging'.format(GCS_WORKING_DIR) + ]) +} + +#Get or create an experiment and submit a pipeline run +import kfp +client = kfp.Client() +experiment = client.create_experiment(EXPERIMENT_NAME) + +#Submit a pipeline run +run_name = pipeline_func.__name__ + ' run' +run_result = client.run_pipeline(experiment.id, run_name, pipeline_filename, arguments) +``` diff --git a/components/gcp/dataflow/launch_python/component.yaml b/components/gcp/dataflow/launch_python/component.yaml index 9b8b5b54cc6..e26bbff9a71 100644 --- a/components/gcp/dataflow/launch_python/component.yaml +++ b/components/gcp/dataflow/launch_python/component.yaml @@ -27,7 +27,7 @@ outputs: - {name: job_id, description: 'The id of the created dataflow job.'} implementation: container: - image: gcr.io/ml-pipeline-dogfood/ml-pipeline-gcp:latest + image: gcr.io/ml-pipeline/ml-pipeline-gcp:latest args: [ kfp_component.google.dataflow, launch_python, --python_file_path, {inputValue: python_file_path}, diff --git a/components/gcp/dataflow/launch_python/sample.ipynb b/components/gcp/dataflow/launch_python/sample.ipynb new file mode 100644 index 00000000000..625a6eecc69 --- /dev/null +++ b/components/gcp/dataflow/launch_python/sample.ipynb @@ -0,0 +1,212 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Dataflow - Launch Python\n", + "\n", + "## Intended Use\n", + "A Kubeflow Pipeline component to submit a Apache Beam job authored in python, to Google 
Cloud Dataflow for execution. The python beam code runs with Google Cloud Dataflow runner.\n", + "\n", + "## Run-Time Parameters:\n", + "Name | Description\n", + ":--- | :----------\n", + "python_file_path | The gcs or local path to the python file to run.\n", + "project_id | The ID of the parent project.\n", + "requirements_file_path | Optional, the gcs or local path to the pip requirements file.\n", + "location | Optional. The regional endpoint to which to direct the request.\n", + "job_name_prefix | Optional. The prefix of the genrated job name. If not provided, the method will generated a random name.\n", + "args | The list of args to pass to the python file.\n", + "wait_interval | Optional wait interval between calls to get job status. Defaults to 30.\n", + "\n", + "## Output:\n", + "Name | Description\n", + ":--- | :----------\n", + "job_id | The id of the created dataflow job." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Sample\n", + "\n", + "Note: the sample code below works in both IPython notebook or python code directly." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Set sample parameters" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "# Required Parameters\n", + "PROJECT_ID = ''\n", + "GCS_WORKING_DIR = 'gs://' # No ending slash\n", + "\n", + "# Optional Parameters\n", + "EXPERIMENT_NAME = 'Dataflow - Launch Python'\n", + "COMPONENT_SPEC_URI = 'https://raw.githubusercontent.com/kubeflow/pipelines/master/components/gcp/dataflow/launch_python/component.yaml'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Install KFP SDK" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# Install the SDK (Uncomment the code if the SDK is not installed before)\n", + "# KFP_PACKAGE = 'https://storage.googleapis.com/ml-pipeline/release/0.1.11/kfp.tar.gz'\n", + "# !pip3 install $KFP_PACKAGE --upgrade" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Load component definitions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import kfp.components as comp\n", + "\n", + "dataflow_python_op = comp.load_component_from_url(COMPONENT_SPEC_URI)\n", + "display(dataflow_python_op)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Here is an illustrative pipeline that uses the component" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import kfp.dsl as dsl\n", + "import kfp.gcp as gcp\n", + "import json\n", + "@dsl.pipeline(\n", + " name='Dataflow launch python pipeline',\n", + " description='Dataflow launch python pipeline'\n", + ")\n", + "def pipeline(\n", + " python_file_path,\n", + " project_id,\n", + " requirements_file_path = '',\n", + " location = '',\n", + " job_name_prefix = '',\n", + " args = '',\n", + " wait_interval = 30\n", + "):\n", + " dataflow_python_op(python_file_path, project_id, requirements_file_path, location, job_name_prefix, args,\n", + " wait_interval).apply(gcp.use_gcp_secret('user-gcp-sa'))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Compile the pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + 
"pipeline_func = pipeline\n", + "pipeline_filename = pipeline_func.__name__ + '.pipeline.tar.gz'\n", + "import kfp.compiler as compiler\n", + "compiler.Compiler().compile(pipeline_func, pipeline_filename)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Submit the pipeline for execution" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Specify pipeline argument values\n", + "arguments = {\n", + " 'python_file_path': 'gs://ml-pipeline-playground/samples/dataflow/wc/wc.py',\n", + " 'project_id': PROJECT_ID,\n", + " 'requirements_file_path': 'gs://ml-pipeline-playground/samples/dataflow/wc/requirements.txt',\n", + " 'args': json.dumps([\n", + " '--output', '{}/wc/wordcount.out'.format(GCS_WORKING_DIR),\n", + " '--temp_location', '{}/dataflow/wc/tmp'.format(GCS_WORKING_DIR),\n", + " '--staging_location', '{}/dataflow/wc/staging'.format(GCS_WORKING_DIR)\n", + " ])\n", + "}\n", + "\n", + "#Get or create an experiment and submit a pipeline run\n", + "import kfp\n", + "client = kfp.Client()\n", + "experiment = client.create_experiment(EXPERIMENT_NAME)\n", + "\n", + "#Submit a pipeline run\n", + "run_name = pipeline_func.__name__ + ' run'\n", + "run_result = client.run_pipeline(experiment.id, run_name, pipeline_filename, arguments)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/components/gcp/dataflow/launch_template/README.md b/components/gcp/dataflow/launch_template/README.md new file mode 100644 index 00000000000..4ed2e93fd47 --- /dev/null +++ b/components/gcp/dataflow/launch_template/README.md @@ -0,0 +1,118 @@ + +# Dataflow - Launch Template + +## Intended Use + +A Kubeflow Pipeline component to submit a job from a dataflow template to Google Cloud Dataflow service. + +## Runtime Parameters: +Name | Description +:--- | :---------- +project_id | Required. The ID of the Cloud Platform project that the job belongs to. +gcs_path | Required. A Cloud Storage path to the template from which to create the job. Must be valid Cloud Storage URL, beginning with 'gs://'. +launch_parameters | Parameters to provide to the template being launched. Schema defined in https://cloud.google.com/dataflow/docs/reference/rest/v1b3/LaunchTemplateParameters. `jobName` will be replaced by generated name. +location | Optional. The regional endpoint to which to direct the request. +job_name_prefix | Optional. The prefix of the genrated job name. If not provided, the method will generated a random name. +validate_only | If true, the request is validated but not actually executed. Defaults to false. +wait_interval | Optional wait interval between calls to get job status. Defaults to 30. + +## Output: +Name | Description +:--- | :---------- +job_id | The id of the created dataflow job. + +## Sample + +Note: the sample code below works in both IPython notebook or python code directly. 
+ +### Set sample parameters + + +```python +# Required Parameters +PROJECT_ID = '' +GCS_WORKING_DIR = 'gs://' # No ending slash + +# Optional Parameters +EXPERIMENT_NAME = 'Dataflow - Launch Template' +COMPONENT_SPEC_URI = 'https://raw.githubusercontent.com/kubeflow/pipelines/master/components/gcp/dataflow/launch_template/component.yaml' +``` + +### Install KFP SDK + + +```python +# Install the SDK (Uncomment the code if the SDK is not installed before) +# KFP_PACKAGE = 'https://storage.googleapis.com/ml-pipeline/release/0.1.11/kfp.tar.gz' +# !pip3 install $KFP_PACKAGE --upgrade +``` + +### Load component definitions + + +```python +import kfp.components as comp + +dataflow_template_op = comp.load_component_from_url(COMPONENT_SPEC_URI) +display(dataflow_template_op) +``` + +### Here is an illustrative pipeline that uses the component + + +```python +import kfp.dsl as dsl +import kfp.gcp as gcp +import json +@dsl.pipeline( + name='Dataflow launch template pipeline', + description='Dataflow launch template pipeline' +) +def pipeline( + project_id, + gcs_path, + launch_parameters, + location='', + job_name_prefix='', + validate_only='', + wait_interval = 30 +): + dataflow_template_op(project_id, gcs_path, launch_parameters, location, job_name_prefix, validate_only, + wait_interval).apply(gcp.use_gcp_secret('user-gcp-sa')) +``` + +### Compile the pipeline + + +```python +pipeline_func = pipeline +pipeline_filename = pipeline_func.__name__ + '.pipeline.tar.gz' +import kfp.compiler as compiler +compiler.Compiler().compile(pipeline_func, pipeline_filename) +``` + +### Submit the pipeline for execution + + +```python +#Specify pipeline argument values +arguments = { + 'project_id': PROJECT_ID, + 'gcs_path': 'gs://dataflow-templates/latest/Word_Count', + 'launch_parameters': json.dumps({ + 'parameters': { + 'inputFile': 'gs://dataflow-samples/shakespeare/kinglear.txt', + 'output': '{}/dataflow/launch-template/'.format(GCS_WORKING_DIR) + } + }) +} + +#Get or create an experiment and submit a pipeline run +import kfp +client = kfp.Client() +experiment = client.create_experiment(EXPERIMENT_NAME) + +#Submit a pipeline run +run_name = pipeline_func.__name__ + ' run' +run_result = client.run_pipeline(experiment.id, run_name, pipeline_filename, arguments) +``` diff --git a/components/gcp/dataflow/launch_template/component.yaml b/components/gcp/dataflow/launch_template/component.yaml index b5dfa42a1e7..88f61b008c8 100644 --- a/components/gcp/dataflow/launch_template/component.yaml +++ b/components/gcp/dataflow/launch_template/component.yaml @@ -27,7 +27,7 @@ outputs: - {name: job_id, description: 'The ID of the created dataflow job.'} implementation: container: - image: gcr.io/ml-pipeline-dogfood/ml-pipeline-gcp:latest + image: gcr.io/ml-pipeline/ml-pipeline-gcp:latest args: [ kfp_component.google.dataflow, launch_template, --project_id, {inputValue: project_id}, diff --git a/components/gcp/dataflow/launch_template/sample.ipynb b/components/gcp/dataflow/launch_template/sample.ipynb new file mode 100644 index 00000000000..ddcfc1a1514 --- /dev/null +++ b/components/gcp/dataflow/launch_template/sample.ipynb @@ -0,0 +1,213 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Dataflow - Launch Template\n", + "\n", + "## Intended Use\n", + "\n", + "A Kubeflow Pipeline component to submit a job from a dataflow template to Google Cloud Dataflow service.\n", + "\n", + "## Runtime Parameters:\n", + "Name | Description\n", + ":--- | :----------\n", + "project_id | Required. 
The ID of the Cloud Platform project that the job belongs to.\n", + "gcs_path | Required. A Cloud Storage path to the template from which to create the job. Must be valid Cloud Storage URL, beginning with 'gs://'.\n", + "launch_parameters | Parameters to provide to the template being launched. Schema defined in https://cloud.google.com/dataflow/docs/reference/rest/v1b3/LaunchTemplateParameters. `jobName` will be replaced by generated name.\n", + "location | Optional. The regional endpoint to which to direct the request.\n", + "job_name_prefix | Optional. The prefix of the genrated job name. If not provided, the method will generated a random name.\n", + "validate_only | If true, the request is validated but not actually executed. Defaults to false.\n", + "wait_interval | Optional wait interval between calls to get job status. Defaults to 30.\n", + "\n", + "## Output:\n", + "Name | Description\n", + ":--- | :----------\n", + "job_id | The id of the created dataflow job." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Sample\n", + "\n", + "Note: the sample code below works in both IPython notebook or python code directly." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Set sample parameters" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "# Required Parameters\n", + "PROJECT_ID = ''\n", + "GCS_WORKING_DIR = 'gs://' # No ending slash\n", + "\n", + "# Optional Parameters\n", + "EXPERIMENT_NAME = 'Dataflow - Launch Template'\n", + "COMPONENT_SPEC_URI = 'https://raw.githubusercontent.com/kubeflow/pipelines/master/components/gcp/dataflow/launch_template/component.yaml'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Install KFP SDK" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# Install the SDK (Uncomment the code if the SDK is not installed before)\n", + "# KFP_PACKAGE = 'https://storage.googleapis.com/ml-pipeline/release/0.1.11/kfp.tar.gz'\n", + "# !pip3 install $KFP_PACKAGE --upgrade" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Load component definitions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import kfp.components as comp\n", + "\n", + "dataflow_template_op = comp.load_component_from_url(COMPONENT_SPEC_URI)\n", + "display(dataflow_template_op)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Here is an illustrative pipeline that uses the component" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import kfp.dsl as dsl\n", + "import kfp.gcp as gcp\n", + "import json\n", + "@dsl.pipeline(\n", + " name='Dataflow launch template pipeline',\n", + " description='Dataflow launch template pipeline'\n", + ")\n", + "def pipeline(\n", + " project_id, \n", + " gcs_path, \n", + " launch_parameters, \n", + " location='', \n", + " job_name_prefix='', \n", + " validate_only='', \n", + " wait_interval = 30\n", + "):\n", + " dataflow_template_op(project_id, gcs_path, launch_parameters, location, job_name_prefix, validate_only, \n", + " wait_interval).apply(gcp.use_gcp_secret('user-gcp-sa'))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Compile the pipeline" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipeline_func = pipeline\n", + "pipeline_filename = pipeline_func.__name__ + '.pipeline.tar.gz'\n", + "import kfp.compiler as compiler\n", + "compiler.Compiler().compile(pipeline_func, pipeline_filename)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Submit the pipeline for execution" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Specify pipeline argument values\n", + "arguments = {\n", + " 'project_id': PROJECT_ID,\n", + " 'gcs_path': 'gs://dataflow-templates/latest/Word_Count',\n", + " 'launch_parameters': json.dumps({\n", + " 'parameters': {\n", + " 'inputFile': 'gs://dataflow-samples/shakespeare/kinglear.txt',\n", + " 'output': '{}/dataflow/launch-template/'.format(GCS_WORKING_DIR)\n", + " }\n", + " })\n", + "}\n", + "\n", + "#Get or create an experiment and submit a pipeline run\n", + "import kfp\n", + "client = kfp.Client()\n", + "experiment = client.create_experiment(EXPERIMENT_NAME)\n", + "\n", + "#Submit a pipeline run\n", + "run_name = pipeline_func.__name__ + ' run'\n", + "run_result = client.run_pipeline(experiment.id, run_name, pipeline_filename, arguments)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/components/gcp/ml_engine/batch_predict/README.md b/components/gcp/ml_engine/batch_predict/README.md new file mode 100644 index 00000000000..802be61dffd --- /dev/null +++ b/components/gcp/ml_engine/batch_predict/README.md @@ -0,0 +1,124 @@ + +# CloudML - Batch Predict + +## Intended Use +A Kubeflow Pipeline component to submit a batch prediction job against a trained model to Google Cloud Machine Learning Engine service. + +## Runtime Parameters: +Name | Description +:--- | :---------- +project_id | Required. The ID of the parent project of the job. +model_path | Required. The path to the model. It can be either: `projects/[PROJECT_ID]/models/[MODEL_ID]` or `projects/[PROJECT_ID]/models/[MODEL_ID]/versions/[VERSION_ID]` or a GCS path of a model file. +input_paths | Required. The Google Cloud Storage location of the input data files. May contain wildcards. +input_data_format | Required. The format of the input data files. See https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs#DataFormat. +output_path | Required. The output Google Cloud Storage location. +region | Required. The Google Compute Engine region to run the prediction job in. +output_data_format | Optional. Format of the output data files, defaults to JSON. +prediction_input | Input parameters to create a prediction job. See [PredictionInput](https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs#PredictionInput). +job_id_prefix | The prefix of the generated job id. +wait_interval | Optional interval to wait for a long running operation. Defaults to 30. + +## Output: +Name | Description +:--- | :---------- +job_id | The ID of the created batch job. + +## Sample Code + +Note: the sample code below works in both IPython notebook or python code directly. 
+ +### Set sample parameters + + +```python +# Required Parameters +PROJECT_ID = '' +GCS_WORKING_DIR = 'gs://' # No ending slash + +# Optional Parameters +EXPERIMENT_NAME = 'CLOUDML - Batch Predict' +COMPONENT_SPEC_URI = 'https://raw.githubusercontent.com/kubeflow/pipelines/master/components/gcp/ml_engine/batch_predict/component.yaml' +``` + +### Install KFP SDK + + +```python +# Install the SDK (Uncomment the code if the SDK is not installed before) +# KFP_PACKAGE = 'https://storage.googleapis.com/ml-pipeline/release/0.1.11/kfp.tar.gz' +# !pip3 install $KFP_PACKAGE --upgrade +``` + +### Load component definitions + + +```python +import kfp.components as comp + +mlengine_batch_predict_op = comp.load_component_from_url(COMPONENT_SPEC_URI) +display(mlengine_batch_predict_op) +``` + +### Here is an illustrative pipeline that uses the component + + +```python +import kfp.dsl as dsl +import kfp.gcp as gcp +import json +@dsl.pipeline( + name='CloudML batch predict pipeline', + description='CloudML batch predict pipeline' +) +def pipeline( + project_id, + model_path, + input_paths, + input_data_format, + output_path, + region, + output_data_format='', + prediction_input='', + job_id_prefix='', + wait_interval='30'): + task = mlengine_batch_predict_op(project_id, model_path, input_paths, input_data_format, + output_path, region, output_data_format, prediction_input, job_id_prefix, + wait_interval).apply(gcp.use_gcp_secret('user-gcp-sa')) +``` + +### Compile the pipeline + + +```python +pipeline_func = pipeline +pipeline_filename = pipeline_func.__name__ + '.pipeline.tar.gz' +import kfp.compiler as compiler +compiler.Compiler().compile(pipeline_func, pipeline_filename) +``` + +### Submit the pipeline for execution + + +```python +#Specify pipeline argument values +arguments = { + 'project_id': PROJECT_ID, + 'model_path': 'gs://ml-pipeline-playground/samples/ml_engine/cencus/trained_model/', + 'input_paths': '["gs://ml-pipeline-playground/samples/ml_engine/cencus/test.json"]', + 'input_data_format': 'JSON', + 'output_path': GCS_WORKING_DIR + '/batch_predict/output/', + 'region': 'us-central1', + 'prediction_input': json.dumps({ + 'runtimeVersion': '1.10' + }) +} + +#Get or create an experiment and submit a pipeline run +import kfp +client = kfp.Client() +experiment = client.create_experiment(EXPERIMENT_NAME) + +#Submit a pipeline run +run_name = pipeline_func.__name__ + ' run' +run_result = client.run_pipeline(experiment.id, run_name, pipeline_filename, arguments) +``` diff --git a/components/gcp/ml_engine/batch_predict/component.yaml b/components/gcp/ml_engine/batch_predict/component.yaml index facc19706ea..2d9ecb1aa4a 100644 --- a/components/gcp/ml_engine/batch_predict/component.yaml +++ b/components/gcp/ml_engine/batch_predict/component.yaml @@ -30,7 +30,7 @@ outputs: - {name: job_id, description: 'The ID of the created job.'} implementation: container: - image: gcr.io/ml-pipeline-dogfood/ml-pipeline-gcp:latest + image: gcr.io/ml-pipeline/ml-pipeline-gcp:latest args: [ kfp_component.google.ml_engine, batch_predict, --project_id, {inputValue: project_id}, diff --git a/components/gcp/ml_engine/batch_predict/sample.ipynb b/components/gcp/ml_engine/batch_predict/sample.ipynb new file mode 100644 index 00000000000..d3d1dc6a7d1 --- /dev/null +++ b/components/gcp/ml_engine/batch_predict/sample.ipynb @@ -0,0 +1,219 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# CloudML - Batch Predict\n", + "\n", + "## Intended Use\n", + "A Kubeflow Pipeline component to submit a 
batch prediction job against a trained model to Google Cloud Machine Learning Engine service.\n", + "\n", + "## Runtime Parameters:\n", + "Name | Description\n", + ":--- | :----------\n", + "project_id | Required. The ID of the parent project of the job.\n", + "model_path | Required. The path to the model. It can be either: `projects/[PROJECT_ID]/models/[MODEL_ID]` or `projects/[PROJECT_ID]/models/[MODEL_ID]/versions/[VERSION_ID]` or a GCS path of a model file.\n", + "input_paths | Required. The Google Cloud Storage location of the input data files. May contain wildcards.\n", + "input_data_format | Required. The format of the input data files. See https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs#DataFormat.\n", + "output_path | Required. The output Google Cloud Storage location.\n", + "region | Required. The Google Compute Engine region to run the prediction job in.\n", + "output_data_format | Optional. Format of the output data files, defaults to JSON.\n", + "prediction_input | Input parameters to create a prediction job. See [PredictionInput](https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs#PredictionInput).\n", + "job_id_prefix | The prefix of the generated job id.\n", + "wait_interval | Optional interval to wait for a long running operation. Defaults to 30.\n", + "\n", + "## Output:\n", + "Name | Description\n", + ":--- | :----------\n", + "job_id | The ID of the created batch job." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Sample Code\n", + "\n", + "Note: the sample code below works in both IPython notebook or python code directly." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Set sample parameters" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "# Required Parameters\n", + "PROJECT_ID = ''\n", + "GCS_WORKING_DIR = 'gs://' # No ending slash\n", + "\n", + "# Optional Parameters\n", + "EXPERIMENT_NAME = 'CLOUDML - Batch Predict'\n", + "COMPONENT_SPEC_URI = 'https://raw.githubusercontent.com/kubeflow/pipelines/master/components/gcp/ml_engine/batch_predict/component.yaml'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Install KFP SDK" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# Install the SDK (Uncomment the code if the SDK is not installed before)\n", + "# KFP_PACKAGE = 'https://storage.googleapis.com/ml-pipeline/release/0.1.11/kfp.tar.gz'\n", + "# !pip3 install $KFP_PACKAGE --upgrade" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Load component definitions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import kfp.components as comp\n", + "\n", + "mlengine_batch_predict_op = comp.load_component_from_url(COMPONENT_SPEC_URI)\n", + "display(mlengine_batch_predict_op)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Here is an illustrative pipeline that uses the component" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import kfp.dsl as dsl\n", + "import kfp.gcp as gcp\n", + "import json\n", + "@dsl.pipeline(\n", + " name='CloudML batch predict pipeline',\n", + " description='CloudML batch predict pipeline'\n", + ")\n", + "def pipeline(\n", + " project_id, \n", + " 
model_path, \n", + " input_paths, \n", + " input_data_format, \n", + " output_path, \n", + " region, \n", + " output_data_format='', \n", + " prediction_input='', \n", + " job_id_prefix='',\n", + " wait_interval='30'):\n", + " task = mlengine_batch_predict_op(project_id, model_path, input_paths, input_data_format, \n", + " output_path, region, output_data_format, prediction_input, job_id_prefix,\n", + " wait_interval).apply(gcp.use_gcp_secret('user-gcp-sa'))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Compile the pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipeline_func = pipeline\n", + "pipeline_filename = pipeline_func.__name__ + '.pipeline.tar.gz'\n", + "import kfp.compiler as compiler\n", + "compiler.Compiler().compile(pipeline_func, pipeline_filename)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Submit the pipeline for execution" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Specify pipeline argument values\n", + "arguments = {\n", + " 'project_id': PROJECT_ID,\n", + " 'model_path': 'gs://ml-pipeline-playground/samples/ml_engine/cencus/trained_model/',\n", + " 'input_paths': '[\"gs://ml-pipeline-playground/samples/ml_engine/cencus/test.json\"]',\n", + " 'input_data_format': 'JSON',\n", + " 'output_path': GCS_WORKING_DIR + '/batch_predict/output/',\n", + " 'region': 'us-central1',\n", + " 'prediction_input': json.dumps({\n", + " 'runtimeVersion': '1.10'\n", + " })\n", + "}\n", + "\n", + "#Get or create an experiment and submit a pipeline run\n", + "import kfp\n", + "client = kfp.Client()\n", + "experiment = client.create_experiment(EXPERIMENT_NAME)\n", + "\n", + "#Submit a pipeline run\n", + "run_name = pipeline_func.__name__ + ' run'\n", + "run_result = client.run_pipeline(experiment.id, run_name, pipeline_filename, arguments)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/components/gcp/ml_engine/deploy/README.md b/components/gcp/ml_engine/deploy/README.md new file mode 100644 index 00000000000..63693f7f66a --- /dev/null +++ b/components/gcp/ml_engine/deploy/README.md @@ -0,0 +1,120 @@ + +# CloudML - Deploy + +## Intended Use +A Kubeflow Pipeline component to deploy a trained model from a Google Cloud Storage path to Google Cloud Machine Learning Engine service. + +## Runtime Parameters: +Name | Description +:--- | :---------- +model_uri | Required, the GCS URI which contains a model file. Common used TF model search path (export/exporter) will be used if exist. +project_id | Required. The ID of the parent project. +model_id | Optional, the user provided name of the model. +version_id | Optional, the user provided name of the version. If it is not provided, the operation uses a random name. +runtime_version | Optinal, the Cloud ML Engine runtime version to use for this deployment. If not set, Cloud ML Engine uses the default stable version, 1.0. +python_version | optinal, the version of Python used in prediction. If not set, the default version is `2.7`. 
Python `3.5` is available when runtimeVersion is set to `1.4` and above. Python `2.7` works with all supported runtime versions. +version | Optional, the payload of the new version. +replace_existing_version | Boolean flag indicates whether to replace existing version in case of conflict. Defaults to false. +set_default | boolean flag indicates whether to set the new version as default version in the model. Defaults to false. +wait_interval | Optional interval to wait for a long running operation. Defaults to 30. + +## Output: +Name | Description +:--- | :---------- +model_uri | The GCS URI for the found model. +version_name | The deployed version resource name. + +## Sample Code + +Note: the sample code below works in both IPython notebook or python code directly. + +### Set sample parameters + + +```python +# Required Parameters +PROJECT_ID = '' + +# Optional Parameters +EXPERIMENT_NAME = 'CLOUDML - Deploy' +COMPONENT_SPEC_URI = 'https://raw.githubusercontent.com/kubeflow/pipelines/master/components/gcp/ml_engine/deploy/component.yaml' +``` + +### Install KFP SDK + + +```python +# Install the SDK (Uncomment the code if the SDK is not installed before) +# KFP_PACKAGE = 'https://storage.googleapis.com/ml-pipeline/release/0.1.11/kfp.tar.gz' +# !pip3 install $KFP_PACKAGE --upgrade +``` + +### Load component definitions + + +```python +import kfp.components as comp + +mlengine_deploy_op = comp.load_component_from_url(COMPONENT_SPEC_URI) +display(mlengine_deploy_op) +``` + +### Here is an illustrative pipeline that uses the component + + +```python +import kfp.dsl as dsl +import kfp.gcp as gcp +import json +@dsl.pipeline( + name='CloudML deploy pipeline', + description='CloudML deploy pipeline' +) +def pipeline( + model_uri, + project_id, + model_id = '', + version_id = '', + runtime_version = '', + python_version = '', + version = '', + replace_existing_version = 'False', + set_default = 'False', + wait_interval = '30'): + task = mlengine_deploy_op(model_uri, project_id, model_id, version_id, runtime_version, + python_version, version, replace_existing_version, set_default, + wait_interval).apply(gcp.use_gcp_secret('user-gcp-sa')) +``` + +### Compile the pipeline + + +```python +pipeline_func = pipeline +pipeline_filename = pipeline_func.__name__ + '.pipeline.tar.gz' +import kfp.compiler as compiler +compiler.Compiler().compile(pipeline_func, pipeline_filename) +``` + +### Submit the pipeline for execution + + +```python +#Specify pipeline argument values +arguments = { + 'model_uri': 'gs://ml-pipeline-playground/samples/ml_engine/cencus/trained_model/', + 'project_id': PROJECT_ID, + 'model_id': 'kfp_sample_model', + 'runtime_version': '1.10', + 'set_default': 'True' +} + +#Get or create an experiment and submit a pipeline run +import kfp +client = kfp.Client() +experiment = client.create_experiment(EXPERIMENT_NAME) + +#Submit a pipeline run +run_name = pipeline_func.__name__ + ' run' +run_result = client.run_pipeline(experiment.id, run_name, pipeline_filename, arguments) +``` diff --git a/components/gcp/ml_engine/deploy/component.yaml b/components/gcp/ml_engine/deploy/component.yaml index e57181ec1cb..edb8ffb0a2f 100644 --- a/components/gcp/ml_engine/deploy/component.yaml +++ b/components/gcp/ml_engine/deploy/component.yaml @@ -32,13 +32,13 @@ outputs: - {name: version_name, description: 'The name of the deployed version.'} implementation: container: - image: gcr.io/ml-pipeline-dogfood/ml-pipeline-gcp:latest + image: gcr.io/ml-pipeline/ml-pipeline-gcp:latest args: [ 
kfp_component.google.ml_engine, deploy,
    --model_uri, {inputValue: model_uri},
    --project_id, {inputValue: project_id},
-    --model_short_name, {inputValue: model_short_name},
-    --version_short_name, {inputValue: version_short_name},
+    --model_id, {inputValue: model_id},
+    --version_id, {inputValue: version_id},
    --runtime_version, {inputValue: runtime_version},
    --version, {inputValue: version},
    --replace_existing_version, {inputValue: replace_existing_version},
diff --git a/components/gcp/ml_engine/deploy/sample.ipynb b/components/gcp/ml_engine/deploy/sample.ipynb
new file mode 100644
index 00000000000..429b27e93cc
--- /dev/null
+++ b/components/gcp/ml_engine/deploy/sample.ipynb
@@ -0,0 +1,215 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# CloudML - Deploy\n",
+ "\n",
+ "## Intended Use\n",
+ "A Kubeflow Pipeline component to deploy a trained model from a Google Cloud Storage path to Google Cloud Machine Learning Engine service.\n",
+ "\n",
+ "## Runtime Parameters:\n",
+ "Name | Description\n",
+ ":--- | :----------\n",
+ "model_uri | Required. The GCS URI which contains a model file. The commonly used TF model search path (export/exporter) is used if it exists.\n",
+ "project_id | Required. The ID of the parent project.\n",
+ "model_id | Optional. The user-provided name of the model.\n",
+ "version_id | Optional. The user-provided name of the version. If it is not provided, the operation uses a random name.\n",
+ "runtime_version | Optional. The Cloud ML Engine runtime version to use for this deployment. If not set, Cloud ML Engine uses the default stable version, 1.0.\n",
+ "python_version | Optional. The version of Python used in prediction. If not set, the default version is `2.7`. Python `3.5` is available when runtimeVersion is set to `1.4` and above. Python `2.7` works with all supported runtime versions.\n",
+ "version | Optional. The payload of the new version.\n",
+ "replace_existing_version | Optional. A Boolean flag that indicates whether to replace the existing version in case of conflict. Defaults to false.\n",
+ "set_default | Optional. A Boolean flag that indicates whether to set the new version as the default version in the model. Defaults to false.\n",
+ "wait_interval | Optional interval to wait for a long-running operation. Defaults to 30.\n",
+ "\n",
+ "## Output:\n",
+ "Name | Description\n",
+ ":--- | :----------\n",
+ "model_uri | The GCS URI for the found model.\n",
+ "version_name | The deployed version resource name."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Sample Code\n",
+ "\n",
+ "Note: the sample code below works both in an IPython notebook and directly in Python code."
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Set sample parameters" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "# Required Parameters\n", + "PROJECT_ID = ''\n", + "\n", + "# Optional Parameters\n", + "EXPERIMENT_NAME = 'CLOUDML - Deploy'\n", + "COMPONENT_SPEC_URI = 'https://raw.githubusercontent.com/kubeflow/pipelines/master/components/gcp/ml_engine/deploy/component.yaml'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Install KFP SDK" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# Install the SDK (Uncomment the code if the SDK is not installed before)\n", + "# KFP_PACKAGE = 'https://storage.googleapis.com/ml-pipeline/release/0.1.11/kfp.tar.gz'\n", + "# !pip3 install $KFP_PACKAGE --upgrade" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Load component definitions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import kfp.components as comp\n", + "\n", + "mlengine_deploy_op = comp.load_component_from_url(COMPONENT_SPEC_URI)\n", + "display(mlengine_deploy_op)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Here is an illustrative pipeline that uses the component" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import kfp.dsl as dsl\n", + "import kfp.gcp as gcp\n", + "import json\n", + "@dsl.pipeline(\n", + " name='CloudML deploy pipeline',\n", + " description='CloudML deploy pipeline'\n", + ")\n", + "def pipeline(\n", + " model_uri,\n", + " project_id,\n", + " model_id = '',\n", + " version_id = '',\n", + " runtime_version = '',\n", + " python_version = '',\n", + " version = '',\n", + " replace_existing_version = 'False',\n", + " set_default = 'False',\n", + " wait_interval = '30'):\n", + " task = mlengine_deploy_op(model_uri, project_id, model_id, version_id, runtime_version, \n", + " python_version, version, replace_existing_version, set_default, \n", + " wait_interval).apply(gcp.use_gcp_secret('user-gcp-sa'))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Compile the pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipeline_func = pipeline\n", + "pipeline_filename = pipeline_func.__name__ + '.pipeline.tar.gz'\n", + "import kfp.compiler as compiler\n", + "compiler.Compiler().compile(pipeline_func, pipeline_filename)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Submit the pipeline for execution" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Specify pipeline argument values\n", + "arguments = {\n", + " 'model_uri': 'gs://ml-pipeline-playground/samples/ml_engine/cencus/trained_model/',\n", + " 'project_id': PROJECT_ID,\n", + " 'model_id': 'kfp_sample_model',\n", + " 'runtime_version': '1.10',\n", + " 'set_default': 'True'\n", + "}\n", + "\n", + "#Get or create an experiment and submit a pipeline run\n", + "import kfp\n", + "client = kfp.Client()\n", + "experiment = client.create_experiment(EXPERIMENT_NAME)\n", + "\n", + "#Submit a pipeline run\n", + "run_name = pipeline_func.__name__ + ' run'\n", + "run_result = 
client.run_pipeline(experiment.id, run_name, pipeline_filename, arguments)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/components/gcp/ml_engine/train/README.md b/components/gcp/ml_engine/train/README.md
new file mode 100644
index 00000000000..28dd0317568
--- /dev/null
+++ b/components/gcp/ml_engine/train/README.md
@@ -0,0 +1,136 @@
+
# CloudML - Train

## Intended Use
A Kubeflow Pipeline component to submit a Cloud Machine Learning Engine training job as a step in a pipeline.

## Runtime Parameters:
Name | Description
:--- | :----------
project_id | Required. The ID of the parent project of the job.
python_module | The Python module name to run after installing the packages.
package_uris | The Google Cloud Storage location of the packages that contain the training program and any additional dependencies. The maximum number of package URIs is 100.
region | The Google Compute Engine region to run the training job in.
args | Command line arguments to pass to the program.
job_dir | A Google Cloud Storage path in which to store training outputs and other data needed for training. This path is passed to your TensorFlow program as the `--job-dir` command-line argument. The benefit of specifying this field is that Cloud ML validates the path for use in training.
python_version | Optional. The version of Python used in training. If not set, the default version is `2.7`.
runtime_version | The Cloud ML Engine runtime version to use for training. If not set, Cloud ML Engine uses the default stable version, 1.0.
master_image_uri | The Docker image to run on the master replica. This image must be in Container Registry.
worker_image_uri | The Docker image to run on the worker replica. This image must be in Container Registry.
training_input | Input parameters to create a training job.
job_id_prefix | The prefix of the generated job id.
wait_interval | Optional wait interval between calls to get job status. Defaults to 30.

## Output:
Name | Description
:--- | :----------
job_id | The ID of the created job.

## Sample

Note: the sample code below works both in an IPython notebook and directly in Python code.
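### Example training_input value

`training_input` accepts a JSON-serialized [TrainingInput](https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs#TrainingInput) object for settings that have no dedicated parameter, such as the scale tier or machine types. The values below are illustrative assumptions for a small custom cluster, not required settings; the sample run further down omits `training_input` entirely.

```python
import json

training_input = json.dumps({
    'scaleTier': 'CUSTOM',      # use an explicit cluster spec instead of a preset tier
    'masterType': 'standard',
    'workerType': 'standard',
    'workerCount': '2'
})
```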
+ +### Set sample parameters + + +```python +# Required Parameters +PROJECT_ID = '' +GCS_WORKING_DIR = 'gs://' # No ending slash + +# Optional Parameters +EXPERIMENT_NAME = 'CLOUDML - Train' +COMPONENT_SPEC_URI = 'https://raw.githubusercontent.com/kubeflow/pipelines/master/components/gcp/ml_engine/train/component.yaml' +``` + +### Install KFP SDK + + +```python +# Install the SDK (Uncomment the code if the SDK is not installed before) +# KFP_PACKAGE = 'https://storage.googleapis.com/ml-pipeline/release/0.1.11/kfp.tar.gz' +# !pip3 install $KFP_PACKAGE --upgrade +``` + +### Load component definitions + + +```python +import kfp.components as comp + +mlengine_train_op = comp.load_component_from_url(COMPONENT_SPEC_URI) +display(mlengine_train_op) +``` + +### Here is an illustrative pipeline that uses the component + + +```python +import kfp.dsl as dsl +import kfp.gcp as gcp +import json +@dsl.pipeline( + name='CloudML training pipeline', + description='CloudML training pipeline' +) +def pipeline( + project_id, + python_module, + package_uris, + region, + args = '', + job_dir = '', + python_version = '', + runtime_version = '', + master_image_uri = '', + worker_image_uri = '', + training_input = '', + job_id_prefix = '', + wait_interval = '30'): + task = mlengine_train_op(project_id, python_module, package_uris, region, args, job_dir, python_version, + runtime_version, master_image_uri, worker_image_uri, training_input, job_id_prefix, + wait_interval).apply(gcp.use_gcp_secret('user-gcp-sa')) +``` + +### Compile the pipeline + + +```python +pipeline_func = pipeline +pipeline_filename = pipeline_func.__name__ + '.pipeline.tar.gz' +import kfp.compiler as compiler +compiler.Compiler().compile(pipeline_func, pipeline_filename) +``` + +### Submit the pipeline for execution + + +```python +#Specify pipeline argument values +arguments = { + 'project_id': PROJECT_ID, + 'python_module': 'trainer.task', + 'package_uris': json.dumps([ + 'gs://ml-pipeline-playground/samples/ml_engine/cencus/trainer.tar.gz' + ]), + 'region': 'us-central1', + 'args': json.dumps([ + '--train-files', 'gs://cloud-samples-data/ml-engine/census/data/adult.data.csv', + '--eval-files', 'gs://cloud-samples-data/ml-engine/census/data/adult.test.csv', + '--train-steps', '1000', + '--eval-steps', '100', + '--verbosity', 'DEBUG' + ]), + 'job_dir': GCS_WORKING_DIR + '/train/output/', + 'runtime_version': '1.10' +} + +#Get or create an experiment and submit a pipeline run +import kfp +client = kfp.Client() +experiment = client.create_experiment(EXPERIMENT_NAME) + +#Submit a pipeline run +run_name = pipeline_func.__name__ + ' run' +run_result = client.run_pipeline(experiment.id, run_name, pipeline_filename, arguments) +``` diff --git a/components/gcp/ml_engine/train/component.yaml b/components/gcp/ml_engine/train/component.yaml index db465d7bea9..481aa4ade96 100644 --- a/components/gcp/ml_engine/train/component.yaml +++ b/components/gcp/ml_engine/train/component.yaml @@ -33,7 +33,7 @@ outputs: - {name: job_id, description: 'The ID of the created job.'} implementation: container: - image: gcr.io/ml-pipeline-dogfood/ml-pipeline-gcp:latest + image: gcr.io/ml-pipeline/ml-pipeline-gcp:latest args: [ kfp_component.google.ml_engine, train, --project_id, {inputValue: project_id}, diff --git a/components/gcp/ml_engine/train/sample.ipynb b/components/gcp/ml_engine/train/sample.ipynb new file mode 100644 index 00000000000..5463a57f2f5 --- /dev/null +++ b/components/gcp/ml_engine/train/sample.ipynb @@ -0,0 +1,231 @@ +{ + "cells": [ + { + "cell_type": 
"markdown", + "metadata": {}, + "source": [ + "# CloudML - Train\n", + "\n", + "## Intended Use\n", + "A Kubeflow Pipeline component to submit a Cloud Machine Learning Engine training job as a step in a pipeline\n", + "\n", + "## Runtime Parameters:\n", + "Name | Description\n", + ":--- | :----------\n", + "project_id | Required. The ID of the parent project of the job.\n", + "python_module | The Python module name to run after installing the packages.\n", + "package_uris | The Google Cloud Storage location of the packages with the training program and any additional dependencies. The maximum number of package URIs is 100.\n", + "region | The Google Compute Engine region to run the training job in.\n", + "args | Command line arguments to pass to the program.\n", + "job_dir | The list of args to pass to the python file.\n", + "python_version | A Google Cloud Storage path in which to store training outputs and other data needed for training. This path is passed to your TensorFlow program as the `--job-dir` command-line argument. The benefit of specifying this field is that Cloud ML validates the path for use in training.\n", + "runtime_version | The Cloud ML Engine runtime version to use for training. If not set, Cloud ML Engine uses the default stable version, 1.0.\n", + "master_image_uri | The Docker image to run on the master replica. This image must be in Container Registry.\n", + "worker_image_uri | The Docker image to run on the worker replica. This image must be in Container Registry.\n", + "training_input | Input parameters to create a training job.\n", + "job_id_prefix | The prefix of the generated job id.\n", + "wait_interval | Optional wait interval between calls to get job status. Defaults to 30.\n", + "\n", + "## Output:\n", + "Name | Description\n", + ":--- | :----------\n", + "job_id | The ID of the created job." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Sample\n", + "\n", + "Note: the sample code below works in both IPython notebook or python code directly." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Set sample parameters" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "# Required Parameters\n", + "PROJECT_ID = ''\n", + "GCS_WORKING_DIR = 'gs://' # No ending slash\n", + "\n", + "# Optional Parameters\n", + "EXPERIMENT_NAME = 'CLOUDML - Train'\n", + "COMPONENT_SPEC_URI = 'https://raw.githubusercontent.com/kubeflow/pipelines/master/components/gcp/ml_engine/train/component.yaml'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Install KFP SDK" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# Install the SDK (Uncomment the code if the SDK is not installed before)\n", + "# KFP_PACKAGE = 'https://storage.googleapis.com/ml-pipeline/release/0.1.11/kfp.tar.gz'\n", + "# !pip3 install $KFP_PACKAGE --upgrade" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Load component definitions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import kfp.components as comp\n", + "\n", + "mlengine_train_op = comp.load_component_from_url(COMPONENT_SPEC_URI)\n", + "display(mlengine_train_op)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Here is an illustrative pipeline that uses the component" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import kfp.dsl as dsl\n", + "import kfp.gcp as gcp\n", + "import json\n", + "@dsl.pipeline(\n", + " name='CloudML training pipeline',\n", + " description='CloudML training pipeline'\n", + ")\n", + "def pipeline(\n", + " project_id,\n", + " python_module,\n", + " package_uris,\n", + " region,\n", + " args = '',\n", + " job_dir = '',\n", + " python_version = '',\n", + " runtime_version = '',\n", + " master_image_uri = '',\n", + " worker_image_uri = '',\n", + " training_input = '',\n", + " job_id_prefix = '',\n", + " wait_interval = '30'):\n", + " task = mlengine_train_op(project_id, python_module, package_uris, region, args, job_dir, python_version,\n", + " runtime_version, master_image_uri, worker_image_uri, training_input, job_id_prefix, \n", + " wait_interval).apply(gcp.use_gcp_secret('user-gcp-sa'))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Compile the pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipeline_func = pipeline\n", + "pipeline_filename = pipeline_func.__name__ + '.pipeline.tar.gz'\n", + "import kfp.compiler as compiler\n", + "compiler.Compiler().compile(pipeline_func, pipeline_filename)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Submit the pipeline for execution" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Specify pipeline argument values\n", + "arguments = {\n", + " 'project_id': PROJECT_ID,\n", + " 'python_module': 'trainer.task',\n", + " 'package_uris': json.dumps([\n", + " 'gs://ml-pipeline-playground/samples/ml_engine/cencus/trainer.tar.gz'\n", + " ]),\n", + " 'region': 'us-central1',\n", + " 'args': json.dumps([\n", + " '--train-files', 'gs://cloud-samples-data/ml-engine/census/data/adult.data.csv',\n", + " '--eval-files', 
'gs://cloud-samples-data/ml-engine/census/data/adult.test.csv',\n", + " '--train-steps', '1000',\n", + " '--eval-steps', '100',\n", + " '--verbosity', 'DEBUG'\n", + " ]),\n", + " 'job_dir': GCS_WORKING_DIR + '/train/output/',\n", + " 'runtime_version': '1.10'\n", + "}\n", + "\n", + "#Get or create an experiment and submit a pipeline run\n", + "import kfp\n", + "client = kfp.Client()\n", + "experiment = client.create_experiment(EXPERIMENT_NAME)\n", + "\n", + "#Submit a pipeline run\n", + "run_name = pipeline_func.__name__ + ' run'\n", + "run_result = client.run_pipeline(experiment.id, run_name, pipeline_filename, arguments)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}