From 52d59950bdce522d70647b9180eb3072ac3a02d1 Mon Sep 17 00:00:00 2001
From: Alexey Volkov <alexey.volkov@ark-kun.com>
Date: Thu, 9 Apr 2020 23:15:47 -0700
Subject: [PATCH] Components - Add model URL to AutoML - Create model/dataset
 for tables (#3486)

* Re-generated the components

* Components - Add model URL to AutoML - Create model for tables

Fixes https://github.com/kubeflow/pipelines/issues/3246

* Added dataset URL to the AutoML - Create dataset for tables component
---
 .../create_dataset_for_tables/component.py    |  20 ++-
 .../create_dataset_for_tables/component.yaml  | 155 +++++++++---------
 .../create_model_for_tables/component.py      |  22 ++-
 .../create_model_for_tables/component.yaml    | 145 ++++++++--------
 4 files changed, 174 insertions(+), 168 deletions(-)

diff --git a/components/gcp/automl/create_dataset_for_tables/component.py b/components/gcp/automl/create_dataset_for_tables/component.py
index 644fd647509..9239e780b15 100644
--- a/components/gcp/automl/create_dataset_for_tables/component.py
+++ b/components/gcp/automl/create_dataset_for_tables/component.py
@@ -24,13 +24,9 @@ def automl_create_dataset_for_tables(
     retry=None, #=google.api_core.gapic_v1.method.DEFAULT,
     timeout: float = None, #=google.api_core.gapic_v1.method.DEFAULT,
     metadata: dict = None,
-) -> NamedTuple('Outputs', [('dataset_path', str), ('create_time', str), ('dataset_id', str)]):
+) -> NamedTuple('Outputs', [('dataset_path', str), ('create_time', str), ('dataset_id', str), ('dataset_url', 'URI')]):
     '''automl_create_dataset_for_tables creates an empty Dataset for AutoML tables
     '''
-    import sys
-    import subprocess
-    subprocess.run([sys.executable, '-m', 'pip', 'install', 'google-cloud-automl==0.4.0', '--quiet', '--no-warn-script-location'], env={'PIP_DISABLE_PIP_VERSION_CHECK': '1'}, check=True)
-
     import google
     from google.cloud import automl
     client = automl.AutoMlClient()
@@ -50,9 +46,19 @@ def automl_create_dataset_for_tables(
     )
     print(dataset)
     dataset_id = dataset.name.rsplit('/', 1)[-1]
-    return (dataset.name, dataset.create_time, dataset_id)
+    dataset_url = 'https://console.cloud.google.com/automl-tables/locations/{region}/datasets/{dataset_id}/schemav2?project={project_id}'.format(
+        project_id=gcp_project_id,
+        region=gcp_region,
+        dataset_id=dataset_id,
+    )
+    return (dataset.name, dataset.create_time, dataset_id, dataset_url)
 
 
 if __name__ == '__main__':
     import kfp
-    kfp.components.func_to_container_op(automl_create_dataset_for_tables, output_component_file='component.yaml', base_image='python:3.7')
+    kfp.components.func_to_container_op(
+        automl_create_dataset_for_tables,
+        output_component_file='component.yaml',
+        base_image='python:3.7',
+        packages_to_install=['google-cloud-automl==0.4.0']
+    )
diff --git a/components/gcp/automl/create_dataset_for_tables/component.yaml b/components/gcp/automl/create_dataset_for_tables/component.yaml
index 4dfdeddfdeb..74257db9fdd 100644
--- a/components/gcp/automl/create_dataset_for_tables/component.yaml
+++ b/components/gcp/automl/create_dataset_for_tables/component.yaml
@@ -1,61 +1,46 @@
 name: Automl create dataset for tables
-description: |
-  automl_create_dataset_for_tables creates an empty Dataset for AutoML tables
+description: automl_create_dataset_for_tables creates an empty Dataset for AutoML
+  tables
 inputs:
-- name: gcp_project_id
-  type: String
-- name: gcp_region
-  type: String
-- name: display_name
-  type: String
-- name: description
-  type: String
-  optional: true
-- name: tables_dataset_metadata
-  type: JsonObject
-  default: '{}'
-  optional: true
-- name: retry
-  optional: true
-- name: timeout
-  type: Float
-  optional: true
-- name: metadata
-  type: JsonObject
-  optional: true
+- {name: gcp_project_id, type: String}
+- {name: gcp_region, type: String}
+- {name: display_name, type: String}
+- {name: description, type: String, optional: true}
+- {name: tables_dataset_metadata, type: JsonObject, default: '{}', optional: true}
+- {name: retry, optional: true}
+- {name: timeout, type: Float, optional: true}
+- {name: metadata, type: JsonObject, optional: true}
 outputs:
-- name: dataset_path
-  type: String
-- name: create_time
-  type: String
-- name: dataset_id
-  type: String
+- {name: dataset_path, type: String}
+- {name: create_time, type: String}
+- {name: dataset_id, type: String}
+- {name: dataset_url, type: URI}
 implementation:
   container:
     image: python:3.7
     command:
+    - sh
+    - -c
+    - (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
+      'google-cloud-automl==0.4.0' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip
+      install --quiet --no-warn-script-location 'google-cloud-automl==0.4.0' --user)
+      && "$0" "$@"
     - python3
     - -u
     - -c
     - |
-      from typing import NamedTuple
-
       def automl_create_dataset_for_tables(
-          gcp_project_id: str,
-          gcp_region: str,
-          display_name: str,
-          description: str = None,
-          tables_dataset_metadata: dict = {},
+          gcp_project_id ,
+          gcp_region ,
+          display_name ,
+          description  = None,
+          tables_dataset_metadata  = {},
           retry=None, #=google.api_core.gapic_v1.method.DEFAULT,
-          timeout: float = None, #=google.api_core.gapic_v1.method.DEFAULT,
-          metadata: dict = None,
-      ) -> NamedTuple('Outputs', [('dataset_path', str), ('create_time', str), ('dataset_id', str)]):
+          timeout  = None, #=google.api_core.gapic_v1.method.DEFAULT,
+          metadata  = None,
+      )          :
           '''automl_create_dataset_for_tables creates an empty Dataset for AutoML tables
           '''
-          import sys
-          import subprocess
-          subprocess.run([sys.executable, '-m', 'pip', 'install', 'google-cloud-automl==0.4.0', '--quiet', '--no-warn-script-location'], env={'PIP_DISABLE_PIP_VERSION_CHECK': '1'}, check=True)
-
           import google
           from google.cloud import automl
           client = automl.AutoMlClient()
@@ -75,28 +60,42 @@ implementation:
           )
           print(dataset)
           dataset_id = dataset.name.rsplit('/', 1)[-1]
-          return (dataset.name, dataset.create_time, dataset_id)
+          dataset_url = 'https://console.cloud.google.com/automl-tables/locations/{region}/datasets/{dataset_id}/schemav2?project={project_id}'.format(
+              project_id=gcp_project_id,
+              region=gcp_region,
+              dataset_id=dataset_id,
+          )
+          return (dataset.name, dataset.create_time, dataset_id, dataset_url)
 
       import json
+      def _serialize_str(str_value: str) -> str:
+          if not isinstance(str_value, str):
+              raise TypeError('Value "{}" has type "{}" instead of str.'.format(str(str_value), str(type(str_value))))
+          return str_value
+
       import argparse
-      _missing_arg = object()
-      _parser = argparse.ArgumentParser(prog='Automl create dataset for tables', description='automl_create_dataset_for_tables creates an empty Dataset for AutoML tables\n')
-      _parser.add_argument("--gcp-project-id", dest="gcp_project_id", type=str, required=True, default=_missing_arg)
-      _parser.add_argument("--gcp-region", dest="gcp_region", type=str, required=True, default=_missing_arg)
-      _parser.add_argument("--display-name", dest="display_name", type=str, required=True, default=_missing_arg)
-      _parser.add_argument("--description", dest="description", type=str, required=False, default=_missing_arg)
-      _parser.add_argument("--tables-dataset-metadata", dest="tables_dataset_metadata", type=json.loads, required=False, default=_missing_arg)
-      _parser.add_argument("--retry", dest="retry", type=str, required=False, default=_missing_arg)
-      _parser.add_argument("--timeout", dest="timeout", type=float, required=False, default=_missing_arg)
-      _parser.add_argument("--metadata", dest="metadata", type=json.loads, required=False, default=_missing_arg)
-      _parser.add_argument("----output-paths", dest="_output_paths", type=str, nargs=3)
-      _parsed_args = {k: v for k, v in vars(_parser.parse_args()).items() if v is not _missing_arg}
+      _parser = argparse.ArgumentParser(prog='Automl create dataset for tables', description='automl_create_dataset_for_tables creates an empty Dataset for AutoML tables')
+      _parser.add_argument("--gcp-project-id", dest="gcp_project_id", type=str, required=True, default=argparse.SUPPRESS)
+      _parser.add_argument("--gcp-region", dest="gcp_region", type=str, required=True, default=argparse.SUPPRESS)
+      _parser.add_argument("--display-name", dest="display_name", type=str, required=True, default=argparse.SUPPRESS)
+      _parser.add_argument("--description", dest="description", type=str, required=False, default=argparse.SUPPRESS)
+      _parser.add_argument("--tables-dataset-metadata", dest="tables_dataset_metadata", type=json.loads, required=False, default=argparse.SUPPRESS)
+      _parser.add_argument("--retry", dest="retry", type=str, required=False, default=argparse.SUPPRESS)
+      _parser.add_argument("--timeout", dest="timeout", type=float, required=False, default=argparse.SUPPRESS)
+      _parser.add_argument("--metadata", dest="metadata", type=json.loads, required=False, default=argparse.SUPPRESS)
+      _parser.add_argument("----output-paths", dest="_output_paths", type=str, nargs=4)
+      _parsed_args = vars(_parser.parse_args())
       _output_files = _parsed_args.pop("_output_paths", [])
 
       _outputs = automl_create_dataset_for_tables(**_parsed_args)
 
-      if not hasattr(_outputs, '__getitem__') or isinstance(_outputs, str):
-          _outputs = [_outputs]
+      _output_serializers = [
+          _serialize_str,
+          _serialize_str,
+          _serialize_str,
+          str,
+
+      ]
 
       import os
       for idx, output_file in enumerate(_output_files):
@@ -105,45 +104,41 @@ implementation:
           except OSError:
               pass
           with open(output_file, 'w') as f:
-              f.write(str(_outputs[idx]))
+              f.write(_output_serializers[idx](_outputs[idx]))
     args:
     - --gcp-project-id
-    - inputValue: gcp_project_id
+    - {inputValue: gcp_project_id}
     - --gcp-region
-    - inputValue: gcp_region
+    - {inputValue: gcp_region}
     - --display-name
-    - inputValue: display_name
+    - {inputValue: display_name}
     - if:
-        cond:
-          isPresent: description
+        cond: {isPresent: description}
         then:
         - --description
-        - inputValue: description
+        - {inputValue: description}
     - if:
-        cond:
-          isPresent: tables_dataset_metadata
+        cond: {isPresent: tables_dataset_metadata}
         then:
         - --tables-dataset-metadata
-        - inputValue: tables_dataset_metadata
+        - {inputValue: tables_dataset_metadata}
     - if:
-        cond:
-          isPresent: retry
+        cond: {isPresent: retry}
         then:
         - --retry
-        - inputValue: retry
+        - {inputValue: retry}
     - if:
-        cond:
-          isPresent: timeout
+        cond: {isPresent: timeout}
         then:
         - --timeout
-        - inputValue: timeout
+        - {inputValue: timeout}
     - if:
-        cond:
-          isPresent: metadata
+        cond: {isPresent: metadata}
         then:
         - --metadata
-        - inputValue: metadata
+        - {inputValue: metadata}
     - '----output-paths'
-    - outputPath: dataset_path
-    - outputPath: create_time
-    - outputPath: dataset_id
+    - {outputPath: dataset_path}
+    - {outputPath: create_time}
+    - {outputPath: dataset_id}
+    - {outputPath: dataset_url}
diff --git a/components/gcp/automl/create_model_for_tables/component.py b/components/gcp/automl/create_model_for_tables/component.py
index 21b126cff38..205a4a064c0 100644
--- a/components/gcp/automl/create_model_for_tables/component.py
+++ b/components/gcp/automl/create_model_for_tables/component.py
@@ -24,11 +24,7 @@ def automl_create_model_for_tables(
     input_feature_column_paths: list = None,
     optimization_objective: str = 'MAXIMIZE_AU_PRC',
     train_budget_milli_node_hours: int = 1000,
-) -> NamedTuple('Outputs', [('model_path', str), ('model_id', str)]):
-    import sys
-    import subprocess
-    subprocess.run([sys.executable, '-m', 'pip', 'install', 'google-cloud-automl==0.4.0', '--quiet', '--no-warn-script-location'], env={'PIP_DISABLE_PIP_VERSION_CHECK': '1'}, check=True)
-
+) -> NamedTuple('Outputs', [('model_path', str), ('model_id', str), ('model_page_url', 'URI'),]):
     from google.cloud import automl
     client = automl.AutoMlClient()
 
@@ -50,9 +46,21 @@ def automl_create_model_for_tables(
     print(result)
     model_name = result.name
     model_id = model_name.rsplit('/', 1)[-1]
-    return (model_name, model_id)
+    model_url = 'https://console.cloud.google.com/automl-tables/locations/{region}/datasets/{dataset_id};modelId={model_id};task=basic/train?project={project_id}'.format(
+        project_id=gcp_project_id,
+        region=gcp_region,
+        dataset_id=dataset_id,
+        model_id=model_id,
+    )
+
+    return (model_name, model_id, model_url)
 
 
 if __name__ == '__main__':
     import kfp
-    kfp.components.func_to_container_op(automl_create_model_for_tables, output_component_file='component.yaml', base_image='python:3.7')
+    kfp.components.func_to_container_op(
+        automl_create_model_for_tables,
+        output_component_file='component.yaml',
+        base_image='python:3.7',
+        packages_to_install=['google-cloud-automl==0.4.0']
+    )
diff --git a/components/gcp/automl/create_model_for_tables/component.yaml b/components/gcp/automl/create_model_for_tables/component.yaml
index 86ad38730bd..8b909eb1ee8 100644
--- a/components/gcp/automl/create_model_for_tables/component.yaml
+++ b/components/gcp/automl/create_model_for_tables/component.yaml
@@ -1,56 +1,41 @@
 name: Automl create model for tables
 inputs:
-- name: gcp_project_id
-  type: String
-- name: gcp_region
-  type: String
-- name: display_name
-  type: String
-- name: dataset_id
-  type: String
-- name: target_column_path
-  type: String
-  optional: true
-- name: input_feature_column_paths
-  type: JsonArray
-  optional: true
-- name: optimization_objective
-  type: String
-  default: MAXIMIZE_AU_PRC
-  optional: true
-- name: train_budget_milli_node_hours
-  type: Integer
-  default: '1000'
-  optional: true
+- {name: gcp_project_id, type: String}
+- {name: gcp_region, type: String}
+- {name: display_name, type: String}
+- {name: dataset_id, type: String}
+- {name: target_column_path, type: String, optional: true}
+- {name: input_feature_column_paths, type: JsonArray, optional: true}
+- {name: optimization_objective, type: String, default: MAXIMIZE_AU_PRC, optional: true}
+- {name: train_budget_milli_node_hours, type: Integer, default: '1000', optional: true}
 outputs:
-- name: model_path
-  type: String
-- name: model_id
-  type: String
+- {name: model_path, type: String}
+- {name: model_id, type: String}
+- {name: model_page_url, type: URI}
 implementation:
   container:
     image: python:3.7
     command:
+    - sh
+    - -c
+    - (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
+      'google-cloud-automl==0.4.0' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip
+      install --quiet --no-warn-script-location 'google-cloud-automl==0.4.0' --user)
+      && "$0" "$@"
     - python3
     - -u
     - -c
     - |
-      from typing import NamedTuple
-
       def automl_create_model_for_tables(
-          gcp_project_id: str,
-          gcp_region: str,
-          display_name: str,
-          dataset_id: str,
-          target_column_path: str = None,
-          input_feature_column_paths: list = None,
-          optimization_objective: str = 'MAXIMIZE_AU_PRC',
-          train_budget_milli_node_hours: int = 1000,
-      ) -> NamedTuple('Outputs', [('model_path', str), ('model_id', str)]):
-          import sys
-          import subprocess
-          subprocess.run([sys.executable, '-m', 'pip', 'install', 'google-cloud-automl==0.4.0', '--quiet', '--no-warn-script-location'], env={'PIP_DISABLE_PIP_VERSION_CHECK': '1'}, check=True)
-
+          gcp_project_id ,
+          gcp_region ,
+          display_name ,
+          dataset_id ,
+          target_column_path  = None,
+          input_feature_column_paths  = None,
+          optimization_objective  = 'MAXIMIZE_AU_PRC',
+          train_budget_milli_node_hours  = 1000,
+      )        :
           from google.cloud import automl
           client = automl.AutoMlClient()
 
@@ -72,28 +57,43 @@ implementation:
           print(result)
           model_name = result.name
           model_id = model_name.rsplit('/', 1)[-1]
-          return (model_name, model_id)
+          model_url = 'https://console.cloud.google.com/automl-tables/locations/{region}/datasets/{dataset_id};modelId={model_id};task=basic/train?project={project_id}'.format(
+              project_id=gcp_project_id,
+              region=gcp_region,
+              dataset_id=dataset_id,
+              model_id=model_id,
+          )
+
+          return (model_name, model_id, model_url)
+
+      def _serialize_str(str_value: str) -> str:
+          if not isinstance(str_value, str):
+              raise TypeError('Value "{}" has type "{}" instead of str.'.format(str(str_value), str(type(str_value))))
+          return str_value
 
       import json
       import argparse
-      _missing_arg = object()
       _parser = argparse.ArgumentParser(prog='Automl create model for tables', description='')
-      _parser.add_argument("--gcp-project-id", dest="gcp_project_id", type=str, required=True, default=_missing_arg)
-      _parser.add_argument("--gcp-region", dest="gcp_region", type=str, required=True, default=_missing_arg)
-      _parser.add_argument("--display-name", dest="display_name", type=str, required=True, default=_missing_arg)
-      _parser.add_argument("--dataset-id", dest="dataset_id", type=str, required=True, default=_missing_arg)
-      _parser.add_argument("--target-column-path", dest="target_column_path", type=str, required=False, default=_missing_arg)
-      _parser.add_argument("--input-feature-column-paths", dest="input_feature_column_paths", type=json.loads, required=False, default=_missing_arg)
-      _parser.add_argument("--optimization-objective", dest="optimization_objective", type=str, required=False, default=_missing_arg)
-      _parser.add_argument("--train-budget-milli-node-hours", dest="train_budget_milli_node_hours", type=int, required=False, default=_missing_arg)
-      _parser.add_argument("----output-paths", dest="_output_paths", type=str, nargs=2)
-      _parsed_args = {k: v for k, v in vars(_parser.parse_args()).items() if v is not _missing_arg}
+      _parser.add_argument("--gcp-project-id", dest="gcp_project_id", type=str, required=True, default=argparse.SUPPRESS)
+      _parser.add_argument("--gcp-region", dest="gcp_region", type=str, required=True, default=argparse.SUPPRESS)
+      _parser.add_argument("--display-name", dest="display_name", type=str, required=True, default=argparse.SUPPRESS)
+      _parser.add_argument("--dataset-id", dest="dataset_id", type=str, required=True, default=argparse.SUPPRESS)
+      _parser.add_argument("--target-column-path", dest="target_column_path", type=str, required=False, default=argparse.SUPPRESS)
+      _parser.add_argument("--input-feature-column-paths", dest="input_feature_column_paths", type=json.loads, required=False, default=argparse.SUPPRESS)
+      _parser.add_argument("--optimization-objective", dest="optimization_objective", type=str, required=False, default=argparse.SUPPRESS)
+      _parser.add_argument("--train-budget-milli-node-hours", dest="train_budget_milli_node_hours", type=int, required=False, default=argparse.SUPPRESS)
+      _parser.add_argument("----output-paths", dest="_output_paths", type=str, nargs=3)
+      _parsed_args = vars(_parser.parse_args())
       _output_files = _parsed_args.pop("_output_paths", [])
 
       _outputs = automl_create_model_for_tables(**_parsed_args)
 
-      if not hasattr(_outputs, '__getitem__') or isinstance(_outputs, str):
-          _outputs = [_outputs]
+      _output_serializers = [
+          _serialize_str,
+          _serialize_str,
+          str,
+
+      ]
 
       import os
       for idx, output_file in enumerate(_output_files):
@@ -102,40 +102,37 @@ implementation:
           except OSError:
               pass
           with open(output_file, 'w') as f:
-              f.write(str(_outputs[idx]))
+              f.write(_output_serializers[idx](_outputs[idx]))
     args:
     - --gcp-project-id
-    - inputValue: gcp_project_id
+    - {inputValue: gcp_project_id}
     - --gcp-region
-    - inputValue: gcp_region
+    - {inputValue: gcp_region}
     - --display-name
-    - inputValue: display_name
+    - {inputValue: display_name}
     - --dataset-id
-    - inputValue: dataset_id
+    - {inputValue: dataset_id}
     - if:
-        cond:
-          isPresent: target_column_path
+        cond: {isPresent: target_column_path}
         then:
         - --target-column-path
-        - inputValue: target_column_path
+        - {inputValue: target_column_path}
     - if:
-        cond:
-          isPresent: input_feature_column_paths
+        cond: {isPresent: input_feature_column_paths}
         then:
         - --input-feature-column-paths
-        - inputValue: input_feature_column_paths
+        - {inputValue: input_feature_column_paths}
     - if:
-        cond:
-          isPresent: optimization_objective
+        cond: {isPresent: optimization_objective}
         then:
         - --optimization-objective
-        - inputValue: optimization_objective
+        - {inputValue: optimization_objective}
     - if:
-        cond:
-          isPresent: train_budget_milli_node_hours
+        cond: {isPresent: train_budget_milli_node_hours}
         then:
         - --train-budget-milli-node-hours
-        - inputValue: train_budget_milli_node_hours
+        - {inputValue: train_budget_milli_node_hours}
     - '----output-paths'
-    - outputPath: model_path
-    - outputPath: model_id
+    - {outputPath: model_path}
+    - {outputPath: model_id}
+    - {outputPath: model_page_url}