Bump TFX to 0.21.2 (#3255)

* bump tfx
* bump tfma
* modernize py sample
* clean up
* update notebook sample
* Remove tmp hack in visualization server
kubeflow · Mar 11, 2020 · fad7510 · fad7510
1 parent f8a4521
commit fad7510
Show file tree

Hide file tree

Showing 9 changed files with 123 additions and 70 deletions.
diff --git a/backend/requirements.in b/backend/requirements.in
@@ -1 +1 @@
-tfx==0.21.1
+tfx==0.21.2
diff --git a/backend/requirements.txt b/backend/requirements.txt
@@ -8,7 +8,7 @@ absl-py==0.8.1            # via ml-metadata, tensorboard, tensorflow, tensorflow
 apache-beam[gcp]==2.17.0  # via tensorflow-data-validation, tensorflow-model-analysis, tensorflow-transform, tfx, tfx-bsl
 astor==0.8.1              # via tensorflow
 attrs==19.3.0             # via jsonschema
-avro-python3==1.9.1       # via apache-beam, tensorflow-data-validation, tensorflow-model-analysis, tfx, tfx-bsl
+avro-python3==1.9.1       # via apache-beam, tensorflow-data-validation, tensorflow-model-analysis, tfx-bsl
 backcall==0.1.0           # via ipython
 bleach==3.1.1             # via nbconvert
 cachetools==3.1.1         # via apache-beam, google-auth
@@ -94,7 +94,7 @@ pyparsing==2.4.6          # via pydot
 pyrsistent==0.15.7        # via jsonschema
 python-dateutil==2.8.1    # via apache-beam, jupyter-client, pandas
 pytz==2019.3              # via apache-beam, google-api-core, pandas
-pyyaml==3.13              # via tfx
+pyyaml==5.3               # via tfx
 pyzmq==19.0.0             # via jupyter-client, notebook
 qtconsole==4.6.0          # via jupyter
 requests-oauthlib==1.3.0  # via google-auth-oauthlib
@@ -116,7 +116,7 @@ termcolor==1.1.0          # via tensorflow
 terminado==0.8.3          # via notebook
 testpath==0.4.4           # via nbconvert
 tfx-bsl==0.21.3           # via tensorflow-data-validation, tensorflow-model-analysis, tensorflow-transform, tfx
-tfx==0.21.1               # via -r requirements.in (line 1)
+tfx==0.21.2               # via -r requirements.in (line 1)
 tornado==6.0.3            # via ipykernel, jupyter-client, notebook, terminado
 traitlets==4.3.3          # via ipykernel, ipython, ipywidgets, jupyter-client, jupyter-core, nbconvert, nbformat, notebook, qtconsole
 uritemplate==3.0.1        # via google-api-python-client

diff --git a/backend/src/apiserver/visualization/requirements.txt b/backend/src/apiserver/visualization/requirements.txt
@@ -11,6 +11,6 @@ pandas==0.24.2
 pyarrow==0.15.1
 scikit_learn==0.21.2
 tensorflow-metadata==0.21.1
-tensorflow-model-analysis==0.21.1
+tensorflow-model-analysis==0.21.5
 tensorflow-data-validation==0.21.1
 tornado==6.0.2
diff --git a/frontend/src/lib/OutputArtifactLoader.ts b/frontend/src/lib/OutputArtifactLoader.ts
@@ -341,8 +341,7 @@ export class OutputArtifactLoader {
           `slicing_metrics_view = tfma.view.render_slicing_metrics(eval_result, slicing_spec=slicing_spec)`,
           `view = io.StringIO()`,
           `embed_minimal_html(view, views=[slicing_metrics_view], title='Slicing Metrics')`,
-          `html = view.getvalue().replace('dist/embed-amd.js" crossorigin="anonymous"></script>', 'dist/embed-amd.js" crossorigin="anonymous" data-jupyter-widgets-cdn="https://cdn.jsdelivr.net/gh/Bobgy/model-analysis@kfp/tensorflow_model_analysis/notebook/jupyter/js/dist/" crossorigin="anonymous"></script>')`,
-          `display(HTML(html))`,
+          `display(HTML(view.getvalue()))`,
         ];
         return buildArtifactViewer(script);
       }),

diff --git a/samples/core/parameterized_tfx_oss/README.md b/samples/core/parameterized_tfx_oss/README.md
@@ -16,13 +16,12 @@ Please refer to inline comments for the purpose of each step in both samples.
 # Compilation
 * `parameterized_tfx_oss.py`: 
 In order to successfully compile the Python sample, you'll need to have a TFX installation at 
-version 0.21.0 by running `python3 -m pip install tfx==0.21.0`. After that, under the sample dir run
+version 0.21.2 by running `python3 -m pip install tfx==0.21.2`. After that, under the sample dir run
 `python3 parameterized_tfx_oss.py` to compile the TFX pipeline into KFP pipeline package.
 The compilation is done by invoking `kfp_runner.run(pipeline)` in the script.
 
 * `parameterized_tfx_oss.ipynb`:
-The notebook sample includes the installation of various dependencies as its first step. Especially,
-it depends on the latest released KFP and a nightly built TFX to leverage `TFX::RuntimeParameter`.
+The notebook sample includes the installation of various dependencies as its first step.
 
 # Permission
 

diff --git a/samples/core/parameterized_tfx_oss/parameterized_tfx_oss.py b/samples/core/parameterized_tfx_oss/parameterized_tfx_oss.py
@@ -18,10 +18,10 @@
 from typing import Text
 
 import kfp
+import tensorflow_model_analysis as tfma
 from tfx.components.evaluator.component import Evaluator
 from tfx.components.example_gen.csv_example_gen.component import CsvExampleGen
 from tfx.components.example_validator.component import ExampleValidator
-from tfx.components.model_validator.component import ModelValidator
 from tfx.components.pusher.component import Pusher
 from tfx.components.schema_gen.component import SchemaGen
 from tfx.components.statistics_gen.component import StatisticsGen
@@ -30,7 +30,6 @@
 from tfx.orchestration import data_types
 from tfx.orchestration import pipeline
 from tfx.orchestration.kubeflow import kubeflow_dag_runner
-from tfx.proto import evaluator_pb2
 from tfx.utils.dsl_utils import external_input
 from tfx.proto import pusher_pb2
 from tfx.proto import trainer_pb2
@@ -39,8 +38,7 @@
 # Path to the module file, should be a GCS path.
 _taxi_module_file_param = data_types.RuntimeParameter(
     name='module-file',
-    default=
-    'gs://ml-pipeline-playground/tfx_taxi_simple/modules/taxi_utils.py',
+    default='gs://ml-pipeline-playground/tfx_taxi_simple/modules/taxi_utils.py',
     ptype=Text,
 )
 
@@ -97,24 +95,56 @@ def _create_pipeline(
       train_args=trainer_pb2.TrainArgs(num_steps=10),
       eval_args=trainer_pb2.EvalArgs(num_steps=5),
   )
+  # Set the TFMA config for Model Evaluation and Validation.
+  eval_config = tfma.EvalConfig(
+      model_specs=[
+          # Using signature 'eval' implies the use of an EvalSavedModel. To use
+          # a serving model, remove the signature so that it defaults to
+          # 'serving_default', and add a label_key.
+          tfma.ModelSpec(signature_name='eval')
+      ],
+      metrics_specs=[
+          tfma.MetricsSpec(
+              # The metrics added here are in addition to those saved with the
+              # model (assuming either a keras model or EvalSavedModel is used).
+              # Any metrics added into the saved model (for example using
+              # model.compile(..., metrics=[...]), etc) will be computed
+              # automatically.
+              metrics=[tfma.MetricConfig(class_name='ExampleCount')],
+              # To add validation thresholds for metrics saved with the model,
+              # add them keyed by metric name to the thresholds map.
+              thresholds={
+                  'binary_accuracy':
+                      tfma.MetricThreshold(
+                          value_threshold=tfma.GenericValueThreshold(
+                              lower_bound={'value': 0.5}
+                          ),
+                          change_threshold=tfma.GenericChangeThreshold(
+                              direction=tfma.MetricDirection.HIGHER_IS_BETTER,
+                              absolute={'value': -1e-10}
+                          )
+                      )
+              }
+          )
+      ],
+      slicing_specs=[
+          # An empty slice spec means the overall slice, i.e. the whole dataset.
+          tfma.SlicingSpec(),
+          # Data can be sliced along a feature column. In this case, data is
+          # sliced along feature column trip_start_hour.
+          tfma.SlicingSpec(feature_keys=['trip_start_hour'])
+      ]
+  )
+
   model_analyzer = Evaluator(
       examples=example_gen.outputs['examples'],
       model=trainer.outputs['model'],
-      feature_slicing_spec=evaluator_pb2.FeatureSlicingSpec(
-          specs=[
-              evaluator_pb2.SingleSlicingSpec(
-                  column_for_slicing=['trip_start_hour']
-              )
-          ]
-      ),
-  )
-  model_validator = ModelValidator(
-      examples=example_gen.outputs['examples'], model=trainer.outputs['model']
+      eval_config=eval_config,
   )
 
   pusher = Pusher(
       model=trainer.outputs['model'],
-      model_blessing=model_validator.outputs['blessing'],
+      model_blessing=model_analyzer.outputs['blessing'],
       push_destination=pusher_pb2.PushDestination(
           filesystem=pusher_pb2.PushDestination.Filesystem(
               base_directory=os.path.
@@ -128,7 +158,7 @@ def _create_pipeline(
       pipeline_root=pipeline_root,
       components=[
           example_gen, statistics_gen, infer_schema, validate_stats, transform,
-          trainer, model_analyzer, model_validator, pusher
+          trainer, model_analyzer, pusher
       ],
       enable_cache=enable_cache,
   )
@@ -147,7 +177,7 @@ def _create_pipeline(
   config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
       kubeflow_metadata_config=kubeflow_dag_runner.
       get_default_kubeflow_metadata_config(),
-      tfx_image='tensorflow/tfx:0.21.1',
+      tfx_image='tensorflow/tfx:0.21.2',
   )
   kfp_runner = kubeflow_dag_runner.KubeflowDagRunner(
       output_filename=__file__ + '.yaml', config=config

diff --git a/samples/core/parameterized_tfx_oss/taxi_pipeline_notebook.ipynb b/samples/core/parameterized_tfx_oss/taxi_pipeline_notebook.ipynb
@@ -29,40 +29,38 @@
    "source": [
     "!python3 -m pip install pip --upgrade --quiet --user\n",
     "!python3 -m pip install kfp --upgrade --quiet --user\n",
-    "!python3 -m pip install tfx==0.21.0 --quiet --user"
+    "!python3 -m pip install tfx==0.21.2 --quiet --user"
    ]
   },
   {
    "cell_type": "markdown",
+   "metadata": {
+    "pycharm": {
+     "name": "#%% md\n"
+    }
+   },
    "source": [
     "Note: if you're warned by \n",
     "```\n",
     "WARNING: The script {LIBRARY_NAME} is installed in '/home/jupyter/.local/bin' which is not on PATH.\n",
     "```\n",
     "You might need to fix by running the next cell and restart the kernel."
-   ],
-   "metadata": {
-    "collapsed": false,
-    "pycharm": {
-     "name": "#%% md\n"
-    }
-   }
+   ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
+   "metadata": {
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   },
    "outputs": [],
    "source": [
     "# Set `PATH` to include user python binary directory and a directory containing `skaffold`.\n",
     "PATH=%env PATH\n",
     "%env PATH={PATH}:/home/jupyter/.local/bin"
-   ],
-   "metadata": {
-    "collapsed": false,
-    "pycharm": {
-     "name": "#%%\n"
-    }
-   }
+   ]
   },
   {
    "cell_type": "markdown",
@@ -85,10 +83,11 @@
     "\n",
     "import kfp\n",
     "\n",
+    "import tensorflow_model_analysis as tfma\n",
+    "\n",
     "from tfx.components import Evaluator\n",
     "from tfx.components import CsvExampleGen\n",
     "from tfx.components import ExampleValidator\n",
-    "from tfx.components import ModelValidator\n",
     "from tfx.components import Pusher\n",
     "from tfx.components import SchemaGen\n",
     "from tfx.components import StatisticsGen\n",
@@ -147,13 +146,6 @@
     "    name='eval-steps',\n",
     "    default=5,\n",
     "    ptype=int,\n",
-    ")\n",
-    "\n",
-    "# Column name for slicing.\n",
-    "slicing_column = data_types.RuntimeParameter(\n",
-    "    name='slicing-column',\n",
-    "    default='trip_start_hour',\n",
-    "    ptype=Text,\n",
     ")"
    ]
   },
@@ -202,20 +194,53 @@
     "  train_args={'num_steps': train_steps},\n",
     "  eval_args={'num_steps': eval_steps})\n",
     "\n",
+    "# Set the TFMA config for Model Evaluation and Validation.\n",
+    "eval_config = tfma.EvalConfig(\n",
+    "    model_specs=[\n",
+    "      # Using signature 'eval' implies the use of an EvalSavedModel. To use\n",
+    "      # a serving model, remove the signature so that it defaults to\n",
+    "      # 'serving_default', and add a label_key.\n",
+    "      tfma.ModelSpec(signature_name='eval')\n",
+    "    ],\n",
+    "    metrics_specs=[\n",
+    "      tfma.MetricsSpec(\n",
+    "          # The metrics added here are in addition to those saved with the\n",
+    "          # model (assuming either a keras model or EvalSavedModel is used).\n",
+    "          # Any metrics added into the saved model (for example using\n",
+    "          # model.compile(..., metrics=[...]), etc) will be computed\n",
+    "          # automatically.\n",
+    "          metrics=[\n",
+    "              tfma.MetricConfig(class_name='ExampleCount')\n",
+    "          ],\n",
+    "          # To add validation thresholds for metrics saved with the model,\n",
+    "          # add them keyed by metric name to the thresholds map.\n",
+    "          thresholds = {\n",
+    "              'binary_accuracy': tfma.MetricThreshold(\n",
+    "                  value_threshold=tfma.GenericValueThreshold(\n",
+    "                      lower_bound={'value': 0.5}),\n",
+    "                  change_threshold=tfma.GenericChangeThreshold(\n",
+    "                     direction=tfma.MetricDirection.HIGHER_IS_BETTER,\n",
+    "                     absolute={'value': -1e-10}))\n",
+    "          }\n",
+    "      )\n",
+    "    ],\n",
+    "    slicing_specs=[\n",
+    "      # An empty slice spec means the overall slice, i.e. the whole dataset.\n",
+    "      tfma.SlicingSpec(),\n",
+    "      # Data can be sliced along a feature column. In this case, data is\n",
+    "      # sliced along feature column trip_start_hour.\n",
+    "      tfma.SlicingSpec(feature_keys=['trip_start_hour'])\n",
+    "    ])\n",
+    "\n",
-    "# The name of slicing column is specified as a RuntimeParameter.\n",
+    "# Evaluate and validate the model using the TFMA eval_config defined above.\n",
-    "model_analyzer = Evaluator(\n",
+    "evaluator = Evaluator(\n",
     "  examples=example_gen.outputs['examples'],\n",
     "  model=trainer.outputs['model'],\n",
-    "  feature_slicing_spec=dict(specs=[{\n",
-    "      'column_for_slicing': [slicing_column]\n",
-    "  }]))\n",
-    "\n",
-    "model_validator = ModelValidator(\n",
-    "  examples=example_gen.outputs['examples'], model=trainer.outputs['model'])\n",
+    "  eval_config=eval_config)\n",
     "\n",
     "pusher = Pusher(\n",
     "  model=trainer.outputs['model'],\n",
-    "  model_blessing=model_validator.outputs['blessing'],\n",
+    "  model_blessing=evaluator.outputs['blessing'],\n",
     "  push_destination=pusher_pb2.PushDestination(\n",
     "      filesystem=pusher_pb2.PushDestination.Filesystem(\n",
     "          base_directory=os.path.join(\n",
@@ -251,7 +276,7 @@
    "source": [
     "# Specify a TFX docker image. For the full list of tags please see:\n",
     "# https://hub.docker.com/r/tensorflow/tfx/tags\n",
-    "tfx_image = 'tensorflow/tfx:0.21.1'\n",
+    "tfx_image = 'tensorflow/tfx:0.21.2'\n",
     "config = kubeflow_dag_runner.KubeflowDagRunnerConfig(\n",
     "      kubeflow_metadata_config=kubeflow_dag_runner\n",
     "      .get_default_kubeflow_metadata_config(),\n",
@@ -283,29 +308,29 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 2",
    "language": "python",
-   "name": "python3"
+   "name": "python2"
   },
   "language_info": {
    "codemirror_mode": {
     "name": "ipython",
-    "version": 3
+    "version": 2
    },
    "file_extension": ".py",
    "mimetype": "text/x-python",
    "name": "python",
    "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.7.5rc1"
+   "pygments_lexer": "ipython2",
+   "version": "2.7.16"
   },
   "pycharm": {
    "stem_cell": {
     "cell_type": "raw",
-    "source": [],
     "metadata": {
      "collapsed": false
-    }
+    },
+    "source": []
    }
   }
  },

diff --git a/test/sample-test/requirements.in b/test/sample-test/requirements.in
@@ -9,4 +9,4 @@ google-api-python-client==1.7.8
 google-cloud-storage==1.17.0
 fire==0.2.1
 yamale==2.0
-tfx==0.21.1
+tfx==0.21.2