diff --git a/samples/core/kubeflow_training_classification/README.md b/samples/core/kubeflow_training_classification/README.md deleted file mode 100644 index a551605772e..00000000000 --- a/samples/core/kubeflow_training_classification/README.md +++ /dev/null @@ -1,47 +0,0 @@ -## Overview - -The `kubeflow-training-classification.py` pipeline creates a TensorFlow model on structured data and image URLs (Google Cloud Storage). It works for both classification and regression. -Everything runs inside the pipeline cluster (Kubeflow). The only possible dependency is Google Cloud DataFlow if you enable the "*cloud*" mode for -the preprocessing or prediction step. - -## The requirements - -By default, the preprocessing and prediction steps use the "*local*" mode and run inside the cluster. If you specify the value of "*preprocess_mode*" as "*cloud*", you must enable the -[DataFlow API](https://cloud.google.com/endpoints/docs/openapi/enable-api) for the given GCP project so that the preprocessing step -can use Cloud DataFlow. - -Note: The trainer depends on Kubeflow API version v1alpha2. - -## Compiling the pipeline template - -Follow the guide to [building a pipeline](https://www.kubeflow.org/docs/guides/pipelines/build-pipeline/) to install the Kubeflow Pipelines SDK, then run the following command to compile the sample Python into a workflow specification. The specification takes the form of a YAML file compressed into a `.tar.gz` file. - -```bash -dsl-compile --py kubeflow-training-classification.py --output kubeflow-training-classification.tar.gz -``` - -## Deploying the pipeline - -Open the Kubeflow pipelines UI. Create a new pipeline, and then upload the compiled specification (`.tar.gz` file) as a new pipeline template. - -The pipeline requires one argument: - -1. An output directory in a Google Cloud Storage bucket, of the form `gs:///`. - -## Components source - -Preprocessing: - [source code](https://github.com/kubeflow/pipelines/tree/master/components/dataflow/tft/src), - [container](https://github.com/kubeflow/pipelines/tree/master/components/dataflow/tft) - -Training: - [source code](https://github.com/kubeflow/pipelines/tree/master/components/kubeflow/launcher/src), - [container](https://github.com/kubeflow/pipelines/tree/master/components/kubeflow/launcher) - -Prediction: - [source code](https://github.com/kubeflow/pipelines/tree/master/components/dataflow/predict/src), - [container](https://github.com/kubeflow/pipelines/tree/master/components/dataflow/predict) - -Confusion Matrix: - [source code](https://github.com/kubeflow/pipelines/tree/master/components/local/confusion_matrix/src), - [container](https://github.com/kubeflow/pipelines/tree/master/components/local/confusion_matrix) diff --git a/samples/core/kubeflow_training_classification/kubeflow_training_classification.py b/samples/core/kubeflow_training_classification/kubeflow_training_classification.py deleted file mode 100755 index aecb2da2f15..00000000000 --- a/samples/core/kubeflow_training_classification/kubeflow_training_classification.py +++ /dev/null @@ -1,91 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import kfp -from kfp import components -from kfp import dsl -from kfp import gcp - -dataflow_tf_transform_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/48dd338c8ab328084633c51704cda77db79ac8c2/components/dataflow/tft/component.yaml') -kubeflow_tf_training_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/48dd338c8ab328084633c51704cda77db79ac8c2/components/kubeflow/dnntrainer/component.yaml') -dataflow_tf_predict_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/48dd338c8ab328084633c51704cda77db79ac8c2/components/dataflow/predict/component.yaml') -confusion_matrix_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/48dd338c8ab328084633c51704cda77db79ac8c2/components/local/confusion_matrix/component.yaml') - -@dsl.pipeline( - name='TF training and prediction pipeline', - description='' -) -def kubeflow_training(output, project, - evaluation='gs://ml-pipeline-playground/flower/eval100.csv', - train='gs://ml-pipeline-playground/flower/train200.csv', - schema='gs://ml-pipeline-playground/flower/schema.json', - learning_rate=0.1, - hidden_layer_size='100,50', - steps=2000, - target='label', - workers=0, - pss=0, - preprocess_mode='local', - predict_mode='local', -): - output_template = str(output) + '/{{workflow.uid}}/{{pod.name}}/data' - - # set the flag to use GPU trainer - use_gpu = False - - preprocess = dataflow_tf_transform_op( - training_data_file_pattern=train, - evaluation_data_file_pattern=evaluation, - schema=schema, - gcp_project=project, - run_mode=preprocess_mode, - preprocessing_module='', - transformed_data_dir=output_template - ).apply(gcp.use_gcp_secret('user-gcp-sa')) - - training = kubeflow_tf_training_op( - transformed_data_dir=preprocess.output, - schema=schema, - learning_rate=learning_rate, - hidden_layer_size=hidden_layer_size, - steps=steps, - target=target, - preprocessing_module='', - training_output_dir=output_template - ).apply(gcp.use_gcp_secret('user-gcp-sa')) - - if use_gpu: - training.image = 'gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer-gpu:0517114dc2b365a4a6d95424af6157ead774eff3', - training.set_gpu_limit(1) - - prediction = dataflow_tf_predict_op( - data_file_pattern=evaluation, - schema=schema, - target_column=target, - model=training.output, - run_mode=predict_mode, - gcp_project=project, - predictions_dir=output_template - ).apply(gcp.use_gcp_secret('user-gcp-sa')) - - confusion_matrix = confusion_matrix_op( - predictions=prediction.output, - output_dir=output_template - ).apply(gcp.use_gcp_secret('user-gcp-sa')) - - -if __name__ == '__main__': - kfp.compiler.Compiler().compile(kubeflow_training, __file__ + '.zip') diff --git a/test/sample-test/run_sample_test.py b/test/sample-test/run_sample_test.py index 9624d2dc952..0e69f3ef007 100644 --- a/test/sample-test/run_sample_test.py +++ b/test/sample-test/run_sample_test.py @@ -96,15 +96,6 @@ def main(): 'steps': '5' } - elif args.testname == 'kubeflow_training_classification': - params = { - 'output': args.output, - 'project': 'ml-pipeline-test', - 'evaluation': 'gs://ml-pipeline-dataset/sample-test/flower/eval15.csv', - 'train': 'gs://ml-pipeline-dataset/sample-test/flower/train30.csv', - 'hidden-layer-size': '10,5', - 'steps': '5' - } elif args.testname == 'xgboost_training_cm': params = { 'output': args.output, @@ -150,22 +141,7 @@ def main(): ###### Validate the results for specific test cases ###### #TODO: Add result check for tfx-cab-classification after launch. - if args.testname == 'kubeflow_training_classification': - cm_tar_path = './confusion_matrix.tar.gz' - utils.get_artifact_in_minio(workflow_json, 'confusion-matrix', cm_tar_path, - 'mlpipeline-ui-metadata') - with tarfile.open(cm_tar_path) as tar_handle: - file_handles = tar_handle.getmembers() - assert len(file_handles) == 1 - - with tar_handle.extractfile(file_handles[0]) as f: - cm_data = json.load(io.TextIOWrapper(f)) - utils.add_junit_test( - test_cases, 'confusion matrix format', - (len(cm_data['outputs'][0]['schema']) == 3), - 'the column number of the confusion matrix output is not equal to three' - ) - elif args.testname == 'xgboost_training_cm': + if args.testname == 'xgboost_training_cm': cm_tar_path = './confusion_matrix.tar.gz' utils.get_artifact_in_minio(workflow_json, 'confusion-matrix', cm_tar_path, 'mlpipeline-ui-metadata') diff --git a/test/sample-test/run_test.sh b/test/sample-test/run_test.sh index 21de19d2eb8..bf5c10a932e 100755 --- a/test/sample-test/run_test.sh +++ b/test/sample-test/run_test.sh @@ -203,17 +203,6 @@ xgboost_training_cm_injection() { sed -i "s|gcr.io/ml-pipeline/ml-pipeline-local-roc:\([a-zA-Z0-9_.-]\)\+|${LOCAL_ROC_IMAGE}|g" ${TEST_NAME}.yaml } -################################################################################ -# Utility function to inject correct images to python files for -# kubeflow_training_classification test. -################################################################################ -kubeflow_training_classification_injection() { - sed -i "s|gcr.io/ml-pipeline/ml-pipeline-dataflow-tft:\([a-zA-Z0-9_.-]\)\+|${DATAFLOW_TFT_IMAGE}|g" ${TEST_NAME}.py - sed -i "s|gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer:\([a-zA-Z0-9_.-]\)\+|${KUBEFLOW_DNNTRAINER_IMAGE}|g" ${TEST_NAME}.py - sed -i "s|gcr.io/ml-pipeline/ml-pipeline-dataflow-tf-predict:\([a-zA-Z0-9_.-]\)\+|${DATAFLOW_PREDICT_IMAGE}|g" ${TEST_NAME}.py - sed -i "s|gcr.io/ml-pipeline/ml-pipeline-local-confusion-matrix:\([a-zA-Z0-9_.-]\)\+|${LOCAL_CONFUSIONMATRIX_IMAGE}|g" ${TEST_NAME}.py -} - if [[ -z "$RESULTS_GCS_DIR" ]]; then usage exit 1 @@ -231,17 +220,7 @@ echo "Run the sample tests..." # Run the tests preparation ${TEST_NAME} -if [[ "${TEST_NAME}" == "kubeflow_training_classification" ]]; then - #TODO(numerology): convert the sed commands to sed -e - # 's|gcr.io/ml-pipeline/|gcr.io/ml-pipeline-test/' and tag replacement. Also - # let the postsubmit tests refer to yaml files. - if [ -n "${DATAFLOW_TFT_IMAGE}" ];then - kubeflow_training_classification_injection - fi - - dsl-compile --py "${TEST_NAME}.py" --output "${TEST_NAME}.yaml" - check_result ${TEST_NAME} -elif [[ "${TEST_NAME}" == "tfx_cab_classification" ]]; then +if [[ "${TEST_NAME}" == "tfx_cab_classification" ]]; then dsl-compile --py "${TEST_NAME}.py" --output "${TEST_NAME}.yaml" if [[ -n "${DATAFLOW_TFT_IMAGE}" ]]; then tfx_cab_classification_injection diff --git a/test/sample_test.yaml b/test/sample_test.yaml index 211329e5c5a..5ce101b4c02 100644 --- a/test/sample_test.yaml +++ b/test/sample_test.yaml @@ -67,7 +67,6 @@ spec: - name: test-name value: "{{item}}" withItems: - - kubeflow_training_classification - tfx_cab_classification - xgboost_training_cm - kubeflow_pipeline_using_TFX_OSS_components