diff --git a/samples/core/kfp_env_validation/kfp_env_validation.ipynb b/samples/core/kfp_env_validation/kfp_env_validation.ipynb new file mode 100644 index 00000000000..2d082b6666c --- /dev/null +++ b/samples/core/kfp_env_validation/kfp_env_validation.ipynb @@ -0,0 +1,364 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Copyright 2020 Google Inc. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install Pipeline SDK - This only needs to be run once in the environment. 
# %pip install 'kfp>=0.1.31' --user --quiet

# %% [markdown]
# # Kubeflow Pipelines - Creating an environment validation pipeline using KFP diagnose_me libraries
# #### Step 0 - Gets all known configurations (this step does not fail due to errors)

# %%
def run_diagnose_me():
    """Prints a dump of the environment configuration using `kfp diagnose_me`.

    The body is deliberately self-contained (imports and the pip/kfp bootstrap
    live inside the function) because the function is wrapped with
    `func_to_container_op` and executed inside the
    'google/cloud-sdk:276.0.0' base image.
    """
    import subprocess

    # Installing pip3 and kfp, since the base image 'google/cloud-sdk:276.0.0'
    # does not come with pip3 pre-installed.
    for command in (
        ['curl', 'https://bootstrap.pypa.io/get-pip.py', '-o', 'get-pip.py'],
        ['apt-get', 'install', 'python3-distutils', '--yes'],
        ['python3', 'get-pip.py'],
        ['python3', '-m', 'pip', 'install', 'kfp>=0.1.31', '--quiet'],
    ):
        completed = subprocess.run(command, capture_output=True)
        if completed.returncode != 0:
            # Best effort: keep going, but surface the failure in the step log
            # instead of discarding it together with the captured output.
            print('Warning: command "%s" exited with code %s: %s' %
                  (' '.join(command), completed.returncode, completed.stderr))

    # Output is not captured so the report ends up in the step log.
    subprocess.run(['kfp', 'diagnose_me'])


# %% [markdown]
# #### Step 1 - Validates GCP credentials are configured correctly

# %%
def verify_gcp_credentials():
    """Verifies if gcp credentials are configured correctly.

    Prints the project the credentials give access to, followed by the
    output of `gcloud auth list`.

    The body is self-contained (bootstrap and imports inside the function)
    because it is wrapped with `func_to_container_op`.

    Raises:
      RuntimeError: If gcp credentials are not configured correctly
    """
    import subprocess

    # Installing pip3 and kfp, since the base image 'google/cloud-sdk:276.0.0'
    # does not come with pip3 pre-installed.
    for command in (
        ['curl', 'https://bootstrap.pypa.io/get-pip.py', '-o', 'get-pip.py'],
        ['apt-get', 'install', 'python3-distutils', '--yes'],
        ['python3', 'get-pip.py'],
        ['python3', '-m', 'pip', 'install', 'kfp>=0.1.31', '--quiet'],
    ):
        completed = subprocess.run(command, capture_output=True)
        if completed.returncode != 0:
            # Best effort: report the failure and continue.
            print('Warning: command "%s" exited with code %s: %s' %
                  (' '.join(command), completed.returncode, completed.stderr))

    # Imported only after the bootstrap above has installed kfp.
    from kfp.cli.diagnose_me import gcp

    # Get the project ID
    project_config = gcp.get_gcp_configuration(
        gcp.Commands.GET_GCLOUD_DEFAULT, human_readable=False)
    if project_config.has_error:
        raise RuntimeError(
            'Project configuration is not accessible with error %s\n' %
            (project_config.stderr) + 'Follow the instructions at\n' +
            'https://github.com/kubeflow/pipelines/blob/master/manifests/gcp_marketplace/guide.md#gcp-service-account-credentials \n'
            + 'to verify you have configured the required gcp secret.')

    project_id = project_config.parsed_output['core']['project']
    print('GCP credentials are configured with access to project: %s ...\n' %
          (project_id))
    print('Following account(s) are active under this pipeline:\n')
    subprocess.run(['gcloud', 'auth', 'list'])


# %% [markdown]
# #### Step 2 - Print scope configuration for each service account

# %%
def print_scopes():
    """Prints the scope settings for each instance and service account.

    Emits one JSON record per (instance, service account) pair with the keys
    'instance_name', 'service_account' and 'scopes'. Instances without a
    'serviceAccounts' entry produce no record.

    The body is self-contained (bootstrap and imports inside the function)
    because it is wrapped with `func_to_container_op`.

    Raises:
      RuntimeError: If the project ID or the scope configuration cannot be
        retrieved.
    """
    import subprocess

    # Installing pip3 and kfp, since the base image 'google/cloud-sdk:276.0.0'
    # does not come with pip3 pre-installed.
    for command in (
        ['curl', 'https://bootstrap.pypa.io/get-pip.py', '-o', 'get-pip.py'],
        ['apt-get', 'install', 'python3-distutils', '--yes'],
        ['python3', 'get-pip.py'],
        ['python3', '-m', 'pip', 'install', 'kfp>=0.1.31', '--quiet'],
    ):
        completed = subprocess.run(command, capture_output=True)
        if completed.returncode != 0:
            # Best effort: report the failure and continue.
            print('Warning: command "%s" exited with code %s: %s' %
                  (' '.join(command), completed.returncode, completed.stderr))

    import json
    # Imported only after the bootstrap above has installed kfp.
    from kfp.cli.diagnose_me import gcp

    # Get the project ID
    project_config = gcp.get_gcp_configuration(
        gcp.Commands.GET_GCLOUD_DEFAULT, human_readable=False)
    if project_config.has_error:
        raise RuntimeError('Could not retrieve project ID with error %s' %
                           (project_config.stderr))
    project_id = project_config.parsed_output['core']['project']
    print('Retrieving service account scope for each instance in project %s ...'
          % (project_id))

    # Get the scope configuration of the instances in the project
    scope_results = gcp.get_gcp_configuration(gcp.Commands.GET_SCOPES)
    if scope_results.has_error:
        raise RuntimeError('could not retrieve SCOPE status with error: %s' %
                           (scope_results.stderr))

    status = []
    for instance in scope_results.parsed_output:
        instance_name = instance.get('name', None)
        for service_account in instance.get('serviceAccounts', []):
            # A fresh dict per service account: reusing a single dict would
            # make every record of the instance alias the last account seen.
            status.append({
                'instance_name': instance_name,
                'service_account': service_account.get('email', None),
                'scopes': service_account.get('scopes', None),
            })

    # Printing the results in stdout for logging purposes
    print(json.dumps(status, indent=4, sort_keys=True))


# %% [markdown]
# #### Step 3 - Validate if
# required APIs are enabled in the project

# %%
def verfiy_gcp_apis(target_apis: str):
    """Verifies if specified APIs are enabled under the gcp project.

    NOTE: the function name is misspelled ("verfiy"); it is kept as-is because
    the component op below is built from this name.

    The body is self-contained (bootstrap and imports inside the function)
    because it is wrapped with `func_to_container_op`.

    Args:
      target_apis: comma separated name of the apis. Spaces are ignored and
        empty entries (empty string, trailing comma) are skipped.

    Raises:
      RuntimeError: If gcp secret is not configured correctly, or service
        account does not have proper privilege to access the API status, or
        one of the requested APIs is not enabled.
    """
    import subprocess

    # Installing pip3 and kfp, since the base image 'google/cloud-sdk:276.0.0'
    # does not come with pip3 pre-installed.
    for command in (
        ['curl', 'https://bootstrap.pypa.io/get-pip.py', '-o', 'get-pip.py'],
        ['apt-get', 'install', 'python3-distutils', '--yes'],
        ['python3', 'get-pip.py'],
        ['python3', '-m', 'pip', 'install', 'kfp>=0.1.31', '--quiet'],
    ):
        completed = subprocess.run(command, capture_output=True)
        if completed.returncode != 0:
            # Best effort: report the failure and continue.
            print('Warning: command "%s" exited with code %s: %s' %
                  (' '.join(command), completed.returncode, completed.stderr))

    # Imported only after the bootstrap above has installed kfp.
    from kfp.cli.diagnose_me import gcp

    # Get the project ID
    project_config = gcp.get_gcp_configuration(
        gcp.Commands.GET_GCLOUD_DEFAULT, human_readable=False)
    if project_config.has_error:
        raise RuntimeError('Could not retrieve project ID with error %s' %
                           (project_config.stderr))
    project_id = project_config.parsed_output['core']['project']
    print('Verifying APIs in project %s ...' % (project_id))

    # Get the status of GCP APIs and add the results to a dictionary
    api_config_results = gcp.get_gcp_configuration(gcp.Commands.GET_APIS)
    if api_config_results.has_error:
        raise RuntimeError('could not retrieve API status with error: %s' %
                           (api_config_results.stderr))

    api_status = {}
    for item in api_config_results.parsed_output:
        api_status[item['config']['name']] = item['state']
        # printing the results in stdout for logging purposes
        print('%s %s' % (item['config']['name'], item['state']))

    # Check if target apis are enabled. Empty names are dropped so that an
    # empty parameter or a trailing comma is not reported as a disabled API.
    requested_apis = [
        api for api in target_apis.replace(' ', '').split(',') if api
    ]
    error_list = [
        'API "%s" is not enabled. To enable this api go to '
        'https://console.cloud.google.com/apis/library/%s?project=%s' %
        (api, api, project_id)
        for api in requested_apis
        # APIs missing from the listing are treated as disabled.
        if api_status.get(api, 'DISABLED') != 'ENABLED'
    ]

    if error_list:
        raise RuntimeError('Required APIs are not enabled:\n' +
                           '\n'.join(error_list))


# %%
import kfp.components as comp

# Every step runs in the same Cloud SDK image.
BASE_IMAGE = 'google/cloud-sdk:276.0.0'

run_diagnose_me_op = comp.func_to_container_op(
    run_diagnose_me, base_image=BASE_IMAGE)

verify_gcp_credentials_op = comp.func_to_container_op(
    verify_gcp_credentials, base_image=BASE_IMAGE)

print_scopes_op = comp.func_to_container_op(
    print_scopes, base_image=BASE_IMAGE)

verify_gcp_apis_op = comp.func_to_container_op(
    verfiy_gcp_apis, base_image=BASE_IMAGE)

# %%
from kfp.gcp import use_gcp_secret
from kfp import dsl


@dsl.pipeline(
    name='Verify KFP Env',
    description="""
    Verifies if env is configured properly by
    Runs diagnose_me tool in the environment and outputs the results
    - Verify credentials are set correctly and print out the active service account name
    - Print the current scope for each service account
    - Verify the specified APIs are enabled in the project. To learn more about
    available APIs go to https://console.cloud.google.com/apis/library/."""
)
def verify_gcp_kfp_env(
    target_apis='stackdriver.googleapis.com, storage-api.googleapis.com, '
    'bigquery.googleapis.com, dataflow.googleapis.com'
):
    """A sample pipeline to help verify the KFP environment setup.

    Args:
      target_apis: comma separated names of the APIs that must be enabled in
        the project.
    """
    # This pipeline assumes a user-gcp-sa is needed for execution, if no secret
    # is needed, or a different secret is being used following should be
    # updated accordingly.
    secret_name = 'user-gcp-sa'

    # No step consumes the output of another one, so no ordering is declared.
    run_diagnose_me_op().apply(use_gcp_secret(secret_name))
    verify_gcp_credentials_op().apply(use_gcp_secret(secret_name))
    print_scopes_op().apply(use_gcp_secret(secret_name))
    verify_gcp_apis_op(target_apis).apply(use_gcp_secret(secret_name))


# %%
import kfp

# NOTE(review): the empty host looks like a placeholder - set it to the
# endpoint of your KFP deployment before running; confirm how kfp.Client
# treats an empty string.
client = kfp.Client(host='')

# %%
client.create_run_from_pipeline_func(verify_gcp_kfp_env, arguments={})

# Notebook metadata: kernelspec "Python 3" (python3), language_info python
# 3.5.3 (ipython3 lexer), nbformat 4, nbformat_minor 4.