Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Kfp diagnose_me pipeline #2843

Merged
merged 13 commits into from
Jan 22, 2020
364 changes: 364 additions & 0 deletions samples/core/kfp_env_validation/kfp_env_validation.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,364 @@
{
SinaChavoshi marked this conversation as resolved.
Show resolved Hide resolved
SinaChavoshi marked this conversation as resolved.
Show resolved Hide resolved
SinaChavoshi marked this conversation as resolved.
Show resolved Hide resolved
SinaChavoshi marked this conversation as resolved.
Show resolved Hide resolved
SinaChavoshi marked this conversation as resolved.
Show resolved Hide resolved
SinaChavoshi marked this conversation as resolved.
Show resolved Hide resolved
SinaChavoshi marked this conversation as resolved.
Show resolved Hide resolved
SinaChavoshi marked this conversation as resolved.
Show resolved Hide resolved
SinaChavoshi marked this conversation as resolved.
Show resolved Hide resolved
SinaChavoshi marked this conversation as resolved.
Show resolved Hide resolved
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Copyright 2020 Google Inc. All Rights Reserved.\n",
"#\n",
"# Licensed under the Apache License, Version 2.0 (the \"License\");\n",
"# you may not use this file except in compliance with the License.\n",
"# You may obtain a copy of the License at\n",
"#\n",
"# http://www.apache.org/licenses/LICENSE-2.0\n",
"#\n",
"# Unless required by applicable law or agreed to in writing, software\n",
"# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
"# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
"# See the License for the specific language governing permissions and\n",
"# limitations under the License."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Install Pipeline SDK - This only needs to be run once in the environment. \n",
"!python3 -m pip install 'kfp>=0.1.31' --user --quiet"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# KubeFlow Pipelines - Creating an environment validation pipeline using KFP diagnose_me libraries \n",
"#### Step 0 - Gets all known configurations ( this step does not fail due to errors) "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def run_diagnose_me():\n",
" \"\"\" Prints a dump of gcp environment configurations.\n",
"\n",
" Raises:\n",
" RuntimeError: If gcp credentials are not configured correctly\n",
" \"\"\"\n",
" \n",
" # Installing pip3 and kfp, since the base image 'google/cloud-sdk:276.0.0' does not come with pip3 pre-installed.\n",
" import subprocess\n",
" subprocess.run(\n",
" ['curl', 'https://bootstrap.pypa.io/get-pip.py', '-o', 'get-pip.py'],\n",
" capture_output=True)\n",
" subprocess.run(['apt-get', 'install', 'python3-distutils', '--yes'],\n",
" capture_output=True)\n",
" subprocess.run(['python3', 'get-pip.py'], capture_output=True)\n",
" subprocess.run(['python3', '-m', 'pip', 'install', 'kfp>=0.1.31', '--quiet'],\n",
" capture_output=True)\n",
"\n",
" subprocess.run(['kfp', 'diagnose_me'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Step 1 - Validates GCP credentials are configured correctly"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def verify_gcp_credentials():\n",
" \"\"\" Verifies if gcp credentials are configured correctly.\n",
"\n",
" Raises:\n",
" RuntimeError: If gcp credentials are not configured correctly\n",
" \"\"\"\n",
"\n",
" # Installing pip3 and kfp, since the base image 'google/cloud-sdk:276.0.0' does not come with pip3 pre-installed.\n",
" import subprocess\n",
" subprocess.run(\n",
" ['curl', 'https://bootstrap.pypa.io/get-pip.py', '-o', 'get-pip.py'],\n",
" capture_output=True)\n",
" subprocess.run(['apt-get', 'install', 'python3-distutils', '--yes'],\n",
" capture_output=True)\n",
" subprocess.run(['python3', 'get-pip.py'], capture_output=True)\n",
" subprocess.run(['python3', '-m', 'pip', 'install', 'kfp>=0.1.31', '--quiet'],\n",
" capture_output=True)\n",
"\n",
" import sys\n",
" from typing import List, Text\n",
" import os\n",
" from kfp.cli.diagnose_me import gcp\n",
"\n",
" # Get the project ID\n",
" project_config = gcp.get_gcp_configuration(\n",
" gcp.Commands.GET_GCLOUD_DEFAULT, human_readable=False)\n",
" project_id = ''\n",
" if not project_config.has_error:\n",
" project_id = project_config.parsed_output['core']['project']\n",
" print('GCP credentials are configured with access to project: %s ...\\n' %\n",
" (project_id))\n",
" print('Following account(s) are active under this pipeline:\\n')\n",
" subprocess.run(['gcloud', 'auth', 'list'])\n",
" return\n",
"\n",
" raise RuntimeError(\n",
" 'Project configuration is not accessible with error %s\\n' %\n",
" (project_config.stderr) + 'Follow the instructions at\\n' +\n",
" 'https://github.com/kubeflow/pipelines/blob/master/manifests/gcp_marketplace/guide.md#gcp-service-account-credentials \\n'\n",
" + 'to verify you have configured the required gcp secret.')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Step 2 - Print scope configuration for each service account"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def print_scopes():\n",
" \"\"\" Prints the scope settings for each instance and service account.\n",
"\n",
" Raises:\n",
" RuntimeError: If gcp credentials are not configured correctly\n",
" \"\"\"\n",
"\n",
" # Installing pip3 and kfp, since the base image 'google/cloud-sdk:276.0.0' does not come with pip3 pre-installed.\n",
" import subprocess\n",
" subprocess.run(\n",
" ['curl', 'https://bootstrap.pypa.io/get-pip.py', '-o', 'get-pip.py'],\n",
" capture_output=True)\n",
" subprocess.run(['apt-get', 'install', 'python3-distutils', '--yes'],\n",
" capture_output=True)\n",
" subprocess.run(['python3', 'get-pip.py'], capture_output=True)\n",
" subprocess.run(['python3', '-m', 'pip', 'install', 'kfp>=0.1.31', '--quiet'],\n",
" capture_output=True)\n",
"\n",
" import sys\n",
" from typing import List, Text \n",
" import os\n",
" from kfp.cli.diagnose_me import gcp\n",
" import json\n",
" # Get the project ID\n",
" project_config = gcp.get_gcp_configuration(gcp.Commands.GET_GCLOUD_DEFAULT,human_readable=False)\n",
" project_id = '' \n",
" if not project_config.has_error:\n",
" project_id = project_config.parsed_output['core']['project']\n",
" print('Retrieving service account scope for each instant in project %s ...' % (project_id))\n",
" else: \n",
" raise RuntimeError('Could not retrieve project ID with error %s' % (project_config.stderr))\n",
" \n",
" # Get the status of GCP APIs and add the results to a dictionary\n",
" scope_results = gcp.get_gcp_configuration(\n",
" gcp.Commands.GET_SCOPES)\n",
" \n",
" status = []\n",
" \n",
" if scope_results.has_error:\n",
" raise RuntimeError('could not retrieve SCOPE status with error: %s' %(scope_results.stderr))\n",
"\n",
" for item in scope_results.parsed_output:\n",
" temp = {}\n",
" temp['instance_name'] = item.get('name',None)\n",
" for service_account in item.get('serviceAccounts',[]):\n",
" temp['service_account'] = service_account.get('email',None)\n",
" temp['scopes'] = service_account.get('scopes', None)\n",
" status.append(temp)\n",
" \n",
" # Printing the results in stdout for logging purposes \n",
" print(json.dumps(status,indent = 4, sort_keys = True))\n",
" \n",
" return"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Step 3 - Validate if required APIs are enabled in the project"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def verfiy_gcp_apis(target_apis:str):\n",
" \"\"\" Verifies if specified APIs are enabled under the gcp project.\n",
" \n",
" \n",
" Args: \n",
" target_apis: comma separated name of the apis\n",
" \n",
" Raises:\n",
" RuntimeError: If gcp secret is not configured correctly, or service account does not \n",
" have proper privilege to access the API status. \n",
" \"\"\"\n",
" \n",
" # Installing pip3 and kfp, since the base image 'google/cloud-sdk:276.0.0' does not come with pip3 pre-installed.\n",
" import subprocess\n",
" subprocess.run(['curl','https://bootstrap.pypa.io/get-pip.py','-o','get-pip.py'], capture_output=True)\n",
" subprocess.run(['apt-get', 'install', 'python3-distutils','--yes'], capture_output=True)\n",
" subprocess.run(['python3', 'get-pip.py'], capture_output=True)\n",
" subprocess.run(['python3', '-m','pip','install','kfp>=0.1.31', '--quiet'], capture_output=True)\n",
" \n",
" \n",
" import sys\n",
" from typing import List, Text \n",
" import os\n",
" from kfp.cli.diagnose_me import gcp\n",
" \n",
" # Get the project ID\n",
" project_config = gcp.get_gcp_configuration(gcp.Commands.GET_GCLOUD_DEFAULT,human_readable=False)\n",
" project_id = '' \n",
" if not project_config.has_error:\n",
" project_id = project_config.parsed_output['core']['project']\n",
" print('Verifying APIs in project %s ...' % (project_id))\n",
" else: \n",
" raise RuntimeError('Could not retrieve project ID with error %s' % (project_config.stderr))\n",
" \n",
" # Get the status of GCP APIs and add the results to a dictionary\n",
" api_config_results = gcp.get_gcp_configuration(\n",
" gcp.Commands.GET_APIS)\n",
" \n",
" api_status = {}\n",
" \n",
" if api_config_results.has_error:\n",
" raise RuntimeError('could not retrieve API status with error: %s' %(api_config_results.stderr))\n",
" \n",
" for item in api_config_results.parsed_output:\n",
" api_status[item['config']['name']] = item['state']\n",
" # printing the results in stdout for logging purposes \n",
" print('%s %s' % (item['config']['name'], item['state']))\n",
" \n",
"\n",
" # Check if target apis are enabled \n",
" api_check_results = True\n",
" error_list = []\n",
" for api in target_apis.replace(' ','').split(','): \n",
" if 'ENABLED'!= api_status.get(api, 'DISABLED'):\n",
" api_check_results = False\n",
" error_list.append('API \\\"%s\\\" is not enabled. To enable this api go to https://pantheon.corp.google.com/apis/library/%s?project=%s' %(api,api,project_id))\n",
" \n",
" if api_check_results:\n",
" return\n",
" else:\n",
" raise RuntimeError('Required APIs are not enabled:\\n'+ '\\n'.join(error_list))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import kfp.components as comp\n",
"\n",
"run_diagnose_me_op = comp.func_to_container_op(\n",
" run_diagnose_me, base_image='google/cloud-sdk:276.0.0')\n",
"\n",
"verify_gcp_credentials_op = comp.func_to_container_op(\n",
" verify_gcp_credentials, base_image='google/cloud-sdk:276.0.0')\n",
"\n",
"print_scopes_op = comp.func_to_container_op(\n",
" print_scopes, base_image='google/cloud-sdk:276.0.0')\n",
"\n",
"\n",
"verify_gcp_apis_op = comp.func_to_container_op(\n",
" verfiy_gcp_apis, base_image='google/cloud-sdk:276.0.0')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from kfp.gcp import use_gcp_secret\n",
"from kfp import dsl\n",
"\n",
"@dsl.pipeline(\n",
" name='Verify KFP Env',\n",
" description=\"\"\"\n",
" Verifies if env is configured properly by \n",
" Runs diagnose_me tool in the environment and outputs the results \n",
" - Verify credentials are set correctly and print out the active service account name\n",
" - Print the current scope for each service account \n",
" - Verify the specified APIs are enabled in the project. To learn more about\n",
" available APIs go to https://pantheon.corp.google.com/apis/library/.\"\"\"\n",
")\n",
"def verify_gcp_kfp_env(\n",
" target_apis='stackdriver.googleapis.com, storage-api.googleapis.com, '\n",
" 'bigquery.googleapis.com, dataflow.googleapis.com'\n",
"):\n",
" \"\"\"A sample pipeline to help verifies KFP environment setup.\"\"\"\n",
" \n",
" # This pipeline assumes a user-gcp-sa is needed for execution, if no secret is needed,\n",
" # or a different secret is being used following should be updated accordingly. \n",
" task0 = run_diagnose_me_op().apply(use_gcp_secret('user-gcp-sa'))\n",
" task1 = verify_gcp_credentials_op().apply(use_gcp_secret('user-gcp-sa'))\n",
" task2 = print_scopes_op().apply(use_gcp_secret('user-gcp-sa'))\n",
" task3 = verify_gcp_apis_op(target_apis).apply(use_gcp_secret('user-gcp-sa'))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import kfp\n",
"client = kfp.Client(host='<your-host-name>')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"client.create_run_from_pipeline_func(verify_gcp_kfp_env, arguments={})"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}