diff --git a/manifests/gcp_marketplace/README.md b/manifests/gcp_marketplace/README.md index 5c89e7fe046..f71f5d351bc 100644 --- a/manifests/gcp_marketplace/README.md +++ b/manifests/gcp_marketplace/README.md @@ -1,19 +1,11 @@ # Kubeflow Pipelines for GKE Marketplace -> **Alpha version:** -Kubeflow Pipelines on GCP Marketplace is currently in **Alpha** with limited -support. The Kubeflow team is interested in any feedback you may have, in -particular with regards to usability of the feature. Please raise any issues -or discussion items in the -[Kubeflow Pipelines issue tracker](https://github.com/kubeflow/pipelines/issues). - Kubeflow Pipelines can be installed using either of the following approaches: * [Using the Google Cloud Platform Console](#using-install-platform-console) * [Using the command line](#using-install-command-line) - ## Using the Google Cloud Platform Marketplace Get up and running with a few clicks! Install this Kubeflow Pipelines app to a @@ -24,4 +16,41 @@ Google Kubernetes Engine cluster using Google Cloud Marketplace. Follow the ## Using the command line We prefer you use Google Cloud Platform Marketplace UI to deploy the application. -If you really want to use command line, please follow the [guide](https://github.com/kubeflow/pipelines/blob/master/manifests/gcp_marketplace/cli.md). +If you want to know how to deploy from the command line, please follow the [guide](https://github.com/kubeflow/pipelines/blob/master/manifests/gcp_marketplace/cli.md). It is not intended for production usage. The tool "mpdev" is for Kubeflow Pipelines developers. We will provide a better command line experience in 2020 Q2/Q3. For now, please check [Standalone CLI](https://www.kubeflow.org/docs/pipelines/installation/standalone-deployment/) for how to install via the command line. + +## Development guide + +This section details how to test your changes before submitting code. + +1. Make your code changes and commit them locally + +2. 
Build + +``` +gcloud builds submit --config=.cloudbuild.yaml --substitutions=COMMIT_SHA="$(git rev-parse HEAD)" --project=ml-pipeline-test +``` + +`gcr.io/$PROJECT_ID/hosted/$COMMIT_SHA/` contains the binaries. + +3. Auto-test (Install & Uninstall) + +MM_VER is the major.minor version parsed from the VERSION file, which uses the major.minor.patch format. + +``` +MM_VER=$(cat VERSION | sed -e "s#[^0-9]*\([0-9]*\)[.]\([0-9]*\)[.]\([0-9]*\)#\1.\2#") +gcloud builds submit --config=test/cloudbuild/mkp_verify.yaml --substitutions=COMMIT_SHA="$(git rev-parse HEAD)",_DEPLOYER_VERSION=$MM_VER --project=ml-pipeline-test +``` + +4. Manual-test (Install with advanced parameters and don't uninstall) + +Make sure your kubectl can connect to a target test cluster. + +```shell +APP_INSTANCE_NAME= +NAMESPACE= # Make sure you already created the namespace +MANAGEDSTORAGE=true # True means use CloudSQL + Minio-GCS; False means use in-cluster PVC + MySQL. +CLOUDSQL= # Format: project_id:region:cloudsql_instance_name +PROJECTID= # This field will be removed after Marketplace can pass in the project ID +mpdev install --deployer=gcr.io/ml-pipeline-test/hosted/$(git rev-parse HEAD)/deployer:$MM_VER \ + --parameters='{"name": "'$APP_INSTANCE_NAME'", "namespace": "'$NAMESPACE'", "managedstorage.enabled": '$MANAGEDSTORAGE', "managedstorage.cloudsqlInstanceConnectionName": "'$CLOUDSQL'", "managedstorage.gcsProjectId": "'$PROJECTID'"}' +``` diff --git a/manifests/gcp_marketplace/chart/kubeflow-pipelines/templates/metadata.yaml b/manifests/gcp_marketplace/chart/kubeflow-pipelines/templates/metadata.yaml index 2e16ed62523..9c88122d42a 100644 --- a/manifests/gcp_marketplace/chart/kubeflow-pipelines/templates/metadata.yaml +++ b/manifests/gcp_marketplace/chart/kubeflow-pipelines/templates/metadata.yaml @@ -38,6 +38,7 @@ spec: image: {{ .Values.images.metadataserver }} imagePullPolicy: 'Always' env: + # TODO: merge all into mysql-credential {{ if .Values.managedstorage.enabled }} - name: 
DBCONFIG_USER valueFrom: @@ -126,23 +127,6 @@ spec: --- apiVersion: v1 kind: ConfigMap -metadata: - name: metadata-configmap - labels: - component: metadata-server -data: - {{ if .Values.managedstorage.databaseNamePrefix }} - mysql_database: '{{ .Values.managedstorage.databaseNamePrefix }}_metadata' - {{ else }} - mysql_database: '{{ .Release.Name | replace "-" "_" | replace "." "_"}}_metadata' - {{ end }} - mysql_host: "mysql" - mysql_port: "3306" - username: "root" - password: "" ---- -apiVersion: v1 -kind: ConfigMap metadata: name: metadata-mysql-configmap labels: @@ -155,8 +139,6 @@ data: {{ end }} MYSQL_HOST: "mysql" MYSQL_PORT: "3306" - username: "root" - password: "" --- apiVersion: v1 kind: ConfigMap diff --git a/manifests/gcp_marketplace/chart/kubeflow-pipelines/templates/minio.yaml b/manifests/gcp_marketplace/chart/kubeflow-pipelines/templates/minio.yaml index 0cb9d6600b3..f6c8dc11ad8 100644 --- a/manifests/gcp_marketplace/chart/kubeflow-pipelines/templates/minio.yaml +++ b/manifests/gcp_marketplace/chart/kubeflow-pipelines/templates/minio.yaml @@ -38,13 +38,17 @@ spec: - args: - gateway - gcs + - {{ .Values.managedstorage.gcsProjectId }} env: - name: MINIO_ACCESS_KEY value: minio - name: MINIO_SECRET_KEY value: minio123 - - name: GOOGLE_APPLICATION_CREDENTIALS - value: "/etc/credentials/application_default_credentials.json" + # Minio is a KFP system workload, so it uses GCE's default service account + # (or, later, the corresponding Workload Identity service account). + # There is therefore no need to set GOOGLE_APPLICATION_CREDENTIALS. 
+ # - name: GOOGLE_APPLICATION_CREDENTIALS + # value: "/etc/credentials/application_default_credentials.json" image: {{ .Values.images.minio }} name: minio ports: diff --git a/manifests/gcp_marketplace/chart/kubeflow-pipelines/templates/mysql.yaml b/manifests/gcp_marketplace/chart/kubeflow-pipelines/templates/mysql.yaml index 2daaf175499..088929fd3cc 100644 --- a/manifests/gcp_marketplace/chart/kubeflow-pipelines/templates/mysql.yaml +++ b/manifests/gcp_marketplace/chart/kubeflow-pipelines/templates/mysql.yaml @@ -43,7 +43,8 @@ spec: "-dir=/cloudsql", # Replace with your own CloudSQL instance ID "-instances={{ .Values.managedstorage.cloudsqlInstanceConnectionName }}=tcp:0.0.0.0:3306", - "-credential_file=/credentials/application_default_credentials.json", + # System workload uses GCE default service account or Workload Identity's service account + # "-credential_file=/credentials/application_default_credentials.json", "term_timeout=10s"] # set term_timeout if require graceful handling of shutdown # NOTE: proxy will stop accepting new connections; only wait on existing connections diff --git a/manifests/gcp_marketplace/chart/kubeflow-pipelines/values.yaml b/manifests/gcp_marketplace/chart/kubeflow-pipelines/values.yaml index 20cc9b0d013..4bc2d0238d8 100644 --- a/manifests/gcp_marketplace/chart/kubeflow-pipelines/values.yaml +++ b/manifests/gcp_marketplace/chart/kubeflow-pipelines/values.yaml @@ -27,7 +27,12 @@ managedstorage: # # gcsBucketName is used in two places, so I wrote a template string here that # can be evaluated in each place. 
- gcsBucketName: '{{ if .Values.managedstorage.databaseNamePrefix }}{{ printf "%s-%s" .Values.managedstorage.cloudsqlInstanceConnectionName .Values.managedstorage.databaseNamePrefix | replace ":" "-" | trunc 50 }}{{ else }}{{ printf "%s-%s" .Values.managedstorage.cloudsqlInstanceConnectionName .Release.Name | replace ":" "-" | trunc 50 }}{{ end }}' + # + # Name pattern (':' is replaced with '-', then the name is lowercased and truncated to 60 characters): + # If databaseNamePrefix is specified: %{cloudsqlInstanceConnectionName}-%{databaseNamePrefix} + # else: %{cloudsqlInstanceConnectionName}-%{releaseName} + gcsBucketName: '{{ if .Values.managedstorage.databaseNamePrefix }}{{ printf "%s-%s" .Values.managedstorage.cloudsqlInstanceConnectionName .Values.managedstorage.databaseNamePrefix | replace ":" "-" | lower | trunc 60 }}{{ else }}{{ printf "%s-%s" .Values.managedstorage.cloudsqlInstanceConnectionName .Release.Name | replace ":" "-" | lower | trunc 60 }}{{ end }}' databaseNamePrefix: null dbUsername: 'root' dbPassword: '' + gcsProjectId: '' diff --git a/manifests/gcp_marketplace/schema.yaml b/manifests/gcp_marketplace/schema.yaml index 74a2b241968..2ad0bdf3d4b 100644 --- a/manifests/gcp_marketplace/schema.yaml +++ b/manifests/gcp_marketplace/schema.yaml @@ -108,6 +108,57 @@ properties: type: string x-google-marketplace: type: NAMESPACE + managedstorage.enabled: + type: boolean + title: Use managed storage + description: |- + Use Cloud SQL and GCS for storing the data. + Using CloudSQL and GCS provides better reliability and performance, + as well as features such as data backup and usage monitoring. + This is the recommended option especially for production scenarios. + If false, the data will be stored in GCE Persistent Disk. + default: false + managedstorage.cloudsqlInstanceConnectionName: + type: string + title: Cloud SQL instance connection name (Managed storage only) + description: |- + This field must be specified if you choose to use managed storage. + Provide the instance connection name for an existing Cloud SQL for MySQL instance. 
+ The instance connection name can be found on the instance detail page in the Cloud SQL console. + The instance connection name uses the format project:region:instance-name, for example, myproject:us-central1:myinstance. + For more details on how to create a new instance, see https://cloud.google.com/sql/docs/mysql/quickstart. + managedstorage.dbUsername: + type: string + title: Database username (Managed storage only) + description: |- + The database username to use when connecting to the Cloud SQL instance. + If you leave this field empty, the deployment will use the default 'root' user account to connect. + For more details about MySQL users, see https://cloud.google.com/sql/docs/mysql/users. + managedstorage.dbPassword: + type: string + title: Database password (Managed storage only) + x-google-marketplace: + type: MASKED_FIELD + description: |- + The database password to use when connecting to the Cloud SQL instance. + If you leave this field empty, the deployment will try to connect to the instance without providing a password. + This will fail if a password is required for the username you provided. + managedstorage.databaseNamePrefix: + type: string + title: Database name prefix (Managed storage only) + description: |- + The prefix of the database name. Kubeflow Pipelines will create two databases, + [prefix]_pipeline and [prefix]_metadata. + Use lowercase letters, numbers, and hyphens. Start with a letter. + If the prefix specified is same as an old deployment in the past, + the deployment will recover from an old deployment. + If this is not specified, the app instance name will be used. + managedstorage.gcsProjectId: + type: string + title: GCS Bucket's project ID (Managed storage only) + description: |- + Normally it's the same project which installs Kubeflow Pipelines. + It's required if managed storage is enabled. required: - name