Skip to content

Commit

Permalink
Improvements and bug fixes in DM config. (kubeflow#904)
Browse files Browse the repository at this point in the history
* Improvements and bug fixes in DM config.

* Create a service account to be used to authorize TFJobs and other work
  within the cluster.

* Create a helper script to download service account keys and turn
  them into K8s keys

* Fix some bugs in the docs.

* Fix kubeflow#878 create a GCP service account for the user.

* IAP script needs a GCP service account with network admin privileges.
* Add network admin privileges to the admin service account.
* Name the secrets in K8s so that by default the names are the same across
  the deployments. This way there's one less parameter to set for
  every deployment.

* VM service account should have a unique name per deployment so deployments
  are isolated.

* Need to grant the VM service account logs and monitoring access to support
  monitoring.

* I don't think there's any reason to allow user to specify name of the
  VM service account in the YAML file right now.

* Address comments.

* Autoformat jsonnet.
  • Loading branch information
jlewi authored and k8s-ci-robot committed Jun 1, 2018
1 parent 8a73fd3 commit 91d07f5
Show file tree
Hide file tree
Showing 7 changed files with 104 additions and 25 deletions.
2 changes: 0 additions & 2 deletions docs/gke/configs/cluster-kubeflow.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,6 @@ resources:
# This is the name of the GCP static ip address to reserve for your domain.
# This must be different for each Kubeflow deployment in your project.
ipName: kubeflow-ip
# Name of the service account to use for k8s worker node pools
vmServiceAccountName: kubeflow-service-account
# Provide the config for the bootstrapper. This should be a string
# containing the YAML spec for the bootstrapper.
#
Expand Down
56 changes: 51 additions & 5 deletions docs/gke/configs/cluster.jinja
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,15 @@ limitations under the License.
{% set STATEFULSETS_COLLECTION = '/apis/apps/v1/namespaces/{namespace}/statefulsets' %}
{% set CLUSTER_ROLE_BINDING_COLLECTION = '/apis/rbac.authorization.k8s.io/v1/clusterrolebindings' %}

{# Names for service accounts.#}
{# Names for service accounts.
-admin is to be used for admin tasks
-user is to be used by users for actual jobs.
-vm is used for the VM service account attached to the GKE VMs.
#}
{% set KF_ADMIN_NAME = NAME_PREFIX + '-admin' %}
{% set KF_USER_NAME = NAME_PREFIX + '-user' %}
{% set KF_VM_SA_NAME = NAME_PREFIX + '-vm' %}

{# For most of the K8s resources we set the deletePolicy to abandon; otherwise deployment manager reports various errors.
Since we delete the cluster all the K8s resources will be deleted anyway.
Expand All @@ -58,7 +65,14 @@ resources:
properties:
accountId: {{ KF_ADMIN_NAME }}
displayName: Service Account used for Kubeflow admin actions.
- name: kubeflow-cluster-vm-service-account

- name: {{ KF_USER_NAME }}
type: iam.v1.serviceAccount
properties:
accountId: {{ KF_USER_NAME }}
displayName: Service Account used for Kubeflow user actions.

# Service account attached to the GKE VMs. The accountId must be KF_VM_SA_NAME
# because the node pools and the IAM bindings refer to this account as
# KF_VM_SA_NAME@<project>.iam.gserviceaccount.com.
- name: {{ KF_VM_SA_NAME }}
type: iam.v1.serviceAccount
properties:
accountId: {{ KF_VM_SA_NAME }}
Expand Down Expand Up @@ -106,15 +120,15 @@ resources:
{% endif %}
nodeConfig:
machineType: n1-standard-1
serviceAccount: {{ properties['vmServiceAccountName'] }}@{{ env['project'] }}.iam.gserviceaccount.com
serviceAccount: {{ KF_VM_SA_NAME }}@{{ env['project'] }}.iam.gserviceaccount.com
{% if properties['securityConfig']['secureNodeMetadata'] %}
workloadMetadataConfig:
nodeMetadata: SECURE
{% endif %}
oauthScopes: {{ VM_OAUTH_SCOPES }}
metadata:
dependsOn:
- kubeflow-cluster-vm-service-account
- {{ KF_VM_SA_NAME }}

# We manage the node pools as separate resources.
# We do this so that if we want to make changes we can delete the existing resource and then recreate it.
Expand Down Expand Up @@ -166,7 +180,7 @@ resources:
nodeMetadata: SECURE
{% endif %}
machineType: n1-standard-8
serviceAccount: {{ properties['vmServiceAccountName'] }}@{{ env['project'] }}.iam.gserviceaccount.com
serviceAccount: {{ KF_VM_SA_NAME }}@{{ env['project'] }}.iam.gserviceaccount.com
oauthScopes: {{ VM_OAUTH_SCOPES }}
accelerators:
- acceleratorCount: 1
Expand Down Expand Up @@ -286,9 +300,40 @@ TODO(jlewi): Do we need to serialize API activation
members:
- {{ 'serviceAccount:' + env['project_number'] + '@cloudservices.gserviceaccount.com' }}

{# servicemanagement.admin is needed by CloudEndpoints controller
so we can create a service to get a hostname.
#}
- role: roles/servicemanagement.admin
members:
- {{ 'serviceAccount:' + KF_ADMIN_NAME + '@' + env['project'] + '.iam.gserviceaccount.com' }}
{# Network admin is needed to enable IAP and configure network settings
like backend timeouts and health checks.
#}
- role: roles/compute.networkAdmin
members:
- {{ 'serviceAccount:' + KF_ADMIN_NAME + '@' + env['project'] + '.iam.gserviceaccount.com' }}

- role: roles/storage.admin
members:
- {{ 'serviceAccount:' + KF_USER_NAME + '@' + env['project'] + '.iam.gserviceaccount.com' }}

- role: roles/bigquery.admin
members:
- {{ 'serviceAccount:' + KF_USER_NAME + '@' + env['project'] + '.iam.gserviceaccount.com' }}

- role: roles/dataflow.admin
members:
- {{ 'serviceAccount:' + KF_USER_NAME + '@' + env['project'] + '.iam.gserviceaccount.com' }}

- role: roles/logging.logWriter
members:
{# VM service account is used to write logs. #}
- {{ 'serviceAccount:' + KF_VM_SA_NAME + '@' + env['project'] + '.iam.gserviceaccount.com' }}

- role: roles/monitoring.metricWriter
members:
{# VM service account is used to write monitoring data. #}
- {{ 'serviceAccount:' + KF_VM_SA_NAME + '@' + env['project'] + '.iam.gserviceaccount.com' }}

remove: []

Expand All @@ -297,6 +342,7 @@ TODO(jlewi): Do we need to serialize API activation
- get-iam-policy
- iam-api
- {{ KF_ADMIN_NAME }}
- {{ KF_USER_NAME }}
runtimePolicy:
- UPDATE_ALWAYS

Expand Down
3 changes: 3 additions & 0 deletions docs/gke/configs/env-kubeflow.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ export PROJECT=kubeflow
# The name must be unique for each deployment within your project.
export DEPLOYMENT_NAME=kubeflow

# Set this to the zone in your ${CONFIG_FILE}
export ZONE=us-east1-d

# Set config file to the YAML file defining your deployment manager configs.
export CONFIG_FILE=cluster-${PROJECT}.yaml

Expand Down
18 changes: 18 additions & 0 deletions docs/gke/create_k8s_secrets.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/bin/bash
#
# A simple helper script to download secrets for Kubeflow service
# accounts and store them as K8s secrets.
#
# Required environment variables:
#   PROJECT          - GCP project that owns the service accounts.
#   DEPLOYMENT_NAME  - Deployment name; the service accounts are expected to be
#                      named ${DEPLOYMENT_NAME}-admin and ${DEPLOYMENT_NAME}-user.
#
# NOTE: the downloaded JSON key files are written to the current directory and
# are not removed by this script; delete them once the secrets are created if
# you do not need local copies.
set -ex

# Fail fast with a clear message instead of building a malformed service
# account email (e.g. "-admin@.iam.gserviceaccount.com") from unset variables.
: "${PROJECT:?PROJECT must be set, e.g. by sourcing env-kubeflow.sh}"
: "${DEPLOYMENT_NAME:?DEPLOYMENT_NAME must be set, e.g. by sourcing env-kubeflow.sh}"

export SA_EMAIL="${DEPLOYMENT_NAME}-admin@${PROJECT}.iam.gserviceaccount.com"

# TODO(jlewi): We should name the secrets more consistently based on the service account name.
# We will need to update the component configs though
gcloud --project="${PROJECT}" iam service-accounts keys create "${SA_EMAIL}.json" --iam-account "${SA_EMAIL}"
kubectl create secret generic --namespace=kubeflow admin-gcp-sa --from-file=admin-gcp-sa.json="./${SA_EMAIL}.json"

export USER_EMAIL="${DEPLOYMENT_NAME}-user@${PROJECT}.iam.gserviceaccount.com"
# USER_SECRET_NAME is not used below (the secret name is fixed, see the comment
# before kubectl); the export is kept so the script's environment is unchanged.
export USER_SECRET_NAME="${DEPLOYMENT_NAME}-user"
gcloud --project="${PROJECT}" iam service-accounts keys create "${USER_EMAIL}.json" --iam-account "${USER_EMAIL}"
# We want the secret name to be the same by default for all clusters so
# that users don't have to set it manually.
kubectl create secret generic --namespace=kubeflow user-gcp-sa --from-file=user-gcp-sa.json="./${USER_EMAIL}.json"
26 changes: 10 additions & 16 deletions docs/gke/gke_setup.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,12 @@ The instructions also take advantage of IAP to provide secure authenticated acce

1. Modify `cluster-kubeflow.yaml`

1. Set the zone for your cluster
1. Set property `ipName` to a value that is unique with respect to your project
1. Set parameter ipName in bootstrapperConfig to the value selected in the previous step
1. Set parameter acmeEmail in bootstrapperConfig to your email address
1. Set parameter hostname in bootstrapperConfig
1. Change the initial number of nodes if desired
1. Set the zone for your cluster
1. Set property `ipName` to a value that is unique with respect to your project
1. Set parameter ipName in bootstrapperConfig to the value selected in the previous step
1. Set parameter acmeEmail in bootstrapperConfig to your email address
1. Set parameter hostname in bootstrapperConfig
1. Change the initial number of nodes if desired

* If you want GPUs set a non-zero number for number of GPU nodes.

Expand All @@ -47,24 +47,18 @@ The instructions also take advantage of IAP to provide secure authenticated acce
1. Get credentials for the newly configured cluster

```
gcloud --project=${PROJECT} container clusters get-credentials --zone=${ZONE} ${DEPLOYMENT_NAME}-${NAME}
gcloud --project=${PROJECT} container clusters get-credentials --zone=${ZONE} ${DEPLOYMENT_NAME}
```

* ZONE - this will be the zone specified in your ${CONFIG_FILE}
* NAME - this will be the name specified in your ${CONFIG_FILE}

1. Create a service account and IAM bindings for the cloud-endpoints-controller

* You can skip this step if you are using a custom domain.
1. Create K8s secrets containing the secrets for the GCP service accounts to be used with Kubeflow

```
export SA_EMAIL=${DEPLOYMENT_NAME}-${NAME}@${PROJECT}.iam.gserviceaccount.com
gcloud --project=${PROJECT} iam service-accounts keys create ${SA_EMAIL}.json --iam-account $SA_EMAIL
kubectl create secret generic --namespace=kubeflow cloudep-sa --from-file=./${SA_EMAIL}.json
. env-kubeflow.sh
./create_k8s_secrets.sh
```

* ${NAME} is the name of the resource in your ${CONFIG_FILE}

### Create oauth client credentials

Create an OAuth Client ID to be used to identify IAP when requesting access to the user's email to verify their identity.
Expand Down
20 changes: 20 additions & 0 deletions kubeflow/core/iap.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,10 @@
name: "ENVOY_ADMIN",
value: "http://localhost:" + envoyAdminPort,
},
{
name: "GOOGLE_APPLICATION_CREDENTIALS",
value: "/var/run/secrets/sa/admin-gcp-sa.json",
},
],
volumeMounts: [
{
Expand All @@ -233,6 +237,11 @@
mountPath: "/var/shared/",
name: "shared",
},
{
name: "sa-key",
readOnly: true,
mountPath: "/var/run/secrets/sa",
},
],
},
],
Expand All @@ -250,6 +259,12 @@
},
name: "shared",
},
{
name: "sa-key",
secret: {
secretName: "admin-gcp-sa",
},
},
],
},
},
Expand Down Expand Up @@ -316,6 +331,11 @@
exit 1
fi
# Activate the service account
gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS}
# Print out the config for debugging
gcloud config list
NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[0].nodePort}')
while [[ -z ${BACKEND_ID} ]];
do BACKEND_ID=$(gcloud compute --project=${PROJECT} backend-services list --filter=name~k8s-be-${NODE_PORT}- --format='value(id)');
Expand Down
4 changes: 2 additions & 2 deletions kubeflow/core/prototypes/cloud-endpoints.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
// @description Provides cloud-endpoints prototypes for creating Cloud Endpoints services and DNS records.
// @shortDescription Cloud Endpoint domain creation.
// @param name string Name for the component
// @param secretName string Name of secret containing the json service account key.
// @optionalParam secretKey string cloudep-sa.json Name of the key in the secret containing the JSON service account key.
// @optionalParam secretName string admin-gcp-sa Name of secret containing the json service account key.
// @optionalParam secretKey string admin-gcp-sa.json Name of the key in the secret containing the JSON service account key.
// @optionalParam namespace string null Namespace to use for the components. It is automatically inherited from the environment if not set.

local k = import "k.libsonnet";
Expand Down

0 comments on commit 91d07f5

Please sign in to comment.