Skip to content

Commit

Permalink
Update cert-manager to 1.1.0 and use ingressShims for automated Certi…
Browse files Browse the repository at this point in the history
…ficates. (#400)

* Bump `cert-manager` to version 1.1.0, changing `clouddns` to `cloudDNS` and upgrading the API version to v1.

* Create the secret after the cluster is created in a new task: `gke/create/certificate-manager-secret`.

* Destroy the secret and remove the key from the DNS solver SA in a new task: `gke/destroy/certificate-manager-secret` (fixes #391).

* Move certificate resources into the cert-manager helmfile using `incubator/raw` chart and wait after cert-manager deployment to make sure the resources are available for the issuers.

* Add new environment variable `CERTIFICATE_MANAGER_CLUSTER_ISSUER` to control default cert issuer.

* Use `GCP_DNS_SOLVER_NAME` and `GCP_CLOUD_DNS_SERVICE_ACCOUNT` to format the DNS workload service account.

* Use `kubernetes.io/tls-acme` annotation for automatic secret deployment using ingress shims.

* Fix bad templating for frontend ingress introduced in #395 and update frontend chart version.
  • Loading branch information
willgraf authored Dec 9, 2020
1 parent d9e91f5 commit 029bb52
Show file tree
Hide file tree
Showing 10 changed files with 121 additions and 76 deletions.
3 changes: 3 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ ENV CLOUDSDK_CONTAINER_CLUSTER=""
ENV CLOUDSDK_BUCKET=""
ENV CLOUDSDK_COMPUTE_REGION=""
ENV GCP_SERVICE_ACCOUNT=${CLOUDSDK_CONTAINER_CLUSTER}@${CLOUDSDK_CORE_PROJECT}.iam.gserviceaccount.com
ENV GCP_DNS_SOLVER_NAME=dns01-solver
# Cloud DNS solver service account: <solver name>@<project>.iam.gserviceaccount.com
# Built from GCP_DNS_SOLVER_NAME rather than from this variable itself, which is not yet defined here.
ENV GCP_CLOUD_DNS_SERVICE_ACCOUNT=${GCP_DNS_SOLVER_NAME}@${CLOUDSDK_CORE_PROJECT}.iam.gserviceaccount.com
ENV GCP_PREDICTION_GPU_TYPE="nvidia-tesla-t4"
ENV GCP_TRAINING_GPU_TYPE="nvidia-tesla-v100"
ENV GKE_MACHINE_TYPE="n1-standard-1"
Expand All @@ -55,6 +57,7 @@ ENV CONSUMER_MACHINE_TYPE="n1-standard-2"
ENV CLOUD_PROVIDER=""
ENV ELK_DEPLOYMENT_TOGGLE=""
ENV CERTIFICATE_MANAGER_ENABLED=""
ENV CERTIFICATE_MANAGER_CLUSTER_ISSUER="letsencrypt-staging"

# Filesystem entry for tfstate
RUN s3 fstab '${KOPS_STATE_STORE}' '/' '/s3'
Expand Down
2 changes: 0 additions & 2 deletions conf/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ create: \
$(CLOUD_PROVIDER)/create/all \
kubectl/create/prometheus-redis-exporter-script \
helmfile/create/all \
kubectl/create/certificates \
kubectl/display/ip
@kubens deepcell
@echo "DeepCell Kiosk has been created successfully."
Expand All @@ -37,7 +36,6 @@ test/create: \
$(CLOUD_PROVIDER)/test/create/all \
kubectl/create/prometheus-redis-exporter-script \
helmfile/create/all \
kubectl/create/certificates \
kubectl/display/ip
@kubens deepcell
@echo "Cluster created"
Expand Down
52 changes: 0 additions & 52 deletions conf/addons/certificate-issuers.yaml

This file was deleted.

2 changes: 1 addition & 1 deletion conf/charts/frontend/Chart.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
apiVersion: v1
name: frontend
version: 0.2.0
version: 0.2.1
#kubeVersion: ^1.9.8
description: This project provides the frontend interface for the Tensorflow-serving backend of Deepcell.
keywords:
Expand Down
6 changes: 4 additions & 2 deletions conf/charts/frontend/templates/ingress.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
{{- if .Values.ingress.enabled -}}
{{- $fullName := include "fullname" . -}}
{{- $ingressPath := .Values.ingress.path -}}
apiVersion: networking.k8s.io/v1beta1
kind: Ingress
metadata:
Expand All @@ -25,9 +27,9 @@ spec:
- host: {{ . }}
http:
paths:
- path: {{ template "fullname" . }}
- path: {{ $ingressPath }}
backend:
serviceName: {{ template "fullname" . }}
serviceName: {{ $fullName }}
servicePort: http
{{- end }}
{{- if .Values.ingress.tls }}
Expand Down
77 changes: 74 additions & 3 deletions conf/helmfile.d/0010.cert-manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,12 @@ helmDefaults:
################################################################################

repositories:
# Stable repo of official helm charts
# Add the Jetstack Helm repository
- name: jetstack
url: https://charts.jetstack.io
# Kubernetes incubator repo of helm charts
- name: incubator
url: https://charts.helm.sh/incubator

releases:

Expand All @@ -22,12 +25,80 @@ releases:
namespace: cert-manager
labels:
chart: cert-manager
repo: stable
repo: jetstack
component: cert-manager
namespace: cert-manager
vendor: kubernetes
chart: jetstack/cert-manager
version: 1.0.3
version: 1.1.0
wait: true
atomic: true
cleanupOnFail: true
hooks:
- events: ["postsync"]
# Give cert-manager time to initialize itself
showlogs: true
command: /bin/sleep
args:
- 15
values:
- installCRDs: true

# serviceAccount:
# annotations:
# iam.gke.io/gcp-service-account: {{ env "GCP_CLOUD_DNS_SERVICE_ACCOUNT" | default "cloud-dns-sa-not-found"}}

ingressShim:
  # Default issuer applied by the ingress shim; taken from
  # CERTIFICATE_MANAGER_CLUSTER_ISSUER, falling back to the staging issuer.
  # Use "letsencrypt-prod" for production.
  defaultIssuerName: {{ env "CERTIFICATE_MANAGER_CLUSTER_ISSUER" | default "letsencrypt-staging" }}
  defaultIssuerKind: ClusterIssuer
  defaultIssuerGroup: cert-manager.io

#
# Certificate Issuers
# Release of the incubator/raw chart whose values carry two ClusterIssuer
# manifests (letsencrypt-staging and letsencrypt-prod) as raw resources.
- name: cert-manager-issuers
# Must be deployed after the cert-manager release (presumably <namespace>/<release name>).
needs:
- cert-manager/cert-manager
namespace: cert-manager
chart: incubator/raw
atomic: true
cleanupOnFail: true
force: true
values:
- resources:

# Issuer pointing at the Let's Encrypt staging ACME directory.
- apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
name: letsencrypt-staging
spec:
acme:
server: https://acme-staging-v02.api.letsencrypt.org/directory
privateKeySecretRef:
name: letsencrypt-staging
solvers:
# Empty selector; challenges are solved via DNS-01 using Cloud DNS.
- selector: {}
dns01:
cloudDNS:
# Project is read from CLOUDSDK_CORE_PROJECT; "project-not-found" is the fallback value.
project: {{ env "CLOUDSDK_CORE_PROJECT" | default "project-not-found" }}
# Service-account credentials are read from key "key.json" of this secret.
serviceAccountSecretRef:
name: clouddns-dns01-solver-svc-acct
key: key.json

# Issuer pointing at the Let's Encrypt production ACME directory.
- apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
name: letsencrypt-prod
spec:
acme:
server: https://acme-v02.api.letsencrypt.org/directory
privateKeySecretRef:
name: letsencrypt-prod
solvers:
# Same DNS-01 / Cloud DNS solver configuration as the staging issuer.
- selector: {}
dns01:
cloudDNS:
project: {{ env "CLOUDSDK_CORE_PROJECT" | default "project-not-found" }}
serviceAccountSecretRef:
name: clouddns-dns01-solver-svc-acct
key: key.json
7 changes: 4 additions & 3 deletions conf/helmfile.d/0300.frontend.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ releases:
vendor: vanvalenlab
default: true
chart: '{{ env "CHARTS_PATH" | default "/conf/charts" }}/frontend'
version: 0.2.0
version: 0.2.1
values:
- replicas: 1

Expand Down Expand Up @@ -59,9 +59,10 @@ releases:
nginx.ingress.kubernetes.io/ssl-redirect: "false"
nginx.ingress.kubernetes.io/proxy-body-size: "1g"
{{ if ne (env "CERTIFICATE_MANAGER_ENABLED" | default "") "" }}
kubernetes.io/tls-acme: "true"
nginx.ingress.kubernetes.io/auth-tls-secret: "deepcell/tls-cert"
# Update to "letsencrypt-prod" for production.
cert-manager.io/cluster-issuer: "letsencrypt-staging"
# Issuer comes from CERTIFICATE_MANAGER_CLUSTER_ISSUER, falling back to the staging issuer.
# Use "letsencrypt-prod" for production.
cert-manager.io/cluster-issuer: {{ env "CERTIFICATE_MANAGER_CLUSTER_ISSUER" | default "letsencrypt-staging" }}
{{ end }}

# Use $DNS_DOMAIN_NAME in production
Expand Down
40 changes: 32 additions & 8 deletions conf/tasks/Makefile.gke
Original file line number Diff line number Diff line change
Expand Up @@ -217,24 +217,47 @@ gke/destroy/service-account:
## Create Certificate Manager service account.
gke/create/certificate-manager-sa:
ifneq "" "${CERTIFICATE_MANAGER_ENABLED}"
@gcloud iam service-accounts create "dns01-solver" --display-name "dns01-solver" || \
@gcloud iam service-accounts create $(GCP_DNS_SOLVER_NAME) \
--display-name "Service Account to support ACME DNS-01 challenge." || \
echo "cert-manager service account already exists"
@gcloud projects add-iam-policy-binding $(CLOUDSDK_CORE_PROJECT) \
--member serviceAccount:dns01-solver@$(CLOUDSDK_CORE_PROJECT).iam.gserviceaccount.com \
--role roles/dns.admin
--role roles/dns.admin \
--member serviceAccount:$(GCP_CLOUD_DNS_SERVICE_ACCOUNT)
# @gcloud iam service-accounts add-iam-policy-binding \
# --role roles/iam.workloadIdentityUser \
# --member "serviceAccount:$(CLOUDSDK_CORE_PROJECT).svc.id.goog[cert-manager/cert-manager]" \
# $(GCP_CLOUD_DNS_SERVICE_ACCOUNT)
else
@echo "Certificate Manager is Disabled"
endif

## Create Certificate Manager Secret
gke/create/certificate-manager-secret:
ifneq "" "${CERTIFICATE_MANAGER_ENABLED}"
@gcloud iam service-accounts keys create key.json \
--iam-account dns01-solver@$(CLOUDSDK_CORE_PROJECT).iam.gserviceaccount.com
--iam-account $(GCP_CLOUD_DNS_SERVICE_ACCOUNT)
-@kubectl create namespace cert-manager
@kubectl -n default create secret generic clouddns-dns01-solver-svc-acct --from-file=key.json
@kubectl -n cert-manager create secret generic clouddns-dns01-solver-svc-acct --from-file=key.json
else
@echo "Certificate Manager is Disabled"
endif

# Deletes the IAM key recorded in the local key.json from the DNS solver
# service account. KEY_ID is the "private_key_id" field of key.json. A failed
# deletion is reported but does not abort the surrounding destroy sequence.
## Remove Certificate Manager Secret Key
gke/destroy/certifiate-manager-secret: KEY_ID = $(shell jq -r '.private_key_id' key.json)
gke/destroy/certifiate-manager-secret:
ifneq "" "${CERTIFICATE_MANAGER_ENABLED}"
	@gcloud iam service-accounts keys delete $(KEY_ID) --quiet \
		--iam-account $(GCP_CLOUD_DNS_SERVICE_ACCOUNT) || \
		echo "Could not remove key from IAM account."
else
	@echo "Certificate Manager is Disabled"
endif

# The original target name is misspelled ("certifiate") and is kept because
# existing callers use it; this alias exposes the correctly spelled name.
## Remove Certificate Manager Secret Key (correctly spelled alias)
gke/destroy/certificate-manager-secret: gke/destroy/certifiate-manager-secret

## Create bucket used by deepcell
gke/create/bucket:
@echo "Creating Google Cloud Storage Bucket ${CLOUDSDK_CORE_PROJECT}..."
@echo "Creating Google Cloud Storage Bucket ${CLOUDSDK_BUCKET}..."
@gsutil mb -p $(CLOUDSDK_CORE_PROJECT) gs://$(CLOUDSDK_BUCKET) \
|| echo "Bucket ${CLOUDSDK_CODE_PROJECT} already exists. No need to create that bucket."
|| echo "Bucket ${CLOUDSDK_BUCKET} already exists. No need to create that bucket."
@-gsutil acl ch -u $(GCP_SERVICE_ACCOUNT):O gs://$(CLOUDSDK_BUCKET)
@echo "Google Cloud Storage Bucket creation finished."
@echo " "
Expand Down Expand Up @@ -270,6 +293,7 @@ gke/deploy/nvidia:
## Create cluster resources, after authentication
gke/create/resources: \
gke/create/cluster \
gke/create/certificate-manager-secret \
gke/create/node-pools \
gke/create/elk-node-pools \
gke/create/bucket \
Expand All @@ -288,12 +312,12 @@ gke/destroy/all: \
gke/destroy/node-pools \
gke/destroy/cluster \
gke/destroy/service-account \
gke/destroy/certifiate-manager-secret \
gke/destroy/pds
@echo "GKE cluster destroyed"
@exit 0



# https://cloud.google.com/storage/docs/access-control/iam-roles
# Currently, using Editor and Kubernetes Engine Admin roles for the testing-ci service account.
# It might be possible to replace Editor with something more specific, but more research would be needed.
Expand Down
5 changes: 0 additions & 5 deletions conf/tasks/Makefile.kubectl
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,3 @@ kubectl/destroy/prometheus/operator:
-@kubectl delete crd alertmanagers.monitoring.coreos.com
-@kubectl delete crd podmonitors.monitoring.coreos.com
-@kubectl delete crd thanosrulers.monitoring.coreos.com

kubectl/create/certificates:
ifneq "" "${CERTIFICATE_MANAGER_ENABLED}"
-@gomplate -f addons/certificate-issuers.yaml | kubectl apply -f -
endif
3 changes: 3 additions & 0 deletions scripts/menu.sh
Original file line number Diff line number Diff line change
Expand Up @@ -535,6 +535,9 @@ function configure_gke() {

export CLOUD_PROVIDER=gke
export GCP_SERVICE_ACCOUNT=${CLOUDSDK_CONTAINER_CLUSTER}@${CLOUDSDK_CORE_PROJECT}.iam.gserviceaccount.com
# export GCP_DNS_SOLVER_NAME=dns01-solver-$(date +%s)
export GCP_DNS_SOLVER_NAME=dns01-solver
export GCP_CLOUD_DNS_SERVICE_ACCOUNT=${GCP_DNS_SOLVER_NAME}@${CLOUDSDK_CORE_PROJECT}.iam.gserviceaccount.com

# These 2 values are hard-coded for now, menu is commented out above.
export GPU_MACHINE_TYPE=${GPU_MACHINE_TYPE:-n1-highmem-2}
Expand Down

0 comments on commit 029bb52

Please sign in to comment.