diff --git a/.cloudbuild.yaml b/.cloudbuild.yaml index bbecb56ab83..76bf03d84a5 100644 --- a/.cloudbuild.yaml +++ b/.cloudbuild.yaml @@ -94,6 +94,18 @@ steps: '--build-arg', 'COMMIT_HASH=$COMMIT_SHA', '-f', '/workspace/backend/metadata_writer/Dockerfile', '/workspace'] waitFor: ["-"] +- id: 'buildCacheServer' +name: 'gcr.io/cloud-builders/docker' +args: ['build', '-t', 'gcr.io/$PROJECT_ID/cache-server:$COMMIT_SHA', + '--build-arg', 'COMMIT_HASH=$COMMIT_SHA', '-f', + '/workspace/backend/Dockerfile.cacheserver', '/workspace'] +waitFor: ["-"] +- id: 'buildCacheDeployer' +name: 'gcr.io/cloud-builders/docker' +args: ['build', '-t', 'gcr.io/$PROJECT_ID/cache-deployer:$COMMIT_SHA', + '--build-arg', 'COMMIT_HASH=$COMMIT_SHA', '-f', + '/workspace/backend/src/cache/deployer/Dockerfile', '/workspace'] +waitFor: ["-"] # Build marketplace deployer - id: 'buildMarketplaceDeployer' @@ -215,6 +227,8 @@ images: - 'gcr.io/$PROJECT_ID/inverse-proxy-agent:$COMMIT_SHA' - 'gcr.io/$PROJECT_ID/visualization-server:$COMMIT_SHA' - 'gcr.io/$PROJECT_ID/metadata-writer:$COMMIT_SHA' +- 'gcr.io/$PROJECT_ID/cache-server:$COMMIT_SHA' +- 'gcr.io/$PROJECT_ID/cache-deployer:$COMMIT_SHA' # Images for Marketplace - 'gcr.io/$PROJECT_ID/deployer:$COMMIT_SHA' diff --git a/.release.cloudbuild.yaml b/.release.cloudbuild.yaml index 46e9f214935..ccdbb2fb935 100644 --- a/.release.cloudbuild.yaml +++ b/.release.cloudbuild.yaml @@ -297,6 +297,70 @@ steps: docker push gcr.io/ml-pipeline/google/pipelines/metadatawriter:$(cat /workspace/mm.ver) docker push gcr.io/ml-pipeline/google/pipelines-test/metadatawriter:$(cat /workspace/mm.ver) +- id: 'pullCacheServer' + name: 'gcr.io/cloud-builders/docker' + args: ['pull', 'gcr.io/$PROJECT_ID/cache-server:$COMMIT_SHA'] + waitFor: ['-'] +- id: 'tagCacheServerVersionNumber' + name: 'gcr.io/cloud-builders/docker' + args: ['tag', 'gcr.io/$PROJECT_ID/cache-server:$COMMIT_SHA', 'gcr.io/ml-pipeline/cache-server:$TAG_NAME'] + waitFor: ['pullCacheServer'] +- id: 'tagCacheServerCommitSHA' + name: 'gcr.io/cloud-builders/docker' + args: ['tag', 'gcr.io/$PROJECT_ID/cache-server:$COMMIT_SHA', 'gcr.io/ml-pipeline/cache-server:$COMMIT_SHA'] + waitFor: ['pullCacheServer'] +- id: 'tagCacheServerForMarketplace' + name: 'gcr.io/cloud-builders/docker' + args: ['tag', 'gcr.io/$PROJECT_ID/cache-server:$COMMIT_SHA', 'gcr.io/ml-pipeline/google/pipelines/cacheserver:$TAG_NAME'] + waitFor: ['pullCacheServer'] +- id: 'tagCacheServerForMarketplaceTest' + name: 'gcr.io/cloud-builders/docker' + args: ['tag', 'gcr.io/$PROJECT_ID/cache-server:$COMMIT_SHA', 'gcr.io/ml-pipeline/google/pipelines-test/cacheserver:$TAG_NAME'] + waitFor: ['pullCacheServer'] +- id: 'tagCacheServerForMarketplaceMajorMinor' + waitFor: ['pullCacheServer', 'parseMajorMinorVersion'] + name: 'gcr.io/cloud-builders/docker' + entrypoint: bash + args: + - -ceux + - | + docker tag gcr.io/$PROJECT_ID/cache-server:$COMMIT_SHA gcr.io/ml-pipeline/google/pipelines/cacheserver:$(cat /workspace/mm.ver) + docker tag gcr.io/$PROJECT_ID/cache-server:$COMMIT_SHA gcr.io/ml-pipeline/google/pipelines-test/cacheserver:$(cat /workspace/mm.ver) + docker push gcr.io/ml-pipeline/google/pipelines/cacheserver:$(cat /workspace/mm.ver) + docker push gcr.io/ml-pipeline/google/pipelines-test/cacheserver:$(cat /workspace/mm.ver) + +- id: 'pullCacheDeployer' + name: 'gcr.io/cloud-builders/docker' + args: ['pull', 'gcr.io/$PROJECT_ID/cache-deployer:$COMMIT_SHA'] + waitFor: ['-'] +- id: 'tagCacheDeployerVersionNumber' + name: 'gcr.io/cloud-builders/docker' + args: ['tag', 'gcr.io/$PROJECT_ID/cache-deployer:$COMMIT_SHA', 'gcr.io/ml-pipeline/cache-deployer:$TAG_NAME'] + waitFor: ['pullCacheDeployer'] +- id: 'tagCacheDeployerCommitSHA' + name: 'gcr.io/cloud-builders/docker' + args: ['tag', 'gcr.io/$PROJECT_ID/cache-deployer:$COMMIT_SHA', 'gcr.io/ml-pipeline/cache-deployer:$COMMIT_SHA'] + waitFor: ['pullCacheDeployer'] +- id: 'tagCacheDeployerForMarketplace' + name: 'gcr.io/cloud-builders/docker' + args: ['tag', 'gcr.io/$PROJECT_ID/cache-deployer:$COMMIT_SHA', 'gcr.io/ml-pipeline/google/pipelines/cachedeployer:$TAG_NAME'] + waitFor: ['pullCacheDeployer'] +- id: 'tagCacheDeployerForMarketplaceTest' + name: 'gcr.io/cloud-builders/docker' + args: ['tag', 'gcr.io/$PROJECT_ID/cache-deployer:$COMMIT_SHA', 'gcr.io/ml-pipeline/google/pipelines-test/cachedeployer:$TAG_NAME'] + waitFor: ['pullCacheDeployer'] +- id: 'tagCacheDeployerForMarketplaceMajorMinor' + waitFor: ['pullCacheDeployer', 'parseMajorMinorVersion'] + name: 'gcr.io/cloud-builders/docker' + entrypoint: bash + args: + - -ceux + - | + docker tag gcr.io/$PROJECT_ID/cache-deployer:$COMMIT_SHA gcr.io/ml-pipeline/google/pipelines/cachedeployer:$(cat /workspace/mm.ver) + docker tag gcr.io/$PROJECT_ID/cache-deployer:$COMMIT_SHA gcr.io/ml-pipeline/google/pipelines-test/cachedeployer:$(cat /workspace/mm.ver) + docker push gcr.io/ml-pipeline/google/pipelines/cachedeployer:$(cat /workspace/mm.ver) + docker push gcr.io/ml-pipeline/google/pipelines-test/cachedeployer:$(cat /workspace/mm.ver) + - name: 'gcr.io/cloud-builders/docker' args: ['pull', 'gcr.io/$PROJECT_ID/metadata-envoy:$COMMIT_SHA'] id: 'pullMetadataEnvoy' @@ -558,6 +622,8 @@ images: - 'gcr.io/ml-pipeline/google/pipelines/metadataenvoy:$TAG_NAME' - 'gcr.io/ml-pipeline/google/pipelines/metadatawriter:$TAG_NAME' - 'gcr.io/ml-pipeline/google/pipelines/deployer:$TAG_NAME' +- 'gcr.io/ml-pipeline/google/pipelines/cacheserver:$TAG_NAME' +- 'gcr.io/ml-pipeline/google/pipelines/cachedeployer:$TAG_NAME' - 'gcr.io/ml-pipeline/google/pipelines:$TAG_NAME' - 'gcr.io/ml-pipeline/google/pipelines-test/frontend:$TAG_NAME' - 'gcr.io/ml-pipeline/google/pipelines-test/apiserver:$TAG_NAME' @@ -574,6 +640,8 @@ images: - 'gcr.io/ml-pipeline/google/pipelines-test/argoworkflowcontroller:$TAG_NAME' - 'gcr.io/ml-pipeline/google/pipelines-test/metadataenvoy:$TAG_NAME' - 'gcr.io/ml-pipeline/google/pipelines-test/metadatawriter:$TAG_NAME' +- 'gcr.io/ml-pipeline/google/pipelines-test/cacheserver:$TAG_NAME' +- 'gcr.io/ml-pipeline/google/pipelines-test/cachedeployer:$TAG_NAME' - 'gcr.io/ml-pipeline/google/pipelines-test/deployer:$TAG_NAME' - 'gcr.io/ml-pipeline/google/pipelines-test:$TAG_NAME' timeout: '1200s' diff --git a/backend/Dockerfile.cacheserver b/backend/Dockerfile.cacheserver new file mode 100644 index 00000000000..934ccfc7f1e --- /dev/null +++ b/backend/Dockerfile.cacheserver @@ -0,0 +1,20 @@ +# Dockerfile for building the source code of cache_server +FROM golang:1.11-alpine3.7 as builder + +RUN apk update && apk upgrade && \ + apk add --no-cache bash git openssh gcc musl-dev + +WORKDIR /go/src/github.com/kubeflow/pipelines +COPY . . + +RUN GO111MODULE=on go build -o /bin/cache_server backend/src/cache/*.go +RUN git clone https://github.com/hashicorp/golang-lru.git /kfp/cache/golang-lru/ + +FROM alpine:3.8 +WORKDIR /bin + +COPY --from=builder /bin/cache_server /bin/cache_server +COPY --from=builder /go/src/github.com/kubeflow/pipelines/third_party/license.txt /bin/license.txt +COPY --from=builder /kfp/cache/golang-lru/* /bin/golang-lru/ + +ENTRYPOINT [ "/bin/cache_server" ] \ No newline at end of file diff --git a/backend/src/cache/OWNERS b/backend/src/cache/OWNERS new file mode 100644 index 00000000000..6a3d99036c4 --- /dev/null +++ b/backend/src/cache/OWNERS @@ -0,0 +1,6 @@ +approvers: + - Ark-kun + - rui5i +reviewers: + - Ark-kun + - rui5i \ No newline at end of file diff --git a/backend/src/cache/README.md b/backend/src/cache/README.md new file mode 100644 index 00000000000..4a8e0b9512b --- /dev/null +++ b/backend/src/cache/README.md @@ -0,0 +1,23 @@ +## Build src image +To build the Docker image of cache server, run the following Docker command from the pipelines directory: + +``` +docker build -t gcr.io/ml-pipeline/cache-server:latest -f backend/Dockerfile.cacheserver . +``` + +## Deploy cache service to an existing KFP deployment +1. Configure kubectl to talk to your newly created cluster. Refer to [Configuring cluster access for kubectl](https://cloud.google.com/kubernetes-engine/docs/how-to/cluster-access-for-kubectl). +2. Run deploy shell script to generate certificates and create MutatingWebhookConfiguration: + +``` +# Assume KFP is deployed in the namespace kubeflow +export NAMESPACE=kubeflow +./deployer/deploy-cache-service.sh +``` + +3. Go to pipelines/manifests/kustomize/base/cache folder and run following scripts: + +``` +kubectl apply -f cache-deployment.yaml --namespace $NAMESPACE +kubectl apply -f cache-service.yaml --namespace $NAMESPACE +``` diff --git a/backend/src/cache/admission.go b/backend/src/cache/admission.go new file mode 100644 index 00000000000..ae274baa1b8 --- /dev/null +++ b/backend/src/cache/admission.go @@ -0,0 +1,178 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "encoding/json" + "errors" + "fmt" + "io/ioutil" + "log" + "net/http" + + "k8s.io/api/admission/v1beta1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/serializer" + "k8s.io/apimachinery/pkg/types" +) + +type OperationType string + +const ( + OperationTypeAdd OperationType = "add" +) + +// patchOperation is an operation of a JSON patch, see https://tools.ietf.org/html/rfc6902 . +type patchOperation struct { + Op OperationType `json:"op"` + Path string `json:"path"` + Value interface{} `json:"value,omitempty"` +} + +// admitFunc is a callback for admission controller logic. Given an AdmissionRequest, it returns the sequence of patch +// operations to be applied in case of success, or the error that will be shown when the operation is rejected. +type admitFunc func(*v1beta1.AdmissionRequest) ([]patchOperation, error) + +const ( + ContentType string = "Content-Type" + JsonContentType string = "application/json" +) + +var ( + universalDeserializer = serializer.NewCodecFactory(runtime.NewScheme()).UniversalDeserializer() +) + +// isKubeNamespace checks if the given namespace is a Kubernetes-owned namespace. +func isKubeNamespace(ns string) bool { + return ns == metav1.NamespacePublic || ns == metav1.NamespaceSystem +} + +// doServeAdmitFunc parses the HTTP request for an admission controller webhook, and -- in case of a well-formed +// request -- delegates the admission control logic to the given admitFunc. The response body is then returned as raw +// bytes. +func doServeAdmitFunc(w http.ResponseWriter, r *http.Request, admit admitFunc) ([]byte, error) { + // Step 1: Request validation. Only handle POST requests with a body and json content type. + + if r.Method != http.MethodPost { + w.WriteHeader(http.StatusMethodNotAllowed) + return nil, fmt.Errorf("Invalid method %q, only POST requests are allowed", r.Method) + } + + body, err := ioutil.ReadAll(r.Body) + if err != nil { + w.WriteHeader(http.StatusBadRequest) + return nil, fmt.Errorf("Could not read request body: %v", err) + } + + if contentType := r.Header.Get(ContentType); contentType != JsonContentType { + w.WriteHeader(http.StatusBadRequest) + return nil, fmt.Errorf("Unsupported content type %q, only %q is supported", contentType, JsonContentType) + } + + // Step 2: Parse the AdmissionReview request. + + var admissionReviewReq v1beta1.AdmissionReview + + _, _, err = universalDeserializer.Decode(body, nil, &admissionReviewReq) + + if err != nil { + w.WriteHeader(http.StatusBadRequest) + return nil, fmt.Errorf("Could not deserialize request: %v", err) + } + if admissionReviewReq.Request == nil { + w.WriteHeader(http.StatusBadRequest) + return nil, errors.New("Malformed admission review request: request body is nil") + } + + // Step 3: Construct the AdmissionReview response. + + // Apply the admit() function only for non-Kubernetes namespaces. For objects in Kubernetes namespaces, return + // an empty set of patch operations. + if isKubeNamespace(admissionReviewReq.Request.Namespace) { + return allowedResponse(admissionReviewReq.Request.UID, nil), nil + } + + var patchOps []patchOperation + + patchOps, err = admit(admissionReviewReq.Request) + if err != nil { + return errorResponse(admissionReviewReq.Request.UID, err), nil + } + + patchBytes, err := json.Marshal(patchOps) + if err != nil { + w.WriteHeader(http.StatusInternalServerError) + return nil, fmt.Errorf("Could not marshal JSON patch: %v", err) + } + + return allowedResponse(admissionReviewReq.Request.UID, patchBytes), nil +} + +// serveAdmitFunc is a wrapper around doServeAdmitFunc that adds error handling and logging. +func serveAdmitFunc(w http.ResponseWriter, r *http.Request, admit admitFunc) { + log.Print("Handling webhook request ...") + + var writeErr error + if bytes, err := doServeAdmitFunc(w, r, admit); err != nil { + log.Printf("Error handling webhook request: %v", err) + w.WriteHeader(http.StatusInternalServerError) + _, writeErr = w.Write([]byte(err.Error())) + } else { + log.Print("Webhook request handled successfully") + _, writeErr = w.Write(bytes) + } + + if writeErr != nil { + log.Printf("Could not write response: %v", writeErr) + } +} + +// admitFuncHandler takes an admitFunc and wraps it into a http.Handler by means of calling serveAdmitFunc. +func admitFuncHandler(admit admitFunc) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + serveAdmitFunc(w, r, admit) + }) +} + +func allowedResponse(uid types.UID, patchBytes []byte) []byte { + admissionReviewResponse := v1beta1.AdmissionReview{ + Response: &v1beta1.AdmissionResponse{ + UID: uid, + }, + } + admissionReviewResponse.Response.Allowed = true + admissionReviewResponse.Response.Patch = patchBytes + + bytes, err := json.Marshal(&admissionReviewResponse) + if err != nil { + return errorResponse(uid, err) + } + return bytes +} + +func errorResponse(uid types.UID, err error) []byte { + admissionReviewResponse := v1beta1.AdmissionReview{ + Response: &v1beta1.AdmissionResponse{ + UID: uid, + }, + } + admissionReviewResponse.Response.Allowed = false + admissionReviewResponse.Response.Result = &metav1.Status{ + Message: err.Error(), + } + bytes, _ := json.Marshal(&admissionReviewResponse) + return bytes +} diff --git a/backend/src/cache/admission_test.go b/backend/src/cache/admission_test.go new file mode 100644 index 00000000000..4a9286f26f6 --- /dev/null +++ b/backend/src/cache/admission_test.go @@ -0,0 +1,111 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "encoding/json" + "net/http" + "net/http/httptest" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "k8s.io/api/admission/v1beta1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +var ( + fakeAdmissionReview = v1beta1.AdmissionReview{ + TypeMeta: metav1.TypeMeta{ + Kind: "pods", + APIVersion: "v1", + }, + Request: &v1beta1.AdmissionRequest{ + UID: "123", + }, + Response: &v1beta1.AdmissionResponse{ + UID: "123", + }, + } +) + +func fakeAdmitFunc(req *v1beta1.AdmissionRequest) ([]patchOperation, error) { + operation := patchOperation{ + Op: OperationTypeAdd, + Path: "test", + Value: "test", + } + return []patchOperation{operation}, nil +} + +func TestIsKubeNamespace(t *testing.T) { + assert.True(t, isKubeNamespace("kube-public")) + assert.True(t, isKubeNamespace("kube-system")) + assert.False(t, isKubeNamespace("kube")) +} + +func TestDoServeAdmitFunc(t *testing.T) { + body, err := json.Marshal(fakeAdmissionReview) + req, err := http.NewRequest("POST", "/url", strings.NewReader(string(body))) + req.Header.Set("Content-Type", "application/json") + + if err != nil { + t.Fatalf("failed to send fake request, err: %v", err) + } + + rr := httptest.NewRecorder() + patchOperations, err := doServeAdmitFunc(rr, req, fakeAdmitFunc) + require.NotNil(t, patchOperations) + assert.Nil(t, err) +} + +func TestDoServeAdmitFuncWithInvalidHttpMethod(t *testing.T) { + req, _ := http.NewRequest("Get", "", nil) + rr := httptest.NewRecorder() + patchOperations, err := doServeAdmitFunc(rr, req, fakeAdmitFunc) + assert.Nil(t, patchOperations) + assert.Contains(t, err.Error(), "Invalid method") +} + +func TestDoServeAdmitFuncWithInvalidContentType(t *testing.T) { + req, err := http.NewRequest("POST", "/url", strings.NewReader("")) + rr := httptest.NewRecorder() + patchOperations, err := doServeAdmitFunc(rr, req, fakeAdmitFunc) + assert.Nil(t, patchOperations) + assert.Contains(t, err.Error(), "Unsupported content type") +} + +func TestDoServeAdmitFuncWithInvalidRequestBody(t *testing.T) { + req, err := http.NewRequest("POST", "/url", strings.NewReader("invalid")) + req.Header.Set("Content-Type", "application/json") + rr := httptest.NewRecorder() + patchOperations, err := doServeAdmitFunc(rr, req, fakeAdmitFunc) + assert.Nil(t, patchOperations) + assert.Contains(t, err.Error(), "Could not deserialize request") +} + +func TestDoServeAdmitFuncWithEmptyAdmissionRequest(t *testing.T) { + invalidRequest := fakeAdmissionReview + invalidRequest.Request = nil + body, _ := json.Marshal(invalidRequest) + req, _ := http.NewRequest("POST", "/url", strings.NewReader(string(body))) + req.Header.Set("Content-Type", "application/json") + + rr := httptest.NewRecorder() + patchOperations, err := doServeAdmitFunc(rr, req, fakeAdmitFunc) + assert.Nil(t, patchOperations) + assert.Contains(t, err.Error(), "Malformed admission review request: request body is nil") +} diff --git a/backend/src/cache/deployer/Dockerfile b/backend/src/cache/deployer/Dockerfile new file mode 100644 index 00000000000..c9b399a7911 --- /dev/null +++ b/backend/src/cache/deployer/Dockerfile @@ -0,0 +1,21 @@ +FROM google/cloud-sdk:alpine + +RUN apk add --update \ + python \ + curl \ + which \ + jq \ + bash \ + openssl + +RUN gcloud components install kubectl + +ADD backend/src/cache/deployer/* /kfp/cache/deployer/ + +WORKDIR /kfp/cache/deployer + +RUN chmod +x deploy-cache-service.sh +RUN chmod +x webhook-create-signed-cert.sh +RUN chmod +x webhook-patch-ca-bundle.sh + +ENTRYPOINT ["/bin/sh", "/kfp/cache/deployer/deploy-cache-service.sh"] \ No newline at end of file diff --git a/backend/src/cache/deployer/cache-configmap.yaml.template b/backend/src/cache/deployer/cache-configmap.yaml.template new file mode 100644 index 00000000000..5e5abf6b036 --- /dev/null +++ b/backend/src/cache/deployer/cache-configmap.yaml.template @@ -0,0 +1,17 @@ +apiVersion: admissionregistration.k8s.io/v1beta1 +kind: MutatingWebhookConfiguration +metadata: + name: cache-webhook +webhooks: + - name: cache-server.${NAMESPACE}.svc + clientConfig: + service: + name: cache-server + namespace: ${NAMESPACE} + path: "/mutate" + caBundle: ${CA_BUNDLE} + rules: + - operations: [ "CREATE" ] + apiGroups: [""] + apiVersions: ["v1"] + resources: ["pods"] \ No newline at end of file diff --git a/backend/src/cache/deployer/deploy-cache-service.sh b/backend/src/cache/deployer/deploy-cache-service.sh new file mode 100755 index 00000000000..94199cc472d --- /dev/null +++ b/backend/src/cache/deployer/deploy-cache-service.sh @@ -0,0 +1,40 @@ +#!/bin/bash +# +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This script is for deploying cache service to an existing cluster. +# Prerequisite: config kubectl to talk to your cluster. See ref below: +# https://cloud.google.com/kubernetes-engine/docs/how-to/cluster-access-for-kubectl + +set -ex + +echo "Start deploying cache service to existing cluster:" + +NAMESPACE=${NAMESPACE_TO_WATCH:-default} +export CA_FILE="ca_cert" +rm -f ${CA_FILE} +touch ${CA_FILE} + +# Generate signed certificate for cache server. +./webhook-create-signed-cert.sh --namespace "${NAMESPACE}" --cert_output_path "${CA_FILE}" +echo "Signed certificate generated for cache server" + +# Patch CA_BUNDLE for MutatingWebhookConfiguration +NAMESPACE="$NAMESPACE" ./webhook-patch-ca-bundle.sh --cert_input_path "${CA_FILE}" <./cache-configmap.yaml.template >./cache-configmap-ca-bundle.yaml +echo "CA_BUNDLE patched successfully" + +# Create MutatingWebhookConfiguration +cat ./cache-configmap-ca-bundle.yaml +kubectl apply -f ./cache-configmap-ca-bundle.yaml --namespace "${NAMESPACE}" diff --git a/backend/src/cache/deployer/webhook-create-signed-cert.sh b/backend/src/cache/deployer/webhook-create-signed-cert.sh new file mode 100755 index 00000000000..0b7301035df --- /dev/null +++ b/backend/src/cache/deployer/webhook-create-signed-cert.sh @@ -0,0 +1,152 @@ +#!/bin/bash +# +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -ex + +usage() { + cat <> ${tmpdir}/csr.conf +[req] +req_extensions = v3_req +distinguished_name = req_distinguished_name +[req_distinguished_name] +[ v3_req ] +basicConstraints = CA:FALSE +keyUsage = nonRepudiation, digitalSignature, keyEncipherment +extendedKeyUsage = serverAuth +subjectAltName = @alt_names +[alt_names] +DNS.1 = ${service} +DNS.2 = ${service}.${namespace} +DNS.3 = ${service}.${namespace}.svc +EOF + +openssl genrsa -out ${tmpdir}/server-key.pem 2048 +openssl req -new -key ${tmpdir}/server-key.pem -subj "/CN=${service}.${namespace}.svc" -out ${tmpdir}/server.csr -config ${tmpdir}/csr.conf + +echo "start running kubectl..." + +# clean-up any previously created CSR for our service. Ignore errors if not present. +kubectl delete csr ${csrName} 2>/dev/null || true + +# create server cert/key CSR and send to k8s API +cat <&2 + exit 1 +fi +echo ${serverCert} | openssl base64 -d -A -out ${tmpdir}/server-cert.pem + +echo ${serverCert} > ${cert_output_path} + +# create the secret with CA cert and server cert/key +kubectl create secret generic ${secret} \ + --from-file=key.pem=${tmpdir}/server-key.pem \ + --from-file=cert.pem=${tmpdir}/server-cert.pem \ + --dry-run -o yaml | + kubectl -n ${namespace} apply -f - \ No newline at end of file diff --git a/backend/src/cache/deployer/webhook-patch-ca-bundle.sh b/backend/src/cache/deployer/webhook-patch-ca-bundle.sh new file mode 100755 index 00000000000..3515549f9cf --- /dev/null +++ b/backend/src/cache/deployer/webhook-patch-ca-bundle.sh @@ -0,0 +1,44 @@ +#!/bin/bash +# +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This file will patch CA bundle and namespace to MutatingWebhookConfiguration + +ROOT=$(cd $(dirname $0)/../../; pwd) + +set -o errexit +set -o nounset +set -o pipefail +set -ex + +while [[ $# -gt 0 ]]; do + case ${1} in + --cert_input_path) + cert_input_path="$2" + shift + ;; + esac + shift +done + +[ -z ${cert_input_path} ] && cert_input_path=${CA_FILE} + +export CA_BUNDLE=$(cat ${cert_input_path}) + +if command -v envsubst >/dev/null 2>&1; then + envsubst +else + sed -e "s|\${CA_BUNDLE}|${CA_BUNDLE}|g" -e "s|\${NAMESPACE}|${NAMESPACE}|g" +fi diff --git a/backend/src/cache/main.go b/backend/src/cache/main.go new file mode 100644 index 00000000000..ae30c79898d --- /dev/null +++ b/backend/src/cache/main.go @@ -0,0 +1,47 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "log" + "net/http" + "path/filepath" +) + +const ( + TlsDir string = "/etc/webhook/certs" + TlsCertFile string = "cert.pem" + TlsKeyFile string = "key.pem" +) + +const ( + MutateApi string = "/mutate" + WebhookPort string = ":8443" +) + +func main() { + certPath := filepath.Join(TlsDir, TlsCertFile) + keyPath := filepath.Join(TlsDir, TlsKeyFile) + + mux := http.NewServeMux() + mux.Handle(MutateApi, admitFuncHandler(mutatePodIfCached)) + server := &http.Server{ + // We listen on port 8443 such that we do not need root privileges or extra capabilities for this server. + // The Service object will take care of mapping this port to the HTTPS port 443. + Addr: WebhookPort, + Handler: mux, + } + log.Fatal(server.ListenAndServeTLS(certPath, keyPath)) +} diff --git a/backend/src/cache/mutation.go b/backend/src/cache/mutation.go new file mode 100644 index 00000000000..745da9a3dcd --- /dev/null +++ b/backend/src/cache/mutation.go @@ -0,0 +1,78 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "crypto/sha256" + "encoding/hex" + "fmt" + "log" + + "k8s.io/api/admission/v1beta1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +const ( + ArgoWorkflowTemplate string = "workflows.argoproj.io/template" + ExecutionKey string = "pipelines.kubeflow.org/execution_cache_key" + AnnotationPath string = "/metadata/annotations" +) + +var ( + podResource = metav1.GroupVersionResource{Version: "v1", Resource: "pods"} +) + +// mutatePodIfCached will check whether the execution has already been run before from MLMD and apply the output into pod.metadata.output +func mutatePodIfCached(req *v1beta1.AdmissionRequest) ([]patchOperation, error) { + // This handler should only get called on Pod objects as per the MutatingWebhookConfiguration in the YAML file. + // However, if (for whatever reason) this gets invoked on an object of a different kind, issue a log message but + // let the object request pass through otherwise. + if req.Resource != podResource { + log.Printf("Expect resource to be %q, but found %q", podResource, req.Resource) + return nil, nil + } + + // Parse the Pod object. + raw := req.Object.Raw + pod := corev1.Pod{} + if _, _, err := universalDeserializer.Decode(raw, nil, &pod); err != nil { + return nil, fmt.Errorf("could not deserialize pod object: %v", err) + } + + var patches []patchOperation + annotations := pod.ObjectMeta.Annotations + template, exists := annotations[ArgoWorkflowTemplate] + var executionHashKey string + if !exists { + return patches, nil + } + + // Generate the executionHashKey based on pod.metadata.annotations.workflows.argoproj.io/template + hash := sha256.New() + hash.Write([]byte(template)) + md := hash.Sum(nil) + executionHashKey = hex.EncodeToString(md) + + annotations[ExecutionKey] = executionHashKey + // Add executionKey to pod.metadata.annotations + patches = append(patches, patchOperation{ + Op: OperationTypeAdd, + Path: AnnotationPath, + Value: annotations, + }) + + return patches, nil +} diff --git a/backend/src/cache/mutation_test.go b/backend/src/cache/mutation_test.go new file mode 100644 index 00000000000..8a445ac1d43 --- /dev/null +++ b/backend/src/cache/mutation_test.go @@ -0,0 +1,95 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "bytes" + "encoding/json" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "k8s.io/api/admission/v1beta1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" +) + +var ( + fakePod = &corev1.Pod{ + TypeMeta: metav1.TypeMeta{ + Kind: "Pod", + APIVersion: "v1", + }, + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + ArgoWorkflowTemplate: "test_template", + }, + }, + } + fakeAdmissionRequest = v1beta1.AdmissionRequest{ + UID: "test-12345", + Kind: metav1.GroupVersionKind{ + Group: "group", + Version: "v1", + Kind: "k8s", + }, + Resource: metav1.GroupVersionResource{ + Version: "v1", + Resource: "pods", + }, + SubResource: "subresource", + Name: "test", + Namespace: "default", + Operation: "test", + Object: runtime.RawExtension{ + Raw: EncodePod(fakePod), + }, + } +) + +func EncodePod(pod *corev1.Pod) []byte { + reqBodyBytes := new(bytes.Buffer) + json.NewEncoder(reqBodyBytes).Encode(*pod) + + return reqBodyBytes.Bytes() +} + +func TestMutatePodIfCachedWithErrorPodResource(t *testing.T) { + mockAdmissionRequest := &v1beta1.AdmissionRequest{ + Resource: metav1.GroupVersionResource{ + Version: "wrong", Resource: "wrong", + }, + } + patchOperations, err := mutatePodIfCached(mockAdmissionRequest) + assert.Nil(t, patchOperations) + assert.Nil(t, err) +} + +func TestMutatePodIfCachedWithDecodeError(t *testing.T) { + invalidAdmissionRequest := fakeAdmissionRequest + invalidAdmissionRequest.Object.Raw = []byte{5, 5} + patchOperation, err := mutatePodIfCached(&invalidAdmissionRequest) + assert.Nil(t, patchOperation) + assert.Contains(t, err.Error(), "could not deserialize pod object") +} + +func TestMutatePodIfCached(t *testing.T) { + patchOperation, err := mutatePodIfCached(&fakeAdmissionRequest) + assert.Nil(t, err) + require.NotNil(t, patchOperation) + require.Equal(t, 1, len(patchOperation)) + require.Equal(t, patchOperation[0].Op, OperationTypeAdd) +} diff --git a/manifests/kustomize/base/cache-deployer/cache-deployer-clusterrole.yaml b/manifests/kustomize/base/cache-deployer/cache-deployer-clusterrole.yaml new file mode 100644 index 00000000000..15050c78e02 --- /dev/null +++ b/manifests/kustomize/base/cache-deployer/cache-deployer-clusterrole.yaml @@ -0,0 +1,85 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app: kubeflow-pipelines-cache-deployer-clusterrole + name: kubeflow-pipelines-cache-deployer-clusterrole +rules: +- apiGroups: + - "" + resources: + - pods + verbs: + - get + - list + - watch + - update + - patch +- apiGroups: + - "*" + resources: + - configmaps + verbs: + - get + - create +- apiGroups: + - extensions + - apps + resources: + - deployments + verbs: + - get + - create + - list + - watch + - update + - patch + - delete +- apiGroups: + - "" + resources: + - services + verbs: + - get + - create + - list + - watch + - update + - patch + - delete +- apiGroups: + - certificates.k8s.io + resources: + - certificatesigningrequests + - certificatesigningrequests/approval + verbs: + - create + - get + - update + - watch + - delete + - patch +- apiGroups: + - admissionregistration.k8s.io + resources: + - mutatingwebhookconfigurations + verbs: + - get + - list + - watch + - create + - update + - patch + - delete +- apiGroups: + - "" + resources: + - secrets + verbs: + - get + - list + - watch + - create + - update + - patch + - delete diff --git a/manifests/kustomize/base/cache-deployer/cache-deployer-clusterrolebinding.yaml b/manifests/kustomize/base/cache-deployer/cache-deployer-clusterrolebinding.yaml new file mode 100644 index 00000000000..1dc9dc55d1a --- /dev/null +++ b/manifests/kustomize/base/cache-deployer/cache-deployer-clusterrolebinding.yaml @@ -0,0 +1,11 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: kubeflow-pipelines-cache-deployer-clusterrolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: kubeflow-pipelines-cache-deployer-clusterrole +subjects: +- kind: ServiceAccount + name: kubeflow-pipelines-cache-deployer-sa \ No newline at end of file diff --git a/manifests/kustomize/base/cache-deployer/cache-deployer-job.yaml b/manifests/kustomize/base/cache-deployer/cache-deployer-job.yaml new file mode 100644 index 00000000000..faa4cc75e86 --- /dev/null +++ b/manifests/kustomize/base/cache-deployer/cache-deployer-job.yaml @@ -0,0 +1,24 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: cache-deployer-job + labels: + app: cache-deployer +spec: + template: + metadata: + labels: + app: cache-deployer + spec: + containers: + - name: main + image: gcr.io/ml-pipeline-test/cache-deployer:latest + imagePullPolicy: Always + env: + - name: NAMESPACE_TO_WATCH + valueFrom: + fieldRef: + fieldPath: metadata.namespace + serviceAccountName: kubeflow-pipelines-cache-deployer-sa + restartPolicy: OnFailure + \ No newline at end of file diff --git a/manifests/kustomize/base/cache-deployer/cache-deployer-sa.yaml b/manifests/kustomize/base/cache-deployer/cache-deployer-sa.yaml new file mode 100644 index 00000000000..9cd266d737d --- /dev/null +++ b/manifests/kustomize/base/cache-deployer/cache-deployer-sa.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: kubeflow-pipelines-cache-deployer-sa \ No newline at end of file diff --git a/manifests/kustomize/base/cache-deployer/kustomization.yaml b/manifests/kustomize/base/cache-deployer/kustomization.yaml new file mode 100644 index 00000000000..c4d431f52be --- /dev/null +++ b/manifests/kustomize/base/cache-deployer/kustomization.yaml @@ -0,0 +1,9 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: + - cache-deployer-clusterrole.yaml + - cache-deployer-clusterrolebinding.yaml + - cache-deployer-sa.yaml + - cache-deployer-job.yaml + \ No newline at end of file diff --git a/manifests/kustomize/base/cache/cache-deployment.yaml b/manifests/kustomize/base/cache/cache-deployment.yaml new file mode 100644 index 00000000000..175c0d981a3 --- /dev/null +++ b/manifests/kustomize/base/cache/cache-deployment.yaml @@ -0,0 +1,31 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: cache-server + labels: + app: cache-server +spec: + replicas: 1 + selector: + matchLabels: + app: cache-server + template: + metadata: + labels: + app: cache-server + spec: + containers: + - name: server + image: gcr.io/ml-pipeline-test/cache-server:latest + imagePullPolicy: Always + ports: + - containerPort: 8443 + name: webhook-api + volumeMounts: + - name: webhook-tls-certs + mountPath: /etc/webhook/certs + readOnly: true + volumes: + - name: webhook-tls-certs + secret: + secretName: webhook-server-tls \ No newline at end of file diff --git a/manifests/kustomize/base/cache/cache-service.yaml b/manifests/kustomize/base/cache/cache-service.yaml new file mode 100644 index 00000000000..5916d541ec4 --- /dev/null +++ b/manifests/kustomize/base/cache/cache-service.yaml @@ -0,0 +1,10 @@ +apiVersion: v1 +kind: Service +metadata: + name: cache-server +spec: + selector: + app: cache-server + ports: + - port: 443 + targetPort: webhook-api \ No newline at end of file diff --git a/manifests/kustomize/base/cache/kustomization.yaml b/manifests/kustomize/base/cache/kustomization.yaml new file mode 100644 index 00000000000..16aa045194b --- /dev/null +++ b/manifests/kustomize/base/cache/kustomization.yaml @@ -0,0 +1,6 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: + - cache-deployment.yaml + - cache-service.yaml diff --git a/test/build-images.sh b/test/build-images.sh index cf77e62de29..fb6036484d6 100755 --- a/test/build-images.sh +++ b/test/build-images.sh @@ -31,11 +31,13 @@ if echo "$BUILT_IMAGES" | grep viewer-crd-controller && \ echo "$BUILT_IMAGES" | grep inverse-proxy-agent && \ echo "$BUILT_IMAGES" | grep metadata-writer && \ + echo "$BUILT_IMAGES" | grep cache-server && \ + echo "$BUILT_IMAGES" | grep cache-deployer && \ echo "$BUILT_IMAGES" | grep visualization-server; then echo "docker images for api-server, frontend, scheduledworkflow, \ - persistenceagent, viewer-crd-controller, inverse-proxy-agent, metadata-writer, and visualization-server \ - are already built in ${GCR_IMAGE_BASE_DIR}." + persistenceagent, viewer-crd-controller, inverse-proxy-agent, metadata-writer, cache-server, \ + cache-deployer and visualization-server are already built in ${GCR_IMAGE_BASE_DIR}." else echo "submitting cloud build to build docker images for commit ${COMMIT_SHA}..." IMAGES_BUILDING=true diff --git a/test/cloudbuild/batch_build.yaml b/test/cloudbuild/batch_build.yaml index 0dfb9b7391e..594dba4aded 100644 --- a/test/cloudbuild/batch_build.yaml +++ b/test/cloudbuild/batch_build.yaml @@ -33,6 +33,14 @@ steps: name: 'gcr.io/cloud-builders/docker' args: ['build', '-t', '$_GCR_BASE/metadata-writer', '-f', 'backend/metadata_writer/Dockerfile', '.'] waitFor: ["-"] + - id: 'buildCacheServer' + name: 'gcr.io/cloud-builders/docker' + args: ['build', '-t', '$_GCR_BASE/cache-server', '-f', 'backend/Dockerfile.cacheserver', '.'] + waitFor: ["-"] + - id: 'buildCacheDeployer' + name: 'gcr.io/cloud-builders/docker' + args: ['build', '-t', '$_GCR_BASE/cache-deployer', '-f', 'backend/src/cache/deployer/Dockerfile', '.'] + waitFor: ["-"] options: machineType: N1_HIGHCPU_8 # use a fast machine to build because there a lot of work images: @@ -43,4 +51,6 @@ images: - "$_GCR_BASE/visualization-server" - "$_GCR_BASE/inverse-proxy-agent" - "$_GCR_BASE/metadata-writer" + - "$_GCR_BASE/cache-server" + - "$_GCR_BASE/cache-deployer" timeout: 1800s # 30min diff --git a/test/deploy-pipeline-lite.sh b/test/deploy-pipeline-lite.sh index 0fdc6752f73..e9ce4aa9b22 100755 --- a/test/deploy-pipeline-lite.sh +++ b/test/deploy-pipeline-lite.sh @@ -56,6 +56,8 @@ kustomize edit set image gcr.io/ml-pipeline/viewer-crd-controller=${GCR_IMAGE_BA kustomize edit set image gcr.io/ml-pipeline/visualization-server=${GCR_IMAGE_BASE_DIR}/visualization-server:${GCR_IMAGE_TAG} kustomize edit set image gcr.io/ml-pipeline/inverse-proxy-agent=${GCR_IMAGE_BASE_DIR}/inverse-proxy-agent:${GCR_IMAGE_TAG} kustomize edit set image gcr.io/ml-pipeline/metadata-writer=${GCR_IMAGE_BASE_DIR}/metadata-writer:${GCR_IMAGE_TAG} +kustomize edit set image gcr.io/ml-pipeline-test/cache-server=${GCR_IMAGE_BASE_DIR}/cache-server:${GCR_IMAGE_TAG} +kustomize edit set image gcr.io/ml-pipeline-test/cache-deployer=${GCR_IMAGE_BASE_DIR}/cache-deployer:${GCR_IMAGE_TAG} cat kustomization.yaml kustomize build . | kubectl apply -f - diff --git a/test/manifests/dev/kustomization.yaml b/test/manifests/dev/kustomization.yaml index 58be3be24a9..f38db9d885c 100644 --- a/test/manifests/dev/kustomization.yaml +++ b/test/manifests/dev/kustomization.yaml @@ -1,7 +1,11 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization +namespace: kubeflow + # Actual image overrides will be added in test scripts. images: [] resources: - ../../../manifests/kustomize/env/dev +- ../../../manifests/kustomize/base/cache +- ../../../manifests/kustomize/base/cache-deployer