From 19ee0bbf5950a7a585b2ce7393701a6848e8adc7 Mon Sep 17 00:00:00 2001
From: Dan Sun
Date: Fri, 26 Jun 2020 14:39:31 -0400
Subject: [PATCH] Update KFServing docs (#897)

* Fix up kfserving install doc link
* Update quick install for 0.3.0
* Upgrade quick install to use istio 1.6.2
* Add perf test job for sklearn example
* Add KFServing demo gif
* Reorganize examples
* Add feature descriptions
* Add feature table for model serve
* Add alibi references
* Update main README
* Add batcher/gRPC example
* Fix perf job for sklearn example
* Separate custom predictor
* Update batching and alibi
* Add roadmap
---
 README.md                                     |  36 ++++-
 docs/samples/README.md                        | 152 ++++++++++++------
 docs/samples/autoscaling/README.md            |   2 +-
 docs/samples/bentoml/README.md                |   2 +-
 docs/samples/custom-domain/README.md          |   2 +-
 docs/samples/custom/hello-world/README.md     |   2 +-
 .../custom/kfserving-custom-model/README.md   |   2 +-
 docs/samples/custom/prebuilt-image/README.md  |   2 +-
 docs/samples/gcp-iap/README.md                |   2 +-
 docs/samples/kafka/README.md                  |   2 +-
 docs/samples/onnx/README.md                   |   2 +-
 docs/samples/pytorch/README.md                |   2 +-
 docs/samples/rollouts/README.md               |   2 +-
 docs/samples/s3/README.md                     |   2 +-
 docs/samples/sklearn/README.md                |   2 +-
 docs/samples/sklearn/perf.yaml                |  47 ++++++
 docs/samples/tensorflow/README.md             |   2 +-
 .../transformer/image_transformer/README.md   |   2 +-
 docs/samples/triton/simple_string/README.md   |   2 +-
 docs/samples/xgboost/README.md                |   2 +-
 hack/quick_install.sh                         |  99 ++++++------
 21 files changed, 248 insertions(+), 120 deletions(-)
 create mode 100644 docs/samples/sklearn/perf.yaml

diff --git a/README.md b/README.md
index 467fb10a0cd..7f501ce5ca3 100644
--- a/README.md
+++ b/README.md
@@ -14,7 +14,7 @@ Knative Serving and Istio should be available on Kubernetes Cluster, Knative dep
 - [Istio](https://knative.dev/docs/install/installing-istio): v1.1.6+

 If you want to get up running Knative quickly or you do not need service mesh, we recommend installing Istio without service mesh(sidecar injection).
-- [Knative Serving](https://knative.dev/docs/install/knative-with-any-k8s): v0.11.1+
+- [Knative Serving](https://knative.dev/docs/install/knative-with-any-k8s): v0.11.2+

 Currently only `Knative Serving` is required, `cluster-local-gateway` is required to serve cluster-internal traffic for transformer and explainer use cases. Please follow instructions here to install [cluster local gateway](https://knative.dev/docs/install/installing-istio/#updating-your-install-to-use-cluster-local-gateway)
@@ -55,7 +55,6 @@ If you are using Kubeflow dashboard or [profile controller](https://www.kubeflow
 Make sure you have
 [kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/#install-kubectl-on-linux),
-[kustomize v3.5.4+](https://github.com/kubernetes-sigs/kustomize/blob/master/docs/INSTALL.md),
 [helm 3](https://helm.sh/docs/intro/install) installed before you start.(2 mins for setup)
 1) If you do not have an existing kubernetes cluster you can create a quick kubernetes local cluster with [kind](https://github.com/kubernetes-sigs/kind#installation-and-usage).(this takes 30s)
```bash
kind create cluster
```
```bash
./hack/quick_install.sh
```
+#### Ingress Setup and Monitoring Stack
+- [Configure Custom Ingress Gateway](https://knative.dev/docs/serving/setting-up-custom-ingress-gateway/)
+  - In addition, you need to update the [KFServing configmap](config/default/configmap/inferenceservice.yaml) to use the custom ingress gateway.
+- [Configure HTTPS Connection](https://knative.dev/docs/serving/using-a-tls-cert/)
+- [Configure Custom Domain](https://knative.dev/docs/serving/using-a-custom-domain/)
+- [Metrics](https://knative.dev/docs/serving/accessing-metrics/)
+- [Tracing](https://knative.dev/docs/serving/accessing-traces/)
+- [Logging](https://knative.dev/docs/serving/accessing-logs/)
+- [Dashboard for ServiceMesh](https://istio.io/latest/docs/tasks/observability/kiali/)
+
 ### Test KFServing Installation

 1) To check if KFServing Controller is installed correctly, please run the following command
@@ -94,6 +103,21 @@ kubectl port-forward --namespace istio-system $(kubectl get pod --namespace isti
 SERVICE_HOSTNAME=$(kubectl get inferenceservice sklearn-iris -n kfserving-test -o jsonpath='{.status.url}' | cut -d "/" -f 3)
 curl -v -H "Host: ${SERVICE_HOSTNAME}" http://localhost:8080/v1/models/sklearn-iris:predict -d @./docs/samples/sklearn/iris-input.json
 ```
+5) Run Performance Test
+```bash
+kubectl create -f docs/samples/sklearn/perf.yaml
+# wait for the job to finish and check the log
+kubectl logs load-test8b58n-rgfxr
+Requests      [total, rate, throughput]         30000, 500.02, 499.99
+Duration      [total, attack, wait]             1m0s, 59.998s, 3.336ms
+Latencies     [min, mean, 50, 90, 95, 99, max]  1.743ms, 2.748ms, 2.494ms, 3.363ms, 4.091ms, 7.749ms, 46.354ms
+Bytes In      [total, mean]                     690000, 23.00
+Bytes Out     [total, mean]                     2460000, 82.00
+Success       [ratio]                           100.00%
+Status Codes  [code:count]                      200:30000
+Error Set:
+```
+
 ### Use KFServing SDK
 * Install the SDK
 ```
@@ -103,8 +127,11 @@ curl -v -H "Host: ${SERVICE_HOSTNAME}" http://localhost:8080/v1/models/sklearn-i
 * Follow the [example here](docs/samples/client/kfserving_sdk_sample.ipynb) to use the KFServing SDK to create, rollout, promote, and delete an InferenceService instance.
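For reference, the `sklearn-iris` service exercised by the test and performance steps above is created from a one-line predictor spec. A minimal sketch, assuming the v1alpha2 API that ships with KFServing 0.3 and the sample model bucket used in docs/samples/sklearn:

```yaml
apiVersion: serving.kubeflow.org/v1alpha2
kind: InferenceService
metadata:
  name: sklearn-iris
spec:
  default:
    predictor:
      sklearn:
        # public sample model referenced throughout these docs
        storageUri: "gs://kfserving-samples/models/sklearn/iris"
```

Applying a manifest like this in the `kfserving-test` namespace (e.g. `kubectl apply -f docs/samples/sklearn/sklearn.yaml -n kfserving-test`) is what makes the hostname lookup and curl above resolve.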
-### KFServing Examples
-[KFServing examples](./docs/samples/README.md)
+### KFServing Features and Examples
+[KFServing Features and Examples](./docs/samples/README.md)
+
+### KFServing Roadmap
+[KFServing Roadmap](./ROADMAP.md)

 ### KFServing Concepts and Data Plane
 [KFServing Concepts and Data Plane](./docs/README.md)
@@ -123,3 +150,4 @@ curl -v -H "Host: ${SERVICE_HOSTNAME}" http://localhost:8080/v1/models/sklearn-i
 ### Contributor Guide
 [Contributor Guide](./CONTRIBUTING.md)
+
diff --git a/docs/samples/README.md b/docs/samples/README.md
index 696628e0c4c..36620d6e027 100644
--- a/docs/samples/README.md
+++ b/docs/samples/README.md
@@ -1,61 +1,115 @@
-## KFServing Examples
-
-### Deploy KFServing InferenceService with out of the box Predictor
-[SKLearn Model](./sklearn)
-
-[PyTorch Model](./pytorch)
-
-[Tensorflow Model](./tensorflow)
-
-[XGBoost Model](./xgboost)
-
-[ONNX Model with ONNX Runtime](./onnx)
-
-[Simple String Model with NVIDIA Triton Inference Server](./triton/simple_string)
-
-[Serve BERT Model with NVIDIA Triton Inference Server](./triton/bert)
-
-### Deploy KFServing InferenceService with a custom Predictor
-
-[Hello World Flask Server](./custom/hello-world)
-
-[KFServing Custom Model](./custom/kfserving-custom-model)
-
-[Prebuilt Image](./custom/prebuilt-image)
-
-[BentoML](./bentoml)
-
-### Deploy KFServing InferenceService with Transformer
-[Image Transformer with PyTorch Predictor](./transformer/image_transformer)
-
-### Deploy KFServing InferenceService with Explainer
-[Alibi Image Explainer](./explanation/alibi/imagenet)
-
-[Alibi Text Explainer](./explanation/alibi/moviesentiment)
-
-[Alibi Tabular Explainer](./explanation/alibi/income)
-
-### Deploy KFServing InferenceService with Cloud or PVC storage
-
-[Models on S3](./s3)
-
-[Models on PVC](./pvc)
-
-[Models on Azure](./azure)
-
-### Deploy KFServing InferenceService with Autoscaling, Canary Rollout and Other Integrations
+## KFServing Features and Examples
+
+### Deploy InferenceService with Predictor
+KFServing provides a simple Kubernetes CRD for deploying trained models onto model servers such as [TFServing](https://www.tensorflow.org/tfx/guide/serving),
+[ONNXRuntime](https://github.com/microsoft/onnxruntime), [Triton Inference Server](https://docs.nvidia.com/deeplearning/triton-inference-server/user-guide/docs) and
+[KFServer](https://github.com/kubeflow/kfserving/tree/master/python/kfserving). These model servers also expose a standardised API for both REST and gRPC. For more complex use cases you can build your own model server:
+KFServing provides basic API primitives that make it easy to build a custom model server, and you can use tools like [BentoML](https://docs.bentoml.org/en/latest) to build your custom model serving image, as sketched below.
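As a hedged sketch of the custom route just described, the v1alpha2 `custom` predictor wraps any container that serves predictions over HTTP; the image name here is a placeholder, not a published image:

```yaml
apiVersion: serving.kubeflow.org/v1alpha2
kind: InferenceService
metadata:
  name: custom-sample
spec:
  default:
    predictor:
      custom:
        container:
          # placeholder image -- build and push your own model server
          image: docker.io/yourname/custom-model:v1
```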
+After models are deployed onto model servers with KFServing, you get all of the following serverless features provided by KFServing:
+- Scale to and from Zero
+- Request based Autoscaling on CPU/GPU
+- Revision Management
+- Optimized Container
+- Batching and Logger
+- Traffic management
+- Security with AuthN/AuthZ
+- Distributed Tracing
+- Out-of-the-box metrics
+- Ingress/Egress control
+
+| Out-of-the-box Predictor | Exported model | HTTP | gRPC | Examples |
+| ------------- | ------------- | ------------- | ------------- | ------------- |
+| Deploy SKLearn Model on KFServer | pickled model (model.pkl, model.joblib) | :heavy_check_mark: | V2 | [SKLearn Iris](./sklearn) |
+| Deploy XGBoost Model on KFServer | pickled model (model.bst) | :heavy_check_mark: | V2 | [XGBoost Iris](./xgboost) |
+| Deploy PyTorch Model on KFServer | [torch.save model (model.pt)](https://pytorch.org/docs/master/generated/torch.save.html) | :heavy_check_mark: | V2 | [PyTorch Cifar10](./pytorch) |
+| Deploy Tensorflow Model on TFServing | [Tensorflow SavedModel](https://www.tensorflow.org/guide/saved_model) | :heavy_check_mark: | :heavy_check_mark: | [Tensorflow Flowers](./tensorflow) |
+| Deploy ONNX Model on ONNXRuntime | [Exported onnx model (model.onnx)](https://github.com/onnx/tutorials#converting-to-onnx-format) | :heavy_check_mark: | :heavy_check_mark: | [ONNX Style Model](./onnx) |
+| Deploy Model on Triton Server | [Tensorflow, PyTorch, ONNX, TensorRT](https://docs.nvidia.com/deeplearning/triton-inference-server/user-guide/docs/model_repository.html) | :heavy_check_mark: | :heavy_check_mark: | [Simple String](./triton/simple_string) |
+
+| Custom Predictor | Examples |
+| ------------- | ------------- |
+| Deploy model on custom KFServer | [Custom KFServer](./custom/kfserving-custom-model) |
+| Deploy model on BentoML | [SKLearn Iris with BentoML](./bentoml) |
+| Deploy model on custom HTTP Server | [Prebuilt model server](./custom/prebuilt-image) |
+| Deploy model on custom gRPC Server | [Prebuilt gRPC server](./custom/grpc-server) |
+
+In addition to deploying an InferenceService with an HTTP/gRPC endpoint, you can also connect an InferenceService to [Knative Event Sources](https://knative.dev/docs/eventing/sources/index.html) such as Kafka;
+you can find an example [here](./kafka) which shows how to build an async inference pipeline.
+
+### Deploy InferenceService with Transformer
+KFServing transformer enables users to define pre/post processing steps around the prediction and explanation workflow.
+The transformer runs as a separate microservice and can work with any type of pre-packaged model server; it can also
+scale independently from the predictor, for example when your transformer is CPU bound while the predictor requires a GPU.
+
+| Features | Examples |
+| ------------- | ------------- |
+| Deploy Transformer with KFServer | [Image Transformer with PyTorch KFServer](./transformer/image_transformer) |
+| Deploy Transformer with Triton Server | [BERT Model with tokenizer](./triton/bert) |
+
+### Deploy InferenceService with Explainer
+Model explainability answers the question "Why did my model make this prediction?" for a given instance. KFServing
+integrates with [Alibi Explainer](https://github.com/SeldonIO/alibi), which implements a black-box algorithm: it generates many similar looking instances
+for a given input and sends them to the model server to produce an explanation, as in the sketch below.
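A minimal sketch of the explainer wiring in the v1alpha2 API, modeled on the income example linked in the table that follows; the storage URIs are illustrative:

```yaml
apiVersion: serving.kubeflow.org/v1alpha2
kind: InferenceService
metadata:
  name: income
spec:
  default:
    predictor:
      sklearn:
        # illustrative path; see the income example for the real artifacts
        storageUri: "gs://seldon-models/sklearn/income/model"
    explainer:
      alibi:
        type: AnchorTabular
        # illustrative path to the saved explainer
        storageUri: "gs://seldon-models/sklearn/income/explainer"
```

Explanations are then served on the `:explain` verb of the same model endpoint.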
+
+| Features | Examples |
+| ------------- | ------------- |
+| Deploy Alibi Image Explainer | [Imagenet Explainer](./explanation/alibi/imagenet) |
+| Deploy Alibi Income Explainer | [Income Explainer](./explanation/alibi/income) |
+| Deploy Alibi Text Explainer | [Alibi Text Explainer](./explanation/alibi/moviesentiment) |
+
+### Deploy InferenceService with Outlier/Drift Detector
+In order to trust and reliably act on model predictions, it is crucial to monitor the distribution of incoming
+requests via different types of detectors. KFServing integrates [Alibi Detect](https://github.com/SeldonIO/alibi-detect) with the following components:
+- The drift detector checks when the distribution of incoming requests diverges from a reference distribution, such as that of the training data.
+- The outlier detector flags single instances which do not follow the training distribution.
+
+| Features | Examples |
+| ------------- | ------------- |
+| Deploy Alibi Outlier Detection | [Cifar outlier detector](./outlier-detection/alibi-detect/cifar10) |
+| Deploy Alibi Drift Detection | [Cifar drift detector](./drift-detection/alibi-detect/cifar10) |
+
+### Deploy InferenceService with Cloud/PVC storage
+| Feature | Examples |
+| ------------- | ------------- |
+| Deploy Model on S3 | [Mnist model on S3](./s3) |
+| Deploy Model on PVC | [Models on PVC](./pvc) |
+| Deploy Model on Azure | [Models on Azure](./azure) |
+
+### Autoscaling
+KFServing's main serverless capability is letting you run inference workloads without manually scaling your service once it is deployed. KFServing leverages Knative's [autoscaler](https://knative.dev/docs/serving/configuring-autoscaling/);
+the autoscaler works on GPU as well, since it scales on request volume instead of GPU/CPU metrics, which can be hard
+to reason about.

 [Autoscale inference workload on CPU/GPU](./autoscaling)

 [InferenceService on GPU nodes](./accelerators)

+### Canary Rollout
+Canary deployment enables safe rollouts by splitting traffic between different versions of the model.

 [Canary Rollout](./rollouts)

+### Kubeflow Pipeline Integration
 [InferenceService with Kubeflow Pipeline](./pipelines)

-[InferenceService with Request/Response Logger](./logger/basic)
+### Request Batching (Alpha)
+Batching individual inference requests can be important, as most ML/DL frameworks are optimized for batch requests.
+In cases where a service receives a heavy load of requests, it is advantageous to batch them: this maximizes
+utilization of the CPU/GPU compute resources, but you need to run enough tests to find the optimal batch size and analyze
+the traffic patterns before enabling batch inference. KFServing injects a batcher sidecar, so batching works with any model server
+deployed on KFServing; you can read more in this [example](./batcher).

-[InferenceService with Kafka Event Source](./kafka)
+### Request/Response Logger
+KFServing supports logging your inference request/response by injecting a sidecar alongside your model server, as sketched below.
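A minimal sketch, assuming the v1alpha2 `logger` field and a message-dumper service like the one in the basic example listed in the table that follows:

```yaml
apiVersion: serving.kubeflow.org/v1alpha2
kind: InferenceService
metadata:
  name: sklearn-iris-logging
spec:
  default:
    predictor:
      logger:
        # assumed: a message-dumper Knative Service in the default namespace
        url: http://message-dumper.default/
        # log both requests and responses
        mode: all
      sklearn:
        storageUri: "gs://kfserving-samples/models/sklearn/iris"
```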
+| Feature | Examples | +| ------------- | ------------- | +| Deploy Logger with a Logger Service| [Message Dumper Service](./logger/basic) | +| Deploy Async Logger| [Message Dumper Using Knative Eventing](./logger/knative-eventing) | + + +### Deploy InferenceService behind an Authentication Proxy with Kubeflow [InferenceService on Kubeflow with Istio-Dex](./istio-dex) -### Deploy KFServing InferenceService behind an Authentication Proxy + [InferenceService behind GCP Identity Aware Proxy (IAP) ](./gcp-iap) diff --git a/docs/samples/autoscaling/README.md b/docs/samples/autoscaling/README.md index 19ce90e45c2..64b69178feb 100644 --- a/docs/samples/autoscaling/README.md +++ b/docs/samples/autoscaling/README.md @@ -22,7 +22,7 @@ # Autoscale InferenceService with your inference workload ## Setup -1. Your ~/.kube/config should point to a cluster with [KFServing installed](https://github.com/kubeflow/kfserving/blob/master/docs/DEVELOPER_GUIDE.md#deploy-kfserving). +1. Your ~/.kube/config should point to a cluster with [KFServing installed](https://github.com/kubeflow/kfserving/#install-kfserving). 2. Your cluster's Istio Ingress gateway must be network accessible. 3. [Metrics installation](https://knative.dev/docs/serving/installing-logging-metrics-traces) for viewing scaling graphs (optional). 4. The [hey](https://github.com/rakyll/hey) load generator installed (go get -u github.com/rakyll/hey). diff --git a/docs/samples/bentoml/README.md b/docs/samples/bentoml/README.md index b1e0bc009a7..563e89d5652 100644 --- a/docs/samples/bentoml/README.md +++ b/docs/samples/bentoml/README.md @@ -21,7 +21,7 @@ workflow, with DevOps best practices baked in. Before starting this guide, make sure you have the following: -* Your ~/.kube/config should point to a cluster with KFServing installed. +* Your ~/.kube/config should point to a cluster with [KFServing installed](https://github.com/kubeflow/kfserving/#install-kfserving). * Your cluster's Istio Ingress gateway must be network accessible. * Docker and Docker hub must be properly configured on your local system * Python 3.6 or above diff --git a/docs/samples/custom-domain/README.md b/docs/samples/custom-domain/README.md index e6379ae302d..384b8a046ce 100644 --- a/docs/samples/custom-domain/README.md +++ b/docs/samples/custom-domain/README.md @@ -2,7 +2,7 @@ ## Setup -1. Your ~/.kube/config should point to a cluster with [KFServing installed](https://github.com/kubeflow/kfserving/blob/master/docs/DEVELOPER_GUIDE.md#deploy-kfserving). +1. Your ~/.kube/config should point to a cluster with [KFServing installed](https://github.com/kubeflow/kfserving/#install-kfserving). 2. Your cluster's Istio Ingress gateway must be network accessible. 3. You have a custom domain configured to route incoming traffic either to the Cloud provided Kubernetes Ingress gateway or the istio-ingressgateway's IP address / Load Balancer. diff --git a/docs/samples/custom/hello-world/README.md b/docs/samples/custom/hello-world/README.md index 16cbae98bc2..7af5000af87 100644 --- a/docs/samples/custom/hello-world/README.md +++ b/docs/samples/custom/hello-world/README.md @@ -2,7 +2,7 @@ ## Setup -1. Your ~/.kube/config should point to a cluster with [KFServing installed](https://github.com/kubeflow/kfserving/blob/master/docs/DEVELOPER_GUIDE.md#deploy-kfserving). +1. Your ~/.kube/config should point to a cluster with [KFServing installed](https://github.com/kubeflow/kfserving/#install-kfserving). 2. Your cluster's Istio Ingress gateway must be network accessible. 
## Build and push the sample Docker Image diff --git a/docs/samples/custom/kfserving-custom-model/README.md b/docs/samples/custom/kfserving-custom-model/README.md index 4662eb559df..747dbb49ac1 100644 --- a/docs/samples/custom/kfserving-custom-model/README.md +++ b/docs/samples/custom/kfserving-custom-model/README.md @@ -30,7 +30,7 @@ Follow the instructions in the notebook to deploy the InferenseService with the ### Setup -1. Your ~/.kube/config should point to a cluster with [KFServing installed](https://github.com/kubeflow/kfserving/blob/master/docs/DEVELOPER_GUIDE.md#deploy-kfserving). +1. Your ~/.kube/config should point to a cluster with [KFServing installed](https://github.com/kubeflow/kfserving/#install-kfserving). 2. Your cluster's Istio Ingress gateway must be network accessible. ### Build and push the sample Docker Image diff --git a/docs/samples/custom/prebuilt-image/README.md b/docs/samples/custom/prebuilt-image/README.md index 8be536e8b6f..12dc48f1944 100644 --- a/docs/samples/custom/prebuilt-image/README.md +++ b/docs/samples/custom/prebuilt-image/README.md @@ -2,7 +2,7 @@ ## Setup -1. Your ~/.kube/config should point to a cluster with [KFServing installed](https://github.com/kubeflow/kfserving/blob/master/docs/DEVELOPER_GUIDE.md#deploy-kfserving). +1. Your ~/.kube/config should point to a cluster with [KFServing installed](https://github.com/kubeflow/kfserving/#install-kfserving). 2. Your cluster's Istio Ingress gateway must be network accessible. ## Create the InferenceService diff --git a/docs/samples/gcp-iap/README.md b/docs/samples/gcp-iap/README.md index 04e15257e6d..f5168abcc9a 100644 --- a/docs/samples/gcp-iap/README.md +++ b/docs/samples/gcp-iap/README.md @@ -2,7 +2,7 @@ When using Kubeflow with GCP it is common to use a [GCP Identity Aware Proxy](https://cloud.google.com/iap) (IAP) to manage client authentication to the KFServing endpoints. The proxy intercepts and authenticates users and passes identity assertion (JWT) to kubernetes service/pods. Whilst it is also possible to add access control (i.e. programmable or service mesh authorization), this is not described here. ### Prerequisites -1. Your ~/.kube/config should point to a cluster with [KFServing installed](https://github.com/kubeflow/kfserving/blob/master/docs/DEVELOPER_GUIDE.md#deploy-kfserving) and have applied the [knative istio probe fix](https://github.com/kubeflow/manifests/commit/928cf483361730121ac18bc4d0e7a9c129f15ee2) (see below). +1. Your ~/.kube/config should point to a cluster with [KFServing installed](https://github.com/kubeflow/kfserving/#install-kfserving) and have applied the [knative istio probe fix](https://github.com/kubeflow/manifests/commit/928cf483361730121ac18bc4d0e7a9c129f15ee2) (see below). 2. Your gcloud config is initialised to the project containing the k8s cluster and has a service-account that can download IAP key file. 3. You are using Knative serving v0.11.2 or v0.14.0+ 4. You are using a recent version of KFServing (v0.3+) diff --git a/docs/samples/kafka/README.md b/docs/samples/kafka/README.md index 7e9b2325d47..b49a156c3bb 100644 --- a/docs/samples/kafka/README.md +++ b/docs/samples/kafka/README.md @@ -1,7 +1,7 @@ # End to end inference example with Minio and Kafka ## Setup -1. Your ~/.kube/config should point to a cluster with [KFServing installed](https://github.com/kubeflow/kfserving/blob/master/docs/DEVELOPER_GUIDE.md#deploy-kfserving). +1. 
Your ~/.kube/config should point to a cluster with [KFServing installed](https://github.com/kubeflow/kfserving/#install-kfserving). 2. Your cluster's Istio Ingress gateway must be network accessible. 3. Install Minio with following Minio deploy step. 4. Use existing Kafka cluster or install Kafka on your cluster with [Confluent helm chart](https://www.confluent.io/blog/getting-started-apache-kafka-kubernetes/). diff --git a/docs/samples/onnx/README.md b/docs/samples/onnx/README.md index f2566aa09df..aa9c16dc376 100644 --- a/docs/samples/onnx/README.md +++ b/docs/samples/onnx/README.md @@ -1,7 +1,7 @@ # Predict on a InferenceService using ONNX ## Setup -1. Your ~/.kube/config should point to a cluster with [KFServing installed](https://github.com/kubeflow/kfserving/blob/master/docs/DEVELOPER_GUIDE.md#deploy-kfserving). +1. Your ~/.kube/config should point to a cluster with [KFServing installed](https://github.com/kubeflow/kfserving/#install-kfserving). 2. Your cluster's Istio Ingress gateway must be network accessible. ## Create the InferenceService diff --git a/docs/samples/pytorch/README.md b/docs/samples/pytorch/README.md index 81d6d05f56e..a159bd5e7ff 100644 --- a/docs/samples/pytorch/README.md +++ b/docs/samples/pytorch/README.md @@ -61,7 +61,7 @@ print(res.text) # Predict on a InferenceService using PyTorch ## Setup -1. Your ~/.kube/config should point to a cluster with [KFServing installed](https://github.com/kubeflow/kfserving/blob/master/docs/DEVELOPER_GUIDE.md#deploy-kfserving). +1. Your ~/.kube/config should point to a cluster with [KFServing installed](https://github.com/kubeflow/kfserving/#install-kfserving). 2. Your cluster's Istio Ingress gateway must be network accessible. ## Create the InferenceService diff --git a/docs/samples/rollouts/README.md b/docs/samples/rollouts/README.md index cd6211545f6..0a79278b0ad 100644 --- a/docs/samples/rollouts/README.md +++ b/docs/samples/rollouts/README.md @@ -2,7 +2,7 @@ To test a canary rollout, you can use the canary.yaml, which declares a canary model that is set to receive 10% of requests. ## Setup -1. Your ~/.kube/config should point to a cluster with [KFServing installed](https://github.com/kubeflow/kfserving/blob/master/docs/DEVELOPER_GUIDE.md#deploy-kfserving). +1. Your ~/.kube/config should point to a cluster with [KFServing installed](https://github.com/kubeflow/kfserving/#install-kfserving). 2. Your cluster's Istio Ingress gateway must be network accessible. ## Create the InferenceService diff --git a/docs/samples/s3/README.md b/docs/samples/s3/README.md index d1359a0d627..027d2ac05aa 100644 --- a/docs/samples/s3/README.md +++ b/docs/samples/s3/README.md @@ -1,7 +1,7 @@ # Predict on a InferenceService with saved model on S3 ## Setup -1. Your ~/.kube/config should point to a cluster with [KFServing installed](https://github.com/kubeflow/kfserving/blob/master/docs/DEVELOPER_GUIDE.md#deploy-kfserving). +1. Your ~/.kube/config should point to a cluster with [KFServing installed](https://github.com/kubeflow/kfserving/#install-kfserving). 2. Your cluster's Istio Ingress gateway must be network accessible. 3. Your cluster's Istio Egresss gateway must [allow accessing S3 Storage](https://knative.dev/docs/serving/outbound-network-access/) 4. 
The example uses the Kubeflow's Minio setup if you have [Kubeflow](https://www.kubeflow.org/docs/started/getting-started/) installed,
diff --git a/docs/samples/sklearn/README.md b/docs/samples/sklearn/README.md
index bf5a2681999..7271e521fca 100644
--- a/docs/samples/sklearn/README.md
+++ b/docs/samples/sklearn/README.md
@@ -37,7 +37,7 @@ print(res.text)
 # Predict on a InferenceService using SKLearn

 ## Setup
-1. Your ~/.kube/config should point to a cluster with [KFServing installed](https://github.com/kubeflow/kfserving/blob/master/docs/DEVELOPER_GUIDE.md#deploy-kfserving).
+1. Your ~/.kube/config should point to a cluster with [KFServing installed](https://github.com/kubeflow/kfserving/#install-kfserving).
 2. Your cluster's Istio Ingress gateway must be network accessible.

 ## Create the InferenceService
diff --git a/docs/samples/sklearn/perf.yaml b/docs/samples/sklearn/perf.yaml
new file mode 100644
index 00000000000..542c5af8405
--- /dev/null
+++ b/docs/samples/sklearn/perf.yaml
@@ -0,0 +1,47 @@
+apiVersion: batch/v1
+kind: Job
+metadata:
+  generateName: load-test
+spec:
+  backoffLimit: 6
+  parallelism: 1
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/inject: "false"
+    spec:
+      containers:
+      - args:
+        - vegeta -cpus=5 attack -duration=1m -rate=500/1s -targets=/var/vegeta/cfg
+          | vegeta report -type=text
+        command:
+        - sh
+        - -c
+        image: peterevans/vegeta:latest
+        imagePullPolicy: Always
+        name: vegeta
+        volumeMounts:
+        - mountPath: /var/vegeta
+          name: vegeta-cfg
+      volumes:
+      - configMap:
+          defaultMode: 420
+          name: vegeta-cfg
+        name: vegeta-cfg
+---
+apiVersion: v1
+data:
+  cfg: |
+    POST http://sklearn-iris.default.svc.cluster.local/v1/models/sklearn-iris:predict
+    @/var/vegeta/payload
+  payload: |
+    {
+      "instances": [
+        [6.8, 2.8, 4.8, 1.4],
+        [6.0, 3.4, 4.5, 1.6]
+      ]
+    }
+kind: ConfigMap
+metadata:
+  name: vegeta-cfg
diff --git a/docs/samples/tensorflow/README.md b/docs/samples/tensorflow/README.md
index d41862b8a47..c563fee4448 100644
--- a/docs/samples/tensorflow/README.md
+++ b/docs/samples/tensorflow/README.md
@@ -1,7 +1,7 @@
 # Predict on a InferenceService using Tensorflow

 ## Setup
-1. Your ~/.kube/config should point to a cluster with [KFServing installed](https://github.com/kubeflow/kfserving/blob/master/docs/DEVELOPER_GUIDE.md#deploy-kfserving).
+1. Your ~/.kube/config should point to a cluster with [KFServing installed](https://github.com/kubeflow/kfserving/#install-kfserving).
 2. Your cluster's Istio Ingress gateway must be network accessible.

diff --git a/docs/samples/transformer/image_transformer/README.md b/docs/samples/transformer/image_transformer/README.md
index 83017da8fd2..d9770ab2900 100644
--- a/docs/samples/transformer/image_transformer/README.md
+++ b/docs/samples/transformer/image_transformer/README.md
@@ -3,7 +3,7 @@
 Most of the model servers expect tensors as input data, so a pre-processing step is needed before making the prediction call if the user is sending in raw input format. Transformer is a service we orchestrated from InferenceService spec for user implemented pre/post processing code. In the [pytorch](../../pytorch/README.md) example we call the prediction endpoint with tensor inputs, and in this example we add additional pre-processing step to allow the user send raw image data.
 ## Setup
-1. Your ~/.kube/config should point to a cluster with [KFServing installed](https://github.com/kubeflow/kfserving/blob/master/docs/DEVELOPER_GUIDE.md#deploy-kfserving).
+1. Your ~/.kube/config should point to a cluster with [KFServing installed](https://github.com/kubeflow/kfserving/#install-kfserving).
 2. Your cluster's Istio Ingress gateway must be network accessible.

 ## Build Transformer image
diff --git a/docs/samples/triton/simple_string/README.md b/docs/samples/triton/simple_string/README.md
index ed3e00f7fe9..a3a71a633a6 100644
--- a/docs/samples/triton/simple_string/README.md
+++ b/docs/samples/triton/simple_string/README.md
@@ -1,7 +1,7 @@
 # Predict on a InferenceService using Triton Inference Server

 ## Setup
-1. Your ~/.kube/config should point to a cluster with [KFServing installed](https://github.com/kubeflow/kfserving/blob/master/docs/DEVELOPER_GUIDE.md#deploy-kfserving).
+1. Your ~/.kube/config should point to a cluster with [KFServing installed](https://github.com/kubeflow/kfserving/#install-kfserving).
 2. Your cluster's Istio Ingress gateway must be network accessible.

 ## Create the InferenceService
diff --git a/docs/samples/xgboost/README.md b/docs/samples/xgboost/README.md
index d30ba9c88ab..7c20769d189 100644
--- a/docs/samples/xgboost/README.md
+++ b/docs/samples/xgboost/README.md
@@ -53,7 +53,7 @@ print(res.text)
 ## Predict on a InferenceService using XGBoost

 ## Setup
-1. Your ~/.kube/config should point to a cluster with [KFServing installed](https://github.com/kubeflow/kfserving/blob/master/docs/DEVELOPER_GUIDE.md#deploy-kfserving).
+1. Your ~/.kube/config should point to a cluster with [KFServing installed](https://github.com/kubeflow/kfserving/#install-kfserving).
 2. Your cluster's Istio Ingress gateway must be network accessible.

 ## Create the InferenceService
diff --git a/hack/quick_install.sh b/hack/quick_install.sh
index bd8d6cf8fbb..6775bcb6595 100755
--- a/hack/quick_install.sh
+++ b/hack/quick_install.sh
@@ -1,14 +1,11 @@
 set -e
-export ISTIO_VERSION=1.3.6
-export KNATIVE_VERSION=v0.12.0
-export KFSERVING_VERSION=0.2.2
+export ISTIO_VERSION=1.6.2
+export KNATIVE_VERSION=v0.15.0
+export KFSERVING_VERSION=v0.3.0

 curl -L https://git.io/getLatestIstio | sh -
 cd istio-${ISTIO_VERSION}

-# Install istio CRD
-for i in install/kubernetes/helm/istio-init/files/crd*yaml; do kubectl apply -f $i; done
-
 # Create istio-system namespace
 cat <<EOF | kubectl apply -f -
 apiVersion: v1
 kind: Namespace
 metadata:
   name: istio-system
 EOF

+cat << EOF > ./istio-minimal-operator.yaml
+apiVersion: install.istio.io/v1alpha1
+kind: IstioOperator
+spec:
+  values:
+    global:
+      proxy:
+        autoInject: disabled
+      useMCP: false
+      # The third-party-jwt is not enabled on all k8s.
+ # See: https://istio.io/docs/ops/best-practices/security/#configure-third-party-service-account-tokens + jwtPolicy: first-party-jwt -kubectl apply -f istio-lean.yaml + addonComponents: + pilot: + enabled: true + tracing: + enabled: true + kiali: + enabled: true + prometheus: + enabled: true -# Install istio local gateway -helm template --namespace=istio-system \ - --set gateways.custom-gateway.autoscaleMin=1 \ - --set gateways.custom-gateway.autoscaleMax=2 \ - --set gateways.custom-gateway.cpu.targetAverageUtilization=60 \ - --set gateways.custom-gateway.labels.app='cluster-local-gateway' \ - --set gateways.custom-gateway.labels.istio='cluster-local-gateway' \ - --set gateways.custom-gateway.type='ClusterIP' \ - --set gateways.istio-ingressgateway.enabled=false \ - --set gateways.istio-egressgateway.enabled=false \ - --set gateways.istio-ilbgateway.enabled=false \ - --set global.mtls.auto=false \ - install/kubernetes/helm/istio \ - -f install/kubernetes/helm/istio/example-values/values-istio-gateways.yaml \ - | sed -e "s/custom-gateway/cluster-local-gateway/g" -e "s/customgateway/clusterlocalgateway/g" \ - > ./istio-local-gateway.yaml + components: + ingressGateways: + - name: istio-ingressgateway + enabled: true + - name: cluster-local-gateway + enabled: true + label: + istio: cluster-local-gateway + app: cluster-local-gateway + k8s: + service: + type: ClusterIP + ports: + - port: 15020 + name: status-port + - port: 80 + name: http2 + - port: 443 + name: https +EOF -kubectl apply -f istio-local-gateway.yaml +bin/istioctl manifest apply -f istio-minimal-operator.yaml # Install Knative kubectl apply --filename https://github.com/knative/serving/releases/download/${KNATIVE_VERSION}/serving-crds.yaml kubectl apply --filename https://github.com/knative/serving/releases/download/${KNATIVE_VERSION}/serving-core.yaml +kubectl apply --filename https://github.com/knative/net-istio/releases/download/${KNATIVE_VERSION}/release.yaml +# Install Cert Manager +kubectl apply --validate=false -f https://github.com/jetstack/cert-manager/releases/download/v0.15.1/cert-manager.yaml +kubectl wait --for=condition=available --timeout=600s deployment/cert-manager-webhook -n cert-manager cd .. # Install KFServing kubectl apply -f install/${KFSERVING_VERSION}/kfserving.yaml