Support secret name overrides with annotation (kubeflow#3034)
* Add annotation for secret name overrides

Signed-off-by: Dan Sun <dsun20@bloomberg.net>

* Remove the field

Signed-off-by: Dan Sun <dsun20@bloomberg.net>

* Fix secret name annotation

Signed-off-by: Dan Sun <dsun20@bloomberg.net>

* Update kserve url storage example docs

Signed-off-by: Dan Sun <dsun20@bloomberg.net>

* Address review comments

Signed-off-by: Dan Sun <dsun20@bloomberg.net>

* Update config docs for storage secret

Signed-off-by: Dan Sun <dsun20@bloomberg.net>

* Fix storage secret name

Signed-off-by: Dan Sun <dsun20@bloomberg.net>

* Add test for secret name annotation override

Signed-off-by: Dan Sun <dsun20@bloomberg.net>

* Fix flake8 lint

Signed-off-by: Dan Sun <dsun20@bloomberg.net>

---------

Signed-off-by: Dan Sun <dsun20@bloomberg.net>
yuzisun authored Jul 31, 2023
1 parent 32be746 commit 3edd0a9
Showing 17 changed files with 375 additions and 315 deletions.
19 changes: 13 additions & 6 deletions charts/kserve-resources/templates/configmap.yaml
@@ -64,7 +64,6 @@ data:
"memoryLimit": "1Gi",
"cpuRequest": "100m",
"cpuLimit": "1",
"storageSpecSecretName": "storage-config",
"enableDirectPvcVolumeMount": false
}
storageInitializer: |-
@@ -84,9 +83,6 @@
# cpuLimit is the limits.cpu to set for the storage initializer init container.
"cpuLimit": "1",
# storageSpecSecretName contains the secret name which has the credentials for downloading the model.
"storageSpecSecretName": "storage-config",
# enableDirectPvcVolumeMount controls whether users can mount pvc volumes directly.
# if pvc volume is provided in storageuri then the pvc volume is directly mounted to /mnt/models in the user container.
# rather than symlink it to a shared volume. For more info see https://github.com/kserve/kserve/issues/2737
@@ -97,6 +93,8 @@ data:
# Example
credentials: |-
{
"storageSpecSecretName": "storage-config",
"storageSecretNameAnnotation": "serving.kserve.io/storageSecretName",
"gcs": {
"gcsCredentialFileName": "gcloud-application-credentials.json"
},
@@ -124,6 +122,14 @@ data:
# The rest of the fields are used in both authentication methods (IAM Role for Service Account & IAM User Access Key Secret) if a non-empty value is provided.
credentials: |-
{
# storageSpecSecretName contains the secret name which has the credentials for downloading the model.
# This option is used when specifying the storage spec on isvc yaml.
"storageSpecSecretName": "storage-config",
# This annotation can be set on the isvc yaml to override the secret name reference with the annotation's value.
# When using storageUri, the order of precedence is: secret name annotation > secret name references from the service account
# When using storageSpec, the order of precedence is: secret name annotation > storageSpecSecretName in the configmap
"storageSecretNameAnnotation": "serving.kserve.io/storageSecretName",
# Configuration for google cloud storage
"gcs": {
# gcsCredentialFileName specifies the filename of the gcs credential
@@ -419,6 +425,8 @@ data:
}
credentials: |-
{
"storageSpecSecretName": "{{ .Values.kserve.storage.storageSpecSecretName }}",
"storageSecretNameAnnotation": "{{ .Values.kserve.storage.storageSecretNameAnnotation }}",
"gcs": {
"gcsCredentialFileName": "gcloud-application-credentials.json"
},
@@ -475,8 +483,7 @@ data:
"memoryRequest": "100Mi",
"memoryLimit": "1Gi",
"cpuRequest": "100m",
"cpuLimit": "1",
"storageSpecSecretName": "{{ .Values.kserve.storage.storageSpecSecretName }}"
"cpuLimit": "1"
}
metricsAggregator: |-
{
1 change: 1 addition & 0 deletions charts/kserve-resources/values.yaml
@@ -11,6 +11,7 @@ kserve:
image: kserve/storage-initializer
tag: *defaultVersion
storageSpecSecretName: storage-config
storageSecretNameAnnotation: serving.kserve.io/secretName
s3:
accessKeyIdName: AWS_ACCESS_KEY_ID
secretAccessKeyName: AWS_SECRET_ACCESS_KEY
17 changes: 12 additions & 5 deletions config/configmap/inferenceservice.yaml
@@ -63,7 +63,6 @@ data:
"memoryLimit": "1Gi",
"cpuRequest": "100m",
"cpuLimit": "1",
"storageSpecSecretName": "storage-config",
"enableDirectPvcVolumeMount": false
}
storageInitializer: |-
@@ -82,9 +81,6 @@
# cpuLimit is the limits.cpu to set for the storage initializer init container.
"cpuLimit": "1",
# storageSpecSecretName contains the secret name which has the credentials for downloading the model.
"storageSpecSecretName": "storage-config",
# enableDirectPvcVolumeMount controls whether users can mount pvc volumes directly.
# if pvc volume is provided in storageuri then the pvc volume is directly mounted to /mnt/models in the user container.
@@ -96,6 +92,8 @@ data:
# Example
credentials: |-
{
"storageSpecSecretName": "storage-config",
"storageSecretNameAnnotation": "serving.kserve.io/storageSecretName",
"gcs": {
"gcsCredentialFileName": "gcloud-application-credentials.json"
},
@@ -123,6 +121,14 @@ data:
# The rest of the fields are used in both authentication methods (IAM Role for Service Account & IAM User Access Key Secret) if a non-empty value is provided.
credentials: |-
{
# storageSpecSecretName contains the secret name which has the credentials for downloading the model.
# This option is used when specifying the storage spec on isvc yaml.
"storageSpecSecretName": "storage-config",
# This annotation can be set on the isvc yaml to override the secret name reference with the annotation's value.
# When using storageUri, the order of precedence is: secret name annotation > secret name references from the service account
# When using storageSpec, the order of precedence is: secret name annotation > storageSpecSecretName in the configmap
# Configuration for google cloud storage
"gcs": {
# gcsCredentialFileName specifies the filename of the gcs credential
@@ -411,12 +417,13 @@ data:
"memoryLimit": "1Gi",
"cpuRequest": "100m",
"cpuLimit": "1",
"storageSpecSecretName": "storage-config",
"enableDirectPvcVolumeMount": false
}
credentials: |-
{
"storageSpecSecretName": "storage-config",
"storageSecretNameAnnotation": "serving.kserve.io/storageSecretName",
"gcs": {
"gcsCredentialFileName": "gcloud-application-credentials.json"
},
70 changes: 45 additions & 25 deletions docs/samples/storage/uri/README.md
@@ -4,11 +4,11 @@ This allows you to specify a model object via the URI (Uniform Resource Identifi
This `storageUri` option supports single file models, like `sklearn` which is specified by a [joblib](https://joblib.readthedocs.io/en/latest/) file, or artifacts (e.g. `tar` or `zip`) which contain all the necessary dependencies for other model types (e.g. `tensorflow` or `pytorch`). Here, we'll show examples from both of the above.

## Setup
1. Your ~/.kube/config should point to a cluster with [KFServing installed](https://github.com/kubeflow/kfserving/#install-kfserving).
1. Your ~/.kube/config should point to a cluster with [KServe installed](https://github.com/kserve/kserve).
2. Your cluster's Istio Ingress gateway must be [network-accessible](https://istio.io/latest/docs/tasks/traffic-management/ingress/ingress-control/).
3. Your cluster's Istio Egress gateway must [allow http / https traffic](https://istio.io/latest/docs/tasks/traffic-management/egress/egress-gateway/)

## Create HTTP/HTTPS header Secret and attach to Service account
## Create HTTP/HTTPS header Secret
If you do not require headers in your HTTP/HTTPS service request then you can skip this step.
You can define headers using the following format:

@@ -22,15 +22,7 @@ data:
https-host: ZXhhbXBsZS5jb20=
headers: |-
ewoiYWNjb3VudC1uYW1lIjogInNvbWVfYWNjb3VudF9uYW1lIiwKInNlY3JldC1rZXkiOiAic29tZV9zZWNyZXRfa2V5Igp9
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: sa
secrets:
- name: mysecret
```
Make sure you have serviceAccountName specified in your predictor in your inference service. These headers will be applied to any http/https requests that have the same host.
You will need to base64 encode the host and headers. Make sure the headers are valid JSON.
```text
@@ -46,9 +38,39 @@ ZXhhbXBsZS5jb20=
ewoiYWNjb3VudC1uYW1lIjogInNvbWVfYWNjb3VudF9uYW1lIiwKInNlY3JldC1rZXkiOiAic29tZV9zZWNyZXRfa2V5Igp9
```
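The two encoded values above can be produced with a short script. This is a minimal sketch; the host and header values are the placeholders from the example secret, not real credentials:

```python
import base64
import json

# Placeholder values from the example secret above.
host = "example.com"
headers = {
    "account-name": "some_account_name",
    "secret-key": "some_secret_key",
}

# The host is encoded as-is; the headers must be valid JSON before encoding.
host_b64 = base64.b64encode(host.encode()).decode()
headers_b64 = base64.b64encode(json.dumps(headers).encode()).decode()

print(host_b64)  # ZXhhbXBsZS5jb20=
print(headers_b64)
```

Decoding the resulting strings with `base64 -d` should round-trip back to the original host and JSON payload.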

### Reference The Secret
You can reference the secret with the annotation `serving.kserve.io/storageSecretName`.
```yaml
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
name: sklearn-from-uri
annotations:
serving.kserve.io/storageSecretName: mysecret

spec:
predictor:
sklearn:
storageUri: https://github.com/tduffy000/kfserving-uri-examples/blob/master/sklearn/frozen/model.joblib?raw=true
```
Alternatively, you can attach the secret name reference to the service account's secrets.
```yaml
apiVersion: v1
kind: ServiceAccount
metadata:
name: sa
secrets:
- name: mysecret
```
Make sure `serviceAccountName` is specified on the predictor of your `InferenceService`. These headers will be applied to any HTTP/HTTPS requests to the same host.
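For example, assuming the service account `sa` from the snippet above, the predictor would reference it via `serviceAccountName` (sketched here on the sklearn example from this doc):

```yaml
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
  name: sklearn-from-uri
spec:
  predictor:
    serviceAccountName: sa
    sklearn:
      storageUri: https://github.com/tduffy000/kfserving-uri-examples/blob/master/sklearn/frozen/model.joblib?raw=true
```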
## Sklearn
### Train and freeze the model
Here, we'll train a simple iris model. Please note that `kfserving` requires `sklearn==0.20.3`.
Here, we'll train a simple iris model. Please note that `kserve` requires `scikit-learn==1.0.2`.

```python
from sklearn import svm
```

@@ -74,16 +96,14 @@ Now, you'll need to take that frozen model object and put it somewhere on the web
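The training block above is collapsed in this diff view; a minimal, self-contained sketch of the same idea (fit an iris SVM and freeze it with joblib, assuming scikit-learn and joblib are installed) looks like:

```python
import joblib
from sklearn import datasets, svm

# Train a simple SVM classifier on the iris dataset.
iris = datasets.load_iris()
clf = svm.SVC(gamma="scale")
clf.fit(iris.data, iris.target)

# Freeze the fitted model into the single file that storageUri will point at.
joblib.dump(clf, "model.joblib")
print("saved model.joblib")
```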

### Specify and create the `InferenceService`
```yaml
apiVersion: serving.kserve.io/v1alpha2
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
name: sklearn-from-uri
spec:
default:
predictor:
sklearn:
storageUri: https://github.com/tduffy000/kfserving-uri-examples/blob/master/sklearn/frozen/model.joblib?raw=true

predictor:
sklearn:
storageUri: https://github.com/tduffy000/kfserving-uri-examples/blob/master/sklearn/frozen/model.joblib?raw=true
```

Apply the CRD,
@@ -95,7 +115,7 @@ Expected Output
$ inferenceservice.serving.kserve.io/sklearn-from-uri created
```
### Run a prediction
The first step is to [determine the ingress IP and ports](https://kserve.github.io/website/master/get_started/first_isvc/#4-determine-the-ingress-ip-and-ports) and set `INGRESS_HOST` and `INGRESS_PORT`.
The first is to [determine the ingress IP and ports](https://kserve.github.io/website/master/get_started/first_isvc/#4-determine-the-ingress-ip-and-ports) and set `INGRESS_HOST` and `INGRESS_PORT`.
Now, if everything went according to plan you should be able to hit the endpoint exposing the model we just uploaded.
@@ -193,19 +213,19 @@ Where we assume the `0001/` directory has the structure:
Note that building the tarball from the directory specifying a version number is required for `tensorflow`.

Now, you can push the `.tar` or `.tgz` file to some remote URI.
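As a sketch, the archive can be built with Python's `tarfile` module (the `0001/` version directory is assumed to hold your SavedModel files; an empty placeholder directory is created here so the snippet runs end to end):

```python
import os
import tarfile

# Assumes the SavedModel export lives under a version directory such as 0001/;
# created here as an empty placeholder so the sketch is runnable.
os.makedirs("0001", exist_ok=True)

# TensorFlow Serving expects the version directory itself inside the archive,
# so add "0001" rather than only its contents.
with tarfile.open("model_artifacts.tar.gz", "w:gz") as tar:
    tar.add("0001")

print("wrote model_artifacts.tar.gz")
```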

### Specify and create the `InferenceService`
And again, if everything went to plan we should be able to pull down the tarball and expose the endpoint.

```yaml
apiVersion: serving.kserve.io/v1alpha2
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
name: tensorflow-from-uri-gzip
spec:
default:
predictor:
tensorflow:
storageUri: https://raw.githubusercontent.com/tduffy000/kfserving-uri-examples/master/tensorflow/frozen/model_artifacts.tar.gz
predictor:
tensorflow:
storageUri: https://raw.githubusercontent.com/tduffy000/kfserving-uri-examples/master/tensorflow/frozen/model_artifacts.tar.gz
```
Apply the CRD,
```bash
Expand All @@ -217,7 +237,7 @@ $ inferenceservice.serving.kserve.io/tensorflow-from-uri created
```

## Run a prediction
Again, make sure to first [determine the ingress IP and ports](https://github.com/kubeflow/kfserving/blob/master/README.md#determine-the-ingress-ip-and-ports) and set `INGRESS_HOST` and `INGRESS_PORT`.
Again, make sure to first [determine the ingress IP and ports](https://kserve.github.io/website/master/get_started/first_isvc/#4-determine-the-ingress-ip-and-ports) and set `INGRESS_HOST` and `INGRESS_PORT`.

Now that our endpoint is up and running, we can get some predictions.

@@ -266,4 +286,4 @@ $ * Trying 10.0.1.16...
]
]
}
```
```
9 changes: 4 additions & 5 deletions docs/samples/storage/uri/sklearn.yaml
@@ -1,9 +1,8 @@
apiVersion: serving.kserve.io/v1alpha2
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
name: sklearn-from-uri
spec:
default:
predictor:
sklearn:
storageUri: https://github.com/tduffy000/kfserving-uri-examples/blob/master/sklearn/frozen/model.joblib?raw=true
predictor:
sklearn:
storageUri: https://github.com/tduffy000/kfserving-uri-examples/blob/master/sklearn/frozen/model.joblib?raw=true
9 changes: 4 additions & 5 deletions docs/samples/storage/uri/tensorflow.yaml
@@ -1,9 +1,8 @@
apiVersion: serving.kserve.io/v1alpha2
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
name: tensorflow-from-uri
spec:
default:
predictor:
tensorflow:
storageUri: https://raw.githubusercontent.com/tduffy000/kfserving-uri-examples/master/tensorflow/frozen/model_artifacts.tar.gz
predictor:
tensorflow:
storageUri: https://raw.githubusercontent.com/tduffy000/kfserving-uri-examples/master/tensorflow/frozen/model_artifacts.tar.gz
2 changes: 1 addition & 1 deletion hack/quick_install.sh
@@ -30,7 +30,7 @@ while getopts ":hsr" option; do
esac
done

export ISTIO_VERSION=1.16.2
export ISTIO_VERSION=1.17.2
export KNATIVE_SERVING_VERSION=knative-v1.10.1
export KNATIVE_ISTIO_VERSION=knative-v1.10.0
export KSERVE_VERSION=v0.11.0-rc1
