Skip to content

Commit

Permalink
Implement KFServing v1beta1 controller (kubeflow#1042)
Browse files Browse the repository at this point in the history
* Move existing controller to v1alpha2

* Implement v1beta1 controller

* Fix test crd path

* Add controller test for predictor

* Setup webhook for v1beta1 and v1alpha2

* Fix webhook setup

* Add api conversion

* Patch conversion webhook to crd

* Fix the configmap decoding

* Fix v1beta1 controller test

* Address comments

* Add transformer

* Transformer support

* Fix validate create for http test

* Reconcile kfserving ingress

* Add license

* Add rbac for ingress and svc

* Add rbac for ingress and svc

* Fix ingress host

* Switch to use virtual service

* Add conversion test

* Add crd conversion test

* Add explainer

* More conversion test

* Add custom predictor defaulter test

* Don't use nested inline

* Fix transformer and explainer

* Set ingress condition

* Add ingress asserts

* Fix explainer route

* Update ksvc traffic targets based on canary traffic split

* e2e test to work with v1beta1

* skip gpu test

* Fix canary traffic pointer type

* Download yq

* Add nil check for specs that are missing a component implementation

* Expose podspec on component

* Support http url in v1beta1

* fix typo

* Update traffic on routespec when canaryPercent is changed

* Add canary rollout test

* Add explainer test

* Address comments

* Use tritonserver

* Make container name and probe port field optional

* Add traffic percent in status

* Set xgboost nthread

* Fix updates in test

* Use default service name for backwards compatibility

* Call knative defaulter to avoid diffs during reconciliation

* Set SessionAffinity

* Check if configured as internal domain
  • Loading branch information
yuzisun authored Sep 21, 2020
1 parent d45e36c commit deba8df
Show file tree
Hide file tree
Showing 101 changed files with 19,162 additions and 4,083 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/go.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,7 @@ jobs:
- name: Test
run: |
export GOPATH=/home/runner/go
export PATH=$PATH:/usr/local/kubebuilder/bin
export PATH=$PATH:/usr/local/kubebuilder/bin:/home/runner/go/bin
wget -O $GOPATH/bin/yq https://github.com/mikefarah/yq/releases/download/3.3.2/yq_linux_amd64
chmod +x $GOPATH/bin/yq
make test
13 changes: 12 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ undeploy-dev:

# Generate manifests e.g. CRD, RBAC etc.
manifests: controller-gen
$(CONTROLLER_GEN) $(CRD_OPTIONS) paths=./pkg/apis/serving/v1alpha2/... output:crd:dir=config/crd
$(CONTROLLER_GEN) $(CRD_OPTIONS) paths=./pkg/apis/serving/... output:crd:dir=config/crd
$(CONTROLLER_GEN) rbac:roleName=kfserving-manager-role paths=./pkg/controller/... output:rbac:artifacts:config=config/rbac
$(CONTROLLER_GEN) object:headerFile="hack/boilerplate.go.txt" paths=./pkg/apis/serving/v1alpha2
$(CONTROLLER_GEN) object:headerFile="hack/boilerplate.go.txt" paths=./pkg/apis/serving/v1beta1
Expand All @@ -105,6 +105,17 @@ manifests: controller-gen
perl -pi -e 's/Any/string/g' config/crd/serving.kubeflow.org_inferenceservices.yaml
perl -pi -e 's/storedVersions: null/storedVersions: []/g' config/crd/serving.kubeflow.org_trainedmodels.yaml
perl -pi -e 's/conditions: null/conditions: []/g' config/crd/serving.kubeflow.org_trainedmodels.yaml
perl -pi -e 's/Any/string/g' config/crd/serving.kubeflow.org_trainedmodels.yaml
#TODO: the v1beta1 CRD openAPIV3Schema is too big and kubectl client-side apply takes a long time to compute diffs; need to use k8s 1.18's server-side apply
#https://kubernetes.io/blog/2020/04/01/kubernetes-1.18-feature-server-side-apply-beta-2/#what-is-server-side-apply
#remove the required property on framework as name field needs to be optional
yq d -i config/crd/serving.kubeflow.org_inferenceservices.yaml 'spec.versions[1].schema.openAPIV3Schema.properties.spec.properties.*.properties.*.required'
#knative does not allow setting port on liveness or readiness probe
yq d -i config/crd/serving.kubeflow.org_inferenceservices.yaml 'spec.versions[1].schema.openAPIV3Schema.properties.spec.properties.*.properties.*.properties.readinessProbe.properties.httpGet.required'
yq d -i config/crd/serving.kubeflow.org_inferenceservices.yaml 'spec.versions[1].schema.openAPIV3Schema.properties.spec.properties.*.properties.*.properties.livenessProbe.properties.httpGet.required'
yq d -i config/crd/serving.kubeflow.org_inferenceservices.yaml 'spec.versions[1].schema.openAPIV3Schema.properties.spec.properties.*.properties.*.properties.readinessProbe.properties.tcpSocket.required'
yq d -i config/crd/serving.kubeflow.org_inferenceservices.yaml 'spec.versions[1].schema.openAPIV3Schema.properties.spec.properties.*.properties.*.properties.livenessProbe.properties.tcpSocket.required'


# Run go fmt against code
fmt:
Expand Down
34 changes: 25 additions & 9 deletions cmd/manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,9 @@ import (
"flag"
"github.com/kubeflow/kfserving/pkg/apis/serving/v1alpha2"
"github.com/kubeflow/kfserving/pkg/apis/serving/v1beta1"
v1alph2controller "github.com/kubeflow/kfserving/pkg/controller/inferenceservice"
v1beta1controller "github.com/kubeflow/kfserving/pkg/controller/v1beta1/inferenceservice"
trainedmodelcontroller "github.com/kubeflow/kfserving/pkg/controller/v1beta1/trainedmodel"
"github.com/kubeflow/kfserving/pkg/controller/v1beta1/trainedmodel/reconcilers/modelconfig"
"github.com/kubeflow/kfserving/pkg/webhook/admission/inferenceservice"
"github.com/kubeflow/kfserving/pkg/webhook/admission/pod"
"istio.io/client-go/pkg/apis/networking/v1alpha3"
v1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -96,23 +95,29 @@ func main() {
os.Exit(1)
}

log.Info("Setting up core scheme")
if err := v1.AddToScheme(mgr.GetScheme()); err != nil {
log.Error(err, "unable to add Core APIs to scheme")
os.Exit(1)
}

// Setup all Controllers
setupLog.Info("Setting up v1alpha2 controller")
setupLog.Info("Setting up v1beta1 controller")
eventBroadcaster := record.NewBroadcaster()
clientSet, err := kubernetes.NewForConfig(mgr.GetConfig())
if err != nil {
setupLog.Error(err, "unable to create clientSet")
os.Exit(1)
}
eventBroadcaster.StartRecordingToSink(&typedcorev1.EventSinkImpl{Interface: clientSet.CoreV1().Events("")})
if err = (&v1alph2controller.InferenceServiceReconciler{
if err = (&v1beta1controller.InferenceServiceReconciler{
Client: mgr.GetClient(),
Log: ctrl.Log.WithName("v1alpha2Controllers").WithName("InferenceService"),
Log: ctrl.Log.WithName("v1beta1Controllers").WithName("InferenceService"),
Scheme: mgr.GetScheme(),
Recorder: eventBroadcaster.NewRecorder(
mgr.GetScheme(), v1.EventSource{Component: "v1alpha2Controllers"}),
mgr.GetScheme(), v1.EventSource{Component: "v1beta1Controllers"}),
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "v1alpha2Controller", "InferenceService")
setupLog.Error(err, "unable to create controller", "v1beta1Controller", "InferenceService")
os.Exit(1)
}

Expand All @@ -136,8 +141,19 @@ func main() {

log.Info("registering webhooks to the webhook server")
hookServer.Register("/mutate-pods", &webhook.Admission{Handler: &pod.Mutator{}})
hookServer.Register("/validate-inferenceservices", &webhook.Admission{Handler: &inferenceservice.Validator{}})
hookServer.Register("/mutate-inferenceservices", &webhook.Admission{Handler: &inferenceservice.Defaulter{}})

if err = ctrl.NewWebhookManagedBy(mgr).
For(&v1alpha2.InferenceService{}).
Complete(); err != nil {
setupLog.Error(err, "unable to create webhook", "webhook", "v1alpha2")
os.Exit(1)
}
if err = ctrl.NewWebhookManagedBy(mgr).
For(&v1beta1.InferenceService{}).
Complete(); err != nil {
setupLog.Error(err, "unable to create webhook", "webhook", "v1beta1")
os.Exit(1)
}

// Start the Cmd
log.Info("Starting the Cmd.")
Expand Down
2 changes: 1 addition & 1 deletion config/configmap/inferenceservice.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ data:
},
"triton": {
"image": "nvcr.io/nvidia/tritonserver",
"defaultImageVersion": "20.03-py3",
"defaultImageVersion": "20.08-py3",
"supportedFrameworks": [
"tensorrt",
"tensorflow",
Expand Down
Loading

0 comments on commit deba8df

Please sign in to comment.