From 6da64235437eb7cfd91bceea71f40c3e1a5ce217 Mon Sep 17 00:00:00 2001 From: Neil Wilson Date: Fri, 16 Apr 2021 10:11:06 +0100 Subject: [PATCH] Cluster Autoscaler Brightbox Cloud Provider This is the independent code for the provider plugin --- .../cloudprovider/brightbox/Makefile | 44 ++ .../cloudprovider/brightbox/README.md | 162 ++++ .../brightbox/brightbox_cloud_provider.go | 336 ++++++++ .../brightbox_cloud_provider_test.go | 735 ++++++++++++++++++ .../brightbox/brightbox_node_group.go | 438 +++++++++++ .../brightbox/brightbox_node_group_test.go | 347 +++++++++ .../brightbox/examples/check-env.yaml | 16 + .../examples/cluster-autoscaler-secret.yaml | 13 + .../brightbox/examples/config.rb | 39 + .../brightbox/examples/rebase.sh | 21 + .../builder/builder_brightbox.go | 42 + 11 files changed, 2193 insertions(+) create mode 100644 cluster-autoscaler/cloudprovider/brightbox/Makefile create mode 100644 cluster-autoscaler/cloudprovider/brightbox/README.md create mode 100644 cluster-autoscaler/cloudprovider/brightbox/brightbox_cloud_provider.go create mode 100644 cluster-autoscaler/cloudprovider/brightbox/brightbox_cloud_provider_test.go create mode 100644 cluster-autoscaler/cloudprovider/brightbox/brightbox_node_group.go create mode 100644 cluster-autoscaler/cloudprovider/brightbox/brightbox_node_group_test.go create mode 100644 cluster-autoscaler/cloudprovider/brightbox/examples/check-env.yaml create mode 100644 cluster-autoscaler/cloudprovider/brightbox/examples/cluster-autoscaler-secret.yaml create mode 100644 cluster-autoscaler/cloudprovider/brightbox/examples/config.rb create mode 100644 cluster-autoscaler/cloudprovider/brightbox/examples/rebase.sh create mode 100644 cluster-autoscaler/cloudprovider/builder/builder_brightbox.go diff --git a/cluster-autoscaler/cloudprovider/brightbox/Makefile b/cluster-autoscaler/cloudprovider/brightbox/Makefile new file mode 100644 index 00000000000..14c20c9dd55 --- /dev/null +++ 
b/cluster-autoscaler/cloudprovider/brightbox/Makefile @@ -0,0 +1,44 @@ +export BUILD_TAGS=brightbox +export REGISTRY=brightbox +export GOARCH?=$(shell go env GOARCH) +ifndef TAG + override TAG=dev +endif +export TAG +ifeq ($(TAG), dev) + deploydeps=build +endif + +.PHONY: deploy +deploy: examples/config.rb $(deploydeps) + helm repo update + ruby $< | \ + helm template release autoscaler/cluster-autoscaler \ + --namespace kube-system -f - | \ + kubectl -n kube-system apply -f - + +.PHONY: remove +remove: examples/config.rb + helm repo update + ruby $< | \ + helm template release autoscaler/cluster-autoscaler \ + --namespace kube-system -f - | \ + kubectl -n kube-system delete -f - + +.PHONY: secret +secret: ${HOME}/.docker/config.json + -kubectl create secret generic regcred \ + --from-file=.dockerconfigjson=$? \ + --type=kubernetes.io/dockerconfigjson + +../../cluster-autoscaler: brightbox_cloud_provider.go brightbox_node_group.go + $(MAKE) -C $(@D) container + docker tag ${REGISTRY}/cluster-autoscaler-${BUILD_TAGS}-${GOARCH}:${TAG} ${REGISTRY}/cluster-autoscaler-${BUILD_TAGS}:${TAG} + docker push ${REGISTRY}/cluster-autoscaler-${BUILD_TAGS}:${TAG} + +.PHONY: build +build: ../../cluster-autoscaler + +.PHONY: clean +clean: + $(MAKE) -C ../.. $@ diff --git a/cluster-autoscaler/cloudprovider/brightbox/README.md b/cluster-autoscaler/cloudprovider/brightbox/README.md new file mode 100644 index 00000000000..ca8498e50ce --- /dev/null +++ b/cluster-autoscaler/cloudprovider/brightbox/README.md @@ -0,0 +1,162 @@ +# Cluster Autoscaler for Brightbox Cloud + +This cloud provider implements the autoscaling function for +[Brightbox Cloud](https://www.brightbox.com). 
The autoscaler should +work on any Kubernetes cluster running on Brightbox Cloud, however +the approach is tailored to clusters built with the [Kubernetes Cluster +Builder](https://github.com/brightbox/kubernetes-cluster). + +# How Autoscaler works on Brightbox Cloud + +The autoscaler looks for [Server +Groups](https://www.brightbox.com/docs/guides/cli/server-groups/) named +after the cluster-name option passed to the autoscaler (--cluster-name). + +A group named with a suffix of the cluster-name +(e.g. k8s-worker.k8s-test.cluster.local) is a candidate to be a scaling +group. The autoscaler will then check the description to see if it is +a pair of integers separated by a colon (e.g. 1:4). If it finds those +numbers then they will become the minimum and maximum server size for +that group, and autoscaler will attempt to scale the group between those sizes. + +The type of server, the image used and the target zone will be +dynamically determined from the existing members. If these differ, or +there are no existing servers, autoscaler will log an error and will not +scale that group. + +A group named precisely the same as the cluster-name +(e.g. k8s-test.cluster.local) is considered to be the default cluster +group and all autoscaled servers created are placed within it as well +as the scaling group. + +The Brightbox Cloud provider only supports auto-discovery mode using +this pattern. `node-group-auto-discovery` and `nodes` options are +effectively ignored. + +## Cluster configuration + +If you are using the [Kubernetes Cluster +Builder](https://github.com/brightbox/kubernetes-cluster) set the +`worker_min` and `worker_max` values to scale the worker group, and the +`storage_min` and `storage_max` values to scale the storage group. + +The Cluster Builder will ensure the group name and description are +updated with the correct values in the format that autoscaler can recognise. 
+ +Generally it is best to keep the `min` and the `count` values to be the +same within the Cluster Builder and let autoscaler create and destroy +servers dynamically up to the `max` value. + +While using autoscaler you may find that the Cluster Builder recreates +servers that have been scaled down, if you use the manifests to maintain +the cluster for other reasons (changing the management address for +example). This is a limitation of the Terraform state database, and +autoscaler will scale the cluster back down during the next few minutes. + +# Autoscaler Brightbox cloudprovider configuration + +The Brightbox Cloud cloudprovider is configured via Environment Variables +supplied to the autoscaler pod. The easiest way to do this is to [create +a secret](https://kubernetes.io/docs/concepts/configuration/secret/#creating-a-secret-manually) containing the variables within the `kube-system` namespace. + +``` +apiVersion: v1 +kind: Secret +metadata: + name: brightbox-credentials + namespace: kube-system +type: Opaque +data: + BRIGHTBOX_API_URL: + BRIGHTBOX_CLIENT: + BRIGHTBOX_CLIENT_SECRET: + BRIGHTBOX_KUBE_JOIN_COMMAND: + BRIGHTBOX_KUBE_VERSION: +``` + +The join command can be obtained from the kubeadm token command + +``` +$ kubeadm token create --ttl 0 --description 'Cluster autoscaling token' --print-join-command +``` + +[Brightbox API +Clients](https://www.brightbox.com/docs/guides/manager/api-clients/) +can be created in the [Brightbox +Manager](https://www.brightbox.com/docs/guides/manager/) + +## Cluster Configuration + +The [Kubernetes Cluster +Builder](https://github.com/brightbox/kubernetes-cluster) creates a +`brightbox-credentials` secret in the `kube-system` namespace ready +to use. + +## Checking the environment + +You can check the brightbox-credentials secret by running the `check-env` job from the examples directory. 
+ +``` +$ kubectl apply -f examples/check-env.yaml +job.batch/check-env created +$ kubectl -n kube-system logs job/check-env +PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin +HOSTNAME=check-env-hbh6m +_BASH_GPG_KEY=7C0135FB088AAF6C66C650B9BB5869F064EA74AB +_BASH_VERSION=5.0 +_BASH_PATCH_LEVEL=0 +_BASH_LATEST_PATCH=11 +BRIGHTBOX_KUBE_VERSION=1.17.0 +... +$ kubectl delete -f examples/check-env.yaml +job.batch "check-env" deleted +``` + +# Running the Autoscaler + +1. Clone this repository and change into this directory. +2. Edit the `examples/config.rb` file and adjust the config hash. +3. Alter the cluster name if +required. (If you are using the [Kubernetes Cluster +Builder](https://github.com/brightbox/kubernetes-cluster), this will be +`cluster_name` and `cluster_domainname` joined with a '.') + +Then generate and apply the manifests +``` +$ make deploy TAG= +``` + +where TAG is the version you wish to use (1.17, 1.18, etc.) + +As the Brightbox cloud-provider auto-detects and potentially scales all +the worker groups, the example deployment file runs the autoscaler on +the master nodes. This avoids it accidentally killing itself. + +## Viewing the cluster-autoscaler options + +Cluster autoscaler has many options that can be adjusted to better fit the needs of your application. To view them run + +``` +$ kubectl create job ca-options --image=brightbox/cluster-autoscaler-brightbox:dev -- ./cluster-autoscaler -h +$ kubectl logs job/ca-options +``` + +Remove the job in the normal way with `kubectl delete job/ca-options` + +You can read more details about some of the options in the [main FAQ](../../FAQ.md) + + +# Building the Brightbox Cloud autoscaler + +Extract the repository to a machine running docker and then run the make command + +``` +$ make build +``` + +This builds an autoscaler containing only the Brightbox Cloud provider, tagged as `brightbox/cluster-autoscaler-brightbox:dev`. 
To build any other version add a TAG variable + +``` +make build TAG=1.1x +``` + diff --git a/cluster-autoscaler/cloudprovider/brightbox/brightbox_cloud_provider.go b/cluster-autoscaler/cloudprovider/brightbox/brightbox_cloud_provider.go new file mode 100644 index 00000000000..f069f901926 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/brightbox/brightbox_cloud_provider.go @@ -0,0 +1,336 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package brightbox + +import ( + "context" + "fmt" + "strconv" + "strings" + + apiv1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" + brightbox "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/brightbox/gobrightbox" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/brightbox/gobrightbox/status" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/brightbox/k8ssdk" + "k8s.io/autoscaler/cluster-autoscaler/config" + "k8s.io/autoscaler/cluster-autoscaler/utils/errors" + klog "k8s.io/klog/v2" +) + +const ( + // GPULabel is added to nodes with GPU resource + GPULabel = "cloud.brightbox.com/gpu-node" +) + +var ( + availableGPUTypes = map[string]struct{}{} +) + +// brightboxCloudProvider implements cloudprovider.CloudProvider interface +type brightboxCloudProvider struct { + resourceLimiter *cloudprovider.ResourceLimiter + ClusterName string + nodeGroups []cloudprovider.NodeGroup + nodeMap map[string]string + *k8ssdk.Cloud +} + +// 
Name returns name of the cloud provider. +func (b *brightboxCloudProvider) Name() string { + klog.V(4).Info("Name") + return cloudprovider.BrightboxProviderName +} + +// NodeGroups returns all node groups configured for this cloud provider. +func (b *brightboxCloudProvider) NodeGroups() []cloudprovider.NodeGroup { + klog.V(4).Info("NodeGroups") + // Duplicate the stored nodegroup elements and return it + //return append(b.nodeGroups[:0:0], b.nodeGroups...) + // Or just return the stored nodegroup elements by reference + return b.nodeGroups +} + +// NodeGroupForNode returns the node group for the given node, nil if +// the node should not be processed by cluster autoscaler, or non-nil +// error if such occurred. Must be implemented. +func (b *brightboxCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovider.NodeGroup, error) { + klog.V(4).Info("NodeGroupForNode") + klog.V(4).Infof("Looking for %v", node.Spec.ProviderID) + groupID, ok := b.nodeMap[k8ssdk.MapProviderIDToServerID(node.Spec.ProviderID)] + if ok { + klog.V(4).Infof("Found in group %v", groupID) + return b.findNodeGroup(groupID), nil + } + klog.V(4).Info("Not found") + return nil, nil +} + +// Refresh is before every main loop and can be used to dynamically +// update cloud provider state. +// In particular the list of node groups returned by NodeGroups can +// change as a result of CloudProvider.Refresh(). +func (b *brightboxCloudProvider) Refresh() error { + klog.V(4).Info("Refresh") + configmaps, err := b.GetConfigMaps() + if err != nil { + return err + } + clusterSuffix := "." + b.ClusterName + nodeGroups := make([]cloudprovider.NodeGroup, 0) + nodeMap := make(map[string]string) + for _, configMapOutline := range configmaps { + if !strings.HasSuffix(configMapOutline.Name, clusterSuffix) { + klog.V(4).Infof("name %q doesn't match suffix %q. 
Ignoring %q", configMapOutline.Name, clusterSuffix, configMapOutline.Id) + continue + } + configMap, err := b.GetConfigMap(configMapOutline.Id) + if err != nil { + return err + } + klog.V(6).Infof("ConfigMap %+v", configMap) + mapData := make(map[string]string) + for k, v := range configMap.Data { + element, ok := v.(string) + if !ok { + return fmt.Errorf("Unexpected value for key %q in configMap %q", k, configMap.Id) + } + mapData[k] = element + } + klog.V(6).Infof("MapData: %+v", mapData) + minSize, err := strconv.Atoi(mapData["min"]) + if err != nil { + klog.V(4).Info("Unable to retrieve minimum size. Ignoring") + continue + } + maxSize, err := strconv.Atoi(mapData["max"]) + if err != nil { + klog.V(4).Info("Unable to retrieve maximum size. Ignoring") + continue + } + if minSize == maxSize { + klog.V(4).Infof("Group %q has a fixed size %d. Ignoring", mapData["server_group"], minSize) + continue + } + klog.V(4).Infof("Group %q: Node defaults found in %q. Adding to node group list", configMap.Data["server_group"], configMap.Id) + newNodeGroup := makeNodeGroupFromAPIDetails( + defaultServerName(configMap.Name), + mapData, + minSize, + maxSize, + b.Cloud, + ) + group, err := b.GetServerGroup(newNodeGroup.Id()) + if err != nil { + return err + } + for _, server := range group.Servers { + nodeMap[server.Id] = group.Id + } + nodeGroups = append(nodeGroups, newNodeGroup) + } + b.nodeGroups = nodeGroups + b.nodeMap = nodeMap + klog.V(4).Infof("Refresh located %v node(s) over %v group(s)", len(nodeMap), len(nodeGroups)) + return nil +} + +// Pricing returns pricing model for this cloud provider or error if +// not available. +// Implementation optional. +func (b *brightboxCloudProvider) Pricing() (cloudprovider.PricingModel, errors.AutoscalerError) { + klog.V(4).Info("Pricing") + return nil, cloudprovider.ErrNotImplemented +} + +// GetAvailableMachineTypes get all machine types that can be requested +// from the cloud provider. +// Implementation optional. 
+func (b *brightboxCloudProvider) GetAvailableMachineTypes() ([]string, error) { + klog.V(4).Info("GetAvailableMachineTypes") + return nil, cloudprovider.ErrNotImplemented +} + +// NewNodeGroup builds a theoretical node group based on the node +// definition provided. The node group is not automatically created on +// the cloud provider side. The node group is not returned by NodeGroups() +// until it is created. +// Implementation optional. +func (b *brightboxCloudProvider) NewNodeGroup(machineType string, labels map[string]string, systemLabels map[string]string, taints []apiv1.Taint, extraResources map[string]resource.Quantity) (cloudprovider.NodeGroup, error) { + klog.V(4).Info("newNodeGroup") + return nil, cloudprovider.ErrNotImplemented +} + +// GetResourceLimiter returns struct containing limits (max, min) for +// resources (cores, memory etc.). +func (b *brightboxCloudProvider) GetResourceLimiter() (*cloudprovider.ResourceLimiter, error) { + klog.V(4).Info("GetResourceLimiter") + return b.resourceLimiter, nil +} + +// GPULabel returns the label added to nodes with GPU resource. +func (b *brightboxCloudProvider) GPULabel() string { + klog.V(4).Info("GPULabel") + return GPULabel +} + +// GetAvailableGPUTypes return all available GPU types cloud provider +// supports. +func (b *brightboxCloudProvider) GetAvailableGPUTypes() map[string]struct{} { + klog.V(4).Info("GetAvailableGPUTypes") + return availableGPUTypes +} + +// Cleanup cleans up open resources before the cloud provider is +// destroyed, i.e. go routines etc. 
+func (b *brightboxCloudProvider) Cleanup() error { + klog.V(4).Info("Cleanup") + return nil +} + +// BuildBrightbox builds the Brightbox provider +func BuildBrightbox( + opts config.AutoscalingOptions, + do cloudprovider.NodeGroupDiscoveryOptions, + rl *cloudprovider.ResourceLimiter, +) cloudprovider.CloudProvider { + klog.V(4).Info("BuildBrightbox") + klog.V(4).Infof("Config: %+v", opts) + klog.V(4).Infof("Discovery Options: %+v", do) + if opts.CloudConfig != "" { + klog.Warning("supplied config is not read by this version. Using environment") + } + if opts.ClusterName == "" { + klog.Fatal("Set the cluster name option to the Fully Qualified Internal Domain Name of the cluster") + } + newCloudProvider := &brightboxCloudProvider{ + ClusterName: opts.ClusterName, + resourceLimiter: rl, + Cloud: &k8ssdk.Cloud{}, + } + _, err := newCloudProvider.CloudClient() + if err != nil { + klog.Fatalf("Failed to create Brightbox Cloud Client: %v", err) + } + return newCloudProvider +} + +//private + +func (b *brightboxCloudProvider) findNodeGroup(groupID string) cloudprovider.NodeGroup { + klog.V(4).Info("findNodeGroup") + klog.V(4).Infof("Looking for %q", groupID) + for _, nodeGroup := range b.nodeGroups { + if nodeGroup.Id() == groupID { + return nodeGroup + } + } + return nil +} + +func defaultServerName(name string) string { + klog.V(4).Info("defaultServerName") + klog.V(4).Infof("group name is %q", name) + return "auto." 
+ name +} + +func fetchDefaultGroup(groups []brightbox.ServerGroup, clusterName string) string { + klog.V(4).Info("findDefaultGroup") + klog.V(4).Infof("for cluster %q", clusterName) + for _, group := range groups { + if group.Name == clusterName { + return group.Id + } + } + klog.Warningf("Unable to detect main group for cluster %q", clusterName) + return "" +} + +type idWithStatus struct { + id string + status string +} + +func (b *brightboxCloudProvider) extractGroupDefaults(servers []brightbox.Server) (string, string, string, error) { + klog.V(4).Info("extractGroupDefaults") + const zoneSentinel string = "dummyValue" + zoneID := zoneSentinel + var serverType, image idWithStatus + for _, serverSummary := range servers { + server, err := b.GetServer( + context.Background(), + serverSummary.Id, + serverNotFoundError(serverSummary.Id), + ) + if err != nil { + return "", "", "", err + } + image = checkForChange(image, idWithStatus{server.Image.Id, server.Image.Status}, "Group has multiple Image Ids") + serverType = checkForChange(serverType, idWithStatus{server.ServerType.Id, server.ServerType.Status}, "Group has multiple ServerType Ids") + zoneID = checkZoneForChange(zoneID, server.Zone.Id, zoneSentinel) + } + switch { + case serverType.id == "": + return "", "", "", fmt.Errorf("Unable to determine Server Type details from Group") + case image.id == "": + return "", "", "", fmt.Errorf("Unable to determine Image details from Group") + case zoneID == zoneSentinel: + return "", "", "", fmt.Errorf("Unable to determine Zone details from Group") + case image.status == status.Deprecated: + klog.Warningf("Selected image %q is deprecated. 
Please update to an available version", image.id) + } + return serverType.id, image.id, zoneID, nil +} + +func checkZoneForChange(zoneID string, newZoneID string, sentinel string) string { + klog.V(4).Info("checkZoneForChange") + klog.V(4).Infof("new %q, existing %q", newZoneID, zoneID) + switch zoneID { + case newZoneID, sentinel: + return newZoneID + default: + klog.V(4).Info("Group is zone balanced") + return "" + } +} + +func checkForChange(current idWithStatus, newDetails idWithStatus, errorMessage string) idWithStatus { + klog.V(4).Info("checkForChange") + klog.V(4).Infof("new %v, existing %v", newDetails, current) + switch { + case newDetails == current: + // Skip to end + case newDetails.status == status.Available: + if current.id == "" || current.status == status.Deprecated { + klog.V(4).Infof("Object %q is available. Selecting", newDetails.id) + return newDetails + } + // Multiple ids + klog.Warning(errorMessage) + case newDetails.status == status.Deprecated: + if current.id == "" { + klog.V(4).Infof("Object %q is deprecated, but selecting anyway", newDetails.id) + return newDetails + } + // Multiple ids + klog.Warning(errorMessage) + default: + klog.Warningf("Object %q is no longer available. Ignoring.", newDetails.id) + } + return current +} diff --git a/cluster-autoscaler/cloudprovider/brightbox/brightbox_cloud_provider_test.go b/cluster-autoscaler/cloudprovider/brightbox/brightbox_cloud_provider_test.go new file mode 100644 index 00000000000..bf284be8ab6 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/brightbox/brightbox_cloud_provider_test.go @@ -0,0 +1,735 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package brightbox + +import ( + "encoding/json" + "flag" + "os" + "os/exec" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + v1 "k8s.io/api/core/v1" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" + brightbox "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/brightbox/gobrightbox" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/brightbox/k8ssdk" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/brightbox/k8ssdk/mocks" + "k8s.io/autoscaler/cluster-autoscaler/config" + klog "k8s.io/klog/v2" +) + +const ( + fakeServer = "srv-testy" + fakeGroup = "grp-testy" + missingServer = "srv-notty" + fakeClusterName = "k8s-fake.cluster.local" +) + +var ( + fakeNodeMap = map[string]string{ + fakeServer: fakeGroup, + } + fakeNodeGroup = &brightboxNodeGroup{ + id: fakeGroup, + } + fakeNodeGroups = []cloudprovider.NodeGroup{ + fakeNodeGroup, + } +) + +func init() { + klog.InitFlags(nil) + flag.Set("alsologtostderr", "true") + flag.Set("v", "4") +} + +func TestMain(m *testing.M) { + flag.Parse() + os.Exit(m.Run()) +} + +func TestName(t *testing.T) { + assert.Equal(t, makeFakeCloudProvider(nil).Name(), cloudprovider.BrightboxProviderName) +} + +func TestGPULabel(t *testing.T) { + assert.Equal(t, makeFakeCloudProvider(nil).GPULabel(), GPULabel) +} + +func TestGetAvailableGPUTypes(t *testing.T) { + assert.Equal(t, makeFakeCloudProvider(nil).GetAvailableGPUTypes(), availableGPUTypes) +} + +func TestPricing(t *testing.T) { + obj, err := makeFakeCloudProvider(nil).Pricing() + assert.Equal(t, err, 
cloudprovider.ErrNotImplemented) + assert.Nil(t, obj) +} + +func TestGetAvailableMachineTypes(t *testing.T) { + obj, err := makeFakeCloudProvider(nil).GetAvailableMachineTypes() + assert.Equal(t, err, cloudprovider.ErrNotImplemented) + assert.Nil(t, obj) +} + +func TestNewNodeGroup(t *testing.T) { + obj, err := makeFakeCloudProvider(nil).NewNodeGroup("", nil, nil, nil, nil) + assert.Equal(t, err, cloudprovider.ErrNotImplemented) + assert.Nil(t, obj) +} + +func TestCleanUp(t *testing.T) { + assert.Nil(t, makeFakeCloudProvider(nil).Cleanup()) +} + +func TestResourceLimiter(t *testing.T) { + client := makeFakeCloudProvider(nil) + obj, err := client.GetResourceLimiter() + assert.Equal(t, obj, client.resourceLimiter) + assert.NoError(t, err) +} + +func TestNodeGroups(t *testing.T) { + client := makeFakeCloudProvider(nil) + assert.Zero(t, client.NodeGroups()) + client.nodeGroups = make([]cloudprovider.NodeGroup, 0) + assert.NotZero(t, client.NodeGroups()) + assert.Empty(t, client.NodeGroups()) + nodeGroup := &brightboxNodeGroup{} + client.nodeGroups = append(client.nodeGroups, nodeGroup) + newGroups := client.NodeGroups() + assert.Len(t, newGroups, 1) + assert.Same(t, newGroups[0], client.nodeGroups[0]) +} + +func TestNodeGroupForNode(t *testing.T) { + client := makeFakeCloudProvider(nil) + client.nodeGroups = fakeNodeGroups + client.nodeMap = fakeNodeMap + nodeGroup, err := client.NodeGroupForNode(makeNode(fakeServer)) + assert.Equal(t, fakeNodeGroup, nodeGroup) + assert.NoError(t, err) + nodeGroup, err = client.NodeGroupForNode(makeNode(missingServer)) + assert.Nil(t, nodeGroup) + assert.NoError(t, err) +} + +func TestBuildBrightBox(t *testing.T) { + ts := k8ssdk.GetAuthEnvTokenHandler(t) + defer k8ssdk.ResetAuthEnvironment() + defer ts.Close() + rl := cloudprovider.NewResourceLimiter(nil, nil) + do := cloudprovider.NodeGroupDiscoveryOptions{} + opts := config.AutoscalingOptions{ + CloudProviderName: cloudprovider.BrightboxProviderName, + ClusterName: fakeClusterName, 
+ } + cloud := BuildBrightbox(opts, do, rl) + assert.Equal(t, cloud.Name(), cloudprovider.BrightboxProviderName) + obj, err := cloud.GetResourceLimiter() + assert.Equal(t, rl, obj) + assert.NoError(t, err) +} + +func testOsExit(t *testing.T, funcName string, testFunc func(*testing.T)) { + if os.Getenv(funcName) == "1" { + testFunc(t) + return + } + cmd := exec.Command(os.Args[0], "-test.run="+funcName) + cmd.Env = append(os.Environ(), funcName+"=1") + err := cmd.Run() + if e, ok := err.(*exec.ExitError); ok && !e.Success() { + return + } + t.Fatalf("%s subprocess ran successfully, want non-zero exit status", funcName) +} + +func TestBuildBrightboxMissingClusterName(t *testing.T) { + testOsExit(t, "TestBuildBrightboxMissingClusterName", func(t *testing.T) { + ts := k8ssdk.GetAuthEnvTokenHandler(t) + defer k8ssdk.ResetAuthEnvironment() + defer ts.Close() + rl := cloudprovider.NewResourceLimiter(nil, nil) + do := cloudprovider.NodeGroupDiscoveryOptions{} + opts := config.AutoscalingOptions{ + CloudProviderName: cloudprovider.BrightboxProviderName, + } + BuildBrightbox(opts, do, rl) + }) +} + +func TestRefresh(t *testing.T) { + mockclient := new(mocks.CloudAccess) + testclient := k8ssdk.MakeTestClient(mockclient, nil) + provider := makeFakeCloudProvider(testclient) + groups := fakeGroups() + mockclient.On("ServerGroup", "grp-sda44").Return(fakeServerGroupsda44(), nil) + mockclient.On("ConfigMaps").Return(fakeConfigMaps(), nil) + mockclient.On("ConfigMap", "cfg-502vh").Return(fakeConfigMap502vh(), nil) + err := provider.Refresh() + require.NoError(t, err) + assert.Len(t, provider.nodeGroups, 1) + assert.NotEmpty(t, provider.nodeMap) + node, err := provider.NodeGroupForNode(makeNode("srv-lv426")) + assert.NoError(t, err) + require.NotNil(t, node) + assert.Equal(t, node.Id(), groups[0].Id) + node, err = provider.NodeGroupForNode(makeNode("srv-rp897")) + assert.NoError(t, err) + require.NotNil(t, node) + assert.Equal(t, node.Id(), groups[0].Id) + 
mockclient.AssertExpectations(t) +} + +func TestFetchDefaultGroup(t *testing.T) { + groups := fakeGroups() + groupID := fetchDefaultGroup(groups, "fred") + assert.Empty(t, groupID) + groupID = fetchDefaultGroup(groups, groups[0].Name) + assert.Equal(t, groups[0].Id, groupID) +} + +func makeNode(serverID string) *v1.Node { + return &v1.Node{ + Spec: v1.NodeSpec{ + ProviderID: k8ssdk.MapServerIDToProviderID(serverID), + }, + } +} + +func makeFakeCloudProvider(brightboxCloudClient *k8ssdk.Cloud) *brightboxCloudProvider { + return &brightboxCloudProvider{ + resourceLimiter: &cloudprovider.ResourceLimiter{}, + ClusterName: fakeClusterName, + Cloud: brightboxCloudClient, + } +} + +func fakeConfigMaps() []brightbox.ConfigMap { + const groupjson = ` +[{ + "id": "cfg-502vh", + "resource_type": "config_map", + "url": "https://api.gb1.brightbox.com/1.0/config_maps/cfg-502vh", + "name": "storage.k8s-fake.cluster.local", + "data": { + "image": "img-svqx9", + "max": "4", + "min": "1", + "region": "gb1", + "server_group": "grp-sda44", + "default_group": "grp-vnr33", + "type": "2gb.ssd", + "user_data": "fake_userdata", + "zone": "" + } + }] + ` + var result []brightbox.ConfigMap + _ = json.NewDecoder(strings.NewReader(groupjson)).Decode(&result) + return result +} + +func fakeConfigMap502vh() *brightbox.ConfigMap { + const groupjson = ` +{ + "id": "cfg-502vh", + "resource_type": "config_map", + "url": "https://api.gb1.brightbox.com/1.0/config_maps/cfg-502vh", + "name": "storage.k8s-fake.cluster.local", + "data": { + "image": "img-svqx9", + "max": "4", + "min": "1", + "region": "gb1", + "server_group": "grp-sda44", + "default_group": "grp-vnr33", + "type": "2gb.ssd", + "user_data": "fake_userdata", + "zone": "" + } + } + ` + var result brightbox.ConfigMap + _ = json.NewDecoder(strings.NewReader(groupjson)).Decode(&result) + return &result +} + +func fakeServerGroupsda44() *brightbox.ServerGroup { + const groupjson = ` +{"id": "grp-sda44", + "resource_type": "server_group", + "url": 
"https://api.gb1.brightbox.com/1.0/server_groups/grp-sda44", + "name": "storage.k8s-fake.cluster.local", + "description": "1:4", + "created_at": "2011-10-01T00:00:00Z", + "default": true, + "account": + {"id": "acc-43ks4", + "resource_type": "account", + "url": "https://api.gb1.brightbox.com/1.0/accounts/acc-43ks4", + "name": "Brightbox", + "status": "active"}, + "firewall_policy": + {"id": "fwp-j3654", + "resource_type": "firewall_policy", + "url": "https://api.gb1.brightbox.com/1.0/firewall_policies/fwp-j3654", + "default": true, + "name": "default", + "created_at": "2011-10-01T00:00:00Z", + "description": null}, + "servers": + [ + {"id": "srv-lv426", + "resource_type": "server", + "url": "https://api.gb1.brightbox.com/1.0/servers/srv-lv426", + "name": "", + "status": "active", + "locked": false, + "hostname": "srv-lv426", + "fqdn": "srv-lv426.gb1.brightbox.com", + "created_at": "2011-10-01T01:00:00Z", + "started_at": "2011-10-01T01:01:00Z", + "deleted_at": null}, + {"id": "srv-rp897", + "resource_type": "server", + "url": "https://api.gb1.brightbox.com/1.0/servers/srv-rp897", + "name": "", + "status": "active", + "locked": false, + "hostname": "srv-rp897", + "fqdn": "srv-rp897.gb1.brightbox.com", + "created_at": "2011-10-01T01:00:00Z", + "started_at": "2011-10-01T01:01:00Z", + "deleted_at": null} + ]} + ` + var result brightbox.ServerGroup + _ = json.NewDecoder(strings.NewReader(groupjson)).Decode(&result) + return &result +} + +func fakeGroups() []brightbox.ServerGroup { + const groupjson = ` +[{"id": "grp-sda44", + "resource_type": "server_group", + "url": "https://api.gb1.brightbox.com/1.0/server_groups/grp-sda44", + "name": "storage.k8s-fake.cluster.local", + "description": "1:4", + "created_at": "2011-10-01T00:00:00Z", + "default": true, + "account": + {"id": "acc-43ks4", + "resource_type": "account", + "url": "https://api.gb1.brightbox.com/1.0/accounts/acc-43ks4", + "name": "Brightbox", + "status": "active"}, + "firewall_policy": + {"id": "fwp-j3654", + 
"resource_type": "firewall_policy", + "url": "https://api.gb1.brightbox.com/1.0/firewall_policies/fwp-j3654", + "default": true, + "name": "default", + "created_at": "2011-10-01T00:00:00Z", + "description": null}, + "servers": + [ + {"id": "srv-lv426", + "resource_type": "server", + "url": "https://api.gb1.brightbox.com/1.0/servers/srv-lv426", + "name": "", + "status": "active", + "locked": false, + "hostname": "srv-lv426", + "fqdn": "srv-lv426.gb1.brightbox.com", + "created_at": "2011-10-01T01:00:00Z", + "started_at": "2011-10-01T01:01:00Z", + "deleted_at": null}, + {"id": "srv-rp897", + "resource_type": "server", + "url": "https://api.gb1.brightbox.com/1.0/servers/srv-rp897", + "name": "", + "status": "active", + "locked": false, + "hostname": "srv-rp897", + "fqdn": "srv-rp897.gb1.brightbox.com", + "created_at": "2011-10-01T01:00:00Z", + "started_at": "2011-10-01T01:01:00Z", + "deleted_at": null} + ]}] + ` + var result []brightbox.ServerGroup + _ = json.NewDecoder(strings.NewReader(groupjson)).Decode(&result) + return result +} + +func fakeServerlv426() *brightbox.Server { + const serverjson = ` +{"id": "srv-lv426", + "resource_type": "server", + "url": "https://api.gb1.brightbox.com/1.0/servers/srv-lv426", + "name": "storage-0.storage.k8s-fake.cluster.local", + "status": "active", + "locked": false, + "hostname": "srv-lv426", + "created_at": "2011-10-01T01:00:00Z", + "started_at": "2011-10-01T01:01:00Z", + "deleted_at": null, + "user_data": null, + "fqdn": "srv-lv426.gb1.brightbox.com", + "compatibility_mode": false, + "console_url": null, + "console_token": null, + "console_token_expires": null, + "account": + {"id": "acc-43ks4", + "resource_type": "account", + "url": "https://api.gb1.brightbox.com/1.0/accounts/acc-43ks4", + "name": "Brightbox", + "status": "active"}, + "image": + {"id": "img-3ikco", + "resource_type": "image", + "url": "https://api.gb1.brightbox.com/1.0/images/img-3ikco", + "name": "Ubuntu Lucid 10.04 server", + "username": "ubuntu", + 
"status": "available", + "locked": false, + "description": "Expands root partition automatically. login: ubuntu using stored ssh key", + "source": "ubuntu-lucid-daily-i64-server-20110509", + "arch": "x86_64", + "created_at": "2011-05-09T12:00:00Z", + "official": true, + "public": true, + "owner": "acc-43ks4"}, + "server_type": + {"id": "typ-zx45f", + "resource_type": "server_type", + "url": "https://api.gb1.brightbox.com/1.0/server_types/typ-zx45f", + "name": "Small", + "status": "available", + "cores": 2, + "ram": 2048, + "disk_size": 81920, + "handle": "small"}, + "zone": + {"id": "zon-328ds", + "resource_type": "zone", + "url": "https://api.gb1.brightbox.com/1.0/zones/zon-328ds", + "handle": "gb1"}, + "cloud_ips": + [{"id": "cip-k4a25", + "resource_type": "cloud_ip", + "url": "https://api.gb1.brightbox.com/1.0/cloud_ips/cip-k4a25", + "status": "mapped", + "public_ip": "109.107.50.0", + "public_ipv4": "109.107.50.0", + "public_ipv6": "2a02:1348:ffff:ffff::6d6b:3200", + "fqdn": "cip-k4a25.gb1.brightbox.com", + "reverse_dns": null, + "name": "product website ip"}], + "interfaces": + [{"id": "int-ds42k", + "resource_type": "interface", + "url": "https://api.gb1.brightbox.com/1.0/interfaces/int-ds42k", + "mac_address": "02:24:19:00:00:ee", + "ipv4_address": "81.15.16.17"}], + "snapshots": + [], + "server_groups": + [{"id": "grp-sda44", + "resource_type": "server_group", + "url": "https://api.gb1.brightbox.com/1.0/server_groups/grp-sda44", + "name": "", + "description": null, + "created_at": "2011-10-01T00:00:00Z", + "default": true}]} +` + var result brightbox.Server + _ = json.NewDecoder(strings.NewReader(serverjson)).Decode(&result) + return &result +} + +func fakeServerTypezx45f() *brightbox.ServerType { + const serverjson = ` +{"id": "typ-zx45f", + "resource_type": "server_type", + "url": "https://api.gb1.brightbox.com/1.0/server_types/typ-zx45f", + "name": "Small", + "status": "available", + "cores": 2, + "ram": 2048, + "disk_size": 81920, + "handle": "small"} 
+` + var result brightbox.ServerType + _ = json.NewDecoder(strings.NewReader(serverjson)).Decode(&result) + return &result +} + +func fakeServerrp897() *brightbox.Server { + const serverjson = ` +{"id": "srv-rp897", + "resource_type": "server", + "url": "https://api.gb1.brightbox.com/1.0/servers/srv-rp897", + "name": "storage-0.storage.k8s-fake.cluster.local", + "status": "active", + "locked": false, + "hostname": "srv-rp897", + "created_at": "2011-10-01T01:00:00Z", + "started_at": "2011-10-01T01:01:00Z", + "deleted_at": null, + "user_data": null, + "fqdn": "srv-rp897.gb1.brightbox.com", + "compatibility_mode": false, + "console_url": null, + "console_token": null, + "console_token_expires": null, + "account": + {"id": "acc-43ks4", + "resource_type": "account", + "url": "https://api.gb1.brightbox.com/1.0/accounts/acc-43ks4", + "name": "Brightbox", + "status": "active"}, + "image": + {"id": "img-3ikco", + "resource_type": "image", + "url": "https://api.gb1.brightbox.com/1.0/images/img-3ikco", + "name": "Ubuntu Lucid 10.04 server", + "username": "ubuntu", + "status": "available", + "locked": false, + "description": "Expands root partition automatically. 
login: ubuntu using stored ssh key", + "source": "ubuntu-lucid-daily-i64-server-20110509", + "arch": "x86_64", + "created_at": "2011-05-09T12:00:00Z", + "official": true, + "public": true, + "owner": "acc-43ks4"}, + "server_type": + {"id": "typ-zx45f", + "resource_type": "server_type", + "url": "https://api.gb1.brightbox.com/1.0/server_types/typ-zx45f", + "name": "Small", + "status": "available", + "cores": 2, + "ram": 2048, + "disk_size": 81920, + "handle": "small"}, + "zone": + {"id": "zon-328ds", + "resource_type": "zone", + "url": "https://api.gb1.brightbox.com/1.0/zones/zon-328ds", + "handle": "gb1"}, + "cloud_ips": + [{"id": "cip-k4a25", + "resource_type": "cloud_ip", + "url": "https://api.gb1.brightbox.com/1.0/cloud_ips/cip-k4a25", + "status": "mapped", + "public_ip": "109.107.50.0", + "public_ipv4": "109.107.50.0", + "public_ipv6": "2a02:1348:ffff:ffff::6d6b:3200", + "fqdn": "cip-k4a25.gb1.brightbox.com", + "reverse_dns": null, + "name": "product website ip"}], + "interfaces": + [{"id": "int-ds42k", + "resource_type": "interface", + "url": "https://api.gb1.brightbox.com/1.0/interfaces/int-ds42k", + "mac_address": "02:24:19:00:00:ee", + "ipv4_address": "81.15.16.17"}], + "snapshots": + [], + "server_groups": + [{"id": "grp-sda44", + "resource_type": "server_group", + "url": "https://api.gb1.brightbox.com/1.0/server_groups/grp-sda44", + "name": "", + "description": null, + "created_at": "2011-10-01T00:00:00Z", + "default": true}]} +` + var result brightbox.Server + _ = json.NewDecoder(strings.NewReader(serverjson)).Decode(&result) + return &result +} + +func fakeServertesty() *brightbox.Server { + const serverjson = ` +{"id": "srv-testy", + "resource_type": "server", + "url": "https://api.gb1.brightbox.com/1.0/servers/srv-testy", + "name": "storage-0.storage.k8s-fake.cluster.local", + "status": "active", + "locked": false, + "hostname": "srv-testy", + "created_at": "2011-10-01T01:00:00Z", + "started_at": "2011-10-01T01:01:00Z", + "deleted_at": null, + 
"user_data": null, + "fqdn": "srv-testy.gb1.brightbox.com", + "compatibility_mode": false, + "console_url": null, + "console_token": null, + "console_token_expires": null, + "account": + {"id": "acc-43ks4", + "resource_type": "account", + "url": "https://api.gb1.brightbox.com/1.0/accounts/acc-43ks4", + "name": "Brightbox", + "status": "active"}, + "image": + {"id": "img-3ikco", + "resource_type": "image", + "url": "https://api.gb1.brightbox.com/1.0/images/img-3ikco", + "name": "Ubuntu Lucid 10.04 server", + "username": "ubuntu", + "status": "available", + "locked": false, + "description": "Expands root partition automatically. login: ubuntu using stored ssh key", + "source": "ubuntu-lucid-daily-i64-server-20110509", + "arch": "x86_64", + "created_at": "2011-05-09T12:00:00Z", + "official": true, + "public": true, + "owner": "acc-43ks4"}, + "server_type": + {"id": "typ-zx45f", + "resource_type": "server_type", + "url": "https://api.gb1.brightbox.com/1.0/server_types/typ-zx45f", + "name": "Small", + "status": "available", + "cores": 2, + "ram": 2048, + "disk_size": 81920, + "handle": "small"}, + "zone": + {"id": "zon-328ds", + "resource_type": "zone", + "url": "https://api.gb1.brightbox.com/1.0/zones/zon-328ds", + "handle": "gb1"}, + "cloud_ips": + [{"id": "cip-k4a25", + "resource_type": "cloud_ip", + "url": "https://api.gb1.brightbox.com/1.0/cloud_ips/cip-k4a25", + "status": "mapped", + "public_ip": "109.107.50.0", + "public_ipv4": "109.107.50.0", + "public_ipv6": "2a02:1348:ffff:ffff::6d6b:3200", + "fqdn": "cip-k4a25.gb1.brightbox.com", + "reverse_dns": null, + "name": "product website ip"}], + "interfaces": + [{"id": "int-ds42k", + "resource_type": "interface", + "url": "https://api.gb1.brightbox.com/1.0/interfaces/int-ds42k", + "mac_address": "02:24:19:00:00:ee", + "ipv4_address": "81.15.16.17"}], + "snapshots": + [], + "server_groups": + [{"id": "grp-testy", + "resource_type": "server_group", + "url": "https://api.gb1.brightbox.com/1.0/server_groups/grp-testy", + 
"name": "", + "description": null, + "created_at": "2011-10-01T00:00:00Z", + "default": true}]} +` + var result brightbox.Server + _ = json.NewDecoder(strings.NewReader(serverjson)).Decode(&result) + return &result +} + +func fakeServerGroupsPlusOne() []brightbox.ServerGroup { + const groupjson = ` +[{"id": "grp-sda44", + "resource_type": "server_group", + "url": "https://api.gb1.brightbox.com/1.0/server_groups/grp-sda44", + "name": "storage.k8s-fake.cluster.local", + "description": "1:4", + "created_at": "2011-10-01T00:00:00Z", + "default": true, + "account": + {"id": "acc-43ks4", + "resource_type": "account", + "url": "https://api.gb1.brightbox.com/1.0/accounts/acc-43ks4", + "name": "Brightbox", + "status": "active"}, + "firewall_policy": + {"id": "fwp-j3654", + "resource_type": "firewall_policy", + "url": "https://api.gb1.brightbox.com/1.0/firewall_policies/fwp-j3654", + "default": true, + "name": "default", + "created_at": "2011-10-01T00:00:00Z", + "description": null}, + "servers": + [ + {"id": "srv-lv426", + "resource_type": "server", + "url": "https://api.gb1.brightbox.com/1.0/servers/srv-lv426", + "name": "", + "status": "active", + "locked": false, + "hostname": "srv-lv426", + "fqdn": "srv-lv426.gb1.brightbox.com", + "created_at": "2011-10-01T01:00:00Z", + "started_at": "2011-10-01T01:01:00Z", + "deleted_at": null}, + {"id": "srv-testy", + "resource_type": "server", + "url": "https://api.gb1.brightbox.com/1.0/servers/srv-testy", + "name": "", + "status": "active", + "locked": false, + "hostname": "srv-testy", + "fqdn": "srv-testy.gb1.brightbox.com", + "created_at": "2011-10-01T01:00:00Z", + "started_at": "2011-10-01T01:01:00Z", + "deleted_at": null}, + {"id": "srv-rp897", + "resource_type": "server", + "url": "https://api.gb1.brightbox.com/1.0/servers/srv-rp897", + "name": "", + "status": "active", + "locked": false, + "hostname": "srv-rp897", + "fqdn": "srv-rp897.gb1.brightbox.com", + "created_at": "2011-10-01T01:00:00Z", + "started_at": 
"2011-10-01T01:01:00Z", + "deleted_at": null} + ]}] + ` + var result []brightbox.ServerGroup + _ = json.NewDecoder(strings.NewReader(groupjson)).Decode(&result) + return result +} + +func deletedFakeServer(server *brightbox.Server) *brightbox.Server { + now := time.Now() + result := *server + result.DeletedAt = &now + result.Status = "deleted" + result.ServerGroups = []brightbox.ServerGroup{} + return &result +} diff --git a/cluster-autoscaler/cloudprovider/brightbox/brightbox_node_group.go b/cluster-autoscaler/cloudprovider/brightbox/brightbox_node_group.go new file mode 100644 index 00000000000..2b6974c2936 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/brightbox/brightbox_node_group.go @@ -0,0 +1,438 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package brightbox + +import ( + "context" + "fmt" + "strings" + "time" + + apiv1 "k8s.io/api/core/v1" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" + brightbox "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/brightbox/gobrightbox" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/brightbox/gobrightbox/status" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/brightbox/k8ssdk" + "k8s.io/autoscaler/cluster-autoscaler/config" + klog "k8s.io/klog/v2" + v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper" + schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework" +) + +const ( + // Allocatable Resources reserves + // Reserve 4% of memory + memoryReservePercent = 4 + // with a minimum of 160MB + minimumMemoryReserve = 167772160 + // Reserve 5GB of disk space + minimumDiskReserve = 5368709120 +) + +var ( + checkInterval = time.Second * 1 + checkTimeout = time.Second * 30 +) + +type brightboxNodeGroup struct { + id string + minSize int + maxSize int + serverOptions *brightbox.ServerOptions + *k8ssdk.Cloud +} + +// MaxSize returns maximum size of the node group. +func (ng *brightboxNodeGroup) MaxSize() int { + klog.V(4).Info("MaxSize") + return ng.maxSize +} + +// MinSize returns minimum size of the node group. +func (ng *brightboxNodeGroup) MinSize() int { + klog.V(4).Info("MinSize") + return ng.minSize +} + +// TargetSize returns the current target size of the node group. It +// is possible that the number of nodes in Kubernetes is different at +// the moment but should be equal to Size() once everything stabilizes +// (new nodes finish startup and registration or removed nodes are deleted +// completely). Implementation required. 
+func (ng *brightboxNodeGroup) TargetSize() (int, error) { + klog.V(4).Info("TargetSize") + group, err := ng.GetServerGroup(ng.Id()) + if err != nil { + return 0, err + } + return len(group.Servers), nil +} + +// CurrentSize returns the current actual size of the node group. +func (ng *brightboxNodeGroup) CurrentSize() (int, error) { + klog.V(4).Info("CurrentSize") + // The implementation is currently synchronous, so + // CurrentSize and TargetSize will be identical at all times + return ng.TargetSize() +} + +// IncreaseSize increases the size of the node group. To delete a node +// you need to explicitly name it and use DeleteNode. This function should +// wait until node group size is updated. Implementation required. +func (ng *brightboxNodeGroup) IncreaseSize(delta int) error { + klog.V(4).Infof("IncreaseSize: %v", delta) + if delta <= 0 { + return fmt.Errorf("size increase must be positive") + } + size, err := ng.TargetSize() + if err != nil { + return err + } + desiredSize := size + delta + if desiredSize > ng.MaxSize() { + return fmt.Errorf("size increase too large - desired:%d max:%d", desiredSize, ng.MaxSize()) + } + err = ng.createServers(delta) + if err != nil { + return err + } + return wait.Poll( + checkInterval, + checkTimeout, + func() (bool, error) { + size, err := ng.TargetSize() + return err == nil && size >= desiredSize, err + }, + ) +} + +// DeleteNodes deletes nodes from this node group. Error is returned +// either on failure or if the given node doesn't belong to this +// node group. This function should wait until node group size is +// updated. Implementation required. 
+func (ng *brightboxNodeGroup) DeleteNodes(nodes []*apiv1.Node) error { + klog.V(4).Info("DeleteNodes") + klog.V(4).Infof("Nodes: %+v", nodes) + for _, node := range nodes { + size, err := ng.CurrentSize() + if err != nil { + return err + } + if size <= ng.MinSize() { + return fmt.Errorf("min size reached, no further nodes will be deleted") + } + serverID := k8ssdk.MapProviderIDToServerID(node.Spec.ProviderID) + err = ng.deleteServerFromGroup(serverID) + if err != nil { + return err + } + } + return nil +} + +// DecreaseTargetSize decreases the target size of the node group. This +// function doesn't permit to delete any existing node and can be used +// only to reduce the request for new nodes that have not been yet +// fulfilled. Delta should be negative. +// It is assumed that cloud provider will not delete the existing nodes +// when there is an option to just decrease the target. Implementation +// required. +func (ng *brightboxNodeGroup) DecreaseTargetSize(delta int) error { + klog.V(4).Infof("DecreaseTargetSize: %v", delta) + if delta >= 0 { + return fmt.Errorf("decrease size must be negative") + } + size, err := ng.TargetSize() + if err != nil { + return err + } + nodesize, err := ng.CurrentSize() + if err != nil { + return err + } + // Group size is synchronous at present, so this always fails + if size+delta < nodesize { + return fmt.Errorf("attempt to delete existing nodes targetSize:%d delta:%d existingNodes: %d", + size, delta, nodesize) + } + return fmt.Errorf("shouldn't have got here") +} + +// Id returns an unique identifier of the node group. +func (ng *brightboxNodeGroup) Id() string { + klog.V(4).Info("Id") + return ng.id +} + +// Debug returns a string containing all information regarding this +// node group. +func (ng *brightboxNodeGroup) Debug() string { + klog.V(4).Info("Debug") + return fmt.Sprintf("brightboxNodeGroup %+v", *ng) +} + +// Nodes returns a list of all nodes that belong to this node group. 
+// It is required that Instance objects returned by this method have Id +// field set. Other fields are optional. +func (ng *brightboxNodeGroup) Nodes() ([]cloudprovider.Instance, error) { + klog.V(4).Info("Nodes") + group, err := ng.GetServerGroup(ng.Id()) + if err != nil { + return nil, err + } + klog.V(4).Infof("Found %d servers in group", len(group.Servers)) + nodes := make([]cloudprovider.Instance, len(group.Servers)) + for i, server := range group.Servers { + cpStatus := cloudprovider.InstanceStatus{} + switch server.Status { + case status.Active: + cpStatus.State = cloudprovider.InstanceRunning + case status.Creating: + cpStatus.State = cloudprovider.InstanceCreating + case status.Deleting: + cpStatus.State = cloudprovider.InstanceDeleting + default: + errorInfo := cloudprovider.InstanceErrorInfo{ + ErrorClass: cloudprovider.OtherErrorClass, + ErrorCode: server.Status, + ErrorMessage: server.Status, + } + cpStatus.ErrorInfo = &errorInfo + } + nodes[i] = cloudprovider.Instance{ + Id: k8ssdk.MapServerIDToProviderID(server.Id), + Status: &cpStatus, + } + } + klog.V(4).Infof("Created %d nodes", len(nodes)) + return nodes, nil +} + +// Exist checks if the node group really exists on the cloud provider +// side. Allows to tell the theoretical node group from the real +// one. Implementation required. +func (ng *brightboxNodeGroup) Exist() bool { + klog.V(4).Info("Exist") + _, err := ng.GetServerGroup(ng.Id()) + return err == nil +} + +// TemplateNodeInfo returns a schedulerframework.NodeInfo structure of an empty +// (as if just started) node. This will be used in scale-up simulations to +// predict what would a new node look like if a node group was expanded. The returned +// NodeInfo is expected to have a fully populated Node object, with all of the labels, +// capacity and allocatable information as well as all pods that are started on +// the node by default, using manifest (most likely only kube-proxy). Implementation optional. 
+func (ng *brightboxNodeGroup) TemplateNodeInfo() (*schedulerframework.NodeInfo, error) { + klog.V(4).Info("TemplateNodeInfo") + klog.V(4).Infof("Looking for server type %q", ng.serverOptions.ServerType) + serverType, err := ng.findServerType() + if err != nil { + return nil, err + } + klog.V(4).Infof("ServerType %+v", serverType) + // AllowedPodNumber is the kubelet default. The way to obtain that default programmatically + // has been lost in a twisty maze of endless indirection. + resources := &schedulerframework.Resource{ + MilliCPU: int64(serverType.Cores * 1000), + Memory: int64(serverType.Ram * 1024 * 1024), + EphemeralStorage: int64(serverType.DiskSize * 1024 * 1024), + AllowedPodNumber: 110, + } + node := apiv1.Node{ + Status: apiv1.NodeStatus{ + Capacity: resourceList(resources), + Allocatable: resourceList(applyFudgeFactor(resources)), + Conditions: cloudprovider.BuildReadyConditions(), + }, + } + nodeInfo := schedulerframework.NewNodeInfo(cloudprovider.BuildKubeProxy(ng.Id())) + nodeInfo.SetNode(&node) + return nodeInfo, nil +} + +// ResourceList returns a resource list of this resource. +func resourceList(r *schedulerframework.Resource) v1.ResourceList { + result := v1.ResourceList{ + v1.ResourceCPU: *resource.NewMilliQuantity(r.MilliCPU, resource.DecimalSI), + v1.ResourceMemory: *resource.NewQuantity(r.Memory, resource.BinarySI), + v1.ResourcePods: *resource.NewQuantity(int64(r.AllowedPodNumber), resource.BinarySI), + v1.ResourceEphemeralStorage: *resource.NewQuantity(r.EphemeralStorage, resource.BinarySI), + } + for rName, rQuant := range r.ScalarResources { + if v1helper.IsHugePageResourceName(rName) { + result[rName] = *resource.NewQuantity(rQuant, resource.BinarySI) + } else { + result[rName] = *resource.NewQuantity(rQuant, resource.DecimalSI) + } + } + return result +} + +// Create creates the node group on the cloud provider +// side. Implementation optional. 
+func (ng *brightboxNodeGroup) Create() (cloudprovider.NodeGroup, error) { + klog.V(4).Info("Create") + return nil, cloudprovider.ErrNotImplemented +} + +// Delete deletes the node group on the cloud provider side. +// This will be executed only for autoprovisioned node groups, once +// their size drops to 0. Implementation optional. +func (ng *brightboxNodeGroup) Delete() error { + klog.V(4).Info("Delete") + return cloudprovider.ErrNotImplemented +} + +// GetOptions returns NodeGroupAutoscalingOptions that should be used for this particular +// NodeGroup. Returning a nil will result in using default options. +func (ng *brightboxNodeGroup) GetOptions(defaults config.NodeGroupAutoscalingOptions) (*config.NodeGroupAutoscalingOptions, error) { + return nil, cloudprovider.ErrNotImplemented +} + +// Autoprovisioned returns true if the node group is autoprovisioned. An +// autoprovisioned group was created by CA and can be deleted when scaled +// to 0. +func (ng *brightboxNodeGroup) Autoprovisioned() bool { + klog.V(4).Info("Autoprovisioned") + return false +} + +//private + +func (ng *brightboxNodeGroup) findServerType() (*brightbox.ServerType, error) { + handle := ng.serverOptions.ServerType + if strings.HasPrefix(handle, "typ-") { + return ng.GetServerType(handle) + } + servertypes, err := ng.GetServerTypes() + if err != nil { + return nil, err + } + for _, servertype := range servertypes { + if servertype.Handle == handle { + return &servertype, nil + } + } + return nil, fmt.Errorf("ServerType with handle '%s' doesn't exist", handle) +} + +func max(x, y int64) int64 { + if x > y { + return x + } + return y +} + +func applyFudgeFactor(capacity *schedulerframework.Resource) *schedulerframework.Resource { + allocatable := capacity.Clone() + allocatable.Memory = max(0, capacity.Memory-max(capacity.Memory*memoryReservePercent/100, minimumMemoryReserve)) + allocatable.EphemeralStorage = max(0, capacity.EphemeralStorage-minimumDiskReserve) + return allocatable +} + +func 
makeNodeGroupFromAPIDetails( + name string, + mapData map[string]string, + minSize int, + maxSize int, + cloudclient *k8ssdk.Cloud, +) *brightboxNodeGroup { + klog.V(4).Info("makeNodeGroupFromApiDetails") + userData := mapData["user_data"] + options := &brightbox.ServerOptions{ + Image: mapData["image"], + Name: &name, + ServerType: mapData["type"], + Zone: mapData["zone"], + UserData: &userData, + ServerGroups: []string{mapData["default_group"], mapData["server_group"]}, + } + result := brightboxNodeGroup{ + id: mapData["server_group"], + minSize: minSize, + maxSize: maxSize, + serverOptions: options, + Cloud: cloudclient, + } + klog.V(4).Info(result.Debug()) + return &result +} + +func (ng *brightboxNodeGroup) createServers(amount int) error { + klog.V(4).Infof("createServers: %d", amount) + for i := 1; i <= amount; i++ { + _, err := ng.CreateServer(ng.serverOptions) + if err != nil { + return err + } + } + return nil +} + +// Delete the server and wait for the group details to be updated +func (ng *brightboxNodeGroup) deleteServerFromGroup(serverID string) error { + klog.V(4).Infof("deleteServerFromGroup: %q", serverID) + serverIDNotInGroup := func() (bool, error) { + return ng.isMissing(serverID) + } + missing, err := serverIDNotInGroup() + if err != nil { + return err + } else if missing { + return fmt.Errorf("%s belongs to a different group than %s", serverID, ng.Id()) + } + err = ng.DestroyServer(serverID) + if err != nil { + return err + } + return wait.Poll( + checkInterval, + checkTimeout, + serverIDNotInGroup, + ) +} + +func serverNotFoundError(id string) error { + klog.V(4).Infof("serverNotFoundError: created for %q", id) + return fmt.Errorf("Server %s not found", id) +} + +func (ng *brightboxNodeGroup) isMissing(serverID string) (bool, error) { + klog.V(4).Infof("isMissing: %q from %q", serverID, ng.Id()) + server, err := ng.GetServer( + context.Background(), + serverID, + serverNotFoundError(serverID), + ) + if err != nil { + return false, err + } + 
if server.DeletedAt != nil { + klog.V(4).Info("server deleted") + return true, nil + } + for _, group := range server.ServerGroups { + if group.Id == ng.Id() { + return false, nil + } + } + return true, nil +} diff --git a/cluster-autoscaler/cloudprovider/brightbox/brightbox_node_group_test.go b/cluster-autoscaler/cloudprovider/brightbox/brightbox_node_group_test.go new file mode 100644 index 00000000000..7c42db9499e --- /dev/null +++ b/cluster-autoscaler/cloudprovider/brightbox/brightbox_node_group_test.go @@ -0,0 +1,347 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package brightbox + +import ( + "errors" + "strconv" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" + v1 "k8s.io/api/core/v1" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/brightbox/k8ssdk" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/brightbox/k8ssdk/mocks" + schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework" + //schedulerframework "k8s.io/kubernetes/pkg/scheduler/nodeinfo" +) + +const ( + fakeMaxSize = 4 + fakeMinSize = 1 + fakeNodeGroupDescription = "1:4" + fakeDefaultSize = 3 + fakeNodeGroupID = "grp-sda44" + fakeNodeGroupName = "auto.workers.k8s_fake.cluster.local" + fakeNodeGroupImageID = "img-testy" + fakeNodeGroupServerTypeID = "typ-zx45f" + fakeNodeGroupServerTypeHandle = "small" + fakeNodeGroupZoneID = "zon-testy" + fakeNodeGroupMainGroupID = "grp-y6cai" + fakeNodeGroupUserData = "fake userdata" +) + +var ( + fakeMapData = map[string]string{ + "min": strconv.Itoa(fakeMinSize), + "max": strconv.Itoa(fakeMaxSize), + "server_group": fakeNodeGroupID, + "default_group": fakeNodeGroupMainGroupID, + "image": fakeNodeGroupImageID, + "type": fakeNodeGroupServerTypeID, + "zone": fakeNodeGroupZoneID, + "user_data": fakeNodeGroupUserData, + } + ErrFake = errors.New("fake API Error") + fakeInstances = []cloudprovider.Instance{ + { + Id: "brightbox://srv-rp897", + Status: &cloudprovider.InstanceStatus{ + State: cloudprovider.InstanceRunning, + }, + }, + { + Id: "brightbox://srv-lv426", + Status: &cloudprovider.InstanceStatus{ + State: cloudprovider.InstanceRunning, + }, + }, + } + fakeTransitionInstances = []cloudprovider.Instance{ + { + Id: "brightbox://srv-rp897", + Status: &cloudprovider.InstanceStatus{ + State: cloudprovider.InstanceDeleting, + }, + }, + { + Id: "brightbox://srv-lv426", + Status: &cloudprovider.InstanceStatus{ + State: cloudprovider.InstanceCreating, + }, + }, + } + 
ErrFakeInstances = []cloudprovider.Instance{ + { + Id: "brightbox://srv-rp897", + Status: &cloudprovider.InstanceStatus{ + ErrorInfo: &cloudprovider.InstanceErrorInfo{ + ErrorClass: cloudprovider.OtherErrorClass, + ErrorCode: "unavailable", + ErrorMessage: "unavailable", + }, + }, + }, + { + Id: "brightbox://srv-lv426", + Status: &cloudprovider.InstanceStatus{ + ErrorInfo: &cloudprovider.InstanceErrorInfo{ + ErrorClass: cloudprovider.OtherErrorClass, + ErrorCode: "inactive", + ErrorMessage: "inactive", + }, + }, + }, + } +) + +func TestMaxSize(t *testing.T) { + assert.Equal(t, makeFakeNodeGroup(nil).MaxSize(), fakeMaxSize) +} + +func TestMinSize(t *testing.T) { + assert.Equal(t, makeFakeNodeGroup(nil).MinSize(), fakeMinSize) +} + +func TestSize(t *testing.T) { + mockclient := new(mocks.CloudAccess) + testclient := k8ssdk.MakeTestClient(mockclient, nil) + nodeGroup := makeFakeNodeGroup(testclient) + fakeServerGroup := &fakeGroups()[0] + t.Run("TargetSize", func(t *testing.T) { + mockclient.On("ServerGroup", fakeNodeGroupID). + Return(fakeServerGroup, nil).Once() + size, err := nodeGroup.TargetSize() + assert.Equal(t, 2, size) + assert.NoError(t, err) + }) + t.Run("TargetSizeFail", func(t *testing.T) { + mockclient.On("ServerGroup", fakeNodeGroupID). + Return(nil, ErrFake).Once() + size, err := nodeGroup.TargetSize() + assert.Error(t, err) + assert.Zero(t, size) + }) + t.Run("CurrentSize", func(t *testing.T) { + mockclient.On("ServerGroup", fakeNodeGroupID). + Return(fakeServerGroup, nil).Once() + size, err := nodeGroup.CurrentSize() + assert.Equal(t, 2, size) + assert.NoError(t, err) + }) + t.Run("CurrentSizeFail", func(t *testing.T) { + mockclient.On("ServerGroup", fakeNodeGroupID). + Return(nil, ErrFake).Once() + size, err := nodeGroup.CurrentSize() + assert.Error(t, err) + assert.Zero(t, size) + }) + mockclient.On("ServerGroup", fakeNodeGroupID). 
+		Return(fakeServerGroup, nil)
+	t.Run("DecreaseTargetSizePositive", func(t *testing.T) {
+		err := nodeGroup.DecreaseTargetSize(0)
+		assert.Error(t, err)
+	})
+	t.Run("DecreaseTargetSizeFail", func(t *testing.T) {
+		err := nodeGroup.DecreaseTargetSize(-1)
+		assert.Error(t, err)
+	})
+	mockclient.AssertExpectations(t)
+}
+
+// TestIncreaseSize first checks the server options held by the fake node
+// group and then exercises IncreaseSize against a mocked cloud client: a
+// delta of 0, a delta of 4 and a failing CreateServer call must each return
+// an error, while a delta of 1 with a working CreateServer call must not.
+func TestIncreaseSize(t *testing.T) {
+	mockclient := new(mocks.CloudAccess)
+	testclient := k8ssdk.MakeTestClient(mockclient, nil)
+	nodeGroup := makeFakeNodeGroup(testclient)
+	t.Run("Creating details set properly", func(t *testing.T) {
+		assert.Equal(t, fakeNodeGroupID, nodeGroup.id)
+		assert.Equal(t, fakeNodeGroupName, *nodeGroup.serverOptions.Name)
+		assert.Equal(t, fakeNodeGroupServerTypeID, nodeGroup.serverOptions.ServerType)
+		assert.Equal(t, fakeNodeGroupImageID, nodeGroup.serverOptions.Image)
+		assert.Equal(t, fakeNodeGroupZoneID, nodeGroup.serverOptions.Zone)
+		assert.ElementsMatch(t, []string{fakeNodeGroupMainGroupID, fakeNodeGroupID}, nodeGroup.serverOptions.ServerGroups)
+		assert.Equal(t, fakeNodeGroupUserData, *nodeGroup.serverOptions.UserData)
+	})
+	t.Run("Require positive delta", func(t *testing.T) {
+		err := nodeGroup.IncreaseSize(0)
+		assert.Error(t, err)
+	})
+	fakeServerGroup := &fakeGroups()[0]
+	t.Run("Don't exceed max size", func(t *testing.T) {
+		mockclient.On("ServerGroup", fakeNodeGroupID).
+			Return(fakeServerGroup, nil).Once()
+		err := nodeGroup.IncreaseSize(4)
+		assert.Error(t, err)
+	})
+	t.Run("Fail to create one new server", func(t *testing.T) {
+		mockclient.On("ServerGroup", fakeNodeGroupID).
+			Return(fakeServerGroup, nil).Once()
+		mockclient.On("CreateServer", mock.Anything).
+			Return(nil, ErrFake).Once()
+		err := nodeGroup.IncreaseSize(1)
+		assert.Error(t, err)
+	})
+	t.Run("Create one new server", func(t *testing.T) {
+		mockclient.On("ServerGroup", fakeNodeGroupID).
+			Return(fakeServerGroup, nil).Once()
+		mockclient.On("CreateServer", mock.Anything).
+			Return(nil, nil).Once()
+		// A second ServerGroup lookup is answered with the first group
+		// from fakeServerGroupsPlusOne instead of fakeServerGroup.
+		mockclient.On("ServerGroup", fakeNodeGroupID).
+			Return(&fakeServerGroupsPlusOne()[0], nil).Once()
+		err := nodeGroup.IncreaseSize(1)
+		assert.NoError(t, err)
+	})
+}
+
+// TestDeleteNodes exercises DeleteNodes against a mocked cloud client. A nil
+// node list and a single "srv-rp897" node are expected to succeed; the
+// fakeServer node ("Foreign Node") and the two-node "Delete All Nodes"
+// request are expected to return an error.
+func TestDeleteNodes(t *testing.T) {
+	mockclient := new(mocks.CloudAccess)
+	testclient := k8ssdk.MakeTestClient(mockclient, nil)
+	nodeGroup := makeFakeNodeGroup(testclient)
+	fakeServerGroup := &fakeGroups()[0]
+	// These two expectations carry no Once(), so they are not limited to a
+	// single call.
+	mockclient.On("ServerGroup", fakeNodeGroupID).
+		Return(fakeServerGroup, nil).
+		On("Server", fakeServer).
+		Return(fakeServertesty(), nil)
+	t.Run("Empty Nodes", func(t *testing.T) {
+		err := nodeGroup.DeleteNodes(nil)
+		assert.NoError(t, err)
+	})
+	t.Run("Foreign Node", func(t *testing.T) {
+		err := nodeGroup.DeleteNodes([]*v1.Node{makeNode(fakeServer)})
+		assert.Error(t, err)
+	})
+	t.Run("Delete Node", func(t *testing.T) {
+		// The first Server lookup returns fakeServerrp897(); the second
+		// returns the same server passed through deletedFakeServer.
+		mockclient.On("Server", "srv-rp897").
+			Return(fakeServerrp897(), nil).Once().
+			On("Server", "srv-rp897").
+			Return(deletedFakeServer(fakeServerrp897()), nil).
+			Once().
+			On("DestroyServer", "srv-rp897").
+			Return(nil).Once()
+		err := nodeGroup.DeleteNodes([]*v1.Node{makeNode("srv-rp897")})
+		assert.NoError(t, err)
+	})
+	t.Run("Delete All Nodes", func(t *testing.T) {
+		// truncateServers is run as a side effect of the DestroyServer
+		// expectation below.
+		// NOTE(review): presumably it shrinks fakeServerGroup's server
+		// list - confirm against mocks.ServerListReducer.
+		truncateServers := mocks.ServerListReducer(fakeServerGroup)
+		mockclient.On("Server", "srv-rp897").
+			Return(fakeServerrp897(), nil).Once().
+			On("Server", "srv-rp897").
+			Return(deletedFakeServer(fakeServerrp897()), nil).
+			Once().
+			On("DestroyServer", "srv-rp897").
+			Return(nil).Once().Run(truncateServers)
+		err := nodeGroup.DeleteNodes([]*v1.Node{
+			makeNode("srv-rp897"),
+			makeNode("srv-lv426"),
+		})
+		assert.Error(t, err)
+	})
+
+}
+
+// TestExist checks that Exist reports true when the ServerGroup lookup
+// returns a group and false when the lookup returns serverNotFoundError.
+func TestExist(t *testing.T) {
+	mockclient := new(mocks.CloudAccess)
+	testclient := k8ssdk.MakeTestClient(mockclient, nil)
+	nodeGroup := makeFakeNodeGroup(testclient)
+	fakeServerGroup := &fakeGroups()[0]
+	t.Run("Find Group", func(t *testing.T) {
+		mockclient.On("ServerGroup", nodeGroup.Id()).
+			Return(fakeServerGroup, nil).Once()
+		assert.True(t, nodeGroup.Exist())
+	})
+	t.Run("Fail to Find Group", func(t *testing.T) {
+		mockclient.On("ServerGroup", nodeGroup.Id()).
+			Return(nil, serverNotFoundError(nodeGroup.Id()))
+		assert.False(t, nodeGroup.Exist())
+	})
+	mockclient.AssertExpectations(t)
+}
+
+// TestNodes sets the Status of the first two servers in the fake group to
+// three different pairs of values and compares the result of Nodes with the
+// expected instance list for each pair.
+func TestNodes(t *testing.T) {
+	mockclient := new(mocks.CloudAccess)
+	testclient := k8ssdk.MakeTestClient(mockclient, nil)
+	nodeGroup := makeFakeNodeGroup(testclient)
+	fakeServerGroup := &fakeGroups()[0]
+	// One expectation without Once() serves all three subtests; each
+	// subtest mutates the group it returns before calling Nodes.
+	mockclient.On("ServerGroup", fakeNodeGroupID).
+		Return(fakeServerGroup, nil)
+	t.Run("Both Active", func(t *testing.T) {
+		fakeServerGroup.Servers[0].Status = "active"
+		fakeServerGroup.Servers[1].Status = "active"
+		nodes, err := nodeGroup.Nodes()
+		require.NoError(t, err)
+		assert.ElementsMatch(t, fakeInstances, nodes)
+	})
+	t.Run("Creating and Deleting", func(t *testing.T) {
+		fakeServerGroup.Servers[0].Status = "creating"
+		fakeServerGroup.Servers[1].Status = "deleting"
+		nodes, err := nodeGroup.Nodes()
+		require.NoError(t, err)
+		assert.ElementsMatch(t, fakeTransitionInstances, nodes)
+	})
+	t.Run("Inactive and Unavailable", func(t *testing.T) {
+		fakeServerGroup.Servers[0].Status = "inactive"
+		fakeServerGroup.Servers[1].Status = "unavailable"
+		nodes, err := nodeGroup.Nodes()
+		require.NoError(t, err)
+		assert.ElementsMatch(t, ErrFakeInstances, nodes)
+	})
+}
+
+// TestTemplateNodeInfo checks that the Allocatable resources of the object
+// returned by TemplateNodeInfo equal fakeResource() when the ServerType
+// lookup returns fakeServerTypezx45f().
+func TestTemplateNodeInfo(t *testing.T) {
+	mockclient := new(mocks.CloudAccess)
+	testclient := k8ssdk.MakeTestClient(mockclient, nil)
+	mockclient.On("ServerType", fakeNodeGroupServerTypeID).
+		Return(fakeServerTypezx45f(), nil)
+	obj, err := makeFakeNodeGroup(testclient).TemplateNodeInfo()
+	require.NoError(t, err)
+	assert.Equal(t, fakeResource(), obj.Allocatable)
+}
+
+// TestCreate checks that Create returns cloudprovider.ErrNotImplemented and
+// a nil object.
+func TestCreate(t *testing.T) {
+	obj, err := makeFakeNodeGroup(nil).Create()
+	assert.Equal(t, cloudprovider.ErrNotImplemented, err)
+	assert.Nil(t, obj)
+}
+
+// TestDelete checks that Delete returns cloudprovider.ErrNotImplemented.
+func TestDelete(t *testing.T) {
+	assert.Equal(t, cloudprovider.ErrNotImplemented, makeFakeNodeGroup(nil).Delete())
+}
+
+// TestAutoprovisioned checks that Autoprovisioned reports false.
+func TestAutoprovisioned(t *testing.T) {
+	assert.False(t, makeFakeNodeGroup(nil).Autoprovisioned())
+}
+
+// fakeResource returns the Resource that TestTemplateNodeInfo expects as the
+// Allocatable value of the node template.
+func fakeResource() *schedulerframework.Resource {
+	return &schedulerframework.Resource{
+		MilliCPU:         2000,
+		Memory:           1979711488,
+		EphemeralStorage: 80530636800,
+		AllowedPodNumber: 110,
+	}
+}
+
+// makeFakeNodeGroup builds a brightboxNodeGroup from the fake name, map data
+// and size limits, wired to the supplied cloud client. Tests that never reach
+// the client pass nil.
+func makeFakeNodeGroup(brightboxCloudClient *k8ssdk.Cloud) *brightboxNodeGroup {
+	return makeNodeGroupFromAPIDetails(
+		fakeNodeGroupName,
+		fakeMapData,
+		fakeMinSize,
+		fakeMaxSize,
+		brightboxCloudClient,
+	)
+}
diff --git a/cluster-autoscaler/cloudprovider/brightbox/examples/check-env.yaml b/cluster-autoscaler/cloudprovider/brightbox/examples/check-env.yaml
new file mode 100644
index 00000000000..844f10d365e
--- /dev/null
+++ b/cluster-autoscaler/cloudprovider/brightbox/examples/check-env.yaml
@@ -0,0 +1,16 @@
+# One-shot Job that runs "env" in a bash image, with its environment
+# populated from the brightbox-credentials secret.
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: check-env
+  namespace: kube-system
+spec:
+  template:
+    spec:
+      restartPolicy: Never
+      containers:
+      - name: check-env
+        image: bash
+        envFrom:
+        - secretRef:
+            name: brightbox-credentials
+        command: ["env"]
diff --git a/cluster-autoscaler/cloudprovider/brightbox/examples/cluster-autoscaler-secret.yaml b/cluster-autoscaler/cloudprovider/brightbox/examples/cluster-autoscaler-secret.yaml
new file mode 100644
index 00000000000..b20af4efcdc
--- /dev/null
+++ b/cluster-autoscaler/cloudprovider/brightbox/examples/cluster-autoscaler-secret.yaml
@@ -0,0 +1,13 @@
+---
+# Template for the brightbox-credentials secret: every key under data is
+# left without a value here.
+apiVersion: v1
+kind: Secret
+metadata:
+  name: brightbox-credentials
+  namespace: kube-system
+type: Opaque
+data:
+  BRIGHTBOX_API_URL:
+  BRIGHTBOX_CLIENT:
+  BRIGHTBOX_CLIENT_SECRET:
+  BRIGHTBOX_KUBE_JOIN_COMMAND:
+  BRIGHTBOX_KUBE_VERSION:
diff --git a/cluster-autoscaler/cloudprovider/brightbox/examples/config.rb b/cluster-autoscaler/cloudprovider/brightbox/examples/config.rb
new file mode 100644
index 00000000000..775a29ac9ee
--- /dev/null
+++ b/cluster-autoscaler/cloudprovider/brightbox/examples/config.rb
@@ -0,0 +1,39 @@
+# Settings that output substitutes into the generated values: the cluster
+# name, the image repository and the name of the credentials secret.
+def config
+  {
+    cluster_name: 'kubernetes.cluster.local',
+    image: 'brightbox/cluster-autoscaler-brightbox',
+    secret: 'brightbox-credentials'
+  }
+end
+
+# Builds the values hash that this script prints as YAML (the Makefile pipes
+# it to `helm template ... -f -`). The image tag is read from ENV['TAG'];
+# the 'v' argument is "4" when TAG is 'dev' and "2" otherwise.
+def output(config)
+  { 'autoDiscovery' => { 'clusterName' => config[:cluster_name] },
+    'cloudProvider' => 'brightbox',
+    'image' =>
+    { 'repository' => config[:image],
+      'tag' => ENV['TAG'],
+      'pullPolicy' => 'Always' },
+    'tolerations' =>
+    [
+      { 'effect' => 'NoSchedule', 'key' => 'node-role.kubernetes.io/master' },
+      { 'operator' => 'Exists', 'key' => 'CriticalAddonsOnly' }
+    ],
+    'extraArgs' =>
+    { 'v' => (ENV['TAG'] == 'dev' ? 4 : 2).to_s,
+      'stderrthreshold' => 'info',
+      'logtostderr' => true,
+      'cluster-name' => config[:cluster_name],
+      'skip-nodes-with-local-storage' => true },
+    'podAnnotations' =>
+    { 'prometheus.io/scrape' => 'true', 'prometheus.io/port' => '8085' },
+    'rbac' => { 'create' => true },
+    'resources' =>
+    { 'limits' => { 'cpu' => '100m', 'memory' => '300Mi' },
+      'requests' => { 'cpu' => '100m', 'memory' => '300Mi' } },
+    'envFromSecret' => config[:secret],
+    'priorityClassName' => 'system-cluster-critical',
+    'dnsPolicy' => 'Default' }
+end
+
+require 'yaml'
+# Emit the generated values on standard output.
+STDOUT << output(config).to_yaml
diff --git a/cluster-autoscaler/cloudprovider/brightbox/examples/rebase.sh b/cluster-autoscaler/cloudprovider/brightbox/examples/rebase.sh
new file mode 100644
index 00000000000..0d751faddaa
--- /dev/null
+++ b/cluster-autoscaler/cloudprovider/brightbox/examples/rebase.sh
@@ -0,0 +1,21 @@
+#!/usr/bin/env bash
+# Copyright 2017 The Kubernetes Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Stop at the first failing command so a conflicted rebase is not followed
+# by the next one.
+set -e
+
+# Move each provider branch forward by one upstream release: the commits on
+# the branch (third argument) that come after the old tag (second argument)
+# are replayed on top of the new tag given to --onto.
+git rebase --onto cluster-autoscaler-1.17.2 cluster-autoscaler-1.17.1 autoscaler-brightbox-cloudprovider-1.17
+git rebase --onto cluster-autoscaler-1.18.1 cluster-autoscaler-1.18.0 autoscaler-brightbox-cloudprovider-1.18
+
diff --git a/cluster-autoscaler/cloudprovider/builder/builder_brightbox.go b/cluster-autoscaler/cloudprovider/builder/builder_brightbox.go
new file mode 100644
index 00000000000..c227fba0e5f
--- /dev/null
+++ b/cluster-autoscaler/cloudprovider/builder/builder_brightbox.go
@@ -0,0 +1,42 @@
+// +build brightbox
+
+/*
+Copyright 2019 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package builder
+
+import (
+	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
+	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/brightbox"
+	"k8s.io/autoscaler/cluster-autoscaler/config"
+)
+
+// AvailableCloudProviders supported by the brightbox cloud provider builder.
+var AvailableCloudProviders = []string{cloudprovider.BrightboxProviderName}
+
+// DefaultCloudProvider is the Brightbox provider name.
+const DefaultCloudProvider = cloudprovider.BrightboxProviderName
+
+// buildCloudProvider hands the autoscaling options, discovery options and
+// resource limiter to brightbox.BuildBrightbox when opts selects the
+// Brightbox provider; any other provider name yields nil.
+func buildCloudProvider(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) cloudprovider.CloudProvider {
+	if opts.CloudProviderName != cloudprovider.BrightboxProviderName {
+		return nil
+	}
+	return brightbox.BuildBrightbox(opts, do, rl)
+}