This repository has been archived by the owner on Jun 29, 2022. It is now read-only.

Merge pull request #219 from kinvolk/invidian/aks-support
Add AKS platform support
invidian authored Apr 27, 2020
2 parents 59dadd5 + ff1a0d0 commit dc34577
Showing 34 changed files with 1,669 additions and 100 deletions.
8 changes: 4 additions & 4 deletions Makefile
@@ -6,7 +6,7 @@ VERSION :=
MOD ?= vendor
DOCS_DIR ?= docs/cli

ALL_BUILD_TAGS := "aws,packet,e2e,disruptivee2e,poste2e"
ALL_BUILD_TAGS := "aws,packet,aks,e2e,disruptivee2e,poste2e"

## Adds a '-dirty' suffix to version string if there are uncommitted changes
changes := $(shell git status --porcelain)
@@ -88,12 +88,12 @@ endif

.PHONY: run-e2e-tests
run-e2e-tests:
KUBECONFIG=${kubeconfig} go test -mod=$(MOD) -tags="$(platform),e2e" -covermode=atomic -buildmode=exe -v ./test/...
KUBECONFIG=${kubeconfig} go test -mod=$(MOD) -tags="$(platform),e2e" -covermode=atomic -buildmode=exe -v -count=1 ./test/...
# Test if the metrics are actually being scraped
KUBECONFIG=${kubeconfig} PLATFORM=${platform} go test -mod=$(MOD) -tags="$(platform),poste2e" -covermode=atomic -buildmode=exe -v ./test/...
KUBECONFIG=${kubeconfig} PLATFORM=${platform} go test -mod=$(MOD) -tags="$(platform),poste2e" -covermode=atomic -buildmode=exe -v -count=1 ./test/...
# This is a test that should be run in the end to reduce the disruption to other tests because
# it will delete a node.
KUBECONFIG=${kubeconfig} go test -mod=$(MOD) -tags="$(platform),disruptivee2e" -covermode=atomic -buildmode=exe -v ./test/...
KUBECONFIG=${kubeconfig} go test -mod=$(MOD) -tags="$(platform),disruptivee2e" -covermode=atomic -buildmode=exe -v -count=1 ./test/...

.PHONY: all
all: build test
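The `-tags="$(platform),e2e"` flags above compile in test files that are guarded by Go build constraints, which is why `aks` has to be added to `ALL_BUILD_TAGS`. A hypothetical sketch of such a gated test file is shown below; the file name, package name and test body are assumptions, not taken from this diff:

```go
// aks_reachability_test.go: hypothetical example of a test that is only
// compiled when both the "aks" and "e2e" build tags are passed, as the
// run-e2e-tests target above does with -tags="aks,e2e".

// +build aks
// +build e2e

package test

import (
	"os"
	"testing"
)

func TestAKSClusterReachable(t *testing.T) {
	// The Makefile exports KUBECONFIG before invoking `go test`.
	if os.Getenv("KUBECONFIG") == "" {
		t.Fatal("KUBECONFIG must point at the cluster under test")
	}
}
```

The added `-count=1` flag disables Go's test result caching, so every run re-executes the suite against the live cluster instead of reusing a cached pass.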
118 changes: 118 additions & 0 deletions ci/aks/aks-cluster.lokocfg.envsubst
@@ -0,0 +1,118 @@
variable "cert_manager_email" {
default = "$EMAIL"
}
variable "cluster_name" {
default = "$CLUSTER_ID"
}

variable "aws_zone_id" {
default = "$AWS_DNS_ZONE_ID"
}

variable "aws_access_key_id" {
default = "$AWS_ACCESS_KEY_ID"
}

variable "aws_secret_access_key" {
default = "$AWS_SECRET_ACCESS_KEY"
}

variable "aws_dns_zone" {
default = "$AWS_DNS_ZONE"
}

variable "resource_group_name" {
default = "$CLUSTER_ID"
}

variable "grafana_admin_password" {
default = "admin"
}

variable "asset_dir" {
default = "~/lokoctl-assets"
}

variable "workers_count" {
default = 2
}

variable "workers_type" {
default = "Standard_D2_v2"
}

variable "location" {
default = "Germany West Central"
}

variable "worker_labels" {
default = {
"testing.io" = "yes",
"roleofnode" = "testing",
}
}

cluster "aks" {
asset_dir = pathexpand(var.asset_dir)
cluster_name = var.cluster_name

location = var.location
resource_group_name = var.resource_group_name

worker_pool "default" {
vm_size = var.workers_type
count = var.workers_count
labels = var.worker_labels
}

tags = {
"owner" = "LokomotiveCIAKS"
}
}

component "prometheus-operator" {
grafana_admin_password = var.grafana_admin_password
disable_webhooks = true

monitor {
etcd = false
kube_controller_manager = false
kube_scheduler = false
kube_proxy = false
kubelet = false
}

coredns {
selector = {
"k8s-app" = "kube-dns",
}
}
}

component "contour" {
ingress_hosts = [
"httpbin.${var.cluster_name}.${var.aws_dns_zone}",
]
service_monitor = true
}

component "cert-manager" {
email = var.cert_manager_email
service_monitor = true
}

component "external-dns" {
policy = "sync"
owner_id = var.cluster_name
aws {
zone_id = var.aws_zone_id
aws_access_key_id = var.aws_access_key_id
aws_secret_access_key = var.aws_secret_access_key
}

service_monitor = true
}

component "httpbin" {
ingress_host = "httpbin.${var.cluster_name}.${var.aws_dns_zone}"
}
6 changes: 3 additions & 3 deletions cli/cmd/cluster-apply.go
@@ -80,12 +80,12 @@ func runClusterApply(cmd *cobra.Command, args []string) {
fmt.Printf("\nYour configurations are stored in %s\n", assetDir)

kubeconfigPath := assetsKubeconfig(assetDir)
if err := verifyCluster(kubeconfigPath, p.GetExpectedNodes()); err != nil {
if err := verifyCluster(kubeconfigPath, p.Meta().ExpectedNodes); err != nil {
ctxLogger.Fatalf("Verify cluster: %v", err)
}

// Do controlplane upgrades only if cluster already exists.
if exists {
// Do controlplane upgrades only if cluster already exists and it is not a managed platform.
if exists && !p.Meta().Managed {
fmt.Printf("\nEnsuring that cluster controlplane is up to date.\n")

cu := controlplaneUpdater{
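The switch from `p.GetExpectedNodes()` to `p.Meta().ExpectedNodes` here, together with the analogous changes in the files below, suggests the per-platform getter methods were consolidated into a single `Meta()` accessor on the `platform.Platform` interface, with a new `Managed` flag for hosted platforms such as AKS. The actual definition lives in `pkg/platform` and is not part of this excerpt; a minimal sketch consistent with the call sites might look like this:

```go
// Sketch only: inferred from the call sites in this diff, not copied from
// pkg/platform. Field names match the accesses above; everything else is assumed.
package platform

// Meta describes static properties of a platform that the CLI needs
// before and after provisioning.
type Meta struct {
	// AssetDir was previously exposed via GetAssetDir().
	AssetDir string
	// ExpectedNodes was previously exposed via GetExpectedNodes().
	ExpectedNodes int
	// Managed marks managed platforms (such as AKS), for which
	// `lokoctl cluster apply` skips the self-hosted controlplane upgrade step.
	Managed bool
}

// Platform is implemented by each provisioner (AWS, Packet, bare metal, AKS, ...).
type Platform interface {
	Meta() Meta
	// Other methods (Apply, Destroy, ...) are omitted from this sketch.
}
```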
4 changes: 2 additions & 2 deletions cli/cmd/cluster.go
@@ -79,7 +79,7 @@ func initialize(ctxLogger *logrus.Entry) (*terraform.Executor, platform.Platform
b = local.NewLocalBackend()
}

assetDir, err := homedir.Expand(p.GetAssetDir())
assetDir, err := homedir.Expand(p.Meta().AssetDir)
if err != nil {
ctxLogger.Fatalf("Error expanding path: %v", err)
}
@@ -97,7 +97,7 @@
// initializeTerraform initialized Terraform directory using given backend and platform
// and returns configured executor.
func initializeTerraform(ctxLogger *logrus.Entry, p platform.Platform, b backend.Backend) *terraform.Executor {
assetDir, err := homedir.Expand(p.GetAssetDir())
assetDir, err := homedir.Expand(p.Meta().AssetDir)
if err != nil {
ctxLogger.Fatalf("Error expanding path: %v", err)
}
2 changes: 1 addition & 1 deletion cli/cmd/health.go
@@ -64,7 +64,7 @@ func runHealth(cmd *cobra.Command, args []string) {
contextLogger.Fatal("No cluster configured")
}

cluster, err := lokomotive.NewCluster(client, p.GetExpectedNodes())
cluster, err := lokomotive.NewCluster(client, p.Meta().ExpectedNodes)
if err != nil {
contextLogger.Fatalf("Error in creating new Lokomotive cluster: %q", err)
}
1 change: 1 addition & 0 deletions cli/cmd/root.go
@@ -22,6 +22,7 @@ import (
"github.com/spf13/viper"

// Register platforms by adding an anonymous import.
_ "github.com/kinvolk/lokomotive/pkg/platform/aks"
_ "github.com/kinvolk/lokomotive/pkg/platform/aws"
_ "github.com/kinvolk/lokomotive/pkg/platform/baremetal"
_ "github.com/kinvolk/lokomotive/pkg/platform/packet"
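The blank import only has an effect because each platform package registers itself from an `init` function when imported. The sketch below merely illustrates that pattern; the registry function and config type names are assumptions, since this diff does not show `pkg/platform`'s registration helpers:

```go
// Hypothetical sketch of the side-effect registration triggered by the blank
// import `_ "github.com/kinvolk/lokomotive/pkg/platform/aks"`. The names
// platform.Register and config are assumed, not taken from this diff.
package aks

import "github.com/kinvolk/lokomotive/pkg/platform"

// config would hold the fields parsed from a `cluster "aks"` block
// (location, resource_group_name, worker_pool, ...).
type config struct{}

func init() {
	// Importing this package solely for its side effects runs this init,
	// which makes the "aks" platform name known to the configuration loader.
	platform.Register("aks", &config{})
}
```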
2 changes: 1 addition & 1 deletion cli/cmd/utils.go
@@ -85,7 +85,7 @@ func getAssetDir() (string, error) {
return "", nil
}

return cfg.GetAssetDir(), nil
return cfg.Meta().AssetDir, nil
}

// expandKubeconfigPath tries to expand ~ in the given kubeconfig path.
docs/configuration-reference/components/prometheus-operator.md
@@ -97,6 +97,15 @@ Example:
| `alertmanager_external_url` | The external URL the Alertmanager instances will be available under. This is necessary to generate correct URLs and is required if Alertmanager is not served from the root of a DNS name. | "" | false |
| `alertmanager_config` | Provide YAML file path to configure Alertmanager. See [https://prometheus.io/docs/alerting/configuration/#configuration-file](https://prometheus.io/docs/alerting/configuration/#configuration-file). | `{"global":{"resolve_timeout":"5m"},"route":{"group_by":["job"],"group_wait":"30s","group_interval":"5m","repeat_interval":"12h","receiver":"null","routes":[{"match":{"alertname":"Watchdog"},"receiver":"null"}]},"receivers":[{"name":"null"}]}` | false |
| `alertmanager_node_selector` | Node selector to specify nodes where the AlertManager pods should be deployed. | {} | false |
| `disable_webhooks` | Disables validation and mutation webhooks. This might be required on older versions of Kubernetes to install successfully. | false | false |
| `monitor` | Block that allows disabling scraping of individual Kubernetes components. | - | false |
| `monitor.etcd` | Controls if the default Prometheus instance should scrape etcd metrics. | true | false |
| `monitor.kube_controller_manager` | Controls if the default Prometheus instance should scrape kube-controller-manager metrics. | true | false |
| `monitor.kube_scheduler` | Controls if the default Prometheus instance should scrape kube-scheduler metrics. | true | false |
| `monitor.kube_proxy` | Controls if the default Prometheus instance should scrape kube-proxy metrics. | true | false |
| `monitor.kubelet` | Controls if the default Prometheus instance should scrape kubelet metrics. | true | false |
| `coredns` | Block that allows customizing how CoreDNS is scraped. | - | false |
| `coredns.selector` | Defines how CoreDNS pods are selected for scraping. | {"k8s-app":"coredns","tier":"control-plane"} | false |

## Applying

129 changes: 129 additions & 0 deletions docs/configuration-reference/platforms/aks.md
@@ -0,0 +1,129 @@
# Lokomotive AKS configuration reference

## Contents

* [Introduction](#introduction)
* [Prerequisites](#prerequisites)
* [Configuration](#configuration)
* [Attribute reference](#attribute-reference)
* [Applying](#applying)
* [Destroying](#destroying)

## Introduction

This configuration reference describes all the options available for configuring a Lokomotive cluster on Azure AKS.

## Prerequisites

* `lokoctl` [installed locally](../../installer/lokoctl.md).
* `kubectl` installed locally to access the Kubernetes cluster.

## Configuration

To create a Lokomotive cluster, we need to define a configuration.

Example configuration file:

```tf
#myakscluster.lokocfg
variable "state_s3_bucket" {}
variable "lock_dynamodb_table" {}
variable "asset_dir" {}
variable "cluster_name" {}
variable "workers_count" {}
variable "state_s3_key" {}
variable "state_s3_region" {}
variable "workers_vm_size" {}
variable "location" {}
variable "tenant_id" {}
variable "subscription_id" {}
variable "client_id" {}
variable "client_secret" {}
variable "resource_group_name" {}
variable "application_name" {}
variable "manage_resource_group" {}
backend "s3" {
bucket = var.state_s3_bucket
key = var.state_s3_key
region = var.state_s3_region
dynamodb_table = var.lock_dynamodb_table
}
# backend "local" {
# path = "path/to/local/file"
#}
cluster "aks" {
asset_dir = pathexpand(var.asset_dir)
cluster_name = var.cluster_name
tenant_id = var.tenant_id
subscription_id = var.subscription_id
client_id = var.client_id
client_secret = var.client_secret
location = var.location
resource_group_name = var.resource_group_name
application_name = var.application_name
manage_resource_group = var.manage_resource_group
worker_pool "default" {
count = var.workers_count
vm_size = var.workers_vm_size
labels = {
"key" = "value",
}
taints = [
"node-role.kubernetes.io/master=NoSchedule",
]
}
tags = {
"key" = "value",
}
}
```

**NOTE**: If the default values do not suit your needs, you can override them using the `variable` block in the cluster configuration.

## Attribute reference

| Argument | Description | Default | Required |
| ----------------------- | ------------------------------------------------------------ | :-----------: | :------: |
| `asset_dir` | Location where Lokomotive stores cluster assets. | - | true |
| `cluster_name` | Name of the cluster. **NOTE**: It must be unique per resource group. | - | true |
| `tenant_id` | Azure Tenant ID. Can also be provided using the `LOKOMOTIVE_AKS_TENANT_ID` environment variable. | - | true |
| `subscription_id` | Azure Subscription ID. Can also be provided using the `LOKOMOTIVE_AKS_SUBSCRIPTION_ID` environment variable. | - | true |
| `resource_group_name` | Name of the resource group where the AKS cluster object will be created. Note that AKS will also create a separate resource group for workers and other required objects such as load balancers, disks etc. If the `manage_resource_group` parameter is set to `false`, this resource group must be created manually before cluster creation. | - | true |
| `client_id` | Azure service principal ID used for running the AKS cluster. Can also be provided using the `LOKOMOTIVE_AKS_CLIENT_ID` environment variable. This parameter is mutually exclusive with the `application_name` parameter. | - | false |
| `client_secret` | Azure service principal secret used for running the AKS cluster. Can also be provided using the `LOKOMOTIVE_AKS_CLIENT_SECRET` environment variable. This parameter is mutually exclusive with the `application_name` parameter. | - | false |
| `tags` | Additional tags for Azure resources. | - | false |
| `location` | Azure location where resources will be created. Valid values can be obtained using the following command from Azure CLI: `az account list-locations -o table`. | "West Europe" | false |
| `application_name` | Azure AD application name. If specified, a new Application will be created in Azure AD together with a service principal, which will be used to run the AKS cluster on behalf of the user to provide full cluster creation automation. Please note that this requires [permissions to create applications in Azure AD](https://docs.microsoft.com/en-us/azure/active-directory/users-groups-roles/roles-delegate-app-roles). This parameter is mutually exclusive with `client_id` and `client_secret`. | - | false |
| `manage_resource_group` | If `true`, a resource group for the AKS object will be created on behalf of the user. | true | false |
| `worker_pool` | Configuration block for worker pools. At least one worker pool must be defined. | - | true |
| `worker_pool.count` | Number of workers in the worker pool. Can be changed afterwards to add or delete workers. | - | true |
| `worker_pool.vm_size` | Azure VM size for worker nodes. | - | true |
| `worker_pool.labels` | Map of Kubernetes Node object labels. | - | false |
| `worker_pool.taints` | List of Kubernetes Node taints. | - | false |

## Applying

To create the cluster, execute the following command:

```console
lokoctl cluster apply
```

## Destroying

To destroy the Lokomotive cluster, execute the following command:

```console
lokoctl cluster destroy --confirm
```
2 changes: 1 addition & 1 deletion docs/configuration-reference/platforms/aws.md
@@ -58,7 +58,7 @@ backend "s3" {
dynamodb_table = var.lock_dynamodb_table
}
# backed "local" {
# backend "local" {
# path = "path/to/local/file"
#}
2 changes: 1 addition & 1 deletion docs/configuration-reference/platforms/packet.md
@@ -52,7 +52,7 @@ backend "s3" {
dynamodb_table = var.lock_dynamodb_table
}
# backed "local" {
# backend "local" {
# path = "path/to/local/file"
#}