Skip to content

Commit

Permalink
Enable kube-proxy metrics and allow Prometheus scrapes
Browse files Browse the repository at this point in the history
* Configure kube-proxy --metrics-bind-address=0.0.0.0 (default
127.0.0.1) to serve metrics on 0.0.0.0:10249
* Add firewall rules to allow Prometheus (resides on a worker) to
scrape kube-proxy service endpoints on controllers or workers
* Add a clusterIP: None service for kube-proxy endpoint discovery
  • Loading branch information
dghubble committed Jan 7, 2020
1 parent b2eb3e0 commit 43e05b9
Show file tree
Hide file tree
Showing 16 changed files with 153 additions and 33 deletions.
3 changes: 3 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,13 @@ Notable changes between versions.
* Update Calico from v3.10.2 to v3.11.1 ([#604](https://github.com/poseidon/typhoon/pull/604))
* Inline Kubelet service on Container Linux nodes ([#606](https://github.com/poseidon/typhoon/pull/606))
* Disable unused Kubelet `127.0.0.1:10248` healthz listener ([#607](https://github.com/poseidon/typhoon/pull/607))
* Enable kube-proxy metrics and allow Prometheus scrapes
* Allow TCP/10249 traffic with worker node sources

#### Addons

* Update Prometheus from v2.14.0 to [v2.15.1](https://github.com/prometheus/prometheus/releases/tag/v2.15.1)
* Add discovery for kube-proxy service endpoints
* Update kube-state-metrics from v1.8.0 to v1.9.0
* Update Grafana from v6.5.1 to v6.5.2

Expand Down
2 changes: 1 addition & 1 deletion addons/prometheus/discovery/kube-controller-manager.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# Allow Prometheus to scrape service endpoints
apiVersion: v1
kind: Service
metadata:
Expand All @@ -7,7 +8,6 @@ metadata:
prometheus.io/scrape: 'true'
spec:
type: ClusterIP
# service is created to allow prometheus to scrape endpoints
clusterIP: None
selector:
k8s-app: kube-controller-manager
Expand Down
19 changes: 19 additions & 0 deletions addons/prometheus/discovery/kube-proxy.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Allow Prometheus to scrape service endpoints
apiVersion: v1
kind: Service
metadata:
name: kube-proxy
namespace: kube-system
annotations:
prometheus.io/scrape: 'true'
prometheus.io/port: '10249'
spec:
type: ClusterIP
clusterIP: None
selector:
k8s-app: kube-proxy
ports:
- name: metrics
protocol: TCP
port: 10249
targetPort: 10249
2 changes: 1 addition & 1 deletion addons/prometheus/discovery/kube-scheduler.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# Allow Prometheus to scrape service endpoints
apiVersion: v1
kind: Service
metadata:
Expand All @@ -7,7 +8,6 @@ metadata:
prometheus.io/scrape: 'true'
spec:
type: ClusterIP
# service is created to allow prometheus to scrape endpoints
clusterIP: None
selector:
k8s-app: kube-scheduler
Expand Down
2 changes: 1 addition & 1 deletion aws/container-linux/kubernetes/bootstrap.tf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Kubernetes assets (kubeconfig, manifests)
module "bootstrap" {
source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=c8c21deb7682c2a83a1b86ff6ed88f3e5a20262d"
source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=ac4b7af57012d477cd53bd74ce632ac581e807e1"

cluster_name = var.cluster_name
api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)]
Expand Down
56 changes: 39 additions & 17 deletions aws/container-linux/kubernetes/security.tf
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,28 @@ resource "aws_security_group_rule" "controller-etcd" {
self = true
}

# Allow Prometheus to scrape etcd metrics
resource "aws_security_group_rule" "controller-etcd-metrics" {
security_group_id = aws_security_group.controller.id

type = "ingress"
protocol = "tcp"
from_port = 2381
to_port = 2381
source_security_group_id = aws_security_group.worker.id
}

# Allow Prometheus to scrape kube-proxy
resource "aws_security_group_rule" "kube-proxy-metrics" {
security_group_id = aws_security_group.controller.id

type = "ingress"
protocol = "tcp"
from_port = 10249
to_port = 10249
source_security_group_id = aws_security_group.worker.id
}

# Allow Prometheus to scrape kube-scheduler
resource "aws_security_group_rule" "controller-scheduler-metrics" {
security_group_id = aws_security_group.controller.id
Expand All @@ -55,17 +77,6 @@ resource "aws_security_group_rule" "controller-manager-metrics" {
source_security_group_id = aws_security_group.worker.id
}

# Allow Prometheus to scrape etcd metrics
resource "aws_security_group_rule" "controller-etcd-metrics" {
security_group_id = aws_security_group.controller.id

type = "ingress"
protocol = "tcp"
from_port = 2381
to_port = 2381
source_security_group_id = aws_security_group.worker.id
}

resource "aws_security_group_rule" "controller-vxlan" {
count = var.networking == "flannel" ? 1 : 0

Expand Down Expand Up @@ -281,14 +292,15 @@ resource "aws_security_group_rule" "worker-node-exporter" {
self = true
}

resource "aws_security_group_rule" "ingress-health" {
# Allow Prometheus to scrape kube-proxy
resource "aws_security_group_rule" "worker-kube-proxy" {
security_group_id = aws_security_group.worker.id

type = "ingress"
protocol = "tcp"
from_port = 10254
to_port = 10254
cidr_blocks = ["0.0.0.0/0"]
type = "ingress"
protocol = "tcp"
from_port = 10249
to_port = 10249
self = true
}

# Allow apiserver to access kubelets for exec, log, port-forward
Expand All @@ -313,6 +325,16 @@ resource "aws_security_group_rule" "worker-kubelet-self" {
self = true
}

resource "aws_security_group_rule" "ingress-health" {
security_group_id = aws_security_group.worker.id

type = "ingress"
protocol = "tcp"
from_port = 10254
to_port = 10254
cidr_blocks = ["0.0.0.0/0"]
}

resource "aws_security_group_rule" "worker-bgp" {
security_group_id = aws_security_group.worker.id

Expand Down
2 changes: 1 addition & 1 deletion aws/fedora-coreos/kubernetes/bootstrap.tf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Kubernetes assets (kubeconfig, manifests)
module "bootstrap" {
source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=c8c21deb7682c2a83a1b86ff6ed88f3e5a20262d"
source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=ac4b7af57012d477cd53bd74ce632ac581e807e1"

cluster_name = var.cluster_name
api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)]
Expand Down
34 changes: 28 additions & 6 deletions aws/fedora-coreos/kubernetes/security.tf
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,17 @@ resource "aws_security_group_rule" "controller-etcd-metrics" {
source_security_group_id = aws_security_group.worker.id
}

# Allow Prometheus to scrape kube-proxy
resource "aws_security_group_rule" "kube-proxy-metrics" {
security_group_id = aws_security_group.controller.id

type = "ingress"
protocol = "tcp"
from_port = 10249
to_port = 10249
source_security_group_id = aws_security_group.worker.id
}

# Allow Prometheus to scrape kube-scheduler
resource "aws_security_group_rule" "controller-scheduler-metrics" {
security_group_id = aws_security_group.controller.id
Expand Down Expand Up @@ -281,14 +292,15 @@ resource "aws_security_group_rule" "worker-node-exporter" {
self = true
}

resource "aws_security_group_rule" "ingress-health" {
# Allow Prometheus to scrape kube-proxy
resource "aws_security_group_rule" "worker-kube-proxy" {
security_group_id = aws_security_group.worker.id

type = "ingress"
protocol = "tcp"
from_port = 10254
to_port = 10254
cidr_blocks = ["0.0.0.0/0"]
type = "ingress"
protocol = "tcp"
from_port = 10249
to_port = 10249
self = true
}

# Allow apiserver to access kubelets for exec, log, port-forward
Expand All @@ -313,6 +325,16 @@ resource "aws_security_group_rule" "worker-kubelet-self" {
self = true
}

resource "aws_security_group_rule" "ingress-health" {
security_group_id = aws_security_group.worker.id

type = "ingress"
protocol = "tcp"
from_port = 10254
to_port = 10254
cidr_blocks = ["0.0.0.0/0"]
}

resource "aws_security_group_rule" "worker-bgp" {
security_group_id = aws_security_group.worker.id

Expand Down
2 changes: 1 addition & 1 deletion azure/container-linux/kubernetes/bootstrap.tf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Kubernetes assets (kubeconfig, manifests)
module "bootstrap" {
source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=c8c21deb7682c2a83a1b86ff6ed88f3e5a20262d"
source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=ac4b7af57012d477cd53bd74ce632ac581e807e1"

cluster_name = var.cluster_name
api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)]
Expand Down
34 changes: 33 additions & 1 deletion azure/container-linux/kubernetes/security.tf
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,29 @@ resource "azurerm_network_security_rule" "controller-etcd-metrics" {
destination_address_prefix = azurerm_subnet.controller.address_prefix
}

# Allow Prometheus to scrape kube-proxy metrics
resource "azurerm_network_security_rule" "controller-kube-proxy" {
resource_group_name = azurerm_resource_group.cluster.name

name = "allow-kube-proxy-metrics"
network_security_group_name = azurerm_network_security_group.controller.name
priority = "2011"
access = "Allow"
direction = "Inbound"
protocol = "Tcp"
source_port_range = "*"
destination_port_range = "10249"
source_address_prefix = azurerm_subnet.worker.address_prefix
destination_address_prefix = azurerm_subnet.controller.address_prefix
}

# Allow Prometheus to scrape kube-scheduler and kube-controller-manager metrics
resource "azurerm_network_security_rule" "controller-kube-metrics" {
resource_group_name = azurerm_resource_group.cluster.name

name = "allow-kube-metrics"
network_security_group_name = azurerm_network_security_group.controller.name
priority = "2011"
priority = "2012"
access = "Allow"
direction = "Inbound"
protocol = "Tcp"
Expand Down Expand Up @@ -251,6 +267,22 @@ resource "azurerm_network_security_rule" "worker-node-exporter" {
destination_address_prefix = azurerm_subnet.worker.address_prefix
}

# Allow Prometheus to scrape kube-proxy
resource "azurerm_network_security_rule" "worker-kube-proxy" {
resource_group_name = azurerm_resource_group.cluster.name

name = "allow-kube-proxy"
network_security_group_name = azurerm_network_security_group.worker.name
priority = "2024"
access = "Allow"
direction = "Inbound"
protocol = "Tcp"
source_port_range = "*"
destination_port_range = "10249"
source_address_prefix = azurerm_subnet.worker.address_prefix
destination_address_prefix = azurerm_subnet.worker.address_prefix
}

# Allow apiserver to access kubelet's for exec, log, port-forward
resource "azurerm_network_security_rule" "worker-kubelet" {
resource_group_name = azurerm_resource_group.cluster.name
Expand Down
2 changes: 1 addition & 1 deletion bare-metal/container-linux/kubernetes/bootstrap.tf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Kubernetes assets (kubeconfig, manifests)
module "bootstrap" {
source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=c8c21deb7682c2a83a1b86ff6ed88f3e5a20262d"
source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=ac4b7af57012d477cd53bd74ce632ac581e807e1"

cluster_name = var.cluster_name
api_servers = [var.k8s_domain_name]
Expand Down
2 changes: 1 addition & 1 deletion bare-metal/fedora-coreos/kubernetes/bootstrap.tf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Kubernetes assets (kubeconfig, manifests)
module "bootstrap" {
source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=c8c21deb7682c2a83a1b86ff6ed88f3e5a20262d"
source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=ac4b7af57012d477cd53bd74ce632ac581e807e1"

cluster_name = var.cluster_name
api_servers = [var.k8s_domain_name]
Expand Down
2 changes: 1 addition & 1 deletion digital-ocean/container-linux/kubernetes/bootstrap.tf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Kubernetes assets (kubeconfig, manifests)
module "bootstrap" {
source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=c8c21deb7682c2a83a1b86ff6ed88f3e5a20262d"
source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=ac4b7af57012d477cd53bd74ce632ac581e807e1"

cluster_name = var.cluster_name
api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)]
Expand Down
8 changes: 8 additions & 0 deletions digital-ocean/container-linux/kubernetes/network.tf
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,20 @@ resource "digitalocean_firewall" "rules" {
source_tags = [digitalocean_tag.controllers.name, digitalocean_tag.workers.name]
}

# Allow Prometheus to scrape node-exporter
inbound_rule {
protocol = "tcp"
port_range = "9100"
source_tags = [digitalocean_tag.workers.name]
}

# Allow Prometheus to scrape kube-proxy
inbound_rule {
protocol = "tcp"
port_range = "10249"
source_tags = [digitalocean_tag.workers.name]
}

inbound_rule {
protocol = "tcp"
port_range = "10250"
Expand Down
2 changes: 1 addition & 1 deletion google-cloud/container-linux/kubernetes/bootstrap.tf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Kubernetes assets (kubeconfig, manifests)
module "bootstrap" {
source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=c8c21deb7682c2a83a1b86ff6ed88f3e5a20262d"
source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=ac4b7af57012d477cd53bd74ce632ac581e807e1"

cluster_name = var.cluster_name
api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)]
Expand Down
14 changes: 14 additions & 0 deletions google-cloud/container-linux/kubernetes/network.tf
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,20 @@ resource "google_compute_firewall" "internal-node-exporter" {
target_tags = ["${var.cluster_name}-controller", "${var.cluster_name}-worker"]
}

# Allow Prometheus to scrape kube-proxy metrics
resource "google_compute_firewall" "internal-kube-proxy" {
name = "${var.cluster_name}-internal-kube-proxy"
network = google_compute_network.network.name

allow {
protocol = "tcp"
ports = [10249]
}

source_tags = ["${var.cluster_name}-worker"]
target_tags = ["${var.cluster_name}-controller", "${var.cluster_name}-worker"]
}

# Allow apiserver to access kubelets for exec, log, port-forward
resource "google_compute_firewall" "internal-kubelet" {
name = "${var.cluster_name}-internal-kubelet"
Expand Down

0 comments on commit 43e05b9

Please sign in to comment.