Skip to content
This repository has been archived by the owner on Sep 30, 2020. It is now read-only.

Commit

Permalink
Allow dnsmasq to be backed by a local copy of CoreDNS
Browse files Browse the repository at this point in the history
This commit allows the user to specify that dnsmasq should be
backed by a pod-local copy of CoreDNS rather than relying on
the global CoreDNS service. If enabled, the dnsmasq-node
DaemonSet will be configured to use a local copy of CoreDNS
for its resolution while setting the global CoreDNS service as
a fallback. This is handy in situations where the number of DNS
requests within a cluster grows large and causes resolution issues
as dnsmasq reaches out to the global CoreDNS service.

Additionally, several values passed to dnsmasq are now configurable
including its `--cache-size` and `--dns-forward-max`.

See [this postmortem](https://github.com/zalando-incubator/kubernetes-on-aws/blob/dev/docs/postmortems/jan-2019-dns-outage.md)
for an investigation into this situation, which was instrumental in
understanding the issues we were facing. Many thanks to dominicgunn
for providing the manifests which I codified into this commit.

---

These features can be enabled and tuned by setting the following
values within cluster.yaml:

```yaml
kubeDns:
  dnsmasq:
    coreDNSLocal:
      # When enabled, this will run a copy of CoreDNS within each dnsmasq pod and
      # configure the utility to use it for resolution.
      enabled: true

      # Defines the resource requests/limits for the coredns-local container.
      # cpu and/or memory constraints can be removed by setting the appropriate value(s)
      # to an empty string.
      resources:
        requests:
          cpu: 50m
          memory: 100Mi
        limits:
          cpu: 50m
          memory: 100Mi

    # The size of dnsmasq's cache.
    cacheSize: 50000

    # The maximum number of concurrent DNS queries.
    dnsForwardMax: 500

    # This option gives a default value for time-to-live (in seconds) which dnsmasq
    # uses to cache negative replies even in the absence of an SOA record.
    negTTL: 60
```
  • Loading branch information
kfr2 committed Aug 17, 2020
1 parent 2260379 commit 5bd8214
Show file tree
Hide file tree
Showing 5 changed files with 503 additions and 40 deletions.
35 changes: 33 additions & 2 deletions builtin/files/cluster.yaml.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -1167,7 +1167,7 @@ kubernetes:
# Tells Kubernetes to enable the autoscaler rest client (not using heapster) without the requirement to use metrics-server.
podAutoscalerUseRestClient:
enabled: false

# controllerManager:
# resources:
# requests:
Expand Down Expand Up @@ -1329,6 +1329,35 @@ kubeDns:
# - --neg-ttl=10
# - --no-ping

# Settings for the dnsmasq-node DaemonSet which must be enabled by setting
# `kubeDns.nodeLocalResolver` to true.
dnsmasq:
coreDNSLocal:
# When enabled, this will run a copy of CoreDNS within each dnsmasq pod and
# configure the utility to use it for resolution.
enabled: false

# Defines the resource requests/limits for the coredns-local container.
# cpu and/or memory constraints can be removed by setting the appropriate value(s)
# to an empty string.
resources:
requests:
cpu: 50m
memory: 100Mi
limits:
cpu: 50m
memory: 100Mi

# The size of dnsmasq's cache.
cacheSize: 50000

# The maximum number of concurrent DNS queries.
dnsForwardMax: 500

# This option gives a default value for time-to-live (in seconds) which dnsmasq
# uses to cache negative replies even in the absence of an SOA record.
# negTTL: 60

# When enabled, will modify the TTL of the coredns service.
# ttl: 30

Expand All @@ -1351,6 +1380,8 @@ kubeDns:
#
# This configuration is injected into the CoreDNS config map after the root
# zone (".") and can be used to add configuration for additional zones.
# If coreDNSLocal has been enabled, this configuration will additionally be injected
# into its ConfigMap.
# additionalZoneCoreDNSConfig: |
# global:53 {
# errors
Expand Down Expand Up @@ -1378,7 +1409,7 @@ kubeProxy:
# When enabled, a security group rule is included on the generated kube-aws SG to allow ICMP Ping from all traffic (0.0.0.0/0).
# This is applied to all nodes (worker & control plane) in the cluster.
openICMP: true

# Addon features
addons:
# When enabled, Kubernetes rescheduler is deployed to the cluster controller(s)
Expand Down
194 changes: 178 additions & 16 deletions builtin/files/userdata/cloud-config-controller
Original file line number Diff line number Diff line change
Expand Up @@ -1126,9 +1126,14 @@ write_files:
"${mfdir}/kube-dns-autoscaler-de.yaml" \
"${mfdir}/kube-dns-de.yaml"
{{- end }}
{{ if .KubeDns.NodeLocalResolver -}}
{{- if .KubeDns.NodeLocalResolver }}
{{- if .KubeDns.DNSMasq.CoreDNSLocal.Enabled }}
deploy "${mfdir}/dnsmasq-node-coredns-local.yaml"
{{- else }}
remove "${mfdir}/dnsmasq-node-coredns-local.yaml"
{{- end }}
deploy "${mfdir}/dnsmasq-node-ds.yaml"
{{ end -}}
{{- end }}
forceapply "${mfdir}/kube-dns-pdb.yaml"

{{ if .Addons.MetricsServer.Enabled -}}
Expand Down Expand Up @@ -5350,6 +5355,9 @@ write_files:
namespace: kube-system
data:
Corefile: |
{{- if and (eq .KubeDns.Provider "coredns") .KubeDns.AdditionalZoneCoreDNSConfig }}
{{ .KubeDns.AdditionalZoneCoreDNSConfig | indent 10 }}
{{- end }}
.:53 {
errors
health
Expand All @@ -5372,9 +5380,6 @@ write_files:
reload
loadbalance
}
{{- if and (eq .KubeDns.Provider "coredns") .KubeDns.AdditionalZoneCoreDNSConfig }}
{{ .KubeDns.AdditionalZoneCoreDNSConfig | indent 10 }}
{{- end }}
{{- else }}
- path: /srv/kubernetes/manifests/kube-dns-sa.yaml
content: |
Expand Down Expand Up @@ -5440,6 +5445,85 @@ write_files:
- --v=2
- --logtostderr

{{ if and .KubeDns.NodeLocalResolver .KubeDns.DNSMasq.CoreDNSLocal.Enabled }}
- path: /srv/kubernetes/manifests/dnsmasq-node-coredns-local.yaml
content: |
apiVersion: v1
kind: ServiceAccount
metadata:
name: dnsmasq
namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: dnsmasq
rules:
- apiGroups: [""]
resources: ["endpoints", "services", "pods", "namespaces"]
verbs: ["list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: dnsmasq
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: dnsmasq
subjects:
- kind: ServiceAccount
name: dnsmasq
namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: dnsmasq-privileged-psp
namespace: kube-system
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: privileged-psp
subjects:
- kind: ServiceAccount
name: dnsmasq
namespace: kube-system
---
apiVersion: v1
kind: ConfigMap
metadata:
name: coredns-local
namespace: kube-system
labels:
application: coredns
data:
Corefile: |
{{- if and (eq .KubeDns.Provider "coredns") .KubeDns.AdditionalZoneCoreDNSConfig }}
{{ .KubeDns.AdditionalZoneCoreDNSConfig | indent 12 }}
{{- end }}

cluster.local:9254 {{ .PodCIDR }}:9254 {{ .ServiceCIDR }}:9254 {
errors
kubernetes {
pods insecure
}
cache 30
log svc.svc.cluster.local.
prometheus :9153
}

.:9254 {
errors
health :9154 # this is global for all servers
prometheus :9153
forward . /etc/resolv.conf
pprof 127.0.0.1:9156
cache 30
reload
}
{{ end }}

{{ if .KubeDns.NodeLocalResolver }}
- path: /srv/kubernetes/manifests/dnsmasq-node-ds.yaml
content: |
Expand All @@ -5451,9 +5535,12 @@ write_files:
labels:
k8s-app: dnsmasq-node
spec:
selector:
matchLabels:
k8s-app: dnsmasq-node
updateStrategy:
rollingUpdate:
maxUnavailable: 100%
maxUnavailable: 10%
type: RollingUpdate
selector:
matchLabels:
Expand All @@ -5478,15 +5565,24 @@ write_files:
configMap:
name: kube-dns
optional: true
{{ if .KubeDns.DNSMasq.CoreDNSLocal.Enabled }}
- name: coredns-local-config
configMap:
name: coredns-local
items:
- key: Corefile
path: Corefile
{{ end }}
containers:
- name: dnsmasq
image: {{ .KubeDnsMasqImage.RepoWithTag }}
livenessProbe:
httpGet:
path: /healthcheck/dnsmasq
port: 10054
port: 9054
scheme: HTTP
initialDelaySeconds: 60
periodSeconds: 10
timeoutSeconds: 5
successThreshold: 1
failureThreshold: 5
Expand All @@ -5497,13 +5593,24 @@ write_files:
- -restartDnsmasq=true
- --
- -k
- --min-port=1024
- --cache-size=1000
- --cache-size={{ .KubeDns.DNSMasq.CacheSize }}
- --dns-forward-max={{ .KubeDns.DNSMasq.DNSForwardMax }}
- --log-facility=-
{{ if .KubeDns.DNSMasq.CoreDNSLocal.Enabled }}
- --no-resolv
- --keep-in-foreground
- --neg-ttl={{ .KubeDns.DNSMasq.NegTTL }}
# Send requests to the last server (coredns-local) first and only
# fallback to the previous one (global coredns) if it's unreachable.
- --strict-order
- --server={{.DNSServiceIP}}#53
- --server=127.0.0.1#9254
{{ else }}
- --server=//{{.DNSServiceIP}}
- --server=/cluster.local/{{.DNSServiceIP}}
- --server=/in-addr.arpa/{{.DNSServiceIP}}
- --server=/ip6.arpa/{{.DNSServiceIP}}
- --log-facility=-
{{ end }}
{{- if ne (len .KubeDns.NodeLocalResolverOptions) 0 }}
{{- range .KubeDns.NodeLocalResolverOptions }}
- {{.}}
Expand All @@ -5519,8 +5626,10 @@ write_files:
# see: https://github.com/kubernetes/kubernetes/issues/29055 for details
resources:
requests:
cpu: 150m
memory: 20Mi
ephemeral-storage: 256Mi
limits:
cpu: 10m
memory: 45Mi
volumeMounts:
- name: kube-dns-config
mountPath: /etc/k8s/dns/dnsmasq-nanny
Expand All @@ -5529,7 +5638,7 @@ write_files:
livenessProbe:
httpGet:
path: /metrics
port: 10054
port: 9054
scheme: HTTP
initialDelaySeconds: 60
timeoutSeconds: 5
Expand All @@ -5538,17 +5647,70 @@ write_files:
args:
- --v=2
- --logtostderr
{{ if .KubeDns.DNSMasq.CoreDNSLocal.Enabled }}
- --probe=dnsmasq,127.0.0.1:9254,ec2.amazonaws.com,5,A
{{ else }}
- --probe=dnsmasq,127.0.0.1:53,ec2.amazonaws.com,5,A
{{ end }}
- --prometheus-port=9054
ports:
- containerPort: 10054
- containerPort: 9054
name: metrics
protocol: TCP
resources:
requests:
memory: 20Mi
ephemeral-storage: 256Mi
limits:
cpu: 100m
memory: 50Mi
terminationMessagePath: /dev/termination-log
terminationMessagePolicy: File
{{ if .KubeDns.DNSMasq.CoreDNSLocal.Enabled }}
- name: coredns
image: {{ .CoreDnsImage.RepoWithTag }}
args: ["-conf", "/etc/coredns/Corefile"]
volumeMounts:
- name: coredns-local-config
mountPath: /etc/coredns
ports:
- containerPort: 9254
name: dns
protocol: UDP
- containerPort: 9254
name: dns-tcp
protocol: TCP
livenessProbe:
httpGet:
path: /health
port: 9154
scheme: HTTP
initialDelaySeconds: 60
timeoutSeconds: 5
successThreshold: 1
failureThreshold: 5
resources:
requests:
ephemeral-storage: 256Mi
{{ if .KubeDns.DNSMasq.CoreDNSLocal.ComputeResources.Requests.Cpu }}
cpu: {{ .KubeDns.DNSMasq.CoreDNSLocal.ComputeResources.Requests.Cpu }}
{{ end }}
{{ if .KubeDns.DNSMasq.CoreDNSLocal.ComputeResources.Requests.Memory }}
memory: {{ .KubeDns.DNSMasq.CoreDNSLocal.ComputeResources.Requests.Memory }}
{{ end }}
{{ if or .KubeDns.DNSMasq.CoreDNSLocal.ComputeResources.Limits.Cpu .KubeDns.DNSMasq.CoreDNSLocal.ComputeResources.Limits.Memory }}
limits:
{{ if .KubeDns.DNSMasq.CoreDNSLocal.ComputeResources.Limits.Cpu }}
cpu: {{ .KubeDns.DNSMasq.CoreDNSLocal.ComputeResources.Limits.Cpu }}
{{ end }}
{{ if .KubeDns.DNSMasq.CoreDNSLocal.ComputeResources.Limits.Memory }}
memory: {{ .KubeDns.DNSMasq.CoreDNSLocal.ComputeResources.Limits.Memory }}
{{ end }}
{{ end }}
{{ end }}
hostNetwork: true
dnsPolicy: Default
automountServiceAccountToken: false
automountServiceAccountToken: true
serviceAccountName: dnsmasq
{{ end }}

{{- if eq .KubeDns.Provider "coredns" }}
Expand Down
22 changes: 20 additions & 2 deletions pkg/api/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -184,8 +184,26 @@ func NewDefaultCluster() *Cluster {
IPVSMode: ipvsMode,
},
KubeDns: KubeDns{
Provider: "coredns",
NodeLocalResolver: false,
Provider: "coredns",
NodeLocalResolver: false,
DNSMasq: DNSMasq{
CoreDNSLocal: CoreDNSLocal{
Enabled: false,
ComputeResources: ComputeResources{
Requests: ResourceQuota{
Cpu: "50m",
Memory: "100Mi",
},
Limits: ResourceQuota{
Cpu: "50m",
Memory: "100Mi",
},
},
},
CacheSize: 50000,
DNSForwardMax: 500,
NegTTL: 60,
},
DeployToControllers: false,
AntiAffinityAvailabilityZone: false,
TTL: 30,
Expand Down
Loading

0 comments on commit 5bd8214

Please sign in to comment.