From 062ea377bb36de0e38a66a8fe857185a2722585e Mon Sep 17 00:00:00 2001 From: Yury Tsarev Date: Fri, 16 Jun 2017 10:39:56 +0200 Subject: [PATCH 1/3] Use docker instead of rkt for regular etcdadm tasks (#4) Motivation is to avoid serious memory leak on etcd nodes as in * https://github.com/coreos/bugs/issues/1927 --- etcdadm/etcdadm | 56 +++++++++++++++++++------------------------------ 1 file changed, 21 insertions(+), 35 deletions(-) diff --git a/etcdadm/etcdadm b/etcdadm/etcdadm index 38a42316c..7cacd4dd9 100755 --- a/etcdadm/etcdadm +++ b/etcdadm/etcdadm @@ -493,20 +493,14 @@ member_restore_from_local_snapshot() { rm -rf "$restored_dir" fi - _run_as_root rkt run \ - --insecure-options=image \ - --set-env ETCDCTL_API=3 \ - --dns=host \ - --net=host \ - --volume $(member_snapshots_dir_name),kind=host,source="$(member_host_snapshots_dir_path)" \ - --mount volume="$(member_snapshots_dir_name)",target=/"$(member_snapshots_dir_name)" \ - --volume data-dir-root,kind=host,source="$(dirname "$restored_dir")" \ - --mount volume=data-dir-root,target="$(dirname "$restored_dir")" \ - --volume data-dir,kind=empty \ - --mount volume=data-dir,target=/var/lib/etcd \ - --uuid-file-save="$uuid_file" \ - "$etcd_aci_url" \ - --exec etcdctl -- \ + _run_as_root docker run --rm \ + -e ETCDCTL_API=3 \ + --network=host \ + --volume="$(member_host_snapshots_dir_path)":/"$(member_snapshots_dir_name)" \ + --volume="$(dirname "$restored_dir")":"$(dirname "$restored_dir")" \ + --volume=/var/lib/etcd \ + quay.io/coreos/etcd:v$etcd_version \ + etcdctl \ --write-out simple \ --endpoints "$(member_client_url)" snapshot restore \ --data-dir "$restored_dir" \ @@ -514,8 +508,6 @@ member_restore_from_local_snapshot() { --initial-advertise-peer-urls "$(member_peer_url)" \ --name "$(member_name)" \ "$snapshot_name" - _run_as_root rkt stop --force --uuid-file "$uuid_file" || echo pod is already stopped - _run_as_root rkt rm --uuid-file "$uuid_file" _run_as_root mv "$restored_dir"/* "$data_dir"/ _run_as_root rm -rf "$restored_dir" @@ -784,33 +776,27 @@ member_data_dir() { member_etcdctl() { local uuid_file - local rkt_opts=(--insecure-options=image) + local docker_opts=(--rm) uuid_file="$(config_state_dir)/etcdctl-$BASHPID.uuid" if [ "${ETCDCTL_CACERT:-}" != "" -a "${ETCDCTL_CERT:-}" != "" -a "${ETCDCTL_KEY:-}" != "" ]; then local credentials credentials=$(dirname "${ETCDCTL_CACERT}") - rkt_opts+=(--set-env ETCDCTL_CACERT=${ETCDCTL_CACERT}) - rkt_opts+=(--set-env ETCDCTL_CERT=${ETCDCTL_CERT}) - rkt_opts+=(--set-env ETCDCTL_KEY=${ETCDCTL_KEY}) - rkt_opts+=(--volume credentials,kind=host,source=${credentials}) - rkt_opts+=(--mount volume=credentials,target=${credentials}) + docker_opts+=(-e ETCDCTL_CACERT=${ETCDCTL_CACERT}) + docker_opts+=(-e ETCDCTL_CERT=${ETCDCTL_CERT}) + docker_opts+=(-e ETCDCTL_KEY=${ETCDCTL_KEY}) + docker_opts+=(--volume=${credentials}:${credentials}) fi - _run_as_root rkt run ${rkt_opts[*]} \ - --set-env ETCDCTL_API=3 \ - --dns=host \ - --net=host \ - --volume "$(member_snapshots_dir_name)",kind=host,source="$(member_host_snapshots_dir_path)" \ - --mount volume="$(member_snapshots_dir_name)",target=/"$(member_snapshots_dir_name)" \ - --volume data-dir,kind=host,source="$(member_data_dir)" \ - --mount volume=data-dir,target=/var/lib/etcd \ - --uuid-file-save="$uuid_file" \ - "$etcd_aci_url" \ - --exec etcdctl -- --endpoints "$(member_client_url)" ${*} - _run_as_root rkt rm --uuid-file "$uuid_file" - rm "$uuid_file" + _run_as_root docker run ${docker_opts[*]} \ + --env ETCDCTL_API=3 \ + --network=host \ + --volume="$(member_host_snapshots_dir_path)":/"$(member_snapshots_dir_name)" \ + --volume="$(member_data_dir)":/var/lib/etcd \ + --volume "$(member_snapshots_dir_name)":"$(member_host_snapshots_dir_path)" \ + quay.io/coreos/etcd:v$etcd_version \ + etcdctl --endpoints "$(member_client_url)" ${*} } member_is_healthy() { From 6ef50316bcf120f7c899ba15158e0d16432ad2a0 Mon Sep 17 00:00:00 2001 From: Daniel Fernandes Martins Date: Wed, 21 Jun 2017 18:29:53 -0300 Subject: [PATCH 2/3] Fix node drain error when trying to evict pods from jobs --- core/controlplane/config/templates/cloud-config-controller | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/core/controlplane/config/templates/cloud-config-controller b/core/controlplane/config/templates/cloud-config-controller index 554c98804..5fff287e0 100644 --- a/core/controlplane/config/templates/cloud-config-controller +++ b/core/controlplane/config/templates/cloud-config-controller @@ -1164,6 +1164,11 @@ write_files: - replicasets verbs: - get + - apiGroups: ["batch"] + resources: + - jobs + verbs: + - get - apiGroups: [""] resources: - replicationcontrollers From 7e5a2a1b2b9fafb6237659fa69a9767c08d2cb01 Mon Sep 17 00:00:00 2001 From: Daniel Fernandes Martins Date: Wed, 21 Jun 2017 18:52:16 -0300 Subject: [PATCH 3/3] Removed unused sysctl override Kube-proxy, by default, automatically sets `nf_conntrack_max=131072` when it starts, so it's useless to set this value in some systemd unit. Closes #708 --- .../config/templates/cloud-config-controller | 16 ---------------- .../config/templates/cloud-config-worker | 17 ----------------- 2 files changed, 33 deletions(-) diff --git a/core/controlplane/config/templates/cloud-config-controller b/core/controlplane/config/templates/cloud-config-controller index 554c98804..1b130aebd 100644 --- a/core/controlplane/config/templates/cloud-config-controller +++ b/core/controlplane/config/templates/cloud-config-controller @@ -38,14 +38,6 @@ coreos: ExecStartPre=/opt/bin/cfn-etcd-environment ExecStart=/usr/bin/mv -f /var/run/coreos/etcd-environment /etc/etcd-environment -{{if .UseCalico }} - # https://github.com/coreos/docs/blob/5d7b1cccb8286185275b07db1495828be9fdb0ea/os/other-settings.md#tuning-sysctl-parameters - - name: systemd-modules-load.service - command: restart - - name: systemd-sysctl.service - command: restart -{{ end }} - {{if .Experimental.AwsEnvironment.Enabled}} - name: set-aws-environment.service enable: true @@ -2158,14 +2150,6 @@ write_files: } } - # http://docs.projectcalico.org/v2.0/usage/configuration/ - - path: /etc/modules-load.d/nf.conf - content: | - nf_conntrack - - path: /etc/sysctl.d/nf.conf - content: | - net.netfilter.nf_conntrack_max=1000000 - {{ end }} {{if .Experimental.Authentication.Webhook.Enabled}} diff --git a/core/controlplane/config/templates/cloud-config-worker b/core/controlplane/config/templates/cloud-config-worker index 8f6f500e4..3c9cd71a0 100644 --- a/core/controlplane/config/templates/cloud-config-worker +++ b/core/controlplane/config/templates/cloud-config-worker @@ -249,14 +249,6 @@ coreos: RequiredBy=rkt-api.service {{ end }} -{{if .UseCalico }} - # https://github.com/coreos/docs/blob/5d7b1cccb8286185275b07db1495828be9fdb0ea/os/other-settings.md#tuning-sysctl-parameters - - name: systemd-modules-load.service - command: restart - - name: systemd-sysctl.service - command: restart -{{ end }} - {{if .AwsEnvironment.Enabled}} - name: set-aws-environment.service enable: true @@ -853,15 +845,6 @@ write_files: } } } - - # http://docs.projectcalico.org/v2.0/usage/configuration/ - - path: /etc/modules-load.d/nf.conf - content: | - nf_conntrack - - path: /etc/sysctl.d/nf.conf - content: | - net.netfilter.nf_conntrack_max=1000000 - {{ end }} {{ if and .Experimental.TLSBootstrap.Enabled .AssetsConfig.HasTLSBootstrapToken }}