Skip to content

Commit

Permalink
Fix scaling (#5889)
Browse files Browse the repository at this point in the history
* etcd: etcd-events doesn't depend on etcd_cluster_setup

Signed-off-by: Etienne Champetier <champetier.etienne@gmail.com>

* etcd: remove condition already present on include_tasks

Signed-off-by: Etienne Champetier <champetier.etienne@gmail.com>

* etcd: fix scaling up

Signed-off-by: Etienne Champetier <champetier.etienne@gmail.com>

* etcd: use *access_addresses, do not delegate to etcd[0]

We want to wait for the full cluster to be healthy,
so use all the cluster addresses
Also we should be able to run the playbook when etcd[0] is down
(not tested), so do not delegate to etcd[0]

Signed-off-by: Etienne Champetier <champetier.etienne@gmail.com>

* etcd: use failed_when for health check

unhealthy cluster is expected on first run, so use failed_when
instead of ignore_errors to remove scary red messages

Also use run_once

Signed-off-by: Etienne Champetier <champetier.etienne@gmail.com>

* kubernetes/preinstall: ensure ansible_fqdn is up to date after changing /etc/hosts

Signed-off-by: Etienne Champetier <champetier.etienne@gmail.com>

* kubernetes/master: regenerate apiserver cert if needed

Signed-off-by: Etienne Champetier <champetier.etienne@gmail.com>
  • Loading branch information
champtar authored Apr 8, 2020
1 parent 910a821 commit a35b6dc
Show file tree
Hide file tree
Showing 5 changed files with 60 additions and 19 deletions.
25 changes: 12 additions & 13 deletions roles/etcd/tasks/configure.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
- name: Configure | Check if etcd cluster is healthy
shell: "{{ bin_dir }}/etcdctl --endpoints={{ etcd_access_addresses }} cluster-health | grep -q 'cluster is healthy'"
register: etcd_cluster_is_healthy
ignore_errors: true
failed_when: false
changed_when: false
check_mode: no
run_once: yes
when: is_etcd_master and etcd_cluster_setup
tags:
- facts
Expand All @@ -16,9 +17,10 @@
- name: Configure | Check if etcd-events cluster is healthy
shell: "{{ bin_dir }}/etcdctl --endpoints={{ etcd_events_access_addresses }} cluster-health | grep -q 'cluster is healthy'"
register: etcd_events_cluster_is_healthy
ignore_errors: true
failed_when: false
changed_when: false
check_mode: no
run_once: yes
when: is_etcd_master and etcd_events_cluster_setup
tags:
- facts
Expand Down Expand Up @@ -49,30 +51,33 @@
daemon_reload: true
when: is_etcd_master

# when scaling new etcd will fail to start
- name: Configure | Ensure etcd is running
service:
name: etcd
state: started
enabled: yes
ignore_errors: "{{ etcd_cluster_is_healthy.rc == 0 }}"
when: is_etcd_master and etcd_cluster_setup

# when scaling new etcd will fail to start
- name: Configure | Ensure etcd-events is running
service:
name: etcd-events
state: started
enabled: yes
ignore_errors: "{{ etcd_events_cluster_is_healthy.rc == 0 }}"
when: is_etcd_master and etcd_events_cluster_setup

- name: Configure | Check if etcd cluster is healthy
shell: "{{ bin_dir }}/etcdctl --no-sync --endpoints={{ etcd_client_url }} cluster-health | grep -q 'cluster is healthy'"
- name: Configure | Wait for etcd cluster to be healthy
shell: "{{ bin_dir }}/etcdctl --no-sync --endpoints={{ etcd_access_addresses }} cluster-health | grep -q 'cluster is healthy'"
register: etcd_cluster_is_healthy
until: etcd_cluster_is_healthy.rc == 0
retries: "{{ etcd_retries }}"
delay: "{{ retry_stagger | random + 3 }}"
ignore_errors: false
changed_when: false
check_mode: no
delegate_to: "{{ groups['etcd'][0] }}"
run_once: yes
when:
- is_etcd_master
Expand All @@ -84,21 +89,19 @@
ETCDCTL_KEY_FILE: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}-key.pem"
ETCDCTL_CA_FILE: "{{ etcd_cert_dir }}/ca.pem"

- name: Configure | Check if etcd-events cluster is healthy
shell: "{{ bin_dir }}/etcdctl --no-sync --endpoints={{ etcd_events_client_url }} cluster-health | grep -q 'cluster is healthy'"
- name: Configure | Wait for etcd-events cluster to be healthy
shell: "{{ bin_dir }}/etcdctl --no-sync --endpoints={{ etcd_events_access_addresses }} cluster-health | grep -q 'cluster is healthy'"
register: etcd_events_cluster_is_healthy
until: etcd_events_cluster_is_healthy.rc == 0
retries: "{{ etcd_retries }}"
delay: "{{ retry_stagger | random + 3 }}"
ignore_errors: false
changed_when: false
check_mode: no
delegate_to: "{{ groups['etcd'][0] }}"
run_once: yes
when:
- is_etcd_master
- etcd_events_cluster_setup
- etcd_cluster_setup
tags:
- facts
environment:
Expand Down Expand Up @@ -136,14 +139,10 @@

- name: Configure | Join member(s) to etcd cluster one at a time
include_tasks: join_etcd_member.yml
vars:
target_node: "{{ item }}"
with_items: "{{ groups['etcd'] }}"
when: inventory_hostname == item and etcd_cluster_setup and etcd_member_in_cluster.rc != 0 and etcd_cluster_is_healthy.rc == 0

- name: Configure | Join member(s) to etcd-events cluster one at a time
include_tasks: join_etcd-events_member.yml
vars:
target_node: "{{ item }}"
with_items: "{{ groups['etcd'] }}"
when: inventory_hostname == item and etcd_events_cluster_setup and etcd_events_member_in_cluster.rc != 0 and etcd_events_cluster_is_healthy.rc == 0
9 changes: 6 additions & 3 deletions roles/etcd/tasks/join_etcd-events_member.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
until: member_add_result.rc == 0
retries: "{{ etcd_retries }}"
delay: "{{ retry_stagger | random + 3 }}"
when: target_node == inventory_hostname
environment:
ETCDCTL_CERT_FILE: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}.pem"
ETCDCTL_KEY_FILE: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}-key.pem"
Expand All @@ -21,7 +20,6 @@
{{ etcd_member_name }}={{ etcd_events_peer_url }}
{%- endif -%}
{%- endfor -%}
when: target_node == inventory_hostname
- name: Join Member | Ensure member is in etcd-events cluster
shell: "{{ bin_dir }}/etcdctl --no-sync --endpoints={{ etcd_events_access_addresses }} member list | grep -q {{ etcd_events_access_address }}"
Expand All @@ -30,7 +28,12 @@
check_mode: no
tags:
- facts
when: target_node == inventory_hostname
environment:
ETCDCTL_CERT_FILE: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}.pem"
ETCDCTL_KEY_FILE: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}-key.pem"

- name: Configure | Ensure etcd-events is running
service:
name: etcd-events
state: started
enabled: yes
9 changes: 6 additions & 3 deletions roles/etcd/tasks/join_etcd_member.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
until: member_add_result.rc == 0
retries: "{{ etcd_retries }}"
delay: "{{ retry_stagger | random + 3 }}"
when: target_node == inventory_hostname
environment:
ETCDCTL_CERT_FILE: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}.pem"
ETCDCTL_KEY_FILE: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}-key.pem"
Expand All @@ -22,7 +21,6 @@
{{ etcd_member_name }}={{ etcd_peer_url }}
{%- endif -%}
{%- endfor -%}
when: target_node == inventory_hostname
- name: Join Member | Ensure member is in etcd cluster
shell: "{{ bin_dir }}/etcdctl --no-sync --endpoints={{ etcd_access_addresses }} member list | grep -q {{ etcd_access_address }}"
Expand All @@ -31,8 +29,13 @@
check_mode: no
tags:
- facts
when: target_node == inventory_hostname
environment:
ETCDCTL_CERT_FILE: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}.pem"
ETCDCTL_KEY_FILE: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}-key.pem"
ETCDCTL_CA_FILE: "{{ etcd_cert_dir }}/ca.pem"

- name: Configure | Ensure etcd is running
service:
name: etcd
state: started
enabled: yes
31 changes: 31 additions & 0 deletions roles/kubernetes/master/tasks/kubeadm-setup.yml
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,37 @@
- not upgrade_cluster_setup
- kubeadm_already_run.stat.exists

- name: kubeadm | Check if apiserver.crt contains all needed SANs
command: openssl x509 -noout -in "{{ kube_cert_dir }}/apiserver.crt" -checkip "{{ item }}"
with_items: "{{ apiserver_sans }}"
register: apiserver_sans_check
changed_when: "'does match certificate' not in apiserver_sans_check.stdout"
when:
- inventory_hostname == groups['kube-master']|first
- kubeadm_already_run.stat.exists

- name: kubeadm | regenerate apiserver cert 1/2
file:
state: absent
path: "{{ kube_cert_dir }}/{{ item }}"
with_items:
- apiserver.crt
- apiserver.key
when:
- inventory_hostname == groups['kube-master']|first
- kubeadm_already_run.stat.exists
- apiserver_sans_check.changed

- name: kubeadm | regenerate apiserver cert 2/2
command: >-
{{ bin_dir }}/kubeadm
init phase certs apiserver
--config={{ kube_config_dir }}/kubeadm-config.yaml
when:
- inventory_hostname == groups['kube-master']|first
- kubeadm_already_run.stat.exists
- apiserver_sans_check.changed

- name: kubeadm | Initialize first master
command: >-
timeout -k 300s 300s
Expand Down
5 changes: 5 additions & 0 deletions roles/kubernetes/preinstall/tasks/0090-etchosts.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,8 @@
backup: yes
unsafe_writes: yes
with_dict: "{{ etc_hosts_localhosts_dict_target }}"

# gather facts to update ansible_fqdn
- name: Update facts
setup:
gather_subset: min

0 comments on commit a35b6dc

Please sign in to comment.