Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixup recover control plane playbook + add debian12/cilium test #10411

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ variables:
MITOGEN_ENABLE: "false"
ANSIBLE_LOG_LEVEL: "-vv"
RECOVER_CONTROL_PLANE_TEST: "false"
RECOVER_CONTROL_PLANE_TEST_GROUPS: "etcd[2:],kube_control_plane[1:]"
RECOVER_CONTROL_PLANE_TEST_GROUPS: "etcd[2:]:kube_control_plane[1:]"
TERRAFORM_VERSION: 1.3.7
PIPELINE_IMAGE: "$CI_REGISTRY_IMAGE/pipeline:${CI_PIPELINE_ID}-${CI_COMMIT_SHORT_SHA}"

Expand Down
9 changes: 7 additions & 2 deletions .gitlab-ci/packet.yml
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,11 @@ packet_debian12-docker:
extends: .packet_pr
when: on_success

packet_debian12-cilium:
stage: deploy-part2
extends: .packet_periodic
when: on_success

packet_centos7-calico-ha-once-localhost:
stage: deploy-part2
extends: .packet_pr
Expand Down Expand Up @@ -311,12 +316,12 @@ packet_ubuntu20-calico-ha-recover:
when: on_success
variables:
RECOVER_CONTROL_PLANE_TEST: "true"
RECOVER_CONTROL_PLANE_TEST_GROUPS: "etcd[2:],kube_control_plane[1:]"
RECOVER_CONTROL_PLANE_TEST_GROUPS: "etcd[2:]:kube_control_plane[1:]"

packet_ubuntu20-calico-ha-recover-noquorum:
stage: deploy-part3
extends: .packet_periodic
when: on_success
variables:
RECOVER_CONTROL_PLANE_TEST: "true"
RECOVER_CONTROL_PLANE_TEST_GROUPS: "etcd[1:],kube_control_plane[1:]"
RECOVER_CONTROL_PLANE_TEST_GROUPS: "etcd[1:]:kube_control_plane[1:]"
2 changes: 1 addition & 1 deletion docs/ci.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ amazon | :white_check_mark: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
centos7 | :white_check_mark: | :x: | :x: | :white_check_mark: | :x: | :white_check_mark: | :x: | :white_check_mark: |
debian10 | :white_check_mark: | :white_check_mark: | :x: | :x: | :x: | :x: | :white_check_mark: | :x: |
debian11 | :white_check_mark: | :x: | :white_check_mark: | :x: | :x: | :x: | :x: | :x: |
debian12 | :white_check_mark: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
debian12 | :white_check_mark: | :white_check_mark: | :x: | :x: | :x: | :x: | :x: | :x: |
fedora37 | :white_check_mark: | :x: | :x: | :x: | :x: | :white_check_mark: | :x: | :x: |
fedora38 | :x: | :x: | :x: | :x: | :white_check_mark: | :x: | :x: | :x: |
opensuse | :x: | :x: | :x: | :x: | :x: | :x: | :x: | :x: |
Expand Down
14 changes: 7 additions & 7 deletions roles/recover_control_plane/etcd/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,19 @@
ETCDCTL_KEY: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}-key.pem"
ETCDCTL_CACERT: "{{ etcd_cert_dir }}/ca.pem"
when:
- inventory_hostname in groups['broken_etcd']
- groups['broken_etcd']

- name: Set healthy fact
set_fact:
healthy: "{{ etcd_endpoint_health.stderr is match('Error: unhealthy cluster') }}"
when:
- inventory_hostname in groups['broken_etcd']
- groups['broken_etcd']

- name: Set has_quorum fact
set_fact:
has_quorum: "{{ etcd_endpoint_health.stdout_lines | select('match', '.*is healthy.*') | list | length >= etcd_endpoint_health.stderr_lines | select('match', '.*is unhealthy.*') | list | length }}"
when:
- inventory_hostname in groups['broken_etcd']
- groups['broken_etcd']

- name: Recover lost etcd quorum
include_tasks: recover_lost_quorum.yml
Expand All @@ -40,7 +40,7 @@
with_items: "{{ groups['broken_etcd'] }}"
ignore_errors: true # noqa ignore-errors
when:
- inventory_hostname in groups['broken_etcd']
- groups['broken_etcd']
- has_quorum

- name: Delete old certificates
Expand All @@ -56,7 +56,7 @@
loop: "{{ delete_old_cerificates.results }}"
changed_when: false
when:
- inventory_hostname in groups['broken_etcd']
- groups['broken_etcd']
- "item.rc != 0 and not 'No such file or directory' in item.stderr"

- name: Get etcd cluster members
Expand All @@ -71,7 +71,7 @@
ETCDCTL_KEY: "{{ etcd_cert_dir }}/admin-{{ inventory_hostname }}-key.pem"
ETCDCTL_CACERT: "{{ etcd_cert_dir }}/ca.pem"
when:
- inventory_hostname in groups['broken_etcd']
- groups['broken_etcd']
- not healthy
- has_quorum

Expand All @@ -87,7 +87,7 @@
- "{{ groups['broken_etcd'] }}"
- "{{ member_list.stdout_lines }}"
when:
- inventory_hostname in groups['broken_etcd']
- groups['broken_etcd']
- not healthy
- has_quorum
- hostvars[item[0]]['etcd_member_name'] == item[1].replace(' ', '').split(',')[2]
7 changes: 7 additions & 0 deletions tests/files/packet_debian12-cilium.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
# Instance settings
cloud_image: debian-12
mode: default

# Kubespray settings
kube_network_plugin: cilium
2 changes: 1 addition & 1 deletion tests/scripts/testcases_run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ fi
# Test control plane recovery
if [ "${RECOVER_CONTROL_PLANE_TEST}" != "false" ]; then
ansible-playbook ${ANSIBLE_LOG_LEVEL} -e @${CI_TEST_SETTING} -e @${CI_TEST_REGISTRY_MIRROR} -e @${CI_TEST_VARS} -e local_release_dir=${PWD}/downloads --limit "${RECOVER_CONTROL_PLANE_TEST_GROUPS}:!fake_hosts" -e reset_confirmation=yes reset.yml
ansible-playbook ${ANSIBLE_LOG_LEVEL} -e @${CI_TEST_SETTING} -e @${CI_TEST_REGISTRY_MIRROR} -e @${CI_TEST_VARS} -e local_release_dir=${PWD}/downloads -e etcd_retries=10 --limit etcd,kube_control_plane:!fake_hosts recover-control-plane.yml
ansible-playbook ${ANSIBLE_LOG_LEVEL} -e @${CI_TEST_SETTING} -e @${CI_TEST_REGISTRY_MIRROR} -e @${CI_TEST_VARS} -e local_release_dir=${PWD}/downloads -e etcd_retries=10 --limit "etcd:kube_control_plane:!fake_hosts" recover-control-plane.yml
fi

# Test collection build and install by installing our collection, emptying our repository, adding
Expand Down