From f6d1c294d4d78c46366a30a53a7ede11e28a2e66 Mon Sep 17 00:00:00 2001 From: Max Gautier Date: Thu, 12 Dec 2024 16:46:51 +0100 Subject: [PATCH 1/2] CI: Use the debug stdout callback instead of manual debug This displays the result of most tasks in a human-readable way, and should be far more readable than what we have now, which is frequently a bunch of unreadable JSON. + some small fixes (using delegate_to instead of when) --- .gitlab-ci.yml | 1 + tests/testcases/010_check-apiserver.yml | 3 - tests/testcases/015_check-nodes-ready.yml | 3 - tests/testcases/020_check-pods-running.yml | 10 -- tests/testcases/030_check-network.yml | 72 ++++------- tests/testcases/040_check-network-adv.yml | 140 ++++++++------------- 6 files changed, 76 insertions(+), 153 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index e2b8864f422..2c1a5065665 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -10,6 +10,7 @@ variables: FAILFASTCI_NAMESPACE: 'kargo-ci' GITLAB_REPOSITORY: 'kargo-ci/kubernetes-sigs-kubespray' ANSIBLE_FORCE_COLOR: "true" + ANSIBLE_STDOUT_CALLBACK: "debug" MAGIC: "ci check this" GS_ACCESS_KEY_ID: $GS_KEY GS_SECRET_ACCESS_KEY: $GS_SECRET diff --git a/tests/testcases/010_check-apiserver.yml b/tests/testcases/010_check-apiserver.yml index 081a2a31ed2..5714ccd6833 100644 --- a/tests/testcases/010_check-apiserver.yml +++ b/tests/testcases/010_check-apiserver.yml @@ -13,9 +13,6 @@ delay: 5 until: apiserver_response is success - - debug: # noqa name[missing] - msg: "{{ apiserver_response.json }}" - - name: Check API servers version assert: that: diff --git a/tests/testcases/015_check-nodes-ready.yml b/tests/testcases/015_check-nodes-ready.yml index 660fa8491f8..778417f10a0 100644 --- a/tests/testcases/015_check-nodes-ready.yml +++ b/tests/testcases/015_check-nodes-ready.yml @@ -21,9 +21,6 @@ changed_when: false register: get_nodes - - debug: # noqa name[missing] - msg: "{{ get_nodes.stdout.split('\n') }}" - - name: Check that all nodes are running and ready 
command: "{{ bin_dir }}/kubectl get nodes --no-headers -o yaml" changed_when: false diff --git a/tests/testcases/020_check-pods-running.yml b/tests/testcases/020_check-pods-running.yml index 9a0bee8d8e1..f25e2b7a46f 100644 --- a/tests/testcases/020_check-pods-running.yml +++ b/tests/testcases/020_check-pods-running.yml @@ -19,10 +19,6 @@ - name: Check kubectl output command: "{{ bin_dir }}/kubectl get pods --all-namespaces -owide" changed_when: false - register: get_pods - - - debug: # noqa name[missing] - msg: "{{ get_pods.stdout.split('\n') }}" - name: Check that all pods are running and ready command: "{{ bin_dir }}/kubectl get pods --all-namespaces --no-headers -o yaml" @@ -35,13 +31,7 @@ - '(run_pods_log.stdout | from_yaml)["items"] | map(attribute = "status.containerStatuses") | map("map", attribute = "ready") | map("min") | min' retries: 30 delay: 10 - failed_when: false - name: Check kubectl output command: "{{ bin_dir }}/kubectl get pods --all-namespaces -owide" changed_when: false - register: get_pods - - - debug: # noqa name[missing] - msg: "{{ get_pods.stdout.split('\n') }}" - failed_when: not run_pods_log is success diff --git a/tests/testcases/030_check-network.yml b/tests/testcases/030_check-network.yml index 8c5b8ec4f80..7c5058a9f02 100644 --- a/tests/testcases/030_check-network.yml +++ b/tests/testcases/030_check-network.yml @@ -29,9 +29,6 @@ register: csr_json changed_when: false - - debug: # noqa name[missing] - var: csrs - - name: Check there are csrs assert: that: csrs | length > 0 @@ -67,17 +64,13 @@ when: get_csr.stdout_lines | length > 0 changed_when: certificate_approve.stdout - - debug: # noqa name[missing] - msg: "{{ certificate_approve.stdout.split('\n') }}" - - - name: Create test namespace command: "{{ bin_dir }}/kubectl create namespace test" changed_when: false - name: Run 2 agnhost pods in test ns command: - cmd: "{{ bin_dir }}/kubectl apply -f -" + cmd: "{{ bin_dir }}/kubectl apply --namespace test -f -" stdin: | apiVersion: 
apps/v1 kind: Deployment @@ -107,52 +100,35 @@ type: RuntimeDefault changed_when: false - - import_role: # noqa name[missing] - name: cluster-dump - - name: Check that all pods are running and ready + vars: + pods: "{{ (pods_json | from_json)['items'] }}" block: - name: Check Deployment is ready - command: "{{ bin_dir }}/kubectl rollout status deploy --namespace test agnhost --timeout=180" + command: "{{ bin_dir }}/kubectl rollout status deploy --namespace test agnhost --timeout=180s" changed_when: false - rescue: - name: Get pod names command: "{{ bin_dir }}/kubectl get pods -n test -o json" changed_when: false - register: pods - - - name: Get running pods - command: "{{ bin_dir }}/kubectl get pods -n test -o - jsonpath='{range .items[?(.status.phase==\"Running\")]}{.metadata.name} {.status.podIP} {.status.containerStatuses} {end}'" - changed_when: false - register: running_pods + register: pods_json - - name: Check kubectl output - command: "{{ bin_dir }}/kubectl get pods --all-namespaces -owide" - changed_when: false - register: get_pods - - - debug: # noqa name[missing] - msg: "{{ get_pods.stdout.split('\n') }}" + - name: Check pods IP are in correct network + assert: + that: pods + | selectattr('status.phase', '==', 'Running') + | selectattr('status.podIP', 'ansible.utils.in_network', kube_pods_subnet) + | length == 2 + + - name: Curl between pods is working + command: "{{ bin_dir }}/kubectl -n test exec {{ item[0].metadata.name }} -- curl {{ item[1].status.podIP }}:8080" + with_nested: + - "{{ pods }}" + - "{{ pods }}" + rescue: + - name: List pods cluster-wide + command: "{{ bin_dir }}/kubectl get pods --all-namespaces -owide" + changed_when: false - - name: Set networking facts - set_fact: - kube_pods_subnet: 10.233.64.0/18 - pod_names: "{{ (pods.stdout | from_json)['items'] | map(attribute='metadata.name') | list }}" - pod_ips: "{{ (pods.stdout | from_json)['items'] | selectattr('status.podIP', 'defined') | map(attribute='status.podIP') | list }}" - 
pods_running: | - {% set list = running_pods.stdout.split(" ") %} - {{ list }} - - - name: Check pods IP are in correct network - assert: - that: item | ansible.utils.ipaddr(kube_pods_subnet) - when: - - item in pods_running - with_items: "{{ pod_ips }}" - - - name: Curl between pods is working - command: "{{ bin_dir }}/kubectl -n test exec {{ item[0] }} -- curl {{ item[1] }}:8080" - with_nested: - - "{{ pod_names }}" - - "{{ pod_ips }}" + - import_role: # noqa name[missing] + name: cluster-dump + - fail: # noqa name[missing] diff --git a/tests/testcases/040_check-network-adv.yml b/tests/testcases/040_check-network-adv.yml index 368a5c0d8ab..7f166bb68c5 100644 --- a/tests/testcases/040_check-network-adv.yml +++ b/tests/testcases/040_check-network-adv.yml @@ -54,97 +54,57 @@ - netchecker-agent-hostnet when: not pods_json is success - - debug: # noqa name[missing] - var: nca_pod.stdout_lines - when: inventory_hostname == groups['kube_control_plane'][0] - - - name: Get netchecker agents - uri: - url: "http://{{ ansible_default_ipv4.address }}:{{ netchecker_port }}/api/v1/agents/" - return_content: true - run_once: true - delegate_to: "{{ groups['kube_control_plane'][0] }}" - register: agents - retries: 18 - delay: "{{ agent_report_interval }}" - until: agents.content | length > 0 and - agents.content[0] == '{' and - agents.content | from_json | length >= groups['k8s_cluster'] | intersect(ansible_play_hosts) | length * 2 - failed_when: false - - - name: Check netchecker status - uri: - url: "http://{{ ansible_default_ipv4.address }}:{{ netchecker_port }}/api/v1/connectivity_check" - status_code: 200 - return_content: true - delegate_to: "{{ groups['kube_control_plane'][0] }}" - run_once: true - register: connectivity_check - retries: 3 - delay: "{{ agent_report_interval }}" - until: connectivity_check.content | length > 0 and - connectivity_check.content[0] == '{' - failed_when: false - when: - - agents.content != '{}' - - - debug: # noqa name[missing] - var: pods_json 
- run_once: true - - - name: Get kube-proxy logs - command: "{{ bin_dir }}/kubectl -n kube-system logs -l k8s-app=kube-proxy" - when: - - inventory_hostname == groups['kube_control_plane'][0] - - not connectivity_check is success - - - name: Get logs from other apps - command: "{{ bin_dir }}/kubectl -n kube-system logs -l k8s-app={{ item }} --all-containers" - when: - - inventory_hostname == groups['kube_control_plane'][0] - - not connectivity_check is success - with_items: - - kube-router - - flannel - - canal-node - - calico-node - - cilium - - - name: Parse agents list - set_fact: - agents_check_result: "{{ agents.content | from_json }}" - delegate_to: "{{ groups['kube_control_plane'][0] }}" + - name: Perform netchecker tests run_once: true - when: - - agents is success - - agents.content is defined - - agents.content[0] == '{' - - - debug: # noqa name[missing] - var: agents_check_result delegate_to: "{{ groups['kube_control_plane'][0] }}" - run_once: true - when: - - agents_check_result is defined - - - name: Parse connectivity check - set_fact: - connectivity_check_result: "{{ connectivity_check.content | from_json }}" - delegate_to: "{{ groups['kube_control_plane'][0] }}" - run_once: true - when: - - connectivity_check is success - - connectivity_check.content is defined - - connectivity_check.content[0] == '{' - - - debug: # noqa name[missing] - var: connectivity_check_result - delegate_to: "{{ groups['kube_control_plane'][0] }}" - run_once: true - when: - - connectivity_check_result is defined + block: + - name: Get netchecker agents + uri: + url: "http://{{ ansible_default_ipv4.address }}:{{ netchecker_port }}/api/v1/agents/" + return_content: true + headers: + Accept: application/json + register: agents + retries: 18 + delay: "{{ agent_report_interval }}" + until: + - agents is success + - (agents.content | from_json | length) == (groups['k8s_cluster'] | length * 2) + + - name: Check netchecker status + uri: + url: "http://{{ ansible_default_ipv4.address 
}}:{{ netchecker_port }}/api/v1/connectivity_check" + return_content: true + headers: + Accept: application/json + register: connectivity_check + retries: 3 + delay: "{{ agent_report_interval }}" + until: + - connectivity_check is success + - connectivity_check.content | from_json + + rescue: + - name: Get kube-proxy logs + command: "{{ bin_dir }}/kubectl -n kube-system logs -l k8s-app=kube-proxy" + + - name: Get logs from other apps + command: "{{ bin_dir }}/kubectl -n kube-system logs -l k8s-app={{ item }} --all-containers" + with_items: + - kube-router + - flannel + - canal-node + - calico-node + - cilium + + - name: Netchecker tests failed + fail: + msg: "netchecker tests failed" - name: Check connectivity with all netchecker agents + vars: + connectivity_check_result: "{{ connectivity_check.content | from_json }}" + agents_check_result: "{{ agents.content | from_json }}" assert: that: - agents_check_result is defined @@ -193,8 +153,9 @@ - name: samplepod command: ["/bin/bash", "-c", "sleep 2000000000000"] image: dougbtv/centos-network + delegate_to: "{{ groups['kube_control_plane'][0] }}" + run_once: true when: - - inventory_hostname == groups['kube_control_plane'][0] - kube_network_plugin_multus | default(false) | bool - name: Check secondary macvlan interface @@ -203,6 +164,7 @@ until: output.rc == 0 retries: 90 changed_when: false + delegate_to: "{{ groups['kube_control_plane'][0] }}" + run_once: true when: - - inventory_hostname == groups['kube_control_plane'][0] - kube_network_plugin_multus | default(false) | bool From 86a949dc81574bb6dc62ca7317bab76217db2076 Mon Sep 17 00:00:00 2001 From: Max Gautier Date: Fri, 13 Dec 2024 09:06:52 +0100 Subject: [PATCH 2/2] CI: Remove Flatcar specifics We don't test Flatcar at all in CI, thus remove special handling for it. 
--- tests/testcases/015_check-nodes-ready.yml | 12 ++---------- tests/testcases/020_check-pods-running.yml | 12 ++---------- tests/testcases/030_check-network.yml | 12 ++---------- tests/testcases/040_check-network-adv.yml | 11 +---------- 4 files changed, 7 insertions(+), 40 deletions(-) diff --git a/tests/testcases/015_check-nodes-ready.yml b/tests/testcases/015_check-nodes-ready.yml index 778417f10a0..536207211bf 100644 --- a/tests/testcases/015_check-nodes-ready.yml +++ b/tests/testcases/015_check-nodes-ready.yml @@ -1,18 +1,10 @@ --- - name: Testcases checking nodes hosts: kube_control_plane[0] + vars: + bin_dir: /usr/local/bin tasks: - - name: Force binaries directory for Flatcar Container Linux by Kinvolk - set_fact: - bin_dir: "/opt/bin" - when: ansible_os_family in ["Flatcar", "Flatcar Container Linux by Kinvolk"] - - - name: Force binaries directory for other hosts - set_fact: - bin_dir: "/usr/local/bin" - when: not ansible_os_family in ["Flatcar", "Flatcar Container Linux by Kinvolk"] - - import_role: # noqa name[missing] name: cluster-dump diff --git a/tests/testcases/020_check-pods-running.yml b/tests/testcases/020_check-pods-running.yml index f25e2b7a46f..a573bb1cc7c 100644 --- a/tests/testcases/020_check-pods-running.yml +++ b/tests/testcases/020_check-pods-running.yml @@ -1,18 +1,10 @@ --- - name: Testcases checking pods hosts: kube_control_plane[0] + vars: + bin_dir: /usr/local/bin tasks: - - name: Force binaries directory for Flatcar Container Linux by Kinvolk - set_fact: - bin_dir: "/opt/bin" - when: ansible_os_family in ["Flatcar", "Flatcar Container Linux by Kinvolk"] - - - name: Force binaries directory for other hosts - set_fact: - bin_dir: "/usr/local/bin" - when: not ansible_os_family in ["Flatcar", "Flatcar Container Linux by Kinvolk"] - - import_role: # noqa name[missing] name: cluster-dump diff --git a/tests/testcases/030_check-network.yml b/tests/testcases/030_check-network.yml index 7c5058a9f02..35717e0cfa8 100644 --- 
a/tests/testcases/030_check-network.yml +++ b/tests/testcases/030_check-network.yml @@ -4,17 +4,9 @@ vars: test_image_repo: registry.k8s.io/e2e-test-images/agnhost test_image_tag: "2.40" + bin_dir: "/usr/local/bin" tasks: - - name: Force binaries directory for Flatcar Container Linux by Kinvolk - set_fact: - bin_dir: "/opt/bin" - when: ansible_os_family in ["Flatcar", "Flatcar Container Linux by Kinvolk"] - - - name: Force binaries directory for other hosts - set_fact: - bin_dir: "/usr/local/bin" - when: not ansible_os_family in ["Flatcar", "Flatcar Container Linux by Kinvolk"] - name: Check kubelet serving certificates approved with kubelet_csr_approver when: @@ -102,7 +94,7 @@ - name: Check that all pods are running and ready vars: - pods: "{{ (pods_json | from_json)['items'] }}" + pods: "{{ (pods_json.stdout | from_json)['items'] }}" block: - name: Check Deployment is ready command: "{{ bin_dir }}/kubectl rollout status deploy --namespace test agnhost --timeout=180s" diff --git a/tests/testcases/040_check-network-adv.yml b/tests/testcases/040_check-network-adv.yml index 7f166bb68c5..946de80595a 100644 --- a/tests/testcases/040_check-network-adv.yml +++ b/tests/testcases/040_check-network-adv.yml @@ -19,18 +19,9 @@ agent_report_interval: 10 netcheck_namespace: default netchecker_port: 31081 + bin_dir: "/usr/local/bin" tasks: - - name: Force binaries directory for Container Linux by CoreOS and Flatcar - set_fact: - bin_dir: "/opt/bin" - when: ansible_os_family in ["Flatcar", "Flatcar Container Linux by Kinvolk"] - - - name: Force binaries directory on other hosts - set_fact: - bin_dir: "/usr/local/bin" - when: not ansible_os_family in ["Flatcar", "Flatcar Container Linux by Kinvolk"] - - import_role: # noqa name[missing] name: cluster-dump