Skip to content

Commit

Permalink
Merge pull request #981 from evrardjp/fix_ci
Browse files Browse the repository at this point in the history
Fix ci
  • Loading branch information
evrardjp authored Oct 3, 2024
2 parents 679cdc4 + a02ae67 commit fdac3b1
Show file tree
Hide file tree
Showing 9 changed files with 36 additions and 35 deletions.
4 changes: 0 additions & 4 deletions .github/kind-cluster-1.28.yaml
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
- role: control-plane
image: "kindest/node:v1.28.9"
- role: control-plane
image: "kindest/node:v1.28.9"
- role: control-plane
image: "kindest/node:v1.28.9"
- role: worker
Expand Down
4 changes: 0 additions & 4 deletions .github/kind-cluster-1.29.yaml
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
- role: control-plane
image: "kindest/node:v1.29.4"
- role: control-plane
image: "kindest/node:v1.29.4"
- role: control-plane
image: "kindest/node:v1.29.4"
- role: worker
Expand Down
4 changes: 0 additions & 4 deletions .github/kind-cluster-1.30.yaml
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
- role: control-plane
image: "kindest/node:v1.30.2"
- role: control-plane
image: "kindest/node:v1.30.2"
- role: control-plane
image: "kindest/node:v1.30.2"
- role: worker
Expand Down
30 changes: 15 additions & 15 deletions .github/workflows/on-pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ jobs:
run: echo "sha_short=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT
id: tags
- name: Build image
run: VERSION="${{ steps.tags.outputs.sha_short }}" make image
run: VERSION="${{ steps.tags.outputs.sha_short }}" DH_ORG="${{ github.repository_owner }}" make image
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@6e7b7d1fd3e4fef0c5fa8cce1229c54b2c9bd0d8
with:
Expand Down Expand Up @@ -132,8 +132,8 @@ jobs:
id: tags
- name: Build artifacts
run: |
VERSION="${{ steps.tags.outputs.sha_short }}" make image
VERSION="${{ steps.tags.outputs.sha_short }}" make manifest
VERSION="${{ steps.tags.outputs.sha_short }}" DH_ORG="${{ github.repository_owner }}" make image
VERSION="${{ steps.tags.outputs.sha_short }}" DH_ORG="${{ github.repository_owner }}" make manifest
- name: Workaround "Failed to attach 1 to compat systemd cgroup /actions_job/..." on gh actions
run: |
Expand All @@ -145,7 +145,7 @@ jobs:
EOF
# Default name for helm/kind-action kind clusters is "chart-testing"
- name: Create kind cluster with 5 nodes
- name: Create kind cluster with 3 nodes
uses: helm/kind-action@v1.10.0
with:
config: .github/kind-cluster-${{ matrix.kubernetes }}.yaml
Expand All @@ -169,7 +169,7 @@ jobs:
max_attempts: 10
retry_wait_seconds: 60
# DESIRED, CURRENT, READY, UP-TO-DATE and AVAILABLE should all be equal to cluster_size
command: "kubectl get ds -n kube-system kured | grep -E 'kured.*5.*5.*5.*5.*5'"
command: "kubectl get ds -n kube-system kured | grep -E 'kured.*3.*3.*3.*3.*3'"

- name: Create reboot sentinel files
run: |
Expand Down Expand Up @@ -217,8 +217,8 @@ jobs:
id: tags
- name: Build artifacts
run: |
VERSION="${{ steps.tags.outputs.sha_short }}" make image
VERSION="${{ steps.tags.outputs.sha_short }}" make manifest
VERSION="${{ steps.tags.outputs.sha_short }}" DH_ORG="${{ github.repository_owner }}" make image
VERSION="${{ steps.tags.outputs.sha_short }}" DH_ORG="${{ github.repository_owner }}" make manifest
- name: Workaround "Failed to attach 1 to compat systemd cgroup /actions_job/..." on gh actions
run: |
Expand All @@ -230,7 +230,7 @@ jobs:
EOF
# Default name for helm/kind-action kind clusters is "chart-testing"
- name: Create kind cluster with 5 nodes
- name: Create kind cluster with 3 nodes
uses: helm/kind-action@v1.10.0
with:
config: .github/kind-cluster-${{ matrix.kubernetes }}.yaml
Expand All @@ -241,7 +241,7 @@ jobs:

- name: Do not wait for an hour before detecting the rebootSentinel
run: |
sed -i 's/#\(.*\)--period=1h/\1--period=30s/g' kured-ds-signal.yaml
sed -i 's/#\(.*\)--period=1h/\1--period=15s/g' kured-ds-signal.yaml
- name: Install kured with kubectl
run: |
Expand All @@ -254,7 +254,7 @@ jobs:
max_attempts: 10
retry_wait_seconds: 60
# DESIRED, CURRENT, READY, UP-TO-DATE and AVAILABLE should all be equal to cluster_size
command: "kubectl get ds -n kube-system kured | grep -E 'kured.*5.*5.*5.*5.*5'"
command: "kubectl get ds -n kube-system kured | grep -E 'kured.*3.*3.*3.*3.*3'"

- name: Create reboot sentinel files
run: |
Expand Down Expand Up @@ -303,8 +303,8 @@ jobs:
id: tags
- name: Build artifacts
run: |
VERSION="${{ steps.tags.outputs.sha_short }}" make image
VERSION="${{ steps.tags.outputs.sha_short }}" make manifest
VERSION="${{ steps.tags.outputs.sha_short }}" DH_ORG="${{ github.repository_owner }}" make image
VERSION="${{ steps.tags.outputs.sha_short }}" DH_ORG="${{ github.repository_owner }}" make manifest
- name: Workaround "Failed to attach 1 to compat systemd cgroup /actions_job/..." on gh actions
run: |
Expand All @@ -316,7 +316,7 @@ jobs:
EOF
# Default name for helm/kind-action kind clusters is "chart-testing"
- name: Create kind cluster with 5 nodes
- name: Create kind cluster with 3 nodes
uses: helm/kind-action@v1.10.0
with:
config: .github/kind-cluster-${{ matrix.kubernetes }}.yaml
Expand All @@ -327,7 +327,7 @@ jobs:

- name: Do not wait for an hour before detecting the rebootSentinel
run: |
sed -i 's/#\(.*\)--period=1h/\1--period=30s/g' kured-ds.yaml
sed -i 's/#\(.*\)--period=1h/\1--period=15s/g' kured-ds.yaml
sed -i 's/#\(.*\)--concurrency=1/\1--concurrency=2/g' kured-ds.yaml
- name: Install kured with kubectl
Expand All @@ -341,7 +341,7 @@ jobs:
max_attempts: 10
retry_wait_seconds: 60
# DESIRED, CURRENT, READY, UP-TO-DATE and AVAILABLE should all be equal to cluster_size
command: "kubectl get ds -n kube-system kured | grep -E 'kured.*5.*5.*5.*5.*5'"
command: "kubectl get ds -n kube-system kured | grep -E 'kured.*3.*3.*3.*3.*3'"

- name: Create reboot sentinel files
run: |
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/periodics-daily.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ jobs:
run: echo "sha_short=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT
id: tags
- name: Build artifacts
run: VERSION="${{ steps.tags.outputs.sha_short }}" make image
run: VERSION="${{ steps.tags.outputs.sha_short }}" DH_ORG="${{ github.repository_owner }}" make image
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@6e7b7d1fd3e4fef0c5fa8cce1229c54b2c9bd0d8
with:
Expand Down
3 changes: 3 additions & 0 deletions .trivyignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# https://pkg.go.dev/vuln/GO-2024-3106
# Will be fixed automatically once we use golang 1.22.7
CVE-2024-34156
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

TEMPDIR=./.tmp
GORELEASER_CMD=$(TEMPDIR)/goreleaser
DH_ORG=kubereboot
DH_ORG ?= kubereboot
VERSION=$(shell git rev-parse --short HEAD)
SUDO=$(shell docker info >/dev/null 2>&1 || echo "sudo -E")

Expand Down
5 changes: 3 additions & 2 deletions tests/kind/create-reboot-sentinels.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@
KUBECTL_CMD="${KUBECTL_CMD:-kubectl}"
SENTINEL_FILE="${SENTINEL_FILE:-/var/run/reboot-required}"

echo "Creating reboot sentinel on all nodes"
echo "Creating reboot sentinel on worker nodes"

for nodename in $("$KUBECTL_CMD" get nodes -o name); do
# To speed up the system, let's not kill the control plane.
for nodename in $("$KUBECTL_CMD" get nodes -o name | grep -v control-plane); do
docker exec "${nodename/node\//}" hostname
docker exec "${nodename/node\//}" touch "${SENTINEL_FILE}"
done
17 changes: 13 additions & 4 deletions tests/kind/follow-coordinated-reboot.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env bash

NODECOUNT=${NODECOUNT:-5}
NODECOUNT=${NODECOUNT:-2}
KUBECTL_CMD="${KUBECTL_CMD:-kubectl}"
DEBUG="${DEBUG:-false}"
CONTAINER_NAME_FORMAT=${CONTAINER_NAME_FORMAT:-"chart-testing-*"}
Expand Down Expand Up @@ -35,10 +35,12 @@ trap gather_logs_and_cleanup EXIT

declare -A was_unschedulable
declare -A has_recovered
max_attempts="60"
sleep_time=60
max_attempts="200"
sleep_time=5
attempt_num=1

# Get docker info of each of those kind containers. If one has crashed, restart it.

set +o errexit
echo "There are $NODECOUNT nodes in the cluster"
until [ ${#was_unschedulable[@]} == "$NODECOUNT" ] && [ ${#has_recovered[@]} == "$NODECOUNT" ]
Expand All @@ -52,13 +54,14 @@ do
# cat "$tmp_dir"/node_output
#fi

"$KUBECTL_CMD" get nodes -o custom-columns=NAME:.metadata.name,SCHEDULABLE:.spec.unschedulable --no-headers > "$tmp_dir"/node_output
"$KUBECTL_CMD" get nodes -o custom-columns=NAME:.metadata.name,SCHEDULABLE:.spec.unschedulable --no-headers | grep -v control-plane > "$tmp_dir"/node_output
if [[ "$DEBUG" == "true" ]]; then
# This is useful to see if a node gets stuck after drain, and doesn't
# come back up.
echo "Result of command $KUBECTL_CMD get nodes ... showing unschedulable nodes:"
cat "$tmp_dir"/node_output
fi

while read -r node; do
unschedulable=$(echo "$node" | grep true | cut -f 1 -d ' ')
if [ -n "$unschedulable" ] && [ -z ${was_unschedulable["$unschedulable"]+x} ] ; then
Expand All @@ -70,6 +73,12 @@ do
echo "$schedulable has recovered!"
has_recovered["$schedulable"]=1
fi

# If the container has crashed, restart it.
node_name=$(echo "$node" | cut -f 1 -d ' ')
stopped_container_id=$(docker container ls --filter=name="$node_name" --filter=status=exited -q)
if [ -n "$stopped_container_id" ]; then echo "Node $stopped_container_id needs restart"; docker start "$stopped_container_id"; echo "Container started."; fi

done < "$tmp_dir"/node_output

if [[ "${#has_recovered[@]}" == "$NODECOUNT" ]]; then
Expand Down

0 comments on commit fdac3b1

Please sign in to comment.