Fix smoke-daemonset to work on multi-node cluster. (open-telemetry#2412)

changexd · Dec 5, 2023 · d08896e · d08896e
1 parent b5be0ca
commit d08896e
Show file tree

Hide file tree

Showing 4 changed files with 28 additions and 5 deletions.
diff --git a/tests/e2e-openshift/monitoring/02-generate-traces.yaml b/tests/e2e-openshift/monitoring/02-generate-traces.yaml
@@ -18,8 +18,10 @@ spec:
             - "--otlp-endpoint=cluster-collector-collector-headless:4317"
             - "--otlp-insecure=true"
             - "--rate=1"
-            - "--duration=5s"
+            - "--duration=3m"
             - "--otlp-attributes=telemetrygen=\"traces\""
             - "--otlp-header=telemetrygen=\"traces\""
+            - "--span-duration=1s"
+            - "--workers=1"
             - "traces"
       restartPolicy: Never
diff --git a/tests/e2e-openshift/monitoring/check_metrics.sh b/tests/e2e-openshift/monitoring/check_metrics.sh
@@ -4,8 +4,8 @@ SECRET=$(oc get secret -n openshift-user-workload-monitoring | grep prometheus-u
 TOKEN=$(echo $(oc get secret $SECRET -n openshift-user-workload-monitoring -o json | jq -r '.data.token') | base64 -d)
 THANOS_QUERIER_HOST=$(oc get route thanos-querier -n openshift-monitoring -o json | jq -r '.spec.host')
 
-#Check metrics used in the prometheus rules created for TempoStack. Refer issue https://issues.redhat.com/browse/TRACING-3399 for skipped metrics.
-metrics="otelcol_exporter_enqueue_failed_spans otelcol_exporter_sent_spans otelcol_process_cpu_seconds otelcol_process_memory_rss otelcol_process_runtime_heap_alloc_bytes otelcol_process_runtime_total_alloc_bytes otelcol_process_runtime_total_sys_memory_bytes otelcol_process_uptime otelcol_receiver_accepted_spans otelcol_receiver_refused_spans"
+# Check metrics exposed by the OpenTelemetry collector instance; each name is listed once so the loop below visits it once.
+metrics="otelcol_process_uptime otelcol_process_runtime_total_sys_memory_bytes otelcol_process_memory_rss otelcol_exporter_sent_spans otelcol_process_cpu_seconds otelcol_process_runtime_heap_alloc_bytes otelcol_process_runtime_total_alloc_bytes otelcol_receiver_accepted_spans otelcol_receiver_refused_spans"
 
 for metric in $metrics; do
   query="$metric"

diff --git a/tests/e2e/smoke-daemonset/00-assert.yaml b/tests/e2e/smoke-daemonset/00-assert.yaml
@@ -9,6 +9,12 @@ spec:
       maxSurge: 0
       maxUnavailable: 1
 status:
-  numberAvailable: 1
   numberMisscheduled: 0
-  numberReady: 1
+
+---
+# This KUTTL assert runs the check-daemonset.sh script to verify that the number of ready pods in the daemonset matches the desired count, retrying until it succeeds or a timeout occurs. The script is needed because the number of Kubernetes cluster nodes can vary, so desiredNumberScheduled and numberReady cannot be set statically in the daemonset status assert.
+
+apiVersion: kuttl.dev/v1beta1
+kind: TestAssert
+commands:
+- script: ./tests/e2e/smoke-daemonset/check-daemonset.sh
diff --git a/tests/e2e/smoke-daemonset/check-daemonset.sh b/tests/e2e/smoke-daemonset/check-daemonset.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+# KUTTL assert helper: exits 0 only when the daemonset's ready pod count equals its desired count, 1 otherwise.
+: "${NAMESPACE:?NAMESPACE must be set to the namespace under test}" # fail fast; an empty value would make -n swallow the daemonset name
+DAEMONSET_NAME="daemonset-test-collector"
+
+# Get the desired and ready pod counts in one call; quoting prevents word-splitting/globbing and -r keeps backslashes literal.
+read -r DESIRED READY <<< "$(kubectl get daemonset -n "$NAMESPACE" "$DAEMONSET_NAME" -o custom-columns=:status.desiredNumberScheduled,:status.numberReady --no-headers)"
+
+# Compare only when both values are non-negative integers, so empty or non-numeric output (e.g. kubectl failed) is reported as a mismatch.
+if [[ "$DESIRED" =~ ^[0-9]+$ && "$READY" =~ ^[0-9]+$ ]] && [ "$DESIRED" -eq "$READY" ]; then
+  echo "Desired count ($DESIRED) matches the ready count ($READY) for $DAEMONSET_NAME."
+else
+  echo "Desired count ($DESIRED) does not match the ready count ($READY) for $DAEMONSET_NAME."
+  exit 1
+fi