Skip to content

Commit

Permalink
Add debug info collection to functional tests ghwf (#1584)
Browse files Browse the repository at this point in the history
* Add debug info collection to the functional tests GitHub workflow, so that when functional tests fail, the state of the cluster is recorded and uploaded as a zip file artifact
  • Loading branch information
jvoravong authored Dec 18, 2024
1 parent 7419b32 commit b49ef63
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 26 deletions.
32 changes: 26 additions & 6 deletions .github/workflows/functional_test_v2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,12 @@ on:
branches: [ main ]
workflow_dispatch:
inputs:
UPDATE_EXPECTED_RESULTS:
description: 'Set this to true to update expected results and collect updated test output as a Github workflow artifact.'
UPLOAD_UPDATED_EXPECTED_RESULTS:
description: 'Set this to true to upload updated golden file expected results and upload these results as a Github workflow artifact.'
required: false
default: false
UPLOAD_KUBERNETES_DEBUG_INFO:
description: 'Set this to true to collect the debug info of the k8s cluster and upload this info as a Github workflow artifact.'
required: false
default: false

Expand All @@ -20,7 +24,8 @@ jobs:
env:
KUBECONFIG: /tmp/kube-config-splunk-otel-collector-chart-functional-testing
KUBE_TEST_ENV: kind
UPDATE_EXPECTED_RESULTS: ${{ github.event.inputs.UPDATE_EXPECTED_RESULTS || 'false' }}
UPLOAD_UPDATED_EXPECTED_RESULTS: ${{ github.event.inputs.UPLOAD_UPDATED_EXPECTED_RESULTS || 'false' }}
UPLOAD_KUBERNETES_DEBUG_INFO: ${{ github.event.inputs.UPLOAD_KUBERNETES_DEBUG_INFO || 'false' }}
strategy:
fail-fast: false
matrix:
Expand Down Expand Up @@ -71,13 +76,28 @@ jobs:
run: |
make cert-manager
- name: run functional tests
id: run-functional-tests
env:
K8S_VERSION: ${{ matrix.k8s-version }}
run: |
cd functional_tests
TEARDOWN_BEFORE_SETUP=true UPDATE_EXPECTED_RESULTS=${{ env.UPDATE_EXPECTED_RESULTS }} go test -v -tags ${{ matrix.test-job }}
- name: 'Upload test results'
if: always() && env.UPDATE_EXPECTED_RESULTS == 'true'
TEARDOWN_BEFORE_SETUP=true UPDATE_EXPECTED_RESULTS=${{ env.UPLOAD_UPDATED_EXPECTED_RESULTS }} go test -v -tags ${{ matrix.test-job }}
- name: Collect Kubernetes Cluster debug info on failure
if: always() && (steps.run-functional-tests.outcome == 'failure' || env.UPLOAD_KUBERNETES_DEBUG_INFO == 'true')
id: collect-debug-info
run: |
echo "Functional tests failed. Collecting debug info for current state of the Kubernetes cluster..."
cd tools
./splunk_kubernetes_debug_info.sh
- name: Upload Kubernetes Cluster debug info
if: always() && (steps.run-functional-tests.outcome == 'failure' || env.UPLOAD_KUBERNETES_DEBUG_INFO == 'true')
uses: actions/upload-artifact@v4
with:
name: k8s-debug-info-${{ matrix.test-job }}-${{ matrix.k8s-version }}
path: tools/splunk_kubernetes_debug_info_*
retention-days: 5
- name: Upload test results
if: always() && env.UPLOAD_UPDATED_EXPECTED_RESULTS == 'true'
uses: actions/upload-artifact@v4
with:
name: functional_tests-${{ matrix.test-job }}-${{ matrix.k8s-version }}
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
.idea
*.iml
.DS_Store
*splunk_kubernetes_debug_info_*

# Helm
**/charts/*.tgz
Expand Down
55 changes: 35 additions & 20 deletions tools/splunk_kubernetes_debug_info.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# Description:
# This script collects debugging information from a Kubernetes cluster.
# It retrieves networking, firewall, security policies, custom resource definitions (CRDs),
# and logs from specified pods and secrets (sanitized). The outputs are saved to files for each namespace and object type.
# and logs from specified pods. The outputs are saved to files for each namespace and object type.
# This helps in diagnosing and troubleshooting cluster configurations.
# Finally, it compresses all the collected files into a ZIP archive.
#
Expand All @@ -28,7 +28,7 @@
#
# Objects Scraped:
# - Pod logs for agent, cluster-receiver, certmanager, operator, gateway, splunk pods
# - Deployments, daemonsets, secrets, Helm releases matching K8S_OBJECT_NAME_FILTER
# - Deployments, daemonsets, Helm releases matching K8S_OBJECT_NAME_FILTER
# - NetworkPolicies, Services, Ingress resources, Endpoints, Roles, RoleBindings, Security contexts
# - OpenTelemetry Instrumentation objects
# - Custom Resource Definitions (CRDs), Pod Security Policies (PSPs), Security Context Constraints (SCCs)
Expand Down Expand Up @@ -87,10 +87,10 @@ write_output() {
collect_data_namespace() {
local ns=$1

object_types=("deployments" "daemonsets" "configmaps" "secrets" "networkpolicies" "svc" "ingress" "endpoints" "roles" "rolebindings" "otelinst")
object_types=("configmaps" "daemonsets" "deployments" "endpoints" "events" "ingress" "jobs" "networkpolicies" "otelinst" "rolebindings" "roles" "svc")
for type in "${object_types[@]}"; do
stdbuf -oL echo "Collecting $type data for $ns namespace with $k8s_object_name_filter name filter"
if [[ "$type" == "deployment" || "$type" == "daemonset" || "$type" == "configmaps" || "$type" == "secrets" ]]; then
if [[ "$type" == "deployment" || "$type" == "daemonset" || "$type" == "configmaps" ]]; then
kubectl get "$type" -n "$ns" -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' | grep -E "$k8s_object_name_filter" | while read object; do
cmd="kubectl get $type $object -n $ns -o yaml"
output=$(eval "$cmd")
Expand Down Expand Up @@ -201,21 +201,6 @@ collect_data_cluster() {
output=$(eval "$cmd")
write_output "$output" "$temp_dir/cluster_custom_resource_definitions.yaml" "$cmd"

echo "Collecting pod security policies..."
cmd="kubectl get psp -o yaml"
output=$(eval "$cmd")
write_output "$output" "$temp_dir/cluster_pod_security_policies.yaml" "$cmd"

echo "Collecting security context constraints..."
cmd="kubectl get scc -o yaml"
output=$(eval "$cmd")
write_output "$output" "$temp_dir/cluster_security_context_constraints.yaml" "$cmd"

echo "Collecting MutatingWebhookConfiguration objects..."
cmd="kubectl get mutatingwebhookconfiguration.admissionregistration.k8s.io -o yaml; kubectl describe mutatingwebhookconfiguration.admissionregistration.k8s.io; kubectl get --raw /metrics | grep apiserver_admission_webhook_rejection_count;"
output=$(eval "$cmd")
write_output "$output" "$temp_dir/cluster_webhooks.yaml" "$cmd"

echo "Checking for cert-manager installation..."
cert_manager_pods=$(kubectl get pods --all-namespaces -l app=cert-manager --no-headers)
if [ -n "$cert_manager_pods" ]; then
Expand All @@ -233,6 +218,33 @@ collect_data_cluster() {
done
}

collect_cluster_resources() {
  #######################################
  # Collect YAML manifests for cluster-scoped resource types that are useful
  # when debugging the collector chart (CRDs, pod security policies, security
  # context constraints, and admission webhook configurations).
  # Globals:   temp_dir (read) - directory the per-object YAML files go into
  # Outputs:   one file per object written via write_output
  # NOTE(review): some types (psp, scc) exist only on certain cluster
  # flavors/versions; for those, kubectl prints an error to stderr and the
  # loop simply collects nothing — TODO confirm that is the intended behavior.
  #######################################
  # List of cluster-scoped resource types to collect
  local -a cluster_object_types=(
    "crds"
    "psp"
    "scc"
    "mutatingwebhookconfiguration.admissionregistration.k8s.io"
    "validatingwebhookconfiguration.admissionregistration.k8s.io"
  )
  local type object api_version cmd output

  for type in "${cluster_object_types[@]}"; do
    echo "Collecting $type cluster-scoped resources..."

    # Fetch each object's name; -r keeps backslashes in names literal (SC2162).
    kubectl get "$type" -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' | while read -r object; do
      # Get the API version for this object, fallback to "unknown"
      api_version=$(kubectl get "$type" "$object" -o jsonpath='{.apiVersion}' 2>/dev/null || echo "unknown")
      api_version=${api_version//\//_} # Sanitize slashes in API version

      # Run kubectl directly instead of eval'ing a string; keep $cmd only as
      # the human-readable command recorded by write_output.
      cmd="kubectl get $type $object -o yaml"
      output=$(kubectl get "$type" "$object" -o yaml)
      write_output "$output" "$temp_dir/cluster_${type//./_}_${api_version}_${object}.yaml" "$cmd"
    done
  done
}

# Parse input parameters
namespaces=""
k8s_object_name_filter="splunk|collector|otel|certmanager|test|sck|sock"
Expand Down Expand Up @@ -279,9 +291,12 @@ script_start_time=$(date +"%Y-%m-%d %H:%M:%S")
echo "Script start time: $script_start_time"
echo "Script start time: $script_start_time" >> "$output_file"

# Collect cluster-wide data
# Collect cluster instance specific data
collect_data_cluster

# Collect cluster scoped resources data
collect_cluster_resources

# Function to manage parallel processing of namespaces
collect_data_namespace_namespaces() {
local parallelism=20
Expand Down

0 comments on commit b49ef63

Please sign in to comment.