From 2d76183658507fe2f6832f6e9128d022bb0d952c Mon Sep 17 00:00:00 2001
From: james hughes
Date: Thu, 12 Dec 2024 20:50:26 +0000
Subject: [PATCH 1/5] Completes and fixes the gateway test

---
Reviewer notes (text between the three-dash line and the first diff header
is not part of the commit message and is ignored by `git am`):

 * gateway.sh: the log checks were written as
   `(grep -qi "ERROR" <log> && exit 1 ) || true`. The `exit 1` only left the
   subshell and `|| true` discarded that status, so an ERROR in a log could
   never fail the test. They are now `if grep -qi ...; then exit 1; fi`.
   NOTE(review): this makes the check effective for the first time; confirm
   the logs do not contain benign "error" matches (the grep is
   case-insensitive).
 * gateway.sh: the script runs with `bash -eux`, so a failing `scp` inside a
   polling loop aborted the whole script instead of retrying. Each polling
   `scp` is now followed by `|| true`; exhausting MAX_ATTEMPTS remains the
   failure path.
 * All edits are in-place on existing lines, so hunk line counts are
   unchanged. The blob hashes on the `index` lines predate these edits.
 * Indentation in this copy of the series was reconstructed; verify the
   context lines against the tree before applying.

 .gitlab-ci.yml                                |  32 ++++
 packaging/technical-addon/Makefile            |  10 ++
 .../cicd-tests/gateway/gateway.sh             | 139 ++++++++++++++++++
 3 files changed, 181 insertions(+)
 create mode 100755 packaging/technical-addon/packaging-scripts/cicd-tests/gateway/gateway.sh

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 490c104187..22178545a2 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -485,6 +485,38 @@ test-happypath-ta:
     paths:
       - "$BUILD_DIR/out/**/*"
       - "$BUILD_DIR/$CI_JOB_ID/**/*"
+test-gateway-ta:
+  image: "${DOCKER_CICD_REPO}/ci-container/python-3.11-bullseye:1.19.0"
+  extends: .auth-setup
+  stage: orca-tests
+  dependencies:
+    - "package-technical-addon"
+  parallel:
+    matrix:
+      - UF_VERSION: ["9.0.2", "8.2.7"]
+        SPLUNK_PLATFORM: ["x64_centos_7"]
+        ORCA_CLOUD: ["aws"]
+        ORCA_OPTION: [""]
+        PLATFORM: ["linux"]
+      - UF_VERSION: ["9.0.2", "8.2.7"]
+        SPLUNK_PLATFORM: ["x64_windows_2019", "x64_windows_2022"]
+        ORCA_CLOUD: ["aws"]
+        ORCA_OPTION: [""]
+        PLATFORM: ["windows"]
+  script: |
+    make orca-gateway-test-ta -e
+  after_script:
+    # spin down orca deployments
+    - "cd ${CI_PROJECT_DIR}"
+    - gateway_deployment_id=$(grep "orca_deployment_id" $TEST_FOLDER/orca-gateway-deployment.json | awk -F ':' '{print $2}' | awk -F '"' '{print $2}')
+    - splunk_orca --cloud ${ORCA_CLOUD} destroy "${gateway_deployment_id}"
+    - gateway_agent_deployment_id=$(grep "orca_deployment_id" $TEST_FOLDER/orca-gateway-agent-deployment.json | awk -F ':' '{print $2}' | awk -F '"' '{print $2}')
+    - splunk_orca --cloud ${ORCA_CLOUD} destroy "${gateway_agent_deployment_id}"
+  artifacts:
+    when: always
+    paths:
+      - "$BUILD_DIR/out/**/*"
+      - "$BUILD_DIR/$CI_JOB_ID/**/*"
 AppInspect_local:
   image: "${DOCKER_CICD_REPO}/ci-container/python-3.11-bullseye:1.19.0"
   dependencies:
diff --git a/packaging/technical-addon/Makefile b/packaging/technical-addon/Makefile
index 5bfb6f8bf8..98c4d00c5f 100644
--- a/packaging/technical-addon/Makefile
+++ b/packaging/technical-addon/Makefile
@@ -100,6 +100,16 @@ orca-test-ta: env-guard-verify
 	OLLY_ACCESS_TOKEN="$(OLLY_ACCESS_TOKEN)" \
 	$(SOURCE_DIR)/packaging-scripts/cicd-tests/happypath-test.sh
 
+.PHONY: orca-gateway-test-ta
+orca-gateway-test-ta:
+	SOURCE_DIR="$(SOURCE_DIR)" \
+	BUILD_DIR="$(BUILD_DIR)" \
+	ARCH="$(ARCH)" \
+	SPLUNK_PLATFORM="$(SPLUNK_PLATFORM)" \
+	PLATFORM="$(PLATFORM)" \
+	ORCA_CLOUD="$(ORCA_CLOUD)" \
+	$(SOURCE_DIR)/packaging-scripts/cicd-tests/gateway/gateway.sh
+
 .PHONY: smoketest-ta
 smoketest-ta:
 	SOURCE_DIR="$(SOURCE_DIR)" \
diff --git a/packaging/technical-addon/packaging-scripts/cicd-tests/gateway/gateway.sh b/packaging/technical-addon/packaging-scripts/cicd-tests/gateway/gateway.sh
new file mode 100755
index 0000000000..3a65911254
--- /dev/null
+++ b/packaging/technical-addon/packaging-scripts/cicd-tests/gateway/gateway.sh
@@ -0,0 +1,139 @@
+#!/bin/bash -eux
+set -o pipefail
+which jq || (echo "jq not found" && exit 1)
+source "${SOURCE_DIR}/packaging-scripts/cicd-tests/add-access-token.sh"
+BUILD_DIR="$(realpath "$BUILD_DIR")"
+
+CI_JOB_ID="${CI_JOB_ID:-$(mktemp -d)}"
+TEST_FOLDER="${TEST_FOLDER:-$BUILD_DIR/$CI_JOB_ID}"
+mkdir -p "$TEST_FOLDER"
+
+# customize TA to act as gateway
+GATEWAY_TA_FULLPATH="$(repack_with_access_token "$OLLY_ACCESS_TOKEN" "$BUILD_DIR/out/distribution/Splunk_TA_otel.tgz" | tail -n 1)"
+GATEWAY_REPACKED_TA_NAME="$(basename "$GATEWAY_TA_FULLPATH")"
+GATEWAY_ADDON_DIR="$(realpath "$(dirname "$GATEWAY_TA_FULLPATH")")"
+rm -rf "$GATEWAY_ADDON_DIR/$GATEWAY_REPACKED_TA_NAME"
+# listen on all interfaces (ipv4)
+sed -i "s/splunk_listen_interface=localhost/splunk_listen_interface=0.0.0.0/g" "$GATEWAY_ADDON_DIR/Splunk_TA_otel/local/inputs.conf"
+echo 'splunk_config=$SPLUNK_OTEL_TA_HOME/configs/ta-gateway-config.yaml' >> "$GATEWAY_ADDON_DIR/Splunk_TA_otel/local/inputs.conf"
+tar -C "$GATEWAY_ADDON_DIR" -hcz --file "$GATEWAY_TA_FULLPATH" "Splunk_TA_otel"
+echo "Creating splunk cluster with TA $GATEWAY_TA_FULLPATH"
+
+#if [ "$PLATFORM" == "linux" ]; then
+#else
+# ORCA_SSH_USER="ansible"
+#fi
+ORCA_SSH_USER="splunk"
+
+# There is an internal slack thread in #orca on opening up port 4318, but suffice to say we're only doing on windows for now. Can use docker for linux if needed.
+gateway_container_info="$(splunk_orca -v --printer json --cloud "$ORCA_CLOUD" --ansible-log "$TEST_FOLDER/ansible-local-gateway.log" create --env SPLUNK_CONNECTION_TIMEOUT=600 --platform "$SPLUNK_PLATFORM" --local-apps "$GATEWAY_TA_FULLPATH" --playbook "$SOURCE_DIR/packaging-scripts/orca-playbook-windows.yml,site.yml")"
+echo "$gateway_container_info" > "$TEST_FOLDER/orca-gateway-deployment.json"
+
+# .keys[keys[0]] will grab the first key out of a dict
+# Structure is (currently) {"creator":{"deployment":{"containers":{"container_id":{}}}}}
+GATEWAY_IPV4_ADDR="$(echo "$gateway_container_info" | jq -r '.[keys[0]] | .[keys[0]] | .containers | .[keys[0]] | .private_address')"
+
+
+# Customize TA to act as agent which forwards to gateway
+GATEWAY_AGENT_TA_FULLPATH="$(repack_with_access_token "$OLLY_ACCESS_TOKEN" "$BUILD_DIR/out/distribution/Splunk_TA_otel.tgz" | tail -n 1)"
+GATEWAY_AGENT_REPACKED_TA_NAME="$(basename "$GATEWAY_AGENT_TA_FULLPATH")"
+GATEWAY_AGENT_ADDON_DIR="$(realpath "$(dirname "$GATEWAY_AGENT_TA_FULLPATH")")"
+rm -rf "$GATEWAY_AGENT_ADDON_DIR/$GATEWAY_AGENT_REPACKED_TA_NAME"
+echo 'splunk_config=$SPLUNK_OTEL_TA_HOME/configs/ta-agent-to-gateway-config.yaml' >> "$GATEWAY_AGENT_ADDON_DIR/Splunk_TA_otel/local/inputs.conf"
+echo "splunk_gateway_url=$GATEWAY_IPV4_ADDR" >> "$GATEWAY_AGENT_ADDON_DIR/Splunk_TA_otel/local/inputs.conf"
+tar -C "$GATEWAY_AGENT_ADDON_DIR" -hcz --file "$GATEWAY_AGENT_TA_FULLPATH" "Splunk_TA_otel"
+echo "Creating splunk cluster with TA $GATEWAY_AGENT_TA_FULLPATH"
+
+gateway_agent_container_info=$(splunk_orca -v --printer json --cloud "$ORCA_CLOUD" --ansible-log "$TEST_FOLDER/ansible-local-gateway-agent.log" create --env SPLUNK_CONNECTION_TIMEOUT=600 --platform "$SPLUNK_PLATFORM" --local-apps "$GATEWAY_AGENT_TA_FULLPATH" --playbook "$SOURCE_DIR/packaging-scripts/orca-playbook-windows.yml,site.yml")
+GATEWAY_AGENT_IPV4_ADDR="$(echo "$gateway_agent_container_info" | jq -r '.[keys[0]] | .[keys[0]] | .containers | .[keys[0]] | .ssh_address')"
+
+echo "$gateway_agent_container_info" > "$TEST_FOLDER/orca-gateway-agent-deployment.json"
+GATEWAY_LOGS_DIR="$TEST_FOLDER/$GATEWAY_REPACKED_TA_NAME/"
+mkdir -p "$GATEWAY_LOGS_DIR"
+GATEWAY_AGENT_LOGS_DIR="$TEST_FOLDER/$GATEWAY_AGENT_REPACKED_TA_NAME/"
+mkdir -p "$GATEWAY_AGENT_LOGS_DIR"
+
+# It can take quite some time to extract the agent bundle. Await for it before trying to pull otel.log.
+MAX_ATTEMPTS=6
+DELAY=60
+ATTEMPT=1
+while [ $ATTEMPT -le $MAX_ATTEMPTS ]; do
+    scp -i ~/.orca/id_rsa -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -r "$ORCA_SSH_USER@$GATEWAY_IPV4_ADDR":/opt/splunk/var/log/splunk/Splunk_TA_otel.log "$GATEWAY_LOGS_DIR" || true
+    if grep -qi "Done extracting agent bundle" "$GATEWAY_LOGS_DIR/Splunk_TA_otel.log"; then
+        break
+    else
+        if [ $ATTEMPT -eq $MAX_ATTEMPTS ]; then
+            echo "Failed to extract agent bundle after $MAX_ATTEMPTS attempts."
+            cat "$GATEWAY_LOGS_DIR/Splunk_TA_otel.log"
+            exit 1
+        fi
+        echo "Extraction not complete according to Splunk_TA_otel.log... Retrying in $DELAY seconds"
+        ATTEMPT=$((ATTEMPT + 1))
+        sleep $DELAY
+    fi
+done
+
+MAX_ATTEMPTS=6
+DELAY=10
+ATTEMPT=1
+while [ $ATTEMPT -le $MAX_ATTEMPTS ]; do
+    scp -i ~/.orca/id_rsa -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -r "$ORCA_SSH_USER@$GATEWAY_IPV4_ADDR":/opt/splunk/var/log/splunk/otel.log "$GATEWAY_LOGS_DIR" || true
+    if grep -qi "Everything is ready" "$GATEWAY_LOGS_DIR/otel.log"; then
+        break
+    fi
+    if [ $ATTEMPT -eq $MAX_ATTEMPTS ]; then
+        echo "Failed to see startup message in otel.log after $MAX_ATTEMPTS attempts."
+        cat "$GATEWAY_LOGS_DIR/otel.log"
+        exit 1
+    fi
+    echo "Did not see startup message according to otel.log... Retrying in $DELAY seconds"
+    ATTEMPT=$((ATTEMPT + 1))
+    sleep $DELAY
+done
+
+# It can take quite some time to extract the agent bundle (+7 minutes between start and end log message). Await for it before trying to pull otel.log.
+MAX_ATTEMPTS=12
+DELAY=60
+ATTEMPT=1
+while [ $ATTEMPT -le $MAX_ATTEMPTS ]; do
+    scp -i ~/.orca/id_rsa -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -r "$ORCA_SSH_USER@$GATEWAY_AGENT_IPV4_ADDR":/opt/splunk/var/log/splunk/Splunk_TA_otel.log "$GATEWAY_AGENT_LOGS_DIR" || true
+    if grep -qi "Done extracting agent bundle" "$GATEWAY_AGENT_LOGS_DIR/Splunk_TA_otel.log"; then
+        break
+    fi
+    echo "Extraction not complete according to Splunk_TA_otel.log... Retrying in $DELAY seconds"
+    ATTEMPT=$((ATTEMPT + 1))
+    sleep $DELAY
+done
+if [ $ATTEMPT -gt $MAX_ATTEMPTS ]; then
+    echo "Failed to extract agent bundle after $MAX_ATTEMPTS attempts."
+    cat "$GATEWAY_AGENT_LOGS_DIR/Splunk_TA_otel.log"
+    exit 1
+fi
+
+MAX_ATTEMPTS=6
+DELAY=10
+ATTEMPT=1
+while [ $ATTEMPT -le $MAX_ATTEMPTS ]; do
+    scp -i ~/.orca/id_rsa -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -r "$ORCA_SSH_USER@$GATEWAY_AGENT_IPV4_ADDR":/opt/splunk/var/log/splunk/otel.log "$GATEWAY_AGENT_LOGS_DIR" || true
+    if grep -qi "Everything is ready" "$GATEWAY_AGENT_LOGS_DIR/otel.log"; then
+        break
+    fi
+    echo "Did not see startup message according to otel.log... Retrying in $DELAY seconds"
+    ATTEMPT=$((ATTEMPT + 1))
+    sleep $DELAY
+done
+if [ $ATTEMPT -gt $MAX_ATTEMPTS ]; then
+    echo "Failed to see startup message in otel.log after $MAX_ATTEMPTS attempts."
+    cat "$GATEWAY_AGENT_LOGS_DIR/otel.log"
+    exit 1
+fi
+
+grep -q "Starting otel agent" "$GATEWAY_LOGS_DIR/Splunk_TA_otel.log"
+if grep -qi "ERROR" "$GATEWAY_LOGS_DIR/Splunk_TA_otel.log"; then exit 1; fi
+if grep -qi "ERROR" "$GATEWAY_LOGS_DIR/otel.log"; then exit 1; fi
+
+grep -q "Starting otel agent" "$GATEWAY_AGENT_LOGS_DIR/Splunk_TA_otel.log"
+if grep -qi "ERROR" "$GATEWAY_AGENT_LOGS_DIR/Splunk_TA_otel.log"; then exit 1; fi
+if grep -qi "ERROR" "$GATEWAY_AGENT_LOGS_DIR/otel.log"; then exit 1; fi
+
+exit 0

From 8932671460ff3d9e91c5a5de228636fcecefff48 Mon Sep 17 00:00:00 2001
From: James Hughes
Date: Wed, 5 Feb 2025 14:00:55 -0800
Subject: [PATCH 2/5] remove comment

---
 .../packaging-scripts/cicd-tests/gateway/gateway.sh | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/packaging/technical-addon/packaging-scripts/cicd-tests/gateway/gateway.sh b/packaging/technical-addon/packaging-scripts/cicd-tests/gateway/gateway.sh
index 3a65911254..ee6c6969cf 100755
--- a/packaging/technical-addon/packaging-scripts/cicd-tests/gateway/gateway.sh
+++ b/packaging/technical-addon/packaging-scripts/cicd-tests/gateway/gateway.sh
@@ -19,10 +19,6 @@ echo 'splunk_config=$SPLUNK_OTEL_TA_HOME/configs/ta-gateway-config.yaml' >> "$GA
 tar -C "$GATEWAY_ADDON_DIR" -hcz --file "$GATEWAY_TA_FULLPATH" "Splunk_TA_otel"
 echo "Creating splunk cluster with TA $GATEWAY_TA_FULLPATH"
 
-#if [ "$PLATFORM" == "linux" ]; then
-#else
-# ORCA_SSH_USER="ansible"
-#fi
 ORCA_SSH_USER="splunk"
 
 # There is an internal slack thread in #orca on opening up port 4318, but suffice to say we're only doing on windows for now. Can use docker for linux if needed.

From d26ec65dd1476b10de4e12bd4fe1c5fdff0a2446 Mon Sep 17 00:00:00 2001
From: James Hughes
Date: Wed, 5 Feb 2025 14:01:41 -0800
Subject: [PATCH 3/5] remove comment

---
 .../packaging-scripts/cicd-tests/gateway/gateway.sh | 1 -
 1 file changed, 1 deletion(-)

diff --git a/packaging/technical-addon/packaging-scripts/cicd-tests/gateway/gateway.sh b/packaging/technical-addon/packaging-scripts/cicd-tests/gateway/gateway.sh
index ee6c6969cf..9e5c1efbfd 100755
--- a/packaging/technical-addon/packaging-scripts/cicd-tests/gateway/gateway.sh
+++ b/packaging/technical-addon/packaging-scripts/cicd-tests/gateway/gateway.sh
@@ -21,7 +21,6 @@ echo "Creating splunk cluster with TA $GATEWAY_TA_FULLPATH"
 
 ORCA_SSH_USER="splunk"
 
-# There is an internal slack thread in #orca on opening up port 4318, but suffice to say we're only doing on windows for now. Can use docker for linux if needed.
 gateway_container_info="$(splunk_orca -v --printer json --cloud "$ORCA_CLOUD" --ansible-log "$TEST_FOLDER/ansible-local-gateway.log" create --env SPLUNK_CONNECTION_TIMEOUT=600 --platform "$SPLUNK_PLATFORM" --local-apps "$GATEWAY_TA_FULLPATH" --playbook "$SOURCE_DIR/packaging-scripts/orca-playbook-windows.yml,site.yml")"
 echo "$gateway_container_info" > "$TEST_FOLDER/orca-gateway-deployment.json"
 

From 8af6cf064663121612a9559eae04da46958e0c41 Mon Sep 17 00:00:00 2001
From: James Hughes
Date: Wed, 5 Feb 2025 14:05:26 -0800
Subject: [PATCH 4/5] gateway test target: require env-guard-verify, pass
 UF_VERSION and OLLY_ACCESS_TOKEN

---
 packaging/technical-addon/Makefile | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/packaging/technical-addon/Makefile b/packaging/technical-addon/Makefile
index 98c4d00c5f..63ccdfdd48 100644
--- a/packaging/technical-addon/Makefile
+++ b/packaging/technical-addon/Makefile
@@ -101,13 +101,15 @@ orca-test-ta: env-guard-verify
 	$(SOURCE_DIR)/packaging-scripts/cicd-tests/happypath-test.sh
 
 .PHONY: orca-gateway-test-ta
-orca-gateway-test-ta:
+orca-gateway-test-ta: env-guard-verify
 	SOURCE_DIR="$(SOURCE_DIR)" \
 	BUILD_DIR="$(BUILD_DIR)" \
 	ARCH="$(ARCH)" \
 	SPLUNK_PLATFORM="$(SPLUNK_PLATFORM)" \
 	PLATFORM="$(PLATFORM)" \
 	ORCA_CLOUD="$(ORCA_CLOUD)" \
+	UF_VERSION="$(UF_VERSION)" \
+	OLLY_ACCESS_TOKEN="$(OLLY_ACCESS_TOKEN)" \
 	$(SOURCE_DIR)/packaging-scripts/cicd-tests/gateway/gateway.sh
 
 .PHONY: smoketest-ta

From b4120586548cd08b714398543ce21b3c2c6108ac Mon Sep 17 00:00:00 2001
From: James Hughes
Date: Wed, 5 Feb 2025 14:21:15 -0800
Subject: [PATCH 5/5] normalize retry function

---
 .../cicd-tests/gateway/gateway.sh             | 28 +++++++++----------
 1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/packaging/technical-addon/packaging-scripts/cicd-tests/gateway/gateway.sh b/packaging/technical-addon/packaging-scripts/cicd-tests/gateway/gateway.sh
index 9e5c1efbfd..8700fc5265 100755
--- a/packaging/technical-addon/packaging-scripts/cicd-tests/gateway/gateway.sh
+++ b/packaging/technical-addon/packaging-scripts/cicd-tests/gateway/gateway.sh
@@ -28,7 +28,6 @@ echo "$gateway_container_info" > "$TEST_FOLDER/orca-gateway-deployment.json"
 # Structure is (currently) {"creator":{"deployment":{"containers":{"container_id":{}}}}}
 GATEWAY_IPV4_ADDR="$(echo "$gateway_container_info" | jq -r '.[keys[0]] | .[keys[0]] | .containers | .[keys[0]] | .private_address')"
 
-
 # Customize TA to act as agent which forwards to gateway
 GATEWAY_AGENT_TA_FULLPATH="$(repack_with_access_token "$OLLY_ACCESS_TOKEN" "$BUILD_DIR/out/distribution/Splunk_TA_otel.tgz" | tail -n 1)"
 GATEWAY_AGENT_REPACKED_TA_NAME="$(basename "$GATEWAY_AGENT_TA_FULLPATH")"
@@ -56,17 +55,16 @@ while [ $ATTEMPT -le $MAX_ATTEMPTS ]; do
     scp -i ~/.orca/id_rsa -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -r "$ORCA_SSH_USER@$GATEWAY_IPV4_ADDR":/opt/splunk/var/log/splunk/Splunk_TA_otel.log "$GATEWAY_LOGS_DIR" || true
     if grep -qi "Done extracting agent bundle" "$GATEWAY_LOGS_DIR/Splunk_TA_otel.log"; then
         break
-    else
-        if [ $ATTEMPT -eq $MAX_ATTEMPTS ]; then
-            echo "Failed to extract agent bundle after $MAX_ATTEMPTS attempts."
-            cat "$GATEWAY_LOGS_DIR/Splunk_TA_otel.log"
-            exit 1
-        fi
-        echo "Extraction not complete according to Splunk_TA_otel.log... Retrying in $DELAY seconds"
-        ATTEMPT=$((ATTEMPT + 1))
-        sleep $DELAY
     fi
+    echo "Extraction not complete according to Splunk_TA_otel.log... Retrying in $DELAY seconds"
+    ATTEMPT=$((ATTEMPT + 1))
+    sleep $DELAY
 done
+if [ $ATTEMPT -gt $MAX_ATTEMPTS ]; then
+    echo "Failed to extract agent bundle after $MAX_ATTEMPTS attempts."
+    cat "$GATEWAY_LOGS_DIR/Splunk_TA_otel.log"
+    exit 1
+fi
 
 MAX_ATTEMPTS=6
 DELAY=10
@@ -76,15 +74,15 @@ while [ $ATTEMPT -le $MAX_ATTEMPTS ]; do
     if grep -qi "Everything is ready" "$GATEWAY_LOGS_DIR/otel.log"; then
         break
     fi
-    if [ $ATTEMPT -eq $MAX_ATTEMPTS ]; then
-        echo "Failed to see startup message in otel.log after $MAX_ATTEMPTS attempts."
-        cat "$GATEWAY_LOGS_DIR/otel.log"
-        exit 1
-    fi
     echo "Did not see startup message according to otel.log... Retrying in $DELAY seconds"
     ATTEMPT=$((ATTEMPT + 1))
     sleep $DELAY
 done
+if [ $ATTEMPT -gt $MAX_ATTEMPTS ]; then
+    echo "Failed to see startup message in otel.log after $MAX_ATTEMPTS attempts."
+    cat "$GATEWAY_LOGS_DIR/otel.log"
+    exit 1
+fi
 
 # It can take quite some time to extract the agent bundle (+7 minutes between start and end log message). Await for it before trying to pull otel.log.
 MAX_ATTEMPTS=12