Skip to content

Commit

Permalink
Review feedback
Browse files Browse the repository at this point in the history
  • Loading branch information
hughesjj committed Feb 4, 2025
1 parent 2cc24bc commit 892db98
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 47 deletions.
8 changes: 0 additions & 8 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -515,14 +515,6 @@ AppInspect_local:
when: always
allow_failure: false

spell check:
image: "${DOCKER_CICD_REPO}/ci-container/node-18-alpine:1.9.0"
stage: code-analysis
script: |
npm install -g cspell@latest
cd $CI_PROJECT_DIR/packaging/technical-addon/Splunk_TA_otel
cspell --config $CI_PROJECT_DIR/packaging/technical-addon/packaging-scripts/cspell/cspell.json --color "**/*.md" --exclude "**/agent-bundle/**"
.instrumentation-deb-rpm:
extends: .trigger-filter
stage: package
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ set "SPLUNK_OTEL_TA_HOME="
set "SPLUNK_OTEL_TA_PLATFORM_HOME="
set "SPLUNK_OTEL_FLAGS="


:: BEGIN AUTOGENERATED CODE
set "configd_name=configd"
set "configd_value="
Expand Down Expand Up @@ -52,7 +51,6 @@ set "splunk_access_token_file_name=splunk_access_token_file"
set "splunk_access_token_file_value="
:: END AUTOGENERATED CODE


echo on
echo "Starting Splunk TA Otel."
echo off
Expand All @@ -78,7 +76,6 @@ echo "validate args called"
endlocal
exit /b 0


:splunk_TA_otel_run_agent
setlocal enabledelayedexpansion
:: READING CONFIGURATION FROM STDIN
Expand All @@ -96,7 +93,6 @@ if "%SPLUNK_ACCESS_TOKEN%" == "" (
call :splunk_TA_otel_log_msg "INFO" "Environment variable SPLUNK_ACCESS_TOKEN already set."
)


:: BEGIN AUTOGENERATED CODE
if "%gomemlimit_value%" == "" (
call :splunk_TA_otel_log_msg "DEBUG" "Param %gomemlimit_name% not set"
Expand Down Expand Up @@ -283,7 +279,6 @@ for /f "delims=" %%i in ('powershell -noninteractive -noprofile -command "'%splu
set "splunk_access_token_file_value=%%i"
)


:: expand params in splunk_bundle_dir_value
for /f "delims=" %%i in ('powershell -noninteractive -noprofile -command "'%splunk_bundle_dir_value%' -replace '\$SPLUNK_OTEL_TA_PLATFORM_HOME', '%SPLUNK_OTEL_TA_PLATFORM_HOME%'"') do (
set "splunk_bundle_dir_value=%%i"
Expand All @@ -295,7 +290,6 @@ for /f "delims=" %%i in ('powershell -noninteractive -noprofile -command "'%splu
set "splunk_bundle_dir_value=%%i"
)


:: expand params in splunk_config_value
for /f "delims=" %%i in ('powershell -noninteractive -noprofile -command "'%splunk_config_value%' -replace '\$SPLUNK_OTEL_TA_PLATFORM_HOME', '%SPLUNK_OTEL_TA_PLATFORM_HOME%'"') do (
set "splunk_config_value=%%i"
Expand All @@ -307,7 +301,6 @@ for /f "delims=" %%i in ('powershell -noninteractive -noprofile -command "'%splu
set "splunk_config_value=%%i"
)


:: expand params in splunk_config_dir_value
for /f "delims=" %%i in ('powershell -noninteractive -noprofile -command "'%splunk_config_dir_value%' -replace '\$SPLUNK_OTEL_TA_PLATFORM_HOME', '%SPLUNK_OTEL_TA_PLATFORM_HOME%'"') do (
set "splunk_config_dir_value=%%i"
Expand All @@ -319,7 +312,6 @@ for /f "delims=" %%i in ('powershell -noninteractive -noprofile -command "'%splu
set "splunk_config_dir_value=%%i"
)


:: expand params in splunk_collectd_dir_value
for /f "delims=" %%i in ('powershell -noninteractive -noprofile -command "'%splunk_collectd_dir_value%' -replace '\$SPLUNK_OTEL_TA_PLATFORM_HOME', '%SPLUNK_OTEL_TA_PLATFORM_HOME%'"') do (
set "splunk_collectd_dir_value=%%i"
Expand All @@ -331,7 +323,6 @@ for /f "delims=" %%i in ('powershell -noninteractive -noprofile -command "'%splu
set "splunk_collectd_dir_value=%%i"
)


:: expand params in splunk_config_yaml_value
for /f "delims=" %%i in ('powershell -noninteractive -noprofile -command "'%splunk_config_yaml_value%' -replace '\$SPLUNK_OTEL_TA_PLATFORM_HOME', '%SPLUNK_OTEL_TA_PLATFORM_HOME%'"') do (
set "splunk_config_yaml_value=%%i"
Expand All @@ -353,6 +344,7 @@ for /f "delims=" %%i in ('powershell -noninteractive -noprofile -command "'%splu
for /f "delims=" %%i in ('powershell -noninteractive -noprofile -command "'%splunk_otel_log_file_value%' -replace '\$SPLUNK_HOME', '%SPLUNK_HOME%'"') do (
set "splunk_otel_log_file_value=%%i"
)

:: expand params in discovery_properties_value
for /f "delims=" %%i in ('powershell -noninteractive -noprofile -command "'%discovery_properties_value%' -replace '\$SPLUNK_OTEL_TA_PLATFORM_HOME', '%SPLUNK_OTEL_TA_PLATFORM_HOME%'"') do (
set "discovery_properties_value=%%i"
Expand All @@ -364,7 +356,6 @@ for /f "delims=" %%i in ('powershell -noninteractive -noprofile -command "'%disc
set "discovery_properties_value=%%i"
)


:: END AUTOGENERATED CODE
exit /b 0

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,58 +10,68 @@ TEST_FOLDER="${TEST_FOLDER:-$BUILD_DIR/$CI_JOB_ID}"
mkdir -p "$TEST_FOLDER"

# Create ORCA container & grab id
splunk_orca -vvv --cloud ${ORCA_CLOUD} --printer sdd-json --deployment-file "$TEST_FOLDER/orca_deployment.json" --ansible-log ansible-local.log create --prefix "happypath" --env SPLUNK_CONNECTION_TIMEOUT=600 --platform $SPLUNK_PLATFORM --splunk-version "${UF_VERSION}" --local-apps "$TA_FULLPATH" --playbook "$SOURCE_DIR/packaging-scripts/orca-playbook-$PLATFORM.yml,site.yml"
# TODO use jq not awk
deployment_id="$(grep "orca_deployment_id" "$TEST_FOLDER/orca_deployment.json" | awk -F ':' '{print $2}' | awk -F '"' '{print $2}')"
echo "$deployment_id" > "$TEST_FOLDER/deployment_id.txt"
splunk_orca -vvv --cloud "${ORCA_CLOUD}" --printer sdd-json --deployment-file "$TEST_FOLDER/orca_deployment.json" --ansible-log "$TEST_FOLDER/ansible-local.log" create --prefix "happypath" --env SPLUNK_CONNECTION_TIMEOUT=600 --platform "$SPLUNK_PLATFORM" --splunk-version "${UF_VERSION}" --local-apps "$TA_FULLPATH" --playbook "$SOURCE_DIR/packaging-scripts/orca-playbook-$PLATFORM.yml,site.yml"
deployment_id="$(jq -r '.orca_deployment_id' < "$TEST_FOLDER/orca_deployment.json")"
ip_addr="$(jq -r '.server_roles.standalone[0].host' < "$TEST_FOLDER/orca_deployment.json")"

# Check for successful startup
ATTEMPT=1
if [ "$PLATFORM" == "windows" ]; then
# Windows takes forever to extract
echo "sleeping for 700s at $(date)"
sleep 700s
MAX_ATTEMPTS=12 # Windows takes a long time to extract, often 7 minutes on default hardware
DELAY=60
else
# Can likely drop this way down, but give the collector time to collect metrics/traces
echo "sleeping for 90s at $(date)"
sleep 90s
MAX_ATTEMPTS=6
DELAY=20
fi
# Poll until both logs report a successful startup. ATTEMPT, MAX_ATTEMPTS and
# DELAY are set by the platform-specific branch above.
while [ "$ATTEMPT" -le "$MAX_ATTEMPTS" ]; do
  # Copy logs from container. The logs may not exist yet while the add-on is
  # still extracting, so a failed copy must not abort the retry loop (this
  # matters if the script runs with errexit enabled).
  scp -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -r -i ~/.orca/id_rsa "splunk@$ip_addr:/opt/splunk/var/log/splunk/otel.log" "$TEST_FOLDER/otel.log" || true
  scp -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -r -i ~/.orca/id_rsa "splunk@$ip_addr:/opt/splunk/var/log/splunk/Splunk_TA_otel.log" "$TEST_FOLDER/Splunk_TA_otel.log" || true
  # Inspect the copies fetched just above. "$TEST_FOLDER/splunk/" is only
  # populated by a recursive copy of the whole log directory, which has not
  # happened yet at this point.
  if grep -q "Starting otel agent" "$TEST_FOLDER/Splunk_TA_otel.log" &&
    grep -q "Everything is ready" "$TEST_FOLDER/otel.log"; then
    break
  fi
  ATTEMPT=$((ATTEMPT + 1))
  sleep "$DELAY"
done
if [ "$ATTEMPT" -gt "$MAX_ATTEMPTS" ]; then
  echo "Failed to see successful startup in logs after $MAX_ATTEMPTS attempts."
  # Pull the whole log directory for diagnostics; the recursive copy lands in
  # "$TEST_FOLDER/splunk/".
  scp -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -r -i ~/.orca/id_rsa "splunk@$ip_addr:/opt/splunk/var/log/splunk/" "$TEST_FOLDER"
  tail -n 200 "$TEST_FOLDER/splunk/splunkd.log"
  cat "$TEST_FOLDER/Splunk_TA_otel.log"
  cat "$TEST_FOLDER/otel.log"
  exit 1
fi


# Copy logs from container
scp -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -r -i ~/.orca/id_rsa "splunk@$ip_addr:/opt/splunk/var/log/splunk/" "$TEST_FOLDER"

# Verify Otel agent is running without any error
grep -q "Starting otel agent" "$TEST_FOLDER/splunk/Splunk_TA_otel.log"
grep -q "Everything is ready" "$TEST_FOLDER/splunk/otel.log"
# NOTE(review): these two checks can never fail the script. "exit 1" only
# leaves the ( ... ) subshell, and the trailing "|| true" then turns the
# status into 0 whether or not grep matched. Confirm whether a match on
# "ERROR" is really meant to fail the test before making this enforcing.
(grep -qi "ERROR" "$TEST_FOLDER/splunk/Splunk_TA_otel.log" && exit 1 ) || true
(grep -qi "ERROR" "$TEST_FOLDER/splunk/otel.log" && exit 1 ) || true

# Verify Olly has received metrics data from this host
# Verify O11y has received metrics data from this host
MAX_ATTEMPTS=6
DELAY=10
ATTEMPT=1
export CUTOFF="$(date '+%s%3N' -d '5 min ago')"
CUTOFF_DELTA='5 min'
export CUTOFF="$(date '+%s%3N' -d "$CUTOFF_DELTA ago")"
otel_hostname="$(grep "host.name" "$TEST_FOLDER/splunk/otel.log" | head -1 | awk -F 'host.name":"' '{print $2}' | awk -F '","' '{print $1}')"
while [ $ATTEMPT -le $MAX_ATTEMPTS ]; do
curl --header "Content-Type:application/json" --header "X-SF-TOKEN:${OLLY_ACCESS_TOKEN}" "https://api.us0.signalfx.com/v2/metrictimeseries?query=host.name:${otel_hostname}%20AND%20sf_metric:otelcol_process_uptime%20AND%20splunk.distribution:otel-ta" > "$TEST_FOLDER/uptime.json"
count=$( grep '"count"' "$TEST_FOLDER/uptime.json" | awk -F ':\ ' '{print $2}' | awk -F ',' '{print $1}')

count="$(jq -r '.count' < "$TEST_FOLDER/uptime.json")"
if [[ "$count" -gt "0" ]] && jq '[.results[].created, .results[].lastUpdated] | max as $max | $max >= ($ENV.CUTOFF | tonumber)' "$TEST_FOLDER/uptime.json" ; then
break
fi
ATTEMPT=$((ATTEMPT + 1))
sleep $DELAY
done
if [ $ATTEMPT -gt $MAX_ATTEMPTS ]; then
echo "Failed find metrics in last 5m after $MAX_ATTEMPTS attempts."
cat "$GATEWAY_AGENT_LOGS_DIR/otel.log"
echo "Failed to find metrics within $CUTOFF_DELTA after $MAX_ATTEMPTS attempts."
cat "$TEST_FOLDER/otel.log"
exit 1
fi

# Verify the addon can be restarted successfully
orca_container_name=$(splunk_orca --cloud ${ORCA_CLOUD} --printer json show --deployment-id "${deployment_id}" containers | jq -r '.[keys[0]] | .[keys[0]] | .containers | keys[0]')
orca_container_name=$(splunk_orca --cloud "${ORCA_CLOUD}" --printer json show --deployment-id "${deployment_id}" containers | jq -r '.[keys[0]] | .[keys[0]] | .containers | keys[0]')
splunk_orca --cloud "${ORCA_CLOUD}" exec --exec-user splunk "${orca_container_name}" '/opt/splunk/bin/splunk restart'
sleep 90s

MAX_ATTEMPTS=30
DELAY=10
Expand All @@ -80,7 +90,30 @@ while [ $ATTEMPT -le $MAX_ATTEMPTS ]; do
done

if [ $ATTEMPT -gt $MAX_ATTEMPTS ]; then
echo "Failed to see restart log m after $MAX_ATTEMPTS attempts."
echo "Failed to see restart log after $MAX_ATTEMPTS attempts."
if [ "$PLATFORM" == "windows" ]; then
cat "$TEST_FOLDER/Splunk_TA_otelutils.log"
else
cat "$TEST_FOLDER/Splunk_TA_otel.log"
fi
exit 1
fi

# Ensure restart was successful as well
# Polls up to MAX_ATTEMPTS times, DELAY seconds apart, re-copying the logs on
# every attempt and stopping as soon as both startup markers are found.
DELAY=10
ATTEMPT=1
MAX_ATTEMPTS=6
while [ $ATTEMPT -le $MAX_ATTEMPTS ]; do
# NOTE(review): both copies land directly in "$TEST_FOLDER/", but the grep
# below reads from "$TEST_FOLDER/splunk/". The copy also fetches
# Splunk_TA_otelutils.log while the grep checks Splunk_TA_otel.log. Confirm
# which files are meant to be inspected after the restart.
scp -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -r -i ~/.orca/id_rsa "splunk@$ip_addr:/opt/splunk/var/log/splunk/otel.log" "$TEST_FOLDER/"
scp -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -r -i ~/.orca/id_rsa "splunk@$ip_addr:/opt/splunk/var/log/splunk/Splunk_TA_otelutils.log" "$TEST_FOLDER/"
if grep -q "Starting otel agent" "$TEST_FOLDER/splunk/Splunk_TA_otel.log" && grep -q "Everything is ready" "$TEST_FOLDER/splunk/otel.log"; then
break
fi
ATTEMPT=$((ATTEMPT + 1))
sleep $DELAY
done
if [ $ATTEMPT -gt $MAX_ATTEMPTS ]; then
echo "Failed to see restart log after $MAX_ATTEMPTS attempts."
if [ "$PLATFORM" == "windows" ]; then
cat "$TEST_FOLDER/Splunk_TA_otelutils.log"
else
Expand All @@ -89,13 +122,11 @@ if [ $ATTEMPT -gt $MAX_ATTEMPTS ]; then
exit 1
fi

scp -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -r -i ~/.orca/id_rsa "splunk@$ip_addr:/opt/splunk/var/log/splunk/otel.log" "$TEST_FOLDER/"
grep -q "Starting otel agent" "$TEST_FOLDER/splunk/Splunk_TA_otel.log"
grep -q "Everything is ready" "$TEST_FOLDER/splunk/otel.log"
# Ensure no errors after restart
# NOTE(review): as written these checks can never fail the script. "exit 1"
# only leaves the ( ... ) subshell, and "|| true" then forces status 0
# whether or not grep matched.
# NOTE(review): the files read here live under "$TEST_FOLDER/splunk/", while
# the post-restart copies above are written to "$TEST_FOLDER/" -- presumably
# these are pre-restart copies; confirm.
(grep -qi "ERROR" "$TEST_FOLDER/splunk/Splunk_TA_otel.log" && exit 1 ) || true
(grep -qi "ERROR" "$TEST_FOLDER/splunk/otel.log" && exit 1 ) || true

# Ensure version is as expected
# For release, ensure version is as expected. TODO move this to another test and compare against tag
actual_version="$(grep "Version" "$TEST_FOLDER/splunk/otel.log" | head -1 | awk -F 'Version": "' '{print $2}' | awk -F '", "' '{print $1}')"
echo "actual version: $actual_version"
[[ "$actual_version" != "v0.111.0" ]] && echo "Test failed -- invalid version" && exit 1
Expand Down

0 comments on commit 892db98

Please sign in to comment.