Skip to content

Commit

Permalink
add opentelemetry helper scripts (#119)
Browse files Browse the repository at this point in the history
  • Loading branch information
msarahan authored Oct 10, 2024
1 parent c5a6fef commit 8bd8fca
Show file tree
Hide file tree
Showing 9 changed files with 143 additions and 5 deletions.
49 changes: 49 additions & 0 deletions tests/test_rapids-get-telemetry-trace-id.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import os.path
import subprocess

# Location of the scripts under test: the "tools" directory that sits next to
# the directory containing this test file (symlinks in this file's path are
# resolved first via realpath).
TOOLS_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "..", "tools")

def test_rapids_compute_trace_id():
    """Run rapids-get-telemetry-trace-id with a fixed environment and check the ID it prints."""
    script_path = os.path.join(TOOLS_DIR, "rapids-get-telemetry-trace-id")
    # The child process receives exactly these variables; nothing is inherited
    # from the environment of the test runner.
    run_env = {
        "GITHUB_REPOSITORY": "rapidsai/gha-tools",
        "GITHUB_RUN_ID": "1123123",
        "RUN_ATTEMPT": "1",
    }

    result = subprocess.run(script_path, env=run_env, text=True, capture_output=True)

    printed = result.stdout.strip()
    assert printed == "22ab4ec60f37f446b4a95917e86660df"
    assert result.stderr == ""
    assert result.returncode == 0

def test_rapids_get_traceparent():
    """Run rapids-get-telemetry-traceparent with only a job name and check the printed traceparent."""
    script_path = os.path.join(TOOLS_DIR, "rapids-get-telemetry-traceparent")
    command = [script_path, "my_job"]
    # The child process receives exactly these variables; nothing is inherited
    # from the environment of the test runner.
    run_env = {
        "GITHUB_REPOSITORY": "rapidsai/gha-tools",
        "GITHUB_RUN_ID": "1123123",
        "RUN_ATTEMPT": "1",
    }

    result = subprocess.run(command, env=run_env, text=True, capture_output=True)

    printed = result.stdout.strip()
    assert printed == "00-22ab4ec60f37f446b4a95917e86660df-5f57388b5b07a3e8-01"
    assert result.stderr == ""
    assert result.returncode == 0

def test_rapids_get_traceparent_with_step():
    """Run rapids-get-telemetry-traceparent with a job name and a step name and check the output."""
    script_path = os.path.join(TOOLS_DIR, "rapids-get-telemetry-traceparent")
    # The step name contains a space and is passed as a single argument.
    command = [script_path, "my_job", "my step"]
    # The child process receives exactly these variables; nothing is inherited
    # from the environment of the test runner.
    run_env = {
        "GITHUB_REPOSITORY": "rapidsai/gha-tools",
        "GITHUB_RUN_ID": "1123123",
        "RUN_ATTEMPT": "1",
    }

    result = subprocess.run(command, env=run_env, text=True, capture_output=True)

    printed = result.stdout.strip()
    assert printed == "00-22ab4ec60f37f446b4a95917e86660df-a6e5bc57fad91889-01"
    assert result.stderr == ""
    assert result.returncode == 0
2 changes: 1 addition & 1 deletion tools/rapids-conda-retry
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ condaCmd=${RAPIDS_CONDA_EXE:=conda}
# needToRetry: 1 if the command should be retried, 0 if it should not be
function runConda {
# shellcheck disable=SC2086
${condaCmd} ${args} 2>&1| tee "${outfile}"
rapids-otel-wrap ${condaCmd} ${args} 2>&1| tee "${outfile}"
exitcode=$?
needToRetry=0
needToClean=0
Expand Down
2 changes: 1 addition & 1 deletion tools/rapids-get-pr-conda-artifact
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,4 @@ if [[ -z "${commit}" ]]; then
commit=$(git ls-remote https://github.com/rapidsai/"${repo}".git refs/heads/pull-request/"${pr}" | cut -c1-7)
fi

rapids-get-artifact "ci/${repo}/pull-request/${pr}/${commit}/${artifact_name}"
rapids-otel-wrap rapids-get-artifact "ci/${repo}/pull-request/${pr}/${commit}/${artifact_name}"
6 changes: 6 additions & 0 deletions tools/rapids-get-telemetry-trace-id
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash
# Prints the telemetry trace ID for the current workflow run.
#
# The ID is a global, per-run identifier: it is the same across all jobs and all steps
# within all jobs, and stays constant from the source repo, to shared-workflows, to
# shared-actions. It is derived from GITHUB_REPOSITORY, GITHUB_RUN_ID and RUN_ATTEMPT.

run_key="${GITHUB_REPOSITORY}+${GITHUB_RUN_ID}+${RUN_ATTEMPT}"

# The trailing newline is part of the hashed input; dropping it would change every trace ID.
digest="$(printf '%s\n' "${run_key}" | sha256sum)"

# sha256sum prints the hex digest first; the trace ID is its leading 32 hex characters.
printf '%s\n' "${digest:0:32}"
30 changes: 30 additions & 0 deletions tools/rapids-get-telemetry-traceparent
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/bin/bash
# Emits a TRACEPARENT value following the W3C trace context standard:
# https://www.w3.org/TR/trace-context/
#
# Usage: rapids-get-telemetry-traceparent JOB_NAME [STEP_NAME]
#
# Two modes of operation:
#   1. Job level - only JOB_NAME is given. Covers the top level of a job, whether it is the
#      job at the source repo (e.g. rmm) level or the matrix job level.
#   2. Step level - STEP_NAME is also given. The output then identifies the step within
#      the context of its job, using both the job name and the step name.
#
# JOB_NAME must always be provided as the first argument; STEP_NAME is optional.

JOB_NAME=$1
STEP_NAME=${2:-}

# Directory holding this script; the trace-id helper is expected alongside it.
tools_dir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
TRACE_ID="$("${tools_dir}"/rapids-get-telemetry-trace-id)"

# The span seed is "<trace id>-<job>" and gains "-<step>" only when a step name is given.
span_seed="${TRACE_ID}-${JOB_NAME}"
if [ -n "${STEP_NAME}" ]; then
  span_seed="${span_seed}-${STEP_NAME}"
fi

# No trailing newline is hashed here (echo -n). The span ID is the first 16 hex
# characters of the digest that sha256sum prints.
span_digest=$(echo -n "${span_seed}" | sha256sum)

echo "00-${TRACE_ID}-${span_digest:0:16}-01"
2 changes: 1 addition & 1 deletion tools/rapids-mamba-retry
Original file line number Diff line number Diff line change
Expand Up @@ -46,4 +46,4 @@ for arg in "$@"; do
fi
done

rapids-conda-retry "$@"
rapids-otel-wrap rapids-conda-retry "${args[@]}"
53 changes: 53 additions & 0 deletions tools/rapids-otel-wrap
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#!/bin/bash
# Wraps arbitrary commands with arbitrary args. Emits an OpenTelemetry span for tracing the command
#
# Usage: rapids-otel-wrap COMMAND [ARGS...]
#
# The command is run under `otel-cli exec` only when otel-cli is available AND TRACEPARENT
# is non-empty. Otherwise the command is run directly with no instrumentation.
# In both cases this script exits with the status of the command it ran.
#
# Environment read by this script: TRACEPARENT, JOB_NAME, OTEL_SERVICE_NAME,
# RAPIDS_OTEL_EXPORTER / RAPIDS_OTEL_*_EXPORTER, OTEL_EXPORTER_OTLP_ENDPOINT /
# OTEL_EXPORTER_OTLP_*_ENDPOINT.

SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"

# Per-signal settings fall back to the generic setting, then to a default.
# NOTE(review): these assignments are not exported here, so they reach child processes
# only if the variable was already present in the environment - confirm that is intended.
RAPIDS_OTEL_TRACES_EXPORTER="${RAPIDS_OTEL_TRACES_EXPORTER:-${RAPIDS_OTEL_EXPORTER:-"console"}}"
RAPIDS_OTEL_METRICS_EXPORTER="${RAPIDS_OTEL_METRICS_EXPORTER:-${RAPIDS_OTEL_EXPORTER:-"console"}}"
RAPIDS_OTEL_LOGS_EXPORTER="${RAPIDS_OTEL_LOGS_EXPORTER:-${RAPIDS_OTEL_EXPORTER:-"console"}}"
OTEL_EXPORTER_OTLP_TRACES_ENDPOINT="${OTEL_EXPORTER_OTLP_TRACES_ENDPOINT:-${OTEL_EXPORTER_OTLP_ENDPOINT}/v1/traces}"
OTEL_EXPORTER_OTLP_METRICS_ENDPOINT="${OTEL_EXPORTER_OTLP_METRICS_ENDPOINT:-${OTEL_EXPORTER_OTLP_ENDPOINT}/v1/metrics}"
OTEL_EXPORTER_OTLP_LOGS_ENDPOINT="${OTEL_EXPORTER_OTLP_LOGS_ENDPOINT:-${OTEL_EXPORTER_OTLP_ENDPOINT}/v1/logs}"
export TRACEPARENT="${TRACEPARENT}"

# Test the exit status of `type` directly. Wrapping it in $(...) and comparing with
# `-eq 0` only compares its (redirected, hence empty) stdout with 0, which is always
# true and would never detect a missing otel-cli.
if type otel-cli >/dev/null 2>&1 && [ "$TRACEPARENT" != "" ]; then
    echo "Running command with OpenTelemetry instrumentation";

    set -x
    if [ "$OTEL_SERVICE_NAME" = "" ]; then
        echo "WARNING: OTEL_SERVICE_NAME variable not provided. Traces from different steps may not be associated correctly."
    fi

    # Some commands have instrumentation. For example, conda-build has monkey-patched instrumentation
    # that can be activated with the opentelemetry-instrument command. For these commands,
    # we replace the command with the wrapped command, quoted as a whole for the purposes
    # of otel-cli exec, so that flags don't get confused.
    case "$1" in
        conda* )
            echo "using opentelemetry-instrument for command";
            command="opentelemetry-instrument $*"
            ;;
        * )
            command="$*"
            ;;
    esac

    echo "TRACEPARENT prior to otel-cli exec is: \"${TRACEPARENT}\""
    # The step-level traceparent is derived from JOB_NAME plus OTEL_SERVICE_NAME (used as the
    # step name); only its span-id field (third '-'-separated field) is used below.
    STEP_TRACEPARENT=$("${SCRIPT_DIR}/rapids-get-telemetry-traceparent" "${JOB_NAME}" "${OTEL_SERVICE_NAME}")

    # otel-cli creates a span for us that bridges the traceparent from the parent process
    # into the command we're wrapping
    otel-cli exec \
        --name "Run instrumented $*" \
        --force-parent-span-id "$(cut -d'-' -f3 <<<"$STEP_TRACEPARENT")" \
        --verbose \
        -- "${command}"
    RETURN_STATUS=$?
else
    echo "Skipping instrumentation, running \"${*}\"";
    # NOTE(review): eval re-parses the joined arguments, so arguments containing spaces or
    # shell metacharacters are re-split/re-interpreted - confirm callers rely on this.
    eval "$*"
    RETURN_STATUS=$?
fi

exit "${RETURN_STATUS}"
2 changes: 1 addition & 1 deletion tools/rapids-upload-conda-to-s3
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,4 @@ pkg_name="$(rapids-package-name "$pkg_type")"
# Where conda build artifacts are output
path_to_tar_up="${RAPIDS_CONDA_BLD_OUTPUT_DIR}"

rapids-upload-to-s3 "${pkg_name}" "${path_to_tar_up}"
rapids-otel-wrap rapids-upload-to-s3 "${pkg_name}" "${path_to_tar_up}"
2 changes: 1 addition & 1 deletion tools/rapids-upload-wheels-to-s3
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@ if [ "${CI:-false}" = "false" ]; then
exit 0
fi

rapids-upload-to-s3 "${pkg_name}" "$@"
rapids-otel-wrap rapids-upload-to-s3 "${pkg_name}" "$@"

0 comments on commit 8bd8fca

Please sign in to comment.