Skip to content

Commit

Permalink
CSMDS-315: Extend report.sh with Kafka custom resource dump (strimzi#22)
Browse files Browse the repository at this point in the history
CSMDS-321: Dump all Kafka resources in report.sh (strimzi#24)

CSMDS-329: Add all topic describe to report.sh (strimzi#37)

CSMDS-420: Fix report.sh to not fail when Kafka resource is being deleted during script run (strimzi#39)

CSMDS-317: Add java_thread_dump.sh to dump Java threads of all containers o… (strimzi#23)
  • Loading branch information
urbandan authored and david-simon committed Mar 28, 2024
1 parent 81ec77a commit 85e24f1
Show file tree
Hide file tree
Showing 2 changed files with 216 additions and 0 deletions.
177 changes: 177 additions & 0 deletions tools/java_thread_dump.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
#!/usr/bin/env bash
# Self contained Strimzi thread dump tool.

# Strict mode: abort on errors (also inside functions and subshells via
# errtrace), on use of unset variables, and on a failure in any pipeline stage.
set -o errtrace -o errexit -o nounset -o pipefail

# On macOS, route the text tools used below to their g-prefixed variants
# (presumably the GNU builds, e.g. from Homebrew -- confirm they are installed).
# Aliases are not expanded in non-interactive shells unless expand_aliases is on.
if [[ "$(uname -s)" == "Darwin" ]]; then
  shopt -s expand_aliases
  alias echo="gecho"
  alias grep="ggrep"
  alias sed="gsed"
  alias date="gdate"
fi
#######################################
# Print a message to stderr and terminate the script.
# Arguments: $@ - message words; joined with single spaces
# Outputs:   the message followed by a newline, on stderr
# Returns:   never returns; always exits with status 1
#######################################
error() {
  # Join with a space regardless of what IFS the caller is using.
  local IFS=' '
  # printf instead of echo so that a message starting with -n/-e is printed
  # literally. The write is best-effort: the exit below must happen even when
  # stderr is closed or unwritable.
  printf '%s\n' "$*" 1>&2 || true
  exit 1
}

{ # this ensures that the entire script is downloaded #
  # Main body: validate the environment and options, list the Java processes
  # in each selected container of the pod with `jcmd -l`, capture
  # `jcmd <pid> Thread.print` for every one of them (optionally several rounds
  # separated by a pause), and zip the result.
  KUBECTL_INSTALLED=false
  OC_INSTALLED=false
  KUBE_CLIENT="kubectl"
  # Give the required options a defined value so the "missing option" check
  # prints the usage text instead of tripping `set -u`. A value inherited from
  # the environment is kept, as before.
  NAMESPACE="${NAMESPACE:-}"
  POD="${POD:-}"
  CONTAINER=""
  OUT_DIR=""
  OUT_DIR_IS_TEMP=false
  DUMPS=1
  INTERVAL=5
  readonly JCMD_LIST_CMD="jcmd -l | grep -v JCmd"
  readonly JCMD_DUMP_CMD_TMPL="jcmd PID Thread.print"
  readonly POSITIVE_INT_RE='^[1-9][0-9]*$'

  # bash version check
  if [[ -z ${BASH_VERSINFO+x} ]]; then
    error "No bash version information available, aborting"
  fi
  if [[ "${BASH_VERSINFO[0]}" -lt 4 ]]; then
    error "You need bash version >= 4 to run the script"
  fi

  # kube client check: prefer kubectl, fall back to oc
  if [[ -x "$(command -v kubectl)" ]]; then
    KUBECTL_INSTALLED=true
  elif [[ -x "$(command -v oc)" ]]; then
    OC_INSTALLED=true
    KUBE_CLIENT="oc"
  fi
  if [[ $OC_INSTALLED = false && $KUBECTL_INSTALLED = false ]]; then
    error "There is no kubectl or oc installed"
  fi

  # kube connectivity check (errexit aborts the script if this fails)
  $KUBE_CLIENT version -o yaml --request-timeout=5s 1>/dev/null

  readonly USAGE="
Usage: java_thread_dump.sh [options]
This tool dumps the threads of all Java processes running in the containers of a specific pod.
Required:
--namespace=<string> Kubernetes namespace.
--pod=<string> Pod name. Must be a cluster operator, entity operator, kafka, zookeeper or cruise control pod.
Optional:
--container=<string> Container name to limit the thread dump to. By default, all containers are captured with thread dump.
--out-dir=<string> Script output directory.
--dumps=<int> Number of thread dumps to capture. 1 by default.
--interval=<int> Number of seconds to wait between 2 dumps. 5 by default.
"
  # Long options are parsed through getopts' "-" pseudo option: "--name=value"
  # arrives as optchar "-" with OPTARG "name=value".
  OPTSPEC=":-:"
  while getopts "$OPTSPEC" optchar; do
    case "${optchar}" in
      -)
        case "${OPTARG}" in
          namespace=*)
            NAMESPACE=${OPTARG#*=} && readonly NAMESPACE
            ;;
          pod=*)
            POD=${OPTARG#*=} && readonly POD
            ;;
          container=*)
            CONTAINER=${OPTARG#*=} && readonly CONTAINER
            ;;
          out-dir=*)
            # Not marked readonly here: an empty value must still be
            # replaceable by the mktemp default below.
            OUT_DIR=${OPTARG#*=}
            OUT_DIR=${OUT_DIR//\~/$HOME}
            ;;
          dumps=*)
            DUMPS=${OPTARG#*=} && readonly DUMPS
            ;;
          interval=*)
            INTERVAL=${OPTARG#*=} && readonly INTERVAL
            ;;
          *)
            error "$USAGE"
            ;;
        esac
        ;;
      *)
        # Unknown short options used to be ignored silently.
        error "$USAGE"
        ;;
    esac
  done
  shift $((OPTIND-1))

  if [[ -z $NAMESPACE || -z $POD ]]; then
    error "$USAGE"
  fi

  # DUMPS is evaluated in arithmetic context below; accept plain positive
  # decimal integers only.
  if [[ ! $DUMPS =~ $POSITIVE_INT_RE ]]; then
    error "--dumps must be a positive integer, got '$DUMPS'"
  fi

  # Fail before any dump is taken rather than after all of them.
  if ! command -v zip >/dev/null 2>&1; then
    error "zip is not installed"
  fi

  if [[ -z $OUT_DIR ]]; then
    OUT_DIR="$(mktemp -d)"
    OUT_DIR_IS_TEMP=true
  fi
  readonly OUT_DIR OUT_DIR_IS_TEMP

  if [[ -z $($KUBE_CLIENT get ns "$NAMESPACE" -o name --ignore-not-found) ]]; then
    error "Namespace $NAMESPACE not found! Exiting"
  fi

  mkdir -p "$OUT_DIR/dumps"

  # Containers to inspect: the one requested, or every container of the pod.
  declare -a containers=()
  if [[ -z $CONTAINER ]]; then
    container_list=$($KUBE_CLIENT get pod -n "$NAMESPACE" "$POD" -ojsonpath="{.spec.containers[*].name}")
    # The jsonpath output is a single whitespace-separated line of names.
    read -ra containers <<< "$container_list"
  else
    containers=("$CONTAINER")
  fi
  if [[ ${#containers[@]} -eq 0 ]]; then
    error "No containers found in pod $POD"
  fi

  declare -a jprocesses=()
  dump_count=0
  for (( i=0 ; i<DUMPS ; i++ )); do
    if [[ $i -ne 0 ]]; then
      echo "Backing off for ${INTERVAL}s"
      sleep "$INTERVAL"
    fi

    for c in "${containers[@]}"; do
      # Best effort: a container without bash/jcmd or without a JVM is skipped.
      java_processes_list=$($KUBE_CLIENT exec -n "$NAMESPACE" "$POD" -c "$c" -- /bin/bash -c "$JCMD_LIST_CMD" 2>/dev/null) || true
      if [[ -z "$java_processes_list" ]]; then
        echo "Skipping container $c as it does not have a running Java process"
        continue
      fi

      jprocesses=()
      while read -r line; do
        jprocesses+=("$line")
      done <<< "$java_processes_list"

      mkdir -p "$OUT_DIR/dumps/$c"

      for line in "${jprocesses[@]}"; do
        # Each line is "<pid> <main class> [args...]"; take the first two
        # space-separated fields.
        pid=${line%% *}
        rest=${line#* }
        main_class=${rest%% *}

        echo "Dumping threads from container ${c} PID ${pid} main class ${main_class} #${i}"

        # The main class may be a jar path or "module/class"; a slash in it
        # would otherwise be treated as a directory separator in the file name.
        dump_file_name="thread_dump-${c}-${pid}-${main_class//\//_}"
        if [[ $DUMPS -ne 1 ]]; then
          dump_file_name+="-$i"
        fi
        dump_file_name+=".txt"

        dump_cmd=${JCMD_DUMP_CMD_TMPL/"PID"/"$pid"}
        $KUBE_CLIENT exec -n "$NAMESPACE" "$POD" -c "$c" -- /bin/bash -c "$dump_cmd" > "${OUT_DIR}/dumps/${c}/$dump_file_name"
        dump_count=$((dump_count + 1))
      done
    done
  done

  if [[ $dump_count -eq 0 ]]; then
    error "Could not capture any thread dumps in the specified pod"
  fi

  FILENAME="tdumps-${NAMESPACE}-${POD}-$(date +"%d-%m-%Y_%H-%M-%S")"
  # Zip from inside OUT_DIR so the archive holds relative ./dumps/ paths; the
  # subshell keeps the caller's working directory untouched.
  (cd "$OUT_DIR" && zip -qr "$FILENAME".zip ./dumps/)
  if [[ $OUT_DIR_IS_TEMP == true ]]; then
    # keeping the old behavior when --out-dir is not specified
    mv "$OUT_DIR"/"$FILENAME".zip ./
  fi
  echo "Thread dump collection file $FILENAME.zip created"
} # this ensures that the entire script is downloaded #
39 changes: 39 additions & 0 deletions tools/report.sh
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,32 @@ for RES in "${RESOURCES[@]}"; do
fi
done

echo "describe topics"
#######################################
# Run `kafka-topics.sh --describe` inside one Kafka pod of the cluster and
# store the output in the report. Best effort: never fails the report.
# Globals:   KUBE_CLIENT, NAMESPACE, CLUSTER, OUT_DIR (all read)
# Arguments: none
# Outputs:   a one-line status message on stdout; writes
#            $OUT_DIR/reports/topics/topic-describe.txt
# Returns:   0
#######################################
get_topic_describe() {
  local pod
  local describe_rc=0

  # Pick the first Kafka pod of the cluster.
  # NOTE(review): the `.ready.true` fragment looks like an attempt to select
  # only ready pods, but it does not appear to filter anything -- confirm.
  pod=$($KUBE_CLIENT -n "$NAMESPACE" get po -l strimzi.io/kind=Kafka,strimzi.io/name="$CLUSTER-kafka" --ignore-not-found --no-headers -o jsonpath='{range .items[*]}{.status.containerStatuses[*].ready.true}{.metadata.name}{ "\n"}{end}' | head -n 1)

  if [[ -z "$pod" ]]; then
    echo " topic describe failed due to no kafka pods available"
    return 0
  fi

  mkdir -p "$OUT_DIR"/reports/topics

  # The in-pod script derives listener name, port and bootstrap address from
  # /tmp/strimzi.properties, builds a temporary client config from that
  # listener's settings and runs the describe. Errors are tolerated here and
  # only reflected in the status message below.
  $KUBE_CLIENT -n "$NAMESPACE" exec "$pod" -- bash -c '# Extract variables from strimzi.properties && \
listener=$(grep "control.plane.listener.name" /tmp/strimzi.properties | sed -e "s/control.plane.listener.name=//g") && \
port=$(grep "control.plane.listener.name" /tmp/strimzi.properties | sed -e "s/.*-//g") && \
bootstrapserver=$(grep "advertised.listeners" /tmp/strimzi.properties | sed -e "s/.*$listener:\/\/\(.*\):$port.*/\1/") && \
# Create client config file && \
grep -i "listener.name.$listener." /tmp/strimzi.properties | sed -e "s/listener.name.$listener.//gI" > /tmp/report-client-config.properties && \
echo "security.protocol=ssl" >> /tmp/report-client-config.properties && \
# Execute topic describe && \
bin/kafka-topics.sh --describe --command-config=/tmp/report-client-config.properties --bootstrap-server $bootstrapserver:$port' \
    > "$OUT_DIR"/reports/topics/topic-describe.txt 2>/dev/null || describe_rc=$?

  # Always remove the temporary client config from the pod, even after a
  # failed describe.
  $KUBE_CLIENT -n "$NAMESPACE" exec "$pod" -- bash -c 'rm -rf /tmp/report-client-config.properties' 2>/dev/null || true

  if [[ $describe_rc -eq 0 ]]; then
    echo " topic describe executed, /reports/topics/topic-describe.txt created"
  else
    # Do not claim success when the exec failed; the output file exists but
    # may be empty or truncated.
    echo " topic describe failed, /reports/topics/topic-describe.txt may be empty or incomplete"
  fi
}
get_topic_describe

get_nonnamespaced_yamls() {
local type="$1"
mkdir -p "$OUT_DIR"/reports/"$type"
Expand Down Expand Up @@ -304,6 +330,19 @@ for CRD in $CRDS; do
fi
done

# Dump every Kafka custom resource in the cluster, one YAML file per resource,
# grouped in one directory per namespace.
echo "all kafkas"
mkdir -p "$OUT_DIR"/reports/all_kafkas
# One "<namespace>/<name>" entry per Kafka resource.
mapfile -t KAFKA_CLUSTERS < <(
  $KUBE_CLIENT get kafkas --all-namespaces --ignore-not-found --no-headers -ojsonpath="{range .items[*]}{.metadata.namespace}/{.metadata.name}{'\n'}{end}"
)
# Index loop: runs zero times when nothing was found, so no separate
# emptiness check is needed.
for (( kafka_idx = 0; kafka_idx < ${#KAFKA_CLUSTERS[@]}; kafka_idx++ )); do
  kafka_cluster=${KAFKA_CLUSTERS[kafka_idx]}
  echo " $kafka_cluster"
  kafka_cluster_ns=${kafka_cluster%/*}
  kafka_cluster_name=${kafka_cluster#*/}
  mkdir -p "$OUT_DIR/reports/all_kafkas/$kafka_cluster_ns"
  # The resource may have been deleted since it was listed; tolerate that.
  $KUBE_CLIENT get kafka -n "$kafka_cluster_ns" "$kafka_cluster_name" -o yaml \
    > "$OUT_DIR/reports/all_kafkas/$kafka_cluster_ns/$kafka_cluster_name.yaml" || true
done

echo "events"
EVENTS=$($KUBE_CLIENT get event -n "$NAMESPACE" --ignore-not-found) && readonly EVENTS
if [[ -n $EVENTS ]]; then
Expand Down

0 comments on commit 85e24f1

Please sign in to comment.