Commit dbda4f8

fix(max-concurrency): fix result collection of max-concurrency test for new iteration directories

Signed-off-by: Pavel Macík <pavel.macik@gmail.com>
pmacik authored and jhutar committed Jun 18, 2024
1 parent 2e160fa commit dbda4f8
Showing 2 changed files with 80 additions and 67 deletions.
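The fix tracks a change in how per-iteration artifacts are laid out: results used to be flat files named load-test.max-concurrency.<iteration>-<threads>.json, and now live in one directory per iteration. A sketch of the two layouts, inferred from the find patterns in this diff (file names are illustrative):

    # Old layout, matched by 'load-test.max-concurrency.*.json':
    #   $output_dir/load-test.max-concurrency.0001-0005.json
    #
    # New layout, matched by 'iteration-*':
    #   $ARTIFACT_DIR/iterations/iteration-0001-0005/load-test.json
    #   $ARTIFACT_DIR/iterations/iteration-0001-0005/tekton-results-watcher.<pod>.goroutine-dump-0.pprof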
103 changes: 59 additions & 44 deletions tests/load-tests/ci-scripts/max-concurrency/collect-results.sh
@@ -29,13 +29,13 @@ collect_artifacts() {
collect_monitoring_data() {
echo "[$(date --utc -Ins)] Setting up tool to collect monitoring data"
{
python3 -m venv venv
set +u
# shellcheck disable=SC1091
source venv/bin/activate
set -u
python3 -m pip install -U pip
python3 -m pip install -e "git+https://github.com/redhat-performance/opl.git#egg=opl-rhcloud-perf-team-core&subdirectory=core"
} &>"${ARTIFACT_DIR}/monitoring-setup.log"

## Monitoring data for entire test
@@ -121,18 +121,26 @@ ${csv_delim}WorkloadKPI\
${csv_delim}Errors\
${csv_delim}UserAvgTime\
${csv_delim}UserMaxTime\
-${csv_delim}ApplicationAvgTime\
-${csv_delim}ApplicationMaxTime\
-${csv_delim}CDQAvgTime\
-${csv_delim}CDQMaxTime\
-${csv_delim}ComponentsAvgTime\
-${csv_delim}ComponentsMaxTime\
-${csv_delim}PipelineRunAvgTime\
-${csv_delim}PipelineRunMaxTime\
-${csv_delim}IntegrationTestsRunPipelineSucceededTimeAvg\
-${csv_delim}IntegrationTestsRunPipelineSucceededTimeMax\
-${csv_delim}DeploymentSucceededTimeAvg\
-${csv_delim}DeploymentSucceededTimeMax\
+${csv_delim}CreateApplicationAvgTime\
+${csv_delim}CreateApplicationMaxTime\
+${csv_delim}ValidateApplicationAvgTime\
+${csv_delim}ValidateApplicationMaxTime\
+${csv_delim}CreateComponentAvgTime\
+${csv_delim}CreateComponentMaxTime\
+${csv_delim}ValidatePipelineRunConditionAvgTime\
+${csv_delim}ValidatePipelineRunConditionMaxTime\
+${csv_delim}ValidatePipelineRunCreationAvgTime\
+${csv_delim}ValidatePipelineRunCreationMaxTime\
+${csv_delim}ValidatePipelineRunSignatureAvgTime\
+${csv_delim}ValidatePipelineRunSignatureMaxTime\
+${csv_delim}CreateIntegrationTestScenarioAvgTime\
+${csv_delim}CreateIntegrationTestScenarioMaxTime\
+${csv_delim}ValidateIntegrationTestScenarioAvgTime\
+${csv_delim}ValidateIntegrationTestScenarioMaxTime\
+${csv_delim}ValidateTestPipelineRunConditionAvgTime\
+${csv_delim}ValidateTestPipelineRunConditionMaxTime\
+${csv_delim}ValidateTestPipelineRunCreationAvgTime\
+${csv_delim}ValidateTestPipelineRunCreationMaxTime\
${csv_delim}ClusterCPUUsageAvg\
${csv_delim}ClusterDiskUsageAvg\
${csv_delim}ClusterMemoryUsageAvg\
@@ -158,12 +166,10 @@ ${csv_delim}ClusterNetworkReceiveBytesTotalAvg\
${csv_delim}ClusterNetworkTransmitBytesTotalAvg\
${csv_delim}NodeDiskIoTimeSecondsTotalAvg" \
>"$max_concurrency_csv"
-mc_files=$(find "$output_dir" -type f -name 'load-test.max-concurrency.*.json')
-if [ -n "$mc_files" ]; then
-for i in $mc_files; do
-iteration_index=$(echo "$i" | sed -e 's,'"$output_dir"'/load-test.max-concurrency.\([0-9-]\+\).*,\1,g')
-parked_go_routines=$(get_parked_go_routines "$iteration_index")
+iteration_dirs=$(find "$ARTIFACT_DIR/iterations" -type d -name 'iteration-*')
+if [ -n "$iteration_dirs" ]; then
+for iteration_dir in $iteration_dirs; do
+parked_go_routines=$(get_parked_go_routines "$iteration_dir")
parked_go_routines_columns=""
if [ -n "$parked_go_routines" ]; then
for g in $parked_go_routines; do
@@ -174,24 +180,33 @@ ${csv_delim}NodeDiskIoTimeSecondsTotalAvg" \
parked_go_routines_columns="$parked_go_routines_columns + $csv_delim_quoted"
done
fi
echo "[$(date --utc -Ins)] Processing $iteration_dir/load-test.json"
jq -rc "(.metadata.\"max-concurrency\".iteration | tostring) \
-+ $csv_delim_quoted + (.threads | tostring) \
-+ $csv_delim_quoted + (.workloadKPI | tostring) \
-+ $csv_delim_quoted + (.errorsTotal | tostring) \
-+ $csv_delim_quoted + (.createUserTimeAvg | tostring) \
-+ $csv_delim_quoted + (.createUserTimeMax | tostring) \
-+ $csv_delim_quoted + (.createApplicationsTimeAvg | tostring) \
-+ $csv_delim_quoted + (.createApplicationsTimeMax | tostring) \
-+ $csv_delim_quoted + (.createCDQsTimeAvg | tostring) \
-+ $csv_delim_quoted + (.createCDQsTimeMax | tostring) \
-+ $csv_delim_quoted + (.createComponentsTimeAvg | tostring) \
-+ $csv_delim_quoted + (.createComponentsTimeMax | tostring) \
-+ $csv_delim_quoted + (.runPipelineSucceededTimeAvg | tostring) \
-+ $csv_delim_quoted + (.runPipelineSucceededTimeMax | tostring) \
-+ $csv_delim_quoted + (.integrationTestsRunPipelineSucceededTimeAvg | tostring) \
-+ $csv_delim_quoted + (.integrationTestsRunPipelineSucceededTimeMax | tostring) \
-+ $csv_delim_quoted + (.deploymentSucceededTimeAvg | tostring) \
-+ $csv_delim_quoted + (.deploymentSucceededTimeMax | tostring) \
++ $csv_delim_quoted + (.parameters.options.Concurrency | tostring) \
++ $csv_delim_quoted + (.results.measurements.KPI.mean | tostring) \
++ $csv_delim_quoted + (.results.measurements.KPI.errors | tostring) \
++ $csv_delim_quoted + (.results.measurements.HandleUser.pass.duration.mean | tostring) \
++ $csv_delim_quoted + (.results.measurements.HandleUser.pass.duration.max | tostring) \
++ $csv_delim_quoted + (.results.measurements.createApplication.pass.duration.mean | tostring) \
++ $csv_delim_quoted + (.results.measurements.createApplication.pass.duration.max | tostring) \
++ $csv_delim_quoted + (.results.measurements.validateApplication.pass.duration.mean | tostring) \
++ $csv_delim_quoted + (.results.measurements.validateApplication.pass.duration.max | tostring) \
++ $csv_delim_quoted + (.results.measurements.createComponent.pass.duration.mean | tostring) \
++ $csv_delim_quoted + (.results.measurements.createComponent.pass.duration.max | tostring) \
++ $csv_delim_quoted + (.results.measurements.validatePipelineRunCondition.pass.duration.mean | tostring) \
++ $csv_delim_quoted + (.results.measurements.validatePipelineRunCondition.pass.duration.max | tostring) \
++ $csv_delim_quoted + (.results.measurements.validatePipelineRunCreation.pass.duration.mean | tostring) \
++ $csv_delim_quoted + (.results.measurements.validatePipelineRunCreation.pass.duration.max | tostring) \
++ $csv_delim_quoted + (.results.measurements.validatePipelineRunSignature.pass.duration.mean | tostring) \
++ $csv_delim_quoted + (.results.measurements.validatePipelineRunSignature.pass.duration.max | tostring) \
++ $csv_delim_quoted + (.results.measurements.createIntegrationTestScenario.pass.duration.mean | tostring) \
++ $csv_delim_quoted + (.results.measurements.createIntegrationTestScenario.pass.duration.max | tostring) \
++ $csv_delim_quoted + (.results.measurements.validateIntegrationTestScenario.pass.duration.mean | tostring) \
++ $csv_delim_quoted + (.results.measurements.validateIntegrationTestScenario.pass.duration.max | tostring) \
++ $csv_delim_quoted + (.results.measurements.validateTestPipelineRunCondition.pass.duration.mean | tostring) \
++ $csv_delim_quoted + (.results.measurements.validateTestPipelineRunCondition.pass.duration.max | tostring) \
++ $csv_delim_quoted + (.results.measurements.validateTestPipelineRunCreation.pass.duration.mean | tostring) \
++ $csv_delim_quoted + (.results.measurements.validateTestPipelineRunCreation.pass.duration.max | tostring) \
+ $csv_delim_quoted + (.measurements.cluster_cpu_usage_seconds_total_rate.mean | tostring) \
+ $csv_delim_quoted + (.measurements.cluster_disk_throughput_total.mean | tostring) \
+ $csv_delim_quoted + (.measurements.cluster_memory_usage_rss_total.mean | tostring) \
@@ -216,15 +231,15 @@ ${csv_delim}NodeDiskIoTimeSecondsTotalAvg" \
+ $csv_delim_quoted + (.measurements.cluster_network_receive_bytes_total.mean | tostring) \
+ $csv_delim_quoted + (.measurements.cluster_network_transmit_bytes_total.mean | tostring) \
+ $csv_delim_quoted + (.measurements.node_disk_io_time_seconds_total.mean | tostring)" \
"$i" >>"$max_concurrency_csv"
"$iteration_dir/load-test.json" >>"$max_concurrency_csv"
done
else
echo "[$(date --utc -Ins)] WARNING: No file matching '$output_dir/load-test.max-concurrency.*.json' found!"
fi
}
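Each iteration's load-test.json is reduced to a single CSV row by the jq program above. A minimal sketch of the JSON shape that program assumes, with an invocation to spot-check one field (key paths come from this diff; values are illustrative):

    # load-test.json, roughly:
    # {
    #   "metadata": {"max-concurrency": {"iteration": "0001"}},
    #   "parameters": {"options": {"Concurrency": 5}},
    #   "results": {"measurements": {"KPI": {"mean": 42.7, "errors": 0},
    #                                "HandleUser": {"pass": {"duration": {"mean": 1.2, "max": 3.4}}}}},
    #   "measurements": {"cluster_cpu_usage_seconds_total_rate": {"mean": 1.9}}
    # }
    jq -r '.results.measurements.KPI.mean' "$iteration_dir/load-test.json"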

get_parked_go_routines() {
goroutines_pprof=$(find "$output_dir" -name "tekton-results-watcher.tekton-results-watcher-*.goroutine-dump-0.$1.pprof")
goroutines_pprof=$(find "$1" -name "tekton-results-watcher.tekton-results-watcher-*.goroutine-dump-0.pprof")
count=0
for i in $goroutines_pprof; do
if [ $count -gt 0 ]; then
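The goroutine dumps gathered here are standard pprof files, so they can also be inspected by hand with the Go toolchain, assuming it is installed (the dump-0 files are in pprof's binary format; the pod suffix below is hypothetical):

    go tool pprof -top tekton-results-watcher.tekton-results-watcher-abc123.goroutine-dump-0.pprof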
44 changes: 21 additions & 23 deletions tests/load-tests/run-max-concurrency.sh
@@ -11,24 +11,22 @@ OPENSHIFT_USERNAME="${OPENSHIFT_USERNAME:-kubeadmin}"
OPENSHIFT_PASSWORD="${OPENSHIFT_PASSWORD:-$(cat "$KUBEADMIN_PASSWORD_FILE")}"

load_test() {
-local workdir threads iteration index iteration_index
+local workdir threads index
workdir=${1:-/tmp}
threads=${2:-1}
-iteration=$(printf "%04d" "${3:-1}")
index=$(printf "%04d" "$threads")
-iteration_index="${iteration}-${index}"
## Enable CPU profiling in Tekton
if [ "${TEKTON_PERF_ENABLE_CPU_PROFILING:-}" == "true" ]; then
echo "Starting CPU profiling with pprof"
for p in $(oc get pods -n openshift-pipelines -l app=tekton-pipelines-controller -o name); do
pod="${p##*/}"
file="tekton-pipelines-controller.$pod.cpu-profile.$iteration_index"
file="tekton-pipelines-controller.$pod.cpu-profile"
oc exec -n openshift-pipelines "$p" -- bash -c "curl -SsL --max-time $((TEKTON_PERF_PROFILE_CPU_PERIOD + 10)) localhost:8008/debug/pprof/profile?seconds=${TEKTON_PERF_PROFILE_CPU_PERIOD} | base64" | base64 -d >"$workdir/$file.pprof" &
echo $! >"$workdir/$file.pid"
done
for p in $(oc get pods -n tekton-results -l app.kubernetes.io/name=tekton-results-watcher -o name); do
pod="${p##*/}"
-file=tekton-results-watcher.$pod.cpu-profile.$iteration_index
+file="tekton-results-watcher.$pod.cpu-profile"
oc exec -n tekton-results "$p" -c watcher -- bash -c "curl -SsL --max-time $((TEKTON_PERF_PROFILE_CPU_PERIOD + 10)) localhost:8008/debug/pprof/profile?seconds=${TEKTON_PERF_PROFILE_CPU_PERIOD} | base64" | base64 -d >"$workdir/$file.pprof" &
echo $! >"$workdir/$file.pid"
done
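The profiles travel through oc exec base64-encoded so the binary pprof stream survives the exec channel intact. A profile can also be captured manually against the same endpoint via a port-forward; a sketch, where the pod label and port 8008 come from the script and the rest is assumption:

    p=$(oc get pods -n tekton-results -l app.kubernetes.io/name=tekton-results-watcher -o name | head -n 1)
    oc port-forward -n tekton-results "${p##*/}" 8008:8008 &
    curl -SsL "localhost:8008/debug/pprof/profile?seconds=30" -o watcher.cpu-profile.pprof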
@@ -38,14 +36,14 @@ load_test() {
echo "Starting memory profiling of Tekton controller with pprof"
for p in $(oc get pods -n openshift-pipelines -l app=tekton-pipelines-controller -o name); do
pod="${p##*/}"
file="tekton-pipelines-controller.$pod.memory-profile.$iteration_index"
file="tekton-pipelines-controller.$pod.memory-profile"
oc exec -n openshift-pipelines "$p" -- bash -c "curl -SsL --max-time $((TEKTON_PERF_PROFILE_CPU_PERIOD + 10)) localhost:8008/debug/pprof/heap?seconds=${TEKTON_PERF_PROFILE_CPU_PERIOD} | base64" | base64 -d >"$workdir/$file.pprof" &
echo $! >"$workdir/$file.pid"
done
echo "Starting memory profiling of Tekton results watcher with pprof"
for p in $(oc get pods -n tekton-results -l app.kubernetes.io/name=tekton-results-watcher -o name); do
pod="${p##*/}"
-file=tekton-results-watcher.$pod.memory-profile.$iteration_index
+file="tekton-results-watcher.$pod.memory-profile"
oc exec -n tekton-results "$p" -c watcher -- bash -c "curl -SsL --max-time $((TEKTON_PERF_PROFILE_CPU_PERIOD + 10)) localhost:8008/debug/pprof/heap?seconds=${TEKTON_PERF_PROFILE_CPU_PERIOD} | base64" | base64 -d >"$workdir/$file.pprof" &
echo $! >"$workdir/$file.pid"
done
@@ -92,7 +90,7 @@ load_test() {
STATUS_DATA_FILE="$workdir/load-test.json"
status_data.py \
--status-data-file "${STATUS_DATA_FILE}" \
--set "name=Konflux loadtest" "started=$( cat started )" "ended=$( cat ended )" \
--set "name=Konflux loadtest" "started=$(cat started)" "ended=$(cat ended)" \
--set-subtree-json "parameters.options=$workdir/load-test-options.json" "results.measurements=$workdir/load-test-timings.json"

deactivate
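status_data.py grafts whole JSON files into the status document via --set-subtree-json, so the run options land under parameters.options and the timing summary under results.measurements, exactly the paths collect-results.sh reads back with jq. A sketch of the two inputs under that assumption (contents illustrative):

    # load-test-options.json: {"Concurrency": 5, ...}
    # load-test-timings.json: {"KPI": {"mean": 42.7, "errors": 0}, ...}
    # After merging, load-test.json exposes .parameters.options.Concurrency
    # and .results.measurements.KPI.mean.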
@@ -107,15 +105,15 @@ load_test() {
for p in $(oc get pods -n openshift-pipelines -l app=tekton-pipelines-controller -o name); do
pod="${p##*/}"
for i in 0 1 2; do
file="tekton-pipelines-controller.$pod.goroutine-dump-$i.$iteration_index"
file="tekton-pipelines-controller.$pod.goroutine-dump-$i"
oc exec -n tekton-results "$p" -- bash -c "curl -SsL localhost:8008/debug/pprof/goroutine?debug=$i | base64" | base64 -d >"$workdir/$file.pprof"
done
done
echo "[$(date --utc -Ins)] Getting Tekton results watcher goroutine dump"
for p in $(oc get pods -n tekton-results -l app.kubernetes.io/name=tekton-results-watcher -o name); do
pod="${p##*/}"
for i in 0 1 2; do
file="tekton-results-watcher.$pod.goroutine-dump-$i.$iteration_index"
file="tekton-results-watcher.$pod.goroutine-dump-$i"
oc exec -n tekton-results "$p" -c watcher -- bash -c "curl -SsL localhost:8008/debug/pprof/goroutine?debug=$i | base64" | base64 -d >"$workdir/$file.pprof"
done
done
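The loop grabs each dump at three debug levels; for Go's net/http/pprof goroutine endpoint, debug=0 returns the binary pprof format, debug=1 a text listing aggregated by identical stacks, and debug=2 a full dump of every goroutine. For a quick look at the aggregated text form (a sketch, run from inside the pod or via port-forward):

    curl -SsL "localhost:8008/debug/pprof/goroutine?debug=1" | head -n 20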
@@ -172,22 +170,22 @@ max_concurrency() {
else
output="$OUTPUT_DIR/load-test.max-concurrency.json"
IFS="," read -r -a maxConcurrencySteps <<<"$(echo "${MAX_CONCURRENCY_STEPS:-1\ 5\ 10\ 25\ 50\ 100\ 150\ 200}" | sed 's/ /,/g')"
maxThreads=${MAX_THREADS:-10} # Do not go above this concurrency.
threshold_sec=${THRESHOLD:-300} # In seconds. If KPI crosses this duration, stop.
threshold_err=${THRESHOLD_ERR:-10} # Failure ratio. When crossed, stop.
echo '{"started":"'"$(date +%FT%T%:z)"'", "maxThreads": '"$maxThreads"', "maxConcurrencySteps": "'"${maxConcurrencySteps[*]}"'", "threshold": '"$threshold_sec"', "thresholdErrors": '"$threshold_err"', "maxConcurrencyReached": 0, "computedConcurrency": 0, "workloadKPI": 0, "ended": "", "errorsTotal": -1}' | jq >"$output"
iteration=0

{
python3 -m venv venv
set +u
source venv/bin/activate
set -u
python3 -m pip install -U pip
python3 -m pip install -e "git+https://github.com/redhat-performance/opl.git#egg=opl-rhcloud-perf-team-core&subdirectory=core"
python3 -m pip install tabulate
python3 -m pip install matplotlib
deactivate
} &>"$OUTPUT_DIR/monitoring-setup.log"

for t in "${maxConcurrencySteps[@]}"; do
@@ -201,7 +199,7 @@ max_concurrency() {
iteration_index="$(printf "%04d" "$iteration")-$(printf "%04d" "$t")"
workdir="${OUTPUT_DIR}/iteration-${iteration_index}"
mkdir "${workdir}"
load_test "$workdir" "$t" "$iteration"
load_test "$workdir" "$t"
jq ".metadata.\"max-concurrency\".iteration = \"$(printf "%04d" "$iteration")\"" "$workdir/load-test.json" >"$OUTPUT_DIR/$$.json" && mv -f "$OUTPUT_DIR/$$.json" "$workdir/load-test.json"
workloadKPI=$(jq '.results.measurements.KPI.mean' "$workdir/load-test.json")
workloadKPIerrors=$(jq '.results.measurements.KPI.errors' "$workdir/load-test.json")
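With the third argument dropped from load_test, the iteration/concurrency pair now survives only in the directory name, which is exactly what the reworked collect-results.sh keys on. For iteration 1 at concurrency 5 (values illustrative, and assuming $OUTPUT_DIR is the path collect-results.sh sees as $ARTIFACT_DIR/iterations):

    iteration_index="$(printf "%04d" 1)-$(printf "%04d" 5)"   # -> 0001-0005
    workdir="${OUTPUT_DIR}/iteration-${iteration_index}"      # -> .../iteration-0001-0005
    # later rediscovered by:
    #   find "$ARTIFACT_DIR/iterations" -type d -name 'iteration-*'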
