Skip to content

Commit

Permalink
[pulsarbot] Handle bug in "/pulsarbot rerun-failure-checks" that rera…
Browse files Browse the repository at this point in the history
…n obsolete jobs

- only the most recent job should be considered for failed jobs
  • Loading branch information
lhotari committed Apr 21, 2022
1 parent d35becc commit 0bf1733
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 15 deletions.
12 changes: 11 additions & 1 deletion pulsarbot/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,14 @@ The accepted commands are:
- `/pulsarbot run-failure-checks`: Run all the failed checks.
- `/pulsarbot rerun-failure-checks`: Rerun all the failed checks. Same as `/pulsarbot run-failure-checks`.
- `/pulsarbot run <check-name>`: Run the specified check, but only if that check has failed.
- `/pulsarbot rerun <check-name>`: Same as `/pulsarbot run <check-name>`
- `/pulsarbot rerun <check-name>`: Same as `/pulsarbot run <check-name>`


### Testing changes to `entrypoint.sh` script

You can test modifications to the `entrypoint.sh` script locally with the `test_pulsarbot.sh` script.

Syntax for testing changes:
```bash
GITHUB_TOKEN=your_token_here ./test_pulsarbot.sh PR_NUMBER_HERE
```
69 changes: 55 additions & 14 deletions pulsarbot/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
#!/bin/bash

# Debug aid: when TESTMODE is 1, trace every command and dump the event
# payload that triggered this run.
if [[ "${TESTMODE}" == 1 ]]; then
  set -x
  # Quoted so that a path containing spaces or glob characters is passed to
  # cat as a single argument; "--" protects against a path starting with "-".
  cat -- "${GITHUB_EVENT_PATH}"
fi
set -e

cat ${GITHUB_EVENT_PATH}
COMMENT_BODY=$(jq -r '.comment.body' "${GITHUB_EVENT_PATH}")

BOT_COMMAND_PREFIX="/pulsarbot"
Expand All @@ -13,7 +15,6 @@ if [[ ${COMMENT_BODY} != "${BOT_COMMAND_PREFIX}"* ]]; then
exit
fi


read -r -a commands <<< "${COMMENT_BODY}"
BOT_COMMAND=${commands[1]}
CHECK_NAME=""
Expand All @@ -38,8 +39,8 @@ fi
PR_NUM=$(jq -r '.issue.number' "${GITHUB_EVENT_PATH}")

# Issue a GitHub REST API request against the target repository.
# Arguments: $1 - path appended to
#                 https://api.github.com/repos/${BOT_TARGET_REPOSITORY}
# Globals:   BOT_TARGET_REPOSITORY (read)
# The assembled URL is handed to github_client, which performs the request.
# NOTE(review): this listing appears to interleave the removed (`path=`) and
# the added (`local urlpath=`) lines of the commit diff; read literally, the
# request is issued twice and `path` is assigned as a global - confirm that
# only the `local urlpath` pair is meant to remain.
function github_get() {
path="$1"
github_client "https://api.github.com/repos/${BOT_TARGET_REPOSITORY}${path}"
local urlpath="$1"
github_client "https://api.github.com/repos/${BOT_TARGET_REPOSITORY}${urlpath}"
}

function github_client() {
Expand All @@ -51,24 +52,64 @@ PR_JSON="$(github_get "/pulls/${PR_NUM}")"
# Extract the pull request fields used below from the PR JSON document:
# head commit SHA, head branch name, login of the head repository owner and
# the PR web page URL.
HEAD_SHA=$(jq -r .head.sha <<< "${PR_JSON}")
PR_BRANCH=$(jq -r .head.ref <<< "${PR_JSON}")
PR_USER=$(jq -r .head.user.login <<< "${PR_JSON}")
PR_HTML_URL=$(jq -r .html_url <<< "${PR_JSON}")

echo "Handling pulsarbot command for PR #${PR_NUM} ${PR_HTML_URL}"

# List workflow runs for the PR author and branch that belong to the PR head
# commit.
# Arguments: $1 - read both as `status` (default "failure") and as `page`
#                 (default 1); see the review note below.
# Globals:   PR_USER, PR_BRANCH, HEAD_SHA (read)
# Outputs:   the first request prints one run API url per line; the second
#            request prints one CSV row per run with the columns
#            workflow_id, created_at, conclusion (falling back to status when
#            conclusion is null), url, name, html_url.
# Only runs whose head_sha equals HEAD_SHA are kept; up to 100 runs are
# requested per page.
# NOTE(review): this listing appears to interleave the removed lines
# (`status=` and the single-line request) with the added lines (`local page=`
# and the CSV request) of the commit diff - confirm that only the paged CSV
# variant is meant to remain.
function get_runs() {
status="${1:-failure}"
local page="${1:-1}"
# API reference https://docs.github.com/en/rest/reference/actions#list-workflow-runs-for-a-repository
github_get "/actions/runs?actor=${PR_USER}&branch=${PR_BRANCH}&status=${status}&per_page=100" | jq -r --arg head_sha "${HEAD_SHA}" '.workflow_runs[] | select(.head_sha==$head_sha) | .url'
github_get "/actions/runs?actor=${PR_USER}&branch=${PR_BRANCH}&page=${page}&per_page=100" \
| jq -r --arg head_sha "${HEAD_SHA}" \
'.workflow_runs[] | select(.head_sha==$head_sha) | [.workflow_id,.created_at,.conclusion // .status,.url,.name,.html_url] | @csv'
}

# Take the last attempt for each workflow to prevent restarting old runs.
# Input (stdin):   comma-separated rows whose first column is the workflow id,
#                  sorted so that rows of the same workflow are adjacent and
#                  the most recent row of each workflow comes last.
# Output (stdout): exactly one row per workflow id - the last row of each
#                  group of adjacent rows sharing the same first column.
#                  Empty input produces no output.
function filter_oldruns() {
  # Whenever the id changes, the previously remembered row was the last one
  # of its group, so emit it. The current row is only remembered, never
  # printed directly, so an older run of the next workflow cannot slip out.
  awk -F, '
    NR > 1 && $1 != prev_id { print prev_line }
    { prev_id = $1; prev_line = $0 }
    END { if (NR > 0) print prev_line }
  '
}

# Collect the rows of every result page returned by get_runs and print them
# sorted.
# Arguments: none
# Outputs:   all rows produced by get_runs for pages 1, 2, ... (stopping at
#            the first page that yields no output), sorted bytewise so the
#            order does not depend on the caller's locale. Nothing is printed
#            when no page returned any row.
# Returns:   non-zero if the temporary file cannot be created.
function get_all_runs() {
  local page=1
  local csv
  local tempfile
  # Declared separately from the assignment so a mktemp failure is not masked
  # by the exit status of `local`.
  tempfile=$(mktemp) || return
  while true; do
    # tee accumulates every page in the temp file; the captured copy is only
    # used to detect the first empty page.
    csv="$(get_runs "${page}" | tee -a "${tempfile}")"
    if [[ -z "${csv}" ]]; then
      break
    fi
    page=$((page + 1))
  done
  if [[ -s "${tempfile}" ]]; then
    LC_ALL=C sort "${tempfile}"
  fi
  rm -f -- "${tempfile}"
}

# Return url, name and html url for failed or cancelled jobs that are the
# most recent ones for each workflow.
# Input:   rows from `get_all_runs | filter_oldruns`, with the columns
#          workflow_id, created_at, conclusion, url, name, html_url.
# Outputs: one line per run whose third column is "failure" or "cancelled":
#          "<url><TAB><name><TAB><html_url>".
# All double quotes are stripped before the row is split on commas. Because
# the name is the only free-text column, it is rebuilt from every field
# between the url (4th) and the html url (last), so a workflow name that
# itself contains commas is kept whole instead of shifting the html url.
function find_failed_or_cancelled() {
  get_all_runs | filter_oldruns \
    | awk -F, '{
        gsub(/"/, "")
        if ($3 == "failure" || $3 == "cancelled") {
          name = $5
          for (i = 6; i < NF; i++) {
            name = name "," $i
          }
          html_url = (NF >= 6) ? $NF : ""
          print $4 "\t" name "\t" html_url
        }
      }'
}

# find the failures
FAILED_URLS=$(get_runs failure)
CANCELLED_URLS=$(get_runs cancelled)
for url in $FAILED_URLS $CANCELLED_URLS; do
name=$(github_client "$url"|jq -r '.name')
# allocate file descriptor for the failed or cancelled url and name listing
exec {failures_fd}< <(find_failed_or_cancelled)

foundjobs=0
# handle failures
# Each record read from the failures_fd descriptor is
# "<run api url><TAB><workflow name><TAB><run html url>".
# NOTE(review): the paired, near-identical echo lines in both branches look
# like the pre- and post-change versions of one diff line - confirm which of
# each pair should remain.
while IFS=$'\t' read -r url name html_url <&"${failures_fd}"; do
  if [[ "${CHECK_NAME}" == "_all" || "${name}" == *"${CHECK_NAME}"* ]]; then
    echo "rerun-failed-jobs for '${name}' ($url)"
    echo "rerun-failed-jobs for '${name}'. Follow progress at $html_url"
    # use https://docs.github.com/en/rest/reference/actions#re-run-failed-jobs-from-a-workflow-run
    # to rerun only the failed jobs
    github_client -X POST "${url}/rerun-failed-jobs"
    # Plain assignment instead of ((foundjobs++)): the post-increment
    # evaluates to 0 on the first match, which makes (( )) return status 1
    # and, with `set -e` active, would abort the script right after the first
    # rerun request.
    foundjobs=$((foundjobs + 1))
  else
    echo "Expect ${CHECK_NAME}, skipping build job '${name}' ($url)"
    echo "Expect ${CHECK_NAME}, skipping build job '${name}' ($html_url)"
  fi
done

# Report on stderr when no run matched, so the caller can tell that nothing
# was restarted.
if [[ $foundjobs == 0 ]]; then
  echo >&2 "Cannot find any failed workflow runs in PR #${PR_NUM}. Re-running can only target completed workflows."
fi
17 changes: 17 additions & 0 deletions pulsarbot/test_pulsarbot.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/bin/bash
# Local test driver for the pulsarbot entrypoint: writes a minimal GitHub
# "issue comment" event file, builds the docker image from the current
# directory and runs it against that event.
#
# Usage: GITHUB_TOKEN=your_token_here ./test_pulsarbot.sh [PR_NUMBER]
# Environment:
#   GITHUB_TOKEN  - passed through to the container
#   COMMENT_BODY  - comment text to simulate
#                   (default: "/pulsarbot rerun-failure-checks"); it is
#                   inserted into the JSON as-is, so it must not contain
#                   characters that need JSON escaping
#   TESTMODE      - passed to the container (default: 1)
# Exit status: 1 if the temp file or the image cannot be created, otherwise
#              the exit status of `docker run`.
PRNUM=${1:-99999}
echo "Using PRNUM=$PRNUM"

comment_body=${COMMENT_BODY:-/pulsarbot rerun-failure-checks}

# Unpredictable name instead of /tmp/testevent.json$$; the EXIT trap removes
# the file on every exit path, including a failed docker build.
event_file=$(mktemp /tmp/testevent.json.XXXXXX) || exit 1
trap 'rm -f -- "${event_file}"' EXIT
# mktemp creates the file with mode 0600; keep it world-readable like a plain
# redirect would, in case the container does not run as the file owner.
chmod 0644 "${event_file}"

cat > "${event_file}" <<EOF
{
  "comment": {
    "body": "${comment_body}"
  },
  "issue": {
    "number": ${PRNUM}
  }
}
EOF
echo "Building docker image..."
docker build -t pulsarbot . || exit 1
docker run -v "${event_file}:/tmp/testevent.json" -e TESTMODE="${TESTMODE:-1}" -e GITHUB_TOKEN -e GITHUB_EVENT_PATH=/tmp/testevent.json pulsarbot

0 comments on commit 0bf1733

Please sign in to comment.