[pulsarbot] Handle bug in "/pulsarbot rerun-failure-checks" that reran obsolete jobs - only the most recent job should be considered for failed jobs
apache · Apr 21, 2022 · 0bf1733 · 0bf1733
1 parent d35becc
commit 0bf1733
Show file tree

Hide file tree

Showing 3 changed files with 83 additions and 15 deletions.
diff --git a/pulsarbot/README.md b/pulsarbot/README.md
@@ -9,4 +9,14 @@ The accepted commands are:
 - `/pulsarbot run-failure-checks`: Run all the failed checks.
 - `/pulsarbot rerun-failure-checks`: Rerun all the failed checks. Same as `/pulsarbot run-failure-checks`.
 - `/pulsarbot run <check-name>`: Run a specified check only if the check is failed.
-- `/pulsarbot rerun <check-name>`: Same as `/pulsarbot run <check-name>`
+- `/pulsarbot rerun <check-name>`: Same as `/pulsarbot run <check-name>`
+
+
+### Testing changes to `entrypoint.sh` script
+
+You can test modifications to the `entrypoint.sh` script locally with the `test_pulsarbot.sh` script.
+
+Syntax for testing changes
+```bash
+GITHUB_TOKEN=your_token_here ./test_pulsarbot.sh PR_NUMBER_HERE
+```
diff --git a/pulsarbot/entrypoint.sh b/pulsarbot/entrypoint.sh
@@ -1,8 +1,10 @@
 #!/bin/bash
-
+# Debug aid: when TESTMODE is 1 (test_pulsarbot.sh passes 1 by default), trace every
+# command and dump the event payload that is being processed.
+if [[ "${TESTMODE}" == 1 ]]; then
+    set -x
+    # quoted so that a path containing spaces or glob characters reaches cat as one argument
+    cat "${GITHUB_EVENT_PATH}"
+fi
 set -e
 
-cat ${GITHUB_EVENT_PATH}
 COMMENT_BODY=$(jq -r '.comment.body' "${GITHUB_EVENT_PATH}")
 
 BOT_COMMAND_PREFIX="/pulsarbot"
@@ -13,7 +15,6 @@ if [[ ${COMMENT_BODY} != "${BOT_COMMAND_PREFIX}"* ]]; then
     exit
 fi
 
-
 read -r -a commands <<< "${COMMENT_BODY}" 
 BOT_COMMAND=${commands[1]}
 CHECK_NAME=""
@@ -38,8 +39,8 @@ fi
 PR_NUM=$(jq -r '.issue.number' "${GITHUB_EVENT_PATH}")
 
+# Calls github_client with the full API URL of a resource of ${BOT_TARGET_REPOSITORY}.
+# Arguments: $1 - path (including any query string) appended after /repos/<repository>
 function github_get() {
-    path="$1"
-    github_client "https://api.github.com/repos/${BOT_TARGET_REPOSITORY}${path}"
+    local urlpath="$1"
+    github_client "https://api.github.com/repos/${BOT_TARGET_REPOSITORY}${urlpath}"
 }
 
 function github_client() {
@@ -51,24 +52,64 @@ PR_JSON="$(github_get "/pulls/${PR_NUM}")"
 HEAD_SHA=$(printf "%s" "${PR_JSON}" | jq -r .head.sha)
 PR_BRANCH=$(printf "%s" "${PR_JSON}" | jq -r .head.ref)
 PR_USER=$(printf "%s" "${PR_JSON}" | jq -r .head.user.login)
+PR_HTML_URL=$(printf "%s" "${PR_JSON}" | jq -r .html_url)
+
+echo "Handling pulsarbot command for PR #${PR_NUM} ${PR_HTML_URL}"
 
+# Prints one CSV row for every workflow run on one result page whose head_sha equals ${HEAD_SHA}.
+# Arguments: $1 - page number to request (defaults to 1)
+# Columns:   workflow_id, created_at, conclusion (status is used while conclusion is still null),
+#            url, name, html_url
 function get_runs() {
-    status="${1:-failure}"
+    local page="${1:-1}"
     # API reference https://docs.github.com/en/rest/reference/actions#list-workflow-runs-for-a-repository
-    github_get "/actions/runs?actor=${PR_USER}&branch=${PR_BRANCH}&status=${status}&per_page=100" | jq -r --arg head_sha "${HEAD_SHA}" '.workflow_runs[] | select(.head_sha==$head_sha) | .url'
+    github_get "/actions/runs?actor=${PR_USER}&branch=${PR_BRANCH}&page=${page}&per_page=100" \
+      | jq -r --arg head_sha "${HEAD_SHA}" \
+        '.workflow_runs[] | select(.head_sha==$head_sha) | [.workflow_id,.created_at,.conclusion // .status,.url,.name,.html_url] | @csv'
+}
+
+# Keeps only the most recent run of each workflow so that obsolete runs are never restarted.
+# Input (stdin):   CSV rows as printed by get_runs, ordered so that rows with the same
+#                  workflow_id (field 1) are adjacent and sorted by created_at
+#                  (get_all_runs delivers them in that order).
+# Output (stdout): the last row of every workflow_id group; nothing for empty input.
+function filter_oldruns() {
+    # When the workflow_id changes, the buffered row was the newest one of the previous group
+    # and is printed. The row that opens the new group is only buffered, never printed right
+    # away, because newer attempts of the same workflow may still follow it.
+    awk -F, 'NR > 1 && $1 != last { print lastline } { last = $1; lastline = $0 } END { if (NR > 0) print lastline }'
+}
+
+# Fetches every result page of get_runs and prints all rows sorted, which places rows with
+# the same workflow_id next to each other, ordered by created_at.
+# Outputs: sorted CSV rows (columns as printed by get_runs) on stdout; nothing when no rows exist.
+function get_all_runs() {
+    local page=1
+    local tempfile csv
+    # assigned separately from 'local' so that a mktemp failure is not masked
+    tempfile=$(mktemp) || return
+    while true; do
+      csv="$(get_runs "$page" | tee -a "$tempfile")"
+      # an empty page means that all pages have been consumed
+      if [[ -z "$csv" ]]; then
+        break
+      fi
+      page=$((page + 1))
+    done
+    if [[ -s "$tempfile" ]]; then
+        # byte-wise collation: a locale-aware sort may ignore the commas and quotes and
+        # could then interleave rows that belong to different workflow ids
+        LC_ALL=C sort "$tempfile"
+    fi
+    rm -f "$tempfile"
+}
+
+# Prints "<url>\t<name>\t<html_url>" for every workflow whose most recent run ended with the
+# conclusion "failure" or "cancelled".
+function find_failed_or_cancelled() {
+    # The workflow name (5th column) is free text and may itself contain commas, while the
+    # other columns (id, timestamp, state, URLs) are expected not to. Everything between the
+    # 4th and the last column is therefore joined back into the name instead of being cut off
+    # at the first comma.
+    get_all_runs | filter_oldruns \
+      | awk -F, '{ gsub(/"/, ""); if ($3 == "failure" || $3 == "cancelled") { name = $5; for (i = 6; i < NF; i++) name = name "," $i; print $4 "\t" name "\t" $NF } }'
 }
 
-# find the failures 
-FAILED_URLS=$(get_runs failure)
-CANCELLED_URLS=$(get_runs cancelled)
-for url in $FAILED_URLS $CANCELLED_URLS; do
-    name=$(github_client "$url"|jq -r '.name')
+# allocate file descriptor for the failed or cancelled url and name listing;
+# a dedicated descriptor keeps commands inside the loop from consuming the listing via stdin
+exec {failures_fd}< <(find_failed_or_cancelled)
+
+foundjobs=0
+# handle failures: request a rerun for every listed run whose name matches CHECK_NAME
+while IFS=$'\t' read -r url name html_url <&${failures_fd}; do
     if [[ "${CHECK_NAME}" == "_all" || "${name}" == *"${CHECK_NAME}"* ]]; then
-        echo "rerun-failed-jobs for '${name}' ($url)"
+        echo "rerun-failed-jobs for '${name}'. Follow progress at $html_url"
         # use https://docs.github.com/en/rest/reference/actions#re-run-failed-jobs-from-a-workflow-run
         # to rerun only the failed jobs
         github_client -X POST "${url}/rerun-failed-jobs"
+        # plain assignment instead of ((foundjobs++)): the post-increment evaluates to 0 on the
+        # first pass, which makes (( )) return status 1 and would abort the script under 'set -e'
+        foundjobs=$((foundjobs + 1))
     else
-        echo "Expect ${CHECK_NAME}, skipping build job '${name}' ($url)"
+        echo "Expect ${CHECK_NAME}, skipping build job '${name}' ($html_url)"
     fi
 done
+
+if [[ $foundjobs == 0 ]]; then
+    echo >&2 "Cannot find any failed workflow runs in PR #${PR_NUM}. Re-running can only target completed workflows."
+fi
diff --git a/pulsarbot/test_pulsarbot.sh b/pulsarbot/test_pulsarbot.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+# Builds the pulsarbot docker image and runs it locally against a generated comment event.
+# Usage:        GITHUB_TOKEN=your_token_here ./test_pulsarbot.sh PR_NUMBER_HERE
+# Optional env: COMMENT_BODY - comment text placed into the event
+#               TESTMODE     - forwarded to the container (defaults to 1)
+PRNUM=${1:-99999}
+echo "Using PRNUM=$PRNUM"
+# mktemp yields an unpredictable file name; the EXIT trap removes the file on every exit
+# path, including a failed docker build
+EVENT_FILE=$(mktemp /tmp/testevent.json.XXXXXX) || exit 1
+trap 'rm -f "$EVENT_FILE"' EXIT
+# mktemp creates the file with owner-only permissions; the user inside the container may differ
+chmod a+r "$EVENT_FILE"
+cat > "$EVENT_FILE" <<EOF
+{
+  "comment": {
+    "body": "${COMMENT_BODY:-"/pulsarbot rerun-failure-checks"}"
+  },
+  "issue": {
+    "number": $PRNUM
+  }
+}
+EOF
+echo "Building docker image..."
+docker build -t pulsarbot . || exit 1
+docker run -v "$EVENT_FILE":/tmp/testevent.json -e TESTMODE="${TESTMODE:-1}" -e GITHUB_TOKEN -e GITHUB_EVENT_PATH=/tmp/testevent.json pulsarbot