Restart the app on errors and attempt to fix logging (#84)

* crash the app on errors; this can be necessary for starting to receive events again
* restart the app on crashes; get rid of the monitor service in favor of systemd-cat
* fix pipe
* fix newline
* fix command
* explain the differences between log accesses
* sudo is not necessary for journalctl
* the tmux output doesn't have anything meaningful anymore since the output stream gets consumed by systemd-cat
* better journalctl explanations
paritytech · Jan 28, 2022 · f377be8 · f377be8
1 parent e9da99b
commit f377be8
Show file tree

Hide file tree

Showing 4 changed files with 57 additions and 215 deletions.
diff --git a/README.md b/README.md
@@ -60,23 +60,20 @@ in the bot's directory.
 - `run {start,stop,restart}`: execute the relevant action for the bot.
 - `run update [ref]`: restart the bot with the branch or PR
   - For branch: `ssh user@remote '/home/benchbot/bench-bot/run update master'`
-  - For PR: `ssh user@remote '/home/benchbot/bench-bot/run update pull/number/head:branch'` 
+  - For PR: `ssh user@remote '/home/benchbot/bench-bot/run update pull/number/head:branch'`
     e.g. `pull/1/head:master`
 
-### Monitoring Service commands
-
-- `run monitor {install,uninstall}`: install or uninstall the monitoring
-  service
-- `run monitor {start,restart,stop,status,...}`: acts as a wrapper for
-  `systemctl`
-
 ### Logs
 
-The logs will be output to the systemd journal:
-
-`sudo journalctl -u benchbot-monitor.service`
+- Logs from the systemd journal:
+  - `journalctl --follow --identifier benchbot` follows the output as if you
+    were running the command in the foreground
+  - `journalctl --pagerend --identifier benchbot` goes to the end of the output
+    in pager mode and therefore allows you to paginate through the history
+  - This log is cleared between machine restarts
 
-As well as to `./log.txt`.
+- Full log history: `less +G /home/benchbot/bench-bot/log.txt`
+  - This log is only cleared manually
 
 # Required Github settings
 

diff --git a/bench.js b/bench.js
@@ -40,7 +40,7 @@ function BenchContext(app, config) {
       stdout = result.stdout
     } catch (err) {
       error = true
-      app.log.fatal({
+      config.logFatal({
         msg: "Caught exception in command execution",
         error: err,
       })
@@ -532,7 +532,7 @@ function benchmarkRuntime(app, config) {
             )
             if (last.error) {
               extraInfo = `ERROR: Unable to commit file ${outputFile}`
-              app.log.fatal({
+              config.logFatal({
                 msg: extraInfo,
                 stdout: last.stdout,
                 stderr: last.stderr,
@@ -546,7 +546,7 @@ function benchmarkRuntime(app, config) {
               )
               if (last.error) {
                 extraInfo = `ERROR: Unable to push ${outputFile}`
-                app.log.fatal({
+                config.logFatal({
                   msg: extraInfo,
                   stdout: last.stdout,
                   stderr: last.stderr,
@@ -556,7 +556,7 @@ function benchmarkRuntime(app, config) {
           } catch (error) {
             extraInfo =
               "NOTE: Caught exception while trying to push commits to the repository"
-            app.log.fatal({ msg: extraInfo, error })
+            config.logFatal({ msg: extraInfo, error })
           }
         }
       }

diff --git a/index.js b/index.js
@@ -9,7 +9,7 @@ const githubCommentLimitLength = 65536
 const githubCommentLimitTruncateMessage = "<truncated>..."
 
 let isTerminating = false
-let appFatalLogger = undefined
+let logFatal = undefined
 
 for (const event of ["uncaughtException", "unhandledRejection"]) {
   process.on(event, function (error, origin) {
@@ -19,8 +19,8 @@ for (const event of ["uncaughtException", "unhandledRejection"]) {
     isTerminating = true
 
     try {
-      if (appFatalLogger) {
-        appFatalLogger({ event, error, origin })
+      if (logFatal) {
+        logFatal({ event, error, origin })
       }
     } catch (error) {
       console.error({ level: "error", event, error, origin, exception })
@@ -35,7 +35,25 @@ module.exports = (app) => {
     app.log("Running in debug mode")
   }
 
-  appFatalLogger = app.log.fatal
+  // Crash the server on Probot failures or errors
+  // We retain the original error handlers on logError and logFatal so that the
+  // application can still report errors on the expected channels
+  // This is necessary to work around reconnection problems with our event
+  // source
+  // (https://github.com/paritytech/bench-bot/issues/83#issuecomment-1024283664)
+  // FIXME: This is suboptimal and we should not have to stop the application in
+  // case of errors
+  // The server will be automatically restarted on failures (handled in ./run)
+  const logError = app.log.error
+  app.log.error = function(...args) {
+    logError(...args)
+    process.exit(1)
+  }
+  logFatal = app.log.fatal
+  app.log.fatal = function(...args) {
+    logFatal(...args)
+    process.exit(1)
+  }
 
   const baseBranch = process.env.BASE_BRANCH || "master"
   app.log.debug(`base branch: ${baseBranch}`)
@@ -140,6 +158,7 @@ module.exports = (app) => {
         id: action,
         extra,
         getPushDomain,
+        logFatal
       }
 
       let report
@@ -156,10 +175,10 @@ module.exports = (app) => {
       }
 
       if (report.isError) {
-        app.log.error(report.message)
+        logError(report.message)
 
         if (report.error) {
-          app.log.error(report.error)
+          logError(report.error)
         }
 
         const output = `${report.message}${report.error ? `: ${report.error.toString()}` : ""
@@ -221,7 +240,7 @@ ${extraInfo}
         body,
       })
     } catch (error) {
-      app.log.fatal({
+      logFatal({
         error,
         repo,
         owner,