Skip to content

Commit

Permalink
Better QoB error propagation (patch from Tim Poterba) (#242)
Browse files Browse the repository at this point in the history
  • Loading branch information
lgruen authored Oct 16, 2022
1 parent 0c057d6 commit 3c66763
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 4 deletions.
12 changes: 8 additions & 4 deletions hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala
Original file line number Diff line number Diff line change
Expand Up @@ -214,9 +214,6 @@ class ServiceBackend(
stageCount += 1
implicit val formats: Formats = DefaultFormats
val batchState = (batch \ "state").extract[String]
if (batchState == "failed") {
throw new HailBatchFailure(s"Update $updateId for batch $batchId failed")
}

log.info(s"parallelizeAndComputeWithIndex: $token: reading results")

Expand All @@ -237,7 +234,12 @@ class ServiceBackend(
availableGCSConnections.acquire()
try {
val bytes = retryTransientErrors {
using(open(s"$root/result.$i")) { is =>
val is = try {
open(s"$root/result.$i")
} catch {
case e: Throwable => throw new HailWorkerFailure(s"no result for failing job ${i}!", e)
}
using(is) { is =>
resultOrHailException(new DataInputStream(is))
}
}
Expand All @@ -248,6 +250,8 @@ class ServiceBackend(
}
}

assert(batchState != "failed") // a failure can't have all the correct outputs with no exceptions!

log.info(s"all results complete")
results.toArray[Array[Byte]]
}
Expand Down
2 changes: 2 additions & 0 deletions hail/src/main/scala/is/hail/backend/service/Worker.scala
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,8 @@ object Worker {
writeString(dos, shortMessage)
writeString(dos, expandedMessage)
dos.writeInt(errorId)
log.info(s"job $i/$n failed with user exception: $shortMessage (error id $errorId)\n $expandedMessage")
throw userError
}
}
timer.end("writeOutputs")
Expand Down
2 changes: 2 additions & 0 deletions hail/src/main/scala/is/hail/utils/ErrorHandling.scala
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ class HailException(val msg: String, val logMsg: Option[String], cause: Throwabl
def this(msg: String, errorId: Int) = this(msg, None, null, errorId)
}

/** Signals that a worker job failed in a way that left no readable result behind.
  *
  * @param msg   human-readable description of the failure
  * @param cause the underlying exception; forwarded to `RuntimeException` so that
  *              `getCause` and printed stack traces retain the original error
  */
class HailWorkerFailure(msg: String, cause: Throwable) extends RuntimeException(msg, cause)

class HailWorkerException(
val shortMessage: String,
val expandedMessage: String,
Expand Down

0 comments on commit 3c66763

Please sign in to comment.