Skip to content

Commit

Permalink
PAPI better error message when localization/delocalization fails (#4718)
Browse files Browse the repository at this point in the history
  • Loading branch information
salonishah11 authored Mar 8, 2019
1 parent 73ad264 commit d16138a
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 6 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
version 1.0

# Task that declares a File input and runs on a 1 GB local HDD, so that
# localizing an input larger than the disk fails before the command starts.
#
# Inputs:
#   input_file - file to be localized onto the task's disk; it is not read
#                by the command itself.
# Outputs:
#   out        - whatever the command wrote to stdout.
task localize_file {
  input {
    File input_file
  }
  command {
    echo "localizing file over 1 GB"
  }
  runtime {
    docker: "ubuntu:latest"
    disks: "local-disk 1 HDD"
  }
  output {
    String out = read_string(stdout())
  }
}

# Passes a single hard-coded GCS file to the localize_file task and exposes
# the task's stdout string as the workflow output.
workflow localize_file_larger_than_disk_space {
# NOTE(review): the object name suggests it is larger than 1 GB, i.e. larger
# than the 1 GB local disk requested by localize_file - confirm bucket contents.
File wf_input = "gs://cromwell_test_bucket/file_over_1_gb.txt"

call localize_file { input: input_file = wf_input }

output {
# Forwarded unchanged from the localize_file call.
String content = localize_file.out
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Test case: runs the localize_file_larger_than_disk_space WDL on the Papiv2
# backend and expects the workflow to fail (testFormat: workflowfailure).
name: localize_file_larger_than_disk_space
testFormat: workflowfailure
backends: [Papiv2]
workflowType: WDL
workflowTypeVersion: 1.0
tags: ["wdl_1.0"]

files {
workflow: input_localization/localize_file_larger_than_disk_space.wdl
}

# Expected workflow metadata after the run. The causedBy message must point
# the user at the task's log file.
# NOTE(review): <<UUID>> presumably stands in for the run's workflow id -
# confirm against the test harness.
metadata {
workflowName: localize_file_larger_than_disk_space
status: Failed
"failures.0.message": "Workflow failed"
"failures.0.causedBy.0.message": "Task localize_file_larger_than_disk_space.localize_file:NA:1 failed. The job was stopped before the command finished. PAPI error code 9. Please check the log file for more details: gs://cloud-cromwell-dev-self-cleaning/cromwell_execution/travis/localize_file_larger_than_disk_space/<<UUID>>/call-localize_file/localize_file.log."
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,5 @@ metadata {
workflowName: requester_pays_localization
status: Failed
"failures.0.message": "Workflow failed"
"failures.0.causedBy.0.message": ~~"does not have serviceusage.services.use access"
"failures.0.causedBy.0.message": "Task requester_pays_localization.localize:NA:1 failed. The job was stopped before the command finished. PAPI error code 9. Please check the log file for more details: gs://cloud-cromwell-dev-self-cleaning/cromwell_execution/travis/requester_pays_localization/<<UUID>>/call-localize/localize.log."
}
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ object PipelinesApiAsyncBackendJobExecutionActor {
// JES code matched together with Status.ABORTED in handleFailedRunStatus to route to handleUnexpectedTermination.
val JesUnexpectedTermination = 13
val JesPreemption = 14

// Numeric status code compared against runStatus.errorCode.getCode.value in generateBetterErrorMsg
// to decide whether a localization/delocalization failure message is replaced by a pointer to the job log.
// NOTE(review): presumably the gRPC FAILED_PRECONDITION code (9) - confirm.
val PapiFailedPreConditionErrorCode = 9

// If the JES code is 2 (UNKNOWN), this sub-string indicates preemption:
val FailedToStartDueToPreemptionSubstring = "failed to start due to preemption"

Expand Down Expand Up @@ -584,6 +586,14 @@ class PipelinesApiAsyncBackendJobExecutionActor(override val standardParams: Sta
override def handleExecutionFailure(runStatus: RunStatus,
returnCode: Option[Int]): Future[ExecutionHandle] = {

// Inner function: swaps the raw error text for a pointer to the job's log file when the
// failure has the PAPI failed-precondition code and the text reports a failed execution
// during Localization or Delocalization. Any other message is returned unchanged.
def generateBetterErrorMsg(runStatus: RunStatus.UnsuccessfulRunStatus, errorMsg: String): String = {
  // Declared as defs (not vals) so the checks are still evaluated lazily, left to right.
  def hasFailedPreConditionCode: Boolean =
    runStatus.errorCode.getCode.value == PapiFailedPreConditionErrorCode

  def reportsFailedExecution: Boolean =
    errorMsg.contains("Execution failed")

  // "Delocalization" does not contain the capitalised "Localization", so both are checked.
  def namesFileTransferStage: Boolean =
    Seq("Localization", "Delocalization").exists(stage => errorMsg.contains(stage))

  val pointToLogFile = hasFailedPreConditionCode && reportsFailedExecution && namesFileTransferStage

  if (!pointToLogFile) errorMsg
  else s"Please check the log file for more details: $jesLogPath."
}

// Inner function: Handles a 'Failed' runStatus (or Preempted if preemptible was false)
def handleFailedRunStatus(runStatus: RunStatus.UnsuccessfulRunStatus,
returnCode: Option[Int]): Future[ExecutionHandle] = {
Expand All @@ -592,19 +602,22 @@ class PipelinesApiAsyncBackendJobExecutionActor(override val standardParams: Sta
def isDockerPullFailure: Boolean = prettyError.contains("not found: does not exist or no pull access")

(runStatus.errorCode, runStatus.jesCode) match {
case (Status.NOT_FOUND, Some(JesFailedToDelocalize)) => Future.successful(FailedNonRetryableExecutionHandle(FailedToDelocalizeFailure(runStatus.prettyPrintedError, jobTag, Option(standardPaths.error))))
case (Status.ABORTED, Some(JesUnexpectedTermination)) => handleUnexpectedTermination(runStatus.errorCode, runStatus.prettyPrintedError, returnCode)
case (Status.NOT_FOUND, Some(JesFailedToDelocalize)) => Future.successful(FailedNonRetryableExecutionHandle(FailedToDelocalizeFailure(prettyError, jobTag, Option(standardPaths.error))))
case (Status.ABORTED, Some(JesUnexpectedTermination)) => handleUnexpectedTermination(runStatus.errorCode, prettyError, returnCode)
case _ if isDockerPullFailure =>
val unable = s"Unable to pull Docker image '$jobDockerImage' "
val details = if (hasDockerCredentials)
"but Docker credentials are present; is this Docker account authorized to pull the image? " else
"and there are effectively no Docker credentials present (one or more of token, authorization, or Google KMS key may be missing). " +
"Please check your private Docker configuration and/or the pull access for this image. "
val message = unable + details + runStatus.prettyPrintedError
val message = unable + details + prettyError
Future.successful(FailedNonRetryableExecutionHandle(StandardException(
runStatus.errorCode, message, jobTag, returnCode, standardPaths.error), returnCode))
case _ => Future.successful(FailedNonRetryableExecutionHandle(StandardException(
runStatus.errorCode, runStatus.prettyPrintedError, jobTag, returnCode, standardPaths.error), returnCode))
case _ => {
val finalPrettyPrintedError = generateBetterErrorMsg(runStatus, prettyError)
Future.successful(FailedNonRetryableExecutionHandle(StandardException(
runStatus.errorCode, finalPrettyPrintedError, jobTag, returnCode, standardPaths.error), returnCode))
}
}
}

Expand Down

0 comments on commit d16138a

Please sign in to comment.