Skip to content

Commit d80efee

Browse files
WX-1105 Fix interpretation of full http blob paths (#7138)
Co-authored-by: Adam Nichols <anichols@broadinstitute.org>
1 parent 24e67fd commit d80efee

File tree

4 files changed

+90
-14
lines changed

4 files changed

+90
-14
lines changed

filesystems/blob/src/main/scala/cromwell/filesystems/blob/BlobPathBuilder.scala

+18-10
Original file line numberDiff line numberDiff line change
@@ -68,22 +68,30 @@ class BlobPathBuilder(container: BlobContainerName, endpoint: EndpointURL)(priva
6868
}
6969

7070
object BlobPath {
71-
// The Azure NIO library uses `{containerName}:` as the root of the path.
72-
// This doesn't work well for our need to easily transfer back and forth
73-
// to and from the blob URL format. This method removes anything up to and including
74-
// the first colon, to create a path string useful for working with BlobPath.
75-
// This is safe because the NIO library enforces no colons except to mark
76-
// the root container name.
77-
private def nioPathString(nioPath: NioPath): String = {
78-
val pathStr = nioPath.toString
71+
// The Azure NIO library uses `{containerName}:` as the root of the path (treating the blob container within
72+
// the storage account similarly to a drive within a computer). This doesn't work well for our need to easily
73+
// transfer back and forth to and from the blob URL format. It also causes the library to garble full http://
74+
// paths that it receives (it interprets `http` as the container name); it transforms them to http:/<remainder of path>
75+
//
76+
// We transform these library-generated paths in two steps:
77+
// 1) If the path starts with http:/ (single slash!) transform it to the containerName:<path inside container>
78+
// format the library expects
79+
// 2) If the path looks like <container>:<path>, strip off the <container>: to leave the absolute path inside the container.
80+
private val brokenPathRegex = "https:/([a-z0-9]+).blob.core.windows.net/([-a-zA-Z0-9]+)/(.*)".r
81+
def cleanedNioPathString(nioString: String): String = {
82+
val pathStr = nioString match {
83+
case brokenPathRegex(_, containerName, pathInContainer) =>
84+
s"${containerName}:/${pathInContainer}"
85+
case _ => nioString
86+
}
7987
pathStr.substring(pathStr.indexOf(":")+1)
8088
}
8189

8290
def apply(nioPath: NioPath,
8391
endpoint: EndpointURL,
8492
container: BlobContainerName,
8593
fsm: BlobFileSystemManager): BlobPath = {
86-
BlobPath(nioPathString(nioPath), endpoint, container)(fsm)
94+
BlobPath(cleanedNioPathString(nioPath.toString), endpoint, container)(fsm)
8795
}
8896
}
8997

@@ -95,7 +103,7 @@ case class BlobPath private[blob](pathString: String, endpoint: EndpointURL, con
95103
override def pathAsString: String = List(endpoint, container, pathString.stripPrefix("/")).mkString("/")
96104

97105
//This is purposefully an unprotected get because if the endpoint cannot be parsed this should fail loudly rather than quietly
98-
override def pathWithoutScheme: String = parseURI(endpoint.value).map(u => List(u.getHost, container, pathString).mkString("/")).get
106+
override def pathWithoutScheme: String = parseURI(endpoint.value).map(u => List(u.getHost, container, pathString.stripPrefix("/")).mkString("/")).get
99107

100108
private def findNioPath(path: String): NioPath = (for {
101109
fileSystem <- fsm.retrieveFilesystem()

filesystems/blob/src/test/scala/cromwell/filesystems/blob/BlobPathBuilderSpec.scala

+51-3
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import org.mockito.Mockito.when
44
import org.scalatest.flatspec.AnyFlatSpec
55
import org.scalatest.matchers.should.Matchers
66

7+
import java.util.UUID
78
import scala.util.{Failure, Try}
89

910
object BlobPathBuilderSpec {
@@ -57,12 +58,59 @@ class BlobPathBuilderSpec extends AnyFlatSpec with Matchers with MockSugar {
5758
testException should contain(exception)
5859
}
5960

61+
private def testBlobNioStringCleaning(input: String, expected: String) =
62+
BlobPath.cleanedNioPathString(input) shouldBe expected
63+
64+
it should "clean the NIO path string when it has a garbled http protocol" in {
65+
testBlobNioStringCleaning(
66+
"https:/lz43.blob.core.windows.net/sc-ebda3e/workspace-services/cbas/terra-app-4628d0e1/test_all_engine_functions/4bb6a0a2-3b07/call-run_read_string/execution/stdout",
67+
"/workspace-services/cbas/terra-app-4628d0e1/test_all_engine_functions/4bb6a0a2-3b07/call-run_read_string/execution/stdout"
68+
)
69+
}
70+
71+
it should "clean the NIO path string when it has a container name with colon prefix" in {
72+
testBlobNioStringCleaning(
73+
"sc-ebda3e:/workspace-services/cbas/terra-app-4628d0e1/test_all_engine_functions/4bb6a0a2-3b07/call-run_read_string/execution/stdout",
74+
"/workspace-services/cbas/terra-app-4628d0e1/test_all_engine_functions/4bb6a0a2-3b07/call-run_read_string/execution/stdout"
75+
)
76+
}
77+
78+
it should "clean the NIO path string when it's an in-container absolute path" in {
79+
testBlobNioStringCleaning(
80+
"/workspace-services/cbas/terra-app-4628d0e1/test_all_engine_functions/4bb6a0a2-3b07/call-run_read_string/execution/stdout",
81+
"/workspace-services/cbas/terra-app-4628d0e1/test_all_engine_functions/4bb6a0a2-3b07/call-run_read_string/execution/stdout"
82+
)
83+
}
84+
85+
it should "clean the NIO path string when it's the root directory only" in {
86+
testBlobNioStringCleaning(
87+
"sc-ebda3e:",
88+
""
89+
)
90+
}
91+
92+
//// The below tests are IGNORED because they depend on Azure auth information being present in the environment ////
93+
val subscriptionId = SubscriptionId(UUID.fromString("62b22893-6bc1-46d9-8a90-806bb3cce3c9"))
94+
95+
ignore should "resolve an absolute path string correctly to a path" in {
96+
val endpoint = BlobPathBuilderSpec.buildEndpoint("coaexternalstorage")
97+
val store = BlobContainerName("inputs")
98+
val blobTokenGenerator = NativeBlobSasTokenGenerator(store, endpoint, Some(subscriptionId))
99+
val fsm: BlobFileSystemManager = new BlobFileSystemManager(store, endpoint, 10, blobTokenGenerator)
100+
101+
val rootString = s"${endpoint.value}/${store.value}/cromwell-execution"
102+
val blobRoot: BlobPath = new BlobPathBuilder(store, endpoint)(fsm) build rootString getOrElse fail()
103+
blobRoot.toAbsolutePath.pathAsString should equal ("https://coaexternalstorage.blob.core.windows.net/inputs/cromwell-execution")
104+
val otherFile = blobRoot.resolve("https://coaexternalstorage.blob.core.windows.net/inputs/cromwell-execution/test/inputFile.txt")
105+
otherFile.toAbsolutePath.pathAsString should equal ("https://coaexternalstorage.blob.core.windows.net/inputs/cromwell-execution/test/inputFile.txt")
106+
}
107+
60108
ignore should "build a blob path from a test string and read a file" in {
61109
val endpoint = BlobPathBuilderSpec.buildEndpoint("coaexternalstorage")
62110
val endpointHost = BlobPathBuilder.parseURI(endpoint.value).map(_.getHost).getOrElse(fail("Could not parse URI"))
63111
val store = BlobContainerName("inputs")
64112
val evalPath = "/test/inputFile.txt"
65-
val blobTokenGenerator = NativeBlobSasTokenGenerator(store, endpoint)
113+
val blobTokenGenerator = NativeBlobSasTokenGenerator(store, endpoint, Some(subscriptionId))
66114
val fsm: BlobFileSystemManager = new BlobFileSystemManager(store, endpoint, 10L, blobTokenGenerator)
67115
val testString = endpoint.value + "/" + store + evalPath
68116
val blobPath: BlobPath = new BlobPathBuilder(store, endpoint)(fsm) build testString getOrElse fail()
@@ -80,7 +128,7 @@ class BlobPathBuilderSpec extends AnyFlatSpec with Matchers with MockSugar {
80128
val endpoint = BlobPathBuilderSpec.buildEndpoint("coaexternalstorage")
81129
val store = BlobContainerName("inputs")
82130
val evalPath = "/test/inputFile.txt"
83-
val blobTokenGenerator = NativeBlobSasTokenGenerator(store, endpoint)
131+
val blobTokenGenerator = NativeBlobSasTokenGenerator(store, endpoint, Some(subscriptionId))
84132
val fsm: BlobFileSystemManager = new BlobFileSystemManager(store, endpoint, 10, blobTokenGenerator)
85133
val testString = endpoint.value + "/" + store + evalPath
86134
val blobPath1: BlobPath = new BlobPathBuilder(store, endpoint)(fsm) build testString getOrElse fail()
@@ -95,7 +143,7 @@ class BlobPathBuilderSpec extends AnyFlatSpec with Matchers with MockSugar {
95143
ignore should "resolve a path without duplicating container name" in {
96144
val endpoint = BlobPathBuilderSpec.buildEndpoint("coaexternalstorage")
97145
val store = BlobContainerName("inputs")
98-
val blobTokenGenerator = NativeBlobSasTokenGenerator(store, endpoint)
146+
val blobTokenGenerator = NativeBlobSasTokenGenerator(store, endpoint, Some(subscriptionId))
99147
val fsm: BlobFileSystemManager = new BlobFileSystemManager(store, endpoint, 10, blobTokenGenerator)
100148

101149
val rootString = s"${endpoint.value}/${store.value}/cromwell-execution"

supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesAsyncBackendJobExecutionActor.scala

+12-1
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
package cromwell.backend.impl.tes
22

33
import common.exception.AggregatedMessageException
4+
45
import java.io.FileNotFoundException
56
import java.nio.file.FileAlreadyExistsException
67
import cats.syntax.apply._
78
import akka.http.scaladsl.Http
89
import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport._
910
import akka.http.scaladsl.marshalling.Marshal
11+
import akka.http.scaladsl.model.HttpHeader.ParsingResult.Ok
1012
import akka.http.scaladsl.model._
1113
import akka.http.scaladsl.unmarshalling.{Unmarshal, Unmarshaller}
1214
import akka.stream.ActorMaterializer
@@ -295,9 +297,18 @@ class TesAsyncBackendJobExecutionActor(override val standardParams: StandardAsyn
295297
}
296298
}
297299

300+
// Headers that should be included with all requests to the TES server
301+
private def requestHeaders: List[HttpHeader] =
302+
tesConfiguration.token.flatMap { t =>
303+
HttpHeader.parse("Authorization", t) match {
304+
case Ok(header, _) => Some(header)
305+
case _ => None
306+
}
307+
}.toList
308+
298309
private def makeRequest[A](request: HttpRequest)(implicit um: Unmarshaller[ResponseEntity, A]): Future[A] = {
299310
for {
300-
response <- withRetry(() => Http().singleRequest(request))
311+
response <- withRetry(() => Http().singleRequest(request.withHeaders(requestHeaders)))
301312
data <- if (response.status.isFailure()) {
302313
response.entity.dataBytes.runFold(ByteString(""))(_ ++ _).map(_.utf8String) flatMap { errorBody =>
303314
Future.failed(new RuntimeException(s"Failed TES request: Code ${response.status.intValue()}, Body = $errorBody"))

supportedBackends/tes/src/main/scala/cromwell/backend/impl/tes/TesConfiguration.scala

+9
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,15 @@ class TesConfiguration(val configurationDescriptor: BackendConfigurationDescript
3232
.map(SimpleExponentialBackoff(_))
3333
.getOrElse(TesConfiguration.defaultExecOrRecoverBackoff)
3434

35+
// Used for testing only. Include a bearer token for authenticating with the TES server
36+
final val bearerPrefix: String = "Bearer "
37+
val token: Option[String] = {
38+
configurationDescriptor.backendConfig.as[Option[String]]("bearer-token").map { t =>
39+
if (!t.startsWith(bearerPrefix))
40+
s"${bearerPrefix}${t}"
41+
else t
42+
}
43+
}
3544
}
3645

3746
object TesConfiguration {

0 commit comments

Comments
 (0)