Skip to content

Commit

Permalink
Support for s3 multipart checksums (#4991)
Browse files Browse the repository at this point in the history
  • Loading branch information
olivergrabinski authored May 22, 2024
1 parent e16594d commit a8a2601
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"@type": "@id"
},
"_digest": "https://bluebrain.github.io/nexus/vocabulary/digest",
"_numberOfParts": "https://bluebrain.github.io/nexus/vocabulary/numberOfParts",
"_bytes": "https://bluebrain.github.io/nexus/vocabulary/bytes",
"_value": "https://bluebrain.github.io/nexus/vocabulary/value",
"_rev": "https://bluebrain.github.io/nexus/vocabulary/rev",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,9 @@
},
"_value": {
"type": "keyword"
},
"_numberOfParts": {
"type": "long"
}
}
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,18 @@ object Digest {
*/
final case class ComputedDigest(algorithm: DigestAlgorithm, value: String) extends Digest

/**
 * A digest as provided by S3 when the file is uploaded in parts.
 *
 * Compared to a [[ComputedDigest]] it additionally carries the number of parts, which the JSON encoder of this
 * object emits as `_numberOfParts` next to `_algorithm` and `_value`.
 *
 * @param algorithm
 *   the algorithm used in order to compute the digest
 * @param value
 *   the actual value of the digest of the file. NOTE(review): presumably the hex-encoded checksum without the
 *   trailing `-<parts>` suffix reported by S3 - confirm against the code that builds this digest
 * @param numberOfParts
 *   the number of parts the digest was computed from
 */
final case class MultiPartDigest(algorithm: DigestAlgorithm, value: String, numberOfParts: Int) extends Digest

/**
* A digest that does not yield a value because it is still being computed
*/
Expand All @@ -36,8 +48,10 @@ object Digest {
final case object NotComputedDigest extends Digest

implicit val digestEncoder: Encoder.AsObject[Digest] = Encoder.encodeJsonObject.contramapObject {
case ComputedDigest(algorithm, value) => JsonObject("_algorithm" -> algorithm.asJson, "_value" -> value.asJson)
case NotComputedDigest => JsonObject("_value" -> "".asJson)
case NoDigest => JsonObject("_value" -> "".asJson)
case ComputedDigest(algorithm, value) => JsonObject("_algorithm" -> algorithm.asJson, "_value" -> value.asJson)
case MultiPartDigest(algorithm, value, numberOfParts) =>
JsonObject("_algorithm" -> algorithm.asJson, "_value" -> value.asJson, "_numberOfParts" -> numberOfParts.asJson)
case NotComputedDigest => JsonObject("_value" -> "".asJson)
case NoDigest => JsonObject("_value" -> "".asJson)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,21 @@ object HeadObject {
// It is highly likely for S3 to return an erroneous value here
ContentType.parse(value).toOption
}
val digestValue = Option(response.checksumSHA256).map { encodedChecksum =>
Hex.valueOf(Base64.getDecoder.decode(encodedChecksum))
}
val digest = digestValue.fold(Digest.none) { value =>
ComputedDigest(DigestAlgorithm.SHA256, value)
}

val digest = Option(response.checksumSHA256())
.map { encodedChecksum =>
val multiPartDigest = """^(.*)-(\d+)$""".r
encodedChecksum match {
case multiPartDigest(value, parts) =>
val digestValue = Hex.valueOf(Base64.getDecoder.decode(value))
Digest.MultiPartDigest(DigestAlgorithm.SHA256, digestValue, parts.toInt)
case _ =>
val digestValue = Hex.valueOf(Base64.getDecoder.decode(encodedChecksum))
ComputedDigest(DigestAlgorithm.SHA256, digestValue)
}
}
.getOrElse(Digest.none)

HeadObject(
response.contentLength(),
contentType,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
package ch.epfl.bluebrain.nexus.delta.plugins.storage.storages.operations.s3

import ch.epfl.bluebrain.nexus.delta.plugins.storage.files.model.Digest
import ch.epfl.bluebrain.nexus.delta.plugins.storage.storages.model.DigestAlgorithm
import ch.epfl.bluebrain.nexus.testkit.mu.NexusSuite
import software.amazon.awssdk.services.s3.model.HeadObjectResponse

/**
  * Checks how [[HeadObject]] turns the SHA-256 checksum carried by an S3 `HeadObjectResponse` into a [[Digest]].
  */
class HeadObjectSuite extends NexusSuite {

  // Builds a response carrying only the given checksum and returns the digest HeadObject derives from it.
  private def digestOf(encodedChecksum: String) = {
    val response = HeadObjectResponse.builder().checksumSHA256(encodedChecksum).build()
    HeadObject(response).digest
  }

  test("HeadObject should correctly parse a standard S3 SHA256 digest") {
    // Plain base64 checksum, i.e. without a trailing "-<number of parts>" suffix
    val standardDigest = "44ImQwqlEWtD75zMbO3GeJCOj4oO2lMb+VW6l6zJ3sc="

    assertEquals(
      digestOf(standardDigest),
      Digest.ComputedDigest(
        DigestAlgorithm.SHA256,
        "e38226430aa5116b43ef9ccc6cedc678908e8f8a0eda531bf955ba97acc9dec7"
      )
    )
  }

  test("HeadObject should correctly parse a multipart S3 SHA256 digest") {
    // Base64 checksum followed by "-13": the suffix is expected to end up as the number of parts
    val multiPartDigest = "kFsM2p15+Jbp2K0FIF0y1zIWlEJOt5052qlU8IRQPtM=-13"

    assertEquals(
      digestOf(multiPartDigest),
      Digest.MultiPartDigest(
        DigestAlgorithm.SHA256,
        "905b0cda9d79f896e9d8ad05205d32d7321694424eb79d39daa954f084503ed3",
        13
      )
    )
  }

}

0 comments on commit a8a2601

Please sign in to comment.