From 54a1fc53ac853b2375e9cf25a6047fb593bffa0b Mon Sep 17 00:00:00 2001 From: Roberto Tyley Date: Fri, 4 Aug 2023 15:33:17 +0100 Subject: [PATCH] Add ETag-caching (and AWS SDK v2) support This change adds these improvements: * Facia data is only re-downloaded & re-parsed if the S3 content has _changed_, thanks to ETag-caching - see https://github.com/guardian/etag-caching . This library has already been used in DotCom PROD with https://github.com/guardian/frontend/pull/26338 * AWS SDK v2: the FAPI client itself now has a `fapi-s3-sdk-v2` artifact. An example PR consuming this updated version of the FAPI client is at: https://github.com/guardian/ophan/pull/5506 Updated FAPI artifact layout ---------------------------- To use FAPI with the new AWS SDK v2 support, users must now have a dependency on *two* FAPI artifacts: * `fapi-s3-sdk-v2` * `fapi-client-playXX` Due to needing to support the matrix of: * AWS SDK v1 & v2 * Play-JSON 2.7, 2.8, and eventually 2.9 ...it's best not to try to produce an artifact that corresponds to every single combination of those! Consequently, we provide an artifacts that are specific to the different versions of AWS SDK (or at least, could do - if AWS SDK v1 was moved out of common code), and artifacts that are specific to the different versions of Play-JSON, and allow the user to combine them as needed. A similar approach was used with `guardian/play-secret-rotation`: https://github.com/guardian/play-secret-rotation/pull/8 In order for the different artifacts to have interfaces they can use to join together and become a single useful Facia client, we have a `fapi-client-core` artifact. Any code that doesn't depend on the JSON classes, or the actual AWS SDK version (which isn't much!), can live in there. In particular, we have: * `com.gu.facia.client.ApiClient`, an existing type that is now a trait, with 2 implementations - one that uses the existing `com.gu.facia.client.S3Client` abstraction on S3 behaviour * `com.gu.facia.client.etagcaching.fetching.S3FetchBehaviour`, a new trait that exposes just enough interface to allow the conditional fetching used for ETag-based caching, but doesn't tie you to any specific version of the AWS SDK. --- build.sbt | 49 ++++++---- ...S3Client.scala => AmazonSdkS3Client.scala} | 12 +-- .../scala/com/gu/facia/client/ApiClient.scala | 90 ++++++++++++++----- .../gu/facia/client/lib/ApiTestClient.scala | 11 +-- .../com/gu/facia/client/Environment.scala | 15 ++++ .../com/gu/facia/client/FaciaResult.scala | 0 .../client/JsonDeserialisationError.scala | 0 .../scala/com/gu/facia/client/S3Client.scala | 10 +++ .../fetching/S3FetchBehaviour.scala | 10 +++ .../facia/client/aws/sdkv2/s3/AwsSdkV2.scala | 19 ++++ project/dependencies.scala | 5 +- 11 files changed, 169 insertions(+), 52 deletions(-) rename facia-json/src/main/scala/com/gu/facia/client/{S3Client.scala => AmazonSdkS3Client.scala} (80%) create mode 100644 fapi-client-core/src/main/scala/com/gu/facia/client/Environment.scala rename {facia-json => fapi-client-core}/src/main/scala/com/gu/facia/client/FaciaResult.scala (100%) rename {facia-json => fapi-client-core}/src/main/scala/com/gu/facia/client/JsonDeserialisationError.scala (100%) create mode 100644 fapi-client-core/src/main/scala/com/gu/facia/client/S3Client.scala create mode 100644 fapi-client-core/src/main/scala/com/gu/facia/client/etagcaching/fetching/S3FetchBehaviour.scala create mode 100644 fapi-s3-sdk-v2/src/main/scala/com/gu/facia/client/aws/sdkv2/s3/AwsSdkV2.scala diff --git a/build.sbt b/build.sbt index 59485010..90a8c22a 100644 --- a/build.sbt +++ b/build.sbt @@ -47,6 +47,8 @@ val sonatypeReleaseSettings = Seq( ) lazy val root = (project in file(".")).aggregate( + fapiClient_core, + fapiClient_s3_sdk_v2, faciaJson_play27, faciaJson_play28, fapiClient_play27, @@ -62,27 +64,31 @@ val exactPlayJsonVersions = Map( "28" -> "2.8.2" ) -def baseProject(module: String, majorMinorVersion: String) = Project(s"$module-play$majorMinorVersion", file(s"$module-play$majorMinorVersion")) +val baseSettings = Seq( + organization := "com.gu", + resolvers ++= Resolver.sonatypeOssRepos("releases"), + scalaVersion := "2.13.11", + crossScalaVersions := Seq(scalaVersion.value, "2.12.18"), + scalacOptions := Seq( + "-feature", + "-deprecation", + "-Xfatal-warnings" + ), + libraryDependencies += scalaTest, + publishTo := sonatypePublishToBundle.value +) ++ sonatypeReleaseSettings + +def playSpecificProject(module: String, majorMinorVersion: String) = Project(s"$module-play$majorMinorVersion", file(s"$module-play$majorMinorVersion")) .settings( sourceDirectory := baseDirectory.value / s"../$module/src", - organization := "com.gu", - resolvers ++= Resolver.sonatypeOssRepos("releases"), - scalaVersion := "2.13.11", - crossScalaVersions := Seq(scalaVersion.value, "2.12.18"), - scalacOptions := Seq( - "-feature", - "-deprecation", - "-Xfatal-warnings" - ), - libraryDependencies += scalaTest, - publishTo := sonatypePublishToBundle.value, - sonatypeReleaseSettings + baseSettings ) -def faciaJson_playJsonVersion(majorMinorVersion: String) = baseProject("facia-json", majorMinorVersion) +def faciaJson_playJsonVersion(majorMinorVersion: String) = playSpecificProject("facia-json", majorMinorVersion) + .dependsOn(fapiClient_core) .settings( libraryDependencies ++= Seq( - awsSdk, + awsS3SdkV1, // ideally, this would be pushed out to a separate FAPI artifact commonsIo, "com.typesafe.play" %% "play-json" % exactPlayJsonVersions(majorMinorVersion), "org.scala-lang.modules" %% "scala-collection-compat" % "2.11.0", @@ -90,7 +96,7 @@ def faciaJson_playJsonVersion(majorMinorVersion: String) = baseProject("facia-js ) ) -def fapiClient_playJsonVersion(majorMinorVersion: String) = baseProject("fapi-client", majorMinorVersion) +def fapiClient_playJsonVersion(majorMinorVersion: String) = playSpecificProject("fapi-client", majorMinorVersion) .settings( libraryDependencies ++= Seq( contentApi, @@ -101,6 +107,17 @@ def fapiClient_playJsonVersion(majorMinorVersion: String) = baseProject("fapi-c ) ) +lazy val fapiClient_core = Project("fapi-client-core", file("fapi-client-core")).settings( + libraryDependencies += eTagCachingS3Base, + baseSettings +) + +lazy val fapiClient_s3_sdk_v2 = Project("fapi-s3-sdk-v2", file("fapi-s3-sdk-v2")).dependsOn(fapiClient_core) + .settings( + libraryDependencies += eTagCachingS3SdkV2, + baseSettings +) + lazy val faciaJson_play27 = faciaJson_playJsonVersion("27") lazy val faciaJson_play28 = faciaJson_playJsonVersion("28") diff --git a/facia-json/src/main/scala/com/gu/facia/client/S3Client.scala b/facia-json/src/main/scala/com/gu/facia/client/AmazonSdkS3Client.scala similarity index 80% rename from facia-json/src/main/scala/com/gu/facia/client/S3Client.scala rename to facia-json/src/main/scala/com/gu/facia/client/AmazonSdkS3Client.scala index 1c198454..73403fb9 100644 --- a/facia-json/src/main/scala/com/gu/facia/client/S3Client.scala +++ b/facia-json/src/main/scala/com/gu/facia/client/AmazonSdkS3Client.scala @@ -1,18 +1,14 @@ package com.gu.facia.client -import com.amazonaws.regions.{Region, Regions} -import com.amazonaws.services.s3.{AmazonS3, AmazonS3ClientBuilder} +import com.amazonaws.regions.Regions import com.amazonaws.services.s3.model.AmazonS3Exception +import com.amazonaws.services.s3.{AmazonS3, AmazonS3ClientBuilder} import org.apache.commons.io.IOUtils + import scala.concurrent.{ExecutionContext, Future, blocking} import scala.util.Try -/** For mocking in tests, but also to allow someone to define a properly asynchronous S3 client. (The one in the AWS - * SDK is unfortunately synchronous only.) - */ -trait S3Client { - def get(bucket: String, path: String): Future[FaciaResult] -} + case class AmazonSdkS3Client(client: AmazonS3)(implicit executionContext: ExecutionContext) extends S3Client { def get(bucket: String, path: String): Future[FaciaResult] = Future { diff --git a/facia-json/src/main/scala/com/gu/facia/client/ApiClient.scala b/facia-json/src/main/scala/com/gu/facia/client/ApiClient.scala index 5c7936fe..86462a1e 100644 --- a/facia-json/src/main/scala/com/gu/facia/client/ApiClient.scala +++ b/facia-json/src/main/scala/com/gu/facia/client/ApiClient.scala @@ -1,37 +1,83 @@ package com.gu.facia.client -import com.gu.facia.client.models.{ConfigJson, CollectionJson} +import com.gu.etagcaching.FreshnessPolicy.AlwaysWaitForRefreshedValue +import com.gu.etagcaching.aws.s3.ObjectId +import com.gu.etagcaching.{ConfigCache, ETagCache} +import com.gu.facia.client.etagcaching.fetching.S3FetchBehaviour +import com.gu.facia.client.models.{CollectionJson, ConfigJson} import play.api.libs.json.{Format, Json} import scala.concurrent.{ExecutionContext, Future} +trait ApiClient { + def config: Future[ConfigJson] + + def collection(id: String): Future[Option[CollectionJson]] +} + object ApiClient { val Encoding = "utf-8" -} -case class ApiClient( + def apply( bucket: String, - /** e.g., CODE, PROD, DEV ... */ - environment: String, + environment: String, // e.g., CODE, PROD, DEV ... s3Client: S3Client -)(implicit executionContext: ExecutionContext) { - import com.gu.facia.client.ApiClient._ - - private def retrieve[A: Format](key: String): Future[Option[A]] = s3Client.get(bucket, key) map { - case FaciaSuccess(bytes) => - Some(Json.fromJson[A](Json.parse(new String(bytes, Encoding))) getOrElse { - throw new JsonDeserialisationError(s"Could not deserialize JSON in $bucket, $key") - }) - case FaciaNotAuthorized(message) => throw new BackendError(message) - case FaciaNotFound(_) => None - case FaciaUnknownError(message) => throw new BackendError(message) + )(implicit executionContext: ExecutionContext): ApiClient = new ApiClient { + val env: Environment = Environment(environment) + + private def retrieve[A: Format](key: String): Future[Option[A]] = s3Client.get(bucket, key).map(translateFaciaResult[A](_)) + + def config: Future[ConfigJson] = + retrieve[ConfigJson](env.configS3Path).map(getOrWarnAboutMissingConfig) + + def collection(id: String): Future[Option[CollectionJson]] = + retrieve[CollectionJson](env.collectionS3Path(id)) + } + + private def getOrWarnAboutMissingConfig(configOpt: Option[ConfigJson]): ConfigJson = + configOpt getOrElse throwConfigMissingException() + + private def throwConfigMissingException() = throw BackendError("Config was missing!! NOT GOOD!") + + private def translateFaciaResult[B: Format](faciaResult: FaciaResult): Option[B] = faciaResult match { + case FaciaSuccess(bytes) => Some(parseBytes(bytes)) + case FaciaNotAuthorized(message) => throw BackendError(message) + case FaciaNotFound(_) => None + case FaciaUnknownError(message) => throw BackendError(message) } - def config: Future[ConfigJson] = - retrieve[ConfigJson](s"$environment/frontsapi/config/config.json").map(_ getOrElse { - throw new BackendError("Config was missing!! OH MY GOD") - }) + private def parseBytes[B: Format](bytes: Array[Byte]): B = + Json.fromJson[B](Json.parse(new String(bytes, Encoding))) getOrElse { + throw JsonDeserialisationError(s"Could not deserialize JSON") + } + + def withCaching( + bucket: String, + s3FetchBehaviour: S3FetchBehaviour, + environment: Environment, + configureCollectionCache: ConfigCache = _.maximumSize(10000) // at most 1GB RAM worst case + )(implicit ec: ExecutionContext): ApiClient = + new ApiClient { + private val fetching = s3FetchBehaviour.fetching.keyOn[String](path => ObjectId(bucket, path)) + + def eTagCache[B: Format](configureCache: ConfigCache) = new ETagCache( + fetching.thenParsing(parseBytes[B]), + AlwaysWaitForRefreshedValue, + configureCache + ) + + private val configCache = eTagCache[ConfigJson](_.maximumSize(1)) + private val collectionCache = eTagCache[CollectionJson](configureCollectionCache) + + override def config: Future[ConfigJson] = configCache.get(environment.configS3Path).recover { + case t if s3FetchBehaviour.fetchExceptionIndicatesContentMissing(t) => + throwConfigMissingException() + } - def collection(id: String): Future[Option[CollectionJson]] = - retrieve[CollectionJson](s"$environment/frontsapi/collection/$id/collection.json") + override def collection(id: String): Future[Option[CollectionJson]] = + collectionCache.get(environment.collectionS3Path(id)).map(Some(_)).recover { + case t if s3FetchBehaviour.fetchExceptionIndicatesContentMissing(t) => None + case t => throw BackendError(t.getMessage) + } + } } diff --git a/facia-json/src/test/scala/com/gu/facia/client/lib/ApiTestClient.scala b/facia-json/src/test/scala/com/gu/facia/client/lib/ApiTestClient.scala index 1fda7b27..1f139d42 100644 --- a/facia-json/src/test/scala/com/gu/facia/client/lib/ApiTestClient.scala +++ b/facia-json/src/test/scala/com/gu/facia/client/lib/ApiTestClient.scala @@ -1,10 +1,10 @@ package com.gu.facia.client.lib -import com.amazonaws.auth.{ AWSStaticCredentialsProvider, BasicAWSCredentials} +import com.amazonaws.auth.{AWSStaticCredentialsProvider, BasicAWSCredentials} import com.amazonaws.regions.Regions import com.amazonaws.services.s3.{AmazonS3, AmazonS3ClientBuilder} import com.gu.facia.client.models.{CollectionJson, ConfigJson} -import com.gu.facia.client.{AmazonSdkS3Client, ApiClient} +import com.gu.facia.client.{AmazonSdkS3Client, ApiClient, Environment} import play.api.libs.json.{Format, Json} import scala.concurrent.ExecutionContext.Implicits.global @@ -14,14 +14,15 @@ object Amazon { val amazonS3Client = AmazonS3ClientBuilder.standard().withRegion(Regions.EU_WEST_1).withCredentials(new AWSStaticCredentialsProvider(new BasicAWSCredentials("key", "pass"))).build() } -class ApiTestClient extends ApiClient("bucket", "DEV", AmazonSdkS3Client(Amazon.amazonS3Client)) with ResourcesHelper { +class ApiTestClient extends ApiClient with ResourcesHelper { + private val environment = Environment.Dev private def retrieve[A: Format](key: String): Future[Option[A]] = Future.successful(slurp(key).map(Json.parse).flatMap(_.asOpt[A])) override def config: Future[ConfigJson] = - retrieve[ConfigJson](s"$environment/frontsapi/config/config.json").map(_.get) + retrieve[ConfigJson](environment.configS3Path).map(_.get) override def collection(id: String): Future[Option[CollectionJson]] = - retrieve[CollectionJson](s"$environment/frontsapi/collection/$id/collection.json") + retrieve[CollectionJson](environment.collectionS3Path(id)) } diff --git a/fapi-client-core/src/main/scala/com/gu/facia/client/Environment.scala b/fapi-client-core/src/main/scala/com/gu/facia/client/Environment.scala new file mode 100644 index 00000000..8df3ce0f --- /dev/null +++ b/fapi-client-core/src/main/scala/com/gu/facia/client/Environment.scala @@ -0,0 +1,15 @@ +package com.gu.facia.client + +case class Environment(name: String) { + private val s3PathPrefix: String = s"$name/frontsapi" + + val configS3Path: String = s"$s3PathPrefix/config/config.json" + def collectionS3Path(id: String): String = s"$s3PathPrefix/collection/$id/collection.json" +} + +object Environment { + val Prod = Environment("PROD") + val Code = Environment("CODE") + val Dev = Environment("DEV") + val Test = Environment("TEST") +} \ No newline at end of file diff --git a/facia-json/src/main/scala/com/gu/facia/client/FaciaResult.scala b/fapi-client-core/src/main/scala/com/gu/facia/client/FaciaResult.scala similarity index 100% rename from facia-json/src/main/scala/com/gu/facia/client/FaciaResult.scala rename to fapi-client-core/src/main/scala/com/gu/facia/client/FaciaResult.scala diff --git a/facia-json/src/main/scala/com/gu/facia/client/JsonDeserialisationError.scala b/fapi-client-core/src/main/scala/com/gu/facia/client/JsonDeserialisationError.scala similarity index 100% rename from facia-json/src/main/scala/com/gu/facia/client/JsonDeserialisationError.scala rename to fapi-client-core/src/main/scala/com/gu/facia/client/JsonDeserialisationError.scala diff --git a/fapi-client-core/src/main/scala/com/gu/facia/client/S3Client.scala b/fapi-client-core/src/main/scala/com/gu/facia/client/S3Client.scala new file mode 100644 index 00000000..e0537d52 --- /dev/null +++ b/fapi-client-core/src/main/scala/com/gu/facia/client/S3Client.scala @@ -0,0 +1,10 @@ +package com.gu.facia.client + +import scala.concurrent.Future + +/** For mocking in tests, but also to allow someone to define a properly asynchronous S3 client. (The one in the AWS + * SDK is unfortunately synchronous only.) + */ +trait S3Client { + def get(bucket: String, path: String): Future[FaciaResult] +} diff --git a/fapi-client-core/src/main/scala/com/gu/facia/client/etagcaching/fetching/S3FetchBehaviour.scala b/fapi-client-core/src/main/scala/com/gu/facia/client/etagcaching/fetching/S3FetchBehaviour.scala new file mode 100644 index 00000000..ee9d6ac7 --- /dev/null +++ b/fapi-client-core/src/main/scala/com/gu/facia/client/etagcaching/fetching/S3FetchBehaviour.scala @@ -0,0 +1,10 @@ +package com.gu.facia.client.etagcaching.fetching + +import com.gu.etagcaching.aws.s3.ObjectId +import com.gu.etagcaching.fetching.Fetching + +trait S3FetchBehaviour { + val fetching: Fetching[ObjectId, Array[Byte]] + + def fetchExceptionIndicatesContentMissing(t: Throwable): Boolean +} diff --git a/fapi-s3-sdk-v2/src/main/scala/com/gu/facia/client/aws/sdkv2/s3/AwsSdkV2.scala b/fapi-s3-sdk-v2/src/main/scala/com/gu/facia/client/aws/sdkv2/s3/AwsSdkV2.scala new file mode 100644 index 00000000..7bc149c8 --- /dev/null +++ b/fapi-s3-sdk-v2/src/main/scala/com/gu/facia/client/aws/sdkv2/s3/AwsSdkV2.scala @@ -0,0 +1,19 @@ +package com.gu.facia.client.aws.sdkv2.s3 + +import com.gu.etagcaching.aws.s3.ObjectId +import com.gu.etagcaching.aws.sdkv2.s3.S3ObjectFetching +import com.gu.etagcaching.aws.sdkv2.s3.response.Transformer.Bytes +import com.gu.etagcaching.fetching.Fetching +import com.gu.facia.client.etagcaching.fetching.S3FetchBehaviour +import software.amazon.awssdk.services.s3.S3AsyncClient +import software.amazon.awssdk.services.s3.model.NoSuchKeyException + +case class AwsSdkV2(s3AsyncClient: S3AsyncClient) extends S3FetchBehaviour { + override val fetching: Fetching[ObjectId, Array[Byte]] = + S3ObjectFetching(s3AsyncClient, Bytes).mapResponse(_.asByteArray()) + + override def fetchExceptionIndicatesContentMissing(t: Throwable): Boolean = t match { + case _: NoSuchKeyException => true + case _ => false + } +} diff --git a/project/dependencies.scala b/project/dependencies.scala index fd13f6be..944ea3d8 100644 --- a/project/dependencies.scala +++ b/project/dependencies.scala @@ -2,8 +2,11 @@ import sbt._ object Dependencies { val capiVersion = "19.2.3" + val eTagCachingVersion = "1.0.4" - val awsSdk = "com.amazonaws" % "aws-java-sdk-s3" % "1.12.523" + val awsS3SdkV1 = "com.amazonaws" % "aws-java-sdk-s3" % "1.12.523" + val eTagCachingS3Base = "com.gu.etag-caching" %% "aws-s3-base" % eTagCachingVersion + val eTagCachingS3SdkV2 = "com.gu.etag-caching" %% "aws-s3-sdk-v2" % eTagCachingVersion val commonsIo = "org.apache.commons" % "commons-io" % "1.3.2" val contentApi = "com.gu" %% "content-api-client" % capiVersion val contentApiDefault = "com.gu" %% "content-api-client-default" % capiVersion % Test