Skip to content

Commit

Permalink
Add ETag-caching (and AWS SDK v2) support
Browse files Browse the repository at this point in the history
This change adds these improvements:

* Facia data is only re-downloaded & re-parsed if the S3 content has
  _changed_, thanks to ETag-caching - see https://github.com/guardian/etag-caching .
  This library has already been used in DotCom PROD with guardian/frontend#26338
* AWS SDK v2: the FAPI client itself now has a `fapi-s3-sdk-v2` artifact.

An example PR consuming this updated version of the FAPI client is at:

guardian/ophan#5506

Updated FAPI artifact layout
----------------------------

To use FAPI with the new AWS SDK v2 support, users must now have a
dependency on *two* FAPI artifacts:

* `fapi-s3-sdk-v2`
* `fapi-client-playXX`

Due to needing to support the matrix of:

* AWS SDK v1 & v2
* Play-JSON 2.8, 2.9, 3.0

...it's best not to try to produce an artifact that corresponds to
every single combination of those! Consequently, we provide an
artifacts that are specific to the different versions of AWS SDK
(or at least, could do - if AWS SDK v1 was moved out of common code),
and artifacts that are specific to the different versions of
Play-JSON, and allow the user to combine them as needed. A
similar approach was used with `guardian/play-secret-rotation`:

guardian/play-secret-rotation#8

In order for the different artifacts to have interfaces they can
use to join together and become a single useful Facia client, we have
a `fapi-client-core` artifact. Any code that doesn't depend on the
JSON classes, or the actual AWS SDK version (which isn't much!), can
live in there. In particular, we have:

* `com.gu.facia.client.ApiClient`, an existing type that is now a
  trait, with 2 implementations - one that uses the existing
  `com.gu.facia.client.S3Client` abstraction on S3 behaviour
* `com.gu.facia.client.etagcaching.fetching.S3FetchBehaviour`,
  a new trait that exposes just enough interface to allow the
  conditional fetching used for ETag-based caching, but doesn't
  tie you to any specific version of the AWS SDK.
  • Loading branch information
rtyley committed Jul 11, 2024
1 parent a40be65 commit b1fbfdb
Show file tree
Hide file tree
Showing 11 changed files with 194 additions and 65 deletions.
76 changes: 49 additions & 27 deletions build.sbt
Original file line number Diff line number Diff line change
@@ -1,16 +1,14 @@
import Dependencies.*
import sbtrelease.ReleaseStateTransformations.*
import Dependencies._
import sbtrelease.ReleaseStateTransformations._
import sbtversionpolicy.withsbtrelease.ReleaseVersion.fromAggregatedAssessedCompatibilityWithLatestRelease

organization := "com.gu"

name := "facia-api-client"

description := "Scala client for The Guardian's Facia JSON API"

val sonatypeReleaseSettings = Seq(
licenses := Seq("Apache V2" -> url("https://www.apache.org/licenses/LICENSE-2.0.html")),
// releaseVersion := fromAggregatedAssessedCompatibilityWithLatestRelease().value,
licenses := Seq(License.Apache2),
releaseVersion := fromAggregatedAssessedCompatibilityWithLatestRelease().value,
releaseCrossBuild := true, // true if you cross-build the project for multiple Scala versions
releaseProcess := Seq[ReleaseStep](
checkSnapshotDependencies,
Expand All @@ -24,12 +22,17 @@ val sonatypeReleaseSettings = Seq(
)
)

def baseSettings(supportScala3: Boolean) = Seq(
organization := "com.gu",
licenses := Seq("Apache V2" -> url("https://www.apache.org/licenses/LICENSE-2.0.html"))
)

lazy val root = (project in file(".")).aggregate(
faciaJson_play27,
faciaJson_play28,
faciaJson_play29,
faciaJson_play30,
fapiClient_play27,
fapiClient_core,
fapiClient_s3_sdk_v2,
fapiClient_play28,
fapiClient_play29,
fapiClient_play30
Expand All @@ -43,49 +46,68 @@ def baseProject(module: String, playJsonVersion: PlayJsonVersion) = Project(s"$m
sourceDirectory := baseDirectory.value / s"../$module/src",
organization := "com.gu",
resolvers ++= Resolver.sonatypeOssRepos("releases"),
scalaVersion := "2.13.11",
crossScalaVersions := Seq(scalaVersion.value, "2.12.18"), // ++ (if (playJsonVersion.supportsScala3) Seq("3.3.1") else Seq.empty),
scalaVersion := "2.13.14",
crossScalaVersions := Seq(scalaVersion.value, "2.12.19"), // ++ (if (playJsonVersion.supportsScala3) Seq("3.3.1") else Seq.empty),
scalacOptions := Seq(
"-release:11",
"-feature",
"-deprecation",
"-Xfatal-warnings"
"-release:11",
"-feature",
"-deprecation",
"-Xfatal-warnings"
),
libraryDependencies += scalaTest,
sonatypeReleaseSettings
)

def faciaJson_playJsonVersion(playJsonVersion: PlayJsonVersion) = baseProject("facia-json", playJsonVersion)
def playJsonSpecificProject(module: String, playJsonVersion: PlayJsonVersion) = Project(s"$module-${playJsonVersion.projectId}", file(s"$module-${playJsonVersion.projectId}"))
.settings(
sourceDirectory := baseDirectory.value / s"../$module/src"
)

def faciaJson(playJsonVersion: PlayJsonVersion) = playJsonSpecificProject("facia-json", playJsonVersion)
.dependsOn(fapiClient_core)
.settings(
libraryDependencies ++= Seq(
awsSdk,
awsS3SdkV1, // ideally, this would be pushed out to a separate FAPI artifact
commonsIo,
playJsonVersion.lib,
"org.scala-lang.modules" %% "scala-collection-compat" % "2.11.0",
scalaLogging
)
),
baseSettings(supportScala3 = playJsonVersion.supportsScala3)
)

def fapiClient_playJsonVersion(playJsonVersion: PlayJsonVersion) = baseProject("fapi-client", playJsonVersion)
def fapiClient(playJsonVersion: PlayJsonVersion) = playJsonSpecificProject("fapi-client", playJsonVersion)
.settings(
libraryDependencies ++= Seq(
contentApi,
contentApiDefault,
commercialShared,
scalaTestMockito,
mockito
)
),
baseSettings(supportScala3 = false) // currently blocked by contentApi & commercialShared clients
)

lazy val faciaJson_play27 = faciaJson_playJsonVersion(PlayJsonVersion.V27)
lazy val faciaJson_play28 = faciaJson_playJsonVersion(PlayJsonVersion.V28)
lazy val faciaJson_play29 = faciaJson_playJsonVersion(PlayJsonVersion.V29)
lazy val faciaJson_play30 = faciaJson_playJsonVersion(PlayJsonVersion.V30)
lazy val faciaJson_play28 = faciaJson(PlayJsonVersion.V28)
lazy val faciaJson_play29 = faciaJson(PlayJsonVersion.V29)
lazy val faciaJson_play30 = faciaJson(PlayJsonVersion.V30)

lazy val fapiClient_core = (project in file("fapi-client-core")).settings(
libraryDependencies ++= Seq(
eTagCachingS3Base
),
baseSettings(supportScala3 = true)
)

lazy val fapiClient_s3_sdk_v2 = (project in file("fapi-s3-sdk-v2")).dependsOn(fapiClient_core)
.settings(
libraryDependencies += eTagCachingS3SdkV2,
baseSettings(supportScala3 = true)
)

lazy val fapiClient_play27 = fapiClient_playJsonVersion(PlayJsonVersion.V27).dependsOn(faciaJson_play27)
lazy val fapiClient_play28 = fapiClient_playJsonVersion(PlayJsonVersion.V28).dependsOn(faciaJson_play28)
lazy val fapiClient_play29 = fapiClient_playJsonVersion(PlayJsonVersion.V29).dependsOn(faciaJson_play29)
lazy val fapiClient_play30 = fapiClient_playJsonVersion(PlayJsonVersion.V30).dependsOn(faciaJson_play30)
lazy val fapiClient_play28 = fapiClient(PlayJsonVersion.V28).dependsOn(faciaJson_play28)
lazy val fapiClient_play29 = fapiClient(PlayJsonVersion.V29).dependsOn(faciaJson_play29)
lazy val fapiClient_play30 = fapiClient(PlayJsonVersion.V30).dependsOn(faciaJson_play30)

Test/testOptions += Tests.Argument(
TestFrameworks.ScalaTest,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,18 +1,14 @@
package com.gu.facia.client

import com.amazonaws.regions.{Region, Regions}
import com.amazonaws.services.s3.{AmazonS3, AmazonS3ClientBuilder}
import com.amazonaws.regions.Regions
import com.amazonaws.services.s3.model.AmazonS3Exception
import com.amazonaws.services.s3.{AmazonS3, AmazonS3ClientBuilder}
import org.apache.commons.io.IOUtils

import scala.concurrent.{ExecutionContext, Future, blocking}
import scala.util.Try

/** For mocking in tests, but also to allow someone to define a properly asynchronous S3 client. (The one in the AWS
* SDK is unfortunately synchronous only.)
*/
trait S3Client {
def get(bucket: String, path: String): Future[FaciaResult]
}


case class AmazonSdkS3Client(client: AmazonS3)(implicit executionContext: ExecutionContext) extends S3Client {
def get(bucket: String, path: String): Future[FaciaResult] = Future {
Expand Down
100 changes: 77 additions & 23 deletions facia-json/src/main/scala/com/gu/facia/client/ApiClient.scala
Original file line number Diff line number Diff line change
@@ -1,37 +1,91 @@
package com.gu.facia.client

import com.gu.facia.client.models.{ConfigJson, CollectionJson}
import com.gu.etagcaching.FreshnessPolicy.AlwaysWaitForRefreshedValue
import com.gu.etagcaching.aws.s3.ObjectId
import com.gu.etagcaching.{ConfigCache, ETagCache}
import com.gu.facia.client.etagcaching.fetching.S3FetchBehaviour
import com.gu.facia.client.models.{CollectionJson, ConfigJson}
import play.api.libs.json.{Format, Json}

import scala.concurrent.{ExecutionContext, Future}

object ApiClient {
val Encoding = "utf-8"
trait ApiClient {
def config: Future[ConfigJson]

def collection(id: String): Future[Option[CollectionJson]]
}

case class ApiClient(
object ApiClient {
private val Encoding = "utf-8"

def apply(
bucket: String,
/** e.g., CODE, PROD, DEV ... */
environment: String,
environment: String, // e.g., CODE, PROD, DEV ...
s3Client: S3Client
)(implicit executionContext: ExecutionContext) {
import com.gu.facia.client.ApiClient._

private def retrieve[A: Format](key: String): Future[Option[A]] = s3Client.get(bucket, key) map {
case FaciaSuccess(bytes) =>
Some(Json.fromJson[A](Json.parse(new String(bytes, Encoding))) getOrElse {
throw new JsonDeserialisationError(s"Could not deserialize JSON in $bucket, $key")
})
case FaciaNotAuthorized(message) => throw new BackendError(message)
case FaciaNotFound(_) => None
case FaciaUnknownError(message) => throw new BackendError(message)
)(implicit executionContext: ExecutionContext): ApiClient = new ApiClient {
val env: Environment = Environment(environment)

private def retrieve[A: Format](key: String): Future[Option[A]] = s3Client.get(bucket, key).map(translateFaciaResult[A](_))

def config: Future[ConfigJson] =
retrieve[ConfigJson](env.configS3Path).map(getOrWarnAboutMissingConfig)

def collection(id: String): Future[Option[CollectionJson]] =
retrieve[CollectionJson](env.collectionS3Path(id))
}

private def getOrWarnAboutMissingConfig(configOpt: Option[ConfigJson]): ConfigJson =
configOpt getOrElse throwConfigMissingException()

private def throwConfigMissingException() = throw BackendError("Config was missing!! NOT GOOD!")

private def translateFaciaResult[B: Format](faciaResult: FaciaResult): Option[B] = faciaResult match {
case FaciaSuccess(bytes) => Some(parseBytes(bytes))
case FaciaNotAuthorized(message) => throw BackendError(message)
case FaciaNotFound(_) => None
case FaciaUnknownError(message) => throw BackendError(message)
}

def config: Future[ConfigJson] =
retrieve[ConfigJson](s"$environment/frontsapi/config/config.json").map(_ getOrElse {
throw new BackendError("Config was missing!! OH MY GOD")
})
private def parseBytes[B: Format](bytes: Array[Byte]): B =
Json.fromJson[B](Json.parse(new String(bytes, Encoding))) getOrElse {
throw JsonDeserialisationError(s"Could not deserialize JSON")
}

def withCaching(
bucket: String,
s3FetchBehaviour: S3FetchBehaviour,
environment: Environment,
configureCollectionCache: ConfigCache = _.maximumSize(10000) // at most 1GB RAM worst case
)(implicit ec: ExecutionContext): ApiClient =
new ApiClient {
private val fetching =
s3FetchBehaviour.fetching.keyOn[String](path => ObjectId(bucket, path))
/* TODO - .suppressExceptionLoggingIf(s3FetchBehaviour.fetchExceptionIndicatesContentMissing)
* Need to settle on an approach - see https://github.com/guardian/etag-caching/pull/32
*/

def eTagCache[B: Format](configureCache: ConfigCache) = new ETagCache(
fetching.thenParsing(parseBytes[B]),
AlwaysWaitForRefreshedValue,
configureCache
)

private val configCache = eTagCache[ConfigJson](_.maximumSize(1))
private val collectionCache = eTagCache[CollectionJson](configureCollectionCache)

override def config: Future[ConfigJson] = configCache.get(environment.configS3Path).recover {
case t if s3FetchBehaviour.fetchExceptionIndicatesContentMissing(t) =>
throwConfigMissingException()
}

def collection(id: String): Future[Option[CollectionJson]] =
retrieve[CollectionJson](s"$environment/frontsapi/collection/$id/collection.json")
override def collection(id: String): Future[Option[CollectionJson]] =
collectionCache.get(environment.collectionS3Path(id)).map(Some(_)).recover {
case t if {
val boo = s3FetchBehaviour.fetchExceptionIndicatesContentMissing(t)
println(s"Exception is content missing: $boo")
boo
} => None
case t => throw BackendError(t.getMessage)
}
}
}
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
package com.gu.facia.client.lib

import com.amazonaws.auth.{ AWSStaticCredentialsProvider, BasicAWSCredentials}
import com.amazonaws.auth.{AWSStaticCredentialsProvider, BasicAWSCredentials}
import com.amazonaws.regions.Regions
import com.amazonaws.services.s3.{AmazonS3, AmazonS3ClientBuilder}
import com.gu.facia.client.models.{CollectionJson, ConfigJson}
import com.gu.facia.client.{AmazonSdkS3Client, ApiClient}
import com.gu.facia.client.{AmazonSdkS3Client, ApiClient, Environment}
import play.api.libs.json.{Format, Json}

import scala.concurrent.ExecutionContext.Implicits.global
Expand All @@ -14,14 +14,15 @@ object Amazon {
val amazonS3Client = AmazonS3ClientBuilder.standard().withRegion(Regions.EU_WEST_1).withCredentials(new AWSStaticCredentialsProvider(new BasicAWSCredentials("key", "pass"))).build()
}

class ApiTestClient extends ApiClient("bucket", "DEV", AmazonSdkS3Client(Amazon.amazonS3Client)) with ResourcesHelper {
class ApiTestClient extends ApiClient with ResourcesHelper {
private val environment = Environment.Dev

private def retrieve[A: Format](key: String): Future[Option[A]] =
Future.successful(slurp(key).map(Json.parse).flatMap(_.asOpt[A]))

override def config: Future[ConfigJson] =
retrieve[ConfigJson](s"$environment/frontsapi/config/config.json").map(_.get)
retrieve[ConfigJson](environment.configS3Path).map(_.get)

override def collection(id: String): Future[Option[CollectionJson]] =
retrieve[CollectionJson](s"$environment/frontsapi/collection/$id/collection.json")
retrieve[CollectionJson](environment.collectionS3Path(id))
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package com.gu.facia.client

case class Environment(name: String) {
private val s3PathPrefix: String = s"$name/frontsapi"

val configS3Path: String = s"$s3PathPrefix/config/config.json"
def collectionS3Path(id: String): String = s"$s3PathPrefix/collection/$id/collection.json"
}

object Environment {
val Prod = Environment("PROD")
val Code = Environment("CODE")
val Dev = Environment("DEV")
val Test = Environment("TEST")
}
10 changes: 10 additions & 0 deletions fapi-client-core/src/main/scala/com/gu/facia/client/S3Client.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package com.gu.facia.client

import scala.concurrent.Future

/** For mocking in tests, but also to allow someone to define a properly asynchronous S3 client. (The one in the AWS
* SDK is unfortunately synchronous only.)
*/
trait S3Client {
def get(bucket: String, path: String): Future[FaciaResult]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package com.gu.facia.client.etagcaching.fetching

import com.gu.etagcaching.aws.s3.ObjectId
import com.gu.etagcaching.fetching.Fetching

trait S3FetchBehaviour {
val fetching: Fetching[ObjectId, Array[Byte]]

def fetchExceptionIndicatesContentMissing(t: Throwable): Boolean
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package com.gu.facia.client.aws.sdkv2.s3

import com.gu.etagcaching.aws.s3.ObjectId
import com.gu.etagcaching.aws.sdkv2.s3.S3ObjectFetching
import com.gu.etagcaching.aws.sdkv2.s3.response.Transformer.Bytes
import com.gu.etagcaching.fetching.Fetching
import com.gu.facia.client.etagcaching.fetching.S3FetchBehaviour
import software.amazon.awssdk.services.s3.S3AsyncClient
import software.amazon.awssdk.services.s3.model.NoSuchKeyException

case class AwsSdkV2(s3AsyncClient: S3AsyncClient) extends S3FetchBehaviour {
override val fetching: Fetching[ObjectId, Array[Byte]] =
S3ObjectFetching(s3AsyncClient, Bytes).mapResponse(_.asByteArray())

override def fetchExceptionIndicatesContentMissing(t: Throwable): Boolean = t match {
case _: NoSuchKeyException => true
case _ => false
}
}
6 changes: 4 additions & 2 deletions project/dependencies.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,11 @@ import sbt._

object Dependencies {
val capiVersion = "23.0.0"
val eTagCachingVersion = "4.0.0-PREVIEW.suppress-caffeine-exceptions-by-allowing-fetch-results-to-indicate-missing-keys.2024-07-09T1701.cbc4304a"

val awsSdk = "com.amazonaws" % "aws-java-sdk-s3" % "1.12.524"
val awsS3SdkV1 = "com.amazonaws" % "aws-java-sdk-s3" % "1.12.524"
val eTagCachingS3Base = "com.gu.etag-caching" %% "aws-s3-base" % eTagCachingVersion
val eTagCachingS3SdkV2 = "com.gu.etag-caching" %% "aws-s3-sdk-v2" % eTagCachingVersion
val commonsIo = "org.apache.commons" % "commons-io" % "1.3.2"
val contentApi = "com.gu" %% "content-api-client" % capiVersion
val contentApiDefault = "com.gu" %% "content-api-client-default" % capiVersion % Test
Expand All @@ -25,7 +28,6 @@ object Dependencies {
}

object PlayJsonVersion {
val V27 = PlayJsonVersion("27", "com.typesafe.play", "2.7.4")
val V28 = PlayJsonVersion("28", "com.typesafe.play", "2.8.2")
val V29 = PlayJsonVersion("29", "com.typesafe.play", "2.10.4", supportsScala3 = true)
val V30 = PlayJsonVersion("30", "org.playframework", "3.0.1", supportsScala3 = true)
Expand Down

0 comments on commit b1fbfdb

Please sign in to comment.