Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Don't request all the blocks for liveblog main page #13566

Merged
merged 16 commits into from
Jul 18, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
141 changes: 92 additions & 49 deletions article/app/controllers/ArticleController.scala
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
package controllers

import liveblog.LiveBlogCurrentPage
import com.gu.contentapi.client.model.v1.{ItemResponse, Content => ApiContent}
import _root_.liveblog._
import com.gu.contentapi.client.model.v1.{Content => ApiContent, ItemResponse}
import common._
import conf.switches.Switches
import contentapi.ContentApiClient
import controllers.ParseBlockId.{InvalidFormat, ParsedBlockId}
import model.Cached.WithoutRevalidationResult
import model._
import model.liveblog.{BodyBlock, KeyEventData}
import model.liveblog.BodyBlock
import org.joda.time.DateTime
import org.scala_tools.time.Imports._
import play.api.libs.functional.syntax._
Expand All @@ -32,11 +33,15 @@ class ArticleController extends Controller with RendersItemResponse with Logging

private def isSupported(c: ApiContent) = c.isArticle || c.isLiveBlog || c.isSudoku
override def canRender(i: ItemResponse): Boolean = i.content.exists(isSupported)
override def renderItem(path: String)(implicit request: RequestHeader): Future[Result] = mapModel(path, blocks = true)(render(path, _))
override def renderItem(path: String)(implicit request: RequestHeader): Future[Result] = mapModel(path, Some(Canonical))(render(path, _))


private def renderNewerUpdates(page: PageWithStoryPackage, lastUpdateBlockId: String, isLivePage: Option[Boolean])(implicit request: RequestHeader): Result = {
val newBlocks = page.article.fields.blocks.takeWhile(block => s"block-${block.id}" != lastUpdateBlockId)
private def renderNewerUpdates(page: PageWithStoryPackage, lastUpdateBlockId: SinceBlockId, isLivePage: Option[Boolean])(implicit request: RequestHeader): Result = {
val newBlocks = page.article.fields.blocks.toSeq.flatMap {
_.requestedBodyBlocks.getOrElse(lastUpdateBlockId.around, Seq())
}.takeWhile { block =>
block.id != lastUpdateBlockId.lastUpdate
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assume blocks are always sorted from the most recent. right?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually CAPI doesn't guarantee that. We just give you the blocks in whatever order we get them from Composer. Might be worth sorting them somewhere on your side if you're not already.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what should we sort by? not sure which fields are guaranteed to be there as they're all options when they get to me - published, first published, last modified, created

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm guessing based on your other PR comment I can sort on published date - I'll do that

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fyi I'm now sorting it in 58ab38d and a0ecfd0

val blocksHtml = views.html.liveblog.liveBlogBlocks(newBlocks, page.article, Edition(request).timezone)
val timelineHtml = views.html.liveblog.keyEvents("", KeyEventData(newBlocks, Edition(request).timezone))
val allPagesJson = Seq(
Expand All @@ -46,7 +51,7 @@ class ArticleController extends Controller with RendersItemResponse with Logging
val livePageJson = isLivePage.filter(_ == true).map { _ =>
"html" -> blocksHtml
}
val mostRecent = page.article.fields.blocks.headOption.map { block =>
val mostRecent = newBlocks.headOption.map { block =>
"mostRecentBlockId" -> s"block-${block.id}"
}
Cached(page)(JsonComponent((allPagesJson ++ livePageJson ++ mostRecent): _*))
Expand All @@ -70,9 +75,14 @@ class ArticleController extends Controller with RendersItemResponse with Logging

private def blockText(page: PageWithStoryPackage, number: Int)(implicit request: RequestHeader): Result = page match {
case LiveBlogPage(liveBlog, _, _) =>
val blocks = liveBlog.blocks.collect {
case BodyBlock(id, html, _, title, _, _, _, publishedAt, _, updatedAt, _, _) if html.trim.nonEmpty =>
TextBlock(id, title, publishedAt, updatedAt, html)
val blocks =
liveBlog.blocks.toSeq.flatMap { blocks =>
blocks.requestedBodyBlocks.get(Canonical.firstPage).toSeq.flatMap { bodyBlocks: Seq[BodyBlock] =>
bodyBlocks.collect {
case BodyBlock(id, html, _, title, _, _, _, publishedAt, _, updatedAt, _, _) if html.trim.nonEmpty =>
TextBlock(id, title, publishedAt, updatedAt, html)
}
}
}.take(number)
Cached(page)(JsonComponent("blocks" -> Json.toJson(blocks)))
case _ => Cached(600)(WithoutRevalidationResult(NotFound("Can only return block text for a live blog")))
Expand Down Expand Up @@ -118,19 +128,34 @@ class ArticleController extends Controller with RendersItemResponse with Logging

def renderLiveBlog(path: String, page: Option[String] = None) =
Action.async { implicit request =>
mapModel(path, blocks = true, page) {// temporarily only ask for blocks too for things we know are new live blogs until until the migration is done and we can always use blocks
render(path, _)

def renderWithRange(range: BlockRange) =
mapModel(path, range = Some(range)) {// temporarily only ask for blocks too for things we know are new live blogs until until the migration is done and we can always use blocks
render(path, _)
}

page.map(ParseBlockId.fromPageParam) match {
case Some(ParsedBlockId(id)) => renderWithRange(PageWithBlock(id)) // we know the id of a block
case Some(InvalidFormat) => Future.successful(Cached(10)(WithoutRevalidationResult(NotFound))) // page param there but couldn't extract a block id
case None => renderWithRange(Canonical) // no page param
}
}

def renderLiveBlogJson(path: String, lastUpdate: Option[String], rendered: Option[Boolean], isLivePage: Option[Boolean]) = {
Action.async { implicit request =>
mapModel(path, blocks = true) { model =>
(lastUpdate, rendered) match {
case (Some(lastUpdate), _) => renderNewerUpdates(model, lastUpdate, isLivePage)
case (None, Some(false)) => blockText(model, 6)
case (_, _) => render(path, model)

def renderWithRange(range: BlockRange) =
mapModel(path, Some(range)) { model =>
range match {
case SinceBlockId(lastBlockId) => renderNewerUpdates(model, SinceBlockId(lastBlockId), isLivePage)
case _ => render(path, model)
}
}

lastUpdate.map(ParseBlockId.fromBlockId) match {
case Some(ParsedBlockId(id)) => renderWithRange(SinceBlockId(id))
case Some(InvalidFormat) => Future.successful(Cached(10)(WithoutRevalidationResult(NotFound))) // page param there but couldn't extract a block id
case None => if (rendered.contains(false)) mapModel(path) { model => blockText(model, 6) } else renderWithRange(Canonical) // no page param
}
}
}
Expand All @@ -145,20 +170,22 @@ class ArticleController extends Controller with RendersItemResponse with Logging

def renderArticle(path: String) = {
Action.async { implicit request =>
mapModel(path, blocks = request.isEmail) {
mapModel(path, range = if (request.isEmail) Some(ArticleBlocks) else None) {
render(path, _)
}
}
}

def mapModel(path: String, blocks: Boolean = false, pageParam: Option[String] = None)(render: PageWithStoryPackage => Result)(implicit request: RequestHeader): Future[Result] = {
lookup(path, blocks) map responseToModelOrResult(pageParam) recover convertApiExceptions map {
// range: None means the url didn't include /live/, Some(...) means it did. Canonical just means no url parameter
// if we switch to using blocks instead of body for articles, then it no longer needs to be Optional
def mapModel(path: String, range: Option[BlockRange] = None)(render: PageWithStoryPackage => Result)(implicit request: RequestHeader): Future[Result] = {
lookup(path, range) map responseToModelOrResult(range) recover convertApiExceptions map {
case Left(model) => render(model)
case Right(other) => RenderOtherStatus(other)
}
}

private def lookup(path: String, blocks: Boolean)(implicit request: RequestHeader): Future[ItemResponse] = {
private def lookup(path: String, range: Option[BlockRange])(implicit request: RequestHeader): Future[ItemResponse] = {
val edition = Edition(request)

log.info(s"Fetching article: $path for edition ${edition.id}: ${RequestLog(request)}")
Expand All @@ -168,7 +195,10 @@ class ArticleController extends Controller with RendersItemResponse with Logging
.showReferences("all")
.showAtoms("all")

val capiItemWithBlocks = if (blocks) capiItem.showBlocks("body") else capiItem
val capiItemWithBlocks = range.map { blockRange =>
val blocksParam = blockRange.query.map(_.mkString(",")).getOrElse("body")
capiItem.showBlocks(blocksParam)
}.getOrElse(capiItem)
ContentApiClient.getResponse(capiItemWithBlocks)

}
Expand All @@ -180,35 +210,34 @@ class ArticleController extends Controller with RendersItemResponse with Logging
* @param response
* @return Either[PageWithStoryPackage, Result]
*/
def responseToModelOrResult(pageParam: Option[String])(response: ItemResponse)(implicit request: RequestHeader): Either[PageWithStoryPackage, Result] = {
def responseToModelOrResult(range: Option[BlockRange])(response: ItemResponse)(implicit request: RequestHeader): Either[PageWithStoryPackage, Result] = {
val supportedContent = response.content.filter(isSupported).map(Content(_))
val page = pageParam.map(ParseBlockId.apply)
val supportedContentResult = ModelOrResult(supportedContent, response)
val content: Either[PageWithStoryPackage, Result] = supportedContentResult.left.flatMap { content =>
(content, page) match {
case (minute: Article, None) if minute.isUSMinute =>
Left(MinutePage(minute, StoryPackages(minute, response)))
case (liveBlog: Article, None/*no page param*/) if liveBlog.isLiveBlog =>
createLiveBlogModel(liveBlog, response, None)
case (liveBlog: Article, Some(Some(requiredBlockId))/*page param specified and valid format*/) if liveBlog.isLiveBlog =>
createLiveBlogModel(liveBlog, response, Some(requiredBlockId))
case (article: Article, None) => Left(ArticlePage(article, StoryPackages(article, response)))
case _ =>
Right(NotFound)
}
val content: Either[PageWithStoryPackage, Result] = supportedContentResult.left.flatMap {
case minute: Article if minute.isUSMinute =>
Left(MinutePage(minute, StoryPackages(minute, response)))
case liveBlog: Article if liveBlog.isLiveBlog =>
range.map {
createLiveBlogModel(liveBlog, response, _)
}.getOrElse(Right(NotFound))
case article: Article =>
Left(ArticlePage(article, StoryPackages(article, response)))
}

content
}

def createLiveBlogModel(liveBlog: Article, response: ItemResponse, maybeRequiredBlockId: Option[String]) = {
def createLiveBlogModel(liveBlog: Article, response: ItemResponse, range: BlockRange) = {

val pageSize = if (liveBlog.content.tags.tags.map(_.id).contains("sport/sport")) 30 else 10
val liveBlogPageModel = LiveBlogCurrentPage(
pageSize = pageSize,
liveBlog.content.fields.blocks,
maybeRequiredBlockId
)
val liveBlogPageModel =
liveBlog.content.fields.blocks.map { blocks =>
LiveBlogCurrentPage(
pageSize = pageSize,
blocks,
range
)
} getOrElse None
liveBlogPageModel match {
case Some(pageModel) =>

Expand All @@ -235,15 +264,29 @@ class ArticleController extends Controller with RendersItemResponse with Logging
}

object ParseBlockId extends RegexParsers {
def apply(input: String): Option[String] = {
def withParser: Parser[Unit] = "with:" ^^ { _ => () }
def block: Parser[Unit] = "block-" ^^ { _ => () }
def id: Parser[String] = "[a-zA-Z0-9]+".r
def expr: Parser[String] = withParser ~> block ~> id

Copy link
Contributor

@cb372 cb372 Jul 13, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

By the way, parsing with regex like this is pretty slow. Hard to say without profiling whether it's worth worrying about, but rewriting this parser using parboiled might be a quick performance win.

EDIT: Here's an example of what a parboiled parser looks like.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 to parboiled!
We've use it on the grid too (perhaps some refinements needed)

Copy link
Member Author

@johnduffell johnduffell Jul 13, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks, yeah in the grand scheme of this PR it won't be much (it happens once per request) but Phil W pointed me at parboiled for something else so I'll be using it in future. I'll add it to my list of tweaks to do once I've got the big perf stuff out of the way as it's a good idea.

sealed trait ParseResult { def toOption: Option[String] }
case object InvalidFormat extends ParseResult { val toOption = None }
case class ParsedBlockId(blockId: String) extends ParseResult { val toOption = Some(blockId) }

private def withParser: Parser[Unit] = "with:" ^^ { _ => () }
private def block: Parser[Unit] = "block-" ^^ { _ => () }
private def id: Parser[String] = "[a-zA-Z0-9]+".r
private def blockId = block ~> id

def fromPageParam(input: String): ParseResult = {
def expr: Parser[String] = withParser ~> blockId

parse(expr, input) match {
case Success(matched, _) => Some(matched)
case _ => None
case Success(matched, _) => ParsedBlockId(matched)
case _ => InvalidFormat
}
}

def fromBlockId(input: String): ParseResult = {
parse(blockId, input) match {
case Success(matched, _) => ParsedBlockId(matched)
case _ => InvalidFormat
}
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fromBlockId and fromPageParam have similar name and input param but different output types. Can we make them have a similar and consistant signature so when using either of them you don't have to think to much or transform the output?

}
32 changes: 32 additions & 0 deletions article/app/liveblog/KeyEventData.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package liveblog

import model.liveblog.BodyBlock.{KeyEvent, SummaryEvent}
import model.liveblog.{Blocks, BodyBlock, LiveBlogDate}
import org.scala_tools.time.Imports._

object KeyEventData {

// just for convenience for use from the templates
def apply(maybeBlocks: Option[Blocks], timezone: DateTimeZone): Seq[KeyEventData] = {

val blocks = maybeBlocks.toSeq.flatMap(blocks => blocks.requestedBodyBlocks.getOrElse(Canonical.timeline, blocks.body))

apply(blocks, timezone)
}

def apply(blocks: Seq[BodyBlock], timezone: DateTimeZone): Seq[KeyEventData] = {

val TimelineMaxEntries = 7

val latestSummary = blocks.find(_.eventType == SummaryEvent)
val keyEvents = blocks.filter(_.eventType == KeyEvent)
val bodyBlocks = (latestSummary.toSeq ++ keyEvents).sortBy(_.lastModifiedDate.getOrElse(new DateTime(0)).millis).reverse.take(TimelineMaxEntries)

bodyBlocks.map { bodyBlock =>
KeyEventData(bodyBlock.id, bodyBlock.publishedCreatedDate(timezone), bodyBlock.title)
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This foldLeft is clever, but I had to read it a few times before I understood it. It might be kinder to your fellow devs (and your future self) to write it as:

  1. Find the latest summary
  2. Find the key events
  3. Merge them together and take the latest 7

e.g.

val latestSummary = blocks.find(_.eventType == SummaryEvent)
val keyEvents = blocks.filter(_.eventType == KeyEvent)
(latestSummary.toSeq ++ keyEvents).sortBy(_.lastModified).reverse.take(7)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh yeah that's actually old code I wrote, maybe I was trying to be more efficient by only looping through once, yours is much better

}

}

case class KeyEventData(id: String, time: Option[LiveBlogDate], title: Option[String])
75 changes: 72 additions & 3 deletions article/app/liveblog/LiveBlogCurrentPage.scala
Original file line number Diff line number Diff line change
@@ -1,10 +1,71 @@
package liveblog

import model.liveblog.BodyBlock
import model.liveblog.{Blocks, BodyBlock}

sealed trait BlockRange { def query: Option[Seq[String]] }
case object Canonical extends BlockRange {
val firstPage = "body:latest:30"
val oldestPage = "body:oldest:1"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why only 1?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just need to get any block id in the last page so we can work out the link for the navigation bar when you click "oldest"

val timeline = "body:key-events"
// this only makes sense for liveblogs at the moment, but article use field body not blocks anyway
val query = Some(Seq(firstPage, oldestPage, timeline))
}
case class PageWithBlock(page: String) extends BlockRange {
// just get them all, the caching should prevent too many requests, could use "around"
val query = None
}
case object ArticleBlocks extends BlockRange {
// we currently only use this for emails, we just want all the blocks
val query = None
}
case class SinceBlockId(lastUpdate: String) extends BlockRange {
val around = s"body:around:$lastUpdate:5"
// more than 5 could come in (in one go), but unlikely and won't matter as it'll just fetch again soon
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What decides how many block are coming in?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@TBonnin what I mean is the endpoint is checked every 5 seconds when you are looking at the live blog, but if you leave it in the background it can increase as far as 20 minutes. It's possible the blogger would add more than 5 posts within that time.

current view:

vvv-newest          oldest-vvv
"i, j, k, l, m, n, o, p, q, r"

stuff that exists in capi
a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r

next update w'nt get us up to date:
"d, e, f, g, h"

subsequent update gets us up to date:
"a, b, c"

ideally it would get all 8 in the update, but it's a tradeoff between requesting a lot of blocks and re-requesting.

Also, in a susequent PR I'm going to make it cache responses once they become non-empty, so it would probably be more like:

update 1:
h
update 2:
g
update 3:
f
....etc....

however at that point I'll probably make it short circuit the delay when it gets a non empty response

val query = Some(Seq(around))
}

object LiveBlogCurrentPage {

def apply(pageSize: Int, blocks: Seq[BodyBlock], isRequestedBlock: Option[String]): Option[LiveBlogCurrentPage] = {
def apply(pageSize: Int, blocks: Blocks, range: BlockRange): Option[LiveBlogCurrentPage] = {
range match {
case Canonical => firstPage(pageSize, blocks)
case PageWithBlock(isRequestedBlock) => findPageWithBlock(pageSize, blocks.body, isRequestedBlock)
case SinceBlockId(blockId) => updates(blocks, SinceBlockId(blockId))
case ArticleBlocks => None
}
}

// filters newer blocks out of the list
def updates(blocks: Blocks, sinceBlockId: SinceBlockId) = {
val bodyBlocks = blocks.requestedBodyBlocks.get(sinceBlockId.around).toSeq.flatMap { bodyBlocks =>
bodyBlocks.takeWhile(_.id != sinceBlockId.lastUpdate)
}
Some(LiveBlogCurrentPage(FirstPage(bodyBlocks), None))// just pretend to be the first page, it'll be ignored
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand the comment here. Do you mind explaining?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah ok that's just because of the code structure, we have to turn a capi response into a frontend model in order to render it. Normally this would be all the info needed for the article, liveblog, gallery, front, etc. However now that the liveblog model includes knowledge about which page we're on, and only includes a subset of the blocks, the liveblog model doesn't quite work for "updates since block-x" as there's no concept of "which page we're on".
I could have added a model type for LiveBlogUpdatesSince, but I thought for simplicity I'd just call it a FirstPage and embed the blocks in there. The fact that there's no pagination and various other things doesn't matter to the code that eventually produces the json.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it's not nice, but at least the comment is there to excuse myself not doing more refactoring ;)

}

// turns the slimmed down (to save bandwidth) capi response into a first page model object
def firstPage(pageSize: Int, blocks: Blocks): Option[LiveBlogCurrentPage] = {
val remainder = blocks.totalBodyBlocks % pageSize
val numPages = blocks.totalBodyBlocks / pageSize
blocks.requestedBodyBlocks.get(Canonical.firstPage).map { requestedBodyBlocks =>
val (firstPageBlocks, startOfSecondPageBlocks) = requestedBodyBlocks.splitAt(remainder + pageSize)
val oldestPage = blocks.requestedBodyBlocks.get(Canonical.oldestPage).flatMap(_.headOption.map { block =>
BlockPage(blocks = Nil, blockId = block.id, pageNumber = numPages)
} )
val olderPage = startOfSecondPageBlocks.headOption.map { block => BlockPage(blocks = Nil, blockId = block.id, pageNumber = 2) }
val pagination = if (blocks.totalBodyBlocks > firstPageBlocks.size) Some(Pagination(
newest = None,
newer = None,
oldest = oldestPage,
older = olderPage,
numberOfPages = numPages
)) else None
LiveBlogCurrentPage(FirstPage(firstPageBlocks), pagination)
}
}

// turns a full capi blocks list into a page model of the page with a specific block in it
def findPageWithBlock(pageSize: Int, blocks: Seq[BodyBlock], isRequestedBlock: String) = {
val (mainPageBlocks, restPagesBlocks) = getPages(pageSize, blocks)
val newestPage = FirstPage(mainPageBlocks)
val pages = newestPage :: restPagesBlocks
Expand All @@ -18,7 +79,7 @@ object LiveBlogCurrentPage {
val endedPages = None :: (pages.map(Some.apply) :+ None)

def hasRequestedBlock(page: LiveBlogCurrentPage): Boolean = {
isRequestedBlock.map(isRequestedBlock => page.currentPage.blocks.exists(_.id == isRequestedBlock)).getOrElse(true)
page.currentPage.blocks.exists(_.id == isRequestedBlock)
}


Expand Down Expand Up @@ -77,3 +138,11 @@ case class BlockPage(blocks: Seq[BodyBlock], blockId: String, pageNumber: Int) e
val suffix = s"?page=with:block-$blockId"
val isArchivePage = true
}

object LatestBlock {
def apply(maybeBlocks: Option[Blocks]): Option[String] = {
maybeBlocks.flatMap { blocks =>
blocks.requestedBodyBlocks.getOrElse(Canonical.firstPage, blocks.body).headOption.map(_.id)
}
}
}
Loading