Skip to content

Commit

Permalink
[gatsby-transformer-remark] Don't generate the AST for the same node multiple times in parallel.
Browse files Browse the repository at this point in the history

If we are already generating the AST for a given node, wait for the result of that in-flight generation instead of starting another one.
  • Loading branch information
pieh committed Mar 27, 2018
1 parent 8454195 commit d12a3b2
Showing 1 changed file with 98 additions and 80 deletions.
178 changes: 98 additions & 80 deletions packages/gatsby-transformer-remark/src/extend-node-type.js
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,14 @@ const tableOfContentsCacheKey = node =>
// Prepend `pathPrefix` to `url`, collapsing the first doubled slash that
// can appear at the seam (e.g. prefix `/p/` + url `/a` → `/p/a`).
const withPathPrefix = (url, pathPrefix) => {
  const joined = pathPrefix + url
  return joined.replace(/\/\//, `/`)
}

/**
 * Tracks in-flight AST generation so the AST for a given node is not
 * generated multiple times in parallel: concurrent callers for the same
 * node await the promise already stored here instead of starting a new
 * generation. Keyed by the node's AST cache key; entries are removed once
 * the result has been written to the persistent cache.
 *
 * @type {Map<string,Promise>}
 */
const ASTPromiseMap = new Map()

module.exports = (
{ type, store, pathPrefix, getNode, cache, reporter },
pluginOptions
Expand Down Expand Up @@ -87,90 +95,28 @@ module.exports = (
}

/**
 * Return the remark AST for `markdownNode`.
 *
 * Resolution order:
 *   1. the persistent `cache` (fastest),
 *   2. an in-flight generation for the same node in `ASTPromiseMap`,
 *   3. a fresh generation, which is published in `ASTPromiseMap` *before*
 *      any async work so concurrent callers share one generation.
 *
 * Generation runs each plugin's `mutateSource` serially, parses the
 * (possibly mutated) source with remark, rewrites relative links to
 * include `pathPrefix`, then runs each transformer plugin serially.
 *
 * @param {Object} markdownNode - the MarkdownRemark node to build the AST for
 * @returns {Promise<Object>} the markdown AST
 */
async function getAST(markdownNode) {
  const cacheKey = astCacheKey(markdownNode)

  const cachedAST = await cache.get(cacheKey)
  if (cachedAST) {
    return cachedAST
  }

  if (ASTPromiseMap.has(cacheKey)) {
    // We are already generating the AST for this node — wait for it.
    return await ASTPromiseMap.get(cacheKey)
  }

  // NOTE: an async IIFE replaces the original `new Promise(async resolve => …)`
  // executor antipattern, so errors reject the shared promise instead of
  // becoming unhandled rejections.
  const ASTGenerationPromise = (async () => {
    try {
      const files = _.values(store.getState().nodes).filter(
        n => n.internal.type === `File`
      )

      const markdownAST = await new Promise((resolve, reject) => {
        // Use Bluebird's Promise.each to run the plugins' `mutateSource`
        // hooks serially — order matters for source mutation.
        Promise.each(pluginOptions.plugins, plugin => {
          const requiredPlugin = require(plugin.resolve)
          if (_.isFunction(requiredPlugin.mutateSource)) {
            return requiredPlugin.mutateSource(
              {
                markdownNode,
                files,
                getNode,
                reporter,
              },
              plugin.pluginOptions
            )
          } else {
            return Promise.resolve()
          }
        })
          .then(() => {
            const ast = remark.parse(markdownNode.internal.content)

            if (pathPrefix) {
              // Ensure relative links include `pathPrefix`.
              visit(ast, `link`, node => {
                if (
                  node.url &&
                  node.url.startsWith(`/`) &&
                  !node.url.startsWith(`//`)
                ) {
                  node.url = withPathPrefix(node.url, pathPrefix)
                }
              })
            }

            // Re-read File nodes: `mutateSource` hooks may have changed the
            // store since the snapshot taken above.
            const filesAfterMutation = _.values(store.getState().nodes).filter(
              n => n.internal.type === `File`
            )

            // Run the remark transformer plugins serially against the AST.
            return Promise.each(pluginOptions.plugins, plugin => {
              const requiredPlugin = require(plugin.resolve)
              if (_.isFunction(requiredPlugin)) {
                return requiredPlugin(
                  {
                    markdownAST: ast,
                    markdownNode,
                    getNode,
                    files: filesAfterMutation,
                    pathPrefix,
                    reporter,
                  },
                  plugin.pluginOptions
                )
              } else {
                return Promise.resolve()
              }
            }).then(() => resolve(ast))
          })
          // BUG FIX: the original never called `reject`, so plugin failures
          // were silently swallowed; propagate them to the caller.
          .catch(reject)
      })

      // Save the new AST to the cache; from now on the cache lookup above
      // serves this node.
      cache.set(cacheKey, markdownAST)
      return markdownAST
    } finally {
      // BUG FIX: the original called ASTPromiseMap.delete(astCacheKey),
      // passing the key *function* instead of the computed key string, so
      // entries were never removed and the map leaked one promise per node.
      // `finally` also releases the entry when generation fails.
      ASTPromiseMap.delete(cacheKey)
    }
  })()

  ASTPromiseMap.set(cacheKey, ASTGenerationPromise)
  return await ASTGenerationPromise
}
}

Expand Down

0 comments on commit d12a3b2

Please sign in to comment.