diff --git a/packages/gatsby-transformer-remark/README.md b/packages/gatsby-transformer-remark/README.md index 3faa7bc117e14..2e96499042003 100644 --- a/packages/gatsby-transformer-remark/README.md +++ b/packages/gatsby-transformer-remark/README.md @@ -81,7 +81,9 @@ Using the following GraphQL query you'll be able to get the table of contents By default the tableOfContents is using the field `slug` to generate URLs. You can however provide another field using the pathToSlugField parameter. **Note** that providing a non existing field will cause the result to be null. -### Excerpt length +### Excerpts + +#### Length By default, excerpts have a maximum length of 140 characters. You can change the default using the `pruneLength` argument. For example, if you need 500 characters, you can specify: @@ -98,6 +100,36 @@ By default, excerpts have a maximum length of 140 characters. You can change the } ``` +#### Format + +By default, Gatsby will return excerpts as plain text. This might be useful for populating [opengraph](https://en.wikipedia.org/wiki/Facebook_Platform#Open_Graph_protocol) HTML tags for SEO reasons. You can also explicitly specify a `PLAIN` format like so: + +```graphql +{ + allMarkdownRemark { + edges { + node { + excerpt(format: PLAIN) + } + } + } +} +``` + +It's also possible to ask Gatsby to return excerpts formatted as HTML. You might use this if you have a blog post whose excerpt contains markdown content--e.g. header, link, etc.--and you want these links to render as HTML. 
+ +```graphql +{ + allMarkdownRemark { + edges { + node { + excerpt(format: HTML) + } + } + } +} +``` + ## Troubleshooting ### Excerpts for non-latin languages diff --git a/packages/gatsby-transformer-remark/src/__tests__/__snapshots__/extend-node.js.snap b/packages/gatsby-transformer-remark/src/__tests__/__snapshots__/extend-node.js.snap index 4372b594b1f07..648fc80a20a74 100644 --- a/packages/gatsby-transformer-remark/src/__tests__/__snapshots__/extend-node.js.snap +++ b/packages/gatsby-transformer-remark/src/__tests__/__snapshots__/extend-node.js.snap @@ -55,6 +55,24 @@ Object { } `; +exports[`Excerpt is generated correctly from schema given an html format, it correctly maps nested markdown to html 1`] = ` +Object { + "excerpt": "

Where oh where is that pony?

", + "frontmatter": Object { + "title": "my little pony", + }, +} +`; + +exports[`Excerpt is generated correctly from schema given an html format, it respects the excerpt_separator 1`] = ` +Object { + "excerpt": "

Where oh where is that pony? Is he in the stable…

", + "frontmatter": Object { + "title": "my little pony", + }, +} +`; + exports[`Links are correctly prefixed correctly prefixes links 1`] = ` Object { "html": "

This is a link.

diff --git a/packages/gatsby-transformer-remark/src/__tests__/extend-node.js b/packages/gatsby-transformer-remark/src/__tests__/extend-node.js index 53b3eeca6b783..7fd5a833895c0 100644 --- a/packages/gatsby-transformer-remark/src/__tests__/extend-node.js +++ b/packages/gatsby-transformer-remark/src/__tests__/extend-node.js @@ -237,6 +237,74 @@ In quis lectus sed eros efficitur luctus. Morbi tempor, nisl eget feugiat tincid expect(node.excerpt.length).toBe(50) } ) + + bootstrapTest( + `given an html format, it correctly maps nested markdown to html`, + `--- +title: "my little pony" +date: "2017-09-18T23:19:51.246Z" +--- + +Where oh [*where*](nick.com) **_is_** that pony?`, + `excerpt(format: HTML) + frontmatter { + title + } + `, + node => { + expect(node).toMatchSnapshot() + expect(node.excerpt).toMatch( + `

Where oh where is that pony?

` + ) + } + ) + + bootstrapTest( + `given an html format, it prunes large excerpts`, + `--- +title: "my little pony" +date: "2017-09-18T23:19:51.246Z" +--- + +Where oh where is that pony? Is he in the stable or down by the stream?`, + `excerpt(format: HTML, pruneLength: 50) + frontmatter { + title + } + `, + node => { + // expect(node).toMatchSnapshot() + expect(node.excerpt).toMatch( + `

Where oh where is that pony? Is he in the stable…

` + ) + } + ) + + bootstrapTest( + `given an html format, it respects the excerpt_separator`, + `--- +title: "my little pony" +date: "2017-09-18T23:19:51.246Z" +--- + +Where oh where is that pony? Is he in the stable or by the stream? + + +Lorem ipsum dolor sit amet, consectetur adipiscing elit. Morbi auctor sit amet velit id facilisis. Nulla viverra, eros at efficitur pulvinar, lectus orci accumsan nisi, eu blandit elit nulla nec lectus. Integer porttitor imperdiet sapien. Quisque in orci sed nisi consequat aliquam. Aenean id mollis nisi. Sed auctor odio id erat facilisis venenatis. Quisque posuere faucibus libero vel fringilla. +`, + `excerpt(format: HTML, pruneLength: 50) + frontmatter { + title + } + `, + node => { + expect(node).toMatchSnapshot() + expect(node.excerpt).toMatch( + `

Where oh where is that pony? Is he in the stable…

` + ) + }, + { excerpt_separator: `` } + ) }) describe(`Wordcount and timeToRead are generated correctly from schema`, () => { diff --git a/packages/gatsby-transformer-remark/src/extend-node-type.js b/packages/gatsby-transformer-remark/src/extend-node-type.js index 07827d42b1458..fd7ac66e8ecfd 100644 --- a/packages/gatsby-transformer-remark/src/extend-node-type.js +++ b/packages/gatsby-transformer-remark/src/extend-node-type.js @@ -16,7 +16,6 @@ const toHAST = require(`mdast-util-to-hast`) const hastToHTML = require(`hast-util-to-html`) const mdastToToc = require(`mdast-util-toc`) const Promise = require(`bluebird`) -const prune = require(`underscore.string/prune`) const unified = require(`unified`) const parse = require(`remark-parse`) const stringify = require(`remark-stringify`) @@ -24,6 +23,13 @@ const english = require(`retext-english`) const remark2retext = require(`remark-retext`) const stripPosition = require(`unist-util-remove-position`) const hastReparseRaw = require(`hast-util-raw`) +const prune = require(`underscore.string/prune`) + +const { + getConcatenatedValue, + cloneTreeUntil, + findLastTextNode, +} = require(`./hast-processing`) let fileNodes let pluginsCacheStr = `` @@ -68,7 +74,6 @@ module.exports = ( if (type.name !== `MarkdownRemark`) { return {} } - pluginsCacheStr = pluginOptions.plugins.map(p => p.name).join(``) pathPrefixCacheStr = pathPrefix || `` @@ -117,111 +122,114 @@ module.exports = ( // We are already generating AST, so let's wait for it return await ASTPromiseMap.get(cacheKey) } else { - const ASTGenerationPromise = new Promise(async resolve => { - if (process.env.NODE_ENV !== `production` || !fileNodes) { - fileNodes = getNodesByType(`File`) - } - const ast = await new Promise((resolve, reject) => { - // Use Bluebird's Promise function "each" to run remark plugins serially. 
- Promise.each(pluginOptions.plugins, plugin => { - const requiredPlugin = require(plugin.resolve) - if (_.isFunction(requiredPlugin.mutateSource)) { - return requiredPlugin.mutateSource( - { - markdownNode, - files: fileNodes, - getNode, - reporter, - cache, - }, - plugin.pluginOptions - ) - } else { - return Promise.resolve() - } - }).then(() => { - const markdownAST = remark.parse(markdownNode.internal.content) + const ASTGenerationPromise = getMarkdownAST(markdownNode) + ASTGenerationPromise.then(markdownAST => { + cache.set(cacheKey, markdownAST) + ASTPromiseMap.delete(cacheKey) + }).catch(err => { + ASTPromiseMap.delete(cacheKey) + throw err + }) + // Save new AST to cache and return + // We can now release promise, as we cached result + ASTPromiseMap.set(cacheKey, ASTGenerationPromise) + return ASTGenerationPromise + } + } - if (pathPrefix) { - // Ensure relative links include `pathPrefix` - visit(markdownAST, [`link`, `definition`], node => { - if ( - node.url && - node.url.startsWith(`/`) && - !node.url.startsWith(`//`) - ) { - node.url = withPathPrefix(node.url, pathPrefix) - } - }) - } + function getMarkdownAST(markdownNode) { + return new Promise(async (resolve, reject) => { + if (process.env.NODE_ENV !== `production` || !fileNodes) { + fileNodes = getNodesByType(`File`) + } + // Use Bluebird's Promise function "each" to run remark plugins serially. 
+ await Promise.each(pluginOptions.plugins, plugin => { + const requiredPlugin = require(plugin.resolve) + if (_.isFunction(requiredPlugin.mutateSource)) { + return requiredPlugin.mutateSource( + { + markdownNode, + files: fileNodes, + getNode, + reporter, + cache, + }, + plugin.pluginOptions + ) + } else { + return Promise.resolve() + } + }) + const markdownAST = remark.parse(markdownNode.internal.content) - // source => parse (can order parsing for dependencies) => typegen - // - // source plugins identify nodes, provide id, initial parse, know - // when nodes are created/removed/deleted - // get passed cached DataTree and return list of clean and dirty nodes. - // Also get passed `dirtyNodes` function which they can call with an array - // of node ids which will then get re-parsed and the inferred schema - // recreated (if inferring schema gets too expensive, can also - // cache the schema until a query fails at which point recreate the - // schema). - // - // parse plugins take data from source nodes and extend it, never mutate - // it. Freeze all nodes once done so typegen plugins can't change it - // this lets us save off the DataTree at that point as well as create - // indexes. - // - // typegen plugins identify further types of data that should be lazily - // computed due to their expense, or are hard to infer graphql type - // (markdown ast), or are need user input in order to derive e.g. - // markdown headers or date fields. - // - // wrap all resolve functions to (a) auto-memoize and (b) cache to disk any - // resolve function that takes longer than ~10ms (do research on this - // e.g. how long reading/writing to cache takes), and (c) track which - // queries are based on which source nodes. Also if connection of what - // which are always rerun if their underlying nodes change.. - // - // every node type in DataTree gets a schema type automatically. 
- // typegen plugins just modify the auto-generated types to add derived fields - // as well as computationally expensive fields. - if (process.env.NODE_ENV !== `production` || !fileNodes) { - fileNodes = getNodesByType(`File`) - } - // Use Bluebird's Promise function "each" to run remark plugins serially. - Promise.each(pluginOptions.plugins, plugin => { - const requiredPlugin = require(plugin.resolve) - if (_.isFunction(requiredPlugin)) { - return requiredPlugin( - { - markdownAST, - markdownNode, - getNode, - files: fileNodes, - pathPrefix, - reporter, - cache, - }, - plugin.pluginOptions - ) - } else { - return Promise.resolve() - } - }).then(() => { - resolve(markdownAST) - }) - }) + if (pathPrefix) { + // Ensure relative links include `pathPrefix` + visit(markdownAST, [`link`, `definition`], node => { + if ( + node.url && + node.url.startsWith(`/`) && + !node.url.startsWith(`//`) + ) { + node.url = withPathPrefix(node.url, pathPrefix) + } }) + } - // Save new AST to cache and return - cache.set(cacheKey, ast) - // We can now release promise, as we cached result - ASTPromiseMap.delete(cacheKey) - return resolve(ast) + // source => parse (can order parsing for dependencies) => typegen + // + // source plugins identify nodes, provide id, initial parse, know + // when nodes are created/removed/deleted + // get passed cached DataTree and return list of clean and dirty nodes. + // Also get passed `dirtyNodes` function which they can call with an array + // of node ids which will then get re-parsed and the inferred schema + // recreated (if inferring schema gets too expensive, can also + // cache the schema until a query fails at which point recreate the + // schema). + // + // parse plugins take data from source nodes and extend it, never mutate + // it. Freeze all nodes once done so typegen plugins can't change it + // this lets us save off the DataTree at that point as well as create + // indexes. 
+ // + // typegen plugins identify further types of data that should be lazily + // computed due to their expense, or are hard to infer graphql type + // (markdown ast), or are need user input in order to derive e.g. + // markdown headers or date fields. + // + // wrap all resolve functions to (a) auto-memoize and (b) cache to disk any + // resolve function that takes longer than ~10ms (do research on this + // e.g. how long reading/writing to cache takes), and (c) track which + // queries are based on which source nodes. Also if connection of what + // which are always rerun if their underlying nodes change.. + // + // every node type in DataTree gets a schema type automatically. + // typegen plugins just modify the auto-generated types to add derived fields + // as well as computationally expensive fields. + if (process.env.NODE_ENV !== `production` || !fileNodes) { + fileNodes = getNodesByType(`File`) + } + // Use Bluebird's Promise function "each" to run remark plugins serially. + await Promise.each(pluginOptions.plugins, plugin => { + const requiredPlugin = require(plugin.resolve) + if (_.isFunction(requiredPlugin)) { + return requiredPlugin( + { + markdownAST, + markdownNode, + getNode, + files: fileNodes, + pathPrefix, + reporter, + cache, + }, + plugin.pluginOptions + ) + } else { + return Promise.resolve() + } }) - ASTPromiseMap.set(cacheKey, ASTGenerationPromise) - return await ASTGenerationPromise - } + resolve(markdownAST) + }) } async function getHeadings(markdownNode) { @@ -346,6 +354,14 @@ module.exports = ( }, }) + const ExcerptFormats = new GraphQLEnumType({ + name: `ExcerptFormats`, + values: { + PLAIN: { value: `plain` }, + HTML: { value: `html` }, + }, + }) + return resolve({ html: { type: GraphQLString, @@ -373,8 +389,58 @@ module.exports = ( type: GraphQLBoolean, defaultValue: false, }, + format: { + type: ExcerptFormats, + defaultValue: `plain`, + }, }, - resolve(markdownNode, { pruneLength, truncate }) { + async resolve(markdownNode, { 
format, pruneLength, truncate }) { + if (format === `html`) { + if (pluginOptions.excerpt_separator) { + const fullAST = await getHTMLAst(markdownNode) + const excerptAST = cloneTreeUntil( + fullAST, + ({ nextNode }) => + nextNode.type === `raw` && + nextNode.value === pluginOptions.excerpt_separator + ) + return hastToHTML(excerptAST) + } + const fullAST = await getHTMLAst(markdownNode) + if (!fullAST.children.length) { + return `` + } + + const excerptAST = cloneTreeUntil(fullAST, ({ root }) => { + const totalExcerptSoFar = getConcatenatedValue(root) + return totalExcerptSoFar && totalExcerptSoFar.length > pruneLength + }) + const unprunedExcerpt = getConcatenatedValue(excerptAST) + if (!unprunedExcerpt) { + return `` + } + + if (pruneLength && unprunedExcerpt.length < pruneLength) { + return hastToHTML(excerptAST) + } + + const lastTextNode = findLastTextNode(excerptAST) + const amountToPruneLastNode = + pruneLength - (unprunedExcerpt.length - lastTextNode.value.length) + if (!truncate) { + lastTextNode.value = prune( + lastTextNode.value, + amountToPruneLastNode, + `…` + ) + } else { + lastTextNode.value = _.truncate(lastTextNode.value, { + length: pruneLength, + omission: `…`, + }) + } + return hastToHTML(excerptAST) + } if (markdownNode.excerpt) { return Promise.resolve(markdownNode.excerpt) } diff --git a/packages/gatsby-transformer-remark/src/hast-processing.js b/packages/gatsby-transformer-remark/src/hast-processing.js new file mode 100644 index 0000000000000..46068438a9cff --- /dev/null +++ b/packages/gatsby-transformer-remark/src/hast-processing.js @@ -0,0 +1,72 @@ +function duplicateNode(node) { + return { + type: node.type, + children: [], + tagName: node.tagName, + value: node.value, + } +} + +function getConcatenatedValue(node) { + if (!node) { + return `` + } + if (node.type === `text`) { + return node.value + } else if (node.children && node.children.length) { + return node.children + .map(getConcatenatedValue) + .filter(value => value) + .join(``) 
+ } + return `` +} + +function cloneTreeUntil(root, endCondition) { + let clonedRoot + let endConditionMet = false + + function preOrderTraversal(node) { + if (endConditionMet || endCondition({ root: clonedRoot, nextNode: node })) { + endConditionMet = true + return + } + + const newNode = duplicateNode(node) + if (clonedRoot) { + clonedRoot.children.push(newNode) + } else { + clonedRoot = newNode + } + + if (node.children) { + node.children.forEach(child => { + clonedRoot = newNode + preOrderTraversal(child) + }) + } + } + preOrderTraversal(root) + return clonedRoot +} + +function findLastTextNode(node, textNode) { + if (node.type === `text`) { + textNode = node + } + if (node.children) { + node.children.forEach(child => { + const laterTextNode = findLastTextNode(child) + if (laterTextNode !== textNode) { + textNode = laterTextNode + } + }) + } + return textNode +} + +module.exports = { + getConcatenatedValue, + cloneTreeUntil, + findLastTextNode, +}