From 8937307f9b5c7d2e280369d00cdcb55849aac480 Mon Sep 17 00:00:00 2001 From: Stuart Rayson Date: Wed, 5 Feb 2020 10:05:47 +0000 Subject: [PATCH 01/50] store page data between builds --- packages/gatsby/src/commands/build.js | 57 ++++++++++++++++++--- packages/gatsby/src/query/query-runner.js | 2 - packages/gatsby/src/query/queue.js | 14 ++++- packages/gatsby/src/redux/actions/public.js | 7 +++ packages/gatsby/src/redux/index.ts | 11 +++- packages/gatsby/src/redux/reducers/index.js | 1 + packages/gatsby/src/redux/types.ts | 2 + packages/gatsby/src/utils/page-data.js | 43 ++++++++++++++++ 8 files changed, 124 insertions(+), 13 deletions(-) diff --git a/packages/gatsby/src/commands/build.js b/packages/gatsby/src/commands/build.js index 97090bf454587..1a2229d2ab5e4 100644 --- a/packages/gatsby/src/commands/build.js +++ b/packages/gatsby/src/commands/build.js @@ -2,6 +2,7 @@ const path = require(`path`) const report = require(`gatsby-cli/lib/reporter`) +const fs = require(`fs-extra`) import { buildHTML } from "./build-html" const buildProductionBundle = require(`./build-javascript`) const bootstrap = require(`../bootstrap`) @@ -11,7 +12,7 @@ const { initTracer, stopTracer } = require(`../utils/tracer`) const db = require(`../db`) const signalExit = require(`signal-exit`) const telemetry = require(`gatsby-telemetry`) -const { store, emitter } = require(`../redux`) +const { store, emitter, readState } = require(`../redux`) const queryUtil = require(`../query`) const appDataUtil = require(`../utils/app-data`) const WorkerPool = require(`../utils/worker/pool`) @@ -19,7 +20,8 @@ const { structureWebpackErrors } = require(`../utils/webpack-error-utils`) const { waitUntilAllJobsComplete: waitUntilAllJobsV2Complete, } = require(`../utils/jobs-manager`) - +const pageDataUtil = require(`../utils/page-data`) +const incrementalBuild = true type BuildArgs = { directory: string, sitePackageJson: object, @@ -98,6 +100,7 @@ module.exports = async function build(program: BuildArgs) { 
const workerPool = WorkerPool.create() const webpackCompilationHash = stats.hash + if ( webpackCompilationHash !== store.getState().webpackCompilationHash || !appDataUtil.exists(publicDir) @@ -137,14 +140,11 @@ module.exports = async function build(program: BuildArgs) { `BOOTSTRAP_QUERY_RUNNING_FINISHED` ) - await db.saveState() - await waitUntilAllJobsComplete() - // we need to save it again to make sure our latest state has been saved - await db.saveState() - - const pagePaths = [...store.getState().pages.keys()] + const pagePaths = incrementalBuild + ? await pageDataUtil.getChangedPageDataKeys(store.getState(), readState()) + : [...store.getState().pages.keys()] activity = report.createProgress( `Building static HTML for pages`, pagePaths.length, @@ -198,4 +198,45 @@ module.exports = async function build(program: BuildArgs) { await stopTracer() workerPool.end() buildActivity.end() + + if (incrementalBuild && process.argv.indexOf(`--log-pages`) > -1) { + if (pagePaths.length) { + report.info( + `Incremental build pages:\n${pagePaths.map( + path => `Updated page: ${path}\n` + )}`.replace(/,/g, ``) + ) + } + // if (deletedPageKeys.length) { + // report.info( + // `Incremental build deleted pages:\n${deletedPageKeys.map( + // path => `Deleted page: ${path}\n` + // )}`.replace(/,/g, ``) + // ) + // } + } + + if (incrementalBuild && process.argv.indexOf(`--write-to-file`) > -1) { + const createdFilesPath = path.resolve( + `${program.directory}/.cache`, + `newPages.txt` + ) + // const deletedFilesPath = path.resolve( + // `${program.directory}/.cache`, + // `deletedPages.txt` + // ) + + if (pagePaths.length) { + fs.writeFileSync(createdFilesPath, `${pagePaths.join(`\n`)}\n`, `utf8`) + report.info(`newPages.txt created`) + } + // if (deletedPageKeys.length) { + // fs.writeFileSync( + // deletedFilesPath, + // `${deletedPageKeys.join(`\n`)}\n`, + // `utf8` + // ) + // report.info(`deletedPages.txt created`) + // } + } } diff --git 
a/packages/gatsby/src/query/query-runner.js b/packages/gatsby/src/query/query-runner.js index 870704e2254c6..7349005309a15 100644 --- a/packages/gatsby/src/query/query-runner.js +++ b/packages/gatsby/src/query/query-runner.js @@ -98,7 +98,6 @@ module.exports = async (graphqlRunner, queryJob: QueryJob) => { .createHash(`sha1`) .update(resultJSON) .digest(`base64`) - if (resultHash !== resultHashes.get(queryJob.id)) { resultHashes.set(queryJob.id, resultHash) @@ -117,7 +116,6 @@ module.exports = async (graphqlRunner, queryJob: QueryJob) => { `d`, `${queryJob.hash}.json` ) - await fs.outputFile(resultPath, resultJSON) } } diff --git a/packages/gatsby/src/query/queue.js b/packages/gatsby/src/query/queue.js index bc310a020c806..0dcaa5d76da11 100644 --- a/packages/gatsby/src/query/queue.js +++ b/packages/gatsby/src/query/queue.js @@ -1,5 +1,7 @@ const Queue = require(`better-queue`) const { store } = require(`../redux`) +const { boundActionCreators } = require(`../redux/actions`) +const { setPageData } = boundActionCreators const FastMemoryStore = require(`../query/better-queue-custom-store`) const queryRunner = require(`../query/query-runner`) const websocketManager = require(`../utils/websocket-manager`) @@ -14,9 +16,19 @@ const createBaseOptions = () => { const createBuildQueue = () => { const graphqlRunner = new GraphQLRunner(store) + const handler = (queryJob, callback) => queryRunner(graphqlRunner, queryJob) - .then(result => callback(null, result)) + .then(result => { + if (queryJob.isPage) { + const payload = { + id: queryJob.id, + result, + } + setPageData(payload) + } + return callback(null, result) + }) .catch(callback) return new Queue(handler, createBaseOptions()) } diff --git a/packages/gatsby/src/redux/actions/public.js b/packages/gatsby/src/redux/actions/public.js index c6291a3077cc0..0d242d0bdf69a 100644 --- a/packages/gatsby/src/redux/actions/public.js +++ b/packages/gatsby/src/redux/actions/public.js @@ -1405,4 +1405,11 @@ actions.createPageDependency 
= ( } } +actions.setPageData = data => { + return { + type: `CREATE_PAGE_DATA`, + payload: data, + } +} + module.exports = { actions } diff --git a/packages/gatsby/src/redux/index.ts b/packages/gatsby/src/redux/index.ts index 591b1b7343a84..36aafc10bb366 100644 --- a/packages/gatsby/src/redux/index.ts +++ b/packages/gatsby/src/redux/index.ts @@ -53,12 +53,18 @@ const multi: Middleware = ({ dispatch }) => next => ( ): ActionsUnion | ActionsUnion[] => Array.isArray(action) ? action.filter(Boolean).map(dispatch) : next(action) -export const configureStore = (initialState: IReduxState): Store => - createStore( +export const configureStore = ( + initialState: IReduxState +): Store => { + // Page data is not required to be in the initial redux store. + // This will enable us to make a comparison of the cached state and new state. + initialState.pageData = new Map() + return createStore( combineReducers({ ...reducers }), initialState, applyMiddleware(thunk, multi) ) +} export const store = configureStore(readState()) @@ -75,6 +81,7 @@ export const saveState = (): void => { staticQueryComponents: state.staticQueryComponents, webpackCompilationHash: state.webpackCompilationHash, pageDataStats: state.pageDataStats, + pageData: state.pageData, }) } diff --git a/packages/gatsby/src/redux/reducers/index.js b/packages/gatsby/src/redux/reducers/index.js index 9dcf87cf74bd6..67067487f1b42 100644 --- a/packages/gatsby/src/redux/reducers/index.js +++ b/packages/gatsby/src/redux/reducers/index.js @@ -66,4 +66,5 @@ module.exports = { logs: require(`gatsby-cli/lib/reporter/redux/reducer`), inferenceMetadata: require(`./inference-metadata`), pageDataStats: require(`./page-data-stats`), + pageData: require(`./page-data`), } diff --git a/packages/gatsby/src/redux/types.ts b/packages/gatsby/src/redux/types.ts index 393c6eaa701a9..323aa4eff1b1e 100644 --- a/packages/gatsby/src/redux/types.ts +++ b/packages/gatsby/src/redux/types.ts @@ -28,6 +28,7 @@ export interface IReduxState { 
developMiddleware: any proxy: any } + pageData?: any } export interface ICachedReduxState { @@ -39,6 +40,7 @@ export interface ICachedReduxState { staticQueryComponents: IReduxState["staticQueryComponents"] webpackCompilationHash: IReduxState["webpackCompilationHash"] pageDataStats: IReduxState["pageDataStats"] + pageData?: IReduxState["pageData"] } export type ActionsUnion = diff --git a/packages/gatsby/src/utils/page-data.js b/packages/gatsby/src/utils/page-data.js index 470ec57b825f5..ea6a79fdbfb0e 100644 --- a/packages/gatsby/src/utils/page-data.js +++ b/packages/gatsby/src/utils/page-data.js @@ -1,6 +1,7 @@ const fs = require(`fs-extra`) const path = require(`path`) const { store } = require(`../redux`) +const Promise = require(`bluebird`) const getFilePath = ({ publicDir }, pagePath) => { const fixedPagePath = pagePath === `/` ? `index` : pagePath @@ -35,7 +36,49 @@ const write = async ({ publicDir }, page, result) => { await fs.outputFile(filePath, bodyStr) } +const getChangedPageDataKeys = (store, cacheData) => + new Promise(resolve => { + if (cacheData.pageData && store.pageData) { + const pageKeys = [] + store.pageData.forEach((value, key) => { + if (!cacheData.pageData.has(key)) { + pageKeys.push(key) + } else { + console.log(value) + const newPageData = JSON.stringify(value) + const previousPageData = JSON.stringify(cacheData.pageData.get(key)) + + if (newPageData !== previousPageData) { + pageKeys.push(key) + } + } + }) + resolve(pageKeys) + return + } + + resolve([...store.pages.keys()]) + }) + +const removePreviousPageData = (directory, store, cacheData) => + new Promise(resolve => { + if (cacheData.pageData && store.pageData) { + const deletedPageKeys = [] + cacheData.pageData.forEach((value, key) => { + if (!store.pageData.has(key)) { + deletedPageKeys.push(key) + fs.removeSync(`${directory}/public${key}`) + fs.removeSync(`${directory}/public/page-data${key}`) + } + }) + resolve(deletedPageKeys) + } + resolve(false) + }) + module.exports = { read, 
write, + getChangedPageDataKeys, + removePreviousPageData, } From 13b40711c54eb7d424b27a05ff9cdb66dcf28ea8 Mon Sep 17 00:00:00 2001 From: Stuart Rayson Date: Wed, 5 Feb 2020 10:34:44 +0000 Subject: [PATCH 02/50] Support removing of pages from public --- packages/gatsby/src/commands/build.js | 59 ++++++++++++++++---------- packages/gatsby/src/utils/page-data.js | 1 - 2 files changed, 36 insertions(+), 24 deletions(-) diff --git a/packages/gatsby/src/commands/build.js b/packages/gatsby/src/commands/build.js index 1a2229d2ab5e4..7b268809257f7 100644 --- a/packages/gatsby/src/commands/build.js +++ b/packages/gatsby/src/commands/build.js @@ -21,7 +21,8 @@ const { waitUntilAllJobsComplete: waitUntilAllJobsV2Complete, } = require(`../utils/jobs-manager`) const pageDataUtil = require(`../utils/page-data`) -const incrementalBuild = true +const pageBuildPerformance = + process.env.GATSBY_PAGE_BUILD_ON_DATA_CHANGES === `true` || false type BuildArgs = { directory: string, sitePackageJson: object, @@ -142,7 +143,7 @@ module.exports = async function build(program: BuildArgs) { await waitUntilAllJobsComplete() - const pagePaths = incrementalBuild + const pagePaths = pageBuildPerformance ? 
await pageDataUtil.getChangedPageDataKeys(store.getState(), readState()) : [...store.getState().pages.keys()] activity = report.createProgress( @@ -184,6 +185,18 @@ module.exports = async function build(program: BuildArgs) { } activity.done() + let deletedPageKeys = [] + if (pageBuildPerformance) { + activity = report.activityTimer(`Delete previous page data`) + activity.start() + deletedPageKeys = await pageDataUtil.removePreviousPageData( + program.directory, + store.getState(), + readState() + ) + activity.end() + } + await apiRunnerNode(`onPostBuild`, { graphql: graphqlRunner, parentSpan: buildSpan, @@ -199,7 +212,7 @@ module.exports = async function build(program: BuildArgs) { workerPool.end() buildActivity.end() - if (incrementalBuild && process.argv.indexOf(`--log-pages`) > -1) { + if (pageBuildPerformance && process.argv.indexOf(`--log-pages`) > -1) { if (pagePaths.length) { report.info( `Incremental build pages:\n${pagePaths.map( @@ -207,36 +220,36 @@ module.exports = async function build(program: BuildArgs) { )}`.replace(/,/g, ``) ) } - // if (deletedPageKeys.length) { - // report.info( - // `Incremental build deleted pages:\n${deletedPageKeys.map( - // path => `Deleted page: ${path}\n` - // )}`.replace(/,/g, ``) - // ) - // } + if (typeof deletedPageKeys !== `undefined` && deletedPageKeys.length) { + report.info( + `Incremental build deleted pages:\n${deletedPageKeys.map( + path => `Deleted page: ${path}\n` + )}`.replace(/,/g, ``) + ) + } } - if (incrementalBuild && process.argv.indexOf(`--write-to-file`) > -1) { + if (pageBuildPerformance && process.argv.indexOf(`--write-to-file`) > -1) { const createdFilesPath = path.resolve( `${program.directory}/.cache`, `newPages.txt` ) - // const deletedFilesPath = path.resolve( - // `${program.directory}/.cache`, - // `deletedPages.txt` - // ) + const deletedFilesPath = path.resolve( + `${program.directory}/.cache`, + `deletedPages.txt` + ) if (pagePaths.length) { fs.writeFileSync(createdFilesPath, 
`${pagePaths.join(`\n`)}\n`, `utf8`) report.info(`newPages.txt created`) } - // if (deletedPageKeys.length) { - // fs.writeFileSync( - // deletedFilesPath, - // `${deletedPageKeys.join(`\n`)}\n`, - // `utf8` - // ) - // report.info(`deletedPages.txt created`) - // } + if (deletedPageKeys.length) { + fs.writeFileSync( + deletedFilesPath, + `${deletedPageKeys.join(`\n`)}\n`, + `utf8` + ) + report.info(`deletedPages.txt created`) + } } } diff --git a/packages/gatsby/src/utils/page-data.js b/packages/gatsby/src/utils/page-data.js index ea6a79fdbfb0e..23052269a83e1 100644 --- a/packages/gatsby/src/utils/page-data.js +++ b/packages/gatsby/src/utils/page-data.js @@ -44,7 +44,6 @@ const getChangedPageDataKeys = (store, cacheData) => if (!cacheData.pageData.has(key)) { pageKeys.push(key) } else { - console.log(value) const newPageData = JSON.stringify(value) const previousPageData = JSON.stringify(cacheData.pageData.get(key)) From 67362052c83aa84c8fa97875ef2ab1ded2ab8b56 Mon Sep 17 00:00:00 2001 From: Stuart Rayson Date: Wed, 5 Feb 2020 11:03:35 +0000 Subject: [PATCH 03/50] build all pages if webpackCompilationHash has changed --- packages/gatsby/src/commands/build.js | 1 - packages/gatsby/src/utils/page-data.js | 9 +++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/packages/gatsby/src/commands/build.js b/packages/gatsby/src/commands/build.js index 7b268809257f7..c25df201acdb5 100644 --- a/packages/gatsby/src/commands/build.js +++ b/packages/gatsby/src/commands/build.js @@ -101,7 +101,6 @@ module.exports = async function build(program: BuildArgs) { const workerPool = WorkerPool.create() const webpackCompilationHash = stats.hash - if ( webpackCompilationHash !== store.getState().webpackCompilationHash || !appDataUtil.exists(publicDir) diff --git a/packages/gatsby/src/utils/page-data.js b/packages/gatsby/src/utils/page-data.js index 23052269a83e1..9851d58a4d1d0 100644 --- a/packages/gatsby/src/utils/page-data.js +++ b/packages/gatsby/src/utils/page-data.js 
@@ -22,6 +22,7 @@ const write = async ({ publicDir }, page, result) => { result, } const bodyStr = JSON.stringify(body) + // transform asset size to kB (from bytes) to fit 64 bit to numbers const pageDataSize = Buffer.byteLength(bodyStr) / 1000 @@ -38,6 +39,14 @@ const write = async ({ publicDir }, page, result) => { const getChangedPageDataKeys = (store, cacheData) => new Promise(resolve => { + if ( + cacheData.webpackCompilationHash !== + store.getState().webpackCompilationHash + ) { + resolve([...store.pages.keys()]) + return + } + if (cacheData.pageData && store.pageData) { const pageKeys = [] store.pageData.forEach((value, key) => { From bfd281e04b0ea59de41a8a73d43e3bc7b4eb9693 Mon Sep 17 00:00:00 2001 From: Stuart Rayson Date: Wed, 5 Feb 2020 15:45:10 +0000 Subject: [PATCH 04/50] Support deleting of data on processors that run once --- packages/gatsby/src/commands/build.js | 4 ++-- packages/gatsby/src/query/index.js | 11 ++++++++++- packages/gatsby/src/query/queue.js | 6 ++---- packages/gatsby/src/redux/actions/public.js | 7 +++++++ packages/gatsby/src/redux/index.ts | 10 ++-------- packages/gatsby/src/utils/page-data.js | 15 ++++++++------- 6 files changed, 31 insertions(+), 22 deletions(-) diff --git a/packages/gatsby/src/commands/build.js b/packages/gatsby/src/commands/build.js index c25df201acdb5..f66f7bb423405 100644 --- a/packages/gatsby/src/commands/build.js +++ b/packages/gatsby/src/commands/build.js @@ -214,14 +214,14 @@ module.exports = async function build(program: BuildArgs) { if (pageBuildPerformance && process.argv.indexOf(`--log-pages`) > -1) { if (pagePaths.length) { report.info( - `Incremental build pages:\n${pagePaths.map( + `Built pages:\n${pagePaths.map( path => `Updated page: ${path}\n` )}`.replace(/,/g, ``) ) } if (typeof deletedPageKeys !== `undefined` && deletedPageKeys.length) { report.info( - `Incremental build deleted pages:\n${deletedPageKeys.map( + `Deleted pages:\n${deletedPageKeys.map( path => `Deleted page: ${path}\n` 
)}`.replace(/,/g, ``) ) diff --git a/packages/gatsby/src/query/index.js b/packages/gatsby/src/query/index.js index b183707da9cd6..b38d0b5fc64b6 100644 --- a/packages/gatsby/src/query/index.js +++ b/packages/gatsby/src/query/index.js @@ -3,7 +3,7 @@ const _ = require(`lodash`) const Queue = require(`better-queue`) // const convertHrtime = require(`convert-hrtime`) -const { store, emitter } = require(`../redux`) +const { store, emitter, readState } = require(`../redux`) const { boundActionCreators } = require(`../redux/actions`) const report = require(`gatsby-cli/lib/reporter`) const queryQueue = require(`./queue`) @@ -160,7 +160,16 @@ const groupQueryIds = queryIds => { const processQueries = async (queryJobs, activity) => { const queue = queryQueue.createBuildQueue() + const { pages } = store.getState() await queryQueue.processBatch(queue, queryJobs, activity) + + readState().pageData.forEach((value, key) => { + if (!pages.has(key)) { + boundActionCreators.removePageData({ + id: key, + }) + } + }) } const createStaticQueryJob = (state, queryId) => { diff --git a/packages/gatsby/src/query/queue.js b/packages/gatsby/src/query/queue.js index 0dcaa5d76da11..777807a7aaa74 100644 --- a/packages/gatsby/src/query/queue.js +++ b/packages/gatsby/src/query/queue.js @@ -1,7 +1,6 @@ const Queue = require(`better-queue`) const { store } = require(`../redux`) const { boundActionCreators } = require(`../redux/actions`) -const { setPageData } = boundActionCreators const FastMemoryStore = require(`../query/better-queue-custom-store`) const queryRunner = require(`../query/query-runner`) const websocketManager = require(`../utils/websocket-manager`) @@ -21,11 +20,10 @@ const createBuildQueue = () => { queryRunner(graphqlRunner, queryJob) .then(result => { if (queryJob.isPage) { - const payload = { + boundActionCreators.setPageData({ id: queryJob.id, result, - } - setPageData(payload) + }) } return callback(null, result) }) diff --git a/packages/gatsby/src/redux/actions/public.js 
b/packages/gatsby/src/redux/actions/public.js index 0d242d0bdf69a..7bc899fe210d9 100644 --- a/packages/gatsby/src/redux/actions/public.js +++ b/packages/gatsby/src/redux/actions/public.js @@ -1412,4 +1412,11 @@ actions.setPageData = data => { } } +actions.removePageData = data => { + return { + type: `DELETE_PAGE_DATA`, + payload: data, + } +} + module.exports = { actions } diff --git a/packages/gatsby/src/redux/index.ts b/packages/gatsby/src/redux/index.ts index 36aafc10bb366..49fff735f7e16 100644 --- a/packages/gatsby/src/redux/index.ts +++ b/packages/gatsby/src/redux/index.ts @@ -53,18 +53,12 @@ const multi: Middleware = ({ dispatch }) => next => ( ): ActionsUnion | ActionsUnion[] => Array.isArray(action) ? action.filter(Boolean).map(dispatch) : next(action) -export const configureStore = ( - initialState: IReduxState -): Store => { - // Page data is not required to be in the initial redux store. - // This will enable us to make a comparison of the cached state and new state. - initialState.pageData = new Map() - return createStore( +export const configureStore = (initialState: IReduxState): Store => + createStore( combineReducers({ ...reducers }), initialState, applyMiddleware(thunk, multi) ) -} export const store = configureStore(readState()) diff --git a/packages/gatsby/src/utils/page-data.js b/packages/gatsby/src/utils/page-data.js index 9851d58a4d1d0..a730c9822e283 100644 --- a/packages/gatsby/src/utils/page-data.js +++ b/packages/gatsby/src/utils/page-data.js @@ -39,14 +39,10 @@ const write = async ({ publicDir }, page, result) => { const getChangedPageDataKeys = (store, cacheData) => new Promise(resolve => { - if ( - cacheData.webpackCompilationHash !== - store.getState().webpackCompilationHash - ) { + if (cacheData.webpackCompilationHash !== store.webpackCompilationHash) { resolve([...store.pages.keys()]) return } - if (cacheData.pageData && store.pageData) { const pageKeys = [] store.pageData.forEach((value, key) => { @@ -75,8 +71,13 @@ const 
removePreviousPageData = (directory, store, cacheData) => cacheData.pageData.forEach((value, key) => { if (!store.pageData.has(key)) { deletedPageKeys.push(key) - fs.removeSync(`${directory}/public${key}`) - fs.removeSync(`${directory}/public/page-data${key}`) + if (key === `/`) { + fs.removeSync(`${directory}/public/index.html`) + fs.removeSync(`${directory}/public/page-data/index`) + } else { + fs.removeSync(`${directory}/public${key}`) + fs.removeSync(`${directory}/public/page-data${key}`) + } } }) resolve(deletedPageKeys) From 30e041d1d0bace6da7acc22d365de7474465da70 Mon Sep 17 00:00:00 2001 From: Stuart Rayson Date: Wed, 5 Feb 2020 16:52:44 +0000 Subject: [PATCH 05/50] Add expirement flag to page performance --- packages/gatsby/src/bootstrap/index.js | 5 ++++- packages/gatsby/src/commands/build.js | 15 ++++++++++----- packages/gatsby/src/query/index.js | 19 ++++++++++++------- packages/gatsby/src/query/queue.js | 2 +- 4 files changed, 27 insertions(+), 14 deletions(-) diff --git a/packages/gatsby/src/bootstrap/index.js b/packages/gatsby/src/bootstrap/index.js index 31c5abfdd8eb0..1cd5b5904cec5 100644 --- a/packages/gatsby/src/bootstrap/index.js +++ b/packages/gatsby/src/bootstrap/index.js @@ -190,7 +190,10 @@ module.exports = async (args: BootstrapArgs) => { // During builds, delete html and css files from the public directory as we don't want // deleted pages and styles from previous builds to stick around. 
- if (process.env.NODE_ENV === `production`) { + if ( + process.env.GATSBY_PAGE_BUILD_ON_DATA_CHANGES && + process.env.NODE_ENV === `production` + ) { activity = report.activityTimer( `delete html and css files from previous builds`, { diff --git a/packages/gatsby/src/commands/build.js b/packages/gatsby/src/commands/build.js index f66f7bb423405..23a144add7467 100644 --- a/packages/gatsby/src/commands/build.js +++ b/packages/gatsby/src/commands/build.js @@ -21,8 +21,7 @@ const { waitUntilAllJobsComplete: waitUntilAllJobsV2Complete, } = require(`../utils/jobs-manager`) const pageDataUtil = require(`../utils/page-data`) -const pageBuildPerformance = - process.env.GATSBY_PAGE_BUILD_ON_DATA_CHANGES === `true` || false + type BuildArgs = { directory: string, sitePackageJson: object, @@ -185,7 +184,7 @@ module.exports = async function build(program: BuildArgs) { activity.done() let deletedPageKeys = [] - if (pageBuildPerformance) { + if (process.env.GATSBY_PAGE_BUILD_ON_DATA_CHANGES) { activity = report.activityTimer(`Delete previous page data`) activity.start() deletedPageKeys = await pageDataUtil.removePreviousPageData( @@ -211,7 +210,10 @@ module.exports = async function build(program: BuildArgs) { workerPool.end() buildActivity.end() - if (pageBuildPerformance && process.argv.indexOf(`--log-pages`) > -1) { + if ( + process.env.GATSBY_PAGE_BUILD_ON_DATA_CHANGES && + process.argv.indexOf(`--log-pages`) > -1 + ) { if (pagePaths.length) { report.info( `Built pages:\n${pagePaths.map( @@ -228,7 +230,10 @@ module.exports = async function build(program: BuildArgs) { } } - if (pageBuildPerformance && process.argv.indexOf(`--write-to-file`) > -1) { + if ( + process.env.GATSBY_PAGE_BUILD_ON_DATA_CHANGES && + process.argv.indexOf(`--write-to-file`) > -1 + ) { const createdFilesPath = path.resolve( `${program.directory}/.cache`, `newPages.txt` diff --git a/packages/gatsby/src/query/index.js b/packages/gatsby/src/query/index.js index b38d0b5fc64b6..780c46d625571 100644 --- 
a/packages/gatsby/src/query/index.js +++ b/packages/gatsby/src/query/index.js @@ -160,16 +160,21 @@ const groupQueryIds = queryIds => { const processQueries = async (queryJobs, activity) => { const queue = queryQueue.createBuildQueue() - const { pages } = store.getState() await queryQueue.processBatch(queue, queryJobs, activity) - - readState().pageData.forEach((value, key) => { - if (!pages.has(key)) { - boundActionCreators.removePageData({ - id: key, + if (process.env.GATSBY_PAGE_BUILD_ON_DATA_CHANGES) { + const { pages } = store.getState() + const { pageData } = readState() + + if (pageData) { + readState().pageData.forEach((value, key) => { + if (!pages.has(key)) { + boundActionCreators.removePageData({ + id: key, + }) + } }) } - }) + } } const createStaticQueryJob = (state, queryId) => { diff --git a/packages/gatsby/src/query/queue.js b/packages/gatsby/src/query/queue.js index 777807a7aaa74..b72edd4f60ec3 100644 --- a/packages/gatsby/src/query/queue.js +++ b/packages/gatsby/src/query/queue.js @@ -19,7 +19,7 @@ const createBuildQueue = () => { const handler = (queryJob, callback) => queryRunner(graphqlRunner, queryJob) .then(result => { - if (queryJob.isPage) { + if (process.env.GATSBY_PAGE_BUILD_ON_DATA_CHANGES && queryJob.isPage) { boundActionCreators.setPageData({ id: queryJob.id, result, From ad83b982172b93748709c455fc7d7a2b482edb2e Mon Sep 17 00:00:00 2001 From: Stuart Rayson Date: Wed, 5 Feb 2020 16:53:57 +0000 Subject: [PATCH 06/50] Add expirement flag to page performance --- packages/gatsby/src/commands/build.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/gatsby/src/commands/build.js b/packages/gatsby/src/commands/build.js index 23a144add7467..82d2582d3cd40 100644 --- a/packages/gatsby/src/commands/build.js +++ b/packages/gatsby/src/commands/build.js @@ -141,7 +141,7 @@ module.exports = async function build(program: BuildArgs) { await waitUntilAllJobsComplete() - const pagePaths = pageBuildPerformance + const pagePaths = 
process.env.GATSBY_PAGE_BUILD_ON_DATA_CHANGES ? await pageDataUtil.getChangedPageDataKeys(store.getState(), readState()) : [...store.getState().pages.keys()] activity = report.createProgress( From a47aa304a9a896c47ab48f54f0713f7a5f4d3639 Mon Sep 17 00:00:00 2001 From: Stuart Rayson Date: Thu, 6 Feb 2020 08:46:52 +0000 Subject: [PATCH 07/50] Add comments and types to actions --- packages/gatsby/src/redux/actions/public.js | 30 ++++++++++++++++----- 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/packages/gatsby/src/redux/actions/public.js b/packages/gatsby/src/redux/actions/public.js index 7bc899fe210d9..43a441a790d2a 100644 --- a/packages/gatsby/src/redux/actions/public.js +++ b/packages/gatsby/src/redux/actions/public.js @@ -102,6 +102,11 @@ type ActionOptions = { followsSpan: ?Object, } +type PageData = { + id: string, + result: ?Object, +} + /** * Delete a page * @param {Object} page a page object @@ -1405,17 +1410,30 @@ actions.createPageDependency = ( } } -actions.setPageData = data => { +/** + * Set page data in the store, saving the pages content data and context. + * + * @param {Object} $0 + * @param {string} $0.id the path to the page. + * @param {string} $0.result object of the pages data and context. + */ +actions.setPageData = (pageData: PageData) => { return { - type: `CREATE_PAGE_DATA`, - payload: data, + type: `SET_PAGE_DATA`, + payload: pageData, } } -actions.removePageData = data => { +/** + * Remove page data from the store. + * + * @param {Object} $0 + * @param {string} $0.id the path to the page. 
+ */ +actions.removePageData = (id: PageData) => { return { - type: `DELETE_PAGE_DATA`, - payload: data, + type: `REMOVE_PAGE_DATA`, + payload: id, } } From d9aaac428735b107850179e1042ed7a8073b4e44 Mon Sep 17 00:00:00 2001 From: Stuart Rayson Date: Thu, 6 Feb 2020 08:52:31 +0000 Subject: [PATCH 08/50] Add missing page data reducer --- packages/gatsby/src/redux/reducers/page-data.js | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 packages/gatsby/src/redux/reducers/page-data.js diff --git a/packages/gatsby/src/redux/reducers/page-data.js b/packages/gatsby/src/redux/reducers/page-data.js new file mode 100644 index 0000000000000..52f6d1d99cf09 --- /dev/null +++ b/packages/gatsby/src/redux/reducers/page-data.js @@ -0,0 +1,14 @@ +module.exports = (state = new Map(), action) => { + switch (action.type) { + case `REMOVE_PAGE_DATA`: + state.delete(action.payload.id) + return state + + case `SET_PAGE_DATA`: { + return state.set(action.payload.id, action.payload.result) + } + + default: + return state + } +} From 635bbe4d6beac30ab6ba218b2c07e796312eae3e Mon Sep 17 00:00:00 2001 From: Stuart Rayson Date: Thu, 6 Feb 2020 10:09:25 +0000 Subject: [PATCH 09/50] Retain public between builds --- packages/gatsby/src/bootstrap/index.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/gatsby/src/bootstrap/index.js b/packages/gatsby/src/bootstrap/index.js index 1cd5b5904cec5..eaca8ddecb007 100644 --- a/packages/gatsby/src/bootstrap/index.js +++ b/packages/gatsby/src/bootstrap/index.js @@ -191,7 +191,7 @@ module.exports = async (args: BootstrapArgs) => { // During builds, delete html and css files from the public directory as we don't want // deleted pages and styles from previous builds to stick around. 
if ( - process.env.GATSBY_PAGE_BUILD_ON_DATA_CHANGES && + !process.env.GATSBY_PAGE_BUILD_ON_DATA_CHANGES && process.env.NODE_ENV === `production` ) { activity = report.activityTimer( From 35550042fa88a438ccf2e91183d883be341fbee1 Mon Sep 17 00:00:00 2001 From: Stuart Rayson Date: Fri, 14 Feb 2020 14:30:29 +0000 Subject: [PATCH 10/50] Add docs to page build time enhancement --- docs/docs/page-build-time-enhancement.md | 65 ++++++++++++++++++++++++ www/src/data/sidebars/doc-links.yaml | 2 + 2 files changed, 67 insertions(+) create mode 100644 docs/docs/page-build-time-enhancement.md diff --git a/docs/docs/page-build-time-enhancement.md b/docs/docs/page-build-time-enhancement.md new file mode 100644 index 0000000000000..fdc33b4e4aab2 --- /dev/null +++ b/docs/docs/page-build-time-enhancement.md @@ -0,0 +1,65 @@ +--- +title: Page Build Time Enhancement +--- + +Gatsby sources data from multiple sources (CMS, static files - like Markdown, databases, APIs, etc) and creates an aggregated dataset in GraphQL. Currently, each gatsby build uses the GraphQL dataset and queries to do a complete rebuild of the whole app - ready for deployment - including static assets like HTML, JavaScript, JSON, media files, etc. + +For projects that have a small (10s to 100s) to medium (100s to 1000s) amount of content, deploying these sites doesn't present a problem. + +Building sites with large amounts of content (10,000s upwards) is relatively fast with Gatsby. The issue arises when we need to support CI/CD principles: continuously building and deploying all assets places increased demand on CPU and memory. + +One solution to these problems might be to use [Gatsby Cloud's 'Build' features](https://www.gatsbyjs.com/cloud/). + +For projects that require self-hosted environments, where Gatsby Cloud would not be an option, this page build enhancement can speed up your deployments by building only the pages that have been updated by the content sources.
+ +For more info on the standard build process, please see the [overview of the gatsby build process](/docs/overview-of-the-gatsby-build-process/). + +## Warning + +This enhancement will require access to your own CI/CD build pipelines. If your site is relatively small and doesn't need to be hosted on-premise, consider using [Gatsby JS cloud solution](https://www.gatsbyjs.com/cloud/). + +## Setup + +Set the environment variable `GATSBY_PAGE_BUILD_ON_DATA_CHANGES=true` when running the build command: + +`GATSBY_PAGE_BUILD_ON_DATA_CHANGES=true node ./node_modules/.bin/gatsby build`. + +Optionally, if you want to list the directories of the pages that have been updated at the end of the build process, you can use one of the following arguments: + +### --write-to-file + +The `--write-to-file` argument will create two text files in the gatsby `.cache` folder: + +- `newPages.txt` will contain a list of directory values of the pages that have changed or are new. +- `deletedPages.txt` will contain a list of directory values of pages removed from the content sources. + +**Note that these files will not be created if there are no values.** + +### --log-pages + +The `--log-pages` parameter will output all the file paths that have been updated or deleted at the end of the build stage. + +``` +success Building production JavaScript and CSS bundles - 82.198s +success run queries - 82.762s - 4/4 0.05/s +success Building static HTML for pages - 19.386s - 2/2 0.10/s ++ success Delete previous page data - 1.512s +info Done building in 152.084 sec ++ info Built pages: ++ Updated page: /about ++ Updated page: /accounts/example ++ info Deleted pages: ++ Deleted page: /test + +Done in 154.501 sec +``` + +## More information + +- This enhancement works by comparing the page data from the previous build to the new page data. This creates a list of page directories that are passed to the static build process.
+ +- To enable this build option you will need to set an environment variable, so you will need access to set variables in your build environment. + +- At the end of each build, gatsby creates a `redux.state` file in `/.cache` that containing the all previous build data. You will need to persist the `.cache/redux.state` between builds, allowing for comparison, if there is no `redux.state` file located in the `/.cache` the folder then a full build will be triggered. + +- Any code or static query changes (templates, components, source handling, new plugins etc) creates a new webpack compilation hash and triggers a full build. diff --git a/www/src/data/sidebars/doc-links.yaml b/www/src/data/sidebars/doc-links.yaml index a437dd6ab1606..6a2b4aeca8d8e 100644 --- a/www/src/data/sidebars/doc-links.yaml +++ b/www/src/data/sidebars/doc-links.yaml @@ -486,6 +486,8 @@ breadcrumbTitle: Guess.js - title: Scaling Issues link: /docs/scaling-issues/ + - title: Page Build Speed Enhancement + link: /docs/page-build-time-enhancement/ - title: Localization & Internationalization with Gatsby link: /docs/localization-i18n/ breadcrumbTitle: Localization From 326d412f024f455d2fb55d1bb27c49627c4eac15 Mon Sep 17 00:00:00 2001 From: Dominic Fallows Date: Mon, 17 Feb 2020 12:55:00 +0000 Subject: [PATCH 11/50] Update Page build optimisations docs --- .../overview-of-the-gatsby-build-process.md | 2 + ...imizations-for-incremental-data-changes.md | 80 +++++++++++++++++++ docs/docs/page-build-time-enhancement.md | 65 --------------- www/src/data/sidebars/doc-links.yaml | 4 +- 4 files changed, 84 insertions(+), 67 deletions(-) create mode 100644 docs/docs/page-build-optimizations-for-incremental-data-changes.md delete mode 100644 docs/docs/page-build-time-enhancement.md diff --git a/docs/docs/overview-of-the-gatsby-build-process.md b/docs/docs/overview-of-the-gatsby-build-process.md index e0c8a9b770024..5177f0f57ada6 100644 --- a/docs/docs/overview-of-the-gatsby-build-process.md +++ 
b/docs/docs/overview-of-the-gatsby-build-process.md @@ -301,6 +301,8 @@ Page queries that were queued up earlier from query extraction are run so the da With everything ready for the HTML pages in place, HTML is compiled and written out to files so it can be served up statically. Since HTML is being produced in a Node.js server context, [references to browser APIs like `window` can break the build](/docs/debugging-html-builds/) and must be conditionally applied. +By default, Gatsby rebuilds static HTML for all pages on each build. There is an experimental feature flag `GATSBY_PAGE_BUILD_ON_DATA_CHANGES` which enables [Page Build Optimizations for Incremental Data Changes](/docs/page-build-optimizations-for-incremental-data-changes/). + ## What do you get from a successful build? When a Gatsby build is successfully completed, everything you need to deploy your site ends up in the `public` folder at the root of the site. The build includes minified files, transformed images, JSON files with information and data for each page, static HTML for each page, and more. diff --git a/docs/docs/page-build-optimizations-for-incremental-data-changes.md b/docs/docs/page-build-optimizations-for-incremental-data-changes.md new file mode 100644 index 0000000000000..ff443b79e7330 --- /dev/null +++ b/docs/docs/page-build-optimizations-for-incremental-data-changes.md @@ -0,0 +1,80 @@ +--- +title: Page Build Optimizations for Incremental Data Changes +--- + +Gatsby sources data from multiple sources (CMS, static files - like Markdown, databases, APIs, etc) and creates an aggregated dataset in GraphQL. Currently, each gatsby build uses the GraphQL dataset and queries to do a complete rebuild of the whole app - ready for deployment - including static assets like HTML, JavaScript, JSON, media files, etc. + +Projects that have a small (10s to 100s) to medium (100s to 1000s) amount of content, deploying these sites don't present a problem. 
+ +Building sites with large amounts of content (10,000s upwards) is already relatively fast with Gatsby. However, some projects might start to experience issues when adopting CI/CD principles - continuously building and deploying. Gatsby rebuilds the complete app, which means the complete app also needs to be deployed. Doing this each time a small data change occurs unnecessarily increases demand on CPU, memory, and bandwidth. + +One solution to these problems might be to use [Gatsby Cloud's Build features](https://www.gatsbyjs.com/cloud/). + +For projects that require self-hosted environments, where Gatsby Cloud would not be an option, being able to only deploy the content that has changed or is new (incremental data changes, you might say) would help reduce build times, deployment times and demand on resources. + +For more info on the standard build process please see [overview of the gatsby build process](/docs/overview-of-the-gatsby-build-process/) + +## How to use + +To enable this enhancement, use the environment variable `GATSBY_PAGE_BUILD_ON_DATA_CHANGES=true` in your `gatsby build` command, for example: + +`GATSBY_PAGE_BUILD_ON_DATA_CHANGES=true node ./node_modules/.bin/gatsby build` + +This will run the Gatsby build process, but only build pages that have data changes since your last build. If there are any changes to code (JS, CSS), the bundling process returns a new webpack compilation hash, which causes all pages to be rebuilt. + +### Reporting what has been built + +You might need to get a list of the pages that have been built, for example if you want to perform a sync action in your CI/CD pipeline. + +To list the paths in the build assets (`public`) folder, you can use one (or both) of the following arguments in your `build` command.
+ +- `--log-pages` outputs the updated paths to the console at the end of the build + +```bash +success Building production JavaScript and CSS bundles - 82.198s +success run queries - 82.762s - 4/4 0.05/s +success Building static HTML for pages - 19.386s - 2/2 0.10/s ++ success Delete previous page data - 1.512s +info Done building in 152.084 sec ++ info Built pages: ++ Updated page: /about ++ Updated page: /accounts/example ++ info Deleted pages: ++ Deleted page: /test + +Done in 154.501 sec +``` + +- `--write-to-file` creates two files in the `.cache` folder, with lists of the changed paths in the build assets (`public`) folder. + + - `newPages.txt` will contain a list of paths that have changed or are new + - `deletedPages.txt` will contain a list of paths that have been deleted + +If there are no changed or deleted paths, then the relevant files will not be created in the `.cache` folder. + +The `--log-pages` parameter will output all the file paths that have been updated or deleted at the end of the build stage. + +```bash +success Building production JavaScript and CSS bundles - 82.198s +success run queries - 82.762s - 4/4 0.05/s +success Building static HTML for pages - 19.386s - 2/2 0.10/s ++ success Delete previous page data - 1.512s +info Done building in 152.084 sec ++ info Built pages: ++ Updated page: /about ++ Updated page: /accounts/example ++ info Deleted pages: ++ Deleted page: /test + +Done in 154.501 sec +``` + +## More information + +- This enhancement works by comparing the page data from the previous build to the new page data. This creates a list of page directories that are passed to the static build process + +- To enable this build option you will need to set an environment variable, so you will need access to set variables in your build environment + +- At the end of each build, gatsby creates a `redux.state` file in `/.cache` that contains all of the previous build data.
You will need to persist the `.cache/redux.state` file between builds to allow for comparison. If there is no `redux.state` file in the `/.cache` folder, then a full build will be triggered + +- Any code or static query changes (templates, components, source handling, new plugins etc) creates a new webpack compilation hash and triggers a full build diff --git a/docs/docs/page-build-time-enhancement.md b/docs/docs/page-build-time-enhancement.md deleted file mode 100644 index fdc33b4e4aab2..0000000000000 --- a/docs/docs/page-build-time-enhancement.md +++ /dev/null @@ -1,65 +0,0 @@ ---- -title: Page Build Time Enhancement ---- - -Gatsby sources data from multiple sources (CMS, static files - like Markdown, databases, APIs, etc) and creates an aggregated dataset in GraphQL. Currently, each gatsby build uses the GraphQL dataset and queries to do a complete rebuild of the whole app - ready for deployment - including static assets like HTML, JavaScript, JSON, media files, etc. - -Projects that have a small (10s to 100s) to medium (100s to 1000s) amount of content, deploying these sites don't present a problem. - -Building sites with large amounts of content (10,000s upwards) are relatively fast with Gatsby. The issue arises when we need to support CI/CD principles, continuously building and deploying all assets have increased demand on CPU and memory. - -One solution to these problems might be to use [Gatsby Cloud's 'Build' features](https://www.gatsbyjs.com/cloud/). - -For projects that require self-hosted environments, where Gatsby Cloud would not be an option, this page build enhancement can speed up your deployments by building only the pages that have been updated by the content sources. - -For more info on the standard build process please see [overview of the gatsby build process](/docs/overview-of-the-gatsby-build-process/) - -## Warning - -This enhancement will require access to your own CI/CD build pipelines.
If your site is relatively small and doesn't require to be on-premise consider using [Gatsby JS cloud solution](https://www.gatsbyjs.com/cloud/). - -## Setup - -Set the environment variable to `GATSBY_PAGE_BUILD_ON_DATA_CHANGES=true` whilst running the command - -`GATSBY_PAGE_BUILD_ON_DATA_CHANGES=true node ./node_modules/.bin/gatsby build`. - -Optionally, if you want to list the directories of the pages that have been updated at the end of the build process, you can use one of the following arguments: - -### --write-to-file - -The `--write-to-file` will create two text files in the gatsby `.cache` folder: - -- `newPages.txt` will contain a list of directory values of the pages that have changed or are new. -- `deletedPages.txt` will contain a list of directory values of pages removed from the content sources. - -**Note that these files will not be created if there are no values.** - -### --log-pages - -The `--log-pages` parameter will output all the file paths that have been updated or deleted at the end of the build stage. - -``` -success Building production JavaScript and CSS bundles - 82.198s -success run queries - 82.762s - 4/4 0.05/s -success Building static HTML for pages - 19.386s - 2/2 0.10/s -+ success Delete previous page data - 1.512s -info Done building in 152.084 sec -+ info Built pages: -+ Updated page: /about -+ Updated page: /accounts/example -+ info Deleted pages: -+ Deleted page: /test - -Done in 154.501 sec -``` - -## More information - -- This enhancement works by comparing the page data from the previous build to the new page data. This creates a list of page directories that are passed to the static build process. - -- To enable this build option you will need to set an environment variable, so you will need access to set variables in your build environment. - -- At the end of each build, gatsby creates a `redux.state` file in `/.cache` that containing the all previous build data. 
You will need to persist the `.cache/redux.state` between builds, allowing for comparison, if there is no `redux.state` file located in the `/.cache` the folder then a full build will be triggered. - -- Any code or static query changes (templates, components, source handling, new plugins etc) creates a new webpack compilation hash and triggers a full build. diff --git a/www/src/data/sidebars/doc-links.yaml b/www/src/data/sidebars/doc-links.yaml index 6a2b4aeca8d8e..9fc293e73aada 100644 --- a/www/src/data/sidebars/doc-links.yaml +++ b/www/src/data/sidebars/doc-links.yaml @@ -486,8 +486,8 @@ breadcrumbTitle: Guess.js - title: Scaling Issues link: /docs/scaling-issues/ - - title: Page Build Speed Enhancement - link: /docs/page-build-time-enhancement/ + - title: Page Build Optimizations for Incremental Data Changes + link: /docs/page-build-optimizations-for-incremental-data-changes/ - title: Localization & Internationalization with Gatsby link: /docs/localization-i18n/ breadcrumbTitle: Localization From e9b8492a117de034cab253a47422dc841d27ade7 Mon Sep 17 00:00:00 2001 From: Stuart Rayson Date: Mon, 17 Feb 2020 15:52:09 +0000 Subject: [PATCH 12/50] initial refactor from code review --- packages/gatsby/src/commands/build.js | 24 +++++++++++++----------- packages/gatsby/src/query/index.js | 2 +- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/packages/gatsby/src/commands/build.js b/packages/gatsby/src/commands/build.js index 0792e81d38c08..c1bd1b276365b 100644 --- a/packages/gatsby/src/commands/build.js +++ b/packages/gatsby/src/commands/build.js @@ -22,6 +22,8 @@ const { } = require(`../utils/jobs-manager`) const pageDataUtil = require(`../utils/page-data`) +const previousState = readState() + type BuildArgs = { directory: string, sitePackageJson: object, @@ -142,7 +144,7 @@ module.exports = async function build(program: BuildArgs) { await waitUntilAllJobsComplete() const pagePaths = process.env.GATSBY_PAGE_BUILD_ON_DATA_CHANGES - ? 
await pageDataUtil.getChangedPageDataKeys(store.getState(), readState()) + ? await pageDataUtil.getChangedPageDataKeys(store.getState(), previousState) : [...store.getState().pages.keys()] activity = report.createProgress( `Building static HTML for pages`, @@ -190,7 +192,7 @@ module.exports = async function build(program: BuildArgs) { deletedPageKeys = await pageDataUtil.removePreviousPageData( program.directory, store.getState(), - readState() + previousState ) activity.end() } @@ -215,27 +217,27 @@ module.exports = async function build(program: BuildArgs) { if ( process.env.GATSBY_PAGE_BUILD_ON_DATA_CHANGES && - process.argv.indexOf(`--log-pages`) > -1 + process.argv.includes(`--log-pages`) ) { if (pagePaths.length) { report.info( - `Built pages:\n${pagePaths.map( - path => `Updated page: ${path}\n` - )}`.replace(/,/g, ``) + `Built pages:\n${pagePaths + .map(path => `Updated page: ${path}`) + .join(`\n`)}` ) } - if (typeof deletedPageKeys !== `undefined` && deletedPageKeys.length) { + if (deletedPageKeys?.length) { report.info( - `Deleted pages:\n${deletedPageKeys.map( - path => `Deleted page: ${path}\n` - )}`.replace(/,/g, ``) + `Deleted pages:\n${deletedPageKeys + .map(path => `Deleted page: ${path}`) + .join(`\n`)}` ) } } if ( process.env.GATSBY_PAGE_BUILD_ON_DATA_CHANGES && - process.argv.indexOf(`--write-to-file`) > -1 + process.argv.includes(`--write-to-file`) ) { const createdFilesPath = path.resolve( `${program.directory}/.cache`, diff --git a/packages/gatsby/src/query/index.js b/packages/gatsby/src/query/index.js index 780c46d625571..901c638b7a9b1 100644 --- a/packages/gatsby/src/query/index.js +++ b/packages/gatsby/src/query/index.js @@ -166,7 +166,7 @@ const processQueries = async (queryJobs, activity) => { const { pageData } = readState() if (pageData) { - readState().pageData.forEach((value, key) => { + pageData.forEach((_value, key) => { if (!pages.has(key)) { boundActionCreators.removePageData({ id: key, From 
d4d8a67ebc54c9d8251f9efd340efae9a444d77a Mon Sep 17 00:00:00 2001 From: Stuart Rayson Date: Mon, 17 Feb 2020 17:52:32 +0000 Subject: [PATCH 13/50] Use hash instead of whole page context --- packages/gatsby/src/commands/build.js | 21 +++-- packages/gatsby/src/query/index.js | 21 ++--- packages/gatsby/src/query/query-runner.js | 7 ++ packages/gatsby/src/query/queue.js | 11 +-- .../reducers/component-data-dependencies.js | 8 ++ packages/gatsby/src/utils/page-data.js | 76 ++++++++----------- 6 files changed, 76 insertions(+), 68 deletions(-) diff --git a/packages/gatsby/src/commands/build.js b/packages/gatsby/src/commands/build.js index c1bd1b276365b..b52a9f8498e57 100644 --- a/packages/gatsby/src/commands/build.js +++ b/packages/gatsby/src/commands/build.js @@ -22,7 +22,7 @@ const { } = require(`../utils/jobs-manager`) const pageDataUtil = require(`../utils/page-data`) -const previousState = readState() +const cacheData = readState() type BuildArgs = { directory: string, @@ -74,6 +74,7 @@ module.exports = async function build(program: BuildArgs) { processStaticQueries, } = queryUtil.getInitialQueryProcessors({ parentSpan: buildSpan, + cacheData, }) await processStaticQueries() @@ -144,7 +145,7 @@ module.exports = async function build(program: BuildArgs) { await waitUntilAllJobsComplete() const pagePaths = process.env.GATSBY_PAGE_BUILD_ON_DATA_CHANGES - ? await pageDataUtil.getChangedPageDataKeys(store.getState(), previousState) + ? 
pageDataUtil.getChangedPageDataKeys(store.getState(), cacheData) : [...store.getState().pages.keys()] activity = report.createProgress( `Building static HTML for pages`, @@ -189,11 +190,21 @@ module.exports = async function build(program: BuildArgs) { if (process.env.GATSBY_PAGE_BUILD_ON_DATA_CHANGES) { activity = report.activityTimer(`Delete previous page data`) activity.start() - deletedPageKeys = await pageDataUtil.removePreviousPageData( - program.directory, + deletedPageKeys = pageDataUtil.removePreviousPageData( store.getState(), - previousState + cacheData ) + + deletedPageKeys.forEach(value => { + if (value === `/`) { + fs.removeSync(`${program.directory}/public/index.html`) + fs.removeSync(`${program.directory}/public/page-data/index`) + } else { + fs.removeSync(`${program.directory}/public${value}`) + fs.removeSync(`${program.directory}/public/page-data${value}`) + } + }) + activity.end() } diff --git a/packages/gatsby/src/query/index.js b/packages/gatsby/src/query/index.js index 901c638b7a9b1..b979bc836c0a0 100644 --- a/packages/gatsby/src/query/index.js +++ b/packages/gatsby/src/query/index.js @@ -158,15 +158,14 @@ const groupQueryIds = queryIds => { } } -const processQueries = async (queryJobs, activity) => { +const processQueries = async (queryJobs, activity, cacheData) => { const queue = queryQueue.createBuildQueue() await queryQueue.processBatch(queue, queryJobs, activity) if (process.env.GATSBY_PAGE_BUILD_ON_DATA_CHANGES) { const { pages } = store.getState() - const { pageData } = readState() - if (pageData) { - pageData.forEach((_value, key) => { + if (cacheData && cacheData.pageData) { + cacheData.pageData.forEach((_value, key) => { if (!pages.has(key)) { boundActionCreators.removePageData({ id: key, @@ -220,7 +219,7 @@ const processStaticQueries = async (queryIds, { state, activity }) => { ) } -const processPageQueries = async (queryIds, { state, activity }) => { +const processPageQueries = async (queryIds, { state, activity }, cacheData) => { 
state = state || store.getState() // Make sure we filter out pages that don't exist. An example is // /dev-404-page/, whose SitePage node is created via @@ -233,7 +232,7 @@ const processPageQueries = async (queryIds, { state, activity }) => { ) } -const getInitialQueryProcessors = ({ parentSpan } = {}) => { +const getInitialQueryProcessors = ({ parentSpan, cacheData } = {}) => { const state = store.getState() const queryIds = calcInitialDirtyQueryIds(state) const { staticQueryIds, pageQueryIds } = groupQueryIds(queryIds) @@ -244,12 +243,12 @@ const getInitialQueryProcessors = ({ parentSpan } = {}) => { let activity = null let processedQueuesCount = 0 - const createProcessor = (fn, queryIds) => async () => { + const createProcessor = (fn, queryIds, cacheData) => async () => { if (!activity) { activity = createQueryRunningActivity(queryjobsCount, parentSpan) } - await fn(queryIds, { state, activity }) + await fn(queryIds, { state, activity }, cacheData) processedQueuesCount++ // if both page and static queries are done, finish activity @@ -260,7 +259,11 @@ const getInitialQueryProcessors = ({ parentSpan } = {}) => { return { processStaticQueries: createProcessor(processStaticQueries, staticQueryIds), - processPageQueries: createProcessor(processPageQueries, pageQueryIds), + processPageQueries: createProcessor( + processPageQueries, + pageQueryIds, + cacheData + ), pageQueryIds, } } diff --git a/packages/gatsby/src/query/query-runner.js b/packages/gatsby/src/query/query-runner.js index 7349005309a15..1c9d21a2acc7f 100644 --- a/packages/gatsby/src/query/query-runner.js +++ b/packages/gatsby/src/query/query-runner.js @@ -126,5 +126,12 @@ module.exports = async (graphqlRunner, queryJob: QueryJob) => { isPage: queryJob.isPage, }) + if (process.env.GATSBY_PAGE_BUILD_ON_DATA_CHANGES && queryJob.isPage) { + boundActionCreators.setPageData({ + id: queryJob.id, + result: { resultHash }, + }) + } + return result } diff --git a/packages/gatsby/src/query/queue.js 
b/packages/gatsby/src/query/queue.js index b72edd4f60ec3..f82e8b8a6beab 100644 --- a/packages/gatsby/src/query/queue.js +++ b/packages/gatsby/src/query/queue.js @@ -1,6 +1,5 @@ const Queue = require(`better-queue`) const { store } = require(`../redux`) -const { boundActionCreators } = require(`../redux/actions`) const FastMemoryStore = require(`../query/better-queue-custom-store`) const queryRunner = require(`../query/query-runner`) const websocketManager = require(`../utils/websocket-manager`) @@ -18,15 +17,7 @@ const createBuildQueue = () => { const handler = (queryJob, callback) => queryRunner(graphqlRunner, queryJob) - .then(result => { - if (process.env.GATSBY_PAGE_BUILD_ON_DATA_CHANGES && queryJob.isPage) { - boundActionCreators.setPageData({ - id: queryJob.id, - result, - }) - } - return callback(null, result) - }) + .then(result => callback(null, result)) .catch(callback) return new Queue(handler, createBaseOptions()) } diff --git a/packages/gatsby/src/redux/reducers/component-data-dependencies.js b/packages/gatsby/src/redux/reducers/component-data-dependencies.js index 989db2a7b7ba1..c92e89df264b4 100644 --- a/packages/gatsby/src/redux/reducers/component-data-dependencies.js +++ b/packages/gatsby/src/redux/reducers/component-data-dependencies.js @@ -47,6 +47,14 @@ module.exports = ( } }) + // if (state.pageData) { + // state.pageData.forEach((val, _key) => { + // for (const path of action.payload.paths) { + // val.delete(path) + // } + // }) + // } + return state // Don't delete data dependencies as we're now deleting transformed nodes // when their parent is changed. 
WIth the code below as stands, this diff --git a/packages/gatsby/src/utils/page-data.js b/packages/gatsby/src/utils/page-data.js index a730c9822e283..3c84c49ecc6e2 100644 --- a/packages/gatsby/src/utils/page-data.js +++ b/packages/gatsby/src/utils/page-data.js @@ -1,7 +1,6 @@ const fs = require(`fs-extra`) const path = require(`path`) const { store } = require(`../redux`) -const Promise = require(`bluebird`) const getFilePath = ({ publicDir }, pagePath) => { const fixedPagePath = pagePath === `/` ? `index` : pagePath @@ -37,53 +36,42 @@ const write = async ({ publicDir }, page, result) => { await fs.outputFile(filePath, bodyStr) } -const getChangedPageDataKeys = (store, cacheData) => - new Promise(resolve => { - if (cacheData.webpackCompilationHash !== store.webpackCompilationHash) { - resolve([...store.pages.keys()]) - return - } - if (cacheData.pageData && store.pageData) { - const pageKeys = [] - store.pageData.forEach((value, key) => { - if (!cacheData.pageData.has(key)) { - pageKeys.push(key) - } else { - const newPageData = JSON.stringify(value) - const previousPageData = JSON.stringify(cacheData.pageData.get(key)) +const getChangedPageDataKeys = (store, cacheData) => { + if (cacheData.webpackCompilationHash !== store.webpackCompilationHash) { + return [...store.pages.keys()] + } + if (cacheData.pageData && store.pageData) { + const pageKeys = [] + store.pageData.forEach((value, key) => { + if (!cacheData.pageData.has(key)) { + pageKeys.push(key) + } else { + const newPageData = JSON.stringify(value) + const previousPageData = JSON.stringify(cacheData.pageData.get(key)) - if (newPageData !== previousPageData) { - pageKeys.push(key) - } + if (newPageData !== previousPageData) { + pageKeys.push(key) } - }) - resolve(pageKeys) - return - } + } + }) + return pageKeys + } - resolve([...store.pages.keys()]) - }) + return [...store.pages.keys()] +} -const removePreviousPageData = (directory, store, cacheData) => - new Promise(resolve => { - if (cacheData.pageData && 
store.pageData) { - const deletedPageKeys = [] - cacheData.pageData.forEach((value, key) => { - if (!store.pageData.has(key)) { - deletedPageKeys.push(key) - if (key === `/`) { - fs.removeSync(`${directory}/public/index.html`) - fs.removeSync(`${directory}/public/page-data/index`) - } else { - fs.removeSync(`${directory}/public${key}`) - fs.removeSync(`${directory}/public/page-data${key}`) - } - } - }) - resolve(deletedPageKeys) - } - resolve(false) - }) +const removePreviousPageData = (store, cacheData) => { + if (cacheData.pageData && store.pageData) { + const deletedPageKeys = [] + cacheData.pageData.forEach((_value, key) => { + if (!store.pageData.has(key)) { + deletedPageKeys.push(key) + } + }) + return deletedPageKeys + } + return [] +} module.exports = { read, From 4af19364264f9471745eee9610d6163af46a2ab1 Mon Sep 17 00:00:00 2001 From: Stuart Rayson Date: Mon, 17 Feb 2020 17:53:08 +0000 Subject: [PATCH 14/50] Use hash instead of whole page context --- packages/gatsby/src/query/index.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/gatsby/src/query/index.js b/packages/gatsby/src/query/index.js index b979bc836c0a0..535ee1df4e259 100644 --- a/packages/gatsby/src/query/index.js +++ b/packages/gatsby/src/query/index.js @@ -3,7 +3,7 @@ const _ = require(`lodash`) const Queue = require(`better-queue`) // const convertHrtime = require(`convert-hrtime`) -const { store, emitter, readState } = require(`../redux`) +const { store, emitter } = require(`../redux`) const { boundActionCreators } = require(`../redux/actions`) const report = require(`gatsby-cli/lib/reporter`) const queryQueue = require(`./queue`) From ca519451f150a2c0857ed0736ec4a6b792e85856 Mon Sep 17 00:00:00 2001 From: Stuart Rayson Date: Mon, 17 Feb 2020 18:07:47 +0000 Subject: [PATCH 15/50] Remove page data in dev action deleteComponentsDependencies --- .../redux/reducers/component-data-dependencies.js | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff 
--git a/packages/gatsby/src/redux/reducers/component-data-dependencies.js b/packages/gatsby/src/redux/reducers/component-data-dependencies.js index c92e89df264b4..3bee7d826b95a 100644 --- a/packages/gatsby/src/redux/reducers/component-data-dependencies.js +++ b/packages/gatsby/src/redux/reducers/component-data-dependencies.js @@ -1,5 +1,5 @@ module.exports = ( - state = { nodes: new Map(), connections: new Map() }, + state = { nodes: new Map(), connections: new Map(), pageData: new Map() }, action ) => { switch (action.type) { @@ -47,13 +47,11 @@ module.exports = ( } }) - // if (state.pageData) { - // state.pageData.forEach((val, _key) => { - // for (const path of action.payload.paths) { - // val.delete(path) - // } - // }) - // } + if (state.pageData) { + for (const path of action.payload.paths) { + state.pageData.delete(path) + } + } return state // Don't delete data dependencies as we're now deleting transformed nodes From 967596d6c241f3efba6ebc8a25a4c86c2a6a4980 Mon Sep 17 00:00:00 2001 From: Stuart Rayson Date: Mon, 17 Feb 2020 18:37:08 +0000 Subject: [PATCH 16/50] Pass cache page data to processQueries function --- packages/gatsby/src/commands/build.js | 3 ++- packages/gatsby/src/query/index.js | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/packages/gatsby/src/commands/build.js b/packages/gatsby/src/commands/build.js index b52a9f8498e57..bff08a90e10e6 100644 --- a/packages/gatsby/src/commands/build.js +++ b/packages/gatsby/src/commands/build.js @@ -237,7 +237,8 @@ module.exports = async function build(program: BuildArgs) { .join(`\n`)}` ) } - if (deletedPageKeys?.length) { + + if (deletedPageKeys.length) { report.info( `Deleted pages:\n${deletedPageKeys .map(path => `Deleted page: ${path}`) diff --git a/packages/gatsby/src/query/index.js b/packages/gatsby/src/query/index.js index 535ee1df4e259..9bc44bd171bd0 100644 --- a/packages/gatsby/src/query/index.js +++ b/packages/gatsby/src/query/index.js @@ -163,7 +163,6 @@ const processQueries 
= async (queryJobs, activity, cacheData) => { await queryQueue.processBatch(queue, queryJobs, activity) if (process.env.GATSBY_PAGE_BUILD_ON_DATA_CHANGES) { const { pages } = store.getState() - if (cacheData && cacheData.pageData) { cacheData.pageData.forEach((_value, key) => { if (!pages.has(key)) { @@ -228,7 +227,8 @@ const processPageQueries = async (queryIds, { state, activity }, cacheData) => { const pages = _.filter(queryIds.map(id => state.pages.get(id))) await processQueries( pages.map(page => createPageQueryJob(state, page)), - activity + activity, + cacheData ) } From 46ff7521f1adffc4ef009100b2c333a8eb1bffbc Mon Sep 17 00:00:00 2001 From: Stuart Rayson Date: Mon, 17 Feb 2020 19:07:07 +0000 Subject: [PATCH 17/50] Remove added pageData check in component-data-dependencies.js --- .../src/redux/reducers/component-data-dependencies.js | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/packages/gatsby/src/redux/reducers/component-data-dependencies.js b/packages/gatsby/src/redux/reducers/component-data-dependencies.js index 3bee7d826b95a..989db2a7b7ba1 100644 --- a/packages/gatsby/src/redux/reducers/component-data-dependencies.js +++ b/packages/gatsby/src/redux/reducers/component-data-dependencies.js @@ -1,5 +1,5 @@ module.exports = ( - state = { nodes: new Map(), connections: new Map(), pageData: new Map() }, + state = { nodes: new Map(), connections: new Map() }, action ) => { switch (action.type) { @@ -47,12 +47,6 @@ module.exports = ( } }) - if (state.pageData) { - for (const path of action.payload.paths) { - state.pageData.delete(path) - } - } - return state // Don't delete data dependencies as we're now deleting transformed nodes // when their parent is changed. 
WIth the code below as stands, this From 7060d8d11af4cfd62c9e6f59c346e69dafbc46e9 Mon Sep 17 00:00:00 2001 From: Stuart Rayson Date: Mon, 17 Feb 2020 19:43:01 +0000 Subject: [PATCH 18/50] Remove pagedata if page removed in page-hot-reloader --- packages/gatsby/src/bootstrap/page-hot-reloader.js | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/packages/gatsby/src/bootstrap/page-hot-reloader.js b/packages/gatsby/src/bootstrap/page-hot-reloader.js index 3e8ae7b980695..3e64822b52ced 100644 --- a/packages/gatsby/src/bootstrap/page-hot-reloader.js +++ b/packages/gatsby/src/bootstrap/page-hot-reloader.js @@ -1,7 +1,11 @@ const { emitter, store } = require(`../redux`) const apiRunnerNode = require(`../utils/api-runner-node`) const { boundActionCreators } = require(`../redux/actions`) -const { deletePage, deleteComponentsDependencies } = boundActionCreators +const { + deletePage, + deleteComponentsDependencies, + removePageData, +} = boundActionCreators const report = require(`gatsby-cli/lib/reporter`) let pagesDirty = false @@ -34,6 +38,7 @@ const runCreatePages = async () => { page.path !== `/404.html` ) { deleteComponentsDependencies([page.path]) + removePageData(page.path) deletePage(page) } }) From a9b2b68ea138a043200097920dc9249366c56a04 Mon Sep 17 00:00:00 2001 From: Stuart Rayson Date: Tue, 18 Feb 2020 12:03:22 +0000 Subject: [PATCH 19/50] Update snapshots --- .../cache-dir/__tests__/__snapshots__/static-entry.js.snap | 6 +++--- .../gatsby/src/redux/__tests__/__snapshots__/index.js.snap | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/packages/gatsby/cache-dir/__tests__/__snapshots__/static-entry.js.snap b/packages/gatsby/cache-dir/__tests__/__snapshots__/static-entry.js.snap index 5868a8d2d64bb..25562f009fe1d 100644 --- a/packages/gatsby/cache-dir/__tests__/__snapshots__/static-entry.js.snap +++ b/packages/gatsby/cache-dir/__tests__/__snapshots__/static-entry.js.snap @@ -6,8 +6,8 @@ exports[`develop-static-entry 
onPreRenderHTML can be used to replace postBodyCom exports[`develop-static-entry onPreRenderHTML can be used to replace preBodyComponents 1`] = `"
div3
div2
div1
"`; -exports[`static-entry onPreRenderHTML can be used to replace headComponents 1`] = `"
"`; +exports[`static-entry onPreRenderHTML can be used to replace headComponents 1`] = `"
"`; -exports[`static-entry onPreRenderHTML can be used to replace postBodyComponents 1`] = `"
div3
div2
div1
"`; +exports[`static-entry onPreRenderHTML can be used to replace postBodyComponents 1`] = `"
div3
div2
div1
"`; -exports[`static-entry onPreRenderHTML can be used to replace preBodyComponents 1`] = `"
div3
div2
div1
"`; +exports[`static-entry onPreRenderHTML can be used to replace preBodyComponents 1`] = `"
div3
div2
div1
"`; diff --git a/packages/gatsby/src/redux/__tests__/__snapshots__/index.js.snap b/packages/gatsby/src/redux/__tests__/__snapshots__/index.js.snap index 9b332e53a51f8..949b041b551dd 100644 --- a/packages/gatsby/src/redux/__tests__/__snapshots__/index.js.snap +++ b/packages/gatsby/src/redux/__tests__/__snapshots__/index.js.snap @@ -20,6 +20,7 @@ Object { "complete": Map {}, "incomplete": Map {}, }, + "pageData": Map {}, "pageDataStats": Map {}, "staticQueryComponents": Map {}, "status": Object { From 697634eb94e3b251056945e95a26ce9d2749f8eb Mon Sep 17 00:00:00 2001 From: Stuart Rayson Date: Tue, 18 Feb 2020 12:27:15 +0000 Subject: [PATCH 20/50] Revert "Update snapshots" This reverts commit a9b2b68ea138a043200097920dc9249366c56a04. --- .../cache-dir/__tests__/__snapshots__/static-entry.js.snap | 6 +++--- .../gatsby/src/redux/__tests__/__snapshots__/index.js.snap | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/packages/gatsby/cache-dir/__tests__/__snapshots__/static-entry.js.snap b/packages/gatsby/cache-dir/__tests__/__snapshots__/static-entry.js.snap index 25562f009fe1d..5868a8d2d64bb 100644 --- a/packages/gatsby/cache-dir/__tests__/__snapshots__/static-entry.js.snap +++ b/packages/gatsby/cache-dir/__tests__/__snapshots__/static-entry.js.snap @@ -6,8 +6,8 @@ exports[`develop-static-entry onPreRenderHTML can be used to replace postBodyCom exports[`develop-static-entry onPreRenderHTML can be used to replace preBodyComponents 1`] = `"
div3
div2
div1
"`; -exports[`static-entry onPreRenderHTML can be used to replace headComponents 1`] = `"
"`; +exports[`static-entry onPreRenderHTML can be used to replace headComponents 1`] = `"
"`; -exports[`static-entry onPreRenderHTML can be used to replace postBodyComponents 1`] = `"
div3
div2
div1
"`; +exports[`static-entry onPreRenderHTML can be used to replace postBodyComponents 1`] = `"
div3
div2
div1
"`; -exports[`static-entry onPreRenderHTML can be used to replace preBodyComponents 1`] = `"
div3
div2
div1
"`; +exports[`static-entry onPreRenderHTML can be used to replace preBodyComponents 1`] = `"
div3
div2
div1
"`; diff --git a/packages/gatsby/src/redux/__tests__/__snapshots__/index.js.snap b/packages/gatsby/src/redux/__tests__/__snapshots__/index.js.snap index 949b041b551dd..9b332e53a51f8 100644 --- a/packages/gatsby/src/redux/__tests__/__snapshots__/index.js.snap +++ b/packages/gatsby/src/redux/__tests__/__snapshots__/index.js.snap @@ -20,7 +20,6 @@ Object { "complete": Map {}, "incomplete": Map {}, }, - "pageData": Map {}, "pageDataStats": Map {}, "staticQueryComponents": Map {}, "status": Object { From 01b8283d5c20f36fee93766db5007ec7042fdcc0 Mon Sep 17 00:00:00 2001 From: Stuart Rayson Date: Tue, 18 Feb 2020 12:29:43 +0000 Subject: [PATCH 21/50] Update reducer context --- packages/gatsby/src/redux/__tests__/__snapshots__/index.js.snap | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/gatsby/src/redux/__tests__/__snapshots__/index.js.snap b/packages/gatsby/src/redux/__tests__/__snapshots__/index.js.snap index 9b332e53a51f8..949b041b551dd 100644 --- a/packages/gatsby/src/redux/__tests__/__snapshots__/index.js.snap +++ b/packages/gatsby/src/redux/__tests__/__snapshots__/index.js.snap @@ -20,6 +20,7 @@ Object { "complete": Map {}, "incomplete": Map {}, }, + "pageData": Map {}, "pageDataStats": Map {}, "staticQueryComponents": Map {}, "status": Object { From 83fd4fa7d3d3a94babcf2a90c4614bcaeae527f8 Mon Sep 17 00:00:00 2001 From: Dominic Fallows Date: Tue, 18 Feb 2020 15:01:42 +0000 Subject: [PATCH 22/50] Update docs/docs/page-build-optimizations-for-incremental-data-changes.md Co-Authored-By: LB --- .../page-build-optimizations-for-incremental-data-changes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs/page-build-optimizations-for-incremental-data-changes.md b/docs/docs/page-build-optimizations-for-incremental-data-changes.md index ff443b79e7330..fa050c977bb49 100644 --- a/docs/docs/page-build-optimizations-for-incremental-data-changes.md +++ b/docs/docs/page-build-optimizations-for-incremental-data-changes.md @@ -24,7 +24,7 @@ 
This will run the Gatsby build process, but only build pages that have data chan ### Reporting what has been built -You might need to get a list of the pages that have been built for example, if you want to perform a sync action in your CI/CD pipeline. +You may want to retrieve a list of the pages that were built. For example, if you want to perform a sync action in your CI/CD pipeline. To list the paths in the build assets (`public`) folder, you can use one (or both) of the following arguments in your `build` command. From ce2202dda3e67a55cde06badc071426a12ffbd59 Mon Sep 17 00:00:00 2001 From: Dominic Fallows Date: Tue, 18 Feb 2020 15:02:00 +0000 Subject: [PATCH 23/50] Update docs/docs/page-build-optimizations-for-incremental-data-changes.md Co-Authored-By: LB --- .../page-build-optimizations-for-incremental-data-changes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs/page-build-optimizations-for-incremental-data-changes.md b/docs/docs/page-build-optimizations-for-incremental-data-changes.md index fa050c977bb49..8617f86dd7180 100644 --- a/docs/docs/page-build-optimizations-for-incremental-data-changes.md +++ b/docs/docs/page-build-optimizations-for-incremental-data-changes.md @@ -4,7 +4,7 @@ title: Page Build Optimizations for Incremental Data Changes Gatsby sources data from multiple sources (CMS, static files - like Markdown, databases, APIs, etc) and creates an aggregated dataset in GraphQL. Currently, each gatsby build uses the GraphQL dataset and queries to do a complete rebuild of the whole app - ready for deployment - including static assets like HTML, JavaScript, JSON, media files, etc. -Projects that have a small (10s to 100s) to medium (100s to 1000s) amount of content, deploying these sites don't present a problem. +For projects that have a small (10s to 100s) to medium (100s to 1000s) amount of content, deployment is not a challenge. 
Building sites with large amounts of content (10,000s upwards) are already relatively fast with Gatsby. However, some projects might start to experience issues when adopting CI/CD principles - continuously building and deploying. Gatsby rebuilds the complete app which means the complete app also needs to be deployed. Doing this each time a small data change occurs unnecessarily increases demand on CPU, memory, and bandwidth. From e78c9a66a334ed39aa4eb42ffe7bad8a8bf78c30 Mon Sep 17 00:00:00 2001 From: Dominic Fallows Date: Tue, 18 Feb 2020 15:03:18 +0000 Subject: [PATCH 24/50] Update docs/docs/page-build-optimizations-for-incremental-data-changes.md --- .../page-build-optimizations-for-incremental-data-changes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs/page-build-optimizations-for-incremental-data-changes.md b/docs/docs/page-build-optimizations-for-incremental-data-changes.md index ff443b79e7330..d97f6574cc5bc 100644 --- a/docs/docs/page-build-optimizations-for-incremental-data-changes.md +++ b/docs/docs/page-build-optimizations-for-incremental-data-changes.md @@ -2,7 +2,7 @@ title: Page Build Optimizations for Incremental Data Changes --- -Gatsby sources data from multiple sources (CMS, static files - like Markdown, databases, APIs, etc) and creates an aggregated dataset in GraphQL. Currently, each gatsby build uses the GraphQL dataset and queries to do a complete rebuild of the whole app - ready for deployment - including static assets like HTML, JavaScript, JSON, media files, etc. +Gatsby sources data from multiple sources (CMS, static files - like Markdown, databases, APIs, etc) and creates an aggregated dataset in GraphQL. Currently, each Gatsby build uses the GraphQL dataset and queries to do a complete rebuild of the whole app - ready for deployment - including static assets like HTML, JavaScript, JSON, media files, etc. 
Projects that have a small (10s to 100s) to medium (100s to 1000s) amount of content, deploying these sites don't present a problem. From eb9b9f831d2ffad7f3eec631322b858404e4f14c Mon Sep 17 00:00:00 2001 From: Dominic Fallows Date: Tue, 18 Feb 2020 15:05:37 +0000 Subject: [PATCH 25/50] Update docs/docs/page-build-optimizations-for-incremental-data-changes.md Co-Authored-By: LB --- .../page-build-optimizations-for-incremental-data-changes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs/page-build-optimizations-for-incremental-data-changes.md b/docs/docs/page-build-optimizations-for-incremental-data-changes.md index b37dcf59d2523..327e0df820933 100644 --- a/docs/docs/page-build-optimizations-for-incremental-data-changes.md +++ b/docs/docs/page-build-optimizations-for-incremental-data-changes.md @@ -6,7 +6,7 @@ Gatsby sources data from multiple sources (CMS, static files - like Markdown, da For projects that have a small (10s to 100s) to medium (100s to 1000s) amount of content, deployment is not a challenge. -Building sites with large amounts of content (10,000s upwards) are already relatively fast with Gatsby. However, some projects might start to experience issues when adopting CI/CD principles - continuously building and deploying. Gatsby rebuilds the complete app which means the complete app also needs to be deployed. Doing this each time a small data change occurs unnecessarily increases demand on CPU, memory, and bandwidth. +Even building sites with large amounts of content (10,000s upwards) are relatively fast with Gatsby. However, some projects might start to experience issues when adopting CI/CD principles - continuously building and deploying. Gatsby rebuilds the complete app which means the complete app also needs to be deployed. Doing this each time a small data change occurs unnecessarily increases demand on CPU, memory, and bandwidth. 
One solution to these problems might be to use [Gatsby Cloud's Build features](https://www.gatsbyjs.com/cloud/). From 623a3ad328352b46351b4cd784e7d7a2e18cabe1 Mon Sep 17 00:00:00 2001 From: Dominic Fallows Date: Tue, 18 Feb 2020 15:07:54 +0000 Subject: [PATCH 26/50] Update docs/docs/page-build-optimizations-for-incremental-data-changes.md Co-Authored-By: LB --- .../page-build-optimizations-for-incremental-data-changes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs/page-build-optimizations-for-incremental-data-changes.md b/docs/docs/page-build-optimizations-for-incremental-data-changes.md index 327e0df820933..dc1b837114d19 100644 --- a/docs/docs/page-build-optimizations-for-incremental-data-changes.md +++ b/docs/docs/page-build-optimizations-for-incremental-data-changes.md @@ -52,7 +52,7 @@ Done in 154.501 sec If there are no changed or deleted paths, then the relevant files will not be created in the `.cache` folder. -The `--log-pages` parameter will output all the file paths that have been updated or deleted at the end of the build stage. +The `--log-pages` parameter will output all the file paths that were updated or deleted at the end of the build stage. 
```bash success Building production JavaScript and CSS bundles - 82.198s From 6a73b97407714ace03db653cd6da7852613b3f6d Mon Sep 17 00:00:00 2001 From: Dominic Fallows Date: Tue, 18 Feb 2020 15:08:18 +0000 Subject: [PATCH 27/50] Update docs/docs/page-build-optimizations-for-incremental-data-changes.md Co-Authored-By: LB --- .../page-build-optimizations-for-incremental-data-changes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs/page-build-optimizations-for-incremental-data-changes.md b/docs/docs/page-build-optimizations-for-incremental-data-changes.md index dc1b837114d19..e68417a643f9b 100644 --- a/docs/docs/page-build-optimizations-for-incremental-data-changes.md +++ b/docs/docs/page-build-optimizations-for-incremental-data-changes.md @@ -75,6 +75,6 @@ Done in 154.501 sec - To enable this build option you will need to set an environment variable, so you will need access to set variables in your build environment -- At the end of each build, gatsby creates a `redux.state` file in `/.cache` that containing the all previous build data. You will need to persist the `.cache/redux.state` between builds, allowing for comparison, if there is no `redux.state` file located in the `/.cache` the folder then a full build will be triggered +- At the end of each build, gatsby creates a `redux.state` file in `/.cache` that contains the all previous build data. 
You will need to persist the `.cache/redux.state` between builds, allowing for comparison, if there is no `redux.state` file located in the `/.cache` folder then a full build will be triggered - Any code or static query changes (templates, components, source handling, new plugins etc) creates a new webpack compilation hash and triggers a full build From d0ad8ee2a86bda3177ca20795edf91d368ee3795 Mon Sep 17 00:00:00 2001 From: Dominic Fallows Date: Tue, 18 Feb 2020 15:55:29 +0000 Subject: [PATCH 28/50] Fix doc duplication error --- ...imizations-for-incremental-data-changes.md | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/docs/docs/page-build-optimizations-for-incremental-data-changes.md b/docs/docs/page-build-optimizations-for-incremental-data-changes.md index e68417a643f9b..16e58f0cff2fe 100644 --- a/docs/docs/page-build-optimizations-for-incremental-data-changes.md +++ b/docs/docs/page-build-optimizations-for-incremental-data-changes.md @@ -28,7 +28,7 @@ You may want to retrieve a list of the pages that were built. For example, if yo To list the paths in the build assets (`public`) folder, you can use one (or both) of the following arguments in your `build` command. -- `--log-pages` outputs the updated paths to the console at the end of the build +- `--log-pages` parameter will output all the file paths that were updated or deleted at the end of the build stage. ```bash success Building production JavaScript and CSS bundles - 82.198s @@ -52,23 +52,6 @@ Done in 154.501 sec If there are no changed or deleted paths, then the relevant files will not be created in the `.cache` folder. -The `--log-pages` parameter will output all the file paths that were updated or deleted at the end of the build stage. 
- -```bash -success Building production JavaScript and CSS bundles - 82.198s -success run queries - 82.762s - 4/4 0.05/s -success Building static HTML for pages - 19.386s - 2/2 0.10/s -+ success Delete previous page data - 1.512s -info Done building in 152.084 sec -+ info Built pages: -+ Updated page: /about -+ Updated page: /accounts/example -+ info Deleted pages: -+ Deleted page: /test - -Done in 154.501 sec -``` - ## More information - This enhancement works by comparing the page data from the previous build to the new page data. This creates a list of page directories that are passed to the static build process From d379cd82e168fcabe36ed5a27bf03cfa02660608 Mon Sep 17 00:00:00 2001 From: Dominic Fallows Date: Wed, 19 Feb 2020 12:41:01 +0000 Subject: [PATCH 29/50] Update docs --- ...page-build-optimizations-for-incremental-data-changes.md | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/docs/docs/page-build-optimizations-for-incremental-data-changes.md b/docs/docs/page-build-optimizations-for-incremental-data-changes.md index 16e58f0cff2fe..8ef21dc2c7845 100644 --- a/docs/docs/page-build-optimizations-for-incremental-data-changes.md +++ b/docs/docs/page-build-optimizations-for-incremental-data-changes.md @@ -2,11 +2,7 @@ title: Page Build Optimizations for Incremental Data Changes --- -Gatsby sources data from multiple sources (CMS, static files - like Markdown, databases, APIs, etc) and creates an aggregated dataset in GraphQL. Currently, each Gatsby build uses the GraphQL dataset and queries to do a complete rebuild of the whole app - ready for deployment - including static assets like HTML, JavaScript, JSON, media files, etc. - -For projects that have a small (10s to 100s) to medium (100s to 1000s) amount of content, deployment is not a challenge. - -Even building sites with large amounts of content (10,000s upwards) are relatively fast with Gatsby. 
However, some projects might start to experience issues when adopting CI/CD principles - continuously building and deploying. Gatsby rebuilds the complete app which means the complete app also needs to be deployed. Doing this each time a small data change occurs unnecessarily increases demand on CPU, memory, and bandwidth. +Building sites with large amounts of content (10,000s nodes upwards) is relatively fast with Gatsby. However, some projects might start to experience issues when adopting CI/CD principles - continuously building and deploying. Gatsby rebuilds the complete app on each `gatsby build` which means the complete app also needs to be deployed. Doing this each time a small data change occurs unnecessarily increases demand on CPU, memory, and bandwidth. One solution to these problems might be to use [Gatsby Cloud's Build features](https://www.gatsbyjs.com/cloud/). From fdc65a3b1603dc3a75654c655b32b2a45def19bb Mon Sep 17 00:00:00 2001 From: Dominic Fallows Date: Thu, 20 Feb 2020 14:29:46 +0000 Subject: [PATCH 30/50] Update docs/docs/page-build-optimizations-for-incremental-data-changes.md Co-Authored-By: Michal Piechowiak --- .../page-build-optimizations-for-incremental-data-changes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs/page-build-optimizations-for-incremental-data-changes.md b/docs/docs/page-build-optimizations-for-incremental-data-changes.md index 8ef21dc2c7845..9aa5d33189a9d 100644 --- a/docs/docs/page-build-optimizations-for-incremental-data-changes.md +++ b/docs/docs/page-build-optimizations-for-incremental-data-changes.md @@ -1,5 +1,5 @@ --- -title: Page Build Optimizations for Incremental Data Changes +title: Experimental Page Build Optimizations for Incremental Data Changes --- Building sites with large amounts of content (10,000s nodes upwards) is relatively fast with Gatsby. However, some projects might start to experience issues when adopting CI/CD principles - continuously building and deploying. 
Gatsby rebuilds the complete app on each `gatsby build` which means the complete app also needs to be deployed. Doing this each time a small data change occurs unnecessarily increases demand on CPU, memory, and bandwidth. From 6056bf4ec105c72444b485559cb41a16df148d70 Mon Sep 17 00:00:00 2001 From: Dominic Fallows Date: Thu, 20 Feb 2020 14:30:01 +0000 Subject: [PATCH 31/50] Update www/src/data/sidebars/doc-links.yaml Co-Authored-By: Michal Piechowiak --- www/src/data/sidebars/doc-links.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/www/src/data/sidebars/doc-links.yaml b/www/src/data/sidebars/doc-links.yaml index f618442a0e2c5..8cd58debacc65 100644 --- a/www/src/data/sidebars/doc-links.yaml +++ b/www/src/data/sidebars/doc-links.yaml @@ -491,7 +491,7 @@ breadcrumbTitle: Guess.js - title: Scaling Issues link: /docs/scaling-issues/ - - title: Page Build Optimizations for Incremental Data Changes + - title: Experimental Page Build Optimizations for Incremental Data Changes link: /docs/page-build-optimizations-for-incremental-data-changes/ - title: Localization & Internationalization with Gatsby link: /docs/localization-i18n/ From 637084f74affb68f6ced3afa13089e93aa4d29bb Mon Sep 17 00:00:00 2001 From: Stuart Rayson Date: Thu, 20 Feb 2020 16:09:43 +0000 Subject: [PATCH 32/50] refector improvements --- packages/gatsby/src/bootstrap/index.js | 3 +- .../gatsby/src/bootstrap/page-hot-reloader.js | 7 +-- packages/gatsby/src/commands/build.js | 49 +++++++++++++++---- packages/gatsby/src/query/index.js | 31 +++--------- packages/gatsby/src/query/query-runner.js | 8 ++- packages/gatsby/src/redux/actions/public.js | 10 ++-- .../gatsby/src/redux/reducers/page-data.js | 2 +- packages/gatsby/src/utils/page-data.js | 22 +++++---- 8 files changed, 76 insertions(+), 56 deletions(-) diff --git a/packages/gatsby/src/bootstrap/index.js b/packages/gatsby/src/bootstrap/index.js index eaca8ddecb007..a866ad2aca4b8 100644 --- a/packages/gatsby/src/bootstrap/index.js +++ 
b/packages/gatsby/src/bootstrap/index.js @@ -191,7 +191,7 @@ module.exports = async (args: BootstrapArgs) => { // During builds, delete html and css files from the public directory as we don't want // deleted pages and styles from previous builds to stick around. if ( - !process.env.GATSBY_PAGE_BUILD_ON_DATA_CHANGES && + !process.env.GATSBY_EXPERIMENTAL_PAGE_BUILD_ON_DATA_CHANGES && process.env.NODE_ENV === `production` ) { activity = report.activityTimer( @@ -224,6 +224,7 @@ module.exports = async (args: BootstrapArgs) => { // logic in there e.g. generating slugs for custom pages. const pluginVersions = flattenedPlugins.map(p => p.version) const hashes = await Promise.all([ + !!process.env.GATSBY_EXPERIMENTAL_PAGE_BUILD_ON_DATA_CHANGES, md5File(`package.json`), Promise.resolve( md5File(`${program.directory}/gatsby-config.js`).catch(() => {}) diff --git a/packages/gatsby/src/bootstrap/page-hot-reloader.js b/packages/gatsby/src/bootstrap/page-hot-reloader.js index 3e64822b52ced..3e8ae7b980695 100644 --- a/packages/gatsby/src/bootstrap/page-hot-reloader.js +++ b/packages/gatsby/src/bootstrap/page-hot-reloader.js @@ -1,11 +1,7 @@ const { emitter, store } = require(`../redux`) const apiRunnerNode = require(`../utils/api-runner-node`) const { boundActionCreators } = require(`../redux/actions`) -const { - deletePage, - deleteComponentsDependencies, - removePageData, -} = boundActionCreators +const { deletePage, deleteComponentsDependencies } = boundActionCreators const report = require(`gatsby-cli/lib/reporter`) let pagesDirty = false @@ -38,7 +34,6 @@ const runCreatePages = async () => { page.path !== `/404.html` ) { deleteComponentsDependencies([page.path]) - removePageData(page.path) deletePage(page) } }) diff --git a/packages/gatsby/src/commands/build.js b/packages/gatsby/src/commands/build.js index bff08a90e10e6..981abfeea1c76 100644 --- a/packages/gatsby/src/commands/build.js +++ b/packages/gatsby/src/commands/build.js @@ -21,8 +21,16 @@ const { 
waitUntilAllJobsComplete: waitUntilAllJobsV2Complete, } = require(`../utils/jobs-manager`) const pageDataUtil = require(`../utils/page-data`) - -const cacheData = readState() +const { boundActionCreators } = require(`../redux/actions`) + +let cachedPageData +let cachedWebpackCompilationHash +if (process.env.GATSBY_EXPERIMENTAL_PAGE_BUILD_ON_DATA_CHANGES) { + const { pageData, webpackCompilationHash } = readState() + // extract only data that we need to reuse and let v8 garbage collect rest of state + cachedPageData = pageData + cachedWebpackCompilationHash = webpackCompilationHash +} type BuildArgs = { directory: string, @@ -74,7 +82,6 @@ module.exports = async function build(program: BuildArgs) { processStaticQueries, } = queryUtil.getInitialQueryProcessors({ parentSpan: buildSpan, - cacheData, }) await processStaticQueries() @@ -124,6 +131,19 @@ module.exports = async function build(program: BuildArgs) { await processPageQueries() + if (process.env.GATSBY_EXPERIMENTAL_PAGE_BUILD_ON_DATA_CHANGES) { + const { pages } = store.getState() + if (cachedPageData) { + cachedPageData.forEach((_value, key) => { + if (!pages.has(key)) { + boundActionCreators.removePageData({ + id: key, + }) + } + }) + } + } + if (telemetry.isTrackingEnabled()) { // transform asset size to kB (from bytes) to fit 64 bit to numbers const bundleSizes = stats @@ -142,10 +162,19 @@ module.exports = async function build(program: BuildArgs) { `BOOTSTRAP_QUERY_RUNNING_FINISHED` ) + await db.saveState() + await waitUntilAllJobsComplete() - const pagePaths = process.env.GATSBY_PAGE_BUILD_ON_DATA_CHANGES - ? pageDataUtil.getChangedPageDataKeys(store.getState(), cacheData) + // we need to save it again to make sure our latest state has been saved + await db.saveState() + + const pagePaths = process.env.GATSBY_EXPERIMENTAL_PAGE_BUILD_ON_DATA_CHANGES + ? 
pageDataUtil.getChangedPageDataKeys( + store.getState(), + cachedPageData, + cachedWebpackCompilationHash + ) : [...store.getState().pages.keys()] activity = report.createProgress( `Building static HTML for pages`, @@ -187,12 +216,12 @@ module.exports = async function build(program: BuildArgs) { activity.done() let deletedPageKeys = [] - if (process.env.GATSBY_PAGE_BUILD_ON_DATA_CHANGES) { + if (process.env.GATSBY_EXPERIMENTAL_PAGE_BUILD_ON_DATA_CHANGES) { activity = report.activityTimer(`Delete previous page data`) activity.start() - deletedPageKeys = pageDataUtil.removePreviousPageData( + deletedPageKeys = pageDataUtil.collectRemovedPageData( store.getState(), - cacheData + cachedPageData ) deletedPageKeys.forEach(value => { @@ -227,7 +256,7 @@ module.exports = async function build(program: BuildArgs) { buildActivity.end() if ( - process.env.GATSBY_PAGE_BUILD_ON_DATA_CHANGES && + process.env.GATSBY_EXPERIMENTAL_PAGE_BUILD_ON_DATA_CHANGES && process.argv.includes(`--log-pages`) ) { if (pagePaths.length) { @@ -248,7 +277,7 @@ module.exports = async function build(program: BuildArgs) { } if ( - process.env.GATSBY_PAGE_BUILD_ON_DATA_CHANGES && + process.env.GATSBY_EXPERIMENTAL_PAGE_BUILD_ON_DATA_CHANGES && process.argv.includes(`--write-to-file`) ) { const createdFilesPath = path.resolve( diff --git a/packages/gatsby/src/query/index.js b/packages/gatsby/src/query/index.js index 9bc44bd171bd0..b183707da9cd6 100644 --- a/packages/gatsby/src/query/index.js +++ b/packages/gatsby/src/query/index.js @@ -158,21 +158,9 @@ const groupQueryIds = queryIds => { } } -const processQueries = async (queryJobs, activity, cacheData) => { +const processQueries = async (queryJobs, activity) => { const queue = queryQueue.createBuildQueue() await queryQueue.processBatch(queue, queryJobs, activity) - if (process.env.GATSBY_PAGE_BUILD_ON_DATA_CHANGES) { - const { pages } = store.getState() - if (cacheData && cacheData.pageData) { - cacheData.pageData.forEach((_value, key) => { - if 
(!pages.has(key)) { - boundActionCreators.removePageData({ - id: key, - }) - } - }) - } - } } const createStaticQueryJob = (state, queryId) => { @@ -218,7 +206,7 @@ const processStaticQueries = async (queryIds, { state, activity }) => { ) } -const processPageQueries = async (queryIds, { state, activity }, cacheData) => { +const processPageQueries = async (queryIds, { state, activity }) => { state = state || store.getState() // Make sure we filter out pages that don't exist. An example is // /dev-404-page/, whose SitePage node is created via @@ -227,12 +215,11 @@ const processPageQueries = async (queryIds, { state, activity }, cacheData) => { const pages = _.filter(queryIds.map(id => state.pages.get(id))) await processQueries( pages.map(page => createPageQueryJob(state, page)), - activity, - cacheData + activity ) } -const getInitialQueryProcessors = ({ parentSpan, cacheData } = {}) => { +const getInitialQueryProcessors = ({ parentSpan } = {}) => { const state = store.getState() const queryIds = calcInitialDirtyQueryIds(state) const { staticQueryIds, pageQueryIds } = groupQueryIds(queryIds) @@ -243,12 +230,12 @@ const getInitialQueryProcessors = ({ parentSpan, cacheData } = {}) => { let activity = null let processedQueuesCount = 0 - const createProcessor = (fn, queryIds, cacheData) => async () => { + const createProcessor = (fn, queryIds) => async () => { if (!activity) { activity = createQueryRunningActivity(queryjobsCount, parentSpan) } - await fn(queryIds, { state, activity }, cacheData) + await fn(queryIds, { state, activity }) processedQueuesCount++ // if both page and static queries are done, finish activity @@ -259,11 +246,7 @@ const getInitialQueryProcessors = ({ parentSpan, cacheData } = {}) => { return { processStaticQueries: createProcessor(processStaticQueries, staticQueryIds), - processPageQueries: createProcessor( - processPageQueries, - pageQueryIds, - cacheData - ), + processPageQueries: createProcessor(processPageQueries, pageQueryIds), 
pageQueryIds, } } diff --git a/packages/gatsby/src/query/query-runner.js b/packages/gatsby/src/query/query-runner.js index 1c9d21a2acc7f..59b50a84d50aa 100644 --- a/packages/gatsby/src/query/query-runner.js +++ b/packages/gatsby/src/query/query-runner.js @@ -126,10 +126,14 @@ module.exports = async (graphqlRunner, queryJob: QueryJob) => { isPage: queryJob.isPage, }) - if (process.env.GATSBY_PAGE_BUILD_ON_DATA_CHANGES && queryJob.isPage) { + // Sets pageData to the store, here for easier access to the resultHash + if ( + process.env.GATSBY_EXPERIMENTAL_PAGE_BUILD_ON_DATA_CHANGES && + queryJob.isPage + ) { boundActionCreators.setPageData({ id: queryJob.id, - result: { resultHash }, + resultHash, }) } diff --git a/packages/gatsby/src/redux/actions/public.js b/packages/gatsby/src/redux/actions/public.js index b6cbcff87aa44..b8e2c5724c0b4 100644 --- a/packages/gatsby/src/redux/actions/public.js +++ b/packages/gatsby/src/redux/actions/public.js @@ -104,7 +104,11 @@ type ActionOptions = { type PageData = { id: string, - result: ?Object, + resultHash: string, +} + +type PageDataRemove = { + id: string, } /** @@ -1420,7 +1424,7 @@ actions.createPageDependency = ( * * @param {Object} $0 * @param {string} $0.id the path to the page. - * @param {string} $0.result object of the pages data and context. + * @param {string} $0.resultHash pages content hash. */ actions.setPageData = (pageData: PageData) => { return { @@ -1435,7 +1439,7 @@ actions.setPageData = (pageData: PageData) => { * @param {Object} $0 * @param {string} $0.id the path to the page. 
*/ -actions.removePageData = (id: PageData) => { +actions.removePageData = (id: PageDataRemove) => { return { type: `REMOVE_PAGE_DATA`, payload: id, diff --git a/packages/gatsby/src/redux/reducers/page-data.js b/packages/gatsby/src/redux/reducers/page-data.js index 52f6d1d99cf09..ffa11ae736f8a 100644 --- a/packages/gatsby/src/redux/reducers/page-data.js +++ b/packages/gatsby/src/redux/reducers/page-data.js @@ -5,7 +5,7 @@ module.exports = (state = new Map(), action) => { return state case `SET_PAGE_DATA`: { - return state.set(action.payload.id, action.payload.result) + return state.set(action.payload.id, action.payload.resultHash) } default: diff --git a/packages/gatsby/src/utils/page-data.js b/packages/gatsby/src/utils/page-data.js index 3c84c49ecc6e2..4f056f6588b87 100644 --- a/packages/gatsby/src/utils/page-data.js +++ b/packages/gatsby/src/utils/page-data.js @@ -36,18 +36,22 @@ const write = async ({ publicDir }, page, result) => { await fs.outputFile(filePath, bodyStr) } -const getChangedPageDataKeys = (store, cacheData) => { - if (cacheData.webpackCompilationHash !== store.webpackCompilationHash) { +const getChangedPageDataKeys = ( + store, + cachedPageData, + cachedWebpackCompilationHash +) => { + if (cachedWebpackCompilationHash !== store.webpackCompilationHash) { return [...store.pages.keys()] } - if (cacheData.pageData && store.pageData) { + if (cachedPageData && store.pageData) { const pageKeys = [] store.pageData.forEach((value, key) => { - if (!cacheData.pageData.has(key)) { + if (!cachedPageData.has(key)) { pageKeys.push(key) } else { const newPageData = JSON.stringify(value) - const previousPageData = JSON.stringify(cacheData.pageData.get(key)) + const previousPageData = JSON.stringify(cachedPageData.get(key)) if (newPageData !== previousPageData) { pageKeys.push(key) @@ -60,10 +64,10 @@ const getChangedPageDataKeys = (store, cacheData) => { return [...store.pages.keys()] } -const removePreviousPageData = (store, cacheData) => { - if 
(cacheData.pageData && store.pageData) { +const collectRemovedPageData = (store, cachedPageData) => { + if (cachedPageData && store.pageData) { const deletedPageKeys = [] - cacheData.pageData.forEach((_value, key) => { + cachedPageData.forEach((_value, key) => { if (!store.pageData.has(key)) { deletedPageKeys.push(key) } @@ -77,5 +81,5 @@ module.exports = { read, write, getChangedPageDataKeys, - removePreviousPageData, + collectRemovedPageData, } From 605329734b0ad841da14a413a300f57ab6ab91b6 Mon Sep 17 00:00:00 2001 From: Stuart Rayson Date: Thu, 20 Feb 2020 17:23:43 +0000 Subject: [PATCH 33/50] improve delete to use promise all --- packages/gatsby/src/commands/build.js | 21 ++++++++++++++------- packages/gatsby/src/query/queue.js | 1 - 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/packages/gatsby/src/commands/build.js b/packages/gatsby/src/commands/build.js index 981abfeea1c76..31d1ae97861b6 100644 --- a/packages/gatsby/src/commands/build.js +++ b/packages/gatsby/src/commands/build.js @@ -224,15 +224,22 @@ module.exports = async function build(program: BuildArgs) { cachedPageData ) - deletedPageKeys.forEach(value => { + const removePages = deletedPageKeys.map(value => { if (value === `/`) { - fs.removeSync(`${program.directory}/public/index.html`) - fs.removeSync(`${program.directory}/public/page-data/index`) + return fs.remove(`${program.directory}/public/index.html`) } else { - fs.removeSync(`${program.directory}/public${value}`) - fs.removeSync(`${program.directory}/public/page-data${value}`) + return fs.remove(`${program.directory}/public${value}`) } }) + const removePageData = deletedPageKeys.map(value => { + if (value === `/`) { + return fs.remove(`${program.directory}/public/page-data/index`) + } else { + return fs.remove(`${program.directory}/public/page-data${value}`) + } + }) + + await Promise.all([...removePages, ...removePageData]) activity.end() } @@ -290,11 +297,11 @@ module.exports = async function build(program: BuildArgs) { ) if 
(pagePaths.length) { - fs.writeFileSync(createdFilesPath, `${pagePaths.join(`\n`)}\n`, `utf8`) + await fs.writeFile(createdFilesPath, `${pagePaths.join(`\n`)}\n`, `utf8`) report.info(`newPages.txt created`) } if (deletedPageKeys.length) { - fs.writeFileSync( + await fs.writeFile( deletedFilesPath, `${deletedPageKeys.join(`\n`)}\n`, `utf8` diff --git a/packages/gatsby/src/query/queue.js b/packages/gatsby/src/query/queue.js index f82e8b8a6beab..bc310a020c806 100644 --- a/packages/gatsby/src/query/queue.js +++ b/packages/gatsby/src/query/queue.js @@ -14,7 +14,6 @@ const createBaseOptions = () => { const createBuildQueue = () => { const graphqlRunner = new GraphQLRunner(store) - const handler = (queryJob, callback) => queryRunner(graphqlRunner, queryJob) .then(result => callback(null, result)) From 4ce5c2748957ae3814d4720b59d15ed10030e4ce Mon Sep 17 00:00:00 2001 From: Stuart Rayson Date: Thu, 20 Feb 2020 21:16:54 +0000 Subject: [PATCH 34/50] Block develop mode if experimental flag is used --- packages/gatsby/src/commands/develop.ts | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/packages/gatsby/src/commands/develop.ts b/packages/gatsby/src/commands/develop.ts index 784f4b50dac06..87f13277fd20d 100644 --- a/packages/gatsby/src/commands/develop.ts +++ b/packages/gatsby/src/commands/develop.ts @@ -150,6 +150,7 @@ async function startServer(program: IProgram): Promise { /** * Set up the express app. 
**/ + const app = express() app.use(telemetry.expressMiddleware(`DEVELOP`)) app.use( @@ -350,6 +351,15 @@ async function startServer(program: IProgram): Promise { } module.exports = async (program: IProgram): Promise => { + if (process.env.GATSBY_EXPERIMENTAL_PAGE_BUILD_ON_DATA_CHANGES) { + report.panic( + `The flag ${chalk.yellow( + `GATSBY_EXPERIMENTAL_PAGE_BUILD_ON_DATA_CHANGES` + )} is not available with ${chalk.cyan( + `gatsby develop` + )}, please retry using ${chalk.cyan(`gatsby build`)}` + ) + } initTracer(program.openTracingConfigFile) report.pendingActivity({ id: `webpack-develop` }) telemetry.trackCli(`DEVELOP_START`) From 0029ae132be70cc4bb77a1892a09fee8faf1907e Mon Sep 17 00:00:00 2001 From: Stuart Rayson Date: Thu, 20 Feb 2020 21:20:36 +0000 Subject: [PATCH 35/50] Update docs with new flag name GATSBY_EXPERIMENTAL_PAGE_BUILD_ON_DATA_CHANGES --- docs/docs/overview-of-the-gatsby-build-process.md | 2 +- .../page-build-optimizations-for-incremental-data-changes.md | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/docs/overview-of-the-gatsby-build-process.md b/docs/docs/overview-of-the-gatsby-build-process.md index 5177f0f57ada6..1f94bed1c9ca9 100644 --- a/docs/docs/overview-of-the-gatsby-build-process.md +++ b/docs/docs/overview-of-the-gatsby-build-process.md @@ -301,7 +301,7 @@ Page queries that were queued up earlier from query extraction are run so the da With everything ready for the HTML pages in place, HTML is compiled and written out to files so it can be served up statically. Since HTML is being produced in a Node.js server context, [references to browser APIs like `window` can break the build](/docs/debugging-html-builds/) and must be conditionally applied. -By default, Gatsby rebuilds static HTML for all pages on each build. 
There is an experimental feature flag `GATSBY_PAGE_BUILD_ON_DATA_CHANGES` which enables [Page Build Optimizations for Incremental Data Changes](/docs/page-build-optimizations-for-incremental-data-changes/). +By default, Gatsby rebuilds static HTML for all pages on each build. There is an experimental feature flag `GATSBY_EXPERIMENTAL_PAGE_BUILD_ON_DATA_CHANGES` which enables [Page Build Optimizations for Incremental Data Changes](/docs/page-build-optimizations-for-incremental-data-changes/). ## What do you get from a successful build? diff --git a/docs/docs/page-build-optimizations-for-incremental-data-changes.md b/docs/docs/page-build-optimizations-for-incremental-data-changes.md index 9aa5d33189a9d..ca3e82db46317 100644 --- a/docs/docs/page-build-optimizations-for-incremental-data-changes.md +++ b/docs/docs/page-build-optimizations-for-incremental-data-changes.md @@ -12,9 +12,9 @@ For more info on the standard build process please see [overview of the gatsby b ## How to use -To enable this enhancement, use the environment variable `GATSBY_PAGE_BUILD_ON_DATA_CHANGES=true` in your `gatsby build` command, for example: +To enable this enhancement, use the environment variable `GATSBY_EXPERIMENTAL_PAGE_BUILD_ON_DATA_CHANGES=true` in your `gatsby build` command, for example: -`GATSBY_PAGE_BUILD_ON_DATA_CHANGES=true node ./node_modules/.bin/gatsby build` +`GATSBY_EXPERIMENTAL_PAGE_BUILD_ON_DATA_CHANGES=true node ./node_modules/.bin/gatsby build` This will run the Gatsby build process, but only build pages that have data changes since your last build. If there are any changes to code (JS, CSS) the bundling process returns a new webpack compilation hash which causes all pages to be rebuilt. 
From 6f08e534925559610f19cbb00764b250916f6c15 Mon Sep 17 00:00:00 2001 From: Stuart Rayson Date: Fri, 21 Feb 2020 09:29:57 +0000 Subject: [PATCH 36/50] Improvements remove pages logic --- packages/gatsby/src/commands/build.js | 36 ++++++++-------------- packages/gatsby/src/utils/page-data.js | 42 +++++++++++++++++--------- 2 files changed, 39 insertions(+), 39 deletions(-) diff --git a/packages/gatsby/src/commands/build.js b/packages/gatsby/src/commands/build.js index 31d1ae97861b6..1068f09114010 100644 --- a/packages/gatsby/src/commands/build.js +++ b/packages/gatsby/src/commands/build.js @@ -169,13 +169,17 @@ module.exports = async function build(program: BuildArgs) { // we need to save it again to make sure our latest state has been saved await db.saveState() - const pagePaths = process.env.GATSBY_EXPERIMENTAL_PAGE_BUILD_ON_DATA_CHANGES - ? pageDataUtil.getChangedPageDataKeys( - store.getState(), - cachedPageData, - cachedWebpackCompilationHash - ) - : [...store.getState().pages.keys()] + let pagePaths = [...store.getState().pages.keys()] + + // Rebuild subset of pages if user opt into GATSBY_EXPERIMENTAL_PAGE_BUILD_ON_DATA_CHANGES + // if there were no source files (for example components, static queries, etc) changes since last build, otherwise rebuild all pages + if (process.env.GATSBY_EXPERIMENTAL_PAGE_BUILD_ON_DATA_CHANGES) { + pagePaths = + cachedWebpackCompilationHash !== store.getState().webpackCompilationHash + ? 
[...store.getState().pages.keys()] + : pageDataUtil.getChangedPageDataKeys(store.getState(), cachedPageData) + } + activity = report.createProgress( `Building static HTML for pages`, pagePaths.length, @@ -223,23 +227,7 @@ module.exports = async function build(program: BuildArgs) { store.getState(), cachedPageData ) - - const removePages = deletedPageKeys.map(value => { - if (value === `/`) { - return fs.remove(`${program.directory}/public/index.html`) - } else { - return fs.remove(`${program.directory}/public${value}`) - } - }) - const removePageData = deletedPageKeys.map(value => { - if (value === `/`) { - return fs.remove(`${program.directory}/public/page-data/index`) - } else { - return fs.remove(`${program.directory}/public/page-data${value}`) - } - }) - - await Promise.all([...removePages, ...removePageData]) + await pageDataUtil.removePageFiles({ publicDir }, deletedPageKeys) activity.end() } diff --git a/packages/gatsby/src/utils/page-data.js b/packages/gatsby/src/utils/page-data.js index 4f056f6588b87..ac3001e12f874 100644 --- a/packages/gatsby/src/utils/page-data.js +++ b/packages/gatsby/src/utils/page-data.js @@ -2,10 +2,14 @@ const fs = require(`fs-extra`) const path = require(`path`) const { store } = require(`../redux`) -const getFilePath = ({ publicDir }, pagePath) => { +const getFilePath = ({ publicDir }, pagePath, type) => { const fixedPagePath = pagePath === `/` ? 
`index` : pagePath + if (type === `html`) { + return path.join(publicDir, fixedPagePath, `index.html`) + } return path.join(publicDir, `page-data`, fixedPagePath, `page-data.json`) } + const read = async ({ publicDir }, pagePath) => { const filePath = getFilePath({ publicDir }, pagePath) const rawPageData = await fs.readFile(filePath, `utf-8`) @@ -36,17 +40,10 @@ const write = async ({ publicDir }, page, result) => { await fs.outputFile(filePath, bodyStr) } -const getChangedPageDataKeys = ( - store, - cachedPageData, - cachedWebpackCompilationHash -) => { - if (cachedWebpackCompilationHash !== store.webpackCompilationHash) { - return [...store.pages.keys()] - } - if (cachedPageData && store.pageData) { +const getChangedPageDataKeys = (state, cachedPageData) => { + if (cachedPageData && state.pageData) { const pageKeys = [] - store.pageData.forEach((value, key) => { + state.pageData.forEach((value, key) => { if (!cachedPageData.has(key)) { pageKeys.push(key) } else { @@ -61,14 +58,14 @@ const getChangedPageDataKeys = ( return pageKeys } - return [...store.pages.keys()] + return [...state.pages.keys()] } -const collectRemovedPageData = (store, cachedPageData) => { - if (cachedPageData && store.pageData) { +const collectRemovedPageData = (state, cachedPageData) => { + if (cachedPageData && state.pageData) { const deletedPageKeys = [] cachedPageData.forEach((_value, key) => { - if (!store.pageData.has(key)) { + if (!state.pageData.has(key)) { deletedPageKeys.push(key) } }) @@ -77,9 +74,24 @@ const collectRemovedPageData = (store, cachedPageData) => { return [] } +const removePageFiles = ({ publicDir }, pageKeys) => { + const removePages = pageKeys.map(value => { + const pageHtml = getFilePath({ publicDir }, value, `html`) + return fs.remove(pageHtml) + }) + + const removePagesData = pageKeys.map(value => { + const pageData = getFilePath({ publicDir }, value) + return fs.remove(pageData) + }) + + return Promise.all([...removePages, ...removePagesData]) +} + 
module.exports = { read, write, getChangedPageDataKeys, collectRemovedPageData, + removePageFiles, } From 65efb64848ad261ba0afcd14ec8028e4ee44c9f2 Mon Sep 17 00:00:00 2001 From: Stuart Rayson Date: Fri, 21 Feb 2020 14:20:26 +0000 Subject: [PATCH 37/50] Remove empty directory if no files --- packages/gatsby/src/utils/page-data.js | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/packages/gatsby/src/utils/page-data.js b/packages/gatsby/src/utils/page-data.js index ac3001e12f874..fcdc88334f6db 100644 --- a/packages/gatsby/src/utils/page-data.js +++ b/packages/gatsby/src/utils/page-data.js @@ -74,15 +74,28 @@ const collectRemovedPageData = (state, cachedPageData) => { return [] } +const checkAndRemoveEmptyDir = (dir, pagePath) => { + const hasFiles = fs.readdirSync(path.join(dir, pagePath), `utf8`, true) + if (!hasFiles.length) { + fs.removeSync(path.join(dir, pagePath)) + } +} + const removePageFiles = ({ publicDir }, pageKeys) => { - const removePages = pageKeys.map(value => { - const pageHtml = getFilePath({ publicDir }, value, `html`) - return fs.remove(pageHtml) + const removePages = pageKeys.map(pagePath => { + const pageHtmlFile = getFilePath({ publicDir }, pagePath, `html`) + return fs + .remove(pageHtmlFile) + .then(() => checkAndRemoveEmptyDir(publicDir, pagePath)) }) - const removePagesData = pageKeys.map(value => { - const pageData = getFilePath({ publicDir }, value) - return fs.remove(pageData) + const removePagesData = pageKeys.map(pagePath => { + const pageDataFile = getFilePath({ publicDir }, pagePath) + return fs + .remove(pageDataFile) + .then(() => + checkAndRemoveEmptyDir(path.join(publicDir, `page-data`), pagePath) + ) }) return Promise.all([...removePages, ...removePagesData]) From 9c95671e3bfcef9f499bb03b59809ebbac0725f2 Mon Sep 17 00:00:00 2001 From: Stuart Rayson Date: Sat, 22 Feb 2020 07:20:51 +0000 Subject: [PATCH 38/50] Refactor pagePath reassign in build --- packages/gatsby/src/commands/build.js | 
17 ++++++++++------- packages/gatsby/src/utils/page-data.js | 12 ++++++------ 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/packages/gatsby/src/commands/build.js b/packages/gatsby/src/commands/build.js index 1068f09114010..7c8243e643942 100644 --- a/packages/gatsby/src/commands/build.js +++ b/packages/gatsby/src/commands/build.js @@ -173,11 +173,14 @@ module.exports = async function build(program: BuildArgs) { // Rebuild subset of pages if user opt into GATSBY_EXPERIMENTAL_PAGE_BUILD_ON_DATA_CHANGES // if there were no source files (for example components, static queries, etc) changes since last build, otherwise rebuild all pages - if (process.env.GATSBY_EXPERIMENTAL_PAGE_BUILD_ON_DATA_CHANGES) { - pagePaths = - cachedWebpackCompilationHash !== store.getState().webpackCompilationHash - ? [...store.getState().pages.keys()] - : pageDataUtil.getChangedPageDataKeys(store.getState(), cachedPageData) + if ( + process.env.GATSBY_EXPERIMENTAL_PAGE_BUILD_ON_DATA_CHANGES && + cachedWebpackCompilationHash === store.getState().webpackCompilationHash + ) { + pagePaths = pageDataUtil.getChangedPageDataKeys( + store.getState(), + cachedPageData + ) } activity = report.createProgress( @@ -286,7 +289,7 @@ module.exports = async function build(program: BuildArgs) { if (pagePaths.length) { await fs.writeFile(createdFilesPath, `${pagePaths.join(`\n`)}\n`, `utf8`) - report.info(`newPages.txt created`) + report.info(`.cache/newPages.txt created`) } if (deletedPageKeys.length) { await fs.writeFile( @@ -294,7 +297,7 @@ module.exports = async function build(program: BuildArgs) { `${deletedPageKeys.join(`\n`)}\n`, `utf8` ) - report.info(`deletedPages.txt created`) + report.info(`.cache/deletedPages.txt created`) } } } diff --git a/packages/gatsby/src/utils/page-data.js b/packages/gatsby/src/utils/page-data.js index fcdc88334f6db..3fb5a42a76ea6 100644 --- a/packages/gatsby/src/utils/page-data.js +++ b/packages/gatsby/src/utils/page-data.js @@ -43,14 +43,14 @@ const write = 
async ({ publicDir }, page, result) => { const getChangedPageDataKeys = (state, cachedPageData) => { if (cachedPageData && state.pageData) { const pageKeys = [] - state.pageData.forEach((value, key) => { + state.pageData.forEach((resultHash, key) => { if (!cachedPageData.has(key)) { pageKeys.push(key) } else { - const newPageData = JSON.stringify(value) - const previousPageData = JSON.stringify(cachedPageData.get(key)) + const newPageDataHash = resultHash + const previousPageDataHash = cachedPageData.get(key) - if (newPageData !== previousPageData) { + if (newPageDataHash !== previousPageDataHash) { pageKeys.push(key) } } @@ -89,7 +89,7 @@ const removePageFiles = ({ publicDir }, pageKeys) => { .then(() => checkAndRemoveEmptyDir(publicDir, pagePath)) }) - const removePagesData = pageKeys.map(pagePath => { + const removePageData = pageKeys.map(pagePath => { const pageDataFile = getFilePath({ publicDir }, pagePath) return fs .remove(pageDataFile) @@ -98,7 +98,7 @@ const removePageFiles = ({ publicDir }, pageKeys) => { ) }) - return Promise.all([...removePages, ...removePagesData]) + return Promise.all([...removePages, ...removePageData]) } module.exports = { From 0f57c47bc84339f2f7ceffe0dac9b5ef0e33a0d9 Mon Sep 17 00:00:00 2001 From: Stuart Rayson Date: Sun, 23 Feb 2020 14:48:50 +0000 Subject: [PATCH 39/50] initial attempt at removing nested folders in the correct order --- packages/gatsby/src/utils/page-data.js | 47 +++++++++++-------- packages/gatsby/src/utils/page-html.js | 16 +++++++ .../gatsby/src/utils/worker/render-html.js | 20 +++----- 3 files changed, 51 insertions(+), 32 deletions(-) create mode 100644 packages/gatsby/src/utils/page-html.js diff --git a/packages/gatsby/src/utils/page-data.js b/packages/gatsby/src/utils/page-data.js index 3fb5a42a76ea6..69b5d104fe69e 100644 --- a/packages/gatsby/src/utils/page-data.js +++ b/packages/gatsby/src/utils/page-data.js @@ -1,12 +1,10 @@ const fs = require(`fs-extra`) const path = require(`path`) const { store } = 
require(`../redux`) +const { generateHtmlPathToOutput } = require(`../utils/page-html`) -const getFilePath = ({ publicDir }, pagePath, type) => { +const getFilePath = ({ publicDir }, pagePath) => { const fixedPagePath = pagePath === `/` ? `index` : pagePath - if (type === `html`) { - return path.join(publicDir, fixedPagePath, `index.html`) - } return path.join(publicDir, `page-data`, fixedPagePath, `page-data.json`) } @@ -43,13 +41,11 @@ const write = async ({ publicDir }, page, result) => { const getChangedPageDataKeys = (state, cachedPageData) => { if (cachedPageData && state.pageData) { const pageKeys = [] - state.pageData.forEach((resultHash, key) => { + state.pageData.forEach((newPageDataHash, key) => { if (!cachedPageData.has(key)) { pageKeys.push(key) } else { - const newPageDataHash = resultHash const previousPageDataHash = cachedPageData.get(key) - if (newPageDataHash !== previousPageDataHash) { pageKeys.push(key) } @@ -75,30 +71,43 @@ const collectRemovedPageData = (state, cachedPageData) => { } const checkAndRemoveEmptyDir = (dir, pagePath) => { - const hasFiles = fs.readdirSync(path.join(dir, pagePath), `utf8`, true) + const directory = path.join(dir, pagePath) + const hasFiles = fs.readdirSync(directory) if (!hasFiles.length) { - fs.removeSync(path.join(dir, pagePath)) + fs.removeSync(directory) } } +const sortedPageKeysByNestedLevel = pageKeys => + pageKeys.sort((a, b) => { + const currentPathPathValue = a.split(`/`).length + const previousPathPathValue = b.split(`/`).length + if (currentPathPathValue > previousPathPathValue) { + return -1 + } + if (currentPathPathValue < previousPathPathValue) { + return 1 + } + return 0 + }) + const removePageFiles = ({ publicDir }, pageKeys) => { const removePages = pageKeys.map(pagePath => { - const pageHtmlFile = getFilePath({ publicDir }, pagePath, `html`) - return fs - .remove(pageHtmlFile) - .then(() => checkAndRemoveEmptyDir(publicDir, pagePath)) + const pageHtmlFile = generateHtmlPathToOutput(publicDir, 
pagePath) + return fs.remove(pageHtmlFile) }) const removePageData = pageKeys.map(pagePath => { const pageDataFile = getFilePath({ publicDir }, pagePath) - return fs - .remove(pageDataFile) - .then(() => - checkAndRemoveEmptyDir(path.join(publicDir, `page-data`), pagePath) - ) + return fs.remove(pageDataFile) }) - return Promise.all([...removePages, ...removePageData]) + return Promise.all([...removePages, ...removePageData]).then(() => { + sortedPageKeysByNestedLevel(pageKeys).forEach(pagePath => { + checkAndRemoveEmptyDir(publicDir, pagePath) + checkAndRemoveEmptyDir(`${publicDir}/page-data`, pagePath) + }) + }) } module.exports = { diff --git a/packages/gatsby/src/utils/page-html.js b/packages/gatsby/src/utils/page-html.js new file mode 100644 index 0000000000000..a17a49655b9c9 --- /dev/null +++ b/packages/gatsby/src/utils/page-html.js @@ -0,0 +1,16 @@ +const path = require(`path`) + +// copied from https://github.com/markdalgleish/static-site-generator-webpack-plugin/blob/master/index.js#L161 +const generateHtmlPathToOutput = (dir, outputPath) => { + let outputFileName = outputPath.replace(/^(\/|\\)/, ``) // Remove leading slashes for webpack-dev-server + + if (!/\.(html?)$/i.test(outputFileName)) { + outputFileName = path.join(outputFileName, `index.html`) + } + + return path.join(dir, outputFileName) +} + +module.exports = { + generateHtmlPathToOutput, +} diff --git a/packages/gatsby/src/utils/worker/render-html.js b/packages/gatsby/src/utils/worker/render-html.js index ac74894853aa3..68958f18e782d 100644 --- a/packages/gatsby/src/utils/worker/render-html.js +++ b/packages/gatsby/src/utils/worker/render-html.js @@ -1,17 +1,6 @@ const fs = require(`fs-extra`) -const path = require(`path`) const Promise = require(`bluebird`) - -// copied from https://github.com/markdalgleish/static-site-generator-webpack-plugin/blob/master/index.js#L161 -const generatePathToOutput = outputPath => { - let outputFileName = outputPath.replace(/^(\/|\\)/, ``) // Remove leading 
slashes for webpack-dev-server - - if (!/\.(html?)$/i.test(outputFileName)) { - outputFileName = path.join(outputFileName, `index.html`) - } - - return path.join(process.cwd(), `public`, outputFileName) -} +const { generateHtmlPathToOutput } = require(`../../utils/page-html`) export function renderHTML({ htmlComponentRendererPath, paths, envVars }) { // This is being executed in child process, so we need to set some vars @@ -25,7 +14,12 @@ export function renderHTML({ htmlComponentRendererPath, paths, envVars }) { const htmlComponentRenderer = require(htmlComponentRendererPath) try { htmlComponentRenderer.default(path, (throwAway, htmlString) => { - resolve(fs.outputFile(generatePathToOutput(path), htmlString)) + resolve( + fs.outputFile( + generateHtmlPathToOutput(`${process.cwd()}public`, path), + htmlString + ) + ) }) } catch (e) { // add some context to error so we can display more helpful message From 300d49cfec1ff6e6801744e12662d30c8cbb5efd Mon Sep 17 00:00:00 2001 From: Stuart Rayson Date: Sun, 23 Feb 2020 15:06:31 +0000 Subject: [PATCH 40/50] refactor delete public html and data function --- packages/gatsby/src/utils/page-data.js | 11 ++++++----- packages/gatsby/src/utils/worker/render-html.js | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/packages/gatsby/src/utils/page-data.js b/packages/gatsby/src/utils/page-data.js index 69b5d104fe69e..88ca04dbb33a8 100644 --- a/packages/gatsby/src/utils/page-data.js +++ b/packages/gatsby/src/utils/page-data.js @@ -70,11 +70,13 @@ const collectRemovedPageData = (state, cachedPageData) => { return [] } -const checkAndRemoveEmptyDir = (dir, pagePath) => { - const directory = path.join(dir, pagePath) - const hasFiles = fs.readdirSync(directory) +const checkAndRemoveEmptyDir = (publicDir, pagePath) => { + const pageHtmlDirectory = path.join(publicDir, pagePath) + const pageDataDirectory = path.join(publicDir, `page-data`, pagePath) + const hasFiles = fs.readdirSync(pageHtmlDirectory) if 
(!hasFiles.length) { - fs.removeSync(directory) + fs.removeSync(pageHtmlDirectory) + fs.removeSync(pageDataDirectory) } } @@ -105,7 +107,6 @@ const removePageFiles = ({ publicDir }, pageKeys) => { return Promise.all([...removePages, ...removePageData]).then(() => { sortedPageKeysByNestedLevel(pageKeys).forEach(pagePath => { checkAndRemoveEmptyDir(publicDir, pagePath) - checkAndRemoveEmptyDir(`${publicDir}/page-data`, pagePath) }) }) } diff --git a/packages/gatsby/src/utils/worker/render-html.js b/packages/gatsby/src/utils/worker/render-html.js index 68958f18e782d..aa21b0b15880c 100644 --- a/packages/gatsby/src/utils/worker/render-html.js +++ b/packages/gatsby/src/utils/worker/render-html.js @@ -16,7 +16,7 @@ export function renderHTML({ htmlComponentRendererPath, paths, envVars }) { htmlComponentRenderer.default(path, (throwAway, htmlString) => { resolve( fs.outputFile( - generateHtmlPathToOutput(`${process.cwd()}public`, path), + generateHtmlPathToOutput(`${process.cwd()}/public`, path), htmlString ) ) From 0599b8f96f38a12ec903ecf75d827a8e283c9070 Mon Sep 17 00:00:00 2001 From: Stuart Rayson Date: Sun, 23 Feb 2020 18:39:55 +0000 Subject: [PATCH 41/50] use join in render-html.js --- packages/gatsby/src/utils/worker/render-html.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/gatsby/src/utils/worker/render-html.js b/packages/gatsby/src/utils/worker/render-html.js index aa21b0b15880c..566d5411d2f61 100644 --- a/packages/gatsby/src/utils/worker/render-html.js +++ b/packages/gatsby/src/utils/worker/render-html.js @@ -1,5 +1,6 @@ const fs = require(`fs-extra`) const Promise = require(`bluebird`) +const { join } = require(`path`) const { generateHtmlPathToOutput } = require(`../../utils/page-html`) export function renderHTML({ htmlComponentRendererPath, paths, envVars }) { @@ -16,7 +17,7 @@ export function renderHTML({ htmlComponentRendererPath, paths, envVars }) { htmlComponentRenderer.default(path, (throwAway, htmlString) => { resolve( 
fs.outputFile( - generateHtmlPathToOutput(`${process.cwd()}/public`, path), + generateHtmlPathToOutput(join(process.cwd(), `public`), path), htmlString ) ) From fa11ff6393ce1ffa713af5ef56a499fad6c8121e Mon Sep 17 00:00:00 2001 From: Stuart Rayson Date: Mon, 24 Feb 2020 08:17:20 +0000 Subject: [PATCH 42/50] Renaming functions and refactoring --- packages/gatsby/src/utils/page-data.js | 43 +++++++++++-------- packages/gatsby/src/utils/page-html.js | 4 +- .../gatsby/src/utils/worker/render-html.js | 4 +- 3 files changed, 28 insertions(+), 23 deletions(-) diff --git a/packages/gatsby/src/utils/page-data.js b/packages/gatsby/src/utils/page-data.js index 88ca04dbb33a8..9e146eb089054 100644 --- a/packages/gatsby/src/utils/page-data.js +++ b/packages/gatsby/src/utils/page-data.js @@ -1,21 +1,21 @@ const fs = require(`fs-extra`) const path = require(`path`) const { store } = require(`../redux`) -const { generateHtmlPathToOutput } = require(`../utils/page-html`) +const { getPageHtmlFilePath } = require(`../utils/page-html`) -const getFilePath = ({ publicDir }, pagePath) => { - const fixedPagePath = pagePath === `/` ? `index` : pagePath - return path.join(publicDir, `page-data`, fixedPagePath, `page-data.json`) -} +const fixedPagePath = pagePath => (pagePath === `/` ? 
`index` : pagePath) + +const getPageDataFilePath = ({ publicDir }, pagePath) => + path.join(publicDir, `page-data`, fixedPagePath(pagePath), `page-data.json`) const read = async ({ publicDir }, pagePath) => { - const filePath = getFilePath({ publicDir }, pagePath) + const filePath = getPageDataFilePath({ publicDir }, pagePath) const rawPageData = await fs.readFile(filePath, `utf-8`) return JSON.parse(rawPageData) } const write = async ({ publicDir }, page, result) => { - const filePath = getFilePath({ publicDir }, page.path) + const filePath = getPageDataFilePath({ publicDir }, page.path) const body = { componentChunkName: page.componentChunkName, path: page.path, @@ -72,8 +72,14 @@ const collectRemovedPageData = (state, cachedPageData) => { const checkAndRemoveEmptyDir = (publicDir, pagePath) => { const pageHtmlDirectory = path.join(publicDir, pagePath) - const pageDataDirectory = path.join(publicDir, `page-data`, pagePath) + const pageDataDirectory = path.join( + publicDir, + `page-data`, + fixedPagePath(pagePath) + ) const hasFiles = fs.readdirSync(pageHtmlDirectory) + + // if page's html folder is empty also remove matching page-data folder if (!hasFiles.length) { fs.removeSync(pageHtmlDirectory) fs.removeSync(pageDataDirectory) @@ -82,29 +88,28 @@ const checkAndRemoveEmptyDir = (publicDir, pagePath) => { const sortedPageKeysByNestedLevel = pageKeys => pageKeys.sort((a, b) => { - const currentPathPathValue = a.split(`/`).length - const previousPathPathValue = b.split(`/`).length - if (currentPathPathValue > previousPathPathValue) { - return -1 - } - if (currentPathPathValue < previousPathPathValue) { - return 1 - } - return 0 + const currentPagePathValue = a.split(`/`).length + const previousPagePathValue = b.split(`/`).length + return currentPagePathValue > previousPagePathValue + ? -1 + : currentPagePathValue < previousPagePathValue + ? 
1 + : 0 }) const removePageFiles = ({ publicDir }, pageKeys) => { const removePages = pageKeys.map(pagePath => { - const pageHtmlFile = generateHtmlPathToOutput(publicDir, pagePath) + const pageHtmlFile = getPageHtmlFilePath(publicDir, pagePath) return fs.remove(pageHtmlFile) }) const removePageData = pageKeys.map(pagePath => { - const pageDataFile = getFilePath({ publicDir }, pagePath) + const pageDataFile = getPageDataFilePath({ publicDir }, pagePath) return fs.remove(pageDataFile) }) return Promise.all([...removePages, ...removePageData]).then(() => { + // Sort removed pageKeys by nested directories and remove if empty. sortedPageKeysByNestedLevel(pageKeys).forEach(pagePath => { checkAndRemoveEmptyDir(publicDir, pagePath) }) diff --git a/packages/gatsby/src/utils/page-html.js b/packages/gatsby/src/utils/page-html.js index a17a49655b9c9..bfaf19569877a 100644 --- a/packages/gatsby/src/utils/page-html.js +++ b/packages/gatsby/src/utils/page-html.js @@ -1,7 +1,7 @@ const path = require(`path`) // copied from https://github.com/markdalgleish/static-site-generator-webpack-plugin/blob/master/index.js#L161 -const generateHtmlPathToOutput = (dir, outputPath) => { +const getPageHtmlFilePath = (dir, outputPath) => { let outputFileName = outputPath.replace(/^(\/|\\)/, ``) // Remove leading slashes for webpack-dev-server if (!/\.(html?)$/i.test(outputFileName)) { @@ -12,5 +12,5 @@ const generateHtmlPathToOutput = (dir, outputPath) => { } module.exports = { - generateHtmlPathToOutput, + getPageHtmlFilePath, } diff --git a/packages/gatsby/src/utils/worker/render-html.js b/packages/gatsby/src/utils/worker/render-html.js index 566d5411d2f61..8da84ac7ae1cb 100644 --- a/packages/gatsby/src/utils/worker/render-html.js +++ b/packages/gatsby/src/utils/worker/render-html.js @@ -1,7 +1,7 @@ const fs = require(`fs-extra`) const Promise = require(`bluebird`) const { join } = require(`path`) -const { generateHtmlPathToOutput } = require(`../../utils/page-html`) +const { 
getPageHtmlFilePath } = require(`../../utils/page-html`) export function renderHTML({ htmlComponentRendererPath, paths, envVars }) { // This is being executed in child process, so we need to set some vars @@ -17,7 +17,7 @@ export function renderHTML({ htmlComponentRendererPath, paths, envVars }) { htmlComponentRenderer.default(path, (throwAway, htmlString) => { resolve( fs.outputFile( - generateHtmlPathToOutput(join(process.cwd(), `public`), path), + getPageHtmlFilePath(join(process.cwd(), `public`), path), htmlString ) ) From e04bcd82aa89e76c5d3683c3cc6635b14fc50b9d Mon Sep 17 00:00:00 2001 From: Stuart Rayson Date: Tue, 25 Feb 2020 09:17:16 +0000 Subject: [PATCH 43/50] Add remove functions to page util --- packages/gatsby/src/commands/develop.ts | 2 -- packages/gatsby/src/query/query-runner.js | 1 - .../gatsby/src/redux/reducers/page-data.js | 3 ++ packages/gatsby/src/utils/page-data.js | 34 +++++++++---------- packages/gatsby/src/utils/page-html.js | 7 ++++ 5 files changed, 27 insertions(+), 20 deletions(-) diff --git a/packages/gatsby/src/commands/develop.ts b/packages/gatsby/src/commands/develop.ts index 87f13277fd20d..29f27ab196961 100644 --- a/packages/gatsby/src/commands/develop.ts +++ b/packages/gatsby/src/commands/develop.ts @@ -150,7 +150,6 @@ async function startServer(program: IProgram): Promise { /** * Set up the express app. 
**/ - const app = express() app.use(telemetry.expressMiddleware(`DEVELOP`)) app.use( @@ -417,7 +416,6 @@ module.exports = async (program: IProgram): Promise => { require(`../redux/actions`).boundActionCreators.setProgramStatus( `BOOTSTRAP_QUERY_RUNNING_FINISHED` ) - await db.saveState() await waitUntilAllJobsComplete() diff --git a/packages/gatsby/src/query/query-runner.js b/packages/gatsby/src/query/query-runner.js index 59b50a84d50aa..f3d59eea46daf 100644 --- a/packages/gatsby/src/query/query-runner.js +++ b/packages/gatsby/src/query/query-runner.js @@ -136,6 +136,5 @@ module.exports = async (graphqlRunner, queryJob: QueryJob) => { resultHash, }) } - return result } diff --git a/packages/gatsby/src/redux/reducers/page-data.js b/packages/gatsby/src/redux/reducers/page-data.js index ffa11ae736f8a..619bbb66d9520 100644 --- a/packages/gatsby/src/redux/reducers/page-data.js +++ b/packages/gatsby/src/redux/reducers/page-data.js @@ -1,5 +1,8 @@ module.exports = (state = new Map(), action) => { switch (action.type) { + case `DELETE_CACHE`: + return new Map() + case `REMOVE_PAGE_DATA`: state.delete(action.payload.id) return state diff --git a/packages/gatsby/src/utils/page-data.js b/packages/gatsby/src/utils/page-data.js index 9e146eb089054..db62fb4fe0ad9 100644 --- a/packages/gatsby/src/utils/page-data.js +++ b/packages/gatsby/src/utils/page-data.js @@ -1,21 +1,26 @@ const fs = require(`fs-extra`) const path = require(`path`) const { store } = require(`../redux`) -const { getPageHtmlFilePath } = require(`../utils/page-html`) +const { remove: removeHtml } = require(`../utils/page-html`) const fixedPagePath = pagePath => (pagePath === `/` ? 
`index` : pagePath) -const getPageDataFilePath = ({ publicDir }, pagePath) => +const getFilePath = ({ publicDir }, pagePath) => path.join(publicDir, `page-data`, fixedPagePath(pagePath), `page-data.json`) const read = async ({ publicDir }, pagePath) => { - const filePath = getPageDataFilePath({ publicDir }, pagePath) + const filePath = getFilePath({ publicDir }, pagePath) const rawPageData = await fs.readFile(filePath, `utf-8`) return JSON.parse(rawPageData) } +const remove = async ({ publicDir }, pagePath) => { + const filePath = getFilePath({ publicDir }, pagePath) + return fs.remove(filePath) +} + const write = async ({ publicDir }, page, result) => { - const filePath = getPageDataFilePath({ publicDir }, page.path) + const filePath = getFilePath({ publicDir }, page.path) const body = { componentChunkName: page.componentChunkName, path: page.path, @@ -90,23 +95,17 @@ const sortedPageKeysByNestedLevel = pageKeys => pageKeys.sort((a, b) => { const currentPagePathValue = a.split(`/`).length const previousPagePathValue = b.split(`/`).length - return currentPagePathValue > previousPagePathValue - ? -1 - : currentPagePathValue < previousPagePathValue - ? 1 - : 0 + return previousPagePathValue - currentPagePathValue }) const removePageFiles = ({ publicDir }, pageKeys) => { - const removePages = pageKeys.map(pagePath => { - const pageHtmlFile = getPageHtmlFilePath(publicDir, pagePath) - return fs.remove(pageHtmlFile) - }) + const removePages = pageKeys.map(pagePath => + removeHtml({ publicDir }, pagePath) + ) - const removePageData = pageKeys.map(pagePath => { - const pageDataFile = getPageDataFilePath({ publicDir }, pagePath) - return fs.remove(pageDataFile) - }) + const removePageData = pageKeys.map(pagePath => + remove({ publicDir }, pagePath) + ) return Promise.all([...removePages, ...removePageData]).then(() => { // Sort removed pageKeys by nested directories and remove if empty. 
@@ -119,6 +118,7 @@ const removePageFiles = ({ publicDir }, pageKeys) => { module.exports = { read, write, + remove, getChangedPageDataKeys, collectRemovedPageData, removePageFiles, diff --git a/packages/gatsby/src/utils/page-html.js b/packages/gatsby/src/utils/page-html.js index bfaf19569877a..92817c463b9ba 100644 --- a/packages/gatsby/src/utils/page-html.js +++ b/packages/gatsby/src/utils/page-html.js @@ -1,5 +1,11 @@ +const fs = require(`fs-extra`) const path = require(`path`) +const remove = async ({ publicDir }, pagePath) => { + const filePath = getPageHtmlFilePath(publicDir, pagePath) + return fs.remove(filePath) +} + // copied from https://github.com/markdalgleish/static-site-generator-webpack-plugin/blob/master/index.js#L161 const getPageHtmlFilePath = (dir, outputPath) => { let outputFileName = outputPath.replace(/^(\/|\\)/, ``) // Remove leading slashes for webpack-dev-server @@ -12,5 +18,6 @@ const getPageHtmlFilePath = (dir, outputPath) => { } module.exports = { + remove, getPageHtmlFilePath, } From ded3ccc0aeb3c1d33ea80066becf672ba988447c Mon Sep 17 00:00:00 2001 From: Stuart Rayson Date: Tue, 25 Feb 2020 09:51:27 +0000 Subject: [PATCH 44/50] Remove whitespace from page-data --- packages/gatsby/src/utils/page-data.js | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/gatsby/src/utils/page-data.js b/packages/gatsby/src/utils/page-data.js index db62fb4fe0ad9..fdce1b8c11968 100644 --- a/packages/gatsby/src/utils/page-data.js +++ b/packages/gatsby/src/utils/page-data.js @@ -28,7 +28,6 @@ const write = async ({ publicDir }, page, result) => { result, } const bodyStr = JSON.stringify(body) - // transform asset size to kB (from bytes) to fit 64 bit to numbers const pageDataSize = Buffer.byteLength(bodyStr) / 1000 From c6f5d1c10142aeba04fb682c704b91e302b0424f Mon Sep 17 00:00:00 2001 From: Stuart Rayson Date: Thu, 27 Feb 2020 08:21:32 +0000 Subject: [PATCH 45/50] Move new build functions to build-utils.js --- 
...imizations-for-incremental-data-changes.md | 2 +- packages/gatsby/src/commands/build-utils.js | 85 +++++++++++++++++++ packages/gatsby/src/commands/build.js | 8 +- packages/gatsby/src/redux/types.ts | 4 +- packages/gatsby/src/utils/page-data.js | 77 +---------------- 5 files changed, 93 insertions(+), 83 deletions(-) create mode 100644 packages/gatsby/src/commands/build-utils.js diff --git a/docs/docs/page-build-optimizations-for-incremental-data-changes.md b/docs/docs/page-build-optimizations-for-incremental-data-changes.md index ca3e82db46317..71758e37627b5 100644 --- a/docs/docs/page-build-optimizations-for-incremental-data-changes.md +++ b/docs/docs/page-build-optimizations-for-incremental-data-changes.md @@ -14,7 +14,7 @@ For more info on the standard build process please see [overview of the gatsby b To enable this enhancement, use the environment variable `GATSBY_EXPERIMENTAL_PAGE_BUILD_ON_DATA_CHANGES=true` in your `gatsby build` command, for example: -`GATSBY_EXPERIMENTAL_PAGE_BUILD_ON_DATA_CHANGES=true node ./node_modules/.bin/gatsby build` +`GATSBY_EXPERIMENTAL_PAGE_BUILD_ON_DATA_CHANGES=true gatsby build --log-pages` This will run the Gatsby build process, but only build pages that have data changes since your last build. If there are any changes to code (JS, CSS) the bundling process returns a new webpack compilation hash which causes all pages to be rebuilt. 
diff --git a/packages/gatsby/src/commands/build-utils.js b/packages/gatsby/src/commands/build-utils.js new file mode 100644 index 0000000000000..aafe320b46734 --- /dev/null +++ b/packages/gatsby/src/commands/build-utils.js @@ -0,0 +1,85 @@ +const fs = require(`fs-extra`) +const path = require(`path`) +const { remove: removePageHtmlFile } = require(`../utils/page-html`) +const { + remove: removePageDataFile, + fixedPagePath, +} = require(`../utils/page-data`) + +const getChangedPageDataKeys = (state, cachedPageData) => { + if (cachedPageData && state.pageData) { + const pageKeys = [] + state.pageData.forEach((newPageDataHash, key) => { + if (!cachedPageData.has(key)) { + pageKeys.push(key) + } else { + const previousPageDataHash = cachedPageData.get(key) + if (newPageDataHash !== previousPageDataHash) { + pageKeys.push(key) + } + } + }) + return pageKeys + } + + return [...state.pages.keys()] +} + +const collectRemovedPageData = (state, cachedPageData) => { + if (cachedPageData && state.pageData) { + const deletedPageKeys = [] + cachedPageData.forEach((_value, key) => { + if (!state.pageData.has(key)) { + deletedPageKeys.push(key) + } + }) + return deletedPageKeys + } + return [] +} + +const checkAndRemoveEmptyDir = (publicDir, pagePath) => { + const pageHtmlDirectory = path.join(publicDir, pagePath) + const pageDataDirectory = path.join( + publicDir, + `page-data`, + fixedPagePath(pagePath) + ) + const hasFiles = fs.readdirSync(pageHtmlDirectory) + + // if page's html folder is empty also remove matching page-data folder + if (!hasFiles.length) { + fs.removeSync(pageHtmlDirectory) + fs.removeSync(pageDataDirectory) + } +} + +const sortedPageKeysByNestedLevel = pageKeys => + pageKeys.sort((a, b) => { + const currentPagePathValue = a.split(`/`).length + const previousPagePathValue = b.split(`/`).length + return previousPagePathValue - currentPagePathValue + }) + +const removePageFiles = ({ publicDir }, pageKeys) => { + const removePages = pageKeys.map(pagePath => + 
removePageHtmlFile({ publicDir }, pagePath) + ) + + const removePageData = pageKeys.map(pagePath => + removePageDataFile({ publicDir }, pagePath) + ) + + return Promise.all([...removePages, ...removePageData]).then(() => { + // Sort removed pageKeys by nested directories and remove if empty. + sortedPageKeysByNestedLevel(pageKeys).forEach(pagePath => { + checkAndRemoveEmptyDir(publicDir, pagePath) + }) + }) +} + +module.exports = { + getChangedPageDataKeys, + collectRemovedPageData, + removePageFiles, +} diff --git a/packages/gatsby/src/commands/build.js b/packages/gatsby/src/commands/build.js index 7c8243e643942..be80259c10d5f 100644 --- a/packages/gatsby/src/commands/build.js +++ b/packages/gatsby/src/commands/build.js @@ -20,7 +20,7 @@ const { structureWebpackErrors } = require(`../utils/webpack-error-utils`) const { waitUntilAllJobsComplete: waitUntilAllJobsV2Complete, } = require(`../utils/jobs-manager`) -const pageDataUtil = require(`../utils/page-data`) +const buildUtils = require(`../commands/build-utils`) const { boundActionCreators } = require(`../redux/actions`) let cachedPageData @@ -177,7 +177,7 @@ module.exports = async function build(program: BuildArgs) { process.env.GATSBY_EXPERIMENTAL_PAGE_BUILD_ON_DATA_CHANGES && cachedWebpackCompilationHash === store.getState().webpackCompilationHash ) { - pagePaths = pageDataUtil.getChangedPageDataKeys( + pagePaths = buildUtils.getChangedPageDataKeys( store.getState(), cachedPageData ) @@ -226,11 +226,11 @@ module.exports = async function build(program: BuildArgs) { if (process.env.GATSBY_EXPERIMENTAL_PAGE_BUILD_ON_DATA_CHANGES) { activity = report.activityTimer(`Delete previous page data`) activity.start() - deletedPageKeys = pageDataUtil.collectRemovedPageData( + deletedPageKeys = buildUtils.collectRemovedPageData( store.getState(), cachedPageData ) - await pageDataUtil.removePageFiles({ publicDir }, deletedPageKeys) + await buildUtils.removePageFiles({ publicDir }, deletedPageKeys) activity.end() } diff --git 
a/packages/gatsby/src/redux/types.ts b/packages/gatsby/src/redux/types.ts index 323aa4eff1b1e..8a1b7af06b124 100644 --- a/packages/gatsby/src/redux/types.ts +++ b/packages/gatsby/src/redux/types.ts @@ -28,7 +28,7 @@ export interface IReduxState { developMiddleware: any proxy: any } - pageData?: any + pageData: any } export interface ICachedReduxState { @@ -40,7 +40,7 @@ export interface ICachedReduxState { staticQueryComponents: IReduxState["staticQueryComponents"] webpackCompilationHash: IReduxState["webpackCompilationHash"] pageDataStats: IReduxState["pageDataStats"] - pageData?: IReduxState["pageData"] + pageData: IReduxState["pageData"] } export type ActionsUnion = diff --git a/packages/gatsby/src/utils/page-data.js b/packages/gatsby/src/utils/page-data.js index fdce1b8c11968..277975d1d8fd7 100644 --- a/packages/gatsby/src/utils/page-data.js +++ b/packages/gatsby/src/utils/page-data.js @@ -1,7 +1,6 @@ const fs = require(`fs-extra`) const path = require(`path`) const { store } = require(`../redux`) -const { remove: removeHtml } = require(`../utils/page-html`) const fixedPagePath = pagePath => (pagePath === `/` ? 
`index` : pagePath) @@ -42,83 +41,9 @@ const write = async ({ publicDir }, page, result) => { await fs.outputFile(filePath, bodyStr) } -const getChangedPageDataKeys = (state, cachedPageData) => { - if (cachedPageData && state.pageData) { - const pageKeys = [] - state.pageData.forEach((newPageDataHash, key) => { - if (!cachedPageData.has(key)) { - pageKeys.push(key) - } else { - const previousPageDataHash = cachedPageData.get(key) - if (newPageDataHash !== previousPageDataHash) { - pageKeys.push(key) - } - } - }) - return pageKeys - } - - return [...state.pages.keys()] -} - -const collectRemovedPageData = (state, cachedPageData) => { - if (cachedPageData && state.pageData) { - const deletedPageKeys = [] - cachedPageData.forEach((_value, key) => { - if (!state.pageData.has(key)) { - deletedPageKeys.push(key) - } - }) - return deletedPageKeys - } - return [] -} - -const checkAndRemoveEmptyDir = (publicDir, pagePath) => { - const pageHtmlDirectory = path.join(publicDir, pagePath) - const pageDataDirectory = path.join( - publicDir, - `page-data`, - fixedPagePath(pagePath) - ) - const hasFiles = fs.readdirSync(pageHtmlDirectory) - - // if page's html folder is empty also remove matching page-data folder - if (!hasFiles.length) { - fs.removeSync(pageHtmlDirectory) - fs.removeSync(pageDataDirectory) - } -} - -const sortedPageKeysByNestedLevel = pageKeys => - pageKeys.sort((a, b) => { - const currentPagePathValue = a.split(`/`).length - const previousPagePathValue = b.split(`/`).length - return previousPagePathValue - currentPagePathValue - }) - -const removePageFiles = ({ publicDir }, pageKeys) => { - const removePages = pageKeys.map(pagePath => - removeHtml({ publicDir }, pagePath) - ) - - const removePageData = pageKeys.map(pagePath => - remove({ publicDir }, pagePath) - ) - - return Promise.all([...removePages, ...removePageData]).then(() => { - // Sort removed pageKeys by nested directories and remove if empty. 
- sortedPageKeysByNestedLevel(pageKeys).forEach(pagePath => { - checkAndRemoveEmptyDir(publicDir, pagePath) - }) - }) -} - module.exports = { read, write, remove, - getChangedPageDataKeys, - collectRemovedPageData, - removePageFiles, + fixedPagePath, } From 2d60d5e99367a6dc80f34580835c2d9f9b8a2314 Mon Sep 17 00:00:00 2001 From: Stuart Rayson Date: Fri, 28 Feb 2020 11:08:29 +0000 Subject: [PATCH 46/50] check html suffix function --- packages/gatsby/src/utils/page-html.js | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/packages/gatsby/src/utils/page-html.js b/packages/gatsby/src/utils/page-html.js index 92817c463b9ba..ea6b744b451eb 100644 --- a/packages/gatsby/src/utils/page-html.js +++ b/packages/gatsby/src/utils/page-html.js @@ -1,8 +1,11 @@ const fs = require(`fs-extra`) const path = require(`path`) +const checkForHtmlSuffix = pagePath => !/\.(html?)$/i.test(pagePath) + const remove = async ({ publicDir }, pagePath) => { const filePath = getPageHtmlFilePath(publicDir, pagePath) + return fs.remove(filePath) } @@ -10,7 +13,7 @@ const remove = async ({ publicDir }, pagePath) => { const getPageHtmlFilePath = (dir, outputPath) => { let outputFileName = outputPath.replace(/^(\/|\\)/, ``) // Remove leading slashes for webpack-dev-server - if (!/\.(html?)$/i.test(outputFileName)) { + if (checkForHtmlSuffix(outputPath)) { outputFileName = path.join(outputFileName, `index.html`) } From ec61132d77f62377b993d4942b5846ea56535f9a Mon Sep 17 00:00:00 2001 From: Stuart Rayson Date: Fri, 28 Feb 2020 11:22:11 +0000 Subject: [PATCH 47/50] update docs --- ...ge-build-optimizations-for-incremental-data-changes.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/docs/docs/page-build-optimizations-for-incremental-data-changes.md b/docs/docs/page-build-optimizations-for-incremental-data-changes.md index 71758e37627b5..5d37528b4b45a 100644 --- a/docs/docs/page-build-optimizations-for-incremental-data-changes.md +++ 
b/docs/docs/page-build-optimizations-for-incremental-data-changes.md @@ -54,6 +54,10 @@ If there are no changed or deleted paths, then the relevant files will not be cr - To enable this build option you will need to set an environment variable, so you will need access to set variables in your build environment -- At the end of each build, gatsby creates a `redux.state` file in `/.cache` that contains the all previous build data. You will need to persist the `.cache/redux.state` between builds, allowing for comparison, if there is no `redux.state` file located in the `/.cache` folder then a full build will be triggered +- This feature is not available with `gatsby develop`. -- Any code or static query changes (templates, components, source handling, new plugins etc) creates a new webpack compilation hash and triggers a full build +* At the end of each build, gatsby creates a `redux.state` file in `/.cache` that contains the all previous build data. You will need to persist the `.cache/redux.state` between builds, allowing for comparison, if there is no `redux.state` file located in the `/.cache` folder then a full build will be triggered + +* Any code or static query changes (templates, components, source handling, new plugins etc) creates a new webpack compilation hash and triggers a full build + +- Using the `GATSBY_EXPERIMENTAL_PAGE_BUILD_ON_DATA_CHANGES` flag should be used consistently when building your project, otherwise removing the flag will clear all cache From fef05f56c9426c2ef7cfbc575210a6c165336f81 Mon Sep 17 00:00:00 2001 From: Stuart Rayson Date: Fri, 28 Feb 2020 11:45:23 +0000 Subject: [PATCH 48/50] handle .html paths when removing empty directories --- packages/gatsby/src/commands/build-utils.js | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/packages/gatsby/src/commands/build-utils.js b/packages/gatsby/src/commands/build-utils.js index aafe320b46734..b22f2f3860e9a 100644 --- a/packages/gatsby/src/commands/build-utils.js 
+++ b/packages/gatsby/src/commands/build-utils.js @@ -1,6 +1,9 @@ const fs = require(`fs-extra`) const path = require(`path`) -const { remove: removePageHtmlFile } = require(`../utils/page-html`) +const { + remove: removePageHtmlFile, + getPageHtmlFilePath, +} = require(`../utils/page-html`) const { remove: removePageDataFile, fixedPagePath, @@ -39,7 +42,9 @@ const collectRemovedPageData = (state, cachedPageData) => { } const checkAndRemoveEmptyDir = (publicDir, pagePath) => { - const pageHtmlDirectory = path.join(publicDir, pagePath) + const pageHtmlDirectory = path.dirname( + getPageHtmlFilePath(publicDir, pagePath) + ) const pageDataDirectory = path.join( publicDir, `page-data`, From a31749cc68b8a9119c1cda5339c51c9363b06960 Mon Sep 17 00:00:00 2001 From: Dominic Fallows Date: Fri, 28 Feb 2020 14:18:38 +0000 Subject: [PATCH 49/50] Apply suggestions from code review Co-Authored-By: LB --- ...timizations-for-incremental-data-changes.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/docs/page-build-optimizations-for-incremental-data-changes.md b/docs/docs/page-build-optimizations-for-incremental-data-changes.md index 5d37528b4b45a..07a4f4b8c44ec 100644 --- a/docs/docs/page-build-optimizations-for-incremental-data-changes.md +++ b/docs/docs/page-build-optimizations-for-incremental-data-changes.md @@ -6,7 +6,7 @@ Building sites with large amounts of content (10,000s nodes upwards) is relative One solution to these problems might be to use [Gatsby Cloud's Build features](https://www.gatsbyjs.com/cloud/). -For projects that require self-hosted environments, where Gatsby Cloud would not be an option, being able to only deploy the content that has changed or is new (incremental data changes, you might say) would help reduce build times, deployment times and demand on resources. 
+For projects that require self-hosted environments, where Gatsby Cloud would not be an option, deploying only the content that has changed or is new (incremental data changes, you might say) can help reduce build times, deployment times and demand on resources. For more info on the standard build process please see [overview of the gatsby build process](/docs/overview-of-the-gatsby-build-process/) @@ -41,23 +41,23 @@ info Done building in 152.084 sec Done in 154.501 sec ``` -- `--write-to-file` creates two files in the `.cache` folder, with lists of the changes paths in the build assets (`public`) folder. +- `--write-to-file` creates two files in the `.cache` folder, with lists of the changed paths in the build assets (`public`) folder. - - `newPages.txt` will contain a list of paths that have changed or are new - - `deletedPages.txt` will contain a list of paths that have been deleted + - `newPages.txt` will contain a list of new or changed paths + - `deletedPages.txt` will contain a list of deleted paths If there are no changed or deleted paths, then the relevant files will not be created in the `.cache` folder. ## More information -- This enhancement works by comparing the page data from the previous build to the new page data. This creates a list of page directories that are passed to the static build process +- This enhancement works by comparing the page data from the previous build to the new page data. This creates a list of page directories that are passed to the static build process. -- To enable this build option you will need to set an environment variable, so you will need access to set variables in your build environment +- To enable this build option you will need to set an environment variable, which requires access to do so in your build environment. - This feature is not available with `gatsby develop`. -* At the end of each build, gatsby creates a `redux.state` file in `/.cache` that contains the all previous build data. 
You will need to persist the `.cache/redux.state` between builds, allowing for comparison, if there is no `redux.state` file located in the `/.cache` folder then a full build will be triggered +* At the end of each build, gatsby creates a `redux.state` file in `/.cache` that contains previous build data. You will need to persist the `.cache/redux.state` between builds, allowing for comparison. If there is no `redux.state` file located in the `/.cache` folder then a full build will be triggered -* Any code or static query changes (templates, components, source handling, new plugins etc) creates a new webpack compilation hash and triggers a full build +* Any code or static query changes (templates, components, source handling, new plugins etc) will prompt the creation of a new webpack compilation hash and trigger a full build. -- Using the `GATSBY_EXPERIMENTAL_PAGE_BUILD_ON_DATA_CHANGES` flag should be used consistently when building your project, otherwise removing the flag will clear all cache +Note: When using the `GATSBY_EXPERIMENTAL_PAGE_BUILD_ON_DATA_CHANGES` flag it is important to do so consistently when building your project. Otherwise, the cache will be cleared and the necessary data for comparison will no longer be available, removing the ability to check for incremental data changes. 
From 09cc3057c328a6657720d2c85e53a8231716e778 Mon Sep 17 00:00:00 2001 From: Stuart Rayson Date: Sat, 29 Feb 2020 16:39:07 +0000 Subject: [PATCH 50/50] fix formating issue on docs --- .../page-build-optimizations-for-incremental-data-changes.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/docs/page-build-optimizations-for-incremental-data-changes.md b/docs/docs/page-build-optimizations-for-incremental-data-changes.md index 07a4f4b8c44ec..1d93dc8c2b852 100644 --- a/docs/docs/page-build-optimizations-for-incremental-data-changes.md +++ b/docs/docs/page-build-optimizations-for-incremental-data-changes.md @@ -43,7 +43,7 @@ Done in 154.501 sec - `--write-to-file` creates two files in the `.cache` folder, with lists of the changed paths in the build assets (`public`) folder. - - `newPages.txt` will contain a list of new or changed paths + - `newPages.txt` will contain a list of new or changed paths - `deletedPages.txt` will contain a list of deleted paths If there are no changed or deleted paths, then the relevant files will not be created in the `.cache` folder. @@ -56,7 +56,7 @@ If there are no changed or deleted paths, then the relevant files will not be cr - This feature is not available with `gatsby develop`. -* At the end of each build, gatsby creates a `redux.state` file in `/.cache` that contains previous build data. You will need to persist the `.cache/redux.state` between builds, allowing for comparison. If there is no `redux.state` file located in the `/.cache` folder then a full build will be triggered +* At the end of each build, gatsby creates a `redux.state` file in `/.cache` that contains previous build data. You will need to persist the `.cache/redux.state` between builds, allowing for comparison. If there is no `redux.state` file located in the `/.cache` folder then a full build will be triggered. 
* Any code or static query changes (templates, components, source handling, new plugins etc) will prompt the creation of a new webpack compilation hash and trigger a full build.