diff --git a/packages/gatsby/src/redux/__tests__/index.js b/packages/gatsby/src/redux/__tests__/index.js index 5c759396e2003..ed610db255a7c 100644 --- a/packages/gatsby/src/redux/__tests__/index.js +++ b/packages/gatsby/src/redux/__tests__/index.js @@ -1,4 +1,5 @@ const _ = require(`lodash`) +const path = require(`path`) const writeToCache = jest.spyOn(require(`../persist`), `writeToCache`) const { saveState, store, readState } = require(`../index`) @@ -14,6 +15,34 @@ jest.mock(`fs-extra`, () => { mockWrittenContent.set(file, content) ), readFileSync: jest.fn(file => mockWrittenContent.get(file)), + renameSync: jest.fn((from, to) => { + // This will only work for folders if they are always the full prefix + // of the file... (that goes for both input dirs). That's the case here. + + if (mockWrittenContent.has(to)) { + throw new Error(`File/folder exists`) + } + + // Move all files in this folder as well ... :/ + mockWrittenContent.forEach((value, key) => { + if (key.startsWith(from)) { + // rename('foo/bar', 'a/b/c') => foo/bar/ding.js -> a/b/c/ding.js + // (.replace with string arg will only replace the first occurrence) + mockWrittenContent.set( + key.replace(from, to), + mockWrittenContent.get(key) + ) + mockWrittenContent.delete(key) + } + }) + }), + existsSync: jest.fn(target => mockWrittenContent.has(target)), + mkdtempSync: jest.fn(suffix => { + let dir = `some/tmp` + suffix + Math.random() + mockWrittenContent.set(dir, Buffer.from(`empty dir`)) + return dir + }), + removeSync: jest.fn(file => mockWrittenContent.delete(file)), } }) @@ -41,11 +70,9 @@ describe(`redux db`, () => { mockWrittenContent.clear() }) - it(`expect components state to be empty initially`, () => { + it(`should write cache to disk`, async () => { expect(initialComponentsState).toEqual(new Map()) - }) - it(`should write cache to disk`, async () => { await saveState() expect(writeToCache).toBeCalled() @@ -66,4 +93,18 @@ describe(`redux db`, () => { // yuck - loki and redux will have 
different shape of redux state (nodes and nodesByType) expect(_.omit(data, [`nodes`, `nodesByType`])).toMatchSnapshot() }) + + it(`should drop legacy file if exists`, async () => { + expect(initialComponentsState).toEqual(new Map()) + + const legacyLocation = path.join(process.cwd(), `.cache/redux.state`) + mockWrittenContent.set( + legacyLocation, + Buffer.from(`legacy location for cache`) + ) + + await saveState() + + expect(mockWrittenContent.has(legacyLocation)).toBe(false) + }) }) diff --git a/packages/gatsby/src/redux/persist.ts b/packages/gatsby/src/redux/persist.ts index bf6b6902e06f1..9f5297daf5ffa 100644 --- a/packages/gatsby/src/redux/persist.ts +++ b/packages/gatsby/src/redux/persist.ts @@ -1,12 +1,155 @@ +import path from "path" import v8 from "v8" -import { readFileSync, writeFileSync } from "fs-extra" -import { ICachedReduxState } from "./types" +import { + existsSync, + mkdtempSync, + readFileSync, + removeSync, + renameSync, + writeFileSync, +} from "fs-extra" +import { IReduxNode, ICachedReduxState } from "./types" +import { sync as globSync } from "glob" -const file = (): string => `${process.cwd()}/.cache/redux.state` +const legacyCacheFile = path.join(process.cwd(), `.cache/redux.state`) +const reduxCacheFolder = path.join(process.cwd(), `.cache/redux`) +function reduxRestFile(dir: string): string { + return path.join(dir, `redux.rest.state`) +} +function reduxChunkFilePrefix(dir: string): string { + return path.join(dir, `redux.node.state_`) +} + +function readFromLegacyCache(): ICachedReduxState { + // TODO: remove this bit with next major bump (fairly benign change but still) + return v8.deserialize(readFileSync(legacyCacheFile)) +} + +export function readFromCache(): ICachedReduxState { + // The cache is stored in two steps; the nodes in chunks and the rest + // First we revive the rest, then we inject the nodes into that obj (if any) + // Each chunk is stored in its own file, this circumvents max buffer lengths + // for sites with a _lot_ 
of content. Since all nodes go into a Map, the order + // of reading them is not relevant. + + if (!existsSync(reduxCacheFolder)) { + return readFromLegacyCache() + } + + const obj: ICachedReduxState = v8.deserialize( + readFileSync(reduxRestFile(reduxCacheFolder)) + ) + + // Note: at 1M pages, this will be 1M/chunkSize chunks (ie. 1m/10k=100) + const chunks = globSync( + reduxChunkFilePrefix(reduxCacheFolder) + `*` + ).map(file => v8.deserialize(readFileSync(file))) + + const nodes: [string, IReduxNode][] = [].concat(...chunks) + + if (chunks.length) { + obj.nodes = new Map(nodes) + } + + return obj +} + +function guessSafeChunkSize(values: [string, IReduxNode][]): number { + // Pick a few random elements and measure their size then pick a chunk size + // ceiling based on the worst case. Each test takes time so there's trade-off. + // This attempts to prevent small sites with very large pages from OOMing. + // This heuristic could still fail if it randomly grabs the smallest nodes. + // TODO: test a few nodes per each type instead of from all nodes + + const nodesToTest = 11 // Very arbitrary number + const valueCount = values.length + const step = Math.max(1, Math.floor(valueCount / nodesToTest)) + let maxSize = 0 + for (let i = 0; i < valueCount; i += step) { + const size = v8.serialize(values[i]).length + maxSize = Math.max(size, maxSize) + } + + // Max size of a Buffer is 2gb (yeah, we're assuming 64bit system) + // https://stackoverflow.com/questions/8974375/whats-the-maximum-size-of-a-node-js-buffer + // Use 1.5gb as the target ceiling, allowing for some margin of error + return Math.floor((1.5 * 1024 * 1024 * 1024) / maxSize) +} + +function prepareCacheFolder( + targetDir: string, + contents: ICachedReduxState +): void { + // Temporarily save the nodes and remove them from the main redux store + // This prevents an OOM when the page nodes collectively contain too much data + const map = contents.nodes + contents.nodes = undefined + writeFileSync(reduxRestFile(targetDir),
v8.serialize(contents)) + // Now restore them on the redux store + contents.nodes = map + + if (map) { + // Now store the nodes separately, chunk size determined by a heuristic + const values: [string, IReduxNode][] = [...map.entries()] + const chunkSize = guessSafeChunkSize(values) + const chunks = Math.ceil(values.length / chunkSize) + + for (let i = 0; i < chunks; ++i) { + writeFileSync( + reduxChunkFilePrefix(targetDir) + i, + v8.serialize(values.slice(i * chunkSize, i * chunkSize + chunkSize)) + ) + } + } +} + +function safelyRenameToBak(reduxCacheFolder: string): string { + // Basically try to work around the potential of previous renamed caches + // not being removed for whatever reason. _That_ should not be a blocker. + const tmpSuffix = `.bak` + let suffixCounter = 0 + let bakName = reduxCacheFolder + tmpSuffix // Start without number + while (existsSync(bakName)) { + ++suffixCounter + bakName = reduxCacheFolder + tmpSuffix + suffixCounter + } + renameSync(reduxCacheFolder, bakName) + + return bakName +} + +export function writeToCache(contents: ICachedReduxState): void { + // Note: this should be a transactional operation. So work in a tmp dir and + // make sure the cache cannot be left in a corruptable state due to errors. + + const tmpDir = mkdtempSync(`reduxcache`) // linux / windows + + prepareCacheFolder(tmpDir, contents) + + // Replace old cache folder with new. If the first rename fails, the cache + // is just stale. If the second rename fails, the cache is empty. In either + // case the cache is not left in a corrupt state. 
+ + let bakName = `` + if (existsSync(reduxCacheFolder)) { + // Don't drop until after swapping over (renaming is less likely to fail) + bakName = safelyRenameToBak(reduxCacheFolder) + } -export const readFromCache = (): ICachedReduxState => - v8.deserialize(readFileSync(file())) + // The redux cache folder should now not exist so we can rename our tmp to it + renameSync(tmpDir, reduxCacheFolder) -export const writeToCache = (contents: ICachedReduxState): void => { - writeFileSync(file(), v8.serialize(contents)) + // Now try to yolorimraf the old cache folder + try { + if (existsSync(legacyCacheFile)) { + removeSync(legacyCacheFile) + } + if (bakName !== ``) { + removeSync(bakName) + } + } catch (e) { + console.warn( + `Non-fatal: Deleting the old cache folder failed, left behind in \`${bakName}\`. Rimraf reported this error: ${e}` + ) + } } diff --git a/packages/gatsby/src/redux/types.ts b/packages/gatsby/src/redux/types.ts index 1da7173c599e3..cb423f04c668e 100644 --- a/packages/gatsby/src/redux/types.ts +++ b/packages/gatsby/src/redux/types.ts @@ -3,7 +3,7 @@ export enum ProgramStatus { BOOTSTRAP_QUERY_RUNNING_FINISHED = `BOOTSTRAP_QUERY_RUNNING_FINISHED`, } -export type IReduxNode = { +export interface IReduxNode { id: string internal: { type: string