From 7670bec2bcbadde81fbf4fc786727d4ff611e3b1 Mon Sep 17 00:00:00 2001
From: Sunil Pai
Date: Sun, 13 Mar 2022 18:58:43 +0530
Subject: [PATCH] feat: expire unused assets in `[site]` uploads

This expires previously uploaded assets that are no longer referenced
when using a Sites / `[site]` configuration. Because we currently
iterate over every key in the namespace when publishing, rapidly
changing sites accumulate stale assets and uploads get slower and
slower. We can't simply delete unused assets, because doing so causes
occasional 404s on the previous publish while the new one is still
rolling out. Instead, we expire the previous assets while uploading
the new ones. Given the constraints of the KV API, individual uploads
may become a little slower, but publishes should be faster overall.
These optimisations only matter for rapidly changing sites, so common
use cases keep the same performance characteristics.
---
 .changeset/thick-keys-worry.md                     |   7 +
 .../wrangler/src/__tests__/helpers/mock-kv.ts      |  25 +--
 packages/wrangler/src/__tests__/kv.test.ts         |  81 +++------
 .../wrangler/src/__tests__/publish.test.ts         | 170 +++++++++++++++++-
 packages/wrangler/src/{kv/index.tsx => kv.ts}      |   4 +-
 packages/wrangler/src/sites.tsx                    |  66 +++++--
 6 files changed, 262 insertions(+), 91 deletions(-)
 create mode 100644 .changeset/thick-keys-worry.md
 rename packages/wrangler/src/{kv/index.tsx => kv.ts} (99%)

diff --git a/.changeset/thick-keys-worry.md b/.changeset/thick-keys-worry.md
new file mode 100644
index 000000000000..bb2041535e25
--- /dev/null
+++ b/.changeset/thick-keys-worry.md
@@ -0,0 +1,7 @@
+---
+"wrangler": patch
+---
+
+feat: expire unused assets in `[site]` uploads
+
+This expires previously uploaded assets that are no longer referenced when using a Sites / `[site]` configuration. Because we currently iterate over every key in the namespace when publishing, rapidly changing sites accumulate stale assets and uploads get slower and slower. We can't simply delete unused assets, because doing so causes occasional 404s on the previous publish while the new one is still rolling out. Instead, we expire the previous assets while uploading the new ones. Given the constraints of the KV API, individual uploads may become a little slower, but publishes should be faster overall. These optimisations only matter for rapidly changing sites, so common use cases keep the same performance characteristics.
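To make the changes below easier to follow, here is a condensed, hypothetical sketch of the "expire instead of delete" flow that `syncAssets` adopts in this patch. The real code lives in `packages/wrangler/src/sites.tsx`; the helpers here are passed in as parameters rather than using wrangler's actual `listNamespaceKeys`/`getKeyValue`/`putBulkKeyValue` signatures, so treat this as an illustration of the bookkeeping, not the implementation.

```ts
// Condensed sketch of the "expire instead of delete" sync flow.
// Types and helper shapes are simplified; they only mirror how the
// real helpers are used in this patch.
type KeyInfo = { name: string; expiration?: number };
type KeyValue = {
  key: string;
  value: string;
  base64?: boolean;
  expiration?: number;
};

const FIVE_MINUTES_IN_SECONDS = 5 * 60;

async function syncWithExpiry(
  localAssets: Map<string, string>, // hashed asset key -> base64 content
  listKeys: () => Promise<KeyInfo[]>,
  getValue: (key: string) => Promise<string>,
  putBulk: (kvs: KeyValue[]) => Promise<void>
): Promise<void> {
  // Index the keys that are already in the namespace.
  const remote = new Map((await listKeys()).map((k) => [k.name, k]));

  const toUpload: KeyValue[] = [];
  for (const [key, value] of localAssets) {
    const existing = remote.get(key);
    // Upload anything that is missing, or that was previously marked to
    // expire (re-uploading it without an expiration makes it live again).
    if (!existing || existing.expiration) {
      toUpload.push({ key, value, base64: true });
    }
    // Whatever is left in `remote` afterwards is no longer referenced.
    remote.delete(key);
  }

  // Don't delete unused keys outright -- requests against the previous
  // publish may still be in flight. Rewrite them with a short expiration
  // instead (an absolute Unix timestamp in seconds).
  const expiration = Math.ceil(Date.now() / 1000) + FIVE_MINUTES_IN_SECONDS;
  const toExpire = await Promise.all(
    [...remote.values()]
      .filter((k) => !k.expiration) // already-expiring keys need no work
      .map(async (k) => ({
        key: k.name,
        // the bulk API needs a value, so fetch the current one back
        value: await getValue(k.name),
        base64: true,
        expiration,
      }))
  );

  await putBulk(toUpload.concat(toExpire));
}
```

The important property is that an unused key keeps serving its current value until its expiration passes, so requests racing against a publish never 404; re-uploading a key without an `expiration` field makes it permanent again.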
diff --git a/packages/wrangler/src/__tests__/helpers/mock-kv.ts b/packages/wrangler/src/__tests__/helpers/mock-kv.ts index 8c11122495ec..ada531fd5ccc 100644 --- a/packages/wrangler/src/__tests__/helpers/mock-kv.ts +++ b/packages/wrangler/src/__tests__/helpers/mock-kv.ts @@ -1,18 +1,15 @@ import { createFetchResult, setMockRawResponse } from "./mock-cfetch"; +import type { NamespaceKeyInfo } from "../../kv"; export function mockKeyListRequest( expectedNamespaceId: string, - expectedKeys: string[], + expectedKeys: NamespaceKeyInfo[], keysPerRequest = 1000, blankCursorValue: "" | undefined | null = undefined ) { const requests = { count: 0 }; // See https://api.cloudflare.com/#workers-kv-namespace-list-a-namespace-s-keys - const expectedKeyObjects = expectedKeys.map((name) => ({ - name, - expiration: 123456789, - metadata: {}, - })); + setMockRawResponse( "/accounts/:accountId/storage/kv/namespaces/:namespaceId/keys", "GET", @@ -20,19 +17,15 @@ export function mockKeyListRequest( requests.count++; expect(accountId).toEqual("some-account-id"); expect(namespaceId).toEqual(expectedNamespaceId); - if (expectedKeyObjects.length <= keysPerRequest) { - return createFetchResult(expectedKeyObjects); + if (expectedKeys.length <= keysPerRequest) { + return createFetchResult(expectedKeys); } else { const start = parseInt(query.get("cursor") ?? "0") || 0; const end = start + keysPerRequest; - const cursor = end < expectedKeyObjects.length ? end : blankCursorValue; - return createFetchResult( - expectedKeyObjects.slice(start, end), - true, - [], - [], - { cursor } - ); + const cursor = end < expectedKeys.length ? end : blankCursorValue; + return createFetchResult(expectedKeys.slice(start, end), true, [], [], { + cursor, + }); } } ); diff --git a/packages/wrangler/src/__tests__/kv.test.ts b/packages/wrangler/src/__tests__/kv.test.ts index 05434a9b0339..28aff58cee9b 100644 --- a/packages/wrangler/src/__tests__/kv.test.ts +++ b/packages/wrangler/src/__tests__/kv.test.ts @@ -673,26 +673,25 @@ describe("wrangler", () => { describe("list", () => { it("should list the keys of a namespace specified by namespace-id", async () => { - const keys = ["key-1", "key-2", "key-3"]; + const keys = [ + { name: "key-1" }, + { name: "key-2", expiration: 123456789 }, + { name: "key-3" }, + ]; mockKeyListRequest("some-namespace-id", keys); await runWrangler("kv:key list --namespace-id some-namespace-id"); expect(std.err).toMatchInlineSnapshot(`""`); expect(std.out).toMatchInlineSnapshot(` "[ { - \\"name\\": \\"key-1\\", - \\"expiration\\": 123456789, - \\"metadata\\": {} + \\"name\\": \\"key-1\\" }, { \\"name\\": \\"key-2\\", - \\"expiration\\": 123456789, - \\"metadata\\": {} + \\"expiration\\": 123456789 }, { - \\"name\\": \\"key-3\\", - \\"expiration\\": 123456789, - \\"metadata\\": {} + \\"name\\": \\"key-3\\" } ]" `); @@ -700,7 +699,7 @@ describe("wrangler", () => { it("should list the keys of a namespace specified by binding", async () => { writeWranglerConfig(); - const keys = ["key-1", "key-2", "key-3"]; + const keys = [{ name: "key-1" }, { name: "key-2" }, { name: "key-3" }]; mockKeyListRequest("bound-id", keys); await runWrangler("kv:key list --binding someBinding"); @@ -708,19 +707,13 @@ describe("wrangler", () => { expect(std.out).toMatchInlineSnapshot(` "[ { - \\"name\\": \\"key-1\\", - \\"expiration\\": 123456789, - \\"metadata\\": {} + \\"name\\": \\"key-1\\" }, { - \\"name\\": \\"key-2\\", - \\"expiration\\": 123456789, - \\"metadata\\": {} + \\"name\\": \\"key-2\\" }, { - \\"name\\": \\"key-3\\", - 
\\"expiration\\": 123456789, - \\"metadata\\": {} + \\"name\\": \\"key-3\\" } ]" `); @@ -728,26 +721,20 @@ describe("wrangler", () => { it("should list the keys of a preview namespace specified by binding", async () => { writeWranglerConfig(); - const keys = ["key-1", "key-2", "key-3"]; + const keys = [{ name: "key-1" }, { name: "key-2" }, { name: "key-3" }]; mockKeyListRequest("preview-bound-id", keys); await runWrangler("kv:key list --binding someBinding --preview"); expect(std.err).toMatchInlineSnapshot(`""`); expect(std.out).toMatchInlineSnapshot(` "[ { - \\"name\\": \\"key-1\\", - \\"expiration\\": 123456789, - \\"metadata\\": {} + \\"name\\": \\"key-1\\" }, { - \\"name\\": \\"key-2\\", - \\"expiration\\": 123456789, - \\"metadata\\": {} + \\"name\\": \\"key-2\\" }, { - \\"name\\": \\"key-3\\", - \\"expiration\\": 123456789, - \\"metadata\\": {} + \\"name\\": \\"key-3\\" } ]" `); @@ -755,7 +742,7 @@ describe("wrangler", () => { it("should list the keys of a namespace specified by binding, in a given environment", async () => { writeWranglerConfig(); - const keys = ["key-1", "key-2", "key-3"]; + const keys = [{ name: "key-1" }, { name: "key-2" }, { name: "key-3" }]; mockKeyListRequest("env-bound-id", keys); await runWrangler( "kv:key list --binding someBinding --env some-environment" @@ -764,19 +751,13 @@ describe("wrangler", () => { expect(std.out).toMatchInlineSnapshot(` "[ { - \\"name\\": \\"key-1\\", - \\"expiration\\": 123456789, - \\"metadata\\": {} + \\"name\\": \\"key-1\\" }, { - \\"name\\": \\"key-2\\", - \\"expiration\\": 123456789, - \\"metadata\\": {} + \\"name\\": \\"key-2\\" }, { - \\"name\\": \\"key-3\\", - \\"expiration\\": 123456789, - \\"metadata\\": {} + \\"name\\": \\"key-3\\" } ]" `); @@ -784,7 +765,7 @@ describe("wrangler", () => { it("should list the keys of a preview namespace specified by binding, in a given environment", async () => { writeWranglerConfig(); - const keys = ["key-1", "key-2", "key-3"]; + const keys = [{ name: "key-1" }, { name: "key-2" }, { name: "key-3" }]; mockKeyListRequest("preview-env-bound-id", keys); await runWrangler( "kv:key list --binding someBinding --preview --env some-environment" @@ -793,19 +774,13 @@ describe("wrangler", () => { expect(std.out).toMatchInlineSnapshot(` "[ { - \\"name\\": \\"key-1\\", - \\"expiration\\": 123456789, - \\"metadata\\": {} + \\"name\\": \\"key-1\\" }, { - \\"name\\": \\"key-2\\", - \\"expiration\\": 123456789, - \\"metadata\\": {} + \\"name\\": \\"key-2\\" }, { - \\"name\\": \\"key-3\\", - \\"expiration\\": 123456789, - \\"metadata\\": {} + \\"name\\": \\"key-3\\" } ]" `); @@ -822,9 +797,9 @@ describe("wrangler", () => { describe(`cursor - ${blankCursorValue}`, () => { it("should make multiple requests for paginated results", async () => { // Create a lot of mock keys, so that the fetch requests will be paginated - const keys: string[] = []; + const keys: NamespaceKeyInfo[] = []; for (let i = 0; i < 550; i++) { - keys.push("key-" + i); + keys.push({ name: "key-" + i }); } // Ask for the keys in pages of size 100. 
const requests = mockKeyListRequest( @@ -837,9 +812,7 @@ describe("wrangler", () => { "kv:key list --namespace-id some-namespace-id --limit 100" ); expect(std.err).toMatchInlineSnapshot(`""`); - expect( - JSON.parse(std.out).map((k: NamespaceKeyInfo) => k.name) - ).toEqual(keys); + expect(JSON.parse(std.out)).toEqual(keys); expect(requests.count).toEqual(6); }); }); diff --git a/packages/wrangler/src/__tests__/publish.test.ts b/packages/wrangler/src/__tests__/publish.test.ts index 06f77da0c904..a182570b683b 100644 --- a/packages/wrangler/src/__tests__/publish.test.ts +++ b/packages/wrangler/src/__tests__/publish.test.ts @@ -4,9 +4,11 @@ import * as TOML from "@iarna/toml"; import { mockAccountId, mockApiToken } from "./helpers/mock-account-id"; import { createFetchResult, + setMockFetchKVGetValue, setMockRawResponse, setMockResponse, unsetAllMocks, + unsetMockFetchKVGetValues, } from "./helpers/mock-cfetch"; import { mockConsoleMethods } from "./helpers/mock-console"; import { mockKeyListRequest } from "./helpers/mock-kv"; @@ -31,6 +33,7 @@ describe("publish", () => { afterEach(() => { unsetAllMocks(); + unsetMockFetchKVGetValues(); }); describe("environments", () => { @@ -809,8 +812,10 @@ export default{ mockUploadWorkerRequest(); mockSubDomainRequest(); mockListKVNamespacesRequest(kvNamespace); - // Put file-1 in the KV namespace - mockKeyListRequest(kvNamespace.id, ["assets/file-1.2ca234f380.txt"]); + // Put file-1 in the KV namespace, no expiration + mockKeyListRequest(kvNamespace.id, [ + { name: "assets/file-1.2ca234f380.txt" }, + ]); // Check we do not upload file-1 mockUploadAssetsToKVRequest( kvNamespace.id, @@ -1232,6 +1237,164 @@ export default{ %s If you think this is a bug then please create an issue at https://github.com/cloudflare/wrangler2/issues/new." 
       `);
     });
+
+    describe("expire assets", () => {
+      // it's 100 seconds past the epoch
+      // everyone's still talking about how great Woodstock was
+      const DATE_NOW = 100000;
+      beforeEach(() => {
+        jest.spyOn(Date, "now").mockImplementation((): number => {
+          return DATE_NOW;
+        });
+      });
+      afterEach(() => {
+        (Date.now as jest.Mock).mockRestore();
+      });
+
+      it("should expire uploaded assets that aren't included anymore", async () => {
+        const assets = [
+          { filePath: "assets/file-1.txt", content: "Content of file-1" },
+          { filePath: "assets/file-2.txt", content: "Content of file-2" },
+        ];
+        const kvNamespace = {
+          title: "__test-name-workers_sites_assets",
+          id: "__test-name-workers_sites_assets-id",
+        };
+        writeWranglerToml({
+          main: "./index.js",
+          site: {
+            bucket: "assets",
+          },
+        });
+        writeWorkerSource();
+        writeAssets(assets);
+        mockUploadWorkerRequest();
+        mockSubDomainRequest();
+        mockListKVNamespacesRequest(kvNamespace);
+        mockKeyListRequest(kvNamespace.id, [
+          // Put file-1 in the KV namespace
+          { name: "assets/file-1.2ca234f380.txt" },
+          // As well as a couple from a previous upload
+          { name: "assets/file-3.somehash.txt" },
+          { name: "assets/file-4.anotherhash.txt" },
+        ]);
+
+        // Check that we pull down the values of file-3.txt and file-4.txt
+        setMockFetchKVGetValue(
+          "some-account-id",
+          "__test-name-workers_sites_assets-id",
+          "assets/file-3.somehash.txt",
+          btoa("Content of file-3")
+        );
+        setMockFetchKVGetValue(
+          "some-account-id",
+          "__test-name-workers_sites_assets-id",
+          "assets/file-4.anotherhash.txt",
+          btoa("Content of file-4")
+        );
+
+        // and send them back up
+        mockUploadAssetsToKVRequest(kvNamespace.id, [
+          ...assets.filter((a) => a.filePath !== "assets/file-1.txt"),
+          {
+            filePath: "assets/file-3.txt",
+            content: "Content of file-3",
+            // 100 seconds + 300 more
+            expiration: 400,
+          },
+          {
+            filePath: "assets/file-4.txt",
+            content: "Content of file-4",
+            // 100 seconds + 300 more
+            expiration: 400,
+          },
+        ]);
+
+        await runWrangler("publish");
+
+        expect(std.out).toMatchInlineSnapshot(`
+          "reading assets/file-1.txt...
+          skipping - already uploaded
+          reading assets/file-2.txt...
+          uploading as assets/file-2.5938485188.txt...
+          expiring unused assets/file-3.somehash.txt...
+          expiring unused assets/file-4.anotherhash.txt...
+          Uploaded test-name (TIMINGS)
+          Published test-name (TIMINGS)
+          test-name.test-sub-domain.workers.dev"
+        `);
+        expect(std.err).toMatchInlineSnapshot(`""`);
+      });
+
+      it("should not try to expire uploaded assets that are already set to expire", async () => {
+        const assets = [
+          {
+            filePath: "assets/file-1.txt",
+            content: "Content of file-1",
+          },
+          { filePath: "assets/file-2.txt", content: "Content of file-2" },
+        ];
+        const kvNamespace = {
+          title: "__test-name-workers_sites_assets",
+          id: "__test-name-workers_sites_assets-id",
+        };
+        writeWranglerToml({
+          main: "./index.js",
+          site: {
+            bucket: "assets",
+          },
+        });
+        writeWorkerSource();
+        writeAssets(assets);
+        mockUploadWorkerRequest();
+        mockSubDomainRequest();
+        mockListKVNamespacesRequest(kvNamespace);
+        // Some of these keys come back with an expiration already set
+        mockKeyListRequest(kvNamespace.id, [
+          // Put file-1 in the KV namespace, with an expiration
+          { name: "assets/file-1.2ca234f380.txt", expiration: 10000 },
+          // As well as a couple from a previous upload
+          // This one's set to expire so it won't be uploaded back
+          { name: "assets/file-3.somehash.txt", expiration: 10000 },
+          // This one will be sent back with an expiration value
+          { name: "assets/file-4.anotherhash.txt" },
+        ]);
+
+        setMockFetchKVGetValue(
+          "some-account-id",
+          "__test-name-workers_sites_assets-id",
+          "assets/file-4.anotherhash.txt",
+          btoa("Content of file-4")
+        );
+
+        mockUploadAssetsToKVRequest(kvNamespace.id, [
+          // so it'll re-upload file-1 because it's set to expire
+          // and file-2 because it hasn't been uploaded yet
+          ...assets,
+          // and it'll expire file-4 since it's just not included anymore
+          {
+            filePath: "assets/file-4.txt",
+            content: "Content of file-4",
+            // 100 seconds + 300 more
+            expiration: 400,
+          },
+        ]);
+
+        await runWrangler("publish");
+
+        expect(std.out).toMatchInlineSnapshot(`
+          "reading assets/file-1.txt...
+          uploading as assets/file-1.2ca234f380.txt...
+          reading assets/file-2.txt...
+          uploading as assets/file-2.5938485188.txt...
+          expiring unused assets/file-4.anotherhash.txt...
+          Uploaded test-name (TIMINGS)
+          Published test-name (TIMINGS)
+          test-name.test-sub-domain.workers.dev"
+        `);
+        expect(std.err).toMatchInlineSnapshot(`""`);
+      });
+    });
   });
 
   describe("workers_dev setting", () => {
@@ -2417,7 +2580,7 @@ function mockListKVNamespacesRequest(...namespaces: KVNamespaceInfo[]) {
 /** Create a mock handler for the request that tries to do a bulk upload of assets to a KV namespace. */
 function mockUploadAssetsToKVRequest(
   expectedNamespaceId: string,
-  assets: { filePath: string; content: string }[]
+  assets: { filePath: string; content: string; expiration?: number }[]
 ) {
   setMockResponse(
     "/accounts/:accountId/storage/kv/namespaces/:namespaceId/bulk",
     "PUT",
     ([_url, accountId, namespaceId], { body }) => {
@@ -2446,6 +2609,7 @@ function mockUploadAssetsToKVRequest(
       expect(Buffer.from(upload.value, "base64").toString()).toEqual(
         asset.content
       );
+      expect(upload.expiration).toEqual(asset.expiration);
     }
     return null;
   }
diff --git a/packages/wrangler/src/kv/index.tsx b/packages/wrangler/src/kv.ts
similarity index 99%
rename from packages/wrangler/src/kv/index.tsx
rename to packages/wrangler/src/kv.ts
index 88f4c710b3f6..a4baf0b22aad 100644
--- a/packages/wrangler/src/kv/index.tsx
+++ b/packages/wrangler/src/kv.ts
@@ -1,6 +1,6 @@
 import { URLSearchParams } from "node:url";
-import { fetchListResult, fetchResult, fetchKVGetValue } from "../cfetch";
-import type { Config, Environment } from "../config";
+import { fetchListResult, fetchResult, fetchKVGetValue } from "./cfetch";
+import type { Config, Environment } from "./config";
 
 /** The largest number of kv items we can pass to the API in a single request. */
 const API_MAX = 10000;
diff --git a/packages/wrangler/src/sites.tsx b/packages/wrangler/src/sites.tsx
index e488acc9fa44..e6e5a3c72561 100644
--- a/packages/wrangler/src/sites.tsx
+++ b/packages/wrangler/src/sites.tsx
@@ -4,32 +4,31 @@ import ignore from "ignore";
 import xxhash from "xxhash-wasm";
 import {
   createNamespace,
+  getKeyValue,
   listNamespaceKeys,
   listNamespaces,
   putBulkKeyValue,
 } from "./kv";
 import type { Config } from "./config";
-import type { KeyValue } from "./kv";
+import type { KeyValue, NamespaceKeyInfo } from "./kv";
 import type { XXHashAPI } from "xxhash-wasm";
 
 /** Paths to always ignore. */
-const ALWAYS_IGNORE = ["node_modules"];
-const HIDDEN_FILES_TO_INCLUDE = [
+const ALWAYS_IGNORE = new Set(["node_modules"]);
+const HIDDEN_FILES_TO_INCLUDE = new Set([
   ".well-known", // See https://datatracker.ietf.org/doc/html/rfc8615
-];
+]);
+const FIVE_MINUTES = 1000 * 60 * 5;
 
 async function* getFilesInFolder(dirPath: string): AsyncIterable<string> {
   const files = await readdir(dirPath, { withFileTypes: true });
   for (const file of files) {
     // Skip files that we never want to process.
-    if (ALWAYS_IGNORE.some((p) => file.name === p)) {
+    if (ALWAYS_IGNORE.has(file.name)) {
       continue;
     }
     // Skip hidden files (starting with .) except for some special ones
-    if (
-      file.name.startsWith(".") &&
-      !HIDDEN_FILES_TO_INCLUDE.some((p) => file.name === p)
-    ) {
+    if (file.name.startsWith(".") && !HIDDEN_FILES_TO_INCLUDE.has(file.name)) {
       continue;
     }
     // TODO: follow symlinks??
@@ -126,16 +125,18 @@ export async function syncAssets(
   // let's get all the keys in this namespace
   const result = await listNamespaceKeys(accountId, namespace);
-  const keys = new Set(result.map((x) => x.name));
+  const keyMap = result.reduce<Record<string, NamespaceKeyInfo>>(
+    (km, key) => Object.assign(km, { [key.name]: key }),
+    {}
+  );
 
   const manifest: Record<string, string> = {};
-  const upload: KeyValue[] = [];
+  const toUpload: KeyValue[] = [];
 
   const include = createPatternMatcher(siteAssets.includePatterns, false);
   const exclude = createPatternMatcher(siteAssets.excludePatterns, true);
   const hasher = await xxhash();
 
-  // TODO: this can be more efficient by parallelising
   for await (const file of getFilesInFolder(siteAssets.baseDirectory)) {
     if (!include(file)) {
       continue;
     }
@@ -148,13 +149,13 @@ export async function syncAssets(
     console.log(`reading ${file}...`);
     const content = await readFile(file, "base64");
 
-    const assetKey = await hashAsset(hasher, file, content);
+    const assetKey = hashAsset(hasher, file, content);
     validateAssetKey(assetKey);
 
     // now put each of the files into kv
-    if (!keys.has(assetKey)) {
+    if (!(assetKey in keyMap) || keyMap[assetKey].expiration) {
       console.log(`uploading as ${assetKey}...`);
-      upload.push({
+      toUpload.push({
         key: assetKey,
         value: content,
         base64: true,
@@ -162,9 +163,42 @@ export async function syncAssets(
       });
     } else {
       console.log(`skipping - already uploaded`);
     }
+    // remove the key from the map so we know we've seen it
+    delete keyMap[assetKey];
     manifest[path.relative(siteAssets.baseDirectory, file)] = assetKey;
   }
-  await putBulkKeyValue(accountId, namespace, upload, () => {});
+
+  // `keyMap` now contains only the assets that we need to expire
+  const FIVE_MINUTES_LATER = Math.ceil((Date.now() + FIVE_MINUTES) / 1000); // expressed in seconds, since that's what the API accepts
+
+  let toExpire: KeyValue[] = [];
+  for (const asset of Object.values(keyMap)) {
+    if (!asset.expiration) {
+      console.log(`expiring unused ${asset.name}...`);
+      toExpire.push({
+        key: asset.name,
+        value: "", // we'll fill in all the values in one go below
+        expiration: FIVE_MINUTES_LATER,
+        base64: true,
+      });
+    }
+  }
+
+  // TODO: batch these in groups if it causes problems
+  toExpire = await Promise.all(
+    toExpire.map(async (asset) => ({
+      ...asset,
+      // it would be great if we didn't have to do this fetch at all
+      value: await getKeyValue(accountId, namespace, asset.key),
+    }))
+  );
+
+  await putBulkKeyValue(
+    accountId,
+    namespace,
+    toUpload.concat(toExpire),
+    () => {}
+  );
 
   return { manifest, namespace };
 }
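For a concrete sense of what the bulk write ends up containing, here is a minimal, hypothetical sketch of the payload after a publish that drops `file-3` from the bucket, using the mocked clock from the tests above (`Date.now()` = 100000 ms). Only the `key`/`value`/`base64`/`expiration` fields exercised by this patch are shown; nothing else about the bulk endpoint is implied. Note that `expiration` is an absolute Unix timestamp in seconds, while `Date.now()` and `FIVE_MINUTES` are in milliseconds, hence the divide-by-1000.

```ts
// Illustrative bulk body PUT to
// /accounts/:accountId/storage/kv/namespaces/:namespaceId/bulk
const FIVE_MINUTES = 1000 * 60 * 5; // milliseconds, as in sites.tsx
const now = 100_000; // the mocked Date.now() used in the tests above

const bulkBody = [
  // a new asset, uploaded without an expiration
  { key: "assets/file-2.5938485188.txt", value: "<base64>", base64: true },
  // an unused asset from the previous publish, rewritten with its
  // current value plus an expiration five minutes out:
  // ceil((100000 + 300000) / 1000) = 400 seconds past the epoch
  {
    key: "assets/file-3.somehash.txt",
    value: "<base64>",
    base64: true,
    expiration: Math.ceil((now + FIVE_MINUTES) / 1000),
  },
];

console.log(JSON.stringify(bulkBody, null, 2));
```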