From 7756c7d5d9b2da240aee0c605363750b218fb075 Mon Sep 17 00:00:00 2001
From: Sunil Pai
Date: Sun, 13 Mar 2022 18:58:43 +0530
Subject: [PATCH] feat: expire unused assets in `[site]` uploads

This expires any previously uploaded assets when using a Sites / `[site]`
configuration. Because we currently do a full iteration of a namespace's
keys when publishing, for rapidly changing sites this means that uploads
get slower and slower. We can't just delete unused assets because it leads
to occasional 404s on older publishes while we're publishing. So we expire
previous assets while uploading new ones. The implementation and
constraints of the KV API mean that individual uploads may become slower,
but the overall publish should hopefully be faster. These optimisations
also only matter for rapidly changing sites, so common use cases still
have the same perf characteristics.
---
 .changeset/thick-keys-worry.md                |   7 +
 .../wrangler/src/__tests__/helpers/mock-kv.ts |   5 +-
 .../wrangler/src/__tests__/publish.test.ts    | 143 +++++++++++++++++-
 packages/wrangler/src/sites.tsx               |  66 ++++++--
 4 files changed, 200 insertions(+), 21 deletions(-)
 create mode 100644 .changeset/thick-keys-worry.md

diff --git a/.changeset/thick-keys-worry.md b/.changeset/thick-keys-worry.md
new file mode 100644
index 000000000000..bb2041535e25
--- /dev/null
+++ b/.changeset/thick-keys-worry.md
@@ -0,0 +1,7 @@
+---
+"wrangler": patch
+---
+
+feat: expire unused assets in `[site]` uploads
+
+This expires any previously uploaded assets when using a Sites / `[site]` configuration. Because we currently do a full iteration of a namespace's keys when publishing, for rapidly changing sites this means that uploads get slower and slower. We can't just delete unused assets because it leads to occasional 404s on older publishes while we're publishing. So we expire previous assets while uploading new ones. The implementation and constraints of the KV API mean that individual uploads may become slower, but the overall publish should hopefully be faster. These optimisations also only matter for rapidly changing sites, so common use cases still have the same perf characteristics.
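As a rough illustration of the approach described above (not part of the commit), here is a minimal TypeScript sketch of the expire-instead-of-delete step. The `ExistingKey` and `Write` shapes and the `fetchValue` callback are simplified assumptions for illustration, not wrangler's real `kv` helpers: keys that are no longer referenced by the current publish, and that are not already scheduled to expire, are re-written with a short expiration rather than deleted.

```ts
// Sketch only: expire stale Sites assets instead of deleting them.
// ExistingKey, Write, and fetchValue are illustrative assumptions.
interface ExistingKey {
  name: string;
  expiration?: number; // seconds since epoch, present if already scheduled to expire
}

interface Write {
  key: string;
  value: string;
  base64?: boolean;
  expiration?: number;
}

const FIVE_MINUTES_IN_SECONDS = 5 * 60;

async function planExpirations(
  existing: ExistingKey[],
  stillUsed: Set<string>,
  fetchValue: (key: string) => Promise<string>
): Promise<Write[]> {
  // Expire five minutes from now, expressed in seconds since the epoch.
  const expireAt = Math.round(Date.now() / 1000) + FIVE_MINUTES_IN_SECONDS;
  // Stale keys: present in the namespace, no longer referenced by this publish,
  // and not already set to expire by a previous publish.
  const stale = existing.filter(
    (key) => !stillUsed.has(key.name) && !key.expiration
  );
  // The bulk API writes whole key/value pairs, so re-read each value and
  // re-upload it with the new expiration instead of deleting the key outright.
  return Promise.all(
    stale.map(async (key) => ({
      key: key.name,
      value: await fetchValue(key.name),
      base64: true,
      expiration: expireAt,
    }))
  );
}
```

In the actual patch this logic lives in `syncAssets` in packages/wrangler/src/sites.tsx, built on `listNamespaceKeys`, `getKeyValue`, and `putBulkKeyValue`.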
diff --git a/packages/wrangler/src/__tests__/helpers/mock-kv.ts b/packages/wrangler/src/__tests__/helpers/mock-kv.ts
index 8c11122495ec..ba5ff516ac98 100644
--- a/packages/wrangler/src/__tests__/helpers/mock-kv.ts
+++ b/packages/wrangler/src/__tests__/helpers/mock-kv.ts
@@ -4,13 +4,14 @@ export function mockKeyListRequest(
   expectedNamespaceId: string,
   expectedKeys: string[],
   keysPerRequest = 1000,
-  blankCursorValue: "" | undefined | null = undefined
+  blankCursorValue: "" | undefined | null = undefined,
+  expiration?: number | null
 ) {
   const requests = { count: 0 };
   // See https://api.cloudflare.com/#workers-kv-namespace-list-a-namespace-s-keys
   const expectedKeyObjects = expectedKeys.map((name) => ({
     name,
-    expiration: 123456789,
+    ...(expiration !== null && { expiration: expiration || 123456789 }),
     metadata: {},
   }));
   setMockRawResponse(
diff --git a/packages/wrangler/src/__tests__/publish.test.ts b/packages/wrangler/src/__tests__/publish.test.ts
index 082796aa25e5..fcd77f30020e 100644
--- a/packages/wrangler/src/__tests__/publish.test.ts
+++ b/packages/wrangler/src/__tests__/publish.test.ts
@@ -4,9 +4,11 @@ import * as TOML from "@iarna/toml";
 import { mockAccountId, mockApiToken } from "./helpers/mock-account-id";
 import {
   createFetchResult,
+  setMockFetchKVGetValue,
   setMockRawResponse,
   setMockResponse,
   unsetAllMocks,
+  unsetMockFetchKVGetValues,
 } from "./helpers/mock-cfetch";
 import { mockConsoleMethods } from "./helpers/mock-console";
 import { mockKeyListRequest } from "./helpers/mock-kv";
@@ -31,6 +33,7 @@ describe("publish", () => {

   afterEach(() => {
     unsetAllMocks();
+    unsetMockFetchKVGetValues();
   });

   describe("environments", () => {
@@ -808,8 +811,14 @@ export default{
       mockUploadWorkerRequest();
       mockSubDomainRequest();
       mockListKVNamespacesRequest(kvNamespace);
-      // Put file-1 in the KV namespace
-      mockKeyListRequest(kvNamespace.id, ["assets/file-1.2ca234f380.txt"]);
+      // Put file-1 in the KV namespace, no expiration
+      mockKeyListRequest(
+        kvNamespace.id,
+        ["assets/file-1.2ca234f380.txt"],
+        100,
+        undefined,
+        null
+      );
       // Check we do not upload file-1
       mockUploadAssetsToKVRequest(
         kvNamespace.id,
@@ -1231,6 +1240,134 @@ export default{
         %s If you think this is a bug then please create an issue at https://github.com/cloudflare/wrangler2/issues/new."
        `);
    });
+
+    it("should expire uploaded assets that aren't included anymore", async () => {
+      const assets = [
+        { filePath: "assets/file-1.txt", content: "Content of file-1" },
+        { filePath: "assets/file-2.txt", content: "Content of file-2" },
+      ];
+      const kvNamespace = {
+        title: "__test-name-workers_sites_assets",
+        id: "__test-name-workers_sites_assets-id",
+      };
+      writeWranglerToml({
+        main: "./index.js",
+        site: {
+          bucket: "assets",
+        },
+      });
+      writeWorkerSource();
+      writeAssets(assets);
+      mockUploadWorkerRequest();
+      mockSubDomainRequest();
+      mockListKVNamespacesRequest(kvNamespace);
+      mockKeyListRequest(
+        kvNamespace.id,
+        [
+          // Put file-1 in the KV namespace
+          "assets/file-1.2ca234f380.txt",
+          // As well as a couple from a previous upload
+          "assets/file-3.somehash.txt",
+          "assets/file-4.anotherhash.txt",
+        ],
+        100,
+        undefined,
+        // disable expiration values for this test
+        null
+      );
+
+      // Check that we pull down the values of file-3.txt and file-4.txt
+      setMockFetchKVGetValue(
+        "some-account-id",
+        "__test-name-workers_sites_assets-id",
+        "assets/file-3.somehash.txt",
+        btoa("Content of file-3")
+      );
+      setMockFetchKVGetValue(
+        "some-account-id",
+        "__test-name-workers_sites_assets-id",
+        "assets/file-4.anotherhash.txt",
+        btoa("Content of file-4")
+      );
+
+      // and send them back up
+      mockUploadAssetsToKVRequest(kvNamespace.id, [
+        ...assets.filter((a) => a.filePath !== "assets/file-1.txt"),
+        {
+          filePath: "assets/file-3.txt",
+          content: "Content of file-3",
+          expiration: Date.now() + 5 * 60 * 3000,
+        },
+        {
+          filePath: "assets/file-4.txt",
+          content: "Content of file-4",
+          expiration: Date.now() + 5 * 60 * 3000,
+        },
+      ]);
+
+      await runWrangler("publish");
+
+      expect(std.out).toMatchInlineSnapshot(`
+        "reading assets/file-1.txt...
+        skipping - already uploaded
+        reading assets/file-2.txt...
+        uploading as assets/file-2.5938485188.txt...
+        expiring unused assets/file-3.somehash.txt...
+        expiring unused assets/file-4.anotherhash.txt...
+        Uploaded test-name (TIMINGS)
+        Published test-name (TIMINGS)
+        test-name.test-sub-domain.workers.dev"
+      `);
+      expect(std.err).toMatchInlineSnapshot(`""`);
+    });
+
+    it("should not try to expire uploaded assets that are already set to expire", async () => {
+      const assets = [
+        { filePath: "assets/file-1.txt", content: "Content of file-1" },
+        { filePath: "assets/file-2.txt", content: "Content of file-2" },
+      ];
+      const kvNamespace = {
+        title: "__test-name-workers_sites_assets",
+        id: "__test-name-workers_sites_assets-id",
+      };
+      writeWranglerToml({
+        main: "./index.js",
+        site: {
+          bucket: "assets",
+        },
+      });
+      writeWorkerSource();
+      writeAssets(assets);
+      mockUploadWorkerRequest();
+      mockSubDomainRequest();
+      mockListKVNamespacesRequest(kvNamespace);
+      // These return with an expiration added on each of them
+      mockKeyListRequest(kvNamespace.id, [
+        // Put file-1 in the KV namespace
+        "assets/file-1.2ca234f380.txt",
+        // As well as a couple from a previous upload
+        "assets/file-3.somehash.txt",
+        "assets/file-4.anotherhash.txt",
+      ]);
+
+      // so it'll upload file-1 because it's set to expire
+      // and file-2 because it hasn't been uploaded yet
+      mockUploadAssetsToKVRequest(kvNamespace.id, assets);
+      // it won't expire file-3 and file-4 because they're set to expire
+
+      await runWrangler("publish");
+
+      expect(std.out).toMatchInlineSnapshot(`
+        "reading assets/file-1.txt...
+        uploading as assets/file-1.2ca234f380.txt...
+        reading assets/file-2.txt...
+        uploading as assets/file-2.5938485188.txt...
+        Uploaded test-name (TIMINGS)
+        Published test-name (TIMINGS)
+        test-name.test-sub-domain.workers.dev"
+      `);
+      expect(std.err).toMatchInlineSnapshot(`""`);
+    });
   });

   describe("workers_dev setting", () => {
@@ -2216,7 +2353,7 @@ function mockListKVNamespacesRequest(...namespaces: KVNamespaceInfo[]) {
 /** Create a mock handler for the request that tries to do a bulk upload of assets to a KV namespace. */
 function mockUploadAssetsToKVRequest(
   expectedNamespaceId: string,
-  assets: { filePath: string; content: string }[]
+  assets: { filePath: string; content: string; expiration?: number }[]
 ) {
   setMockResponse(
     "/accounts/:accountId/storage/kv/namespaces/:namespaceId/bulk",
diff --git a/packages/wrangler/src/sites.tsx b/packages/wrangler/src/sites.tsx
index e488acc9fa44..c13f00d4b34d 100644
--- a/packages/wrangler/src/sites.tsx
+++ b/packages/wrangler/src/sites.tsx
@@ -4,32 +4,31 @@ import ignore from "ignore";
 import xxhash from "xxhash-wasm";
 import {
   createNamespace,
+  getKeyValue,
   listNamespaceKeys,
   listNamespaces,
   putBulkKeyValue,
 } from "./kv";
 import type { Config } from "./config";
-import type { KeyValue } from "./kv";
+import type { KeyValue, NamespaceKeyInfo } from "./kv";
 import type { XXHashAPI } from "xxhash-wasm";

 /** Paths to always ignore. */
-const ALWAYS_IGNORE = ["node_modules"];
-const HIDDEN_FILES_TO_INCLUDE = [
+const ALWAYS_IGNORE = new Set(["node_modules"]);
+const HIDDEN_FILES_TO_INCLUDE = new Set([
   ".well-known", // See https://datatracker.ietf.org/doc/html/rfc8615
-];
+]);
+const FIVE_MINUTES = 1000 * 60 * 5;

 async function* getFilesInFolder(dirPath: string): AsyncIterable<string> {
   const files = await readdir(dirPath, { withFileTypes: true });
   for (const file of files) {
     // Skip files that we never want to process.
-    if (ALWAYS_IGNORE.some((p) => file.name === p)) {
+    if (ALWAYS_IGNORE.has(file.name)) {
       continue;
     }
     // Skip hidden files (starting with .) except for some special ones
-    if (
-      file.name.startsWith(".") &&
-      !HIDDEN_FILES_TO_INCLUDE.some((p) => file.name === p)
-    ) {
+    if (file.name.startsWith(".") && !HIDDEN_FILES_TO_INCLUDE.has(file.name)) {
       continue;
     }
     // TODO: follow symlinks??
@@ -126,16 +125,18 @@ export async function syncAssets(
   // let's get all the keys in this namespace
   const result = await listNamespaceKeys(accountId, namespace);
-  const keys = new Set(result.map((x) => x.name));
+  const keyMap = result.reduce<Record<string, NamespaceKeyInfo>>(
+    (km, key) => Object.assign(km, { [key.name]: key }),
+    {}
+  ); // new Set(result.map((x) => x.name));

   const manifest: Record<string, string> = {};
-  const upload: KeyValue[] = [];
+  const toUpload: KeyValue[] = [];

   const include = createPatternMatcher(siteAssets.includePatterns, false);
   const exclude = createPatternMatcher(siteAssets.excludePatterns, true);
   const hasher = await xxhash();

-  // TODO: this can be more efficient by parallelising
   for await (const file of getFilesInFolder(siteAssets.baseDirectory)) {
     if (!include(file)) {
       continue;
     }
@@ -148,13 +149,13 @@ export async function syncAssets(
     console.log(`reading ${file}...`);

     const content = await readFile(file, "base64");
-    const assetKey = await hashAsset(hasher, file, content);
+    const assetKey = hashAsset(hasher, file, content);
     validateAssetKey(assetKey);

     // now put each of the files into kv
-    if (!keys.has(assetKey)) {
+    if (!(assetKey in keyMap) || keyMap[assetKey].expiration) {
       console.log(`uploading as ${assetKey}...`);
-      upload.push({
+      toUpload.push({
         key: assetKey,
         value: content,
         base64: true,
@@ -162,9 +163,42 @@
     } else {
       console.log(`skipping - already uploaded`);
     }
+    // remove the key from the set so we know we've seen it
+    delete keyMap[assetKey];
     manifest[path.relative(siteAssets.baseDirectory, file)] = assetKey;
   }
-  await putBulkKeyValue(accountId, namespace, upload, () => {});
+
+  // `keyMap` now contains the assets that we need to expire
+  const FIVE_MINUTES_LATER = (Date.now() + FIVE_MINUTES) / 1000; // expressed in seconds, since that's what the API accepts
+
+  let toExpire: KeyValue[] = [];
+  for (const asset of Object.values(keyMap)) {
+    if (!asset.expiration) {
+      console.log(`expiring unused ${asset.name}...`);
+      toExpire.push({
+        key: asset.name,
+        value: "", // we'll fill all the values in one go
+        expiration: FIVE_MINUTES_LATER,
+        base64: true,
+      });
+    }
+  }
+
+  // TODO: batch these in groups if it causes problems
+  toExpire = await Promise.all(
+    toExpire.map(async (asset) => ({
+      ...asset,
+      // it would be great if we didn't have to do this fetch at all
+      value: await getKeyValue(accountId, namespace, asset.key),
+    }))
+  );
+
+  await putBulkKeyValue(
+    accountId,
+    namespace,
+    toUpload.concat(toExpire),
+    () => {}
+  );

   return { manifest, namespace };
 }
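For reference (not part of the patch): `putBulkKeyValue` ultimately targets the same "/accounts/:accountId/storage/kv/namespaces/:namespaceId/bulk" endpoint that the test mock above intercepts. The helper below is a standalone sketch of such a bulk write with per-key expirations, not wrangler's implementation; the `apiToken`, `accountId`, and `namespaceId` parameters are placeholders you would supply yourself.

```ts
// Standalone sketch: bulk-write KV entries, some carrying an absolute expiration.
interface BulkWriteEntry {
  key: string;
  value: string;
  base64?: boolean;
  expiration?: number; // absolute unix time, in seconds
}

async function bulkWrite(
  apiToken: string,
  accountId: string,
  namespaceId: string,
  entries: BulkWriteEntry[]
): Promise<void> {
  const response = await fetch(
    `https://api.cloudflare.com/client/v4/accounts/${accountId}/storage/kv/namespaces/${namespaceId}/bulk`,
    {
      method: "PUT",
      headers: {
        Authorization: `Bearer ${apiToken}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify(entries),
    }
  );
  if (!response.ok) {
    throw new Error(`bulk write failed with status ${response.status}`);
  }
}
```

Because this endpoint always writes the key and value together, expiring a stale asset means reading its current value back and re-uploading it with the new expiration, which is the extra `getKeyValue` fetch that the comment in sites.tsx says it would be great to avoid.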