Skip to content

Commit

Permalink
feat: expire unused assets in [site] uploads
Browse files Browse the repository at this point in the history
This expires any previously uploaded assets when using a Sites / `[site]` configuration. Because we currently do a full iteration of a namespace's keys when publishing, for rapidly changing sites this means that uploads get slower and slower. We can't just delete unused assets because it leads to occasional 404s on older publishes while we're publishing. So we expire previous assets while uploading new ones. The implementation/constraints of the kv api means that uploads may become slower, but should hopefully be faster overall. These optimisations also only matter for rapidly changing sites, so common usecases still have the same perf characteristics.
  • Loading branch information
threepointone committed Mar 16, 2022
1 parent 8c98c00 commit 7756c7d
Show file tree
Hide file tree
Showing 4 changed files with 200 additions and 21 deletions.
7 changes: 7 additions & 0 deletions .changeset/thick-keys-worry.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
"wrangler": patch
---

feat: expire unused assets in `[site]` uploads

This expires any previously uploaded assets when using a Sites / `[site]` configuration. Because we currently do a full iteration of a namespace's keys when publishing, for rapidly changing sites this means that uploads get slower and slower. We can't just delete unused assets because it leads to occasional 404s on older publishes while we're publishing. So we expire previous assets while uploading new ones. The implementation/constraints of the kv api means that uploads may become slower, but should hopefully be faster overall. These optimisations also only matter for rapidly changing sites, so common usecases still have the same perf characteristics.
5 changes: 3 additions & 2 deletions packages/wrangler/src/__tests__/helpers/mock-kv.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,14 @@ export function mockKeyListRequest(
expectedNamespaceId: string,
expectedKeys: string[],
keysPerRequest = 1000,
blankCursorValue: "" | undefined | null = undefined
blankCursorValue: "" | undefined | null = undefined,
expiration?: number | null
) {
const requests = { count: 0 };
// See https://api.cloudflare.com/#workers-kv-namespace-list-a-namespace-s-keys
const expectedKeyObjects = expectedKeys.map((name) => ({
name,
expiration: 123456789,
...(expiration !== null && { expiration: expiration || 123456789 }),
metadata: {},
}));
setMockRawResponse(
Expand Down
143 changes: 140 additions & 3 deletions packages/wrangler/src/__tests__/publish.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@ import * as TOML from "@iarna/toml";
import { mockAccountId, mockApiToken } from "./helpers/mock-account-id";
import {
createFetchResult,
setMockFetchKVGetValue,
setMockRawResponse,
setMockResponse,
unsetAllMocks,
unsetMockFetchKVGetValues,
} from "./helpers/mock-cfetch";
import { mockConsoleMethods } from "./helpers/mock-console";
import { mockKeyListRequest } from "./helpers/mock-kv";
Expand All @@ -31,6 +33,7 @@ describe("publish", () => {

afterEach(() => {
unsetAllMocks();
unsetMockFetchKVGetValues();
});

describe("environments", () => {
Expand Down Expand Up @@ -808,8 +811,14 @@ export default{
mockUploadWorkerRequest();
mockSubDomainRequest();
mockListKVNamespacesRequest(kvNamespace);
// Put file-1 in the KV namespace
mockKeyListRequest(kvNamespace.id, ["assets/file-1.2ca234f380.txt"]);
// Put file-1 in the KV namespace, no expiration
mockKeyListRequest(
kvNamespace.id,
["assets/file-1.2ca234f380.txt"],
100,
undefined,
null
);
// Check we do not upload file-1
mockUploadAssetsToKVRequest(
kvNamespace.id,
Expand Down Expand Up @@ -1231,6 +1240,134 @@ export default{
%s If you think this is a bug then please create an issue at https://github.com/cloudflare/wrangler2/issues/new."
`);
});

it("should expire uploaded assets that aren't included anymore", async () => {
const assets = [
{ filePath: "assets/file-1.txt", content: "Content of file-1" },
{ filePath: "assets/file-2.txt", content: "Content of file-2" },
];
const kvNamespace = {
title: "__test-name-workers_sites_assets",
id: "__test-name-workers_sites_assets-id",
};
writeWranglerToml({
main: "./index.js",
site: {
bucket: "assets",
},
});
writeWorkerSource();
writeAssets(assets);
mockUploadWorkerRequest();
mockSubDomainRequest();
mockListKVNamespacesRequest(kvNamespace);
mockKeyListRequest(
kvNamespace.id,
[
// Put file-1 in the KV namespace
"assets/file-1.2ca234f380.txt",
// As well as a couple from a previous upload
"assets/file-3.somehash.txt",
"assets/file-4.anotherhash.txt",
],
100,
undefined,
// disable expiration values for this test
null
);

// Check that we pull down the values of file-3.txt and file-4.txt
setMockFetchKVGetValue(
"some-account-id",
"__test-name-workers_sites_assets-id",
"assets/file-3.somehash.txt",
btoa("Content of file-3")
);
setMockFetchKVGetValue(
"some-account-id",
"__test-name-workers_sites_assets-id",
"assets/file-4.anotherhash.txt",
btoa("Content of file-4")
);

// and send them back up
mockUploadAssetsToKVRequest(kvNamespace.id, [
...assets.filter((a) => a.filePath !== "assets/file-1.txt"),
{
filePath: "assets/file-3.txt",
content: "Content of file-3",
expiration: Date.now() + 5 * 60 * 3000,
},
{
filePath: "assets/file-4.txt",
content: "Content of file-4",
expiration: Date.now() + 5 * 60 * 3000,
},
]);

await runWrangler("publish");

expect(std.out).toMatchInlineSnapshot(`
"reading assets/file-1.txt...
skipping - already uploaded
reading assets/file-2.txt...
uploading as assets/file-2.5938485188.txt...
expiring unused assets/file-3.somehash.txt...
expiring unused assets/file-4.anotherhash.txt...
Uploaded test-name (TIMINGS)
Published test-name (TIMINGS)
test-name.test-sub-domain.workers.dev"
`);
expect(std.err).toMatchInlineSnapshot(`""`);
});

it("should not try to expire uploaded assets that are already set to expire", async () => {
const assets = [
{ filePath: "assets/file-1.txt", content: "Content of file-1" },
{ filePath: "assets/file-2.txt", content: "Content of file-2" },
];
const kvNamespace = {
title: "__test-name-workers_sites_assets",
id: "__test-name-workers_sites_assets-id",
};
writeWranglerToml({
main: "./index.js",
site: {
bucket: "assets",
},
});
writeWorkerSource();
writeAssets(assets);
mockUploadWorkerRequest();
mockSubDomainRequest();
mockListKVNamespacesRequest(kvNamespace);
// These return with an expiration added on each of them
mockKeyListRequest(kvNamespace.id, [
// Put file-1 in the KV namespace
"assets/file-1.2ca234f380.txt",
// As well as a couple from a previous upload
"assets/file-3.somehash.txt",
"assets/file-4.anotherhash.txt",
]);

// so it'll upload file-1 because it's set to expire
// and file-2 because it hasn't been uploaded yet
mockUploadAssetsToKVRequest(kvNamespace.id, assets);
// it won't expire file-3 and file-4 because they're set to expire

await runWrangler("publish");

expect(std.out).toMatchInlineSnapshot(`
"reading assets/file-1.txt...
uploading as assets/file-1.2ca234f380.txt...
reading assets/file-2.txt...
uploading as assets/file-2.5938485188.txt...
Uploaded test-name (TIMINGS)
Published test-name (TIMINGS)
test-name.test-sub-domain.workers.dev"
`);
expect(std.err).toMatchInlineSnapshot(`""`);
});
});

describe("workers_dev setting", () => {
Expand Down Expand Up @@ -2216,7 +2353,7 @@ function mockListKVNamespacesRequest(...namespaces: KVNamespaceInfo[]) {
/** Create a mock handler for the request that tries to do a bulk upload of assets to a KV namespace. */
function mockUploadAssetsToKVRequest(
expectedNamespaceId: string,
assets: { filePath: string; content: string }[]
assets: { filePath: string; content: string; expiration?: number }[]
) {
setMockResponse(
"/accounts/:accountId/storage/kv/namespaces/:namespaceId/bulk",
Expand Down
66 changes: 50 additions & 16 deletions packages/wrangler/src/sites.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -4,32 +4,31 @@ import ignore from "ignore";
import xxhash from "xxhash-wasm";
import {
createNamespace,
getKeyValue,
listNamespaceKeys,
listNamespaces,
putBulkKeyValue,
} from "./kv";
import type { Config } from "./config";
import type { KeyValue } from "./kv";
import type { KeyValue, NamespaceKeyInfo } from "./kv";
import type { XXHashAPI } from "xxhash-wasm";

/** Paths to always ignore. */
const ALWAYS_IGNORE = ["node_modules"];
const HIDDEN_FILES_TO_INCLUDE = [
const ALWAYS_IGNORE = new Set(["node_modules"]);
const HIDDEN_FILES_TO_INCLUDE = new Set([
".well-known", // See https://datatracker.ietf.org/doc/html/rfc8615
];
]);
const FIVE_MINUTES = 1000 * 60 * 5;

async function* getFilesInFolder(dirPath: string): AsyncIterable<string> {
const files = await readdir(dirPath, { withFileTypes: true });
for (const file of files) {
// Skip files that we never want to process.
if (ALWAYS_IGNORE.some((p) => file.name === p)) {
if (ALWAYS_IGNORE.has(file.name)) {
continue;
}
// Skip hidden files (starting with .) except for some special ones
if (
file.name.startsWith(".") &&
!HIDDEN_FILES_TO_INCLUDE.some((p) => file.name === p)
) {
if (file.name.startsWith(".") && !HIDDEN_FILES_TO_INCLUDE.has(file.name)) {
continue;
}
// TODO: follow symlinks??
Expand Down Expand Up @@ -126,16 +125,18 @@ export async function syncAssets(

// let's get all the keys in this namespace
const result = await listNamespaceKeys(accountId, namespace);
const keys = new Set(result.map((x) => x.name));
const keyMap = result.reduce<Record<string, NamespaceKeyInfo>>(
(km, key) => Object.assign(km, { [key.name]: key }),
{}
); // new Set(result.map((x) => x.name));

const manifest: Record<string, string> = {};
const upload: KeyValue[] = [];
const toUpload: KeyValue[] = [];

const include = createPatternMatcher(siteAssets.includePatterns, false);
const exclude = createPatternMatcher(siteAssets.excludePatterns, true);
const hasher = await xxhash();

// TODO: this can be more efficient by parallelising
for await (const file of getFilesInFolder(siteAssets.baseDirectory)) {
if (!include(file)) {
continue;
Expand All @@ -148,23 +149,56 @@ export async function syncAssets(
console.log(`reading ${file}...`);
const content = await readFile(file, "base64");

const assetKey = await hashAsset(hasher, file, content);
const assetKey = hashAsset(hasher, file, content);
validateAssetKey(assetKey);

// now put each of the files into kv
if (!keys.has(assetKey)) {
if (!(assetKey in keyMap) || keyMap[assetKey].expiration) {
console.log(`uploading as ${assetKey}...`);
upload.push({
toUpload.push({
key: assetKey,
value: content,
base64: true,
});
} else {
console.log(`skipping - already uploaded`);
}
// remove the key from the set so we know we've seen it
delete keyMap[assetKey];
manifest[path.relative(siteAssets.baseDirectory, file)] = assetKey;
}
await putBulkKeyValue(accountId, namespace, upload, () => {});

// `keyMap` now contains the assets that we need to expire
const FIVE_MINUTES_LATER = (Date.now() + FIVE_MINUTES) / 1000; // expressed in seconds, since that's what the API accepts

let toExpire: KeyValue[] = [];
for (const asset of Object.values(keyMap)) {
if (!asset.expiration) {
console.log(`expiring unused ${asset.name}...`);
toExpire.push({
key: asset.name,
value: "", // we'll fill all the values in one go
expiration: FIVE_MINUTES_LATER,
base64: true,
});
}
}

// TODO: batch these in groups if it causes problems
toExpire = await Promise.all(
toExpire.map(async (asset) => ({
...asset,
// it would be great if we didn't have to do this fetch at all
value: await getKeyValue(accountId, namespace, asset.key),
}))
);

await putBulkKeyValue(
accountId,
namespace,
toUpload.concat(toExpire),
() => {}
);
return { manifest, namespace };
}

Expand Down

0 comments on commit 7756c7d

Please sign in to comment.