Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Orama search #162

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions packages/site-kit/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,19 +23,19 @@
"svelte-local-storage-store": "^0.5.0"
},
"devDependencies": {
"@sveltejs/kit": "^1.21.0",
"@sveltejs/package": "^2.1.0",
"@orama/orama": "^1.0.8",
"@sveltejs/kit": "^1.22.1",
"@sveltejs/package": "^2.0.2",
"@types/marked": "^5.0.0",
"@types/node": "^20.3.3",
"@types/node": "^20.4.0",
"@types/prettier": "^2.7.3",
"flexsearch": "^0.7.31",
"magic-string": "^0.30.1",
"magic-string": "^0.30.0",
"marked": "^5.1.0",
"prettier": "^2.8.8",
"shiki-twoslash": "^3.1.2",
"svelte": "^4.0.4",
"typescript": "^5.1.6",
"vite": "^4.3.9"
"svelte": "^4.0.5",
"typescript": "^5.1.3",
"vite": "^4.4.2"
},
"publishConfig": {
"access": "public"
Expand Down
7 changes: 6 additions & 1 deletion packages/site-kit/src/lib/search/SearchBox.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ It appears when the user clicks on the `Search` component or presses the corresp
import Icon from '../components/Icon.svelte';
import SearchResults from './SearchResults.svelte';
import SearchWorker from './search-worker.js?worker';
import { page } from '$app/stores';

/** @type {HTMLElement} */
let modal;
Expand Down Expand Up @@ -48,7 +49,8 @@ It appears when the user clicks on the `Search` component or presses the corresp
worker.postMessage({
type: 'init',
payload: {
origin: location.origin
origin: location.origin,
priority_map: $page.data.search.priority_map
}
});
});
Expand Down Expand Up @@ -125,6 +127,7 @@ It appears when the user clicks on the `Search` component or presses the corresp
{#if $searching && ready}
<div class="pseudo-overlay" aria-hidden="true" on:click={close} />

<!-- svelte-ignore a11y-no-static-element-interactions -->
<div
bind:this={modal}
class="modal"
Expand Down Expand Up @@ -174,6 +177,8 @@ It appears when the user clicks on the `Search` component or presses the corresp

<div class="results">
{#if search?.query}
<!-- svelte-ignore a11y-click-events-have-key-events -->
<!-- svelte-ignore a11y-no-static-element-interactions -->
<div class="results-container" on:click={() => ($searching = false)}>
<SearchResults
results={search.results}
Expand Down
4 changes: 2 additions & 2 deletions packages/site-kit/src/lib/search/search-worker.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,14 @@ addEventListener('message', async (event) => {
if (type === 'init') {
const res = await fetch(`${payload.origin}/content.json`);
const { blocks } = await res.json();
init(blocks);
await init(blocks, payload.priority_map);

postMessage({ type: 'ready' });
}

if (type === 'query') {
const query = payload;
const results = search(query);
const results = await search(query);

postMessage({ type: 'results', payload: { results, query } });
}
Expand Down
212 changes: 145 additions & 67 deletions packages/site-kit/src/lib/search/search.js
Original file line number Diff line number Diff line change
@@ -1,45 +1,77 @@
import flexsearch from 'flexsearch';

// @ts-expect-error
const Index = /** @type {import('flexsearch').Index} */ (flexsearch.Index) ?? flexsearch;
import { create, insertMultiple, search as orama_search } from '@orama/orama';

/** If the search is already initialized */
export let inited = false;

/** @type {import('flexsearch').Index<any>[]} */
let indexes;
/** @type {import('@orama/orama').Orama} */
let index;

/** @type {Map<string, import('./types').Block>} */
const map = new Map();

/** @type {Map<string, string>} */
const hrefs = new Map();

/** @type {import('./types').SearchAppropriateBlock[]} */
const search_appropriate_blocks = [];

/**
* Initialize the search index
* @param {import('./types').Block[]} blocks
* @param {Record<string, number>} priority_map
*/
export function init(blocks) {
export async function init(blocks, priority_map) {
if (inited) return;

// we have multiple indexes, so we can rank sections (migration guide comes last)
const max_rank = Math.max(...blocks.map((block) => block.rank ?? 0));
for (const { breadcrumbs, href, content } of blocks) {
const new_block = /** @type {import('./types').SearchAppropriateBlock} */ ({});

if (breadcrumbs.length >= 1 && breadcrumbs?.[0]) {
new_block.h1 = breadcrumbs[0];
}
if (breadcrumbs.length >= 2 && breadcrumbs?.[1]) {
new_block.h2 = breadcrumbs[1];
}
if (breadcrumbs.length >= 3 && breadcrumbs?.[2]) {
new_block.h3 = breadcrumbs[2];
}

// Add priorities
for (const [regex_str, priority] of Object.entries(priority_map)) {
const regex = new RegExp(regex_str);
if (regex.test(href)) {
new_block.priority = priority;
break;
}
}

new_block.href = href;
new_block.content = content;

search_appropriate_blocks.push(new_block);
}

index = await create({
schema: {
content: 'string',
h1: 'string',
h2: 'string',
h3: 'string',
priority: 'number'
},
components: {
tokenizer: { language: 'english', stemming: true }
},
sort: { enabled: false }
});

indexes = Array.from({ length: max_rank + 1 }, () => new Index({ tokenize: 'forward' }));
// @ts-ignore Block[] is the right type
await insertMultiple(index, search_appropriate_blocks);

console.log(search_appropriate_blocks);

for (const block of blocks) {
const title = block.breadcrumbs.at(-1);
map.set(block.href, block);
// NOTE: we're not using a number as the ID here, but it is recommended:
// https://github.com/nextapps-de/flexsearch#use-numeric-ids
// If we were to switch to a number we would need a second map from ID to block
// We need to keep the existing one to allow looking up recent searches by URL even if docs change
// It's unclear how much browsers do string interning and how this might affect memory
// We'd probably want to test both implementations across browsers if memory usage becomes an issue
// TODO: fix the type by updating flexsearch after
// https://github.com/nextapps-de/flexsearch/pull/364 is merged and released
indexes[block.rank ?? 0].add(block.href, `${title} ${block.content}`);

hrefs.set(block.breadcrumbs.join('::'), block.href);
}

Expand All @@ -49,33 +81,47 @@ export function init(blocks) {
/**
* Search for a given query in the existing index
* @param {string} query
* @returns {import('./types').Tree[]}
* @returns {Promise<import('./types').Tree[]>}
*/
export function search(query) {
const escaped = query.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, '\\$&');
const regex = new RegExp(`(^|\\b)${escaped}`, 'i');

const blocks = indexes
.flatMap((index) => index.search(query))
.map(lookup)
.map((block, rank) => ({ block: /** @type{import('./types').Block} */ (block), rank }))
.sort((a, b) => {
const a_title_matches = regex.test(/** @type {string} */ (a.block.breadcrumbs.at(-1)));
const b_title_matches = regex.test(/** @type {string} */ (b.block.breadcrumbs.at(-1)));

// massage the order a bit, so that title matches
// are given higher priority
if (a_title_matches !== b_title_matches) {
return a_title_matches ? -1 : 1;
}
export async function search(query) {
const search_results = /** @type {any[]} */ (
(
await orama_search(index, {
term: query,
sortBy: (a, b) => {
const [_docIdA, scoreA, docA] = a;
const [_docIdB, scoreB, docB] = b;

// @ts-ignore
return docB.priority * 1000 + scoreB - (docA.priority * 1000 + scoreA);
},
boost: {
h1: 3,
h2: 2,
h3: 1
},

limit: search_appropriate_blocks.length
})
).hits.map(({ document }) => document)
);

/** @type {import('./types').SearchAppropriateBlock[]} */
const blocks = [];

for (const result of search_results) {
// @ts-ignore
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this `@ts-ignore` needed? It doesn't look like it would be; but if it is, it should probably be `@ts-expect-error` instead.

const block = /** @type {import('./types').SearchAppropriateBlock} */ (result);

blocks.push(block);
}

return a.block.breadcrumbs.length - b.block.breadcrumbs.length || a.rank - b.rank;
})
.map(({ block }) => block);
// console.log(search_results);
// console.log(results);

const results = tree([], blocks).children;
console.log(buildBlockTree(blocks));
Comment on lines +119 to +122
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These few lines (the commented-out `console.log` calls and the debug `console.log(buildBlockTree(blocks))`) should be removed.


return results;
return buildBlockTree(blocks);
}

/**
Expand All @@ -87,29 +133,61 @@ export function lookup(href) {
}

/**
* @param {string[]} breadcrumbs
* @param {import('./types').Block[]} blocks
* @returns {import('./types').Tree}
* @param {SearchAppropriateBlock[]} blocks
* @returns {Tree[]}
*/
function tree(breadcrumbs, blocks) {
const depth = breadcrumbs.length;

const node = blocks.find((block) => {
if (block.breadcrumbs.length !== depth) return false;
return breadcrumbs.every((part, i) => block.breadcrumbs[i] === part);
});

const descendants = blocks.filter((block) => {
if (block.breadcrumbs.length <= depth) return false;
return breadcrumbs.every((part, i) => block.breadcrumbs[i] === part);
function buildBlockTree(blocks) {
// Group blocks by h1
const groupedByH1 = blocks.reduce((acc, block) => {
if (block.h1) {
acc[block.h1] = acc[block.h1] || [];
acc[block.h1].push(block);
}
return acc;
}, {});

// Create trees
return Object.entries(groupedByH1).map(([h1, group]) => {
// Create a node for h1
const h1Node = group.find((block) => !block.h2) || { ...group[0], content: '' };

// Group h2s under h1
const groupedByH2 = group.reduce((acc, block) => {
if (block.h2) {
acc[block.h2] = acc[block.h2] || [];
acc[block.h2].push(block);
}
return acc;
}, {});

// Create children for h1 node
const children = Object.entries(groupedByH2).map(([h2, group]) => {
// Create a node for h2
const h2Node = group.find((block) => !block.h3) || { ...group[0], content: '' };

// h3 blocks under h2
const h3Children = group
.filter((block) => block.h3)
.map((block) => ({
breadcrumbs: [h1, h2, block.h3],
href: block.href,
node: { header: block.h3, content: block.content },
children: []
}));

return {
breadcrumbs: [h1, h2],
href: h2Node.href,
node: { header: h2, content: h2Node.content },
children: h3Children
};
});

return {
breadcrumbs: [h1],
href: h1Node.href,
node: { header: h1, content: h1Node.content },
children
};
});

const child_parts = Array.from(new Set(descendants.map((block) => block.breadcrumbs[depth])));

return {
breadcrumbs,
href: /** @type {string} */ (hrefs.get(breadcrumbs.join('::'))),
node: /** @type {import('./types').Block} */ (node),
children: child_parts.map((part) => tree([...breadcrumbs, part], descendants))
};
}
11 changes: 10 additions & 1 deletion packages/site-kit/src/lib/search/types.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,18 @@ export interface Block {
rank: number;
}

/**
 * Flattened representation of a docs block in the shape that is inserted
 * into the search index: the breadcrumb trail is split into one field per
 * heading level instead of being kept as an array.
 *
 * NOTE(review): `init` in search.js only assigns `h1` when the first
 * breadcrumb is truthy and only assigns `priority` when one of the
 * priority-map patterns matches the href, so both may be missing at runtime
 * even though they are declared as required here — confirm whether they
 * should be optional.
 */
export interface SearchAppropriateBlock {
	/** Link to the block; also the value the priority patterns are tested against. */
	href: string;

	/** First breadcrumb (top-level heading). */
	h1: string;
	/** Second breadcrumb, when the block is nested at least two levels deep. */
	h2?: string;
	/** Third breadcrumb, when the block is nested at least three levels deep. */
	h3?: string;

	/** Text content of the block. */
	content: string;

	/** Ranking weight taken from the priority map; the search sort multiplies it by 1000 and adds the match score. */
	priority: number;
}

export interface Tree {
breadcrumbs: string[];
href: string;
node: Block;
node: Partial<Block>;
children: Tree[];
}
2 changes: 2 additions & 0 deletions packages/site-kit/src/lib/stores/search.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,6 @@ import { writable } from 'svelte/store';

export const searching = writable(false);
export const search_query = writable('');

/** @type {import('svelte/store').Writable<string[]>} */
export const search_recent = persisted('svelte:recent-searches', []);
Loading