diff --git a/docs/content/docs/api.md b/docs/content/docs/api.md index 8436df0e..5eba92f7 100644 --- a/docs/content/docs/api.md +++ b/docs/content/docs/api.md @@ -239,6 +239,18 @@ const search = await pagefind.search("static", { See [Sorting using the Pagefind JavaScript API](/docs/js-api-sorting/) for more details and functionality. +## Turning off density-weighting + +By default, the results' weights are "density-weighted", i.e. the weights are calculated by counting the number of matches within a page divided by the page's word count. This density-weighting can be turned off: + +{{< diffcode >}} +```js +const search = await pagefind.search("term", { ++ use_weighting: false +}); +``` +{{< /diffcode >}} + ## Re-initializing the search API In some cases you might need to re-initialize Pagefind. For example, if you dynamically change the language of the page without reloading, Pagefind will need to be re-initialized to reflect this langauge change. diff --git a/pagefind/features/weighting.feature b/pagefind/features/weighting.feature index 4de98933..5afde92c 100644 --- a/pagefind/features/weighting.feature +++ b/pagefind/features/weighting.feature @@ -224,3 +224,31 @@ Feature: Word Weighting Then There should be no logs # Treat the bal value here as a snapshot — update the expected value as needed Then The selector "p" should contain "weight:1/bal:82.28572/loc:4" + + Scenario: Density weighting can be turned off + Given I have a "public/single-word.html" file with the body: + """ +

word

+ """ + Given I have a "public/three-words.html" file with the body: + """ +

I have a word and a word and another word

+ """ + When I run my program + Then I should see "Running Pagefind" in stdout + When I serve the "public" directory + When I load "/" + When I evaluate: + """ + async function() { + let pagefind = await import("/pagefind/pagefind.js"); + + let search = await pagefind.search(`word`); + let search2 = await pagefind.search(`word`, { use_weighting: false }); + let counts = [search, search2].map(s => s.results.map(r => r.words.length)); + document.querySelector('p').innerText = JSON.stringify(counts); + } + """ + Then There should be no logs + # With density weighting, single-word should be the first hit, otherwise three-words + Then The selector "p" should contain "[[1,3],[3,1]]" diff --git a/pagefind_web/src/lib.rs b/pagefind_web/src/lib.rs index f4412e70..97cc919a 100644 --- a/pagefind_web/src/lib.rs +++ b/pagefind_web/src/lib.rs @@ -210,7 +210,7 @@ pub fn filters(ptr: *mut SearchIndex) -> String { } #[wasm_bindgen] -pub fn search(ptr: *mut SearchIndex, query: &str, filter: &str, sort: &str, exact: bool) -> String { +pub fn search(ptr: *mut SearchIndex, query: &str, filter: &str, sort: &str, exact: bool, weighting: bool) -> String { let search_index = unsafe { Box::from_raw(ptr) }; if let Some(generator_version) = search_index.generator_version.as_ref() { @@ -225,7 +225,7 @@ pub fn search(ptr: *mut SearchIndex, query: &str, filter: &str, sort: &str, exac let (unfiltered_results, mut results) = if exact { search_index.exact_term(query, filter_set) } else { - search_index.search_term(query, filter_set) + search_index.search_term(query, filter_set, weighting) }; let unfiltered_total = unfiltered_results.len(); debug!({ format!("Raw total of {} results", unfiltered_total) }); diff --git a/pagefind_web/src/search.rs b/pagefind_web/src/search.rs index 824e7975..aded607c 100644 --- a/pagefind_web/src/search.rs +++ b/pagefind_web/src/search.rs @@ -175,6 +175,7 @@ impl SearchIndex { &self, term: &str, filter_results: Option, + weighting: bool, ) -> (Vec, Vec) { debug!({ 
format! {"Searching {:?}", term} @@ -318,7 +319,7 @@ impl SearchIndex { .map(|BalancedWordScore { balanced_score, .. }| balanced_score) .sum::() / 24.0) - / page.word_count as f32; + / (if weighting { page.word_count as f32 } else { 1.0 }); let search_result = PageSearchResult { page: page.hash.clone(), diff --git a/pagefind_web_js/lib/coupled_search.ts b/pagefind_web_js/lib/coupled_search.ts index d340971b..55cdea2a 100644 --- a/pagefind_web_js/lib/coupled_search.ts +++ b/pagefind_web_js/lib/coupled_search.ts @@ -390,6 +390,7 @@ class PagefindInstance { verbose: false, filters: {}, sort: {}, + use_weighting: true, ...options, }; const log = (str: string) => { if (options.verbose) console.log(str) }; @@ -443,7 +444,7 @@ class PagefindInstance { // pointer may have updated from the loadChunk calls ptr = await this.getPtr(); let searchStart = Date.now(); - let result = this.backend.search(ptr, term, filter_list, sort_list, exact_search) as string; + let result = this.backend.search(ptr, term, filter_list, sort_list, exact_search, options.use_weighting) as string; log(`Got the raw search result: ${result}`); let [unfilteredResultCount, all_results, filters, totalFilters] = result.split(/:([^:]*):(.*)__PF_UNFILTERED_DELIM__(.*)$/); let filterObj = this.parseFilters(filters);