Skip to content

Commit

Permalink
Add an option to turn off density-weighting
Browse files Browse the repository at this point in the history
When searching, Pagefind applies a heuristic that often works quite well
to boost pages with a higher density, i.e. a higher number of hits
divided by the number of words on the page. This is called "density
weighting".

In some instances, though, it is desirable to just use the number of
hits directly, without dividing by the number of words on the page.

Let's support this via the search option `use_weighting`, which
defaults to `true` to maintain the current behavior.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
  • Loading branch information
dscho committed Dec 24, 2023
1 parent fd70ca9 commit 04d87d8
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 4 deletions.
4 changes: 2 additions & 2 deletions pagefind_web/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ pub fn filters(ptr: *mut SearchIndex) -> String {
}

#[wasm_bindgen]
pub fn search(ptr: *mut SearchIndex, query: &str, filter: &str, sort: &str, exact: bool) -> String {
pub fn search(ptr: *mut SearchIndex, query: &str, filter: &str, sort: &str, exact: bool, weighting: bool) -> String {
let search_index = unsafe { Box::from_raw(ptr) };

if let Some(generator_version) = search_index.generator_version.as_ref() {
Expand All @@ -225,7 +225,7 @@ pub fn search(ptr: *mut SearchIndex, query: &str, filter: &str, sort: &str, exac
let (unfiltered_results, mut results) = if exact {
search_index.exact_term(query, filter_set)
} else {
search_index.search_term(query, filter_set)
search_index.search_term(query, filter_set, weighting)
};
let unfiltered_total = unfiltered_results.len();
debug!({ format!("Raw total of {} results", unfiltered_total) });
Expand Down
3 changes: 2 additions & 1 deletion pagefind_web/src/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ impl SearchIndex {
&self,
term: &str,
filter_results: Option<BitSet>,
weighting: bool,
) -> (Vec<usize>, Vec<PageSearchResult>) {
debug!({
format! {"Searching {:?}", term}
Expand Down Expand Up @@ -318,7 +319,7 @@ impl SearchIndex {
.map(|BalancedWordScore { balanced_score, .. }| balanced_score)
.sum::<f32>()
/ 24.0)
/ page.word_count as f32;
/ (if weighting { page.word_count as f32 } else { 1.0 });

let search_result = PageSearchResult {
page: page.hash.clone(),
Expand Down
3 changes: 2 additions & 1 deletion pagefind_web_js/lib/coupled_search.ts
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,7 @@ class PagefindInstance {
verbose: false,
filters: {},
sort: {},
use_weighting: true,
...options,
};
const log = (str: string) => { if (options.verbose) console.log(str) };
Expand Down Expand Up @@ -443,7 +444,7 @@ class PagefindInstance {
// pointer may have updated from the loadChunk calls
ptr = await this.getPtr();
let searchStart = Date.now();
let result = this.backend.search(ptr, term, filter_list, sort_list, exact_search) as string;
let result = this.backend.search(ptr, term, filter_list, sort_list, exact_search, options.use_weighting) as string;
log(`Got the raw search result: ${result}`);
let [unfilteredResultCount, all_results, filters, totalFilters] = result.split(/:([^:]*):(.*)__PF_UNFILTERED_DELIM__(.*)$/);
let filterObj = this.parseFilters(filters);
Expand Down

0 comments on commit 04d87d8

Please sign in to comment.