From a390d75673ad59de69997a6426a29905ad128f28 Mon Sep 17 00:00:00 2001 From: silkentrance Date: Thu, 27 Jun 2024 21:21:42 +0200 Subject: [PATCH] fix #4479: add fuzzy keyword search to title --- modules/indexer/internal/bleve/query.go | 8 ++++++++ modules/indexer/internal/bleve/util.go | 10 ++++++---- modules/indexer/issues/bleve/bleve.go | 2 +- routers/web/repo/issue.go | 1 + 4 files changed, 16 insertions(+), 5 deletions(-) diff --git a/modules/indexer/internal/bleve/query.go b/modules/indexer/internal/bleve/query.go index 21422b281c498..c5399fe47f980 100644 --- a/modules/indexer/internal/bleve/query.go +++ b/modules/indexer/internal/bleve/query.go @@ -28,6 +28,14 @@ func MatchPhraseQuery(matchPhrase, field, analyzer string, fuzziness int) *query return q } +// FuzzyQuery generates a fuzzy query for the given phrase, field, and fuzziness +func FuzzyQuery(matchPhrase, field string, fuzziness int) *query.FuzzyQuery { + q := bleve.NewFuzzyQuery(matchPhrase) + q.FieldVal = field + q.Fuzziness = fuzziness + return q +} + // BoolFieldQuery generates a bool field query for the given value and field func BoolFieldQuery(value bool, field string) *query.BoolFieldQuery { q := bleve.NewBoolFieldQuery(value) diff --git a/modules/indexer/internal/bleve/util.go b/modules/indexer/internal/bleve/util.go index a2265f86e6b35..ac6bae2018d4a 100644 --- a/modules/indexer/internal/bleve/util.go +++ b/modules/indexer/internal/bleve/util.go @@ -50,12 +50,14 @@ func openIndexer(path string, latestVersion int) (bleve.Index, int, error) { func GuessFuzzinessByKeyword(s string) int { // according to https://github.com/blevesearch/bleve/issues/1563, the supported max fuzziness is 2 - // magic number 4 was chosen to determine the levenshtein distance per each character of a keyword - // BUT, when using CJK (eg: `갃갃갃` `啊啊啊`), it mismatches a lot. + // BUT, when using CJK (eg: `갃갃갃` `啊啊啊`), it mismatches a lot + // which we need to live with, as we need to support not just ASCII + // in case of code points >= 128 we will increase the fuzziness to 2 + // the standard is 1 for _, r := range s { if r >= 128 { - return 0 + return 2 } } - return min(2, len(s)/4) + return 1 } diff --git a/modules/indexer/issues/bleve/bleve.go b/modules/indexer/issues/bleve/bleve.go index 7ef370e89c5d2..045fd5f0a92fd 100644 --- a/modules/indexer/issues/bleve/bleve.go +++ b/modules/indexer/issues/bleve/bleve.go @@ -162,7 +162,7 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) ( } queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{ - inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer, fuzziness), + inner_bleve.FuzzyQuery(options.Keyword, "title", fuzziness), inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer, fuzziness), inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer, fuzziness), }...)) diff --git a/routers/web/repo/issue.go b/routers/web/repo/issue.go index 7c4e3e36f3caa..537474cbd4202 100644 --- a/routers/web/repo/issue.go +++ b/routers/web/repo/issue.go @@ -2677,6 +2677,7 @@ func SearchIssues(ctx *context.Context) { MilestoneIDs: includedMilestones, ProjectID: projectID, SortBy: issue_indexer.SortByCreatedDesc, + IsFuzzyKeyword: true, } if since != 0 {