Skip to content

Commit

Permalink
Make repository indexer tokenize by camel case selectable
Browse files Browse the repository at this point in the history
  • Loading branch information
Guillermo Prandi committed Aug 3, 2019
1 parent 6c0c5c5 commit a8d4e40
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 12 deletions.
3 changes: 3 additions & 0 deletions custom/conf/app.ini.sample
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,9 @@ REPO_INDEXER_ENABLED = false
REPO_INDEXER_PATH = indexers/repos.bleve
UPDATE_BUFFER_LEN = 20
MAX_FILE_SIZE = 1048576
; Break camel case names into separate words for indexing.
; It's imperative to delete any previous indexes from REPO_INDEXER_PATH after changing this setting.
REPO_INDEXER_CAMEL_CASE = true

[admin]
; Disallow regular (non-admin) users from creating organizations.
Expand Down
2 changes: 2 additions & 0 deletions docs/content/doc/advanced/config-cheat-sheet.en-us.md
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,8 @@ Values containing `#` or `;` must be quoted using `` ` `` or `"""`.
- `REPO_INDEXER_PATH`: **indexers/repos.bleve**: Index file used for code search.
- `UPDATE_BUFFER_LEN`: **20**: Buffer length of index request.
- `MAX_FILE_SIZE`: **1048576**: Maximum size in bytes of files to be indexed.
- `REPO_INDEXER_CAMEL_CASE`: **true**: When `REPO_INDEXER_CAMEL_CASE` is true, the repository indexer will break camel case names into words, so `thisCamelCaseName` will be indexed as `this`, `camel`, `case`, `name`. It's imperative to delete any previous indexes from `REPO_INDEXER_PATH` after changing this setting.


## Security (`security`)

Expand Down
13 changes: 11 additions & 2 deletions modules/indexer/repo.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,11 +107,20 @@ func createRepoIndexer(path string, latestVersion int) error {
mapping := bleve.NewIndexMapping()
if err = addUnicodeNormalizeTokenFilter(mapping); err != nil {
return err
} else if err = mapping.AddCustomAnalyzer(repoIndexerAnalyzer, map[string]interface{}{
}

var tokenFilters []string
if setting.Indexer.RepoUseCamelCaseTokenizer {
tokenFilters = []string{unicodeNormalizeName, camelcase.Name, lowercase.Name, unique.Name}
} else {
tokenFilters = []string{unicodeNormalizeName, lowercase.Name, unique.Name}
}

if err = mapping.AddCustomAnalyzer(repoIndexerAnalyzer, map[string]interface{}{
"type": custom.Name,
"char_filters": []string{},
"tokenizer": unicode.Name,
"token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name, unique.Name},
"token_filters": tokenFilters,
}); err != nil {
return err
}
Expand Down
22 changes: 12 additions & 10 deletions modules/setting/indexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,17 @@ const (
var (
// Indexer settings
Indexer = struct {
IssueType string
IssuePath string
RepoIndexerEnabled bool
RepoPath string
UpdateQueueLength int
MaxIndexerFileSize int64
IssueQueueType string
IssueQueueDir string
IssueQueueConnStr string
IssueQueueBatchNumber int
IssueType string
IssuePath string
RepoIndexerEnabled bool
RepoPath string
UpdateQueueLength int
MaxIndexerFileSize int64
IssueQueueType string
IssueQueueDir string
IssueQueueConnStr string
IssueQueueBatchNumber int
RepoUseCamelCaseTokenizer bool
}{
IssueType: "bleve",
IssuePath: "indexers/issues.bleve",
Expand All @@ -53,6 +54,7 @@ func newIndexerService() {
}
Indexer.UpdateQueueLength = sec.Key("UPDATE_BUFFER_LEN").MustInt(20)
Indexer.MaxIndexerFileSize = sec.Key("MAX_FILE_SIZE").MustInt64(1024 * 1024)
Indexer.RepoUseCamelCaseTokenizer = sec.Key("REPO_INDEXER_CAMEL_CASE").MustBool(true)
Indexer.IssueQueueType = sec.Key("ISSUE_INDEXER_QUEUE_TYPE").MustString(LevelQueueType)
Indexer.IssueQueueDir = sec.Key("ISSUE_INDEXER_QUEUE_DIR").MustString(path.Join(AppDataPath, "indexers/issues.queue"))
Indexer.IssueQueueConnStr = sec.Key("ISSUE_INDEXER_QUEUE_CONN_STR").MustString(path.Join(AppDataPath, ""))
Expand Down

0 comments on commit a8d4e40

Please sign in to comment.