Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add similarity property to the Azure Search Index definition for API 2019-05-06-preview #9249

Merged
merged 9 commits into from
Apr 29, 2020
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,11 @@
"applicationId": "00000000-0000-0000-0000-000000000000",
"applicationSecret": "myapplicationsecret"
}
},
"similarity": {
"@odata.type": "#Microsoft.Azure.Search.BM25Similarity",
"b" : 0.5,
"k1" : 1.3
}
}
},
Expand Down Expand Up @@ -356,6 +361,11 @@
"applicationId": "00000000-0000-0000-0000-000000000000",
"applicationSecret": null
}
},
"similarity": {
"@odata.type": "#Microsoft.Azure.Search.BM25Similarity",
"b" : 0.5,
"k1" : 1.3
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,9 @@
"keyVaultKeyVersion": "myKeyVersion-32charAlphaNumericString",
"keyVaultUri": "https://myKeyVault.vault.azure.net",
"accessCredentials": null
},
"similarity": {
"@odata.type": "#Microsoft.Azure.Search.ClassicSimilarity"
}
}
},
Expand Down Expand Up @@ -353,6 +356,9 @@
"keyVaultKeyVersion": "myKeyVersion-32charAlphaNumericString",
"keyVaultUri": "https://myKeyVault.vault.azure.net",
"accessCredentials": null
},
"similarity": {
"@odata.type": "#Microsoft.Azure.Search.ClassicSimilarity"
}
}
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4296,6 +4296,51 @@
"url": "https://lucene.apache.org/core/4_10_3/analyzers-common/org/apache/lucene/analysis/pattern/PatternReplaceCharFilter.html"
}
},
"Similarity": {
"discriminator": "@odata.type",
"properties": {
"@odata.type": {
"type": "string"
}
},
"required": [
"@odata.type"
],
"description": "Base type for similarity algorithms. Similarity algorithms are used to calculate scores that tie queries to documents. The higher the score, the more relevant the document is to that specific query. Those scores are used to rank the search results.",
"externalDocs": {
"url": "https://docs.microsoft.com/azure/search/index-ranking-similarity"
}
},
"ClassicSimilarity": {
"x-ms-discriminator-value": "#Microsoft.Azure.Search.ClassicSimilarity",
"allOf": [
{
"$ref": "#/definitions/Similarity"
}
],
"description": "Legacy similarity algorithm which uses the Lucene TFIDFSimilarity implementation of TF-IDF. This variation of TF-IDF introduces static document length normalization as well as coordinating factors that penalize documents that only partially match the searched queries."
},
"BM25Similarity": {
"x-ms-discriminator-value": "#Microsoft.Azure.Search.BM25Similarity",
"allOf": [
{
"$ref": "#/definitions/Similarity"
}
],
"properties": {
"k1": {
"type": "number",
"format": "double",
"description": "The k1 parameter controls the scaling function between the term frequency of each matching terms and the final relevance score of a document-query pair. By default, a value of 1.2 is used. A value of 0.0 means the score does not scale with an increase in term frequency."
shmed marked this conversation as resolved.
Show resolved Hide resolved
},
"b": {
"type": "number",
"format": "double",
"description": "The b parameter controls how the length of a document affects the relevance score. By default, a value of 0.75 is used. A value of 0.0 means no length normalization is applied, while a value of 1.0 means the score is fully normalized by the length of the document."
}
},
"description": "Ranking function based on the Okapi BM25 similarity algorithm. BM25 is a TF-IDF-like algorithm that includes length normalization (controlled by the 'b' parameter) as well as term frequency saturation (controlled by the 'k1' parameter)."
},
"DataSourceCredentials": {
"properties": {
"connectionString": {
Expand Down Expand Up @@ -5409,6 +5454,13 @@
"url": "https://aka.ms/azure-search-encryption-with-cmk"
}
},
"similarity": {
"$ref": "#/definitions/Similarity",
"description": "The type of similarity algorithm to be used when scoring and ranking the documents matching a search query. The similarity algorithm can only be defined at index creation time and cannot be modified on existing indexes. If null, the ClassicSimilarity algorithm is used.",
"externalDocs": {
"url": "https://docs.microsoft.com/azure/search/index-ranking-similarity"
}
},
"@odata.etag": {
"x-ms-client-name": "ETag",
"type": "string",
Expand Down