diff --git a/docs/reference/search/suggesters/phrase-suggest.asciidoc b/docs/reference/search/suggesters/phrase-suggest.asciidoc index 0ea45ca52dcc0..d31dcfd1d0905 100644 --- a/docs/reference/search/suggesters/phrase-suggest.asciidoc +++ b/docs/reference/search/suggesters/phrase-suggest.asciidoc @@ -161,6 +161,53 @@ can contain misspellings (See parameter descriptions below). in a row are changed the entire phrase of changed tokens is wrapped rather than each token. +`collate`:: + Checks each suggestion against the specified `query` or `filter` to + prune suggestions for which no matching docs exist in the index. Either + a `query` or a `filter` must be specified, and it is run as a + <<query-dsl-template-query,`template` query>>. The current suggestion is + automatically made available as the `{{suggestion}}` variable, which + should be used in your query/filter. You can still specify your own + template `params` -- the `suggestion` value will be added to the + variables you specify. You can also specify a `preference` to control + on which shards the query is executed (see <<search-request-preference>>). + The default value is `_only_local`. + +[source,js] +-------------------------------------------------- +curl -XPOST 'localhost:9200/_search' -d { + "suggest" : { + "text" : "Xor the Got-Jewel", + "simple_phrase" : { + "phrase" : { + "field" : "bigram", + "size" : 1, + "direct_generator" : [ { + "field" : "body", + "suggest_mode" : "always", + "min_word_length" : 1 + } ], + "collate": { + "query": { <1> + "match": { + "{{field_name}}" : "{{suggestion}}" <2> + } + }, + "params": {"field_name" : "title"}, <3> + "preference": "_primary" <4> + } + } + } + } + } +-------------------------------------------------- +<1> This query will be run once for every suggestion. +<2> The `{{suggestion}}` variable will be replaced by the text + of each suggestion. +<3> An additional `field_name` variable has been specified in + `params` and is used by the `match` query. +<4> The default `preference` has been changed to `_primary`. 
+ ==== Smoothing Models The `phrase` suggester supports multiple smoothing models to balance diff --git a/src/main/java/org/elasticsearch/cluster/routing/operation/plain/PlainOperationRouting.java b/src/main/java/org/elasticsearch/cluster/routing/operation/plain/PlainOperationRouting.java index b39dd9fb0895c..3c81c240eadd1 100644 --- a/src/main/java/org/elasticsearch/cluster/routing/operation/plain/PlainOperationRouting.java +++ b/src/main/java/org/elasticsearch/cluster/routing/operation/plain/PlainOperationRouting.java @@ -167,14 +167,16 @@ private ShardIterator preferenceActiveShardIterator(IndexShardRoutingTable index } } if (preference.charAt(0) == '_') { - if (preference.startsWith("_shards:")) { + Preference preferenceType = Preference.parse(preference); + if (preferenceType == Preference.SHARDS) { // starts with _shards, so execute on specific ones int index = preference.indexOf(';'); + String shards; if (index == -1) { - shards = preference.substring("_shards:".length()); + shards = preference.substring(Preference.SHARDS.type().length() + 1); } else { - shards = preference.substring("_shards:".length(), index); + shards = preference.substring(Preference.SHARDS.type().length() + 1, index); } String[] ids = Strings.splitStringByCommaToArray(shards); boolean found = false; @@ -200,25 +202,24 @@ private ShardIterator preferenceActiveShardIterator(IndexShardRoutingTable index preference = preference.substring(index + 1); } } - if (preference.startsWith("_prefer_node:")) { - return indexShard.preferNodeActiveInitializingShardsIt(preference.substring("_prefer_node:".length())); - } - if ("_local".equals(preference)) { - return indexShard.preferNodeActiveInitializingShardsIt(localNodeId); - } - if ("_primary".equals(preference)) { - return indexShard.primaryActiveInitializingShardIt(); - } - if ("_primary_first".equals(preference) || "_primaryFirst".equals(preference)) { - return indexShard.primaryFirstActiveInitializingShardsIt(); - } - if 
("_only_local".equals(preference) || "_onlyLocal".equals(preference)) { - return indexShard.onlyNodeActiveInitializingShardsIt(localNodeId); - } - if (preference.startsWith("_only_node:")) { - String nodeId = preference.substring("_only_node:".length()); - ensureNodeIdExists(nodes, nodeId); - return indexShard.onlyNodeActiveInitializingShardsIt(nodeId); + preferenceType = Preference.parse(preference); + switch (preferenceType) { + case PREFER_NODE: + return indexShard.preferNodeActiveInitializingShardsIt(preference.substring(Preference.PREFER_NODE.type().length() + 1)); + case LOCAL: + return indexShard.preferNodeActiveInitializingShardsIt(localNodeId); + case PRIMARY: + return indexShard.primaryActiveInitializingShardIt(); + case PRIMARY_FIRST: + return indexShard.primaryFirstActiveInitializingShardsIt(); + case ONLY_LOCAL: + return indexShard.onlyNodeActiveInitializingShardsIt(localNodeId); + case ONLY_NODE: + String nodeId = preference.substring(Preference.ONLY_NODE.type().length() + 1); + ensureNodeIdExists(nodes, nodeId); + return indexShard.onlyNodeActiveInitializingShardsIt(nodeId); + default: + throw new ElasticsearchIllegalArgumentException("unknown preference [" + preferenceType + "]"); } } // if not, then use it as the index diff --git a/src/main/java/org/elasticsearch/cluster/routing/operation/plain/Preference.java b/src/main/java/org/elasticsearch/cluster/routing/operation/plain/Preference.java new file mode 100644 index 0000000000000..0d9de6d3d1e84 --- /dev/null +++ b/src/main/java/org/elasticsearch/cluster/routing/operation/plain/Preference.java @@ -0,0 +1,109 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.cluster.routing.operation.plain; + +import org.elasticsearch.ElasticsearchIllegalArgumentException; +import org.elasticsearch.common.collect.Tuple; + +/** + * Routing Preference Type + */ +public enum Preference { + + /** + * Route to specific shards + */ + SHARDS("_shards"), + + /** + * Route to preferred node, if possible + */ + PREFER_NODE("_prefer_node"), + + /** + * Route to local node, if possible + */ + LOCAL("_local"), + + /** + * Route to primary shards + */ + PRIMARY("_primary"), + + /** + * Route to primary shards first + */ + PRIMARY_FIRST("_primary_first"), + + /** + * Route to the local shard only + */ + ONLY_LOCAL("_only_local"), + + /** + * Route to specific node only + */ + ONLY_NODE("_only_node"); + + private final String type; + + Preference(String type) { + this.type = type; + } + + public String type() { + return type; + } + /** + * Parses the Preference Type given a string + */ + public static Preference parse(String preference) { + String preferenceType; + int colonIndex = preference.indexOf(':'); + if (colonIndex == -1) { + preferenceType = preference; + } else { + preferenceType = preference.substring(0, colonIndex); + } + + switch (preferenceType) { + case "_shards": + return SHARDS; + case "_prefer_node": + return PREFER_NODE; + case "_only_node": + return ONLY_NODE; + case "_local": + return LOCAL; + case "_primary": + return PRIMARY; + case "_primary_first": + case "_primaryFirst": + return PRIMARY_FIRST; + case "_only_local": + case "_onlyLocal": + return ONLY_LOCAL; + default: + 
throw new ElasticsearchIllegalArgumentException("no Preference for [" + preferenceType + "]"); + } + } +} + + + diff --git a/src/main/java/org/elasticsearch/index/query/BytesFilterBuilder.java b/src/main/java/org/elasticsearch/index/query/BytesFilterBuilder.java new file mode 100644 index 0000000000000..a86ea239f70ca --- /dev/null +++ b/src/main/java/org/elasticsearch/index/query/BytesFilterBuilder.java @@ -0,0 +1,49 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.index.query; + +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.xcontent.*; + +import java.io.IOException; + +/** + * FilterBuilder that constructs filters from {@link org.elasticsearch.common.bytes.BytesReference} + * source + */ +public class BytesFilterBuilder extends BaseFilterBuilder { + + private final BytesReference source; + + public BytesFilterBuilder(BytesReference source) { + this.source = source; + + } + + @Override + protected void doXContent(XContentBuilder builder, Params params) throws IOException { + try (XContentParser parser = XContentFactory.xContent(source).createParser(source)) { + // unwrap the first layer of json dictionary + parser.nextToken(); + parser.nextToken(); + builder.copyCurrentStructure(parser); + } + } +} diff --git a/src/main/java/org/elasticsearch/index/query/FilterBuilders.java b/src/main/java/org/elasticsearch/index/query/FilterBuilders.java index 7ff4adcc5f89a..79a7ba6278d69 100644 --- a/src/main/java/org/elasticsearch/index/query/FilterBuilders.java +++ b/src/main/java/org/elasticsearch/index/query/FilterBuilders.java @@ -20,6 +20,7 @@ package org.elasticsearch.index.query; import org.elasticsearch.common.Nullable; +import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.geo.GeoPoint; import org.elasticsearch.common.geo.ShapeRelation; import org.elasticsearch.common.geo.builders.ShapeBuilder; @@ -557,6 +558,15 @@ public static WrapperFilterBuilder wrapperFilter(byte[] data, int offset, int le return new WrapperFilterBuilder(data, offset, length); } + /** + * Constructs a bytes filter to generate a filter from a {@link BytesReference} source + * + * @param source The filter source + */ + public static BytesFilterBuilder bytesFilter(BytesReference source) { + return new BytesFilterBuilder(source); + } + private FilterBuilders() { } diff --git a/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestParser.java 
b/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestParser.java index a5d85967f32e4..af43fb79acb7d 100644 --- a/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestParser.java +++ b/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestParser.java @@ -23,10 +23,12 @@ import org.apache.lucene.index.Terms; import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticsearchIllegalArgumentException; +import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.common.xcontent.XContentParser.Token; import org.elasticsearch.index.analysis.ShingleTokenFilterFactory; import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.script.CompiledScript; import org.elasticsearch.search.suggest.SuggestContextParser; import org.elasticsearch.search.suggest.SuggestUtils; import org.elasticsearch.search.suggest.SuggestionSearchContext; @@ -124,6 +126,43 @@ public SuggestionSearchContext.SuggestionContext parse(XContentParser parser, Ma } } } + } else if ("collate".equals(fieldName)) { + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + fieldName = parser.currentName(); + } else if ("query".equals(fieldName) || "filter".equals(fieldName)) { + String templateNameOrTemplateContent; + if (token == XContentParser.Token.START_OBJECT && !parser.hasTextCharacters()) { + XContentBuilder builder = XContentBuilder.builder(parser.contentType().xContent()); + builder.copyCurrentStructure(parser); + templateNameOrTemplateContent = builder.string(); + } else { + templateNameOrTemplateContent = parser.text(); + } + if (templateNameOrTemplateContent == null) { + throw new ElasticsearchIllegalArgumentException("suggester[phrase][collate] no query/filter found in collate object"); + } + if (suggestion.getCollateFilterScript() != null) { + throw new 
ElasticsearchIllegalArgumentException("suggester[phrase][collate] filter already set, doesn't support additional [" + fieldName + "]"); + } + if (suggestion.getCollateQueryScript() != null) { + throw new ElasticsearchIllegalArgumentException("suggester[phrase][collate] query already set, doesn't support additional [" + fieldName + "]"); + } + CompiledScript compiledScript = suggester.scriptService().compile("mustache", templateNameOrTemplateContent); + if ("query".equals(fieldName)) { + suggestion.setCollateQueryScript(compiledScript); + } else { + suggestion.setCollateFilterScript(compiledScript); + } + } else if ("preference".equals(fieldName)) { + suggestion.setPreference(parser.text()); + } else if ("params".equals(fieldName)) { + suggestion.setCollateScriptParams(parser.map()); + } else { + throw new ElasticsearchIllegalArgumentException( + "suggester[phrase][collate] doesn't support field [" + fieldName + "]"); + } + } } else { throw new ElasticsearchIllegalArgumentException("suggester[phrase] doesn't support array field [" + fieldName + "]"); } diff --git a/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggester.java b/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggester.java index 6e04fef71ac72..1b48f20451438 100644 --- a/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggester.java +++ b/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggester.java @@ -27,8 +27,18 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.UnicodeUtil; +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.action.search.*; +import org.elasticsearch.client.Client; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.text.StringText; import org.elasticsearch.common.text.Text; +import org.elasticsearch.index.query.FilterBuilders; +import 
org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.script.CompiledScript; +import org.elasticsearch.script.ExecutableScript; +import org.elasticsearch.script.ScriptService; import org.elasticsearch.search.suggest.Suggest.Suggestion; import org.elasticsearch.search.suggest.Suggest.Suggestion.Entry; import org.elasticsearch.search.suggest.Suggest.Suggestion.Entry.Option; @@ -38,10 +48,20 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; +import java.util.Map; public final class PhraseSuggester extends Suggester { private final BytesRef SEPARATOR = new BytesRef(" "); - + private static final String SUGGESTION_TEMPLATE_VAR_NAME = "suggestion"; + private final Client client; + private final ScriptService scriptService; + + @Inject + public PhraseSuggester(Client client, ScriptService scriptService) { + this.client = client; + this.scriptService = scriptService; + } + /* * More Ideas: * - add ability to find whitespace problems -> we can build a poor mans decompounder with our index based on a automaton? 
@@ -84,7 +104,14 @@ public Suggestion> innerExecute(String name, P response.addTerm(resultEntry); BytesRef byteSpare = new BytesRef(); - for (Correction correction : checkerResult.corrections) { + + MultiSearchResponse multiSearchResponse = collate(suggestion, checkerResult, byteSpare, spare); + + for (int i = 0; i < checkerResult.corrections.length; i++) { + if (!hasMatchingDocs(multiSearchResponse, i)) { + continue; + } + Correction correction = checkerResult.corrections[i]; UnicodeUtil.UTF8toUTF16(correction.join(SEPARATOR, byteSpare, null, null), spare); Text phrase = new StringText(spare.toString()); Text highlighted = null; @@ -104,6 +131,69 @@ private PhraseSuggestion.Entry buildResultEntry(PhraseSuggestionContext suggesti UnicodeUtil.UTF8toUTF16(suggestion.getText(), spare); return new PhraseSuggestion.Entry(new StringText(spare.toString()), 0, spare.length, cutoffScore); } + + private MultiSearchResponse collate(PhraseSuggestionContext suggestion, Result checkerResult, BytesRef byteSpare, CharsRef spare) throws IOException { + CompiledScript collateQueryScript = suggestion.getCollateQueryScript(); + CompiledScript collateFilterScript = suggestion.getCollateFilterScript(); + MultiSearchResponse multiSearchResponse = null; + if (collateQueryScript != null) { + multiSearchResponse = fetchMatchingDocCountResponses(checkerResult.corrections, collateQueryScript, false, suggestion, byteSpare, spare); + } else if (collateFilterScript != null) { + multiSearchResponse = fetchMatchingDocCountResponses(checkerResult.corrections, collateFilterScript, true, suggestion, byteSpare, spare); + } + return multiSearchResponse; + } + + private MultiSearchResponse fetchMatchingDocCountResponses(Correction[] corrections, CompiledScript collateScript, + boolean isFilter, PhraseSuggestionContext suggestions, + BytesRef byteSpare, CharsRef spare) throws IOException { + Map vars = suggestions.getCollateScriptParams(); + MultiSearchResponse multiSearchResponse = null; + 
MultiSearchRequestBuilder multiSearchRequestBuilder = client.prepareMultiSearch(); + boolean requestAdded = false; + SearchRequestBuilder req; + for (Correction correction : corrections) { + UnicodeUtil.UTF8toUTF16(correction.join(SEPARATOR, byteSpare, null, null), spare); + vars.put(SUGGESTION_TEMPLATE_VAR_NAME, spare.toString()); + ExecutableScript executable = scriptService.executable(collateScript, vars); + BytesReference querySource = (BytesReference) executable.run(); + requestAdded = true; + if (isFilter) { + req = client.prepareSearch() + .setPreference(suggestions.getPreference()) + .setQuery(QueryBuilders.constantScoreQuery(FilterBuilders.bytesFilter(querySource))) + .setSearchType(SearchType.COUNT); + } else { + req = client.prepareSearch() + .setPreference(suggestions.getPreference()) + .setQuery(querySource) + .setSearchType(SearchType.COUNT); + } + multiSearchRequestBuilder.add(req); + } + if (requestAdded) { + multiSearchResponse = multiSearchRequestBuilder.get(); + } + + return multiSearchResponse; + } + + private static boolean hasMatchingDocs(MultiSearchResponse multiSearchResponse, int index) { + if (multiSearchResponse == null) { + return true; + } + MultiSearchResponse.Item item = multiSearchResponse.getResponses()[index]; + if (!item.isFailure()) { + SearchResponse resp = item.getResponse(); + return resp.getHits().totalHits() > 0; + } else { + throw new ElasticsearchException("Collate request failed: " + item.getFailureMessage()); + } + } + + ScriptService scriptService() { + return scriptService; + } @Override public String[] names() { diff --git a/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestionBuilder.java b/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestionBuilder.java index 58945698a5857..5296b54b4fa61 100644 --- a/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestionBuilder.java +++ b/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestionBuilder.java @@ -42,6 
+42,10 @@ public final class PhraseSuggestionBuilder extends SuggestionBuilder collateParams; public PhraseSuggestionBuilder(String name) { super(name, "phrase"); @@ -166,6 +170,38 @@ public PhraseSuggestionBuilder highlight(String preTag, String postTag) { return this; } + /** + * Sets a query used for filtering out suggested phrases (collation). + */ + public PhraseSuggestionBuilder collateQuery(String collateQuery) { + this.collateQuery = collateQuery; + return this; + } + + /** + * Sets a filter used for filtering out suggested phrases (collation). + */ + public PhraseSuggestionBuilder collateFilter(String collateFilter) { + this.collateFilter = collateFilter; + return this; + } + + /** + * Sets routing preferences for executing filter query (collation). + */ + public PhraseSuggestionBuilder collatePreference(String collatePreference) { + this.collatePreference = collatePreference; + return this; + } + + /** + * Sets additional params for collate script + */ + public PhraseSuggestionBuilder collateParams(Map collateParams) { + this.collateParams = collateParams; + return this; + } + @Override public XContentBuilder innerToXContent(XContentBuilder builder, Params params) throws IOException { if (realWordErrorLikelihood != null) { @@ -210,6 +246,22 @@ public XContentBuilder innerToXContent(XContentBuilder builder, Params params) t builder.field("post_tag", postTag); builder.endObject(); } + if (collateQuery != null || collateFilter != null) { + builder.startObject("collate"); + if (collateQuery != null) { + builder.field("query", collateQuery); + } + if (collateFilter != null) { + builder.field("filter", collateFilter); + } + if (collatePreference != null) { + builder.field("preference", collatePreference); + } + if (collateParams != null) { + builder.field("params", collateParams); + } + builder.endObject(); + } return builder; } @@ -610,4 +662,4 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws } -} \ No newline at end of file 
+} diff --git a/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestionContext.java b/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestionContext.java index 50867b5e02109..549314a5805ad 100644 --- a/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestionContext.java +++ b/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestionContext.java @@ -19,11 +19,15 @@ package org.elasticsearch.search.suggest.phrase; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; +import java.util.Map; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticsearchIllegalArgumentException; +import org.elasticsearch.cluster.routing.operation.plain.Preference; +import org.elasticsearch.script.CompiledScript; import org.elasticsearch.search.suggest.DirectSpellcheckerSettings; import org.elasticsearch.search.suggest.Suggester; import org.elasticsearch.search.suggest.SuggestionSearchContext.SuggestionContext; @@ -40,6 +44,10 @@ class PhraseSuggestionContext extends SuggestionContext { private int tokenLimit = NoisyChannelSpellChecker.DEFAULT_TOKEN_LIMIT; private BytesRef preTag; private BytesRef postTag; + private CompiledScript collateQueryScript; + private CompiledScript collateFilterScript; + private String preference = Preference.ONLY_LOCAL.type(); + private Map collateScriptParams = new HashMap<>(1); private WordScorer.WordScorerFactory scorer; @@ -180,4 +188,37 @@ public void setPostTag(BytesRef postTag) { public BytesRef getPostTag() { return postTag; } + + CompiledScript getCollateQueryScript() { + return collateQueryScript; + } + + void setCollateQueryScript(CompiledScript collateQueryScript) { + this.collateQueryScript = collateQueryScript; + } + + CompiledScript getCollateFilterScript() { + return collateFilterScript; + } + + void setCollateFilterScript(CompiledScript collateFilterScript) { + this.collateFilterScript = 
collateFilterScript; + } + + String getPreference() { + return preference; + } + + void setPreference(String preference) { + this.preference = preference; + } + + Map getCollateScriptParams() { + return collateScriptParams; + } + + void setCollateScriptParams(Map collateScriptParams) { + this.collateScriptParams = collateScriptParams; + } + } \ No newline at end of file diff --git a/src/test/java/org/elasticsearch/search/suggest/SuggestSearchTests.java b/src/test/java/org/elasticsearch/search/suggest/SuggestSearchTests.java index 5bc44765145df..da083431d30be 100644 --- a/src/test/java/org/elasticsearch/search/suggest/SuggestSearchTests.java +++ b/src/test/java/org/elasticsearch/search/suggest/SuggestSearchTests.java @@ -1094,7 +1094,165 @@ public void suggestWithManyCandidates() throws InterruptedException, ExecutionEx assertSuggestion(searchSuggest, 0, 0, "title", "united states house of representatives elections in washington 2006"); // assertThat(total, lessThan(1000L)); // Takes many seconds without fix - just for debugging } - + + @Test + public void suggestPhrasesInIndex() throws InterruptedException, ExecutionException, IOException { + CreateIndexRequestBuilder builder = prepareCreate("test").setSettings(settingsBuilder() + .put(indexSettings()) + .put(SETTING_NUMBER_OF_SHARDS, 1) // A single shard will help to keep the tests repeatable. 
+ .put("index.analysis.analyzer.text.tokenizer", "standard") + .putArray("index.analysis.analyzer.text.filter", "lowercase", "my_shingle") + .put("index.analysis.filter.my_shingle.type", "shingle") + .put("index.analysis.filter.my_shingle.output_unigrams", true) + .put("index.analysis.filter.my_shingle.min_shingle_size", 2) + .put("index.analysis.filter.my_shingle.max_shingle_size", 3)); + + XContentBuilder mapping = XContentFactory.jsonBuilder() + .startObject() + .startObject("type1") + .startObject("properties") + .startObject("title") + .field("type", "string") + .field("analyzer", "text") + .endObject() + .endObject() + .endObject() + .endObject(); + assertAcked(builder.addMapping("type1", mapping)); + ensureGreen(); + + ImmutableList.Builder titles = ImmutableList.builder(); + + titles.add("United States House of Representatives Elections in Washington 2006"); + titles.add("United States House of Representatives Elections in Washington 2005"); + titles.add("State"); + titles.add("Houses of Parliament"); + titles.add("Representative Government"); + titles.add("Election"); + + List builders = new ArrayList<>(); + for (String title: titles.build()) { + builders.add(client().prepareIndex("test", "type1").setSource("title", title)); + } + indexRandom(true, builders); + + // suggest without filtering + PhraseSuggestionBuilder suggest = phraseSuggestion("title") + .field("title") + .addCandidateGenerator(PhraseSuggestionBuilder.candidateGenerator("title") + .suggestMode("always") + .maxTermFreq(.99f) + .size(10) + .maxInspections(200) + ) + .confidence(0f) + .maxErrors(2f) + .shardSize(30000) + .size(10); + Suggest searchSuggest = searchSuggest("united states house of representatives elections in washington 2006", suggest); + assertSuggestionSize(searchSuggest, 0, 10, "title"); + + // suggest with filtering + String filterString = XContentFactory.jsonBuilder() + .startObject() + .startObject("match_phrase") + .field("title", "{{suggestion}}") + .endObject() + 
.endObject() + .string(); + PhraseSuggestionBuilder filteredQuerySuggest = suggest.collateQuery(filterString); + searchSuggest = searchSuggest("united states house of representatives elections in washington 2006", filteredQuerySuggest); + assertSuggestionSize(searchSuggest, 0, 2, "title"); + + // filtered suggest with no result (boundary case) + searchSuggest = searchSuggest("Elections of Representatives Parliament", filteredQuerySuggest); + assertSuggestionSize(searchSuggest, 0, 0, "title"); + + // filtered suggest with bad query + String incorrectFilterString = XContentFactory.jsonBuilder() + .startObject() + .startObject("test") + .field("title", "{{suggestion}}") + .endObject() + .endObject() + .string(); + PhraseSuggestionBuilder incorrectFilteredSuggest = suggest.collateQuery(incorrectFilterString); + try { + searchSuggest("united states house of representatives elections in washington 2006", incorrectFilteredSuggest); + fail("Post query error has been swallowed"); + } catch(ElasticsearchException e) { + // expected + } + + // suggest with filter collation + String filterStringAsFilter = XContentFactory.jsonBuilder() + .startObject() + .startObject("query") + .startObject("match_phrase") + .field("title", "{{suggestion}}") + .endObject() + .endObject() + .endObject() + .string(); + + PhraseSuggestionBuilder filteredFilterSuggest = suggest.collateQuery(null).collateFilter(filterStringAsFilter); + searchSuggest = searchSuggest("united states house of representatives elections in washington 2006", filteredFilterSuggest); + assertSuggestionSize(searchSuggest, 0, 2, "title"); + + // filtered suggest with bad filter + String filterStr = XContentFactory.jsonBuilder() + .startObject() + .startObject("pprefix") + .field("title", "{{suggestion}}") + .endObject() + .endObject() + .string(); + + PhraseSuggestionBuilder in = suggest.collateQuery(null).collateFilter(filterStr); + try { + searchSuggest("united states house of representatives elections in washington 2006", 
in); + fail("Post filter error has been swallowed"); + } catch(ElasticsearchException e) { + //expected + } + + // collate script failure due to no additional params + String collateWithParams = XContentFactory.jsonBuilder() + .startObject() + .startObject("{{query_type}}") + .field("{{query_field}}", "{{suggestion}}") + .endObject() + .endObject() + .string(); + + + PhraseSuggestionBuilder phraseSuggestWithNoParams = suggest.collateFilter(null).collateQuery(collateWithParams); + try { + searchSuggest("united states house of representatives elections in washington 2006", phraseSuggestWithNoParams); + fail("Malformed query (lack of additional params) should fail"); + } catch (ElasticsearchException e) { + // expected + } + + // collate script with additional params + Map params = new HashMap<>(); + params.put("query_type", "match_phrase"); + params.put("query_field", "title"); + + PhraseSuggestionBuilder phraseSuggestWithParams = suggest.collateFilter(null).collateQuery(collateWithParams).collateParams(params); + searchSuggest = searchSuggest("united states house of representatives elections in washington 2006", phraseSuggestWithParams); + assertSuggestionSize(searchSuggest, 0, 2, "title"); + + //collate request defining both query/filter should fail + PhraseSuggestionBuilder phraseSuggestWithFilterAndQuery = suggest.collateFilter(filterStringAsFilter).collateQuery(filterString); + try { + searchSuggest("united states house of representatives elections in washington 2006", phraseSuggestWithFilterAndQuery); + fail("expected parse failure, as both filter and query are set in collate"); + } catch (ElasticsearchException e) { + // expected + } + } + protected Suggest searchSuggest(SuggestionBuilder... suggestion) { return searchSuggest(null, suggestion); }