Phrase Suggester: Add collate option to PhraseSuggester

The newly added collate option lets the user provide a template query/filter that is executed for every generated phrase suggestion, to ensure that the suggestion matches at least one document for the filter/query. The user can also set a routing `preference` for the collate query/filter and supply additional `params` to inject into the collate template. Closes #3482
elastic · Jul 14, 2014 · 7634389 · 7634389
1 parent f8be82f
commit 7634389
Show file tree

Hide file tree

Showing 10 changed files with 622 additions and 26 deletions.
diff --git a/docs/reference/search/suggesters/phrase-suggest.asciidoc b/docs/reference/search/suggesters/phrase-suggest.asciidoc
@@ -161,6 +161,53 @@ can contain misspellings (See parameter descriptions below).
     in a row are changed the entire phrase of changed tokens 
     is wrapped rather than each token.
 
+`collate`::
+    Checks each suggestion against the specified `query` or `filter` to
+    prune suggestions for which no matching docs exist in the index. Either
+    a `query` or a `filter` (but not both) must be specified, and it is run
+    as a <<query-dsl-template-query,`template` query>>. The current
+    suggestion is automatically made available as the `{{suggestion}}`
+    variable, which should be used in your query/filter. You can still
+    specify your own template `params` -- the `suggestion` value will be
+    added to the variables you specify. You can also specify a `preference`
+    to control on which shards the query is executed
+    (see <<search-request-preference>>). The default `preference` is
+    `_only_local`.
+
+[source,js]
+--------------------------------------------------
+curl -XPOST 'localhost:9200/_search' -d {
+   "suggest" : {
+     "text" : "Xor the Got-Jewel",
+     "simple_phrase" : {
+       "phrase" : {
+         "field" :  "bigram",
+         "size" :   1,
+         "direct_generator" : [ {
+           "field" :            "body",
+           "suggest_mode" :     "always",
+           "min_word_length" :  1
+         } ],
+         "collate": {
+           "query": { <1>
+             "match": {
+                 "{{field_name}}" : "{{suggestion}}" <2>
+             }
+           },
+           "params": {"field_name" : "title"}, <3>
+           "preference": "_primary" <4>
+         }
+       }
+     }
+   }
+ }
+--------------------------------------------------
+<1> This query will be run once for every suggestion.
+<2> The `{{suggestion}}` variable will be replaced by the text
+    of each suggestion.
+<3> An additional `field_name` variable has been specified in
+    `params` and is used by the `match` query.
+<4> The default `preference` has been changed to `_primary`.
+
 ==== Smoothing Models
 
 The `phrase` suggester supports multiple smoothing models to balance

diff --git a/src/main/java/org/elasticsearch/cluster/routing/operation/plain/PlainOperationRouting.java b/src/main/java/org/elasticsearch/cluster/routing/operation/plain/PlainOperationRouting.java
@@ -167,14 +167,16 @@ private ShardIterator preferenceActiveShardIterator(IndexShardRoutingTable index
             }
         }
         if (preference.charAt(0) == '_') {
-            if (preference.startsWith("_shards:")) {
+            Preference preferenceType = Preference.parse(preference);
+            if (preferenceType == Preference.SHARDS) {
                 // starts with _shards, so execute on specific ones
                 int index = preference.indexOf(';');
+
                 String shards;
                 if (index == -1) {
-                    shards = preference.substring("_shards:".length());
+                    shards = preference.substring(Preference.SHARDS.type().length() + 1);
                 } else {
-                    shards = preference.substring("_shards:".length(), index);
+                    shards = preference.substring(Preference.SHARDS.type().length() + 1, index);
                 }
                 String[] ids = Strings.splitStringByCommaToArray(shards);
                 boolean found = false;
@@ -200,25 +202,24 @@ private ShardIterator preferenceActiveShardIterator(IndexShardRoutingTable index
                     preference = preference.substring(index + 1);
                 }
             }
-            if (preference.startsWith("_prefer_node:")) {
-                return indexShard.preferNodeActiveInitializingShardsIt(preference.substring("_prefer_node:".length()));
-            }
-            if ("_local".equals(preference)) {
-                return indexShard.preferNodeActiveInitializingShardsIt(localNodeId);
-            }
-            if ("_primary".equals(preference)) {
-                return indexShard.primaryActiveInitializingShardIt();
-            }
-            if ("_primary_first".equals(preference) || "_primaryFirst".equals(preference)) {
-                return indexShard.primaryFirstActiveInitializingShardsIt();
-            }
-            if ("_only_local".equals(preference) || "_onlyLocal".equals(preference)) {
-                return indexShard.onlyNodeActiveInitializingShardsIt(localNodeId);
-            }
-            if (preference.startsWith("_only_node:")) {
-                String nodeId = preference.substring("_only_node:".length());
-                ensureNodeIdExists(nodes, nodeId);
-                return indexShard.onlyNodeActiveInitializingShardsIt(nodeId);
+            preferenceType = Preference.parse(preference);
+            switch (preferenceType) {
+                case PREFER_NODE:
+                    return indexShard.preferNodeActiveInitializingShardsIt(preference.substring(Preference.PREFER_NODE.type().length() + 1));
+                case LOCAL:
+                    return indexShard.preferNodeActiveInitializingShardsIt(localNodeId);
+                case PRIMARY:
+                    return indexShard.primaryActiveInitializingShardIt();
+                case PRIMARY_FIRST:
+                    return indexShard.primaryFirstActiveInitializingShardsIt();
+                case ONLY_LOCAL:
+                    return indexShard.onlyNodeActiveInitializingShardsIt(localNodeId);
+                case ONLY_NODE:
+                    String nodeId = preference.substring(Preference.ONLY_NODE.type().length() + 1);
+                    ensureNodeIdExists(nodes, nodeId);
+                    return indexShard.onlyNodeActiveInitializingShardsIt(nodeId);
+                default:
+                    throw new ElasticsearchIllegalArgumentException("unknown preference [" + preferenceType + "]");
             }
         }
         // if not, then use it as the index

diff --git a/src/main/java/org/elasticsearch/cluster/routing/operation/plain/Preference.java b/src/main/java/org/elasticsearch/cluster/routing/operation/plain/Preference.java
@@ -0,0 +1,109 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.cluster.routing.operation.plain;
+
+import org.elasticsearch.ElasticsearchIllegalArgumentException;
+import org.elasticsearch.common.collect.Tuple;
+
+/**
+ * Routing preference types. Each constant carries the textual name under
+ * which it appears at the start of a preference string (for example
+ * {@code _shards} or {@code _only_node}).
+ */
+public enum Preference {
+
+    /** Route to specific shards. */
+    SHARDS("_shards"),
+
+    /** Route to a preferred node, if possible. */
+    PREFER_NODE("_prefer_node"),
+
+    /** Route to the local node, if possible. */
+    LOCAL("_local"),
+
+    /** Route to primary shards. */
+    PRIMARY("_primary"),
+
+    /** Route to primary shards first. */
+    PRIMARY_FIRST("_primary_first"),
+
+    /** Route to the local shard only. */
+    ONLY_LOCAL("_only_local"),
+
+    /** Route to a specific node only. */
+    ONLY_NODE("_only_node");
+
+    private final String type;
+
+    Preference(String type) {
+        this.type = type;
+    }
+
+    /**
+     * @return the textual name of this preference type, e.g. {@code _primary}
+     */
+    public String type() {
+        return type;
+    }
+
+    /**
+     * Resolves the preference type named by a preference string.
+     * <p>
+     * Only the text before the first {@code ':'} is examined, so a value such
+     * as {@code _only_node:abc} resolves to {@link #ONLY_NODE}. The camelCase
+     * spellings {@code _primaryFirst} and {@code _onlyLocal} are accepted as
+     * aliases of {@link #PRIMARY_FIRST} and {@link #ONLY_LOCAL}.
+     *
+     * @param preference the raw preference string
+     * @return the matching preference type
+     * @throws ElasticsearchIllegalArgumentException if the name matches no type
+     */
+    public static Preference parse(String preference) {
+        final int separator = preference.indexOf(':');
+        final String name = separator < 0 ? preference : preference.substring(0, separator);
+
+        if (SHARDS.type.equals(name)) {
+            return SHARDS;
+        }
+        if (PREFER_NODE.type.equals(name)) {
+            return PREFER_NODE;
+        }
+        if (ONLY_NODE.type.equals(name)) {
+            return ONLY_NODE;
+        }
+        if (LOCAL.type.equals(name)) {
+            return LOCAL;
+        }
+        if (PRIMARY.type.equals(name)) {
+            return PRIMARY;
+        }
+        // the camelCase spellings are kept as aliases of the snake_case names
+        if (PRIMARY_FIRST.type.equals(name) || "_primaryFirst".equals(name)) {
+            return PRIMARY_FIRST;
+        }
+        if (ONLY_LOCAL.type.equals(name) || "_onlyLocal".equals(name)) {
+            return ONLY_LOCAL;
+        }
+        throw new ElasticsearchIllegalArgumentException("no Preference for [" + name + "]");
+    }
+}
+
+
+
diff --git a/src/main/java/org/elasticsearch/index/query/BytesFilterBuilder.java b/src/main/java/org/elasticsearch/index/query/BytesFilterBuilder.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.query;
+
+import org.elasticsearch.common.bytes.BytesReference;
+import org.elasticsearch.common.xcontent.*;
+
+import java.io.IOException;
+
+/**
+ * A filter builder whose content is taken from an already serialized
+ * {@link org.elasticsearch.common.bytes.BytesReference} source.
+ */
+public class BytesFilterBuilder extends BaseFilterBuilder {
+
+    private final BytesReference source;
+
+    /**
+     * @param source the serialized filter this builder will emit
+     */
+    public BytesFilterBuilder(BytesReference source) {
+        this.source = source;
+    }
+
+    /**
+     * Parses the stored source and copies its content into {@code builder},
+     * leaving out the outermost wrapping layer.
+     */
+    @Override
+    protected void doXContent(XContentBuilder builder, Params params) throws IOException {
+        try (XContentParser sourceParser = XContentFactory.xContent(source).createParser(source)) {
+            // Advance two tokens so that copying starts inside the first layer of
+            // the source dictionary rather than at its outer wrapper.
+            sourceParser.nextToken();
+            sourceParser.nextToken();
+            builder.copyCurrentStructure(sourceParser);
+        }
+    }
+}
diff --git a/src/main/java/org/elasticsearch/index/query/FilterBuilders.java b/src/main/java/org/elasticsearch/index/query/FilterBuilders.java
@@ -20,6 +20,7 @@
 package org.elasticsearch.index.query;
 
 import org.elasticsearch.common.Nullable;
+import org.elasticsearch.common.bytes.BytesReference;
 import org.elasticsearch.common.geo.GeoPoint;
 import org.elasticsearch.common.geo.ShapeRelation;
 import org.elasticsearch.common.geo.builders.ShapeBuilder;
@@ -557,6 +558,15 @@ public static WrapperFilterBuilder wrapperFilter(byte[] data, int offset, int le
         return new WrapperFilterBuilder(data, offset, length);
     }
 
+    /**
+     * Creates a filter builder that generates its filter from the given
+     * {@link BytesReference} source.
+     *
+     * @param source The filter source
+     * @return a new {@link BytesFilterBuilder} wrapping {@code source}
+     */
+    public static BytesFilterBuilder bytesFilter(BytesReference source) {
+        final BytesFilterBuilder filter = new BytesFilterBuilder(source);
+        return filter;
+    }
+
     private FilterBuilders() {
 
     }

diff --git a/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestParser.java b/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestParser.java
@@ -23,10 +23,12 @@
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.ElasticsearchIllegalArgumentException;
+import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.common.xcontent.XContentParser;
 import org.elasticsearch.common.xcontent.XContentParser.Token;
 import org.elasticsearch.index.analysis.ShingleTokenFilterFactory;
 import org.elasticsearch.index.mapper.MapperService;
+import org.elasticsearch.script.CompiledScript;
 import org.elasticsearch.search.suggest.SuggestContextParser;
 import org.elasticsearch.search.suggest.SuggestUtils;
 import org.elasticsearch.search.suggest.SuggestionSearchContext;
@@ -124,6 +126,43 @@ public SuggestionSearchContext.SuggestionContext parse(XContentParser parser, Ma
                             }
                         }
                     }
+                } else if ("collate".equals(fieldName)) {
+                    while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
+                        if (token == XContentParser.Token.FIELD_NAME) {
+                            fieldName = parser.currentName();
+                        } else if ("query".equals(fieldName) || "filter".equals(fieldName)) {
+                            String templateNameOrTemplateContent;
+                            if (token == XContentParser.Token.START_OBJECT && !parser.hasTextCharacters()) {
+                                XContentBuilder builder = XContentBuilder.builder(parser.contentType().xContent());
+                                builder.copyCurrentStructure(parser);
+                                templateNameOrTemplateContent = builder.string();
+                            } else {
+                                templateNameOrTemplateContent = parser.text();
+                            }
+                            if (templateNameOrTemplateContent == null) {
+                                throw new ElasticsearchIllegalArgumentException("suggester[phrase][collate] no query/filter found in collate object");
+                            }
+                            if (suggestion.getCollateFilterScript() != null) {
+                                throw new ElasticsearchIllegalArgumentException("suggester[phrase][collate] filter already set, doesn't support additional [" + fieldName + "]");
+                            }
+                            if (suggestion.getCollateQueryScript() != null) {
+                                throw new ElasticsearchIllegalArgumentException("suggester[phrase][collate] query already set, doesn't support additional [" + fieldName + "]");
+                            }
+                            CompiledScript compiledScript = suggester.scriptService().compile("mustache", templateNameOrTemplateContent);
+                            if ("query".equals(fieldName)) {
+                                suggestion.setCollateQueryScript(compiledScript);
+                            } else {
+                                suggestion.setCollateFilterScript(compiledScript);
+                            }
+                        } else if ("preference".equals(fieldName)) {
+                            suggestion.setPreference(parser.text());
+                        } else if ("params".equals(fieldName)) {
+                            suggestion.setCollateScriptParams(parser.map());
+                        } else {
+                            throw new ElasticsearchIllegalArgumentException(
+                                    "suggester[phrase][collate] doesn't support field [" + fieldName + "]");
+                        }
+                    }
                 } else {
                     throw new ElasticsearchIllegalArgumentException("suggester[phrase]  doesn't support array field [" + fieldName + "]");
                 }