Skip to content

Commit

Permalink
Phrase Suggester: Add collate option to PhraseSuggester
Browse files Browse the repository at this point in the history
The newly added collate option lets the user provide a template query/filter, which is executed for every generated phrase suggestion to ensure that the suggestion matches at least one document for the filter/query.
The user can also specify a routing `preference` to control where the collate query/filter is executed, and additional `params` to inject into the collate template.

Closes #3482
  • Loading branch information
areek committed Jul 14, 2014
1 parent f8be82f commit 7634389
Show file tree
Hide file tree
Showing 10 changed files with 622 additions and 26 deletions.
47 changes: 47 additions & 0 deletions docs/reference/search/suggesters/phrase-suggest.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,53 @@ can contain misspellings (See parameter descriptions below).
in a row are changed the entire phrase of changed tokens
is wrapped rather than each token.

`collate`::
Checks each suggestion against the specified `query` or `filter` to
prune suggestions for which no matching docs exist in the index. Either
a `query` or a `filter` must be specified, and it is run as a
<<query-dsl-template-query,`template` query>>. The current suggestion is
automatically made available as the `{{suggestion}}` variable, which
should be used in your query/filter. You can still specify your own
template `params` -- the `suggestion` value will be added to the
variables you specify. You can also specify a `preference` to control
on which shards the query is executed (see <<search-request-preference>>).
The default `preference` is `_only_local`.

[source,js]
--------------------------------------------------
curl -XPOST 'localhost:9200/_search' -d {
"suggest" : {
"text" : "Xor the Got-Jewel",
"simple_phrase" : {
"phrase" : {
"field" : "bigram",
"size" : 1,
"direct_generator" : [ {
"field" : "body",
"suggest_mode" : "always",
"min_word_length" : 1
} ],
"collate": {
"query": { <1>
"match": {
"{{field_name}}" : "{{suggestion}}" <2>
}
},
"params": {"field_name" : "title"}, <3>
"preference": "_primary" <4>
}
}
}
}
}
--------------------------------------------------
<1> This query will be run once for every suggestion.
<2> The `{{suggestion}}` variable will be replaced by the text
of each suggestion.
<3> An additional `field_name` variable has been specified in
`params` and is used by the `match` query.
<4> The default `preference` has been changed to `_primary`.

==== Smoothing Models

The `phrase` suggester supports multiple smoothing models to balance
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -167,14 +167,16 @@ private ShardIterator preferenceActiveShardIterator(IndexShardRoutingTable index
}
}
if (preference.charAt(0) == '_') {
if (preference.startsWith("_shards:")) {
Preference preferenceType = Preference.parse(preference);
if (preferenceType == Preference.SHARDS) {
// starts with _shards, so execute on specific ones
int index = preference.indexOf(';');

String shards;
if (index == -1) {
shards = preference.substring("_shards:".length());
shards = preference.substring(Preference.SHARDS.type().length() + 1);
} else {
shards = preference.substring("_shards:".length(), index);
shards = preference.substring(Preference.SHARDS.type().length() + 1, index);
}
String[] ids = Strings.splitStringByCommaToArray(shards);
boolean found = false;
Expand All @@ -200,25 +202,24 @@ private ShardIterator preferenceActiveShardIterator(IndexShardRoutingTable index
preference = preference.substring(index + 1);
}
}
if (preference.startsWith("_prefer_node:")) {
return indexShard.preferNodeActiveInitializingShardsIt(preference.substring("_prefer_node:".length()));
}
if ("_local".equals(preference)) {
return indexShard.preferNodeActiveInitializingShardsIt(localNodeId);
}
if ("_primary".equals(preference)) {
return indexShard.primaryActiveInitializingShardIt();
}
if ("_primary_first".equals(preference) || "_primaryFirst".equals(preference)) {
return indexShard.primaryFirstActiveInitializingShardsIt();
}
if ("_only_local".equals(preference) || "_onlyLocal".equals(preference)) {
return indexShard.onlyNodeActiveInitializingShardsIt(localNodeId);
}
if (preference.startsWith("_only_node:")) {
String nodeId = preference.substring("_only_node:".length());
ensureNodeIdExists(nodes, nodeId);
return indexShard.onlyNodeActiveInitializingShardsIt(nodeId);
preferenceType = Preference.parse(preference);
switch (preferenceType) {
case PREFER_NODE:
return indexShard.preferNodeActiveInitializingShardsIt(preference.substring(Preference.PREFER_NODE.type().length() + 1));
case LOCAL:
return indexShard.preferNodeActiveInitializingShardsIt(localNodeId);
case PRIMARY:
return indexShard.primaryActiveInitializingShardIt();
case PRIMARY_FIRST:
return indexShard.primaryFirstActiveInitializingShardsIt();
case ONLY_LOCAL:
return indexShard.onlyNodeActiveInitializingShardsIt(localNodeId);
case ONLY_NODE:
String nodeId = preference.substring(Preference.ONLY_NODE.type().length() + 1);
ensureNodeIdExists(nodes, nodeId);
return indexShard.onlyNodeActiveInitializingShardsIt(nodeId);
default:
throw new ElasticsearchIllegalArgumentException("unknown preference [" + preferenceType + "]");
}
}
// if not, then use it as the index
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.cluster.routing.operation.plain;

import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.common.collect.Tuple;

/**
 * The kinds of routing preference that can be named in a preference string.
 * <p>
 * Every constant carries the textual name by which it is written, e.g.
 * {@code _primary}. Some preferences are followed by a {@code ':'} and an
 * argument; {@link #parse(String)} only looks at the part before the colon.
 */
public enum Preference {

    /** Route to an explicit set of shards. */
    SHARDS("_shards"),

    /** Route to a preferred node when possible. */
    PREFER_NODE("_prefer_node"),

    /** Route to the local node when possible. */
    LOCAL("_local"),

    /** Route to primary shards. */
    PRIMARY("_primary"),

    /** Route to primary shards first. */
    PRIMARY_FIRST("_primary_first"),

    /** Route to the local shard only. */
    ONLY_LOCAL("_only_local"),

    /** Route to one specific node only. */
    ONLY_NODE("_only_node");

    private final String type;

    Preference(String type) {
        this.type = type;
    }

    /**
     * @return the textual name of this preference type, without any
     *         {@code ':'} separator or argument
     */
    public String type() {
        return type;
    }

    /**
     * Resolves the preference type named by a preference string.
     * <p>
     * Only the text before the first {@code ':'} is considered (the whole
     * string when there is no colon). Besides the canonical names returned by
     * {@link #type()}, the camelCase spellings {@code _primaryFirst} and
     * {@code _onlyLocal} are accepted.
     *
     * @param preference the preference string to inspect
     * @return the matching preference type
     * @throws ElasticsearchIllegalArgumentException if the name matches no known type
     */
    public static Preference parse(String preference) {
        final int separator = preference.indexOf(':');
        final String name = separator < 0 ? preference : preference.substring(0, separator);

        // canonical names first
        for (Preference candidate : values()) {
            if (candidate.type.equals(name)) {
                return candidate;
            }
        }
        // camelCase spellings accepted alongside the canonical snake_case names
        if ("_primaryFirst".equals(name)) {
            return PRIMARY_FIRST;
        }
        if ("_onlyLocal".equals(name)) {
            return ONLY_LOCAL;
        }
        throw new ElasticsearchIllegalArgumentException("no Preference for [" + name + "]");
    }
}



Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.index.query;

import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.xcontent.*;

import java.io.IOException;

/**
 * A filter builder whose content comes from an already-serialized
 * {@link org.elasticsearch.common.bytes.BytesReference} rather than from
 * individual setters.
 */
public class BytesFilterBuilder extends BaseFilterBuilder {

    private final BytesReference filterSource;

    /**
     * @param source the serialized filter source to emit
     */
    public BytesFilterBuilder(BytesReference source) {
        this.filterSource = source;
    }

    /**
     * Parses the stored source and copies its content into {@code builder},
     * skipping the outermost wrapping layer.
     */
    @Override
    protected void doXContent(XContentBuilder builder, Params params) throws IOException {
        try (XContentParser sourceParser = XContentFactory.xContent(filterSource).createParser(filterSource)) {
            // Advance two tokens so the outer dictionary layer is unwrapped
            // before copying (presumably: start of object, then its first entry).
            for (int skipped = 0; skipped < 2; skipped++) {
                sourceParser.nextToken();
            }
            builder.copyCurrentStructure(sourceParser);
        }
    }
}
10 changes: 10 additions & 0 deletions src/main/java/org/elasticsearch/index/query/FilterBuilders.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
package org.elasticsearch.index.query;

import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.geo.GeoPoint;
import org.elasticsearch.common.geo.ShapeRelation;
import org.elasticsearch.common.geo.builders.ShapeBuilder;
Expand Down Expand Up @@ -557,6 +558,15 @@ public static WrapperFilterBuilder wrapperFilter(byte[] data, int offset, int le
return new WrapperFilterBuilder(data, offset, length);
}

/**
 * Creates a filter builder backed by an already-serialized
 * {@link BytesReference}.
 *
 * @param source the serialized filter source
 * @return a new {@link BytesFilterBuilder} wrapping {@code source}
 */
public static BytesFilterBuilder bytesFilter(BytesReference source) {
    final BytesFilterBuilder fromBytes = new BytesFilterBuilder(source);
    return fromBytes;
}

// Private no-op constructor. The factory methods visible here are static, so
// this class is presumably not meant to be instantiated.
private FilterBuilders() {
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,12 @@
import org.apache.lucene.index.Terms;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentParser.Token;
import org.elasticsearch.index.analysis.ShingleTokenFilterFactory;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.script.CompiledScript;
import org.elasticsearch.search.suggest.SuggestContextParser;
import org.elasticsearch.search.suggest.SuggestUtils;
import org.elasticsearch.search.suggest.SuggestionSearchContext;
Expand Down Expand Up @@ -124,6 +126,43 @@ public SuggestionSearchContext.SuggestionContext parse(XContentParser parser, Ma
}
}
}
} else if ("collate".equals(fieldName)) {
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
if (token == XContentParser.Token.FIELD_NAME) {
fieldName = parser.currentName();
} else if ("query".equals(fieldName) || "filter".equals(fieldName)) {
String templateNameOrTemplateContent;
if (token == XContentParser.Token.START_OBJECT && !parser.hasTextCharacters()) {
XContentBuilder builder = XContentBuilder.builder(parser.contentType().xContent());
builder.copyCurrentStructure(parser);
templateNameOrTemplateContent = builder.string();
} else {
templateNameOrTemplateContent = parser.text();
}
if (templateNameOrTemplateContent == null) {
throw new ElasticsearchIllegalArgumentException("suggester[phrase][collate] no query/filter found in collate object");
}
if (suggestion.getCollateFilterScript() != null) {
throw new ElasticsearchIllegalArgumentException("suggester[phrase][collate] filter already set, doesn't support additional [" + fieldName + "]");
}
if (suggestion.getCollateQueryScript() != null) {
throw new ElasticsearchIllegalArgumentException("suggester[phrase][collate] query already set, doesn't support additional [" + fieldName + "]");
}
CompiledScript compiledScript = suggester.scriptService().compile("mustache", templateNameOrTemplateContent);
if ("query".equals(fieldName)) {
suggestion.setCollateQueryScript(compiledScript);
} else {
suggestion.setCollateFilterScript(compiledScript);
}
} else if ("preference".equals(fieldName)) {
suggestion.setPreference(parser.text());
} else if ("params".equals(fieldName)) {
suggestion.setCollateScriptParams(parser.map());
} else {
throw new ElasticsearchIllegalArgumentException(
"suggester[phrase][collate] doesn't support field [" + fieldName + "]");
}
}
} else {
throw new ElasticsearchIllegalArgumentException("suggester[phrase] doesn't support array field [" + fieldName + "]");
}
Expand Down
Loading

0 comments on commit 7634389

Please sign in to comment.