Add more comprehensive tests for field aliases in queries + aggregations. (#31565)

* Make sure that significant terms aggregations work with field aliases.
* Add a test for ValuesSourceConfig.
* Allow for subclasses of AggregatorTestCase to provide field aliases.
* Add tests for nested and reverse_nested aggregations.
* Add an integration test for nested queries.
* Add an integration test for 'more like this' queries.
* Add tests for querying and loading meta-fields.
* Add unit tests for the relevant query builders.
* Add integration tests for geo polygon and shape queries.
* Fix static analysis violations.
* Document that aliases cannot be used in a query lookup path.
jtibshirani committed Jul 18, 2018
1 parent 5995794 commit 6cfa032
Showing 47 changed files with 894 additions and 256 deletions.
12 changes: 8 additions & 4 deletions docs/reference/mapping/types/alias.asciidoc
@@ -74,7 +74,7 @@ field alias to query over multiple target fields in a single clause.
==== Unsupported APIs

Writes to field aliases are not supported: attempting to use an alias in an index or update request
- will result in a failure. This also precludes aliases from being the target of `copy_to`.
+ will result in a failure. Likewise, aliases cannot be used as the target of `copy_to`.

Because alias names are not present in the document source, aliases cannot be used when performing
source filtering. For example, the following request will return an empty result for `_source`:
@@ -92,6 +92,10 @@ GET /_search
// CONSOLE
// TEST[continued]

- Finally, currently only the search and field capabilities APIs will accept and resolve
- field aliases. Other APIs that accept field names, such as <<docs-termvectors, term vectors>>,
- cannot be used with field aliases.
+ Currently only the search and field capabilities APIs will accept and resolve field aliases.
+ Other APIs that accept field names, such as <<docs-termvectors, term vectors>>, cannot be used
+ with field aliases.

+ Finally, some queries, such as `terms`, `geo_shape`, and `more_like_this`, allow for fetching query
+ information from an indexed document. Because field aliases aren't supported when fetching documents,
+ the part of the query that specifies the lookup path cannot refer to a field by its alias.
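
To make the lookup-path restriction concrete, here is a hedged sketch using the Java query
builders; the index, type, id, and field names are all hypothetical:

import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.TermsQueryBuilder;
import org.elasticsearch.indices.TermsLookup;

// Suppose the mapping defines a concrete field "user.name" and a field alias
// "username" pointing at it. The alias is fine as the queried field, since the
// search API resolves aliases...
TermsQueryBuilder query = QueryBuilders.termsLookupQuery("username",
    // ...but the lookup path into the referenced document must use the
    // concrete field name; the alias "username" would not resolve here.
    new TermsLookup("users", "_doc", "1", "user.name"));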
@@ -149,7 +149,7 @@ public static Query newFilter(QueryShardContext context, String fieldPattern) {
}

if (context.indexVersionCreated().before(Version.V_6_1_0)) {
- return newLegacyExistsQuery(fields);
+ return newLegacyExistsQuery(context, fields);
}

if (fields.size() == 1) {
@@ -164,22 +164,28 @@ public static Query newFilter(QueryShardContext context, String fieldPattern) {
return new ConstantScoreQuery(boolFilterBuilder.build());
}

- private static Query newLegacyExistsQuery(Collection<String> fields) {
+ private static Query newLegacyExistsQuery(QueryShardContext context, Collection<String> fields) {
// We create TermsQuery directly here rather than using FieldNamesFieldType.termsQuery()
// so we don't end up with deprecation warnings
if (fields.size() == 1) {
- Query filter = new TermQuery(new Term(FieldNamesFieldMapper.NAME, fields.iterator().next()));
+ Query filter = newLegacyExistsQuery(context, fields.iterator().next());
return new ConstantScoreQuery(filter);
}

BooleanQuery.Builder boolFilterBuilder = new BooleanQuery.Builder();
for (String field : fields) {
- Query filter = new TermQuery(new Term(FieldNamesFieldMapper.NAME, field));
+ Query filter = newLegacyExistsQuery(context, field);
boolFilterBuilder.add(filter, BooleanClause.Occur.SHOULD);
}
return new ConstantScoreQuery(boolFilterBuilder.build());
}

+ private static Query newLegacyExistsQuery(QueryShardContext context, String field) {
+ MappedFieldType fieldType = context.fieldMapper(field);
+ String fieldName = fieldType != null ? fieldType.name() : field;
+ return new TermQuery(new Term(FieldNamesFieldMapper.NAME, fieldName));
+ }

private static Query newFieldExistsQuery(QueryShardContext context, String field) {
MappedFieldType fieldType = context.getMapperService().fullName(field);
if (fieldType == null) {
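
The helper above captures the resolution idiom used throughout this commit: resolve the name
through context.fieldMapper and fall back to the raw name for unmapped fields. A minimal usage
sketch (the alias name is hypothetical):

import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;

// Assuming "employee-id" is a field alias: with this change, the legacy
// _field_names term is built from the concrete target name, so the exists
// query also matches on indices created before 6.1.
QueryBuilder query = QueryBuilders.existsQuery("employee-id");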
@@ -44,6 +44,7 @@
import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
+ import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.query.support.QueryParsers;

import java.io.IOException;
@@ -202,14 +203,18 @@ protected Query doToQuery(QueryShardContext context) throws IOException {
multiTermQueryBuilder.getClass().getName() + ", should be " + MultiTermQuery.class.getName()
+ " but was " + subQuery.getClass().getName());
}

+ PrefixQueryBuilder prefixBuilder = (PrefixQueryBuilder) multiTermQueryBuilder;
+ MappedFieldType fieldType = context.fieldMapper(prefixBuilder.fieldName());
+ String fieldName = fieldType != null ? fieldType.name() : prefixBuilder.fieldName();

if (context.getIndexSettings().getIndexVersionCreated().before(Version.V_6_4_0)) {
/**
* Indices created in this version do not index positions on the prefix field
* so we cannot use it to match positional queries. Instead, we explicitly create the prefix
* query on the main field to avoid the rewrite.
*/
- PrefixQueryBuilder prefixBuilder = (PrefixQueryBuilder) multiTermQueryBuilder;
- PrefixQuery prefixQuery = new PrefixQuery(new Term(prefixBuilder.fieldName(), prefixBuilder.value()));
+ PrefixQuery prefixQuery = new PrefixQuery(new Term(fieldName, prefixBuilder.value()));
if (prefixBuilder.rewrite() != null) {
MultiTermQuery.RewriteMethod rewriteMethod =
QueryParsers.parseRewriteMethod(prefixBuilder.rewrite(), null, LoggingDeprecationHandler.INSTANCE);
@@ -218,15 +223,14 @@ protected Query doToQuery(QueryShardContext context) throws IOException {
subQuery = prefixQuery;
spanQuery = new SpanMultiTermQueryWrapper<>(prefixQuery);
} else {
- String origFieldName = ((PrefixQueryBuilder) multiTermQueryBuilder).fieldName();
- SpanTermQuery spanTermQuery = new SpanTermQuery(((TermQuery) subQuery).getTerm());
/**
* Prefixes are indexed in a different field so we mask the term query with the original field
* name. This is required because span_near and span_or queries don't work across different field.
* The masking is safe because the prefix field is indexed using the same content than the original field
* and the prefix analyzer preserves positions.
*/
- spanQuery = new FieldMaskingSpanQuery(spanTermQuery, origFieldName);
+ SpanTermQuery spanTermQuery = new SpanTermQuery(((TermQuery) subQuery).getTerm());
+ spanQuery = new FieldMaskingSpanQuery(spanTermQuery, fieldName);
}
} else {
if (subQuery instanceof MultiTermQuery == false) {
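
Both branches of the version check now use the resolved concrete name, for the explicit
PrefixQuery on older indices and for the FieldMaskingSpanQuery mask on newer ones, so a
span_multi clause built on an alias behaves like one built on its target. A hedged sketch
(the field name is hypothetical):

import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.SpanMultiTermQueryBuilder;

// "title-alias" stands in for a field alias over an indexed text field; the
// wrapped prefix query is rewritten against the concrete target field.
SpanMultiTermQueryBuilder spanPrefix =
    QueryBuilders.spanMultiTermQueryBuilder(QueryBuilders.prefixQuery("title-alias", "elast"));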
@@ -30,6 +30,7 @@
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentLocation;
import org.elasticsearch.common.xcontent.XContentParser;
+ import org.elasticsearch.index.mapper.MappedFieldType;

import java.io.IOException;
import java.util.ArrayList;
@@ -218,7 +219,8 @@ protected Query doToQuery(QueryShardContext context) throws IOException {
}
String spanNearFieldName = null;
if (isGap) {
- spanNearFieldName = ((SpanGapQueryBuilder) queryBuilder).fieldName();
+ String fieldName = ((SpanGapQueryBuilder) queryBuilder).fieldName();
+ spanNearFieldName = queryFieldName(context, fieldName);
} else {
spanNearFieldName = ((SpanQuery) query).getField();
}
@@ -241,7 +243,9 @@ protected Query doToQuery(QueryShardContext context) throws IOException {
isGap = queryBuilder instanceof SpanGapQueryBuilder;
if (isGap) {
String fieldName = ((SpanGapQueryBuilder) queryBuilder).fieldName();
- if (!spanNearFieldName.equals(fieldName)) {
+ String spanGapFieldName = queryFieldName(context, fieldName);
+
+ if (!spanNearFieldName.equals(spanGapFieldName)) {
throw new IllegalArgumentException("[span_near] clauses must have same field");
}
int gap = ((SpanGapQueryBuilder) queryBuilder).width();
@@ -255,6 +259,11 @@ protected Query doToQuery(QueryShardContext context) throws IOException {
return builder.build();
}

+ private String queryFieldName(QueryShardContext context, String fieldName) {
+ MappedFieldType fieldType = context.fieldMapper(fieldName);
+ return fieldType != null ? fieldType.name() : fieldName;
+ }

@Override
protected int doHashCode() {
return Objects.hash(clauses, slop, inOrder);
@@ -273,11 +282,11 @@ public String getWriteableName() {
}

/**
- * SpanGapQueryBuilder enables gaps in a SpanNearQuery.
+ * SpanGapQueryBuilder enables gaps in a SpanNearQuery.
* Since SpanGapQuery is private to SpanNearQuery, SpanGapQueryBuilder cannot
* be used to generate a Query (SpanGapQuery) like another QueryBuilder.
- * Instead, it just identifies a span_gap clause so that SpanNearQuery.addGap(int)
- * can be invoked for it.
+ * Instead, it just identifies a span_gap clause so that SpanNearQuery.addGap(int)
+ * can be invoked for it.
* This QueryBuilder is only usable as a clause in a SpanNearQueryBuilder, but
* this restriction is not yet enforced.
*/
@@ -286,9 +295,9 @@ public static class SpanGapQueryBuilder implements SpanQueryBuilder {

/** Name of field to match against. */
private final String fieldName;

/** Width of the gap introduced. */
- private final int width;
+ private final int width;

/**
* Constructs a new SpanGapQueryBuilder term query.
Expand All @@ -301,7 +310,7 @@ public SpanGapQueryBuilder(String fieldName, int width) {
throw new IllegalArgumentException("[span_gap] field name is null or empty");
}
//lucene has not coded any restriction on value of width.
- //to-do : find if theoretically it makes sense to apply restrictions.
+ //to-do : find if theoretically it makes sense to apply restrictions.
this.fieldName = fieldName;
this.width = width;
}
@@ -396,7 +405,7 @@ public static SpanGapQueryBuilder fromXContent(XContentParser parser) throws IOException {
fieldName = currentFieldName;
} else if (token.isValue()) {
width = parser.intValue();
- }
+ }
}
SpanGapQueryBuilder result = new SpanGapQueryBuilder(fieldName, width);
return result;
@@ -420,7 +429,7 @@ public final int hashCode() {
return Objects.hash(getClass(), fieldName, width);
}


@Override
public final String toString() {
return Strings.toString(this, true, true);
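
Because span_gap clauses now go through queryFieldName, a gap declared on a field alias compares
equal to sibling clauses on the concrete field. A hedged sketch of assembling such a query (field
and term values are hypothetical):

import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.SpanNearQueryBuilder;
import org.elasticsearch.index.query.SpanNearQueryBuilder.SpanGapQueryBuilder;

// "body" could equally be an alias for the field used by the other clauses;
// the same-field check now compares the resolved names.
SpanNearQueryBuilder query = QueryBuilders
    .spanNearQuery(QueryBuilders.spanTermQuery("body", "quick"), 0)
    .addClause(new SpanGapQueryBuilder("body", 2))
    .addClause(QueryBuilders.spanTermQuery("body", "fox"))
    .inOrder(true);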
@@ -221,7 +221,7 @@ public static TermsSetQueryBuilder fromXContent(XContentParser parser) throws IOException {
}

@Override
- protected Query doToQuery(QueryShardContext context) throws IOException {
+ protected Query doToQuery(QueryShardContext context) {
if (values.isEmpty()) {
return Queries.newMatchNoDocsQuery("No terms supplied for \"" + getName() + "\" query.");
}
@@ -230,6 +230,15 @@ protected Query doToQuery(QueryShardContext context) throws IOException {
throw new BooleanQuery.TooManyClauses();
}

+ List<Query> queries = createTermQueries(context);
+ LongValuesSource longValuesSource = createValuesSource(context);
+ return new CoveringQuery(queries, longValuesSource);
+ }
+
+ /**
+ * Visible only for testing purposes.
+ */
+ List<Query> createTermQueries(QueryShardContext context) {
final MappedFieldType fieldType = context.fieldMapper(fieldName);
final List<Query> queries = new ArrayList<>(values.size());
for (Object value : values) {
@@ -239,7 +248,11 @@ protected Query doToQuery(QueryShardContext context) throws IOException {
queries.add(new TermQuery(new Term(fieldName, BytesRefs.toBytesRef(value))));
}
}
- final LongValuesSource longValuesSource;
+ return queries;
+ }
+
+ private LongValuesSource createValuesSource(QueryShardContext context) {
+ LongValuesSource longValuesSource;
if (minimumShouldMatchField != null) {
MappedFieldType msmFieldType = context.fieldMapper(minimumShouldMatchField);
if (msmFieldType == null) {
@@ -253,13 +266,13 @@ protected Query doToQuery(QueryShardContext context) throws IOException {
SearchScript.TERMS_SET_QUERY_CONTEXT);
Map<String, Object> params = new HashMap<>();
params.putAll(minimumShouldMatchScript.getParams());
params.put("num_terms", queries.size());
params.put("num_terms", values.size());
SearchScript.LeafFactory leafFactory = factory.newFactory(params, context.lookup());
longValuesSource = new ScriptLongValueSource(minimumShouldMatchScript, leafFactory);
} else {
throw new IllegalStateException("No minimum should match has been specified");
}
- return new CoveringQuery(queries, longValuesSource);
+ return longValuesSource;
}

static final class ScriptLongValueSource extends LongValuesSource {
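
Extracting createTermQueries (and the private createValuesSource) lets unit tests inspect the
rewritten per-term queries without executing the covering query. A hedged sketch of the check
this enables, assuming a test-scoped QueryShardContext whose mapping defines an alias over a
concrete field (all names are hypothetical):

import java.util.Arrays;
import java.util.List;
import org.apache.lucene.search.Query;

// Test mapping: alias "permissions-alias" -> concrete field "permissions".
TermsSetQueryBuilder builder = new TermsSetQueryBuilder("permissions-alias",
        Arrays.asList("read", "write"))
    .setMinimumShouldMatchField("required_matches");
List<Query> termQueries = builder.createTermQueries(queryShardContext);
// Each rewritten TermQuery is expected to target "permissions", not the alias.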
@@ -85,6 +85,12 @@ public SignificantTermsAggregatorFactory(String name,
AggregatorFactories.Builder subFactoriesBuilder,
Map<String, Object> metaData) throws IOException {
super(name, config, context, parent, subFactoriesBuilder, metaData);

+ if (!config.unmapped()) {
+ this.fieldType = config.fieldContext().fieldType();
+ this.indexedFieldName = fieldType.name();
+ }

this.includeExclude = includeExclude;
this.executionHint = executionHint;
this.filter = filterBuilder == null
@@ -98,15 +104,6 @@ public SignificantTermsAggregatorFactory(String name,
: searcher.count(filter);
this.bucketCountThresholds = bucketCountThresholds;
this.significanceHeuristic = significanceHeuristic;
- setFieldInfo(context);
-
- }
-
- private void setFieldInfo(SearchContext context) {
- if (!config.unmapped()) {
- this.indexedFieldName = config.fieldContext().field();
- fieldType = context.smartNameFieldType(indexedFieldName);
- }
}

/**
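
Reading the field information from the ValuesSourceConfig means the factory sees the mapping as
already resolved: for an aliased field, config.fieldContext().fieldType().name() is the concrete
name. A hedged request-level sketch (aggregation and field names are hypothetical):

import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.bucket.significant.SignificantTermsAggregationBuilder;

// A significant_terms aggregation declared on a field alias; background term
// frequencies are now looked up under the concrete field name.
SignificantTermsAggregationBuilder agg =
    AggregationBuilders.significantTerms("significant-tags").field("tags-alias");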
@@ -343,13 +343,10 @@ protected void doWriteTo(StreamOutput out) throws IOException {
protected AggregatorFactory<?> doBuild(SearchContext context, AggregatorFactory<?> parent,
Builder subFactoriesBuilder) throws IOException {
SignificanceHeuristic executionHeuristic = this.significanceHeuristic.rewrite(context);
- String[] execFieldNames = sourceFieldNames;
- if (execFieldNames == null) {
- execFieldNames = new String[] { fieldName };
- }

return new SignificantTextAggregatorFactory(name, includeExclude, filterBuilder,
bucketCountThresholds, executionHeuristic, context, parent, subFactoriesBuilder,
- fieldName, execFieldNames, filterDuplicateText, metaData);
+ fieldName, sourceFieldNames, filterDuplicateText, metaData);
}

@Override
@@ -71,12 +71,19 @@ public SignificantTextAggregatorFactory(String name, IncludeExclude includeExclude,
AggregatorFactories.Builder subFactoriesBuilder, String fieldName, String [] sourceFieldNames,
boolean filterDuplicateText, Map<String, Object> metaData) throws IOException {
super(name, context, parent, subFactoriesBuilder, metaData);

+ // Note that if the field is unmapped (its field type is null), we don't fail,
+ // and just use the given field name as a placeholder.
+ this.fieldType = context.getQueryShardContext().fieldMapper(fieldName);
+ this.indexedFieldName = fieldType != null ? fieldType.name() : fieldName;
+ this.sourceFieldNames = sourceFieldNames == null
+ ? new String[] { indexedFieldName }
+ : sourceFieldNames;

this.includeExclude = includeExclude;
this.filter = filterBuilder == null
? null
: filterBuilder.toQuery(context.getQueryShardContext());
- this.indexedFieldName = fieldName;
- this.sourceFieldNames = sourceFieldNames;
this.filterDuplicateText = filterDuplicateText;
IndexSearcher searcher = context.searcher();
// Important - need to use the doc count that includes deleted docs
@@ -86,11 +93,8 @@ public SignificantTextAggregatorFactory(String name, IncludeExclude includeExclude,
: searcher.count(filter);
this.bucketCountThresholds = bucketCountThresholds;
this.significanceHeuristic = significanceHeuristic;
- fieldType = context.getQueryShardContext().fieldMapper(indexedFieldName);

}


/**
* Get the number of docs in the superset.
*/
Expand Down Expand Up @@ -133,13 +137,13 @@ private long getBackgroundFrequency(String value) throws IOException {
}
return context.searcher().count(query);
}

public long getBackgroundFrequency(BytesRef termBytes) throws IOException {
String value = format.format(termBytes).toString();
return getBackgroundFrequency(value);
}
}



@Override
public void close() {
try {
Expand All @@ -154,19 +158,19 @@ public void close() {
@Override
protected Aggregator createInternal(Aggregator parent, boolean collectsFromSingleBucket,
List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData)
- throws IOException {
+ throws IOException {
if (collectsFromSingleBucket == false) {
return asMultiBucketAggregator(this, context, parent);
}

numberOfAggregatorsCreated++;
BucketCountThresholds bucketCountThresholds = new BucketCountThresholds(this.bucketCountThresholds);
if (bucketCountThresholds.getShardSize() == SignificantTextAggregationBuilder.DEFAULT_BUCKET_COUNT_THRESHOLDS.getShardSize()) {
// The user has not made a shardSize selection.
// Use default heuristic to avoid any wrong-ranking caused by
// distributed counting but request double the usual amount.
// We typically need more than the number of "top" terms requested
- // by other aggregations as the significance algorithm is in less
+ // by other aggregations as the significance algorithm is in less
// of a position to down-select at shard-level - some of the things
// we want to find have only one occurrence on each shard and as
// such are impossible to differentiate from non-significant terms
@@ -177,9 +181,9 @@ protected Aggregator createInternal(Aggregator parent, boolean collectsFromSingleBucket,

// TODO - need to check with mapping that this is indeed a text field....

- IncludeExclude.StringFilter incExcFilter = includeExclude == null ? null:
+ IncludeExclude.StringFilter incExcFilter = includeExclude == null ? null:
includeExclude.convertToStringFilter(DocValueFormat.RAW);

return new SignificantTextAggregator(name, factories, context, parent, pipelineAggregators, bucketCountThresholds,
incExcFilter, significanceHeuristic, this, indexedFieldName, sourceFieldNames, filterDuplicateText, metaData);

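
With the resolution moved to the top of the constructor, sourceFieldNames also defaults to the
concrete name, so _source extraction reads the right JSON key when the aggregation is declared
on an alias. A hedged request-level sketch (names are hypothetical):

import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.bucket.significant.SignificantTextAggregationBuilder;

// "body-alias" stands in for an alias over a text field; with no explicit
// source fields configured, the factory falls back to the resolved concrete
// name rather than the alias.
SignificantTextAggregationBuilder agg =
    AggregationBuilders.significantText("keywords", "body-alias")
        .filterDuplicateText(true);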