Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Include unindexed field in FieldStats response #21821

Merged
merged 5 commits into from
Dec 6, 2016
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
169 changes: 146 additions & 23 deletions core/src/main/java/org/elasticsearch/action/fieldstats/FieldStats.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,10 @@
package org.elasticsearch.action.fieldstats;

import org.apache.lucene.document.InetAddressPoint;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.StringHelper;
import org.elasticsearch.Version;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
Expand All @@ -45,9 +47,50 @@ public abstract class FieldStats<T> implements Writeable, ToXContent {
private long sumTotalTermFreq;
private boolean isSearchable;
private boolean isAggregatable;
private boolean hasMinMax;
protected T minValue;
protected T maxValue;

/**
* Builds a FieldStats where min and max value are not available for the field.
* @param type The native type of this FieldStats
* @param maxDoc Max number of docs
* @param docCount the number of documents that have at least one term for this field,
* or -1 if this information isn't available for this field.
* @param sumDocFreq the sum of {@link TermsEnum#docFreq()} for all terms in this field,
* or -1 if this information isn't available for this field.
* @param sumTotalTermFreq the sum of {@link TermsEnum#totalTermFreq} for all terms in this field,
* or -1 if this measure isn't available for this field.
* @param isSearchable true if this field is searchable
* @param isAggregatable true if this field is aggregatable
*/
FieldStats(byte type, long maxDoc, long docCount, long sumDocFreq, long sumTotalTermFreq,
boolean isSearchable, boolean isAggregatable) {
this.type = type;
this.maxDoc = maxDoc;
this.docCount = docCount;
this.sumDocFreq = sumDocFreq;
this.sumTotalTermFreq = sumTotalTermFreq;
this.isSearchable = isSearchable;
this.isAggregatable = isAggregatable;
this.hasMinMax = false;
}

/**
* Builds a FieldStats with min and max value for the field.
* @param type The native type of this FieldStats
* @param maxDoc Max number of docs
* @param docCount the number of documents that have at least one term for this field,
* or -1 if this information isn't available for this field.
* @param sumDocFreq the sum of {@link TermsEnum#docFreq()} for all terms in this field,
* or -1 if this information isn't available for this field.
* @param sumTotalTermFreq the sum of {@link TermsEnum#totalTermFreq} for all terms in this field,
* or -1 if this measure isn't available for this field.
* @param isSearchable true if this field is searchable
* @param isAggregatable true if this field is aggregatable
* @param minValue the minimum value indexed in this field
* @param maxValue the maximum value indexed in this field
*/
FieldStats(byte type,
long maxDoc, long docCount, long sumDocFreq, long sumTotalTermFreq,
boolean isSearchable, boolean isAggregatable, T minValue, T maxValue) {
Expand All @@ -60,6 +103,7 @@ public abstract class FieldStats<T> implements Writeable, ToXContent {
this.sumTotalTermFreq = sumTotalTermFreq;
this.isSearchable = isSearchable;
this.isAggregatable = isAggregatable;
this.hasMinMax = true;
this.minValue = minValue;
this.maxValue = maxValue;
}
Expand All @@ -85,6 +129,13 @@ public String getDisplayType() {
}
}

/**
* @return true if min/max informations are available for this field
*/
public boolean hasMinMax() {
return hasMinMax;
}

/**
* @return the total number of documents.
*
Expand Down Expand Up @@ -216,7 +267,13 @@ public final void accumulate(FieldStats other) {
isAggregatable |= other.isAggregatable;

assert type == other.getType();
updateMinMax((T) other.minValue, (T) other.maxValue);
if (hasMinMax && other.hasMinMax) {
updateMinMax((T) other.minValue, (T) other.maxValue);
} else {
hasMinMax = false;
minValue = null;
maxValue = null;
}
}

private void updateMinMax(T min, T max) {
Expand All @@ -241,7 +298,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
builder.field(SUM_TOTAL_TERM_FREQ_FIELD, sumTotalTermFreq);
builder.field(SEARCHABLE_FIELD, isSearchable);
builder.field(AGGREGATABLE_FIELD, isAggregatable);
toInnerXContent(builder);
if (hasMinMax) {
toInnerXContent(builder);
}
builder.endObject();
return builder;
}
Expand All @@ -262,7 +321,14 @@ public final void writeTo(StreamOutput out) throws IOException {
out.writeLong(sumTotalTermFreq);
out.writeBoolean(isSearchable);
out.writeBoolean(isAggregatable);
writeMinMax(out);
if (out.getVersion().onOrAfter(Version.V_5_2_0_UNRELEASED)) {
out.writeBoolean(hasMinMax);
if (hasMinMax) {
writeMinMax(out);
}
} else {
writeMinMax(out);
}
}

protected abstract void writeMinMax(StreamOutput out) throws IOException;
Expand All @@ -272,6 +338,9 @@ public final void writeTo(StreamOutput out) throws IOException {
* otherwise <code>false</code> is returned
*/
public boolean match(IndexConstraint constraint) {
if (hasMinMax == false) {
return false;
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should we throw an exception if min/max info is not available?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the current behavior for fields that are not indexed so maybe better to just ignore these fields for now ? The index constraint works on indexed fields and always returns false on non-indexed fields, I can add a sentence like this in the docs if needed ?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That works for me. Thanks for the explanation.

int cmp;
T value = valueOf(constraint.getValue(), constraint.getOptionalFormat());
if (constraint.getProperty() == IndexConstraint.Property.MIN) {
Expand Down Expand Up @@ -310,6 +379,10 @@ public boolean equals(Object o) {
if (sumTotalTermFreq != that.sumTotalTermFreq) return false;
if (isSearchable != that.isSearchable) return false;
if (isAggregatable != that.isAggregatable) return false;
if (hasMinMax != that.hasMinMax) return false;
if (hasMinMax == false) {
return true;
}
if (!minValue.equals(that.minValue)) return false;
return maxValue.equals(that.maxValue);

Expand All @@ -318,10 +391,16 @@ public boolean equals(Object o) {
@Override
public int hashCode() {
return Objects.hash(type, maxDoc, docCount, sumDocFreq, sumTotalTermFreq, isSearchable, isAggregatable,
minValue, maxValue);
hasMinMax, minValue, maxValue);
}

public static class Long extends FieldStats<java.lang.Long> {
public Long(long maxDoc, long docCount, long sumDocFreq, long sumTotalTermFreq,
boolean isSearchable, boolean isAggregatable) {
super((byte) 0, maxDoc, docCount, sumDocFreq, sumTotalTermFreq,
isSearchable, isAggregatable);
}

public Long(long maxDoc, long docCount, long sumDocFreq, long sumTotalTermFreq,
boolean isSearchable, boolean isAggregatable,
long minValue, long maxValue) {
Expand Down Expand Up @@ -357,6 +436,11 @@ public String getMaxValueAsString() {
}

public static class Double extends FieldStats<java.lang.Double> {
public Double(long maxDoc, long docCount, long sumDocFreq, long sumTotalTermFreq,
boolean isSearchable, boolean isAggregatable) {
super((byte) 1, maxDoc, docCount, sumDocFreq, sumTotalTermFreq, isSearchable, isAggregatable);
}

public Double(long maxDoc, long docCount, long sumDocFreq, long sumTotalTermFreq,
boolean isSearchable, boolean isAggregatable,
double minValue, double maxValue) {
Expand Down Expand Up @@ -397,6 +481,12 @@ public String getMaxValueAsString() {
public static class Date extends FieldStats<java.lang.Long> {
private FormatDateTimeFormatter formatter;

public Date(long maxDoc, long docCount, long sumDocFreq, long sumTotalTermFreq,
boolean isSearchable, boolean isAggregatable) {
super((byte) 2, maxDoc, docCount, sumDocFreq, sumTotalTermFreq, isSearchable, isAggregatable);
this.formatter = null;
}

public Date(long maxDoc, long docCount, long sumDocFreq, long sumTotalTermFreq,
boolean isSearchable, boolean isAggregatable,
FormatDateTimeFormatter formatter,
Expand Down Expand Up @@ -439,23 +529,27 @@ public String getMaxValueAsString() {

@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
if (!super.equals(o)) return false;

Date that = (Date) o;
return Objects.equals(formatter.format(), that.formatter.format());
return Objects.equals(formatter == null ? null : formatter.format(),
that.formatter == null ? null : that.formatter.format());
}

@Override
public int hashCode() {
int result = super.hashCode();
result = 31 * result + formatter.format().hashCode();
result = 31 * result + (formatter == null ? 0 : formatter.format().hashCode());
return result;
}
}

public static class Text extends FieldStats<BytesRef> {
public Text(long maxDoc, long docCount, long sumDocFreq, long sumTotalTermFreq,
boolean isSearchable, boolean isAggregatable) {
super((byte) 3, maxDoc, docCount, sumDocFreq, sumTotalTermFreq,
isSearchable, isAggregatable);
}

public Text(long maxDoc, long docCount, long sumDocFreq, long sumTotalTermFreq,
boolean isSearchable, boolean isAggregatable,
BytesRef minValue, BytesRef maxValue) {
Expand Down Expand Up @@ -501,6 +595,13 @@ protected void toInnerXContent(XContentBuilder builder) throws IOException {
}

public static class Ip extends FieldStats<InetAddress> {
public Ip(long maxDoc, long docCount, long sumDocFreq, long sumTotalTermFreq,
boolean isSearchable, boolean isAggregatable) {
super((byte) 4, maxDoc, docCount, sumDocFreq, sumTotalTermFreq,
isSearchable, isAggregatable);
}


public Ip(long maxDoc, long docCount, long sumDocFreq, long sumTotalTermFreq,
boolean isSearchable, boolean isAggregatable,
InetAddress minValue, InetAddress maxValue) {
Expand Down Expand Up @@ -550,27 +651,50 @@ public static FieldStats readFrom(StreamInput in) throws IOException {
long sumTotalTermFreq = in.readLong();
boolean isSearchable = in.readBoolean();
boolean isAggregatable = in.readBoolean();

boolean hasMinMax = true;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is weird to see this pre-NamedWriteable stuff still around. It might make sense to convert these to NamedWriteable in master.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure of this, NamedWriteable is ok when you want to serialize objects that have nothing in common but that's not the case here. We just want to serialize the min/max value differently for each specialization of the class. Rewriting this as a NamedWriteable would be completely different. We would need to create a class to encapsulate the min/max info and then use NamedWriteable to serialize these classes. I can do that but I prefer to do it in another PR since it's not related to the change that we're trying to make here ?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, another PR is totally fine. No need to hold this up. And I'm not 100% sure it makes sense to convert them, just that it makes sense to think about it.

We have lots of NamedWriteables that write things in common - queries and aggregations do it all the time. They defer to superclasses to load the common stuff.

if (in.getVersion().onOrAfter(Version.V_5_2_0_UNRELEASED)) {
hasMinMax = in.readBoolean();
}
switch (type) {
case 0:
return new Long(maxDoc, docCount, sumDocFreq, sumTotalTermFreq,
if (hasMinMax) {
return new Long(maxDoc, docCount, sumDocFreq, sumTotalTermFreq,
isSearchable, isAggregatable, in.readLong(), in.readLong());

} else {
return new Long(maxDoc, docCount, sumDocFreq, sumTotalTermFreq,
isSearchable, isAggregatable);
}
case 1:
return new Double(maxDoc, docCount, sumDocFreq, sumTotalTermFreq,
isSearchable, isAggregatable, in.readDouble(), in.readDouble());

if (hasMinMax) {
return new Double(maxDoc, docCount, sumDocFreq, sumTotalTermFreq,
isSearchable, isAggregatable, in.readDouble(), in.readDouble());
} else {
return new Double(maxDoc, docCount, sumDocFreq, sumTotalTermFreq,
isSearchable, isAggregatable);
}
case 2:
FormatDateTimeFormatter formatter = Joda.forPattern(in.readString());
return new Date(maxDoc, docCount, sumDocFreq, sumTotalTermFreq,
isSearchable, isAggregatable, formatter, in.readLong(), in.readLong());


if (hasMinMax) {
FormatDateTimeFormatter formatter = Joda.forPattern(in.readString());
return new Date(maxDoc, docCount, sumDocFreq, sumTotalTermFreq,
isSearchable, isAggregatable, formatter, in.readLong(), in.readLong());
} else {
return new Date(maxDoc, docCount, sumDocFreq, sumTotalTermFreq,
isSearchable, isAggregatable);
}
case 3:
return new Text(maxDoc, docCount, sumDocFreq, sumTotalTermFreq,
isSearchable, isAggregatable, in.readBytesRef(), in.readBytesRef());
if (hasMinMax) {
return new Text(maxDoc, docCount, sumDocFreq, sumTotalTermFreq,
isSearchable, isAggregatable, in.readBytesRef(), in.readBytesRef());
} else {
return new Text(maxDoc, docCount, sumDocFreq, sumTotalTermFreq,
isSearchable, isAggregatable);
}

case 4:
if (hasMinMax == false) {
return new Ip(maxDoc, docCount, sumDocFreq, sumTotalTermFreq,
isSearchable, isAggregatable);
}
int l1 = in.readByte();
byte[] b1 = new byte[l1];
in.readBytes(b1, 0, l1);
Expand Down Expand Up @@ -599,5 +723,4 @@ public static FieldStats readFrom(StreamInput in) throws IOException {
private static final String MIN_VALUE_AS_STRING_FIELD = "min_value_as_string";
private static final String MAX_VALUE_FIELD = "max_value";
private static final String MAX_VALUE_AS_STRING_FIELD = "max_value_as_string";

}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

package org.elasticsearch.action.fieldstats;

import org.elasticsearch.Version;
import org.elasticsearch.action.ShardOperationFailedException;
import org.elasticsearch.action.support.broadcast.BroadcastResponse;
import org.elasticsearch.common.Nullable;
Expand Down Expand Up @@ -91,10 +92,21 @@ public void writeTo(StreamOutput out) throws IOException {
out.writeVInt(indicesMergedFieldStats.size());
for (Map.Entry<String, Map<String, FieldStats>> entry1 : indicesMergedFieldStats.entrySet()) {
out.writeString(entry1.getKey());
out.writeVInt(entry1.getValue().size());
int size = entry1.getValue().size();
if (out.getVersion().before(Version.V_5_2_0_UNRELEASED)) {
// filter fieldstats without min/max information
for (FieldStats stats : entry1.getValue().values()) {
if (stats.hasMinMax() == false) {
size--;
}
}
}
out.writeVInt(size);
for (Map.Entry<String, FieldStats> entry2 : entry1.getValue().entrySet()) {
out.writeString(entry2.getKey());
entry2.getValue().writeTo(out);
if (entry2.getValue().hasMinMax() || out.getVersion().onOrAfter(Version.V_5_2_0_UNRELEASED)) {
out.writeString(entry2.getKey());
entry2.getValue().writeTo(out);
}
}
}
out.writeVInt(conflicts.size());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
Expand Down Expand Up @@ -299,9 +300,13 @@ public long parseToMilliseconds(Object value, boolean roundUp,
@Override
public FieldStats.Date stats(IndexReader reader) throws IOException {
String field = name();
FieldInfo fi = org.apache.lucene.index.MultiFields.getMergedFieldInfos(reader).fieldInfo(name());
if (fi == null) {
return null;
}
long size = PointValues.size(reader, field);
if (size == 0) {
return null;
return new FieldStats.Date(reader.maxDoc(), 0, -1, -1, isSearchable(), isAggregatable());
}
int docCount = PointValues.getDocCount(reader, field);
byte[] min = PointValues.getMinPackedValue(reader, field);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.apache.lucene.document.InetAddressPoint;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
Expand Down Expand Up @@ -213,9 +214,13 @@ public Query rangeQuery(Object lowerTerm, Object upperTerm, boolean includeLower
@Override
public FieldStats.Ip stats(IndexReader reader) throws IOException {
String field = name();
FieldInfo fi = org.apache.lucene.index.MultiFields.getMergedFieldInfos(reader).fieldInfo(name());
if (fi == null) {
return null;
}
long size = PointValues.size(reader, field);
if (size == 0) {
return null;
return new FieldStats.Ip(reader.maxDoc(), 0, -1, -1, isSearchable(), isAggregatable());
}
int docCount = PointValues.getDocCount(reader, field);
byte[] min = PointValues.getMinPackedValue(reader, field);
Expand Down
Loading