-
Notifications
You must be signed in to change notification settings - Fork 24.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Include unindexed field in FieldStats response #21821
Changes from 4 commits
56d5cb6
ac546c9
727add1
be82024
d401c16
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,8 +20,10 @@ | |
package org.elasticsearch.action.fieldstats; | ||
|
||
import org.apache.lucene.document.InetAddressPoint; | ||
import org.apache.lucene.index.TermsEnum; | ||
import org.apache.lucene.util.BytesRef; | ||
import org.apache.lucene.util.StringHelper; | ||
import org.elasticsearch.Version; | ||
import org.elasticsearch.common.io.stream.StreamInput; | ||
import org.elasticsearch.common.io.stream.StreamOutput; | ||
import org.elasticsearch.common.io.stream.Writeable; | ||
|
@@ -45,9 +47,50 @@ public abstract class FieldStats<T> implements Writeable, ToXContent { | |
private long sumTotalTermFreq; | ||
private boolean isSearchable; | ||
private boolean isAggregatable; | ||
private boolean hasMinMax; | ||
protected T minValue; | ||
protected T maxValue; | ||
|
||
/** | ||
* Builds a FieldStats where min and max value are not available for the field. | ||
* @param type The native type of this FieldStats | ||
* @param maxDoc Max number of docs | ||
* @param docCount the number of documents that have at least one term for this field, | ||
* or -1 if this information isn't available for this field. | ||
* @param sumDocFreq the sum of {@link TermsEnum#docFreq()} for all terms in this field, | ||
* or -1 if this information isn't available for this field. | ||
* @param sumTotalTermFreq the sum of {@link TermsEnum#totalTermFreq} for all terms in this field, | ||
* or -1 if this measure isn't available for this field. | ||
* @param isSearchable true if this field is searchable | ||
* @param isAggregatable true if this field is aggregatable | ||
*/ | ||
FieldStats(byte type, long maxDoc, long docCount, long sumDocFreq, long sumTotalTermFreq, | ||
boolean isSearchable, boolean isAggregatable) { | ||
this.type = type; | ||
this.maxDoc = maxDoc; | ||
this.docCount = docCount; | ||
this.sumDocFreq = sumDocFreq; | ||
this.sumTotalTermFreq = sumTotalTermFreq; | ||
this.isSearchable = isSearchable; | ||
this.isAggregatable = isAggregatable; | ||
this.hasMinMax = false; | ||
} | ||
|
||
/** | ||
* Builds a FieldStats with min and max value for the field. | ||
* @param type The native type of this FieldStats | ||
* @param maxDoc Max number of docs | ||
* @param docCount the number of documents that have at least one term for this field, | ||
* or -1 if this information isn't available for this field. | ||
* @param sumDocFreq the sum of {@link TermsEnum#docFreq()} for all terms in this field, | ||
* or -1 if this information isn't available for this field. | ||
* @param sumTotalTermFreq the sum of {@link TermsEnum#totalTermFreq} for all terms in this field, | ||
* or -1 if this measure isn't available for this field. | ||
* @param isSearchable true if this field is searchable | ||
* @param isAggregatable true if this field is aggregatable | ||
* @param minValue the minimum value indexed in this field | ||
* @param maxValue the maximum value indexed in this field | ||
*/ | ||
FieldStats(byte type, | ||
long maxDoc, long docCount, long sumDocFreq, long sumTotalTermFreq, | ||
boolean isSearchable, boolean isAggregatable, T minValue, T maxValue) { | ||
|
@@ -60,6 +103,7 @@ public abstract class FieldStats<T> implements Writeable, ToXContent { | |
this.sumTotalTermFreq = sumTotalTermFreq; | ||
this.isSearchable = isSearchable; | ||
this.isAggregatable = isAggregatable; | ||
this.hasMinMax = true; | ||
this.minValue = minValue; | ||
this.maxValue = maxValue; | ||
} | ||
|
@@ -85,6 +129,13 @@ public String getDisplayType() { | |
} | ||
} | ||
|
||
/** | ||
* @return true if min/max informations are available for this field | ||
*/ | ||
public boolean hasMinMax() { | ||
return hasMinMax; | ||
} | ||
|
||
/** | ||
* @return the total number of documents. | ||
* | ||
|
@@ -216,7 +267,13 @@ public final void accumulate(FieldStats other) { | |
isAggregatable |= other.isAggregatable; | ||
|
||
assert type == other.getType(); | ||
updateMinMax((T) other.minValue, (T) other.maxValue); | ||
if (hasMinMax && other.hasMinMax) { | ||
updateMinMax((T) other.minValue, (T) other.maxValue); | ||
} else { | ||
hasMinMax = false; | ||
minValue = null; | ||
maxValue = null; | ||
} | ||
} | ||
|
||
private void updateMinMax(T min, T max) { | ||
|
@@ -241,7 +298,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws | |
builder.field(SUM_TOTAL_TERM_FREQ_FIELD, sumTotalTermFreq); | ||
builder.field(SEARCHABLE_FIELD, isSearchable); | ||
builder.field(AGGREGATABLE_FIELD, isAggregatable); | ||
toInnerXContent(builder); | ||
if (hasMinMax) { | ||
toInnerXContent(builder); | ||
} | ||
builder.endObject(); | ||
return builder; | ||
} | ||
|
@@ -262,7 +321,14 @@ public final void writeTo(StreamOutput out) throws IOException { | |
out.writeLong(sumTotalTermFreq); | ||
out.writeBoolean(isSearchable); | ||
out.writeBoolean(isAggregatable); | ||
writeMinMax(out); | ||
if (out.getVersion().onOrAfter(Version.V_5_2_0_UNRELEASED)) { | ||
out.writeBoolean(hasMinMax); | ||
if (hasMinMax) { | ||
writeMinMax(out); | ||
} | ||
} else { | ||
writeMinMax(out); | ||
} | ||
} | ||
|
||
protected abstract void writeMinMax(StreamOutput out) throws IOException; | ||
|
@@ -272,6 +338,9 @@ public final void writeTo(StreamOutput out) throws IOException { | |
* otherwise <code>false</code> is returned | ||
*/ | ||
public boolean match(IndexConstraint constraint) { | ||
if (hasMinMax == false) { | ||
return false; | ||
} | ||
int cmp; | ||
T value = valueOf(constraint.getValue(), constraint.getOptionalFormat()); | ||
if (constraint.getProperty() == IndexConstraint.Property.MIN) { | ||
|
@@ -310,6 +379,10 @@ public boolean equals(Object o) { | |
if (sumTotalTermFreq != that.sumTotalTermFreq) return false; | ||
if (isSearchable != that.isSearchable) return false; | ||
if (isAggregatable != that.isAggregatable) return false; | ||
if (hasMinMax != that.hasMinMax) return false; | ||
if (hasMinMax == false) { | ||
return true; | ||
} | ||
if (!minValue.equals(that.minValue)) return false; | ||
return maxValue.equals(that.maxValue); | ||
|
||
|
@@ -318,10 +391,16 @@ public boolean equals(Object o) { | |
@Override | ||
public int hashCode() { | ||
return Objects.hash(type, maxDoc, docCount, sumDocFreq, sumTotalTermFreq, isSearchable, isAggregatable, | ||
minValue, maxValue); | ||
hasMinMax, minValue, maxValue); | ||
} | ||
|
||
public static class Long extends FieldStats<java.lang.Long> { | ||
public Long(long maxDoc, long docCount, long sumDocFreq, long sumTotalTermFreq, | ||
boolean isSearchable, boolean isAggregatable) { | ||
super((byte) 0, maxDoc, docCount, sumDocFreq, sumTotalTermFreq, | ||
isSearchable, isAggregatable); | ||
} | ||
|
||
public Long(long maxDoc, long docCount, long sumDocFreq, long sumTotalTermFreq, | ||
boolean isSearchable, boolean isAggregatable, | ||
long minValue, long maxValue) { | ||
|
@@ -357,6 +436,11 @@ public String getMaxValueAsString() { | |
} | ||
|
||
public static class Double extends FieldStats<java.lang.Double> { | ||
public Double(long maxDoc, long docCount, long sumDocFreq, long sumTotalTermFreq, | ||
boolean isSearchable, boolean isAggregatable) { | ||
super((byte) 1, maxDoc, docCount, sumDocFreq, sumTotalTermFreq, isSearchable, isAggregatable); | ||
} | ||
|
||
public Double(long maxDoc, long docCount, long sumDocFreq, long sumTotalTermFreq, | ||
boolean isSearchable, boolean isAggregatable, | ||
double minValue, double maxValue) { | ||
|
@@ -397,6 +481,12 @@ public String getMaxValueAsString() { | |
public static class Date extends FieldStats<java.lang.Long> { | ||
private FormatDateTimeFormatter formatter; | ||
|
||
public Date(long maxDoc, long docCount, long sumDocFreq, long sumTotalTermFreq, | ||
boolean isSearchable, boolean isAggregatable) { | ||
super((byte) 2, maxDoc, docCount, sumDocFreq, sumTotalTermFreq, isSearchable, isAggregatable); | ||
this.formatter = null; | ||
} | ||
|
||
public Date(long maxDoc, long docCount, long sumDocFreq, long sumTotalTermFreq, | ||
boolean isSearchable, boolean isAggregatable, | ||
FormatDateTimeFormatter formatter, | ||
|
@@ -439,23 +529,27 @@ public String getMaxValueAsString() { | |
|
||
@Override | ||
public boolean equals(Object o) { | ||
if (this == o) return true; | ||
if (o == null || getClass() != o.getClass()) return false; | ||
if (!super.equals(o)) return false; | ||
|
||
Date that = (Date) o; | ||
return Objects.equals(formatter.format(), that.formatter.format()); | ||
return Objects.equals(formatter == null ? null : formatter.format(), | ||
that.formatter == null ? null : that.formatter.format()); | ||
} | ||
|
||
@Override | ||
public int hashCode() { | ||
int result = super.hashCode(); | ||
result = 31 * result + formatter.format().hashCode(); | ||
result = 31 * result + (formatter == null ? 0 : formatter.format().hashCode()); | ||
return result; | ||
} | ||
} | ||
|
||
public static class Text extends FieldStats<BytesRef> { | ||
public Text(long maxDoc, long docCount, long sumDocFreq, long sumTotalTermFreq, | ||
boolean isSearchable, boolean isAggregatable) { | ||
super((byte) 3, maxDoc, docCount, sumDocFreq, sumTotalTermFreq, | ||
isSearchable, isAggregatable); | ||
} | ||
|
||
public Text(long maxDoc, long docCount, long sumDocFreq, long sumTotalTermFreq, | ||
boolean isSearchable, boolean isAggregatable, | ||
BytesRef minValue, BytesRef maxValue) { | ||
|
@@ -501,6 +595,13 @@ protected void toInnerXContent(XContentBuilder builder) throws IOException { | |
} | ||
|
||
public static class Ip extends FieldStats<InetAddress> { | ||
public Ip(long maxDoc, long docCount, long sumDocFreq, long sumTotalTermFreq, | ||
boolean isSearchable, boolean isAggregatable) { | ||
super((byte) 4, maxDoc, docCount, sumDocFreq, sumTotalTermFreq, | ||
isSearchable, isAggregatable); | ||
} | ||
|
||
|
||
public Ip(long maxDoc, long docCount, long sumDocFreq, long sumTotalTermFreq, | ||
boolean isSearchable, boolean isAggregatable, | ||
InetAddress minValue, InetAddress maxValue) { | ||
|
@@ -550,27 +651,50 @@ public static FieldStats readFrom(StreamInput in) throws IOException { | |
long sumTotalTermFreq = in.readLong(); | ||
boolean isSearchable = in.readBoolean(); | ||
boolean isAggregatable = in.readBoolean(); | ||
|
||
boolean hasMinMax = true; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is weird to see this pre-NamedWriteable stuff still around. It might make sense to convert these to NamedWriteable in master. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am not sure of this, NamedWriteable is ok when you want to serialize objects that have nothing in common but that's not the case here. We just want to serialize the min/max value differently for each specialization of the class. Rewriting this as a NamedWriteable would be completely different. We would need to create a class to encapsulate the min/max info and then use NamedWriteable to serialize these classes. I can do that but I prefer to do it in another PR since it's not related to the change that we're trying to make here ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, another PR is totally fine. No need to hold this up. And I'm not 100% sure it makes sense to convert them, just that it makes sense to think about it. We have lots of NamedWriteables that write things in common - queries and aggregations do it all the time. They defer to superclasses to load the common stuff. |
||
if (in.getVersion().onOrAfter(Version.V_5_2_0_UNRELEASED)) { | ||
hasMinMax = in.readBoolean(); | ||
} | ||
switch (type) { | ||
case 0: | ||
return new Long(maxDoc, docCount, sumDocFreq, sumTotalTermFreq, | ||
if (hasMinMax) { | ||
return new Long(maxDoc, docCount, sumDocFreq, sumTotalTermFreq, | ||
isSearchable, isAggregatable, in.readLong(), in.readLong()); | ||
|
||
} else { | ||
return new Long(maxDoc, docCount, sumDocFreq, sumTotalTermFreq, | ||
isSearchable, isAggregatable); | ||
} | ||
case 1: | ||
return new Double(maxDoc, docCount, sumDocFreq, sumTotalTermFreq, | ||
isSearchable, isAggregatable, in.readDouble(), in.readDouble()); | ||
|
||
if (hasMinMax) { | ||
return new Double(maxDoc, docCount, sumDocFreq, sumTotalTermFreq, | ||
isSearchable, isAggregatable, in.readDouble(), in.readDouble()); | ||
} else { | ||
return new Double(maxDoc, docCount, sumDocFreq, sumTotalTermFreq, | ||
isSearchable, isAggregatable); | ||
} | ||
case 2: | ||
FormatDateTimeFormatter formatter = Joda.forPattern(in.readString()); | ||
return new Date(maxDoc, docCount, sumDocFreq, sumTotalTermFreq, | ||
isSearchable, isAggregatable, formatter, in.readLong(), in.readLong()); | ||
|
||
|
||
if (hasMinMax) { | ||
FormatDateTimeFormatter formatter = Joda.forPattern(in.readString()); | ||
return new Date(maxDoc, docCount, sumDocFreq, sumTotalTermFreq, | ||
isSearchable, isAggregatable, formatter, in.readLong(), in.readLong()); | ||
} else { | ||
return new Date(maxDoc, docCount, sumDocFreq, sumTotalTermFreq, | ||
isSearchable, isAggregatable); | ||
} | ||
case 3: | ||
return new Text(maxDoc, docCount, sumDocFreq, sumTotalTermFreq, | ||
isSearchable, isAggregatable, in.readBytesRef(), in.readBytesRef()); | ||
if (hasMinMax) { | ||
return new Text(maxDoc, docCount, sumDocFreq, sumTotalTermFreq, | ||
isSearchable, isAggregatable, in.readBytesRef(), in.readBytesRef()); | ||
} else { | ||
return new Text(maxDoc, docCount, sumDocFreq, sumTotalTermFreq, | ||
isSearchable, isAggregatable); | ||
} | ||
|
||
case 4: | ||
if (hasMinMax == false) { | ||
return new Ip(maxDoc, docCount, sumDocFreq, sumTotalTermFreq, | ||
isSearchable, isAggregatable); | ||
} | ||
int l1 = in.readByte(); | ||
byte[] b1 = new byte[l1]; | ||
in.readBytes(b1, 0, l1); | ||
|
@@ -599,5 +723,4 @@ public static FieldStats readFrom(StreamInput in) throws IOException { | |
private static final String MIN_VALUE_AS_STRING_FIELD = "min_value_as_string"; | ||
private static final String MAX_VALUE_FIELD = "max_value"; | ||
private static final String MAX_VALUE_AS_STRING_FIELD = "max_value_as_string"; | ||
|
||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
should we throw an exception if min/max info is not available?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is the current behavior for fields that are not indexed so maybe better to just ignore these fields for now ? The index constraint works on indexed fields and always returns false on non-indexed fields, I can add a sentence like this in the docs if needed ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That works for me. Thanks for the explanation.