From 0bc7b496104eebb72e0c2ca9b6a91cb69c09d706 Mon Sep 17 00:00:00 2001 From: Ignacio Vera Date: Wed, 1 May 2024 08:11:15 +0200 Subject: [PATCH] Optimise multiterms aggregation for single value fields (#107937) --- docs/changelog/107937.yaml | 5 ++ .../multiterms/MultiTermsAggregator.java | 74 ++++++++++++++++--- 2 files changed, 67 insertions(+), 12 deletions(-) create mode 100644 docs/changelog/107937.yaml diff --git a/docs/changelog/107937.yaml b/docs/changelog/107937.yaml new file mode 100644 index 0000000000000..5938c8e8b6602 --- /dev/null +++ b/docs/changelog/107937.yaml @@ -0,0 +1,5 @@ +pr: 107937 +summary: Optimise multiterms aggregation for single value fields +area: Aggregations +type: enhancement +issues: [] diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/multiterms/MultiTermsAggregator.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/multiterms/MultiTermsAggregator.java index e0c927c762514..85882a5c56851 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/multiterms/MultiTermsAggregator.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/multiterms/MultiTermsAggregator.java @@ -7,7 +7,10 @@ package org.elasticsearch.xpack.analytics.multiterms; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.DocValues; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.search.ScoreMode; import org.apache.lucene.util.BytesRef; @@ -20,6 +23,8 @@ import org.elasticsearch.common.util.ObjectArrayPriorityQueue; import org.elasticsearch.core.CheckedConsumer; import org.elasticsearch.core.Releasables; +import org.elasticsearch.index.fielddata.FieldData; +import org.elasticsearch.index.fielddata.NumericDoubleValues; import org.elasticsearch.index.fielddata.SortedBinaryDocValues; import org.elasticsearch.index.fielddata.SortedNumericDoubleValues; import org.elasticsearch.search.DocValueFormat; @@ -376,14 +381,19 @@ static class LongTermValuesSource implements TermValuesSource { @Override public TermValues getValues(LeafReaderContext ctx) throws IOException { - SortedNumericDocValues values = source.longValues(ctx); + final SortedNumericDocValues values = source.longValues(ctx); + final NumericDocValues singleton = DocValues.unwrapSingleton(values); + return singleton != null ? getValues(singleton) : getValues(values); + } + + public TermValues getValues(SortedNumericDocValues values) { return doc -> { if (values.advanceExact(doc)) { - List objects = new ArrayList<>(); - int valuesCount = values.docValueCount(); + final List objects = new ArrayList<>(); + final int valuesCount = values.docValueCount(); long previous = Long.MAX_VALUE; for (int i = 0; i < valuesCount; ++i) { - long val = values.nextValue(); + final long val = values.nextValue(); if (previous != val || i == 0) { objects.add(val); previous = val; @@ -396,6 +406,16 @@ public TermValues getValues(LeafReaderContext ctx) throws IOException { }; } + public TermValues getValues(NumericDocValues values) { + return doc -> { + if (values.advanceExact(doc)) { + return List.of(values.longValue()); + } else { + return null; + } + }; + } + @Override public InternalMultiTerms.KeyConverter keyConverter() { return converter; @@ -414,14 +434,19 @@ static class DoubleTermValuesSource implements TermValuesSource { @Override public TermValues getValues(LeafReaderContext ctx) throws IOException { - SortedNumericDoubleValues values = source.doubleValues(ctx); + final SortedNumericDoubleValues values = source.doubleValues(ctx); + final NumericDoubleValues singleton = FieldData.unwrapSingleton(values); + return singleton != null ? getValues(singleton) : getValues(values); + } + + public TermValues getValues(SortedNumericDoubleValues values) { return doc -> { if (values.advanceExact(doc)) { - List objects = new ArrayList<>(); - int valuesCount = values.docValueCount(); + final List objects = new ArrayList<>(); + final int valuesCount = values.docValueCount(); double previous = Double.MAX_VALUE; for (int i = 0; i < valuesCount; ++i) { - double val = values.nextValue(); + final double val = values.nextValue(); if (previous != val || i == 0) { objects.add(val); previous = val; @@ -434,6 +459,16 @@ public TermValues getValues(LeafReaderContext ctx) throws IOException { }; } + public TermValues getValues(NumericDoubleValues values) { + return doc -> { + if (values.advanceExact(doc)) { + return List.of(values.doubleValue()); + } else { + return null; + } + }; + } + @Override public InternalMultiTerms.KeyConverter keyConverter() { return InternalMultiTerms.KeyConverter.DOUBLE; @@ -453,16 +488,21 @@ abstract static class BinaryTermValuesSource implements TermValuesSource { @Override public TermValues getValues(LeafReaderContext ctx) throws IOException { - SortedBinaryDocValues values = source.bytesValues(ctx); + final SortedBinaryDocValues values = source.bytesValues(ctx); + final BinaryDocValues singleton = FieldData.unwrapSingleton(values); + return singleton != null ? getValues(singleton) : getValues(values); + } + + private TermValues getValues(SortedBinaryDocValues values) { return doc -> { if (values.advanceExact(doc)) { - int valuesCount = values.docValueCount(); - List objects = new ArrayList<>(valuesCount); + final int valuesCount = values.docValueCount(); + final List objects = new ArrayList<>(valuesCount); // SortedBinaryDocValues don't guarantee uniqueness so we // need to take care of dups previous.clear(); for (int i = 0; i < valuesCount; ++i) { - BytesRef bytes = values.nextValue(); + final BytesRef bytes = values.nextValue(); if (i > 0 && previous.get().equals(bytes)) { continue; } @@ -475,6 +515,16 @@ public TermValues getValues(LeafReaderContext ctx) throws IOException { } }; } + + private TermValues getValues(BinaryDocValues values) { + return doc -> { + if (values.advanceExact(doc)) { + return List.of(BytesRef.deepCopyOf(values.binaryValue())); + } else { + return null; + } + }; + } } /**