From cc776beefd3c94e10968ad1c3a1167cf965c9e00 Mon Sep 17 00:00:00 2001 From: Tal Levy Date: Tue, 23 Feb 2021 12:29:49 -0800 Subject: [PATCH] Fix geo_line agg behavior with missing values (#69395) The geo_line aggregation incorrectly handled results with missing values. Tests to verify this behavior were incorrectly checking results. This commit resolves these in three ways - explicitly testing missing sort field values - explicitly testing handling missing point field values - explicitly testing handling mixed scenarios where some documents have one and not the other, or missing both. Fixes https://github.com/elastic/kibana/issues/90653. Fixes #69346. --- .../aggregations/GeoLineBucketedSort.java | 24 ++-- .../aggregations/GeoLineAggregatorTests.java | 110 +++++++++++++++--- 2 files changed, 113 insertions(+), 21 deletions(-) diff --git a/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/search/aggregations/GeoLineBucketedSort.java b/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/search/aggregations/GeoLineBucketedSort.java index adf5624a742e0..0a4bc1d867056 100644 --- a/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/search/aggregations/GeoLineBucketedSort.java +++ b/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/search/aggregations/GeoLineBucketedSort.java @@ -53,7 +53,13 @@ public long sizeOf(long bucket) { } long start = inHeapMode(bucket) ? rootIndex : (rootIndex + getNextGatherOffset(rootIndex) + 1); long end = rootIndex + bucketSize; - return end - start; + long size = 0; + for (long index = start; index < end; index++) { + if (((Extra) extra).empty.isEmpty(index) == false) { + size += 1; + } + } + return size; } /** @@ -70,11 +76,13 @@ public double[] getSortValues(long bucket) { } long start = inHeapMode(bucket) ? rootIndex : (rootIndex + getNextGatherOffset(rootIndex) + 1); long end = rootIndex + bucketSize; - double[] result = new double[(int)(end - start)]; + double[] result = new double[(int) sizeOf(bucket)]; int i = 0; for (long index = start; index < end; index++) { - double timestampValue = ((DoubleArray)values()).get(index); - result[i++] = timestampValue; + if (((Extra) extra).empty.isEmpty(index) == false) { + double timestampValue = ((DoubleArray)values()).get(index); + result[i++] = timestampValue; + } } return result; } @@ -92,11 +100,13 @@ public long[] getPoints(long bucket) { } long start = inHeapMode(bucket) ? rootIndex : (rootIndex + getNextGatherOffset(rootIndex) + 1); long end = rootIndex + bucketSize; - long[] result = new long[(int)(end - start)]; + long[] result = new long[(int) sizeOf(bucket)]; int i = 0; for (long index = start; index < end; index++) { - long geoPointValue = ((Extra) extra).values.get(index); - result[i++] = geoPointValue; + if (((Extra) extra).empty.isEmpty(index) == false) { + long geoPointValue = ((Extra) extra).values.get(index); + result[i++] = geoPointValue; + } } return result; } diff --git a/x-pack/plugin/spatial/src/test/java/org/elasticsearch/xpack/spatial/search/aggregations/GeoLineAggregatorTests.java b/x-pack/plugin/spatial/src/test/java/org/elasticsearch/xpack/spatial/search/aggregations/GeoLineAggregatorTests.java index b54d5a3921692..b25b299b78d0c 100644 --- a/x-pack/plugin/spatial/src/test/java/org/elasticsearch/xpack/spatial/search/aggregations/GeoLineAggregatorTests.java +++ b/x-pack/plugin/spatial/src/test/java/org/elasticsearch/xpack/spatial/search/aggregations/GeoLineAggregatorTests.java @@ -6,6 +6,7 @@ */ package org.elasticsearch.xpack.spatial.search.aggregations; +import org.apache.lucene.document.Field; import org.apache.lucene.document.LatLonDocValuesField; import org.apache.lucene.document.SortedDocValuesField; import org.apache.lucene.document.SortedNumericDocValuesField; @@ -35,6 +36,7 @@ import org.elasticsearch.xpack.spatial.SpatialPlugin; import java.io.IOException; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; @@ -52,7 +54,7 @@ protected List getSearchPlugins() { } // test that missing values are ignored - public void testMissingValues() throws IOException { + public void testMixedMissingValues() throws IOException { MultiValuesSourceFieldConfig valueConfig = new MultiValuesSourceFieldConfig.Builder() .setFieldName("value_field") .build(); @@ -68,9 +70,98 @@ public void testMissingValues() throws IOException { .subAggregation(lineAggregationBuilder); long lonLat = (((long) GeoEncodingUtils.encodeLongitude(90.0)) << 32) | GeoEncodingUtils.encodeLatitude(45.0) & 0xffffffffL; + // input documents for testing + // ---------------------------- + // | sort_field | value_field | + // ---------------------------- + // | N/A | lonLat | + // | 1 | N/A | + // | 2 | lonLat | + // | N/A | N/A | + // | 4 | lonLat | + // ---------------------------- + double[] sortValues = new double[]{ -1, 1, 2, -1, 4 }; + long[] points = new long[] { lonLat, -1, lonLat, -1,lonLat }; + //expected + long[] expectedAggPoints = new long[] { lonLat, lonLat }; + double[] expectedAggSortValues = new double[]{ + NumericUtils.doubleToSortableLong(2), + NumericUtils.doubleToSortableLong(4) + }; + + testCase(new MatchAllDocsQuery(), aggregationBuilder, iw -> { + for (int i = 0; i < points.length; i++) { + List fields = new ArrayList<>(); + fields.add(new SortedDocValuesField("group_id", new BytesRef("group"))); + if (sortValues[i] != -1) { + fields.add(new SortedNumericDocValuesField("sort_field", NumericUtils.doubleToSortableLong(sortValues[i]))); + } + if (points[i] != -1) { + fields.add(new LatLonDocValuesField("value_field", 45.0, 90.0)); + } + iw.addDocument(fields); + } + }, terms -> { + assertThat(terms.getBuckets().size(), equalTo(1)); + InternalGeoLine geoLine = terms.getBuckets().get(0).getAggregations().get("_name"); + assertThat(geoLine.length(), equalTo(2)); + assertTrue(geoLine.isComplete()); + assertArrayEquals(expectedAggPoints, geoLine.line()); + assertArrayEquals(expectedAggSortValues, geoLine.sortVals(), 0d); + }); + } + + public void testMissingGeoPointValueField() throws IOException { + MultiValuesSourceFieldConfig valueConfig = new MultiValuesSourceFieldConfig.Builder() + .setFieldName("value_field") + .build(); + MultiValuesSourceFieldConfig sortConfig = new MultiValuesSourceFieldConfig.Builder().setFieldName("sort_field").build(); + GeoLineAggregationBuilder lineAggregationBuilder = new GeoLineAggregationBuilder("_name") + .point(valueConfig) + .sortOrder(SortOrder.ASC) + .sort(sortConfig) + .size(10); + + TermsAggregationBuilder aggregationBuilder = new TermsAggregationBuilder("_name") + .field("group_id") + .subAggregation(lineAggregationBuilder); + //input - long[] points = new long[] {lonLat, 0, lonLat, 0,lonLat, lonLat, lonLat}; double[] sortValues = new double[]{1, 0, 2, 0, 3, 4, 5}; + + testCase(new MatchAllDocsQuery(), aggregationBuilder, iw -> { + for (int i = 0; i < sortValues.length; i++) { + iw.addDocument(Arrays.asList( + new SortedNumericDocValuesField("sort_field", NumericUtils.doubleToSortableLong(sortValues[i])), + new SortedDocValuesField("group_id", new BytesRef("group") + ))); + } + }, terms -> { + assertThat(terms.getBuckets().size(), equalTo(1)); + InternalGeoLine geoLine = terms.getBuckets().get(0).getAggregations().get("_name"); + assertThat(geoLine.length(), equalTo(0)); + assertTrue(geoLine.isComplete()); + }); + } + + public void testMissingSortField() throws IOException { + MultiValuesSourceFieldConfig valueConfig = new MultiValuesSourceFieldConfig.Builder() + .setFieldName("value_field") + .build(); + MultiValuesSourceFieldConfig sortConfig = new MultiValuesSourceFieldConfig.Builder().setFieldName("sort_field").build(); + GeoLineAggregationBuilder lineAggregationBuilder = new GeoLineAggregationBuilder("_name") + .point(valueConfig) + .sortOrder(SortOrder.ASC) + .sort(sortConfig) + .size(10); + + TermsAggregationBuilder aggregationBuilder = new TermsAggregationBuilder("_name") + .field("group_id") + .subAggregation(lineAggregationBuilder); + + long lonLat = (((long) GeoEncodingUtils.encodeLongitude(90.0)) << 32) | GeoEncodingUtils.encodeLatitude(45.0) & 0xffffffffL; + //input + long[] points = new long[] {lonLat, 0, lonLat, 0,lonLat, lonLat, lonLat}; //expected long[] expectedAggPoints = new long[] {lonLat, lonLat, lonLat, lonLat, lonLat}; double[] expectedAggSortValues = new double[]{ @@ -82,24 +173,15 @@ public void testMissingValues() throws IOException { }; testCase(new MatchAllDocsQuery(), aggregationBuilder, iw -> { - for (int i = 0; i < points.length; i++) { - if (points[i] == 0) { - // do not index value - iw.addDocument(Collections.singletonList(new SortedDocValuesField("group_id", new BytesRef("group")))); - } else { - iw.addDocument(Arrays.asList(new LatLonDocValuesField("value_field", 45.0, 90.0), - new SortedNumericDocValuesField("sort_field", NumericUtils.doubleToSortableLong(sortValues[i])), - new SortedDocValuesField("group_id", new BytesRef("group")))); - } + iw.addDocument(Arrays.asList(new LatLonDocValuesField("value_field", 45.0, 90.0), + new SortedDocValuesField("group_id", new BytesRef("group")))); } }, terms -> { assertThat(terms.getBuckets().size(), equalTo(1)); InternalGeoLine geoLine = terms.getBuckets().get(0).getAggregations().get("_name"); - assertThat(geoLine.length(), equalTo(5)); + assertThat(geoLine.length(), equalTo(0)); assertTrue(geoLine.isComplete()); - assertArrayEquals(expectedAggPoints, geoLine.line()); - assertArrayEquals(expectedAggSortValues, geoLine.sortVals(), 0d); }); }