Skip to content

Commit

Permalink
Skip segment for MatchNoDocsQuery filters.
Browse files Browse the repository at this point in the history
When a query of a filter gets rewritten to MatchNoDocsQuery, segments
will not produce any results. We can therefore skip processing such
segments.

This applies to FilterByFilterAggregator and FiltersAggregator, as well
as to TermsAggregator when it uses StringTermsAggregatorFromFilters
internally; the latter is an adapter aggregator to
FilterByFilterAggregator.

Fixes elastic#94637
  • Loading branch information
kkrik-es committed Aug 8, 2023
1 parent 36d60e2 commit d8c6ff6
Show file tree
Hide file tree
Showing 4 changed files with 208 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorable;
import org.apache.lucene.util.Bits;
Expand Down Expand Up @@ -233,7 +234,7 @@ private FilterByFilterAggregator(
@Override
protected LeafBucketCollector getLeafCollector(AggregationExecutionContext aggCtx, LeafBucketCollector sub) throws IOException {
assert scoreMode().needsScores() == false;
if (filters().size() == 0) {
if (filters().isEmpty() || (filters().size() == 1 && filters().get(0).query() instanceof MatchNoDocsQuery)) {
return LeafBucketCollector.NO_OP_COLLECTOR;
}
Bits live = aggCtx.getLeafReaderContext().reader().getLiveDocs();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Scorable;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
Expand Down Expand Up @@ -289,6 +290,10 @@ static class Compatible extends FiltersAggregator {

@Override
protected LeafBucketCollector getLeafCollector(AggregationExecutionContext aggCtx, LeafBucketCollector sub) throws IOException {
if (filters().isEmpty() || (filters().size() == 1 && filters().get(0).query() instanceof MatchNoDocsQuery)) {
return LeafBucketCollector.NO_OP_COLLECTOR;
}

IntPredicate[] docFilters = new IntPredicate[filters().size()];
for (int filterOrd = 0; filterOrd < filters().size(); filterOrd++) {
docFilters[filterOrd] = filters().get(filterOrd).matchingDocIds(aggCtx.getLeafReaderContext());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -919,14 +919,14 @@ public void testMatchNoneFilter() throws IOException {
matchesMap().entry("segments_with_doc_count_field", 0)
.entry("segments_with_deleted_docs", 0)
.entry("segments_collected", 0)
.entry("segments_counted", greaterThanOrEqualTo(1))
.entry("segments_counted", 0)
.entry(
"filters",
matchesList().item(
matchesMap().entry(
"query",
"MatchNoDocsQuery(\"The \"range\" query was rewritten to a \"match_none\" query.\")"
).entry("segments_counted_in_constant_time", greaterThan(0))
).entry("segments_counted_in_constant_time", 0)
)
)
)
Expand Down Expand Up @@ -958,14 +958,14 @@ public void testMatchNoneTopLevel() throws IOException {
matchesMap().entry("segments_with_doc_count_field", 0)
.entry("segments_with_deleted_docs", 0)
.entry("segments_collected", 0)
.entry("segments_counted", greaterThanOrEqualTo(1))
.entry("segments_counted", 0)
.entry(
"filters",
matchesList().item(
matchesMap().entry(
"query",
"MatchNoDocsQuery(\"The \"range\" query was rewritten to a \"match_none\" query.\")"
).entry("segments_counted_in_constant_time", greaterThan(0))
).entry("segments_counted_in_constant_time", 0)
)
)
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -199,3 +199,200 @@ setup:
- match: {hits.hits.0.fields.foo.0: bar }
- match: {hits.hits.1.fields.foo.0: bar }
- match: {hits.hits.2.fields.foo.0: baz }

---
"Simple filter":
- skip:
version: " - 7.6.99"
reason: "constant_keyword was added in 7.7"

- do:
indices.create:
index: bicycles
body:
settings:
number_of_shards: 1
mappings:
properties:
cycle_type:
type: constant_keyword
value: bicycle
id:
type: integer

- do:
bulk:
refresh: true
body:
- index:
_index: bicycles
_id: 1
- id: 1
- index:
_index: bicycles
_id: 2
- id: 2
- index:
_index: bicycles
_id: 3
- id: 3

- do:
search:
index: bicycles
body:
size: 0
aggs:
f:
filter:
term:
cycle_type: bicycle
- match: { hits.total.value: 3 }
- match: { aggregations.f.doc_count: 3 }

- do:
search:
index: bicycles
body:
size: 0
aggs:
f:
filter:
term:
cycle_type: nomatch
- match: { hits.total.value: 3 }
- match: { aggregations.f.doc_count: 0 }


---
"Agg with histogram":
- skip:
version: " - 7.6.99"
reason: "constant_keyword was added in 7.7"

- do:
indices.create:
index: bicycles
body:
settings:
number_of_shards: 1
mappings:
properties:
cycle_type:
type: constant_keyword
value: bicycle
id:
type: integer
status:
type: keyword
price:
type: integer

- do:
bulk:
refresh: true
body:
- index:
_index: bicycles
_id: 1
- id: 1
status: up
price: 100
- index:
_index: bicycles
_id: 2
- id: 2
status: up
price: 150
- index:
_index: bicycles
_id: 3
- id: 3
status: down
price: 200

- do:
search:
index: bicycles
body:
size: 0
aggs:
agg:
histogram:
field: price
interval: 50
aggs:
0-bucket:
filter:
bool:
filter:
- bool:
filter:
- bool:
filter:
- bool:
should:
- term:
id: 1
minimum_should_match: 1
- bool:
should:
- term:
cycle_type: bicycle
minimum_should_match: 1
- bool:
should:
- term:
status: up
minimum_should_match: 1
- match: { hits.total.value: 3 }
- length: { aggregations.agg.buckets: 3 }
- match: { aggregations.agg.buckets.0.key: 100.0 }
- match: { aggregations.agg.buckets.0.0-bucket.doc_count: 1 }
- match: { aggregations.agg.buckets.1.key: 150.0 }
- match: { aggregations.agg.buckets.1.0-bucket.doc_count: 0 }
- match: { aggregations.agg.buckets.2.key: 200.0 }
- match: { aggregations.agg.buckets.2.0-bucket.doc_count: 0 }

- do:
search:
index: bicycles
body:
size: 0
aggs:
agg:
histogram:
field: price
interval: 50
aggs:
0-bucket:
filter:
bool:
filter:
- bool:
filter:
- bool:
filter:
- bool:
should:
- term:
id: 1
minimum_should_match: 1
- bool:
should:
- term:
cycle_type: nomatch
minimum_should_match: 1
- bool:
should:
- term:
status: up
minimum_should_match: 1
- match: { hits.total.value: 3 }
- length: { aggregations.agg.buckets: 3 }
- match: { aggregations.agg.buckets.0.key: 100.0 }
- match: { aggregations.agg.buckets.0.0-bucket.doc_count: 0 }
- match: { aggregations.agg.buckets.1.key: 150.0 }
- match: { aggregations.agg.buckets.1.0-bucket.doc_count: 0 }
- match: { aggregations.agg.buckets.2.key: 200.0 }
- match: { aggregations.agg.buckets.2.0-bucket.doc_count: 0 }

0 comments on commit d8c6ff6

Please sign in to comment.