Skip to content

Commit

Permalink
Avoid reloading _source for every inner hit. (#60494)
Browse files Browse the repository at this point in the history
Previously if an inner_hits block required _source, we would reload and parse
the root document's source for every hit. This PR adds a shared SourceLookup to
the inner hits context that allows inner hits to reuse parsed source if it's
already available. This matches our approach for sharing the root document ID.

Relates to #32818.
  • Loading branch information
jtibshirani authored Aug 3, 2020
1 parent ae01606 commit 7b64410
Show file tree
Hide file tree
Showing 6 changed files with 209 additions and 175 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -97,83 +97,75 @@ static final class JoinFieldInnerHitSubContext extends InnerHitsContext.InnerHit
}

@Override
public TopDocsAndMaxScore[] topDocs(SearchHit[] hits) throws IOException {
Weight innerHitQueryWeight = createInnerHitQueryWeight();
TopDocsAndMaxScore[] result = new TopDocsAndMaxScore[hits.length];
for (int i = 0; i < hits.length; i++) {
SearchHit hit = hits[i];
String joinName = getSortedDocValue(joinFieldMapper.name(), context, hit.docId());
if (joinName == null) {
result[i] = new TopDocsAndMaxScore(Lucene.EMPTY_TOP_DOCS, Float.NaN);
continue;
}
public TopDocsAndMaxScore topDocs(SearchHit hit) throws IOException {
Weight innerHitQueryWeight = getInnerHitQueryWeight();
String joinName = getSortedDocValue(joinFieldMapper.name(), context, hit.docId());
if (joinName == null) {
return new TopDocsAndMaxScore(Lucene.EMPTY_TOP_DOCS, Float.NaN);
}

QueryShardContext qsc = context.getQueryShardContext();
ParentIdFieldMapper parentIdFieldMapper =
joinFieldMapper.getParentIdFieldMapper(typeName, fetchChildInnerHits == false);
if (parentIdFieldMapper == null) {
result[i] = new TopDocsAndMaxScore(Lucene.EMPTY_TOP_DOCS, Float.NaN);
continue;
}
QueryShardContext qsc = context.getQueryShardContext();
ParentIdFieldMapper parentIdFieldMapper =
joinFieldMapper.getParentIdFieldMapper(typeName, fetchChildInnerHits == false);
if (parentIdFieldMapper == null) {
return new TopDocsAndMaxScore(Lucene.EMPTY_TOP_DOCS, Float.NaN);
}

Query q;
if (fetchChildInnerHits) {
Query hitQuery = parentIdFieldMapper.fieldType().termQuery(hit.getId(), qsc);
q = new BooleanQuery.Builder()
// Only include child documents that have the current hit as parent:
.add(hitQuery, BooleanClause.Occur.FILTER)
// and only include child documents of a single relation:
.add(joinFieldMapper.fieldType().termQuery(typeName, qsc), BooleanClause.Occur.FILTER)
.build();
} else {
String parentId = getSortedDocValue(parentIdFieldMapper.name(), context, hit.docId());
if (parentId == null) {
result[i] = new TopDocsAndMaxScore(Lucene.EMPTY_TOP_DOCS, Float.NaN);
continue;
}
q = context.mapperService().fieldType(IdFieldMapper.NAME).termQuery(parentId, qsc);
Query q;
if (fetchChildInnerHits) {
Query hitQuery = parentIdFieldMapper.fieldType().termQuery(hit.getId(), qsc);
q = new BooleanQuery.Builder()
// Only include child documents that have the current hit as parent:
.add(hitQuery, BooleanClause.Occur.FILTER)
// and only include child documents of a single relation:
.add(joinFieldMapper.fieldType().termQuery(typeName, qsc), BooleanClause.Occur.FILTER)
.build();
} else {
String parentId = getSortedDocValue(parentIdFieldMapper.name(), context, hit.docId());
if (parentId == null) {
return new TopDocsAndMaxScore(Lucene.EMPTY_TOP_DOCS, Float.NaN);
}
q = context.mapperService().fieldType(IdFieldMapper.NAME).termQuery(parentId, qsc);
}

Weight weight = context.searcher().createWeight(context.searcher().rewrite(q), ScoreMode.COMPLETE_NO_SCORES, 1f);
if (size() == 0) {
TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
for (LeafReaderContext ctx : context.searcher().getIndexReader().leaves()) {
intersect(weight, innerHitQueryWeight, totalHitCountCollector, ctx);
}
result[i] = new TopDocsAndMaxScore(
new TopDocs(
new TotalHits(totalHitCountCollector.getTotalHits(), TotalHits.Relation.EQUAL_TO),
Lucene.EMPTY_SCORE_DOCS
), Float.NaN);
} else {
int topN = Math.min(from() + size(), context.searcher().getIndexReader().maxDoc());
TopDocsCollector<?> topDocsCollector;
MaxScoreCollector maxScoreCollector = null;
if (sort() != null) {
topDocsCollector = TopFieldCollector.create(sort().sort, topN, Integer.MAX_VALUE);
if (trackScores()) {
maxScoreCollector = new MaxScoreCollector();
}
} else {
topDocsCollector = TopScoreDocCollector.create(topN, Integer.MAX_VALUE);
Weight weight = context.searcher().createWeight(context.searcher().rewrite(q), ScoreMode.COMPLETE_NO_SCORES, 1f);
if (size() == 0) {
TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
for (LeafReaderContext ctx : context.searcher().getIndexReader().leaves()) {
intersect(weight, innerHitQueryWeight, totalHitCountCollector, ctx);
}
return new TopDocsAndMaxScore(
new TopDocs(
new TotalHits(totalHitCountCollector.getTotalHits(), TotalHits.Relation.EQUAL_TO),
Lucene.EMPTY_SCORE_DOCS
), Float.NaN);
} else {
int topN = Math.min(from() + size(), context.searcher().getIndexReader().maxDoc());
TopDocsCollector<?> topDocsCollector;
MaxScoreCollector maxScoreCollector = null;
if (sort() != null) {
topDocsCollector = TopFieldCollector.create(sort().sort, topN, Integer.MAX_VALUE);
if (trackScores()) {
maxScoreCollector = new MaxScoreCollector();
}
try {
for (LeafReaderContext ctx : context.searcher().getIndexReader().leaves()) {
intersect(weight, innerHitQueryWeight, MultiCollector.wrap(topDocsCollector, maxScoreCollector), ctx);
}
} finally {
clearReleasables(Lifetime.COLLECTION);
}
TopDocs topDocs = topDocsCollector.topDocs(from(), size());
float maxScore = Float.NaN;
if (maxScoreCollector != null) {
maxScore = maxScoreCollector.getMaxScore();
} else {
topDocsCollector = TopScoreDocCollector.create(topN, Integer.MAX_VALUE);
maxScoreCollector = new MaxScoreCollector();
}
try {
for (LeafReaderContext ctx : context.searcher().getIndexReader().leaves()) {
intersect(weight, innerHitQueryWeight, MultiCollector.wrap(topDocsCollector, maxScoreCollector), ctx);
}
result[i] = new TopDocsAndMaxScore(topDocs, maxScore);
} finally {
clearReleasables(Lifetime.COLLECTION);
}
TopDocs topDocs = topDocsCollector.topDocs(from(), size());
float maxScore = Float.NaN;
if (maxScoreCollector != null) {
maxScore = maxScoreCollector.getMaxScore();
}
return new TopDocsAndMaxScore(topDocs, maxScore);
}
return result;
}

private String getSortedDocValue(String field, SearchContext context, int docId) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -644,6 +644,17 @@ public void testNestedSource() throws Exception {
assertHitCount(response, 1);
assertThat(response.getHits().getAt(0).getInnerHits().get("comments").getTotalHits().value, equalTo(1L));
assertThat(response.getHits().getAt(0).getInnerHits().get("comments").getAt(0).getSourceAsMap().size(), equalTo(0));

// Check that inner hits contain _source even when it's disabled on the root request.
response = client().prepareSearch()
.setFetchSource(false)
.setQuery(nestedQuery("comments", matchQuery("comments.message", "fox"), ScoreMode.None)
.innerHit(new InnerHitBuilder()))
.get();
assertNoFailures(response);
assertHitCount(response, 1);
assertThat(response.getHits().getAt(0).getInnerHits().get("comments").getTotalHits().value, equalTo(2L));
assertFalse(response.getHits().getAt(0).getInnerHits().get("comments").getAt(0).getSourceAsMap().isEmpty());
}

public void testInnerHitsWithIgnoreUnmapped() throws Exception {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -388,61 +388,57 @@ public void seqNoAndPrimaryTerm(boolean seqNoAndPrimaryTerm) {
}

@Override
public TopDocsAndMaxScore[] topDocs(SearchHit[] hits) throws IOException {
Weight innerHitQueryWeight = createInnerHitQueryWeight();
TopDocsAndMaxScore[] result = new TopDocsAndMaxScore[hits.length];
for (int i = 0; i < hits.length; i++) {
SearchHit hit = hits[i];
Query rawParentFilter;
if (parentObjectMapper == null) {
rawParentFilter = Queries.newNonNestedFilter();
} else {
rawParentFilter = parentObjectMapper.nestedTypeFilter();
}
public TopDocsAndMaxScore topDocs(SearchHit hit) throws IOException {
Weight innerHitQueryWeight = getInnerHitQueryWeight();

int parentDocId = hit.docId();
final int readerIndex = ReaderUtil.subIndex(parentDocId, searcher().getIndexReader().leaves());
// With nested inner hits the nested docs are always in the same segment, so there is no need to use the other segments
LeafReaderContext ctx = searcher().getIndexReader().leaves().get(readerIndex);

Query childFilter = childObjectMapper.nestedTypeFilter();
BitSetProducer parentFilter = context.bitsetFilterCache().getBitSetProducer(rawParentFilter);
Query q = new ParentChildrenBlockJoinQuery(parentFilter, childFilter, parentDocId);
Weight weight = context.searcher().createWeight(context.searcher().rewrite(q),
org.apache.lucene.search.ScoreMode.COMPLETE_NO_SCORES, 1f);
if (size() == 0) {
TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
intersect(weight, innerHitQueryWeight, totalHitCountCollector, ctx);
result[i] = new TopDocsAndMaxScore(new TopDocs(new TotalHits(totalHitCountCollector.getTotalHits(),
TotalHits.Relation.EQUAL_TO), Lucene.EMPTY_SCORE_DOCS), Float.NaN);
} else {
int topN = Math.min(from() + size(), context.searcher().getIndexReader().maxDoc());
TopDocsCollector<?> topDocsCollector;
MaxScoreCollector maxScoreCollector = null;
if (sort() != null) {
topDocsCollector = TopFieldCollector.create(sort().sort, topN, Integer.MAX_VALUE);
if (trackScores()) {
maxScoreCollector = new MaxScoreCollector();
}
} else {
topDocsCollector = TopScoreDocCollector.create(topN, Integer.MAX_VALUE);
Query rawParentFilter;
if (parentObjectMapper == null) {
rawParentFilter = Queries.newNonNestedFilter();
} else {
rawParentFilter = parentObjectMapper.nestedTypeFilter();
}

int parentDocId = hit.docId();
final int readerIndex = ReaderUtil.subIndex(parentDocId, searcher().getIndexReader().leaves());
// With nested inner hits the nested docs are always in the same segment, so there is no need to use the other segments
LeafReaderContext ctx = searcher().getIndexReader().leaves().get(readerIndex);

Query childFilter = childObjectMapper.nestedTypeFilter();
BitSetProducer parentFilter = context.bitsetFilterCache().getBitSetProducer(rawParentFilter);
Query q = new ParentChildrenBlockJoinQuery(parentFilter, childFilter, parentDocId);
Weight weight = context.searcher().createWeight(context.searcher().rewrite(q),
org.apache.lucene.search.ScoreMode.COMPLETE_NO_SCORES, 1f);
if (size() == 0) {
TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
intersect(weight, innerHitQueryWeight, totalHitCountCollector, ctx);
return new TopDocsAndMaxScore(new TopDocs(new TotalHits(totalHitCountCollector.getTotalHits(),
TotalHits.Relation.EQUAL_TO), Lucene.EMPTY_SCORE_DOCS), Float.NaN);
} else {
int topN = Math.min(from() + size(), context.searcher().getIndexReader().maxDoc());
TopDocsCollector<?> topDocsCollector;
MaxScoreCollector maxScoreCollector = null;
if (sort() != null) {
topDocsCollector = TopFieldCollector.create(sort().sort, topN, Integer.MAX_VALUE);
if (trackScores()) {
maxScoreCollector = new MaxScoreCollector();
}
try {
intersect(weight, innerHitQueryWeight, MultiCollector.wrap(topDocsCollector, maxScoreCollector), ctx);
} finally {
clearReleasables(Lifetime.COLLECTION);
}
} else {
topDocsCollector = TopScoreDocCollector.create(topN, Integer.MAX_VALUE);
maxScoreCollector = new MaxScoreCollector();
}
try {
intersect(weight, innerHitQueryWeight, MultiCollector.wrap(topDocsCollector, maxScoreCollector), ctx);
} finally {
clearReleasables(Lifetime.COLLECTION);
}

TopDocs td = topDocsCollector.topDocs(from(), size());
float maxScore = Float.NaN;
if (maxScoreCollector != null) {
maxScore = maxScoreCollector.getMaxScore();
}
result[i] = new TopDocsAndMaxScore(td, maxScore);
TopDocs td = topDocsCollector.topDocs(from(), size());
float maxScore = Float.NaN;
if (maxScoreCollector != null) {
maxScore = maxScoreCollector.getMaxScore();
}
return new TopDocsAndMaxScore(td, maxScore);
}
return result;
}
}
}
Loading

0 comments on commit 7b64410

Please sign in to comment.