Commit fa192a7

Sort document by internal doc id in FetchPhase to better use LRU cache (#57273)

This change sorts the docIdsToLoad once in FetchPhase instead of cloning and re-sorting the hits in every fetch sub-phase.
boicehuang authored Jun 30, 2020
1 parent a3279b0 commit fa192a7
Showing 8 changed files with 30 additions and 32 deletions.
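At its core the change is one small pattern: pair each doc id with its position in the request, sort the pairs once by doc id so segments are read in order, then scatter the fetched hits back into request order for the response. A minimal standalone sketch of that pattern (hypothetical names such as SortOnceSketch and the String stand-in for a hit; the real code below lives in FetchPhase and works on SearchHit):

import java.util.Arrays;

class SortOnceSketch {
    static final class DocIdToIndex implements Comparable<DocIdToIndex> {
        final int docId;   // global Lucene doc id
        final int index;   // position in the original request
        DocIdToIndex(int docId, int index) { this.docId = docId; this.index = index; }
        @Override
        public int compareTo(DocIdToIndex o) { return Integer.compare(docId, o.docId); }
    }

    public static void main(String[] args) {
        int[] docIdsToLoad = {42, 7, 19, 3};              // arbitrary request order
        DocIdToIndex[] docs = new DocIdToIndex[docIdsToLoad.length];
        for (int i = 0; i < docIdsToLoad.length; i++) {
            docs[i] = new DocIdToIndex(docIdsToLoad[i], i);
        }
        Arrays.sort(docs);                                 // one sort, shared by all sub-phases

        String[] hits = new String[docs.length];           // request order, for the response
        String[] sortedHits = new String[docs.length];     // doc-id order, for the sub-phases
        for (int i = 0; i < docs.length; i++) {
            String hit = "hit(doc=" + docs[i].docId + ")"; // stands in for the real per-doc fetch
            sortedHits[i] = hit;
            hits[docs[i].index] = hit;                     // scatter back into request order
        }
        System.out.println(Arrays.toString(sortedHits));   // [hit(doc=3), hit(doc=7), hit(doc=19), hit(doc=42)]
        System.out.println(Arrays.toString(hits));         // [hit(doc=42), hit(doc=7), hit(doc=19), hit(doc=3)]
    }
}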
FetchPhase.java
@@ -57,6 +57,7 @@
 import org.elasticsearch.tasks.TaskCancelledException;
 
 import java.io.IOException;
+import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
@@ -140,13 +141,20 @@ public void execute(SearchContext context) {
         }
 
         try {
+            DocIdToIndex[] docs = new DocIdToIndex[context.docIdsToLoadSize()];
+            for (int index = 0; index < context.docIdsToLoadSize(); index++) {
+                docs[index] = new DocIdToIndex(context.docIdsToLoad()[context.docIdsToLoadFrom() + index], index);
+            }
+            Arrays.sort(docs);
+
             SearchHit[] hits = new SearchHit[context.docIdsToLoadSize()];
+            SearchHit[] sortedHits = new SearchHit[context.docIdsToLoadSize()];
             FetchSubPhase.HitContext hitContext = new FetchSubPhase.HitContext();
             for (int index = 0; index < context.docIdsToLoadSize(); index++) {
                 if (context.isCancelled()) {
                     throw new TaskCancelledException("cancelled");
                 }
-                int docId = context.docIdsToLoad()[context.docIdsToLoadFrom() + index];
+                int docId = docs[index].docId;
                 int readerIndex = ReaderUtil.subIndex(docId, context.searcher().getIndexReader().leaves());
                 LeafReaderContext subReaderContext = context.searcher().getIndexReader().leaves().get(readerIndex);
                 int subDocId = docId - subReaderContext.docBase;
@@ -161,7 +169,8 @@ public void execute(SearchContext context) {
                         storedToRequestedFields, subReaderContext);
                 }
 
-                hits[index] = searchHit;
+                sortedHits[index] = searchHit;
+                hits[docs[index].index] = searchHit;
                 hitContext.reset(searchHit, subReaderContext, subDocId, context.searcher());
                 for (FetchSubPhase fetchSubPhase : fetchSubPhases) {
                     fetchSubPhase.hitExecute(context, hitContext);
@@ -172,7 +181,7 @@ public void execute(SearchContext context) {
             }
 
             for (FetchSubPhase fetchSubPhase : fetchSubPhases) {
-                fetchSubPhase.hitsExecute(context, hits);
+                fetchSubPhase.hitsExecute(context, sortedHits);
                 if (context.isCancelled()) {
                     throw new TaskCancelledException("cancelled");
                 }
@@ -185,6 +194,21 @@ public void execute(SearchContext context) {
         }
     }
 
+    static class DocIdToIndex implements Comparable<DocIdToIndex> {
+        final int docId;
+        final int index;
+
+        DocIdToIndex(int docId, int index) {
+            this.docId = docId;
+            this.index = index;
+        }
+
+        @Override
+        public int compareTo(DocIdToIndex o) {
+            return Integer.compare(docId, o.docId);
+        }
+    }
+
     private int findRootDocumentIfNested(SearchContext context, LeafReaderContext subReaderContext, int subDocId) throws IOException {
         if (context.mapperService().hasNested()) {
             BitSet bits = context.bitsetFilterCache()
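Why sorting helps: Lucene resolves a global doc id to a segment via ReaderUtil.subIndex, and with ascending doc ids the resolved leaf index never decreases, so per-segment readers and their LRU-cached state are reused instead of being thrashed by random jumps between segments. A runnable sketch of that property (assumes Lucene 8.x on the classpath; this program is not part of the commit):

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.store.ByteBuffersDirectory;

import java.util.Arrays;
import java.util.List;

public class SortedLeafAccessSketch {
    public static void main(String[] args) throws Exception {
        ByteBuffersDirectory dir = new ByteBuffersDirectory();
        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer())
            .setMergePolicy(NoMergePolicy.INSTANCE);       // keep flushed segments separate
        try (IndexWriter writer = new IndexWriter(dir, config)) {
            for (int i = 0; i < 100; i++) {
                writer.addDocument(new Document());
                if (i % 25 == 24) {
                    writer.flush();                        // force a new segment every 25 docs
                }
            }
        }
        try (DirectoryReader reader = DirectoryReader.open(dir)) {
            List<LeafReaderContext> leaves = reader.leaves();
            int[] docIds = {90, 5, 60, 30};
            Arrays.sort(docIds);                           // ascending, as FetchPhase now guarantees
            int lastLeaf = -1;
            for (int docId : docIds) {
                int leaf = ReaderUtil.subIndex(docId, leaves);
                // with sorted ids the leaf index never decreases, so per-segment
                // state can be carried over from one hit to the next
                System.out.println("doc " + docId + " -> leaf " + leaf + " (previous " + lastLeaf + ")");
                lastLeaf = leaf;
            }
        }
    }
}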
FetchSubPhase.java
@@ -81,6 +81,8 @@ public Map<String, Object> cache() {
      */
     default void hitExecute(SearchContext context, HitContext hitContext) throws IOException {}
 
-
+    /**
+     * Executes the hits level phase (note, hits are sorted by doc ids).
+     */
     default void hitsExecute(SearchContext context, SearchHit[] hits) throws IOException {}
 }
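This javadoc is the contract the rest of the commit relies on: doc-values iterators are forward-only within a segment, so a sub-phase that receives hits already in doc-id order can keep a single iterator per leaf and advance it, instead of defensively cloning and re-sorting the hits as each sub-phase did before. A sketch of that consumer pattern (hypothetical helper, not the real FetchSubPhase API; assumes Lucene 8.x):

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.ReaderUtil;

import java.io.IOException;
import java.util.List;

final class SortedHitsConsumerSketch {
    // docIds must be ascending, which is what FetchPhase now guarantees for hits
    static void collect(List<LeafReaderContext> leaves, int[] docIds, String field) throws IOException {
        int lastReaderId = -1;
        NumericDocValues values = null;
        for (int docId : docIds) {
            int readerId = ReaderUtil.subIndex(docId, leaves);
            LeafReaderContext leaf = leaves.get(readerId);
            if (readerId != lastReaderId) {
                values = leaf.reader().getNumericDocValues(field); // fresh iterator per segment
                lastReaderId = readerId;
            }
            int segmentDocId = docId - leaf.docBase;
            // advanceExact only moves forward within a segment; this is valid
            // precisely because the incoming doc ids are ascending
            if (values != null && values.advanceExact(segmentDocId)) {
                System.out.println("doc " + docId + " " + field + "=" + values.longValue());
            }
        }
    }
}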
FetchDocValuesPhase.java
@@ -38,9 +38,7 @@
 
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collections;
-import java.util.Comparator;
 import java.util.List;
 
 import static org.elasticsearch.index.fielddata.IndexNumericFieldData.NumericType;
@@ -71,9 +69,6 @@ public void hitsExecute(SearchContext context, SearchHit[] hits) throws IOException {
             return;
         }
 
-        hits = hits.clone(); // don't modify the incoming hits
-        Arrays.sort(hits, Comparator.comparingInt(SearchHit::docId));
-
         for (FieldAndFormat fieldAndFormat : context.docValuesContext().fields()) {
             String field = fieldAndFormat.field;
             MappedFieldType fieldType = context.mapperService().fieldType(field);
FetchScorePhase.java
@@ -30,8 +30,6 @@
 import org.elasticsearch.search.internal.SearchContext;
 
 import java.io.IOException;
-import java.util.Arrays;
-import java.util.Comparator;
 import java.util.Iterator;
 
 public class FetchScorePhase implements FetchSubPhase {
@@ -44,9 +42,6 @@ public void hitsExecute(SearchContext context, SearchHit[] hits) throws IOException {
             return;
         }
 
-        hits = hits.clone(); // don't modify the incoming hits
-        Arrays.sort(hits, Comparator.comparingInt(SearchHit::docId));
-
         final IndexSearcher searcher = context.searcher();
         final Weight weight = searcher.createWeight(searcher.rewrite(context.query()), ScoreMode.COMPLETE, 1);
         Iterator<LeafReaderContext> leafContextIterator = searcher.getIndexReader().leaves().iterator();
FetchVersionPhase.java
@@ -28,8 +28,6 @@
 import org.elasticsearch.search.internal.SearchContext;
 
 import java.io.IOException;
-import java.util.Arrays;
-import java.util.Comparator;
 
 public final class FetchVersionPhase implements FetchSubPhase {
     @Override
@@ -39,9 +37,6 @@ public void hitsExecute(SearchContext context, SearchHit[] hits) throws IOException {
             return;
         }
 
-        hits = hits.clone(); // don't modify the incoming hits
-        Arrays.sort(hits, Comparator.comparingInt(SearchHit::docId));
-
         int lastReaderId = -1;
         NumericDocValues versions = null;
         for (SearchHit hit : hits) {
MatchedQueriesPhase.java
@@ -35,7 +35,6 @@
 
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -49,8 +48,6 @@ public void hitsExecute(SearchContext context, SearchHit[] hits) {
                 context.parsedQuery() == null) {
             return;
         }
-        hits = hits.clone(); // don't modify the incoming hits
-        Arrays.sort(hits, (a, b) -> Integer.compare(a.docId(), b.docId()));
         @SuppressWarnings("unchecked")
         List<String>[] matchedQueries = new List[hits.length];
         for (int i = 0; i < matchedQueries.length; ++i) {
ScriptFieldsPhase.java
@@ -30,10 +30,8 @@
 
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
-import java.util.Comparator;
 import java.util.List;
 
 public final class ScriptFieldsPhase implements FetchSubPhase {
@@ -44,9 +42,6 @@ public void hitsExecute(SearchContext context, SearchHit[] hits) throws IOException {
             return;
         }
 
-        hits = hits.clone(); // don't modify the incoming hits
-        Arrays.sort(hits, Comparator.comparingInt(SearchHit::docId));
-
         int lastReaderId = -1;
         FieldScript[] leafScripts = null;
         List<ScriptFieldsContext.ScriptField> scriptFields = context.scriptFields().fields();
SeqNoPrimaryTermPhase.java
@@ -28,8 +28,6 @@
 import org.elasticsearch.search.internal.SearchContext;
 
 import java.io.IOException;
-import java.util.Arrays;
-import java.util.Comparator;
 
 public final class SeqNoPrimaryTermPhase implements FetchSubPhase {
     @Override
@@ -38,9 +36,6 @@ public void hitsExecute(SearchContext context, SearchHit[] hits) throws IOException {
             return;
         }
 
-        hits = hits.clone(); // don't modify the incoming hits
-        Arrays.sort(hits, Comparator.comparingInt(SearchHit::docId));
-
         int lastReaderId = -1;
         NumericDocValues seqNoField = null;
         NumericDocValues primaryTermField = null;
