[#8023, #11142] YQL: Enable DocDB to process lookups on a subset of t…

…he range key Summary: ScanChoices is an iterator type that iterates over tuples allowed by a given query filter in the DocDB layer. Currently, there are two types of ScanChoices based on filter type. 1. Scan choices that iterates over discrete filters: these work with filters of the form `x` IN (a, b, c) AND `y` IN (d,e,f) where a,b,c,d,e,f are all discrete values and `x`, `y` are table columns. We call this type DiscreteScanChoices. In this example, a DiscreteScanChoices would iterate over the space (a,b,c) x (d,e,f). 2. Another type of ScanChoices operates over filters that express ranges of values such as a <= x <= b where x is a table column and a, b are discrete values. We call this type RangeBasedScanChoices. In this example, the RangeBasedScanChoices would iterate over the space (a,....,b). A shortcoming of this approach of having different ScanChoices implementations for different filter types is that it cannot support a mixture of the two filter types. If we consider a filter of the form x IN (a,b,c,d,e,f) AND p <= y <= q, then we are forced to choose either using DiscreteScanChoices and only being able to process the filter for x or the alternative where we can only process the filter on y. This diff introduces HybridScanChoices which aims to support both types of filters. It treats both filter types as part of a larger class of filters that are conceptualized as lists of ranges as opposed to lists of values or singular ranges. It converts a filter of the form r1 IN (a,b,c,d) to r1 IN ([a,a], [b,b], [c,c], [d,d]). A range filter of the form a <= r1 <= b is converted into r1 IN ([a,b]). Unifying the way filters are interpreted at the DocDB iteration level and allows much more efficient queries. Consider the following table and query: CREATE TABLE sample (h int, r1 int, r2 int, primary key(h HASH, r1 ASC, r2 ASC)); SELECT * FROM sample WHERE h = 1 and r1 IN (1, 4) and r2 <= 5; Previously, the filter r1 IN (1,4) would have been converted to r1 >= 1 AND r1 <= 4 before processing. With this diff, that need not happen. A filter on a subset of the range key yielded a similar scenario: SELECT * FROM sample WHERE h = 1 and r1 IN (1, 5, 6); In this query, there is an implicit range filter on r2 from -Inf to +Inf. This was also treated as a mixed filter, causing the filter on r1 to be converted to a range filter again. This diff helps these such queries as well. This feature can be disabled by setting the runtime GFlag, disable_hybrid_scan to true. Test Plan: ``` ./yb_build --java-test 'org.yb.pgsql.TestPgSelect' ``` ``` ./yb_build --java-test 'org.yb.cql.TestSelect' ``` Reviewers: rskannan, amitanand, mtakahara Reviewed By: mtakahara Subscribers: kannan, rsami, zyu, yql Differential Revision: https://phabricator.dev.yugabyte.com/D12617
yugabyte · Feb 5, 2022 · 323f2ca · 323f2ca
1 parent 242d14c
commit 323f2ca
Show file tree

Hide file tree

Showing 8 changed files with 897 additions and 22 deletions.
diff --git a/java/yb-cql/src/test/java/org/yb/cql/TestSelect.java b/java/yb-cql/src/test/java/org/yb/cql/TestSelect.java
@@ -1284,6 +1284,39 @@ public void testClusteringInSeeks() throws Exception {
       assertEquals(4, metrics.seekCount);
     }
 
+    // Test using a partial specification of range key
+    {
+        String query =
+            "SELECT * FROM in_range_test WHERE h = 1 AND r1 IN (80, 30)";
+
+        String[] rows = {"Row[1, 80, 0, 180]",
+                        "Row[1, 80, 10, 181]",
+                        "Row[1, 80, 20, 182]",
+                        "Row[1, 80, 30, 183]",
+                        "Row[1, 80, 40, 184]",
+                        "Row[1, 80, 50, 185]",
+                        "Row[1, 80, 60, 186]",
+                        "Row[1, 80, 70, 187]",
+                        "Row[1, 80, 80, 188]",
+                        "Row[1, 80, 90, 189]",
+                        "Row[1, 30, 0, 130]",
+                        "Row[1, 30, 10, 131]",
+                        "Row[1, 30, 20, 132]",
+                        "Row[1, 30, 30, 133]",
+                        "Row[1, 30, 40, 134]",
+                        "Row[1, 30, 50, 135]",
+                        "Row[1, 30, 60, 136]",
+                        "Row[1, 30, 70, 137]",
+                        "Row[1, 30, 80, 138]",
+                        "Row[1, 30, 90, 139]"};
+        RocksDBMetrics metrics = assertPartialRangeSpec("in_range_test", query,
+        rows);
+        // seeking to 2 places
+        // Seeking to DocKey(0x0a73, [1], [80, kLowest])
+        // Seeking to DocKey(0x0a73, [1], [30, kLowest])
+        assertEquals(2, metrics.seekCount);
+    }
+
     // Test ORDER BY clause with IN (reverse scan).
     {
       String query = "SELECT * FROM in_range_test WHERE h = 1 AND " +
@@ -1533,7 +1566,7 @@ public void testSeekWithRangeFilter() throws Exception {
       // Additionally, one
       //   Seeking to DocKey([], []) per tablet.
       // Overall, 11 * 10 + 9
-      assertEquals(119, metrics.seekCount);
+      assertEquals(109, metrics.seekCount);
     }
 
     // Test ORDER BY clause (reverse scan).
@@ -1564,7 +1597,7 @@ public void testSeekWithRangeFilter() throws Exception {
       //Seek(SubDocKey(DocKey(0x1210, [kInt32 : 1], [kInt32Descending : 30, kString : "40"]), []))
       //Seek(SubDocKey(DocKey(0x1210, [kInt32 : 1], [kInt32Descending : 30, kString : "30"]), []))
       //Seek(SubDocKey(DocKey(0x1210, [kInt32 : 1], [kInt32Descending : 30, kString : "20"]), []))
-      assertEquals(14, metrics.seekCount);
+      assertEquals(10, metrics.seekCount);
     }
 
     {
@@ -1608,7 +1641,7 @@ public void testSeekWithRangeFilter() throws Exception {
       //  Seek(SubDocKey(DocKey(0x1210, [kInt32 : 1], [kInt32Descending : 40, kString : "30"]), []))
       //  Seek(SubDocKey(DocKey(0x1210, [kInt32 : 1], [kInt32Descending : 40, kString : "20"]), []))
       //  Seek(SubDocKey(DocKey(0x1210, [kInt32 : 1], [kInt32Descending : 40, kString : "10"]), []))
-      assertEquals(34, metrics.seekCount);
+      assertEquals(28, metrics.seekCount);
     }
   }
 

diff --git a/java/yb-pgsql/src/test/java/org/yb/pgsql/TestPgSelect.java b/java/yb-pgsql/src/test/java/org/yb/pgsql/TestPgSelect.java
@@ -17,7 +17,11 @@
 import org.junit.runner.RunWith;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+
+import org.yb.minicluster.RocksDBMetrics;
+
 import org.yb.util.BuildTypeUtil;
+import org.yb.util.YBTestRunnerNonTsanOnly;
 import org.yb.util.RegexMatcher;
 import org.yb.util.YBTestRunnerNonTsanOnly;
 
@@ -1114,4 +1118,206 @@ public void testMulticolumnNullPushdown() throws Exception {
     }
   }
 
+  private RocksDBMetrics assertFullDocDBFilter(Statement statement,
+    String query, String table_name) throws Exception {
+    RocksDBMetrics beforeMetrics = getRocksDBMetric(table_name);
+    String explainOutput = getExplainAnalyzeOutput(statement, query);
+        assertFalse("Expect DocDB to filter fully",
+                    explainOutput.contains("Rows Removed by"));
+    RocksDBMetrics afterMetrics = getRocksDBMetric(table_name);
+    return afterMetrics.subtract(beforeMetrics);
+  }
+
+  @Test
+  public void testPartialKeyScan() throws Exception {
+    String query = "CREATE TABLE sample_table(h INT, r1 INT, r2 INT, r3 INT, "
+                    + "v INT, PRIMARY KEY(h HASH, r1 ASC, r2 ASC, r3 DESC))";
+
+    try (Statement statement = connection.createStatement()) {
+        statement.execute(query);
+
+        // v has values from 1 to 100000 and the other columns are
+        // various digits of v as such
+        // h    r1  r2  r3      v
+        // 0    0   0   0       0
+        // 0    0   0   1       1
+        // ...
+        // 12   4   9   3      12493
+        // ...
+        // 100  0   0   0      100000
+        query = "INSERT INTO sample_table SELECT i/1000, (i/100)%10, " +
+                "(i/10)%10, i%10, i FROM generate_series(1, 100000) i";
+        statement.execute(query);
+
+        Set<Row> allRows = new HashSet<>();
+        for (int i = 1; i <= 100000; i++) {
+            allRows.add(new Row(i/1000, (i/100)%10, (i/10)%10, i%10, i));
+        }
+
+        // Select where hash code is specified and one range constraint
+        query = "SELECT * FROM sample_table WHERE h = 1 AND r3 < 6";
+
+        Set<Row> expectedRows = allRows.stream()
+                                .filter(r -> r.getInt(0) == 1
+                                        && r.getInt(3) < 6)
+                                .collect(Collectors.toSet());
+        assertRowSet(statement, query, expectedRows);
+
+        RocksDBMetrics metrics = assertFullDocDBFilter(statement, query, "sample_table");
+        // There are 10 * 10 total values for r1 and r2 that we have to look
+        // through. For each pair (r1, r2) we iterate through all values of
+        // r3 in [0, 6] and then seek to the next pair for (r1, r2). There
+        // are 10 * 10 such pairs. There is also an initial seek into the
+        // hash key, making the total 10 * 10 + 1 = 101. The actual seeks are
+        // as follows:
+        // Seek(SubDocKey(DocKey(0x1210, [1], [kLowest]), []))
+        // Seek(SubDocKey(DocKey(0x1210, [1], [0, 0, 6]), []))
+        // Seek(SubDocKey(DocKey(0x1210, [1], [0, 1, 6]), []))
+        // Seek(SubDocKey(DocKey(0x1210, [1], [0, 2, 6]), []))
+        // ...
+        // Seek(SubDocKey(DocKey(0x1210, [1], [1, 0, 6]), []))
+        // ...
+        // Seek(SubDocKey(DocKey(0x1210, [1], [9, 9, 6]), []))
+        assertEquals(101, metrics.seekCount);
+
+        // Select where hash code is specified, one range constraint
+        // and one option constraint on two separate columns.
+        // No constraint is specified for r2.
+        query = "SELECT * FROM sample_table WHERE " +
+                "h = 1 AND r1 < 2 AND r3 IN (2, 25, 8, 7, 23, 18)";
+        Integer[] r3FilterArray = {2, 25, 8, 7, 23, 18};
+        Set<Integer> r3Filter = new HashSet<Integer>();
+        r3Filter.addAll(Arrays.asList(r3FilterArray));
+
+        expectedRows = allRows.stream()
+                                .filter(r -> r.getInt(0) == 1
+                                        && r.getInt(1) < 2
+                                        && r3Filter.contains(r.getInt(3)))
+                                .collect(Collectors.toSet());
+        assertRowSet(statement, query, expectedRows);
+
+        metrics = assertFullDocDBFilter(statement, query, "sample_table");
+        // For each of the 3 * 10 possible pairs of (r1, r2) we seek through
+        // 4 values of r3 (8, 7, 2, kHighest). We must have that seek to
+        // r3 = kHighest in order to get to the next value of (r1,r2).
+        // We also have one initial seek into the hash key, making the total
+        // number of seeks 3 * 10 * 4 + 1 = 121
+        // Seek(SubDocKey(DocKey(0x1210, [1], [kLowest]), []))
+        // Seek(SubDocKey(DocKey(0x1210, [1], [0, 0, 8]), []))
+        // Seek(SubDocKey(DocKey(0x1210, [1], [0, 0, 7]), []))
+        // Seek(SubDocKey(DocKey(0x1210, [1], [0, 0, 2]), []))
+        // Seek(SubDocKey(DocKey(0x1210, [1], [0, 0, kHighest]), []))
+        // Seek(SubDocKey(DocKey(0x1210, [1], [0, 1, 8]), []))
+        // ...
+        // Seek(SubDocKey(DocKey(0x1210, [1], [2, 9, 2]), []))
+        // Seek(SubDocKey(DocKey(0x1210, [1], [2, 9, kHighest]), []))
+        assertEquals(121, metrics.seekCount);
+
+        // Select where all keys have some sort of discrete constraint
+        // on them
+        query = "SELECT * FROM sample_table WHERE " +
+                "h = 1 AND r1 IN (1,2) AND r2 IN (2,3) " +
+                "AND r3 IN (2, 25, 8, 7, 23, 18)";
+
+        expectedRows = allRows.stream()
+                                .filter(r -> r.getInt(0) == 1
+                                        && (r.getInt(1) == 1
+                                            || r.getInt(1) == 2)
+                                        && (r.getInt(2) == 2
+                                            || r.getInt(2) == 3)
+                                        && r3Filter.contains(r.getInt(3)))
+                                .collect(Collectors.toSet());
+        assertRowSet(statement, query, expectedRows);
+
+        metrics = assertFullDocDBFilter(statement, query, "sample_table");
+        // There are 2 possible values for r1 and 2 possible values for r2.
+        // There are 3 possible values for r3 (8, 7, 2). Remember that for
+        // each value of (r1, r2), we must seek to (r1, r2, 25) to get
+        // to the first row that has value of (r1, r2),
+        // resulting in 4 total seeks for each (r1, r2).
+        // Altogether there are 2 * 2 * 4 = 16 seeks.
+        // Seek(SubDocKey(DocKey(0x1210, [1], [1, 2, 25]), []))
+        // Seek(SubDocKey(DocKey(0x1210, [1], [1, 2, 8]), []))
+        // Seek(SubDocKey(DocKey(0x1210, [1], [1, 2, 7]), []))
+        // Seek(SubDocKey(DocKey(0x1210, [1], [1, 2, 2]), []))
+        // Seek(SubDocKey(DocKey(0x1210, [1], [1, 3, 25]), []))
+        // Seek(SubDocKey(DocKey(0x1210, [1], [1, 3, 8]), []))
+        // Seek(SubDocKey(DocKey(0x1210, [1], [1, 3, 7]), []))
+        // Seek(SubDocKey(DocKey(0x1210, [1], [1, 3, 2]), []))
+        // Seek(SubDocKey(DocKey(0x1210, [1], [2, 2, 25]), []))
+        // Seek(SubDocKey(DocKey(0x1210, [1], [2, 2, 8]), []))
+        // Seek(SubDocKey(DocKey(0x1210, [1], [2, 2, 7]), []))
+        // Seek(SubDocKey(DocKey(0x1210, [1], [2, 2, 2]), []))
+        // Seek(SubDocKey(DocKey(0x1210, [1], [2, 3, 25]), []))
+        // Seek(SubDocKey(DocKey(0x1210, [1], [2, 3, 8]), []))
+        // Seek(SubDocKey(DocKey(0x1210, [1], [2, 3, 7]), []))
+        // Seek(SubDocKey(DocKey(0x1210, [1], [2, 3, 2]), []))
+        assertEquals(16, metrics.seekCount);
+
+
+        // Select where two out of three columns have discrete constraints
+        // set up while the other one has no restrictions
+        query = "SELECT * FROM sample_table WHERE " +
+                "h = 1 AND r2 IN (2,3) AND r3 IN (2, 25, 8, 7, 23, 18)";
+
+        expectedRows = allRows.stream()
+                                .filter(r -> r.getInt(0) == 1
+                                        && (r.getInt(2) == 2
+                                            || r.getInt(2) == 3)
+                                        && r3Filter.contains(r.getInt(3)))
+                                .collect(Collectors.toSet());
+        assertRowSet(statement, query, expectedRows);
+
+        metrics = assertFullDocDBFilter(statement, query, "sample_table");
+
+        // For each value of r1, we have two values of r2 to seek through and
+        // for each of those we have at most 6 values of r3 to seek through.
+        // In reality, we seek through 4 values of r3 for each (r1,r2) for
+        // the same reason as the previous test. After we've exhausted all
+        // possibilities for (r2,r3) for a given r1, we seek to (r1,kHighest)
+        // to seek to the next possible value of r1. Therefore, we seek
+        // 4 * 2 + 1 = 9 values for each r1.
+        // Note that there are 10 values of r1 to seek through and we do an
+        // initial seek into the hash code as usual. So in total, we have
+        // 10 * (4 * 2 + 1) + 1 = 10 * 9 + 1 = 91 seeks.
+        // Seek(SubDocKey(DocKey(0x1210, [1], [kLowest]), []))
+        // Seek(SubDocKey(DocKey(0x1210, [1], [0, 2, 25]), []))
+        // Seek(SubDocKey(DocKey(0x1210, [1], [0, 2, 8]), []))
+        // Seek(SubDocKey(DocKey(0x1210, [1], [0, 2, 7]), []))
+        // Seek(SubDocKey(DocKey(0x1210, [1], [0, 2, 2]), []))
+        // Seek(SubDocKey(DocKey(0x1210, [1], [0, 3, 25]), []))
+        // Seek(SubDocKey(DocKey(0x1210, [1], [0, 3, 8]), []))
+        // Seek(SubDocKey(DocKey(0x1210, [1], [0, 3, 7]), []))
+        // Seek(SubDocKey(DocKey(0x1210, [1], [0, 3, 2]), []))
+        // Seek(SubDocKey(DocKey(0x1210, [1], [0, kHighest]), []))
+        // Seek(SubDocKey(DocKey(0x1210, [1], [1, 2, 25]), []))
+        // ...
+        // Seek(SubDocKey(DocKey(0x1210, [1], [9, kHighest]), []))
+        assertEquals(91, metrics.seekCount);
+
+        // Select where we have options for the hash code and discrete
+        // filters on two out of three range columns
+        query = "SELECT * FROM sample_table WHERE " +
+                "h IN (1,5) AND r2 IN (2,3) AND r3 IN (2, 25, 8, 7, 23, 18)";
+
+        expectedRows = allRows.stream()
+                                .filter(r -> (r.getInt(0) == 1
+                                            || r.getInt(0) == 5)
+                                        && (r.getInt(2) == 2
+                                            || r.getInt(2) == 3)
+                                        && r3Filter.contains(r.getInt(3)))
+                                .collect(Collectors.toSet());
+        assertRowSet(statement, query, expectedRows);
+
+        metrics = assertFullDocDBFilter(statement, query, "sample_table");
+        // Note that in this case, YSQL sends two batches of requests
+        // to DocDB in parallel, one for each hash code option. So this
+        // should really just be double the number of seeks as
+        // SELECT * FROM sample_table WHERE h = 1 AND r2 IN (2,3)
+        // AND r3 IN (2, 25, 8, 7, 23, 18)
+        // We have 91 * 2 = 182 seeks
+        assertEquals(182, metrics.seekCount);
+    }
+  }
+
 }
diff --git a/src/yb/common/ql_scanspec.h b/src/yb/common/ql_scanspec.h
@@ -71,6 +71,14 @@ class QLScanRange {
     return (iter == ranges_.end() ? QLRange() : iter->second);
   }
 
+  std::vector<ColumnId> GetColIds() const {
+    std::vector<ColumnId> col_id_list;
+    for (auto &it : ranges_) {
+      col_id_list.push_back(it.first);
+    }
+    return col_id_list;
+  }
+
   bool has_in_range_options() const {
     return has_in_range_options_;
   }

diff --git a/src/yb/docdb/doc_pgsql_scanspec.cc b/src/yb/docdb/doc_pgsql_scanspec.cc
@@ -25,6 +25,8 @@
 #include "yb/util/result.h"
 #include "yb/util/status_format.h"
 
+DECLARE_bool(disable_hybrid_scan);
+
 namespace yb {
 namespace docdb {
 
@@ -161,6 +163,10 @@ DocPgsqlScanSpec::DocPgsqlScanSpec(
     LOG(FATAL) << "DEVELOPERS: Add support for condition (where clause)";
   }
 
+  if (range_bounds_) {
+    range_bounds_indexes_ = range_bounds_->GetColIds();
+  }
+
   // If the hash key is fixed and we have range columns with IN condition, try to construct the
   // exact list of range options to scan for.
   if ((!hashed_components_->empty() || schema_.num_hash_key_columns() == 0) &&
@@ -171,12 +177,16 @@ DocPgsqlScanSpec::DocPgsqlScanSpec(
         std::make_shared<std::vector<std::vector<PrimitiveValue>>>(schema_.num_range_key_columns());
     InitRangeOptions(*condition);
 
-    // Range options are only valid if all range columns are set (i.e. have one or more options).
-    for (size_t i = 0; i < schema_.num_range_key_columns(); i++) {
-      if ((*range_options_)[i].empty()) {
-        range_options_ = nullptr;
-        break;
-      }
+    if (FLAGS_disable_hybrid_scan) {
+        // Range options are only valid if all
+        // range columns are set (i.e. have one or more options)
+        // when hybrid scan is disabled
+        for (size_t i = 0; i < schema_.num_range_key_columns(); i++) {
+            if ((*range_options_)[i].empty()) {
+                range_options_ = nullptr;
+                break;
+            }
+        }
     }
   }
 }
@@ -212,6 +222,7 @@ void DocPgsqlScanSpec::InitRangeOptions(const PgsqlConditionPB& condition) {
       }
 
       SortingType sortingType = schema_.column(col_idx).sorting_type();
+      range_options_indexes_.emplace_back(condition.operands(0).column_id());
 
       if (condition.op() == QL_OP_EQUAL) {
         auto pv = PrimitiveValue::FromQLValuePB(condition.operands(1).value(), sortingType);

diff --git a/src/yb/docdb/doc_pgsql_scanspec.h b/src/yb/docdb/doc_pgsql_scanspec.h
@@ -85,6 +85,14 @@ class DocPgsqlScanSpec : public PgsqlScanSpec {
     return range_options_;
   }
 
+  const std::vector<ColumnId> range_options_indexes() const {
+    return range_options_indexes_;
+  }
+
+  const std::vector<ColumnId> range_bounds_indexes() const {
+    return range_bounds_indexes_;
+  }
+
  private:
   static const DocKey& DefaultStartDocKey();
 
@@ -97,6 +105,9 @@ class DocPgsqlScanSpec : public PgsqlScanSpec {
   // The scan range within the hash key when a WHERE condition is specified.
   const std::unique_ptr<const QLScanRange> range_bounds_;
 
+  // Indexes of columns that have range bounds such as c2 < 4 AND c2 >= 1
+  std::vector<ColumnId> range_bounds_indexes_;
+
   // Initialize range_options_ if hashed_components_ in set and all range columns have one or more
   // options (i.e. using EQ/IN conditions). Otherwise range_options_ will stay null and we will
   // only use the range_bounds for scanning.
@@ -105,6 +116,10 @@ class DocPgsqlScanSpec : public PgsqlScanSpec {
   // The range value options if set. (possibly more than one due to IN conditions).
   std::shared_ptr<std::vector<std::vector<PrimitiveValue>>> range_options_;
 
+  // Indexes of columns that have range option filters such as
+  // c2 IN (1, 5, 6, 9)
+  std::vector<ColumnId> range_options_indexes_;
+
   // Schema of the columns to scan.
   const Schema& schema_;