diff --git a/java/yb-cql/src/test/java/org/yb/cql/TestSelect.java b/java/yb-cql/src/test/java/org/yb/cql/TestSelect.java index 996fa013c5da..759e2cdc716e 100644 --- a/java/yb-cql/src/test/java/org/yb/cql/TestSelect.java +++ b/java/yb-cql/src/test/java/org/yb/cql/TestSelect.java @@ -1284,6 +1284,39 @@ public void testClusteringInSeeks() throws Exception { assertEquals(4, metrics.seekCount); } + // Test using a partial specification of range key + { + String query = + "SELECT * FROM in_range_test WHERE h = 1 AND r1 IN (80, 30)"; + + String[] rows = {"Row[1, 80, 0, 180]", + "Row[1, 80, 10, 181]", + "Row[1, 80, 20, 182]", + "Row[1, 80, 30, 183]", + "Row[1, 80, 40, 184]", + "Row[1, 80, 50, 185]", + "Row[1, 80, 60, 186]", + "Row[1, 80, 70, 187]", + "Row[1, 80, 80, 188]", + "Row[1, 80, 90, 189]", + "Row[1, 30, 0, 130]", + "Row[1, 30, 10, 131]", + "Row[1, 30, 20, 132]", + "Row[1, 30, 30, 133]", + "Row[1, 30, 40, 134]", + "Row[1, 30, 50, 135]", + "Row[1, 30, 60, 136]", + "Row[1, 30, 70, 137]", + "Row[1, 30, 80, 138]", + "Row[1, 30, 90, 139]"}; + RocksDBMetrics metrics = assertPartialRangeSpec("in_range_test", query, + rows); + // seeking to 2 places + // Seeking to DocKey(0x0a73, [1], [80, kLowest]) + // Seeking to DocKey(0x0a73, [1], [30, kLowest]) + assertEquals(2, metrics.seekCount); + } + // Test ORDER BY clause with IN (reverse scan). { String query = "SELECT * FROM in_range_test WHERE h = 1 AND " + @@ -1533,7 +1566,7 @@ public void testSeekWithRangeFilter() throws Exception { // Additionally, one // Seeking to DocKey([], []) per tablet. // Overall, 11 * 10 + 9 - assertEquals(119, metrics.seekCount); + assertEquals(109, metrics.seekCount); } // Test ORDER BY clause (reverse scan). 
@@ -1564,7 +1597,7 @@ public void testSeekWithRangeFilter() throws Exception { //Seek(SubDocKey(DocKey(0x1210, [kInt32 : 1], [kInt32Descending : 30, kString : "40"]), [])) //Seek(SubDocKey(DocKey(0x1210, [kInt32 : 1], [kInt32Descending : 30, kString : "30"]), [])) //Seek(SubDocKey(DocKey(0x1210, [kInt32 : 1], [kInt32Descending : 30, kString : "20"]), [])) - assertEquals(14, metrics.seekCount); + assertEquals(10, metrics.seekCount); } { @@ -1608,7 +1641,7 @@ public void testSeekWithRangeFilter() throws Exception { // Seek(SubDocKey(DocKey(0x1210, [kInt32 : 1], [kInt32Descending : 40, kString : "30"]), [])) // Seek(SubDocKey(DocKey(0x1210, [kInt32 : 1], [kInt32Descending : 40, kString : "20"]), [])) // Seek(SubDocKey(DocKey(0x1210, [kInt32 : 1], [kInt32Descending : 40, kString : "10"]), [])) - assertEquals(34, metrics.seekCount); + assertEquals(28, metrics.seekCount); } } diff --git a/java/yb-pgsql/src/test/java/org/yb/pgsql/TestPgSelect.java b/java/yb-pgsql/src/test/java/org/yb/pgsql/TestPgSelect.java index de7271647d33..28fd9a283c38 100644 --- a/java/yb-pgsql/src/test/java/org/yb/pgsql/TestPgSelect.java +++ b/java/yb-pgsql/src/test/java/org/yb/pgsql/TestPgSelect.java @@ -17,7 +17,11 @@ import org.junit.runner.RunWith; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + +import org.yb.minicluster.RocksDBMetrics; + import org.yb.util.BuildTypeUtil; +import org.yb.util.YBTestRunnerNonTsanOnly; import org.yb.util.RegexMatcher; import org.yb.util.YBTestRunnerNonTsanOnly; @@ -1114,4 +1118,206 @@ public void testMulticolumnNullPushdown() throws Exception { } } + private RocksDBMetrics assertFullDocDBFilter(Statement statement, + String query, String table_name) throws Exception { + RocksDBMetrics beforeMetrics = getRocksDBMetric(table_name); + String explainOutput = getExplainAnalyzeOutput(statement, query); + assertFalse("Expect DocDB to filter fully", + explainOutput.contains("Rows Removed by")); + RocksDBMetrics afterMetrics = getRocksDBMetric(table_name); + 
return afterMetrics.subtract(beforeMetrics); + } + + @Test + public void testPartialKeyScan() throws Exception { + String query = "CREATE TABLE sample_table(h INT, r1 INT, r2 INT, r3 INT, " + + "v INT, PRIMARY KEY(h HASH, r1 ASC, r2 ASC, r3 DESC))"; + + try (Statement statement = connection.createStatement()) { + statement.execute(query); + + // v has values from 1 to 100000 and the other columns are + // various digits of v as such + // h r1 r2 r3 v + // 0 0 0 0 0 + // 0 0 0 1 1 + // ... + // 12 4 9 3 12493 + // ... + // 100 0 0 0 100000 + query = "INSERT INTO sample_table SELECT i/1000, (i/100)%10, " + + "(i/10)%10, i%10, i FROM generate_series(1, 100000) i"; + statement.execute(query); + + Set allRows = new HashSet<>(); + for (int i = 1; i <= 100000; i++) { + allRows.add(new Row(i/1000, (i/100)%10, (i/10)%10, i%10, i)); + } + + // Select where hash code is specified and one range constraint + query = "SELECT * FROM sample_table WHERE h = 1 AND r3 < 6"; + + Set expectedRows = allRows.stream() + .filter(r -> r.getInt(0) == 1 + && r.getInt(3) < 6) + .collect(Collectors.toSet()); + assertRowSet(statement, query, expectedRows); + + RocksDBMetrics metrics = assertFullDocDBFilter(statement, query, "sample_table"); + // There are 10 * 10 total values for r1 and r2 that we have to look + // through. For each pair (r1, r2) we iterate through all values of + // r3 in [0, 6] and then seek to the next pair for (r1, r2). There + // are 10 * 10 such pairs. There is also an initial seek into the + // hash key, making the total 10 * 10 + 1 = 101. The actual seeks are + // as follows: + // Seek(SubDocKey(DocKey(0x1210, [1], [kLowest]), [])) + // Seek(SubDocKey(DocKey(0x1210, [1], [0, 0, 6]), [])) + // Seek(SubDocKey(DocKey(0x1210, [1], [0, 1, 6]), [])) + // Seek(SubDocKey(DocKey(0x1210, [1], [0, 2, 6]), [])) + // ... + // Seek(SubDocKey(DocKey(0x1210, [1], [1, 0, 6]), [])) + // ... 
+ // Seek(SubDocKey(DocKey(0x1210, [1], [9, 9, 6]), [])) + assertEquals(101, metrics.seekCount); + + // Select where hash code is specified, one range constraint + // and one option constraint on two separate columns. + // No constraint is specified for r2. + query = "SELECT * FROM sample_table WHERE " + + "h = 1 AND r1 < 2 AND r3 IN (2, 25, 8, 7, 23, 18)"; + Integer[] r3FilterArray = {2, 25, 8, 7, 23, 18}; + Set r3Filter = new HashSet(); + r3Filter.addAll(Arrays.asList(r3FilterArray)); + + expectedRows = allRows.stream() + .filter(r -> r.getInt(0) == 1 + && r.getInt(1) < 2 + && r3Filter.contains(r.getInt(3))) + .collect(Collectors.toSet()); + assertRowSet(statement, query, expectedRows); + + metrics = assertFullDocDBFilter(statement, query, "sample_table"); + // For each of the 3 * 10 possible pairs of (r1, r2) we seek through + // 4 values of r3 (8, 7, 2, kHighest). We must have that seek to + // r3 = kHighest in order to get to the next value of (r1,r2). + // We also have one initial seek into the hash key, making the total + // number of seeks 3 * 10 * 4 + 1 = 121 + // Seek(SubDocKey(DocKey(0x1210, [1], [kLowest]), [])) + // Seek(SubDocKey(DocKey(0x1210, [1], [0, 0, 8]), [])) + // Seek(SubDocKey(DocKey(0x1210, [1], [0, 0, 7]), [])) + // Seek(SubDocKey(DocKey(0x1210, [1], [0, 0, 2]), [])) + // Seek(SubDocKey(DocKey(0x1210, [1], [0, 0, kHighest]), [])) + // Seek(SubDocKey(DocKey(0x1210, [1], [0, 1, 8]), [])) + // ... 
+ // Seek(SubDocKey(DocKey(0x1210, [1], [2, 9, 2]), [])) + // Seek(SubDocKey(DocKey(0x1210, [1], [2, 9, kHighest]), [])) + assertEquals(121, metrics.seekCount); + + // Select where all keys have some sort of discrete constraint + // on them + query = "SELECT * FROM sample_table WHERE " + + "h = 1 AND r1 IN (1,2) AND r2 IN (2,3) " + + "AND r3 IN (2, 25, 8, 7, 23, 18)"; + + expectedRows = allRows.stream() + .filter(r -> r.getInt(0) == 1 + && (r.getInt(1) == 1 + || r.getInt(1) == 2) + && (r.getInt(2) == 2 + || r.getInt(2) == 3) + && r3Filter.contains(r.getInt(3))) + .collect(Collectors.toSet()); + assertRowSet(statement, query, expectedRows); + + metrics = assertFullDocDBFilter(statement, query, "sample_table"); + // There are 2 possible values for r1 and 2 possible values for r2. + // There are 3 possible values for r3 (8, 7, 2). Remember that for + // each value of (r1, r2), we must seek to (r1, r2, 25) to get + // to the first row that has value of (r1, r2), + // resulting in 4 total seeks for each (r1, r2). + // Altogether there are 2 * 2 * 4 = 16 seeks. 
+ // Seek(SubDocKey(DocKey(0x1210, [1], [1, 2, 25]), [])) + // Seek(SubDocKey(DocKey(0x1210, [1], [1, 2, 8]), [])) + // Seek(SubDocKey(DocKey(0x1210, [1], [1, 2, 7]), [])) + // Seek(SubDocKey(DocKey(0x1210, [1], [1, 2, 2]), [])) + // Seek(SubDocKey(DocKey(0x1210, [1], [1, 3, 25]), [])) + // Seek(SubDocKey(DocKey(0x1210, [1], [1, 3, 8]), [])) + // Seek(SubDocKey(DocKey(0x1210, [1], [1, 3, 7]), [])) + // Seek(SubDocKey(DocKey(0x1210, [1], [1, 3, 2]), [])) + // Seek(SubDocKey(DocKey(0x1210, [1], [2, 2, 25]), [])) + // Seek(SubDocKey(DocKey(0x1210, [1], [2, 2, 8]), [])) + // Seek(SubDocKey(DocKey(0x1210, [1], [2, 2, 7]), [])) + // Seek(SubDocKey(DocKey(0x1210, [1], [2, 2, 2]), [])) + // Seek(SubDocKey(DocKey(0x1210, [1], [2, 3, 25]), [])) + // Seek(SubDocKey(DocKey(0x1210, [1], [2, 3, 8]), [])) + // Seek(SubDocKey(DocKey(0x1210, [1], [2, 3, 7]), [])) + // Seek(SubDocKey(DocKey(0x1210, [1], [2, 3, 2]), [])) + assertEquals(16, metrics.seekCount); + + + // Select where two out of three columns have discrete constraints + // set up while the other one has no restrictions + query = "SELECT * FROM sample_table WHERE " + + "h = 1 AND r2 IN (2,3) AND r3 IN (2, 25, 8, 7, 23, 18)"; + + expectedRows = allRows.stream() + .filter(r -> r.getInt(0) == 1 + && (r.getInt(2) == 2 + || r.getInt(2) == 3) + && r3Filter.contains(r.getInt(3))) + .collect(Collectors.toSet()); + assertRowSet(statement, query, expectedRows); + + metrics = assertFullDocDBFilter(statement, query, "sample_table"); + + // For each value of r1, we have two values of r2 to seek through and + // for each of those we have at most 6 values of r3 to seek through. + // In reality, we seek through 4 values of r3 for each (r1,r2) for + // the same reason as the previous test. After we've exhausted all + // possibilities for (r2,r3) for a given r1, we seek to (r1,kHighest) + // to seek to the next possible value of r1. Therefore, we seek + // 4 * 2 + 1 = 9 values for each r1. 
+ // Note that there are 10 values of r1 to seek through and we do an + // initial seek into the hash code as usual. So in total, we have + // 10 * (4 * 2 + 1) + 1 = 10 * 9 + 1 = 91 seeks. + // Seek(SubDocKey(DocKey(0x1210, [1], [kLowest]), [])) + // Seek(SubDocKey(DocKey(0x1210, [1], [0, 2, 25]), [])) + // Seek(SubDocKey(DocKey(0x1210, [1], [0, 2, 8]), [])) + // Seek(SubDocKey(DocKey(0x1210, [1], [0, 2, 7]), [])) + // Seek(SubDocKey(DocKey(0x1210, [1], [0, 2, 2]), [])) + // Seek(SubDocKey(DocKey(0x1210, [1], [0, 3, 25]), [])) + // Seek(SubDocKey(DocKey(0x1210, [1], [0, 3, 8]), [])) + // Seek(SubDocKey(DocKey(0x1210, [1], [0, 3, 7]), [])) + // Seek(SubDocKey(DocKey(0x1210, [1], [0, 3, 2]), [])) + // Seek(SubDocKey(DocKey(0x1210, [1], [0, kHighest]), [])) + // Seek(SubDocKey(DocKey(0x1210, [1], [1, 2, 25]), [])) + // ... + // Seek(SubDocKey(DocKey(0x1210, [1], [9, kHighest]), [])) + assertEquals(91, metrics.seekCount); + + // Select where we have options for the hash code and discrete + // filters on two out of three range columns + query = "SELECT * FROM sample_table WHERE " + + "h IN (1,5) AND r2 IN (2,3) AND r3 IN (2, 25, 8, 7, 23, 18)"; + + expectedRows = allRows.stream() + .filter(r -> (r.getInt(0) == 1 + || r.getInt(0) == 5) + && (r.getInt(2) == 2 + || r.getInt(2) == 3) + && r3Filter.contains(r.getInt(3))) + .collect(Collectors.toSet()); + assertRowSet(statement, query, expectedRows); + + metrics = assertFullDocDBFilter(statement, query, "sample_table"); + // Note that in this case, YSQL sends two batches of requests + // to DocDB in parallel, one for each hash code option. 
So this + // should really just be double the number of seeks as + // SELECT * FROM sample_table WHERE h = 1 AND r2 IN (2,3) + // AND r3 IN (2, 25, 8, 7, 23, 18) + // We have 91 * 2 = 182 seeks + assertEquals(182, metrics.seekCount); + } + } + } diff --git a/src/yb/common/ql_scanspec.h b/src/yb/common/ql_scanspec.h index 0932d2aac3de..e3656bab5a1d 100644 --- a/src/yb/common/ql_scanspec.h +++ b/src/yb/common/ql_scanspec.h @@ -71,6 +71,14 @@ class QLScanRange { return (iter == ranges_.end() ? QLRange() : iter->second); } + std::vector GetColIds() const { + std::vector col_id_list; + for (auto &it : ranges_) { + col_id_list.push_back(it.first); + } + return col_id_list; + } + bool has_in_range_options() const { return has_in_range_options_; } diff --git a/src/yb/docdb/doc_pgsql_scanspec.cc b/src/yb/docdb/doc_pgsql_scanspec.cc index f42bc096d9bc..e156a5bfa1eb 100644 --- a/src/yb/docdb/doc_pgsql_scanspec.cc +++ b/src/yb/docdb/doc_pgsql_scanspec.cc @@ -25,6 +25,8 @@ #include "yb/util/result.h" #include "yb/util/status_format.h" +DECLARE_bool(disable_hybrid_scan); + namespace yb { namespace docdb { @@ -161,6 +163,10 @@ DocPgsqlScanSpec::DocPgsqlScanSpec( LOG(FATAL) << "DEVELOPERS: Add support for condition (where clause)"; } + if (range_bounds_) { + range_bounds_indexes_ = range_bounds_->GetColIds(); + } + // If the hash key is fixed and we have range columns with IN condition, try to construct the // exact list of range options to scan for. if ((!hashed_components_->empty() || schema_.num_hash_key_columns() == 0) && @@ -171,12 +177,16 @@ DocPgsqlScanSpec::DocPgsqlScanSpec( std::make_shared>>(schema_.num_range_key_columns()); InitRangeOptions(*condition); - // Range options are only valid if all range columns are set (i.e. have one or more options). 
- for (size_t i = 0; i < schema_.num_range_key_columns(); i++) { - if ((*range_options_)[i].empty()) { - range_options_ = nullptr; - break; - } + if (FLAGS_disable_hybrid_scan) { + // Range options are only valid if all + // range columns are set (i.e. have one or more options) + // when hybrid scan is disabled + for (size_t i = 0; i < schema_.num_range_key_columns(); i++) { + if ((*range_options_)[i].empty()) { + range_options_ = nullptr; + break; + } + } } } } @@ -212,6 +222,7 @@ void DocPgsqlScanSpec::InitRangeOptions(const PgsqlConditionPB& condition) { } SortingType sortingType = schema_.column(col_idx).sorting_type(); + range_options_indexes_.emplace_back(condition.operands(0).column_id()); if (condition.op() == QL_OP_EQUAL) { auto pv = PrimitiveValue::FromQLValuePB(condition.operands(1).value(), sortingType); diff --git a/src/yb/docdb/doc_pgsql_scanspec.h b/src/yb/docdb/doc_pgsql_scanspec.h index 12627b064cae..08eeaab7cb5b 100644 --- a/src/yb/docdb/doc_pgsql_scanspec.h +++ b/src/yb/docdb/doc_pgsql_scanspec.h @@ -85,6 +85,14 @@ class DocPgsqlScanSpec : public PgsqlScanSpec { return range_options_; } + const std::vector range_options_indexes() const { + return range_options_indexes_; + } + + const std::vector range_bounds_indexes() const { + return range_bounds_indexes_; + } + private: static const DocKey& DefaultStartDocKey(); @@ -97,6 +105,9 @@ class DocPgsqlScanSpec : public PgsqlScanSpec { // The scan range within the hash key when a WHERE condition is specified. const std::unique_ptr range_bounds_; + // Indexes of columns that have range bounds such as c2 < 4 AND c2 >= 1 + std::vector range_bounds_indexes_; + // Initialize range_options_ if hashed_components_ in set and all range columns have one or more // options (i.e. using EQ/IN conditions). Otherwise range_options_ will stay null and we will // only use the range_bounds for scanning. @@ -105,6 +116,10 @@ class DocPgsqlScanSpec : public PgsqlScanSpec { // The range value options if set. 
(possibly more than one due to IN conditions). std::shared_ptr>> range_options_; + // Indexes of columns that have range option filters such as + // c2 IN (1, 5, 6, 9) + std::vector range_options_indexes_; + // Schema of the columns to scan. const Schema& schema_; diff --git a/src/yb/docdb/doc_ql_scanspec.cc b/src/yb/docdb/doc_ql_scanspec.cc index f9ca1b63e934..399f118e5cda 100644 --- a/src/yb/docdb/doc_ql_scanspec.cc +++ b/src/yb/docdb/doc_ql_scanspec.cc @@ -27,6 +27,8 @@ #include "yb/util/result.h" #include "yb/util/status_format.h" +DECLARE_bool(disable_hybrid_scan); + using std::vector; namespace yb { @@ -68,6 +70,11 @@ DocQLScanSpec::DocQLScanSpec( upper_doc_key_(bound_key(false)), query_id_(query_id) { + if (range_bounds_) { + range_bounds_indexes_ = range_bounds_->GetColIds(); + } + + // If the hash key is fixed and we have range columns with IN condition, try to construct the // exact list of range options to scan for. if (!hashed_components_->empty() && schema_.num_range_key_columns() > 0 && @@ -77,12 +84,15 @@ DocQLScanSpec::DocQLScanSpec( std::make_shared>>(schema_.num_range_key_columns()); InitRangeOptions(*condition); - // Range options are only valid if all range columns are set (i.e. have one or more options). - for (size_t i = 0; i < schema_.num_range_key_columns(); i++) { - if ((*range_options_)[i].empty()) { - range_options_ = nullptr; - break; - } + if (FLAGS_disable_hybrid_scan) { + // Range options are only valid if all range columns + // are set (i.e. have one or more options). 
+ for (size_t i = 0; i < schema_.num_range_key_columns(); i++) { + if ((*range_options_)[i].empty()) { + range_options_ = nullptr; + break; + } + } } } } @@ -119,6 +129,7 @@ void DocQLScanSpec::InitRangeOptions(const QLConditionPB& condition) { } SortingType sortingType = schema_.column(col_idx).sorting_type(); + range_options_indexes_.emplace_back(condition.operands(0).column_id()); if (condition.op() == QL_OP_EQUAL) { auto pv = PrimitiveValue::FromQLValuePB(condition.operands(1).value(), sortingType); diff --git a/src/yb/docdb/doc_ql_scanspec.h b/src/yb/docdb/doc_ql_scanspec.h index 407f049777d7..f7533a028785 100644 --- a/src/yb/docdb/doc_ql_scanspec.h +++ b/src/yb/docdb/doc_ql_scanspec.h @@ -79,6 +79,14 @@ class DocQLScanSpec : public QLScanSpec { const Schema* schema() const override { return &schema_; } + const std::vector range_options_indexes() const { + return range_options_indexes_; + } + + const std::vector range_bounds_indexes() const { + return range_bounds_indexes_; + } + private: static const DocKey& DefaultStartDocKey(); @@ -99,6 +107,9 @@ class DocQLScanSpec : public QLScanSpec { // The scan range within the hash key when a WHERE condition is specified. const std::unique_ptr range_bounds_; + // Indexes of columns that have range bounds such as c2 < 4 AND c2 >= 1 + std::vector range_bounds_indexes_; + // Schema of the columns to scan. const Schema& schema_; @@ -116,6 +127,10 @@ class DocQLScanSpec : public QLScanSpec { // The range value options if set. (possibly more than one due to IN conditions). std::shared_ptr>> range_options_; + // Indexes of columns that have range option filters such as + // c2 IN (1, 5, 6, 9) + std::vector range_options_indexes_; + // Does the scan include static columns also? 
const bool include_static_columns_; diff --git a/src/yb/docdb/doc_rowwise_iterator.cc b/src/yb/docdb/doc_rowwise_iterator.cc index 19a085f9dc6f..7648cbbe4507 100644 --- a/src/yb/docdb/doc_rowwise_iterator.cc +++ b/src/yb/docdb/doc_rowwise_iterator.cc @@ -44,15 +44,23 @@ #include "yb/docdb/value_type.h" #include "yb/gutil/strings/substitute.h" +#include "yb/util/flags.h" +#include "yb/rocksdb/db/compaction.h" +#include "yb/rocksutil/yb_rocksdb.h" #include "yb/rocksdb/db.h" +#include "yb/util/flag_tags.h" #include "yb/util/result.h" #include "yb/util/status.h" #include "yb/util/status_format.h" #include "yb/util/status_log.h" #include "yb/util/strongly_typed_bool.h" +DEFINE_bool(disable_hybrid_scan, false, + "If true, hybrid scan will be disabled"); +TAG_FLAG(disable_hybrid_scan, runtime); + using std::string; namespace yb { @@ -236,7 +244,9 @@ Status DiscreteScanChoices::DoneWithCurrentTarget() { } Status DiscreteScanChoices::SkipTargetsUpTo(const Slice& new_target) { - VLOG(2) << __PRETTY_FUNCTION__ << " Updating current target to be >= " << new_target; + VLOG(2) << __PRETTY_FUNCTION__ + << " Updating current target to be >= " + << DocKey::DebugSliceToString(new_target); DCHECK(!FinishedWithScanChoices()); RETURN_NOT_OK(InitScanTargetRangeGroupIfNeeded()); DocKeyDecoder decoder(new_target); @@ -293,6 +303,9 @@ Status DiscreteScanChoices::SkipTargetsUpTo(const Slice& new_target) { current_scan_target_.AppendValueType(ValueType::kGroupEnd); + VLOG(2) << "After " << __PRETTY_FUNCTION__ << " current_scan_target_ is " + << DocKey::DebugSliceToString(current_scan_target_); + return Status::OK(); } @@ -313,10 +326,548 @@ Status DiscreteScanChoices::SeekToCurrentTarget(IntentAwareIterator* db_iter) { return Status::OK(); } +// This class combines the notions of option filters (col1 IN (1,2,3)) and +// singular range bound filters (col1 < 4 AND col1 >= 1) into a single notion of +// lists of ranges. 
So a filter for a column given in the +// Doc(QL/PGSQL)ScanSpec is converted into a range bound filter. +// In the end, each HybridScanChoices +// instance should have a sorted list of disjoint ranges to filter each column. +// Right now this supports a conjunction of range bound and discrete filters. +// Disjunctions are also supported but are UNTESTED. +// TODO: Test disjunctions when YSQL and YQL support pushing those down + +class HybridScanChoices : public ScanChoices { + public: + + // Constructs a list of ranges for each column from the given scanspec. + // A filter of the form col1 IN (1,4,5) is converted to a filter + // in the form col1 IN ([1, 1], [4, 4], [5, 5]). + HybridScanChoices(const Schema& schema, + const KeyBytes &lower_doc_key, + const KeyBytes &upper_doc_key, + bool is_forward_scan, + const std::vector &range_options_indexes, + const + std::shared_ptr>>& + range_options, + const std::vector range_bounds_indexes, + const QLScanRange *range_bounds) + : ScanChoices(is_forward_scan), + lower_doc_key_(lower_doc_key), + upper_doc_key_(upper_doc_key) { + auto range_cols_scan_options = range_options; + size_t idx = 0; + range_cols_scan_options_lower_.reserve(schema.num_range_key_columns()); + range_cols_scan_options_upper_.reserve(schema.num_range_key_columns()); + + size_t num_hash_cols = schema.num_hash_key_columns(); + + for (idx = schema.num_hash_key_columns(); + idx < schema.num_key_columns(); idx++) { + const ColumnId col_idx = schema.column_id(idx); + range_cols_scan_options_lower_.push_back({}); + range_cols_scan_options_upper_.push_back({}); + + // If this is a range bound filter, we create a singular + // list of the given range bound + if ((std::find(range_bounds_indexes.begin(), + range_bounds_indexes.end(), col_idx) + != range_bounds_indexes.end()) + && (std::find(range_options_indexes.begin(), + range_options_indexes.end(), col_idx) + == range_options_indexes.end())) { + const auto col_sort_type = schema.column(idx).sorting_type(); + 
const QLScanRange::QLRange range = range_bounds->RangeFor(col_idx); + const auto lower = GetQLRangeBoundAsPVal(range, col_sort_type, + true /* lower_bound */); + const auto upper = GetQLRangeBoundAsPVal(range, col_sort_type, + false /* upper_bound */); + + range_cols_scan_options_lower_[idx - num_hash_cols].push_back(lower); + range_cols_scan_options_upper_[idx - num_hash_cols].push_back(upper); + } else { + + // If this is an option filter, we turn each option into a + // range bound to produce a list of singular range bounds + if(std::find(range_options_indexes.begin(), + range_options_indexes.end(), col_idx) + != range_options_indexes.end()) { + auto &options = (*range_cols_scan_options)[idx - num_hash_cols]; + + if (options.empty()) { + // If there is nothing specified in the IN list like in + // SELECT * FROM ... WHERE c1 IN (); + // then nothing should pass the filter. + // To enforce this, we create a range bound (kHighest, kLowest): + // the lower list gets kHighest and the upper list gets kLowest, + // which keeps both lists the same length and matches no value. + range_cols_scan_options_lower_[idx + - num_hash_cols].push_back(PrimitiveValue(ValueType::kHighest)); + range_cols_scan_options_upper_[idx + - num_hash_cols].push_back(PrimitiveValue(ValueType::kLowest)); + } + + for (auto val : options) { + const auto lower = val; + const auto upper = val; + range_cols_scan_options_lower_[idx + - num_hash_cols].push_back(lower); + range_cols_scan_options_upper_[idx + - num_hash_cols].push_back(upper); + } + + } else { + // If no filter is specified, we just impose an artificial range + // filter [kLowest, kHighest] + range_cols_scan_options_lower_[idx - num_hash_cols] + .push_back(PrimitiveValue(ValueType::kLowest)); + range_cols_scan_options_upper_[idx - num_hash_cols] + .push_back(PrimitiveValue(ValueType::kHighest)); + } + } + } + + current_scan_target_idxs_.resize(range_cols_scan_options_lower_.size()); + + if (is_forward_scan_) { + current_scan_target_ = lower_doc_key; + } else { + current_scan_target_ = upper_doc_key; + } + + } + + HybridScanChoices(const Schema& schema, + const
DocPgsqlScanSpec& doc_spec, + const KeyBytes &lower_doc_key, + const KeyBytes &upper_doc_key) + : HybridScanChoices(schema, lower_doc_key, upper_doc_key, + doc_spec.is_forward_scan(), doc_spec.range_options_indexes(), + doc_spec.range_options(), doc_spec.range_bounds_indexes(), + doc_spec.range_bounds()) { + } + + HybridScanChoices(const Schema& schema, + const DocQLScanSpec& doc_spec, + const KeyBytes &lower_doc_key, + const KeyBytes &upper_doc_key) + : HybridScanChoices(schema, lower_doc_key, upper_doc_key, + doc_spec.is_forward_scan(), doc_spec.range_options_indexes(), + doc_spec.range_options(), doc_spec.range_bounds_indexes(), + doc_spec.range_bounds()) { + } + + CHECKED_STATUS SkipTargetsUpTo(const Slice& new_target) override; + CHECKED_STATUS DoneWithCurrentTarget() override; + CHECKED_STATUS SeekToCurrentTarget(IntentAwareIterator* db_iter) override; + + protected: + // Utility function for (multi)key scans. Updates the target scan key by + // incrementing the option + // index for one column. Will handle overflow by setting current column + // index to 0 and incrementing the previous column instead. If it overflows + // at first column it means we are done, so it clears the scan target idxs + // array. + CHECKED_STATUS IncrementScanTargetAtColumn(int start_col); + + private: + KeyBytes prev_scan_target_; + + // The following encodes the list of ranges we are iterating over + std::vector> range_cols_scan_options_lower_; + std::vector> range_cols_scan_options_upper_; + + std::vector range_options_indexes_; + mutable std::vector current_scan_target_idxs_; + + bool is_options_done_ = false; + + const KeyBytes lower_doc_key_; + const KeyBytes upper_doc_key_; +}; + +// Sets current_scan_target_ to the first tuple in the filter space +// that is >= new_target. 
+Status HybridScanChoices::SkipTargetsUpTo(const Slice& new_target) { + VLOG(2) << __PRETTY_FUNCTION__ << " Updating current target to be >= " + << DocKey::DebugSliceToString(new_target); + DCHECK(!FinishedWithScanChoices()); + is_options_done_ = false; + + /* + Let's say we have a row key with (A B) as the hash part and C, D as the range part: + ((A B) C D) E F + + Let's say our current constraints : + l_c_k <= C <= u_c_k + 4 6 + + l_d_j <= D <= u_d_j + 3 5 + + a b 0 d -> a b l_c d + + a b 5 d -> a b 5 d + [ Will subsequently seek out of document on reading the subdoc] + + a b 7 d -> a b l_c_(k+1) 0 + [ If there is another range bound filter that's higher than the + current one, effectively, moving this column to the next + range in the filter list.] + -> a b Inf + [ This will seek to and on the next invocation update: + a ? ? -> a l_c_0 0 ] + + a b c 6 -> a b c l_d_(j+1) + [ If there is another range bound filter that's higher than the + d, effectively, moving column D to the next + range in the filter list.] + -> a b c Inf + [ If c_next is between l_c_k and u_c_k. This will seek to > and on the next invocation update: + a b ? -> a b l_d_0 ] + -> a b l_c_(k+1) l_d_0 + [ If c_next is above u_c_k. We do this because we know + exactly what the next tuple in our filter space should be.] 
+ */ + DocKeyDecoder decoder(new_target); + RETURN_NOT_OK(decoder.DecodeToRangeGroup()); + current_scan_target_.Reset(Slice(new_target.data(), + decoder.left_input().data())); + + size_t col_idx = 0; + PrimitiveValue target_value; + for (col_idx = 0; col_idx < current_scan_target_idxs_.size(); col_idx++) { + RETURN_NOT_OK(decoder.DecodePrimitiveValue(&target_value)); + const auto& lower_choices = (range_cols_scan_options_lower_)[col_idx]; + const auto& upper_choices = (range_cols_scan_options_upper_)[col_idx]; + auto current_ind = current_scan_target_idxs_[col_idx]; + DCHECK(current_ind < lower_choices.size()); + const auto& lower = lower_choices[current_ind]; + const auto& upper = upper_choices[current_ind]; + + // If it's in range then good, continue after appending the target value + // column. + + if (target_value >= lower && target_value <= upper) { + target_value.AppendToKey(&current_scan_target_); + continue; + } + + // If target_value is not in the current range then we must find a range + // that works for it. + // If we are above all ranges then increment the index of the previous + // column. + // Else, target_value is below at least one range: find the lowest lower + // bound above target_value and use that, this relies on the assumption + // that all our filter ranges are disjoint. + + auto it = lower_choices.begin(); + size_t ind = 0; + + // Find an upper (lower) bound closest to target_value + if (is_forward_scan_) { + it = std::lower_bound(upper_choices.begin(), + upper_choices.end(), target_value); + ind = it - upper_choices.begin(); + } else { + it = std::lower_bound(lower_choices.begin(), lower_choices.end(), + target_value, std::greater<>()); + ind = it - lower_choices.begin(); + } + + if (ind == lower_choices.size()) { + // target value is higher than all range options and + // we need to increment. 
+ RETURN_NOT_OK(IncrementScanTargetAtColumn(static_cast<int>(col_idx) - 1)); + col_idx = current_scan_target_idxs_.size(); + break; + } + + current_scan_target_idxs_[col_idx] = ind; + + // If we are within a range then target_value itself should work. + if (lower_choices[ind] <= target_value + && upper_choices[ind] >= target_value) { + target_value.AppendToKey(&current_scan_target_); + continue; + } + + // Otherwise we must set it to the next lower bound. + // This only works as we are assuming all given ranges are + // disjoint. + + DCHECK((is_forward_scan_ && lower_choices[ind] > target_value) + || (!is_forward_scan_ && upper_choices[ind] + < target_value)); + + if (is_forward_scan_) { + lower_choices[ind].AppendToKey(&current_scan_target_); + } else { + upper_choices[ind].AppendToKey(&current_scan_target_); + } + col_idx++; + break; + } + + // Reset the remaining range columns to lower bounds for forward scans + // or upper bounds for backward scans. + for (size_t i = col_idx; i < range_cols_scan_options_lower_.size(); i++) { + current_scan_target_idxs_[i] = 0; + if (is_forward_scan_) { + range_cols_scan_options_lower_[i][0] + .AppendToKey(&current_scan_target_); + } else { + range_cols_scan_options_upper_[i][0] + .AppendToKey(&current_scan_target_); + } + } + + current_scan_target_.AppendValueType(ValueType::kGroupEnd); + VLOG(2) << "After " << __PRETTY_FUNCTION__ << " current_scan_target_ is " + << DocKey::DebugSliceToString(current_scan_target_); + return Status::OK(); +} + +// Update the value at start column by setting it up for incrementing to the +// next allowed value in the filter space +// --------------------------------------------------------------------------- +// There are two important cases to consider here. +// Let's say the value of current_scan_target_ at start_col, c, +// is currently V and the current bounds for that column +// is l_c_k <= V <= u_c_k.
In the usual case where V != u_c_k +// (or V != l_c_k for backwards scans) such that V_next is still in the given +// restriction, we set column c + 1 to kHighest (kLowest), such that the next +// invocation of GetNext() produces V_next at column similar to what is done +// in SkipTargetsUpTo. In this case, doing a SkipTargetsUpTo on the resulting +// current_scan_target_ should yield the next allowed value in the filter space +// In the case where V = u_c_k (V = l_c_k), or in other words V is at the +// EXTREMAL boundary of the current range, we know exactly what the next value +// of column C will be. So we move column c to the next +// range k+1 and set that column to the new value l_c_(k+1) (u_c_(k+1)) +// while setting all columns, b > c to l_b_0 (u_b_0) +// In the case of overflow on a column c (we want to increment the +// restriction range of c to the next range bound for that column but there +// are no restriction ranges remaining), we set the +// current column to the 0th range and move on to increment c - 1 +// Note that in almost all cases the resulting current_scan_target_ is strictly +// greater (lesser in the case of backwards scans) than the original +// current_scan_target_. This is necessary to allow the iterator seek out +// of the current scan target. The exception to this rule is below. +// --------------------------------------------------------------------------- +// This function leaves the scan target as is if the next tuple in the current +// scan direction is also the next tuple in the filter space and start_col +// is given as the last column +Status HybridScanChoices::IncrementScanTargetAtColumn(int start_col) { + + VLOG(2) << __PRETTY_FUNCTION__ + << " Incrementing at " << start_col; + + // Increment start col, move backwards in case of overflow. + int col_idx = start_col; + // lower and upper here are taken relative to the scan order + auto &lower_extremal_vector = is_forward_scan_ + ? 
range_cols_scan_options_lower_ + : range_cols_scan_options_upper_; + auto &upper_extremal_vector = is_forward_scan_ + ? range_cols_scan_options_upper_ + : range_cols_scan_options_lower_; + DocKeyDecoder t_decoder(current_scan_target_); + RETURN_NOT_OK(t_decoder.DecodeToRangeGroup()); + + // refer to the documentation of this function to see what extremal + // means here + std::vector is_extremal; + PrimitiveValue target_value; + for (int i = 0; i <= col_idx; ++i) { + RETURN_NOT_OK(t_decoder.DecodePrimitiveValue(&target_value)); + is_extremal.push_back(target_value == + upper_extremal_vector[i][current_scan_target_idxs_[i]]); + } + + // this variable tells us whether we start by appending + // kHighest/kLowest at col_idx after the following for loop + bool start_with_infinity = true; + + for (; col_idx >= 0; col_idx--) { + const auto& choices = lower_extremal_vector[col_idx]; + auto it = current_scan_target_idxs_[col_idx]; + + if (!is_extremal[col_idx]) { + col_idx++; + start_with_infinity = true; + break; + } + + if (++it < choices.size()) { + // and if this value is at the extremal bound + if (is_extremal[col_idx]) { + current_scan_target_idxs_[col_idx]++; + start_with_infinity = false; + } + break; + } + + current_scan_target_idxs_[col_idx] = 0; + } + + DocKeyDecoder decoder(current_scan_target_); + RETURN_NOT_OK(decoder.DecodeToRangeGroup()); + for (int i = 0; i < col_idx; ++i) { + RETURN_NOT_OK(decoder.DecodePrimitiveValue()); + } + + if (col_idx < 0) { + // If we got here we finished all the options and are done. 
+ col_idx++; + start_with_infinity = true; + is_options_done_ = true; + } + + current_scan_target_.Truncate( + decoder.left_input().cdata() - current_scan_target_.AsSlice().cdata()); + + + if (start_with_infinity && + (col_idx < static_cast(current_scan_target_idxs_.size()))) { + if (is_forward_scan_) { + PrimitiveValue(ValueType::kHighest).AppendToKey(¤t_scan_target_); + } else { + PrimitiveValue(ValueType::kLowest).AppendToKey(¤t_scan_target_); + } + col_idx++; + } + + if (start_with_infinity) { + // there's no point in appending anything after infinity + return Status::OK(); + } + + for (int i = col_idx; i <= start_col; ++i) { + lower_extremal_vector[i][current_scan_target_idxs_[i]] + .AppendToKey(¤t_scan_target_); + } + + for (size_t i = start_col + 1; i < current_scan_target_idxs_.size(); ++i) { + current_scan_target_idxs_[i] = 0; + lower_extremal_vector[i][current_scan_target_idxs_[i]] + .AppendToKey(¤t_scan_target_); + } + + return Status::OK(); +} + +// Method called when the scan target is done being used +Status HybridScanChoices::DoneWithCurrentTarget() { + // prev_scan_target_ is necessary for backwards scans + prev_scan_target_ = current_scan_target_; + RETURN_NOT_OK(IncrementScanTargetAtColumn( + static_cast(current_scan_target_idxs_.size()) - 1)); + current_scan_target_.AppendValueType(ValueType::kGroupEnd); + + // if we we incremented the last index then + // if this is a forward scan it doesn't matter what we do + // if this is a backwards scan then dont clear current_scan_target and we + // stay live + VLOG(2) << "After " << __PRETTY_FUNCTION__ << " current_scan_target_ is " + << DocKey::DebugSliceToString(current_scan_target_); + + VLOG(2) << __PRETTY_FUNCTION__ << " moving on to next target"; + DCHECK(!FinishedWithScanChoices()); + + if (is_options_done_) { + // It could be possible that we finished all our options but are not + // done because we haven't hit the bound key yet. 
This would usually be + // the case if we are moving onto the next hash key where we will + // restart our range options. + const KeyBytes &bound_key = is_forward_scan_ ? + upper_doc_key_ : lower_doc_key_; + finished_ = bound_key.empty() ? false + : is_forward_scan_ + == (current_scan_target_.CompareTo(bound_key) >= 0); + VLOG(4) << "finished_ = " << finished_; + } + + + VLOG(4) << "current_scan_target_ is " + << DocKey::DebugSliceToString(current_scan_target_) + << " and prev_scan_target_ is " + << DocKey::DebugSliceToString(prev_scan_target_); + + // The below condition is either indicative of the special case + // where IncrementScanTargetAtColumn didn't change the target due + // to the case specified in the last section of the + // documentation for IncrementScanTargetAtColumn or we have exhausted + // all available range keys for the given hash key (indicated + // by is_options_done_) + // We clear the scan target in these cases to indicate that the + // current_scan_target_ has been used and is invalid + // In all other cases, IncrementScanTargetAtColumn has updated + // current_scan_target_ to the new value that we want to seek to. + // Hence, we shouldn't clear it in those cases + if ((prev_scan_target_ == current_scan_target_) || is_options_done_) { + current_scan_target_.Clear(); + } + + return Status::OK(); +} + +// Seeks the given iterator to the current target as specified by +// current_scan_target_ and prev_scan_target_ (relevant in backwards +// scans) +Status HybridScanChoices::SeekToCurrentTarget(IntentAwareIterator* db_iter) { + VLOG(2) << __PRETTY_FUNCTION__ << " Advancing iterator towards target"; + + if (!FinishedWithScanChoices()) { + // if current_scan_target_ is valid we use it to determine + // what to seek to + if (!current_scan_target_.empty()) { + VLOG(3) << __PRETTY_FUNCTION__ + << " current_scan_target_ is non-empty. 
" + << DocKey::DebugSliceToString(current_scan_target_); + if (is_forward_scan_) { + VLOG(3) << __PRETTY_FUNCTION__ + << " Seeking to " + << DocKey::DebugSliceToString(current_scan_target_); + db_iter->Seek(current_scan_target_); + } else { + // seek to the highest key <= current_scan_target_ + // seeking to the highest key < current_scan_target_ + kHighest + // is equivalent to seeking to the highest key <= + // current_scan_target_ + auto tmp = current_scan_target_; + PrimitiveValue(ValueType::kHighest).AppendToKey(&tmp); + VLOG(3) << __PRETTY_FUNCTION__ << " Going to PrevDocKey " << tmp; + db_iter->PrevDocKey(tmp); + } + } else { + if (!is_forward_scan_ && !prev_scan_target_.empty()) { + db_iter->PrevDocKey(prev_scan_target_); + } + } + } + + return Status::OK(); +} + class RangeBasedScanChoices : public ScanChoices { public: - template - RangeBasedScanChoices(const Schema& schema, const ScanSpec& doc_spec) + RangeBasedScanChoices(const Schema& schema, const DocQLScanSpec& doc_spec) + : ScanChoices(doc_spec.is_forward_scan()) { + DCHECK(doc_spec.range_bounds()); + lower_.reserve(schema.num_range_key_columns()); + upper_.reserve(schema.num_range_key_columns()); + size_t idx = 0; + for (idx = schema.num_hash_key_columns(); idx < schema.num_key_columns(); idx++) { + const ColumnId col_idx = schema.column_id(idx); + const auto col_sort_type = schema.column(idx).sorting_type(); + const QLScanRange::QLRange range = doc_spec.range_bounds()->RangeFor(col_idx); + const auto lower = GetQLRangeBoundAsPVal(range, col_sort_type, true /* lower_bound */); + const auto upper = GetQLRangeBoundAsPVal(range, col_sort_type, false /* upper_bound */); + lower_.emplace_back(lower); + upper_.emplace_back(upper); + } + } + + RangeBasedScanChoices(const Schema& schema, const DocPgsqlScanSpec& doc_spec) : ScanChoices(doc_spec.is_forward_scan()) { DCHECK(doc_spec.range_bounds()); lower_.reserve(schema.num_range_key_columns()); @@ -412,9 +963,9 @@ Status 
RangeBasedScanChoices::SkipTargetsUpTo(const Slice& new_target) { last_was_infinity = upper_[col_idx].IsInfinity(); } } + current_scan_target_.AppendValueType(ValueType::kGroupEnd); VLOG(2) << "After " << __PRETTY_FUNCTION__ << " current_scan_target_ is " << DocKey::DebugSliceToString(current_scan_target_); - current_scan_target_.AppendValueType(ValueType::kGroupEnd); return Status::OK(); } @@ -430,10 +981,13 @@ Status RangeBasedScanChoices::SeekToCurrentTarget(IntentAwareIterator* db_iter) if (!FinishedWithScanChoices()) { if (!current_scan_target_.empty()) { - VLOG(3) << __PRETTY_FUNCTION__ << " current_scan_target_ is non-empty. " + VLOG(3) << __PRETTY_FUNCTION__ + << " current_scan_target_ is non-empty. " << current_scan_target_; if (is_forward_scan_) { - VLOG(3) << __PRETTY_FUNCTION__ << " Seeking to " << current_scan_target_; + VLOG(3) << __PRETTY_FUNCTION__ + << " Seeking to " + << DocKey::DebugSliceToString(current_scan_target_); db_iter->Seek(current_scan_target_); } else { auto tmp = current_scan_target_; @@ -504,6 +1058,16 @@ Status DocRowwiseIterator::Init(TableType table_type) { Result DocRowwiseIterator::InitScanChoices( const DocQLScanSpec& doc_spec, const KeyBytes& lower_doc_key, const KeyBytes& upper_doc_key) { + + if (!FLAGS_disable_hybrid_scan) { + if (doc_spec.range_options() || doc_spec.range_bounds()) { + scan_choices_.reset(new HybridScanChoices(schema_, doc_spec, + lower_doc_key, upper_doc_key)); + } + + return false; + } + if (doc_spec.range_options()) { scan_choices_.reset(new DiscreteScanChoices(doc_spec, lower_doc_key, upper_doc_key)); // Let's not seek to the lower doc key or upper doc key. We know exactly what we want. 
@@ -521,6 +1085,16 @@ Result DocRowwiseIterator::InitScanChoices( Result DocRowwiseIterator::InitScanChoices( const DocPgsqlScanSpec& doc_spec, const KeyBytes& lower_doc_key, const KeyBytes& upper_doc_key) { + + if (!FLAGS_disable_hybrid_scan) { + if (doc_spec.range_options() || doc_spec.range_bounds()) { + scan_choices_.reset(new HybridScanChoices(schema_, doc_spec, + lower_doc_key, upper_doc_key)); + } + + return false; + } + if (doc_spec.range_options()) { scan_choices_.reset(new DiscreteScanChoices(doc_spec, lower_doc_key, upper_doc_key)); // Let's not seek to the lower doc key or upper doc key. We know exactly what we want. @@ -572,7 +1146,9 @@ Status DocRowwiseIterator::DoInit(const T& doc_spec) { } } - if (!VERIFY_RESULT(InitScanChoices(doc_spec, lower_doc_key, upper_doc_key))) { + if (!VERIFY_RESULT(InitScanChoices(doc_spec, + !is_forward_scan_ && has_bound_key_ ? bound_key_ : lower_doc_key, + is_forward_scan_ && has_bound_key_ ? bound_key_ : upper_doc_key))) { if (is_forward_scan_) { VLOG(3) << __PRETTY_FUNCTION__ << " Seeking to " << DocKey::DebugSliceToString(lower_doc_key); db_iter_->Seek(lower_doc_key);