From c7f82397c557d6e53dba20fe0b48b37f63969eda Mon Sep 17 00:00:00 2001 From: "kyle.cao" Date: Thu, 6 May 2021 16:35:22 +0800 Subject: [PATCH] add IndexScan patch for or filter add tck cases fix tck --- src/planner/match/LabelIndexSeek.cpp | 48 +++++++ src/util/ExpressionUtils.cpp | 33 +++++ src/util/ExpressionUtils.h | 6 + src/util/test/ExpressionUtilsTest.cpp | 104 ++++++++++++++ tests/common/plan_differ.py | 4 +- .../features/optimizer/IndexScanRule.feature | 128 ++++++++++++++++++ 6 files changed, 321 insertions(+), 2 deletions(-) create mode 100644 tests/tck/features/optimizer/IndexScanRule.feature diff --git a/src/planner/match/LabelIndexSeek.cpp b/src/planner/match/LabelIndexSeek.cpp index 47f8b8392..6b3c4a725 100644 --- a/src/planner/match/LabelIndexSeek.cpp +++ b/src/planner/match/LabelIndexSeek.cpp @@ -85,6 +85,54 @@ StatusOr LabelIndexSeek::transformNode(NodeContext* nodeCtx) { plan.tail = scan; plan.root = scan; + // This if-block is a patch for or-filter-embeding to avoid OOM, + // and it should be converted to an `optRule` after the match validator is refactored + auto& whereCtx = matchClauseCtx->where; + if (whereCtx && whereCtx->filter) { + auto* filter = whereCtx->filter; + const auto nodeAlias = *nodeCtx->info->alias; + auto* objPool = matchClauseCtx->qctx->objPool(); + if (filter->kind() == Expression::Kind::kLogicalOr) { + auto labelExprs = ExpressionUtils::collectAll(filter, {Expression::Kind::kLabel}); + bool labelMatched = true; + for (auto* labelExpr : labelExprs) { + DCHECK_EQ(labelExpr->kind(), Expression::Kind::kLabel); + if (*(static_cast(labelExpr)->name()) != nodeAlias) { + labelMatched = false; + break; + } + } + if (labelMatched) { + auto flattenFilter = ExpressionUtils::flattenInnerLogicalExpr(filter); + DCHECK_EQ(flattenFilter->kind(), Expression::Kind::kLogicalOr); + auto& filterItems = + static_cast(flattenFilter.get())->operands(); + auto canBeEmbeded = [](Expression::Kind k) -> bool { + return k == Expression::Kind::kRelEQ || k == Expression::Kind::kRelLT || + k == Expression::Kind::kRelLE || k == Expression::Kind::kRelGT || + k == Expression::Kind::kRelGE; + }; + bool canBeEmbeded2IndexScan = true; + for (auto& f : filterItems) { + if (!canBeEmbeded(f->kind())) { + canBeEmbeded2IndexScan = false; + break; + } + } + if (canBeEmbeded2IndexScan) { + auto* srcFilter = objPool->add( + ExpressionUtils::rewriteLabelAttr2TagProp(flattenFilter.get())); + storage::cpp2::IndexQueryContext ctx; + ctx.set_filter(Expression::encode(*srcFilter)); + auto context = + std::make_unique>(); + context->emplace_back(std::move(ctx)); + scan->setIndexQueryContext(std::move(context)); + whereCtx.reset(); + } + } + } + } // initialize start expression in project node nodeCtx->initialExpr.reset(ExpressionUtils::newVarPropExpr(kVid)); return plan; diff --git a/src/util/ExpressionUtils.cpp b/src/util/ExpressionUtils.cpp index f66cace07..9534d0a19 100644 --- a/src/util/ExpressionUtils.cpp +++ b/src/util/ExpressionUtils.cpp @@ -240,6 +240,39 @@ void ExpressionUtils::pullOrsImpl(LogicalExpression *expr, } } +std::unique_ptr ExpressionUtils::flattenInnerLogicalAndExpr(const Expression *expr) { + auto matcher = [](const Expression *e) -> bool { + return e->kind() == Expression::Kind::kLogicalAnd; + }; + auto rewriter = [](const Expression *e) -> Expression * { + pullAnds(const_cast(e)); + return e->clone().release(); + }; + + return std::unique_ptr( + RewriteVisitor::transform(expr, std::move(matcher), std::move(rewriter))); +} + +std::unique_ptr ExpressionUtils::flattenInnerLogicalOrExpr(const Expression *expr) { + auto matcher = [](const Expression *e) -> bool { + return e->kind() == Expression::Kind::kLogicalOr; + }; + auto rewriter = [](const Expression *e) -> Expression * { + pullOrs(const_cast(e)); + return e->clone().release(); + }; + + return std::unique_ptr( + RewriteVisitor::transform(expr, std::move(matcher), std::move(rewriter))); +} + +std::unique_ptr ExpressionUtils::flattenInnerLogicalExpr(const Expression *expr) { + auto andFlattenExpr = flattenInnerLogicalAndExpr(expr); + auto allFlattenExpr = flattenInnerLogicalOrExpr(andFlattenExpr.get()); + + return allFlattenExpr; +} + VariablePropertyExpression *ExpressionUtils::newVarPropExpr(const std::string &prop, const std::string &var) { return new VariablePropertyExpression(new std::string(var), new std::string(prop)); diff --git a/src/util/ExpressionUtils.h b/src/util/ExpressionUtils.h index 3865b2784..fe6017737 100644 --- a/src/util/ExpressionUtils.h +++ b/src/util/ExpressionUtils.h @@ -101,6 +101,12 @@ class ExpressionUtils { Expression::Kind kind, const std::vector>& rels); + static std::unique_ptr flattenInnerLogicalAndExpr(const Expression* expr); + + static std::unique_ptr flattenInnerLogicalOrExpr(const Expression* expr); + + static std::unique_ptr flattenInnerLogicalExpr(const Expression* expr); + static std::unique_ptr expandExpr(const Expression* expr); static std::unique_ptr expandImplAnd(const Expression* expr); diff --git a/src/util/test/ExpressionUtilsTest.cpp b/src/util/test/ExpressionUtilsTest.cpp index 75b202db1..5d3b7c627 100644 --- a/src/util/test/ExpressionUtilsTest.cpp +++ b/src/util/test/ExpressionUtilsTest.cpp @@ -401,6 +401,110 @@ TEST_F(ExpressionUtilsTest, pushAnds) { ASSERT_EQ(expected, t->toString()); } +TEST_F(ExpressionUtilsTest, flattenInnerLogicalExpr) { + using Kind = Expression::Kind; + // true AND false AND true + { + auto *first = new ConstantExpression(true); + auto *second = new ConstantExpression(false); + auto *third = new ConstantExpression(true); + LogicalExpression expr(Kind::kLogicalAnd, + new LogicalExpression(Kind::kLogicalAnd, + first, + second), + third); + LogicalExpression expected(Kind::kLogicalAnd); + expected.addOperand(first->clone().release()); + expected.addOperand(second->clone().release()); + expected.addOperand(third->clone().release()); + auto newExpr = ExpressionUtils::flattenInnerLogicalExpr(&expr); + ASSERT_EQ(expected, *newExpr); + } + // true OR false OR true + { + auto *first = new ConstantExpression(true); + auto *second = new ConstantExpression(false); + auto *third = new ConstantExpression(true); + LogicalExpression expr(Kind::kLogicalOr, + new LogicalExpression(Kind::kLogicalOr, + first, + second), + third); + LogicalExpression expected(Kind::kLogicalOr); + expected.addOperand(first->clone().release()); + expected.addOperand(second->clone().release()); + expected.addOperand(third->clone().release()); + auto newExpr = ExpressionUtils::flattenInnerLogicalExpr(&expr); + ASSERT_EQ(expected, *newExpr); + } + // (true OR false OR true)==(true AND false AND true) + { + auto *or1 = new ConstantExpression(true); + auto *or2 = new ConstantExpression(false); + auto *or3 = new ConstantExpression(true); + auto* logicOrExpr = new LogicalExpression(Kind::kLogicalOr, + new LogicalExpression(Kind::kLogicalOr, + or1, + or2), + or3); + auto *and1 = new ConstantExpression(false); + auto *and2 = new ConstantExpression(false); + auto *and3 = new ConstantExpression(true); + auto* logicAndExpr = new LogicalExpression(Kind::kLogicalAnd, + new LogicalExpression(Kind::kLogicalAnd, + and1, + and2), + and3); + RelationalExpression expr(Kind::kRelEQ, logicOrExpr, logicAndExpr); + + auto* logicOrFlatten = new LogicalExpression(Kind::kLogicalOr); + logicOrFlatten->addOperand(or1->clone().release()); + logicOrFlatten->addOperand(or2->clone().release()); + logicOrFlatten->addOperand(or3->clone().release()); + auto* logicAndFlatten = new LogicalExpression(Kind::kLogicalAnd); + logicAndFlatten->addOperand(and1->clone().release()); + logicAndFlatten->addOperand(and2->clone().release()); + logicAndFlatten->addOperand(and3->clone().release()); + RelationalExpression expected(Kind::kRelEQ, logicOrFlatten, logicAndFlatten); + + auto newExpr = ExpressionUtils::flattenInnerLogicalExpr(&expr); + ASSERT_EQ(expected, *newExpr); + } + // (true OR false OR true) AND (true AND false AND true) + { + auto *or1 = new ConstantExpression(true); + auto *or2 = new ConstantExpression(false); + auto *or3 = new ConstantExpression(true); + auto* logicOrExpr = new LogicalExpression(Kind::kLogicalOr, + new LogicalExpression(Kind::kLogicalOr, + or1, + or2), + or3); + auto *and1 = new ConstantExpression(false); + auto *and2 = new ConstantExpression(false); + auto *and3 = new ConstantExpression(true); + auto* logicAndExpr = new LogicalExpression(Kind::kLogicalAnd, + new LogicalExpression(Kind::kLogicalAnd, + and1, + and2), + and3); + LogicalExpression expr(Kind::kLogicalAnd, logicOrExpr, logicAndExpr); + + auto* logicOrFlatten = new LogicalExpression(Kind::kLogicalOr); + logicOrFlatten->addOperand(or1->clone().release()); + logicOrFlatten->addOperand(or2->clone().release()); + logicOrFlatten->addOperand(or3->clone().release()); + LogicalExpression expected(Kind::kLogicalAnd); + expected.addOperand(logicOrFlatten); + expected.addOperand(and1->clone().release()); + expected.addOperand(and2->clone().release()); + expected.addOperand(and3->clone().release()); + + auto newExpr = ExpressionUtils::flattenInnerLogicalExpr(&expr); + ASSERT_EQ(expected, *newExpr); + } +} + std::unique_ptr parse(const std::string& expr) { std::string query = "LOOKUP on t1 WHERE " + expr; GQLParser parser; diff --git a/tests/common/plan_differ.py b/tests/common/plan_differ.py index 0c4bdf10c..1d722a40b 100644 --- a/tests/common/plan_differ.py +++ b/tests/common/plan_differ.py @@ -91,12 +91,12 @@ def _check_op_info(self, exp, resp): if resp is None: if exp: return f"expect: {exp} but resp plan node is None" - else: + if exp: resp_dict = { f"{bytes.decode(pair.key)}": f"{bytes.decode(pair.value)}" for pair in resp } - if exp and not self._is_subdict_nested(exp, resp_dict): + if not self._is_subdict_nested(exp, resp_dict): return "Invalid descriptions, expect: {} vs. resp: {}".format( json.dumps(exp), json.dumps(resp_dict)) return None diff --git a/tests/tck/features/optimizer/IndexScanRule.feature b/tests/tck/features/optimizer/IndexScanRule.feature new file mode 100644 index 000000000..214a2eaba --- /dev/null +++ b/tests/tck/features/optimizer/IndexScanRule.feature @@ -0,0 +1,128 @@ +# Copyright (c) 2021 vesoft inc. All rights reserved. +# +# This source code is licensed under Apache 2.0 License, +# attached with Common Clause Condition 1.0, found in the LICENSES directory. +Feature: Match index selection + + Background: + Given a graph with space named "nba" + + Scenario: and filter embeding + When profiling query: + """ + MATCH (v:player) + WHERE v.name>"Tim Duncan" and v.name<="Yao Ming" + RETURN v + """ + Then the result should be, in any order: + | v | + | ("Tracy McGrady" :player{age: 39, name: "Tracy McGrady"}) | + | ("Yao Ming" :player{age: 38, name: "Yao Ming"}) | + | ("Tony Parker" :player{age: 36, name: "Tony Parker"}) | + | ("Vince Carter" :player{age: 42, name: "Vince Carter"}) | + And the execution plan should be: + | id | name | dependencies | operator info | + | 10 | Project | 13 | | + | 13 | Filter | 7 | | + | 7 | Project | 6 | | + | 6 | Project | 5 | | + | 5 | Filter | 15 | | + | 15 | GetVertices | 11 | | + | 11 | IndexScan | 0 | {"indexCtx": {"columnHints":{"scanType":"RANGE","column":"name","beginValue":"\"Tim Duncan","endValue":"\"Yao Ming"}}} | + | 0 | Start | | | + + Scenario: or filter embeding + When profiling query: + """ + MATCH (v:player) + WHERE + v.name<="Aron Baynes" + or v.name>"Yao Ming" + or v.name=="Kobe Bryant" + or v.age>40 + RETURN v + """ + Then the result should be, in any order: + | v | + | ("Kobe Bryant" :player{age: 40, name: "Kobe Bryant"}) | + | ("Aron Baynes" :player{age: 32, name: "Aron Baynes"}) | + | ("Steve Nash" :player{age: 45, name: "Steve Nash"}) | + | ("Grant Hill" :player{age: 46, name: "Grant Hill"}) | + | ("Amar'e Stoudemire" :player{age: 36, name: "Amar'e Stoudemire"}) | + | ("Manu Ginobili" :player{age: 41, name: "Manu Ginobili"}) | + | ("Jason Kidd" :player{age: 45, name: "Jason Kidd"}) | + | ("Vince Carter" :player{age: 42, name: "Vince Carter"}) | + | ("Ray Allen" :player{age: 43, name: "Ray Allen"}) | + | ("Tim Duncan" :bachelor{name: "Tim Duncan", speciality: "psychology"} :player{age: 42, name: "Tim Duncan"}) | + | ("Shaquile O'Neal" :player{age: 47, name: "Shaquile O'Neal"}) | + And the execution plan should be: + | id | name | dependencies | operator info | + | 10 | Project | 13 | | + | 13 | Filter | 7 | {"condition":"!(hasSameEdgeInPath($-.__COL_0))"} | + | 7 | Project | 6 | | + | 6 | Project | 5 | | + | 5 | Filter | 15 | | + | 15 | GetVertices | 11 | | + | 11 | IndexScan | 0 | | + | 0 | Start | | | + + Scenario: degenerate to full tag scan + When profiling query: + """ + MATCH (v:player)-[:like]->(n) + WHERE + v.name<="Aron Baynes" + or n.age>45 + RETURN v, n + """ + Then the result should be, in any order: + | v | n | + | ("Tracy McGrady" :player{age: 39, name: "Tracy McGrady"}) | ("Grant Hill" :player{age: 46, name: "Grant Hill"}) | + | ("Amar'e Stoudemire" :player{age: 36, name: "Amar'e Stoudemire"}) | ("Steve Nash" :player{age: 45, name: "Steve Nash"}) | + | ("Yao Ming" :player{age: 38, name: "Yao Ming"}) | ("Shaquile O'Neal" :player{age: 47, name: "Shaquile O'Neal"}) | + | ("Aron Baynes" :player{age: 32, name: "Aron Baynes"}) | ("Tim Duncan" :bachelor{name: "Tim Duncan", speciality: "psychology"} :player{age: 42, name: "Tim Duncan"}) | + And the execution plan should be: + | id | name | dependencies | operator info | + | 16 | Project | 19 | | + | 19 | Filter | 13 | { "condition": "((($v.name<=\"Aron Baynes\") OR ($n.age>45)) AND !(hasSameEdgeInPath($-.__COL_0)))"} | + | 13 | Project | 12 | | + | 12 | InnerJoin | 11 | | + | 11 | Project | 21 | | + | 21 | GetVertices | 7 | | + | 7 | Filter | 6 | | + | 6 | Project | 5 | | + | 5 | Filter | 23 | | + | 23 | GetNeighbors | 17 | | + | 17 | IndexScan | 0 | | + | 0 | Start | | | + # This is actually the optimization for another optRule, + # but it is necessary to ensure that the current optimization does not destroy this scenario + # and it can be considered in the subsequent refactoring + When profiling query: + """ + MATCH (v:player)-[:like]->(n) + WHERE + v.name<="Aron Baynes" + or v.age>45 + or true + or v.age+1 + or v.name + RETURN count(*) AS count + """ + Then the result should be, in any order: + | count | + | 81 | + And the execution plan should be: + | id | name | dependencies | operator info | + | 16 | Aggregate | 18 | | + | 18 | Filter | 13 | | + | 13 | Project | 12 | | + | 12 | InnerJoin | 11 | | + | 11 | Project | 20 | | + | 20 | GetVertices | 7 | | + | 7 | Filter | 6 | | + | 6 | Project | 5 | | + | 5 | Filter | 22 | | + | 22 | GetNeighbors | 17 | | + | 17 | IndexScan | 0 | | + | 0 | Start | | |