Skip to content
This repository has been archived by the owner on Dec 1, 2022. It is now read-only.

Commit

Permalink
add IndexScan patch for or filter (#1005)
Browse files Browse the repository at this point in the history
add tck cases

fix tck

Co-authored-by: Yee <2520865+yixinglu@users.noreply.github.com>
  • Loading branch information
czpmango and yixinglu committed May 8, 2021
1 parent b1c415d commit 9853925
Show file tree
Hide file tree
Showing 5 changed files with 319 additions and 0 deletions.
48 changes: 48 additions & 0 deletions src/planner/match/LabelIndexSeek.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,54 @@ StatusOr<SubPlan> LabelIndexSeek::transformNode(NodeContext* nodeCtx) {
plan.tail = scan;
plan.root = scan;

// This if-block is a patch that embeds an OR filter into the IndexScan to avoid OOM.
// It should be converted to an `optRule` after the match validator is refactored.
auto& whereCtx = matchClauseCtx->where;
if (whereCtx && whereCtx->filter) {
auto* filter = whereCtx->filter;
const auto nodeAlias = *nodeCtx->info->alias;
auto* objPool = matchClauseCtx->qctx->objPool();
if (filter->kind() == Expression::Kind::kLogicalOr) {
auto labelExprs = ExpressionUtils::collectAll(filter, {Expression::Kind::kLabel});
bool labelMatched = true;
for (auto* labelExpr : labelExprs) {
DCHECK_EQ(labelExpr->kind(), Expression::Kind::kLabel);
if (*(static_cast<const LabelExpression*>(labelExpr)->name()) != nodeAlias) {
labelMatched = false;
break;
}
}
if (labelMatched) {
auto flattenFilter = ExpressionUtils::flattenInnerLogicalExpr(filter);
DCHECK_EQ(flattenFilter->kind(), Expression::Kind::kLogicalOr);
auto& filterItems =
static_cast<LogicalExpression*>(flattenFilter.get())->operands();
auto canBeEmbeded = [](Expression::Kind k) -> bool {
return k == Expression::Kind::kRelEQ || k == Expression::Kind::kRelLT ||
k == Expression::Kind::kRelLE || k == Expression::Kind::kRelGT ||
k == Expression::Kind::kRelGE;
};
bool canBeEmbeded2IndexScan = true;
for (auto& f : filterItems) {
if (!canBeEmbeded(f->kind())) {
canBeEmbeded2IndexScan = false;
break;
}
}
if (canBeEmbeded2IndexScan) {
auto* srcFilter = objPool->add(
ExpressionUtils::rewriteLabelAttr2TagProp(flattenFilter.get()));
storage::cpp2::IndexQueryContext ctx;
ctx.set_filter(Expression::encode(*srcFilter));
auto context =
std::make_unique<std::vector<storage::cpp2::IndexQueryContext>>();
context->emplace_back(std::move(ctx));
scan->setIndexQueryContext(std::move(context));
whereCtx.reset();
}
}
}
}
// initialize start expression in project node
nodeCtx->initialExpr.reset(ExpressionUtils::newVarPropExpr(kVid));
return plan;
Expand Down
33 changes: 33 additions & 0 deletions src/util/ExpressionUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,39 @@ void ExpressionUtils::pullOrsImpl(LogicalExpression *expr,
}
}

// Builds a new expression from `expr` in which each kLogicalAnd node selected by
// RewriteVisitor::transform has been passed through pullAnds(), so that nested
// ANDs such as `(a AND b) AND c` end up as one AND holding a, b and c.
//
// @param expr  the expression to rewrite; it is only read here.
// @return      an owning pointer to the rewritten expression.
std::unique_ptr<Expression> ExpressionUtils::flattenInnerLogicalAndExpr(const Expression *expr) {
    auto matcher = [](const Expression *e) -> bool {
        return e->kind() == Expression::Kind::kLogicalAnd;
    };
    auto rewriter = [](const Expression *e) -> Expression * {
        // Flatten a private copy instead of casting away const on `e`: the node
        // handed to the rewriter is declared const and must not be modified
        // behind the caller's back.
        auto flattened = e->clone();
        pullAnds(flattened.get());
        return flattened.release();
    };

    return std::unique_ptr<Expression>(
        RewriteVisitor::transform(expr, std::move(matcher), std::move(rewriter)));
}

// Builds a new expression from `expr` in which each kLogicalOr node selected by
// RewriteVisitor::transform has been passed through pullOrs(), so that nested
// ORs such as `(a OR b) OR c` end up as one OR holding a, b and c.
//
// @param expr  the expression to rewrite; it is only read here.
// @return      an owning pointer to the rewritten expression.
std::unique_ptr<Expression> ExpressionUtils::flattenInnerLogicalOrExpr(const Expression *expr) {
    auto matcher = [](const Expression *e) -> bool {
        return e->kind() == Expression::Kind::kLogicalOr;
    };
    auto rewriter = [](const Expression *e) -> Expression * {
        // Flatten a private copy instead of casting away const on `e`: the node
        // handed to the rewriter is declared const and must not be modified
        // behind the caller's back.
        auto flattened = e->clone();
        pullOrs(flattened.get());
        return flattened.release();
    };

    return std::unique_ptr<Expression>(
        RewriteVisitor::transform(expr, std::move(matcher), std::move(rewriter)));
}

// Runs both flattening passes over `expr`: first the AND pass, then the OR pass
// on the AND-flattened result. The final, fully flattened expression is returned.
std::unique_ptr<Expression> ExpressionUtils::flattenInnerLogicalExpr(const Expression *expr) {
    // The intermediate AND-flattened tree is a temporary that stays alive until
    // the OR pass has finished building its own result from it.
    return flattenInnerLogicalOrExpr(flattenInnerLogicalAndExpr(expr).get());
}

VariablePropertyExpression *ExpressionUtils::newVarPropExpr(const std::string &prop,
const std::string &var) {
return new VariablePropertyExpression(new std::string(var), new std::string(prop));
Expand Down
6 changes: 6 additions & 0 deletions src/util/ExpressionUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,12 @@ class ExpressionUtils {
Expression::Kind kind,
const std::vector<std::unique_ptr<Expression>>& rels);

// Returns a rewritten copy of `expr` in which nested logical AND expressions
// are flattened, e.g. `(a AND b) AND c` becomes a single AND with operands a, b, c.
static std::unique_ptr<Expression> flattenInnerLogicalAndExpr(const Expression* expr);

// Returns a rewritten copy of `expr` in which nested logical OR expressions
// are flattened, e.g. `(a OR b) OR c` becomes a single OR with operands a, b, c.
static std::unique_ptr<Expression> flattenInnerLogicalOrExpr(const Expression* expr);

// Applies flattenInnerLogicalAndExpr and then flattenInnerLogicalOrExpr to `expr`
// and returns the result.
static std::unique_ptr<Expression> flattenInnerLogicalExpr(const Expression* expr);

static std::unique_ptr<Expression> expandExpr(const Expression* expr);

static std::unique_ptr<Expression> expandImplAnd(const Expression* expr);
Expand Down
104 changes: 104 additions & 0 deletions src/util/test/ExpressionUtilsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,110 @@ TEST_F(ExpressionUtilsTest, pushAnds) {
ASSERT_EQ(expected, t->toString());
}

// Checks ExpressionUtils::flattenInnerLogicalExpr on four shapes of input:
// a nested AND, a nested OR, nested AND/OR chains used as operands of a
// relational expression, and a nested OR chain AND-ed with a nested AND chain.
// Each case builds the nested input, builds the expected flattened tree from
// clones of the same constants, and compares it with the function's result.
TEST_F(ExpressionUtilsTest, flattenInnerLogicalExpr) {
using Kind = Expression::Kind;
// (true AND false) AND true  ->  one AND with three operands
{
auto *first = new ConstantExpression(true);
auto *second = new ConstantExpression(false);
auto *third = new ConstantExpression(true);
LogicalExpression expr(Kind::kLogicalAnd,
new LogicalExpression(Kind::kLogicalAnd,
first,
second),
third);
LogicalExpression expected(Kind::kLogicalAnd);
expected.addOperand(first->clone().release());
expected.addOperand(second->clone().release());
expected.addOperand(third->clone().release());
auto newExpr = ExpressionUtils::flattenInnerLogicalExpr(&expr);
ASSERT_EQ(expected, *newExpr);
}
// (true OR false) OR true  ->  one OR with three operands
{
auto *first = new ConstantExpression(true);
auto *second = new ConstantExpression(false);
auto *third = new ConstantExpression(true);
LogicalExpression expr(Kind::kLogicalOr,
new LogicalExpression(Kind::kLogicalOr,
first,
second),
third);
LogicalExpression expected(Kind::kLogicalOr);
expected.addOperand(first->clone().release());
expected.addOperand(second->clone().release());
expected.addOperand(third->clone().release());
auto newExpr = ExpressionUtils::flattenInnerLogicalExpr(&expr);
ASSERT_EQ(expected, *newExpr);
}
// (true OR false OR true)==(false AND false AND true)
// The logical chains are operands of a relational expression; both sides
// are expected to be flattened while the kRelEQ root is kept.
{
auto *or1 = new ConstantExpression(true);
auto *or2 = new ConstantExpression(false);
auto *or3 = new ConstantExpression(true);
auto* logicOrExpr = new LogicalExpression(Kind::kLogicalOr,
new LogicalExpression(Kind::kLogicalOr,
or1,
or2),
or3);
auto *and1 = new ConstantExpression(false);
auto *and2 = new ConstantExpression(false);
auto *and3 = new ConstantExpression(true);
auto* logicAndExpr = new LogicalExpression(Kind::kLogicalAnd,
new LogicalExpression(Kind::kLogicalAnd,
and1,
and2),
and3);
RelationalExpression expr(Kind::kRelEQ, logicOrExpr, logicAndExpr);

auto* logicOrFlatten = new LogicalExpression(Kind::kLogicalOr);
logicOrFlatten->addOperand(or1->clone().release());
logicOrFlatten->addOperand(or2->clone().release());
logicOrFlatten->addOperand(or3->clone().release());
auto* logicAndFlatten = new LogicalExpression(Kind::kLogicalAnd);
logicAndFlatten->addOperand(and1->clone().release());
logicAndFlatten->addOperand(and2->clone().release());
logicAndFlatten->addOperand(and3->clone().release());
RelationalExpression expected(Kind::kRelEQ, logicOrFlatten, logicAndFlatten);

auto newExpr = ExpressionUtils::flattenInnerLogicalExpr(&expr);
ASSERT_EQ(expected, *newExpr);
}
// (true OR false OR true) AND (false AND false AND true)
// The inner AND chain is expected to merge into the root AND, while the
// OR chain stays a single flattened OR operand of that root.
{
auto *or1 = new ConstantExpression(true);
auto *or2 = new ConstantExpression(false);
auto *or3 = new ConstantExpression(true);
auto* logicOrExpr = new LogicalExpression(Kind::kLogicalOr,
new LogicalExpression(Kind::kLogicalOr,
or1,
or2),
or3);
auto *and1 = new ConstantExpression(false);
auto *and2 = new ConstantExpression(false);
auto *and3 = new ConstantExpression(true);
auto* logicAndExpr = new LogicalExpression(Kind::kLogicalAnd,
new LogicalExpression(Kind::kLogicalAnd,
and1,
and2),
and3);
LogicalExpression expr(Kind::kLogicalAnd, logicOrExpr, logicAndExpr);

auto* logicOrFlatten = new LogicalExpression(Kind::kLogicalOr);
logicOrFlatten->addOperand(or1->clone().release());
logicOrFlatten->addOperand(or2->clone().release());
logicOrFlatten->addOperand(or3->clone().release());
LogicalExpression expected(Kind::kLogicalAnd);
expected.addOperand(logicOrFlatten);
expected.addOperand(and1->clone().release());
expected.addOperand(and2->clone().release());
expected.addOperand(and3->clone().release());

auto newExpr = ExpressionUtils::flattenInnerLogicalExpr(&expr);
ASSERT_EQ(expected, *newExpr);
}
}

std::unique_ptr<Expression> parse(const std::string& expr) {
std::string query = "LOOKUP on t1 WHERE " + expr;
GQLParser parser;
Expand Down
128 changes: 128 additions & 0 deletions tests/tck/features/optimizer/IndexScanRule.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
# Copyright (c) 2021 vesoft inc. All rights reserved.
#
# This source code is licensed under Apache 2.0 License,
# attached with Common Clause Condition 1.0, found in the LICENSES directory.
Feature: Match index selection

Background:
Given a graph with space named "nba"

Scenario: and filter embeding
When profiling query:
"""
MATCH (v:player)
WHERE v.name>"Tim Duncan" and v.name<="Yao Ming"
RETURN v
"""
Then the result should be, in any order:
| v |
| ("Tracy McGrady" :player{age: 39, name: "Tracy McGrady"}) |
| ("Yao Ming" :player{age: 38, name: "Yao Ming"}) |
| ("Tony Parker" :player{age: 36, name: "Tony Parker"}) |
| ("Vince Carter" :player{age: 42, name: "Vince Carter"}) |
And the execution plan should be:
| id | name | dependencies | operator info |
| 10 | Project | 13 | |
| 13 | Filter | 7 | |
| 7 | Project | 6 | |
| 6 | Project | 5 | |
| 5 | Filter | 15 | |
| 15 | GetVertices | 11 | |
| 11 | IndexScan | 0 | {"indexCtx": {"columnHints":{"scanType":"RANGE","column":"name","beginValue":"\"Tim Duncan","endValue":"\"Yao Ming"}}} |
| 0 | Start | | |

Scenario: or filter embeding
When profiling query:
"""
MATCH (v:player)
WHERE
v.name<="Aron Baynes"
or v.name>"Yao Ming"
or v.name=="Kobe Bryant"
or v.age>40
RETURN v
"""
Then the result should be, in any order:
| v |
| ("Kobe Bryant" :player{age: 40, name: "Kobe Bryant"}) |
| ("Aron Baynes" :player{age: 32, name: "Aron Baynes"}) |
| ("Steve Nash" :player{age: 45, name: "Steve Nash"}) |
| ("Grant Hill" :player{age: 46, name: "Grant Hill"}) |
| ("Amar'e Stoudemire" :player{age: 36, name: "Amar'e Stoudemire"}) |
| ("Manu Ginobili" :player{age: 41, name: "Manu Ginobili"}) |
| ("Jason Kidd" :player{age: 45, name: "Jason Kidd"}) |
| ("Vince Carter" :player{age: 42, name: "Vince Carter"}) |
| ("Ray Allen" :player{age: 43, name: "Ray Allen"}) |
| ("Tim Duncan" :bachelor{name: "Tim Duncan", speciality: "psychology"} :player{age: 42, name: "Tim Duncan"}) |
| ("Shaquile O'Neal" :player{age: 47, name: "Shaquile O'Neal"}) |
And the execution plan should be:
| id | name | dependencies | operator info |
| 10 | Project | 13 | |
| 13 | Filter | 7 | {"condition":"!(hasSameEdgeInPath($-.__COL_0))"} |
| 7 | Project | 6 | |
| 6 | Project | 5 | |
| 5 | Filter | 15 | |
| 15 | GetVertices | 11 | |
| 11 | IndexScan | 0 | |
| 0 | Start | | |

Scenario: degenerate to full tag scan
When profiling query:
"""
MATCH (v:player)-[:like]->(n)
WHERE
v.name<="Aron Baynes"
or n.age>45
RETURN v, n
"""
Then the result should be, in any order:
| v | n |
| ("Tracy McGrady" :player{age: 39, name: "Tracy McGrady"}) | ("Grant Hill" :player{age: 46, name: "Grant Hill"}) |
| ("Amar'e Stoudemire" :player{age: 36, name: "Amar'e Stoudemire"}) | ("Steve Nash" :player{age: 45, name: "Steve Nash"}) |
| ("Yao Ming" :player{age: 38, name: "Yao Ming"}) | ("Shaquile O'Neal" :player{age: 47, name: "Shaquile O'Neal"}) |
| ("Aron Baynes" :player{age: 32, name: "Aron Baynes"}) | ("Tim Duncan" :bachelor{name: "Tim Duncan", speciality: "psychology"} :player{age: 42, name: "Tim Duncan"}) |
And the execution plan should be:
| id | name | dependencies | operator info |
| 16 | Project | 19 | |
| 19 | Filter | 13 | { "condition": "((($v.name<=\"Aron Baynes\") OR ($n.age>45)) AND !(hasSameEdgeInPath($-.__COL_0)))"} |
| 13 | Project | 12 | |
| 12 | InnerJoin | 11 | |
| 11 | Project | 21 | |
| 21 | GetVertices | 7 | |
| 7 | Filter | 6 | |
| 6 | Project | 5 | |
| 5 | Filter | 23 | |
| 23 | GetNeighbors | 17 | |
| 17 | IndexScan | 0 | |
| 0 | Start | | |
# This case is actually optimized by another optRule,
# but it is kept here to ensure that the or-filter embedding patch does not break that scenario.
# It can be reconsidered in the subsequent refactoring.
When profiling query:
"""
MATCH (v:player)-[:like]->(n)
WHERE
v.name<="Aron Baynes"
or v.age>45
or true
or v.age+1
or v.name
RETURN count(*) AS count
"""
Then the result should be, in any order:
| count |
| 81 |
And the execution plan should be:
| id | name | dependencies | operator info |
| 16 | Aggregate | 18 | |
| 18 | Filter | 13 | |
| 13 | Project | 12 | |
| 12 | InnerJoin | 11 | |
| 11 | Project | 20 | |
| 20 | GetVertices | 7 | |
| 7 | Filter | 6 | |
| 6 | Project | 5 | |
| 5 | Filter | 22 | |
| 22 | GetNeighbors | 17 | |
| 17 | IndexScan | 0 | |
| 0 | Start | | |

0 comments on commit 9853925

Please sign in to comment.