Skip to content
This repository has been archived by the owner on Dec 1, 2022. It is now read-only.

embed or-filter into IndexScan #1005

Merged
merged 3 commits into from
May 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions src/planner/match/LabelIndexSeek.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,54 @@ StatusOr<SubPlan> LabelIndexSeek::transformNode(NodeContext* nodeCtx) {
plan.tail = scan;
plan.root = scan;

// This if-block is a patch for or-filter-embeding to avoid OOM,
// and it should be converted to an `optRule` after the match validator is refactored
auto& whereCtx = matchClauseCtx->where;
if (whereCtx && whereCtx->filter) {
auto* filter = whereCtx->filter;
const auto nodeAlias = *nodeCtx->info->alias;
auto* objPool = matchClauseCtx->qctx->objPool();
if (filter->kind() == Expression::Kind::kLogicalOr) {
auto labelExprs = ExpressionUtils::collectAll(filter, {Expression::Kind::kLabel});
bool labelMatched = true;
for (auto* labelExpr : labelExprs) {
DCHECK_EQ(labelExpr->kind(), Expression::Kind::kLabel);
if (*(static_cast<const LabelExpression*>(labelExpr)->name()) != nodeAlias) {
labelMatched = false;
break;
}
}
if (labelMatched) {
auto flattenFilter = ExpressionUtils::flattenInnerLogicalExpr(filter);
DCHECK_EQ(flattenFilter->kind(), Expression::Kind::kLogicalOr);
auto& filterItems =
static_cast<LogicalExpression*>(flattenFilter.get())->operands();
auto canBeEmbeded = [](Expression::Kind k) -> bool {
return k == Expression::Kind::kRelEQ || k == Expression::Kind::kRelLT ||
k == Expression::Kind::kRelLE || k == Expression::Kind::kRelGT ||
k == Expression::Kind::kRelGE;
};
bool canBeEmbeded2IndexScan = true;
for (auto& f : filterItems) {
if (!canBeEmbeded(f->kind())) {
canBeEmbeded2IndexScan = false;
czpmango marked this conversation as resolved.
Show resolved Hide resolved
break;
}
}
if (canBeEmbeded2IndexScan) {
auto* srcFilter = objPool->add(
ExpressionUtils::rewriteLabelAttr2TagProp(flattenFilter.get()));
storage::cpp2::IndexQueryContext ctx;
ctx.set_filter(Expression::encode(*srcFilter));
auto context =
std::make_unique<std::vector<storage::cpp2::IndexQueryContext>>();
context->emplace_back(std::move(ctx));
scan->setIndexQueryContext(std::move(context));
whereCtx.reset();
}
}
}
}
// initialize start expression in project node
nodeCtx->initialExpr.reset(ExpressionUtils::newVarPropExpr(kVid));
return plan;
Expand Down
33 changes: 33 additions & 0 deletions src/util/ExpressionUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,39 @@ void ExpressionUtils::pullOrsImpl(LogicalExpression *expr,
}
}

std::unique_ptr<Expression> ExpressionUtils::flattenInnerLogicalAndExpr(const Expression *expr) {
auto matcher = [](const Expression *e) -> bool {
return e->kind() == Expression::Kind::kLogicalAnd;
};
auto rewriter = [](const Expression *e) -> Expression * {
pullAnds(const_cast<Expression *>(e));
return e->clone().release();
};

return std::unique_ptr<Expression>(
RewriteVisitor::transform(expr, std::move(matcher), std::move(rewriter)));
}

std::unique_ptr<Expression> ExpressionUtils::flattenInnerLogicalOrExpr(const Expression *expr) {
auto matcher = [](const Expression *e) -> bool {
return e->kind() == Expression::Kind::kLogicalOr;
};
auto rewriter = [](const Expression *e) -> Expression * {
pullOrs(const_cast<Expression *>(e));
return e->clone().release();
};

return std::unique_ptr<Expression>(
RewriteVisitor::transform(expr, std::move(matcher), std::move(rewriter)));
}

std::unique_ptr<Expression> ExpressionUtils::flattenInnerLogicalExpr(const Expression *expr) {
auto andFlattenExpr = flattenInnerLogicalAndExpr(expr);
auto allFlattenExpr = flattenInnerLogicalOrExpr(andFlattenExpr.get());

return allFlattenExpr;
}

VariablePropertyExpression *ExpressionUtils::newVarPropExpr(const std::string &prop,
const std::string &var) {
return new VariablePropertyExpression(new std::string(var), new std::string(prop));
Expand Down
6 changes: 6 additions & 0 deletions src/util/ExpressionUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,12 @@ class ExpressionUtils {
Expression::Kind kind,
const std::vector<std::unique_ptr<Expression>>& rels);

static std::unique_ptr<Expression> flattenInnerLogicalAndExpr(const Expression* expr);

static std::unique_ptr<Expression> flattenInnerLogicalOrExpr(const Expression* expr);

static std::unique_ptr<Expression> flattenInnerLogicalExpr(const Expression* expr);

static std::unique_ptr<Expression> expandExpr(const Expression* expr);

static std::unique_ptr<Expression> expandImplAnd(const Expression* expr);
Expand Down
104 changes: 104 additions & 0 deletions src/util/test/ExpressionUtilsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,110 @@ TEST_F(ExpressionUtilsTest, pushAnds) {
ASSERT_EQ(expected, t->toString());
}

TEST_F(ExpressionUtilsTest, flattenInnerLogicalExpr) {
using Kind = Expression::Kind;
// true AND false AND true
{
auto *first = new ConstantExpression(true);
auto *second = new ConstantExpression(false);
auto *third = new ConstantExpression(true);
LogicalExpression expr(Kind::kLogicalAnd,
new LogicalExpression(Kind::kLogicalAnd,
first,
second),
third);
LogicalExpression expected(Kind::kLogicalAnd);
expected.addOperand(first->clone().release());
expected.addOperand(second->clone().release());
expected.addOperand(third->clone().release());
auto newExpr = ExpressionUtils::flattenInnerLogicalExpr(&expr);
ASSERT_EQ(expected, *newExpr);
}
// true OR false OR true
{
auto *first = new ConstantExpression(true);
auto *second = new ConstantExpression(false);
auto *third = new ConstantExpression(true);
LogicalExpression expr(Kind::kLogicalOr,
new LogicalExpression(Kind::kLogicalOr,
first,
second),
third);
LogicalExpression expected(Kind::kLogicalOr);
expected.addOperand(first->clone().release());
expected.addOperand(second->clone().release());
expected.addOperand(third->clone().release());
auto newExpr = ExpressionUtils::flattenInnerLogicalExpr(&expr);
ASSERT_EQ(expected, *newExpr);
}
// (true OR false OR true)==(true AND false AND true)
{
auto *or1 = new ConstantExpression(true);
auto *or2 = new ConstantExpression(false);
auto *or3 = new ConstantExpression(true);
auto* logicOrExpr = new LogicalExpression(Kind::kLogicalOr,
new LogicalExpression(Kind::kLogicalOr,
or1,
or2),
or3);
auto *and1 = new ConstantExpression(false);
auto *and2 = new ConstantExpression(false);
auto *and3 = new ConstantExpression(true);
auto* logicAndExpr = new LogicalExpression(Kind::kLogicalAnd,
new LogicalExpression(Kind::kLogicalAnd,
and1,
and2),
and3);
RelationalExpression expr(Kind::kRelEQ, logicOrExpr, logicAndExpr);

auto* logicOrFlatten = new LogicalExpression(Kind::kLogicalOr);
logicOrFlatten->addOperand(or1->clone().release());
logicOrFlatten->addOperand(or2->clone().release());
logicOrFlatten->addOperand(or3->clone().release());
auto* logicAndFlatten = new LogicalExpression(Kind::kLogicalAnd);
logicAndFlatten->addOperand(and1->clone().release());
logicAndFlatten->addOperand(and2->clone().release());
logicAndFlatten->addOperand(and3->clone().release());
RelationalExpression expected(Kind::kRelEQ, logicOrFlatten, logicAndFlatten);

auto newExpr = ExpressionUtils::flattenInnerLogicalExpr(&expr);
ASSERT_EQ(expected, *newExpr);
}
// (true OR false OR true) AND (true AND false AND true)
{
auto *or1 = new ConstantExpression(true);
auto *or2 = new ConstantExpression(false);
auto *or3 = new ConstantExpression(true);
auto* logicOrExpr = new LogicalExpression(Kind::kLogicalOr,
new LogicalExpression(Kind::kLogicalOr,
or1,
or2),
or3);
auto *and1 = new ConstantExpression(false);
auto *and2 = new ConstantExpression(false);
auto *and3 = new ConstantExpression(true);
auto* logicAndExpr = new LogicalExpression(Kind::kLogicalAnd,
new LogicalExpression(Kind::kLogicalAnd,
and1,
and2),
and3);
LogicalExpression expr(Kind::kLogicalAnd, logicOrExpr, logicAndExpr);

auto* logicOrFlatten = new LogicalExpression(Kind::kLogicalOr);
logicOrFlatten->addOperand(or1->clone().release());
logicOrFlatten->addOperand(or2->clone().release());
logicOrFlatten->addOperand(or3->clone().release());
LogicalExpression expected(Kind::kLogicalAnd);
expected.addOperand(logicOrFlatten);
expected.addOperand(and1->clone().release());
expected.addOperand(and2->clone().release());
expected.addOperand(and3->clone().release());

auto newExpr = ExpressionUtils::flattenInnerLogicalExpr(&expr);
ASSERT_EQ(expected, *newExpr);
}
}

std::unique_ptr<Expression> parse(const std::string& expr) {
std::string query = "LOOKUP on t1 WHERE " + expr;
GQLParser parser;
Expand Down
128 changes: 128 additions & 0 deletions tests/tck/features/optimizer/IndexScanRule.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
# Copyright (c) 2021 vesoft inc. All rights reserved.
#
# This source code is licensed under Apache 2.0 License,
# attached with Common Clause Condition 1.0, found in the LICENSES directory.
Feature: Match index selection

Background:
Given a graph with space named "nba"

Scenario: and filter embeding
When profiling query:
"""
MATCH (v:player)
WHERE v.name>"Tim Duncan" and v.name<="Yao Ming"
RETURN v
"""
Then the result should be, in any order:
| v |
| ("Tracy McGrady" :player{age: 39, name: "Tracy McGrady"}) |
| ("Yao Ming" :player{age: 38, name: "Yao Ming"}) |
| ("Tony Parker" :player{age: 36, name: "Tony Parker"}) |
| ("Vince Carter" :player{age: 42, name: "Vince Carter"}) |
And the execution plan should be:
| id | name | dependencies | operator info |
| 10 | Project | 13 | |
| 13 | Filter | 7 | |
| 7 | Project | 6 | |
| 6 | Project | 5 | |
| 5 | Filter | 15 | |
| 15 | GetVertices | 11 | |
| 11 | IndexScan | 0 | {"indexCtx": {"columnHints":{"scanType":"RANGE","column":"name","beginValue":"\"Tim Duncan","endValue":"\"Yao Ming"}}} |
| 0 | Start | | |

Scenario: or filter embeding
When profiling query:
"""
MATCH (v:player)
WHERE
v.name<="Aron Baynes"
or v.name>"Yao Ming"
or v.name=="Kobe Bryant"
or v.age>40
RETURN v
"""
Then the result should be, in any order:
| v |
| ("Kobe Bryant" :player{age: 40, name: "Kobe Bryant"}) |
| ("Aron Baynes" :player{age: 32, name: "Aron Baynes"}) |
| ("Steve Nash" :player{age: 45, name: "Steve Nash"}) |
| ("Grant Hill" :player{age: 46, name: "Grant Hill"}) |
| ("Amar'e Stoudemire" :player{age: 36, name: "Amar'e Stoudemire"}) |
| ("Manu Ginobili" :player{age: 41, name: "Manu Ginobili"}) |
| ("Jason Kidd" :player{age: 45, name: "Jason Kidd"}) |
| ("Vince Carter" :player{age: 42, name: "Vince Carter"}) |
| ("Ray Allen" :player{age: 43, name: "Ray Allen"}) |
| ("Tim Duncan" :bachelor{name: "Tim Duncan", speciality: "psychology"} :player{age: 42, name: "Tim Duncan"}) |
| ("Shaquile O'Neal" :player{age: 47, name: "Shaquile O'Neal"}) |
And the execution plan should be:
| id | name | dependencies | operator info |
| 10 | Project | 13 | |
| 13 | Filter | 7 | {"condition":"!(hasSameEdgeInPath($-.__COL_0))"} |
| 7 | Project | 6 | |
| 6 | Project | 5 | |
| 5 | Filter | 15 | |
| 15 | GetVertices | 11 | |
| 11 | IndexScan | 0 | |
| 0 | Start | | |

Scenario: degenerate to full tag scan
When profiling query:
"""
MATCH (v:player)-[:like]->(n)
WHERE
v.name<="Aron Baynes"
or n.age>45
RETURN v, n
"""
Then the result should be, in any order:
| v | n |
| ("Tracy McGrady" :player{age: 39, name: "Tracy McGrady"}) | ("Grant Hill" :player{age: 46, name: "Grant Hill"}) |
| ("Amar'e Stoudemire" :player{age: 36, name: "Amar'e Stoudemire"}) | ("Steve Nash" :player{age: 45, name: "Steve Nash"}) |
| ("Yao Ming" :player{age: 38, name: "Yao Ming"}) | ("Shaquile O'Neal" :player{age: 47, name: "Shaquile O'Neal"}) |
| ("Aron Baynes" :player{age: 32, name: "Aron Baynes"}) | ("Tim Duncan" :bachelor{name: "Tim Duncan", speciality: "psychology"} :player{age: 42, name: "Tim Duncan"}) |
And the execution plan should be:
| id | name | dependencies | operator info |
| 16 | Project | 19 | |
| 19 | Filter | 13 | { "condition": "((($v.name<=\"Aron Baynes\") OR ($n.age>45)) AND !(hasSameEdgeInPath($-.__COL_0)))"} |
| 13 | Project | 12 | |
| 12 | InnerJoin | 11 | |
| 11 | Project | 21 | |
| 21 | GetVertices | 7 | |
| 7 | Filter | 6 | |
| 6 | Project | 5 | |
| 5 | Filter | 23 | |
| 23 | GetNeighbors | 17 | |
| 17 | IndexScan | 0 | |
| 0 | Start | | |
# This is actually the optimization for another optRule,
# but it is necessary to ensure that the current optimization does not destroy this scenario
# and it can be considered in the subsequent refactoring
When profiling query:
"""
MATCH (v:player)-[:like]->(n)
WHERE
v.name<="Aron Baynes"
or v.age>45
or true
or v.age+1
or v.name
RETURN count(*) AS count
"""
Then the result should be, in any order:
| count |
| 81 |
And the execution plan should be:
| id | name | dependencies | operator info |
| 16 | Aggregate | 18 | |
| 18 | Filter | 13 | |
| 13 | Project | 12 | |
| 12 | InnerJoin | 11 | |
| 11 | Project | 20 | |
| 20 | GetVertices | 7 | |
| 7 | Filter | 6 | |
| 6 | Project | 5 | |
| 5 | Filter | 22 | |
| 22 | GetNeighbors | 17 | |
| 17 | IndexScan | 0 | |
| 0 | Start | | |