From 98ecfc1ae4a577d465f87ff6c78037ef461d35e0 Mon Sep 17 00:00:00 2001 From: Yee <2520865+yixinglu@users.noreply.github.com> Date: Mon, 5 Jul 2021 14:05:25 +0800 Subject: [PATCH] Rewrite lookup index selection implementation (#1188) * Add specified index scan nodes * Refactor lookup validator * Fix getcontext in validator * Refactor index scan plan node * construct IndexScan plan node * Add opt rule for indexscan and filter * Select index * Fix file edge scan rule * Fix crash * Add lookup tests * Fix bug * Resolve conflicts * Cleanup * Rename priority to score * Refactor tck test cases * More cases * Cleanup * Fix logicalor and lookup on tag and edge * fix nullptr initialize bug * Fix invalid column name * Fix bug * Fix lookup column errors * Add more or expr cases * Limit IndexScanRule usage * drop space * Fix IndexFullScanRule * Fix multiple range error * Fix failed tests * Fix comment * Cleanup and comment * more comments for logical or expression * Fix lookup validator unit tests Move comment to header file * Fix debug option * cleanup Makefile * Fix tck cases about examples usage * improve slow query test cases * format and fix optrule match * cleanup * Define extern string const values * Fix reclaim secs Co-authored-by: cpw <13495049+CPWstatic@users.noreply.github.com> --- .github/workflows/pull_request.yml | 6 +- src/context/ast/AstContext.h | 3 + src/context/ast/QueryAstContext.h | 10 + src/executor/Executor.cpp | 8 +- src/executor/query/IndexScanExecutor.cpp | 4 +- src/optimizer/CMakeLists.txt | 8 + src/optimizer/OptRule.cpp | 17 + src/optimizer/OptRule.h | 11 + src/optimizer/OptimizerUtils.cpp | 486 ++++++++++++++++-- src/optimizer/OptimizerUtils.h | 71 ++- src/optimizer/rule/EdgeIndexFullScanRule.cpp | 42 ++ src/optimizer/rule/EdgeIndexFullScanRule.h | 30 ++ src/optimizer/rule/IndexFullScanBaseRule.cpp | 86 ++++ src/optimizer/rule/IndexFullScanBaseRule.h | 34 ++ src/optimizer/rule/IndexScanRule.cpp | 125 +---- src/optimizer/rule/IndexScanRule.h | 28 +- 
.../rule/PushFilterDownEdgeIndexScanRule.cpp | 132 +++++ .../rule/PushFilterDownEdgeIndexScanRule.h | 35 ++ .../rule/PushFilterDownTagIndexScanRule.cpp | 125 +++++ .../rule/PushFilterDownTagIndexScanRule.h | 35 ++ src/optimizer/rule/TagIndexFullScanRule.cpp | 38 ++ src/optimizer/rule/TagIndexFullScanRule.h | 30 ++ .../rule/UnionAllEdgeIndexScanRule.cpp | 32 ++ .../rule/UnionAllEdgeIndexScanRule.h | 29 ++ .../rule/UnionAllIndexScanBaseRule.cpp | 98 ++++ .../rule/UnionAllIndexScanBaseRule.h | 25 + .../rule/UnionAllTagIndexScanRule.cpp | 32 ++ src/optimizer/rule/UnionAllTagIndexScanRule.h | 29 ++ src/optimizer/test/IndexScanRuleTest.cpp | 60 ++- src/parser/TraverseSentences.cpp | 5 + src/parser/TraverseSentences.h | 17 +- src/parser/parser.yy | 5 +- src/planner/CMakeLists.txt | 1 + src/planner/Planner.cpp | 5 + src/planner/Planner.h | 5 + src/planner/PlannersRegister.cpp | 32 +- src/planner/PlannersRegister.h | 8 +- src/planner/match/LabelIndexSeek.cpp | 28 +- src/planner/match/PropIndexSeek.cpp | 23 +- src/planner/ngql/LookupPlanner.cpp | 148 ++++++ src/planner/ngql/LookupPlanner.h | 45 ++ src/planner/plan/PlanNode.cpp | 12 + src/planner/plan/PlanNode.h | 9 + src/planner/plan/Query.cpp | 12 +- src/planner/plan/Query.h | 63 +-- src/planner/plan/Scan.h | 435 ++++++++++++++++ src/util/FTIndexUtils.cpp | 34 +- src/util/FTIndexUtils.h | 9 +- src/util/IndexUtil.cpp | 20 + src/util/IndexUtil.h | 2 + src/validator/LookupValidator.cpp | 316 ++++++------ src/validator/LookupValidator.h | 55 +- src/validator/MatchValidator.cpp | 4 +- src/validator/SequentialValidator.h | 4 +- src/validator/test/LookupValidatorTest.cpp | 17 +- tests/Makefile | 29 +- tests/admin/test_configs.py | 2 +- tests/common/nebula_service.py | 2 +- tests/job/test_session.py | 3 +- tests/nebula-test-run.py | 21 +- tests/requirements.txt | 2 +- .../tck/features/lookup/LookUp.IntVid.feature | 106 +--- tests/tck/features/lookup/LookUp.feature | 88 +--- tests/tck/features/lookup/LookupEdge.feature | 120 +++++ 
tests/tck/features/lookup/LookupEdge2.feature | 51 ++ tests/tck/features/lookup/LookupTag.feature | 121 +++++ tests/tck/features/lookup/LookupTag2.feature | 82 +++ tests/tck/features/match/SeekByEdge.feature | 10 +- .../PushFilterDownLeftJoinRule.feature | 41 +- .../KillSlowQueryViaDiffrentService.feature | 2 +- .../KillSlowQueryViaSameService.feature | 6 +- 71 files changed, 2891 insertions(+), 778 deletions(-) create mode 100644 src/optimizer/rule/EdgeIndexFullScanRule.cpp create mode 100644 src/optimizer/rule/EdgeIndexFullScanRule.h create mode 100644 src/optimizer/rule/IndexFullScanBaseRule.cpp create mode 100644 src/optimizer/rule/IndexFullScanBaseRule.h create mode 100644 src/optimizer/rule/PushFilterDownEdgeIndexScanRule.cpp create mode 100644 src/optimizer/rule/PushFilterDownEdgeIndexScanRule.h create mode 100644 src/optimizer/rule/PushFilterDownTagIndexScanRule.cpp create mode 100644 src/optimizer/rule/PushFilterDownTagIndexScanRule.h create mode 100644 src/optimizer/rule/TagIndexFullScanRule.cpp create mode 100644 src/optimizer/rule/TagIndexFullScanRule.h create mode 100644 src/optimizer/rule/UnionAllEdgeIndexScanRule.cpp create mode 100644 src/optimizer/rule/UnionAllEdgeIndexScanRule.h create mode 100644 src/optimizer/rule/UnionAllIndexScanBaseRule.cpp create mode 100644 src/optimizer/rule/UnionAllIndexScanBaseRule.h create mode 100644 src/optimizer/rule/UnionAllTagIndexScanRule.cpp create mode 100644 src/optimizer/rule/UnionAllTagIndexScanRule.h create mode 100644 src/planner/ngql/LookupPlanner.cpp create mode 100644 src/planner/ngql/LookupPlanner.h create mode 100644 src/planner/plan/Scan.h create mode 100644 tests/tck/features/lookup/LookupEdge.feature create mode 100644 tests/tck/features/lookup/LookupEdge2.feature create mode 100644 tests/tck/features/lookup/LookupTag.feature create mode 100644 tests/tck/features/lookup/LookupTag2.feature diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 5fc75a485..916184856 
100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -138,17 +138,17 @@ jobs: - name: Pytest run: | make up - make RM_DIR=false J=${{ steps.cmake.outputs.j }} test + make RM_DIR=false DEBUG=false J=${{ steps.cmake.outputs.j }} test make down working-directory: tests/ timeout-minutes: 15 - name: TCK run: | make up - make RM_DIR=false J=${{ steps.cmake.outputs.j }} tck + make RM_DIR=false DEBUG=false J=${{ steps.cmake.outputs.j }} tck make down working-directory: tests/ - timeout-minutes: 20 + timeout-minutes: 25 - name: Sanitizer if: ${{ always() }} run: | diff --git a/src/context/ast/AstContext.h b/src/context/ast/AstContext.h index 842ee8d18..80d15f4e8 100644 --- a/src/context/ast/AstContext.h +++ b/src/context/ast/AstContext.h @@ -12,11 +12,14 @@ namespace nebula { namespace graph { + struct AstContext { QueryContext* qctx; Sentence* sentence; SpaceInfo space; }; + } // namespace graph } // namespace nebula + #endif // CONTEXT_ASTCONTEXT_H_ diff --git a/src/context/ast/QueryAstContext.h b/src/context/ast/QueryAstContext.h index b1c4597a1..031c0c0e2 100644 --- a/src/context/ast/QueryAstContext.h +++ b/src/context/ast/QueryAstContext.h @@ -106,6 +106,16 @@ struct GoContext final : AstContext { std::string inputVarName; }; +struct LookupContext final : public AstContext { + bool isEdge{false}; + bool dedup{false}; + bool isEmptyResultSet{false}; + int32_t schemaId{-1}; + int32_t limit{-1}; + Expression* filter{nullptr}; + // order by +}; + } // namespace graph } // namespace nebula #endif // CONTEXT_AST_QUERYASTCONTEXT_H_ diff --git a/src/executor/Executor.cpp b/src/executor/Executor.cpp index 663bbedd6..5fea7002b 100644 --- a/src/executor/Executor.cpp +++ b/src/executor/Executor.cpp @@ -181,7 +181,13 @@ Executor *Executor::makeExecutor(QueryContext *qctx, const PlanNode *node) { case PlanNode::Kind::kUnwind: { return pool->add(new UnwindExecutor(node, qctx)); } - case PlanNode::Kind::kIndexScan: { + case 
PlanNode::Kind::kIndexScan: + case PlanNode::Kind::kEdgeIndexFullScan: + case PlanNode::Kind::kEdgeIndexPrefixScan: + case PlanNode::Kind::kEdgeIndexRangeScan: + case PlanNode::Kind::kTagIndexFullScan: + case PlanNode::Kind::kTagIndexPrefixScan: + case PlanNode::Kind::kTagIndexRangeScan: { return pool->add(new IndexScanExecutor(node, qctx)); } case PlanNode::Kind::kStart: { diff --git a/src/executor/query/IndexScanExecutor.cpp b/src/executor/query/IndexScanExecutor.cpp index 3ea0c61f2..2d270d313 100644 --- a/src/executor/query/IndexScanExecutor.cpp +++ b/src/executor/query/IndexScanExecutor.cpp @@ -29,10 +29,10 @@ folly::Future IndexScanExecutor::indexScan() { return finish(ResultBuilder().value(Value(std::move(dataSet))).finish()); } return storageClient->lookupIndex(lookup->space(), - *lookup->queryContext(), + lookup->queryContext(), lookup->isEdge(), lookup->schemaId(), - *lookup->returnColumns()) + lookup->returnColumns()) .via(runner()) .thenValue([this](StorageRpcResponse &&rpcResp) { return handleResp(std::move(rpcResp)); diff --git a/src/optimizer/CMakeLists.txt b/src/optimizer/CMakeLists.txt index 36e601a1b..340723415 100644 --- a/src/optimizer/CMakeLists.txt +++ b/src/optimizer/CMakeLists.txt @@ -24,6 +24,14 @@ nebula_add_library( rule/PushFilterDownAggregateRule.cpp rule/PushFilterDownProjectRule.cpp rule/PushFilterDownLeftJoinRule.cpp + rule/PushFilterDownEdgeIndexScanRule.cpp + rule/PushFilterDownTagIndexScanRule.cpp + rule/UnionAllIndexScanBaseRule.cpp + rule/UnionAllTagIndexScanRule.cpp + rule/UnionAllEdgeIndexScanRule.cpp + rule/IndexFullScanBaseRule.cpp + rule/TagIndexFullScanRule.cpp + rule/EdgeIndexFullScanRule.cpp ) nebula_add_subdirectory(test) diff --git a/src/optimizer/OptRule.cpp b/src/optimizer/OptRule.cpp index 6f087df56..2bc112880 100644 --- a/src/optimizer/OptRule.cpp +++ b/src/optimizer/OptRule.cpp @@ -12,9 +12,26 @@ #include "optimizer/OptGroup.h" #include "planner/plan/PlanNode.h" +using nebula::graph::PlanNode; + namespace nebula { 
namespace opt { +const PlanNode *MatchedResult::planNode(const std::vector &pos) const { + if (pos.empty()) { + return DCHECK_NOTNULL(node)->node(); + } + + DCHECK_EQ(pos[0], 0); + + const MatchedResult *result = this; + for (size_t i = 1; i < pos.size(); ++i) { + DCHECK_LT(pos[i], result->dependencies.size()); + result = &result->dependencies[pos[i]]; + } + return DCHECK_NOTNULL(result->node)->node(); +} + Pattern Pattern::create(graph::PlanNode::Kind kind, std::initializer_list patterns) { Pattern pattern; pattern.kind_ = kind; diff --git a/src/optimizer/OptRule.h b/src/optimizer/OptRule.h index df3871e1c..ffb920ef4 100644 --- a/src/optimizer/OptRule.h +++ b/src/optimizer/OptRule.h @@ -19,6 +19,7 @@ namespace nebula { namespace graph { class QueryContext; +class PlanNode; } // namespace graph namespace opt { @@ -30,6 +31,16 @@ class OptGroup; struct MatchedResult { const OptGroupNode *node{nullptr}; std::vector dependencies; + + // params | plan node + // -------------+------------ + // {} | this->node + // {0} | this->node + // {1} | error + // {0, 1} | this->dependencies[1] + // {0, 1, 0} | this->dependencies[1].dependencies[0] + // {0, 1, 0, 1} | this->dependencies[1].dependencies[0].dependencies[1] + const graph::PlanNode *planNode(const std::vector &pos = {}) const; }; class Pattern final { diff --git a/src/optimizer/OptimizerUtils.cpp b/src/optimizer/OptimizerUtils.cpp index 19b7283fd..e95c4a491 100644 --- a/src/optimizer/OptimizerUtils.cpp +++ b/src/optimizer/OptimizerUtils.cpp @@ -6,6 +6,29 @@ #include "optimizer/OptimizerUtils.h" +#include +#include +#include +#include + +#include "common/base/Status.h" +#include "common/datatypes/Value.h" +#include "common/expression/ConstantExpression.h" +#include "common/expression/Expression.h" +#include "common/expression/LogicalExpression.h" +#include "common/expression/PropertyExpression.h" +#include "common/expression/RelationalExpression.h" +#include "common/interface/gen-cpp2/meta_types.h" +#include 
"common/interface/gen-cpp2/storage_types.h" +#include "planner/plan/Query.h" + +using nebula::meta::cpp2::ColumnDef; +using nebula::meta::cpp2::IndexItem; +using nebula::storage::cpp2::IndexColumnHint; +using nebula::storage::cpp2::IndexQueryContext; + +using BVO = nebula::graph::OptimizerUtils::BoundValueOperator; + namespace nebula { namespace graph { @@ -13,16 +36,16 @@ Value OptimizerUtils::boundValue(const meta::cpp2::ColumnDef& col, BoundValueOperator op, const Value& v) { switch (op) { - case BoundValueOperator::GREATER_THAN : { + case BoundValueOperator::GREATER_THAN: { return boundValueWithGT(col, v); } - case BoundValueOperator::LESS_THAN : { + case BoundValueOperator::LESS_THAN: { return boundValueWithLT(col, v); } - case BoundValueOperator::MAX : { + case BoundValueOperator::MAX: { return boundValueWithMax(col); } - case BoundValueOperator::MIN : { + case BoundValueOperator::MIN: { return boundValueWithMin(col); } } @@ -32,14 +55,14 @@ Value OptimizerUtils::boundValue(const meta::cpp2::ColumnDef& col, Value OptimizerUtils::boundValueWithGT(const meta::cpp2::ColumnDef& col, const Value& v) { auto type = SchemaUtil::propTypeToValueType(col.get_type().get_type()); switch (type) { - case Value::Type::INT : { + case Value::Type::INT: { if (v.getInt() == std::numeric_limits::max()) { return v; } else { return v + 1; } } - case Value::Type::FLOAT : { + case Value::Type::FLOAT: { if (v.getFloat() > 0.0) { if (v.getFloat() == std::numeric_limits::max()) { return v; @@ -53,7 +76,7 @@ Value OptimizerUtils::boundValueWithGT(const meta::cpp2::ColumnDef& col, const V } return v.getFloat() + kEpsilon; } - case Value::Type::STRING : { + case Value::Type::STRING: { if (!col.type.type_length_ref().has_value()) { return Value::kNullBadType; } @@ -61,14 +84,14 @@ Value OptimizerUtils::boundValueWithGT(const meta::cpp2::ColumnDef& col, const V bytes.resize(*col.get_type().type_length_ref()); for (size_t i = bytes.size();; i--) { if (i > 0) { - if (bytes[i-1]++ != 255) 
break; + if (bytes[i - 1]++ != 255) break; } else { return Value(std::string(*col.get_type().type_length_ref(), '\377')); } } return Value(std::string(bytes.begin(), bytes.end())); } - case Value::Type::DATE : { + case Value::Type::DATE: { if (Date(std::numeric_limits::max(), 12, 31) == v.getDate()) { return v.getDate(); } else if (Date() == v.getDate()) { @@ -92,7 +115,7 @@ Value OptimizerUtils::boundValueWithGT(const meta::cpp2::ColumnDef& col, const V } return Value(d); } - case Value::Type::TIME : { + case Value::Type::TIME: { auto t = v.getTime(); // Ignore the time zone. if (t.microsec < 999999) { @@ -117,7 +140,7 @@ Value OptimizerUtils::boundValueWithGT(const meta::cpp2::ColumnDef& col, const V } return Value(t); } - case Value::Type::DATETIME : { + case Value::Type::DATETIME: { auto dt = v.getDateTime(); // Ignore the time zone. if (dt.microsec < 999999) { @@ -167,8 +190,7 @@ Value OptimizerUtils::boundValueWithGT(const meta::cpp2::ColumnDef& col, const V case Value::Type::MAP: case Value::Type::DATASET: case Value::Type::PATH: { - DLOG(FATAL) << "Not supported value type " << type - << "for index."; + DLOG(FATAL) << "Not supported value type " << type << "for index."; return Value::kNullBadType; } } @@ -179,14 +201,14 @@ Value OptimizerUtils::boundValueWithGT(const meta::cpp2::ColumnDef& col, const V Value OptimizerUtils::boundValueWithLT(const meta::cpp2::ColumnDef& col, const Value& v) { auto type = SchemaUtil::propTypeToValueType(col.get_type().get_type()); switch (type) { - case Value::Type::INT : { + case Value::Type::INT: { if (v.getInt() == std::numeric_limits::min()) { return v; } else { return v - 1; } } - case Value::Type::FLOAT : { + case Value::Type::FLOAT: { if (v.getFloat() < 0.0) { if (v.getFloat() == -std::numeric_limits::max()) { return v; @@ -198,7 +220,7 @@ Value OptimizerUtils::boundValueWithLT(const meta::cpp2::ColumnDef& col, const V } return v.getFloat() - kEpsilon; } - case Value::Type::STRING : { + case Value::Type::STRING: { if 
(!col.type.type_length_ref().has_value()) { return Value::kNullBadType; } @@ -206,14 +228,14 @@ Value OptimizerUtils::boundValueWithLT(const meta::cpp2::ColumnDef& col, const V bytes.resize(*col.get_type().type_length_ref()); for (size_t i = bytes.size();; i--) { if (i > 0) { - if (bytes[i-1]-- != 0) break; + if (bytes[i - 1]-- != 0) break; } else { return Value(std::string(*col.get_type().type_length_ref(), '\0')); } } return Value(std::string(bytes.begin(), bytes.end())); } - case Value::Type::DATE : { + case Value::Type::DATE: { if (Date() == v.getDate()) { return v.getDate(); } @@ -235,7 +257,7 @@ Value OptimizerUtils::boundValueWithLT(const meta::cpp2::ColumnDef& col, const V } return Value(d); } - case Value::Type::TIME : { + case Value::Type::TIME: { if (Time() == v.getTime()) { return v.getTime(); } @@ -262,7 +284,7 @@ Value OptimizerUtils::boundValueWithLT(const meta::cpp2::ColumnDef& col, const V } return Value(t); } - case Value::Type::DATETIME : { + case Value::Type::DATETIME: { if (DateTime() == v.getDateTime()) { return v.getDateTime(); } @@ -314,8 +336,7 @@ Value OptimizerUtils::boundValueWithLT(const meta::cpp2::ColumnDef& col, const V case Value::Type::MAP: case Value::Type::DATASET: case Value::Type::PATH: { - DLOG(FATAL) << "Not supported value type " << type - << "for index."; + DLOG(FATAL) << "Not supported value type " << type << "for index."; return Value::kNullBadType; } } @@ -326,19 +347,19 @@ Value OptimizerUtils::boundValueWithLT(const meta::cpp2::ColumnDef& col, const V Value OptimizerUtils::boundValueWithMax(const meta::cpp2::ColumnDef& col) { auto type = SchemaUtil::propTypeToValueType(col.get_type().get_type()); switch (type) { - case Value::Type::INT : { + case Value::Type::INT: { return Value(std::numeric_limits::max()); } - case Value::Type::FLOAT : { + case Value::Type::FLOAT: { return Value(std::numeric_limits::max()); } - case Value::Type::STRING : { + case Value::Type::STRING: { if (!col.type.type_length_ref().has_value()) { 
return Value::kNullBadType; } return Value(std::string(*col.get_type().type_length_ref(), '\377')); } - case Value::Type::DATE : { + case Value::Type::DATE: { Date d; d.year = std::numeric_limits::max(); d.month = 12; @@ -374,8 +395,7 @@ Value OptimizerUtils::boundValueWithMax(const meta::cpp2::ColumnDef& col) { case Value::Type::MAP: case Value::Type::DATASET: case Value::Type::PATH: { - DLOG(FATAL) << "Not supported value type " << type - << "for index."; + DLOG(FATAL) << "Not supported value type " << type << "for index."; return Value::kNullBadType; } } @@ -386,25 +406,25 @@ Value OptimizerUtils::boundValueWithMax(const meta::cpp2::ColumnDef& col) { Value OptimizerUtils::boundValueWithMin(const meta::cpp2::ColumnDef& col) { auto type = SchemaUtil::propTypeToValueType(col.get_type().get_type()); switch (type) { - case Value::Type::INT : { + case Value::Type::INT: { return Value(std::numeric_limits::min()); } - case Value::Type::FLOAT : { + case Value::Type::FLOAT: { return Value(-std::numeric_limits::max()); } - case Value::Type::STRING : { + case Value::Type::STRING: { if (!col.type.type_length_ref().has_value()) { return Value::kNullBadType; } return Value(std::string(*col.get_type().type_length_ref(), '\0')); } - case Value::Type::DATE : { + case Value::Type::DATE: { return Value(Date()); } case Value::Type::TIME: { return Value(Time()); } - case Value::Type::DATETIME : { + case Value::Type::DATETIME: { return Value(DateTime()); } case Value::Type::__EMPTY__: @@ -417,8 +437,7 @@ Value OptimizerUtils::boundValueWithMin(const meta::cpp2::ColumnDef& col) { case Value::Type::MAP: case Value::Type::DATASET: case Value::Type::PATH: { - DLOG(FATAL) << "Not supported value type " << type - << "for index."; + DLOG(FATAL) << "Not supported value type " << type << "for index."; return Value::kNullBadType; } } @@ -437,7 +456,7 @@ Value OptimizerUtils::normalizeValue(const meta::cpp2::ColumnDef& col, const Val case Value::Type::DATETIME: { return v; } - case 
Value::Type::STRING : { + case Value::Type::STRING: { if (!col.type.type_length_ref().has_value()) { return Value::kNullBadType; } @@ -463,14 +482,399 @@ Value OptimizerUtils::normalizeValue(const meta::cpp2::ColumnDef& col, const Val case Value::Type::MAP: case Value::Type::DATASET: case Value::Type::PATH: { - DLOG(FATAL) << "Not supported value type " << type - << "for index."; + DLOG(FATAL) << "Not supported value type " << type << "for index."; return Value::kNullBadType; } } DLOG(FATAL) << "Unknown value type " << static_cast(type); - return Value::kNullBadType;; + return Value::kNullBadType; +} + +Status OptimizerUtils::boundValue(Expression::Kind kind, + const Value& val, + const meta::cpp2::ColumnDef& col, + Value& begin, + Value& end) { + if (val.type() != graph::SchemaUtil::propTypeToValueType(col.type.type)) { + return Status::SemanticError("Data type error of field : %s", col.get_name().c_str()); + } + switch (kind) { + case Expression::Kind::kRelLE: { + // if c1 <= int(5) , the range pair should be (min, 6) + // if c1 < int(5), the range pair should be (min, 5) + auto v = OptimizerUtils::boundValue(col, BoundValueOperator::GREATER_THAN, val); + if (v == Value::kNullBadType) { + LOG(ERROR) << "Get bound value error. field : " << col.get_name(); + return Status::Error("Get bound value error. field : %s", col.get_name().c_str()); + } + // where c <= 1 and c <= 2 , 1 should be valid. + if (end.empty()) { + end = v; + } else { + end = v < end ? v : end; + } + break; + } + case Expression::Kind::kRelGE: { + // where c >= 1 and c >= 2 , 2 should be valid. + if (begin.empty()) { + begin = val; + } else { + begin = val < begin ? begin : val; + } + break; + } + case Expression::Kind::kRelLT: { + // c < 5 and c < 6 , 5 should be valid. + if (end.empty()) { + end = val; + } else { + end = val < end ? 
val : end; + } + break; + } + case Expression::Kind::kRelGT: { + // if c >= 5, the range pair should be (5, max) + // if c > 5, the range pair should be (6, max) + auto v = OptimizerUtils::boundValue(col, BoundValueOperator::GREATER_THAN, val); + if (v == Value::kNullBadType) { + LOG(ERROR) << "Get bound value error. field : " << col.get_name(); + return Status::Error("Get bound value error. field : %s", col.get_name().c_str()); + } + // where c > 1 and c > 2 , 2 should be valid. + if (begin.empty()) { + begin = v; + } else { + begin = v < begin ? begin : v; + } + break; + } + default: { + // TODO(yee): Semantic error + return Status::Error("Invalid expression kind."); + } + } + return Status::OK(); +} + +namespace { + +// IndexScore is used to find the optimal index. The larger the score, the better the index. +// When it is a score sequence, the length of the sequence should also be considered, such as: +// {2, 1, 0} > {2, 1} > {2, 0, 1} > {2, 0} > {2} > {1, 2} > {1, 1} > {1} +enum class IndexScore : uint8_t { + kNotEqual = 0, + kRange = 1, + kPrefix = 2, +}; + +struct ScoredColumnHint { + storage::cpp2::IndexColumnHint hint; + IndexScore score; +}; + +struct IndexResult { + const meta::cpp2::IndexItem* index; + // expressions not used in all `ScoredColumnHint' + std::vector unusedExprs; + std::vector hints; + + bool operator<(const IndexResult& rhs) const { + if (hints.empty()) return true; + auto sz = std::min(hints.size(), rhs.hints.size()); + for (size_t i = 0; i < sz; i++) { + if (hints[i].score < rhs.hints[i].score) { + return true; + } + if (hints[i].score > rhs.hints[i].score) { + return false; + } + } + return hints.size() < rhs.hints.size(); + } +}; + +Status checkValue(const ColumnDef& field, BVO bvo, Value* value) { + if (value->empty()) { + *value = OptimizerUtils::boundValue(field, bvo, Value()); + if (value->isBadNull()) { + return Status::Error("Get bound value error. 
field : %s", field.get_name().c_str()); + } + } + return Status::OK(); +} + +Status handleRangeIndex(const meta::cpp2::ColumnDef& field, + const Expression* expr, + const Value& value, + IndexColumnHint* hint) { + if (field.get_type().get_type() == meta::cpp2::PropertyType::BOOL) { + return Status::Error("Range scan for bool type is illegal"); + } + Value begin, end; + NG_RETURN_IF_ERROR(OptimizerUtils::boundValue(expr->kind(), value, field, begin, end)); + NG_RETURN_IF_ERROR(checkValue(field, BVO::MIN, &begin)); + NG_RETURN_IF_ERROR(checkValue(field, BVO::MAX, &end)); + hint->set_begin_value(std::move(begin)); + hint->set_end_value(std::move(end)); + hint->set_scan_type(storage::cpp2::ScanType::RANGE); + hint->set_column_name(field.get_name()); + return Status::OK(); +} + +void handleEqualIndex(const ColumnDef& field, const Value& value, IndexColumnHint* hint) { + hint->set_scan_type(storage::cpp2::ScanType::PREFIX); + hint->set_column_name(field.get_name()); + hint->set_begin_value(OptimizerUtils::normalizeValue(field, value)); +} + +StatusOr selectRelExprIndex(const ColumnDef& field, + const RelationalExpression* expr) { + // TODO(yee): Reverse expression + auto left = expr->left(); + DCHECK(left->kind() == Expression::Kind::kEdgeProperty || + left->kind() == Expression::Kind::kTagProperty); + auto propExpr = static_cast(left); + if (propExpr->prop() != field.get_name()) { + return Status::Error("Invalid field name."); + } + + auto right = expr->right(); + DCHECK(right->kind() == Expression::Kind::kConstant); + const auto& value = static_cast(right)->value(); + + ScoredColumnHint hint; + switch (expr->kind()) { + case Expression::Kind::kRelEQ: { + handleEqualIndex(field, value, &hint.hint); + hint.score = IndexScore::kPrefix; + break; + } + case Expression::Kind::kRelGE: + case Expression::Kind::kRelGT: + case Expression::Kind::kRelLE: + case Expression::Kind::kRelLT: { + NG_RETURN_IF_ERROR(handleRangeIndex(field, expr, value, &hint.hint)); + hint.score = 
IndexScore::kRange; + break; + } + case Expression::Kind::kRelNE: { + hint.score = IndexScore::kNotEqual; + break; + } + default: { + return Status::Error("Invalid expression kind"); + } + } + return hint; +} + +StatusOr selectRelExprIndex(const RelationalExpression* expr, const IndexItem& index) { + const auto& fields = index.get_fields(); + if (fields.empty()) { + return Status::Error("Index(%s) does not have any fields.", index.get_index_name().c_str()); + } + auto status = selectRelExprIndex(fields[0], expr); + NG_RETURN_IF_ERROR(status); + IndexResult result; + result.hints.emplace_back(std::move(status).value()); + result.index = &index; + return result; +} + +bool mergeRangeColumnHints(const std::vector& hints, Value* begin, Value* end) { + for (auto& h : hints) { + if (h.score != IndexScore::kRange) { + return false; + } + if (h.hint.begin_value_ref().is_set()) { + const auto& value = h.hint.get_begin_value(); + if (begin->empty() || *begin < value) { + *begin = value; + } + } + if (h.hint.end_value_ref().is_set()) { + const auto& value = h.hint.get_end_value(); + if (end->empty() || *end > value) { + *end = value; + } + } + } + return !(*begin > *end); +} + +bool getIndexColumnHintInExpr(const ColumnDef& field, + const LogicalExpression* expr, + ScoredColumnHint* hint, + std::vector* operands) { + std::vector hints; + for (auto& operand : expr->operands()) { + if (!operand->isRelExpr()) continue; + auto relExpr = static_cast(operand); + auto status = selectRelExprIndex(field, relExpr); + if (status.ok()) { + hints.emplace_back(std::move(status).value()); + operands->emplace_back(operand); + } + } + + if (hints.empty()) return false; + + if (hints.size() == 1) { + *hint = hints.front(); + } else { + Value begin, end; + if (!mergeRangeColumnHints(hints, &begin, &end)) { + return false; + } + ScoredColumnHint h; + h.hint.set_column_name(field.get_name()); + h.hint.set_scan_type(storage::cpp2::ScanType::RANGE); + h.hint.set_begin_value(std::move(begin)); + 
h.hint.set_end_value(std::move(end)); + h.score = IndexScore::kRange; + *hint = std::move(h); + } + + return true; +} + +std::vector collectUnusedExpr( + const LogicalExpression* expr, + const std::unordered_set& usedOperands) { + std::vector unusedOperands; + for (auto& operand : expr->operands()) { + auto iter = std::find(usedOperands.begin(), usedOperands.end(), operand); + if (iter == usedOperands.end()) { + unusedOperands.emplace_back(operand); + } + } + return unusedOperands; +} + +StatusOr selectLogicalExprIndex(const LogicalExpression* expr, + const IndexItem& index) { + if (expr->kind() != Expression::Kind::kLogicalAnd) { + return Status::Error("Invalid expression kind."); + } + IndexResult result; + result.hints.reserve(index.get_fields().size()); + std::unordered_set usedOperands; + for (auto& field : index.get_fields()) { + ScoredColumnHint hint; + std::vector operands; + if (!getIndexColumnHintInExpr(field, expr, &hint, &operands)) { + break; + } + result.hints.emplace_back(std::move(hint)); + for (auto op : operands) { + usedOperands.insert(op); + } + } + if (result.hints.empty()) { + return Status::Error("There is not index to use."); + } + result.unusedExprs = collectUnusedExpr(expr, usedOperands); + result.index = &index; + return result; +} + +StatusOr selectIndex(const Expression* expr, const IndexItem& index) { + if (expr->isRelExpr()) { + return selectRelExprIndex(static_cast(expr), index); + } + + if (expr->isLogicalExpr()) { + return selectLogicalExprIndex(static_cast(expr), index); + } + + return Status::Error("Invalid expression kind."); +} + +} // namespace + +void OptimizerUtils::eraseInvalidIndexItems( + int32_t schemaId, + std::vector>* indexItems) { + // Erase invalid index items + for (auto iter = indexItems->begin(); iter != indexItems->end();) { + auto schema = (*iter)->get_schema_id(); + if (schema.tag_id_ref().has_value() && schema.get_tag_id() != schemaId) { + iter = indexItems->erase(iter); + } else if 
(schema.edge_type_ref().has_value() && schema.get_edge_type() != schemaId) { + iter = indexItems->erase(iter); + } else { + iter++; + } + } +} + +bool OptimizerUtils::findOptimalIndex(const Expression* condition, + const std::vector>& indexItems, + bool* isPrefixScan, + IndexQueryContext* ictx) { + // Return directly if there is not valid index to use. + if (indexItems.empty()) { + return false; + } + + std::vector results; + for (auto& index : indexItems) { + auto resStatus = selectIndex(condition, *index); + if (resStatus.ok()) { + results.emplace_back(std::move(resStatus).value()); + } + } + + if (results.empty()) { + return false; + } + + std::sort(results.begin(), results.end()); + + auto& index = results.back(); + if (index.hints.empty()) { + return false; + } + + *isPrefixScan = false; + std::vector hints; + hints.reserve(index.hints.size()); + auto iter = index.hints.begin(); + for (; iter != index.hints.end(); ++iter) { + auto& hint = *iter; + if (hint.score == IndexScore::kPrefix) { + hints.emplace_back(std::move(hint.hint)); + *isPrefixScan = true; + continue; + } + if (hint.score == IndexScore::kRange) { + hints.emplace_back(std::move(hint.hint)); + // skip the case first range hint is the last hint + // when set filter in index query context + ++iter; + } + break; + } + if (iter != index.hints.end() || !index.unusedExprs.empty()) { + ictx->set_filter(condition->encode()); + } + ictx->set_index_id(index.index->get_index_id()); + ictx->set_column_hints(std::move(hints)); + return true; +} + +void OptimizerUtils::copyIndexScanData(const nebula::graph::IndexScan* from, + nebula::graph::IndexScan* to) { + to->setEmptyResultSet(from->isEmptyResultSet()); + to->setSpace(from->space()); + to->setReturnCols(from->returnColumns()); + to->setSchemaId(from->schemaId()); + to->setDedup(from->dedup()); + to->setOrderBy(from->orderBy()); + to->setLimit(from->limit()); + to->setFilter(from->filter()); } -} // namespace graph -} // namespace nebula +} // namespace 
graph +} // namespace nebula diff --git a/src/optimizer/OptimizerUtils.h b/src/optimizer/OptimizerUtils.h index 6194f5134..56e2784f1 100644 --- a/src/optimizer/OptimizerUtils.h +++ b/src/optimizer/OptimizerUtils.h @@ -7,11 +7,28 @@ #define NEBULA_GRAPH_OPTIMIZER_OPTIMIZERUTILS_H_ #include "util/SchemaUtil.h" -#include namespace nebula { + +class Expression; + +namespace meta { +namespace cpp2 { +class ColumnDef; +class IndexItem; +} // namespace cpp2 +} // namespace meta + +namespace storage { +namespace cpp2 { +class IndexQueryContext; +} // namespace cpp2 +} // namespace storage + namespace graph { +class IndexScan; + class OptimizerUtils { public: enum class BoundValueOperator { @@ -21,7 +38,6 @@ class OptimizerUtils { MIN, }; -public: OptimizerUtils() = delete; static Value boundValue(const meta::cpp2::ColumnDef& col, @@ -37,8 +53,53 @@ class OptimizerUtils { static Value boundValueWithMin(const meta::cpp2::ColumnDef& col); static Value normalizeValue(const meta::cpp2::ColumnDef& col, const Value& v); + + static Status boundValue(Expression::Kind kind, + const Value& val, + const meta::cpp2::ColumnDef& col, + Value& begin, + Value& end); + + static void eraseInvalidIndexItems( + int32_t schemaId, + std::vector>* indexItems); + + // Find optimal index according to filter expression and all valid indexes. + // + // For relational condition expression: + // 1. iterate all indexes + // 2. select the best column hint for each index + // 2.1. generate column hint according to the first field of index + // + // For logical condition expression(only logical `AND' expression): + // 1. same steps as above 1, 2 + // 2. for multiple columns combined index: + // * iterate each field of index + // * iterate each operand expression of filter condition + // * collect all column hints generated by operand expression for each index field + // * process collected column hints, for example, merge the begin and end values of + // range scan + // 3. 
sort all index results generated by each index + // 4. select the largest score index result + // 5. process the selected index result: + // * find the first not prefix column hint and ignore all followed hints except first + // range hint + // * check whether filter conditions are used, if not, place the unused expression parts + // into column hint filter + // + // For logical `OR' condition expression, use above steps to generate different + // `IndexQueryContext' for each operand of filter condition, nebula storage will union all + // results of multiple index contexts + static bool findOptimalIndex( + const Expression* condition, + const std::vector>& indexItems, + bool* isPrefixScan, + nebula::storage::cpp2::IndexQueryContext* ictx); + + static void copyIndexScanData(const nebula::graph::IndexScan* from, + nebula::graph::IndexScan* to); }; -} // namespace graph -} // namespace nebula -#endif // NEBULA_GRAPH_OPTIMIZER_OPTIMIZERUTILS_H_ +} // namespace graph +} // namespace nebula +#endif // NEBULA_GRAPH_OPTIMIZER_OPTIMIZERUTILS_H_ diff --git a/src/optimizer/rule/EdgeIndexFullScanRule.cpp b/src/optimizer/rule/EdgeIndexFullScanRule.cpp new file mode 100644 index 000000000..3a583b195 --- /dev/null +++ b/src/optimizer/rule/EdgeIndexFullScanRule.cpp @@ -0,0 +1,42 @@ +/* Copyright (c) 2021 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License, + * attached with Common Clause Condition 1.0, found in the LICENSES directory. 
+ */
+
+#include "optimizer/rule/EdgeIndexFullScanRule.h"
+#include "optimizer/OptContext.h"
+#include "planner/plan/Query.h"
+#include "planner/plan/Scan.h"
+
+using nebula::graph::EdgeIndexFullScan;
+using nebula::graph::IndexScan;
+
+using Kind = nebula::graph::PlanNode::Kind;
+
+namespace nebula {
+namespace opt {
+
+// Sole instance of the rule; creating it runs the constructor below, which registers the
+// rule in the query rule set.
+std::unique_ptr EdgeIndexFullScanRule::kInstance =
+    std::unique_ptr(new EdgeIndexFullScanRule());
+
+// Registers this rule with RuleSet::QueryRules().
+EdgeIndexFullScanRule::EdgeIndexFullScanRule() {
+    RuleSet::QueryRules().addRule(this);
+}
+
+// The rule matches a single kEdgeIndexFullScan plan node.
+const Pattern& EdgeIndexFullScanRule::pattern() const {
+    static Pattern pattern = Pattern::create(Kind::kEdgeIndexFullScan);
+    return pattern;
+}
+
+// Name of the rule.
+std::string EdgeIndexFullScanRule::toString() const {
+    return "EdgeIndexFullScanRule";
+}
+
+// Creates a new EdgeIndexFullScan for the edge type of `node`.
+// `node` is cast without a check, so it must be the matched edge index scan node.
+IndexScan* EdgeIndexFullScanRule::scan(OptContext* ctx, const graph::PlanNode* node) const {
+    auto scan = static_cast(node);
+    // nullptr is passed as the second argument - presumably "no input node", TODO confirm.
+    return EdgeIndexFullScan::make(ctx->qctx(), nullptr, scan->edgeType());
+}
+
+}  // namespace opt
+}  // namespace nebula
diff --git a/src/optimizer/rule/EdgeIndexFullScanRule.h b/src/optimizer/rule/EdgeIndexFullScanRule.h
new file mode 100644
index 000000000..508941c56
--- /dev/null
+++ b/src/optimizer/rule/EdgeIndexFullScanRule.h
@@ -0,0 +1,30 @@
+/* Copyright (c) 2021 vesoft inc. All rights reserved.
+ *
+ * This source code is licensed under Apache 2.0 License,
+ * attached with Common Clause Condition 1.0, found in the LICENSES directory.
+ */
+
+#ifndef OPTIMIZER_RULE_EDGEINDEXFULLSCANRULE_H_
+#define OPTIMIZER_RULE_EDGEINDEXFULLSCANRULE_H_
+
+#include "optimizer/rule/IndexFullScanBaseRule.h"
+
+namespace nebula {
+namespace opt {
+
+// Edge flavour of IndexFullScanBaseRule: it supplies the pattern, the rule name and the
+// factory for the replacement scan node; match() and transform() come from the base class.
+class EdgeIndexFullScanRule final : public IndexFullScanBaseRule {
+public:
+    const Pattern& pattern() const override;
+    std::string toString() const override;
+
+private:
+    // Private: the only instance is the static `kInstance`.
+    EdgeIndexFullScanRule();
+    // Builds the scan node that replaces `node` in the plan.
+    graph::IndexScan* scan(OptContext* ctx, const graph::PlanNode* node) const override;
+
+    static std::unique_ptr kInstance;
+};
+
+}  // namespace opt
+}  // namespace nebula
+
+#endif  // OPTIMIZER_RULE_EDGEINDEXFULLSCANRULE_H_
diff --git a/src/optimizer/rule/IndexFullScanBaseRule.cpp b/src/optimizer/rule/IndexFullScanBaseRule.cpp
new file mode 100644
index 000000000..d3e30d11f
--- /dev/null
+++ b/src/optimizer/rule/IndexFullScanBaseRule.cpp
@@ -0,0 +1,86 @@
+/* Copyright (c) 2021 vesoft inc. All rights reserved.
+ *
+ * This source code is licensed under Apache 2.0 License,
+ * attached with Common Clause Condition 1.0, found in the LICENSES directory.
+ */
+
+#include "optimizer/rule/IndexFullScanBaseRule.h"
+
+#include "common/interface/gen-cpp2/storage_types.h"
+#include "context/QueryContext.h"
+#include "optimizer/OptContext.h"
+#include "optimizer/OptGroup.h"
+#include "optimizer/OptRule.h"
+#include "optimizer/OptimizerUtils.h"
+#include "planner/plan/Query.h"
+#include "planner/plan/Scan.h"
+
+using nebula::graph::IndexScan;
+using nebula::graph::OptimizerUtils;
+using nebula::storage::cpp2::IndexQueryContext;
+
+using TransformResult = nebula::opt::OptRule::TransformResult;
+
+namespace nebula {
+namespace opt {
+
+// The rule applies only to index scans that are not bound to an index yet: it is rejected
+// when OptRule::match() fails or when any existing query context already has an index id.
+bool IndexFullScanBaseRule::match(OptContext* ctx, const MatchedResult& matched) const {
+    if (!OptRule::match(ctx, matched)) {
+        return false;
+    }
+    auto scan = static_cast<const IndexScan*>(matched.planNode());
+    for (auto& ictx : scan->queryContext()) {
+        if (ictx.index_id_ref().is_set()) {
+            return false;
+        }
+    }
+    return true;
+}
+
+// Replaces the matched scan with a node from scan() that is bound to one concrete index.
+//
+// The tag or edge indexes of the space are read from the meta cache and reduced to those
+// of the scanned schema. Among them the index with the fewest fields is chosen (the first
+// one wins on ties) and stored as the only query context, without column hints or filter.
+//
+// Returns the error of the cache lookup, noTransform() when the schema has no index, or a
+// result that erases the current node in favour of the new scan node.
+StatusOr<TransformResult> IndexFullScanBaseRule::transform(OptContext* ctx,
+                                                           const MatchedResult& matched) const {
+    auto scan = static_cast<const IndexScan*>(matched.planNode());
+
+    auto metaClient = ctx->qctx()->getMetaClient();
+    auto status = scan->isEdge() ? metaClient->getEdgeIndexesFromCache(scan->space())
+                                 : metaClient->getTagIndexesFromCache(scan->space());
+    NG_RETURN_IF_ERROR(status);
+    auto indexItems = std::move(status).value();
+
+    OptimizerUtils::eraseInvalidIndexItems(scan->schemaId(), &indexItems);
+
+    if (indexItems.empty()) {
+        return TransformResult::noTransform();
+    }
+
+    // Track the running minimum. Without updating `numFields` every candidate would be
+    // compared against indexItems[0] only, and the last index narrower than the first one
+    // would win instead of the narrowest one.
+    auto idxId = indexItems[0]->get_index_id();
+    auto numFields = indexItems[0]->get_fields().size();
+    for (size_t i = 1; i < indexItems.size(); ++i) {
+        const auto& index = indexItems[i];
+        const auto curNumFields = index->get_fields().size();
+        if (curNumFields < numFields) {
+            idxId = index->get_index_id();
+            numFields = curNumFields;
+        }
+    }
+
+    std::vector<IndexQueryContext> idxCtxs;
+    IndexQueryContext ictx;
+    ictx.set_index_id(idxId);
+    idxCtxs.emplace_back(std::move(ictx));
+
+    // The new node takes over settings, output variable and column names of the old one.
+    auto scanNode = this->scan(ctx, scan);
+    OptimizerUtils::copyIndexScanData(scan, scanNode);
+    scanNode->setOutputVar(scan->outputVar());
+    scanNode->setColNames(scan->colNames());
+    scanNode->setIndexQueryContext(std::move(idxCtxs));
+
+    // Put the new node into the group of the matched node and keep its dependencies.
+    auto scanGroup = matched.node->group();
+    auto optScanNode = OptGroupNode::create(ctx, scanNode, scanGroup);
+    for (auto group : matched.node->dependencies()) {
+        optScanNode->dependsOn(group);
+    }
+    TransformResult result;
+    result.newGroupNodes.emplace_back(optScanNode);
+    result.eraseCurr = true;
+    return result;
+}
+
+}  // namespace opt
+}  // namespace nebula
diff --git a/src/optimizer/rule/IndexFullScanBaseRule.h b/src/optimizer/rule/IndexFullScanBaseRule.h
new file mode 100644
index 000000000..ff4b9c225
--- /dev/null
+++ b/src/optimizer/rule/IndexFullScanBaseRule.h
@@ -0,0 +1,34 @@
+/* Copyright (c) 2021 vesoft inc. All rights reserved.
+ *
+ * This source code is licensed under Apache 2.0 License,
+ * attached with Common Clause Condition 1.0, found in the LICENSES directory.
+ */
+
+#ifndef OPTIMIZER_RULE_INDEXFULLSCANBASERULE_H_
+#define OPTIMIZER_RULE_INDEXFULLSCANBASERULE_H_
+
+#include "optimizer/OptRule.h"
+
+namespace nebula {
+
+// Forward declarations. Both types are used below as `graph::IndexScan` and
+// `graph::PlanNode`, so both are declared inside namespace graph.
+namespace graph {
+class IndexScan;
+class PlanNode;
+}  // namespace graph
+
+namespace opt {
+
+// Common part of the tag/edge "index full scan" rules. Subclasses provide the pattern, the
+// rule name and, through scan(), the plan node that replaces the matched one.
+class IndexFullScanBaseRule : public OptRule {
+public:
+    bool match(OptContext *ctx, const MatchedResult &matched) const override;
+    StatusOr<TransformResult> transform(OptContext *ctx,
+                                        const MatchedResult &matched) const override;
+
+protected:
+    // Creates the replacement scan node for `node`.
+    virtual graph::IndexScan *scan(OptContext *ctx, const graph::PlanNode *node) const = 0;
+};
+
+}  // namespace opt
+}  // namespace nebula
+
+#endif  // OPTIMIZER_RULE_INDEXFULLSCANBASERULE_H_
diff --git a/src/optimizer/rule/IndexScanRule.cpp b/src/optimizer/rule/IndexScanRule.cpp index 5637b839d..d3ab1daa9 100644 --- a/src/optimizer/rule/IndexScanRule.cpp +++ b/src/optimizer/rule/IndexScanRule.cpp @@ -5,15 +5,23 @@ */ #include "optimizer/rule/IndexScanRule.h" +#include #include "common/expression/LabelAttributeExpression.h" #include "optimizer/OptContext.h" #include "optimizer/OptGroup.h" +#include "optimizer/OptRule.h" +#include "optimizer/OptimizerUtils.h" #include "planner/plan/PlanNode.h" #include "planner/plan/Query.h" +#include "util/IndexUtil.h" +using nebula::graph::IndexScan; +using nebula::graph::IndexUtil; using nebula::graph::IndexScan; using nebula::graph::OptimizerUtils; +using IndexQueryCtx = std::vector; + namespace nebula { namespace opt { @@ -30,15 +38,13 @@ const Pattern& IndexScanRule::pattern() const { } bool IndexScanRule::match(OptContext* ctx, const MatchedResult& matched) const { - UNUSED(ctx); - auto idxScan = static_cast(matched.node->node()); - auto ictxs = idxScan->queryContext(); - if (!ictxs) { - return true; + if (!OptRule::match(ctx, matched)) { + return false; } + auto scan = static_cast(matched.planNode()); // Has been optimized, skip this rule - for (auto& ictx : *ictxs) { - if (ictx.index_id_ref().is_set() && ictx.column_hints_ref().is_set()) {
+ for (auto& ictx : scan->queryContext()) { + if (ictx.index_id_ref().is_set()) { return false; } } @@ -54,7 +60,7 @@ StatusOr IndexScanRule::transform(OptContext* ctx, auto filter = filterExpr(groupNode); auto qctx = ctx->qctx(); - IndexQueryCtx iqctx = std::make_unique>(); + std::vector iqctx; if (filter == nullptr) { // Only filter is nullptr when lookup on tagname NG_RETURN_IF_ERROR(createIndexQueryCtx(iqctx, qctx, groupNode)); @@ -117,7 +123,7 @@ Status IndexScanRule::createSingleIQC(IndexQueryCtx &iqctx, return Status::IndexNotFound("No valid index found"); } auto in = static_cast(groupNode->node()); - const auto& filter = in->queryContext()->begin()->get_filter(); + const auto& filter = in->queryContext().begin()->get_filter(); return appendIQCtx(index, items, iqctx, filter); } @@ -180,7 +186,7 @@ Status IndexScanRule::appendIQCtx(const IndexItem& index, ctx.set_filter(filter); } ctx.set_column_hints(std::move(hints)); - iqctx->emplace_back(std::move(ctx)); + iqctx.emplace_back(std::move(ctx)); return Status::OK(); } @@ -189,7 +195,7 @@ Status IndexScanRule::appendIQCtx(const IndexItem& index, IndexQueryContext ctx; ctx.set_index_id(index->get_index_id()); ctx.set_filter(""); - iqctx->emplace_back(std::move(ctx)); + iqctx.emplace_back(std::move(ctx)); return Status::OK(); } @@ -222,15 +228,15 @@ Status IndexScanRule::appendColHint(std::vector& hints, if (col.get_type().get_type() == meta::cpp2::PropertyType::BOOL) { return Status::SemanticError("Range scan for bool type is illegal"); } - NG_RETURN_IF_ERROR(boundValue(item, col, begin, end)); + NG_RETURN_IF_ERROR(OptimizerUtils::boundValue(item.relOP_, item.value_, col, begin, end)); } if (isRangeScan) { - if (begin == Value()) { + if (begin.empty()) { begin = OptimizerUtils::boundValue(col, BVO::MIN, Value()); CHECK_BOUND_VALUE(begin, col.get_name()); } - if (end == Value()) { + if (end.empty()) { end = OptimizerUtils::boundValue(col, BVO::MAX, Value()); CHECK_BOUND_VALUE(end, col.get_name()); } @@ -245,64 
+251,6 @@ Status IndexScanRule::appendColHint(std::vector& hints, return Status::OK(); } -Status IndexScanRule::boundValue(const FilterItem& item, - const meta::cpp2::ColumnDef& col, - Value& begin, Value& end) const { - auto val = item.value_; - if (val.type() != graph::SchemaUtil::propTypeToValueType(col.type.type)) { - return Status::SemanticError("Data type error of field : %s", col.get_name().c_str()); - } - switch (item.relOP_) { - case Expression::Kind::kRelLE: { - // if c1 <= int(5) , the range pair should be (min, 6) - // if c1 < int(5), the range pair should be (min, 5) - auto v = OptimizerUtils::boundValue(col, BVO::GREATER_THAN, val); - CHECK_BOUND_VALUE(v, col.get_name()); - // where c <= 1 and c <= 2 , 1 should be valid. - if (end == Value()) { - end = v; - } else { - end = v < end ? v : end; - } - break; - } - case Expression::Kind::kRelGE: { - // where c >= 1 and c >= 2 , 2 should be valid. - if (begin == Value()) { - begin = val; - } else { - begin = val < begin ? begin : val; - } - break; - } - case Expression::Kind::kRelLT: { - // c < 5 and c < 6 , 5 should be valid. - if (end == Value()) { - end = val; - } else { - end = val < end ? val : end; - } - break; - } - case Expression::Kind::kRelGT: { - // if c >= 5, the range pair should be (5, max) - // if c > 5, the range pair should be (6, max) - auto v = OptimizerUtils::boundValue(col, BVO::GREATER_THAN, val); - CHECK_BOUND_VALUE(v, col.get_name()); - // where c > 1 and c > 2 , 2 should be valid. - if (begin == Value()) { - begin = v; - } else { - begin = v < begin ? 
begin : v; - } - break; - } - default: - return Status::SemanticError(); - } - return Status::OK(); -} - bool IndexScanRule::isEdge(const OptGroupNode *groupNode) const { auto in = static_cast(groupNode->node()); return in->isEdge(); @@ -320,19 +268,19 @@ GraphSpaceID IndexScanRule::spaceId(const OptGroupNode *groupNode) const { Expression* IndexScanRule::filterExpr(const OptGroupNode* groupNode) const { auto in = static_cast(groupNode->node()); - auto qct = in->queryContext(); + const auto& qct = in->queryContext(); // The initial IndexScan plan node has only zero or one queryContext. // TODO(yee): Move this condition to match interface - if (qct == nullptr) { + if (qct.empty()) { return nullptr; } - if (qct->size() != 1) { + if (qct.size() != 1) { LOG(ERROR) << "Index Scan plan node error"; return nullptr; } auto* pool = in->qctx()->objPool(); - return Expression::decode(pool, qct->begin()->get_filter()); + return Expression::decode(pool, qct.begin()->get_filter()); } Status IndexScanRule::analyzeExpression(Expression* expr, @@ -397,17 +345,16 @@ Status IndexScanRule::addFilterItem(RelationalExpression* expr, FilterItems* ite auto relType = std::is_same::value ? 
Expression::Kind::kEdgeProperty : Expression::Kind::kTagProperty; - graph::QueryExpressionContext ctx(nullptr); if (expr->left()->kind() == relType && expr->right()->kind() == Expression::Kind::kConstant) { auto* l = static_cast(expr->left()); auto* r = static_cast(expr->right()); - items->addItem(l->prop(), expr->kind(), r->eval(ctx)); + items->addItem(l->prop(), expr->kind(), r->value()); } else if (expr->left()->kind() == Expression::Kind::kConstant && expr->right()->kind() == relType) { auto* r = static_cast(expr->right()); auto* l = static_cast(expr->left()); - items->addItem(r->prop(), reverseRelationalExprKind(expr->kind()), l->eval(ctx)); + items->addItem(r->prop(), IndexUtil::reverseRelationalExprKind(expr->kind()), l->value()); } else { return Status::Error("Optimizer error, when rewrite relational expression"); } @@ -415,26 +362,6 @@ Status IndexScanRule::addFilterItem(RelationalExpression* expr, FilterItems* ite return Status::OK(); } -Expression::Kind IndexScanRule::reverseRelationalExprKind(Expression::Kind kind) const { - switch (kind) { - case Expression::Kind::kRelGE: { - return Expression::Kind::kRelLE; - } - case Expression::Kind::kRelGT: { - return Expression::Kind::kRelLT; - } - case Expression::Kind::kRelLE: { - return Expression::Kind::kRelGE; - } - case Expression::Kind::kRelLT: { - return Expression::Kind::kRelGT; - } - default: { - return kind; - } - } -} - IndexItem IndexScanRule::findOptimalIndex(graph::QueryContext *qctx, const OptGroupNode *groupNode, const FilterItems& items) const { diff --git a/src/optimizer/rule/IndexScanRule.h b/src/optimizer/rule/IndexScanRule.h index 6b254191d..668531ea7 100644 --- a/src/optimizer/rule/IndexScanRule.h +++ b/src/optimizer/rule/IndexScanRule.h @@ -18,7 +18,6 @@ using storage::cpp2::IndexQueryContext; using storage::cpp2::IndexColumnHint; using BVO = graph::OptimizerUtils::BoundValueOperator; using IndexItem = std::shared_ptr; -using IndexQueryCtx = std::unique_ptr>; class OptContext; @@ -29,6 
+28,7 @@ class IndexScanRule final : public OptRule { public: const Pattern& pattern() const override; + bool match(OptContext* ctx, const MatchedResult& matched) const override; StatusOr transform(OptContext* ctx, const MatchedResult& matched) const override; @@ -97,42 +97,38 @@ class IndexScanRule final : public OptRule { IndexScanRule(); - Status createIndexQueryCtx(IndexQueryCtx &iqctx, + Status createIndexQueryCtx(std::vector& iqctx, ScanKind kind, const FilterItems& items, - graph::QueryContext *qctx, - const OptGroupNode *groupNode) const; + graph::QueryContext* qctx, + const OptGroupNode* groupNode) const; - Status createIndexQueryCtx(IndexQueryCtx &iqctx, - graph::QueryContext *qctx, - const OptGroupNode *groupNode) const; + Status createIndexQueryCtx(std::vector& iqctx, + graph::QueryContext* qctx, + const OptGroupNode* groupNode) const; - Status createSingleIQC(IndexQueryCtx &iqctx, + Status createSingleIQC(std::vector &iqctx, const FilterItems& items, graph::QueryContext *qctx, const OptGroupNode *groupNode) const; - Status createMultipleIQC(IndexQueryCtx &iqctx, + Status createMultipleIQC(std::vector &iqctx, const FilterItems& items, graph::QueryContext *qctx, const OptGroupNode *groupNode) const; Status appendIQCtx(const IndexItem& index, const FilterItems& items, - IndexQueryCtx &iqctx, + std::vector &iqctx, const std::string& filter = "") const; Status appendIQCtx(const IndexItem& index, - IndexQueryCtx &iqctx) const; + std::vector &iqctx) const; Status appendColHint(std::vector& hitns, const FilterItems& items, const meta::cpp2::ColumnDef& col) const; - Status boundValue(const FilterItem& item, - const meta::cpp2::ColumnDef& col, - Value& begin, Value& end) const; - size_t hintCount(const FilterItems& items) const noexcept; bool isEdge(const OptGroupNode *groupNode) const; @@ -151,8 +147,6 @@ class IndexScanRule final : public OptRule { std::is_same::value>> Status addFilterItem(RelationalExpression* expr, FilterItems* items) const; - 
Expression::Kind reverseRelationalExprKind(Expression::Kind kind) const; - IndexItem findOptimalIndex(graph::QueryContext *qctx, const OptGroupNode *groupNode, const FilterItems& items) const;
diff --git a/src/optimizer/rule/PushFilterDownEdgeIndexScanRule.cpp b/src/optimizer/rule/PushFilterDownEdgeIndexScanRule.cpp
new file mode 100644
index 000000000..86cd4c231
--- /dev/null
+++ b/src/optimizer/rule/PushFilterDownEdgeIndexScanRule.cpp
@@ -0,0 +1,132 @@
+/* Copyright (c) 2021 vesoft inc. All rights reserved.
+ *
+ * This source code is licensed under Apache 2.0 License,
+ * attached with Common Clause Condition 1.0, found in the LICENSES directory.
+ */
+
+#include "optimizer/rule/PushFilterDownEdgeIndexScanRule.h"
+#include
+#include
+#include
+
+#include "common/base/Base.h"
+#include "common/base/Status.h"
+#include "common/expression/Expression.h"
+#include "common/expression/LogicalExpression.h"
+#include "common/expression/PropertyExpression.h"
+#include "common/expression/RelationalExpression.h"
+#include "common/interface/gen-cpp2/meta_types.h"
+#include "common/interface/gen-cpp2/storage_types.h"
+#include "context/QueryContext.h"
+#include "optimizer/OptContext.h"
+#include "optimizer/OptGroup.h"
+#include "optimizer/OptimizerUtils.h"
+#include "planner/plan/PlanNode.h"
+#include "planner/plan/Scan.h"
+
+using nebula::Expression;
+using nebula::graph::EdgeIndexFullScan;
+using nebula::graph::EdgeIndexPrefixScan;
+using nebula::graph::EdgeIndexRangeScan;
+using nebula::graph::EdgeIndexScan;
+using nebula::graph::Filter;
+using nebula::graph::OptimizerUtils;
+using nebula::graph::QueryContext;
+using nebula::meta::cpp2::IndexItem;
+using nebula::storage::cpp2::IndexQueryContext;
+
+using Kind = nebula::graph::PlanNode::Kind;
+using ExprKind = nebula::Expression::Kind;
+using TransformResult = nebula::opt::OptRule::TransformResult;
+
+namespace nebula {
+namespace opt {
+
+// Sole instance of the rule; its constructor registers the rule in the query rule set.
+std::unique_ptr PushFilterDownEdgeIndexScanRule::kInstance =
+    std::unique_ptr(new PushFilterDownEdgeIndexScanRule());
+
+// Registers this rule with RuleSet::QueryRules().
+PushFilterDownEdgeIndexScanRule::PushFilterDownEdgeIndexScanRule() {
+    RuleSet::QueryRules().addRule(this);
+}
+
+// Matches a Filter node whose dependency is an EdgeIndexFullScan node.
+const Pattern& PushFilterDownEdgeIndexScanRule::pattern() const {
+    static Pattern pattern =
+        Pattern::create(Kind::kFilter, {Pattern::create(Kind::kEdgeIndexFullScan)});
+    return pattern;
+}
+
+// Accepts the matched pair only if
+//  * no query context of the scan carries column hints yet, and
+//  * the filter condition is either a relational expression of the form
+//    `<edge property> op <constant>` or a logical AND expression.
+// A relational expression with the constant on the left-hand side is rejected.
+bool PushFilterDownEdgeIndexScanRule::match(OptContext* ctx, const MatchedResult& matched) const {
+    if (!OptRule::match(ctx, matched)) {
+        return false;
+    }
+    auto filter = static_cast(matched.planNode());
+    // {0, 0}: the dependency of the filter, i.e. the scan node of the pattern.
+    auto scan = static_cast(matched.planNode({0, 0}));
+    for (auto& ictx : scan->queryContext()) {
+        if (ictx.column_hints_ref().is_set()) {
+            return false;
+        }
+    }
+    auto condition = filter->condition();
+    if (condition->isRelExpr()) {
+        auto relExpr = static_cast(condition);
+        return relExpr->left()->kind() == ExprKind::kEdgeProperty &&
+               relExpr->right()->kind() == ExprKind::kConstant;
+    }
+    if (condition->isLogicalExpr()) {
+        return condition->kind() == Expression::Kind::kLogicalAnd;
+    }
+
+    return false;
+}
+
+// Creates an EdgeIndexPrefixScan or EdgeIndexRangeScan (depending on `isPrefixScan`) for the
+// edge type of `scan` and copies the scan settings of `scan` into it.
+// NOTE(review): this file-local helper has external linkage; consider `static` or an
+// anonymous namespace.
+EdgeIndexScan* makeEdgeIndexScan(QueryContext* qctx, const EdgeIndexScan* scan, bool isPrefixScan) {
+    EdgeIndexScan* scanNode = nullptr;
+    if (isPrefixScan) {
+        scanNode = EdgeIndexPrefixScan::make(qctx, nullptr, scan->edgeType());
+    } else {
+        scanNode = EdgeIndexRangeScan::make(qctx, nullptr, scan->edgeType());
+    }
+    OptimizerUtils::copyIndexScanData(scan, scanNode);
+    return scanNode;
+}
+
+// Replaces `Filter -> EdgeIndexFullScan` by a single prefix/range index scan.
+//
+// The edge indexes of the space are read from the meta cache, reduced to those of the
+// scanned schema and handed to OptimizerUtils::findOptimalIndex() together with the filter
+// condition. Returns the error of the cache lookup, noTransform() when no index can be
+// used, or a result that erases the current nodes in favour of the new scan node.
+StatusOr PushFilterDownEdgeIndexScanRule::transform(
+    OptContext* ctx,
+    const MatchedResult& matched) const {
+    auto filter = static_cast(matched.planNode());
+    auto scan = static_cast(matched.planNode({0, 0}));
+
+    auto metaClient = ctx->qctx()->getMetaClient();
+    auto status = metaClient->getEdgeIndexesFromCache(scan->space());
+    NG_RETURN_IF_ERROR(status);
+    auto indexItems = std::move(status).value();
+
+    OptimizerUtils::eraseInvalidIndexItems(scan->schemaId(), &indexItems);
+
+    IndexQueryContext ictx;
+    bool isPrefixScan = false;
+    if (!OptimizerUtils::findOptimalIndex(filter->condition(), indexItems, &isPrefixScan, &ictx)) {
+        return TransformResult::noTransform();
+    }
+    // NOTE(review): the braced initializer copies `ictx`; emplace_back(std::move(ictx))
+    // would avoid the copy.
+    std::vector idxCtxs = {ictx};
+    EdgeIndexScan* scanNode = makeEdgeIndexScan(ctx->qctx(), scan, isPrefixScan);
+    scanNode->setIndexQueryContext(std::move(idxCtxs));
+    // The scan replaces the filter, so it takes over the filter's output and column names.
+    scanNode->setOutputVar(filter->outputVar());
+    scanNode->setColNames(filter->colNames());
+    auto filterGroup = matched.node->group();
+    auto optScanNode = OptGroupNode::create(ctx, scanNode, filterGroup);
+    // The new node depends on whatever the old scan node depended on.
+    for (auto group : matched.dependencies[0].node->dependencies()) {
+        optScanNode->dependsOn(group);
+    }
+    TransformResult result;
+    result.newGroupNodes.emplace_back(optScanNode);
+    result.eraseCurr = true;
+    return result;
+}
+
+// Name of the rule.
+std::string PushFilterDownEdgeIndexScanRule::toString() const {
+    return "PushFilterDownEdgeIndexScanRule";
+}
+
+}  // namespace opt
+}  // namespace nebula
diff --git a/src/optimizer/rule/PushFilterDownEdgeIndexScanRule.h b/src/optimizer/rule/PushFilterDownEdgeIndexScanRule.h
new file mode 100644
index 000000000..b918d1b8a
--- /dev/null
+++ b/src/optimizer/rule/PushFilterDownEdgeIndexScanRule.h
@@ -0,0 +1,35 @@
+/* Copyright (c) 2020 vesoft inc. All rights reserved.
+ *
+ * This source code is licensed under Apache 2.0 License,
+ * attached with Common Clause Condition 1.0, found in the LICENSES directory.
+ */
+
+#ifndef OPTIMIZER_RULE_PUSHFILTERDOWNEDGEINDEXSCANRULE_H_
+#define OPTIMIZER_RULE_PUSHFILTERDOWNEDGEINDEXSCANRULE_H_
+
+#include
+
+#include "optimizer/OptRule.h"
+
+namespace nebula {
+namespace opt {
+
+// Optimizer rule that folds a Filter over an EdgeIndexFullScan into one edge index
+// prefix/range scan when a suitable index exists.
+class PushFilterDownEdgeIndexScanRule final : public OptRule {
+public:
+    const Pattern &pattern() const override;
+    bool match(OptContext *ctx, const MatchedResult &matched) const override;
+    StatusOr transform(OptContext *ctx,
+                       const MatchedResult &matched) const override;
+
+    std::string toString() const override;
+
+private:
+    // Private: the only instance is the static `kInstance`.
+    PushFilterDownEdgeIndexScanRule();
+
+    static std::unique_ptr kInstance;
+};
+
+}  // namespace opt
+}  // namespace nebula
+
+#endif  // OPTIMIZER_RULE_PUSHFILTERDOWNEDGEINDEXSCANRULE_H_
diff --git a/src/optimizer/rule/PushFilterDownTagIndexScanRule.cpp b/src/optimizer/rule/PushFilterDownTagIndexScanRule.cpp
new file mode 100644
index 000000000..04f7a2a7d
--- /dev/null
+++ b/src/optimizer/rule/PushFilterDownTagIndexScanRule.cpp
@@ -0,0 +1,125 @@
+/* Copyright (c) 2021 vesoft inc. All rights reserved.
+ *
+ * This source code is licensed under Apache 2.0 License,
+ * attached with Common Clause Condition 1.0, found in the LICENSES directory.
+ */
+
+#include "optimizer/rule/PushFilterDownTagIndexScanRule.h"
+
+#include "common/expression/Expression.h"
+#include "common/interface/gen-cpp2/storage_types.h"
+#include "context/QueryContext.h"
+#include "optimizer/OptContext.h"
+#include "optimizer/OptGroup.h"
+#include "optimizer/OptimizerUtils.h"
+#include "optimizer/rule/IndexScanRule.h"
+#include "planner/plan/PlanNode.h"
+#include "planner/plan/Scan.h"
+
+using nebula::graph::Filter;
+using nebula::graph::OptimizerUtils;
+using nebula::graph::QueryContext;
+using nebula::graph::TagIndexFullScan;
+using nebula::graph::TagIndexPrefixScan;
+using nebula::graph::TagIndexRangeScan;
+using nebula::graph::TagIndexScan;
+using nebula::storage::cpp2::IndexColumnHint;
+using nebula::storage::cpp2::IndexQueryContext;
+
+using Kind = nebula::graph::PlanNode::Kind;
+using ExprKind = nebula::Expression::Kind;
+using TransformResult = nebula::opt::OptRule::TransformResult;
+
+namespace nebula {
+namespace opt {
+
+// Sole instance of the rule; its constructor registers the rule in the query rule set.
+std::unique_ptr PushFilterDownTagIndexScanRule::kInstance =
+    std::unique_ptr(new PushFilterDownTagIndexScanRule());
+
+// Registers this rule with RuleSet::QueryRules().
+PushFilterDownTagIndexScanRule::PushFilterDownTagIndexScanRule() {
+    RuleSet::QueryRules().addRule(this);
+}
+
+// Matches a Filter node whose dependency is a TagIndexFullScan node.
+const Pattern& PushFilterDownTagIndexScanRule::pattern() const {
+    static Pattern pattern =
+        Pattern::create(Kind::kFilter, {Pattern::create(Kind::kTagIndexFullScan)});
+    return pattern;
+}
+
+// Accepts the matched pair only if
+//  * no query context of the scan carries column hints yet, and
+//  * the filter condition is either a relational expression of the form
+//    `<tag property> op <constant>` or a logical AND expression.
+// A relational expression with the constant on the left-hand side is rejected.
+bool PushFilterDownTagIndexScanRule::match(OptContext* ctx, const MatchedResult& matched) const {
+    if (!OptRule::match(ctx, matched)) {
+        return false;
+    }
+    auto filter = static_cast(matched.planNode());
+    // {0, 0}: the dependency of the filter, i.e. the scan node of the pattern.
+    auto scan = static_cast(matched.planNode({0, 0}));
+    for (auto& ictx : scan->queryContext()) {
+        if (ictx.column_hints_ref().is_set()) {
+            return false;
+        }
+    }
+    auto condition = filter->condition();
+    if (condition->isRelExpr()) {
+        auto relExpr = static_cast(condition);
+        return relExpr->left()->kind() == ExprKind::kTagProperty &&
+               relExpr->right()->kind() == ExprKind::kConstant;
+    }
+    if (condition->isLogicalExpr()) {
+        return condition->kind() == Expression::Kind::kLogicalAnd;
+    }
+
+    return false;
+}
+
+// Creates a TagIndexPrefixScan or TagIndexRangeScan (depending on `isPrefixScan`) for the
+// tag of `scan` and copies the scan settings of `scan` into it.
+// NOTE(review): this file-local helper has external linkage; consider `static` or an
+// anonymous namespace.
+TagIndexScan* makeTagIndexScan(QueryContext* qctx, const TagIndexScan* scan, bool isPrefixScan) {
+    TagIndexScan* tagScan = nullptr;
+    if (isPrefixScan) {
+        tagScan = TagIndexPrefixScan::make(qctx, nullptr, scan->tagName());
+    } else {
+        tagScan = TagIndexRangeScan::make(qctx, nullptr, scan->tagName());
+    }
+
+    OptimizerUtils::copyIndexScanData(scan, tagScan);
+    return tagScan;
+}
+
+// Replaces `Filter -> TagIndexFullScan` by a single prefix/range index scan.
+//
+// The tag indexes of the space are read from the meta cache, reduced to those of the
+// scanned schema and handed to OptimizerUtils::findOptimalIndex() together with the filter
+// condition. Returns the error of the cache lookup, noTransform() when no index can be
+// used, or a result that erases the current nodes in favour of the new scan node.
+StatusOr PushFilterDownTagIndexScanRule::transform(
+    OptContext* ctx,
+    const MatchedResult& matched) const {
+    auto filter = static_cast(matched.planNode());
+    auto scan = static_cast(matched.planNode({0, 0}));
+
+    auto metaClient = ctx->qctx()->getMetaClient();
+    auto status = metaClient->getTagIndexesFromCache(scan->space());
+    NG_RETURN_IF_ERROR(status);
+    auto indexItems = std::move(status).value();
+
+    OptimizerUtils::eraseInvalidIndexItems(scan->schemaId(), &indexItems);
+
+    IndexQueryContext ictx;
+    bool isPrefixScan = false;
+    if (!OptimizerUtils::findOptimalIndex(filter->condition(), indexItems, &isPrefixScan, &ictx)) {
+        return TransformResult::noTransform();
+    }
+
+    // NOTE(review): the braced initializer copies `ictx`; emplace_back(std::move(ictx))
+    // would avoid the copy.
+    std::vector idxCtxs = {ictx};
+    auto scanNode = makeTagIndexScan(ctx->qctx(), scan, isPrefixScan);
+    scanNode->setIndexQueryContext(std::move(idxCtxs));
+    // The scan replaces the filter, so it takes over the filter's output and column names.
+    scanNode->setOutputVar(filter->outputVar());
+    scanNode->setColNames(filter->colNames());
+    auto filterGroup = matched.node->group();
+    auto optScanNode = OptGroupNode::create(ctx, scanNode, filterGroup);
+    // The new node depends on whatever the old scan node depended on.
+    for (auto group : matched.dependencies[0].node->dependencies()) {
+        optScanNode->dependsOn(group);
+    }
+    TransformResult result;
+    result.newGroupNodes.emplace_back(optScanNode);
+    result.eraseCurr = true;
+    return result;
+}
+
+// Name of the rule.
+std::string PushFilterDownTagIndexScanRule::toString() const {
+    return "PushFilterDownTagIndexScanRule";
+}
+
+}  // namespace opt
+}  // namespace nebula
diff --git
a/src/optimizer/rule/PushFilterDownTagIndexScanRule.h b/src/optimizer/rule/PushFilterDownTagIndexScanRule.h
new file mode 100644
index 000000000..99bf3ea62
--- /dev/null
+++ b/src/optimizer/rule/PushFilterDownTagIndexScanRule.h
@@ -0,0 +1,35 @@
+/* Copyright (c) 2021 vesoft inc. All rights reserved.
+ *
+ * This source code is licensed under Apache 2.0 License,
+ * attached with Common Clause Condition 1.0, found in the LICENSES directory.
+ */
+
+#ifndef OPTIMIZER_RULE_PUSHFILTERDOWNTAGINDEXSCANRULE_H_
+#define OPTIMIZER_RULE_PUSHFILTERDOWNTAGINDEXSCANRULE_H_
+
+#include
+
+#include "optimizer/OptRule.h"
+
+namespace nebula {
+namespace opt {
+
+// Optimizer rule that folds a Filter over a TagIndexFullScan into one tag index
+// prefix/range scan when a suitable index exists.
+class PushFilterDownTagIndexScanRule final : public OptRule {
+public:
+    const Pattern &pattern() const override;
+    bool match(OptContext *ctx, const MatchedResult &matched) const override;
+    StatusOr transform(OptContext *ctx,
+                       const MatchedResult &matched) const override;
+
+    std::string toString() const override;
+
+private:
+    // Private: the only instance is the static `kInstance`.
+    PushFilterDownTagIndexScanRule();
+
+    static std::unique_ptr kInstance;
+};
+
+}  // namespace opt
+}  // namespace nebula
+
+#endif  // OPTIMIZER_RULE_PUSHFILTERDOWNTAGINDEXSCANRULE_H_
diff --git a/src/optimizer/rule/TagIndexFullScanRule.cpp b/src/optimizer/rule/TagIndexFullScanRule.cpp
new file mode 100644
index 000000000..e601195c5
--- /dev/null
+++ b/src/optimizer/rule/TagIndexFullScanRule.cpp
@@ -0,0 +1,38 @@
+/* Copyright (c) 2021 vesoft inc. All rights reserved.
+ *
+ * This source code is licensed under Apache 2.0 License,
+ * attached with Common Clause Condition 1.0, found in the LICENSES directory.
+ */
+
+#include "optimizer/rule/TagIndexFullScanRule.h"
+#include "optimizer/OptContext.h"
+#include "planner/plan/Scan.h"
+
+using Kind = nebula::graph::PlanNode::Kind;
+
+namespace nebula {
+namespace opt {
+
+// Sole instance of the rule; its constructor registers the rule in the query rule set.
+std::unique_ptr TagIndexFullScanRule::kInstance =
+    std::unique_ptr(new TagIndexFullScanRule());
+
+// Registers this rule with RuleSet::QueryRules().
+TagIndexFullScanRule::TagIndexFullScanRule() {
+    RuleSet::QueryRules().addRule(this);
+}
+
+// The rule matches a single kTagIndexFullScan plan node.
+const Pattern& TagIndexFullScanRule::pattern() const {
+    static Pattern pattern = Pattern::create(Kind::kTagIndexFullScan);
+    return pattern;
+}
+
+// Name of the rule.
+std::string TagIndexFullScanRule::toString() const {
+    return "TagIndexFullScanRule";
+}
+
+// Creates a new TagIndexFullScan for the tag of `node`.
+// `node` is cast without a check, so it must be the matched tag index scan node.
+graph::IndexScan* TagIndexFullScanRule::scan(OptContext* ctx, const graph::PlanNode* node) const {
+    auto scan = static_cast(node);
+    // nullptr is passed as the second argument - presumably "no input node", TODO confirm.
+    return graph::TagIndexFullScan::make(ctx->qctx(), nullptr, scan->tagName());
+}
+
+}  // namespace opt
+}  // namespace nebula
diff --git a/src/optimizer/rule/TagIndexFullScanRule.h b/src/optimizer/rule/TagIndexFullScanRule.h
new file mode 100644
index 000000000..70247f17e
--- /dev/null
+++ b/src/optimizer/rule/TagIndexFullScanRule.h
@@ -0,0 +1,30 @@
+/* Copyright (c) 2021 vesoft inc. All rights reserved.
+ *
+ * This source code is licensed under Apache 2.0 License,
+ * attached with Common Clause Condition 1.0, found in the LICENSES directory.
+ */
+
+#ifndef OPTIMIZER_RULE_TAGINDEXFULLSCANRULE_H_
+#define OPTIMIZER_RULE_TAGINDEXFULLSCANRULE_H_
+
+#include "optimizer/rule/IndexFullScanBaseRule.h"
+
+namespace nebula {
+namespace opt {
+
+// Tag flavour of IndexFullScanBaseRule: it supplies the pattern, the rule name and the
+// factory for the replacement scan node; match() and transform() come from the base class.
+class TagIndexFullScanRule final : public IndexFullScanBaseRule {
+public:
+    const Pattern& pattern() const override;
+    std::string toString() const override;
+
+private:
+    // Private: the only instance is the static `kInstance`.
+    TagIndexFullScanRule();
+    // Builds the scan node that replaces `node` in the plan.
+    graph::IndexScan* scan(OptContext* ctx, const graph::PlanNode* node) const override;
+
+    static std::unique_ptr kInstance;
+};
+
+}  // namespace opt
+}  // namespace nebula
+
+#endif  // OPTIMIZER_RULE_TAGINDEXFULLSCANRULE_H_
diff --git a/src/optimizer/rule/UnionAllEdgeIndexScanRule.cpp b/src/optimizer/rule/UnionAllEdgeIndexScanRule.cpp
new file mode 100644
index 000000000..5f44fec2d
--- /dev/null
+++ b/src/optimizer/rule/UnionAllEdgeIndexScanRule.cpp
@@ -0,0 +1,32 @@
+/* Copyright (c) 2021 vesoft inc. All rights reserved.
+ *
+ * This source code is licensed under Apache 2.0 License,
+ * attached with Common Clause Condition 1.0, found in the LICENSES directory.
+ */ + +#include "optimizer/rule/UnionAllEdgeIndexScanRule.h" + +using Kind = nebula::graph::PlanNode::Kind; + +namespace nebula { +namespace opt { + +std::unique_ptr UnionAllEdgeIndexScanRule::kInstance = + std::unique_ptr(new UnionAllEdgeIndexScanRule()); + +UnionAllEdgeIndexScanRule::UnionAllEdgeIndexScanRule() { + RuleSet::QueryRules().addRule(this); +} + +const Pattern& UnionAllEdgeIndexScanRule::pattern() const { + static Pattern pattern = + Pattern::create(Kind::kFilter, {Pattern::create(Kind::kEdgeIndexFullScan)}); + return pattern; +} + +std::string UnionAllEdgeIndexScanRule::toString() const { + return "UnionAllEdgeIndexScanRule"; +} + +} // namespace opt +} // namespace nebula diff --git a/src/optimizer/rule/UnionAllEdgeIndexScanRule.h b/src/optimizer/rule/UnionAllEdgeIndexScanRule.h new file mode 100644 index 000000000..8edef139b --- /dev/null +++ b/src/optimizer/rule/UnionAllEdgeIndexScanRule.h @@ -0,0 +1,29 @@ +/* Copyright (c) 2021 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License, + * attached with Common Clause Condition 1.0, found in the LICENSES directory. + */ + +#ifndef OPTIMIZER_RULE_UNIONALLEDGEINDEXSCANRULE_H_ +#define OPTIMIZER_RULE_UNIONALLEDGEINDEXSCANRULE_H_ + +#include "optimizer/rule/UnionAllIndexScanBaseRule.h" + +namespace nebula { +namespace opt { + +class UnionAllEdgeIndexScanRule final : public UnionAllIndexScanBaseRule { +public: + const Pattern &pattern() const override; + std::string toString() const override; + +private: + UnionAllEdgeIndexScanRule(); + + static std::unique_ptr kInstance; +}; + +} // namespace opt +} // namespace nebula + +#endif // OPTIMIZER_RULE_UNIONALLEDGEINDEXSCANRULE_H_ diff --git a/src/optimizer/rule/UnionAllIndexScanBaseRule.cpp b/src/optimizer/rule/UnionAllIndexScanBaseRule.cpp new file mode 100644 index 000000000..f541e950a --- /dev/null +++ b/src/optimizer/rule/UnionAllIndexScanBaseRule.cpp @@ -0,0 +1,98 @@ +/* Copyright (c) 2021 vesoft inc. 
All rights reserved.
+ *
+ * This source code is licensed under Apache 2.0 License,
+ * attached with Common Clause Condition 1.0, found in the LICENSES directory.
+ */
+
+#include "optimizer/rule/UnionAllIndexScanBaseRule.h"
+
+#include "common/expression/Expression.h"
+#include "common/expression/LogicalExpression.h"
+#include "common/interface/gen-cpp2/storage_types.h"
+#include "optimizer/OptContext.h"
+#include "optimizer/OptGroup.h"
+#include "optimizer/OptRule.h"
+#include "optimizer/OptimizerUtils.h"
+#include "planner/plan/PlanNode.h"
+#include "planner/plan/Query.h"
+#include "planner/plan/Scan.h"
+
+using nebula::graph::Filter;
+using nebula::graph::IndexScan;
+using nebula::graph::OptimizerUtils;
+using nebula::graph::TagIndexFullScan;
+using nebula::storage::cpp2::IndexQueryContext;
+
+using Kind = nebula::graph::PlanNode::Kind;
+using TransformResult = nebula::opt::OptRule::TransformResult;
+
+namespace nebula {
+namespace opt {
+
+bool UnionAllIndexScanBaseRule::match(OptContext* ctx, const MatchedResult& matched) const {
+    if (!OptRule::match(ctx, matched)) {
+        return false;
+    }
+    auto filter = static_cast<const Filter*>(matched.planNode());
+    auto scan = static_cast<const IndexScan*>(matched.planNode({0, 0}));
+    auto condition = filter->condition();
+    if (!condition->isLogicalExpr() || condition->kind() != Expression::Kind::kLogicalOr) {
+        return false;
+    }
+
+    for (auto operand : static_cast<const LogicalExpression*>(condition)->operands()) {
+        if (!operand->isRelExpr()) {
+            return false;
+        }
+    }
+
+    for (auto& ictx : scan->queryContext()) {
+        if (ictx.column_hints_ref().is_set()) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+StatusOr<TransformResult> UnionAllIndexScanBaseRule::transform(OptContext* ctx,
+                                                               const MatchedResult& matched) const {
+    auto filter = static_cast<const Filter*>(matched.planNode());
+    auto scan = static_cast<const IndexScan*>(matched.planNode({0, 0}));
+    // Shared by the tag and the edge rule: read the index cache that matches the scanned schema.
+    auto metaClient = ctx->qctx()->getMetaClient();
+    auto status = scan->isEdge() ? metaClient->getEdgeIndexesFromCache(scan->space())
+                                 : metaClient->getTagIndexesFromCache(scan->space());
+    NG_RETURN_IF_ERROR(status);
+    auto indexItems = std::move(status).value();
+    OptimizerUtils::eraseInvalidIndexItems(scan->schemaId(), &indexItems);
+
+    std::vector<IndexQueryContext> idxCtxs;
+    auto condition = static_cast<const LogicalExpression*>(filter->condition());
+    for (auto operand : condition->operands()) {
+        IndexQueryContext ictx;
+        bool isPrefixScan = false;
+        if (!OptimizerUtils::findOptimalIndex(operand, indexItems, &isPrefixScan, &ictx)) {
+            return TransformResult::noTransform();
+        }
+        idxCtxs.emplace_back(std::move(ictx));
+    }
+
+    auto scanNode = IndexScan::make(ctx->qctx(), nullptr);
+    OptimizerUtils::copyIndexScanData(scan, scanNode);
+    scanNode->setIndexQueryContext(std::move(idxCtxs));
+    scanNode->setOutputVar(filter->outputVar());
+    scanNode->setColNames(filter->colNames());
+    auto filterGroup = matched.node->group();
+    auto optScanNode = OptGroupNode::create(ctx, scanNode, filterGroup);
+    for (auto group : matched.dependencies[0].node->dependencies()) {
+        optScanNode->dependsOn(group);
+    }
+    TransformResult result;
+    result.newGroupNodes.emplace_back(optScanNode);
+    result.eraseCurr = true;
+    return result;
+}
+
+}   // namespace opt
+}   // namespace nebula
diff --git a/src/optimizer/rule/UnionAllIndexScanBaseRule.h b/src/optimizer/rule/UnionAllIndexScanBaseRule.h
new file mode 100644
index 000000000..843548693
--- /dev/null
+++ b/src/optimizer/rule/UnionAllIndexScanBaseRule.h
@@ -0,0 +1,25 @@
+/* Copyright (c) 2021 vesoft inc. All rights reserved.
+ *
+ * This source code is licensed under Apache 2.0 License,
+ * attached with Common Clause Condition 1.0, found in the LICENSES directory.
+ */ + +#ifndef OPTIMIZER_RULE_UNIONALLINDEXSCANBASERULE_H_ +#define OPTIMIZER_RULE_UNIONALLINDEXSCANBASERULE_H_ + +#include "optimizer/OptRule.h" + +namespace nebula { +namespace opt { + +class UnionAllIndexScanBaseRule : public OptRule { +public: + bool match(OptContext *ctx, const MatchedResult &matched) const override; + StatusOr transform(OptContext *ctx, + const MatchedResult &matched) const override; +}; + +} // namespace opt +} // namespace nebula + +#endif // OPTIMIZER_RULE_UNIONALLINDEXSCANBASERULE_H_ diff --git a/src/optimizer/rule/UnionAllTagIndexScanRule.cpp b/src/optimizer/rule/UnionAllTagIndexScanRule.cpp new file mode 100644 index 000000000..6ccbd30b5 --- /dev/null +++ b/src/optimizer/rule/UnionAllTagIndexScanRule.cpp @@ -0,0 +1,32 @@ +/* Copyright (c) 2021 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License, + * attached with Common Clause Condition 1.0, found in the LICENSES directory. + */ + +#include "optimizer/rule/UnionAllTagIndexScanRule.h" + +using Kind = nebula::graph::PlanNode::Kind; + +namespace nebula { +namespace opt { + +std::unique_ptr UnionAllTagIndexScanRule::kInstance = + std::unique_ptr(new UnionAllTagIndexScanRule()); + +UnionAllTagIndexScanRule::UnionAllTagIndexScanRule() { + RuleSet::QueryRules().addRule(this); +} + +const Pattern& UnionAllTagIndexScanRule::pattern() const { + static Pattern pattern = + Pattern::create(Kind::kFilter, {Pattern::create(Kind::kTagIndexFullScan)}); + return pattern; +} + +std::string UnionAllTagIndexScanRule::toString() const { + return "UnionAllTagIndexScanRule"; +} + +} // namespace opt +} // namespace nebula diff --git a/src/optimizer/rule/UnionAllTagIndexScanRule.h b/src/optimizer/rule/UnionAllTagIndexScanRule.h new file mode 100644 index 000000000..fcf700b5f --- /dev/null +++ b/src/optimizer/rule/UnionAllTagIndexScanRule.h @@ -0,0 +1,29 @@ +/* Copyright (c) 2021 vesoft inc. All rights reserved. 
+ * + * This source code is licensed under Apache 2.0 License, + * attached with Common Clause Condition 1.0, found in the LICENSES directory. + */ + +#ifndef OPTIMIZER_RULE_UNIONALLTAGINDEXSCANRULE_H_ +#define OPTIMIZER_RULE_UNIONALLTAGINDEXSCANRULE_H_ + +#include "optimizer/rule/UnionAllIndexScanBaseRule.h" + +namespace nebula { +namespace opt { + +class UnionAllTagIndexScanRule final : public UnionAllIndexScanBaseRule { +public: + const Pattern &pattern() const override; + std::string toString() const override; + +private: + UnionAllTagIndexScanRule(); + + static std::unique_ptr kInstance; +}; + +} // namespace opt +} // namespace nebula + +#endif // OPTIMIZER_RULE_UNIONALLTAGINDEXSCANRULE_H_ diff --git a/src/optimizer/test/IndexScanRuleTest.cpp b/src/optimizer/test/IndexScanRuleTest.cpp index b47cd28f0..c11acc320 100644 --- a/src/optimizer/test/IndexScanRuleTest.cpp +++ b/src/optimizer/test/IndexScanRuleTest.cpp @@ -15,8 +15,6 @@ namespace opt { TEST(IndexScanRuleTest, BoundValueTest) { meta::cpp2::ColumnDef col; - auto* inst = std::move(IndexScanRule::kInstance).get(); - auto* instance = static_cast(inst); IndexScanRule::FilterItems items; { Value begin, end; @@ -26,7 +24,7 @@ TEST(IndexScanRuleTest, BoundValueTest) { items.addItem("col1", RelationalExpression::Kind::kRelGT, Value(1L)); items.addItem("col1", RelationalExpression::Kind::kRelLT, Value(5L)); for (const auto& item : items.items) { - auto ret = instance->boundValue(item, col, begin, end); + auto ret = OptimizerUtils::boundValue(item.relOP_, item.value_, col, begin, end); ASSERT_TRUE(ret.ok()); } // Expect begin = 2 , end = 5; @@ -42,7 +40,7 @@ TEST(IndexScanRuleTest, BoundValueTest) { items.addItem("col1", RelationalExpression::Kind::kRelGT, Value(1L)); items.addItem("col1", RelationalExpression::Kind::kRelGT, Value(6L)); for (const auto& item : items.items) { - auto ret = instance->boundValue(item, col, begin, end); + auto ret = OptimizerUtils::boundValue(item.relOP_, item.value_, col, begin, 
end); ASSERT_TRUE(ret.ok()); } // Expect begin = 7 @@ -58,7 +56,7 @@ TEST(IndexScanRuleTest, BoundValueTest) { items.addItem("col1", RelationalExpression::Kind::kRelGT, Value(1L)); items.addItem("col1", RelationalExpression::Kind::kRelGE, Value(6L)); for (const auto& item : items.items) { - auto ret = instance->boundValue(item, col, begin, end); + auto ret = OptimizerUtils::boundValue(item.relOP_, item.value_, col, begin, end); ASSERT_TRUE(ret.ok()); } // Expect begin = 6 @@ -74,7 +72,7 @@ TEST(IndexScanRuleTest, BoundValueTest) { items.addItem("col1", RelationalExpression::Kind::kRelLT, Value(1L)); items.addItem("col1", RelationalExpression::Kind::kRelLE, Value(6L)); for (const auto& item : items.items) { - auto ret = instance->boundValue(item, col, begin, end); + auto ret = OptimizerUtils::boundValue(item.relOP_, item.value_, col, begin, end); ASSERT_TRUE(ret.ok()); } // Expect end = 1 @@ -89,14 +87,14 @@ TEST(IndexScanRuleTest, IQCtxTest) { { IndexItem index = std::make_unique(); IndexScanRule::FilterItems items; - IndexQueryCtx iqctx = std::make_unique>(); + std::vector iqctx; auto ret = instance->appendIQCtx(index, items, iqctx); ASSERT_TRUE(ret.ok()); } { IndexItem index = std::make_unique(); IndexScanRule::FilterItems items; - IndexQueryCtx iqctx = std::make_unique>(); + std::vector iqctx; // setup index { std::vector cols; @@ -118,11 +116,11 @@ TEST(IndexScanRuleTest, IQCtxTest) { auto ret = instance->appendIQCtx(index, items, iqctx); ASSERT_TRUE(ret.ok()); - ASSERT_EQ(1, iqctx->size()); - ASSERT_EQ(1, (iqctx.get()->begin())->get_column_hints().size()); - ASSERT_EQ(1, (iqctx.get()->begin())->get_index_id()); - ASSERT_EQ("", (iqctx.get()->begin())->get_filter()); - const auto& colHints = (iqctx.get()->begin())->get_column_hints(); + ASSERT_EQ(1, iqctx.size()); + ASSERT_EQ(1, iqctx.begin()->get_column_hints().size()); + ASSERT_EQ(1, iqctx.begin()->get_index_id()); + ASSERT_EQ("", iqctx.begin()->get_filter()); + const auto& colHints = 
iqctx.begin()->get_column_hints(); ASSERT_EQ("col0", colHints.begin()->get_column_name()); ASSERT_EQ(storage::cpp2::ScanType::RANGE, colHints.begin()->get_scan_type()); ASSERT_EQ(Value(std::numeric_limits::min()), @@ -134,7 +132,7 @@ TEST(IndexScanRuleTest, IQCtxTest) { // and col3 < 4 and col4 == 4 { items.items.clear(); - iqctx.get()->clear(); + iqctx.clear(); items.addItem("col0", RelationalExpression::Kind::kRelGT, Value(1L)); items.addItem("col1", RelationalExpression::Kind::kRelLE, Value(2L)); items.addItem("col1", RelationalExpression::Kind::kRelGT, Value(-1L)); @@ -145,11 +143,11 @@ TEST(IndexScanRuleTest, IQCtxTest) { auto ret = instance->appendIQCtx(index, items, iqctx); ASSERT_TRUE(ret.ok()); - ASSERT_EQ(1, iqctx->size()); - ASSERT_EQ(1, (iqctx.get()->begin())->get_column_hints().size()); - ASSERT_EQ(1, (iqctx.get()->begin())->get_index_id()); - ASSERT_EQ("", (iqctx.get()->begin())->get_filter()); - const auto& colHints = (iqctx.get()->begin())->get_column_hints(); + ASSERT_EQ(1, iqctx.size()); + ASSERT_EQ(1, iqctx.begin()->get_column_hints().size()); + ASSERT_EQ(1, iqctx.begin()->get_index_id()); + ASSERT_EQ("", iqctx.begin()->get_filter()); + const auto& colHints = iqctx.begin()->get_column_hints(); { auto hint = colHints[0]; ASSERT_EQ("col0", hint.get_column_name()); @@ -165,7 +163,7 @@ TEST(IndexScanRuleTest, IQCtxTest) { // col2 and col3 should be filter in storage layer. 
{ items.items.clear(); - iqctx.get()->clear(); + iqctx.clear(); items.addItem("col0", RelationalExpression::Kind::kRelEQ, Value(1L)); items.addItem("col1", RelationalExpression::Kind::kRelLE, Value(2L)); items.addItem("col1", RelationalExpression::Kind::kRelGT, Value(-1L)); @@ -175,11 +173,11 @@ TEST(IndexScanRuleTest, IQCtxTest) { auto ret = instance->appendIQCtx(index, items, iqctx); ASSERT_TRUE(ret.ok()); - ASSERT_EQ(1, iqctx->size()); - ASSERT_EQ(2, (iqctx.get()->begin())->get_column_hints().size()); - ASSERT_EQ(1, (iqctx.get()->begin())->get_index_id()); - ASSERT_EQ("", (iqctx.get()->begin())->get_filter()); - const auto& colHints = (iqctx.get()->begin())->get_column_hints(); + ASSERT_EQ(1, iqctx.size()); + ASSERT_EQ(2, iqctx.begin()->get_column_hints().size()); + ASSERT_EQ(1, iqctx.begin()->get_index_id()); + ASSERT_EQ("", iqctx.begin()->get_filter()); + const auto& colHints = iqctx.begin()->get_column_hints(); { auto hint = colHints[0]; ASSERT_EQ("col0", hint.get_column_name()); @@ -200,7 +198,7 @@ TEST(IndexScanRuleTest, IQCtxTest) { // col4 should be filter in storage layer. 
{ items.items.clear(); - iqctx.get()->clear(); + iqctx.clear(); items.addItem("col0", RelationalExpression::Kind::kRelEQ, Value(1L)); items.addItem("col1", RelationalExpression::Kind::kRelEQ, Value(2L)); items.addItem("col2", RelationalExpression::Kind::kRelEQ, Value(-1L)); @@ -210,11 +208,11 @@ TEST(IndexScanRuleTest, IQCtxTest) { auto ret = instance->appendIQCtx(index, items, iqctx, "col4 < 4"); ASSERT_TRUE(ret.ok()); - ASSERT_EQ(1, iqctx->size()); - ASSERT_EQ(4, (iqctx.get()->begin())->get_column_hints().size()); - ASSERT_EQ(1, (iqctx.get()->begin())->get_index_id()); - ASSERT_EQ("col4 < 4", (iqctx.get()->begin())->get_filter()); - const auto& colHints = (iqctx.get()->begin())->get_column_hints(); + ASSERT_EQ(1, iqctx.size()); + ASSERT_EQ(4, iqctx.begin()->get_column_hints().size()); + ASSERT_EQ(1, iqctx.begin()->get_index_id()); + ASSERT_EQ("col4 < 4", iqctx.begin()->get_filter()); + const auto& colHints = iqctx.begin()->get_column_hints(); { auto hint = colHints[0]; ASSERT_EQ("col0", hint.get_column_name()); diff --git a/src/parser/TraverseSentences.cpp b/src/parser/TraverseSentences.cpp index c6f656098..b10727960 100644 --- a/src/parser/TraverseSentences.cpp +++ b/src/parser/TraverseSentences.cpp @@ -40,6 +40,11 @@ std::string GoSentence::toString() const { return buf; } +LookupSentence::LookupSentence(std::string *from, WhereClause *where, YieldClause *yield) + : Sentence(Kind::kLookup), + from_(DCHECK_NOTNULL(from)), + whereClause_(where), + yieldClause_(yield) {} std::string LookupSentence::toString() const { std::string buf; diff --git a/src/parser/TraverseSentences.h b/src/parser/TraverseSentences.h index ee5c81c25..2ce371ab1 100644 --- a/src/parser/TraverseSentences.h +++ b/src/parser/TraverseSentences.h @@ -82,27 +82,16 @@ class GoSentence final : public Sentence { class LookupSentence final : public Sentence { public: - explicit LookupSentence(std::string *from) { - from_.reset(from); - kind_ = Kind::kLookup; - } - - const std::string* from() const { 
- return from_.get(); - } + LookupSentence(std::string* from, WhereClause* where, YieldClause* yield); - void setWhereClause(WhereClause *whereClause) { - whereClause_.reset(whereClause); + const std::string& from() const { + return *from_; } const WhereClause* whereClause() const { return whereClause_.get(); } - void setYieldClause(YieldClause *clause) { - yieldClause_.reset(clause); - } - const YieldClause* yieldClause() const { return yieldClause_.get(); } diff --git a/src/parser/parser.yy b/src/parser/parser.yy index 3071792e0..90184e374 100644 --- a/src/parser/parser.yy +++ b/src/parser/parser.yy @@ -1822,10 +1822,7 @@ lookup_where_clause lookup_sentence : KW_LOOKUP KW_ON name_label lookup_where_clause yield_clause { - auto sentence = new LookupSentence($3); - sentence->setWhereClause($4); - sentence->setYieldClause($5); - $$ = sentence; + $$ = new LookupSentence($3, $4, $5); } ; diff --git a/src/planner/CMakeLists.txt b/src/planner/CMakeLists.txt index 4be9c24a6..614d7d91c 100644 --- a/src/planner/CMakeLists.txt +++ b/src/planner/CMakeLists.txt @@ -38,4 +38,5 @@ nebula_add_library( plan/Maintain.cpp ngql/PathPlanner.cpp ngql/GoPlanner.cpp + ngql/LookupPlanner.cpp ) diff --git a/src/planner/Planner.cpp b/src/planner/Planner.cpp index 30e3194b8..c738f5b57 100644 --- a/src/planner/Planner.cpp +++ b/src/planner/Planner.cpp @@ -11,6 +11,11 @@ namespace nebula { namespace graph { +const char* kSrcVID = "SrcVID"; +const char* kDstVID = "DstVID"; +const char* kRanking = "Ranking"; +const char* kVertexID = "VertexID"; + std::ostream& operator<<(std::ostream& os, const SubPlan& subplan) { os << "root(" << subplan.root->toString() << "): " << subplan.root->outputVar() << ", tail(" << subplan.tail->toString() << "): " << subplan.tail->outputVar(); diff --git a/src/planner/Planner.h b/src/planner/Planner.h index da072c4f4..ff37a6025 100644 --- a/src/planner/Planner.h +++ b/src/planner/Planner.h @@ -17,6 +17,11 @@ namespace nebula { namespace graph { class Planner; +extern 
const char* kSrcVID; +extern const char* kDstVID; +extern const char* kRanking; +extern const char* kVertexID; + struct SubPlan { // root and tail of a subplan. PlanNode* root{nullptr}; diff --git a/src/planner/PlannersRegister.cpp b/src/planner/PlannersRegister.cpp index 3a4ff3289..5dd835adc 100644 --- a/src/planner/PlannersRegister.cpp +++ b/src/planner/PlannersRegister.cpp @@ -13,21 +13,35 @@ #include "planner/match/PropIndexSeek.h" #include "planner/match/VertexIdSeek.h" #include "planner/match/LabelIndexSeek.h" +#include "planner/ngql/LookupPlanner.h" #include "planner/ngql/PathPlanner.h" #include "planner/ngql/GoPlanner.h" namespace nebula { namespace graph { + void PlannersRegister::registPlanners() { registSequential(); registMatch(); - registPath(); - registGo(); } void PlannersRegister::registSequential() { - auto& planners = Planner::plannersMap()[Sentence::Kind::kSequential]; - planners.emplace_back(&SequentialPlanner::match, &SequentialPlanner::make); + { + auto& planners = Planner::plannersMap()[Sentence::Kind::kSequential]; + planners.emplace_back(&SequentialPlanner::match, &SequentialPlanner::make); + } + { + auto& planners = Planner::plannersMap()[Sentence::Kind::kFindPath]; + planners.emplace_back(&PathPlanner::match, &PathPlanner::make); + } + { + auto& planners = Planner::plannersMap()[Sentence::Kind::kGo]; + planners.emplace_back(&GoPlanner::match, &GoPlanner::make); + } + { + auto& planners = Planner::plannersMap()[Sentence::Kind::kLookup]; + planners.emplace_back(&LookupPlanner::match, &LookupPlanner::make); + } } void PlannersRegister::registMatch() { @@ -50,15 +64,5 @@ void PlannersRegister::registMatch() { startVidFinders.emplace_back(&LabelIndexSeek::make); } -void PlannersRegister::registPath() { - auto& planners = Planner::plannersMap()[Sentence::Kind::kFindPath]; - planners.emplace_back(&PathPlanner::match, &PathPlanner::make); -} - -void PlannersRegister::registGo() { - auto& planners = Planner::plannersMap()[Sentence::Kind::kGo]; - 
planners.emplace_back(&GoPlanner::match, &GoPlanner::make); -} - } // namespace graph } // namespace nebula diff --git a/src/planner/PlannersRegister.h b/src/planner/PlannersRegister.h index 6a49f1d46..69481e8be 100644 --- a/src/planner/PlannersRegister.h +++ b/src/planner/PlannersRegister.h @@ -9,6 +9,7 @@ namespace nebula { namespace graph { + class PlannersRegister final { public: PlannersRegister() = delete; @@ -18,13 +19,10 @@ class PlannersRegister final { private: static void registSequential(); - static void registMatch(); - - static void registPath(); - - static void registGo(); }; + } // namespace graph } // namespace nebula + #endif // PLANNER_PLANNERREGISTER_H_ diff --git a/src/planner/match/LabelIndexSeek.cpp b/src/planner/match/LabelIndexSeek.cpp index 38c3f84fc..056d6d777 100644 --- a/src/planner/match/LabelIndexSeek.cpp +++ b/src/planner/match/LabelIndexSeek.cpp @@ -70,15 +70,11 @@ StatusOr LabelIndexSeek::transformNode(NodeContext* nodeCtx) { using IQC = nebula::storage::cpp2::IndexQueryContext; IQC iqctx; iqctx.set_index_id(nodeCtx->scanInfo.indexIds.back()); - auto contexts = std::make_unique>(); - contexts->emplace_back(std::move(iqctx)); - auto columns = std::make_unique>(); - columns->emplace_back(kVid); auto scan = IndexScan::make(matchClauseCtx->qctx, nullptr, matchClauseCtx->space.id, - std::move(contexts), - std::move(columns), + {iqctx}, + {kVid}, false, nodeCtx->scanInfo.schemaIds.back()); scan->setColNames({kVid}); @@ -125,10 +121,7 @@ StatusOr LabelIndexSeek::transformNode(NodeContext* nodeCtx) { ExpressionUtils::rewriteLabelAttr2TagProp(pool, flattenFilter); storage::cpp2::IndexQueryContext ctx; ctx.set_filter(Expression::encode(*srcFilter)); - auto context = - std::make_unique>(); - context->emplace_back(std::move(ctx)); - scan->setIndexQueryContext(std::move(context)); + scan->setIndexQueryContext({ctx}); whereCtx.reset(); } } @@ -146,22 +139,19 @@ StatusOr LabelIndexSeek::transformEdge(EdgeContext* edgeCtx) { using IQC = 
nebula::storage::cpp2::IndexQueryContext; IQC iqctx; iqctx.set_index_id(edgeCtx->scanInfo.indexIds.back()); - auto contexts = std::make_unique>(); - contexts->emplace_back(std::move(iqctx)); - auto columns = std::make_unique>(); - std::vector columnsName; + std::vector columns, columnsName; switch (edgeCtx->scanInfo.direction) { case MatchEdge::Direction::OUT_EDGE: - columns->emplace_back(kSrc); + columns.emplace_back(kSrc); columnsName.emplace_back(kVid); break; case MatchEdge::Direction::IN_EDGE: - columns->emplace_back(kDst); + columns.emplace_back(kDst); columnsName.emplace_back(kVid); break; case MatchEdge::Direction::BOTH: - columns->emplace_back(kSrc); - columns->emplace_back(kDst); + columns.emplace_back(kSrc); + columns.emplace_back(kDst); columnsName.emplace_back(kSrc); columnsName.emplace_back(kDst); break; @@ -171,7 +161,7 @@ StatusOr LabelIndexSeek::transformEdge(EdgeContext* edgeCtx) { auto scan = IndexScan::make(qctx, nullptr, matchClauseCtx->space.id, - std::move(contexts), + {iqctx}, std::move(columns), true, edgeCtx->scanInfo.schemaIds.back()); diff --git a/src/planner/match/PropIndexSeek.cpp b/src/planner/match/PropIndexSeek.cpp index 11f34c46b..c8e164fed 100644 --- a/src/planner/match/PropIndexSeek.cpp +++ b/src/planner/match/PropIndexSeek.cpp @@ -63,22 +63,19 @@ StatusOr PropIndexSeek::transformEdge(EdgeContext* edgeCtx) { using IQC = nebula::storage::cpp2::IndexQueryContext; IQC iqctx; iqctx.set_filter(Expression::encode(*edgeCtx->scanInfo.filter)); - auto contexts = std::make_unique>(); - contexts->emplace_back(std::move(iqctx)); - auto columns = std::make_unique>(); - std::vector columnsName; + std::vector columns, columnsName; switch (edgeCtx->scanInfo.direction) { case MatchEdge::Direction::OUT_EDGE: - columns->emplace_back(kSrc); + columns.emplace_back(kSrc); columnsName.emplace_back(kVid); break; case MatchEdge::Direction::IN_EDGE: - columns->emplace_back(kDst); + columns.emplace_back(kDst); columnsName.emplace_back(kVid); break; case 
MatchEdge::Direction::BOTH: - columns->emplace_back(kSrc); - columns->emplace_back(kDst); + columns.emplace_back(kSrc); + columns.emplace_back(kDst); columnsName.emplace_back(kSrc); columnsName.emplace_back(kDst); break; @@ -88,7 +85,7 @@ StatusOr PropIndexSeek::transformEdge(EdgeContext* edgeCtx) { auto scan = IndexScan::make(qctx, nullptr, matchClauseCtx->space.id, - std::move(contexts), + {iqctx}, std::move(columns), true, edgeCtx->scanInfo.schemaIds.back()); @@ -166,15 +163,11 @@ StatusOr PropIndexSeek::transformNode(NodeContext* nodeCtx) { using IQC = nebula::storage::cpp2::IndexQueryContext; IQC iqctx; iqctx.set_filter(Expression::encode(*nodeCtx->scanInfo.filter)); - auto contexts = std::make_unique>(); - contexts->emplace_back(std::move(iqctx)); - auto columns = std::make_unique>(); - columns->emplace_back(kVid); auto scan = IndexScan::make(matchClauseCtx->qctx, nullptr, matchClauseCtx->space.id, - std::move(contexts), - std::move(columns), + {iqctx}, + {kVid}, false, nodeCtx->scanInfo.schemaIds.back()); scan->setColNames({kVid}); diff --git a/src/planner/ngql/LookupPlanner.cpp b/src/planner/ngql/LookupPlanner.cpp new file mode 100644 index 000000000..a45310b72 --- /dev/null +++ b/src/planner/ngql/LookupPlanner.cpp @@ -0,0 +1,148 @@ +/* Copyright (c) 2021 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License, + * attached with Common Clause Condition 1.0, found in the LICENSES directory. 
+ */ + +#include "planner/ngql/LookupPlanner.h" + +#include +#include + +#include "common/base/Base.h" +#include "common/base/Status.h" +#include "common/expression/Expression.h" +#include "common/expression/LabelAttributeExpression.h" +#include "common/expression/PropertyExpression.h" +#include "context/ast/QueryAstContext.h" +#include "parser/Clauses.h" +#include "parser/TraverseSentences.h" +#include "planner/Planner.h" +#include "planner/plan/Scan.h" +#include "visitor/FindVisitor.h" + +namespace nebula { +namespace graph { + +static std::tuple kEdgeKeys[3] = { + {kSrcVID, kSrc}, + {kDstVID, kDst}, + {kRanking, kRank}, +}; + +std::unique_ptr LookupPlanner::make() { + return std::unique_ptr(new LookupPlanner()); +} + +bool LookupPlanner::match(AstContext* astCtx) { + return astCtx->sentence->kind() == Sentence::Kind::kLookup; +} + +StatusOr LookupPlanner::transform(AstContext* astCtx) { + auto lookupCtx = static_cast(astCtx); + auto yieldCols = prepareReturnCols(lookupCtx); + auto qctx = lookupCtx->qctx; + auto from = static_cast(lookupCtx->sentence)->from(); + SubPlan plan; + if (lookupCtx->isEdge) { + plan.tail = EdgeIndexFullScan::make(qctx, + nullptr, + from, + lookupCtx->space.id, + {}, + returnCols_, + lookupCtx->schemaId, + lookupCtx->isEmptyResultSet); + } else { + plan.tail = TagIndexFullScan::make(qctx, + nullptr, + from, + lookupCtx->space.id, + {}, + returnCols_, + lookupCtx->schemaId, + lookupCtx->isEmptyResultSet); + } + plan.tail->setColNames(colNames_); + + plan.root = plan.tail; + + if (lookupCtx->filter) { + plan.root = Filter::make(qctx, plan.root, lookupCtx->filter); + } + + plan.root = Project::make(qctx, plan.root, yieldCols); + return plan; +} + +YieldColumns* LookupPlanner::prepareReturnCols(LookupContext* lookupCtx) { + auto pool = lookupCtx->qctx->objPool(); + auto columns = pool->makeAndAdd(); + auto addColumn = [this, pool, columns](const auto& tup) { + std::string name(std::get<0>(tup)); + auto expr = 
InputPropertyExpression::make(pool, name); + columns->addColumn(new YieldColumn(expr, name)); + addLookupColumns(std::get<1>(tup), name); + }; + if (lookupCtx->isEdge) { + for (auto& key : kEdgeKeys) { + addColumn(key); + } + } else { + addColumn(std::make_tuple(kVertexID, kVid)); + } + extractUsedColumns(lookupCtx->filter); + appendColumns(lookupCtx, columns); + return columns; +} + +void LookupPlanner::appendColumns(LookupContext* lookupCtx, YieldColumns* columns) { + auto sentence = static_cast(lookupCtx->sentence); + auto yieldClause = sentence->yieldClause(); + if (yieldClause == nullptr) return; + auto pool = lookupCtx->qctx->objPool(); + for (auto col : yieldClause->columns()) { + auto expr = col->expr(); + DCHECK(expr->kind() == Expression::Kind::kLabelAttribute); + auto laExpr = static_cast(expr); + const auto& schemaName = laExpr->left()->name(); + const auto& colName = laExpr->right()->value().getStr(); + Expression* propExpr = nullptr; + if (lookupCtx->isEdge) { + propExpr = EdgePropertyExpression::make(pool, schemaName, colName); + } else { + propExpr = TagPropertyExpression::make(pool, schemaName, colName); + } + columns->addColumn(new YieldColumn(propExpr, col->alias())); + addLookupColumns(colName, propExpr->toString()); + } +} + +void LookupPlanner::extractUsedColumns(Expression* filter) { + if (filter == nullptr) return; + + auto finder = [](Expression* expr) { + return expr->kind() == Expression::Kind::kTagProperty || + expr->kind() == Expression::Kind::kEdgeProperty; + }; + FindVisitor visitor(finder, true); + filter->accept(&visitor); + for (auto expr : std::move(visitor).results()) { + auto propExpr = static_cast(expr); + addLookupColumns(propExpr->prop(), propExpr->toString()); + } +} + +void LookupPlanner::addLookupColumns(const std::string& retCol, const std::string& outCol) { + auto iter = std::find(returnCols_.begin(), returnCols_.end(), retCol); + if (iter == returnCols_.end()) { + returnCols_.emplace_back(retCol); + } + iter = 
std::find(colNames_.begin(), colNames_.end(), outCol); + if (iter == colNames_.end()) { + colNames_.emplace_back(outCol); + } +} + +} // namespace graph +} // namespace nebula diff --git a/src/planner/ngql/LookupPlanner.h b/src/planner/ngql/LookupPlanner.h new file mode 100644 index 000000000..e0bea2e7a --- /dev/null +++ b/src/planner/ngql/LookupPlanner.h @@ -0,0 +1,45 @@ +/* Copyright (c) 2021 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License, + * attached with Common Clause Condition 1.0, found in the LICENSES directory. + */ + +#ifndef PLANNER_NGQL_LOOKUPPLANNER_H_ +#define PLANNER_NGQL_LOOKUPPLANNER_H_ + +#include +#include + +#include "planner/Planner.h" + +namespace nebula { + +class Expression; +class YieldColumns; + +namespace graph { + +struct LookupContext; +struct AstContext; + +class LookupPlanner final : public Planner { +public: + static std::unique_ptr make(); + static bool match(AstContext* astCtx); + + StatusOr transform(AstContext* astCtx) override; + +private: + YieldColumns* prepareReturnCols(LookupContext* lookupCtx); + void appendColumns(LookupContext* lookupCtx, YieldColumns* columns); + void extractUsedColumns(Expression* filter); + void addLookupColumns(const std::string& retCol, const std::string& outCol); + + std::vector returnCols_; + std::vector colNames_; +}; + +} // namespace graph +} // namespace nebula + +#endif // PLANNER_NGQL_LOOKUPPLANNER_H_ diff --git a/src/planner/plan/PlanNode.cpp b/src/planner/plan/PlanNode.cpp index 43dde5423..5e35590a4 100644 --- a/src/planner/plan/PlanNode.cpp +++ b/src/planner/plan/PlanNode.cpp @@ -44,6 +44,18 @@ const char* PlanNode::toString(PlanNode::Kind kind) { return "GetEdges"; case Kind::kIndexScan: return "IndexScan"; + case Kind::kTagIndexFullScan: + return "TagIndexFullScan"; + case Kind::kTagIndexRangeScan: + return "TagIndexRangeScan"; + case Kind::kTagIndexPrefixScan: + return "TagIndexPrefixScan"; + case Kind::kEdgeIndexFullScan: + return 
"EdgeIndexFullScan"; + case Kind::kEdgeIndexRangeScan: + return "EdgeIndexRangeScan"; + case Kind::kEdgeIndexPrefixScan: + return "EdgeIndexPrefixScan"; case Kind::kFilter: return "Filter"; case Kind::kUnion: diff --git a/src/planner/plan/PlanNode.h b/src/planner/plan/PlanNode.h index acc43a86a..9cdc49dd1 100644 --- a/src/planner/plan/PlanNode.h +++ b/src/planner/plan/PlanNode.h @@ -28,7 +28,16 @@ class PlanNode { kGetNeighbors, kGetVertices, kGetEdges, + // ------------------ + // TODO(yee): refactor in logical plan kIndexScan, + kTagIndexFullScan, + kTagIndexPrefixScan, + kTagIndexRangeScan, + kEdgeIndexFullScan, + kEdgeIndexPrefixScan, + kEdgeIndexRangeScan, + // ------------------ kFilter, kUnion, kUnionAllVersionVar, diff --git a/src/planner/plan/Query.cpp b/src/planner/plan/Query.cpp index 140797645..4004eafb7 100644 --- a/src/planner/plan/Query.cpp +++ b/src/planner/plan/Query.cpp @@ -172,8 +172,8 @@ std::unique_ptr IndexScan::explain() const { auto desc = Explore::explain(); addDescription("schemaId", util::toJson(schemaId_), desc.get()); addDescription("isEdge", util::toJson(isEdge_), desc.get()); - addDescription("returnCols", folly::toJson(util::toJson(*returnCols_)), desc.get()); - addDescription("indexCtx", folly::toJson(util::toJson(*contexts_)), desc.get()); + addDescription("returnCols", folly::toJson(util::toJson(returnCols_)), desc.get()); + addDescription("indexCtx", folly::toJson(util::toJson(contexts_)), desc.get()); return desc; } @@ -186,12 +186,8 @@ PlanNode* IndexScan::clone() const { void IndexScan::cloneMembers(const IndexScan &g) { Explore::cloneMembers(g); - if (g.contexts_ != nullptr) { - contexts_ = std::make_unique>(*g.contexts_); - } - if (g.returnCols_ != nullptr) { - returnCols_ = std::make_unique>(*g.returnCols_); - } + contexts_ = g.contexts_; + returnCols_ = g.returnCols_; isEdge_ = g.isEdge(); schemaId_ = g.schemaId(); isEmptyResultSet_ = g.isEmptyResultSet(); diff --git a/src/planner/plan/Query.h b/src/planner/plan/Query.h 
index 89c5502aa..e7fab16ac 100644 --- a/src/planner/plan/Query.h +++ b/src/planner/plan/Query.h @@ -33,6 +33,10 @@ class Explore : public SingleInputNode { return space_; } + void setSpace(GraphSpaceID spaceId) { + space_ = spaceId; + } + bool dedup() const { return dedup_; } @@ -432,16 +436,15 @@ class GetEdges final : public Explore { /** * Read data through the index. */ -class IndexScan final : public Explore { +class IndexScan : public Explore { public: - using IndexQueryCtx = std::unique_ptr>; - using IndexReturnCols = std::unique_ptr>; + using IndexQueryContext = storage::cpp2::IndexQueryContext; static IndexScan* make(QueryContext* qctx, PlanNode* input, GraphSpaceID space = -1, // TBD: -1 is inValid spaceID? - IndexQueryCtx&& contexts = nullptr, - IndexReturnCols&& returnCols = nullptr, + std::vector&& contexts = {}, + std::vector returnCols = {}, bool isEdge = false, int32_t schemaId = -1, bool isEmptyResultSet = false, @@ -463,12 +466,12 @@ class IndexScan final : public Explore { std::move(filter))); } - const std::vector* queryContext() const { - return contexts_.get(); + const std::vector& queryContext() const { + return contexts_; } - const std::vector* returnColumns() const { - return returnCols_.get(); + const std::vector& returnColumns() const { + return returnCols_; } bool isEdge() const { @@ -479,15 +482,23 @@ class IndexScan final : public Explore { return schemaId_; } + void setSchemaId(int32_t schema) { + schemaId_ = schema; + } + bool isEmptyResultSet() const { return isEmptyResultSet_; } - void setIndexQueryContext(IndexQueryCtx contexts) { + void setEmptyResultSet(bool isEmptyResultSet) { + isEmptyResultSet_ = isEmptyResultSet; + } + + void setIndexQueryContext(std::vector contexts) { contexts_ = std::move(contexts); } - void setReturnCols(IndexReturnCols cols) { + void setReturnCols(std::vector cols) { returnCols_ = std::move(cols); } @@ -495,34 +506,24 @@ class IndexScan final : public Explore { isEdge_ = isEdge; } - void 
setSchemaId(int32_t schema) { - schemaId_ = schema; - } - PlanNode* clone() const override; std::unique_ptr explain() const override; -private: +protected: IndexScan(QueryContext* qctx, PlanNode* input, GraphSpaceID space, - IndexQueryCtx&& contexts, - IndexReturnCols&& returnCols, + std::vector&& contexts, + std::vector&& returnCols, bool isEdge, int32_t schemaId, bool isEmptyResultSet, bool dedup, std::vector orderBy, int64_t limit, - std::string filter) - : Explore(qctx, - Kind::kIndexScan, - input, - space, - dedup, - limit, - std::move(filter), - std::move(orderBy)) { + std::string filter, + Kind kind = Kind::kIndexScan) + : Explore(qctx, kind, input, space, dedup, limit, std::move(filter), std::move(orderBy)) { contexts_ = std::move(contexts); returnCols_ = std::move(returnCols); isEdge_ = isEdge; @@ -533,11 +534,13 @@ class IndexScan final : public Explore { void cloneMembers(const IndexScan&); private: - IndexQueryCtx contexts_; - IndexReturnCols returnCols_; + std::vector contexts_; + std::vector returnCols_; bool isEdge_; int32_t schemaId_; - bool isEmptyResultSet_; + + // TODO(yee): Generate special plan for this scenario + bool isEmptyResultSet_{false}; }; /** diff --git a/src/planner/plan/Scan.h b/src/planner/plan/Scan.h new file mode 100644 index 000000000..30949e721 --- /dev/null +++ b/src/planner/plan/Scan.h @@ -0,0 +1,435 @@ +/* Copyright (c) 2021 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License, + * attached with Common Clause Condition 1.0, found in the LICENSES directory. 
+ */ + +#ifndef PLANNER_PLAN_SCAN_H_ +#define PLANNER_PLAN_SCAN_H_ + +#include "planner/plan/Query.h" + +namespace nebula { +namespace graph { + +// Logical Plan +class EdgeIndexScan : public IndexScan { +public: + const std::string& edgeType() const { + return edgeType_; + } + +protected: + EdgeIndexScan(QueryContext* qctx, + PlanNode* input, + const std::string& edgeType, + GraphSpaceID space, + std::vector&& contexts, + std::vector returnCols, + int32_t schemaId, + bool isEmptyResultSet, + bool dedup, + std::vector orderBy, + int64_t limit, + std::string filter, + Kind kind) + : IndexScan(qctx, + input, + space, + std::move(contexts), + std::move(returnCols), + true, + schemaId, + isEmptyResultSet, + dedup, + std::move(orderBy), + limit, + std::move(filter), + kind), + edgeType_(edgeType) {} + + std::string edgeType_; +}; + +class EdgeIndexPrefixScan : public EdgeIndexScan { +public: + static EdgeIndexPrefixScan* make(QueryContext* qctx, + PlanNode* input, + const std::string& edgeType, + GraphSpaceID space = -1, // TBD: -1 is inValid spaceID? 
+ std::vector&& contexts = {}, + std::vector returnCols = {}, + int32_t schemaId = -1, + bool isEmptyResultSet = false, + bool dedup = false, + std::vector orderBy = {}, + int64_t limit = std::numeric_limits::max(), + std::string filter = "") { + return qctx->objPool()->add(new EdgeIndexPrefixScan(qctx, + input, + edgeType, + space, + std::move(contexts), + std::move(returnCols), + schemaId, + isEmptyResultSet, + dedup, + std::move(orderBy), + limit, + std::move(filter))); + } + +private: + EdgeIndexPrefixScan(QueryContext* qctx, + PlanNode* input, + const std::string& edgeType, + GraphSpaceID space, + std::vector&& contexts, + std::vector returnCols, + int32_t schemaId, + bool isEmptyResultSet, + bool dedup, + std::vector orderBy, + int64_t limit, + std::string filter) + : EdgeIndexScan(qctx, + input, + edgeType, + space, + std::move(contexts), + std::move(returnCols), + schemaId, + isEmptyResultSet, + dedup, + std::move(orderBy), + limit, + std::move(filter), + Kind::kEdgeIndexPrefixScan) {} +}; + +class EdgeIndexRangeScan : public EdgeIndexScan { +public: + static EdgeIndexRangeScan* make(QueryContext* qctx, + PlanNode* input, + const std::string& edgeType, + GraphSpaceID space = -1, // TBD: -1 is inValid spaceID? 
+ std::vector&& contexts = {}, + std::vector returnCols = {}, + int32_t schemaId = -1, + bool isEmptyResultSet = false, + bool dedup = false, + std::vector orderBy = {}, + int64_t limit = std::numeric_limits::max(), + std::string filter = "") { + return qctx->objPool()->add(new EdgeIndexRangeScan(qctx, + input, + edgeType, + space, + std::move(contexts), + std::move(returnCols), + schemaId, + isEmptyResultSet, + dedup, + std::move(orderBy), + limit, + std::move(filter))); + } + +private: + EdgeIndexRangeScan(QueryContext* qctx, + PlanNode* input, + const std::string& edgeType, + GraphSpaceID space, + std::vector&& contexts, + std::vector returnCols, + int32_t schemaId, + bool isEmptyResultSet, + bool dedup, + std::vector orderBy, + int64_t limit, + std::string filter) + : EdgeIndexScan(qctx, + input, + edgeType, + space, + std::move(contexts), + std::move(returnCols), + schemaId, + isEmptyResultSet, + dedup, + std::move(orderBy), + limit, + std::move(filter), + Kind::kEdgeIndexRangeScan) {} +}; + +class EdgeIndexFullScan final : public EdgeIndexScan { +public: + static EdgeIndexFullScan* make(QueryContext* qctx, + PlanNode* input, + const std::string& edgeType, + GraphSpaceID space = -1, // TBD: -1 is inValid spaceID? 
+ std::vector&& contexts = {}, + std::vector returnCols = {}, + int32_t schemaId = -1, + bool isEmptyResultSet = false, + bool dedup = false, + std::vector orderBy = {}, + int64_t limit = std::numeric_limits::max(), + std::string filter = "") { + return qctx->objPool()->add(new EdgeIndexFullScan(qctx, + input, + edgeType, + space, + std::move(contexts), + std::move(returnCols), + schemaId, + isEmptyResultSet, + dedup, + std::move(orderBy), + limit, + std::move(filter))); + } + +private: + EdgeIndexFullScan(QueryContext* qctx, + PlanNode* input, + const std::string& edgeType, + GraphSpaceID space, + std::vector&& contexts, + std::vector returnCols, + int32_t schemaId, + bool isEmptyResultSet, + bool dedup, + std::vector orderBy, + int64_t limit, + std::string filter) + : EdgeIndexScan(qctx, + input, + edgeType, + space, + std::move(contexts), + std::move(returnCols), + schemaId, + isEmptyResultSet, + dedup, + std::move(orderBy), + limit, + std::move(filter), + Kind::kEdgeIndexFullScan) {} +}; + +// class EdgeFullTextIndexScan : public EdgeIndexScan {}; + +class TagIndexScan : public IndexScan { +public: + const std::string& tagName() const { + return tagName_; + } + +protected: + TagIndexScan(QueryContext* qctx, + PlanNode* input, + const std::string& tagName, + GraphSpaceID space, + std::vector&& contexts, + std::vector returnCols, + int32_t schemaId, + bool isEmptyResultSet, + bool dedup, + std::vector orderBy, + int64_t limit, + std::string filter, + Kind kind) + : IndexScan(qctx, + input, + space, + std::move(contexts), + std::move(returnCols), + false, + schemaId, + isEmptyResultSet, + dedup, + std::move(orderBy), + limit, + std::move(filter), + kind), + tagName_(tagName) {} + + std::string tagName_; +}; + +class TagIndexPrefixScan : public TagIndexScan { +public: + static TagIndexPrefixScan* make(QueryContext* qctx, + PlanNode* input, + const std::string& tagName, + GraphSpaceID space = -1, // TBD: -1 is inValid spaceID? 
+ std::vector&& contexts = {}, + std::vector returnCols = {}, + int32_t schemaId = -1, + bool isEmptyResultSet = false, + bool dedup = false, + std::vector orderBy = {}, + int64_t limit = std::numeric_limits::max(), + std::string filter = "") { + return qctx->objPool()->add(new TagIndexPrefixScan(qctx, + input, + tagName, + space, + std::move(contexts), + std::move(returnCols), + schemaId, + isEmptyResultSet, + dedup, + std::move(orderBy), + limit, + std::move(filter))); + } + +private: + TagIndexPrefixScan(QueryContext* qctx, + PlanNode* input, + const std::string& tagName, + GraphSpaceID space, + std::vector&& contexts, + std::vector returnCols, + int32_t schemaId, + bool isEmptyResultSet, + bool dedup, + std::vector orderBy, + int64_t limit, + std::string filter) + : TagIndexScan(qctx, + input, + tagName, + space, + std::move(contexts), + std::move(returnCols), + schemaId, + isEmptyResultSet, + dedup, + std::move(orderBy), + limit, + std::move(filter), + Kind::kTagIndexPrefixScan) {} +}; + +class TagIndexRangeScan : public TagIndexScan { +public: + static TagIndexRangeScan* make(QueryContext* qctx, + PlanNode* input, + const std::string& tagName, + GraphSpaceID space = -1, // TBD: -1 is inValid spaceID? 
+ std::vector&& contexts = {}, + std::vector returnCols = {}, + int32_t schemaId = -1, + bool isEmptyResultSet = false, + bool dedup = false, + std::vector orderBy = {}, + int64_t limit = std::numeric_limits::max(), + std::string filter = "") { + return qctx->objPool()->add(new TagIndexRangeScan(qctx, + input, + tagName, + space, + std::move(contexts), + std::move(returnCols), + schemaId, + isEmptyResultSet, + dedup, + std::move(orderBy), + limit, + std::move(filter))); + } + +private: + TagIndexRangeScan(QueryContext* qctx, + PlanNode* input, + const std::string& tagName, + GraphSpaceID space, + std::vector&& contexts, + std::vector returnCols, + int32_t schemaId, + bool isEmptyResultSet, + bool dedup, + std::vector orderBy, + int64_t limit, + std::string filter) + : TagIndexScan(qctx, + input, + tagName, + space, + std::move(contexts), + std::move(returnCols), + schemaId, + isEmptyResultSet, + dedup, + std::move(orderBy), + limit, + std::move(filter), + Kind::kTagIndexRangeScan) {} +}; + +class TagIndexFullScan final : public TagIndexScan { +public: + static TagIndexFullScan* make(QueryContext* qctx, + PlanNode* input, + const std::string& tagName, + GraphSpaceID space = -1, // TBD: -1 is inValid spaceID? 
+ std::vector&& contexts = {}, + std::vector returnCols = {}, + int32_t schemaId = -1, + bool isEmptyResultSet = false, + bool dedup = false, + std::vector orderBy = {}, + int64_t limit = std::numeric_limits::max(), + std::string filter = "") { + return qctx->objPool()->add(new TagIndexFullScan(qctx, + input, + tagName, + space, + std::move(contexts), + std::move(returnCols), + schemaId, + isEmptyResultSet, + dedup, + std::move(orderBy), + limit, + std::move(filter))); + } + +private: + TagIndexFullScan(QueryContext* qctx, + PlanNode* input, + const std::string& tagName, + GraphSpaceID space, + std::vector&& contexts, + std::vector returnCols, + int32_t schemaId, + bool isEmptyResultSet, + bool dedup, + std::vector orderBy, + int64_t limit, + std::string filter) + : TagIndexScan(qctx, + input, + tagName, + space, + std::move(contexts), + std::move(returnCols), + schemaId, + isEmptyResultSet, + dedup, + std::move(orderBy), + limit, + std::move(filter), + Kind::kTagIndexFullScan) {} +}; + +// class TagFullTextIndexScan : public TagIndexScan {}; + +} // namespace graph +} // namespace nebula + +#endif // PLANNER_PLAN_SCAN_H_ diff --git a/src/util/FTIndexUtils.cpp b/src/util/FTIndexUtils.cpp index 75cac98bc..040460185 100644 --- a/src/util/FTIndexUtils.cpp +++ b/src/util/FTIndexUtils.cpp @@ -5,6 +5,7 @@ */ #include "util/FTIndexUtils.h" +#include "common/expression/Expression.h" DECLARE_uint32(ft_request_retry_times); @@ -76,7 +77,7 @@ FTIndexUtils::dropTSIndex(const std::vector& tsClien return Status::Error("drop fulltext index failed : %s", index.c_str()); } -StatusOr FTIndexUtils::rewriteTSFilter( +StatusOr FTIndexUtils::rewriteTSFilter( ObjectPool* pool, bool isEdge, Expression* expr, @@ -87,32 +88,25 @@ StatusOr FTIndexUtils::rewriteTSFilter( return Status::SemanticError("Text search error."); } if (vRet.value().empty()) { - return ""; + return nullptr; } - std::vector values; - auto tsExpr = static_cast(expr); + auto tsArg = static_cast(expr)->arg(); + 
Expression* propExpr; + if (isEdge) { + propExpr = EdgePropertyExpression::make(pool, tsArg->from(), tsArg->prop()); + } else { + propExpr = TagPropertyExpression::make(pool, tsArg->from(), tsArg->prop()); + } std::vector rels; for (const auto& row : vRet.value()) { - RelationalExpression* relExpr = nullptr; - if (isEdge) { - relExpr = RelationalExpression::makeEQ( - pool, - EdgePropertyExpression::make(pool, tsExpr->arg()->from(), tsExpr->arg()->prop()), - ConstantExpression::make(pool, Value(row))); - } else { - relExpr = RelationalExpression::makeEQ( - pool, - TagPropertyExpression::make(pool, tsExpr->arg()->from(), tsExpr->arg()->prop()), - ConstantExpression::make(pool, Value(row))); - } - rels.emplace_back(std::move(relExpr)); + auto constExpr = ConstantExpression::make(pool, Value(row)); + rels.emplace_back(RelationalExpression::makeEQ(pool, propExpr, constExpr)); } if (rels.size() == 1) { - return rels[0]->encode(); + return rels.front(); } - auto newExpr = ExpressionUtils::pushOrs(pool, rels); - return newExpr->encode(); + return ExpressionUtils::pushOrs(pool, rels); } StatusOr> diff --git a/src/util/FTIndexUtils.h b/src/util/FTIndexUtils.h index 459be831d..d6f310ac0 100644 --- a/src/util/FTIndexUtils.h +++ b/src/util/FTIndexUtils.h @@ -37,9 +37,12 @@ class FTIndexUtils final { dropTSIndex(const std::vector& tsClients, const std::string& index); - static - StatusOr rewriteTSFilter(ObjectPool* pool, bool isEdge, Expression* expr, - const std::string& index, const std::vector& tsClients); + static StatusOr rewriteTSFilter( + ObjectPool* pool, + bool isEdge, + Expression* expr, + const std::string& index, + const std::vector& tsClients); static StatusOr> textSearch(Expression* expr, diff --git a/src/util/IndexUtil.cpp b/src/util/IndexUtil.cpp index 4aa3583b5..ace19abcd 100644 --- a/src/util/IndexUtil.cpp +++ b/src/util/IndexUtil.cpp @@ -73,5 +73,25 @@ StatusOr IndexUtil::toShowCreateIndex(bool isTagIndex, return dataSet; } +Expression::Kind 
IndexUtil::reverseRelationalExprKind(Expression::Kind kind) { + switch (kind) { + case Expression::Kind::kRelGE: { + return Expression::Kind::kRelLE; + } + case Expression::Kind::kRelGT: { + return Expression::Kind::kRelLT; + } + case Expression::Kind::kRelLE: { + return Expression::Kind::kRelGE; + } + case Expression::Kind::kRelLT: { + return Expression::Kind::kRelGT; + } + default: { + return kind; + } + } +} + } // namespace graph } // namespace nebula diff --git a/src/util/IndexUtil.h b/src/util/IndexUtil.h index 416253ba6..25bfbf9c9 100644 --- a/src/util/IndexUtil.h +++ b/src/util/IndexUtil.h @@ -25,6 +25,8 @@ class IndexUtil final { static StatusOr toShowCreateIndex(bool isTagIndex, const std::string &indexName, const meta::cpp2::IndexItem &indexItem); + + static Expression::Kind reverseRelationalExprKind(Expression::Kind kind); }; } // namespace graph diff --git a/src/validator/LookupValidator.cpp b/src/validator/LookupValidator.cpp index 35fc6eaa5..85cfd83f7 100644 --- a/src/validator/LookupValidator.cpp +++ b/src/validator/LookupValidator.cpp @@ -5,203 +5,157 @@ */ #include "validator/LookupValidator.h" + +#include "common/base/Status.h" +#include "common/interface/gen-cpp2/meta_types.h" +#include "common/meta/NebulaSchemaProvider.h" +#include "context/ast/QueryAstContext.h" #include "planner/plan/Query.h" #include "util/ExpressionUtils.h" -#include "util/SchemaUtil.h" #include "util/FTIndexUtils.h" +#include "util/SchemaUtil.h" + +using nebula::meta::NebulaSchemaProvider; +using std::shared_ptr; +using std::unique_ptr; namespace nebula { namespace graph { -/*static*/ constexpr char LookupValidator::kSrcVID[]; -/*static*/ constexpr char LookupValidator::kDstVID[]; -/*static*/ constexpr char LookupValidator::kRanking[]; +LookupValidator::LookupValidator(Sentence* sentence, QueryContext* context) + : Validator(sentence, context) {} -/*static*/ constexpr char LookupValidator::kVertexID[]; +const LookupSentence* LookupValidator::sentence() const { + return 
static_cast(sentence_); +} -Status LookupValidator::validateImpl() { - NG_RETURN_IF_ERROR(prepareFrom()); - NG_RETURN_IF_ERROR(prepareYield()); - NG_RETURN_IF_ERROR(prepareFilter()); - return Status::OK(); +int32_t LookupValidator::schemaId() const { + return DCHECK_NOTNULL(lookupCtx_)->schemaId; } -Status LookupValidator::toPlan() { - auto* is = IndexScan::make(qctx_, - nullptr, - spaceId_, - std::move(contexts_), - std::move(returnCols_), - isEdge_, - schemaId_, - isEmptyResultSet_); - is->setColNames(std::move(idxScanColNames_)); - PlanNode* current = is; - - if (withProject_) { - current = Project::make(qctx_, current, newYieldColumns_); - } +GraphSpaceID LookupValidator::spaceId() const { + return DCHECK_NOTNULL(lookupCtx_)->space.id; +} - if (dedup_) { - current = Dedup::make(qctx_, current); +AstContext* LookupValidator::getAstContext() { + return lookupCtx_.get(); +} - // the framework will add data collect to collect the result - // if the result is required - } +Status LookupValidator::validateImpl() { + lookupCtx_ = getContext(); - root_ = current; - tail_ = is; + NG_RETURN_IF_ERROR(prepareFrom()); + NG_RETURN_IF_ERROR(prepareYield()); + NG_RETURN_IF_ERROR(prepareFilter()); return Status::OK(); } Status LookupValidator::prepareFrom() { - auto* sentence = static_cast(sentence_); - spaceId_ = vctx_->whichSpace().id; - from_ = *sentence->from(); - auto ret = qctx_->schemaMng()->getSchemaIDByName(spaceId_, from_); - if (!ret.ok()) { - return ret.status(); - } - isEdge_ = ret.value().first; - schemaId_ = ret.value().second; + auto spaceId = lookupCtx_->space.id; + auto from = sentence()->from(); + auto ret = qctx_->schemaMng()->getSchemaIDByName(spaceId, from); + NG_RETURN_IF_ERROR(ret); + lookupCtx_->isEdge = ret.value().first; + lookupCtx_->schemaId = ret.value().second; return Status::OK(); } Status LookupValidator::prepareYield() { - auto* sentence = static_cast(sentence_); - returnCols_ = std::make_unique>(); - // always return - if (isEdge_) { - 
returnCols_->emplace_back(kSrc); - idxScanColNames_.emplace_back(kSrcVID); - outputs_.emplace_back(idxScanColNames_.back(), vidType_); - returnCols_->emplace_back(kDst); - idxScanColNames_.emplace_back(kDstVID); - outputs_.emplace_back(idxScanColNames_.back(), vidType_); - returnCols_->emplace_back(kRank); - idxScanColNames_.emplace_back(kRanking); - outputs_.emplace_back(idxScanColNames_.back(), Value::Type::INT); + if (lookupCtx_->isEdge) { + outputs_.emplace_back(kSrcVID, vidType_); + outputs_.emplace_back(kDstVID, vidType_); + outputs_.emplace_back(kRanking, Value::Type::INT); } else { - returnCols_->emplace_back(kVid); - idxScanColNames_.emplace_back(kVertexID); - outputs_.emplace_back(idxScanColNames_.back(), vidType_); + outputs_.emplace_back(kVertexID, vidType_); } - if (sentence->yieldClause() == nullptr) { + + auto yieldClause = sentence()->yieldClause(); + if (yieldClause == nullptr) { return Status::OK(); } - withProject_ = true; - if (sentence->yieldClause()->isDistinct()) { - dedup_ = true; - } - auto* pool = qctx_->objPool(); - newYieldColumns_ = pool->makeAndAdd(); - if (isEdge_) { - // default columns - newYieldColumns_->addColumn( - new YieldColumn(InputPropertyExpression::make(pool, kSrcVID), kSrcVID)); - newYieldColumns_->addColumn( - new YieldColumn(InputPropertyExpression::make(pool, kDstVID), kDstVID)); - newYieldColumns_->addColumn( - new YieldColumn(InputPropertyExpression::make(pool, kRanking), kRanking)); - } else { - newYieldColumns_->addColumn( - new YieldColumn(InputPropertyExpression::make(pool, kVertexID), kVertexID)); - } - auto columns = sentence->yieldClause()->columns(); - auto schema = isEdge_ ? qctx_->schemaMng()->getEdgeSchema(spaceId_, schemaId_) - : qctx_->schemaMng()->getTagSchema(spaceId_, schemaId_); - if (schema == nullptr) { - return isEdge_ ? 
Status::EdgeNotFound("Edge schema not found : %s", from_.c_str()) - : Status::TagNotFound("Tag schema not found : %s", from_.c_str()); - } - for (auto col : columns) { - // TODO(shylock) support more expr - if (col->expr()->kind() == Expression::Kind::kLabelAttribute) { - auto la = static_cast(col->expr()); - const std::string& schemaName = la->left()->name(); - const auto& value = la->right()->value(); - const std::string& colName = value.getStr(); - if (isEdge_) { - newYieldColumns_->addColumn( - new YieldColumn(EdgePropertyExpression::make(pool, schemaName, colName))); - } else { - newYieldColumns_->addColumn( - new YieldColumn(TagPropertyExpression::make(pool, schemaName, colName))); - } - if (!col->alias().empty()) { - newYieldColumns_->back()->setAlias(col->alias()); - } - if (schemaName != from_) { - return Status::SemanticError("Schema name error : %s", schemaName.c_str()); - } - auto ret = schema->getFieldType(colName); - if (ret == meta::cpp2::PropertyType::UNKNOWN) { - return Status::SemanticError( - "Column %s not found in schema %s", colName.c_str(), from_.c_str()); - } - returnCols_->emplace_back(colName); - idxScanColNames_.emplace_back(from_ + "." 
+ colName); - auto column = newYieldColumns_->back()->name(); - outputs_.emplace_back(column, SchemaUtil::propTypeToValueType(ret)); - } else { - return Status::SemanticError("Yield clauses are not supported: %s", + lookupCtx_->dedup = yieldClause->isDistinct(); + + shared_ptr schemaProvider; + NG_RETURN_IF_ERROR(getSchemaProvider(&schemaProvider)); + + auto from = sentence()->from(); + for (auto col : yieldClause->columns()) { + if (col->expr()->kind() != Expression::Kind::kLabelAttribute) { + // TODO(yee): support more exprs, such as (player.age + 1) AS age + return Status::SemanticError("Yield clauses are not supported: `%s'", col->toString().c_str()); } + auto la = static_cast(col->expr()); + const std::string& schemaName = la->left()->name(); + if (schemaName != from) { + return Status::SemanticError("Schema name error: %s", schemaName.c_str()); + } + + const auto& value = la->right()->value(); + DCHECK(value.isStr()); + const std::string& colName = value.getStr(); + auto ret = schemaProvider->getFieldType(colName); + if (ret == meta::cpp2::PropertyType::UNKNOWN) { + return Status::SemanticError( + "Column `%s' not found in schema `%s'", colName.c_str(), from.c_str()); + } + outputs_.emplace_back(col->name(), SchemaUtil::propTypeToValueType(ret)); } return Status::OK(); } Status LookupValidator::prepareFilter() { - auto* sentence = static_cast(sentence_); - if (sentence->whereClause() == nullptr) { + auto whereClause = sentence()->whereClause(); + if (whereClause == nullptr) { return Status::OK(); } - auto* filter = sentence->whereClause()->filter(); - storage::cpp2::IndexQueryContext ctx; + auto* filter = whereClause->filter(); if (FTIndexUtils::needTextSearch(filter)) { - auto tsRet = FTIndexUtils::getTSClients(qctx_->getMetaClient()); - NG_RETURN_IF_ERROR(tsRet); - tsClients_ = std::move(tsRet).value(); - auto tsIndex = checkTSExpr(filter); - NG_RETURN_IF_ERROR(tsIndex); - auto retFilter = FTIndexUtils::rewriteTSFilter(qctx_->objPool(), - isEdge_, - filter, 
- tsIndex.value(), - tsClients_); - if (!retFilter.ok()) { - return retFilter.status(); - } - if (retFilter.value().empty()) { + auto retFilter = genTsFilter(filter); + NG_RETURN_IF_ERROR(retFilter); + auto filterExpr = std::move(retFilter).value(); + if (filterExpr == nullptr) { // return empty result direct. - isEmptyResultSet_ = true; + lookupCtx_->isEmptyResultSet = true; return Status::OK(); } - ctx.set_filter(std::move(retFilter).value()); + lookupCtx_->filter = filterExpr; } else { auto ret = checkFilter(filter); NG_RETURN_IF_ERROR(ret); - ctx.set_filter(Expression::encode(*ret.value())); + lookupCtx_->filter = std::move(ret).value(); } - contexts_ = std::make_unique>(); - contexts_->emplace_back(std::move(ctx)); return Status::OK(); } +StatusOr LookupValidator::handleLogicalExprOperands(LogicalExpression* lExpr) { + auto& operands = lExpr->operands(); + for (auto i = 0u; i < operands.size(); i++) { + auto operand = lExpr->operand(i); + if (operand->isLogicalExpr()) { + // Not allow different logical expression to use: A AND B OR C + return Status::SemanticError("Not supported filter: %s", lExpr->toString().c_str()); + } + auto ret = checkFilter(operand); + NG_RETURN_IF_ERROR(ret); + auto newOperand = ret.value(); + if (operand != newOperand) { + lExpr->setOperand(i, newOperand); + } + } + return lExpr; +} + StatusOr LookupValidator::checkFilter(Expression* expr) { switch (expr->kind()) { - case Expression::Kind::kLogicalOr: + case Expression::Kind::kLogicalOr: { + ExpressionUtils::pullOrs(expr); + return handleLogicalExprOperands(static_cast(expr)); + } case Expression::Kind::kLogicalAnd: { - // TODO(dutor) Deal with n-ary operands - auto lExpr = static_cast(expr); - auto& operands = lExpr->operands(); - for (auto i = 0u; i < operands.size(); i++) { - auto ret = checkFilter(lExpr->operand(i)); - NG_RETURN_IF_ERROR(ret); - lExpr->setOperand(i, ret.value()->clone()); - } - break; + ExpressionUtils::pullAnds(expr); + return 
handleLogicalExprOperands(static_cast(expr)); } case Expression::Kind::kRelLE: case Expression::Kind::kRelGE: @@ -209,15 +163,13 @@ StatusOr LookupValidator::checkFilter(Expression* expr) { case Expression::Kind::kRelLT: case Expression::Kind::kRelGT: case Expression::Kind::kRelNE: { - auto* rExpr = static_cast(expr); - return checkRelExpr(rExpr); + return checkRelExpr(static_cast(expr)); } default: { return Status::SemanticError("Expression %s not supported yet", expr->toString().c_str()); } } - return expr; } StatusOr LookupValidator::checkRelExpr(RelationalExpression* expr) { @@ -227,11 +179,10 @@ StatusOr LookupValidator::checkRelExpr(RelationalExpression* expr) if (left->kind() == Expression::Kind::kLabelAttribute && right->kind() == Expression::Kind::kLabelAttribute) { return Status::SemanticError("Expression %s not supported yet", expr->toString().c_str()); - } else if (left->kind() == Expression::Kind::kLabelAttribute || - right->kind() == Expression::Kind::kLabelAttribute) { - auto ret = rewriteRelExpr(expr); - NG_RETURN_IF_ERROR(ret); - return ret.value(); + } + if (left->kind() == Expression::Kind::kLabelAttribute || + right->kind() == Expression::Kind::kLabelAttribute) { + return rewriteRelExpr(expr); } return Status::SemanticError("Expression %s not supported yet", expr->toString().c_str()); } @@ -246,8 +197,8 @@ StatusOr LookupValidator::rewriteRelExpr(RelationalExpression* expr auto left = expr->left(); auto* la = static_cast(left); - if (la->left()->name() != from_) { - return Status::SemanticError("Schema name error : %s", la->left()->name().c_str()); + if (la->left()->name() != sentence()->from()) { + return Status::SemanticError("Schema name error: %s", la->left()->name().c_str()); } // fold constant expression @@ -260,13 +211,11 @@ StatusOr LookupValidator::rewriteRelExpr(RelationalExpression* expr std::string prop = la->right()->value().getStr(); auto relExprType = expr->kind(); auto c = checkConstExpr(expr->right(), prop, relExprType); - if 
(!c.ok()) { - return Status::SemanticError("expression error : %s", expr->right()->toString().c_str()); - } + NG_RETURN_IF_ERROR(c); expr->setRight(ConstantExpression::make(pool, std::move(c).value())); // rewrite PropertyExpression - if (isEdge_) { + if (lookupCtx_->isEdge) { expr->setLeft(ExpressionUtils::rewriteLabelAttr2EdgeProp(pool, la)); } else { expr->setLeft(ExpressionUtils::rewriteLabelAttr2TagProp(pool, la)); @@ -281,9 +230,13 @@ StatusOr LookupValidator::checkConstExpr(Expression* expr, return Status::SemanticError("'%s' is not an evaluable expression.", expr->toString().c_str()); } - auto schema = isEdge_ ? qctx_->schemaMng()->getEdgeSchema(spaceId_, schemaId_) - : qctx_->schemaMng()->getTagSchema(spaceId_, schemaId_); + auto schemaMgr = qctx_->schemaMng(); + auto schema = lookupCtx_->isEdge ? schemaMgr->getEdgeSchema(spaceId(), schemaId()) + : schemaMgr->getTagSchema(spaceId(), schemaId()); auto type = schema->getFieldType(prop); + if (type == meta::cpp2::PropertyType::UNKNOWN) { + return Status::SemanticError("Invalid column: %s", prop.c_str()); + } QueryExpressionContext dummy(nullptr); auto v = Expression::eval(expr, dummy); // TODO(Aiee) extract the type cast logic as a method if we decide to support more cross-type @@ -314,11 +267,9 @@ StatusOr LookupValidator::checkConstExpr(Expression* expr, } StatusOr LookupValidator::checkTSExpr(Expression* expr) { - auto tsi = qctx_->getMetaClient()->getFTIndexBySpaceSchemaFromCache(spaceId_, schemaId_); - if (!tsi.ok()) { - return tsi.status(); - } - auto tsExpr = static_cast(expr); + auto metaClient = qctx_->getMetaClient(); + auto tsi = metaClient->getFTIndexBySpaceSchemaFromCache(spaceId(), schemaId()); + NG_RETURN_IF_ERROR(tsi); auto tsName = tsi.value().first; auto ret = FTIndexUtils::checkTSIndex(tsClients_, tsName); NG_RETURN_IF_ERROR(ret); @@ -326,6 +277,7 @@ StatusOr LookupValidator::checkTSExpr(Expression* expr) { return Status::SemanticError("text search index not found : %s", tsName.c_str()); } 
auto ftFields = tsi.value().second.get_fields(); + auto tsExpr = static_cast(expr); auto prop = tsExpr->arg()->prop(); auto iter = std::find(ftFields.begin(), ftFields.end(), prop); @@ -366,5 +318,33 @@ Expression* LookupValidator::reverseRelKind(RelationalExpression* expr) { auto* pool = qctx_->objPool(); return RelationalExpression::makeKind(pool, reversedKind, right->clone(), left->clone()); } + +Status LookupValidator::getSchemaProvider(shared_ptr* provider) const { + auto from = sentence()->from(); + auto schemaMgr = qctx_->schemaMng(); + if (lookupCtx_->isEdge) { + *provider = schemaMgr->getEdgeSchema(spaceId(), schemaId()); + if (*provider == nullptr) { + return Status::EdgeNotFound("Edge schema not found : %s", from.c_str()); + } + } else { + *provider = schemaMgr->getTagSchema(spaceId(), schemaId()); + if (*provider == nullptr) { + return Status::TagNotFound("Tag schema not found : %s", from.c_str()); + } + } + return Status::OK(); +} + +StatusOr LookupValidator::genTsFilter(Expression* filter) { + auto tsRet = FTIndexUtils::getTSClients(qctx_->getMetaClient()); + NG_RETURN_IF_ERROR(tsRet); + tsClients_ = std::move(tsRet).value(); + auto tsIndex = checkTSExpr(filter); + NG_RETURN_IF_ERROR(tsIndex); + return FTIndexUtils::rewriteTSFilter( + qctx_->objPool(), lookupCtx_->isEdge, filter, tsIndex.value(), tsClients_); +} + } // namespace graph } // namespace nebula diff --git a/src/validator/LookupValidator.h b/src/validator/LookupValidator.h index 5a849edae..d9f7630cc 100644 --- a/src/validator/LookupValidator.h +++ b/src/validator/LookupValidator.h @@ -6,69 +6,62 @@ #ifndef _VALIDATOR_INDEXSCAN_VALIDATOR_H_ #define _VALIDATOR_INDEXSCAN_VALIDATOR_H_ -#include "planner/plan/Query.h" +#include + #include "common/base/Base.h" #include "common/interface/gen-cpp2/storage_types.h" #include "common/plugin/fulltext/elasticsearch/ESGraphAdapter.h" #include "parser/TraverseSentences.h" +#include "planner/plan/Query.h" #include "validator/Validator.h" namespace nebula { 
+ +namespace meta { +class NebulaSchemaProvider; +} // namespace meta + namespace graph { +struct LookupContext; + class LookupValidator final : public Validator { public: - LookupValidator(Sentence* sentence, QueryContext* context) - : Validator(sentence, context) {} + LookupValidator(Sentence* sentence, QueryContext* context); + + AstContext* getAstContext() override; private: Status validateImpl() override; - Status toPlan() override; - Status prepareFrom(); - Status prepareYield(); - Status prepareFilter(); StatusOr checkFilter(Expression* expr); - StatusOr checkRelExpr(RelationalExpression* expr); - - StatusOr rewriteRelExpr(RelationalExpression* expr); - + StatusOr checkTSExpr(Expression* expr); StatusOr checkConstExpr(Expression* expr, const std::string& prop, const Expression::Kind kind); - StatusOr checkTSExpr(Expression* expr); - + StatusOr rewriteRelExpr(RelationalExpression* expr); Expression* reverseRelKind(RelationalExpression* expr); + const LookupSentence* sentence() const; + int32_t schemaId() const; + GraphSpaceID spaceId() const; + private: - static constexpr char kSrcVID[] = "SrcVID"; - static constexpr char kDstVID[] = "DstVID"; - static constexpr char kRanking[] = "Ranking"; - - static constexpr char kVertexID[] = "VertexID"; - - GraphSpaceID spaceId_{0}; - IndexScan::IndexQueryCtx contexts_{}; - IndexScan::IndexReturnCols returnCols_{}; - bool isEdge_{false}; - int32_t schemaId_; - bool isEmptyResultSet_{false}; - std::string from_; + Status getSchemaProvider(std::shared_ptr* provider) const; + StatusOr genTsFilter(Expression* filter); + StatusOr handleLogicalExprOperands(LogicalExpression* lExpr); + + std::unique_ptr lookupCtx_; std::vector tsClients_; - std::vector idxScanColNames_; - bool withProject_{false}; - bool dedup_{false}; - YieldColumns *newYieldColumns_{nullptr}; }; } // namespace graph } // namespace nebula - #endif // _VALIDATOR_INDEXSCAN_VALIDATOR_H_ diff --git a/src/validator/MatchValidator.cpp 
b/src/validator/MatchValidator.cpp index 46c021fbd..7998b49e7 100644 --- a/src/validator/MatchValidator.cpp +++ b/src/validator/MatchValidator.cpp @@ -14,9 +14,7 @@ namespace nebula { namespace graph { MatchValidator::MatchValidator(Sentence *sentence, QueryContext *context) : TraversalValidator(sentence, context) { - matchCtx_ = std::make_unique(); - matchCtx_->sentence = sentence; - matchCtx_->qctx = context; + matchCtx_ = getContext(); } AstContext *MatchValidator::getAstContext() { diff --git a/src/validator/SequentialValidator.h b/src/validator/SequentialValidator.h index edb84ef2d..78a7d8341 100644 --- a/src/validator/SequentialValidator.h +++ b/src/validator/SequentialValidator.h @@ -26,9 +26,7 @@ class SequentialValidator final : public Validator { SequentialValidator(Sentence* sentence, QueryContext* context) : Validator(sentence, context) { setNoSpaceRequired(); - seqAstCtx_ = std::make_unique(); - seqAstCtx_->sentence = sentence; - seqAstCtx_->qctx = context; + seqAstCtx_ = getContext(); } Status validateImpl() override; diff --git a/src/validator/test/LookupValidatorTest.cpp b/src/validator/test/LookupValidatorTest.cpp index 169131289..b0d3e078c 100644 --- a/src/validator/test/LookupValidatorTest.cpp +++ b/src/validator/test/LookupValidatorTest.cpp @@ -24,7 +24,9 @@ TEST_F(LookupValidatorTest, InputOutput) { { PlanNode::Kind::kProject, PlanNode::Kind::kGetVertices, - PlanNode::Kind::kIndexScan, + PlanNode::Kind::kProject, + PlanNode::Kind::kFilter, + PlanNode::Kind::kTagIndexFullScan, PlanNode::Kind::kStart, })); } @@ -38,7 +40,8 @@ TEST_F(LookupValidatorTest, InputOutput) { PlanNode::Kind::kProject, PlanNode::Kind::kGetVertices, PlanNode::Kind::kProject, - PlanNode::Kind::kIndexScan, + PlanNode::Kind::kFilter, + PlanNode::Kind::kTagIndexFullScan, PlanNode::Kind::kStart, })); } @@ -50,7 +53,9 @@ TEST_F(LookupValidatorTest, InputOutput) { { PlanNode::Kind::kProject, PlanNode::Kind::kGetVertices, - PlanNode::Kind::kIndexScan, + PlanNode::Kind::kProject, + 
PlanNode::Kind::kFilter, + PlanNode::Kind::kTagIndexFullScan, PlanNode::Kind::kStart, })); } @@ -64,7 +69,8 @@ TEST_F(LookupValidatorTest, InputOutput) { PlanNode::Kind::kProject, PlanNode::Kind::kGetVertices, PlanNode::Kind::kProject, - PlanNode::Kind::kIndexScan, + PlanNode::Kind::kFilter, + PlanNode::Kind::kTagIndexFullScan, PlanNode::Kind::kStart, })); } @@ -78,7 +84,8 @@ TEST_F(LookupValidatorTest, InvalidYieldExpression) { EXPECT_FALSE(checkResult(query, { PlanNode::Kind::kProject, - PlanNode::Kind::kIndexScan, + PlanNode::Kind::kFilter, + PlanNode::Kind::kTagIndexFullScan, PlanNode::Kind::kStart, })); } diff --git a/tests/Makefile b/tests/Makefile index 350db9123..360f66d3a 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -6,6 +6,7 @@ .PHONY: fmt check init init-all clean test tck fail up down PYPI_MIRROR = https://mirrors.aliyun.com/pypi/simple/ +# PYPI_MIRROR = http://pypi.mirrors.ustc.edu.cn/simple --trusted-host pypi.mirrors.ustc.edu.cn CURR_DIR = $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) gherkin_fmt = ~/.local/bin/reformat-gherkin PY_VERSION = $(shell python3 --version | cut -f2 -d' ' | cut -f2 -d'.') @@ -13,6 +14,7 @@ PY_VERSION = $(shell python3 --version | cut -f2 -d' ' | cut -f2 -d'.') RM_DIR ?= true TEST_DIR ?= $(CURR_DIR) BUILD_DIR ?= $(CURR_DIR)/../build +DEBUG ?= true J ?= 10 install-deps: @@ -45,23 +47,32 @@ fmt: check: @find $(CURR_DIR)/tck/ -type f -iname "*.feature" -print | xargs $(gherkin_fmt) --check -up: +up: clean @mkdir -p $(CURR_DIR)/.pytest - PYTHONPATH=$$PYTHONPATH:$(CURR_DIR)/.. $(CURR_DIR)/nebula-test-run.py --cmd=start --rm_dir=$(RM_DIR) --build_dir=$(BUILD_DIR) --multi_graphd=true + PYTHONPATH=$$PYTHONPATH:$(CURR_DIR)/.. $(CURR_DIR)/nebula-test-run.py --cmd=start --rm_dir=$(RM_DIR) --build_dir=$(BUILD_DIR) --debug=$(DEBUG) --multi_graphd=true down: PYTHONPATH=$$PYTHONPATH:$(CURR_DIR)/.. 
$(CURR_DIR)/nebula-test-run.py --cmd=stop --rm_dir=$(RM_DIR) -test: - cd $(CURR_DIR) && python3 -m pytest -n$(J) --dist=loadfile --rm_dir=$(RM_DIR) -m "not skip" -k "not tck" $(TEST_DIR) \ - && python3 -m pytest --rm_dir=$(RM_DIR) -m "not skip" -k "not tck" job/test_session.py +currdir: + cd $(CURR_DIR) -tck: - cd $(CURR_DIR) && python3 -m pytest --cucumber-json=$(CURR_DIR)/tck-report.json --cucumber-json-expanded -n$(J) --rm_dir=$(RM_DIR) -m "not skip" $(CURR_DIR)/tck/steps/test_tck.py && python3 -m pytest --cucumber-json=$(CURR_DIR)/tck-report.json --cucumber-json-expanded -n$(J) --rm_dir=$(RM_DIR) -m "not skip" $(CURR_DIR)/tck/steps/test_kill_slow_query_via_same_service.py && python3 -m pytest --cucumber-json=$(CURR_DIR)/tck-report.json --cucumber-json-expanded -n$(J) --rm_dir=$(RM_DIR) -m "not skip" $(CURR_DIR)/tck/steps/test_kill_slow_query_via_different_service.py +sess: currdir + python3 -m pytest -m "not skip" -k "not tck" job/test_session.py +test: sess + python3 -m pytest -n$(J) --dist=loadfile -m "not skip" -k "not tck" $(TEST_DIR) + +slow-query: currdir + python3 -m pytest -n$(J) -m "not skip" tck/steps/test_kill_slow_query_via_same_service.py && \ + python3 -m pytest -n$(J) -m "not skip" tck/steps/test_kill_slow_query_via_different_service.py + +tck: slow-query + python3 -m pytest -n$(J) -m "not skip" tck/steps/test_tck.py + +fail: currdir + python3 -m pytest --last-failed --gherkin-terminal-reporter --gherkin-terminal-reporter-expanded tck/steps/test_tck.py -fail: - cd $(CURR_DIR) && python3 -m pytest --last-failed --gherkin-terminal-reporter --gherkin-terminal-reporter-expanded --rm_dir=$(RM_DIR) -m "not skip" $(CURR_DIR)/tck/steps/test_tck.py report: @mv $(CURR_DIR)/tck-report.json $(CURR_DIR)/tck-report-bak.json @jq . 
$(CURR_DIR)/tck-report-bak.json > tck-report.json diff --git a/tests/admin/test_configs.py b/tests/admin/test_configs.py index a61f9a9b1..14f605355 100644 --- a/tests/admin/test_configs.py +++ b/tests/admin/test_configs.py @@ -67,7 +67,7 @@ def test_configs(self): ['GRAPH', 'accept_partial_success', 'bool', 'MUTABLE', False], ['GRAPH', 'system_memory_high_watermark_ratio', 'float', 'MUTABLE', 0.95], ['GRAPH', 'session_idle_timeout_secs', 'int', 'MUTABLE', 0], - ['GRAPH', 'session_reclaim_interval_secs', 'int', 'MUTABLE', 10], + ['GRAPH', 'session_reclaim_interval_secs', 'int', 'MUTABLE', 2], ['GRAPH', 'max_allowed_connections', 'int', 'MUTABLE', 9223372036854775807], ['GRAPH', 'disable_octal_escape_char', 'bool', 'MUTABLE', False], ] diff --git a/tests/common/nebula_service.py b/tests/common/nebula_service.py index 64f326ade..7ebe78e27 100644 --- a/tests/common/nebula_service.py +++ b/tests/common/nebula_service.py @@ -71,9 +71,9 @@ def _format_nebula_command(self, name, meta_port, ports, debug_log=True): ] if name == 'graphd': params.append('--local_config=false') - params.append('--enable_optimizer=true') params.append('--enable_authorize=true') params.append('--system_memory_high_watermark_ratio=0.95') + params.append('--session_reclaim_interval_secs=2') if name == 'storaged': params.append('--local_config=false') params.append('--raft_heartbeat_interval_secs=30') diff --git a/tests/job/test_session.py b/tests/job/test_session.py index 2dbaa8365..6f8e9873c 100644 --- a/tests/job/test_session.py +++ b/tests/job/test_session.py @@ -47,7 +47,7 @@ def prepare(self): @classmethod def cleanup(self): - resp = self.execute('UPDATE CONFIGS graph:session_reclaim_interval_secs = 10') + resp = self.execute('UPDATE CONFIGS graph:session_reclaim_interval_secs = 2') self.check_resp_succeeded(resp) time.sleep(3) @@ -222,4 +222,3 @@ def test_signout_and_execute(self): resp = conn.execute(session_id, 'SHOW HOSTS') assert resp.error_code == ttypes.ErrorCode.E_SESSION_INVALID, 
resp.error_msg assert resp.error_msg.find(b'Session not existed!') > 0 - diff --git a/tests/nebula-test-run.py b/tests/nebula-test-run.py index d01e0c642..39e31608d 100755 --- a/tests/nebula-test-run.py +++ b/tests/nebula-test-run.py @@ -45,12 +45,20 @@ def init_parser(): default='', help='Support multi graphds') opt_parser.add_option('--address', - dest='address', - default='', - help='Address of the Nebula') + dest='address', + default='', + help='Address of the Nebula') + opt_parser.add_option('--debug', + dest='debug', + default=True, + help='Print verbose debug logs') return opt_parser +def opt_is(val, expect): + return type(val) == str and val.lower() == expect + + def start_nebula(nb, configs): if configs.address is not None and configs.address != "": print('test remote nebula graph, address is {}'.format(configs.address)) @@ -61,7 +69,8 @@ def start_nebula(nb, configs): else: nb.install() address = "localhost" - ports = nb.start(multi_graphd=configs.multi_graphd) + debug = opt_is(configs.debug, "true") + ports = nb.start(debug_log=debug, multi_graphd=configs.multi_graphd) # Load csv data pool = get_conn_pool("localhost", ports[0]) @@ -101,10 +110,6 @@ def stop_nebula(nb, configs=None): print('nebula services have been stopped.') -def opt_is(val, expect): - return type(val) == str and val.lower() == expect - - if __name__ == "__main__": try: parser = init_parser() diff --git a/tests/requirements.txt b/tests/requirements.txt index 0ad8622ec..7d0db1115 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -6,7 +6,7 @@ pytest-xdist==1.31.0 pytest-benchmark==v3.2.3 pytest-metadata==1.8.0 pytest-drop-dup-tests==0.3.0 -pytest-bdd==4.0.1 +pytest-bdd==4.0.2 pytest-yapf3==0.5.1 filelock==3.0.12 ply==3.11 diff --git a/tests/tck/features/lookup/LookUp.IntVid.feature b/tests/tck/features/lookup/LookUp.IntVid.feature index 45500255d..1ec7040c9 100644 --- a/tests/tck/features/lookup/LookUp.IntVid.feature +++ b/tests/tck/features/lookup/LookUp.IntVid.feature 
@@ -1,104 +1,5 @@ Feature: LookUpTest_Vid_Int - Scenario: LookupTest IntVid SimpleVertex - Given an empty graph - And create a space with following options: - | partition_num | 9 | - | replica_factor | 1 | - | vid_type | int64 | - | charset | utf8 | - | collate | utf8_bin | - And having executed: - """ - CREATE TAG lookup_tag_1(col1 int, col2 int, col3 int); - CREATE TAG INDEX t_index_1 ON lookup_tag_1(col1, col2, col3); - CREATE TAG INDEX t_index_3 ON lookup_tag_1(col2, col3); - """ - And wait 6 seconds - When try to execute query: - """ - INSERT VERTEX lookup_tag_1(col1, col2, col3) VALUES 200:(200, 200, 200),201:(201, 201, 201), 202:(202, 202, 202); - """ - Then the execution should be successful - When executing query: - """ - LOOKUP ON lookup_tag_1 WHERE col1 == 200; - """ - Then a SemanticError should be raised at runtime: - When executing query: - """ - LOOKUP ON lookup_tag_1 WHERE lookup_tag_1.col1 == 300 - """ - Then the result should be, in any order: - | VertexID | - When executing query: - """ - LOOKUP ON lookup_tag_1 WHERE lookup_tag_1.col1 == 200 - """ - Then the result should be, in any order: - | VertexID | - | 200 | - When executing query: - """ - LOOKUP ON lookup_tag_1 WHERE lookup_tag_1.col1 == 200 - YIELD lookup_tag_1.col1, lookup_tag_1.col2, lookup_tag_1.col3 - """ - Then the result should be, in any order: - | VertexID | lookup_tag_1.col1 | lookup_tag_1.col2 | lookup_tag_1.col3 | - | 200 | 200 | 200 | 200 | - Then drop the used space - - Scenario: LookupTest IntVid SimpleEdge - Given an empty graph - And create a space with following options: - | partition_num | 9 | - | replica_factor | 1 | - | vid_type | int64 | - | charset | utf8 | - | collate | utf8_bin | - And having executed: - """ - CREATE EDGE lookup_edge_1(col1 int, col2 int, col3 int); - CREATE EDGE INDEX e_index_1 ON lookup_edge_1(col1, col2, col3); - CREATE EDGE INDEX e_index_3 ON lookup_edge_1(col2, col3); - """ - And wait 6 seconds - When try to execute query: - """ - INSERT EDGE 
- lookup_edge_1(col1, col2, col3) - VALUES - 200 -> 201@0:(201, 201, 201), - 200 -> 202@0:(202, 202, 202) - """ - Then the execution should be successful - When executing query: - """ - LOOKUP ON lookup_edge_1 WHERE col1 == 201 - """ - Then a SemanticError should be raised at runtime: - When executing query: - """ - LOOKUP ON lookup_edge_1 WHERE lookup_edge_1.col1 == 300 - """ - Then the result should be, in any order: - | SrcVID | DstVID | Ranking | - When executing query: - """ - LOOKUP ON lookup_edge_1 WHERE lookup_edge_1.col1 == 201 - """ - Then the result should be, in any order: - | SrcVID | DstVID | Ranking | - | 200 | 201 | 0 | - When executing query: - """ - LOOKUP ON lookup_edge_1 WHERE lookup_edge_1.col1 == 201 YIELD lookup_edge_1.col1, lookup_edge_1.col2, lookup_edge_1.col3 - """ - Then the result should be, in any order: - | SrcVID | DstVID | Ranking | lookup_edge_1.col1 | lookup_edge_1.col2 | lookup_edge_1.col3 | - | 200 | 201 | 0 | 201 | 201 | 201 | - Then drop the used space - Scenario: LookupTest IntVid VertexIndexHint Given an empty graph And create a space with following options: @@ -139,7 +40,8 @@ Feature: LookUpTest_Vid_Int """ LOOKUP ON lookup_tag_2 WHERE lookup_tag_2.col1 == true """ - Then a ExecutionError should be raised at runtime: + Then the result should be, in any order: + | VertexID | Then drop the used space Scenario: LookupTest IntVid EdgeIndexHint @@ -760,12 +662,12 @@ Feature: LookUpTest_Vid_Int """ LOOKUP on t1 WHERE t1.c4 > 1 """ - Then a ExecutionError should be raised at runtime: IndexNotFound: No valid index found + Then the execution should be successful When executing query: """ LOOKUP on t1 WHERE t1.c2 > 1 and t1.c3 > 1 """ - Then a ExecutionError should be raised at runtime: IndexNotFound: No valid index found + Then the execution should be successful When executing query: """ LOOKUP ON t1 where t1.c2 > 1 and t1.c1 != 1 diff --git a/tests/tck/features/lookup/LookUp.feature b/tests/tck/features/lookup/LookUp.feature index 
496392814..5ab0caef6 100644 --- a/tests/tck/features/lookup/LookUp.feature +++ b/tests/tck/features/lookup/LookUp.feature @@ -7,87 +7,6 @@ Feature: LookUpTest_Vid_String | replica_factor | 1 | | vid_type | FIXED_STRING(32) | - Scenario: LookupTest SimpleVertex - Given having executed: - """ - CREATE TAG lookup_tag_1(col1 int, col2 int, col3 int); - CREATE TAG INDEX t_index_1 ON lookup_tag_1(col1, col2, col3); - CREATE TAG INDEX t_index_3 ON lookup_tag_1(col2, col3); - """ - And wait 6 seconds - When executing query: - """ - INSERT VERTEX lookup_tag_1(col1, col2, col3) VALUES "200":(200, 200, 200),"201":(201, 201, 201), "202":(202, 202, 202); - """ - Then the execution should be successful - When executing query: - """ - LOOKUP ON lookup_tag_1 WHERE col1 == 200; - """ - Then a SemanticError should be raised at runtime: - When executing query: - """ - LOOKUP ON lookup_tag_1 WHERE lookup_tag_1.col1 == 300 - """ - Then the result should be, in any order: - | VertexID | - When executing query: - """ - LOOKUP ON lookup_tag_1 WHERE lookup_tag_1.col1 == 200 - """ - Then the result should be, in any order: - | VertexID | - | "200" | - When executing query: - """ - LOOKUP ON lookup_tag_1 WHERE lookup_tag_1.col1 == 200 - YIELD lookup_tag_1.col1, lookup_tag_1.col2, lookup_tag_1.col3 - """ - Then the result should be, in any order: - | VertexID | lookup_tag_1.col1 | lookup_tag_1.col2 | lookup_tag_1.col3 | - | "200" | 200 | 200 | 200 | - Then drop the used space - - Scenario: LookupTest SimpleEdge - Given having executed: - """ - CREATE EDGE lookup_edge_1(col1 int, col2 int, col3 int); - CREATE EDGE INDEX e_index_1 ON lookup_edge_1(col1, col2, col3); - CREATE EDGE INDEX e_index_3 ON lookup_edge_1(col2, col3); - """ - And wait 6 seconds - When executing query: - """ - INSERT EDGE lookup_edge_1(col1, col2, col3) VALUES "200" -> "201"@0:(201, 201, 201), "200" -> "202"@0:(202, 202, 202) - """ - Then the execution should be successful - When executing query: - """ - LOOKUP ON 
lookup_edge_1 WHERE col1 == 201 - """ - Then a SemanticError should be raised at runtime: - When executing query: - """ - LOOKUP ON lookup_edge_1 WHERE lookup_edge_1.col1 == 300 - """ - Then the result should be, in any order: - | SrcVID | DstVID | Ranking | - When executing query: - """ - LOOKUP ON lookup_edge_1 WHERE lookup_edge_1.col1 == 201 - """ - Then the result should be, in any order: - | SrcVID | DstVID | Ranking | - | "200" | "201" | 0 | - When executing query: - """ - LOOKUP ON lookup_edge_1 WHERE lookup_edge_1.col1 == 201 YIELD lookup_edge_1.col1, lookup_edge_1.col2, lookup_edge_1.col3 - """ - Then the result should be, in any order: - | SrcVID | DstVID | Ranking | lookup_edge_1.col1 | lookup_edge_1.col2 | lookup_edge_1.col3 | - | "200" | "201" | 0 | 201 | 201 | 201 | - Then drop the used space - Scenario: LookupTest VertexIndexHint Given having executed: """ @@ -121,7 +40,8 @@ Feature: LookUpTest_Vid_String """ LOOKUP ON lookup_tag_2 WHERE lookup_tag_2.col1 == true """ - Then a ExecutionError should be raised at runtime: + Then the result should be, in any order: + | VertexID | Then drop the used space Scenario: LookupTest EdgeIndexHint @@ -680,12 +600,12 @@ Feature: LookUpTest_Vid_String """ LOOKUP ON t1 WHERE t1.c4 > 1 """ - Then a ExecutionError should be raised at runtime: IndexNotFound: No valid index found + Then the execution should be successful When executing query: """ LOOKUP ON t1 WHERE t1.c2 > 1 AND t1.c3 > 1 """ - Then a ExecutionError should be raised at runtime: IndexNotFound: No valid index found + Then the execution should be successful When executing query: """ LOOKUP ON t1 WHERE t1.c2 > 1 AND t1.c1 != 1 diff --git a/tests/tck/features/lookup/LookupEdge.feature b/tests/tck/features/lookup/LookupEdge.feature new file mode 100644 index 000000000..1b3de7b4a --- /dev/null +++ b/tests/tck/features/lookup/LookupEdge.feature @@ -0,0 +1,120 @@ +Feature: Test lookup on edge index + Examples: + | where_condition | + | lookup_edge_1.col1 == 201 
| + | lookup_edge_1.col1 == 201 AND lookup_edge_1.col2 == 201 | + | lookup_edge_1.col1 == 201 AND lookup_edge_1.col2 >= 201 | + | lookup_edge_1.col1 == 201 AND lookup_edge_1.col2 != 200 | + | lookup_edge_1.col1 >= 201 AND lookup_edge_1.col2 == 201 | + | lookup_edge_1.col1 >= 201 AND lookup_edge_1.col1 <= 201 | + | lookup_edge_1.col1 != 202 AND lookup_edge_1.col2 >= 201 | + | lookup_edge_1.col1 == 201 AND lookup_edge_1.col2 == 201 AND lookup_edge_1.col3 == 201 | + | lookup_edge_1.col1 == 201 AND lookup_edge_1.col2 == 201 AND lookup_edge_1.col3 >= 201 | + | lookup_edge_1.col1 == 201 AND lookup_edge_1.col2 == 201 AND lookup_edge_1.col3 != 200 | + | lookup_edge_1.col1 == 201 AND lookup_edge_1.col2 >= 201 AND lookup_edge_1.col3 == 201 | + | lookup_edge_1.col1 == 201 AND lookup_edge_1.col2 >= 201 AND lookup_edge_1.col3 >= 201 | + | lookup_edge_1.col1 == 201 AND lookup_edge_1.col2 >= 201 AND lookup_edge_1.col3 != 200 | + | lookup_edge_1.col1 == 201 AND lookup_edge_1.col2 != 200 AND lookup_edge_1.col3 == 201 | + | lookup_edge_1.col1 >= 201 AND lookup_edge_1.col2 == 201 AND lookup_edge_1.col3 == 201 | + | lookup_edge_1.col1 >= 201 AND lookup_edge_1.col2 >= 201 AND lookup_edge_1.col3 == 201 | + | lookup_edge_1.col1 >= 201 AND lookup_edge_1.col2 != 200 AND lookup_edge_1.col3 == 201 | + | lookup_edge_1.col1 != 200 AND lookup_edge_1.col2 != 200 AND lookup_edge_1.col3 == 201 | + | lookup_edge_1.col1 == 201 OR lookup_edge_1.col2 == 201 | + | lookup_edge_1.col1 == 201 OR lookup_edge_1.col2 >= 203 | + | lookup_edge_1.col1 == 201 OR lookup_edge_1.col3 == 201 | + + Scenario Outline: [edge] different condition and yield test for string vid + Given an empty graph + And create a space with following options: + | partition_num | 9 | + | replica_factor | 1 | + | vid_type | fixed_string(16) | + And having executed: + """ + CREATE EDGE lookup_edge_1(col1 int, col2 int, col3 int); + CREATE EDGE INDEX e_index_1 ON lookup_edge_1(col1, col2, col3); + CREATE EDGE INDEX e_index_3 ON 
lookup_edge_1(col2, col3); + """ + And wait 6 seconds + And having executed: + """ + INSERT EDGE + lookup_edge_1(col1, col2, col3) + VALUES + '200' -> '201'@0:(201, 201, 201), + '200' -> '202'@0:(202, 202, 202) + """ + When executing query: + """ + LOOKUP ON + lookup_edge_1 + WHERE + <where_condition> + """ + Then the result should be, in any order: + | SrcVID | DstVID | Ranking | + | '200' | '201' | 0 | + When executing query: + """ + LOOKUP ON + lookup_edge_1 + WHERE + <where_condition> + YIELD + lookup_edge_1.col1 AS col1, + lookup_edge_1.col2 AS col2, + lookup_edge_1.col3 + """ + Then the result should be, in any order: + | SrcVID | DstVID | Ranking | col1 | col2 | lookup_edge_1.col3 | + | '200' | '201' | 0 | 201 | 201 | 201 | + Then drop the used space + + Scenario Outline: [edge] different condition and yield test for int vid + Given an empty graph + And create a space with following options: + | partition_num | 9 | + | replica_factor | 1 | + | vid_type | int64 | + And having executed: + """ + CREATE EDGE lookup_edge_1(col1 int, col2 int, col3 int); + CREATE EDGE INDEX e_index_1 ON lookup_edge_1(col1, col2, col3); + CREATE EDGE INDEX e_index_3 ON lookup_edge_1(col2, col3); + """ + And wait 6 seconds + And having executed: + """ + INSERT EDGE + lookup_edge_1(col1, col2, col3) + VALUES + 200 -> 201@0:(201, 201, 201), + 200 -> 202@0:(202, 202, 202) + """ + When executing query: + """ + LOOKUP ON + lookup_edge_1 + WHERE + <where_condition> + """ + Then the result should be, in any order: + | SrcVID | DstVID | Ranking | + | 200 | 201 | 0 | + When executing query: + """ + LOOKUP ON + lookup_edge_1 + WHERE + <where_condition> + YIELD + lookup_edge_1.col1 AS col1, + lookup_edge_1.col2 AS col2, + lookup_edge_1.col3 + """ + Then the result should be, in any order: + | SrcVID | DstVID | Ranking | col1 | col2 | lookup_edge_1.col3 | + | 200 | 201 | 0 | 201 | 201 | 201 | + Then drop the used space + +# TODO(yee): Test bool expression diff --git a/tests/tck/features/lookup/LookupEdge2.feature b/tests/tck/features/lookup/LookupEdge2.feature new
file mode 100644 index 000000000..5d97dfe61 --- /dev/null +++ b/tests/tck/features/lookup/LookupEdge2.feature @@ -0,0 +1,51 @@ +Feature: Test lookup on edge index 2 + Examples: + | vid_type | id_200 | id_201 | id_202 | + | int64 | 200 | 201 | 202 | + | FIXED_STRING(16) | "200" | "201" | "202" | + + Background: + Given an empty graph + And create a space with following options: + | partition_num | 9 | + | replica_factor | 1 | + | vid_type | <vid_type> | + And having executed: + """ + CREATE EDGE lookup_edge_1(col1 int, col2 int, col3 int); + CREATE EDGE INDEX e_index_1 ON lookup_edge_1(col1, col2, col3); + CREATE EDGE INDEX e_index_3 ON lookup_edge_1(col2, col3); + """ + And wait 6 seconds + And having executed: + """ + INSERT EDGE + lookup_edge_1(col1, col2, col3) + VALUES + <id_200> -> <id_201>@0:(201, 201, 201), + <id_200> -> <id_202>@0:(202, 202, 202) + """ + + Scenario Outline: [edge] Simple test cases + When executing query: + """ + LOOKUP ON lookup_edge_1 WHERE col1 == 201 + """ + Then a SemanticError should be raised at runtime: Expression (col1==201) not supported yet + When executing query: + """ + LOOKUP ON lookup_edge_1 WHERE lookup_edge_1.col1 == 201 OR lookup_edge_1.col5 == 201 + """ + Then a SemanticError should be raised at runtime: Invalid column: col5 + When executing query: + """ + LOOKUP ON lookup_edge_1 WHERE lookup_edge_1.col1 == 201 OR lookup_edge_1.col2 == 201 AND lookup_edge_1.col3 == 202 + """ + Then a SemanticError should be raised at runtime: Not supported filter + When executing query: + """ + LOOKUP ON lookup_edge_1 WHERE lookup_edge_1.col1 == 300 + """ + Then the result should be, in any order: + | SrcVID | DstVID | Ranking | + Then drop the used space diff --git a/tests/tck/features/lookup/LookupTag.feature b/tests/tck/features/lookup/LookupTag.feature new file mode 100644 index 000000000..215b9095c --- /dev/null +++ b/tests/tck/features/lookup/LookupTag.feature @@ -0,0 +1,121 @@ +Feature: Test lookup on tag index + Examples: + | where_condition | + | lookup_tag_1.col1 == 201 |
+ | lookup_tag_1.col1 == 201 AND lookup_tag_1.col2 == 201 | + | lookup_tag_1.col1 == 201 AND lookup_tag_1.col2 >= 200 | + | lookup_tag_1.col1 == 201 AND lookup_tag_1.col2 != 200 | + | lookup_tag_1.col1 >= 201 AND lookup_tag_1.col2 == 201 | + | lookup_tag_1.col1 >= 201 AND lookup_tag_1.col1 <= 201 | + | lookup_tag_1.col1 >= 201 AND lookup_tag_1.col2 != 202 | + | lookup_tag_1.col1 == 201 AND lookup_tag_1.col2 == 201 AND lookup_tag_1.col3 == 201 | + | lookup_tag_1.col1 == 201 AND lookup_tag_1.col2 >= 201 AND lookup_tag_1.col3 == 201 | + | lookup_tag_1.col1 == 201 AND lookup_tag_1.col2 >= 201 AND lookup_tag_1.col3 >= 201 | + | lookup_tag_1.col1 == 201 AND lookup_tag_1.col2 >= 201 AND lookup_tag_1.col3 != 202 | + | lookup_tag_1.col1 == 201 AND lookup_tag_1.col2 != 202 AND lookup_tag_1.col3 == 201 | + | lookup_tag_1.col1 == 201 AND lookup_tag_1.col2 != 202 AND lookup_tag_1.col3 >= 201 | + | lookup_tag_1.col1 == 201 AND lookup_tag_1.col2 != 202 AND lookup_tag_1.col3 != 202 | + | lookup_tag_1.col1 != 202 AND lookup_tag_1.col2 == 201 AND lookup_tag_1.col3 == 201 | + | lookup_tag_1.col1 != 202 AND lookup_tag_1.col2 == 201 AND lookup_tag_1.col3 >= 201 | + | lookup_tag_1.col1 != 202 AND lookup_tag_1.col2 >= 201 AND lookup_tag_1.col3 >= 201 | + | lookup_tag_1.col1 == 201 OR lookup_tag_1.col2 == 201 | + | lookup_tag_1.col1 == 201 OR lookup_tag_1.col2 >= 203 | + | lookup_tag_1.col1 == 201 OR lookup_tag_1.col3 == 201 | + + Scenario Outline: [tag] different condition and yield test for string vid + Given an empty graph + And create a space with following options: + | partition_num | 9 | + | replica_factor | 1 | + | vid_type | fixed_string(16) | + And having executed: + """ + CREATE TAG lookup_tag_1(col1 int, col2 int, col3 int); + CREATE TAG INDEX t_index_1 ON lookup_tag_1(col1, col2, col3); + CREATE TAG INDEX t_index_3 ON lookup_tag_1(col2, col3); + """ + And wait 6 seconds + And having executed: + """ + INSERT VERTEX + lookup_tag_1(col1, col2, col3) + VALUES + '200':(200, 200, 
200), + '201':(201, 201, 201), + '202':(202, 202, 202); + """ + When executing query: + """ + LOOKUP ON + lookup_tag_1 + WHERE + <where_condition> + """ + Then the result should be, in any order: + | VertexID | + | '201' | + When executing query: + """ + LOOKUP ON + lookup_tag_1 + WHERE + <where_condition> + YIELD + lookup_tag_1.col1, + lookup_tag_1.col2, + lookup_tag_1.col3 + """ + Then the result should be, in any order: + | VertexID | lookup_tag_1.col1 | lookup_tag_1.col2 | lookup_tag_1.col3 | + | '201' | 201 | 201 | 201 | + Then drop the used space + + Scenario Outline: [tag] different condition and yield test for int vid + Given an empty graph + And create a space with following options: + | partition_num | 9 | + | replica_factor | 1 | + | vid_type | int64 | + And having executed: + """ + CREATE TAG lookup_tag_1(col1 int, col2 int, col3 int); + CREATE TAG INDEX t_index_1 ON lookup_tag_1(col1, col2, col3); + CREATE TAG INDEX t_index_3 ON lookup_tag_1(col2, col3); + """ + And wait 6 seconds + And having executed: + """ + INSERT VERTEX + lookup_tag_1(col1, col2, col3) + VALUES + 200:(200, 200, 200), + 201:(201, 201, 201), + 202:(202, 202, 202); + """ + When executing query: + """ + LOOKUP ON + lookup_tag_1 + WHERE + <where_condition> + """ + Then the result should be, in any order: + | VertexID | + | 201 | + When executing query: + """ + LOOKUP ON + lookup_tag_1 + WHERE + <where_condition> + YIELD + lookup_tag_1.col1, + lookup_tag_1.col2, + lookup_tag_1.col3 + """ + Then the result should be, in any order: + | VertexID | lookup_tag_1.col1 | lookup_tag_1.col2 | lookup_tag_1.col3 | + | 201 | 201 | 201 | 201 | + Then drop the used space + +# TODO(yee): Test bool expression diff --git a/tests/tck/features/lookup/LookupTag2.feature b/tests/tck/features/lookup/LookupTag2.feature new file mode 100644 index 000000000..1a4a7815f --- /dev/null +++ b/tests/tck/features/lookup/LookupTag2.feature @@ -0,0 +1,82 @@ +Feature: Test lookup on tag index 2 + Examples: + | vid_type | id_200 | id_201 | id_202 | + | int64 | 200 | 201 | 202 | + |
FIXED_STRING(16) | "200" | "201" | "202" | + + Background: + Given an empty graph + And create a space with following options: + | partition_num | 9 | + | replica_factor | 1 | + | vid_type | <vid_type> | + And having executed: + """ + CREATE TAG lookup_tag_1(col1 int, col2 int, col3 int); + CREATE TAG INDEX t_index_1 ON lookup_tag_1(col1, col2, col3); + CREATE TAG INDEX t_index_3 ON lookup_tag_1(col2, col3); + """ + And wait 6 seconds + And having executed: + """ + INSERT VERTEX + lookup_tag_1(col1, col2, col3) + VALUES + <id_200>:(200, 200, 200), + <id_201>:(201, 201, 201), + <id_202>:(202, 202, 202); + """ + + Scenario Outline: [tag] simple tag test cases + When executing query: + """ + LOOKUP ON lookup_tag_1 WHERE col1 == 200; + """ + Then a SemanticError should be raised at runtime: Expression (col1==200) not supported yet + When executing query: + """ + LOOKUP ON lookup_tag_1 WHERE lookup_tag_1.col1 == 200 OR lookup_tag_1.col5 == 20; + """ + Then a SemanticError should be raised at runtime: Invalid column: col5 + When executing query: + """ + LOOKUP ON lookup_tag_1 WHERE lookup_tag_1.col1 == 201 OR lookup_tag_1.col2 == 201 AND lookup_tag_1.col3 == 202 + """ + Then a SemanticError should be raised at runtime: Not supported filter + When executing query: + """ + LOOKUP ON lookup_tag_1 WHERE lookup_tag_1.col1 == 300 + """ + Then the result should be, in any order: + | VertexID | + Then drop the used space + + Scenario Outline: [tag] scan without hints + When executing query: + """ + LOOKUP ON + lookup_tag_1 + WHERE + lookup_tag_1.col1 != 200 + """ + Then the result should be, in any order: + | VertexID | + | <id_201> | + | <id_202> | + When executing query: + """ + LOOKUP ON + lookup_tag_1 + WHERE + lookup_tag_1.col1 != 200 + YIELD + lookup_tag_1.col1 AS col1, + lookup_tag_1.col3 + """ + Then the result should be, in any order: + | VertexID | col1 | lookup_tag_1.col3 | + | <id_201> | 201 | 201 | + | <id_202> | 202 | 202 | + Then drop the used space + +# TODO(yee): Test bool expression diff --git
a/tests/tck/features/match/SeekByEdge.feature b/tests/tck/features/match/SeekByEdge.feature index 0ad94e775..4992d9518 100644 --- a/tests/tck/features/match/SeekByEdge.feature +++ b/tests/tck/features/match/SeekByEdge.feature @@ -7,7 +7,7 @@ Feature: Match seek by edge Background: Prepare space Given a graph with space named "" - Scenario: seek by edge index + Scenario Outline: seek by edge index When executing query: """ MATCH (player)-[:serve]->(team) @@ -874,7 +874,7 @@ Feature: Match seek by edge | "Shaquile O'Neal" | "JaVale McGee" | "Nuggets" | | "Shaquile O'Neal" | "JaVale McGee" | "Mavericks" | - Scenario: Seek by edge with range + Scenario Outline: Seek by edge with range When executing query: """ match (p1)-[:like*2]->(p2) return p1.name, p2.name @@ -1291,7 +1291,7 @@ Feature: Match seek by edge Then the result should be, in any order: | p1.name | p2.name | - Scenario: Seek by edge with properties + Scenario Outline: Seek by edge with properties When executing query: """ match (player)-[:serve {start_year : 2001}]->(team) return player.name AS player, team.name AS team @@ -1345,7 +1345,7 @@ Feature: Match seek by edge | "Grizzlies" | "Paul Gasol" | | "Nets" | "Jason Kidd" | - Scenario: Seek by edge with range with properties + Scenario Outline: Seek by edge with range with properties When executing query: """ match (p1)-[:like*2 {likeness: 90}]->(p2) return p1.name, p2.name @@ -1463,7 +1463,7 @@ Feature: Match seek by edge | "Grant Hill" | "Grant Hill" | | "Grant Hill" | "Rudy Gay" | - Scenario: seek by edge without index + Scenario Outline: seek by edge without index When executing query: """ MATCH (p1)-[:teammate]->(p2) diff --git a/tests/tck/features/optimizer/PushFilterDownLeftJoinRule.feature b/tests/tck/features/optimizer/PushFilterDownLeftJoinRule.feature index 47401b177..25c79158f 100644 --- a/tests/tck/features/optimizer/PushFilterDownLeftJoinRule.feature +++ b/tests/tck/features/optimizer/PushFilterDownLeftJoinRule.feature @@ -22,26 +22,27 @@ 
Feature: Push Filter down LeftJoin rule | $$.player.name | | "Tim Duncan" | And the execution plan should be: - | id | name | dependencies | operator info | - | 22 | Project | 21 | | - | 21 | Filter | 20 | | - | 20 | InnerJoin | 19 | | - | 19 | LeftJoin | 18 | | - | 18 | Project | 17 | | - | 17 | GetVertices | 16 | | - | 16 | Project | 28 | | - | 28 | GetNeighbors | 12 | | - | 12 | Project | 11 | | - | 11 | Filter | 10 | | - | 10 | InnerJoin | 9 | | - | 9 | LeftJoin | 8 | | - | 8 | Project | 7 | | - | 7 | GetVertices | 6 | | - | 6 | Project | 27 | | - | 27 | GetNeighbors | 2 | | - | 2 | Project | 23 | | - | 23 | IndexScan | 0 | | - | 0 | Start | | | + | id | name | dependencies | operator info | + | 22 | Project | 21 | | + | 21 | Filter | 20 | | + | 20 | InnerJoin | 19 | | + | 19 | LeftJoin | 18 | | + | 18 | Project | 17 | | + | 17 | GetVertices | 16 | | + | 16 | Project | 28 | | + | 28 | GetNeighbors | 12 | | + | 12 | Project | 11 | | + | 11 | Filter | 10 | | + | 10 | InnerJoin | 9 | | + | 9 | LeftJoin | 8 | | + | 8 | Project | 7 | | + | 7 | GetVertices | 6 | | + | 6 | Project | 27 | | + | 27 | GetNeighbors | 2 | | + | 2 | Project | 3 | | + | 3 | Project | 30 | | + | 30 | TagIndexPrefixScan | 0 | | + | 0 | Start | | | When profiling query: """ GO FROM "Tony Parker" OVER like diff --git a/tests/tck/slowquery/KillSlowQueryViaDiffrentService.feature b/tests/tck/slowquery/KillSlowQueryViaDiffrentService.feature index 94924e72a..85f7f65a2 100644 --- a/tests/tck/slowquery/KillSlowQueryViaDiffrentService.feature +++ b/tests/tck/slowquery/KillSlowQueryViaDiffrentService.feature @@ -25,7 +25,7 @@ Feature: Slow Query Test SHOW ALL QUERIES """ Then the execution should be successful - And wait 20 seconds + And wait 5 seconds When executing query via graph 1: """ SHOW ALL QUERIES diff --git a/tests/tck/slowquery/KillSlowQueryViaSameService.feature b/tests/tck/slowquery/KillSlowQueryViaSameService.feature index e0140e363..c97a0d82c 100644 --- 
a/tests/tck/slowquery/KillSlowQueryViaSameService.feature +++ b/tests/tck/slowquery/KillSlowQueryViaSameService.feature @@ -25,7 +25,7 @@ Feature: Slow Query Test SHOW ALL QUERIES """ Then the execution should be successful - And wait 20 seconds + And wait 5 seconds When executing query: """ SHOW ALL QUERIES @@ -37,7 +37,7 @@ Feature: Slow Query Test """ SHOW ALL QUERIES | YIELD $-.SessionID AS sid, $-.ExecutionPlanID AS eid, $-.DurationInUSec AS dur - WHERE $-.DurationInUSec > 1000 AND $-.`Query` CONTAINS "GO 100000 STEPS"; + WHERE $-.DurationInUSec > 1000000 AND $-.`Query` CONTAINS "GO 100000 STEPS"; """ Then the result should be, in order: | sid | eid | dur | @@ -84,7 +84,7 @@ Feature: Slow Query Test """ SHOW ALL QUERIES | YIELD $-.SessionID AS sid, $-.ExecutionPlanID AS eid, $-.DurationInUSec AS dur - WHERE $-.DurationInUSec > 10000000 AND $-.`Query` CONTAINS "GO" + WHERE $-.DurationInUSec > 1000000 AND $-.`Query` CONTAINS "GO" | ORDER BY $-.dur | KILL QUERY(session=$-.sid, plan=$-.eid) """