diff --git a/src/storage/mutate/AddEdgesProcessor.cpp b/src/storage/mutate/AddEdgesProcessor.cpp index 0c060509fcb..8a68b25ac18 100644 --- a/src/storage/mutate/AddEdgesProcessor.cpp +++ b/src/storage/mutate/AddEdgesProcessor.cpp @@ -161,8 +161,7 @@ void AddEdgesProcessor::doProcessWithIndex(const cpp2::AddEdgesRequest& req) { dummyLock.reserve(newEdges.size()); auto code = nebula::cpp2::ErrorCode::SUCCEEDED; - std::unordered_set visited; - visited.reserve(newEdges.size()); + deleteDupEdge(const_cast&>(newEdges)); for (auto& newEdge : newEdges) { auto edgeKey = *newEdge.key_ref(); auto l = std::make_tuple(spaceId_, @@ -203,9 +202,6 @@ void AddEdgesProcessor::doProcessWithIndex(const cpp2::AddEdgesRequest& req) { *edgeKey.edge_type_ref(), *edgeKey.ranking_ref(), edgeKey.dst_ref()->getStr()); - if (ifNotExists_ && !visited.emplace(key).second) { - continue; - } auto schema = env_->schemaMan_->getEdgeSchema(spaceId_, std::abs(*edgeKey.edge_type_ref())); if (!schema) { LOG(ERROR) << "Space " << spaceId_ << ", Edge " << *edgeKey.edge_type_ref() << " invalid"; @@ -439,7 +435,7 @@ ErrorOr AddEdgesProcessor::addEdges( } } /* - * step 3 , Insert new vertex data + * step 3 , Insert new edge data */ auto key = e.first; auto prop = e.second; @@ -488,5 +484,48 @@ std::vector AddEdgesProcessor::indexKeys( std::move(values).value()); } +/* + * Batch insert + * ifNotExist_ is true. Only keep the first one when edgeKey is same + * ifNotExist_ is false. Only keep the last one when edgeKey is same + */ +void AddEdgesProcessor::deleteDupEdge(std::vector& edges) { + std::unordered_set visited; + visited.reserve(edges.size()); + if (ifNotExists_) { + auto iter = edges.begin(); + while (iter != edges.end()) { + auto edgeKeyRef = iter->key_ref(); + auto key = NebulaKeyUtils::edgeKey(spaceVidLen_, + 0, // it's ok, just distinguish between different edgekey + edgeKeyRef->src_ref()->getStr(), + edgeKeyRef->get_edge_type(), + edgeKeyRef->get_ranking(), + edgeKeyRef->dst_ref()->getStr()); + if (!visited.emplace(key).second) { + iter = edges.erase(iter); + } else { + ++iter; + } + } + } else { + auto iter = edges.rbegin(); + while (iter != edges.rend()) { + auto edgeKeyRef = iter->key_ref(); + auto key = NebulaKeyUtils::edgeKey(spaceVidLen_, + 0, // it's ok, just distinguish between different edgekey + edgeKeyRef->src_ref()->getStr(), + edgeKeyRef->get_edge_type(), + edgeKeyRef->get_ranking(), + edgeKeyRef->dst_ref()->getStr()); + if (!visited.emplace(key).second) { + iter = decltype(iter)(edges.erase(std::next(iter).base())); + } else { + ++iter; + } + } + } +} + } // namespace storage } // namespace nebula diff --git a/src/storage/mutate/AddEdgesProcessor.h b/src/storage/mutate/AddEdgesProcessor.h index cec28b69e3c..7c3ee3d0c7f 100644 --- a/src/storage/mutate/AddEdgesProcessor.h +++ b/src/storage/mutate/AddEdgesProcessor.h @@ -49,6 +49,8 @@ class AddEdgesProcessor : public BaseProcessor { std::shared_ptr index, const meta::SchemaProviderIf* latestSchema); + void deleteDupEdge(std::vector& edges); + private: GraphSpaceID spaceId_; std::vector> indexes_; diff --git a/src/storage/mutate/AddVerticesProcessor.cpp b/src/storage/mutate/AddVerticesProcessor.cpp index 86a8ca9530e..83fcab649a3 100644 --- a/src/storage/mutate/AddVerticesProcessor.cpp +++ b/src/storage/mutate/AddVerticesProcessor.cpp @@ -147,8 +147,7 @@ void AddVerticesProcessor::doProcessWithIndex(const cpp2::AddVerticesRequest& re auto code = nebula::cpp2::ErrorCode::SUCCEEDED; // cache tagKey - std::unordered_set visited; - visited.reserve(vertices.size()); + deleteDupVid(const_cast&>(vertices)); for (auto& vertex : vertices) { auto vid = vertex.get_id().getStr(); const auto& newTags = vertex.get_tags(); @@ -181,9 +180,6 @@ void AddVerticesProcessor::doProcessWithIndex(const cpp2::AddVerticesRequest& re } auto key = NebulaKeyUtils::tagKey(spaceVidLen_, partId, vid, tagId); - if (ifNotExists_ && !visited.emplace(key).second) { - continue; - } auto props = newTag.get_props(); auto iter = propNamesMap.find(tagId); std::vector propNames; @@ -232,8 +228,7 @@ void AddVerticesProcessor::doProcessWithIndex(const cpp2::AddVerticesRequest& re if (oReader != nullptr) { auto ois = indexKeys(partId, vid, oReader.get(), index, schema.get()); if (!ois.empty()) { - // Check the index is building for the specified partition or - // not. + // Check the index is building for the specified partition or not auto indexState = env_->getIndexState(spaceId_, partId); if (env_->checkRebuilding(indexState)) { auto delOpKey = OperationKeyUtils::deleteOperationKey(partId); @@ -345,5 +340,36 @@ std::vector AddVerticesProcessor::indexKeys( spaceVidLen_, partId, index->get_index_id(), vId, std::move(values).value()); } +/* + * Batch insert + * ifNotExist_ is true. Only keep the first one when vid is same + * ifNotExist_ is false. Only keep the last one when vid is same + */ +void AddVerticesProcessor::deleteDupVid(std::vector& vertices) { + std::unordered_set visited; + visited.reserve(vertices.size()); + if (ifNotExists_) { + auto iter = vertices.begin(); + while (iter != vertices.end()) { + const auto& vid = iter->get_id().getStr(); + if (!visited.emplace(vid).second) { + iter = vertices.erase(iter); + } else { + ++iter; + } + } + } else { + auto iter = vertices.rbegin(); + while (iter != vertices.rend()) { + const auto& vid = iter->get_id().getStr(); + if (!visited.emplace(vid).second) { + iter = decltype(iter)(vertices.erase(std::next(iter).base())); + } else { + ++iter; + } + } + } +} + } // namespace storage } // namespace nebula diff --git a/src/storage/mutate/AddVerticesProcessor.h b/src/storage/mutate/AddVerticesProcessor.h index fecca60516b..71725c9b5be 100644 --- a/src/storage/mutate/AddVerticesProcessor.h +++ b/src/storage/mutate/AddVerticesProcessor.h @@ -44,6 +44,8 @@ class AddVerticesProcessor : public BaseProcessor { std::shared_ptr index, const meta::SchemaProviderIf* latestSchema); + void deleteDupVid(std::vector& vertices); + private: GraphSpaceID spaceId_; std::vector> indexes_; diff --git a/tests/tck/features/insert/InsertIfNotExists.feature b/tests/tck/features/insert/InsertIfNotExists.feature index 38eb9786d26..c766c6ddabe 100644 --- a/tests/tck/features/insert/InsertIfNotExists.feature +++ b/tests/tck/features/insert/InsertIfNotExists.feature @@ -276,3 +276,267 @@ Feature: Insert vertex and edge with if not exists | like.likeness | | 200 | And drop the used space + + Scenario: vertices index and data consistency check + Given an empty graph + And create a space with following options: + | partition_num | 9 | + | replica_factor | 1 | + | vid_type | FIXED_STRING(20) | + And having executed: + """ + CREATE TAG IF NOT EXISTS student(name string, age int); + CREATE TAG INDEX index_s_age on student(age); + """ + And wait 6 seconds + When try to execute query: + """ + INSERT VERTEX + student(name, age) + VALUES + "zhang":("zhang", 19), + "zhang":("zhang", 29), + "zhang":("zhang", 39), + "wang":("wang", 18), + "li":("li", 16), + "wang":("wang", 38); + """ + Then the execution should be successful + When executing query: + """ + LOOKUP ON student WHERE student.age < 30 YIELD student.age AS age + """ + Then the result should be, in any order, with relax comparison: + | age | + | 16 | + When executing query: + """ + LOOKUP ON student WHERE student.age > 30 YIELD student.age AS age + """ + Then the result should be, in any order, with relax comparison: + | age | + | 39 | + | 38 | + When executing query: + """ + LOOKUP ON student WHERE student.age < 30 YIELD student.name AS name, student.age AS age + """ + Then the result should be, in any order, with relax comparison: + | name | age | + | "li" | 16 | + When executing query: + """ + LOOKUP ON student WHERE student.age > 30 YIELD student.name as name, student.age AS age + """ + Then the result should be, in any order, with relax comparison: + | name | age | + | "zhang" | 39 | + | "wang" | 38 | + When executing query: + """ + FETCH PROP ON student "zhang", "wang", "li" YIELD student.name as name, student.age as age + """ + Then the result should be, in any order, with relax comparison: + | name | age | + | "zhang" | 39 | + | "wang" | 38 | + | "li" | 16 | + When try to execute query: + """ + DELETE TAG student FROM "zhang", "wang", "li"; + """ + Then the execution should be successful + When try to execute query: + """ + INSERT VERTEX IF NOT EXISTS + student(name, age) + VALUES + "zhao":("zhao", 19), + "zhao":("zhao", 29), + "zhao":("zhao", 39), + "qian":("qian", 18), + "sun":("sun", 16), + "qian":("qian", 38), + "chen":("chen", 40), + "chen":("chen", 35); + """ + Then the execution should be successful + When executing query: + """ + LOOKUP ON student WHERE student.age < 30 YIELD student.age AS age + """ + Then the result should be, in any order, with relax comparison: + | age | + | 19 | + | 18 | + | 16 | + When executing query: + """ + LOOKUP ON student WHERE student.age > 30 YIELD student.age AS age + """ + Then the result should be, in any order, with relax comparison: + | age | + | 40 | + When executing query: + """ + LOOKUP ON student WHERE student.age < 30 YIELD student.name AS name, student.age AS age + """ + Then the result should be, in any order, with relax comparison: + | name | age | + | "zhao" | 19 | + | "qian" | 18 | + | "sun" | 16 | + When executing query: + """ + LOOKUP ON student WHERE student.age > 30 YIELD student.name as name, student.age AS age + """ + Then the result should be, in any order, with relax comparison: + | name | age | + | "chen" | 40 | + When executing query: + """ + FETCH PROP ON student "zhao", "qian", "sun", "chen" YIELD student.name as name, student.age as age + """ + Then the result should be, in any order, with relax comparison: + | name | age | + | "zhao" | 19 | + | "qian" | 18 | + | "sun" | 16 | + | "chen" | 40 | + And drop the used space + + Scenario: edge index and data consistency check + Given an empty graph + And create a space with following options: + | partition_num | 9 | + | replica_factor | 1 | + | vid_type | FIXED_STRING(20) | + And having executed: + """ + CREATE TAG IF NOT EXISTS student(name string, age int); + CREATE EDGE IF NOT EXISTS like(likeness int, t1 int); + CREATE EDGE INDEX index_l_likeness on like(likeness); + """ + And wait 6 seconds + When try to execute query: + """ + INSERT VERTEX + student(name, age) + VALUES + "zhang":("zhang", 19), + "wang":("wang", 18), + "li":("li", 16); + INSERT EDGE + like(likeness, t1) + VALUES + "zhang"->"wang":(19, 19), + "zhang"->"li":(42, 42), + "zhang"->"li":(20, 20), + "zhang"->"wang":(39, 39), + "wang"->"li":(18, 18), + "wang"->"zhang":(41, 41); + """ + Then the execution should be successful + When executing query: + """ + LOOKUP ON like WHERE like.likeness < 30 YIELD src(edge) as src, dst(edge) as dst, like.likeness as likeness + """ + Then the result should be, in any order, with relax comparison: + | src | dst | likeness | + | "zhang" | "li" | 20 | + | "wang" | "li" | 18 | + When executing query: + """ + LOOKUP ON like WHERE like.likeness > 30 YIELD src(edge) as src, dst(edge) as dst, like.likeness as likeness + """ + Then the result should be, in any order, with relax comparison: + | src | dst | likeness | + | "zhang" | "wang" | 39 | + | "wang" | "zhang" | 41 | + When executing query: + """ + LOOKUP ON like WHERE like.likeness < 30 YIELD src(edge) as src, dst(edge) as dst, like.likeness as likeness, like.t1 as t1 + """ + Then the result should be, in any order, with relax comparison: + | src | dst | likeness | t1 | + | "zhang" | "li" | 20 | 20 | + | "wang" | "li" | 18 | 18 | + When executing query: + """ + LOOKUP ON like WHERE like.likeness > 30 YIELD src(edge) as src, dst(edge) as dst, like.likeness as likeness, like.t1 as t1 + """ + Then the result should be, in any order, with relax comparison: + | src | dst | likeness | t1 | + | "zhang" | "wang" | 39 | 39 | + | "wang" | "zhang" | 41 | 41 | + When executing query: + """ + FETCH PROP ON like "zhang"->"wang", "zhang"->"li", "wang"->"li", "wang"->"zhang" YIELD src(edge) as src, dst(edge) as dst, like.likeness as likeness + """ + Then the result should be, in any order, with relax comparison: + | src | dst | likeness | + | "zhang" | "wang" | 39 | + | "zhang" | "li" | 20 | + | "wang" | "li" | 18 | + | "wang" | "zhang" | 41 | + When try to execute query: + """ + DELETE EDGE like "zhang"->"wang", "zhang"->"li", "wang"->"li", "wang"->"zhang"; + """ + Then the execution should be successful + When try to execute query: + """ + INSERT EDGE IF NOT EXISTS + like(likeness, t1) + VALUES + "zhang"->"wang":(19, 19), + "zhang"->"li":(42, 42), + "zhang"->"li":(20, 20), + "zhang"->"wang":(39, 39), + "wang"->"li":(18, 18), + "wang"->"zhang":(41, 41); + """ + Then the execution should be successful + When executing query: + """ + LOOKUP ON like WHERE like.likeness < 30 YIELD src(edge) as src, dst(edge) as dst, like.likeness as likeness + """ + Then the result should be, in any order, with relax comparison: + | src | dst | likeness | + | "zhang" | "wang" | 19 | + | "wang" | "li" | 18 | + When executing query: + """ + LOOKUP ON like WHERE like.likeness > 30 YIELD src(edge) as src, dst(edge) as dst, like.likeness as likeness + """ + Then the result should be, in any order, with relax comparison: + | src | dst | likeness | + | "zhang" | "li" | 42 | + | "wang" | "zhang" | 41 | + When executing query: + """ + LOOKUP ON like WHERE like.likeness < 30 YIELD src(edge) as src, dst(edge) as dst, like.likeness as likeness, like.t1 as t1 + """ + Then the result should be, in any order, with relax comparison: + | src | dst | likeness | t1 | + | "zhang" | "wang" | 19 | 19 | + | "wang" | "li" | 18 | 18 | + When executing query: + """ + LOOKUP ON like WHERE like.likeness > 30 YIELD src(edge) as src, dst(edge) as dst, like.likeness as likeness, like.t1 as t1 + """ + Then the result should be, in any order, with relax comparison: + | src | dst | likeness | t1 | + | "zhang" | "li" | 42 | 42 | + | "wang" | "zhang" | 41 | 41 | + When executing query: + """ + FETCH PROP ON like "zhang"->"wang", "zhang"->"li", "wang"->"li", "wang"->"zhang" YIELD src(edge) as src, dst(edge) as dst, like.likeness as likeness + """ + Then the result should be, in any order, with relax comparison: + | src | dst | likeness | + | "zhang" | "wang" | 19 | + | "zhang" | "li" | 42 | + | "wang" | "li" | 18 | + | "wang" | "zhang" | 41 | + And drop the used space diff --git a/tests/tck/features/lookup/LookUp.feature b/tests/tck/features/lookup/LookUp.feature index bac70bd930f..2c54e6fe452 100644 --- a/tests/tck/features/lookup/LookUp.feature +++ b/tests/tck/features/lookup/LookUp.feature @@ -842,15 +842,16 @@ Feature: LookUpTest_Vid_String "104":("yyy", 28), "105":("zzz", 21), "106":("kkk", 21), + "121":("Useless", 60), "121":("Useless", 20); - INSERT VERTEX - team(name) - VALUES - "200":("Warriors"), - "201":("Nuggets"), - "202":("oopp"), - "203":("iiiooo"), - "204":("opl"); + INSERT VERTEX + team(name) + VALUES + "200":("Warriors"), + "201":("Nuggets"), + "202":("oopp"), + "203":("iiiooo"), + "204":("opl"); """ When executing query: """