From 018a74dca2f76aa652ccc1dc842b5887f64f3895 Mon Sep 17 00:00:00 2001 From: Lukas Lalinsky Date: Sat, 2 Mar 2024 18:35:41 +0100 Subject: [PATCH] Segment Builder test --- CMakeLists.txt | 4 ++++ src/fpindex/io/directory.h | 17 +++++++++++++++++ src/fpindex/io/file.h | 2 ++ src/fpindex/io/memory_directory.cpp | 21 +++++++++++++++++++++ src/fpindex/io/memory_directory.h | 21 +++++++++++++++++++++ src/fpindex/io/memory_file.cpp | 14 +++++++------- src/fpindex/segment.cpp | 1 + src/fpindex/segment_builder.cpp | 6 +++--- src/fpindex/segment_builder.h | 2 +- src/fpindex/segment_builder_test.cpp | 23 +++++++++++++++++++++++ 10 files changed, 100 insertions(+), 11 deletions(-) create mode 100644 src/fpindex/io/directory.h create mode 100644 src/fpindex/io/memory_directory.cpp create mode 100644 src/fpindex/io/memory_directory.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 84f13e1..5f5af9b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -97,6 +97,10 @@ set(fpindexlib_SOURCES src/fpindex/segment.cpp src/fpindex/segment_builder.h src/fpindex/segment_builder.cpp + src/fpindex/io/file.h + src/fpindex/io/directory.h + src/fpindex/io/memory_directory.h + src/fpindex/io/memory_directory.cpp src/fpindex/io/memory_file.h src/fpindex/io/memory_file.cpp src/fpindex/proto/internal.pb.h diff --git a/src/fpindex/io/directory.h b/src/fpindex/io/directory.h new file mode 100644 index 0000000..de85a29 --- /dev/null +++ b/src/fpindex/io/directory.h @@ -0,0 +1,17 @@ +#pragma once + +#include +#include + +#include "fpindex/io/file.h" + +namespace fpindex { +namespace io { + +class Directory { + public: + virtual std::shared_ptr OpenFile(const std::string& name, bool create = false) = 0; +}; + +} // namespace io +} // namespace fpindex diff --git a/src/fpindex/io/file.h b/src/fpindex/io/file.h index 3456931..d5bad2e 100644 --- a/src/fpindex/io/file.h +++ b/src/fpindex/io/file.h @@ -8,6 +8,7 @@ namespace io { class ZeroCopyInputStream; class ZeroCopyOutputStream; class CodedInputStream; +class CodedOutputStream; } // namespace io } // namespace protobuf } // namespace google @@ -18,6 +19,7 @@ namespace io { using google::protobuf::io::ZeroCopyInputStream; using google::protobuf::io::ZeroCopyOutputStream; using google::protobuf::io::CodedInputStream; +using google::protobuf::io::CodedOutputStream; class File { public: diff --git a/src/fpindex/io/memory_directory.cpp b/src/fpindex/io/memory_directory.cpp new file mode 100644 index 0000000..9554e23 --- /dev/null +++ b/src/fpindex/io/memory_directory.cpp @@ -0,0 +1,21 @@ +#include "fpindex/io/memory_directory.h" + +namespace fpindex { +namespace io { + +std::shared_ptr MemoryDirectory::OpenFile(const std::string &name, bool create) { + std::lock_guard lock(mutex_); + auto iter = files_.find(name); + if (iter != files_.end()) { + return iter->second; + } + if (!create) { + return nullptr; + } + auto file = std::make_shared(); + files_[name] = file; + return file; +} + +} +} diff --git a/src/fpindex/io/memory_directory.h b/src/fpindex/io/memory_directory.h new file mode 100644 index 0000000..17efd7b --- /dev/null +++ b/src/fpindex/io/memory_directory.h @@ -0,0 +1,21 @@ +#pragma once + +#include +#include + +#include "fpindex/io/directory.h" +#include "fpindex/io/memory_file.h" + +namespace fpindex { +namespace io { + +class MemoryDirectory : public Directory { + public: + std::shared_ptr OpenFile(const std::string& name, bool create = false) override; + private: + std::mutex mutex_; + std::map> files_; +}; + +} // namespace io +} // namespace fpindex diff --git a/src/fpindex/io/memory_file.cpp b/src/fpindex/io/memory_file.cpp index ef835f3..0679b8f 100644 --- a/src/fpindex/io/memory_file.cpp +++ b/src/fpindex/io/memory_file.cpp @@ -9,23 +9,23 @@ namespace io { MemoryFile::MemoryFile() {} -size_t MemoryFile::Size() { - return data_.size(); -} +size_t MemoryFile::Size() { return data_.size(); } std::unique_ptr MemoryFile::GetInputStream() { -return std::make_unique(data_.data(), data_.size()); + return std::make_unique(data_.data(), data_.size()); } std::unique_ptr MemoryFile::GetCodedInputStream(size_t offset, size_t size) { if (offset > data_.size()) { - return std::make_unique(reinterpret_cast(data_.data()), 0); + return std::make_unique(reinterpret_cast(data_.data()), + 0); } - return std::make_unique(reinterpret_cast(data_.data()) + offset, std::min(size, data_.size() - offset)); + return std::make_unique( + reinterpret_cast(data_.data()) + offset, std::min(size, data_.size() - offset)); } std::unique_ptr MemoryFile::GetOutputStream() { - return std::make_unique(&data_); + return std::make_unique(&data_); } } // namespace io diff --git a/src/fpindex/segment.cpp b/src/fpindex/segment.cpp index d1c3411..556a91d 100644 --- a/src/fpindex/segment.cpp +++ b/src/fpindex/segment.cpp @@ -125,6 +125,7 @@ bool Segment::Load(const std::shared_ptr& file) { if (items.empty()) { return false; } + qDebug() << "new block: " << items.front().first << " " << items.back().first; block_index_.emplace_back(items.front().first, items.back().first); } diff --git a/src/fpindex/segment_builder.cpp b/src/fpindex/segment_builder.cpp index e38513d..b9a190f 100644 --- a/src/fpindex/segment_builder.cpp +++ b/src/fpindex/segment_builder.cpp @@ -15,7 +15,7 @@ bool SegmentBuilder::Add(uint32_t id, const std::vector& hashes) { return false; } for (auto hash : hashes) { - data_.insert(std::make_pair(id, hash)); + data_.insert(std::make_pair(hash, id)); } return true; } @@ -41,7 +41,7 @@ bool SegmentBuilder::Search(const std::vector& hashes, std::vectorfirst]++; + scores[it->second]++; } } results->clear(); @@ -52,7 +52,7 @@ bool SegmentBuilder::Search(const std::vector& hashes, std::vector &file) { std::shared_lock lock; if (!frozen_) { lock = std::shared_lock(mutex_); diff --git a/src/fpindex/segment_builder.h b/src/fpindex/segment_builder.h index 35f653a..726782b 100644 --- a/src/fpindex/segment_builder.h +++ b/src/fpindex/segment_builder.h @@ -25,7 +25,7 @@ class SegmentBuilder : public BaseSegment { bool IsFrozen(); // Serialize the segment data to the output stream. - bool Serialize(io::File* file); + bool Save(const std::shared_ptr &file); private: std::shared_mutex mutex_; diff --git a/src/fpindex/segment_builder_test.cpp b/src/fpindex/segment_builder_test.cpp index 1476030..b149c43 100644 --- a/src/fpindex/segment_builder_test.cpp +++ b/src/fpindex/segment_builder_test.cpp @@ -1,4 +1,6 @@ +#include "fpindex/segment.h" #include "fpindex/segment_builder.h" +#include "fpindex/io/memory_file.h" #include @@ -25,3 +27,24 @@ TEST(SegmentBuilderTest, SearchExactMatch) { ASSERT_EQ(1, results[0].id()); ASSERT_EQ(3, results[0].score()); } + +TEST(SegmentBuilderTest, Save) { + SegmentBuilder segment(0); + segment.Add(1, {1, 2, 3}); + + auto file = std::make_shared(); + segment.Save(file); + + Segment new_segment(0); + new_segment.Load(file); + + std::vector query{1, 2, 3}; + std::vector results; + ASSERT_TRUE(new_segment.Search(query, &results)); + for (const auto& result : results) { + std::cout << result.id() << " " << result.score() << std::endl; + } + ASSERT_EQ(1, results.size()); + ASSERT_EQ(1, results[0].id()); + ASSERT_EQ(3, results[0].score()); +}