Skip to content

Commit

Permalink
Segment Builder test
Browse files Browse the repository at this point in the history
  • Loading branch information
lalinsky committed Mar 2, 2024
1 parent cf6e4e5 commit 018a74d
Show file tree
Hide file tree
Showing 10 changed files with 100 additions and 11 deletions.
4 changes: 4 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,10 @@ set(fpindexlib_SOURCES
src/fpindex/segment.cpp
src/fpindex/segment_builder.h
src/fpindex/segment_builder.cpp
src/fpindex/io/file.h
src/fpindex/io/directory.h
src/fpindex/io/memory_directory.h
src/fpindex/io/memory_directory.cpp
src/fpindex/io/memory_file.h
src/fpindex/io/memory_file.cpp
src/fpindex/proto/internal.pb.h
Expand Down
17 changes: 17 additions & 0 deletions src/fpindex/io/directory.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#pragma once

#include <memory>
#include <string>

#include "fpindex/io/file.h"

namespace fpindex {
namespace io {

// Abstract interface for a container of files addressed by name.
class Directory {
public:
    // Polymorphic base: the destructor must be virtual so that an
    // implementation is destroyed correctly when it is deleted through a
    // pointer to Directory.
    virtual ~Directory() = default;

    // Opens the file called `name` and returns a shared handle to it.
    // `create` defaults to false; how a missing file is handled (with or
    // without `create`) is defined by the implementation.
    virtual std::shared_ptr<File> OpenFile(const std::string& name, bool create = false) = 0;
};

} // namespace io
} // namespace fpindex
2 changes: 2 additions & 0 deletions src/fpindex/io/file.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ namespace io {
class ZeroCopyInputStream;
class ZeroCopyOutputStream;
class CodedInputStream;
class CodedOutputStream;
} // namespace io
} // namespace protobuf
} // namespace google
Expand All @@ -18,6 +19,7 @@ namespace io {
using google::protobuf::io::ZeroCopyInputStream;
using google::protobuf::io::ZeroCopyOutputStream;
using google::protobuf::io::CodedInputStream;
using google::protobuf::io::CodedOutputStream;

class File {
public:
Expand Down
21 changes: 21 additions & 0 deletions src/fpindex/io/memory_directory.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#include "fpindex/io/memory_directory.h"

namespace fpindex {
namespace io {

// Returns the file registered under `name`. When no such file exists, a new
// empty MemoryFile is registered and returned if `create` is true; otherwise
// nullptr is returned. The whole lookup/insert runs with mutex_ held.
std::shared_ptr<File> MemoryDirectory::OpenFile(const std::string &name, bool create) {
    std::lock_guard<std::mutex> guard(mutex_);

    const auto existing = files_.find(name);
    if (existing == files_.end()) {
        if (create) {
            auto created = std::make_shared<MemoryFile>();
            files_[name] = created;
            return created;
        }
        return nullptr;
    }
    return existing->second;
}

}
}
21 changes: 21 additions & 0 deletions src/fpindex/io/memory_directory.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#pragma once

#include <map>
#include <mutex>

#include "fpindex/io/directory.h"
#include "fpindex/io/memory_file.h"

namespace fpindex {
namespace io {

class MemoryDirectory : public Directory {
public:
std::shared_ptr<File> OpenFile(const std::string& name, bool create = false) override;
private:
std::mutex mutex_;
std::map<std::string, std::shared_ptr<MemoryFile>> files_;
};

} // namespace io
} // namespace fpindex
14 changes: 7 additions & 7 deletions src/fpindex/io/memory_file.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,23 +9,23 @@ namespace io {

MemoryFile::MemoryFile() {}

size_t MemoryFile::Size() {
return data_.size();
}
size_t MemoryFile::Size() { return data_.size(); }

std::unique_ptr<ZeroCopyInputStream> MemoryFile::GetInputStream() {
return std::make_unique<google::protobuf::io::ArrayInputStream>(data_.data(), data_.size());
return std::make_unique<google::protobuf::io::ArrayInputStream>(data_.data(), data_.size());
}

std::unique_ptr<CodedInputStream> MemoryFile::GetCodedInputStream(size_t offset, size_t size) {
if (offset > data_.size()) {
return std::make_unique<google::protobuf::io::CodedInputStream>(reinterpret_cast<const uint8_t *>(data_.data()), 0);
return std::make_unique<google::protobuf::io::CodedInputStream>(reinterpret_cast<const uint8_t *>(data_.data()),
0);
}
return std::make_unique<google::protobuf::io::CodedInputStream>(reinterpret_cast<const uint8_t *>(data_.data()) + offset, std::min(size, data_.size() - offset));
return std::make_unique<google::protobuf::io::CodedInputStream>(
reinterpret_cast<const uint8_t *>(data_.data()) + offset, std::min(size, data_.size() - offset));
}

std::unique_ptr<ZeroCopyOutputStream> MemoryFile::GetOutputStream() {
return std::make_unique<google::protobuf::io::StringOutputStream>(&data_);
return std::make_unique<google::protobuf::io::StringOutputStream>(&data_);
}

} // namespace io
Expand Down
1 change: 1 addition & 0 deletions src/fpindex/segment.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ bool Segment::Load(const std::shared_ptr<io::File>& file) {
if (items.empty()) {
return false;
}
qDebug() << "new block: " << items.front().first << " " << items.back().first;
block_index_.emplace_back(items.front().first, items.back().first);
}

Expand Down
6 changes: 3 additions & 3 deletions src/fpindex/segment_builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ bool SegmentBuilder::Add(uint32_t id, const std::vector<uint32_t>& hashes) {
return false;
}
for (auto hash : hashes) {
data_.insert(std::make_pair(id, hash));
data_.insert(std::make_pair(hash, id));
}
return true;
}
Expand All @@ -41,7 +41,7 @@ bool SegmentBuilder::Search(const std::vector<uint32_t>& hashes, std::vector<Sea
for (auto hash : hashes) {
auto range = data_.equal_range(hash);
for (auto it = range.first; it != range.second; ++it) {
scores[it->first]++;
scores[it->second]++;
}
}
results->clear();
Expand All @@ -52,7 +52,7 @@ bool SegmentBuilder::Search(const std::vector<uint32_t>& hashes, std::vector<Sea
return true;
}

bool SegmentBuilder::Serialize(io::File* file) {
bool SegmentBuilder::Save(const std::shared_ptr<io::File> &file) {
std::shared_lock<std::shared_mutex> lock;
if (!frozen_) {
lock = std::shared_lock<std::shared_mutex>(mutex_);
Expand Down
2 changes: 1 addition & 1 deletion src/fpindex/segment_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class SegmentBuilder : public BaseSegment {
bool IsFrozen();

// Save the segment data to the given file.
bool Serialize(io::File* file);
bool Save(const std::shared_ptr<io::File> &file);

private:
std::shared_mutex mutex_;
Expand Down
23 changes: 23 additions & 0 deletions src/fpindex/segment_builder_test.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
#include "fpindex/segment.h"
#include "fpindex/segment_builder.h"
#include "fpindex/io/memory_file.h"

#include <gtest/gtest.h>

Expand All @@ -25,3 +27,24 @@ TEST(SegmentBuilderTest, SearchExactMatch) {
ASSERT_EQ(1, results[0].id());
ASSERT_EQ(3, results[0].score());
}

// Round trip: build a segment in memory, save it to a MemoryFile, load it
// into a fresh Segment and check that a search returns the stored item.
TEST(SegmentBuilderTest, Save) {
    SegmentBuilder segment(0);
    ASSERT_TRUE(segment.Add(1, {1, 2, 3}));

    auto file = std::make_shared<io::MemoryFile>();
    // Check each step so a failure is reported where it happens instead of
    // surfacing later as an unexpected search result.
    ASSERT_TRUE(segment.Save(file));

    Segment new_segment(0);
    ASSERT_TRUE(new_segment.Load(file));

    std::vector<uint32_t> query{1, 2, 3};
    std::vector<SearchResult> results;
    ASSERT_TRUE(new_segment.Search(query, &results));
    // Must hold before results[0] is dereferenced below.
    ASSERT_EQ(1, results.size());
    ASSERT_EQ(1, results[0].id());
    ASSERT_EQ(3, results[0].score());
}

0 comments on commit 018a74d

Please sign in to comment.