Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(storage): faster InsertObject() uploads #9997

Merged
merged 3 commits into from
Oct 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 5 additions & 14 deletions google/cloud/storage/internal/curl_client.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1249,8 +1249,8 @@ StatusOr<ObjectMetadata> CurlClient::InsertObjectMediaMultipart(
AddOptionsWithSkip<CurlRequestBuilder, ContentType> no_content_type{builder};
request.ForEachOption(no_content_type);

// 2. Pick a separator that does not conflict with the request contents.
auto boundary = PickBoundary(request.contents());
// 2. create a random separator which is unlikely to exist in the payload.
auto const boundary = MakeBoundary();
builder.AddHeader("content-type: multipart/related; boundary=" + boundary);
builder.AddQueryParameter("uploadType", "multipart");
builder.AddQueryParameter("name", request.object_name());
Expand Down Expand Up @@ -1303,18 +1303,9 @@ StatusOr<ObjectMetadata> CurlClient::InsertObjectMediaMultipart(
std::move(builder).BuildRequest().MakeRequest(contents));
}

std::string CurlClient::PickBoundary(std::string const& text_to_avoid) {
// We need to find a string that is *not* found in `text_to_avoid`, we pick
// a string at random, and see if it is in `text_to_avoid`, if it is, we grow
// the string with random characters and start from where we last found a
// the candidate. Eventually we will find something, though it might be
// larger than `text_to_avoid`. And we only make (approximately) one pass
// over `text_to_avoid`.
auto generate_candidate = [this]() {
std::unique_lock<std::mutex> lk(mu_);
return GenerateMessageBoundaryCandidate(generator_);
};
return GenerateMessageBoundary(text_to_avoid, generate_candidate);
std::string CurlClient::MakeBoundary() {
std::unique_lock<std::mutex> lk(mu_);
return GenerateMessageBoundaryCandidate(generator_);
}

StatusOr<ObjectMetadata> CurlClient::InsertObjectMediaSimple(
Expand Down
2 changes: 1 addition & 1 deletion google/cloud/storage/internal/curl_client.h
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ class CurlClient : public RawClient,
/// Insert an object using uploadType=multipart.
StatusOr<ObjectMetadata> InsertObjectMediaMultipart(
InsertObjectMediaRequest const& request);
std::string PickBoundary(std::string const& text_to_avoid);
std::string MakeBoundary();

/// Insert an object using uploadType=media.
StatusOr<ObjectMetadata> InsertObjectMediaSimple(
Expand Down
9 changes: 5 additions & 4 deletions google/cloud/storage/internal/generate_message_boundary.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,11 @@ std::string GenerateMessageBoundary(

std::string GenerateMessageBoundaryCandidate(
google::cloud::internal::DefaultPRNG& generator) {
auto candidate = std::string{
"abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"};
std::shuffle(candidate.begin(), candidate.end(), generator);
return candidate;
auto constexpr kCandidateLength = 64;
return google::cloud::internal::Sample(generator, kCandidateLength,
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz"
"0123456789");
}

} // namespace internal
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,29 +37,15 @@ namespace {
// L1 Instruction 32 KiB (x48)
// L2 Unified 1024 KiB (x48)
// L3 Unified 39424 KiB (x2)
// ---------------------------------------------------------------------------------------------
// Benchmark Time CPU Iterations
// ---------------------------------------------------------------------------------------------
// ...
// GenerateBoundaryFixture/GenerateBoundary_mean 20478679 ns 20475235 ns 100
// GenerateBoundaryFixture/GenerateBoundary_median 20583724 ns 20580943 ns 100
// GenerateBoundaryFixture/GenerateBoundary_stddev 335315 ns 335459 ns 100
// GenerateBoundaryFixture/GenerateBoundary_cv 1.64 % 1.64 % 100
// ...
// GenerateBoundaryFixture/GenerateBoundaryOld_mean 20809894 ns 20806317 ns 100
// GenerateBoundaryFixture/GenerateBoundaryOld_median 20520133 ns 20517279 ns 100
// GenerateBoundaryFixture/GenerateBoundaryOld_stddev 1284277 ns 1284334 ns 100
// GenerateBoundaryFixture/GenerateBoundaryOld_cv 6.17 % 6.17 % 100
// ...
// GenerateBoundaryFixture/WorstCase_mean 100747489 ns 100727911 ns 100
// GenerateBoundaryFixture/WorstCase_median 101026913 ns 101006689 ns 100
// GenerateBoundaryFixture/WorstCase_stddev 1285934 ns 1285884 ns 100
// GenerateBoundaryFixture/WorstCase_cv 1.28 % 1.28 % 100
// ...
// GenerateBoundaryFixture/BestCase_mean 9584895 ns 9583080 ns 100
// GenerateBoundaryFixture/BestCase_median 9598452 ns 9597243 ns 100
// GenerateBoundaryFixture/BestCase_stddev 90679 ns 90643 ns 100
// GenerateBoundaryFixture/BestCase_cv 0.95 % 0.95 % 100
// Load Average: 8.44, 25.15, 23.46
// -------------------------------------------------------------------------------------------------
// Benchmark Time CPU Iterations
// -------------------------------------------------------------------------------------------------
// GenerateBoundaryFixture/GenerateBoundary 505 ns 505 ns 1385317
// GenerateBoundaryFixture/GenerateBoundaryWithValidation 20031391 ns 20025303 ns 35
// GenerateBoundaryFixture/GenerateBoundaryOld 20133230 ns 20129379 ns 35
// GenerateBoundaryFixture/WorstCase 100998844 ns 100985746 ns 7
// GenerateBoundaryFixture/BestCase 9739599 ns 9736802 ns 69
// clang-format on

auto constexpr kMessageSize = 128 * 1024 * 1024;
Expand Down Expand Up @@ -100,6 +86,15 @@ BENCHMARK_F(GenerateBoundaryFixture, GenerateBoundary)
(benchmark::State& state) {
auto make_string = [this]() { return GenerateCandidate(); };

for (auto _ : state) {
benchmark::DoNotOptimize(make_string());
}
}

BENCHMARK_F(GenerateBoundaryFixture, GenerateBoundaryWithValidation)
(benchmark::State& state) {
auto make_string = [this]() { return GenerateCandidate(); };

for (auto _ : state) {
benchmark::DoNotOptimize(GenerateMessageBoundary(message(), make_string));
}
Expand Down
13 changes: 5 additions & 8 deletions google/cloud/storage/internal/rest_client.cc
Original file line number Diff line number Diff line change
Expand Up @@ -333,12 +333,9 @@ StatusOr<BucketMetadata> RestClient::LockBucketRetentionPolicy(
std::move(builder).BuildRequest(), {absl::MakeConstSpan(std::string{})}));
}

std::string RestClient::PickBoundary(std::string const& text_to_avoid) {
auto generate_candidate = [this]() {
std::unique_lock<std::mutex> lk(mu_);
return GenerateMessageBoundaryCandidate(generator_);
};
return GenerateMessageBoundary(text_to_avoid, generate_candidate);
std::string RestClient::MakeBoundary() {
std::unique_lock<std::mutex> lk(mu_);
return GenerateMessageBoundaryCandidate(generator_);
}

StatusOr<ObjectMetadata> RestClient::InsertObjectMediaMultipart(
Expand All @@ -358,8 +355,8 @@ StatusOr<ObjectMetadata> RestClient::InsertObjectMediaMultipart(
request.GetOption<UserIp>().value());
}

// 2. Pick a separator that does not conflict with the request contents.
auto boundary = PickBoundary(request.contents());
// 2. create a random separator which is unlikely to exist in the payload.
auto const boundary = MakeBoundary();
builder.AddHeader("content-type", "multipart/related; boundary=" + boundary);
builder.AddQueryParameter("uploadType", "multipart");
builder.AddQueryParameter("name", request.object_name());
Expand Down
2 changes: 1 addition & 1 deletion google/cloud/storage/internal/rest_client.h
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ class RestClient : public RawClient,
StatusOr<ObjectMetadata> InsertObjectMediaSimple(
InsertObjectMediaRequest const& request);

std::string PickBoundary(std::string const& text_to_avoid);
std::string MakeBoundary();
StatusOr<std::unique_ptr<ObjectReadSource>> ReadObjectXml(
ReadObjectRangeRequest const& request);

Expand Down