Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(storage): faster InsertObject() uploads #9997

Merged
merged 3 commits into from
Oct 7, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 3 additions & 6 deletions google/cloud/storage/internal/curl_client.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1304,16 +1304,13 @@ StatusOr<ObjectMetadata> CurlClient::InsertObjectMediaMultipart(
}

// Picks the message boundary for a multipart payload. `text_to_avoid` is the
// text the boundary should not appear in.
//
// When the `ValidateInsertObjectBoundary` option is false the first random
// candidate is returned without looking at `text_to_avoid`. Otherwise the
// choice is delegated to `GenerateMessageBoundary()`, which receives both the
// text and the candidate generator.
std::string CurlClient::PickBoundary(std::string const& text_to_avoid) {
  // Hold `mu_` while drawing from `generator_`.
  auto next_candidate = [this]() {
    std::lock_guard<std::mutex> guard(mu_);
    return GenerateMessageBoundaryCandidate(generator_);
  };
  bool const validate = CurrentOptions().get<ValidateInsertObjectBoundary>();
  if (validate) return GenerateMessageBoundary(text_to_avoid, next_candidate);
  return next_candidate();
}

Expand Down
9 changes: 5 additions & 4 deletions google/cloud/storage/internal/generate_message_boundary.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,11 @@ std::string GenerateMessageBoundary(

// Generates a candidate message boundary using `generator` as the source of
// randomness.
//
// NOTE(review): this body holds two implementations back to back. Control
// always leaves through the first `return`, so the `kCandidateLength` /
// `Sample()` statements after it are unreachable as written. This looks like
// both sides of a diff hunk kept together -- confirm which one is intended and
// remove the other.
std::string GenerateMessageBoundaryCandidate(
google::cloud::internal::DefaultPRNG& generator) {
// First implementation: start from the 62 alphanumeric characters and return
// them in the order produced by `std::shuffle()` with `generator`.
auto candidate = std::string{
"abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"};
std::shuffle(candidate.begin(), candidate.end(), generator);
return candidate;
// Second implementation (never reached): passes `generator`, a length of 64
// and the same alphanumeric alphabet to `Sample()` and returns its result.
auto constexpr kCandidateLength = 64;
return google::cloud::internal::Sample(generator, kCandidateLength,
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz"
"0123456789");
}

} // namespace internal
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,29 +37,15 @@ namespace {
// L1 Instruction 32 KiB (x48)
// L2 Unified 1024 KiB (x48)
// L3 Unified 39424 KiB (x2)
// ---------------------------------------------------------------------------------------------
// Benchmark Time CPU Iterations
// ---------------------------------------------------------------------------------------------
// ...
// GenerateBoundaryFixture/GenerateBoundary_mean 20478679 ns 20475235 ns 100
// GenerateBoundaryFixture/GenerateBoundary_median 20583724 ns 20580943 ns 100
// GenerateBoundaryFixture/GenerateBoundary_stddev 335315 ns 335459 ns 100
// GenerateBoundaryFixture/GenerateBoundary_cv 1.64 % 1.64 % 100
// ...
// GenerateBoundaryFixture/GenerateBoundaryOld_mean 20809894 ns 20806317 ns 100
// GenerateBoundaryFixture/GenerateBoundaryOld_median 20520133 ns 20517279 ns 100
// GenerateBoundaryFixture/GenerateBoundaryOld_stddev 1284277 ns 1284334 ns 100
// GenerateBoundaryFixture/GenerateBoundaryOld_cv 6.17 % 6.17 % 100
// ...
// GenerateBoundaryFixture/WorstCase_mean 100747489 ns 100727911 ns 100
// GenerateBoundaryFixture/WorstCase_median 101026913 ns 101006689 ns 100
// GenerateBoundaryFixture/WorstCase_stddev 1285934 ns 1285884 ns 100
// GenerateBoundaryFixture/WorstCase_cv 1.28 % 1.28 % 100
// ...
// GenerateBoundaryFixture/BestCase_mean 9584895 ns 9583080 ns 100
// GenerateBoundaryFixture/BestCase_median 9598452 ns 9597243 ns 100
// GenerateBoundaryFixture/BestCase_stddev 90679 ns 90643 ns 100
// GenerateBoundaryFixture/BestCase_cv 0.95 % 0.95 % 100
// Load Average: 8.44, 25.15, 23.46
// -------------------------------------------------------------------------------------------------
// Benchmark Time CPU Iterations
// -------------------------------------------------------------------------------------------------
// GenerateBoundaryFixture/GenerateBoundary 505 ns 505 ns 1385317
// GenerateBoundaryFixture/GenerateBoundaryWithValidation 20031391 ns 20025303 ns 35
// GenerateBoundaryFixture/GenerateBoundaryOld 20133230 ns 20129379 ns 35
// GenerateBoundaryFixture/WorstCase 100998844 ns 100985746 ns 7
// GenerateBoundaryFixture/BestCase 9739599 ns 9736802 ns 69
// clang-format on

auto constexpr kMessageSize = 128 * 1024 * 1024;
Expand Down Expand Up @@ -100,6 +86,15 @@ BENCHMARK_F(GenerateBoundaryFixture, GenerateBoundary)
(benchmark::State& state) {
auto make_string = [this]() { return GenerateCandidate(); };

for (auto _ : state) {
benchmark::DoNotOptimize(make_string());
}
}

BENCHMARK_F(GenerateBoundaryFixture, GenerateBoundaryWithValidation)
(benchmark::State& state) {
auto make_string = [this]() { return GenerateCandidate(); };

for (auto _ : state) {
benchmark::DoNotOptimize(GenerateMessageBoundary(message(), make_string));
}
Expand Down
3 changes: 3 additions & 0 deletions google/cloud/storage/internal/rest_client.cc
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,9 @@ std::string RestClient::PickBoundary(std::string const& text_to_avoid) {
std::unique_lock<std::mutex> lk(mu_);
return GenerateMessageBoundaryCandidate(generator_);
};
if (!CurrentOptions().get<ValidateInsertObjectBoundary>()) {
return generate_candidate();
}
return GenerateMessageBoundary(text_to_avoid, generate_candidate);
}

Expand Down
26 changes: 25 additions & 1 deletion google/cloud/storage/options.h
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,30 @@ struct IdempotencyPolicyOption {
using Type = std::shared_ptr<IdempotencyPolicy>;
};

/**
 * Validate the message boundary used in `InsertObject()` multipart uploads.
 *
 * The `InsertObject()` function often uses [JSON multipart uploads], i.e., a
 * type of [single request upload] where the HTTP payload contains both the
 * metadata and data for the object. These uploads require a message boundary
 * that is not found in the payload. By default, the client library will
 * generate a random separator and optimistically assume it will not be found
 * in the payload. In the worst case scenario, an invalid separator would be
 * detected by the service, and result in an error.
 *
 * When this option is enabled the client library will validate the message.
 * If it finds a collision, it will repeatedly generate new random strings
 * until one that does not collide is found. The space of possible random
 * strings is large enough that this should be found in one or two attempts.
 *
 * [single request upload]:
 * https://cloud.google.com/storage/docs/uploads-downloads
 *
 * [JSON multipart uploads]:
 * https://cloud.google.com/storage/docs/json_api/v1/objects/insert
 */
struct ValidateInsertObjectBoundary {
  /// The option holds a single flag: `true` enables boundary validation.
  using Type = bool;
};

/// The complete list of options accepted by `storage::Client`.
using ClientOptionList = ::google::cloud::OptionList<
RestEndpointOption, IamEndpointOption, Oauth2CredentialsOption,
Expand All @@ -295,7 +319,7 @@ using ClientOptionList = ::google::cloud::OptionList<
MaximumCurlSocketRecvSizeOption, MaximumCurlSocketSendSizeOption,
TransferStallTimeoutOption, RetryPolicyOption, BackoffPolicyOption,
IdempotencyPolicyOption, CARootsFilePathOption,
storage_experimental::HttpVersionOption>;
ValidateInsertObjectBoundary, storage_experimental::HttpVersionOption>;

GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END
} // namespace storage
Expand Down