Skip to content

Commit

Permalink
Use a single part_list, instead of separate etag_list and checksum_list
Browse files Browse the repository at this point in the history
  • Loading branch information
graebm committed Jun 30, 2023
1 parent d1bbce9 commit ca0335f
Show file tree
Hide file tree
Showing 12 changed files with 158 additions and 181 deletions.
16 changes: 7 additions & 9 deletions include/aws/s3/private/s3_auto_ranged_put.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,15 +62,13 @@ struct aws_s3_auto_ranged_put {

/* Members to only be used when the mutex in the base type is locked. */
struct {
/* Array list of `struct aws_string *`. */
struct aws_array_list etag_list;

/**
* Array list of `struct aws_byte_buf *`.
* Very similar to the etag_list used in complete_multipart_upload to create the XML payload. Each part will set
* the corresponding index to its checksum result.
**/
struct aws_array_list encoded_checksum_list;
/* Array list of `struct aws_s3_put_part_info *`
* Info about each part, that we need to remember for CompleteMultipartUpload.
* This is updated as we upload each part.
* If resuming an upload, we first call ListParts and store the details
* of previously uploaded parts here. In this case, the array may start with gaps
* (e.g. if parts 1 and 3 were previously uploaded, but not part 2). */
struct aws_array_list part_list;

struct aws_s3_paginated_operation *list_parts_operation;
struct aws_string *list_parts_continuation_token;
Expand Down
2 changes: 1 addition & 1 deletion include/aws/s3/private/s3_copy_object.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ struct aws_s3_copy_object {

/* Members to only be used when the mutex in the base type is locked. */
struct {
struct aws_array_list etag_list;
struct aws_array_list part_list;

/* obtained through a HEAD request against the source object */
uint64_t content_length;
Expand Down
7 changes: 7 additions & 0 deletions include/aws/s3/private/s3_meta_request_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,13 @@ struct aws_s3_meta_request {
struct aws_s3_checksum *meta_request_level_running_response_sum;
};

/* Info for each part, that we need to remember until we send CompleteMultipartUpload */
struct aws_s3_put_part_info {
uint64_t size;
struct aws_string *etag;
struct aws_byte_buf checksum_base64;
};

AWS_EXTERN_C_BEGIN

/* Initialize the base meta request structure. */
Expand Down
5 changes: 2 additions & 3 deletions include/aws/s3/private/s3_request_messages.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,16 +118,15 @@ struct aws_http_message *aws_s3_upload_part_copy_message_new(
bool should_compute_content_md5);

/* Create an HTTP request for an S3 Complete-Multipart-Upload request. Creates the necessary XML payload using the
* passed in array list of ETags. (Each ETag is assumed to be an aws_string*) Buffer passed in will be used to store
* passed in array list of `struct aws_s3_put_part_info *`. Buffer passed in will be used to store
* said XML payload, which will be used as the body. */
AWS_S3_API
struct aws_http_message *aws_s3_complete_multipart_message_new(
struct aws_allocator *allocator,
struct aws_http_message *base_message,
struct aws_byte_buf *body_buffer,
const struct aws_string *upload_id,
const struct aws_array_list *etags,
const struct aws_array_list *checksums,
const struct aws_array_list *parts,
enum aws_s3_checksum_algorithm algorithm);

AWS_S3_API
Expand Down
4 changes: 2 additions & 2 deletions include/aws/s3/s3_client.h
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,7 @@ struct aws_s3_checksum_config {
enum aws_s3_checksum_algorithm checksum_algorithm;

/**
* Enable checksum mode header will be attached to get requests, this will tell s3 to send back checksums headers if
* Enable checksum mode header will be attached to GET requests, this will tell s3 to send back checksums headers if
* they exist. Calculate the corresponding checksum on the response bodies. The meta request will finish with a did
* validate field and set the error code to AWS_ERROR_S3_RESPONSE_CHECKSUM_MISMATCH if the calculated
* checksum, and checksum found in the response header do not match.
Expand Down Expand Up @@ -527,7 +527,7 @@ struct aws_s3_meta_request_result {
/* HTTP Headers for the failed request that triggered finish of the meta request. NULL if no request failed. */
struct aws_http_headers *error_response_headers;

/* Response body for the failed request that triggered finishing of the meta request. NUll if no request failed.*/
/* Response body for the failed request that triggered finishing of the meta request. NULL if no request failed.*/
struct aws_byte_buf *error_response_body;

/* Response status of the failed request or of the entire meta request. */
Expand Down
205 changes: 90 additions & 115 deletions source/s3_auto_ranged_put.c

Large diffs are not rendered by default.

5 changes: 2 additions & 3 deletions source/s3_client.c
Original file line number Diff line number Diff line change
Expand Up @@ -768,7 +768,7 @@ struct aws_s3_meta_request *aws_s3_client_make_meta_request(
options->checksum_config->checksum_algorithm == AWS_SCA_NONE) {
AWS_LOGF_ERROR(
AWS_LS_S3_CLIENT,
"id=%p Cannot create meta s3 request; checksum algorithm must be set to calculate checksum.",
"id=%p Cannot create meta s3 request; checksum location is set, but no checksum algorithm selected.",
(void *)client);
aws_raise_error(AWS_ERROR_INVALID_ARGUMENT);
return NULL;
Expand All @@ -777,8 +777,7 @@ struct aws_s3_meta_request *aws_s3_client_make_meta_request(
options->checksum_config->location == AWS_SCL_NONE) {
AWS_LOGF_ERROR(
AWS_LS_S3_CLIENT,
"id=%p Cannot create meta s3 request; checksum algorithm cannot be set if not calculate checksum from "
"client.",
"id=%p Cannot create meta s3 request; checksum algorithm is set, but no checksum location selected.",
(void *)client);
aws_raise_error(AWS_ERROR_INVALID_ARGUMENT);
return NULL;
Expand Down
41 changes: 22 additions & 19 deletions source/s3_copy_object.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
/* Objects with size smaller than the constant below are bypassed as S3 CopyObject instead of multipart copy */
static const size_t s_multipart_copy_minimum_object_size = GB_TO_BYTES(1);

static const size_t s_etags_initial_capacity = 16;
static const struct aws_byte_cursor s_upload_id = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("UploadId");
static const size_t s_complete_multipart_upload_init_body_size_bytes = 512;
static const size_t s_abort_multipart_upload_init_body_size_bytes = 512;
Expand Down Expand Up @@ -105,7 +104,7 @@ struct aws_s3_meta_request *aws_s3_meta_request_copy_object_new(
}

aws_array_list_init_dynamic(
&copy_object->synced_data.etag_list, allocator, s_etags_initial_capacity, sizeof(struct aws_string *));
&copy_object->synced_data.part_list, allocator, 0, sizeof(struct aws_s3_put_part_info *));

copy_object->synced_data.content_length = UNKNOWN_CONTENT_LENGTH;
copy_object->synced_data.total_num_parts = UNKNOWN_NUM_PARTS;
Expand All @@ -125,14 +124,15 @@ static void s_s3_meta_request_copy_object_destroy(struct aws_s3_meta_request *me
aws_string_destroy(copy_object->upload_id);
copy_object->upload_id = NULL;

for (size_t etag_index = 0; etag_index < aws_array_list_length(&copy_object->synced_data.etag_list); ++etag_index) {
struct aws_string *etag = NULL;

aws_array_list_get_at(&copy_object->synced_data.etag_list, &etag, etag_index);
aws_string_destroy(etag);
for (size_t part_index = 0; part_index < aws_array_list_length(&copy_object->synced_data.part_list); ++part_index) {
struct aws_s3_put_part_info *part = NULL;
aws_array_list_get_at(&copy_object->synced_data.part_list, &part, part_index);
aws_string_destroy(part->etag);
aws_byte_buf_clean_up(&part->checksum_base64);
aws_mem_release(meta_request->allocator, part);
}

aws_array_list_clean_up(&copy_object->synced_data.etag_list);
aws_array_list_clean_up(&copy_object->synced_data.part_list);
aws_http_headers_release(copy_object->synced_data.needed_response_headers);
aws_mem_release(meta_request->allocator, copy_object);
}
Expand Down Expand Up @@ -413,6 +413,14 @@ static struct aws_future_void *s_s3_copy_object_prepare_request(struct aws_s3_re
copy_object->synced_data.total_num_parts = num_parts;
copy_object->synced_data.part_size = part_size;

/* Fill part_list */
aws_array_list_ensure_capacity(&copy_object->synced_data.part_list, num_parts);
while (aws_array_list_length(&copy_object->synced_data.part_list) < num_parts) {
struct aws_s3_put_part_info *part =
aws_mem_calloc(meta_request->allocator, 1, sizeof(struct aws_s3_put_part_info));
aws_array_list_push_back(&copy_object->synced_data.part_list, &part);
}

AWS_LOGF_DEBUG(
AWS_LS_S3_META_REQUEST,
"Starting multi-part Copy using part size=%zu, total_num_parts=%zu",
Expand Down Expand Up @@ -481,8 +489,7 @@ static struct aws_future_void *s_s3_copy_object_prepare_request(struct aws_s3_re
meta_request->initial_request_message,
&request->request_body,
copy_object->upload_id,
&copy_object->synced_data.etag_list,
NULL,
&copy_object->synced_data.part_list,
AWS_SCA_NONE);

break;
Expand Down Expand Up @@ -722,15 +729,11 @@ static void s_s3_copy_object_request_finished(
meta_request->progress_callback(meta_request, &progress, meta_request->user_data);
}

struct aws_string *null_etag = NULL;
/* ETags need to be associated with their part number, so we keep the etag indices consistent with
* part numbers. This means we may have to add padding to the list in the case that parts finish out
* of order. */
while (aws_array_list_length(&copy_object->synced_data.etag_list) < part_number) {
int push_back_result = aws_array_list_push_back(&copy_object->synced_data.etag_list, &null_etag);
AWS_FATAL_ASSERT(push_back_result == AWS_OP_SUCCESS);
}
aws_array_list_set_at(&copy_object->synced_data.etag_list, &etag, part_index);
struct aws_s3_put_part_info *part = NULL;
aws_array_list_get_at(&copy_object->synced_data.part_list, &part, part_index);
AWS_ASSERT(part != NULL);
part->etag = etag;

} else {
++copy_object->synced_data.num_parts_failed;
aws_s3_meta_request_set_fail_synced(meta_request, request, error_code);
Expand Down
26 changes: 10 additions & 16 deletions source/s3_request_messages.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
*/

#include "aws/s3/private/s3_request_messages.h"
#include "aws/s3/private/s3_checksums.h"
#include "aws/s3/private/s3_meta_request_impl.h"
#include "aws/s3/private/s3_util.h"
#include <aws/cal/hash.h>
#include <aws/common/byte_buf.h>
Expand Down Expand Up @@ -551,14 +551,13 @@ struct aws_http_message *aws_s3_complete_multipart_message_new(
struct aws_http_message *base_message,
struct aws_byte_buf *body_buffer,
const struct aws_string *upload_id,
const struct aws_array_list *etags,
const struct aws_array_list *checksums,
const struct aws_array_list *parts,
enum aws_s3_checksum_algorithm algorithm) {
AWS_PRECONDITION(allocator);
AWS_PRECONDITION(base_message);
AWS_PRECONDITION(body_buffer);
AWS_PRECONDITION(upload_id);
AWS_PRECONDITION(etags);
AWS_PRECONDITION(parts);

const struct aws_byte_cursor *mpu_algorithm_checksum_name = aws_get_complete_mpu_name_from_algorithm(algorithm);

Expand Down Expand Up @@ -607,18 +606,17 @@ struct aws_http_message *aws_s3_complete_multipart_message_new(
goto error_clean_up;
}

for (size_t etag_index = 0; etag_index < aws_array_list_length(etags); ++etag_index) {
struct aws_string *etag = NULL;
for (size_t part_index = 0; part_index < aws_array_list_length(parts); ++part_index) {
const struct aws_s3_put_part_info *part = NULL;

aws_array_list_get_at(etags, &etag, etag_index);

AWS_FATAL_ASSERT(etag != NULL);
aws_array_list_get_at(parts, &part, part_index);
AWS_FATAL_ASSERT(part != NULL);

if (aws_byte_buf_append_dynamic(body_buffer, &s_part_section_string_0)) {
goto error_clean_up;
}

struct aws_byte_cursor etag_byte_cursor = aws_byte_cursor_from_string(etag);
struct aws_byte_cursor etag_byte_cursor = aws_byte_cursor_from_string(part->etag);

if (aws_byte_buf_append_dynamic(body_buffer, &etag_byte_cursor)) {
goto error_clean_up;
Expand All @@ -629,7 +627,7 @@ struct aws_http_message *aws_s3_complete_multipart_message_new(
}

char part_number_buffer[32] = "";
int part_number = (int)(etag_index + 1);
int part_number = (int)(part_index + 1);
int part_number_num_char = snprintf(part_number_buffer, sizeof(part_number_buffer), "%d", part_number);
struct aws_byte_cursor part_number_byte_cursor =
aws_byte_cursor_from_array(part_number_buffer, part_number_num_char);
Expand All @@ -643,11 +641,7 @@ struct aws_http_message *aws_s3_complete_multipart_message_new(
}

if (mpu_algorithm_checksum_name) {
struct aws_byte_buf *checksum_buf;

aws_array_list_get_at(checksums, &checksum_buf, etag_index);

struct aws_byte_cursor checksum = aws_byte_cursor_from_buf(checksum_buf);
struct aws_byte_cursor checksum = aws_byte_cursor_from_buf(&part->checksum_base64);

if (aws_byte_buf_append_dynamic(body_buffer, &s_open_start_bracket)) {
goto error_clean_up;
Expand Down
10 changes: 5 additions & 5 deletions tests/s3_data_plane_tests.c
Original file line number Diff line number Diff line change
Expand Up @@ -3043,11 +3043,11 @@ static int s_test_s3_complete_multipart_message_with_content_md5(struct aws_allo

struct aws_string *upload_id = aws_string_new_from_c_str(allocator, "dummy_upload_id");

struct aws_array_list etags;
ASSERT_SUCCESS(aws_array_list_init_dynamic(&etags, allocator, 0, sizeof(struct aws_string)));
struct aws_array_list parts;
ASSERT_SUCCESS(aws_array_list_init_dynamic(&parts, allocator, 0, sizeof(struct aws_s3_put_part_info *)));

struct aws_http_message *new_message = aws_s3_complete_multipart_message_new(
allocator, base_message, &body_buffer, upload_id, &etags, NULL, AWS_SCA_NONE);
struct aws_http_message *new_message =
aws_s3_complete_multipart_message_new(allocator, base_message, &body_buffer, upload_id, &parts, AWS_SCA_NONE);

struct aws_http_headers *new_headers = aws_http_message_get_headers(new_message);
ASSERT_FALSE(aws_http_headers_has(new_headers, g_content_md5_header_name));
Expand All @@ -3058,7 +3058,7 @@ static int s_test_s3_complete_multipart_message_with_content_md5(struct aws_allo
aws_http_message_release(base_message);
base_message = NULL;

aws_array_list_clean_up(&etags);
aws_array_list_clean_up(&parts);

aws_string_destroy(upload_id);
upload_id = NULL;
Expand Down
16 changes: 9 additions & 7 deletions tests/s3_request_messages_tests.c
Original file line number Diff line number Diff line change
Expand Up @@ -856,10 +856,11 @@ static int s_test_s3_complete_multipart_message_new(struct aws_allocator *alloca
#define EXPECTED_UPLOAD_PART_PATH TEST_PATH "?uploadId=" UPLOAD_ID
#define ETAG_VALUE "etag_value"

struct aws_array_list etags;
ASSERT_SUCCESS(aws_array_list_init_dynamic(&etags, allocator, 1, sizeof(struct aws_string *)));
struct aws_string *etag = aws_string_new_from_c_str(allocator, ETAG_VALUE);
ASSERT_SUCCESS(aws_array_list_push_back(&etags, &etag));
struct aws_array_list parts;
ASSERT_SUCCESS(aws_array_list_init_dynamic(&parts, allocator, 1, sizeof(struct aws_s3_put_part_info *)));
struct aws_s3_put_part_info *part = aws_mem_calloc(allocator, 1, sizeof(struct aws_s3_put_part_info));
part->etag = aws_string_new_from_c_str(allocator, ETAG_VALUE);
ASSERT_SUCCESS(aws_array_list_push_back(&parts, &part));

const struct aws_byte_cursor header_exclude_exceptions[] = {
AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("Content-Length"),
Expand All @@ -878,7 +879,7 @@ static int s_test_s3_complete_multipart_message_new(struct aws_allocator *alloca
aws_byte_buf_init(&body_buffer, allocator, 64);

struct aws_http_message *complete_multipart_message = aws_s3_complete_multipart_message_new(
allocator, original_message, &body_buffer, upload_id, &etags, NULL, AWS_SCA_NONE);
allocator, original_message, &body_buffer, upload_id, &parts, AWS_SCA_NONE);

ASSERT_SUCCESS(s_test_http_message_request_method(complete_multipart_message, "POST"));
ASSERT_SUCCESS(s_test_http_message_request_path(complete_multipart_message, &expected_create_path));
Expand Down Expand Up @@ -919,8 +920,9 @@ static int s_test_s3_complete_multipart_message_new(struct aws_allocator *alloca
aws_http_message_release(complete_multipart_message);
aws_http_message_release(original_message);

aws_string_destroy(etag);
aws_array_list_clean_up(&etags);
aws_string_destroy(part->etag);
aws_mem_release(allocator, part);
aws_array_list_clean_up(&parts);
#undef TEST_PATH
#undef UPLOAD_ID
#undef EXPECTED_UPLOAD_PART_PATH
Expand Down
2 changes: 1 addition & 1 deletion tests/s3_tester.c
Original file line number Diff line number Diff line change
Expand Up @@ -1623,7 +1623,7 @@ int aws_s3_tester_send_meta_request_with_options(

switch (options->validate_type) {
case AWS_S3_TESTER_VALIDATE_TYPE_EXPECT_SUCCESS:
ASSERT_TRUE(out_results->finished_error_code == AWS_ERROR_SUCCESS);
ASSERT_INT_EQUALS(AWS_ERROR_SUCCESS, out_results->finished_error_code);

if (meta_request_options.type == AWS_S3_META_REQUEST_TYPE_GET_OBJECT) {
ASSERT_SUCCESS(aws_s3_tester_validate_get_object_results(out_results, options->sse_type));
Expand Down

0 comments on commit ca0335f

Please sign in to comment.