Skip to content

Commit

Permalink
Fix: Properly escape URL construction for XML MPU API (#1333)
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewsg authored Aug 5, 2024
1 parent 3d29c6f commit bf4d0e0
Show file tree
Hide file tree
Showing 3 changed files with 110 additions and 1 deletion.
3 changes: 2 additions & 1 deletion google/cloud/storage/transfer_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from google.cloud.storage import Client
from google.cloud.storage import Blob
from google.cloud.storage.blob import _get_host_name
from google.cloud.storage.blob import _quote
from google.cloud.storage.constants import _DEFAULT_TIMEOUT
from google.cloud.storage._helpers import _api_core_retry_to_resumable_media_retry
from google.cloud.storage.retry import DEFAULT_RETRY
Expand Down Expand Up @@ -1083,7 +1084,7 @@ def upload_chunks_concurrently(

hostname = _get_host_name(client._connection)
url = "{hostname}/{bucket}/{blob}".format(
hostname=hostname, bucket=bucket.name, blob=blob.name
hostname=hostname, bucket=bucket.name, blob=_quote(blob.name)
)

base_headers, object_metadata, content_type = blob._get_upload_arguments(
Expand Down
57 changes: 57 additions & 0 deletions tests/system/test_transfer_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,8 @@ def test_upload_chunks_concurrently(shared_bucket, file_data, blobs_to_delete):
chunk_size = 5 * 1024 * 1024 # Minimum supported by XML MPU API
assert os.path.getsize(filename) > chunk_size # Won't make a good test otherwise

blobs_to_delete.append(upload_blob)

transfer_manager.upload_chunks_concurrently(
filename,
upload_blob,
Expand Down Expand Up @@ -418,3 +420,58 @@ def test_upload_chunks_concurrently_with_kms(
source_contents = sf.read()
temp_contents = tmp.read()
assert source_contents == temp_contents


def test_upload_chunks_concurrently_with_quoted_blob_names(
shared_bucket, file_data, blobs_to_delete
):
source_file = file_data["big"]
filename = source_file["path"]
blob_name = "../example_bucket/mpu_file"
upload_blob = shared_bucket.blob(blob_name)
chunk_size = 5 * 1024 * 1024 # Minimum supported by XML MPU API
assert os.path.getsize(filename) > chunk_size # Won't make a good test otherwise

blobs_to_delete.append(upload_blob)

# If the blob name is not quoted/encoded at all, this will result in a 403.
transfer_manager.upload_chunks_concurrently(
filename, upload_blob, chunk_size=chunk_size, deadline=DEADLINE
)

with tempfile.NamedTemporaryFile() as tmp:
# If the blob name is not quoted correctly, this will result in a 404.
download_blob = shared_bucket.blob(blob_name)
download_blob.download_to_file(tmp)
tmp.seek(0)

with open(source_file["path"], "rb") as sf:
source_contents = sf.read()
temp_contents = tmp.read()
assert source_contents == temp_contents

# Test emoji names are not mangled.
blob_name = "\U0001f681" # Helicopter emoji
upload_blob = shared_bucket.blob(blob_name)
chunk_size = 5 * 1024 * 1024 # Minimum supported by XML MPU API
assert os.path.getsize(filename) > chunk_size # Won't make a good test otherwise

blobs_to_delete.append(upload_blob)

transfer_manager.upload_chunks_concurrently(
filename,
upload_blob,
chunk_size=chunk_size,
deadline=DEADLINE,
worker_type=transfer_manager.THREAD,
)

with tempfile.NamedTemporaryFile() as tmp:
download_blob = shared_bucket.blob(blob_name)
download_blob.download_to_file(tmp)
tmp.seek(0)

with open(source_file["path"], "rb") as sf:
source_contents = sf.read()
temp_contents = tmp.read()
assert source_contents == temp_contents
51 changes: 51 additions & 0 deletions tests/unit/test_transfer_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -794,6 +794,57 @@ def test_upload_chunks_concurrently():
part_mock.upload.assert_called_with(transport)


def test_upload_chunks_concurrently_quotes_urls():
bucket = mock.Mock()
bucket.name = "bucket"
bucket.client = _PickleableMockClient(identify_as_client=True)
transport = bucket.client._http
bucket.user_project = None

blob = Blob(b"../wrongbucket/blob", bucket)
blob.content_type = FAKE_CONTENT_TYPE
quoted_url = "https://example.com/bucket/..%2Fwrongbucket%2Fblob"

FILENAME = "file_a.txt"
SIZE = 2048

container_mock = mock.Mock()
container_mock.upload_id = "abcd"
part_mock = mock.Mock()
ETAG = "efgh"
part_mock.etag = ETAG
container_cls_mock = mock.Mock(return_value=container_mock)

with mock.patch("os.path.getsize", return_value=SIZE), mock.patch(
"google.cloud.storage.transfer_manager.XMLMPUContainer", new=container_cls_mock
), mock.patch(
"google.cloud.storage.transfer_manager.XMLMPUPart", return_value=part_mock
):
transfer_manager.upload_chunks_concurrently(
FILENAME,
blob,
chunk_size=SIZE // 2,
worker_type=transfer_manager.THREAD,
)

container_mock.initiate.assert_called_once_with(
transport=transport, content_type=blob.content_type
)
container_mock.register_part.assert_any_call(1, ETAG)
container_mock.register_part.assert_any_call(2, ETAG)
container_mock.finalize.assert_called_once_with(bucket.client._http)

assert container_mock._retry_strategy.max_sleep == 60.0
assert container_mock._retry_strategy.max_cumulative_retry == 120.0
assert container_mock._retry_strategy.max_retries is None

container_cls_mock.assert_called_once_with(
quoted_url, FILENAME, headers=mock.ANY
)

part_mock.upload.assert_called_with(transport)


def test_upload_chunks_concurrently_passes_concurrency_options():
bucket = mock.Mock()
bucket.name = "bucket"
Expand Down

0 comments on commit bf4d0e0

Please sign in to comment.