Added chunk streaming docstrings and samples #17435

Merged · 4 commits · Mar 26, 2021
Changes from 1 commit
@@ -736,7 +736,8 @@ def download_blob(self, offset=None, length=None, **kwargs):
        # type: (Optional[int], Optional[int], **Any) -> StorageStreamDownloader
        """Downloads a blob to the StorageStreamDownloader. The readall() method must
        be used to read all the content or readinto() must be used to download the blob into
        a stream. The chunks() method can be used to break the content into chunks in order to process
        them without storing the entire content in memory.

        :param int offset:
            Start of byte range to use for downloading a section of the blob.
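For orientation, here is a minimal sketch of the three access patterns this docstring names (readall(), readinto(), chunks()); the connection string, container, blob name, and local path are hypothetical:

from azure.storage.blob import BlobClient

# Hypothetical connection details, for illustration only
blob_client = BlobClient.from_connection_string(
    "my_connection_string", container_name="mycontainer", blob_name="myblob")

# readall(): load the entire blob into memory at once
data = blob_client.download_blob().readall()

# readinto(): write the download directly into an open, writable stream
with open("myblob.bin", "wb") as handle:
    blob_client.download_blob().readinto(handle)

# chunks(): iterate the content piece by piece without holding it all in memory
total = 0
for chunk in blob_client.download_blob().chunks():
    total += len(chunk)  # stand-in for real per-chunk processing

Each download_blob() call returns a fresh StorageStreamDownloader, so each pattern above consumes its own stream.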
@@ -377,7 +377,8 @@ async def download_blob(self, offset=None, length=None, **kwargs):
        # type: (Optional[int], Optional[int], Any) -> StorageStreamDownloader
        """Downloads a blob to the StorageStreamDownloader. The readall() method must
        be used to read all the content or readinto() must be used to download the blob into
        a stream. The chunks() method can be used to break the content into chunks in order to process
        them without storing the entire content in memory.

        :param int offset:
            Start of byte range to use for downloading a section of the blob.
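The async variant reads the same way, except chunks() is consumed with async iteration; a minimal sketch under the same hypothetical names:

import asyncio
from azure.storage.blob.aio import BlobClient

async def consume_chunks():
    # Hypothetical connection details, for illustration only
    blob_client = BlobClient.from_connection_string(
        "my_connection_string", container_name="mycontainer", blob_name="myblob")
    async with blob_client:
        stream = await blob_client.download_blob()
        total = 0
        async for chunk in stream.chunks():
            total += len(chunk)  # stand-in for real per-chunk processing

asyncio.run(consume_chunks())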
@@ -23,6 +23,7 @@
SOURCE_FILE = 'SampleSource.txt'
DEST_FILE = 'BlockDestination.txt'


class BlobSamples(object):

    connection_string = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
@@ -85,6 +86,49 @@ def block_blob_sample(self):
        # Delete the container
        container_client.delete_container()

    def stream_block_blob(self):

        import uuid
        # Instantiate a new BlobServiceClient using a connection string - set chunk size to 1MB
        from azure.storage.blob import BlobServiceClient, BlobBlock
        blob_service_client = BlobServiceClient.from_connection_string(
            self.connection_string,
            max_single_get_size=1024*1024,
            max_chunk_get_size=1024*1024)

        # Instantiate a new ContainerClient
        container_client = blob_service_client.get_container_client("containersync")
        # Generate 4MB of data
        data = b'a'*4*1024*1024

        try:
            # Create new Container in the service
            container_client.create_container()

            # Instantiate a new source blob client
            source_blob_client = container_client.get_blob_client("source_blob")
            # Upload content to the source block blob
            source_blob_client.upload_blob(data, blob_type="BlockBlob")

            destination_blob_client = container_client.get_blob_client("destination_blob")

            # This returns a StorageStreamDownloader
            stream = source_blob_client.download_blob()
            block_list = []

            # Read the data in chunks to avoid loading it all into memory at once
            for chunk in stream.chunks():
                # Process your data here (each chunk is a bytes object)
                block_id = str(uuid.uuid4())
                destination_blob_client.stage_block(block_id=block_id, data=chunk)
                block_list.append(BlobBlock(block_id=block_id))

            # Commit the staged blocks to assemble the destination blob
            destination_blob_client.commit_block_list(block_list)

        finally:
            # Delete the container
            container_client.delete_container()

    def page_blob_sample(self):

        # Instantiate a new BlobServiceClient using a connection string
@@ -149,9 +193,11 @@ def append_blob_sample(self):
        # Delete container
        container_client.delete_container()


if __name__ == '__main__':
    sample = BlobSamples()
    sample.create_container_sample()
    sample.block_blob_sample()
    sample.append_blob_sample()
    sample.page_blob_sample()
    sample.stream_block_blob()
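A note on the numbers in this sample: max_single_get_size and max_chunk_get_size are both set to 1MB, so the 4MB source blob is delivered by chunks() in four 1MB pieces, which makes the streaming behavior easy to observe. The library defaults are much larger (32MB for the initial request and 4MB per subsequent chunk at the time of this PR), so a 4MB blob would otherwise arrive in a single piece.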
@@ -23,6 +23,7 @@
DEST_FILE = 'BlockDestination.txt'
SOURCE_FILE = 'SampleSource.txt'


class BlobSamplesAsync(object):

    connection_string = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
@@ -88,6 +89,51 @@ async def block_blob_sample_async(self):
        # Delete the container
        await container_client.delete_container()

    async def stream_block_blob(self):

        import uuid
        # Instantiate a new BlobServiceClient using a connection string - set chunk size to 1MB
        from azure.storage.blob import BlobBlock
        from azure.storage.blob.aio import BlobServiceClient
        blob_service_client = BlobServiceClient.from_connection_string(
            self.connection_string,
            max_single_get_size=1024*1024,
            max_chunk_get_size=1024*1024)

        async with blob_service_client:
            # Instantiate a new ContainerClient
            container_client = blob_service_client.get_container_client("containerasync")
            # Generate 4MB of data
            data = b'a'*4*1024*1024

            try:
                # Create new Container in the service
                await container_client.create_container()

                # Instantiate a new source blob client
                source_blob_client = container_client.get_blob_client("source_blob")
                # Upload content to the source block blob
                await source_blob_client.upload_blob(data, blob_type="BlockBlob")

                destination_blob_client = container_client.get_blob_client("destination_blob")

                # This returns a StorageStreamDownloader
                stream = await source_blob_client.download_blob()
                block_list = []

                # Read the data in chunks to avoid loading it all into memory at once
                async for chunk in stream.chunks():
                    # Process your data here (each chunk is a bytes object)
                    block_id = str(uuid.uuid4())
                    await destination_blob_client.stage_block(block_id=block_id, data=chunk)
                    block_list.append(BlobBlock(block_id=block_id))

                # Commit the staged blocks to assemble the destination blob
                await destination_blob_client.commit_block_list(block_list)

            finally:
                # Delete the container
                await container_client.delete_container()

    async def page_blob_sample_async(self):
        # Instantiate a new BlobServiceClient using a connection string
        from azure.storage.blob.aio import BlobServiceClient
@@ -154,12 +200,14 @@ async def append_blob_sample_async(self):
        # Delete container
        await container_client.delete_container()


async def main():
    sample = BlobSamplesAsync()
    await sample.create_container_sample_async()
    await sample.block_blob_sample_async()
    await sample.append_blob_sample_async()
    await sample.page_blob_sample_async()
    await sample.stream_block_blob()

if __name__ == '__main__':
    loop = asyncio.get_event_loop()
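One design note on both samples: each downloaded chunk is staged as an uncommitted block under a fresh UUID, and the destination blob only comes into existence when commit_block_list is called with the accumulated block list. If the copy fails partway through, no partially written destination blob is left visible, and at no point does the full 4MB payload need to be held in memory.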