Added chunk streaming docstrings and samples #17435

Merged · 4 commits · Mar 26, 2021
Changes from 1 commit
@@ -736,7 +736,8 @@ def download_blob(self, offset=None, length=None, **kwargs):
        # type: (Optional[int], Optional[int], **Any) -> StorageStreamDownloader
        """Downloads a blob to the StorageStreamDownloader. The readall() method must
        be used to read all the content or readinto() must be used to download the blob into
        a stream. The chunks() method can be used to break the content into chunks in order to process
        them without storing the entire content in memory.

        :param int offset:
            Start of byte range to use for downloading a section of the blob.
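For orientation, here is a minimal sketch of the three access patterns this docstring names (readall(), readinto(), chunks()); the connection string, container, blob name, and local path are hypothetical:

from azure.storage.blob import BlobClient

# Hypothetical connection details, for illustration only
blob_client = BlobClient.from_connection_string(
    "my_connection_string", container_name="mycontainer", blob_name="myblob")

# readall(): load the entire blob into memory at once
data = blob_client.download_blob().readall()

# readinto(): write the download directly into an open, writable stream
with open("myblob.bin", "wb") as handle:
    blob_client.download_blob().readinto(handle)

# chunks(): iterate the content piece by piece without holding it all in memory
total = 0
for chunk in blob_client.download_blob().chunks():
    total += len(chunk)  # stand-in for real per-chunk processing

Each download_blob() call returns a fresh StorageStreamDownloader, so each pattern above consumes its own stream.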
@@ -377,7 +377,8 @@ async def download_blob(self, offset=None, length=None, **kwargs):
        # type: (Optional[int], Optional[int], Any) -> StorageStreamDownloader
        """Downloads a blob to the StorageStreamDownloader. The readall() method must
        be used to read all the content or readinto() must be used to download the blob into
        a stream. The chunks() method can be used to break the content into chunks in order to process
        them without storing the entire content in memory.

        :param int offset:
            Start of byte range to use for downloading a section of the blob.
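The async variant reads the same way, except chunks() is consumed with async iteration; a minimal sketch under the same hypothetical names:

import asyncio
from azure.storage.blob.aio import BlobClient

async def consume_chunks():
    # Hypothetical connection details, for illustration only
    blob_client = BlobClient.from_connection_string(
        "my_connection_string", container_name="mycontainer", blob_name="myblob")
    async with blob_client:
        stream = await blob_client.download_blob()
        total = 0
        async for chunk in stream.chunks():
            total += len(chunk)  # stand-in for real per-chunk processing

asyncio.run(consume_chunks())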
@@ -23,6 +23,7 @@
SOURCE_FILE = 'SampleSource.txt'
DEST_FILE = 'BlockDestination.txt'


class BlobSamples(object):

    connection_string = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
@@ -85,6 +86,49 @@ def block_blob_sample(self):
        # Delete the container
        container_client.delete_container()

    def stream_block_blob(self):

        import uuid
        # Instantiate a new BlobServiceClient using a connection string - set chunk size to 1MB
        from azure.storage.blob import BlobServiceClient, BlobBlock
        blob_service_client = BlobServiceClient.from_connection_string(
            self.connection_string,
            max_single_get_size=1024*1024,
            max_chunk_get_size=1024*1024)

        # Instantiate a new ContainerClient
        container_client = blob_service_client.get_container_client("containersync")
        # Generate 4MB of data
        data = b'a'*4*1024*1024

        try:
            # Create new Container in the service
            container_client.create_container()

            # Instantiate a new source blob client
            source_blob_client = container_client.get_blob_client("source_blob")
            # Upload content to the source block blob
            source_blob_client.upload_blob(data, blob_type="BlockBlob")

            destination_blob_client = container_client.get_blob_client("destination_blob")

            # This returns a StorageStreamDownloader
            stream = source_blob_client.download_blob()
            block_list = []

            # Read the data in chunks to avoid loading it all into memory at once
            for chunk in stream.chunks():
                # Process your data here (each chunk is a bytes object)
                block_id = str(uuid.uuid4())
                destination_blob_client.stage_block(block_id=block_id, data=chunk)
                block_list.append(BlobBlock(block_id=block_id))

            # Commit the staged blocks to assemble the destination blob
            destination_blob_client.commit_block_list(block_list)

        finally:
            # Delete the container
            container_client.delete_container()

    def page_blob_sample(self):

        # Instantiate a new BlobServiceClient using a connection string
@@ -149,9 +193,11 @@ def append_blob_sample(self):
        # Delete container
        container_client.delete_container()


if __name__ == '__main__':
    sample = BlobSamples()
    sample.create_container_sample()
    sample.block_blob_sample()
    sample.append_blob_sample()
    sample.page_blob_sample()
    sample.stream_block_blob()
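A note on the numbers in this sample: max_single_get_size and max_chunk_get_size are both set to 1MB, so the 4MB source blob is delivered by chunks() in four 1MB pieces, which makes the streaming behavior easy to observe. The library defaults are much larger (32MB for the initial request and 4MB per subsequent chunk at the time of this PR), so a 4MB blob would otherwise arrive in a single piece.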
@@ -23,6 +23,7 @@
DEST_FILE = 'BlockDestination.txt'
SOURCE_FILE = 'SampleSource.txt'


class BlobSamplesAsync(object):

    connection_string = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
@@ -88,6 +89,51 @@ async def block_blob_sample_async(self):
        # Delete the container
        await container_client.delete_container()

    async def stream_block_blob(self):

        import uuid
        # Instantiate a new BlobServiceClient using a connection string - set chunk size to 1MB
        from azure.storage.blob import BlobBlock
        from azure.storage.blob.aio import BlobServiceClient
        blob_service_client = BlobServiceClient.from_connection_string(
            self.connection_string,
            max_single_get_size=1024*1024,
            max_chunk_get_size=1024*1024)

        async with blob_service_client:
            # Instantiate a new ContainerClient
            container_client = blob_service_client.get_container_client("containerasync")
            # Generate 4MB of data
            data = b'a'*4*1024*1024

            try:
                # Create new Container in the service
                await container_client.create_container()

                # Instantiate a new source blob client
                source_blob_client = container_client.get_blob_client("source_blob")
                # Upload content to the source block blob
                await source_blob_client.upload_blob(data, blob_type="BlockBlob")

                destination_blob_client = container_client.get_blob_client("destination_blob")

                # This returns a StorageStreamDownloader
                stream = await source_blob_client.download_blob()
                block_list = []

                # Read the data in chunks to avoid loading it all into memory at once
                async for chunk in stream.chunks():
                    # Process your data here (each chunk is a bytes object)
                    block_id = str(uuid.uuid4())
                    await destination_blob_client.stage_block(block_id=block_id, data=chunk)
                    block_list.append(BlobBlock(block_id=block_id))

                # Commit the staged blocks to assemble the destination blob
                await destination_blob_client.commit_block_list(block_list)

            finally:
                # Delete the container
                await container_client.delete_container()

    async def page_blob_sample_async(self):
        # Instantiate a new BlobServiceClient using a connection string
        from azure.storage.blob.aio import BlobServiceClient
@@ -154,12 +200,14 @@ async def append_blob_sample_async(self):
        # Delete container
        await container_client.delete_container()


async def main():
    sample = BlobSamplesAsync()
    await sample.create_container_sample_async()
    await sample.block_blob_sample_async()
    await sample.append_blob_sample_async()
    await sample.page_blob_sample_async()
    await sample.stream_block_blob()

if __name__ == '__main__':
    loop = asyncio.get_event_loop()
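One design note on both samples: each downloaded chunk is staged as an uncommitted block under a fresh UUID, and the destination blob only comes into existence when commit_block_list is called with the accumulated block list. If the copy fails partway through, no partially written destination blob is left visible, and at no point does the full 4MB payload need to be held in memory.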