diff --git a/sdk/storage/azure-storage-blob/azure/storage/blob/__init__.py b/sdk/storage/azure-storage-blob/azure/storage/blob/__init__.py
index caa00401b96c..e5bef3d6a2bd 100644
--- a/sdk/storage/azure-storage-blob/azure/storage/blob/__init__.py
+++ b/sdk/storage/azure-storage-blob/azure/storage/blob/__init__.py
@@ -54,6 +54,7 @@
BlobQueryError,
DelimitedJsonDialect,
DelimitedTextDialect,
+ ArrowDialect,
ObjectReplicationPolicy,
ObjectReplicationRule
)
@@ -219,6 +220,7 @@ def download_blob_from_url(
'BlobQueryError',
'DelimitedJsonDialect',
'DelimitedTextDialect',
+ 'ArrowDialect',
'BlobQueryReader',
'ObjectReplicationPolicy',
'ObjectReplicationRule'
diff --git a/sdk/storage/azure-storage-blob/azure/storage/blob/_blob_client.py b/sdk/storage/azure-storage-blob/azure/storage/blob/_blob_client.py
index 85837199921b..0cc5ff877a5c 100644
--- a/sdk/storage/azure-storage-blob/azure/storage/blob/_blob_client.py
+++ b/sdk/storage/azure-storage-blob/azure/storage/blob/_blob_client.py
@@ -683,13 +683,19 @@ def _quick_query_options(self, query_expression,
try:
delimiter = input_format.lineterminator
except AttributeError:
- delimiter = input_format.delimiter
+ try:
+ delimiter = input_format.delimiter
+ except AttributeError:
+ raise ValueError("The Type of blob_format can only be DelimitedTextDialect or DelimitedJsonDialect")
output_format = kwargs.pop('output_format', None)
if output_format:
try:
delimiter = output_format.lineterminator
except AttributeError:
- delimiter = output_format.delimiter
+ try:
+ delimiter = output_format.delimiter
+ except AttributeError:
+ pass
else:
output_format = input_format
query_request = QueryRequest(
@@ -742,6 +748,7 @@ def query_blob(self, query_expression, **kwargs):
as it is represented in the blob. By providing an output format, the blob data will be reformatted
according to that profile. This value can be a DelimitedTextDialect or a DelimitedJsonDialect.
:paramtype output_format: ~azure.storage.blob.DelimitedTextDialect or ~azure.storage.blob.DelimitedJsonDialect
+ or list[~azure.storage.blob.ArrowDialect]
:keyword lease:
Required if the blob has an active lease. Value can be a BlobLeaseClient object
or the lease ID as a string.
diff --git a/sdk/storage/azure-storage-blob/azure/storage/blob/_blob_service_client.py b/sdk/storage/azure-storage-blob/azure/storage/blob/_blob_service_client.py
index fc1249cf8c02..fee0382a8d7b 100644
--- a/sdk/storage/azure-storage-blob/azure/storage/blob/_blob_service_client.py
+++ b/sdk/storage/azure-storage-blob/azure/storage/blob/_blob_service_client.py
@@ -435,7 +435,7 @@ def find_blobs_by_tags(self, filter_expression, **kwargs):
:param str filter_expression:
The expression to find blobs whose tags matches the specified condition.
eg. "\"yourtagname\"='firsttag' and \"yourtagname2\"='secondtag'"
- To specify a container, eg. "@container='containerName' and \"Name\"='C'"
+ To specify a container, eg. where=@container='containerName' and \"Name\"='C'"
:keyword int results_per_page:
The max result per page when paginating.
:keyword int timeout:
diff --git a/sdk/storage/azure-storage-blob/azure/storage/blob/_models.py b/sdk/storage/azure-storage-blob/azure/storage/blob/_models.py
index 2fb963d3894b..799310f627cc 100644
--- a/sdk/storage/azure-storage-blob/azure/storage/blob/_models.py
+++ b/sdk/storage/azure-storage-blob/azure/storage/blob/_models.py
@@ -9,7 +9,7 @@
from enum import Enum
from azure.core.paging import PageIterator
-from azure.storage.blob._generated.models import FilterBlobItem
+from azure.storage.blob._generated.models import FilterBlobItem, ArrowField
from ._shared import decode_base64_to_text
from ._shared.response_handlers import return_context_and_deserialized, process_storage_error
@@ -1099,6 +1099,20 @@ def __init__(self, **kwargs):
self.has_header = kwargs.pop('has_header', False)
+class ArrowDialect(ArrowField):
+ """field of an arrow schema.
+
+ All required parameters must be populated in order to send to Azure.
+
+ :param str type: Required.
+ :keyword str name: The name of the field.
+ :keyword int precision: The precision of the field.
+ :keyword int scale: The scale of the field.
+ """
+ def __init__(self, type, **kwargs):
+ super(ArrowDialect, self).__init__(type=type, **kwargs)
+
+
class ObjectReplicationPolicy(DictMixin):
"""Policy id and rule ids applied to a blob.
diff --git a/sdk/storage/azure-storage-blob/azure/storage/blob/_serialize.py b/sdk/storage/azure-storage-blob/azure/storage/blob/_serialize.py
index 372ab454c74f..cb3710fc6596 100644
--- a/sdk/storage/azure-storage-blob/azure/storage/blob/_serialize.py
+++ b/sdk/storage/azure-storage-blob/azure/storage/blob/_serialize.py
@@ -13,8 +13,8 @@
from ._models import (
ContainerEncryptionScope,
- DelimitedJsonDialect
-)
+ DelimitedJsonDialect,
+ ArrowDialect)
from ._generated.models import (
ModifiedAccessConditions,
SourceModifiedAccessConditions,
@@ -24,6 +24,7 @@
QuerySerialization,
DelimitedTextConfiguration,
JsonTextConfiguration,
+ ArrowConfiguration,
QueryFormatType,
BlobTag,
BlobTags, LeaseAccessConditions
@@ -182,6 +183,13 @@ def serialize_query_format(formater):
type=QueryFormatType.delimited,
delimited_text_configuration=serialization_settings
)
+ elif isinstance(formater, list):
+ serialization_settings = ArrowConfiguration(
+ schema=formater
+ )
+ qq_format = QueryFormat(
+ type=QueryFormatType.arrow,
+ arrow_configuration=serialization_settings)
elif not formater:
return None
else:
diff --git a/sdk/storage/azure-storage-blob/tests/recordings/test_quick_query.test_quick_query_output_in_arrow_format.yaml b/sdk/storage/azure-storage-blob/tests/recordings/test_quick_query.test_quick_query_output_in_arrow_format.yaml
new file mode 100644
index 000000000000..8ee27403148f
--- /dev/null
+++ b/sdk/storage/azure-storage-blob/tests/recordings/test_quick_query.test_quick_query_output_in_arrow_format.yaml
@@ -0,0 +1,217 @@
+interactions:
+- request:
+ body: null
+ headers:
+ Accept:
+ - '*/*'
+ Accept-Encoding:
+ - gzip, deflate
+ Connection:
+ - keep-alive
+ Content-Length:
+ - '0'
+ User-Agent:
+ - azsdk-python-storage-blob/12.4.0 Python/3.7.3 (Windows-10-10.0.19041-SP0)
+ x-ms-date:
+ - Fri, 11 Sep 2020 20:58:27 GMT
+ x-ms-version:
+ - '2020-02-10'
+ method: PUT
+ uri: https://storagename.blob.core.windows.net/utqqcontainer9d4d1789?restype=container
+ response:
+ body:
+ string: ''
+ headers:
+ date:
+ - Fri, 11 Sep 2020 20:58:28 GMT
+ etag:
+ - '"0x8D856956EBF3C36"'
+ last-modified:
+ - Fri, 11 Sep 2020 20:58:28 GMT
+ transfer-encoding:
+ - chunked
+ x-ms-version:
+ - '2020-02-10'
+ status:
+ code: 201
+ message: Created
+- request:
+ body: '100,200,300,400
+
+ 300,400,500,600
+
+ '
+ headers:
+ Accept:
+ - '*/*'
+ Accept-Encoding:
+ - gzip, deflate
+ Connection:
+ - keep-alive
+ Content-Length:
+ - '32'
+ Content-Type:
+ - application/octet-stream
+ User-Agent:
+ - azsdk-python-storage-blob/12.4.0 Python/3.7.3 (Windows-10-10.0.19041-SP0)
+ x-ms-blob-type:
+ - BlockBlob
+ x-ms-date:
+ - Fri, 11 Sep 2020 20:58:28 GMT
+ x-ms-version:
+ - '2020-02-10'
+ method: PUT
+ uri: https://storagename.blob.core.windows.net/utqqcontainer9d4d1789/csvfile9d4d1789
+ response:
+ body:
+ string: ''
+ headers:
+ content-md5:
+ - /hmKXD7m7tyfn12eEsFvyQ==
+ date:
+ - Fri, 11 Sep 2020 20:58:28 GMT
+ etag:
+ - '"0x8D856956ED0E86F"'
+ last-modified:
+ - Fri, 11 Sep 2020 20:58:28 GMT
+ transfer-encoding:
+ - chunked
+ x-ms-content-crc64:
+ - Dn1U+tgM/4c=
+ x-ms-request-server-encrypted:
+ - 'false'
+ x-ms-version:
+ - '2020-02-10'
+ status:
+ code: 201
+ message: Created
+- request:
+ body: '
+
+ SQLSELECT _2 from BlobStorage
+ WHERE _1 > 250arrowdecimalabc42'
+ headers:
+ Accept:
+ - application/xml
+ Accept-Encoding:
+ - gzip, deflate
+ Connection:
+ - keep-alive
+ Content-Length:
+ - '390'
+ Content-Type:
+ - application/xml; charset=utf-8
+ User-Agent:
+ - azsdk-python-storage-blob/12.4.0 Python/3.7.3 (Windows-10-10.0.19041-SP0)
+ x-ms-date:
+ - Fri, 11 Sep 2020 20:58:28 GMT
+ x-ms-version:
+ - '2020-02-10'
+ method: POST
+ uri: https://storagename.blob.core.windows.net/utqqcontainer9d4d1789/csvfile9d4d1789?comp=query
+ response:
+ body:
+ string: !!binary |
+ T2JqAQIWYXZyby5zY2hlbWG+HlsKICB7CiAgICAidHlwZSI6ICJyZWNvcmQiLAogICAgIm5hbWUi
+ OiAiY29tLm1pY3Jvc29mdC5henVyZS5zdG9yYWdlLnF1ZXJ5QmxvYkNvbnRlbnRzLnJlc3VsdERh
+ dGEiLAogICAgImRvYyI6ICJIb2xkcyByZXN1bHQgZGF0YSBpbiB0aGUgZm9ybWF0IHNwZWNpZmll
+ ZCBmb3IgdGhpcyBxdWVyeSAoQ1NWLCBKU09OLCBldGMuKS4iLAogICAgImZpZWxkcyI6IFsKICAg
+ ICAgewogICAgICAgICJuYW1lIjogImRhdGEiLAogICAgICAgICJ0eXBlIjogImJ5dGVzIgogICAg
+ ICB9CiAgICBdCiAgfSwKICB7CiAgICAidHlwZSI6ICJyZWNvcmQiLAogICAgIm5hbWUiOiAiY29t
+ Lm1pY3Jvc29mdC5henVyZS5zdG9yYWdlLnF1ZXJ5QmxvYkNvbnRlbnRzLmVycm9yIiwKICAgICJk
+ b2MiOiAiQW4gZXJyb3IgdGhhdCBvY2N1cnJlZCB3aGlsZSBwcm9jZXNzaW5nIHRoZSBxdWVyeS4i
+ LAogICAgImZpZWxkcyI6IFsKICAgICAgewogICAgICAgICJuYW1lIjogImZhdGFsIiwKICAgICAg
+ ICAidHlwZSI6ICJib29sZWFuIiwKICAgICAgICAiZG9jIjogIklmIHRydWUsIHRoaXMgZXJyb3Ig
+ cHJldmVudHMgZnVydGhlciBxdWVyeSBwcm9jZXNzaW5nLiAgTW9yZSByZXN1bHQgZGF0YSBtYXkg
+ YmUgcmV0dXJuZWQsIGJ1dCB0aGVyZSBpcyBubyBndWFyYW50ZWUgdGhhdCBhbGwgb2YgdGhlIG9y
+ aWdpbmFsIGRhdGEgd2lsbCBiZSBwcm9jZXNzZWQuICBJZiBmYWxzZSwgdGhpcyBlcnJvciBkb2Vz
+ IG5vdCBwcmV2ZW50IGZ1cnRoZXIgcXVlcnkgcHJvY2Vzc2luZy4iCiAgICAgIH0sCiAgICAgIHsK
+ ICAgICAgICAibmFtZSI6ICJuYW1lIiwKICAgICAgICAidHlwZSI6ICJzdHJpbmciLAogICAgICAg
+ ICJkb2MiOiAiVGhlIG5hbWUgb2YgdGhlIGVycm9yIgogICAgICB9LAogICAgICB7CiAgICAgICAg
+ Im5hbWUiOiAiZGVzY3JpcHRpb24iLAogICAgICAgICJ0eXBlIjogInN0cmluZyIsCiAgICAgICAg
+ ImRvYyI6ICJBIGRlc2NyaXB0aW9uIG9mIHRoZSBlcnJvciIKICAgICAgfSwKICAgICAgewogICAg
+ ICAgICJuYW1lIjogInBvc2l0aW9uIiwKICAgICAgICAidHlwZSI6ICJsb25nIiwKICAgICAgICAi
+ ZG9jIjogIlRoZSBibG9iIG9mZnNldCBhdCB3aGljaCB0aGUgZXJyb3Igb2NjdXJyZWQiCiAgICAg
+ IH0KICAgIF0KICB9LAogIHsKICAgICJ0eXBlIjogInJlY29yZCIsCiAgICAibmFtZSI6ICJjb20u
+ bWljcm9zb2Z0LmF6dXJlLnN0b3JhZ2UucXVlcnlCbG9iQ29udGVudHMucHJvZ3Jlc3MiLAogICAg
+ ImRvYyI6ICJJbmZvcm1hdGlvbiBhYm91dCB0aGUgcHJvZ3Jlc3Mgb2YgdGhlIHF1ZXJ5IiwKICAg
+ ICJmaWVsZHMiOiBbCiAgICAgIHsKICAgICAgICAibmFtZSI6ICJieXRlc1NjYW5uZWQiLAogICAg
+ ICAgICJ0eXBlIjogImxvbmciLAogICAgICAgICJkb2MiOiAiVGhlIG51bWJlciBvZiBieXRlcyB0
+ aGF0IGhhdmUgYmVlbiBzY2FubmVkIgogICAgICB9LAogICAgICB7CiAgICAgICAgIm5hbWUiOiAi
+ dG90YWxCeXRlcyIsCiAgICAgICAgInR5cGUiOiAibG9uZyIsCiAgICAgICAgImRvYyI6ICJUaGUg
+ dG90YWwgbnVtYmVyIG9mIGJ5dGVzIHRvIGJlIHNjYW5uZWQgaW4gdGhpcyBxdWVyeSIKICAgICAg
+ fQogICAgXQogIH0sCiAgewogICAgInR5cGUiOiAicmVjb3JkIiwKICAgICJuYW1lIjogImNvbS5t
+ aWNyb3NvZnQuYXp1cmUuc3RvcmFnZS5xdWVyeUJsb2JDb250ZW50cy5lbmQiLAogICAgImRvYyI6
+ ICJTZW50IGFzIHRoZSBmaW5hbCBtZXNzYWdlIG9mIHRoZSByZXNwb25zZSwgaW5kaWNhdGluZyB0
+ aGF0IGFsbCByZXN1bHRzIGhhdmUgYmVlbiBzZW50LiIsCiAgICAiZmllbGRzIjogWwogICAgICB7
+ CiAgICAgICAgIm5hbWUiOiAidG90YWxCeXRlcyIsCiAgICAgICAgInR5cGUiOiAibG9uZyIsCiAg
+ ICAgICAgImRvYyI6ICJUaGUgdG90YWwgbnVtYmVyIG9mIGJ5dGVzIHRvIGJlIHNjYW5uZWQgaW4g
+ dGhpcyBxdWVyeSIKICAgICAgfQogICAgXQogIH0KXQoAQmgjmNsu90Ck/YQ3d6WMowL2AwDwA///
+ //94AAAAEAAAAAAACgAMAAYABQAIAAoAAAAAAQMADAAAAAgACAAAAAQACAAAAAQAAAABAAAAFAAA
+ ABAAFAAIAAYABwAMAAAAEAAQAAAAAAABByQAAAAUAAAABAAAAAAAAAAIAAwABAAIAAgAAAAEAAAA
+ AgAAAAMAAABhYmMA/////3AAAAAQAAAAAAAKAA4ABgAFAAgACgAAAAADAwAQAAAAAAAKAAwAAAAE
+ AAgACgAAADAAAAAEAAAAAgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEA
+ AAAAAAAAAAAAAAAAAAAAAAAAQmgjmNsu90Ck/YQ3d6WMowLGAgDAAv////+IAAAAFAAAAAAAAAAM
+ ABYABgAFAAgADAAMAAAAAAMDABgAAAAQAAAAAAAAAAAACgAYAAwABAAIAAoAAAA8AAAAEAAAAAEA
+ AAAAAAAAAAAAAAIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQAAAAAAAAAAAAAAABAAAAAQAA
+ AAAAAAAAAAAAAAAAAJABAAAAAAAAAAAAAAAAAABCaCOY2y73QKT9hDd3pYyjAgYEQEBCaCOY2y73
+ QKT9hDd3pYyjAgQGQEJoI5jbLvdApP2EN3eljKM=
+ headers:
+ accept-ranges:
+ - bytes
+ content-type:
+ - avro/binary
+ date:
+ - Fri, 11 Sep 2020 20:58:28 GMT
+ etag:
+ - '"0x8D856956ED0E86F"'
+ last-modified:
+ - Fri, 11 Sep 2020 20:58:28 GMT
+ transfer-encoding:
+ - chunked
+ x-ms-blob-type:
+ - BlockBlob
+ x-ms-creation-time:
+ - Fri, 11 Sep 2020 20:58:28 GMT
+ x-ms-lease-state:
+ - available
+ x-ms-lease-status:
+ - unlocked
+ x-ms-version:
+ - '2020-02-10'
+ status:
+ code: 200
+ message: OK
+- request:
+ body: null
+ headers:
+ Accept:
+ - '*/*'
+ Accept-Encoding:
+ - gzip, deflate
+ Connection:
+ - keep-alive
+ Content-Length:
+ - '0'
+ User-Agent:
+ - azsdk-python-storage-blob/12.4.0 Python/3.7.3 (Windows-10-10.0.19041-SP0)
+ x-ms-date:
+ - Fri, 11 Sep 2020 20:58:28 GMT
+ x-ms-version:
+ - '2020-02-10'
+ method: DELETE
+ uri: https://storagename.blob.core.windows.net/utqqcontainer9d4d1789?restype=container
+ response:
+ body:
+ string: ''
+ headers:
+ date:
+ - Fri, 11 Sep 2020 20:58:28 GMT
+ transfer-encoding:
+ - chunked
+ x-ms-version:
+ - '2020-02-10'
+ status:
+ code: 202
+ message: Accepted
+version: 1
diff --git a/sdk/storage/azure-storage-blob/tests/test_quick_query.py b/sdk/storage/azure-storage-blob/tests/test_quick_query.py
index e11ed1b2f136..aec267c5b031 100644
--- a/sdk/storage/azure-storage-blob/tests/test_quick_query.py
+++ b/sdk/storage/azure-storage-blob/tests/test_quick_query.py
@@ -5,6 +5,7 @@
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------
+import base64
import pytest
@@ -17,6 +18,8 @@
)
# ------------------------------------------------------------------------------
+from azure.storage.blob._models import ArrowDialect
+
CSV_DATA = b'Service,Package,Version,RepoPath,MissingDocs\r\nApp Configuration,' \
b'azure-data-appconfiguration,1,appconfiguration,FALSE\r\nEvent Hubs' \
b'\r\nEvent Hubs - Azure Storage CheckpointStore,' \
@@ -875,4 +878,62 @@ def on_error(error):
self.assertEqual(query_result, b'{"name":"owner"}\n{}\n{"name":"owner"}\n')
self._teardown(bsc)
+ @GlobalStorageAccountPreparer()
+ def test_quick_query_output_in_arrow_format(self, resource_group, location, storage_account,
+ storage_account_key):
+ # Arrange
+ bsc = BlobServiceClient(
+ self.account_url(storage_account, "blob"),
+ credential=storage_account_key)
+ self._setup(bsc)
+
+ data = b'100,200,300,400\n300,400,500,600\n'
+
+ # upload the json file
+ blob_name = self._get_blob_reference()
+ blob_client = bsc.get_blob_client(self.container_name, blob_name)
+ blob_client.upload_blob(data, overwrite=True)
+
+ errors = []
+ def on_error(error):
+ errors.append(error)
+
+ output_format = [ArrowDialect("decimal", name="abc", precision=4, scale=2)]
+
+ resp = blob_client.query_blob(
+ "SELECT _2 from BlobStorage WHERE _1 > 250",
+ on_error=on_error,
+ output_format=output_format)
+ query_result = base64.b64encode(resp.readall())
+ expected_result = b"/////3gAAAAQAAAAAAAKAAwABgAFAAgACgAAAAABAwAMAAAACAAIAAAABAAIAAAABAAAAAEAAAAUAAAAEAAUAAgABgAHAAwAAAAQABAAAAAAAAEHJAAAABQAAAAEAAAAAAAAAAgADAAEAAgACAAAAAQAAAACAAAAAwAAAGFiYwD/////cAAAABAAAAAAAAoADgAGAAUACAAKAAAAAAMDABAAAAAAAAoADAAAAAQACAAKAAAAMAAAAAQAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQAAAAAAAAAAAAAAAAAAAAAAAAD/////iAAAABQAAAAAAAAADAAWAAYABQAIAAwADAAAAAADAwAYAAAAEAAAAAAAAAAAAAoAGAAMAAQACAAKAAAAPAAAABAAAAABAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEAAAAAAAAAAAAAAAAQAAAAEAAAAAAAAAAAAAAAAAAACQAQAAAAAAAAAAAAAAAAAA"
+
+ self.assertEqual(len(errors), 0)
+ self.assertEqual(query_result, expected_result)
+ self._teardown(bsc)
+
+ @GlobalStorageAccountPreparer()
+ def test_quick_query_input_in_arrow_format(self, resource_group, location, storage_account,
+ storage_account_key):
+ # Arrange
+ bsc = BlobServiceClient(
+ self.account_url(storage_account, "blob"),
+ credential=storage_account_key)
+ self._setup(bsc)
+
+ # upload the json file
+ blob_name = self._get_blob_reference()
+ blob_client = bsc.get_blob_client(self.container_name, blob_name)
+
+ errors = []
+ def on_error(error):
+ errors.append(error)
+
+ input_format = [ArrowDialect("decimal", name="abc", precision=4, scale=2)]
+
+ with self.assertRaises(ValueError):
+ blob_client.query_blob(
+ "SELECT * from BlobStorage",
+ on_error=on_error,
+ blob_format=input_format)
+
# ------------------------------------------------------------------------------
diff --git a/sdk/storage/azure-storage-file-datalake/azure/storage/filedatalake/__init__.py b/sdk/storage/azure-storage-file-datalake/azure/storage/filedatalake/__init__.py
index a86368c72c85..aeafd184585e 100644
--- a/sdk/storage/azure-storage-file-datalake/azure/storage/filedatalake/__init__.py
+++ b/sdk/storage/azure-storage-file-datalake/azure/storage/filedatalake/__init__.py
@@ -30,6 +30,7 @@
AccessPolicy,
DelimitedTextDialect,
DelimitedJsonDialect,
+ ArrowDialect,
DataLakeFileQueryError
)
from ._shared_access_signature import generate_account_sas, generate_file_system_sas, generate_directory_sas, \
@@ -75,5 +76,6 @@
'StorageStreamDownloader',
'DelimitedTextDialect',
'DelimitedJsonDialect',
+ 'ArrowDialect',
'DataLakeFileQueryError'
]
diff --git a/sdk/storage/azure-storage-file-datalake/azure/storage/filedatalake/_data_lake_file_client.py b/sdk/storage/azure-storage-file-datalake/azure/storage/filedatalake/_data_lake_file_client.py
index db076b81d6b3..37511ee55c5f 100644
--- a/sdk/storage/azure-storage-file-datalake/azure/storage/filedatalake/_data_lake_file_client.py
+++ b/sdk/storage/azure-storage-file-datalake/azure/storage/filedatalake/_data_lake_file_client.py
@@ -665,6 +665,7 @@ def query_file(self, query_expression, **kwargs):
according to that profile. This value can be a DelimitedTextDialect or a DelimitedJsonDialect.
:paramtype output_format:
~azure.storage.filedatalake.DelimitedTextDialect or ~azure.storage.filedatalake.DelimitedJsonDialect
+ or list[~azure.storage.filedatalake.ArrowDialect]
:keyword lease:
Required if the file has an active lease. Value can be a DataLakeLeaseClient object
or the lease ID as a string.
diff --git a/sdk/storage/azure-storage-file-datalake/azure/storage/filedatalake/_models.py b/sdk/storage/azure-storage-file-datalake/azure/storage/filedatalake/_models.py
index 406eedceac74..b8dd4d3568f9 100644
--- a/sdk/storage/azure-storage-file-datalake/azure/storage/filedatalake/_models.py
+++ b/sdk/storage/azure-storage-file-datalake/azure/storage/filedatalake/_models.py
@@ -17,6 +17,7 @@
from azure.storage.blob import AccessPolicy as BlobAccessPolicy
from azure.storage.blob import DelimitedTextDialect as BlobDelimitedTextDialect
from azure.storage.blob import DelimitedJsonDialect as BlobDelimitedJSON
+from azure.storage.blob import ArrowDialect as BlobArrowDialect
from azure.storage.blob._generated.models import StorageErrorException
from azure.storage.blob._models import ContainerPropertiesPaged
from ._deserialize import return_headers_and_deserialized_path_list
@@ -627,6 +628,20 @@ class DelimitedTextDialect(BlobDelimitedTextDialect):
"""
+class ArrowDialect(BlobArrowDialect):
+ """field of an arrow schema.
+
+ All required parameters must be populated in order to send to Azure.
+
+ :param str type: Required.
+ :keyword str name: The name of the field.
+ :keyword int precision: The precision of the field.
+ :keyword int scale: The scale of the field.
+ """
+ def __init__(self, type, **kwargs):
+ super(ArrowDialect, self).__init__(type, **kwargs)
+
+
class DataLakeFileQueryError(object):
"""The error happened during quick query operation.
diff --git a/sdk/storage/azure-storage-file-datalake/tests/recordings/test_quick_query.test_quick_query_output_in_arrow_format.yaml b/sdk/storage/azure-storage-file-datalake/tests/recordings/test_quick_query.test_quick_query_output_in_arrow_format.yaml
new file mode 100644
index 000000000000..3d0a614f7985
--- /dev/null
+++ b/sdk/storage/azure-storage-file-datalake/tests/recordings/test_quick_query.test_quick_query_output_in_arrow_format.yaml
@@ -0,0 +1,233 @@
+interactions:
+- request:
+ body: null
+ headers:
+ Accept:
+ - '*/*'
+ Accept-Encoding:
+ - gzip, deflate
+ Connection:
+ - keep-alive
+ Content-Length:
+ - '0'
+ User-Agent:
+ - azsdk-python-storage-dfs/12.1.1 Python/3.7.3 (Windows-10-10.0.19041-SP0)
+ x-ms-client-request-id:
+ - 8a6b42e2-f477-11ea-a588-001a7dda7113
+ x-ms-date:
+ - Fri, 11 Sep 2020 21:41:24 GMT
+ x-ms-properties:
+ - ''
+ x-ms-version:
+ - '2020-02-10'
+ method: PUT
+ uri: https://storagename.dfs.core.windows.net/utqqcontainer9d4d1789/csvfile9d4d1789?resource=file
+ response:
+ body:
+ string: ''
+ headers:
+ Content-Length:
+ - '0'
+ Date:
+ - Fri, 11 Sep 2020 21:41:24 GMT
+ ETag:
+ - '"0x8D8569B6EDE9D6F"'
+ Last-Modified:
+ - Fri, 11 Sep 2020 21:41:25 GMT
+ x-ms-request-id:
+ - ddbf5a49-d01f-0006-7684-881999000000
+ x-ms-version:
+ - '2020-02-10'
+ status:
+ code: 201
+ message: Created
+- request:
+ body: '100,200,300,400
+
+ 300,400,500,600
+
+ '
+ headers:
+ Accept:
+ - '*/*'
+ Accept-Encoding:
+ - gzip, deflate
+ Connection:
+ - keep-alive
+ Content-Length:
+ - '32'
+ Content-Type:
+ - application/json; charset=utf-8
+ User-Agent:
+ - azsdk-python-storage-dfs/12.1.1 Python/3.7.3 (Windows-10-10.0.19041-SP0)
+ x-ms-client-request-id:
+ - 8ac7d776-f477-11ea-aed4-001a7dda7113
+ x-ms-date:
+ - Fri, 11 Sep 2020 21:41:25 GMT
+ x-ms-version:
+ - '2020-02-10'
+ method: PATCH
+ uri: https://storagename.dfs.core.windows.net/utqqcontainer9d4d1789/csvfile9d4d1789?position=0&action=append
+ response:
+ body:
+ string: ''
+ headers:
+ Content-Length:
+ - '0'
+ Date:
+ - Fri, 11 Sep 2020 21:41:24 GMT
+ x-ms-request-id:
+ - ddbf5a4a-d01f-0006-7784-881999000000
+ x-ms-request-server-encrypted:
+ - 'false'
+ x-ms-version:
+ - '2020-02-10'
+ status:
+ code: 202
+ message: Accepted
+- request:
+ body: null
+ headers:
+ Accept:
+ - '*/*'
+ Accept-Encoding:
+ - gzip, deflate
+ Connection:
+ - keep-alive
+ Content-Length:
+ - '0'
+ If-Match:
+ - '"0x8D8569B6EDE9D6F"'
+ User-Agent:
+ - azsdk-python-storage-dfs/12.1.1 Python/3.7.3 (Windows-10-10.0.19041-SP0)
+ x-ms-client-request-id:
+ - 8ad4d4b0-f477-11ea-b511-001a7dda7113
+ x-ms-date:
+ - Fri, 11 Sep 2020 21:41:25 GMT
+ x-ms-version:
+ - '2020-02-10'
+ method: PATCH
+ uri: https://storagename.dfs.core.windows.net/utqqcontainer9d4d1789/csvfile9d4d1789?position=32&action=flush
+ response:
+ body:
+ string: ''
+ headers:
+ Content-Length:
+ - '0'
+ Date:
+ - Fri, 11 Sep 2020 21:41:24 GMT
+ ETag:
+ - '"0x8D8569B6EF8B486"'
+ Last-Modified:
+ - Fri, 11 Sep 2020 21:41:25 GMT
+ x-ms-request-id:
+ - ddbf5a4b-d01f-0006-7884-881999000000
+ x-ms-request-server-encrypted:
+ - 'false'
+ x-ms-version:
+ - '2020-02-10'
+ status:
+ code: 200
+ message: OK
+- request:
+ body: '
+
+ SQLSELECT _2 from BlobStorage
+ WHERE _1 > 250arrowdecimalabc42'
+ headers:
+ Accept:
+ - application/xml
+ Accept-Encoding:
+ - gzip, deflate
+ Connection:
+ - keep-alive
+ Content-Length:
+ - '390'
+ Content-Type:
+ - application/xml; charset=utf-8
+ User-Agent:
+ - azsdk-python-storage-dfs/12.1.1 Python/3.7.3 (Windows-10-10.0.19041-SP0)
+ x-ms-client-request-id:
+ - 8ae1c068-f477-11ea-a8a8-001a7dda7113
+ x-ms-date:
+ - Fri, 11 Sep 2020 21:41:25 GMT
+ x-ms-version:
+ - '2020-02-10'
+ method: POST
+ uri: https://storagename.blob.core.windows.net/utqqcontainer9d4d1789/csvfile9d4d1789?comp=query
+ response:
+ body:
+ string: !!binary |
+ T2JqAQIWYXZyby5zY2hlbWG+HlsKICB7CiAgICAidHlwZSI6ICJyZWNvcmQiLAogICAgIm5hbWUi
+ OiAiY29tLm1pY3Jvc29mdC5henVyZS5zdG9yYWdlLnF1ZXJ5QmxvYkNvbnRlbnRzLnJlc3VsdERh
+ dGEiLAogICAgImRvYyI6ICJIb2xkcyByZXN1bHQgZGF0YSBpbiB0aGUgZm9ybWF0IHNwZWNpZmll
+ ZCBmb3IgdGhpcyBxdWVyeSAoQ1NWLCBKU09OLCBldGMuKS4iLAogICAgImZpZWxkcyI6IFsKICAg
+ ICAgewogICAgICAgICJuYW1lIjogImRhdGEiLAogICAgICAgICJ0eXBlIjogImJ5dGVzIgogICAg
+ ICB9CiAgICBdCiAgfSwKICB7CiAgICAidHlwZSI6ICJyZWNvcmQiLAogICAgIm5hbWUiOiAiY29t
+ Lm1pY3Jvc29mdC5henVyZS5zdG9yYWdlLnF1ZXJ5QmxvYkNvbnRlbnRzLmVycm9yIiwKICAgICJk
+ b2MiOiAiQW4gZXJyb3IgdGhhdCBvY2N1cnJlZCB3aGlsZSBwcm9jZXNzaW5nIHRoZSBxdWVyeS4i
+ LAogICAgImZpZWxkcyI6IFsKICAgICAgewogICAgICAgICJuYW1lIjogImZhdGFsIiwKICAgICAg
+ ICAidHlwZSI6ICJib29sZWFuIiwKICAgICAgICAiZG9jIjogIklmIHRydWUsIHRoaXMgZXJyb3Ig
+ cHJldmVudHMgZnVydGhlciBxdWVyeSBwcm9jZXNzaW5nLiAgTW9yZSByZXN1bHQgZGF0YSBtYXkg
+ YmUgcmV0dXJuZWQsIGJ1dCB0aGVyZSBpcyBubyBndWFyYW50ZWUgdGhhdCBhbGwgb2YgdGhlIG9y
+ aWdpbmFsIGRhdGEgd2lsbCBiZSBwcm9jZXNzZWQuICBJZiBmYWxzZSwgdGhpcyBlcnJvciBkb2Vz
+ IG5vdCBwcmV2ZW50IGZ1cnRoZXIgcXVlcnkgcHJvY2Vzc2luZy4iCiAgICAgIH0sCiAgICAgIHsK
+ ICAgICAgICAibmFtZSI6ICJuYW1lIiwKICAgICAgICAidHlwZSI6ICJzdHJpbmciLAogICAgICAg
+ ICJkb2MiOiAiVGhlIG5hbWUgb2YgdGhlIGVycm9yIgogICAgICB9LAogICAgICB7CiAgICAgICAg
+ Im5hbWUiOiAiZGVzY3JpcHRpb24iLAogICAgICAgICJ0eXBlIjogInN0cmluZyIsCiAgICAgICAg
+ ImRvYyI6ICJBIGRlc2NyaXB0aW9uIG9mIHRoZSBlcnJvciIKICAgICAgfSwKICAgICAgewogICAg
+ ICAgICJuYW1lIjogInBvc2l0aW9uIiwKICAgICAgICAidHlwZSI6ICJsb25nIiwKICAgICAgICAi
+ ZG9jIjogIlRoZSBibG9iIG9mZnNldCBhdCB3aGljaCB0aGUgZXJyb3Igb2NjdXJyZWQiCiAgICAg
+ IH0KICAgIF0KICB9LAogIHsKICAgICJ0eXBlIjogInJlY29yZCIsCiAgICAibmFtZSI6ICJjb20u
+ bWljcm9zb2Z0LmF6dXJlLnN0b3JhZ2UucXVlcnlCbG9iQ29udGVudHMucHJvZ3Jlc3MiLAogICAg
+ ImRvYyI6ICJJbmZvcm1hdGlvbiBhYm91dCB0aGUgcHJvZ3Jlc3Mgb2YgdGhlIHF1ZXJ5IiwKICAg
+ ICJmaWVsZHMiOiBbCiAgICAgIHsKICAgICAgICAibmFtZSI6ICJieXRlc1NjYW5uZWQiLAogICAg
+ ICAgICJ0eXBlIjogImxvbmciLAogICAgICAgICJkb2MiOiAiVGhlIG51bWJlciBvZiBieXRlcyB0
+ aGF0IGhhdmUgYmVlbiBzY2FubmVkIgogICAgICB9LAogICAgICB7CiAgICAgICAgIm5hbWUiOiAi
+ dG90YWxCeXRlcyIsCiAgICAgICAgInR5cGUiOiAibG9uZyIsCiAgICAgICAgImRvYyI6ICJUaGUg
+ dG90YWwgbnVtYmVyIG9mIGJ5dGVzIHRvIGJlIHNjYW5uZWQgaW4gdGhpcyBxdWVyeSIKICAgICAg
+ fQogICAgXQogIH0sCiAgewogICAgInR5cGUiOiAicmVjb3JkIiwKICAgICJuYW1lIjogImNvbS5t
+ aWNyb3NvZnQuYXp1cmUuc3RvcmFnZS5xdWVyeUJsb2JDb250ZW50cy5lbmQiLAogICAgImRvYyI6
+ ICJTZW50IGFzIHRoZSBmaW5hbCBtZXNzYWdlIG9mIHRoZSByZXNwb25zZSwgaW5kaWNhdGluZyB0
+ aGF0IGFsbCByZXN1bHRzIGhhdmUgYmVlbiBzZW50LiIsCiAgICAiZmllbGRzIjogWwogICAgICB7
+ CiAgICAgICAgIm5hbWUiOiAidG90YWxCeXRlcyIsCiAgICAgICAgInR5cGUiOiAibG9uZyIsCiAg
+ ICAgICAgImRvYyI6ICJUaGUgdG90YWwgbnVtYmVyIG9mIGJ5dGVzIHRvIGJlIHNjYW5uZWQgaW4g
+ dGhpcyBxdWVyeSIKICAgICAgfQogICAgXQogIH0KXQoAx7a7c8s6SUGx6YZlanjA4QL2AwDwA///
+ //94AAAAEAAAAAAACgAMAAYABQAIAAoAAAAAAQMADAAAAAgACAAAAAQACAAAAAQAAAABAAAAFAAA
+ ABAAFAAIAAYABwAMAAAAEAAQAAAAAAABByQAAAAUAAAABAAAAAAAAAAIAAwABAAIAAgAAAAEAAAA
+ AgAAAAMAAABhYmMA/////3AAAAAQAAAAAAAKAA4ABgAFAAgACgAAAAADAwAQAAAAAAAKAAwAAAAE
+ AAgACgAAADAAAAAEAAAAAgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEA
+ AAAAAAAAAAAAAAAAAAAAAAAAx7a7c8s6SUGx6YZlanjA4QLGAgDAAv////+IAAAAFAAAAAAAAAAM
+ ABYABgAFAAgADAAMAAAAAAMDABgAAAAQAAAAAAAAAAAACgAYAAwABAAIAAoAAAA8AAAAEAAAAAEA
+ AAAAAAAAAAAAAAIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQAAAAAAAAAAAAAAABAAAAAQAA
+ AAAAAAAAAAAAAAAAAJABAAAAAAAAAAAAAAAAAADHtrtzyzpJQbHphmVqeMDhAgYEQEDHtrtzyzpJ
+ QbHphmVqeMDhAgQGQMe2u3PLOklBsemGZWp4wOE=
+ headers:
+ Accept-Ranges:
+ - bytes
+ Content-Type:
+ - avro/binary
+ Date:
+ - Fri, 11 Sep 2020 21:41:25 GMT
+ ETag:
+ - '"0x8D8569B6EF8B486"'
+ Last-Modified:
+ - Fri, 11 Sep 2020 21:41:25 GMT
+ Transfer-Encoding:
+ - chunked
+ x-ms-blob-type:
+ - BlockBlob
+ x-ms-creation-time:
+ - Fri, 11 Sep 2020 21:41:25 GMT
+ x-ms-lease-state:
+ - available
+ x-ms-lease-status:
+ - unlocked
+ x-ms-request-id:
+ - 07a14690-801e-0008-3e84-88d877000000
+ x-ms-version:
+ - '2020-02-10'
+ status:
+ code: 200
+ message: OK
+version: 1
diff --git a/sdk/storage/azure-storage-file-datalake/tests/test_quick_query.py b/sdk/storage/azure-storage-file-datalake/tests/test_quick_query.py
index 8e2cb74dfe8a..d28f0a917204 100644
--- a/sdk/storage/azure-storage-file-datalake/tests/test_quick_query.py
+++ b/sdk/storage/azure-storage-file-datalake/tests/test_quick_query.py
@@ -5,14 +5,15 @@
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------
+import base64
import pytest
from azure.storage.filedatalake import (
DelimitedTextDialect,
DelimitedJsonDialect,
- DataLakeFileQueryError
-)
+ DataLakeFileQueryError,
+ ArrowDialect)
from testcase import (
StorageTestCase,
@@ -799,4 +800,49 @@ def on_error(error):
self.assertEqual(len(resp), len(data))
self.assertEqual(query_result, b'{"name":"owner"}\n{}\n{"name":"owner"}\n')
+ @record
+ def test_quick_query_output_in_arrow_format(self):
+ # Arrange
+ data = b'100,200,300,400\n300,400,500,600\n'
+
+ # upload the json file
+ file_name = self._get_file_reference()
+ file_client = self.dsc.get_file_client(self.filesystem_name, file_name)
+ file_client.upload_data(data, overwrite=True)
+
+ errors = []
+ def on_error(error):
+ errors.append(error)
+
+ output_format = [ArrowDialect("decimal", name="abc", precision=4, scale=2)]
+
+ expected_result = b"/////3gAAAAQAAAAAAAKAAwABgAFAAgACgAAAAABAwAMAAAACAAIAAAABAAIAAAABAAAAAEAAAAUAAAAEAAUAAgABgAHAAwAAAAQABAAAAAAAAEHJAAAABQAAAAEAAAAAAAAAAgADAAEAAgACAAAAAQAAAACAAAAAwAAAGFiYwD/////cAAAABAAAAAAAAoADgAGAAUACAAKAAAAAAMDABAAAAAAAAoADAAAAAQACAAKAAAAMAAAAAQAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQAAAAAAAAAAAAAAAAAAAAAAAAD/////iAAAABQAAAAAAAAADAAWAAYABQAIAAwADAAAAAADAwAYAAAAEAAAAAAAAAAAAAoAGAAMAAQACAAKAAAAPAAAABAAAAABAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEAAAAAAAAAAAAAAAAQAAAAEAAAAAAAAAAAAAAAAAAACQAQAAAAAAAAAAAAAAAAAA"
+
+ resp = file_client.query_file(
+ "SELECT _2 from BlobStorage WHERE _1 > 250",
+ on_error=on_error,
+ output_format=output_format)
+ query_result = base64.b64encode(resp.readall())
+
+ self.assertEqual(len(errors), 0)
+ self.assertEqual(query_result, expected_result)
+
+ @record
+ def test_quick_query_input_in_arrow_format(self):
+ # Arrange
+ file_name = self._get_file_reference()
+ file_client = self.dsc.get_file_client(self.filesystem_name, file_name)
+
+ errors = []
+ def on_error(error):
+ errors.append(error)
+
+ input_format = [ArrowDialect("decimal", name="abc", precision=4, scale=2)]
+
+ with self.assertRaises(ValueError):
+ file_client.query_file(
+ "SELECT _2 from BlobStorage WHERE _1 > 250",
+ on_error=on_error,
+ file_format=input_format)
+
# ------------------------------------------------------------------------------