diff --git a/sdk/storage/azure-storage-blob/azure/storage/blob/__init__.py b/sdk/storage/azure-storage-blob/azure/storage/blob/__init__.py index caa00401b96c..e5bef3d6a2bd 100644 --- a/sdk/storage/azure-storage-blob/azure/storage/blob/__init__.py +++ b/sdk/storage/azure-storage-blob/azure/storage/blob/__init__.py @@ -54,6 +54,7 @@ BlobQueryError, DelimitedJsonDialect, DelimitedTextDialect, + ArrowDialect, ObjectReplicationPolicy, ObjectReplicationRule ) @@ -219,6 +220,7 @@ def download_blob_from_url( 'BlobQueryError', 'DelimitedJsonDialect', 'DelimitedTextDialect', + 'ArrowDialect', 'BlobQueryReader', 'ObjectReplicationPolicy', 'ObjectReplicationRule' diff --git a/sdk/storage/azure-storage-blob/azure/storage/blob/_blob_client.py b/sdk/storage/azure-storage-blob/azure/storage/blob/_blob_client.py index 85837199921b..0cc5ff877a5c 100644 --- a/sdk/storage/azure-storage-blob/azure/storage/blob/_blob_client.py +++ b/sdk/storage/azure-storage-blob/azure/storage/blob/_blob_client.py @@ -683,13 +683,19 @@ def _quick_query_options(self, query_expression, try: delimiter = input_format.lineterminator except AttributeError: - delimiter = input_format.delimiter + try: + delimiter = input_format.delimiter + except AttributeError: + raise ValueError("The Type of blob_format can only be DelimitedTextDialect or DelimitedJsonDialect") output_format = kwargs.pop('output_format', None) if output_format: try: delimiter = output_format.lineterminator except AttributeError: - delimiter = output_format.delimiter + try: + delimiter = output_format.delimiter + except AttributeError: + pass else: output_format = input_format query_request = QueryRequest( @@ -742,6 +748,7 @@ def query_blob(self, query_expression, **kwargs): as it is represented in the blob. By providing an output format, the blob data will be reformatted according to that profile. This value can be a DelimitedTextDialect or a DelimitedJsonDialect. :paramtype output_format: ~azure.storage.blob.DelimitedTextDialect or ~azure.storage.blob.DelimitedJsonDialect + or list[~azure.storage.blob.ArrowDialect] :keyword lease: Required if the blob has an active lease. Value can be a BlobLeaseClient object or the lease ID as a string. diff --git a/sdk/storage/azure-storage-blob/azure/storage/blob/_blob_service_client.py b/sdk/storage/azure-storage-blob/azure/storage/blob/_blob_service_client.py index fc1249cf8c02..fee0382a8d7b 100644 --- a/sdk/storage/azure-storage-blob/azure/storage/blob/_blob_service_client.py +++ b/sdk/storage/azure-storage-blob/azure/storage/blob/_blob_service_client.py @@ -435,7 +435,7 @@ def find_blobs_by_tags(self, filter_expression, **kwargs): :param str filter_expression: The expression to find blobs whose tags matches the specified condition. eg. "\"yourtagname\"='firsttag' and \"yourtagname2\"='secondtag'" - To specify a container, eg. "@container='containerName' and \"Name\"='C'" + To specify a container, eg. where=@container='containerName' and \"Name\"='C'" :keyword int results_per_page: The max result per page when paginating. :keyword int timeout: diff --git a/sdk/storage/azure-storage-blob/azure/storage/blob/_models.py b/sdk/storage/azure-storage-blob/azure/storage/blob/_models.py index 2fb963d3894b..799310f627cc 100644 --- a/sdk/storage/azure-storage-blob/azure/storage/blob/_models.py +++ b/sdk/storage/azure-storage-blob/azure/storage/blob/_models.py @@ -9,7 +9,7 @@ from enum import Enum from azure.core.paging import PageIterator -from azure.storage.blob._generated.models import FilterBlobItem +from azure.storage.blob._generated.models import FilterBlobItem, ArrowField from ._shared import decode_base64_to_text from ._shared.response_handlers import return_context_and_deserialized, process_storage_error @@ -1099,6 +1099,20 @@ def __init__(self, **kwargs): self.has_header = kwargs.pop('has_header', False) +class ArrowDialect(ArrowField): + """field of an arrow schema. + + All required parameters must be populated in order to send to Azure. + + :param str type: Required. + :keyword str name: The name of the field. + :keyword int precision: The precision of the field. + :keyword int scale: The scale of the field. + """ + def __init__(self, type, **kwargs): + super(ArrowDialect, self).__init__(type=type, **kwargs) + + class ObjectReplicationPolicy(DictMixin): """Policy id and rule ids applied to a blob. diff --git a/sdk/storage/azure-storage-blob/azure/storage/blob/_serialize.py b/sdk/storage/azure-storage-blob/azure/storage/blob/_serialize.py index 372ab454c74f..cb3710fc6596 100644 --- a/sdk/storage/azure-storage-blob/azure/storage/blob/_serialize.py +++ b/sdk/storage/azure-storage-blob/azure/storage/blob/_serialize.py @@ -13,8 +13,8 @@ from ._models import ( ContainerEncryptionScope, - DelimitedJsonDialect -) + DelimitedJsonDialect, + ArrowDialect) from ._generated.models import ( ModifiedAccessConditions, SourceModifiedAccessConditions, @@ -24,6 +24,7 @@ QuerySerialization, DelimitedTextConfiguration, JsonTextConfiguration, + ArrowConfiguration, QueryFormatType, BlobTag, BlobTags, LeaseAccessConditions @@ -182,6 +183,13 @@ def serialize_query_format(formater): type=QueryFormatType.delimited, delimited_text_configuration=serialization_settings ) + elif isinstance(formater, list): + serialization_settings = ArrowConfiguration( + schema=formater + ) + qq_format = QueryFormat( + type=QueryFormatType.arrow, + arrow_configuration=serialization_settings) elif not formater: return None else: diff --git a/sdk/storage/azure-storage-blob/tests/recordings/test_quick_query.test_quick_query_output_in_arrow_format.yaml b/sdk/storage/azure-storage-blob/tests/recordings/test_quick_query.test_quick_query_output_in_arrow_format.yaml new file mode 100644 index 000000000000..8ee27403148f --- /dev/null +++ b/sdk/storage/azure-storage-blob/tests/recordings/test_quick_query.test_quick_query_output_in_arrow_format.yaml @@ -0,0 +1,217 @@ +interactions: +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '0' + User-Agent: + - azsdk-python-storage-blob/12.4.0 Python/3.7.3 (Windows-10-10.0.19041-SP0) + x-ms-date: + - Fri, 11 Sep 2020 20:58:27 GMT + x-ms-version: + - '2020-02-10' + method: PUT + uri: https://storagename.blob.core.windows.net/utqqcontainer9d4d1789?restype=container + response: + body: + string: '' + headers: + date: + - Fri, 11 Sep 2020 20:58:28 GMT + etag: + - '"0x8D856956EBF3C36"' + last-modified: + - Fri, 11 Sep 2020 20:58:28 GMT + transfer-encoding: + - chunked + x-ms-version: + - '2020-02-10' + status: + code: 201 + message: Created +- request: + body: '100,200,300,400 + + 300,400,500,600 + + ' + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '32' + Content-Type: + - application/octet-stream + User-Agent: + - azsdk-python-storage-blob/12.4.0 Python/3.7.3 (Windows-10-10.0.19041-SP0) + x-ms-blob-type: + - BlockBlob + x-ms-date: + - Fri, 11 Sep 2020 20:58:28 GMT + x-ms-version: + - '2020-02-10' + method: PUT + uri: https://storagename.blob.core.windows.net/utqqcontainer9d4d1789/csvfile9d4d1789 + response: + body: + string: '' + headers: + content-md5: + - /hmKXD7m7tyfn12eEsFvyQ== + date: + - Fri, 11 Sep 2020 20:58:28 GMT + etag: + - '"0x8D856956ED0E86F"' + last-modified: + - Fri, 11 Sep 2020 20:58:28 GMT + transfer-encoding: + - chunked + x-ms-content-crc64: + - Dn1U+tgM/4c= + x-ms-request-server-encrypted: + - 'false' + x-ms-version: + - '2020-02-10' + status: + code: 201 + message: Created +- request: + body: ' + + SQLSELECT _2 from BlobStorage + WHERE _1 > 250arrowdecimalabc42' + headers: + Accept: + - application/xml + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '390' + Content-Type: + - application/xml; charset=utf-8 + User-Agent: + - azsdk-python-storage-blob/12.4.0 Python/3.7.3 (Windows-10-10.0.19041-SP0) + x-ms-date: + - Fri, 11 Sep 2020 20:58:28 GMT + x-ms-version: + - '2020-02-10' + method: POST + uri: https://storagename.blob.core.windows.net/utqqcontainer9d4d1789/csvfile9d4d1789?comp=query + response: + body: + string: !!binary | + T2JqAQIWYXZyby5zY2hlbWG+HlsKICB7CiAgICAidHlwZSI6ICJyZWNvcmQiLAogICAgIm5hbWUi + OiAiY29tLm1pY3Jvc29mdC5henVyZS5zdG9yYWdlLnF1ZXJ5QmxvYkNvbnRlbnRzLnJlc3VsdERh + dGEiLAogICAgImRvYyI6ICJIb2xkcyByZXN1bHQgZGF0YSBpbiB0aGUgZm9ybWF0IHNwZWNpZmll + ZCBmb3IgdGhpcyBxdWVyeSAoQ1NWLCBKU09OLCBldGMuKS4iLAogICAgImZpZWxkcyI6IFsKICAg + ICAgewogICAgICAgICJuYW1lIjogImRhdGEiLAogICAgICAgICJ0eXBlIjogImJ5dGVzIgogICAg + ICB9CiAgICBdCiAgfSwKICB7CiAgICAidHlwZSI6ICJyZWNvcmQiLAogICAgIm5hbWUiOiAiY29t + Lm1pY3Jvc29mdC5henVyZS5zdG9yYWdlLnF1ZXJ5QmxvYkNvbnRlbnRzLmVycm9yIiwKICAgICJk + b2MiOiAiQW4gZXJyb3IgdGhhdCBvY2N1cnJlZCB3aGlsZSBwcm9jZXNzaW5nIHRoZSBxdWVyeS4i + LAogICAgImZpZWxkcyI6IFsKICAgICAgewogICAgICAgICJuYW1lIjogImZhdGFsIiwKICAgICAg + ICAidHlwZSI6ICJib29sZWFuIiwKICAgICAgICAiZG9jIjogIklmIHRydWUsIHRoaXMgZXJyb3Ig + cHJldmVudHMgZnVydGhlciBxdWVyeSBwcm9jZXNzaW5nLiAgTW9yZSByZXN1bHQgZGF0YSBtYXkg + YmUgcmV0dXJuZWQsIGJ1dCB0aGVyZSBpcyBubyBndWFyYW50ZWUgdGhhdCBhbGwgb2YgdGhlIG9y + aWdpbmFsIGRhdGEgd2lsbCBiZSBwcm9jZXNzZWQuICBJZiBmYWxzZSwgdGhpcyBlcnJvciBkb2Vz + IG5vdCBwcmV2ZW50IGZ1cnRoZXIgcXVlcnkgcHJvY2Vzc2luZy4iCiAgICAgIH0sCiAgICAgIHsK + ICAgICAgICAibmFtZSI6ICJuYW1lIiwKICAgICAgICAidHlwZSI6ICJzdHJpbmciLAogICAgICAg + ICJkb2MiOiAiVGhlIG5hbWUgb2YgdGhlIGVycm9yIgogICAgICB9LAogICAgICB7CiAgICAgICAg + Im5hbWUiOiAiZGVzY3JpcHRpb24iLAogICAgICAgICJ0eXBlIjogInN0cmluZyIsCiAgICAgICAg + ImRvYyI6ICJBIGRlc2NyaXB0aW9uIG9mIHRoZSBlcnJvciIKICAgICAgfSwKICAgICAgewogICAg + ICAgICJuYW1lIjogInBvc2l0aW9uIiwKICAgICAgICAidHlwZSI6ICJsb25nIiwKICAgICAgICAi + ZG9jIjogIlRoZSBibG9iIG9mZnNldCBhdCB3aGljaCB0aGUgZXJyb3Igb2NjdXJyZWQiCiAgICAg + IH0KICAgIF0KICB9LAogIHsKICAgICJ0eXBlIjogInJlY29yZCIsCiAgICAibmFtZSI6ICJjb20u + bWljcm9zb2Z0LmF6dXJlLnN0b3JhZ2UucXVlcnlCbG9iQ29udGVudHMucHJvZ3Jlc3MiLAogICAg + ImRvYyI6ICJJbmZvcm1hdGlvbiBhYm91dCB0aGUgcHJvZ3Jlc3Mgb2YgdGhlIHF1ZXJ5IiwKICAg + ICJmaWVsZHMiOiBbCiAgICAgIHsKICAgICAgICAibmFtZSI6ICJieXRlc1NjYW5uZWQiLAogICAg + ICAgICJ0eXBlIjogImxvbmciLAogICAgICAgICJkb2MiOiAiVGhlIG51bWJlciBvZiBieXRlcyB0 + aGF0IGhhdmUgYmVlbiBzY2FubmVkIgogICAgICB9LAogICAgICB7CiAgICAgICAgIm5hbWUiOiAi + dG90YWxCeXRlcyIsCiAgICAgICAgInR5cGUiOiAibG9uZyIsCiAgICAgICAgImRvYyI6ICJUaGUg + dG90YWwgbnVtYmVyIG9mIGJ5dGVzIHRvIGJlIHNjYW5uZWQgaW4gdGhpcyBxdWVyeSIKICAgICAg + fQogICAgXQogIH0sCiAgewogICAgInR5cGUiOiAicmVjb3JkIiwKICAgICJuYW1lIjogImNvbS5t + aWNyb3NvZnQuYXp1cmUuc3RvcmFnZS5xdWVyeUJsb2JDb250ZW50cy5lbmQiLAogICAgImRvYyI6 + ICJTZW50IGFzIHRoZSBmaW5hbCBtZXNzYWdlIG9mIHRoZSByZXNwb25zZSwgaW5kaWNhdGluZyB0 + aGF0IGFsbCByZXN1bHRzIGhhdmUgYmVlbiBzZW50LiIsCiAgICAiZmllbGRzIjogWwogICAgICB7 + CiAgICAgICAgIm5hbWUiOiAidG90YWxCeXRlcyIsCiAgICAgICAgInR5cGUiOiAibG9uZyIsCiAg + ICAgICAgImRvYyI6ICJUaGUgdG90YWwgbnVtYmVyIG9mIGJ5dGVzIHRvIGJlIHNjYW5uZWQgaW4g + dGhpcyBxdWVyeSIKICAgICAgfQogICAgXQogIH0KXQoAQmgjmNsu90Ck/YQ3d6WMowL2AwDwA/// + //94AAAAEAAAAAAACgAMAAYABQAIAAoAAAAAAQMADAAAAAgACAAAAAQACAAAAAQAAAABAAAAFAAA + ABAAFAAIAAYABwAMAAAAEAAQAAAAAAABByQAAAAUAAAABAAAAAAAAAAIAAwABAAIAAgAAAAEAAAA + AgAAAAMAAABhYmMA/////3AAAAAQAAAAAAAKAA4ABgAFAAgACgAAAAADAwAQAAAAAAAKAAwAAAAE + AAgACgAAADAAAAAEAAAAAgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEA + AAAAAAAAAAAAAAAAAAAAAAAAQmgjmNsu90Ck/YQ3d6WMowLGAgDAAv////+IAAAAFAAAAAAAAAAM + ABYABgAFAAgADAAMAAAAAAMDABgAAAAQAAAAAAAAAAAACgAYAAwABAAIAAoAAAA8AAAAEAAAAAEA + AAAAAAAAAAAAAAIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQAAAAAAAAAAAAAAABAAAAAQAA + AAAAAAAAAAAAAAAAAJABAAAAAAAAAAAAAAAAAABCaCOY2y73QKT9hDd3pYyjAgYEQEBCaCOY2y73 + QKT9hDd3pYyjAgQGQEJoI5jbLvdApP2EN3eljKM= + headers: + accept-ranges: + - bytes + content-type: + - avro/binary + date: + - Fri, 11 Sep 2020 20:58:28 GMT + etag: + - '"0x8D856956ED0E86F"' + last-modified: + - Fri, 11 Sep 2020 20:58:28 GMT + transfer-encoding: + - chunked + x-ms-blob-type: + - BlockBlob + x-ms-creation-time: + - Fri, 11 Sep 2020 20:58:28 GMT + x-ms-lease-state: + - available + x-ms-lease-status: + - unlocked + x-ms-version: + - '2020-02-10' + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '0' + User-Agent: + - azsdk-python-storage-blob/12.4.0 Python/3.7.3 (Windows-10-10.0.19041-SP0) + x-ms-date: + - Fri, 11 Sep 2020 20:58:28 GMT + x-ms-version: + - '2020-02-10' + method: DELETE + uri: https://storagename.blob.core.windows.net/utqqcontainer9d4d1789?restype=container + response: + body: + string: '' + headers: + date: + - Fri, 11 Sep 2020 20:58:28 GMT + transfer-encoding: + - chunked + x-ms-version: + - '2020-02-10' + status: + code: 202 + message: Accepted +version: 1 diff --git a/sdk/storage/azure-storage-blob/tests/test_quick_query.py b/sdk/storage/azure-storage-blob/tests/test_quick_query.py index e11ed1b2f136..aec267c5b031 100644 --- a/sdk/storage/azure-storage-blob/tests/test_quick_query.py +++ b/sdk/storage/azure-storage-blob/tests/test_quick_query.py @@ -5,6 +5,7 @@ # Licensed under the MIT License. See License.txt in the project root for # license information. # -------------------------------------------------------------------------- +import base64 import pytest @@ -17,6 +18,8 @@ ) # ------------------------------------------------------------------------------ +from azure.storage.blob._models import ArrowDialect + CSV_DATA = b'Service,Package,Version,RepoPath,MissingDocs\r\nApp Configuration,' \ b'azure-data-appconfiguration,1,appconfiguration,FALSE\r\nEvent Hubs' \ b'\r\nEvent Hubs - Azure Storage CheckpointStore,' \ @@ -875,4 +878,62 @@ def on_error(error): self.assertEqual(query_result, b'{"name":"owner"}\n{}\n{"name":"owner"}\n') self._teardown(bsc) + @GlobalStorageAccountPreparer() + def test_quick_query_output_in_arrow_format(self, resource_group, location, storage_account, + storage_account_key): + # Arrange + bsc = BlobServiceClient( + self.account_url(storage_account, "blob"), + credential=storage_account_key) + self._setup(bsc) + + data = b'100,200,300,400\n300,400,500,600\n' + + # upload the json file + blob_name = self._get_blob_reference() + blob_client = bsc.get_blob_client(self.container_name, blob_name) + blob_client.upload_blob(data, overwrite=True) + + errors = [] + def on_error(error): + errors.append(error) + + output_format = [ArrowDialect("decimal", name="abc", precision=4, scale=2)] + + resp = blob_client.query_blob( + "SELECT _2 from BlobStorage WHERE _1 > 250", + on_error=on_error, + output_format=output_format) + query_result = base64.b64encode(resp.readall()) + expected_result = b"/////3gAAAAQAAAAAAAKAAwABgAFAAgACgAAAAABAwAMAAAACAAIAAAABAAIAAAABAAAAAEAAAAUAAAAEAAUAAgABgAHAAwAAAAQABAAAAAAAAEHJAAAABQAAAAEAAAAAAAAAAgADAAEAAgACAAAAAQAAAACAAAAAwAAAGFiYwD/////cAAAABAAAAAAAAoADgAGAAUACAAKAAAAAAMDABAAAAAAAAoADAAAAAQACAAKAAAAMAAAAAQAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQAAAAAAAAAAAAAAAAAAAAAAAAD/////iAAAABQAAAAAAAAADAAWAAYABQAIAAwADAAAAAADAwAYAAAAEAAAAAAAAAAAAAoAGAAMAAQACAAKAAAAPAAAABAAAAABAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEAAAAAAAAAAAAAAAAQAAAAEAAAAAAAAAAAAAAAAAAACQAQAAAAAAAAAAAAAAAAAA" + + self.assertEqual(len(errors), 0) + self.assertEqual(query_result, expected_result) + self._teardown(bsc) + + @GlobalStorageAccountPreparer() + def test_quick_query_input_in_arrow_format(self, resource_group, location, storage_account, + storage_account_key): + # Arrange + bsc = BlobServiceClient( + self.account_url(storage_account, "blob"), + credential=storage_account_key) + self._setup(bsc) + + # upload the json file + blob_name = self._get_blob_reference() + blob_client = bsc.get_blob_client(self.container_name, blob_name) + + errors = [] + def on_error(error): + errors.append(error) + + input_format = [ArrowDialect("decimal", name="abc", precision=4, scale=2)] + + with self.assertRaises(ValueError): + blob_client.query_blob( + "SELECT * from BlobStorage", + on_error=on_error, + blob_format=input_format) + # ------------------------------------------------------------------------------ diff --git a/sdk/storage/azure-storage-file-datalake/azure/storage/filedatalake/__init__.py b/sdk/storage/azure-storage-file-datalake/azure/storage/filedatalake/__init__.py index a86368c72c85..aeafd184585e 100644 --- a/sdk/storage/azure-storage-file-datalake/azure/storage/filedatalake/__init__.py +++ b/sdk/storage/azure-storage-file-datalake/azure/storage/filedatalake/__init__.py @@ -30,6 +30,7 @@ AccessPolicy, DelimitedTextDialect, DelimitedJsonDialect, + ArrowDialect, DataLakeFileQueryError ) from ._shared_access_signature import generate_account_sas, generate_file_system_sas, generate_directory_sas, \ @@ -75,5 +76,6 @@ 'StorageStreamDownloader', 'DelimitedTextDialect', 'DelimitedJsonDialect', + 'ArrowDialect', 'DataLakeFileQueryError' ] diff --git a/sdk/storage/azure-storage-file-datalake/azure/storage/filedatalake/_data_lake_file_client.py b/sdk/storage/azure-storage-file-datalake/azure/storage/filedatalake/_data_lake_file_client.py index db076b81d6b3..37511ee55c5f 100644 --- a/sdk/storage/azure-storage-file-datalake/azure/storage/filedatalake/_data_lake_file_client.py +++ b/sdk/storage/azure-storage-file-datalake/azure/storage/filedatalake/_data_lake_file_client.py @@ -665,6 +665,7 @@ def query_file(self, query_expression, **kwargs): according to that profile. This value can be a DelimitedTextDialect or a DelimitedJsonDialect. :paramtype output_format: ~azure.storage.filedatalake.DelimitedTextDialect or ~azure.storage.filedatalake.DelimitedJsonDialect + or list[~azure.storage.filedatalake.ArrowDialect] :keyword lease: Required if the file has an active lease. Value can be a DataLakeLeaseClient object or the lease ID as a string. diff --git a/sdk/storage/azure-storage-file-datalake/azure/storage/filedatalake/_models.py b/sdk/storage/azure-storage-file-datalake/azure/storage/filedatalake/_models.py index 406eedceac74..b8dd4d3568f9 100644 --- a/sdk/storage/azure-storage-file-datalake/azure/storage/filedatalake/_models.py +++ b/sdk/storage/azure-storage-file-datalake/azure/storage/filedatalake/_models.py @@ -17,6 +17,7 @@ from azure.storage.blob import AccessPolicy as BlobAccessPolicy from azure.storage.blob import DelimitedTextDialect as BlobDelimitedTextDialect from azure.storage.blob import DelimitedJsonDialect as BlobDelimitedJSON +from azure.storage.blob import ArrowDialect as BlobArrowDialect from azure.storage.blob._generated.models import StorageErrorException from azure.storage.blob._models import ContainerPropertiesPaged from ._deserialize import return_headers_and_deserialized_path_list @@ -627,6 +628,20 @@ class DelimitedTextDialect(BlobDelimitedTextDialect): """ +class ArrowDialect(BlobArrowDialect): + """field of an arrow schema. + + All required parameters must be populated in order to send to Azure. + + :param str type: Required. + :keyword str name: The name of the field. + :keyword int precision: The precision of the field. + :keyword int scale: The scale of the field. + """ + def __init__(self, type, **kwargs): + super(ArrowDialect, self).__init__(type, **kwargs) + + class DataLakeFileQueryError(object): """The error happened during quick query operation. diff --git a/sdk/storage/azure-storage-file-datalake/tests/recordings/test_quick_query.test_quick_query_output_in_arrow_format.yaml b/sdk/storage/azure-storage-file-datalake/tests/recordings/test_quick_query.test_quick_query_output_in_arrow_format.yaml new file mode 100644 index 000000000000..3d0a614f7985 --- /dev/null +++ b/sdk/storage/azure-storage-file-datalake/tests/recordings/test_quick_query.test_quick_query_output_in_arrow_format.yaml @@ -0,0 +1,233 @@ +interactions: +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '0' + User-Agent: + - azsdk-python-storage-dfs/12.1.1 Python/3.7.3 (Windows-10-10.0.19041-SP0) + x-ms-client-request-id: + - 8a6b42e2-f477-11ea-a588-001a7dda7113 + x-ms-date: + - Fri, 11 Sep 2020 21:41:24 GMT + x-ms-properties: + - '' + x-ms-version: + - '2020-02-10' + method: PUT + uri: https://storagename.dfs.core.windows.net/utqqcontainer9d4d1789/csvfile9d4d1789?resource=file + response: + body: + string: '' + headers: + Content-Length: + - '0' + Date: + - Fri, 11 Sep 2020 21:41:24 GMT + ETag: + - '"0x8D8569B6EDE9D6F"' + Last-Modified: + - Fri, 11 Sep 2020 21:41:25 GMT + x-ms-request-id: + - ddbf5a49-d01f-0006-7684-881999000000 + x-ms-version: + - '2020-02-10' + status: + code: 201 + message: Created +- request: + body: '100,200,300,400 + + 300,400,500,600 + + ' + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '32' + Content-Type: + - application/json; charset=utf-8 + User-Agent: + - azsdk-python-storage-dfs/12.1.1 Python/3.7.3 (Windows-10-10.0.19041-SP0) + x-ms-client-request-id: + - 8ac7d776-f477-11ea-aed4-001a7dda7113 + x-ms-date: + - Fri, 11 Sep 2020 21:41:25 GMT + x-ms-version: + - '2020-02-10' + method: PATCH + uri: https://storagename.dfs.core.windows.net/utqqcontainer9d4d1789/csvfile9d4d1789?position=0&action=append + response: + body: + string: '' + headers: + Content-Length: + - '0' + Date: + - Fri, 11 Sep 2020 21:41:24 GMT + x-ms-request-id: + - ddbf5a4a-d01f-0006-7784-881999000000 + x-ms-request-server-encrypted: + - 'false' + x-ms-version: + - '2020-02-10' + status: + code: 202 + message: Accepted +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '0' + If-Match: + - '"0x8D8569B6EDE9D6F"' + User-Agent: + - azsdk-python-storage-dfs/12.1.1 Python/3.7.3 (Windows-10-10.0.19041-SP0) + x-ms-client-request-id: + - 8ad4d4b0-f477-11ea-b511-001a7dda7113 + x-ms-date: + - Fri, 11 Sep 2020 21:41:25 GMT + x-ms-version: + - '2020-02-10' + method: PATCH + uri: https://storagename.dfs.core.windows.net/utqqcontainer9d4d1789/csvfile9d4d1789?position=32&action=flush + response: + body: + string: '' + headers: + Content-Length: + - '0' + Date: + - Fri, 11 Sep 2020 21:41:24 GMT + ETag: + - '"0x8D8569B6EF8B486"' + Last-Modified: + - Fri, 11 Sep 2020 21:41:25 GMT + x-ms-request-id: + - ddbf5a4b-d01f-0006-7884-881999000000 + x-ms-request-server-encrypted: + - 'false' + x-ms-version: + - '2020-02-10' + status: + code: 200 + message: OK +- request: + body: ' + + SQLSELECT _2 from BlobStorage + WHERE _1 > 250arrowdecimalabc42' + headers: + Accept: + - application/xml + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '390' + Content-Type: + - application/xml; charset=utf-8 + User-Agent: + - azsdk-python-storage-dfs/12.1.1 Python/3.7.3 (Windows-10-10.0.19041-SP0) + x-ms-client-request-id: + - 8ae1c068-f477-11ea-a8a8-001a7dda7113 + x-ms-date: + - Fri, 11 Sep 2020 21:41:25 GMT + x-ms-version: + - '2020-02-10' + method: POST + uri: https://storagename.blob.core.windows.net/utqqcontainer9d4d1789/csvfile9d4d1789?comp=query + response: + body: + string: !!binary | + T2JqAQIWYXZyby5zY2hlbWG+HlsKICB7CiAgICAidHlwZSI6ICJyZWNvcmQiLAogICAgIm5hbWUi + OiAiY29tLm1pY3Jvc29mdC5henVyZS5zdG9yYWdlLnF1ZXJ5QmxvYkNvbnRlbnRzLnJlc3VsdERh + dGEiLAogICAgImRvYyI6ICJIb2xkcyByZXN1bHQgZGF0YSBpbiB0aGUgZm9ybWF0IHNwZWNpZmll + ZCBmb3IgdGhpcyBxdWVyeSAoQ1NWLCBKU09OLCBldGMuKS4iLAogICAgImZpZWxkcyI6IFsKICAg + ICAgewogICAgICAgICJuYW1lIjogImRhdGEiLAogICAgICAgICJ0eXBlIjogImJ5dGVzIgogICAg + ICB9CiAgICBdCiAgfSwKICB7CiAgICAidHlwZSI6ICJyZWNvcmQiLAogICAgIm5hbWUiOiAiY29t + Lm1pY3Jvc29mdC5henVyZS5zdG9yYWdlLnF1ZXJ5QmxvYkNvbnRlbnRzLmVycm9yIiwKICAgICJk + b2MiOiAiQW4gZXJyb3IgdGhhdCBvY2N1cnJlZCB3aGlsZSBwcm9jZXNzaW5nIHRoZSBxdWVyeS4i + LAogICAgImZpZWxkcyI6IFsKICAgICAgewogICAgICAgICJuYW1lIjogImZhdGFsIiwKICAgICAg + ICAidHlwZSI6ICJib29sZWFuIiwKICAgICAgICAiZG9jIjogIklmIHRydWUsIHRoaXMgZXJyb3Ig + cHJldmVudHMgZnVydGhlciBxdWVyeSBwcm9jZXNzaW5nLiAgTW9yZSByZXN1bHQgZGF0YSBtYXkg + YmUgcmV0dXJuZWQsIGJ1dCB0aGVyZSBpcyBubyBndWFyYW50ZWUgdGhhdCBhbGwgb2YgdGhlIG9y + aWdpbmFsIGRhdGEgd2lsbCBiZSBwcm9jZXNzZWQuICBJZiBmYWxzZSwgdGhpcyBlcnJvciBkb2Vz + IG5vdCBwcmV2ZW50IGZ1cnRoZXIgcXVlcnkgcHJvY2Vzc2luZy4iCiAgICAgIH0sCiAgICAgIHsK + ICAgICAgICAibmFtZSI6ICJuYW1lIiwKICAgICAgICAidHlwZSI6ICJzdHJpbmciLAogICAgICAg + ICJkb2MiOiAiVGhlIG5hbWUgb2YgdGhlIGVycm9yIgogICAgICB9LAogICAgICB7CiAgICAgICAg + Im5hbWUiOiAiZGVzY3JpcHRpb24iLAogICAgICAgICJ0eXBlIjogInN0cmluZyIsCiAgICAgICAg + ImRvYyI6ICJBIGRlc2NyaXB0aW9uIG9mIHRoZSBlcnJvciIKICAgICAgfSwKICAgICAgewogICAg + ICAgICJuYW1lIjogInBvc2l0aW9uIiwKICAgICAgICAidHlwZSI6ICJsb25nIiwKICAgICAgICAi + ZG9jIjogIlRoZSBibG9iIG9mZnNldCBhdCB3aGljaCB0aGUgZXJyb3Igb2NjdXJyZWQiCiAgICAg + IH0KICAgIF0KICB9LAogIHsKICAgICJ0eXBlIjogInJlY29yZCIsCiAgICAibmFtZSI6ICJjb20u + bWljcm9zb2Z0LmF6dXJlLnN0b3JhZ2UucXVlcnlCbG9iQ29udGVudHMucHJvZ3Jlc3MiLAogICAg + ImRvYyI6ICJJbmZvcm1hdGlvbiBhYm91dCB0aGUgcHJvZ3Jlc3Mgb2YgdGhlIHF1ZXJ5IiwKICAg + ICJmaWVsZHMiOiBbCiAgICAgIHsKICAgICAgICAibmFtZSI6ICJieXRlc1NjYW5uZWQiLAogICAg + ICAgICJ0eXBlIjogImxvbmciLAogICAgICAgICJkb2MiOiAiVGhlIG51bWJlciBvZiBieXRlcyB0 + aGF0IGhhdmUgYmVlbiBzY2FubmVkIgogICAgICB9LAogICAgICB7CiAgICAgICAgIm5hbWUiOiAi + dG90YWxCeXRlcyIsCiAgICAgICAgInR5cGUiOiAibG9uZyIsCiAgICAgICAgImRvYyI6ICJUaGUg + dG90YWwgbnVtYmVyIG9mIGJ5dGVzIHRvIGJlIHNjYW5uZWQgaW4gdGhpcyBxdWVyeSIKICAgICAg + fQogICAgXQogIH0sCiAgewogICAgInR5cGUiOiAicmVjb3JkIiwKICAgICJuYW1lIjogImNvbS5t + aWNyb3NvZnQuYXp1cmUuc3RvcmFnZS5xdWVyeUJsb2JDb250ZW50cy5lbmQiLAogICAgImRvYyI6 + ICJTZW50IGFzIHRoZSBmaW5hbCBtZXNzYWdlIG9mIHRoZSByZXNwb25zZSwgaW5kaWNhdGluZyB0 + aGF0IGFsbCByZXN1bHRzIGhhdmUgYmVlbiBzZW50LiIsCiAgICAiZmllbGRzIjogWwogICAgICB7 + CiAgICAgICAgIm5hbWUiOiAidG90YWxCeXRlcyIsCiAgICAgICAgInR5cGUiOiAibG9uZyIsCiAg + ICAgICAgImRvYyI6ICJUaGUgdG90YWwgbnVtYmVyIG9mIGJ5dGVzIHRvIGJlIHNjYW5uZWQgaW4g + dGhpcyBxdWVyeSIKICAgICAgfQogICAgXQogIH0KXQoAx7a7c8s6SUGx6YZlanjA4QL2AwDwA/// + //94AAAAEAAAAAAACgAMAAYABQAIAAoAAAAAAQMADAAAAAgACAAAAAQACAAAAAQAAAABAAAAFAAA + ABAAFAAIAAYABwAMAAAAEAAQAAAAAAABByQAAAAUAAAABAAAAAAAAAAIAAwABAAIAAgAAAAEAAAA + AgAAAAMAAABhYmMA/////3AAAAAQAAAAAAAKAA4ABgAFAAgACgAAAAADAwAQAAAAAAAKAAwAAAAE + AAgACgAAADAAAAAEAAAAAgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEA + AAAAAAAAAAAAAAAAAAAAAAAAx7a7c8s6SUGx6YZlanjA4QLGAgDAAv////+IAAAAFAAAAAAAAAAM + ABYABgAFAAgADAAMAAAAAAMDABgAAAAQAAAAAAAAAAAACgAYAAwABAAIAAoAAAA8AAAAEAAAAAEA + AAAAAAAAAAAAAAIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQAAAAAAAAAAAAAAABAAAAAQAA + AAAAAAAAAAAAAAAAAJABAAAAAAAAAAAAAAAAAADHtrtzyzpJQbHphmVqeMDhAgYEQEDHtrtzyzpJ + QbHphmVqeMDhAgQGQMe2u3PLOklBsemGZWp4wOE= + headers: + Accept-Ranges: + - bytes + Content-Type: + - avro/binary + Date: + - Fri, 11 Sep 2020 21:41:25 GMT + ETag: + - '"0x8D8569B6EF8B486"' + Last-Modified: + - Fri, 11 Sep 2020 21:41:25 GMT + Transfer-Encoding: + - chunked + x-ms-blob-type: + - BlockBlob + x-ms-creation-time: + - Fri, 11 Sep 2020 21:41:25 GMT + x-ms-lease-state: + - available + x-ms-lease-status: + - unlocked + x-ms-request-id: + - 07a14690-801e-0008-3e84-88d877000000 + x-ms-version: + - '2020-02-10' + status: + code: 200 + message: OK +version: 1 diff --git a/sdk/storage/azure-storage-file-datalake/tests/test_quick_query.py b/sdk/storage/azure-storage-file-datalake/tests/test_quick_query.py index 8e2cb74dfe8a..d28f0a917204 100644 --- a/sdk/storage/azure-storage-file-datalake/tests/test_quick_query.py +++ b/sdk/storage/azure-storage-file-datalake/tests/test_quick_query.py @@ -5,14 +5,15 @@ # Licensed under the MIT License. See License.txt in the project root for # license information. # -------------------------------------------------------------------------- +import base64 import pytest from azure.storage.filedatalake import ( DelimitedTextDialect, DelimitedJsonDialect, - DataLakeFileQueryError -) + DataLakeFileQueryError, + ArrowDialect) from testcase import ( StorageTestCase, @@ -799,4 +800,49 @@ def on_error(error): self.assertEqual(len(resp), len(data)) self.assertEqual(query_result, b'{"name":"owner"}\n{}\n{"name":"owner"}\n') + @record + def test_quick_query_output_in_arrow_format(self): + # Arrange + data = b'100,200,300,400\n300,400,500,600\n' + + # upload the json file + file_name = self._get_file_reference() + file_client = self.dsc.get_file_client(self.filesystem_name, file_name) + file_client.upload_data(data, overwrite=True) + + errors = [] + def on_error(error): + errors.append(error) + + output_format = [ArrowDialect("decimal", name="abc", precision=4, scale=2)] + + expected_result = b"/////3gAAAAQAAAAAAAKAAwABgAFAAgACgAAAAABAwAMAAAACAAIAAAABAAIAAAABAAAAAEAAAAUAAAAEAAUAAgABgAHAAwAAAAQABAAAAAAAAEHJAAAABQAAAAEAAAAAAAAAAgADAAEAAgACAAAAAQAAAACAAAAAwAAAGFiYwD/////cAAAABAAAAAAAAoADgAGAAUACAAKAAAAAAMDABAAAAAAAAoADAAAAAQACAAKAAAAMAAAAAQAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQAAAAAAAAAAAAAAAAAAAAAAAAD/////iAAAABQAAAAAAAAADAAWAAYABQAIAAwADAAAAAADAwAYAAAAEAAAAAAAAAAAAAoAGAAMAAQACAAKAAAAPAAAABAAAAABAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEAAAAAAAAAAAAAAAAQAAAAEAAAAAAAAAAAAAAAAAAACQAQAAAAAAAAAAAAAAAAAA" + + resp = file_client.query_file( + "SELECT _2 from BlobStorage WHERE _1 > 250", + on_error=on_error, + output_format=output_format) + query_result = base64.b64encode(resp.readall()) + + self.assertEqual(len(errors), 0) + self.assertEqual(query_result, expected_result) + + @record + def test_quick_query_input_in_arrow_format(self): + # Arrange + file_name = self._get_file_reference() + file_client = self.dsc.get_file_client(self.filesystem_name, file_name) + + errors = [] + def on_error(error): + errors.append(error) + + input_format = [ArrowDialect("decimal", name="abc", precision=4, scale=2)] + + with self.assertRaises(ValueError): + file_client.query_file( + "SELECT _2 from BlobStorage WHERE _1 > 250", + on_error=on_error, + file_format=input_format) + # ------------------------------------------------------------------------------