diff --git a/mkdocs/docs/configuration.md b/mkdocs/docs/configuration.md
index ff3741656..32050c226 100644
--- a/mkdocs/docs/configuration.md
+++ b/mkdocs/docs/configuration.md
@@ -105,15 +105,15 @@ For the FileIO there are several configuration options available:

-| Key                     | Example | Description |
-| ----------------------- | ------- | ----------- |
-| adlfs.connection-string | AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqF...;BlobEndpoint=http://localhost/ | A [connection string](https://learn.microsoft.com/en-us/azure/storage/common/storage-configure-connection-string). This could be used to use FileIO with any adlfs-compatible object storage service that has a different endpoint (like [azurite](https://github.com/azure/azurite)). |
-| adlfs.account-name      | devstoreaccount1 | The account that you want to connect to |
-| adlfs.account-key       | Eby8vdM02xNOcqF... | The key to authentication against the account. |
-| adlfs.sas-token         | NuHOuuzdQN7VRM%2FOpOeqBlawRCA845IY05h9eu1Yte4%3D | The shared access signature |
-| adlfs.tenant-id         | ad667be4-b811-11ed-afa1-0242ac120002 | The tenant-id |
-| adlfs.client-id         | ad667be4-b811-11ed-afa1-0242ac120002 | The client-id |
-| adlfs.client-secret     | oCA3R6P\*ka#oa1Sms2J74z... | The client-secret |
+| Key                    | Example | Description |
+| ---------------------- | ------- | ----------- |
+| adls.connection-string | AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqF...;BlobEndpoint=http://localhost/ | A [connection string](https://learn.microsoft.com/en-us/azure/storage/common/storage-configure-connection-string). This could be used to use FileIO with any adlfs-compatible object storage service that has a different endpoint (like [azurite](https://github.com/azure/azurite)). |
+| adls.account-name      | devstoreaccount1 | The account that you want to connect to |
+| adls.account-key       | Eby8vdM02xNOcqF... | The key to authentication against the account. |
+| adls.sas-token         | NuHOuuzdQN7VRM%2FOpOeqBlawRCA845IY05h9eu1Yte4%3D | The shared access signature |
+| adls.tenant-id         | ad667be4-b811-11ed-afa1-0242ac120002 | The tenant-id |
+| adls.client-id         | ad667be4-b811-11ed-afa1-0242ac120002 | The client-id |
+| adls.client-secret     | oCA3R6P\*ka#oa1Sms2J74z... | The client-secret |
diff --git a/pyiceberg/io/__init__.py b/pyiceberg/io/__init__.py
index d20087474..4e467cc9b 100644
--- a/pyiceberg/io/__init__.py
+++ b/pyiceberg/io/__init__.py
@@ -46,6 +46,14 @@

 logger = logging.getLogger(__name__)

+ADLFS_CONNECTION_STRING = "adlfs.connection-string"
+ADLFS_ACCOUNT_NAME = "adlfs.account-name"
+ADLFS_ACCOUNT_KEY = "adlfs.account-key"
+ADLFS_SAS_TOKEN = "adlfs.sas-token"
+ADLFS_TENANT_ID = "adlfs.tenant-id"
+ADLFS_CLIENT_ID = "adlfs.client-id"
+ADLFS_ClIENT_SECRET = "adlfs.client-secret"
+ADLFS_PREFIX = "adlfs"
 AWS_REGION = "client.region"
 AWS_ACCESS_KEY_ID = "client.access-key-id"
 AWS_SECRET_ACCESS_KEY = "client.secret-access-key"
@@ -62,13 +70,13 @@
 HDFS_PORT = "hdfs.port"
 HDFS_USER = "hdfs.user"
 HDFS_KERB_TICKET = "hdfs.kerberos_ticket"
-ADLFS_CONNECTION_STRING = "adlfs.connection-string"
-ADLFS_ACCOUNT_NAME = "adlfs.account-name"
-ADLFS_ACCOUNT_KEY = "adlfs.account-key"
-ADLFS_SAS_TOKEN = "adlfs.sas-token"
-ADLFS_TENANT_ID = "adlfs.tenant-id"
-ADLFS_CLIENT_ID = "adlfs.client-id"
-ADLFS_ClIENT_SECRET = "adlfs.client-secret"
+ADLS_CONNECTION_STRING = "adls.connection-string"
+ADLS_ACCOUNT_NAME = "adls.account-name"
+ADLS_ACCOUNT_KEY = "adls.account-key"
+ADLS_SAS_TOKEN = "adls.sas-token"
+ADLS_TENANT_ID = "adls.tenant-id"
+ADLS_CLIENT_ID = "adls.client-id"
+ADLS_ClIENT_SECRET = "adls.client-secret"
 GCS_TOKEN = "gcs.oauth2.token"
 GCS_TOKEN_EXPIRES_AT_MS = "gcs.oauth2.token-expires-at"
 GCS_PROJECT_ID = "gcs.project-id"
diff --git a/pyiceberg/io/fsspec.py b/pyiceberg/io/fsspec.py
index d6e4a32ad..783108774 100644
--- a/pyiceberg/io/fsspec.py
+++ b/pyiceberg/io/fsspec.py
@@ -44,8 +44,15 @@
     ADLFS_ACCOUNT_NAME,
     ADLFS_CLIENT_ID,
     ADLFS_CONNECTION_STRING,
+    ADLFS_PREFIX,
     ADLFS_SAS_TOKEN,
     ADLFS_TENANT_ID,
+    ADLS_ACCOUNT_KEY,
+    ADLS_ACCOUNT_NAME,
+    ADLS_CLIENT_ID,
+    ADLS_CONNECTION_STRING,
+    ADLS_SAS_TOKEN,
+    ADLS_TENANT_ID,
     AWS_ACCESS_KEY_ID,
     AWS_REGION,
     AWS_SECRET_ACCESS_KEY,
@@ -69,6 +76,7 @@
     S3_SESSION_TOKEN,
     S3_SIGNER_URI,
     ADLFS_ClIENT_SECRET,
+    ADLS_ClIENT_SECRET,
     FileIO,
     InputFile,
     InputStream,
@@ -77,6 +85,7 @@
 )
 from pyiceberg.typedef import Properties
 from pyiceberg.utils.properties import get_first_property_value, property_as_bool
+from pyiceberg.utils.deprecated import deprecated

 logger = logging.getLogger(__name__)

@@ -175,14 +184,50 @@ def _gs(properties: Properties) -> AbstractFileSystem:
 def _adlfs(properties: Properties) -> AbstractFileSystem:
     from adlfs import AzureBlobFileSystem

+    for property_name in properties:
+        if property_name.startswith(ADLFS_PREFIX):
+            deprecated(
+                deprecated_in="0.7.0",
+                removed_in="0.8.0",
+                help_message=f"The property {property_name} is deprecated. Please use properties that start with adls.",
+            )(lambda: None)()
+
     return AzureBlobFileSystem(
-        connection_string=properties.get(ADLFS_CONNECTION_STRING),
-        account_name=properties.get(ADLFS_ACCOUNT_NAME),
-        account_key=properties.get(ADLFS_ACCOUNT_KEY),
-        sas_token=properties.get(ADLFS_SAS_TOKEN),
-        tenant_id=properties.get(ADLFS_TENANT_ID),
-        client_id=properties.get(ADLFS_CLIENT_ID),
-        client_secret=properties.get(ADLFS_ClIENT_SECRET),
+        connection_string=get_first_property_value(
+            properties,
+            ADLS_CONNECTION_STRING,
+            ADLFS_CONNECTION_STRING,
+        ),
+        account_name=get_first_property_value(
+            properties,
+            ADLS_ACCOUNT_NAME,
+            ADLFS_ACCOUNT_NAME,
+        ),
+        account_key=get_first_property_value(
+            properties,
+            ADLS_ACCOUNT_KEY,
+            ADLFS_ACCOUNT_KEY,
+        ),
+        sas_token=get_first_property_value(
+            properties,
+            ADLS_SAS_TOKEN,
+            ADLFS_SAS_TOKEN,
+        ),
+        tenant_id=get_first_property_value(
+            properties,
+            ADLS_TENANT_ID,
+            ADLFS_TENANT_ID,
+        ),
+        client_id=get_first_property_value(
+            properties,
+            ADLS_CLIENT_ID,
+            ADLFS_CLIENT_ID,
+        ),
+        client_secret=get_first_property_value(
+            properties,
+            ADLS_ClIENT_SECRET,
+            ADLFS_ClIENT_SECRET,
+        ),
     )
diff --git a/tests/conftest.py b/tests/conftest.py
index f65f4ed55..9b7c8acfa 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -111,23 +111,23 @@ def pytest_addoption(parser: pytest.Parser) -> None:
     parser.addoption(
         "--s3.secret-access-key", action="store", default="password", help="The AWS secret access key ID for tests marked as s3"
     )
-    # ADLFS options
+    # ADLS options
     # Azurite provides default account name and key. Those can be customized using env variables.
     # For more information, see README file at https://github.com/azure/azurite#default-storage-account
     parser.addoption(
-        "--adlfs.endpoint",
+        "--adls.endpoint",
         action="store",
         default="http://127.0.0.1:10000",
-        help="The ADLS endpoint URL for tests marked as adlfs",
+        help="The ADLS endpoint URL for tests marked as adls",
     )
     parser.addoption(
-        "--adlfs.account-name", action="store", default="devstoreaccount1", help="The ADLS account key for tests marked as adlfs"
+        "--adls.account-name", action="store", default="devstoreaccount1", help="The ADLS account key for tests marked as adls"
     )
     parser.addoption(
-        "--adlfs.account-key",
+        "--adls.account-key",
         action="store",
         default="Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==",
-        help="The ADLS secret account key for tests marked as adlfs",
+        help="The ADLS secret account key for tests marked as adls",
     )
     parser.addoption(
         "--gcs.endpoint", action="store", default="http://0.0.0.0:4443", help="The GCS endpoint URL for tests marked gcs"
     )
@@ -1958,13 +1958,13 @@ def fixture_dynamodb(_aws_credentials: None) -> Generator[boto3.client, None, No
 def adlfs_fsspec_fileio(request: pytest.FixtureRequest) -> Generator[FsspecFileIO, None, None]:
     from azure.storage.blob import BlobServiceClient

-    azurite_url = request.config.getoption("--adlfs.endpoint")
-    azurite_account_name = request.config.getoption("--adlfs.account-name")
-    azurite_account_key = request.config.getoption("--adlfs.account-key")
+    azurite_url = request.config.getoption("--adls.endpoint")
+    azurite_account_name = request.config.getoption("--adls.account-name")
+    azurite_account_key = request.config.getoption("--adls.account-key")
     azurite_connection_string = f"DefaultEndpointsProtocol=http;AccountName={azurite_account_name};AccountKey={azurite_account_key};BlobEndpoint={azurite_url}/{azurite_account_name};"
     properties = {
-        "adlfs.connection-string": azurite_connection_string,
-        "adlfs.account-name": azurite_account_name,
+        "adls.connection-string": azurite_connection_string,
+        "adls.account-name": azurite_account_name,
     }

     bbs = BlobServiceClient.from_connection_string(conn_str=azurite_connection_string)