Skip to content

Commit

Permalink
Deprecate ADLFS prefix in favor of ADLS
Browse files Browse the repository at this point in the history
  • Loading branch information
ndrluis committed Sep 6, 2024
1 parent ed1932b commit bc47da9
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 34 deletions.
18 changes: 9 additions & 9 deletions mkdocs/docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -109,15 +109,15 @@ For the FileIO there are several configuration options available:

<!-- markdown-link-check-disable -->

| Key | Example | Description |
| ----------------------- | ----------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| adlfs.connection-string | AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqF...;BlobEndpoint=<http://localhost/> | A [connection string](https://learn.microsoft.com/en-us/azure/storage/common/storage-configure-connection-string). This could be used to use FileIO with any adlfs-compatible object storage service that has a different endpoint (like [azurite](https://github.com/azure/azurite)). |
| adlfs.account-name | devstoreaccount1 | The account that you want to connect to |
| adlfs.account-key | Eby8vdM02xNOcqF... | The key to authentication against the account. |
| adlfs.sas-token | NuHOuuzdQN7VRM%2FOpOeqBlawRCA845IY05h9eu1Yte4%3D | The shared access signature |
| adlfs.tenant-id | ad667be4-b811-11ed-afa1-0242ac120002 | The tenant-id |
| adlfs.client-id | ad667be4-b811-11ed-afa1-0242ac120002 | The client-id |
| adlfs.client-secret | oCA3R6P\*ka#oa1Sms2J74z... | The client-secret |
| Key | Example | Description |
| ---------------------- | ----------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| adls.connection-string | AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqF...;BlobEndpoint=<http://localhost/> | A [connection string](https://learn.microsoft.com/en-us/azure/storage/common/storage-configure-connection-string). This can be used to run FileIO against any adlfs-compatible object storage service that has a different endpoint (like [azurite](https://github.com/azure/azurite)). |
| adls.account-name | devstoreaccount1 | The account that you want to connect to |
| adls.account-key | Eby8vdM02xNOcqF... | The key used to authenticate against the account. |
| adls.sas-token | NuHOuuzdQN7VRM%2FOpOeqBlawRCA845IY05h9eu1Yte4%3D | The shared access signature |
| adls.tenant-id | ad667be4-b811-11ed-afa1-0242ac120002 | The tenant-id |
| adls.client-id | ad667be4-b811-11ed-afa1-0242ac120002 | The client-id |
| adls.client-secret | oCA3R6P\*ka#oa1Sms2J74z... | The client-secret |

<!-- markdown-link-check-enable-->

Expand Down
22 changes: 15 additions & 7 deletions pyiceberg/io/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,14 @@

logger = logging.getLogger(__name__)

ADLFS_CONNECTION_STRING = "adlfs.connection-string"
ADLFS_ACCOUNT_NAME = "adlfs.account-name"
ADLFS_ACCOUNT_KEY = "adlfs.account-key"
ADLFS_SAS_TOKEN = "adlfs.sas-token"
ADLFS_TENANT_ID = "adlfs.tenant-id"
ADLFS_CLIENT_ID = "adlfs.client-id"
ADLFS_ClIENT_SECRET = "adlfs.client-secret"
ADLFS_PREFIX = "adlfs"
AWS_REGION = "client.region"
AWS_ACCESS_KEY_ID = "client.access-key-id"
AWS_SECRET_ACCESS_KEY = "client.secret-access-key"
Expand All @@ -66,13 +74,13 @@
HDFS_PORT = "hdfs.port"
HDFS_USER = "hdfs.user"
HDFS_KERB_TICKET = "hdfs.kerberos_ticket"
ADLFS_CONNECTION_STRING = "adlfs.connection-string"
ADLFS_ACCOUNT_NAME = "adlfs.account-name"
ADLFS_ACCOUNT_KEY = "adlfs.account-key"
ADLFS_SAS_TOKEN = "adlfs.sas-token"
ADLFS_TENANT_ID = "adlfs.tenant-id"
ADLFS_CLIENT_ID = "adlfs.client-id"
ADLFS_ClIENT_SECRET = "adlfs.client-secret"
ADLS_CONNECTION_STRING = "adls.connection-string"
ADLS_ACCOUNT_NAME = "adls.account-name"
ADLS_ACCOUNT_KEY = "adls.account-key"
ADLS_SAS_TOKEN = "adls.sas-token"
ADLS_TENANT_ID = "adls.tenant-id"
ADLS_CLIENT_ID = "adls.client-id"
ADLS_ClIENT_SECRET = "adls.client-secret"
GCS_TOKEN = "gcs.oauth2.token"
GCS_TOKEN_EXPIRES_AT_MS = "gcs.oauth2.token-expires-at"
GCS_PROJECT_ID = "gcs.project-id"
Expand Down
59 changes: 52 additions & 7 deletions pyiceberg/io/fsspec.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,15 @@
ADLFS_ACCOUNT_NAME,
ADLFS_CLIENT_ID,
ADLFS_CONNECTION_STRING,
ADLFS_PREFIX,
ADLFS_SAS_TOKEN,
ADLFS_TENANT_ID,
ADLS_ACCOUNT_KEY,
ADLS_ACCOUNT_NAME,
ADLS_CLIENT_ID,
ADLS_CONNECTION_STRING,
ADLS_SAS_TOKEN,
ADLS_TENANT_ID,
AWS_ACCESS_KEY_ID,
AWS_REGION,
AWS_SECRET_ACCESS_KEY,
Expand All @@ -71,13 +78,15 @@
S3_SIGNER_ENDPOINT_DEFAULT,
S3_SIGNER_URI,
ADLFS_ClIENT_SECRET,
ADLS_ClIENT_SECRET,
FileIO,
InputFile,
InputStream,
OutputFile,
OutputStream,
)
from pyiceberg.typedef import Properties
from pyiceberg.utils.deprecated import deprecation_message
from pyiceberg.utils.properties import get_first_property_value, property_as_bool

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -179,14 +188,50 @@ def _gs(properties: Properties) -> AbstractFileSystem:
def _adlfs(properties: Properties) -> AbstractFileSystem:
from adlfs import AzureBlobFileSystem

for property_name in properties:
if property_name.startswith(ADLFS_PREFIX):
deprecation_message(
deprecated_in="0.8.0",
removed_in="0.9.0",
help_message=f"The property {property_name} is deprecated. Please use properties that start with adls.",
)

return AzureBlobFileSystem(
connection_string=properties.get(ADLFS_CONNECTION_STRING),
account_name=properties.get(ADLFS_ACCOUNT_NAME),
account_key=properties.get(ADLFS_ACCOUNT_KEY),
sas_token=properties.get(ADLFS_SAS_TOKEN),
tenant_id=properties.get(ADLFS_TENANT_ID),
client_id=properties.get(ADLFS_CLIENT_ID),
client_secret=properties.get(ADLFS_ClIENT_SECRET),
connection_string=get_first_property_value(
properties,
ADLS_CONNECTION_STRING,
ADLFS_CONNECTION_STRING,
),
account_name=get_first_property_value(
properties,
ADLS_ACCOUNT_NAME,
ADLFS_ACCOUNT_NAME,
),
account_key=get_first_property_value(
properties,
ADLS_ACCOUNT_KEY,
ADLFS_ACCOUNT_KEY,
),
sas_token=get_first_property_value(
properties,
ADLS_SAS_TOKEN,
ADLFS_SAS_TOKEN,
),
tenant_id=get_first_property_value(
properties,
ADLS_TENANT_ID,
ADLFS_TENANT_ID,
),
client_id=get_first_property_value(
properties,
ADLS_CLIENT_ID,
ADLFS_CLIENT_ID,
),
client_secret=get_first_property_value(
properties,
ADLS_ClIENT_SECRET,
ADLFS_ClIENT_SECRET,
),
)


Expand Down
22 changes: 11 additions & 11 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,23 +111,23 @@ def pytest_addoption(parser: pytest.Parser) -> None:
parser.addoption(
"--s3.secret-access-key", action="store", default="password", help="The AWS secret access key ID for tests marked as s3"
)
# ADLFS options
# ADLS options
# Azurite provides default account name and key. Those can be customized using env variables.
# For more information, see README file at https://github.com/azure/azurite#default-storage-account
parser.addoption(
"--adlfs.endpoint",
"--adls.endpoint",
action="store",
default="http://127.0.0.1:10000",
help="The ADLS endpoint URL for tests marked as adlfs",
help="The ADLS endpoint URL for tests marked as adls",
)
parser.addoption(
"--adlfs.account-name", action="store", default="devstoreaccount1", help="The ADLS account key for tests marked as adlfs"
"--adls.account-name", action="store", default="devstoreaccount1", help="The ADLS account key for tests marked as adls"
)
parser.addoption(
"--adlfs.account-key",
"--adls.account-key",
action="store",
default="Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==",
help="The ADLS secret account key for tests marked as adlfs",
help="The ADLS secret account key for tests marked as adls",
)
parser.addoption(
"--gcs.endpoint", action="store", default="http://0.0.0.0:4443", help="The GCS endpoint URL for tests marked gcs"
Expand Down Expand Up @@ -1958,13 +1958,13 @@ def fixture_dynamodb(_aws_credentials: None) -> Generator[boto3.client, None, No
def adlfs_fsspec_fileio(request: pytest.FixtureRequest) -> Generator[FsspecFileIO, None, None]:
from azure.storage.blob import BlobServiceClient

azurite_url = request.config.getoption("--adlfs.endpoint")
azurite_account_name = request.config.getoption("--adlfs.account-name")
azurite_account_key = request.config.getoption("--adlfs.account-key")
azurite_url = request.config.getoption("--adls.endpoint")
azurite_account_name = request.config.getoption("--adls.account-name")
azurite_account_key = request.config.getoption("--adls.account-key")
azurite_connection_string = f"DefaultEndpointsProtocol=http;AccountName={azurite_account_name};AccountKey={azurite_account_key};BlobEndpoint={azurite_url}/{azurite_account_name};"
properties = {
"adlfs.connection-string": azurite_connection_string,
"adlfs.account-name": azurite_account_name,
"adls.connection-string": azurite_connection_string,
"adls.account-name": azurite_account_name,
}

bbs = BlobServiceClient.from_connection_string(conn_str=azurite_connection_string)
Expand Down

0 comments on commit bc47da9

Please sign in to comment.