From 862b00b92234e845d419e99a8bba1d8eadb14326 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andre=20Anast=C3=A1cio?= Date: Mon, 9 Sep 2024 18:48:58 -0300 Subject: [PATCH] Add missing renaming --- Makefile | 4 +- mkdocs/docs/configuration.md | 2 +- mkdocs/docs/contributing.md | 6 +- pyiceberg/io/fsspec.py | 6 +- pyproject.toml | 8 +-- tests/conftest.py | 2 +- tests/io/test_fsspec.py | 108 +++++++++++++++++------------------ 7 files changed, 68 insertions(+), 68 deletions(-) diff --git a/Makefile b/Makefile index a40efd17c..ee0b40557 100644 --- a/Makefile +++ b/Makefile @@ -59,9 +59,9 @@ test-integration-rebuild: docker compose -f dev/docker-compose-integration.yml rm -f docker compose -f dev/docker-compose-integration.yml build --no-cache -test-adlfs: ## Run tests marked with adlfs, can add arguments with PYTEST_ARGS="-vv" +test-adls: ## Run tests marked with adls, can add arguments with PYTEST_ARGS="-vv" sh ./dev/run-azurite.sh - poetry run pytest tests/ -m adlfs ${PYTEST_ARGS} + poetry run pytest tests/ -m adls ${PYTEST_ARGS} test-gcs: ## Run tests marked with gcs, can add arguments with PYTEST_ARGS="-vv" sh ./dev/run-gcs-server.sh diff --git a/mkdocs/docs/configuration.md b/mkdocs/docs/configuration.md index a5aa545e1..8495437f2 100644 --- a/mkdocs/docs/configuration.md +++ b/mkdocs/docs/configuration.md @@ -111,7 +111,7 @@ For the FileIO there are several configuration options available: | Key | Example | Description | | ---------------------- | ----------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| adls.connection-string | AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqF...;BlobEndpoint= | A [connection string](https://learn.microsoft.com/en-us/azure/storage/common/storage-configure-connection-string). This could be used to use FileIO with any adlfs-compatible object storage service that has a different endpoint (like [azurite](https://github.com/azure/azurite)). | +| adls.connection-string | AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqF...;BlobEndpoint= | A [connection string](https://learn.microsoft.com/en-us/azure/storage/common/storage-configure-connection-string). This could be used to use FileIO with any adls-compatible object storage service that has a different endpoint (like [azurite](https://github.com/azure/azurite)). | | adls.account-name | devstoreaccount1 | The account that you want to connect to | | adls.account-key | Eby8vdM02xNOcqF... | The key to authentication against the account. | | adls.sas-token | NuHOuuzdQN7VRM%2FOpOeqBlawRCA845IY05h9eu1Yte4%3D | The shared access signature | diff --git a/mkdocs/docs/contributing.md b/mkdocs/docs/contributing.md index d87f2ec6a..62db11c6b 100644 --- a/mkdocs/docs/contributing.md +++ b/mkdocs/docs/contributing.md @@ -106,17 +106,17 @@ For Python, `pytest` is used a testing framework in combination with `coverage` make test ``` -By default, S3 and ADLFS tests are ignored because that require minio and azurite to be running. +By default, S3 and ADLS tests are ignored because that require minio and azurite to be running. To run the S3 suite: ```bash make test-s3 ``` -To run the ADLFS suite: +To run the ADLS suite: ```bash -make test-adlfs +make test-adls ``` To pass additional arguments to pytest, you can use `PYTEST_ARGS`. diff --git a/pyiceberg/io/fsspec.py b/pyiceberg/io/fsspec.py index 518d3a426..1632c4bb2 100644 --- a/pyiceberg/io/fsspec.py +++ b/pyiceberg/io/fsspec.py @@ -185,7 +185,7 @@ def _gs(properties: Properties) -> AbstractFileSystem: ) -def _adlfs(properties: Properties) -> AbstractFileSystem: +def _adls(properties: Properties) -> AbstractFileSystem: from adlfs import AzureBlobFileSystem for property_name in properties: @@ -241,8 +241,8 @@ def _adlfs(properties: Properties) -> AbstractFileSystem: "s3": _s3, "s3a": _s3, "s3n": _s3, - "abfs": _adlfs, - "abfss": _adlfs, + "abfs": _adls, + "abfss": _adls, "gs": _gs, "gcs": _gs, } diff --git a/pyproject.toml b/pyproject.toml index 40a2255f1..1d304b77a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -80,8 +80,8 @@ psycopg2-binary = { version = ">=2.9.6", optional = true } sqlalchemy = { version = "^2.0.18", optional = true } getdaft = { version = ">=0.2.12", optional = true } numpy = [ - { version = "1.26.0", python = ">=3.9,<3.13", optional = true }, - { version = "1.24.4", python = ">=3.8,<3.9", optional = true } + { version = "1.26.0", python = ">=3.9,<3.13", optional = true }, + { version = "1.24.4", python = ">=3.8,<3.9", optional = true }, ] [tool.poetry.group.dev.dependencies] @@ -98,7 +98,7 @@ pytest-mock = "3.14.0" pyspark = "3.5.2" cython = "3.0.11" deptry = ">=0.14,<0.21" -docutils = "!=0.21.post1" # https://github.com/python-poetry/poetry/issues/9248#issuecomment-2026240520 +docutils = "!=0.21.post1" # https://github.com/python-poetry/poetry/issues/9248#issuecomment-2026240520 [[tool.mypy.overrides]] module = "pytest_mock.*" ignore_missing_imports = true @@ -603,7 +603,7 @@ gcsfs = ["gcsfs"] markers = [ "unmarked: marks a test as a unittest", "s3: marks a test as requiring access to s3 compliant storage (use with --aws-access-key-id, --aws-secret-access-key, and --endpoint args)", - "adlfs: marks a test as requiring access to adlfs compliant storage (use with --adlfs.account-name, --adlfs.account-key, and --adlfs.endpoint args)", + "adls: marks a test as requiring access to adls compliant storage (use with --adls.account-name, --adls.account-key, and --adls.endpoint args)", "integration: marks integration tests against Apache Spark", "gcs: marks a test as requiring access to gcs compliant storage (use with --gs.token, --gs.project, and --gs.endpoint)", ] diff --git a/tests/conftest.py b/tests/conftest.py index 9b7c8acfa..b05947ebe 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1955,7 +1955,7 @@ def fixture_dynamodb(_aws_credentials: None) -> Generator[boto3.client, None, No @pytest.fixture -def adlfs_fsspec_fileio(request: pytest.FixtureRequest) -> Generator[FsspecFileIO, None, None]: +def adls_fsspec_fileio(request: pytest.FixtureRequest) -> Generator[FsspecFileIO, None, None]: from azure.storage.blob import BlobServiceClient azurite_url = request.config.getoption("--adls.endpoint") diff --git a/tests/io/test_fsspec.py b/tests/io/test_fsspec.py index fbb184910..64dc68b9c 100644 --- a/tests/io/test_fsspec.py +++ b/tests/io/test_fsspec.py @@ -290,70 +290,70 @@ def test_fsspec_unified_session_properties() -> None: ) -@pytest.mark.adlfs -def test_fsspec_new_input_file_adlfs(adlfs_fsspec_fileio: FsspecFileIO) -> None: +@pytest.mark.adls +def test_fsspec_new_input_file_adls(adls_fsspec_fileio: FsspecFileIO) -> None: """Test creating a new input file from an fsspec file-io""" filename = str(uuid.uuid4()) - input_file = adlfs_fsspec_fileio.new_input(f"abfss://tests/{filename}") + input_file = adls_fsspec_fileio.new_input(f"abfss://tests/{filename}") assert isinstance(input_file, fsspec.FsspecInputFile) assert input_file.location == f"abfss://tests/{filename}" -@pytest.mark.adlfs -def test_fsspec_new_abfss_output_file_adlfs(adlfs_fsspec_fileio: FsspecFileIO) -> None: +@pytest.mark.adls +def test_fsspec_new_abfss_output_file_adls(adls_fsspec_fileio: FsspecFileIO) -> None: """Test creating a new output file from an fsspec file-io""" filename = str(uuid.uuid4()) - output_file = adlfs_fsspec_fileio.new_output(f"abfss://tests/{filename}") + output_file = adls_fsspec_fileio.new_output(f"abfss://tests/{filename}") assert isinstance(output_file, fsspec.FsspecOutputFile) assert output_file.location == f"abfss://tests/{filename}" -@pytest.mark.adlfs -def test_fsspec_write_and_read_file_adlfs(adlfs_fsspec_fileio: FsspecFileIO) -> None: +@pytest.mark.adls +def test_fsspec_write_and_read_file_adls(adls_fsspec_fileio: FsspecFileIO) -> None: """Test writing and reading a file using FsspecInputFile and FsspecOutputFile""" filename = str(uuid.uuid4()) - output_file = adlfs_fsspec_fileio.new_output(location=f"abfss://tests/{filename}") + output_file = adls_fsspec_fileio.new_output(location=f"abfss://tests/{filename}") with output_file.create() as f: f.write(b"foo") - input_file = adlfs_fsspec_fileio.new_input(f"abfss://tests/{filename}") + input_file = adls_fsspec_fileio.new_input(f"abfss://tests/{filename}") assert input_file.open().read() == b"foo" - adlfs_fsspec_fileio.delete(input_file) + adls_fsspec_fileio.delete(input_file) -@pytest.mark.adlfs -def test_fsspec_getting_length_of_file_adlfs(adlfs_fsspec_fileio: FsspecFileIO) -> None: +@pytest.mark.adls +def test_fsspec_getting_length_of_file_adls(adls_fsspec_fileio: FsspecFileIO) -> None: """Test getting the length of an FsspecInputFile and FsspecOutputFile""" filename = str(uuid.uuid4()) - output_file = adlfs_fsspec_fileio.new_output(location=f"abfss://tests/{filename}") + output_file = adls_fsspec_fileio.new_output(location=f"abfss://tests/{filename}") with output_file.create() as f: f.write(b"foobar") assert len(output_file) == 6 - input_file = adlfs_fsspec_fileio.new_input(location=f"abfss://tests/{filename}") + input_file = adls_fsspec_fileio.new_input(location=f"abfss://tests/{filename}") assert len(input_file) == 6 - adlfs_fsspec_fileio.delete(output_file) + adls_fsspec_fileio.delete(output_file) -@pytest.mark.adlfs -def test_fsspec_file_tell_adlfs(adlfs_fsspec_fileio: FsspecFileIO) -> None: +@pytest.mark.adls +def test_fsspec_file_tell_adls(adls_fsspec_fileio: FsspecFileIO) -> None: """Test finding cursor position for an fsspec file-io file""" filename = str(uuid.uuid4()) - output_file = adlfs_fsspec_fileio.new_output(location=f"abfss://tests/{filename}") + output_file = adls_fsspec_fileio.new_output(location=f"abfss://tests/{filename}") with output_file.create() as write_file: write_file.write(b"foobar") - input_file = adlfs_fsspec_fileio.new_input(location=f"abfss://tests/{filename}") + input_file = adls_fsspec_fileio.new_input(location=f"abfss://tests/{filename}") f = input_file.open() f.seek(0) @@ -365,19 +365,19 @@ def test_fsspec_file_tell_adlfs(adlfs_fsspec_fileio: FsspecFileIO) -> None: f.seek(0) assert f.tell() == 0 - adlfs_fsspec_fileio.delete(f"abfss://tests/{filename}") + adls_fsspec_fileio.delete(f"abfss://tests/{filename}") -@pytest.mark.adlfs -def test_fsspec_read_specified_bytes_for_file_adlfs(adlfs_fsspec_fileio: FsspecFileIO) -> None: +@pytest.mark.adls +def test_fsspec_read_specified_bytes_for_file_adls(adls_fsspec_fileio: FsspecFileIO) -> None: """Test reading a specified number of bytes from an fsspec file-io file""" filename = str(uuid.uuid4()) - output_file = adlfs_fsspec_fileio.new_output(location=f"abfss://tests/{filename}") + output_file = adls_fsspec_fileio.new_output(location=f"abfss://tests/{filename}") with output_file.create() as write_file: write_file.write(b"foo") - input_file = adlfs_fsspec_fileio.new_input(location=f"abfss://tests/{filename}") + input_file = adls_fsspec_fileio.new_input(location=f"abfss://tests/{filename}") f = input_file.open() f.seek(0) @@ -391,89 +391,89 @@ def test_fsspec_read_specified_bytes_for_file_adlfs(adlfs_fsspec_fileio: FsspecF f.seek(0) assert b"foo" == f.read(999) # test reading amount larger than entire content length - adlfs_fsspec_fileio.delete(input_file) + adls_fsspec_fileio.delete(input_file) -@pytest.mark.adlfs -def test_fsspec_raise_on_opening_file_not_found_adlfs(adlfs_fsspec_fileio: FsspecFileIO) -> None: - """Test that an fsspec input file raises appropriately when the adlfs file is not found""" +@pytest.mark.adls +def test_fsspec_raise_on_opening_file_not_found_adls(adls_fsspec_fileio: FsspecFileIO) -> None: + """Test that an fsspec input file raises appropriately when the adls file is not found""" filename = str(uuid.uuid4()) - input_file = adlfs_fsspec_fileio.new_input(location=f"abfss://tests/{filename}") + input_file = adls_fsspec_fileio.new_input(location=f"abfss://tests/{filename}") with pytest.raises(FileNotFoundError) as exc_info: input_file.open().read() assert filename in str(exc_info.value) -@pytest.mark.adlfs -def test_checking_if_a_file_exists_adlfs(adlfs_fsspec_fileio: FsspecFileIO) -> None: +@pytest.mark.adls +def test_checking_if_a_file_exists_adls(adls_fsspec_fileio: FsspecFileIO) -> None: """Test checking if a file exists""" - non_existent_file = adlfs_fsspec_fileio.new_input(location="abfss://tests/does-not-exist.txt") + non_existent_file = adls_fsspec_fileio.new_input(location="abfss://tests/does-not-exist.txt") assert not non_existent_file.exists() filename = str(uuid.uuid4()) - output_file = adlfs_fsspec_fileio.new_output(location=f"abfss://tests/{filename}") + output_file = adls_fsspec_fileio.new_output(location=f"abfss://tests/{filename}") assert not output_file.exists() with output_file.create() as f: f.write(b"foo") - existing_input_file = adlfs_fsspec_fileio.new_input(location=f"abfss://tests/{filename}") + existing_input_file = adls_fsspec_fileio.new_input(location=f"abfss://tests/{filename}") assert existing_input_file.exists() - existing_output_file = adlfs_fsspec_fileio.new_output(location=f"abfss://tests/{filename}") + existing_output_file = adls_fsspec_fileio.new_output(location=f"abfss://tests/{filename}") assert existing_output_file.exists() - adlfs_fsspec_fileio.delete(existing_output_file) + adls_fsspec_fileio.delete(existing_output_file) -@pytest.mark.adlfs -def test_closing_a_file_adlfs(adlfs_fsspec_fileio: FsspecFileIO) -> None: +@pytest.mark.adls +def test_closing_a_file_adls(adls_fsspec_fileio: FsspecFileIO) -> None: """Test closing an output file and input file""" filename = str(uuid.uuid4()) - output_file = adlfs_fsspec_fileio.new_output(location=f"abfss://tests/{filename}") + output_file = adls_fsspec_fileio.new_output(location=f"abfss://tests/{filename}") with output_file.create() as write_file: write_file.write(b"foo") assert not write_file.closed # type: ignore assert write_file.closed # type: ignore - input_file = adlfs_fsspec_fileio.new_input(location=f"abfss://tests/{filename}") + input_file = adls_fsspec_fileio.new_input(location=f"abfss://tests/{filename}") f = input_file.open() assert not f.closed # type: ignore f.close() assert f.closed # type: ignore - adlfs_fsspec_fileio.delete(f"abfss://tests/{filename}") + adls_fsspec_fileio.delete(f"abfss://tests/{filename}") -@pytest.mark.adlfs -def test_fsspec_converting_an_outputfile_to_an_inputfile_adlfs(adlfs_fsspec_fileio: FsspecFileIO) -> None: +@pytest.mark.adls +def test_fsspec_converting_an_outputfile_to_an_inputfile_adls(adls_fsspec_fileio: FsspecFileIO) -> None: """Test converting an output file to an input file""" filename = str(uuid.uuid4()) - output_file = adlfs_fsspec_fileio.new_output(location=f"abfss://tests/{filename}") + output_file = adls_fsspec_fileio.new_output(location=f"abfss://tests/{filename}") input_file = output_file.to_input_file() assert input_file.location == output_file.location -@pytest.mark.adlfs -def test_writing_avro_file_adlfs(generated_manifest_entry_file: str, adlfs_fsspec_fileio: FsspecFileIO) -> None: +@pytest.mark.adls +def test_writing_avro_file_adls(generated_manifest_entry_file: str, adls_fsspec_fileio: FsspecFileIO) -> None: """Test that bytes match when reading a local avro file, writing it using fsspec file-io, and then reading it again""" filename = str(uuid.uuid4()) with PyArrowFileIO().new_input(location=generated_manifest_entry_file).open() as f: b1 = f.read() - with adlfs_fsspec_fileio.new_output(location=f"abfss://tests/{filename}").create() as out_f: + with adls_fsspec_fileio.new_output(location=f"abfss://tests/{filename}").create() as out_f: out_f.write(b1) - with adlfs_fsspec_fileio.new_input(location=f"abfss://tests/{filename}").open() as in_f: + with adls_fsspec_fileio.new_input(location=f"abfss://tests/{filename}").open() as in_f: b2 = in_f.read() - assert b1 == b2 # Check that bytes of read from local avro file match bytes written to adlfs + assert b1 == b2 # Check that bytes of read from local avro file match bytes written to adls - adlfs_fsspec_fileio.delete(f"abfss://tests/{filename}") + adls_fsspec_fileio.delete(f"abfss://tests/{filename}") -@pytest.mark.adlfs -def test_fsspec_pickle_round_trip_aldfs(adlfs_fsspec_fileio: FsspecFileIO) -> None: - _test_fsspec_pickle_round_trip(adlfs_fsspec_fileio, "abfss://tests/foo.txt") +@pytest.mark.adls +def test_fsspec_pickle_round_trip_aldfs(adls_fsspec_fileio: FsspecFileIO) -> None: + _test_fsspec_pickle_round_trip(adls_fsspec_fileio, "abfss://tests/foo.txt") @pytest.mark.gcs