diff --git a/mkdocs/docs/configuration.md b/mkdocs/docs/configuration.md index af139b007..dc67b7904 100644 --- a/mkdocs/docs/configuration.md +++ b/mkdocs/docs/configuration.md @@ -82,7 +82,8 @@ For the FileIO there are several configuration options available: | s3.secret-access-key | password | Configure the static secret access key used to access the FileIO. | | s3.session-token | AQoDYXdzEJr... | Configure the static session token used to access the FileIO. | | s3.signer | bearer | Configure the signature version of the FileIO. | -| s3.signer.uri | http://my.signer:8080/s3 | Configure the remote signing uri if it differs from the catalog uri. Remote signing is only implemented for `FsspecFileIO`. The final request is sent to `<s3.signer.uri>/v1/aws/s3/sign`. | +| s3.signer.uri | http://my.signer:8080/s3 | Configure the remote signing uri if it differs from the catalog uri. Remote signing is only implemented for `FsspecFileIO`. The final request is sent to `<s3.signer.uri>/<s3.signer.endpoint>`. | +| s3.signer.endpoint | v1/main/s3-sign | Configure the remote signing endpoint. Remote signing is only implemented for `FsspecFileIO`. The final request is sent to `<s3.signer.uri>/<s3.signer.endpoint>`. (default : v1/aws/s3/sign). | | s3.region | us-west-2 | Sets the region of the bucket | | s3.proxy-uri | http://my.proxy.com:8080 | Configure the proxy server to be used by the FileIO. | | s3.connect-timeout | 60.0 | Configure socket connection timeout, in seconds. 
| diff --git a/pyiceberg/io/__init__.py b/pyiceberg/io/__init__.py index 0567af2d5..d5f26a178 100644 --- a/pyiceberg/io/__init__.py +++ b/pyiceberg/io/__init__.py @@ -58,6 +58,8 @@ S3_PROXY_URI = "s3.proxy-uri" S3_CONNECT_TIMEOUT = "s3.connect-timeout" S3_SIGNER_URI = "s3.signer.uri" +S3_SIGNER_ENDPOINT = "s3.signer.endpoint" +S3_SIGNER_ENDPOINT_DEFAULT = "v1/aws/s3/sign" HDFS_HOST = "hdfs.host" HDFS_PORT = "hdfs.port" HDFS_USER = "hdfs.user" diff --git a/pyiceberg/io/fsspec.py b/pyiceberg/io/fsspec.py index d6e4a32ad..cbe5d5b64 100644 --- a/pyiceberg/io/fsspec.py +++ b/pyiceberg/io/fsspec.py @@ -67,6 +67,8 @@ S3_REGION, S3_SECRET_ACCESS_KEY, S3_SESSION_TOKEN, + S3_SIGNER_ENDPOINT, + S3_SIGNER_ENDPOINT_DEFAULT, S3_SIGNER_URI, ADLFS_ClIENT_SECRET, FileIO, @@ -86,6 +88,8 @@ def s3v4_rest_signer(properties: Properties, request: AWSRequest, **_: Any) -> A raise SignError("Signer set, but token is not available") signer_url = properties.get(S3_SIGNER_URI, properties["uri"]).rstrip("/") + signer_endpoint = properties.get(S3_SIGNER_ENDPOINT, S3_SIGNER_ENDPOINT_DEFAULT) + signer_headers = {"Authorization": f"Bearer {properties[TOKEN]}"} signer_body = { "method": request.method, @@ -94,7 +98,7 @@ def s3v4_rest_signer(properties: Properties, request: AWSRequest, **_: Any) -> A "headers": {key: [val] for key, val in request.headers.items()}, } - response = requests.post(f"{signer_url}/v1/aws/s3/sign", headers=signer_headers, json=signer_body) + response = requests.post(f"{signer_url}/{signer_endpoint.strip()}", headers=signer_headers, json=signer_body) try: response.raise_for_status() response_json = response.json() @@ -131,9 +135,9 @@ def _s3(properties: Properties) -> AbstractFileSystem: if signer := properties.get("s3.signer"): logger.info("Loading signer %s", signer) - if singer_func := SIGNERS.get(signer): - singer_func_with_properties = partial(singer_func, properties) - register_events["before-sign.s3"] = singer_func_with_properties + if signer_func := 
SIGNERS.get(signer): + signer_func_with_properties = partial(signer_func, properties) + register_events["before-sign.s3"] = signer_func_with_properties # Disable the AWS Signer config_kwargs["signature_version"] = UNSIGNED diff --git a/tests/io/test_fsspec.py b/tests/io/test_fsspec.py index 3dd79e182..fbb184910 100644 --- a/tests/io/test_fsspec.py +++ b/tests/io/test_fsspec.py @@ -727,6 +727,59 @@ def test_s3v4_rest_signer(requests_mock: Mocker) -> None: } +def test_s3v4_rest_signer_endpoint(requests_mock: Mocker) -> None: + new_uri = "https://other-bucket/metadata/snap-8048355899640248710-1-a5c8ea2d-aa1f-48e8-89f4-1fa69db8c742.avro" + endpoint = "v1/main/s3-sign/foo.bar?e=e&b=b&k=k=k&s=s&w=w" + requests_mock.post( + f"{TEST_URI}/{endpoint}", + json={ + "uri": new_uri, + "headers": { + "Authorization": [ + "AWS4-HMAC-SHA256 Credential=ASIAQPRZZYGHUT57DL3I/20221017/us-west-2/s3/aws4_request, SignedHeaders=host;x-amz-content-sha256;x-amz-date;x-amz-security-token, Signature=430582a17d61ab02c272896fa59195f277af4bdf2121c441685e589f044bbe02" + ], + "Host": ["bucket.s3.us-west-2.amazonaws.com"], + "User-Agent": ["Botocore/1.27.59 Python/3.10.7 Darwin/21.5.0"], + "x-amz-content-sha256": ["UNSIGNED-PAYLOAD"], + "X-Amz-Date": ["20221017T102940Z"], + "X-Amz-Security-Token": [ + 
"YQoJb3JpZ2luX2VjEDoaCXVzLXdlc3QtMiJGMEQCID/fFxZP5oaEgQmcwP6XhZa0xSq9lmLSx8ffaWbySfUPAiAesa7sjd/WV4uwRTO0S03y/MWVtgpH+/NyZQ4bZgLVriqrAggTEAEaDDAzMzQwNzIyMjE1OSIMOeFOWhZIurMmAqjsKogCxMCqxX8ZjK0gacAkcDqBCyA7qTSLhdfKQIH/w7WpLBU1km+cRUWWCudan6gZsAq867DBaKEP7qI05DAWr9MChAkgUgyI8/G3Z23ET0gAedf3GsJbakB0F1kklx8jPmj4BPCht9RcTiXiJ5DxTS/cRCcalIQXmPFbaJSqpBusVG2EkWnm1v7VQrNPE2Os2b2P293vpbhwkyCEQiGRVva4Sw9D1sKvqSsK10QCRG+os6dFEOu1kARaXi6pStvR4OVmj7OYeAYjzaFchn7nz2CSae0M4IluiYQ01eQAywbfRo9DpKSmDM/DnPZWJnD/woLhaaaCrCxSSEaFsvGOHFhLd3Rknw1v0jADMILUtJoGOp4BpqKqyMz0CY3kpKL0jfR3ykTf/ge9wWVE0Alr7wRIkGCIURkhslGHqSyFRGoTqIXaxU+oPbwlw/0w/nYO7qQ6bTANOWye/wgw4h/NmJ6vU7wnZTXwREf1r6MF72++bE/fMk19LfVb8jN/qrUqAUXTc8gBAUxL5pgy8+oT/JnI2BkVrrLS4ilxEXP9Ahm+6GDUYXV4fBpqpZwdkzQ/5Gw=" + ], + }, + "extensions": {}, + }, + status_code=200, + ) + + request = AWSRequest( + method="HEAD", + url="https://bucket/metadata/snap-8048355899640248710-1-a5c8ea2d-aa1f-48e8-89f4-1fa69db8c742.avro", + headers={"User-Agent": "Botocore/1.27.59 Python/3.10.7 Darwin/21.5.0"}, + data=b"", + params={}, + auth_path="/metadata/snap-8048355899640248710-1-a5c8ea2d-aa1f-48e8-89f4-1fa69db8c742.avro", + ) + request.context = { + "client_region": "us-west-2", + "has_streaming_input": False, + "auth_type": None, + "signing": {"bucket": "bucket"}, + "retries": {"attempt": 1, "invocation-id": "75d143fb-0219-439b-872c-18213d1c8d54"}, + } + + signed_request = s3v4_rest_signer({"token": "abc", "uri": TEST_URI, "s3.signer.endpoint": endpoint}, request) + + assert signed_request.url == new_uri + assert dict(signed_request.headers) == { + "Authorization": "AWS4-HMAC-SHA256 Credential=ASIAQPRZZYGHUT57DL3I/20221017/us-west-2/s3/aws4_request, SignedHeaders=host;x-amz-content-sha256;x-amz-date;x-amz-security-token, Signature=430582a17d61ab02c272896fa59195f277af4bdf2121c441685e589f044bbe02", + "Host": "bucket.s3.us-west-2.amazonaws.com", + "User-Agent": "Botocore/1.27.59 Python/3.10.7 Darwin/21.5.0", + "X-Amz-Date": "20221017T102940Z", + 
"X-Amz-Security-Token": "YQoJb3JpZ2luX2VjEDoaCXVzLXdlc3QtMiJGMEQCID/fFxZP5oaEgQmcwP6XhZa0xSq9lmLSx8ffaWbySfUPAiAesa7sjd/WV4uwRTO0S03y/MWVtgpH+/NyZQ4bZgLVriqrAggTEAEaDDAzMzQwNzIyMjE1OSIMOeFOWhZIurMmAqjsKogCxMCqxX8ZjK0gacAkcDqBCyA7qTSLhdfKQIH/w7WpLBU1km+cRUWWCudan6gZsAq867DBaKEP7qI05DAWr9MChAkgUgyI8/G3Z23ET0gAedf3GsJbakB0F1kklx8jPmj4BPCht9RcTiXiJ5DxTS/cRCcalIQXmPFbaJSqpBusVG2EkWnm1v7VQrNPE2Os2b2P293vpbhwkyCEQiGRVva4Sw9D1sKvqSsK10QCRG+os6dFEOu1kARaXi6pStvR4OVmj7OYeAYjzaFchn7nz2CSae0M4IluiYQ01eQAywbfRo9DpKSmDM/DnPZWJnD/woLhaaaCrCxSSEaFsvGOHFhLd3Rknw1v0jADMILUtJoGOp4BpqKqyMz0CY3kpKL0jfR3ykTf/ge9wWVE0Alr7wRIkGCIURkhslGHqSyFRGoTqIXaxU+oPbwlw/0w/nYO7qQ6bTANOWye/wgw4h/NmJ6vU7wnZTXwREf1r6MF72++bE/fMk19LfVb8jN/qrUqAUXTc8gBAUxL5pgy8+oT/JnI2BkVrrLS4ilxEXP9Ahm+6GDUYXV4fBpqpZwdkzQ/5Gw=", + "x-amz-content-sha256": "UNSIGNED-PAYLOAD", + } + + def test_s3v4_rest_signer_forbidden(requests_mock: Mocker) -> None: requests_mock.post( f"{TEST_URI}/v1/aws/s3/sign",