Skip to content

Commit

Permalink
Support s3.signer.endpoint for the REST catalog (#1029)
Browse files Browse the repository at this point in the history
* s3_signer_endpoint

* prune any trailing whitespaces

Co-authored-by: Fokko Driesprong <fokko@apache.org>

* fallback to default value instead of "endpoint" property

Co-authored-by: Fokko Driesprong <fokko@apache.org>

* fix test_s3v4_rest_signer_endpoint

* Fix missing backtick

Co-authored-by: Fokko Driesprong <fokko@apache.org>

* rename S3_SIGNER_ENDPOINT_DEFAULT_VALUE to S3_SIGNER_ENDPOINT_DEFAULT

* fix s3.signer.endpoint docs

* fk typo in signer

* fix fmt

---------

Co-authored-by: guilhermecastro <guilherme.castro@protonmail.com>
Co-authored-by: Fokko Driesprong <fokko@apache.org>
  • Loading branch information
3 people committed Aug 12, 2024
1 parent 4f33f3a commit 2e73a41
Show file tree
Hide file tree
Showing 4 changed files with 65 additions and 5 deletions.
3 changes: 2 additions & 1 deletion mkdocs/docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,8 @@ For the FileIO there are several configuration options available:
| s3.secret-access-key | password | Configure the static secret access key used to access the FileIO. |
| s3.session-token | AQoDYXdzEJr... | Configure the static session token used to access the FileIO. |
| s3.signer | bearer | Configure the signature version of the FileIO. |
| s3.signer.uri | http://my.signer:8080/s3 | Configure the remote signing uri if it differs from the catalog uri. Remote signing is only implemented for `FsspecFileIO`. The final request is sent to `<s3.singer.uri>/v1/aws/s3/sign`. |
| s3.signer.uri | http://my.signer:8080/s3 | Configure the remote signing uri if it differs from the catalog uri. Remote signing is only implemented for `FsspecFileIO`. The final request is sent to `<s3.signer.uri>/<s3.signer.endpoint>`. |
| s3.signer.endpoint | v1/main/s3-sign | Configure the remote signing endpoint. Remote signing is only implemented for `FsspecFileIO`. The final request is sent to `<s3.signer.uri>/<s3.signer.endpoint>`. (default : v1/aws/s3/sign). |
| s3.region | us-west-2 | Sets the region of the bucket |
| s3.proxy-uri | http://my.proxy.com:8080 | Configure the proxy server to be used by the FileIO. |
| s3.connect-timeout | 60.0 | Configure socket connection timeout, in seconds. |
Expand Down
2 changes: 2 additions & 0 deletions pyiceberg/io/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@
S3_PROXY_URI = "s3.proxy-uri"
S3_CONNECT_TIMEOUT = "s3.connect-timeout"
S3_SIGNER_URI = "s3.signer.uri"
S3_SIGNER_ENDPOINT = "s3.signer.endpoint"
S3_SIGNER_ENDPOINT_DEFAULT = "v1/aws/s3/sign"
HDFS_HOST = "hdfs.host"
HDFS_PORT = "hdfs.port"
HDFS_USER = "hdfs.user"
Expand Down
12 changes: 8 additions & 4 deletions pyiceberg/io/fsspec.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@
S3_REGION,
S3_SECRET_ACCESS_KEY,
S3_SESSION_TOKEN,
S3_SIGNER_ENDPOINT,
S3_SIGNER_ENDPOINT_DEFAULT,
S3_SIGNER_URI,
ADLFS_ClIENT_SECRET,
FileIO,
Expand All @@ -86,6 +88,8 @@ def s3v4_rest_signer(properties: Properties, request: AWSRequest, **_: Any) -> A
raise SignError("Signer set, but token is not available")

signer_url = properties.get(S3_SIGNER_URI, properties["uri"]).rstrip("/")
signer_endpoint = properties.get(S3_SIGNER_ENDPOINT, S3_SIGNER_ENDPOINT_DEFAULT)

signer_headers = {"Authorization": f"Bearer {properties[TOKEN]}"}
signer_body = {
"method": request.method,
Expand All @@ -94,7 +98,7 @@ def s3v4_rest_signer(properties: Properties, request: AWSRequest, **_: Any) -> A
"headers": {key: [val] for key, val in request.headers.items()},
}

response = requests.post(f"{signer_url}/v1/aws/s3/sign", headers=signer_headers, json=signer_body)
response = requests.post(f"{signer_url}/{signer_endpoint.strip()}", headers=signer_headers, json=signer_body)
try:
response.raise_for_status()
response_json = response.json()
Expand Down Expand Up @@ -131,9 +135,9 @@ def _s3(properties: Properties) -> AbstractFileSystem:

if signer := properties.get("s3.signer"):
logger.info("Loading signer %s", signer)
if singer_func := SIGNERS.get(signer):
singer_func_with_properties = partial(singer_func, properties)
register_events["before-sign.s3"] = singer_func_with_properties
if signer_func := SIGNERS.get(signer):
signer_func_with_properties = partial(signer_func, properties)
register_events["before-sign.s3"] = signer_func_with_properties

# Disable the AWS Signer
config_kwargs["signature_version"] = UNSIGNED
Expand Down
53 changes: 53 additions & 0 deletions tests/io/test_fsspec.py
Original file line number Diff line number Diff line change
Expand Up @@ -727,6 +727,59 @@ def test_s3v4_rest_signer(requests_mock: Mocker) -> None:
}


def test_s3v4_rest_signer_endpoint(requests_mock: Mocker) -> None:
new_uri = "https://other-bucket/metadata/snap-8048355899640248710-1-a5c8ea2d-aa1f-48e8-89f4-1fa69db8c742.avro"
endpoint = "v1/main/s3-sign/foo.bar?e=e&b=b&k=k=k&s=s&w=w"
requests_mock.post(
f"{TEST_URI}/{endpoint}",
json={
"uri": new_uri,
"headers": {
"Authorization": [
"AWS4-HMAC-SHA256 Credential=ASIAQPRZZYGHUT57DL3I/20221017/us-west-2/s3/aws4_request, SignedHeaders=host;x-amz-content-sha256;x-amz-date;x-amz-security-token, Signature=430582a17d61ab02c272896fa59195f277af4bdf2121c441685e589f044bbe02"
],
"Host": ["bucket.s3.us-west-2.amazonaws.com"],
"User-Agent": ["Botocore/1.27.59 Python/3.10.7 Darwin/21.5.0"],
"x-amz-content-sha256": ["UNSIGNED-PAYLOAD"],
"X-Amz-Date": ["20221017T102940Z"],
"X-Amz-Security-Token": [
"YQoJb3JpZ2luX2VjEDoaCXVzLXdlc3QtMiJGMEQCID/fFxZP5oaEgQmcwP6XhZa0xSq9lmLSx8ffaWbySfUPAiAesa7sjd/WV4uwRTO0S03y/MWVtgpH+/NyZQ4bZgLVriqrAggTEAEaDDAzMzQwNzIyMjE1OSIMOeFOWhZIurMmAqjsKogCxMCqxX8ZjK0gacAkcDqBCyA7qTSLhdfKQIH/w7WpLBU1km+cRUWWCudan6gZsAq867DBaKEP7qI05DAWr9MChAkgUgyI8/G3Z23ET0gAedf3GsJbakB0F1kklx8jPmj4BPCht9RcTiXiJ5DxTS/cRCcalIQXmPFbaJSqpBusVG2EkWnm1v7VQrNPE2Os2b2P293vpbhwkyCEQiGRVva4Sw9D1sKvqSsK10QCRG+os6dFEOu1kARaXi6pStvR4OVmj7OYeAYjzaFchn7nz2CSae0M4IluiYQ01eQAywbfRo9DpKSmDM/DnPZWJnD/woLhaaaCrCxSSEaFsvGOHFhLd3Rknw1v0jADMILUtJoGOp4BpqKqyMz0CY3kpKL0jfR3ykTf/ge9wWVE0Alr7wRIkGCIURkhslGHqSyFRGoTqIXaxU+oPbwlw/0w/nYO7qQ6bTANOWye/wgw4h/NmJ6vU7wnZTXwREf1r6MF72++bE/fMk19LfVb8jN/qrUqAUXTc8gBAUxL5pgy8+oT/JnI2BkVrrLS4ilxEXP9Ahm+6GDUYXV4fBpqpZwdkzQ/5Gw="
],
},
"extensions": {},
},
status_code=200,
)

request = AWSRequest(
method="HEAD",
url="https://bucket/metadata/snap-8048355899640248710-1-a5c8ea2d-aa1f-48e8-89f4-1fa69db8c742.avro",
headers={"User-Agent": "Botocore/1.27.59 Python/3.10.7 Darwin/21.5.0"},
data=b"",
params={},
auth_path="/metadata/snap-8048355899640248710-1-a5c8ea2d-aa1f-48e8-89f4-1fa69db8c742.avro",
)
request.context = {
"client_region": "us-west-2",
"has_streaming_input": False,
"auth_type": None,
"signing": {"bucket": "bucket"},
"retries": {"attempt": 1, "invocation-id": "75d143fb-0219-439b-872c-18213d1c8d54"},
}

signed_request = s3v4_rest_signer({"token": "abc", "uri": TEST_URI, "s3.signer.endpoint": endpoint}, request)

assert signed_request.url == new_uri
assert dict(signed_request.headers) == {
"Authorization": "AWS4-HMAC-SHA256 Credential=ASIAQPRZZYGHUT57DL3I/20221017/us-west-2/s3/aws4_request, SignedHeaders=host;x-amz-content-sha256;x-amz-date;x-amz-security-token, Signature=430582a17d61ab02c272896fa59195f277af4bdf2121c441685e589f044bbe02",
"Host": "bucket.s3.us-west-2.amazonaws.com",
"User-Agent": "Botocore/1.27.59 Python/3.10.7 Darwin/21.5.0",
"X-Amz-Date": "20221017T102940Z",
"X-Amz-Security-Token": "YQoJb3JpZ2luX2VjEDoaCXVzLXdlc3QtMiJGMEQCID/fFxZP5oaEgQmcwP6XhZa0xSq9lmLSx8ffaWbySfUPAiAesa7sjd/WV4uwRTO0S03y/MWVtgpH+/NyZQ4bZgLVriqrAggTEAEaDDAzMzQwNzIyMjE1OSIMOeFOWhZIurMmAqjsKogCxMCqxX8ZjK0gacAkcDqBCyA7qTSLhdfKQIH/w7WpLBU1km+cRUWWCudan6gZsAq867DBaKEP7qI05DAWr9MChAkgUgyI8/G3Z23ET0gAedf3GsJbakB0F1kklx8jPmj4BPCht9RcTiXiJ5DxTS/cRCcalIQXmPFbaJSqpBusVG2EkWnm1v7VQrNPE2Os2b2P293vpbhwkyCEQiGRVva4Sw9D1sKvqSsK10QCRG+os6dFEOu1kARaXi6pStvR4OVmj7OYeAYjzaFchn7nz2CSae0M4IluiYQ01eQAywbfRo9DpKSmDM/DnPZWJnD/woLhaaaCrCxSSEaFsvGOHFhLd3Rknw1v0jADMILUtJoGOp4BpqKqyMz0CY3kpKL0jfR3ykTf/ge9wWVE0Alr7wRIkGCIURkhslGHqSyFRGoTqIXaxU+oPbwlw/0w/nYO7qQ6bTANOWye/wgw4h/NmJ6vU7wnZTXwREf1r6MF72++bE/fMk19LfVb8jN/qrUqAUXTc8gBAUxL5pgy8+oT/JnI2BkVrrLS4ilxEXP9Ahm+6GDUYXV4fBpqpZwdkzQ/5Gw=",
"x-amz-content-sha256": "UNSIGNED-PAYLOAD",
}


def test_s3v4_rest_signer_forbidden(requests_mock: Mocker) -> None:
requests_mock.post(
f"{TEST_URI}/v1/aws/s3/sign",
Expand Down

0 comments on commit 2e73a41

Please sign in to comment.