Skip to content

Commit

Permalink
Issue #531 encapsulate ETL API credentials in `GeoPySparkBackendImple…
Browse files Browse the repository at this point in the history
…mentation.request_costs`
  • Loading branch information
soxofaan committed Oct 31, 2023
1 parent 13e7846 commit 748e3c2
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 21 deletions.
25 changes: 8 additions & 17 deletions openeogeotrellis/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@
)
from openeogeotrellis.integrations.hadoop import setup_kerberos_auth
from openeogeotrellis.processgraphvisiting import GeotrellisTileProcessGraphVisitor, SingleNodeUDFProcessGraphVisitor
from openeogeotrellis.integrations.etl_api import EtlApi, get_etl_api_access_token
from openeogeotrellis.integrations.etl_api import EtlApi, get_etl_api_access_token, get_etl_api_credentials
from openeogeotrellis.integrations.kubernetes import (
truncate_job_id_k8s,
k8s_job_name,
Expand Down Expand Up @@ -1471,23 +1471,14 @@ def request_costs(self, user_id: str, request_id: str, success: bool) -> Optiona
requests_session = requests_with_retry(total=3, backoff_factor=2)

if sentinel_hub_processing_units > 0:
# TODO: replace with strategy pattern?
if ConfigParams().is_kube_deploy:
etl_api_client_id = os.environ["OPENEO_ETL_OIDC_CLIENT_ID"]
etl_api_client_secret = os.environ["OPENEO_ETL_OIDC_CLIENT_SECRET"]
else:
vault = Vault(ConfigParams().vault_addr, requests_session)

vault_token = vault.login_kerberos(self._principal, self._key_tab)
etl_api_credentials = vault.get_etl_api_credentials(vault_token)
etl_api_client_id = etl_api_credentials.client_id
etl_api_client_secret = etl_api_credentials.client_secret

source_id = get_backend_config().etl_source_id
etl_api = EtlApi(ConfigParams().etl_api, source_id, requests_session)

etl_api = EtlApi(endpoint=ConfigParams().etl_api, requests_session=requests_session)
etl_credentials = get_etl_api_credentials(
kerberos_principal=self._principal, key_tab=self._key_tab, requests_session=requests_session
)
etl_access_token = get_etl_api_access_token(
client_id=etl_api_client_id, client_secret=etl_api_client_secret, requests_session=requests_session
client_id=etl_credentials.client_id,
client_secret=etl_credentials.client_secret,
requests_session=requests_session,
)

costs = etl_api.log_resource_usage(
Expand Down
2 changes: 1 addition & 1 deletion openeogeotrellis/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,6 @@ class GpsBackendConfig(OpenEoBackendConfig):
ejr_backend_id: str = "unknown"
ejr_credentials_vault_path: Optional[str] = os.environ.get("OPENEO_EJR_CREDENTIALS_VAULT_PATH")

etl_source_id: str = "TerraScope/MEP"
etl_source_id: str = "TerraScope/MEP" # TODO: eliminate hardcoded VITO reference

prometheus_api: Optional[str] = os.environ.get("OPENEO_PROMETHEUS_API")
58 changes: 55 additions & 3 deletions openeogeotrellis/integrations/etl_api.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
import logging
from typing import Optional
import os
from typing import Optional, NamedTuple

import requests
from openeo.rest.auth.oidc import OidcProviderInfo, OidcClientInfo, OidcClientCredentialsAuthenticator
from openeo_driver.config import get_backend_config
from openeo_driver.datastructs import secretive_repr

from openeogeotrellis.configparams import ConfigParams
from openeogeotrellis.vault import Vault

ORCHESTRATOR = "openeo"

Expand Down Expand Up @@ -43,9 +47,16 @@ class EtlApi:
API for reporting resource usage and added value to the ETL (EOPlaza marketplace) API
and deriving a cost estimate.
"""
def __init__(self, endpoint: str, source_id: str, requests_session: Optional[requests.Session] = None):

def __init__(
self,
endpoint: str,
*,
source_id: Optional[str] = None,
requests_session: Optional[requests.Session] = None,
):
self._endpoint = endpoint
self._source_id = source_id
self._source_id = source_id or get_backend_config().etl_source_id
self._session = requests_session or requests.Session()

def assert_access_token_valid(self, access_token: str):
Expand Down Expand Up @@ -151,6 +162,47 @@ def log_added_value(self, batch_job_id: str, title: Optional[str], execution_id:
return total_credits


class EtlCredentials(NamedTuple):
    """
    Container of ETL API related (OAuth) credentials.

    Fields:
        oidc_issuer: OIDC issuer to request access tokens from.
        client_id: OAuth client id.
        client_secret: OAuth client secret.
    """

    oidc_issuer: str
    client_id: str
    client_secret: str

    # One shared `secretive_repr()` callable backs both `repr()` and `str()`.
    # NOTE(review): presumably this keeps the secret out of logs/tracebacks
    # (going by the helper's name) -- confirm in `openeo_driver.datastructs`.
    __repr__ = secretive_repr()
    __str__ = __repr__


def get_etl_api_credentials(
    kerberos_principal: str,
    key_tab: str,
    requests_session: Optional[requests.Session] = None,
) -> EtlCredentials:
    """
    Collect the ETL API (OAuth) credentials, from env vars or from Vault.

    When both `OPENEO_ETL_OIDC_CLIENT_ID` and `OPENEO_ETL_OIDC_CLIENT_SECRET`
    are present in the environment, those values are used; the OIDC issuer is
    taken from `OPENEO_ETL_API_OIDC_ISSUER` if that is set to a non-empty
    value, and from `ConfigParams().etl_api_oidc_issuer` otherwise.

    In all other cases the client id/secret are fetched from Vault
    (at `ConfigParams().vault_addr`) after a Kerberos login.

    :param kerberos_principal: principal for the Vault Kerberos login
        (only used on the Vault code path)
    :param key_tab: key tab for the Vault Kerberos login
        (only used on the Vault code path)
    :param requests_session: optional session handed to the `Vault` client
    :return: `EtlCredentials` with issuer, client id and client secret
    """
    # TODO: unify this with get_etl_api_access_token
    # The issuer env var is deliberately not required: it has a config fallback.
    required_env_vars = (
        "OPENEO_ETL_OIDC_CLIENT_ID",
        "OPENEO_ETL_OIDC_CLIENT_SECRET",
    )

    if not all(name in os.environ for name in required_env_vars):
        # Get credentials directly from vault
        # TODO: eliminate this code path?
        vault_client = Vault(ConfigParams().vault_addr, requests_session)
        token = vault_client.login_kerberos(kerberos_principal, key_tab)
        vault_credentials = vault_client.get_etl_api_credentials(token)
        return EtlCredentials(
            oidc_issuer=ConfigParams().etl_api_oidc_issuer,
            client_id=vault_credentials.client_id,
            client_secret=vault_credentials.client_secret,
        )

    issuer = os.environ.get("OPENEO_ETL_API_OIDC_ISSUER") or ConfigParams().etl_api_oidc_issuer
    return EtlCredentials(
        oidc_issuer=issuer,
        client_id=os.environ["OPENEO_ETL_OIDC_CLIENT_ID"],
        client_secret=os.environ["OPENEO_ETL_OIDC_CLIENT_SECRET"],
    )


def get_etl_api_access_token(client_id: str, client_secret: str, requests_session: requests.Session) -> str:
oidc_provider = OidcProviderInfo(
# TODO: get issuer from the secret as well? (~ openeo-job-registry-elastic-api)
Expand Down

0 comments on commit 748e3c2

Please sign in to comment.