Skip to content

Commit

Permalink
Issue #531 MultiEtlApiConfig: don't fail-fast on missing env vars
Browse files Browse the repository at this point in the history
just skip with warnings for now
  • Loading branch information
soxofaan committed Feb 2, 2024
1 parent d2d2f68 commit df7b1b5
Show file tree
Hide file tree
Showing 4 changed files with 79 additions and 25 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ without compromising stable operations.

## Unreleased

## 0.26.2

- `MultiEtlApiConfig`: don't fail-fast on missing env vars for credentials extraction, just skip with warnings for now

## 0.26.1

### Bugfix
Expand Down
2 changes: 1 addition & 1 deletion openeogeotrellis/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.26.1a1"
__version__ = "0.26.2a1"
41 changes: 26 additions & 15 deletions openeogeotrellis/integrations/etl_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,7 @@ def __init__(
default_credentials_env_var: str = "OPENEO_ETL_OIDC_CLIENT_CREDENTIALS",
other_etl_apis: List[Tuple[str, str, str]],
job_option_field: str = "etl_api_id",
credential_extraction_log_level: int = logging.WARNING,
):
"""
:param default_root_url: default ETL API root URL
Expand All @@ -269,21 +270,31 @@ def __init__(
"""
super().__init__()
self._default_root_url = default_root_url
self._other_root_urls: Dict[str, str] = {id: url for id, url, cred_var in other_etl_apis}
self._job_option_field = job_option_field

# TODO: option to fail fast on missing env vars instead of ignoring?
self._other_root_urls: Dict[str, str] = {}
self._credentials: Dict[str, ClientCredentials] = {}
try:
self._credentials: Dict[str, ClientCredentials] = {
self._default_root_url: ClientCredentials.from_credentials_string(
os.environ[default_credentials_env_var]
)
}
self._credentials.update(
(url, ClientCredentials.from_credentials_string(os.environ[cred_var]))
for id, url, cred_var in other_etl_apis
self._credentials[default_root_url] = ClientCredentials.from_credentials_string(
os.environ[default_credentials_env_var]
)
except KeyError as e:
# Log key error details, but raise generic exception to avoid leaking internals to end user.
raise EtlApiConfigException(f"Missing ETL API credentials env var: {e!r}") from e
self._job_option_field = job_option_field
except Exception as e:
_log.log(
level=credential_extraction_log_level,
msg=f"MultiEtlApiConfig: failed to get credentials for {default_root_url=} from {default_credentials_env_var=}: {e!r}",
)
for etl_id, etl_url, cred_var in other_etl_apis:
try:
creds = ClientCredentials.from_credentials_string(os.environ[cred_var])
except Exception as e:
_log.log(
level=credential_extraction_log_level,
msg=f"MultiEtlApiConfig: failed to get credentials for {etl_url=} ({etl_id=}) from {cred_var=}: {e!r}. Skipping.",
)
else:
self._other_root_urls[etl_id] = etl_url
self._credentials[etl_url] = creds

def get_root_url(self, *, user: Optional[User] = None, job_options: Optional[dict] = None) -> str:
# TODO: also support extraction from user?
Expand All @@ -292,12 +303,12 @@ def get_root_url(self, *, user: Optional[User] = None, job_options: Optional[dic
if etl_api_id in self._other_root_urls:
return self._other_root_urls[etl_api_id]
else:
_log.warning(f"Invalid ETL API id {etl_api_id!r}, falling back on default ETL API")
_log.warning(f"MultiEtlApiConfig: Invalid {etl_api_id=}, using default.")
return self._default_root_url

def get_client_credentials(self, root_url: str) -> Union[ClientCredentials, None]:
if root_url not in self._credentials:
_log.error(f"EtlApiConfig: given {root_url=} not in {self._credentials.keys()!r}")
_log.error(f"MultiEtlApiConfig: invalid {root_url=}: not in {self._credentials.keys()!r}")
raise EtlApiConfigException("Invalid ETL API root URL.")
return self._credentials[root_url]

Expand Down
57 changes: 48 additions & 9 deletions tests/integrations/test_etl_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,15 +324,16 @@ def test_basic(self, monkeypatch, caplog):
("beta", "https://etl-beta.test/", "OPENEO_ETL_OIDC_CLIENT_CREDENTIALS_BETA"),
],
)

assert config.get_root_url() == "https://etl.test/"
assert config.get_root_url(job_options={}) == "https://etl.test/"
assert config.get_root_url(job_options={"etl_api_id": "alt"}) == "https://etl-alt.test/"
assert config.get_root_url(job_options={"etl_api_id": "beta"}) == "https://etl-beta.test/"
assert caplog.messages == []

assert config.get_root_url(job_options={"etl_api_id": "foobar"}) == "https://etl.test/"
assert [r.message for r in caplog.records if r.levelname == "WARNING"] == dirty_equals.Contains(
dirty_equals.IsStr(regex=".*Invalid.*id 'foobar'.*fall.*back.*default.*")
assert [f"{r.levelname}: {r.message}" for r in caplog.records] == dirty_equals.Contains(
dirty_equals.IsStr(regex="WARNING:.*Invalid etl_api_id='foobar', using default.*")
)

assert config.get_client_credentials("https://etl.test/") == ClientCredentials(
Expand All @@ -347,13 +348,51 @@ def test_basic(self, monkeypatch, caplog):
with pytest.raises(EtlApiConfigException, match="Invalid ETL API root URL."):
_ = config.get_client_credentials("https://meh.test/")

def test_missing_env_vars(self, monkeypatch):
def test_missing_env_vars(self, monkeypatch, caplog):
monkeypatch.setenv("OPENEO_ETL_OIDC_CLIENT_CREDENTIALS", "john:pw6@https://oidc.test/")
# Missing OPENEO_ETL_OIDC_CLIENT_CREDENTIALS_ALT
monkeypatch.setenv("OPENEO_ETL_OIDC_CLIENT_CREDENTIALS_BETA", "bob:808@https://boidc.test/")

config = MultiEtlApiConfig(
default_root_url="https://etl.test/",
other_etl_apis=[
("alt", "https://etl-alt.test/", "OPENEO_ETL_OIDC_CLIENT_CREDENTIALS_ALT"),
("beta", "https://etl-beta.test/", "OPENEO_ETL_OIDC_CLIENT_CREDENTIALS_BETA"),
],
)
assert [f"{r.levelname}: {r.message}" for r in caplog.records] == dirty_equals.Contains(
dirty_equals.IsStr(regex="WARNING:.*failed to get credentials for.*etl-alt.test.*Skipping.*")
)

with pytest.raises(EtlApiConfigException, match="Missing.*env var.*OPENEO_ETL_OIDC_CLIENT_CREDENTIALS_ALT"):
_ = MultiEtlApiConfig(
default_root_url="https://etl.test/",
other_etl_apis=[
("alt", "https://etl-alt.test/", "OPENEO_ETL_OIDC_CLIENT_CREDENTIALS_ALT"),
],
# Test get_root_url behavior
caplog.clear()

assert config.get_root_url() == "https://etl.test/"
assert config.get_root_url(job_options={}) == "https://etl.test/"
# Missing env var, so falls back to default
assert config.get_root_url(job_options={"etl_api_id": "alt"}) == "https://etl.test/"
# Working env var: picks up the alternative root URL
assert config.get_root_url(job_options={"etl_api_id": "beta"}) == "https://etl-beta.test/"
assert [f"{r.levelname}: {r.message}" for r in caplog.records] == dirty_equals.Contains(
dirty_equals.IsStr(regex="WARNING:.*Invalid etl_api_id='alt', using default.*")
)

# Test get_client_credentials behavior
caplog.clear()

assert config.get_client_credentials("https://etl.test/") == ClientCredentials(
oidc_issuer="https://oidc.test/", client_id="john", client_secret="pw6"
)
# Missing env var -> failure to get credentials
with pytest.raises(EtlApiConfigException, match="Invalid ETL API root URL."):
_ = config.get_client_credentials("https://etl-alt.test/")
# Working env var -> get alternative credentials
assert config.get_client_credentials("https://etl-beta.test/") == ClientCredentials(
oidc_issuer="https://boidc.test/", client_id="bob", client_secret="808"
)

assert [f"{r.levelname}: {r.message}" for r in caplog.records] == dirty_equals.Contains(
dirty_equals.IsStr(
regex="ERROR:.*invalid root_url.*https://etl-alt.test/.*not in.*https://etl.test/.*https://etl-beta.test/.*"
)
)

0 comments on commit df7b1b5

Please sign in to comment.