Skip to content

Commit

Permalink
Add retries to taxonomy reading. (#205)
Browse files Browse the repository at this point in the history
When we try to load a taxonomy multiple times concurrently, we run into
`FileExistsError`s as the various threads try to cache files at the same
location.
  • Loading branch information
jdangerx committed Mar 26, 2024
1 parent 3692396 commit 9fc1f35
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 2 deletions.
15 changes: 13 additions & 2 deletions src/ferc_xbrl_extractor/arelle_interface.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Abstract away interface to Arelle XBRL Library."""

import io
import time
from pathlib import Path
from typing import Literal

Expand All @@ -12,12 +13,22 @@
from pydantic import BaseModel


def _taxonomy_view(taxonomy_source: str | FileSource.FileSource):
def _taxonomy_view(taxonomy_source: str | FileSource.FileSource, max_retries: int = 7):
"""Actually use Arelle to get a taxonomy and its relationships."""
cntlr = Cntlr.Cntlr()
cntlr.startLogging(logFileName="logToPrint")
model_manager = ModelManager.initialize(cntlr)
taxonomy = ModelXbrl.load(model_manager, taxonomy_source)
for try_count in range(max_retries):
    try:
        cntlr.logger.debug(f"Try #{try_count}: {taxonomy_source=}")
        taxonomy = ModelXbrl.load(model_manager, taxonomy_source)
    except FileExistsError:
        # Concurrent loads of the same taxonomy can collide while caching
        # files at the same location; treat that as transient and retry.
        if (try_count + 1) == max_retries:
            # Out of attempts: re-raise the original error with its traceback.
            raise
        # Exponential backoff: 2s, 4s, 8s, ... between attempts.
        backoff = 2 ** (try_count + 1)
        cntlr.logger.warning(f"Failed try #{try_count}, retrying in {backoff}s")
        time.sleep(backoff)
    else:
        # The load succeeded, so stop here. Using `continue` instead would
        # move on to the next iteration and reload the taxonomy again, up to
        # max_retries times in total.
        break

view = ViewRelationshipSet(taxonomy, "taxonomy.json", "roles", None, None, None)
view.view(XbrlConst.parentChild, None, None, None)
Expand Down
21 changes: 21 additions & 0 deletions tests/integration/arelle_interface_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import concurrent.futures
from unittest.mock import patch

from arelle import Cntlr

from ferc_xbrl_extractor.arelle_interface import load_taxonomy


def test_concurrent_taxonomy_load(tmp_path):
    """Load the same taxonomy from two threads at once and expect no errors.

    Both loads share one Arelle controller whose web cache points at an empty
    temporary directory, so the two threads try to cache the same files at
    the same time.
    """
    cntlr = Cntlr.Cntlr()
    cntlr.webCache.cacheDir = str(tmp_path)
    cntlr.webCache.clear()
    path = "https://eCollection.ferc.gov/taxonomy/form60/2022-01-01/form/form60/form-60_2022-01-01.xsd"
    # Make every controller constructed by the module under test be the one
    # configured above, so both threads use the same (empty) cache directory.
    with patch("ferc_xbrl_extractor.arelle_interface.Cntlr.Cntlr", lambda: cntlr):
        with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
            futures = [executor.submit(load_taxonomy, path) for _ in range(2)]
    # Leaving the executor's `with` block waits for all submitted work, so
    # every future is finished here. Checking all of them (rather than only
    # those that completed within a timeout) means a slow load that
    # eventually fails can no longer slip through unnoticed.
    errors = [exc for fut in futures if (exc := fut.exception()) is not None]
    assert not errors, errors

0 comments on commit 9fc1f35

Please sign in to comment.