catalyst-cooperative · zaneselvans · Sep 15, 2023 · Sep 15, 2023 · Sep 15, 2023
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -3,9 +3,6 @@ repos:
   - repo: https://github.com/pre-commit/pygrep-hooks
     rev: v1.10.0
     hooks:
-      - id: python-check-blanket-noqa # Prohibit overly broad QA exclusions.
-      - id: python-no-eval # Never use eval() it's dangerous.
-      - id: python-no-log-warn # logger.warning(), not old .warn()
       - id: rst-backticks # Find single rather than double backticks
       - id: rst-directive-colons # Missing double-colons after directives
       - id: rst-inline-touching-normal # Inline code should never touch normal text
@@ -20,48 +17,19 @@ repos:
       - id: check-yaml # Validate all YAML files.
       - id: check-case-conflict # Avoid case sensitivity in file names.
       - id: debug-statements # Watch for lingering debugger calls.
-      - id: end-of-file-fixer # Ensure there's a newline at EOF.
       - id: mixed-line-ending # Only newlines, no line-feeds.
+        args: ["--fix=lf"]
       - id: trailing-whitespace # Remove trailing whitespace.
       - id: name-tests-test # Follow PyTest naming convention.
 
   ########################################################################################
   # Formatters: hooks that re-write Python and RST files
   ########################################################################################
-
-  # Convert relative imports to absolute imports
-  - repo: https://github.com/MarcoGorelli/absolufy-imports
-    rev: v0.3.1
-    hooks:
-      - id: absolufy-imports
-
-  # Make sure import statements are sorted uniformly.
-  - repo: https://github.com/PyCQA/isort
-    rev: 5.12.0
-    hooks:
-      - id: isort
-
-  # Remove f-string prefix when there's nothing in the string to format.
-  - repo: https://github.com/dannysepler/rm_unneeded_f_str
-    rev: v0.2.0
-    hooks:
-      - id: rm-unneeded-f-str
-
-  # Use built-in types for annotations as per PEP585, with __futures__ for Python 3.8/3.9
-  # Note that this sometimes conflicts with Pydantic on Python 3.8/3.9
-  - repo: https://github.com/sondrelg/pep585-upgrade
-    rev: "v1.0"
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.0.289
     hooks:
-      - id: upgrade-type-hints
-        args: ["--futures=true"]
-
-  # Update a bunch of Python syntax, using __futures__ for Python 3.8/3.9
-  # Note that this sometimes conflicts with Pydantic on Python 3.8/3.9
-  - repo: https://github.com/asottile/pyupgrade
-    rev: v3.10.1
-    hooks:
-      - id: pyupgrade
-        args: ["--py310-plus"]
+      - id: ruff
+        args: [--fix, --exit-non-zero-on-fix]
 
   # Deterministic python formatting:
   - repo: https://github.com/psf/black
@@ -80,45 +48,12 @@ repos:
   # Linters: hooks that check but don't alter Python & RST files
   ########################################################################################
 
-  # Check for PEP8 non-compliance, code complexity, style, errors, etc:
-  - repo: https://github.com/PyCQA/flake8
-    rev: 6.1.0
-    hooks:
-      - id: flake8
-        args: ["--config", "tox.ini"]
-        additional_dependencies:
-          - flake8-docstrings
-          - flake8-colors
-          - pydocstyle
-          - flake8-builtins
-          - mccabe
-          - pep8-naming
-          - pycodestyle
-          - pyflakes
-          - flake8-rst-docstrings
-          - flake8-use-fstring
-
-  # Check for known security vulnerabilities:
-  - repo: https://github.com/PyCQA/bandit
-    rev: 1.7.5
-    hooks:
-      - id: bandit
-        args: ["--configfile", ".bandit.yml"]
-
   # Check for errors in restructuredtext (.rst) files under the doc hierarchy
   - repo: https://github.com/PyCQA/doc8
     rev: v1.1.1
     hooks:
       - id: doc8
-        args: ["--config", "tox.ini"]
-
-  # Lint any RST files and embedded code blocks for syntax / formatting errors
-  - repo: https://github.com/rstcheck/rstcheck
-    rev: v6.2.0
-    hooks:
-      - id: rstcheck
-        additional_dependencies: [sphinx]
-        args: ["--config", "tox.ini"]
+        args: ["--config", "pyproject.toml"]
 
   # Lint Dockerfiles for errors and to ensure best practices
   - repo: https://github.com/AleksaC/hadolint-py

diff --git a/pyproject.toml b/pyproject.toml
@@ -54,37 +54,28 @@ xbrl_extract = "ferc_xbrl_extractor.cli:main"
 [project.optional-dependencies]
 dev = [
     "black>=22.0,<23.10",  # A deterministic code formatter
-    "build>=0.10,<0.11",
-    "isort>=5.0,<5.13",  # Standardized import sorting
+    "build>=1.0,<1.1",
+    "ruff>=0.0.289",  # A very fast linter and autofixer
     "tox>=4.0,<4.12",  # Python test environment manager
     "twine>=3.3,<4.1",  # Used to make releases to PyPI
 ]
 docs = [
     "doc8>=1.0,<1.2",  # Ensures clean documentation formatting
     "furo>=2022.4.7",
-    "sphinx>=4,!=5.1.0,<7.2.6",  # The default Python documentation engine
+    "sphinx>=4,!=5.1.0,<7.3",  # The default Python documentation engine
     "sphinx-autoapi>=2.0,<2.2",  # Generates documentation from docstrings
     "sphinx-issues>=1.2,<3.1",  # Allows references to GitHub issues
 ]
 tests = [
-    "bandit>=1.6,<1.8",  # Checks code for security issues
     "coverage>=5.3,<7.4",  # Lets us track what code is being tested
     "doc8>=1.0,<1.2",  # Ensures clean documentation formatting
-    "flake8>=4.0,<6.2",  # A framework for linting & static analysis
-    "flake8-builtins>=1.5,<2.2",  # Avoid shadowing Python built-in names
-    "flake8-colors>=0.1,<0.2",  # Produce colorful error / warning output
-    "flake8-docstrings>=1.5,<1.8",  # Ensure docstrings are formatted well
-    "flake8-rst-docstrings>=0.2,<0.4",  # Allow use of ReST in docstrings
-    "flake8-use-fstring>=1.0,<1.5",  # Highlight use of old-style string formatting
-    "mccabe>=0.6,<0.8",  # Checks that code isn't overly complicated
     "mypy>=1.0,<1.6",  # Static type checking
-    "pep8-naming>=0.12,<0.14",  # Require PEP8 compliant variable names
     "pre-commit>=2.9,<3.5",  # Allow us to run pre-commit hooks in testing
     "pydocstyle>=5.1,<6.4",  # Style guidelines for Python documentation
     "pytest>=6.2,<7.5",  # Our testing framework
     "pytest-console-scripts>=1.1,<1.5",  # Allow automatic testing of scripts
     "pytest-cov>=2.10,<4.2",  # Pytest plugin for working with coverage
-    "rstcheck[sphinx]>=5.0,<6.3",  # ReStructuredText linter
+    "ruff>=0.0.289",  # A very fast linter and autofixer
     "tox>=4.0,<4.12",  # Python test environment manager
 ]
 types = [
@@ -104,6 +95,98 @@ line-length = 88
 target-version = ["py310", "py311"]
 include = "\\.pyi?$"
 
-[tool.isort]
-profile = "black"
-known_first_party = "ferc_xbrl_extractor"
+[tool.ruff]
+select = [
+    "A", # flake8-builtins
+    # "ARG", # unused arguments
+    # "B",  # flake8-bugbear
+    "C",   # mccabe cyclomatic complexity (C90); the bare "C" prefix also enables flake8-comprehensions (C4)
+    "D",   # pydocstyle errors
+    "E",   # pycodestyle errors
+    "EXE", # executable file issues
+    # "ERA", # eradicate: find commented out code
+    "F",   # pyflakes
+    "I",   # isort
+    "ISC", # implicit string concatenation
+    "N",   # pep8-naming
+    "NPY", # NumPy specific checks
+    "PD",  # pandas checks
+    "PGH", # pygrep-hooks
+    # "PL",  # pylint
+    # "PT",  # pytest style
+    "PTH", # use pathlib
+    "Q",   # flake8-quotes
+    "RET", # check return values
+    "RSE", # unnecessary parentheses on raised exceptions
+    "S",   # flake8-bandit
+    "SIM", # flake8-simplify
+    # "T",   # print statements found
+    "UP", # pyupgrade (use modern python syntax)
+    "W",  # pycodestyle warnings
+]
+ignore = [
+    "D401",   # Require imperative mood in docstrings.
+    "D417",   # Missing argument descriptions in docstrings.
+    "E501",   # Overlong lines.
+    "E203",   # Space before ':' (black recommends to ignore)
+    "PD003",  # Use of isna rather than isnull
+    "PD004",  # Use of notna rather than notnull
+    "PD008",  # Use of df.at[] rather than df.loc[]
+    "PD010",  # Use of df.stack()
+    "PD013",  # Use of df.unstack()
+    "PD015",  # Use of pd.merge() rather than df.merge()
+    "PD901",  # df as variable name
+    "RET504", # Ignore unnecessary assignment before return
+    "S101",   # Use of assert
+]
+
+# Assume Python 3.10 (the minimum supported version)
+target-version = "py310"
+line-length = 88
+
+# Don't automatically concatenate strings -- sometimes we forget a comma!
+unfixable = ["ISC"]
+
+[tool.ruff.per-file-ignores]
+"__init__.py" = ["F401"]    # Ignore unused imports
+"tests/*" = ["D"]
+
+[tool.ruff.pep8-naming]
+# Allow Pydantic's `@validator` decorator to trigger class method treatment.
+classmethod-decorators = ["pydantic.validator", "pydantic.root_validator"]
+
+[tool.ruff.isort]
+known-first-party = ["ferc_xbrl_extractor"]
+
+[tool.ruff.pydocstyle]
+convention = "google"
+
+[tool.ruff.mccabe]
+max-complexity = 10
+
+[tool.ruff.flake8-quotes]
+docstring-quotes = "double"
+inline-quotes = "double"
+multiline-quotes = "double"
+
+[tool.doc8]
+max-line-length = 88
+ignore-path = ["docs/_build"]
+
+[tool.pytest.ini_options]
+testpaths = "./"
+filterwarnings = [
+    "ignore:distutils Version classes are deprecated:DeprecationWarning",
+    "ignore:Creating a LegacyVersion:DeprecationWarning:pkg_resources[.*]",
+]
+
+addopts = "--verbose"
+log_format = "%(asctime)s [%(levelname)8s] %(name)s:%(lineno)s %(message)s"
+log_date_format = "%Y-%m-%d %H:%M:%S"
+log_cli = "true"
+log_cli_level = "INFO"
+doctest_optionflags = [
+    "NORMALIZE_WHITESPACE",
+    "IGNORE_EXCEPTION_DETAIL",
+    "ELLIPSIS",
+]
diff --git a/src/ferc_xbrl_extractor/arelle_interface.py b/src/ferc_xbrl_extractor/arelle_interface.py
@@ -124,9 +124,12 @@ def from_concept(cls, concept: ModelConcept) -> "Metadata":
                 reference_dict[reference_name] = [part_dict]
 
             # Flatten out references where applicable
-            if len(reference_dict[reference_name]) == 1 and len(part_dict) == 1:
-                if reference_name in part_dict:
-                    reference_dict[reference_name] = part_dict[reference_name]
+            if (
+                len(reference_dict[reference_name]) == 1
+                and len(part_dict) == 1
+                and reference_name in part_dict
+            ):
+                reference_dict[reference_name] = part_dict[reference_name]
 
         # Add references to metadata
         concept_metadata["references"] = reference_dict

diff --git a/src/ferc_xbrl_extractor/datapackage.py b/src/ferc_xbrl_extractor/datapackage.py
@@ -460,7 +460,7 @@ def resolve_conflict(series: pd.Series, max_precision=6) -> Any:
                 if len(typed[at_least_this_precise]) == 1:
                     return typed[at_least_this_precise].iloc[0]
         raise ValueError(
-            f"Fact {':'.join(series.index.values[0])} has values {series.values}"
+            f"Fact {':'.join(series.index.to_numpy()[0])} has values {series.to_numpy()}"
         )
 
     resolved = df[duplicated].groupby(df.index.names).aggregate(resolve_conflict)

diff --git a/src/ferc_xbrl_extractor/instance.py b/src/ferc_xbrl_extractor/instance.py
@@ -68,7 +68,7 @@ class Axis(BaseModel):
     value: str = ""
     dimension_type: DimensionType
 
-    @validator("name", pre=True)  # type: ignore
+    @validator("name", pre=True)
     def strip_prefix(cls, name: str):  # noqa: N805
         """Strip XML prefix from name."""
         return name.split(":")[1] if ":" in name else name
@@ -83,7 +83,7 @@ def from_xml(cls, elem: Element) -> "Axis":
                 dimension_type=DimensionType.EXPLICIT,
             )
 
-        elif elem.tag.endswith("typedMember"):
+        if elem.tag.endswith("typedMember"):
             dim = elem.getchildren()[0]
             return cls(
                 name=elem.attrib["dimension"],
@@ -176,7 +176,7 @@ def as_primary_key(self, filing_name: str, axes: list[str]) -> dict[str, str]:
         else:
             date_dict = {
                 # Ignore type because start_date will always be str if duration period
-                "start_date": self.period.start_date,  # type: ignore
+                "start_date": self.period.start_date,
                 "end_date": self.period.end_date,
             }
 
@@ -300,10 +300,7 @@ def get_facts(
             concept_names: Name of concepts which map to a column name and name of facts.
             primary_key: Name of columns in primary_key used to filter facts.
         """
-        if instant:
-            period_fact_dict = self.instant_facts
-        else:
-            period_fact_dict = self.duration_facts
+        period_fact_dict = self.instant_facts if instant else self.duration_facts
 
         all_facts_for_concepts = itertools.chain.from_iterable(
             period_fact_dict[concept_name] for concept_name in concept_names
@@ -346,7 +343,7 @@ def parse(self, fact_prefix: str = "ferc") -> Instance:
         parser = etree.XMLParser(huge_tree=True)
 
         # Check if instance contains path to file or file data and parse accordingly
-        tree = etree.parse(self.file, parser=parser)  # nosec: B320
+        tree = etree.parse(self.file, parser=parser)  # noqa: S320
         root = tree.getroot()
 
         # Dictionary mapping context ID's to context structures

diff --git a/src/ferc_xbrl_extractor/taxonomy.py b/src/ferc_xbrl_extractor/taxonomy.py
@@ -40,31 +40,31 @@
         """Construct XBRLType class from arelle ModelType."""
         return cls(name=arelle_type.name, base=arelle_type.baseXsdType.lower())
 
-    def get_pandas_type(self) -> str:
+    def get_pandas_type(self) -> str | None:
         """Return corresponding pandas type.
 
         Gets a string representation of the pandas type best suited to represent the
         base type.
         """
         if self.base == "string" or self.base == "date" or self.base == "duration":
             return "string"
-        elif self.base == "decimal":
+        if self.base == "decimal":
             return "Float64"
-        elif self.base == "gyear" or self.base == "integer":
+        if self.base == "gyear" or self.base == "integer":
             return "Int64"
-        elif self.base == "boolean":
+        if self.base == "boolean":
             return "boolean"
+        return None
 
     def get_schema_type(self) -> str:
         """Return string specifying type for a frictionless table schema."""
         if self.base == "gyear":
             return "year"
-        elif self.base == "decimal":
+        if self.base == "decimal":
             return "number"
-        elif self.base == "duration":
+        if self.base == "duration":
             return "string"
-        else:
-            return self.base
+        return self.base
 
 
 class Concept(BaseModel):
@@ -296,5 +296,5 @@
         metadata = {**duration_metadata, **instant_metadata}
 
         # Write to JSON file
-        with open(filename, "w") as f:
+        with Path(filename).open(mode="w") as f:
             json.dump(metadata, f, indent=4)
diff --git a/src/ferc_xbrl_extractor/xbrl.py b/src/ferc_xbrl_extractor/xbrl.py
@@ -243,11 +243,11 @@ def get_fact_tables(
         form_number: FERC Form number (can be 1, 2, 6, 60, 714).
         db_uri: URI of database used for constructing datapackage descriptor.
         archive_path: Path to taxonomy entry point within archive. If not None,
-                then `taxonomy` should be a path to zipfile, not a URL.
+            then `taxonomy` should be a path to zipfile, not a URL.
         filter_tables: Optionally specify the set of tables to extract.
-                If None, all possible tables will be extracted.
-        datapackage_path: Create frictionless datapackage and write to specified path as JSON file.
-                          If path is None no datapackage descriptor will be saved.
+            If None, all possible tables will be extracted.
+        datapackage_path: Create frictionless datapackage and write to specified path
+            as JSON file. If path is None no datapackage descriptor will be saved.
         metadata_path: Path to metadata json file to output taxonomy metadata.
 
     Returns:
@@ -271,7 +271,7 @@ def get_fact_tables(
             )
 
         # Write to JSON file
-        with open(datapackage_path, "w") as f:
+        with Path(datapackage_path).open(mode="w") as f:
             f.write(datapackage.json(by_alias=True))
 
     return datapackage.get_fact_tables(filter_tables=filter_tables)