Skip to content

Commit

Permalink
Merge pull request #140 from catalyst-cooperative/ruffing-it
Browse files Browse the repository at this point in the history
Switch from flake8 to ruff for linting & autoformatting
  • Loading branch information
zaneselvans committed Sep 15, 2023
2 parents 3a1da11 + ac28bf5 commit bdf8a36
Show file tree
Hide file tree
Showing 11 changed files with 141 additions and 220 deletions.
77 changes: 6 additions & 71 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,6 @@ repos:
- repo: https://github.com/pre-commit/pygrep-hooks
rev: v1.10.0
hooks:
- id: python-check-blanket-noqa # Prohibit overly broad QA exclusions.
- id: python-no-eval # Never use eval() it's dangerous.
- id: python-no-log-warn # logger.warning(), not old .warn()
- id: rst-backticks # Find single rather than double backticks
- id: rst-directive-colons # Missing double-colons after directives
- id: rst-inline-touching-normal # Inline code should never touch normal text
Expand All @@ -20,48 +17,19 @@ repos:
- id: check-yaml # Validate all YAML files.
- id: check-case-conflict # Avoid case sensitivity in file names.
- id: debug-statements # Watch for lingering debugger calls.
- id: end-of-file-fixer # Ensure there's a newline at EOF.
      - id: mixed-line-ending # Only newlines (LF), no carriage returns (CRLF).
args: ["--fix=lf"]
- id: trailing-whitespace # Remove trailing whitespace.
- id: name-tests-test # Follow PyTest naming convention.

########################################################################################
# Formatters: hooks that re-write Python and RST files
########################################################################################

# Convert relative imports to absolute imports
- repo: https://github.com/MarcoGorelli/absolufy-imports
rev: v0.3.1
hooks:
- id: absolufy-imports

# Make sure import statements are sorted uniformly.
- repo: https://github.com/PyCQA/isort
rev: 5.12.0
hooks:
- id: isort

# Remove f-string prefix when there's nothing in the string to format.
- repo: https://github.com/dannysepler/rm_unneeded_f_str
rev: v0.2.0
hooks:
- id: rm-unneeded-f-str

  # Use built-in types for annotations as per PEP585, with __future__ for Python 3.8/3.9
# Note that this sometimes conflicts with Pydantic on Python 3.8/3.9
- repo: https://github.com/sondrelg/pep585-upgrade
rev: "v1.0"
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.0.289
hooks:
- id: upgrade-type-hints
args: ["--futures=true"]

  # Update a bunch of Python syntax, using __future__ for Python 3.8/3.9
# Note that this sometimes conflicts with Pydantic on Python 3.8/3.9
- repo: https://github.com/asottile/pyupgrade
rev: v3.10.1
hooks:
- id: pyupgrade
args: ["--py310-plus"]
- id: ruff
args: [--fix, --exit-non-zero-on-fix]

# Deterministic python formatting:
- repo: https://github.com/psf/black
Expand All @@ -80,45 +48,12 @@ repos:
# Linters: hooks that check but don't alter Python & RST files
########################################################################################

# Check for PEP8 non-compliance, code complexity, style, errors, etc:
- repo: https://github.com/PyCQA/flake8
rev: 6.1.0
hooks:
- id: flake8
args: ["--config", "tox.ini"]
additional_dependencies:
- flake8-docstrings
- flake8-colors
- pydocstyle
- flake8-builtins
- mccabe
- pep8-naming
- pycodestyle
- pyflakes
- flake8-rst-docstrings
- flake8-use-fstring

# Check for known security vulnerabilities:
- repo: https://github.com/PyCQA/bandit
rev: 1.7.5
hooks:
- id: bandit
args: ["--configfile", ".bandit.yml"]

# Check for errors in restructuredtext (.rst) files under the doc hierarchy
- repo: https://github.com/PyCQA/doc8
rev: v1.1.1
hooks:
- id: doc8
args: ["--config", "tox.ini"]

# Lint any RST files and embedded code blocks for syntax / formatting errors
- repo: https://github.com/rstcheck/rstcheck
rev: v6.2.0
hooks:
- id: rstcheck
additional_dependencies: [sphinx]
args: ["--config", "tox.ini"]
args: ["--config", "pyproject.toml"]

# Lint Dockerfiles for errors and to ensure best practices
- repo: https://github.com/AleksaC/hadolint-py
Expand Down
115 changes: 99 additions & 16 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -54,37 +54,28 @@ xbrl_extract = "ferc_xbrl_extractor.cli:main"
[project.optional-dependencies]
dev = [
"black>=22.0,<23.10", # A deterministic code formatter
"build>=0.10,<0.11",
"isort>=5.0,<5.13", # Standardized import sorting
"build>=1.0,<1.1",
"ruff>=0.0.289", # A very fast linter and autofixer
"tox>=4.0,<4.12", # Python test environment manager
"twine>=3.3,<4.1", # Used to make releases to PyPI
]
docs = [
"doc8>=1.0,<1.2", # Ensures clean documentation formatting
"furo>=2022.4.7",
"sphinx>=4,!=5.1.0,<7.2.6", # The default Python documentation engine
"sphinx>=4,!=5.1.0,<7.3", # The default Python documentation engine
"sphinx-autoapi>=2.0,<2.2", # Generates documentation from docstrings
"sphinx-issues>=1.2,<3.1", # Allows references to GitHub issues
]
tests = [
"bandit>=1.6,<1.8", # Checks code for security issues
"coverage>=5.3,<7.4", # Lets us track what code is being tested
"doc8>=1.0,<1.2", # Ensures clean documentation formatting
"flake8>=4.0,<6.2", # A framework for linting & static analysis
"flake8-builtins>=1.5,<2.2", # Avoid shadowing Python built-in names
"flake8-colors>=0.1,<0.2", # Produce colorful error / warning output
"flake8-docstrings>=1.5,<1.8", # Ensure docstrings are formatted well
"flake8-rst-docstrings>=0.2,<0.4", # Allow use of ReST in docstrings
"flake8-use-fstring>=1.0,<1.5", # Highlight use of old-style string formatting
"mccabe>=0.6,<0.8", # Checks that code isn't overly complicated
"mypy>=1.0,<1.6", # Static type checking
"pep8-naming>=0.12,<0.14", # Require PEP8 compliant variable names
"pre-commit>=2.9,<3.5", # Allow us to run pre-commit hooks in testing
"pydocstyle>=5.1,<6.4", # Style guidelines for Python documentation
"pytest>=6.2,<7.5", # Our testing framework
"pytest-console-scripts>=1.1,<1.5", # Allow automatic testing of scripts
"pytest-cov>=2.10,<4.2", # Pytest plugin for working with coverage
"rstcheck[sphinx]>=5.0,<6.3", # ReStructuredText linter
"ruff>=0.0.289", # A very fast linter and autofixer
"tox>=4.0,<4.12", # Python test environment manager
]
types = [
Expand All @@ -104,6 +95,98 @@ line-length = 88
target-version = ["py310", "py311"]
include = "\\.pyi?$"

[tool.isort]
profile = "black"
known_first_party = "ferc_xbrl_extractor"
[tool.ruff]
select = [
"A", # flake8-builtins
# "ARG", # unused arguments
# "B", # flake8-bugbear
"C", # Limit cyclomatic complexity using mccabe
"D", # pydocstyle errors
"E", # pycodestyle errors
"EXE", # executable file issues
# "ERA", # eradicate: find commented out code
"F", # pyflakes
"I", # isort
"ISC", # implicit string concatenation
"N", # pep8-naming
"NPY", # NumPy specific checks
"PD", # pandas checks
"PGH", # pygrep-hooks
# "PL", # pylint
# "PT", # pytest style
"PTH", # use pathlib
"Q", # flake8-quotes
"RET", # check return values
"RSE", # unnecessary parenthises on raised exceptions
"S", # flake8-bandit
"SIM", # flake8-simplify
# "T", # print statements found
"UP", # pyupgrade (use modern python syntax)
"W", # pycodestyle warnings
]
ignore = [
"D401", # Require imperative mood in docstrings.
"D417",
"E501", # Overlong lines.
"E203", # Space before ':' (black recommends to ignore)
"PD003", # Use of isna rather than isnull
"PD004", # Use of notna rather than notnull
"PD008", # Use of df.at[] rather than df.loc[]
"PD010", # Use of df.stack()
"PD013", # Use of df.unstack()
"PD015", # Use of pd.merge() rather than df.merge()
"PD901", # df as variable name
"RET504", # Ignore unnecessary assignment before return
"S101", # Use of assert
]

# Assume Python 3.10
target-version = "py310"
line-length = 88

# Don't automatically concatenate strings -- sometimes we forget a comma!
unfixable = ["ISC"]

[tool.ruff.per-file-ignores]
"__init__.py" = ["F401"] # Ignore unused imports
"tests/*" = ["D"]

[tool.ruff.pep8-naming]
# Allow Pydantic's `@validator` decorator to trigger class method treatment.
classmethod-decorators = ["pydantic.validator", "pydantic.root_validator"]

[tool.ruff.isort]
known-first-party = ["ferc_xbrl_extractor"]

[tool.ruff.pydocstyle]
convention = "google"

[tool.ruff.mccabe]
max-complexity = 10

[tool.ruff.flake8-quotes]
docstring-quotes = "double"
inline-quotes = "double"
multiline-quotes = "double"

[tool.doc8]
max-line-length = 88
ignore-path = ["docs/_build"]

[tool.pytest.ini_options]
testpaths = "./"
filterwarnings = [
"ignore:distutils Version classes are deprecated:DeprecationWarning",
"ignore:Creating a LegacyVersion:DeprecationWarning:pkg_resources[.*]",
]

addopts = "--verbose"
log_format = "%(asctime)s [%(levelname)8s] %(name)s:%(lineno)s %(message)s"
log_date_format = "%Y-%m-%d %H:%M:%S"
log_cli = "true"
log_cli_level = "INFO"
doctest_optionflags = [
"NORMALIZE_WHITESPACE",
"IGNORE_EXCEPTION_DETAIL",
"ELLIPSIS",
]
9 changes: 6 additions & 3 deletions src/ferc_xbrl_extractor/arelle_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,9 +124,12 @@ def from_concept(cls, concept: ModelConcept) -> "Metadata":
reference_dict[reference_name] = [part_dict]

# Flatten out references where applicable
if len(reference_dict[reference_name]) == 1 and len(part_dict) == 1:
if reference_name in part_dict:
reference_dict[reference_name] = part_dict[reference_name]
if (
len(reference_dict[reference_name]) == 1
and len(part_dict) == 1
and reference_name in part_dict
):
reference_dict[reference_name] = part_dict[reference_name]

# Add references to metadata
concept_metadata["references"] = reference_dict
Expand Down
2 changes: 1 addition & 1 deletion src/ferc_xbrl_extractor/datapackage.py
Original file line number Diff line number Diff line change
Expand Up @@ -460,7 +460,7 @@ def resolve_conflict(series: pd.Series, max_precision=6) -> Any:
if len(typed[at_least_this_precise]) == 1:
return typed[at_least_this_precise].iloc[0]
raise ValueError(
f"Fact {':'.join(series.index.values[0])} has values {series.values}"
f"Fact {':'.join(series.index.to_numpy()[0])} has values {series.to_numpy()}"
)

resolved = df[duplicated].groupby(df.index.names).aggregate(resolve_conflict)
Expand Down
13 changes: 5 additions & 8 deletions src/ferc_xbrl_extractor/instance.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ class Axis(BaseModel):
value: str = ""
dimension_type: DimensionType

@validator("name", pre=True) # type: ignore
@validator("name", pre=True)
def strip_prefix(cls, name: str): # noqa: N805
"""Strip XML prefix from name."""
return name.split(":")[1] if ":" in name else name
Expand All @@ -83,7 +83,7 @@ def from_xml(cls, elem: Element) -> "Axis":
dimension_type=DimensionType.EXPLICIT,
)

elif elem.tag.endswith("typedMember"):
if elem.tag.endswith("typedMember"):
dim = elem.getchildren()[0]
return cls(
name=elem.attrib["dimension"],
Expand Down Expand Up @@ -176,7 +176,7 @@ def as_primary_key(self, filing_name: str, axes: list[str]) -> dict[str, str]:
else:
date_dict = {
# Ignore type because start_date will always be str if duration period
"start_date": self.period.start_date, # type: ignore
"start_date": self.period.start_date,
"end_date": self.period.end_date,
}

Expand Down Expand Up @@ -300,10 +300,7 @@ def get_facts(
concept_names: Name of concepts which map to a column name and name of facts.
primary_key: Name of columns in primary_key used to filter facts.
"""
if instant:
period_fact_dict = self.instant_facts
else:
period_fact_dict = self.duration_facts
period_fact_dict = self.instant_facts if instant else self.duration_facts

all_facts_for_concepts = itertools.chain.from_iterable(
period_fact_dict[concept_name] for concept_name in concept_names
Expand Down Expand Up @@ -346,7 +343,7 @@ def parse(self, fact_prefix: str = "ferc") -> Instance:
parser = etree.XMLParser(huge_tree=True)

# Check if instance contains path to file or file data and parse accordingly
tree = etree.parse(self.file, parser=parser) # nosec: B320
tree = etree.parse(self.file, parser=parser) # noqa: S320
root = tree.getroot()

# Dictionary mapping context ID's to context structures
Expand Down
18 changes: 9 additions & 9 deletions src/ferc_xbrl_extractor/taxonomy.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,31 +40,31 @@ def from_arelle_type(cls, arelle_type: ModelType) -> "XBRLType":
"""Construct XBRLType class from arelle ModelType."""
return cls(name=arelle_type.name, base=arelle_type.baseXsdType.lower())

def get_pandas_type(self) -> str:
def get_pandas_type(self) -> str | None:
"""Return corresponding pandas type.
Gets a string representation of the pandas type best suited to represent the
base type.
"""
if self.base == "string" or self.base == "date" or self.base == "duration":
return "string"
elif self.base == "decimal":
if self.base == "decimal":
return "Float64"
elif self.base == "gyear" or self.base == "integer":
if self.base == "gyear" or self.base == "integer":
return "Int64"
elif self.base == "boolean":
if self.base == "boolean":
return "boolean"
return None

def get_schema_type(self) -> str:
"""Return string specifying type for a frictionless table schema."""
if self.base == "gyear":
return "year"
elif self.base == "decimal":
if self.base == "decimal":
return "number"
elif self.base == "duration":
if self.base == "duration":
return "string"
else:
return self.base
return self.base


class Concept(BaseModel):
Expand Down Expand Up @@ -296,5 +296,5 @@ def save_metadata(self, filename: Path):
metadata = {**duration_metadata, **instant_metadata}

# Write to JSON file
with open(filename, "w") as f:
with Path(filename).open(mode="w") as f:
json.dump(metadata, f, indent=4)
10 changes: 5 additions & 5 deletions src/ferc_xbrl_extractor/xbrl.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,11 +243,11 @@ def get_fact_tables(
form_number: FERC Form number (can be 1, 2, 6, 60, 714).
db_uri: URI of database used for constructing datapackage descriptor.
archive_path: Path to taxonomy entry point within archive. If not None,
then `taxonomy` should be a path to zipfile, not a URL.
then `taxonomy` should be a path to zipfile, not a URL.
filter_tables: Optionally specify the set of tables to extract.
If None, all possible tables will be extracted.
datapackage_path: Create frictionless datapackage and write to specified path as JSON file.
If path is None no datapackage descriptor will be saved.
If None, all possible tables will be extracted.
datapackage_path: Create frictionless datapackage and write to specified path
as JSON file. If path is None no datapackage descriptor will be saved.
metadata_path: Path to metadata json file to output taxonomy metadata.
Returns:
Expand All @@ -271,7 +271,7 @@ def get_fact_tables(
)

# Write to JSON file
with open(datapackage_path, "w") as f:
with Path(datapackage_path).open(mode="w") as f:
f.write(datapackage.json(by_alias=True))

return datapackage.get_fact_tables(filter_tables=filter_tables)
Loading

0 comments on commit bdf8a36

Please sign in to comment.