Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RemoveUnusedImports] Support string type annotations #353

Merged
merged 7 commits into from
Jul 31, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion libcst/codemod/tests/codemod_formatter_error_input.py.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#
# pyre-strict

import subprocess # noqa: F401
import subprocess
from contextlib import AsyncExitStack


Expand Down
12 changes: 10 additions & 2 deletions libcst/codemod/visitors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,23 @@
#
from libcst.codemod.visitors._add_imports import AddImportsVisitor
from libcst.codemod.visitors._apply_type_annotations import ApplyTypeAnnotationsVisitor
from libcst.codemod.visitors._gather_comments import GatherCommentsVisitor
from libcst.codemod.visitors._gather_exports import GatherExportsVisitor
from libcst.codemod.visitors._gather_imports import GatherImportsVisitor
from libcst.codemod.visitors._gather_string_annotation_names import (
GatherNamesFromStringAnnotationsVisitor,
)
from libcst.codemod.visitors._gather_unused_imports import GatherUnusedImportsVisitor
from libcst.codemod.visitors._remove_imports import RemoveImportsVisitor


__all__ = [
"AddImportsVisitor",
"GatherImportsVisitor",
"GatherExportsVisitor",
"ApplyTypeAnnotationsVisitor",
"GatherCommentsVisitor",
"GatherExportsVisitor",
"GatherImportsVisitor",
"GatherNamesFromStringAnnotationsVisitor",
"GatherUnusedImportsVisitor",
"RemoveImportsVisitor",
]
36 changes: 36 additions & 0 deletions libcst/codemod/visitors/_gather_comments.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import re
from typing import Dict, Union

import libcst as cst
import libcst.matchers as m
from libcst.codemod._context import CodemodContext
from libcst.codemod._visitor import ContextAwareVisitor
from libcst.metadata import PositionProvider


class GatherCommentsVisitor(ContextAwareVisitor):
METADATA_DEPENDENCIES = (PositionProvider,)

def __init__(self, context: CodemodContext, comment_regex: str) -> None:
super().__init__(context)

self.comments: Dict[int, cst.Comment] = {}

self._comment_matcher = re.compile(comment_regex)

@m.visit(m.EmptyLine(comment=m.DoesNotMatch(None)))
@m.visit(m.TrailingWhitespace(comment=m.DoesNotMatch(None)))
def visit_comment(self, node: Union[cst.EmptyLine, cst.TrailingWhitespace]) -> None:
assert node.comment is not None # hello, type checker
if not self._comment_matcher.match(node.comment.value):
return
line = self.get_metadata(PositionProvider, node.comment).start.line
if isinstance(node, cst.EmptyLine):
# Standalone comments refer to the next line
line += 1
self.comments[line] = node.comment
65 changes: 65 additions & 0 deletions libcst/codemod/visitors/_gather_string_annotation_names.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from typing import Union, Set, cast

import libcst as cst
import libcst.matchers as m
from libcst.codemod._context import CodemodContext
from libcst.codemod._visitor import ContextAwareVisitor
from libcst.metadata import QualifiedNameProvider

FUNCS_CONSIDERED_AS_STRING_ANNOTATIONS = {"typing.TypeVar"}
ANNOTATION_MATCHER = m.Annotation() | m.Call(
metadata=m.MatchMetadataIfTrue(
QualifiedNameProvider,
lambda qualnames: any(
qn.name in FUNCS_CONSIDERED_AS_STRING_ANNOTATIONS for qn in qualnames
),
)
)


class GatherNamesFromStringAnnotationsVisitor(ContextAwareVisitor):
METADATA_DEPENDENCIES = (QualifiedNameProvider,)

def __init__(self, context: CodemodContext) -> None:
super().__init__(context)

self.names: Set[str] = set()

@m.call_if_inside(ANNOTATION_MATCHER)
@m.visit(m.ConcatenatedString())
def handle_any_string(
self, node: Union[cst.SimpleString, cst.ConcatenatedString]
) -> None:
value = node.evaluated_value
if value is None:
return
mod = cst.parse_module(value)
extracted_nodes = m.extractall(
mod,
m.Name(value=m.SaveMatchedNode(m.DoNotCare(), "name"))
| m.SaveMatchedNode(m.Attribute(), "attribute"),
)
zsol marked this conversation as resolved.
Show resolved Hide resolved
# This captures a bit more than necessary. For attributes, we capture the inner
# Name twice.
names = {
cast(str, values["name"]) for values in extracted_nodes if "name" in values
} | {
name
for values in extracted_nodes
if "attribute" in values
for name, _ in cst.metadata.scope_provider._gen_dotted_names(
cast(cst.Attribute, values["attribute"])
)
}
self.names.update(names)

@m.call_if_inside(ANNOTATION_MATCHER)
@m.call_if_not_inside(m.ConcatenatedString())
@m.visit(m.SimpleString())
def handle_simple_string(self, node: cst.SimpleString) -> None:
self.handle_any_string(node)
121 changes: 121 additions & 0 deletions libcst/codemod/visitors/_gather_unused_imports.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
#

from typing import Dict, List, Optional, Sequence, Set, Tuple, Union, Iterable

import libcst as cst
import libcst.matchers as m
from libcst.codemod._context import CodemodContext
from libcst.codemod._visitor import ContextAwareVisitor
from libcst.metadata import QualifiedNameProvider, ScopeProvider, PositionProvider
from libcst.codemod.visitors._gather_exports import GatherExportsVisitor
from libcst.codemod.visitors._gather_comments import GatherCommentsVisitor
from libcst.codemod.visitors._gather_string_annotation_names import (
GatherNamesFromStringAnnotationsVisitor,
)
from libcst.metadata.scope_provider import _gen_dotted_names


DEFAULT_SUPPRESS_COMMENT_REGEX = (
r".*\W(lint-ignore: ?unused-import|noqa|lint-ignore: ?F401)(\W.*)?$"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suggest this one instead:

r".*\W(noqa[^:]|noqa: ?F401|lint-ignore: ?unused-import|lint-ignore: ?F401)(\W.*)?$"

So it ignores things like # noqa, # noqa: F401, but not # noqa: IG61

)


class GatherUnusedImportsVisitor(ContextAwareVisitor):

SUPPRESS_COMMENT_REGEX_CONTEXT_KEY = f"GatherUnusedImportsVisitor.suppress_regex"
METADATA_DEPENDENCIES = (
*GatherCommentsVisitor.METADATA_DEPENDENCIES,
*GatherNamesFromStringAnnotationsVisitor.METADATA_DEPENDENCIES,
PositionProvider,
ScopeProvider,
)

def __init__(self, context: CodemodContext) -> None:
super().__init__(context)

self._string_annotation_names: Set[str] = set()
self._ignored_lines: Set[int] = set()
self._exported_names: Set[str] = set()
self.unused_imports: Set[
Tuple[cst.ImportAlias, Union[cst.Import, cst.ImportFrom]]
] = set()

def visit_Module(self, node: cst.Module) -> bool:
export_collector = GatherExportsVisitor(self.context)
node.visit(export_collector)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jimmylai, just wondering... won't calling node.visit(export_collector) and node.visit(annotation_visitor) here traverses the tree multiple times? wouldn't this be inefficient?

I was thinking maybe using multiple inheritance instead of this, so we make it that it only traverses the tree once.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes it will traverse the tree multiple times. I don't think that's a big deal for a codemod like this. Running the codemod on a bunch of large files is still not visibly slower than before. I don't have benchmarks for you though :)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we use this in a linter, which will run over and over, while the user types, we want this as fast as possible, otherwise linting times will hit the experience. Although I’m not sure how expensive traversing the tree is, processing time would be exponential, potentially noticeable on big files. What do you think @jimmylai ?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Performance is a concern for lint but not for codemod. In lint, we try to avoid creating a visitor and calling node.visit(visitor).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The @m.visitused in this codemod is not supported in the lint framework since we worried about the the pattern can introduce some expensive checks. So we only use m.matches() in lint.

self._exported_names = export_collector.explicit_exported_objects
comment_visitor = GatherCommentsVisitor(
self.context,
self.context.scratch.get(
self.SUPPRESS_COMMENT_REGEX_CONTEXT_KEY, DEFAULT_SUPPRESS_COMMENT_REGEX,
),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The thing with suppressing comments at GatherUnusedImportsVisitor level is that in some use cases (linter rules in frameworks that already silent lines with such comments) this ends up in doing more work than needed.

Maybe the suppression of comments could be moved to some other visitor which adds lines to be excluded, so one could choose whether to have them collected or not by using that one.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suppose you can set the regex to something that will never match (like ^$), but we could add an explicit bool option to disable it. Would that help?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, faster code is code never executed. Best thing would be if we could put these in a whole different, also reusable, place so they're never executed when they're not needed. Something like GatherIgnoredLines or something like that. But that's only a suggestion for those use cases where this isn't really needed.

)
node.visit(comment_visitor)
self._ignored_lines = set(comment_visitor.comments.keys())
annotation_visitor = GatherNamesFromStringAnnotationsVisitor(self.context)
node.visit(annotation_visitor)
self._string_annotation_names = annotation_visitor.names
return True

@m.visit(
m.Import()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How could we add other exceptions to this reusable class, like if we wanted to ignore import __strict__, for example?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's best to apply filtering on the results of this visitor (just like how you pointed out that the suppression filtering belongs outside of this class). So I would filter for special module names after running GatherUnusedImportsVisitor, and keep the exceptions here to a minimum (i.e. the ones implied by Python itself)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I agree :)

| m.ImportFrom(
module=m.DoesNotMatch(m.Name("__future__")),
names=m.DoesNotMatch(m.ImportStar()),
)
)
def handle_import(self, node: Union[cst.Import, cst.ImportFrom]) -> None:
assert not isinstance(node.names, cst.ImportStar) # hello, type checker

node_start = self.get_metadata(PositionProvider, node).start.line
if node_start in self._ignored_lines:
return

for alias in node.names:
position = self.get_metadata(PositionProvider, alias)
lines = set(range(position.start.line, position.end.line + 1))
if lines.isdisjoint(self._ignored_lines):
self.unused_imports.add((alias, node))

def leave_Module(self, original_node: cst.Module) -> None:
self.unused_imports = self.filter_unused_imports(self.unused_imports)

def filter_unused_imports(
self,
candidates: Iterable[Tuple[cst.ImportAlias, Union[cst.Import, cst.ImportFrom]]],
) -> Set[Tuple[cst.ImportAlias, Union[cst.Import, cst.ImportFrom]]]:
unused_imports = set()
for (alias, parent) in candidates:
scope = self.get_metadata(ScopeProvider, parent)
if scope is None:
continue
if not self.is_in_use(scope, alias):
unused_imports.add((alias, parent))
return unused_imports

def is_in_use(self, scope: cst.metadata.Scope, alias: cst.ImportAlias) -> bool:
asname = alias.asname
names = _gen_dotted_names(
cst.ensure_type(asname.name, cst.Name) if asname is not None else alias.name
)

for name_or_alias, _ in names:
if (
name_or_alias in self._exported_names
or name_or_alias in self._string_annotation_names
):
return True

for assignment in scope[name_or_alias]:
if (
isinstance(assignment, cst.metadata.Assignment)
and isinstance(assignment.node, (cst.ImportFrom, cst.Import))
and len(assignment.references) > 0
):
return True
return False

51 changes: 10 additions & 41 deletions libcst/codemod/visitors/_remove_imports.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import libcst as cst
from libcst.codemod._context import CodemodContext
from libcst.codemod._visitor import ContextAwareTransformer, ContextAwareVisitor
from libcst.codemod.visitors._gather_exports import GatherExportsVisitor
from libcst.codemod.visitors._gather_unused_imports import GatherUnusedImportsVisitor
from libcst.helpers import get_absolute_module_for_import, get_full_name_for_node
from libcst.metadata import Assignment, Scope, ScopeProvider
from libcst.metadata.scope_provider import _gen_dotted_names
Expand Down Expand Up @@ -173,7 +173,7 @@ def leave_AnnAssign(
"""

CONTEXT_KEY = "RemoveImportsVisitor"
METADATA_DEPENDENCIES = (ScopeProvider,)
METADATA_DEPENDENCIES = (*GatherUnusedImportsVisitor.METADATA_DEPENDENCIES,)

@staticmethod
def _get_imports_from_context(
Expand Down Expand Up @@ -279,48 +279,24 @@ def __init__(
module: alias for module, obj, alias in all_unused_imports if obj is None
}
self.unused_obj_imports: Dict[str, Set[Tuple[str, Optional[str]]]] = {}
self.exported_objects: Set[str] = set()
for module, obj, alias in all_unused_imports:
if obj is None:
continue
if module not in self.unused_obj_imports:
self.unused_obj_imports[module] = set()
self.unused_obj_imports[module].add((obj, alias))
self._unused_imports: Dict[
cst.ImportAlias, Union[cst.Import, cst.ImportFrom]
] = {}

def visit_Module(self, node: cst.Module) -> None:
object_visitor = GatherExportsVisitor(self.context)
node.visit(object_visitor)
self.exported_objects = object_visitor.explicit_exported_objects

def _is_in_use(self, scope: Scope, alias: cst.ImportAlias) -> bool:
# Grab the string name of this alias from the point of view of this module.
asname = alias.asname
names = _gen_dotted_names(
cst.ensure_type(asname.name, cst.Name) if asname is not None else alias.name
)

for name_or_alias, _ in names:
if name_or_alias in self.exported_objects:
return True

for assignment in scope[name_or_alias]:
if (
isinstance(assignment, Assignment)
and isinstance(assignment.node, (cst.ImportFrom, cst.Import))
and len(assignment.references) > 0
):
return True
return False
visitor = GatherUnusedImportsVisitor(self.context)
node.visit(visitor)
self._unused_imports = {k: v for (k, v) in visitor.unused_imports}

def leave_Import(
self, original_node: cst.Import, updated_node: cst.Import
) -> Union[cst.Import, cst.RemovalSentinel]:
# Grab the scope for this import. If we don't have scope, we can't determine
# whether this import is unused so it is unsafe to remove.
scope = self.get_metadata(ScopeProvider, original_node, None)
if scope is None:
return updated_node

names_to_keep = []
for import_alias in original_node.names:
if import_alias.evaluated_name not in self.unused_module_imports:
Expand All @@ -339,7 +315,7 @@ def leave_Import(

# Now that we know we want to remove this module, figure out if
# there are any live references to it.
if self._is_in_use(scope, import_alias):
if import_alias not in self._unused_imports:
names_to_keep.append(import_alias)
continue

Expand All @@ -363,13 +339,6 @@ def leave_Import(
def leave_ImportFrom(
self, original_node: cst.ImportFrom, updated_node: cst.ImportFrom
) -> Union[cst.ImportFrom, cst.RemovalSentinel]:
# Grab the scope for this import. If we don't have scope, we can't determine
# whether this import is unused so it is unsafe to remove.
scope = self.get_metadata(ScopeProvider, original_node, None)
if scope is None:
return updated_node

# Make sure we have anything to do with this node.
names = original_node.names
if isinstance(names, cst.ImportStar):
# This is a star import, so we won't remove it.
Expand Down Expand Up @@ -400,7 +369,7 @@ def leave_ImportFrom(

# Now that we know we want to remove this object, figure out if
# there are any live references to it.
if self._is_in_use(scope, import_alias):
if import_alias not in self._unused_imports:
names_to_keep.append(import_alias)
continue

Expand Down
Loading