Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement UnnecessaryListAllocationForFirstElement #5549

Merged
merged 19 commits into from
Jul 10, 2023
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions crates/ruff/resources/test/fixtures/ruff/RUF015.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
x = range(10)

# RUF015
list(x)[0]
evanrittenhouse marked this conversation as resolved.
Show resolved Hide resolved
list(x)[:1]
list(x)[:1:1]
list(x)[:1:2]
tuple(x)[0]
tuple(x)[:1]
tuple(x)[:1:1]
tuple(x)[:1:2]
list(i for i in x)[0]
list(i for i in x)[:1]
list(i for i in x)[:1:1]
list(i for i in x)[:1:2]
[i for i in x][0]
evanrittenhouse marked this conversation as resolved.
Show resolved Hide resolved
[i for i in x][:1]
[i for i in x][:1:1]
[i for i in x][:1:2]

# Fine - not indexing (solely) the first element
list(x)
list(x)[1]
list(x)[-1]
list(x)[1:]
list(x)[:3:2]
list(x)[::2]
list(x)[::]
[i for i in x]
[i for i in x][1]
[i for i in x][-1]
[i for i in x][1:]
[i for i in x][:3:2]
[i for i in x][::2]
[i for i in x][::]

# Fine - doesn't mirror the underlying list
[i + 1 for i in x][0]
[i for i in x if i > 5][0]
[(i, i + 1) for i in x][0]

y = range(10)
# Fine - multiple generators
[i + j for i in x for j in y][0]
5 changes: 4 additions & 1 deletion crates/ruff/src/checkers/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2139,7 +2139,7 @@ where

// Pre-visit.
match expr {
Expr::Subscript(ast::ExprSubscript { value, slice, .. }) => {
subscript @ Expr::Subscript(ast::ExprSubscript { value, slice, .. }) => {
// Ex) Optional[...], Union[...]
if self.any_enabled(&[
Rule::FutureRewritableTypeAnnotation,
Expand Down Expand Up @@ -2219,6 +2219,9 @@ where
if self.enabled(Rule::UncapitalizedEnvironmentVariables) {
flake8_simplify::rules::use_capital_environment_variables(self, expr);
}
if self.enabled(Rule::UnnecessaryIterableAllocationForFirstElement) {
ruff::rules::unnecessary_iterable_allocation_for_first_element(self, subscript);
}

pandas_vet::rules::subscript(self, value, expr);
}
Expand Down
1 change: 1 addition & 0 deletions crates/ruff/src/codes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -778,6 +778,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> {
(Ruff, "013") => (RuleGroup::Unspecified, rules::ruff::rules::ImplicitOptional),
#[cfg(feature = "unreachable-code")]
(Ruff, "014") => (RuleGroup::Nursery, rules::ruff::rules::UnreachableCode),
(Ruff, "015") => (RuleGroup::Unspecified, rules::ruff::rules::UnnecessaryIterableAllocationForFirstElement),
(Ruff, "100") => (RuleGroup::Unspecified, rules::ruff::rules::UnusedNOQA),
(Ruff, "200") => (RuleGroup::Unspecified, rules::ruff::rules::InvalidPyprojectToml),

Expand Down
4 changes: 4 additions & 0 deletions crates/ruff/src/rules/ruff/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ mod tests {
#[test_case(Rule::MutableDataclassDefault, Path::new("RUF008.py"))]
#[test_case(Rule::PairwiseOverZipped, Path::new("RUF007.py"))]
#[test_case(Rule::StaticKeyDictComprehension, Path::new("RUF011.py"))]
#[test_case(
Rule::UnnecessaryIterableAllocationForFirstElement,
Path::new("RUF015.py")
)]
#[cfg_attr(
feature = "unreachable-code",
test_case(Rule::UnreachableCode, Path::new("RUF014.py"))
Expand Down
2 changes: 2 additions & 0 deletions crates/ruff/src/rules/ruff/rules/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ pub(crate) use mutable_class_default::*;
pub(crate) use mutable_dataclass_default::*;
pub(crate) use pairwise_over_zipped::*;
pub(crate) use static_key_dict_comprehension::*;
pub(crate) use unnecessary_iterable_allocation_for_first_element::*;
#[cfg(feature = "unreachable-code")]
pub(crate) use unreachable::*;
pub(crate) use unused_noqa::*;
Expand All @@ -26,6 +27,7 @@ mod mutable_class_default;
mod mutable_dataclass_default;
mod pairwise_over_zipped;
mod static_key_dict_comprehension;
mod unnecessary_iterable_allocation_for_first_element;
#[cfg(feature = "unreachable-code")]
pub(crate) mod unreachable;
mod unused_noqa;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,237 @@
use num_traits::ToPrimitive;
use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix};
use ruff_macros::{derive_message_formats, violation};
use rustpython_parser::ast::{self, Comprehension, Constant, Expr};

use crate::checkers::ast::Checker;
use crate::registry::AsRule;

/// ## What it does
/// Ensures that instead of creating a new list and indexing into it to find the first element of a
/// collection (e.g., `list(...)[0]`), Python iterators are used.
///
/// ## Why is this bad?
/// Creating a new list of great size can involve significant memory/speed concerns. Python's `next(iter(...))`
/// pattern can be used in lieu of creating a new list. This pattern will lazily fetch the first
/// element of the collection, avoiding the memory overhead involved with new list allocation.
/// `next(iter(...))` also is much faster since the list itself doesn't get initialized at once.
///
/// ## Example
/// ```python
/// x = range(1000000000000)
/// return list(x)[0]
/// ```
///
/// Use instead:
/// ```python
/// x = range(1000000000000)
/// return next(iter(x))
/// ```
///
/// ## References
/// - [Iterators and Iterables in Python: Run Efficient
/// Iterations](https://realpython.com/python-iterators-iterables/#when-to-use-an-iterator-in-python)
#[violation]
pub(crate) struct UnnecessaryIterableAllocationForFirstElement {
arg: String,
contains_slice: bool,
}

impl UnnecessaryIterableAllocationForFirstElement {
pub(crate) fn new(arg: String, contains_slice: bool) -> Self {
Self {
arg,
contains_slice,
}
}
}

impl AlwaysAutofixableViolation for UnnecessaryIterableAllocationForFirstElement {
#[derive_message_formats]
fn message(&self) -> String {
if self.contains_slice {
format!(
"Prefer `[next(iter({}))]` over `list({})[:1]` or equivalent list comprehension/slice",
self.arg, self.arg
)
} else {
format!(
"Prefer `next(iter({}))` over `list({})[0]` or equivalent list comprehension/slice",
self.arg, self.arg
)
}
}

fn autofix_title(&self) -> String {
if self.contains_slice {
format!("Replace with `[next(iter({}))]", self.arg)
} else {
format!("Replace with `next(iter({}))`", self.arg)
}
}
}

/// Contains information about a [`Expr::Subscript`] to determine if and how it accesses the first element
/// of an iterable.
struct ClassifiedSubscript {
/// If the subscript accesses the first element of the iterable
indexes_first_element: bool,
/// If the subscript is a slice (e.g., `[:1]`)
is_slice: bool,
}

impl ClassifiedSubscript {
fn new(indexes_first_element: bool, is_slice: bool) -> Self {
Self {
indexes_first_element,
is_slice,
}
}
}

/// RUF015
pub(crate) fn unnecessary_iterable_allocation_for_first_element(
checker: &mut Checker,
subscript: &Expr,
) {
let Expr::Subscript(ast::ExprSubscript { value, slice, range, .. }) = subscript else {
return;
};

let ClassifiedSubscript {
indexes_first_element,
is_slice,
} = classify_subscript(slice);
if !indexes_first_element {
return;
}

let Some(iter_name) = get_iterable_name(checker, value) else {
return;
};

let mut diagnostic = Diagnostic::new(
UnnecessaryIterableAllocationForFirstElement::new(iter_name.to_string(), is_slice),
*range,
);

if checker.patch(diagnostic.kind.rule()) {
let replacement = if is_slice {
format!("[next(iter({iter_name}))]")
} else {
format!("next(iter({iter_name}))")
};

diagnostic.set_fix(Fix::suggested(Edit::range_replacement(replacement, *range)));
}

checker.diagnostics.push(diagnostic);
}

/// Check that the slice [`Expr`] is functionally equivalent to slicing into the first element. The
/// first `bool` checks that the element is in fact first, the second checks if it's a slice or an
/// index.
fn classify_subscript(expr: &Expr) -> ClassifiedSubscript {
match expr {
Expr::Constant(ast::ExprConstant { .. }) => {
let effective_index = get_effective_index(expr);
ClassifiedSubscript::new(matches!(effective_index, None | Some(0)), false)
}
Expr::Slice(ast::ExprSlice {
step: step_value,
lower: lower_index,
upper: upper_index,
..
}) => {
let lower = lower_index.as_ref().and_then(|l| get_effective_index(l));
let upper = upper_index.as_ref().and_then(|u| get_effective_index(u));
let step = step_value.as_ref().and_then(|s| get_effective_index(s));

if matches!(lower, None | Some(0)) {
if upper.unwrap_or(i64::MAX) > step.unwrap_or(1i64) {
return ClassifiedSubscript::new(false, true);
}

return ClassifiedSubscript::new(true, true);
}

ClassifiedSubscript::new(false, true)
}
_ => ClassifiedSubscript::new(false, false),
}
}

/// Fetch the name of the iterable from an expression if the expression returns an unmodified list
/// which can be sliced into.
fn get_iterable_name<'a>(checker: &mut Checker, expr: &'a Expr) -> Option<&'a str> {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can &mut be removed here?

match expr {
Expr::Call(ast::ExprCall { func, args, .. }) => {
let Some(id) = get_name_id(func.as_ref()) else {
return None;
};
if !((id == "list" && checker.semantic().is_builtin("list"))
|| (id == "tuple" && checker.semantic().is_builtin("tuple")))
{
return None;
}

match args.first() {
Some(Expr::Name(ast::ExprName { id: arg_name, .. })) => Some(arg_name.as_str()),
Some(Expr::GeneratorExp(ast::ExprGeneratorExp {
elt, generators, ..
})) => get_generator_iterable(elt, generators),
_ => None,
}
}
Expr::ListComp(ast::ExprListComp {
elt, generators, ..
}) => get_generator_iterable(elt, generators),
_ => None,
}
}

fn get_generator_iterable<'a>(
elt: &'a Expr,
generators: &'a Vec<Comprehension>,
) -> Option<&'a str> {
// If the `elt` field is anything other than a [`Expr::Name`], we can't be sure that it
// doesn't modify the elements of the underlying iterator - for example, `[i + 1 for i in x][0]`.
if !matches!(elt, Expr::Name(ast::ExprName { .. })) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: can use elt.is_name_expr here.

return None;
}

// If there's more than 1 generator, we can't safely say that it fits the diagnostic conditions -
// for example, `[(i, j) for i in x for j in y][0]`.
if generators.len() != 1 {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: can use let [generator] = generators.as_slice() else { return None; } here.

return None;
}

let generator = &generators[0];
// Ignore if there's an `if` statement in the comprehension, since it filters the list.
if !generator.ifs.is_empty() {
return None;
}

let Some(arg_name) = get_name_id(&generator.iter) else {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this down to the end of the method is equivalent to just get_name_id(&generator.iter), right?

return None;
};

Some(arg_name)
}

fn get_name_id(expr: &Expr) -> Option<&str> {
match expr {
Expr::Name(ast::ExprName { id, .. }) => Some(id),
_ => None,
}
}

fn get_effective_index(expr: &Expr) -> Option<i64> {
match expr {
Expr::Constant(ast::ExprConstant {
value: Constant::Int(value),
..
}) => value.to_i64(),
_ => None,
}
}
Loading
Loading