From f192c725961b346ade010fa32690b4a83f681c0f Mon Sep 17 00:00:00 2001
From: Micha Reiser
Date: Thu, 11 Jan 2024 19:41:19 +0100
Subject: [PATCH] Remove type parameter from `parse_*` methods (#9466)
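
Call sites that previously fed the lexer's iterator straight into
`parse_tokens` now collect it into a `Vec` first. A minimal sketch of the
migration (the `parse_module` wrapper is hypothetical; `lex`, `parse_tokens`,
`Mode`, and `ParseError` are the items touched by this patch):

    use ruff_python_ast::Mod;
    use ruff_python_parser::{lexer::lex, parse_tokens, Mode, ParseError};

    fn parse_module(source: &str) -> Result<Mod, ParseError> {
        // Before: `parse_tokens` was generic over `impl IntoIterator<Item = LexResult>`,
        // and `parse_ok_tokens` existed as a separate entry point for tokens
        // that were already known to be valid.
        //
        // After: a single non-generic `parse_tokens` takes `Vec<LexResult>`.
        let tokens: Vec<_> = lex(source, Mode::Module).collect();
        parse_tokens(tokens, source, Mode::Module)
    }

Trivia (comments and non-logical newlines) is no longer filtered at each entry
point; the new `TokenSource` iterator skips it once, inside `parse_tokens`.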
---
 crates/ruff_benchmark/benches/linter.rs       |  2 +-
 crates/ruff_python_ast/tests/preorder.rs      |  2 +-
 crates/ruff_python_ast/tests/visitor.rs       |  2 +-
 crates/ruff_python_formatter/src/cli.rs       |  4 +-
 .../ruff_python_formatter/src/comments/mod.rs |  7 +-
 crates/ruff_python_formatter/src/lib.rs       |  8 +-
 .../src/string/docstring.rs                   |  2 +-
 .../ruff_python_index/src/comment_ranges.rs   | 11 +--
 crates/ruff_python_parser/src/invalid.rs      |  2 +-
 crates/ruff_python_parser/src/lib.rs          | 11 +--
 crates/ruff_python_parser/src/parser.rs       | 73 +++++-------------
 crates/ruff_python_parser/src/token_source.rs | 46 ++++++++++++
 .../ruff_python_trivia/src/comment_ranges.rs  | 12 +--
 13 files changed, 98 insertions(+), 84 deletions(-)
 create mode 100644 crates/ruff_python_parser/src/token_source.rs

diff --git a/crates/ruff_benchmark/benches/linter.rs b/crates/ruff_benchmark/benches/linter.rs
index a98cf29753166..d555d3d0ce5e4 100644
--- a/crates/ruff_benchmark/benches/linter.rs
+++ b/crates/ruff_benchmark/benches/linter.rs
@@ -55,7 +55,7 @@ fn benchmark_linter(mut group: BenchmarkGroup, settings: &LinterSettings) {
             &case,
             |b, case| {
                 // Tokenize the source.
-                let tokens = lexer::lex(case.code(), Mode::Module).collect::<Vec<_>>();
+                let tokens: Vec<_> = lexer::lex(case.code(), Mode::Module).collect();
 
                 // Parse the source.
                 let ast = parse_program_tokens(tokens.clone(), case.code(), false).unwrap();
diff --git a/crates/ruff_python_ast/tests/preorder.rs b/crates/ruff_python_ast/tests/preorder.rs
index 1a2af3dacdd7f..b3b1f457f8207 100644
--- a/crates/ruff_python_ast/tests/preorder.rs
+++ b/crates/ruff_python_ast/tests/preorder.rs
@@ -149,7 +149,7 @@ fn f_strings() {
 fn trace_preorder_visitation(source: &str) -> String {
     let tokens = lex(source, Mode::Module);
-    let parsed = parse_tokens(tokens, source, Mode::Module).unwrap();
+    let parsed = parse_tokens(tokens.collect(), source, Mode::Module).unwrap();
 
     let mut visitor = RecordVisitor::default();
     visitor.visit_mod(&parsed);
diff --git a/crates/ruff_python_ast/tests/visitor.rs b/crates/ruff_python_ast/tests/visitor.rs
index 420c7517887f9..1097d699a993f 100644
--- a/crates/ruff_python_ast/tests/visitor.rs
+++ b/crates/ruff_python_ast/tests/visitor.rs
@@ -160,7 +160,7 @@ fn f_strings() {
 fn trace_visitation(source: &str) -> String {
     let tokens = lex(source, Mode::Module);
-    let parsed = parse_tokens(tokens, source, Mode::Module).unwrap();
+    let parsed = parse_tokens(tokens.collect(), source, Mode::Module).unwrap();
 
     let mut visitor = RecordVisitor::default();
     walk_module(&mut visitor, &parsed);
diff --git a/crates/ruff_python_formatter/src/cli.rs b/crates/ruff_python_formatter/src/cli.rs
index bb8b29a20c021..881ca8ffae666 100644
--- a/crates/ruff_python_formatter/src/cli.rs
+++ b/crates/ruff_python_formatter/src/cli.rs
@@ -8,7 +8,7 @@ use clap::{command, Parser, ValueEnum};
 use ruff_formatter::SourceCode;
 use ruff_python_ast::PySourceType;
 use ruff_python_index::tokens_and_ranges;
-use ruff_python_parser::{parse_ok_tokens, AsMode};
+use ruff_python_parser::{parse_tokens, AsMode};
 use ruff_text_size::Ranged;
 
 use crate::comments::collect_comments;
@@ -51,7 +51,7 @@ pub fn format_and_debug_print(source: &str, cli: &Cli, source_path: &Path) -> Re
     // Parse the AST.
     let module =
-        parse_ok_tokens(tokens, source, source_type.as_mode()).context("Syntax error in input")?;
+        parse_tokens(tokens, source, source_type.as_mode()).context("Syntax error in input")?;
     let options = PyFormatOptions::from_extension(source_path)
         .with_preview(if cli.preview {
diff --git a/crates/ruff_python_formatter/src/comments/mod.rs b/crates/ruff_python_formatter/src/comments/mod.rs
index 6524a56a2b2ad..80b412ee4df2c 100644
--- a/crates/ruff_python_formatter/src/comments/mod.rs
+++ b/crates/ruff_python_formatter/src/comments/mod.rs
@@ -102,12 +102,12 @@ use ruff_python_ast::Mod;
 use ruff_python_trivia::{CommentRanges, PythonWhitespace};
 use ruff_source_file::Locator;
 use ruff_text_size::{Ranged, TextRange};
+pub(crate) use visitor::collect_comments;
 
 use crate::comments::debug::{DebugComment, DebugComments};
 use crate::comments::map::{LeadingDanglingTrailing, MultiMap};
 use crate::comments::node_key::NodeRefEqualityKey;
 use crate::comments::visitor::{CommentsMapBuilder, CommentsVisitor};
-pub(crate) use visitor::collect_comments;
 
 mod debug;
 pub(crate) mod format;
@@ -563,8 +563,7 @@ mod tests {
     use ruff_formatter::SourceCode;
     use ruff_python_ast::{Mod, PySourceType};
     use ruff_python_index::tokens_and_ranges;
-
-    use ruff_python_parser::{parse_ok_tokens, AsMode};
+    use ruff_python_parser::{parse_tokens, AsMode};
     use ruff_python_trivia::CommentRanges;
 
     use crate::comments::Comments;
@@ -581,7 +580,7 @@ mod tests {
         let source_type = PySourceType::Python;
         let (tokens, comment_ranges) =
             tokens_and_ranges(source, source_type).expect("Expect source to be valid Python");
-        let parsed = parse_ok_tokens(tokens, source, source_type.as_mode())
+        let parsed = parse_tokens(tokens, source, source_type.as_mode())
             .expect("Expect source to be valid Python");
 
         CommentsTestCase {
diff --git a/crates/ruff_python_formatter/src/lib.rs b/crates/ruff_python_formatter/src/lib.rs
index 73d98a6161025..ea8db20abb1e7 100644
--- a/crates/ruff_python_formatter/src/lib.rs
+++ b/crates/ruff_python_formatter/src/lib.rs
@@ -6,7 +6,7 @@ use ruff_formatter::{format, FormatError, Formatted, PrintError, Printed, Source
 use ruff_python_ast::AstNode;
 use ruff_python_ast::Mod;
 use ruff_python_index::tokens_and_ranges;
-use ruff_python_parser::{parse_ok_tokens, AsMode, ParseError, ParseErrorType};
+use ruff_python_parser::{parse_tokens, AsMode, ParseError, ParseErrorType};
 use ruff_python_trivia::CommentRanges;
 use ruff_source_file::Locator;
 
@@ -126,7 +126,7 @@ pub fn format_module_source(
         offset: err.location,
         error: ParseErrorType::Lexical(err.error),
     })?;
-    let module = parse_ok_tokens(tokens, source, source_type.as_mode())?;
+    let module = parse_tokens(tokens, source, source_type.as_mode())?;
     let formatted = format_module_ast(&module, &comment_ranges, source, options)?;
     Ok(formatted.print()?)
 }
@@ -169,7 +169,7 @@ mod tests {
     use ruff_python_ast::PySourceType;
     use ruff_python_index::tokens_and_ranges;
-    use ruff_python_parser::{parse_ok_tokens, AsMode};
+    use ruff_python_parser::{parse_tokens, AsMode};
 
     use crate::{format_module_ast, format_module_source, PyFormatOptions};
 
@@ -213,7 +213,7 @@ def main() -> None:
         // Parse the AST.
        let source_path = "code_inline.py";
-        let module = parse_ok_tokens(tokens, source, source_type.as_mode()).unwrap();
+        let module = parse_tokens(tokens, source, source_type.as_mode()).unwrap();
         let options = PyFormatOptions::from_extension(Path::new(source_path));
         let formatted = format_module_ast(&module, &comment_ranges, source, options).unwrap();
diff --git a/crates/ruff_python_formatter/src/string/docstring.rs b/crates/ruff_python_formatter/src/string/docstring.rs
index d10fbe128ae96..c6669c776818d 100644
--- a/crates/ruff_python_formatter/src/string/docstring.rs
+++ b/crates/ruff_python_formatter/src/string/docstring.rs
@@ -1516,7 +1516,7 @@ fn docstring_format_source(
     let source_type = options.source_type();
     let (tokens, comment_ranges) =
         ruff_python_index::tokens_and_ranges(source, source_type).map_err(ParseError::from)?;
-    let module = ruff_python_parser::parse_ok_tokens(tokens, source, source_type.as_mode())?;
+    let module = ruff_python_parser::parse_tokens(tokens, source, source_type.as_mode())?;
     let source_code = ruff_formatter::SourceCode::new(source);
     let comments = crate::Comments::from_ast(&module, source_code, &comment_ranges);
     let locator = Locator::new(source);
diff --git a/crates/ruff_python_index/src/comment_ranges.rs b/crates/ruff_python_index/src/comment_ranges.rs
index e554bb8b00e63..11e6496a38b18 100644
--- a/crates/ruff_python_index/src/comment_ranges.rs
+++ b/crates/ruff_python_index/src/comment_ranges.rs
@@ -1,7 +1,7 @@
 use std::fmt::Debug;
 
 use ruff_python_ast::PySourceType;
-use ruff_python_parser::lexer::{lex, LexicalError};
+use ruff_python_parser::lexer::{lex, LexResult, LexicalError};
 use ruff_python_parser::{AsMode, Tok};
 use ruff_python_trivia::CommentRanges;
 use ruff_text_size::TextRange;
@@ -27,15 +27,16 @@ impl CommentRangesBuilder {
 pub fn tokens_and_ranges(
     source: &str,
     source_type: PySourceType,
-) -> Result<(Vec<(Tok, TextRange)>, CommentRanges), LexicalError> {
+) -> Result<(Vec<LexResult>, CommentRanges), LexicalError> {
     let mut tokens = Vec::new();
     let mut comment_ranges = CommentRangesBuilder::default();
 
     for result in lex(source, source_type.as_mode()) {
-        let (token, range) = result?;
+        if let Ok((token, range)) = &result {
+            comment_ranges.visit_token(token, *range);
+        }
 
-        comment_ranges.visit_token(&token, range);
-        tokens.push((token, range));
+        tokens.push(result);
     }
 
     let comment_ranges = comment_ranges.finish();
diff --git a/crates/ruff_python_parser/src/invalid.rs b/crates/ruff_python_parser/src/invalid.rs
index 92a19f0d833b4..2075a6e08917a 100644
--- a/crates/ruff_python_parser/src/invalid.rs
+++ b/crates/ruff_python_parser/src/invalid.rs
@@ -687,7 +687,7 @@ mod tests {
         let src = r"!foo = 42";
 
         let tokens = crate::lexer::lex(src, Mode::Ipython);
-        let ast = crate::parse_tokens(tokens, src, Mode::Ipython);
+        let ast = crate::parse_tokens(tokens.collect(), src, Mode::Ipython);
 
         insta::assert_debug_snapshot!(ast);
     }
diff --git a/crates/ruff_python_parser/src/lib.rs b/crates/ruff_python_parser/src/lib.rs
index b0f2787b6c412..0217331fe21ff 100644
--- a/crates/ruff_python_parser/src/lib.rs
+++ b/crates/ruff_python_parser/src/lib.rs
@@ -85,7 +85,7 @@
 //!     return bool(i & 1)
 //! "#;
 //! let tokens = lex(python_source, Mode::Module);
-//! let ast = parse_tokens(tokens, python_source, Mode::Module);
+//! let ast = parse_tokens(tokens.collect(), python_source, Mode::Module);
 //!
 //! assert!(ast.is_ok());
 //! ```
@@ -110,8 +110,8 @@
 //! [lexer]: crate::lexer
 
 pub use parser::{
-    parse, parse_expression, parse_expression_starts_at, parse_ok_tokens, parse_program,
-    parse_starts_at, parse_suite, parse_tokens, ParseError, ParseErrorType,
+    parse, parse_expression, parse_expression_starts_at, parse_program, parse_starts_at,
+    parse_suite, parse_tokens, ParseError, ParseErrorType,
 };
 use ruff_python_ast::{Mod, PySourceType, Suite};
 pub use string::FStringErrorType;
@@ -128,6 +128,7 @@ mod parser;
 mod soft_keywords;
 mod string;
 mod token;
+mod token_source;
 pub mod typing;
 
 /// Collect tokens up to and including the first error.
@@ -145,7 +146,7 @@ pub fn tokenize(contents: &str, mode: Mode) -> Vec<LexResult> {
 
 /// Parse a full Python program from its tokens.
 pub fn parse_program_tokens(
-    lxr: Vec<LexResult>,
+    tokens: Vec<LexResult>,
     source: &str,
     is_jupyter_notebook: bool,
 ) -> anyhow::Result<Suite> {
@@ -154,7 +155,7 @@ pub fn parse_program_tokens(
     } else {
         Mode::Module
     };
-    match parse_tokens(lxr, source, mode)? {
+    match parse_tokens(tokens, source, mode)? {
         Mod::Module(m) => Ok(m.body),
         Mod::Expression(_) => unreachable!("Mode::Module doesn't return other variant"),
     }
diff --git a/crates/ruff_python_parser/src/parser.rs b/crates/ruff_python_parser/src/parser.rs
index 5f3a5f95ca837..e158dadfcff4d 100644
--- a/crates/ruff_python_parser/src/parser.rs
+++ b/crates/ruff_python_parser/src/parser.rs
@@ -14,15 +14,7 @@
 use itertools::Itertools;
 
 pub(super) use lalrpop_util::ParseError as LalrpopError;
-use ruff_text_size::{Ranged, TextRange, TextSize};
 
-use crate::lexer::{lex, lex_starts_at, Spanned};
-use crate::{
-    lexer::{self, LexResult, LexicalError, LexicalErrorType},
-    python,
-    token::Tok,
-    Mode,
-};
 use ruff_python_ast::{
     Expr, ExprAttribute, ExprAwait, ExprBinOp, ExprBoolOp, ExprBooleanLiteral, ExprBytesLiteral,
     ExprCall, ExprCompare, ExprDict, ExprDictComp, ExprEllipsisLiteral, ExprFString,
@@ -31,6 +23,16 @@ use ruff_python_ast::{
     ExprStarred, ExprStringLiteral, ExprSubscript, ExprTuple, ExprUnaryOp, ExprYield,
     ExprYieldFrom, Mod, ModModule, Suite,
 };
+use ruff_text_size::{Ranged, TextRange, TextSize};
+
+use crate::lexer::{lex, lex_starts_at, LexResult};
+use crate::token_source::TokenSource;
+use crate::{
+    lexer::{self, LexicalError, LexicalErrorType},
+    python,
+    token::Tok,
+    Mode,
+};
 
 /// Parse a full Python program usually consisting of multiple lines.
 ///
 /// ```
 pub fn parse_program(source: &str) -> Result<ModModule, ParseError> {
     let lexer = lex(source, Mode::Module);
-    match parse_tokens(lexer, source, Mode::Module)? {
+    match parse_tokens(lexer.collect(), source, Mode::Module)? {
         Mod::Module(m) => Ok(m),
         Mod::Expression(_) => unreachable!("Mode::Module doesn't return other variant"),
     }
 }
@@ -82,7 +84,7 @@ pub fn parse_suite(source: &str) -> Result<Suite, ParseError> {
 /// ```
 pub fn parse_expression(source: &str) -> Result<Expr, ParseError> {
     let lexer = lex(source, Mode::Expression);
-    match parse_tokens(lexer, source, Mode::Expression)? {
+    match parse_tokens(lexer.collect(), source, Mode::Expression)? {
         Mod::Expression(expression) => Ok(*expression.body),
         Mod::Module(_m) => unreachable!("Mode::Expression doesn't return other variant"),
     }
 }
@@ -107,7 +109,7 @@ pub fn parse_expression(source: &str) -> Result<Expr, ParseError> {
 /// ```
 pub fn parse_expression_starts_at(source: &str, offset: TextSize) -> Result<Expr, ParseError> {
     let lexer = lex_starts_at(source, Mode::Module, offset);
-    match parse_tokens(lexer, source, Mode::Expression)? {
+    match parse_tokens(lexer.collect(), source, Mode::Expression)? {
         Mod::Expression(expression) => Ok(*expression.body),
         Mod::Module(_m) => unreachable!("Mode::Expression doesn't return other variant"),
     }
 }
@@ -188,7 +190,7 @@ pub fn parse(source: &str, mode: Mode) -> Result<Mod, ParseError> {
 /// ```
 pub fn parse_starts_at(source: &str, mode: Mode, offset: TextSize) -> Result<Mod, ParseError> {
     let lxr = lexer::lex_starts_at(source, mode, offset);
-    parse_tokens(lxr, source, mode)
+    parse_tokens(lxr.collect(), source, mode)
 }
 
 /// Parse an iterator of [`LexResult`]s using the specified [`Mode`].
@@ -204,48 +206,12 @@ pub fn parse_starts_at(source: &str, mode: Mode, offset: TextSize) -> Result
-pub fn parse_tokens(
-    lxr: impl IntoIterator<Item = LexResult>,
-    source: &str,
-    mode: Mode,
-) -> Result<Mod, ParseError> {
-    let lxr = lxr.into_iter();
-
-    parse_filtered_tokens(
-        lxr.filter_ok(|(tok, _)| !matches!(tok, Tok::Comment { .. } | Tok::NonLogicalNewline)),
-        source,
-        mode,
-    )
-}
-
-/// Parse tokens into an AST like [`parse_tokens`], but we already know all tokens are valid.
-pub fn parse_ok_tokens(
-    lxr: impl IntoIterator<Item = Spanned>,
-    source: &str,
-    mode: Mode,
-) -> Result<Mod, ParseError> {
-    let lxr = lxr
-        .into_iter()
-        .filter(|(tok, _)| !matches!(tok, Tok::Comment { .. } | Tok::NonLogicalNewline));
-    let marker_token = (Tok::start_marker(mode), TextRange::default());
-    let lexer = std::iter::once(marker_token)
-        .chain(lxr)
-        .map(|(t, range)| (range.start(), t, range.end()));
-    python::TopParser::new()
-        .parse(source, mode, lexer)
-        .map_err(parse_error_from_lalrpop)
-}
-
-fn parse_filtered_tokens(
-    lxr: impl IntoIterator<Item = LexResult>,
-    source: &str,
-    mode: Mode,
-) -> Result<Mod, ParseError> {
+pub fn parse_tokens(tokens: Vec<LexResult>, source: &str, mode: Mode) -> Result<Mod, ParseError> {
     let marker_token = (Tok::start_marker(mode), TextRange::default());
-    let lexer = std::iter::once(Ok(marker_token)).chain(lxr);
+    let lexer = std::iter::once(Ok(marker_token)).chain(TokenSource::new(tokens));
     python::TopParser::new()
         .parse(
             source,
@@ -597,9 +563,10 @@ impl From<Expr> for ParenthesizedExpr {
 
 #[cfg(target_pointer_width = "64")]
 mod size_assertions {
-    use crate::parser::ParenthesizedExpr;
     use static_assertions::assert_eq_size;
 
+    use crate::parser::ParenthesizedExpr;
+
     assert_eq_size!(ParenthesizedExpr, [u8; 88]);
 }
@@ -1475,7 +1442,7 @@ a = 1
 "
         .trim();
         let lxr = lexer::lex_starts_at(source, Mode::Ipython, TextSize::default());
-        let parse_err = parse_tokens(lxr, source, Mode::Module).unwrap_err();
+        let parse_err = parse_tokens(lxr.collect(), source, Mode::Module).unwrap_err();
         assert_eq!(
             parse_err.to_string(),
             "IPython escape commands are only allowed in `Mode::Ipython` at byte offset 6"
diff --git a/crates/ruff_python_parser/src/token_source.rs b/crates/ruff_python_parser/src/token_source.rs
new file mode 100644
index 0000000000000..b133ee5ff227d
--- /dev/null
+++ b/crates/ruff_python_parser/src/token_source.rs
@@ -0,0 +1,46 @@
+use crate::lexer::LexResult;
+use crate::Tok;
+use std::iter::FusedIterator;
+
+#[derive(Clone, Debug)]
+pub(crate) struct TokenSource {
+    tokens: std::vec::IntoIter<LexResult>,
+}
+
+impl TokenSource {
+    pub(crate) fn new(tokens: Vec<LexResult>) -> Self {
+        Self {
+            tokens: tokens.into_iter(),
+        }
+    }
+}
+
+impl FromIterator<LexResult> for TokenSource {
+    #[inline]
+    fn from_iter<T: IntoIterator<Item = LexResult>>(iter: T) -> Self {
+        Self::new(Vec::from_iter(iter))
+    }
+}
+
+impl Iterator for TokenSource {
+    type Item = LexResult;
+
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        loop {
+            let next = self.tokens.next()?;
+
+            if is_trivia(&next) {
+                continue;
+            }
+
+            break Some(next);
+        }
+    }
+}
+
+impl FusedIterator for TokenSource {}
+
+const fn is_trivia(result: &LexResult) -> bool {
+    matches!(result, Ok((Tok::Comment(_) | Tok::NonLogicalNewline, _)))
+}
diff --git a/crates/ruff_python_trivia/src/comment_ranges.rs b/crates/ruff_python_trivia/src/comment_ranges.rs
index dabd2517be91a..1cd8c5487b453 100644
--- a/crates/ruff_python_trivia/src/comment_ranges.rs
+++ b/crates/ruff_python_trivia/src/comment_ranges.rs
@@ -216,7 +216,7 @@ mod tests {
     fn block_comments_two_line_block_at_start() {
         // arrange
        let source = "# line 1\n# line 2\n";
-        let tokens: Vec<LexResult> = tokenize(source, Mode::Module);
+        let tokens = tokenize(source, Mode::Module);
         let locator = Locator::new(source);
         let indexer = Indexer::from_tokens(&tokens, &locator);
@@ -231,7 +231,7 @@ mod tests {
     fn block_comments_indented_block() {
         // arrange
         let source = " # line 1\n # line 2\n";
-        let tokens: Vec<LexResult> = tokenize(source, Mode::Module);
+        let tokens = tokenize(source, Mode::Module);
         let locator = Locator::new(source);
         let indexer = Indexer::from_tokens(&tokens, &locator);
@@ -261,7 +261,7 @@ mod tests {
     fn block_comments_lines_with_code_not_a_block() {
         // arrange
         let source = "x = 1 # line 1\ny = 2 # line 2\n";
-        let tokens: Vec<LexResult> = tokenize(source, Mode::Module);
+        let tokens = tokenize(source, Mode::Module);
         let locator = Locator::new(source);
         let indexer = Indexer::from_tokens(&tokens, &locator);
@@ -276,7 +276,7 @@ mod tests {
     fn block_comments_sequential_lines_not_in_block() {
         // arrange
         let source = " # line 1\n # line 2\n";
-        let tokens: Vec<LexResult> = tokenize(source, Mode::Module);
+        let tokens = tokenize(source, Mode::Module);
         let locator = Locator::new(source);
         let indexer = Indexer::from_tokens(&tokens, &locator);
@@ -296,7 +296,7 @@
 # line 2
 """
 "#;
-        let tokens: Vec<LexResult> = tokenize(source, Mode::Module);
+        let tokens = tokenize(source, Mode::Module);
         let locator = Locator::new(source);
         let indexer = Indexer::from_tokens(&tokens, &locator);
@@ -333,7 +333,7 @@ y = 2 # do not form a block comment
 # therefore do not form a block comment
 """
 "#;
-        let tokens: Vec<LexResult> = tokenize(source, Mode::Module);
+        let tokens = tokenize(source, Mode::Module);
         let locator = Locator::new(source);
         let indexer = Indexer::from_tokens(&tokens, &locator);