Add support for parsing f-strings as per PEP 701 #7041

Merged 17 commits on Sep 14, 2023
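For context, PEP 701 formalizes the f-string grammar and lifts earlier restrictions such as the ban on reusing the enclosing quote character inside a replacement field. A minimal sketch of exercising the new grammar through the parser's `parse_suite` entry point (illustrative only; it assumes `parse_suite` is re-exported at the crate root like the other parse functions shown in this diff):

```rust
use ruff_python_parser::parse_suite;

fn main() {
    // Reusing the enclosing quote inside a replacement field is only legal
    // under PEP 701; the pre-701 grammar rejects the inner double quotes.
    let source = r#"greeting = f"Hello, {name or "world"}!""#;
    assert!(parse_suite(source, "<embedded>").is_ok());
}
```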
1 change: 1 addition & 0 deletions crates/ruff/src/linter.rs
@@ -146,6 +146,7 @@ pub fn check_path(
if use_ast || use_imports || use_doc_lines {
match ruff_python_parser::parse_program_tokens(
tokens,
source_kind.source_code(),
&path.to_string_lossy(),
source_type.is_ipynb(),
) {
2 changes: 1 addition & 1 deletion crates/ruff_benchmark/benches/formatter.rs
@@ -65,7 +65,7 @@ fn benchmark_formatter(criterion: &mut Criterion) {
let comment_ranges = comment_ranges.finish();

// Parse the AST.
let python_ast = parse_tokens(tokens, Mode::Module, "<filename>")
let python_ast = parse_tokens(tokens, case.code(), Mode::Module, "<filename>")
.expect("Input to be a valid python program");

b.iter(|| {
8 changes: 8 additions & 0 deletions crates/ruff_python_ast/src/nodes.rs
@@ -2620,6 +2620,14 @@ impl Constant {
_ => false,
}
}

/// Returns `true` if the constant is a string constant that is a unicode string (i.e., `u"..."`).
pub fn is_unicode_string(&self) -> bool {
match self {
Constant::Str(value) => value.unicode,
_ => false,
}
}
}

#[derive(Clone, Debug, PartialEq, Eq)]
2 changes: 1 addition & 1 deletion crates/ruff_python_ast/tests/preorder.rs
@@ -130,7 +130,7 @@ fn function_type_parameters()

fn trace_preorder_visitation(source: &str) -> String {
let tokens = lex(source, Mode::Module);
let parsed = parse_tokens(tokens, Mode::Module, "test.py").unwrap();
let parsed = parse_tokens(tokens, source, Mode::Module, "test.py").unwrap();

let mut visitor = RecordVisitor::default();
visitor.visit_mod(&parsed);
2 changes: 1 addition & 1 deletion crates/ruff_python_ast/tests/visitor.rs
@@ -131,7 +131,7 @@ fn function_type_parameters()

fn trace_visitation(source: &str) -> String {
let tokens = lex(source, Mode::Module);
let parsed = parse_tokens(tokens, Mode::Module, "test.py").unwrap();
let parsed = parse_tokens(tokens, source, Mode::Module, "test.py").unwrap();

let mut visitor = RecordVisitor::default();
walk_module(&mut visitor, &parsed);
2 changes: 1 addition & 1 deletion crates/ruff_python_formatter/src/cli.rs
@@ -57,7 +57,7 @@ pub fn format_and_debug_print(input: &str, cli: &Cli, source_type: &Path) -> Res

// Parse the AST.
let python_ast =
parse_tokens(tokens, Mode::Module, "<filename>").context("Syntax error in input")?;
parse_tokens(tokens, input, Mode::Module, "<filename>").context("Syntax error in input")?;

let options = PyFormatOptions::from_extension(source_type);
let formatted = format_node(&python_ast, &comment_ranges, input, options)
2 changes: 1 addition & 1 deletion crates/ruff_python_formatter/src/comments/mod.rs
@@ -553,7 +553,7 @@ mod tests {

let comment_ranges = comment_ranges.finish();

let parsed = parse_tokens(tokens, Mode::Module, "test.py")
let parsed = parse_tokens(tokens, code, Mode::Module, "test.py")
.expect("Expect source to be valid Python");

CommentsTestCase {
4 changes: 2 additions & 2 deletions crates/ruff_python_formatter/src/lib.rs
@@ -139,7 +139,7 @@ pub fn format_module(
let comment_ranges = comment_ranges.finish();

// Parse the AST.
let python_ast = parse_tokens(tokens, Mode::Module, "<filename>")?;
let python_ast = parse_tokens(tokens, contents, Mode::Module, "<filename>")?;

let formatted = format_node(&python_ast, &comment_ranges, contents, options)?;

@@ -237,7 +237,7 @@ if True:

// Parse the AST.
let source_path = "code_inline.py";
let python_ast = parse_tokens(tokens, Mode::Module, source_path).unwrap();
let python_ast = parse_tokens(tokens, src, Mode::Module, source_path).unwrap();
let options = PyFormatOptions::from_extension(Path::new(source_path));
let formatted = format_node(&python_ast, &comment_ranges, src, options).unwrap();

3 changes: 2 additions & 1 deletion crates/ruff_python_parser/src/lib.rs
@@ -146,6 +146,7 @@ pub fn tokenize(contents: &str, mode: Mode) -> Vec<LexResult> {
/// Parse a full Python program from its tokens.
pub fn parse_program_tokens(
lxr: Vec<LexResult>,
source: &str,
source_path: &str,
is_jupyter_notebook: bool,
) -> anyhow::Result<Suite, ParseError> {
@@ -154,7 +155,7 @@
} else {
Mode::Module
};
match parse_tokens(lxr, mode, source_path)? {
match parse_tokens(lxr, source, mode, source_path)? {
Mod::Module(m) => Ok(m.body),
Mod::Expression(_) => unreachable!("Mode::Module doesn't return other variant"),
}
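For reference, `parse_program_tokens` now takes the source text alongside the tokens. A minimal usage sketch, assuming the crate-root re-exports shown in this diff (`tokenize`, `parse_program_tokens`, `Mode`) plus a `ParseError` re-export; the helper name and file label are invented:

```rust
use ruff_python_ast::Suite;
use ruff_python_parser::{parse_program_tokens, tokenize, Mode, ParseError};

// Hypothetical helper mirroring the updated `check_path` call in
// crates/ruff/src/linter.rs.
fn parse_module(contents: &str) -> Result<Suite, ParseError> {
    let tokens = tokenize(contents, Mode::Module);
    // The raw source is threaded through to the parser (this PR needs it for
    // f-string parsing); the final flag is `is_jupyter_notebook`.
    parse_program_tokens(tokens, contents, "<embedded>", false)
}
```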
61 changes: 55 additions & 6 deletions crates/ruff_python_parser/src/parser.rs
@@ -50,7 +50,7 @@ use ruff_python_ast::{Mod, ModModule, Suite};
/// ```
pub fn parse_program(source: &str, source_path: &str) -> Result<ModModule, ParseError> {
let lexer = lex(source, Mode::Module);
match parse_tokens(lexer, Mode::Module, source_path)? {
match parse_tokens(lexer, source, Mode::Module, source_path)? {
Mod::Module(m) => Ok(m),
Mod::Expression(_) => unreachable!("Mode::Module doesn't return other variant"),
}
@@ -78,7 +78,7 @@ pub fn parse_suite(source: &str, source_path: &str) -> Result<Suite, ParseError>
/// ```
pub fn parse_expression(source: &str, source_path: &str) -> Result<ast::Expr, ParseError> {
let lexer = lex(source, Mode::Expression);
match parse_tokens(lexer, Mode::Expression, source_path)? {
match parse_tokens(lexer, source, Mode::Expression, source_path)? {
Mod::Expression(expression) => Ok(*expression.body),
Mod::Module(_m) => unreachable!("Mode::Expression doesn't return other variant"),
}
@@ -107,7 +107,7 @@ pub fn parse_expression_starts_at(
offset: TextSize,
) -> Result<ast::Expr, ParseError> {
let lexer = lex_starts_at(source, Mode::Module, offset);
match parse_tokens(lexer, Mode::Expression, source_path)? {
match parse_tokens(lexer, source, Mode::Expression, source_path)? {
Mod::Expression(expression) => Ok(*expression.body),
Mod::Module(_m) => unreachable!("Mode::Expression doesn't return other variant"),
}
@@ -193,7 +193,7 @@ pub fn parse_starts_at(
offset: TextSize,
) -> Result<ast::Mod, ParseError> {
let lxr = lexer::lex_starts_at(source, mode, offset);
parse_tokens(lxr, mode, source_path)
parse_tokens(lxr, source, mode, source_path)
}

/// Parse an iterator of [`LexResult`]s using the specified [`Mode`].
@@ -208,32 +208,37 @@ pub fn parse_starts_at(
/// ```
/// use ruff_python_parser::{lexer::lex, Mode, parse_tokens};
///
/// let expr = parse_tokens(lex("1 + 2", Mode::Expression), Mode::Expression, "<embedded>");
/// let source = "1 + 2";
/// let expr = parse_tokens(lex(source, Mode::Expression), source, Mode::Expression, "<embedded>");
/// assert!(expr.is_ok());
/// ```
pub fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source: &str,
Review comment (Member): We probably want to start grouping `source`, `mode`, and `source_path` into an abstraction, but we don't have to do this as part of this PR. We should probably do it before introducing more arguments, though.

mode: Mode,
source_path: &str,
) -> Result<ast::Mod, ParseError> {
let lxr = lxr.into_iter();

parse_filtered_tokens(
lxr.filter_ok(|(tok, _)| !matches!(tok, Tok::Comment { .. } | Tok::NonLogicalNewline)),
source,
mode,
source_path,
)
}

fn parse_filtered_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source: &str,
mode: Mode,
source_path: &str,
) -> Result<ast::Mod, ParseError> {
let marker_token = (Tok::start_marker(mode), TextRange::default());
let lexer = iter::once(Ok(marker_token)).chain(lxr);
python::TopParser::new()
.parse(
source,
mode,
lexer
.into_iter()
@@ -1237,11 +1242,55 @@ a = 1
"#
.trim();
let lxr = lexer::lex_starts_at(source, Mode::Ipython, TextSize::default());
let parse_err = parse_tokens(lxr, Mode::Module, "<test>").unwrap_err();
let parse_err = parse_tokens(lxr, source, Mode::Module, "<test>").unwrap_err();
assert_eq!(
parse_err.to_string(),
"IPython escape commands are only allowed in `Mode::Ipython` at byte offset 6"
.to_string()
);
}

#[test]
fn test_fstrings() {
let parse_ast = parse_suite(
r#"
f"{" f"}"
f"{foo!s}"
f"{3,}"
f"{3!=4:}"
f'{3:{"}"}>10}'
f'{3:{"{"}>10}'
f"{ foo = }"
f"{ foo = :.3f }"
f"{ foo = !s }"
f"{ 1, 2 = }"
f'{f"{3.1415=:.1f}":*^20}'

{"foo " f"bar {x + y} " "baz": 10}
match foo:
case "foo " f"bar {x + y} " "baz":
pass
"#
.trim(),
"<test>",
)
.unwrap();
insta::assert_debug_snapshot!(parse_ast);
}

#[test]
fn test_fstrings_with_unicode() {
let parse_ast = parse_suite(
r#"
u"foo" f"{bar}" "baz" " some"
"foo" f"{bar}" u"baz" " some"
"foo" f"{bar}" "baz" u" some"
u"foo" f"bar {baz} really" u"bar" "no"
"#
.trim(),
"<test>",
)
.unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
}
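The review comment above suggests grouping `source`, `mode`, and `source_path` rather than letting the argument list keep growing. That refactor is not part of this PR; one hypothetical shape for it (names invented, assuming it lives inside `ruff_python_parser` where `Mode`, `LexResult`, `ast`, and `ParseError` are already in scope):

```rust
/// Hypothetical grouping of the per-parse inputs; not part of this PR.
pub struct ParseContext<'a> {
    pub source: &'a str,
    pub source_path: &'a str,
    pub mode: Mode,
}

/// Sketch of an entry point over the grouped inputs. It delegates to the
/// existing `parse_tokens`, so only the argument shape changes.
pub fn parse_tokens_with(
    lxr: impl IntoIterator<Item = LexResult>,
    ctx: ParseContext<'_>,
) -> Result<ast::Mod, ParseError> {
    parse_tokens(lxr, ctx.source, ctx.mode, ctx.source_path)
}
```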