From a42b40c7e98ef702f6ff27cc061ad75762912a4e Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Thu, 17 Aug 2023 20:09:06 +0530 Subject: [PATCH] Add support for the new f-string tokens per PEP 701 --- Cargo.lock | 1 + crates/ruff_python_parser/Cargo.toml | 1 + crates/ruff_python_parser/src/lexer.rs | 394 +++++++++++++++++- crates/ruff_python_parser/src/lexer/cursor.rs | 12 + .../ruff_python_parser/src/lexer/fstring.rs | 133 ++++++ ..._parser__lexer__tests__empty_fstrings.snap | 27 ++ ..._python_parser__lexer__tests__fstring.snap | 40 ++ ...arser__lexer__tests__fstring_comments.snap | 27 ++ ...ser__lexer__tests__fstring_conversion.snap | 50 +++ ..._parser__lexer__tests__fstring_escape.snap | 32 ++ ...ser__lexer__tests__fstring_escape_raw.snap | 32 ++ ...__tests__fstring_expression_multiline.snap | 30 ++ ...rser__lexer__tests__fstring_multiline.snap | 42 ++ ...__lexer__tests__fstring_named_unicode.snap | 13 + ...xer__tests__fstring_named_unicode_raw.snap | 22 + ..._parser__lexer__tests__fstring_nested.snap | 64 +++ ...er__lexer__tests__fstring_parentheses.snap | 61 +++ ..._parser__lexer__tests__fstring_prefix.snap | 27 ++ ...exer__tests__fstring_with_format_spec.snap | 84 ++++ ...ests__fstring_with_ipy_escape_command.snap | 23 + ..._tests__fstring_with_named_expression.snap | 68 +++ crates/ruff_python_parser/src/string.rs | 7 +- crates/ruff_python_parser/src/token.rs | 33 ++ 23 files changed, 1213 insertions(+), 10 deletions(-) create mode 100644 crates/ruff_python_parser/src/lexer/fstring.rs create mode 100644 crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__empty_fstrings.snap create mode 100644 crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring.snap create mode 100644 crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_comments.snap create mode 100644 crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_conversion.snap create mode 100644 
crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_escape.snap create mode 100644 crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_escape_raw.snap create mode 100644 crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_expression_multiline.snap create mode 100644 crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_multiline.snap create mode 100644 crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_named_unicode.snap create mode 100644 crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_named_unicode_raw.snap create mode 100644 crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_nested.snap create mode 100644 crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_parentheses.snap create mode 100644 crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_prefix.snap create mode 100644 crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_format_spec.snap create mode 100644 crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_ipy_escape_command.snap create mode 100644 crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_named_expression.snap diff --git a/Cargo.lock b/Cargo.lock index 74123f7491db82..26544c9c53a364 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2384,6 +2384,7 @@ name = "ruff_python_parser" version = "0.0.0" dependencies = [ "anyhow", + "bitflags 2.3.3", "insta", "is-macro", "itertools", diff --git a/crates/ruff_python_parser/Cargo.toml b/crates/ruff_python_parser/Cargo.toml index 02b27577b2e667..c311d34e58c228 100644 --- a/crates/ruff_python_parser/Cargo.toml +++ b/crates/ruff_python_parser/Cargo.toml @@ -18,6 +18,7 @@ ruff_python_ast = { path = "../ruff_python_ast" } ruff_text_size = { path = 
"../ruff_text_size" } anyhow = { workspace = true } +bitflags = { workspace = true } is-macro = { workspace = true } itertools = { workspace = true } lalrpop-util = { version = "0.20.0", default-features = false } diff --git a/crates/ruff_python_parser/src/lexer.rs b/crates/ruff_python_parser/src/lexer.rs index 1691fc6c49a340..e3c4b641c3a935 100644 --- a/crates/ruff_python_parser/src/lexer.rs +++ b/crates/ruff_python_parser/src/lexer.rs @@ -40,6 +40,7 @@ use unic_emoji_char::is_emoji_presentation; use unic_ucd_ident::{is_xid_continue, is_xid_start}; use crate::lexer::cursor::{Cursor, EOF_CHAR}; +use crate::lexer::fstring::{FStringContext, FStringContextFlags}; use crate::lexer::indentation::{Indentation, Indentations}; use crate::{ soft_keywords::SoftKeywordTransformer, @@ -49,6 +50,7 @@ use crate::{ }; mod cursor; +mod fstring; mod indentation; /// A lexer for Python source code. @@ -65,6 +67,8 @@ pub struct Lexer<'source> { pending_indentation: Option, // Lexer mode. mode: Mode, + + fstring_stack: Vec, } /// Contains a Token along with its `range`. @@ -157,6 +161,7 @@ impl<'source> Lexer<'source> { source: input, cursor: Cursor::new(input), mode, + fstring_stack: vec![], }; // TODO: Handle possible mismatch between BOM and explicit encoding declaration. // spell-checker:ignore feff @@ -168,16 +173,24 @@ impl<'source> Lexer<'source> { /// Lex an identifier. Also used for keywords and string/bytes literals with a prefix. 
fn lex_identifier(&mut self, first: char) -> Result { // Detect potential string like rb'' b'' f'' u'' r'' - match self.cursor.first() { - quote @ ('\'' | '"') => { + match (first, self.cursor.first()) { + ('f' | 'F', quote @ ('\'' | '"')) => { + self.cursor.bump(); + return Ok(self.lex_fstring_start(quote, false)); + } + ('r' | 'R', 'f' | 'F') | ('f' | 'F', 'r' | 'R') if is_quote(self.cursor.second()) => { + self.cursor.bump(); + let quote = self.cursor.bump().unwrap(); + return Ok(self.lex_fstring_start(quote, true)); + } + (_, quote @ ('\'' | '"')) => { if let Ok(string_kind) = StringKind::try_from(first) { self.cursor.bump(); return self.lex_string(string_kind, quote); } } - second @ ('f' | 'F' | 'r' | 'R' | 'b' | 'B') if is_quote(self.cursor.second()) => { + (_, second @ ('r' | 'R' | 'b' | 'B')) if is_quote(self.cursor.second()) => { self.cursor.bump(); - if let Ok(string_kind) = StringKind::try_from([first, second]) { let quote = self.cursor.bump().unwrap(); return self.lex_string(string_kind, quote); @@ -508,6 +521,147 @@ impl<'source> Lexer<'source> { } } + /// Lex a f-string start token. + fn lex_fstring_start(&mut self, quote: char, is_raw_string: bool) -> Tok { + #[cfg(debug_assertions)] + debug_assert_eq!(self.cursor.previous(), quote); + + let mut flags = FStringContextFlags::empty(); + if quote == '"' { + flags |= FStringContextFlags::DOUBLE; + } + if is_raw_string { + flags |= FStringContextFlags::RAW; + } + if self.cursor.eat_char2(quote, quote) { + flags |= FStringContextFlags::TRIPLE; + }; + + self.fstring_stack.push(FStringContext::new(flags)); + Tok::FStringStart + } + + fn lex_fstring_middle_or_end(&mut self) -> Result, LexicalError> { + // SAFETY: Safe because the function is only called when `self.fstring_stack` is not empty. + let context = self.fstring_stack.last().unwrap(); + + // Check if we're at the end of the f-string. 
+ if context.is_triple_quoted() { + let quote_char = context.quote_char(); + if self.cursor.eat_char3(quote_char, quote_char, quote_char) { + return Ok(Some(Tok::FStringEnd)); + } + } else if self.cursor.eat_char(context.quote_char()) { + return Ok(Some(Tok::FStringEnd)); + } + + // The normalized string if the token value is not yet normalized. + // This must remain empty if it's already normalized. Normalization + // is to replace `{{` and `}}` with `{` and `}` respectively. + let mut normalized = String::new(); + + // Tracks the last offset of token value that has been written to `normalized`. + let mut last_offset = self.offset(); + + let mut in_named_unicode = false; + + loop { + match self.cursor.first() { + EOF_CHAR => { + let error = if context.is_triple_quoted() { + FStringErrorType::UnterminatedTripleQuotedString + } else { + FStringErrorType::UnterminatedString + }; + // This is to avoid infinite loop where the lexer keeps returning + // the error token. + self.fstring_stack.pop(); + return Err(LexicalError { + error: LexicalErrorType::FStringError(error), + location: self.offset(), + }); + } + '\n' if !context.is_triple_quoted() => { + // This is to avoid infinite loop where the lexer keeps returning + // the error token. + self.fstring_stack.pop(); + return Err(LexicalError { + error: LexicalErrorType::FStringError(FStringErrorType::UnterminatedString), + location: self.offset(), + }); + } + '\\' => { + self.cursor.bump(); // '\' + if matches!(self.cursor.first(), '{' | '}') { + // Don't consume `{` or `}` as we want them to be consumed as tokens. + break; + } else if !context.is_raw_string() { + if self.cursor.eat_char2('N', '{') { + in_named_unicode = true; + continue; + } + } + // Consume the escaped character. 
+ self.cursor.bump(); + } + quote @ ('\'' | '"') if quote == context.quote_char() => { + if let Some(triple_quotes) = context.triple_quotes() { + if self.cursor.rest().starts_with(triple_quotes) { + break; + } + self.cursor.bump(); + } else { + break; + } + } + '{' => { + if self.cursor.second() == '{' { + self.cursor.bump(); + normalized + .push_str(&self.source[TextRange::new(last_offset, self.offset())]); + self.cursor.bump(); // Skip the second `{` + last_offset = self.offset(); + } else { + break; + } + } + '}' => { + if in_named_unicode { + in_named_unicode = false; + self.cursor.bump(); + } else if self.cursor.second() == '}' && !context.is_in_format_spec() { + self.cursor.bump(); + normalized + .push_str(&self.source[TextRange::new(last_offset, self.offset())]); + self.cursor.bump(); // Skip the second `}` + last_offset = self.offset(); + } else { + break; + } + } + _ => { + self.cursor.bump(); + } + } + } + + let range = self.token_range(); + if range.is_empty() { + return Ok(None); + } + + let value = if normalized.is_empty() { + self.source[range].to_string() + } else { + normalized.push_str(&self.source[TextRange::new(last_offset, self.offset())]); + normalized + }; + Ok(Some(Tok::FStringMiddle { + value, + is_raw: context.is_raw_string(), + })) + } + /// Lex a string literal. fn lex_string(&mut self, kind: StringKind, quote: char) -> Result { #[cfg(debug_assertions)] @@ -529,6 +683,21 @@ impl<'source> Lexer<'source> { } } Some('\r' | '\n') if !triple_quoted => { + if let Some(fstring_context) = self.fstring_stack.last() { + // When we are in an f-string, check whether does the initial quote + // matches with f-strings quotes and if it is, then this must be a + // missing '}' token so raise the proper error. 
+ if fstring_context.quote_char() == quote + && !fstring_context.is_triple_quoted() + { + return Err(LexicalError { + error: LexicalErrorType::FStringError( + FStringErrorType::UnclosedLbrace, + ), + location: self.offset() - fstring_context.quote_size(), + }); + } + } return Err(LexicalError { error: LexicalErrorType::OtherError( "EOL while scanning string literal".to_owned(), @@ -548,6 +717,21 @@ impl<'source> Lexer<'source> { Some(_) => {} None => { + if let Some(fstring_context) = self.fstring_stack.last() { + // When we are in an f-string, check whether does the initial quote + // matches with f-strings quotes and if it is, then this must be a + // missing '}' token so raise the proper error. + if fstring_context.quote_char() == quote + && fstring_context.is_triple_quoted() == triple_quoted + { + return Err(LexicalError { + error: LexicalErrorType::FStringError( + FStringErrorType::UnclosedLbrace, + ), + location: self.offset() - fstring_context.quote_size(), + }); + } + } return Err(LexicalError { error: if triple_quoted { LexicalErrorType::Eof @@ -571,6 +755,18 @@ impl<'source> Lexer<'source> { // This is the main entry point. Call this function to retrieve the next token. // This function is used by the iterator implementation. pub fn next_token(&mut self) -> LexResult { + if let Some(fstring_context) = self.fstring_stack.last() { + if !fstring_context.is_in_expression() { + self.cursor.start_token(); + if let Some(tok) = self.lex_fstring_middle_or_end()? { + if matches!(tok, Tok::FStringEnd) { + self.fstring_stack.pop(); + } + return Ok((tok, self.token_range())); + } + } + } + // Return dedent tokens until the current indentation level matches the indentation of the next token. 
if let Some(indentation) = self.pending_indentation.take() { if let Ok(Ordering::Greater) = self.indentations.current().try_compare(indentation) { @@ -841,39 +1037,66 @@ impl<'source> Lexer<'source> { if self.cursor.eat_char('=') { Tok::NotEqual } else { - return Err(LexicalError { - error: LexicalErrorType::UnrecognizedToken { tok: '!' }, - location: self.token_start(), - }); + Tok::Exclamation } } '~' => Tok::Tilde, '(' => { + if let Some(fstring_context) = self.fstring_stack.last_mut() { + fstring_context.increment_opening_parentheses(); + } self.nesting += 1; Tok::Lpar } ')' => { + if let Some(fstring_context) = self.fstring_stack.last_mut() { + fstring_context.decrement_closing_parentheses(); + } self.nesting = self.nesting.saturating_sub(1); Tok::Rpar } '[' => { + if let Some(fstring_context) = self.fstring_stack.last_mut() { + fstring_context.increment_opening_parentheses(); + } self.nesting += 1; Tok::Lsqb } ']' => { + if let Some(fstring_context) = self.fstring_stack.last_mut() { + fstring_context.decrement_closing_parentheses(); + } self.nesting = self.nesting.saturating_sub(1); Tok::Rsqb } '{' => { + if let Some(fstring_context) = self.fstring_stack.last_mut() { + fstring_context.increment_opening_parentheses(); + } self.nesting += 1; Tok::Lbrace } '}' => { + if let Some(fstring_context) = self.fstring_stack.last_mut() { + if !fstring_context.has_open_parentheses() { + return Err(LexicalError { + error: LexicalErrorType::FStringError(FStringErrorType::SingleRbrace), + location: self.token_start(), + }); + } + fstring_context.decrement_closing_parentheses(); + } self.nesting = self.nesting.saturating_sub(1); Tok::Rbrace } ':' => { - if self.cursor.eat_char('=') { + if self + .fstring_stack + .last_mut() + .is_some_and(FStringContext::try_start_format_spec) + { + Tok::Colon + } else if self.cursor.eat_char('=') { Tok::ColonEqual } else { Tok::Colon @@ -1680,4 +1903,157 @@ def f(arg=%timeit a = b): let source = "[1"; let _ = lex(source, 
Mode::Module).collect::>(); } + + #[test] + fn test_empty_fstrings() { + let source = r#"f"" "" F"" f'' '' f"""""" f''''''"#; + assert_debug_snapshot!(lex_source(source)); + } + + #[test] + fn test_fstring_prefix() { + let source = r#"f"" F"" rf"" rF"" Rf"" RF"" fr"" Fr"" fR"" FR"""#; + assert_debug_snapshot!(lex_source(source)); + } + + #[test] + fn test_fstring() { + let source = r#"f"normal {foo} {{another}} {bar} {{{three}}}""#; + assert_debug_snapshot!(lex_source(source)); + } + + #[test] + fn test_fstring_parentheses() { + let source = r#"f"{}" f"{{}}" f" {}" f"{{{}}}" f"{{{{}}}}" f" {} {{}} {{{}}} {{{{}}}} ""#; + assert_debug_snapshot!(lex_source(source)); + } + + #[test] + fn test_fstring_escape() { + let source = r#"f"\{x:\"\{x}} \"\"\ + end""#; + assert_debug_snapshot!(lex_source(source)); + } + + #[test] + fn test_fstring_escape_raw() { + let source = r#"rf"\{x:\"\{x}} \"\"\ + end""#; + assert_debug_snapshot!(lex_source(source)); + } + + #[test] + fn test_fstring_named_unicode() { + let source = r#"f"\N{BULLET} normal \Nope \N""#; + assert_debug_snapshot!(lex_source(source)); + } + + #[test] + fn test_fstring_named_unicode_raw() { + let source = r#"rf"\N{BULLET} normal""#; + assert_debug_snapshot!(lex_source(source)); + } + + #[test] + fn test_fstring_with_named_expression() { + let source = r#"f"{x:=10} {(x:=10)} {x,{y:=10}} {[x:=10]}""#; + assert_debug_snapshot!(lex_source(source)); + } + + #[test] + fn test_fstring_with_format_spec() { + let source = r#"f"{foo:} {x=!s:.3f} {x:.{y}f} {'':*^{1:{1}}}""#; + assert_debug_snapshot!(lex_source(source)); + } + + #[test] + fn test_fstring_conversion() { + let source = r#"f"{x!s} {x=!r} {x:.3f!r} {{x!r}}""#; + assert_debug_snapshot!(lex_source(source)); + } + + #[test] + fn test_fstring_nested() { + let source = r#"f"foo {f"bar {x + f"{wow}"}"} baz" f'foo {f'bar'} some {f"another"}'"#; + assert_debug_snapshot!(lex_source(source)); + } + + #[test] + fn test_fstring_expression_multiline() { + let source = 
r#"f"first { + x + * + y +} second""#; + assert_debug_snapshot!(lex_source(source)); + } + + #[test] + fn test_fstring_multiline() { + let source = r#"f""" +hello + world +""" f''' + world +hello +''' f"some {f"""multiline +allowed {x}"""} string""#; + assert_debug_snapshot!(lex_source(source)); + } + + #[test] + fn test_fstring_comments() { + let source = r#"f""" +# not a comment { # comment { + x +} # not a comment +""""#; + assert_debug_snapshot!(lex_source(source)); + } + + #[test] + fn test_fstring_with_ipy_escape_command() { + let source = r#"f"foo {!pwd} bar""#; + assert_debug_snapshot!(lex_source(source)); + } + + fn lex_fstring_error(source: &str) -> FStringErrorType { + match lex(source, Mode::Module).find_map(std::result::Result::err) { + Some(err) => match err.error { + LexicalErrorType::FStringError(error) => error, + _ => panic!("Expected FStringError: {err:?}"), + }, + _ => panic!("Expected atleast one FStringError"), + } + } + + #[test] + fn test_fstring_error() { + use FStringErrorType::{ + UnclosedLbrace, UnterminatedString, UnterminatedTripleQuotedString, + }; + + assert_eq!(lex_fstring_error(r#"f"{""#), UnclosedLbrace); + assert_eq!(lex_fstring_error(r#"f"{foo!r""#), UnclosedLbrace); + assert_eq!( + lex_fstring_error( + r#"f"{" +"# + ), + UnclosedLbrace + ); + assert_eq!(lex_fstring_error(r#"f"""{""""#), UnclosedLbrace); + assert_eq!(lex_fstring_error(r#"f""#), UnterminatedString); + assert_eq!(lex_fstring_error(r#"f'"#), UnterminatedString); + assert_eq!(lex_fstring_error(r#"f""""#), UnterminatedTripleQuotedString); + assert_eq!(lex_fstring_error(r#"f'''"#), UnterminatedTripleQuotedString); + assert_eq!( + lex_fstring_error(r#"f"""""#), + UnterminatedTripleQuotedString + ); + assert_eq!( + lex_fstring_error(r#"f""""""#), + UnterminatedTripleQuotedString + ); + } } diff --git a/crates/ruff_python_parser/src/lexer/cursor.rs b/crates/ruff_python_parser/src/lexer/cursor.rs index ed5011a1d85075..c026c88e9b7fb1 100644 --- 
a/crates/ruff_python_parser/src/lexer/cursor.rs +++ b/crates/ruff_python_parser/src/lexer/cursor.rs @@ -96,6 +96,18 @@ impl<'a> Cursor<'a> { } } + pub(super) fn eat_char3(&mut self, c1: char, c2: char, c3: char) -> bool { + let mut chars = self.chars.clone(); + if chars.next() == Some(c1) && chars.next() == Some(c2) && chars.next() == Some(c3) { + self.bump(); + self.bump(); + self.bump(); + true + } else { + false + } + } + pub(super) fn eat_if(&mut self, mut predicate: F) -> Option where F: FnMut(char) -> bool, diff --git a/crates/ruff_python_parser/src/lexer/fstring.rs b/crates/ruff_python_parser/src/lexer/fstring.rs new file mode 100644 index 00000000000000..586d252b3d5ca6 --- /dev/null +++ b/crates/ruff_python_parser/src/lexer/fstring.rs @@ -0,0 +1,133 @@ +use bitflags::bitflags; + +use ruff_text_size::TextSize; + +bitflags! { + #[derive(Debug)] + pub(crate) struct FStringContextFlags: u32 { + /// The current f-string is a triple-quoted f-string i.e., the number of + /// opening and closing quotes is 3. If this flag is not set, the number + /// of opening and closing quotes is 1. + const TRIPLE = 1 << 0; + + /// The current f-string is a double-quoted f-string. If this flag is not + /// set, the current f-string is a single-quoted f-string. + const DOUBLE = 1 << 1; + + /// The current f-string is a raw f-string. If this flag is not set, the + /// current f-string is a non-raw f-string. + const RAW = 1 << 2; + } +} + +#[derive(Debug)] +pub(crate) struct FStringContext { + flags: FStringContextFlags, + /// The number of open parentheses for the current f-string. This includes all + /// three types of parentheses: round (`(`), square (`[`), and curly (`{`). + open_parentheses_count: u32, + /// The number of format specs for the current f-string. This is because there + /// can be multiple format specs nested. For example, `{a:{b:{c}}}` has 3 format + /// specs. 
+ format_spec_depth: u32, +} + +impl FStringContext { + pub(crate) fn new(flags: FStringContextFlags) -> Self { + Self { + flags, + open_parentheses_count: 0, + format_spec_depth: 0, + } + } + + /// Returns the quote character for the current f-string. + pub(crate) fn quote_char(&self) -> char { + if self.flags.contains(FStringContextFlags::DOUBLE) { + '"' + } else { + '\'' + } + } + + /// Returns the number of quotes for the current f-string. + pub(crate) fn quote_size(&self) -> TextSize { + if self.is_triple_quoted() { + TextSize::from(3) + } else { + TextSize::from(1) + } + } + + /// Returns the triple quotes for the current f-string if it is a triple-quoted + /// f-string, `None` otherwise. + pub(crate) fn triple_quotes(&self) -> Option<&'static str> { + if self.is_triple_quoted() { + if self.flags.contains(FStringContextFlags::DOUBLE) { + Some(r#"""""#) + } else { + Some("'''") + } + } else { + None + } + } + + /// Returns `true` if the current f-string is a raw f-string. + pub(crate) fn is_raw_string(&self) -> bool { + self.flags.contains(FStringContextFlags::RAW) + } + + /// Returns `true` if the current f-string is a triple-quoted f-string. + pub(crate) fn is_triple_quoted(&self) -> bool { + self.flags.contains(FStringContextFlags::TRIPLE) + } + + /// Returns `true` if the current f-string has open parentheses. + pub(crate) fn has_open_parentheses(&mut self) -> bool { + self.open_parentheses_count > 0 + } + + /// Increments the number of parentheses for the current f-string. + pub(crate) fn increment_opening_parentheses(&mut self) { + self.open_parentheses_count += 1; + } + + /// Decrements the number of parentheses for the current f-string. If the + /// lexer is in a format spec, also decrements the number of format specs. 
+ pub(crate) fn decrement_closing_parentheses(&mut self) { + if self.is_in_format_spec() { + self.format_spec_depth = self.format_spec_depth.saturating_sub(1); + } + self.open_parentheses_count = self.open_parentheses_count.saturating_sub(1); + } + + /// Returns `true` if the lexer is in a f-string expression i.e., between + /// two curly braces. + pub(crate) fn is_in_expression(&self) -> bool { + self.open_parentheses_count > self.format_spec_depth + } + + /// Returns `true` if the lexer is in a f-string format spec i.e., after a colon. + pub(crate) fn is_in_format_spec(&self) -> bool { + self.format_spec_depth > 0 && !self.is_in_expression() + } + + /// Returns `true` if the context is in a valid position to start format spec + /// i.e., at the same level of nesting as the opening parentheses token. + /// Increments the number of format specs if it is. + /// + /// This assumes that the current character for the lexer is a colon (`:`). + pub(crate) fn try_start_format_spec(&mut self) -> bool { + if self + .open_parentheses_count + .saturating_sub(self.format_spec_depth) + == 1 + { + self.format_spec_depth += 1; + true + } else { + false + } + } +} diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__empty_fstrings.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__empty_fstrings.snap new file mode 100644 index 00000000000000..d854312164f550 --- /dev/null +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__empty_fstrings.snap @@ -0,0 +1,27 @@ +--- +source: crates/ruff_python_parser/src/lexer.rs +expression: lex_source(source) +--- +[ + FStringStart, + FStringEnd, + String { + value: "", + kind: String, + triple_quoted: false, + }, + FStringStart, + FStringEnd, + FStringStart, + FStringEnd, + String { + value: "", + kind: String, + triple_quoted: false, + }, + FStringStart, + FStringEnd, + FStringStart, + FStringEnd, + Newline, +] diff --git 
a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring.snap new file mode 100644 index 00000000000000..15eaed4907f48d --- /dev/null +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring.snap @@ -0,0 +1,40 @@ +--- +source: crates/ruff_python_parser/src/lexer.rs +expression: lex_source(source) +--- +[ + FStringStart, + FStringMiddle { + value: "normal ", + is_raw: false, + }, + Lbrace, + Name { + name: "foo", + }, + Rbrace, + FStringMiddle { + value: " {another} ", + is_raw: false, + }, + Lbrace, + Name { + name: "bar", + }, + Rbrace, + FStringMiddle { + value: " {", + is_raw: false, + }, + Lbrace, + Name { + name: "three", + }, + Rbrace, + FStringMiddle { + value: "}", + is_raw: false, + }, + FStringEnd, + Newline, +] diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_comments.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_comments.snap new file mode 100644 index 00000000000000..d7602c5a95e7d6 --- /dev/null +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_comments.snap @@ -0,0 +1,27 @@ +--- +source: crates/ruff_python_parser/src/lexer.rs +expression: lex_source(source) +--- +[ + FStringStart, + FStringMiddle { + value: "\n# not a comment ", + is_raw: false, + }, + Lbrace, + Comment( + "# comment {", + ), + NonLogicalNewline, + Name { + name: "x", + }, + NonLogicalNewline, + Rbrace, + FStringMiddle { + value: " # not a comment\n", + is_raw: false, + }, + FStringEnd, + Newline, +] diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_conversion.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_conversion.snap new file mode 100644 index 00000000000000..c65bc4eccd30d8 --- /dev/null +++ 
b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_conversion.snap @@ -0,0 +1,50 @@ +--- +source: crates/ruff_python_parser/src/lexer.rs +expression: lex_source(source) +--- +[ + FStringStart, + Lbrace, + Name { + name: "x", + }, + Exclamation, + Name { + name: "s", + }, + Rbrace, + FStringMiddle { + value: " ", + is_raw: false, + }, + Lbrace, + Name { + name: "x", + }, + Equal, + Exclamation, + Name { + name: "r", + }, + Rbrace, + FStringMiddle { + value: " ", + is_raw: false, + }, + Lbrace, + Name { + name: "x", + }, + Colon, + FStringMiddle { + value: ".3f!r", + is_raw: false, + }, + Rbrace, + FStringMiddle { + value: " {x!r}", + is_raw: false, + }, + FStringEnd, + Newline, +] diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_escape.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_escape.snap new file mode 100644 index 00000000000000..c2bb475599111a --- /dev/null +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_escape.snap @@ -0,0 +1,32 @@ +--- +source: crates/ruff_python_parser/src/lexer.rs +expression: lex_source(source) +--- +[ + FStringStart, + FStringMiddle { + value: "\\", + is_raw: false, + }, + Lbrace, + Name { + name: "x", + }, + Colon, + FStringMiddle { + value: "\\\"\\", + is_raw: false, + }, + Lbrace, + Name { + name: "x", + }, + Rbrace, + Rbrace, + FStringMiddle { + value: " \\\"\\\"\\\n end", + is_raw: false, + }, + FStringEnd, + Newline, +] diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_escape_raw.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_escape_raw.snap new file mode 100644 index 00000000000000..989196d56ccf1d --- /dev/null +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_escape_raw.snap @@ -0,0 +1,32 @@ +--- +source: crates/ruff_python_parser/src/lexer.rs 
+expression: lex_source(source) +--- +[ + FStringStart, + FStringMiddle { + value: "\\", + is_raw: true, + }, + Lbrace, + Name { + name: "x", + }, + Colon, + FStringMiddle { + value: "\\\"\\", + is_raw: true, + }, + Lbrace, + Name { + name: "x", + }, + Rbrace, + Rbrace, + FStringMiddle { + value: " \\\"\\\"\\\n end", + is_raw: true, + }, + FStringEnd, + Newline, +] diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_expression_multiline.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_expression_multiline.snap new file mode 100644 index 00000000000000..1abd2fca4d7c38 --- /dev/null +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_expression_multiline.snap @@ -0,0 +1,30 @@ +--- +source: crates/ruff_python_parser/src/lexer.rs +expression: lex_source(source) +--- +[ + FStringStart, + FStringMiddle { + value: "first ", + is_raw: false, + }, + Lbrace, + NonLogicalNewline, + Name { + name: "x", + }, + NonLogicalNewline, + Star, + NonLogicalNewline, + Name { + name: "y", + }, + NonLogicalNewline, + Rbrace, + FStringMiddle { + value: " second", + is_raw: false, + }, + FStringEnd, + Newline, +] diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_multiline.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_multiline.snap new file mode 100644 index 00000000000000..da37f282676550 --- /dev/null +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_multiline.snap @@ -0,0 +1,42 @@ +--- +source: crates/ruff_python_parser/src/lexer.rs +expression: lex_source(source) +--- +[ + FStringStart, + FStringMiddle { + value: "\nhello\n world\n", + is_raw: false, + }, + FStringEnd, + FStringStart, + FStringMiddle { + value: "\n world\nhello\n", + is_raw: false, + }, + FStringEnd, + FStringStart, + FStringMiddle { + value: "some ", + is_raw: false, + }, + 
Lbrace, + FStringStart, + FStringMiddle { + value: "multiline\nallowed ", + is_raw: false, + }, + Lbrace, + Name { + name: "x", + }, + Rbrace, + FStringEnd, + Rbrace, + FStringMiddle { + value: " string", + is_raw: false, + }, + FStringEnd, + Newline, +] diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_named_unicode.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_named_unicode.snap new file mode 100644 index 00000000000000..74b6e997a2c862 --- /dev/null +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_named_unicode.snap @@ -0,0 +1,13 @@ +--- +source: crates/ruff_python_parser/src/lexer.rs +expression: lex_source(source) +--- +[ + FStringStart, + FStringMiddle { + value: "\\N{BULLET} normal \\Nope \\N", + is_raw: false, + }, + FStringEnd, + Newline, +] diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_named_unicode_raw.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_named_unicode_raw.snap new file mode 100644 index 00000000000000..7196be49f24417 --- /dev/null +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_named_unicode_raw.snap @@ -0,0 +1,22 @@ +--- +source: crates/ruff_python_parser/src/lexer.rs +expression: lex_source(source) +--- +[ + FStringStart, + FStringMiddle { + value: "\\N", + is_raw: true, + }, + Lbrace, + Name { + name: "BULLET", + }, + Rbrace, + FStringMiddle { + value: " normal", + is_raw: true, + }, + FStringEnd, + Newline, +] diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_nested.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_nested.snap new file mode 100644 index 00000000000000..1fa039cb4aa28d --- /dev/null +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_nested.snap @@ -0,0 
+1,64 @@ +--- +source: crates/ruff_python_parser/src/lexer.rs +expression: lex_source(source) +--- +[ + FStringStart, + FStringMiddle { + value: "foo ", + is_raw: false, + }, + Lbrace, + FStringStart, + FStringMiddle { + value: "bar ", + is_raw: false, + }, + Lbrace, + Name { + name: "x", + }, + Plus, + FStringStart, + Lbrace, + Name { + name: "wow", + }, + Rbrace, + FStringEnd, + Rbrace, + FStringEnd, + Rbrace, + FStringMiddle { + value: " baz", + is_raw: false, + }, + FStringEnd, + FStringStart, + FStringMiddle { + value: "foo ", + is_raw: false, + }, + Lbrace, + FStringStart, + FStringMiddle { + value: "bar", + is_raw: false, + }, + FStringEnd, + Rbrace, + FStringMiddle { + value: " some ", + is_raw: false, + }, + Lbrace, + FStringStart, + FStringMiddle { + value: "another", + is_raw: false, + }, + FStringEnd, + Rbrace, + FStringEnd, + Newline, +] diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_parentheses.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_parentheses.snap new file mode 100644 index 00000000000000..42d111aa4532af --- /dev/null +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_parentheses.snap @@ -0,0 +1,61 @@ +--- +source: crates/ruff_python_parser/src/lexer.rs +expression: lex_source(source) +--- +[ + FStringStart, + Lbrace, + Rbrace, + FStringEnd, + FStringStart, + FStringMiddle { + value: "{}", + is_raw: false, + }, + FStringEnd, + FStringStart, + FStringMiddle { + value: " ", + is_raw: false, + }, + Lbrace, + Rbrace, + FStringEnd, + FStringStart, + FStringMiddle { + value: "{", + is_raw: false, + }, + Lbrace, + Rbrace, + FStringMiddle { + value: "}", + is_raw: false, + }, + FStringEnd, + FStringStart, + FStringMiddle { + value: "{{}}", + is_raw: false, + }, + FStringEnd, + FStringStart, + FStringMiddle { + value: " ", + is_raw: false, + }, + Lbrace, + Rbrace, + FStringMiddle { + value: " {} {", + is_raw: false, + }, + 
Lbrace, + Rbrace, + FStringMiddle { + value: "} {{}} ", + is_raw: false, + }, + FStringEnd, + Newline, +] diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_prefix.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_prefix.snap new file mode 100644 index 00000000000000..3f249489698a43 --- /dev/null +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_prefix.snap @@ -0,0 +1,27 @@ +--- +source: crates/ruff_python_parser/src/lexer.rs +expression: lex_source(source) +--- +[ + FStringStart, + FStringEnd, + FStringStart, + FStringEnd, + FStringStart, + FStringEnd, + FStringStart, + FStringEnd, + FStringStart, + FStringEnd, + FStringStart, + FStringEnd, + FStringStart, + FStringEnd, + FStringStart, + FStringEnd, + FStringStart, + FStringEnd, + FStringStart, + FStringEnd, + Newline, +] diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_format_spec.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_format_spec.snap new file mode 100644 index 00000000000000..b1125413f3f5f2 --- /dev/null +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_format_spec.snap @@ -0,0 +1,84 @@ +--- +source: crates/ruff_python_parser/src/lexer.rs +expression: lex_source(source) +--- +[ + FStringStart, + Lbrace, + Name { + name: "foo", + }, + Colon, + Rbrace, + FStringMiddle { + value: " ", + is_raw: false, + }, + Lbrace, + Name { + name: "x", + }, + Equal, + Exclamation, + Name { + name: "s", + }, + Colon, + FStringMiddle { + value: ".3f", + is_raw: false, + }, + Rbrace, + FStringMiddle { + value: " ", + is_raw: false, + }, + Lbrace, + Name { + name: "x", + }, + Colon, + FStringMiddle { + value: ".", + is_raw: false, + }, + Lbrace, + Name { + name: "y", + }, + Rbrace, + FStringMiddle { + value: "f", + is_raw: false, + }, + Rbrace, + FStringMiddle { + 
value: " ", + is_raw: false, + }, + Lbrace, + String { + value: "", + kind: String, + triple_quoted: false, + }, + Colon, + FStringMiddle { + value: "*^", + is_raw: false, + }, + Lbrace, + Int { + value: 1, + }, + Colon, + Lbrace, + Int { + value: 1, + }, + Rbrace, + Rbrace, + Rbrace, + FStringEnd, + Newline, +] diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_ipy_escape_command.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_ipy_escape_command.snap new file mode 100644 index 00000000000000..99982d714fa45f --- /dev/null +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_ipy_escape_command.snap @@ -0,0 +1,23 @@ +--- +source: crates/ruff_python_parser/src/lexer.rs +expression: lex_source(source) +--- +[ + FStringStart, + FStringMiddle { + value: "foo ", + is_raw: false, + }, + Lbrace, + Exclamation, + Name { + name: "pwd", + }, + Rbrace, + FStringMiddle { + value: " bar", + is_raw: false, + }, + FStringEnd, + Newline, +] diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_named_expression.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_named_expression.snap new file mode 100644 index 00000000000000..7308b9e2b824a5 --- /dev/null +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_named_expression.snap @@ -0,0 +1,68 @@ +--- +source: crates/ruff_python_parser/src/lexer.rs +expression: lex_source(source) +--- +[ + FStringStart, + Lbrace, + Name { + name: "x", + }, + Colon, + FStringMiddle { + value: "=10", + is_raw: false, + }, + Rbrace, + FStringMiddle { + value: " ", + is_raw: false, + }, + Lbrace, + Lpar, + Name { + name: "x", + }, + ColonEqual, + Int { + value: 10, + }, + Rpar, + Rbrace, + FStringMiddle { + value: " ", + is_raw: false, + }, + Lbrace, + Name { + name: "x", + }, + Comma, + Lbrace, + 
Name { + name: "y", + }, + ColonEqual, + Int { + value: 10, + }, + Rbrace, + Rbrace, + FStringMiddle { + value: " ", + is_raw: false, + }, + Lbrace, + Lsqb, + Name { + name: "x", + }, + ColonEqual, + Int { + value: 10, + }, + Rsqb, + Rbrace, + FStringEnd, + Newline, +] diff --git a/crates/ruff_python_parser/src/string.rs b/crates/ruff_python_parser/src/string.rs index 936c4fdef18241..01ef71b83b5fbf 100644 --- a/crates/ruff_python_parser/src/string.rs +++ b/crates/ruff_python_parser/src/string.rs @@ -736,6 +736,8 @@ pub enum FStringErrorType { // TODO: Test this case. /// Unterminated string. UnterminatedString, + /// Unterminated triple-quoted string. + UnterminatedTripleQuotedString, } impl std::fmt::Display for FStringErrorType { @@ -743,7 +745,7 @@ impl std::fmt::Display for FStringErrorType { use FStringErrorType::{ EmptyExpression, ExpressionCannotInclude, ExpressionNestedTooDeeply, InvalidConversionFlag, InvalidExpression, MismatchedDelimiter, SingleRbrace, - UnclosedLbrace, Unmatched, UnterminatedString, + UnclosedLbrace, Unmatched, UnterminatedString, UnterminatedTripleQuotedString, }; match self { UnclosedLbrace => write!(f, "expecting '}}'"), @@ -764,6 +766,9 @@ impl std::fmt::Display for FStringErrorType { UnterminatedString => { write!(f, "unterminated string") } + UnterminatedTripleQuotedString => { + write!(f, "unterminated triple-quoted string") + } ExpressionCannotInclude(c) => { if *c == '\\' { write!(f, "f-string expression part cannot include a backslash") diff --git a/crates/ruff_python_parser/src/token.rs b/crates/ruff_python_parser/src/token.rs index db159a0340e039..1189c0686496cf 100644 --- a/crates/ruff_python_parser/src/token.rs +++ b/crates/ruff_python_parser/src/token.rs @@ -44,6 +44,19 @@ pub enum Tok { /// Whether the string is triple quoted. triple_quoted: bool, }, + /// Token value for the start of an f-string. This includes the `f`/`F`/`fr` prefix + /// and the opening quote(s). 
+ FStringStart, + /// Token value that includes the portion of text inside the f-string that's not + /// part of the expression part and isn't an opening or closing brace. + FStringMiddle { + /// The string value. + value: String, + /// Whether the string is raw or not. + is_raw: bool, + }, + /// Token value for the end of an f-string. This includes the closing quote. + FStringEnd, /// Token value for IPython escape commands. These are recognized by the lexer /// only when the mode is [`Mode::Jupyter`]. IpyEscapeCommand { @@ -66,6 +79,8 @@ pub enum Tok { EndOfFile, /// Token value for a question mark `?`. This is only used in [`Mode::Jupyter`]. Question, + /// Token value for an exclamation mark `!`. + Exclamation, /// Token value for a left parenthesis `(`. Lpar, /// Token value for a right parenthesis `)`. @@ -234,6 +249,9 @@ impl fmt::Display for Tok { let quotes = "\"".repeat(if *triple_quoted { 3 } else { 1 }); write!(f, "{kind}{quotes}{value}{quotes}") } + FStringStart => f.write_str("FStringStart"), + FStringMiddle { value, .. } => f.write_str(value), + FStringEnd => f.write_str("FStringEnd"), IpyEscapeCommand { kind, value } => write!(f, "{kind}{value}"), Newline => f.write_str("Newline"), NonLogicalNewline => f.write_str("NonLogicalNewline"), @@ -243,6 +261,7 @@ StartExpression => f.write_str("StartExpression"), EndOfFile => f.write_str("EOF"), Question => f.write_str("'?'"), + Exclamation => f.write_str("'!'"), Lpar => f.write_str("'('"), Rpar => f.write_str("')'"), Lsqb => f.write_str("'['"), @@ -450,6 +469,14 @@ pub enum TokenKind { Complex, /// Token value for a string. String, + /// Token value for the start of an f-string. This includes the `f`/`F`/`fr` prefix + /// and the opening quote(s). + FStringStart, + /// Token value that includes the portion of text inside the f-string that's not + /// part of the expression part and isn't an opening or closing brace. + FStringMiddle, + /// Token value for the end of an f-string. 
This includes the closing quote. + FStringEnd, /// Token value for an IPython escape command. EscapeCommand, /// Token value for a comment. These are filtered out of the token stream prior to parsing. @@ -466,6 +493,8 @@ pub enum TokenKind { EndOfFile, /// Token value for a question mark `?`. Question, + /// Token value for an exclamation mark `!`. + Exclamation, /// Token value for a left parenthesis `(`. Lpar, /// Token value for a right parenthesis `)`. @@ -781,6 +810,9 @@ impl TokenKind { Tok::Float { .. } => TokenKind::Float, Tok::Complex { .. } => TokenKind::Complex, Tok::String { .. } => TokenKind::String, + Tok::FStringStart => TokenKind::FStringStart, + Tok::FStringMiddle { .. } => TokenKind::FStringMiddle, + Tok::FStringEnd => TokenKind::FStringEnd, Tok::IpyEscapeCommand { .. } => TokenKind::EscapeCommand, Tok::Comment(_) => TokenKind::Comment, Tok::Newline => TokenKind::Newline, @@ -789,6 +821,7 @@ Tok::Dedent => TokenKind::Dedent, Tok::EndOfFile => TokenKind::EndOfFile, Tok::Question => TokenKind::Question, + Tok::Exclamation => TokenKind::Exclamation, Tok::Lpar => TokenKind::Lpar, Tok::Rpar => TokenKind::Rpar, Tok::Lsqb => TokenKind::Lsqb,