Skip to content

Commit

Permalink
fix(regular_expression): Keep LegacyOctalEscape raw digits for `to_st…
Browse files Browse the repository at this point in the history
…ring` (#5692)

Fixes #5690

- Update `CharacterKind` enum from `Octal` to `Octal1`, `Octal2` and `Octal3`
- Stylistic refactoring for `impl Display`
  • Loading branch information
leaysgur committed Sep 11, 2024
1 parent 9e9435f commit 304ce25
Show file tree
Hide file tree
Showing 6 changed files with 121 additions and 89 deletions.
11 changes: 7 additions & 4 deletions crates/oxc_regular_expression/src/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -174,10 +174,13 @@ pub enum CharacterKind {
HexadecimalEscape = 1,
Identifier = 2,
Null = 3,
Octal = 4,
SingleEscape = 5,
Symbol = 6,
UnicodeEscape = 7,
// To distinguish leading 0 cases like `\00` and `\000`
Octal1 = 4,
Octal2 = 5,
Octal3 = 6,
SingleEscape = 7,
Symbol = 8,
UnicodeEscape = 9,
}

/// Character class.
Expand Down
15 changes: 12 additions & 3 deletions crates/oxc_regular_expression/src/body_parser/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -691,12 +691,21 @@ impl<'a> PatternParser<'a> {
}));
}

// e.g. \18
// e.g. \1, \00, \000
if !self.state.unicode_mode {
if let Some(cp) = self.consume_legacy_octal_escape_sequence() {
let span_end = self.reader.offset();
// Keep original digits for `to_string()`
// Otherwise `\0022`(octal \002 + symbol 2) will be `\22`(octal \22)
let digits = span_end - span_start - 1; // -1 for '\'

return Ok(Some(ast::Character {
span: self.span_factory.create(span_start, self.reader.offset()),
kind: ast::CharacterKind::Octal,
span: self.span_factory.create(span_start, span_end),
kind: (match digits {
3 => ast::CharacterKind::Octal3,
2 => ast::CharacterKind::Octal2,
_ => ast::CharacterKind::Octal1,
}),
value: cp,
}));
}
Expand Down
170 changes: 97 additions & 73 deletions crates/oxc_regular_expression/src/display.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,24 +16,23 @@ impl<'a> Display for RegularExpression<'a> {
impl Display for Flags {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let mut flags = String::with_capacity(8);
macro_rules! if_true_append {
($flag:ident, $char:literal) => {
if self.$flag {
flags.push($char);
}
};
}

// write flags in the order they are described in the `MDN`
// <https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_expressions#advanced_searching_with_flags>
if_true_append!(has_indices, 'd');
if_true_append!(global, 'g');
if_true_append!(ignore_case, 'i');
if_true_append!(multiline, 'm');
if_true_append!(dot_all, 's');
if_true_append!(unicode, 'u');
if_true_append!(unicode_sets, 'v');
if_true_append!(sticky, 'y');
for (v, ch) in [
(self.has_indices, 'd'),
(self.global, 'g'),
(self.ignore_case, 'i'),
(self.multiline, 'm'),
(self.dot_all, 's'),
(self.unicode, 'u'),
(self.unicode_sets, 'v'),
(self.sticky, 'y'),
] {
if v {
flags.push(ch);
}
}

write!(f, "{flags}")
}
Expand All @@ -60,14 +59,17 @@ impl<'a> Display for Alternative<'a> {
None
}
}

write_join_with(f, "", &self.body, |iter| {
let next = iter.next()?;
let Some(next) = as_character(next) else { return Some(next.to_string()) };

let peek = iter.peek().and_then(|it| as_character(it));
let (result, eat) = character_to_string(next, peek);
if eat {
_ = iter.next();
iter.next();
}

Some(result)
})
}
Expand Down Expand Up @@ -208,25 +210,30 @@ impl<'a> Display for CharacterClass<'a> {
None
}
}

write!(f, "[")?;

if !self.body.is_empty() {
if self.negative {
write!(f, "^")?;
}

let sep = match self.kind {
CharacterClassContentsKind::Union => "",
CharacterClassContentsKind::Subtraction => "--",
CharacterClassContentsKind::Intersection => "&&",
};

write_join_with(f, sep, &self.body, |iter| {
let next = iter.next()?;
let Some(next) = as_character(next) else { return Some(next.to_string()) };

let peek = iter.peek().and_then(|it| as_character(it));
let (result, eat) = character_to_string(next, peek);
if eat {
_ = iter.next();
iter.next();
}

Some(result)
})?;
}
Expand Down Expand Up @@ -270,12 +277,14 @@ impl<'a> Display for ClassString<'a> {

impl<'a> Display for CapturingGroup<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let body = &self.body;
write!(f, "(")?;

if let Some(name) = &self.name {
write!(f, "(?<{name}>{body})")
} else {
write!(f, "({body})")
write!(f, "?<{name}>")?;
}
write!(f, "{}", &self.body)?;

write!(f, ")")
}
}

Expand All @@ -299,12 +308,14 @@ impl<'a> Display for IgnoreGroup<'a> {
}

write!(f, "(?")?;

if let Some(enabling) = &self.enabling_modifiers {
write_flags(f, '\0', enabling)?;
}
if let Some(disabling) = &self.disabling_modifiers {
write_flags(f, '-', disabling)?;
}

write!(f, ":{})", self.body)
}
}
Expand All @@ -321,35 +332,7 @@ impl<'a> Display for NamedReference<'a> {
}
}

fn write_join<S, I, E>(f: &mut fmt::Formatter<'_>, sep: S, items: I) -> fmt::Result
where
S: AsRef<str>,
E: Display,
I: IntoIterator<Item = E>,
{
write_join_with(f, sep, items, |iter| iter.next().map(|it| it.to_string()))
}

fn write_join_with<S, I, E, F>(f: &mut fmt::Formatter<'_>, sep: S, items: I, next: F) -> fmt::Result
where
S: AsRef<str>,
E: Display,
I: IntoIterator<Item = E>,
F: Fn(&mut Peekable<I::IntoIter>) -> Option<String>,
{
let sep = sep.as_ref();
let iter = &mut items.into_iter().peekable();

if let Some(first) = next(iter) {
write!(f, "{first}")?;
}

while let Some(it) = next(iter) {
write!(f, "{sep}{it}")?;
}

Ok(())
}
// ---

fn character_to_string(
this: &Character,
Expand Down Expand Up @@ -378,6 +361,8 @@ fn character_to_string(

let ch = char::from_u32(cp).expect("Invalid `Character`!");
let result = match this.kind {
// Not a surrogate, like BMP, or all units in unicode mode
CharacterKind::Symbol => format!("{ch}"),
CharacterKind::ControlLetter => match ch {
'\n' => r"\cJ".to_string(),
'\r' => r"\cM".to_string(),
Expand All @@ -387,8 +372,16 @@ fn character_to_string(
CharacterKind::Identifier => {
format!(r"\{ch}")
}
// Not a surrogate, like BMP, or all units in unicode mode
CharacterKind::Symbol => format!("{ch}"),
CharacterKind::SingleEscape => match ch {
'\n' => String::from(r"\n"),
'\r' => String::from(r"\r"),
'\t' => String::from(r"\t"),
'\u{b}' => String::from(r"\v"),
'\u{c}' => String::from(r"\f"),
'\u{8}' => String::from(r"\b"),
'\u{2D}' => String::from(r"\-"),
_ => format!(r"\{ch}"),
},
CharacterKind::Null => String::from(r"\0"),
CharacterKind::UnicodeEscape => {
let hex = &format!("{cp:04X}");
Expand All @@ -402,27 +395,58 @@ fn character_to_string(
let hex = &format!("{cp:02X}");
format!(r"\x{hex}")
}
CharacterKind::Octal => {
CharacterKind::Octal1 => {
let octal = format!("{cp:o}");
format!(r"\{octal}")
}
CharacterKind::SingleEscape => match ch {
'\n' => String::from(r"\n"),
'\r' => String::from(r"\r"),
'\t' => String::from(r"\t"),
'\u{b}' => String::from(r"\v"),
'\u{c}' => String::from(r"\f"),
'\u{8}' => String::from(r"\b"),
'\u{2D}' => String::from(r"\-"),
_ => format!(r"\{ch}"),
},
CharacterKind::Octal2 => {
let octal = format!("{cp:02o}");
format!(r"\{octal}")
}
CharacterKind::Octal3 => {
let octal = format!("{cp:03o}");
format!(r"\{octal}")
}
};

(result, false)
}

// ---

fn write_join<S, I, E>(f: &mut fmt::Formatter<'_>, sep: S, items: I) -> fmt::Result
where
S: AsRef<str>,
E: Display,
I: IntoIterator<Item = E>,
{
write_join_with(f, sep, items, |iter| iter.next().map(|it| it.to_string()))
}

fn write_join_with<S, I, E, F>(f: &mut fmt::Formatter<'_>, sep: S, items: I, next: F) -> fmt::Result
where
S: AsRef<str>,
E: Display,
I: IntoIterator<Item = E>,
F: Fn(&mut Peekable<I::IntoIter>) -> Option<String>,
{
let sep = sep.as_ref();
let iter = &mut items.into_iter().peekable();

if let Some(first) = next(iter) {
write!(f, "{first}")?;
}

while let Some(it) = next(iter) {
write!(f, "{sep}{it}")?;
}

Ok(())
}

#[cfg(test)]
mod test {
use crate::{Parser, ParserOptions};
use oxc_allocator::Allocator;

type Case<'a> = (
Expand Down Expand Up @@ -505,23 +529,24 @@ mod test {
(r"/\5/", None),
(r"/\6/", None),
(r"/\7/", None),
// Remove leading zeroes --
(r"/\00/", Some(r"/\0/")),
(r"/\07/", Some(r"/\7/")),
// --
(r"/\00/", None),
(r"/\07/", None),
(r"/\30/", None),
(r"/\37/", None),
(r"/\40/", None),
(r"/\47/", None),
(r"/\70/", None),
(r"/\77/", None),
// Remove leading zeroes --
(r"/\000/", Some(r"/\0/")),
(r"/\007/", Some(r"/\7/")),
(r"/\070/", Some(r"/\70/")),
// --
(r"/\000/", None),
(r"/\007/", None),
(r"/\070/", None),
(r"/\300/", None),
(r"/\307/", None),
(r"/\370/", None),
(r"/\377/", None),
(r"/\0111/", None),
(r"/\0022/", None),
(r"/\0003/", None),
(r"/(.)\1/", None),
// Identity escape from: <https://github.com/tc39/test262/blob/d62fa93c8f9ce5e687c0bbaa5d2b59670ab2ff60/test/annexB/language/literals/regexp/identity-escape.js>
(r"/\C/", None),
Expand Down Expand Up @@ -553,7 +578,6 @@ mod test {
];

fn test_display(allocator: &Allocator, (source, expect): &Case) {
use crate::{Parser, ParserOptions};
let expect = expect.unwrap_or(source);
let actual = Parser::new(allocator, source, ParserOptions::default()).parse().unwrap();
assert_eq!(expect, actual.to_string());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,9 @@ impl<'alloc> CloneIn<'alloc> for CharacterKind {
Self::HexadecimalEscape => CharacterKind::HexadecimalEscape,
Self::Identifier => CharacterKind::Identifier,
Self::Null => CharacterKind::Null,
Self::Octal => CharacterKind::Octal,
Self::Octal1 => CharacterKind::Octal1,
Self::Octal2 => CharacterKind::Octal2,
Self::Octal3 => CharacterKind::Octal3,
Self::SingleEscape => CharacterKind::SingleEscape,
Self::Symbol => CharacterKind::Symbol,
Self::UnicodeEscape => CharacterKind::UnicodeEscape,
Expand Down
5 changes: 1 addition & 4 deletions tasks/coverage/parser_test262.snap
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,8 @@ commit: d62fa93c

parser_test262 Summary:
AST Parsed : 43765/43765 (100.00%)
Positive Passed: 43764/43765 (100.00%)
Positive Passed: 43765/43765 (100.00%)
Negative Passed: 4237/4237 (100.00%)
Expect to Parse: tasks/coverage/test262/test/annexB/language/literals/regexp/legacy-octal-escape.js

× Regular Expression mismatch: \03 \3

× '0'-prefixed octal literals and octal escape sequences are deprecated
╭─[test262/test/annexB/language/expressions/template-literal/legacy-octal-escape-sequence-strict.js:19:4]
Expand Down
5 changes: 1 addition & 4 deletions tasks/coverage/semantic_test262.snap
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ commit: d62fa93c

semantic_test262 Summary:
AST Parsed : 43765/43765 (100.00%)
Positive Passed: 43564/43765 (99.54%)
Positive Passed: 43565/43765 (99.54%)
tasks/coverage/test262/test/annexB/language/function-code/if-decl-else-decl-a-func-block-scoping.js
semantic error: Symbol scope ID mismatch:
after transform: SymbolId(3): ScopeId(4294967294)
Expand Down Expand Up @@ -1119,9 +1119,6 @@ semantic error: Symbol scope ID mismatch:
after transform: SymbolId(0): ScopeId(4294967294)
rebuilt : SymbolId(0): ScopeId(4294967294)

tasks/coverage/test262/test/annexB/language/literals/regexp/legacy-octal-escape.js
semantic error: Regular Expression mismatch: \03 \3

tasks/coverage/test262/test/language/module-code/eval-rqstd-once.js
semantic error: Bindings mismatch:
after transform: ScopeId(0): ["dflt1", "dflt2", "dflt3", "global", "ns1", "ns3"]
Expand Down

0 comments on commit 304ce25

Please sign in to comment.