Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(regular_expression): Keep LegacyOctalEscape raw digits for to_string #5692

Merged
merged 1 commit into from
Sep 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions crates/oxc_regular_expression/src/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -174,10 +174,13 @@ pub enum CharacterKind {
HexadecimalEscape = 1,
Identifier = 2,
Null = 3,
Octal = 4,
SingleEscape = 5,
Symbol = 6,
UnicodeEscape = 7,
// To distinguish leading 0 cases like `\00` and `\000`
Octal1 = 4,
Octal2 = 5,
Octal3 = 6,
SingleEscape = 7,
Symbol = 8,
UnicodeEscape = 9,
}

/// Character class.
Expand Down
15 changes: 12 additions & 3 deletions crates/oxc_regular_expression/src/body_parser/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -691,12 +691,21 @@ impl<'a> PatternParser<'a> {
}));
}

// e.g. \18
// e.g. \1, \00, \000
if !self.state.unicode_mode {
if let Some(cp) = self.consume_legacy_octal_escape_sequence() {
let span_end = self.reader.offset();
// Keep original digits for `to_string()`
// Otherwise `\0022`(octal \002 + symbol 2) will be `\22`(octal \22)
let digits = span_end - span_start - 1; // -1 for '\'

return Ok(Some(ast::Character {
span: self.span_factory.create(span_start, self.reader.offset()),
kind: ast::CharacterKind::Octal,
span: self.span_factory.create(span_start, span_end),
kind: (match digits {
3 => ast::CharacterKind::Octal3,
2 => ast::CharacterKind::Octal2,
_ => ast::CharacterKind::Octal1,
}),
value: cp,
}));
}
Expand Down
170 changes: 97 additions & 73 deletions crates/oxc_regular_expression/src/display.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,24 +16,23 @@ impl<'a> Display for RegularExpression<'a> {
impl Display for Flags {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let mut flags = String::with_capacity(8);
macro_rules! if_true_append {
($flag:ident, $char:literal) => {
if self.$flag {
flags.push($char);
}
};
}

// write flags in the order they are described in the `MDN`
// <https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_expressions#advanced_searching_with_flags>
if_true_append!(has_indices, 'd');
if_true_append!(global, 'g');
if_true_append!(ignore_case, 'i');
if_true_append!(multiline, 'm');
if_true_append!(dot_all, 's');
if_true_append!(unicode, 'u');
if_true_append!(unicode_sets, 'v');
if_true_append!(sticky, 'y');
for (v, ch) in [
(self.has_indices, 'd'),
(self.global, 'g'),
(self.ignore_case, 'i'),
(self.multiline, 'm'),
(self.dot_all, 's'),
(self.unicode, 'u'),
(self.unicode_sets, 'v'),
(self.sticky, 'y'),
] {
if v {
flags.push(ch);
}
}

write!(f, "{flags}")
}
Expand All @@ -60,14 +59,17 @@ impl<'a> Display for Alternative<'a> {
None
}
}

write_join_with(f, "", &self.body, |iter| {
let next = iter.next()?;
let Some(next) = as_character(next) else { return Some(next.to_string()) };

let peek = iter.peek().and_then(|it| as_character(it));
let (result, eat) = character_to_string(next, peek);
if eat {
_ = iter.next();
iter.next();
}

Some(result)
})
}
Expand Down Expand Up @@ -208,25 +210,30 @@ impl<'a> Display for CharacterClass<'a> {
None
}
}

write!(f, "[")?;

if !self.body.is_empty() {
if self.negative {
write!(f, "^")?;
}

let sep = match self.kind {
CharacterClassContentsKind::Union => "",
CharacterClassContentsKind::Subtraction => "--",
CharacterClassContentsKind::Intersection => "&&",
};

write_join_with(f, sep, &self.body, |iter| {
let next = iter.next()?;
let Some(next) = as_character(next) else { return Some(next.to_string()) };

let peek = iter.peek().and_then(|it| as_character(it));
let (result, eat) = character_to_string(next, peek);
if eat {
_ = iter.next();
iter.next();
}

Some(result)
})?;
}
Expand Down Expand Up @@ -270,12 +277,14 @@ impl<'a> Display for ClassString<'a> {

impl<'a> Display for CapturingGroup<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let body = &self.body;
write!(f, "(")?;

if let Some(name) = &self.name {
write!(f, "(?<{name}>{body})")
} else {
write!(f, "({body})")
write!(f, "?<{name}>")?;
}
write!(f, "{}", &self.body)?;

write!(f, ")")
}
}

Expand All @@ -299,12 +308,14 @@ impl<'a> Display for IgnoreGroup<'a> {
}

write!(f, "(?")?;

if let Some(enabling) = &self.enabling_modifiers {
write_flags(f, '\0', enabling)?;
}
if let Some(disabling) = &self.disabling_modifiers {
write_flags(f, '-', disabling)?;
}

write!(f, ":{})", self.body)
}
}
Expand All @@ -321,35 +332,7 @@ impl<'a> Display for NamedReference<'a> {
}
}

fn write_join<S, I, E>(f: &mut fmt::Formatter<'_>, sep: S, items: I) -> fmt::Result
where
S: AsRef<str>,
E: Display,
I: IntoIterator<Item = E>,
{
write_join_with(f, sep, items, |iter| iter.next().map(|it| it.to_string()))
}

fn write_join_with<S, I, E, F>(f: &mut fmt::Formatter<'_>, sep: S, items: I, next: F) -> fmt::Result
where
S: AsRef<str>,
E: Display,
I: IntoIterator<Item = E>,
F: Fn(&mut Peekable<I::IntoIter>) -> Option<String>,
{
let sep = sep.as_ref();
let iter = &mut items.into_iter().peekable();

if let Some(first) = next(iter) {
write!(f, "{first}")?;
}

while let Some(it) = next(iter) {
write!(f, "{sep}{it}")?;
}

Ok(())
}
// ---

fn character_to_string(
this: &Character,
Expand Down Expand Up @@ -378,6 +361,8 @@ fn character_to_string(

let ch = char::from_u32(cp).expect("Invalid `Character`!");
let result = match this.kind {
// Not a surrogate, like BMP, or all units in unicode mode
CharacterKind::Symbol => format!("{ch}"),
CharacterKind::ControlLetter => match ch {
'\n' => r"\cJ".to_string(),
'\r' => r"\cM".to_string(),
Expand All @@ -387,8 +372,16 @@ fn character_to_string(
CharacterKind::Identifier => {
format!(r"\{ch}")
}
// Not a surrogate, like BMP, or all units in unicode mode
CharacterKind::Symbol => format!("{ch}"),
CharacterKind::SingleEscape => match ch {
'\n' => String::from(r"\n"),
'\r' => String::from(r"\r"),
'\t' => String::from(r"\t"),
'\u{b}' => String::from(r"\v"),
'\u{c}' => String::from(r"\f"),
'\u{8}' => String::from(r"\b"),
'\u{2D}' => String::from(r"\-"),
_ => format!(r"\{ch}"),
},
CharacterKind::Null => String::from(r"\0"),
CharacterKind::UnicodeEscape => {
let hex = &format!("{cp:04X}");
Expand All @@ -402,27 +395,58 @@ fn character_to_string(
let hex = &format!("{cp:02X}");
format!(r"\x{hex}")
}
CharacterKind::Octal => {
CharacterKind::Octal1 => {
let octal = format!("{cp:o}");
format!(r"\{octal}")
}
CharacterKind::SingleEscape => match ch {
'\n' => String::from(r"\n"),
'\r' => String::from(r"\r"),
'\t' => String::from(r"\t"),
'\u{b}' => String::from(r"\v"),
'\u{c}' => String::from(r"\f"),
'\u{8}' => String::from(r"\b"),
'\u{2D}' => String::from(r"\-"),
_ => format!(r"\{ch}"),
},
CharacterKind::Octal2 => {
let octal = format!("{cp:02o}");
format!(r"\{octal}")
}
CharacterKind::Octal3 => {
let octal = format!("{cp:03o}");
format!(r"\{octal}")
}
};

(result, false)
}

// ---

fn write_join<S, I, E>(f: &mut fmt::Formatter<'_>, sep: S, items: I) -> fmt::Result
where
S: AsRef<str>,
E: Display,
I: IntoIterator<Item = E>,
{
write_join_with(f, sep, items, |iter| iter.next().map(|it| it.to_string()))
}

fn write_join_with<S, I, E, F>(f: &mut fmt::Formatter<'_>, sep: S, items: I, next: F) -> fmt::Result
where
S: AsRef<str>,
E: Display,
I: IntoIterator<Item = E>,
F: Fn(&mut Peekable<I::IntoIter>) -> Option<String>,
{
let sep = sep.as_ref();
let iter = &mut items.into_iter().peekable();

if let Some(first) = next(iter) {
write!(f, "{first}")?;
}

while let Some(it) = next(iter) {
write!(f, "{sep}{it}")?;
}

Ok(())
}

#[cfg(test)]
mod test {
use crate::{Parser, ParserOptions};
use oxc_allocator::Allocator;

type Case<'a> = (
Expand Down Expand Up @@ -505,23 +529,24 @@ mod test {
(r"/\5/", None),
(r"/\6/", None),
(r"/\7/", None),
// Remove leading zeroes --
(r"/\00/", Some(r"/\0/")),
(r"/\07/", Some(r"/\7/")),
// --
(r"/\00/", None),
(r"/\07/", None),
(r"/\30/", None),
(r"/\37/", None),
(r"/\40/", None),
(r"/\47/", None),
(r"/\70/", None),
(r"/\77/", None),
// Remove leading zeroes --
(r"/\000/", Some(r"/\0/")),
(r"/\007/", Some(r"/\7/")),
(r"/\070/", Some(r"/\70/")),
// --
(r"/\000/", None),
(r"/\007/", None),
(r"/\070/", None),
(r"/\300/", None),
(r"/\307/", None),
(r"/\370/", None),
(r"/\377/", None),
(r"/\0111/", None),
(r"/\0022/", None),
(r"/\0003/", None),
(r"/(.)\1/", None),
// Identity escape from: <https://github.com/tc39/test262/blob/d62fa93c8f9ce5e687c0bbaa5d2b59670ab2ff60/test/annexB/language/literals/regexp/identity-escape.js>
(r"/\C/", None),
Expand Down Expand Up @@ -553,7 +578,6 @@ mod test {
];

fn test_display(allocator: &Allocator, (source, expect): &Case) {
use crate::{Parser, ParserOptions};
let expect = expect.unwrap_or(source);
let actual = Parser::new(allocator, source, ParserOptions::default()).parse().unwrap();
assert_eq!(expect, actual.to_string());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,9 @@ impl<'alloc> CloneIn<'alloc> for CharacterKind {
Self::HexadecimalEscape => CharacterKind::HexadecimalEscape,
Self::Identifier => CharacterKind::Identifier,
Self::Null => CharacterKind::Null,
Self::Octal => CharacterKind::Octal,
Self::Octal1 => CharacterKind::Octal1,
Self::Octal2 => CharacterKind::Octal2,
Self::Octal3 => CharacterKind::Octal3,
Self::SingleEscape => CharacterKind::SingleEscape,
Self::Symbol => CharacterKind::Symbol,
Self::UnicodeEscape => CharacterKind::UnicodeEscape,
Expand Down
5 changes: 1 addition & 4 deletions tasks/coverage/parser_test262.snap
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,8 @@ commit: d62fa93c

parser_test262 Summary:
AST Parsed : 43765/43765 (100.00%)
Positive Passed: 43764/43765 (100.00%)
Positive Passed: 43765/43765 (100.00%)
Negative Passed: 4237/4237 (100.00%)
Expect to Parse: tasks/coverage/test262/test/annexB/language/literals/regexp/legacy-octal-escape.js

× Regular Expression mismatch: \03 \3

× '0'-prefixed octal literals and octal escape sequences are deprecated
╭─[test262/test/annexB/language/expressions/template-literal/legacy-octal-escape-sequence-strict.js:19:4]
Expand Down
5 changes: 1 addition & 4 deletions tasks/coverage/semantic_test262.snap
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ commit: d62fa93c

semantic_test262 Summary:
AST Parsed : 43765/43765 (100.00%)
Positive Passed: 43564/43765 (99.54%)
Positive Passed: 43565/43765 (99.54%)
tasks/coverage/test262/test/annexB/language/function-code/if-decl-else-decl-a-func-block-scoping.js
semantic error: Symbol scope ID mismatch:
after transform: SymbolId(3): ScopeId(4294967294)
Expand Down Expand Up @@ -1119,9 +1119,6 @@ semantic error: Symbol scope ID mismatch:
after transform: SymbolId(0): ScopeId(4294967294)
rebuilt : SymbolId(0): ScopeId(4294967294)

tasks/coverage/test262/test/annexB/language/literals/regexp/legacy-octal-escape.js
semantic error: Regular Expression mismatch: \03 \3

tasks/coverage/test262/test/language/module-code/eval-rqstd-once.js
semantic error: Bindings mismatch:
after transform: ScopeId(0): ["dflt1", "dflt2", "dflt3", "global", "ns1", "ns3"]
Expand Down