Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(parser): parse regular expression with regex parser #4998

Merged
merged 1 commit into from
Aug 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ oxc_traverse = { version = "0.24.3", path = "crates/oxc_traverse" }
oxc_module_lexer = { version = "0.24.3", path = "crates/oxc_module_lexer" }
oxc_cfg = { version = "0.24.3", path = "crates/oxc_cfg" }
oxc_isolated_declarations = { version = "0.24.3", path = "crates/oxc_isolated_declarations" }
oxc_regular_expression = { version = "0.24.3", path = "crates/oxc_regular_expression" }
oxc_transform_napi = { version = "0.24.3", path = "napi/transform" }

# publish = false
Expand Down
11 changes: 6 additions & 5 deletions crates/oxc_parser/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,12 @@ workspace = true
doctest = false

[dependencies]
oxc_allocator = { workspace = true }
oxc_span = { workspace = true }
oxc_ast = { workspace = true }
oxc_syntax = { workspace = true }
oxc_diagnostics = { workspace = true }
oxc_allocator = { workspace = true }
oxc_span = { workspace = true }
oxc_ast = { workspace = true }
oxc_syntax = { workspace = true }
oxc_diagnostics = { workspace = true }
oxc_regular_expression = { workspace = true }

assert-unchecked = { workspace = true }
bitflags = { workspace = true }
Expand Down
29 changes: 28 additions & 1 deletion crates/oxc_parser/src/js/expression.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use oxc_allocator::Box;
use oxc_ast::ast::*;
use oxc_diagnostics::Result;
use oxc_regular_expression::ast::Pattern;
use oxc_span::{Atom, Span};
use oxc_syntax::{
number::{BigintBase, NumberBase},
Expand Down Expand Up @@ -342,15 +343,41 @@ impl<'a> ParserImpl<'a> {
let (pattern_end, flags) = self.read_regex();
let pattern_start = self.cur_token().start + 1; // +1 to exclude `/`
let pattern = &self.source_text[pattern_start as usize..pattern_end as usize];

self.bump_any();

let _pattern = self
.options
.parse_regular_expression
.then(|| self.parse_regex_pattern(pattern_start, pattern, flags));

self.ast.reg_exp_literal(
self.end_span(span),
EmptyObject,
RegExp { pattern: self.ast.atom(pattern), flags },
)
}

/// Parses the body of a regular expression literal into a [`Pattern`].
///
/// * `span_offset` is forwarded unchanged to the pattern parser's options.
/// * `pattern` is the pattern text handed to the pattern parser.
/// * `flags` selects the parsing mode: unicode mode is enabled when either
///   `RegExpFlags::U` or `RegExpFlags::V` is set, and unicode sets mode
///   when `RegExpFlags::V` is set.
///
/// Returns `Some(pattern)` on success. On failure the diagnostic is handed
/// to `self.error` and `None` is returned.
fn parse_regex_pattern(
    &mut self,
    span_offset: u32,
    pattern: &'a str,
    flags: RegExpFlags,
) -> Option<Pattern<'a>> {
    use oxc_regular_expression::{ParserOptions, PatternParser};

    // `V` implies unicode mode as well as unicode sets mode.
    let unicode_sets_mode = flags.contains(RegExpFlags::V);
    let unicode_mode = unicode_sets_mode || flags.contains(RegExpFlags::U);
    let options = ParserOptions { span_offset, unicode_mode, unicode_sets_mode };

    PatternParser::new(self.ast.allocator, pattern, options)
        .parse()
        // Report the failure, then collapse the result into an `Option`.
        .map_err(|diagnostic| self.error(diagnostic))
        .ok()
}

pub(crate) fn parse_literal_string(&mut self) -> Result<StringLiteral<'a>> {
if !self.at(Kind::Str) {
return Err(self.unexpected());
Expand Down
11 changes: 10 additions & 1 deletion crates/oxc_parser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,11 @@ pub struct ParserReturn<'a> {
/// Parse options
#[derive(Debug, Clone, Copy)]
pub struct ParseOptions {
/// Whether to parse regular expressions or not.
///
/// Default: false
pub parse_regular_expression: bool,

/// Allow return outside of function
///
/// By default, a return statement at the top level raises an error.
Expand All @@ -124,7 +129,11 @@ pub struct ParseOptions {

impl Default for ParseOptions {
fn default() -> Self {
Self { allow_return_outside_function: false, preserve_parens: true }
Self {
parse_regular_expression: false,
allow_return_outside_function: false,
preserve_parens: true,
}
}
}

Expand Down
2 changes: 1 addition & 1 deletion crates/oxc_regular_expression/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "oxc_regular_expression"
version = "0.0.0"
version = "0.24.3"
publish = false
authors.workspace = true
categories.workspace = true
Expand Down
2 changes: 1 addition & 1 deletion crates/oxc_regular_expression/README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# oxc_regexp_parser
# oxc_regular_expression

Implements ECMAScript® 2024 Language Specification

Expand Down
12 changes: 10 additions & 2 deletions crates/oxc_regular_expression/src/body_parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ mod test {
use crate::{ParserOptions, PatternParser};
use oxc_allocator::Allocator;

// NOTE: These may be useless when integration tests are added
#[test]
fn should_pass() {
let allocator = Allocator::default();
Expand Down Expand Up @@ -40,6 +39,7 @@ mod test {
(r"^(?=ab)\b(?!cd)(?<=ef)\B(?<!gh)$", ParserOptions::default()),
("a.b..", ParserOptions::default()),
(r"\d\D\s\S\w\W", ParserOptions::default()),
(r"\x", ParserOptions::default()),
(
r"\p{Emoji_Presentation}\P{Script_Extensions=Latin}\p{Sc}|\p{Basic_Emoji}",
ParserOptions::default(),
Expand All @@ -48,8 +48,10 @@ mod test {
r"\p{Emoji_Presentation}\P{Script_Extensions=Latin}\p{Sc}|\p{P}",
ParserOptions::default().with_unicode_mode(),
),
(r"^\p{General_Category=cntrl}+$", ParserOptions::default().with_unicode_mode()),
(r"\p{Basic_Emoji}", ParserOptions::default().with_unicode_sets_mode()),
(r"\n\cM\0\x41\u1f60\.\/", ParserOptions::default()),
(r"\c0", ParserOptions::default()),
(r"\0", ParserOptions::default()),
(r"\0", ParserOptions::default().with_unicode_mode()),
(r"\u", ParserOptions::default()),
Expand Down Expand Up @@ -137,7 +139,8 @@ mod test {
("a{,", ParserOptions::default().with_unicode_mode()),
("(?=a", ParserOptions::default()),
("(?<!a", ParserOptions::default()),
(r"\xa", ParserOptions::default()),
(r"\c0", ParserOptions::default().with_unicode_mode()),
(r"\xa", ParserOptions::default().with_unicode_mode()),
(r"a\u", ParserOptions::default().with_unicode_mode()),
(r"\p{Emoji_Presentation", ParserOptions::default().with_unicode_mode()),
(r"\p{Script=", ParserOptions::default().with_unicode_mode()),
Expand All @@ -152,6 +155,10 @@ mod test {
("a(?:", ParserOptions::default()),
("(a", ParserOptions::default()),
("(?<a>", ParserOptions::default()),
(r"(?<a\>.)", ParserOptions::default()),
(r"(?<a\>.)", ParserOptions::default().with_unicode_mode()),
(r"(?<\>.)", ParserOptions::default()),
(r"(?<\>.)", ParserOptions::default().with_unicode_mode()),
("(?)", ParserOptions::default()),
("(?=a){1}", ParserOptions::default().with_unicode_mode()),
("(?!a){1}", ParserOptions::default().with_unicode_mode()),
Expand Down Expand Up @@ -183,6 +190,7 @@ mod test {
let allocator = Allocator::default();

for (source_text, options, is_err) in &[
// No tests for 4,294,967,295 left parens
(r"(?<n>..)(?<n>..)", ParserOptions::default(), true),
(r"a{2,1}", ParserOptions::default(), true),
(r"(?<a>)\k<n>", ParserOptions::default(), true),
Expand Down
Loading
Loading