diff --git a/fuzz/regressions/clusterfuzz-testcase-minimized-fuzz_regex_lite_match-4692452983046144 b/fuzz/regressions/clusterfuzz-testcase-minimized-fuzz_regex_lite_match-4692452983046144 new file mode 100644 index 000000000..184b6ed70 Binary files /dev/null and b/fuzz/regressions/clusterfuzz-testcase-minimized-fuzz_regex_lite_match-4692452983046144 differ diff --git a/regex-lite/src/hir/parse.rs b/regex-lite/src/hir/parse.rs index 33bb97a7d..0dcccdd46 100644 --- a/regex-lite/src/hir/parse.rs +++ b/regex-lite/src/hir/parse.rs @@ -377,6 +377,24 @@ impl<'a> Parser<'a> { /// own routine. impl<'a> Parser<'a> { pub(super) fn parse(&self) -> Result { + let hir = self.parse_inner()?; + // While we also check nesting during parsing, that only checks the + // number of recursive parse calls. It does not necessarily cover + // all possible recursive nestings of the Hir itself. For example, + // repetition operators don't require recursive parse calls. So one + // can stack them arbitrarily without overflowing the stack in the + // *parser*. But then if one recurses over the resulting Hir, a stack + // overflow is possible. So here we check the Hir nesting level + // thoroughly to ensure it isn't nested too deeply. + // + // Note that we do still need the nesting limit check in the parser as + // well, since that will avoid overflowing the stack during parse time + // before the complete Hir value is constructed. + check_hir_nesting(&hir, self.config.nest_limit)?; + Ok(hir) + } + + fn parse_inner(&self) -> Result { let depth = self.increment_depth()?; let mut alternates = vec![]; let mut concat = vec![]; @@ -806,7 +824,7 @@ impl<'a> Parser<'a> { if self.bump_if("?P<") || self.bump_if("?<") { let index = self.next_capture_index()?; let name = Some(Box::from(self.parse_capture_name()?)); - let sub = Box::new(self.parse()?); + let sub = Box::new(self.parse_inner()?); let cap = hir::Capture { index, name, sub }; Ok(Some(Hir::capture(cap))) } else if self.bump_if("?") { @@ -826,11 +844,11 @@ impl<'a> Parser<'a> { } else { assert_eq!(':', self.char()); self.bump(); - self.parse().map(Some) + self.parse_inner().map(Some) } } else { let index = self.next_capture_index()?; - let sub = Box::new(self.parse()?); + let sub = Box::new(self.parse_inner()?); let cap = hir::Capture { index, name: None, sub }; Ok(Some(Hir::capture(cap))) } @@ -1263,6 +1281,38 @@ impl<'a> Parser<'a> { } } +/// This checks the depth of the given `Hir` value, and if it exceeds the given +/// limit, then an error is returned. +fn check_hir_nesting(hir: &Hir, limit: u32) -> Result<(), Error> { + fn recurse(hir: &Hir, limit: u32, depth: u32) -> Result<(), Error> { + if depth > limit { + return Err(Error::new(ERR_TOO_MUCH_NESTING)); + } + let Some(next_depth) = depth.checked_add(1) else { + return Err(Error::new(ERR_TOO_MUCH_NESTING)); + }; + match *hir.kind() { + HirKind::Empty + | HirKind::Char(_) + | HirKind::Class(_) + | HirKind::Look(_) => Ok(()), + HirKind::Repetition(hir::Repetition { ref sub, .. }) => { + recurse(sub, limit, next_depth) + } + HirKind::Capture(hir::Capture { ref sub, .. }) => { + recurse(sub, limit, next_depth) + } + HirKind::Concat(ref subs) | HirKind::Alternation(ref subs) => { + for sub in subs.iter() { + recurse(sub, limit, next_depth)?; + } + Ok(()) + } + } + } + recurse(hir, limit, 0) +} + /// Converts the given Hir to a literal char if the Hir is just a single /// character. Otherwise this returns an error. /// @@ -1344,12 +1394,12 @@ mod tests { use super::*; fn p(pattern: &str) -> Hir { - Parser::new(Config::default(), pattern).parse().unwrap() + Parser::new(Config::default(), pattern).parse_inner().unwrap() } fn perr(pattern: &str) -> String { Parser::new(Config::default(), pattern) - .parse() + .parse_inner() .unwrap_err() .to_string() } diff --git a/regex-lite/tests/fuzz/mod.rs b/regex-lite/tests/fuzz/mod.rs index 6eb37b50b..747aab040 100644 --- a/regex-lite/tests/fuzz/mod.rs +++ b/regex-lite/tests/fuzz/mod.rs @@ -14,6 +14,23 @@ fn captures_wrong_order_min() { let _ = run(data); } +// Simpler regression test from a failure found by OSS-fuzz[1]. This test, +// when it failed, caused a stack overflow. We fixed it by adding another nest +// check on the Hir value itself, since the Hir type can have depth added to +// it without recursive calls in the parser (which is where the existing nest +// check was). +// +// Many thanks to Addison Crump for coming up with this test case[2]. +// +// [1]: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=60608 +// [2]: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=60608#c1 +#[test] +fn many_zero_to_many_reps() { + let pat = format!(".{}", "*".repeat(1 << 15)); + let Ok(re) = regex_lite::RegexBuilder::new(&pat).build() else { return }; + re.is_match(""); +} + // This is the fuzz target function. We duplicate it here since this is the // thing we use to interpret the data. It is ultimately what we want to // succeed.