From 33689258504038a6745b657c40df62d8ea828f2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?= Date: Sat, 30 Sep 2023 15:03:14 -0700 Subject: [PATCH] minor cleanup to previous fix Move tests to the non UTF file, remove dead code and document another bugfix --- src/pcre2_compile.c | 11 ----------- src/pcre2_internal.h | 8 ++++---- testdata/testinput2 | 17 +++++++++++++++++ testdata/testinput5 | 12 ------------ testdata/testoutput2 | 29 +++++++++++++++++++++++++++++ testdata/testoutput5 | 16 ---------------- 6 files changed, 50 insertions(+), 43 deletions(-) diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index f72ddc822..1b7775bc2 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -2786,7 +2786,6 @@ int escape; int i; BOOL inescq = FALSE; BOOL inverbname = FALSE; -BOOL next_is_literal = FALSE; BOOL utf = (options & PCRE2_UTF) != 0; BOOL auto_callout = (options & PCRE2_AUTO_CALLOUT) != 0; BOOL isdupname; @@ -2882,16 +2881,6 @@ while (ptr < ptrend) thisptr = ptr; GETCHARINCTEST(c, ptr); - /* Handle cases where previous processing has determined that the next - character is literal. */ - - if (next_is_literal) - { - PARSED_LITERAL(c, parsed_pattern); - next_is_literal = FALSE; - continue; /* Next character */ - } - /* Copy quoted literals until \E, allowing for the possibility of automatic callouts, except when processing a (*VERB) "name". */ diff --git a/src/pcre2_internal.h b/src/pcre2_internal.h index 8f667114a..95737c289 100644 --- a/src/pcre2_internal.h +++ b/src/pcre2_internal.h @@ -1345,10 +1345,10 @@ mode rather than an escape sequence. It is also used for [^] in JavaScript compatibility mode, and for \C in non-utf mode. In non-DOTALL mode, "." behaves like \N. -ESC_ub is a special return from check_escape() when, in BSUX mode, \u{ is not -followed by hex digits and }, in which case it should mean a literal "u" -followed by a literal "{". This hack is necessary for cases like \u{ 12} -because without it, this is interpreted as u{12} now that spaces are allowed in +ESC_ub is a special return from check_escape() when, in BSUX mode, \u{ is not +followed by hex digits and }, in which case it should mean a literal "u" +followed by a literal "{". This hack is necessary for cases like \u{ 12} +because without it, this is interpreted as u{12} now that spaces are allowed in quantifiers. Negative numbers are used to encode a backreference (\1, \2, \3, etc.) in diff --git a/testdata/testinput2 b/testdata/testinput2 index ddd89f09d..a741cde28 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -3420,6 +3420,23 @@ /^\u{7a}/extra_alt_bsux zoo +/\u{2}/B,extra_alt_bsux + \x02 +\= Expect no match + uu + +/\u{}/extra_alt_bsux + u{} + +/\u{Q12}/extra_alt_bsux + --u{Q12}-- + +/\u{ 12}/extra_alt_bsux + --u{ 12}-- + +/\u{{3}}/extra_alt_bsux + --u{{{}-- + /(?(?=c)c|d)++Y/B /(?(?=c)c|d)*+Y/B diff --git a/testdata/testinput5 b/testdata/testinput5 index 0068cea43..7bdfd7307 100644 --- a/testdata/testinput5 +++ b/testdata/testinput5 @@ -822,18 +822,6 @@ \= Expect no match \x{1bb1} -/\u{}/extra_alt_bsux - u{} - -/\u{ 12}/extra_alt_bsux - --u{ 12}-- - -/\u{Q12}/extra_alt_bsux - --u{Q12}-- - -/\u{{3}}/extra_alt_bsux - --u{{{}-- - /\u/utf,alt_bsux \\u diff --git a/testdata/testoutput2 b/testdata/testoutput2 index ea38934bd..3b388378d 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -11358,6 +11358,35 @@ No match zoo 0: z +/\u{2}/B,extra_alt_bsux +------------------------------------------------------------------ + Bra + \x02 + Ket + End +------------------------------------------------------------------ + \x02 + 0: \x02 +\= Expect no match + uu +No match + +/\u{}/extra_alt_bsux + u{} + 0: u{} + +/\u{Q12}/extra_alt_bsux + --u{Q12}-- + 0: u{Q12} + +/\u{ 12}/extra_alt_bsux + --u{ 12}-- + 0: u{ 12} + +/\u{{3}}/extra_alt_bsux + --u{{{}-- + 0: u{{{} + /(?(?=c)c|d)++Y/B ------------------------------------------------------------------ Bra diff --git a/testdata/testoutput5 b/testdata/testoutput5 index e752c7691..eeea7d665 100644 --- a/testdata/testoutput5 +++ b/testdata/testoutput5 @@ -1744,22 +1744,6 @@ Failed: error 173 at offset 6: disallowed Unicode code point (>= 0xd800 && <= 0x \x{1bb1} No match -/\u{}/extra_alt_bsux - u{} - 0: u{} - -/\u{ 12}/extra_alt_bsux - --u{ 12}-- - 0: u{ 12} - -/\u{Q12}/extra_alt_bsux - --u{Q12}-- - 0: u{Q12} - -/\u{{3}}/extra_alt_bsux - --u{{{}-- - 0: u{{{} - /\u/utf,alt_bsux \\u 0: u