diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index 9114f04d..0b62d268 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -1351,7 +1351,7 @@ result must be greater than zero. allow_sign if < 0, sign not allowed; if >= 0, sign is relative to this max_value the largest number allowed; you must not pass a value for max_value larger than - INT_MAX/10 - 1 because read_number() relies on max_value to + INT_MAX/10 - 1 because this function relies on max_value to avoid integer overflow max_error the error to give for an over-large number intptr where to put the result @@ -1371,6 +1371,8 @@ uint32_t n = 0; PCRE2_SPTR ptr = *ptrptr; BOOL yield = FALSE; +PCRE2_ASSERT(max_value <= INT_MAX/10 - 1); + *errorcodeptr = 0; if (allow_sign >= 0 && ptr < ptrend) @@ -1395,6 +1397,7 @@ while (ptr < ptrend && IS_DIGIT(*ptr)) if (n > max_value) { *errorcodeptr = max_error; + while (ptr < ptrend && IS_DIGIT(*ptr)) ptr++; goto EXIT; } } @@ -1940,19 +1943,24 @@ else ptr--; /* Back to the digit */ /* As we know we are at a digit, the only possible error from - read_number() is a number that is too large to be a group number. In this - case we treat the group number as too-large (since it may be larger than - INT_MAX we cannot return it for the caller to check). - - \1 to \9 are always back references. \8x and \9x are too; \1x to \7x - are octal escapes if there are not that many previous captures. */ + read_number() is a number that is too large to be a group number. Because + that number might still be valid when read as an octal escape, no error is + set on failure (max_error is passed as 0); instead s is set to the bogus + value INT_MAX, which is used below to set the proper error unless we fall through. */ if (!read_number(&ptr, ptrend, -1, MAX_GROUP_NUMBER, 0, &s, errorcodeptr)) s = INT_MAX; - if (s < 10 || oldptr[-1] >= CHAR_8 || (unsigned)s <= bracount) + /* \1 to \9 are always back references. 
\8x and \9x are too; \1x to \7x + are octal escapes if there are not that many previous captures. */ + + if (s < 10 || c >= CHAR_8 || (unsigned)s <= bracount) { - if (s > (int)MAX_GROUP_NUMBER) *errorcodeptr = ERR61; + /* A successful read_number() should never yield s > MAX_GROUP_NUMBER, but + the check is kept to be safe and because it also catches the bogus INT_MAX + value that is set above when that function fails. */ + + if ((unsigned)s > MAX_GROUP_NUMBER) *errorcodeptr = ERR61; else escape = -s; /* Indicates a back reference */ break; } diff --git a/testdata/testinput5 b/testdata/testinput5 index 5aae6ee0..9f7d9ca7 100644 --- a/testdata/testinput5 +++ b/testdata/testinput5 @@ -75,6 +75,12 @@ # --------------------------------------------------------------------- +# Use no_start_optimize because the first code unit is different in 8-bit from +# the wider modes. +/\65535/IB,utf,no_start_optimize + +/\65536/IB,utf,no_start_optimize + /\x{110000}/IB,utf /\o{4200000}/IB,utf diff --git a/testdata/testoutput2 b/testdata/testoutput2 index dc66112b..971ae55f 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -5506,13 +5506,13 @@ Subject length lower bound = 4 No match /a{11111111111111111111}/I -Failed: error 105 at offset 8: number too big in {} quantifier +Failed: error 105 at offset 22: number too big in {} quantifier /(){64294967295}/I -Failed: error 105 at offset 9: number too big in {} quantifier +Failed: error 105 at offset 14: number too big in {} quantifier /(){2,4294967295}/I -Failed: error 105 at offset 11: number too big in {} quantifier +Failed: error 105 at offset 15: number too big in {} quantifier "(?i:a)(?i:b)(?i:c)(?i:d)(?i:e)(?i:f)(?i:g)(?i:h)(?i:i)(?i:j)(k)(?i:l)A\1B"I Capture group count = 1 @@ -8374,7 +8374,7 @@ No match Failed: error 166 at offset 7: (*MARK) must have an argument /\g6666666666/ -Failed: error 161 at offset 7: subpattern number is too big +Failed: error 161 at offset 12: subpattern number is too big /[\g6666666666]/B 
------------------------------------------------------------------ @@ -14645,7 +14645,7 @@ Failed: error 162 at offset 4: subpattern name expected "(?J:(?|(?'R')(\k'R')|((?'R'))))" /(?<=|(\,\$(?73591620449005828816)\xa8.{7}){6}\x09)/ -Failed: error 161 at offset 17: subpattern number is too big +Failed: error 161 at offset 32: subpattern number is too big /^(?:(?(1)x|)+)+$()/B ------------------------------------------------------------------ @@ -14836,7 +14836,7 @@ No match 0: ab /(?(8000000000/ -Failed: error 161 at offset 8: subpattern number is too big +Failed: error 161 at offset 13: subpattern number is too big /((?(R8000000000)))/ Failed: error 161 at offset 9: subpattern number is too big @@ -14847,7 +14847,7 @@ Failed: error 161 at offset 9: subpattern number is too big No match /(?(1)()\983040\2)/ -Failed: error 161 at offset 13: subpattern number is too big +Failed: error 161 at offset 14: subpattern number is too big /(*LIMIT_MATCH=)abc/ Failed: error 160 at offset 14: (*VERB) not recognized or malformed @@ -19219,15 +19219,15 @@ No match # larger than GROUP_MAX, smaller than INT_MAX /a\800000b/ -Failed: error 161 at offset 7: subpattern number is too big +Failed: error 161 at offset 8: subpattern number is too big # coming up on INT_MAX... 
(used to succeed with \8 being literal 8) /a\800000000b/ -Failed: error 161 at offset 7: subpattern number is too big +Failed: error 161 at offset 11: subpattern number is too big # over INT_MAX (used to succeed with \8 being literal 8) /a\8000000000b/ -Failed: error 161 at offset 7: subpattern number is too big +Failed: error 161 at offset 12: subpattern number is too big # End of testinput2 Error -70: PCRE2_ERROR_BADDATA (unknown error number) diff --git a/testdata/testoutput5 b/testdata/testoutput5 index dcd387ed..3ddcceac 100644 --- a/testdata/testoutput5 +++ b/testdata/testoutput5 @@ -98,6 +98,28 @@ No match # --------------------------------------------------------------------- +# Use no_start_optimize because the first code unit is different in 8-bit from +# the wider modes. +/\65535/IB,utf,no_start_optimize +------------------------------------------------------------------ + Bra + \x{1ad}35 + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: no_start_optimize utf + +/\65536/IB,utf,no_start_optimize +------------------------------------------------------------------ + Bra + \x{1ad}36 + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: no_start_optimize utf + /\x{110000}/IB,utf Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large