diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs index 8c8831cbd469a..9206f3bec7780 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs @@ -764,7 +764,7 @@ private static void EmitTryFindNextPossibleStartingPosition(IndentedTextWriter w } // We're done. Patch up any additional declarations. - ReplaceAdditionalDeclarations(writer, additionalDeclarations, additionalDeclarationsPosition, additionalDeclarationsIndent); + InsertAdditionalDeclarations(writer, additionalDeclarations, additionalDeclarationsPosition, additionalDeclarationsIndent); return; // Emit a goto for the specified label. @@ -1379,7 +1379,7 @@ private static void EmitTryMatchAtCurrentPosition(IndentedTextWriter writer, Reg // We're done with the match. // Patch up any additional declarations. - ReplaceAdditionalDeclarations(writer, additionalDeclarations, additionalDeclarationsPosition, additionalDeclarationsIndent); + InsertAdditionalDeclarations(writer, additionalDeclarations, additionalDeclarationsPosition, additionalDeclarationsIndent); // And emit any required helpers. if (additionalLocalFunctions.Count != 0) @@ -5054,14 +5054,14 @@ string EmitAllAsciiContained() } /// - /// Replaces in with - /// all of the variable declarations in . + /// Inserts all of the variable declarations in <paramref name="declarations"/> into the + /// <paramref name="writer"/> at <paramref name="position"/> with <paramref name="indent"/>. /// /// The writer around a StringWriter to have additional declarations inserted into. /// The additional declarations to insert. /// The position into the writer at which to insert the additional declarations. /// The indentation to use for the additional declarations. 
- private static void ReplaceAdditionalDeclarations(IndentedTextWriter writer, HashSet declarations, int position, int indent) + private static void InsertAdditionalDeclarations(IndentedTextWriter writer, HashSet declarations, int position, int indent) { if (declarations.Count != 0) { diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/Strings.resx b/src/libraries/System.Text.RegularExpressions/gen/Resources/Strings.resx index cb45fe9ba96f2..0b2904b0bfac0 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/Strings.resx +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/Strings.resx @@ -270,10 +270,6 @@ Unterminated (?#...) comment. - - Regex replacements with substitutions of groups are not supported with RegexOptions.NonBacktracking. - {Locked="RegexOptions.NonBacktracking"} - Use 'GeneratedRegexAttribute' to generate the regular expression implementation at compile-time. diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.cs.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.cs.xlf index f4356b6fe000e..d39c7ec6b56a6 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.cs.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.cs.xlf @@ -167,11 +167,6 @@ Výsledek nelze volat pro shodu, která se nezdařila. - - Regex replacements with substitutions of groups are not supported with RegexOptions.NonBacktracking. - Nahrazení regulárních výrazů pomocí substitucí skupin se u RegexOptions.NonBacktracking nepodporuje. - {Locked="RegexOptions.NonBacktracking"} - Collection is read-only. Kolekce je jen pro čtení. 
diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.de.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.de.xlf index 19c4e6db146ad..8e267826a243a 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.de.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.de.xlf @@ -167,11 +167,6 @@ Das Ergebnis kann nicht für eine fehlgeschlagene Übereinstimmung aufgerufen werden. - - Regex replacements with substitutions of groups are not supported with RegexOptions.NonBacktracking. - RegEx-Ersätze durch Austausch von Gruppen werden bei RegexOptions.NonBacktracking nicht unterstützt. - {Locked="RegexOptions.NonBacktracking"} - Collection is read-only. Die Sammlung ist schreibgeschützt. diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.es.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.es.xlf index ea4db738e9018..e423171d0c07f 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.es.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.es.xlf @@ -167,11 +167,6 @@ No se puede llamar al resultado si no se encuentra ninguna coincidencia. - - Regex replacements with substitutions of groups are not supported with RegexOptions.NonBacktracking. - Los reemplazos de regex con sustituciones de grupos no se admiten con RegexOptions.NonBacktracking. - {Locked="RegexOptions.NonBacktracking"} - Collection is read-only. La colección es de sólo lectura. 
diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.fr.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.fr.xlf index 62764c030f400..dbc77bfb47685 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.fr.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.fr.xlf @@ -167,11 +167,6 @@ Le résultat ne peut pas être appelé sur un Match ayant échoué. - - Regex replacements with substitutions of groups are not supported with RegexOptions.NonBacktracking. - Les remplacements d'expressions régulières avec des substitutions de groupes ne sont pas pris en charge avec RegexOptions.NonBacktracking. - {Locked="RegexOptions.NonBacktracking"} - Collection is read-only. La collection est en lecture seule. diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.it.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.it.xlf index 8e0463a68fe10..58b1524da3d7b 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.it.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.it.xlf @@ -167,11 +167,6 @@ Impossibile chiamare Result su un Match non riuscito. - - Regex replacements with substitutions of groups are not supported with RegexOptions.NonBacktracking. - Le sostituzioni regex con sostituzioni di gruppi non sono supportate con RegexOptions.NonBacktracking. - {Locked="RegexOptions.NonBacktracking"} - Collection is read-only. La raccolta è di sola lettura. 
diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ja.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ja.xlf index 2f7e17f7a7c52..ac87dd3a81363 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ja.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ja.xlf @@ -167,11 +167,6 @@ 失敗した Match で Result を呼び出すことはできません。 - - Regex replacements with substitutions of groups are not supported with RegexOptions.NonBacktracking. - RegexOptions.NonBacktracking では、グループの置換による正規表現の置換はサポートされていません。 - {Locked="RegexOptions.NonBacktracking"} - Collection is read-only. コレクションは読み取り専用です。 diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ko.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ko.xlf index c5c38d568c339..b2c87b5648d59 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ko.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ko.xlf @@ -167,11 +167,6 @@ 실패한 Match에서 결과를 호출할 수 없습니다. - - Regex replacements with substitutions of groups are not supported with RegexOptions.NonBacktracking. - RegexOptions.NonBacktracking에서는 그룹을 대체하는 Regex 대체가 지원되지 않습니다. - {Locked="RegexOptions.NonBacktracking"} - Collection is read-only. 읽기 전용 컬렉션입니다. diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pl.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pl.xlf index 67f9c3fb9b246..bd8c1ffc42832 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pl.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pl.xlf @@ -167,11 +167,6 @@ Nie można wywołać wyniku błędnego dopasowania. - - Regex replacements with substitutions of groups are not supported with RegexOptions.NonBacktracking. 
- Zamiany wyrażeń regularnych z podstawieniami grup nie są obsługiwane w przypadku metody RegexOptions.NonBacktracking. - {Locked="RegexOptions.NonBacktracking"} - Collection is read-only. Kolekcja jest tylko do odczytu. diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pt-BR.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pt-BR.xlf index b1360da153dbc..4bfcf7e55eae3 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pt-BR.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.pt-BR.xlf @@ -167,11 +167,6 @@ Não é possível chamar resultado quando há falha na correspondência. - - Regex replacements with substitutions of groups are not supported with RegexOptions.NonBacktracking. - Substituições de regex com substituições de grupos não são suportadas com RegexOptions.NonBacktracking. - {Locked="RegexOptions.NonBacktracking"} - Collection is read-only. A coleção é somente leitura. diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ru.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ru.xlf index 7fb059d3c234f..4f8b97ac870f2 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ru.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.ru.xlf @@ -167,11 +167,6 @@ Вызов результата невозможен при сбойном соответствии. - - Regex replacements with substitutions of groups are not supported with RegexOptions.NonBacktracking. - Замена регулярных выражений на группы не поддерживается в RegexOptions.NonBacktracking. - {Locked="RegexOptions.NonBacktracking"} - Collection is read-only. Данная коллекция предназначена только для чтения. 
diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.tr.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.tr.xlf index dbd32fe8d2008..0f90dc4e1b9e9 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.tr.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.tr.xlf @@ -167,11 +167,6 @@ Sonuç, başarısız Eşleştirmede çağrılamaz. - - Regex replacements with substitutions of groups are not supported with RegexOptions.NonBacktracking. - Grup değiştirmeleri içeren normal ifade değiştirmeleri RegexOptions.NonBacktracking ile desteklenmez. - {Locked="RegexOptions.NonBacktracking"} - Collection is read-only. Koleksiyon salt okunur. diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hans.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hans.xlf index 5abd614286ce6..ba9d314ccca2a 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hans.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hans.xlf @@ -167,11 +167,6 @@ 不能对失败的匹配调用结果。 - - Regex replacements with substitutions of groups are not supported with RegexOptions.NonBacktracking. - RegexOptions.NonBacktracking 不支持使用组替换的正则表达式替换。 - {Locked="RegexOptions.NonBacktracking"} - Collection is read-only. 集合是只读的。 diff --git a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hant.xlf b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hant.xlf index feab879e5ea22..070b36b4125be 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hant.xlf +++ b/src/libraries/System.Text.RegularExpressions/gen/Resources/xlf/Strings.zh-Hant.xlf @@ -167,11 +167,6 @@ 無法在已失敗的對應 (Match) 上呼叫結果。 - - Regex replacements with substitutions of groups are not supported with RegexOptions.NonBacktracking. 
- RegexOptions.NonBacktracking 不支援以替代群組取代 Regex。 - {Locked="RegexOptions.NonBacktracking"} - Collection is read-only. 集合是唯讀的。 diff --git a/src/libraries/System.Text.RegularExpressions/src/Resources/Strings.resx b/src/libraries/System.Text.RegularExpressions/src/Resources/Strings.resx index cb8382353725a..8a2221450a990 100644 --- a/src/libraries/System.Text.RegularExpressions/src/Resources/Strings.resx +++ b/src/libraries/System.Text.RegularExpressions/src/Resources/Strings.resx @@ -246,10 +246,6 @@ Unterminated (?#...) comment. - - Regex replacements with substitutions of groups are not supported with RegexOptions.NonBacktracking. - {Locked="RegexOptions.NonBacktracking"} - RegexOptions.NonBacktracking is not supported in conjunction with expressions containing: '{0}'. {Locked="RegexOptions.NonBacktracking"} diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs index d659026d0ae52..7cf3b9fffc74b 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs @@ -2893,7 +2893,12 @@ private string Describe() sb.Append(' ').Append($"index = {M}"); break; case RegexNodeKind.Multi: - sb.Append(" \"").Append(Str).Append('"'); + sb.Append(" \""); + foreach(char c in Str!) 
+ { + sb.Append(RegexCharClass.DescribeChar(c)); + } + sb.Append('"'); break; case RegexNodeKind.Set: case RegexNodeKind.Setloop: diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexOpcode.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexOpcode.cs index 4a7a4ea6d2741..d6b5998180a5f 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexOpcode.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexOpcode.cs @@ -27,9 +27,11 @@ internal enum RegexOpcode /// Repeater of the specified character. /// Operand 0 is the character. Operand 1 is the repetition count. Onerep = 0, + /// Repeater of a single character other than the one specified. /// Operand 0 is the character. Operand 1 is the repetition count. Notonerep = 1, + /// Repeater of a single character matching the specified set /// Operand 0 is index into the strings table of the character class description. Operand 1 is the repetition count. Setrep = 2, @@ -37,9 +39,11 @@ internal enum RegexOpcode /// Greedy loop of the specified character. /// Operand 0 is the character. Operand 1 is the max iteration count. Oneloop = 3, + /// Greedy loop of a single character other than the one specified. /// Operand 0 is the character. Operand 1 is the max iteration count. Notoneloop = 4, + /// Greedy loop of a single character matching the specified set /// Operand 0 is index into the strings table of the character class description. Operand 1 is the repetition count. Setloop = 5, @@ -47,9 +51,11 @@ internal enum RegexOpcode /// Lazy loop of the specified character. /// Operand 0 is the character. Operand 1 is the max iteration count. Onelazy = 6, + /// Lazy loop of a single character other than the one specified. /// Operand 0 is the character. Operand 1 is the max iteration count. 
Notonelazy = 7, + /// Lazy loop of a single character matching the specified set /// Operand 0 is index into the strings table of the character class description. Operand 1 is the repetition count. Setlazy = 8, @@ -57,9 +63,11 @@ internal enum RegexOpcode /// Single specified character. /// Operand 0 is the character. One = 9, + /// Single character other than the one specified. /// Operand 0 is the character. Notone = 10, + /// Single character matching the specified set. /// Operand 0 is index into the strings table of the character class description. Set = 11, diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs index 16e96ea2d69a0..01b09aceed8af 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs @@ -334,7 +334,7 @@ private RegexNode ScanRegex() if (isQuantifier) { - AddUnitOne(CharAt(endpos - 1)); + _unit = RegexNode.CreateOneWithCaseConversion(CharAt(endpos - 1), _options, _culture, ref _caseBehavior); } } @@ -380,7 +380,7 @@ private RegexNode ScanRegex() PopGroup(); PopOptions(); - if (Unit() == null) + if (_unit == null) { goto ContinueOuterScan; } @@ -392,15 +392,15 @@ private RegexNode ScanRegex() throw MakeException(RegexParseError.UnescapedEndingBackslash, SR.UnescapedEndingBackslash); } - AddUnitNode(ScanBackslash(scanOnly: false)!); + _unit = ScanBackslash(scanOnly: false)!; break; case '^': - AddUnitType(UseOptionM() ? RegexNodeKind.Bol : RegexNodeKind.Beginning); + _unit = new RegexNode(UseOptionM() ? RegexNodeKind.Bol : RegexNodeKind.Beginning, _options); break; case '$': - AddUnitType(UseOptionM() ? RegexNodeKind.Eol : RegexNodeKind.EndZ); + _unit = new RegexNode(UseOptionM() ? 
RegexNodeKind.Eol : RegexNodeKind.EndZ, _options); break; case '.': @@ -413,7 +413,7 @@ private RegexNode ScanRegex() case '*': case '+': case '?': - if (Unit() == null) + if (_unit == null) { throw wasPrevQuantifier ? MakeException(RegexParseError.NestedQuantifiersNotParenthesized, SR.Format(SR.NestedQuantifiersNotParenthesized, ch)) : @@ -438,7 +438,7 @@ private RegexNode ScanRegex() ch = RightCharMoveRight(); // Handle quantifiers - while (Unit() != null) + while (_unit != null) { int min = 0, max = 0; @@ -514,7 +514,7 @@ private RegexNode ScanRegex() AddGroup(); - return Unit()!.FinalOptimize(); + return _unit!.FinalOptimize(); } /* @@ -547,7 +547,7 @@ private RegexNode ScanReplacement() if (RightCharMoveRight() == '$') { RegexNode node = ScanDollar(); - AddUnitNode(node); + _unit = node; } AddConcatenate(); @@ -2246,18 +2246,6 @@ private void AddConcatenate(bool lazy, int min, int max) _unit = null; } - /// Returns the current unit - private RegexNode? Unit() => _unit; - - /// Sets the current unit to a single char node - private void AddUnitOne(char ch) => _unit = RegexNode.CreateOneWithCaseConversion(ch, _options, _culture, ref _caseBehavior); - - /// Sets the current unit to a subtree - private void AddUnitNode(RegexNode node) => _unit = node; - - /// Sets the current unit to an assertion of the specified type - private void AddUnitType(RegexNodeKind type) => _unit = new RegexNode(type, _options); - /// Finish the current group (in response to a ')' or end) private void AddGroup() { @@ -2307,8 +2295,6 @@ private RegexParseException MakeException(RegexParseError error, string message) /// Moves the current position to the right. private void MoveRight() => _currentPos++; - private void MoveRight(int i) => _currentPos += i; - /// Moves the current parsing position one to the left. 
private void MoveLeft() => --_currentPos; diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexRunner.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexRunner.cs index c1ea7cb3be905..f07f0c821d0fb 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexRunner.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexRunner.cs @@ -1,71 +1,111 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -// This RegexRunner class is a base class for source-generated regex extensibility -// (and the old CompileToAssembly extensibility). It's not intended to be used -// by anything else. - -// Implementation notes: - -// It provides the driver code that call's the subclass's Scan -// method for either scanning or direct execution. -// It also maintains memory allocation for the backtracking stack, -// the grouping stack and the longjump crawlstack, and provides -// methods to push new subpattern match results into (or remove -// backtracked results from) the Match instance. - using System.ComponentModel; using System.Runtime.CompilerServices; namespace System.Text.RegularExpressions { + /// + /// Base class for source-generated regex extensibility + /// (and the old CompileToAssembly extensibility). + /// It's not intended to be used by anything else. + /// + /// + /// Provides the driver code that calls the subclass's Scan + /// method for either scanning or direct execution. + /// Also maintains memory allocation for the backtracking stack, + /// the grouping stack and the longjump crawlstack, and provides + /// methods to push new subpattern match results into (or remove + /// backtracked results from) the Match instance. 
+ /// [EditorBrowsable(EditorBrowsableState.Never)] public abstract class RegexRunner { - protected internal int runtextbeg; // Beginning of text to search. We now always use a sliced span of the input - // from runtextbeg to runtextend, which means that runtextbeg is now always 0 except - // for CompiledToAssembly scenario which works over the original input. - protected internal int runtextend; // End of text to search. Because we now pass in a sliced span of the input into Scan, - // the runtextend will always match the length of that passed in span except for CompileToAssembly - // scenario, which still works over the original input. - protected internal int runtextstart; // starting point for search - - protected internal string? runtext; // text to search - protected internal int runtextpos; // current position in text - - protected internal int[]? runtrack; // The backtracking stack. Opcodes use this to store data regarding - protected internal int runtrackpos; // what they have matched and where to backtrack to. Each "frame" on - // the stack takes the form of [CodePosition Data1 Data2...], where - // CodePosition is the position of the current opcode and - // the data values are all optional. The CodePosition can be negative, and - // these values (also called "back2") are used by the BranchMark family of opcodes - // to indicate whether they are backtracking after a successful or failed - // match. - // When we backtrack, we pop the CodePosition off the stack, set the current - // instruction pointer to that code position, and mark the opcode - // with a backtracking flag ("Back"). Each opcode then knows how to - // handle its own data. - - protected internal int[]? runstack; // This stack is used to track text positions across different opcodes. - protected internal int runstackpos; // For example, in /(a*b)+/, the parentheses result in a SetMark/CaptureMark - // pair. SetMark records the text position before we match a*b. 
Then - // CaptureMark uses that position to figure out where the capture starts. - // Opcodes which push onto this stack are always paired with other opcodes - // which will pop the value from it later. A successful match should mean - // that this stack is empty. - - protected internal int[]? runcrawl; // The crawl stack is used to keep track of captures. Every time a group - protected internal int runcrawlpos; // has a capture, we push its group number onto the runcrawl stack. In - // the case of a balanced match, we push BOTH groups onto the stack. - - protected internal int runtrackcount; // count of states that may do backtracking - - protected internal Match? runmatch; // result object - protected internal Regex? runregex; // regex object - - private protected RegexRunnerMode _mode; // the mode in which the runner is currently operating - - private int _timeout; // timeout in milliseconds + /// Index of the first character to search + /// + /// We now always use a sliced span of the input + /// from runtextbeg to runtextend, which means that runtextbeg is now always 0 except + /// for the CompileToAssembly scenario, which works over the original input. + /// + protected internal int runtextbeg; + + /// Index just past the last character to search + /// + /// Because we now pass in a sliced span of the input into Scan, + /// the runtextend will always match the length of that passed in span except for CompileToAssembly + /// scenario, which still works over the original input. + /// + protected internal int runtextend; + + /// Index of the starting character for the search. + /// + /// The <see cref="runtextstart"/> differs from <see cref="runtextbeg"/> in that lookbehinds will be able to see text before + /// <see cref="runtextstart"/> but not before <see cref="runtextbeg"/>. + /// + protected internal int runtextstart; + + /// Text to search. May be null if the input was supplied as a span. + protected internal string? 
runtext; + + /// Current position in text + protected internal int runtextpos; + + /// Backtracking stack + /// + /// Opcodes use this to store data regarding + /// what they have matched and where to backtrack to. Each "frame" on + /// the stack takes the form of [CodePosition Data1 Data2...], where + /// CodePosition is the position of the current opcode and + /// the data values are all optional. The CodePosition can be negative, and + /// these values (also called "back2") are used by the BranchMark family of opcodes + /// to indicate whether they are backtracking after a successful or failed + /// match. + /// When we backtrack, we pop the CodePosition off the stack, set the current + /// instruction pointer to that code position, and mark the opcode + /// with a backtracking flag ("Back"). Each opcode then knows how to + /// handle its own data. + /// + protected internal int[]? runtrack; + /// Backtracking stack position + protected internal int runtrackpos; + + /// Utility stack + /// + /// This stack is used to track text positions across different opcodes. + /// For example, in /(a*b)+/, the parentheses result in a SetMark/CaptureMark + /// pair. SetMark records the text position before we match a*b. Then + /// CaptureMark uses that position to figure out where the capture starts. + /// Opcodes which push onto this stack are always paired with other opcodes + /// which will pop the value from it later. A successful match should mean + /// that this stack is empty. + /// + protected internal int[]? runstack; + /// Utility stack position + protected internal int runstackpos; + + /// Crawl stack + /// + /// Every time a group has a capture, we push its group number onto the runcrawl stack. + /// In the case of a balanced match, we push BOTH groups onto the stack. + /// + protected internal int[]? 
runcrawl; + /// Crawl stack position + protected internal int runcrawlpos; + + /// Count of states that may do backtracking + protected internal int runtrackcount; + + /// Result object + protected internal Match? runmatch; + /// Regex object + protected internal Regex? runregex; + + /// Mode in which the runner is operating + private protected RegexRunnerMode _mode; + + /// Timeout in milliseconds + private int _timeout; private bool _checkTimeout; private long _timeoutOccursAt; diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs index f8919a8b8d806..a93121d5ae6d0 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs @@ -958,6 +958,9 @@ public static IEnumerable Match_MemberData() yield return ("z(a{0,5}|a{0,10}?)", "xyzaaaaaaaaaxyz", options, 0, 15, true, "zaaaaa"); } + yield return (@"a{2}|a{3}", "aaa", RegexOptions.None, 0, 3, true, "aa"); + yield return (@"a{3}|a{2}", "aaa", RegexOptions.None, 0, 3, true, "aaa"); + // Test for a bug in NonBacktracking's subsumption rule for XY subsuming X??Y, which didn't check that X is nullable yield return (@"XY|X??Y", "Y", RegexOptions.None, 0, 1, true, "Y"); diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexAssert.netcoreapp.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexAssert.netcoreapp.cs index fe766b1933c0a..fd423d05afbbb 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexAssert.netcoreapp.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexAssert.netcoreapp.cs @@ -15,7 +15,7 @@ public static class RegexAssert { public static void Equal(string expected, Capture actual) { - Assert.Equal(expected, actual.Value); + 
Assert.True(expected == actual.Value, $"Expected {Regex.Escape(expected)} actual {Regex.Escape(actual.Value)}"); Assert.Equal(expected, actual.ValueSpan.ToString()); } }