From b666df5c3e068c3e5ee645809e36b0557fc049bc Mon Sep 17 00:00:00 2001 From: Christian Parpart Date: Wed, 18 Nov 2020 14:35:16 +0100 Subject: [PATCH] Scanner: Generate error on unbalanced RLO/LRO/PDF override markers. --- Changelog.md | 2 + liblangutil/CharStream.h | 13 +++ liblangutil/Scanner.cpp | 96 +++++++++++++++++-- liblangutil/Scanner.h | 18 ++++ scripts/test_antlr_grammar.sh | 4 +- ...multiline_unicode_direction_override_1.sol | 12 +++ ...multiline_unicode_direction_override_2.sol | 12 +++ ...multiline_unicode_direction_override_3.sol | 12 +++ ...multiline_unicode_direction_override_4.sol | 12 +++ ...multiline_unicode_direction_override_5.sol | 17 ++++ ...multiline_unicode_direction_override_6.sol | 12 +++ ...ingleline_unicode_direction_override_1.sol | 12 +++ ...ingleline_unicode_direction_override_2.sol | 12 +++ ...ingleline_unicode_direction_override_3.sol | 12 +++ ...ingleline_unicode_direction_override_4.sol | 12 +++ ...ingleline_unicode_direction_override_5.sol | 17 ++++ ...ingleline_unicode_direction_override_6.sol | 12 +++ 17 files changed, 276 insertions(+), 11 deletions(-) create mode 100644 test/libsolidity/syntaxTests/comments/multiline_unicode_direction_override_1.sol create mode 100644 test/libsolidity/syntaxTests/comments/multiline_unicode_direction_override_2.sol create mode 100644 test/libsolidity/syntaxTests/comments/multiline_unicode_direction_override_3.sol create mode 100644 test/libsolidity/syntaxTests/comments/multiline_unicode_direction_override_4.sol create mode 100644 test/libsolidity/syntaxTests/comments/multiline_unicode_direction_override_5.sol create mode 100644 test/libsolidity/syntaxTests/comments/multiline_unicode_direction_override_6.sol create mode 100644 test/libsolidity/syntaxTests/comments/singleline_unicode_direction_override_1.sol create mode 100644 test/libsolidity/syntaxTests/comments/singleline_unicode_direction_override_2.sol create mode 100644 
test/libsolidity/syntaxTests/comments/singleline_unicode_direction_override_3.sol create mode 100644 test/libsolidity/syntaxTests/comments/singleline_unicode_direction_override_4.sol create mode 100644 test/libsolidity/syntaxTests/comments/singleline_unicode_direction_override_5.sol create mode 100644 test/libsolidity/syntaxTests/comments/singleline_unicode_direction_override_6.sol diff --git a/Changelog.md b/Changelog.md index 85c71df8e020..efaf450cc181 100644 --- a/Changelog.md +++ b/Changelog.md @@ -3,6 +3,8 @@ Compiler Features: * SMTChecker: Support named arguments in function calls. +Language Features: + * Scanner: Generates a parser error when comments contain an unbalanced or underflowing set of Unicode direction override markers (LRO, RLO, PDF). ### 0.7.5 (2020-11-18) diff --git a/liblangutil/CharStream.h b/liblangutil/CharStream.h index aa8f01af266c..f6aaf049fa9d 100644 --- a/liblangutil/CharStream.h +++ b/liblangutil/CharStream.h @@ -85,6 +85,19 @@ class CharStream /// @returns The character of the current location after update is returned. char setPosition(size_t _location); + /// Tests whether or not given octet sequence is present at the current reading position. + /// @returns true if the sequence could be found, false otherwise. 
+ bool prefixMatch(std::string_view _sequence) const + { + if (m_position + _sequence.size() > m_source.size()) // A sequence ending exactly at end-of-input is still a valid match. + return false; + + for (size_t i = 0; i < _sequence.size(); ++i) + if (_sequence[i] != get(i)) + return false; + return true; + } + void reset() { m_position = 0; } std::string const& source() const noexcept { return m_source; } diff --git a/liblangutil/Scanner.cpp b/liblangutil/Scanner.cpp index 1a3903d5c4df..0ec6bd310b5a 100644 --- a/liblangutil/Scanner.cpp +++ b/liblangutil/Scanner.cpp @@ -79,6 +79,8 @@ string to_string(ScannerError _errorCode) case ScannerError::IllegalExponent: return "Invalid exponent."; case ScannerError::IllegalNumberEnd: return "Identifier-start is not allowed at end of a number."; case ScannerError::OctalNotAllowed: return "Octal numbers not allowed."; + case ScannerError::DirectionalOverrideUnderflowInComment: return "Unicode direction override underflow in comment."; + case ScannerError::DirectionalOverrideMismatchInComment: return "Mismatching directional override markers in comment."; default: solAssert(false, "Unhandled case in to_string(ScannerError)"); return ""; @@ -271,12 +273,74 @@ bool Scanner::skipWhitespaceExceptUnicodeLinebreak() return sourcePos() != startPosition; } +// A little helper class for counting depth of RLO/LRO/PDF uses. +struct Scanner::UnicodeDirectionOverrideProcessor +{ + Scanner& scanner; + int rtlOverrideDepth = 0; + + /// Tries to scan for an RLO/LRO/PDF and keeps track of script writing direction override depth. + /// + /// @returns a 2-tuple indicating whether or not successfully some script writing direction + /// override has been consumed from the input and an error code in case + /// the input's lexical parser state is invalid and this error should be reported + /// to the user. 
+ pair<bool, ScannerError> operator()() + { + // U+202D (LRO - Left-to-Right Override) + // U+202E (RLO - Right-to-Left Override) + if ( + scanner.tryScanByteSequence("\xE2\x80\xAD") || + scanner.tryScanByteSequence("\xE2\x80\xAE") + ) + { + rtlOverrideDepth++; + return pair{true, ScannerError::NoError}; + } + else if (scanner.tryScanByteSequence("\xE2\x80\xAC")) // U+202C (PDF - Pop Directional Formatting) + { + rtlOverrideDepth--; + if (rtlOverrideDepth < 0) + return pair{true, ScannerError::DirectionalOverrideUnderflowInComment}; + else + return pair{true, ScannerError::NoError}; + } + else + return pair{false, ScannerError::NoError}; + } + + ScannerError error() const noexcept + { + if (rtlOverrideDepth < 0) + return ScannerError::DirectionalOverrideUnderflowInComment; + else if (rtlOverrideDepth > 0) + return ScannerError::DirectionalOverrideMismatchInComment; + else + return ScannerError::NoError; + } +}; + Token Scanner::skipSingleLineComment() { + UnicodeDirectionOverrideProcessor unicodeDirectionalOverride{*this}; + // Line terminator is not part of the comment. If it is a // non-ascii line terminator, it will result in a parser error. while (!isUnicodeLinebreak()) - if (!advance()) break; + { + auto const [processed, errorCode] = unicodeDirectionalOverride(); + if (processed && errorCode != ScannerError::NoError) + return setError(errorCode); + else if (!processed) + { + if (!advance()) + break; + } + } + + if (unicodeDirectionalOverride.error() != ScannerError::NoError) + // Unbalanced RLO/LRO/PDF codepoint sequences in comment. + return setError(unicodeDirectionalOverride.error()); return Token::Whitespace; } @@ -349,18 +413,30 @@ size_t Scanner::scanSingleLineDocComment() Token Scanner::skipMultiLineComment() { + UnicodeDirectionOverrideProcessor unicodeDirectionalOverride{*this}; + while (!isSourcePastEndOfInput()) { - char ch = m_char; - advance(); - - // If we have reached the end of the multi-line comment, we - // consume the '/' and insert a whitespace. 
This way all - // multi-line comments are treated as whitespace. - if (ch == '*' && m_char == '/') + auto const [processed, errorCode] = unicodeDirectionalOverride(); + if (processed && errorCode != ScannerError::NoError) + return setError(errorCode); + else if (!processed) { - m_char = ' '; - return Token::Whitespace; + char ch = m_char; + advance(); + + // If we have reached the end of the multi-line comment, we + // consume the '/' and insert a whitespace. This way all + // multi-line comments are treated as whitespace. + if (ch == '*' && m_char == '/') + { + if (unicodeDirectionalOverride.error() != ScannerError::NoError) + // Unbalanced RLO/LRO/PDF codepoint sequences in comment. + return setError(unicodeDirectionalOverride.error()); + + m_char = ' '; + return Token::Whitespace; + } } } // Unterminated multi-line comment. diff --git a/liblangutil/Scanner.h b/liblangutil/Scanner.h index f44a9940a7ac..0e1b5a598521 100644 --- a/liblangutil/Scanner.h +++ b/liblangutil/Scanner.h @@ -89,6 +89,9 @@ enum class ScannerError IllegalExponent, IllegalNumberEnd, + DirectionalOverrideUnderflowInComment, + DirectionalOverrideMismatchInComment, + OctalNotAllowed, }; @@ -183,6 +186,8 @@ class Scanner ///@} private: + struct UnicodeDirectionOverrideProcessor; + inline Token setError(ScannerError _error) noexcept { m_tokens[NextNext].error = _error; @@ -248,6 +253,19 @@ class Scanner /// Scans a slash '/' and depending on the characters returns the appropriate token Token scanSlash(); + /// Tries to scan the given octet sequence at the current reading position and, iff it is present, advances once per octet of it. + /// @returns true if the sequence was found and consumed, false otherwise (in which case nothing is advanced). + bool tryScanByteSequence(std::string_view _sequence) + { + if (!m_source->prefixMatch(_sequence)) + return false; + + for (size_t i = 0; i < _sequence.size(); ++i) + advance(); + + return true; + } + /// Scans an escape-sequence which is part of a string and adds the /// decoded character to the current literal. 
Returns true if a pattern /// is scanned. diff --git a/scripts/test_antlr_grammar.sh b/scripts/test_antlr_grammar.sh index 46a373cb172f..2eaca006de00 100755 --- a/scripts/test_antlr_grammar.sh +++ b/scripts/test_antlr_grammar.sh @@ -116,7 +116,9 @@ done < <( grep -riL -E \ "^\/\/ (Syntax|Type|Declaration)Error|^\/\/ ParserError (2837|3716|3997|5333|6275|6281|6933|7319)|^==== Source:" \ "${ROOT_DIR}/test/libsolidity/syntaxTests" \ - "${ROOT_DIR}/test/libsolidity/semanticTests" \ + "${ROOT_DIR}/test/libsolidity/semanticTests" | + grep -v -E 'comments/.*_unicode.*.sol' + # Skipping the unicode tests as I couldn't adapt the lexical grammar to recursively counting RLO/LRO/PDF's. ) YUL_FILES=() diff --git a/test/libsolidity/syntaxTests/comments/multiline_unicode_direction_override_1.sol b/test/libsolidity/syntaxTests/comments/multiline_unicode_direction_override_1.sol new file mode 100644 index 000000000000..2d9a8d417a9d --- /dev/null +++ b/test/libsolidity/syntaxTests/comments/multiline_unicode_direction_override_1.sol @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: UNLICENSED +pragma solidity ^0.7.0; + +contract C { + function f() public pure + { + // pop 1 + /*underflow ‬*/ + } +} +// ---- +// ParserError 8936: (137-152): Unicode direction override underflow in comment. diff --git a/test/libsolidity/syntaxTests/comments/multiline_unicode_direction_override_2.sol b/test/libsolidity/syntaxTests/comments/multiline_unicode_direction_override_2.sol new file mode 100644 index 000000000000..2094a0d6da1d --- /dev/null +++ b/test/libsolidity/syntaxTests/comments/multiline_unicode_direction_override_2.sol @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: UNLICENSED +pragma solidity ^0.7.0; + +contract C { + function f() public pure + { + // pop 2 + /*underflow ‬‬*/ + } +} +// ---- +// ParserError 8936: (137-152): Unicode direction override underflow in comment. 
diff --git a/test/libsolidity/syntaxTests/comments/multiline_unicode_direction_override_3.sol b/test/libsolidity/syntaxTests/comments/multiline_unicode_direction_override_3.sol new file mode 100644 index 000000000000..18d0872d815e --- /dev/null +++ b/test/libsolidity/syntaxTests/comments/multiline_unicode_direction_override_3.sol @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: UNLICENSED +pragma solidity ^0.7.0; + +contract C { + function f() public pure + { + // push 1 + /*overflow ‮*/ + } +} +// ---- +// ParserError 8936: (138-153): Mismatching directional override markers in comment. diff --git a/test/libsolidity/syntaxTests/comments/multiline_unicode_direction_override_4.sol b/test/libsolidity/syntaxTests/comments/multiline_unicode_direction_override_4.sol new file mode 100644 index 000000000000..7f414ea8633d --- /dev/null +++ b/test/libsolidity/syntaxTests/comments/multiline_unicode_direction_override_4.sol @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: UNLICENSED +pragma solidity ^0.7.0; + +contract C { + function f() public pure + { + // push 2 + /*overflow ‮‮*/ + } +} +// ---- +// ParserError 8936: (138-156): Mismatching directional override markers in comment. 
diff --git a/test/libsolidity/syntaxTests/comments/multiline_unicode_direction_override_5.sol b/test/libsolidity/syntaxTests/comments/multiline_unicode_direction_override_5.sol new file mode 100644 index 000000000000..c23719e83121 --- /dev/null +++ b/test/libsolidity/syntaxTests/comments/multiline_unicode_direction_override_5.sol @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: UNLICENSED +pragma solidity ^0.7.0; + +contract C { + function f() public pure + { + // first push 1, then pop 1 + /*ok ‮‬*/ + + // first push 2, then pop 2 + /*ok ‮‮‬‬*/ + + // first push 3, then pop 3 + /*ok ‮‮‮‬‬‬*/ + } +} +// ---- diff --git a/test/libsolidity/syntaxTests/comments/multiline_unicode_direction_override_6.sol b/test/libsolidity/syntaxTests/comments/multiline_unicode_direction_override_6.sol new file mode 100644 index 000000000000..af743b0e2d1d --- /dev/null +++ b/test/libsolidity/syntaxTests/comments/multiline_unicode_direction_override_6.sol @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: UNLICENSED +pragma solidity ^0.7.0; + +contract C { + function f() public pure + { + // first pop, then push + /*overflow ‬‮*/ + } +} +// ---- +// ParserError 8936: (152-166): Unicode direction override underflow in comment. diff --git a/test/libsolidity/syntaxTests/comments/singleline_unicode_direction_override_1.sol b/test/libsolidity/syntaxTests/comments/singleline_unicode_direction_override_1.sol new file mode 100644 index 000000000000..8f9f8d0b5b7b --- /dev/null +++ b/test/libsolidity/syntaxTests/comments/singleline_unicode_direction_override_1.sol @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: UNLICENSED +pragma solidity ^0.7.0; + +contract C { + function f() public pure + { + // pop 1 + // underflow ‬ + } +} +// ---- +// ParserError 8936: (137-153): Unicode direction override underflow in comment. 
diff --git a/test/libsolidity/syntaxTests/comments/singleline_unicode_direction_override_2.sol b/test/libsolidity/syntaxTests/comments/singleline_unicode_direction_override_2.sol new file mode 100644 index 000000000000..a733a1bc8a96 --- /dev/null +++ b/test/libsolidity/syntaxTests/comments/singleline_unicode_direction_override_2.sol @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: UNLICENSED +pragma solidity ^0.7.0; + +contract C { + function f() public pure + { + // pop 2 + // underflow ‬‬ + } +} +// ---- +// ParserError 8936: (137-153): Unicode direction override underflow in comment. diff --git a/test/libsolidity/syntaxTests/comments/singleline_unicode_direction_override_3.sol b/test/libsolidity/syntaxTests/comments/singleline_unicode_direction_override_3.sol new file mode 100644 index 000000000000..c29d3a0e36e0 --- /dev/null +++ b/test/libsolidity/syntaxTests/comments/singleline_unicode_direction_override_3.sol @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: UNLICENSED +pragma solidity ^0.7.0; + +contract C { + function f() public pure + { + // push 1 + // overflow ‮ + } +} +// ---- +// ParserError 8936: (138-153): Mismatching directional override markers in comment. diff --git a/test/libsolidity/syntaxTests/comments/singleline_unicode_direction_override_4.sol b/test/libsolidity/syntaxTests/comments/singleline_unicode_direction_override_4.sol new file mode 100644 index 000000000000..ca6f7650a3de --- /dev/null +++ b/test/libsolidity/syntaxTests/comments/singleline_unicode_direction_override_4.sol @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: UNLICENSED +pragma solidity ^0.7.0; + +contract C { + function f() public pure + { + // push 2 + // overflow ‮‮ + } +} +// ---- +// ParserError 8936: (138-156): Mismatching directional override markers in comment. 
diff --git a/test/libsolidity/syntaxTests/comments/singleline_unicode_direction_override_5.sol b/test/libsolidity/syntaxTests/comments/singleline_unicode_direction_override_5.sol new file mode 100644 index 000000000000..bf1f1adcc55d --- /dev/null +++ b/test/libsolidity/syntaxTests/comments/singleline_unicode_direction_override_5.sol @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: UNLICENSED +pragma solidity ^0.7.0; + +contract C { + function f() public pure + { + // first push 1, then pop 1 + // ok ‮‬ + + // first push 2, then pop 2 + // ok ‮‮‬‬ + + // first push 3, then pop 3 + // ok ‮‮‮‬‬‬ + } +} +// ---- diff --git a/test/libsolidity/syntaxTests/comments/singleline_unicode_direction_override_6.sol b/test/libsolidity/syntaxTests/comments/singleline_unicode_direction_override_6.sol new file mode 100644 index 000000000000..0250acbb1ad0 --- /dev/null +++ b/test/libsolidity/syntaxTests/comments/singleline_unicode_direction_override_6.sol @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: UNLICENSED +pragma solidity ^0.7.0; + +contract C { + function f() public pure + { + // first pop, then push + // underflow ‬‮ + } +} +// ---- +// ParserError 8936: (152-168): Unicode direction override underflow in comment.