Skip to content

Commit

Permalink
Add String.leading_whitespace_index and .trailing_whitespace_index.
Browse files Browse the repository at this point in the history
  • Loading branch information
jemc committed Jun 17, 2023
1 parent 4f4c2d8 commit 9fa9515
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 0 deletions.
42 changes: 42 additions & 0 deletions core/String.savi
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,20 @@
)
None

:: Yield each byte of the string together with its byte index, walking
:: backward. Iteration begins at the byte just before `from` (after clamping
:: `from` to the current size), continues while the index is at least `to`,
:: and moves `stride` bytes toward the start of the string on each step.
::
:: Always returns `None`; the bytes are delivered only via the yield block.
:fun reverse_each_byte_with_index(
  from = USize.max_value
  to USize = 0
  stride USize = 1
)
  // The `-!` subtractions are the erroring form: when stepping back would
  // go below zero, the error lands in this `try` and simply ends iteration.
  // That is also what makes an empty string (or `from` of zero) yield nothing.
  try (
    clamped_from = from.at_most(@_size)
    cursor USize = clamped_from -! 1
    while (cursor >= to) (
      yield (@_ptr._get_at(cursor), cursor)
      // Re-clamp against the size before stepping back - presumably to stay
      // in bounds if the size changed while the caller's block was running.
      cursor = cursor.at_most(@_size) -! stride
    )
  )
  None

:: Starting from the given byte index and continuing up to the given end,
:: yield each UTF8-encoded Unicode codepoint, its starting byte index,
:: and its byte width (the number of bytes that were used to encode it).
Expand Down Expand Up @@ -597,6 +611,34 @@
)
--res

:: Find the byte index at which leading whitespace ends: scanning forward
:: from `start` (zero by default), return the index of the first byte that
:: is not whitespace. When no such byte is found (for example, the string is
:: empty or holds only whitespace), the total size of the string is returned.
::
:: A byte counts as whitespace here if it lies in the range `'\t'` through
:: `'\r'` (inclusive) or is the space character `' '`.
::
:: The result is meant to be passed to `trim` or `trim_in_place`
:: when trimming leading whitespace from the string.
:fun leading_whitespace_index(start USize = 0) USize
  @each_byte_with_index(start) -> (b, i |
    // Not a space, and outside the contiguous '\t'..'\r' range.
    if (b != ' ' && (b < '\t' || b > '\r')) (
      return i
    )
  )
  @_size

:: Find the byte index at which trailing whitespace begins: scanning backward
:: from the end of the string (or from the given `end` index, if given),
:: return the index just after the last non-whitespace byte encountered.
:: When no such byte is found (for example, the string is empty or holds
:: only whitespace), zero is returned.
::
:: A byte counts as whitespace here if it lies in the range `'\t'` through
:: `'\r'` (inclusive) or is the space character `' '`.
::
:: The result is meant to be passed to `trim` or `trim_in_place`
:: when trimming trailing whitespace from the string.
:fun trailing_whitespace_index(end = USize.max_value) USize
  @reverse_each_byte_with_index(end) -> (b, i |
    // Not a space, and outside the contiguous '\t'..'\r' range.
    if (b != ' ' && (b < '\t' || b > '\r')) (
      return (i + 1)
    )
  )
  0

:: Encode the code point into UTF-8. It returns a tuple with the size of the
:: encoded data and then the data.
:module _UTF8Encoder
Expand Down
32 changes: 32 additions & 0 deletions spec/core/String.Spec.savi
Original file line number Diff line number Diff line change
Expand Up @@ -334,3 +334,35 @@
assert: "foo=".split2!('=').second == ""

assert error: "foo".split2!('=')

:it "finds the end of leading whitespace at the start of the string"
  // Two spaces followed by ten escaped whitespace bytes: 12 bytes in total,
  // so the first non-whitespace byte (or the end of the string) is at 12.
  assert: "  \t\t\f\f\v\v\r\r\n\nFOO".leading_whitespace_index == 12
  assert: "  \t\t\f\f\v\v\r\r\n\n".leading_whitespace_index == 12
  // Two leading spaces only.
  assert: "  FOO".leading_whitespace_index == 2
  // No leading whitespace, and the empty string.
  assert: "FOO".leading_whitespace_index == 0
  assert: "".leading_whitespace_index == 0

:it "finds the end of leading whitespace from somewhere in the string"
  // Layout of the shared prefix "  ...  ": bytes 0-1 are spaces, bytes 2-4
  // are dots, bytes 5-6 are spaces. Scanning therefore starts at byte 5.
  // With ten more escaped whitespace bytes (7-16), the answer is 17.
  assert: "  ...  \t\t\f\f\v\v\r\r\n\nFOO".leading_whitespace_index(5) == 17
  assert: "  ...  \t\t\f\f\v\v\r\r\n\n".leading_whitespace_index(5) == 17
  // "FOO" begins at byte 7; starting inside it returns the start index.
  assert: "  ...  FOO".leading_whitespace_index(5) == 7
  assert: "  ...  FOO".leading_whitespace_index(6) == 7
  assert: "  ...  FOO".leading_whitespace_index(7) == 7
  assert: "  ...  FOO".leading_whitespace_index(8) == 8
  // No whitespace at the start index, and a start index equal to the size.
  assert: "  ...FOO".leading_whitespace_index(5) == 5
  assert: "  ...".leading_whitespace_index(5) == 5

:it "finds the end of trailing whitespace at the end of the string"
  // Nothing but whitespace (or nothing at all): the result is zero.
  assert: "".trailing_whitespace_index == 0
  assert: " \t\t\f\f\v\v\r\r\n\n".trailing_whitespace_index == 0
  // "FOO" occupies bytes 0-2, so the index just after it is always 3,
  // regardless of how much whitespace follows.
  assert: "FOO".trailing_whitespace_index == 3
  assert: "FOO ".trailing_whitespace_index == 3
  assert: "FOO \t\t\f\f\v\v\r\r\n\n".trailing_whitespace_index == 3

:it "finds the end of trailing whitespace from somewhere in the string"
  // End index 12 falls inside the run of escaped whitespace bytes,
  // so the scan walks back to just after "FOO".
  assert: "FOO \t\t\f\f\v\v\r\r\n\n... ".trailing_whitespace_index(12) == 3
  // Layout of "FOO  ... ": "FOO" is bytes 0-2, spaces are bytes 3-4, and
  // the dots start at byte 5. End indexes 5 and 4 begin on whitespace.
  assert: "FOO  ... ".trailing_whitespace_index(5) == 3
  assert: "FOO  ... ".trailing_whitespace_index(4) == 3
  // End indexes inside or right after "FOO" are returned unchanged.
  assert: "FOO  ... ".trailing_whitespace_index(3) == 3
  assert: "FOO  ... ".trailing_whitespace_index(2) == 2
  // An end index of zero leaves nothing to scan.
  assert: "... ".trailing_whitespace_index(0) == 0
1 change: 1 addition & 0 deletions src/savi/parser/builder/state.cr
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ module Savi::Parser::Builder
when 'n' then result << '\n'
when 'r' then result << '\r'
when 't' then result << '\t'
when 'v' then result << '\v'
when '0' then result << '\0'
when 'x' then
byte_value = 0
Expand Down

0 comments on commit 9fa9515

Please sign in to comment.