Skip to content
This repository has been archived by the owner on Jan 23, 2023. It is now read-only.

Commit

Permalink
Align buffers so String.wcslen can use it
Browse files Browse the repository at this point in the history
  • Loading branch information
benaadams committed Feb 7, 2019
1 parent 07a282a commit c7836d5
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 98 deletions.
25 changes: 24 additions & 1 deletion src/System.Private.CoreLib/shared/System/SpanHelpers.Char.cs
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,7 @@ public static bool Contains(ref char searchSpace, char value, int length)
}

[MethodImpl(MethodImplOptions.AggressiveOptimization)]
public static int IndexOf(ref char searchSpace, char value, int length)
public unsafe static int IndexOf(ref char searchSpace, char value, int length)
{
Debug.Assert(length >= 0);

Expand Down Expand Up @@ -385,6 +385,29 @@ public static int IndexOf(ref char searchSpace, char value, int length)
{
if (offset < length)
{
if ((((nint)Unsafe.AsPointer(ref searchSpace) + (nint)offset) & (nint)(Vector256<ushort>.Count - 1)) != 0)
{
// Not currently aligned to Vector256 (is aligned to Vector128); this can cause a problem for searches
// with no upper bound e.g. String.wcslen.
// Start with a check on Vector128 to align to Vector256, before moving to processing Vector256.
// This ensures we do not fault across memory pages while searching for an end of string.
Vector128<ushort> values = Vector128.Create(value);
Vector128<ushort> search = LoadVector128(ref searchSpace, offset);

// Same method as below
int matches = Sse2.MoveMask(Sse2.CompareEqual(values, search).AsByte());
if (matches == 0)
{
// Zero flags set so no matches
offset += Vector128<ushort>.Count;
}
else
{
// Find bitflag offset of first match and add to current offset
return (int)(offset + (BitOps.TrailingZeroCount(matches) / sizeof(char)));
}
}

lengthToExamine = GetCharVector256SpanLength(offset, length);
if (lengthToExamine > offset)
{
Expand Down
103 changes: 6 additions & 97 deletions src/System.Private.CoreLib/shared/System/String.cs
Original file line number Diff line number Diff line change
Expand Up @@ -561,110 +561,19 @@ public StringRuneEnumerator EnumerateRunes()
return new StringRuneEnumerator(this);
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static unsafe int wcslen(char* ptr)
{
char* end = ptr;

// First make sure our pointer is aligned on a word boundary
int alignment = IntPtr.Size - 1;

// If ptr is at an odd address (e.g. 0x5), this loop will simply iterate all the way
while (((uint)end & (uint)alignment) != 0)
{
if (*end == 0) goto FoundZero;
end++;
}

#if !BIT64
// The following code is (somewhat surprisingly!) significantly faster than a naive loop,
// at least on x86 and the current jit.

// The loop condition below works because if "end[0] & end[1]" is non-zero, that means
// neither operand can have been zero. If it is zero, we have to look at the operands individually,
// but we hope this is going to be fairly rare.

// In general, it would be incorrect to access end[1] if we haven't made sure
// end[0] is non-zero. However, we know the ptr has been aligned by the loop above
// so end[0] and end[1] must be in the same word (and therefore page), so they're either both accessible, or both not.

while ((end[0] & end[1]) != 0 || (end[0] != 0 && end[1] != 0))
{
end += 2;
}

Debug.Assert(end[0] == 0 || end[1] == 0);
if (end[0] != 0) end++;
#else // !BIT64
// Based on https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord

// 64-bit implementation: process 1 ulong (word) at a time

// What we do here is add 0x7fff to each of the
// 4 individual chars within the ulong, using MagicMask.
// If the char > 0 and < 0x8001, it will have its high bit set.
// We then OR with MagicMask, to set all the other bits.
// This will result in all bits set (ulong.MaxValue) for any
// char that fits the above criteria, and something else otherwise.

// Note that for any char > 0x8000, this will be a false
// positive and we will fallback to the slow path and
// check each char individually. This is OK though, since
// we optimize for the common case (ASCII chars, which are < 0x80).

// NOTE: We can access a ulong at a time since the ptr is aligned,
// and therefore we're only accessing the same word/page. (See notes
// for the 32-bit version above.)

const ulong MagicMask = 0x7fff7fff7fff7fff;

while (true)
// IndexOf processes memory in aligned chunks, and thus it won't crash even if it accesses memory beyond the null terminator.
int length = SpanHelpers.IndexOf(ref *ptr, '\0', int.MaxValue);
if (length < 0)
{
ulong word = *(ulong*)end;
word += MagicMask; // cause high bit to be set if not zero, and <= 0x8000
word |= MagicMask; // set everything besides the high bits

if (word == ulong.MaxValue) // 0xffff...
{
// all of the chars have their bits set (and therefore none can be 0)
end += 4;
continue;
}

// at least one of them didn't have their high bit set!
// go through each char and check for 0.

if (end[0] == 0) goto EndAt0;
if (end[1] == 0) goto EndAt1;
if (end[2] == 0) goto EndAt2;
if (end[3] == 0) goto EndAt3;

// if we reached here, it was a false positive-- just continue
end += 4;
ThrowMustBeNullTerminatedString();
}

EndAt3: end++;
EndAt2: end++;
EndAt1: end++;
EndAt0:
#endif // !BIT64

FoundZero:
Debug.Assert(*end == 0);

int count = (int)(end - ptr);

#if BIT64
// Check for overflow
if (ptr + count != end)
throw new ArgumentException(SR.Arg_MustBeNullTerminatedString);
#else
Debug.Assert(ptr + count == end);
#endif

return count;
return length;
}


[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static unsafe int strlen(byte* ptr)
{
Expand Down

0 comments on commit c7836d5

Please sign in to comment.