Skip to content
This repository has been archived by the owner on Jan 23, 2023. It is now read-only.

Commit

Permalink
Intrinsicify SpanHelpers.IndexOf(char)
Browse files Browse the repository at this point in the history
  • Loading branch information
benaadams committed Feb 7, 2019
1 parent 6bb19c3 commit aa80e6c
Show file tree
Hide file tree
Showing 2 changed files with 126 additions and 3 deletions.
2 changes: 2 additions & 0 deletions src/System.Private.CoreLib/shared/System/SpanHelpers.Byte.cs
Original file line number Diff line number Diff line change
Expand Up @@ -653,6 +653,8 @@ public static unsafe int IndexOfAny(ref byte searchSpace, byte value0, byte valu
offset += 1;
}

// We get past SequentialScan only if Vector.IsHardwareAccelerated or an intrinsic's .IsSupported is true. However, we keep the redundant check so that
// the JIT can see that the code is unreachable and eliminate it when the platform has no hardware acceleration.
if (Avx2.IsSupported)
{
if ((int)(byte*)offset < length)
Expand Down
127 changes: 124 additions & 3 deletions src/System.Private.CoreLib/shared/System/SpanHelpers.Char.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
using System.Runtime.Intrinsics.X86;

using Internal.Runtime.CompilerServices;
using System.Runtime.Intrinsics;

namespace System
{
Expand Down Expand Up @@ -196,7 +197,15 @@ public static int IndexOf(ref char searchSpace, char value, int length)
int offset = 0;
int lengthToExamine = length;

if (Vector.IsHardwareAccelerated)
if (Avx2.IsSupported || Sse2.IsSupported)
{
// Avx2 branch also operates on Sse2 sizes, so check is combined.
if (length >= Vector128<byte>.Count * 2)
{
lengthToExamine = UnalignedCountVector128(ref searchSpace);
}
}
else if (Vector.IsHardwareAccelerated)
{
if (length >= Vector<ushort>.Count * 2)
{
Expand Down Expand Up @@ -231,9 +240,96 @@ public static int IndexOf(ref char searchSpace, char value, int length)
offset += 1;
}

// We get past SequentialScan only if IsHardwareAccelerated is true. However, we still have the redundant check to allow
// We get past SequentialScan only if Vector.IsHardwareAccelerated or an intrinsic's .IsSupported is true. However, we keep the redundant check so that
// the JIT can see that the code is unreachable and eliminate it when the platform has no hardware acceleration.
if (Vector.IsHardwareAccelerated)
if (Avx2.IsSupported)
{
if (offset < length)
{
lengthToExamine = GetCharVector256SpanLength(offset, length);
if (lengthToExamine > offset)
{
Vector256<ushort> values = Vector256.Create(value);
do
{
Vector256<ushort> search = LoadVector256(ref searchSpace, offset);
int matches = Avx2.MoveMask(Avx2.CompareEqual(values, search).AsByte());
// Note that MoveMask has converted the equal vector elements into a set of bit flags,
// So the bit position in 'matches' corresponds to the element offset.
if (matches == 0)
{
// Zero flags set so no matches
offset += Vector256<ushort>.Count;
continue;
}

// Find bitflag offset of first match and add to current offset,
// flags are in bytes so divide for chars
return offset + (BitOps.TrailingZeroCount(matches) / sizeof(char));
} while (lengthToExamine > offset);
}

lengthToExamine = GetCharVector128SpanLength(offset, length);
if (lengthToExamine > offset)
{
Vector128<ushort> values = Vector128.Create(value);
Vector128<ushort> search = LoadVector128(ref searchSpace, offset);

// Same method as above
int matches = Sse2.MoveMask(Sse2.CompareEqual(values, search).AsByte());
if (matches == 0)
{
// Zero flags set so no matches
offset += Vector128<ushort>.Count;
}
else
{
// Find bitflag offset of first match and add to current offset,
// flags are in bytes so divide for chars
return offset + (BitOps.TrailingZeroCount(matches) / sizeof(char));
}
}

if (offset < length)
{
lengthToExamine = length - offset;
goto SequentialScan;
}
}
}
else if (Sse2.IsSupported)
{
if (offset < length)
{
lengthToExamine = GetCharVector128SpanLength(offset, length);

Vector128<ushort> values = Vector128.Create(value);
while (lengthToExamine > offset)
{
Vector128<ushort> search = LoadVector128(ref searchSpace, offset);

// Same method as above
int matches = Sse2.MoveMask(Sse2.CompareEqual(values, search).AsByte());
if (matches == 0)
{
// Zero flags set so no matches
offset += Vector128<ushort>.Count;
continue;
}

// Find bitflag offset of first match and add to current offset,
// flags are in bytes so divide for chars
return offset + (BitOps.TrailingZeroCount(matches) / sizeof(char));
}

if (offset < length)
{
lengthToExamine = length - offset;
goto SequentialScan;
}
}
}
else if (Vector.IsHardwareAccelerated)
{
if (offset < length)
{
Expand Down Expand Up @@ -842,6 +938,14 @@ private static int LocateLastFoundChar(ulong match)
private static unsafe Vector<ushort> LoadVector(ref char start, int offset)
=> Unsafe.ReadUnaligned<Vector<ushort>>(ref Unsafe.As<char, byte>(ref Unsafe.Add(ref start, offset)));

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe Vector128<ushort> LoadVector128(ref char start, int offset)
=> Unsafe.ReadUnaligned<Vector128<ushort>>(ref Unsafe.As<char, byte>(ref Unsafe.Add(ref start, offset)));

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe Vector256<ushort> LoadVector256(ref char start, int offset)
=> Unsafe.ReadUnaligned<Vector256<ushort>>(ref Unsafe.As<char, byte>(ref Unsafe.Add(ref start, offset)));

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe UIntPtr LoadUIntPtr(ref char start, int offset)
=> Unsafe.ReadUnaligned<UIntPtr>(ref Unsafe.As<char, byte>(ref Unsafe.Add(ref start, offset)));
Expand All @@ -850,6 +954,14 @@ private static unsafe UIntPtr LoadUIntPtr(ref char start, int offset)
private static unsafe int GetCharVectorSpanLength(int offset, int length)
=> ((length - offset) & ~(Vector<ushort>.Count - 1));

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe int GetCharVector128SpanLength(int offset, int length)
=> ((length - offset) & ~(Vector128<ushort>.Count - 1));

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe int GetCharVector256SpanLength(int offset, int length)
=> ((length - offset) & ~(Vector256<ushort>.Count - 1));

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe int UnalignedCountVector(ref char searchSpace)
{
Expand All @@ -862,6 +974,15 @@ private static unsafe int UnalignedCountVector(ref char searchSpace)
return ((Vector<ushort>.Count - unaligned) & (Vector<ushort>.Count - 1));
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe int UnalignedCountVector128(ref char searchSpace)
{
const int elementsPerByte = sizeof(ushort) / sizeof(byte);

int unaligned = ((int)Unsafe.AsPointer(ref searchSpace) & (Unsafe.SizeOf<Vector128<ushort>>() - 1)) / elementsPerByte;
return ((Vector128<ushort>.Count - unaligned) & (Vector128<ushort>.Count - 1));
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe int UnalignedCountVectorFromEnd(ref char searchSpace, int length)
{
Expand Down

0 comments on commit aa80e6c

Please sign in to comment.