Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ARM64: Optimize IndexOf(byte) and IndexOf(char) APIs using intrinsics. #37624

Merged
merged 8 commits into from
Jun 12, 2020
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Arm;
using System.Runtime.Intrinsics.X86;

using Internal.Runtime.CompilerServices;
Expand Down Expand Up @@ -194,7 +195,7 @@ public static unsafe int IndexOf(ref byte searchSpace, byte value, int length)
nuint offset = 0; // Use nuint for arithmetic to avoid unnecessary 64->32->64 truncations
nuint lengthToExamine = (nuint)(uint)length;

if (Avx2.IsSupported || Sse2.IsSupported)
if (Sse2.IsSupported || AdvSimd.IsSupported)
{
// Avx2 branch also operates on Sse2 sizes, so check is combined.
if (length >= Vector128<byte>.Count * 2)
Expand Down Expand Up @@ -370,6 +371,40 @@ public static unsafe int IndexOf(ref byte searchSpace, byte value, int length)
}
}
}
else if (AdvSimd.IsSupported)
kunalspathak marked this conversation as resolved.
Show resolved Hide resolved
{
if (offset < (nuint)(uint)length)
kunalspathak marked this conversation as resolved.
Show resolved Hide resolved
{
lengthToExamine = GetByteVector128SpanLength(offset, length);

// Mask to help select first lane that is set.
// Each lane in the mask has different bit pattern to distinguish the lane selected.
Vector128<byte> mask = Vector128.Create((byte)1, 4, 16, 64, 1, 4, 16, 64, 1, 4, 16, 64, 1, 4, 16, 64);
int matchedLane = 0;

Vector128<byte> values = Vector128.Create(value);
while (lengthToExamine > offset)
{
Vector128<byte> search = LoadVector128(ref searchSpace, offset);
Vector128<byte> compareResult = AdvSimd.CompareEqual(values, search);

if (!TryFindFirstMatchedLane(mask, compareResult, ref matchedLane))
{
// Zero flags set so no matches
offset += (nuint)Vector128<byte>.Count;
kunalspathak marked this conversation as resolved.
Show resolved Hide resolved
continue;
}

return (int)(offset + (uint)(matchedLane >> sizeof(byte)));
}

if (offset < (nuint)(uint)length)
{
lengthToExamine = ((nuint)(uint)length - offset);
goto SequentialScan;
}
}
}
else if (Vector.IsHardwareAccelerated)
{
if (offset < (nuint)(uint)length)
Expand Down Expand Up @@ -566,7 +601,7 @@ public static int IndexOfAny(ref byte searchSpace, byte value0, byte value1, int
nuint offset = 0; // Use nuint for arithmetic to avoid unnecessary 64->32->64 truncations
nuint lengthToExamine = (nuint)(uint)length;

if (Avx2.IsSupported || Sse2.IsSupported)
if (Sse2.IsSupported)
{
// Avx2 branch also operates on Sse2 sizes, so check is combined.
if (length >= Vector128<byte>.Count * 2)
Expand Down Expand Up @@ -807,7 +842,7 @@ public static int IndexOfAny(ref byte searchSpace, byte value0, byte value1, byt
nuint offset = 0; // Use nuint for arithmetic to avoid unnecessary 64->32->64 truncations
nuint lengthToExamine = (nuint)(uint)length;

if (Avx2.IsSupported || Sse2.IsSupported)
if (Sse2.IsSupported)
{
// Avx2 branch also operates on Sse2 sizes, so check is combined.
if (length >= Vector128<byte>.Count * 2)
Expand Down Expand Up @@ -1803,5 +1838,24 @@ private static unsafe nuint UnalignedCountVectorFromEnd(ref byte searchSpace, in
nint unaligned = (nint)Unsafe.AsPointer(ref searchSpace) & (Vector<byte>.Count - 1);
return (nuint)(uint)(((length & (Vector<byte>.Count - 1)) + unaligned) & (Vector<byte>.Count - 1));
}

// Locates the first byte lane of <compareResult> that is non-zero, using Arm64 AdvSimd
// pairwise instructions.
//
//   mask          - per-lane bit selector that is AND-ed with compareResult. The callers
//                   visible in this file pass the repeating pattern (1, 4, 16, 64), which
//                   gives every lane inside a group of four its own bit (bits 0, 2, 4, 6).
//   compareResult - result of a lane-wise equality compare; matching lanes are non-zero.
//   matchedLane   - written only when a match exists. With the (1, 4, 16, 64) mask the two
//                   pairwise additions pack the lanes at two bits per lane, so the value
//                   stored is 2 * (index of the first matching byte lane). Callers shift it
//                   right to recover an element index.
//
// Returns false (leaving matchedLane untouched) when no lane of compareResult is set,
// true otherwise.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool TryFindFirstMatchedLane(Vector128<byte> mask, Vector128<byte> compareResult, ref int matchedLane)
{
    // Every intrinsic used below lives in AdvSimd.Arm64, so that is the ISA that must be
    // available; base AdvSimd support alone (e.g. 32-bit ARM) is not sufficient.
    Debug.Assert(AdvSimd.Arm64.IsSupported);

    // Pairwise max folds all 16 lanes into the lower 8 bytes; the lower 64 bits are zero
    // exactly when no lane of compareResult is set.
    ulong matches = AdvSimd.Arm64.MaxPairwise(compareResult, compareResult).AsUInt64().ToScalar();
    if (matches == 0)
    {
        return false;
    }

    // Keep only the identifying bit of each matching lane, then add neighbours twice so
    // that four consecutive lanes are packed into a single byte of the result.
    Vector128<byte> selectedLanes = AdvSimd.And(compareResult, mask);
    Vector128<byte> pairwiseSelectedLane = AdvSimd.Arm64.AddPairwise(selectedLanes, selectedLanes);
    pairwiseSelectedLane = AdvSimd.Arm64.AddPairwise(pairwiseSelectedLane, pairwiseSelectedLane);

    // The lowest set bit of the packed value belongs to the first matching lane.
    matchedLane = BitOperations.TrailingZeroCount(pairwiseSelectedLane.AsUInt64().ToScalar());
    return true;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Arm;
using System.Runtime.Intrinsics.X86;

using Internal.Runtime.CompilerServices;
Expand Down Expand Up @@ -222,7 +223,7 @@ public static unsafe int IndexOf(ref char searchSpace, char value, int length)
{
// Input isn't char aligned, we won't be able to align it to a Vector
}
else if (Sse2.IsSupported)
else if (Sse2.IsSupported || AdvSimd.IsSupported)
{
// Avx2 branch also operates on Sse2 sizes, so check is combined.
// Needs to be double length to allow us to align the data first.
Expand Down Expand Up @@ -408,6 +409,48 @@ public static unsafe int IndexOf(ref char searchSpace, char value, int length)
}
}
}
else if (AdvSimd.IsSupported)
kunalspathak marked this conversation as resolved.
Show resolved Hide resolved
{
if (offset < length)
{
Debug.Assert(length - offset >= Vector128<ushort>.Count);

lengthToExamine = GetCharVector128SpanLength(offset, length);
if (lengthToExamine > 0)
{
Vector128<ushort> values = Vector128.Create((ushort)value);

// Mask to help select first lane that is set.
// Each lane in the mask has different bit pattern to distinguish the lane selected.
Vector128<byte> mask = Vector128.Create((byte)1, 4, 16, 64, 1, 4, 16, 64, 1, 4, 16, 64, 1, 4, 16, 64);
int matchedLane = 0;

do
{
Debug.Assert(lengthToExamine >= Vector128<ushort>.Count);

Vector128<ushort> search = LoadVector128(ref searchSpace, offset);
Vector128<ushort> compareResult = AdvSimd.CompareEqual(values, search);

if (!TryFindFirstMatchedLane(mask, compareResult.AsByte(), ref matchedLane))
{
// Zero flags set so no matches
offset += Vector128<ushort>.Count;
lengthToExamine -= Vector128<ushort>.Count;
continue;
}

return (int)(offset + (matchedLane >> sizeof(char)));
} while (lengthToExamine > 0);
}

if (offset < length)
{
lengthToExamine = length - offset;
goto SequentialScan;
}
}
}
else if (Vector.IsHardwareAccelerated)
{
if (offset < length)
Expand Down