From e02e9abaecace159944d887df4e707929b7694b1 Mon Sep 17 00:00:00 2001 From: Ben Adams Date: Fri, 25 Jan 2019 23:33:41 +0000 Subject: [PATCH] Intrinsicify SpanHelpers.IndexOfAny(char,char,char) --- .../shared/System/SpanHelpers.Char.cs | 114 +++++++++++++++++- 1 file changed, 111 insertions(+), 3 deletions(-) diff --git a/src/System.Private.CoreLib/shared/System/SpanHelpers.Char.cs b/src/System.Private.CoreLib/shared/System/SpanHelpers.Char.cs index 9e7efc66ae22..8b65c742cd07 100644 --- a/src/System.Private.CoreLib/shared/System/SpanHelpers.Char.cs +++ b/src/System.Private.CoreLib/shared/System/SpanHelpers.Char.cs @@ -703,7 +703,15 @@ public static int IndexOfAny(ref char searchSpace, char value0, char value1, cha int offset = 0; int lengthToExamine = length; - if (Vector.IsHardwareAccelerated) + if (Avx2.IsSupported || Sse2.IsSupported) + { + // Avx2 branch also operates on Sse2 sizes, so check is combined. + if (length >= Vector128.Count * 2) + { + lengthToExamine = UnalignedCountVector128(ref searchSpace); + } + } + else if (Vector.IsHardwareAccelerated) { if (length >= Vector.Count * 2) { @@ -744,9 +752,109 @@ public static int IndexOfAny(ref char searchSpace, char value0, char value1, cha offset += 1; } - // We get past SequentialScan only if IsHardwareAccelerated is true. However, we still have the redundant check to allow + // We get past SequentialScan only if IsHardwareAccelerated or intrinsic .IsSupported is true. However, we still have the redundant check to allow // the JIT to see that the code is unreachable and eliminate it when the platform does not have hardware accelerated. - if (Vector.IsHardwareAccelerated) + if (Avx2.IsSupported) + { + if (offset < length) + { + lengthToExamine = GetCharVector256SpanLength(offset, length); + if (lengthToExamine > offset) + { + Vector256 values0 = Vector256.Create(value0); + Vector256 values1 = Vector256.Create(value1); + Vector256 values2 = Vector256.Create(value2); + do + { + Vector256 search = LoadVector256(ref searchSpace, offset); + // Note that MoveMask has converted the equal vector elements into a set of bit flags, + // So the bit position in 'matches' corresponds to the element offset. + int matches = Avx2.MoveMask(Avx2.CompareEqual(values0, search).AsByte()); + // Bitwise Or to combine the flagged matches for the second and third values to our match flags + matches |= Avx2.MoveMask(Avx2.CompareEqual(values1, search).AsByte()); + matches |= Avx2.MoveMask(Avx2.CompareEqual(values2, search).AsByte()); + if (matches == 0) + { + // Zero flags set so no matches + offset += Vector256.Count; + continue; + } + + // Find bitflag offset of first match and add to current offset, + // flags are in bytes so divide for chars + return offset + (BitOps.TrailingZeroCount(matches) / sizeof(char)); + } while (lengthToExamine > offset); + } + + lengthToExamine = GetCharVector128SpanLength(offset, length); + if (lengthToExamine > offset) + { + Vector128 values0 = Vector128.Create(value0); + Vector128 values1 = Vector128.Create(value1); + Vector128 values2 = Vector128.Create(value2); + Vector128 search = LoadVector128(ref searchSpace, offset); + + // Same method as above + int matches = Sse2.MoveMask(Sse2.CompareEqual(values0, search).AsByte()); + matches |= Sse2.MoveMask(Sse2.CompareEqual(values1, search).AsByte()); + matches |= Sse2.MoveMask(Sse2.CompareEqual(values2, search).AsByte()); + if (matches == 0) + { + // Zero flags set so no matches + offset += Vector128.Count; + } + else + { + // Find bitflag offset of first match and add to current offset, + // flags are in bytes so divide for chars + return offset + (BitOps.TrailingZeroCount(matches) / sizeof(char)); + } + } + + if (offset < length) + { + lengthToExamine = length - offset; + goto SequentialScan; + } + } + } + else if (Sse2.IsSupported) + { + if (offset < length) + { + lengthToExamine = GetCharVector128SpanLength(offset, length); + + Vector128 values0 = Vector128.Create(value0); + Vector128 values1 = Vector128.Create(value1); + Vector128 values2 = Vector128.Create(value2); + while (lengthToExamine > offset) + { + Vector128 search = LoadVector128(ref searchSpace, offset); + + // Same method as above + int matches = Sse2.MoveMask(Sse2.CompareEqual(values0, search).AsByte()); + matches |= Sse2.MoveMask(Sse2.CompareEqual(values1, search).AsByte()); + matches |= Sse2.MoveMask(Sse2.CompareEqual(values2, search).AsByte()); + if (matches == 0) + { + // Zero flags set so no matches + offset += Vector128.Count; + continue; + } + + // Find bitflag offset of first match and add to current offset, + // flags are in bytes so divide for chars + return offset + (BitOps.TrailingZeroCount(matches) / sizeof(char)); + } + + if (offset < length) + { + lengthToExamine = length - offset; + goto SequentialScan; + } + } + } + else if (Vector.IsHardwareAccelerated) { if (offset < length) {