dotnet · danmoseley · Mar 24, 2022 · Feb 7, 2022 · Feb 7, 2022 · Feb 7, 2022
diff --git a/src/libraries/System.Private.CoreLib/src/System/Collections/Generic/ValueListBuilder.cs b/src/libraries/System.Private.CoreLib/src/System/Collections/Generic/ValueListBuilder.cs
@@ -44,9 +44,24 @@ public ref T this[int index]
         public void Append(T item)
         {
             int pos = _pos;
-            if (pos >= _span.Length)
-                Grow();
+            if ((uint)pos < (uint)_span.Length)
+            {
+                _span[pos] = item;
+                _pos = pos + 1;
+            }
+            else
+            {
+                AddWithResize(item);
+            }
+        }
 
+        // Hide uncommon path
+        [MethodImpl(MethodImplOptions.NoInlining)]
+        private void AddWithResize(T item)
+        {
+            Debug.Assert(_pos == _span.Length);
+            int pos = _pos;
+            Grow();
             _span[pos] = item;
             _pos = pos + 1;
         }

diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs b/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs
@@ -1609,27 +1609,13 @@ private void MakeSeparatorList(ReadOnlySpan<char> separators, ref ValueListBuild
             }
 
             // Special-case the common cases of 1, 2, and 3 separators, with manual comparisons against each separator.
-            else if (separators.Length <= 3)
+            else if ((uint)separators.Length <= (uint)3)
             {
                 char sep0, sep1, sep2;
                 sep0 = separators[0];
                 sep1 = separators.Length > 1 ? separators[1] : sep0;
                 sep2 = separators.Length > 2 ? separators[2] : sep1;
-
-                if (Length >= 16 && Sse41.IsSupported)
-                {
-                    MakeSeparatorListVectorized(ref sepListBuilder, sep0, sep1, sep2);
-                    return;
-                }
-
-                for (int i = 0; i < Length; i++)
-                {
-                    char c = this[i];
-                    if (c == sep0 || c == sep1 || c == sep2)
-                    {
-                        sepListBuilder.Append(i);
-                    }
-                }
+                MakeSeparatorListVectorized(ref sepListBuilder, sep0, sep1, sep2);
             }
 
             // Handle > 3 separators with a probabilistic map, ala IndexOfAny.
@@ -1658,76 +1644,105 @@ private void MakeSeparatorList(ReadOnlySpan<char> separators, ref ValueListBuild
 
         private void MakeSeparatorListVectorized(ref ValueListBuilder<int> sepListBuilder, char c, char c2, char c3)
         {
-            // Redundant test so we won't prejit remainder of this method
-            // on platforms without SSE.
-            if (!Sse41.IsSupported)
-            {
-                throw new PlatformNotSupportedException();
-            }
-
-            // Constant that allows for the truncation of 16-bit (FFFF/0000) values within a register to 4-bit (F/0)
-            Vector128<byte> shuffleConstant = Vector128.Create(0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
-
-            Vector128<ushort> v1 = Vector128.Create((ushort)c);
-            Vector128<ushort> v2 = Vector128.Create((ushort)c2);
-            Vector128<ushort> v3 = Vector128.Create((ushort)c3);
+            nuint offset = 0;
+            nuint lengthToExamine = (nuint)(uint)Length;
 
-            ref char c0 = ref MemoryMarshal.GetReference(this.AsSpan());
-            int cond = Length & -Vector128<ushort>.Count;
-            int i = 0;
+            ref ushort source = ref Unsafe.As<char, ushort>(ref _firstChar);
 
-            for (; i < cond; i += Vector128<ushort>.Count)
+            if (Vector256.IsHardwareAccelerated)
             {
-                Vector128<ushort> charVector = ReadVector(ref c0, i);
-                Vector128<ushort> cmp = Sse2.CompareEqual(charVector, v1);
-
-                cmp = Sse2.Or(Sse2.CompareEqual(charVector, v2), cmp);
-                cmp = Sse2.Or(Sse2.CompareEqual(charVector, v3), cmp);
+                if (lengthToExamine >= (nuint)Vector256<ushort>.Count)
+                {
+                    Vector256<ushort> v1 = Vector256.Create((ushort)c);
+                    Vector256<ushort> v2 = Vector256.Create((ushort)c2);
+                    Vector256<ushort> v3 = Vector256.Create((ushort)c3);
 
-                if (Sse41.TestZ(cmp, cmp)) { continue; }
+                    do
+                    {
+                        Vector256<ushort> vector = Vector256.LoadUnsafe(ref source, offset);
+                        Vector256<ushort> v1Eq = Vector256.Equals(vector, v1);
+                        Vector256<ushort> v2Eq = Vector256.Equals(vector, v2);
+                        Vector256<ushort> v3Eq = Vector256.Equals(vector, v3);
+                        Vector256<byte> cmp = (v1Eq | v2Eq | v3Eq).AsByte();
 
-                Vector128<byte> mask = Sse2.ShiftRightLogical(cmp.AsUInt64(), 4).AsByte();
-                mask = Ssse3.Shuffle(mask, shuffleConstant);
+                        if (cmp != Vector256<byte>.Zero)
+                        {
+                            uint mask = cmp.ExtractMostSignificantBits() & 0x55555555;
+                            do
+                            {
+                                uint bitPos = (uint)BitOperations.TrailingZeroCount(mask) / sizeof(char);
+                                sepListBuilder.Append((int)(offset + bitPos));
+                                mask = BitOperations.ResetLowestSetBit(mask);
+                            } while (mask != 0);
+                        }
 
-                uint lowBits = Sse2.ConvertToUInt32(mask.AsUInt32());
-                mask = Sse2.ShiftRightLogical(mask.AsUInt64(), 32).AsByte();
-                uint highBits = Sse2.ConvertToUInt32(mask.AsUInt32());
+                        offset += (nuint)Vector256<ushort>.Count;
+                    } while (offset <= lengthToExamine - (nuint)Vector256<ushort>.Count);
 
-                for (int idx = i; lowBits != 0; idx++)
-                {
-                    if ((lowBits & 0xF) != 0)
+                    // See if we fit another 128 bit vector
+                    if (offset <= lengthToExamine - (nuint)Vector128<ushort>.Count)
                     {
-                        sepListBuilder.Append(idx);
-                    }
+                        Vector128<ushort> vector = Vector128.LoadUnsafe(ref source, offset);
+                        Vector128<ushort> v1Eq = Vector128.Equals(vector, v1.GetLower());
+                        Vector128<ushort> v2Eq = Vector128.Equals(vector, v2.GetLower());
+                        Vector128<ushort> v3Eq = Vector128.Equals(vector, v3.GetLower());
+                        Vector128<byte> cmp = (v1Eq | v2Eq | v3Eq).AsByte();
 
-                    lowBits >>= 8;
-                }
+                        if (cmp != Vector128<byte>.Zero)
+                        {
+                            uint mask = cmp.ExtractMostSignificantBits() & 0x5555;
+                            do
+                            {
+                                uint bitPos = (uint)BitOperations.TrailingZeroCount(mask) / sizeof(char);
+                                sepListBuilder.Append((int)(offset + bitPos));
+                                mask = BitOperations.ResetLowestSetBit(mask);
+                            } while (mask != 0);
+                        }
 
-                for (int idx = i + 4; highBits != 0; idx++)
+                        offset += (nuint)Vector128<ushort>.Count;
+                    }
+                }
+            }
+            else if (Vector128.IsHardwareAccelerated)
+            {
+                if (lengthToExamine >= (nuint)Vector128<ushort>.Count * 2)
                 {
-                    if ((highBits & 0xF) != 0)
+                    Vector128<ushort> v1 = Vector128.Create((ushort)c);
+                    Vector128<ushort> v2 = Vector128.Create((ushort)c2);
+                    Vector128<ushort> v3 = Vector128.Create((ushort)c3);
+
+                    do
                     {
-                        sepListBuilder.Append(idx);
-                    }
+                        Vector128<ushort> vector = Vector128.LoadUnsafe(ref source, offset);
+                        Vector128<ushort> v1Eq = Vector128.Equals(vector, v1);
+                        Vector128<ushort> v2Eq = Vector128.Equals(vector, v2);
+                        Vector128<ushort> v3Eq = Vector128.Equals(vector, v3);
+                        Vector128<byte> cmp = (v1Eq | v2Eq | v3Eq).AsByte();
+
+                        if (cmp != Vector128<byte>.Zero)
+                        {
+                            uint mask = cmp.ExtractMostSignificantBits() & 0x5555;
+                            do
+                            {
+                                uint bitPos = (uint)BitOperations.TrailingZeroCount(mask) / sizeof(char);
+                                sepListBuilder.Append((int)(offset + bitPos));
+                                mask = BitOperations.ResetLowestSetBit(mask);
+                            } while (mask != 0);
+                        }
 
-                    highBits >>= 8;
+                        offset += (nuint)Vector128<ushort>.Count;
+                    } while (offset <= lengthToExamine - (nuint)Vector128<ushort>.Count);
                 }
             }
 
-            for (; i < Length; i++)
+            while (offset < lengthToExamine)
             {
-                char curr = Unsafe.Add(ref c0, (IntPtr)(uint)i);
+                char curr = (char)Unsafe.Add(ref source, offset);
                 if (curr == c || curr == c2 || curr == c3)
                 {
-                    sepListBuilder.Append(i);
+                    sepListBuilder.Append((int)offset);
                 }
-            }
-
-            static Vector128<ushort> ReadVector(ref char c0, int offset)
-            {
-                ref char ci = ref Unsafe.Add(ref c0, (IntPtr)(uint)offset);
-                ref byte b = ref Unsafe.As<char, byte>(ref ci);
-                return Unsafe.ReadUnaligned<Vector128<ushort>>(ref b);
+                offset++;
             }
         }
 

diff --git a/src/libraries/System.Runtime/tests/System/String.SplitTests.cs b/src/libraries/System.Runtime/tests/System/String.SplitTests.cs
@@ -530,6 +530,7 @@ public static void SplitNullCharArraySeparator_BindsToCharArrayOverload()
         [InlineData("this, is, a, string, with some spaces", new[] { ',', 's', 'a' }, M, StringSplitOptions.RemoveEmptyEntries, new[] { "thi", " i", " ", " ", "tring", " with ", "ome ", "p", "ce" })]
         [InlineData("this, is, a, string, with some spaces", new[] { ',', 's', 'a' }, M, StringSplitOptions.TrimEntries, new[] { "thi", "", "i", "", "", "", "", "tring", "with", "ome", "p", "ce", "" })]
         [InlineData("this, is, a, string, with some spaces", new[] { ',', 's', 'a' }, M, StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries, new[] { "thi", "i", "tring", "with", "ome", "p", "ce" })]
+        [InlineData("this, is, a, very long string, with some spaces, commas and more spaces", new[] { ',', 's' }, M, StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries, new[] { "thi", "i", "a", "very long", "tring", "with", "ome", "pace", "comma", "and more", "pace" })]
         public static void SplitCharArraySeparator(string value, char[] separators, int count, StringSplitOptions options, string[] expected)
         {
             Assert.Equal(expected, value.Split(separators, count, options));
@@ -561,6 +562,7 @@ public static void SplitCharArraySeparator(string value, char[] separators, int
         [InlineData("this, is, a, string, with some spaces, ", new[] { ",", " s" }, M, StringSplitOptions.RemoveEmptyEntries, new[] { "this", " is", " a", "tring", " with", "ome", "paces", " " })]
         [InlineData("this, is, a, string, with some spaces, ", new[] { ",", " s" }, M, StringSplitOptions.TrimEntries, new[] { "this", "is", "a", "", "tring", "with", "ome", "paces", "" })]
         [InlineData("this, is, a, string, with some spaces, ", new[] { ",", " s" }, M, StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries, new[] { "this", "is", "a", "tring", "with", "ome", "paces" })]
+        [InlineData("this, is, a, very long string, with some spaces, commas and more spaces", new[] { ",", " s" }, M, StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries, new[] { "this", "is", "a", "very long", "tring", "with", "ome", "paces", "commas and more", "paces" })]
         public static void SplitStringArraySeparator(string value, string[] separators, int count, StringSplitOptions options, string[] expected)
         {
             Assert.Equal(expected, value.Split(separators, count, options));