From a044e0310a0064199e3cbbb621a839a88a6a8228 Mon Sep 17 00:00:00 2001 From: Miha Zupan Date: Wed, 10 Jul 2024 19:50:26 +0200 Subject: [PATCH 1/4] Revert "Add Avx512 support to IndexOfAnyAsciiSearcher (#103710)" This reverts commit ce1ae77f9fc565facdf041a740e48935e7b60c3b. --- .../System.Memory/tests/Span/SearchValues.cs | 13 +- .../SearchValues/IndexOfAnyAsciiSearcher.cs | 577 +----------------- .../Strings/Helpers/AhoCorasick.cs | 2 +- 3 files changed, 38 insertions(+), 554 deletions(-) diff --git a/src/libraries/System.Memory/tests/Span/SearchValues.cs b/src/libraries/System.Memory/tests/Span/SearchValues.cs index 8f39a95bd3c0f4..f76584e15983e1 100644 --- a/src/libraries/System.Memory/tests/Span/SearchValues.cs +++ b/src/libraries/System.Memory/tests/Span/SearchValues.cs @@ -526,17 +526,17 @@ private static void Test(Random rng, ReadOnlySpan haystackRandom, ReadOnly if (expectedIndex != indexOfAnyIndex) { - AssertionFailed(haystack, needle, searchValuesInstance, expectedIndex, indexOfAnyIndex, nameof(indexOfAny)); + AssertionFailed(haystack, needle, expectedIndex, indexOfAnyIndex, nameof(indexOfAny)); } if (expectedIndex != searchValuesIndex) { - AssertionFailed(haystack, needle, searchValuesInstance, expectedIndex, searchValuesIndex, nameof(searchValues)); + AssertionFailed(haystack, needle, expectedIndex, searchValuesIndex, nameof(searchValues)); } if ((expectedIndex >= 0) != searchValuesContainsResult) { - AssertionFailed(haystack, needle, searchValuesInstance, expectedIndex, searchValuesContainsResult ? 0 : -1, nameof(searchValuesContainsResult)); + AssertionFailed(haystack, needle, expectedIndex, searchValuesContainsResult ? 0 : -1, nameof(searchValuesContainsResult)); } } @@ -546,16 +546,13 @@ private static ReadOnlySpan GetRandomSlice(Random rng, ReadOnlySpan spa return slice.Slice(0, Math.Min(slice.Length, rng.Next(maxLength + 1))); } - private static void AssertionFailed(ReadOnlySpan haystack, ReadOnlySpan needle, SearchValues searchValues, int expected, int actual, string approach) + private static void AssertionFailed(ReadOnlySpan haystack, ReadOnlySpan needle, int expected, int actual, string approach) where T : INumber { - Type implType = searchValues.GetType(); - string impl = $"{implType.Name} [{string.Join(", ", implType.GenericTypeArguments.Select(t => t.Name))}]"; - string readableHaystack = string.Join(", ", haystack.ToArray().Select(c => int.CreateChecked(c))); string readableNeedle = string.Join(", ", needle.ToArray().Select(c => int.CreateChecked(c))); - Assert.Fail($"Expected {expected}, got {approach}={actual} for impl='{impl}', needle='{readableNeedle}', haystack='{readableHaystack}'"); + Assert.Fail($"Expected {expected}, got {approach}={actual} for needle='{readableNeedle}', haystack='{readableHaystack}'"); } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs index ec5cfef5610fd3..42e0fcc2a65e78 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs @@ -17,36 +17,18 @@ internal static class IndexOfAnyAsciiSearcher { public struct AsciiState(Vector128 bitmap, BitVector256 lookup) { - public Vector512 Bitmap512 = Vector512.Create(bitmap); + public Vector256 Bitmap = Vector256.Create(bitmap); public BitVector256 Lookup = lookup; - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public readonly Vector128 Bitmap128() => Bitmap512._lower._lower; - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public readonly Vector256 Bitmap256() => Bitmap512._lower; - public readonly AsciiState CreateInverse() => - new AsciiState(~Bitmap128(), Lookup.CreateInverse()); + new AsciiState(~Bitmap._lower, Lookup.CreateInverse()); } public struct AnyByteState(Vector128 bitmap0, Vector128 bitmap1, BitVector256 lookup) { - public Vector512 Bitmap0_512 = Vector512.Create(bitmap0); - public Vector512 Bitmap1_512 = Vector512.Create(bitmap1); + public Vector256 Bitmap0 = Vector256.Create(bitmap0); + public Vector256 Bitmap1 = Vector256.Create(bitmap1); public BitVector256 Lookup = lookup; - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public readonly Vector128 Bitmap0_128() => Bitmap0_512._lower._lower; - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public readonly Vector128 Bitmap1_128() => Bitmap1_512._lower._lower; - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public readonly Vector256 Bitmap0_256() => Bitmap0_512._lower; - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public readonly Vector256 Bitmap1_256() => Bitmap1_512._lower; } internal static bool IsVectorizationSupported => Ssse3.IsSupported || AdvSimd.Arm64.IsSupported || PackedSimd.IsSupported; @@ -160,11 +142,11 @@ private static unsafe bool TryIndexOfAny(ref short searchSpace, int se { AsciiState state = default; - if (TryComputeBitmap(asciiValues, (byte*)&state.Bitmap512._lower._lower, out bool needleContainsZero)) + if (TryComputeBitmap(asciiValues, (byte*)&state.Bitmap._lower, out bool needleContainsZero)) { // Only initializing the bitmap here is okay as we can only get here if the search space is long enough // and we support vectorization, so the IndexOfAnyVectorized implementation will never touch state.Lookup. - state.Bitmap512 = Vector512.Create(state.Bitmap128()); + state.Bitmap = Vector256.Create(state.Bitmap.GetLower()); index = (Ssse3.IsSupported || PackedSimd.IsSupported) && needleContainsZero ? IndexOfAny(ref searchSpace, searchSpaceLength, ref state) @@ -187,11 +169,11 @@ private static unsafe bool TryLastIndexOfAny(ref short searchSpace, in { AsciiState state = default; - if (TryComputeBitmap(asciiValues, (byte*)&state.Bitmap512._lower._lower, out bool needleContainsZero)) + if (TryComputeBitmap(asciiValues, (byte*)&state.Bitmap._lower, out bool needleContainsZero)) { // Only initializing the bitmap here is okay as we can only get here if the search space is long enough // and we support vectorization, so the LastIndexOfAnyVectorized implementation will never touch state.Lookup. - state.Bitmap512 = Vector512.Create(state.Bitmap128()); + state.Bitmap = Vector256.Create(state.Bitmap.GetLower()); index = (Ssse3.IsSupported || PackedSimd.IsSupported) && needleContainsZero ? LastIndexOfAny(ref searchSpace, searchSpaceLength, ref state) @@ -255,68 +237,9 @@ private static TResult IndexOfAnyCore 2 * Vector128.Count) #pragma warning restore IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough { -#pragma warning disable IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough // The behavior of the rest of the function remains the same if Avx512BW.IsSupported is false - if (Vector512.IsHardwareAccelerated && Avx512BW.IsSupported && searchSpaceLength > 2 * Vector256.Count) -#pragma warning restore IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough - { - Vector512 bitmap512 = state.Bitmap512; - - if (searchSpaceLength > 2 * Vector512.Count) - { - // Process the input in chunks of 64 characters (2 * Vector512). - // We're mainly interested in a single byte of each character, and the core lookup operates on a Vector512. - // As packing two Vector512s into a Vector512 is cheap compared to the lookup, we can effectively double the throughput. - // If the input length is a multiple of 64, don't consume the last 64 characters in this loop. - // Let the fallback below handle it instead. This is why the condition is - // ">" instead of ">=" above, and why "IsAddressLessThan" is used instead of "!IsAddressGreaterThan". - ref short twoVectorsAwayFromEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength - (2 * Vector512.Count)); - - do - { - Vector512 source0 = Vector512.LoadUnsafe(ref currentSearchSpace); - Vector512 source1 = Vector512.LoadUnsafe(ref currentSearchSpace, (nuint)Vector512.Count); - - Vector512 result = IndexOfAnyLookup(source0, source1, bitmap512); - if (result != Vector512.Zero) - { - return TResultMapper.FirstIndex(ref searchSpace, ref currentSearchSpace, result); - } - - currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, 2 * Vector512.Count); - } - while (Unsafe.IsAddressLessThan(ref currentSearchSpace, ref twoVectorsAwayFromEnd)); - } - - // We have 1-64 characters remaining. Process the first and last vector in the search space. - // They may overlap, but we'll handle that in the index calculation if we do get a match. - Debug.Assert(searchSpaceLength >= Vector512.Count, "We expect that the input is long enough for us to load a whole vector."); - { - ref short oneVectorAwayFromEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength - Vector512.Count); + Vector256 bitmap256 = state.Bitmap; - ref short firstVector = ref Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref oneVectorAwayFromEnd) - ? ref oneVectorAwayFromEnd - : ref currentSearchSpace; - - Vector512 source0 = Vector512.LoadUnsafe(ref firstVector); - Vector512 source1 = Vector512.LoadUnsafe(ref oneVectorAwayFromEnd); - - Vector512 result = IndexOfAnyLookup(source0, source1, bitmap512); - if (result != Vector512.Zero) - { - return TResultMapper.FirstIndexOverlapped(ref searchSpace, ref firstVector, ref oneVectorAwayFromEnd, result); - } - } - - return TResultMapper.NotFound; - } - - Vector256 bitmap256 = state.Bitmap256(); - -#pragma warning disable IntrinsicsInSystemPrivateCoreLibConditionParsing // A negated IsSupported condition isn't parseable by the intrinsics analyzer -#pragma warning disable IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough // The behavior of the rest of the function remains the same if Avx512BW.IsSupported is false - if (!(Vector512.IsHardwareAccelerated && Avx512BW.IsSupported) && searchSpaceLength > 2 * Vector256.Count) -#pragma warning restore IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough -#pragma warning restore IntrinsicsInSystemPrivateCoreLibConditionParsing + if (searchSpaceLength > 2 * Vector256.Count) { // Process the input in chunks of 32 characters (2 * Vector256). // We're mainly interested in a single byte of each character, and the core lookup operates on a Vector256. @@ -365,7 +288,7 @@ private static TResult IndexOfAnyCore bitmap = state.Bitmap128(); + Vector128 bitmap = state.Bitmap._lower; #pragma warning disable IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough // The behavior of the rest of the function remains the same if Avx2.IsSupported is false if (!Avx2.IsSupported && searchSpaceLength > 2 * Vector128.Count) @@ -445,64 +368,9 @@ public static int LastIndexOfAny(ref short searchSpace if (Avx2.IsSupported && searchSpaceLength > 2 * Vector128.Count) #pragma warning disable IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough { - if (Vector512.IsHardwareAccelerated && Avx512BW.IsSupported && searchSpaceLength > 2 * Vector256.Count) - { - Vector512 bitmap512 = state.Bitmap512; - - if (searchSpaceLength > 2 * Vector512.Count) - { - // Process the input in chunks of 64 characters (2 * Vector512). - // We're mainly interested in a single byte of each character, and the core lookup operates on a Vector512. - // As packing two Vector512s into a Vector512 is cheap compared to the lookup, we can effectively double the throughput. - // If the input length is a multiple of 64, don't consume the last 64 characters in this loop. - // Let the fallback below handle it instead. This is why the condition is - // ">" instead of ">=" above, and why "IsAddressGreaterThan" is used instead of "!IsAddressLessThan". - ref short twoVectorsAfterStart = ref Unsafe.Add(ref searchSpace, 2 * Vector512.Count); - - do - { - currentSearchSpace = ref Unsafe.Subtract(ref currentSearchSpace, 2 * Vector512.Count); - - Vector512 source0 = Vector512.LoadUnsafe(ref currentSearchSpace); - Vector512 source1 = Vector512.LoadUnsafe(ref currentSearchSpace, (nuint)Vector512.Count); - - Vector512 result = IndexOfAnyLookup(source0, source1, bitmap512); - if (result != Vector512.Zero) - { - return ComputeLastIndex(ref searchSpace, ref currentSearchSpace, result); - } - } - while (Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref twoVectorsAfterStart)); - } - - // We have 1-64 characters remaining. Process the first and last vector in the search space. - // They may overlap, but we'll handle that in the index calculation if we do get a match. - Debug.Assert(searchSpaceLength >= Vector512.Count, "We expect that the input is long enough for us to load a whole vector."); - { - ref short oneVectorAfterStart = ref Unsafe.Add(ref searchSpace, Vector512.Count); - - ref short secondVector = ref Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref oneVectorAfterStart) - ? ref Unsafe.Subtract(ref currentSearchSpace, Vector512.Count) - : ref searchSpace; - - Vector512 source0 = Vector512.LoadUnsafe(ref searchSpace); - Vector512 source1 = Vector512.LoadUnsafe(ref secondVector); + Vector256 bitmap256 = state.Bitmap; - Vector512 result = IndexOfAnyLookup(source0, source1, bitmap512); - if (result != Vector512.Zero) - { - return ComputeLastIndexOverlapped(ref searchSpace, ref secondVector, result); - } - } - - return -1; - } - - Vector256 bitmap256 = state.Bitmap256(); - -#pragma warning disable IntrinsicsInSystemPrivateCoreLibConditionParsing // A negated IsSupported condition isn't parseable by the intrinsics analyzer - if (!(Vector512.IsHardwareAccelerated && Avx512BW.IsSupported) && searchSpaceLength > 2 * Vector256.Count) -#pragma warning restore IntrinsicsInSystemPrivateCoreLibConditionParsing + if (searchSpaceLength > 2 * Vector256.Count) { // Process the input in chunks of 32 characters (2 * Vector256). // We're mainly interested in a single byte of each character, and the core lookup operates on a Vector256. @@ -551,7 +419,7 @@ public static int LastIndexOfAny(ref short searchSpace return -1; } - Vector128 bitmap = state.Bitmap128(); + Vector128 bitmap = state.Bitmap._lower; if (!Avx2.IsSupported && searchSpaceLength > 2 * Vector128.Count) { @@ -650,62 +518,9 @@ private static TResult IndexOfAnyCore(ref byte if (Avx2.IsSupported && searchSpaceLength > Vector128.Count) #pragma warning disable IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough { - if (Vector512.IsHardwareAccelerated && Avx512BW.IsSupported && searchSpaceLength > Vector256.Count) - { - Vector512 bitmap512 = state.Bitmap512; - - if (searchSpaceLength > Vector512.Count) - { - // Process the input in chunks of 64 bytes. - // If the input length is a multiple of 64, don't consume the last 64 characters in this loop. - // Let the fallback below handle it instead. This is why the condition is - // ">" instead of ">=" above, and why "IsAddressLessThan" is used instead of "!IsAddressGreaterThan". - ref byte vectorAwayFromEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength - Vector512.Count); + Vector256 bitmap256 = state.Bitmap; - do - { - Vector512 source = Vector512.LoadUnsafe(ref currentSearchSpace); - - Vector512 result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap512)); - if (result != Vector512.Zero) - { - return TResultMapper.FirstIndex(ref searchSpace, ref currentSearchSpace, result); - } - - currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, Vector512.Count); - } - while (Unsafe.IsAddressLessThan(ref currentSearchSpace, ref vectorAwayFromEnd)); - } - - // We have 1-64 bytes remaining. Process the first and last half vectors in the search space. - // They may overlap, but we'll handle that in the index calculation if we do get a match. - Debug.Assert(searchSpaceLength >= Vector256.Count, "We expect that the input is long enough for us to load a Vector256."); - { - ref byte halfVectorAwayFromEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength - Vector256.Count); - - ref byte firstVector = ref Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref halfVectorAwayFromEnd) - ? ref halfVectorAwayFromEnd - : ref currentSearchSpace; - - Vector256 source0 = Vector256.LoadUnsafe(ref firstVector); - Vector256 source1 = Vector256.LoadUnsafe(ref halfVectorAwayFromEnd); - Vector512 source = Vector512.Create(source0, source1); - - Vector512 result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap512)); - if (result != Vector512.Zero) - { - return TResultMapper.FirstIndexOverlapped(ref searchSpace, ref firstVector, ref halfVectorAwayFromEnd, result); - } - } - - return TResultMapper.NotFound; - } - - Vector256 bitmap256 = state.Bitmap256(); - -#pragma warning disable IntrinsicsInSystemPrivateCoreLibConditionParsing // A negated IsSupported condition isn't parseable by the intrinsics analyzer - if (!(Vector512.IsHardwareAccelerated && Avx512BW.IsSupported) && searchSpaceLength > Vector256.Count) -#pragma warning restore IntrinsicsInSystemPrivateCoreLibConditionParsing + if (searchSpaceLength > Vector256.Count) { // Process the input in chunks of 32 bytes. // If the input length is a multiple of 32, don't consume the last 32 characters in this loop. @@ -752,7 +567,7 @@ private static TResult IndexOfAnyCore(ref byte return TResultMapper.NotFound; } - Vector128 bitmap = state.Bitmap128(); + Vector128 bitmap = state.Bitmap._lower; if (!Avx2.IsSupported && searchSpaceLength > Vector128.Count) { @@ -827,62 +642,9 @@ public static int LastIndexOfAny(ref byte searchSpace, int searchSpace if (Avx2.IsSupported && searchSpaceLength > Vector128.Count) #pragma warning disable IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough { - if (Vector512.IsHardwareAccelerated && Avx512BW.IsSupported && searchSpaceLength > Vector256.Count) - { - Vector512 bitmap512 = state.Bitmap512; - - if (searchSpaceLength > Vector512.Count) - { - // Process the input in chunks of 64 bytes. - // If the input length is a multiple of 64, don't consume the last 64 characters in this loop. - // Let the fallback below handle it instead. This is why the condition is - // ">" instead of ">=" above, and why "IsAddressGreaterThan" is used instead of "!IsAddressLessThan". - ref byte vectorAfterStart = ref Unsafe.Add(ref searchSpace, Vector512.Count); - - do - { - currentSearchSpace = ref Unsafe.Subtract(ref currentSearchSpace, Vector512.Count); - - Vector512 source = Vector512.LoadUnsafe(ref currentSearchSpace); - - Vector512 result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap512)); - if (result != Vector512.Zero) - { - return ComputeLastIndex(ref searchSpace, ref currentSearchSpace, result); - } - } - while (Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref vectorAfterStart)); - } - - // We have 1-64 bytes remaining. Process the first and last half vectors in the search space. - // They may overlap, but we'll handle that in the index calculation if we do get a match. - Debug.Assert(searchSpaceLength >= Vector256.Count, "We expect that the input is long enough for us to load a Vector256."); - { - ref byte halfVectorAfterStart = ref Unsafe.Add(ref searchSpace, Vector256.Count); - - ref byte secondVector = ref Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref halfVectorAfterStart) - ? ref Unsafe.Subtract(ref currentSearchSpace, Vector256.Count) - : ref searchSpace; - - Vector256 source0 = Vector256.LoadUnsafe(ref searchSpace); - Vector256 source1 = Vector256.LoadUnsafe(ref secondVector); - Vector512 source = Vector512.Create(source0, source1); - - Vector512 result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap512)); - if (result != Vector512.Zero) - { - return ComputeLastIndexOverlapped(ref searchSpace, ref secondVector, result); - } - } - - return -1; - } - - Vector256 bitmap256 = state.Bitmap256(); + Vector256 bitmap256 = state.Bitmap; -#pragma warning disable IntrinsicsInSystemPrivateCoreLibConditionParsing // A negated IsSupported condition isn't parseable by the intrinsics analyzer - if (!(Vector512.IsHardwareAccelerated && Avx512BW.IsSupported) && searchSpaceLength > Vector256.Count) -#pragma warning restore IntrinsicsInSystemPrivateCoreLibConditionParsing + if (searchSpaceLength > Vector256.Count) { // Process the input in chunks of 32 bytes. // If the input length is a multiple of 32, don't consume the last 32 characters in this loop. @@ -929,7 +691,7 @@ public static int LastIndexOfAny(ref byte searchSpace, int searchSpace return -1; } - Vector128 bitmap = state.Bitmap128(); + Vector128 bitmap = state.Bitmap._lower; if (!Avx2.IsSupported && searchSpaceLength > Vector128.Count) { @@ -1026,68 +788,10 @@ private static TResult IndexOfAnyCore(ref byte if (Avx2.IsSupported && searchSpaceLength > Vector128.Count) #pragma warning restore IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough { -#pragma warning disable IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough // The behavior of the rest of the function remains the same if Avx512BW.IsSupported is false - if (Vector512.IsHardwareAccelerated && Avx512BW.IsSupported && searchSpaceLength > Vector256.Count) -#pragma warning restore IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough - { - Vector512 bitmap512_0 = state.Bitmap0_512; - Vector512 bitmap512_1 = state.Bitmap1_512; - - if (searchSpaceLength > Vector512.Count) - { - // Process the input in chunks of 64 bytes. - // If the input length is a multiple of 64, don't consume the last 64 characters in this loop. - // Let the fallback below handle it instead. This is why the condition is - // ">" instead of ">=" above, and why "IsAddressLessThan" is used instead of "!IsAddressGreaterThan". - ref byte vectorAwayFromEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength - Vector512.Count); + Vector256 bitmap256_0 = state.Bitmap0; + Vector256 bitmap256_1 = state.Bitmap1; - do - { - Vector512 source = Vector512.LoadUnsafe(ref currentSearchSpace); - - Vector512 result = IndexOfAnyLookup(source, bitmap512_0, bitmap512_1); - if (result != Vector512.Zero) - { - return TResultMapper.FirstIndex(ref searchSpace, ref currentSearchSpace, result); - } - - currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, Vector512.Count); - } - while (Unsafe.IsAddressLessThan(ref currentSearchSpace, ref vectorAwayFromEnd)); - } - - // We have 1-64 bytes remaining. Process the first and last half vectors in the search space. - // They may overlap, but we'll handle that in the index calculation if we do get a match. - Debug.Assert(searchSpaceLength >= Vector256.Count, "We expect that the input is long enough for us to load a Vector256."); - { - ref byte halfVectorAwayFromEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength - Vector256.Count); - - ref byte firstVector = ref Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref halfVectorAwayFromEnd) - ? ref halfVectorAwayFromEnd - : ref currentSearchSpace; - - Vector256 source0 = Vector256.LoadUnsafe(ref firstVector); - Vector256 source1 = Vector256.LoadUnsafe(ref halfVectorAwayFromEnd); - Vector512 source = Vector512.Create(source0, source1); - - Vector512 result = IndexOfAnyLookup(source, bitmap512_0, bitmap512_1); - if (result != Vector512.Zero) - { - return TResultMapper.FirstIndexOverlapped(ref searchSpace, ref firstVector, ref halfVectorAwayFromEnd, result); - } - } - - return TResultMapper.NotFound; - } - - Vector256 bitmap256_0 = state.Bitmap0_256(); - Vector256 bitmap256_1 = state.Bitmap1_256(); - -#pragma warning disable IntrinsicsInSystemPrivateCoreLibConditionParsing // A negated IsSupported condition isn't parseable by the intrinsics analyzer -#pragma warning disable IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough // The behavior of the rest of the function remains the same if Avx512BW.IsSupported is false - if (!(Vector512.IsHardwareAccelerated && Avx512BW.IsSupported) && searchSpaceLength > Vector256.Count) -#pragma warning restore IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough -#pragma warning restore IntrinsicsInSystemPrivateCoreLibConditionParsing + if (searchSpaceLength > Vector256.Count) { // Process the input in chunks of 32 bytes. // If the input length is a multiple of 32, don't consume the last 32 characters in this loop. @@ -1134,8 +838,8 @@ private static TResult IndexOfAnyCore(ref byte return TResultMapper.NotFound; } - Vector128 bitmap0 = state.Bitmap0_128(); - Vector128 bitmap1 = state.Bitmap1_128(); + Vector128 bitmap0 = state.Bitmap0._lower; + Vector128 bitmap1 = state.Bitmap1._lower; #pragma warning disable IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough // The behavior of the rest of the function remains the same if Avx2.IsSupported is false if (!Avx2.IsSupported && searchSpaceLength > Vector128.Count) @@ -1210,70 +914,12 @@ public static int LastIndexOfAny(ref byte searchSpace, int searchSpace #pragma warning disable IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough // The behavior of the rest of the function remains the same if Avx2.IsSupported is false if (Avx2.IsSupported && searchSpaceLength > Vector128.Count) -#pragma warning restore IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough { -#pragma warning disable IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough // The behavior of the rest of the function remains the same if Avx512BW.IsSupported is false - if (Vector512.IsHardwareAccelerated && Avx512BW.IsSupported && searchSpaceLength > Vector256.Count) #pragma warning restore IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough - { - Vector512 bitmap512_0 = state.Bitmap0_512; - Vector512 bitmap512_1 = state.Bitmap1_512; - - if (searchSpaceLength > Vector512.Count) - { - // Process the input in chunks of 64 bytes. - // If the input length is a multiple of 64, don't consume the last 64 characters in this loop. - // Let the fallback below handle it instead. This is why the condition is - // ">" instead of ">=" above, and why "IsAddressGreaterThan" is used instead of "!IsAddressLessThan". - ref byte vectorAfterStart = ref Unsafe.Add(ref searchSpace, Vector512.Count); - - do - { - currentSearchSpace = ref Unsafe.Subtract(ref currentSearchSpace, Vector512.Count); - - Vector512 source = Vector512.LoadUnsafe(ref currentSearchSpace); - - Vector512 result = IndexOfAnyLookup(source, bitmap512_0, bitmap512_1); - if (result != Vector512.Zero) - { - return ComputeLastIndex(ref searchSpace, ref currentSearchSpace, result); - } - } - while (Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref vectorAfterStart)); - } - - // We have 1-64 bytes remaining. Process the first and last half vectors in the search space. - // They may overlap, but we'll handle that in the index calculation if we do get a match. - Debug.Assert(searchSpaceLength >= Vector256.Count, "We expect that the input is long enough for us to load a Vector256."); - { - ref byte halfVectorAfterStart = ref Unsafe.Add(ref searchSpace, Vector256.Count); - - ref byte secondVector = ref Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref halfVectorAfterStart) - ? ref Unsafe.Subtract(ref currentSearchSpace, Vector256.Count) - : ref searchSpace; - - Vector256 source0 = Vector256.LoadUnsafe(ref searchSpace); - Vector256 source1 = Vector256.LoadUnsafe(ref secondVector); - Vector512 source = Vector512.Create(source0, source1); - - Vector512 result = IndexOfAnyLookup(source, bitmap512_0, bitmap512_1); - if (result != Vector512.Zero) - { - return ComputeLastIndexOverlapped(ref searchSpace, ref secondVector, result); - } - } + Vector256 bitmap256_0 = state.Bitmap0; + Vector256 bitmap256_1 = state.Bitmap1; - return -1; - } - - Vector256 bitmap256_0 = state.Bitmap0_256(); - Vector256 bitmap256_1 = state.Bitmap1_256(); - -#pragma warning disable IntrinsicsInSystemPrivateCoreLibConditionParsing // A negated IsSupported condition isn't parseable by the intrinsics analyzer -#pragma warning disable IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough // The behavior of the rest of the function remains the same if Avx512BW.IsSupported is false - if (!(Vector512.IsHardwareAccelerated && Avx512BW.IsSupported) && searchSpaceLength > Vector256.Count) -#pragma warning restore IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough -#pragma warning restore IntrinsicsInSystemPrivateCoreLibConditionParsing + if (searchSpaceLength > Vector256.Count) { // Process the input in chunks of 32 bytes. // If the input length is a multiple of 32, don't consume the last 32 characters in this loop. @@ -1320,8 +966,8 @@ public static int LastIndexOfAny(ref byte searchSpace, int searchSpace return -1; } - Vector128 bitmap0 = state.Bitmap0_128(); - Vector128 bitmap1 = state.Bitmap1_128(); + Vector128 bitmap0 = state.Bitmap0._lower; + Vector128 bitmap1 = state.Bitmap1._lower; #pragma warning disable IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough // The behavior of the rest of the function remains the same if Avx2.IsSupported is false if (!Avx2.IsSupported && searchSpaceLength > Vector128.Count) @@ -1442,31 +1088,6 @@ private static Vector256 IndexOfAnyLookupCore(Vector256 source, Vect return result; } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - [CompExactlyDependsOn(typeof(Avx512BW))] - private static Vector512 IndexOfAnyLookup(Vector512 source0, Vector512 source1, Vector512 bitmapLookup) - where TNegator : struct, INegator - where TOptimizations : struct, IOptimizations - { - Vector512 source = TOptimizations.PackSources(source0.AsUInt16(), source1.AsUInt16()); - - Vector512 result = IndexOfAnyLookupCore(source, bitmapLookup); - - return TNegator.NegateIfNeeded(result); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - [CompExactlyDependsOn(typeof(Avx512BW))] - private static Vector512 IndexOfAnyLookupCore(Vector512 source, Vector512 bitmapLookup) - { - // See comments in IndexOfAnyLookupCore(Vector128) above for more details. - Vector512 highNibbles = source >>> 4; - Vector512 bitMask = Avx512BW.Shuffle(bitmapLookup, source); - Vector512 bitPositions = Avx512BW.Shuffle(Vector512.Create(0x8040201008040201).AsByte(), highNibbles); - Vector512 result = bitMask & bitPositions; - return result; - } - [MethodImpl(MethodImplOptions.AggressiveInlining)] [CompExactlyDependsOn(typeof(Ssse3))] [CompExactlyDependsOn(typeof(AdvSimd))] @@ -1477,7 +1098,7 @@ private static Vector128 IndexOfAnyLookup(Vector128 source // http://0x80.pl/articles/simd-byte-lookup.html#universal-algorithm Vector128 lowNibbles = source & Vector128.Create((byte)0xF); - Vector128 highNibbles = source >>> 4; + Vector128 highNibbles = Vector128.ShiftRightLogical(source.AsInt32(), 4).AsByte() & Vector128.Create((byte)0xF); Vector128 row0 = Vector128.ShuffleUnsafe(bitmapLookup0, lowNibbles); Vector128 row1 = Vector128.ShuffleUnsafe(bitmapLookup1, lowNibbles); @@ -1500,7 +1121,7 @@ private static Vector256 IndexOfAnyLookup(Vector256 source // http://0x80.pl/articles/simd-byte-lookup.html#universal-algorithm Vector256 lowNibbles = source & Vector256.Create((byte)0xF); - Vector256 highNibbles = source >>> 4; + Vector256 highNibbles = Vector256.ShiftRightLogical(source.AsInt32(), 4).AsByte() & Vector256.Create((byte)0xF); Vector256 row0 = Avx2.Shuffle(bitmapLookup0, lowNibbles); Vector256 row1 = Avx2.Shuffle(bitmapLookup1, lowNibbles); @@ -1515,29 +1136,6 @@ private static Vector256 IndexOfAnyLookup(Vector256 source return TNegator.NegateIfNeeded(result); } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - [CompExactlyDependsOn(typeof(Avx512BW))] - private static Vector512 IndexOfAnyLookup(Vector512 source, Vector512 bitmapLookup0, Vector512 bitmapLookup1) - where TNegator : struct, INegator - { - // http://0x80.pl/articles/simd-byte-lookup.html#universal-algorithm - - Vector512 lowNibbles = source & Vector512.Create((byte)0xF); - Vector512 highNibbles = source >>> 4; - - Vector512 row0 = Avx512BW.Shuffle(bitmapLookup0, lowNibbles); - Vector512 row1 = Avx512BW.Shuffle(bitmapLookup1, lowNibbles); - - Vector512 bitmask = Avx512BW.Shuffle(Vector512.Create(0x8040201008040201).AsByte(), highNibbles); - - Vector512 mask = Vector512.GreaterThan(highNibbles.AsSByte(), Vector512.Create((sbyte)0x7)).AsByte(); - Vector512 bitsets = Vector512.ConditionalSelect(mask, row1, row0); - - Vector512 result = Vector512.Equals(bitsets & bitmask, bitmask); - - return TNegator.NegateIfNeeded(result); - } - [MethodImpl(MethodImplOptions.AggressiveInlining)] private static unsafe int ComputeLastIndex(ref T searchSpace, ref T current, Vector128 result) where TNegator : struct, INegator @@ -1600,53 +1198,13 @@ private static unsafe int ComputeLastIndexOverlapped(ref T searchSp return offsetInVector - Vector256.Count + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref secondVector) / (nuint)sizeof(T)); } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - [CompExactlyDependsOn(typeof(Avx512F))] - private static unsafe int ComputeLastIndex(ref T searchSpace, ref T current, Vector512 result) - where TNegator : struct, INegator - { - if (typeof(T) == typeof(short)) - { - result = PackedSpanHelpers.FixUpPackedVector512Result(result); - } - - ulong mask = TNegator.ExtractMask(result); - - int offsetInVector = 63 - BitOperations.LeadingZeroCount(mask); - return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref current) / (nuint)sizeof(T)); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - [CompExactlyDependsOn(typeof(Avx512F))] - private static unsafe int ComputeLastIndexOverlapped(ref T searchSpace, ref T secondVector, Vector512 result) - where TNegator : struct, INegator - { - if (typeof(T) == typeof(short)) - { - result = PackedSpanHelpers.FixUpPackedVector512Result(result); - } - - ulong mask = TNegator.ExtractMask(result); - - int offsetInVector = 63 - BitOperations.LeadingZeroCount(mask); - if (offsetInVector < Vector512.Count) - { - return offsetInVector; - } - - // We matched within the second vector - return offsetInVector - Vector512.Count + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref secondVector) / (nuint)sizeof(T)); - } - internal interface INegator { static abstract bool NegateIfNeeded(bool result); static abstract Vector128 NegateIfNeeded(Vector128 result); static abstract Vector256 NegateIfNeeded(Vector256 result); - static abstract Vector512 NegateIfNeeded(Vector512 result); static abstract uint ExtractMask(Vector128 result); static abstract uint ExtractMask(Vector256 result); - static abstract ulong ExtractMask(Vector512 result); } internal readonly struct DontNegate : INegator @@ -1654,13 +1212,8 @@ internal interface INegator public static bool NegateIfNeeded(bool result) => result; public static Vector128 NegateIfNeeded(Vector128 result) => result; public static Vector256 NegateIfNeeded(Vector256 result) => result; - public static Vector512 NegateIfNeeded(Vector512 result) => result; - [MethodImpl(MethodImplOptions.AggressiveInlining)] public static uint ExtractMask(Vector128 result) => ~Vector128.Equals(result, Vector128.Zero).ExtractMostSignificantBits(); - [MethodImpl(MethodImplOptions.AggressiveInlining)] public static uint ExtractMask(Vector256 result) => ~Vector256.Equals(result, Vector256.Zero).ExtractMostSignificantBits(); - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static ulong ExtractMask(Vector512 result) => ~Vector512.Equals(result, Vector512.Zero).ExtractMostSignificantBits(); } internal readonly struct Negate : INegator @@ -1668,18 +1221,10 @@ internal interface INegator public static bool NegateIfNeeded(bool result) => !result; // This is intentionally testing for equality with 0 instead of "~result". // We want to know if any character didn't match, as that means it should be treated as a match for the -Except method. - [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128 NegateIfNeeded(Vector128 result) => Vector128.Equals(result, Vector128.Zero); - [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 NegateIfNeeded(Vector256 result) => Vector256.Equals(result, Vector256.Zero); - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector512 NegateIfNeeded(Vector512 result) => Vector512.Equals(result, Vector512.Zero); - [MethodImpl(MethodImplOptions.AggressiveInlining)] public static uint ExtractMask(Vector128 result) => result.ExtractMostSignificantBits(); - [MethodImpl(MethodImplOptions.AggressiveInlining)] public static uint ExtractMask(Vector256 result) => result.ExtractMostSignificantBits(); - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static ulong ExtractMask(Vector512 result) => result.ExtractMostSignificantBits(); } internal interface IOptimizations @@ -1692,7 +1237,6 @@ internal interface IOptimizations // - All values result in min(value, 255) static abstract Vector128 PackSources(Vector128 lower, Vector128 upper); static abstract Vector256 PackSources(Vector256 lower, Vector256 upper); - static abstract Vector512 PackSources(Vector512 lower, Vector512 upper); } internal readonly struct Ssse3AndWasmHandleZeroInNeedle : IOptimizations @@ -1720,16 +1264,6 @@ public static Vector256 PackSources(Vector256 lower, Vector256 PackSources(Vector512 lower, Vector512 upper) - { - return Avx512BW.PackUnsignedSaturate( - Vector512.Min(lower, Vector512.Create((ushort)255)).AsInt16(), - Vector512.Min(upper, Vector512.Create((ushort)255)).AsInt16()); - } } internal readonly struct Default : IOptimizations @@ -1752,13 +1286,6 @@ public static Vector256 PackSources(Vector256 lower, Vector256 PackSources(Vector512 lower, Vector512 upper) - { - return Avx512BW.PackUnsignedSaturate(lower.AsInt16(), upper.AsInt16()); - } } private interface IResultMapper @@ -1769,10 +1296,8 @@ private interface IResultMapper static abstract TResult ScalarResult(ref T searchSpace, ref T current); static abstract TResult FirstIndex(ref T searchSpace, ref T current, Vector128 result) where TNegator : struct, INegator; static abstract TResult FirstIndex(ref T searchSpace, ref T current, Vector256 result) where TNegator : struct, INegator; - static abstract TResult FirstIndex(ref T searchSpace, ref T current, Vector512 result) where TNegator : struct, INegator; static abstract TResult FirstIndexOverlapped(ref T searchSpace, ref T current0, ref T current1, Vector128 result) where TNegator : struct, INegator; static abstract TResult FirstIndexOverlapped(ref T searchSpace, ref T current0, ref T current1, Vector256 result) where TNegator : struct, INegator; - static abstract TResult FirstIndexOverlapped(ref T searchSpace, ref T current0, ref T current1, Vector512 result) where TNegator : struct, INegator; } private readonly struct ContainsAnyResultMapper : IResultMapper @@ -1782,10 +1307,8 @@ private interface IResultMapper public static bool ScalarResult(ref T searchSpace, ref T current) => true; public static bool FirstIndex(ref T searchSpace, ref T current, Vector128 result) where TNegator : struct, INegator => true; public static bool FirstIndex(ref T searchSpace, ref T current, Vector256 result) where TNegator : struct, INegator => true; - public static bool FirstIndex(ref T searchSpace, ref T current, Vector512 result) where TNegator : struct, INegator => true; public static bool FirstIndexOverlapped(ref T searchSpace, ref T current0, ref T current1, Vector128 result) where TNegator : struct, INegator => true; public static bool FirstIndexOverlapped(ref T searchSpace, ref T current0, ref T current1, Vector256 result) where TNegator : struct, INegator => true; - public static bool FirstIndexOverlapped(ref T searchSpace, ref T current0, ref T current1, Vector512 result) where TNegator : struct, INegator => true; } private readonly unsafe struct IndexOfAnyResultMapper : IResultMapper @@ -1821,21 +1344,6 @@ public static int FirstIndex(ref T searchSpace, ref T current, Vector2 return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref current) / (nuint)sizeof(T)); } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - [CompExactlyDependsOn(typeof(Avx512F))] - public static int FirstIndex(ref T searchSpace, ref T current, Vector512 result) where TNegator : struct, INegator - { - if (typeof(T) == typeof(short)) - { - result = PackedSpanHelpers.FixUpPackedVector512Result(result); - } - - ulong mask = TNegator.ExtractMask(result); - - int offsetInVector = BitOperations.TrailingZeroCount(mask); - return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref current) / (nuint)sizeof(T)); - } - [MethodImpl(MethodImplOptions.AggressiveInlining)] public static int FirstIndexOverlapped(ref T searchSpace, ref T current0, ref T current1, Vector128 result) where TNegator : struct, INegator { @@ -1870,27 +1378,6 @@ public static int FirstIndexOverlapped(ref T searchSpace, ref T curren } return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref current0) / (nuint)sizeof(T)); } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - [CompExactlyDependsOn(typeof(Avx512F))] - public static int FirstIndexOverlapped(ref T searchSpace, ref T current0, ref T current1, Vector512 result) where TNegator : struct, INegator - { - if (typeof(T) == typeof(short)) - { - result = PackedSpanHelpers.FixUpPackedVector512Result(result); - } - - ulong mask = TNegator.ExtractMask(result); - - int offsetInVector = BitOperations.TrailingZeroCount(mask); - if (offsetInVector >= Vector512.Count) - { - // We matched within the second vector - current0 = ref current1; - offsetInVector -= Vector512.Count; - } - return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref current0) / (nuint)sizeof(T)); - } } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/Helpers/AhoCorasick.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/Helpers/AhoCorasick.cs index 777251124cbbea..ebc94616ae642c 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/Helpers/AhoCorasick.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/Helpers/AhoCorasick.cs @@ -30,7 +30,7 @@ public readonly bool ShouldUseAsciiFastScan { get { - if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && _startingAsciiChars.Bitmap128() != default) + if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && _startingAsciiChars.Bitmap != default) { // If there are a lot of starting characters such that we often find one early, // the ASCII fast scan may end up performing worse than checking one character at a time. From aade2bb16c668bf8459874c364084f598c1382c5 Mon Sep 17 00:00:00 2001 From: Miha Zupan Date: Wed, 10 Jul 2024 19:51:38 +0200 Subject: [PATCH 2/4] Keep the test improvements --- .../System.Memory/tests/Span/SearchValues.cs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/libraries/System.Memory/tests/Span/SearchValues.cs b/src/libraries/System.Memory/tests/Span/SearchValues.cs index f76584e15983e1..8f39a95bd3c0f4 100644 --- a/src/libraries/System.Memory/tests/Span/SearchValues.cs +++ b/src/libraries/System.Memory/tests/Span/SearchValues.cs @@ -526,17 +526,17 @@ private static void Test(Random rng, ReadOnlySpan haystackRandom, ReadOnly if (expectedIndex != indexOfAnyIndex) { - AssertionFailed(haystack, needle, expectedIndex, indexOfAnyIndex, nameof(indexOfAny)); + AssertionFailed(haystack, needle, searchValuesInstance, expectedIndex, indexOfAnyIndex, nameof(indexOfAny)); } if (expectedIndex != searchValuesIndex) { - AssertionFailed(haystack, needle, expectedIndex, searchValuesIndex, nameof(searchValues)); + AssertionFailed(haystack, needle, searchValuesInstance, expectedIndex, searchValuesIndex, nameof(searchValues)); } if ((expectedIndex >= 0) != searchValuesContainsResult) { - AssertionFailed(haystack, needle, expectedIndex, searchValuesContainsResult ? 0 : -1, nameof(searchValuesContainsResult)); + AssertionFailed(haystack, needle, searchValuesInstance, expectedIndex, searchValuesContainsResult ? 0 : -1, nameof(searchValuesContainsResult)); } } @@ -546,13 +546,16 @@ private static ReadOnlySpan GetRandomSlice(Random rng, ReadOnlySpan spa return slice.Slice(0, Math.Min(slice.Length, rng.Next(maxLength + 1))); } - private static void AssertionFailed(ReadOnlySpan haystack, ReadOnlySpan needle, int expected, int actual, string approach) + private static void AssertionFailed(ReadOnlySpan haystack, ReadOnlySpan needle, SearchValues searchValues, int expected, int actual, string approach) where T : INumber { + Type implType = searchValues.GetType(); + string impl = $"{implType.Name} [{string.Join(", ", implType.GenericTypeArguments.Select(t => t.Name))}]"; + string readableHaystack = string.Join(", ", haystack.ToArray().Select(c => int.CreateChecked(c))); string readableNeedle = string.Join(", ", needle.ToArray().Select(c => int.CreateChecked(c))); - Assert.Fail($"Expected {expected}, got {approach}={actual} for needle='{readableNeedle}', haystack='{readableHaystack}'"); + Assert.Fail($"Expected {expected}, got {approach}={actual} for impl='{impl}', needle='{readableNeedle}', haystack='{readableHaystack}'"); } } } From 2a85dafaeb858184d8b9a3e16334a9503b0d0a20 Mon Sep 17 00:00:00 2001 From: Miha Zupan Date: Wed, 10 Jul 2024 19:54:11 +0200 Subject: [PATCH 3/4] Keep the inlining changes --- .../src/System/SearchValues/IndexOfAnyAsciiSearcher.cs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs index 42e0fcc2a65e78..0316faedc8491d 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs @@ -1212,7 +1212,9 @@ internal interface INegator public static bool NegateIfNeeded(bool result) => result; public static Vector128 NegateIfNeeded(Vector128 result) => result; public static Vector256 NegateIfNeeded(Vector256 result) => result; + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static uint ExtractMask(Vector128 result) => ~Vector128.Equals(result, Vector128.Zero).ExtractMostSignificantBits(); + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static uint ExtractMask(Vector256 result) => ~Vector256.Equals(result, Vector256.Zero).ExtractMostSignificantBits(); } @@ -1221,9 +1223,13 @@ internal interface INegator public static bool NegateIfNeeded(bool result) => !result; // This is intentionally testing for equality with 0 instead of "~result". // We want to know if any character didn't match, as that means it should be treated as a match for the -Except method. + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128 NegateIfNeeded(Vector128 result) => Vector128.Equals(result, Vector128.Zero); + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 NegateIfNeeded(Vector256 result) => Vector256.Equals(result, Vector256.Zero); + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static uint ExtractMask(Vector128 result) => result.ExtractMostSignificantBits(); + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static uint ExtractMask(Vector256 result) => result.ExtractMostSignificantBits(); } From 6c34c9b7e080eaafdca6eacd5b6d9923096c9cff Mon Sep 17 00:00:00 2001 From: Miha Zupan Date: Wed, 10 Jul 2024 19:55:22 +0200 Subject: [PATCH 4/4] Keep the shift improvements --- .../src/System/SearchValues/IndexOfAnyAsciiSearcher.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs index 0316faedc8491d..bf06e365f40c21 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs @@ -1098,7 +1098,7 @@ private static Vector128 IndexOfAnyLookup(Vector128 source // http://0x80.pl/articles/simd-byte-lookup.html#universal-algorithm Vector128 lowNibbles = source & Vector128.Create((byte)0xF); - Vector128 highNibbles = Vector128.ShiftRightLogical(source.AsInt32(), 4).AsByte() & Vector128.Create((byte)0xF); + Vector128 highNibbles = source >>> 4; Vector128 row0 = Vector128.ShuffleUnsafe(bitmapLookup0, lowNibbles); Vector128 row1 = Vector128.ShuffleUnsafe(bitmapLookup1, lowNibbles); @@ -1121,7 +1121,7 @@ private static Vector256 IndexOfAnyLookup(Vector256 source // http://0x80.pl/articles/simd-byte-lookup.html#universal-algorithm Vector256 lowNibbles = source & Vector256.Create((byte)0xF); - Vector256 highNibbles = Vector256.ShiftRightLogical(source.AsInt32(), 4).AsByte() & Vector256.Create((byte)0xF); + Vector256 highNibbles = source >>> 4; Vector256 row0 = Avx2.Shuffle(bitmapLookup0, lowNibbles); Vector256 row1 = Avx2.Shuffle(bitmapLookup1, lowNibbles);