From 1a49f612517d24e67e5c18201768d811af88b591 Mon Sep 17 00:00:00 2001 From: cuteant Date: Wed, 23 Jun 2021 02:00:35 +0800 Subject: [PATCH 1/5] Use C# compiler provided nint/nuint --- .../Internal/ASCIIUtility.Helpers.cs | 6 +- .../{ASCIIUtility.x64.cs => ASCIIUtility.cs} | 54 +- .../Internal/ASCIIUtility.x32.cs | 1729 ----------------- .../Internal/PlatformDependent.cs | 2 +- .../Internal/SpanHelpers.Byte.cs | 27 +- .../Internal/SpanHelpers.Char.cs | 466 +---- .../Internal/TextEncodings.Utf16.NetCore3.cs | 9 +- .../Internal/TextEncodings.Utf8.NetCore3.cs | 17 +- ...lidation.cs => Utf16Utility.Validation.cs} | 43 +- src/DotNetty.Common/Internal/Utf16Utility.cs | 2 +- .../Internal/Utf16Utility64.Validation.cs | 433 ----- ...nscoding.cs => Utf8Utility.Transcoding.cs} | 18 +- ...alidation.cs => Utf8Utility.Validation.cs} | 14 +- src/DotNetty.Common/Internal/Utf8Utility.cs | 4 +- .../Internal/Utf8Utility32.Transcoding.cs | 1477 -------------- .../Internal/Utf8Utility32.Validation.cs | 736 ------- .../Utilities/AsciiString.NetCore3.cs | 4 +- 17 files changed, 153 insertions(+), 4888 deletions(-) rename src/DotNetty.Common/Internal/{ASCIIUtility.x64.cs => ASCIIUtility.cs} (97%) delete mode 100644 src/DotNetty.Common/Internal/ASCIIUtility.x32.cs rename src/DotNetty.Common/Internal/{Utf16Utility32.Validation.cs => Utf16Utility.Validation.cs} (94%) delete mode 100644 src/DotNetty.Common/Internal/Utf16Utility64.Validation.cs rename src/DotNetty.Common/Internal/{Utf8Utility64.Transcoding.cs => Utf8Utility.Transcoding.cs} (98%) rename src/DotNetty.Common/Internal/{Utf8Utility64.Validation.cs => Utf8Utility.Validation.cs} (98%) delete mode 100644 src/DotNetty.Common/Internal/Utf8Utility32.Transcoding.cs delete mode 100644 src/DotNetty.Common/Internal/Utf8Utility32.Validation.cs diff --git a/src/DotNetty.Common/Internal/ASCIIUtility.Helpers.cs b/src/DotNetty.Common/Internal/ASCIIUtility.Helpers.cs index ec23348ae..189d4a4d8 100644 --- 
a/src/DotNetty.Common/Internal/ASCIIUtility.Helpers.cs +++ b/src/DotNetty.Common/Internal/ASCIIUtility.Helpers.cs @@ -13,17 +13,17 @@ namespace DotNetty.Common.Internal { - internal static class ASCIIUtility + partial class ASCIIUtility { /// <summary> /// A mask which selects only the high bit of each byte of the given <see cref="uint"/>. /// </summary> - internal const uint UInt32HighBitsOnlyMask = 0x80808080u; + private const uint UInt32HighBitsOnlyMask = 0x80808080u; /// <summary> /// A mask which selects only the high bit of each byte of the given <see cref="ulong"/>. /// </summary> - internal const ulong UInt64HighBitsOnlyMask = 0x80808080_80808080ul; + private const ulong UInt64HighBitsOnlyMask = 0x80808080_80808080ul; /// <summary> /// Returns <see langword="true"/> iff all bytes in <paramref name="value"/> are ASCII. diff --git a/src/DotNetty.Common/Internal/ASCIIUtility.x64.cs b/src/DotNetty.Common/Internal/ASCIIUtility.cs similarity index 97% rename from src/DotNetty.Common/Internal/ASCIIUtility.x64.cs rename to src/DotNetty.Common/Internal/ASCIIUtility.cs index e518ddec8..cb419bb84 100644 --- a/src/DotNetty.Common/Internal/ASCIIUtility.x64.cs +++ b/src/DotNetty.Common/Internal/ASCIIUtility.cs @@ -1,4 +1,4 @@ -// borrowed from https://github.com/dotnet/corefx/blob/release/3.1/src/Common/src/CoreLib/System/Text/ASCIIUtility.cs +// borrowed from https://github.com/dotnet/corefx/blob/release/3.1/src/Common/src/CoreLib/System/Text/ASCIIUtility.cs // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
@@ -11,27 +11,17 @@ using System.Runtime.CompilerServices; using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; -using nint = System.Int64; -using nuint = System.UInt64; namespace DotNetty.Common.Internal { - internal static partial class ASCIIUtility64 + internal static partial class ASCIIUtility { -#if DEBUG - static ASCIIUtility64() - { - Debug.Assert(sizeof(nint) == IntPtr.Size && nint.MinValue < 0, "nint is defined incorrectly."); - Debug.Assert(sizeof(nuint) == IntPtr.Size && nuint.MinValue == 0, "nuint is defined incorrectly."); - } -#endif // DEBUG - [MethodImpl(MethodImplOptions.AggressiveInlining)] private static bool AllBytesInUInt64AreAscii(ulong value) { // If the high bit of any byte is set, that byte is non-ASCII. - return (0ul >= (value & ASCIIUtility.UInt64HighBitsOnlyMask)); + return (0ul >= (value & UInt64HighBitsOnlyMask)); } /// @@ -150,12 +140,12 @@ private static unsafe nuint GetIndexOfFirstNonAsciiByte_Default(byte* pBuffer, n currentUInt32 = Unsafe.ReadUnaligned(pBuffer); uint nextUInt32 = Unsafe.ReadUnaligned(pBuffer + 4); - if (!ASCIIUtility.AllBytesInUInt32AreAscii(currentUInt32 | nextUInt32)) + if (!AllBytesInUInt32AreAscii(currentUInt32 | nextUInt32)) { // One of these two values contains non-ASCII bytes. // Figure out which one it is, then put it in 'current' so that we can drain the ASCII bytes. 
- if (ASCIIUtility.AllBytesInUInt32AreAscii(currentUInt32)) + if (AllBytesInUInt32AreAscii(currentUInt32)) { currentUInt32 = nextUInt32; pBuffer += 4; @@ -173,7 +163,7 @@ private static unsafe nuint GetIndexOfFirstNonAsciiByte_Default(byte* pBuffer, n if ((bufferLength & 4) != 0) { currentUInt32 = Unsafe.ReadUnaligned(pBuffer); - if (!ASCIIUtility.AllBytesInUInt32AreAscii(currentUInt32)) + if (!AllBytesInUInt32AreAscii(currentUInt32)) { goto FoundNonAsciiData; } @@ -186,7 +176,7 @@ private static unsafe nuint GetIndexOfFirstNonAsciiByte_Default(byte* pBuffer, n if ((bufferLength & 2) != 0) { currentUInt32 = Unsafe.ReadUnaligned(pBuffer); - if (!ASCIIUtility.AllBytesInUInt32AreAscii(currentUInt32)) + if (!AllBytesInUInt32AreAscii(currentUInt32)) { goto FoundNonAsciiData; } @@ -214,14 +204,14 @@ private static unsafe nuint GetIndexOfFirstNonAsciiByte_Default(byte* pBuffer, n FoundNonAsciiData: - Debug.Assert(!ASCIIUtility.AllBytesInUInt32AreAscii(currentUInt32), "Shouldn't have reached this point if we have an all-ASCII input."); + Debug.Assert(!AllBytesInUInt32AreAscii(currentUInt32), "Shouldn't have reached this point if we have an all-ASCII input."); // The method being called doesn't bother looking at whether the high byte is ASCII. There are only // two scenarios: (a) either one of the earlier bytes is not ASCII and the search terminates before // we get to the high byte; or (b) all of the earlier bytes are ASCII, so the high byte must be // non-ASCII. In both cases we only care about the low 24 bits. 
- pBuffer += ASCIIUtility.CountNumberOfLeadingAsciiBytesFromUInt32WithSomeNonAsciiData(currentUInt32); + pBuffer += CountNumberOfLeadingAsciiBytesFromUInt32WithSomeNonAsciiData(currentUInt32); goto Finish; } @@ -381,8 +371,8 @@ private static unsafe nuint GetIndexOfFirstNonAsciiByte_Sse2(byte* pBuffer, nuin FoundNonAsciiDataInCurrentDWord: uint currentDWord; - Debug.Assert(!ASCIIUtility.AllBytesInUInt32AreAscii(currentDWord), "Shouldn't be here unless we see non-ASCII data."); - pBuffer += ASCIIUtility.CountNumberOfLeadingAsciiBytesFromUInt32WithSomeNonAsciiData(currentDWord); + Debug.Assert(!AllBytesInUInt32AreAscii(currentDWord), "Shouldn't be here unless we see non-ASCII data."); + pBuffer += CountNumberOfLeadingAsciiBytesFromUInt32WithSomeNonAsciiData(currentDWord); goto Finish; @@ -408,7 +398,7 @@ private static unsafe nuint GetIndexOfFirstNonAsciiByte_Sse2(byte* pBuffer, nuin // Clear everything but the high bit of each byte, then tzcnt. // Remember the / 8 at the end to convert bit count to byte count. - candidateUInt64 &= ASCIIUtility.UInt64HighBitsOnlyMask; + candidateUInt64 &= UInt64HighBitsOnlyMask; pBuffer += (nuint)(Bmi1.X64.TrailingZeroCount(candidateUInt64) / 8); goto Finish; } @@ -420,12 +410,12 @@ private static unsafe nuint GetIndexOfFirstNonAsciiByte_Sse2(byte* pBuffer, nuin currentDWord = Unsafe.ReadUnaligned(pBuffer); uint nextDWord = Unsafe.ReadUnaligned(pBuffer + 4); - if (!ASCIIUtility.AllBytesInUInt32AreAscii(currentDWord | nextDWord)) + if (!AllBytesInUInt32AreAscii(currentDWord | nextDWord)) { // At least one of the values wasn't all-ASCII. // We need to figure out which one it was and stick it in the currentMask local. 
- if (ASCIIUtility.AllBytesInUInt32AreAscii(currentDWord)) + if (AllBytesInUInt32AreAscii(currentDWord)) { currentDWord = nextDWord; // this one is the culprit pBuffer += 4; @@ -444,7 +434,7 @@ private static unsafe nuint GetIndexOfFirstNonAsciiByte_Sse2(byte* pBuffer, nuin { currentDWord = Unsafe.ReadUnaligned(pBuffer); - if (!ASCIIUtility.AllBytesInUInt32AreAscii(currentDWord)) + if (!AllBytesInUInt32AreAscii(currentDWord)) { goto FoundNonAsciiDataInCurrentDWord; } @@ -459,7 +449,7 @@ private static unsafe nuint GetIndexOfFirstNonAsciiByte_Sse2(byte* pBuffer, nuin { currentDWord = Unsafe.ReadUnaligned(pBuffer); - if (!ASCIIUtility.AllBytesInUInt32AreAscii(currentDWord)) + if (!AllBytesInUInt32AreAscii(currentDWord)) { // We only care about the 0x0080 bit of the value. If it's not set, then we // increment currentOffset by 1. If it's set, we don't increment it at all. @@ -512,7 +502,9 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Default(char* pBuffer, n char* pOriginalBuffer = pBuffer; +#if NET Debug.Assert(bufferLength <= nuint.MaxValue / sizeof(char)); +#endif // Before we drain off char-by-char, try a generic vectorized loop. // Only run the loop if we have at least two vectors we can pull out. @@ -676,7 +668,9 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin Vector128 asciiMaskForPXOR = Vector128.Create(unchecked((short)0x8000)); // used for PXOR Vector128 asciiMaskForPCMPGTW = Vector128.Create(unchecked((short)0x807F)); // used for PCMPGTW +#if NET Debug.Assert(bufferLength <= nuint.MaxValue / sizeof(char)); +#endif // Read the first vector unaligned. 
@@ -1526,7 +1520,7 @@ public static unsafe nuint WidenAsciiToUtf16(byte* pAsciiBuffer, char* pUtf16Buf do { asciiData = Unsafe.ReadUnaligned(pAsciiBuffer + currentOffset); - if (!ASCIIUtility.AllBytesInUInt32AreAscii(asciiData)) + if (!AllBytesInUInt32AreAscii(asciiData)) { goto FoundNonAsciiData; } @@ -1541,7 +1535,7 @@ public static unsafe nuint WidenAsciiToUtf16(byte* pAsciiBuffer, char* pUtf16Buf if (((uint)remainingElementCount & 2) != 0) { asciiData = Unsafe.ReadUnaligned(pAsciiBuffer + currentOffset); - if (!ASCIIUtility.AllBytesInUInt32AreAscii(asciiData)) + if (!AllBytesInUInt32AreAscii(asciiData)) { goto FoundNonAsciiData; } @@ -1580,7 +1574,7 @@ public static unsafe nuint WidenAsciiToUtf16(byte* pAsciiBuffer, char* pUtf16Buf FoundNonAsciiData: - Debug.Assert(!ASCIIUtility.AllBytesInUInt32AreAscii(asciiData), "Shouldn't have reached this point if we have an all-ASCII input."); + Debug.Assert(!AllBytesInUInt32AreAscii(asciiData), "Shouldn't have reached this point if we have an all-ASCII input."); // Drain ASCII bytes one at a time. @@ -1693,7 +1687,7 @@ private static unsafe nuint WidenAsciiToUtf16_Sse2(byte* pAsciiBuffer, char* pUt [MethodImpl(MethodImplOptions.AggressiveInlining)] private static void WidenFourAsciiBytesToUtf16AndWriteToBuffer(ref char outputBuffer, uint value) { - Debug.Assert(ASCIIUtility.AllBytesInUInt32AreAscii(value)); + Debug.Assert(AllBytesInUInt32AreAscii(value)); if (Bmi2.X64.IsSupported) { diff --git a/src/DotNetty.Common/Internal/ASCIIUtility.x32.cs b/src/DotNetty.Common/Internal/ASCIIUtility.x32.cs deleted file mode 100644 index 5b1b80dcd..000000000 --- a/src/DotNetty.Common/Internal/ASCIIUtility.x32.cs +++ /dev/null @@ -1,1729 +0,0 @@ -// borrowed from https://github.com/dotnet/corefx/blob/release/3.1/src/Common/src/CoreLib/System/Text/ASCIIUtility.cs - -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
-// See the LICENSE file in the project root for more information. - -#if NETCOREAPP_3_0_GREATER -using System; -using System.Diagnostics; -using System.Numerics; -using System.Runtime.CompilerServices; -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.X86; -using nint = System.Int32; -using nuint = System.UInt32; - -namespace DotNetty.Common.Internal -{ - internal static class ASCIIUtility32 - { -#if DEBUG - static ASCIIUtility32() - { - Debug.Assert(sizeof(nint) == IntPtr.Size && nint.MinValue < 0, "nint is defined incorrectly."); - Debug.Assert(sizeof(nuint) == IntPtr.Size && nuint.MinValue == 0, "nuint is defined incorrectly."); - } -#endif // DEBUG - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static bool AllBytesInUInt64AreAscii(ulong value) - { - // If the high bit of any byte is set, that byte is non-ASCII. - - return (0ul >= (value & ASCIIUtility.UInt64HighBitsOnlyMask)); - } - - /// - /// Returns iff all chars in are ASCII. - /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static bool AllCharsInUInt32AreAscii(uint value) - { - return (0u >= (value & ~0x007F007Fu)); - } - - /// - /// Returns iff all chars in are ASCII. - /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static bool AllCharsInUInt64AreAscii(ulong value) - { - return (0ul >= (value & ~0x007F007F_007F007Ful)); - } - - /// - /// Given a DWORD which represents two packed chars in machine-endian order, - /// iff the first char (in machine-endian order) is ASCII. - /// - /// - /// - private static bool FirstCharInUInt32IsAscii(uint value) - { - return (BitConverter.IsLittleEndian && 0u >= (value & 0xFF80u)) - || (!BitConverter.IsLittleEndian && 0u >= (value & 0xFF800000u)); - } - - /// - /// Returns the index in where the first non-ASCII byte is found. - /// Returns if the buffer is empty or all-ASCII. - /// - /// An ASCII byte is defined as 0x00 - 0x7F, inclusive. 
- [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe nuint GetIndexOfFirstNonAsciiByte(byte* pBuffer, nuint bufferLength) - { - // If SSE2 is supported, use those specific intrinsics instead of the generic vectorized - // code below. This has two benefits: (a) we can take advantage of specific instructions like - // pmovmskb which we know are optimized, and (b) we can avoid downclocking the processor while - // this method is running. - - return (Sse2.IsSupported) - ? GetIndexOfFirstNonAsciiByte_Sse2(pBuffer, bufferLength) - : GetIndexOfFirstNonAsciiByte_Default(pBuffer, bufferLength); - } - - private static unsafe nuint GetIndexOfFirstNonAsciiByte_Default(byte* pBuffer, nuint bufferLength) - { - // Squirrel away the original buffer reference. This method works by determining the exact - // byte reference where non-ASCII data begins, so we need this base value to perform the - // final subtraction at the end of the method to get the index into the original buffer. - - byte* pOriginalBuffer = pBuffer; - - // Before we drain off byte-by-byte, try a generic vectorized loop. - // Only run the loop if we have at least two vectors we can pull out. - // Note use of SBYTE instead of BYTE below; we're using the two's-complement - // representation of negative integers to act as a surrogate for "is ASCII?". - - if (Vector.IsHardwareAccelerated && bufferLength >= 2 * (uint)Vector.Count) - { - uint SizeOfVectorInBytes = (uint)Vector.Count; // JIT will make this a const - - if (Vector.GreaterThanOrEqualAll(Unsafe.ReadUnaligned>(pBuffer), Vector.Zero)) - { - // The first several elements of the input buffer were ASCII. Bump up the pointer to the - // next aligned boundary, then perform aligned reads from here on out until we find non-ASCII - // data or we approach the end of the buffer. It's possible we'll reread data; this is ok. 
- - byte* pFinalVectorReadPos = pBuffer + bufferLength - SizeOfVectorInBytes; - pBuffer = (byte*)(((nuint)pBuffer + SizeOfVectorInBytes) & ~(nuint)(SizeOfVectorInBytes - 1)); - -#if DEBUG - long numBytesRead = pBuffer - pOriginalBuffer; - Debug.Assert(0 < numBytesRead && numBytesRead <= SizeOfVectorInBytes, "We should've made forward progress of at least one byte."); - Debug.Assert((nuint)numBytesRead <= bufferLength, "We shouldn't have read past the end of the input buffer."); -#endif - - Debug.Assert(pBuffer <= pFinalVectorReadPos, "Should be able to read at least one vector."); - - do - { - Debug.Assert((nuint)pBuffer % SizeOfVectorInBytes == 0, "Vector read should be aligned."); - if (Vector.LessThanAny(Unsafe.Read>(pBuffer), Vector.Zero)) - { - break; // found non-ASCII data - } - - pBuffer += SizeOfVectorInBytes; - } while (pBuffer <= pFinalVectorReadPos); - - // Adjust the remaining buffer length for the number of elements we just consumed. - - bufferLength -= (nuint)pBuffer; - bufferLength += (nuint)pOriginalBuffer; - } - } - - // At this point, the buffer length wasn't enough to perform a vectorized search, or we did perform - // a vectorized search and encountered non-ASCII data. In either case go down a non-vectorized code - // path to drain any remaining ASCII bytes. - // - // We're going to perform unaligned reads, so prefer 32-bit reads instead of 64-bit reads. - // This also allows us to perform more optimized bit twiddling tricks to count the number of ASCII bytes. - - uint currentUInt32; - - // Try reading 64 bits at a time in a loop. - - for (; bufferLength >= 8; bufferLength -= 8) - { - currentUInt32 = Unsafe.ReadUnaligned(pBuffer); - uint nextUInt32 = Unsafe.ReadUnaligned(pBuffer + 4); - - if (!ASCIIUtility.AllBytesInUInt32AreAscii(currentUInt32 | nextUInt32)) - { - // One of these two values contains non-ASCII bytes. - // Figure out which one it is, then put it in 'current' so that we can drain the ASCII bytes. 
- - if (ASCIIUtility.AllBytesInUInt32AreAscii(currentUInt32)) - { - currentUInt32 = nextUInt32; - pBuffer += 4; - } - - goto FoundNonAsciiData; - } - - pBuffer += 8; // consumed 8 ASCII bytes - } - - // From this point forward we don't need to update bufferLength. - // Try reading 32 bits. - - if ((bufferLength & 4) != 0) - { - currentUInt32 = Unsafe.ReadUnaligned(pBuffer); - if (!ASCIIUtility.AllBytesInUInt32AreAscii(currentUInt32)) - { - goto FoundNonAsciiData; - } - - pBuffer += 4; - } - - // Try reading 16 bits. - - if ((bufferLength & 2) != 0) - { - currentUInt32 = Unsafe.ReadUnaligned(pBuffer); - if (!ASCIIUtility.AllBytesInUInt32AreAscii(currentUInt32)) - { - goto FoundNonAsciiData; - } - - pBuffer += 2; - } - - // Try reading 8 bits - - if ((bufferLength & 1) != 0) - { - // If the buffer contains non-ASCII data, the comparison below will fail, and - // we'll end up not incrementing the buffer reference. - - if (*(sbyte*)pBuffer >= 0) - { - pBuffer++; - } - } - - Finish: - - nuint totalNumBytesRead = (nuint)pBuffer - (nuint)pOriginalBuffer; - return totalNumBytesRead; - - FoundNonAsciiData: - - Debug.Assert(!ASCIIUtility.AllBytesInUInt32AreAscii(currentUInt32), "Shouldn't have reached this point if we have an all-ASCII input."); - - // The method being called doesn't bother looking at whether the high byte is ASCII. There are only - // two scenarios: (a) either one of the earlier bytes is not ASCII and the search terminates before - // we get to the high byte; or (b) all of the earlier bytes are ASCII, so the high byte must be - // non-ASCII. In both cases we only care about the low 24 bits. 
- - pBuffer += ASCIIUtility.CountNumberOfLeadingAsciiBytesFromUInt32WithSomeNonAsciiData(currentUInt32); - goto Finish; - } - - private static unsafe nuint GetIndexOfFirstNonAsciiByte_Sse2(byte* pBuffer, nuint bufferLength) - { - // JIT turns the below into constants - - uint SizeOfVector128 = (uint)Unsafe.SizeOf>(); - nuint MaskOfAllBitsInVector128 = (nuint)(SizeOfVector128 - 1); - - Debug.Assert(Sse2.IsSupported, "Should've been checked by caller."); - Debug.Assert(BitConverter.IsLittleEndian, "SSE2 assumes little-endian."); - - uint currentMask, secondMask; - byte* pOriginalBuffer = pBuffer; - - // This method is written such that control generally flows top-to-bottom, avoiding - // jumps as much as possible in the optimistic case of a large enough buffer and - // "all ASCII". If we see non-ASCII data, we jump out of the hot paths to targets - // after all the main logic. - - if (bufferLength < SizeOfVector128) - { - goto InputBufferLessThanOneVectorInLength; // can't vectorize; drain primitives instead - } - - // Read the first vector unaligned. - - currentMask = (uint)Sse2.MoveMask(Sse2.LoadVector128(pBuffer)); // unaligned load - - if (currentMask != 0) - { - goto FoundNonAsciiDataInCurrentMask; - } - - // If we have less than 32 bytes to process, just go straight to the final unaligned - // read. There's no need to mess with the loop logic in the middle of this method. - - if (bufferLength < 2 * SizeOfVector128) - { - goto IncrementCurrentOffsetBeforeFinalUnalignedVectorRead; - } - - // Now adjust the read pointer so that future reads are aligned. 
- - pBuffer = (byte*)(((nuint)pBuffer + SizeOfVector128) & ~(nuint)MaskOfAllBitsInVector128); - -#if DEBUG - long numBytesRead = pBuffer - pOriginalBuffer; - Debug.Assert(0 < numBytesRead && numBytesRead <= SizeOfVector128, "We should've made forward progress of at least one byte."); - Debug.Assert((nuint)numBytesRead <= bufferLength, "We shouldn't have read past the end of the input buffer."); -#endif - - // Adjust the remaining length to account for what we just read. - - bufferLength += (nuint)pOriginalBuffer; - bufferLength -= (nuint)pBuffer; - - // The buffer is now properly aligned. - // Read 2 vectors at a time if possible. - - if (bufferLength >= 2 * SizeOfVector128) - { - byte* pFinalVectorReadPos = (byte*)((nuint)pBuffer + bufferLength - 2 * SizeOfVector128); - - // After this point, we no longer need to update the bufferLength value. - - do - { - Vector128 firstVector = Sse2.LoadAlignedVector128(pBuffer); - Vector128 secondVector = Sse2.LoadAlignedVector128(pBuffer + SizeOfVector128); - - currentMask = (uint)Sse2.MoveMask(firstVector); - secondMask = (uint)Sse2.MoveMask(secondVector); - - if ((currentMask | secondMask) != 0) - { - goto FoundNonAsciiDataInInnerLoop; - } - - pBuffer += 2 * SizeOfVector128; - } while (pBuffer <= pFinalVectorReadPos); - } - - // We have somewhere between 0 and (2 * vector length) - 1 bytes remaining to read from. - // Since the above loop doesn't update bufferLength, we can't rely on its absolute value. - // But we _can_ rely on it to tell us how much remaining data must be drained by looking - // at what bits of it are set. This works because had we updated it within the loop above, - // we would've been adding 2 * SizeOfVector128 on each iteration, but we only care about - // bits which are less significant than those that the addition would've acted on. - - // If there is fewer than one vector length remaining, skip the next aligned read. 
- - if (0u >= (bufferLength & SizeOfVector128)) - { - goto DoFinalUnalignedVectorRead; - } - - // At least one full vector's worth of data remains, so we can safely read it. - // Remember, at this point pBuffer is still aligned. - - currentMask = (uint)Sse2.MoveMask(Sse2.LoadAlignedVector128(pBuffer)); - if (currentMask != 0) - { - goto FoundNonAsciiDataInCurrentMask; - } - - IncrementCurrentOffsetBeforeFinalUnalignedVectorRead: - - pBuffer += SizeOfVector128; - - DoFinalUnalignedVectorRead: - - if (((byte)bufferLength & MaskOfAllBitsInVector128) != 0) - { - // Perform an unaligned read of the last vector. - // We need to adjust the pointer because we're re-reading data. - - pBuffer += (bufferLength & MaskOfAllBitsInVector128) - SizeOfVector128; - - currentMask = (uint)Sse2.MoveMask(Sse2.LoadVector128(pBuffer)); // unaligned load - if (currentMask != 0) - { - goto FoundNonAsciiDataInCurrentMask; - } - - pBuffer += SizeOfVector128; - } - - Finish: - - return (nuint)pBuffer - (nuint)pOriginalBuffer; // and we're done! - - FoundNonAsciiDataInInnerLoop: - - // If the current (first) mask isn't the mask that contains non-ASCII data, then it must - // instead be the second mask. If so, skip the entire first mask and drain ASCII bytes - // from the second mask. - - if (0u >= currentMask) - { - pBuffer += SizeOfVector128; - currentMask = secondMask; - } - - FoundNonAsciiDataInCurrentMask: - - // The mask contains - from the LSB - a 0 for each ASCII byte we saw, and a 1 for each non-ASCII byte. - // Tzcnt is the correct operation to count the number of zero bits quickly. If this instruction isn't - // available, we'll fall back to a normal loop. 
- - Debug.Assert(currentMask != 0, "Shouldn't be here unless we see non-ASCII data."); - pBuffer += (uint)BitOperations.TrailingZeroCount(currentMask); - - goto Finish; - - FoundNonAsciiDataInCurrentDWord: - - uint currentDWord; - Debug.Assert(!ASCIIUtility.AllBytesInUInt32AreAscii(currentDWord), "Shouldn't be here unless we see non-ASCII data."); - pBuffer += ASCIIUtility.CountNumberOfLeadingAsciiBytesFromUInt32WithSomeNonAsciiData(currentDWord); - - goto Finish; - - InputBufferLessThanOneVectorInLength: - - // These code paths get hit if the original input length was less than one vector in size. - // We can't perform vectorized reads at this point, so we'll fall back to reading primitives - // directly. Note that all of these reads are unaligned. - - Debug.Assert(bufferLength < SizeOfVector128); - - // QWORD drain - - if ((bufferLength & 8) != 0) - { - if (Bmi1.X64.IsSupported) - { - // If we can use 64-bit tzcnt to count the number of leading ASCII bytes, prefer it. - - ulong candidateUInt64 = Unsafe.ReadUnaligned(pBuffer); - if (!AllBytesInUInt64AreAscii(candidateUInt64)) - { - // Clear everything but the high bit of each byte, then tzcnt. - // Remember the / 8 at the end to convert bit count to byte count. - - candidateUInt64 &= ASCIIUtility.UInt64HighBitsOnlyMask; - pBuffer += (nuint)(Bmi1.X64.TrailingZeroCount(candidateUInt64) / 8); - goto Finish; - } - } - else - { - // If we can't use 64-bit tzcnt, no worries. We'll just do 2x 32-bit reads instead. - - currentDWord = Unsafe.ReadUnaligned(pBuffer); - uint nextDWord = Unsafe.ReadUnaligned(pBuffer + 4); - - if (!ASCIIUtility.AllBytesInUInt32AreAscii(currentDWord | nextDWord)) - { - // At least one of the values wasn't all-ASCII. - // We need to figure out which one it was and stick it in the currentMask local. 
- - if (ASCIIUtility.AllBytesInUInt32AreAscii(currentDWord)) - { - currentDWord = nextDWord; // this one is the culprit - pBuffer += 4; - } - - goto FoundNonAsciiDataInCurrentDWord; - } - } - - pBuffer += 8; // successfully consumed 8 ASCII bytes - } - - // DWORD drain - - if ((bufferLength & 4) != 0) - { - currentDWord = Unsafe.ReadUnaligned(pBuffer); - - if (!ASCIIUtility.AllBytesInUInt32AreAscii(currentDWord)) - { - goto FoundNonAsciiDataInCurrentDWord; - } - - pBuffer += 4; // successfully consumed 4 ASCII bytes - } - - // WORD drain - // (We movzx to a DWORD for ease of manipulation.) - - if ((bufferLength & 2) != 0) - { - currentDWord = Unsafe.ReadUnaligned(pBuffer); - - if (!ASCIIUtility.AllBytesInUInt32AreAscii(currentDWord)) - { - // We only care about the 0x0080 bit of the value. If it's not set, then we - // increment currentOffset by 1. If it's set, we don't increment it at all. - - pBuffer += (nuint)((nint)(sbyte)currentDWord >> 7) + 1; - goto Finish; - } - - pBuffer += 2; // successfully consumed 2 ASCII bytes - } - - // BYTE drain - - if ((bufferLength & 1) != 0) - { - // sbyte has non-negative value if byte is ASCII. - - if (*(sbyte*)(pBuffer) >= 0) - { - pBuffer++; // successfully consumed a single byte - } - } - - goto Finish; - } - - /// - /// Returns the index in where the first non-ASCII char is found. - /// Returns if the buffer is empty or all-ASCII. - /// - /// An ASCII char is defined as 0x0000 - 0x007F, inclusive. - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe nuint GetIndexOfFirstNonAsciiChar(char* pBuffer, nuint bufferLength /* in chars */) - { - // If SSE2 is supported, use those specific intrinsics instead of the generic vectorized - // code below. This has two benefits: (a) we can take advantage of specific instructions like - // pmovmskb which we know are optimized, and (b) we can avoid downclocking the processor while - // this method is running. - - return (Sse2.IsSupported) - ? 
GetIndexOfFirstNonAsciiChar_Sse2(pBuffer, bufferLength) - : GetIndexOfFirstNonAsciiChar_Default(pBuffer, bufferLength); - } - - private static unsafe nuint GetIndexOfFirstNonAsciiChar_Default(char* pBuffer, nuint bufferLength /* in chars */) - { - // Squirrel away the original buffer reference.This method works by determining the exact - // char reference where non-ASCII data begins, so we need this base value to perform the - // final subtraction at the end of the method to get the index into the original buffer. - - char* pOriginalBuffer = pBuffer; - - Debug.Assert(bufferLength <= nuint.MaxValue / sizeof(char)); - - // Before we drain off char-by-char, try a generic vectorized loop. - // Only run the loop if we have at least two vectors we can pull out. - - if (Vector.IsHardwareAccelerated && bufferLength >= 2 * (uint)Vector.Count) - { - uint SizeOfVectorInChars = (uint)Vector.Count; // JIT will make this a const - uint SizeOfVectorInBytes = (uint)Vector.Count; // JIT will make this a const - - Vector maxAscii = new Vector(0x007F); - - if (Vector.LessThanOrEqualAll(Unsafe.ReadUnaligned>(pBuffer), maxAscii)) - { - // The first several elements of the input buffer were ASCII. Bump up the pointer to the - // next aligned boundary, then perform aligned reads from here on out until we find non-ASCII - // data or we approach the end of the buffer. It's possible we'll reread data; this is ok. 
- - char* pFinalVectorReadPos = pBuffer + bufferLength - SizeOfVectorInChars; - pBuffer = (char*)(((nuint)pBuffer + SizeOfVectorInBytes) & ~(nuint)(SizeOfVectorInBytes - 1)); - -#if DEBUG - long numCharsRead = pBuffer - pOriginalBuffer; - Debug.Assert(0 < numCharsRead && numCharsRead <= SizeOfVectorInChars, "We should've made forward progress of at least one char."); - Debug.Assert((nuint)numCharsRead <= bufferLength, "We shouldn't have read past the end of the input buffer."); -#endif - - Debug.Assert(pBuffer <= pFinalVectorReadPos, "Should be able to read at least one vector."); - - do - { - Debug.Assert((nuint)pBuffer % SizeOfVectorInChars == 0, "Vector read should be aligned."); - if (Vector.GreaterThanAny(Unsafe.Read>(pBuffer), maxAscii)) - { - break; // found non-ASCII data - } - pBuffer += SizeOfVectorInChars; - } while (pBuffer <= pFinalVectorReadPos); - - // Adjust the remaining buffer length for the number of elements we just consumed. - - bufferLength -= ((nuint)pBuffer - (nuint)pOriginalBuffer) / sizeof(char); - } - } - - // At this point, the buffer length wasn't enough to perform a vectorized search, or we did perform - // a vectorized search and encountered non-ASCII data. In either case go down a non-vectorized code - // path to drain any remaining ASCII chars. - // - // We're going to perform unaligned reads, so prefer 32-bit reads instead of 64-bit reads. - // This also allows us to perform more optimized bit twiddling tricks to count the number of ASCII chars. - - uint currentUInt32; - - // Try reading 64 bits at a time in a loop. - - for (; bufferLength >= 4; bufferLength -= 4) // 64 bits = 4 * 16-bit chars - { - currentUInt32 = Unsafe.ReadUnaligned(pBuffer); - uint nextUInt32 = Unsafe.ReadUnaligned(pBuffer + 4 / sizeof(char)); - - if (!AllCharsInUInt32AreAscii(currentUInt32 | nextUInt32)) - { - // One of these two values contains non-ASCII chars. - // Figure out which one it is, then put it in 'current' so that we can drain the ASCII chars. 
- - if (AllCharsInUInt32AreAscii(currentUInt32)) - { - currentUInt32 = nextUInt32; - pBuffer += 2; - } - - goto FoundNonAsciiData; - } - - pBuffer += 4; // consumed 4 ASCII chars - } - - // From this point forward we don't need to keep track of the remaining buffer length. - // Try reading 32 bits. - - if ((bufferLength & 2) != 0) // 32 bits = 2 * 16-bit chars - { - currentUInt32 = Unsafe.ReadUnaligned(pBuffer); - if (!AllCharsInUInt32AreAscii(currentUInt32)) - { - goto FoundNonAsciiData; - } - - pBuffer += 2; - } - - // Try reading 16 bits. - // No need to try an 8-bit read after this since we're working with chars. - - if ((bufferLength & 1) != 0) - { - // If the buffer contains non-ASCII data, the comparison below will fail, and - // we'll end up not incrementing the buffer reference. - - if (*pBuffer <= 0x007F) - { - pBuffer++; - } - } - - Finish: - - nuint totalNumBytesRead = (nuint)pBuffer - (nuint)pOriginalBuffer; - Debug.Assert(totalNumBytesRead % sizeof(char) == 0, "Total number of bytes read should be even since we're working with chars."); - return totalNumBytesRead / sizeof(char); // convert byte count -> char count before returning - - FoundNonAsciiData: - - Debug.Assert(!AllCharsInUInt32AreAscii(currentUInt32), "Shouldn't have reached this point if we have an all-ASCII input."); - - // We don't bother looking at the second char - only the first char. - - if (FirstCharInUInt32IsAscii(currentUInt32)) - { - pBuffer++; - } - - goto Finish; - } - - private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuint bufferLength /* in chars */) - { - // This method contains logic optimized for both SSE2 and SSE41. Much of the logic in this method - // will be elided by JIT once we determine which specific ISAs we support. - - // Quick check for empty inputs. 
- - if (0u >= bufferLength) - { - return 0; - } - - // JIT turns the below into constants - - uint SizeOfVector128InBytes = (uint)Unsafe.SizeOf>(); - uint SizeOfVector128InChars = SizeOfVector128InBytes / sizeof(char); - - Debug.Assert(Sse2.IsSupported, "Should've been checked by caller."); - Debug.Assert(BitConverter.IsLittleEndian, "SSE2 assumes little-endian."); - - Vector128 firstVector, secondVector; - uint currentMask; - char* pOriginalBuffer = pBuffer; - - if (bufferLength < SizeOfVector128InChars) - { - goto InputBufferLessThanOneVectorInLength; // can't vectorize; drain primitives instead - } - - // This method is written such that control generally flows top-to-bottom, avoiding - // jumps as much as possible in the optimistic case of "all ASCII". If we see non-ASCII - // data, we jump out of the hot paths to targets at the end of the method. - - Vector128 asciiMaskForPTEST = Vector128.Create(unchecked((short)0xFF80)); // used for PTEST on supported hardware - Vector128 asciiMaskForPMINUW = Vector128.Create((ushort)0x0080); // used for PMINUW on supported hardware - Vector128 asciiMaskForPXOR = Vector128.Create(unchecked((short)0x8000)); // used for PXOR - Vector128 asciiMaskForPCMPGTW = Vector128.Create(unchecked((short)0x807F)); // used for PCMPGTW - - Debug.Assert(bufferLength <= nuint.MaxValue / sizeof(char)); - - // Read the first vector unaligned. - - firstVector = Sse2.LoadVector128((short*)pBuffer); // unaligned load - - if (Sse41.IsSupported) - { - // The SSE41-optimized code path works by forcing the 0x0080 bit in each WORD of the vector to be - // set iff the WORD element has value >= 0x0080 (non-ASCII). Then we'll treat it as a BYTE vector - // in order to extract the mask. - currentMask = (uint)Sse2.MoveMask(Sse41.Min(firstVector.AsUInt16(), asciiMaskForPMINUW).AsByte()); - } - else - { - // The SSE2-optimized code path works by forcing each WORD of the vector to be 0xFFFF iff the WORD - // element has value >= 0x0080 (non-ASCII). 
Then we'll treat it as a BYTE vector in order to extract - // the mask. - currentMask = (uint)Sse2.MoveMask(Sse2.CompareGreaterThan(Sse2.Xor(firstVector, asciiMaskForPXOR), asciiMaskForPCMPGTW).AsByte()); - } - - if (currentMask != 0) - { - goto FoundNonAsciiDataInCurrentMask; - } - - // If we have less than 32 bytes to process, just go straight to the final unaligned - // read. There's no need to mess with the loop logic in the middle of this method. - - // Adjust the remaining length to account for what we just read. - // For the remainder of this code path, bufferLength will be in bytes, not chars. - - bufferLength <<= 1; // chars to bytes - - if (bufferLength < 2 * SizeOfVector128InBytes) - { - goto IncrementCurrentOffsetBeforeFinalUnalignedVectorRead; - } - - // Now adjust the read pointer so that future reads are aligned. - - pBuffer = (char*)(((nuint)pBuffer + SizeOfVector128InBytes) & ~(nuint)(SizeOfVector128InBytes - 1)); - -#if DEBUG - long numCharsRead = pBuffer - pOriginalBuffer; - Debug.Assert(0 < numCharsRead && numCharsRead <= SizeOfVector128InChars, "We should've made forward progress of at least one char."); - Debug.Assert((nuint)numCharsRead <= bufferLength, "We shouldn't have read past the end of the input buffer."); -#endif - - // Adjust remaining buffer length. - - bufferLength += (nuint)pOriginalBuffer; - bufferLength -= (nuint)pBuffer; - - // The buffer is now properly aligned. - // Read 2 vectors at a time if possible. - - if (bufferLength >= 2 * SizeOfVector128InBytes) - { - char* pFinalVectorReadPos = (char*)((nuint)pBuffer + bufferLength - 2 * SizeOfVector128InBytes); - - // After this point, we no longer need to update the bufferLength value. 
- - do - { - firstVector = Sse2.LoadAlignedVector128((short*)pBuffer); - secondVector = Sse2.LoadAlignedVector128((short*)pBuffer + SizeOfVector128InChars); - Vector128 combinedVector = Sse2.Or(firstVector, secondVector); - - if (Sse41.IsSupported) - { - // If a non-ASCII bit is set in any WORD of the combined vector, we have seen non-ASCII data. - // Jump to the non-ASCII handler to figure out which particular vector contained non-ASCII data. - if (!Sse41.TestZ(combinedVector, asciiMaskForPTEST)) - { - goto FoundNonAsciiDataInFirstOrSecondVector; - } - } - else - { - // See comment earlier in the method for an explanation of how the below logic works. - if (Sse2.MoveMask(Sse2.CompareGreaterThan(Sse2.Xor(combinedVector, asciiMaskForPXOR), asciiMaskForPCMPGTW).AsByte()) != 0) - { - goto FoundNonAsciiDataInFirstOrSecondVector; - } - } - - pBuffer += 2 * SizeOfVector128InChars; - } while (pBuffer <= pFinalVectorReadPos); - } - - // We have somewhere between 0 and (2 * vector length) - 1 bytes remaining to read from. - // Since the above loop doesn't update bufferLength, we can't rely on its absolute value. - // But we _can_ rely on it to tell us how much remaining data must be drained by looking - // at what bits of it are set. This works because had we updated it within the loop above, - // we would've been adding 2 * SizeOfVector128 on each iteration, but we only care about - // bits which are less significant than those that the addition would've acted on. - - // If there is fewer than one vector length remaining, skip the next aligned read. - // Remember, at this point bufferLength is measured in bytes, not chars. - - if (0u >= (bufferLength & SizeOfVector128InBytes)) - { - goto DoFinalUnalignedVectorRead; - } - - // At least one full vector's worth of data remains, so we can safely read it. - // Remember, at this point pBuffer is still aligned. 
- - firstVector = Sse2.LoadAlignedVector128((short*)pBuffer); - - if (Sse41.IsSupported) - { - // If a non-ASCII bit is set in any WORD of the combined vector, we have seen non-ASCII data. - // Jump to the non-ASCII handler to figure out which particular vector contained non-ASCII data. - if (!Sse41.TestZ(firstVector, asciiMaskForPTEST)) - { - goto FoundNonAsciiDataInFirstVector; - } - } - else - { - // See comment earlier in the method for an explanation of how the below logic works. - currentMask = (uint)Sse2.MoveMask(Sse2.CompareGreaterThan(Sse2.Xor(firstVector, asciiMaskForPXOR), asciiMaskForPCMPGTW).AsByte()); - if (currentMask != 0) - { - goto FoundNonAsciiDataInCurrentMask; - } - } - - IncrementCurrentOffsetBeforeFinalUnalignedVectorRead: - - pBuffer += SizeOfVector128InChars; - - DoFinalUnalignedVectorRead: - - if (((byte)bufferLength & (SizeOfVector128InBytes - 1)) != 0) - { - // Perform an unaligned read of the last vector. - // We need to adjust the pointer because we're re-reading data. - - pBuffer = (char*)((byte*)pBuffer + (bufferLength & (SizeOfVector128InBytes - 1)) - SizeOfVector128InBytes); - firstVector = Sse2.LoadVector128((short*)pBuffer); // unaligned load - - if (Sse41.IsSupported) - { - // If a non-ASCII bit is set in any WORD of the combined vector, we have seen non-ASCII data. - // Jump to the non-ASCII handler to figure out which particular vector contained non-ASCII data. - if (!Sse41.TestZ(firstVector, asciiMaskForPTEST)) - { - goto FoundNonAsciiDataInFirstVector; - } - } - else - { - // See comment earlier in the method for an explanation of how the below logic works. 
- currentMask = (uint)Sse2.MoveMask(Sse2.CompareGreaterThan(Sse2.Xor(firstVector, asciiMaskForPXOR), asciiMaskForPCMPGTW).AsByte()); - if (currentMask != 0) - { - goto FoundNonAsciiDataInCurrentMask; - } - } - - pBuffer += SizeOfVector128InChars; - } - - Finish: - - Debug.Assert(((nuint)pBuffer - (nuint)pOriginalBuffer) % 2 == 0, "Shouldn't have incremented any pointer by an odd byte count."); - return ((nuint)pBuffer - (nuint)pOriginalBuffer) / sizeof(char); // and we're done! (remember to adjust for char count) - - FoundNonAsciiDataInFirstOrSecondVector: - - // We don't know if the first or the second vector contains non-ASCII data. Check the first - // vector, and if that's all-ASCII then the second vector must be the culprit. Either way - // we'll make sure the first vector local is the one that contains the non-ASCII data. - - // See comment earlier in the method for an explanation of how the below logic works. - if (Sse41.IsSupported) - { - if (!Sse41.TestZ(firstVector, asciiMaskForPTEST)) - { - goto FoundNonAsciiDataInFirstVector; - } - } - else - { - currentMask = (uint)Sse2.MoveMask(Sse2.CompareGreaterThan(Sse2.Xor(firstVector, asciiMaskForPXOR), asciiMaskForPCMPGTW).AsByte()); - if (currentMask != 0) - { - goto FoundNonAsciiDataInCurrentMask; - } - } - - // Wasn't the first vector; must be the second. - - pBuffer += SizeOfVector128InChars; - firstVector = secondVector; - - FoundNonAsciiDataInFirstVector: - - // See comment earlier in the method for an explanation of how the below logic works. - if (Sse41.IsSupported) - { - currentMask = (uint)Sse2.MoveMask(Sse41.Min(firstVector.AsUInt16(), asciiMaskForPMINUW).AsByte()); - } - else - { - currentMask = (uint)Sse2.MoveMask(Sse2.CompareGreaterThan(Sse2.Xor(firstVector, asciiMaskForPXOR), asciiMaskForPCMPGTW).AsByte()); - } - - FoundNonAsciiDataInCurrentMask: - - // The mask contains - from the LSB - a 0 for each ASCII byte we saw, and a 1 for each non-ASCII byte. 
- // Tzcnt is the correct operation to count the number of zero bits quickly. If this instruction isn't - // available, we'll fall back to a normal loop. (Even though the original vector used WORD elements, - // masks work on BYTE elements, and we account for this in the final fixup.) - - Debug.Assert(currentMask != 0, "Shouldn't be here unless we see non-ASCII data."); - pBuffer = (char*)((byte*)pBuffer + (uint)BitOperations.TrailingZeroCount(currentMask)); - - goto Finish; - - FoundNonAsciiDataInCurrentDWord: - - uint currentDWord; - Debug.Assert(!AllCharsInUInt32AreAscii(currentDWord), "Shouldn't be here unless we see non-ASCII data."); - - if (FirstCharInUInt32IsAscii(currentDWord)) - { - pBuffer++; // skip past the ASCII char - } - - goto Finish; - - InputBufferLessThanOneVectorInLength: - - // These code paths get hit if the original input length was less than one vector in size. - // We can't perform vectorized reads at this point, so we'll fall back to reading primitives - // directly. Note that all of these reads are unaligned. - - // Reminder: If this code path is hit, bufferLength is still a char count, not a byte count. - // We skipped the code path that multiplied the count by sizeof(char). - - Debug.Assert(bufferLength < SizeOfVector128InChars); - - // QWORD drain - - if ((bufferLength & 4) != 0) - { - if (Bmi1.X64.IsSupported) - { - // If we can use 64-bit tzcnt to count the number of leading ASCII chars, prefer it. - - ulong candidateUInt64 = Unsafe.ReadUnaligned(pBuffer); - if (!AllCharsInUInt64AreAscii(candidateUInt64)) - { - // Clear the low 7 bits (the ASCII bits) of each char, then tzcnt. - // Remember the / 8 at the end to convert bit count to byte count, - // then the & ~1 at the end to treat a match in the high byte of - // any char the same as a match in the low byte of that same char. 
- - candidateUInt64 &= 0xFF80FF80_FF80FF80ul; - pBuffer = (char*)((byte*)pBuffer + ((nuint)(Bmi1.X64.TrailingZeroCount(candidateUInt64) / 8) & ~(nuint)1)); - goto Finish; - } - } - else - { - // If we can't use 64-bit tzcnt, no worries. We'll just do 2x 32-bit reads instead. - - currentDWord = Unsafe.ReadUnaligned(pBuffer); - uint nextDWord = Unsafe.ReadUnaligned(pBuffer + 4 / sizeof(char)); - - if (!AllCharsInUInt32AreAscii(currentDWord | nextDWord)) - { - // At least one of the values wasn't all-ASCII. - // We need to figure out which one it was and stick it in the currentMask local. - - if (AllCharsInUInt32AreAscii(currentDWord)) - { - currentDWord = nextDWord; // this one is the culprit - pBuffer += 4 / sizeof(char); - } - - goto FoundNonAsciiDataInCurrentDWord; - } - } - - pBuffer += 4; // successfully consumed 4 ASCII chars - } - - // DWORD drain - - if ((bufferLength & 2) != 0) - { - currentDWord = Unsafe.ReadUnaligned(pBuffer); - - if (!AllCharsInUInt32AreAscii(currentDWord)) - { - goto FoundNonAsciiDataInCurrentDWord; - } - - pBuffer += 2; // successfully consumed 2 ASCII chars - } - - // WORD drain - // This is the final drain; there's no need for a BYTE drain since our elemental type is 16-bit char. - - if ((bufferLength & 1) != 0) - { - if (*pBuffer <= 0x007F) - { - pBuffer++; // successfully consumed a single char - } - } - - goto Finish; - } - - /// - /// Given a QWORD which represents a buffer of 4 ASCII chars in machine-endian order, - /// narrows each WORD to a BYTE, then writes the 4-byte result to the output buffer - /// also in machine-endian order. - /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static void NarrowFourUtf16CharsToAsciiAndWriteToBuffer(ref byte outputBuffer, ulong value) - { - Debug.Assert(AllCharsInUInt64AreAscii(value)); - - if (Bmi2.X64.IsSupported) - { - // BMI2 will work regardless of the processor's endianness. 
- Unsafe.WriteUnaligned(ref outputBuffer, (uint)Bmi2.X64.ParallelBitExtract(value, 0x00FF00FF_00FF00FFul)); - } - else - { - if (BitConverter.IsLittleEndian) - { - outputBuffer = (byte)value; - value >>= 16; - Unsafe.Add(ref outputBuffer, 1) = (byte)value; - value >>= 16; - Unsafe.Add(ref outputBuffer, 2) = (byte)value; - value >>= 16; - Unsafe.Add(ref outputBuffer, 3) = (byte)value; - } - else - { - Unsafe.Add(ref outputBuffer, 3) = (byte)value; - value >>= 16; - Unsafe.Add(ref outputBuffer, 2) = (byte)value; - value >>= 16; - Unsafe.Add(ref outputBuffer, 1) = (byte)value; - value >>= 16; - outputBuffer = (byte)value; - } - } - } - - /// - /// Given a DWORD which represents a buffer of 2 ASCII chars in machine-endian order, - /// narrows each WORD to a BYTE, then writes the 2-byte result to the output buffer also in - /// machine-endian order. - /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static void NarrowTwoUtf16CharsToAsciiAndWriteToBuffer(ref byte outputBuffer, uint value) - { - Debug.Assert(AllCharsInUInt32AreAscii(value)); - - if (BitConverter.IsLittleEndian) - { - outputBuffer = (byte)value; - Unsafe.Add(ref outputBuffer, 1) = (byte)(value >> 16); - } - else - { - Unsafe.Add(ref outputBuffer, 1) = (byte)value; - outputBuffer = (byte)(value >> 16); - } - } - - /// - /// Copies as many ASCII characters (U+0000..U+007F) as possible from - /// to , stopping when the first non-ASCII character is encountered - /// or once elements have been converted. Returns the total number - /// of elements that were able to be converted. - /// - public static unsafe nuint NarrowUtf16ToAscii(char* pUtf16Buffer, byte* pAsciiBuffer, nuint elementCount) - { - nuint currentOffset = 0; - - uint utf16Data32BitsHigh = 0, utf16Data32BitsLow = 0; - ulong utf16Data64Bits = 0; - - // If SSE2 is supported, use those specific intrinsics instead of the generic vectorized - // code below. 
This has two benefits: (a) we can take advantage of specific instructions like - // pmovmskb, ptest, vpminuw which we know are optimized, and (b) we can avoid downclocking the - // processor while this method is running. - - if (Sse2.IsSupported) - { - Debug.Assert(BitConverter.IsLittleEndian, "Assume little endian if SSE2 is supported."); - - if (elementCount >= 2 * (uint)Unsafe.SizeOf>()) - { - // Since there's overhead to setting up the vectorized code path, we only want to - // call into it after a quick probe to ensure the next immediate characters really are ASCII. - // If we see non-ASCII data, we'll jump immediately to the draining logic at the end of the method. - - if (PlatformDependent.Is64BitProcess) - { - utf16Data64Bits = Unsafe.ReadUnaligned(pUtf16Buffer); - if (!AllCharsInUInt64AreAscii(utf16Data64Bits)) - { - goto FoundNonAsciiDataIn64BitRead; - } - } - else - { - utf16Data32BitsHigh = Unsafe.ReadUnaligned(pUtf16Buffer); - utf16Data32BitsLow = Unsafe.ReadUnaligned(pUtf16Buffer + 4 / sizeof(char)); - if (!AllCharsInUInt32AreAscii(utf16Data32BitsHigh | utf16Data32BitsLow)) - { - goto FoundNonAsciiDataIn64BitRead; - } - } - - currentOffset = NarrowUtf16ToAscii_Sse2(pUtf16Buffer, pAsciiBuffer, elementCount); - } - } - else if (Vector.IsHardwareAccelerated) - { - uint SizeOfVector = (uint)Unsafe.SizeOf>(); // JIT will make this a const - - // Only bother vectorizing if we have enough data to do so. - if (elementCount >= 2 * SizeOfVector) - { - // Since there's overhead to setting up the vectorized code path, we only want to - // call into it after a quick probe to ensure the next immediate characters really are ASCII. - // If we see non-ASCII data, we'll jump immediately to the draining logic at the end of the method. 
- - if (PlatformDependent.Is64BitProcess) - { - utf16Data64Bits = Unsafe.ReadUnaligned(pUtf16Buffer); - if (!AllCharsInUInt64AreAscii(utf16Data64Bits)) - { - goto FoundNonAsciiDataIn64BitRead; - } - } - else - { - utf16Data32BitsHigh = Unsafe.ReadUnaligned(pUtf16Buffer); - utf16Data32BitsLow = Unsafe.ReadUnaligned(pUtf16Buffer + 4 / sizeof(char)); - if (!AllCharsInUInt32AreAscii(utf16Data32BitsHigh | utf16Data32BitsLow)) - { - goto FoundNonAsciiDataIn64BitRead; - } - } - - Vector maxAscii = new Vector(0x007F); - - nuint finalOffsetWhereCanLoop = elementCount - 2 * SizeOfVector; - do - { - Vector utf16VectorHigh = Unsafe.ReadUnaligned>(pUtf16Buffer + currentOffset); - Vector utf16VectorLow = Unsafe.ReadUnaligned>(pUtf16Buffer + currentOffset + Vector.Count); - - if (Vector.GreaterThanAny(Vector.BitwiseOr(utf16VectorHigh, utf16VectorLow), maxAscii)) - { - break; // found non-ASCII data - } - - // TODO: Is the below logic also valid for big-endian platforms? - Vector asciiVector = Vector.Narrow(utf16VectorHigh, utf16VectorLow); - Unsafe.WriteUnaligned>(pAsciiBuffer + currentOffset, asciiVector); - - currentOffset += SizeOfVector; - } while (currentOffset <= finalOffsetWhereCanLoop); - } - } - - Debug.Assert(currentOffset <= elementCount); - nuint remainingElementCount = elementCount - currentOffset; - - // Try to narrow 64 bits -> 32 bits at a time. - // We needn't update remainingElementCount after this point. - - if (remainingElementCount >= 4) - { - nuint finalOffsetWhereCanLoop = currentOffset + remainingElementCount - 4; - do - { - if (PlatformDependent.Is64BitProcess) - { - // Only perform QWORD reads on a 64-bit platform. 
- utf16Data64Bits = Unsafe.ReadUnaligned(pUtf16Buffer + currentOffset); - if (!AllCharsInUInt64AreAscii(utf16Data64Bits)) - { - goto FoundNonAsciiDataIn64BitRead; - } - - NarrowFourUtf16CharsToAsciiAndWriteToBuffer(ref pAsciiBuffer[currentOffset], utf16Data64Bits); - } - else - { - utf16Data32BitsHigh = Unsafe.ReadUnaligned(pUtf16Buffer + currentOffset); - utf16Data32BitsLow = Unsafe.ReadUnaligned(pUtf16Buffer + currentOffset + 4 / sizeof(char)); - if (!AllCharsInUInt32AreAscii(utf16Data32BitsHigh | utf16Data32BitsLow)) - { - goto FoundNonAsciiDataIn64BitRead; - } - - NarrowTwoUtf16CharsToAsciiAndWriteToBuffer(ref pAsciiBuffer[currentOffset], utf16Data32BitsHigh); - NarrowTwoUtf16CharsToAsciiAndWriteToBuffer(ref pAsciiBuffer[currentOffset + 2], utf16Data32BitsLow); - } - - currentOffset += 4; - } while (currentOffset <= finalOffsetWhereCanLoop); - } - - // Try to narrow 32 bits -> 16 bits. - - if (((uint)remainingElementCount & 2) != 0) - { - utf16Data32BitsHigh = Unsafe.ReadUnaligned(pUtf16Buffer + currentOffset); - if (!AllCharsInUInt32AreAscii(utf16Data32BitsHigh)) - { - goto FoundNonAsciiDataInHigh32Bits; - } - - NarrowTwoUtf16CharsToAsciiAndWriteToBuffer(ref pAsciiBuffer[currentOffset], utf16Data32BitsHigh); - currentOffset += 2; - } - - // Try to narrow 16 bits -> 8 bits. - - if (((uint)remainingElementCount & 1) != 0) - { - utf16Data32BitsHigh = pUtf16Buffer[currentOffset]; - if (utf16Data32BitsHigh <= 0x007Fu) - { - pAsciiBuffer[currentOffset] = (byte)utf16Data32BitsHigh; - currentOffset++; - } - } - - Finish: - - return currentOffset; - - FoundNonAsciiDataIn64BitRead: - - if (PlatformDependent.Is64BitProcess) - { - // Try checking the first 32 bits of the buffer for non-ASCII data. - // Regardless, we'll move the non-ASCII data into the utf16Data32BitsHigh local. 
- - if (BitConverter.IsLittleEndian) - { - utf16Data32BitsHigh = (uint)utf16Data64Bits; - } - else - { - utf16Data32BitsHigh = (uint)(utf16Data64Bits >> 32); - } - - if (AllCharsInUInt32AreAscii(utf16Data32BitsHigh)) - { - NarrowTwoUtf16CharsToAsciiAndWriteToBuffer(ref pAsciiBuffer[currentOffset], utf16Data32BitsHigh); - - if (BitConverter.IsLittleEndian) - { - utf16Data32BitsHigh = (uint)(utf16Data64Bits >> 32); - } - else - { - utf16Data32BitsHigh = (uint)utf16Data64Bits; - } - - currentOffset += 2; - } - } - else - { - // Need to determine if the high or the low 32-bit value contained non-ASCII data. - // Regardless, we'll move the non-ASCII data into the utf16Data32BitsHigh local. - - if (AllCharsInUInt32AreAscii(utf16Data32BitsHigh)) - { - NarrowTwoUtf16CharsToAsciiAndWriteToBuffer(ref pAsciiBuffer[currentOffset], utf16Data32BitsHigh); - utf16Data32BitsHigh = utf16Data32BitsLow; - currentOffset += 2; - } - } - - FoundNonAsciiDataInHigh32Bits: - - Debug.Assert(!AllCharsInUInt32AreAscii(utf16Data32BitsHigh), "Shouldn't have reached this point if we have an all-ASCII input."); - - // There's at most one char that needs to be drained. - - if (FirstCharInUInt32IsAscii(utf16Data32BitsHigh)) - { - if (!BitConverter.IsLittleEndian) - { - utf16Data32BitsHigh >>= 16; // move high char down to low char - } - - pAsciiBuffer[currentOffset] = (byte)utf16Data32BitsHigh; - currentOffset++; - } - - goto Finish; - } - - private static unsafe nuint NarrowUtf16ToAscii_Sse2(char* pUtf16Buffer, byte* pAsciiBuffer, nuint elementCount) - { - // This method contains logic optimized for both SSE2 and SSE41. Much of the logic in this method - // will be elided by JIT once we determine which specific ISAs we support. 
- - // JIT turns the below into constants - - uint SizeOfVector128 = (uint)Unsafe.SizeOf>(); - nuint MaskOfAllBitsInVector128 = (nuint)(SizeOfVector128 - 1); - - // This method is written such that control generally flows top-to-bottom, avoiding - // jumps as much as possible in the optimistic case of "all ASCII". If we see non-ASCII - // data, we jump out of the hot paths to targets at the end of the method. - - Debug.Assert(Sse2.IsSupported); - Debug.Assert(BitConverter.IsLittleEndian); - Debug.Assert(elementCount >= 2 * SizeOfVector128); - - Vector128 asciiMaskForPTEST = Vector128.Create(unchecked((short)0xFF80)); // used for PTEST on supported hardware - Vector128 asciiMaskForPXOR = Vector128.Create(unchecked((short)0x8000)); // used for PXOR - Vector128 asciiMaskForPCMPGTW = Vector128.Create(unchecked((short)0x807F)); // used for PCMPGTW - - // First, perform an unaligned read of the first part of the input buffer. - - Vector128 utf16VectorFirst = Sse2.LoadVector128((short*)pUtf16Buffer); // unaligned load - - // If there's non-ASCII data in the first 8 elements of the vector, there's nothing we can do. - // See comments in GetIndexOfFirstNonAsciiChar_Sse2 for information about how this works. - - if (Sse41.IsSupported) - { - if (!Sse41.TestZ(utf16VectorFirst, asciiMaskForPTEST)) - { - return 0; - } - } - else - { - if (Sse2.MoveMask(Sse2.CompareGreaterThan(Sse2.Xor(utf16VectorFirst, asciiMaskForPXOR), asciiMaskForPCMPGTW).AsByte()) != 0) - { - return 0; - } - } - - // Turn the 8 ASCII chars we just read into 8 ASCII bytes, then copy it to the destination. 
- - Vector128 asciiVector = Sse2.PackUnsignedSaturate(utf16VectorFirst, utf16VectorFirst); - Sse2.StoreScalar((ulong*)pAsciiBuffer, asciiVector.AsUInt64()); // ulong* calculated here is UNALIGNED - - nuint currentOffsetInElements = SizeOfVector128 / 2; // we processed 8 elements so far - - // We're going to get the best performance when we have aligned writes, so we'll take the - // hit of potentially unaligned reads in order to hit this sweet spot. - - // pAsciiBuffer points to the start of the destination buffer, immediately before where we wrote - // the 8 bytes previously. If the 0x08 bit is set at the pinned address, then the 8 bytes we wrote - // previously mean that the 0x08 bit is *not* set at address &pAsciiBuffer[SizeOfVector128 / 2]. In - // that case we can immediately back up to the previous aligned boundary and start the main loop. - // If the 0x08 bit is *not* set at the pinned address, then it means the 0x08 bit *is* set at - // address &pAsciiBuffer[SizeOfVector128 / 2], and we should perform one more 8-byte write to bump - // just past the next aligned boundary address. - - if (0u >= ((uint)pAsciiBuffer & (SizeOfVector128 / 2))) - { - // We need to perform one more partial vector write before we can get the alignment we want. - - utf16VectorFirst = Sse2.LoadVector128((short*)pUtf16Buffer + currentOffsetInElements); // unaligned load - - // See comments earlier in this method for information about how this works. - if (Sse41.IsSupported) - { - if (!Sse41.TestZ(utf16VectorFirst, asciiMaskForPTEST)) - { - goto Finish; - } - } - else - { - if (Sse2.MoveMask(Sse2.CompareGreaterThan(Sse2.Xor(utf16VectorFirst, asciiMaskForPXOR), asciiMaskForPCMPGTW).AsByte()) != 0) - { - goto Finish; - } - } - - // Turn the 8 ASCII chars we just read into 8 ASCII bytes, then copy it to the destination. 
- asciiVector = Sse2.PackUnsignedSaturate(utf16VectorFirst, utf16VectorFirst); - Sse2.StoreScalar((ulong*)(pAsciiBuffer + currentOffsetInElements), asciiVector.AsUInt64()); // ulong* calculated here is UNALIGNED - } - - // Calculate how many elements we wrote in order to get pAsciiBuffer to its next alignment - // point, then use that as the base offset going forward. - - currentOffsetInElements = SizeOfVector128 - ((nuint)pAsciiBuffer & MaskOfAllBitsInVector128); - Debug.Assert(0 < currentOffsetInElements && currentOffsetInElements <= SizeOfVector128, "We wrote at least 1 byte but no more than a whole vector."); - - Debug.Assert(currentOffsetInElements <= elementCount, "Shouldn't have overrun the destination buffer."); - Debug.Assert(elementCount - currentOffsetInElements >= SizeOfVector128, "We should be able to run at least one whole vector."); - - nuint finalOffsetWhereCanRunLoop = elementCount - SizeOfVector128; - do - { - // In a loop, perform two unaligned reads, narrow to a single vector, then aligned write one vector. - - utf16VectorFirst = Sse2.LoadVector128((short*)pUtf16Buffer + currentOffsetInElements); // unaligned load - Vector128 utf16VectorSecond = Sse2.LoadVector128((short*)pUtf16Buffer + currentOffsetInElements + SizeOfVector128 / sizeof(short)); // unaligned load - Vector128 combinedVector = Sse2.Or(utf16VectorFirst, utf16VectorSecond); - - // See comments in GetIndexOfFirstNonAsciiChar_Sse2 for information about how this works. - if (Sse41.IsSupported) - { - if (!Sse41.TestZ(combinedVector, asciiMaskForPTEST)) - { - goto FoundNonAsciiDataInLoop; - } - } - else - { - if (Sse2.MoveMask(Sse2.CompareGreaterThan(Sse2.Xor(combinedVector, asciiMaskForPXOR), asciiMaskForPCMPGTW).AsByte()) != 0) - { - goto FoundNonAsciiDataInLoop; - } - } - - // Build up the UTF-8 vector and perform the store. 
- - asciiVector = Sse2.PackUnsignedSaturate(utf16VectorFirst, utf16VectorSecond); - - Debug.Assert(((nuint)pAsciiBuffer + currentOffsetInElements) % SizeOfVector128 == 0, "Write should be aligned."); - Sse2.StoreAligned(pAsciiBuffer + currentOffsetInElements, asciiVector); // aligned - - currentOffsetInElements += SizeOfVector128; - } while (currentOffsetInElements <= finalOffsetWhereCanRunLoop); - - Finish: - - // There might be some ASCII data left over. That's fine - we'll let our caller handle the final drain. - return currentOffsetInElements; - - FoundNonAsciiDataInLoop: - - // Can we at least narrow the high vector? - // See comments in GetIndexOfFirstNonAsciiChar_Sse2 for information about how this works. - if (Sse41.IsSupported) - { - if (!Sse41.TestZ(utf16VectorFirst, asciiMaskForPTEST)) - { - goto Finish; // found non-ASCII data - } - } - else - { - if (Sse2.MoveMask(Sse2.CompareGreaterThan(Sse2.Xor(utf16VectorFirst, asciiMaskForPXOR), asciiMaskForPCMPGTW).AsByte()) != 0) - { - goto Finish; // found non-ASCII data - } - } - - // First part was all ASCII, narrow and aligned write. Note we're only filling in the low half of the vector. - asciiVector = Sse2.PackUnsignedSaturate(utf16VectorFirst, utf16VectorFirst); - - Debug.Assert(((nuint)pAsciiBuffer + currentOffsetInElements) % sizeof(ulong) == 0, "Destination should be ulong-aligned."); - - Sse2.StoreScalar((ulong*)(pAsciiBuffer + currentOffsetInElements), asciiVector.AsUInt64()); // ulong* calculated here is aligned - currentOffsetInElements += SizeOfVector128 / 2; - - goto Finish; - } - - /// - /// Copies as many ASCII bytes (00..7F) as possible from - /// to , stopping when the first non-ASCII byte is encountered - /// or once elements have been converted. Returns the total number - /// of elements that were able to be converted. 
- /// - public static unsafe nuint WidenAsciiToUtf16(byte* pAsciiBuffer, char* pUtf16Buffer, nuint elementCount) - { - nuint currentOffset = 0; - - // If SSE2 is supported, use those specific intrinsics instead of the generic vectorized - // code below. This has two benefits: (a) we can take advantage of specific instructions like - // pmovmskb which we know are optimized, and (b) we can avoid downclocking the processor while - // this method is running. - - if (Sse2.IsSupported) - { - if (elementCount >= 2 * (uint)Unsafe.SizeOf>()) - { - currentOffset = WidenAsciiToUtf16_Sse2(pAsciiBuffer, pUtf16Buffer, elementCount); - } - } - else if (Vector.IsHardwareAccelerated) - { - uint SizeOfVector = (uint)Unsafe.SizeOf>(); // JIT will make this a const - - // Only bother vectorizing if we have enough data to do so. - if (elementCount >= SizeOfVector) - { - // Note use of SBYTE instead of BYTE below; we're using the two's-complement - // representation of negative integers to act as a surrogate for "is ASCII?". - - nuint finalOffsetWhereCanLoop = elementCount - SizeOfVector; - do - { - Vector asciiVector = Unsafe.ReadUnaligned>(pAsciiBuffer + currentOffset); - if (Vector.LessThanAny(asciiVector, Vector.Zero)) - { - break; // found non-ASCII data - } - - Vector.Widen(Vector.AsVectorByte(asciiVector), out Vector utf16LowVector, out Vector utf16HighVector); - - // TODO: Is the below logic also valid for big-endian platforms? - Unsafe.WriteUnaligned>(pUtf16Buffer + currentOffset, utf16LowVector); - Unsafe.WriteUnaligned>(pUtf16Buffer + currentOffset + Vector.Count, utf16HighVector); - - currentOffset += SizeOfVector; - } while (currentOffset <= finalOffsetWhereCanLoop); - } - } - - Debug.Assert(currentOffset <= elementCount); - nuint remainingElementCount = elementCount - currentOffset; - - // Try to widen 32 bits -> 64 bits at a time. - // We needn't update remainingElementCount after this point. 
- - uint asciiData; - - if (remainingElementCount >= 4) - { - nuint finalOffsetWhereCanLoop = currentOffset + remainingElementCount - 4; - do - { - asciiData = Unsafe.ReadUnaligned(pAsciiBuffer + currentOffset); - if (!ASCIIUtility.AllBytesInUInt32AreAscii(asciiData)) - { - goto FoundNonAsciiData; - } - - WidenFourAsciiBytesToUtf16AndWriteToBuffer(ref pUtf16Buffer[currentOffset], asciiData); - currentOffset += 4; - } while (currentOffset <= finalOffsetWhereCanLoop); - } - - // Try to widen 16 bits -> 32 bits. - - if (((uint)remainingElementCount & 2) != 0) - { - asciiData = Unsafe.ReadUnaligned(pAsciiBuffer + currentOffset); - if (!ASCIIUtility.AllBytesInUInt32AreAscii(asciiData)) - { - goto FoundNonAsciiData; - } - - if (BitConverter.IsLittleEndian) - { - pUtf16Buffer[currentOffset] = (char)(byte)asciiData; - pUtf16Buffer[currentOffset + 1] = (char)(asciiData >> 8); - } - else - { - pUtf16Buffer[currentOffset + 1] = (char)(byte)asciiData; - pUtf16Buffer[currentOffset] = (char)(asciiData >> 8); - } - - currentOffset += 2; - } - - // Try to widen 8 bits -> 16 bits. - - if (((uint)remainingElementCount & 1) != 0) - { - asciiData = pAsciiBuffer[currentOffset]; - if (((byte)asciiData & 0x80) != 0) - { - goto Finish; - } - - pUtf16Buffer[currentOffset] = (char)asciiData; - currentOffset += 1; - } - - Finish: - - return currentOffset; - - FoundNonAsciiData: - - Debug.Assert(!ASCIIUtility.AllBytesInUInt32AreAscii(asciiData), "Shouldn't have reached this point if we have an all-ASCII input."); - - // Drain ASCII bytes one at a time. 
- - while (0u >= (uint)((byte)asciiData & 0x80)) - { - pUtf16Buffer[currentOffset] = (char)(byte)asciiData; - currentOffset += 1; - asciiData >>= 8; - } - - goto Finish; - } - - private static unsafe nuint WidenAsciiToUtf16_Sse2(byte* pAsciiBuffer, char* pUtf16Buffer, nuint elementCount) - { - // JIT turns the below into constants - - uint SizeOfVector128 = (uint)Unsafe.SizeOf>(); - nuint MaskOfAllBitsInVector128 = (nuint)(SizeOfVector128 - 1); - - // This method is written such that control generally flows top-to-bottom, avoiding - // jumps as much as possible in the optimistic case of "all ASCII". If we see non-ASCII - // data, we jump out of the hot paths to targets at the end of the method. - - Debug.Assert(Sse2.IsSupported); - Debug.Assert(BitConverter.IsLittleEndian); - Debug.Assert(elementCount >= 2 * SizeOfVector128); - - // We're going to get the best performance when we have aligned writes, so we'll take the - // hit of potentially unaligned reads in order to hit this sweet spot. - - Vector128 asciiVector; - Vector128 utf16FirstHalfVector; - uint mask; - - // First, perform an unaligned read of the first part of the input buffer. - - asciiVector = Sse2.LoadVector128(pAsciiBuffer); // unaligned load - mask = (uint)Sse2.MoveMask(asciiVector); - - // If there's non-ASCII data in the first 8 elements of the vector, there's nothing we can do. - - if ((byte)mask != 0) - { - return 0; - } - - // Then perform an unaligned write of the first part of the input buffer. - - Vector128 zeroVector = Vector128.Zero; - - utf16FirstHalfVector = Sse2.UnpackLow(asciiVector, zeroVector); - Sse2.Store((byte*)pUtf16Buffer, utf16FirstHalfVector); // unaligned - - // Calculate how many elements we wrote in order to get pOutputBuffer to its next alignment - // point, then use that as the base offset going forward. Remember the >> 1 to account for - // that we wrote chars, not bytes. This means we may re-read data in the next iteration of - // the loop, but this is ok. 
- - nuint currentOffset = (SizeOfVector128 >> 1) - (((nuint)pUtf16Buffer >> 1) & (MaskOfAllBitsInVector128 >> 1)); - Debug.Assert(0 < currentOffset && currentOffset <= SizeOfVector128 / sizeof(char)); - - nuint finalOffsetWhereCanRunLoop = elementCount - SizeOfVector128; - - do - { - // In a loop, perform an unaligned read, widen to two vectors, then aligned write the two vectors. - - asciiVector = Sse2.LoadVector128(pAsciiBuffer + currentOffset); // unaligned load - mask = (uint)Sse2.MoveMask(asciiVector); - - if (mask != 0) - { - // non-ASCII byte somewhere - goto NonAsciiDataSeenInInnerLoop; - } - - byte* pStore = (byte*)(pUtf16Buffer + currentOffset); - Sse2.StoreAligned(pStore, Sse2.UnpackLow(asciiVector, zeroVector)); - - pStore += SizeOfVector128; - Sse2.StoreAligned(pStore, Sse2.UnpackHigh(asciiVector, zeroVector)); - - currentOffset += SizeOfVector128; - } while (currentOffset <= finalOffsetWhereCanRunLoop); - - Finish: - - return currentOffset; - - NonAsciiDataSeenInInnerLoop: - - // Can we at least widen the first part of the vector? - - if (0u >= ((byte)mask)) - { - // First part was all ASCII, widen - utf16FirstHalfVector = Sse2.UnpackLow(asciiVector, zeroVector); - Sse2.StoreAligned((byte*)(pUtf16Buffer + currentOffset), utf16FirstHalfVector); - currentOffset += SizeOfVector128 / 2; - } - - goto Finish; - } - - /// - /// Given a DWORD which represents a buffer of 4 bytes, widens the buffer into 4 WORDs and - /// writes them to the output buffer with machine endianness. - /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static void WidenFourAsciiBytesToUtf16AndWriteToBuffer(ref char outputBuffer, uint value) - { - Debug.Assert(ASCIIUtility.AllBytesInUInt32AreAscii(value)); - - if (Bmi2.X64.IsSupported) - { - // BMI2 will work regardless of the processor's endianness. 
- Unsafe.WriteUnaligned(ref Unsafe.As(ref outputBuffer), Bmi2.X64.ParallelBitDeposit(value, 0x00FF00FF_00FF00FFul)); - } - else - { - if (BitConverter.IsLittleEndian) - { - outputBuffer = (char)(byte)value; - value >>= 8; - Unsafe.Add(ref outputBuffer, 1) = (char)(byte)value; - value >>= 8; - Unsafe.Add(ref outputBuffer, 2) = (char)(byte)value; - value >>= 8; - Unsafe.Add(ref outputBuffer, 3) = (char)value; - } - else - { - Unsafe.Add(ref outputBuffer, 3) = (char)(byte)value; - value >>= 8; - Unsafe.Add(ref outputBuffer, 2) = (char)(byte)value; - value >>= 8; - Unsafe.Add(ref outputBuffer, 1) = (char)(byte)value; - value >>= 8; - outputBuffer = (char)value; - } - } - } - } -} -#endif diff --git a/src/DotNetty.Common/Internal/PlatformDependent.cs b/src/DotNetty.Common/Internal/PlatformDependent.cs index 09de54e88..b98391d6e 100644 --- a/src/DotNetty.Common/Internal/PlatformDependent.cs +++ b/src/DotNetty.Common/Internal/PlatformDependent.cs @@ -59,7 +59,7 @@ public static unsafe bool ByteArrayEquals(byte[] bytes1, int startPos1, byte[] b return true; } - return SpanHelpers.SequenceEqual(ref bytes1[startPos1], ref bytes2[startPos2], unchecked((uint)length)); + return SpanHelpers.SequenceEqual(ref bytes1[startPos1], ref bytes2[startPos2], length); } public static unsafe int ByteArrayEqualsConstantTime(byte[] bytes1, int startPos1, byte[] bytes2, int startPos2, int length) diff --git a/src/DotNetty.Common/Internal/SpanHelpers.Byte.cs b/src/DotNetty.Common/Internal/SpanHelpers.Byte.cs index d79bad49a..e16b0198a 100644 --- a/src/DotNetty.Common/Internal/SpanHelpers.Byte.cs +++ b/src/DotNetty.Common/Internal/SpanHelpers.Byte.cs @@ -427,22 +427,12 @@ public static unsafe bool Contains(ref byte searchSpace, byte value, int length) // Optimized byte-based SequenceEquals. The "length" parameter for this one is declared a nuint rather than int as we also use it for types other than byte // where the length can exceed 2Gb once scaled by sizeof(T). 
//[MethodImpl(MethodImplOptions.AggressiveOptimization)] - public static unsafe bool SequenceEqual(ref byte first, ref byte second, long length) + public static unsafe bool SequenceEqual(ref byte first, ref byte second, nint length) { if (Unsafe.AreSame(ref first, ref second)) { goto Equal; } IntPtr offset = (IntPtr)0; // Use IntPtr for arithmetic to avoid unnecessary 64->32->64 truncations - IntPtr lengthToExamine; - if (PlatformDependent.Is64BitProcess) - { - ulong nlen = unchecked((ulong)length); - lengthToExamine = (IntPtr)(void*)nlen; - } - else - { - uint nlen = unchecked((uint)length); - lengthToExamine = (IntPtr)(void*)nlen; - } + IntPtr lengthToExamine = (IntPtr)(void*)((nuint)length); if (Vector.IsHardwareAccelerated && (byte*)lengthToExamine >= (byte*)Vector.Count) { @@ -692,7 +682,7 @@ public static int IndexOf(ref byte searchSpace, int searchSpaceLength, ref byte { return 0; // A zero-length sequence is always treated as "found" at the start of the search space. } - if (1u >= (uValueLength)) + if (1u >= uValueLength) { return IndexOf(ref searchSpace, value, searchSpaceLength); } @@ -809,16 +799,7 @@ public static unsafe int IndexOf(ref byte searchSpace, byte value, int length) { if ((int)(byte*)offset < length) { - bool isAlignedToVector128; - if (PlatformDependent.Is64BitProcess) - { - isAlignedToVector128 = (((ulong)Unsafe.AsPointer(ref searchSpace) + (ulong)offset) & (ulong)(Vector256.Count - 1)) != 0; - } - else - { - isAlignedToVector128 = (((uint)Unsafe.AsPointer(ref searchSpace) + (uint)offset) & (uint)(Vector256.Count - 1)) != 0; - } - if (isAlignedToVector128) + if ((((nuint)Unsafe.AsPointer(ref searchSpace) + (nuint)(nint)offset) & (nuint)(Vector256.Count - 1)) != 0) { // Not currently aligned to Vector256 (is aligned to Vector128); this can cause a problem for searches // with no upper bound e.g. String.strlen. 
diff --git a/src/DotNetty.Common/Internal/SpanHelpers.Char.cs b/src/DotNetty.Common/Internal/SpanHelpers.Char.cs index 689ae8319..87923c682 100644 --- a/src/DotNetty.Common/Internal/SpanHelpers.Char.cs +++ b/src/DotNetty.Common/Internal/SpanHelpers.Char.cs @@ -21,9 +21,7 @@ public static unsafe bool Contains(ref char searchSpace, char value, int length) Debug.Assert(length >= 0); #if NETCOREAPP_3_0_GREATER - int index = PlatformDependent.Is64BitProcess - ? InternalIndexOf_x64(ref searchSpace, value, length) - : InternalIndexOf_x32(ref searchSpace, value, length); + int index = IndexOf(ref searchSpace, value, length); return SharedConstants.TooBigOrNegative >= (uint)index; #else fixed (char* pChars = &searchSpace) @@ -300,7 +298,7 @@ public static int IndexOf(ref char searchSpace, int searchSpaceLength, ref char if (SequenceEqual( ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + 1)), ref Unsafe.As(ref valueTail), - (long)valueTailLength * 2)) // nuint + (nint)valueTailLength * 2)) // nuint { return index; // The tail matched. Return a successful find. } @@ -316,106 +314,8 @@ public static unsafe int IndexOf(ref char searchSpace, char value, int length) Debug.Assert(length >= 0); #if NETCOREAPP_3_0_GREATER - return PlatformDependent.Is64BitProcess - ? 
InternalIndexOf_x64(ref searchSpace, value, length) - : InternalIndexOf_x32(ref searchSpace, value, length); -#else - fixed (char* pChars = &searchSpace) - { - char* pCh = pChars; - char* pEndCh = pCh + length; - - if (Vector.IsHardwareAccelerated && length >= Vector.Count * 2) - { - // Figure out how many characters to read sequentially until we are vector aligned - // This is equivalent to: - // unaligned = ((int)pCh % Unsafe.SizeOf>()) / elementsPerByte - // length = (Vector.Count - unaligned) % Vector.Count - const int elementsPerByte = sizeof(ushort) / sizeof(byte); - int unaligned = ((int)pCh & (Unsafe.SizeOf>() - 1)) / elementsPerByte; - length = (Vector.Count - unaligned) & (Vector.Count - 1); - } - - SequentialScan: - while (length >= 4) - { - length -= 4; - - if (pCh[0] == value) - goto Found; - if (pCh[1] == value) - goto Found1; - if (pCh[2] == value) - goto Found2; - if (pCh[3] == value) - goto Found3; - - pCh += 4; - } - - while (length > 0) - { - length--; - - if (pCh[0] == value) - goto Found; - - pCh++; - } - - // We get past SequentialScan only if IsHardwareAccelerated is true. However, we still have the redundant check to allow - // the JIT to see that the code is unreachable and eliminate it when the platform does not have hardware accelerated. - if (Vector.IsHardwareAccelerated && pCh < pEndCh) - { - // Get the highest multiple of Vector.Count that is within the search space. - // That will be how many times we iterate in the loop below. 
- // This is equivalent to: length = Vector.Count * ((int)(pEndCh - pCh) / Vector.Count) - length = (int)((pEndCh - pCh) & ~(Vector.Count - 1)); - - // Get comparison Vector - Vector vComparison = new Vector(value); - - while (length > 0) - { - // Using Unsafe.Read instead of ReadUnaligned since the search space is pinned and pCh is always vector aligned - Debug.Assert(((int)pCh & (Unsafe.SizeOf>() - 1)) == 0); - Vector vMatches = Vector.Equals(vComparison, Unsafe.Read>(pCh)); - if (Vector.Zero.Equals(vMatches)) - { - pCh += Vector.Count; - length -= Vector.Count; - continue; - } - // Find offset of first match - return (int)(pCh - pChars) + LocateFirstFoundChar(vMatches); - } - - if (pCh < pEndCh) - { - length = (int)(pEndCh - pCh); - goto SequentialScan; - } - } - - return -1; - Found3: - pCh++; - Found2: - pCh++; - Found1: - pCh++; - Found: - return (int)(pCh - pChars); - } -#endif - } - -#if NETCOREAPP_3_0_GREATER - [MethodImpl(MethodImplOptions.AggressiveOptimization)] - private static unsafe int InternalIndexOf_x64(ref char searchSpace, char value, int length) - { - long offset = 0L; - long lengthToExamine = length; + nint offset = 0; + nint lengthToExamine = length; if (((int)Unsafe.AsPointer(ref searchSpace) & 1) != 0) { @@ -427,7 +327,7 @@ private static unsafe int InternalIndexOf_x64(ref char searchSpace, char value, // Needs to be double length to allow us to align the data first. if (length >= Vector128.Count * 2) { - lengthToExamine = UnalignedCountVector128_x64(ref searchSpace); + lengthToExamine = UnalignedCountVector128(ref searchSpace); } } else if (Vector.IsHardwareAccelerated) @@ -435,7 +335,7 @@ private static unsafe int InternalIndexOf_x64(ref char searchSpace, char value, // Needs to be double length to allow us to align the data first. 
if (length >= Vector.Count * 2) { - lengthToExamine = UnalignedCountVector_x64(ref searchSpace); + lengthToExamine = UnalignedCountVector(ref searchSpace); } } @@ -477,7 +377,7 @@ private static unsafe int InternalIndexOf_x64(ref char searchSpace, char value, if (offset < length) { Debug.Assert(length - offset >= Vector128.Count); - if (((long)Unsafe.AsPointer(ref Unsafe.Add(ref searchSpace, (IntPtr)offset)) & (long)(Vector256.Count - 1)) != 0) + if (((nint)Unsafe.AsPointer(ref Unsafe.Add(ref searchSpace, (IntPtr)offset)) & (nint)(Vector256.Count - 1)) != 0) { // Not currently aligned to Vector256 (is aligned to Vector128); this can cause a problem for searches // with no upper bound e.g. String.wcslen. Start with a check on Vector128 to align to Vector256, @@ -648,247 +548,96 @@ private static unsafe int InternalIndexOf_x64(ref char searchSpace, char value, return (int)(offset + 1); Found: return (int)(offset); - } - - [MethodImpl(MethodImplOptions.NoInlining)] - private static unsafe int InternalIndexOf_x32(ref char searchSpace, char value, int length) - { - int offset = 0; - int lengthToExamine = length; - - if (((int)Unsafe.AsPointer(ref searchSpace) & 1) != 0) - { - // Input isn't char aligned, we won't be able to align it to a Vector - } - else if (Sse2.IsSupported) - { - // Avx2 branch also operates on Sse2 sizes, so check is combined. - // Needs to be double length to allow us to align the data first. - if (length >= Vector128.Count * 2) - { - lengthToExamine = UnalignedCountVector128_x32(ref searchSpace); - } - } - else if (Vector.IsHardwareAccelerated) +#else + fixed (char* pChars = &searchSpace) { - // Needs to be double length to allow us to align the data first. 
- if (length >= Vector.Count * 2) + char* pCh = pChars; + char* pEndCh = pCh + length; + + if (Vector.IsHardwareAccelerated && length >= Vector.Count * 2) { - lengthToExamine = UnalignedCountVector_x32(ref searchSpace); + // Figure out how many characters to read sequentially until we are vector aligned + // This is equivalent to: + // unaligned = ((int)pCh % Unsafe.SizeOf>()) / elementsPerByte + // length = (Vector.Count - unaligned) % Vector.Count + const int elementsPerByte = sizeof(ushort) / sizeof(byte); + int unaligned = ((int)pCh & (Unsafe.SizeOf>() - 1)) / elementsPerByte; + length = (Vector.Count - unaligned) & (Vector.Count - 1); } - } - SequentialScan: - // In the non-vector case lengthToExamine is the total length. - // In the vector case lengthToExamine first aligns to Vector, - // then in a second pass after the Vector lengths is the - // remaining data that is shorter than a Vector length. - while (lengthToExamine >= 4) - { - ref char current = ref Add(ref searchSpace, offset); - - if (value == current) - goto Found; - if (value == Add(ref current, 1)) - goto Found1; - if (value == Add(ref current, 2)) - goto Found2; - if (value == Add(ref current, 3)) - goto Found3; - - offset += 4; - lengthToExamine -= 4; - } + SequentialScan: + while (length >= 4) + { + length -= 4; - while (lengthToExamine > 0) - { - if (value == Add(ref searchSpace, offset)) - goto Found; + if (pCh[0] == value) + goto Found; + if (pCh[1] == value) + goto Found1; + if (pCh[2] == value) + goto Found2; + if (pCh[3] == value) + goto Found3; - offset += 1; - lengthToExamine -= 1; - } + pCh += 4; + } - // We get past SequentialScan only if IsHardwareAccelerated or intrinsic .IsSupported is true. However, we still have the redundant check to allow - // the JIT to see that the code is unreachable and eliminate it when the platform does not have hardware accelerated. 
- if (Avx2.IsSupported) - { - if (offset < length) + while (length > 0) { - Debug.Assert(length - offset >= Vector128.Count); - if (((int)Unsafe.AsPointer(ref Unsafe.Add(ref searchSpace, (IntPtr)offset)) & (int)(Vector256.Count - 1)) != 0) - { - // Not currently aligned to Vector256 (is aligned to Vector128); this can cause a problem for searches - // with no upper bound e.g. String.wcslen. Start with a check on Vector128 to align to Vector256, - // before moving to processing Vector256. - - // If the input searchSpan has been fixed or pinned, this ensures we do not fault across memory pages - // while searching for an end of string. Specifically that this assumes that the length is either correct - // or that the data is pinned otherwise it may cause an AccessViolation from crossing a page boundary into an - // unowned page. If the search is unbounded (e.g. null terminator in wcslen) and the search value is not found, - // again this will likely cause an AccessViolation. However, correctly bounded searches will return -1 rather - // than ever causing an AV. - - // If the searchSpan has not been fixed or pinned the GC can relocate it during the execution of this - // method, so the alignment only acts as best endeavour. The GC cost is likely to dominate over - // the misalignment that may occur after; to we default to giving the GC a free hand to relocate and - // its up to the caller whether they are operating over fixed data. 
- Vector128 values = Vector128.Create((ushort)value); - Vector128 search = LoadVector128(ref searchSpace, offset); + length--; - // Same method as below - int matches = Sse2.MoveMask(Sse2.CompareEqual(values, search).AsByte()); - if (0u >= (uint)matches) - { - // Zero flags set so no matches - offset += Vector128.Count; - } - else - { - // Find bitflag offset of first match and add to current offset - return (int)(offset + (BitOperations.TrailingZeroCount(matches) / sizeof(char))); - } - } + if (pCh[0] == value) + goto Found; - lengthToExamine = GetCharVector256SpanLength(offset, length); - if (lengthToExamine > 0) - { - Vector256 values = Vector256.Create((ushort)value); - do - { - Debug.Assert(lengthToExamine >= Vector256.Count); + pCh++; + } - Vector256 search = LoadVector256(ref searchSpace, offset); - int matches = Avx2.MoveMask(Avx2.CompareEqual(values, search).AsByte()); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. - if (0u >= (uint)matches) - { - // Zero flags set so no matches - offset += Vector256.Count; - lengthToExamine -= Vector256.Count; - continue; - } + // We get past SequentialScan only if IsHardwareAccelerated is true. However, we still have the redundant check to allow + // the JIT to see that the code is unreachable and eliminate it when the platform does not have hardware accelerated. + if (Vector.IsHardwareAccelerated && pCh < pEndCh) + { + // Get the highest multiple of Vector.Count that is within the search space. + // That will be how many times we iterate in the loop below. 
+ // This is equivalent to: length = Vector.Count * ((int)(pEndCh - pCh) / Vector.Count) + length = (int)((pEndCh - pCh) & ~(Vector.Count - 1)); - // Find bitflag offset of first match and add to current offset, - // flags are in bytes so divide for chars - return (int)(offset + (BitOperations.TrailingZeroCount(matches) / sizeof(char))); - } while (lengthToExamine > 0); - } + // Get comparison Vector + Vector vComparison = new Vector(value); - lengthToExamine = GetCharVector128SpanLength(offset, length); - if (lengthToExamine > 0) + while (length > 0) { - Debug.Assert(lengthToExamine >= Vector128.Count); - - Vector128 values = Vector128.Create((ushort)value); - Vector128 search = LoadVector128(ref searchSpace, offset); - - // Same method as above - int matches = Sse2.MoveMask(Sse2.CompareEqual(values, search).AsByte()); - if (0u >= (uint)matches) - { - // Zero flags set so no matches - offset += Vector128.Count; - // Don't need to change lengthToExamine here as we don't use its current value again. 
- } - else + // Using Unsafe.Read instead of ReadUnaligned since the search space is pinned and pCh is always vector aligned + Debug.Assert(((int)pCh & (Unsafe.SizeOf>() - 1)) == 0); + Vector vMatches = Vector.Equals(vComparison, Unsafe.Read>(pCh)); + if (Vector.Zero.Equals(vMatches)) { - // Find bitflag offset of first match and add to current offset, - // flags are in bytes so divide for chars - return (int)(offset + (BitOperations.TrailingZeroCount(matches) / sizeof(char))); + pCh += Vector.Count; + length -= Vector.Count; + continue; } + // Find offset of first match + return (int)(pCh - pChars) + LocateFirstFoundChar(vMatches); } - if (offset < length) - { - lengthToExamine = length - offset; - goto SequentialScan; - } - } - } - else if (Sse2.IsSupported) - { - if (offset < length) - { - Debug.Assert(length - offset >= Vector128.Count); - - lengthToExamine = GetCharVector128SpanLength(offset, length); - if (lengthToExamine > 0) - { - Vector128 values = Vector128.Create((ushort)value); - do - { - Debug.Assert(lengthToExamine >= Vector128.Count); - - Vector128 search = LoadVector128(ref searchSpace, offset); - - // Same method as above - int matches = Sse2.MoveMask(Sse2.CompareEqual(values, search).AsByte()); - if (0u >= (uint)matches) - { - // Zero flags set so no matches - offset += Vector128.Count; - lengthToExamine -= Vector128.Count; - continue; - } - - // Find bitflag offset of first match and add to current offset, - // flags are in bytes so divide for chars - return (int)(offset + (BitOperations.TrailingZeroCount(matches) / sizeof(char))); - } while (lengthToExamine > 0); - } - - if (offset < length) + if (pCh < pEndCh) { - lengthToExamine = length - offset; + length = (int)(pEndCh - pCh); goto SequentialScan; } } - } - else if (Vector.IsHardwareAccelerated && offset < length) - { - Debug.Assert(length - offset >= Vector.Count); - - lengthToExamine = GetCharVectorSpanLength(offset, length); - - if (lengthToExamine > 0) - { - Vector values = new 
Vector((ushort)value); - do - { - Debug.Assert(lengthToExamine >= Vector.Count); - - var matches = Vector.Equals(values, LoadVector(ref searchSpace, offset)); - if (Vector.Zero.Equals(matches)) - { - offset += Vector.Count; - lengthToExamine -= Vector.Count; - continue; - } - - // Find offset of first match - return (int)(offset + LocateFirstFoundChar(matches)); - } while (lengthToExamine > 0); - } - if (offset < length) - { - lengthToExamine = length - offset; - goto SequentialScan; - } + return -1; + Found3: + pCh++; + Found2: + pCh++; + Found1: + pCh++; + Found: + return (int)(pCh - pChars); } - return -1; - Found3: - return (int)(offset + 3); - Found2: - return (int)(offset + 2); - Found1: - return (int)(offset + 1); - Found: - return (int)(offset); - } #endif + } #endregion @@ -1769,63 +1518,39 @@ private static int LocateLastFoundChar(ulong match) #if NETCOREAPP_3_0_GREATER [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static ref char Add(ref char source, int elementOffset) - => ref Unsafe.Add(ref source, (IntPtr)elementOffset); - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static ref char Add(ref char source, long elementOffset) + public static ref char Add(ref char source, nint elementOffset) => ref Unsafe.Add(ref source, (IntPtr)elementOffset); [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static unsafe Vector LoadVector(ref char start, int offset) - => Unsafe.ReadUnaligned>(ref Unsafe.As(ref Unsafe.Add(ref start, (IntPtr)offset))); - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static unsafe Vector LoadVector(ref char start, long offset) + private static unsafe Vector LoadVector(ref char start, nint offset) => Unsafe.ReadUnaligned>(ref Unsafe.As(ref Unsafe.Add(ref start, (IntPtr)offset))); [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static unsafe Vector128 LoadVector128(ref char start, int offset) - => Unsafe.ReadUnaligned>(ref Unsafe.As(ref Unsafe.Add(ref start, 
(IntPtr)offset))); - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static unsafe Vector128 LoadVector128(ref char start, long offset) + private static unsafe Vector128 LoadVector128(ref char start, nint offset) => Unsafe.ReadUnaligned>(ref Unsafe.As(ref Unsafe.Add(ref start, (IntPtr)offset))); [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static unsafe Vector256 LoadVector256(ref char start, int offset) - => Unsafe.ReadUnaligned>(ref Unsafe.As(ref Unsafe.Add(ref start, (IntPtr)offset))); - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static unsafe Vector256 LoadVector256(ref char start, long offset) + private static unsafe Vector256 LoadVector256(ref char start, nint offset) => Unsafe.ReadUnaligned>(ref Unsafe.As(ref Unsafe.Add(ref start, (IntPtr)offset))); [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static unsafe UIntPtr LoadUIntPtr(ref char start, int offset) - => Unsafe.ReadUnaligned(ref Unsafe.As(ref Unsafe.Add(ref start, (IntPtr)offset))); - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static unsafe UIntPtr LoadUIntPtr(ref char start, long offset) + private static unsafe UIntPtr LoadUIntPtr(ref char start, nint offset) => Unsafe.ReadUnaligned(ref Unsafe.As(ref Unsafe.Add(ref start, (IntPtr)offset))); [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static unsafe int GetCharVectorSpanLength(int offset, int length) - => ((length - offset) & ~(Vector.Count - 1)); - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static unsafe long GetCharVectorSpanLength(long offset, long length) + private static unsafe nint GetCharVectorSpanLength(nint offset, nint length) => ((length - offset) & ~(Vector.Count - 1)); [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static unsafe int GetCharVector128SpanLength(int offset, int length) - => ((length - offset) & ~(Vector128.Count - 1)); - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static unsafe long 
GetCharVector128SpanLength(long offset, long length) + private static unsafe nint GetCharVector128SpanLength(nint offset, nint length) => ((length - offset) & ~(Vector128.Count - 1)); [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static int GetCharVector256SpanLength(int offset, int length) - => ((length - offset) & ~(Vector256.Count - 1)); - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static long GetCharVector256SpanLength(long offset, long length) + private static nint GetCharVector256SpanLength(nint offset, nint length) => ((length - offset) & ~(Vector256.Count - 1)); [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static unsafe int UnalignedCountVector_x32(ref char searchSpace) + private static unsafe nint UnalignedCountVector(ref char searchSpace) { const int ElementsPerByte = sizeof(ushort) / sizeof(byte); // Figure out how many characters to read sequentially until we are vector aligned @@ -1836,40 +1561,17 @@ private static unsafe int UnalignedCountVector_x32(ref char searchSpace) // This alignment is only valid if the GC does not relocate; so we use ReadUnaligned to get the data. // If a GC does occur and alignment is lost, the GC cost will outweigh any gains from alignment so it // isn't too important to pin to maintain the alignment. 
- return (int)(uint)(-(int)Unsafe.AsPointer(ref searchSpace) / ElementsPerByte) & (Vector.Count - 1); + return (nint)(uint)(-(int)Unsafe.AsPointer(ref searchSpace) / ElementsPerByte) & (Vector.Count - 1); } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static unsafe long UnalignedCountVector_x64(ref char searchSpace) - { - const int ElementsPerByte = sizeof(ushort) / sizeof(byte); - // Figure out how many characters to read sequentially until we are vector aligned - // This is equivalent to: - // unaligned = ((int)pCh % Unsafe.SizeOf>()) / ElementsPerByte - // length = (Vector.Count - unaligned) % Vector.Count - // This alignment is only valid if the GC does not relocate; so we use ReadUnaligned to get the data. - // If a GC does occur and alignment is lost, the GC cost will outweigh any gains from alignment so it - // isn't too important to pin to maintain the alignment. - return (long)(uint)(-(int)Unsafe.AsPointer(ref searchSpace) / ElementsPerByte) & (Vector.Count - 1); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static unsafe int UnalignedCountVector128_x32(ref char searchSpace) - { - const int ElementsPerByte = sizeof(ushort) / sizeof(byte); - // This alignment is only valid if the GC does not relocate; so we use ReadUnaligned to get the data. - // If a GC does occur and alignment is lost, the GC cost will outweigh any gains from alignment so it - // isn't too important to pin to maintain the alignment. - return (int)(uint)(-(int)Unsafe.AsPointer(ref searchSpace) / ElementsPerByte) & (Vector128.Count - 1); - } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static unsafe long UnalignedCountVector128_x64(ref char searchSpace) + private static unsafe nint UnalignedCountVector128(ref char searchSpace) { const int ElementsPerByte = sizeof(ushort) / sizeof(byte); // This alignment is only valid if the GC does not relocate; so we use ReadUnaligned to get the data. 
// If a GC does occur and alignment is lost, the GC cost will outweigh any gains from alignment so it // isn't too important to pin to maintain the alignment. - return (long)(uint)(-(int)Unsafe.AsPointer(ref searchSpace) / ElementsPerByte) & (Vector128.Count - 1); + return (nint)(uint)(-(int)Unsafe.AsPointer(ref searchSpace) / ElementsPerByte) & (Vector128.Count - 1); } #endif diff --git a/src/DotNetty.Common/Internal/TextEncodings.Utf16.NetCore3.cs b/src/DotNetty.Common/Internal/TextEncodings.Utf16.NetCore3.cs index 6f115b9e0..25bf98fc1 100644 --- a/src/DotNetty.Common/Internal/TextEncodings.Utf16.NetCore3.cs +++ b/src/DotNetty.Common/Internal/TextEncodings.Utf16.NetCore3.cs @@ -65,14 +65,7 @@ private static unsafe int GetBytesFastInternal(char* pChars, int charsLength, by char* pInputBufferRemaining; byte* pOutputBufferRemaining; - if (PlatformDependent.Is64BitProcess) - { - _ = Utf8Utility64.TranscodeToUtf8(pChars, charsLength, pBytes, bytesLength, out pInputBufferRemaining, out pOutputBufferRemaining); - } - else - { - _ = Utf8Utility32.TranscodeToUtf8(pChars, charsLength, pBytes, bytesLength, out pInputBufferRemaining, out pOutputBufferRemaining); - } + _ = Utf8Utility.TranscodeToUtf8(pChars, charsLength, pBytes, bytesLength, out pInputBufferRemaining, out pOutputBufferRemaining); charsConsumed = (int)(pInputBufferRemaining - pChars); return (int)(pOutputBufferRemaining - pBytes); diff --git a/src/DotNetty.Common/Internal/TextEncodings.Utf8.NetCore3.cs b/src/DotNetty.Common/Internal/TextEncodings.Utf8.NetCore3.cs index 9d3bb06aa..c0cda71ca 100644 --- a/src/DotNetty.Common/Internal/TextEncodings.Utf8.NetCore3.cs +++ b/src/DotNetty.Common/Internal/TextEncodings.Utf8.NetCore3.cs @@ -27,9 +27,7 @@ private static unsafe int GetCharCountFastInternal(byte* pBytes, int bytesLength // The number of UTF-16 code units will never exceed the number of UTF-8 code units, // so the addition at the end of this method will not overflow. 
- byte* ptrToFirstInvalidByte = PlatformDependent.Is64BitProcess - ? Utf8Utility64.GetPointerToFirstInvalidByte(pBytes, bytesLength, out int utf16CodeUnitCountAdjustment, out _) - : Utf8Utility32.GetPointerToFirstInvalidByte(pBytes, bytesLength, out utf16CodeUnitCountAdjustment, out _); + byte* ptrToFirstInvalidByte = Utf8Utility.GetPointerToFirstInvalidByte(pBytes, bytesLength, out int utf16CodeUnitCountAdjustment, out _); int tempBytesConsumed = (int)(ptrToFirstInvalidByte - pBytes); bytesConsumed = tempBytesConsumed; @@ -69,14 +67,7 @@ private static unsafe int GetCharsFastInternal(byte* pBytes, int bytesLength, ch byte* pInputBufferRemaining; char* pOutputBufferRemaining; - if (PlatformDependent.Is64BitProcess) - { - _ = Utf8Utility64.TranscodeToUtf16(pBytes, bytesLength, pChars, charsLength, out pInputBufferRemaining, out pOutputBufferRemaining); - } - else - { - _ = Utf8Utility32.TranscodeToUtf16(pBytes, bytesLength, pChars, charsLength, out pInputBufferRemaining, out pOutputBufferRemaining); - } + _ = Utf8Utility.TranscodeToUtf16(pBytes, bytesLength, pChars, charsLength, out pInputBufferRemaining, out pOutputBufferRemaining); bytesConsumed = (int)(pInputBufferRemaining - pBytes); return (int)(pOutputBufferRemaining - pChars); @@ -108,9 +99,7 @@ private static unsafe int GetByteCountFastInternal(char* pChars, int charsLength // The number of UTF-8 code units may exceed the number of UTF-16 code units, // so we'll need to check for overflow before casting to Int32. - char* ptrToFirstInvalidChar = PlatformDependent.Is64BitProcess - ? 
Utf16Utility64.GetPointerToFirstInvalidChar(pChars, charsLength, out long utf8CodeUnitCountAdjustment, out _) - : Utf16Utility32.GetPointerToFirstInvalidChar(pChars, charsLength, out utf8CodeUnitCountAdjustment, out _); + char* ptrToFirstInvalidChar = Utf16Utility.GetPointerToFirstInvalidChar(pChars, charsLength, out long utf8CodeUnitCountAdjustment, out _); int tempCharsConsumed = (int)(ptrToFirstInvalidChar - pChars); charsConsumed = tempCharsConsumed; diff --git a/src/DotNetty.Common/Internal/Utf16Utility32.Validation.cs b/src/DotNetty.Common/Internal/Utf16Utility.Validation.cs similarity index 94% rename from src/DotNetty.Common/Internal/Utf16Utility32.Validation.cs rename to src/DotNetty.Common/Internal/Utf16Utility.Validation.cs index 572dec4b5..c4f438c30 100644 --- a/src/DotNetty.Common/Internal/Utf16Utility32.Validation.cs +++ b/src/DotNetty.Common/Internal/Utf16Utility.Validation.cs @@ -11,21 +11,13 @@ using System.Runtime.Intrinsics.X86; using System.Numerics; using System.Runtime.CompilerServices; -using nint = System.Int32; -using nuint = System.UInt32; +using nuint_64 = System.UInt64; +using nuint_32 = System.UInt32; namespace DotNetty.Common.Internal { - internal static unsafe class Utf16Utility32 + internal static unsafe partial class Utf16Utility { -#if DEBUG - static Utf16Utility32() - { - Debug.Assert(sizeof(nint) == IntPtr.Size && nint.MinValue < 0, "nint is defined incorrectly."); - Debug.Assert(sizeof(nuint) == IntPtr.Size && nuint.MinValue == 0, "nuint is defined incorrectly."); - } -#endif // DEBUG - // Returns &inputBuffer[inputLength] if the input buffer is valid. /// /// Given an input buffer of char length , @@ -42,7 +34,7 @@ static Utf16Utility32() // First, we'll handle the common case of all-ASCII. If this is able to // consume the entire buffer, we'll skip the remainder of this method's logic. 
- int numAsciiCharsConsumedJustNow = (int)ASCIIUtility32.GetIndexOfFirstNonAsciiChar(pInputBuffer, (uint)inputLength); + int numAsciiCharsConsumedJustNow = (int)ASCIIUtility.GetIndexOfFirstNonAsciiChar(pInputBuffer, (uint)inputLength); Debug.Assert(0 <= numAsciiCharsConsumedJustNow && numAsciiCharsConsumedJustNow <= inputLength); pInputBuffer += (uint)numAsciiCharsConsumedJustNow; @@ -280,15 +272,30 @@ static Utf16Utility32() Vector utf16Data = Unsafe.ReadUnaligned>(pInputBuffer); Vector twoOrMoreUtf8Bytes = Vector.GreaterThanOrEqual(utf16Data, vector0080); Vector threeOrMoreUtf8Bytes = Vector.GreaterThanOrEqual(utf16Data, vector0800); - Vector sumVector = (Vector)(Vector.Zero - twoOrMoreUtf8Bytes - threeOrMoreUtf8Bytes); + nuint popcnt = 0; + if (PlatformDependent.Is64BitProcess) + { + Vector sumVector = (Vector)(Vector.Zero - twoOrMoreUtf8Bytes - threeOrMoreUtf8Bytes); - // We'll try summing by a natural word (rather than a 16-bit word) at a time, - // which should halve the number of operations we must perform. + // We'll try summing by a natural word (rather than a 16-bit word) at a time, + // which should halve the number of operations we must perform. - nuint popcnt = 0; - for (int i = 0; i < Vector.Count; i++) + for (int i = 0; i < Vector.Count; i++) + { + popcnt += (nuint)sumVector[i]; + } + } + else { - popcnt += sumVector[i]; + Vector sumVector = (Vector)(Vector.Zero - twoOrMoreUtf8Bytes - threeOrMoreUtf8Bytes); + + // We'll try summing by a natural word (rather than a 16-bit word) at a time, + // which should halve the number of operations we must perform. 
+ + for (int i = 0; i < Vector.Count; i++) + { + popcnt += (nuint)sumVector[i]; + } } uint popcnt32 = (uint)popcnt; diff --git a/src/DotNetty.Common/Internal/Utf16Utility.cs b/src/DotNetty.Common/Internal/Utf16Utility.cs index ef5341bbd..7c7d93e97 100644 --- a/src/DotNetty.Common/Internal/Utf16Utility.cs +++ b/src/DotNetty.Common/Internal/Utf16Utility.cs @@ -10,7 +10,7 @@ namespace DotNetty.Common.Internal { - internal static class Utf16Utility + internal static partial class Utf16Utility { /// /// Returns true iff the UInt32 represents two ASCII UTF-16 characters in machine endianness. diff --git a/src/DotNetty.Common/Internal/Utf16Utility64.Validation.cs b/src/DotNetty.Common/Internal/Utf16Utility64.Validation.cs deleted file mode 100644 index b75a4d4a8..000000000 --- a/src/DotNetty.Common/Internal/Utf16Utility64.Validation.cs +++ /dev/null @@ -1,433 +0,0 @@ -// borrowed from https://github.com/dotnet/corefx/tree/release/3.1/src/Common/src/CoreLib/System/Text/Unicode - -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -#if NETCOREAPP_3_0_GREATER -using System; -using System.Diagnostics; -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.X86; -using System.Numerics; -using System.Runtime.CompilerServices; - -using nint = System.Int64; -using nuint = System.UInt64; - -namespace DotNetty.Common.Internal -{ - internal static unsafe class Utf16Utility64 - { -#if DEBUG - static Utf16Utility64() - { - Debug.Assert(sizeof(nint) == IntPtr.Size && nint.MinValue < 0, "nint is defined incorrectly."); - Debug.Assert(sizeof(nuint) == IntPtr.Size && nuint.MinValue == 0, "nuint is defined incorrectly."); - } -#endif // DEBUG - - // Returns &inputBuffer[inputLength] if the input buffer is valid. 
- /// - /// Given an input buffer of char length , - /// returns a pointer to where the first invalid data appears in . - /// - /// - /// Returns a pointer to the end of if the buffer is well-formed. - /// - public static char* GetPointerToFirstInvalidChar(char* pInputBuffer, int inputLength, out long utf8CodeUnitCountAdjustment, out int scalarCountAdjustment) - { - Debug.Assert(inputLength >= 0, "Input length must not be negative."); - Debug.Assert(pInputBuffer != null || inputLength == 0, "Input length must be zero if input buffer pointer is null."); - - // First, we'll handle the common case of all-ASCII. If this is able to - // consume the entire buffer, we'll skip the remainder of this method's logic. - - int numAsciiCharsConsumedJustNow = (int)ASCIIUtility64.GetIndexOfFirstNonAsciiChar(pInputBuffer, (uint)inputLength); - Debug.Assert(0 <= numAsciiCharsConsumedJustNow && numAsciiCharsConsumedJustNow <= inputLength); - - pInputBuffer += (uint)numAsciiCharsConsumedJustNow; - inputLength -= numAsciiCharsConsumedJustNow; - - if (0u >= (uint)inputLength) - { - utf8CodeUnitCountAdjustment = 0; - scalarCountAdjustment = 0; - return pInputBuffer; - } - - // If we got here, it means we saw some non-ASCII data, so within our - // vectorized code paths below we'll handle all non-surrogate UTF-16 - // code points branchlessly. We'll only branch if we see surrogates. - // - // We still optimistically assume the data is mostly ASCII. This means that the - // number of UTF-8 code units and the number of scalars almost matches the number - // of UTF-16 code units. As we go through the input and find non-ASCII - // characters, we'll keep track of these "adjustment" fixups. To get the - // total number of UTF-8 code units required to encode the input data, add - // the UTF-8 code unit count adjustment to the number of UTF-16 code units - // seen. 
To get the total number of scalars present in the input data, - // add the scalar count adjustment to the number of UTF-16 code units seen. - - long tempUtf8CodeUnitCountAdjustment = 0; - int tempScalarCountAdjustment = 0; - - if (Sse2.IsSupported) - { - if (inputLength >= Vector128.Count) - { - Vector128 vector0080 = Vector128.Create((ushort)0x80); - Vector128 vectorA800 = Vector128.Create((ushort)0xA800); - Vector128 vector8800 = Vector128.Create(unchecked((short)0x8800)); - Vector128 vectorZero = Vector128.Zero; - - do - { - Vector128 utf16Data = Sse2.LoadVector128((ushort*)pInputBuffer); // unaligned - uint mask; - - // The 'charIsNonAscii' vector we're about to build will have the 0x8000 or the 0x0080 - // bit set (but not both!) only if the corresponding input char is non-ASCII. Which of - // the two bits is set doesn't matter, as will be explained in the diagram a few lines - // below. - - Vector128 charIsNonAscii; - if (Sse41.IsSupported) - { - // sets 0x0080 bit if corresponding char element is >= 0x0080 - charIsNonAscii = Sse41.Min(utf16Data, vector0080); - } - else - { - // sets 0x8000 bit if corresponding char element is >= 0x0080 - charIsNonAscii = Sse2.AndNot(vector0080, Sse2.Subtract(vectorZero, Sse2.ShiftRightLogical(utf16Data, 7))); - } - -#if DEBUG - // Quick check to ensure we didn't accidentally set both 0x8080 bits in any element. - uint debugMask = (uint)Sse2.MoveMask(charIsNonAscii.AsByte()); - Debug.Assert((debugMask & (debugMask << 1)) == 0, "Two set bits shouldn't occur adjacent to each other in this mask."); -#endif // DEBUG - - // sets 0x8080 bits if corresponding char element is >= 0x0800 - Vector128 charIsThreeByteUtf8Encoded = Sse2.Subtract(vectorZero, Sse2.ShiftRightLogical(utf16Data, 11)); - - mask = (uint)Sse2.MoveMask(Sse2.Or(charIsNonAscii, charIsThreeByteUtf8Encoded).AsByte()); - - // Each odd bit of mask will be 1 only if the char was >= 0x0080, - // and each even bit of mask will be 1 only if the char was >= 0x0800. 
- // - // Example for UTF-16 input "[ 0123 ] [ 1234 ] ...": - // - // ,-- set if char[1] is non-ASCII - // | ,-- set if char[0] is non-ASCII - // v v - // mask = ... 1 1 1 0 - // ^ ^-- set if char[0] is >= 0x0800 - // `-- set if char[1] is >= 0x0800 - // - // (If the SSE4.1 code path is taken above, the meaning of the odd and even - // bits are swapped, but the logic below otherwise holds.) - // - // This means we can popcnt the number of set bits, and the result is the - // number of *additional* UTF-8 bytes that each UTF-16 code unit requires as - // it expands. This results in the wrong count for UTF-16 surrogate code - // units (we just counted that each individual code unit expands to 3 bytes, - // but in reality a well-formed UTF-16 surrogate pair expands to 4 bytes). - // We'll handle this in just a moment. - // - // For now, compute the popcnt but squirrel it away. We'll fold it in to the - // cumulative UTF-8 adjustment factor once we determine that there are no - // unpaired surrogates in our data. (Unpaired surrogates would invalidate - // our computed result and we'd have to throw it away.) - - uint popcnt = (uint)BitOperations.PopCount(mask); - - // Surrogates need to be special-cased for two reasons: (a) we need - // to account for the fact that we over-counted in the addition above; - // and (b) they require separate validation. - - utf16Data = Sse2.Add(utf16Data, vectorA800); - mask = (uint)Sse2.MoveMask(Sse2.CompareLessThan(utf16Data.AsInt16(), vector8800).AsByte()); - - if (mask != 0) - { - // There's at least one UTF-16 surrogate code unit present. - // Since we performed a pmovmskb operation on the result of a 16-bit pcmpgtw, - // the resulting bits of 'mask' will occur in pairs: - // - 00 if the corresponding UTF-16 char was not a surrogate code unit; - // - 11 if the corresponding UTF-16 char was a surrogate code unit. 
- // - // A UTF-16 high/low surrogate code unit has the bit pattern [ 11011q## ######## ], - // where # is any bit; q = 0 represents a high surrogate, and q = 1 represents - // a low surrogate. Since we added 0xA800 in the vectorized operation above, - // our surrogate pairs will now have the bit pattern [ 10000q## ######## ]. - // If we logical right-shift each word by 3, we'll end up with the bit pattern - // [ 00010000 q####### ], which means that we can immediately use pmovmskb to - // determine whether a given char was a high or a low surrogate. - // - // Therefore the resulting bits of 'mask2' will occur in pairs: - // - 00 if the corresponding UTF-16 char was a high surrogate code unit; - // - 01 if the corresponding UTF-16 char was a low surrogate code unit; - // - ## (garbage) if the corresponding UTF-16 char was not a surrogate code unit. - // Since 'mask' already has 00 in these positions (since the corresponding char - // wasn't a surrogate), "mask AND mask2 == 00" holds for these positions. - - uint mask2 = (uint)Sse2.MoveMask(Sse2.ShiftRightLogical(utf16Data, 3).AsByte()); - - // 'lowSurrogatesMask' has its bits occur in pairs: - // - 01 if the corresponding char was a low surrogate char, - // - 00 if the corresponding char was a high surrogate char or not a surrogate at all. - - uint lowSurrogatesMask = mask2 & mask; - - // 'highSurrogatesMask' has its bits occur in pairs: - // - 01 if the corresponding char was a high surrogate char, - // - 00 if the corresponding char was a low surrogate char or not a surrogate at all. 
- - uint highSurrogatesMask = (mask2 ^ 0b_0101_0101_0101_0101u /* flip all even-numbered bits 00 <-> 01 */) & mask; - - Debug.Assert((highSurrogatesMask & lowSurrogatesMask) == 0, - "A char cannot simultaneously be both a high and a low surrogate char."); - - Debug.Assert(((highSurrogatesMask | lowSurrogatesMask) & 0b_1010_1010_1010_1010u) == 0, - "Only even bits (no odd bits) of the masks should be set."); - - // Now check that each high surrogate is followed by a low surrogate and that each - // low surrogate follows a high surrogate. We make an exception for the case where - // the final char of the vector is a high surrogate, since we can't perform validation - // on it until the next iteration of the loop when we hope to consume the matching - // low surrogate. - - highSurrogatesMask <<= 2; - if ((ushort)highSurrogatesMask != lowSurrogatesMask) - { - goto NonVectorizedLoop; // error: mismatched surrogate pair; break out of vectorized logic - } - - if (highSurrogatesMask > ushort.MaxValue) - { - // There was a standalone high surrogate at the end of the vector. - // We'll adjust our counters so that we don't consider this char consumed. - - highSurrogatesMask = (ushort)highSurrogatesMask; // don't allow stray high surrogate to be consumed by popcnt - popcnt -= 2; // the '0xC000_0000' bits in the original mask are shifted out and discarded, so account for that here - pInputBuffer--; - inputLength++; - } - - // If we're 64-bit, we can perform the zero-extension of the surrogate pairs count for - // free right now, saving the extension step a few lines below. If we're 32-bit, the - // convertion to nuint immediately below is a no-op, and we'll pay the cost of the real - // 64 -bit extension a few lines below. 
- nuint surrogatePairsCountNuint = (uint)BitOperations.PopCount(highSurrogatesMask); - - // 2 UTF-16 chars become 1 Unicode scalar - - tempScalarCountAdjustment -= (int)surrogatePairsCountNuint; - - // Since each surrogate code unit was >= 0x0800, we eagerly assumed - // it'd be encoded as 3 UTF-8 code units, so our earlier popcnt computation - // assumes that the pair is encoded as 6 UTF-8 code units. Since each - // pair is in reality only encoded as 4 UTF-8 code units, we need to - // perform this adjustment now. - - if (PlatformDependent.Is64BitProcess) - { - // Since we've already zero-extended surrogatePairsCountNuint, we can directly - // sub + sub. It's more efficient than shl + sub. - tempUtf8CodeUnitCountAdjustment -= (long)surrogatePairsCountNuint; - tempUtf8CodeUnitCountAdjustment -= (long)surrogatePairsCountNuint; - } - else - { - // Take the hit of the 64-bit extension now. - tempUtf8CodeUnitCountAdjustment -= 2 * (uint)surrogatePairsCountNuint; - } - } - - tempUtf8CodeUnitCountAdjustment += popcnt; - pInputBuffer += Vector128.Count; - inputLength -= Vector128.Count; - } while (inputLength >= Vector128.Count); - } - } - else if (Vector.IsHardwareAccelerated) - { - if (inputLength >= Vector.Count) - { - Vector vector0080 = new Vector(0x0080); - Vector vector0400 = new Vector(0x0400); - Vector vector0800 = new Vector(0x0800); - Vector vectorD800 = new Vector(0xD800); - - do - { - // The 'twoOrMoreUtf8Bytes' and 'threeOrMoreUtf8Bytes' vectors will contain - // elements whose values are 0xFFFF (-1 as signed word) iff the corresponding - // UTF-16 code unit was >= 0x0080 and >= 0x0800, respectively. 
By summing these - // vectors, each element of the sum will contain one of three values: - // - // 0x0000 ( 0) = original char was 0000..007F - // 0xFFFF (-1) = original char was 0080..07FF - // 0xFFFE (-2) = original char was 0800..FFFF - // - // We'll negate them to produce a value 0..2 for each element, then sum all the - // elements together to produce the number of *additional* UTF-8 code units - // required to represent this UTF-16 data. This is similar to the popcnt step - // performed by the SSE2 code path. This will overcount surrogates, but we'll - // handle that shortly. - - Vector utf16Data = Unsafe.ReadUnaligned>(pInputBuffer); - Vector twoOrMoreUtf8Bytes = Vector.GreaterThanOrEqual(utf16Data, vector0080); - Vector threeOrMoreUtf8Bytes = Vector.GreaterThanOrEqual(utf16Data, vector0800); - Vector sumVector = (Vector)(Vector.Zero - twoOrMoreUtf8Bytes - threeOrMoreUtf8Bytes); - - // We'll try summing by a natural word (rather than a 16-bit word) at a time, - // which should halve the number of operations we must perform. - - nuint popcnt = 0; - for (int i = 0; i < Vector.Count; i++) - { - popcnt += sumVector[i]; - } - - uint popcnt32 = (uint)popcnt; - if (PlatformDependent.Is64BitProcess) - { - popcnt32 += (uint)(popcnt >> 32); - } - - // As in the SSE4.1 paths, compute popcnt but don't fold it in until we - // know there aren't any unpaired surrogates in the input data. - - popcnt32 = (ushort)popcnt32 + (popcnt32 >> 16); - - // Now check for surrogates. - - utf16Data -= vectorD800; - Vector surrogateChars = Vector.LessThan(utf16Data, vector0800); - if (surrogateChars != Vector.Zero) - { - // There's at least one surrogate (high or low) UTF-16 code unit in - // the vector. We'll build up additional vectors: 'highSurrogateChars' - // and 'lowSurrogateChars', where the elements are 0xFFFF iff the original - // UTF-16 code unit was a high or low surrogate, respectively. 
- - Vector highSurrogateChars = Vector.LessThan(utf16Data, vector0400); - Vector lowSurrogateChars = Vector.AndNot(surrogateChars, highSurrogateChars); - - // We want to make sure that each high surrogate code unit is followed by - // a low surrogate code unit and each low surrogate code unit follows a - // high surrogate code unit. Since we don't have an equivalent of pmovmskb - // or palignr available to us, we'll do this as a loop. We won't look at - // the very last high surrogate char element since we don't yet know if - // the next vector read will have a low surrogate char element. - - if (lowSurrogateChars[0] != 0) - { - goto Error; // error: start of buffer contains standalone low surrogate char - } - - ushort surrogatePairsCount = 0; - for (int i = 0; i < Vector.Count - 1; i++) - { - surrogatePairsCount -= highSurrogateChars[i]; // turns into +1 or +0 - if (highSurrogateChars[i] != lowSurrogateChars[i + 1]) - { - goto NonVectorizedLoop; // error: mismatched surrogate pair; break out of vectorized logic - } - } - - if (highSurrogateChars[Vector.Count - 1] != 0) - { - // There was a standalone high surrogate at the end of the vector. - // We'll adjust our counters so that we don't consider this char consumed. - - pInputBuffer--; - inputLength++; - popcnt32 -= 2; - } - - nint surrogatePairsCountNint = (nint)surrogatePairsCount; // zero-extend to native int size - - // 2 UTF-16 chars become 1 Unicode scalar - - tempScalarCountAdjustment -= (int)surrogatePairsCountNint; - - // Since each surrogate code unit was >= 0x0800, we eagerly assumed - // it'd be encoded as 3 UTF-8 code units. Each surrogate half is only - // encoded as 2 UTF-8 code units (for 4 UTF-8 code units total), - // so we'll adjust this now. 
- - tempUtf8CodeUnitCountAdjustment -= surrogatePairsCountNint; - tempUtf8CodeUnitCountAdjustment -= surrogatePairsCountNint; - } - - tempUtf8CodeUnitCountAdjustment += popcnt32; - pInputBuffer += Vector.Count; - inputLength -= Vector.Count; - } while (inputLength >= Vector.Count); - } - } - - NonVectorizedLoop: - - // Vectorization isn't supported on our current platform, or the input was too small to benefit - // from vectorization, or we saw invalid UTF-16 data in the vectorized code paths and need to - // drain remaining valid chars before we report failure. - - for (; inputLength > 0; pInputBuffer++, inputLength--) - { - uint thisChar = pInputBuffer[0]; - if (thisChar <= 0x7F) - { - continue; - } - - // Bump adjustment by +1 for U+0080..U+07FF; by +2 for U+0800..U+FFFF. - // This optimistically assumes no surrogates, which we'll handle shortly. - - tempUtf8CodeUnitCountAdjustment += (thisChar + 0x0001_F800u) >> 16; - - if (!UnicodeUtility.IsSurrogateCodePoint(thisChar)) - { - continue; - } - - // Found a surrogate char. Back out the adjustment we made above, then - // try to consume the entire surrogate pair all at once. We won't bother - // trying to interpret the surrogate pair as a scalar value; we'll only - // validate that its bit pattern matches what's expected for a surrogate pair. - - tempUtf8CodeUnitCountAdjustment -= 2; - - if (inputLength == 1) - { - goto Error; // input buffer too small to read a surrogate pair - } - - thisChar = Unsafe.ReadUnaligned(pInputBuffer); - if (((thisChar - (BitConverter.IsLittleEndian ? 0xDC00_D800u : 0xD800_DC00u)) & 0xFC00_FC00u) != 0) - { - goto Error; // not a well-formed surrogate pair - } - - tempScalarCountAdjustment--; // 2 UTF-16 code units -> 1 scalar - tempUtf8CodeUnitCountAdjustment += 2; // 2 UTF-16 code units -> 4 UTF-8 code units - - pInputBuffer++; // consumed one extra char - inputLength--; - } - - Error: - - // Also used for normal return. 
- - utf8CodeUnitCountAdjustment = tempUtf8CodeUnitCountAdjustment; - scalarCountAdjustment = tempScalarCountAdjustment; - return pInputBuffer; - } - } -} -#endif diff --git a/src/DotNetty.Common/Internal/Utf8Utility64.Transcoding.cs b/src/DotNetty.Common/Internal/Utf8Utility.Transcoding.cs similarity index 98% rename from src/DotNetty.Common/Internal/Utf8Utility64.Transcoding.cs rename to src/DotNetty.Common/Internal/Utf8Utility.Transcoding.cs index 1cfa94388..65670a61b 100644 --- a/src/DotNetty.Common/Internal/Utf8Utility64.Transcoding.cs +++ b/src/DotNetty.Common/Internal/Utf8Utility.Transcoding.cs @@ -12,23 +12,11 @@ using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.Intrinsics.X86; -using nint = System.Int64; -using nuint = System.UInt64; namespace DotNetty.Common.Internal { - internal static unsafe partial class Utf8Utility64 + internal static unsafe partial class Utf8Utility { -#if DEBUG - static Utf8Utility64() - { - Debug.Assert(sizeof(nint) == IntPtr.Size && nint.MinValue < 0, "nint is defined incorrectly."); - Debug.Assert(sizeof(nuint) == IntPtr.Size && nuint.MinValue == 0, "nuint is defined incorrectly."); - - _ValidateAdditionalNIntDefinitions(); - } -#endif // DEBUG - // On method return, pInputBufferRemaining and pOutputBufferRemaining will both point to where // the next byte would have been consumed from / the next char would have been written to. // inputLength in bytes, outputCharsRemaining in chars. @@ -43,7 +31,7 @@ public static OperationStatus TranscodeToUtf16(byte* pInputBuffer, int inputLeng // First, try vectorized conversion. 
{ - nuint numElementsConverted = ASCIIUtility64.WidenAsciiToUtf16(pInputBuffer, pOutputBuffer, (uint)Math.Min(inputLength, outputCharsRemaining)); + nuint numElementsConverted = ASCIIUtility.WidenAsciiToUtf16(pInputBuffer, pOutputBuffer, (uint)Math.Min(inputLength, outputCharsRemaining)); pInputBuffer += numElementsConverted; pOutputBuffer += numElementsConverted; @@ -871,7 +859,7 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt // First, try vectorized conversion. { - nuint numElementsConverted = ASCIIUtility64.NarrowUtf16ToAscii(pInputBuffer, pOutputBuffer, (uint)Math.Min(inputLength, outputBytesRemaining)); + nuint numElementsConverted = ASCIIUtility.NarrowUtf16ToAscii(pInputBuffer, pOutputBuffer, (uint)Math.Min(inputLength, outputBytesRemaining)); pInputBuffer += numElementsConverted; pOutputBuffer += numElementsConverted; diff --git a/src/DotNetty.Common/Internal/Utf8Utility64.Validation.cs b/src/DotNetty.Common/Internal/Utf8Utility.Validation.cs similarity index 98% rename from src/DotNetty.Common/Internal/Utf8Utility64.Validation.cs rename to src/DotNetty.Common/Internal/Utf8Utility.Validation.cs index b6789a205..33b0a4e86 100644 --- a/src/DotNetty.Common/Internal/Utf8Utility64.Validation.cs +++ b/src/DotNetty.Common/Internal/Utf8Utility.Validation.cs @@ -10,21 +10,11 @@ using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.Intrinsics.X86; -using nint = System.Int64; -using nuint = System.UInt64; namespace DotNetty.Common.Internal { - internal static unsafe partial class Utf8Utility64 + internal static unsafe partial class Utf8Utility { -#if DEBUG - private static void _ValidateAdditionalNIntDefinitions() - { - Debug.Assert(sizeof(nint) == IntPtr.Size && nint.MinValue < 0, "nint is defined incorrectly."); - Debug.Assert(sizeof(nuint) == IntPtr.Size && nuint.MinValue == 0, "nuint is defined incorrectly."); - } -#endif // DEBUG - // Returns &inputBuffer[inputLength] if the input buffer is valid. 
/// /// Given an input buffer of byte length , @@ -41,7 +31,7 @@ private static void _ValidateAdditionalNIntDefinitions() // First, try to drain off as many ASCII bytes as we can from the beginning. { - nuint numAsciiBytesCounted = ASCIIUtility64.GetIndexOfFirstNonAsciiByte(pInputBuffer, (uint)inputLength); + nuint numAsciiBytesCounted = ASCIIUtility.GetIndexOfFirstNonAsciiByte(pInputBuffer, (uint)inputLength); pInputBuffer += numAsciiBytesCounted; // Quick check - did we just end up consuming the entire input buffer? diff --git a/src/DotNetty.Common/Internal/Utf8Utility.cs b/src/DotNetty.Common/Internal/Utf8Utility.cs index ef81623ae..e7febc58b 100644 --- a/src/DotNetty.Common/Internal/Utf8Utility.cs +++ b/src/DotNetty.Common/Internal/Utf8Utility.cs @@ -41,9 +41,7 @@ public unsafe static int GetIndexOfFirstInvalidUtf8Sequence(in ReadOnlySpan= (uint)utf16CodeUnitCountAdjustment); // If UTF-16 char count == UTF-8 byte count, it's ASCII. diff --git a/src/DotNetty.Common/Internal/Utf8Utility32.Transcoding.cs b/src/DotNetty.Common/Internal/Utf8Utility32.Transcoding.cs deleted file mode 100644 index 077eaac51..000000000 --- a/src/DotNetty.Common/Internal/Utf8Utility32.Transcoding.cs +++ /dev/null @@ -1,1477 +0,0 @@ -// borrowed from https://github.com/dotnet/corefx/tree/release/3.1/src/Common/src/CoreLib/System/Text/Unicode - -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. 
- -#if NETCOREAPP_3_0_GREATER -using System; -using System.Buffers; -using System.Buffers.Binary; -using System.Diagnostics; -using System.Numerics; -using System.Runtime.CompilerServices; -using System.Runtime.Intrinsics.X86; -using nint = System.Int32; -using nuint = System.UInt32; - -namespace DotNetty.Common.Internal -{ - internal static unsafe partial class Utf8Utility32 - { -#if DEBUG - static Utf8Utility32() - { - Debug.Assert(sizeof(nint) == IntPtr.Size && nint.MinValue < 0, "nint is defined incorrectly."); - Debug.Assert(sizeof(nuint) == IntPtr.Size && nuint.MinValue == 0, "nuint is defined incorrectly."); - - _ValidateAdditionalNIntDefinitions(); - } -#endif // DEBUG - - // On method return, pInputBufferRemaining and pOutputBufferRemaining will both point to where - // the next byte would have been consumed from / the next char would have been written to. - // inputLength in bytes, outputCharsRemaining in chars. - public static OperationStatus TranscodeToUtf16(byte* pInputBuffer, int inputLength, char* pOutputBuffer, int outputCharsRemaining, out byte* pInputBufferRemaining, out char* pOutputBufferRemaining) - { - Debug.Assert(inputLength >= 0, "Input length must not be negative."); - Debug.Assert(pInputBuffer != null || inputLength == 0, "Input length must be zero if input buffer pointer is null."); - - Debug.Assert(outputCharsRemaining >= 0, "Destination length must not be negative."); - Debug.Assert(pOutputBuffer != null || outputCharsRemaining == 0, "Destination length must be zero if destination buffer pointer is null."); - - // First, try vectorized conversion. - - { - nuint numElementsConverted = ASCIIUtility32.WidenAsciiToUtf16(pInputBuffer, pOutputBuffer, (uint)Math.Min(inputLength, outputCharsRemaining)); - - pInputBuffer += numElementsConverted; - pOutputBuffer += numElementsConverted; - - // Quick check - did we just end up consuming the entire input buffer? - // If so, short-circuit the remainder of the method. 
- - if ((int)numElementsConverted == inputLength) - { - pInputBufferRemaining = pInputBuffer; - pOutputBufferRemaining = pOutputBuffer; - return OperationStatus.Done; - } - - inputLength -= (int)numElementsConverted; - outputCharsRemaining -= (int)numElementsConverted; - } - - if (inputLength < sizeof(uint)) - { - goto ProcessInputOfLessThanDWordSize; - } - - byte* pFinalPosWhereCanReadDWordFromInputBuffer = pInputBuffer + (uint)inputLength - 4; - - // Begin the main loop. - -#if DEBUG - byte* pLastBufferPosProcessed = null; // used for invariant checking in debug builds -#endif - - while (pInputBuffer <= pFinalPosWhereCanReadDWordFromInputBuffer) - { - // Read 32 bits at a time. This is enough to hold any possible UTF8-encoded scalar. - - uint thisDWord = Unsafe.ReadUnaligned(pInputBuffer); - - AfterReadDWord: - -#if DEBUG - Debug.Assert(pLastBufferPosProcessed < pInputBuffer, "Algorithm should've made forward progress since last read."); - pLastBufferPosProcessed = pInputBuffer; -#endif - // First, check for the common case of all-ASCII bytes. - - if (ASCIIUtility.AllBytesInUInt32AreAscii(thisDWord)) - { - // We read an all-ASCII sequence. - - if (outputCharsRemaining < sizeof(uint)) - { - goto ProcessRemainingBytesSlow; // running out of space, but may be able to write some data - } - - Utf8Utility.Widen4AsciiBytesToCharsAndWrite(ref *pOutputBuffer, thisDWord); - pInputBuffer += 4; - pOutputBuffer += 4; - outputCharsRemaining -= 4; - - // If we saw a sequence of all ASCII, there's a good chance a significant amount of following data is also ASCII. - // Below is basically unrolled loops with poor man's vectorization. 
- - uint remainingInputBytes = (uint)(void*)Unsafe.ByteOffset(ref *pInputBuffer, ref *pFinalPosWhereCanReadDWordFromInputBuffer) + 4; - uint maxIters = Math.Min(remainingInputBytes, (uint)outputCharsRemaining) / (2 * sizeof(uint)); - uint secondDWord; - int i; - for (i = 0; (uint)i < maxIters; i++) - { - // Reading two DWORDs in parallel benchmarked faster than reading a single QWORD. - - thisDWord = Unsafe.ReadUnaligned(pInputBuffer); - secondDWord = Unsafe.ReadUnaligned(pInputBuffer + sizeof(uint)); - - if (!ASCIIUtility.AllBytesInUInt32AreAscii(thisDWord | secondDWord)) - { - goto LoopTerminatedEarlyDueToNonAsciiData; - } - - pInputBuffer += 8; - - Utf8Utility.Widen4AsciiBytesToCharsAndWrite(ref pOutputBuffer[0], thisDWord); - Utf8Utility.Widen4AsciiBytesToCharsAndWrite(ref pOutputBuffer[4], secondDWord); - - pOutputBuffer += 8; - } - - outputCharsRemaining -= 8 * i; - - continue; // need to perform a bounds check because we might be running out of data - - LoopTerminatedEarlyDueToNonAsciiData: - - if (ASCIIUtility.AllBytesInUInt32AreAscii(thisDWord)) - { - // The first DWORD contained all-ASCII bytes, so expand it. - - Utf8Utility.Widen4AsciiBytesToCharsAndWrite(ref *pOutputBuffer, thisDWord); - - // continue the outer loop from the second DWORD - - Debug.Assert(!ASCIIUtility.AllBytesInUInt32AreAscii(secondDWord)); - thisDWord = secondDWord; - - pInputBuffer += 4; - pOutputBuffer += 4; - outputCharsRemaining -= 4; - } - - outputCharsRemaining -= 8 * i; - - // We know that there's *at least* one DWORD of data remaining in the buffer. - // We also know that it's not all-ASCII. We can skip the logic at the beginning of the main loop. - - goto AfterReadDWordSkipAllBytesAsciiCheck; - } - - AfterReadDWordSkipAllBytesAsciiCheck: - - Debug.Assert(!ASCIIUtility.AllBytesInUInt32AreAscii(thisDWord)); // this should have been handled earlier - - // Next, try stripping off ASCII bytes one at a time. 
- // We only handle up to three ASCII bytes here since we handled the four ASCII byte case above. - - if (Utf8Utility.UInt32FirstByteIsAscii(thisDWord)) - { - if (outputCharsRemaining >= 3) - { - // Fast-track: we don't need to check the destination length for subsequent - // ASCII bytes since we know we can write them all now. - - uint thisDWordLittleEndian = Utf8Utility.ToLittleEndian(thisDWord); - - nuint adjustment = 1; - pOutputBuffer[0] = (char)(byte)thisDWordLittleEndian; - - if (Utf8Utility.UInt32SecondByteIsAscii(thisDWord)) - { - adjustment++; - thisDWordLittleEndian >>= 8; - pOutputBuffer[1] = (char)(byte)thisDWordLittleEndian; - - if (Utf8Utility.UInt32ThirdByteIsAscii(thisDWord)) - { - adjustment++; - thisDWordLittleEndian >>= 8; - pOutputBuffer[2] = (char)(byte)thisDWordLittleEndian; - } - } - - pInputBuffer += adjustment; - pOutputBuffer += adjustment; - outputCharsRemaining -= (int)adjustment; - } - else - { - // Slow-track: we need to make sure each individual write has enough - // of a buffer so that we don't overrun the destination. - - if (0u >= (uint)outputCharsRemaining) - { - goto OutputBufferTooSmall; - } - - uint thisDWordLittleEndian = Utf8Utility.ToLittleEndian(thisDWord); - - pInputBuffer++; - *pOutputBuffer++ = (char)(byte)thisDWordLittleEndian; - outputCharsRemaining--; - - if (Utf8Utility.UInt32SecondByteIsAscii(thisDWord)) - { - if (0u >= (uint)outputCharsRemaining) - { - goto OutputBufferTooSmall; - } - - pInputBuffer++; - thisDWordLittleEndian >>= 8; - *pOutputBuffer++ = (char)(byte)thisDWordLittleEndian; - - // We can perform a small optimization here. We know at this point that - // the output buffer is fully consumed (we read two ASCII bytes and wrote - // two ASCII chars, and we checked earlier that the destination buffer - // can't store a third byte). 
If the next byte is ASCII, we can jump straight - // to the return statement since the end-of-method logic only relies on the - // destination buffer pointer -- NOT the output chars remaining count -- being - // correct. If the next byte is not ASCII, we'll need to continue with the - // rest of the main loop, but we can set the buffer length directly to zero - // rather than decrementing it from 1 to 0. - - Debug.Assert(outputCharsRemaining == 1); - - if (Utf8Utility.UInt32ThirdByteIsAscii(thisDWord)) - { - goto OutputBufferTooSmall; - } - else - { - outputCharsRemaining = 0; - } - } - } - - if (pInputBuffer > pFinalPosWhereCanReadDWordFromInputBuffer) - { - goto ProcessRemainingBytesSlow; // input buffer doesn't contain enough data to read a DWORD - } - else - { - // The input buffer at the current offset contains a non-ASCII byte. - // Read an entire DWORD and fall through to multi-byte consumption logic. - thisDWord = Unsafe.ReadUnaligned(pInputBuffer); - } - } - - BeforeProcessTwoByteSequence: - - // At this point, we know we're working with a multi-byte code unit, - // but we haven't yet validated it. - - // The masks and comparands are derived from the Unicode Standard, Table 3-6. - // Additionally, we need to check for valid byte sequences per Table 3-7. - - // Check the 2-byte case. - - if (Utf8Utility.UInt32BeginsWithUtf8TwoByteMask(thisDWord)) - { - // Per Table 3-7, valid sequences are: - // [ C2..DF ] [ 80..BF ] - - if (Utf8Utility.UInt32BeginsWithOverlongUtf8TwoByteSequence(thisDWord)) - { - goto Error; - } - - ProcessTwoByteSequenceSkipOverlongFormCheck: - - // Optimization: If this is a two-byte-per-character language like Cyrillic or Hebrew, - // there's a good chance that if we see one two-byte run then there's another two-byte - // run immediately after. Let's check that now. - - // On little-endian platforms, we can check for the two-byte UTF8 mask *and* validate that - // the value isn't overlong using a single comparison. 
On big-endian platforms, we'll need - // to validate the mask and validate that the sequence isn't overlong as two separate comparisons. - - if ((BitConverter.IsLittleEndian && Utf8Utility.UInt32EndsWithValidUtf8TwoByteSequenceLittleEndian(thisDWord)) - || (!BitConverter.IsLittleEndian && (Utf8Utility.UInt32EndsWithUtf8TwoByteMask(thisDWord) && !Utf8Utility.UInt32EndsWithOverlongUtf8TwoByteSequence(thisDWord)))) - { - // We have two runs of two bytes each. - - if (outputCharsRemaining < 2) - { - goto ProcessRemainingBytesSlow; // running out of output buffer - } - - Unsafe.WriteUnaligned(pOutputBuffer, Utf8Utility.ExtractTwoCharsPackedFromTwoAdjacentTwoByteSequences(thisDWord)); - - pInputBuffer += 4; - pOutputBuffer += 2; - outputCharsRemaining -= 2; - - if (pInputBuffer <= pFinalPosWhereCanReadDWordFromInputBuffer) - { - // Optimization: If we read a long run of two-byte sequences, the next sequence is probably - // also two bytes. Check for that first before going back to the beginning of the loop. - - thisDWord = Unsafe.ReadUnaligned(pInputBuffer); - - if (BitConverter.IsLittleEndian) - { - if (Utf8Utility.UInt32BeginsWithValidUtf8TwoByteSequenceLittleEndian(thisDWord)) - { - // The next sequence is a valid two-byte sequence. - goto ProcessTwoByteSequenceSkipOverlongFormCheck; - } - } - else - { - if (Utf8Utility.UInt32BeginsWithUtf8TwoByteMask(thisDWord)) - { - if (Utf8Utility.UInt32BeginsWithOverlongUtf8TwoByteSequence(thisDWord)) - { - goto Error; // The next sequence purports to be a 2-byte sequence but is overlong. - } - - goto ProcessTwoByteSequenceSkipOverlongFormCheck; - } - } - - // If we reached this point, the next sequence is something other than a valid - // two-byte sequence, so go back to the beginning of the loop. - goto AfterReadDWord; - } - else - { - goto ProcessRemainingBytesSlow; // Running out of data - go down slow path - } - } - - // The buffer contains a 2-byte sequence followed by 2 bytes that aren't a 2-byte sequence. 
- // Unlikely that a 3-byte sequence would follow a 2-byte sequence, so perhaps remaining - // bytes are ASCII? - - uint charToWrite = Utf8Utility.ExtractCharFromFirstTwoByteSequence(thisDWord); // optimistically compute this now, but don't store until we know dest is large enough - - if (Utf8Utility.UInt32ThirdByteIsAscii(thisDWord)) - { - if (Utf8Utility.UInt32FourthByteIsAscii(thisDWord)) - { - if (outputCharsRemaining < 3) - { - goto ProcessRemainingBytesSlow; // running out of output buffer - } - - pOutputBuffer[0] = (char)charToWrite; - if (BitConverter.IsLittleEndian) - { - thisDWord >>= 16; - pOutputBuffer[1] = (char)(byte)thisDWord; - thisDWord >>= 8; - pOutputBuffer[2] = (char)thisDWord; - } - else - { - pOutputBuffer[2] = (char)(byte)thisDWord; - pOutputBuffer[1] = (char)(byte)(thisDWord >> 8); - } - pInputBuffer += 4; - pOutputBuffer += 3; - outputCharsRemaining -= 3; - - continue; // go back to original bounds check and check for ASCII - } - else - { - if (outputCharsRemaining < 2) - { - goto ProcessRemainingBytesSlow; // running out of output buffer - } - - pOutputBuffer[0] = (char)charToWrite; - pOutputBuffer[1] = (char)(byte)(thisDWord >> (BitConverter.IsLittleEndian ? 16 : 8)); - pInputBuffer += 3; - pOutputBuffer += 2; - outputCharsRemaining -= 2; - - // A two-byte sequence followed by an ASCII byte followed by a non-ASCII byte. - // Read in the next DWORD and jump directly to the start of the multi-byte processing block. 
- - if (pFinalPosWhereCanReadDWordFromInputBuffer < pInputBuffer) - { - goto ProcessRemainingBytesSlow; // Running out of data - go down slow path - } - else - { - thisDWord = Unsafe.ReadUnaligned(pInputBuffer); - goto BeforeProcessTwoByteSequence; - } - } - } - else - { - if (0u >= (uint)outputCharsRemaining) - { - goto ProcessRemainingBytesSlow; // running out of output buffer - } - - pOutputBuffer[0] = (char)charToWrite; - pInputBuffer += 2; - pOutputBuffer += 1; - outputCharsRemaining--; - - if (pFinalPosWhereCanReadDWordFromInputBuffer < pInputBuffer) - { - goto ProcessRemainingBytesSlow; // Running out of data - go down slow path - } - else - { - thisDWord = Unsafe.ReadUnaligned(pInputBuffer); - goto BeforeProcessThreeByteSequence; // we know the next byte isn't ASCII, and it's not the start of a 2-byte sequence (this was checked above) - } - } - } - - // Check the 3-byte case. - - BeforeProcessThreeByteSequence: - - if (Utf8Utility.UInt32BeginsWithUtf8ThreeByteMask(thisDWord)) - { - ProcessThreeByteSequenceWithCheck: - - // We need to check for overlong or surrogate three-byte sequences. - // - // Per Table 3-7, valid sequences are: - // [ E0 ] [ A0..BF ] [ 80..BF ] - // [ E1..EC ] [ 80..BF ] [ 80..BF ] - // [ ED ] [ 80..9F ] [ 80..BF ] - // [ EE..EF ] [ 80..BF ] [ 80..BF ] - // - // Big-endian examples of using the above validation table: - // E0A0 = 1110 0000 1010 0000 => invalid (overlong ) patterns are 1110 0000 100# #### - // ED9F = 1110 1101 1001 1111 => invalid (surrogate) patterns are 1110 1101 101# #### - // If using the bitmask ......................................... 0000 1111 0010 0000 (=0F20), - // Then invalid (overlong) patterns match the comparand ......... 0000 0000 0000 0000 (=0000), - // And invalid (surrogate) patterns match the comparand ......... 0000 1101 0010 0000 (=0D20). 
- - if (BitConverter.IsLittleEndian) - { - // The "overlong or surrogate" check can be implemented using a single jump, but there's - // some overhead to moving the bits into the correct locations in order to perform the - // correct comparison, and in practice the processor's branch prediction capability is - // good enough that we shouldn't bother. So we'll use two jumps instead. - - // Can't extract this check into its own helper method because JITter produces suboptimal - // assembly, even with aggressive inlining. - - // Code below becomes 5 instructions: test, jz, lea, test, jz - - if ((0u >= (thisDWord & 0x0000_200Fu)) || (0u >= ((thisDWord - 0x0000_200Du) & 0x0000_200Fu))) - { - goto Error; // overlong or surrogate - } - } - else - { - if ((0u >= (thisDWord & 0x0F20_0000u)) || (0u >= ((thisDWord - 0x0D20_0000u) & 0x0F20_0000u))) - { - goto Error; // overlong or surrogate - } - } - - // At this point, we know the incoming scalar is well-formed. - - if (0u >= (uint)outputCharsRemaining) - { - goto OutputBufferTooSmall; // not enough space in the destination buffer to write - } - - // As an optimization, on compatible platforms check if a second three-byte sequence immediately - // follows the one we just read, and if so use BSWAP and BMI2 to extract them together. - - if (Bmi2.X64.IsSupported) - { - Debug.Assert(BitConverter.IsLittleEndian, "BMI2 requires little-endian."); - - // First, check that the leftover byte from the original DWORD is in the range [ E0..EF ], which - // would indicate the potential start of a second three-byte sequence. - - if (0u >= ((thisDWord - 0xE000_0000u) & 0xF000_0000u)) - { - // The const '3' below is correct because pFinalPosWhereCanReadDWordFromInputBuffer represents - // the final place where we can safely perform a DWORD read, and we want to probe whether it's - // safe to read a DWORD beginning at address &pInputBuffer[3]. 
- - if (outputCharsRemaining > 1 && (nint)(void*)Unsafe.ByteOffset(ref *pInputBuffer, ref *pFinalPosWhereCanReadDWordFromInputBuffer) >= 3) - { - // We're going to attempt to read a second 3-byte sequence and write them both out simultaneously using PEXT. - // We need to check the continuation bit mask on the remaining two bytes (and we may as well check the leading - // byte mask again since it's free), then perform overlong + surrogate checks. If the overlong or surrogate - // checks fail, we'll fall through to the remainder of the logic which will transcode the original valid - // 3-byte UTF-8 sequence we read; and on the next iteration of the loop the validation routine will run again, - // fail, and redirect control flow to the error handling logic at the very end of this method. - - uint secondDWord = Unsafe.ReadUnaligned(pInputBuffer + 3); - - if (Utf8Utility.UInt32BeginsWithUtf8ThreeByteMask(secondDWord) - && ((secondDWord & 0x0000_200Fu) != 0) - && (((secondDWord - 0x0000_200Du) & 0x0000_200Fu) != 0)) - { - // combinedQWord = [ 1110ZZZZ 10YYYYYY 10XXXXXX ######## | 1110zzzz 10yyyyyy 10xxxxxx ######## ], where xyz are from first DWORD, XYZ are from second DWORD - ulong combinedQWord = ((ulong)BinaryPrimitives.ReverseEndianness(secondDWord) << 32) | BinaryPrimitives.ReverseEndianness(thisDWord); - thisDWord = secondDWord; // store this value in the correct local for the ASCII drain logic - - // extractedQWord = [ 00000000 00000000 00000000 00000000 | ZZZZYYYYYYXXXXXX zzzzyyyyyyxxxxxx ] - ulong extractedQWord = Bmi2.X64.ParallelBitExtract(combinedQWord, 0x0F3F3F00_0F3F3F00ul); - - Unsafe.WriteUnaligned(pOutputBuffer, (uint)extractedQWord); - pInputBuffer += 6; - pOutputBuffer += 2; - outputCharsRemaining -= 2; - - // Drain any ASCII data following the second three-byte sequence. - - goto CheckForAsciiByteAfterThreeByteSequence; - } - } - } - } - - // Couldn't extract 2x three-byte sequences together, just do this one by itself. 
- - *pOutputBuffer = (char)Utf8Utility.ExtractCharFromFirstThreeByteSequence(thisDWord); - pInputBuffer += 3; - pOutputBuffer += 1; - outputCharsRemaining -= 1; - - CheckForAsciiByteAfterThreeByteSequence: - - // Occasionally one-off ASCII characters like spaces, periods, or newlines will make their way - // in to the text. If this happens strip it off now before seeing if the next character - // consists of three code units. - - if (Utf8Utility.UInt32FourthByteIsAscii(thisDWord)) - { - if (0u >= (uint)outputCharsRemaining) - { - goto OutputBufferTooSmall; - } - - if (BitConverter.IsLittleEndian) - { - *pOutputBuffer = (char)(thisDWord >> 24); - } - else - { - *pOutputBuffer = (char)(byte)thisDWord; - } - - pInputBuffer += 1; - pOutputBuffer += 1; - outputCharsRemaining -= 1; - } - - if (pInputBuffer <= pFinalPosWhereCanReadDWordFromInputBuffer) - { - thisDWord = Unsafe.ReadUnaligned(pInputBuffer); - - // Optimization: A three-byte character could indicate CJK text, which makes it likely - // that the character following this one is also CJK. We'll check for a three-byte sequence - // marker now and jump directly to three-byte sequence processing if we see one, skipping - // all of the logic at the beginning of the loop. - - if (Utf8Utility.UInt32BeginsWithUtf8ThreeByteMask(thisDWord)) - { - goto ProcessThreeByteSequenceWithCheck; // found a three-byte sequence marker; validate and consume - } - else - { - goto AfterReadDWord; // probably ASCII punctuation or whitespace - } - } - else - { - goto ProcessRemainingBytesSlow; // Running out of data - go down slow path - } - } - - // Assume the 4-byte case, but we need to validate. - - { - // We need to check for overlong or invalid (over U+10FFFF) four-byte sequences. 
- // - // Per Table 3-7, valid sequences are: - // [ F0 ] [ 90..BF ] [ 80..BF ] [ 80..BF ] - // [ F1..F3 ] [ 80..BF ] [ 80..BF ] [ 80..BF ] - // [ F4 ] [ 80..8F ] [ 80..BF ] [ 80..BF ] - - if (!Utf8Utility.UInt32BeginsWithUtf8FourByteMask(thisDWord)) - { - goto Error; - } - - // Now check for overlong / out-of-range sequences. - - if (BitConverter.IsLittleEndian) - { - // The DWORD we read is [ 10xxxxxx 10yyyyyy 10zzzzzz 11110www ]. - // We want to get the 'w' byte in front of the 'z' byte so that we can perform - // a single range comparison. We'll take advantage of the fact that the JITter - // can detect a ROR / ROL operation, then we'll just zero out the bytes that - // aren't involved in the range check. - - uint toCheck = thisDWord & 0x0000_FFFFu; - - // At this point, toCheck = [ 00000000 00000000 10zzzzzz 11110www ]. - - toCheck = BitOperations.RotateRight(toCheck, 8); - - // At this point, toCheck = [ 11110www 00000000 00000000 10zzzzzz ]. - - if (!UnicodeUtility.IsInRangeInclusive(toCheck, 0xF000_0090u, 0xF400_008Fu)) - { - goto Error; - } - } - else - { - if (!UnicodeUtility.IsInRangeInclusive(thisDWord, 0xF090_0000u, 0xF48F_FFFFu)) - { - goto Error; - } - } - - // Validation complete. - - if (outputCharsRemaining < 2) - { - // There's no point to falling back to the "drain the input buffer" logic, since we know - // we can't write anything to the destination. So we'll just exit immediately. 
- goto OutputBufferTooSmall; - } - - Unsafe.WriteUnaligned(pOutputBuffer, Utf8Utility.ExtractCharsFromFourByteSequence(thisDWord)); - - pInputBuffer += 4; - pOutputBuffer += 2; - outputCharsRemaining -= 2; - - continue; // go back to beginning of loop for processing - } - } - - ProcessRemainingBytesSlow: - inputLength = (int)(void*)Unsafe.ByteOffset(ref *pInputBuffer, ref *pFinalPosWhereCanReadDWordFromInputBuffer) + 4; - - ProcessInputOfLessThanDWordSize: - while (inputLength > 0) - { - uint firstByte = pInputBuffer[0]; - if (firstByte <= 0x7Fu) - { - if (0u >= (uint)outputCharsRemaining) - { - goto OutputBufferTooSmall; // we have no hope of writing anything to the output - } - - // 1-byte (ASCII) case - *pOutputBuffer = (char)firstByte; - - pInputBuffer += 1; - pOutputBuffer += 1; - inputLength -= 1; - outputCharsRemaining -= 1; - continue; - } - - // Potentially the start of a multi-byte sequence? - - firstByte -= 0xC2u; - if ((byte)firstByte <= (0xDFu - 0xC2u)) - { - // Potentially a 2-byte sequence? - if (inputLength < 2) - { - goto InputBufferTooSmall; // out of data - } - - uint secondByte = pInputBuffer[1]; - if (!Utf8Utility.IsLowByteUtf8ContinuationByte(secondByte)) - { - goto Error; // 2-byte marker not followed by continuation byte - } - - if (0u >= (uint)outputCharsRemaining) - { - goto OutputBufferTooSmall; // we have no hope of writing anything to the output - } - - uint asChar = (firstByte << 6) + secondByte + ((0xC2u - 0xC0u) << 6) - 0x80u; // remove UTF-8 markers from scalar - *pOutputBuffer = (char)asChar; - - pInputBuffer += 2; - pOutputBuffer += 1; - inputLength -= 2; - outputCharsRemaining -= 1; - continue; - } - else if ((byte)firstByte <= (0xEFu - 0xC2u)) - { - // Potentially a 3-byte sequence? 
- if (inputLength >= 3) - { - uint secondByte = pInputBuffer[1]; - uint thirdByte = pInputBuffer[2]; - if (!Utf8Utility.IsLowByteUtf8ContinuationByte(secondByte) || !Utf8Utility.IsLowByteUtf8ContinuationByte(thirdByte)) - { - goto Error; // 3-byte marker not followed by 2 continuation bytes - } - - // To speed up the validation logic below, we're not going to remove the UTF-8 markers from the partial char just yet. - // We account for this in the comparisons below. - - uint partialChar = (firstByte << 12) + (secondByte << 6); - if (partialChar < ((0xE0u - 0xC2u) << 12) + (0xA0u << 6)) - { - goto Error; // this is an overlong encoding; fail - } - - partialChar -= ((0xEDu - 0xC2u) << 12) + (0xA0u << 6); //if partialChar = 0, we're at beginning of UTF-16 surrogate code point range - if (partialChar < (0x0800u /* number of code points in UTF-16 surrogate code point range */)) - { - goto Error; // attempted to encode a UTF-16 surrogate code point; fail - } - - if (0u >= (uint)outputCharsRemaining) - { - goto OutputBufferTooSmall; // we have no hope of writing anything to the output - } - - // Now restore the full scalar value. - - partialChar += thirdByte; - partialChar += 0xD800; // undo "move to beginning of UTF-16 surrogate code point range" from earlier, fold it with later adds - partialChar -= 0x80u; // remove third byte continuation marker - - *pOutputBuffer = (char)partialChar; - - pInputBuffer += 3; - pOutputBuffer += 1; - inputLength -= 3; - outputCharsRemaining -= 1; - continue; - } - else if (inputLength >= 2) - { - uint secondByte = pInputBuffer[1]; - if (!Utf8Utility.IsLowByteUtf8ContinuationByte(secondByte)) - { - goto Error; // 3-byte marker not followed by continuation byte - } - - // We can't build up the entire scalar value now, but we can check for overlong / surrogate representations - // from just the first two bytes. 
- - uint partialChar = (firstByte << 6) + secondByte; // don't worry about fixing up the UTF-8 markers; we'll account for it in the below comparison - if (partialChar < ((0xE0u - 0xC2u) << 6) + 0xA0u) - { - goto Error; // failed overlong check - } - if (UnicodeUtility.IsInRangeInclusive(partialChar, ((0xEDu - 0xC2u) << 6) + 0xA0u, ((0xEEu - 0xC2u) << 6) + 0x7Fu)) - { - goto Error; // failed surrogate check - } - } - - goto InputBufferTooSmall; // out of data - } - else if ((byte)firstByte <= (0xF4u - 0xC2u)) - { - // Potentially a 4-byte sequence? - - if (inputLength < 2) - { - goto InputBufferTooSmall; // ran out of data - } - - uint nextByte = pInputBuffer[1]; - if (!Utf8Utility.IsLowByteUtf8ContinuationByte(nextByte)) - { - goto Error; // 4-byte marker not followed by a continuation byte - } - - uint asPartialChar = (firstByte << 6) + nextByte; // don't worry about fixing up the UTF-8 markers; we'll account for it in the below comparison - if (!UnicodeUtility.IsInRangeInclusive(asPartialChar, ((0xF0u - 0xC2u) << 6) + 0x90u, ((0xF4u - 0xC2u) << 6) + 0x8Fu)) - { - goto Error; // failed overlong / out-of-range check - } - - if (inputLength < 3) - { - goto InputBufferTooSmall; // ran out of data - } - - if (!Utf8Utility.IsLowByteUtf8ContinuationByte(pInputBuffer[2])) - { - goto Error; // third byte in 4-byte sequence not a continuation byte - } - - if (inputLength < 4) - { - goto InputBufferTooSmall; // ran out of data - } - - if (!Utf8Utility.IsLowByteUtf8ContinuationByte(pInputBuffer[3])) - { - goto Error; // fourth byte in 4-byte sequence not a continuation byte - } - - // If we read a valid astral scalar value, the only way we could've fallen down this code path - // is that we didn't have enough output buffer to write the result. - - goto OutputBufferTooSmall; - } - else - { - goto Error; // didn't begin with [ C2 .. 
F4 ], so invalid multi-byte sequence header byte - } - } - - OperationStatus retVal = OperationStatus.Done; - goto ReturnCommon; - - InputBufferTooSmall: - retVal = OperationStatus.NeedMoreData; - goto ReturnCommon; - - OutputBufferTooSmall: - retVal = OperationStatus.DestinationTooSmall; - goto ReturnCommon; - - Error: - retVal = OperationStatus.InvalidData; - goto ReturnCommon; - - ReturnCommon: - pInputBufferRemaining = pInputBuffer; - pOutputBufferRemaining = pOutputBuffer; - return retVal; - } - - // On method return, pInputBufferRemaining and pOutputBufferRemaining will both point to where - // the next char would have been consumed from / the next byte would have been written to. - // inputLength in chars, outputBytesRemaining in bytes. - public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLength, byte* pOutputBuffer, int outputBytesRemaining, out char* pInputBufferRemaining, out byte* pOutputBufferRemaining) - { - const int CharsPerDWord = sizeof(uint) / sizeof(char); - - Debug.Assert(inputLength >= 0, "Input length must not be negative."); - Debug.Assert(pInputBuffer != null || inputLength == 0, "Input length must be zero if input buffer pointer is null."); - - Debug.Assert(outputBytesRemaining >= 0, "Destination length must not be negative."); - Debug.Assert(pOutputBuffer != null || outputBytesRemaining == 0, "Destination length must be zero if destination buffer pointer is null."); - - // First, try vectorized conversion. - - { - nuint numElementsConverted = ASCIIUtility32.NarrowUtf16ToAscii(pInputBuffer, pOutputBuffer, (uint)Math.Min(inputLength, outputBytesRemaining)); - - pInputBuffer += numElementsConverted; - pOutputBuffer += numElementsConverted; - - // Quick check - did we just end up consuming the entire input buffer? - // If so, short-circuit the remainder of the method. 
- - if ((int)numElementsConverted == inputLength) - { - pInputBufferRemaining = pInputBuffer; - pOutputBufferRemaining = pOutputBuffer; - return OperationStatus.Done; - } - - inputLength -= (int)numElementsConverted; - outputBytesRemaining -= (int)numElementsConverted; - } - - if (inputLength < CharsPerDWord) - { - goto ProcessInputOfLessThanDWordSize; - } - - char* pFinalPosWhereCanReadDWordFromInputBuffer = pInputBuffer + (uint)inputLength - CharsPerDWord; - - // Begin the main loop. - -#if DEBUG - char* pLastBufferPosProcessed = null; // used for invariant checking in debug builds -#endif - - uint thisDWord; - - while (pInputBuffer <= pFinalPosWhereCanReadDWordFromInputBuffer) - { - // Read 32 bits at a time. This is enough to hold any possible UTF16-encoded scalar. - - thisDWord = Unsafe.ReadUnaligned(pInputBuffer); - - AfterReadDWord: - -#if DEBUG - Debug.Assert(pLastBufferPosProcessed < pInputBuffer, "Algorithm should've made forward progress since last read."); - pLastBufferPosProcessed = pInputBuffer; -#endif - - // First, check for the common case of all-ASCII chars. - - if (Utf16Utility.AllCharsInUInt32AreAscii(thisDWord)) - { - // We read an all-ASCII sequence (2 chars). - - if (outputBytesRemaining < 2) - { - goto ProcessOneCharFromCurrentDWordAndFinish; // running out of space, but may be able to write some data - } - - // The high WORD of the local declared below might be populated with garbage - // as a result of our shifts below, but that's ok since we're only going to - // write the low WORD. - // - // [ 00000000 0bbbbbbb | 00000000 0aaaaaaa ] -> [ 00000000 0bbbbbbb | 0bbbbbbb 0aaaaaaa ] - // (Same logic works regardless of endianness.) - uint valueToWrite = thisDWord | (thisDWord >> 8); - - Unsafe.WriteUnaligned(pOutputBuffer, (ushort)valueToWrite); - - pInputBuffer += 2; - pOutputBuffer += 2; - outputBytesRemaining -= 2; - - // If we saw a sequence of all ASCII, there's a good chance a significant amount of following data is also ASCII. 
- // Below is basically unrolled loops with poor man's vectorization. - - uint inputCharsRemaining = (uint)(pFinalPosWhereCanReadDWordFromInputBuffer - pInputBuffer) + 2; - uint minElementsRemaining = (uint)Math.Min(inputCharsRemaining, outputBytesRemaining); - - if (Bmi2.X64.IsSupported) - { - Debug.Assert(BitConverter.IsLittleEndian, "BMI2 requires little-endian."); - const ulong PEXT_MASK = 0x00FF00FF_00FF00FFul; - - // Try reading and writing 8 elements per iteration. - uint maxIters = minElementsRemaining / 8; - ulong firstQWord, secondQWord; - int i; - for (i = 0; (uint)i < maxIters; i++) - { - firstQWord = Unsafe.ReadUnaligned(pInputBuffer); - secondQWord = Unsafe.ReadUnaligned(pInputBuffer + 4); - - if (!Utf16Utility.AllCharsInUInt64AreAscii(firstQWord | secondQWord)) - { - goto LoopTerminatedDueToNonAsciiData; - } - - Unsafe.WriteUnaligned(pOutputBuffer, (uint)Bmi2.X64.ParallelBitExtract(firstQWord, PEXT_MASK)); - Unsafe.WriteUnaligned(pOutputBuffer + 4, (uint)Bmi2.X64.ParallelBitExtract(secondQWord, PEXT_MASK)); - - pInputBuffer += 8; - pOutputBuffer += 8; - } - - outputBytesRemaining -= 8 * i; - - // Can we perform one more iteration, but reading & writing 4 elements instead of 8? - - if ((minElementsRemaining & 4) != 0) - { - secondQWord = Unsafe.ReadUnaligned(pInputBuffer); - - if (!Utf16Utility.AllCharsInUInt64AreAscii(secondQWord)) - { - goto LoopTerminatedDueToNonAsciiDataInSecondQWord; - } - - Unsafe.WriteUnaligned(pOutputBuffer, (uint)Bmi2.X64.ParallelBitExtract(secondQWord, PEXT_MASK)); - - pInputBuffer += 4; - pOutputBuffer += 4; - outputBytesRemaining -= 4; - } - - continue; // Go back to beginning of main loop, read data, check for ASCII - - LoopTerminatedDueToNonAsciiData: - - outputBytesRemaining -= 8 * i; - - // First, see if we can drain any ASCII data from the first QWORD. 
- - if (Utf16Utility.AllCharsInUInt64AreAscii(firstQWord)) - { - Unsafe.WriteUnaligned(pOutputBuffer, (uint)Bmi2.X64.ParallelBitExtract(firstQWord, PEXT_MASK)); - pInputBuffer += 4; - pOutputBuffer += 4; - outputBytesRemaining -= 4; - } - else - { - secondQWord = firstQWord; - } - - LoopTerminatedDueToNonAsciiDataInSecondQWord: - - Debug.Assert(!Utf16Utility.AllCharsInUInt64AreAscii(secondQWord)); // this condition should've been checked earlier - - thisDWord = (uint)secondQWord; - if (Utf16Utility.AllCharsInUInt32AreAscii(thisDWord)) - { - // [ 00000000 0bbbbbbb | 00000000 0aaaaaaa ] -> [ 00000000 0bbbbbbb | 0bbbbbbb 0aaaaaaa ] - Unsafe.WriteUnaligned(pOutputBuffer, (ushort)(thisDWord | (thisDWord >> 8))); - pInputBuffer += 2; - pOutputBuffer += 2; - outputBytesRemaining -= 2; - thisDWord = (uint)(secondQWord >> 32); - } - - goto AfterReadDWordSkipAllCharsAsciiCheck; - } - else - { - // Can't use BMI2 x64, so we'll only read and write 4 elements per iteration. - uint maxIters = minElementsRemaining / 4; - uint secondDWord; - int i; - for (i = 0; (uint)i < maxIters; i++) - { - thisDWord = Unsafe.ReadUnaligned(pInputBuffer); - secondDWord = Unsafe.ReadUnaligned(pInputBuffer + 2); - - if (!Utf16Utility.AllCharsInUInt32AreAscii(thisDWord | secondDWord)) - { - goto LoopTerminatedDueToNonAsciiData; - } - - // [ 00000000 0bbbbbbb | 00000000 0aaaaaaa ] -> [ 00000000 0bbbbbbb | 0bbbbbbb 0aaaaaaa ] - // (Same logic works regardless of endianness.) - Unsafe.WriteUnaligned(pOutputBuffer, (ushort)(thisDWord | (thisDWord >> 8))); - Unsafe.WriteUnaligned(pOutputBuffer + 2, (ushort)(secondDWord | (secondDWord >> 8))); - - pInputBuffer += 4; - pOutputBuffer += 4; - } - - outputBytesRemaining -= 4 * i; - - continue; // Go back to beginning of main loop, read data, check for ASCII - - LoopTerminatedDueToNonAsciiData: - - outputBytesRemaining -= 4 * i; - - // First, see if we can drain any ASCII data from the first DWORD. 
- - if (Utf16Utility.AllCharsInUInt32AreAscii(thisDWord)) - { - // [ 00000000 0bbbbbbb | 00000000 0aaaaaaa ] -> [ 00000000 0bbbbbbb | 0bbbbbbb 0aaaaaaa ] - // (Same logic works regardless of endianness.) - Unsafe.WriteUnaligned(pOutputBuffer, (ushort)(thisDWord | (thisDWord >> 8))); - pInputBuffer += 2; - pOutputBuffer += 2; - outputBytesRemaining -= 2; - thisDWord = secondDWord; - } - - goto AfterReadDWordSkipAllCharsAsciiCheck; - } - } - - AfterReadDWordSkipAllCharsAsciiCheck: - - Debug.Assert(!Utf16Utility.AllCharsInUInt32AreAscii(thisDWord)); // this should have been handled earlier - - // Next, try stripping off the first ASCII char if it exists. - // We don't check for a second ASCII char since that should have been handled above. - - if (Utf8Utility.IsFirstCharAscii(thisDWord)) - { - if (0u >= (uint)outputBytesRemaining) - { - goto OutputBufferTooSmall; - } - - if (BitConverter.IsLittleEndian) - { - pOutputBuffer[0] = (byte)thisDWord; // extract [ ## ## 00 AA ] - } - else - { - pOutputBuffer[0] = (byte)(thisDWord >> 24); // extract [ AA 00 ## ## ] - } - - pInputBuffer += 1; - pOutputBuffer += 1; - outputBytesRemaining -= 1; - - if (pInputBuffer > pFinalPosWhereCanReadDWordFromInputBuffer) - { - goto ProcessNextCharAndFinish; // input buffer doesn't contain enough data to read a DWORD - } - else - { - // The input buffer at the current offset contains a non-ASCII char. - // Read an entire DWORD and fall through to non-ASCII consumption logic. - thisDWord = Unsafe.ReadUnaligned(pInputBuffer); - } - } - - // At this point, we know the first char in the buffer is non-ASCII, but we haven't yet validated it. - - if (!Utf8Utility.IsFirstCharAtLeastThreeUtf8Bytes(thisDWord)) - { - TryConsumeMultipleTwoByteSequences: - - // For certain text (Greek, Cyrillic, ...), 2-byte sequences tend to be clustered. We'll try transcoding them in - // a tight loop without falling back to the main loop. 
- - if (Utf8Utility.IsSecondCharTwoUtf8Bytes(thisDWord)) - { - // We have two runs of two bytes each. - - if (outputBytesRemaining < 4) - { - goto ProcessOneCharFromCurrentDWordAndFinish; // running out of output buffer - } - - Unsafe.WriteUnaligned(pOutputBuffer, Utf8Utility.ExtractTwoUtf8TwoByteSequencesFromTwoPackedUtf16Chars(thisDWord)); - - pInputBuffer += 2; - pOutputBuffer += 4; - outputBytesRemaining -= 4; - - if (pInputBuffer > pFinalPosWhereCanReadDWordFromInputBuffer) - { - goto ProcessNextCharAndFinish; // Running out of data - go down slow path - } - else - { - // Optimization: If we read a long run of two-byte sequences, the next sequence is probably - // also two bytes. Check for that first before going back to the beginning of the loop. - - thisDWord = Unsafe.ReadUnaligned(pInputBuffer); - - if (Utf8Utility.IsFirstCharTwoUtf8Bytes(thisDWord)) - { - // Validated we have a two-byte sequence coming up - goto TryConsumeMultipleTwoByteSequences; - } - - // If we reached this point, the next sequence is something other than a valid - // two-byte sequence, so go back to the beginning of the loop. - goto AfterReadDWord; - } - } - - if (outputBytesRemaining < 2) - { - goto OutputBufferTooSmall; - } - - Unsafe.WriteUnaligned(pOutputBuffer, (ushort)Utf8Utility.ExtractUtf8TwoByteSequenceFromFirstUtf16Char(thisDWord)); - - // The buffer contains a 2-byte sequence followed by 2 bytes that aren't a 2-byte sequence. - // Unlikely that a 3-byte sequence would follow a 2-byte sequence, so perhaps remaining - // char is ASCII? 
- - if (Utf8Utility.IsSecondCharAscii(thisDWord)) - { - if (outputBytesRemaining >= 3) - { - if (BitConverter.IsLittleEndian) - { - thisDWord >>= 16; - } - pOutputBuffer[2] = (byte)thisDWord; - - pInputBuffer += 2; - pOutputBuffer += 3; - outputBytesRemaining -= 3; - - continue; // go back to original bounds check and check for ASCII - } - else - { - pInputBuffer += 1; - pOutputBuffer += 2; - goto OutputBufferTooSmall; - } - } - else - { - pInputBuffer += 1; - pOutputBuffer += 2; - outputBytesRemaining -= 2; - - if (pInputBuffer > pFinalPosWhereCanReadDWordFromInputBuffer) - { - goto ProcessNextCharAndFinish; // Running out of data - go down slow path - } - else - { - thisDWord = Unsafe.ReadUnaligned(pInputBuffer); - goto BeforeProcessThreeByteSequence; // we know the next byte isn't ASCII, and it's not the start of a 2-byte sequence (this was checked above) - } - } - } - - // Check the 3-byte case. - - BeforeProcessThreeByteSequence: - - if (!Utf8Utility.IsFirstCharSurrogate(thisDWord)) - { - // Optimization: A three-byte character could indicate CJK text, which makes it likely - // that the character following this one is also CJK. We'll perform the check now - // rather than jumping to the beginning of the main loop. - - if (Utf8Utility.IsSecondCharAtLeastThreeUtf8Bytes(thisDWord)) - { - if (!Utf8Utility.IsSecondCharSurrogate(thisDWord)) - { - if (outputBytesRemaining < 6) - { - goto ConsumeSingleThreeByteRun; // not enough space - try consuming as much as we can - } - - Utf8Utility.WriteTwoUtf16CharsAsTwoUtf8ThreeByteSequences(ref *pOutputBuffer, thisDWord); - - pInputBuffer += 2; - pOutputBuffer += 6; - outputBytesRemaining -= 6; - - // Try to remain in the 3-byte processing loop if at all possible. 
- - if (pInputBuffer > pFinalPosWhereCanReadDWordFromInputBuffer) - { - goto ProcessNextCharAndFinish; // Running out of data - go down slow path - } - else - { - thisDWord = Unsafe.ReadUnaligned(pInputBuffer); - - if (Utf8Utility.IsFirstCharAtLeastThreeUtf8Bytes(thisDWord)) - { - goto BeforeProcessThreeByteSequence; - } - else - { - // Fall back to standard processing loop since we don't know how to optimize this. - goto AfterReadDWord; - } - } - } - } - - ConsumeSingleThreeByteRun: - - if (outputBytesRemaining < 3) - { - goto OutputBufferTooSmall; - } - - Utf8Utility.WriteFirstUtf16CharAsUtf8ThreeByteSequence(ref *pOutputBuffer, thisDWord); - - pInputBuffer += 1; - pOutputBuffer += 3; - outputBytesRemaining -= 3; - - // Occasionally one-off ASCII characters like spaces, periods, or newlines will make their way - // in to the text. If this happens strip it off now before seeing if the next character - // consists of three code units. - - if (Utf8Utility.IsSecondCharAscii(thisDWord)) - { - if (0u >= (uint)outputBytesRemaining) - { - goto OutputBufferTooSmall; - } - - if (BitConverter.IsLittleEndian) - { - *pOutputBuffer = (byte)(thisDWord >> 16); - } - else - { - *pOutputBuffer = (byte)(thisDWord); - } - - pInputBuffer += 1; - pOutputBuffer += 1; - outputBytesRemaining -= 1; - - if (pInputBuffer > pFinalPosWhereCanReadDWordFromInputBuffer) - { - goto ProcessNextCharAndFinish; // Running out of data - go down slow path - } - else - { - thisDWord = Unsafe.ReadUnaligned(pInputBuffer); - - if (Utf8Utility.IsFirstCharAtLeastThreeUtf8Bytes(thisDWord)) - { - goto BeforeProcessThreeByteSequence; - } - else - { - // Fall back to standard processing loop since we don't know how to optimize this. 
- goto AfterReadDWord; - } - } - } - - if (pInputBuffer > pFinalPosWhereCanReadDWordFromInputBuffer) - { - goto ProcessNextCharAndFinish; // Running out of data - go down slow path - } - else - { - thisDWord = Unsafe.ReadUnaligned(pInputBuffer); - goto AfterReadDWordSkipAllCharsAsciiCheck; // we just checked above that this value isn't ASCII - } - } - - // Four byte sequence processing - - if (Utf8Utility.IsWellFormedUtf16SurrogatePair(thisDWord)) - { - if (outputBytesRemaining < 4) - { - goto OutputBufferTooSmall; - } - - Unsafe.WriteUnaligned(pOutputBuffer, Utf8Utility.ExtractFourUtf8BytesFromSurrogatePair(thisDWord)); - - pInputBuffer += 2; - pOutputBuffer += 4; - outputBytesRemaining -= 4; - - continue; // go back to beginning of loop for processing - } - - goto Error; // an ill-formed surrogate sequence: high not followed by low, or low not preceded by high - } - - ProcessNextCharAndFinish: - inputLength = (int)(pFinalPosWhereCanReadDWordFromInputBuffer - pInputBuffer) + CharsPerDWord; - - ProcessInputOfLessThanDWordSize: - Debug.Assert(inputLength < CharsPerDWord); - - if (0u >= (uint)inputLength) - { - goto InputBufferFullyConsumed; - } - - uint thisChar = *pInputBuffer; - goto ProcessFinalChar; - - ProcessOneCharFromCurrentDWordAndFinish: - if (BitConverter.IsLittleEndian) - { - thisChar = thisDWord & 0xFFFFu; // preserve only the first char - } - else - { - thisChar = thisDWord >> 16; // preserve only the first char - } - - ProcessFinalChar: - { - if (thisChar <= 0x7Fu) - { - if (0u >= (uint)outputBytesRemaining) - { - goto OutputBufferTooSmall; // we have no hope of writing anything to the output - } - - // 1-byte (ASCII) case - *pOutputBuffer = (byte)thisChar; - - pInputBuffer += 1; - pOutputBuffer += 1; - } - else if (thisChar < 0x0800u) - { - if (outputBytesRemaining < 2) - { - goto OutputBufferTooSmall; // we have no hope of writing anything to the output - } - - // 2-byte case - pOutputBuffer[1] = (byte)((thisChar & 0x3Fu) | 
unchecked((uint)(sbyte)0x80)); // [ 10xxxxxx ] - pOutputBuffer[0] = (byte)((thisChar >> 6) | unchecked((uint)(sbyte)0xC0)); // [ 110yyyyy ] - - pInputBuffer += 1; - pOutputBuffer += 2; - } - else if (!UnicodeUtility.IsSurrogateCodePoint(thisChar)) - { - if (outputBytesRemaining < 3) - { - goto OutputBufferTooSmall; // we have no hope of writing anything to the output - } - - // 3-byte case - pOutputBuffer[2] = (byte)((thisChar & 0x3Fu) | unchecked((uint)(sbyte)0x80)); // [ 10xxxxxx ] - pOutputBuffer[1] = (byte)(((thisChar >> 6) & 0x3Fu) | unchecked((uint)(sbyte)0x80)); // [ 10yyyyyy ] - pOutputBuffer[0] = (byte)((thisChar >> 12) | unchecked((uint)(sbyte)0xE0)); // [ 1110zzzz ] - - pInputBuffer += 1; - pOutputBuffer += 3; - } - else if (thisChar <= 0xDBFFu) - { - // UTF-16 high surrogate code point with no trailing data, report incomplete input buffer - goto InputBufferTooSmall; - } - else - { - // UTF-16 low surrogate code point with no leading data, report error - goto Error; - } - } - - // There are two ways we can end up here. Either we were running low on input data, - // or we were running low on space in the destination buffer. If we're running low on - // input data (label targets ProcessInputOfLessThanDWordSize and ProcessNextCharAndFinish), - // then the inputLength value is guaranteed to be between 0 and 1, and we should return Done. - // If we're running low on destination buffer space (label target ProcessOneCharFromCurrentDWordAndFinish), - // then we didn't modify inputLength since entering the main loop, which means it should - // still have a value of >= 2. So checking the value of inputLength is all we need to do to determine - // which of the two scenarios we're in. 
- - if (inputLength > 1) - { - goto OutputBufferTooSmall; - } - - InputBufferFullyConsumed: - OperationStatus retVal = OperationStatus.Done; - goto ReturnCommon; - - InputBufferTooSmall: - retVal = OperationStatus.NeedMoreData; - goto ReturnCommon; - - OutputBufferTooSmall: - retVal = OperationStatus.DestinationTooSmall; - goto ReturnCommon; - - Error: - retVal = OperationStatus.InvalidData; - goto ReturnCommon; - - ReturnCommon: - pInputBufferRemaining = pInputBuffer; - pOutputBufferRemaining = pOutputBuffer; - return retVal; - } - } -} -#endif diff --git a/src/DotNetty.Common/Internal/Utf8Utility32.Validation.cs b/src/DotNetty.Common/Internal/Utf8Utility32.Validation.cs deleted file mode 100644 index 3592e8d9d..000000000 --- a/src/DotNetty.Common/Internal/Utf8Utility32.Validation.cs +++ /dev/null @@ -1,736 +0,0 @@ -// borrowed from https://github.com/dotnet/corefx/tree/release/3.1/src/Common/src/CoreLib/System/Text/Unicode - -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -#if NETCOREAPP_3_0_GREATER -using System; -using System.Diagnostics; -using System.Numerics; -using System.Runtime.CompilerServices; -using System.Runtime.Intrinsics.X86; -using nint = System.Int32; -using nuint = System.UInt32; - -namespace DotNetty.Common.Internal -{ - internal static unsafe partial class Utf8Utility32 - { -#if DEBUG - private static void _ValidateAdditionalNIntDefinitions() - { - Debug.Assert(sizeof(nint) == IntPtr.Size && nint.MinValue < 0, "nint is defined incorrectly."); - Debug.Assert(sizeof(nuint) == IntPtr.Size && nuint.MinValue == 0, "nuint is defined incorrectly."); - } -#endif // DEBUG - - // Returns &inputBuffer[inputLength] if the input buffer is valid. - /// - /// Given an input buffer of byte length , - /// returns a pointer to where the first invalid data appears in . 
- /// - /// - /// Returns a pointer to the end of if the buffer is well-formed. - /// - public static byte* GetPointerToFirstInvalidByte(byte* pInputBuffer, int inputLength, out int utf16CodeUnitCountAdjustment, out int scalarCountAdjustment) - { - Debug.Assert(inputLength >= 0, "Input length must not be negative."); - Debug.Assert(pInputBuffer != null || inputLength == 0, "Input length must be zero if input buffer pointer is null."); - - // First, try to drain off as many ASCII bytes as we can from the beginning. - - { - nuint numAsciiBytesCounted = ASCIIUtility32.GetIndexOfFirstNonAsciiByte(pInputBuffer, (uint)inputLength); - pInputBuffer += numAsciiBytesCounted; - - // Quick check - did we just end up consuming the entire input buffer? - // If so, short-circuit the remainder of the method. - - inputLength -= (int)numAsciiBytesCounted; - if (0u >= (uint)inputLength) - { - utf16CodeUnitCountAdjustment = 0; - scalarCountAdjustment = 0; - return pInputBuffer; - } - } - -#if DEBUG - // Keep these around for final validation at the end of the method. - byte* pOriginalInputBuffer = pInputBuffer; - int originalInputLength = inputLength; -#endif - - // Enregistered locals that we'll eventually out to our caller. - - int tempUtf16CodeUnitCountAdjustment = 0; - int tempScalarCountAdjustment = 0; - - if (inputLength < sizeof(uint)) - { - goto ProcessInputOfLessThanDWordSize; - } - - byte* pFinalPosWhereCanReadDWordFromInputBuffer = pInputBuffer + (uint)inputLength - sizeof(uint); - - // Begin the main loop. - -#if DEBUG - byte* pLastBufferPosProcessed = null; // used for invariant checking in debug builds -#endif - - while (pInputBuffer <= pFinalPosWhereCanReadDWordFromInputBuffer) - { - // Read 32 bits at a time. This is enough to hold any possible UTF8-encoded scalar. 
- - uint thisDWord = Unsafe.ReadUnaligned(pInputBuffer); - - AfterReadDWord: - -#if DEBUG - Debug.Assert(pLastBufferPosProcessed < pInputBuffer, "Algorithm should've made forward progress since last read."); - pLastBufferPosProcessed = pInputBuffer; -#endif - - // First, check for the common case of all-ASCII bytes. - - if (ASCIIUtility.AllBytesInUInt32AreAscii(thisDWord)) - { - // We read an all-ASCII sequence. - - pInputBuffer += sizeof(uint); - - // If we saw a sequence of all ASCII, there's a good chance a significant amount of following data is also ASCII. - // Below is basically unrolled loops with poor man's vectorization. - - // Below check is "can I read at least five DWORDs from the input stream?" - // n.b. Since we incremented pInputBuffer above the below subtraction may result in a negative value, - // hence using nint instead of nuint. - - if ((nint)(void*)Unsafe.ByteOffset(ref *pInputBuffer, ref *pFinalPosWhereCanReadDWordFromInputBuffer) >= 4 * sizeof(uint)) - { - // We want reads in the inner loop to be aligned. So let's perform a quick - // ASCII check of the next 32 bits (4 bytes) now, and if that succeeds bump - // the read pointer up to the next aligned address. - - thisDWord = Unsafe.ReadUnaligned(pInputBuffer); - if (!ASCIIUtility.AllBytesInUInt32AreAscii(thisDWord)) - { - goto AfterReadDWordSkipAllBytesAsciiCheck; - } - - pInputBuffer = (byte*)((nuint)(pInputBuffer + 4) & ~(nuint)3); - - // At this point, the input buffer offset points to an aligned DWORD. We also know that there's - // enough room to read at least four DWORDs from the buffer. (Heed the comment a few lines above: - // the original 'if' check confirmed that there were 5 DWORDs before the alignment check, and - // the alignment check consumes at most a single DWORD.) 
- - byte* pInputBufferFinalPosAtWhichCanSafelyLoop = pFinalPosWhereCanReadDWordFromInputBuffer - 3 * sizeof(uint); // can safely read 4 DWORDs here - uint mask; - - do - { - if (Sse2.IsSupported && Bmi1.IsSupported) - { - // pInputBuffer is 32-bit aligned but not necessary 128-bit aligned, so we're - // going to perform an unaligned load. We don't necessarily care about aligning - // this because we pessimistically assume we'll encounter non-ASCII data at some - // point in the not-too-distant future (otherwise we would've stayed entirely - // within the all-ASCII vectorized code at the entry to this method). - - mask = (uint)Sse2.MoveMask(Sse2.LoadVector128((byte*)pInputBuffer)); - if (mask != 0) - { - goto Sse2LoopTerminatedEarlyDueToNonAsciiData; - } - } - else - { - if (!ASCIIUtility.AllBytesInUInt32AreAscii(((uint*)pInputBuffer)[0] | ((uint*)pInputBuffer)[1])) - { - goto LoopTerminatedEarlyDueToNonAsciiDataInFirstPair; - } - - if (!ASCIIUtility.AllBytesInUInt32AreAscii(((uint*)pInputBuffer)[2] | ((uint*)pInputBuffer)[3])) - { - goto LoopTerminatedEarlyDueToNonAsciiDataInSecondPair; - } - } - - pInputBuffer += 4 * sizeof(uint); // consumed 4 DWORDs - } while (pInputBuffer <= pInputBufferFinalPosAtWhichCanSafelyLoop); - - continue; // need to perform a bounds check because we might be running out of data - - Sse2LoopTerminatedEarlyDueToNonAsciiData: - - Debug.Assert(BitConverter.IsLittleEndian); - Debug.Assert(Sse2.IsSupported); - Debug.Assert(Bmi1.IsSupported); - - // The 'mask' value will have a 0 bit for each ASCII byte we saw and a 1 bit - // for each non-ASCII byte we saw. We can count the number of ASCII bytes, - // bump our input counter by that amount, and resume processing from the - // "the first byte is no longer ASCII" portion of the main loop. 
- - Debug.Assert(mask != 0); - - pInputBuffer += Bmi1.TrailingZeroCount(mask); - if (pInputBuffer > pFinalPosWhereCanReadDWordFromInputBuffer) - { - goto ProcessRemainingBytesSlow; - } - - thisDWord = Unsafe.ReadUnaligned(pInputBuffer); // no longer guaranteed to be aligned - goto BeforeProcessTwoByteSequence; - - LoopTerminatedEarlyDueToNonAsciiDataInSecondPair: - - pInputBuffer += 2 * sizeof(uint); // consumed 2 DWORDs - - LoopTerminatedEarlyDueToNonAsciiDataInFirstPair: - - // We know that there's *at least* two DWORDs of data remaining in the buffer. - // We also know that one of them (or both of them) contains non-ASCII data somewhere. - // Let's perform a quick check here to bypass the logic at the beginning of the main loop. - - thisDWord = *(uint*)pInputBuffer; // still aligned here - if (ASCIIUtility.AllBytesInUInt32AreAscii(thisDWord)) - { - pInputBuffer += sizeof(uint); // consumed 1 more DWORD - thisDWord = *(uint*)pInputBuffer; // still aligned here - } - - goto AfterReadDWordSkipAllBytesAsciiCheck; - } - - continue; // not enough data remaining to unroll loop - go back to beginning with bounds checks - } - - AfterReadDWordSkipAllBytesAsciiCheck: - - Debug.Assert(!ASCIIUtility.AllBytesInUInt32AreAscii(thisDWord)); // this should have been handled earlier - - // Next, try stripping off ASCII bytes one at a time. - // We only handle up to three ASCII bytes here since we handled the four ASCII byte case above. - - { - uint numLeadingAsciiBytes = ASCIIUtility.CountNumberOfLeadingAsciiBytesFromUInt32WithSomeNonAsciiData(thisDWord); - pInputBuffer += numLeadingAsciiBytes; - - if (pFinalPosWhereCanReadDWordFromInputBuffer < pInputBuffer) - { - goto ProcessRemainingBytesSlow; // Input buffer doesn't contain enough data to read a DWORD - } - else - { - // The input buffer at the current offset contains a non-ASCII byte. - // Read an entire DWORD and fall through to multi-byte consumption logic. 
- thisDWord = Unsafe.ReadUnaligned(pInputBuffer); - } - } - - BeforeProcessTwoByteSequence: - - // At this point, we suspect we're working with a multi-byte code unit sequence, - // but we haven't yet validated it for well-formedness. - - // The masks and comparands are derived from the Unicode Standard, Table 3-6. - // Additionally, we need to check for valid byte sequences per Table 3-7. - - // Check the 2-byte case. - - thisDWord -= (BitConverter.IsLittleEndian) ? 0x0000_80C0u : 0xC080_0000u; - if (0u >= (thisDWord & (BitConverter.IsLittleEndian ? 0x0000_C0E0u : 0xE0C0_0000u))) - { - // Per Table 3-7, valid sequences are: - // [ C2..DF ] [ 80..BF ] - // - // Due to our modification of 'thisDWord' above, this becomes: - // [ 02..1F ] [ 00..3F ] - // - // We've already checked that the leading byte was originally in the range [ C0..DF ] - // and that the trailing byte was originally in the range [ 80..BF ], so now we only need - // to check that the modified leading byte is >= [ 02 ]. - - if ((BitConverter.IsLittleEndian && (byte)thisDWord < 0x02u) - || (!BitConverter.IsLittleEndian && thisDWord < 0x0200_0000u)) - { - goto Error; // overlong form - leading byte was [ C0 ] or [ C1 ] - } - - ProcessTwoByteSequenceSkipOverlongFormCheck: - - // Optimization: If this is a two-byte-per-character language like Cyrillic or Hebrew, - // there's a good chance that if we see one two-byte run then there's another two-byte - // run immediately after. Let's check that now. - - // On little-endian platforms, we can check for the two-byte UTF8 mask *and* validate that - // the value isn't overlong using a single comparison. On big-endian platforms, we'll need - // to validate the mask and validate that the sequence isn't overlong as two separate comparisons. 
- - if ((BitConverter.IsLittleEndian && Utf8Utility.UInt32EndsWithValidUtf8TwoByteSequenceLittleEndian(thisDWord)) - || (!BitConverter.IsLittleEndian && (Utf8Utility.UInt32EndsWithUtf8TwoByteMask(thisDWord) && !Utf8Utility.UInt32EndsWithOverlongUtf8TwoByteSequence(thisDWord)))) - { - // We have two runs of two bytes each. - pInputBuffer += 4; - tempUtf16CodeUnitCountAdjustment -= 2; // 4 UTF-8 code units -> 2 UTF-16 code units (and 2 scalars) - - if (pInputBuffer <= pFinalPosWhereCanReadDWordFromInputBuffer) - { - // Optimization: If we read a long run of two-byte sequences, the next sequence is probably - // also two bytes. Check for that first before going back to the beginning of the loop. - - thisDWord = Unsafe.ReadUnaligned(pInputBuffer); - - if (BitConverter.IsLittleEndian) - { - if (Utf8Utility.UInt32BeginsWithValidUtf8TwoByteSequenceLittleEndian(thisDWord)) - { - // The next sequence is a valid two-byte sequence. - goto ProcessTwoByteSequenceSkipOverlongFormCheck; - } - } - else - { - if (Utf8Utility.UInt32BeginsWithUtf8TwoByteMask(thisDWord)) - { - if (Utf8Utility.UInt32BeginsWithOverlongUtf8TwoByteSequence(thisDWord)) - { - goto Error; // The next sequence purports to be a 2-byte sequence but is overlong. - } - - goto ProcessTwoByteSequenceSkipOverlongFormCheck; - } - } - - // If we reached this point, the next sequence is something other than a valid - // two-byte sequence, so go back to the beginning of the loop. - goto AfterReadDWord; - } - else - { - goto ProcessRemainingBytesSlow; // Running out of data - go down slow path - } - } - - // The buffer contains a 2-byte sequence followed by 2 bytes that aren't a 2-byte sequence. - // Unlikely that a 3-byte sequence would follow a 2-byte sequence, so perhaps remaining - // bytes are ASCII? 
- - tempUtf16CodeUnitCountAdjustment--; // 2-byte sequence + (some number of ASCII bytes) -> 1 UTF-16 code units (and 1 scalar) [+ trailing] - - if (Utf8Utility.UInt32ThirdByteIsAscii(thisDWord)) - { - if (Utf8Utility.UInt32FourthByteIsAscii(thisDWord)) - { - pInputBuffer += 4; - } - else - { - pInputBuffer += 3; - - // A two-byte sequence followed by an ASCII byte followed by a non-ASCII byte. - // Read in the next DWORD and jump directly to the start of the multi-byte processing block. - - if (pInputBuffer <= pFinalPosWhereCanReadDWordFromInputBuffer) - { - thisDWord = Unsafe.ReadUnaligned(pInputBuffer); - goto BeforeProcessTwoByteSequence; - } - } - } - else - { - pInputBuffer += 2; - } - - continue; - } - - // Check the 3-byte case. - // We need to restore the C0 leading byte we stripped out earlier, then we can strip out the expected E0 byte. - - thisDWord -= (BitConverter.IsLittleEndian) ? (0x0080_00E0u - 0x0000_00C0u) : (0xE000_8000u - 0xC000_0000u); - if (0u >= (thisDWord & (BitConverter.IsLittleEndian ? 0x00C0_C0F0u : 0xF0C0_C000u))) - { - ProcessThreeByteSequenceWithCheck: - - // We assume the caller has confirmed that the bit pattern is representative of a three-byte - // sequence, but it may still be overlong or surrogate. We need to check for these possibilities. - // - // Per Table 3-7, valid sequences are: - // [ E0 ] [ A0..BF ] [ 80..BF ] - // [ E1..EC ] [ 80..BF ] [ 80..BF ] - // [ ED ] [ 80..9F ] [ 80..BF ] - // [ EE..EF ] [ 80..BF ] [ 80..BF ] - // - // Big-endian examples of using the above validation table: - // E0A0 = 1110 0000 1010 0000 => invalid (overlong ) patterns are 1110 0000 100# #### - // ED9F = 1110 1101 1001 1111 => invalid (surrogate) patterns are 1110 1101 101# #### - // If using the bitmask ......................................... 0000 1111 0010 0000 (=0F20), - // Then invalid (overlong) patterns match the comparand ......... 0000 0000 0000 0000 (=0000), - // And invalid (surrogate) patterns match the comparand ......... 
0000 1101 0010 0000 (=0D20). - // - // It's ok if the caller has manipulated 'thisDWord' (e.g., by subtracting 0xE0 or 0x80) - // as long as they haven't touched the bits we're about to use in our mask checking below. - - if (BitConverter.IsLittleEndian) - { - // The "overlong or surrogate" check can be implemented using a single jump, but there's - // some overhead to moving the bits into the correct locations in order to perform the - // correct comparison, and in practice the processor's branch prediction capability is - // good enough that we shouldn't bother. So we'll use two jumps instead. - - // Can't extract this check into its own helper method because JITter produces suboptimal - // assembly, even with aggressive inlining. - - // Code below becomes 5 instructions: test, jz, lea, test, jz - - if ((0u >= (thisDWord & 0x0000_200Fu)) || (0u >= ((thisDWord - 0x0000_200Du) & 0x0000_200Fu))) - { - goto Error; // overlong or surrogate - } - } - else - { - if ((0u >= (thisDWord & 0x0F20_0000u)) || (0u >= ((thisDWord - 0x0D20_0000u) & 0x0F20_0000u))) - { - goto Error; // overlong or surrogate - } - } - - ProcessSingleThreeByteSequenceSkipOverlongAndSurrogateChecks: - - // Occasionally one-off ASCII characters like spaces, periods, or newlines will make their way - // in to the text. If this happens strip it off now before seeing if the next character - // consists of three code units. - - // Branchless: consume a 3-byte UTF-8 sequence and optionally an extra ASCII byte hanging off the end - - nint asciiAdjustment; - if (BitConverter.IsLittleEndian) - { - asciiAdjustment = (int)thisDWord >> 31; // smear most significant bit across entire value - } - else - { - asciiAdjustment = (nint)(sbyte)thisDWord >> 7; // smear most significant bit of least significant byte across entire value - } - - // asciiAdjustment = 0 if fourth byte is ASCII; -1 otherwise - - // Please *DO NOT* reorder the below two lines. 
It provides extra defense in depth in case this method - // is ever changed such that pInputBuffer becomes a 'ref byte' instead of a simple 'byte*'. It's valid - // to add 4 before backing up since we already checked previously that the input buffer contains at - // least a DWORD's worth of data, so we're not going to run past the end of the buffer where the GC can - // no longer track the reference. However, we can't back up before adding 4, since we might back up to - // before the start of the buffer, and the GC isn't guaranteed to be able to track this. - - pInputBuffer += 4; // optimistically, assume consumed a 3-byte UTF-8 sequence plus an extra ASCII byte - pInputBuffer += asciiAdjustment; // back up if we didn't actually consume an ASCII byte - - tempUtf16CodeUnitCountAdjustment -= 2; // 3 (or 4) UTF-8 bytes -> 1 (or 2) UTF-16 code unit (and 1 [or 2] scalar) - - SuccessfullyProcessedThreeByteSequence: - - if (PlatformDependent.Is64BitProcess && BitConverter.IsLittleEndian) - { - // x64 little-endian optimization: A three-byte character could indicate CJK text, - // which makes it likely that the character following this one is also CJK. - // We'll try to process several three-byte sequences at a time. - - // The check below is really "can we read 9 bytes from the input buffer?" since 'pFinalPos...' is already offset - // n.b. The subtraction below could result in a negative value (since we advanced pInputBuffer above), so - // use nint instead of nuint. - - if ((nint)(pFinalPosWhereCanReadDWordFromInputBuffer - pInputBuffer) >= 5) - { - ulong thisQWord = Unsafe.ReadUnaligned(pInputBuffer); - - // Stage the next 32 bits into 'thisDWord' so that it's ready for us in case we need to jump backward - // to a previous location in the loop. This offers defense against reading main memory again (which may - // have been modified and could lead to a race condition). - - thisDWord = (uint)thisQWord; - - // Is this three 3-byte sequences in a row? 
- // thisQWord = [ 10yyyyyy 1110zzzz | 10xxxxxx 10yyyyyy 1110zzzz | 10xxxxxx 10yyyyyy 1110zzzz ] [ 10xxxxxx ] - // ---- CHAR 3 ---- --------- CHAR 2 --------- --------- CHAR 1 --------- -CHAR 3- - if ((thisQWord & 0xC0F0_C0C0_F0C0_C0F0ul) == 0x80E0_8080_E080_80E0ul && Utf8Utility.IsUtf8ContinuationByte(in pInputBuffer[8])) - { - // Saw a proper bitmask for three incoming 3-byte sequences, perform the - // overlong and surrogate sequence checking now. - - // Check the first character. - // If the first character is overlong or a surrogate, fail immediately. - - if ((0u >= ((uint)thisQWord & 0x200Fu)) || (0u >= (((uint)thisQWord - 0x200Du) & 0x200Fu))) - { - goto Error; - } - - // Check the second character. - // At this point, we now know the first three bytes represent a well-formed sequence. - // If there's an error beyond here, we'll jump back to the "process three known good bytes" - // logic. - - thisQWord >>= 24; - if ((0u >= ((uint)thisQWord & 0x200Fu)) || (0u >= (((uint)thisQWord - 0x200Du) & 0x200Fu))) - { - goto ProcessSingleThreeByteSequenceSkipOverlongAndSurrogateChecks; - } - - // Check the third character (we already checked that it's followed by a continuation byte). - - thisQWord >>= 24; - if ((0u >= ((uint)thisQWord & 0x200Fu)) || (0u >= (((uint)thisQWord - 0x200Du) & 0x200Fu))) - { - goto ProcessSingleThreeByteSequenceSkipOverlongAndSurrogateChecks; - } - - pInputBuffer += 9; - tempUtf16CodeUnitCountAdjustment -= 6; // 9 UTF-8 bytes -> 3 UTF-16 code units (and 3 scalars) - - goto SuccessfullyProcessedThreeByteSequence; - } - - // Is this two 3-byte sequences in a row? - // thisQWord = [ ######## ######## | 10xxxxxx 10yyyyyy 1110zzzz | 10xxxxxx 10yyyyyy 1110zzzz ] - // --------- CHAR 2 --------- --------- CHAR 1 --------- - if ((thisQWord & 0xC0C0_F0C0_C0F0ul) == 0x8080_E080_80E0ul) - { - // Saw a proper bitmask for two incoming 3-byte sequences, perform the - // overlong and surrogate sequence checking now. - - // Check the first character. 
- // If the first character is overlong or a surrogate, fail immediately. - - if ((0u >= ((uint)thisQWord & 0x200Fu)) || (0u >= (((uint)thisQWord - 0x200Du) & 0x200Fu))) - { - goto Error; - } - - // Check the second character. - // At this point, we now know the first three bytes represent a well-formed sequence. - // If there's an error beyond here, we'll jump back to the "process three known good bytes" - // logic. - - thisQWord >>= 24; - if ((0u >= ((uint)thisQWord & 0x200Fu)) || (0u >= (((uint)thisQWord - 0x200Du) & 0x200Fu))) - { - goto ProcessSingleThreeByteSequenceSkipOverlongAndSurrogateChecks; - } - - pInputBuffer += 6; - tempUtf16CodeUnitCountAdjustment -= 4; // 6 UTF-8 bytes -> 2 UTF-16 code units (and 2 scalars) - - // The next byte in the sequence didn't have a 3-byte marker, so it's probably - // an ASCII character. Jump back to the beginning of loop processing. - - continue; - } - - if (Utf8Utility.UInt32BeginsWithUtf8ThreeByteMask(thisDWord)) - { - // A single three-byte sequence. - goto ProcessThreeByteSequenceWithCheck; - } - else - { - // Not a three-byte sequence; perhaps ASCII? - goto AfterReadDWord; - } - } - } - - if (pInputBuffer <= pFinalPosWhereCanReadDWordFromInputBuffer) - { - thisDWord = Unsafe.ReadUnaligned(pInputBuffer); - - // Optimization: A three-byte character could indicate CJK text, which makes it likely - // that the character following this one is also CJK. We'll check for a three-byte sequence - // marker now and jump directly to three-byte sequence processing if we see one, skipping - // all of the logic at the beginning of the loop. 
- - if (Utf8Utility.UInt32BeginsWithUtf8ThreeByteMask(thisDWord)) - { - goto ProcessThreeByteSequenceWithCheck; // Found another [not yet validated] three-byte sequence; process - } - else - { - goto AfterReadDWord; // Probably ASCII punctuation or whitespace; go back to start of loop - } - } - else - { - goto ProcessRemainingBytesSlow; // Running out of data - } - } - - // Assume the 4-byte case, but we need to validate. - - if (BitConverter.IsLittleEndian) - { - thisDWord &= 0xC0C0_FFFFu; - - // After the above modifications earlier in this method, we expect 'thisDWord' - // to have the structure [ 10000000 00000000 00uuzzzz 00010uuu ]. We'll now - // perform two checks to confirm this. The first will verify the - // [ 10000000 00000000 00###### ######## ] structure by taking advantage of two's - // complement representation to perform a single *signed* integer check. - - if ((int)thisDWord > unchecked((int)0x8000_3FFF)) - { - goto Error; // didn't have three trailing bytes - } - - // Now we want to confirm that 0x01 <= uuuuu (otherwise this is an overlong encoding) - // and that uuuuu <= 0x10 (otherwise this is an out-of-range encoding). - - thisDWord = BitOperations.RotateRight(thisDWord, 8); - - // Now, thisDWord = [ 00010uuu 10000000 00000000 00uuzzzz ]. - // The check is now a simple add / cmp / jcc combo. - - if (!UnicodeUtility.IsInRangeInclusive(thisDWord, 0x1080_0010u, 0x1480_000Fu)) - { - goto Error; // overlong or out-of-range - } - } - else - { - thisDWord -= 0x80u; - - // After the above modifications earlier in this method, we expect 'thisDWord' - // to have the structure [ 00010uuu 00uuzzzz 00yyyyyy 00xxxxxx ]. We'll now - // perform two checks to confirm this. The first will verify the - // [ ######## 00###### 00###### 00###### ] structure. 
- - if ((thisDWord & 0x00C0_C0C0u) != 0) - { - goto Error; // didn't have three trailing bytes - } - - // Now we want to confirm that 0x01 <= uuuuu (otherwise this is an overlong encoding) - // and that uuuuu <= 0x10 (otherwise this is an out-of-range encoding). - // This is a simple range check. (We don't care about the low two bytes.) - - if (!UnicodeUtility.IsInRangeInclusive(thisDWord, 0x1010_0000u, 0x140F_FFFFu)) - { - goto Error; // overlong or out-of-range - } - } - - // Validation of 4-byte case complete. - - pInputBuffer += 4; - tempUtf16CodeUnitCountAdjustment -= 2; // 4 UTF-8 bytes -> 2 UTF-16 code units - tempScalarCountAdjustment--; // 2 UTF-16 code units -> 1 scalar - - continue; // go back to beginning of loop for processing - } - - goto ProcessRemainingBytesSlow; - - ProcessInputOfLessThanDWordSize: - - Debug.Assert(inputLength < 4); - nuint inputBufferRemainingBytes = (uint)inputLength; - goto ProcessSmallBufferCommon; - - ProcessRemainingBytesSlow: - - inputBufferRemainingBytes = (nuint)(void*)Unsafe.ByteOffset(ref *pInputBuffer, ref *pFinalPosWhereCanReadDWordFromInputBuffer) + 4; - - ProcessSmallBufferCommon: - - Debug.Assert(inputBufferRemainingBytes < 4); - while (inputBufferRemainingBytes > 0) - { - uint firstByte = pInputBuffer[0]; - - if ((byte)firstByte < 0x80u) - { - // 1-byte (ASCII) case - pInputBuffer++; - inputBufferRemainingBytes--; - continue; - } - else if (inputBufferRemainingBytes >= 2) - { - uint secondByte = pInputBuffer[1]; // typed as 32-bit since we perform arithmetic (not just comparisons) on this value - if ((byte)firstByte < 0xE0u) - { - // 2-byte case - if ((byte)firstByte >= 0xC2u && Utf8Utility.IsLowByteUtf8ContinuationByte(secondByte)) - { - pInputBuffer += 2; - tempUtf16CodeUnitCountAdjustment--; // 2 UTF-8 bytes -> 1 UTF-16 code unit (and 1 scalar) - inputBufferRemainingBytes -= 2; - continue; - } - } - else if (inputBufferRemainingBytes >= 3) - { - if ((byte)firstByte < 0xF0u) - { - if ((byte)firstByte == 0xE0u) - 
{ - if (!UnicodeUtility.IsInRangeInclusive(secondByte, 0xA0u, 0xBFu)) - { - goto Error; // overlong encoding - } - } - else if ((byte)firstByte == 0xEDu) - { - if (!UnicodeUtility.IsInRangeInclusive(secondByte, 0x80u, 0x9Fu)) - { - goto Error; // would be a UTF-16 surrogate code point - } - } - else - { - if (!Utf8Utility.IsLowByteUtf8ContinuationByte(secondByte)) - { - goto Error; // first trailing byte doesn't have proper continuation marker - } - } - - if (Utf8Utility.IsUtf8ContinuationByte(in pInputBuffer[2])) - { - pInputBuffer += 3; - tempUtf16CodeUnitCountAdjustment -= 2; // 3 UTF-8 bytes -> 2 UTF-16 code units (and 2 scalars) - inputBufferRemainingBytes -= 3; - continue; - } - } - } - } - - // Error - no match. - - goto Error; - } - - // If we reached this point, we're out of data, and we saw no bad UTF8 sequence. - -#if DEBUG - // Quick check that for the success case we're going to fulfill our contract of returning &inputBuffer[inputLength]. - Debug.Assert(pOriginalInputBuffer + originalInputLength == pInputBuffer, "About to return an unexpected value."); -#endif - - Error: - - // Report back to our caller how far we got before seeing invalid data. - // (Also used for normal termination when falling out of the loop above.) 
- - utf16CodeUnitCountAdjustment = tempUtf16CodeUnitCountAdjustment; - scalarCountAdjustment = tempScalarCountAdjustment; - return pInputBuffer; - } - } -} -#endif diff --git a/src/DotNetty.Common/Utilities/AsciiString.NetCore3.cs b/src/DotNetty.Common/Utilities/AsciiString.NetCore3.cs index 9f522caf7..0b72ab992 100644 --- a/src/DotNetty.Common/Utilities/AsciiString.NetCore3.cs +++ b/src/DotNetty.Common/Utilities/AsciiString.NetCore3.cs @@ -72,9 +72,7 @@ private static unsafe bool TryGetBytesFast(char* pChars, int charCount, byte* pB [MethodImpl(MethodImplOptions.AggressiveInlining)] // called directly by GetBytesCommon private static unsafe int GetBytesFast(char* pChars, int charsLength, byte* pBytes, int bytesLength, out int charsConsumed) { - int bytesWritten = PlatformDependent.Is64BitProcess - ? (int)ASCIIUtility64.NarrowUtf16ToAscii(pChars, pBytes, (uint)Math.Min(charsLength, bytesLength)) - : (int)ASCIIUtility32.NarrowUtf16ToAscii(pChars, pBytes, (uint)Math.Min(charsLength, bytesLength)); + int bytesWritten = (int)ASCIIUtility.NarrowUtf16ToAscii(pChars, pBytes, (uint)Math.Min(charsLength, bytesLength)); charsConsumed = bytesWritten; return bytesWritten; From f6948235ce31c237c571d3138c66a4466892f9ee Mon Sep 17 00:00:00 2001 From: cuteant Date: Thu, 24 Jun 2021 00:52:24 +0800 Subject: [PATCH 2/5] Align with dotnet/runtime/CoreLib --- src/DotNetty.Buffers/ByteBufferUtil.Utf8.cs | 2 +- src/DotNetty.Buffers/DotNetty.Buffers.csproj | 2 +- .../Writer/ByteBufferWriter.Binary.Helper.cs | 42 +- .../Internal/ASCIIUtility.Helpers.cs | 17 + .../Internal/ASCIIUtility.Net.cs | 1307 ++++++++++++++ .../Internal/ASCIIUtility.NetCore3.cs | 1092 ++++++++++++ src/DotNetty.Common/Internal/ASCIIUtility.cs | 1256 ++------------ .../Internal/TextEncodings.Utf16.NetCore3.cs | 92 + .../Internal/TextEncodings.Utf8.NetCore3.cs | 99 ++ src/DotNetty.Common/Internal/TextEncodings.cs | 6 +- src/DotNetty.Common/Internal/UnicodeDebug.cs | 25 +- .../Internal/UnicodeUtility.cs | 10 +- 
.../Internal/Utf16Utility.Validation.Net.cs | 508 ++++++ ...cs => Utf16Utility.Validation.NetCore3.cs} | 6 +- src/DotNetty.Common/Internal/Utf16Utility.cs | 55 + .../Internal/Utf8Utility.Helpers.cs | 86 +- .../Internal/Utf8Utility.Transcoding.Net.cs | 1510 +++++++++++++++++ ...cs => Utf8Utility.Transcoding.NetCore3.cs} | 108 +- .../Internal/Utf8Utility.Validation.Net.cs | 32 + .../Internal/Utf8Utility.Validation.cs | 83 +- .../Internal/Utf8Utility.WhiteSpace.cs | 132 ++ src/DotNetty.Common/Internal/Utf8Utility.cs | 59 - .../DotNetty.Common.Tests.csproj | 5 +- .../Internal/CoreLib/ASCIIUtilityTests.cs | 419 +++++ .../CoreLib/BoundedMemory.Creation.cs | 95 ++ .../Internal/CoreLib/BoundedMemory.Unix.cs | 50 + .../Internal/CoreLib/BoundedMemory.Windows.cs | 335 ++++ .../Internal/CoreLib/BoundedMemory.cs | 53 + .../Internal/CoreLib/PoisonPagePlacement.cs | 28 + .../Utf16UtilityTests.ValidateChars.cs | 267 +++ .../Internal/CoreLib/Utf8Tests.cs | 799 +++++++++ .../CoreLib/Utf8UtilityTests.ValidateBytes.cs | 396 +++++ 32 files changed, 7621 insertions(+), 1355 deletions(-) create mode 100644 src/DotNetty.Common/Internal/ASCIIUtility.Net.cs create mode 100644 src/DotNetty.Common/Internal/ASCIIUtility.NetCore3.cs create mode 100644 src/DotNetty.Common/Internal/Utf16Utility.Validation.Net.cs rename src/DotNetty.Common/Internal/{Utf16Utility.Validation.cs => Utf16Utility.Validation.NetCore3.cs} (99%) create mode 100644 src/DotNetty.Common/Internal/Utf8Utility.Transcoding.Net.cs rename src/DotNetty.Common/Internal/{Utf8Utility.Transcoding.cs => Utf8Utility.Transcoding.NetCore3.cs} (93%) create mode 100644 src/DotNetty.Common/Internal/Utf8Utility.Validation.Net.cs create mode 100644 src/DotNetty.Common/Internal/Utf8Utility.WhiteSpace.cs create mode 100644 test/DotNetty.Common.Tests/Internal/CoreLib/ASCIIUtilityTests.cs create mode 100644 test/DotNetty.Common.Tests/Internal/CoreLib/BoundedMemory.Creation.cs create mode 100644 
test/DotNetty.Common.Tests/Internal/CoreLib/BoundedMemory.Unix.cs create mode 100644 test/DotNetty.Common.Tests/Internal/CoreLib/BoundedMemory.Windows.cs create mode 100644 test/DotNetty.Common.Tests/Internal/CoreLib/BoundedMemory.cs create mode 100644 test/DotNetty.Common.Tests/Internal/CoreLib/PoisonPagePlacement.cs create mode 100644 test/DotNetty.Common.Tests/Internal/CoreLib/Utf16UtilityTests.ValidateChars.cs create mode 100644 test/DotNetty.Common.Tests/Internal/CoreLib/Utf8Tests.cs create mode 100644 test/DotNetty.Common.Tests/Internal/CoreLib/Utf8UtilityTests.ValidateBytes.cs diff --git a/src/DotNetty.Buffers/ByteBufferUtil.Utf8.cs b/src/DotNetty.Buffers/ByteBufferUtil.Utf8.cs index 5e49b34a9..dcecd65da 100644 --- a/src/DotNetty.Buffers/ByteBufferUtil.Utf8.cs +++ b/src/DotNetty.Buffers/ByteBufferUtil.Utf8.cs @@ -414,7 +414,7 @@ static bool IsUtf8(IByteBuffer buf, int index, int length) var utf8Span = buf.GetReadableSpan(index, length); ref byte utf8Source = ref MemoryMarshal.GetReference(utf8Span); - IntPtr offset = (IntPtr)0; // Use IntPtr for arithmetic to avoid unnecessary 64->32->64 truncations + nint offset = 0; // Use IntPtr for arithmetic to avoid unnecessary 64->32->64 truncations uint uLength = (uint)length; while ((uint)index < uLength) diff --git a/src/DotNetty.Buffers/DotNetty.Buffers.csproj b/src/DotNetty.Buffers/DotNetty.Buffers.csproj index 043649d35..c1262b48e 100644 --- a/src/DotNetty.Buffers/DotNetty.Buffers.csproj +++ b/src/DotNetty.Buffers/DotNetty.Buffers.csproj @@ -2,7 +2,7 @@ - netcoreapp2.1;netstandard2.1;$(StandardTfms) + net5.0;netcoreapp2.1;netstandard2.1;$(StandardTfms) DotNetty.Buffers SpanNetty.Buffers true diff --git a/src/DotNetty.Buffers/Writer/ByteBufferWriter.Binary.Helper.cs b/src/DotNetty.Buffers/Writer/ByteBufferWriter.Binary.Helper.cs index efae526ab..7bff2dabd 100644 --- a/src/DotNetty.Buffers/Writer/ByteBufferWriter.Binary.Helper.cs +++ b/src/DotNetty.Buffers/Writer/ByteBufferWriter.Binary.Helper.cs @@ -42,7 +42,7 @@ 
private unsafe static void SetMedium(ref byte start, int value) // UnsafeByteBufferUtil.SetMedium(bytes, value); //} uint unsignedValue = (uint)value; - IntPtr offset = (IntPtr)0; + nint offset = 0; Unsafe.AddByteOffset(ref start, offset) = (byte)(unsignedValue >> 16); Unsafe.AddByteOffset(ref start, offset + 1) = (byte)(unsignedValue >> 8); Unsafe.AddByteOffset(ref start, offset + 2) = (byte)unsignedValue; @@ -52,7 +52,7 @@ private unsafe static void SetMedium(ref byte start, int value) private unsafe static void SetMediumLE(ref byte start, int value) { uint unsignedValue = (uint)value; - IntPtr offset = (IntPtr)0; + nint offset = 0; Unsafe.AddByteOffset(ref start, offset) = (byte)unsignedValue; Unsafe.AddByteOffset(ref start, offset + 1) = (byte)(unsignedValue >> 8); Unsafe.AddByteOffset(ref start, offset + 2) = (byte)(unsignedValue >> 16); @@ -66,8 +66,8 @@ private unsafe static void SetDecimal(ref byte start, decimal value) uint mid = (uint)bits[1]; uint high = (uint)bits[2]; uint flags = (uint)bits[3]; - IntPtr offset = (IntPtr)0; + nint offset = 0; Unsafe.AddByteOffset(ref start, offset) = (byte)(lo >> 24); // lo Unsafe.AddByteOffset(ref start, offset + 1) = (byte)(lo >> 16); Unsafe.AddByteOffset(ref start, offset + 2) = (byte)(lo >> 8); @@ -76,14 +76,15 @@ private unsafe static void SetDecimal(ref byte start, decimal value) Unsafe.AddByteOffset(ref start, offset + 5) = (byte)(mid >> 16); Unsafe.AddByteOffset(ref start, offset + 6) = (byte)(mid >> 8); Unsafe.AddByteOffset(ref start, offset + 7) = (byte)mid; - Unsafe.AddByteOffset(ref start, offset + 8) = (byte)(high >> 24); // high - Unsafe.AddByteOffset(ref start, offset + 9) = (byte)(high >> 16); - Unsafe.AddByteOffset(ref start, offset + 10) = (byte)(high >> 8); - Unsafe.AddByteOffset(ref start, offset + 11) = (byte)high; - Unsafe.AddByteOffset(ref start, offset + 12) = (byte)(flags >> 24); // flags - Unsafe.AddByteOffset(ref start, offset + 13) = (byte)(flags >> 16); - Unsafe.AddByteOffset(ref start, 
offset + 14) = (byte)(flags >> 8); - Unsafe.AddByteOffset(ref start, offset + 15) = (byte)flags; + offset += 8; + Unsafe.AddByteOffset(ref start, offset) = (byte)(high >> 24); // high + Unsafe.AddByteOffset(ref start, offset + 1) = (byte)(high >> 16); + Unsafe.AddByteOffset(ref start, offset + 2) = (byte)(high >> 8); + Unsafe.AddByteOffset(ref start, offset + 3) = (byte)high; + Unsafe.AddByteOffset(ref start, offset + 4) = (byte)(flags >> 24); // flags + Unsafe.AddByteOffset(ref start, offset + 5) = (byte)(flags >> 16); + Unsafe.AddByteOffset(ref start, offset + 6) = (byte)(flags >> 8); + Unsafe.AddByteOffset(ref start, offset + 7) = (byte)flags; } [MethodImpl(InlineMethod.AggressiveInlining)] @@ -94,8 +95,8 @@ private unsafe static void SetDecimalLE(ref byte start, decimal value) uint mid = (uint)bits[1]; uint high = (uint)bits[2]; uint flags = (uint)bits[3]; - IntPtr offset = (IntPtr)0; + nint offset = 0; Unsafe.AddByteOffset(ref start, offset) = (byte)lo; Unsafe.AddByteOffset(ref start, offset + 1) = (byte)(lo >> 8); Unsafe.AddByteOffset(ref start, offset + 2) = (byte)(lo >> 16); @@ -104,14 +105,15 @@ private unsafe static void SetDecimalLE(ref byte start, decimal value) Unsafe.AddByteOffset(ref start, offset + 5) = (byte)(mid >> 8); Unsafe.AddByteOffset(ref start, offset + 6) = (byte)(mid >> 16); Unsafe.AddByteOffset(ref start, offset + 7) = (byte)(mid >> 24); // mid - Unsafe.AddByteOffset(ref start, offset + 8) = (byte)high; - Unsafe.AddByteOffset(ref start, offset + 9) = (byte)(high >> 8); - Unsafe.AddByteOffset(ref start, offset + 10) = (byte)(high >> 16); - Unsafe.AddByteOffset(ref start, offset + 11) = (byte)(high >> 24); // high - Unsafe.AddByteOffset(ref start, offset + 12) = (byte)flags; - Unsafe.AddByteOffset(ref start, offset + 13) = (byte)(flags >> 8); - Unsafe.AddByteOffset(ref start, offset + 14) = (byte)(flags >> 16); - Unsafe.AddByteOffset(ref start, offset + 15) = (byte)(flags >> 24); // flags + offset += 8; + Unsafe.AddByteOffset(ref start, 
offset) = (byte)high; + Unsafe.AddByteOffset(ref start, offset + 1) = (byte)(high >> 8); + Unsafe.AddByteOffset(ref start, offset + 2) = (byte)(high >> 16); + Unsafe.AddByteOffset(ref start, offset + 3) = (byte)(high >> 24); // high + Unsafe.AddByteOffset(ref start, offset + 4) = (byte)flags; + Unsafe.AddByteOffset(ref start, offset + 5) = (byte)(flags >> 8); + Unsafe.AddByteOffset(ref start, offset + 6) = (byte)(flags >> 16); + Unsafe.AddByteOffset(ref start, offset + 7) = (byte)(flags >> 24); // flags } /// Writes a 32-bit integer in a compressed format. diff --git a/src/DotNetty.Common/Internal/ASCIIUtility.Helpers.cs b/src/DotNetty.Common/Internal/ASCIIUtility.Helpers.cs index 189d4a4d8..f67534b52 100644 --- a/src/DotNetty.Common/Internal/ASCIIUtility.Helpers.cs +++ b/src/DotNetty.Common/Internal/ASCIIUtility.Helpers.cs @@ -9,7 +9,9 @@ using System.Diagnostics; using System.Numerics; using System.Runtime.CompilerServices; +#if NETCOREAPP3_1 using System.Runtime.Intrinsics.X86; +#endif namespace DotNetty.Common.Internal { @@ -48,6 +50,7 @@ internal static uint CountNumberOfLeadingAsciiBytesFromUInt32WithSomeNonAsciiDat { Debug.Assert(!AllBytesInUInt32AreAscii(value), "Caller shouldn't provide an all-ASCII value."); +#if NETCOREAPP3_1 // Use BMI1 directly rather than going through BitOperations. We only see a perf gain here // if we're able to emit a real tzcnt instruction; the software fallback used by BitOperations // is too slow for our purposes since we can provide our own faster, specialized software fallback. @@ -84,8 +87,22 @@ internal static uint CountNumberOfLeadingAsciiBytesFromUInt32WithSomeNonAsciiDat return numAsciiBytes; } +#else + if (BitConverter.IsLittleEndian) + { + return (uint)BitOperations.TrailingZeroCount(value & UInt32HighBitsOnlyMask) >> 3; + } +#endif else { +#if NET + // Couldn't use tzcnt, use specialized software fallback. 
+ // The 'allBytesUpToNowAreAscii' DWORD uses bit twiddling to hold a 1 or a 0 depending + // on whether all processed bytes were ASCII. Then we accumulate all of the + // results to calculate how many consecutive ASCII bytes are present. + + value = ~value; +#endif // BinaryPrimitives.ReverseEndianness is only implemented as an intrinsic on // little-endian platforms, so using it in this big-endian path would be too // expensive. Instead we'll just change how we perform the shifts. diff --git a/src/DotNetty.Common/Internal/ASCIIUtility.Net.cs b/src/DotNetty.Common/Internal/ASCIIUtility.Net.cs new file mode 100644 index 000000000..7c73bd34a --- /dev/null +++ b/src/DotNetty.Common/Internal/ASCIIUtility.Net.cs @@ -0,0 +1,1307 @@ +// borrowed from https://github.com/dotnet/corefx/blob/release/3.1/src/Common/src/CoreLib/System/Text/ASCIIUtility.cs + +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +#if NET +using System; +using System.Diagnostics; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; +using System.Runtime.Intrinsics.X86; + +namespace DotNetty.Common.Internal +{ + partial class ASCIIUtility + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static int GetIndexOfFirstNonAsciiByteInLane_AdvSimd(Vector128 value, Vector128 bitmask) + { + if (!AdvSimd.Arm64.IsSupported || !BitConverter.IsLittleEndian) + { + throw new PlatformNotSupportedException(); + } + + // extractedBits[i] = (value[i] >> 7) & (1 << (12 * (i % 2))); + Vector128 mostSignificantBitIsSet = AdvSimd.ShiftRightArithmetic(value.AsSByte(), 7).AsByte(); + Vector128 extractedBits = AdvSimd.And(mostSignificantBitIsSet, bitmask); + + // collapse mask to lower bits + extractedBits = AdvSimd.Arm64.AddPairwise(extractedBits, extractedBits); + ulong mask = extractedBits.AsUInt64().ToScalar(); + + // calculate the index + int index = BitOperations.TrailingZeroCount(mask) >> 2; + Debug.Assert((mask != 0) ? index < 16 : index >= 16); + return index; + } + + /// + /// Returns the index in where the first non-ASCII byte is found. + /// Returns if the buffer is empty or all-ASCII. + /// + /// An ASCII byte is defined as 0x00 - 0x7F, inclusive. + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static unsafe nuint GetIndexOfFirstNonAsciiByte(byte* pBuffer, nuint bufferLength) + { + // If SSE2 is supported, use those specific intrinsics instead of the generic vectorized + // code below. This has two benefits: (a) we can take advantage of specific instructions like + // pmovmskb which we know are optimized, and (b) we can avoid downclocking the processor while + // this method is running. + + return (Sse2.IsSupported || AdvSimd.Arm64.IsSupported && BitConverter.IsLittleEndian) + ? 
GetIndexOfFirstNonAsciiByte_Intrinsified(pBuffer, bufferLength) + : GetIndexOfFirstNonAsciiByte_Default(pBuffer, bufferLength); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static bool ContainsNonAsciiByte_Sse2(uint sseMask) + { + Debug.Assert(sseMask != uint.MaxValue); + Debug.Assert(Sse2.IsSupported); + return sseMask != 0; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static bool ContainsNonAsciiByte_AdvSimd(uint advSimdIndex) + { + Debug.Assert(advSimdIndex != uint.MaxValue); + Debug.Assert(AdvSimd.IsSupported); + return advSimdIndex < 16; + } + + private static unsafe nuint GetIndexOfFirstNonAsciiByte_Intrinsified(byte* pBuffer, nuint bufferLength) + { + // JIT turns the below into constants + + uint SizeOfVector128 = (uint)Unsafe.SizeOf>(); + nuint MaskOfAllBitsInVector128 = (nuint)(SizeOfVector128 - 1); + + Debug.Assert(Sse2.IsSupported || AdvSimd.Arm64.IsSupported, "Sse2 or AdvSimd64 required."); + Debug.Assert(BitConverter.IsLittleEndian, "This SSE2/Arm64 implementation assumes little-endian."); + + Vector128 bitmask = BitConverter.IsLittleEndian ? + Vector128.Create((ushort)0x1001).AsByte() : + Vector128.Create((ushort)0x0110).AsByte(); + + uint currentSseMask = uint.MaxValue, secondSseMask = uint.MaxValue; + uint currentAdvSimdIndex = uint.MaxValue, secondAdvSimdIndex = uint.MaxValue; + byte* pOriginalBuffer = pBuffer; + + // This method is written such that control generally flows top-to-bottom, avoiding + // jumps as much as possible in the optimistic case of a large enough buffer and + // "all ASCII". If we see non-ASCII data, we jump out of the hot paths to targets + // after all the main logic. + + if (bufferLength < SizeOfVector128) + { + goto InputBufferLessThanOneVectorInLength; // can't vectorize; drain primitives instead + } + + // Read the first vector unaligned. 
+ + if (Sse2.IsSupported) + { + currentSseMask = (uint)Sse2.MoveMask(Sse2.LoadVector128(pBuffer)); // unaligned load + if (ContainsNonAsciiByte_Sse2(currentSseMask)) + { + goto FoundNonAsciiDataInCurrentChunk; + } + } + else if (AdvSimd.Arm64.IsSupported) + { + currentAdvSimdIndex = (uint)GetIndexOfFirstNonAsciiByteInLane_AdvSimd(AdvSimd.LoadVector128(pBuffer), bitmask); // unaligned load + if (ContainsNonAsciiByte_AdvSimd(currentAdvSimdIndex)) + { + goto FoundNonAsciiDataInCurrentChunk; + } + } + else + { + throw new PlatformNotSupportedException(); + } + + // If we have less than 32 bytes to process, just go straight to the final unaligned + // read. There's no need to mess with the loop logic in the middle of this method. + + if (bufferLength < 2 * SizeOfVector128) + { + goto IncrementCurrentOffsetBeforeFinalUnalignedVectorRead; + } + + // Now adjust the read pointer so that future reads are aligned. + + pBuffer = (byte*)(((nuint)pBuffer + SizeOfVector128) & ~(nuint)MaskOfAllBitsInVector128); + +#if DEBUG + long numBytesRead = pBuffer - pOriginalBuffer; + Debug.Assert(0 < numBytesRead && numBytesRead <= SizeOfVector128, "We should've made forward progress of at least one byte."); + Debug.Assert((nuint)numBytesRead <= bufferLength, "We shouldn't have read past the end of the input buffer."); +#endif + + // Adjust the remaining length to account for what we just read. + + bufferLength += (nuint)pOriginalBuffer; + bufferLength -= (nuint)pBuffer; + + // The buffer is now properly aligned. + // Read 2 vectors at a time if possible. + + if (bufferLength >= 2 * SizeOfVector128) + { + byte* pFinalVectorReadPos = (byte*)((nuint)pBuffer + bufferLength - 2 * SizeOfVector128); + + // After this point, we no longer need to update the bufferLength value. 
+ + do + { + if (Sse2.IsSupported) + { + Vector128 firstVector = Sse2.LoadAlignedVector128(pBuffer); + Vector128 secondVector = Sse2.LoadAlignedVector128(pBuffer + SizeOfVector128); + + currentSseMask = (uint)Sse2.MoveMask(firstVector); + secondSseMask = (uint)Sse2.MoveMask(secondVector); + if (ContainsNonAsciiByte_Sse2(currentSseMask | secondSseMask)) + { + goto FoundNonAsciiDataInInnerLoop; + } + } + else if (AdvSimd.Arm64.IsSupported) + { + Vector128 firstVector = AdvSimd.LoadVector128(pBuffer); + Vector128 secondVector = AdvSimd.LoadVector128(pBuffer + SizeOfVector128); + + currentAdvSimdIndex = (uint)GetIndexOfFirstNonAsciiByteInLane_AdvSimd(firstVector, bitmask); + secondAdvSimdIndex = (uint)GetIndexOfFirstNonAsciiByteInLane_AdvSimd(secondVector, bitmask); + if (ContainsNonAsciiByte_AdvSimd(currentAdvSimdIndex) || ContainsNonAsciiByte_AdvSimd(secondAdvSimdIndex)) + { + goto FoundNonAsciiDataInInnerLoop; + } + } + else + { + throw new PlatformNotSupportedException(); + } + + pBuffer += 2 * SizeOfVector128; + } while (pBuffer <= pFinalVectorReadPos); + } + + // We have somewhere between 0 and (2 * vector length) - 1 bytes remaining to read from. + // Since the above loop doesn't update bufferLength, we can't rely on its absolute value. + // But we _can_ rely on it to tell us how much remaining data must be drained by looking + // at what bits of it are set. This works because had we updated it within the loop above, + // we would've been adding 2 * SizeOfVector128 on each iteration, but we only care about + // bits which are less significant than those that the addition would've acted on. + + // If there is fewer than one vector length remaining, skip the next aligned read. + + if ((bufferLength & SizeOfVector128) == 0) + { + goto DoFinalUnalignedVectorRead; + } + + // At least one full vector's worth of data remains, so we can safely read it. + // Remember, at this point pBuffer is still aligned. 
+ + if (Sse2.IsSupported) + { + currentSseMask = (uint)Sse2.MoveMask(Sse2.LoadAlignedVector128(pBuffer)); + if (ContainsNonAsciiByte_Sse2(currentSseMask)) + { + goto FoundNonAsciiDataInCurrentChunk; + } + } + else if (AdvSimd.Arm64.IsSupported) + { + currentAdvSimdIndex = (uint)GetIndexOfFirstNonAsciiByteInLane_AdvSimd(AdvSimd.LoadVector128(pBuffer), bitmask); + if (ContainsNonAsciiByte_AdvSimd(currentAdvSimdIndex)) + { + goto FoundNonAsciiDataInCurrentChunk; + } + } + else + { + throw new PlatformNotSupportedException(); + } + + IncrementCurrentOffsetBeforeFinalUnalignedVectorRead: + + pBuffer += SizeOfVector128; + + DoFinalUnalignedVectorRead: + + if (((byte)bufferLength & MaskOfAllBitsInVector128) != 0) + { + // Perform an unaligned read of the last vector. + // We need to adjust the pointer because we're re-reading data. + + pBuffer += (bufferLength & MaskOfAllBitsInVector128) - SizeOfVector128; + + if (Sse2.IsSupported) + { + currentSseMask = (uint)Sse2.MoveMask(Sse2.LoadVector128(pBuffer)); // unaligned load + if (ContainsNonAsciiByte_Sse2(currentSseMask)) + { + goto FoundNonAsciiDataInCurrentChunk; + } + + } + else if (AdvSimd.Arm64.IsSupported) + { + currentAdvSimdIndex = (uint)GetIndexOfFirstNonAsciiByteInLane_AdvSimd(AdvSimd.LoadVector128(pBuffer), bitmask); // unaligned load + if (ContainsNonAsciiByte_AdvSimd(currentAdvSimdIndex)) + { + goto FoundNonAsciiDataInCurrentChunk; + } + + } + else + { + throw new PlatformNotSupportedException(); + } + + pBuffer += SizeOfVector128; + } + + Finish: + return (nuint)pBuffer - (nuint)pOriginalBuffer; // and we're done! + + FoundNonAsciiDataInInnerLoop: + + // If the current (first) mask isn't the mask that contains non-ASCII data, then it must + // instead be the second mask. If so, skip the entire first mask and drain ASCII bytes + // from the second mask. 
+ + if (Sse2.IsSupported) + { + if (!ContainsNonAsciiByte_Sse2(currentSseMask)) + { + pBuffer += SizeOfVector128; + currentSseMask = secondSseMask; + } + } + else if (AdvSimd.IsSupported) + { + if (!ContainsNonAsciiByte_AdvSimd(currentAdvSimdIndex)) + { + pBuffer += SizeOfVector128; + currentAdvSimdIndex = secondAdvSimdIndex; + } + } + else + { + throw new PlatformNotSupportedException(); + } + FoundNonAsciiDataInCurrentChunk: + + + if (Sse2.IsSupported) + { + // The mask contains - from the LSB - a 0 for each ASCII byte we saw, and a 1 for each non-ASCII byte. + // Tzcnt is the correct operation to count the number of zero bits quickly. If this instruction isn't + // available, we'll fall back to a normal loop. + Debug.Assert(ContainsNonAsciiByte_Sse2(currentSseMask), "Shouldn't be here unless we see non-ASCII data."); + pBuffer += (uint)BitOperations.TrailingZeroCount(currentSseMask); + } + else if (AdvSimd.Arm64.IsSupported) + { + Debug.Assert(ContainsNonAsciiByte_AdvSimd(currentAdvSimdIndex), "Shouldn't be here unless we see non-ASCII data."); + pBuffer += currentAdvSimdIndex; + } + else + { + throw new PlatformNotSupportedException(); + } + + goto Finish; + + FoundNonAsciiDataInCurrentDWord: + + uint currentDWord; + Debug.Assert(!AllBytesInUInt32AreAscii(currentDWord), "Shouldn't be here unless we see non-ASCII data."); + pBuffer += CountNumberOfLeadingAsciiBytesFromUInt32WithSomeNonAsciiData(currentDWord); + + goto Finish; + + InputBufferLessThanOneVectorInLength: + + // These code paths get hit if the original input length was less than one vector in size. + // We can't perform vectorized reads at this point, so we'll fall back to reading primitives + // directly. Note that all of these reads are unaligned. + + Debug.Assert(bufferLength < SizeOfVector128); + + // QWORD drain + + if ((bufferLength & 8) != 0) + { + if (UIntPtr.Size == sizeof(ulong)) + { + // If we can use 64-bit tzcnt to count the number of leading ASCII bytes, prefer it. 
+ + ulong candidateUInt64 = Unsafe.ReadUnaligned(pBuffer); + if (!AllBytesInUInt64AreAscii(candidateUInt64)) + { + // Clear everything but the high bit of each byte, then tzcnt. + // Remember to divide by 8 at the end to convert bit count to byte count. + + candidateUInt64 &= UInt64HighBitsOnlyMask; + pBuffer += (nuint)(BitOperations.TrailingZeroCount(candidateUInt64) >> 3); + goto Finish; + } + } + else + { + // If we can't use 64-bit tzcnt, no worries. We'll just do 2x 32-bit reads instead. + + currentDWord = Unsafe.ReadUnaligned(pBuffer); + uint nextDWord = Unsafe.ReadUnaligned(pBuffer + 4); + + if (!AllBytesInUInt32AreAscii(currentDWord | nextDWord)) + { + // At least one of the values wasn't all-ASCII. + // We need to figure out which one it was and stick it in the currentMask local. + + if (AllBytesInUInt32AreAscii(currentDWord)) + { + currentDWord = nextDWord; // this one is the culprit + pBuffer += 4; + } + + goto FoundNonAsciiDataInCurrentDWord; + } + } + + pBuffer += 8; // successfully consumed 8 ASCII bytes + } + + // DWORD drain + + if ((bufferLength & 4) != 0) + { + currentDWord = Unsafe.ReadUnaligned(pBuffer); + + if (!AllBytesInUInt32AreAscii(currentDWord)) + { + goto FoundNonAsciiDataInCurrentDWord; + } + + pBuffer += 4; // successfully consumed 4 ASCII bytes + } + + // WORD drain + // (We movzx to a DWORD for ease of manipulation.) + + if ((bufferLength & 2) != 0) + { + currentDWord = Unsafe.ReadUnaligned(pBuffer); + + if (!AllBytesInUInt32AreAscii(currentDWord)) + { + // We only care about the 0x0080 bit of the value. If it's not set, then we + // increment currentOffset by 1. If it's set, we don't increment it at all. + + pBuffer += (nuint)((nint)(sbyte)currentDWord >> 7) + 1; + goto Finish; + } + + pBuffer += 2; // successfully consumed 2 ASCII bytes + } + + // BYTE drain + + if ((bufferLength & 1) != 0) + { + // sbyte has non-negative value if byte is ASCII. 
+ + if (*(sbyte*)(pBuffer) >= 0) + { + pBuffer++; // successfully consumed a single byte + } + } + + goto Finish; + } + + private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuint bufferLength /* in chars */) + { + // This method contains logic optimized for both SSE2 and SSE41. Much of the logic in this method + // will be elided by JIT once we determine which specific ISAs we support. + + // Quick check for empty inputs. + + if (bufferLength == 0) + { + return 0; + } + + // JIT turns the below into constants + + uint SizeOfVector128InBytes = (uint)Unsafe.SizeOf>(); + uint SizeOfVector128InChars = SizeOfVector128InBytes / sizeof(char); + + Debug.Assert(Sse2.IsSupported, "Should've been checked by caller."); + Debug.Assert(BitConverter.IsLittleEndian, "SSE2 assumes little-endian."); + + Vector128 firstVector, secondVector; + uint currentMask; + char* pOriginalBuffer = pBuffer; + + if (bufferLength < SizeOfVector128InChars) + { + goto InputBufferLessThanOneVectorInLength; // can't vectorize; drain primitives instead + } + + // This method is written such that control generally flows top-to-bottom, avoiding + // jumps as much as possible in the optimistic case of "all ASCII". If we see non-ASCII + // data, we jump out of the hot paths to targets at the end of the method. + + Vector128 asciiMaskForTestZ = Vector128.Create((ushort)0xFF80); // used for PTEST on supported hardware + Vector128 asciiMaskForAddSaturate = Vector128.Create((ushort)0x7F80); // used for PADDUSW + const uint NonAsciiDataSeenMask = 0b_1010_1010_1010_1010; // used for determining whether 'currentMask' contains non-ASCII data + +//#if SYSTEM_PRIVATE_CORELIB + Debug.Assert(bufferLength <= nuint.MaxValue / sizeof(char)); +//#endif + + // Read the first vector unaligned. + + firstVector = Sse2.LoadVector128((ushort*)pBuffer); // unaligned load + + // The operation below forces the 0x8000 bit of each WORD to be set iff the WORD element + // has value >= 0x0800 (non-ASCII). 
Then we'll treat the vector as a BYTE vector in order + // to extract the mask. Reminder: the 0x0080 bit of each WORD should be ignored. + + currentMask = (uint)Sse2.MoveMask(Sse2.AddSaturate(firstVector, asciiMaskForAddSaturate).AsByte()); + + if ((currentMask & NonAsciiDataSeenMask) != 0) + { + goto FoundNonAsciiDataInCurrentMask; + } + + // If we have less than 32 bytes to process, just go straight to the final unaligned + // read. There's no need to mess with the loop logic in the middle of this method. + + // Adjust the remaining length to account for what we just read. + // For the remainder of this code path, bufferLength will be in bytes, not chars. + + bufferLength <<= 1; // chars to bytes + + if (bufferLength < 2 * SizeOfVector128InBytes) + { + goto IncrementCurrentOffsetBeforeFinalUnalignedVectorRead; + } + + // Now adjust the read pointer so that future reads are aligned. + + pBuffer = (char*)(((nuint)pBuffer + SizeOfVector128InBytes) & ~(nuint)(SizeOfVector128InBytes - 1)); + +#if DEBUG + long numCharsRead = pBuffer - pOriginalBuffer; + Debug.Assert(0 < numCharsRead && numCharsRead <= SizeOfVector128InChars, "We should've made forward progress of at least one char."); + Debug.Assert((nuint)numCharsRead <= bufferLength, "We shouldn't have read past the end of the input buffer."); +#endif + + // Adjust remaining buffer length. + + bufferLength += (nuint)pOriginalBuffer; + bufferLength -= (nuint)pBuffer; + + // The buffer is now properly aligned. + // Read 2 vectors at a time if possible. + + if (bufferLength >= 2 * SizeOfVector128InBytes) + { + char* pFinalVectorReadPos = (char*)((nuint)pBuffer + bufferLength - 2 * SizeOfVector128InBytes); + + // After this point, we no longer need to update the bufferLength value. 
+ + do + { + firstVector = Sse2.LoadAlignedVector128((ushort*)pBuffer); + secondVector = Sse2.LoadAlignedVector128((ushort*)pBuffer + SizeOfVector128InChars); + Vector128 combinedVector = Sse2.Or(firstVector, secondVector); + + if (Sse41.IsSupported) + { + // If a non-ASCII bit is set in any WORD of the combined vector, we have seen non-ASCII data. + // Jump to the non-ASCII handler to figure out which particular vector contained non-ASCII data. + if (!Sse41.TestZ(combinedVector, asciiMaskForTestZ)) + { + goto FoundNonAsciiDataInFirstOrSecondVector; + } + } + else + { + // See comment earlier in the method for an explanation of how the below logic works. + currentMask = (uint)Sse2.MoveMask(Sse2.AddSaturate(combinedVector, asciiMaskForAddSaturate).AsByte()); + if ((currentMask & NonAsciiDataSeenMask) != 0) + { + goto FoundNonAsciiDataInFirstOrSecondVector; + } + } + + pBuffer += 2 * SizeOfVector128InChars; + } while (pBuffer <= pFinalVectorReadPos); + } + + // We have somewhere between 0 and (2 * vector length) - 1 bytes remaining to read from. + // Since the above loop doesn't update bufferLength, we can't rely on its absolute value. + // But we _can_ rely on it to tell us how much remaining data must be drained by looking + // at what bits of it are set. This works because had we updated it within the loop above, + // we would've been adding 2 * SizeOfVector128 on each iteration, but we only care about + // bits which are less significant than those that the addition would've acted on. + + // If there is fewer than one vector length remaining, skip the next aligned read. + // Remember, at this point bufferLength is measured in bytes, not chars. + + if ((bufferLength & SizeOfVector128InBytes) == 0) + { + goto DoFinalUnalignedVectorRead; + } + + // At least one full vector's worth of data remains, so we can safely read it. + // Remember, at this point pBuffer is still aligned. 
+ + firstVector = Sse2.LoadAlignedVector128((ushort*)pBuffer); + + if (Sse41.IsSupported) + { + // If a non-ASCII bit is set in any WORD of the combined vector, we have seen non-ASCII data. + // Jump to the non-ASCII handler to figure out which particular vector contained non-ASCII data. + if (!Sse41.TestZ(firstVector, asciiMaskForTestZ)) + { + goto FoundNonAsciiDataInFirstVector; + } + } + else + { + // See comment earlier in the method for an explanation of how the below logic works. + currentMask = (uint)Sse2.MoveMask(Sse2.AddSaturate(firstVector, asciiMaskForAddSaturate).AsByte()); + if ((currentMask & NonAsciiDataSeenMask) != 0) + { + goto FoundNonAsciiDataInCurrentMask; + } + } + + IncrementCurrentOffsetBeforeFinalUnalignedVectorRead: + + pBuffer += SizeOfVector128InChars; + + DoFinalUnalignedVectorRead: + + if (((byte)bufferLength & (SizeOfVector128InBytes - 1)) != 0) + { + // Perform an unaligned read of the last vector. + // We need to adjust the pointer because we're re-reading data. + + pBuffer = (char*)((byte*)pBuffer + (bufferLength & (SizeOfVector128InBytes - 1)) - SizeOfVector128InBytes); + firstVector = Sse2.LoadVector128((ushort*)pBuffer); // unaligned load + + if (Sse41.IsSupported) + { + // If a non-ASCII bit is set in any WORD of the combined vector, we have seen non-ASCII data. + // Jump to the non-ASCII handler to figure out which particular vector contained non-ASCII data. + if (!Sse41.TestZ(firstVector, asciiMaskForTestZ)) + { + goto FoundNonAsciiDataInFirstVector; + } + } + else + { + // See comment earlier in the method for an explanation of how the below logic works. 
+ currentMask = (uint)Sse2.MoveMask(Sse2.AddSaturate(firstVector, asciiMaskForAddSaturate).AsByte()); + if ((currentMask & NonAsciiDataSeenMask) != 0) + { + goto FoundNonAsciiDataInCurrentMask; + } + } + + pBuffer += SizeOfVector128InChars; + } + + Finish: + + Debug.Assert(((nuint)pBuffer - (nuint)pOriginalBuffer) % 2 == 0, "Shouldn't have incremented any pointer by an odd byte count."); + return ((nuint)pBuffer - (nuint)pOriginalBuffer) / sizeof(char); // and we're done! (remember to adjust for char count) + + FoundNonAsciiDataInFirstOrSecondVector: + + // We don't know if the first or the second vector contains non-ASCII data. Check the first + // vector, and if that's all-ASCII then the second vector must be the culprit. Either way + // we'll make sure the first vector local is the one that contains the non-ASCII data. + + // See comment earlier in the method for an explanation of how the below logic works. + if (Sse41.IsSupported) + { + if (!Sse41.TestZ(firstVector, asciiMaskForTestZ)) + { + goto FoundNonAsciiDataInFirstVector; + } + } + else + { + currentMask = (uint)Sse2.MoveMask(Sse2.AddSaturate(firstVector, asciiMaskForAddSaturate).AsByte()); + if ((currentMask & NonAsciiDataSeenMask) != 0) + { + goto FoundNonAsciiDataInCurrentMask; + } + } + + // Wasn't the first vector; must be the second. + + pBuffer += SizeOfVector128InChars; + firstVector = secondVector; + + FoundNonAsciiDataInFirstVector: + + // See comment earlier in the method for an explanation of how the below logic works. + currentMask = (uint)Sse2.MoveMask(Sse2.AddSaturate(firstVector, asciiMaskForAddSaturate).AsByte()); + + FoundNonAsciiDataInCurrentMask: + + // See comment earlier in the method accounting for the 0x8000 and 0x0080 bits set after the WORD-sized operations. + + currentMask &= NonAsciiDataSeenMask; + + // Now, the mask contains - from the LSB - a 0b00 pair for each ASCII char we saw, and a 0b10 pair for each non-ASCII char. 
+ // + // (Keep endianness in mind in the below examples.) + // A non-ASCII char followed by two ASCII chars is 0b..._00_00_10. (tzcnt = 1) + // An ASCII char followed by two non-ASCII chars is 0b..._10_10_00. (tzcnt = 3) + // Two ASCII chars followed by a non-ASCII char is 0b..._10_00_00. (tzcnt = 5) + // + // This means tzcnt = 2 * numLeadingAsciiChars + 1. We can conveniently take advantage of the fact + // that the 2x multiplier already matches the char* stride length, then just subtract 1 at the end to + // compute the correct final ending pointer value. + + Debug.Assert(currentMask != 0, "Shouldn't be here unless we see non-ASCII data."); + pBuffer = (char*)((byte*)pBuffer + (uint)BitOperations.TrailingZeroCount(currentMask) - 1); + + goto Finish; + + FoundNonAsciiDataInCurrentDWord: + + uint currentDWord; + Debug.Assert(!AllCharsInUInt32AreAscii(currentDWord), "Shouldn't be here unless we see non-ASCII data."); + + if (FirstCharInUInt32IsAscii(currentDWord)) + { + pBuffer++; // skip past the ASCII char + } + + goto Finish; + + InputBufferLessThanOneVectorInLength: + + // These code paths get hit if the original input length was less than one vector in size. + // We can't perform vectorized reads at this point, so we'll fall back to reading primitives + // directly. Note that all of these reads are unaligned. + + // Reminder: If this code path is hit, bufferLength is still a char count, not a byte count. + // We skipped the code path that multiplied the count by sizeof(char). + + Debug.Assert(bufferLength < SizeOfVector128InChars); + + // QWORD drain + + if ((bufferLength & 4) != 0) + { + if (UIntPtr.Size == sizeof(ulong)) + { + // If we can use 64-bit tzcnt to count the number of leading ASCII chars, prefer it. + + ulong candidateUInt64 = Unsafe.ReadUnaligned(pBuffer); + if (!AllCharsInUInt64AreAscii(candidateUInt64)) + { + // Clear the low 7 bits (the ASCII bits) of each char, then tzcnt. 
+ // Remember to divide by 8 at the end to convert bit count to byte count, + // then the & ~1 at the end to treat a match in the high byte of + // any char the same as a match in the low byte of that same char. + + candidateUInt64 &= 0xFF80FF80_FF80FF80ul; + pBuffer = (char*)((byte*)pBuffer + ((nuint)(BitOperations.TrailingZeroCount(candidateUInt64) >> 3) & ~(nuint)1)); + goto Finish; + } + } + else + { + // If we can't use 64-bit tzcnt, no worries. We'll just do 2x 32-bit reads instead. + + currentDWord = Unsafe.ReadUnaligned(pBuffer); + uint nextDWord = Unsafe.ReadUnaligned(pBuffer + 4 / sizeof(char)); + + if (!AllCharsInUInt32AreAscii(currentDWord | nextDWord)) + { + // At least one of the values wasn't all-ASCII. + // We need to figure out which one it was and stick it in the currentMask local. + + if (AllCharsInUInt32AreAscii(currentDWord)) + { + currentDWord = nextDWord; // this one is the culprit + pBuffer += 4 / sizeof(char); + } + + goto FoundNonAsciiDataInCurrentDWord; + } + } + + pBuffer += 4; // successfully consumed 4 ASCII chars + } + + // DWORD drain + + if ((bufferLength & 2) != 0) + { + currentDWord = Unsafe.ReadUnaligned(pBuffer); + + if (!AllCharsInUInt32AreAscii(currentDWord)) + { + goto FoundNonAsciiDataInCurrentDWord; + } + + pBuffer += 2; // successfully consumed 2 ASCII chars + } + + // WORD drain + // This is the final drain; there's no need for a BYTE drain since our elemental type is 16-bit char. + + if ((bufferLength & 1) != 0) + { + if (*pBuffer <= 0x007F) + { + pBuffer++; // successfully consumed a single char + } + } + + goto Finish; + } + + private static unsafe nuint NarrowUtf16ToAscii_Sse2(char* pUtf16Buffer, byte* pAsciiBuffer, nuint elementCount) + { + // This method contains logic optimized for both SSE2 and SSE41. Much of the logic in this method + // will be elided by JIT once we determine which specific ISAs we support. 
+ + // JIT turns the below into constants + + uint SizeOfVector128 = (uint)Unsafe.SizeOf>(); + nuint MaskOfAllBitsInVector128 = (nuint)(SizeOfVector128 - 1); + + // This method is written such that control generally flows top-to-bottom, avoiding + // jumps as much as possible in the optimistic case of "all ASCII". If we see non-ASCII + // data, we jump out of the hot paths to targets at the end of the method. + + Debug.Assert(Sse2.IsSupported); + Debug.Assert(BitConverter.IsLittleEndian); + Debug.Assert(elementCount >= 2 * SizeOfVector128); + + Vector128 asciiMaskForTestZ = Vector128.Create(unchecked((short)0xFF80)); // used for PTEST on supported hardware + Vector128 asciiMaskForAddSaturate = Vector128.Create((ushort)0x7F80); // used for PADDUSW + const int NonAsciiDataSeenMask = 0b_1010_1010_1010_1010; // used for determining whether the pmovmskb operation saw non-ASCII chars + + // First, perform an unaligned read of the first part of the input buffer. + + Vector128 utf16VectorFirst = Sse2.LoadVector128((short*)pUtf16Buffer); // unaligned load + + // If there's non-ASCII data in the first 8 elements of the vector, there's nothing we can do. + // See comments in GetIndexOfFirstNonAsciiChar_Sse2 for information about how this works. + + if (Sse41.IsSupported) + { + if (!Sse41.TestZ(utf16VectorFirst, asciiMaskForTestZ)) + { + return 0; + } + } + else + { + if ((Sse2.MoveMask(Sse2.AddSaturate(utf16VectorFirst.AsUInt16(), asciiMaskForAddSaturate).AsByte()) & NonAsciiDataSeenMask) != 0) + { + return 0; + } + } + + // Turn the 8 ASCII chars we just read into 8 ASCII bytes, then copy it to the destination. 
+ + Vector128 asciiVector = Sse2.PackUnsignedSaturate(utf16VectorFirst, utf16VectorFirst); + Sse2.StoreScalar((ulong*)pAsciiBuffer, asciiVector.AsUInt64()); // ulong* calculated here is UNALIGNED + + nuint currentOffsetInElements = SizeOfVector128 / 2; // we processed 8 elements so far + + // We're going to get the best performance when we have aligned writes, so we'll take the + // hit of potentially unaligned reads in order to hit this sweet spot. + + // pAsciiBuffer points to the start of the destination buffer, immediately before where we wrote + // the 8 bytes previously. If the 0x08 bit is set at the pinned address, then the 8 bytes we wrote + // previously mean that the 0x08 bit is *not* set at address &pAsciiBuffer[SizeOfVector128 / 2]. In + // that case we can immediately back up to the previous aligned boundary and start the main loop. + // If the 0x08 bit is *not* set at the pinned address, then it means the 0x08 bit *is* set at + // address &pAsciiBuffer[SizeOfVector128 / 2], and we should perform one more 8-byte write to bump + // just past the next aligned boundary address. + + if (((uint)pAsciiBuffer & (SizeOfVector128 / 2)) == 0) + { + // We need to perform one more partial vector write before we can get the alignment we want. + + utf16VectorFirst = Sse2.LoadVector128((short*)pUtf16Buffer + currentOffsetInElements); // unaligned load + + // See comments earlier in this method for information about how this works. + if (Sse41.IsSupported) + { + if (!Sse41.TestZ(utf16VectorFirst, asciiMaskForTestZ)) + { + goto Finish; + } + } + else + { + if ((Sse2.MoveMask(Sse2.AddSaturate(utf16VectorFirst.AsUInt16(), asciiMaskForAddSaturate).AsByte()) & NonAsciiDataSeenMask) != 0) + { + goto Finish; + } + } + + // Turn the 8 ASCII chars we just read into 8 ASCII bytes, then copy it to the destination. 
+ asciiVector = Sse2.PackUnsignedSaturate(utf16VectorFirst, utf16VectorFirst); + Sse2.StoreScalar((ulong*)(pAsciiBuffer + currentOffsetInElements), asciiVector.AsUInt64()); // ulong* calculated here is UNALIGNED + } + + // Calculate how many elements we wrote in order to get pAsciiBuffer to its next alignment + // point, then use that as the base offset going forward. + + currentOffsetInElements = SizeOfVector128 - ((nuint)pAsciiBuffer & MaskOfAllBitsInVector128); + Debug.Assert(0 < currentOffsetInElements && currentOffsetInElements <= SizeOfVector128, "We wrote at least 1 byte but no more than a whole vector."); + + Debug.Assert(currentOffsetInElements <= elementCount, "Shouldn't have overrun the destination buffer."); + Debug.Assert(elementCount - currentOffsetInElements >= SizeOfVector128, "We should be able to run at least one whole vector."); + + nuint finalOffsetWhereCanRunLoop = elementCount - SizeOfVector128; + do + { + // In a loop, perform two unaligned reads, narrow to a single vector, then aligned write one vector. + + utf16VectorFirst = Sse2.LoadVector128((short*)pUtf16Buffer + currentOffsetInElements); // unaligned load + Vector128 utf16VectorSecond = Sse2.LoadVector128((short*)pUtf16Buffer + currentOffsetInElements + SizeOfVector128 / sizeof(short)); // unaligned load + Vector128 combinedVector = Sse2.Or(utf16VectorFirst, utf16VectorSecond); + + // See comments in GetIndexOfFirstNonAsciiChar_Sse2 for information about how this works. + if (Sse41.IsSupported) + { + if (!Sse41.TestZ(combinedVector, asciiMaskForTestZ)) + { + goto FoundNonAsciiDataInLoop; + } + } + else + { + if ((Sse2.MoveMask(Sse2.AddSaturate(combinedVector.AsUInt16(), asciiMaskForAddSaturate).AsByte()) & NonAsciiDataSeenMask) != 0) + { + goto FoundNonAsciiDataInLoop; + } + } + + // Build up the ASCII vector and perform the store. 
+ + asciiVector = Sse2.PackUnsignedSaturate(utf16VectorFirst, utf16VectorSecond); + + Debug.Assert(((nuint)pAsciiBuffer + currentOffsetInElements) % SizeOfVector128 == 0, "Write should be aligned."); + Sse2.StoreAligned(pAsciiBuffer + currentOffsetInElements, asciiVector); // aligned + + currentOffsetInElements += SizeOfVector128; + } while (currentOffsetInElements <= finalOffsetWhereCanRunLoop); + + Finish: + + // There might be some ASCII data left over. That's fine - we'll let our caller handle the final drain. + return currentOffsetInElements; + + FoundNonAsciiDataInLoop: + + // Can we at least narrow the high vector? + // See comments in GetIndexOfFirstNonAsciiChar_Sse2 for information about how this works. + if (Sse41.IsSupported) + { + if (!Sse41.TestZ(utf16VectorFirst, asciiMaskForTestZ)) + { + goto Finish; // found non-ASCII data + } + } + else + { + if ((Sse2.MoveMask(Sse2.AddSaturate(utf16VectorFirst.AsUInt16(), asciiMaskForAddSaturate).AsByte()) & NonAsciiDataSeenMask) != 0) + { + goto Finish; // found non-ASCII data + } + } + + // First part was all ASCII, narrow and aligned write. Note we're only filling in the low half of the vector. + asciiVector = Sse2.PackUnsignedSaturate(utf16VectorFirst, utf16VectorFirst); + + Debug.Assert(((nuint)pAsciiBuffer + currentOffsetInElements) % sizeof(ulong) == 0, "Destination should be ulong-aligned."); + + Sse2.StoreScalar((ulong*)(pAsciiBuffer + currentOffsetInElements), asciiVector.AsUInt64()); // ulong* calculated here is aligned + currentOffsetInElements += SizeOfVector128 / 2; + + goto Finish; + } + + /// + /// Copies as many ASCII bytes (00..7F) as possible from + /// to , stopping when the first non-ASCII byte is encountered + /// or once elements have been converted. Returns the total number + /// of elements that were able to be converted. 
+ /// + public static unsafe nuint WidenAsciiToUtf16(byte* pAsciiBuffer, char* pUtf16Buffer, nuint elementCount) + { + // Intrinsified in mono interpreter + nuint currentOffset = 0; + + // If SSE2 is supported, use those specific intrinsics instead of the generic vectorized + // code below. This has two benefits: (a) we can take advantage of specific instructions like + // pmovmskb which we know are optimized, and (b) we can avoid downclocking the processor while + // this method is running. + + if (BitConverter.IsLittleEndian && (Sse2.IsSupported || AdvSimd.Arm64.IsSupported)) + { + if (elementCount >= 2 * (uint)Unsafe.SizeOf>()) + { + currentOffset = WidenAsciiToUtf16_Intrinsified(pAsciiBuffer, pUtf16Buffer, elementCount); + } + } + else if (Vector.IsHardwareAccelerated) + { + uint SizeOfVector = (uint)Unsafe.SizeOf>(); // JIT will make this a const + + // Only bother vectorizing if we have enough data to do so. + if (elementCount >= SizeOfVector) + { + // Note use of SBYTE instead of BYTE below; we're using the two's-complement + // representation of negative integers to act as a surrogate for "is ASCII?". + + nuint finalOffsetWhereCanLoop = elementCount - SizeOfVector; + do + { + Vector asciiVector = Unsafe.ReadUnaligned>(pAsciiBuffer + currentOffset); + if (Vector.LessThanAny(asciiVector, Vector.Zero)) + { + break; // found non-ASCII data + } + + Vector.Widen(Vector.AsVectorByte(asciiVector), out Vector utf16LowVector, out Vector utf16HighVector); + + // TODO: Is the below logic also valid for big-endian platforms? + Unsafe.WriteUnaligned>(pUtf16Buffer + currentOffset, utf16LowVector); + Unsafe.WriteUnaligned>(pUtf16Buffer + currentOffset + Vector.Count, utf16HighVector); + + currentOffset += SizeOfVector; + } while (currentOffset <= finalOffsetWhereCanLoop); + } + } + + Debug.Assert(currentOffset <= elementCount); + nuint remainingElementCount = elementCount - currentOffset; + + // Try to widen 32 bits -> 64 bits at a time. 
+ // We needn't update remainingElementCount after this point. + + uint asciiData; + + if (remainingElementCount >= 4) + { + nuint finalOffsetWhereCanLoop = currentOffset + remainingElementCount - 4; + do + { + asciiData = Unsafe.ReadUnaligned(pAsciiBuffer + currentOffset); + if (!AllBytesInUInt32AreAscii(asciiData)) + { + goto FoundNonAsciiData; + } + + WidenFourAsciiBytesToUtf16AndWriteToBuffer(ref pUtf16Buffer[currentOffset], asciiData); + currentOffset += 4; + } while (currentOffset <= finalOffsetWhereCanLoop); + } + + // Try to widen 16 bits -> 32 bits. + + if (((uint)remainingElementCount & 2) != 0) + { + asciiData = Unsafe.ReadUnaligned(pAsciiBuffer + currentOffset); + if (!AllBytesInUInt32AreAscii(asciiData)) + { + goto FoundNonAsciiData; + } + + if (BitConverter.IsLittleEndian) + { + pUtf16Buffer[currentOffset] = (char)(byte)asciiData; + pUtf16Buffer[currentOffset + 1] = (char)(asciiData >> 8); + } + else + { + pUtf16Buffer[currentOffset + 1] = (char)(byte)asciiData; + pUtf16Buffer[currentOffset] = (char)(asciiData >> 8); + } + + currentOffset += 2; + } + + // Try to widen 8 bits -> 16 bits. + + if (((uint)remainingElementCount & 1) != 0) + { + asciiData = pAsciiBuffer[currentOffset]; + if (((byte)asciiData & 0x80) != 0) + { + goto Finish; + } + + pUtf16Buffer[currentOffset] = (char)asciiData; + currentOffset++; + } + + Finish: + + return currentOffset; + + FoundNonAsciiData: + + Debug.Assert(!AllBytesInUInt32AreAscii(asciiData), "Shouldn't have reached this point if we have an all-ASCII input."); + + // Drain ASCII bytes one at a time. 
+ + while (((byte)asciiData & 0x80) == 0) + { + pUtf16Buffer[currentOffset] = (char)(byte)asciiData; + currentOffset++; + asciiData >>= 8; + } + + goto Finish; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static bool ContainsNonAsciiByte(Vector128 value) + { + if (!AdvSimd.Arm64.IsSupported) + { + throw new PlatformNotSupportedException(); + } + value = AdvSimd.Arm64.MaxPairwise(value, value); + return (value.AsUInt64().ToScalar() & 0x8080808080808080) != 0; + } + + private static unsafe nuint WidenAsciiToUtf16_Intrinsified(byte* pAsciiBuffer, char* pUtf16Buffer, nuint elementCount) + { + // JIT turns the below into constants + + uint SizeOfVector128 = (uint)Unsafe.SizeOf>(); + nuint MaskOfAllBitsInVector128 = (nuint)(SizeOfVector128 - 1); + + // This method is written such that control generally flows top-to-bottom, avoiding + // jumps as much as possible in the optimistic case of "all ASCII". If we see non-ASCII + // data, we jump out of the hot paths to targets at the end of the method. + + Debug.Assert(Sse2.IsSupported || AdvSimd.Arm64.IsSupported); + Debug.Assert(BitConverter.IsLittleEndian); + Debug.Assert(elementCount >= 2 * SizeOfVector128); + + // We're going to get the best performance when we have aligned writes, so we'll take the + // hit of potentially unaligned reads in order to hit this sweet spot. + + Vector128 asciiVector; + Vector128 utf16FirstHalfVector; + bool containsNonAsciiBytes; + + // First, perform an unaligned read of the first part of the input buffer. 
+ + if (Sse2.IsSupported) + { + asciiVector = Sse2.LoadVector128(pAsciiBuffer); // unaligned load + containsNonAsciiBytes = (uint)Sse2.MoveMask(asciiVector) != 0; + } + else if (AdvSimd.Arm64.IsSupported) + { + asciiVector = AdvSimd.LoadVector128(pAsciiBuffer); + containsNonAsciiBytes = ContainsNonAsciiByte(asciiVector); + } + else + { + throw new PlatformNotSupportedException(); + } + + // If there's non-ASCII data in the first 8 elements of the vector, there's nothing we can do. + + if (containsNonAsciiBytes) + { + return 0; + } + + // Then perform an unaligned write of the first part of the input buffer. + + Vector128 zeroVector = Vector128.Zero; + + if (Sse2.IsSupported) + { + utf16FirstHalfVector = Sse2.UnpackLow(asciiVector, zeroVector); + Sse2.Store((byte*)pUtf16Buffer, utf16FirstHalfVector); // unaligned + } + else if (AdvSimd.IsSupported) + { + utf16FirstHalfVector = AdvSimd.ZeroExtendWideningLower(asciiVector.GetLower()).AsByte(); + AdvSimd.Store((byte*)pUtf16Buffer, utf16FirstHalfVector); // unaligned + } + else + { + throw new PlatformNotSupportedException(); + } + + // Calculate how many elements we wrote in order to get pOutputBuffer to its next alignment + // point, then use that as the base offset going forward. Remember the >> 1 to account for + // that we wrote chars, not bytes. This means we may re-read data in the next iteration of + // the loop, but this is ok. + + nuint currentOffset = (SizeOfVector128 >> 1) - (((nuint)pUtf16Buffer >> 1) & (MaskOfAllBitsInVector128 >> 1)); + Debug.Assert(0 < currentOffset && currentOffset <= SizeOfVector128 / sizeof(char)); + + nuint finalOffsetWhereCanRunLoop = elementCount - SizeOfVector128; + + // Calculating the destination address outside the loop results in significant + // perf wins vs. relying on the JIT to fold memory addressing logic into the + // write instructions. 
See: https://github.com/dotnet/runtime/issues/33002 + + char* pCurrentWriteAddress = pUtf16Buffer + currentOffset; + + do + { + // In a loop, perform an unaligned read, widen to two vectors, then aligned write the two vectors. + + if (Sse2.IsSupported) + { + asciiVector = Sse2.LoadVector128(pAsciiBuffer + currentOffset); // unaligned load + containsNonAsciiBytes = (uint)Sse2.MoveMask(asciiVector) != 0; + } + else if (AdvSimd.Arm64.IsSupported) + { + asciiVector = AdvSimd.LoadVector128(pAsciiBuffer + currentOffset); + containsNonAsciiBytes = ContainsNonAsciiByte(asciiVector); + } + else + { + throw new PlatformNotSupportedException(); + } + + if (containsNonAsciiBytes) + { + // non-ASCII byte somewhere + goto NonAsciiDataSeenInInnerLoop; + } + + if (Sse2.IsSupported) + { + Vector128 low = Sse2.UnpackLow(asciiVector, zeroVector); + Sse2.StoreAligned((byte*)pCurrentWriteAddress, low); + + Vector128 high = Sse2.UnpackHigh(asciiVector, zeroVector); + Sse2.StoreAligned((byte*)pCurrentWriteAddress + SizeOfVector128, high); + } + else if (AdvSimd.Arm64.IsSupported) + { + Vector128 low = AdvSimd.ZeroExtendWideningLower(asciiVector.GetLower()); + Vector128 high = AdvSimd.ZeroExtendWideningUpper(asciiVector); + AdvSimd.Arm64.StorePair((ushort*)pCurrentWriteAddress, low, high); + } + else + { + throw new PlatformNotSupportedException(); + } + + currentOffset += SizeOfVector128; + pCurrentWriteAddress += SizeOfVector128; + } while (currentOffset <= finalOffsetWhereCanRunLoop); + + Finish: + + return currentOffset; + + NonAsciiDataSeenInInnerLoop: + + // Can we at least widen the first part of the vector? 
+ + if (!containsNonAsciiBytes) + { + // First part was all ASCII, widen + if (Sse2.IsSupported) + { + utf16FirstHalfVector = Sse2.UnpackLow(asciiVector, zeroVector); + Sse2.StoreAligned((byte*)(pUtf16Buffer + currentOffset), utf16FirstHalfVector); + } + else if (AdvSimd.Arm64.IsSupported) + { + Vector128 lower = AdvSimd.ZeroExtendWideningLower(asciiVector.GetLower()); + AdvSimd.Store((ushort*)(pUtf16Buffer + currentOffset), lower); + } + else + { + throw new PlatformNotSupportedException(); + } + currentOffset += SizeOfVector128 / 2; + } + + goto Finish; + } + + /// + /// Given a DWORD which represents a buffer of 4 bytes, widens the buffer into 4 WORDs and + /// writes them to the output buffer with machine endianness. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static void WidenFourAsciiBytesToUtf16AndWriteToBuffer(ref char outputBuffer, uint value) + { + Debug.Assert(AllBytesInUInt32AreAscii(value)); + + if (Sse2.X64.IsSupported) + { + Debug.Assert(BitConverter.IsLittleEndian, "SSE2 widening assumes little-endian."); + Vector128 vecNarrow = Sse2.ConvertScalarToVector128UInt32(value).AsByte(); + Vector128 vecWide = Sse2.UnpackLow(vecNarrow, Vector128.Zero).AsUInt64(); + Unsafe.WriteUnaligned(ref Unsafe.As(ref outputBuffer), Sse2.X64.ConvertToUInt64(vecWide)); + } + else if (AdvSimd.Arm64.IsSupported) + { + Vector128 vecNarrow = AdvSimd.DuplicateToVector128(value).AsByte(); + Vector128 vecWide = AdvSimd.Arm64.ZipLow(vecNarrow, Vector128.Zero).AsUInt64(); + Unsafe.WriteUnaligned(ref Unsafe.As(ref outputBuffer), vecWide.ToScalar()); + } + else + { + if (BitConverter.IsLittleEndian) + { + outputBuffer = (char)(byte)value; + value >>= 8; + Unsafe.Add(ref outputBuffer, 1) = (char)(byte)value; + value >>= 8; + Unsafe.Add(ref outputBuffer, 2) = (char)(byte)value; + value >>= 8; + Unsafe.Add(ref outputBuffer, 3) = (char)value; + } + else + { + Unsafe.Add(ref outputBuffer, 3) = (char)(byte)value; + value >>= 8; + Unsafe.Add(ref outputBuffer, 2) = 
(char)(byte)value; + value >>= 8; + Unsafe.Add(ref outputBuffer, 1) = (char)(byte)value; + value >>= 8; + outputBuffer = (char)value; + } + } + } + } +} +#endif diff --git a/src/DotNetty.Common/Internal/ASCIIUtility.NetCore3.cs b/src/DotNetty.Common/Internal/ASCIIUtility.NetCore3.cs new file mode 100644 index 000000000..a642045ee --- /dev/null +++ b/src/DotNetty.Common/Internal/ASCIIUtility.NetCore3.cs @@ -0,0 +1,1092 @@ +// borrowed from https://github.com/dotnet/corefx/blob/release/3.1/src/Common/src/CoreLib/System/Text/ASCIIUtility.cs + +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +#if NETCOREAPP3_1 +using System; +using System.Diagnostics; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; + +namespace DotNetty.Common.Internal +{ + partial class ASCIIUtility + { + /// + /// Returns the index in where the first non-ASCII byte is found. + /// Returns if the buffer is empty or all-ASCII. + /// + /// An ASCII byte is defined as 0x00 - 0x7F, inclusive. + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static unsafe nuint GetIndexOfFirstNonAsciiByte(byte* pBuffer, nuint bufferLength) + { + // If SSE2 is supported, use those specific intrinsics instead of the generic vectorized + // code below. This has two benefits: (a) we can take advantage of specific instructions like + // pmovmskb which we know are optimized, and (b) we can avoid downclocking the processor while + // this method is running. + + return Sse2.IsSupported + ? 
GetIndexOfFirstNonAsciiByte_Sse2(pBuffer, bufferLength) + : GetIndexOfFirstNonAsciiByte_Default(pBuffer, bufferLength); + } + + private static unsafe nuint GetIndexOfFirstNonAsciiByte_Sse2(byte* pBuffer, nuint bufferLength) + { + // JIT turns the below into constants + + uint SizeOfVector128 = (uint)Unsafe.SizeOf>(); + nuint MaskOfAllBitsInVector128 = (nuint)(SizeOfVector128 - 1); + + Debug.Assert(Sse2.IsSupported, "Should've been checked by caller."); + Debug.Assert(BitConverter.IsLittleEndian, "SSE2 assumes little-endian."); + + uint currentMask, secondMask; + byte* pOriginalBuffer = pBuffer; + + // This method is written such that control generally flows top-to-bottom, avoiding + // jumps as much as possible in the optimistic case of a large enough buffer and + // "all ASCII". If we see non-ASCII data, we jump out of the hot paths to targets + // after all the main logic. + + if (bufferLength < SizeOfVector128) + { + goto InputBufferLessThanOneVectorInLength; // can't vectorize; drain primitives instead + } + + // Read the first vector unaligned. + + currentMask = (uint)Sse2.MoveMask(Sse2.LoadVector128(pBuffer)); // unaligned load + + if (currentMask != 0) + { + goto FoundNonAsciiDataInCurrentMask; + } + + // If we have less than 32 bytes to process, just go straight to the final unaligned + // read. There's no need to mess with the loop logic in the middle of this method. + + if (bufferLength < 2 * SizeOfVector128) + { + goto IncrementCurrentOffsetBeforeFinalUnalignedVectorRead; + } + + // Now adjust the read pointer so that future reads are aligned. 
+ + pBuffer = (byte*)(((nuint)pBuffer + SizeOfVector128) & ~(nuint)MaskOfAllBitsInVector128); + +#if DEBUG + long numBytesRead = pBuffer - pOriginalBuffer; + Debug.Assert(0 < numBytesRead && numBytesRead <= SizeOfVector128, "We should've made forward progress of at least one byte."); + Debug.Assert((nuint)numBytesRead <= bufferLength, "We shouldn't have read past the end of the input buffer."); +#endif + + // Adjust the remaining length to account for what we just read. + + bufferLength += (nuint)pOriginalBuffer; + bufferLength -= (nuint)pBuffer; + + // The buffer is now properly aligned. + // Read 2 vectors at a time if possible. + + if (bufferLength >= 2 * SizeOfVector128) + { + byte* pFinalVectorReadPos = (byte*)((nuint)pBuffer + bufferLength - 2 * SizeOfVector128); + + // After this point, we no longer need to update the bufferLength value. + + do + { + Vector128 firstVector = Sse2.LoadAlignedVector128(pBuffer); + Vector128 secondVector = Sse2.LoadAlignedVector128(pBuffer + SizeOfVector128); + + currentMask = (uint)Sse2.MoveMask(firstVector); + secondMask = (uint)Sse2.MoveMask(secondVector); + + if ((currentMask | secondMask) != 0) + { + goto FoundNonAsciiDataInInnerLoop; + } + + pBuffer += 2 * SizeOfVector128; + } while (pBuffer <= pFinalVectorReadPos); + } + + // We have somewhere between 0 and (2 * vector length) - 1 bytes remaining to read from. + // Since the above loop doesn't update bufferLength, we can't rely on its absolute value. + // But we _can_ rely on it to tell us how much remaining data must be drained by looking + // at what bits of it are set. This works because had we updated it within the loop above, + // we would've been adding 2 * SizeOfVector128 on each iteration, but we only care about + // bits which are less significant than those that the addition would've acted on. + + // If there is fewer than one vector length remaining, skip the next aligned read. 
+ + if (0ul >= (bufferLength & SizeOfVector128)) + { + goto DoFinalUnalignedVectorRead; + } + + // At least one full vector's worth of data remains, so we can safely read it. + // Remember, at this point pBuffer is still aligned. + + currentMask = (uint)Sse2.MoveMask(Sse2.LoadAlignedVector128(pBuffer)); + if (currentMask != 0) + { + goto FoundNonAsciiDataInCurrentMask; + } + + IncrementCurrentOffsetBeforeFinalUnalignedVectorRead: + + pBuffer += SizeOfVector128; + + DoFinalUnalignedVectorRead: + + if (((byte)bufferLength & MaskOfAllBitsInVector128) != 0) + { + // Perform an unaligned read of the last vector. + // We need to adjust the pointer because we're re-reading data. + + pBuffer += (bufferLength & MaskOfAllBitsInVector128) - SizeOfVector128; + + currentMask = (uint)Sse2.MoveMask(Sse2.LoadVector128(pBuffer)); // unaligned load + if (currentMask != 0) + { + goto FoundNonAsciiDataInCurrentMask; + } + + pBuffer += SizeOfVector128; + } + + Finish: + + return (nuint)pBuffer - (nuint)pOriginalBuffer; // and we're done! + + FoundNonAsciiDataInInnerLoop: + + // If the current (first) mask isn't the mask that contains non-ASCII data, then it must + // instead be the second mask. If so, skip the entire first mask and drain ASCII bytes + // from the second mask. + + if (0u >= currentMask) + { + pBuffer += SizeOfVector128; + currentMask = secondMask; + } + + FoundNonAsciiDataInCurrentMask: + + // The mask contains - from the LSB - a 0 for each ASCII byte we saw, and a 1 for each non-ASCII byte. + // Tzcnt is the correct operation to count the number of zero bits quickly. If this instruction isn't + // available, we'll fall back to a normal loop. 
+ + Debug.Assert(currentMask != 0, "Shouldn't be here unless we see non-ASCII data."); + pBuffer += (uint)BitOperations.TrailingZeroCount(currentMask); + + goto Finish; + + FoundNonAsciiDataInCurrentDWord: + + uint currentDWord; + Debug.Assert(!AllBytesInUInt32AreAscii(currentDWord), "Shouldn't be here unless we see non-ASCII data."); + pBuffer += CountNumberOfLeadingAsciiBytesFromUInt32WithSomeNonAsciiData(currentDWord); + + goto Finish; + + InputBufferLessThanOneVectorInLength: + + // These code paths get hit if the original input length was less than one vector in size. + // We can't perform vectorized reads at this point, so we'll fall back to reading primitives + // directly. Note that all of these reads are unaligned. + + Debug.Assert(bufferLength < SizeOfVector128); + + // QWORD drain + + if ((bufferLength & 8) != 0) + { + if (Bmi1.X64.IsSupported) + { + // If we can use 64-bit tzcnt to count the number of leading ASCII bytes, prefer it. + + ulong candidateUInt64 = Unsafe.ReadUnaligned(pBuffer); + if (!AllBytesInUInt64AreAscii(candidateUInt64)) + { + // Clear everything but the high bit of each byte, then tzcnt. + // Remember the / 8 at the end to convert bit count to byte count. + + candidateUInt64 &= UInt64HighBitsOnlyMask; + pBuffer += (nuint)(Bmi1.X64.TrailingZeroCount(candidateUInt64) / 8); + goto Finish; + } + } + else + { + // If we can't use 64-bit tzcnt, no worries. We'll just do 2x 32-bit reads instead. + + currentDWord = Unsafe.ReadUnaligned(pBuffer); + uint nextDWord = Unsafe.ReadUnaligned(pBuffer + 4); + + if (!AllBytesInUInt32AreAscii(currentDWord | nextDWord)) + { + // At least one of the values wasn't all-ASCII. + // We need to figure out which one it was and stick it in the currentMask local. 
+ + if (AllBytesInUInt32AreAscii(currentDWord)) + { + currentDWord = nextDWord; // this one is the culprit + pBuffer += 4; + } + + goto FoundNonAsciiDataInCurrentDWord; + } + } + + pBuffer += 8; // successfully consumed 8 ASCII bytes + } + + // DWORD drain + + if ((bufferLength & 4) != 0) + { + currentDWord = Unsafe.ReadUnaligned(pBuffer); + + if (!AllBytesInUInt32AreAscii(currentDWord)) + { + goto FoundNonAsciiDataInCurrentDWord; + } + + pBuffer += 4; // successfully consumed 4 ASCII bytes + } + + // WORD drain + // (We movzx to a DWORD for ease of manipulation.) + + if ((bufferLength & 2) != 0) + { + currentDWord = Unsafe.ReadUnaligned(pBuffer); + + if (!AllBytesInUInt32AreAscii(currentDWord)) + { + // We only care about the 0x0080 bit of the value. If it's not set, then we + // increment currentOffset by 1. If it's set, we don't increment it at all. + + pBuffer += (nuint)((nint)(sbyte)currentDWord >> 7) + 1; + goto Finish; + } + + pBuffer += 2; // successfully consumed 2 ASCII bytes + } + + // BYTE drain + + if ((bufferLength & 1) != 0) + { + // sbyte has non-negative value if byte is ASCII. + + if (*(sbyte*)(pBuffer) >= 0) + { + pBuffer++; // successfully consumed a single byte + } + } + + goto Finish; + } + + private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuint bufferLength /* in chars */) + { + // This method contains logic optimized for both SSE2 and SSE41. Much of the logic in this method + // will be elided by JIT once we determine which specific ISAs we support. + + // Quick check for empty inputs. 
+ + if (0ul >= bufferLength) + { + return 0; + } + + // JIT turns the below into constants + + uint SizeOfVector128InBytes = (uint)Unsafe.SizeOf>(); + uint SizeOfVector128InChars = SizeOfVector128InBytes / sizeof(char); + + Debug.Assert(Sse2.IsSupported, "Should've been checked by caller."); + Debug.Assert(BitConverter.IsLittleEndian, "SSE2 assumes little-endian."); + + Vector128 firstVector, secondVector; + uint currentMask; + char* pOriginalBuffer = pBuffer; + + if (bufferLength < SizeOfVector128InChars) + { + goto InputBufferLessThanOneVectorInLength; // can't vectorize; drain primitives instead + } + + // This method is written such that control generally flows top-to-bottom, avoiding + // jumps as much as possible in the optimistic case of "all ASCII". If we see non-ASCII + // data, we jump out of the hot paths to targets at the end of the method. + + Vector128 asciiMaskForPTEST = Vector128.Create(unchecked((short)0xFF80)); // used for PTEST on supported hardware + Vector128 asciiMaskForPMINUW = Vector128.Create((ushort)0x0080); // used for PMINUW on supported hardware + Vector128 asciiMaskForPXOR = Vector128.Create(unchecked((short)0x8000)); // used for PXOR + Vector128 asciiMaskForPCMPGTW = Vector128.Create(unchecked((short)0x807F)); // used for PCMPGTW + +#if NET + Debug.Assert(bufferLength <= nuint.MaxValue / sizeof(char)); +#endif + + // Read the first vector unaligned. + + firstVector = Sse2.LoadVector128((short*)pBuffer); // unaligned load + + if (Sse41.IsSupported) + { + // The SSE41-optimized code path works by forcing the 0x0080 bit in each WORD of the vector to be + // set iff the WORD element has value >= 0x0080 (non-ASCII). Then we'll treat it as a BYTE vector + // in order to extract the mask. 
+ currentMask = (uint)Sse2.MoveMask(Sse41.Min(firstVector.AsUInt16(), asciiMaskForPMINUW).AsByte()); + } + else + { + // The SSE2-optimized code path works by forcing each WORD of the vector to be 0xFFFF iff the WORD + // element has value >= 0x0080 (non-ASCII). Then we'll treat it as a BYTE vector in order to extract + // the mask. + currentMask = (uint)Sse2.MoveMask(Sse2.CompareGreaterThan(Sse2.Xor(firstVector, asciiMaskForPXOR), asciiMaskForPCMPGTW).AsByte()); + } + + if (currentMask != 0) + { + goto FoundNonAsciiDataInCurrentMask; + } + + // If we have less than 32 bytes to process, just go straight to the final unaligned + // read. There's no need to mess with the loop logic in the middle of this method. + + // Adjust the remaining length to account for what we just read. + // For the remainder of this code path, bufferLength will be in bytes, not chars. + + bufferLength <<= 1; // chars to bytes + + if (bufferLength < 2 * SizeOfVector128InBytes) + { + goto IncrementCurrentOffsetBeforeFinalUnalignedVectorRead; + } + + // Now adjust the read pointer so that future reads are aligned. + + pBuffer = (char*)(((nuint)pBuffer + SizeOfVector128InBytes) & ~(nuint)(SizeOfVector128InBytes - 1)); + +#if DEBUG + long numCharsRead = pBuffer - pOriginalBuffer; + Debug.Assert(0 < numCharsRead && numCharsRead <= SizeOfVector128InChars, "We should've made forward progress of at least one char."); + Debug.Assert((nuint)numCharsRead <= bufferLength, "We shouldn't have read past the end of the input buffer."); +#endif + + // Adjust remaining buffer length. + + bufferLength += (nuint)pOriginalBuffer; + bufferLength -= (nuint)pBuffer; + + // The buffer is now properly aligned. + // Read 2 vectors at a time if possible. + + if (bufferLength >= 2 * SizeOfVector128InBytes) + { + char* pFinalVectorReadPos = (char*)((nuint)pBuffer + bufferLength - 2 * SizeOfVector128InBytes); + + // After this point, we no longer need to update the bufferLength value. 
+ + do + { + firstVector = Sse2.LoadAlignedVector128((short*)pBuffer); + secondVector = Sse2.LoadAlignedVector128((short*)pBuffer + SizeOfVector128InChars); + Vector128 combinedVector = Sse2.Or(firstVector, secondVector); + + if (Sse41.IsSupported) + { + // If a non-ASCII bit is set in any WORD of the combined vector, we have seen non-ASCII data. + // Jump to the non-ASCII handler to figure out which particular vector contained non-ASCII data. + if (!Sse41.TestZ(combinedVector, asciiMaskForPTEST)) + { + goto FoundNonAsciiDataInFirstOrSecondVector; + } + } + else + { + // See comment earlier in the method for an explanation of how the below logic works. + if (Sse2.MoveMask(Sse2.CompareGreaterThan(Sse2.Xor(combinedVector, asciiMaskForPXOR), asciiMaskForPCMPGTW).AsByte()) != 0) + { + goto FoundNonAsciiDataInFirstOrSecondVector; + } + } + + pBuffer += 2 * SizeOfVector128InChars; + } while (pBuffer <= pFinalVectorReadPos); + } + + // We have somewhere between 0 and (2 * vector length) - 1 bytes remaining to read from. + // Since the above loop doesn't update bufferLength, we can't rely on its absolute value. + // But we _can_ rely on it to tell us how much remaining data must be drained by looking + // at what bits of it are set. This works because had we updated it within the loop above, + // we would've been adding 2 * SizeOfVector128 on each iteration, but we only care about + // bits which are less significant than those that the addition would've acted on. + + // If there is fewer than one vector length remaining, skip the next aligned read. + // Remember, at this point bufferLength is measured in bytes, not chars. + + if (0ul >= (bufferLength & SizeOfVector128InBytes)) + { + goto DoFinalUnalignedVectorRead; + } + + // At least one full vector's worth of data remains, so we can safely read it. + // Remember, at this point pBuffer is still aligned. 
+ + firstVector = Sse2.LoadAlignedVector128((short*)pBuffer); + + if (Sse41.IsSupported) + { + // If a non-ASCII bit is set in any WORD of the combined vector, we have seen non-ASCII data. + // Jump to the non-ASCII handler to figure out which particular vector contained non-ASCII data. + if (!Sse41.TestZ(firstVector, asciiMaskForPTEST)) + { + goto FoundNonAsciiDataInFirstVector; + } + } + else + { + // See comment earlier in the method for an explanation of how the below logic works. + currentMask = (uint)Sse2.MoveMask(Sse2.CompareGreaterThan(Sse2.Xor(firstVector, asciiMaskForPXOR), asciiMaskForPCMPGTW).AsByte()); + if (currentMask != 0) + { + goto FoundNonAsciiDataInCurrentMask; + } + } + + IncrementCurrentOffsetBeforeFinalUnalignedVectorRead: + + pBuffer += SizeOfVector128InChars; + + DoFinalUnalignedVectorRead: + + if (((byte)bufferLength & (SizeOfVector128InBytes - 1)) != 0) + { + // Perform an unaligned read of the last vector. + // We need to adjust the pointer because we're re-reading data. + + pBuffer = (char*)((byte*)pBuffer + (bufferLength & (SizeOfVector128InBytes - 1)) - SizeOfVector128InBytes); + firstVector = Sse2.LoadVector128((short*)pBuffer); // unaligned load + + if (Sse41.IsSupported) + { + // If a non-ASCII bit is set in any WORD of the combined vector, we have seen non-ASCII data. + // Jump to the non-ASCII handler to figure out which particular vector contained non-ASCII data. + if (!Sse41.TestZ(firstVector, asciiMaskForPTEST)) + { + goto FoundNonAsciiDataInFirstVector; + } + } + else + { + // See comment earlier in the method for an explanation of how the below logic works. 
+ currentMask = (uint)Sse2.MoveMask(Sse2.CompareGreaterThan(Sse2.Xor(firstVector, asciiMaskForPXOR), asciiMaskForPCMPGTW).AsByte()); + if (currentMask != 0) + { + goto FoundNonAsciiDataInCurrentMask; + } + } + + pBuffer += SizeOfVector128InChars; + } + + Finish: + + Debug.Assert(((nuint)pBuffer - (nuint)pOriginalBuffer) % 2 == 0, "Shouldn't have incremented any pointer by an odd byte count."); + return ((nuint)pBuffer - (nuint)pOriginalBuffer) / sizeof(char); // and we're done! (remember to adjust for char count) + + FoundNonAsciiDataInFirstOrSecondVector: + + // We don't know if the first or the second vector contains non-ASCII data. Check the first + // vector, and if that's all-ASCII then the second vector must be the culprit. Either way + // we'll make sure the first vector local is the one that contains the non-ASCII data. + + // See comment earlier in the method for an explanation of how the below logic works. + if (Sse41.IsSupported) + { + if (!Sse41.TestZ(firstVector, asciiMaskForPTEST)) + { + goto FoundNonAsciiDataInFirstVector; + } + } + else + { + currentMask = (uint)Sse2.MoveMask(Sse2.CompareGreaterThan(Sse2.Xor(firstVector, asciiMaskForPXOR), asciiMaskForPCMPGTW).AsByte()); + if (currentMask != 0) + { + goto FoundNonAsciiDataInCurrentMask; + } + } + + // Wasn't the first vector; must be the second. + + pBuffer += SizeOfVector128InChars; + firstVector = secondVector; + + FoundNonAsciiDataInFirstVector: + + // See comment earlier in the method for an explanation of how the below logic works. + if (Sse41.IsSupported) + { + currentMask = (uint)Sse2.MoveMask(Sse41.Min(firstVector.AsUInt16(), asciiMaskForPMINUW).AsByte()); + } + else + { + currentMask = (uint)Sse2.MoveMask(Sse2.CompareGreaterThan(Sse2.Xor(firstVector, asciiMaskForPXOR), asciiMaskForPCMPGTW).AsByte()); + } + + FoundNonAsciiDataInCurrentMask: + + // The mask contains - from the LSB - a 0 for each ASCII byte we saw, and a 1 for each non-ASCII byte. 
+ // Tzcnt is the correct operation to count the number of zero bits quickly. If this instruction isn't + // available, we'll fall back to a normal loop. (Even though the original vector used WORD elements, + // masks work on BYTE elements, and we account for this in the final fixup.) + + Debug.Assert(currentMask != 0, "Shouldn't be here unless we see non-ASCII data."); + pBuffer = (char*)((byte*)pBuffer + (uint)BitOperations.TrailingZeroCount(currentMask)); + + goto Finish; + + FoundNonAsciiDataInCurrentDWord: + + uint currentDWord; + Debug.Assert(!AllCharsInUInt32AreAscii(currentDWord), "Shouldn't be here unless we see non-ASCII data."); + + if (FirstCharInUInt32IsAscii(currentDWord)) + { + pBuffer++; // skip past the ASCII char + } + + goto Finish; + + InputBufferLessThanOneVectorInLength: + + // These code paths get hit if the original input length was less than one vector in size. + // We can't perform vectorized reads at this point, so we'll fall back to reading primitives + // directly. Note that all of these reads are unaligned. + + // Reminder: If this code path is hit, bufferLength is still a char count, not a byte count. + // We skipped the code path that multiplied the count by sizeof(char). + + Debug.Assert(bufferLength < SizeOfVector128InChars); + + // QWORD drain + + if ((bufferLength & 4) != 0) + { + if (Bmi1.X64.IsSupported) + { + // If we can use 64-bit tzcnt to count the number of leading ASCII chars, prefer it. + + ulong candidateUInt64 = Unsafe.ReadUnaligned(pBuffer); + if (!AllCharsInUInt64AreAscii(candidateUInt64)) + { + // Clear the low 7 bits (the ASCII bits) of each char, then tzcnt. + // Remember the / 8 at the end to convert bit count to byte count, + // then the & ~1 at the end to treat a match in the high byte of + // any char the same as a match in the low byte of that same char. 
+ + candidateUInt64 &= 0xFF80FF80_FF80FF80ul; + pBuffer = (char*)((byte*)pBuffer + ((nuint)(Bmi1.X64.TrailingZeroCount(candidateUInt64) / 8) & ~(nuint)1)); + goto Finish; + } + } + else + { + // If we can't use 64-bit tzcnt, no worries. We'll just do 2x 32-bit reads instead. + + currentDWord = Unsafe.ReadUnaligned(pBuffer); + uint nextDWord = Unsafe.ReadUnaligned(pBuffer + 4 / sizeof(char)); + + if (!AllCharsInUInt32AreAscii(currentDWord | nextDWord)) + { + // At least one of the values wasn't all-ASCII. + // We need to figure out which one it was and stick it in the currentMask local. + + if (AllCharsInUInt32AreAscii(currentDWord)) + { + currentDWord = nextDWord; // this one is the culprit + pBuffer += 4 / sizeof(char); + } + + goto FoundNonAsciiDataInCurrentDWord; + } + } + + pBuffer += 4; // successfully consumed 4 ASCII chars + } + + // DWORD drain + + if ((bufferLength & 2) != 0) + { + currentDWord = Unsafe.ReadUnaligned(pBuffer); + + if (!AllCharsInUInt32AreAscii(currentDWord)) + { + goto FoundNonAsciiDataInCurrentDWord; + } + + pBuffer += 2; // successfully consumed 2 ASCII chars + } + + // WORD drain + // This is the final drain; there's no need for a BYTE drain since our elemental type is 16-bit char. + + if ((bufferLength & 1) != 0) + { + if (*pBuffer <= 0x007F) + { + pBuffer++; // successfully consumed a single char + } + } + + goto Finish; + } + + private static unsafe nuint NarrowUtf16ToAscii_Sse2(char* pUtf16Buffer, byte* pAsciiBuffer, nuint elementCount) + { + // This method contains logic optimized for both SSE2 and SSE41. Much of the logic in this method + // will be elided by JIT once we determine which specific ISAs we support. + + // JIT turns the below into constants + + uint SizeOfVector128 = (uint)Unsafe.SizeOf>(); + nuint MaskOfAllBitsInVector128 = (nuint)(SizeOfVector128 - 1); + + // This method is written such that control generally flows top-to-bottom, avoiding + // jumps as much as possible in the optimistic case of "all ASCII". 
If we see non-ASCII + // data, we jump out of the hot paths to targets at the end of the method. + + Debug.Assert(Sse2.IsSupported); + Debug.Assert(BitConverter.IsLittleEndian); + Debug.Assert(elementCount >= 2 * SizeOfVector128); + + Vector128 asciiMaskForPTEST = Vector128.Create(unchecked((short)0xFF80)); // used for PTEST on supported hardware + Vector128 asciiMaskForPXOR = Vector128.Create(unchecked((short)0x8000)); // used for PXOR + Vector128 asciiMaskForPCMPGTW = Vector128.Create(unchecked((short)0x807F)); // used for PCMPGTW + + // First, perform an unaligned read of the first part of the input buffer. + + Vector128 utf16VectorFirst = Sse2.LoadVector128((short*)pUtf16Buffer); // unaligned load + + // If there's non-ASCII data in the first 8 elements of the vector, there's nothing we can do. + // See comments in GetIndexOfFirstNonAsciiChar_Sse2 for information about how this works. + + if (Sse41.IsSupported) + { + if (!Sse41.TestZ(utf16VectorFirst, asciiMaskForPTEST)) + { + return 0; + } + } + else + { + if (Sse2.MoveMask(Sse2.CompareGreaterThan(Sse2.Xor(utf16VectorFirst, asciiMaskForPXOR), asciiMaskForPCMPGTW).AsByte()) != 0) + { + return 0; + } + } + + // Turn the 8 ASCII chars we just read into 8 ASCII bytes, then copy it to the destination. + + Vector128 asciiVector = Sse2.PackUnsignedSaturate(utf16VectorFirst, utf16VectorFirst); + Sse2.StoreScalar((ulong*)pAsciiBuffer, asciiVector.AsUInt64()); // ulong* calculated here is UNALIGNED + + nuint currentOffsetInElements = SizeOfVector128 / 2; // we processed 8 elements so far + + // We're going to get the best performance when we have aligned writes, so we'll take the + // hit of potentially unaligned reads in order to hit this sweet spot. + + // pAsciiBuffer points to the start of the destination buffer, immediately before where we wrote + // the 8 bytes previously. 
If the 0x08 bit is set at the pinned address, then the 8 bytes we wrote + // previously mean that the 0x08 bit is *not* set at address &pAsciiBuffer[SizeOfVector128 / 2]. In + // that case we can immediately back up to the previous aligned boundary and start the main loop. + // If the 0x08 bit is *not* set at the pinned address, then it means the 0x08 bit *is* set at + // address &pAsciiBuffer[SizeOfVector128 / 2], and we should perform one more 8-byte write to bump + // just past the next aligned boundary address. + + if (0u >= ((uint)pAsciiBuffer & (SizeOfVector128 / 2))) + { + // We need to perform one more partial vector write before we can get the alignment we want. + + utf16VectorFirst = Sse2.LoadVector128((short*)pUtf16Buffer + currentOffsetInElements); // unaligned load + + // See comments earlier in this method for information about how this works. + if (Sse41.IsSupported) + { + if (!Sse41.TestZ(utf16VectorFirst, asciiMaskForPTEST)) + { + goto Finish; + } + } + else + { + if (Sse2.MoveMask(Sse2.CompareGreaterThan(Sse2.Xor(utf16VectorFirst, asciiMaskForPXOR), asciiMaskForPCMPGTW).AsByte()) != 0) + { + goto Finish; + } + } + + // Turn the 8 ASCII chars we just read into 8 ASCII bytes, then copy it to the destination. + asciiVector = Sse2.PackUnsignedSaturate(utf16VectorFirst, utf16VectorFirst); + Sse2.StoreScalar((ulong*)(pAsciiBuffer + currentOffsetInElements), asciiVector.AsUInt64()); // ulong* calculated here is UNALIGNED + } + + // Calculate how many elements we wrote in order to get pAsciiBuffer to its next alignment + // point, then use that as the base offset going forward. 
+ + currentOffsetInElements = SizeOfVector128 - ((nuint)pAsciiBuffer & MaskOfAllBitsInVector128); + Debug.Assert(0 < currentOffsetInElements && currentOffsetInElements <= SizeOfVector128, "We wrote at least 1 byte but no more than a whole vector."); + + Debug.Assert(currentOffsetInElements <= elementCount, "Shouldn't have overrun the destination buffer."); + Debug.Assert(elementCount - currentOffsetInElements >= SizeOfVector128, "We should be able to run at least one whole vector."); + + nuint finalOffsetWhereCanRunLoop = elementCount - SizeOfVector128; + do + { + // In a loop, perform two unaligned reads, narrow to a single vector, then aligned write one vector. + + utf16VectorFirst = Sse2.LoadVector128((short*)pUtf16Buffer + currentOffsetInElements); // unaligned load + Vector128 utf16VectorSecond = Sse2.LoadVector128((short*)pUtf16Buffer + currentOffsetInElements + SizeOfVector128 / sizeof(short)); // unaligned load + Vector128 combinedVector = Sse2.Or(utf16VectorFirst, utf16VectorSecond); + + // See comments in GetIndexOfFirstNonAsciiChar_Sse2 for information about how this works. + if (Sse41.IsSupported) + { + if (!Sse41.TestZ(combinedVector, asciiMaskForPTEST)) + { + goto FoundNonAsciiDataInLoop; + } + } + else + { + if (Sse2.MoveMask(Sse2.CompareGreaterThan(Sse2.Xor(combinedVector, asciiMaskForPXOR), asciiMaskForPCMPGTW).AsByte()) != 0) + { + goto FoundNonAsciiDataInLoop; + } + } + + // Build up the UTF-8 vector and perform the store. + + asciiVector = Sse2.PackUnsignedSaturate(utf16VectorFirst, utf16VectorSecond); + + Debug.Assert(((nuint)pAsciiBuffer + currentOffsetInElements) % SizeOfVector128 == 0, "Write should be aligned."); + Sse2.StoreAligned(pAsciiBuffer + currentOffsetInElements, asciiVector); // aligned + + currentOffsetInElements += SizeOfVector128; + } while (currentOffsetInElements <= finalOffsetWhereCanRunLoop); + + Finish: + + // There might be some ASCII data left over. That's fine - we'll let our caller handle the final drain. 
+ return currentOffsetInElements; + + FoundNonAsciiDataInLoop: + + // Can we at least narrow the high vector? + // See comments in GetIndexOfFirstNonAsciiChar_Sse2 for information about how this works. + if (Sse41.IsSupported) + { + if (!Sse41.TestZ(utf16VectorFirst, asciiMaskForPTEST)) + { + goto Finish; // found non-ASCII data + } + } + else + { + if (Sse2.MoveMask(Sse2.CompareGreaterThan(Sse2.Xor(utf16VectorFirst, asciiMaskForPXOR), asciiMaskForPCMPGTW).AsByte()) != 0) + { + goto Finish; // found non-ASCII data + } + } + + // First part was all ASCII, narrow and aligned write. Note we're only filling in the low half of the vector. + asciiVector = Sse2.PackUnsignedSaturate(utf16VectorFirst, utf16VectorFirst); + + Debug.Assert(((nuint)pAsciiBuffer + currentOffsetInElements) % sizeof(ulong) == 0, "Destination should be ulong-aligned."); + + Sse2.StoreScalar((ulong*)(pAsciiBuffer + currentOffsetInElements), asciiVector.AsUInt64()); // ulong* calculated here is aligned + currentOffsetInElements += SizeOfVector128 / 2; + + goto Finish; + } + + /// + /// Copies as many ASCII bytes (00..7F) as possible from + /// to , stopping when the first non-ASCII byte is encountered + /// or once elements have been converted. Returns the total number + /// of elements that were able to be converted. + /// + public static unsafe nuint WidenAsciiToUtf16(byte* pAsciiBuffer, char* pUtf16Buffer, nuint elementCount) + { + nuint currentOffset = 0; + + // If SSE2 is supported, use those specific intrinsics instead of the generic vectorized + // code below. This has two benefits: (a) we can take advantage of specific instructions like + // pmovmskb which we know are optimized, and (b) we can avoid downclocking the processor while + // this method is running. 
+ + if (Sse2.IsSupported) + { + if (elementCount >= 2 * (uint)Unsafe.SizeOf>()) + { + currentOffset = WidenAsciiToUtf16_Sse2(pAsciiBuffer, pUtf16Buffer, elementCount); + } + } + else if (Vector.IsHardwareAccelerated) + { + uint SizeOfVector = (uint)Unsafe.SizeOf>(); // JIT will make this a const + + // Only bother vectorizing if we have enough data to do so. + if (elementCount >= SizeOfVector) + { + // Note use of SBYTE instead of BYTE below; we're using the two's-complement + // representation of negative integers to act as a surrogate for "is ASCII?". + + nuint finalOffsetWhereCanLoop = elementCount - SizeOfVector; + do + { + Vector asciiVector = Unsafe.ReadUnaligned>(pAsciiBuffer + currentOffset); + if (Vector.LessThanAny(asciiVector, Vector.Zero)) + { + break; // found non-ASCII data + } + + Vector.Widen(Vector.AsVectorByte(asciiVector), out Vector utf16LowVector, out Vector utf16HighVector); + + // TODO: Is the below logic also valid for big-endian platforms? + Unsafe.WriteUnaligned>(pUtf16Buffer + currentOffset, utf16LowVector); + Unsafe.WriteUnaligned>(pUtf16Buffer + currentOffset + Vector.Count, utf16HighVector); + + currentOffset += SizeOfVector; + } while (currentOffset <= finalOffsetWhereCanLoop); + } + } + + Debug.Assert(currentOffset <= elementCount); + nuint remainingElementCount = elementCount - currentOffset; + + // Try to widen 32 bits -> 64 bits at a time. + // We needn't update remainingElementCount after this point. + + uint asciiData; + + if (remainingElementCount >= 4) + { + nuint finalOffsetWhereCanLoop = currentOffset + remainingElementCount - 4; + do + { + asciiData = Unsafe.ReadUnaligned(pAsciiBuffer + currentOffset); + if (!AllBytesInUInt32AreAscii(asciiData)) + { + goto FoundNonAsciiData; + } + + WidenFourAsciiBytesToUtf16AndWriteToBuffer(ref pUtf16Buffer[currentOffset], asciiData); + currentOffset += 4; + } while (currentOffset <= finalOffsetWhereCanLoop); + } + + // Try to widen 16 bits -> 32 bits. 
+ + if (((uint)remainingElementCount & 2) != 0) + { + asciiData = Unsafe.ReadUnaligned(pAsciiBuffer + currentOffset); + if (!AllBytesInUInt32AreAscii(asciiData)) + { + goto FoundNonAsciiData; + } + + if (BitConverter.IsLittleEndian) + { + pUtf16Buffer[currentOffset] = (char)(byte)asciiData; + pUtf16Buffer[currentOffset + 1] = (char)(asciiData >> 8); + } + else + { + pUtf16Buffer[currentOffset + 1] = (char)(byte)asciiData; + pUtf16Buffer[currentOffset] = (char)(asciiData >> 8); + } + + currentOffset += 2; + } + + // Try to widen 8 bits -> 16 bits. + + if (((uint)remainingElementCount & 1) != 0) + { + asciiData = pAsciiBuffer[currentOffset]; + if (((byte)asciiData & 0x80) != 0) + { + goto Finish; + } + + pUtf16Buffer[currentOffset] = (char)asciiData; + currentOffset += 1; + } + + Finish: + + return currentOffset; + + FoundNonAsciiData: + + Debug.Assert(!AllBytesInUInt32AreAscii(asciiData), "Shouldn't have reached this point if we have an all-ASCII input."); + + // Drain ASCII bytes one at a time. + + while (0u >= (uint)((byte)asciiData & 0x80)) + { + pUtf16Buffer[currentOffset] = (char)(byte)asciiData; + currentOffset += 1; + asciiData >>= 8; + } + + goto Finish; + } + + private static unsafe nuint WidenAsciiToUtf16_Sse2(byte* pAsciiBuffer, char* pUtf16Buffer, nuint elementCount) + { + // JIT turns the below into constants + + uint SizeOfVector128 = (uint)Unsafe.SizeOf>(); + nuint MaskOfAllBitsInVector128 = (nuint)(SizeOfVector128 - 1); + + // This method is written such that control generally flows top-to-bottom, avoiding + // jumps as much as possible in the optimistic case of "all ASCII". If we see non-ASCII + // data, we jump out of the hot paths to targets at the end of the method. 
+ + Debug.Assert(Sse2.IsSupported); + Debug.Assert(BitConverter.IsLittleEndian); + Debug.Assert(elementCount >= 2 * SizeOfVector128); + + // We're going to get the best performance when we have aligned writes, so we'll take the + // hit of potentially unaligned reads in order to hit this sweet spot. + + Vector128 asciiVector; + Vector128 utf16FirstHalfVector; + uint mask; + + // First, perform an unaligned read of the first part of the input buffer. + + asciiVector = Sse2.LoadVector128(pAsciiBuffer); // unaligned load + mask = (uint)Sse2.MoveMask(asciiVector); + + // If there's non-ASCII data in the first 8 elements of the vector, there's nothing we can do. + + if ((byte)mask != 0) + { + return 0; + } + + // Then perform an unaligned write of the first part of the input buffer. + + Vector128 zeroVector = Vector128.Zero; + + utf16FirstHalfVector = Sse2.UnpackLow(asciiVector, zeroVector); + Sse2.Store((byte*)pUtf16Buffer, utf16FirstHalfVector); // unaligned + + // Calculate how many elements we wrote in order to get pOutputBuffer to its next alignment + // point, then use that as the base offset going forward. Remember the >> 1 to account for + // that we wrote chars, not bytes. This means we may re-read data in the next iteration of + // the loop, but this is ok. + + nuint currentOffset = (SizeOfVector128 >> 1) - (((nuint)pUtf16Buffer >> 1) & (MaskOfAllBitsInVector128 >> 1)); + Debug.Assert(0 < currentOffset && currentOffset <= SizeOfVector128 / sizeof(char)); + + nuint finalOffsetWhereCanRunLoop = elementCount - SizeOfVector128; + + do + { + // In a loop, perform an unaligned read, widen to two vectors, then aligned write the two vectors. 
+ + asciiVector = Sse2.LoadVector128(pAsciiBuffer + currentOffset); // unaligned load + mask = (uint)Sse2.MoveMask(asciiVector); + + if (mask != 0) + { + // non-ASCII byte somewhere + goto NonAsciiDataSeenInInnerLoop; + } + + byte* pStore = (byte*)(pUtf16Buffer + currentOffset); + Sse2.StoreAligned(pStore, Sse2.UnpackLow(asciiVector, zeroVector)); + + pStore += SizeOfVector128; + Sse2.StoreAligned(pStore, Sse2.UnpackHigh(asciiVector, zeroVector)); + + currentOffset += SizeOfVector128; + } while (currentOffset <= finalOffsetWhereCanRunLoop); + + Finish: + + return currentOffset; + + NonAsciiDataSeenInInnerLoop: + + // Can we at least widen the first part of the vector? + + if (0u >= ((byte)mask)) + { + // First part was all ASCII, widen + utf16FirstHalfVector = Sse2.UnpackLow(asciiVector, zeroVector); + Sse2.StoreAligned((byte*)(pUtf16Buffer + currentOffset), utf16FirstHalfVector); + currentOffset += SizeOfVector128 / 2; + } + + goto Finish; + } + + /// + /// Given a DWORD which represents a buffer of 4 bytes, widens the buffer into 4 WORDs and + /// writes them to the output buffer with machine endianness. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void WidenFourAsciiBytesToUtf16AndWriteToBuffer(ref char outputBuffer, uint value) + { + Debug.Assert(AllBytesInUInt32AreAscii(value)); + + if (Bmi2.X64.IsSupported) + { + // BMI2 will work regardless of the processor's endianness. 
+ Unsafe.WriteUnaligned(ref Unsafe.As(ref outputBuffer), Bmi2.X64.ParallelBitDeposit(value, 0x00FF00FF_00FF00FFul)); + } + else + { + if (BitConverter.IsLittleEndian) + { + outputBuffer = (char)(byte)value; + value >>= 8; + Unsafe.Add(ref outputBuffer, 1) = (char)(byte)value; + value >>= 8; + Unsafe.Add(ref outputBuffer, 2) = (char)(byte)value; + value >>= 8; + Unsafe.Add(ref outputBuffer, 3) = (char)value; + } + else + { + Unsafe.Add(ref outputBuffer, 3) = (char)(byte)value; + value >>= 8; + Unsafe.Add(ref outputBuffer, 2) = (char)(byte)value; + value >>= 8; + Unsafe.Add(ref outputBuffer, 1) = (char)(byte)value; + value >>= 8; + outputBuffer = (char)value; + } + } + } + } +} +#endif diff --git a/src/DotNetty.Common/Internal/ASCIIUtility.cs b/src/DotNetty.Common/Internal/ASCIIUtility.cs index cb419bb84..5427bdb00 100644 --- a/src/DotNetty.Common/Internal/ASCIIUtility.cs +++ b/src/DotNetty.Common/Internal/ASCIIUtility.cs @@ -1,4 +1,4 @@ -// borrowed from https://github.com/dotnet/corefx/blob/release/3.1/src/Common/src/CoreLib/System/Text/cs +// borrowed from https://github.com/dotnet/corefx/blob/release/3.1/src/Common/src/CoreLib/System/Text/ASCIIUtility.cs // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. @@ -11,6 +11,9 @@ using System.Runtime.CompilerServices; using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; +#if NET +using System.Runtime.Intrinsics.Arm; +#endif namespace DotNetty.Common.Internal { @@ -21,7 +24,7 @@ private static bool AllBytesInUInt64AreAscii(ulong value) { // If the high bit of any byte is set, that byte is non-ASCII. 
- return (0ul >= (value & UInt64HighBitsOnlyMask)); + return 0ul >= (value & UInt64HighBitsOnlyMask); } /// @@ -30,7 +33,7 @@ private static bool AllBytesInUInt64AreAscii(ulong value) [MethodImpl(MethodImplOptions.AggressiveInlining)] private static bool AllCharsInUInt32AreAscii(uint value) { - return (0u >= (value & ~0x007F007Fu)); + return 0u >= (value & ~0x007F007Fu); } /// @@ -39,7 +42,7 @@ private static bool AllCharsInUInt32AreAscii(uint value) [MethodImpl(MethodImplOptions.AggressiveInlining)] private static bool AllCharsInUInt64AreAscii(ulong value) { - return (0ul >= (value & ~0x007F007F_007F007Ful)); + return 0ul >= (value & ~0x007F007F_007F007Ful); } /// @@ -54,24 +57,6 @@ private static bool FirstCharInUInt32IsAscii(uint value) || (!BitConverter.IsLittleEndian && 0u >= (value & 0xFF800000u)); } - /// - /// Returns the index in where the first non-ASCII byte is found. - /// Returns if the buffer is empty or all-ASCII. - /// - /// An ASCII byte is defined as 0x00 - 0x7F, inclusive. - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe nuint GetIndexOfFirstNonAsciiByte(byte* pBuffer, nuint bufferLength) - { - // If SSE2 is supported, use those specific intrinsics instead of the generic vectorized - // code below. This has two benefits: (a) we can take advantage of specific instructions like - // pmovmskb which we know are optimized, and (b) we can avoid downclocking the processor while - // this method is running. - - return (Sse2.IsSupported) - ? GetIndexOfFirstNonAsciiByte_Sse2(pBuffer, bufferLength) - : GetIndexOfFirstNonAsciiByte_Default(pBuffer, bufferLength); - } - private static unsafe nuint GetIndexOfFirstNonAsciiByte_Default(byte* pBuffer, nuint bufferLength) { // Squirrel away the original buffer reference. 
This method works by determining the exact @@ -215,267 +200,6 @@ private static unsafe nuint GetIndexOfFirstNonAsciiByte_Default(byte* pBuffer, n goto Finish; } - private static unsafe nuint GetIndexOfFirstNonAsciiByte_Sse2(byte* pBuffer, nuint bufferLength) - { - // JIT turns the below into constants - - uint SizeOfVector128 = (uint)Unsafe.SizeOf>(); - nuint MaskOfAllBitsInVector128 = (nuint)(SizeOfVector128 - 1); - - Debug.Assert(Sse2.IsSupported, "Should've been checked by caller."); - Debug.Assert(BitConverter.IsLittleEndian, "SSE2 assumes little-endian."); - - uint currentMask, secondMask; - byte* pOriginalBuffer = pBuffer; - - // This method is written such that control generally flows top-to-bottom, avoiding - // jumps as much as possible in the optimistic case of a large enough buffer and - // "all ASCII". If we see non-ASCII data, we jump out of the hot paths to targets - // after all the main logic. - - if (bufferLength < SizeOfVector128) - { - goto InputBufferLessThanOneVectorInLength; // can't vectorize; drain primitives instead - } - - // Read the first vector unaligned. - - currentMask = (uint)Sse2.MoveMask(Sse2.LoadVector128(pBuffer)); // unaligned load - - if (currentMask != 0) - { - goto FoundNonAsciiDataInCurrentMask; - } - - // If we have less than 32 bytes to process, just go straight to the final unaligned - // read. There's no need to mess with the loop logic in the middle of this method. - - if (bufferLength < 2 * SizeOfVector128) - { - goto IncrementCurrentOffsetBeforeFinalUnalignedVectorRead; - } - - // Now adjust the read pointer so that future reads are aligned. 
- - pBuffer = (byte*)(((nuint)pBuffer + SizeOfVector128) & ~(nuint)MaskOfAllBitsInVector128); - -#if DEBUG - long numBytesRead = pBuffer - pOriginalBuffer; - Debug.Assert(0 < numBytesRead && numBytesRead <= SizeOfVector128, "We should've made forward progress of at least one byte."); - Debug.Assert((nuint)numBytesRead <= bufferLength, "We shouldn't have read past the end of the input buffer."); -#endif - - // Adjust the remaining length to account for what we just read. - - bufferLength += (nuint)pOriginalBuffer; - bufferLength -= (nuint)pBuffer; - - // The buffer is now properly aligned. - // Read 2 vectors at a time if possible. - - if (bufferLength >= 2 * SizeOfVector128) - { - byte* pFinalVectorReadPos = (byte*)((nuint)pBuffer + bufferLength - 2 * SizeOfVector128); - - // After this point, we no longer need to update the bufferLength value. - - do - { - Vector128 firstVector = Sse2.LoadAlignedVector128(pBuffer); - Vector128 secondVector = Sse2.LoadAlignedVector128(pBuffer + SizeOfVector128); - - currentMask = (uint)Sse2.MoveMask(firstVector); - secondMask = (uint)Sse2.MoveMask(secondVector); - - if ((currentMask | secondMask) != 0) - { - goto FoundNonAsciiDataInInnerLoop; - } - - pBuffer += 2 * SizeOfVector128; - } while (pBuffer <= pFinalVectorReadPos); - } - - // We have somewhere between 0 and (2 * vector length) - 1 bytes remaining to read from. - // Since the above loop doesn't update bufferLength, we can't rely on its absolute value. - // But we _can_ rely on it to tell us how much remaining data must be drained by looking - // at what bits of it are set. This works because had we updated it within the loop above, - // we would've been adding 2 * SizeOfVector128 on each iteration, but we only care about - // bits which are less significant than those that the addition would've acted on. - - // If there is fewer than one vector length remaining, skip the next aligned read. 
- - if (0ul >= (bufferLength & SizeOfVector128)) - { - goto DoFinalUnalignedVectorRead; - } - - // At least one full vector's worth of data remains, so we can safely read it. - // Remember, at this point pBuffer is still aligned. - - currentMask = (uint)Sse2.MoveMask(Sse2.LoadAlignedVector128(pBuffer)); - if (currentMask != 0) - { - goto FoundNonAsciiDataInCurrentMask; - } - - IncrementCurrentOffsetBeforeFinalUnalignedVectorRead: - - pBuffer += SizeOfVector128; - - DoFinalUnalignedVectorRead: - - if (((byte)bufferLength & MaskOfAllBitsInVector128) != 0) - { - // Perform an unaligned read of the last vector. - // We need to adjust the pointer because we're re-reading data. - - pBuffer += (bufferLength & MaskOfAllBitsInVector128) - SizeOfVector128; - - currentMask = (uint)Sse2.MoveMask(Sse2.LoadVector128(pBuffer)); // unaligned load - if (currentMask != 0) - { - goto FoundNonAsciiDataInCurrentMask; - } - - pBuffer += SizeOfVector128; - } - - Finish: - - return (nuint)pBuffer - (nuint)pOriginalBuffer; // and we're done! - - FoundNonAsciiDataInInnerLoop: - - // If the current (first) mask isn't the mask that contains non-ASCII data, then it must - // instead be the second mask. If so, skip the entire first mask and drain ASCII bytes - // from the second mask. - - if (0u >= currentMask) - { - pBuffer += SizeOfVector128; - currentMask = secondMask; - } - - FoundNonAsciiDataInCurrentMask: - - // The mask contains - from the LSB - a 0 for each ASCII byte we saw, and a 1 for each non-ASCII byte. - // Tzcnt is the correct operation to count the number of zero bits quickly. If this instruction isn't - // available, we'll fall back to a normal loop. 
- - Debug.Assert(currentMask != 0, "Shouldn't be here unless we see non-ASCII data."); - pBuffer += (uint)BitOperations.TrailingZeroCount(currentMask); - - goto Finish; - - FoundNonAsciiDataInCurrentDWord: - - uint currentDWord; - Debug.Assert(!AllBytesInUInt32AreAscii(currentDWord), "Shouldn't be here unless we see non-ASCII data."); - pBuffer += CountNumberOfLeadingAsciiBytesFromUInt32WithSomeNonAsciiData(currentDWord); - - goto Finish; - - InputBufferLessThanOneVectorInLength: - - // These code paths get hit if the original input length was less than one vector in size. - // We can't perform vectorized reads at this point, so we'll fall back to reading primitives - // directly. Note that all of these reads are unaligned. - - Debug.Assert(bufferLength < SizeOfVector128); - - // QWORD drain - - if ((bufferLength & 8) != 0) - { - if (Bmi1.X64.IsSupported) - { - // If we can use 64-bit tzcnt to count the number of leading ASCII bytes, prefer it. - - ulong candidateUInt64 = Unsafe.ReadUnaligned(pBuffer); - if (!AllBytesInUInt64AreAscii(candidateUInt64)) - { - // Clear everything but the high bit of each byte, then tzcnt. - // Remember the / 8 at the end to convert bit count to byte count. - - candidateUInt64 &= UInt64HighBitsOnlyMask; - pBuffer += (nuint)(Bmi1.X64.TrailingZeroCount(candidateUInt64) / 8); - goto Finish; - } - } - else - { - // If we can't use 64-bit tzcnt, no worries. We'll just do 2x 32-bit reads instead. - - currentDWord = Unsafe.ReadUnaligned(pBuffer); - uint nextDWord = Unsafe.ReadUnaligned(pBuffer + 4); - - if (!AllBytesInUInt32AreAscii(currentDWord | nextDWord)) - { - // At least one of the values wasn't all-ASCII. - // We need to figure out which one it was and stick it in the currentMask local. 
- - if (AllBytesInUInt32AreAscii(currentDWord)) - { - currentDWord = nextDWord; // this one is the culprit - pBuffer += 4; - } - - goto FoundNonAsciiDataInCurrentDWord; - } - } - - pBuffer += 8; // successfully consumed 8 ASCII bytes - } - - // DWORD drain - - if ((bufferLength & 4) != 0) - { - currentDWord = Unsafe.ReadUnaligned(pBuffer); - - if (!AllBytesInUInt32AreAscii(currentDWord)) - { - goto FoundNonAsciiDataInCurrentDWord; - } - - pBuffer += 4; // successfully consumed 4 ASCII bytes - } - - // WORD drain - // (We movzx to a DWORD for ease of manipulation.) - - if ((bufferLength & 2) != 0) - { - currentDWord = Unsafe.ReadUnaligned(pBuffer); - - if (!AllBytesInUInt32AreAscii(currentDWord)) - { - // We only care about the 0x0080 bit of the value. If it's not set, then we - // increment currentOffset by 1. If it's set, we don't increment it at all. - - pBuffer += (nuint)((nint)(sbyte)currentDWord >> 7) + 1; - goto Finish; - } - - pBuffer += 2; // successfully consumed 2 ASCII bytes - } - - // BYTE drain - - if ((bufferLength & 1) != 0) - { - // sbyte has non-negative value if byte is ASCII. - - if (*(sbyte*)(pBuffer) >= 0) - { - pBuffer++; // successfully consumed a single byte - } - } - - goto Finish; - } - /// /// Returns the index in where the first non-ASCII char is found. /// Returns if the buffer is empty or all-ASCII. @@ -630,476 +354,137 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Default(char* pBuffer, n goto Finish; } - private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuint bufferLength /* in chars */) + /// + /// Given a QWORD which represents a buffer of 4 ASCII chars in machine-endian order, + /// narrows each WORD to a BYTE, then writes the 4-byte result to the output buffer + /// also in machine-endian order. 
+ /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void NarrowFourUtf16CharsToAsciiAndWriteToBuffer(ref byte outputBuffer, ulong value) { - // This method contains logic optimized for both SSE2 and SSE41. Much of the logic in this method - // will be elided by JIT once we determine which specific ISAs we support. - - // Quick check for empty inputs. + Debug.Assert(AllCharsInUInt64AreAscii(value)); - if (0ul >= bufferLength) +#if NETCOREAPP3_1 + if (Bmi2.X64.IsSupported) { - return 0; + // BMI2 will work regardless of the processor's endianness. + Unsafe.WriteUnaligned(ref outputBuffer, (uint)Bmi2.X64.ParallelBitExtract(value, 0x00FF00FF_00FF00FFul)); } - - // JIT turns the below into constants - - uint SizeOfVector128InBytes = (uint)Unsafe.SizeOf>(); - uint SizeOfVector128InChars = SizeOfVector128InBytes / sizeof(char); - - Debug.Assert(Sse2.IsSupported, "Should've been checked by caller."); - Debug.Assert(BitConverter.IsLittleEndian, "SSE2 assumes little-endian."); - - Vector128 firstVector, secondVector; - uint currentMask; - char* pOriginalBuffer = pBuffer; - - if (bufferLength < SizeOfVector128InChars) +#else + if (Sse2.X64.IsSupported) { - goto InputBufferLessThanOneVectorInLength; // can't vectorize; drain primitives instead - } - - // This method is written such that control generally flows top-to-bottom, avoiding - // jumps as much as possible in the optimistic case of "all ASCII". If we see non-ASCII - // data, we jump out of the hot paths to targets at the end of the method. 
- - Vector128 asciiMaskForPTEST = Vector128.Create(unchecked((short)0xFF80)); // used for PTEST on supported hardware - Vector128 asciiMaskForPMINUW = Vector128.Create((ushort)0x0080); // used for PMINUW on supported hardware - Vector128 asciiMaskForPXOR = Vector128.Create(unchecked((short)0x8000)); // used for PXOR - Vector128 asciiMaskForPCMPGTW = Vector128.Create(unchecked((short)0x807F)); // used for PCMPGTW - -#if NET - Debug.Assert(bufferLength <= nuint.MaxValue / sizeof(char)); -#endif - - // Read the first vector unaligned. + // Narrows a vector of words [ w0 w1 w2 w3 ] to a vector of bytes + // [ b0 b1 b2 b3 b0 b1 b2 b3 ], then writes 4 bytes (32 bits) to the destination. - firstVector = Sse2.LoadVector128((short*)pBuffer); // unaligned load - - if (Sse41.IsSupported) + Vector128 vecWide = Sse2.X64.ConvertScalarToVector128UInt64(value).AsInt16(); + Vector128 vecNarrow = Sse2.PackUnsignedSaturate(vecWide, vecWide).AsUInt32(); + Unsafe.WriteUnaligned(ref outputBuffer, Sse2.ConvertToUInt32(vecNarrow)); + } + else if (AdvSimd.IsSupported) { - // The SSE41-optimized code path works by forcing the 0x0080 bit in each WORD of the vector to be - // set iff the WORD element has value >= 0x0080 (non-ASCII). Then we'll treat it as a BYTE vector - // in order to extract the mask. - currentMask = (uint)Sse2.MoveMask(Sse41.Min(firstVector.AsUInt16(), asciiMaskForPMINUW).AsByte()); + // Narrows a vector of words [ w0 w1 w2 w3 ] to a vector of bytes + // [ b0 b1 b2 b3 * * * * ], then writes 4 bytes (32 bits) to the destination. + + Vector128 vecWide = Vector128.CreateScalarUnsafe(value).AsInt16(); + Vector64 lower = AdvSimd.ExtractNarrowingSaturateUnsignedLower(vecWide); + Unsafe.WriteUnaligned(ref outputBuffer, lower.AsUInt32().ToScalar()); } +#endif else { - // The SSE2-optimized code path works by forcing each WORD of the vector to be 0xFFFF iff the WORD - // element has value >= 0x0080 (non-ASCII). Then we'll treat it as a BYTE vector in order to extract - // the mask. 
- currentMask = (uint)Sse2.MoveMask(Sse2.CompareGreaterThan(Sse2.Xor(firstVector, asciiMaskForPXOR), asciiMaskForPCMPGTW).AsByte()); + if (BitConverter.IsLittleEndian) + { + outputBuffer = (byte)value; + value >>= 16; + Unsafe.Add(ref outputBuffer, 1) = (byte)value; + value >>= 16; + Unsafe.Add(ref outputBuffer, 2) = (byte)value; + value >>= 16; + Unsafe.Add(ref outputBuffer, 3) = (byte)value; + } + else + { + Unsafe.Add(ref outputBuffer, 3) = (byte)value; + value >>= 16; + Unsafe.Add(ref outputBuffer, 2) = (byte)value; + value >>= 16; + Unsafe.Add(ref outputBuffer, 1) = (byte)value; + value >>= 16; + outputBuffer = (byte)value; + } } + } + + /// + /// Given a DWORD which represents a buffer of 2 ASCII chars in machine-endian order, + /// narrows each WORD to a BYTE, then writes the 2-byte result to the output buffer also in + /// machine-endian order. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void NarrowTwoUtf16CharsToAsciiAndWriteToBuffer(ref byte outputBuffer, uint value) + { + Debug.Assert(AllCharsInUInt32AreAscii(value)); - if (currentMask != 0) + if (BitConverter.IsLittleEndian) { - goto FoundNonAsciiDataInCurrentMask; + outputBuffer = (byte)value; + Unsafe.Add(ref outputBuffer, 1) = (byte)(value >> 16); } - - // If we have less than 32 bytes to process, just go straight to the final unaligned - // read. There's no need to mess with the loop logic in the middle of this method. - - // Adjust the remaining length to account for what we just read. - // For the remainder of this code path, bufferLength will be in bytes, not chars. - - bufferLength <<= 1; // chars to bytes - - if (bufferLength < 2 * SizeOfVector128InBytes) + else { - goto IncrementCurrentOffsetBeforeFinalUnalignedVectorRead; + Unsafe.Add(ref outputBuffer, 1) = (byte)value; + outputBuffer = (byte)(value >> 16); } + } - // Now adjust the read pointer so that future reads are aligned. 
- - pBuffer = (char*)(((nuint)pBuffer + SizeOfVector128InBytes) & ~(nuint)(SizeOfVector128InBytes - 1)); - -#if DEBUG - long numCharsRead = pBuffer - pOriginalBuffer; - Debug.Assert(0 < numCharsRead && numCharsRead <= SizeOfVector128InChars, "We should've made forward progress of at least one char."); - Debug.Assert((nuint)numCharsRead <= bufferLength, "We shouldn't have read past the end of the input buffer."); -#endif - - // Adjust remaining buffer length. + /// + /// Copies as many ASCII characters (U+0000..U+007F) as possible from + /// to , stopping when the first non-ASCII character is encountered + /// or once elements have been converted. Returns the total number + /// of elements that were able to be converted. + /// + public static unsafe nuint NarrowUtf16ToAscii(char* pUtf16Buffer, byte* pAsciiBuffer, nuint elementCount) + { + nuint currentOffset = 0; - bufferLength += (nuint)pOriginalBuffer; - bufferLength -= (nuint)pBuffer; + uint utf16Data32BitsHigh = 0, utf16Data32BitsLow = 0; + ulong utf16Data64Bits = 0; - // The buffer is now properly aligned. - // Read 2 vectors at a time if possible. + // If SSE2 is supported, use those specific intrinsics instead of the generic vectorized + // code below. This has two benefits: (a) we can take advantage of specific instructions like + // pmovmskb, ptest, vpminuw which we know are optimized, and (b) we can avoid downclocking the + // processor while this method is running. - if (bufferLength >= 2 * SizeOfVector128InBytes) + if (Sse2.IsSupported) { - char* pFinalVectorReadPos = (char*)((nuint)pBuffer + bufferLength - 2 * SizeOfVector128InBytes); - - // After this point, we no longer need to update the bufferLength value. 
+ Debug.Assert(BitConverter.IsLittleEndian, "Assume little endian if SSE2 is supported."); - do + if (elementCount >= 2 * (uint)Unsafe.SizeOf>()) { - firstVector = Sse2.LoadAlignedVector128((short*)pBuffer); - secondVector = Sse2.LoadAlignedVector128((short*)pBuffer + SizeOfVector128InChars); - Vector128 combinedVector = Sse2.Or(firstVector, secondVector); + // Since there's overhead to setting up the vectorized code path, we only want to + // call into it after a quick probe to ensure the next immediate characters really are ASCII. + // If we see non-ASCII data, we'll jump immediately to the draining logic at the end of the method. - if (Sse41.IsSupported) + if (PlatformDependent.Is64BitProcess) { - // If a non-ASCII bit is set in any WORD of the combined vector, we have seen non-ASCII data. - // Jump to the non-ASCII handler to figure out which particular vector contained non-ASCII data. - if (!Sse41.TestZ(combinedVector, asciiMaskForPTEST)) + utf16Data64Bits = Unsafe.ReadUnaligned(pUtf16Buffer); + if (!AllCharsInUInt64AreAscii(utf16Data64Bits)) { - goto FoundNonAsciiDataInFirstOrSecondVector; + goto FoundNonAsciiDataIn64BitRead; } } else { - // See comment earlier in the method for an explanation of how the below logic works. - if (Sse2.MoveMask(Sse2.CompareGreaterThan(Sse2.Xor(combinedVector, asciiMaskForPXOR), asciiMaskForPCMPGTW).AsByte()) != 0) + utf16Data32BitsHigh = Unsafe.ReadUnaligned(pUtf16Buffer); + utf16Data32BitsLow = Unsafe.ReadUnaligned(pUtf16Buffer + 4 / sizeof(char)); + if (!AllCharsInUInt32AreAscii(utf16Data32BitsHigh | utf16Data32BitsLow)) { - goto FoundNonAsciiDataInFirstOrSecondVector; + goto FoundNonAsciiDataIn64BitRead; } } - pBuffer += 2 * SizeOfVector128InChars; - } while (pBuffer <= pFinalVectorReadPos); - } - - // We have somewhere between 0 and (2 * vector length) - 1 bytes remaining to read from. - // Since the above loop doesn't update bufferLength, we can't rely on its absolute value. 
- // But we _can_ rely on it to tell us how much remaining data must be drained by looking - // at what bits of it are set. This works because had we updated it within the loop above, - // we would've been adding 2 * SizeOfVector128 on each iteration, but we only care about - // bits which are less significant than those that the addition would've acted on. - - // If there is fewer than one vector length remaining, skip the next aligned read. - // Remember, at this point bufferLength is measured in bytes, not chars. - - if (0ul >= (bufferLength & SizeOfVector128InBytes)) - { - goto DoFinalUnalignedVectorRead; - } - - // At least one full vector's worth of data remains, so we can safely read it. - // Remember, at this point pBuffer is still aligned. - - firstVector = Sse2.LoadAlignedVector128((short*)pBuffer); - - if (Sse41.IsSupported) - { - // If a non-ASCII bit is set in any WORD of the combined vector, we have seen non-ASCII data. - // Jump to the non-ASCII handler to figure out which particular vector contained non-ASCII data. - if (!Sse41.TestZ(firstVector, asciiMaskForPTEST)) - { - goto FoundNonAsciiDataInFirstVector; - } - } - else - { - // See comment earlier in the method for an explanation of how the below logic works. - currentMask = (uint)Sse2.MoveMask(Sse2.CompareGreaterThan(Sse2.Xor(firstVector, asciiMaskForPXOR), asciiMaskForPCMPGTW).AsByte()); - if (currentMask != 0) - { - goto FoundNonAsciiDataInCurrentMask; - } - } - - IncrementCurrentOffsetBeforeFinalUnalignedVectorRead: - - pBuffer += SizeOfVector128InChars; - - DoFinalUnalignedVectorRead: - - if (((byte)bufferLength & (SizeOfVector128InBytes - 1)) != 0) - { - // Perform an unaligned read of the last vector. - // We need to adjust the pointer because we're re-reading data. 
- - pBuffer = (char*)((byte*)pBuffer + (bufferLength & (SizeOfVector128InBytes - 1)) - SizeOfVector128InBytes); - firstVector = Sse2.LoadVector128((short*)pBuffer); // unaligned load - - if (Sse41.IsSupported) - { - // If a non-ASCII bit is set in any WORD of the combined vector, we have seen non-ASCII data. - // Jump to the non-ASCII handler to figure out which particular vector contained non-ASCII data. - if (!Sse41.TestZ(firstVector, asciiMaskForPTEST)) - { - goto FoundNonAsciiDataInFirstVector; - } - } - else - { - // See comment earlier in the method for an explanation of how the below logic works. - currentMask = (uint)Sse2.MoveMask(Sse2.CompareGreaterThan(Sse2.Xor(firstVector, asciiMaskForPXOR), asciiMaskForPCMPGTW).AsByte()); - if (currentMask != 0) - { - goto FoundNonAsciiDataInCurrentMask; - } - } - - pBuffer += SizeOfVector128InChars; - } - - Finish: - - Debug.Assert(((nuint)pBuffer - (nuint)pOriginalBuffer) % 2 == 0, "Shouldn't have incremented any pointer by an odd byte count."); - return ((nuint)pBuffer - (nuint)pOriginalBuffer) / sizeof(char); // and we're done! (remember to adjust for char count) - - FoundNonAsciiDataInFirstOrSecondVector: - - // We don't know if the first or the second vector contains non-ASCII data. Check the first - // vector, and if that's all-ASCII then the second vector must be the culprit. Either way - // we'll make sure the first vector local is the one that contains the non-ASCII data. - - // See comment earlier in the method for an explanation of how the below logic works. - if (Sse41.IsSupported) - { - if (!Sse41.TestZ(firstVector, asciiMaskForPTEST)) - { - goto FoundNonAsciiDataInFirstVector; - } - } - else - { - currentMask = (uint)Sse2.MoveMask(Sse2.CompareGreaterThan(Sse2.Xor(firstVector, asciiMaskForPXOR), asciiMaskForPCMPGTW).AsByte()); - if (currentMask != 0) - { - goto FoundNonAsciiDataInCurrentMask; - } - } - - // Wasn't the first vector; must be the second. 
- - pBuffer += SizeOfVector128InChars; - firstVector = secondVector; - - FoundNonAsciiDataInFirstVector: - - // See comment earlier in the method for an explanation of how the below logic works. - if (Sse41.IsSupported) - { - currentMask = (uint)Sse2.MoveMask(Sse41.Min(firstVector.AsUInt16(), asciiMaskForPMINUW).AsByte()); - } - else - { - currentMask = (uint)Sse2.MoveMask(Sse2.CompareGreaterThan(Sse2.Xor(firstVector, asciiMaskForPXOR), asciiMaskForPCMPGTW).AsByte()); - } - - FoundNonAsciiDataInCurrentMask: - - // The mask contains - from the LSB - a 0 for each ASCII byte we saw, and a 1 for each non-ASCII byte. - // Tzcnt is the correct operation to count the number of zero bits quickly. If this instruction isn't - // available, we'll fall back to a normal loop. (Even though the original vector used WORD elements, - // masks work on BYTE elements, and we account for this in the final fixup.) - - Debug.Assert(currentMask != 0, "Shouldn't be here unless we see non-ASCII data."); - pBuffer = (char*)((byte*)pBuffer + (uint)BitOperations.TrailingZeroCount(currentMask)); - - goto Finish; - - FoundNonAsciiDataInCurrentDWord: - - uint currentDWord; - Debug.Assert(!AllCharsInUInt32AreAscii(currentDWord), "Shouldn't be here unless we see non-ASCII data."); - - if (FirstCharInUInt32IsAscii(currentDWord)) - { - pBuffer++; // skip past the ASCII char - } - - goto Finish; - - InputBufferLessThanOneVectorInLength: - - // These code paths get hit if the original input length was less than one vector in size. - // We can't perform vectorized reads at this point, so we'll fall back to reading primitives - // directly. Note that all of these reads are unaligned. - - // Reminder: If this code path is hit, bufferLength is still a char count, not a byte count. - // We skipped the code path that multiplied the count by sizeof(char). 
- - Debug.Assert(bufferLength < SizeOfVector128InChars); - - // QWORD drain - - if ((bufferLength & 4) != 0) - { - if (Bmi1.X64.IsSupported) - { - // If we can use 64-bit tzcnt to count the number of leading ASCII chars, prefer it. - - ulong candidateUInt64 = Unsafe.ReadUnaligned(pBuffer); - if (!AllCharsInUInt64AreAscii(candidateUInt64)) - { - // Clear the low 7 bits (the ASCII bits) of each char, then tzcnt. - // Remember the / 8 at the end to convert bit count to byte count, - // then the & ~1 at the end to treat a match in the high byte of - // any char the same as a match in the low byte of that same char. - - candidateUInt64 &= 0xFF80FF80_FF80FF80ul; - pBuffer = (char*)((byte*)pBuffer + ((nuint)(Bmi1.X64.TrailingZeroCount(candidateUInt64) / 8) & ~(nuint)1)); - goto Finish; - } - } - else - { - // If we can't use 64-bit tzcnt, no worries. We'll just do 2x 32-bit reads instead. - - currentDWord = Unsafe.ReadUnaligned(pBuffer); - uint nextDWord = Unsafe.ReadUnaligned(pBuffer + 4 / sizeof(char)); - - if (!AllCharsInUInt32AreAscii(currentDWord | nextDWord)) - { - // At least one of the values wasn't all-ASCII. - // We need to figure out which one it was and stick it in the currentMask local. - - if (AllCharsInUInt32AreAscii(currentDWord)) - { - currentDWord = nextDWord; // this one is the culprit - pBuffer += 4 / sizeof(char); - } - - goto FoundNonAsciiDataInCurrentDWord; - } - } - - pBuffer += 4; // successfully consumed 4 ASCII chars - } - - // DWORD drain - - if ((bufferLength & 2) != 0) - { - currentDWord = Unsafe.ReadUnaligned(pBuffer); - - if (!AllCharsInUInt32AreAscii(currentDWord)) - { - goto FoundNonAsciiDataInCurrentDWord; - } - - pBuffer += 2; // successfully consumed 2 ASCII chars - } - - // WORD drain - // This is the final drain; there's no need for a BYTE drain since our elemental type is 16-bit char. 
- - if ((bufferLength & 1) != 0) - { - if (*pBuffer <= 0x007F) - { - pBuffer++; // successfully consumed a single char - } - } - - goto Finish; - } - - /// - /// Given a QWORD which represents a buffer of 4 ASCII chars in machine-endian order, - /// narrows each WORD to a BYTE, then writes the 4-byte result to the output buffer - /// also in machine-endian order. - /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static void NarrowFourUtf16CharsToAsciiAndWriteToBuffer(ref byte outputBuffer, ulong value) - { - Debug.Assert(AllCharsInUInt64AreAscii(value)); - - if (Bmi2.X64.IsSupported) - { - // BMI2 will work regardless of the processor's endianness. - Unsafe.WriteUnaligned(ref outputBuffer, (uint)Bmi2.X64.ParallelBitExtract(value, 0x00FF00FF_00FF00FFul)); - } - else - { - if (BitConverter.IsLittleEndian) - { - outputBuffer = (byte)value; - value >>= 16; - Unsafe.Add(ref outputBuffer, 1) = (byte)value; - value >>= 16; - Unsafe.Add(ref outputBuffer, 2) = (byte)value; - value >>= 16; - Unsafe.Add(ref outputBuffer, 3) = (byte)value; - } - else - { - Unsafe.Add(ref outputBuffer, 3) = (byte)value; - value >>= 16; - Unsafe.Add(ref outputBuffer, 2) = (byte)value; - value >>= 16; - Unsafe.Add(ref outputBuffer, 1) = (byte)value; - value >>= 16; - outputBuffer = (byte)value; - } - } - } - - /// - /// Given a DWORD which represents a buffer of 2 ASCII chars in machine-endian order, - /// narrows each WORD to a BYTE, then writes the 2-byte result to the output buffer also in - /// machine-endian order. 
- /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static void NarrowTwoUtf16CharsToAsciiAndWriteToBuffer(ref byte outputBuffer, uint value) - { - Debug.Assert(AllCharsInUInt32AreAscii(value)); - - if (BitConverter.IsLittleEndian) - { - outputBuffer = (byte)value; - Unsafe.Add(ref outputBuffer, 1) = (byte)(value >> 16); - } - else - { - Unsafe.Add(ref outputBuffer, 1) = (byte)value; - outputBuffer = (byte)(value >> 16); - } - } - - /// - /// Copies as many ASCII characters (U+0000..U+007F) as possible from - /// to , stopping when the first non-ASCII character is encountered - /// or once elements have been converted. Returns the total number - /// of elements that were able to be converted. - /// - public static unsafe nuint NarrowUtf16ToAscii(char* pUtf16Buffer, byte* pAsciiBuffer, nuint elementCount) - { - nuint currentOffset = 0; - - uint utf16Data32BitsHigh = 0, utf16Data32BitsLow = 0; - ulong utf16Data64Bits = 0; - - // If SSE2 is supported, use those specific intrinsics instead of the generic vectorized - // code below. This has two benefits: (a) we can take advantage of specific instructions like - // pmovmskb, ptest, vpminuw which we know are optimized, and (b) we can avoid downclocking the - // processor while this method is running. - - if (Sse2.IsSupported) - { - Debug.Assert(BitConverter.IsLittleEndian, "Assume little endian if SSE2 is supported."); - - if (elementCount >= 2 * (uint)Unsafe.SizeOf>()) - { - // Since there's overhead to setting up the vectorized code path, we only want to - // call into it after a quick probe to ensure the next immediate characters really are ASCII. - // If we see non-ASCII data, we'll jump immediately to the draining logic at the end of the method. 
- - if (PlatformDependent.Is64BitProcess) - { - utf16Data64Bits = Unsafe.ReadUnaligned(pUtf16Buffer); - if (!AllCharsInUInt64AreAscii(utf16Data64Bits)) - { - goto FoundNonAsciiDataIn64BitRead; - } - } - else - { - utf16Data32BitsHigh = Unsafe.ReadUnaligned(pUtf16Buffer); - utf16Data32BitsLow = Unsafe.ReadUnaligned(pUtf16Buffer + 4 / sizeof(char)); - if (!AllCharsInUInt32AreAscii(utf16Data32BitsHigh | utf16Data32BitsLow)) - { - goto FoundNonAsciiDataIn64BitRead; - } - } - - currentOffset = NarrowUtf16ToAscii_Sse2(pUtf16Buffer, pAsciiBuffer, elementCount); - } + currentOffset = NarrowUtf16ToAscii_Sse2(pUtf16Buffer, pAsciiBuffer, elementCount); + } } else if (Vector.IsHardwareAccelerated) { @@ -1285,439 +670,6 @@ public static unsafe nuint NarrowUtf16ToAscii(char* pUtf16Buffer, byte* pAsciiBu goto Finish; } - - private static unsafe nuint NarrowUtf16ToAscii_Sse2(char* pUtf16Buffer, byte* pAsciiBuffer, nuint elementCount) - { - // This method contains logic optimized for both SSE2 and SSE41. Much of the logic in this method - // will be elided by JIT once we determine which specific ISAs we support. - - // JIT turns the below into constants - - uint SizeOfVector128 = (uint)Unsafe.SizeOf>(); - nuint MaskOfAllBitsInVector128 = (nuint)(SizeOfVector128 - 1); - - // This method is written such that control generally flows top-to-bottom, avoiding - // jumps as much as possible in the optimistic case of "all ASCII". If we see non-ASCII - // data, we jump out of the hot paths to targets at the end of the method. 
- - Debug.Assert(Sse2.IsSupported); - Debug.Assert(BitConverter.IsLittleEndian); - Debug.Assert(elementCount >= 2 * SizeOfVector128); - - Vector128 asciiMaskForPTEST = Vector128.Create(unchecked((short)0xFF80)); // used for PTEST on supported hardware - Vector128 asciiMaskForPXOR = Vector128.Create(unchecked((short)0x8000)); // used for PXOR - Vector128 asciiMaskForPCMPGTW = Vector128.Create(unchecked((short)0x807F)); // used for PCMPGTW - - // First, perform an unaligned read of the first part of the input buffer. - - Vector128 utf16VectorFirst = Sse2.LoadVector128((short*)pUtf16Buffer); // unaligned load - - // If there's non-ASCII data in the first 8 elements of the vector, there's nothing we can do. - // See comments in GetIndexOfFirstNonAsciiChar_Sse2 for information about how this works. - - if (Sse41.IsSupported) - { - if (!Sse41.TestZ(utf16VectorFirst, asciiMaskForPTEST)) - { - return 0; - } - } - else - { - if (Sse2.MoveMask(Sse2.CompareGreaterThan(Sse2.Xor(utf16VectorFirst, asciiMaskForPXOR), asciiMaskForPCMPGTW).AsByte()) != 0) - { - return 0; - } - } - - // Turn the 8 ASCII chars we just read into 8 ASCII bytes, then copy it to the destination. - - Vector128 asciiVector = Sse2.PackUnsignedSaturate(utf16VectorFirst, utf16VectorFirst); - Sse2.StoreScalar((ulong*)pAsciiBuffer, asciiVector.AsUInt64()); // ulong* calculated here is UNALIGNED - - nuint currentOffsetInElements = SizeOfVector128 / 2; // we processed 8 elements so far - - // We're going to get the best performance when we have aligned writes, so we'll take the - // hit of potentially unaligned reads in order to hit this sweet spot. - - // pAsciiBuffer points to the start of the destination buffer, immediately before where we wrote - // the 8 bytes previously. If the 0x08 bit is set at the pinned address, then the 8 bytes we wrote - // previously mean that the 0x08 bit is *not* set at address &pAsciiBuffer[SizeOfVector128 / 2]. 
In - // that case we can immediately back up to the previous aligned boundary and start the main loop. - // If the 0x08 bit is *not* set at the pinned address, then it means the 0x08 bit *is* set at - // address &pAsciiBuffer[SizeOfVector128 / 2], and we should perform one more 8-byte write to bump - // just past the next aligned boundary address. - - if (0u >= ((uint)pAsciiBuffer & (SizeOfVector128 / 2))) - { - // We need to perform one more partial vector write before we can get the alignment we want. - - utf16VectorFirst = Sse2.LoadVector128((short*)pUtf16Buffer + currentOffsetInElements); // unaligned load - - // See comments earlier in this method for information about how this works. - if (Sse41.IsSupported) - { - if (!Sse41.TestZ(utf16VectorFirst, asciiMaskForPTEST)) - { - goto Finish; - } - } - else - { - if (Sse2.MoveMask(Sse2.CompareGreaterThan(Sse2.Xor(utf16VectorFirst, asciiMaskForPXOR), asciiMaskForPCMPGTW).AsByte()) != 0) - { - goto Finish; - } - } - - // Turn the 8 ASCII chars we just read into 8 ASCII bytes, then copy it to the destination. - asciiVector = Sse2.PackUnsignedSaturate(utf16VectorFirst, utf16VectorFirst); - Sse2.StoreScalar((ulong*)(pAsciiBuffer + currentOffsetInElements), asciiVector.AsUInt64()); // ulong* calculated here is UNALIGNED - } - - // Calculate how many elements we wrote in order to get pAsciiBuffer to its next alignment - // point, then use that as the base offset going forward. 
- - currentOffsetInElements = SizeOfVector128 - ((nuint)pAsciiBuffer & MaskOfAllBitsInVector128); - Debug.Assert(0 < currentOffsetInElements && currentOffsetInElements <= SizeOfVector128, "We wrote at least 1 byte but no more than a whole vector."); - - Debug.Assert(currentOffsetInElements <= elementCount, "Shouldn't have overrun the destination buffer."); - Debug.Assert(elementCount - currentOffsetInElements >= SizeOfVector128, "We should be able to run at least one whole vector."); - - nuint finalOffsetWhereCanRunLoop = elementCount - SizeOfVector128; - do - { - // In a loop, perform two unaligned reads, narrow to a single vector, then aligned write one vector. - - utf16VectorFirst = Sse2.LoadVector128((short*)pUtf16Buffer + currentOffsetInElements); // unaligned load - Vector128 utf16VectorSecond = Sse2.LoadVector128((short*)pUtf16Buffer + currentOffsetInElements + SizeOfVector128 / sizeof(short)); // unaligned load - Vector128 combinedVector = Sse2.Or(utf16VectorFirst, utf16VectorSecond); - - // See comments in GetIndexOfFirstNonAsciiChar_Sse2 for information about how this works. - if (Sse41.IsSupported) - { - if (!Sse41.TestZ(combinedVector, asciiMaskForPTEST)) - { - goto FoundNonAsciiDataInLoop; - } - } - else - { - if (Sse2.MoveMask(Sse2.CompareGreaterThan(Sse2.Xor(combinedVector, asciiMaskForPXOR), asciiMaskForPCMPGTW).AsByte()) != 0) - { - goto FoundNonAsciiDataInLoop; - } - } - - // Build up the UTF-8 vector and perform the store. - - asciiVector = Sse2.PackUnsignedSaturate(utf16VectorFirst, utf16VectorSecond); - - Debug.Assert(((nuint)pAsciiBuffer + currentOffsetInElements) % SizeOfVector128 == 0, "Write should be aligned."); - Sse2.StoreAligned(pAsciiBuffer + currentOffsetInElements, asciiVector); // aligned - - currentOffsetInElements += SizeOfVector128; - } while (currentOffsetInElements <= finalOffsetWhereCanRunLoop); - - Finish: - - // There might be some ASCII data left over. That's fine - we'll let our caller handle the final drain. 
- return currentOffsetInElements; - - FoundNonAsciiDataInLoop: - - // Can we at least narrow the high vector? - // See comments in GetIndexOfFirstNonAsciiChar_Sse2 for information about how this works. - if (Sse41.IsSupported) - { - if (!Sse41.TestZ(utf16VectorFirst, asciiMaskForPTEST)) - { - goto Finish; // found non-ASCII data - } - } - else - { - if (Sse2.MoveMask(Sse2.CompareGreaterThan(Sse2.Xor(utf16VectorFirst, asciiMaskForPXOR), asciiMaskForPCMPGTW).AsByte()) != 0) - { - goto Finish; // found non-ASCII data - } - } - - // First part was all ASCII, narrow and aligned write. Note we're only filling in the low half of the vector. - asciiVector = Sse2.PackUnsignedSaturate(utf16VectorFirst, utf16VectorFirst); - - Debug.Assert(((nuint)pAsciiBuffer + currentOffsetInElements) % sizeof(ulong) == 0, "Destination should be ulong-aligned."); - - Sse2.StoreScalar((ulong*)(pAsciiBuffer + currentOffsetInElements), asciiVector.AsUInt64()); // ulong* calculated here is aligned - currentOffsetInElements += SizeOfVector128 / 2; - - goto Finish; - } - - /// - /// Copies as many ASCII bytes (00..7F) as possible from - /// to , stopping when the first non-ASCII byte is encountered - /// or once elements have been converted. Returns the total number - /// of elements that were able to be converted. - /// - public static unsafe nuint WidenAsciiToUtf16(byte* pAsciiBuffer, char* pUtf16Buffer, nuint elementCount) - { - nuint currentOffset = 0; - - // If SSE2 is supported, use those specific intrinsics instead of the generic vectorized - // code below. This has two benefits: (a) we can take advantage of specific instructions like - // pmovmskb which we know are optimized, and (b) we can avoid downclocking the processor while - // this method is running. 
- - if (Sse2.IsSupported) - { - if (elementCount >= 2 * (uint)Unsafe.SizeOf>()) - { - currentOffset = WidenAsciiToUtf16_Sse2(pAsciiBuffer, pUtf16Buffer, elementCount); - } - } - else if (Vector.IsHardwareAccelerated) - { - uint SizeOfVector = (uint)Unsafe.SizeOf>(); // JIT will make this a const - - // Only bother vectorizing if we have enough data to do so. - if (elementCount >= SizeOfVector) - { - // Note use of SBYTE instead of BYTE below; we're using the two's-complement - // representation of negative integers to act as a surrogate for "is ASCII?". - - nuint finalOffsetWhereCanLoop = elementCount - SizeOfVector; - do - { - Vector asciiVector = Unsafe.ReadUnaligned>(pAsciiBuffer + currentOffset); - if (Vector.LessThanAny(asciiVector, Vector.Zero)) - { - break; // found non-ASCII data - } - - Vector.Widen(Vector.AsVectorByte(asciiVector), out Vector utf16LowVector, out Vector utf16HighVector); - - // TODO: Is the below logic also valid for big-endian platforms? - Unsafe.WriteUnaligned>(pUtf16Buffer + currentOffset, utf16LowVector); - Unsafe.WriteUnaligned>(pUtf16Buffer + currentOffset + Vector.Count, utf16HighVector); - - currentOffset += SizeOfVector; - } while (currentOffset <= finalOffsetWhereCanLoop); - } - } - - Debug.Assert(currentOffset <= elementCount); - nuint remainingElementCount = elementCount - currentOffset; - - // Try to widen 32 bits -> 64 bits at a time. - // We needn't update remainingElementCount after this point. - - uint asciiData; - - if (remainingElementCount >= 4) - { - nuint finalOffsetWhereCanLoop = currentOffset + remainingElementCount - 4; - do - { - asciiData = Unsafe.ReadUnaligned(pAsciiBuffer + currentOffset); - if (!AllBytesInUInt32AreAscii(asciiData)) - { - goto FoundNonAsciiData; - } - - WidenFourAsciiBytesToUtf16AndWriteToBuffer(ref pUtf16Buffer[currentOffset], asciiData); - currentOffset += 4; - } while (currentOffset <= finalOffsetWhereCanLoop); - } - - // Try to widen 16 bits -> 32 bits. 
- - if (((uint)remainingElementCount & 2) != 0) - { - asciiData = Unsafe.ReadUnaligned(pAsciiBuffer + currentOffset); - if (!AllBytesInUInt32AreAscii(asciiData)) - { - goto FoundNonAsciiData; - } - - if (BitConverter.IsLittleEndian) - { - pUtf16Buffer[currentOffset] = (char)(byte)asciiData; - pUtf16Buffer[currentOffset + 1] = (char)(asciiData >> 8); - } - else - { - pUtf16Buffer[currentOffset + 1] = (char)(byte)asciiData; - pUtf16Buffer[currentOffset] = (char)(asciiData >> 8); - } - - currentOffset += 2; - } - - // Try to widen 8 bits -> 16 bits. - - if (((uint)remainingElementCount & 1) != 0) - { - asciiData = pAsciiBuffer[currentOffset]; - if (((byte)asciiData & 0x80) != 0) - { - goto Finish; - } - - pUtf16Buffer[currentOffset] = (char)asciiData; - currentOffset += 1; - } - - Finish: - - return currentOffset; - - FoundNonAsciiData: - - Debug.Assert(!AllBytesInUInt32AreAscii(asciiData), "Shouldn't have reached this point if we have an all-ASCII input."); - - // Drain ASCII bytes one at a time. - - while (0u >= (uint)((byte)asciiData & 0x80)) - { - pUtf16Buffer[currentOffset] = (char)(byte)asciiData; - currentOffset += 1; - asciiData >>= 8; - } - - goto Finish; - } - - private static unsafe nuint WidenAsciiToUtf16_Sse2(byte* pAsciiBuffer, char* pUtf16Buffer, nuint elementCount) - { - // JIT turns the below into constants - - uint SizeOfVector128 = (uint)Unsafe.SizeOf>(); - nuint MaskOfAllBitsInVector128 = (nuint)(SizeOfVector128 - 1); - - // This method is written such that control generally flows top-to-bottom, avoiding - // jumps as much as possible in the optimistic case of "all ASCII". If we see non-ASCII - // data, we jump out of the hot paths to targets at the end of the method. 
- - Debug.Assert(Sse2.IsSupported); - Debug.Assert(BitConverter.IsLittleEndian); - Debug.Assert(elementCount >= 2 * SizeOfVector128); - - // We're going to get the best performance when we have aligned writes, so we'll take the - // hit of potentially unaligned reads in order to hit this sweet spot. - - Vector128 asciiVector; - Vector128 utf16FirstHalfVector; - uint mask; - - // First, perform an unaligned read of the first part of the input buffer. - - asciiVector = Sse2.LoadVector128(pAsciiBuffer); // unaligned load - mask = (uint)Sse2.MoveMask(asciiVector); - - // If there's non-ASCII data in the first 8 elements of the vector, there's nothing we can do. - - if ((byte)mask != 0) - { - return 0; - } - - // Then perform an unaligned write of the first part of the input buffer. - - Vector128 zeroVector = Vector128.Zero; - - utf16FirstHalfVector = Sse2.UnpackLow(asciiVector, zeroVector); - Sse2.Store((byte*)pUtf16Buffer, utf16FirstHalfVector); // unaligned - - // Calculate how many elements we wrote in order to get pOutputBuffer to its next alignment - // point, then use that as the base offset going forward. Remember the >> 1 to account for - // that we wrote chars, not bytes. This means we may re-read data in the next iteration of - // the loop, but this is ok. - - nuint currentOffset = (SizeOfVector128 >> 1) - (((nuint)pUtf16Buffer >> 1) & (MaskOfAllBitsInVector128 >> 1)); - Debug.Assert(0 < currentOffset && currentOffset <= SizeOfVector128 / sizeof(char)); - - nuint finalOffsetWhereCanRunLoop = elementCount - SizeOfVector128; - - do - { - // In a loop, perform an unaligned read, widen to two vectors, then aligned write the two vectors. 
- - asciiVector = Sse2.LoadVector128(pAsciiBuffer + currentOffset); // unaligned load - mask = (uint)Sse2.MoveMask(asciiVector); - - if (mask != 0) - { - // non-ASCII byte somewhere - goto NonAsciiDataSeenInInnerLoop; - } - - byte* pStore = (byte*)(pUtf16Buffer + currentOffset); - Sse2.StoreAligned(pStore, Sse2.UnpackLow(asciiVector, zeroVector)); - - pStore += SizeOfVector128; - Sse2.StoreAligned(pStore, Sse2.UnpackHigh(asciiVector, zeroVector)); - - currentOffset += SizeOfVector128; - } while (currentOffset <= finalOffsetWhereCanRunLoop); - - Finish: - - return currentOffset; - - NonAsciiDataSeenInInnerLoop: - - // Can we at least widen the first part of the vector? - - if (0u >= ((byte)mask)) - { - // First part was all ASCII, widen - utf16FirstHalfVector = Sse2.UnpackLow(asciiVector, zeroVector); - Sse2.StoreAligned((byte*)(pUtf16Buffer + currentOffset), utf16FirstHalfVector); - currentOffset += SizeOfVector128 / 2; - } - - goto Finish; - } - - /// - /// Given a DWORD which represents a buffer of 4 bytes, widens the buffer into 4 WORDs and - /// writes them to the output buffer with machine endianness. - /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static void WidenFourAsciiBytesToUtf16AndWriteToBuffer(ref char outputBuffer, uint value) - { - Debug.Assert(AllBytesInUInt32AreAscii(value)); - - if (Bmi2.X64.IsSupported) - { - // BMI2 will work regardless of the processor's endianness. 
- Unsafe.WriteUnaligned(ref Unsafe.As(ref outputBuffer), Bmi2.X64.ParallelBitDeposit(value, 0x00FF00FF_00FF00FFul)); - } - else - { - if (BitConverter.IsLittleEndian) - { - outputBuffer = (char)(byte)value; - value >>= 8; - Unsafe.Add(ref outputBuffer, 1) = (char)(byte)value; - value >>= 8; - Unsafe.Add(ref outputBuffer, 2) = (char)(byte)value; - value >>= 8; - Unsafe.Add(ref outputBuffer, 3) = (char)value; - } - else - { - Unsafe.Add(ref outputBuffer, 3) = (char)(byte)value; - value >>= 8; - Unsafe.Add(ref outputBuffer, 2) = (char)(byte)value; - value >>= 8; - Unsafe.Add(ref outputBuffer, 1) = (char)(byte)value; - value >>= 8; - outputBuffer = (char)value; - } - } - } } } #endif diff --git a/src/DotNetty.Common/Internal/TextEncodings.Utf16.NetCore3.cs b/src/DotNetty.Common/Internal/TextEncodings.Utf16.NetCore3.cs index 25bf98fc1..8e4010a0f 100644 --- a/src/DotNetty.Common/Internal/TextEncodings.Utf16.NetCore3.cs +++ b/src/DotNetty.Common/Internal/TextEncodings.Utf16.NetCore3.cs @@ -70,6 +70,98 @@ private static unsafe int GetBytesFastInternal(char* pChars, int charsLength, by charsConsumed = (int)(pInputBufferRemaining - pChars); return (int)(pOutputBufferRemaining - pBytes); } + + + /// <summary> + /// Transcodes the UTF-16 <paramref name="source"/> buffer to <paramref name="destination"/> as UTF-8. + /// </summary> + /// <remarks> + /// If <paramref name="replaceInvalidSequences"/> is <see langword="true"/>, invalid UTF-16 sequences + /// in <paramref name="source"/> will be replaced with U+FFFD in <paramref name="destination"/>, and + /// this method will not return <see cref="OperationStatus.InvalidData"/>. 
+ /// </remarks> + public static unsafe OperationStatus ToUtf8(ReadOnlySpan source, Span destination, out int charsRead, out int bytesWritten, bool replaceInvalidSequences = true, bool isFinalBlock = true) + { + // Throwaway span accesses - workaround for https://github.com/dotnet/coreclr/issues/12332 + + _ = source.Length; + _ = destination.Length; + + fixed (char* pOriginalSource = &MemoryMarshal.GetReference(source)) + fixed (byte* pOriginalDestination = &MemoryMarshal.GetReference(destination)) + { + // We're going to bulk transcode as much as we can in a loop, iterating + // every time we see bad data that requires replacement. + + OperationStatus operationStatus = OperationStatus.Done; + char* pInputBufferRemaining = pOriginalSource; + byte* pOutputBufferRemaining = pOriginalDestination; + + while (!source.IsEmpty) + { + // We've pinned the spans at the entry point to this method. + // It's safe for us to use Unsafe.AsPointer on them during this loop. + + operationStatus = Utf8Utility.TranscodeToUtf8( + pInputBuffer: (char*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(source)), + inputLength: source.Length, + pOutputBuffer: (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(destination)), + outputBytesRemaining: destination.Length, + pInputBufferRemaining: out pInputBufferRemaining, + pOutputBufferRemaining: out pOutputBufferRemaining); + + // If we finished the operation entirely or we ran out of space in the destination buffer, + // or if we need more input data and the caller told us that there's possibly more data + // coming, return immediately. + + if (operationStatus <= OperationStatus.DestinationTooSmall + || (operationStatus == OperationStatus.NeedMoreData && !isFinalBlock)) + { + break; + } + + // We encountered invalid data, or we need more data but the caller told us we're + // at the end of the stream. In either case treat this as truly invalid. + // If the caller didn't tell us to replace invalid sequences, return immediately. 
+ + if (!replaceInvalidSequences) + { + operationStatus = OperationStatus.InvalidData; // status code may have been NeedMoreData - force to be error + break; + } + + // We're going to attempt to write U+FFFD to the destination buffer. + // Do we even have enough space to do so? + + destination = destination.Slice((int)(pOutputBufferRemaining - (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(destination)))); + + if (2 >= (uint)destination.Length) + { + operationStatus = OperationStatus.DestinationTooSmall; + break; + } + + destination[0] = 0xEF; // U+FFFD = [ EF BF BD ] in UTF-8 + destination[1] = 0xBF; + destination[2] = 0xBD; + destination = destination.Slice(3); + + // Invalid UTF-16 sequences are always of length 1. Just skip the next character. + + source = source.Slice((int)(pInputBufferRemaining - (char*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(source))) + 1); + + operationStatus = OperationStatus.Done; // we patched the error - if we're about to break out of the loop this is a success case + pInputBufferRemaining = (char*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(source)); + pOutputBufferRemaining = (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(destination)); + } + + // Not possible to make any further progress - report to our caller how far we got. 
+ + charsRead = (int)(pInputBufferRemaining - pOriginalSource); + bytesWritten = (int)(pOutputBufferRemaining - pOriginalDestination); + return operationStatus; + } + } } } } diff --git a/src/DotNetty.Common/Internal/TextEncodings.Utf8.NetCore3.cs b/src/DotNetty.Common/Internal/TextEncodings.Utf8.NetCore3.cs index c0cda71ca..200b5d460 100644 --- a/src/DotNetty.Common/Internal/TextEncodings.Utf8.NetCore3.cs +++ b/src/DotNetty.Common/Internal/TextEncodings.Utf8.NetCore3.cs @@ -2,8 +2,11 @@ namespace DotNetty.Common.Internal { using System; + using System.Buffers; + using System.Diagnostics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; + using System.Text; public static partial class TextEncodings { @@ -122,6 +125,102 @@ static ArgumentException GetArgumentException() return new ArgumentException("Argument_ConversionOverflow"); } } + + /// <summary> + /// Transcodes the UTF-8 <paramref name="source"/> buffer to <paramref name="destination"/> as UTF-16. + /// </summary> + /// <remarks> + /// If <paramref name="replaceInvalidSequences"/> is <see langword="true"/>, invalid UTF-8 sequences + /// in <paramref name="source"/> will be replaced with U+FFFD in <paramref name="destination"/>, and + /// this method will not return <see cref="OperationStatus.InvalidData"/>. + /// </remarks> + public static unsafe OperationStatus ToUtf16(ReadOnlySpan source, Span destination, out int bytesRead, out int charsWritten, bool replaceInvalidSequences = true, bool isFinalBlock = true) + { + // Throwaway span accesses - workaround for https://github.com/dotnet/coreclr/issues/12332 + + _ = source.Length; + _ = destination.Length; + + // We'll be mutating these values throughout our loop. + + fixed (byte* pOriginalSource = &MemoryMarshal.GetReference(source)) + fixed (char* pOriginalDestination = &MemoryMarshal.GetReference(destination)) + { + // We're going to bulk transcode as much as we can in a loop, iterating + // every time we see bad data that requires replacement. 
+ + OperationStatus operationStatus = OperationStatus.Done; + byte* pInputBufferRemaining = pOriginalSource; + char* pOutputBufferRemaining = pOriginalDestination; + + while (!source.IsEmpty) + { + // We've pinned the spans at the entry point to this method. + // It's safe for us to use Unsafe.AsPointer on them during this loop. + + operationStatus = Utf8Utility.TranscodeToUtf16( + pInputBuffer: (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(source)), + inputLength: source.Length, + pOutputBuffer: (char*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(destination)), + outputCharsRemaining: destination.Length, + pInputBufferRemaining: out pInputBufferRemaining, + pOutputBufferRemaining: out pOutputBufferRemaining); + + // If we finished the operation entirely or we ran out of space in the destination buffer, + // or if we need more input data and the caller told us that there's possibly more data + // coming, return immediately. + + if (operationStatus <= OperationStatus.DestinationTooSmall + || (operationStatus == OperationStatus.NeedMoreData && !isFinalBlock)) + { + break; + } + + // We encountered invalid data, or we need more data but the caller told us we're + // at the end of the stream. In either case treat this as truly invalid. + // If the caller didn't tell us to replace invalid sequences, return immediately. + + if (!replaceInvalidSequences) + { + operationStatus = OperationStatus.InvalidData; // status code may have been NeedMoreData - force to be error + break; + } + + // We're going to attempt to write U+FFFD to the destination buffer. + // Do we even have enough space to do so? 
+ + destination = destination.Slice((int)(pOutputBufferRemaining - (char*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(destination)))); + + if (destination.IsEmpty) + { + operationStatus = OperationStatus.DestinationTooSmall; + break; + } + + destination[0] = (char)UnicodeUtility.ReplacementChar; + destination = destination.Slice(1); + + // Now figure out how many bytes of the source we must skip over before we should retry + // the operation. This might be more than 1 byte. + + source = source.Slice((int)(pInputBufferRemaining - (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(source)))); + Debug.Assert(!source.IsEmpty, "Expected 'Done' if source is fully consumed."); + + Rune.DecodeFromUtf8(source, out _, out int bytesConsumedJustNow); + source = source.Slice(bytesConsumedJustNow); + + operationStatus = OperationStatus.Done; // we patched the error - if we're about to break out of the loop this is a success case + pInputBufferRemaining = (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(source)); + pOutputBufferRemaining = (char*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(destination)); + } + + // Not possible to make any further progress - report to our caller how far we got. 
+ + bytesRead = (int)(pInputBufferRemaining - pOriginalSource); + charsWritten = (int)(pOutputBufferRemaining - pOriginalDestination); + return operationStatus; + } + } } } } diff --git a/src/DotNetty.Common/Internal/TextEncodings.cs b/src/DotNetty.Common/Internal/TextEncodings.cs index f43029558..e3e252148 100644 --- a/src/DotNetty.Common/Internal/TextEncodings.cs +++ b/src/DotNetty.Common/Internal/TextEncodings.cs @@ -7,13 +7,13 @@ public static partial class TextEncodings { /// 不提供 Unicode 字节顺序标记,检测到无效的编码时不引发异常 - public static readonly UTF8Encoding UTF8NoBOM = new UTF8Encoding(false); + public static readonly UTF8Encoding UTF8NoBOM = new(false); /// 不提供 Unicode 字节顺序标记,检测到无效的编码时引发异常 - public static readonly UTF8Encoding SecureUTF8NoBOM = new UTF8Encoding(false, true); + public static readonly UTF8Encoding SecureUTF8NoBOM = new(false, true); /// 提供 Unicode 字节顺序标记,检测到无效的编码时引发异常 - public static readonly UTF8Encoding SecureUTF8 = new UTF8Encoding(true, true); + public static readonly UTF8Encoding SecureUTF8 = new(true, true); public const int ASCIICodePage = 20127; diff --git a/src/DotNetty.Common/Internal/UnicodeDebug.cs b/src/DotNetty.Common/Internal/UnicodeDebug.cs index 095370d94..c1b04d2ed 100644 --- a/src/DotNetty.Common/Internal/UnicodeDebug.cs +++ b/src/DotNetty.Common/Internal/UnicodeDebug.cs @@ -13,31 +13,46 @@ internal static class UnicodeDebug [Conditional("DEBUG")] internal static void AssertIsHighSurrogateCodePoint(uint codePoint) { - Debug.Assert(UnicodeUtility.IsHighSurrogateCodePoint(codePoint), $"The value {ToHexString(codePoint)} is not a valid UTF-16 high surrogate code point."); + if (!UnicodeUtility.IsHighSurrogateCodePoint(codePoint)) + { + Debug.Fail($"The value {ToHexString(codePoint)} is not a valid UTF-16 high surrogate code point."); + } } [Conditional("DEBUG")] internal static void AssertIsLowSurrogateCodePoint(uint codePoint) { - Debug.Assert(UnicodeUtility.IsLowSurrogateCodePoint(codePoint), $"The value {ToHexString(codePoint)} is not a 
valid UTF-16 low surrogate code point."); + if (!UnicodeUtility.IsLowSurrogateCodePoint(codePoint)) + { + Debug.Fail($"The value {ToHexString(codePoint)} is not a valid UTF-16 low surrogate code point."); + } } [Conditional("DEBUG")] internal static void AssertIsValidCodePoint(uint codePoint) { - Debug.Assert(UnicodeUtility.IsValidCodePoint(codePoint), $"The value {ToHexString(codePoint)} is not a valid Unicode code point."); + if (!UnicodeUtility.IsValidCodePoint(codePoint)) + { + Debug.Fail($"The value {ToHexString(codePoint)} is not a valid Unicode code point."); + } } [Conditional("DEBUG")] internal static void AssertIsValidScalar(uint scalarValue) { - Debug.Assert(UnicodeUtility.IsValidUnicodeScalar(scalarValue), $"The value {ToHexString(scalarValue)} is not a valid Unicode scalar value."); + if (!UnicodeUtility.IsValidUnicodeScalar(scalarValue)) + { + Debug.Fail($"The value {ToHexString(scalarValue)} is not a valid Unicode scalar value."); + } } [Conditional("DEBUG")] internal static void AssertIsValidSupplementaryPlaneScalar(uint scalarValue) { - Debug.Assert(UnicodeUtility.IsValidUnicodeScalar(scalarValue) && !UnicodeUtility.IsBmpCodePoint(scalarValue), $"The value {ToHexString(scalarValue)} is not a valid supplementary plane Unicode scalar value."); + if (!UnicodeUtility.IsValidUnicodeScalar(scalarValue) || UnicodeUtility.IsBmpCodePoint(scalarValue)) + { + Debug.Fail($"The value {ToHexString(scalarValue)} is not a valid supplementary plane Unicode scalar value."); + } } /// diff --git a/src/DotNetty.Common/Internal/UnicodeUtility.cs b/src/DotNetty.Common/Internal/UnicodeUtility.cs index da12856f4..d2c98244a 100644 --- a/src/DotNetty.Common/Internal/UnicodeUtility.cs +++ b/src/DotNetty.Common/Internal/UnicodeUtility.cs @@ -120,14 +120,14 @@ public static int GetUtf8SequenceLength(uint value) /// Per http://www.unicode.org/glossary/#ASCII, ASCII is only U+0000..U+007F. 
/// [MethodImpl(InlineMethod.AggressiveOptimization)] - public static bool IsAsciiCodePoint(uint value) => (value <= 0x7Fu); + public static bool IsAsciiCodePoint(uint value) => value <= 0x7Fu; /// /// Returns iff is in the /// Basic Multilingual Plane (BMP). /// [MethodImpl(InlineMethod.AggressiveOptimization)] - public static bool IsBmpCodePoint(uint value) => (value <= 0xFFFFu); + public static bool IsBmpCodePoint(uint value) => value <= 0xFFFFu; /// /// Returns iff is a UTF-16 high surrogate code point, @@ -142,7 +142,7 @@ public static int GetUtf8SequenceLength(uint value) /// [MethodImpl(InlineMethod.AggressiveOptimization)] public static bool IsInRangeInclusive(uint value, uint lowerBound, uint upperBound) - => ((value - lowerBound) <= (upperBound - lowerBound)); + => (value - lowerBound) <= (upperBound - lowerBound); ///// ///// Returns if is between @@ -187,7 +187,7 @@ public static bool IsInRangeInclusive(long value, long lowerBound, long upperBou /// point, i.e., is in [ U+0000..U+10FFFF ], inclusive. /// [MethodImpl(InlineMethod.AggressiveOptimization)] - public static bool IsValidCodePoint(uint codePoint) => (codePoint <= 0x10FFFFU); + public static bool IsValidCodePoint(uint codePoint) => codePoint <= 0x10FFFFU; /// /// Returns iff is a valid Unicode scalar @@ -197,7 +197,7 @@ public static bool IsInRangeInclusive(long value, long lowerBound, long upperBou public static bool IsValidUnicodeScalar(uint value) { // This is an optimized check that on x86 is just three instructions: lea, xor, cmp. 
- // + // // After the subtraction operation, the input value is modified as such: // [ 00000000..0010FFFF ] -> [ FFEF0000..FFFFFFFF ] // diff --git a/src/DotNetty.Common/Internal/Utf16Utility.Validation.Net.cs b/src/DotNetty.Common/Internal/Utf16Utility.Validation.Net.cs new file mode 100644 index 000000000..73ec52772 --- /dev/null +++ b/src/DotNetty.Common/Internal/Utf16Utility.Validation.Net.cs @@ -0,0 +1,508 @@ +// borrowed from https://github.com/dotnet/corefx/tree/release/3.1/src/Common/src/CoreLib/System/Text/Unicode + +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +#if NET +using System; +using System.Diagnostics; +using System.Runtime.CompilerServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; +using System.Runtime.Intrinsics.X86; +using System.Numerics; +using nuint_64 = System.UInt64; +using nuint_32 = System.UInt32; + +namespace DotNetty.Common.Internal +{ + internal static unsafe partial class Utf16Utility + { + // Returns &inputBuffer[inputLength] if the input buffer is valid. + /// + /// Given an input buffer of char length , + /// returns a pointer to where the first invalid data appears in . + /// + /// + /// Returns a pointer to the end of if the buffer is well-formed. + /// + public static char* GetPointerToFirstInvalidChar(char* pInputBuffer, int inputLength, out long utf8CodeUnitCountAdjustment, out int scalarCountAdjustment) + { + Debug.Assert(inputLength >= 0, "Input length must not be negative."); + Debug.Assert(pInputBuffer != null || inputLength == 0, "Input length must be zero if input buffer pointer is null."); + + // First, we'll handle the common case of all-ASCII. If this is able to + // consume the entire buffer, we'll skip the remainder of this method's logic. 
+ + int numAsciiCharsConsumedJustNow = (int)ASCIIUtility.GetIndexOfFirstNonAsciiChar(pInputBuffer, (uint)inputLength); + Debug.Assert(0 <= numAsciiCharsConsumedJustNow && numAsciiCharsConsumedJustNow <= inputLength); + + pInputBuffer += (uint)numAsciiCharsConsumedJustNow; + inputLength -= numAsciiCharsConsumedJustNow; + + if (0u >= (uint)inputLength) + { + utf8CodeUnitCountAdjustment = 0; + scalarCountAdjustment = 0; + return pInputBuffer; + } + + // If we got here, it means we saw some non-ASCII data, so within our + // vectorized code paths below we'll handle all non-surrogate UTF-16 + // code points branchlessly. We'll only branch if we see surrogates. + // + // We still optimistically assume the data is mostly ASCII. This means that the + // number of UTF-8 code units and the number of scalars almost matches the number + // of UTF-16 code units. As we go through the input and find non-ASCII + // characters, we'll keep track of these "adjustment" fixups. To get the + // total number of UTF-8 code units required to encode the input data, add + // the UTF-8 code unit count adjustment to the number of UTF-16 code units + // seen. To get the total number of scalars present in the input data, + // add the scalar count adjustment to the number of UTF-16 code units seen. + + long tempUtf8CodeUnitCountAdjustment = 0; + int tempScalarCountAdjustment = 0; + + // Per https://github.com/dotnet/runtime/issues/41699, temporarily disabling + // ARM64-intrinsicified code paths. ARM64 platforms may still use the vectorized + // non-intrinsicified 'else' block below. 
+ + if (/* (AdvSimd.Arm64.IsSupported && BitConverter.IsLittleEndian) || */ Sse2.IsSupported) + { + if (inputLength >= Vector128.Count) + { + Vector128 vector0080 = Vector128.Create((ushort)0x80); + Vector128 vectorA800 = Vector128.Create((ushort)0xA800); + Vector128 vector8800 = Vector128.Create(unchecked((short)0x8800)); + Vector128 vectorZero = Vector128.Zero; + + Vector128 bitMask128 = BitConverter.IsLittleEndian ? + Vector128.Create(0x80402010_08040201).AsByte() : + Vector128.Create(0x01020408_10204080).AsByte(); + + do + { + Vector128 utf16Data; + if (AdvSimd.Arm64.IsSupported) + { + utf16Data = AdvSimd.LoadVector128((ushort*)pInputBuffer); // unaligned + } + else + { + utf16Data = Sse2.LoadVector128((ushort*)pInputBuffer); // unaligned + } + + Vector128 charIsNonAscii; + + if (AdvSimd.Arm64.IsSupported) + { + // Sets the 0x0080 bit of each element in 'charIsNonAscii' if the corresponding + // input was 0x0080 <= [value]. (i.e., [value] is non-ASCII.) + charIsNonAscii = AdvSimd.Min(utf16Data, vector0080); + } + else if (Sse41.IsSupported) + { + // Sets the 0x0080 bit of each element in 'charIsNonAscii' if the corresponding + // input was 0x0080 <= [value]. (i.e., [value] is non-ASCII.) + charIsNonAscii = Sse41.Min(utf16Data, vector0080); + } + else + { + // Sets the 0x0080 bit of each element in 'charIsNonAscii' if the corresponding + // input was 0x0080 <= [value] <= 0x7FFF. The case where 0x8000 <= [value] will + // be handled in a few lines. + + charIsNonAscii = Sse2.AndNot(Sse2.CompareGreaterThan(vector0080.AsInt16(), utf16Data.AsInt16()).AsUInt16(), vector0080); + } + +#if DEBUG + // Quick check to ensure we didn't accidentally set the 0x8000 bit of any element. 
+ uint debugMask; + if (AdvSimd.Arm64.IsSupported) + { + debugMask = GetNonAsciiBytes(charIsNonAscii.AsByte(), bitMask128); + } + else + { + debugMask = (uint)Sse2.MoveMask(charIsNonAscii.AsByte()); + } + Debug.Assert((debugMask & 0b_1010_1010_1010_1010) == 0, "Shouldn't have set the 0x8000 bit of any element in 'charIsNonAscii'."); +#endif // DEBUG + + // Sets the 0x8080 bits of each element in 'charIsNonAscii' if the corresponding + // input was 0x0800 <= [value]. This also handles the missing range a few lines above. + + Vector128 charIsThreeByteUtf8Encoded; + uint mask; + + if (AdvSimd.IsSupported) + { + charIsThreeByteUtf8Encoded = AdvSimd.Subtract(vectorZero, AdvSimd.ShiftRightLogical(utf16Data, 11)); + mask = GetNonAsciiBytes(AdvSimd.Or(charIsNonAscii, charIsThreeByteUtf8Encoded).AsByte(), bitMask128); + } + else + { + charIsThreeByteUtf8Encoded = Sse2.Subtract(vectorZero, Sse2.ShiftRightLogical(utf16Data, 11)); + mask = (uint)Sse2.MoveMask(Sse2.Or(charIsNonAscii, charIsThreeByteUtf8Encoded).AsByte()); + } + + // Each even bit of mask will be 1 only if the char was >= 0x0080, + // and each odd bit of mask will be 1 only if the char was >= 0x0800. + // + // Example for UTF-16 input "[ 0123 ] [ 1234 ] ...": + // + // ,-- set if char[1] is >= 0x0800 + // | ,-- set if char[0] is >= 0x0800 + // v v + // mask = ... 1 1 0 1 + // ^ ^-- set if char[0] is non-ASCII + // `-- set if char[1] is non-ASCII + // + // This means we can popcnt the number of set bits, and the result is the + // number of *additional* UTF-8 bytes that each UTF-16 code unit requires as + // it expands. This results in the wrong count for UTF-16 surrogate code + // units (we just counted that each individual code unit expands to 3 bytes, + // but in reality a well-formed UTF-16 surrogate pair expands to 4 bytes). + // We'll handle this in just a moment. + // + // For now, compute the popcnt but squirrel it away. 
We'll fold it in to the + // cumulative UTF-8 adjustment factor once we determine that there are no + // unpaired surrogates in our data. (Unpaired surrogates would invalidate + // our computed result and we'd have to throw it away.) + + uint popcnt = (uint)BitOperations.PopCount(mask); + + // Surrogates need to be special-cased for two reasons: (a) we need + // to account for the fact that we over-counted in the addition above; + // and (b) they require separate validation. + if (AdvSimd.Arm64.IsSupported) + { + utf16Data = AdvSimd.Add(utf16Data, vectorA800); + mask = GetNonAsciiBytes(AdvSimd.CompareLessThan(utf16Data.AsInt16(), vector8800).AsByte(), bitMask128); + } + else + { + utf16Data = Sse2.Add(utf16Data, vectorA800); + mask = (uint)Sse2.MoveMask(Sse2.CompareLessThan(utf16Data.AsInt16(), vector8800).AsByte()); + } + + if (mask != 0) + { + // There's at least one UTF-16 surrogate code unit present. + // Since we performed a pmovmskb operation on the result of a 16-bit pcmpgtw, + // the resulting bits of 'mask' will occur in pairs: + // - 00 if the corresponding UTF-16 char was not a surrogate code unit; + // - 11 if the corresponding UTF-16 char was a surrogate code unit. + // + // A UTF-16 high/low surrogate code unit has the bit pattern [ 11011q## ######## ], + // where # is any bit; q = 0 represents a high surrogate, and q = 1 represents + // a low surrogate. Since we added 0xA800 in the vectorized operation above, + // our surrogate pairs will now have the bit pattern [ 10000q## ######## ]. + // If we logical right-shift each word by 3, we'll end up with the bit pattern + // [ 00010000 q####### ], which means that we can immediately use pmovmskb to + // determine whether a given char was a high or a low surrogate. 
+ // + // Therefore the resulting bits of 'mask2' will occur in pairs: + // - 00 if the corresponding UTF-16 char was a high surrogate code unit; + // - 01 if the corresponding UTF-16 char was a low surrogate code unit; + // - ## (garbage) if the corresponding UTF-16 char was not a surrogate code unit. + // Since 'mask' already has 00 in these positions (since the corresponding char + // wasn't a surrogate), "mask AND mask2 == 00" holds for these positions. + + uint mask2; + if (AdvSimd.Arm64.IsSupported) + { + mask2 = GetNonAsciiBytes(AdvSimd.ShiftRightLogical(utf16Data, 3).AsByte(), bitMask128); + } + else + { + mask2 = (uint)Sse2.MoveMask(Sse2.ShiftRightLogical(utf16Data, 3).AsByte()); + } + + // 'lowSurrogatesMask' has its bits occur in pairs: + // - 01 if the corresponding char was a low surrogate char, + // - 00 if the corresponding char was a high surrogate char or not a surrogate at all. + + uint lowSurrogatesMask = mask2 & mask; + + // 'highSurrogatesMask' has its bits occur in pairs: + // - 01 if the corresponding char was a high surrogate char, + // - 00 if the corresponding char was a low surrogate char or not a surrogate at all. + + uint highSurrogatesMask = (mask2 ^ 0b_0101_0101_0101_0101u /* flip all even-numbered bits 00 <-> 01 */) & mask; + + Debug.Assert((highSurrogatesMask & lowSurrogatesMask) == 0, + "A char cannot simultaneously be both a high and a low surrogate char."); + + Debug.Assert(((highSurrogatesMask | lowSurrogatesMask) & 0b_1010_1010_1010_1010u) == 0, + "Only even bits (no odd bits) of the masks should be set."); + + // Now check that each high surrogate is followed by a low surrogate and that each + // low surrogate follows a high surrogate. We make an exception for the case where + // the final char of the vector is a high surrogate, since we can't perform validation + // on it until the next iteration of the loop when we hope to consume the matching + // low surrogate. 
+ + highSurrogatesMask <<= 2; + if ((ushort)highSurrogatesMask != lowSurrogatesMask) + { + goto NonVectorizedLoop; // error: mismatched surrogate pair; break out of vectorized logic + } + + if (highSurrogatesMask > ushort.MaxValue) + { + // There was a standalone high surrogate at the end of the vector. + // We'll adjust our counters so that we don't consider this char consumed. + + highSurrogatesMask = (ushort)highSurrogatesMask; // don't allow stray high surrogate to be consumed by popcnt + popcnt -= 2; // the '0xC000_0000' bits in the original mask are shifted out and discarded, so account for that here + pInputBuffer--; + inputLength++; + } + + // If we're 64-bit, we can perform the zero-extension of the surrogate pairs count for + // free right now, saving the extension step a few lines below. If we're 32-bit, the + // conversion to nuint immediately below is a no-op, and we'll pay the cost of the real + // 64-bit extension a few lines below. + nuint surrogatePairsCountNuint = (uint)BitOperations.PopCount(highSurrogatesMask); + + // 2 UTF-16 chars become 1 Unicode scalar + + tempScalarCountAdjustment -= (int)surrogatePairsCountNuint; + + // Since each surrogate code unit was >= 0x0800, we eagerly assumed + // it'd be encoded as 3 UTF-8 code units, so our earlier popcnt computation + // assumes that the pair is encoded as 6 UTF-8 code units. Since each + // pair is in reality only encoded as 4 UTF-8 code units, we need to + // perform this adjustment now. + + if (PlatformDependent.Is64BitProcess) + { + // Since we've already zero-extended surrogatePairsCountNuint, we can directly + // sub + sub. It's more efficient than shl + sub. + tempUtf8CodeUnitCountAdjustment -= (long)surrogatePairsCountNuint; + tempUtf8CodeUnitCountAdjustment -= (long)surrogatePairsCountNuint; + } + else + { + // Take the hit of the 64-bit extension now.
+ tempUtf8CodeUnitCountAdjustment -= 2 * (uint)surrogatePairsCountNuint; + } + } + + tempUtf8CodeUnitCountAdjustment += popcnt; + pInputBuffer += Vector128.Count; + inputLength -= Vector128.Count; + } while (inputLength >= Vector128.Count); + } + } + else if (Vector.IsHardwareAccelerated) + { + if (inputLength >= Vector.Count) + { + Vector vector0080 = new Vector(0x0080); + Vector vector0400 = new Vector(0x0400); + Vector vector0800 = new Vector(0x0800); + Vector vectorD800 = new Vector(0xD800); + + do + { + // The 'twoOrMoreUtf8Bytes' and 'threeOrMoreUtf8Bytes' vectors will contain + // elements whose values are 0xFFFF (-1 as signed word) iff the corresponding + // UTF-16 code unit was >= 0x0080 and >= 0x0800, respectively. By summing these + // vectors, each element of the sum will contain one of three values: + // + // 0x0000 ( 0) = original char was 0000..007F + // 0xFFFF (-1) = original char was 0080..07FF + // 0xFFFE (-2) = original char was 0800..FFFF + // + // We'll negate them to produce a value 0..2 for each element, then sum all the + // elements together to produce the number of *additional* UTF-8 code units + // required to represent this UTF-16 data. This is similar to the popcnt step + // performed by the SSE2 code path. This will overcount surrogates, but we'll + // handle that shortly. + + Vector utf16Data = Unsafe.ReadUnaligned>(pInputBuffer); + Vector twoOrMoreUtf8Bytes = Vector.GreaterThanOrEqual(utf16Data, vector0080); + Vector threeOrMoreUtf8Bytes = Vector.GreaterThanOrEqual(utf16Data, vector0800); + nuint popcnt = 0; + if (PlatformDependent.Is64BitProcess) + { + Vector sumVector = (Vector)(Vector.Zero - twoOrMoreUtf8Bytes - threeOrMoreUtf8Bytes); + + // We'll try summing by a natural word (rather than a 16-bit word) at a time, + // which should halve the number of operations we must perform. 
+ + for (int i = 0; i < Vector.Count; i++) + { + popcnt += (nuint)sumVector[i]; + } + } + else + { + Vector sumVector = (Vector)(Vector.Zero - twoOrMoreUtf8Bytes - threeOrMoreUtf8Bytes); + + // We'll try summing by a natural word (rather than a 16-bit word) at a time, + // which should halve the number of operations we must perform. + + for (int i = 0; i < Vector.Count; i++) + { + popcnt += (nuint)sumVector[i]; + } + } + + uint popcnt32 = (uint)popcnt; + if (PlatformDependent.Is64BitProcess) + { + popcnt32 += (uint)(popcnt >> 32); + } + + // As in the SSE4.1 paths, compute popcnt but don't fold it in until we + // know there aren't any unpaired surrogates in the input data. + + popcnt32 = (ushort)popcnt32 + (popcnt32 >> 16); + + // Now check for surrogates. + + utf16Data -= vectorD800; + Vector surrogateChars = Vector.LessThan(utf16Data, vector0800); + if (surrogateChars != Vector.Zero) + { + // There's at least one surrogate (high or low) UTF-16 code unit in + // the vector. We'll build up additional vectors: 'highSurrogateChars' + // and 'lowSurrogateChars', where the elements are 0xFFFF iff the original + // UTF-16 code unit was a high or low surrogate, respectively. + + Vector highSurrogateChars = Vector.LessThan(utf16Data, vector0400); + Vector lowSurrogateChars = Vector.AndNot(surrogateChars, highSurrogateChars); + + // We want to make sure that each high surrogate code unit is followed by + // a low surrogate code unit and each low surrogate code unit follows a + // high surrogate code unit. Since we don't have an equivalent of pmovmskb + // or palignr available to us, we'll do this as a loop. We won't look at + // the very last high surrogate char element since we don't yet know if + // the next vector read will have a low surrogate char element. 
+ + if (lowSurrogateChars[0] != 0) + { + goto Error; // error: start of buffer contains standalone low surrogate char + } + + ushort surrogatePairsCount = 0; + for (int i = 0; i < Vector.Count - 1; i++) + { + surrogatePairsCount -= highSurrogateChars[i]; // turns into +1 or +0 + if (highSurrogateChars[i] != lowSurrogateChars[i + 1]) + { + goto NonVectorizedLoop; // error: mismatched surrogate pair; break out of vectorized logic + } + } + + if (highSurrogateChars[Vector.Count - 1] != 0) + { + // There was a standalone high surrogate at the end of the vector. + // We'll adjust our counters so that we don't consider this char consumed. + + pInputBuffer--; + inputLength++; + popcnt32 -= 2; + } + + nint surrogatePairsCountNint = (nint)surrogatePairsCount; // zero-extend to native int size + + // 2 UTF-16 chars become 1 Unicode scalar + + tempScalarCountAdjustment -= (int)surrogatePairsCountNint; + + // Since each surrogate code unit was >= 0x0800, we eagerly assumed + // it'd be encoded as 3 UTF-8 code units. Each surrogate half is only + // encoded as 2 UTF-8 code units (for 4 UTF-8 code units total), + // so we'll adjust this now. + + tempUtf8CodeUnitCountAdjustment -= surrogatePairsCountNint; + tempUtf8CodeUnitCountAdjustment -= surrogatePairsCountNint; + } + + tempUtf8CodeUnitCountAdjustment += popcnt32; + pInputBuffer += Vector.Count; + inputLength -= Vector.Count; + } while (inputLength >= Vector.Count); + } + } + + NonVectorizedLoop: + + // Vectorization isn't supported on our current platform, or the input was too small to benefit + // from vectorization, or we saw invalid UTF-16 data in the vectorized code paths and need to + // drain remaining valid chars before we report failure. + + for (; inputLength > 0; pInputBuffer++, inputLength--) + { + uint thisChar = pInputBuffer[0]; + if (thisChar <= 0x7F) + { + continue; + } + + // Bump adjustment by +1 for U+0080..U+07FF; by +2 for U+0800..U+FFFF. 
+ // This optimistically assumes no surrogates, which we'll handle shortly. + + tempUtf8CodeUnitCountAdjustment += (thisChar + 0x0001_F800u) >> 16; + + if (!UnicodeUtility.IsSurrogateCodePoint(thisChar)) + { + continue; + } + + // Found a surrogate char. Back out the adjustment we made above, then + // try to consume the entire surrogate pair all at once. We won't bother + // trying to interpret the surrogate pair as a scalar value; we'll only + // validate that its bit pattern matches what's expected for a surrogate pair. + + tempUtf8CodeUnitCountAdjustment -= 2; + + if (inputLength == 1) + { + goto Error; // input buffer too small to read a surrogate pair + } + + thisChar = Unsafe.ReadUnaligned(pInputBuffer); + if (((thisChar - (BitConverter.IsLittleEndian ? 0xDC00_D800u : 0xD800_DC00u)) & 0xFC00_FC00u) != 0) + { + goto Error; // not a well-formed surrogate pair + } + + tempScalarCountAdjustment--; // 2 UTF-16 code units -> 1 scalar + tempUtf8CodeUnitCountAdjustment += 2; // 2 UTF-16 code units -> 4 UTF-8 code units + + pInputBuffer++; // consumed one extra char + inputLength--; + } + + Error: + + // Also used for normal return. 
+ + utf8CodeUnitCountAdjustment = tempUtf8CodeUnitCountAdjustment; + scalarCountAdjustment = tempScalarCountAdjustment; + return pInputBuffer; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static uint GetNonAsciiBytes(Vector128 value, Vector128 bitMask128) + { + Debug.Assert(AdvSimd.Arm64.IsSupported); + + Vector128 mostSignificantBitIsSet = AdvSimd.ShiftRightArithmetic(value.AsSByte(), 7).AsByte(); + Vector128 extractedBits = AdvSimd.And(mostSignificantBitIsSet, bitMask128); + + // self-pairwise add until all flags have moved to the first two bytes of the vector + extractedBits = AdvSimd.Arm64.AddPairwise(extractedBits, extractedBits); + extractedBits = AdvSimd.Arm64.AddPairwise(extractedBits, extractedBits); + extractedBits = AdvSimd.Arm64.AddPairwise(extractedBits, extractedBits); + return extractedBits.AsUInt16().ToScalar(); + } + } +} +#endif diff --git a/src/DotNetty.Common/Internal/Utf16Utility.Validation.cs b/src/DotNetty.Common/Internal/Utf16Utility.Validation.NetCore3.cs similarity index 99% rename from src/DotNetty.Common/Internal/Utf16Utility.Validation.cs rename to src/DotNetty.Common/Internal/Utf16Utility.Validation.NetCore3.cs index c4f438c30..dfff6de9b 100644 --- a/src/DotNetty.Common/Internal/Utf16Utility.Validation.cs +++ b/src/DotNetty.Common/Internal/Utf16Utility.Validation.NetCore3.cs @@ -4,13 +4,13 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. 
-#if NETCOREAPP_3_0_GREATER +#if NETCOREAPP3_1 using System; using System.Diagnostics; +using System.Runtime.CompilerServices; using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; using System.Numerics; -using System.Runtime.CompilerServices; using nuint_64 = System.UInt64; using nuint_32 = System.UInt32; @@ -50,7 +50,7 @@ internal static unsafe partial class Utf16Utility // If we got here, it means we saw some non-ASCII data, so within our // vectorized code paths below we'll handle all non-surrogate UTF-16 // code points branchlessly. We'll only branch if we see surrogates. - // + // // We still optimistically assume the data is mostly ASCII. This means that the // number of UTF-8 code units and the number of scalars almost matches the number // of UTF-16 code units. As we go through the input and find non-ASCII diff --git a/src/DotNetty.Common/Internal/Utf16Utility.cs b/src/DotNetty.Common/Internal/Utf16Utility.cs index 7c7d93e97..29b1ced1a 100644 --- a/src/DotNetty.Common/Internal/Utf16Utility.cs +++ b/src/DotNetty.Common/Internal/Utf16Utility.cs @@ -152,6 +152,7 @@ internal static bool UInt32OrdinalIgnoreCaseAscii(uint valueA, uint valueB) Debug.Assert(AllCharsInUInt32AreAscii(valueA)); Debug.Assert(AllCharsInUInt32AreAscii(valueB)); +#if NETCOREAPP3_1 // a mask of all bits which are different between A and B uint differentBits = valueA ^ valueB; @@ -177,6 +178,48 @@ internal static bool UInt32OrdinalIgnoreCaseAscii(uint valueA, uint valueB) // computation we performed at the beginning of the method. return 0u >= (((combinedIndicator >> 2) | ~0x0020_0020u) & differentBits); +#else + // Generate a mask of all bits which are different between A and B. Since [A-Z] + // and [a-z] differ by the 0x20 bit, we'll left-shift this by 2 now so that + // this is moved over to the 0x80 bit, which nicely aligns with the calculation + // we're going to do on the indicator flag later. + // + // n.b. 
All of the logic below assumes we have at least 2 "known zero" bits leading + // each of the 7-bit ASCII values. This assumption won't hold if this method is + // ever adapted to deal with packed bytes instead of packed chars. + + uint differentBits = (valueA ^ valueB) << 2; + + // Now, we want to generate a mask where for each word in the input, the mask contains + // 0xFF7F if the word is [A-Za-z], 0xFFFF if the word is not [A-Za-z]. We know each + // input word is ASCII (only low 7 bits set), so we can use a combination of addition + // and logical operators as follows. + // + // original input +05 |A0 +1A + // ==================================================== + // 00 .. 3F -> 05 .. 44 -> A5 .. E4 -> BF .. FE + // 40 -> 45 -> E5 -> FF + // ([A-Z]) 41 .. 5A -> 46 .. 5F -> E6 .. FF -> 00 .. 19 + // 5B .. 5F -> 60 .. 64 -> E0 .. E4 -> FA .. FE + // 60 -> 65 -> E5 -> FF + // ([a-z]) 61 .. 7A -> 66 .. 7F -> E6 .. FF -> 00 .. 19 + // 7B .. 7F -> 80 .. 84 -> A0 .. A4 -> BA .. BE + // + // This combination of operations results in the 0x80 bit of each word being set + // iff the original word value was *not* [A-Za-z]. + + uint indicator = valueA + 0x0005_0005u; + indicator |= 0x00A0_00A0u; + indicator += 0x001A_001Au; + indicator |= 0xFF7F_FF7Fu; // normalize each word to 0xFF7F or 0xFFFF + + // At this point, 'indicator' contains the mask of bits which are *not* allowed to + // differ between the inputs, and 'differentBits' contains the mask of bits which + // actually differ between the inputs. If these masks have any bits in common, then + // the two values are *not* equal under an OrdinalIgnoreCase comparer.
+ + return 0u >= (differentBits & indicator); +#endif } /// @@ -193,6 +236,7 @@ internal static bool UInt64OrdinalIgnoreCaseAscii(ulong valueA, ulong valueB) Debug.Assert(AllCharsInUInt64AreAscii(valueA)); Debug.Assert(AllCharsInUInt64AreAscii(valueB)); +#if NETCOREAPP3_1 // the 0x80 bit of each word of 'lowerIndicator' will be set iff the word has value >= 'A' ulong lowerIndicator = valueA + 0x0080_0080_0080_0080ul - 0x0041_0041_0041_0041ul; @@ -213,6 +257,17 @@ internal static bool UInt64OrdinalIgnoreCaseAscii(ulong valueA, ulong valueB) // happens to be faster on x64. return (valueA | combinedIndicator) == (valueB | combinedIndicator); +#else + // Duplicate of logic in UInt32OrdinalIgnoreCaseAscii, but using 64-bit consts. + // See comments in that method for more info. + + ulong differentBits = (valueA ^ valueB) << 2; + ulong indicator = valueA + 0x0005_0005_0005_0005ul; + indicator |= 0x00A0_00A0_00A0_00A0ul; + indicator += 0x001A_001A_001A_001Aul; + indicator |= 0xFF7F_FF7F_FF7F_FF7Ful; + return 0ul >= (differentBits & indicator); +#endif } } } diff --git a/src/DotNetty.Common/Internal/Utf8Utility.Helpers.cs b/src/DotNetty.Common/Internal/Utf8Utility.Helpers.cs index f88c4b8ab..dfaba7a4e 100644 --- a/src/DotNetty.Common/Internal/Utf8Utility.Helpers.cs +++ b/src/DotNetty.Common/Internal/Utf8Utility.Helpers.cs @@ -10,18 +10,20 @@ using System.Diagnostics; using System.Numerics; using System.Runtime.CompilerServices; +#if NETCOREAPP3_1 using System.Runtime.Intrinsics.X86; +#endif namespace DotNetty.Common.Internal { - internal static partial class Utf8Utility + partial class Utf8Utility { /// /// Given a machine-endian DWORD which four bytes of UTF-8 data, interprets the /// first three bytes as a three-byte UTF-8 subsequence and returns the UTF-16 representation. 
/// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static uint ExtractCharFromFirstThreeByteSequence(uint value) + private static uint ExtractCharFromFirstThreeByteSequence(uint value) { Debug.Assert(UInt32BeginsWithUtf8ThreeByteMask(value)); @@ -46,7 +48,7 @@ internal static uint ExtractCharFromFirstThreeByteSequence(uint value) /// first two bytes as a two-byte UTF-8 subsequence and returns the UTF-16 representation. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static uint ExtractCharFromFirstTwoByteSequence(uint value) + private static uint ExtractCharFromFirstTwoByteSequence(uint value) { Debug.Assert(UInt32BeginsWithUtf8TwoByteMask(value) && !UInt32BeginsWithOverlongUtf8TwoByteSequence(value)); @@ -68,10 +70,11 @@ internal static uint ExtractCharFromFirstTwoByteSequence(uint value) /// four-byte UTF-8 sequence and returns the machine-endian DWORD of the UTF-16 representation. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static uint ExtractCharsFromFourByteSequence(uint value) + private static uint ExtractCharsFromFourByteSequence(uint value) { if (BitConverter.IsLittleEndian) { +#if NETCOREAPP3_1 if (Bmi2.IsSupported) { // need to reverse endianness for bit manipulation to work correctly @@ -91,6 +94,7 @@ internal static uint ExtractCharsFromFourByteSequence(uint value) } else { +#endif // input is UTF8 [ 10xxxxxx 10yyyyyy 10uuzzzz 11110uuu ] = scalar 000uuuuu zzzzyyyy yyxxxxxx // want to return UTF16 scalar 000uuuuuzzzzyyyyyyxxxxxx = [ 110111yy yyxxxxxx 110110ww wwzzzzyy ] // where wwww = uuuuu - 1 @@ -104,7 +108,9 @@ internal static uint ExtractCharsFromFourByteSequence(uint value) retVal += 0x0000_0800u; // retVal = [ 000000yy yyxxxxxx 110110ww wwzzzzyy ] retVal += 0xDC00_0000u; // retVal = [ 110111yy yyxxxxxx 110110ww wwzzzzyy ] return retVal; +#if NETCOREAPP3_1 } +#endif } else { @@ -129,7 +135,7 @@ internal static uint ExtractCharsFromFourByteSequence(uint value) /// returns the packed 4-byte UTF-8 
representation of this scalar value, also in machine-endian order. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static uint ExtractFourUtf8BytesFromSurrogatePair(uint value) + private static uint ExtractFourUtf8BytesFromSurrogatePair(uint value) { Debug.Assert(IsWellFormedUtf16SurrogatePair(value)); @@ -138,6 +144,7 @@ internal static uint ExtractFourUtf8BytesFromSurrogatePair(uint value) // input = [ 110111yyyyxxxxxx 110110wwwwzzzzyy ] = scalar (000uuuuu zzzzyyyy yyxxxxxx) // must return [ 10xxxxxx 10yyyyyy 10uuzzzz 11110uuu ], where wwww = uuuuu - 1 +#if NETCOREAPP3_1 if (Bmi2.IsSupported) { // Since pdep and pext have high latencies and can only be dispatched to a single execution port, we want @@ -155,6 +162,7 @@ internal static uint ExtractFourUtf8BytesFromSurrogatePair(uint value) } else { +#endif value += 0x0000_0040u; // = [ 110111yyyyxxxxxx 11011uuuuuzzzzyy ] uint tempA = BinaryPrimitives.ReverseEndianness(value & 0x003F_0700u); // = [ 00000000 00000uuu 00xxxxxx 00000000 ] @@ -167,8 +175,10 @@ internal static uint ExtractFourUtf8BytesFromSurrogatePair(uint value) uint tempD = (value & 0x03u) << 20; // = [ 00000000 00yy0000 00000000 00000000 ] tempD |= 0x8080_80F0u; - return (tempD | tempA | tempC); // = [ 10xxxxxx 10yyyyyy 10uuzzzz 11110uuu ] + return tempD | tempA | tempC; // = [ 10xxxxxx 10yyyyyy 10uuzzzz 11110uuu ] +#if NETCOREAPP3_1 } +#endif } else { @@ -187,7 +197,7 @@ internal static uint ExtractFourUtf8BytesFromSurrogatePair(uint value) tempD |= tempC; uint tempE = (value & 0x3Fu) + 0xF080_8080u; // = [ 11110000 10000000 10000000 10xxxxxx ] - return (tempE | tempB | tempD); // = [ 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx ] + return tempE | tempB | tempD; // = [ 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx ] } } @@ -199,7 +209,7 @@ internal static uint ExtractFourUtf8BytesFromSurrogatePair(uint value) /// /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static uint ExtractTwoCharsPackedFromTwoAdjacentTwoByteSequences(uint 
value) + private static uint ExtractTwoCharsPackedFromTwoAdjacentTwoByteSequences(uint value) { // We don't want to swap the position of the high and low WORDs, // as the buffer was read in machine order and will be written in @@ -223,7 +233,7 @@ internal static uint ExtractTwoCharsPackedFromTwoAdjacentTwoByteSequences(uint v /// adjacent UTF-8 two-byte sequences. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static uint ExtractTwoUtf8TwoByteSequencesFromTwoPackedUtf16Chars(uint value) + private static uint ExtractTwoUtf8TwoByteSequencesFromTwoPackedUtf16Chars(uint value) { // stays in machine endian @@ -251,7 +261,7 @@ internal static uint ExtractTwoUtf8TwoByteSequencesFromTwoPackedUtf16Chars(uint /// as a UTF-8 two-byte sequence packed into a WORD and zero-extended to DWORD. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static uint ExtractUtf8TwoByteSequenceFromFirstUtf16Char(uint value) + private static uint ExtractUtf8TwoByteSequenceFromFirstUtf16Char(uint value) { // stays in machine endian @@ -282,7 +292,7 @@ internal static uint ExtractUtf8TwoByteSequenceFromFirstUtf16Char(uint value) /// returns true iff the first UTF-16 character is ASCII. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static bool IsFirstCharAscii(uint value) + private static bool IsFirstCharAscii(uint value) { // Little-endian: Given [ #### AAAA ], return whether AAAA is in range [ 0000..007F ]. // Big-endian: Given [ AAAA #### ], return whether AAAA is in range [ 0000..007F ]. @@ -299,7 +309,7 @@ internal static bool IsFirstCharAscii(uint value) /// This also returns true if the first UTF-16 character is a surrogate character (well-formedness is not validated). 
/// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static bool IsFirstCharAtLeastThreeUtf8Bytes(uint value) + private static bool IsFirstCharAtLeastThreeUtf8Bytes(uint value) { // Little-endian: Given [ #### AAAA ], return whether AAAA is in range [ 0800..FFFF ]. // Big-endian: Given [ AAAA #### ], return whether AAAA is in range [ 0800..FFFF ]. @@ -315,7 +325,7 @@ internal static bool IsFirstCharAtLeastThreeUtf8Bytes(uint value) /// returns true iff the first UTF-16 character is a surrogate character (either high or low). /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static bool IsFirstCharSurrogate(uint value) + private static bool IsFirstCharSurrogate(uint value) { // Little-endian: Given [ #### AAAA ], return whether AAAA is in range [ D800..DFFF ]. // Big-endian: Given [ AAAA #### ], return whether AAAA is in range [ D800..DFFF ]. @@ -331,7 +341,7 @@ internal static bool IsFirstCharSurrogate(uint value) /// returns true iff the first UTF-16 character would be encoded as exactly 2 bytes in UTF-8. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static bool IsFirstCharTwoUtf8Bytes(uint value) + private static bool IsFirstCharTwoUtf8Bytes(uint value) { // Little-endian: Given [ #### AAAA ], return whether AAAA is in range [ 0080..07FF ]. // Big-endian: Given [ AAAA #### ], return whether AAAA is in range [ 0080..07FF ]. @@ -351,7 +361,7 @@ internal static bool IsFirstCharTwoUtf8Bytes(uint value) /// is a UTF-8 continuation byte. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static bool IsLowByteUtf8ContinuationByte(uint value) + private static bool IsLowByteUtf8ContinuationByte(uint value) { // The JIT won't emit a single 8-bit signed cmp instruction (see IsUtf8ContinuationByte), // so the best we can do for now is the lea / cmp pair. @@ -365,7 +375,7 @@ internal static bool IsLowByteUtf8ContinuationByte(uint value) /// returns true iff the second UTF-16 character is ASCII. 
/// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static bool IsSecondCharAscii(uint value) + private static bool IsSecondCharAscii(uint value) { // Little-endian: Given [ BBBB #### ], return whether BBBB is in range [ 0000..007F ]. // Big-endian: Given [ #### BBBB ], return whether BBBB is in range [ 0000..007F ]. @@ -382,7 +392,7 @@ internal static bool IsSecondCharAscii(uint value) /// This also returns true if the second UTF-16 character is a surrogate character (well-formedness is not validated). /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static bool IsSecondCharAtLeastThreeUtf8Bytes(uint value) + private static bool IsSecondCharAtLeastThreeUtf8Bytes(uint value) { // Little-endian: Given [ BBBB #### ], return whether BBBB is in range [ 0800..FFFF ]. // Big-endian: Given [ #### BBBB ], return whether ABBBBAAA is in range [ 0800..FFFF ]. @@ -398,7 +408,7 @@ internal static bool IsSecondCharAtLeastThreeUtf8Bytes(uint value) /// returns true iff the second UTF-16 character is a surrogate character (either high or low). /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static bool IsSecondCharSurrogate(uint value) + private static bool IsSecondCharSurrogate(uint value) { // Little-endian: Given [ BBBB #### ], return whether BBBB is in range [ D800..DFFF ]. // Big-endian: Given [ #### BBBB ], return whether BBBB is in range [ D800..DFFF ]. @@ -414,7 +424,7 @@ internal static bool IsSecondCharSurrogate(uint value) /// returns true iff the second UTF-16 character would be encoded as exactly 2 bytes in UTF-8. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static bool IsSecondCharTwoUtf8Bytes(uint value) + private static bool IsSecondCharTwoUtf8Bytes(uint value) { // Little-endian: Given [ BBBB #### ], return whether BBBB is in range [ 0080..07FF ]. // Big-endian: Given [ #### BBBB ], return whether BBBB is in range [ 0080..07FF ]. 
@@ -445,7 +455,7 @@ internal static bool IsUtf8ContinuationByte(in byte value) // The below check takes advantage of the two's complement representation of negative numbers. // [ 0b1000_0000, 0b1011_1111 ] is [ -127 (sbyte.MinValue), -65 ] - return ((sbyte)value < -64); + return (sbyte)value < -64; } /// @@ -453,7 +463,7 @@ internal static bool IsUtf8ContinuationByte(in byte value) /// returns true iff the two characters represent a well-formed UTF-16 surrogate pair. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static bool IsWellFormedUtf16SurrogatePair(uint value) + private static bool IsWellFormedUtf16SurrogatePair(uint value) { // Little-endian: Given [ LLLL HHHH ], validate that LLLL in [ DC00..DFFF ] and HHHH in [ D800..DBFF ]. // Big-endian: Given [ HHHH LLLL ], validate that HHHH in [ D800..DBFF ] and LLLL in [ DC00..DFFF ]. @@ -474,7 +484,7 @@ internal static bool IsWellFormedUtf16SurrogatePair(uint value) /// Converts a DWORD from machine-endian to little-endian. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static uint ToLittleEndian(uint value) + private static uint ToLittleEndian(uint value) { if (BitConverter.IsLittleEndian) { @@ -494,7 +504,7 @@ internal static uint ToLittleEndian(uint value) /// 2-byte sequence mask (see ). /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static bool UInt32BeginsWithOverlongUtf8TwoByteSequence(uint value) + private static bool UInt32BeginsWithOverlongUtf8TwoByteSequence(uint value) { // ASSUMPTION: Caller has already checked the '110yyyyy 10xxxxxx' mask of the input. Debug.Assert(UInt32BeginsWithUtf8TwoByteMask(value)); @@ -517,7 +527,7 @@ internal static bool UInt32BeginsWithOverlongUtf8TwoByteSequence(uint value) /// still perform overlong form or out-of-range checking. 
/// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static bool UInt32BeginsWithUtf8FourByteMask(uint value) + private static bool UInt32BeginsWithUtf8FourByteMask(uint value) { // The code in this method is equivalent to the code // below but is slightly more optimized. @@ -549,7 +559,7 @@ internal static bool UInt32BeginsWithUtf8FourByteMask(uint value) /// overlong form or surrogate checking. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static bool UInt32BeginsWithUtf8ThreeByteMask(uint value) + private static bool UInt32BeginsWithUtf8ThreeByteMask(uint value) { // The code in this method is equivalent to the code // below but is slightly more optimized. @@ -581,7 +591,7 @@ internal static bool UInt32BeginsWithUtf8ThreeByteMask(uint value) /// overlong form checking. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static bool UInt32BeginsWithUtf8TwoByteMask(uint value) + private static bool UInt32BeginsWithUtf8TwoByteMask(uint value) { // The code in this method is equivalent to the code // below but is slightly more optimized. @@ -613,7 +623,7 @@ internal static bool UInt32BeginsWithUtf8TwoByteMask(uint value) /// 2-byte sequence mask (see ). /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static bool UInt32EndsWithOverlongUtf8TwoByteSequence(uint value) + private static bool UInt32EndsWithOverlongUtf8TwoByteSequence(uint value) { // ASSUMPTION: Caller has already checked the '110yyyyy 10xxxxxx' mask of the input. Debug.Assert(UInt32EndsWithUtf8TwoByteMask(value)); @@ -639,7 +649,7 @@ internal static bool UInt32EndsWithOverlongUtf8TwoByteSequence(uint value) /// overlong form checking. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static bool UInt32EndsWithUtf8TwoByteMask(uint value) + private static bool UInt32EndsWithUtf8TwoByteMask(uint value) { // The code in this method is equivalent to the code // below but is slightly more optimized. 
@@ -670,7 +680,7 @@ internal static bool UInt32EndsWithUtf8TwoByteMask(uint value) /// single operation. Returns if running on a big-endian machine. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static bool UInt32BeginsWithValidUtf8TwoByteSequenceLittleEndian(uint value) + private static bool UInt32BeginsWithValidUtf8TwoByteSequenceLittleEndian(uint value) { // Per Table 3-7, valid 2-byte sequences are [ C2..DF ] [ 80..BF ]. // In little-endian, that would be represented as: @@ -695,7 +705,7 @@ internal static bool UInt32BeginsWithValidUtf8TwoByteSequenceLittleEndian(uint v /// single operation. Returns if running on a big-endian machine. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static bool UInt32EndsWithValidUtf8TwoByteSequenceLittleEndian(uint value) + private static bool UInt32EndsWithValidUtf8TwoByteSequenceLittleEndian(uint value) { // See comments in UInt32BeginsWithValidUtf8TwoByteSequenceLittleEndian. @@ -712,7 +722,7 @@ internal static bool UInt32EndsWithValidUtf8TwoByteSequenceLittleEndian(uint val /// returns iff the first byte of the buffer is ASCII. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static bool UInt32FirstByteIsAscii(uint value) + private static bool UInt32FirstByteIsAscii(uint value) { // Return statement is written this way to work around https://github.com/dotnet/coreclr/issues/914. @@ -725,7 +735,7 @@ internal static bool UInt32FirstByteIsAscii(uint value) /// returns iff the fourth byte of the buffer is ASCII. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static bool UInt32FourthByteIsAscii(uint value) + private static bool UInt32FourthByteIsAscii(uint value) { // Return statement is written this way to work around https://github.com/dotnet/coreclr/issues/914. @@ -738,7 +748,7 @@ internal static bool UInt32FourthByteIsAscii(uint value) /// returns iff the second byte of the buffer is ASCII. 
/// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static bool UInt32SecondByteIsAscii(uint value) + private static bool UInt32SecondByteIsAscii(uint value) { // Return statement is written this way to work around https://github.com/dotnet/coreclr/issues/914. @@ -751,7 +761,7 @@ internal static bool UInt32SecondByteIsAscii(uint value) /// returns iff the third byte of the buffer is ASCII. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static bool UInt32ThirdByteIsAscii(uint value) + private static bool UInt32ThirdByteIsAscii(uint value) { // Return statement is written this way to work around https://github.com/dotnet/coreclr/issues/914. @@ -759,12 +769,13 @@ internal static bool UInt32ThirdByteIsAscii(uint value) || (!BitConverter.IsLittleEndian && (0u >= (value & 0x8000u))); } +#if NETCOREAPP3_1 /// /// Given a DWORD which represents a buffer of 4 ASCII bytes, widen each byte to a 16-bit WORD /// and writes the resulting QWORD into the destination with machine endianness. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static void Widen4AsciiBytesToCharsAndWrite(ref char outputBuffer, uint value) + private static void Widen4AsciiBytesToCharsAndWrite(ref char outputBuffer, uint value) { if (Bmi2.X64.IsSupported) { @@ -795,6 +806,7 @@ internal static void Widen4AsciiBytesToCharsAndWrite(ref char outputBuffer, uint } } } +#endif /// /// Given a DWORD which represents a buffer of 2 packed UTF-16 values in machine endianess, @@ -802,7 +814,7 @@ internal static void Widen4AsciiBytesToCharsAndWrite(ref char outputBuffer, uint /// resulting 6 bytes to the destination buffer. 
/// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static void WriteTwoUtf16CharsAsTwoUtf8ThreeByteSequences(ref byte outputBuffer, uint value) + private static void WriteTwoUtf16CharsAsTwoUtf8ThreeByteSequences(ref byte outputBuffer, uint value) { Debug.Assert(IsFirstCharAtLeastThreeUtf8Bytes(value) && !IsFirstCharSurrogate(value), "First half of value should've been 0800..D7FF or E000..FFFF"); Debug.Assert(IsSecondCharAtLeastThreeUtf8Bytes(value) && !IsSecondCharSurrogate(value), "Second half of value should've been 0800..D7FF or E000..FFFF"); @@ -838,7 +850,7 @@ internal static void WriteTwoUtf16CharsAsTwoUtf8ThreeByteSequences(ref byte outp /// resulting 3 bytes to the destination buffer. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static void WriteFirstUtf16CharAsUtf8ThreeByteSequence(ref byte outputBuffer, uint value) + private static void WriteFirstUtf16CharAsUtf8ThreeByteSequence(ref byte outputBuffer, uint value) { Debug.Assert(IsFirstCharAtLeastThreeUtf8Bytes(value) && !IsFirstCharSurrogate(value), "First half of value should've been 0800..D7FF or E000..FFFF"); diff --git a/src/DotNetty.Common/Internal/Utf8Utility.Transcoding.Net.cs b/src/DotNetty.Common/Internal/Utf8Utility.Transcoding.Net.cs new file mode 100644 index 000000000..c088f2b77 --- /dev/null +++ b/src/DotNetty.Common/Internal/Utf8Utility.Transcoding.Net.cs @@ -0,0 +1,1510 @@ +// borrowed from https://github.com/dotnet/corefx/tree/release/3.1/src/Common/src/CoreLib/System/Text/Unicode + +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +#if NET +using System; +using System.Buffers; +using System.Diagnostics; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; +using System.Runtime.Intrinsics.X86; + +namespace DotNetty.Common.Internal +{ + unsafe partial class Utf8Utility + { + // On method return, pInputBufferRemaining and pOutputBufferRemaining will both point to where + // the next byte would have been consumed from / the next char would have been written to. + // inputLength in bytes, outputCharsRemaining in chars. + public static OperationStatus TranscodeToUtf16(byte* pInputBuffer, int inputLength, char* pOutputBuffer, int outputCharsRemaining, out byte* pInputBufferRemaining, out char* pOutputBufferRemaining) + { + Debug.Assert(inputLength >= 0, "Input length must not be negative."); + Debug.Assert(pInputBuffer != null || inputLength == 0, "Input length must be zero if input buffer pointer is null."); + + Debug.Assert(outputCharsRemaining >= 0, "Destination length must not be negative."); + Debug.Assert(pOutputBuffer != null || outputCharsRemaining == 0, "Destination length must be zero if destination buffer pointer is null."); + + // First, try vectorized conversion. + + { + nuint numElementsConverted = ASCIIUtility.WidenAsciiToUtf16(pInputBuffer, pOutputBuffer, (uint)Math.Min(inputLength, outputCharsRemaining)); + + pInputBuffer += numElementsConverted; + pOutputBuffer += numElementsConverted; + + // Quick check - did we just end up consuming the entire input buffer? + // If so, short-circuit the remainder of the method. 
+ + if ((int)numElementsConverted == inputLength) + { + pInputBufferRemaining = pInputBuffer; + pOutputBufferRemaining = pOutputBuffer; + return OperationStatus.Done; + } + + inputLength -= (int)numElementsConverted; + outputCharsRemaining -= (int)numElementsConverted; + } + + if (inputLength < sizeof(uint)) + { + goto ProcessInputOfLessThanDWordSize; + } + + byte* pFinalPosWhereCanReadDWordFromInputBuffer = pInputBuffer + (uint)inputLength - 4; + + // Begin the main loop. + +#if DEBUG + byte* pLastBufferPosProcessed = null; // used for invariant checking in debug builds +#endif + + Debug.Assert(pInputBuffer <= pFinalPosWhereCanReadDWordFromInputBuffer); + do + { + // Read 32 bits at a time. This is enough to hold any possible UTF8-encoded scalar. + + uint thisDWord = Unsafe.ReadUnaligned(pInputBuffer); + + AfterReadDWord: + +#if DEBUG + Debug.Assert(pLastBufferPosProcessed < pInputBuffer, "Algorithm should've made forward progress since last read."); + pLastBufferPosProcessed = pInputBuffer; +#endif + // First, check for the common case of all-ASCII bytes. + + if (ASCIIUtility.AllBytesInUInt32AreAscii(thisDWord)) + { + // We read an all-ASCII sequence. + + if (outputCharsRemaining < sizeof(uint)) + { + goto ProcessRemainingBytesSlow; // running out of space, but may be able to write some data + } + + ASCIIUtility.WidenFourAsciiBytesToUtf16AndWriteToBuffer(ref *pOutputBuffer, thisDWord); + pInputBuffer += 4; + pOutputBuffer += 4; + outputCharsRemaining -= 4; + + // If we saw a sequence of all ASCII, there's a good chance a significant amount of following data is also ASCII. + // Below is basically unrolled loops with poor man's vectorization. 
+ + uint remainingInputBytes = (uint)(void*)Unsafe.ByteOffset(ref *pInputBuffer, ref *pFinalPosWhereCanReadDWordFromInputBuffer) + 4; + uint maxIters = Math.Min(remainingInputBytes, (uint)outputCharsRemaining) / (2 * sizeof(uint)); + uint secondDWord; + int i; + for (i = 0; (uint)i < maxIters; i++) + { + // Reading two DWORDs in parallel benchmarked faster than reading a single QWORD. + + thisDWord = Unsafe.ReadUnaligned(pInputBuffer); + secondDWord = Unsafe.ReadUnaligned(pInputBuffer + sizeof(uint)); + + if (!ASCIIUtility.AllBytesInUInt32AreAscii(thisDWord | secondDWord)) + { + goto LoopTerminatedEarlyDueToNonAsciiData; + } + + pInputBuffer += 8; + + ASCIIUtility.WidenFourAsciiBytesToUtf16AndWriteToBuffer(ref pOutputBuffer[0], thisDWord); + ASCIIUtility.WidenFourAsciiBytesToUtf16AndWriteToBuffer(ref pOutputBuffer[4], secondDWord); + + pOutputBuffer += 8; + } + + outputCharsRemaining -= 8 * i; + + continue; // need to perform a bounds check because we might be running out of data + + LoopTerminatedEarlyDueToNonAsciiData: + + if (ASCIIUtility.AllBytesInUInt32AreAscii(thisDWord)) + { + // The first DWORD contained all-ASCII bytes, so expand it. + + ASCIIUtility.WidenFourAsciiBytesToUtf16AndWriteToBuffer(ref *pOutputBuffer, thisDWord); + + // continue the outer loop from the second DWORD + + Debug.Assert(!ASCIIUtility.AllBytesInUInt32AreAscii(secondDWord)); + thisDWord = secondDWord; + + pInputBuffer += 4; + pOutputBuffer += 4; + outputCharsRemaining -= 4; + } + + outputCharsRemaining -= 8 * i; + + // We know that there's *at least* one DWORD of data remaining in the buffer. + // We also know that it's not all-ASCII. We can skip the logic at the beginning of the main loop. + + goto AfterReadDWordSkipAllBytesAsciiCheck; + } + + AfterReadDWordSkipAllBytesAsciiCheck: + + Debug.Assert(!ASCIIUtility.AllBytesInUInt32AreAscii(thisDWord)); // this should have been handled earlier + + // Next, try stripping off ASCII bytes one at a time. 
+ // We only handle up to three ASCII bytes here since we handled the four ASCII byte case above. + + if (UInt32FirstByteIsAscii(thisDWord)) + { + if (outputCharsRemaining >= 3) + { + // Fast-track: we don't need to check the destination length for subsequent + // ASCII bytes since we know we can write them all now. + + uint thisDWordLittleEndian = ToLittleEndian(thisDWord); + + nuint adjustment = 1; + pOutputBuffer[0] = (char)(byte)thisDWordLittleEndian; + + if (UInt32SecondByteIsAscii(thisDWord)) + { + adjustment++; + thisDWordLittleEndian >>= 8; + pOutputBuffer[1] = (char)(byte)thisDWordLittleEndian; + + if (UInt32ThirdByteIsAscii(thisDWord)) + { + adjustment++; + thisDWordLittleEndian >>= 8; + pOutputBuffer[2] = (char)(byte)thisDWordLittleEndian; + } + } + + pInputBuffer += adjustment; + pOutputBuffer += adjustment; + outputCharsRemaining -= (int)adjustment; + } + else + { + // Slow-track: we need to make sure each individual write has enough + // of a buffer so that we don't overrun the destination. + + if (outputCharsRemaining == 0) + { + goto OutputBufferTooSmall; + } + + uint thisDWordLittleEndian = ToLittleEndian(thisDWord); + + pInputBuffer++; + *pOutputBuffer++ = (char)(byte)thisDWordLittleEndian; + outputCharsRemaining--; + + if (UInt32SecondByteIsAscii(thisDWord)) + { + if (outputCharsRemaining == 0) + { + goto OutputBufferTooSmall; + } + + pInputBuffer++; + thisDWordLittleEndian >>= 8; + *pOutputBuffer++ = (char)(byte)thisDWordLittleEndian; + + // We can perform a small optimization here. We know at this point that + // the output buffer is fully consumed (we read two ASCII bytes and wrote + // two ASCII chars, and we checked earlier that the destination buffer + // can't store a third byte). If the next byte is ASCII, we can jump straight + // to the return statement since the end-of-method logic only relies on the + // destination buffer pointer -- NOT the output chars remaining count -- being + // correct. 
If the next byte is not ASCII, we'll need to continue with the + // rest of the main loop, but we can set the buffer length directly to zero + // rather than decrementing it from 1 to 0. + + Debug.Assert(outputCharsRemaining == 1); + + if (UInt32ThirdByteIsAscii(thisDWord)) + { + goto OutputBufferTooSmall; + } + else + { + outputCharsRemaining = 0; + } + } + } + + if (pInputBuffer > pFinalPosWhereCanReadDWordFromInputBuffer) + { + goto ProcessRemainingBytesSlow; // input buffer doesn't contain enough data to read a DWORD + } + else + { + // The input buffer at the current offset contains a non-ASCII byte. + // Read an entire DWORD and fall through to multi-byte consumption logic. + thisDWord = Unsafe.ReadUnaligned(pInputBuffer); + } + } + + BeforeProcessTwoByteSequence: + + // At this point, we know we're working with a multi-byte code unit, + // but we haven't yet validated it. + + // The masks and comparands are derived from the Unicode Standard, Table 3-6. + // Additionally, we need to check for valid byte sequences per Table 3-7. + + // Check the 2-byte case. + + if (UInt32BeginsWithUtf8TwoByteMask(thisDWord)) + { + // Per Table 3-7, valid sequences are: + // [ C2..DF ] [ 80..BF ] + + if (UInt32BeginsWithOverlongUtf8TwoByteSequence(thisDWord)) + { + goto Error; + } + + ProcessTwoByteSequenceSkipOverlongFormCheck: + + // Optimization: If this is a two-byte-per-character language like Cyrillic or Hebrew, + // there's a good chance that if we see one two-byte run then there's another two-byte + // run immediately after. Let's check that now. + + // On little-endian platforms, we can check for the two-byte UTF8 mask *and* validate that + // the value isn't overlong using a single comparison. On big-endian platforms, we'll need + // to validate the mask and validate that the sequence isn't overlong as two separate comparisons. 
+ + if ((BitConverter.IsLittleEndian && UInt32EndsWithValidUtf8TwoByteSequenceLittleEndian(thisDWord)) + || (!BitConverter.IsLittleEndian && (UInt32EndsWithUtf8TwoByteMask(thisDWord) && !UInt32EndsWithOverlongUtf8TwoByteSequence(thisDWord)))) + { + // We have two runs of two bytes each. + + if (outputCharsRemaining < 2) + { + goto ProcessRemainingBytesSlow; // running out of output buffer + } + + Unsafe.WriteUnaligned(pOutputBuffer, ExtractTwoCharsPackedFromTwoAdjacentTwoByteSequences(thisDWord)); + + pInputBuffer += 4; + pOutputBuffer += 2; + outputCharsRemaining -= 2; + + if (pInputBuffer <= pFinalPosWhereCanReadDWordFromInputBuffer) + { + // Optimization: If we read a long run of two-byte sequences, the next sequence is probably + // also two bytes. Check for that first before going back to the beginning of the loop. + + thisDWord = Unsafe.ReadUnaligned(pInputBuffer); + + if (BitConverter.IsLittleEndian) + { + if (UInt32BeginsWithValidUtf8TwoByteSequenceLittleEndian(thisDWord)) + { + // The next sequence is a valid two-byte sequence. + goto ProcessTwoByteSequenceSkipOverlongFormCheck; + } + } + else + { + if (UInt32BeginsWithUtf8TwoByteMask(thisDWord)) + { + if (UInt32BeginsWithOverlongUtf8TwoByteSequence(thisDWord)) + { + goto Error; // The next sequence purports to be a 2-byte sequence but is overlong. + } + + goto ProcessTwoByteSequenceSkipOverlongFormCheck; + } + } + + // If we reached this point, the next sequence is something other than a valid + // two-byte sequence, so go back to the beginning of the loop. + goto AfterReadDWord; + } + else + { + goto ProcessRemainingBytesSlow; // Running out of data - go down slow path + } + } + + // The buffer contains a 2-byte sequence followed by 2 bytes that aren't a 2-byte sequence. + // Unlikely that a 3-byte sequence would follow a 2-byte sequence, so perhaps remaining + // bytes are ASCII? 
+ + uint charToWrite = ExtractCharFromFirstTwoByteSequence(thisDWord); // optimistically compute this now, but don't store until we know dest is large enough + + if (UInt32ThirdByteIsAscii(thisDWord)) + { + if (UInt32FourthByteIsAscii(thisDWord)) + { + if (outputCharsRemaining < 3) + { + goto ProcessRemainingBytesSlow; // running out of output buffer + } + + pOutputBuffer[0] = (char)charToWrite; + if (BitConverter.IsLittleEndian) + { + thisDWord >>= 16; + pOutputBuffer[1] = (char)(byte)thisDWord; + thisDWord >>= 8; + pOutputBuffer[2] = (char)thisDWord; + } + else + { + pOutputBuffer[2] = (char)(byte)thisDWord; + pOutputBuffer[1] = (char)(byte)(thisDWord >> 8); + } + pInputBuffer += 4; + pOutputBuffer += 3; + outputCharsRemaining -= 3; + + continue; // go back to original bounds check and check for ASCII + } + else + { + if (outputCharsRemaining < 2) + { + goto ProcessRemainingBytesSlow; // running out of output buffer + } + + pOutputBuffer[0] = (char)charToWrite; + pOutputBuffer[1] = (char)(byte)(thisDWord >> (BitConverter.IsLittleEndian ? 16 : 8)); + pInputBuffer += 3; + pOutputBuffer += 2; + outputCharsRemaining -= 2; + + // A two-byte sequence followed by an ASCII byte followed by a non-ASCII byte. + // Read in the next DWORD and jump directly to the start of the multi-byte processing block. 
+ + if (pFinalPosWhereCanReadDWordFromInputBuffer < pInputBuffer) + { + goto ProcessRemainingBytesSlow; // Running out of data - go down slow path + } + else + { + thisDWord = Unsafe.ReadUnaligned(pInputBuffer); + goto BeforeProcessTwoByteSequence; + } + } + } + else + { + if (outputCharsRemaining == 0) + { + goto ProcessRemainingBytesSlow; // running out of output buffer + } + + pOutputBuffer[0] = (char)charToWrite; + pInputBuffer += 2; + pOutputBuffer++; + outputCharsRemaining--; + + if (pFinalPosWhereCanReadDWordFromInputBuffer < pInputBuffer) + { + goto ProcessRemainingBytesSlow; // Running out of data - go down slow path + } + else + { + thisDWord = Unsafe.ReadUnaligned(pInputBuffer); + goto BeforeProcessThreeByteSequence; // we know the next byte isn't ASCII, and it's not the start of a 2-byte sequence (this was checked above) + } + } + } + + // Check the 3-byte case. + + BeforeProcessThreeByteSequence: + + if (UInt32BeginsWithUtf8ThreeByteMask(thisDWord)) + { + ProcessThreeByteSequenceWithCheck: + + // We need to check for overlong or surrogate three-byte sequences. + // + // Per Table 3-7, valid sequences are: + // [ E0 ] [ A0..BF ] [ 80..BF ] + // [ E1..EC ] [ 80..BF ] [ 80..BF ] + // [ ED ] [ 80..9F ] [ 80..BF ] + // [ EE..EF ] [ 80..BF ] [ 80..BF ] + // + // Big-endian examples of using the above validation table: + // E0A0 = 1110 0000 1010 0000 => invalid (overlong ) patterns are 1110 0000 100# #### + // ED9F = 1110 1101 1001 1111 => invalid (surrogate) patterns are 1110 1101 101# #### + // If using the bitmask ......................................... 0000 1111 0010 0000 (=0F20), + // Then invalid (overlong) patterns match the comparand ......... 0000 0000 0000 0000 (=0000), + // And invalid (surrogate) patterns match the comparand ......... 0000 1101 0010 0000 (=0D20). 
+ + if (BitConverter.IsLittleEndian) + { + // The "overlong or surrogate" check can be implemented using a single jump, but there's + // some overhead to moving the bits into the correct locations in order to perform the + // correct comparison, and in practice the processor's branch prediction capability is + // good enough that we shouldn't bother. So we'll use two jumps instead. + + // Can't extract this check into its own helper method because JITter produces suboptimal + // assembly, even with aggressive inlining. + + // Code below becomes 5 instructions: test, jz, lea, test, jz + + if (((thisDWord & 0x0000_200Fu) == 0) || (((thisDWord - 0x0000_200Du) & 0x0000_200Fu) == 0)) + { + goto Error; // overlong or surrogate + } + } + else + { + if (((thisDWord & 0x0F20_0000u) == 0) || (((thisDWord - 0x0D20_0000u) & 0x0F20_0000u) == 0)) + { + goto Error; // overlong or surrogate + } + } + + // At this point, we know the incoming scalar is well-formed. + + if (outputCharsRemaining == 0) + { + goto OutputBufferTooSmall; // not enough space in the destination buffer to write + } + + // As an optimization, on compatible platforms check if a second three-byte sequence immediately + // follows the one we just read, and if so extract them together. + + if (BitConverter.IsLittleEndian) + { + // First, check that the leftover byte from the original DWORD is in the range [ E0..EF ], which + // would indicate the potential start of a second three-byte sequence. + + if (((thisDWord - 0xE000_0000u) & 0xF000_0000u) == 0) + { + // The const '3' below is correct because pFinalPosWhereCanReadDWordFromInputBuffer represents + // the final place where we can safely perform a DWORD read, and we want to probe whether it's + // safe to read a DWORD beginning at address &pInputBuffer[3]. 
+ + if (outputCharsRemaining > 1 && (nint)(void*)Unsafe.ByteOffset(ref *pInputBuffer, ref *pFinalPosWhereCanReadDWordFromInputBuffer) >= 3) + { + // We're going to attempt to read a second 3-byte sequence and write them both out one after the other. + // We need to check the continuation bit mask on the remaining two bytes (and we may as well check the leading + // byte mask again since it's free), then perform overlong + surrogate checks. If the overlong or surrogate + // checks fail, we'll fall through to the remainder of the logic which will transcode the original valid + // 3-byte UTF-8 sequence we read; and on the next iteration of the loop the validation routine will run again, + // fail, and redirect control flow to the error handling logic at the very end of this method. + + uint secondDWord = Unsafe.ReadUnaligned(pInputBuffer + 3); + + if (UInt32BeginsWithUtf8ThreeByteMask(secondDWord) + && ((secondDWord & 0x0000_200Fu) != 0) + && (((secondDWord - 0x0000_200Du) & 0x0000_200Fu) != 0)) + { + pOutputBuffer[0] = (char)ExtractCharFromFirstThreeByteSequence(thisDWord); + pOutputBuffer[1] = (char)ExtractCharFromFirstThreeByteSequence(secondDWord); + pInputBuffer += 6; + pOutputBuffer += 2; + outputCharsRemaining -= 2; + + // Drain any ASCII data following the second three-byte sequence. + + goto CheckForAsciiByteAfterThreeByteSequence; + } + } + } + } + + // Couldn't extract 2x three-byte sequences together, just do this one by itself. + + *pOutputBuffer = (char)ExtractCharFromFirstThreeByteSequence(thisDWord); + pInputBuffer += 3; + pOutputBuffer++; + outputCharsRemaining--; + + CheckForAsciiByteAfterThreeByteSequence: + + // Occasionally one-off ASCII characters like spaces, periods, or newlines will make their way + // in to the text. If this happens strip it off now before seeing if the next character + // consists of three code units. 
+ + if (UInt32FourthByteIsAscii(thisDWord)) + { + if (outputCharsRemaining == 0) + { + goto OutputBufferTooSmall; + } + + if (BitConverter.IsLittleEndian) + { + *pOutputBuffer = (char)(thisDWord >> 24); + } + else + { + *pOutputBuffer = (char)(byte)thisDWord; + } + + pInputBuffer++; + pOutputBuffer++; + outputCharsRemaining--; + } + + if (pInputBuffer <= pFinalPosWhereCanReadDWordFromInputBuffer) + { + thisDWord = Unsafe.ReadUnaligned(pInputBuffer); + + // Optimization: A three-byte character could indicate CJK text, which makes it likely + // that the character following this one is also CJK. We'll check for a three-byte sequence + // marker now and jump directly to three-byte sequence processing if we see one, skipping + // all of the logic at the beginning of the loop. + + if (UInt32BeginsWithUtf8ThreeByteMask(thisDWord)) + { + goto ProcessThreeByteSequenceWithCheck; // found a three-byte sequence marker; validate and consume + } + else + { + goto AfterReadDWord; // probably ASCII punctuation or whitespace + } + } + else + { + goto ProcessRemainingBytesSlow; // Running out of data - go down slow path + } + } + + // Assume the 4-byte case, but we need to validate. + + { + // We need to check for overlong or invalid (over U+10FFFF) four-byte sequences. + // + // Per Table 3-7, valid sequences are: + // [ F0 ] [ 90..BF ] [ 80..BF ] [ 80..BF ] + // [ F1..F3 ] [ 80..BF ] [ 80..BF ] [ 80..BF ] + // [ F4 ] [ 80..8F ] [ 80..BF ] [ 80..BF ] + + if (!UInt32BeginsWithUtf8FourByteMask(thisDWord)) + { + goto Error; + } + + // Now check for overlong / out-of-range sequences. + + if (BitConverter.IsLittleEndian) + { + // The DWORD we read is [ 10xxxxxx 10yyyyyy 10zzzzzz 11110www ]. + // We want to get the 'w' byte in front of the 'z' byte so that we can perform + // a single range comparison. We'll take advantage of the fact that the JITter + // can detect a ROR / ROL operation, then we'll just zero out the bytes that + // aren't involved in the range check. 
+ + uint toCheck = thisDWord & 0x0000_FFFFu; + + // At this point, toCheck = [ 00000000 00000000 10zzzzzz 11110www ]. + + toCheck = BitOperations.RotateRight(toCheck, 8); + + // At this point, toCheck = [ 11110www 00000000 00000000 10zzzzzz ]. + + if (!UnicodeUtility.IsInRangeInclusive(toCheck, 0xF000_0090u, 0xF400_008Fu)) + { + goto Error; + } + } + else + { + if (!UnicodeUtility.IsInRangeInclusive(thisDWord, 0xF090_0000u, 0xF48F_FFFFu)) + { + goto Error; + } + } + + // Validation complete. + + if (outputCharsRemaining < 2) + { + // There's no point to falling back to the "drain the input buffer" logic, since we know + // we can't write anything to the destination. So we'll just exit immediately. + goto OutputBufferTooSmall; + } + + Unsafe.WriteUnaligned(pOutputBuffer, ExtractCharsFromFourByteSequence(thisDWord)); + + pInputBuffer += 4; + pOutputBuffer += 2; + outputCharsRemaining -= 2; + + continue; // go back to beginning of loop for processing + } + } while (pInputBuffer <= pFinalPosWhereCanReadDWordFromInputBuffer); + + ProcessRemainingBytesSlow: + inputLength = (int)(void*)Unsafe.ByteOffset(ref *pInputBuffer, ref *pFinalPosWhereCanReadDWordFromInputBuffer) + 4; + + ProcessInputOfLessThanDWordSize: + while (inputLength > 0) + { + uint firstByte = pInputBuffer[0]; + if (firstByte <= 0x7Fu) + { + if (outputCharsRemaining == 0) + { + goto OutputBufferTooSmall; // we have no hope of writing anything to the output + } + + // 1-byte (ASCII) case + *pOutputBuffer = (char)firstByte; + + pInputBuffer++; + pOutputBuffer++; + inputLength--; + outputCharsRemaining--; + continue; + } + + // Potentially the start of a multi-byte sequence? + + firstByte -= 0xC2u; + if ((byte)firstByte <= (0xDFu - 0xC2u)) + { + // Potentially a 2-byte sequence? 
+ if (inputLength < 2) + { + goto InputBufferTooSmall; // out of data + } + + uint secondByte = pInputBuffer[1]; + if (!IsLowByteUtf8ContinuationByte(secondByte)) + { + goto Error; // 2-byte marker not followed by continuation byte + } + + if (outputCharsRemaining == 0) + { + goto OutputBufferTooSmall; // we have no hope of writing anything to the output + } + + uint asChar = (firstByte << 6) + secondByte + ((0xC2u - 0xC0u) << 6) - 0x80u; // remove UTF-8 markers from scalar + *pOutputBuffer = (char)asChar; + + pInputBuffer += 2; + pOutputBuffer++; + inputLength -= 2; + outputCharsRemaining--; + continue; + } + else if ((byte)firstByte <= (0xEFu - 0xC2u)) + { + // Potentially a 3-byte sequence? + if (inputLength >= 3) + { + uint secondByte = pInputBuffer[1]; + uint thirdByte = pInputBuffer[2]; + if (!IsLowByteUtf8ContinuationByte(secondByte) || !IsLowByteUtf8ContinuationByte(thirdByte)) + { + goto Error; // 3-byte marker not followed by 2 continuation bytes + } + + // To speed up the validation logic below, we're not going to remove the UTF-8 markers from the partial char just yet. + // We account for this in the comparisons below. + + uint partialChar = (firstByte << 12) + (secondByte << 6); + if (partialChar < ((0xE0u - 0xC2u) << 12) + (0xA0u << 6)) + { + goto Error; // this is an overlong encoding; fail + } + + partialChar -= ((0xEDu - 0xC2u) << 12) + (0xA0u << 6); // if partialChar = 0, we're at beginning of UTF-16 surrogate code point range + if (partialChar < 0x0800u /* number of code points in UTF-16 surrogate code point range */) + { + goto Error; // attempted to encode a UTF-16 surrogate code point; fail + } + + if (outputCharsRemaining == 0) + { + goto OutputBufferTooSmall; // we have no hope of writing anything to the output + } + + // Now restore the full scalar value. 
+ + partialChar += thirdByte; + partialChar += 0xD800; // undo "move to beginning of UTF-16 surrogate code point range" from earlier, fold it with later adds + partialChar -= 0x80u; // remove third byte continuation marker + + *pOutputBuffer = (char)partialChar; + + pInputBuffer += 3; + pOutputBuffer++; + inputLength -= 3; + outputCharsRemaining--; + continue; + } + else if (inputLength >= 2) + { + uint secondByte = pInputBuffer[1]; + if (!IsLowByteUtf8ContinuationByte(secondByte)) + { + goto Error; // 3-byte marker not followed by continuation byte + } + + // We can't build up the entire scalar value now, but we can check for overlong / surrogate representations + // from just the first two bytes. + + uint partialChar = (firstByte << 6) + secondByte; // don't worry about fixing up the UTF-8 markers; we'll account for it in the below comparison + if (partialChar < ((0xE0u - 0xC2u) << 6) + 0xA0u) + { + goto Error; // failed overlong check + } + if (UnicodeUtility.IsInRangeInclusive(partialChar, ((0xEDu - 0xC2u) << 6) + 0xA0u, ((0xEEu - 0xC2u) << 6) + 0x7Fu)) + { + goto Error; // failed surrogate check + } + } + + goto InputBufferTooSmall; // out of data + } + else if ((byte)firstByte <= (0xF4u - 0xC2u)) + { + // Potentially a 4-byte sequence? 
+ + if (inputLength < 2) + { + goto InputBufferTooSmall; // ran out of data + } + + uint nextByte = pInputBuffer[1]; + if (!IsLowByteUtf8ContinuationByte(nextByte)) + { + goto Error; // 4-byte marker not followed by a continuation byte + } + + uint asPartialChar = (firstByte << 6) + nextByte; // don't worry about fixing up the UTF-8 markers; we'll account for it in the below comparison + if (!UnicodeUtility.IsInRangeInclusive(asPartialChar, ((0xF0u - 0xC2u) << 6) + 0x90u, ((0xF4u - 0xC2u) << 6) + 0x8Fu)) + { + goto Error; // failed overlong / out-of-range check + } + + if (inputLength < 3) + { + goto InputBufferTooSmall; // ran out of data + } + + if (!IsLowByteUtf8ContinuationByte(pInputBuffer[2])) + { + goto Error; // third byte in 4-byte sequence not a continuation byte + } + + if (inputLength < 4) + { + goto InputBufferTooSmall; // ran out of data + } + + if (!IsLowByteUtf8ContinuationByte(pInputBuffer[3])) + { + goto Error; // fourth byte in 4-byte sequence not a continuation byte + } + + // If we read a valid astral scalar value, the only way we could've fallen down this code path + // is that we didn't have enough output buffer to write the result. + + goto OutputBufferTooSmall; + } + else + { + goto Error; // didn't begin with [ C2 .. F4 ], so invalid multi-byte sequence header byte + } + } + + OperationStatus retVal = OperationStatus.Done; + goto ReturnCommon; + + InputBufferTooSmall: + retVal = OperationStatus.NeedMoreData; + goto ReturnCommon; + + OutputBufferTooSmall: + retVal = OperationStatus.DestinationTooSmall; + goto ReturnCommon; + + Error: + retVal = OperationStatus.InvalidData; + goto ReturnCommon; + + ReturnCommon: + pInputBufferRemaining = pInputBuffer; + pOutputBufferRemaining = pOutputBuffer; + return retVal; + } + + // On method return, pInputBufferRemaining and pOutputBufferRemaining will both point to where + // the next char would have been consumed from / the next byte would have been written to. 
+        // inputLength in chars, outputBytesRemaining in bytes.
+        //
+        // Returns Done when every input char was transcoded, DestinationTooSmall when the output
+        // buffer cannot hold the next transcoded char, NeedMoreData when the input ends with a lone
+        // high surrogate, and InvalidData when an ill-formed surrogate sequence is encountered.
+        public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLength, byte* pOutputBuffer, int outputBytesRemaining, out char* pInputBufferRemaining, out byte* pOutputBufferRemaining)
+        {
+            const int CharsPerDWord = sizeof(uint) / sizeof(char);
+
+            Debug.Assert(inputLength >= 0, "Input length must not be negative.");
+            Debug.Assert(pInputBuffer != null || inputLength == 0, "Input length must be zero if input buffer pointer is null.");
+
+            Debug.Assert(outputBytesRemaining >= 0, "Destination length must not be negative.");
+            Debug.Assert(pOutputBuffer != null || outputBytesRemaining == 0, "Destination length must be zero if destination buffer pointer is null.");
+
+            // First, try vectorized conversion.
+
+            {
+                nuint numElementsConverted = ASCIIUtility.NarrowUtf16ToAscii(pInputBuffer, pOutputBuffer, (uint)Math.Min(inputLength, outputBytesRemaining));
+
+                pInputBuffer += numElementsConverted;
+                pOutputBuffer += numElementsConverted;
+
+                // Quick check - did we just end up consuming the entire input buffer?
+                // If so, short-circuit the remainder of the method.
+
+                if ((int)numElementsConverted == inputLength)
+                {
+                    pInputBufferRemaining = pInputBuffer;
+                    pOutputBufferRemaining = pOutputBuffer;
+                    return OperationStatus.Done;
+                }
+
+                inputLength -= (int)numElementsConverted;
+                outputBytesRemaining -= (int)numElementsConverted;
+            }
+
+            if (inputLength < CharsPerDWord)
+            {
+                goto ProcessInputOfLessThanDWordSize;
+            }
+
+            char* pFinalPosWhereCanReadDWordFromInputBuffer = pInputBuffer + (uint)inputLength - CharsPerDWord;
+
+            // We have paths for SSE4.1 vectorization inside the inner loop. Since the below
+            // vector is only used in those code paths, we leave it uninitialized if SSE4.1
+            // is not enabled.
+
+            Unsafe.SkipInit(out Vector128<short> nonAsciiUtf16DataMask);
+            if (Sse41.X64.IsSupported || (AdvSimd.Arm64.IsSupported && BitConverter.IsLittleEndian))
+            {
+                nonAsciiUtf16DataMask = Vector128.Create(unchecked((short)0xFF80)); // mask of non-ASCII bits in a UTF-16 char
+            }
+
+            // Begin the main loop.
+
+#if DEBUG
+            char* pLastBufferPosProcessed = null; // used for invariant checking in debug builds
+#endif
+
+            uint thisDWord;
+
+            Debug.Assert(pInputBuffer <= pFinalPosWhereCanReadDWordFromInputBuffer);
+            do
+            {
+                // Read 32 bits at a time. This is enough to hold any possible UTF16-encoded scalar.
+
+                thisDWord = Unsafe.ReadUnaligned<uint>(pInputBuffer);
+
+            AfterReadDWord:
+
+#if DEBUG
+                Debug.Assert(pLastBufferPosProcessed < pInputBuffer, "Algorithm should've made forward progress since last read.");
+                pLastBufferPosProcessed = pInputBuffer;
+#endif
+
+                // First, check for the common case of all-ASCII chars.
+
+                if (Utf16Utility.AllCharsInUInt32AreAscii(thisDWord))
+                {
+                    // We read an all-ASCII sequence (2 chars).
+
+                    if (outputBytesRemaining < 2)
+                    {
+                        goto ProcessOneCharFromCurrentDWordAndFinish; // running out of space, but may be able to write some data
+                    }
+
+                    // The high WORD of the local declared below might be populated with garbage
+                    // as a result of our shifts below, but that's ok since we're only going to
+                    // write the low WORD.
+                    //
+                    // [ 00000000 0bbbbbbb | 00000000 0aaaaaaa ] -> [ 00000000 0bbbbbbb | 0bbbbbbb 0aaaaaaa ]
+                    // (Same logic works regardless of endianness.)
+                    uint valueToWrite = thisDWord | (thisDWord >> 8);
+
+                    Unsafe.WriteUnaligned<ushort>(pOutputBuffer, (ushort)valueToWrite);
+
+                    pInputBuffer += 2;
+                    pOutputBuffer += 2;
+                    outputBytesRemaining -= 2;
+
+                    // If we saw a sequence of all ASCII, there's a good chance a significant amount of following data is also ASCII.
+                    // Below is basically unrolled loops with poor man's vectorization.
+
+                    uint inputCharsRemaining = (uint)(pFinalPosWhereCanReadDWordFromInputBuffer - pInputBuffer) + 2;
+                    uint minElementsRemaining = (uint)Math.Min(inputCharsRemaining, outputBytesRemaining);
+
+                    if (Sse41.X64.IsSupported || (AdvSimd.Arm64.IsSupported && BitConverter.IsLittleEndian))
+                    {
+                        // Try reading and writing 8 elements per iteration.
+                        uint maxIters = minElementsRemaining / 8;
+                        ulong possibleNonAsciiQWord;
+                        int i;
+                        Vector128<short> utf16Data;
+                        for (i = 0; (uint)i < maxIters; i++)
+                        {
+                            utf16Data = Unsafe.ReadUnaligned<Vector128<short>>(pInputBuffer);
+
+                            if (AdvSimd.IsSupported)
+                            {
+                                // CompareTest sets a lane to all-ones (-1 as a signed short) when it has any non-ASCII bit,
+                                // so the signed pairwise minimum is non-zero iff at least one lane is non-ASCII.
+                                Vector128<short> isUtf16DataNonAscii = AdvSimd.CompareTest(utf16Data, nonAsciiUtf16DataMask);
+                                bool hasNonAsciiDataInVector = AdvSimd.Arm64.MinPairwise(isUtf16DataNonAscii, isUtf16DataNonAscii).AsUInt64().ToScalar() != 0;
+
+                                if (hasNonAsciiDataInVector)
+                                {
+                                    goto LoopTerminatedDueToNonAsciiDataInVectorLocal;
+                                }
+
+                                Vector64<byte> lower = AdvSimd.ExtractNarrowingSaturateUnsignedLower(utf16Data);
+                                AdvSimd.Store(pOutputBuffer, lower);
+                            }
+                            else
+                            {
+                                if (!Sse41.TestZ(utf16Data, nonAsciiUtf16DataMask))
+                                {
+                                    goto LoopTerminatedDueToNonAsciiDataInVectorLocal;
+                                }
+
+                                // narrow and write
+                                Sse2.StoreScalar((ulong*)pOutputBuffer /* unaligned */, Sse2.PackUnsignedSaturate(utf16Data, utf16Data).AsUInt64());
+                            }
+
+                            pInputBuffer += 8;
+                            pOutputBuffer += 8;
+                        }
+
+                        outputBytesRemaining -= 8 * i;
+
+                        // Can we perform one more iteration, but reading & writing 4 elements instead of 8?
+
+                        if ((minElementsRemaining & 4) != 0)
+                        {
+                            possibleNonAsciiQWord = Unsafe.ReadUnaligned<ulong>(pInputBuffer);
+                            if (!Utf16Utility.AllCharsInUInt64AreAscii(possibleNonAsciiQWord))
+                            {
+                                goto LoopTerminatedDueToNonAsciiDataInPossibleNonAsciiQWordLocal;
+                            }
+
+                            utf16Data = Vector128.CreateScalarUnsafe(possibleNonAsciiQWord).AsInt16();
+
+                            if (AdvSimd.IsSupported)
+                            {
+                                Vector64<byte> lower = AdvSimd.ExtractNarrowingSaturateUnsignedLower(utf16Data);
+                                AdvSimd.StoreSelectedScalar((uint*)pOutputBuffer, lower.AsUInt32(), 0);
+                            }
+                            else
+                            {
+                                Unsafe.WriteUnaligned<uint>(pOutputBuffer, Sse2.ConvertToUInt32(Sse2.PackUnsignedSaturate(utf16Data, utf16Data).AsUInt32()));
+                            }
+
+                            pInputBuffer += 4;
+                            pOutputBuffer += 4;
+                            outputBytesRemaining -= 4;
+                        }
+
+                        continue; // Go back to beginning of main loop, read data, check for ASCII
+
+                    LoopTerminatedDueToNonAsciiDataInVectorLocal:
+
+                        outputBytesRemaining -= 8 * i;
+
+                        if (Sse2.X64.IsSupported)
+                        {
+                            possibleNonAsciiQWord = Sse2.X64.ConvertToUInt64(utf16Data.AsUInt64());
+                        }
+                        else
+                        {
+                            possibleNonAsciiQWord = utf16Data.AsUInt64().ToScalar();
+                        }
+
+                        // Temporarily set 'possibleNonAsciiQWord' to be the low 64 bits of the vector,
+                        // then check whether it's all-ASCII. If so, narrow and write to the destination
+                        // buffer. Since we know that either the high 64 bits or the low 64 bits of the
+                        // vector contains non-ASCII data, by the end of the following block the
+                        // 'possibleNonAsciiQWord' local is guaranteed to contain the non-ASCII segment.
+
+                        if (Utf16Utility.AllCharsInUInt64AreAscii(possibleNonAsciiQWord)) // all chars in first QWORD are ASCII
+                        {
+                            if (AdvSimd.IsSupported)
+                            {
+                                Vector64<byte> lower = AdvSimd.ExtractNarrowingSaturateUnsignedLower(utf16Data);
+                                AdvSimd.StoreSelectedScalar((uint*)pOutputBuffer, lower.AsUInt32(), 0);
+                            }
+                            else
+                            {
+                                Unsafe.WriteUnaligned<uint>(pOutputBuffer, Sse2.ConvertToUInt32(Sse2.PackUnsignedSaturate(utf16Data, utf16Data).AsUInt32()));
+                            }
+                            pInputBuffer += 4;
+                            pOutputBuffer += 4;
+                            outputBytesRemaining -= 4;
+                            possibleNonAsciiQWord = utf16Data.AsUInt64().GetElement(1);
+                        }
+
+                    LoopTerminatedDueToNonAsciiDataInPossibleNonAsciiQWordLocal:
+
+                        Debug.Assert(!Utf16Utility.AllCharsInUInt64AreAscii(possibleNonAsciiQWord)); // this condition should've been checked earlier
+
+                        thisDWord = (uint)possibleNonAsciiQWord;
+                        if (Utf16Utility.AllCharsInUInt32AreAscii(thisDWord))
+                        {
+                            // [ 00000000 0bbbbbbb | 00000000 0aaaaaaa ] -> [ 00000000 0bbbbbbb | 0bbbbbbb 0aaaaaaa ]
+                            Unsafe.WriteUnaligned<ushort>(pOutputBuffer, (ushort)(thisDWord | (thisDWord >> 8)));
+                            pInputBuffer += 2;
+                            pOutputBuffer += 2;
+                            outputBytesRemaining -= 2;
+                            thisDWord = (uint)(possibleNonAsciiQWord >> 32);
+                        }
+
+                        goto AfterReadDWordSkipAllCharsAsciiCheck;
+                    }
+                    else
+                    {
+                        // Can't use SSE41 x64, so we'll only read and write 4 elements per iteration.
+                        uint maxIters = minElementsRemaining / 4;
+                        uint secondDWord;
+                        int i;
+                        for (i = 0; (uint)i < maxIters; i++)
+                        {
+                            thisDWord = Unsafe.ReadUnaligned<uint>(pInputBuffer);
+                            secondDWord = Unsafe.ReadUnaligned<uint>(pInputBuffer + 2);
+
+                            if (!Utf16Utility.AllCharsInUInt32AreAscii(thisDWord | secondDWord))
+                            {
+                                goto LoopTerminatedDueToNonAsciiData;
+                            }
+
+                            // [ 00000000 0bbbbbbb | 00000000 0aaaaaaa ] -> [ 00000000 0bbbbbbb | 0bbbbbbb 0aaaaaaa ]
+                            // (Same logic works regardless of endianness.)
+                            Unsafe.WriteUnaligned<ushort>(pOutputBuffer, (ushort)(thisDWord | (thisDWord >> 8)));
+                            Unsafe.WriteUnaligned<ushort>(pOutputBuffer + 2, (ushort)(secondDWord | (secondDWord >> 8)));
+
+                            pInputBuffer += 4;
+                            pOutputBuffer += 4;
+                        }
+
+                        outputBytesRemaining -= 4 * i;
+
+                        continue; // Go back to beginning of main loop, read data, check for ASCII
+
+                    LoopTerminatedDueToNonAsciiData:
+
+                        outputBytesRemaining -= 4 * i;
+
+                        // First, see if we can drain any ASCII data from the first DWORD.
+
+                        if (Utf16Utility.AllCharsInUInt32AreAscii(thisDWord))
+                        {
+                            // [ 00000000 0bbbbbbb | 00000000 0aaaaaaa ] -> [ 00000000 0bbbbbbb | 0bbbbbbb 0aaaaaaa ]
+                            // (Same logic works regardless of endianness.)
+                            Unsafe.WriteUnaligned<ushort>(pOutputBuffer, (ushort)(thisDWord | (thisDWord >> 8)));
+                            pInputBuffer += 2;
+                            pOutputBuffer += 2;
+                            outputBytesRemaining -= 2;
+                            thisDWord = secondDWord;
+                        }
+
+                        goto AfterReadDWordSkipAllCharsAsciiCheck;
+                    }
+                }
+
+            AfterReadDWordSkipAllCharsAsciiCheck:
+
+                Debug.Assert(!Utf16Utility.AllCharsInUInt32AreAscii(thisDWord)); // this should have been handled earlier
+
+                // Next, try stripping off the first ASCII char if it exists.
+                // We don't check for a second ASCII char since that should have been handled above.
+
+                if (IsFirstCharAscii(thisDWord))
+                {
+                    if (outputBytesRemaining == 0)
+                    {
+                        goto OutputBufferTooSmall;
+                    }
+
+                    if (BitConverter.IsLittleEndian)
+                    {
+                        pOutputBuffer[0] = (byte)thisDWord; // extract [ ## ## 00 AA ]
+                    }
+                    else
+                    {
+                        // On big-endian the first char occupies the high 16 bits of the DWORD
+                        // (see ProcessOneCharFromCurrentDWordAndFinish), so its low byte is bits 16..23.
+                        pOutputBuffer[0] = (byte)(thisDWord >> 16); // extract [ 00 AA ## ## ]
+                    }
+
+                    pInputBuffer++;
+                    pOutputBuffer++;
+                    outputBytesRemaining--;
+
+                    if (pInputBuffer > pFinalPosWhereCanReadDWordFromInputBuffer)
+                    {
+                        goto ProcessNextCharAndFinish; // input buffer doesn't contain enough data to read a DWORD
+                    }
+                    else
+                    {
+                        // The input buffer at the current offset contains a non-ASCII char.
+                        // Read an entire DWORD and fall through to non-ASCII consumption logic.
+                        thisDWord = Unsafe.ReadUnaligned<uint>(pInputBuffer);
+                    }
+                }
+
+                // At this point, we know the first char in the buffer is non-ASCII, but we haven't yet validated it.
+
+                if (!IsFirstCharAtLeastThreeUtf8Bytes(thisDWord))
+                {
+                TryConsumeMultipleTwoByteSequences:
+
+                    // For certain text (Greek, Cyrillic, ...), 2-byte sequences tend to be clustered. We'll try transcoding them in
+                    // a tight loop without falling back to the main loop.
+
+                    if (IsSecondCharTwoUtf8Bytes(thisDWord))
+                    {
+                        // We have two runs of two bytes each.
+
+                        if (outputBytesRemaining < 4)
+                        {
+                            goto ProcessOneCharFromCurrentDWordAndFinish; // running out of output buffer
+                        }
+
+                        Unsafe.WriteUnaligned<uint>(pOutputBuffer, ExtractTwoUtf8TwoByteSequencesFromTwoPackedUtf16Chars(thisDWord));
+
+                        pInputBuffer += 2;
+                        pOutputBuffer += 4;
+                        outputBytesRemaining -= 4;
+
+                        if (pInputBuffer > pFinalPosWhereCanReadDWordFromInputBuffer)
+                        {
+                            goto ProcessNextCharAndFinish; // Running out of data - go down slow path
+                        }
+                        else
+                        {
+                            // Optimization: If we read a long run of two-byte sequences, the next sequence is probably
+                            // also two bytes. Check for that first before going back to the beginning of the loop.
+
+                            thisDWord = Unsafe.ReadUnaligned<uint>(pInputBuffer);
+
+                            if (IsFirstCharTwoUtf8Bytes(thisDWord))
+                            {
+                                // Validated we have a two-byte sequence coming up
+                                goto TryConsumeMultipleTwoByteSequences;
+                            }
+
+                            // If we reached this point, the next sequence is something other than a valid
+                            // two-byte sequence, so go back to the beginning of the loop.
+                            goto AfterReadDWord;
+                        }
+                    }
+
+                    if (outputBytesRemaining < 2)
+                    {
+                        goto OutputBufferTooSmall;
+                    }
+
+                    Unsafe.WriteUnaligned<ushort>(pOutputBuffer, (ushort)ExtractUtf8TwoByteSequenceFromFirstUtf16Char(thisDWord));
+
+                    // The buffer contains a 2-byte sequence followed by 2 bytes that aren't a 2-byte sequence.
+                    // Unlikely that a 3-byte sequence would follow a 2-byte sequence, so perhaps remaining
+                    // char is ASCII?
+
+                    if (IsSecondCharAscii(thisDWord))
+                    {
+                        if (outputBytesRemaining >= 3)
+                        {
+                            if (BitConverter.IsLittleEndian)
+                            {
+                                thisDWord >>= 16;
+                            }
+                            pOutputBuffer[2] = (byte)thisDWord;
+
+                            pInputBuffer += 2;
+                            pOutputBuffer += 3;
+                            outputBytesRemaining -= 3;
+
+                            continue; // go back to original bounds check and check for ASCII
+                        }
+                        else
+                        {
+                            pInputBuffer++;
+                            pOutputBuffer += 2;
+                            goto OutputBufferTooSmall;
+                        }
+                    }
+                    else
+                    {
+                        pInputBuffer++;
+                        pOutputBuffer += 2;
+                        outputBytesRemaining -= 2;
+
+                        if (pInputBuffer > pFinalPosWhereCanReadDWordFromInputBuffer)
+                        {
+                            goto ProcessNextCharAndFinish; // Running out of data - go down slow path
+                        }
+                        else
+                        {
+                            thisDWord = Unsafe.ReadUnaligned<uint>(pInputBuffer);
+                            goto BeforeProcessThreeByteSequence; // we know the next byte isn't ASCII, and it's not the start of a 2-byte sequence (this was checked above)
+                        }
+                    }
+                }
+
+                // Check the 3-byte case.
+
+            BeforeProcessThreeByteSequence:
+
+                if (!IsFirstCharSurrogate(thisDWord))
+                {
+                    // Optimization: A three-byte character could indicate CJK text, which makes it likely
+                    // that the character following this one is also CJK. We'll perform the check now
+                    // rather than jumping to the beginning of the main loop.
+
+                    if (IsSecondCharAtLeastThreeUtf8Bytes(thisDWord))
+                    {
+                        if (!IsSecondCharSurrogate(thisDWord))
+                        {
+                            if (outputBytesRemaining < 6)
+                            {
+                                goto ConsumeSingleThreeByteRun; // not enough space - try consuming as much as we can
+                            }
+
+                            WriteTwoUtf16CharsAsTwoUtf8ThreeByteSequences(ref *pOutputBuffer, thisDWord);
+
+                            pInputBuffer += 2;
+                            pOutputBuffer += 6;
+                            outputBytesRemaining -= 6;
+
+                            // Try to remain in the 3-byte processing loop if at all possible.
+
+                            if (pInputBuffer > pFinalPosWhereCanReadDWordFromInputBuffer)
+                            {
+                                goto ProcessNextCharAndFinish; // Running out of data - go down slow path
+                            }
+                            else
+                            {
+                                thisDWord = Unsafe.ReadUnaligned<uint>(pInputBuffer);
+
+                                if (IsFirstCharAtLeastThreeUtf8Bytes(thisDWord))
+                                {
+                                    goto BeforeProcessThreeByteSequence;
+                                }
+                                else
+                                {
+                                    // Fall back to standard processing loop since we don't know how to optimize this.
+                                    goto AfterReadDWord;
+                                }
+                            }
+                        }
+                    }
+
+                ConsumeSingleThreeByteRun:
+
+                    if (outputBytesRemaining < 3)
+                    {
+                        goto OutputBufferTooSmall;
+                    }
+
+                    WriteFirstUtf16CharAsUtf8ThreeByteSequence(ref *pOutputBuffer, thisDWord);
+
+                    pInputBuffer++;
+                    pOutputBuffer += 3;
+                    outputBytesRemaining -= 3;
+
+                    // Occasionally one-off ASCII characters like spaces, periods, or newlines will make their way
+                    // into the text. If this happens strip it off now before seeing if the next character
+                    // consists of three code units.
+
+                    if (IsSecondCharAscii(thisDWord))
+                    {
+                        if (outputBytesRemaining == 0)
+                        {
+                            goto OutputBufferTooSmall;
+                        }
+
+                        if (BitConverter.IsLittleEndian)
+                        {
+                            *pOutputBuffer = (byte)(thisDWord >> 16);
+                        }
+                        else
+                        {
+                            *pOutputBuffer = (byte)(thisDWord);
+                        }
+
+                        pInputBuffer++;
+                        pOutputBuffer++;
+                        outputBytesRemaining--;
+
+                        if (pInputBuffer > pFinalPosWhereCanReadDWordFromInputBuffer)
+                        {
+                            goto ProcessNextCharAndFinish; // Running out of data - go down slow path
+                        }
+                        else
+                        {
+                            thisDWord = Unsafe.ReadUnaligned<uint>(pInputBuffer);
+
+                            if (IsFirstCharAtLeastThreeUtf8Bytes(thisDWord))
+                            {
+                                goto BeforeProcessThreeByteSequence;
+                            }
+                            else
+                            {
+                                // Fall back to standard processing loop since we don't know how to optimize this.
+                                goto AfterReadDWord;
+                            }
+                        }
+                    }
+
+                    if (pInputBuffer > pFinalPosWhereCanReadDWordFromInputBuffer)
+                    {
+                        goto ProcessNextCharAndFinish; // Running out of data - go down slow path
+                    }
+                    else
+                    {
+                        thisDWord = Unsafe.ReadUnaligned<uint>(pInputBuffer);
+                        goto AfterReadDWordSkipAllCharsAsciiCheck; // we just checked above that this value isn't ASCII
+                    }
+                }
+
+                // Four byte sequence processing
+
+                if (IsWellFormedUtf16SurrogatePair(thisDWord))
+                {
+                    if (outputBytesRemaining < 4)
+                    {
+                        goto OutputBufferTooSmall;
+                    }
+
+                    Unsafe.WriteUnaligned<uint>(pOutputBuffer, ExtractFourUtf8BytesFromSurrogatePair(thisDWord));
+
+                    pInputBuffer += 2;
+                    pOutputBuffer += 4;
+                    outputBytesRemaining -= 4;
+
+                    continue; // go back to beginning of loop for processing
+                }
+
+                goto Error; // an ill-formed surrogate sequence: high not followed by low, or low not preceded by high
+            } while (pInputBuffer <= pFinalPosWhereCanReadDWordFromInputBuffer);
+
+        ProcessNextCharAndFinish:
+            inputLength = (int)(pFinalPosWhereCanReadDWordFromInputBuffer - pInputBuffer) + CharsPerDWord;
+
+        ProcessInputOfLessThanDWordSize:
+            Debug.Assert(inputLength < CharsPerDWord);
+
+            if (inputLength == 0)
+            {
+                goto InputBufferFullyConsumed;
+            }
+
+            uint thisChar = *pInputBuffer;
+            goto ProcessFinalChar;
+
+        ProcessOneCharFromCurrentDWordAndFinish:
+            if (BitConverter.IsLittleEndian)
+            {
+                thisChar = thisDWord & 0xFFFFu; // preserve only the first char
+            }
+            else
+            {
+                thisChar = thisDWord >> 16; // preserve only the first char
+            }
+
+        ProcessFinalChar:
+            {
+                if (thisChar <= 0x7Fu)
+                {
+                    if (outputBytesRemaining == 0)
+                    {
+                        goto OutputBufferTooSmall; // we have no hope of writing anything to the output
+                    }
+
+                    // 1-byte (ASCII) case
+                    *pOutputBuffer = (byte)thisChar;
+
+                    pInputBuffer++;
+                    pOutputBuffer++;
+                }
+                else if (thisChar < 0x0800u)
+                {
+                    if (outputBytesRemaining < 2)
+                    {
+                        goto OutputBufferTooSmall; // we have no hope of writing anything to the output
+                    }
+
+                    // 2-byte case
+                    pOutputBuffer[1] = (byte)((thisChar & 0x3Fu) | unchecked((uint)(sbyte)0x80)); // [ 10xxxxxx ]
+                    pOutputBuffer[0] = (byte)((thisChar >> 6) | unchecked((uint)(sbyte)0xC0)); // [ 110yyyyy ]
+
+                    pInputBuffer++;
+                    pOutputBuffer += 2;
+                }
+                else if (!UnicodeUtility.IsSurrogateCodePoint(thisChar))
+                {
+                    if (outputBytesRemaining < 3)
+                    {
+                        goto OutputBufferTooSmall; // we have no hope of writing anything to the output
+                    }
+
+                    // 3-byte case
+                    pOutputBuffer[2] = (byte)((thisChar & 0x3Fu) | unchecked((uint)(sbyte)0x80)); // [ 10xxxxxx ]
+                    pOutputBuffer[1] = (byte)(((thisChar >> 6) & 0x3Fu) | unchecked((uint)(sbyte)0x80)); // [ 10yyyyyy ]
+                    pOutputBuffer[0] = (byte)((thisChar >> 12) | unchecked((uint)(sbyte)0xE0)); // [ 1110zzzz ]
+
+                    pInputBuffer++;
+                    pOutputBuffer += 3;
+                }
+                else if (thisChar <= 0xDBFFu)
+                {
+                    // UTF-16 high surrogate code point with no trailing data, report incomplete input buffer
+                    goto InputBufferTooSmall;
+                }
+                else
+                {
+                    // UTF-16 low surrogate code point with no leading data, report error
+                    goto Error;
+                }
+            }
+
+            // There are two ways we can end up here. Either we were running low on input data,
+            // or we were running low on space in the destination buffer. If we're running low on
+            // input data (label targets ProcessInputOfLessThanDWordSize and ProcessNextCharAndFinish),
+            // then the inputLength value is guaranteed to be between 0 and 1, and we should return Done.
+            // If we're running low on destination buffer space (label target ProcessOneCharFromCurrentDWordAndFinish),
+            // then we didn't modify inputLength since entering the main loop, which means it should
+            // still have a value of >= 2. So checking the value of inputLength is all we need to do to determine
+            // which of the two scenarios we're in.
+
+            if (inputLength > 1)
+            {
+                goto OutputBufferTooSmall;
+            }
+
+        InputBufferFullyConsumed:
+            OperationStatus retVal = OperationStatus.Done;
+            goto ReturnCommon;
+
+        InputBufferTooSmall:
+            retVal = OperationStatus.NeedMoreData;
+            goto ReturnCommon;
+
+        OutputBufferTooSmall:
+            retVal = OperationStatus.DestinationTooSmall;
+            goto ReturnCommon;
+
+        Error:
+            retVal = OperationStatus.InvalidData;
+            goto ReturnCommon;
+
+        ReturnCommon:
+            pInputBufferRemaining = pInputBuffer;
+            pOutputBufferRemaining = pOutputBuffer;
+            return retVal;
+        }
+    }
+}
+#endif
diff --git a/src/DotNetty.Common/Internal/Utf8Utility.Transcoding.cs b/src/DotNetty.Common/Internal/Utf8Utility.Transcoding.NetCore3.cs
similarity index 93%
rename from src/DotNetty.Common/Internal/Utf8Utility.Transcoding.cs
rename to src/DotNetty.Common/Internal/Utf8Utility.Transcoding.NetCore3.cs
index 65670a61b..78c6629c8 100644
--- a/src/DotNetty.Common/Internal/Utf8Utility.Transcoding.cs
+++ b/src/DotNetty.Common/Internal/Utf8Utility.Transcoding.NetCore3.cs
@@ -4,7 +4,7 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 // See the LICENSE file in the project root for more information.
 
-#if NETCOREAPP_3_0_GREATER
+#if NETCOREAPP3_1
 using System;
 using System.Buffers;
 using System.Buffers.Binary;
@@ -15,7 +15,7 @@
 
 namespace DotNetty.Common.Internal
 {
-    internal static unsafe partial class Utf8Utility
+    unsafe partial class Utf8Utility
     {
         // On method return, pInputBufferRemaining and pOutputBufferRemaining will both point to where
         // the next byte would have been consumed from / the next char would have been written to.
@@ -86,7 +86,7 @@ public static OperationStatus TranscodeToUtf16(byte* pInputBuffer, int inputLeng goto ProcessRemainingBytesSlow; // running out of space, but may be able to write some data } - Utf8Utility.Widen4AsciiBytesToCharsAndWrite(ref *pOutputBuffer, thisDWord); + Widen4AsciiBytesToCharsAndWrite(ref *pOutputBuffer, thisDWord); pInputBuffer += 4; pOutputBuffer += 4; outputCharsRemaining -= 4; @@ -112,8 +112,8 @@ public static OperationStatus TranscodeToUtf16(byte* pInputBuffer, int inputLeng pInputBuffer += 8; - Utf8Utility.Widen4AsciiBytesToCharsAndWrite(ref pOutputBuffer[0], thisDWord); - Utf8Utility.Widen4AsciiBytesToCharsAndWrite(ref pOutputBuffer[4], secondDWord); + Widen4AsciiBytesToCharsAndWrite(ref pOutputBuffer[0], thisDWord); + Widen4AsciiBytesToCharsAndWrite(ref pOutputBuffer[4], secondDWord); pOutputBuffer += 8; } @@ -128,7 +128,7 @@ public static OperationStatus TranscodeToUtf16(byte* pInputBuffer, int inputLeng { // The first DWORD contained all-ASCII bytes, so expand it. - Utf8Utility.Widen4AsciiBytesToCharsAndWrite(ref *pOutputBuffer, thisDWord); + Widen4AsciiBytesToCharsAndWrite(ref *pOutputBuffer, thisDWord); // continue the outer loop from the second DWORD @@ -155,25 +155,25 @@ public static OperationStatus TranscodeToUtf16(byte* pInputBuffer, int inputLeng // Next, try stripping off ASCII bytes one at a time. // We only handle up to three ASCII bytes here since we handled the four ASCII byte case above. - if (Utf8Utility.UInt32FirstByteIsAscii(thisDWord)) + if (UInt32FirstByteIsAscii(thisDWord)) { if (outputCharsRemaining >= 3) { // Fast-track: we don't need to check the destination length for subsequent // ASCII bytes since we know we can write them all now. 
- uint thisDWordLittleEndian = Utf8Utility.ToLittleEndian(thisDWord); + uint thisDWordLittleEndian = ToLittleEndian(thisDWord); nuint adjustment = 1; pOutputBuffer[0] = (char)(byte)thisDWordLittleEndian; - if (Utf8Utility.UInt32SecondByteIsAscii(thisDWord)) + if (UInt32SecondByteIsAscii(thisDWord)) { adjustment++; thisDWordLittleEndian >>= 8; pOutputBuffer[1] = (char)(byte)thisDWordLittleEndian; - if (Utf8Utility.UInt32ThirdByteIsAscii(thisDWord)) + if (UInt32ThirdByteIsAscii(thisDWord)) { adjustment++; thisDWordLittleEndian >>= 8; @@ -195,13 +195,13 @@ public static OperationStatus TranscodeToUtf16(byte* pInputBuffer, int inputLeng goto OutputBufferTooSmall; } - uint thisDWordLittleEndian = Utf8Utility.ToLittleEndian(thisDWord); + uint thisDWordLittleEndian = ToLittleEndian(thisDWord); pInputBuffer++; *pOutputBuffer++ = (char)(byte)thisDWordLittleEndian; outputCharsRemaining--; - if (Utf8Utility.UInt32SecondByteIsAscii(thisDWord)) + if (UInt32SecondByteIsAscii(thisDWord)) { if (0u >= (uint)outputCharsRemaining) { @@ -224,7 +224,7 @@ public static OperationStatus TranscodeToUtf16(byte* pInputBuffer, int inputLeng Debug.Assert(outputCharsRemaining == 1); - if (Utf8Utility.UInt32ThirdByteIsAscii(thisDWord)) + if (UInt32ThirdByteIsAscii(thisDWord)) { goto OutputBufferTooSmall; } @@ -257,12 +257,12 @@ public static OperationStatus TranscodeToUtf16(byte* pInputBuffer, int inputLeng // Check the 2-byte case. - if (Utf8Utility.UInt32BeginsWithUtf8TwoByteMask(thisDWord)) + if (UInt32BeginsWithUtf8TwoByteMask(thisDWord)) { // Per Table 3-7, valid sequences are: // [ C2..DF ] [ 80..BF ] - if (Utf8Utility.UInt32BeginsWithOverlongUtf8TwoByteSequence(thisDWord)) + if (UInt32BeginsWithOverlongUtf8TwoByteSequence(thisDWord)) { goto Error; } @@ -277,8 +277,8 @@ public static OperationStatus TranscodeToUtf16(byte* pInputBuffer, int inputLeng // the value isn't overlong using a single comparison. 
On big-endian platforms, we'll need // to validate the mask and validate that the sequence isn't overlong as two separate comparisons. - if ((BitConverter.IsLittleEndian && Utf8Utility.UInt32EndsWithValidUtf8TwoByteSequenceLittleEndian(thisDWord)) - || (!BitConverter.IsLittleEndian && (Utf8Utility.UInt32EndsWithUtf8TwoByteMask(thisDWord) && !Utf8Utility.UInt32EndsWithOverlongUtf8TwoByteSequence(thisDWord)))) + if ((BitConverter.IsLittleEndian && UInt32EndsWithValidUtf8TwoByteSequenceLittleEndian(thisDWord)) + || (!BitConverter.IsLittleEndian && (UInt32EndsWithUtf8TwoByteMask(thisDWord) && !UInt32EndsWithOverlongUtf8TwoByteSequence(thisDWord)))) { // We have two runs of two bytes each. @@ -287,7 +287,7 @@ public static OperationStatus TranscodeToUtf16(byte* pInputBuffer, int inputLeng goto ProcessRemainingBytesSlow; // running out of output buffer } - Unsafe.WriteUnaligned(pOutputBuffer, Utf8Utility.ExtractTwoCharsPackedFromTwoAdjacentTwoByteSequences(thisDWord)); + Unsafe.WriteUnaligned(pOutputBuffer, ExtractTwoCharsPackedFromTwoAdjacentTwoByteSequences(thisDWord)); pInputBuffer += 4; pOutputBuffer += 2; @@ -302,7 +302,7 @@ public static OperationStatus TranscodeToUtf16(byte* pInputBuffer, int inputLeng if (BitConverter.IsLittleEndian) { - if (Utf8Utility.UInt32BeginsWithValidUtf8TwoByteSequenceLittleEndian(thisDWord)) + if (UInt32BeginsWithValidUtf8TwoByteSequenceLittleEndian(thisDWord)) { // The next sequence is a valid two-byte sequence. goto ProcessTwoByteSequenceSkipOverlongFormCheck; @@ -310,9 +310,9 @@ public static OperationStatus TranscodeToUtf16(byte* pInputBuffer, int inputLeng } else { - if (Utf8Utility.UInt32BeginsWithUtf8TwoByteMask(thisDWord)) + if (UInt32BeginsWithUtf8TwoByteMask(thisDWord)) { - if (Utf8Utility.UInt32BeginsWithOverlongUtf8TwoByteSequence(thisDWord)) + if (UInt32BeginsWithOverlongUtf8TwoByteSequence(thisDWord)) { goto Error; // The next sequence purports to be a 2-byte sequence but is overlong. 
} @@ -335,11 +335,11 @@ public static OperationStatus TranscodeToUtf16(byte* pInputBuffer, int inputLeng // Unlikely that a 3-byte sequence would follow a 2-byte sequence, so perhaps remaining // bytes are ASCII? - uint charToWrite = Utf8Utility.ExtractCharFromFirstTwoByteSequence(thisDWord); // optimistically compute this now, but don't store until we know dest is large enough + uint charToWrite = ExtractCharFromFirstTwoByteSequence(thisDWord); // optimistically compute this now, but don't store until we know dest is large enough - if (Utf8Utility.UInt32ThirdByteIsAscii(thisDWord)) + if (UInt32ThirdByteIsAscii(thisDWord)) { - if (Utf8Utility.UInt32FourthByteIsAscii(thisDWord)) + if (UInt32FourthByteIsAscii(thisDWord)) { if (outputCharsRemaining < 3) { @@ -420,7 +420,7 @@ public static OperationStatus TranscodeToUtf16(byte* pInputBuffer, int inputLeng BeforeProcessThreeByteSequence: - if (Utf8Utility.UInt32BeginsWithUtf8ThreeByteMask(thisDWord)) + if (UInt32BeginsWithUtf8ThreeByteMask(thisDWord)) { ProcessThreeByteSequenceWithCheck: @@ -498,7 +498,7 @@ public static OperationStatus TranscodeToUtf16(byte* pInputBuffer, int inputLeng uint secondDWord = Unsafe.ReadUnaligned(pInputBuffer + 3); - if (Utf8Utility.UInt32BeginsWithUtf8ThreeByteMask(secondDWord) + if (UInt32BeginsWithUtf8ThreeByteMask(secondDWord) && ((secondDWord & 0x0000_200Fu) != 0) && (((secondDWord - 0x0000_200Du) & 0x0000_200Fu) != 0)) { @@ -524,7 +524,7 @@ public static OperationStatus TranscodeToUtf16(byte* pInputBuffer, int inputLeng // Couldn't extract 2x three-byte sequences together, just do this one by itself. - *pOutputBuffer = (char)Utf8Utility.ExtractCharFromFirstThreeByteSequence(thisDWord); + *pOutputBuffer = (char)ExtractCharFromFirstThreeByteSequence(thisDWord); pInputBuffer += 3; pOutputBuffer += 1; outputCharsRemaining -= 1; @@ -535,7 +535,7 @@ public static OperationStatus TranscodeToUtf16(byte* pInputBuffer, int inputLeng // in to the text. 
If this happens strip it off now before seeing if the next character // consists of three code units. - if (Utf8Utility.UInt32FourthByteIsAscii(thisDWord)) + if (UInt32FourthByteIsAscii(thisDWord)) { if (0u >= (uint)outputCharsRemaining) { @@ -565,7 +565,7 @@ public static OperationStatus TranscodeToUtf16(byte* pInputBuffer, int inputLeng // marker now and jump directly to three-byte sequence processing if we see one, skipping // all of the logic at the beginning of the loop. - if (Utf8Utility.UInt32BeginsWithUtf8ThreeByteMask(thisDWord)) + if (UInt32BeginsWithUtf8ThreeByteMask(thisDWord)) { goto ProcessThreeByteSequenceWithCheck; // found a three-byte sequence marker; validate and consume } @@ -590,7 +590,7 @@ public static OperationStatus TranscodeToUtf16(byte* pInputBuffer, int inputLeng // [ F1..F3 ] [ 80..BF ] [ 80..BF ] [ 80..BF ] // [ F4 ] [ 80..8F ] [ 80..BF ] [ 80..BF ] - if (!Utf8Utility.UInt32BeginsWithUtf8FourByteMask(thisDWord)) + if (!UInt32BeginsWithUtf8FourByteMask(thisDWord)) { goto Error; } @@ -635,7 +635,7 @@ public static OperationStatus TranscodeToUtf16(byte* pInputBuffer, int inputLeng goto OutputBufferTooSmall; } - Unsafe.WriteUnaligned(pOutputBuffer, Utf8Utility.ExtractCharsFromFourByteSequence(thisDWord)); + Unsafe.WriteUnaligned(pOutputBuffer, ExtractCharsFromFourByteSequence(thisDWord)); pInputBuffer += 4; pOutputBuffer += 2; @@ -681,7 +681,7 @@ public static OperationStatus TranscodeToUtf16(byte* pInputBuffer, int inputLeng } uint secondByte = pInputBuffer[1]; - if (!Utf8Utility.IsLowByteUtf8ContinuationByte(secondByte)) + if (!IsLowByteUtf8ContinuationByte(secondByte)) { goto Error; // 2-byte marker not followed by continuation byte } @@ -707,7 +707,7 @@ public static OperationStatus TranscodeToUtf16(byte* pInputBuffer, int inputLeng { uint secondByte = pInputBuffer[1]; uint thirdByte = pInputBuffer[2]; - if (!Utf8Utility.IsLowByteUtf8ContinuationByte(secondByte) || !Utf8Utility.IsLowByteUtf8ContinuationByte(thirdByte)) + if 
(!IsLowByteUtf8ContinuationByte(secondByte) || !IsLowByteUtf8ContinuationByte(thirdByte)) { goto Error; // 3-byte marker not followed by 2 continuation bytes } @@ -749,7 +749,7 @@ public static OperationStatus TranscodeToUtf16(byte* pInputBuffer, int inputLeng else if (inputLength >= 2) { uint secondByte = pInputBuffer[1]; - if (!Utf8Utility.IsLowByteUtf8ContinuationByte(secondByte)) + if (!IsLowByteUtf8ContinuationByte(secondByte)) { goto Error; // 3-byte marker not followed by continuation byte } @@ -780,7 +780,7 @@ public static OperationStatus TranscodeToUtf16(byte* pInputBuffer, int inputLeng } uint nextByte = pInputBuffer[1]; - if (!Utf8Utility.IsLowByteUtf8ContinuationByte(nextByte)) + if (!IsLowByteUtf8ContinuationByte(nextByte)) { goto Error; // 4-byte marker not followed by a continuation byte } @@ -796,7 +796,7 @@ public static OperationStatus TranscodeToUtf16(byte* pInputBuffer, int inputLeng goto InputBufferTooSmall; // ran out of data } - if (!Utf8Utility.IsLowByteUtf8ContinuationByte(pInputBuffer[2])) + if (!IsLowByteUtf8ContinuationByte(pInputBuffer[2])) { goto Error; // third byte in 4-byte sequence not a continuation byte } @@ -806,7 +806,7 @@ public static OperationStatus TranscodeToUtf16(byte* pInputBuffer, int inputLeng goto InputBufferTooSmall; // ran out of data } - if (!Utf8Utility.IsLowByteUtf8ContinuationByte(pInputBuffer[3])) + if (!IsLowByteUtf8ContinuationByte(pInputBuffer[3])) { goto Error; // fourth byte in 4-byte sequence not a continuation byte } @@ -1077,7 +1077,7 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt // Next, try stripping off the first ASCII char if it exists. // We don't check for a second ASCII char since that should have been handled above. 
- if (Utf8Utility.IsFirstCharAscii(thisDWord)) + if (IsFirstCharAscii(thisDWord)) { if (0u >= (uint)outputBytesRemaining) { @@ -1111,14 +1111,14 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt // At this point, we know the first char in the buffer is non-ASCII, but we haven't yet validated it. - if (!Utf8Utility.IsFirstCharAtLeastThreeUtf8Bytes(thisDWord)) + if (!IsFirstCharAtLeastThreeUtf8Bytes(thisDWord)) { TryConsumeMultipleTwoByteSequences: // For certain text (Greek, Cyrillic, ...), 2-byte sequences tend to be clustered. We'll try transcoding them in // a tight loop without falling back to the main loop. - if (Utf8Utility.IsSecondCharTwoUtf8Bytes(thisDWord)) + if (IsSecondCharTwoUtf8Bytes(thisDWord)) { // We have two runs of two bytes each. @@ -1127,7 +1127,7 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt goto ProcessOneCharFromCurrentDWordAndFinish; // running out of output buffer } - Unsafe.WriteUnaligned(pOutputBuffer, Utf8Utility.ExtractTwoUtf8TwoByteSequencesFromTwoPackedUtf16Chars(thisDWord)); + Unsafe.WriteUnaligned(pOutputBuffer, ExtractTwoUtf8TwoByteSequencesFromTwoPackedUtf16Chars(thisDWord)); pInputBuffer += 2; pOutputBuffer += 4; @@ -1144,7 +1144,7 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt thisDWord = Unsafe.ReadUnaligned(pInputBuffer); - if (Utf8Utility.IsFirstCharTwoUtf8Bytes(thisDWord)) + if (IsFirstCharTwoUtf8Bytes(thisDWord)) { // Validated we have a two-byte sequence coming up goto TryConsumeMultipleTwoByteSequences; @@ -1161,13 +1161,13 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt goto OutputBufferTooSmall; } - Unsafe.WriteUnaligned(pOutputBuffer, (ushort)Utf8Utility.ExtractUtf8TwoByteSequenceFromFirstUtf16Char(thisDWord)); + Unsafe.WriteUnaligned(pOutputBuffer, (ushort)ExtractUtf8TwoByteSequenceFromFirstUtf16Char(thisDWord)); // The buffer contains a 2-byte sequence followed by 2 bytes 
that aren't a 2-byte sequence. // Unlikely that a 3-byte sequence would follow a 2-byte sequence, so perhaps remaining // char is ASCII? - if (Utf8Utility.IsSecondCharAscii(thisDWord)) + if (IsSecondCharAscii(thisDWord)) { if (outputBytesRemaining >= 3) { @@ -1212,22 +1212,22 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt BeforeProcessThreeByteSequence: - if (!Utf8Utility.IsFirstCharSurrogate(thisDWord)) + if (!IsFirstCharSurrogate(thisDWord)) { // Optimization: A three-byte character could indicate CJK text, which makes it likely // that the character following this one is also CJK. We'll perform the check now // rather than jumping to the beginning of the main loop. - if (Utf8Utility.IsSecondCharAtLeastThreeUtf8Bytes(thisDWord)) + if (IsSecondCharAtLeastThreeUtf8Bytes(thisDWord)) { - if (!Utf8Utility.IsSecondCharSurrogate(thisDWord)) + if (!IsSecondCharSurrogate(thisDWord)) { if (outputBytesRemaining < 6) { goto ConsumeSingleThreeByteRun; // not enough space - try consuming as much as we can } - Utf8Utility.WriteTwoUtf16CharsAsTwoUtf8ThreeByteSequences(ref *pOutputBuffer, thisDWord); + WriteTwoUtf16CharsAsTwoUtf8ThreeByteSequences(ref *pOutputBuffer, thisDWord); pInputBuffer += 2; pOutputBuffer += 6; @@ -1243,7 +1243,7 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt { thisDWord = Unsafe.ReadUnaligned(pInputBuffer); - if (Utf8Utility.IsFirstCharAtLeastThreeUtf8Bytes(thisDWord)) + if (IsFirstCharAtLeastThreeUtf8Bytes(thisDWord)) { goto BeforeProcessThreeByteSequence; } @@ -1263,7 +1263,7 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt goto OutputBufferTooSmall; } - Utf8Utility.WriteFirstUtf16CharAsUtf8ThreeByteSequence(ref *pOutputBuffer, thisDWord); + WriteFirstUtf16CharAsUtf8ThreeByteSequence(ref *pOutputBuffer, thisDWord); pInputBuffer += 1; pOutputBuffer += 3; @@ -1273,7 +1273,7 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int 
inputLengt // in to the text. If this happens strip it off now before seeing if the next character // consists of three code units. - if (Utf8Utility.IsSecondCharAscii(thisDWord)) + if (IsSecondCharAscii(thisDWord)) { if (0u >= (uint)outputBytesRemaining) { @@ -1301,7 +1301,7 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt { thisDWord = Unsafe.ReadUnaligned(pInputBuffer); - if (Utf8Utility.IsFirstCharAtLeastThreeUtf8Bytes(thisDWord)) + if (IsFirstCharAtLeastThreeUtf8Bytes(thisDWord)) { goto BeforeProcessThreeByteSequence; } @@ -1326,14 +1326,14 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt // Four byte sequence processing - if (Utf8Utility.IsWellFormedUtf16SurrogatePair(thisDWord)) + if (IsWellFormedUtf16SurrogatePair(thisDWord)) { if (outputBytesRemaining < 4) { goto OutputBufferTooSmall; } - Unsafe.WriteUnaligned(pOutputBuffer, Utf8Utility.ExtractFourUtf8BytesFromSurrogatePair(thisDWord)); + Unsafe.WriteUnaligned(pOutputBuffer, ExtractFourUtf8BytesFromSurrogatePair(thisDWord)); pInputBuffer += 2; pOutputBuffer += 4; diff --git a/src/DotNetty.Common/Internal/Utf8Utility.Validation.Net.cs b/src/DotNetty.Common/Internal/Utf8Utility.Validation.Net.cs new file mode 100644 index 000000000..299c8c759 --- /dev/null +++ b/src/DotNetty.Common/Internal/Utf8Utility.Validation.Net.cs @@ -0,0 +1,32 @@ +// borrowed from https://github.com/dotnet/corefx/tree/release/3.1/src/Common/src/CoreLib/System/Text/Unicode + +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +#if NET +using System; +using System.Runtime.CompilerServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; + +namespace DotNetty.Common.Internal +{ + partial class Utf8Utility + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static ulong GetNonAsciiBytes(Vector128 value, Vector128 bitMask128) + { + if (!AdvSimd.Arm64.IsSupported || !BitConverter.IsLittleEndian) + { + throw ThrowHelper.GetNotSupportedException(); ; + } + + Vector128 mostSignificantBitIsSet = AdvSimd.ShiftRightArithmetic(value.AsSByte(), 7).AsByte(); + Vector128 extractedBits = AdvSimd.And(mostSignificantBitIsSet, bitMask128); + extractedBits = AdvSimd.Arm64.AddPairwise(extractedBits, extractedBits); + return extractedBits.AsUInt64().ToScalar(); + } + } +} +#endif diff --git a/src/DotNetty.Common/Internal/Utf8Utility.Validation.cs b/src/DotNetty.Common/Internal/Utf8Utility.Validation.cs index 33b0a4e86..2141f1f2a 100644 --- a/src/DotNetty.Common/Internal/Utf8Utility.Validation.cs +++ b/src/DotNetty.Common/Internal/Utf8Utility.Validation.cs @@ -10,10 +10,14 @@ using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.Intrinsics.X86; +#if NET +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; +#endif namespace DotNetty.Common.Internal { - internal static unsafe partial class Utf8Utility + unsafe partial class Utf8Utility { // Returns &inputBuffer[inputLength] if the input buffer is valid. /// @@ -118,6 +122,7 @@ internal static unsafe partial class Utf8Utility // the alignment check consumes at most a single DWORD.) byte* pInputBufferFinalPosAtWhichCanSafelyLoop = pFinalPosWhereCanReadDWordFromInputBuffer - 3 * sizeof(uint); // can safely read 4 DWORDs here +#if NETCOREAPP3_1 uint mask; do @@ -136,6 +141,39 @@ internal static unsafe partial class Utf8Utility goto Sse2LoopTerminatedEarlyDueToNonAsciiData; } } +#else + nuint trailingZeroCount; + + Vector128 bitMask128 = BitConverter.IsLittleEndian ? 
+ Vector128.Create((ushort)0x1001).AsByte() : + Vector128.Create((ushort)0x0110).AsByte(); + + do + { + // pInputBuffer is 32-bit aligned but not necessarily 128-bit aligned, so we're + // going to perform an unaligned load. We don't necessarily care about aligning + // this because we pessimistically assume we'll encounter non-ASCII data at some + // point in the not-too-distant future (otherwise we would've stayed entirely + // within the all-ASCII vectorized code at the entry to this method). + if (AdvSimd.Arm64.IsSupported && BitConverter.IsLittleEndian) + { + ulong mask = GetNonAsciiBytes(AdvSimd.LoadVector128(pInputBuffer), bitMask128); + if (mask != 0) + { + trailingZeroCount = (nuint)BitOperations.TrailingZeroCount(mask) >> 2; + goto LoopTerminatedEarlyDueToNonAsciiData; + } + } + else if (Sse2.IsSupported) + { + uint mask = (uint)Sse2.MoveMask(Sse2.LoadVector128(pInputBuffer)); + if (mask != 0) + { + trailingZeroCount = (nuint)BitOperations.TrailingZeroCount(mask); + goto LoopTerminatedEarlyDueToNonAsciiData; + } + } +#endif else { if (!ASCIIUtility.AllBytesInUInt32AreAscii(((uint*)pInputBuffer)[0] | ((uint*)pInputBuffer)[1])) @@ -154,6 +192,7 @@ internal static unsafe partial class Utf8Utility continue; // need to perform a bounds check because we might be running out of data +#if NETCOREAPP3_1 Sse2LoopTerminatedEarlyDueToNonAsciiData: Debug.Assert(BitConverter.IsLittleEndian); @@ -168,6 +207,22 @@ internal static unsafe partial class Utf8Utility Debug.Assert(mask != 0); pInputBuffer += Bmi1.TrailingZeroCount(mask); +#else + LoopTerminatedEarlyDueToNonAsciiData: + // x86 can only be little endian, while ARM can be big or little endian + // so if we reached this label we need to check both combinations are supported + Debug.Assert((AdvSimd.Arm64.IsSupported && BitConverter.IsLittleEndian) || Sse2.IsSupported); + + + // The 'mask' value will have a 0 bit for each ASCII byte we saw and a 1 bit + // for each non-ASCII byte we saw.
trailingZeroCount will count the number of ASCII bytes, + // bump our input counter by that amount, and resume processing from the + // "the first byte is no longer ASCII" portion of the main loop. + // We should not expect a total number of zeroes equal or larger than 16. + Debug.Assert(trailingZeroCount < 16); + + pInputBuffer += trailingZeroCount; +#endif if (pInputBuffer > pFinalPosWhereCanReadDWordFromInputBuffer) { goto ProcessRemainingBytesSlow; @@ -261,8 +316,8 @@ internal static unsafe partial class Utf8Utility // the value isn't overlong using a single comparison. On big-endian platforms, we'll need // to validate the mask and validate that the sequence isn't overlong as two separate comparisons. - if ((BitConverter.IsLittleEndian && Utf8Utility.UInt32EndsWithValidUtf8TwoByteSequenceLittleEndian(thisDWord)) - || (!BitConverter.IsLittleEndian && (Utf8Utility.UInt32EndsWithUtf8TwoByteMask(thisDWord) && !Utf8Utility.UInt32EndsWithOverlongUtf8TwoByteSequence(thisDWord)))) + if ((BitConverter.IsLittleEndian && UInt32EndsWithValidUtf8TwoByteSequenceLittleEndian(thisDWord)) + || (!BitConverter.IsLittleEndian && (UInt32EndsWithUtf8TwoByteMask(thisDWord) && !UInt32EndsWithOverlongUtf8TwoByteSequence(thisDWord)))) { // We have two runs of two bytes each. pInputBuffer += 4; @@ -277,7 +332,7 @@ internal static unsafe partial class Utf8Utility if (BitConverter.IsLittleEndian) { - if (Utf8Utility.UInt32BeginsWithValidUtf8TwoByteSequenceLittleEndian(thisDWord)) + if (UInt32BeginsWithValidUtf8TwoByteSequenceLittleEndian(thisDWord)) { // The next sequence is a valid two-byte sequence. 
goto ProcessTwoByteSequenceSkipOverlongFormCheck; @@ -285,9 +340,9 @@ internal static unsafe partial class Utf8Utility } else { - if (Utf8Utility.UInt32BeginsWithUtf8TwoByteMask(thisDWord)) + if (UInt32BeginsWithUtf8TwoByteMask(thisDWord)) { - if (Utf8Utility.UInt32BeginsWithOverlongUtf8TwoByteSequence(thisDWord)) + if (UInt32BeginsWithOverlongUtf8TwoByteSequence(thisDWord)) { goto Error; // The next sequence purports to be a 2-byte sequence but is overlong. } @@ -312,9 +367,9 @@ internal static unsafe partial class Utf8Utility tempUtf16CodeUnitCountAdjustment--; // 2-byte sequence + (some number of ASCII bytes) -> 1 UTF-16 code units (and 1 scalar) [+ trailing] - if (Utf8Utility.UInt32ThirdByteIsAscii(thisDWord)) + if (UInt32ThirdByteIsAscii(thisDWord)) { - if (Utf8Utility.UInt32FourthByteIsAscii(thisDWord)) + if (UInt32FourthByteIsAscii(thisDWord)) { pInputBuffer += 4; } @@ -449,7 +504,7 @@ internal static unsafe partial class Utf8Utility // Is this three 3-byte sequences in a row? // thisQWord = [ 10yyyyyy 1110zzzz | 10xxxxxx 10yyyyyy 1110zzzz | 10xxxxxx 10yyyyyy 1110zzzz ] [ 10xxxxxx ] // ---- CHAR 3 ---- --------- CHAR 2 --------- --------- CHAR 1 --------- -CHAR 3- - if ((thisQWord & 0xC0F0_C0C0_F0C0_C0F0ul) == 0x80E0_8080_E080_80E0ul && Utf8Utility.IsUtf8ContinuationByte(in pInputBuffer[8])) + if ((thisQWord & 0xC0F0_C0C0_F0C0_C0F0ul) == 0x80E0_8080_E080_80E0ul && IsUtf8ContinuationByte(in pInputBuffer[8])) { // Saw a proper bitmask for three incoming 3-byte sequences, perform the // overlong and surrogate sequence checking now. @@ -523,7 +578,7 @@ internal static unsafe partial class Utf8Utility continue; } - if (Utf8Utility.UInt32BeginsWithUtf8ThreeByteMask(thisDWord)) + if (UInt32BeginsWithUtf8ThreeByteMask(thisDWord)) { // A single three-byte sequence. 
goto ProcessThreeByteSequenceWithCheck; @@ -545,7 +600,7 @@ internal static unsafe partial class Utf8Utility // marker now and jump directly to three-byte sequence processing if we see one, skipping // all of the logic at the beginning of the loop. - if (Utf8Utility.UInt32BeginsWithUtf8ThreeByteMask(thisDWord)) + if (UInt32BeginsWithUtf8ThreeByteMask(thisDWord)) { goto ProcessThreeByteSequenceWithCheck; // Found another [not yet validated] three-byte sequence; process } @@ -655,7 +710,7 @@ internal static unsafe partial class Utf8Utility if ((byte)firstByte < 0xE0u) { // 2-byte case - if ((byte)firstByte >= 0xC2u && Utf8Utility.IsLowByteUtf8ContinuationByte(secondByte)) + if ((byte)firstByte >= 0xC2u && IsLowByteUtf8ContinuationByte(secondByte)) { pInputBuffer += 2; tempUtf16CodeUnitCountAdjustment--; // 2 UTF-8 bytes -> 1 UTF-16 code unit (and 1 scalar) @@ -683,13 +738,13 @@ internal static unsafe partial class Utf8Utility } else { - if (!Utf8Utility.IsLowByteUtf8ContinuationByte(secondByte)) + if (!IsLowByteUtf8ContinuationByte(secondByte)) { goto Error; // first trailing byte doesn't have proper continuation marker } } - if (Utf8Utility.IsUtf8ContinuationByte(in pInputBuffer[2])) + if (IsUtf8ContinuationByte(in pInputBuffer[2])) { pInputBuffer += 3; tempUtf16CodeUnitCountAdjustment -= 2; // 3 UTF-8 bytes -> 2 UTF-16 code units (and 2 scalars) diff --git a/src/DotNetty.Common/Internal/Utf8Utility.WhiteSpace.cs b/src/DotNetty.Common/Internal/Utf8Utility.WhiteSpace.cs new file mode 100644 index 000000000..5030c5c37 --- /dev/null +++ b/src/DotNetty.Common/Internal/Utf8Utility.WhiteSpace.cs @@ -0,0 +1,132 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +#if NETCOREAPP_3_0_GREATER +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Text; + +namespace DotNetty.Common.Internal +{ + partial class Utf8Utility + { + /// <summary> + /// Returns the index in <paramref name="utf8Data"/> where the first non-whitespace character + /// appears, or the input length if the data contains only whitespace characters. + /// </summary> + public static int GetIndexOfFirstNonWhiteSpaceChar(ReadOnlySpan utf8Data) + { + return (int)GetIndexOfFirstNonWhiteSpaceChar(ref MemoryMarshal.GetReference(utf8Data), utf8Data.Length); + } + + internal static nint GetIndexOfFirstNonWhiteSpaceChar(ref byte utf8Data, nint length) + { + // This method is optimized for the case where the input data is ASCII, and if the + // data does need to be trimmed it's likely that only a relatively small number of + // bytes will be trimmed. + + nint i = 0; + + while (i < length) + { + // Very quick check: see if the byte is in the range [ 21 .. 7F ]. + // If so, we can skip the more expensive logic later in this method. + + if ((sbyte)Unsafe.AddByteOffset(ref utf8Data, i) > (sbyte)0x20) + { + break; + } + + uint possibleAsciiByte = Unsafe.AddByteOffset(ref utf8Data, i); + if (UnicodeUtility.IsAsciiCodePoint(possibleAsciiByte)) + { + // The simple comparison failed. Let's read the actual byte value, + // and if it's ASCII we can delegate to Rune's inlined method + // implementation. + + if (Rune.IsWhiteSpace(new Rune(possibleAsciiByte))) + { + i++; + continue; + } + } + else + { + // Not ASCII data. Go back to the slower "decode the entire scalar" + // code path, then compare it against our Unicode tables. + + Rune.DecodeFromUtf8(MemoryMarshal.CreateReadOnlySpan(ref utf8Data, (int)length).Slice((int)i), out Rune decodedRune, out int bytesConsumed); + if (Rune.IsWhiteSpace(decodedRune)) + { + i += bytesConsumed; + continue; + } + } + + break; // If we got here, we saw a non-whitespace subsequence.
+ } + + return i; + } + + /// <summary> + /// Returns the index in <paramref name="utf8Data"/> where the trailing whitespace sequence + /// begins, or 0 if the data contains only whitespace characters, or the span length if the + /// data does not end with any whitespace characters. + /// </summary> + public static int GetIndexOfTrailingWhiteSpaceSequence(ReadOnlySpan utf8Data) + { + return (int)GetIndexOfTrailingWhiteSpaceSequence(ref MemoryMarshal.GetReference(utf8Data), utf8Data.Length); + } + + internal static nint GetIndexOfTrailingWhiteSpaceSequence(ref byte utf8Data, nint length) + { + // This method is optimized for the case where the input data is ASCII, and if the + // data does need to be trimmed it's likely that only a relatively small number of + // bytes will be trimmed. + + while (length > 0) + { + // Very quick check: see if the byte is in the range [ 21 .. 7F ]. + // If so, we can skip the more expensive logic later in this method. + + if ((sbyte)Unsafe.Add(ref Unsafe.AddByteOffset(ref utf8Data, length), -1) > (sbyte)0x20) + { + break; + } + + uint possibleAsciiByte = Unsafe.Add(ref Unsafe.AddByteOffset(ref utf8Data, length), -1); + if (UnicodeUtility.IsAsciiCodePoint(possibleAsciiByte)) + { + // The simple comparison failed. Let's read the actual byte value, + // and if it's ASCII we can delegate to Rune's inlined method + // implementation. + + if (Rune.IsWhiteSpace(new Rune(possibleAsciiByte))) + { + length--; + continue; + } + } + else + { + // Not ASCII data. Go back to the slower "decode the entire scalar" + // code path, then compare it against our Unicode tables. + + Rune.DecodeLastFromUtf8(MemoryMarshal.CreateReadOnlySpan(ref utf8Data, (int)length), out Rune decodedRune, out int bytesConsumed); + if (Rune.IsWhiteSpace(decodedRune)) + { + length -= bytesConsumed; + continue; + } + } + + break; // If we got here, we saw a non-whitespace subsequence.
+ } + + return length; + } + } +} +#endif \ No newline at end of file diff --git a/src/DotNetty.Common/Internal/Utf8Utility.cs b/src/DotNetty.Common/Internal/Utf8Utility.cs index e7febc58b..d82b34e0f 100644 --- a/src/DotNetty.Common/Internal/Utf8Utility.cs +++ b/src/DotNetty.Common/Internal/Utf8Utility.cs @@ -8,12 +8,6 @@ using System; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; -#if FEATURE_UTF8STRING -using System.Buffers; -using System.Diagnostics; -using System.Diagnostics.CodeAnalysis; -using System.IO; -#endif namespace DotNetty.Common.Internal { @@ -48,59 +42,6 @@ public unsafe static int GetIndexOfFirstInvalidUtf8Sequence(in ReadOnlySpan - /// Returns if it is null or contains only well-formed UTF-8 data; - /// otherwises allocates a new instance containing the same data as - /// but where all invalid UTF-8 sequences have been replaced - /// with U+FFD. - /// - [return: NotNullIfNotNull("value")] - public static Utf8String? ValidateAndFixupUtf8String(Utf8String? value) - { - if (Utf8String.IsNullOrEmpty(value)) - { - return value; - } - - ReadOnlySpan valueAsBytes = value.AsBytes(); - - int idxOfFirstInvalidData = GetIndexOfFirstInvalidUtf8Sequence(valueAsBytes, out _); - if (idxOfFirstInvalidData < 0) - { - return value; - } - - // TODO_UTF8STRING: Replace this with the faster implementation once it's available. - // (The faster implementation is in the dev/utf8string_bak branch currently.) 
- - MemoryStream memStream = new MemoryStream(); - memStream.Write(valueAsBytes.Slice(0, idxOfFirstInvalidData)); - - valueAsBytes = valueAsBytes.Slice(idxOfFirstInvalidData); - do - { - if (Rune.DecodeFromUtf8(valueAsBytes, out _, out int bytesConsumed) == OperationStatus.Done) - { - // Valid scalar value - copy data as-is to MemoryStream - memStream.Write(valueAsBytes.Slice(0, bytesConsumed)); - } - else - { - // Invalid scalar value - copy U+FFFD to MemoryStream - memStream.Write(ReplacementCharSequence); - } - - valueAsBytes = valueAsBytes.Slice(bytesConsumed); - } while (!valueAsBytes.IsEmpty); - - bool success = memStream.TryGetBuffer(out ArraySegment memStreamBuffer); - Debug.Assert(success, "Couldn't get underlying MemoryStream buffer."); - - return Utf8String.DangerousCreateWithoutValidation(memStreamBuffer, assumeWellFormed: true); - } -#endif // FEATURE_UTF8STRING } } #endif diff --git a/test/DotNetty.Common.Tests/DotNetty.Common.Tests.csproj b/test/DotNetty.Common.Tests/DotNetty.Common.Tests.csproj index f28d8af8f..5441058a2 100644 --- a/test/DotNetty.Common.Tests/DotNetty.Common.Tests.csproj +++ b/test/DotNetty.Common.Tests/DotNetty.Common.Tests.csproj @@ -4,7 +4,10 @@ $(StandardTestTfms) DotNetty.Common.Tests DotNetty.Common.Tests - false + true + + + $(DefineConstants);CORELIBTEST win-x64 diff --git a/test/DotNetty.Common.Tests/Internal/CoreLib/ASCIIUtilityTests.cs b/test/DotNetty.Common.Tests/Internal/CoreLib/ASCIIUtilityTests.cs new file mode 100644 index 000000000..2587a2169 --- /dev/null +++ b/test/DotNetty.Common.Tests/Internal/CoreLib/ASCIIUtilityTests.cs @@ -0,0 +1,419 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +#if CORELIBTEST +using System; +using System.Buffers; +using System.Numerics; +using System.Runtime.InteropServices; +using System.Security.Cryptography; +using DotNetty.Common.Internal; +using Xunit; + +namespace DotNetty.Common.Tests.Internal.CoreLib +{ + // Since many of the methods we'll be testing are internal, we'll need to invoke + // them via reflection. + public static unsafe class AsciiUtilityTests + { + private const int SizeOfVector128 = 128 / 8; + + [Fact] + public static void GetIndexOfFirstNonAsciiByte_EmptyInput_NullReference() + { + Assert.Equal(UIntPtr.Zero, (UIntPtr)ASCIIUtility.GetIndexOfFirstNonAsciiByte(null, UIntPtr.Zero)); + } + + [Fact] + public static void GetIndexOfFirstNonAsciiByte_EmptyInput_NonNullReference() + { + byte b = default; + Assert.Equal(UIntPtr.Zero, (UIntPtr)ASCIIUtility.GetIndexOfFirstNonAsciiByte(&b, UIntPtr.Zero)); + } + + [Fact] + public static void GetIndexOfFirstNonAsciiByte_Vector128InnerLoop() + { + // The purpose of this test is to make sure we're identifying the correct + // vector (of the two that we're reading simultaneously) when performing + // the final ASCII drain at the end of the method once we've broken out + // of the inner loop. + + using (BoundedMemory mem = BoundedMemory.Allocate(1024)) + { + Span bytes = mem.Span; + + for (int i = 0; i < bytes.Length; i++) + { + bytes[i] &= 0x7F; // make sure each byte (of the pre-populated random data) is ASCII + } + + // Two vectors have offsets 0 .. 31. We'll go backward to avoid having to + // re-clear the vector every time. 
+ + for (int i = 2 * SizeOfVector128 - 1; i >= 0; i--) + { + bytes[100 + i * 13] = 0x80; // 13 is relatively prime to 32, so it ensures all possible positions are hit + Assert.Equal(100 + i * 13, CallGetIndexOfFirstNonAsciiByte(bytes)); + } + } + } + + [Fact] + public static void GetIndexOfFirstNonAsciiByte_Boundaries() + { + // The purpose of this test is to make sure we're hitting all of the vectorized + // and draining logic correctly both in the SSE2 and in the non-SSE2 enlightened + // code paths. We shouldn't be reading beyond the boundaries we were given. + + // The 5 * Vector test should make sure that we're exercising all possible + // code paths across both implementations. + using (BoundedMemory mem = BoundedMemory.Allocate(5 * Vector.Count)) + { + Span bytes = mem.Span; + + // First, try it with all-ASCII buffers. + + for (int i = 0; i < bytes.Length; i++) + { + bytes[i] &= 0x7F; // make sure each byte (of the pre-populated random data) is ASCII + } + + for (int i = bytes.Length; i >= 0; i--) + { + Assert.Equal(i, CallGetIndexOfFirstNonAsciiByte(bytes.Slice(0, i))); + } + + // Then, try it with non-ASCII bytes. 
+ + for (int i = bytes.Length; i >= 1; i--) + { + bytes[i - 1] = 0x80; // set non-ASCII + Assert.Equal(i - 1, CallGetIndexOfFirstNonAsciiByte(bytes.Slice(0, i))); + } + } + } + + [Fact] + public static void GetIndexOfFirstNonAsciiChar_EmptyInput_NullReference() + { + Assert.Equal(UIntPtr.Zero, (UIntPtr)ASCIIUtility.GetIndexOfFirstNonAsciiChar(null, UIntPtr.Zero)); + } + + [Fact] + public static void GetIndexOfFirstNonAsciiChar_EmptyInput_NonNullReference() + { + char c = default; + Assert.Equal(UIntPtr.Zero, (UIntPtr)ASCIIUtility.GetIndexOfFirstNonAsciiChar(&c, UIntPtr.Zero)); + } + + [Fact] + public static void GetIndexOfFirstNonAsciiChar_Vector128InnerLoop() + { + // The purpose of this test is to make sure we're identifying the correct + // vector (of the two that we're reading simultaneously) when performing + // the final ASCII drain at the end of the method once we've broken out + // of the inner loop. + // + // Use U+0123 instead of U+0080 for this test because if our implementation + // uses pminuw / pmovmskb incorrectly, U+0123 will incorrectly show up as ASCII, + // causing our test to produce a false negative. + + using (BoundedMemory mem = BoundedMemory.Allocate(1024)) + { + Span chars = mem.Span; + + for (int i = 0; i < chars.Length; i++) + { + chars[i] &= '\u007F'; // make sure each char (of the pre-populated random data) is ASCII + } + + // Two vectors have offsets 0 .. 31. We'll go backward to avoid having to + // re-clear the vector every time. + + for (int i = 2 * SizeOfVector128 - 1; i >= 0; i--) + { + chars[100 + i * 13] = '\u0123'; // 13 is relatively prime to 32, so it ensures all possible positions are hit + Assert.Equal(100 + i * 13, CallGetIndexOfFirstNonAsciiChar(chars)); + } + } + } + + [Fact] + public static void GetIndexOfFirstNonAsciiChar_Boundaries() + { + // The purpose of this test is to make sure we're hitting all of the vectorized + // and draining logic correctly both in the SSE2 and in the non-SSE2 enlightened + // code paths. 
We shouldn't be reading beyond the boundaries we were given. + // + // The 5 * Vector test should make sure that we're exercising all possible + // code paths across both implementations. The sizeof(char) is because we're + // specifying element count, but underlying implementation reintepret casts to bytes. + // + // Use U+0123 instead of U+0080 for this test because if our implementation + // uses pminuw / pmovmskb incorrectly, U+0123 will incorrectly show up as ASCII, + // causing our test to produce a false negative. + + using (BoundedMemory mem = BoundedMemory.Allocate(5 * Vector.Count / sizeof(char))) + { + Span chars = mem.Span; + + for (int i = 0; i < chars.Length; i++) + { + chars[i] &= '\u007F'; // make sure each char (of the pre-populated random data) is ASCII + } + + for (int i = chars.Length; i >= 0; i--) + { + Assert.Equal(i, CallGetIndexOfFirstNonAsciiChar(chars.Slice(0, i))); + } + + // Then, try it with non-ASCII bytes. + + for (int i = chars.Length; i >= 1; i--) + { + chars[i - 1] = '\u0123'; // set non-ASCII + Assert.Equal(i - 1, CallGetIndexOfFirstNonAsciiChar(chars.Slice(0, i))); + } + } + } + + [Fact] + public static void WidenAsciiToUtf16_EmptyInput_NullReferences() + { + Assert.Equal(UIntPtr.Zero, (UIntPtr)ASCIIUtility.WidenAsciiToUtf16(null, null, UIntPtr.Zero)); + } + + [Fact] + public static void WidenAsciiToUtf16_EmptyInput_NonNullReference() + { + byte b = default; + char c = default; + Assert.Equal(UIntPtr.Zero, (UIntPtr)ASCIIUtility.WidenAsciiToUtf16(&b, &c, UIntPtr.Zero)); + } + + [Fact] + public static void WidenAsciiToUtf16_AllAsciiInput() + { + using BoundedMemory asciiMem = BoundedMemory.Allocate(128); + using BoundedMemory utf16Mem = BoundedMemory.Allocate(128); + + // Fill source with 00 .. 7F, then trap future writes. + + Span asciiSpan = asciiMem.Span; + for (int i = 0; i < asciiSpan.Length; i++) + { + asciiSpan[i] = (byte)i; + } + asciiMem.MakeReadonly(); + + // We'll write to the UTF-16 span. 
+ // We test with a variety of span lengths to test alignment and fallthrough code paths. + + Span utf16Span = utf16Mem.Span; + + for (int i = 0; i < asciiSpan.Length; i++) + { + utf16Span.Clear(); // remove any data from previous iteration + + // First, validate that the workhorse saw the incoming data as all-ASCII. + + Assert.Equal(128 - i, CallWidenAsciiToUtf16(asciiSpan.Slice(i), utf16Span.Slice(i))); + + // Then, validate that the data was transcoded properly. + + for (int j = i; j < 128; j++) + { + Assert.Equal((ushort)asciiSpan[i], (ushort)utf16Span[i]); + } + } + } + + [Fact] + public static void WidenAsciiToUtf16_SomeNonAsciiInput() + { + using BoundedMemory asciiMem = BoundedMemory.Allocate(128); + using BoundedMemory utf16Mem = BoundedMemory.Allocate(128); + + // Fill source with 00 .. 7F, then trap future writes. + + Span asciiSpan = asciiMem.Span; + for (int i = 0; i < asciiSpan.Length; i++) + { + asciiSpan[i] = (byte)i; + } + + // We'll write to the UTF-16 span. + + Span utf16Span = utf16Mem.Span; + + for (int i = asciiSpan.Length - 1; i >= 0; i--) + { + RandomNumberGenerator.Fill(MemoryMarshal.Cast(utf16Span)); // fill with garbage + + // First, keep track of the garbage we wrote to the destination. + // We want to ensure it wasn't overwritten. + + char[] expectedTrailingData = utf16Span.Slice(i).ToArray(); + + // Then, set the desired byte as non-ASCII, then check that the workhorse + // correctly saw the data as non-ASCII. + + asciiSpan[i] |= (byte)0x80; + Assert.Equal(i, CallWidenAsciiToUtf16(asciiSpan, utf16Span)); + + // Next, validate that the ASCII data was transcoded properly. + + for (int j = 0; j < i; j++) + { + Assert.Equal((ushort)asciiSpan[j], (ushort)utf16Span[j]); + } + + // Finally, validate that the trailing data wasn't overwritten with non-ASCII data. 
+ + Assert.Equal(expectedTrailingData, utf16Span.Slice(i).ToArray()); + } + } + + [Fact] + public static unsafe void NarrowUtf16ToAscii_EmptyInput_NullReferences() + { + Assert.Equal(UIntPtr.Zero, (UIntPtr)ASCIIUtility.NarrowUtf16ToAscii(null, null, UIntPtr.Zero)); + } + + [Fact] + public static void NarrowUtf16ToAscii_EmptyInput_NonNullReference() + { + char c = default; + byte b = default; + Assert.Equal(UIntPtr.Zero, (UIntPtr)ASCIIUtility.NarrowUtf16ToAscii(&c, &b, UIntPtr.Zero)); + } + + [Fact] + public static void NarrowUtf16ToAscii_AllAsciiInput() + { + using BoundedMemory utf16Mem = BoundedMemory.Allocate(128); + using BoundedMemory asciiMem = BoundedMemory.Allocate(128); + + // Fill source with 00 .. 7F. + + Span utf16Span = utf16Mem.Span; + for (int i = 0; i < utf16Span.Length; i++) + { + utf16Span[i] = (char)i; + } + utf16Mem.MakeReadonly(); + + // We'll write to the ASCII span. + // We test with a variety of span lengths to test alignment and fallthrough code paths. + + Span asciiSpan = asciiMem.Span; + + for (int i = 0; i < utf16Span.Length; i++) + { + asciiSpan.Clear(); // remove any data from previous iteration + + // First, validate that the workhorse saw the incoming data as all-ASCII. + + Assert.Equal(128 - i, CallNarrowUtf16ToAscii(utf16Span.Slice(i), asciiSpan.Slice(i))); + + // Then, validate that the data was transcoded properly. + + for (int j = i; j < 128; j++) + { + Assert.Equal((ushort)utf16Span[i], (ushort)asciiSpan[i]); + } + } + } + + [Fact] + public static void NarrowUtf16ToAscii_SomeNonAsciiInput() + { + using BoundedMemory utf16Mem = BoundedMemory.Allocate(128); + using BoundedMemory asciiMem = BoundedMemory.Allocate(128); + + // Fill source with 00 .. 7F. + + Span utf16Span = utf16Mem.Span; + for (int i = 0; i < utf16Span.Length; i++) + { + utf16Span[i] = (char)i; + } + + // We'll write to the ASCII span. 
+ + Span asciiSpan = asciiMem.Span; + + for (int i = utf16Span.Length - 1; i >= 0; i--) + { + RandomNumberGenerator.Fill(asciiSpan); // fill with garbage + + // First, keep track of the garbage we wrote to the destination. + // We want to ensure it wasn't overwritten. + + byte[] expectedTrailingData = asciiSpan.Slice(i).ToArray(); + + // Then, set the desired byte as non-ASCII, then check that the workhorse + // correctly saw the data as non-ASCII. + + utf16Span[i] = '\u0123'; // use U+0123 instead of U+0080 since it catches inappropriate pmovmskb usage + Assert.Equal(i, CallNarrowUtf16ToAscii(utf16Span, asciiSpan)); + + // Next, validate that the ASCII data was transcoded properly. + + for (int j = 0; j < i; j++) + { + Assert.Equal((ushort)utf16Span[j], (ushort)asciiSpan[j]); + } + + // Finally, validate that the trailing data wasn't overwritten with non-ASCII data. + + Assert.Equal(expectedTrailingData, asciiSpan.Slice(i).ToArray()); + } + } + + private static int CallGetIndexOfFirstNonAsciiByte(ReadOnlySpan buffer) + { + fixed (byte* pBuffer = &MemoryMarshal.GetReference(buffer)) + { + // Conversions between UIntPtr <-> int are not checked by default. + return checked((int)ASCIIUtility.GetIndexOfFirstNonAsciiByte(pBuffer, (UIntPtr)buffer.Length)); + } + } + + private static int CallGetIndexOfFirstNonAsciiChar(ReadOnlySpan buffer) + { + fixed (char* pBuffer = &MemoryMarshal.GetReference(buffer)) + { + // Conversions between UIntPtr <-> int are not checked by default. + return checked((int)ASCIIUtility.GetIndexOfFirstNonAsciiChar(pBuffer, (UIntPtr)buffer.Length)); + } + } + + private static int CallNarrowUtf16ToAscii(ReadOnlySpan utf16, Span ascii) + { + Assert.Equal(utf16.Length, ascii.Length); + + fixed (char* pUtf16 = &MemoryMarshal.GetReference(utf16)) + fixed (byte* pAscii = &MemoryMarshal.GetReference(ascii)) + { + // Conversions between UIntPtr <-> int are not checked by default. 
+ return checked((int)ASCIIUtility.NarrowUtf16ToAscii(pUtf16, pAscii, (UIntPtr)utf16.Length)); + } + } + + private static int CallWidenAsciiToUtf16(ReadOnlySpan ascii, Span utf16) + { + Assert.Equal(ascii.Length, utf16.Length); + + fixed (byte* pAscii = &MemoryMarshal.GetReference(ascii)) + fixed (char* pUtf16 = &MemoryMarshal.GetReference(utf16)) + { + // Conversions between UIntPtr <-> int are not checked by default. + return checked((int)ASCIIUtility.WidenAsciiToUtf16(pAscii, pUtf16, (UIntPtr)ascii.Length)); + } + } + } +} +#endif diff --git a/test/DotNetty.Common.Tests/Internal/CoreLib/BoundedMemory.Creation.cs b/test/DotNetty.Common.Tests/Internal/CoreLib/BoundedMemory.Creation.cs new file mode 100644 index 000000000..9583dc0fc --- /dev/null +++ b/test/DotNetty.Common.Tests/Internal/CoreLib/BoundedMemory.Creation.cs @@ -0,0 +1,95 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +#if CORELIBTEST +using System; +using System.Runtime.InteropServices; + +namespace DotNetty.Common.Tests.Internal.CoreLib +{ + /// + /// Contains factory methods to create instances. + /// + public static partial class BoundedMemory + { + /// + /// Allocates a new region which is immediately preceded by + /// or immediately followed by a poison (MEM_NOACCESS) page. If + /// is , then attempting to read the memory + /// immediately before the returned will result in an AV. + /// If is , then + /// attempting to read the memory immediately after the returned + /// will result in AV. + /// + /// + /// The newly-allocated memory will be populated with random data. 
+ /// + public static BoundedMemory Allocate(int elementCount, PoisonPagePlacement placement = PoisonPagePlacement.After) where T : unmanaged + { + if (elementCount < 0) + { + throw new ArgumentOutOfRangeException(nameof(elementCount)); + } + if (placement != PoisonPagePlacement.Before && placement != PoisonPagePlacement.After) + { + throw new ArgumentOutOfRangeException(nameof(placement)); + } + + var retVal = AllocateWithoutDataPopulation(elementCount, placement); + FillRandom(MemoryMarshal.AsBytes(retVal.Span)); + return retVal; + } + + /// + /// Similar to , but populates the allocated + /// native memory block from existing data rather than using random data. + /// + public static BoundedMemory AllocateFromExistingData(ReadOnlySpan data, PoisonPagePlacement placement = PoisonPagePlacement.After) where T : unmanaged + { + if (placement != PoisonPagePlacement.Before && placement != PoisonPagePlacement.After) + { + throw new ArgumentOutOfRangeException(nameof(placement)); + } + + var retVal = AllocateWithoutDataPopulation(data.Length, placement); + data.CopyTo(retVal.Span); + return retVal; + } + + /// + /// Similar to , but populates the allocated + /// native memory block from existing data rather than using random data. + /// + public static BoundedMemory AllocateFromExistingData(T[] data, PoisonPagePlacement placement = PoisonPagePlacement.After) where T : unmanaged + { + return AllocateFromExistingData(new ReadOnlySpan(data), placement); + } + + private static void FillRandom(Span buffer) + { + // Loop over a Random instance manually since Random.NextBytes(Span) doesn't + // exist on all platforms we target. 
+ + Random random = new Random(); // doesn't need to be cryptographically strong + + for (int i = 0; i < buffer.Length; i++) + { + buffer[i] = (byte)random.Next(); + } + } + + private static BoundedMemory AllocateWithoutDataPopulation(int elementCount, PoisonPagePlacement placement) where T : unmanaged + { + if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) + { + return AllocateWithoutDataPopulationWindows(elementCount, placement); + } + else + { + return AllocateWithoutDataPopulationUnix(elementCount, placement); + } + } + } +} +#endif diff --git a/test/DotNetty.Common.Tests/Internal/CoreLib/BoundedMemory.Unix.cs b/test/DotNetty.Common.Tests/Internal/CoreLib/BoundedMemory.Unix.cs new file mode 100644 index 000000000..8ab9477d7 --- /dev/null +++ b/test/DotNetty.Common.Tests/Internal/CoreLib/BoundedMemory.Unix.cs @@ -0,0 +1,50 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +#if CORELIBTEST +using System; + +namespace DotNetty.Common.Tests.Internal.CoreLib +{ + public static partial class BoundedMemory + { + private static UnixImplementation AllocateWithoutDataPopulationUnix(int elementCount, PoisonPagePlacement placement) where T : unmanaged + { + // On non-Windows platforms, we don't yet have support for changing the permissions of individual pages. 
+ return new UnixImplementation(elementCount); + } + + private sealed class UnixImplementation : BoundedMemory where T : unmanaged + { + private readonly T[] _buffer; + + public UnixImplementation(int elementCount) + { + _buffer = new T[elementCount]; + } + + public override bool IsReadonly => false; + + public override Memory Memory => _buffer; + + public override Span Span => _buffer; + + public override void Dispose() + { + // no-op + } + + public override void MakeReadonly() + { + // no-op + } + + public override void MakeWriteable() + { + // no-op + } + } + } +} +#endif diff --git a/test/DotNetty.Common.Tests/Internal/CoreLib/BoundedMemory.Windows.cs b/test/DotNetty.Common.Tests/Internal/CoreLib/BoundedMemory.Windows.cs new file mode 100644 index 000000000..f42163c1d --- /dev/null +++ b/test/DotNetty.Common.Tests/Internal/CoreLib/BoundedMemory.Windows.cs @@ -0,0 +1,335 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +#if CORELIBTEST +using System; +using System.Buffers; +using System.Runtime.ConstrainedExecution; +using System.Runtime.InteropServices; +using System.Security; + +namespace DotNetty.Common.Tests.Internal.CoreLib +{ + public static unsafe partial class BoundedMemory + { + private static readonly int SystemPageSize = Environment.SystemPageSize; + + private static WindowsImplementation AllocateWithoutDataPopulationWindows(int elementCount, PoisonPagePlacement placement) where T : unmanaged + { + long cb, totalBytesToAllocate; + checked + { + cb = elementCount * sizeof(T); + totalBytesToAllocate = cb; + + // We only need to round the count up if it's not an exact multiple + // of the system page size. 
+ + var leftoverBytes = totalBytesToAllocate % SystemPageSize; + if (leftoverBytes != 0) + { + totalBytesToAllocate += SystemPageSize - leftoverBytes; + } + + // Finally, account for the poison pages at the front and back. + + totalBytesToAllocate += 2 * SystemPageSize; + } + + // Reserve and commit the entire range as NOACCESS. + + var handle = UnsafeNativeMethods.VirtualAlloc( + lpAddress: IntPtr.Zero, + dwSize: (IntPtr)totalBytesToAllocate /* cast throws OverflowException if out of range */, + flAllocationType: VirtualAllocAllocationType.MEM_RESERVE | VirtualAllocAllocationType.MEM_COMMIT, + flProtect: VirtualAllocProtection.PAGE_NOACCESS); + + if (handle == null || handle.IsInvalid) + { + Marshal.ThrowExceptionForHR(Marshal.GetHRForLastWin32Error()); + throw new InvalidOperationException("VirtualAlloc failed unexpectedly."); + } + + // Done allocating! Now carve out a READWRITE section bookended by the NOACCESS + // pages and return that carved-out section to the caller. Since memory protection + // flags only apply at page-level granularity, we need to "left-align" or "right- + // align" the section we carve out so that it's guaranteed adjacent to one of + // the NOACCESS bookend pages. + + return new WindowsImplementation( + handle: handle, + byteOffsetIntoHandle: (placement == PoisonPagePlacement.Before) + ? 
SystemPageSize /* just after leading poison page */ + : checked((int)(totalBytesToAllocate - SystemPageSize - cb)) /* just before trailing poison page */, + elementCount: elementCount) + { + Protection = VirtualAllocProtection.PAGE_READWRITE + }; + } + + private sealed class WindowsImplementation : BoundedMemory where T : unmanaged + { + private readonly VirtualAllocHandle _handle; + private readonly int _byteOffsetIntoHandle; + private readonly int _elementCount; + private readonly BoundedMemoryManager _memoryManager; + + internal WindowsImplementation(VirtualAllocHandle handle, int byteOffsetIntoHandle, int elementCount) + { + _handle = handle; + _byteOffsetIntoHandle = byteOffsetIntoHandle; + _elementCount = elementCount; + _memoryManager = new BoundedMemoryManager(this); + } + + public override bool IsReadonly => (Protection != VirtualAllocProtection.PAGE_READWRITE); + + internal VirtualAllocProtection Protection + { + get + { + bool refAdded = false; + try + { + _handle.DangerousAddRef(ref refAdded); + if (UnsafeNativeMethods.VirtualQuery( + lpAddress: _handle.DangerousGetHandle() + _byteOffsetIntoHandle, + lpBuffer: out var memoryInfo, + dwLength: (IntPtr)sizeof(MEMORY_BASIC_INFORMATION)) == IntPtr.Zero) + { + Marshal.ThrowExceptionForHR(Marshal.GetHRForLastWin32Error()); + throw new InvalidOperationException("VirtualQuery failed unexpectedly."); + } + return memoryInfo.Protect; + } + finally + { + if (refAdded) + { + _handle.DangerousRelease(); + } + } + } + set + { + if (_elementCount > 0) + { + bool refAdded = false; + try + { + _handle.DangerousAddRef(ref refAdded); + if (!UnsafeNativeMethods.VirtualProtect( + lpAddress: _handle.DangerousGetHandle() + _byteOffsetIntoHandle, + dwSize: (IntPtr)(&((T*)null)[_elementCount]), + flNewProtect: value, + lpflOldProtect: out _)) + { + Marshal.ThrowExceptionForHR(Marshal.GetHRForLastWin32Error()); + throw new InvalidOperationException("VirtualProtect failed unexpectedly."); + } + } + finally + { + if (refAdded) + { 
+ _handle.DangerousRelease(); + } + } + } + } + } + + public override Memory Memory => _memoryManager.Memory; + + public override Span Span + { + get + { + bool refAdded = false; + try + { + _handle.DangerousAddRef(ref refAdded); + return new Span((void*)(_handle.DangerousGetHandle() + _byteOffsetIntoHandle), _elementCount); + } + finally + { + if (refAdded) + { + _handle.DangerousRelease(); + } + } + } + } + + public override void Dispose() + { + _handle.Dispose(); + } + + public override void MakeReadonly() + { + Protection = VirtualAllocProtection.PAGE_READONLY; + } + + public override void MakeWriteable() + { + Protection = VirtualAllocProtection.PAGE_READWRITE; + } + + private sealed class BoundedMemoryManager : MemoryManager + { + private readonly WindowsImplementation _impl; + + public BoundedMemoryManager(WindowsImplementation impl) + { + _impl = impl; + } + + public override Memory Memory => CreateMemory(_impl._elementCount); + + protected override void Dispose(bool disposing) + { + // no-op; the handle will be disposed separately + } + + public override Span GetSpan() + { + throw new NotImplementedException(); + } + + public override MemoryHandle Pin(int elementIndex) + { + if ((uint)elementIndex > (uint)_impl._elementCount) + { + throw new ArgumentOutOfRangeException(paramName: nameof(elementIndex)); + } + + bool refAdded = false; + try + { + _impl._handle.DangerousAddRef(ref refAdded); + return new MemoryHandle((T*)(_impl._handle.DangerousGetHandle() + _impl._byteOffsetIntoHandle) + elementIndex); + } + finally + { + if (refAdded) + { + _impl._handle.DangerousRelease(); + } + } + } + + public override void Unpin() + { + // no-op - we don't unpin native memory + } + } + } + + // from winnt.h + [Flags] + private enum VirtualAllocAllocationType : uint + { + MEM_COMMIT = 0x1000, + MEM_RESERVE = 0x2000, + MEM_DECOMMIT = 0x4000, + MEM_RELEASE = 0x8000, + MEM_FREE = 0x10000, + MEM_PRIVATE = 0x20000, + MEM_MAPPED = 0x40000, + MEM_RESET = 0x80000, + MEM_TOP_DOWN 
= 0x100000, + MEM_WRITE_WATCH = 0x200000, + MEM_PHYSICAL = 0x400000, + MEM_ROTATE = 0x800000, + MEM_LARGE_PAGES = 0x20000000, + MEM_4MB_PAGES = 0x80000000, + } + + // from winnt.h + [Flags] + private enum VirtualAllocProtection : uint + { + PAGE_NOACCESS = 0x01, + PAGE_READONLY = 0x02, + PAGE_READWRITE = 0x04, + PAGE_WRITECOPY = 0x08, + PAGE_EXECUTE = 0x10, + PAGE_EXECUTE_READ = 0x20, + PAGE_EXECUTE_READWRITE = 0x40, + PAGE_EXECUTE_WRITECOPY = 0x80, + PAGE_GUARD = 0x100, + PAGE_NOCACHE = 0x200, + PAGE_WRITECOMBINE = 0x400, + } + + [StructLayout(LayoutKind.Sequential)] + private struct MEMORY_BASIC_INFORMATION + { + public IntPtr BaseAddress; + public IntPtr AllocationBase; + public VirtualAllocProtection AllocationProtect; + public IntPtr RegionSize; + public VirtualAllocAllocationType State; + public VirtualAllocProtection Protect; + public VirtualAllocAllocationType Type; + }; + + private sealed class VirtualAllocHandle : SafeHandle + { + // Called by P/Invoke when returning SafeHandles + private VirtualAllocHandle() + : base(IntPtr.Zero, ownsHandle: true) + { + } + + // Do not provide a finalizer - SafeHandle's critical finalizer will + // call ReleaseHandle for you. 
+ + public override bool IsInvalid => (handle == IntPtr.Zero); + + protected override bool ReleaseHandle() => + UnsafeNativeMethods.VirtualFree(handle, IntPtr.Zero, VirtualAllocAllocationType.MEM_RELEASE); + } + + [SuppressUnmanagedCodeSecurity] + private static class UnsafeNativeMethods + { + private const string KERNEL32_LIB = "kernel32.dll"; + + // https://msdn.microsoft.com/en-us/library/windows/desktop/aa366887(v=vs.85).aspx + [DllImport(KERNEL32_LIB, CallingConvention = CallingConvention.Winapi, SetLastError = true)] + public static extern VirtualAllocHandle VirtualAlloc( + [In] IntPtr lpAddress, + [In] IntPtr dwSize, + [In] VirtualAllocAllocationType flAllocationType, + [In] VirtualAllocProtection flProtect); + + // https://msdn.microsoft.com/en-us/library/windows/desktop/aa366892(v=vs.85).aspx + [DllImport(KERNEL32_LIB, CallingConvention = CallingConvention.Winapi, SetLastError = true)] + [return: MarshalAs(UnmanagedType.Bool)] + public static extern bool VirtualFree( + [In] IntPtr lpAddress, + [In] IntPtr dwSize, + [In] VirtualAllocAllocationType dwFreeType); + + // https://msdn.microsoft.com/en-us/library/windows/desktop/aa366898(v=vs.85).aspx + [DllImport(KERNEL32_LIB, CallingConvention = CallingConvention.Winapi, SetLastError = true)] + [return: MarshalAs(UnmanagedType.Bool)] + public static extern bool VirtualProtect( + [In] IntPtr lpAddress, + [In] IntPtr dwSize, + [In] VirtualAllocProtection flNewProtect, + [Out] out VirtualAllocProtection lpflOldProtect); + + // https://msdn.microsoft.com/en-us/library/windows/desktop/aa366902(v=vs.85).aspx + [DllImport(KERNEL32_LIB, CallingConvention = CallingConvention.Winapi, SetLastError = true)] + public static extern IntPtr VirtualQuery( + [In] IntPtr lpAddress, + [Out] out MEMORY_BASIC_INFORMATION lpBuffer, + [In] IntPtr dwLength); + } + } +} +#endif diff --git a/test/DotNetty.Common.Tests/Internal/CoreLib/BoundedMemory.cs b/test/DotNetty.Common.Tests/Internal/CoreLib/BoundedMemory.cs new file mode 100644 
index 000000000..14d8cb1fe --- /dev/null +++ b/test/DotNetty.Common.Tests/Internal/CoreLib/BoundedMemory.cs @@ -0,0 +1,53 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +#if CORELIBTEST +using System; + +namespace DotNetty.Common.Tests.Internal.CoreLib +{ + /// + /// Represents a region of native memory. The property can be used + /// to get a backed by this memory region. + /// + public abstract class BoundedMemory : IDisposable where T : unmanaged + { + /// + /// Returns a value stating whether this native memory block is readonly. + /// + public abstract bool IsReadonly { get; } + + /// + /// Gets the which represents this native memory. + /// This instance must be kept alive while working with the . + /// + public abstract Memory Memory { get; } + + /// + /// Gets the which represents this native memory. + /// This instance must be kept alive while working with the . + /// + public abstract Span Span { get; } + + /// + /// Disposes this instance. + /// + public abstract void Dispose(); + + /// + /// Sets this native memory block to be readonly. Writes to this block will cause an AV. + /// This method has no effect if the memory block is zero length or if the underlying + /// OS does not support marking the memory block as readonly. + /// + public abstract void MakeReadonly(); + + /// + /// Sets this native memory block to be read+write. + /// This method has no effect if the memory block is zero length or if the underlying + /// OS does not support marking the memory block as read+write. 
+ /// + public abstract void MakeWriteable(); + } +} +#endif diff --git a/test/DotNetty.Common.Tests/Internal/CoreLib/PoisonPagePlacement.cs b/test/DotNetty.Common.Tests/Internal/CoreLib/PoisonPagePlacement.cs new file mode 100644 index 000000000..e1cc54e3d --- /dev/null +++ b/test/DotNetty.Common.Tests/Internal/CoreLib/PoisonPagePlacement.cs @@ -0,0 +1,28 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +#if CORELIBTEST +namespace DotNetty.Common.Tests.Internal.CoreLib +{ + /// + /// Dictates where the poison page should be placed. + /// + public enum PoisonPagePlacement + { + /// + /// The poison page should be placed immediately after the memory region. + /// Attempting to access the memory page immediately following the + /// span will result in an AV. + /// + After, + + /// + /// The poison page should be placed immediately before the memory region. + /// Attempting to access the memory page immediately before the + /// span will result in an AV. + /// + Before, + } +} +#endif diff --git a/test/DotNetty.Common.Tests/Internal/CoreLib/Utf16UtilityTests.ValidateChars.cs b/test/DotNetty.Common.Tests/Internal/CoreLib/Utf16UtilityTests.ValidateChars.cs new file mode 100644 index 000000000..b0635ea9a --- /dev/null +++ b/test/DotNetty.Common.Tests/Internal/CoreLib/Utf16UtilityTests.ValidateChars.cs @@ -0,0 +1,267 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +#if CORELIBTEST +using System; +using System.Buffers; +using System.Globalization; +using System.Linq; +using System.Numerics; +using System.Runtime.InteropServices; +using DotNetty.Common.Internal; +using Xunit; + +namespace DotNetty.Common.Tests.Internal.CoreLib +{ + public class Utf16UtilityTests + { + [Theory] + [InlineData("", 0, 0)] // empty string is OK + [InlineData("X", 1, 1)] + [InlineData("XY", 2, 2)] + [InlineData("XYZ", 3, 3)] + [InlineData("", 1, 2)] + [InlineData("X", 2, 3)] + [InlineData("X", 2, 3)] + [InlineData("", 1, 3)] + [InlineData("", 1, 4)] + [InlineData("XZ", 3, 6)] + [InlineData("X<0000>Z", 3, 3)] // null chars are allowed + public void GetIndexOfFirstInvalidUtf16Sequence_WithSmallValidBuffers(string unprocessedInput, int expectedRuneCount, int expectedUtf8ByteCount) + { + GetIndexOfFirstInvalidUtf16Sequence_Test_Core(unprocessedInput, -1 /* expectedIdxOfFirstInvalidChar */, expectedRuneCount, expectedUtf8ByteCount); + } + + [Theory] + [InlineData("", 0, 0, 0)] // standalone low surrogate (at beginning of sequence) + [InlineData("X", 1, 1, 1)] // standalone low surrogate (preceded by valid ASCII data) + [InlineData("", 1, 1, 3)] // standalone low surrogate (preceded by valid non-ASCII data) + [InlineData("", 0, 0, 0)] // standalone high surrogate (missing follow-up low surrogate) + [InlineData("Y", 0, 0, 0)] // standalone high surrogate (followed by ASCII char) + [InlineData("", 0, 0, 0)] // standalone high surrogate (followed by high surrogate) + [InlineData("", 0, 0, 0)] // standalone high surrogate (followed by valid non-ASCII char) + [InlineData("", 0, 0, 0)] // standalone low surrogate (not preceded by a high surrogate) + [InlineData("", 0, 0, 0)] // standalone low surrogate (not preceded by a high surrogate) + [InlineData("", 2, 1, 4)] // standalone low surrogate (preceded by a valid surrogate pair) + [InlineData("", 2, 1, 4)] // standalone low surrogate (preceded by a valid surrogate pair) + [InlineData("<0000>", 3, 2, 5)] // 
standalone low surrogate (preceded by a valid null char) + public void GetIndexOfFirstInvalidUtf16Sequence_WithSmallInvalidBuffers(string unprocessedInput, int idxOfFirstInvalidChar, int expectedRuneCount, int expectedUtf8ByteCount) + { + GetIndexOfFirstInvalidUtf16Sequence_Test_Core(unprocessedInput, idxOfFirstInvalidChar, expectedRuneCount, expectedUtf8ByteCount); + } + + [Theory] // chars below presented as hex since Xunit doesn't like invalid UTF-16 string literals + [InlineData("<2BB4><218C><1BC0><613F>", 6, 6, 18)] + [InlineData("<1854><012C><4797><41D0><5464>", 4, 4, 11)] + [InlineData("<8BD3><5037><3E3A><6336>", 4, 4, 12)] + [InlineData("<0F25><7352><4025><0B93><4107>", 2, 2, 6)] + [InlineData("<887C><012C><4797><41D0><5464>", 4, 4, 11)] + public void GetIndexOfFirstInvalidUtf16Sequence_WithEightRandomCharsContainingUnpairedSurrogates(string unprocessedInput, int idxOfFirstInvalidChar, int expectedRuneCount, int expectedUtf8ByteCount) + { + GetIndexOfFirstInvalidUtf16Sequence_Test_Core(unprocessedInput, idxOfFirstInvalidChar, expectedRuneCount, expectedUtf8ByteCount); + } + + [Fact] + public void GetIndexOfFirstInvalidUtf16Sequence_WithInvalidSurrogateSequences() + { + // All ASCII + + char[] chars = Enumerable.Repeat('x', 128).ToArray(); + GetIndexOfFirstInvalidUtf16Sequence_Test_Core(chars, -1, expectedRuneCount: 128, expectedUtf8ByteCount: 128); + + // Throw a surrogate pair at the beginning + + chars[0] = '\uD800'; + chars[1] = '\uDFFF'; + GetIndexOfFirstInvalidUtf16Sequence_Test_Core(chars, -1, expectedRuneCount: 127, expectedUtf8ByteCount: 130); + + // Throw a surrogate pair near the end + + chars[124] = '\uD800'; + chars[125] = '\uDFFF'; + GetIndexOfFirstInvalidUtf16Sequence_Test_Core(chars, -1, expectedRuneCount: 126, expectedUtf8ByteCount: 132); + + // Throw a standalone surrogate code point at the *very* end + + chars[127] = '\uD800'; // high surrogate + GetIndexOfFirstInvalidUtf16Sequence_Test_Core(chars, 127, expectedRuneCount: 125, 
expectedUtf8ByteCount: 131); + + chars[127] = '\uDFFF'; // low surrogate + GetIndexOfFirstInvalidUtf16Sequence_Test_Core(chars, 127, expectedRuneCount: 125, expectedUtf8ByteCount: 131); + + // Make the final surrogate pair valid + + chars[126] = '\uD800'; // high surrogate + GetIndexOfFirstInvalidUtf16Sequence_Test_Core(chars, -1, expectedRuneCount: 125, expectedUtf8ByteCount: 134); + + // Throw an invalid surrogate sequence in the middle (straddles a vector boundary) + + chars[12] = '\u0080'; // 2-byte UTF-8 sequence + chars[13] = '\uD800'; // high surrogate + chars[14] = '\uD800'; // high surrogate + chars[15] = '\uDFFF'; // low surrogate + chars[16] = '\uDFFF'; // low surrogate + GetIndexOfFirstInvalidUtf16Sequence_Test_Core(chars, 13, expectedRuneCount: 12, expectedUtf8ByteCount: 16); + + // Correct the surrogate sequence we just added + + chars[14] = '\uDC00'; // low surrogate + chars[15] = '\uDBFF'; // high surrogate + GetIndexOfFirstInvalidUtf16Sequence_Test_Core(chars, -1, expectedRuneCount: 123, expectedUtf8ByteCount: 139); + + // Corrupt the surrogate pair that's split across a vector boundary + + chars[16] = 'x'; // ASCII char (remember.. chars[15] is a high surrogate char) + GetIndexOfFirstInvalidUtf16Sequence_Test_Core(chars, 15, expectedRuneCount: 13, expectedUtf8ByteCount: 20); + } + + [Fact] + public void GetIndexOfFirstInvalidUtf16Sequence_WithStandaloneLowSurrogateCharAtStart() + { + // The input stream will be a vector's worth of ASCII chars, followed by a single standalone low + // surrogate char, then padded with U+0000 until it's a multiple of the vector size. + // Using Vector.Count here as a stand-in for Vector.Count. + + char[] chars = new char[Vector.Count * 2]; + for (int i = 0; i < Vector.Count; i++) + { + chars[i] = 'x'; // ASCII char + } + + chars[Vector.Count] = '\uDEAD'; // standalone low surrogate char + + for (int i = 0; i <= Vector.Count; i++) + { + // Expect all ASCII chars to be consumed, low surrogate char to be marked invalid. 
+ GetIndexOfFirstInvalidUtf16Sequence_Test_Core(chars[(Vector.Count - i)..], i, i, i); + } + } + + private static void GetIndexOfFirstInvalidUtf16Sequence_Test_Core(string unprocessedInput, int expectedIdxOfFirstInvalidChar, int expectedRuneCount, long expectedUtf8ByteCount) + { + char[] processedInput = ProcessInput(unprocessedInput).ToCharArray(); + + // Run the test normally + + GetIndexOfFirstInvalidUtf16Sequence_Test_Core(processedInput, expectedIdxOfFirstInvalidChar, expectedRuneCount, expectedUtf8ByteCount); + + // Put a bunch of ASCII data at the beginning (to test the call to ASCIIUtility at method entry) + + processedInput = Enumerable.Repeat('x', 128).Concat(processedInput).ToArray(); + + if (expectedIdxOfFirstInvalidChar >= 0) + { + expectedIdxOfFirstInvalidChar += 128; + } + expectedRuneCount += 128; + expectedUtf8ByteCount += 128; + + GetIndexOfFirstInvalidUtf16Sequence_Test_Core(processedInput, expectedIdxOfFirstInvalidChar, expectedRuneCount, expectedUtf8ByteCount); + + // Change the first few chars to a mixture of 2-byte and 3-byte UTF-8 sequences + // This makes sure the vectorized code paths can properly handle these. + + processedInput[0] = '\u0080'; // 2-byte UTF-8 sequence + processedInput[1] = '\u0800'; // 3-byte UTF-8 sequence + processedInput[2] = '\u0080'; // 2-byte UTF-8 sequence + processedInput[3] = '\u0800'; // 3-byte UTF-8 sequence + processedInput[4] = '\u0080'; // 2-byte UTF-8 sequence + processedInput[5] = '\u0800'; // 3-byte UTF-8 sequence + processedInput[6] = '\u0080'; // 2-byte UTF-8 sequence + processedInput[7] = '\u0800'; // 3-byte UTF-8 sequence + + expectedUtf8ByteCount += 12; + + GetIndexOfFirstInvalidUtf16Sequence_Test_Core(processedInput, expectedIdxOfFirstInvalidChar, expectedRuneCount, expectedUtf8ByteCount); + + // Throw some surrogate pairs into the mix to make sure they're also handled properly + // by the vectorized code paths. 
+ + processedInput[8] = '\u0080'; // 2-byte UTF-8 sequence + processedInput[9] = '\u0800'; // 3-byte UTF-8 sequence + processedInput[10] = '\u0080'; // 2-byte UTF-8 sequence + processedInput[11] = '\u0800'; // 3-byte UTF-8 sequence + processedInput[12] = '\u0080'; // 2-byte UTF-8 sequence + processedInput[13] = '\uD800'; // high surrogate + processedInput[14] = '\uDC00'; // low surrogate + processedInput[15] = 'z'; // ASCII char + + expectedRuneCount--; + expectedUtf8ByteCount += 9; + + GetIndexOfFirstInvalidUtf16Sequence_Test_Core(processedInput, expectedIdxOfFirstInvalidChar, expectedRuneCount, expectedUtf8ByteCount); + + // Split the next surrogate pair across the vector boundary (so that we + // don't inadvertently treat this as a standalone surrogate sequence). + + processedInput[15] = '\uDBFF'; // high surrogate + processedInput[16] = '\uDFFF'; // low surrogate + + expectedRuneCount--; + expectedUtf8ByteCount += 2; + + GetIndexOfFirstInvalidUtf16Sequence_Test_Core(processedInput, expectedIdxOfFirstInvalidChar, expectedRuneCount, expectedUtf8ByteCount); + } + + private static unsafe void GetIndexOfFirstInvalidUtf16Sequence_Test_Core(char[] input, int expectedRetVal, int expectedRuneCount, long expectedUtf8ByteCount) + { + // Arrange + + using BoundedMemory boundedMemory = BoundedMemory.AllocateFromExistingData(input); + boundedMemory.MakeReadonly(); + + // Act + + int actualRetVal; + long actualUtf8CodeUnitCount; + int actualRuneCount; + + fixed (char* pInputBuffer = &MemoryMarshal.GetReference(boundedMemory.Span)) + { + char* pFirstInvalidChar = Utf16Utility.GetPointerToFirstInvalidChar(pInputBuffer, input.Length, out long utf8CodeUnitCountAdjustment, out int scalarCountAdjustment); + + long ptrDiff = pFirstInvalidChar - pInputBuffer; + Assert.True((ulong)ptrDiff <= (uint)input.Length, "ptrDiff was outside expected range."); + + Assert.True(utf8CodeUnitCountAdjustment >= 0, "UTF-16 code unit count adjustment must be non-negative."); + 
Assert.True(scalarCountAdjustment <= 0, "Scalar count adjustment must be 0 or negative."); + + actualRetVal = (ptrDiff == input.Length) ? -1 : (int)ptrDiff; + + // The last two 'out' parameters are: + // a) The number to be added to the "chars processed" return value to come up with the total UTF-8 code unit count, and + // b) The number to be added to the "total UTF-16 code unit count" value to come up with the total scalar count. + + actualUtf8CodeUnitCount = ptrDiff + utf8CodeUnitCountAdjustment; + actualRuneCount = (int)ptrDiff + scalarCountAdjustment; + } + + // Assert (xUnit convention: expected value first, actual value second, so failure messages read correctly) + + Assert.Equal(expectedRetVal, actualRetVal); + Assert.Equal(expectedRuneCount, actualRuneCount); + Assert.Equal(expectedUtf8ByteCount, actualUtf8CodeUnitCount); + } + + private static string ProcessInput(string input) + { + input = input.Replace("<EACU>", "\u00E9", StringComparison.Ordinal); // U+00E9 LATIN SMALL LETTER E WITH ACUTE + input = input.Replace("<EURO>", "\u20AC", StringComparison.Ordinal); // U+20AC EURO SIGN + input = input.Replace("<GRIN>", "\U0001F600", StringComparison.Ordinal); // U+1F600 GRINNING FACE + + // Replace <ABCD> with \uABCD. This allows us to flow potentially malformed + // UTF-16 strings without Xunit. (The unit testing framework gets angry when + // we try putting invalid UTF-16 data as inline test data.) NOTE(review): the named tokens above must be non-empty (string.Replace throws on an empty oldValue) and must match the tokens used in the [InlineData] inputs - confirm. + + int idx; + while ((idx = input.IndexOf('<')) >= 0) + { + input = input[..idx] + (char)ushort.Parse(input.Substring(idx + 1, 4), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture) + input[(idx + 6)..]; + } + + return input; + } + } +} +#endif diff --git a/test/DotNetty.Common.Tests/Internal/CoreLib/Utf8Tests.cs b/test/DotNetty.Common.Tests/Internal/CoreLib/Utf8Tests.cs new file mode 100644 index 000000000..f0986d77e --- /dev/null +++ b/test/DotNetty.Common.Tests/Internal/CoreLib/Utf8Tests.cs @@ -0,0 +1,799 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+// See the LICENSE file in the project root for more information. + +#if CORELIBTEST +using System; +using System.Buffers; +using System.Collections.Generic; +using System.Globalization; +using System.Linq; +using System.Text; +using System.Text.RegularExpressions; +using DotNetty.Common.Internal; +using Xunit; + +namespace DotNetty.Common.Tests.Internal.CoreLib +{ + public class Utf8Tests + { + private const string X_UTF8 = "58"; // U+0058 LATIN CAPITAL LETTER X, 1 byte + private const string X_UTF16 = "X"; + + private const string Y_UTF8 = "59"; // U+0059 LATIN CAPITAL LETTER Y, 1 byte + private const string Y_UTF16 = "Y"; + + private const string Z_UTF8 = "5A"; // U+005A LATIN CAPITAL LETTER Z, 1 byte + private const string Z_UTF16 = "Z"; + + private const string E_ACUTE_UTF8 = "C3A9"; // U+00E9 LATIN SMALL LETTER E WITH ACUTE, 2 bytes + private const string E_ACUTE_UTF16 = "\u00E9"; + + private const string EURO_SYMBOL_UTF8 = "E282AC"; // U+20AC EURO SIGN, 3 bytes + private const string EURO_SYMBOL_UTF16 = "\u20AC"; + + private const string REPLACEMENT_CHAR_UTF8 = "EFBFBD"; // U+FFFD REPLACEMENT CHAR, 3 bytes + private const string REPLACEMENT_CHAR_UTF16 = "\uFFFD"; + + private const string GRINNING_FACE_UTF8 = "F09F9880"; // U+1F600 GRINNING FACE, 4 bytes + private const string GRINNING_FACE_UTF16 = "\U0001F600"; + + private const string WOMAN_CARTWHEELING_MEDSKIN_UTF16 = "\U0001F938\U0001F3FD\u200D\u2640\uFE0F"; // U+1F938 U+1F3FD U+200D U+2640 U+FE0F WOMAN CARTWHEELING: MEDIUM SKIN TONE + + // All valid scalars [ U+0000 .. U+D7FF ] and [ U+E000 .. U+10FFFF ]. 
+ private static readonly IEnumerable s_allValidScalars = Enumerable.Range(0x0000, 0xD800).Concat(Enumerable.Range(0xE000, 0x110000 - 0xE000)).Select(value => new Rune(value)); + + private static readonly ReadOnlyMemory s_allScalarsAsUtf16; + private static readonly ReadOnlyMemory s_allScalarsAsUtf8; + + static Utf8Tests() + { + List allScalarsAsUtf16 = new List(); + List allScalarsAsUtf8 = new List(); + + foreach (Rune rune in s_allValidScalars) + { + allScalarsAsUtf16.AddRange(ToUtf16(rune)); + allScalarsAsUtf8.AddRange(ToUtf8(rune)); + } + + s_allScalarsAsUtf16 = allScalarsAsUtf16.ToArray().AsMemory(); + s_allScalarsAsUtf8 = allScalarsAsUtf8.ToArray().AsMemory(); + } + + /* + * COMMON UTILITIES FOR UNIT TESTS + */ + + public static byte[] DecodeHex(ReadOnlySpan inputHex) + { + Assert.True(Regex.IsMatch(inputHex.ToString(), "^([0-9a-fA-F]{2})*$"), "Input must be an even number of hex characters."); + +#if NET + return Convert.FromHexString(inputHex); +#else + byte[] retVal = new byte[inputHex.Length / 2]; + for (int i = 0; i < retVal.Length; i++) + { + retVal[i] = byte.Parse(inputHex.Slice(i * 2, 2), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture); + } + return retVal; +#endif + } + + // !! IMPORTANT !! + // Don't delete this implementation, as we use it as a reference to make sure the framework's + // transcoding logic is correct. 
+ public static byte[] ToUtf8(Rune rune) + { + Assert.True(Rune.IsValid(rune.Value), $"Rune with value U+{(uint)rune.Value:X4} is not well-formed."); + + if (rune.Value < 0x80) + { + return new[] + { + (byte)rune.Value + }; + } + else if (rune.Value < 0x0800) + { + return new[] + { + (byte)((rune.Value >> 6) | 0xC0), + (byte)((rune.Value & 0x3F) | 0x80) + }; + } + else if (rune.Value < 0x10000) + { + return new[] + { + (byte)((rune.Value >> 12) | 0xE0), + (byte)(((rune.Value >> 6) & 0x3F) | 0x80), + (byte)((rune.Value & 0x3F) | 0x80) + }; + } + else + { + return new[] + { + (byte)((rune.Value >> 18) | 0xF0), + (byte)(((rune.Value >> 12) & 0x3F) | 0x80), + (byte)(((rune.Value >> 6) & 0x3F) | 0x80), + (byte)((rune.Value & 0x3F) | 0x80) + }; + } + } + + // !! IMPORTANT !! + // Don't delete this implementation, as we use it as a reference to make sure the framework's + // transcoding logic is correct. + private static char[] ToUtf16(Rune rune) + { + Assert.True(Rune.IsValid(rune.Value), $"Rune with value U+{(uint)rune.Value:X4} is not well-formed."); + + if (rune.IsBmp) + { + return new[] + { + (char)rune.Value + }; + } + else + { + return new[] + { + (char)((rune.Value >> 10) + 0xD800 - 0x40), + (char)((rune.Value & 0x03FF) + 0xDC00) + }; + } + } + + [Theory] + [InlineData("", "")] // empty string is OK + [InlineData(X_UTF16, X_UTF8)] + [InlineData(E_ACUTE_UTF16, E_ACUTE_UTF8)] + [InlineData(EURO_SYMBOL_UTF16, EURO_SYMBOL_UTF8)] + public void ToBytes_WithSmallValidBuffers(string utf16Input, string expectedUtf8TranscodingHex) + { + // These test cases are for the "slow processing" code path at the end of TranscodeToUtf8, + // so inputs should be less than 2 chars. 
+ + Assert.InRange(utf16Input.Length, 0, 1); + + ToBytes_Test_Core( + utf16Input: utf16Input, + destinationSize: expectedUtf8TranscodingHex.Length / 2, + replaceInvalidSequences: false, + isFinalChunk: false, + expectedOperationStatus: OperationStatus.Done, + expectedNumCharsRead: utf16Input.Length, + expectedUtf8Transcoding: DecodeHex(expectedUtf8TranscodingHex)); + } + + [Theory] + [InlineData("AB")] // 2 ASCII chars, hits fast inner loop + [InlineData("ABCD")] // 4 ASCII chars, hits fast inner loop + [InlineData("ABCDEF")] // 6 ASCII chars, hits fast inner loop + [InlineData("ABCDEFGH")] // 8 ASCII chars, hits fast inner loop + [InlineData("ABCDEFGHIJ")] // 10 ASCII chars, hits fast inner loop + [InlineData("ABCDEF" + E_ACUTE_UTF16 + "HIJ")] // interrupts inner loop due to non-ASCII char in first char of first DWORD + [InlineData("ABCDEFG" + EURO_SYMBOL_UTF16 + "IJ")] // interrupts inner loop due to non-ASCII char in second char of first DWORD + [InlineData("ABCDEFGH" + E_ACUTE_UTF16 + "J")] // interrupts inner loop due to non-ASCII char in first char of second DWORD + [InlineData("ABCDEFGHI" + EURO_SYMBOL_UTF16)] // interrupts inner loop due to non-ASCII char in second char of second DWORD + [InlineData(X_UTF16 + E_ACUTE_UTF16)] // drains first ASCII char then falls down to slow path + [InlineData(X_UTF16 + E_ACUTE_UTF16 + E_ACUTE_UTF16)] // drains first ASCII char then consumes 2x 2-byte sequences at once + [InlineData(E_ACUTE_UTF16 + X_UTF16)] // no first ASCII char to drain, consumes 2-byte seq followed by ASCII char + [InlineData(E_ACUTE_UTF16 + E_ACUTE_UTF16 + E_ACUTE_UTF16 + E_ACUTE_UTF16)] // stay within 2x 2-byte sequence processing loop + [InlineData(E_ACUTE_UTF16 + E_ACUTE_UTF16 + E_ACUTE_UTF16 + X_UTF16)] // break out of 2x 2-byte seq loop due to ASCII data in second char of DWORD + [InlineData(E_ACUTE_UTF16 + E_ACUTE_UTF16 + X_UTF16 + X_UTF16)] // break out of 2x 2-byte seq loop due to ASCII data in first char of DWORD + [InlineData(E_ACUTE_UTF16 + 
E_ACUTE_UTF16 + E_ACUTE_UTF16 + EURO_SYMBOL_UTF16)] // break out of 2x 2-byte seq loop due to 3-byte data + [InlineData(E_ACUTE_UTF16 + EURO_SYMBOL_UTF16)] // 2-byte logic sees next char isn't ASCII, cannot read full DWORD from remaining input buffer, falls down to slow drain loop + [InlineData(EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + X_UTF16)] // 2x 3-byte logic can't read a full DWORD from next part of buffer, falls down to slow drain loop + [InlineData(EURO_SYMBOL_UTF16 + X_UTF16)] // 3-byte processing loop consumes trailing ASCII char, but can't read next DWORD, falls down to slow drain loop + [InlineData(EURO_SYMBOL_UTF16 + X_UTF16 + X_UTF16)] // 3-byte processing loop consumes trailing ASCII char, but can't read next DWORD, falls down to slow drain loop + [InlineData(EURO_SYMBOL_UTF16 + E_ACUTE_UTF16)] // 3-byte processing loop can't consume next ASCII char, can't read DWORD, falls down to slow drain loop + [InlineData(EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16)] // stay within 2x 3-byte sequence processing loop + [InlineData(EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + X_UTF16 + EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16)] // consume stray ASCII char at beginning of DWORD after 2x 3-byte sequence + [InlineData(EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + X_UTF16 + EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16)] // consume stray ASCII char at end of DWORD after 2x 3-byte sequence + [InlineData(EURO_SYMBOL_UTF16 + E_ACUTE_UTF16 + X_UTF16)] // consume 2-byte sequence as second char in DWORD which begins with 3-byte encoded char + [InlineData(EURO_SYMBOL_UTF16 + GRINNING_FACE_UTF16)] // 3-byte sequence followed by 4-byte sequence + [InlineData(EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + GRINNING_FACE_UTF16)] // 2x 3-byte sequence followed by 4-byte sequence + [InlineData(GRINNING_FACE_UTF16)] // single 4-byte surrogate char pair + [InlineData(GRINNING_FACE_UTF16 + EURO_SYMBOL_UTF16)] // 4-byte surrogate char pair, 
cannot read next DWORD, falls down to slow drain loop + public void ToBytes_WithLargeValidBuffers(string utf16Input) + { + // These test cases are for the "fast processing" code which is the main loop of TranscodeToUtf8, + // so inputs should be at least 2 chars. + + Assert.True(utf16Input.Length >= 2); + + // We're going to run the tests with destination buffer lengths ranging from 0 all the way + // to buffers large enough to hold the full output. This allows us to test logic that + // detects whether we're about to overrun our destination buffer and instead returns DestinationTooSmall. + + Rune[] enumeratedScalars = utf16Input.EnumerateRunes().ToArray(); + + // 0-length buffer test + ToBytes_Test_Core( + utf16Input: utf16Input, + destinationSize: 0, + replaceInvalidSequences: false, + isFinalChunk: false, + expectedOperationStatus: OperationStatus.DestinationTooSmall, + expectedNumCharsRead: 0, + expectedUtf8Transcoding: ReadOnlySpan.Empty); + + int expectedNumCharsConsumed = 0; + byte[] concatenatedUtf8 = Array.Empty(); + + for (int i = 0; i < enumeratedScalars.Length; i++) + { + Rune thisScalar = enumeratedScalars[i]; + + // provide partial destination buffers all the way up to (but not including) enough to hold the next full scalar encoding + for (int j = 1; j < thisScalar.Utf8SequenceLength; j++) + { + ToBytes_Test_Core( + utf16Input: utf16Input, + destinationSize: concatenatedUtf8.Length + j, + replaceInvalidSequences: false, + isFinalChunk: false, + expectedOperationStatus: OperationStatus.DestinationTooSmall, + expectedNumCharsRead: expectedNumCharsConsumed, + expectedUtf8Transcoding: concatenatedUtf8); + } + + // now provide a destination buffer large enough to hold the next full scalar encoding + + expectedNumCharsConsumed += thisScalar.Utf16SequenceLength; + concatenatedUtf8 = concatenatedUtf8.Concat(ToUtf8(thisScalar)).ToArray(); + + ToBytes_Test_Core( + utf16Input: utf16Input, + destinationSize: concatenatedUtf8.Length, + replaceInvalidSequences: 
false, + isFinalChunk: false, + expectedOperationStatus: (i == enumeratedScalars.Length - 1) ? OperationStatus.Done : OperationStatus.DestinationTooSmall, + expectedNumCharsRead: expectedNumCharsConsumed, + expectedUtf8Transcoding: concatenatedUtf8); + } + + // now throw lots of ASCII data at the beginning so that we exercise the vectorized code paths + + utf16Input = new string('x', 64) + utf16Input; + concatenatedUtf8 = utf16Input.EnumerateRunes().SelectMany(ToUtf8).ToArray(); + + ToBytes_Test_Core( + utf16Input: utf16Input, + destinationSize: concatenatedUtf8.Length, + replaceInvalidSequences: false, + isFinalChunk: true, + expectedOperationStatus: OperationStatus.Done, + expectedNumCharsRead: utf16Input.Length, + expectedUtf8Transcoding: concatenatedUtf8); + + // now throw some non-ASCII data at the beginning so that we *don't* exercise the vectorized code paths + + utf16Input = WOMAN_CARTWHEELING_MEDSKIN_UTF16 + utf16Input[64..]; + concatenatedUtf8 = utf16Input.EnumerateRunes().SelectMany(ToUtf8).ToArray(); + + ToBytes_Test_Core( + utf16Input: utf16Input, + destinationSize: concatenatedUtf8.Length, + replaceInvalidSequences: false, + isFinalChunk: true, + expectedOperationStatus: OperationStatus.Done, + expectedNumCharsRead: utf16Input.Length, + expectedUtf8Transcoding: concatenatedUtf8); + } + + [Theory] + [InlineData('\uD800', OperationStatus.NeedMoreData)] // standalone high surrogate + [InlineData('\uDFFF', OperationStatus.InvalidData)] // standalone low surrogate + public void ToBytes_WithOnlyStandaloneSurrogates(char charValue, OperationStatus expectedOperationStatus) + { + ToBytes_Test_Core( + utf16Input: new[] { charValue }, + destinationSize: 0, + replaceInvalidSequences: false, + isFinalChunk: false, + expectedOperationStatus: expectedOperationStatus, + expectedNumCharsRead: 0, + expectedUtf8Transcoding: Span.Empty); + } + + [Theory] + [InlineData("", 0, "")] // swapped surrogate pair characters + [InlineData("A", 1, "41")] // consume standalone 
ASCII char, then swapped surrogate pair characters + [InlineData("AB", 1, "41")] // consume standalone ASCII char, then standalone high surrogate char + [InlineData("AB", 1, "41")] // consume standalone ASCII char, then standalone low surrogate char + [InlineData("AB", 2, "4142")] // consume two ASCII chars, then standalone high surrogate char + [InlineData("AB", 2, "4142")] // consume two ASCII chars, then standalone low surrogate char + public void ToBytes_WithInvalidSurrogates(string utf16Input, int expectedNumCharsConsumed, string expectedUtf8TranscodingHex) + { + // xUnit can't handle ill-formed strings in [InlineData], so we replace here. + + utf16Input = utf16Input.Replace("", "\uD800").Replace("", "\uDFFF"); + + // These test cases are for the "fast processing" code which is the main loop of TranscodeToUtf8, + // so inputs should be at least 2 chars. + + Assert.True(utf16Input.Length >= 2); + + ToBytes_Test_Core( + utf16Input: utf16Input, + destinationSize: expectedUtf8TranscodingHex.Length / 2, + replaceInvalidSequences: false, + isFinalChunk: false, + expectedOperationStatus: OperationStatus.InvalidData, + expectedNumCharsRead: expectedNumCharsConsumed, + expectedUtf8Transcoding: DecodeHex(expectedUtf8TranscodingHex)); + + // Now try the tests again with a larger buffer. + // This ensures that running out of destination space wasn't the reason we failed. + + ToBytes_Test_Core( + utf16Input: utf16Input, + destinationSize: (expectedUtf8TranscodingHex.Length) / 2 + 16, + replaceInvalidSequences: false, + isFinalChunk: false, + expectedOperationStatus: OperationStatus.InvalidData, + expectedNumCharsRead: expectedNumCharsConsumed, + expectedUtf8Transcoding: DecodeHex(expectedUtf8TranscodingHex)); + } + + [Theory] + [InlineData("", REPLACEMENT_CHAR_UTF8)] // standalone low surr. and incomplete high surr. + [InlineData("", REPLACEMENT_CHAR_UTF8)] // standalone high surr. and incomplete high surr. 
+ [InlineData("", REPLACEMENT_CHAR_UTF8 + REPLACEMENT_CHAR_UTF8)] // standalone low surr. and incomplete low surr. + [InlineData("ABCD", "41" + REPLACEMENT_CHAR_UTF8 + "42" + REPLACEMENT_CHAR_UTF8 + "43" + REPLACEMENT_CHAR_UTF8 + "44")] // standalone low, low, high surrounded by other data + public void ToBytes_WithReplacements(string utf16Input, string expectedUtf8TranscodingHex) + { + // xUnit can't handle ill-formed strings in [InlineData], so we replace here. + + utf16Input = utf16Input.Replace("", "\uD800").Replace("", "\uDFFF"); + + bool isFinalCharHighSurrogate = char.IsHighSurrogate(utf16Input.Last()); + + ToBytes_Test_Core( + utf16Input: utf16Input, + destinationSize: expectedUtf8TranscodingHex.Length / 2, + replaceInvalidSequences: true, + isFinalChunk: false, + expectedOperationStatus: (isFinalCharHighSurrogate) ? OperationStatus.NeedMoreData : OperationStatus.Done, + expectedNumCharsRead: (isFinalCharHighSurrogate) ? (utf16Input.Length - 1) : utf16Input.Length, + expectedUtf8Transcoding: DecodeHex(expectedUtf8TranscodingHex)); + + if (isFinalCharHighSurrogate) + { + // Also test with isFinalChunk = true + ToBytes_Test_Core( + utf16Input: utf16Input, + destinationSize: expectedUtf8TranscodingHex.Length / 2 + Rune.ReplacementChar.Utf8SequenceLength /* for replacement char */, + replaceInvalidSequences: true, + isFinalChunk: true, + expectedOperationStatus: OperationStatus.Done, + expectedNumCharsRead: utf16Input.Length, + expectedUtf8Transcoding: DecodeHex(expectedUtf8TranscodingHex + REPLACEMENT_CHAR_UTF8)); + } + } + + [Theory] + [InlineData(E_ACUTE_UTF16 + "", true, 1, OperationStatus.DestinationTooSmall, E_ACUTE_UTF8)] // not enough output buffer to hold U+FFFD + [InlineData(E_ACUTE_UTF16 + "", true, 2, OperationStatus.Done, E_ACUTE_UTF8 + REPLACEMENT_CHAR_UTF8)] // replace standalone low surr. 
at end + [InlineData(E_ACUTE_UTF16 + "", true, 1, OperationStatus.DestinationTooSmall, E_ACUTE_UTF8)] // not enough output buffer to hold U+FFFD + [InlineData(E_ACUTE_UTF16 + "", true, 2, OperationStatus.Done, E_ACUTE_UTF8 + REPLACEMENT_CHAR_UTF8)] // replace standalone high surr. at end + [InlineData(E_ACUTE_UTF16 + "", false, 1, OperationStatus.NeedMoreData, E_ACUTE_UTF8)] // don't replace standalone high surr. at end + [InlineData(E_ACUTE_UTF16 + "" + X_UTF16, true, 2, OperationStatus.DestinationTooSmall, E_ACUTE_UTF8 + REPLACEMENT_CHAR_UTF8)] // not enough output buffer to hold 'X' + [InlineData(E_ACUTE_UTF16 + "" + X_UTF16, false, 2, OperationStatus.DestinationTooSmall, E_ACUTE_UTF8 + REPLACEMENT_CHAR_UTF8)] // not enough output buffer to hold 'X' + [InlineData(E_ACUTE_UTF16 + "" + X_UTF16, true, 3, OperationStatus.Done, E_ACUTE_UTF8 + REPLACEMENT_CHAR_UTF8 + X_UTF8)] // replacement followed by 'X' + [InlineData(E_ACUTE_UTF16 + "" + X_UTF16, false, 3, OperationStatus.Done, E_ACUTE_UTF8 + REPLACEMENT_CHAR_UTF8 + X_UTF8)] // replacement followed by 'X' + public void ToBytes_WithReplacements_AndCustomBufferSizes(string utf16Input, bool isFinalChunk, int expectedNumCharsConsumed, OperationStatus expectedOperationStatus, string expectedUtf8TranscodingHex) + { + // xUnit can't handle ill-formed strings in [InlineData], so we replace here. 
+ + utf16Input = utf16Input.Replace("", "\uD800").Replace("", "\uDFFF"); + + ToBytes_Test_Core( + utf16Input: utf16Input, + destinationSize: expectedUtf8TranscodingHex.Length / 2, + replaceInvalidSequences: true, + isFinalChunk: isFinalChunk, + expectedOperationStatus: expectedOperationStatus, + expectedNumCharsRead: expectedNumCharsConsumed, + expectedUtf8Transcoding: DecodeHex(expectedUtf8TranscodingHex)); + } + + [Fact] + public void ToBytes_AllPossibleScalarValues() + { + ToBytes_Test_Core( + utf16Input: s_allScalarsAsUtf16.Span, + destinationSize: s_allScalarsAsUtf8.Length, + replaceInvalidSequences: false, + isFinalChunk: false, + expectedOperationStatus: OperationStatus.Done, + expectedNumCharsRead: s_allScalarsAsUtf16.Length, + expectedUtf8Transcoding: s_allScalarsAsUtf8.Span); + } + + private static void ToBytes_Test_Core(ReadOnlySpan utf16Input, int destinationSize, bool replaceInvalidSequences, bool isFinalChunk, OperationStatus expectedOperationStatus, int expectedNumCharsRead, ReadOnlySpan expectedUtf8Transcoding) + { + // Arrange + + using (BoundedMemory boundedSource = BoundedMemory.AllocateFromExistingData(utf16Input)) + using (BoundedMemory boundedDestination = BoundedMemory.Allocate(destinationSize)) + { + boundedSource.MakeReadonly(); + + // Act + + OperationStatus actualOperationStatus = TextEncodings.Utf16.ToUtf8(boundedSource.Span, boundedDestination.Span, out int actualNumCharsRead, out int actualNumBytesWritten, replaceInvalidSequences, isFinalChunk); + + // Assert + + Assert.Equal(expectedOperationStatus, actualOperationStatus); + Assert.Equal(expectedNumCharsRead, actualNumCharsRead); + Assert.Equal(expectedUtf8Transcoding.Length, actualNumBytesWritten); + Assert.Equal(expectedUtf8Transcoding.ToArray(), boundedDestination.Span.Slice(0, actualNumBytesWritten).ToArray()); + } + } + + [Theory] + [InlineData("80", 0, "")] // sequence cannot begin with continuation character + [InlineData("8182", 0, "")] // sequence cannot begin with 
continuation character + [InlineData("838485", 0, "")] // sequence cannot begin with continuation character + [InlineData(X_UTF8 + "80", 1, X_UTF16)] // sequence cannot begin with continuation character + [InlineData(X_UTF8 + "8182", 1, X_UTF16)] // sequence cannot begin with continuation character + [InlineData("C0", 0, "")] // [ C0 ] is always invalid + [InlineData("C080", 0, "")] // [ C0 ] is always invalid + [InlineData("C08081", 0, "")] // [ C0 ] is always invalid + [InlineData(X_UTF8 + "C1", 1, X_UTF16)] // [ C1 ] is always invalid + [InlineData(X_UTF8 + "C180", 1, X_UTF16)] // [ C1 ] is always invalid + [InlineData(X_UTF8 + "C27F", 1, X_UTF16)] // [ C2 ] is improperly terminated + [InlineData("E2827F", 0, "")] // [ E2 82 ] is improperly terminated + [InlineData("E09F80", 0, "")] // [ E0 9F ... ] is overlong + [InlineData("E0C080", 0, "")] // [ E0 ] is improperly terminated + [InlineData("ED7F80", 0, "")] // [ ED ] is improperly terminated + [InlineData("EDA080", 0, "")] // [ ED A0 ... ] is surrogate + public void ToChars_WithSmallInvalidBuffers(string utf8HexInput, int expectedNumBytesConsumed, string expectedUtf16Transcoding) + { + // These test cases are for the "slow processing" code path at the end of TranscodeToUtf16, + // so inputs should be less than 4 bytes. + + Assert.InRange(utf8HexInput.Length, 0, 6); + + ToChars_Test_Core( + utf8Input: DecodeHex(utf8HexInput), + destinationSize: expectedUtf16Transcoding.Length, + replaceInvalidSequences: false, + isFinalChunk: false, + expectedOperationStatus: OperationStatus.InvalidData, + expectedNumBytesRead: expectedNumBytesConsumed, + expectedUtf16Transcoding: expectedUtf16Transcoding); + + // Now try the tests again with a larger buffer. + // This ensures that running out of destination space wasn't the reason we failed. 
+ + ToChars_Test_Core( + utf8Input: DecodeHex(utf8HexInput), + destinationSize: expectedUtf16Transcoding.Length + 16, + replaceInvalidSequences: false, + isFinalChunk: false, + expectedOperationStatus: OperationStatus.InvalidData, + expectedNumBytesRead: expectedNumBytesConsumed, + expectedUtf16Transcoding: expectedUtf16Transcoding); + } + + [Theory] + [InlineData("C2", 0, "")] // [ C2 ] is an incomplete sequence + [InlineData("E282", 0, "")] // [ E2 82 ] is an incomplete sequence + [InlineData(X_UTF8 + "C2", 1, X_UTF16)] // [ C2 ] is an incomplete sequence + [InlineData(X_UTF8 + "E0", 1, X_UTF16)] // [ E0 ] is an incomplete sequence + [InlineData(X_UTF8 + "E0BF", 1, X_UTF16)] // [ E0 BF ] is an incomplete sequence + [InlineData(X_UTF8 + "F0", 1, X_UTF16)] // [ F0 ] is an incomplete sequence + [InlineData(X_UTF8 + "F0BF", 1, X_UTF16)] // [ F0 BF ] is an incomplete sequence + [InlineData(X_UTF8 + "F0BFA0", 1, X_UTF16)] // [ F0 BF A0 ] is an incomplete sequence + [InlineData(E_ACUTE_UTF8 + "C2", 2, E_ACUTE_UTF16)] // [ C2 ] is an incomplete sequence + [InlineData(E_ACUTE_UTF8 + "E0", 2, E_ACUTE_UTF16)] // [ E0 ] is an incomplete sequence + [InlineData(E_ACUTE_UTF8 + "F0", 2, E_ACUTE_UTF16)] // [ F0 ] is an incomplete sequence + [InlineData(E_ACUTE_UTF8 + "E0BF", 2, E_ACUTE_UTF16)] // [ E0 BF ] is an incomplete sequence + [InlineData(E_ACUTE_UTF8 + "F0BF", 2, E_ACUTE_UTF16)] // [ F0 BF ] is an incomplete sequence + [InlineData(EURO_SYMBOL_UTF8 + "C2", 3, EURO_SYMBOL_UTF16)] // [ C2 ] is an incomplete sequence + [InlineData(EURO_SYMBOL_UTF8 + "E0", 3, EURO_SYMBOL_UTF16)] // [ E0 ] is an incomplete sequence + [InlineData(EURO_SYMBOL_UTF8 + "F0", 3, EURO_SYMBOL_UTF16)] // [ F0 ] is an incomplete sequence + public void ToChars_WithVariousIncompleteBuffers(string utf8HexInput, int expectedNumBytesConsumed, string expectedUtf16Transcoding) + { + // These test cases are for the "slow processing" code path at the end of TranscodeToUtf16, + // so inputs should be less than 4 
bytes. + + ToChars_Test_Core( + utf8Input: DecodeHex(utf8HexInput), + destinationSize: expectedUtf16Transcoding.Length, + replaceInvalidSequences: false, + isFinalChunk: false, + expectedOperationStatus: OperationStatus.NeedMoreData, + expectedNumBytesRead: expectedNumBytesConsumed, + expectedUtf16Transcoding: expectedUtf16Transcoding); + + // Now try the tests again with a larger buffer. + // This ensures that running out of destination space wasn't the reason we failed. + + ToChars_Test_Core( + utf8Input: DecodeHex(utf8HexInput), + destinationSize: expectedUtf16Transcoding.Length + 16, + replaceInvalidSequences: false, + isFinalChunk: false, + expectedOperationStatus: OperationStatus.NeedMoreData, + expectedNumBytesRead: expectedNumBytesConsumed, + expectedUtf16Transcoding: expectedUtf16Transcoding); + } + + [Theory] + /* SMALL VALID BUFFERS - tests drain loop at end of method */ + [InlineData("")] // empty string is OK + [InlineData("X")] + [InlineData("XY")] + [InlineData("XYZ")] + [InlineData(E_ACUTE_UTF16)] + [InlineData(X_UTF16 + E_ACUTE_UTF16)] + [InlineData(E_ACUTE_UTF16 + X_UTF16)] + [InlineData(EURO_SYMBOL_UTF16)] + /* LARGE VALID BUFFERS - test main loop at beginning of method */ + [InlineData(E_ACUTE_UTF16 + "ABCD" + "0123456789:;<=>?")] // Loop unrolling at end of buffer + [InlineData(E_ACUTE_UTF16 + "ABCD" + "0123456789:;<=>?" 
+ "01234567" + E_ACUTE_UTF16 + "89:;<=>?")] // Loop unrolling interrupted by non-ASCII + [InlineData("ABC" + E_ACUTE_UTF16 + "0123")] // 3 ASCII bytes followed by non-ASCII + [InlineData("AB" + E_ACUTE_UTF16 + "0123")] // 2 ASCII bytes followed by non-ASCII + [InlineData("A" + E_ACUTE_UTF16 + "0123")] // 1 ASCII byte followed by non-ASCII + [InlineData(E_ACUTE_UTF16 + E_ACUTE_UTF16 + E_ACUTE_UTF16 + E_ACUTE_UTF16)] // 4x 2-byte sequences, exercises optimization code path in 2-byte sequence processing + [InlineData(E_ACUTE_UTF16 + E_ACUTE_UTF16 + E_ACUTE_UTF16 + "PQ")] // 3x 2-byte sequences + 2 ASCII bytes, exercises optimization code path in 2-byte sequence processing + [InlineData(E_ACUTE_UTF16 + "PQ")] // single 2-byte sequence + 2 trailing ASCII bytes, exercises draining logic in 2-byte sequence processing + [InlineData(E_ACUTE_UTF16 + "P" + E_ACUTE_UTF16 + "0@P")] // single 2-byte sequences + 1 trailing ASCII byte + 2-byte sequence, exercises draining logic in 2-byte sequence processing + [InlineData(EURO_SYMBOL_UTF16 + "@")] // single 3-byte sequence + 1 trailing ASCII byte, exercises draining logic in 3-byte sequence processing + [InlineData(EURO_SYMBOL_UTF16 + "@P`")] // single 3-byte sequence + 3 trailing ASCII byte, exercises draining logic and "running out of data" logic in 3-byte sequence processing + [InlineData(EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16)] // 3x 3-byte sequences, exercises "stay within 3-byte loop" logic in 3-byte sequence processing + [InlineData(EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16)] // 4x 3-byte sequences, exercises "consume multiple bytes at a time" logic in 3-byte sequence processing + [InlineData(EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + E_ACUTE_UTF16)] // 3x 3-byte sequences + single 2-byte sequence, exercises "consume multiple bytes at a time" logic in 3-byte sequence processing + [InlineData(EURO_SYMBOL_UTF16 + EURO_SYMBOL_UTF16 + E_ACUTE_UTF16 + 
E_ACUTE_UTF16 + E_ACUTE_UTF16 + E_ACUTE_UTF16)] // 2x 3-byte sequences + 4x 2-byte sequences, exercises "consume multiple bytes at a time" logic in 3-byte sequence processing + [InlineData(GRINNING_FACE_UTF16 + GRINNING_FACE_UTF16)] // 2x 4-byte sequences, exercises 4-byte sequence processing + [InlineData(GRINNING_FACE_UTF16 + "@AB")] // single 4-byte sequence + 3 ASCII bytes, exercises 4-byte sequence processing and draining logic + [InlineData(WOMAN_CARTWHEELING_MEDSKIN_UTF16)] // exercises switching between multiple sequence lengths + public void ToChars_ValidBuffers(string utf16Input) + { + // We're going to run the tests with destination buffer lengths ranging from 0 all the way + // to buffers large enough to hold the full output. This allows us to test logic that + // detects whether we're about to overrun our destination buffer and instead returns DestinationTooSmall. + + Rune[] enumeratedScalars = utf16Input.EnumerateRunes().ToArray(); + + // Convert entire input to UTF-8 using our unit test reference logic. + + byte[] utf8Input = enumeratedScalars.SelectMany(ToUtf8).ToArray(); + + // 0-length buffer test + ToChars_Test_Core( + utf8Input: utf8Input, + destinationSize: 0, + replaceInvalidSequences: false, + isFinalChunk: false, + expectedOperationStatus: (utf8Input.Length == 0) ? 
OperationStatus.Done : OperationStatus.DestinationTooSmall, + expectedNumBytesRead: 0, + expectedUtf16Transcoding: ReadOnlySpan.Empty); + + int expectedNumBytesConsumed = 0; + char[] concatenatedUtf16 = Array.Empty(); + + for (int i = 0; i < enumeratedScalars.Length; i++) + { + Rune thisScalar = enumeratedScalars[i]; + + // if this is an astral scalar value, quickly test a buffer that's not large enough to contain the entire UTF-16 encoding + + if (!thisScalar.IsBmp) + { + ToChars_Test_Core( + utf8Input: utf8Input, + destinationSize: concatenatedUtf16.Length + 1, + replaceInvalidSequences: false, + isFinalChunk: false, + expectedOperationStatus: OperationStatus.DestinationTooSmall, + expectedNumBytesRead: expectedNumBytesConsumed, + expectedUtf16Transcoding: concatenatedUtf16); + } + + // now provide a destination buffer large enough to hold the next full scalar encoding + + expectedNumBytesConsumed += thisScalar.Utf8SequenceLength; + concatenatedUtf16 = concatenatedUtf16.Concat(ToUtf16(thisScalar)).ToArray(); + + ToChars_Test_Core( + utf8Input: utf8Input, + destinationSize: concatenatedUtf16.Length, + replaceInvalidSequences: false, + isFinalChunk: false, + expectedOperationStatus: (i == enumeratedScalars.Length - 1) ? 
OperationStatus.Done : OperationStatus.DestinationTooSmall, + expectedNumBytesRead: expectedNumBytesConsumed, + expectedUtf16Transcoding: concatenatedUtf16); + } + + // now throw lots of ASCII data at the beginning so that we exercise the vectorized code paths + + utf16Input = new string('x', 64) + utf16Input; + utf8Input = utf16Input.EnumerateRunes().SelectMany(ToUtf8).ToArray(); + + ToChars_Test_Core( + utf8Input: utf8Input, + destinationSize: utf16Input.Length, + replaceInvalidSequences: false, + isFinalChunk: true, + expectedOperationStatus: OperationStatus.Done, + expectedNumBytesRead: utf8Input.Length, + expectedUtf16Transcoding: utf16Input); + + // now throw some non-ASCII data at the beginning so that we *don't* exercise the vectorized code paths + + utf16Input = WOMAN_CARTWHEELING_MEDSKIN_UTF16 + utf16Input[64..]; + utf8Input = utf16Input.EnumerateRunes().SelectMany(ToUtf8).ToArray(); + + ToChars_Test_Core( + utf8Input: utf8Input, + destinationSize: utf16Input.Length, + replaceInvalidSequences: false, + isFinalChunk: true, + expectedOperationStatus: OperationStatus.Done, + expectedNumBytesRead: utf8Input.Length, + expectedUtf16Transcoding: utf16Input); + } + + [Theory] + [InlineData("3031" + "80" + "202122232425", 2, "01")] // Continuation character at start of sequence should match no bitmask + [InlineData("3031" + "C080" + "2021222324", 2, "01")] // Overlong 2-byte sequence at start of DWORD + [InlineData("3031" + "C180" + "2021222324", 2, "01")] // Overlong 2-byte sequence at start of DWORD + [InlineData("C280" + "C180", 2, "\u0080")] // Overlong 2-byte sequence at end of DWORD + [InlineData("C27F" + "C280", 0, "")] // Improperly terminated 2-byte sequence at start of DWORD + [InlineData("C2C0" + "C280", 0, "")] // Improperly terminated 2-byte sequence at start of DWORD + [InlineData("C280" + "C27F", 2, "\u0080")] // Improperly terminated 2-byte sequence at end of DWORD + [InlineData("C280" + "C2C0", 2, "\u0080")] // Improperly terminated 2-byte 
sequence at end of DWORD + [InlineData("C280" + "C280" + "80203040", 4, "\u0080\u0080")] // Continuation character at start of sequence, within "stay in 2-byte processing" optimization + [InlineData("C280" + "C280" + "C180" + "C280", 4, "\u0080\u0080")] // Overlong 2-byte sequence at start of DWORD, within "stay in 2-byte processing" optimization + [InlineData("C280" + "C280" + "C280" + "C180", 6, "\u0080\u0080\u0080")] // Overlong 2-byte sequence at end of DWORD, within "stay in 2-byte processing" optimization + [InlineData("3031" + "E09F80" + EURO_SYMBOL_UTF8 + EURO_SYMBOL_UTF8, 2, "01")] // Overlong 3-byte sequence at start of DWORD + [InlineData("3031" + "E07F80" + EURO_SYMBOL_UTF8 + EURO_SYMBOL_UTF8, 2, "01")] // Improperly terminated 3-byte sequence at start of DWORD + [InlineData("3031" + "E0C080" + EURO_SYMBOL_UTF8 + EURO_SYMBOL_UTF8, 2, "01")] // Improperly terminated 3-byte sequence at start of DWORD + [InlineData("3031" + "E17F80" + EURO_SYMBOL_UTF8 + EURO_SYMBOL_UTF8, 2, "01")] // Improperly terminated 3-byte sequence at start of DWORD + [InlineData("3031" + "E1C080" + EURO_SYMBOL_UTF8 + EURO_SYMBOL_UTF8, 2, "01")] // Improperly terminated 3-byte sequence at start of DWORD + [InlineData("3031" + "EDA080" + EURO_SYMBOL_UTF8 + EURO_SYMBOL_UTF8, 2, "01")] // Surrogate 3-byte sequence at start of DWORD + [InlineData("3031" + "E69C88" + "E59B" + "E69C88", 5, "01\u6708")] // Incomplete 3-byte sequence surrounded by valid 3-byte sequences + [InlineData("3031" + "F5808080", 2, "01")] // [ F5 ] is always invalid + [InlineData("3031" + "F6808080", 2, "01")] // [ F6 ] is always invalid + [InlineData("3031" + "F7808080", 2, "01")] // [ F7 ] is always invalid + [InlineData("3031" + "F8808080", 2, "01")] // [ F8 ] is always invalid + [InlineData("3031" + "F9808080", 2, "01")] // [ F9 ] is always invalid + [InlineData("3031" + "FA808080", 2, "01")] // [ FA ] is always invalid + [InlineData("3031" + "FB808080", 2, "01")] // [ FB ] is always invalid + [InlineData("3031" 
+ "FC808080", 2, "01")] // [ FC ] is always invalid + [InlineData("3031" + "FD808080", 2, "01")] // [ FD ] is always invalid + [InlineData("3031" + "FE808080", 2, "01")] // [ FE ] is always invalid + [InlineData("3031" + "FF808080", 2, "01")] // [ FF ] is always invalid + public void ToChars_WithLargeInvalidBuffers(string utf8HexInput, int expectedNumBytesConsumed, string expectedUtf16Transcoding) + { + // These test cases are for the "fast processing" code which is the main loop of TranscodeToUtf16, + // so inputs should be >= 4 bytes. + + Assert.True(utf8HexInput.Length >= 8); + + ToChars_Test_Core( + utf8Input: DecodeHex(utf8HexInput), + destinationSize: expectedUtf16Transcoding.Length, + replaceInvalidSequences: false, + isFinalChunk: false, + expectedOperationStatus: OperationStatus.InvalidData, + expectedNumBytesRead: expectedNumBytesConsumed, + expectedUtf16Transcoding: expectedUtf16Transcoding); + + // Now try the tests again with a larger buffer. + // This ensures that running out of destination space wasn't the reason we failed.
+ + ToChars_Test_Core( + utf8Input: DecodeHex(utf8HexInput), + destinationSize: expectedUtf16Transcoding.Length + 16, + replaceInvalidSequences: false, + isFinalChunk: false, + expectedOperationStatus: OperationStatus.InvalidData, + expectedNumBytesRead: expectedNumBytesConsumed, + expectedUtf16Transcoding: expectedUtf16Transcoding); + } + + [Theory] + [InlineData(X_UTF8 + "80" + X_UTF8, X_UTF16 + REPLACEMENT_CHAR_UTF16 + X_UTF16)] // stray continuation byte [ 80 ] + [InlineData(X_UTF8 + "FF" + X_UTF8, X_UTF16 + REPLACEMENT_CHAR_UTF16 + X_UTF16)] // invalid UTF-8 byte [ FF ] + [InlineData(X_UTF8 + "C2" + X_UTF8, X_UTF16 + REPLACEMENT_CHAR_UTF16 + X_UTF16)] // 2-byte sequence starter [ C2 ] not followed by continuation byte + [InlineData(X_UTF8 + "C1C180" + X_UTF8, X_UTF16 + REPLACEMENT_CHAR_UTF16 + REPLACEMENT_CHAR_UTF16 + REPLACEMENT_CHAR_UTF16 + X_UTF16)] // [ C1 80 ] is overlong but consists of two maximal invalid subsequences, each of length 1 byte + [InlineData(X_UTF8 + E_ACUTE_UTF8 + "E08080", X_UTF16 + E_ACUTE_UTF16 + REPLACEMENT_CHAR_UTF16 + REPLACEMENT_CHAR_UTF16 + REPLACEMENT_CHAR_UTF16)] // [ E0 80 ] is overlong 2-byte sequence (1 byte maximal invalid subsequence), and following [ 80 ] is stray continuation byte + [InlineData(GRINNING_FACE_UTF8 + "F08F8080" + GRINNING_FACE_UTF8, GRINNING_FACE_UTF16 + REPLACEMENT_CHAR_UTF16 + REPLACEMENT_CHAR_UTF16 + REPLACEMENT_CHAR_UTF16 + REPLACEMENT_CHAR_UTF16 + GRINNING_FACE_UTF16)] // [ F0 8F ] is overlong 4-byte sequence (1 byte maximal invalid subsequence), and following [ 80 ] instances are stray continuation bytes + [InlineData(GRINNING_FACE_UTF8 + "F4908080" + GRINNING_FACE_UTF8, GRINNING_FACE_UTF16 + REPLACEMENT_CHAR_UTF16 + REPLACEMENT_CHAR_UTF16 + REPLACEMENT_CHAR_UTF16 + REPLACEMENT_CHAR_UTF16 + GRINNING_FACE_UTF16)] // [ F4 90 ] is out-of-range 4-byte sequence (1 byte maximal invalid subsequence), and following [ 80 ] instances are stray continuation bytes + [InlineData(E_ACUTE_UTF8 + "EDA0" + X_UTF8, 
E_ACUTE_UTF16 + REPLACEMENT_CHAR_UTF16 + REPLACEMENT_CHAR_UTF16 + X_UTF16)] // [ ED A0 ] is encoding of UTF-16 surrogate code point, so consists of two maximal invalid subsequences, each of length 1 byte + [InlineData(E_ACUTE_UTF8 + "ED80" + X_UTF8, E_ACUTE_UTF16 + REPLACEMENT_CHAR_UTF16 + X_UTF16)] // [ ED 80 ] is incomplete 3-byte sequence, so is 2-byte maximal invalid subsequence + [InlineData(E_ACUTE_UTF8 + "F380" + X_UTF8, E_ACUTE_UTF16 + REPLACEMENT_CHAR_UTF16 + X_UTF16)] // [ F3 80 ] is incomplete 4-byte sequence, so is 2-byte maximal invalid subsequence + [InlineData(E_ACUTE_UTF8 + "F38080" + X_UTF8, E_ACUTE_UTF16 + REPLACEMENT_CHAR_UTF16 + X_UTF16)] // [ F3 80 80 ] is incomplete 4-byte sequence, so is 3-byte maximal invalid subsequence + public void ToChars_WithReplacement(string utf8HexInput, string expectedUtf16Transcoding) + { + // First run the test with isFinalBlock = false, + // both with and without some bytes of incomplete trailing data. + + ToChars_Test_Core( + utf8Input: DecodeHex(utf8HexInput), + destinationSize: expectedUtf16Transcoding.Length, + replaceInvalidSequences: true, + isFinalChunk: false, + expectedOperationStatus: OperationStatus.Done, + expectedNumBytesRead: utf8HexInput.Length / 2, + expectedUtf16Transcoding: expectedUtf16Transcoding); + + ToChars_Test_Core( + utf8Input: DecodeHex(utf8HexInput + "E0BF" /* trailing data */), + destinationSize: expectedUtf16Transcoding.Length, + replaceInvalidSequences: true, + isFinalChunk: false, + expectedOperationStatus: OperationStatus.NeedMoreData, + expectedNumBytesRead: utf8HexInput.Length / 2, + expectedUtf16Transcoding: expectedUtf16Transcoding); + + // Then run the test with isFinalBlock = true, with incomplete trailing data. 
+ + ToChars_Test_Core( + utf8Input: DecodeHex(utf8HexInput + "E0BF" /* trailing data */), + destinationSize: expectedUtf16Transcoding.Length, + replaceInvalidSequences: true, + isFinalChunk: true, + expectedOperationStatus: OperationStatus.DestinationTooSmall, + expectedNumBytesRead: utf8HexInput.Length / 2, + expectedUtf16Transcoding: expectedUtf16Transcoding); + + ToChars_Test_Core( + utf8Input: DecodeHex(utf8HexInput + "E0BF" /* trailing data */), + destinationSize: expectedUtf16Transcoding.Length + 1, // allow room for U+FFFD + replaceInvalidSequences: true, + isFinalChunk: true, + expectedOperationStatus: OperationStatus.Done, + expectedNumBytesRead: utf8HexInput.Length / 2 + 2, + expectedUtf16Transcoding: expectedUtf16Transcoding + REPLACEMENT_CHAR_UTF16); + } + + [Fact] + public void ToChars_AllPossibleScalarValues() + { + ToChars_Test_Core( + utf8Input: s_allScalarsAsUtf8.Span, + destinationSize: s_allScalarsAsUtf16.Length, + replaceInvalidSequences: false, + isFinalChunk: false, + expectedOperationStatus: OperationStatus.Done, + expectedNumBytesRead: s_allScalarsAsUtf8.Length, + expectedUtf16Transcoding: s_allScalarsAsUtf16.Span); + } + + private static void ToChars_Test_Core(ReadOnlySpan utf8Input, int destinationSize, bool replaceInvalidSequences, bool isFinalChunk, OperationStatus expectedOperationStatus, int expectedNumBytesRead, ReadOnlySpan expectedUtf16Transcoding) + { + // Arrange + + using (BoundedMemory boundedSource = BoundedMemory.AllocateFromExistingData(utf8Input)) + using (BoundedMemory boundedDestination = BoundedMemory.Allocate(destinationSize)) + { + boundedSource.MakeReadonly(); + + // Act + + OperationStatus actualOperationStatus = TextEncodings.Utf8.ToUtf16(boundedSource.Span, boundedDestination.Span, out int actualNumBytesRead, out int actualNumCharsWritten, replaceInvalidSequences, isFinalChunk); + + // Assert + + Assert.Equal(expectedOperationStatus, actualOperationStatus); + Assert.Equal(expectedNumBytesRead, actualNumBytesRead); + 
Assert.Equal(expectedUtf16Transcoding.Length, actualNumCharsWritten); + Assert.Equal(expectedUtf16Transcoding.ToString(), boundedDestination.Span.Slice(0, actualNumCharsWritten).ToString()); + } + } + } +} +#endif diff --git a/test/DotNetty.Common.Tests/Internal/CoreLib/Utf8UtilityTests.ValidateBytes.cs b/test/DotNetty.Common.Tests/Internal/CoreLib/Utf8UtilityTests.ValidateBytes.cs new file mode 100644 index 000000000..3f85c6254 --- /dev/null +++ b/test/DotNetty.Common.Tests/Internal/CoreLib/Utf8UtilityTests.ValidateBytes.cs @@ -0,0 +1,396 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +#if CORELIBTEST +using System; +using System.Buffers; +using System.Linq; +using System.Runtime.InteropServices; +using DotNetty.Common.Internal; +using Xunit; + +namespace DotNetty.Common.Tests.Internal.CoreLib +{ + public class Utf8UtilityTests + { + private const string X = "58"; // U+0058 LATIN CAPITAL LETTER X, 1 byte + private const string Y = "59"; // U+0059 LATIN CAPITAL LETTER Y, 1 byte + private const string Z = "5A"; // U+005A LATIN CAPITAL LETTER Z, 1 byte + private const string E_ACUTE = "C3A9"; // U+00E9 LATIN SMALL LETTER E WITH ACUTE, 2 bytes + private const string EURO_SYMBOL = "E282AC"; // U+20AC EURO SIGN, 3 bytes + private const string GRINNING_FACE = "F09F9880"; // U+1F600 GRINNING FACE, 4 bytes + + [Theory] + [InlineData("", 0, 0)] // empty string is OK + [InlineData(X, 1, 0)] + [InlineData(X + Y, 2, 0)] + [InlineData(X + Y + Z, 3, 0)] + [InlineData(E_ACUTE, 1, 0)] + [InlineData(X + E_ACUTE, 2, 0)] + [InlineData(E_ACUTE + X, 2, 0)] + [InlineData(EURO_SYMBOL, 1, 0)] + public void GetIndexOfFirstInvalidUtf8Sequence_WithSmallValidBuffers(string input, int expectedRuneCount, int expectedSurrogatePairCount) + { + // These test cases are for the "slow processing" code path at the end of
GetIndexOfFirstInvalidUtf8Sequence, + // so inputs should be less than 4 bytes. + + Assert.InRange(input.Length, 0, 6); + + GetIndexOfFirstInvalidUtf8Sequence_Test_Core(input, -1 /* expectedRetVal */, expectedRuneCount, expectedSurrogatePairCount); + } + + [Theory] + [InlineData("80", 0, 0, 0)] // sequence cannot begin with continuation character + [InlineData("8182", 0, 0, 0)] // sequence cannot begin with continuation character + [InlineData("838485", 0, 0, 0)] // sequence cannot begin with continuation character + [InlineData(X + "80", 1, 1, 0)] // sequence cannot begin with continuation character + [InlineData(X + "8182", 1, 1, 0)] // sequence cannot begin with continuation character + [InlineData("C0", 0, 0, 0)] // [ C0 ] is always invalid + [InlineData("C080", 0, 0, 0)] // [ C0 ] is always invalid + [InlineData("C08081", 0, 0, 0)] // [ C0 ] is always invalid + [InlineData(X + "C1", 1, 1, 0)] // [ C1 ] is always invalid + [InlineData(X + "C180", 1, 1, 0)] // [ C1 ] is always invalid + [InlineData("C2", 0, 0, 0)] // [ C2 ] is improperly terminated + [InlineData(X + "C27F", 1, 1, 0)] // [ C2 ] is improperly terminated + [InlineData(X + "E282", 1, 1, 0)] // [ E2 82 ] is improperly terminated + [InlineData("E2827F", 0, 0, 0)] // [ E2 82 ] is improperly terminated + [InlineData("E09F80", 0, 0, 0)] // [ E0 9F ... ] is overlong + [InlineData("E0C080", 0, 0, 0)] // [ E0 ] is improperly terminated + [InlineData("ED7F80", 0, 0, 0)] // [ ED ] is improperly terminated + [InlineData("EDA080", 0, 0, 0)] // [ ED A0 ... ] is surrogate + public void GetIndexOfFirstInvalidUtf8Sequence_WithSmallInvalidBuffers(string input, int expectedRetVal, int expectedRuneCount, int expectedSurrogatePairCount) + { + // These test cases are for the "slow processing" code path at the end of GetIndexOfFirstInvalidUtf8Sequence, + // so inputs should be less than 4 bytes. 
+ + Assert.InRange(input.Length, 0, 6); + + GetIndexOfFirstInvalidUtf8Sequence_Test_Core(input, expectedRetVal, expectedRuneCount, expectedSurrogatePairCount); + } + + [Theory] + [InlineData(E_ACUTE + "21222324" + "303132333435363738393A3B3C3D3E3F", 21, 0)] // Loop unrolling at end of buffer + [InlineData(E_ACUTE + "21222324" + "303132333435363738393A3B3C3D3E3F" + "3031323334353637" + E_ACUTE + "38393A3B3C3D3E3F", 38, 0)] // Loop unrolling interrupted by non-ASCII + [InlineData("212223" + E_ACUTE + "30313233", 8, 0)] // 3 ASCII bytes followed by non-ASCII + [InlineData("2122" + E_ACUTE + "30313233", 7, 0)] // 2 ASCII bytes followed by non-ASCII + [InlineData("21" + E_ACUTE + "30313233", 6, 0)] // 1 ASCII byte followed by non-ASCII + [InlineData(E_ACUTE + E_ACUTE + E_ACUTE + E_ACUTE, 4, 0)] // 4x 2-byte sequences, exercises optimization code path in 2-byte sequence processing + [InlineData(E_ACUTE + E_ACUTE + E_ACUTE + "5051", 5, 0)] // 3x 2-byte sequences + 2 ASCII bytes, exercises optimization code path in 2-byte sequence processing + [InlineData(E_ACUTE + "5051", 3, 0)] // single 2-byte sequence + 2 trailing ASCII bytes, exercises draining logic in 2-byte sequence processing + [InlineData(E_ACUTE + "50" + E_ACUTE + "304050", 6, 0)] // single 2-byte sequences + 1 trailing ASCII byte + 2-byte sequence, exercises draining logic in 2-byte sequence processing + [InlineData(EURO_SYMBOL + "20", 2, 0)] // single 3-byte sequence + 1 trailing ASCII byte, exercises draining logic in 3-byte sequence processing + [InlineData(EURO_SYMBOL + "203040", 4, 0)] // single 3-byte sequence + 3 trailing ASCII byte, exercises draining logic and "running out of data" logic in 3-byte sequence processing + [InlineData(EURO_SYMBOL + EURO_SYMBOL + EURO_SYMBOL, 3, 0)] // 3x 3-byte sequences, exercises "stay within 3-byte loop" logic in 3-byte sequence processing + [InlineData(EURO_SYMBOL + EURO_SYMBOL + EURO_SYMBOL + EURO_SYMBOL, 4, 0)] // 4x 3-byte sequences, exercises "consume multiple 
bytes at a time" logic in 3-byte sequence processing + [InlineData(EURO_SYMBOL + EURO_SYMBOL + EURO_SYMBOL + E_ACUTE, 4, 0)] // 3x 3-byte sequences + single 2-byte sequence, exercises "consume multiple bytes at a time" logic in 3-byte sequence processing + [InlineData(EURO_SYMBOL + EURO_SYMBOL + E_ACUTE + E_ACUTE + E_ACUTE + E_ACUTE, 6, 0)] // 2x 3-byte sequences + 4x 2-byte sequences, exercises "consume multiple bytes at a time" logic in 3-byte sequence processing + [InlineData(GRINNING_FACE + GRINNING_FACE, 2, 2)] // 2x 4-byte sequences, exercises 4-byte sequence processing + [InlineData(GRINNING_FACE + "303132", 4, 1)] // single 4-byte sequence + 3 ASCII bytes, exercises 4-byte sequence processing and draining logic + [InlineData("F09FA4B8" + "F09F8FBD" + "E2808D" + "E29980" + "EFB88F", 5, 2)] // U+1F938 U+1F3FD U+200D U+2640 U+FE0F WOMAN CARTWHEELING: MEDIUM SKIN TONE, exercising switching between multiple sequence lengths + public void GetIndexOfFirstInvalidUtf8Sequence_WithLargeValidBuffers(string input, int expectedRuneCount, int expectedSurrogatePairCount) + { + // These test cases are for the "fast processing" code which is the main loop of GetIndexOfFirstInvalidUtf8Sequence, + // so inputs should be >= 4 bytes.
+ + Assert.True(input.Length >= 8); + + GetIndexOfFirstInvalidUtf8Sequence_Test_Core(input, -1 /* expectedRetVal */, expectedRuneCount, expectedSurrogatePairCount); + } + + [Theory] + [InlineData("3031" + "80" + "202122232425", 2, 2, 0)] // Continuation character at start of sequence should match no bitmask + [InlineData("3031" + "C080" + "2021222324", 2, 2, 0)] // Overlong 2-byte sequence at start of DWORD + [InlineData("3031" + "C180" + "2021222324", 2, 2, 0)] // Overlong 2-byte sequence at start of DWORD + [InlineData("C280" + "C180", 2, 1, 0)] // Overlong 2-byte sequence at end of DWORD + [InlineData("C27F" + "C280", 0, 0, 0)] // Improperly terminated 2-byte sequence at start of DWORD + [InlineData("C2C0" + "C280", 0, 0, 0)] // Improperly terminated 2-byte sequence at start of DWORD + [InlineData("C280" + "C27F", 2, 1, 0)] // Improperly terminated 2-byte sequence at end of DWORD + [InlineData("C280" + "C2C0", 2, 1, 0)] // Improperly terminated 2-byte sequence at end of DWORD + [InlineData("C280" + "C280" + "80203040", 4, 2, 0)] // Continuation character at start of sequence, within "stay in 2-byte processing" optimization + [InlineData("C280" + "C280" + "C180" + "C280", 4, 2, 0)] // Overlong 2-byte sequence at start of DWORD, within "stay in 2-byte processing" optimization + [InlineData("C280" + "C280" + "C280" + "C180", 6, 3, 0)] // Overlong 2-byte sequence at end of DWORD, within "stay in 2-byte processing" optimization + [InlineData("3031" + "E09F80" + EURO_SYMBOL + EURO_SYMBOL, 2, 2, 0)] // Overlong 3-byte sequence at start of DWORD + [InlineData("3031" + "E07F80" + EURO_SYMBOL + EURO_SYMBOL, 2, 2, 0)] // Improperly terminated 3-byte sequence at start of DWORD + [InlineData("3031" + "E0C080" + EURO_SYMBOL + EURO_SYMBOL, 2, 2, 0)] // Improperly terminated 3-byte sequence at start of DWORD + [InlineData("3031" + "E17F80" + EURO_SYMBOL + EURO_SYMBOL, 2, 2, 0)] // Improperly terminated 3-byte sequence at start of DWORD + [InlineData("3031" + "E1C080" + 
EURO_SYMBOL + EURO_SYMBOL, 2, 2, 0)] // Improperly terminated 3-byte sequence at start of DWORD + [InlineData("3031" + "EDA080" + EURO_SYMBOL + EURO_SYMBOL, 2, 2, 0)] // Surrogate 3-byte sequence at start of DWORD + [InlineData("3031" + "E69C88" + "E59B" + "E69C88", 5, 3, 0)] // Incomplete 3-byte sequence surrounded by valid 3-byte sequences + [InlineData("E78B80" + "80", 3, 1, 0)] // Valid 3-byte sequence followed by standalone continuation byte + [InlineData("3031" + "F5808080", 2, 2, 0)] // [ F5 ] is always invalid + [InlineData("3031" + "F6808080", 2, 2, 0)] // [ F6 ] is always invalid + [InlineData("3031" + "F7808080", 2, 2, 0)] // [ F7 ] is always invalid + [InlineData("3031" + "F8808080", 2, 2, 0)] // [ F8 ] is always invalid + [InlineData("3031" + "F9808080", 2, 2, 0)] // [ F9 ] is always invalid + [InlineData("3031" + "FA808080", 2, 2, 0)] // [ FA ] is always invalid + [InlineData("3031" + "FB808080", 2, 2, 0)] // [ FB ] is always invalid + [InlineData("3031" + "FC808080", 2, 2, 0)] // [ FC ] is always invalid + [InlineData("3031" + "FD808080", 2, 2, 0)] // [ FD ] is always invalid + [InlineData("3031" + "FE808080", 2, 2, 0)] // [ FE ] is always invalid + [InlineData("3031" + "FF808080", 2, 2, 0)] // [ FF ] is always invalid + public void GetIndexOfFirstInvalidUtf8Sequence_WithLargeInvalidBuffers(string input, int expectedRetVal, int expectedRuneCount, int expectedSurrogatePairCount) + { + // These test cases are for the "fast processing" code which is the main loop of GetIndexOfFirstInvalidUtf8Sequence, + // so inputs should be >= 4 bytes.
+ + Assert.True(input.Length >= 8); + + GetIndexOfFirstInvalidUtf8Sequence_Test_Core(input, expectedRetVal, expectedRuneCount, expectedSurrogatePairCount); + } + + [Fact] + public void GetIndexOfFirstInvalidUtf8Sequence_WithOverlongTwoByteSequences_ReturnsInvalid() + { + // [ C0 ] is never a valid byte, indicates overlong 2-byte sequence + // We'll test that [ C0 ] [ 00..FF ] is treated as invalid + + for (int i = 0; i < 256; i++) + { + AssertIsInvalidTwoByteSequence(new byte[] { 0xC0, (byte)i }); + } + + // [ C1 ] is never a valid byte, indicates overlong 2-byte sequence + // We'll test that [ C1 ] [ 00..FF ] is treated as invalid + + for (int i = 0; i < 256; i++) + { + AssertIsInvalidTwoByteSequence(new byte[] { 0xC1, (byte)i }); + } + } + + [Fact] + public void GetIndexOfFirstInvalidUtf8Sequence_WithImproperlyTerminatedTwoByteSequences_ReturnsInvalid() + { + // Test [ C2..DF ] [ 00..7F ] and [ C2..DF ] [ C0..FF ] + + for (int i = 0xC2; i < 0xDF; i++) + { + for (int j = 0; j < 0x80; j++) + { + AssertIsInvalidTwoByteSequence(new byte[] { (byte)i, (byte)j }); + } + for (int j = 0xC0; j < 0x100; j++) + { + AssertIsInvalidTwoByteSequence(new byte[] { (byte)i, (byte)j }); + } + } + } + + [Fact] + public void GetIndexOfFirstInvalidUtf8Sequence_WithOverlongThreeByteSequences_ReturnsInvalid() + { + // [ E0 ] [ 80..9F ] [ 80..BF ] is overlong 3-byte sequence + + for (int i = 0x00; i < 0xA0; i++) + { + AssertIsInvalidThreeByteSequence(new byte[] { 0xE0, (byte)i, 0x80 }); + } + } + + [Fact] + public void GetIndexOfFirstInvalidUtf8Sequence_WithSurrogateThreeByteSequences_ReturnsInvalid() + { + // [ ED ] [ A0..BF ] [ 80..BF ] is surrogate 3-byte sequence + + for (int i = 0xA0; i < 0x100; i++) + { + AssertIsInvalidThreeByteSequence(new byte[] { 0xED, (byte)i, 0x80 }); + } + } + + [Fact] + public void GetIndexOfFirstInvalidUtf8Sequence_WithImproperlyTerminatedThreeByteSequence_ReturnsInvalid() + { + // [ E0..EF ] [ 80..BF ] [ !(80..BF) ] is improperly terminated 3-byte sequence 
+ + for (int i = 0xE0; i < 0xF0; i++) + { + for (int j = 0x00; j < 0x80; j++) + { + // Use both '9F' and 'A0' to make sure at least one isn't caught by overlong / surrogate checks + AssertIsInvalidThreeByteSequence(new byte[] { (byte)i, 0x9F, (byte)j }); + AssertIsInvalidThreeByteSequence(new byte[] { (byte)i, 0xA0, (byte)j }); + } + for (int j = 0xC0; j < 0x100; j++) + { + // Use both '9F' and 'A0' to make sure at least one isn't caught by overlong / surrogate checks + AssertIsInvalidThreeByteSequence(new byte[] { (byte)i, 0x9F, (byte)j }); + AssertIsInvalidThreeByteSequence(new byte[] { (byte)i, 0xA0, (byte)j }); + } + } + } + + [Fact] + public void GetIndexOfFirstInvalidUtf8Sequence_WithOverlongFourByteSequences_ReturnsInvalid() + { + // [ F0 ] [ 80..8F ] [ 80..BF ] [ 80..BF ] is overlong 4-byte sequence + + for (int i = 0x00; i < 0x90; i++) + { + AssertIsInvalidFourByteSequence(new byte[] { 0xF0, (byte)i, 0x80, 0x80 }); + } + } + + [Fact] + public void GetIndexOfFirstInvalidUtf8Sequence_WithOutOfRangeFourByteSequences_ReturnsInvalid() + { + // [ F4 ] [ 90..BF ] [ 80..BF ] [ 80..BF ] is out-of-range 4-byte sequence + + for (int i = 0x90; i < 0x100; i++) + { + AssertIsInvalidFourByteSequence(new byte[] { 0xF4, (byte)i, 0x80, 0x80 }); + } + } + + [Fact] + public void GetIndexOfFirstInvalidUtf8Sequence_WithInvalidFourByteSequence_ReturnsInvalid() + { + // [ F0..F4 ] [ !(80..BF) ] [ !(80..BF) ] [ !(80..BF) ] is improperly terminated 4-byte sequence + + for (int i = 0xF0; i < 0xF5; i++) + { + for (int j = 0x00; j < 0x80; j++) + { + AssertIsInvalidFourByteSequence(new byte[] { (byte)i, (byte)j, 0x80, 0x80 }); + + // Use both '8F' and '90' to make sure at least one isn't caught by overlong / out-of-range checks + AssertIsInvalidFourByteSequence(new byte[] { (byte)i, 0x9F, (byte)j, 0x80 }); + AssertIsInvalidFourByteSequence(new byte[] { (byte)i, 0xA0, (byte)j, 0x80 }); + + AssertIsInvalidFourByteSequence(new byte[] { (byte)i, 0x9F, 0x80, (byte)j }); + 
AssertIsInvalidFourByteSequence(new byte[] { (byte)i, 0xA0, 0x80, (byte)j }); + } + for (int j = 0xC0; j < 0x100; j++) + { + AssertIsInvalidFourByteSequence(new byte[] { (byte)i, (byte)j, 0x80, 0x80 }); + + // Use both '8F' and '90' to make sure at least one isn't caught by overlong / out-of-range checks + AssertIsInvalidFourByteSequence(new byte[] { (byte)i, 0x9F, (byte)j, 0x80 }); + AssertIsInvalidFourByteSequence(new byte[] { (byte)i, 0xA0, (byte)j, 0x80 }); + + AssertIsInvalidFourByteSequence(new byte[] { (byte)i, 0x9F, 0x80, (byte)j }); + AssertIsInvalidFourByteSequence(new byte[] { (byte)i, 0xA0, 0x80, (byte)j }); + } + } + } + + private static void AssertIsInvalidTwoByteSequence(byte[] invalidSequence) + { + Assert.Equal(2, invalidSequence.Length); + + byte[] knownGoodBytes = Utf8Tests.DecodeHex(E_ACUTE); + + byte[] toTest = invalidSequence.Concat(invalidSequence).Concat(knownGoodBytes).ToArray(); // at start of first DWORD + GetIndexOfFirstInvalidUtf8Sequence_Test_Core(toTest, 0, 0, 0); + + toTest = knownGoodBytes.Concat(invalidSequence).Concat(knownGoodBytes).ToArray(); // at end of first DWORD + GetIndexOfFirstInvalidUtf8Sequence_Test_Core(toTest, 2, 1, 0); + + // Run the same tests but with extra data at the beginning so that we're inside one of + // the 2-byte processing "hot loop" code paths. 
+ + toTest = knownGoodBytes.Concat(knownGoodBytes).Concat(invalidSequence).Concat(knownGoodBytes).ToArray(); // at start of next DWORD + GetIndexOfFirstInvalidUtf8Sequence_Test_Core(toTest, 4, 2, 0); + + toTest = knownGoodBytes.Concat(knownGoodBytes).Concat(knownGoodBytes).Concat(invalidSequence).Concat(knownGoodBytes).ToArray(); // at end of next DWORD + GetIndexOfFirstInvalidUtf8Sequence_Test_Core(toTest, 6, 3, 0); + } + + private static void AssertIsInvalidThreeByteSequence(byte[] invalidSequence) + { + Assert.Equal(3, invalidSequence.Length); + + byte[] knownGoodBytes = Utf8Tests.DecodeHex(EURO_SYMBOL); + + byte[] toTest = invalidSequence.Concat(invalidSequence).Concat(knownGoodBytes).ToArray(); // at start of first DWORD + GetIndexOfFirstInvalidUtf8Sequence_Test_Core(toTest, 0, 0, 0); + + // Run the same tests but with extra data at the beginning so that we're inside one of + // the 3-byte processing "hot loop" code paths. + + toTest = knownGoodBytes.Concat(invalidSequence).Concat(knownGoodBytes).ToArray(); // straddling first and second DWORDs + GetIndexOfFirstInvalidUtf8Sequence_Test_Core(toTest, 3, 1, 0); + + toTest = knownGoodBytes.Concat(knownGoodBytes).Concat(invalidSequence).Concat(knownGoodBytes).ToArray(); // straddling second and third DWORDs + GetIndexOfFirstInvalidUtf8Sequence_Test_Core(toTest, 6, 2, 0); + + toTest = knownGoodBytes.Concat(knownGoodBytes).Concat(knownGoodBytes).Concat(invalidSequence).Concat(knownGoodBytes).ToArray(); // at end of third DWORD + GetIndexOfFirstInvalidUtf8Sequence_Test_Core(toTest, 9, 3, 0); + } + + private static void AssertIsInvalidFourByteSequence(byte[] invalidSequence) + { + Assert.Equal(4, invalidSequence.Length); + + byte[] knownGoodBytes = Utf8Tests.DecodeHex(GRINNING_FACE); + + byte[] toTest = invalidSequence.Concat(invalidSequence).Concat(knownGoodBytes).ToArray(); + GetIndexOfFirstInvalidUtf8Sequence_Test_Core(toTest, 0, 0, 0); + + toTest = 
knownGoodBytes.Concat(invalidSequence).Concat(knownGoodBytes).ToArray(); + GetIndexOfFirstInvalidUtf8Sequence_Test_Core(toTest, 4, 1, 1); + } + + private static void GetIndexOfFirstInvalidUtf8Sequence_Test_Core(string inputHex, int expectedRetVal, int expectedRuneCount, int expectedSurrogatePairCount) + { + byte[] inputBytes = Utf8Tests.DecodeHex(inputHex); + + // Run the test normally + GetIndexOfFirstInvalidUtf8Sequence_Test_Core(inputBytes, expectedRetVal, expectedRuneCount, expectedSurrogatePairCount); + + // Then run the test with a bunch of ASCII data at the beginning (to exercise the vectorized code paths) + inputBytes = Enumerable.Repeat((byte)'x', 128).Concat(inputBytes).ToArray(); + GetIndexOfFirstInvalidUtf8Sequence_Test_Core(inputBytes, (expectedRetVal < 0) ? expectedRetVal : (expectedRetVal + 128), expectedRuneCount + 128, expectedSurrogatePairCount); + + // Then put a few more ASCII bytes at the beginning (to test that offsets are properly handled) + inputBytes = Enumerable.Repeat((byte)'x', 7).Concat(inputBytes).ToArray(); + GetIndexOfFirstInvalidUtf8Sequence_Test_Core(inputBytes, (expectedRetVal < 0) ? 
expectedRetVal : (expectedRetVal + 135), expectedRuneCount + 135, expectedSurrogatePairCount); + } + + private static unsafe void GetIndexOfFirstInvalidUtf8Sequence_Test_Core(byte[] input, int expectedRetVal, int expectedRuneCount, int expectedSurrogatePairCount) + { + // Arrange + + using BoundedMemory boundedMemory = BoundedMemory.AllocateFromExistingData(input); + boundedMemory.MakeReadonly(); + + // Act + + int actualRetVal; + int actualSurrogatePairCount; + int actualRuneCount; + + fixed (byte* pInputBuffer = &MemoryMarshal.GetReference(boundedMemory.Span)) + { + byte* pFirstInvalidByte = Utf8Utility.GetPointerToFirstInvalidByte(pInputBuffer, input.Length, out int utf16CodeUnitCountAdjustment, out int scalarCountAdjustment); + + long ptrDiff = pFirstInvalidByte - pInputBuffer; + Assert.True((ulong)ptrDiff <= (uint)input.Length, "ptrDiff was outside expected range."); + + Assert.True(utf16CodeUnitCountAdjustment <= 0, "UTF-16 code unit count adjustment must be 0 or negative."); + Assert.True(scalarCountAdjustment <= 0, "Scalar count adjustment must be 0 or negative."); + + actualRetVal = (ptrDiff == input.Length) ? -1 : (int)ptrDiff; + + // The last two 'out' parameters are: + // a) The number to be added to the "bytes processed" return value to come up with the total UTF-16 code unit count, and + // b) The number to be added to the "total UTF-16 code unit count" value to come up with the total scalar count. + + int totalUtf16CodeUnitCount = (int)ptrDiff + utf16CodeUnitCountAdjustment; + actualRuneCount = totalUtf16CodeUnitCount + scalarCountAdjustment; + + // Surrogate pair count is number of UTF-16 code units less the number of scalars. 
+ + actualSurrogatePairCount = totalUtf16CodeUnitCount - actualRuneCount; + } + + // Assert + + Assert.Equal(expectedRetVal, actualRetVal); + Assert.Equal(expectedRuneCount, actualRuneCount); + Assert.Equal(expectedSurrogatePairCount, actualSurrogatePairCount); + } + } +} +#endif From ed6cec8513c78d6a70ed547d2f22303f2afcb41d Mon Sep 17 00:00:00 2001 From: cuteant Date: Thu, 24 Jun 2021 21:41:09 +0800 Subject: [PATCH 3/5] Update ByteBufferReader MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * CoreLib(NET5.0-SDK)的部分API没有开放,SpanHelper还是只能使用.NET Core 3.1版本。 * Common/Buffer:NET5.0框架下,移除对SpanHelper的依赖。 --- .../AbstractByteBuffer.NetStandard.cs | 42 ++- .../ByteBufferUtil.Comparable.cs | 8 +- .../ByteBufferUtil.Equatable.cs | 8 +- .../Reader/ByteBufferReader.Search.cs | 96 +++++ .../Reader/ByteBufferReader.cs | 61 +++- .../ByteBufferReaderExtensions.Binary.cs | 4 +- .../AppendableCharSequence.NetStandard.cs | 16 +- .../Internal/PlatformDependent.cs | 4 + .../Internal/Utf8Utility.Validation.cs | 2 +- .../Utilities/AsciiString.NetStandard.cs | 62 ++++ src/DotNetty.Common/Utilities/AsciiString.cs | 4 + src/DotNetty.Common/Utilities/CharUtil.cs | 22 ++ .../Utilities/ICharSequenceExtensions.cs | 11 + src/DotNetty.Common/Utilities/StringUtil.cs | 8 +- ...DotNetty.Buffers.ReaderWriter.Tests.csproj | 2 +- .../test_corefx/BasicTests.cs | 341 +++++++++++++++++- .../test_corefx/ReadTo.cs | 44 ++- .../test_corefxlab/ReaderBasicTests.cs | 2 +- 18 files changed, 716 insertions(+), 21 deletions(-) diff --git a/src/DotNetty.Buffers/AbstractByteBuffer.NetStandard.cs b/src/DotNetty.Buffers/AbstractByteBuffer.NetStandard.cs index 3fd3c4d63..17cf7e304 100644 --- a/src/DotNetty.Buffers/AbstractByteBuffer.NetStandard.cs +++ b/src/DotNetty.Buffers/AbstractByteBuffer.NetStandard.cs @@ -206,7 +206,7 @@ public virtual int ReadBytes(Span destination) var readableBytes = Math.Min(_writerIndex - readerIndex, destination.Length); if (readableBytes > 0) { - 
_GetBytes(readerIndex, destination, readableBytes); + _GetBytes(readerIndex, destination, readableBytes); _readerIndex = readerIndex + readableBytes; } return readableBytes; @@ -398,14 +398,22 @@ public virtual int IndexOf(int fromIndex, int toIndex, byte value) internal protected virtual int IndexOf0(int index, int count, byte value) { var span = GetReadableSpan(index, count); +#if NET + var result = span.IndexOf(value); +#else var result = SpanHelpers.IndexOf(ref MemoryMarshal.GetReference(span), value, span.Length); +#endif return (uint)result < SharedConstants.uIndexNotFound ? index + result : result; } internal protected virtual int LastIndexOf0(int index, int count, byte value) { var span = GetReadableSpan(index, count); +#if NET + var result = span.LastIndexOf(value); +#else var result = SpanHelpers.LastIndexOf(ref MemoryMarshal.GetReference(span), value, span.Length); +#endif return (uint)result < SharedConstants.uIndexNotFound ? index + result : result; } @@ -431,14 +439,22 @@ public virtual int IndexOf(int fromIndex, int toIndex, in ReadOnlySpan val internal protected virtual int IndexOf0(int index, int count, in ReadOnlySpan values) { var span = GetReadableSpan(index, count); +#if NET + var result = span.IndexOf(values); +#else var result = SpanHelpers.IndexOf(ref MemoryMarshal.GetReference(span), span.Length, ref MemoryMarshal.GetReference(values), values.Length); +#endif return (uint)result < SharedConstants.uIndexNotFound ? index + result : result; } internal protected virtual int LastIndexOf0(int index, int count, in ReadOnlySpan values) { var span = GetReadableSpan(index, count); +#if NET + var result = span.LastIndexOf(values); +#else var result = SpanHelpers.LastIndexOf(ref MemoryMarshal.GetReference(span), span.Length, ref MemoryMarshal.GetReference(values), values.Length); +#endif return (uint)result < SharedConstants.uIndexNotFound ? 
index + result : result; } @@ -464,14 +480,22 @@ public virtual int IndexOfAny(int fromIndex, int toIndex, byte value0, byte valu internal protected virtual int IndexOfAny0(int index, int count, byte value0, byte value1) { var span = GetReadableSpan(index, count); +#if NET + var result = span.IndexOfAny(value0, value1); +#else var result = SpanHelpers.IndexOfAny(ref MemoryMarshal.GetReference(span), value0, value1, span.Length); +#endif return (uint)result < SharedConstants.uIndexNotFound ? index + result : result; } internal protected virtual int LastIndexOfAny0(int index, int count, byte value0, byte value1) { var span = GetReadableSpan(index, count); +#if NET + var result = span.LastIndexOfAny(value0, value1); +#else var result = SpanHelpers.LastIndexOfAny(ref MemoryMarshal.GetReference(span), value0, value1, span.Length); +#endif return (uint)result < SharedConstants.uIndexNotFound ? index + result : result; } @@ -497,14 +521,22 @@ public virtual int IndexOfAny(int fromIndex, int toIndex, byte value0, byte valu internal protected virtual int IndexOfAny0(int index, int count, byte value0, byte value1, byte value2) { var span = GetReadableSpan(index, count); +#if NET + var result = span.IndexOfAny(value0, value1, value2); +#else var result = SpanHelpers.IndexOfAny(ref MemoryMarshal.GetReference(span), value0, value1, value2, span.Length); +#endif return (uint)result < SharedConstants.uIndexNotFound ? index + result : result; } internal protected virtual int LastIndexOfAny0(int index, int count, byte value0, byte value1, byte value2) { var span = GetReadableSpan(index, count); +#if NET + var result = span.LastIndexOfAny(value0, value1, value2); +#else var result = SpanHelpers.LastIndexOfAny(ref MemoryMarshal.GetReference(span), value0, value1, value2, span.Length); +#endif return (uint)result < SharedConstants.uIndexNotFound ? 
index + result : result; } @@ -530,14 +562,22 @@ public virtual int IndexOfAny(int fromIndex, int toIndex, in ReadOnlySpan internal protected virtual int IndexOfAny0(int index, int count, in ReadOnlySpan values) { var span = GetReadableSpan(index, count); +#if NET + var result = span.IndexOfAny(values); +#else var result = SpanHelpers.IndexOfAny(ref MemoryMarshal.GetReference(span), span.Length, ref MemoryMarshal.GetReference(values), values.Length); +#endif return (uint)result < SharedConstants.uIndexNotFound ? index + result : result; } internal protected virtual int LastIndexOfAny0(int index, int count, in ReadOnlySpan values) { var span = GetReadableSpan(index, count); +#if NET + var result = span.LastIndexOfAny(values); +#else var result = SpanHelpers.LastIndexOfAny(ref MemoryMarshal.GetReference(span), span.Length, ref MemoryMarshal.GetReference(values), values.Length); +#endif return (uint)result < SharedConstants.uIndexNotFound ? index + result : result; } } diff --git a/src/DotNetty.Buffers/ByteBufferUtil.Comparable.cs b/src/DotNetty.Buffers/ByteBufferUtil.Comparable.cs index eaa292a6e..01b916552 100644 --- a/src/DotNetty.Buffers/ByteBufferUtil.Comparable.cs +++ b/src/DotNetty.Buffers/ByteBufferUtil.Comparable.cs @@ -27,9 +27,11 @@ namespace DotNetty.Buffers { using System; using System.Runtime.CompilerServices; + using DotNetty.Common.Utilities; +#if !NET using System.Runtime.InteropServices; using DotNetty.Common.Internal; - using DotNetty.Common.Utilities; +#endif partial class ByteBufferUtil { @@ -44,7 +46,11 @@ public static int Compare(IByteBuffer bufferA, IByteBuffer bufferB) { var spanA = bufferA.GetReadableSpan(); var spanB = bufferB.GetReadableSpan(); +#if NET + return spanA.SequenceCompareTo(spanB); +#else return SpanHelpers.SequenceCompareTo(ref MemoryMarshal.GetReference(spanA), spanA.Length, ref MemoryMarshal.GetReference(spanB), spanB.Length); +#endif } return CompareSlow(bufferA, bufferB); } diff --git 
a/src/DotNetty.Buffers/ByteBufferUtil.Equatable.cs b/src/DotNetty.Buffers/ByteBufferUtil.Equatable.cs index 53b1ce454..04376882d 100644 --- a/src/DotNetty.Buffers/ByteBufferUtil.Equatable.cs +++ b/src/DotNetty.Buffers/ByteBufferUtil.Equatable.cs @@ -26,9 +26,11 @@ namespace DotNetty.Buffers { using System; + using DotNetty.Common.Utilities; +#if !NET using System.Runtime.InteropServices; using DotNetty.Common.Internal; - using DotNetty.Common.Utilities; +#endif partial class ByteBufferUtil { @@ -55,7 +57,11 @@ public static bool Equals(IByteBuffer a, int aStartIndex, IByteBuffer b, int bSt { var spanA = a.GetReadableSpan(aStartIndex, length); var spanB = b.GetReadableSpan(bStartIndex, length); +#if NET + return spanA.SequenceEqual(spanB); +#else return SpanHelpers.SequenceEqual(ref MemoryMarshal.GetReference(spanA), ref MemoryMarshal.GetReference(spanB), length); +#endif } return EqualsSlow(a, aStartIndex, b, bStartIndex, length); } diff --git a/src/DotNetty.Buffers/Reader/ByteBufferReader.Search.cs b/src/DotNetty.Buffers/Reader/ByteBufferReader.Search.cs index 4f03c4be2..49b61808b 100644 --- a/src/DotNetty.Buffers/Reader/ByteBufferReader.Search.cs +++ b/src/DotNetty.Buffers/Reader/ByteBufferReader.Search.cs @@ -45,7 +45,11 @@ partial struct ByteBufferReader public bool TryReadTo(out ReadOnlySpan span, byte delimiter, bool advancePastDelimiter = true) { ReadOnlySpan remaining = UnreadSpan; +#if NET + int index = remaining.IndexOf(delimiter); +#else int index = SpanHelpers.IndexOf(ref MemoryMarshal.GetReference(remaining), delimiter, remaining.Length); +#endif uint uIndex = (uint)index; if (SharedConstants.TooBigOrNegative >= uIndex) // index != -1 @@ -81,7 +85,11 @@ private bool TryReadToSlow(out ReadOnlySpan span, byte delimiter, bool adv public bool TryReadTo(out ReadOnlySpan span, byte delimiter, byte delimiterEscape, bool advancePastDelimiter = true) { ReadOnlySpan remaining = UnreadSpan; +#if NET + int index = remaining.IndexOf(delimiter); +#else int index = 
SpanHelpers.IndexOf(ref MemoryMarshal.GetReference(remaining), delimiter, remaining.Length); +#endif if ((index > 0 && remaining[index - 1] != delimiterEscape) || 0u >= (uint)index) { @@ -199,7 +207,11 @@ ref MemoryMarshal.GetReference(remaining), remaining = _currentSpan; Continue: +#if NET + index = remaining.IndexOf(delimiter); +#else index = SpanHelpers.IndexOf(ref MemoryMarshal.GetReference(remaining), delimiter, remaining.Length); +#endif } while (!End); // Didn't find anything, reset our original state. @@ -227,7 +239,11 @@ private bool TryReadToInternal(out ReadOnlySequence sequence, byte delimit while (_moreData) { +#if NET + int index = remaining.IndexOf(delimiter); +#else int index = SpanHelpers.IndexOf(ref MemoryMarshal.GetReference(remaining), delimiter, remaining.Length); +#endif uint uIndex = (uint)index; if (SharedConstants.TooBigOrNegative >= uIndex) // index != -1 { @@ -271,7 +287,11 @@ public bool TryReadTo(out ReadOnlySequence sequence, byte delimiter, byte while (_moreData) { +#if NET + int index = remaining.IndexOf(delimiter); +#else int index = SpanHelpers.IndexOf(ref MemoryMarshal.GetReference(remaining), delimiter, remaining.Length); +#endif uint uIndex = (uint)index; if (SharedConstants.TooBigOrNegative >= uIndex) // index != -1 { @@ -354,7 +374,13 @@ ref MemoryMarshal.GetReference(remaining), public bool TryReadToAny(out ReadOnlySpan span, in ReadOnlySpan delimiters, bool advancePastDelimiter = true) { ReadOnlySpan remaining = UnreadSpan; +#if NET + int index = delimiters.Length == 2 + ? 
remaining.IndexOfAny(delimiters[0], delimiters[1]) + : remaining.IndexOfAny(delimiters); +#else var index = SpanHelpers.IndexOfAny(ref MemoryMarshal.GetReference(remaining), remaining.Length, ref MemoryMarshal.GetReference(delimiters), delimiters.Length); +#endif if (SharedConstants.TooBigOrNegative >= (uint)index) // index != -1 { @@ -399,7 +425,13 @@ private bool TryReadToAnyInternal(out ReadOnlySequence sequence, in ReadOn while (!End) { +#if NET + int index = delimiters.Length == 2 + ? remaining.IndexOfAny(delimiters[0], delimiters[1]) + : remaining.IndexOfAny(delimiters); +#else int index = SpanHelpers.IndexOfAny(ref MemoryMarshal.GetReference(remaining), remaining.Length, ref delimiterSpace, delimiters.Length); +#endif uint uIndex = (uint)index; if (SharedConstants.TooBigOrNegative >= uIndex) // index != -1 { @@ -421,6 +453,46 @@ private bool TryReadToAnyInternal(out ReadOnlySequence sequence, in ReadOn return false; } + /// + /// Try to read everything up to the given . + /// + /// The read data, if any. + /// The delimiter to look for. + /// True to move past the if found. + /// True if the was found. + public bool TryReadTo(out ReadOnlySpan span, ReadOnlySpan delimiter, bool advancePastDelimiter = true) + { + ReadOnlySpan remaining = UnreadSpan; +#if NET + int index = remaining.IndexOf(delimiter); +#else + int index = SpanHelpers.IndexOf(ref MemoryMarshal.GetReference(remaining), remaining.Length, ref MemoryMarshal.GetReference(delimiter), delimiter.Length); +#endif + + if (index >= 0) + { + span = remaining.Slice(0, index); + AdvanceCurrentSpan(index + (advancePastDelimiter ? 
delimiter.Length : 0)); + return true; + } + + // This delimiter might be skipped, go down the slow path + return TryReadToSlow(out span, delimiter, advancePastDelimiter); + } + + private bool TryReadToSlow(out ReadOnlySpan span, ReadOnlySpan delimiter, bool advancePastDelimiter) + { + if (!TryReadTo(out ReadOnlySequence sequence, delimiter, advancePastDelimiter)) + { + span = default; + return false; + } + + Debug.Assert(sequence.Length > 0); + span = sequence.IsSingleSegment ? sequence.First.Span : sequence.ToArray(); + return true; + } + /// Try to read data until the entire given matches. /// The read data, if any. /// The multi (byte) delimiter. @@ -487,7 +559,11 @@ public bool TryReadTo(out ReadOnlySequence sequence, in ReadOnlySpan public bool TryAdvanceTo(byte delimiter, bool advancePastDelimiter = true) { ReadOnlySpan remaining = UnreadSpan; +#if NET + int index = remaining.IndexOf(delimiter); +#else int index = SpanHelpers.IndexOf(ref MemoryMarshal.GetReference(remaining), delimiter, remaining.Length); +#endif if (SharedConstants.TooBigOrNegative >= (uint)index) // ndex != -1 { Advance(advancePastDelimiter ? index + 1 : index); @@ -504,7 +580,11 @@ public bool TryAdvanceTo(byte delimiter, bool advancePastDelimiter = true) public bool TryAdvanceToAny(in ReadOnlySpan delimiters, bool advancePastDelimiter = true) { ReadOnlySpan remaining = UnreadSpan; +#if NET + int index = remaining.IndexOfAny(delimiters); +#else int index = SpanHelpers.IndexOfAny(ref MemoryMarshal.GetReference(remaining), remaining.Length, ref MemoryMarshal.GetReference(delimiters), delimiters.Length); +#endif if (SharedConstants.TooBigOrNegative >= (uint)index) // ndex != -1 { AdvanceCurrentSpan(index + (advancePastDelimiter ? 1 : 0)); @@ -669,6 +749,22 @@ ref MemoryMarshal.GetReference(searchSpan), return _consumed - start; } + /// + /// Moves the reader to the end of the sequence. 
+ /// + public void AdvanceToEnd() + { + if (_moreData) + { + Consumed = Length; + CurrentSpan = default; + CurrentSpanIndex = 0; + _currentPosition = Sequence.End; + _nextPosition = default; + _moreData = false; + } + } + /// Check to see if the given value is next. /// The value to compare the next items to. /// Move past the value if found. diff --git a/src/DotNetty.Buffers/Reader/ByteBufferReader.cs b/src/DotNetty.Buffers/Reader/ByteBufferReader.cs index 685bfa19f..4fc7caa7b 100644 --- a/src/DotNetty.Buffers/Reader/ByteBufferReader.cs +++ b/src/DotNetty.Buffers/Reader/ByteBufferReader.cs @@ -40,7 +40,7 @@ public ref partial struct ByteBufferReader private SequencePosition _currentPosition; private SequencePosition _nextPosition; private bool _moreData; - private long _length; + private readonly long _length; private readonly ReadOnlySequence _sequence; private ReadOnlySpan _currentSpan; @@ -107,6 +107,10 @@ public readonly bool End /// The underlying for the reader. public readonly ReadOnlySequence Sequence => _sequence; + /// Gets the unread portion of the . + /// The unread portion of the . + public readonly ReadOnlySequence UnreadSequence => Sequence.Slice(Position); + /// The current position in the . public readonly SequencePosition Position => _sequence.GetPosition(_currentSpanIndex, _currentPosition); @@ -172,6 +176,61 @@ public readonly bool TryPeek(out byte value) return false; } + /// Peeks at the next value at specific offset without advancing the reader. + /// The offset from current position. + /// The next value, or the default value if at the end of the reader. + /// true if the reader is not at its end and the peek operation succeeded; false if at the end of the reader. 
+ public readonly bool TryPeek(long offset, out byte value) + { + if (offset < 0L) { ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.offset); } + + // If we've got data and offset is not out of bounds + if (!_moreData || Remaining <= offset) + { + value = default; + return false; + } + + // Sum CurrentSpanIndex + offset could overflow as is but the value of offset should be very large + // because we check Remaining <= offset above so to overflow we should have a ReadOnlySequence close to 8 exabytes + Debug.Assert(CurrentSpanIndex + offset >= 0); + + // If offset doesn't fall inside current segment move to next until we find correct one + if ((CurrentSpanIndex + offset) <= CurrentSpan.Length - 1) + { + Debug.Assert(offset <= int.MaxValue); + + value = CurrentSpan[CurrentSpanIndex + (int)offset]; + return true; + } + else + { + long remainingOffset = offset - (CurrentSpan.Length - CurrentSpanIndex); + SequencePosition nextPosition = _nextPosition; + ReadOnlyMemory currentMemory; + + while (Sequence.TryGet(ref nextPosition, out currentMemory, advance: true)) + { + // Skip empty segment + if (currentMemory.Length > 0) + { + if (remainingOffset >= currentMemory.Length) + { + // Subtract current non consumed data + remainingOffset -= currentMemory.Length; + } + else + { + break; + } + } + } + + value = currentMemory.Span[(int)remainingOffset]; + return true; + } + } + /// Read the next value and advance the reader. /// The next value or default if at the end. /// False if at the end of the reader. 
diff --git a/src/DotNetty.Buffers/Reader/ByteBufferReaderExtensions.Binary.cs b/src/DotNetty.Buffers/Reader/ByteBufferReaderExtensions.Binary.cs index 8d12106f9..31b54cb39 100644 --- a/src/DotNetty.Buffers/Reader/ByteBufferReaderExtensions.Binary.cs +++ b/src/DotNetty.Buffers/Reader/ByteBufferReaderExtensions.Binary.cs @@ -248,7 +248,7 @@ private static bool TryReadReverseEndianness(ref ByteBufferReader reader, out us public static unsafe bool TryReadUnsignedMedium(ref this ByteBufferReader reader, out int value) { - if (reader.TryPeek(MediumSize, out var span)) + if (reader.TryPeek(MediumSize, out ReadOnlySpan span)) { //fixed (byte* bytes = &MemoryMarshal.GetReference(span)) //{ @@ -264,7 +264,7 @@ public static unsafe bool TryReadUnsignedMedium(ref this ByteBufferReader reader public static bool TryReadUnsignedMediumLE(ref this ByteBufferReader reader, out int value) { - if (reader.TryPeek(MediumSize, out var span)) + if (reader.TryPeek(MediumSize, out ReadOnlySpan span)) { ref byte b = ref MemoryMarshal.GetReference(span); value = b | Unsafe.Add(ref b, 1) << 8 | Unsafe.Add(ref b, 2) << 16; diff --git a/src/DotNetty.Common/Internal/AppendableCharSequence.NetStandard.cs b/src/DotNetty.Common/Internal/AppendableCharSequence.NetStandard.cs index 4ff856bb8..ff43815db 100644 --- a/src/DotNetty.Common/Internal/AppendableCharSequence.NetStandard.cs +++ b/src/DotNetty.Common/Internal/AppendableCharSequence.NetStandard.cs @@ -5,8 +5,10 @@ namespace DotNetty.Common.Internal { using System; using System.Runtime.CompilerServices; - using System.Runtime.InteropServices; using DotNetty.Common.Utilities; +#if !NET + using System.Runtime.InteropServices; +#endif partial class AppendableCharSequence : IHasAsciiSpan { @@ -28,8 +30,12 @@ public bool Equals(AppendableCharSequence other) return true; } +#if NET + return other is object && AsciiSpan.SequenceEqual(other.AsciiSpan); +#else return other is object && _pos == other._pos && SpanHelpers.SequenceEqual(ref 
MemoryMarshal.GetReference(AsciiSpan), ref MemoryMarshal.GetReference(other.AsciiSpan), _pos); +#endif } public override bool Equals(object obj) @@ -39,8 +45,12 @@ public override bool Equals(object obj) switch (obj) { case AppendableCharSequence other: +#if NET + return AsciiSpan.SequenceEqual(other.AsciiSpan); +#else return _pos == other._pos && SpanHelpers.SequenceEqual(ref MemoryMarshal.GetReference(AsciiSpan), ref MemoryMarshal.GetReference(other.AsciiSpan), _pos); +#endif case IHasAsciiSpan hasAscii: return AsciiSpan.SequenceEqual(hasAscii.AsciiSpan); @@ -63,8 +73,12 @@ bool IEquatable.Equals(ICharSequence other) return false; case AppendableCharSequence comparand: +#if NET + return AsciiSpan.SequenceEqual(comparand.AsciiSpan); +#else return _pos == comparand._pos && SpanHelpers.SequenceEqual(ref MemoryMarshal.GetReference(AsciiSpan), ref MemoryMarshal.GetReference(comparand.AsciiSpan), _pos); +#endif case IHasAsciiSpan hasAscii: return AsciiSpan.SequenceEqual(hasAscii.AsciiSpan); diff --git a/src/DotNetty.Common/Internal/PlatformDependent.cs b/src/DotNetty.Common/Internal/PlatformDependent.cs index b98391d6e..c56552e62 100644 --- a/src/DotNetty.Common/Internal/PlatformDependent.cs +++ b/src/DotNetty.Common/Internal/PlatformDependent.cs @@ -59,7 +59,11 @@ public static unsafe bool ByteArrayEquals(byte[] bytes1, int startPos1, byte[] b return true; } +#if NET + return new ReadOnlySpan(bytes1, startPos1, length).SequenceEqual(new ReadOnlySpan(bytes2, startPos2, length)); +#else return SpanHelpers.SequenceEqual(ref bytes1[startPos1], ref bytes2[startPos2], length); +#endif } public static unsafe int ByteArrayEqualsConstantTime(byte[] bytes1, int startPos1, byte[] bytes2, int startPos2, int length) diff --git a/src/DotNetty.Common/Internal/Utf8Utility.Validation.cs b/src/DotNetty.Common/Internal/Utf8Utility.Validation.cs index 2141f1f2a..4b1c3effd 100644 --- a/src/DotNetty.Common/Internal/Utf8Utility.Validation.cs +++ 
b/src/DotNetty.Common/Internal/Utf8Utility.Validation.cs @@ -42,7 +42,7 @@ unsafe partial class Utf8Utility // If so, short-circuit the remainder of the method. inputLength -= (int)numAsciiBytesCounted; - if (0u >= inputLength) + if (0u >= (uint)inputLength) { utf16CodeUnitCountAdjustment = 0; scalarCountAdjustment = 0; diff --git a/src/DotNetty.Common/Utilities/AsciiString.NetStandard.cs b/src/DotNetty.Common/Utilities/AsciiString.NetStandard.cs index d6e720551..9963e9e77 100644 --- a/src/DotNetty.Common/Utilities/AsciiString.NetStandard.cs +++ b/src/DotNetty.Common/Utilities/AsciiString.NetStandard.cs @@ -178,13 +178,25 @@ public bool ContentEquals(ICharSequence other) { case AsciiString asciiStr: return this.GetHashCode() == asciiStr.GetHashCode() +#if NET + && this.AsciiSpan.SequenceEqual(asciiStr.AsciiSpan); +#else && SpanHelpers.SequenceEqual(ref MemoryMarshal.GetReference(this.AsciiSpan), ref MemoryMarshal.GetReference(asciiStr.AsciiSpan), thisLength); +#endif case IHasAsciiSpan hasAscii: +#if NET + return this.AsciiSpan.SequenceEqual(hasAscii.AsciiSpan); +#else return SpanHelpers.SequenceEqual(ref MemoryMarshal.GetReference(this.AsciiSpan), ref MemoryMarshal.GetReference(hasAscii.AsciiSpan), thisLength); +#endif case IHasUtf16Span hasUtf16: +#if NET + return this.Utf16Span.SequenceEqual(hasUtf16.Utf16Span); +#else return SpanHelpers.SequenceEqual(ref MemoryMarshal.GetReference(this.Utf16Span), ref MemoryMarshal.GetReference(hasUtf16.Utf16Span), thisLength); +#endif default: return ContentEquals0(other); @@ -292,27 +304,43 @@ public int IndexOf(ICharSequence subString, int start) { if (subString is IHasAsciiSpan hasAscii) { +#if NET + return this.AsciiSpan.IndexOf(hasAscii.AsciiSpan); +#else return SpanHelpers.IndexOf(ref MemoryMarshal.GetReference(this.AsciiSpan), thisLen, ref MemoryMarshal.GetReference(hasAscii.AsciiSpan), subCount); +#endif } if (subString is IHasUtf16Span hasUtf16) { +#if NET + return this.Utf16Span.IndexOf(hasUtf16.Utf16Span); +#else 
return SpanHelpers.IndexOf(ref MemoryMarshal.GetReference(this.Utf16Span), thisLen, ref MemoryMarshal.GetReference(hasUtf16.Utf16Span), subCount); +#endif } } else { if (subString is IHasAsciiSpan hasAscii) { +#if NET + var result = this.AsciiSpan.Slice(start, searchLen).IndexOf(hasAscii.AsciiSpan); +#else var result = SpanHelpers.IndexOf( ref Unsafe.Add(ref MemoryMarshal.GetReference(this.AsciiSpan), start), searchLen, ref MemoryMarshal.GetReference(hasAscii.AsciiSpan), subCount); +#endif return SharedConstants.TooBigOrNegative >= (uint)result ? start + result : IndexNotFound; } if (subString is IHasUtf16Span hasUtf16) { +#if NET + var result = this.Utf16Span.Slice(start, searchLen).IndexOf(hasUtf16.Utf16Span); +#else var result = SpanHelpers.IndexOf( ref Unsafe.Add(ref MemoryMarshal.GetReference(this.Utf16Span), start), searchLen, ref MemoryMarshal.GetReference(hasUtf16.Utf16Span), subCount); +#endif return SharedConstants.TooBigOrNegative >= (uint)result ? start + result : IndexNotFound; } } @@ -364,10 +392,18 @@ public int IndexOf(char ch, int start) if (0u >= uStart) { +#if NET + return this.AsciiSpan.IndexOf((byte)ch); +#else return SpanHelpers.IndexOf(ref MemoryMarshal.GetReference(this.AsciiSpan), (byte)ch, thisLen); +#endif } var seachSpan = this.AsciiSpan.Slice(start); +#if NET + var result = seachSpan.IndexOf((byte)ch); +#else var result = SpanHelpers.IndexOf(ref MemoryMarshal.GetReference(seachSpan), (byte)ch, seachSpan.Length); +#endif return SharedConstants.TooBigOrNegative >= (uint)result ? 
start + result : IndexNotFound; } @@ -386,15 +422,33 @@ public int LastIndexOf(ICharSequence subString, int start) if (subString is IHasAsciiSpan hasAscii) { +#if NET + var searchLength = start + subCount; + if (searchLength > thisLen) + { + return this.AsciiSpan.LastIndexOf(hasAscii.AsciiSpan); + } + return this.AsciiSpan.Slice(0, searchLength).LastIndexOf(hasAscii.AsciiSpan); +#else return SpanHelpers.LastIndexOf( ref MemoryMarshal.GetReference(this.AsciiSpan), start + subCount, ref MemoryMarshal.GetReference(hasAscii.AsciiSpan), subCount); +#endif } if (subString is IHasUtf16Span hasUtf16) { +#if NET + var searchLength = start + subCount; + if (searchLength > thisLen) + { + return this.Utf16Span.LastIndexOf(hasUtf16.Utf16Span); + } + return this.Utf16Span.Slice(0, searchLength).LastIndexOf(hasUtf16.Utf16Span); +#else return SpanHelpers.LastIndexOf( ref MemoryMarshal.GetReference(this.Utf16Span), start + subCount, ref MemoryMarshal.GetReference(hasUtf16.Utf16Span), subCount); +#endif } return LastIndexOf0(subString, start); @@ -451,17 +505,25 @@ public bool RegionMatches(int thisStart, ICharSequence seq, int start, int count if (seq is IHasAsciiSpan hasAscii) { +#if NET + return this.AsciiSpan.Slice(thisStart, count).SequenceEqual(hasAscii.AsciiSpan.Slice(start, count)); +#else return SpanHelpers.SequenceEqual( ref Unsafe.Add(ref MemoryMarshal.GetReference(this.AsciiSpan), thisStart), ref Unsafe.Add(ref MemoryMarshal.GetReference(hasAscii.AsciiSpan), start), count); +#endif } if (seq is IHasUtf16Span hasUtf16) { +#if NET + return this.Utf16Span.Slice(thisStart, count).SequenceEqual(hasUtf16.Utf16Span.Slice(start, count)); +#else return SpanHelpers.SequenceEqual( ref Unsafe.Add(ref MemoryMarshal.GetReference(this.Utf16Span), thisStart), ref Unsafe.Add(ref MemoryMarshal.GetReference(hasUtf16.Utf16Span), start), count); +#endif } return RegionMatches0(thisStart, seq, start, count); diff --git a/src/DotNetty.Common/Utilities/AsciiString.cs 
b/src/DotNetty.Common/Utilities/AsciiString.cs index 50f79c3d0..4ddaac350 100644 --- a/src/DotNetty.Common/Utilities/AsciiString.cs +++ b/src/DotNetty.Common/Utilities/AsciiString.cs @@ -475,9 +475,13 @@ public int CompareTo(AsciiString other) { if (ReferenceEquals(this, other)) { return 0; } +#if NET + return this.AsciiSpan.SequenceCompareTo(other.AsciiSpan); +#else return SpanHelpers.SequenceCompareTo( ref MemoryMarshal.GetReference(this.AsciiSpan), this.length, ref MemoryMarshal.GetReference(other.AsciiSpan), other.Count); +#endif } public int CompareTo(object obj) => this.CompareTo(obj as AsciiString); diff --git a/src/DotNetty.Common/Utilities/CharUtil.cs b/src/DotNetty.Common/Utilities/CharUtil.cs index 617253d82..851dd2206 100644 --- a/src/DotNetty.Common/Utilities/CharUtil.cs +++ b/src/DotNetty.Common/Utilities/CharUtil.cs @@ -31,8 +31,10 @@ namespace DotNetty.Common.Utilities using System; using System.Collections.Generic; using System.Runtime.CompilerServices; +#if !NET using System.Runtime.InteropServices; using DotNetty.Common.Internal; +#endif public static partial class CharUtil { @@ -76,17 +78,25 @@ internal static bool ContentEquals(ICharSequence left, ICharSequence right) if (left is IHasAsciiSpan thisHasAscii && right is IHasAsciiSpan otherHasAscii) { +#if NET + return thisHasAscii.AsciiSpan.SequenceEqual(otherHasAscii.AsciiSpan); +#else return SpanHelpers.SequenceEqual( ref MemoryMarshal.GetReference(thisHasAscii.AsciiSpan), ref MemoryMarshal.GetReference(otherHasAscii.AsciiSpan), left.Count); +#endif } else if (left is IHasUtf16Span thisHasUtf16 && right is IHasUtf16Span otherHasUtf16) { +#if NET + return thisHasUtf16.Utf16Span.SequenceEqual(otherHasUtf16.Utf16Span); +#else return SpanHelpers.SequenceEqual( ref MemoryMarshal.GetReference(thisHasUtf16.Utf16Span), ref MemoryMarshal.GetReference(otherHasUtf16.Utf16Span), left.Count); +#endif } for (int i = 0; i < left.Count; i++) @@ -141,10 +151,14 @@ public static bool RegionMatches(string value, 
int thisStart, ICharSequence othe if (other is IHasUtf16Span hasUtf16) { +#if NET + return value.AsSpan().Slice(thisStart, length).SequenceEqual(hasUtf16.Utf16Span.Slice(start, length)); +#else return SpanHelpers.SequenceEqual( ref Unsafe.Add(ref MemoryMarshal.GetReference(value.AsSpan()), thisStart), ref Unsafe.Add(ref MemoryMarshal.GetReference(hasUtf16.Utf16Span), start), length); +#endif } int o1 = thisStart; int o2 = start; @@ -202,17 +216,25 @@ internal static bool RegionMatches(ICharSequence value, int thisStart, ICharSequ if (value is IHasAsciiSpan thisHasAscii && other is IHasAsciiSpan otherHasAscii) { +#if NET + return thisHasAscii.AsciiSpan.Slice(thisStart, length).SequenceEqual(otherHasAscii.AsciiSpan.Slice(start, length)); +#else return SpanHelpers.SequenceEqual( ref Unsafe.Add(ref MemoryMarshal.GetReference(thisHasAscii.AsciiSpan), thisStart), ref Unsafe.Add(ref MemoryMarshal.GetReference(otherHasAscii.AsciiSpan), start), length); +#endif } else if (value is IHasUtf16Span thisHasUtf16 && other is IHasUtf16Span otherHasUtf16) { +#if NET + return thisHasUtf16.Utf16Span.Slice(thisStart, length).SequenceEqual(otherHasUtf16.Utf16Span.Slice(start, length)); +#else return SpanHelpers.SequenceEqual( ref Unsafe.Add(ref MemoryMarshal.GetReference(thisHasUtf16.Utf16Span), thisStart), ref Unsafe.Add(ref MemoryMarshal.GetReference(otherHasUtf16.Utf16Span), start), length); +#endif } int o1 = thisStart; diff --git a/src/DotNetty.Common/Utilities/ICharSequenceExtensions.cs b/src/DotNetty.Common/Utilities/ICharSequenceExtensions.cs index 7cbf53a7c..cf11589e5 100644 --- a/src/DotNetty.Common/Utilities/ICharSequenceExtensions.cs +++ b/src/DotNetty.Common/Utilities/ICharSequenceExtensions.cs @@ -23,8 +23,12 @@ namespace DotNetty.Common.Utilities { using System.Runtime.CompilerServices; +#if NET + using System; +#else using System.Runtime.InteropServices; using DotNetty.Common.Internal; +#endif public static class ICharSequenceExtensions @@ -38,12 +42,19 @@ public static 
bool Contains(this ICharSequence sequence, char c) case IHasAsciiSpan hasAscii: if ((uint)c > AsciiString.uMaxCharValue) { return false; } +#if NET + return hasAscii.AsciiSpan.Contains((byte)c); +#else var asciiSpan = hasAscii.AsciiSpan; return SpanHelpers.Contains(ref MemoryMarshal.GetReference(asciiSpan), (byte)c, asciiSpan.Length); +#endif case IHasUtf16Span hasUtf16: +#if NET +#else var utf16Span = hasUtf16.Utf16Span; return SpanHelpers.Contains(ref MemoryMarshal.GetReference(utf16Span), c, utf16Span.Length); +#endif default: int length = sequence.Count; diff --git a/src/DotNetty.Common/Utilities/StringUtil.cs b/src/DotNetty.Common/Utilities/StringUtil.cs index ed382cfa3..871cff594 100644 --- a/src/DotNetty.Common/Utilities/StringUtil.cs +++ b/src/DotNetty.Common/Utilities/StringUtil.cs @@ -31,9 +31,11 @@ namespace DotNetty.Common.Utilities using System; using System.Collections.Generic; using System.Runtime.CompilerServices; - using System.Runtime.InteropServices; using System.Text; using DotNetty.Common.Internal; +#if !NET + using System.Runtime.InteropServices; +#endif /// /// String utility class. 
@@ -131,9 +133,13 @@ static bool RegionMatches(string value, int thisStart, string other, int start, if (0u >= (uint)length) { return true; } +#if NET + return value.AsSpan().Slice(thisStart, length).SequenceEqual(other.AsSpan().Slice(start, length)); +#else ref char valueStart = ref MemoryMarshal.GetReference(value.AsSpan()); ref char otherStart = ref MemoryMarshal.GetReference(other.AsSpan()); return SpanHelpers.SequenceEqual(ref Unsafe.Add(ref valueStart, thisStart), ref Unsafe.Add(ref otherStart, start), length); +#endif } /// diff --git a/test/DotNetty.Buffers.ReaderWriter.Tests/DotNetty.Buffers.ReaderWriter.Tests.csproj b/test/DotNetty.Buffers.ReaderWriter.Tests/DotNetty.Buffers.ReaderWriter.Tests.csproj index ae6b3839c..73f6f5f1e 100644 --- a/test/DotNetty.Buffers.ReaderWriter.Tests/DotNetty.Buffers.ReaderWriter.Tests.csproj +++ b/test/DotNetty.Buffers.ReaderWriter.Tests/DotNetty.Buffers.ReaderWriter.Tests.csproj @@ -1,7 +1,7 @@  - netcoreapp3.1;netcoreapp2.1 + net5.0;netcoreapp3.1;netcoreapp2.1 true diff --git a/test/DotNetty.Buffers.ReaderWriter.Tests/test_corefx/BasicTests.cs b/test/DotNetty.Buffers.ReaderWriter.Tests/test_corefx/BasicTests.cs index fd35ad23a..cc981b1f2 100644 --- a/test/DotNetty.Buffers.ReaderWriter.Tests/test_corefx/BasicTests.cs +++ b/test/DotNetty.Buffers.ReaderWriter.Tests/test_corefx/BasicTests.cs @@ -98,6 +98,7 @@ public void DefaultState() ByteBufferReader reader = default; Assert.Equal(0, reader.CurrentSpan.Length); Assert.Equal(0, reader.UnreadSpan.Length); + Assert.Equal(0, reader.UnreadSequence.Length); Assert.Equal(0, reader.Consumed); Assert.Equal(0, reader.CurrentSpanIndex); Assert.Equal(0, reader.Length); @@ -114,7 +115,9 @@ public void DefaultState() Assert.True(sequence.IsEmpty); Assert.False(reader.TryReadTo(out sequence, array)); Assert.True(sequence.IsEmpty); - Assert.False(reader.TryReadTo(out ReadOnlySpan span, default)); + Assert.False(reader.TryReadTo(out ReadOnlySpan span, default(byte))); + 
Assert.True(span.IsEmpty); + Assert.False(reader.TryReadTo(out span, array)); Assert.True(span.IsEmpty); Assert.False(reader.TryReadToAny(out sequence, array)); Assert.True(sequence.IsEmpty); @@ -124,6 +127,7 @@ public void DefaultState() Assert.False(reader.TryAdvanceToAny(array)); Assert.Equal(0, reader.CurrentSpan.Length); Assert.Equal(0, reader.UnreadSpan.Length); + Assert.Equal(0, reader.UnreadSequence.Length); Assert.Equal(0, reader.Consumed); Assert.Equal(0, reader.CurrentSpanIndex); Assert.Equal(0, reader.Length); @@ -168,6 +172,138 @@ public void TryPeekReturnsWithoutMoving() Assert.Equal(2, reader.Remaining); } + [Fact] + public void TryPeekOffset() + { + ByteBufferReader reader = new ByteBufferReader(Factory.CreateWithContent(GetInputData(10))); + Assert.True(reader.TryRead(out byte first)); + Assert.Equal(InputData[0], first); + Assert.True(reader.TryRead(out byte second)); + Assert.Equal(InputData[1], second); + + Assert.True(reader.TryPeek(7, out byte value)); + Assert.Equal(InputData[9], value); + + Assert.False(reader.TryPeek(8, out byte defaultValue)); + Assert.Equal(default, defaultValue); + + Assert.Equal(2, reader.Consumed); + Assert.Equal(8, reader.Remaining); + } + + [Fact] + public void TryPeekOffset_AfterEnd() + { + ByteBufferReader reader = new ByteBufferReader(Factory.CreateWithContent(GetInputData(2))); + Assert.True(reader.TryRead(out byte first)); + Assert.Equal(InputData[0], first); + + Assert.True(reader.TryPeek(0, out byte value)); + Assert.Equal(InputData[1], value); + Assert.Equal(1, reader.Remaining); + + Assert.False(reader.TryPeek(1, out byte defaultValue)); + Assert.Equal(default, defaultValue); + } + + [Fact] + public void TryPeekOffset_RemainsZeroOffsetZero() + { + ByteBufferReader reader = new ByteBufferReader(Factory.CreateWithContent(GetInputData(1))); + Assert.True(reader.TryRead(out byte first)); + Assert.Equal(InputData[0], first); + Assert.Equal(0, reader.Remaining); + Assert.False(reader.TryPeek(0, out byte 
defaultValue)); + Assert.Equal(default, defaultValue); + } + + [Fact] + public void TryPeekOffset_Empty() + { + ByteBufferReader reader = new ByteBufferReader(Factory.CreateWithContent(GetInputData(0))); + Assert.False(reader.TryPeek(0, out byte defaultValue)); + Assert.Equal(default, defaultValue); + } + + [Fact] + public void TryPeekOffset_MultiSegment_StarAhead() + { + ReadOnlySpan data = (byte[])_inputData.Clone(); + + SequenceSegment last = new SequenceSegment(); + last.SetMemory(new OwnedArray(data.Slice(5).ToArray()), 0, 5); + + SequenceSegment first = new SequenceSegment(); + first.SetMemory(new OwnedArray(data.Slice(0, 5).ToArray()), 0, 5); + first.SetNext(last); + + ReadOnlySequence sequence = new ReadOnlySequence(first, first.Start, last, last.End); + ByteBufferReader reader = new ByteBufferReader(sequence); + + // Move by 2 elements + for (int i = 0; i < 2; i++) + { + Assert.True(reader.TryRead(out byte val)); + Assert.Equal(InputData[i], val); + } + + // We're on element 3 we peek last element of first segment + Assert.True(reader.TryPeek(2, out byte lastElementFirstSegment)); + Assert.Equal(InputData[4], lastElementFirstSegment); + + // We're on element 3 we peek first element of second segment + Assert.True(reader.TryPeek(3, out byte fistElementSecondSegment)); + Assert.Equal(InputData[5], fistElementSecondSegment); + + // We're on element 3 we peek last element of second segment + Assert.True(reader.TryPeek(7, out byte lastElementSecondSegment)); + Assert.Equal(InputData[9], lastElementSecondSegment); + + // 3 + 8 out of bounds + Assert.False(reader.TryPeek(8, out byte defaultValue)); + Assert.Equal(default, defaultValue); + + Assert.Equal(2, reader.Consumed); + Assert.Equal(8, reader.Remaining); + } + + [Fact] + public void TryPeekOffset_MultiSegment_GetFirstGetLast() + { + ReadOnlySpan data = (byte[])_inputData.Clone(); + + SequenceSegment last = new SequenceSegment(); + last.SetMemory(new OwnedArray(data.Slice(5).ToArray()), 0, 5); + + 
SequenceSegment first = new SequenceSegment(); + first.SetMemory(new OwnedArray(data.Slice(0, 5).ToArray()), 0, 5); + first.SetNext(last); + + ReadOnlySequence sequence = new ReadOnlySequence(first, first.Start, last, last.End); + ByteBufferReader reader = new ByteBufferReader(sequence); + + Assert.True(reader.TryPeek(0, out byte firstElement)); + Assert.Equal(InputData[0], firstElement); + + Assert.True(reader.TryPeek(data.Length - 1, out byte lastElemen)); + Assert.Equal(InputData[data.Length - 1], lastElemen); + + Assert.Equal(0, reader.Consumed); + Assert.Equal(10, reader.Remaining); + } + + [Fact] + public void TryPeekOffset_InvalidOffset() + { + ArgumentOutOfRangeException exception = Assert.Throws(() => + { + ByteBufferReader reader = new ByteBufferReader(Factory.CreateWithContent(GetInputData(10))); + reader.TryPeek(-1, out _); + }); + + Assert.Equal("offset", exception.ParamName); + } + [Fact] public void CursorIsCorrectAtEnd() { @@ -493,6 +629,209 @@ public void AdvanceTo_AdvancePast() } } + [Fact] + public void AdvanceTo_End() + { + ReadOnlySpan data = (byte[])_inputData.Clone(); + + SequenceSegment last = new SequenceSegment(); + last.SetMemory(new OwnedArray(data.Slice(5).ToArray()), 0, 5); + + SequenceSegment first = new SequenceSegment(); + first.SetMemory(new OwnedArray(data.Slice(0, 5).ToArray()), 0, 5); + first.SetNext(last); + + ReadOnlySequence sequence = new ReadOnlySequence(first, first.Start, last, last.End); + ByteBufferReader reader = new ByteBufferReader(sequence); + + reader.AdvanceToEnd(); + + Assert.Equal(data.Length, reader.Length); + Assert.Equal(data.Length, reader.Consumed); + Assert.Equal(reader.Length, reader.Consumed); + Assert.True(reader.End); + Assert.Equal(0, reader.CurrentSpanIndex); + Assert.Equal(sequence.End, reader.Position); + Assert.Equal(0, reader.Remaining); + Assert.True(default == reader.UnreadSpan); + Assert.True(default == reader.CurrentSpan); + } + + [Fact] + public void AdvanceTo_End_EmptySegment() + { + 
ReadOnlySpan data = (byte[])_inputData.Clone(); + + // Empty segment + SequenceSegment third = new SequenceSegment(); + + SequenceSegment second = new SequenceSegment(); + second.SetMemory(new OwnedArray(data.Slice(5).ToArray()), 0, 5); + second.SetNext(third); + + SequenceSegment first = new SequenceSegment(); + first.SetMemory(new OwnedArray(data.Slice(0, 5).ToArray()), 0, 5); + first.SetNext(second); + + ReadOnlySequence sequence = new ReadOnlySequence(first, first.Start, third, third.End); + ByteBufferReader reader = new ByteBufferReader(sequence); + + reader.AdvanceToEnd(); + + Assert.Equal(first.Length + second.Length, reader.Length); + Assert.Equal(first.Length + second.Length, reader.Consumed); + Assert.Equal(reader.Length, reader.Consumed); + Assert.True(reader.End); + Assert.Equal(0, reader.CurrentSpanIndex); + Assert.Equal(sequence.End, reader.Position); + Assert.Equal(0, reader.Remaining); + Assert.True(default == reader.UnreadSpan); + Assert.True(default == reader.CurrentSpan); + } + + [Fact] + public void AdvanceTo_End_Rewind_Advance() + { + ReadOnlySpan data = (byte[])_inputData.Clone(); + + SequenceSegment last = new SequenceSegment(); + last.SetMemory(new OwnedArray(data.Slice(5).ToArray()), 0, 5); + + SequenceSegment first = new SequenceSegment(); + first.SetMemory(new OwnedArray(data.Slice(0, 5).ToArray()), 0, 5); + first.SetNext(last); + + ReadOnlySequence sequence = new ReadOnlySequence(first, first.Start, last, last.End); + ByteBufferReader reader = new ByteBufferReader(sequence); + + reader.AdvanceToEnd(); + + Assert.Equal(data.Length, reader.Length); + Assert.Equal(data.Length, reader.Consumed); + Assert.Equal(reader.Length, reader.Consumed); + Assert.True(reader.End); + Assert.Equal(0, reader.CurrentSpanIndex); + Assert.Equal(sequence.End, reader.Position); + Assert.Equal(0, reader.Remaining); + Assert.True(default == reader.UnreadSpan); + Assert.True(default == reader.CurrentSpan); + + // Rewind to second element + reader.Rewind(9); + + 
Assert.Equal(1, reader.Consumed); + Assert.False(reader.End); + Assert.Equal(1, reader.CurrentSpanIndex); + Assert.Equal(9, reader.Remaining); + Assert.Equal(sequence.Slice(1), reader.UnreadSequence); + + // Consume next five elements and stop at second element of second segment + reader.Advance(5); + + Assert.Equal(6, reader.Consumed); + Assert.False(reader.End); + Assert.Equal(1, reader.CurrentSpanIndex); + Assert.Equal(4, reader.Remaining); + Assert.Equal(sequence.Slice(6), reader.UnreadSequence); + + reader.AdvanceToEnd(); + + Assert.Equal(data.Length, reader.Length); + Assert.Equal(data.Length, reader.Consumed); + Assert.Equal(reader.Length, reader.Consumed); + Assert.True(reader.End); + Assert.Equal(0, reader.CurrentSpanIndex); + Assert.Equal(sequence.End, reader.Position); + Assert.Equal(0, reader.Remaining); + Assert.True(default == reader.UnreadSpan); + Assert.True(default == reader.CurrentSpan); + } + + [Fact] + public void AdvanceTo_End_Multiple() + { + ReadOnlySpan data = (byte[])_inputData.Clone(); + + SequenceSegment last = new SequenceSegment(); + last.SetMemory(new OwnedArray(data.Slice(5).ToArray()), 0, 5); + + SequenceSegment first = new SequenceSegment(); + first.SetMemory(new OwnedArray(data.Slice(0, 5).ToArray()), 0, 5); + first.SetNext(last); + + ReadOnlySequence sequence = new ReadOnlySequence(first, first.Start, last, last.End); + ByteBufferReader reader = new ByteBufferReader(sequence); + + reader.AdvanceToEnd(); + reader.AdvanceToEnd(); + reader.AdvanceToEnd(); + + Assert.Equal(data.Length, reader.Length); + Assert.Equal(data.Length, reader.Consumed); + Assert.Equal(reader.Length, reader.Consumed); + Assert.True(reader.End); + Assert.Equal(0, reader.CurrentSpanIndex); + Assert.Equal(sequence.End, reader.Position); + Assert.Equal(0, reader.Remaining); + Assert.True(default == reader.UnreadSpan); + Assert.True(default == reader.CurrentSpan); + } + + [Fact] + public void UnreadSequence() + { + ReadOnlySpan data = (byte[])_inputData.Clone(); + 
+ SequenceSegment last = new SequenceSegment(); + last.SetMemory(new OwnedArray(data.Slice(5).ToArray()), 0, 5); + + SequenceSegment first = new SequenceSegment(); + first.SetMemory(new OwnedArray(data.Slice(0, 5).ToArray()), 0, 5); + first.SetNext(last); + + ReadOnlySequence sequence = new ReadOnlySequence(first, first.Start, last, last.End); + ByteBufferReader reader = new ByteBufferReader(sequence); + + Assert.Equal(sequence, reader.UnreadSequence); + Assert.Equal(data.Length, reader.UnreadSequence.Length); + Assert.True(reader.TryRead(out byte _)); + Assert.True(reader.TryRead(out byte _)); + Assert.Equal(sequence.Slice(2), reader.UnreadSequence); + // Advance to the end + reader.Advance(8); + Assert.Equal(0, reader.UnreadSequence.Length); + } + + [Fact] + public void UnreadSequence_EmptySegment() + { + ReadOnlySpan data = (byte[])_inputData.Clone(); + + // Empty segment + SequenceSegment third = new SequenceSegment(); + + SequenceSegment second = new SequenceSegment(); + second.SetMemory(new OwnedArray(data.Slice(5).ToArray()), 0, 5); + second.SetNext(third); + + SequenceSegment first = new SequenceSegment(); + first.SetMemory(new OwnedArray(data.Slice(0, 5).ToArray()), 0, 5); + first.SetNext(second); + + ReadOnlySequence sequence = new ReadOnlySequence(first, first.Start, third, third.End); + ByteBufferReader reader = new ByteBufferReader(sequence); + + // Drain until the expected end of data with simple read + for (int i = 0; i < data.Length; i++) + { + reader.TryRead(out byte _); + } + + Assert.Equal(sequence.Slice(data.Length), reader.UnreadSequence); + Assert.Equal(0, reader.UnreadSequence.Length); + Assert.False(reader.TryRead(out byte _)); + } + [Fact] public void CopyToSmallerBufferWorks() { diff --git a/test/DotNetty.Buffers.ReaderWriter.Tests/test_corefx/ReadTo.cs b/test/DotNetty.Buffers.ReaderWriter.Tests/test_corefx/ReadTo.cs index b1800fe50..7ecea18bd 100644 --- a/test/DotNetty.Buffers.ReaderWriter.Tests/test_corefx/ReadTo.cs +++ 
b/test/DotNetty.Buffers.ReaderWriter.Tests/test_corefx/ReadTo.cs @@ -121,7 +121,7 @@ public void TryReadToSpan_Sequence(bool advancePastDelimiter) new byte[] { 3, 3, 4, 4, 5, 5, 6, 6 } }); - ByteBufferReader reader = new ByteBufferReader(bytes); + ByteBufferReader baseReader = new ByteBufferReader(bytes); for (byte i = 0; i < bytes.Length / 2 - 1; i++) { byte[] expected = new byte[i * 2 + 1]; @@ -131,7 +131,12 @@ public void TryReadToSpan_Sequence(bool advancePastDelimiter) } expected[i * 2] = i; ReadOnlySpan searchFor = new byte[] { i, (byte)(i + 1) }; - ByteBufferReader copy = reader; + ByteBufferReader copy = baseReader; + + Assert.True(copy.TryReadTo(out ReadOnlySpan sp, searchFor, advancePastDelimiter)); + Assert.True(sp.SequenceEqual(expected)); + + copy = baseReader; Assert.True(copy.TryReadTo(out ReadOnlySequence seq, searchFor, advancePastDelimiter)); Assert.True(seq.ToArray().AsSpan().SequenceEqual(expected)); } @@ -140,8 +145,14 @@ public void TryReadToSpan_Sequence(bool advancePastDelimiter) new byte[] { 47, 42, 66, 32, 42, 32, 66, 42, 47 } // /*b * b*/ }); - reader = new ByteBufferReader(bytes); - Assert.True(reader.TryReadTo(out ReadOnlySequence sequence, new byte[] { 42, 47 }, advancePastDelimiter)); // */ + baseReader = new ByteBufferReader(bytes); + ByteBufferReader copyReader = baseReader; + + Assert.True(copyReader.TryReadTo(out ReadOnlySpan span, new byte[] { 42, 47 }, advancePastDelimiter)); // */ + Assert.True(span.SequenceEqual(new byte[] { 47, 42, 66, 32, 42, 32, 66 })); + + copyReader = baseReader; + Assert.True(copyReader.TryReadTo(out ReadOnlySequence sequence, new byte[] { 42, 47 }, advancePastDelimiter)); // */ Assert.True(sequence.ToArray().AsSpan().SequenceEqual(new byte[] { 47, 42, 66, 32, 42, 32, 66 })); } @@ -183,17 +194,30 @@ public void TryReadTo_SingleDelimiter() new byte[] { 2, 3, 4, 5, 6 } }); - ByteBufferReader reader = new ByteBufferReader(bytes); + ByteBufferReader baseReader = new ByteBufferReader(bytes); + + 
ByteBufferReader spanReader = baseReader; + ByteBufferReader sequenceReader = baseReader; Span delimiter = new byte[] { 1 }; for (int i = 1; i < 6; i += 1) { // Also check scanning from the start. - ByteBufferReader resetReader = new ByteBufferReader(bytes); + ByteBufferReader resetReader = baseReader; delimiter[0] = (byte)i; - Assert.True(reader.TryReadTo(out ReadOnlySequence sequence, delimiter, advancePastDelimiter: true)); + Assert.True(spanReader.TryReadTo(out ReadOnlySpan span, delimiter, advancePastDelimiter: true)); + Assert.True(resetReader.TryReadTo(out span, delimiter, advancePastDelimiter: true)); + Assert.True(spanReader.TryPeek(out byte value)); + Assert.Equal(i + 1, value); + Assert.True(resetReader.TryPeek(out value)); + Assert.Equal(i + 1, value); + + // Also check scanning from the start. + resetReader = baseReader; + delimiter[0] = (byte)i; + Assert.True(sequenceReader.TryReadTo(out ReadOnlySequence sequence, delimiter, advancePastDelimiter: true)); Assert.True(resetReader.TryReadTo(out sequence, delimiter, advancePastDelimiter: true)); - Assert.True(reader.TryPeek(out byte value)); + Assert.True(sequenceReader.TryPeek(out value)); Assert.Equal(i + 1, value); Assert.True(resetReader.TryPeek(out value)); Assert.Equal(i + 1, value); @@ -208,7 +232,9 @@ public void TryReadTo_Span_At_Segments_Boundary() segment.Append(Text.Encoding.ASCII.GetBytes("\nWorld")); // add next segment ReadOnlySequence inputSeq = new ReadOnlySequence(segment, 0, segment, 6); // span only the first segment! 
ByteBufferReader sr = new ByteBufferReader(inputSeq); - bool r = sr.TryReadTo(out _, delimiter); + bool r = sr.TryReadTo(out ReadOnlySpan _, delimiter); + Assert.False(r); + r = sr.TryReadTo(out ReadOnlySequence _, delimiter); Assert.False(r); } } diff --git a/test/DotNetty.Buffers.ReaderWriter.Tests/test_corefxlab/ReaderBasicTests.cs b/test/DotNetty.Buffers.ReaderWriter.Tests/test_corefxlab/ReaderBasicTests.cs index 414152ee0..b2dc85d57 100644 --- a/test/DotNetty.Buffers.ReaderWriter.Tests/test_corefxlab/ReaderBasicTests.cs +++ b/test/DotNetty.Buffers.ReaderWriter.Tests/test_corefxlab/ReaderBasicTests.cs @@ -83,7 +83,7 @@ public void DefaultState() Assert.True(sequence.IsEmpty); Assert.False(reader.TryReadTo(out sequence, array)); Assert.True(sequence.IsEmpty); - Assert.False(reader.TryReadTo(out ReadOnlySpan span, default)); + Assert.False(reader.TryReadTo(out ReadOnlySpan span, default(byte))); Assert.True(span.IsEmpty); Assert.False(reader.TryReadToAny(out sequence, array)); Assert.True(sequence.IsEmpty); From 9e0a7409f3449e96b2ac990c43a2ba4a188d3728 Mon Sep 17 00:00:00 2001 From: cuteant Date: Thu, 24 Jun 2021 22:13:10 +0800 Subject: [PATCH 4/5] Using MemoryMarshal.GetArrayDataReference in .net5.0 --- src/DotNetty.Buffers/ArrayPooledByteBuffer.cs | 7 +++++++ .../ArrayPooledUnsafeDirectByteBuffer.cs | 12 +++++++++++- src/DotNetty.Buffers/PooledHeapByteBuffer.cs | 8 ++++++++ src/DotNetty.Buffers/UnpooledHeapByteBuffer.cs | 7 +++++++ .../UnpooledUnsafeDirectByteBuffer.cs | 16 +++++++++++++++- .../Internal/PlatformDependent.cs | 8 ++++++++ 6 files changed, 56 insertions(+), 2 deletions(-) diff --git a/src/DotNetty.Buffers/ArrayPooledByteBuffer.cs b/src/DotNetty.Buffers/ArrayPooledByteBuffer.cs index e75b040b3..ff0e311b9 100644 --- a/src/DotNetty.Buffers/ArrayPooledByteBuffer.cs +++ b/src/DotNetty.Buffers/ArrayPooledByteBuffer.cs @@ -24,6 +24,9 @@ using System.Buffers; using DotNetty.Common; using DotNetty.Common.Internal; +#if NET +using 
System.Runtime.InteropServices; +#endif namespace DotNetty.Buffers { @@ -186,7 +189,11 @@ public sealed override byte[] Array public sealed override ref byte GetPinnableMemoryAddress() { EnsureAccessible(); +#if NET + return ref MemoryMarshal.GetArrayDataReference(Memory); +#else return ref Memory[0]; +#endif } public sealed override IntPtr AddressOfPinnedMemory() => IntPtr.Zero; diff --git a/src/DotNetty.Buffers/ArrayPooledUnsafeDirectByteBuffer.cs b/src/DotNetty.Buffers/ArrayPooledUnsafeDirectByteBuffer.cs index 49146c5cb..4c150bd70 100644 --- a/src/DotNetty.Buffers/ArrayPooledUnsafeDirectByteBuffer.cs +++ b/src/DotNetty.Buffers/ArrayPooledUnsafeDirectByteBuffer.cs @@ -27,6 +27,9 @@ using System.Threading; using System.Threading.Tasks; using DotNetty.Common; +#if NET +using System.Runtime.InteropServices; +#endif namespace DotNetty.Buffers { @@ -248,7 +251,14 @@ public sealed override IByteBuffer Copy(int index, int length) } [MethodImpl(InlineMethod.AggressiveOptimization)] - ref byte Addr(int index) => ref Memory[index]; + ref byte Addr(int index) + { +#if NET + return ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(Memory), index); +#else + return ref Memory[index]; +#endif + } public sealed override IByteBuffer SetZero(int index, int length) { diff --git a/src/DotNetty.Buffers/PooledHeapByteBuffer.cs b/src/DotNetty.Buffers/PooledHeapByteBuffer.cs index 78fd81e96..f3d47c279 100644 --- a/src/DotNetty.Buffers/PooledHeapByteBuffer.cs +++ b/src/DotNetty.Buffers/PooledHeapByteBuffer.cs @@ -31,6 +31,10 @@ namespace DotNetty.Buffers using System.Threading.Tasks; using DotNetty.Common; using DotNetty.Common.Internal; +#if NET + using System.Runtime.CompilerServices; + using System.Runtime.InteropServices; +#endif sealed partial class PooledHeapByteBuffer : PooledByteBuffer { @@ -212,7 +216,11 @@ public sealed override byte[] Array public sealed override ref byte GetPinnableMemoryAddress() { EnsureAccessible(); +#if NET + return ref Unsafe.Add(ref 
MemoryMarshal.GetArrayDataReference(Memory), Offset); +#else return ref Memory[Offset]; +#endif } public sealed override IntPtr AddressOfPinnedMemory() => IntPtr.Zero; diff --git a/src/DotNetty.Buffers/UnpooledHeapByteBuffer.cs b/src/DotNetty.Buffers/UnpooledHeapByteBuffer.cs index 45a798b14..8ec1653be 100644 --- a/src/DotNetty.Buffers/UnpooledHeapByteBuffer.cs +++ b/src/DotNetty.Buffers/UnpooledHeapByteBuffer.cs @@ -30,6 +30,9 @@ namespace DotNetty.Buffers using System.Threading; using System.Threading.Tasks; using DotNetty.Common.Internal; +#if NET + using System.Runtime.InteropServices; +#endif partial class UnpooledHeapByteBuffer : AbstractReferenceCountedByteBuffer { @@ -134,7 +137,11 @@ public sealed override byte[] Array public sealed override ref byte GetPinnableMemoryAddress() { EnsureAccessible(); +#if NET + return ref MemoryMarshal.GetArrayDataReference(_array); +#else return ref _array[0]; +#endif } public sealed override IntPtr AddressOfPinnedMemory() => IntPtr.Zero; diff --git a/src/DotNetty.Buffers/UnpooledUnsafeDirectByteBuffer.cs b/src/DotNetty.Buffers/UnpooledUnsafeDirectByteBuffer.cs index 04d552578..dc8fb48c9 100644 --- a/src/DotNetty.Buffers/UnpooledUnsafeDirectByteBuffer.cs +++ b/src/DotNetty.Buffers/UnpooledUnsafeDirectByteBuffer.cs @@ -31,6 +31,9 @@ namespace DotNetty.Buffers using System.Threading; using System.Threading.Tasks; using DotNetty.Common.Internal; +#if NET + using System.Runtime.InteropServices; +#endif unsafe partial class UnpooledUnsafeDirectByteBuffer : AbstractReferenceCountedByteBuffer { @@ -190,7 +193,11 @@ protected internal sealed override void Deallocate() public sealed override ref byte GetPinnableMemoryAddress() { EnsureAccessible(); +#if NET + return ref MemoryMarshal.GetArrayDataReference(_buffer); +#else return ref _buffer[0]; +#endif } public sealed override bool IsContiguous => true; @@ -393,7 +400,14 @@ public sealed override IByteBuffer Copy(int index, int length) } 
[MethodImpl(InlineMethod.AggressiveOptimization)] - ref byte Addr(int index) => ref _buffer[index]; + ref byte Addr(int index) + { +#if NET + return ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(_buffer), index); +#else + return ref _buffer[index]; +#endif + } public sealed override IByteBuffer SetZero(int index, int length) { diff --git a/src/DotNetty.Common/Internal/PlatformDependent.cs b/src/DotNetty.Common/Internal/PlatformDependent.cs index c56552e62..01d4ae25b 100644 --- a/src/DotNetty.Common/Internal/PlatformDependent.cs +++ b/src/DotNetty.Common/Internal/PlatformDependent.cs @@ -12,6 +12,9 @@ namespace DotNetty.Common.Internal using System.Threading; using DotNetty.Common.Internal.Logging; using DotNetty.Common.Utilities; +#if NET + using System.Runtime.InteropServices; +#endif using static PlatformDependent0; @@ -262,6 +265,11 @@ public static void CopyMemory(byte[] src, int srcIndex, byte[] dst, int dstIndex } } } +#elif NET + Unsafe.CopyBlockUnaligned( + ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(dst), dstIndex), + ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(src), srcIndex), + nlen); #else Unsafe.CopyBlockUnaligned(ref dst[dstIndex], ref src[srcIndex], nlen); #endif From e55c9b9d9e2e282280435a06578fd18bbae9ba52 Mon Sep 17 00:00:00 2001 From: cuteant Date: Thu, 24 Jun 2021 23:00:15 +0800 Subject: [PATCH 5/5] added MyGet package info in readme --- README.md | 24 ++++++++++++------------ localPublish.cmd | 2 +- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index a83390243..20f9b2f6c 100644 --- a/README.md +++ b/README.md @@ -30,18 +30,18 @@ This is a fork of [DotNetty](https://github.com/azure/dotnetty). * Nightly builds are available on [MyGet](https://www.myget.org/F/cuteant/api/v2). 
-|NuGet Package|Status| -|------|-------------| -|SpanNetty.Common|[![NuGet Version and Downloads count](https://buildstats.info/nuget/SpanNetty.Common)](https://www.nuget.org/packages/SpanNetty.Common/)| -|SpanNetty.Buffers|[![NuGet Version and Downloads count](https://buildstats.info/nuget/SpanNetty.Buffers)](https://www.nuget.org/packages/SpanNetty.Buffers/)| -|SpanNetty.Codecs|[![NuGet Version and Downloads count](https://buildstats.info/nuget/SpanNetty.Codecs)](https://www.nuget.org/packages/SpanNetty.Codecs/)| -|SpanNetty.Codecs.Http|[![NuGet Version and Downloads count](https://buildstats.info/nuget/SpanNetty.Codecs.Http)](https://www.nuget.org/packages/SpanNetty.Codecs.Http/)| -|SpanNetty.Codecs.Http2|[![NuGet Version and Downloads count](https://buildstats.info/nuget/SpanNetty.Codecs.Http2)](https://www.nuget.org/packages/SpanNetty.Codecs.Http2/)| -|SpanNetty.Codecs.Mqtt|[![NuGet Version and Downloads count](https://buildstats.info/nuget/SpanNetty.Codecs.Mqtt)](https://www.nuget.org/packages/SpanNetty.Codecs.Mqtt/)| -|SpanNetty.Codecs.Protobuf|[![NuGet Version and Downloads count](https://buildstats.info/nuget/SpanNetty.Codecs.Protobuf)](https://www.nuget.org/packages/SpanNetty.Codecs.Protobuf/)| -|SpanNetty.Handlers|[![NuGet Version and Downloads count](https://buildstats.info/nuget/SpanNetty.Handlers)](https://www.nuget.org/packages/SpanNetty.Handlers/)| -|SpanNetty.Transport|[![NuGet Version and Downloads count](https://buildstats.info/nuget/SpanNetty.Transport)](https://www.nuget.org/packages/SpanNetty.Transport/)| -|SpanNetty.Transport.Libuv|[![NuGet Version and Downloads count](https://buildstats.info/nuget/SpanNetty.Transport.Libuv)](https://www.nuget.org/packages/SpanNetty.Transport.Libuv/)| +|Package|NuGet Version|MyGet Version| +|------|-------------|-------------| +|SpanNetty.Common|[![NuGet Version and Downloads count](https://buildstats.info/nuget/SpanNetty.Common)](https://www.nuget.org/packages/SpanNetty.Common/)|[![MyGet 
Version](https://img.shields.io/myget/cuteant/vpre/SpanNetty.Common)](https://www.myget.org/feed/cuteant/package/nuget/SpanNetty.Common)| +|SpanNetty.Buffers|[![NuGet Version and Downloads count](https://buildstats.info/nuget/SpanNetty.Buffers)](https://www.nuget.org/packages/SpanNetty.Buffers/)|[![MyGet Version](https://img.shields.io/myget/cuteant/vpre/SpanNetty.Buffers)](https://www.myget.org/feed/cuteant/package/nuget/SpanNetty.Buffers)| +|SpanNetty.Codecs|[![NuGet Version and Downloads count](https://buildstats.info/nuget/SpanNetty.Codecs)](https://www.nuget.org/packages/SpanNetty.Codecs/)|[![MyGet Version](https://img.shields.io/myget/cuteant/vpre/SpanNetty.Codecs)](https://www.myget.org/feed/cuteant/package/nuget/SpanNetty.Codecs)| +|SpanNetty.Codecs.Http|[![NuGet Version and Downloads count](https://buildstats.info/nuget/SpanNetty.Codecs.Http)](https://www.nuget.org/packages/SpanNetty.Codecs.Http/)|[![MyGet Version](https://img.shields.io/myget/cuteant/vpre/SpanNetty.Codecs.Http)](https://www.myget.org/feed/cuteant/package/nuget/SpanNetty.Codecs.Http)| +|SpanNetty.Codecs.Http2|[![NuGet Version and Downloads count](https://buildstats.info/nuget/SpanNetty.Codecs.Http2)](https://www.nuget.org/packages/SpanNetty.Codecs.Http2/)|[![MyGet Version](https://img.shields.io/myget/cuteant/vpre/SpanNetty.Codecs.Http2)](https://www.myget.org/feed/cuteant/package/nuget/SpanNetty.Codecs.Http2)| +|SpanNetty.Codecs.Mqtt|[![NuGet Version and Downloads count](https://buildstats.info/nuget/SpanNetty.Codecs.Mqtt)](https://www.nuget.org/packages/SpanNetty.Codecs.Mqtt/)|[![MyGet Version](https://img.shields.io/myget/cuteant/vpre/SpanNetty.Codecs.Mqtt)](https://www.myget.org/feed/cuteant/package/nuget/SpanNetty.Codecs.Mqtt)| +|SpanNetty.Codecs.Protobuf|[![NuGet Version and Downloads count](https://buildstats.info/nuget/SpanNetty.Codecs.Protobuf)](https://www.nuget.org/packages/SpanNetty.Codecs.Protobuf/)|[![MyGet 
Version](https://img.shields.io/myget/cuteant/vpre/SpanNetty.Codecs.Protobuf)](https://www.myget.org/feed/cuteant/package/nuget/SpanNetty.Codecs.Protobuf)| +|SpanNetty.Handlers|[![NuGet Version and Downloads count](https://buildstats.info/nuget/SpanNetty.Handlers)](https://www.nuget.org/packages/SpanNetty.Handlers/)|[![MyGet Version](https://img.shields.io/myget/cuteant/vpre/SpanNetty.Handlers)](https://www.myget.org/feed/cuteant/package/nuget/SpanNetty.Handlers)| +|SpanNetty.Transport|[![NuGet Version and Downloads count](https://buildstats.info/nuget/SpanNetty.Transport)](https://www.nuget.org/packages/SpanNetty.Transport/)|[![MyGet Version](https://img.shields.io/myget/cuteant/vpre/SpanNetty.Transport)](https://www.myget.org/feed/cuteant/package/nuget/SpanNetty.Transport)| +|SpanNetty.Transport.Libuv|[![NuGet Version and Downloads count](https://buildstats.info/nuget/SpanNetty.Transport.Libuv)](https://www.nuget.org/packages/SpanNetty.Transport.Libuv/)|[![MyGet Version](https://img.shields.io/myget/cuteant/vpre/SpanNetty.Transport.Libuv)](https://www.myget.org/feed/cuteant/package/nuget/SpanNetty.Transport.Libuv)| ## Performance diff --git a/localPublish.cmd b/localPublish.cmd index b79f592cf..09e1d5cf2 100644 --- a/localPublish.cmd +++ b/localPublish.cmd @@ -18,7 +18,7 @@ call Ensure-DotNetSdk.cmd SET SOLUTION=%CMDHOME%\DotNetty.CrossPlatform.sln :: Set DateTime prefix or suffix for builds -if "%PublishConfiguration%" == "dev" for /f %%j in ('powershell -NoProfile -ExecutionPolicy ByPass Get-Date -format "{yyMMddHHmm}"') do set DATE_SUFFIX=%%j +if "%PublishConfiguration%" == "dev" for /f %%j in ('powershell -NoProfile -ExecutionPolicy ByPass Get-Date -format "{yyMMdd}"') do set DATE_SUFFIX=%%j if "%PublishConfiguration%" == "dev" SET AdditionalConfigurationProperties=;VersionDateSuffix=%DATE_SUFFIX% if "%PublishConfiguration%" == "release" for /f %%j in ('powershell -NoProfile -ExecutionPolicy ByPass Get-Date -format "{yyMM}"') do set YEAR_PREFIX=%%j if 
"%PublishConfiguration%" == "release" for /f %%j in ('powershell -NoProfile -ExecutionPolicy ByPass Get-Date -format "{ddHH}"') do set DATE_PREFIX=%%j