Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve Ascii (and Utf8) encoding #85266

Merged
merged 9 commits into from
May 12, 2023
Original file line number Diff line number Diff line change
Expand Up @@ -2718,6 +2718,24 @@ public static unsafe void StoreAligned<T>(this Vector128<T> source, T* destinati
public static unsafe void StoreAlignedNonTemporal<T>(this Vector128<T> source, T* destination)
    where T : unmanaged
{
    // This overload simply forwards to StoreAligned with the same destination pointer.
    source.StoreAligned(destination);
}

/// <summary>
/// Stores the lower 64 bits of <paramref name="source"/> to memory at <paramref name="destination"/>[<paramref name="elementOffset"/>].
/// </summary>
/// <typeparam name="T">The type of the elements in the vector.</typeparam>
/// <param name="source">The vector whose lower 64 bits will be stored.</param>
/// <param name="destination">The destination to which <paramref name="elementOffset" /> will be added before the lower half of the vector is stored.</param>
/// <param name="elementOffset">The offset, in elements of <typeparamref name="T"/>, from <paramref name="destination" /> at which the store begins.</param>
/// <remarks>
/// The lower half is reinterpreted as a <see cref="double"/> rather than a <see cref="long"/> so that the store
/// can be emitted as a single instruction instead of moving the value through a general purpose register (or the stack).
/// The write is unaligned and performs no bounds checking.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static void StoreLowerUnsafe<T>(this Vector128<T> source, ref T destination, nuint elementOffset = 0)
    where T : struct
{
    // Element 0 of the double view carries the lower 64 bits of the vector.
    double lowerHalf = source.AsDouble().ToScalar();

    // Offset in units of T first, then view the target as raw bytes for the unaligned 8-byte write.
    ref T target = ref Unsafe.Add(ref destination, elementOffset);
    Unsafe.WriteUnaligned(ref Unsafe.As<T, byte>(ref target), lowerHalf);
}

/// <summary>Stores a vector at the given destination.</summary>
/// <typeparam name="T">The type of the elements in the vector.</typeparam>
/// <param name="source">The vector that will be stored.</param>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -463,41 +463,6 @@ private static unsafe nuint ChangeCase<TFrom, TTo, TCasing>(TFrom* pSrc, TTo* pD
return i;
}

/// <summary>
/// Zero-extends the 16 bytes of <paramref name="narrowVector"/> to 16-bit elements and writes the
/// resulting 16 elements to <paramref name="pDest"/> starting at element index <paramref name="destOffset"/>.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe void Widen8To16AndAndWriteTo(Vector128<byte> narrowVector, char* pDest, nuint destOffset)
{
    ref ushort destination = ref *(ushort*)pDest;

    if (!Vector256.IsHardwareAccelerated)
    {
        // Two 128-bit stores: elements 0-7 come from the lower half, elements 8-15 from the upper half.
        Vector128<ushort> lowerHalf = Vector128.WidenLower(narrowVector);
        Vector128<ushort> upperHalf = Vector128.WidenUpper(narrowVector);
        lowerHalf.StoreUnsafe(ref destination, destOffset);
        upperHalf.StoreUnsafe(ref destination, destOffset + 8);
        return;
    }

    // Only the lower 128 bits of the 256-bit view are widened, so the undefined upper bits
    // left by ToVector256Unsafe are never observed.
    Vector256<ushort> widened = Vector256.WidenLower(narrowVector.ToVector256Unsafe());
    widened.StoreUnsafe(ref destination, destOffset);
}

/// <summary>
/// Narrows the eight 16-bit elements of <paramref name="wideVector"/> to bytes and writes those
/// 8 bytes to <paramref name="pDest"/> starting at byte index <paramref name="destOffset"/>.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe void Narrow16To8AndAndWriteTo(Vector128<ushort> wideVector, byte* pDest, nuint destOffset)
{
    // The same input is passed twice; only the lower 8 bytes of the result are stored below.
    Vector128<byte> packed = Vector128.Narrow(wideVector, wideVector);
    byte* pTarget = pDest + destOffset;

    if (Sse2.IsSupported)
    {
        // MOVQ is supported even on x86, unaligned accesses allowed
        Sse2.StoreScalar((ulong*)pTarget, packed.AsUInt64());
        return;
    }

    if (Vector64.IsHardwareAccelerated)
    {
        packed.GetLower().StoreUnsafe(ref *pDest, destOffset);
        return;
    }

    // Fallback: write the lower 64 bits as a single unaligned ulong.
    Unsafe.WriteUnaligned<ulong>(pTarget, packed.AsUInt64().ToScalar());
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe void ChangeWidthAndWriteTo<TFrom, TTo>(Vector128<TFrom> vector, TTo* pDest, nuint elementOffset)
where TFrom : unmanaged
Expand All @@ -524,12 +489,9 @@ private static unsafe void ChangeWidthAndWriteTo<TFrom, TTo>(Vector128<TFrom> ve
}
else if (sizeof(TFrom) == 2 && sizeof(TTo) == 1)
{
// narrowing operation required
// since we know data is all-ASCII, special-case SSE2 to avoid unneeded PAND in Narrow call
Vector128<byte> narrow = (Sse2.IsSupported)
? Sse2.PackUnsignedSaturate(vector.AsInt16(), vector.AsInt16())
: Vector128.Narrow(vector.AsUInt16(), vector.AsUInt16());
narrow.GetLower().StoreUnsafe(ref *(byte*)pDest, elementOffset);
// narrowing operation required, we know data is all-ASCII so use extract helper
Vector128<byte> narrow = ExtractAsciiVector(vector.AsUInt16(), vector.AsUInt16());
narrow.StoreLowerUnsafe(ref *(byte*)pDest, elementOffset);
}
else
{
Expand All @@ -556,25 +518,6 @@ private static unsafe Vector128<T> SignedLessThan<T>(Vector128<T> left, Vector12
}
}

/// <summary>
/// Converts between 8-bit and 16-bit element vectors based on the sizes of
/// <typeparamref name="TFrom"/> and <typeparamref name="TTo"/>:
/// 1 byte to 2 bytes widens the lower half; 2 bytes to 1 byte narrows the vector paired with itself.
/// </summary>
/// <exception cref="NotSupportedException">The element size combination is neither 1-to-2 nor 2-to-1.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe Vector128<TTo> NarrowOrWidenLowerVectorUnsigned<TFrom, TTo>(Vector128<TFrom> vector)
    where TFrom : unmanaged
    where TTo : unmanaged
{
    if (sizeof(TFrom) == 1 && sizeof(TTo) == 2)
    {
        // Widening: the lower 8 bytes become 8 ushort elements.
        Vector128<ushort> widened = Vector128.WidenLower(vector.AsByte());
        return widened.As<ushort, TTo>();
    }

    if (sizeof(TFrom) == 2 && sizeof(TTo) == 1)
    {
        // Narrowing: the same input supplies both halves of the result.
        Vector128<ushort> wide = vector.AsUInt16();
        Vector128<byte> narrowed = Vector128.Narrow(wide, wide);
        return narrowed.As<byte, TTo>();
    }

    throw new NotSupportedException();
}

// Empty marker type with no members.
// NOTE(review): presumably supplied as the TCasing type argument of ChangeCase to select upper-casing — confirm against callers.
private struct ToUpperConversion { }
// Empty marker type with no members.
// NOTE(review): presumably supplied as the TCasing type argument of ChangeCase to select lower-casing — confirm against callers.
private struct ToLowerConversion { }
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1510,6 +1510,7 @@ private static Vector128<byte> ExtractAsciiVector(Vector128<ushort> vectorFirst,
}
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe nuint NarrowUtf16ToAscii_Intrinsified(char* pUtf16Buffer, byte* pAsciiBuffer, nuint elementCount)
{
// This method contains logic optimized using vector instructions for both x64 and Arm64.
Expand Down Expand Up @@ -1542,7 +1543,7 @@ private static unsafe nuint NarrowUtf16ToAscii_Intrinsified(char* pUtf16Buffer,

ref byte asciiBuffer = ref *pAsciiBuffer;
Vector128<byte> asciiVector = ExtractAsciiVector(utf16VectorFirst, utf16VectorFirst);
asciiVector.GetLower().StoreUnsafe(ref asciiBuffer);
asciiVector.StoreLowerUnsafe(ref asciiBuffer, 0);
nuint currentOffsetInElements = SizeOfVector128 / 2; // we processed 8 elements so far

// We're going to get the best performance when we have aligned writes, so we'll take the
Expand All @@ -1569,7 +1570,7 @@ private static unsafe nuint NarrowUtf16ToAscii_Intrinsified(char* pUtf16Buffer,

// Turn the 8 ASCII chars we just read into 8 ASCII bytes, then copy it to the destination.
asciiVector = ExtractAsciiVector(utf16VectorFirst, utf16VectorFirst);
asciiVector.GetLower().StoreUnsafe(ref asciiBuffer, currentOffsetInElements);
asciiVector.StoreLowerUnsafe(ref asciiBuffer, currentOffsetInElements);
}

// Calculate how many elements we wrote in order to get pAsciiBuffer to its next alignment
Expand Down Expand Up @@ -1622,7 +1623,7 @@ private static unsafe nuint NarrowUtf16ToAscii_Intrinsified(char* pUtf16Buffer,

Debug.Assert(((nuint)pAsciiBuffer + currentOffsetInElements) % sizeof(ulong) == 0, "Destination should be ulong-aligned.");
asciiVector = ExtractAsciiVector(utf16VectorFirst, utf16VectorFirst);
asciiVector.GetLower().StoreUnsafe(ref asciiBuffer, currentOffsetInElements);
asciiVector.StoreLowerUnsafe(ref asciiBuffer, currentOffsetInElements);
currentOffsetInElements += SizeOfVector128 / 2;

goto Finish;
Expand Down