From 973ceee20c4807c780f72e24a1859a2c695ddf39 Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Fri, 23 Feb 2024 16:11:15 +0100 Subject: [PATCH] JIT: Intrinsify ClearWithoutReferences and Fill (#98700) --------- Co-authored-by: SingleAccretion <62474226+SingleAccretion@users.noreply.github.com> --- src/coreclr/jit/fgbasic.cpp | 2 + src/coreclr/jit/fgprofile.cpp | 3 + src/coreclr/jit/importercalls.cpp | 19 ++ src/coreclr/jit/lower.cpp | 186 +++++++++++++++++- src/coreclr/jit/lower.h | 1 + src/coreclr/jit/namedintrinsiclist.h | 2 + .../src/System/SpanHelpers.T.cs | 1 + .../src/System/SpanHelpers.cs | 1 + 8 files changed, 211 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp index ee0f99b32ce838..27aa0966095eac 100644 --- a/src/coreclr/jit/fgbasic.cpp +++ b/src/coreclr/jit/fgbasic.cpp @@ -1329,6 +1329,8 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed break; } + case NI_System_SpanHelpers_ClearWithoutReferences: + case NI_System_SpanHelpers_Fill: case NI_System_SpanHelpers_SequenceEqual: case NI_System_Buffer_Memmove: { diff --git a/src/coreclr/jit/fgprofile.cpp b/src/coreclr/jit/fgprofile.cpp index 02a4c22d0aade1..a92e7d260c3043 100644 --- a/src/coreclr/jit/fgprofile.cpp +++ b/src/coreclr/jit/fgprofile.cpp @@ -2544,6 +2544,9 @@ PhaseStatus Compiler::fgPrepareToInstrumentMethod() case NI_System_MemoryExtensions_Equals: case NI_System_MemoryExtensions_SequenceEqual: case NI_System_MemoryExtensions_StartsWith: + case NI_System_SpanHelpers_Fill: + case NI_System_SpanHelpers_SequenceEqual: + case NI_System_SpanHelpers_ClearWithoutReferences: // Same here, these are only folded when JIT knows the exact types case NI_System_Type_IsAssignableFrom: diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index b557c629e35583..b897e783a7268b 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -3982,6 +3982,7 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, case NI_System_Text_UTF8Encoding_UTF8EncodingSealed_ReadUtf8: case NI_System_SpanHelpers_SequenceEqual: + case NI_System_SpanHelpers_ClearWithoutReferences: case NI_System_Buffer_Memmove: { if (sig->sigInst.methInstCount == 0) @@ -3993,6 +3994,16 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, break; } + case NI_System_SpanHelpers_Fill: + { + if (sig->sigInst.methInstCount == 1) + { + // We'll try to unroll this in lower for constant input. + isSpecial = true; + } + break; + } + case NI_System_BitConverter_DoubleToInt64Bits: { GenTree* op1 = impStackTop().val; @@ -9021,6 +9032,14 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) { result = NI_System_SpanHelpers_SequenceEqual; } + else if (strcmp(methodName, "Fill") == 0) + { + result = NI_System_SpanHelpers_Fill; + } + else if (strcmp(methodName, "ClearWithoutReferences") == 0) + { + result = NI_System_SpanHelpers_ClearWithoutReferences; + } } else if (strcmp(className, "String") == 0) { diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 18d7f388e908a7..f9cd17f6510046 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -1842,6 +1842,157 @@ GenTree* Lowering::AddrGen(void* addr) return AddrGen((ssize_t)addr); } +// LowerCallMemset: Replaces the following memset-like special intrinsics: +// +// SpanHelpers.Fill(ref dstRef, CNS_SIZE, CNS_VALUE) +// CORINFO_HELP_MEMSET(ref dstRef, CNS_VALUE, CNS_SIZE) +// SpanHelpers.ClearWithoutReferences(ref dstRef, CNS_SIZE) +// +// with a GT_STORE_BLK node: +// +// * STORE_BLK struct (init) (Unroll) +// +--* LCL_VAR byref dstRef +// \--* CNS_INT int 0 +// +// Arguments: +// tree - GenTreeCall node to replace with STORE_BLK +// next - [out] Next node to lower if this function returns true +// +// Return Value: +// false if no changes were made +// +bool Lowering::LowerCallMemset(GenTreeCall* call, GenTree** next) +{ + assert(call->IsSpecialIntrinsic(comp, NI_System_SpanHelpers_Fill) || + call->IsSpecialIntrinsic(comp, NI_System_SpanHelpers_ClearWithoutReferences) || + call->IsHelperCall(comp, CORINFO_HELP_MEMSET)); + + JITDUMP("Considering Memset-like call [%06d] for unrolling.. ", comp->dspTreeID(call)) + + if (comp->info.compHasNextCallRetAddr) + { + JITDUMP("compHasNextCallRetAddr=true so we won't be able to remove the call - bail out.\n"); + return false; + } + + GenTree* dstRefArg = call->gtArgs.GetUserArgByIndex(0)->GetNode(); + GenTree* lengthArg; + GenTree* valueArg; + + // Fill's length is not in bytes, so we need to scale it depending on the signature + unsigned lengthScale; + + if (call->IsSpecialIntrinsic(comp, NI_System_SpanHelpers_Fill)) + { + // void SpanHelpers::Fill(ref T refData, nuint numElements, T value) + // + assert(call->gtArgs.CountUserArgs() == 3); + lengthArg = call->gtArgs.GetUserArgByIndex(1)->GetNode(); + CallArg* valueCallArg = call->gtArgs.GetUserArgByIndex(2); + valueArg = valueCallArg->GetNode(); + + // Get that from the signature + lengthScale = genTypeSize(valueCallArg->GetSignatureType()); + // NOTE: structs and TYP_REF will be ignored by the "Value is not a constant" check + // Some of those cases can be enabled in future, e.g. s + } + else if (call->IsHelperCall(comp, CORINFO_HELP_MEMSET)) + { + // void CORINFO_HELP_MEMSET(ref T refData, byte value, nuint numElements) + // + assert(call->gtArgs.CountUserArgs() == 3); + lengthArg = call->gtArgs.GetUserArgByIndex(2)->GetNode(); + valueArg = call->gtArgs.GetUserArgByIndex(1)->GetNode(); + lengthScale = 1; // it's always in bytes + } + else + { + // void SpanHelpers::ClearWithoutReferences(ref byte b, nuint byteLength) + // + assert(call->IsSpecialIntrinsic(comp, NI_System_SpanHelpers_ClearWithoutReferences)); + assert(call->gtArgs.CountUserArgs() == 2); + + // Simple zeroing + lengthArg = call->gtArgs.GetUserArgByIndex(1)->GetNode(); + valueArg = comp->gtNewZeroConNode(TYP_INT); + lengthScale = 1; // it's always in bytes + } + + if (!lengthArg->IsIntegralConst()) + { + JITDUMP("Length is not a constant - bail out.\n"); + return false; + } + + if (!valueArg->IsCnsIntOrI() || !valueArg->TypeIs(TYP_INT)) + { + JITDUMP("Value is not a constant - bail out.\n"); + return false; + } + + // If value is not zero, we can only unroll for single-byte values + if (!valueArg->IsIntegralConst(0) && (lengthScale != 1)) + { + JITDUMP("Value is not unroll-friendly - bail out.\n"); + return false; + } + + // Convert lenCns to bytes + ssize_t lenCns = lengthArg->AsIntCon()->IconValue(); + if (CheckedOps::MulOverflows((target_ssize_t)lenCns, (target_ssize_t)lengthScale, CheckedOps::Signed)) + { + // lenCns overflows + JITDUMP("lenCns * lengthScale overflows - bail out.\n") + return false; + } + lenCns *= (ssize_t)lengthScale; + + // TODO-CQ: drop the whole thing in case of lenCns = 0 + if ((lenCns <= 0) || (lenCns > (ssize_t)comp->getUnrollThreshold(Compiler::UnrollKind::Memset))) + { + JITDUMP("Size is either 0 or too big to unroll - bail out.\n") + return false; + } + + JITDUMP("Accepted for unrolling!\nOld tree:\n"); + DISPTREERANGE(BlockRange(), call); + + if (!valueArg->IsIntegralConst(0)) + { + // Non-zero (byte) value, wrap value with GT_INIT_VAL + GenTree* initVal = valueArg; + valueArg = comp->gtNewOperNode(GT_INIT_VAL, TYP_INT, initVal); + BlockRange().InsertAfter(initVal, valueArg); + } + + GenTreeBlk* storeBlk = + comp->gtNewStoreBlkNode(comp->typGetBlkLayout((unsigned)lenCns), dstRefArg, valueArg, GTF_IND_UNALIGNED); + storeBlk->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll; + + // Insert/Remove trees into LIR + BlockRange().InsertBefore(call, storeBlk); + if (call->IsSpecialIntrinsic(comp, NI_System_SpanHelpers_ClearWithoutReferences)) + { + // Value didn't exist in LIR previously + BlockRange().InsertBefore(storeBlk, valueArg); + } + + // Remove the call and mark everything as unused ... + BlockRange().Remove(call, true); + // ... except the args we're going to re-use + dstRefArg->ClearUnusedValue(); + valueArg->ClearUnusedValue(); + if (valueArg->OperIs(GT_INIT_VAL)) + { + valueArg->gtGetOp1()->ClearUnusedValue(); + } + + JITDUMP("\nNew tree:\n"); + DISPTREERANGE(BlockRange(), storeBlk); + *next = storeBlk; + return true; +} + //------------------------------------------------------------------------ // LowerCallMemmove: Replace Buffer.Memmove(DST, SRC, CNS_SIZE) with a GT_STORE_BLK: // Do the same for CORINFO_HELP_MEMCPY(DST, SRC, CNS_SIZE) @@ -2221,11 +2372,32 @@ GenTree* Lowering::LowerCall(GenTree* node) GenTree* nextNode = nullptr; if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) { - NamedIntrinsic ni = comp->lookupNamedIntrinsic(call->gtCallMethHnd); - if (((ni == NI_System_Buffer_Memmove) && LowerCallMemmove(call, &nextNode)) || - ((ni == NI_System_SpanHelpers_SequenceEqual) && LowerCallMemcmp(call, &nextNode))) + switch (comp->lookupNamedIntrinsic(call->gtCallMethHnd)) { - return nextNode; + case NI_System_Buffer_Memmove: + if (LowerCallMemmove(call, &nextNode)) + { + return nextNode; + } + break; + + case NI_System_SpanHelpers_SequenceEqual: + if (LowerCallMemcmp(call, &nextNode)) + { + return nextNode; + } + break; + + case NI_System_SpanHelpers_Fill: + case NI_System_SpanHelpers_ClearWithoutReferences: + if (LowerCallMemset(call, &nextNode)) + { + return nextNode; + } + break; + + default: + break; } } @@ -2234,6 +2406,12 @@ GenTree* Lowering::LowerCall(GenTree* node) { return nextNode; } + + // Try to lower CORINFO_HELP_MEMSET to unrollable STORE_BLK + if (call->IsHelperCall(comp, CORINFO_HELP_MEMSET) && LowerCallMemset(call, &nextNode)) + { + return nextNode; + } #endif call->ClearOtherRegs(); diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index 5156b8899b804d..095e108633b165 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -140,6 +140,7 @@ class Lowering final : public Phase GenTree* LowerCall(GenTree* call); bool LowerCallMemmove(GenTreeCall* call, GenTree** next); bool LowerCallMemcmp(GenTreeCall* call, GenTree** next); + bool LowerCallMemset(GenTreeCall* call, GenTree** next); void LowerCFGCall(GenTreeCall* call); void MoveCFGCallArgs(GenTreeCall* call); void MoveCFGCallArg(GenTreeCall* call, GenTree* node); diff --git a/src/coreclr/jit/namedintrinsiclist.h b/src/coreclr/jit/namedintrinsiclist.h index 09d33ba76a0d6a..a68b88f06a4502 100644 --- a/src/coreclr/jit/namedintrinsiclist.h +++ b/src/coreclr/jit/namedintrinsiclist.h @@ -118,6 +118,8 @@ enum NamedIntrinsic : unsigned short NI_System_String_EndsWith, NI_System_Span_get_Item, NI_System_Span_get_Length, + NI_System_SpanHelpers_ClearWithoutReferences, + NI_System_SpanHelpers_Fill, NI_System_SpanHelpers_SequenceEqual, NI_System_ReadOnlySpan_get_Item, NI_System_ReadOnlySpan_get_Length, diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs index da77d42320a98a..ed66759fd0bda9 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs @@ -16,6 +16,7 @@ namespace System { internal static partial class SpanHelpers // .T { + [Intrinsic] // Unrolled for small sizes public static unsafe void Fill(ref T refData, nuint numElements, T value) { // Early checks to see if it's even possible to vectorize - JIT will turn these checks into consts. diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs index a7e5f48d63180d..ecc1a5f3f3718b 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs @@ -12,6 +12,7 @@ namespace System { internal static partial class SpanHelpers { + [Intrinsic] // Unrolled for small sizes public static unsafe void ClearWithoutReferences(ref byte b, nuint byteLength) { if (byteLength == 0)