diff --git a/src/coreclr/System.Private.CoreLib/src/System/Runtime/CompilerServices/CastHelpers.cs b/src/coreclr/System.Private.CoreLib/src/System/Runtime/CompilerServices/CastHelpers.cs index b41ccd778eb74..053490540ffc2 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Runtime/CompilerServices/CastHelpers.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Runtime/CompilerServices/CastHelpers.cs @@ -411,7 +411,6 @@ internal struct ArrayElement [DebuggerHidden] [StackTraceHidden] [DebuggerStepThrough] - [MethodImpl(MethodImplOptions.AggressiveOptimization)] private static ref object? LdelemaRef(Array array, nint index, void* type) { // this will throw appropriate exceptions if array is null or access is out of range. @@ -427,7 +426,6 @@ internal struct ArrayElement [DebuggerHidden] [StackTraceHidden] [DebuggerStepThrough] - [MethodImpl(MethodImplOptions.AggressiveOptimization)] private static void StelemRef(Array array, nint index, object? obj) { // this will throw appropriate exceptions if array is null or access is out of range. diff --git a/src/coreclr/vm/appdomain.cpp b/src/coreclr/vm/appdomain.cpp index 4d2a705df015e..ab6a99b5af76a 100644 --- a/src/coreclr/vm/appdomain.cpp +++ b/src/coreclr/vm/appdomain.cpp @@ -1380,6 +1380,8 @@ void SystemDomain::LoadBaseSystemClasses() g_pWeakReferenceClass = CoreLibBinder::GetClass(CLASS__WEAKREFERENCE); g_pWeakReferenceOfTClass = CoreLibBinder::GetClass(CLASS__WEAKREFERENCEGENERIC); + g_pCastHelpers = CoreLibBinder::GetClass(CLASS__CASTHELPERS); + #ifdef FEATURE_COMINTEROP if (g_pConfig->IsBuiltInCOMSupported()) { diff --git a/src/coreclr/vm/callcounting.cpp b/src/coreclr/vm/callcounting.cpp index cfa8480439fa4..c464949f7aeee 100644 --- a/src/coreclr/vm/callcounting.cpp +++ b/src/coreclr/vm/callcounting.cpp @@ -660,6 +660,13 @@ bool CallCountingManager::SetCodeEntryPoint( CallCount callCountThreshold = g_pConfig->TieredCompilation_CallCountThreshold(); _ASSERTE(callCountThreshold != 0); + // Let's tier up all cast helpers faster than other methods. This is because we want to import them as + // direct calls in codegen and they need to be promoted earlier than their callers. + if (methodDesc->GetMethodTable() == g_pCastHelpers) + { + callCountThreshold = max(1, (CallCount)(callCountThreshold / 2)); + } + NewHolder callCountingInfoHolder = new CallCountingInfo(activeCodeVersion, callCountThreshold); callCountingInfoByCodeVersionHash.Add(callCountingInfoHolder); callCountingInfo = callCountingInfoHolder.Extract(); diff --git a/src/coreclr/vm/ecall.cpp b/src/coreclr/vm/ecall.cpp index 982690c28776b..37ac50d124f6f 100644 --- a/src/coreclr/vm/ecall.cpp +++ b/src/coreclr/vm/ecall.cpp @@ -137,27 +137,12 @@ void ECall::PopulateManagedCastHelpers() pDest = pMD->GetMultiCallableAddrOfCode(); SetJitHelperFunction(CORINFO_HELP_UNBOX, pDest); - // Array element accessors are more perf sensitive than other managed helpers and indirection - // costs introduced by PreStub could be noticeable (7% to 30% depending on platform). - // Other helpers are either more complex, less common, or have their trivial case inlined by the JIT, - // so indirection is not as big concern. - // We JIT-compile the following helpers eagerly here to avoid indirection costs. - - //TODO: revise if this specialcasing is still needed when crossgen supports tailcall optimizations - // see: https://github.com/dotnet/runtime/issues/5857 - pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__CASTHELPERS__STELEMREF)); - pMD->DoPrestub(NULL); - // This helper is marked AggressiveOptimization and its native code is in its final form. - // Get the code directly to avoid PreStub indirection. - pDest = pMD->GetNativeCode(); + pDest = pMD->GetMultiCallableAddrOfCode(); SetJitHelperFunction(CORINFO_HELP_ARRADDR_ST, pDest); pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__CASTHELPERS__LDELEMAREF)); - pMD->DoPrestub(NULL); - // This helper is marked AggressiveOptimization and its native code is in its final form. - // Get the code directly to avoid PreStub indirection. - pDest = pMD->GetNativeCode(); + pDest = pMD->GetMultiCallableAddrOfCode(); SetJitHelperFunction(CORINFO_HELP_LDELEMA_REF, pDest); } diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 1a3439bdd235e..9215594ced082 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -10587,6 +10587,14 @@ void* CEEJitInfo::getHelperFtn(CorInfoHelpFunc ftnNum, /* IN */ } #endif + // Check if we already have a cached address of the final target + static LPVOID hlpFinalTierAddrTable[DYNAMIC_CORINFO_HELP_COUNT] = {}; + LPVOID finalTierAddr = hlpFinalTierAddrTable[dynamicFtnNum]; + if (finalTierAddr != NULL) + { + return finalTierAddr; + } + if (dynamicFtnNum == DYNAMIC_CORINFO_HELP_ISINSTANCEOFINTERFACE || dynamicFtnNum == DYNAMIC_CORINFO_HELP_ISINSTANCEOFANY || dynamicFtnNum == DYNAMIC_CORINFO_HELP_ISINSTANCEOFARRAY || @@ -10596,10 +10604,43 @@ void* CEEJitInfo::getHelperFtn(CorInfoHelpFunc ftnNum, /* IN */ dynamicFtnNum == DYNAMIC_CORINFO_HELP_CHKCASTINTERFACE || dynamicFtnNum == DYNAMIC_CORINFO_HELP_CHKCASTCLASS || dynamicFtnNum == DYNAMIC_CORINFO_HELP_CHKCASTCLASS_SPECIAL || - dynamicFtnNum == DYNAMIC_CORINFO_HELP_UNBOX) + dynamicFtnNum == DYNAMIC_CORINFO_HELP_UNBOX || + dynamicFtnNum == DYNAMIC_CORINFO_HELP_ARRADDR_ST || + dynamicFtnNum == DYNAMIC_CORINFO_HELP_LDELEMA_REF) { Precode* pPrecode = Precode::GetPrecodeFromEntryPoint((PCODE)hlpDynamicFuncTable[dynamicFtnNum].pfnHelper); _ASSERTE(pPrecode->GetType() == PRECODE_FIXUP); + + // Check if the target MethodDesc is already jitted to its final Tier + // so we no longer need to use indirections and can emit a direct call instead. + // + // Avoid taking the lock for foreground jit compilations + if (!GetAppDomain()->GetTieredCompilationManager()->IsTieringDelayActive()) + { + MethodDesc* helperMD = pPrecode->GetMethodDesc(); + _ASSERT(helperMD != nullptr); + + CodeVersionManager* manager = helperMD->GetCodeVersionManager(); + NativeCodeVersion activeCodeVersion; + + { + // Get active code version under a lock + CodeVersionManager::LockHolder codeVersioningLockHolder; + activeCodeVersion = manager->GetActiveILCodeVersion(helperMD).GetActiveNativeCodeVersion(helperMD); + } + + if (activeCodeVersion.IsFinalTier()) + { + finalTierAddr = (LPVOID)activeCodeVersion.GetNativeCode(); + if (finalTierAddr != NULL) + { + // Cache it for future uses to avoid taking the lock again. + hlpFinalTierAddrTable[dynamicFtnNum] = finalTierAddr; + return finalTierAddr; + } + } + } + *ppIndirection = ((FixupPrecode*)pPrecode)->GetTargetSlot(); return NULL; } diff --git a/src/coreclr/vm/tieredcompilation.h b/src/coreclr/vm/tieredcompilation.h index b251a3899ed4b..7177c6cc32a41 100644 --- a/src/coreclr/vm/tieredcompilation.h +++ b/src/coreclr/vm/tieredcompilation.h @@ -63,10 +63,10 @@ class TieredCompilationManager void BackgroundWorkerStart(); private: - bool IsTieringDelayActive(); bool TryDeactivateTieringDelay(); public: + bool IsTieringDelayActive(); void AsyncCompleteCallCounting(); private: diff --git a/src/coreclr/vm/vars.cpp b/src/coreclr/vm/vars.cpp index 85737057d2f9c..e8d4789b29644 100644 --- a/src/coreclr/vm/vars.cpp +++ b/src/coreclr/vm/vars.cpp @@ -106,6 +106,8 @@ GVAL_IMPL_INIT(DWORD, g_debuggerWordTLSIndex, TLS_OUT_OF_INDEXES); #endif GVAL_IMPL_INIT(DWORD, g_TlsIndex, TLS_OUT_OF_INDEXES); +MethodTable* g_pCastHelpers; + #ifndef DACCESS_COMPILE // @TODO - PROMOTE. diff --git a/src/coreclr/vm/vars.hpp b/src/coreclr/vm/vars.hpp index dd92ee7b12fde..5110be83ed1bc 100644 --- a/src/coreclr/vm/vars.hpp +++ b/src/coreclr/vm/vars.hpp @@ -401,6 +401,8 @@ EXTERN OBJECTHANDLE g_pPreallocatedExecutionEngineException; // we use this as a dummy object to indicate free space in the handle tables -- this object is never visible to the world EXTERN OBJECTHANDLE g_pPreallocatedSentinelObject; +EXTERN MethodTable* g_pCastHelpers; + GPTR_DECL(Thread,g_pFinalizerThread); GPTR_DECL(Thread,g_pSuspensionThread);