diff --git a/src/coreclr/inc/corinfo.h b/src/coreclr/inc/corinfo.h index d6343f24c1b18..0b50a047b3f66 100644 --- a/src/coreclr/inc/corinfo.h +++ b/src/coreclr/inc/corinfo.h @@ -546,6 +546,7 @@ enum CorInfoHelpFunc CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR, CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_DYNAMICCLASS, CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_DYNAMICCLASS, + CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED, /* Debugger */ @@ -1686,6 +1687,7 @@ enum CORINFO_FIELD_ACCESSOR CORINFO_FIELD_STATIC_GENERICS_STATIC_HELPER, // static field access using the "generic static" helper (argument is MethodTable *) CORINFO_FIELD_STATIC_ADDR_HELPER, // static field accessed using address-of helper (argument is FieldDesc *) CORINFO_FIELD_STATIC_TLS, // unmanaged TLS access + CORINFO_FIELD_STATIC_TLS_MANAGED, // managed TLS access CORINFO_FIELD_STATIC_READYTORUN_HELPER, // static field access using a runtime lookup helper CORINFO_FIELD_STATIC_RELOCATABLE, // static field access using relocation (used in AOT) CORINFO_FIELD_INTRINSIC_ZERO, // intrinsic zero (IntPtr.Zero, UIntPtr.Zero) @@ -1725,6 +1727,17 @@ struct CORINFO_FIELD_INFO CORINFO_CONST_LOOKUP fieldLookup; // Used by Ready-to-Run }; +//---------------------------------------------------------------------------- +// getThreadLocalStaticBlocksInfo and CORINFO_THREAD_STATIC_BLOCKS_INFO: The EE instructs the JIT about how to access a thread local field + +struct CORINFO_THREAD_STATIC_BLOCKS_INFO +{ + CORINFO_CONST_LOOKUP tlsIndex; + uint32_t offsetOfThreadLocalStoragePointer; + uint32_t offsetOfMaxThreadStaticBlocks; + uint32_t offsetOfThreadStaticBlocks; +}; + //---------------------------------------------------------------------------- // Exception handling @@ -2743,6 +2756,12 @@ class ICorStaticInfo CORINFO_FIELD_INFO *pResult ) = 0; + virtual uint32_t getThreadLocalFieldInfo ( + CORINFO_FIELD_HANDLE field) = 0; + + virtual void getThreadLocalStaticBlocksInfo ( + CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo) = 0; + // Returns true iff "fldHnd" represents a static field. 
virtual bool isFieldStatic(CORINFO_FIELD_HANDLE fldHnd) = 0; diff --git a/src/coreclr/inc/icorjitinfoimpl_generated.h b/src/coreclr/inc/icorjitinfoimpl_generated.h index 8f91c38be1bef..e053b3d2f816f 100644 --- a/src/coreclr/inc/icorjitinfoimpl_generated.h +++ b/src/coreclr/inc/icorjitinfoimpl_generated.h @@ -414,6 +414,12 @@ void getFieldInfo( CORINFO_ACCESS_FLAGS flags, CORINFO_FIELD_INFO* pResult) override; +uint32_t getThreadLocalFieldInfo( + CORINFO_FIELD_HANDLE field) override; + +void getThreadLocalStaticBlocksInfo( + CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo) override; + bool isFieldStatic( CORINFO_FIELD_HANDLE fldHnd) override; diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h index b90de122eb451..765668241e3c3 100644 --- a/src/coreclr/inc/jiteeversionguid.h +++ b/src/coreclr/inc/jiteeversionguid.h @@ -43,11 +43,11 @@ typedef const GUID *LPCGUID; #define GUID_DEFINED #endif // !GUID_DEFINED -constexpr GUID JITEEVersionIdentifier = { /* 3054e9ba-bcfe-417c-9043-92ccc8738b80 */ - 0x3054e9ba, - 0xbcfe, - 0x417c, - {0x90, 0x43, 0x92, 0xcc, 0xc8, 0x73, 0x8b, 0x80} +constexpr GUID JITEEVersionIdentifier = { /* 236d7997-3d71-45f9-b7d7-5241ad89a56f */ + 0x236d7997, + 0x3d71, + 0x45f9, + { 0xb7, 0xd7, 0x52, 0x41, 0xad, 0x89, 0xa5, 0x6f } }; ////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/coreclr/inc/jithelpers.h b/src/coreclr/inc/jithelpers.h index d5d3d5af8d94f..7572ef7700322 100644 --- a/src/coreclr/inc/jithelpers.h +++ b/src/coreclr/inc/jithelpers.h @@ -203,6 +203,7 @@ JITHELPER(CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR, JIT_GetSharedNonGCThreadStaticBase, CORINFO_HELP_SIG_REG_ONLY) JITHELPER(CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_DYNAMICCLASS, JIT_GetSharedGCThreadStaticBaseDynamicClass, CORINFO_HELP_SIG_REG_ONLY) JITHELPER(CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_DYNAMICCLASS, JIT_GetSharedNonGCThreadStaticBaseDynamicClass, CORINFO_HELP_SIG_REG_ONLY) + JITHELPER(CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED, JIT_GetSharedNonGCThreadStaticBaseOptimized, CORINFO_HELP_SIG_REG_ONLY) // Debugger JITHELPER(CORINFO_HELP_DBG_IS_JUST_MY_CODE, JIT_DbgIsJustMyCode,CORINFO_HELP_SIG_REG_ONLY) diff --git a/src/coreclr/jit/CMakeLists.txt b/src/coreclr/jit/CMakeLists.txt index d5ba36b8eec52..843448815bf1f 100644 --- a/src/coreclr/jit/CMakeLists.txt +++ b/src/coreclr/jit/CMakeLists.txt @@ -129,7 +129,7 @@ set( JIT_SOURCES hwintrinsic.cpp hostallocator.cpp ifconversion.cpp - runtimelookup.cpp + helperexpansion.cpp indirectcalltransformer.cpp importercalls.cpp importer.cpp diff --git a/src/coreclr/jit/ICorJitInfo_names_generated.h b/src/coreclr/jit/ICorJitInfo_names_generated.h index ed5aa05b90b80..6a9f0408fb82a 100644 --- a/src/coreclr/jit/ICorJitInfo_names_generated.h +++ b/src/coreclr/jit/ICorJitInfo_names_generated.h @@ -103,6 +103,8 @@ DEF_CLR_API(getFieldClass) DEF_CLR_API(getFieldType) DEF_CLR_API(getFieldOffset) DEF_CLR_API(getFieldInfo) +DEF_CLR_API(getThreadLocalFieldInfo) +DEF_CLR_API(getThreadLocalStaticBlocksInfo) DEF_CLR_API(isFieldStatic) DEF_CLR_API(getArrayOrStringLength) DEF_CLR_API(getBoundaries) diff --git a/src/coreclr/jit/ICorJitInfo_wrapper_generated.hpp b/src/coreclr/jit/ICorJitInfo_wrapper_generated.hpp index 1d737e475a7c4..1b3aa470acf33 100644 --- a/src/coreclr/jit/ICorJitInfo_wrapper_generated.hpp +++ b/src/coreclr/jit/ICorJitInfo_wrapper_generated.hpp @@ -981,6 +981,23 @@ void WrapICorJitInfo::getFieldInfo( API_LEAVE(getFieldInfo); } 
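+
+// Taken together, the new JIT-EE surface is small: getThreadLocalFieldInfo returns the
+// field's typeIndex (queried once per field), and getThreadLocalStaticBlocksInfo returns
+// the process-wide TLS layout. A usage sketch on the Compiler side (names as declared in
+// this diff; the exact call sites appear in helperexpansion.cpp below):
+//
+//     uint32_t typeIndex = info.compCompHnd->getThreadLocalFieldInfo(field);
+//
+//     CORINFO_THREAD_STATIC_BLOCKS_INFO tlsInfo;
+//     info.compCompHnd->getThreadLocalStaticBlocksInfo(&tlsInfo);
+//     // tlsInfo.tlsIndex                          - address of coreclr's _tls_index
+//     // tlsInfo.offsetOfThreadLocalStoragePointer - TEB offset of the TLS slot array
+//     // tlsInfo.offsetOfMaxThreadStaticBlocks     - offset of the cache bound in coreclr's TLS data
+//     // tlsInfo.offsetOfThreadStaticBlocks        - offset of the per-type static block cache
+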
+uint32_t WrapICorJitInfo::getThreadLocalFieldInfo(
+    CORINFO_FIELD_HANDLE field)
+{
+    API_ENTER(getThreadLocalFieldInfo);
+    uint32_t temp = wrapHnd->getThreadLocalFieldInfo(field);
+    API_LEAVE(getThreadLocalFieldInfo);
+    return temp;
+}
+
+void WrapICorJitInfo::getThreadLocalStaticBlocksInfo(
+    CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo)
+{
+    API_ENTER(getThreadLocalStaticBlocksInfo);
+    wrapHnd->getThreadLocalStaticBlocksInfo(pInfo);
+    API_LEAVE(getThreadLocalStaticBlocksInfo);
+}
+
 bool WrapICorJitInfo::isFieldStatic(
     CORINFO_FIELD_HANDLE fldHnd)
 {
diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp
index c3d1efd1d1e31..1a4297d6b561e 100644
--- a/src/coreclr/jit/codegenxarch.cpp
+++ b/src/coreclr/jit/codegenxarch.cpp
@@ -5420,8 +5420,13 @@ void CodeGen::genCodeForIndir(GenTreeIndir* tree)
         if (addr->IsIconHandle(GTF_ICON_TLS_HDL))
         {
             noway_assert(EA_ATTR(genTypeSize(targetType)) == EA_PTRSIZE);
+#ifdef TARGET_64BIT
+            emit->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTRSIZE, tree->GetRegNum(), FLD_GLOBAL_GS,
+                              (int)addr->AsIntCon()->gtIconVal);
+#else
             emit->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTRSIZE, tree->GetRegNum(), FLD_GLOBAL_FS,
                               (int)addr->AsIntCon()->gtIconVal);
+#endif
         }
         else
         {
diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp
index 9c4b574e647d7..b005fc09e1eea 100644
--- a/src/coreclr/jit/compiler.cpp
+++ b/src/coreclr/jit/compiler.cpp
@@ -5040,6 +5040,12 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
     // Partially inline static initializations
     DoPhase(this, PHASE_EXPAND_STATIC_INIT, &Compiler::fgExpandStaticInit);
 
+    if (TargetOS::IsWindows)
+    {
+        // Currently this is only applicable to Windows
+        DoPhase(this, PHASE_EXPAND_TLS, &Compiler::fgExpandThreadLocalAccess);
+    }
+
     // Insert GC Polls
     DoPhase(this, PHASE_INSERT_GC_POLLS, &Compiler::fgInsertGCPolls);
diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index 72c0a761b942b..18037b5a2801d 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -4862,7 +4862,7 @@ class Compiler
 
     GenTree* fgInitThisClass();
 
-    GenTreeCall* fgGetStaticsCCtorHelper(CORINFO_CLASS_HANDLE cls, CorInfoHelpFunc helper);
+    GenTreeCall* fgGetStaticsCCtorHelper(CORINFO_CLASS_HANDLE cls, CorInfoHelpFunc helper, uint32_t typeIndex = 0);
 
     GenTreeCall* fgGetSharedCCtor(CORINFO_CLASS_HANDLE cls);
 
@@ -5311,11 +5311,21 @@ class Compiler
     PhaseStatus StressSplitTree();
     void SplitTreesRandomly();
    void SplitTreesRemoveCommas();
+
+    template <bool (Compiler::*ExpansionFunction)(BasicBlock*, Statement*, GenTreeCall*)>
+    PhaseStatus fgExpandHelper(bool skipRarelyRunBlocks);
+
+    template <bool (Compiler::*ExpansionFunction)(BasicBlock*, Statement*, GenTreeCall*)>
+    bool fgExpandHelperForBlock(BasicBlock* block);
+
     PhaseStatus fgExpandRuntimeLookups();
+    bool fgExpandRuntimeLookupsForCall(BasicBlock* block, Statement* stmt, GenTreeCall* call);
+
+    PhaseStatus fgExpandThreadLocalAccess();
+    bool fgExpandThreadLocalAccessForCall(BasicBlock* block, Statement* stmt, GenTreeCall* call);
 
-    bool fgExpandStaticInitForBlock(BasicBlock* block);
-    bool fgExpandStaticInitForCall(BasicBlock* block, Statement* stmt, GenTreeCall* call);
     PhaseStatus fgExpandStaticInit();
+    bool fgExpandStaticInitForCall(BasicBlock* block, Statement* stmt, GenTreeCall* call);
 
     PhaseStatus fgInsertGCPolls();
     BasicBlock* fgCreateGCPoll(GCPollType pollType, BasicBlock* block);
@@ -7039,6 +7049,7 @@ class Compiler
 #define OMF_HAS_MDNEWARRAY 0x00002000 // Method contains 'new' of an MD array
 #define OMF_HAS_MDARRAYREF 0x00004000 // Method contains multi-dimensional intrinsic array element loads or stores.
#define OMF_HAS_STATIC_INIT 0x00008000 // Method has static initializations we might want to partially inline +#define OMF_HAS_TLS_FIELD 0x00010000 // Method contains TLS field access // clang-format on @@ -7089,6 +7100,16 @@ class Compiler optMethodFlags |= OMF_HAS_GUARDEDDEVIRT; } + bool doesMethodHasTlsFieldAccess() + { + return (optMethodFlags & OMF_HAS_TLS_FIELD) != 0; + } + + void setMethodHasTlsFieldAccess() + { + optMethodFlags |= OMF_HAS_TLS_FIELD; + } + void pickGDV(GenTreeCall* call, IL_OFFSET ilOffset, bool isInterface, @@ -7790,6 +7811,7 @@ class Compiler void eeGetFieldInfo(CORINFO_RESOLVED_TOKEN* pResolvedToken, CORINFO_ACCESS_FLAGS flags, CORINFO_FIELD_INFO* pResult); + uint32_t eeGetThreadLocalFieldInfo(CORINFO_FIELD_HANDLE field); // Get the flags diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index b180762944285..6a132f6a0ccbc 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -3635,6 +3635,7 @@ inline bool Compiler::IsSharedStaticHelper(GenTree* tree) helper == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE || helper == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR || helper == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR || + helper == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED || helper == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_DYNAMICCLASS || helper == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_DYNAMICCLASS || #ifdef FEATURE_READYTORUN @@ -3684,12 +3685,12 @@ inline bool Compiler::IsGcSafePoint(GenTreeCall* call) // Note that we want to have two special FIELD_HANDLES that will both // be considered non-Data Offset handles // -// The special values that we use are FLD_GLOBAL_DS and FLD_GLOBAL_FS +// The special values that we use are FLD_GLOBAL_DS, FLD_GLOBAL_FS or FLD_GLOBAL_GS. 
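+// (FLD_GLOBAL_FS selects the FS segment, which holds the TEB on x86 Windows;
+// FLD_GLOBAL_GS selects the GS segment, which holds the TEB on x64 Windows.)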
 //
 inline bool jitStaticFldIsGlobAddr(CORINFO_FIELD_HANDLE fldHnd)
 {
-    return (fldHnd == FLD_GLOBAL_DS || fldHnd == FLD_GLOBAL_FS);
+    return (fldHnd == FLD_GLOBAL_DS || fldHnd == FLD_GLOBAL_FS || fldHnd == FLD_GLOBAL_GS);
 }
 
 /*
diff --git a/src/coreclr/jit/compphases.h b/src/coreclr/jit/compphases.h
index d8237c91270bc..703270d0b4558 100644
--- a/src/coreclr/jit/compphases.h
+++ b/src/coreclr/jit/compphases.h
@@ -93,6 +93,7 @@ CompPhaseNameMacro(PHASE_COMPUTE_EDGE_WEIGHTS2, "Compute edge weights (2, f
 CompPhaseNameMacro(PHASE_STRESS_SPLIT_TREE, "Stress gtSplitTree", false, -1, false)
 CompPhaseNameMacro(PHASE_EXPAND_RTLOOKUPS, "Expand runtime lookups", false, -1, true)
 CompPhaseNameMacro(PHASE_EXPAND_STATIC_INIT, "Expand static init", false, -1, true)
+CompPhaseNameMacro(PHASE_EXPAND_TLS, "Expand TLS access", false, -1, true)
 CompPhaseNameMacro(PHASE_INSERT_GC_POLLS, "Insert GC Polls", false, -1, true)
 CompPhaseNameMacro(PHASE_DETERMINE_FIRST_COLD_BLOCK, "Determine first cold block", false, -1, true)
 CompPhaseNameMacro(PHASE_RATIONALIZE, "Rationalize IR", false, -1, false)
diff --git a/src/coreclr/jit/ee_il_dll.hpp b/src/coreclr/jit/ee_il_dll.hpp
index 337ca8f147977..0a9c248a86159 100644
--- a/src/coreclr/jit/ee_il_dll.hpp
+++ b/src/coreclr/jit/ee_il_dll.hpp
@@ -44,6 +44,12 @@ void Compiler::eeGetFieldInfo(CORINFO_RESOLVED_TOKEN* pResolvedToken,
     info.compCompHnd->getFieldInfo(pResolvedToken, info.compMethodHnd, accessFlags, pResult);
 }
 
+FORCEINLINE
+uint32_t Compiler::eeGetThreadLocalFieldInfo(CORINFO_FIELD_HANDLE field)
+{
+    return info.compCompHnd->getThreadLocalFieldInfo(field);
+}
+
 /*****************************************************************************
  *
  *  VOS info, method sigs, etc
diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp
index 050aa3b91a9fc..682cd042cae9e 100644
--- a/src/coreclr/jit/emitarm64.cpp
+++ b/src/coreclr/jit/emitarm64.cpp
@@ -13804,7 +13804,7 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR
 
     if (addr->isContained())
     {
-        assert(addr->OperIs(GT_CLS_VAR_ADDR, GT_LCL_ADDR, GT_LEA));
+        assert(addr->OperIs(GT_CLS_VAR_ADDR, GT_LCL_ADDR, GT_LEA) || (addr->IsIconHandle(GTF_ICON_TLS_HDL)));
 
         int   offset = 0;
         DWORD lsl    = 0;
@@ -13927,6 +13927,11 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR
                 emitIns_R_S(ins, attr, dataReg, lclNum, offset);
             }
         }
+        else if (addr->IsIconHandle(GTF_ICON_TLS_HDL))
+        {
+            // On Arm64, the TEB lives in r18, so use r18 as the base register for the load.
+            emitIns_R_R_I(ins, attr, dataReg, REG_R18, addr->AsIntCon()->IconValue());
+        }
         else if (emitIns_valid_imm_for_ldst_offset(offset, emitTypeSize(indir->TypeGet())))
         {
             // Then load/store dataReg from/to [memBase + offset]
diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp
index 7a27076091ca3..7d416e12fa21c 100644
--- a/src/coreclr/jit/emitxarch.cpp
+++ b/src/coreclr/jit/emitxarch.cpp
@@ -7260,6 +7260,10 @@ void emitter::emitIns_R_C(instruction ins, emitAttr attr, regNumber reg, CORINFO
         {
             sz += 1;
         }
+        else if (fldHnd == FLD_GLOBAL_GS)
+        {
+            sz += 2; // Needs SIB byte as well.
+ } } id->idCodeSize(sz); @@ -7328,7 +7332,7 @@ void emitter::emitIns_C_R(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE f } // Special case: mov reg, fs:[ddd] - if (fldHnd == FLD_GLOBAL_FS) + if ((fldHnd == FLD_GLOBAL_FS) || (fldHnd == FLD_GLOBAL_GS)) { sz += 1; } @@ -10001,6 +10005,12 @@ void emitter::emitDispClsVar(CORINFO_FIELD_HANDLE fldHnd, ssize_t offs, bool rel return; } + if (fldHnd == FLD_GLOBAL_GS) + { + printf("GS:[0x%04X]", (unsigned)offs); + return; + } + if (fldHnd == FLD_GLOBAL_DS) { printf("[0x%04X]", (unsigned)offs); @@ -13336,11 +13346,16 @@ BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) fldh = id->idAddr()->iiaFieldHnd; offs = emitGetInsDsp(id); - // Special case: mov reg, fs:[ddd] if (fldh == FLD_GLOBAL_FS) { + // Special case: mov reg, fs:[ddd] dst += emitOutputByte(dst, 0x64); } + else if (fldh == FLD_GLOBAL_GS) + { + // Special case: mov reg, gs:[ddd] + dst += emitOutputByte(dst, 0x65); + } // Compute VEX/EVEX prefix // Some of its callers already add EVEX/VEX prefix and then call this routine. @@ -13550,6 +13565,11 @@ BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) } } + if (fldh == FLD_GLOBAL_GS) + { + dst += emitOutputByte(dst, 0x25); + } + // Do we have a constant or a static data member? doff = Compiler::eeGetJitDataOffs(fldh); if (doff >= 0) @@ -13629,12 +13649,18 @@ BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) #endif // TARGET_AMD64 #ifdef TARGET_AMD64 - // All static field and data section constant accesses should be marked as relocatable - noway_assert(id->idIsDspReloc()); - dst += emitOutputLong(dst, 0); -#else // TARGET_X86 + if (id->idIsDspReloc()) + { + // All static field and data section constant accesses should be marked as relocatable + dst += emitOutputLong(dst, 0); + } + else + { + dst += emitOutputLong(dst, (ssize_t)target); + } +#else dst += emitOutputLong(dst, (int)(ssize_t)target); -#endif // TARGET_X86 +#endif // TARGET_AMD64 if (id->idIsDspReloc()) { @@ -16447,8 +16473,16 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) code = insEncodeReg3456(id, id->idReg1(), size, code); } - regcode = (insEncodeReg345(id, id->idReg1(), size, &code) << 8); - dst = emitOutputCV(dst, id, code | regcode | 0x0500); + regcode = (insEncodeReg345(id, id->idReg1(), size, &code) << 8); + CORINFO_FIELD_HANDLE fldh = id->idAddr()->iiaFieldHnd; + if (fldh == FLD_GLOBAL_GS) + { + dst = emitOutputCV(dst, id, code | regcode | 0x0400); + } + else + { + dst = emitOutputCV(dst, id, code | regcode | 0x0500); + } } sz = emitSizeOfInsDsc(id); diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index f14688a114b5e..80cadd686948c 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -436,347 +436,6 @@ BasicBlock* Compiler::fgCreateGCPoll(GCPollType pollType, BasicBlock* block) return createdPollBlocks ? bottom : block; } -//------------------------------------------------------------------------------ -// fgExpandStaticInit: Partially expand static initialization calls, e.g.: -// -// tmp = CORINFO_HELP_X_NONGCSTATIC_BASE(); -// -// into: -// -// if (isClassAlreadyInited) -// CORINFO_HELP_X_NONGCSTATIC_BASE(); -// tmp = fastPath; -// -// Returns: -// PhaseStatus indicating what, if anything, was changed. 
-// -PhaseStatus Compiler::fgExpandStaticInit() -{ - PhaseStatus result = PhaseStatus::MODIFIED_NOTHING; - - if (!doesMethodHaveStaticInit()) - { - // TP: nothing to expand in the current method - JITDUMP("Nothing to expand.\n") - return result; - } - - if (opts.OptimizationDisabled()) - { - JITDUMP("Optimizations aren't allowed - bail out.\n") - return result; - } - - // TODO: Replace with opts.compCodeOpt once it's fixed - const bool preferSize = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_SIZE_OPT); - if (preferSize) - { - // The optimization comes with a codegen size increase - JITDUMP("Optimized for size - bail out.\n") - return result; - } - - for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext) - { - if (block->isRunRarely()) - { - // It's just an optimization - don't waste time on rarely executed blocks - continue; - } - - // Expand and visit the last block again to find more candidates - while (fgExpandStaticInitForBlock(block)) - { - result = PhaseStatus::MODIFIED_EVERYTHING; - } - } - return result; -} - -//------------------------------------------------------------------------------ -// fgExpandStaticInitForCall: Partially expand given static initialization call. -// Also, see fgExpandStaticInit's comments. -// -// Arguments: -// block - call's block -// stmt - call's statement -// call - call that represents a static initialization -// -// Returns: -// true if a static initialization was expanded -// -bool Compiler::fgExpandStaticInitForCall(BasicBlock* block, Statement* stmt, GenTreeCall* call) -{ - bool isGc = false; - StaticHelperReturnValue retValKind = {}; - if (!IsStaticHelperEligibleForExpansion(call, &isGc, &retValKind)) - { - return false; - } - - assert(!call->IsTailCall()); - - if (call->gtInitClsHnd == NO_CLASS_HANDLE) - { - assert(!"helper call was created without gtInitClsHnd or already visited"); - return false; - } - - int isInitOffset = 0; - CORINFO_CONST_LOOKUP flagAddr = {}; - if (!info.compCompHnd->getIsClassInitedFlagAddress(call->gtInitClsHnd, &flagAddr, &isInitOffset)) - { - JITDUMP("getIsClassInitedFlagAddress returned false - bail out.\n") - return false; - } - - CORINFO_CONST_LOOKUP staticBaseAddr = {}; - if ((retValKind == SHRV_STATIC_BASE_PTR) && - !info.compCompHnd->getStaticBaseAddress(call->gtInitClsHnd, isGc, &staticBaseAddr)) - { - JITDUMP("getStaticBaseAddress returned false - bail out.\n") - return false; - } - - JITDUMP("Expanding static initialization for '%s', call: [%06d] in " FMT_BB "\n", - eeGetClassName(call->gtInitClsHnd), dspTreeID(call), block->bbNum) - - DebugInfo debugInfo = stmt->GetDebugInfo(); - - // Split block right before the call tree - BasicBlock* prevBb = block; - GenTree** callUse = nullptr; - Statement* newFirstStmt = nullptr; - block = fgSplitBlockBeforeTree(block, stmt, call, &newFirstStmt, &callUse); - assert(prevBb != nullptr && block != nullptr); - - // Block ops inserted by the split need to be morphed here since we are after morph. - // We cannot morph stmt yet as we may modify it further below, and the morphing - // could invalidate callUse. - while ((newFirstStmt != nullptr) && (newFirstStmt != stmt)) - { - fgMorphStmtBlockOps(block, newFirstStmt); - newFirstStmt = newFirstStmt->GetNextStmt(); - } - - // - // Create new blocks. 
Essentially, we want to transform this: - // - // staticBase = helperCall(); - // - // into: - // - // if (!isInitialized) - // { - // helperCall(); // we don't use its return value - // } - // staticBase = fastPath; - // - - // The initialization check looks like this for JIT: - // - // * JTRUE void - // \--* EQ int - // +--* AND int - // | +--* IND int - // | | \--* CNS_INT(h) long 0x.... const ptr - // | \--* CNS_INT int 1 (bit mask) - // \--* CNS_INT int 1 - // - // For NativeAOT it's: - // - // * JTRUE void - // \--* EQ int - // +--* IND nint - // | \--* ADD long - // | +--* CNS_INT(h) long 0x.... const ptr - // | \--* CNS_INT int -8 (offset) - // \--* CNS_INT int 0 - // - assert(flagAddr.accessType == IAT_VALUE); - - GenTree* cachedStaticBase = nullptr; - GenTree* isInitedActualValueNode; - GenTree* isInitedExpectedValue; - if (IsTargetAbi(CORINFO_NATIVEAOT_ABI)) - { - GenTree* baseAddr = gtNewIconHandleNode((size_t)flagAddr.addr, GTF_ICON_GLOBAL_PTR); - - // Save it to a temp - we'll be using its value for the replacementNode. - // This leads to some size savings on NativeAOT - if ((staticBaseAddr.addr == flagAddr.addr) && (staticBaseAddr.accessType == flagAddr.accessType)) - { - cachedStaticBase = fgInsertCommaFormTemp(&baseAddr); - } - - // Don't fold ADD(CNS1, CNS2) here since the result won't be reloc-friendly for AOT - GenTree* offsetNode = gtNewOperNode(GT_ADD, TYP_I_IMPL, baseAddr, gtNewIconNode(isInitOffset)); - isInitedActualValueNode = gtNewIndir(TYP_I_IMPL, offsetNode, GTF_IND_NONFAULTING); - isInitedActualValueNode->gtFlags |= GTF_GLOB_REF; - - // 0 means "initialized" on NativeAOT - isInitedExpectedValue = gtNewIconNode(0, TYP_I_IMPL); - } - else - { - assert(isInitOffset == 0); - - isInitedActualValueNode = gtNewIndOfIconHandleNode(TYP_INT, (size_t)flagAddr.addr, GTF_ICON_GLOBAL_PTR, false); - - // Check ClassInitFlags::INITIALIZED_FLAG bit - isInitedActualValueNode = gtNewOperNode(GT_AND, TYP_INT, isInitedActualValueNode, gtNewIconNode(1)); - isInitedExpectedValue = gtNewIconNode(1); - } - - GenTree* isInitedCmp = gtNewOperNode(GT_EQ, TYP_INT, isInitedActualValueNode, isInitedExpectedValue); - isInitedCmp->gtFlags |= GTF_RELOP_JMP_USED; - BasicBlock* isInitedBb = - fgNewBBFromTreeAfter(BBJ_COND, prevBb, gtNewOperNode(GT_JTRUE, TYP_VOID, isInitedCmp), debugInfo); - - // Fallback basic block - // TODO-CQ: for JIT we can replace the original call with CORINFO_HELP_INITCLASS - // that only accepts a single argument - BasicBlock* helperCallBb = fgNewBBFromTreeAfter(BBJ_NONE, isInitedBb, call, debugInfo, true); - - GenTree* replacementNode = nullptr; - if (retValKind == SHRV_STATIC_BASE_PTR) - { - // Replace the call with a constant pointer to the statics base - assert(staticBaseAddr.addr != nullptr); - - // Use local if the addressed is already materialized and cached - if (cachedStaticBase != nullptr) - { - assert(staticBaseAddr.accessType == IAT_VALUE); - replacementNode = cachedStaticBase; - } - else if (staticBaseAddr.accessType == IAT_VALUE) - { - replacementNode = gtNewIconHandleNode((size_t)staticBaseAddr.addr, GTF_ICON_STATIC_HDL); - } - else - { - assert(staticBaseAddr.accessType == IAT_PVALUE); - replacementNode = - gtNewIndOfIconHandleNode(TYP_I_IMPL, (size_t)staticBaseAddr.addr, GTF_ICON_GLOBAL_PTR, false); - } - } - - if (replacementNode == nullptr) - { - (*callUse)->gtBashToNOP(); - } - else - { - *callUse = replacementNode; - } - - fgMorphStmtBlockOps(block, stmt); - gtUpdateStmtSideEffects(stmt); - - // Final block layout looks like this: - // - // 
prevBb(BBJ_NONE): [weight: 1.0]
-    //     ...
-    //
-    // isInitedBb(BBJ_COND): [weight: 1.0]
-    //     if (isInited)
-    //         goto block;
-    //
-    // helperCallBb(BBJ_NONE): [weight: 0.0]
-    //     helperCall();
-    //
-    // block(...): [weight: 1.0]
-    //     use(staticBase);
-    //
-    // Whether we use helperCall's value or not depends on the helper itself.
-
-    //
-    // Update preds in all new blocks
-    //
-
-    // Unlink block and prevBb
-    fgRemoveRefPred(block, prevBb);
-
-    // Block has two preds now: either isInitedBb or helperCallBb
-    fgAddRefPred(block, isInitedBb);
-    fgAddRefPred(block, helperCallBb);
-
-    // prevBb always flow into isInitedBb
-    fgAddRefPred(isInitedBb, prevBb);
-
-    // Both fastPathBb and helperCallBb have a single common pred - isInitedBb
-    fgAddRefPred(helperCallBb, isInitedBb);
-
-    // helperCallBb unconditionally jumps to the last block (jumps over fastPathBb)
-    isInitedBb->bbJumpDest = block;
-
-    //
-    // Re-distribute weights
-    //
-
-    block->inheritWeight(prevBb);
-    isInitedBb->inheritWeight(prevBb);
-    helperCallBb->bbSetRunRarely();
-
-    //
-    // Update loop info if loop table is known to be valid
-    //
-
-    isInitedBb->bbNatLoopNum   = prevBb->bbNatLoopNum;
-    helperCallBb->bbNatLoopNum = prevBb->bbNatLoopNum;
-
-    // All blocks are expected to be in the same EH region
-    assert(BasicBlock::sameEHRegion(prevBb, block));
-    assert(BasicBlock::sameEHRegion(prevBb, isInitedBb));
-
-    // Extra step: merge prevBb with isInitedBb if possible
-    if (fgCanCompactBlocks(prevBb, isInitedBb))
-    {
-        fgCompactBlocks(prevBb, isInitedBb);
-    }
-
-    // Clear gtInitClsHnd as a mark that we've already visited this call
-    call->gtInitClsHnd = NO_CLASS_HANDLE;
-    return true;
-}
-
-//------------------------------------------------------------------------------
-// fgExpandStaticInitForBlock: Partially expand static initialization calls, in
-//    the given block. Also, see fgExpandStaticInit's comments
-//
-// Arguments:
-//    block - block to scan for static initializations
-//
-// Returns:
-//    true if a static initialization was found and expanded
-//
-bool Compiler::fgExpandStaticInitForBlock(BasicBlock* block)
-{
-    for (Statement* const stmt : block->NonPhiStatements())
-    {
-        if ((stmt->GetRootNode()->gtFlags & GTF_CALL) == 0)
-        {
-            // TP: Stmt has no calls - bail out
-            continue;
-        }
-
-        for (GenTree* const tree : stmt->TreeList())
-        {
-            if (!tree->IsHelperCall())
-            {
-                continue;
-            }
-
-            if (fgExpandStaticInitForCall(block, stmt, tree->AsCall()))
-            {
-                return true;
-            }
-        }
-    }
-    return false;
-}
-
 //------------------------------------------------------------------------
 // fgCanSwitchToOptimized: Determines if conditions are met to allow switching the opt level to optimized
 //
@@ -1035,7 +694,19 @@ bool Compiler::fgIsCommaThrow(GenTree* tree, bool forFolding /* = false */)
     return false;
 }
 
-GenTreeCall* Compiler::fgGetStaticsCCtorHelper(CORINFO_CLASS_HANDLE cls, CorInfoHelpFunc helper)
+//------------------------------------------------------------------------
+// fgGetStaticsCCtorHelper: Creates a call node for the given shared statics
+//    (or thread statics) base helper.
+//
+// Arguments:
+//    cls       - The class handle
+//    helper    - The helper function
+//    typeIndex - The static block type index. Used only for
+//                CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED to cache
+//                the static block in an array at index typeIndex.
+//
+// Return Value:
+//    The call node corresponding to the helper
 GenTreeCall* Compiler::fgGetStaticsCCtorHelper(CORINFO_CLASS_HANDLE cls, CorInfoHelpFunc helper, uint32_t typeIndex)
 {
     bool         bNeedClassID = true;
     GenTreeFlags callFlags    = GTF_EMPTY;
@@ -1063,6 +734,7 @@ GenTreeCall* Compiler::fgGetStaticsCCtorHelper(CORINFO_CLASS_HANDLE cls, CorInfo
             //     type          = TYP_BYREF;
             break;
 
+        case CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED:
         case CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE_NOCTOR:
             bNeedClassID = false;
             FALLTHROUGH;
@@ -1126,6 +798,11 @@ GenTreeCall* Compiler::fgGetStaticsCCtorHelper(CORINFO_CLASS_HANDLE cls, CorInfo
 
         result = gtNewHelperCallNode(helper, type, opModuleIDArg, opClassIDArg);
     }
+    else if (helper == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED)
+    {
+        result = gtNewHelperCallNode(helper, type, gtNewIconNode(typeIndex, TYP_UINT));
+        result->SetExpTLSFieldAccess();
+    }
     else
     {
         result = gtNewHelperCallNode(helper, type, opModuleIDArg);
diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h
index 01cdc178f92f0..ba94efddd1ac7 100644
--- a/src/coreclr/jit/gentree.h
+++ b/src/coreclr/jit/gentree.h
@@ -4054,6 +4054,7 @@ enum GenTreeCallFlags : unsigned int
     GTF_CALL_M_EXPANDED_EARLY          = 0x08000000, // the Virtual Call target address is expanded and placed in gtControlExpr in Morph rather than in Lower
     GTF_CALL_M_HAS_LATE_DEVIRT_INFO    = 0x10000000, // this call has late devirtualzation info
     GTF_CALL_M_LDVIRTFTN_INTERFACE     = 0x20000000, // ldvirtftn on an interface type
+    GTF_CALL_M_EXP_TLS_ACCESS          = 0x40000000, // this call is a helper for accessing a TLS-marked (thread static) field
 
 inline constexpr GenTreeCallFlags operator ~(GenTreeCallFlags a)
@@ -5394,6 +5395,21 @@ struct GenTreeCall final : public GenTree
         return (gtCallMoreFlags & GTF_CALL_M_EXP_RUNTIME_LOOKUP) != 0;
     }
 
+    void SetExpTLSFieldAccess()
+    {
+        gtCallMoreFlags |= GTF_CALL_M_EXP_TLS_ACCESS;
+    }
+
+    void ClearExpTLSFieldAccess()
+    {
+        gtCallMoreFlags &= ~GTF_CALL_M_EXP_TLS_ACCESS;
+    }
+
+    bool IsExpTLSFieldAccess() const
+    {
+        return (gtCallMoreFlags & GTF_CALL_M_EXP_TLS_ACCESS) != 0;
+    }
+
     void SetExpandedEarly()
     {
         gtCallMoreFlags |= GTF_CALL_M_EXPANDED_EARLY;
diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp
new file mode 100644
index 0000000000000..87d144b66a672
--- /dev/null
+++ b/src/coreclr/jit/helperexpansion.cpp
@@ -0,0 +1,1039 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
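+//
+// This file hosts the late "helper expansion" phases. Each phase rewrites a helper
+// call that was flagged during import (GTF_CALL_M_EXP_RUNTIME_LOOKUP or, above,
+// GTF_CALL_M_EXP_TLS_ACCESS) into explicit control flow: an inlined fast path plus
+// a rarely-taken block that preserves the original helper call as the fallback.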
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+// Obtain a constant pointer from a tree
+static void* GetConstantPointer(Compiler* comp, GenTree* tree)
+{
+    void* cns = nullptr;
+    if (tree->gtEffectiveVal()->IsCnsIntOrI())
+    {
+        cns = (void*)tree->gtEffectiveVal()->AsIntCon()->IconValue();
+    }
+    else if (comp->vnStore->IsVNConstant(tree->gtVNPair.GetLiberal()))
+    {
+        cns = (void*)comp->vnStore->CoercedConstantValue<ssize_t>(tree->gtVNPair.GetLiberal());
+    }
+    return cns;
+}
+
+// Save the expression to a local and append it as the last statement in exprBlock
+static GenTree* SpillExpression(Compiler* comp, GenTree* expr, BasicBlock* exprBlock, DebugInfo& debugInfo)
+{
+    unsigned const tmpNum  = comp->lvaGrabTemp(true DEBUGARG("spilling expr"));
+    Statement*     asgStmt = comp->fgNewStmtAtEnd(exprBlock, comp->gtNewTempAssign(tmpNum, expr), debugInfo);
+    comp->gtSetStmtInfo(asgStmt);
+    comp->fgSetStmtSeq(asgStmt);
+    return comp->gtNewLclvNode(tmpNum, genActualType(expr));
+}
+
+//------------------------------------------------------------------------------
+// gtNewRuntimeLookupHelperCallNode : Helper to create a runtime lookup call helper node.
+//
+// Arguments:
+//    pRuntimeLookup    - Runtime lookup descriptor
+//    ctxTree           - Tree representing the generic context
+//    compileTimeHandle - Compile-time handle the lookup resolves to
+//
+// Return Value:
+//    New CT_HELPER node
+//
+GenTreeCall* Compiler::gtNewRuntimeLookupHelperCallNode(CORINFO_RUNTIME_LOOKUP* pRuntimeLookup,
+                                                        GenTree*                ctxTree,
+                                                        void*                   compileTimeHandle)
+{
+    // Call the helper
+    // - Setup argNode with the pointer to the signature returned by the lookup
+    GenTree* argNode = gtNewIconEmbHndNode(pRuntimeLookup->signature, nullptr, GTF_ICON_GLOBAL_PTR, compileTimeHandle);
+    GenTreeCall* helperCall = gtNewHelperCallNode(pRuntimeLookup->helper, TYP_I_IMPL, ctxTree, argNode);
+
+    // No need to perform CSE/hoisting for signature node - it is expected to end up in a rarely-taken block after
+    // "Expand runtime lookups" phase.
+    argNode->gtFlags |= GTF_DONT_CSE;
+
+    // Leave a note that this method has runtime lookups we might want to expand (nullchecks, size checks) later.
+    // We can also consider marking current block as a runtime lookup holder to improve TP for Tier0
+    impInlineRoot()->setMethodHasExpRuntimeLookup();
+    helperCall->SetExpRuntimeLookup();
+    if (!impInlineRoot()->GetSignatureToLookupInfoMap()->Lookup(pRuntimeLookup->signature))
+    {
+        JITDUMP("Registering %p in SignatureToLookupInfoMap\n", pRuntimeLookup->signature)
+        impInlineRoot()->GetSignatureToLookupInfoMap()->Set(pRuntimeLookup->signature, *pRuntimeLookup);
+    }
+    return helperCall;
+}
+
+//------------------------------------------------------------------------------
+// fgExpandRuntimeLookups : partially expand runtime lookup helper calls
+//    to add a nullcheck [+ size check] and a fast path
+//
+// Returns:
+//    PhaseStatus indicating what, if anything, was changed.
+//
+// Notes:
+//    The runtime lookup itself is needed to access a handle in code shared between
+//    generic instantiations. The lookup depends on the typeContext, which is only available
+//    at runtime, and not at compile time. See the ASCII block diagrams in the comments below
+//    for a better understanding of how this phase expands runtime lookups.
+//
+PhaseStatus Compiler::fgExpandRuntimeLookups()
+{
+    PhaseStatus result = PhaseStatus::MODIFIED_NOTHING;
+
+    if (!doesMethodHaveExpRuntimeLookup())
+    {
+        // The method being compiled doesn't have expandable runtime lookups.
If it does + // and doesMethodHaveExpRuntimeLookup() still returns false we'll assert in LowerCall + return result; + } + + return fgExpandHelper<&Compiler::fgExpandRuntimeLookupsForCall>(false); +} + +bool Compiler::fgExpandRuntimeLookupsForCall(BasicBlock* block, Statement* stmt, GenTreeCall* call) +{ + assert(call->IsHelperCall()); + + if (!call->IsExpRuntimeLookup()) + { + return false; + } + + // Clear ExpRuntimeLookup flag so we won't miss any runtime lookup that needs partial expansion + call->ClearExpRuntimeLookup(); + + if (call->IsTailCall()) + { + // It is very unlikely to happen and is impossible to represent in C# + return false; + } + + assert(call->gtArgs.CountArgs() == 2); + // The call has the following signature: + // + // type = call(genericCtx, signatureCns); + // + void* signature = GetConstantPointer(this, call->gtArgs.GetArgByIndex(1)->GetNode()); + if (signature == nullptr) + { + // Technically, it is possible (e.g. it was CSE'd and then VN was erased), but for Debug mode we + // want to catch such cases as we really don't want to emit just a fallback call - it's too slow + assert(!"can't restore signature argument value"); + return false; + } + + JITDUMP("Expanding runtime lookup for [%06d] in " FMT_BB ":\n", dspTreeID(call), block->bbNum) + DISPTREE(call) + JITDUMP("\n") + + // Restore runtimeLookup using signature argument via a global dictionary + CORINFO_RUNTIME_LOOKUP runtimeLookup = {}; + const bool lookupFound = GetSignatureToLookupInfoMap()->Lookup(signature, &runtimeLookup); + assert(lookupFound); + + const bool needsSizeCheck = runtimeLookup.sizeOffset != CORINFO_NO_SIZE_CHECK; + if (needsSizeCheck) + { + JITDUMP("dynamic expansion, needs size check.\n") + } + + DebugInfo debugInfo = stmt->GetDebugInfo(); + + assert(runtimeLookup.indirections != 0); + assert(runtimeLookup.testForNull); + + // Split block right before the call tree + BasicBlock* prevBb = block; + GenTree** callUse = nullptr; + Statement* newFirstStmt = nullptr; + block = fgSplitBlockBeforeTree(block, stmt, call, &newFirstStmt, &callUse); + assert(prevBb != nullptr && block != nullptr); + + // Block ops inserted by the split need to be morphed here since we are after morph. + // We cannot morph stmt yet as we may modify it further below, and the morphing + // could invalidate callUse. 
+ while ((newFirstStmt != nullptr) && (newFirstStmt != stmt)) + { + fgMorphStmtBlockOps(block, newFirstStmt); + newFirstStmt = newFirstStmt->GetNextStmt(); + } + + GenTreeLclVar* rtLookupLcl = nullptr; + + // Mostly for Tier0: if the current statement is ASG(LCL, RuntimeLookup) + // we can drop it and use that LCL as the destination + if (stmt->GetRootNode()->OperIs(GT_ASG)) + { + GenTree* lhs = stmt->GetRootNode()->gtGetOp1(); + GenTree* rhs = stmt->GetRootNode()->gtGetOp2(); + if (lhs->OperIs(GT_LCL_VAR) && rhs == *callUse) + { + rtLookupLcl = gtClone(lhs)->AsLclVar(); + fgRemoveStmt(block, stmt); + } + } + + // Grab a temp to store result (it's assigned from either fastPathBb or fallbackBb) + if (rtLookupLcl == nullptr) + { + // Define a local for the result + unsigned rtLookupLclNum = lvaGrabTemp(true DEBUGARG("runtime lookup")); + lvaTable[rtLookupLclNum].lvType = TYP_I_IMPL; + rtLookupLcl = gtNewLclvNode(rtLookupLclNum, call->TypeGet()); + + *callUse = gtClone(rtLookupLcl); + + fgMorphStmtBlockOps(block, stmt); + gtUpdateStmtSideEffects(stmt); + } + + GenTree* ctxTree = call->gtArgs.GetArgByIndex(0)->GetNode(); + GenTree* sigNode = call->gtArgs.GetArgByIndex(1)->GetNode(); + + // Prepare slotPtr tree (TODO: consider sharing this part with impRuntimeLookup) + GenTree* slotPtrTree = gtCloneExpr(ctxTree); + GenTree* indOffTree = nullptr; + GenTree* lastIndOfTree = nullptr; + for (WORD i = 0; i < runtimeLookup.indirections; i++) + { + if ((i == 1 && runtimeLookup.indirectFirstOffset) || (i == 2 && runtimeLookup.indirectSecondOffset)) + { + indOffTree = SpillExpression(this, slotPtrTree, prevBb, debugInfo); + slotPtrTree = gtCloneExpr(indOffTree); + } + + // The last indirection could be subject to a size check (dynamic dictionary expansion) + const bool isLastIndirectionWithSizeCheck = (i == runtimeLookup.indirections - 1) && needsSizeCheck; + if (i != 0) + { + slotPtrTree = gtNewOperNode(GT_IND, TYP_I_IMPL, slotPtrTree); + slotPtrTree->gtFlags |= GTF_IND_NONFAULTING; + if (!isLastIndirectionWithSizeCheck) + { + slotPtrTree->gtFlags |= GTF_IND_INVARIANT; + } + } + + if ((i == 1 && runtimeLookup.indirectFirstOffset) || (i == 2 && runtimeLookup.indirectSecondOffset)) + { + slotPtrTree = gtNewOperNode(GT_ADD, TYP_I_IMPL, indOffTree, slotPtrTree); + } + if (runtimeLookup.offsets[i] != 0) + { + if (isLastIndirectionWithSizeCheck) + { + lastIndOfTree = SpillExpression(this, slotPtrTree, prevBb, debugInfo); + slotPtrTree = gtCloneExpr(lastIndOfTree); + } + slotPtrTree = + gtNewOperNode(GT_ADD, TYP_I_IMPL, slotPtrTree, gtNewIconNode(runtimeLookup.offsets[i], TYP_I_IMPL)); + } + } + + // Non-dynamic expansion case (no size check): + // + // prevBb(BBJ_NONE): [weight: 1.0] + // ... + // + // nullcheckBb(BBJ_COND): [weight: 1.0] + // if (*fastPathValue == null) + // goto fallbackBb; + // + // fastPathBb(BBJ_ALWAYS): [weight: 0.8] + // rtLookupLcl = *fastPathValue; + // goto block; + // + // fallbackBb(BBJ_NONE): [weight: 0.2] + // rtLookupLcl = HelperCall(); + // + // block(...): [weight: 1.0] + // use(rtLookupLcl); + // + + // null-check basic block + GenTree* fastPathValue = gtNewOperNode(GT_IND, TYP_I_IMPL, gtCloneExpr(slotPtrTree)); + fastPathValue->gtFlags |= GTF_IND_NONFAULTING; + // Save dictionary slot to a local (to be used by fast path) + GenTree* fastPathValueClone = + opts.OptimizationEnabled() ? 
fgMakeMultiUse(&fastPathValue) : gtCloneExpr(fastPathValue); + GenTree* nullcheckOp = gtNewOperNode(GT_EQ, TYP_INT, fastPathValue, gtNewIconNode(0, TYP_I_IMPL)); + nullcheckOp->gtFlags |= GTF_RELOP_JMP_USED; + BasicBlock* nullcheckBb = + fgNewBBFromTreeAfter(BBJ_COND, prevBb, gtNewOperNode(GT_JTRUE, TYP_VOID, nullcheckOp), debugInfo); + + // Fallback basic block + GenTree* asgFallbackValue = gtNewAssignNode(gtClone(rtLookupLcl), call); + BasicBlock* fallbackBb = fgNewBBFromTreeAfter(BBJ_NONE, nullcheckBb, asgFallbackValue, debugInfo, true); + + // Fast-path basic block + GenTree* asgFastpathValue = gtNewAssignNode(gtClone(rtLookupLcl), fastPathValueClone); + BasicBlock* fastPathBb = fgNewBBFromTreeAfter(BBJ_ALWAYS, nullcheckBb, asgFastpathValue, debugInfo); + + BasicBlock* sizeCheckBb = nullptr; + if (needsSizeCheck) + { + // Dynamic expansion case (sizeCheckBb is added and some preds are changed): + // + // prevBb(BBJ_NONE): [weight: 1.0] + // + // sizeCheckBb(BBJ_COND): [weight: 1.0] + // if (sizeValue <= offsetValue) + // goto fallbackBb; + // ... + // + // nullcheckBb(BBJ_COND): [weight: 0.8] + // if (*fastPathValue == null) + // goto fallbackBb; + // + // fastPathBb(BBJ_ALWAYS): [weight: 0.64] + // rtLookupLcl = *fastPathValue; + // goto block; + // + // fallbackBb(BBJ_NONE): [weight: 0.36] + // rtLookupLcl = HelperCall(); + // + // block(...): [weight: 1.0] + // use(rtLookupLcl); + // + + // sizeValue = dictionary[pRuntimeLookup->sizeOffset] + GenTreeIntCon* sizeOffset = gtNewIconNode(runtimeLookup.sizeOffset, TYP_I_IMPL); + assert(lastIndOfTree != nullptr); + GenTree* sizeValueOffset = gtNewOperNode(GT_ADD, TYP_I_IMPL, lastIndOfTree, sizeOffset); + GenTree* sizeValue = gtNewOperNode(GT_IND, TYP_I_IMPL, sizeValueOffset); + sizeValue->gtFlags |= GTF_IND_NONFAULTING; + + // sizeCheck fails if sizeValue <= pRuntimeLookup->offsets[i] + GenTree* offsetValue = gtNewIconNode(runtimeLookup.offsets[runtimeLookup.indirections - 1], TYP_I_IMPL); + GenTree* sizeCheck = gtNewOperNode(GT_LE, TYP_INT, sizeValue, offsetValue); + sizeCheck->gtFlags |= GTF_RELOP_JMP_USED; + + GenTree* jtrue = gtNewOperNode(GT_JTRUE, TYP_VOID, sizeCheck); + sizeCheckBb = fgNewBBFromTreeAfter(BBJ_COND, prevBb, jtrue, debugInfo); + } + + // + // Update preds in all new blocks + // + fgRemoveRefPred(block, prevBb); + fgAddRefPred(block, fastPathBb); + fgAddRefPred(block, fallbackBb); + nullcheckBb->bbJumpDest = fallbackBb; + fastPathBb->bbJumpDest = block; + + if (needsSizeCheck) + { + // sizeCheckBb is the first block after prevBb + fgAddRefPred(sizeCheckBb, prevBb); + // sizeCheckBb flows into nullcheckBb in case if the size check passes + fgAddRefPred(nullcheckBb, sizeCheckBb); + // fallbackBb is reachable from both nullcheckBb and sizeCheckBb + fgAddRefPred(fallbackBb, nullcheckBb); + fgAddRefPred(fallbackBb, sizeCheckBb); + // fastPathBb is only reachable from successful nullcheckBb + fgAddRefPred(fastPathBb, nullcheckBb); + // sizeCheckBb fails - jump to fallbackBb + sizeCheckBb->bbJumpDest = fallbackBb; + } + else + { + // nullcheckBb is the first block after prevBb + fgAddRefPred(nullcheckBb, prevBb); + // No size check, nullcheckBb jumps to fast path + fgAddRefPred(fastPathBb, nullcheckBb); + // fallbackBb is only reachable from nullcheckBb (jump destination) + fgAddRefPred(fallbackBb, nullcheckBb); + } + + // + // Re-distribute weights (see '[weight: X]' on the diagrams above) + // TODO: consider marking fallbackBb as rarely-taken + // + block->inheritWeight(prevBb); + if (needsSizeCheck) + { + 
sizeCheckBb->inheritWeight(prevBb);
+        // 80% chance we pass nullcheck
+        nullcheckBb->inheritWeightPercentage(sizeCheckBb, 80);
+        // 64% (0.8 * 0.8) chance we pass both nullcheck and sizecheck
+        fastPathBb->inheritWeightPercentage(nullcheckBb, 80);
+        // 100-64=36% chance we fail either nullcheck or sizecheck
+        fallbackBb->inheritWeightPercentage(sizeCheckBb, 36);
+    }
+    else
+    {
+        nullcheckBb->inheritWeight(prevBb);
+        // 80% chance we pass nullcheck
+        fastPathBb->inheritWeightPercentage(nullcheckBb, 80);
+        // 20% chance we fail nullcheck (TODO: Consider making it cold (0%))
+        fallbackBb->inheritWeightPercentage(nullcheckBb, 20);
+    }
+
+    //
+    // Update loop info
+    //
+    nullcheckBb->bbNatLoopNum = prevBb->bbNatLoopNum;
+    fastPathBb->bbNatLoopNum  = prevBb->bbNatLoopNum;
+    fallbackBb->bbNatLoopNum  = prevBb->bbNatLoopNum;
+    if (needsSizeCheck)
+    {
+        sizeCheckBb->bbNatLoopNum = prevBb->bbNatLoopNum;
+    }
+
+    // All blocks are expected to be in the same EH region
+    assert(BasicBlock::sameEHRegion(prevBb, block));
+    assert(BasicBlock::sameEHRegion(prevBb, nullcheckBb));
+    assert(BasicBlock::sameEHRegion(prevBb, fastPathBb));
+    if (needsSizeCheck)
+    {
+        assert(BasicBlock::sameEHRegion(prevBb, sizeCheckBb));
+    }
+
+    if (opts.OptimizationEnabled())
+    {
+        fgReorderBlocks(/* useProfileData */ false);
+        fgUpdateChangedFlowGraph(FlowGraphUpdates::COMPUTE_BASICS);
+    }
+    return true;
+}
+
+//------------------------------------------------------------------------------
+// fgExpandThreadLocalAccess: Inline the CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED
+//    helper. See fgExpandThreadLocalAccessForCall for details.
+//
+// Returns:
+//    PhaseStatus indicating what, if anything, was changed.
+//
+PhaseStatus Compiler::fgExpandThreadLocalAccess()
+{
+    PhaseStatus result = PhaseStatus::MODIFIED_NOTHING;
+
+    if (!doesMethodHasTlsFieldAccess())
+    {
+        // TP: nothing to expand in the current method
+        JITDUMP("Nothing to expand.\n")
+        return result;
+    }
+
+    if (opts.OptimizationDisabled())
+    {
+        JITDUMP("Optimizations aren't allowed - bail out.\n")
+        return result;
+    }
+
+    // TODO: Replace with opts.compCodeOpt once it's fixed
+    const bool preferSize = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_SIZE_OPT);
+    if (preferSize)
+    {
+        // The optimization comes with a codegen size increase
+        JITDUMP("Optimized for size - bail out.\n")
+        return result;
+    }
+
+    return fgExpandHelper<&Compiler::fgExpandThreadLocalAccessForCall>(true);
+}
+
+//------------------------------------------------------------------------------
+// fgExpandThreadLocalAccessForCall: Expand the CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED
+//    helper call that accesses fields marked with [ThreadStatic].
+//
+// Arguments:
+//    block - Block containing the helper call to expand
+//    stmt  - Statement containing the helper call
+//    call  - The helper call
+//
+// Returns:
+//    true if the helper call was expanded.
+//
+// Notes:
+//    A cache stored in coreclr's thread local storage (TLS) maps a typeIndex (embedded in
+//    the code at JIT time) to the base address of the corresponding static block. This method
+//    generates code to fetch the TLS data and look up that cache. If the typeIndex of the
+//    current field's enclosing type is present in the cache, the cached static block base is
+//    extracted and used directly at the uses.
+//    If the entry is not present, the helper is called; it creates the entry for the current
+//    static block in the cache.
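+//
+//    In pseudocode, the fast path stitched together below is roughly as follows (the offsets
+//    come from CORINFO_THREAD_STATIC_BLOCKS_INFO; this is a sketch of intent, not the exact IR):
+//
+//        tlsBase = ((void**)TEB.ThreadLocalStoragePointer)[_tls_index];
+//        if (typeIndex <= *(uint32_t*)(tlsBase + offsetOfMaxThreadStaticBlocks))
+//        {
+//            base = (*(void***)(tlsBase + offsetOfThreadStaticBlocks))[typeIndex];
+//            if (base != nullptr)
+//                goto done;                   // fast path
+//        }
+//        base = HelperCall(typeIndex);        // fallback: allocates and caches the block
+//    done:
+//        use(base);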
+//
+bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock* block, Statement* stmt, GenTreeCall* call)
+{
+    assert(call->IsHelperCall());
+    if (!call->IsExpTLSFieldAccess())
+    {
+        return false;
+    }
+
+    CORINFO_THREAD_STATIC_BLOCKS_INFO threadStaticBlocksInfo;
+    info.compCompHnd->getThreadLocalStaticBlocksInfo(&threadStaticBlocksInfo);
+    JITDUMP("getThreadLocalStaticBlocksInfo:\n");
+    JITDUMP("tlsIndex = %zd\n", (ssize_t)threadStaticBlocksInfo.tlsIndex.addr);
+    JITDUMP("offsetOfMaxThreadStaticBlocks = %u\n", threadStaticBlocksInfo.offsetOfMaxThreadStaticBlocks);
+    JITDUMP("offsetOfThreadLocalStoragePointer = %u\n", threadStaticBlocksInfo.offsetOfThreadLocalStoragePointer);
+    JITDUMP("offsetOfThreadStaticBlocks = %u\n", threadStaticBlocksInfo.offsetOfThreadStaticBlocks);
+
+    assert(threadStaticBlocksInfo.tlsIndex.accessType == IAT_VALUE);
+    assert(eeGetHelperNum(call->gtCallMethHnd) == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED);
+
+    JITDUMP("Expanding thread static local access for [%06d] in " FMT_BB ":\n", dspTreeID(call), block->bbNum);
+    DISPTREE(call);
+    JITDUMP("\n");
+
+    call->ClearExpTLSFieldAccess();
+    assert(call->gtArgs.CountArgs() == 1);
+
+    // Split block right before the call tree
+    BasicBlock* prevBb       = block;
+    GenTree**   callUse      = nullptr;
+    Statement*  newFirstStmt = nullptr;
+    DebugInfo   debugInfo    = stmt->GetDebugInfo();
+    block                    = fgSplitBlockBeforeTree(block, stmt, call, &newFirstStmt, &callUse);
+    assert(prevBb != nullptr && block != nullptr);
+
+    // Block ops inserted by the split need to be morphed here since we are after morph.
+    // We cannot morph stmt yet as we may modify it further below, and the morphing
+    // could invalidate callUse.
+    while ((newFirstStmt != nullptr) && (newFirstStmt != stmt))
+    {
+        fgMorphStmtBlockOps(block, newFirstStmt);
+        newFirstStmt = newFirstStmt->GetNextStmt();
+    }
+
+    GenTreeLclVar* threadStaticBlockLcl = nullptr;
+
+    // Grab a temp to store the result (it's assigned from either fastPathBb or fallbackBb)
+    unsigned threadStaticBlockLclNum         = lvaGrabTemp(true DEBUGARG("TLS field access"));
+    lvaTable[threadStaticBlockLclNum].lvType = TYP_I_IMPL;
+    threadStaticBlockLcl                     = gtNewLclvNode(threadStaticBlockLclNum, call->TypeGet());
+
+    *callUse = gtClone(threadStaticBlockLcl);
+
+    fgMorphStmtBlockOps(block, stmt);
+    gtUpdateStmtSideEffects(stmt);
+
+    GenTree* typeThreadStaticBlockIndexValue = call->gtArgs.GetArgByIndex(0)->GetNode();
+
+    void** pIdAddr = nullptr;
+
+    size_t   tlsIndexValue = (size_t)threadStaticBlocksInfo.tlsIndex.addr;
+    GenTree* dllRef        = nullptr;
+
+    if (tlsIndexValue != 0)
+    {
+        dllRef = gtNewIconHandleNode(tlsIndexValue * TARGET_POINTER_SIZE, GTF_ICON_TLS_HDL);
+    }
+
+    // Mark this ICON as a TLS_HDL, codegen will use FS:[cns] or GS:[cns]
+    GenTree* tlsRef = gtNewIconHandleNode(threadStaticBlocksInfo.offsetOfThreadLocalStoragePointer, GTF_ICON_TLS_HDL);
+
+    tlsRef = gtNewIndir(TYP_I_IMPL, tlsRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT);
+
+    if (dllRef != nullptr)
+    {
+        // Add the dllRef to produce the thread local storage reference for coreclr
+        tlsRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsRef, dllRef);
+    }
+
+    // Base of coreclr's thread local storage
+    GenTree* tlsValue = gtNewIndir(TYP_I_IMPL, tlsRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT);
+
+    // Cache the tls value
+    unsigned tlsLclNum         = lvaGrabTemp(true DEBUGARG("TLS access"));
+    lvaTable[tlsLclNum].lvType = TYP_I_IMPL;
+    GenTree* defTlsLclValue    = gtNewLclvNode(tlsLclNum, TYP_I_IMPL);
+    GenTree* useTlsLclValue    = gtCloneExpr(defTlsLclValue); // Create a use
for tlsLclValue + GenTree* asgTlsValue = gtNewAssignNode(defTlsLclValue, tlsValue); + + // Create tree for "maxThreadStaticBlocks = tls[offsetOfMaxThreadStaticBlocks]" + GenTree* offsetOfMaxThreadStaticBlocks = + gtNewIconNode(threadStaticBlocksInfo.offsetOfMaxThreadStaticBlocks, TYP_I_IMPL); + GenTree* maxThreadStaticBlocksRef = + gtNewOperNode(GT_ADD, TYP_I_IMPL, gtCloneExpr(useTlsLclValue), offsetOfMaxThreadStaticBlocks); + GenTree* maxThreadStaticBlocksValue = + gtNewIndir(TYP_INT, maxThreadStaticBlocksRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); + + // Create tree for "if (maxThreadStaticBlocks < typeIndex)" + GenTree* maxThreadStaticBlocksCond = + gtNewOperNode(GT_LT, TYP_INT, maxThreadStaticBlocksValue, gtCloneExpr(typeThreadStaticBlockIndexValue)); + maxThreadStaticBlocksCond = gtNewOperNode(GT_JTRUE, TYP_VOID, maxThreadStaticBlocksCond); + + // Create tree for "threadStaticBlockBase = tls[offsetOfThreadStaticBlocks]" + GenTree* offsetOfThreadStaticBlocks = gtNewIconNode(threadStaticBlocksInfo.offsetOfThreadStaticBlocks, TYP_I_IMPL); + GenTree* threadStaticBlocksRef = + gtNewOperNode(GT_ADD, TYP_I_IMPL, gtCloneExpr(useTlsLclValue), offsetOfThreadStaticBlocks); + GenTree* threadStaticBlocksValue = + gtNewIndir(TYP_I_IMPL, threadStaticBlocksRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); + + // Create tree to "threadStaticBlockValue = threadStaticBlockBase[typeIndex]" + typeThreadStaticBlockIndexValue = gtNewOperNode(GT_MUL, TYP_INT, gtCloneExpr(typeThreadStaticBlockIndexValue), + gtNewIconNode(TARGET_POINTER_SIZE, TYP_INT)); + GenTree* typeThreadStaticBlockRef = + gtNewOperNode(GT_ADD, TYP_I_IMPL, threadStaticBlocksValue, typeThreadStaticBlockIndexValue); + GenTree* typeThreadStaticBlockValue = gtNewIndir(TYP_I_IMPL, typeThreadStaticBlockRef, GTF_IND_NONFAULTING); + + // Cache the threadStaticBlock value + unsigned threadStaticBlockBaseLclNum = lvaGrabTemp(true DEBUGARG("ThreadStaticBlockBase access")); + lvaTable[threadStaticBlockBaseLclNum].lvType = TYP_I_IMPL; + GenTree* defThreadStaticBlockBaseLclValue = gtNewLclvNode(threadStaticBlockBaseLclNum, TYP_I_IMPL); + GenTree* useThreadStaticBlockBaseLclValue = + gtCloneExpr(defThreadStaticBlockBaseLclValue); // StaticBlockBaseLclValue that will be used + GenTree* asgThreadStaticBlockBase = gtNewAssignNode(defThreadStaticBlockBaseLclValue, typeThreadStaticBlockValue); + + // Create tree for "if (threadStaticBlockValue != nullptr)" + GenTree* threadStaticBlockNullCond = + gtNewOperNode(GT_NE, TYP_INT, useThreadStaticBlockBaseLclValue, gtNewIconNode(0, TYP_I_IMPL)); + threadStaticBlockNullCond = gtNewOperNode(GT_JTRUE, TYP_VOID, threadStaticBlockNullCond); + + // prevBb (BBJ_NONE): [weight: 1.0] + // ... 
+ // + // maxThreadStaticBlocksCondBB (BBJ_COND): [weight: 1.0] + // asgTlsValue = tls_access_code + // if (maxThreadStaticBlocks < typeIndex) + // goto fallbackBb; + // + // threadStaticBlockNullCondBB (BBJ_COND): [weight: 1.0] + // fastPathValue = t_threadStaticBlocks[typeIndex] + // if (fastPathValue != nullptr) + // goto fastPathBb; + // + // fallbackBb (BBJ_ALWAYS): [weight: 0] + // threadStaticBlockBase = HelperCall(); + // goto block; + // + // fastPathBb(BBJ_ALWAYS): [weight: 1.0] + // threadStaticBlockBase = fastPathValue; + // + // block (...): [weight: 1.0] + // use(threadStaticBlockBase); + + // maxThreadStaticBlocksCondBB + BasicBlock* maxThreadStaticBlocksCondBB = fgNewBBFromTreeAfter(BBJ_COND, prevBb, asgTlsValue, debugInfo); + + fgInsertStmtAfter(maxThreadStaticBlocksCondBB, maxThreadStaticBlocksCondBB->firstStmt(), + fgNewStmtFromTree(maxThreadStaticBlocksCond)); + + // threadStaticBlockNullCondBB + BasicBlock* threadStaticBlockNullCondBB = + fgNewBBFromTreeAfter(BBJ_COND, maxThreadStaticBlocksCondBB, asgThreadStaticBlockBase, debugInfo); + fgInsertStmtAfter(threadStaticBlockNullCondBB, threadStaticBlockNullCondBB->firstStmt(), + fgNewStmtFromTree(threadStaticBlockNullCond)); + + // fallbackBb + GenTree* asgFallbackValue = gtNewAssignNode(gtClone(threadStaticBlockLcl), call); + BasicBlock* fallbackBb = + fgNewBBFromTreeAfter(BBJ_ALWAYS, threadStaticBlockNullCondBB, asgFallbackValue, debugInfo, true); + + // fastPathBb + GenTree* asgFastPathValue = + gtNewAssignNode(gtClone(threadStaticBlockLcl), gtCloneExpr(useThreadStaticBlockBaseLclValue)); + BasicBlock* fastPathBb = fgNewBBFromTreeAfter(BBJ_ALWAYS, fallbackBb, asgFastPathValue, debugInfo, true); + + // + // Update preds in all new blocks + // + fgRemoveRefPred(block, prevBb); + fgAddRefPred(maxThreadStaticBlocksCondBB, prevBb); + + fgAddRefPred(threadStaticBlockNullCondBB, maxThreadStaticBlocksCondBB); + fgAddRefPred(fallbackBb, maxThreadStaticBlocksCondBB); + + fgAddRefPred(fastPathBb, threadStaticBlockNullCondBB); + fgAddRefPred(fallbackBb, threadStaticBlockNullCondBB); + + fgAddRefPred(block, fastPathBb); + fgAddRefPred(block, fallbackBb); + + maxThreadStaticBlocksCondBB->bbJumpDest = fallbackBb; + threadStaticBlockNullCondBB->bbJumpDest = fastPathBb; + fastPathBb->bbJumpDest = block; + fallbackBb->bbJumpDest = block; + + // Inherit the weights + block->inheritWeight(prevBb); + maxThreadStaticBlocksCondBB->inheritWeight(prevBb); + threadStaticBlockNullCondBB->inheritWeight(prevBb); + fastPathBb->inheritWeight(prevBb); + + // fallback will just execute first time + fallbackBb->bbSetRunRarely(); + + // + // Update loop info if loop table is known to be valid + // + maxThreadStaticBlocksCondBB->bbNatLoopNum = prevBb->bbNatLoopNum; + threadStaticBlockNullCondBB->bbNatLoopNum = prevBb->bbNatLoopNum; + fastPathBb->bbNatLoopNum = prevBb->bbNatLoopNum; + fallbackBb->bbNatLoopNum = prevBb->bbNatLoopNum; + + // All blocks are expected to be in the same EH region + assert(BasicBlock::sameEHRegion(prevBb, block)); + assert(BasicBlock::sameEHRegion(prevBb, maxThreadStaticBlocksCondBB)); + assert(BasicBlock::sameEHRegion(prevBb, threadStaticBlockNullCondBB)); + assert(BasicBlock::sameEHRegion(prevBb, fastPathBb)); + + fgReorderBlocks(/* useProfileData */ false); + fgUpdateChangedFlowGraph(FlowGraphUpdates::COMPUTE_BASICS); + + return true; +} + +//------------------------------------------------------------------------------ +// fgExpandHelper: Expand the helper using ExpansionFunction. 
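+//    Each expansion phase supplies ExpansionFunction, a Compiler member callback of shape
+//    bool(BasicBlock*, Statement*, GenTreeCall*) that expands a single helper call. A sketch
+//    of how a new phase would plug in (fgExpandSomething / fgExpandSomethingForCall are
+//    hypothetical names):
+//
+//        PhaseStatus Compiler::fgExpandSomething()
+//        {
+//            return fgExpandHelper<&Compiler::fgExpandSomethingForCall>(true /* skip rarely-run blocks */);
+//        }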
+//
+// Returns:
+//    PhaseStatus indicating what, if anything, was changed.
+//
+template <bool (Compiler::*ExpansionFunction)(BasicBlock*, Statement*, GenTreeCall*)>
+PhaseStatus Compiler::fgExpandHelper(bool skipRarelyRunBlocks)
+{
+    PhaseStatus result = PhaseStatus::MODIFIED_NOTHING;
+    for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
+    {
+        if (skipRarelyRunBlocks && block->isRunRarely())
+        {
+            // It's just an optimization - don't waste time on rarely executed blocks
+            continue;
+        }
+
+        // Expand and visit the last block again to find more candidates
+        while (fgExpandHelperForBlock<ExpansionFunction>(block))
+        {
+            result = PhaseStatus::MODIFIED_EVERYTHING;
+        }
+    }
+    return result;
+}
+
+//------------------------------------------------------------------------------
+// fgExpandHelperForBlock: Scans through all the statements of the `block` and
+//    invokes ExpansionFunction for each helper call it finds.
+//
+// Arguments:
+//    block - block to scan for helper calls to expand
+//
+// Returns:
+//    true if a helper call was expanded
+//
+template <bool (Compiler::*ExpansionFunction)(BasicBlock*, Statement*, GenTreeCall*)>
+bool Compiler::fgExpandHelperForBlock(BasicBlock* block)
+{
+    for (Statement* const stmt : block->NonPhiStatements())
+    {
+        if ((stmt->GetRootNode()->gtFlags & GTF_CALL) == 0)
+        {
+            // TP: Stmt has no calls - bail out
+            continue;
+        }
+
+        for (GenTree* const tree : stmt->TreeList())
+        {
+            if (!tree->IsHelperCall())
+            {
+                continue;
+            }
+
+            if ((this->*ExpansionFunction)(block, stmt, tree->AsCall()))
+            {
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+//------------------------------------------------------------------------------
+// fgExpandStaticInit: Partially expand static initialization calls, e.g.:
+//
+//    tmp = CORINFO_HELP_X_NONGCSTATIC_BASE();
+//
+// into:
+//
+//    if (isClassAlreadyInited)
+//        CORINFO_HELP_X_NONGCSTATIC_BASE();
+//    tmp = fastPath;
+//
+// Returns:
+//    PhaseStatus indicating what, if anything, was changed.
+//
+PhaseStatus Compiler::fgExpandStaticInit()
+{
+    PhaseStatus result = PhaseStatus::MODIFIED_NOTHING;
+
+    if (!doesMethodHaveStaticInit())
+    {
+        // TP: nothing to expand in the current method
+        JITDUMP("Nothing to expand.\n")
+        return result;
+    }
+
+    if (opts.OptimizationDisabled())
+    {
+        JITDUMP("Optimizations aren't allowed - bail out.\n")
+        return result;
+    }
+
+    // TODO: Replace with opts.compCodeOpt once it's fixed
+    const bool preferSize = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_SIZE_OPT);
+    if (preferSize)
+    {
+        // The optimization comes with a codegen size increase
+        JITDUMP("Optimized for size - bail out.\n")
+        return result;
+    }
+
+    return fgExpandHelper<&Compiler::fgExpandStaticInitForCall>(true);
+}
+
+//------------------------------------------------------------------------------
+// fgExpandStaticInitForCall: Partially expand a given static initialization call.
+//    Also, see fgExpandStaticInit's comments.
+// +// Arguments: +// block - call's block +// stmt - call's statement +// call - call that represents a static initialization +// +// Returns: +// true if a static initialization was expanded +// +bool Compiler::fgExpandStaticInitForCall(BasicBlock* block, Statement* stmt, GenTreeCall* call) +{ + assert(call->IsHelperCall()); + + bool isGc = false; + StaticHelperReturnValue retValKind = {}; + if (!IsStaticHelperEligibleForExpansion(call, &isGc, &retValKind)) + { + return false; + } + + assert(!call->IsTailCall()); + + if (call->gtInitClsHnd == NO_CLASS_HANDLE) + { + assert(!"helper call was created without gtInitClsHnd or already visited"); + return false; + } + + int isInitOffset = 0; + CORINFO_CONST_LOOKUP flagAddr = {}; + if (!info.compCompHnd->getIsClassInitedFlagAddress(call->gtInitClsHnd, &flagAddr, &isInitOffset)) + { + JITDUMP("getIsClassInitedFlagAddress returned false - bail out.\n") + return false; + } + + CORINFO_CONST_LOOKUP staticBaseAddr = {}; + if ((retValKind == SHRV_STATIC_BASE_PTR) && + !info.compCompHnd->getStaticBaseAddress(call->gtInitClsHnd, isGc, &staticBaseAddr)) + { + JITDUMP("getStaticBaseAddress returned false - bail out.\n") + return false; + } + + JITDUMP("Expanding static initialization for '%s', call: [%06d] in " FMT_BB "\n", + eeGetClassName(call->gtInitClsHnd), dspTreeID(call), block->bbNum); + + DebugInfo debugInfo = stmt->GetDebugInfo(); + + // Split block right before the call tree + BasicBlock* prevBb = block; + GenTree** callUse = nullptr; + Statement* newFirstStmt = nullptr; + block = fgSplitBlockBeforeTree(block, stmt, call, &newFirstStmt, &callUse); + assert(prevBb != nullptr && block != nullptr); + + // Block ops inserted by the split need to be morphed here since we are after morph. + // We cannot morph stmt yet as we may modify it further below, and the morphing + // could invalidate callUse. + while ((newFirstStmt != nullptr) && (newFirstStmt != stmt)) + { + fgMorphStmtBlockOps(block, newFirstStmt); + newFirstStmt = newFirstStmt->GetNextStmt(); + } + + // + // Create new blocks. Essentially, we want to transform this: + // + // staticBase = helperCall(); + // + // into: + // + // if (!isInitialized) + // { + // helperCall(); // we don't use its return value + // } + // staticBase = fastPath; + // + + // The initialization check looks like this for JIT: + // + // * JTRUE void + // \--* EQ int + // +--* AND int + // | +--* IND int + // | | \--* CNS_INT(h) long 0x.... const ptr + // | \--* CNS_INT int 1 (bit mask) + // \--* CNS_INT int 1 + // + // For NativeAOT it's: + // + // * JTRUE void + // \--* EQ int + // +--* IND nint + // | \--* ADD long + // | +--* CNS_INT(h) long 0x.... const ptr + // | \--* CNS_INT int -8 (offset) + // \--* CNS_INT int 0 + // + assert(flagAddr.accessType == IAT_VALUE); + + GenTree* cachedStaticBase = nullptr; + GenTree* isInitedActualValueNode; + GenTree* isInitedExpectedValue; + if (IsTargetAbi(CORINFO_NATIVEAOT_ABI)) + { + GenTree* baseAddr = gtNewIconHandleNode((size_t)flagAddr.addr, GTF_ICON_GLOBAL_PTR); + + // Save it to a temp - we'll be using its value for the replacementNode. 
+        // This leads to some size savings on NativeAOT
+        if ((staticBaseAddr.addr == flagAddr.addr) && (staticBaseAddr.accessType == flagAddr.accessType))
+        {
+            cachedStaticBase = fgInsertCommaFormTemp(&baseAddr);
+        }
+
+        // Don't fold ADD(CNS1, CNS2) here since the result won't be reloc-friendly for AOT
+        GenTree* offsetNode = gtNewOperNode(GT_ADD, TYP_I_IMPL, baseAddr, gtNewIconNode(isInitOffset));
+        isInitedActualValueNode = gtNewIndir(TYP_I_IMPL, offsetNode, GTF_IND_NONFAULTING);
+        isInitedActualValueNode->gtFlags |= GTF_GLOB_REF;
+
+        // 0 means "initialized" on NativeAOT
+        isInitedExpectedValue = gtNewIconNode(0, TYP_I_IMPL);
+    }
+    else
+    {
+        assert(isInitOffset == 0);
+
+        isInitedActualValueNode = gtNewIndOfIconHandleNode(TYP_INT, (size_t)flagAddr.addr, GTF_ICON_GLOBAL_PTR, false);
+
+        // Check ClassInitFlags::INITIALIZED_FLAG bit
+        isInitedActualValueNode = gtNewOperNode(GT_AND, TYP_INT, isInitedActualValueNode, gtNewIconNode(1));
+        isInitedExpectedValue = gtNewIconNode(1);
+    }
+
+    GenTree* isInitedCmp = gtNewOperNode(GT_EQ, TYP_INT, isInitedActualValueNode, isInitedExpectedValue);
+    isInitedCmp->gtFlags |= GTF_RELOP_JMP_USED;
+    BasicBlock* isInitedBb =
+        fgNewBBFromTreeAfter(BBJ_COND, prevBb, gtNewOperNode(GT_JTRUE, TYP_VOID, isInitedCmp), debugInfo);
+
+    // Fallback basic block
+    // TODO-CQ: for JIT we can replace the original call with CORINFO_HELP_INITCLASS
+    // that only accepts a single argument
+    BasicBlock* helperCallBb = fgNewBBFromTreeAfter(BBJ_NONE, isInitedBb, call, debugInfo, true);
+
+    GenTree* replacementNode = nullptr;
+    if (retValKind == SHRV_STATIC_BASE_PTR)
+    {
+        // Replace the call with a constant pointer to the statics base
+        assert(staticBaseAddr.addr != nullptr);
+
+        // Use the local if the address is already materialized and cached
+        if (cachedStaticBase != nullptr)
+        {
+            assert(staticBaseAddr.accessType == IAT_VALUE);
+            replacementNode = cachedStaticBase;
+        }
+        else if (staticBaseAddr.accessType == IAT_VALUE)
+        {
+            replacementNode = gtNewIconHandleNode((size_t)staticBaseAddr.addr, GTF_ICON_STATIC_HDL);
+        }
+        else
+        {
+            assert(staticBaseAddr.accessType == IAT_PVALUE);
+            replacementNode =
+                gtNewIndOfIconHandleNode(TYP_I_IMPL, (size_t)staticBaseAddr.addr, GTF_ICON_GLOBAL_PTR, false);
+        }
+    }
+
+    if (replacementNode == nullptr)
+    {
+        (*callUse)->gtBashToNOP();
+    }
+    else
+    {
+        *callUse = replacementNode;
+    }
+
+    fgMorphStmtBlockOps(block, stmt);
+    gtUpdateStmtSideEffects(stmt);
+
+    // Final block layout looks like this:
+    //
+    // prevBb(BBJ_NONE):                [weight: 1.0]
+    //     ...
+    //
+    // isInitedBb(BBJ_COND):            [weight: 1.0]
+    //     if (isInited)
+    //         goto block;
+    //
+    // helperCallBb(BBJ_NONE):          [weight: 0.0]
+    //     helperCall();
+    //
+    // block(...):                      [weight: 1.0]
+    //     use(staticBase);
+    //
+    // Whether we use helperCall's value or not depends on the helper itself.
+
+    //
+    // Update preds in all new blocks
+    //
+
+    // Unlink block and prevBb
+    fgRemoveRefPred(block, prevBb);
+
+    // block now has two preds: isInitedBb and helperCallBb
+    fgAddRefPred(block, isInitedBb);
+    fgAddRefPred(block, helperCallBb);
+
+    // prevBb always flows into isInitedBb
+    fgAddRefPred(isInitedBb, prevBb);
+
+    // helperCallBb has a single pred - isInitedBb
+    fgAddRefPred(helperCallBb, isInitedBb);
+
+    // When the class is already initialized, isInitedBb jumps straight to block, skipping helperCallBb
+    isInitedBb->bbJumpDest = block;
+
+    //
+    // Re-distribute weights
+    //
+
+    block->inheritWeight(prevBb);
+    isInitedBb->inheritWeight(prevBb);
+    helperCallBb->bbSetRunRarely();
+
+    //
+    // Update loop info if loop table is known to be valid
+    //
+
+    isInitedBb->bbNatLoopNum = prevBb->bbNatLoopNum;
+    helperCallBb->bbNatLoopNum = prevBb->bbNatLoopNum;
+
+    // All blocks are expected to be in the same EH region
+    assert(BasicBlock::sameEHRegion(prevBb, block));
+    assert(BasicBlock::sameEHRegion(prevBb, isInitedBb));
+
+    // Extra step: merge prevBb with isInitedBb if possible
+    if (fgCanCompactBlocks(prevBb, isInitedBb))
+    {
+        fgCompactBlocks(prevBb, isInitedBb);
+    }
+
+    // Clear gtInitClsHnd as a mark that we've already visited this call
+    call->gtInitClsHnd = NO_CLASS_HANDLE;
+    return true;
+}
diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp
index a91ab29e439d9..cba5715528d18 100644
--- a/src/coreclr/jit/importer.cpp
+++ b/src/coreclr/jit/importer.cpp
@@ -4151,6 +4151,7 @@ GenTree* Compiler::impImportStaticFieldAccess(CORINFO_RESOLVED_TOKEN* pResolvedT
     }
 
     bool isStaticReadOnlyInitedRef = false;
+    unsigned typeIndex = 0;
     GenTree* op1;
     switch (pFieldInfo->fieldAccessor)
     {
@@ -4185,6 +4186,11 @@ GenTree* Compiler::impImportStaticFieldAccess(CORINFO_RESOLVED_TOKEN* pResolvedT
             }
             break;
 
+        case CORINFO_FIELD_STATIC_TLS_MANAGED:
+
+            typeIndex = info.compCompHnd->getThreadLocalFieldInfo(pResolvedToken->hField);
+
+            FALLTHROUGH;
        case CORINFO_FIELD_STATIC_SHARED_STATIC_HELPER:
        {
 #ifdef FEATURE_READYTORUN
@@ -4218,7 +4224,7 @@ GenTree* Compiler::impImportStaticFieldAccess(CORINFO_RESOLVED_TOKEN* pResolvedT
             else
 #endif
             {
-                op1 = fgGetStaticsCCtorHelper(pResolvedToken->hClass, pFieldInfo->helper);
+                op1 = fgGetStaticsCCtorHelper(pResolvedToken->hClass, pFieldInfo->helper, typeIndex);
             }
 
             op1 = gtNewOperNode(GT_ADD, op1->TypeGet(), op1, gtNewIconNode(pFieldInfo->offset, innerFldSeq));
@@ -9423,6 +9429,9 @@ void Compiler::impImportBlockCode(BasicBlock* block)
                             usesHelper = true;
                             break;
 
+                        case CORINFO_FIELD_STATIC_TLS_MANAGED:
+                            setMethodHasTlsFieldAccess();
+                            FALLTHROUGH;
                         case CORINFO_FIELD_STATIC_SHARED_STATIC_HELPER:
                         case CORINFO_FIELD_STATIC_ADDRESS:
                             // Replace static read-only fields with constant if possible
@@ -9672,7 +9681,6 @@ void Compiler::impImportBlockCode(BasicBlock* block)
                         }
                     }
                     break;
-
                 case CORINFO_FIELD_STATIC_TLS:
 #ifdef TARGET_X86
                     // Legacy TLS access is implemented as intrinsic on x86 only.
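To summarize the fgExpandStaticInitForCall transform completed above: at run time the expanded code behaves like this hedged C++ sketch of the CoreCLR shape (the NativeAOT variant instead compares a word at a small negative offset against 0). pIsInitedFlag and pStaticBase stand in for the addresses returned by getIsClassInitedFlagAddress and getStaticBaseAddress; none of these names exist in the runtime.

    #include <cstdint>

    // Stand-ins for EE-provided addresses (assumptions for illustration).
    extern uint32_t* pIsInitedFlag; // getIsClassInitedFlagAddress, IAT_VALUE
    extern void*     pStaticBase;   // getStaticBaseAddress, IAT_VALUE
    extern "C" void* HelperCall();  // the original static-base helper

    void* GetNonGCStaticBase()
    {
        // isInitedBb: test ClassInitFlags::INITIALIZED_FLAG (bit 0x1)
        if ((*pIsInitedFlag & 1) != 1)
        {
            HelperCall(); // helperCallBb: runs the cctor; return value unused
        }
        return pStaticBase; // replacementNode: the statics base is a constant
    }
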
@@ -9701,6 +9709,9 @@ void Compiler::impImportBlockCode(BasicBlock* block) clsHnd, op2); goto SPILL_APPEND; + case CORINFO_FIELD_STATIC_TLS_MANAGED: + setMethodHasTlsFieldAccess(); + FALLTHROUGH; case CORINFO_FIELD_STATIC_ADDRESS: case CORINFO_FIELD_STATIC_RVA_ADDRESS: case CORINFO_FIELD_STATIC_SHARED_STATIC_HELPER: diff --git a/src/coreclr/jit/jit.h b/src/coreclr/jit/jit.h index 663bbf54c8514..7265028fa6b49 100644 --- a/src/coreclr/jit/jit.h +++ b/src/coreclr/jit/jit.h @@ -426,6 +426,7 @@ typedef double weight_t; // See eeFindJitDataOffs and eeGetJitDataOffs in Compiler.hpp #define FLD_GLOBAL_DS ((CORINFO_FIELD_HANDLE)-4) #define FLD_GLOBAL_FS ((CORINFO_FIELD_HANDLE)-8) +#define FLD_GLOBAL_GS ((CORINFO_FIELD_HANDLE)-12) class GlobalJitOptions { diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 0a9f57f546926..f8ac1ddc1c841 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -2017,6 +2017,10 @@ void Lowering::ContainCheckIndir(GenTreeIndir* indirNode) // make this contained, it turns into a constant that goes into an addr mode MakeSrcContained(indirNode, addr); } + else if (addr->IsIconHandle(GTF_ICON_TLS_HDL)) + { + MakeSrcContained(indirNode, addr); + } #endif // TARGET_ARM64 } diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index cc3732238bf1a..384d5ee24c739 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -5466,9 +5466,6 @@ void Lowering::ContainCheckIndir(GenTreeIndir* node) if (icon->FitsInAddrBase(comp)) #endif { - // Amd64: - // We can mark any pc-relative 32-bit addr as containable. - // // On x86, direct VSD is done via a relative branch, and in fact it MUST be contained. // // Noting we cannot contain relocatable constants for TYP_SIMD12 today. Doing so would diff --git a/src/coreclr/jit/runtimelookup.cpp b/src/coreclr/jit/runtimelookup.cpp deleted file mode 100644 index 9056f510194f4..0000000000000 --- a/src/coreclr/jit/runtimelookup.cpp +++ /dev/null @@ -1,427 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -#include "jitpch.h" -#ifdef _MSC_VER -#pragma hdrstop -#endif - -// Obtain constant pointer from a tree -static void* GetConstantPointer(Compiler* comp, GenTree* tree) -{ - void* cns = nullptr; - if (tree->gtEffectiveVal()->IsCnsIntOrI()) - { - cns = (void*)tree->gtEffectiveVal()->AsIntCon()->IconValue(); - } - else if (comp->vnStore->IsVNConstant(tree->gtVNPair.GetLiberal())) - { - cns = (void*)comp->vnStore->CoercedConstantValue(tree->gtVNPair.GetLiberal()); - } - return cns; -} - -// Save expression to a local and append it as the last statement in exprBlock -static GenTree* SpillExpression(Compiler* comp, GenTree* expr, BasicBlock* exprBlock, DebugInfo& debugInfo) -{ - unsigned const tmpNum = comp->lvaGrabTemp(true DEBUGARG("spilling expr")); - Statement* asgStmt = comp->fgNewStmtAtEnd(exprBlock, comp->gtNewTempAssign(tmpNum, expr), debugInfo); - comp->gtSetStmtInfo(asgStmt); - comp->fgSetStmtSeq(asgStmt); - return comp->gtNewLclvNode(tmpNum, genActualType(expr)); -}; - -//------------------------------------------------------------------------------ -// gtNewRuntimeLookupHelperCallNode : Helper to create a runtime lookup call helper node. 
-// -// Arguments: -// helper - Call helper -// type - Type of the node -// args - Call args -// -// Return Value: -// New CT_HELPER node -// -GenTreeCall* Compiler::gtNewRuntimeLookupHelperCallNode(CORINFO_RUNTIME_LOOKUP* pRuntimeLookup, - GenTree* ctxTree, - void* compileTimeHandle) -{ - // Call the helper - // - Setup argNode with the pointer to the signature returned by the lookup - GenTree* argNode = gtNewIconEmbHndNode(pRuntimeLookup->signature, nullptr, GTF_ICON_GLOBAL_PTR, compileTimeHandle); - GenTreeCall* helperCall = gtNewHelperCallNode(pRuntimeLookup->helper, TYP_I_IMPL, ctxTree, argNode); - - // No need to perform CSE/hoisting for signature node - it is expected to end up in a rarely-taken block after - // "Expand runtime lookups" phase. - argNode->gtFlags |= GTF_DONT_CSE; - - // Leave a note that this method has runtime lookups we might want to expand (nullchecks, size checks) later. - // We can also consider marking current block as a runtime lookup holder to improve TP for Tier0 - impInlineRoot()->setMethodHasExpRuntimeLookup(); - helperCall->SetExpRuntimeLookup(); - if (!impInlineRoot()->GetSignatureToLookupInfoMap()->Lookup(pRuntimeLookup->signature)) - { - JITDUMP("Registering %p in SignatureToLookupInfoMap\n", pRuntimeLookup->signature) - impInlineRoot()->GetSignatureToLookupInfoMap()->Set(pRuntimeLookup->signature, *pRuntimeLookup); - } - return helperCall; -} - -//------------------------------------------------------------------------------ -// fgExpandRuntimeLookups : partially expand runtime lookups helper calls -// to add a nullcheck [+ size check] and a fast path -// Returns: -// PhaseStatus indicating what, if anything, was changed. -// -// Notes: -// The runtime lookup itself is needed to access a handle in code shared between -// generic instantiations. The lookup depends on the typeContext which is only available at -// runtime, and not at compile - time. See ASCII block diagrams in comments below for -// better understanding how this phase expands runtime lookups. -// -PhaseStatus Compiler::fgExpandRuntimeLookups() -{ - PhaseStatus result = PhaseStatus::MODIFIED_NOTHING; - - if (!doesMethodHaveExpRuntimeLookup()) - { - // The method being compiled doesn't have expandable runtime lookups. 
If it does - // and doesMethodHaveExpRuntimeLookup() still returns false we'll assert in LowerCall - return result; - } - - // Find all calls with GTF_CALL_M_EXP_RUNTIME_LOOKUP flag - // We don't use Blocks() iterator here as we modify `block` variable - for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext) - { - SCAN_BLOCK_AGAIN: - for (Statement* const stmt : block->Statements()) - { - if ((stmt->GetRootNode()->gtFlags & GTF_CALL) == 0) - { - // TP: Stmt has no calls - bail out - continue; - } - - for (GenTree* const tree : stmt->TreeList()) - { - // We only need calls with IsExpRuntimeLookup() flag - if (!tree->IsCall() || !tree->AsCall()->IsExpRuntimeLookup()) - { - continue; - } - assert(tree->IsHelperCall()); - JITDUMP("Expanding runtime lookup for [%06d] in " FMT_BB ":\n", dspTreeID(tree), block->bbNum) - DISPTREE(tree) - JITDUMP("\n") - - GenTreeCall* call = tree->AsCall(); - - // Clear ExpRuntimeLookup flag so we won't miss any runtime lookup that needs partial expansion - call->ClearExpRuntimeLookup(); - - if (call->IsTailCall()) - { - // It is very unlikely to happen and is impossible to represent in C# - continue; - } - - assert(call->gtArgs.CountArgs() == 2); - // The call has the following signature: - // - // type = call(genericCtx, signatureCns); - // - void* signature = GetConstantPointer(this, call->gtArgs.GetArgByIndex(1)->GetNode()); - if (signature == nullptr) - { - // Technically, it is possible (e.g. it was CSE'd and then VN was erased), but for Debug mode we - // want to catch such cases as we really don't want to emit just a fallback call - it's too slow - assert(!"can't restore signature argument value"); - continue; - } - - // Restore runtimeLookup using signature argument via a global dictionary - CORINFO_RUNTIME_LOOKUP runtimeLookup = {}; - const bool lookupFound = GetSignatureToLookupInfoMap()->Lookup(signature, &runtimeLookup); - assert(lookupFound); - - const bool needsSizeCheck = runtimeLookup.sizeOffset != CORINFO_NO_SIZE_CHECK; - if (needsSizeCheck) - { - JITDUMP("dynamic expansion, needs size check.\n") - } - - DebugInfo debugInfo = stmt->GetDebugInfo(); - - assert(runtimeLookup.indirections != 0); - assert(runtimeLookup.testForNull); - - // Split block right before the call tree - BasicBlock* prevBb = block; - GenTree** callUse = nullptr; - Statement* newFirstStmt = nullptr; - block = fgSplitBlockBeforeTree(block, stmt, call, &newFirstStmt, &callUse); - assert(prevBb != nullptr && block != nullptr); - - // Block ops inserted by the split need to be morphed here since we are after morph. - // We cannot morph stmt yet as we may modify it further below, and the morphing - // could invalidate callUse. 
- while ((newFirstStmt != nullptr) && (newFirstStmt != stmt)) - { - fgMorphStmtBlockOps(block, newFirstStmt); - newFirstStmt = newFirstStmt->GetNextStmt(); - } - - GenTreeLclVar* rtLookupLcl = nullptr; - - // Mostly for Tier0: if the current statement is ASG(LCL, RuntimeLookup) - // we can drop it and use that LCL as the destination - if (stmt->GetRootNode()->OperIs(GT_ASG)) - { - GenTree* lhs = stmt->GetRootNode()->gtGetOp1(); - GenTree* rhs = stmt->GetRootNode()->gtGetOp2(); - if (lhs->OperIs(GT_LCL_VAR) && rhs == *callUse) - { - rtLookupLcl = gtClone(lhs)->AsLclVar(); - fgRemoveStmt(block, stmt); - } - } - - // Grab a temp to store result (it's assigned from either fastPathBb or fallbackBb) - if (rtLookupLcl == nullptr) - { - // Define a local for the result - unsigned rtLookupLclNum = lvaGrabTemp(true DEBUGARG("runtime lookup")); - lvaTable[rtLookupLclNum].lvType = TYP_I_IMPL; - rtLookupLcl = gtNewLclvNode(rtLookupLclNum, call->TypeGet()); - - *callUse = gtClone(rtLookupLcl); - - fgMorphStmtBlockOps(block, stmt); - gtUpdateStmtSideEffects(stmt); - } - - GenTree* ctxTree = call->gtArgs.GetArgByIndex(0)->GetNode(); - GenTree* sigNode = call->gtArgs.GetArgByIndex(1)->GetNode(); - - // Prepare slotPtr tree (TODO: consider sharing this part with impRuntimeLookup) - GenTree* slotPtrTree = gtCloneExpr(ctxTree); - GenTree* indOffTree = nullptr; - GenTree* lastIndOfTree = nullptr; - for (WORD i = 0; i < runtimeLookup.indirections; i++) - { - if ((i == 1 && runtimeLookup.indirectFirstOffset) || (i == 2 && runtimeLookup.indirectSecondOffset)) - { - indOffTree = SpillExpression(this, slotPtrTree, prevBb, debugInfo); - slotPtrTree = gtCloneExpr(indOffTree); - } - - // The last indirection could be subject to a size check (dynamic dictionary expansion) - const bool isLastIndirectionWithSizeCheck = (i == runtimeLookup.indirections - 1) && needsSizeCheck; - if (i != 0) - { - slotPtrTree = gtNewOperNode(GT_IND, TYP_I_IMPL, slotPtrTree); - slotPtrTree->gtFlags |= GTF_IND_NONFAULTING; - if (!isLastIndirectionWithSizeCheck) - { - slotPtrTree->gtFlags |= GTF_IND_INVARIANT; - } - } - - if ((i == 1 && runtimeLookup.indirectFirstOffset) || (i == 2 && runtimeLookup.indirectSecondOffset)) - { - slotPtrTree = gtNewOperNode(GT_ADD, TYP_I_IMPL, indOffTree, slotPtrTree); - } - if (runtimeLookup.offsets[i] != 0) - { - if (isLastIndirectionWithSizeCheck) - { - lastIndOfTree = SpillExpression(this, slotPtrTree, prevBb, debugInfo); - slotPtrTree = gtCloneExpr(lastIndOfTree); - } - slotPtrTree = gtNewOperNode(GT_ADD, TYP_I_IMPL, slotPtrTree, - gtNewIconNode(runtimeLookup.offsets[i], TYP_I_IMPL)); - } - } - - // Non-dynamic expansion case (no size check): - // - // prevBb(BBJ_NONE): [weight: 1.0] - // ... - // - // nullcheckBb(BBJ_COND): [weight: 1.0] - // if (*fastPathValue == null) - // goto fallbackBb; - // - // fastPathBb(BBJ_ALWAYS): [weight: 0.8] - // rtLookupLcl = *fastPathValue; - // goto block; - // - // fallbackBb(BBJ_NONE): [weight: 0.2] - // rtLookupLcl = HelperCall(); - // - // block(...): [weight: 1.0] - // use(rtLookupLcl); - // - - // null-check basic block - GenTree* fastPathValue = gtNewOperNode(GT_IND, TYP_I_IMPL, gtCloneExpr(slotPtrTree)); - fastPathValue->gtFlags |= GTF_IND_NONFAULTING; - // Save dictionary slot to a local (to be used by fast path) - GenTree* fastPathValueClone = - opts.OptimizationEnabled() ? 
fgMakeMultiUse(&fastPathValue) : gtCloneExpr(fastPathValue); - GenTree* nullcheckOp = gtNewOperNode(GT_EQ, TYP_INT, fastPathValue, gtNewIconNode(0, TYP_I_IMPL)); - nullcheckOp->gtFlags |= GTF_RELOP_JMP_USED; - BasicBlock* nullcheckBb = - fgNewBBFromTreeAfter(BBJ_COND, prevBb, gtNewOperNode(GT_JTRUE, TYP_VOID, nullcheckOp), debugInfo); - - // Fallback basic block - GenTree* asgFallbackValue = gtNewAssignNode(gtClone(rtLookupLcl), call); - BasicBlock* fallbackBb = fgNewBBFromTreeAfter(BBJ_NONE, nullcheckBb, asgFallbackValue, debugInfo, true); - - // Fast-path basic block - GenTree* asgFastpathValue = gtNewAssignNode(gtClone(rtLookupLcl), fastPathValueClone); - BasicBlock* fastPathBb = fgNewBBFromTreeAfter(BBJ_ALWAYS, nullcheckBb, asgFastpathValue, debugInfo); - - BasicBlock* sizeCheckBb = nullptr; - if (needsSizeCheck) - { - // Dynamic expansion case (sizeCheckBb is added and some preds are changed): - // - // prevBb(BBJ_NONE): [weight: 1.0] - // - // sizeCheckBb(BBJ_COND): [weight: 1.0] - // if (sizeValue <= offsetValue) - // goto fallbackBb; - // ... - // - // nullcheckBb(BBJ_COND): [weight: 0.8] - // if (*fastPathValue == null) - // goto fallbackBb; - // - // fastPathBb(BBJ_ALWAYS): [weight: 0.64] - // rtLookupLcl = *fastPathValue; - // goto block; - // - // fallbackBb(BBJ_NONE): [weight: 0.36] - // rtLookupLcl = HelperCall(); - // - // block(...): [weight: 1.0] - // use(rtLookupLcl); - // - - // sizeValue = dictionary[pRuntimeLookup->sizeOffset] - GenTreeIntCon* sizeOffset = gtNewIconNode(runtimeLookup.sizeOffset, TYP_I_IMPL); - assert(lastIndOfTree != nullptr); - GenTree* sizeValueOffset = gtNewOperNode(GT_ADD, TYP_I_IMPL, lastIndOfTree, sizeOffset); - GenTree* sizeValue = gtNewOperNode(GT_IND, TYP_I_IMPL, sizeValueOffset); - sizeValue->gtFlags |= GTF_IND_NONFAULTING; - - // sizeCheck fails if sizeValue <= pRuntimeLookup->offsets[i] - GenTree* offsetValue = - gtNewIconNode(runtimeLookup.offsets[runtimeLookup.indirections - 1], TYP_I_IMPL); - GenTree* sizeCheck = gtNewOperNode(GT_LE, TYP_INT, sizeValue, offsetValue); - sizeCheck->gtFlags |= GTF_RELOP_JMP_USED; - - GenTree* jtrue = gtNewOperNode(GT_JTRUE, TYP_VOID, sizeCheck); - sizeCheckBb = fgNewBBFromTreeAfter(BBJ_COND, prevBb, jtrue, debugInfo); - } - - // - // Update preds in all new blocks - // - fgRemoveRefPred(block, prevBb); - fgAddRefPred(block, fastPathBb); - fgAddRefPred(block, fallbackBb); - nullcheckBb->bbJumpDest = fallbackBb; - fastPathBb->bbJumpDest = block; - - if (needsSizeCheck) - { - // sizeCheckBb is the first block after prevBb - fgAddRefPred(sizeCheckBb, prevBb); - // sizeCheckBb flows into nullcheckBb in case if the size check passes - fgAddRefPred(nullcheckBb, sizeCheckBb); - // fallbackBb is reachable from both nullcheckBb and sizeCheckBb - fgAddRefPred(fallbackBb, nullcheckBb); - fgAddRefPred(fallbackBb, sizeCheckBb); - // fastPathBb is only reachable from successful nullcheckBb - fgAddRefPred(fastPathBb, nullcheckBb); - // sizeCheckBb fails - jump to fallbackBb - sizeCheckBb->bbJumpDest = fallbackBb; - } - else - { - // nullcheckBb is the first block after prevBb - fgAddRefPred(nullcheckBb, prevBb); - // No size check, nullcheckBb jumps to fast path - fgAddRefPred(fastPathBb, nullcheckBb); - // fallbackBb is only reachable from nullcheckBb (jump destination) - fgAddRefPred(fallbackBb, nullcheckBb); - } - - // - // Re-distribute weights (see '[weight: X]' on the diagrams above) - // TODO: consider marking fallbackBb as rarely-taken - // - block->inheritWeight(prevBb); - if (needsSizeCheck) - { - 
sizeCheckBb->inheritWeight(prevBb); - // 80% chance we pass nullcheck - nullcheckBb->inheritWeightPercentage(sizeCheckBb, 80); - // 64% (0.8 * 0.8) chance we pass both nullcheck and sizecheck - fastPathBb->inheritWeightPercentage(nullcheckBb, 80); - // 100-64=36% chance we fail either nullcheck or sizecheck - fallbackBb->inheritWeightPercentage(sizeCheckBb, 36); - } - else - { - nullcheckBb->inheritWeight(prevBb); - // 80% chance we pass nullcheck - fastPathBb->inheritWeightPercentage(nullcheckBb, 80); - // 20% chance we fail nullcheck (TODO: Consider making it cold (0%)) - fallbackBb->inheritWeightPercentage(nullcheckBb, 20); - } - - // - // Update loop info - // - nullcheckBb->bbNatLoopNum = prevBb->bbNatLoopNum; - fastPathBb->bbNatLoopNum = prevBb->bbNatLoopNum; - fallbackBb->bbNatLoopNum = prevBb->bbNatLoopNum; - if (needsSizeCheck) - { - sizeCheckBb->bbNatLoopNum = prevBb->bbNatLoopNum; - } - - // All blocks are expected to be in the same EH region - assert(BasicBlock::sameEHRegion(prevBb, block)); - assert(BasicBlock::sameEHRegion(prevBb, nullcheckBb)); - assert(BasicBlock::sameEHRegion(prevBb, fastPathBb)); - if (needsSizeCheck) - { - assert(BasicBlock::sameEHRegion(prevBb, sizeCheckBb)); - } - - // Scan current block again, the current call will be ignored because of ClearExpRuntimeLookup. - // We don't try to re-use expansions for the same lookups in the current block here - CSE is responsible - // for that - result = PhaseStatus::MODIFIED_EVERYTHING; - - // We've modified the graph and the current "block" might still have more runtime lookups - goto SCAN_BLOCK_AGAIN; - } - } - } - - if (result == PhaseStatus::MODIFIED_EVERYTHING) - { - if (opts.OptimizationEnabled()) - { - fgReorderBlocks(/* useProfileData */ false); - fgUpdateChangedFlowGraph(FlowGraphUpdates::COMPUTE_BASICS); - } - } - return result; -} diff --git a/src/coreclr/jit/utils.cpp b/src/coreclr/jit/utils.cpp index 95874882458fa..5dcb315af0a35 100644 --- a/src/coreclr/jit/utils.cpp +++ b/src/coreclr/jit/utils.cpp @@ -1463,6 +1463,7 @@ void HelperCallProperties::init() case CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE_NOCTOR: case CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR: case CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR: + case CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED: // These do not invoke static class constructors // diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index c7fc36be22843..4076cb3d2cf31 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -12100,6 +12100,9 @@ VNFunc Compiler::fgValueNumberJitHelperMethodVNFunc(CorInfoHelpFunc helpFunc) case CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR: vnf = VNF_GetsharedNongcthreadstaticBaseNoctor; break; + case CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED: + vnf = VNF_GetsharedNongcthreadstaticBaseNoctorOptimized; + break; case CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_DYNAMICCLASS: vnf = VNF_GetsharedGcthreadstaticBaseDynamicclass; break; diff --git a/src/coreclr/jit/valuenumfuncs.h b/src/coreclr/jit/valuenumfuncs.h index cb952f2504cf9..6551be8772a2a 100644 --- a/src/coreclr/jit/valuenumfuncs.h +++ b/src/coreclr/jit/valuenumfuncs.h @@ -128,6 +128,7 @@ ValueNumFuncDef(GetsharedGcthreadstaticBase, 2, false, true, true) ValueNumFuncDef(GetsharedNongcthreadstaticBase, 2, false, true, true) ValueNumFuncDef(GetsharedGcthreadstaticBaseNoctor, 2, false, true, true) ValueNumFuncDef(GetsharedNongcthreadstaticBaseNoctor, 2, false, true, true) 
+ValueNumFuncDef(GetsharedNongcthreadstaticBaseNoctorOptimized, 1, false, true, true) ValueNumFuncDef(GetsharedGcthreadstaticBaseDynamicclass, 2, false, true, true) ValueNumFuncDef(GetsharedNongcthreadstaticBaseDynamicclass, 2, false, true, true) diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs index 01c4dfbc1b118..e4cc6a13c3e17 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs @@ -186,6 +186,7 @@ which is the right helper to use to allocate an object of a given type. */ CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE, CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR, CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR, + CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED, CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_DYNAMICCLASS, CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_DYNAMICCLASS, diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs b/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs index be36ea5f04dba..9ad3bd7156dd7 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs @@ -2860,6 +2860,22 @@ private nuint printFieldName(CORINFO_FIELD_STRUCT_* fld, byte* buffer, nuint buf return PrintFromUtf16(field.Name, buffer, bufferSize, requiredBufferSize); } +#pragma warning disable CA1822 // Mark members as static + private uint getThreadLocalFieldInfo(CORINFO_FIELD_STRUCT_* fld) +#pragma warning restore CA1822 // Mark members as static + { + // Implemented for JIT only for now. + + return 0; + } + +#pragma warning disable CA1822 // Mark members as static + private void getThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo) +#pragma warning restore CA1822 // Mark members as static + { + // Implemented for JIT only for now. 
+ } + private CORINFO_CLASS_STRUCT_* getFieldClass(CORINFO_FIELD_STRUCT_* field) { var fieldDesc = HandleToObject(field); diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs b/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs index fe9e2c8ed7f88..26b637ef6357d 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoImpl_generated.cs @@ -1479,6 +1479,35 @@ private static void _getFieldInfo(IntPtr thisHandle, IntPtr* ppException, CORINF } } + [UnmanagedCallersOnly] + private static uint _getThreadLocalFieldInfo(IntPtr thisHandle, IntPtr* ppException, CORINFO_FIELD_STRUCT_* field) + { + var _this = GetThis(thisHandle); + try + { + return _this.getThreadLocalFieldInfo(field); + } + catch (Exception ex) + { + *ppException = _this.AllocException(ex); + return default; + } + } + + [UnmanagedCallersOnly] + private static void _getThreadLocalStaticBlocksInfo(IntPtr thisHandle, IntPtr* ppException, CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo) + { + var _this = GetThis(thisHandle); + try + { + _this.getThreadLocalStaticBlocksInfo(pInfo); + } + catch (Exception ex) + { + *ppException = _this.AllocException(ex); + } + } + [UnmanagedCallersOnly] private static byte _isFieldStatic(IntPtr thisHandle, IntPtr* ppException, CORINFO_FIELD_STRUCT_* fldHnd) { @@ -2701,7 +2730,7 @@ private static uint _getJitFlags(IntPtr thisHandle, IntPtr* ppException, CORJIT_ private static IntPtr GetUnmanagedCallbacks() { - void** callbacks = (void**)Marshal.AllocCoTaskMem(sizeof(IntPtr) * 182); + void** callbacks = (void**)Marshal.AllocCoTaskMem(sizeof(IntPtr) * 184); callbacks[0] = (delegate* unmanaged)&_isIntrinsic; callbacks[1] = (delegate* unmanaged)&_getMethodAttribs; @@ -2802,89 +2831,91 @@ private static IntPtr GetUnmanagedCallbacks() callbacks[96] = (delegate* unmanaged)&_getFieldType; callbacks[97] = (delegate* unmanaged)&_getFieldOffset; callbacks[98] = (delegate* unmanaged)&_getFieldInfo; - callbacks[99] = (delegate* unmanaged)&_isFieldStatic; - callbacks[100] = (delegate* unmanaged)&_getArrayOrStringLength; - callbacks[101] = (delegate* unmanaged)&_getBoundaries; - callbacks[102] = (delegate* unmanaged)&_setBoundaries; - callbacks[103] = (delegate* unmanaged)&_getVars; - callbacks[104] = (delegate* unmanaged)&_setVars; - callbacks[105] = (delegate* unmanaged)&_reportRichMappings; - callbacks[106] = (delegate* unmanaged)&_allocateArray; - callbacks[107] = (delegate* unmanaged)&_freeArray; - callbacks[108] = (delegate* unmanaged)&_getArgNext; - callbacks[109] = (delegate* unmanaged)&_getArgType; - callbacks[110] = (delegate* unmanaged)&_getExactClasses; - callbacks[111] = (delegate* unmanaged)&_getArgClass; - callbacks[112] = (delegate* unmanaged)&_getHFAType; - callbacks[113] = (delegate* unmanaged)&_GetErrorHRESULT; - callbacks[114] = (delegate* unmanaged)&_GetErrorMessage; - callbacks[115] = (delegate* unmanaged)&_FilterException; - callbacks[116] = (delegate* unmanaged)&_ThrowExceptionForJitResult; - callbacks[117] = (delegate* unmanaged)&_ThrowExceptionForHelper; - callbacks[118] = (delegate* unmanaged)&_runWithErrorTrap; - callbacks[119] = (delegate* unmanaged)&_runWithSPMIErrorTrap; - callbacks[120] = (delegate* unmanaged)&_getEEInfo; - callbacks[121] = (delegate* unmanaged)&_getJitTimeLogFilename; - callbacks[122] = (delegate* unmanaged)&_getMethodDefFromMethod; - callbacks[123] = (delegate* unmanaged)&_printMethodName; - callbacks[124] = (delegate* unmanaged)&_getMethodNameFromMetadata; - callbacks[125] 
= (delegate* unmanaged)&_getMethodHash; - callbacks[126] = (delegate* unmanaged)&_findNameOfToken; - callbacks[127] = (delegate* unmanaged)&_getSystemVAmd64PassStructInRegisterDescriptor; - callbacks[128] = (delegate* unmanaged)&_getLoongArch64PassStructInRegisterFlags; - callbacks[129] = (delegate* unmanaged)&_getRISCV64PassStructInRegisterFlags; - callbacks[130] = (delegate* unmanaged)&_getThreadTLSIndex; - callbacks[131] = (delegate* unmanaged)&_getInlinedCallFrameVptr; - callbacks[132] = (delegate* unmanaged)&_getAddrOfCaptureThreadGlobal; - callbacks[133] = (delegate* unmanaged)&_getHelperFtn; - callbacks[134] = (delegate* unmanaged)&_getFunctionEntryPoint; - callbacks[135] = (delegate* unmanaged)&_getFunctionFixedEntryPoint; - callbacks[136] = (delegate* unmanaged)&_getMethodSync; - callbacks[137] = (delegate* unmanaged)&_getLazyStringLiteralHelper; - callbacks[138] = (delegate* unmanaged)&_embedModuleHandle; - callbacks[139] = (delegate* unmanaged)&_embedClassHandle; - callbacks[140] = (delegate* unmanaged)&_embedMethodHandle; - callbacks[141] = (delegate* unmanaged)&_embedFieldHandle; - callbacks[142] = (delegate* unmanaged)&_embedGenericHandle; - callbacks[143] = (delegate* unmanaged)&_getLocationOfThisType; - callbacks[144] = (delegate* unmanaged)&_getAddressOfPInvokeTarget; - callbacks[145] = (delegate* unmanaged)&_GetCookieForPInvokeCalliSig; - callbacks[146] = (delegate* unmanaged)&_canGetCookieForPInvokeCalliSig; - callbacks[147] = (delegate* unmanaged)&_getJustMyCodeHandle; - callbacks[148] = (delegate* unmanaged)&_GetProfilingHandle; - callbacks[149] = (delegate* unmanaged)&_getCallInfo; - callbacks[150] = (delegate* unmanaged)&_canAccessFamily; - callbacks[151] = (delegate* unmanaged)&_isRIDClassDomainID; - callbacks[152] = (delegate* unmanaged)&_getClassDomainID; - callbacks[153] = (delegate* unmanaged)&_getReadonlyStaticFieldValue; - callbacks[154] = (delegate* unmanaged)&_getStaticFieldCurrentClass; - callbacks[155] = (delegate* unmanaged)&_getVarArgsHandle; - callbacks[156] = (delegate* unmanaged)&_canGetVarArgsHandle; - callbacks[157] = (delegate* unmanaged)&_constructStringLiteral; - callbacks[158] = (delegate* unmanaged)&_emptyStringLiteral; - callbacks[159] = (delegate* unmanaged)&_getFieldThreadLocalStoreID; - callbacks[160] = (delegate* unmanaged)&_GetDelegateCtor; - callbacks[161] = (delegate* unmanaged)&_MethodCompileComplete; - callbacks[162] = (delegate* unmanaged)&_getTailCallHelpers; - callbacks[163] = (delegate* unmanaged)&_convertPInvokeCalliToCall; - callbacks[164] = (delegate* unmanaged)&_notifyInstructionSetUsage; - callbacks[165] = (delegate* unmanaged)&_updateEntryPointForTailCall; - callbacks[166] = (delegate* unmanaged)&_allocMem; - callbacks[167] = (delegate* unmanaged)&_reserveUnwindInfo; - callbacks[168] = (delegate* unmanaged)&_allocUnwindInfo; - callbacks[169] = (delegate* unmanaged)&_allocGCInfo; - callbacks[170] = (delegate* unmanaged)&_setEHcount; - callbacks[171] = (delegate* unmanaged)&_setEHinfo; - callbacks[172] = (delegate* unmanaged)&_logMsg; - callbacks[173] = (delegate* unmanaged)&_doAssert; - callbacks[174] = (delegate* unmanaged)&_reportFatalError; - callbacks[175] = (delegate* unmanaged)&_getPgoInstrumentationResults; - callbacks[176] = (delegate* unmanaged)&_allocPgoInstrumentationBySchema; - callbacks[177] = (delegate* unmanaged)&_recordCallSite; - callbacks[178] = (delegate* unmanaged)&_recordRelocation; - callbacks[179] = (delegate* unmanaged)&_getRelocTypeHint; - callbacks[180] = (delegate* 
unmanaged)&_getExpectedTargetArchitecture; - callbacks[181] = (delegate* unmanaged)&_getJitFlags; + callbacks[99] = (delegate* unmanaged)&_getThreadLocalFieldInfo; + callbacks[100] = (delegate* unmanaged)&_getThreadLocalStaticBlocksInfo; + callbacks[101] = (delegate* unmanaged)&_isFieldStatic; + callbacks[102] = (delegate* unmanaged)&_getArrayOrStringLength; + callbacks[103] = (delegate* unmanaged)&_getBoundaries; + callbacks[104] = (delegate* unmanaged)&_setBoundaries; + callbacks[105] = (delegate* unmanaged)&_getVars; + callbacks[106] = (delegate* unmanaged)&_setVars; + callbacks[107] = (delegate* unmanaged)&_reportRichMappings; + callbacks[108] = (delegate* unmanaged)&_allocateArray; + callbacks[109] = (delegate* unmanaged)&_freeArray; + callbacks[110] = (delegate* unmanaged)&_getArgNext; + callbacks[111] = (delegate* unmanaged)&_getArgType; + callbacks[112] = (delegate* unmanaged)&_getExactClasses; + callbacks[113] = (delegate* unmanaged)&_getArgClass; + callbacks[114] = (delegate* unmanaged)&_getHFAType; + callbacks[115] = (delegate* unmanaged)&_GetErrorHRESULT; + callbacks[116] = (delegate* unmanaged)&_GetErrorMessage; + callbacks[117] = (delegate* unmanaged)&_FilterException; + callbacks[118] = (delegate* unmanaged)&_ThrowExceptionForJitResult; + callbacks[119] = (delegate* unmanaged)&_ThrowExceptionForHelper; + callbacks[120] = (delegate* unmanaged)&_runWithErrorTrap; + callbacks[121] = (delegate* unmanaged)&_runWithSPMIErrorTrap; + callbacks[122] = (delegate* unmanaged)&_getEEInfo; + callbacks[123] = (delegate* unmanaged)&_getJitTimeLogFilename; + callbacks[124] = (delegate* unmanaged)&_getMethodDefFromMethod; + callbacks[125] = (delegate* unmanaged)&_printMethodName; + callbacks[126] = (delegate* unmanaged)&_getMethodNameFromMetadata; + callbacks[127] = (delegate* unmanaged)&_getMethodHash; + callbacks[128] = (delegate* unmanaged)&_findNameOfToken; + callbacks[129] = (delegate* unmanaged)&_getSystemVAmd64PassStructInRegisterDescriptor; + callbacks[130] = (delegate* unmanaged)&_getLoongArch64PassStructInRegisterFlags; + callbacks[131] = (delegate* unmanaged)&_getRISCV64PassStructInRegisterFlags; + callbacks[132] = (delegate* unmanaged)&_getThreadTLSIndex; + callbacks[133] = (delegate* unmanaged)&_getInlinedCallFrameVptr; + callbacks[134] = (delegate* unmanaged)&_getAddrOfCaptureThreadGlobal; + callbacks[135] = (delegate* unmanaged)&_getHelperFtn; + callbacks[136] = (delegate* unmanaged)&_getFunctionEntryPoint; + callbacks[137] = (delegate* unmanaged)&_getFunctionFixedEntryPoint; + callbacks[138] = (delegate* unmanaged)&_getMethodSync; + callbacks[139] = (delegate* unmanaged)&_getLazyStringLiteralHelper; + callbacks[140] = (delegate* unmanaged)&_embedModuleHandle; + callbacks[141] = (delegate* unmanaged)&_embedClassHandle; + callbacks[142] = (delegate* unmanaged)&_embedMethodHandle; + callbacks[143] = (delegate* unmanaged)&_embedFieldHandle; + callbacks[144] = (delegate* unmanaged)&_embedGenericHandle; + callbacks[145] = (delegate* unmanaged)&_getLocationOfThisType; + callbacks[146] = (delegate* unmanaged)&_getAddressOfPInvokeTarget; + callbacks[147] = (delegate* unmanaged)&_GetCookieForPInvokeCalliSig; + callbacks[148] = (delegate* unmanaged)&_canGetCookieForPInvokeCalliSig; + callbacks[149] = (delegate* unmanaged)&_getJustMyCodeHandle; + callbacks[150] = (delegate* unmanaged)&_GetProfilingHandle; + callbacks[151] = (delegate* unmanaged)&_getCallInfo; + callbacks[152] = (delegate* unmanaged)&_canAccessFamily; + callbacks[153] = (delegate* unmanaged)&_isRIDClassDomainID; + 
callbacks[154] = (delegate* unmanaged)&_getClassDomainID;
+        callbacks[155] = (delegate* unmanaged)&_getReadonlyStaticFieldValue;
+        callbacks[156] = (delegate* unmanaged)&_getStaticFieldCurrentClass;
+        callbacks[157] = (delegate* unmanaged)&_getVarArgsHandle;
+        callbacks[158] = (delegate* unmanaged)&_canGetVarArgsHandle;
+        callbacks[159] = (delegate* unmanaged)&_constructStringLiteral;
+        callbacks[160] = (delegate* unmanaged)&_emptyStringLiteral;
+        callbacks[161] = (delegate* unmanaged)&_getFieldThreadLocalStoreID;
+        callbacks[162] = (delegate* unmanaged)&_GetDelegateCtor;
+        callbacks[163] = (delegate* unmanaged)&_MethodCompileComplete;
+        callbacks[164] = (delegate* unmanaged)&_getTailCallHelpers;
+        callbacks[165] = (delegate* unmanaged)&_convertPInvokeCalliToCall;
+        callbacks[166] = (delegate* unmanaged)&_notifyInstructionSetUsage;
+        callbacks[167] = (delegate* unmanaged)&_updateEntryPointForTailCall;
+        callbacks[168] = (delegate* unmanaged)&_allocMem;
+        callbacks[169] = (delegate* unmanaged)&_reserveUnwindInfo;
+        callbacks[170] = (delegate* unmanaged)&_allocUnwindInfo;
+        callbacks[171] = (delegate* unmanaged)&_allocGCInfo;
+        callbacks[172] = (delegate* unmanaged)&_setEHcount;
+        callbacks[173] = (delegate* unmanaged)&_setEHinfo;
+        callbacks[174] = (delegate* unmanaged)&_logMsg;
+        callbacks[175] = (delegate* unmanaged)&_doAssert;
+        callbacks[176] = (delegate* unmanaged)&_reportFatalError;
+        callbacks[177] = (delegate* unmanaged)&_getPgoInstrumentationResults;
+        callbacks[178] = (delegate* unmanaged)&_allocPgoInstrumentationBySchema;
+        callbacks[179] = (delegate* unmanaged)&_recordCallSite;
+        callbacks[180] = (delegate* unmanaged)&_recordRelocation;
+        callbacks[181] = (delegate* unmanaged)&_getRelocTypeHint;
+        callbacks[182] = (delegate* unmanaged)&_getExpectedTargetArchitecture;
+        callbacks[183] = (delegate* unmanaged)&_getJitFlags;
 
         return (IntPtr)callbacks;
     }
diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs b/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs
index 9f6db18fb631c..b45184a5b6803 100644
--- a/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs
+++ b/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs
@@ -1112,6 +1112,7 @@ public enum CORINFO_FIELD_ACCESSOR
         CORINFO_FIELD_STATIC_GENERICS_STATIC_HELPER, // static field access using the "generic static" helper (argument is MethodTable *)
         CORINFO_FIELD_STATIC_ADDR_HELPER,   // static field accessed using address-of helper (argument is FieldDesc *)
         CORINFO_FIELD_STATIC_TLS,           // unmanaged TLS access
+        CORINFO_FIELD_STATIC_TLS_MANAGED,   // managed TLS access
         CORINFO_FIELD_STATIC_READYTORUN_HELPER, // static field access using a runtime lookup helper
         CORINFO_FIELD_STATIC_RELOCATABLE,   // static field access from the data segment
         CORINFO_FIELD_INTRINSIC_ZERO,       // intrinsic zero (IntPtr.Zero, UIntPtr.Zero)
@@ -1152,6 +1153,14 @@ public unsafe struct CORINFO_FIELD_INFO
         public CORINFO_CONST_LOOKUP fieldLookup;
     };
 
+    public unsafe struct CORINFO_THREAD_STATIC_BLOCKS_INFO
+    {
+        public CORINFO_CONST_LOOKUP tlsIndex;
+        public uint offsetOfThreadLocalStoragePointer;
+        public uint offsetOfMaxThreadStaticBlocks;
+        public uint offsetOfThreadStaticBlocks;
+    };
+
     // System V struct passing
     // The Classification types are described in the ABI spec at https://software.intel.com/sites/default/files/article/402129/mpx-linux64-abi.pdf
     public enum SystemVClassificationType : byte
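The managed CORINFO_THREAD_STATIC_BLOCKS_INFO must mirror the native corinfo.h struct field-for-field (the three offsets are plain uint32_t, matching the native declaration, not CORINFO_CONST_LOOKUP). A compile-time guard on the native side is cheap insurance against that kind of drift; here is a hedged sketch, with the types re-declared locally for illustration only — the real definitions live in corinfo.h:

    #include <cstddef>
    #include <cstdint>

    // Local stand-ins mirroring corinfo.h, for illustration only.
    enum InfoAccessType { IAT_VALUE, IAT_PVALUE, IAT_PPVALUE, IAT_RELPVALUE };
    struct CORINFO_CONST_LOOKUP { void* addr; InfoAccessType accessType; };

    struct CORINFO_THREAD_STATIC_BLOCKS_INFO
    {
        CORINFO_CONST_LOOKUP tlsIndex;
        uint32_t offsetOfThreadLocalStoragePointer;
        uint32_t offsetOfMaxThreadStaticBlocks;
        uint32_t offsetOfThreadStaticBlocks;
    };

    // If someone widens an offset field (or swaps in CORINFO_CONST_LOOKUP),
    // the managed mirror in CorInfoTypes.cs silently reads garbage past
    // tlsIndex; fail the build instead.
    static_assert(offsetof(CORINFO_THREAD_STATIC_BLOCKS_INFO, offsetOfMaxThreadStaticBlocks) ==
                      offsetof(CORINFO_THREAD_STATIC_BLOCKS_INFO, offsetOfThreadLocalStoragePointer) +
                          sizeof(uint32_t),
                  "offset fields must stay contiguous uint32_t; update the CorInfoTypes.cs mirror on any change");

diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/ThunkInput.txt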
b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/ThunkInput.txt index 06a8a05df1b5f..5c4d8ad0b94b1 100644 --- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/ThunkInput.txt +++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/ThunkInput.txt @@ -86,6 +86,7 @@ CORINFO_TAILCALL_HELPERS*,ref CORINFO_TAILCALL_HELPERS CORINFO_GENERICHANDLE_RESULT*,ref CORINFO_GENERICHANDLE_RESULT CORINFO_METHOD_INFO*,CORINFO_METHOD_INFO* CORINFO_FIELD_INFO*,CORINFO_FIELD_INFO* +CORINFO_THREAD_STATIC_BLOCKS_INFO*,CORINFO_THREAD_STATIC_BLOCKS_INFO* CORINFO_CALL_INFO*,CORINFO_CALL_INFO* CORINFO_DEVIRTUALIZATION_INFO*,CORINFO_DEVIRTUALIZATION_INFO* PatchpointInfo* @@ -257,6 +258,8 @@ FUNCTIONS CorInfoType getFieldType(CORINFO_FIELD_HANDLE field, CORINFO_CLASS_HANDLE* structType, CORINFO_CLASS_HANDLE memberParent) unsigned getFieldOffset(CORINFO_FIELD_HANDLE field) void getFieldInfo(CORINFO_RESOLVED_TOKEN* pResolvedToken, CORINFO_METHOD_HANDLE callerHandle, CORINFO_ACCESS_FLAGS flags, CORINFO_FIELD_INFO* pResult) + uint32_t getThreadLocalFieldInfo (CORINFO_FIELD_HANDLE field) + void getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo) bool isFieldStatic(CORINFO_FIELD_HANDLE fldHnd) int getArrayOrStringLength(CORINFO_OBJECT_HANDLE objHnd) void getBoundaries(CORINFO_METHOD_HANDLE ftn, unsigned int* cILOffsets, uint32_t** pILOffsets, ICorDebugInfo::BoundaryTypes* implicitBoundaries) diff --git a/src/coreclr/tools/aot/jitinterface/jitinterface_generated.h b/src/coreclr/tools/aot/jitinterface/jitinterface_generated.h index b9e31672ddcad..bc1f1383c2966 100644 --- a/src/coreclr/tools/aot/jitinterface/jitinterface_generated.h +++ b/src/coreclr/tools/aot/jitinterface/jitinterface_generated.h @@ -110,6 +110,8 @@ struct JitInterfaceCallbacks CorInfoType (* getFieldType)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_FIELD_HANDLE field, CORINFO_CLASS_HANDLE* structType, CORINFO_CLASS_HANDLE memberParent); unsigned (* getFieldOffset)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_FIELD_HANDLE field); void (* getFieldInfo)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_RESOLVED_TOKEN* pResolvedToken, CORINFO_METHOD_HANDLE callerHandle, CORINFO_ACCESS_FLAGS flags, CORINFO_FIELD_INFO* pResult); + uint32_t (* getThreadLocalFieldInfo)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_FIELD_HANDLE field); + void (* getThreadLocalStaticBlocksInfo)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo); bool (* isFieldStatic)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_FIELD_HANDLE fldHnd); int (* getArrayOrStringLength)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_OBJECT_HANDLE objHnd); void (* getBoundaries)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_METHOD_HANDLE ftn, unsigned int* cILOffsets, uint32_t** pILOffsets, ICorDebugInfo::BoundaryTypes* implicitBoundaries); @@ -1177,6 +1179,23 @@ class JitInterfaceWrapper : public ICorJitInfo if (pException != nullptr) throw pException; } + virtual uint32_t getThreadLocalFieldInfo( + CORINFO_FIELD_HANDLE field) +{ + CorInfoExceptionClass* pException = nullptr; + uint32_t temp = _callbacks->getThreadLocalFieldInfo(_thisHandle, &pException, field); + if (pException != nullptr) throw pException; + return temp; +} + + virtual void getThreadLocalStaticBlocksInfo( + CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo) +{ + CorInfoExceptionClass* pException = nullptr; + 
_callbacks->getThreadLocalStaticBlocksInfo(_thisHandle, &pException, pInfo);
+    if (pException != nullptr) throw pException;
+}
+
     virtual bool isFieldStatic(
           CORINFO_FIELD_HANDLE fldHnd)
 {
diff --git a/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h b/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h
index cb99c4df50bce..61f81c83e0d51 100644
--- a/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h
+++ b/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h
@@ -518,6 +518,19 @@ struct Agnostic_GetProfilingHandle
     DWORD bIndirectedHandles;
 };
 
+struct Agnostic_GetThreadLocalStaticBlocksInfo
+{
+    Agnostic_CORINFO_CONST_LOOKUP tlsIndex;
+    UINT offsetOfThreadLocalStoragePointer;
+    UINT offsetOfMaxThreadStaticBlocks;
+    UINT offsetOfThreadStaticBlocks;
+};
+
+struct Agnostic_GetThreadLocalFieldInfo
+{
+    DWORD staticBlockIndex;
+};
+
 struct Agnostic_GetTailCallHelpers
 {
     Agnostic_CORINFO_RESOLVED_TOKEN callToken;
diff --git a/src/coreclr/tools/superpmi/superpmi-shared/lwmlist.h b/src/coreclr/tools/superpmi/superpmi-shared/lwmlist.h
index 9872957f9899b..f079877342882 100644
--- a/src/coreclr/tools/superpmi/superpmi-shared/lwmlist.h
+++ b/src/coreclr/tools/superpmi/superpmi-shared/lwmlist.h
@@ -39,6 +39,8 @@ LWM(EmbedFieldHandle, DWORDLONG, DLDL)
 LWM(EmbedGenericHandle, Agnostic_EmbedGenericHandle, Agnostic_CORINFO_GENERICHANDLE_RESULT)
 LWM(EmbedMethodHandle, DWORDLONG, DLDL)
 LWM(EmbedModuleHandle, DWORDLONG, DLDL)
+LWM(GetThreadLocalFieldInfo, DWORDLONG, DWORD)
+LWM(GetThreadLocalStaticBlocksInfo, DWORD, Agnostic_GetThreadLocalStaticBlocksInfo)
 DENSELWM(EmptyStringLiteral, DLD)
 DENSELWM(ErrorList, DWORD)
 LWM(FilterException, DWORD, DWORD)
diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp
index 42d655e53ff56..7680c8685fe8f 100644
--- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp
+++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp
@@ -3626,6 +3626,73 @@ void MethodContext::repGetFieldInfo(CORINFO_RESOLVED_TOKEN* pResolvedToken,
     }
 }
 
+void MethodContext::recGetThreadLocalFieldInfo(CORINFO_FIELD_HANDLE field, uint32_t result)
+{
+    if (GetThreadLocalFieldInfo == nullptr)
+        GetThreadLocalFieldInfo = new LightWeightMap<DWORDLONG, DWORD>();
+
+    DWORDLONG key = 0;
+
+    key = CastHandle(field);
+    GetThreadLocalFieldInfo->Add(key, result);
+    DEBUG_REC(dmpGetThreadLocalFieldInfo(key, result));
+}
+
+void MethodContext::dmpGetThreadLocalFieldInfo(DWORDLONG key, DWORD value)
+{
+    printf("GetThreadLocalFieldInfo key hnd-%016" PRIX64 ", result-%u", key, value);
+}
+
+uint32_t MethodContext::repGetThreadLocalFieldInfo(CORINFO_FIELD_HANDLE field)
+{
+    DWORDLONG key = CastHandle(field);
+    DWORD value = LookupByKeyOrMiss(GetThreadLocalFieldInfo, key, ": key %016" PRIX64 "", key);
+
+    DEBUG_REP(dmpGetThreadLocalFieldInfo(key, value));
+
+    return value;
+}
+
+void MethodContext::recGetThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo)
+{
+    if (GetThreadLocalStaticBlocksInfo == nullptr)
+        GetThreadLocalStaticBlocksInfo = new LightWeightMap<DWORD, Agnostic_GetThreadLocalStaticBlocksInfo>();
+
+    Agnostic_GetThreadLocalStaticBlocksInfo value;
+    ZeroMemory(&value, sizeof(value));
+
+    value.tlsIndex.handle = CastHandle(pInfo->tlsIndex.addr);
+    value.tlsIndex.accessType = pInfo->tlsIndex.accessType;
+    value.offsetOfMaxThreadStaticBlocks = pInfo->offsetOfMaxThreadStaticBlocks;
+    value.offsetOfThreadLocalStoragePointer = pInfo->offsetOfThreadLocalStoragePointer;
+    value.offsetOfThreadStaticBlocks = pInfo->offsetOfThreadStaticBlocks;
+
+    // This data is the same for the entire process, so just record it under key '0'.
+    GetThreadLocalStaticBlocksInfo->Add(0, value);
+    DEBUG_REC(dmpGetThreadLocalStaticBlocksInfo(0, value));
+}
+
+void MethodContext::dmpGetThreadLocalStaticBlocksInfo(DWORD key, const Agnostic_GetThreadLocalStaticBlocksInfo& value)
+{
+    printf("GetThreadLocalStaticBlocksInfo key 0, value tlsIndex-%016" PRIX64
+           ", offsetOfMaxThreadStaticBlocks-%u, offsetOfThreadLocalStoragePointer-%u, offsetOfThreadStaticBlocks-%u",
+           value.tlsIndex.handle, value.offsetOfMaxThreadStaticBlocks, value.offsetOfThreadLocalStoragePointer,
+           value.offsetOfThreadStaticBlocks);
+}
+
+void MethodContext::repGetThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo)
+{
+    Agnostic_GetThreadLocalStaticBlocksInfo value = LookupByKeyOrMiss(GetThreadLocalStaticBlocksInfo, 0, ": key %u", 0);
+
+    DEBUG_REP(dmpGetThreadLocalStaticBlocksInfo(0, value));
+
+    pInfo->tlsIndex.accessType = (InfoAccessType)value.tlsIndex.accessType;
+    pInfo->tlsIndex.addr = (void*)value.tlsIndex.handle;
+    pInfo->offsetOfMaxThreadStaticBlocks = value.offsetOfMaxThreadStaticBlocks;
+    pInfo->offsetOfThreadLocalStoragePointer = value.offsetOfThreadLocalStoragePointer;
+    pInfo->offsetOfThreadStaticBlocks = value.offsetOfThreadStaticBlocks;
+}
+
 void MethodContext::recEmbedMethodHandle(CORINFO_METHOD_HANDLE handle,
                                          void** ppIndirection,
                                          CORINFO_METHOD_HANDLE result)
diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h
index 137fd7503e934..949d780b09089 100644
--- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h
+++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h
@@ -485,6 +485,14 @@ class MethodContext
                         CORINFO_ACCESS_FLAGS flags,
                         CORINFO_FIELD_INFO* pResult);
 
+    void recGetThreadLocalFieldInfo(CORINFO_FIELD_HANDLE field, uint32_t result);
+    void dmpGetThreadLocalFieldInfo(DWORDLONG key, DWORD value);
+    uint32_t repGetThreadLocalFieldInfo(CORINFO_FIELD_HANDLE field);
+
+    void recGetThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo);
+    void dmpGetThreadLocalStaticBlocksInfo(DWORD key, const Agnostic_GetThreadLocalStaticBlocksInfo& value);
+    void repGetThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo);
+
     void recEmbedMethodHandle(CORINFO_METHOD_HANDLE handle, void** ppIndirection, CORINFO_METHOD_HANDLE result);
     void dmpEmbedMethodHandle(DWORDLONG key, DLDL value);
     CORINFO_METHOD_HANDLE repEmbedMethodHandle(CORINFO_METHOD_HANDLE handle, void** ppIndirection);
@@ -1176,7 +1184,9 @@ enum mcPackets
     Packet_GetStringChar = 204,
     Packet_GetIsClassInitedFlagAddress = 205,
     Packet_GetStaticBaseAddress = 206,
-    Packet_GetRISCV64PassStructInRegisterFlags = 207,
+    Packet_GetThreadLocalFieldInfo = 207,
+    Packet_GetThreadLocalStaticBlocksInfo = 208,
+    Packet_GetRISCV64PassStructInRegisterFlags = 209,
 };
 
 void SetDebugDumpVariables();
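The rec/dmp/rep triple above follows SuperPMI's standard record-and-replay pattern: the collector shim records each JIT-EE answer keyed by its inputs, and the replay side serves the same answer back without a live runtime. A reduced, self-contained sketch of the idea (std::map stands in for LightWeightMap; all names here are hypothetical):

    #include <cstdint>
    #include <map>
    #include <stdexcept>

    struct MockContext
    {
        // Keyed by the field handle, like LWM(GetThreadLocalFieldInfo, DWORDLONG, DWORD).
        std::map<uint64_t, uint32_t> threadLocalFieldInfo;

        // Collector side: remember what the EE answered.
        void recGetThreadLocalFieldInfo(uint64_t fieldHandle, uint32_t result)
        {
            threadLocalFieldInfo[fieldHandle] = result;
        }

        // Replay side: answer the JIT from the recording. A miss is a replay
        // failure, which is what LookupByKeyOrMiss reports in the real code.
        uint32_t repGetThreadLocalFieldInfo(uint64_t fieldHandle) const
        {
            auto it = threadLocalFieldInfo.find(fieldHandle);
            if (it == threadLocalFieldInfo.end())
                throw std::runtime_error("SuperPMI miss: getThreadLocalFieldInfo");
            return it->second;
        }
    };

diff --git a/src/coreclr/tools/superpmi/superpmi-shim-collector/icorjitinfo.cpp b/src/coreclr/tools/superpmi/superpmi-shim-collector/icorjitinfo.cpp
index 519482547ca79..c318a1a6481a4 100644
--- a/src/coreclr/tools/superpmi/superpmi-shim-collector/icorjitinfo.cpp
+++ b/src/coreclr/tools/superpmi/superpmi-shim-collector/icorjitinfo.cpp
@@ -1105,6 +1105,21 @@ void interceptor_ICJI::getFieldInfo(CORINFO_RESOLVED_TOKEN* pResolvedToken,
     mc->recGetFieldInfo(pResolvedToken, callerHandle, flags, pResult);
 }
 
+uint32_t interceptor_ICJI::getThreadLocalFieldInfo(CORINFO_FIELD_HANDLE field)
+{
+    mc->cr->AddCall("getThreadLocalFieldInfo");
+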
uint32_t result = original_ICorJitInfo->getThreadLocalFieldInfo(field); + mc->recGetThreadLocalFieldInfo(field, result); + return result; +} + +void interceptor_ICJI::getThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo) +{ + mc->cr->AddCall("getThreadLocalStaticBlocksInfo"); + original_ICorJitInfo->getThreadLocalStaticBlocksInfo(pInfo); + mc->recGetThreadLocalStaticBlocksInfo(pInfo); +} + // Returns true iff "fldHnd" represents a static field. bool interceptor_ICJI::isFieldStatic(CORINFO_FIELD_HANDLE fldHnd) { diff --git a/src/coreclr/tools/superpmi/superpmi-shim-counter/icorjitinfo_generated.cpp b/src/coreclr/tools/superpmi/superpmi-shim-counter/icorjitinfo_generated.cpp index d5c46baac99f6..ab0c006b8c3ce 100644 --- a/src/coreclr/tools/superpmi/superpmi-shim-counter/icorjitinfo_generated.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shim-counter/icorjitinfo_generated.cpp @@ -801,6 +801,20 @@ void interceptor_ICJI::getFieldInfo( original_ICorJitInfo->getFieldInfo(pResolvedToken, callerHandle, flags, pResult); } +uint32_t interceptor_ICJI::getThreadLocalFieldInfo( + CORINFO_FIELD_HANDLE field) +{ + mcs->AddCall("getThreadLocalFieldInfo"); + return original_ICorJitInfo->getThreadLocalFieldInfo(field); +} + +void interceptor_ICJI::getThreadLocalStaticBlocksInfo( + CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo) +{ + mcs->AddCall("getThreadLocalStaticBlocksInfo"); + original_ICorJitInfo->getThreadLocalStaticBlocksInfo(pInfo); +} + bool interceptor_ICJI::isFieldStatic( CORINFO_FIELD_HANDLE fldHnd) { diff --git a/src/coreclr/tools/superpmi/superpmi-shim-simple/icorjitinfo_generated.cpp b/src/coreclr/tools/superpmi/superpmi-shim-simple/icorjitinfo_generated.cpp index bff430b2038f9..d8e33021f6286 100644 --- a/src/coreclr/tools/superpmi/superpmi-shim-simple/icorjitinfo_generated.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shim-simple/icorjitinfo_generated.cpp @@ -702,6 +702,18 @@ void interceptor_ICJI::getFieldInfo( original_ICorJitInfo->getFieldInfo(pResolvedToken, callerHandle, flags, pResult); } +uint32_t interceptor_ICJI::getThreadLocalFieldInfo( + CORINFO_FIELD_HANDLE field) +{ + return original_ICorJitInfo->getThreadLocalFieldInfo(field); +} + +void interceptor_ICJI::getThreadLocalStaticBlocksInfo( + CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo) +{ + original_ICorJitInfo->getThreadLocalStaticBlocksInfo(pInfo); +} + bool interceptor_ICJI::isFieldStatic( CORINFO_FIELD_HANDLE fldHnd) { diff --git a/src/coreclr/tools/superpmi/superpmi/icorjitinfo.cpp b/src/coreclr/tools/superpmi/superpmi/icorjitinfo.cpp index 937dc7249fa14..693ea2fe00673 100644 --- a/src/coreclr/tools/superpmi/superpmi/icorjitinfo.cpp +++ b/src/coreclr/tools/superpmi/superpmi/icorjitinfo.cpp @@ -924,6 +924,18 @@ void MyICJI::getFieldInfo(CORINFO_RESOLVED_TOKEN* pResolvedToken, jitInstance->mc->repGetFieldInfo(pResolvedToken, callerHandle, flags, pResult); } +uint32_t MyICJI::getThreadLocalFieldInfo(CORINFO_FIELD_HANDLE field) +{ + jitInstance->mc->cr->AddCall("getThreadLocalFieldInfo"); + return jitInstance->mc->repGetThreadLocalFieldInfo(field); +} + +void MyICJI::getThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo) +{ + jitInstance->mc->cr->AddCall("getThreadLocalStaticBlocksInfo"); + jitInstance->mc->repGetThreadLocalStaticBlocksInfo(pInfo); +} + // Returns true iff "fldHnd" represents a static field. 
 bool MyICJI::isFieldStatic(CORINFO_FIELD_HANDLE fldHnd)
 {
diff --git a/src/coreclr/vm/appdomain.cpp b/src/coreclr/vm/appdomain.cpp
index 455cb0f8d9204..3cb82546ade00 100644
--- a/src/coreclr/vm/appdomain.cpp
+++ b/src/coreclr/vm/appdomain.cpp
@@ -665,6 +665,14 @@ void BaseDomain::InitVSD()
     GetLoaderAllocator()->InitVirtualCallStubManager(this);
 }
 
+#ifdef HOST_WINDOWS
+void BaseDomain::InitThreadStaticBlockTypeMap()
+{
+    STANDARD_VM_CONTRACT;
+
+    m_threadStaticBlockTypeIDMap.Init();
+}
+#endif // HOST_WINDOWS
 
 void BaseDomain::ClearBinderContext()
 {
@@ -1762,6 +1770,11 @@ void AppDomain::Create()
     // allocate a Virtual Call Stub Manager for the default domain
     pDomain->InitVSD();
 
+#ifdef HOST_WINDOWS
+    // allocate the map that assigns thread static block indices to types
+    pDomain->InitThreadStaticBlockTypeMap();
+#endif
+
     pDomain->SetStage(AppDomain::STAGE_OPEN);
 
     pDomain->CreateDefaultBinder();
@@ -4634,7 +4647,7 @@ UINT32 BaseDomain::GetTypeID(PTR_MethodTable pMT) {
         PRECONDITION(pMT->GetDomain() == this);
     } CONTRACTL_END;
 
-    return m_typeIDMap.GetTypeID(pMT);
+    return m_typeIDMap.GetTypeID(pMT, true);
 }
 
 //------------------------------------------------------------------------
@@ -4665,6 +4678,34 @@ PTR_MethodTable BaseDomain::LookupType(UINT32 id) {
     return pMT;
 }
 
+#ifdef HOST_WINDOWS
+//------------------------------------------------------------------------
+UINT32 BaseDomain::GetThreadStaticTypeIndex(PTR_MethodTable pMT)
+{
+    CONTRACTL {
+        THROWS;
+        GC_TRIGGERS;
+        PRECONDITION(pMT->GetDomain() == this);
+    } CONTRACTL_END;
+
+    return m_threadStaticBlockTypeIDMap.GetTypeID(pMT, false);
+}
+
+//------------------------------------------------------------------------
+PTR_MethodTable BaseDomain::LookupThreadStaticBlockType(UINT32 id) {
+    CONTRACTL {
+        NOTHROW;
+        WRAPPER(GC_TRIGGERS);
+        CONSISTENCY_CHECK(id != TYPE_ID_THIS_CLASS);
+    } CONTRACTL_END;
+
+    PTR_MethodTable pMT = m_threadStaticBlockTypeIDMap.LookupType(id);
+
+    CONSISTENCY_CHECK(CheckPointer(pMT));
+    return pMT;
+}
+#endif // HOST_WINDOWS
+
 #ifndef DACCESS_COMPILE
 //---------------------------------------------------------------------------------------
 void BaseDomain::RemoveTypesFromTypeIDMap(LoaderAllocator* pLoaderAllocator)
diff --git a/src/coreclr/vm/appdomain.hpp b/src/coreclr/vm/appdomain.hpp
index 1296adbed0d59..2c6df2564e7b6 100644
--- a/src/coreclr/vm/appdomain.hpp
+++ b/src/coreclr/vm/appdomain.hpp
@@ -1224,7 +1224,24 @@ class BaseDomain
     // Range list for collectible types. Maps VSD PCODEs back to the VirtualCallStubManager they belong to
     LockedRangeList m_collVSDRanges;
 
+#ifdef HOST_WINDOWS
+    // MethodTable to `typeIndex` map. `typeIndex` is embedded in the code during codegen.
+    // During execution, the corresponding thread static data blocks are stored in the
+    // `t_threadStaticBlocks` array at `typeIndex`.
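+    // Note that this map hands out its IDs with useFatPointerDispatch = false: the
+    // indices are consumed directly as array indices by generated code, so they must
+    // stay in the small (non-fat) ID range.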
+    TypeIDMap m_threadStaticBlockTypeIDMap;
+
+#endif // HOST_WINDOWS
+
 public:
+
+#ifdef HOST_WINDOWS
+    void InitThreadStaticBlockTypeMap();
+
+    UINT32 GetThreadStaticTypeIndex(PTR_MethodTable pMT);
+
+    PTR_MethodTable LookupThreadStaticBlockType(UINT32 id);
+#endif
+
     UINT32 GetTypeID(PTR_MethodTable pMT);
     UINT32 LookupTypeID(PTR_MethodTable pMT);
     PTR_MethodTable LookupType(UINT32 id);
diff --git a/src/coreclr/vm/contractimpl.cpp b/src/coreclr/vm/contractimpl.cpp
index 1764efcd03857..388e94549c1d0 100644
--- a/src/coreclr/vm/contractimpl.cpp
+++ b/src/coreclr/vm/contractimpl.cpp
@@ -88,7 +88,8 @@ PTR_MethodTable TypeIDMap::LookupType(UINT32 id)
 //------------------------------------------------------------------------
 // Returns the ID of the type if found. If not found, assigns the ID and
 // returns the new ID.
-UINT32 TypeIDMap::GetTypeID(PTR_MethodTable pMT)
+// If useFatPointerDispatch is true, new IDs may come from the fat token range once small IDs are exhausted.
+UINT32 TypeIDMap::GetTypeID(PTR_MethodTable pMT, bool useFatPointerDispatch)
 {
     CONTRACTL {
         THROWS;
@@ -110,7 +111,7 @@ UINT32 TypeIDMap::GetTypeID(PTR_MethodTable pMT)
         {
             return id;
         }
-        id = GetNextID();
+        id = GetNextID(useFatPointerDispatch);
         CONSISTENCY_CHECK(id <= TypeIDProvider::MAX_TYPE_ID);
 
         // Insert the pair, with lookups in both directions
diff --git a/src/coreclr/vm/contractimpl.h b/src/coreclr/vm/contractimpl.h
index 64ff2496592d2..e5ee6ee35b3b2 100644
--- a/src/coreclr/vm/contractimpl.h
+++ b/src/coreclr/vm/contractimpl.h
@@ -394,7 +394,7 @@ class TypeIDProvider
 
     //------------------------------------------------------------------------
     // Returns the next available ID
-    inline UINT32 GetNextID()
+    inline UINT32 GetNextID(bool useFatPointerDispatch)
    {
         CONTRACTL {
             THROWS;
@@ -406,7 +406,7 @@ class TypeIDProvider
         UINT32 id = m_nextID;
 
 #ifdef FAT_DISPATCH_TOKENS
-        if (id > DispatchToken::MAX_TYPE_ID_SMALL)
+        if (useFatPointerDispatch && (id > DispatchToken::MAX_TYPE_ID_SMALL))
         {
             return GetNextFatID();
         }
@@ -455,11 +455,11 @@ class TypeIDMap
 
     //------------------------------------------------------------------------
     // Returns the next available ID
-    inline UINT32 GetNextID()
+    inline UINT32 GetNextID(bool useFatPointerDispatch)
     {
         WRAPPER_NO_CONTRACT;
         CONSISTENCY_CHECK(m_lock.OwnedByCurrentThread());
-        UINT32 id = m_idProvider.GetNextID();
+        UINT32 id = m_idProvider.GetNextID(useFatPointerDispatch);
         CONSISTENCY_CHECK(id != TYPE_ID_THIS_CLASS);
         return id;
     }
@@ -493,7 +493,7 @@ class TypeIDMap
 
     //------------------------------------------------------------------------
     // Returns the ID of the type if found. If not found, assigns the ID and
     // returns the new ID.
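+    // Usage sketch (illustrative, taken from the two call sites in appdomain.cpp):
+    //     m_typeIDMap.GetTypeID(pMT, true);                     // VSD path, fat IDs allowed
+    //     m_threadStaticBlockTypeIDMap.GetTypeID(pMT, false);   // thread statics, small IDs only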
-    UINT32 GetTypeID(PTR_MethodTable pMT);
+    UINT32 GetTypeID(PTR_MethodTable pMT, bool useFatPointerDispatch);
 
 #ifndef DACCESS_COMPILE
     //------------------------------------------------------------------------
diff --git a/src/coreclr/vm/interpreter.cpp b/src/coreclr/vm/interpreter.cpp
index 3eafbc0e7557e..a838432f5ca1a 100644
--- a/src/coreclr/vm/interpreter.cpp
+++ b/src/coreclr/vm/interpreter.cpp
@@ -7781,7 +7781,7 @@ bool Interpreter::StaticFldAddrWork(CORINFO_ACCESS_FLAGS accessFlgs, /*out (byre
 
         EnsureClassInit(GetMethodTableFromClsHnd(fldTok.hClass));
 
-        if (fldInfo.fieldAccessor == CORINFO_FIELD_STATIC_TLS)
+        if ((fldInfo.fieldAccessor == CORINFO_FIELD_STATIC_TLS) || (fldInfo.fieldAccessor == CORINFO_FIELD_STATIC_TLS_MANAGED))
         {
             NYI_INTERP("Thread-local static.");
         }
diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp
index db26ddfe14bc9..c36e50d77d2fd 100644
--- a/src/coreclr/vm/jithelpers.cpp
+++ b/src/coreclr/vm/jithelpers.cpp
@@ -1760,6 +1760,16 @@ HCIMPL1(void*, JIT_GetGCThreadStaticBase_Helper, MethodTable * pMT)
 
 HCIMPLEND
 
+#ifdef _MSC_VER
+__declspec(selectany) __declspec(thread) uint32_t t_maxThreadStaticBlocks;
+__declspec(selectany) __declspec(thread) uint32_t t_threadStaticBlocksSize;
+__declspec(selectany) __declspec(thread) void** t_threadStaticBlocks;
+#else
+EXTERN_C __thread uint32_t t_maxThreadStaticBlocks;
+EXTERN_C __thread uint32_t t_threadStaticBlocksSize;
+EXTERN_C __thread void** t_threadStaticBlocks;
+#endif
+
 // *** This helper corresponds to both CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE and
 //     CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR. Even though we always check
 //     if the class constructor has been run, we have a separate helper ID for the "no ctor"
@@ -1793,6 +1803,68 @@ HCIMPL2(void*, JIT_GetSharedNonGCThreadStaticBase, DomainLocalModule *pDomainLoc
     return HCCALL1(JIT_GetNonGCThreadStaticBase_Helper, pMT);
 }
 HCIMPLEND
+
+// *** This helper corresponds to CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED.
+// Even though we always check if the class constructor has been run, we have a separate
+// helper ID for the "no ctor" version because it allows the JIT to do some reordering that
+// otherwise wouldn't be possible.
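+//
+// The JIT uses the offsets returned by getThreadLocalStaticBlocksInfo to expand the
+// access inline and calls this helper only on the slow path. The inline fast path is
+// roughly equivalent to the following sketch (illustrative only, not the emitted code):
+//
+//     uint8_t* tlsBlock = ((uint8_t**)NtCurrentTeb()->ThreadLocalStoragePointer)[_tls_index];
+//     void* base = nullptr;
+//     if (staticBlockIndex <= *(uint32_t*)(tlsBlock + offsetOfMaxThreadStaticBlocks))
+//         base = (*(void***)(tlsBlock + offsetOfThreadStaticBlocks))[staticBlockIndex];
+//     if (base == nullptr)
+//         base = JIT_GetSharedNonGCThreadStaticBaseOptimized(staticBlockIndex);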
+HCIMPL1(void*, JIT_GetSharedNonGCThreadStaticBaseOptimized, UINT32 staticBlockIndex)
+{
+    void* staticBlock = nullptr;
+
+#ifdef HOST_WINDOWS
+    FCALL_CONTRACT;
+
+    HELPER_METHOD_FRAME_BEGIN_RET_0();    // Set up a frame
+
+    MethodTable * pMT = AppDomain::GetCurrentDomain()->LookupThreadStaticBlockType(staticBlockIndex);
+    _ASSERTE(!pMT->HasGenericsStaticsInfo());
+
+    // Get the TLM
+    ThreadLocalModule * pThreadLocalModule = ThreadStatics::GetTLM(pMT);
+    _ASSERTE(pThreadLocalModule != NULL);
+
+    // Check if the class constructor needs to be run
+    pThreadLocalModule->CheckRunClassInitThrowing(pMT);
+
+    // Lookup the non-GC statics base pointer
+    staticBlock = (void*) pMT->GetNonGCThreadStaticsBasePointer();
+    CONSISTENCY_CHECK(staticBlock != NULL);
+
+    if (t_threadStaticBlocksSize <= staticBlockIndex)
+    {
+        UINT32 newThreadStaticBlocksSize = max(2 * t_threadStaticBlocksSize, staticBlockIndex + 1);
+        void** newThreadStaticBlocks = (void**) new PTR_BYTE[newThreadStaticBlocksSize];
+        memset(newThreadStaticBlocks + t_threadStaticBlocksSize, 0, (newThreadStaticBlocksSize - t_threadStaticBlocksSize) * sizeof(PTR_BYTE));
+
+        if (t_threadStaticBlocksSize > 0)
+        {
+            memcpy(newThreadStaticBlocks, t_threadStaticBlocks, t_threadStaticBlocksSize * sizeof(PTR_BYTE));
+            delete[] t_threadStaticBlocks;
+        }
+
+        t_threadStaticBlocksSize = newThreadStaticBlocksSize;
+        t_threadStaticBlocks = newThreadStaticBlocks;
+    }
+
+    void* currentEntry = t_threadStaticBlocks[staticBlockIndex];
+    // We could be coming here a second time, after running the ctor, when we try to get
+    // the static block. In that case, just avoid adding the same entry again.
+    if (currentEntry != staticBlock)
+    {
+        _ASSERTE(currentEntry == nullptr);
+        t_threadStaticBlocks[staticBlockIndex] = staticBlock;
+        t_maxThreadStaticBlocks = max(t_maxThreadStaticBlocks, staticBlockIndex);
+    }
+    HELPER_METHOD_FRAME_END();
+#else
+    _ASSERTE(!"JIT_GetSharedNonGCThreadStaticBaseOptimized is not supported on non-Windows platforms.");
+#endif // HOST_WINDOWS
+
+    return staticBlock;
+}
+HCIMPLEND
+
 #include 

 // *** This helper corresponds to both CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE and
diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp
index 1392dcf51a748..c633517cbf5fa 100644
--- a/src/coreclr/vm/jitinterface.cpp
+++ b/src/coreclr/vm/jitinterface.cpp
@@ -65,6 +65,18 @@
 
 #include "tailcallhelp.h"
 
+#ifdef HOST_WINDOWS
+EXTERN_C uint32_t _tls_index;
+#endif
+
+#ifdef _MSC_VER
+__declspec(selectany) __declspec(thread) uint32_t t_maxThreadStaticBlocks;
+__declspec(selectany) __declspec(thread) void** t_threadStaticBlocks;
+#else
+EXTERN_C __thread uint32_t t_maxThreadStaticBlocks;
+EXTERN_C __thread void** t_threadStaticBlocks;
+#endif
+
 // The Stack Overflow probe takes place in the COOPERATIVE_TRANSITION_BEGIN() macro
 //
 
@@ -839,6 +851,17 @@ size_t CEEInfo::findNameOfToken (Module* module,
     return strlen (szFQName);
 }
 
+#ifdef HOST_WINDOWS
+/* static */
+uint32_t CEEInfo::ThreadLocalOffset(void* p)
+{
+    PTEB Teb = NtCurrentTeb();
+    uint8_t** pTls = (uint8_t**)Teb->ThreadLocalStoragePointer;
+    uint8_t* pOurTls = pTls[_tls_index];
+    return (uint32_t)((uint8_t*)p - pOurTls);
+}
+#endif // HOST_WINDOWS
+
 CorInfoHelpFunc CEEInfo::getLazyStringLiteralHelper(CORINFO_MODULE_HANDLE handle)
 {
     CONTRACTL {
@@ -1522,16 +1545,33 @@ void CEEInfo::getFieldInfo (CORINFO_RESOLVED_TOKEN * pResolvedToken,
             fieldAccessor = intrinsicAccessor;
         }
         else
-        if (// Static fields are not pinned in collectible types. We will always access
+        if (pFieldMT->Collectible())
+        {
+            // Static fields are not pinned in collectible types. We will always access
             // them using a helper since the address cannot be embedded into the code.
-            pFieldMT->Collectible() ||
-            // We always treat accessing thread statics as if we are in domain neutral code.
-            pField->IsThreadStatic()
-            )
+            fieldAccessor = CORINFO_FIELD_STATIC_SHARED_STATIC_HELPER;
+
+            pResult->helper = getSharedStaticsHelper(pField, pFieldMT);
+        }
+        else if (pField->IsThreadStatic())
         {
+            // We always treat accessing thread statics as if we are in domain neutral code.
             fieldAccessor = CORINFO_FIELD_STATIC_SHARED_STATIC_HELPER;
 
             pResult->helper = getSharedStaticsHelper(pField, pFieldMT);
+
+#ifdef HOST_WINDOWS
+            bool canOptimizeHelper = (pResult->helper == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR) ||
+                                     (pResult->helper == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE);
+            // On Windows, we convert the TLS access to the optimized helper: the thread
+            // static blocks are stored directly in TLS and accessed via inline code.
+            if (canOptimizeHelper && ((pField->GetFieldType() >= ELEMENT_TYPE_BOOLEAN) && (pField->GetFieldType() < ELEMENT_TYPE_STRING)))
+            {
+                fieldAccessor = CORINFO_FIELD_STATIC_TLS_MANAGED;
+
+                pResult->helper = CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED;
+            }
+#endif // HOST_WINDOWS
         }
         else
         {
@@ -1713,6 +1753,86 @@ void CEEInfo::getFieldInfo (CORINFO_RESOLVED_TOKEN * pResolvedToken,
 
     EE_TO_JIT_TRANSITION();
 }
+
+
+#ifdef HOST_WINDOWS
+
+/*********************************************************************/
+uint32_t CEEInfo::getThreadLocalFieldInfo (CORINFO_FIELD_HANDLE field)
+{
+    CONTRACTL {
+        THROWS;
+        GC_TRIGGERS;
+        MODE_PREEMPTIVE;
+    } CONTRACTL_END;
+
+    UINT32 typeIndex = 0;
+
+    JIT_TO_EE_TRANSITION();
+
+    FieldDesc* fieldDesc = (FieldDesc*)field;
+    _ASSERTE(fieldDesc->IsThreadStatic());
+
+    typeIndex = AppDomain::GetCurrentDomain()->GetThreadStaticTypeIndex(fieldDesc->GetEnclosingMethodTable());
+
+    _ASSERTE(typeIndex != TypeIDProvider::INVALID_TYPE_ID);
+
+    EE_TO_JIT_TRANSITION();
+    return typeIndex;
+}
+
+/*********************************************************************/
+void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo)
+{
+    CONTRACTL {
+        NOTHROW;
+        GC_NOTRIGGER;
+        MODE_PREEMPTIVE;
+    } CONTRACTL_END;
+
+    JIT_TO_EE_TRANSITION_LEAF();
+
+    pInfo->tlsIndex.addr = (void*)static_cast<uintptr_t>(_tls_index);
+    pInfo->tlsIndex.accessType = IAT_VALUE;
+
+    pInfo->offsetOfThreadLocalStoragePointer = offsetof(_TEB, ThreadLocalStoragePointer);
+    pInfo->offsetOfThreadStaticBlocks = CEEInfo::ThreadLocalOffset(&t_threadStaticBlocks);
+    pInfo->offsetOfMaxThreadStaticBlocks = CEEInfo::ThreadLocalOffset(&t_maxThreadStaticBlocks);
+
+    EE_TO_JIT_TRANSITION_LEAF();
+}
+#else
+uint32_t CEEInfo::getThreadLocalFieldInfo (CORINFO_FIELD_HANDLE field)
+{
+    CONTRACTL {
+        NOTHROW;
+        GC_NOTRIGGER;
+        MODE_PREEMPTIVE;
+    } CONTRACTL_END;
+
+    return 0;
+}
+
+void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo)
+{
+    CONTRACTL {
+        NOTHROW;
+        GC_NOTRIGGER;
+        MODE_PREEMPTIVE;
+    } CONTRACTL_END;
+
+    JIT_TO_EE_TRANSITION_LEAF();
+
+    pInfo->tlsIndex.addr = (UINT8*)0;
+
+    pInfo->offsetOfThreadLocalStoragePointer = 0;
+    pInfo->offsetOfThreadStaticBlocks = 0;
+    pInfo->offsetOfMaxThreadStaticBlocks = 0;
+
+    EE_TO_JIT_TRANSITION_LEAF();
+}
+#endif // HOST_WINDOWS
+
 //---------------------------------------------------------------------------------------
 //
 bool
 CEEInfo::isFieldStatic(CORINFO_FIELD_HANDLE fldHnd)
 {
diff --git a/src/coreclr/vm/jitinterface.h b/src/coreclr/vm/jitinterface.h
index cc2cccca72a09..64f56c162e9dc 100644
--- a/src/coreclr/vm/jitinterface.h
+++ b/src/coreclr/vm/jitinterface.h
@@ -407,7 +407,6 @@ extern "C"
 };
 
-
 /*********************************************************************/
 /*********************************************************************/
 class CEEInfo : public ICorJitInfo
@@ -439,6 +438,10 @@ class CEEInfo : public ICorJitInfo
     static size_t findNameOfToken (Module* module, mdToken metaTOK,
                                    _Out_writes_ (FQNameCapacity) char * szFQName, size_t FQNameCapacity);
 
+#ifdef HOST_WINDOWS
+    static uint32_t ThreadLocalOffset(void* p);
+#endif // HOST_WINDOWS
+
     DWORD getMethodAttribsInternal (CORINFO_METHOD_HANDLE ftnHnd);
 
     // Given a method descriptor ftnHnd, extract signature information into sigInfo