diff --git a/src/coreclr/inc/corinfo.h b/src/coreclr/inc/corinfo.h index bb07aab36a85f..ea1e0c10ec954 100644 --- a/src/coreclr/inc/corinfo.h +++ b/src/coreclr/inc/corinfo.h @@ -1727,8 +1727,11 @@ struct CORINFO_FIELD_INFO struct CORINFO_THREAD_STATIC_BLOCKS_INFO { - CORINFO_CONST_LOOKUP tlsIndex; - uint32_t offsetOfThreadLocalStoragePointer; + CORINFO_CONST_LOOKUP tlsIndex; // windows specific + void* tlsGetAddrFtnPtr; // linux/x64 specific - address of __tls_get_addr() function + void* tlsIndexObject; // linux/x64 specific - address of tls_index object + void* threadVarsSection; // osx x64/arm64 specific - address of __thread_vars section of `t_ThreadStatics` + uint32_t offsetOfThreadLocalStoragePointer; // windows specific uint32_t offsetOfMaxThreadStaticBlocks; uint32_t offsetOfThreadStaticBlocks; uint32_t offsetOfGCDataPointer; diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h index fda1fdeae24be..6c6f7e8283c01 100644 --- a/src/coreclr/inc/jiteeversionguid.h +++ b/src/coreclr/inc/jiteeversionguid.h @@ -43,11 +43,11 @@ typedef const GUID *LPCGUID; #define GUID_DEFINED #endif // !GUID_DEFINED -constexpr GUID JITEEVersionIdentifier = { /* ba2c087c-9b8b-49c1-a52f-3514eb489308 */ - 0xba2c087c, - 0x9b8b, - 0x49c1, - {0xa5, 0x2f, 0x35, 0x14, 0xeb, 0x48, 0x93, 0x08} +constexpr GUID JITEEVersionIdentifier = { /* 02e334af-4e6e-4a68-9feb-308d3d2661bc */ + 0x2e334af, + 0x4e6e, + 0x4a68, + {0x9f, 0xeb, 0x30, 0x8d, 0x3d, 0x26, 0x61, 0xbc} }; ////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 955cba0b42a8b..93e98309f0949 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -2944,6 +2944,13 @@ void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode) inst_Mov_Extend(targetType, /* srcInReg */ true, targetReg, dataReg, /* canSkip */ true, emitActualTypeSize(targetType)); } + else if (TargetOS::IsUnix && data->IsIconHandle(GTF_ICON_TLS_HDL)) + { + assert(data->AsIntCon()->IconValue() == 0); + emitAttr attr = emitActualTypeSize(targetType); + // On non-windows, need to load the address from system register. 
+ emit->emitIns_R(INS_mrs_tpid0, attr, targetReg); + } else { inst_Mov(targetType, targetReg, dataReg, /* canSkip */ true); diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 5804e731e6924..7b2fdb8730c00 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -5026,11 +5026,8 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // Partially inline static initializations DoPhase(this, PHASE_EXPAND_STATIC_INIT, &Compiler::fgExpandStaticInit); - if (TargetOS::IsWindows) - { - // Currently this is only applicable for Windows - DoPhase(this, PHASE_EXPAND_TLS, &Compiler::fgExpandThreadLocalAccess); - } + // Expand thread local access + DoPhase(this, PHASE_EXPAND_TLS, &Compiler::fgExpandThreadLocalAccess); // Insert GC Polls DoPhase(this, PHASE_INSERT_GC_POLLS, &Compiler::fgInsertGCPolls); diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index f2c438fe62398..1d42520215ded 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -7124,7 +7124,7 @@ class Compiler optMethodFlags |= OMF_HAS_GUARDEDDEVIRT; } - bool doesMethodHasTlsFieldAccess() + bool methodHasTlsFieldAccess() { return (optMethodFlags & OMF_HAS_TLS_FIELD) != 0; } diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index 7e3c6501399c6..0c1318006e18c 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -10205,9 +10205,9 @@ void emitter::emitRecordCallSite(ULONG instrOffset, /* IN */ if (callSig == nullptr) { - assert(methodHandle != nullptr); - - if (Compiler::eeGetHelperNum(methodHandle) == CORINFO_HELP_UNDEF) + // For certain calls whose target is non-containable (e.g. tls access targets), `methodHandle` + // will be nullptr, because the target is present in a register. + if ((methodHandle != nullptr) && (Compiler::eeGetHelperNum(methodHandle) == CORINFO_HELP_UNDEF)) { emitComp->eeGetMethodSig(methodHandle, &sigInfo); callSig = &sigInfo; diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 4ae2d717f0eb4..4743615fd03d4 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -937,7 +937,7 @@ void emitter::emitInsSanityCheck(instrDesc* id) case IF_SI_0B: // SI_0B ................ ....bbbb........ imm4 - barrier break; - case IF_SR_1A: // SR_1A ................ ...........ttttt Rt (dc zva) + case IF_SR_1A: // SR_1A ................ ...........ttttt Rt (dc zva, mrs) datasize = id->idOpSize(); assert(isGeneralRegister(id->idReg1())); assert(datasize == EA_8BYTE); @@ -3741,6 +3741,12 @@ void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg) fmt = IF_SR_1A; break; + case INS_mrs_tpid0: + id = emitNewInstrSmall(attr); + id->idReg1(reg); + fmt = IF_SR_1A; + break; + default: unreached(); } @@ -11793,7 +11799,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += emitOutput_Instr(dst, code); break; - case IF_SR_1A: // SR_1A ................ ...........ttttt Rt (dc zva) + case IF_SR_1A: // SR_1A ................ ...........ttttt Rt (dc zva, mrs) assert(insOptsNone(id->idInsOpt())); code = emitInsCode(ins, fmt); code |= insEncodeReg_Rt(id->idReg1()); // ttttt @@ -13921,8 +13927,16 @@ void emitter::emitDispInsHelp( emitDispBarrier((insBarrier)emitGetInsSC(id)); break; - case IF_SR_1A: // SR_1A ................ ...........ttttt Rt (dc zva) - emitDispReg(id->idReg1(), size, false); + case IF_SR_1A: // SR_1A ................ 
...........ttttt Rt (dc zva, mrs) + if (ins == INS_mrs_tpid0) + { + emitDispReg(id->idReg1(), size, true); + printf("tpidr_el0"); + } + else + { + emitDispReg(id->idReg1(), size, false); + } break; default: diff --git a/src/coreclr/jit/emitfmtsarm64.h b/src/coreclr/jit/emitfmtsarm64.h index 81f41085a2ebe..31bbde6afc47a 100644 --- a/src/coreclr/jit/emitfmtsarm64.h +++ b/src/coreclr/jit/emitfmtsarm64.h @@ -227,7 +227,7 @@ IF_DEF(SN_0A, IS_NONE, NONE) // SN_0A ................ ................ IF_DEF(SI_0A, IS_NONE, NONE) // SI_0A ...........iiiii iiiiiiiiiii..... imm16 IF_DEF(SI_0B, IS_NONE, NONE) // SI_0B ................ ....bbbb........ imm4 - barrier -IF_DEF(SR_1A, IS_NONE, NONE) // SR_1A ................ ...........ttttt Rt (dc zva) +IF_DEF(SR_1A, IS_NONE, NONE) // SR_1A ................ ...........ttttt Rt (dc zva, mrs) IF_DEF(INVALID, IS_NONE, NONE) // diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 62f4d7e0c1d8c..b95f75f075a78 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -421,7 +421,7 @@ PhaseStatus Compiler::fgExpandThreadLocalAccess() { PhaseStatus result = PhaseStatus::MODIFIED_NOTHING; - if (!doesMethodHasTlsFieldAccess()) + if (!methodHasTlsFieldAccess()) { // TP: nothing to expand in the current method JITDUMP("Nothing to expand.\n") @@ -478,36 +478,50 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* return false; } + assert(!opts.IsReadyToRun()); + + if (TargetOS::IsUnix) + { +#if defined(TARGET_ARM) || !defined(TARGET_64BIT) + // On Arm, Thread execution blocks are accessed using co-processor registers and instructions such + // as MRC and MCR are used to access them. We do not support them and so should never optimize the + // field access using TLS. + noway_assert(!"Unsupported scenario of optimizing TLS access on Linux Arm32/x86"); +#endif + } + else + { #ifdef TARGET_ARM - // On Arm, Thread execution blocks are accessed using co-processor registers and instructions such - // as MRC and MCR are used to access them. We do not support them and so should never optimize the - // field access using TLS. - assert(!"Unsupported scenario of optimizing TLS access on Arm32"); + // On Arm, Thread execution blocks are accessed using co-processor registers and instructions such + // as MRC and MCR are used to access them. We do not support them and so should never optimize the + // field access using TLS. + noway_assert(!"Unsupported scenario of optimizing TLS access on Windows Arm32"); #endif + } JITDUMP("Expanding thread static local access for [%06d] in " FMT_BB ":\n", dspTreeID(call), block->bbNum); DISPTREE(call); JITDUMP("\n"); + bool isGCThreadStatic = eeGetHelperNum(call->gtCallMethHnd) == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED; CORINFO_THREAD_STATIC_BLOCKS_INFO threadStaticBlocksInfo; - info.compCompHnd->getThreadLocalStaticBlocksInfo(&threadStaticBlocksInfo, isGCThreadStatic); + memset(&threadStaticBlocksInfo, 0, sizeof(CORINFO_THREAD_STATIC_BLOCKS_INFO)); - uint32_t offsetOfMaxThreadStaticBlocksVal = 0; - uint32_t offsetOfThreadStaticBlocksVal = 0; + info.compCompHnd->getThreadLocalStaticBlocksInfo(&threadStaticBlocksInfo, isGCThreadStatic); JITDUMP("getThreadLocalStaticBlocksInfo (%s)\n:", isGCThreadStatic ? 
"GC" : "Non-GC"); - offsetOfMaxThreadStaticBlocksVal = threadStaticBlocksInfo.offsetOfMaxThreadStaticBlocks; - offsetOfThreadStaticBlocksVal = threadStaticBlocksInfo.offsetOfThreadStaticBlocks; - - JITDUMP("tlsIndex= %u\n", (ssize_t)threadStaticBlocksInfo.tlsIndex.addr); - JITDUMP("offsetOfThreadLocalStoragePointer= %u\n", threadStaticBlocksInfo.offsetOfThreadLocalStoragePointer); - JITDUMP("offsetOfMaxThreadStaticBlocks= %u\n", offsetOfMaxThreadStaticBlocksVal); - JITDUMP("offsetOfThreadStaticBlocks= %u\n", offsetOfThreadStaticBlocksVal); - JITDUMP("offsetOfGCDataPointer= %u\n", threadStaticBlocksInfo.offsetOfGCDataPointer); + JITDUMP("tlsIndex= %p\n", dspPtr(threadStaticBlocksInfo.tlsIndex.addr)); + JITDUMP("tlsGetAddrFtnPtr= %p\n", dspPtr(threadStaticBlocksInfo.tlsGetAddrFtnPtr)); + JITDUMP("tlsIndexObject= %p\n", dspPtr(threadStaticBlocksInfo.tlsIndexObject)); + JITDUMP("threadVarsSection= %p\n", dspPtr(threadStaticBlocksInfo.threadVarsSection)); + JITDUMP("offsetOfThreadLocalStoragePointer= %u\n", + dspOffset(threadStaticBlocksInfo.offsetOfThreadLocalStoragePointer)); + JITDUMP("offsetOfMaxThreadStaticBlocks= %u\n", dspOffset(threadStaticBlocksInfo.offsetOfMaxThreadStaticBlocks)); + JITDUMP("offsetOfThreadStaticBlocks= %u\n", dspOffset(threadStaticBlocksInfo.offsetOfThreadStaticBlocks)); + JITDUMP("offsetOfGCDataPointer= %u\n", dspOffset(threadStaticBlocksInfo.offsetOfGCDataPointer)); - assert(threadStaticBlocksInfo.tlsIndex.accessType == IAT_VALUE); assert((eeGetHelperNum(call->gtCallMethHnd) == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED) || (eeGetHelperNum(call->gtCallMethHnd) == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED)); @@ -546,56 +560,131 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* gtUpdateStmtSideEffects(stmt); GenTree* typeThreadStaticBlockIndexValue = call->gtArgs.GetArgByIndex(0)->GetNode(); + GenTree* tlsValue = nullptr; + unsigned tlsLclNum = lvaGrabTemp(true DEBUGARG("TLS access")); + lvaTable[tlsLclNum].lvType = TYP_I_IMPL; + GenTree* maxThreadStaticBlocksValue = nullptr; + GenTree* threadStaticBlocksValue = nullptr; + GenTree* tlsValueDef = nullptr; + + if (TargetOS::IsWindows) + { + size_t tlsIndexValue = (size_t)threadStaticBlocksInfo.tlsIndex.addr; + GenTree* dllRef = nullptr; - void** pIdAddr = nullptr; + if (tlsIndexValue != 0) + { + dllRef = gtNewIconHandleNode(tlsIndexValue * TARGET_POINTER_SIZE, GTF_ICON_TLS_HDL); + } - size_t tlsIndexValue = (size_t)threadStaticBlocksInfo.tlsIndex.addr; - GenTree* dllRef = nullptr; + // Mark this ICON as a TLS_HDL, codegen will use FS:[cns] or GS:[cns] + tlsValue = gtNewIconHandleNode(threadStaticBlocksInfo.offsetOfThreadLocalStoragePointer, GTF_ICON_TLS_HDL); + tlsValue = gtNewIndir(TYP_I_IMPL, tlsValue, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); - if (tlsIndexValue != 0) - { - dllRef = gtNewIconHandleNode(tlsIndexValue * TARGET_POINTER_SIZE, GTF_ICON_TLS_HDL); + if (dllRef != nullptr) + { + // Add the dllRef to produce thread local storage reference for coreclr + tlsValue = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsValue, dllRef); + } + + // Base of coreclr's thread local storage + tlsValue = gtNewIndir(TYP_I_IMPL, tlsValue, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); } + else if (TargetOS::IsMacOS) + { + // For OSX x64/arm64, we need to get the address of relevant __thread_vars section of + // the thread local variable `t_ThreadStatics`. 
Address of `tlv_get_address` is stored + // in this entry, which we dereference and invoke it, passing the __thread_vars address + // present in `threadVarsSection`. + // + // Code sequence to access thread local variable on osx/x64: + // + // mov rdi, threadVarsSection + // call [rdi] + // + // Code sequence to access thread local variable on osx/arm64: + // + // mov x0, threadVarsSection + // mov x1, [x0] + // blr x1 + // + size_t threadVarsSectionVal = (size_t)threadStaticBlocksInfo.threadVarsSection; + GenTree* tls_get_addr_val = gtNewIconHandleNode(threadVarsSectionVal, GTF_ICON_FTN_ADDR); + + tls_get_addr_val = gtNewIndir(TYP_I_IMPL, tls_get_addr_val, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); - // Mark this ICON as a TLS_HDL, codegen will use FS:[cns] or GS:[cns] - GenTree* tlsRef = gtNewIconHandleNode(threadStaticBlocksInfo.offsetOfThreadLocalStoragePointer, GTF_ICON_TLS_HDL); + tlsValue = gtNewIndCallNode(tls_get_addr_val, TYP_I_IMPL); + GenTreeCall* tlsRefCall = tlsValue->AsCall(); - tlsRef = gtNewIndir(TYP_I_IMPL, tlsRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); + // This is a call which takes an argument. + // Populate and set the ABI appropriately. + assert(opts.altJit || threadVarsSectionVal != 0); + GenTree* tlsArg = gtNewIconNode(threadVarsSectionVal, TYP_I_IMPL); + tlsRefCall->gtArgs.PushBack(this, NewCallArg::Primitive(tlsArg)); - if (dllRef != nullptr) + fgMorphArgs(tlsRefCall); + + tlsRefCall->gtFlags |= GTF_EXCEPT | (tls_get_addr_val->gtFlags & GTF_GLOB_EFFECT); + } + else if (TargetOS::IsUnix) { - // Add the dllRef to produce thread local storage reference for coreclr - tlsRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsRef, dllRef); +#if defined(TARGET_AMD64) + // Code sequence to access thread local variable on linux/x64: + // + // mov rdi, 0x7FE5C418CD28 ; tlsIndexObject + // mov rax, 0x7FE5C47AFDB0 ; _tls_get_addr + // call rax + // + GenTree* tls_get_addr_val = + gtNewIconHandleNode((size_t)threadStaticBlocksInfo.tlsGetAddrFtnPtr, GTF_ICON_FTN_ADDR); + tlsValue = gtNewIndCallNode(tls_get_addr_val, TYP_I_IMPL); + GenTreeCall* tlsRefCall = tlsValue->AsCall(); + + // This is an indirect call which takes an argument. + // Populate and set the ABI appropriately. 
+        assert(opts.altJit || threadStaticBlocksInfo.tlsIndexObject != 0);
+        GenTree* tlsArg = gtNewIconNode((size_t)threadStaticBlocksInfo.tlsIndexObject, TYP_I_IMPL);
+        tlsRefCall->gtArgs.PushBack(this, NewCallArg::Primitive(tlsArg));
+
+        fgMorphArgs(tlsRefCall);
+
+        tlsRefCall->gtFlags |= GTF_EXCEPT | (tls_get_addr_val->gtFlags & GTF_GLOB_EFFECT);
+#ifdef UNIX_X86_ABI
+        tlsRefCall->gtFlags &= ~GTF_CALL_POP_ARGS;
+#endif // UNIX_X86_ABI
+#elif defined(TARGET_ARM64)
+        // Code sequence to access thread local variable on linux/arm64:
+        //
+        //   mrs xt, tpidr_el0
+        //   ldr xd, [xt+cns]
+        tlsValue = gtNewIconHandleNode(0, GTF_ICON_TLS_HDL);
+#else
+        assert(!"Unsupported scenario of optimizing TLS access on Linux Arm32/x86");
+#endif
     }

-    // Base of coreclr's thread local storage
-    GenTree* tlsValue = gtNewIndir(TYP_I_IMPL, tlsRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT);
-
     // Cache the tls value
-    unsigned tlsLclNum         = lvaGrabTemp(true DEBUGARG("TLS access"));
-    lvaTable[tlsLclNum].lvType = TYP_I_IMPL;
-    GenTree* tlsValueDef       = gtNewStoreLclVarNode(tlsLclNum, tlsValue);
-    GenTree* tlsLclValueUse    = gtNewLclVarNode(tlsLclNum);
+    tlsValueDef             = gtNewStoreLclVarNode(tlsLclNum, tlsValue);
+    GenTree* tlsLclValueUse = gtNewLclVarNode(tlsLclNum);
+
+    size_t offsetOfThreadStaticBlocksVal    = threadStaticBlocksInfo.offsetOfThreadStaticBlocks;
+    size_t offsetOfMaxThreadStaticBlocksVal = threadStaticBlocksInfo.offsetOfMaxThreadStaticBlocks;

     // Create tree for "maxThreadStaticBlocks = tls[offsetOfMaxThreadStaticBlocks]"
     GenTree* offsetOfMaxThreadStaticBlocks = gtNewIconNode(offsetOfMaxThreadStaticBlocksVal, TYP_I_IMPL);
     GenTree* maxThreadStaticBlocksRef =
         gtNewOperNode(GT_ADD, TYP_I_IMPL, gtCloneExpr(tlsLclValueUse), offsetOfMaxThreadStaticBlocks);
-    GenTree* maxThreadStaticBlocksValue =
-        gtNewIndir(TYP_INT, maxThreadStaticBlocksRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT);
+    maxThreadStaticBlocksValue = gtNewIndir(TYP_INT, maxThreadStaticBlocksRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT);
+
+    GenTree* threadStaticBlocksRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, gtCloneExpr(tlsLclValueUse),
+                                                   gtNewIconNode(offsetOfThreadStaticBlocksVal, TYP_I_IMPL));
+    threadStaticBlocksValue = gtNewIndir(TYP_I_IMPL, threadStaticBlocksRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT);

     // Create tree for "if (maxThreadStaticBlocks < typeIndex)"
     GenTree* maxThreadStaticBlocksCond =
         gtNewOperNode(GT_LT, TYP_INT, maxThreadStaticBlocksValue, gtCloneExpr(typeThreadStaticBlockIndexValue));
     maxThreadStaticBlocksCond = gtNewOperNode(GT_JTRUE, TYP_VOID, maxThreadStaticBlocksCond);

-    // Create tree for "threadStaticBlockBase = tls[offsetOfThreadStaticBlocks]"
-    GenTree* offsetOfThreadStaticBlocks = gtNewIconNode(offsetOfThreadStaticBlocksVal, TYP_I_IMPL);
-    GenTree* threadStaticBlocksRef =
-        gtNewOperNode(GT_ADD, TYP_I_IMPL, gtCloneExpr(tlsLclValueUse), offsetOfThreadStaticBlocks);
-    GenTree* threadStaticBlocksValue =
-        gtNewIndir(TYP_I_IMPL, threadStaticBlocksRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT);
-
     // Create tree to "threadStaticBlockValue = threadStaticBlockBase[typeIndex]"
     typeThreadStaticBlockIndexValue = gtNewOperNode(GT_MUL, TYP_INT, gtCloneExpr(typeThreadStaticBlockIndexValue),
                                                     gtNewIconNode(TARGET_POINTER_SIZE, TYP_INT));
diff --git a/src/coreclr/jit/instrsarm64.h b/src/coreclr/jit/instrsarm64.h
index 5745ac0d70180..ee7483d5257c8 100644
--- a/src/coreclr/jit/instrsarm64.h
+++ b/src/coreclr/jit/instrsarm64.h
@@ -1595,6 +1595,9 @@ INST1(isb,       "isb",        0,  IF_SI_0B,  0xD50330DF)
 INST1(dczva,     "dczva",      0,  IF_SR_1A,  0xD50B7420)
                                // dc zva,Rt
SR_1A 1101010100001011 01110100001ttttt D50B 7420 Rt +INST1(mrs_tpid0, "mrs", 0, IF_SR_1A, 0xD53BD040) + // mrs Rt,tpidr_el0 SR_1A 1101010100111011 11010000010ttttt D53B D040 Rt, tpidr_el0 + INST1(umov, "umov", 0, IF_DV_2B, 0x0E003C00) // umov Rd,Vn[] DV_2B 0Q001110000iiiii 001111nnnnnddddd 0E00 3C00 Rd,Vn[] diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs b/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs index fcc4a2ef136cb..b3dc1ffc9a297 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs @@ -1149,10 +1149,13 @@ public unsafe struct CORINFO_FIELD_INFO public unsafe struct CORINFO_THREAD_STATIC_BLOCKS_INFO { public CORINFO_CONST_LOOKUP tlsIndex; + public nuint tlsGetAddrFtnPtr; + public nuint tlsIndexObject; + public nuint threadVarsSection; public uint offsetOfThreadLocalStoragePointer; - public CORINFO_CONST_LOOKUP offsetOfMaxThreadStaticBlocks; - public CORINFO_CONST_LOOKUP offsetOfThreadStaticBlocks; - public CORINFO_CONST_LOOKUP offsetOfGCDataPointer; + public uint offsetOfMaxThreadStaticBlocks; + public uint offsetOfThreadStaticBlocks; + public uint offsetOfGCDataPointer; }; // System V struct passing diff --git a/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h b/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h index 975091688c453..b4cdb272b3209 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h +++ b/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h @@ -530,10 +530,13 @@ struct Agnostic_GetProfilingHandle struct Agnostic_GetThreadLocalStaticBlocksInfo { Agnostic_CORINFO_CONST_LOOKUP tlsIndex; - UINT offsetOfThreadLocalStoragePointer; - UINT offsetOfMaxThreadStaticBlocks; - UINT offsetOfThreadStaticBlocks; - UINT offsetOfGCDataPointer; + DWORDLONG tlsGetAddrFtnPtr; + DWORDLONG tlsIndexObject; + DWORDLONG threadVarsSection; + DWORD offsetOfThreadLocalStoragePointer; + DWORD offsetOfMaxThreadStaticBlocks; + DWORD offsetOfThreadStaticBlocks; + DWORD offsetOfGCDataPointer; }; struct Agnostic_GetThreadLocalFieldInfo diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp index 1c4264df6b9ce..ad0f2c7dd6dae 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp @@ -3578,12 +3578,14 @@ void MethodContext::recGetThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOC Agnostic_GetThreadLocalStaticBlocksInfo value; ZeroMemory(&value, sizeof(value)); - value.tlsIndex.handle = CastHandle(pInfo->tlsIndex.addr); - value.tlsIndex.accessType = pInfo->tlsIndex.accessType; - value.offsetOfMaxThreadStaticBlocks = pInfo->offsetOfMaxThreadStaticBlocks; - value.offsetOfThreadLocalStoragePointer = pInfo->offsetOfThreadLocalStoragePointer; - value.offsetOfThreadStaticBlocks = pInfo->offsetOfThreadStaticBlocks; - value.offsetOfGCDataPointer = pInfo->offsetOfGCDataPointer; + value.tlsIndex = SpmiRecordsHelper::StoreAgnostic_CORINFO_CONST_LOOKUP(&pInfo->tlsIndex); + value.tlsGetAddrFtnPtr = CastPointer(pInfo->tlsGetAddrFtnPtr); + value.tlsIndexObject = CastPointer(pInfo->tlsIndexObject); + value.threadVarsSection = CastPointer(pInfo->threadVarsSection); + value.offsetOfThreadLocalStoragePointer = pInfo->offsetOfThreadLocalStoragePointer; + value.offsetOfMaxThreadStaticBlocks = pInfo->offsetOfMaxThreadStaticBlocks; + value.offsetOfThreadStaticBlocks = pInfo->offsetOfThreadStaticBlocks; + value.offsetOfGCDataPointer = 
pInfo->offsetOfGCDataPointer; // This data is same for entire process, so just add it against key '0'. DWORD key = isGCType ? 0 : 1; @@ -3593,10 +3595,13 @@ void MethodContext::recGetThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOC void MethodContext::dmpGetThreadLocalStaticBlocksInfo(DWORD key, const Agnostic_GetThreadLocalStaticBlocksInfo& value) { - printf("GetThreadLocalStaticBlocksInfo key %u, value tlsIndex-%016" PRIX64 + printf("GetThreadLocalStaticBlocksInfo key %u, tlsIndex-%s, " + ", tlsGetAddrFtnPtr-%016" PRIX64 ", tlsIndexObject - %016" PRIX64 + ", threadVarsSection - %016" PRIX64 ", offsetOfThreadLocalStoragePointer-%u, offsetOfMaxThreadStaticBlocks-%u" - ", offsetOfThreadStaticBlocks-%u offsetOfGCDataPointer-%u", - key, value.tlsIndex.handle, value.offsetOfThreadLocalStoragePointer, + ", offsetOfThreadStaticBlocks-%u, offsetOfGCDataPointer-%u", + key, SpmiDumpHelper::DumpAgnostic_CORINFO_CONST_LOOKUP(value.tlsIndex).c_str(), value.tlsGetAddrFtnPtr, + value.tlsIndexObject, value.threadVarsSection, value.offsetOfThreadLocalStoragePointer, value.offsetOfMaxThreadStaticBlocks, value.offsetOfThreadStaticBlocks, value.offsetOfGCDataPointer); } @@ -3607,12 +3612,14 @@ void MethodContext::repGetThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOC DEBUG_REP(dmpGetThreadLocalStaticBlocksInfo(key, value)); - pInfo->tlsIndex.accessType = (InfoAccessType)value.tlsIndex.accessType; - pInfo->tlsIndex.addr = (void*)value.tlsIndex.handle; - pInfo->offsetOfMaxThreadStaticBlocks = value.offsetOfMaxThreadStaticBlocks; - pInfo->offsetOfThreadLocalStoragePointer = value.offsetOfThreadLocalStoragePointer; - pInfo->offsetOfThreadStaticBlocks = value.offsetOfThreadStaticBlocks; - pInfo->offsetOfGCDataPointer = value.offsetOfGCDataPointer; + pInfo->tlsIndex = SpmiRecordsHelper::RestoreCORINFO_CONST_LOOKUP(value.tlsIndex); + pInfo->tlsGetAddrFtnPtr = (void*)value.tlsGetAddrFtnPtr; + pInfo->tlsIndexObject = (void*)value.tlsIndexObject; + pInfo->threadVarsSection = (void*)value.threadVarsSection; + pInfo->offsetOfThreadLocalStoragePointer = value.offsetOfThreadLocalStoragePointer; + pInfo->offsetOfMaxThreadStaticBlocks = value.offsetOfMaxThreadStaticBlocks; + pInfo->offsetOfThreadStaticBlocks = value.offsetOfThreadStaticBlocks; + pInfo->offsetOfGCDataPointer = value.offsetOfGCDataPointer; } void MethodContext::recEmbedMethodHandle(CORINFO_METHOD_HANDLE handle, diff --git a/src/coreclr/vm/amd64/asmhelpers.S b/src/coreclr/vm/amd64/asmhelpers.S index bebfd3376c12d..a8cdb06237eb5 100644 --- a/src/coreclr/vm/amd64/asmhelpers.S +++ b/src/coreclr/vm/amd64/asmhelpers.S @@ -307,3 +307,44 @@ NESTED_ENTRY ProfileTailcallNaked, _TEXT, NoHandler ret NESTED_END ProfileTailcallNaked, _TEXT + +#ifdef TARGET_OSX +# EXTERN_C void* GetThreadVarsAddress() +# +# Helper to calculate the address of relevant __thread_vars section that holds the address of symbol tlv_get_address for thread +# local `t_ThreadStatics`. The address is updated by the linker, which we retrieve here. In JIT code, this address is called +# to retrieve the address of the thread local. +# +LEAF_ENTRY GetThreadVarsAddress, _TEXT + mov rdi, _t_ThreadStatics@TLVP[rip] + ret +LEAF_END GetThreadVarsAddress, _TEXT +// ------------------------------------------------------------------ +#endif // TARGET_OSX + +#ifndef TARGET_OSX +# EXTERN_C void* GetTlsIndexObjectDescOffset(); + +# +# Helper to calculate the offset of native thread local variable `t_ThreadStatics`. 
The offset has to be found at runtime
+# once the linker does its relocation and fixup of thread locals. The runtime gets the address of this function, so
+# it can walk through the instruction bytes to retrieve the offset embedded by the linker and calculate the
+# final offset that should be passed to __tls_get_addr() in order to calculate the address of `t_ThreadStatics` for
+# the current thread. Here, we have to call `__tls_get_addr()` because the linker looks for the code pattern
+# "lea t_ThreadStatics@TLSGD" followed by `call __tls_get_addr()`; without the call, the linker complains.
+# We never have to call this method directly, and hence there is an `int 3` at the end.
+#
+
+LEAF_ENTRY GetTlsIndexObjectDescOffset, _TEXT
+# The `lea` instruction has a data16 prefix and the call instruction has two data16 (0x66) prefixes and one rex64 prefix.
+# This is so that the total size of lea+call is 16 bytes, suitable for link-time optimization.
+
+    .byte 0x66
+    lea rdi, t_ThreadStatics@TLSGD[rip] # instruction where offset is embedded by the linker during compilation
+    .byte 0x66
+    .byte 0x66
+    .byte 0x48 # rex.W prefix for padding
+    call __tls_get_addr # dummy call to have linker see the code pattern to replace the offset
+    int 3
+LEAF_END GetTlsIndexObjectDescOffset, _TEXT
+#endif
diff --git a/src/coreclr/vm/appdomain.cpp b/src/coreclr/vm/appdomain.cpp
index c11627614ea07..b210c25ed382f 100644
--- a/src/coreclr/vm/appdomain.cpp
+++ b/src/coreclr/vm/appdomain.cpp
@@ -665,7 +665,6 @@ void BaseDomain::InitVSD()
     GetLoaderAllocator()->InitVirtualCallStubManager(this);
 }

-#ifdef HOST_WINDOWS
 void BaseDomain::InitThreadStaticBlockTypeMap()
 {
     STANDARD_VM_CONTRACT;
@@ -673,7 +672,6 @@ void BaseDomain::InitThreadStaticBlockTypeMap()
     m_NonGCThreadStaticBlockTypeIDMap.Init();
     m_GCThreadStaticBlockTypeIDMap.Init();
 }
-#endif // HOST_WINDOWS

 void BaseDomain::ClearBinderContext()
 {
@@ -1771,10 +1769,8 @@ void AppDomain::Create()
     // allocate a Virtual Call Stub Manager for the default domain
     pDomain->InitVSD();

-#ifdef HOST_WINDOWS
     // allocate a thread static block to index map
     pDomain->InitThreadStaticBlockTypeMap();
-#endif

     pDomain->SetStage(AppDomain::STAGE_OPEN);
     pDomain->CreateDefaultBinder();
@@ -4664,7 +4660,6 @@ PTR_MethodTable BaseDomain::LookupType(UINT32 id)
 {
     return pMT;
 }
-#ifdef HOST_WINDOWS
 //------------------------------------------------------------------------
 UINT32 BaseDomain::GetNonGCThreadStaticTypeIndex(PTR_MethodTable pMT)
 {
@@ -4715,7 +4710,6 @@ PTR_MethodTable BaseDomain::LookupGCThreadStaticBlockType(UINT32 id)
 {
     CONSISTENCY_CHECK(CheckPointer(pMT));
     return pMT;
 }
-#endif // HOST_WINDOWS

 #ifndef DACCESS_COMPILE
 //---------------------------------------------------------------------------------------
diff --git a/src/coreclr/vm/appdomain.hpp b/src/coreclr/vm/appdomain.hpp
index 5b5975c5afee0..ea2d1648186b2 100644
--- a/src/coreclr/vm/appdomain.hpp
+++ b/src/coreclr/vm/appdomain.hpp
@@ -1221,18 +1221,14 @@ class BaseDomain
 private:
     TypeIDMap m_typeIDMap;

-#ifdef HOST_WINDOWS
     // MethodTable to `typeIndex` map. `typeIndex` is embedded in the code during codegen.
     // During execution corresponding thread static data blocks are stored in `t_NonGCThreadStaticBlocks`
     // and `t_GCThreadStaticBlocks` array at the `typeIndex`.
TypeIDMap m_NonGCThreadStaticBlockTypeIDMap; TypeIDMap m_GCThreadStaticBlockTypeIDMap; -#endif // HOST_WINDOWS - public: -#ifdef HOST_WINDOWS void InitThreadStaticBlockTypeMap(); UINT32 GetNonGCThreadStaticTypeIndex(PTR_MethodTable pMT); @@ -1240,7 +1236,6 @@ class BaseDomain PTR_MethodTable LookupNonGCThreadStaticBlockType(UINT32 id); PTR_MethodTable LookupGCThreadStaticBlockType(UINT32 id); -#endif UINT32 GetTypeID(PTR_MethodTable pMT); UINT32 LookupTypeID(PTR_MethodTable pMT); diff --git a/src/coreclr/vm/arm64/asmhelpers.S b/src/coreclr/vm/arm64/asmhelpers.S index 574d30068f099..cbe14485e8df4 100644 --- a/src/coreclr/vm/arm64/asmhelpers.S +++ b/src/coreclr/vm/arm64/asmhelpers.S @@ -974,3 +974,39 @@ LEAF_END JIT_ValidateIndirectCall, _TEXT LEAF_ENTRY JIT_DispatchIndirectCall, _TEXT br x9 LEAF_END JIT_DispatchIndirectCall, _TEXT + +#ifdef TARGET_OSX +// ------------------------------------------------------------------ +// void* GetThreadVarsAddress() + +// Helper to calculate the address of relevant __thread_vars section that holds the address of symbol tlv_get_address for thread +// local `t_ThreadStatics`. The address is updated by the linker, which we retrieve here. In JIT code, this address is called +// to retrieve the address of the thread local. + +LEAF_ENTRY GetThreadVarsAddress, _TEXT + adrp x0, _t_ThreadStatics@TLVPPAGE + ldr x0, [x0, _t_ThreadStatics@TLVPPAGEOFF] + ret +LEAF_END GetThreadVarsAddress, _TEXT +// ------------------------------------------------------------------ +#endif // TARGET_OSX + +#ifndef TARGET_OSX +// ------------------------------------------------------------------ +// size_t GetThreadStaticsVariableOffset() + +// Helper to calculate the offset of native thread local variable `t_ThreadStatics` in TCB. The offset has to be found at runtime +// once linker does its relocation and fixup of thread locals. The offset, after calculation is returned in `x0` register. 
+ +LEAF_ENTRY GetThreadStaticsVariableOffset, _TEXT + PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -32 + adrp x0, :tlsdesc:t_ThreadStatics + ldr x1, [x0, #:tlsdesc_lo12:t_ThreadStatics] + add x0, x0, :tlsdesc_lo12:t_ThreadStatics + .tlsdesccall t_ThreadStatics + blr x1 + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 32 + EPILOG_RETURN +LEAF_END GetThreadStaticsVariableOffset, _TEXT +// ------------------------------------------------------------------ +#endif // !TARGET_OSX diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp index d4ce2c9aa69ac..acb8cefb6942b 100644 --- a/src/coreclr/vm/jithelpers.cpp +++ b/src/coreclr/vm/jithelpers.cpp @@ -1777,25 +1777,23 @@ HCIMPL1(void*, JIT_GetGCThreadStaticBase_Helper, MethodTable * pMT) } HCIMPLEND +struct ThreadStaticBlockInfo +{ + uint32_t NonGCMaxThreadStaticBlocks; + void** NonGCThreadStaticBlocks; -#ifdef _MSC_VER -__declspec(selectany) __declspec(thread) uint32_t t_NonGCMaxThreadStaticBlocks; -__declspec(selectany) __declspec(thread) uint32_t t_GCMaxThreadStaticBlocks; - -__declspec(selectany) __declspec(thread) uint32_t t_NonGCThreadStaticBlocksSize; -__declspec(selectany) __declspec(thread) uint32_t t_GCThreadStaticBlocksSize; + uint32_t GCMaxThreadStaticBlocks; + void** GCThreadStaticBlocks; +}; -__declspec(selectany) __declspec(thread) void** t_NonGCThreadStaticBlocks; -__declspec(selectany) __declspec(thread) void** t_GCThreadStaticBlocks; +#ifdef _MSC_VER +__declspec(selectany) __declspec(thread) ThreadStaticBlockInfo t_ThreadStatics; +__declspec(selectany) __declspec(thread) uint32_t t_NonGCThreadStaticBlocksSize; +__declspec(selectany) __declspec(thread) uint32_t t_GCThreadStaticBlocksSize; #else -EXTERN_C __thread uint32_t t_NonGCMaxThreadStaticBlocks; -EXTERN_C __thread uint32_t t_GCMaxThreadStaticBlocks; - +EXTERN_C __thread ThreadStaticBlockInfo t_ThreadStatics; EXTERN_C __thread uint32_t t_NonGCThreadStaticBlocksSize; EXTERN_C __thread uint32_t t_GCThreadStaticBlocksSize; - -EXTERN_C __thread void** t_NonGCThreadStaticBlocks; -EXTERN_C __thread void** t_GCThreadStaticBlocks; #endif // *** This helper corresponds to both CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE and @@ -1840,7 +1838,6 @@ HCIMPL1(void*, JIT_GetSharedNonGCThreadStaticBaseOptimized, UINT32 staticBlockIn { void* staticBlock = nullptr; -#ifdef HOST_WINDOWS FCALL_CONTRACT; HELPER_METHOD_FRAME_BEGIN_RET_0(); // Set up a frame @@ -1867,27 +1864,24 @@ HCIMPL1(void*, JIT_GetSharedNonGCThreadStaticBaseOptimized, UINT32 staticBlockIn if (t_NonGCThreadStaticBlocksSize > 0) { - memcpy(newThreadStaticBlocks, t_NonGCThreadStaticBlocks, t_NonGCThreadStaticBlocksSize * sizeof(PTR_BYTE)); - delete t_NonGCThreadStaticBlocks; + memcpy(newThreadStaticBlocks, t_ThreadStatics.NonGCThreadStaticBlocks, t_NonGCThreadStaticBlocksSize * sizeof(PTR_BYTE)); + delete[] t_ThreadStatics.NonGCThreadStaticBlocks; } t_NonGCThreadStaticBlocksSize = newThreadStaticBlocksSize; - t_NonGCThreadStaticBlocks = newThreadStaticBlocks; + t_ThreadStatics.NonGCThreadStaticBlocks = newThreadStaticBlocks; } - void* currentEntry = t_NonGCThreadStaticBlocks[staticBlockIndex]; + void* currentEntry = t_ThreadStatics.NonGCThreadStaticBlocks[staticBlockIndex]; // We could be coming here 2nd time after running the ctor when we try to get the static block. // In such case, just avoid adding the same entry. 
if (currentEntry != staticBlock) { _ASSERTE(currentEntry == nullptr); - t_NonGCThreadStaticBlocks[staticBlockIndex] = staticBlock; - t_NonGCMaxThreadStaticBlocks = max(t_NonGCMaxThreadStaticBlocks, staticBlockIndex); + t_ThreadStatics.NonGCThreadStaticBlocks[staticBlockIndex] = staticBlock; + t_ThreadStatics.NonGCMaxThreadStaticBlocks = max(t_ThreadStatics.NonGCMaxThreadStaticBlocks, staticBlockIndex); } HELPER_METHOD_FRAME_END(); -#else - _ASSERTE(!"JIT_GetSharedNonGCThreadStaticBaseOptimized not supported on non-windows."); -#endif // HOST_WINDOWS return staticBlock; } @@ -1938,7 +1932,6 @@ HCIMPL1(void*, JIT_GetSharedGCThreadStaticBaseOptimized, UINT32 staticBlockIndex { void* staticBlock = nullptr; -#ifdef HOST_WINDOWS FCALL_CONTRACT; HELPER_METHOD_FRAME_BEGIN_RET_0(); // Set up a frame @@ -1965,31 +1958,28 @@ HCIMPL1(void*, JIT_GetSharedGCThreadStaticBaseOptimized, UINT32 staticBlockIndex if (t_GCThreadStaticBlocksSize > 0) { - memcpy(newThreadStaticBlocks, t_GCThreadStaticBlocks, t_GCThreadStaticBlocksSize * sizeof(PTR_BYTE)); - delete t_GCThreadStaticBlocks; + memcpy(newThreadStaticBlocks, t_ThreadStatics.GCThreadStaticBlocks, t_GCThreadStaticBlocksSize * sizeof(PTR_BYTE)); + delete[] t_ThreadStatics.GCThreadStaticBlocks; } t_GCThreadStaticBlocksSize = newThreadStaticBlocksSize; - t_GCThreadStaticBlocks = newThreadStaticBlocks; + t_ThreadStatics.GCThreadStaticBlocks = newThreadStaticBlocks; } - void* currentEntry = t_GCThreadStaticBlocks[staticBlockIndex]; + void* currentEntry = t_ThreadStatics.GCThreadStaticBlocks[staticBlockIndex]; // We could be coming here 2nd time after running the ctor when we try to get the static block. // In such case, just avoid adding the same entry. if (currentEntry != staticBlock) { _ASSERTE(currentEntry == nullptr); - t_GCThreadStaticBlocks[staticBlockIndex] = staticBlock; - t_GCMaxThreadStaticBlocks = max(t_GCMaxThreadStaticBlocks, staticBlockIndex); + t_ThreadStatics.GCThreadStaticBlocks[staticBlockIndex] = staticBlock; + t_ThreadStatics.GCMaxThreadStaticBlocks = max(t_ThreadStatics.GCMaxThreadStaticBlocks, staticBlockIndex); } // Get the data pointer of static block staticBlock = (void*) pMT->GetGCThreadStaticsBasePointer(); HELPER_METHOD_FRAME_END(); -#else - _ASSERTE(!"JIT_GetSharedGCThreadStaticBaseOptimized not supported on non-windows."); -#endif // HOST_WINDOWS return staticBlock; } diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 678345f9fb658..2dd8eaf5dac84 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -65,20 +65,28 @@ #include "tailcallhelp.h" -#ifdef HOST_WINDOWS +#ifdef TARGET_WINDOWS EXTERN_C uint32_t _tls_index; #endif -#ifdef _MSC_VER -__declspec(selectany) __declspec(thread) uint32_t t_NonGCMaxThreadStaticBlocks; -__declspec(selectany) __declspec(thread) uint32_t t_GCMaxThreadStaticBlocks; +struct ThreadStaticBlockInfo +{ + uint32_t NonGCMaxThreadStaticBlocks; + void** NonGCThreadStaticBlocks; -__declspec(selectany) __declspec(thread) void** t_NonGCThreadStaticBlocks; -__declspec(selectany) __declspec(thread) void** t_GCThreadStaticBlocks; + uint32_t GCMaxThreadStaticBlocks; + void** GCThreadStaticBlocks; +}; +#ifdef _MSC_VER +__declspec(selectany) __declspec(thread) ThreadStaticBlockInfo t_ThreadStatics; +__declspec(selectany) __declspec(thread) uint32_t t_NonGCThreadStaticBlocksSize; +__declspec(selectany) __declspec(thread) uint32_t t_GCThreadStaticBlocksSize; #else -EXTERN_C __thread uint32_t t_maxThreadStaticBlocks; -EXTERN_C __thread void** 
t_threadStaticBlocks; -#endif +extern "C" void* __tls_get_addr(void* ti); +__thread ThreadStaticBlockInfo t_ThreadStatics; +__thread uint32_t t_NonGCThreadStaticBlocksSize; +__thread uint32_t t_GCThreadStaticBlocksSize; +#endif // _MSC_VER // The Stack Overflow probe takes place in the COOPERATIVE_TRANSITION_BEGIN() macro // @@ -1297,6 +1305,178 @@ static CorInfoHelpFunc getInstanceFieldHelper(FieldDesc * pField, CORINFO_ACCESS return (CorInfoHelpFunc)helper; } + + +/*********************************************************************/ +uint32_t CEEInfo::getThreadLocalFieldInfo (CORINFO_FIELD_HANDLE field, bool isGCType) +{ + CONTRACTL { + THROWS; + GC_TRIGGERS; + MODE_PREEMPTIVE; + } CONTRACTL_END; + + UINT32 typeIndex = 0; + + JIT_TO_EE_TRANSITION(); + + FieldDesc* fieldDesc = (FieldDesc*)field; + _ASSERTE(fieldDesc->IsThreadStatic()); + + if (isGCType) + { + typeIndex = AppDomain::GetCurrentDomain()->GetGCThreadStaticTypeIndex(fieldDesc->GetEnclosingMethodTable()); + } + else + { + typeIndex = AppDomain::GetCurrentDomain()->GetNonGCThreadStaticTypeIndex(fieldDesc->GetEnclosingMethodTable()); + } + + assert(typeIndex != TypeIDProvider::INVALID_TYPE_ID); + + EE_TO_JIT_TRANSITION(); + return typeIndex; +} + +#if defined(TARGET_WINDOWS) +/*********************************************************************/ +static uint32_t ThreadLocalOffset(void* p) +{ + PTEB Teb = NtCurrentTeb(); + uint8_t** pTls = (uint8_t**)Teb->ThreadLocalStoragePointer; + uint8_t* pOurTls = pTls[_tls_index]; + return (uint32_t)((uint8_t*)p - pOurTls); +} +#elif defined(TARGET_OSX) +extern "C" void* GetThreadVarsAddress(); + +static void* GetThreadVarsSectionAddressFromDesc(uint8_t* p) +{ + _ASSERT(p[0] == 0x48 && p[1] == 0x8d && p[2] == 0x3d); + + // At this point, `p` contains the instruction pointer and is pointing to the above opcodes. + // These opcodes are patched by the dynamic linker. + // Move beyond the opcodes that we have already checked above. + p += 3; + + // The descriptor address is located at *p at this point. + // (p + 4) below skips the descriptor address bytes embedded in the instruction and + // add it to the `instruction pointer` to find out the address. + return *(uint32_t*)p + (p + 4); +} + +static void* GetThreadVarsSectionAddress() +{ +#ifdef TARGET_AMD64 + // On x64, the address is related to rip, so, disassemble the function, + // read the offset, and then relative to the IP, find the final address of + // __thread_vars section. + uint8_t* p = reinterpret_cast(&GetThreadVarsAddress); + return GetThreadVarsSectionAddressFromDesc(p); +#else + return GetThreadVarsAddress(); +#endif // TARGET_AMD64 +} + +#else + +// Linux + +#ifdef TARGET_AMD64 + +extern "C" void* GetTlsIndexObjectDescOffset(); + +static void* GetThreadStaticDescriptor(uint8_t* p) +{ + if (!(p[0] == 0x66 && p[1] == 0x48 && p[2] == 0x8d && p[3] == 0x3d)) + { + // The optimization is disabled if coreclr is not compiled in .so format. + _ASSERTE(false && "Unexpected code sequence"); + return nullptr; + } + + // At this point, `p` contains the instruction pointer and is pointing to the above opcodes. + // These opcodes are patched by the dynamic linker. + // Move beyond the opcodes that we have already checked above. + p += 4; + + // The descriptor address is located at *p at this point. Read that and add + // it to the instruction pointer to locate the address of `ti` that will be used + // to pass to __tls_get_addr during execution. 
+    // (p + 4) below skips the descriptor address bytes embedded in the instruction and
+    // adds it to the `instruction pointer` to find out the address.
+    return *(uint32_t*)p + (p + 4);
+}
+
+static void* GetTlsIndexObjectAddress()
+{
+    uint8_t* p = reinterpret_cast<uint8_t*>(&GetTlsIndexObjectDescOffset);
+    return GetThreadStaticDescriptor(p);
+}
+
+#elif TARGET_ARM64
+
+extern "C" size_t GetThreadStaticsVariableOffset();
+
+#endif // TARGET_ARM64
+#endif // TARGET_WINDOWS
+
+
+void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo, bool isGCType)
+{
+    CONTRACTL {
+        NOTHROW;
+        GC_NOTRIGGER;
+        MODE_PREEMPTIVE;
+    } CONTRACTL_END;
+
+    JIT_TO_EE_TRANSITION_LEAF();
+
+    size_t threadStaticBaseOffset = 0;
+
+#if defined(TARGET_WINDOWS)
+    pInfo->tlsIndex.addr = (void*)static_cast<size_t>(_tls_index);
+    pInfo->tlsIndex.accessType = IAT_VALUE;
+
+    pInfo->offsetOfThreadLocalStoragePointer = offsetof(_TEB, ThreadLocalStoragePointer);
+    threadStaticBaseOffset = ThreadLocalOffset(&t_ThreadStatics);
+
+#elif defined(TARGET_OSX)
+
+    pInfo->threadVarsSection = GetThreadVarsSectionAddress();
+
+#elif defined(TARGET_AMD64)
+
+    // For Linux/x64, get the address of the __tls_get_addr system method and the base address
+    // of the struct that we will pass to it.
+    pInfo->tlsGetAddrFtnPtr = reinterpret_cast<void*>(&__tls_get_addr);
+    pInfo->tlsIndexObject = GetTlsIndexObjectAddress();
+
+#elif defined(TARGET_ARM64)
+
+    // For Linux/arm64, just get the offset of the thread static variable; during execution,
+    // adding this offset to the base held in the tpidr_el0 system register gives back the thread variable's address.
+    threadStaticBaseOffset = GetThreadStaticsVariableOffset();
+
+#else
+    _ASSERTE_MSG(false, "Unsupported scenario of optimizing TLS access on Linux Arm32/x86");
+#endif // TARGET_WINDOWS
+
+    if (isGCType)
+    {
+        pInfo->offsetOfMaxThreadStaticBlocks = (uint32_t)(threadStaticBaseOffset + offsetof(ThreadStaticBlockInfo, GCMaxThreadStaticBlocks));
+        pInfo->offsetOfThreadStaticBlocks = (uint32_t)(threadStaticBaseOffset + offsetof(ThreadStaticBlockInfo, GCThreadStaticBlocks));
+    }
+    else
+    {
+        pInfo->offsetOfMaxThreadStaticBlocks = (uint32_t)(threadStaticBaseOffset + offsetof(ThreadStaticBlockInfo, NonGCMaxThreadStaticBlocks));
+        pInfo->offsetOfThreadStaticBlocks = (uint32_t)(threadStaticBaseOffset + offsetof(ThreadStaticBlockInfo, NonGCThreadStaticBlocks));
+    }
+    pInfo->offsetOfGCDataPointer = static_cast<uint32_t>(PtrArray::GetDataOffset());
+
+    EE_TO_JIT_TRANSITION_LEAF();
+}
+
 /*********************************************************************/
 void CEEInfo::getFieldInfo (CORINFO_RESOLVED_TOKEN * pResolvedToken,
                             CORINFO_METHOD_HANDLE  callerHandle,
@@ -1401,25 +1581,40 @@ void CEEInfo::getFieldInfo (CORINFO_RESOLVED_TOKEN * pResolvedToken,
                 fieldAccessor = CORINFO_FIELD_STATIC_SHARED_STATIC_HELPER;

                 pResult->helper = getSharedStaticsHelper(pField, pFieldMT);
-
-#ifdef HOST_WINDOWS
-#ifndef TARGET_ARM
-                // For windows, we convert the TLS access to the optimized helper where we will store
-                // the static blocks in TLS directly and access them via inline code.
- if ((pResult->helper == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR) || - (pResult->helper == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE)) - { - fieldAccessor = CORINFO_FIELD_STATIC_TLS_MANAGED; - pResult->helper = CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED; - } - else if ((pResult->helper == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR) || - (pResult->helper == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE)) +#if defined(TARGET_ARM) + // Optimization is disabled for linux/windows arm +#elif !defined(TARGET_WINDOWS) && defined(TARGET_X86) + // Optimization is disabled for linux/x86 +#elif defined(TARGET_LINUX_MUSL) && defined(TARGET_ARM64) + // Optimization is disabled for linux musl arm64 +#else + bool optimizeThreadStaticAccess = true; +#if !defined(TARGET_OSX) && defined(TARGET_UNIX) && defined(TARGET_AMD64) + // For linux/x64, check if compiled coreclr as .so file and not single file. + // For single file, the `tls_index` might not be accurate. + // Do not perform this optimization in such case. + optimizeThreadStaticAccess = GetTlsIndexObjectAddress() != nullptr; +#endif // TARGET_UNIX && TARGET_AMD64 + + if (optimizeThreadStaticAccess) { - fieldAccessor = CORINFO_FIELD_STATIC_TLS_MANAGED; - pResult->helper = CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED; + // For windows x64/x86/arm64, linux x64/arm64: + // We convert the TLS access to the optimized helper where we will store + // the static blocks in TLS directly and access them via inline code. + if ((pResult->helper == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR) || + (pResult->helper == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE)) + { + fieldAccessor = CORINFO_FIELD_STATIC_TLS_MANAGED; + pResult->helper = CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED; + } + else if ((pResult->helper == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR) || + (pResult->helper == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE)) + { + fieldAccessor = CORINFO_FIELD_STATIC_TLS_MANAGED; + pResult->helper = CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED; + } } -#endif // !TARGET_ARM -#endif // HOST_WINDOWS +#endif // TARGET_ARM } else { @@ -1601,113 +1796,6 @@ void CEEInfo::getFieldInfo (CORINFO_RESOLVED_TOKEN * pResolvedToken, EE_TO_JIT_TRANSITION(); } - - -#ifdef HOST_WINDOWS - -/*********************************************************************/ -uint32_t CEEInfo::getThreadLocalFieldInfo (CORINFO_FIELD_HANDLE field, bool isGCType) -{ - CONTRACTL { - THROWS; - GC_TRIGGERS; - MODE_PREEMPTIVE; - } CONTRACTL_END; - - UINT32 typeIndex = 0; - - JIT_TO_EE_TRANSITION(); - - FieldDesc* fieldDesc = (FieldDesc*)field; - _ASSERTE(fieldDesc->IsThreadStatic()); - - if (isGCType) - { - typeIndex = AppDomain::GetCurrentDomain()->GetGCThreadStaticTypeIndex(fieldDesc->GetEnclosingMethodTable()); - } - else - { - typeIndex = AppDomain::GetCurrentDomain()->GetNonGCThreadStaticTypeIndex(fieldDesc->GetEnclosingMethodTable()); - } - - assert(typeIndex != TypeIDProvider::INVALID_TYPE_ID); - - EE_TO_JIT_TRANSITION(); - return typeIndex; -} - -/*********************************************************************/ -static uint32_t ThreadLocalOffset(void* p) -{ - PTEB Teb = NtCurrentTeb(); - uint8_t** pTls = (uint8_t**)Teb->ThreadLocalStoragePointer; - uint8_t* pOurTls = pTls[_tls_index]; - return (uint32_t)((uint8_t*)p - pOurTls); -} - -void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo, bool isGCType) -{ - CONTRACTL { - NOTHROW; - GC_NOTRIGGER; - 
MODE_PREEMPTIVE; - } CONTRACTL_END; - - JIT_TO_EE_TRANSITION_LEAF(); - - pInfo->tlsIndex.addr = (void*)static_cast(_tls_index); - pInfo->tlsIndex.accessType = IAT_VALUE; - - pInfo->offsetOfThreadLocalStoragePointer = offsetof(_TEB, ThreadLocalStoragePointer); - if (isGCType) - { - pInfo->offsetOfThreadStaticBlocks = ThreadLocalOffset(&t_GCThreadStaticBlocks); - pInfo->offsetOfMaxThreadStaticBlocks = ThreadLocalOffset(&t_GCMaxThreadStaticBlocks); - } - else - { - pInfo->offsetOfThreadStaticBlocks = ThreadLocalOffset(&t_NonGCThreadStaticBlocks); - pInfo->offsetOfMaxThreadStaticBlocks = ThreadLocalOffset(&t_NonGCMaxThreadStaticBlocks); - } - - pInfo->offsetOfGCDataPointer = static_cast(PtrArray::GetDataOffset()); - - JIT_TO_EE_TRANSITION_LEAF(); -} -#else - -uint32_t CEEInfo::getThreadLocalFieldInfo (CORINFO_FIELD_HANDLE field, bool isGCType) -{ - CONTRACTL { - NOTHROW; - GC_NOTRIGGER; - MODE_PREEMPTIVE; - } CONTRACTL_END; - - return 0; -} - -void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo, bool isGCType) -{ - CONTRACTL { - NOTHROW; - GC_NOTRIGGER; - MODE_PREEMPTIVE; - } CONTRACTL_END; - - JIT_TO_EE_TRANSITION_LEAF(); - - pInfo->tlsIndex.addr = (UINT8*)0; - - pInfo->offsetOfThreadLocalStoragePointer = 0; - pInfo->offsetOfThreadStaticBlocks = 0; - pInfo->offsetOfMaxThreadStaticBlocks = 0; - pInfo->offsetOfGCDataPointer = 0; - - JIT_TO_EE_TRANSITION_LEAF(); -} -#endif // HOST_WINDOWS - //--------------------------------------------------------------------------------------- // bool CEEInfo::isFieldStatic(CORINFO_FIELD_HANDLE fldHnd)
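
Note (illustrative, not part of the patch): the linux/arm64 path in this change boils down to "read the thread pointer
from the tpidr_el0 system register, then add a constant offset that the runtime computes once via
GetThreadStaticsVariableOffset()". A minimal user-mode sketch of that access pattern, assuming linux/arm64 and a
GNU-compatible compiler; the variable name below is hypothetical and only stands in for `t_ThreadStatics`:

    #include <cstdint>
    #include <cstdio>

    __thread uint32_t t_example; // stand-in for the runtime's t_ThreadStatics block

    int main()
    {
    #if defined(__aarch64__)
        uint8_t* tlsBase;
        // Same register the JIT reads via INS_mrs_tpid0 in the emitted code.
        asm volatile("mrs %0, tpidr_el0" : "=r"(tlsBase));

        // Offset of the thread-local from the thread pointer; the runtime obtains the
        // equivalent value through its TLS-descriptor helper and hands it to the JIT.
        ptrdiff_t offset = (uint8_t*)&t_example - tlsBase;

        // base + offset is exactly the address the JIT-expanded sequence computes.
        printf("tls base=%p offset=%td addr=%p\n", (void*)tlsBase, offset, (void*)(tlsBase + offset));
    #endif
        return 0;
    }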