diff --git a/src/coreclr/inc/corinfo.h b/src/coreclr/inc/corinfo.h
index bb07aab36a85f..ea1e0c10ec954 100644
--- a/src/coreclr/inc/corinfo.h
+++ b/src/coreclr/inc/corinfo.h
@@ -1727,8 +1727,11 @@ struct CORINFO_FIELD_INFO
struct CORINFO_THREAD_STATIC_BLOCKS_INFO
{
- CORINFO_CONST_LOOKUP tlsIndex;
- uint32_t offsetOfThreadLocalStoragePointer;
+ CORINFO_CONST_LOOKUP tlsIndex; // windows specific
+ void* tlsGetAddrFtnPtr; // linux/x64 specific - address of __tls_get_addr() function
+ void* tlsIndexObject; // linux/x64 specific - address of tls_index object
+ void* threadVarsSection; // osx x64/arm64 specific - address of __thread_vars section of `t_ThreadStatics`
+ uint32_t offsetOfThreadLocalStoragePointer; // windows specific
uint32_t offsetOfMaxThreadStaticBlocks;
uint32_t offsetOfThreadStaticBlocks;
uint32_t offsetOfGCDataPointer;
diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h
index fda1fdeae24be..6c6f7e8283c01 100644
--- a/src/coreclr/inc/jiteeversionguid.h
+++ b/src/coreclr/inc/jiteeversionguid.h
@@ -43,11 +43,11 @@ typedef const GUID *LPCGUID;
#define GUID_DEFINED
#endif // !GUID_DEFINED
-constexpr GUID JITEEVersionIdentifier = { /* ba2c087c-9b8b-49c1-a52f-3514eb489308 */
- 0xba2c087c,
- 0x9b8b,
- 0x49c1,
- {0xa5, 0x2f, 0x35, 0x14, 0xeb, 0x48, 0x93, 0x08}
+constexpr GUID JITEEVersionIdentifier = { /* 02e334af-4e6e-4a68-9feb-308d3d2661bc */
+ 0x2e334af,
+ 0x4e6e,
+ 0x4a68,
+ {0x9f, 0xeb, 0x30, 0x8d, 0x3d, 0x26, 0x61, 0xbc}
};
//////////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp
index 955cba0b42a8b..93e98309f0949 100644
--- a/src/coreclr/jit/codegenarm64.cpp
+++ b/src/coreclr/jit/codegenarm64.cpp
@@ -2944,6 +2944,13 @@ void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode)
inst_Mov_Extend(targetType, /* srcInReg */ true, targetReg, dataReg, /* canSkip */ true,
emitActualTypeSize(targetType));
}
+ else if (TargetOS::IsUnix && data->IsIconHandle(GTF_ICON_TLS_HDL))
+ {
+ assert(data->AsIntCon()->IconValue() == 0);
+ emitAttr attr = emitActualTypeSize(targetType);
+ // On non-windows, need to load the address from system register.
+ emit->emitIns_R(INS_mrs_tpid0, attr, targetReg);
+ }
else
{
inst_Mov(targetType, targetReg, dataReg, /* canSkip */ true);
diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp
index 5804e731e6924..7b2fdb8730c00 100644
--- a/src/coreclr/jit/compiler.cpp
+++ b/src/coreclr/jit/compiler.cpp
@@ -5026,11 +5026,8 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
// Partially inline static initializations
DoPhase(this, PHASE_EXPAND_STATIC_INIT, &Compiler::fgExpandStaticInit);
- if (TargetOS::IsWindows)
- {
- // Currently this is only applicable for Windows
- DoPhase(this, PHASE_EXPAND_TLS, &Compiler::fgExpandThreadLocalAccess);
- }
+ // Expand thread local access
+ DoPhase(this, PHASE_EXPAND_TLS, &Compiler::fgExpandThreadLocalAccess);
// Insert GC Polls
DoPhase(this, PHASE_INSERT_GC_POLLS, &Compiler::fgInsertGCPolls);
diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index f2c438fe62398..1d42520215ded 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -7124,7 +7124,7 @@ class Compiler
optMethodFlags |= OMF_HAS_GUARDEDDEVIRT;
}
- bool doesMethodHasTlsFieldAccess()
+ bool methodHasTlsFieldAccess()
{
return (optMethodFlags & OMF_HAS_TLS_FIELD) != 0;
}
diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp
index 7e3c6501399c6..0c1318006e18c 100644
--- a/src/coreclr/jit/emit.cpp
+++ b/src/coreclr/jit/emit.cpp
@@ -10205,9 +10205,9 @@ void emitter::emitRecordCallSite(ULONG instrOffset, /* IN */
if (callSig == nullptr)
{
- assert(methodHandle != nullptr);
-
- if (Compiler::eeGetHelperNum(methodHandle) == CORINFO_HELP_UNDEF)
+ // For certain calls whose target is non-containable (e.g. tls access targets), `methodHandle`
+ // will be nullptr, because the target is present in a register.
+ if ((methodHandle != nullptr) && (Compiler::eeGetHelperNum(methodHandle) == CORINFO_HELP_UNDEF))
{
emitComp->eeGetMethodSig(methodHandle, &sigInfo);
callSig = &sigInfo;
diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp
index 4ae2d717f0eb4..4743615fd03d4 100644
--- a/src/coreclr/jit/emitarm64.cpp
+++ b/src/coreclr/jit/emitarm64.cpp
@@ -937,7 +937,7 @@ void emitter::emitInsSanityCheck(instrDesc* id)
case IF_SI_0B: // SI_0B ................ ....bbbb........ imm4 - barrier
break;
- case IF_SR_1A: // SR_1A ................ ...........ttttt Rt (dc zva)
+ case IF_SR_1A: // SR_1A ................ ...........ttttt Rt (dc zva, mrs)
datasize = id->idOpSize();
assert(isGeneralRegister(id->idReg1()));
assert(datasize == EA_8BYTE);
@@ -3741,6 +3741,12 @@ void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg)
fmt = IF_SR_1A;
break;
+ case INS_mrs_tpid0:
+ id = emitNewInstrSmall(attr);
+ id->idReg1(reg);
+ fmt = IF_SR_1A;
+ break;
+
default:
unreached();
}
@@ -11793,7 +11799,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
dst += emitOutput_Instr(dst, code);
break;
- case IF_SR_1A: // SR_1A ................ ...........ttttt Rt (dc zva)
+ case IF_SR_1A: // SR_1A ................ ...........ttttt Rt (dc zva, mrs)
assert(insOptsNone(id->idInsOpt()));
code = emitInsCode(ins, fmt);
code |= insEncodeReg_Rt(id->idReg1()); // ttttt
@@ -13921,8 +13927,16 @@ void emitter::emitDispInsHelp(
emitDispBarrier((insBarrier)emitGetInsSC(id));
break;
- case IF_SR_1A: // SR_1A ................ ...........ttttt Rt (dc zva)
- emitDispReg(id->idReg1(), size, false);
+ case IF_SR_1A: // SR_1A ................ ...........ttttt Rt (dc zva, mrs)
+ if (ins == INS_mrs_tpid0)
+ {
+ emitDispReg(id->idReg1(), size, true);
+ printf("tpidr_el0");
+ }
+ else
+ {
+ emitDispReg(id->idReg1(), size, false);
+ }
break;
default:
diff --git a/src/coreclr/jit/emitfmtsarm64.h b/src/coreclr/jit/emitfmtsarm64.h
index 81f41085a2ebe..31bbde6afc47a 100644
--- a/src/coreclr/jit/emitfmtsarm64.h
+++ b/src/coreclr/jit/emitfmtsarm64.h
@@ -227,7 +227,7 @@ IF_DEF(SN_0A, IS_NONE, NONE) // SN_0A ................ ................
IF_DEF(SI_0A, IS_NONE, NONE) // SI_0A ...........iiiii iiiiiiiiiii..... imm16
IF_DEF(SI_0B, IS_NONE, NONE) // SI_0B ................ ....bbbb........ imm4 - barrier
-IF_DEF(SR_1A, IS_NONE, NONE) // SR_1A ................ ...........ttttt Rt (dc zva)
+IF_DEF(SR_1A, IS_NONE, NONE) // SR_1A ................ ...........ttttt Rt (dc zva, mrs)
IF_DEF(INVALID, IS_NONE, NONE) //
diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp
index 62f4d7e0c1d8c..b95f75f075a78 100644
--- a/src/coreclr/jit/helperexpansion.cpp
+++ b/src/coreclr/jit/helperexpansion.cpp
@@ -421,7 +421,7 @@ PhaseStatus Compiler::fgExpandThreadLocalAccess()
{
PhaseStatus result = PhaseStatus::MODIFIED_NOTHING;
- if (!doesMethodHasTlsFieldAccess())
+ if (!methodHasTlsFieldAccess())
{
// TP: nothing to expand in the current method
JITDUMP("Nothing to expand.\n")
@@ -478,36 +478,50 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement*
return false;
}
+ assert(!opts.IsReadyToRun());
+
+ if (TargetOS::IsUnix)
+ {
+#if defined(TARGET_ARM) || !defined(TARGET_64BIT)
+ // On Arm, Thread execution blocks are accessed using co-processor registers and instructions such
+ // as MRC and MCR are used to access them. We do not support them and so should never optimize the
+ // field access using TLS.
+ noway_assert(!"Unsupported scenario of optimizing TLS access on Linux Arm32/x86");
+#endif
+ }
+ else
+ {
#ifdef TARGET_ARM
- // On Arm, Thread execution blocks are accessed using co-processor registers and instructions such
- // as MRC and MCR are used to access them. We do not support them and so should never optimize the
- // field access using TLS.
- assert(!"Unsupported scenario of optimizing TLS access on Arm32");
+ // On Arm, Thread execution blocks are accessed using co-processor registers and instructions such
+ // as MRC and MCR are used to access them. We do not support them and so should never optimize the
+ // field access using TLS.
+ noway_assert(!"Unsupported scenario of optimizing TLS access on Windows Arm32");
#endif
+ }
JITDUMP("Expanding thread static local access for [%06d] in " FMT_BB ":\n", dspTreeID(call), block->bbNum);
DISPTREE(call);
JITDUMP("\n");
+
bool isGCThreadStatic =
eeGetHelperNum(call->gtCallMethHnd) == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED;
CORINFO_THREAD_STATIC_BLOCKS_INFO threadStaticBlocksInfo;
- info.compCompHnd->getThreadLocalStaticBlocksInfo(&threadStaticBlocksInfo, isGCThreadStatic);
+ memset(&threadStaticBlocksInfo, 0, sizeof(CORINFO_THREAD_STATIC_BLOCKS_INFO));
- uint32_t offsetOfMaxThreadStaticBlocksVal = 0;
- uint32_t offsetOfThreadStaticBlocksVal = 0;
+ info.compCompHnd->getThreadLocalStaticBlocksInfo(&threadStaticBlocksInfo, isGCThreadStatic);
JITDUMP("getThreadLocalStaticBlocksInfo (%s)\n:", isGCThreadStatic ? "GC" : "Non-GC");
- offsetOfMaxThreadStaticBlocksVal = threadStaticBlocksInfo.offsetOfMaxThreadStaticBlocks;
- offsetOfThreadStaticBlocksVal = threadStaticBlocksInfo.offsetOfThreadStaticBlocks;
-
- JITDUMP("tlsIndex= %u\n", (ssize_t)threadStaticBlocksInfo.tlsIndex.addr);
- JITDUMP("offsetOfThreadLocalStoragePointer= %u\n", threadStaticBlocksInfo.offsetOfThreadLocalStoragePointer);
- JITDUMP("offsetOfMaxThreadStaticBlocks= %u\n", offsetOfMaxThreadStaticBlocksVal);
- JITDUMP("offsetOfThreadStaticBlocks= %u\n", offsetOfThreadStaticBlocksVal);
- JITDUMP("offsetOfGCDataPointer= %u\n", threadStaticBlocksInfo.offsetOfGCDataPointer);
+ JITDUMP("tlsIndex= %p\n", dspPtr(threadStaticBlocksInfo.tlsIndex.addr));
+ JITDUMP("tlsGetAddrFtnPtr= %p\n", dspPtr(threadStaticBlocksInfo.tlsGetAddrFtnPtr));
+ JITDUMP("tlsIndexObject= %p\n", dspPtr(threadStaticBlocksInfo.tlsIndexObject));
+ JITDUMP("threadVarsSection= %p\n", dspPtr(threadStaticBlocksInfo.threadVarsSection));
+ JITDUMP("offsetOfThreadLocalStoragePointer= %u\n",
+ dspOffset(threadStaticBlocksInfo.offsetOfThreadLocalStoragePointer));
+ JITDUMP("offsetOfMaxThreadStaticBlocks= %u\n", dspOffset(threadStaticBlocksInfo.offsetOfMaxThreadStaticBlocks));
+ JITDUMP("offsetOfThreadStaticBlocks= %u\n", dspOffset(threadStaticBlocksInfo.offsetOfThreadStaticBlocks));
+ JITDUMP("offsetOfGCDataPointer= %u\n", dspOffset(threadStaticBlocksInfo.offsetOfGCDataPointer));
- assert(threadStaticBlocksInfo.tlsIndex.accessType == IAT_VALUE);
assert((eeGetHelperNum(call->gtCallMethHnd) == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED) ||
(eeGetHelperNum(call->gtCallMethHnd) == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED));
@@ -546,56 +560,131 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement*
gtUpdateStmtSideEffects(stmt);
GenTree* typeThreadStaticBlockIndexValue = call->gtArgs.GetArgByIndex(0)->GetNode();
+ GenTree* tlsValue = nullptr;
+ unsigned tlsLclNum = lvaGrabTemp(true DEBUGARG("TLS access"));
+ lvaTable[tlsLclNum].lvType = TYP_I_IMPL;
+ GenTree* maxThreadStaticBlocksValue = nullptr;
+ GenTree* threadStaticBlocksValue = nullptr;
+ GenTree* tlsValueDef = nullptr;
+
+ if (TargetOS::IsWindows)
+ {
+ size_t tlsIndexValue = (size_t)threadStaticBlocksInfo.tlsIndex.addr;
+ GenTree* dllRef = nullptr;
- void** pIdAddr = nullptr;
+ if (tlsIndexValue != 0)
+ {
+ dllRef = gtNewIconHandleNode(tlsIndexValue * TARGET_POINTER_SIZE, GTF_ICON_TLS_HDL);
+ }
- size_t tlsIndexValue = (size_t)threadStaticBlocksInfo.tlsIndex.addr;
- GenTree* dllRef = nullptr;
+ // Mark this ICON as a TLS_HDL, codegen will use FS:[cns] or GS:[cns]
+ tlsValue = gtNewIconHandleNode(threadStaticBlocksInfo.offsetOfThreadLocalStoragePointer, GTF_ICON_TLS_HDL);
+ tlsValue = gtNewIndir(TYP_I_IMPL, tlsValue, GTF_IND_NONFAULTING | GTF_IND_INVARIANT);
- if (tlsIndexValue != 0)
- {
- dllRef = gtNewIconHandleNode(tlsIndexValue * TARGET_POINTER_SIZE, GTF_ICON_TLS_HDL);
+ if (dllRef != nullptr)
+ {
+ // Add the dllRef to produce thread local storage reference for coreclr
+ tlsValue = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsValue, dllRef);
+ }
+
+ // Base of coreclr's thread local storage
+ tlsValue = gtNewIndir(TYP_I_IMPL, tlsValue, GTF_IND_NONFAULTING | GTF_IND_INVARIANT);
}
+ else if (TargetOS::IsMacOS)
+ {
+ // For OSX x64/arm64, we need to get the address of relevant __thread_vars section of
+ // the thread local variable `t_ThreadStatics`. Address of `tlv_get_address` is stored
+ // in this entry, which we dereference and invoke, passing the __thread_vars address
+ // present in `threadVarsSection`.
+ //
+ // Code sequence to access thread local variable on osx/x64:
+ //
+ // mov rdi, threadVarsSection
+ // call [rdi]
+ //
+ // Code sequence to access thread local variable on osx/arm64:
+ //
+ // mov x0, threadVarsSection
+ // mov x1, [x0]
+ // blr x1
+ //
+ size_t threadVarsSectionVal = (size_t)threadStaticBlocksInfo.threadVarsSection;
+ GenTree* tls_get_addr_val = gtNewIconHandleNode(threadVarsSectionVal, GTF_ICON_FTN_ADDR);
+
+ tls_get_addr_val = gtNewIndir(TYP_I_IMPL, tls_get_addr_val, GTF_IND_NONFAULTING | GTF_IND_INVARIANT);
- // Mark this ICON as a TLS_HDL, codegen will use FS:[cns] or GS:[cns]
- GenTree* tlsRef = gtNewIconHandleNode(threadStaticBlocksInfo.offsetOfThreadLocalStoragePointer, GTF_ICON_TLS_HDL);
+ tlsValue = gtNewIndCallNode(tls_get_addr_val, TYP_I_IMPL);
+ GenTreeCall* tlsRefCall = tlsValue->AsCall();
- tlsRef = gtNewIndir(TYP_I_IMPL, tlsRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT);
+ // This is a call which takes an argument.
+ // Populate and set the ABI appropriately.
+ assert(opts.altJit || threadVarsSectionVal != 0);
+ GenTree* tlsArg = gtNewIconNode(threadVarsSectionVal, TYP_I_IMPL);
+ tlsRefCall->gtArgs.PushBack(this, NewCallArg::Primitive(tlsArg));
- if (dllRef != nullptr)
+ fgMorphArgs(tlsRefCall);
+
+ tlsRefCall->gtFlags |= GTF_EXCEPT | (tls_get_addr_val->gtFlags & GTF_GLOB_EFFECT);
+ }
+ else if (TargetOS::IsUnix)
{
- // Add the dllRef to produce thread local storage reference for coreclr
- tlsRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsRef, dllRef);
+#if defined(TARGET_AMD64)
+ // Code sequence to access thread local variable on linux/x64:
+ //
+ // mov rdi, 0x7FE5C418CD28 ; tlsIndexObject
+ // mov rax, 0x7FE5C47AFDB0 ; __tls_get_addr
+ // call rax
+ //
+ GenTree* tls_get_addr_val =
+ gtNewIconHandleNode((size_t)threadStaticBlocksInfo.tlsGetAddrFtnPtr, GTF_ICON_FTN_ADDR);
+ tlsValue = gtNewIndCallNode(tls_get_addr_val, TYP_I_IMPL);
+ GenTreeCall* tlsRefCall = tlsValue->AsCall();
+
+ // This is an indirect call which takes an argument.
+ // Populate and set the ABI appropriately.
+ assert(opts.altJit || threadStaticBlocksInfo.tlsIndexObject != 0);
+ GenTree* tlsArg = gtNewIconNode((size_t)threadStaticBlocksInfo.tlsIndexObject, TYP_I_IMPL);
+ tlsRefCall->gtArgs.PushBack(this, NewCallArg::Primitive(tlsArg));
+
+ fgMorphArgs(tlsRefCall);
+
+ tlsRefCall->gtFlags |= GTF_EXCEPT | (tls_get_addr_val->gtFlags & GTF_GLOB_EFFECT);
+#ifdef UNIX_X86_ABI
+ tlsRefCall->gtFlags &= ~GTF_CALL_POP_ARGS;
+#endif // UNIX_X86_ABI
+#elif defined(TARGET_ARM64)
+ // Code sequence to access thread local variable on linux/arm64:
+ //
+ // mrs xt, tpidr_el0
+ // mov xd, [xt+cns]
+ tlsValue = gtNewIconHandleNode(0, GTF_ICON_TLS_HDL);
+#else
+ assert(!"Unsupported scenario of optimizing TLS access on Linux Arm32/x86");
+#endif
}
- // Base of coreclr's thread local storage
- GenTree* tlsValue = gtNewIndir(TYP_I_IMPL, tlsRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT);
-
// Cache the tls value
- unsigned tlsLclNum = lvaGrabTemp(true DEBUGARG("TLS access"));
- lvaTable[tlsLclNum].lvType = TYP_I_IMPL;
- GenTree* tlsValueDef = gtNewStoreLclVarNode(tlsLclNum, tlsValue);
- GenTree* tlsLclValueUse = gtNewLclVarNode(tlsLclNum);
+ tlsValueDef = gtNewStoreLclVarNode(tlsLclNum, tlsValue);
+ GenTree* tlsLclValueUse = gtNewLclVarNode(tlsLclNum);
+
+ size_t offsetOfThreadStaticBlocksVal = threadStaticBlocksInfo.offsetOfThreadStaticBlocks;
+ size_t offsetOfMaxThreadStaticBlocksVal = threadStaticBlocksInfo.offsetOfMaxThreadStaticBlocks;
// Create tree for "maxThreadStaticBlocks = tls[offsetOfMaxThreadStaticBlocks]"
GenTree* offsetOfMaxThreadStaticBlocks = gtNewIconNode(offsetOfMaxThreadStaticBlocksVal, TYP_I_IMPL);
GenTree* maxThreadStaticBlocksRef =
gtNewOperNode(GT_ADD, TYP_I_IMPL, gtCloneExpr(tlsLclValueUse), offsetOfMaxThreadStaticBlocks);
- GenTree* maxThreadStaticBlocksValue =
- gtNewIndir(TYP_INT, maxThreadStaticBlocksRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT);
+ maxThreadStaticBlocksValue = gtNewIndir(TYP_INT, maxThreadStaticBlocksRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT);
+
+ GenTree* threadStaticBlocksRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, gtCloneExpr(tlsLclValueUse),
+ gtNewIconNode(offsetOfThreadStaticBlocksVal, TYP_I_IMPL));
+ threadStaticBlocksValue = gtNewIndir(TYP_I_IMPL, threadStaticBlocksRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT);
// Create tree for "if (maxThreadStaticBlocks < typeIndex)"
GenTree* maxThreadStaticBlocksCond =
gtNewOperNode(GT_LT, TYP_INT, maxThreadStaticBlocksValue, gtCloneExpr(typeThreadStaticBlockIndexValue));
maxThreadStaticBlocksCond = gtNewOperNode(GT_JTRUE, TYP_VOID, maxThreadStaticBlocksCond);
- // Create tree for "threadStaticBlockBase = tls[offsetOfThreadStaticBlocks]"
- GenTree* offsetOfThreadStaticBlocks = gtNewIconNode(offsetOfThreadStaticBlocksVal, TYP_I_IMPL);
- GenTree* threadStaticBlocksRef =
- gtNewOperNode(GT_ADD, TYP_I_IMPL, gtCloneExpr(tlsLclValueUse), offsetOfThreadStaticBlocks);
- GenTree* threadStaticBlocksValue =
- gtNewIndir(TYP_I_IMPL, threadStaticBlocksRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT);
-
// Create tree to "threadStaticBlockValue = threadStaticBlockBase[typeIndex]"
typeThreadStaticBlockIndexValue = gtNewOperNode(GT_MUL, TYP_INT, gtCloneExpr(typeThreadStaticBlockIndexValue),
gtNewIconNode(TARGET_POINTER_SIZE, TYP_INT));
diff --git a/src/coreclr/jit/instrsarm64.h b/src/coreclr/jit/instrsarm64.h
index 5745ac0d70180..ee7483d5257c8 100644
--- a/src/coreclr/jit/instrsarm64.h
+++ b/src/coreclr/jit/instrsarm64.h
@@ -1595,6 +1595,9 @@ INST1(isb, "isb", 0, IF_SI_0B, 0xD50330DF)
INST1(dczva, "dczva", 0, IF_SR_1A, 0xD50B7420)
// dc zva,Rt SR_1A 1101010100001011 01110100001ttttt D50B 7420 Rt
+INST1(mrs_tpid0, "mrs", 0, IF_SR_1A, 0xD53BD040)
+ // mrs Rt,tpidr_el0 SR_1A 1101010100111011 11010000010ttttt D53B D040 Rt, tpidr_el0
+
INST1(umov, "umov", 0, IF_DV_2B, 0x0E003C00)
// umov Rd,Vn[] DV_2B 0Q001110000iiiii 001111nnnnnddddd 0E00 3C00 Rd,Vn[]
diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs b/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs
index fcc4a2ef136cb..b3dc1ffc9a297 100644
--- a/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs
+++ b/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs
@@ -1149,10 +1149,13 @@ public unsafe struct CORINFO_FIELD_INFO
public unsafe struct CORINFO_THREAD_STATIC_BLOCKS_INFO
{
public CORINFO_CONST_LOOKUP tlsIndex;
+ public nuint tlsGetAddrFtnPtr;
+ public nuint tlsIndexObject;
+ public nuint threadVarsSection;
public uint offsetOfThreadLocalStoragePointer;
- public CORINFO_CONST_LOOKUP offsetOfMaxThreadStaticBlocks;
- public CORINFO_CONST_LOOKUP offsetOfThreadStaticBlocks;
- public CORINFO_CONST_LOOKUP offsetOfGCDataPointer;
+ public uint offsetOfMaxThreadStaticBlocks;
+ public uint offsetOfThreadStaticBlocks;
+ public uint offsetOfGCDataPointer;
};
// System V struct passing
diff --git a/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h b/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h
index 975091688c453..b4cdb272b3209 100644
--- a/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h
+++ b/src/coreclr/tools/superpmi/superpmi-shared/agnostic.h
@@ -530,10 +530,13 @@ struct Agnostic_GetProfilingHandle
struct Agnostic_GetThreadLocalStaticBlocksInfo
{
Agnostic_CORINFO_CONST_LOOKUP tlsIndex;
- UINT offsetOfThreadLocalStoragePointer;
- UINT offsetOfMaxThreadStaticBlocks;
- UINT offsetOfThreadStaticBlocks;
- UINT offsetOfGCDataPointer;
+ DWORDLONG tlsGetAddrFtnPtr;
+ DWORDLONG tlsIndexObject;
+ DWORDLONG threadVarsSection;
+ DWORD offsetOfThreadLocalStoragePointer;
+ DWORD offsetOfMaxThreadStaticBlocks;
+ DWORD offsetOfThreadStaticBlocks;
+ DWORD offsetOfGCDataPointer;
};
struct Agnostic_GetThreadLocalFieldInfo
diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp
index 1c4264df6b9ce..ad0f2c7dd6dae 100644
--- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp
+++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp
@@ -3578,12 +3578,14 @@ void MethodContext::recGetThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOC
Agnostic_GetThreadLocalStaticBlocksInfo value;
ZeroMemory(&value, sizeof(value));
- value.tlsIndex.handle = CastHandle(pInfo->tlsIndex.addr);
- value.tlsIndex.accessType = pInfo->tlsIndex.accessType;
- value.offsetOfMaxThreadStaticBlocks = pInfo->offsetOfMaxThreadStaticBlocks;
- value.offsetOfThreadLocalStoragePointer = pInfo->offsetOfThreadLocalStoragePointer;
- value.offsetOfThreadStaticBlocks = pInfo->offsetOfThreadStaticBlocks;
- value.offsetOfGCDataPointer = pInfo->offsetOfGCDataPointer;
+ value.tlsIndex = SpmiRecordsHelper::StoreAgnostic_CORINFO_CONST_LOOKUP(&pInfo->tlsIndex);
+ value.tlsGetAddrFtnPtr = CastPointer(pInfo->tlsGetAddrFtnPtr);
+ value.tlsIndexObject = CastPointer(pInfo->tlsIndexObject);
+ value.threadVarsSection = CastPointer(pInfo->threadVarsSection);
+ value.offsetOfThreadLocalStoragePointer = pInfo->offsetOfThreadLocalStoragePointer;
+ value.offsetOfMaxThreadStaticBlocks = pInfo->offsetOfMaxThreadStaticBlocks;
+ value.offsetOfThreadStaticBlocks = pInfo->offsetOfThreadStaticBlocks;
+ value.offsetOfGCDataPointer = pInfo->offsetOfGCDataPointer;
// This data is same for entire process, so just add it against key '0'.
DWORD key = isGCType ? 0 : 1;
@@ -3593,10 +3595,13 @@ void MethodContext::recGetThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOC
void MethodContext::dmpGetThreadLocalStaticBlocksInfo(DWORD key, const Agnostic_GetThreadLocalStaticBlocksInfo& value)
{
- printf("GetThreadLocalStaticBlocksInfo key %u, value tlsIndex-%016" PRIX64
+ printf("GetThreadLocalStaticBlocksInfo key %u, tlsIndex-%s, "
+ ", tlsGetAddrFtnPtr-%016" PRIX64 ", tlsIndexObject - %016" PRIX64
+ ", threadVarsSection - %016" PRIX64
", offsetOfThreadLocalStoragePointer-%u, offsetOfMaxThreadStaticBlocks-%u"
- ", offsetOfThreadStaticBlocks-%u offsetOfGCDataPointer-%u",
- key, value.tlsIndex.handle, value.offsetOfThreadLocalStoragePointer,
+ ", offsetOfThreadStaticBlocks-%u, offsetOfGCDataPointer-%u",
+ key, SpmiDumpHelper::DumpAgnostic_CORINFO_CONST_LOOKUP(value.tlsIndex).c_str(), value.tlsGetAddrFtnPtr,
+ value.tlsIndexObject, value.threadVarsSection, value.offsetOfThreadLocalStoragePointer,
value.offsetOfMaxThreadStaticBlocks, value.offsetOfThreadStaticBlocks, value.offsetOfGCDataPointer);
}
@@ -3607,12 +3612,14 @@ void MethodContext::repGetThreadLocalStaticBlocksInfo(CORINFO_THREAD_STATIC_BLOC
DEBUG_REP(dmpGetThreadLocalStaticBlocksInfo(key, value));
- pInfo->tlsIndex.accessType = (InfoAccessType)value.tlsIndex.accessType;
- pInfo->tlsIndex.addr = (void*)value.tlsIndex.handle;
- pInfo->offsetOfMaxThreadStaticBlocks = value.offsetOfMaxThreadStaticBlocks;
- pInfo->offsetOfThreadLocalStoragePointer = value.offsetOfThreadLocalStoragePointer;
- pInfo->offsetOfThreadStaticBlocks = value.offsetOfThreadStaticBlocks;
- pInfo->offsetOfGCDataPointer = value.offsetOfGCDataPointer;
+ pInfo->tlsIndex = SpmiRecordsHelper::RestoreCORINFO_CONST_LOOKUP(value.tlsIndex);
+ pInfo->tlsGetAddrFtnPtr = (void*)value.tlsGetAddrFtnPtr;
+ pInfo->tlsIndexObject = (void*)value.tlsIndexObject;
+ pInfo->threadVarsSection = (void*)value.threadVarsSection;
+ pInfo->offsetOfThreadLocalStoragePointer = value.offsetOfThreadLocalStoragePointer;
+ pInfo->offsetOfMaxThreadStaticBlocks = value.offsetOfMaxThreadStaticBlocks;
+ pInfo->offsetOfThreadStaticBlocks = value.offsetOfThreadStaticBlocks;
+ pInfo->offsetOfGCDataPointer = value.offsetOfGCDataPointer;
}
void MethodContext::recEmbedMethodHandle(CORINFO_METHOD_HANDLE handle,
diff --git a/src/coreclr/vm/amd64/asmhelpers.S b/src/coreclr/vm/amd64/asmhelpers.S
index bebfd3376c12d..a8cdb06237eb5 100644
--- a/src/coreclr/vm/amd64/asmhelpers.S
+++ b/src/coreclr/vm/amd64/asmhelpers.S
@@ -307,3 +307,44 @@ NESTED_ENTRY ProfileTailcallNaked, _TEXT, NoHandler
ret
NESTED_END ProfileTailcallNaked, _TEXT
+
+#ifdef TARGET_OSX
+# EXTERN_C void* GetThreadVarsAddress()
+#
+# Helper to calculate the address of relevant __thread_vars section that holds the address of symbol tlv_get_address for thread
+# local `t_ThreadStatics`. The address is updated by the linker, which we retrieve here. In JIT code, this address is called
+# to retrieve the address of the thread local.
+#
+LEAF_ENTRY GetThreadVarsAddress, _TEXT
+ mov rdi, _t_ThreadStatics@TLVP[rip]
+ ret
+LEAF_END GetThreadVarsAddress, _TEXT
+// ------------------------------------------------------------------
+#endif // TARGET_OSX
+
+#ifndef TARGET_OSX
+# EXTERN_C void* GetTlsIndexObjectDescOffset();
+
+#
+# Helper to calculate the offset of native thread local variable `t_ThreadStatics`. The offset has to be found at runtime
+# once linker does its relocation and fixup of thread locals. The runtime gets the address of this function, so
+# it can walk through the instruction bytes to retrieve the offset embedded by the linker and calculate the
+# final offset that should be passed to __tls_get_addr() in order to calculate the address of `t_ThreadStatics` for
# the current thread. Here, we have to call `__tls_get_addr()`, because the linker looks for the code pattern
# of "lea t_ThreadStatics@TLSGD" followed by `call __tls_get_addr()`; without the call, the linker complains.
# We never have to call this method directly, and hence there is an `int 3` at the end.
+#
+
+LEAF_ENTRY GetTlsIndexObjectDescOffset, _TEXT
# The `lea` instruction has a data16 prefix and the call instruction has two data16 (0x66) prefixes and one rex64 prefix.
+# This is so the total size of lea+call to be 16, suitable for link-time optimization.
+
+ .byte 0x66
+ lea rdi, t_ThreadStatics@TLSGD[rip] # instruction where offset is embedded by the linker during compilation
+ .byte 0x66
+ .byte 0x66
+ .byte 0x48 # rex.W prefix for padding
+ call __tls_get_addr # dummy call to have linker see the code pattern to replace the offset
+ int 3
+LEAF_END GetTlsIndexObjectDescOffset, _TEXT
+#endif
diff --git a/src/coreclr/vm/appdomain.cpp b/src/coreclr/vm/appdomain.cpp
index c11627614ea07..b210c25ed382f 100644
--- a/src/coreclr/vm/appdomain.cpp
+++ b/src/coreclr/vm/appdomain.cpp
@@ -665,7 +665,6 @@ void BaseDomain::InitVSD()
GetLoaderAllocator()->InitVirtualCallStubManager(this);
}
-#ifdef HOST_WINDOWS
void BaseDomain::InitThreadStaticBlockTypeMap()
{
STANDARD_VM_CONTRACT;
@@ -673,7 +672,6 @@ void BaseDomain::InitThreadStaticBlockTypeMap()
m_NonGCThreadStaticBlockTypeIDMap.Init();
m_GCThreadStaticBlockTypeIDMap.Init();
}
-#endif // HOST_WINDOWS
void BaseDomain::ClearBinderContext()
{
@@ -1771,10 +1769,8 @@ void AppDomain::Create()
// allocate a Virtual Call Stub Manager for the default domain
pDomain->InitVSD();
-#ifdef HOST_WINDOWS
// allocate a thread static block to index map
pDomain->InitThreadStaticBlockTypeMap();
-#endif
pDomain->SetStage(AppDomain::STAGE_OPEN);
pDomain->CreateDefaultBinder();
@@ -4664,7 +4660,6 @@ PTR_MethodTable BaseDomain::LookupType(UINT32 id) {
return pMT;
}
-#ifdef HOST_WINDOWS
//------------------------------------------------------------------------
UINT32 BaseDomain::GetNonGCThreadStaticTypeIndex(PTR_MethodTable pMT)
{
@@ -4715,7 +4710,6 @@ PTR_MethodTable BaseDomain::LookupGCThreadStaticBlockType(UINT32 id) {
CONSISTENCY_CHECK(CheckPointer(pMT));
return pMT;
}
-#endif // HOST_WINDOWS
#ifndef DACCESS_COMPILE
//---------------------------------------------------------------------------------------
diff --git a/src/coreclr/vm/appdomain.hpp b/src/coreclr/vm/appdomain.hpp
index 5b5975c5afee0..ea2d1648186b2 100644
--- a/src/coreclr/vm/appdomain.hpp
+++ b/src/coreclr/vm/appdomain.hpp
@@ -1221,18 +1221,14 @@ class BaseDomain
private:
TypeIDMap m_typeIDMap;
-#ifdef HOST_WINDOWS
// MethodTable to `typeIndex` map. `typeIndex` is embedded in the code during codegen.
// During execution corresponding thread static data blocks are stored in `t_NonGCThreadStaticBlocks`
// and `t_GCThreadStaticBlocks` array at the `typeIndex`.
TypeIDMap m_NonGCThreadStaticBlockTypeIDMap;
TypeIDMap m_GCThreadStaticBlockTypeIDMap;
-#endif // HOST_WINDOWS
-
public:
-#ifdef HOST_WINDOWS
void InitThreadStaticBlockTypeMap();
UINT32 GetNonGCThreadStaticTypeIndex(PTR_MethodTable pMT);
@@ -1240,7 +1236,6 @@ class BaseDomain
PTR_MethodTable LookupNonGCThreadStaticBlockType(UINT32 id);
PTR_MethodTable LookupGCThreadStaticBlockType(UINT32 id);
-#endif
UINT32 GetTypeID(PTR_MethodTable pMT);
UINT32 LookupTypeID(PTR_MethodTable pMT);
diff --git a/src/coreclr/vm/arm64/asmhelpers.S b/src/coreclr/vm/arm64/asmhelpers.S
index 574d30068f099..cbe14485e8df4 100644
--- a/src/coreclr/vm/arm64/asmhelpers.S
+++ b/src/coreclr/vm/arm64/asmhelpers.S
@@ -974,3 +974,39 @@ LEAF_END JIT_ValidateIndirectCall, _TEXT
LEAF_ENTRY JIT_DispatchIndirectCall, _TEXT
br x9
LEAF_END JIT_DispatchIndirectCall, _TEXT
+
+#ifdef TARGET_OSX
+// ------------------------------------------------------------------
+// void* GetThreadVarsAddress()
+
+// Helper to calculate the address of relevant __thread_vars section that holds the address of symbol tlv_get_address for thread
+// local `t_ThreadStatics`. The address is updated by the linker, which we retrieve here. In JIT code, this address is called
+// to retrieve the address of the thread local.
+
+LEAF_ENTRY GetThreadVarsAddress, _TEXT
+ adrp x0, _t_ThreadStatics@TLVPPAGE
+ ldr x0, [x0, _t_ThreadStatics@TLVPPAGEOFF]
+ ret
+LEAF_END GetThreadVarsAddress, _TEXT
+// ------------------------------------------------------------------
+#endif // TARGET_OSX
+
+#ifndef TARGET_OSX
+// ------------------------------------------------------------------
+// size_t GetThreadStaticsVariableOffset()
+
+// Helper to calculate the offset of native thread local variable `t_ThreadStatics` in TCB. The offset has to be found at runtime
+// once linker does its relocation and fixup of thread locals. The offset, after calculation is returned in `x0` register.
+
+LEAF_ENTRY GetThreadStaticsVariableOffset, _TEXT
+ PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -32
+ adrp x0, :tlsdesc:t_ThreadStatics
+ ldr x1, [x0, #:tlsdesc_lo12:t_ThreadStatics]
+ add x0, x0, :tlsdesc_lo12:t_ThreadStatics
+ .tlsdesccall t_ThreadStatics
+ blr x1
+ EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 32
+ EPILOG_RETURN
+LEAF_END GetThreadStaticsVariableOffset, _TEXT
+// ------------------------------------------------------------------
+#endif // !TARGET_OSX
diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp
index d4ce2c9aa69ac..acb8cefb6942b 100644
--- a/src/coreclr/vm/jithelpers.cpp
+++ b/src/coreclr/vm/jithelpers.cpp
@@ -1777,25 +1777,23 @@ HCIMPL1(void*, JIT_GetGCThreadStaticBase_Helper, MethodTable * pMT)
}
HCIMPLEND
+struct ThreadStaticBlockInfo
+{
+ uint32_t NonGCMaxThreadStaticBlocks;
+ void** NonGCThreadStaticBlocks;
-#ifdef _MSC_VER
-__declspec(selectany) __declspec(thread) uint32_t t_NonGCMaxThreadStaticBlocks;
-__declspec(selectany) __declspec(thread) uint32_t t_GCMaxThreadStaticBlocks;
-
-__declspec(selectany) __declspec(thread) uint32_t t_NonGCThreadStaticBlocksSize;
-__declspec(selectany) __declspec(thread) uint32_t t_GCThreadStaticBlocksSize;
+ uint32_t GCMaxThreadStaticBlocks;
+ void** GCThreadStaticBlocks;
+};
-__declspec(selectany) __declspec(thread) void** t_NonGCThreadStaticBlocks;
-__declspec(selectany) __declspec(thread) void** t_GCThreadStaticBlocks;
+#ifdef _MSC_VER
+__declspec(selectany) __declspec(thread) ThreadStaticBlockInfo t_ThreadStatics;
+__declspec(selectany) __declspec(thread) uint32_t t_NonGCThreadStaticBlocksSize;
+__declspec(selectany) __declspec(thread) uint32_t t_GCThreadStaticBlocksSize;
#else
-EXTERN_C __thread uint32_t t_NonGCMaxThreadStaticBlocks;
-EXTERN_C __thread uint32_t t_GCMaxThreadStaticBlocks;
-
+EXTERN_C __thread ThreadStaticBlockInfo t_ThreadStatics;
EXTERN_C __thread uint32_t t_NonGCThreadStaticBlocksSize;
EXTERN_C __thread uint32_t t_GCThreadStaticBlocksSize;
-
-EXTERN_C __thread void** t_NonGCThreadStaticBlocks;
-EXTERN_C __thread void** t_GCThreadStaticBlocks;
#endif
// *** This helper corresponds to both CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE and
@@ -1840,7 +1838,6 @@ HCIMPL1(void*, JIT_GetSharedNonGCThreadStaticBaseOptimized, UINT32 staticBlockIn
{
void* staticBlock = nullptr;
-#ifdef HOST_WINDOWS
FCALL_CONTRACT;
HELPER_METHOD_FRAME_BEGIN_RET_0(); // Set up a frame
@@ -1867,27 +1864,24 @@ HCIMPL1(void*, JIT_GetSharedNonGCThreadStaticBaseOptimized, UINT32 staticBlockIn
if (t_NonGCThreadStaticBlocksSize > 0)
{
- memcpy(newThreadStaticBlocks, t_NonGCThreadStaticBlocks, t_NonGCThreadStaticBlocksSize * sizeof(PTR_BYTE));
- delete t_NonGCThreadStaticBlocks;
+ memcpy(newThreadStaticBlocks, t_ThreadStatics.NonGCThreadStaticBlocks, t_NonGCThreadStaticBlocksSize * sizeof(PTR_BYTE));
+ delete[] t_ThreadStatics.NonGCThreadStaticBlocks;
}
t_NonGCThreadStaticBlocksSize = newThreadStaticBlocksSize;
- t_NonGCThreadStaticBlocks = newThreadStaticBlocks;
+ t_ThreadStatics.NonGCThreadStaticBlocks = newThreadStaticBlocks;
}
- void* currentEntry = t_NonGCThreadStaticBlocks[staticBlockIndex];
+ void* currentEntry = t_ThreadStatics.NonGCThreadStaticBlocks[staticBlockIndex];
// We could be coming here 2nd time after running the ctor when we try to get the static block.
// In such case, just avoid adding the same entry.
if (currentEntry != staticBlock)
{
_ASSERTE(currentEntry == nullptr);
- t_NonGCThreadStaticBlocks[staticBlockIndex] = staticBlock;
- t_NonGCMaxThreadStaticBlocks = max(t_NonGCMaxThreadStaticBlocks, staticBlockIndex);
+ t_ThreadStatics.NonGCThreadStaticBlocks[staticBlockIndex] = staticBlock;
+ t_ThreadStatics.NonGCMaxThreadStaticBlocks = max(t_ThreadStatics.NonGCMaxThreadStaticBlocks, staticBlockIndex);
}
HELPER_METHOD_FRAME_END();
-#else
- _ASSERTE(!"JIT_GetSharedNonGCThreadStaticBaseOptimized not supported on non-windows.");
-#endif // HOST_WINDOWS
return staticBlock;
}
@@ -1938,7 +1932,6 @@ HCIMPL1(void*, JIT_GetSharedGCThreadStaticBaseOptimized, UINT32 staticBlockIndex
{
void* staticBlock = nullptr;
-#ifdef HOST_WINDOWS
FCALL_CONTRACT;
HELPER_METHOD_FRAME_BEGIN_RET_0(); // Set up a frame
@@ -1965,31 +1958,28 @@ HCIMPL1(void*, JIT_GetSharedGCThreadStaticBaseOptimized, UINT32 staticBlockIndex
if (t_GCThreadStaticBlocksSize > 0)
{
- memcpy(newThreadStaticBlocks, t_GCThreadStaticBlocks, t_GCThreadStaticBlocksSize * sizeof(PTR_BYTE));
- delete t_GCThreadStaticBlocks;
+ memcpy(newThreadStaticBlocks, t_ThreadStatics.GCThreadStaticBlocks, t_GCThreadStaticBlocksSize * sizeof(PTR_BYTE));
+ delete[] t_ThreadStatics.GCThreadStaticBlocks;
}
t_GCThreadStaticBlocksSize = newThreadStaticBlocksSize;
- t_GCThreadStaticBlocks = newThreadStaticBlocks;
+ t_ThreadStatics.GCThreadStaticBlocks = newThreadStaticBlocks;
}
- void* currentEntry = t_GCThreadStaticBlocks[staticBlockIndex];
+ void* currentEntry = t_ThreadStatics.GCThreadStaticBlocks[staticBlockIndex];
// We could be coming here 2nd time after running the ctor when we try to get the static block.
// In such case, just avoid adding the same entry.
if (currentEntry != staticBlock)
{
_ASSERTE(currentEntry == nullptr);
- t_GCThreadStaticBlocks[staticBlockIndex] = staticBlock;
- t_GCMaxThreadStaticBlocks = max(t_GCMaxThreadStaticBlocks, staticBlockIndex);
+ t_ThreadStatics.GCThreadStaticBlocks[staticBlockIndex] = staticBlock;
+ t_ThreadStatics.GCMaxThreadStaticBlocks = max(t_ThreadStatics.GCMaxThreadStaticBlocks, staticBlockIndex);
}
// Get the data pointer of static block
staticBlock = (void*) pMT->GetGCThreadStaticsBasePointer();
HELPER_METHOD_FRAME_END();
-#else
- _ASSERTE(!"JIT_GetSharedGCThreadStaticBaseOptimized not supported on non-windows.");
-#endif // HOST_WINDOWS
return staticBlock;
}
diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp
index 678345f9fb658..2dd8eaf5dac84 100644
--- a/src/coreclr/vm/jitinterface.cpp
+++ b/src/coreclr/vm/jitinterface.cpp
@@ -65,20 +65,28 @@
#include "tailcallhelp.h"
-#ifdef HOST_WINDOWS
+#ifdef TARGET_WINDOWS
EXTERN_C uint32_t _tls_index;
#endif
-#ifdef _MSC_VER
-__declspec(selectany) __declspec(thread) uint32_t t_NonGCMaxThreadStaticBlocks;
-__declspec(selectany) __declspec(thread) uint32_t t_GCMaxThreadStaticBlocks;
+struct ThreadStaticBlockInfo
+{
+ uint32_t NonGCMaxThreadStaticBlocks;
+ void** NonGCThreadStaticBlocks;
-__declspec(selectany) __declspec(thread) void** t_NonGCThreadStaticBlocks;
-__declspec(selectany) __declspec(thread) void** t_GCThreadStaticBlocks;
+ uint32_t GCMaxThreadStaticBlocks;
+ void** GCThreadStaticBlocks;
+};
+#ifdef _MSC_VER
+__declspec(selectany) __declspec(thread) ThreadStaticBlockInfo t_ThreadStatics;
+__declspec(selectany) __declspec(thread) uint32_t t_NonGCThreadStaticBlocksSize;
+__declspec(selectany) __declspec(thread) uint32_t t_GCThreadStaticBlocksSize;
#else
-EXTERN_C __thread uint32_t t_maxThreadStaticBlocks;
-EXTERN_C __thread void** t_threadStaticBlocks;
-#endif
+extern "C" void* __tls_get_addr(void* ti);
+__thread ThreadStaticBlockInfo t_ThreadStatics;
+__thread uint32_t t_NonGCThreadStaticBlocksSize;
+__thread uint32_t t_GCThreadStaticBlocksSize;
+#endif // _MSC_VER
// The Stack Overflow probe takes place in the COOPERATIVE_TRANSITION_BEGIN() macro
//
@@ -1297,6 +1305,178 @@ static CorInfoHelpFunc getInstanceFieldHelper(FieldDesc * pField, CORINFO_ACCESS
return (CorInfoHelpFunc)helper;
}
+
+
+/*********************************************************************/
+uint32_t CEEInfo::getThreadLocalFieldInfo (CORINFO_FIELD_HANDLE field, bool isGCType)
+{
+ CONTRACTL {
+ THROWS;
+ GC_TRIGGERS;
+ MODE_PREEMPTIVE;
+ } CONTRACTL_END;
+
+ UINT32 typeIndex = 0;
+
+ JIT_TO_EE_TRANSITION();
+
+ FieldDesc* fieldDesc = (FieldDesc*)field;
+ _ASSERTE(fieldDesc->IsThreadStatic());
+
+ if (isGCType)
+ {
+ typeIndex = AppDomain::GetCurrentDomain()->GetGCThreadStaticTypeIndex(fieldDesc->GetEnclosingMethodTable());
+ }
+ else
+ {
+ typeIndex = AppDomain::GetCurrentDomain()->GetNonGCThreadStaticTypeIndex(fieldDesc->GetEnclosingMethodTable());
+ }
+
+ assert(typeIndex != TypeIDProvider::INVALID_TYPE_ID);
+
+ EE_TO_JIT_TRANSITION();
+ return typeIndex;
+}
+
+#if defined(TARGET_WINDOWS)
+/*********************************************************************/
+static uint32_t ThreadLocalOffset(void* p)
+{
+ PTEB Teb = NtCurrentTeb();
+ uint8_t** pTls = (uint8_t**)Teb->ThreadLocalStoragePointer;
+ uint8_t* pOurTls = pTls[_tls_index];
+ return (uint32_t)((uint8_t*)p - pOurTls);
+}
+#elif defined(TARGET_OSX)
+extern "C" void* GetThreadVarsAddress();
+
+static void* GetThreadVarsSectionAddressFromDesc(uint8_t* p)
+{
+ _ASSERT(p[0] == 0x48 && p[1] == 0x8d && p[2] == 0x3d);
+
+ // At this point, `p` contains the instruction pointer and is pointing to the above opcodes.
+ // These opcodes are patched by the dynamic linker.
+ // Move beyond the opcodes that we have already checked above.
+ p += 3;
+
+ // The descriptor address is located at *p at this point.
+ // (p + 4) below skips the descriptor address bytes embedded in the instruction and
+ // add it to the `instruction pointer` to find out the address.
+ return *(uint32_t*)p + (p + 4);
+}
+
+static void* GetThreadVarsSectionAddress()
+{
+#ifdef TARGET_AMD64
+ // On x64, the address is related to rip, so, disassemble the function,
+ // read the offset, and then relative to the IP, find the final address of
+ // __thread_vars section.
+    uint8_t* p = reinterpret_cast<uint8_t*>(&GetThreadVarsAddress);
+ return GetThreadVarsSectionAddressFromDesc(p);
+#else
+ return GetThreadVarsAddress();
+#endif // TARGET_AMD64
+}
+
+#else
+
+// Linux
+
+#ifdef TARGET_AMD64
+
+extern "C" void* GetTlsIndexObjectDescOffset();
+
+static void* GetThreadStaticDescriptor(uint8_t* p)
+{
+ if (!(p[0] == 0x66 && p[1] == 0x48 && p[2] == 0x8d && p[3] == 0x3d))
+ {
+ // The optimization is disabled if coreclr is not compiled in .so format.
+ _ASSERTE(false && "Unexpected code sequence");
+ return nullptr;
+ }
+
+ // At this point, `p` contains the instruction pointer and is pointing to the above opcodes.
+ // These opcodes are patched by the dynamic linker.
+ // Move beyond the opcodes that we have already checked above.
+ p += 4;
+
+ // The descriptor address is located at *p at this point. Read that and add
+ // it to the instruction pointer to locate the address of `ti` that will be used
+ // to pass to __tls_get_addr during execution.
+ // (p + 4) below skips the descriptor address bytes embedded in the instruction and
+ // add it to the `instruction pointer` to find out the address.
+ return *(uint32_t*)p + (p + 4);
+}
+
+static void* GetTlsIndexObjectAddress()
+{
+    uint8_t* p = reinterpret_cast<uint8_t*>(&GetTlsIndexObjectDescOffset);
+ return GetThreadStaticDescriptor(p);
+}
+
+#elif TARGET_ARM64
+
+extern "C" size_t GetThreadStaticsVariableOffset();
+
+#endif // TARGET_ARM64
+#endif // TARGET_WINDOWS
+
+
+void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo, bool isGCType)
+{
+ CONTRACTL {
+ NOTHROW;
+ GC_NOTRIGGER;
+ MODE_PREEMPTIVE;
+ } CONTRACTL_END;
+
+ JIT_TO_EE_TRANSITION_LEAF();
+
+ size_t threadStaticBaseOffset = 0;
+
+#if defined(TARGET_WINDOWS)
+    pInfo->tlsIndex.addr = (void*)static_cast<size_t>(_tls_index);
+ pInfo->tlsIndex.accessType = IAT_VALUE;
+
+ pInfo->offsetOfThreadLocalStoragePointer = offsetof(_TEB, ThreadLocalStoragePointer);
+ threadStaticBaseOffset = ThreadLocalOffset(&t_ThreadStatics);
+
+#elif defined(TARGET_OSX)
+
+ pInfo->threadVarsSection = GetThreadVarsSectionAddress();
+
+#elif defined(TARGET_AMD64)
+
+ // For Linux/x64, get the address of tls_get_addr system method and the base address
+ // of struct that we will pass to it.
+    pInfo->tlsGetAddrFtnPtr = reinterpret_cast<void*>(&__tls_get_addr);
+ pInfo->tlsIndexObject = GetTlsIndexObjectAddress();
+
+#elif defined(TARGET_ARM64)
+
+ // For Linux/arm64, just get the offset of thread static variable, and during execution,
+    // this offset, taken from tpidr_el0 system register gives back the thread variable address.
+ threadStaticBaseOffset = GetThreadStaticsVariableOffset();
+
+#else
+ _ASSERTE_MSG(false, "Unsupported scenario of optimizing TLS access on Linux Arm32/x86");
+#endif // TARGET_WINDOWS
+
+ if (isGCType)
+ {
+ pInfo->offsetOfMaxThreadStaticBlocks = (uint32_t)(threadStaticBaseOffset + offsetof(ThreadStaticBlockInfo, GCMaxThreadStaticBlocks));
+ pInfo->offsetOfThreadStaticBlocks = (uint32_t)(threadStaticBaseOffset + offsetof(ThreadStaticBlockInfo, GCThreadStaticBlocks));
+ }
+ else
+ {
+ pInfo->offsetOfMaxThreadStaticBlocks = (uint32_t)(threadStaticBaseOffset + offsetof(ThreadStaticBlockInfo, NonGCMaxThreadStaticBlocks));
+ pInfo->offsetOfThreadStaticBlocks = (uint32_t)(threadStaticBaseOffset + offsetof(ThreadStaticBlockInfo, NonGCThreadStaticBlocks));
+ }
+    pInfo->offsetOfGCDataPointer = static_cast<uint32_t>(PtrArray::GetDataOffset());
+
+ EE_TO_JIT_TRANSITION_LEAF();
+}
+
/*********************************************************************/
void CEEInfo::getFieldInfo (CORINFO_RESOLVED_TOKEN * pResolvedToken,
CORINFO_METHOD_HANDLE callerHandle,
@@ -1401,25 +1581,40 @@ void CEEInfo::getFieldInfo (CORINFO_RESOLVED_TOKEN * pResolvedToken,
fieldAccessor = CORINFO_FIELD_STATIC_SHARED_STATIC_HELPER;
pResult->helper = getSharedStaticsHelper(pField, pFieldMT);
-
-#ifdef HOST_WINDOWS
-#ifndef TARGET_ARM
- // For windows, we convert the TLS access to the optimized helper where we will store
- // the static blocks in TLS directly and access them via inline code.
- if ((pResult->helper == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR) ||
- (pResult->helper == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE))
- {
- fieldAccessor = CORINFO_FIELD_STATIC_TLS_MANAGED;
- pResult->helper = CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED;
- }
- else if ((pResult->helper == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR) ||
- (pResult->helper == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE))
+#if defined(TARGET_ARM)
+ // Optimization is disabled for linux/windows arm
+#elif !defined(TARGET_WINDOWS) && defined(TARGET_X86)
+ // Optimization is disabled for linux/x86
+#elif defined(TARGET_LINUX_MUSL) && defined(TARGET_ARM64)
+ // Optimization is disabled for linux musl arm64
+#else
+ bool optimizeThreadStaticAccess = true;
+#if !defined(TARGET_OSX) && defined(TARGET_UNIX) && defined(TARGET_AMD64)
+ // For linux/x64, check if compiled coreclr as .so file and not single file.
+ // For single file, the `tls_index` might not be accurate.
+ // Do not perform this optimization in such case.
+ optimizeThreadStaticAccess = GetTlsIndexObjectAddress() != nullptr;
+#endif // TARGET_UNIX && TARGET_AMD64
+
+ if (optimizeThreadStaticAccess)
{
- fieldAccessor = CORINFO_FIELD_STATIC_TLS_MANAGED;
- pResult->helper = CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED;
+ // For windows x64/x86/arm64, linux x64/arm64:
+ // We convert the TLS access to the optimized helper where we will store
+ // the static blocks in TLS directly and access them via inline code.
+ if ((pResult->helper == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR) ||
+ (pResult->helper == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE))
+ {
+ fieldAccessor = CORINFO_FIELD_STATIC_TLS_MANAGED;
+ pResult->helper = CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED;
+ }
+ else if ((pResult->helper == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR) ||
+ (pResult->helper == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE))
+ {
+ fieldAccessor = CORINFO_FIELD_STATIC_TLS_MANAGED;
+ pResult->helper = CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED;
+ }
}
-#endif // !TARGET_ARM
-#endif // HOST_WINDOWS
+#endif // TARGET_ARM
}
else
{
@@ -1601,113 +1796,6 @@ void CEEInfo::getFieldInfo (CORINFO_RESOLVED_TOKEN * pResolvedToken,
EE_TO_JIT_TRANSITION();
}
-
-
-#ifdef HOST_WINDOWS
-
-/*********************************************************************/
-uint32_t CEEInfo::getThreadLocalFieldInfo (CORINFO_FIELD_HANDLE field, bool isGCType)
-{
- CONTRACTL {
- THROWS;
- GC_TRIGGERS;
- MODE_PREEMPTIVE;
- } CONTRACTL_END;
-
- UINT32 typeIndex = 0;
-
- JIT_TO_EE_TRANSITION();
-
- FieldDesc* fieldDesc = (FieldDesc*)field;
- _ASSERTE(fieldDesc->IsThreadStatic());
-
- if (isGCType)
- {
- typeIndex = AppDomain::GetCurrentDomain()->GetGCThreadStaticTypeIndex(fieldDesc->GetEnclosingMethodTable());
- }
- else
- {
- typeIndex = AppDomain::GetCurrentDomain()->GetNonGCThreadStaticTypeIndex(fieldDesc->GetEnclosingMethodTable());
- }
-
- assert(typeIndex != TypeIDProvider::INVALID_TYPE_ID);
-
- EE_TO_JIT_TRANSITION();
- return typeIndex;
-}
-
-/*********************************************************************/
-static uint32_t ThreadLocalOffset(void* p)
-{
- PTEB Teb = NtCurrentTeb();
- uint8_t** pTls = (uint8_t**)Teb->ThreadLocalStoragePointer;
- uint8_t* pOurTls = pTls[_tls_index];
- return (uint32_t)((uint8_t*)p - pOurTls);
-}
-
-void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo, bool isGCType)
-{
- CONTRACTL {
- NOTHROW;
- GC_NOTRIGGER;
- MODE_PREEMPTIVE;
- } CONTRACTL_END;
-
- JIT_TO_EE_TRANSITION_LEAF();
-
-    pInfo->tlsIndex.addr = (void*)static_cast<size_t>(_tls_index);
- pInfo->tlsIndex.accessType = IAT_VALUE;
-
- pInfo->offsetOfThreadLocalStoragePointer = offsetof(_TEB, ThreadLocalStoragePointer);
- if (isGCType)
- {
- pInfo->offsetOfThreadStaticBlocks = ThreadLocalOffset(&t_GCThreadStaticBlocks);
- pInfo->offsetOfMaxThreadStaticBlocks = ThreadLocalOffset(&t_GCMaxThreadStaticBlocks);
- }
- else
- {
- pInfo->offsetOfThreadStaticBlocks = ThreadLocalOffset(&t_NonGCThreadStaticBlocks);
- pInfo->offsetOfMaxThreadStaticBlocks = ThreadLocalOffset(&t_NonGCMaxThreadStaticBlocks);
- }
-
-    pInfo->offsetOfGCDataPointer = static_cast<uint32_t>(PtrArray::GetDataOffset());
-
- JIT_TO_EE_TRANSITION_LEAF();
-}
-#else
-
-uint32_t CEEInfo::getThreadLocalFieldInfo (CORINFO_FIELD_HANDLE field, bool isGCType)
-{
- CONTRACTL {
- NOTHROW;
- GC_NOTRIGGER;
- MODE_PREEMPTIVE;
- } CONTRACTL_END;
-
- return 0;
-}
-
-void CEEInfo::getThreadLocalStaticBlocksInfo (CORINFO_THREAD_STATIC_BLOCKS_INFO* pInfo, bool isGCType)
-{
- CONTRACTL {
- NOTHROW;
- GC_NOTRIGGER;
- MODE_PREEMPTIVE;
- } CONTRACTL_END;
-
- JIT_TO_EE_TRANSITION_LEAF();
-
- pInfo->tlsIndex.addr = (UINT8*)0;
-
- pInfo->offsetOfThreadLocalStoragePointer = 0;
- pInfo->offsetOfThreadStaticBlocks = 0;
- pInfo->offsetOfMaxThreadStaticBlocks = 0;
- pInfo->offsetOfGCDataPointer = 0;
-
- JIT_TO_EE_TRANSITION_LEAF();
-}
-#endif // HOST_WINDOWS
-
//---------------------------------------------------------------------------------------
//
bool CEEInfo::isFieldStatic(CORINFO_FIELD_HANDLE fldHnd)