Create portable box helper and wrap the thread-local alloc context in a struct #103607

Merged · 8 commits · Jun 20, 2024
1 change: 0 additions & 1 deletion src/coreclr/vm/CMakeLists.txt
@@ -621,7 +621,6 @@ if(CLR_CMAKE_TARGET_ARCH_AMD64)
${ARCH_SOURCES_DIR}/GenericCLRToCOMCallStubs.asm
${ARCH_SOURCES_DIR}/getstate.asm
${ARCH_SOURCES_DIR}/JitHelpers_Fast.asm
${ARCH_SOURCES_DIR}/JitHelpers_FastMP.asm
${ARCH_SOURCES_DIR}/JitHelpers_FastWriteBarriers.asm
${ARCH_SOURCES_DIR}/JitHelpers_SingleAppDomain.asm
${ARCH_SOURCES_DIR}/JitHelpers_Slow.asm
20 changes: 0 additions & 20 deletions src/coreclr/vm/amd64/AsmMacros.inc
@@ -206,26 +206,6 @@ INLINE_GETTHREAD macro Reg

endm

;
; Inlined macro to get the current thread's allocation context
; Trashes rax and r11
;

INLINE_GET_ALLOC_CONTEXT macro Reg

EXTERN _tls_index: DWORD
EXTERN t_thread_alloc_context: DWORD

mov r11d, [_tls_index]
mov rax, gs:[OFFSET__TEB__ThreadLocalStoragePointer]
mov rax, [rax + r11 * 8]
mov r11d, SECTIONREL t_thread_alloc_context
add rax, r11
mov Reg, rax

endm


; if you change this code there will be corresponding code in JITInterfaceGen.cpp which will need to be changed
;

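The deleted INLINE_GET_ALLOC_CONTEXT macro hand-coded the Windows x64 TLS lookup (_tls_index, the TEB's ThreadLocalStoragePointer, and the section-relative offset of the thread-local variable). The portable helpers this PR adds lean on the compiler to emit that sequence instead. A minimal C++ sketch of the idea, using simplified stand-in types rather than the runtime's real declarations:

```cpp
#include <cstdint>

// Stand-ins for the runtime's gc_alloc_context and GCThreadLocals (names are illustrative).
struct AllocContextSketch
{
    uint8_t* alloc_ptr;
    uint8_t* alloc_limit;
};

struct GCThreadLocalsSketch
{
    AllocContextSketch alloc_context;
};

// One instance per thread; the compiler emits the _tls_index / ThreadLocalStoragePointer
// lookup that the removed asm macro spelled out by hand.
thread_local GCThreadLocalsSketch t_gc_thread_locals_sketch;

// The portable helpers simply take the address of the thread-local member.
inline AllocContextSketch* GetThreadAllocContextSketch()
{
    return &t_gc_thread_locals_sketch.alloc_context;
}
```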
75 changes: 0 additions & 75 deletions src/coreclr/vm/amd64/JitHelpers_FastMP.asm

This file was deleted.

1 change: 1 addition & 0 deletions src/coreclr/vm/arm/stubs.cpp
@@ -1776,6 +1776,7 @@ void InitJITHelpers1()
SetJitHelperFunction(CORINFO_HELP_NEWSFAST, JIT_NewS_MP_FastPortable);
SetJitHelperFunction(CORINFO_HELP_NEWARR_1_VC, JIT_NewArr1VC_MP_FastPortable);
SetJitHelperFunction(CORINFO_HELP_NEWARR_1_OBJ, JIT_NewArr1OBJ_MP_FastPortable);
SetJitHelperFunction(CORINFO_HELP_BOX, JIT_Box_MP_FastPortable);

ECall::DynamicallyAssignFCallImpl(GetEEFuncEntryPoint(AllocateString_MP_FastPortable), ECall::FastAllocateString);
}
1 change: 1 addition & 0 deletions src/coreclr/vm/arm64/stubs.cpp
@@ -895,6 +895,7 @@ void InitJITHelpers1()
SetJitHelperFunction(CORINFO_HELP_NEWSFAST_ALIGN8, JIT_NewS_MP_FastPortable);
SetJitHelperFunction(CORINFO_HELP_NEWARR_1_VC, JIT_NewArr1VC_MP_FastPortable);
SetJitHelperFunction(CORINFO_HELP_NEWARR_1_OBJ, JIT_NewArr1OBJ_MP_FastPortable);
SetJitHelperFunction(CORINFO_HELP_BOX, JIT_Box_MP_FastPortable);

ECall::DynamicallyAssignFCallImpl(GetEEFuncEntryPoint(AllocateString_MP_FastPortable), ECall::FastAllocateString);
}
2 changes: 1 addition & 1 deletion src/coreclr/vm/comutilnative.cpp
@@ -925,7 +925,7 @@ FCIMPL0(INT64, GCInterface::GetAllocatedBytesForCurrentThread)

INT64 currentAllocated = 0;
Thread *pThread = GetThread();
gc_alloc_context* ac = &t_thread_alloc_context;
gc_alloc_context* ac = &t_gc_thread_locals.alloc_context;
currentAllocated = ac->alloc_bytes + ac->alloc_bytes_uoh - (ac->alloc_limit - ac->alloc_ptr);

return currentAllocated;
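The computation above reads as: all bytes the GC has handed to this thread's allocation contexts, minus the part of the current context that is reserved but not yet consumed. A self-contained restatement with a simplified stand-in for gc_alloc_context (field names mirror the real struct; the struct itself is illustrative):

```cpp
#include <cstdint>

// Simplified stand-in for gc_alloc_context.
struct AllocContextSketch
{
    uint8_t* alloc_ptr;       // next free byte in the current allocation window
    uint8_t* alloc_limit;     // end of the current allocation window
    int64_t  alloc_bytes;     // small-object-heap bytes ever handed to this thread's contexts
    int64_t  alloc_bytes_uoh; // UOH (large/pinned object heap) bytes ever handed to this thread
};

// Mirrors GetAllocatedBytesForCurrentThread: subtract the unconsumed tail of the window.
inline int64_t AllocatedBytesSketch(const AllocContextSketch& ac)
{
    return ac.alloc_bytes + ac.alloc_bytes_uoh - (ac.alloc_limit - ac.alloc_ptr);
}
```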
2 changes: 1 addition & 1 deletion src/coreclr/vm/gccover.cpp
@@ -1859,7 +1859,7 @@ void DoGcStress (PCONTEXT regs, NativeCodeVersion nativeCodeVersion)
// BUG(github #10318) - when not using allocation contexts, the alloc lock
// must be acquired here. Until fixed, this assert prevents random heap corruption.
assert(GCHeapUtilities::UseThreadAllocationContexts());
GCHeapUtilities::GetGCHeap()->StressHeap(&t_thread_alloc_context);
GCHeapUtilities::GetGCHeap()->StressHeap(&t_gc_thread_locals.alloc_context);

// StressHeap can exit early w/o forcing a SuspendEE to trigger the instruction update
// We can not rely on the return code to determine if the instruction update happened
2 changes: 1 addition & 1 deletion src/coreclr/vm/gcenv.ee.cpp
@@ -443,7 +443,7 @@ gc_alloc_context * GCToEEInterface::GetAllocContext()
return nullptr;
}

return &t_thread_alloc_context;
return &t_gc_thread_locals.alloc_context;
}

void GCToEEInterface::GcEnumAllocContexts(enum_alloc_context_func* fn, void* param)
2 changes: 1 addition & 1 deletion src/coreclr/vm/gcheaputilities.cpp
@@ -46,7 +46,7 @@ GVAL_IMPL_INIT(gc_alloc_context, g_global_alloc_context, {});
// on MP systems, each thread has its own allocation chunk so we can avoid
// lock prefixes and expensive MP cache snooping stuff
#ifndef _MSC_VER
__thread gc_alloc_context t_thread_alloc_context;
thread_local GCThreadLocals t_gc_thread_locals;
#endif

enum GC_LOAD_STATUS {
15 changes: 11 additions & 4 deletions src/coreclr/vm/gcheaputilities.h
@@ -26,13 +26,20 @@ GVAL_DECL(gc_alloc_context, g_global_alloc_context);
}
#endif // !DACCESS_COMPILE

// on MP systems, each thread has its own allocation chunk so we can avoid
// lock prefixes and expensive MP cache snooping stuff
struct GCThreadLocals
{
// on MP systems, each thread has its own allocation chunk so we can avoid
// lock prefixes and expensive MP cache snooping stuff
gc_alloc_context alloc_context;
};

#ifdef _MSC_VER
EXTERN_C __declspec(selectany) __declspec(thread) gc_alloc_context t_thread_alloc_context;
// use selectany to avoid initialization de-optimization issues in the compiler
__declspec(selectany)
#else
EXTERN_C __thread gc_alloc_context t_thread_alloc_context;
extern
#endif
thread_local GCThreadLocals t_gc_thread_locals;

extern "C" uint32_t* g_card_bundle_table;
extern "C" uint8_t* g_ephemeral_low;
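The declaration pattern that results from this hunk is easier to see outside the interleaved diff: on MSVC the thread-local is defined inline with __declspec(selectany) (per the comment, to avoid an initialization de-optimization), while other compilers see an extern declaration whose single definition lives in gcheaputilities.cpp. A minimal sketch with a stand-in struct in place of GCThreadLocals:

```cpp
// Stand-in for GCThreadLocals (illustrative only).
struct ThreadLocalsSketch
{
    int alloc_context_placeholder;
};

#ifdef _MSC_VER
// Every translation unit carries a definition; the linker folds them into one.
__declspec(selectany)
#else
// Declaration only; the single definition lives in one .cpp file (in the PR: gcheaputilities.cpp).
extern
#endif
thread_local ThreadLocalsSketch t_thread_locals_sketch;
```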
2 changes: 1 addition & 1 deletion src/coreclr/vm/gchelpers.cpp
@@ -46,7 +46,7 @@ inline gc_alloc_context* GetThreadAllocContext()

assert(GCHeapUtilities::UseThreadAllocationContexts());

return &t_thread_alloc_context;
return &t_gc_thread_locals.alloc_context;
}

// When not using per-thread allocation contexts, we (the EE) need to take care that
2 changes: 1 addition & 1 deletion src/coreclr/vm/gcstress.h
@@ -289,7 +289,7 @@ namespace _GCStress
// BUG(github #10318) - when not using allocation contexts, the alloc lock
// must be acquired here. Until fixed, this assert prevents random heap corruption.
_ASSERTE(GCHeapUtilities::UseThreadAllocationContexts());
GCHeapUtilities::GetGCHeap()->StressHeap(&t_thread_alloc_context);
GCHeapUtilities::GetGCHeap()->StressHeap(&t_gc_thread_locals.alloc_context);
}

FORCEINLINE
6 changes: 3 additions & 3 deletions src/coreclr/vm/i386/stublinkerx86.cpp
@@ -2434,7 +2434,7 @@ namespace
{
gc_alloc_context* STDCALL GetAllocContextHelper()
{
return &t_thread_alloc_context;
return &t_gc_thread_locals.alloc_context;
}
}
#endif
@@ -2490,8 +2490,8 @@ VOID StubLinkerCPU::X86EmitCurrentThreadAllocContextFetch(X86Reg dstreg, unsigne

X86EmitIndexRegLoad(dstreg, dstreg, sizeof(void *) * _tls_index);

_ASSERTE(Thread::GetOffsetOfThreadStatic(&t_thread_alloc_context) < INT_MAX);
X86EmitAddReg(dstreg, (int32_t)Thread::GetOffsetOfThreadStatic(&t_thread_alloc_context));
_ASSERTE(Thread::GetOffsetOfThreadStatic(&t_gc_thread_locals.alloc_context) < INT_MAX);
X86EmitAddReg(dstreg, (int32_t)Thread::GetOffsetOfThreadStatic(&t_gc_thread_locals.alloc_context));

#endif // TARGET_UNIX
}
63 changes: 52 additions & 11 deletions src/coreclr/vm/jithelpers.cpp
@@ -1668,7 +1668,7 @@ HCIMPL1_RAW(Object*, JIT_NewS_MP_FastPortable, CORINFO_CLASS_HANDLE typeHnd_)
} CONTRACTL_END;

_ASSERTE(GCHeapUtilities::UseThreadAllocationContexts());
gc_alloc_context *allocContext = &t_thread_alloc_context;
gc_alloc_context *allocContext = &t_gc_thread_locals.alloc_context;

TypeHandle typeHandle(typeHnd_);
_ASSERTE(!typeHandle.IsTypeDesc()); // heap objects must have method tables
@@ -1785,7 +1785,7 @@ HCIMPL1_RAW(StringObject*, AllocateString_MP_FastPortable, DWORD stringLength)
return HCCALL1(FramedAllocateString, stringLength);
}

gc_alloc_context *allocContext = &t_thread_alloc_context;
gc_alloc_context *allocContext = &t_gc_thread_locals.alloc_context;

SIZE_T totalSize = StringObject::GetSize(stringLength);

@@ -1901,7 +1901,7 @@ HCIMPL2_RAW(Object*, JIT_NewArr1VC_MP_FastPortable, CORINFO_CLASS_HANDLE arrayMT
return HCCALL2(JIT_NewArr1, arrayMT, size);
}

gc_alloc_context *allocContext = &t_thread_alloc_context;
gc_alloc_context *allocContext = &t_gc_thread_locals.alloc_context;

MethodTable *pArrayMT = (MethodTable *)arrayMT;

@@ -1959,11 +1959,6 @@ HCIMPL2_RAW(Object*, JIT_NewArr1OBJ_MP_FastPortable, CORINFO_CLASS_HANDLE arrayM
return HCCALL2(JIT_NewArr1, arrayMT, size);
}

// This is typically the only call in the fast path. Making the call early seems to be better, as it allows the compiler
// to use volatile registers for intermediate values. This reduces the number of push/pop instructions and eliminates
// some reshuffling of intermediate values into nonvolatile registers around the call.
Thread *thread = GetThread();

SIZE_T totalSize = componentCount * sizeof(void *);
_ASSERTE(totalSize / sizeof(void *) == componentCount);


_ASSERTE(ALIGN_UP(totalSize, DATA_ALIGNMENT) == totalSize);

gc_alloc_context *allocContext = &t_thread_alloc_context;
gc_alloc_context *allocContext = &t_gc_thread_locals.alloc_context;
BYTE *allocPtr = allocContext->alloc_ptr;
_ASSERTE(allocPtr <= allocContext->alloc_limit);
if (totalSize > static_cast<SIZE_T>(allocContext->alloc_limit - allocPtr))
@@ -2109,14 +2104,60 @@ HCIMPLEND
// VALUETYPE/BYREF HELPERS
//
//========================================================================
/*************************************************************/
HCIMPL2_RAW(Object*, JIT_Box_MP_FastPortable, CORINFO_CLASS_HANDLE type, void* unboxedData)
{
CONTRACTL {
THROWS;
DISABLED(GC_TRIGGERS);
MODE_COOPERATIVE;
} CONTRACTL_END;

if (unboxedData == nullptr)
{
// Tail call to the slow helper
return HCCALL2(JIT_Box, type, unboxedData);
}

_ASSERTE(GCHeapUtilities::UseThreadAllocationContexts());
gc_alloc_context *allocContext = &t_gc_thread_locals.alloc_context;

TypeHandle typeHandle(type);
_ASSERTE(!typeHandle.IsTypeDesc()); // heap objects must have method tables
MethodTable *methodTable = typeHandle.AsMethodTable();
// The fast helper should never be called for nullable types.
_ASSERTE(!methodTable->IsNullable());

SIZE_T size = methodTable->GetBaseSize();
_ASSERTE(size % DATA_ALIGNMENT == 0);

BYTE *allocPtr = allocContext->alloc_ptr;
_ASSERTE(allocPtr <= allocContext->alloc_limit);
if (size > static_cast<SIZE_T>(allocContext->alloc_limit - allocPtr))
{
// Tail call to the slow helper
return HCCALL2(JIT_Box, type, unboxedData);
}

allocContext->alloc_ptr = allocPtr + size;

_ASSERTE(allocPtr != nullptr);
Object *object = reinterpret_cast<Object *>(allocPtr);
_ASSERTE(object->HasEmptySyncBlockInfo());
object->SetMethodTable(methodTable);

// Copy the data into the object
CopyValueClass(object->UnBox(), unboxedData, methodTable);

return object;
}
HCIMPLEND_RAW

/*************************************************************/
HCIMPL2(Object*, JIT_Box, CORINFO_CLASS_HANDLE type, void* unboxedData)
{
FCALL_CONTRACT;

// <TODO>TODO: if we care, we could do a fast trial allocation
// and avoid the building the frame most times</TODO>
OBJECTREF newobj = NULL;
HELPER_METHOD_FRAME_BEGIN_RET_NOPOLL(); // Set up a frame
GCPROTECT_BEGININTERIOR(unboxedData);
3 changes: 2 additions & 1 deletion src/coreclr/vm/jitinterfacegen.cpp
@@ -65,6 +65,7 @@ void InitJITHelpers1()
#ifdef TARGET_UNIX
SetJitHelperFunction(CORINFO_HELP_NEWSFAST, JIT_NewS_MP_FastPortable);
SetJitHelperFunction(CORINFO_HELP_NEWSFAST_ALIGN8, JIT_NewS_MP_FastPortable);
SetJitHelperFunction(CORINFO_HELP_BOX, JIT_Box_MP_FastPortable);
SetJitHelperFunction(CORINFO_HELP_NEWARR_1_VC, JIT_NewArr1VC_MP_FastPortable);
SetJitHelperFunction(CORINFO_HELP_NEWARR_1_OBJ, JIT_NewArr1OBJ_MP_FastPortable);

@@ -75,7 +76,7 @@
{
SetJitHelperFunction(CORINFO_HELP_NEWSFAST, JIT_NewS_MP_FastPortable);
SetJitHelperFunction(CORINFO_HELP_NEWSFAST_ALIGN8, JIT_NewS_MP_FastPortable);
SetJitHelperFunction(CORINFO_HELP_BOX, JIT_BoxFastMP);
SetJitHelperFunction(CORINFO_HELP_BOX, JIT_Box_MP_FastPortable);
SetJitHelperFunction(CORINFO_HELP_NEWARR_1_VC, JIT_NewArr1VC_MP_FastPortable);
SetJitHelperFunction(CORINFO_HELP_NEWARR_1_OBJ, JIT_NewArr1OBJ_MP_FastPortable);

1 change: 1 addition & 0 deletions src/coreclr/vm/loongarch64/stubs.cpp
@@ -930,6 +930,7 @@ void InitJITHelpers1()
SetJitHelperFunction(CORINFO_HELP_NEWSFAST_ALIGN8, JIT_NewS_MP_FastPortable);
SetJitHelperFunction(CORINFO_HELP_NEWARR_1_VC, JIT_NewArr1VC_MP_FastPortable);
SetJitHelperFunction(CORINFO_HELP_NEWARR_1_OBJ, JIT_NewArr1OBJ_MP_FastPortable);
SetJitHelperFunction(CORINFO_HELP_BOX, JIT_Box_MP_FastPortable);

ECall::DynamicallyAssignFCallImpl(GetEEFuncEntryPoint(AllocateString_MP_FastPortable), ECall::FastAllocateString);
}
1 change: 1 addition & 0 deletions src/coreclr/vm/riscv64/stubs.cpp
@@ -827,6 +827,7 @@ void InitJITHelpers1()
SetJitHelperFunction(CORINFO_HELP_NEWSFAST_ALIGN8, JIT_NewS_MP_FastPortable);
SetJitHelperFunction(CORINFO_HELP_NEWARR_1_VC, JIT_NewArr1VC_MP_FastPortable);
SetJitHelperFunction(CORINFO_HELP_NEWARR_1_OBJ, JIT_NewArr1OBJ_MP_FastPortable);
SetJitHelperFunction(CORINFO_HELP_BOX, JIT_Box_MP_FastPortable);

ECall::DynamicallyAssignFCallImpl(GetEEFuncEntryPoint(AllocateString_MP_FastPortable), ECall::FastAllocateString);
}
6 changes: 3 additions & 3 deletions src/coreclr/vm/threads.cpp
@@ -964,9 +964,9 @@ HRESULT Thread::DetachThread(BOOL fDLLThreadDetach)
GCX_COOP();
// GetTotalAllocatedBytes reads dead_threads_non_alloc_bytes, but will suspend EE, being in COOP mode we cannot race with that
// however, there could be other threads terminating and doing the same Add.
InterlockedExchangeAdd64((LONG64*)&dead_threads_non_alloc_bytes, t_thread_alloc_context.alloc_limit - t_thread_alloc_context.alloc_ptr);
GCHeapUtilities::GetGCHeap()->FixAllocContext(&t_thread_alloc_context, NULL, NULL);
t_thread_alloc_context.init(); // re-initialize the context.
InterlockedExchangeAdd64((LONG64*)&dead_threads_non_alloc_bytes, t_gc_thread_locals.alloc_context.alloc_limit - t_gc_thread_locals.alloc_context.alloc_ptr);
GCHeapUtilities::GetGCHeap()->FixAllocContext(&t_gc_thread_locals.alloc_context, NULL, NULL);
t_gc_thread_locals.alloc_context.init(); // re-initialize the context.

// Clear out the alloc context pointer for this thread. When TLS is gone, this pointer will point into freed memory.
m_alloc_context = nullptr;
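The detach path above folds the dying thread's unconsumed allocation window into dead_threads_non_alloc_bytes, lets the GC fix up the context, and then re-initializes it. A hedged sketch of that sequence with simplified stand-ins (the real code uses InterlockedExchangeAdd64 and GCHeapUtilities::GetGCHeap()->FixAllocContext):

```cpp
#include <atomic>
#include <cstdint>

// Simplified stand-in for gc_alloc_context.
struct AllocContextSketch
{
    uint8_t* alloc_ptr   = nullptr;
    uint8_t* alloc_limit = nullptr;
    void init() { alloc_ptr = nullptr; alloc_limit = nullptr; }
};

// Stand-in for dead_threads_non_alloc_bytes.
std::atomic<int64_t> g_dead_threads_non_alloc_bytes{0};

void OnThreadDetachSketch(AllocContextSketch& ac)
{
    // Bytes the GC handed to this thread that were never turned into objects.
    g_dead_threads_non_alloc_bytes.fetch_add(ac.alloc_limit - ac.alloc_ptr);

    // The real code calls GCHeapUtilities::GetGCHeap()->FixAllocContext(&ac, NULL, NULL) here
    // so the GC can account for or reclaim the remaining window.

    ac.init(); // re-initialize, matching t_gc_thread_locals.alloc_context.init()
}
```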
2 changes: 1 addition & 1 deletion src/coreclr/vm/threads.h
@@ -950,7 +950,7 @@ class Thread
gc_alloc_context* m_alloc_context;

public:
inline void InitAllocContext() { LIMITED_METHOD_CONTRACT; m_alloc_context = &t_thread_alloc_context; }
inline void InitAllocContext() { LIMITED_METHOD_CONTRACT; m_alloc_context = &t_gc_thread_locals.alloc_context; }

inline gc_alloc_context *GetAllocContext() { LIMITED_METHOD_CONTRACT; return m_alloc_context; }
