Skip to content

Commit

Permalink
Add ee_alloc_context
Browse files Browse the repository at this point in the history
This change is some preparatory refactoring for the randomized allocation sampling feature. We need to add more state onto the allocation context, but we don't want to make a breaking change to the GC interface. The new state only needs to be visible to the EE, but we want it physically near the existing alloc context state for good cache locality. To accomplish this we created a new ee_alloc_context struct which contains an instance of gc_alloc_context within it.

In a future PR we will add a field called combined_limit which should be used by fast allocation helpers to determine when to go down the slow path. Most of the time combined_limit has the same value as alloc_limit, but periodically we need to emit an allocation sampling event on an object that is somewhere in the middle of an allocation context (AC). Using combined_limit rather than alloc_limit as the slow path trigger allows us to keep all the sampling event logic in the slow path.

This PR introduces the abstraction for combined_limit and changes all the fast allocation helpers to use it, but it does not physically create the field yet. For now combined_limit is just an alias back to alloc_limit. There is also a small change in globals. Previously g_global_alloc_context was gc_alloc_context, now it is a gc_alloc_context* which points at g_global_ee_alloc_context.gc_allocation_context.

Overall this PR should not cause any change in runtime behavior and compiled code should be largely identical to before assuming modest inlining and optimization by the compiler.
  • Loading branch information
noahfalk committed Jul 13, 2024
1 parent 42b2b19 commit ca15926
Show file tree
Hide file tree
Showing 24 changed files with 152 additions and 70 deletions.
6 changes: 3 additions & 3 deletions src/coreclr/debug/daccess/dacdbiimpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6551,10 +6551,10 @@ HRESULT DacHeapWalker::Init(CORDB_ADDRESS start, CORDB_ADDRESS end)
j++;
}
}
if ((&g_global_alloc_context)->alloc_ptr != nullptr)
if (g_global_alloc_context->alloc_ptr != nullptr)
{
mAllocInfo[j].Ptr = (CORDB_ADDRESS)(&g_global_alloc_context)->alloc_ptr;
mAllocInfo[j].Limit = (CORDB_ADDRESS)(&g_global_alloc_context)->alloc_limit;
mAllocInfo[j].Ptr = (CORDB_ADDRESS)g_global_alloc_context->alloc_ptr;
mAllocInfo[j].Limit = (CORDB_ADDRESS)g_global_alloc_context->alloc_limit;
}

mThreadCount = j;
Expand Down
4 changes: 2 additions & 2 deletions src/coreclr/debug/daccess/request.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5493,8 +5493,8 @@ HRESULT ClrDataAccess::GetGlobalAllocationContext(
}

SOSDacEnter();
*allocPtr = (CLRDATA_ADDRESS)((&g_global_alloc_context)->alloc_ptr);
*allocLimit = (CLRDATA_ADDRESS)((&g_global_alloc_context)->alloc_limit);
*allocPtr = (CLRDATA_ADDRESS)(g_global_alloc_context->alloc_ptr);
*allocLimit = (CLRDATA_ADDRESS)(g_global_alloc_context->alloc_limit);
SOSDacLeave();
return hr;
}
Expand Down
7 changes: 6 additions & 1 deletion src/coreclr/debug/runtimeinfo/datadescriptor.h
Original file line number Diff line number Diff line change
Expand Up @@ -132,9 +132,14 @@ CDAC_TYPE_END(ThreadStore)

CDAC_TYPE_BEGIN(RuntimeThreadLocals)
CDAC_TYPE_INDETERMINATE(RuntimeThreadLocals)
CDAC_TYPE_FIELD(RuntimeThreadLocals, AllocContext, AllocContext, offsetof(RuntimeThreadLocals, alloc_context))
CDAC_TYPE_FIELD(RuntimeThreadLocals, /*EEAllocContext*/, AllocContext, offsetof(RuntimeThreadLocals, alloc_context))
CDAC_TYPE_END(RuntimeThreadLocals)

CDAC_TYPE_BEGIN(EEAllocContext)
CDAC_TYPE_INDETERMINATE(EEAllocContext)
CDAC_TYPE_FIELD(EEAllocContext, /*GCAllocContext*/, GCAllocationContext, offsetof(ee_alloc_context, gc_allocation_context))
CDAC_TYPE_END(EEAllocContext)

CDAC_TYPE_BEGIN(GCAllocContext)
CDAC_TYPE_INDETERMINATE(GCAllocContext)
CDAC_TYPE_FIELD(GCAllocContext, /*pointer*/, Pointer, offsetof(gc_alloc_context, alloc_ptr))
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/inc/dacvars.h
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ DEFINE_DACVAR(ProfControlBlock, dac__g_profControlBlock, ::g_profControlBlock)
DEFINE_DACVAR(PTR_DWORD, dac__g_card_table, ::g_card_table)
DEFINE_DACVAR(PTR_BYTE, dac__g_lowest_address, ::g_lowest_address)
DEFINE_DACVAR(PTR_BYTE, dac__g_highest_address, ::g_highest_address)
DEFINE_DACVAR(gc_alloc_context, dac__g_global_alloc_context, ::g_global_alloc_context)
DEFINE_DACVAR(UNKNOWN_POINTER_TYPE, dac__g_global_alloc_context, ::g_global_alloc_context)

DEFINE_DACVAR(IGCHeap, dac__g_pGCHeap, ::g_pGCHeap)

Expand Down
32 changes: 16 additions & 16 deletions src/coreclr/vm/amd64/JitHelpers_Slow.asm
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ endif


extern g_global_alloc_lock:dword
extern g_global_alloc_context:qword
extern g_global_ee_alloc_context:qword

LEAF_ENTRY JIT_TrialAllocSFastSP, _TEXT

Expand All @@ -180,15 +180,15 @@ LEAF_ENTRY JIT_TrialAllocSFastSP, _TEXT
inc [g_global_alloc_lock]
jnz JIT_NEW

mov rax, [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr] ; alloc_ptr
mov r10, [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_limit] ; limit_ptr
mov rax, [g_global_ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr] ; alloc_ptr
mov r10, [g_global_ee_alloc_context + OFFSETOF__ee_alloc_context__combined_limit] ; combined_limit

add r8, rax

cmp r8, r10
ja AllocFailed

mov qword ptr [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr], r8 ; update the alloc ptr
mov qword ptr [g_global_ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], r8 ; update the alloc ptr
mov [rax], rcx
mov [g_global_alloc_lock], -1

Expand All @@ -208,8 +208,8 @@ NESTED_ENTRY JIT_BoxFastUP, _TEXT
inc [g_global_alloc_lock]
jnz JIT_Box

mov rax, [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr] ; alloc_ptr
mov r10, [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_limit] ; limit_ptr
mov rax, [g_global_ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr] ; alloc_ptr
mov r10, [g_global_ee_alloc_context + OFFSETOF__ee_alloc_context__combined_limit] ; combined_limit

add r8, rax

Expand All @@ -219,7 +219,7 @@ NESTED_ENTRY JIT_BoxFastUP, _TEXT
test rdx, rdx
je NullRef

mov qword ptr [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr], r8 ; update the alloc ptr
mov qword ptr [g_global_ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], r8 ; update the alloc ptr
mov [rax], rcx
mov [g_global_alloc_lock], -1

Expand Down Expand Up @@ -287,15 +287,15 @@ LEAF_ENTRY AllocateStringFastUP, _TEXT
inc [g_global_alloc_lock]
jnz FramedAllocateString

mov rax, [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr] ; alloc_ptr
mov r10, [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_limit] ; limit_ptr
mov rax, [g_global_ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr] ; alloc_ptr
mov r10, [g_global_ee_alloc_context + OFFSETOF__ee_alloc_context__combined_limit] ; combined_limit

add r8, rax

cmp r8, r10
ja AllocFailed

mov qword ptr [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr], r8 ; update the alloc ptr
mov qword ptr [g_global_ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], r8 ; update the alloc ptr
mov [rax], r11
mov [g_global_alloc_lock], -1

Expand Down Expand Up @@ -343,16 +343,16 @@ LEAF_ENTRY JIT_NewArr1VC_UP, _TEXT
inc [g_global_alloc_lock]
jnz JIT_NewArr1

mov rax, [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr] ; alloc_ptr
mov r10, [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_limit] ; limit_ptr
mov rax, [g_global_ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr] ; alloc_ptr
mov r10, [g_global_ee_alloc_context + OFFSETOF__ee_alloc_context__combined_limit] ; combined_limit

add r8, rax
jc AllocFailed

cmp r8, r10
ja AllocFailed

mov qword ptr [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr], r8 ; update the alloc ptr
mov qword ptr [g_global_ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], r8 ; update the alloc ptr
mov [rax], rcx
mov [g_global_alloc_lock], -1

Expand Down Expand Up @@ -396,15 +396,15 @@ LEAF_ENTRY JIT_NewArr1OBJ_UP, _TEXT
inc [g_global_alloc_lock]
jnz JIT_NewArr1

mov rax, [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr] ; alloc_ptr
mov r10, [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_limit] ; limit_ptr
mov rax, [g_global_ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr] ; alloc_ptr
mov r10, [g_global_ee_alloc_context + OFFSETOF__ee_alloc_context__combined_limit] ; combined_limit

add r8, rax

cmp r8, r10
ja AllocFailed

mov qword ptr [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr], r8 ; update the alloc ptr
mov qword ptr [g_global_ee_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], r8 ; update the alloc ptr
mov [rax], rcx
mov [g_global_alloc_lock], -1

Expand Down
17 changes: 12 additions & 5 deletions src/coreclr/vm/amd64/asmconstants.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,11 +111,18 @@ ASMCONSTANTS_C_ASSERT(OFFSETOF__Thread__m_pFrame
#define Thread_m_pFrame OFFSETOF__Thread__m_pFrame


#define OFFSETOF__gc_alloc_context__alloc_ptr 0x0
ASMCONSTANT_OFFSETOF_ASSERT(gc_alloc_context, alloc_ptr);

#define OFFSETOF__gc_alloc_context__alloc_limit 0x8
ASMCONSTANT_OFFSETOF_ASSERT(gc_alloc_context, alloc_limit);
#define OFFSETOF__ee_alloc_context__alloc_ptr 0x0
ASMCONSTANTS_C_ASSERT(OFFSETOF__ee_alloc_context__alloc_ptr == offsetof(ee_alloc_context, gc_allocation_context) +
offsetof(gc_alloc_context, alloc_ptr));

// combined_limit is a new field we plan to add as part of the randomized allocation sampling feature. In preparation
// for adding that feature we are doing some refactoring. Once the feature is complete, either
// combined_limit = alloc_limit when sampling is disabled or combined_limit = MIN(sampling_limit, alloc_limit) when sampling
// is enabled. Because sampling is never enabled right now and the combined_limit field doesn't exist yet, this offset
// continues to point at the alloc_limit field.
#define OFFSETOF__ee_alloc_context__combined_limit 0x8
ASMCONSTANTS_C_ASSERT(OFFSETOF__ee_alloc_context__combined_limit == offsetof(ee_alloc_context, gc_allocation_context) +
offsetof(gc_alloc_context, alloc_limit));

#define OFFSETOF__ThreadExceptionState__m_pCurrentTracker 0x000
ASMCONSTANTS_C_ASSERT(OFFSETOF__ThreadExceptionState__m_pCurrentTracker
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/vm/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ typedef VPTR(class VirtualCallStubManager) PTR_VirtualCallStubManager;
typedef VPTR(class VirtualCallStubManagerManager) PTR_VirtualCallStubManagerManager;
typedef VPTR(class IGCHeap) PTR_IGCHeap;
typedef VPTR(class ModuleBase) PTR_ModuleBase;
typedef DPTR(struct gc_alloc_context) PTR_gc_alloc_context;

//
// _UNCHECKED_OBJECTREF is for code that can't deal with DEBUG OBJECTREFs
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/vm/comutilnative.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -848,7 +848,7 @@ FCIMPL0(INT64, GCInterface::GetAllocatedBytesForCurrentThread)

INT64 currentAllocated = 0;
Thread *pThread = GetThread();
gc_alloc_context* ac = &t_runtime_thread_locals.alloc_context;
gc_alloc_context* ac = &t_runtime_thread_locals.alloc_context.gc_allocation_context;
currentAllocated = ac->alloc_bytes + ac->alloc_bytes_uoh - (ac->alloc_limit - ac->alloc_ptr);

return currentAllocated;
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/vm/gccover.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1834,7 +1834,7 @@ void DoGcStress (PCONTEXT regs, NativeCodeVersion nativeCodeVersion)
// BUG(github #10318) - when not using allocation contexts, the alloc lock
// must be acquired here. Until fixed, this assert prevents random heap corruption.
assert(GCHeapUtilities::UseThreadAllocationContexts());
GCHeapUtilities::GetGCHeap()->StressHeap(&t_runtime_thread_locals.alloc_context);
GCHeapUtilities::GetGCHeap()->StressHeap(&t_runtime_thread_locals.alloc_context.gc_allocation_context);

// StressHeap can exit early w/o forcing a SuspendEE to trigger the instruction update
// We can not rely on the return code to determine if the instruction update happened
Expand Down
4 changes: 2 additions & 2 deletions src/coreclr/vm/gcenv.ee.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -443,7 +443,7 @@ gc_alloc_context * GCToEEInterface::GetAllocContext()
return nullptr;
}

return &t_runtime_thread_locals.alloc_context;
return &t_runtime_thread_locals.alloc_context.gc_allocation_context;
}

void GCToEEInterface::GcEnumAllocContexts(enum_alloc_context_func* fn, void* param)
Expand All @@ -469,7 +469,7 @@ void GCToEEInterface::GcEnumAllocContexts(enum_alloc_context_func* fn, void* par
}
else
{
fn(&g_global_alloc_context, param);
fn(g_global_alloc_context, param);
}
}

Expand Down
3 changes: 2 additions & 1 deletion src/coreclr/vm/gcheaputilities.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ bool g_sw_ww_enabled_for_gc_heap = false;

#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP

GVAL_IMPL_INIT(gc_alloc_context, g_global_alloc_context, {});
ee_alloc_context g_global_ee_alloc_context = {};
GPTR_IMPL_INIT(gc_alloc_context, g_global_alloc_context, &(g_global_ee_alloc_context.gc_allocation_context));

enum GC_LOAD_STATUS {
GC_LOAD_STATUS_BEFORE_START,
Expand Down
45 changes: 44 additions & 1 deletion src/coreclr/vm/gcheaputilities.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,45 @@ GPTR_DECL(IGCHeap, g_pGCHeap);
#ifndef DACCESS_COMPILE
extern "C" {
#endif // !DACCESS_COMPILE

// This struct allows adding some state that is only visible to the EE onto the standard gc_alloc_context
// Right now there is no additional state, but we are planning to add a field as part of the randomized allocation
// sampling feature. Adding the struct now is some preparatory refactoring to make that change easier.
// EE-private wrapper around the GC's gc_alloc_context. It lets the EE attach extra
// per-context state without changing the GC interface, while keeping that state
// physically adjacent to the GC-visible fields for cache locality.
// Right now there is no additional state, but a combined_limit field is planned as part
// of the randomized allocation sampling feature; adding the struct now is preparatory
// refactoring to make that change easier.
typedef struct _ee_alloc_context
{
// The GC-visible portion of the allocation context. Must remain the first/only data
// member while the field-offset helpers below assume the current layout.
gc_alloc_context gc_allocation_context;

// Resets the embedded gc_alloc_context to its initial (empty) state.
void init()
{
LIMITED_METHOD_CONTRACT;
gc_allocation_context.init();
}

// Once the randomized allocation sampling feature is enabled, this will return the lower of the alloc_limit
// or the sampling_limit. When sampling is disabled the combined limit is always the same as alloc_limit.
// Right now the feature is not yet complete so this method always returns alloc_limit.
uint8_t* getCombinedLimit()
{
LIMITED_METHOD_CONTRACT;
return gc_allocation_context.alloc_limit;
}

// Byte offset of the alloc_ptr field from the start of ee_alloc_context.
// Used by stub/assembly allocation helpers that address the context by raw offset.
static size_t getAllocPtrFieldOffset()
{
LIMITED_METHOD_CONTRACT;
return offsetof(ee_alloc_context, gc_allocation_context) + offsetof(gc_alloc_context, alloc_ptr);
}

// Once the randomized allocation sampling feature is enabled, this will return the offset of the combined_limit
// field. Right now it returns the offset of the alloc_limit field instead, because the
// combined_limit field does not physically exist yet.
static size_t getCombinedLimitFieldOffset()
{
LIMITED_METHOD_CONTRACT;
return offsetof(ee_alloc_context, gc_allocation_context) + offsetof(gc_alloc_context, alloc_limit);
}

} ee_alloc_context;

GPTR_DECL(uint8_t,g_lowest_address);
GPTR_DECL(uint8_t,g_highest_address);
GPTR_DECL(uint32_t,g_card_table);
Expand All @@ -21,7 +60,11 @@ GVAL_DECL(GCHeapType, g_heap_type);
// for all allocations. In order to avoid extra indirections in assembly
// allocation helpers, the EE owns the global allocation context and the
// GC will update it when it needs to.
GVAL_DECL(gc_alloc_context, g_global_alloc_context);
extern "C" ee_alloc_context g_global_ee_alloc_context;

// This is a pointer into the g_global_ee_alloc_context.gc_allocation_context, the GC visible portion
// of the global alloc context.
GPTR_DECL(gc_alloc_context, g_global_alloc_context);
#ifndef DACCESS_COMPILE
}
#endif // !DACCESS_COMPILE
Expand Down
4 changes: 2 additions & 2 deletions src/coreclr/vm/gchelpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ inline gc_alloc_context* GetThreadAllocContext()

assert(GCHeapUtilities::UseThreadAllocationContexts());

return &t_runtime_thread_locals.alloc_context;
return &t_runtime_thread_locals.alloc_context.gc_allocation_context;
}

// When not using per-thread allocation contexts, we (the EE) need to take care that
Expand Down Expand Up @@ -229,7 +229,7 @@ inline Object* Alloc(size_t size, GC_ALLOC_FLAGS flags)
else
{
GlobalAllocLockHolder holder(&g_global_alloc_lock);
gc_alloc_context *globalContext = &g_global_alloc_context;
gc_alloc_context *globalContext = g_global_alloc_context;
GCStress<gc_on_alloc>::MaybeTrigger(globalContext);
retVal = GCHeapUtilities::GetGCHeap()->Alloc(globalContext, size, flags);
}
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/vm/gcstress.h
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@ namespace _GCStress
// BUG(github #10318) - when not using allocation contexts, the alloc lock
// must be acquired here. Until fixed, this assert prevents random heap corruption.
_ASSERTE(GCHeapUtilities::UseThreadAllocationContexts());
GCHeapUtilities::GetGCHeap()->StressHeap(&t_runtime_thread_locals.alloc_context);
GCHeapUtilities::GetGCHeap()->StressHeap(&t_runtime_thread_locals.alloc_context.gc_allocation_context);
}

FORCEINLINE
Expand Down
16 changes: 8 additions & 8 deletions src/coreclr/vm/i386/jitinterfacex86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -237,29 +237,29 @@ void JIT_TrialAlloc::EmitCore(CPUSTUBLINKER *psl, CodeLabel *noLock, CodeLabel *

if (flags & (ALIGN8 | SIZE_IN_EAX | ALIGN8OBJ))
{
// MOV EBX, [edx]gc_alloc_context.alloc_ptr
psl->X86EmitOffsetModRM(0x8B, kEBX, kEDX, offsetof(gc_alloc_context, alloc_ptr));
// MOV EBX, [edx]alloc_context.gc_allocation_context.alloc_ptr
psl->X86EmitOffsetModRM(0x8B, kEBX, kEDX, ee_alloc_context::getAllocPtrFieldOffset());
// add EAX, EBX
psl->Emit16(0xC303);
if (flags & ALIGN8)
EmitAlignmentRoundup(psl, kEBX, kEAX, flags); // bump EAX up size by 12 if EBX unaligned (so that we are aligned)
}
else
{
// add eax, [edx]gc_alloc_context.alloc_ptr
psl->X86EmitOffsetModRM(0x03, kEAX, kEDX, offsetof(gc_alloc_context, alloc_ptr));
// add eax, [edx]alloc_context.gc_allocation_context.alloc_ptr
psl->X86EmitOffsetModRM(0x03, kEAX, kEDX, ee_alloc_context::getAllocPtrFieldOffset());
}

// cmp eax, [edx]gc_alloc_context.alloc_limit
psl->X86EmitOffsetModRM(0x3b, kEAX, kEDX, offsetof(gc_alloc_context, alloc_limit));
// cmp eax, [edx]alloc_context.combined_limit
psl->X86EmitOffsetModRM(0x3b, kEAX, kEDX, ee_alloc_context::getCombinedLimitFieldOffset());

// ja noAlloc
psl->X86EmitCondJump(noAlloc, X86CondCode::kJA);

// Fill in the allocation and get out.

// mov [edx]gc_alloc_context.alloc_ptr, eax
psl->X86EmitIndexRegStore(kEDX, offsetof(gc_alloc_context, alloc_ptr), kEAX);
// mov [edx]alloc_context.gc_allocation_context.alloc_ptr, eax
psl->X86EmitIndexRegStore(kEDX, ee_alloc_context::getAllocPtrFieldOffset(), kEAX);

if (flags & (ALIGN8 | SIZE_IN_EAX | ALIGN8OBJ))
{
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/vm/i386/stublinkerx86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2434,7 +2434,7 @@ namespace
{
// Returns the GC-visible allocation context embedded in the current thread's
// runtime thread-local storage. Stub code calls this helper to locate the
// per-thread alloc context without inlining the TLS access.
gc_alloc_context* STDCALL GetAllocContextHelper()
{
return &t_runtime_thread_locals.alloc_context.gc_allocation_context;
}
}
#endif
Expand Down
Loading

0 comments on commit ca15926

Please sign in to comment.