diff --git a/src/vm/comthreadpool.cpp b/src/vm/comthreadpool.cpp index e9ffbaf92fbc..7f629b508b32 100644 --- a/src/vm/comthreadpool.cpp +++ b/src/vm/comthreadpool.cpp @@ -273,7 +273,7 @@ FCIMPL0(FC_BOOL_RET, ThreadPoolNative::NotifyRequestComplete) } if (needReset) - pThread->InternalReset (FALSE, TRUE, TRUE, FALSE); + pThread->InternalReset(FALSE, TRUE, TRUE, FALSE); HELPER_METHOD_FRAME_END(); } diff --git a/src/vm/threadpoolrequest.cpp b/src/vm/threadpoolrequest.cpp index 111578e76842..8d47e6b810b5 100644 --- a/src/vm/threadpoolrequest.cpp +++ b/src/vm/threadpoolrequest.cpp @@ -30,13 +30,15 @@ #endif #include "appdomain.inl" -UnManagedPerAppDomainTPCount PerAppDomainTPCountList::s_unmanagedTPCount; +BYTE PerAppDomainTPCountList::s_padding[64 - sizeof(LONG)]; +// Make this point to unmanaged TP in case, no appdomains have initialized yet. +// Cacheline aligned, hot variable +DECLSPEC_ALIGN(64) LONG PerAppDomainTPCountList::s_ADHint = -1; +// Move out of from preceeding variables' cache line +DECLSPEC_ALIGN(64) UnManagedPerAppDomainTPCount PerAppDomainTPCountList::s_unmanagedTPCount; //The list of all per-appdomain work-request counts. -ArrayListStatic PerAppDomainTPCountList::s_appDomainIndexList; - -//Make this point to unmanaged TP in case, no appdomains have initialized yet. -LONG PerAppDomainTPCountList::s_ADHint=-1; +ArrayListStatic PerAppDomainTPCountList::s_appDomainIndexList; void PerAppDomainTPCountList::InitAppDomainIndexList() { @@ -74,7 +76,14 @@ TPIndex PerAppDomainTPCountList::AddNewTPIndex() return index; } +#ifdef _MSC_VER + // Disable this warning - we intentionally want __declspec(align()) to insert trailing padding for us +#pragma warning(disable:4316) // Object allocated on the heap may not be aligned for this type. +#endif ManagedPerAppDomainTPCount * pAdCount = new ManagedPerAppDomainTPCount(index); +#ifdef _MSC_VER +#pragma warning(default:4316) // Object allocated on the heap may not be aligned for this type. +#endif pAdCount->ResetState(); IfFailThrow(s_appDomainIndexList.Append(pAdCount)); @@ -274,34 +283,33 @@ LONG PerAppDomainTPCountList::GetAppDomainIndexForThreadpoolDispatch() } CONTRACTL_END; + LONG hint = s_ADHint; + DWORD count = s_appDomainIndexList.GetCount(); IPerAppDomainTPCount * pAdCount; - DWORD DwnumADs = s_appDomainIndexList.GetCount(); DWORD Dwi; - LONG hint = s_ADHint; - LONG temphint = hint; - if (hint == -1) + if (hint != -1) { - pAdCount = &s_unmanagedTPCount; + pAdCount = dac_cast(s_appDomainIndexList.Get(hint)); } else { - pAdCount = dac_cast(s_appDomainIndexList.Get(hint)); + pAdCount = &s_unmanagedTPCount; } + //temphint ensures that the check for appdomains proceeds in a pure round robin fashion. + LONG temphint = hint; + _ASSERTE( pAdCount); if (pAdCount->TakeActiveRequest()) goto HintDone; - //temphint ensures that the check for appdomains proceeds in a pure round robin fashion. - temphint = hint; - //If there is no work in any appdomains, check the unmanaged queue, hint = -1; - for (Dwi=0;Dwi 0) { @@ -599,7 +605,7 @@ void ManagedPerAppDomainTPCount::SetAppDomainRequestsActive() _ASSERTE(m_id.m_dwId != 0); #ifndef DACCESS_COMPILE - LONG count = m_numRequestsPending; + LONG count = VolatileLoad(&m_numRequestsPending); while (count != ADUnloading) { LONG prev = FastInterlockCompareExchange(&m_numRequestsPending, count+1, count); @@ -628,12 +634,15 @@ void ManagedPerAppDomainTPCount::ClearAppDomainRequestsActive(BOOL bADU) if (bADU) { - m_numRequestsPending = ADUnloading; + VolatileStore(&m_numRequestsPending, ADUnloading); } else { - LONG count = m_numRequestsPending; - while (count != ADUnloading && count > 0) + LONG count = VolatileLoad(&m_numRequestsPending); + // Test is: count > 0 && count != ADUnloading + // Since: const ADUnloading == -1 + // Both are tested: (count > 0) means following also true (count != ADUnloading) + while (count > 0) { LONG prev = FastInterlockCompareExchange(&m_numRequestsPending, 0, count); if (prev == count) @@ -646,8 +655,11 @@ void ManagedPerAppDomainTPCount::ClearAppDomainRequestsActive(BOOL bADU) bool ManagedPerAppDomainTPCount::TakeActiveRequest() { LIMITED_METHOD_CONTRACT; - LONG count = m_numRequestsPending; - while (count != ADUnloading && count > 0) + LONG count = VolatileLoad(&m_numRequestsPending); + // Test is: count > 0 && count != ADUnloading + // Since: const ADUnloading == -1 + // Both are tested: (count > 0) means following also true (count != ADUnloading) + while (count > 0) { LONG prev = FastInterlockCompareExchange(&m_numRequestsPending, count-1, count); if (prev == count) @@ -678,7 +690,7 @@ void ManagedPerAppDomainTPCount::ClearAppDomainUnloading() // it should be, but if it's smaller then we will be permanently out of sync with the // AD. // - m_numRequestsPending = ThreadpoolMgr::NumberOfProcessors; + VolatileStore(&m_numRequestsPending, (LONG)ThreadpoolMgr::NumberOfProcessors); if (!CLRThreadpoolHosted() && ThreadpoolMgr::IsInitialized()) { ThreadpoolMgr::MaybeAddWorkingWorker(); @@ -715,73 +727,74 @@ void ManagedPerAppDomainTPCount::DispatchWorkItem(bool* foundWork, bool* wasNotR //We are in a state where AppDomain Unload has begun, but not all threads have been //forced out of the unloading domain. This check below will prevent us from getting //unmanaged AD unloaded exceptions while trying to enter an unloaded appdomain. - - if(IsAppDomainUnloading()) - { - __SwitchToThread(0, CALLER_LIMITS_SPINNING); - return; - } - - CONTRACTL + if (!IsAppDomainUnloading()) { - MODE_PREEMPTIVE; - THROWS; - GC_TRIGGERS; - } - CONTRACTL_END; - - GCX_COOP(); - BEGIN_SO_INTOLERANT_CODE(pThread); + CONTRACTL + { + MODE_PREEMPTIVE; + THROWS; + GC_TRIGGERS; + } + CONTRACTL_END; - // - // NOTE: there is a potential race between the time we retrieve the app - // domain pointer, and the time which this thread enters the domain. - // - // To solve the race, we rely on the fact that there is a thread sync (via - // GC) between releasing an app domain's handle, and destroying the - // app domain. Thus it is important that we not go into preemptive gc mode - // in that window. - // + GCX_COOP(); + BEGIN_SO_INTOLERANT_CODE(pThread); - { - ADID appDomainId(m_id); + // + // NOTE: there is a potential race between the time we retrieve the app + // domain pointer, and the time which this thread enters the domain. + // + // To solve the race, we rely on the fact that there is a thread sync (via + // GC) between releasing an app domain's handle, and destroying the + // app domain. Thus it is important that we not go into preemptive gc mode + // in that window. + // - // This TPIndex may have been recycled since we chose it for workitem dispatch. - // Thus it's possible for the ADID we just read to refer to an AppDomain that's still - // being created. If so, the new AppDomain will necessarily have zero requests - // pending (because the destruction of the previous AD that used this TPIndex - // will have reset this object). We don't want to call into such an AppDomain. -// TODO: fix this another way! -// if (IsRequestPending()) { - //This holder resets our thread's security state when exiting this scope - ThreadSecurityStateHolder secState(pThread); - - ManagedThreadBase::ThreadPool(appDomainId, QueueUserWorkItemManagedCallback, wasNotRecalled); - } + ADID appDomainId(m_id); + + // This TPIndex may have been recycled since we chose it for workitem dispatch. + // Thus it's possible for the ADID we just read to refer to an AppDomain that's still + // being created. If so, the new AppDomain will necessarily have zero requests + // pending (because the destruction of the previous AD that used this TPIndex + // will have reset this object). We don't want to call into such an AppDomain. + // TODO: fix this another way! + // if (IsRequestPending()) + { + //This holder resets our thread's security state when exiting this scope + ThreadSecurityStateHolder secState(pThread); - if (pThread->IsAbortRequested()) - { - // thread was aborted, and may not have had a chance to tell us it has work. - ENTER_DOMAIN_ID(m_id) + ManagedThreadBase::ThreadPool(appDomainId, QueueUserWorkItemManagedCallback, wasNotRecalled); + } + + if (pThread->IsAbortRequested()) { - ThreadpoolMgr::SetAppDomainRequestsActive(); - ThreadpoolMgr::QueueUserWorkItem(NULL, - NULL, - 0, - FALSE); + // thread was aborted, and may not have had a chance to tell us it has work. + ENTER_DOMAIN_ID(m_id) + { + ThreadpoolMgr::SetAppDomainRequestsActive(); + ThreadpoolMgr::QueueUserWorkItem(NULL, + NULL, + 0, + FALSE); + } + END_DOMAIN_TRANSITION; } - END_DOMAIN_TRANSITION; } - } - // We should have released all locks. - _ASSERTE(g_fEEShutDown || pThread->m_dwLockCount == 0 || pThread->m_fRudeAborted); - - END_SO_INTOLERANT_CODE; + // We should have released all locks. + _ASSERTE(g_fEEShutDown || pThread->m_dwLockCount == 0 || pThread->m_fRudeAborted); + + END_SO_INTOLERANT_CODE; - *foundWork = true; + *foundWork = true; + } + else + { + __SwitchToThread(0, CALLER_LIMITS_SPINNING); + return; + } } #endif // !DACCESS_COMPILE diff --git a/src/vm/threadpoolrequest.h b/src/vm/threadpoolrequest.h index d0c47e677cd6..07bcbb7dbad0 100644 --- a/src/vm/threadpoolrequest.h +++ b/src/vm/threadpoolrequest.h @@ -85,14 +85,16 @@ class ManagedPerAppDomainTPCount : public IPerAppDomainTPCount { inline void ResetState() { LIMITED_METHOD_CONTRACT; - m_numRequestsPending = 0; + VolatileStore(&m_numRequestsPending, (LONG)0); m_id.m_dwId = 0; } inline BOOL IsRequestPending() { LIMITED_METHOD_CONTRACT; - return m_numRequestsPending != ADUnloading && m_numRequestsPending > 0; + + LONG count = VolatileLoad(&m_numRequestsPending); + return count != ADUnloading && count > 0; } void SetAppDomainRequestsActive(); @@ -106,7 +108,7 @@ class ManagedPerAppDomainTPCount : public IPerAppDomainTPCount { //has started running yet. That implies, no requests should be pending //or dispatched to this structure yet. - _ASSERTE(m_numRequestsPending != ADUnloading); + _ASSERTE(VolatileLoad(&m_numRequestsPending) != ADUnloading); _ASSERTE(m_id.m_dwId == 0); m_id = id; @@ -119,7 +121,7 @@ class ManagedPerAppDomainTPCount : public IPerAppDomainTPCount { //has started running yet. That implies, no requests should be pending //or dispatched to this structure yet. - _ASSERTE(m_numRequestsPending != ADUnloading); + _ASSERTE(VolatileLoad(&m_numRequestsPending) != ADUnloading); _ASSERTE(m_id.m_dwId == 0); _ASSERTE(m_index.m_dwIndex == UNUSED_THREADPOOL_INDEX); @@ -135,7 +137,7 @@ class ManagedPerAppDomainTPCount : public IPerAppDomainTPCount { //added removed at this time. So, make sure that the per-appdomain structures that //have been cleared(reclaimed) don't have any pending requests to them. - _ASSERTE(m_numRequestsPending != ADUnloading); + _ASSERTE(VolatileLoad(&m_numRequestsPending) != ADUnloading); _ASSERTE(m_id.m_dwId == 0); return TRUE; @@ -159,22 +161,27 @@ class ManagedPerAppDomainTPCount : public IPerAppDomainTPCount { inline void SetAppDomainUnloading() { LIMITED_METHOD_CONTRACT; - m_numRequestsPending = ADUnloading; + VolatileStore(&m_numRequestsPending, ADUnloading); } inline void ClearAppDomainUnloading(); inline BOOL IsAppDomainUnloading() { - return m_numRequestsPending.Load() == ADUnloading; + return VolatileLoad(&m_numRequestsPending) == ADUnloading; } void DispatchWorkItem(bool* foundWork, bool* wasNotRecalled); private: - Volatile m_numRequestsPending; ADID m_id; TPIndex m_index; + DECLSPEC_ALIGN(64) struct { + BYTE m_padding1[64 - sizeof(LONG)]; + // Only use with VolatileLoad+VolatileStore+FastInterlockCompareExchange + LONG m_numRequestsPending; + BYTE m_padding2[64]; + }; }; //-------------------------------------------------------------------------- @@ -212,13 +219,13 @@ class UnManagedPerAppDomainTPCount : public IPerAppDomainTPCount { { LIMITED_METHOD_CONTRACT; m_NumRequests = 0; - m_outstandingThreadRequestCount = 0; + VolatileStore(&m_outstandingThreadRequestCount, (LONG)0); } inline BOOL IsRequestPending() { LIMITED_METHOD_CONTRACT; - return m_outstandingThreadRequestCount != 0 ? TRUE : FALSE; + return VolatileLoad(&m_outstandingThreadRequestCount) != (LONG)0 ? TRUE : FALSE; } void SetAppDomainRequestsActive(); @@ -226,7 +233,7 @@ class UnManagedPerAppDomainTPCount : public IPerAppDomainTPCount { inline void ClearAppDomainRequestsActive(BOOL bADU) { LIMITED_METHOD_CONTRACT; - m_outstandingThreadRequestCount = 0; + VolatileStore(&m_outstandingThreadRequestCount, (LONG)0); } bool TakeActiveRequest(); @@ -272,9 +279,14 @@ class UnManagedPerAppDomainTPCount : public IPerAppDomainTPCount { } private: - ULONG m_NumRequests; - Volatile m_outstandingThreadRequestCount; SpinLock m_lock; + ULONG m_NumRequests; + DECLSPEC_ALIGN(64) struct { + BYTE m_padding1[64 - sizeof(LONG)]; + // Only use with VolatileLoad+VolatileStore+FastInterlockCompareExchange + LONG m_outstandingThreadRequestCount; + BYTE m_padding2[64]; + }; }; //-------------------------------------------------------------------------- @@ -330,29 +342,12 @@ class PerAppDomainTPCountList{ private: static DWORD FindFirstFreeTpEntry(); + static BYTE s_padding[64 - sizeof(LONG)]; + static LONG s_ADHint; static UnManagedPerAppDomainTPCount s_unmanagedTPCount; //The list of all per-appdomain work-request counts. - static ArrayListStatic s_appDomainIndexList; - - static LONG s_ADHint; + static ArrayListStatic s_appDomainIndexList; }; -#endif //_THREADPOOL_REQUEST_H - - - - - - - - - - - - - - - - - +#endif //_THREADPOOL_REQUEST_H \ No newline at end of file diff --git a/src/vm/win32threadpool.cpp b/src/vm/win32threadpool.cpp index 2c58710f112e..1121417492e4 100644 --- a/src/vm/win32threadpool.cpp +++ b/src/vm/win32threadpool.cpp @@ -85,7 +85,8 @@ SVAL_IMPL(LONG,ThreadpoolMgr,MaxFreeCPThreads); // = MaxFreeCP Volatile ThreadpoolMgr::NumCPInfrastructureThreads = 0; // number of threads currently busy handling draining cycle -SVAL_IMPL(ThreadpoolMgr::ThreadCounter, ThreadpoolMgr, WorkerCounter); +// Cacheline aligned, hot variable +DECLSPEC_ALIGN(64) SVAL_IMPL(ThreadpoolMgr::ThreadCounter, ThreadpoolMgr, WorkerCounter); SVAL_IMPL(LONG,ThreadpoolMgr,MinLimitTotalWorkerThreads); // = MaxLimitCPThreadsPerCPU * number of CPUS SVAL_IMPL(LONG,ThreadpoolMgr,MaxLimitTotalWorkerThreads); // = MaxLimitCPThreadsPerCPU * number of CPUS @@ -95,9 +96,11 @@ LONG ThreadpoolMgr::cpuUtilizationAverage = 0; HillClimbing ThreadpoolMgr::HillClimbingInstance; -Volatile ThreadpoolMgr::PriorCompletedWorkRequests = 0; -Volatile ThreadpoolMgr::PriorCompletedWorkRequestsTime; -Volatile ThreadpoolMgr::NextCompletedWorkRequestsTime; +// Cacheline aligned, 3 hot variables updated in a group +DECLSPEC_ALIGN(64) LONG ThreadpoolMgr::PriorCompletedWorkRequests = 0; +DWORD ThreadpoolMgr::PriorCompletedWorkRequestsTime; +DWORD ThreadpoolMgr::NextCompletedWorkRequestsTime; + LARGE_INTEGER ThreadpoolMgr::CurrentSampleStartTime; int ThreadpoolMgr::ThreadAdjustmentInterval; @@ -111,8 +114,12 @@ int ThreadpoolMgr::ThreadAdjustmentInterval; #define SUSPEND_TIME GATE_THREAD_DELAY+100 // milliseconds to suspend during SuspendProcessing LONG ThreadpoolMgr::Initialization=0; // indicator of whether the threadpool is initialized. -Volatile ThreadpoolMgr::LastDequeueTime; // used to determine if work items are getting thread starved -int ThreadpoolMgr::offset_counter = 0; + +// Cacheline aligned, hot variable +DECLSPEC_ALIGN(64) unsigned int ThreadpoolMgr::LastDequeueTime; // used to determine if work items are getting thread starved + +// Move out of from preceeding variables' cache line +DECLSPEC_ALIGN(64) int ThreadpoolMgr::offset_counter = 0; SPTR_IMPL(WorkRequest,ThreadpoolMgr,WorkRequestHead); // Head of work request queue SPTR_IMPL(WorkRequest,ThreadpoolMgr,WorkRequestTail); // Head of work request queue @@ -135,15 +142,19 @@ CLRSemaphore* ThreadpoolMgr::RetiredWorkerSemaphore; CrstStatic ThreadpoolMgr::TimerQueueCriticalSection; HANDLE ThreadpoolMgr::TimerThread=NULL; Thread *ThreadpoolMgr::pTimerThread=NULL; -DWORD ThreadpoolMgr::LastTickCount; + +// Cacheline aligned, hot variable +DECLSPEC_ALIGN(64) DWORD ThreadpoolMgr::LastTickCount; #ifdef _DEBUG DWORD ThreadpoolMgr::TickCountAdjustment=0; #endif -LONG ThreadpoolMgr::GateThreadStatus=GATE_THREAD_STATUS_NOT_RUNNING; +// Cacheline aligned, hot variable +DECLSPEC_ALIGN(64) LONG ThreadpoolMgr::GateThreadStatus=GATE_THREAD_STATUS_NOT_RUNNING; -ThreadpoolMgr::RecycledListsWrapper ThreadpoolMgr::RecycledLists; +// Move out of from preceeding variables' cache line +DECLSPEC_ALIGN(64) ThreadpoolMgr::RecycledListsWrapper ThreadpoolMgr::RecycledLists; ThreadpoolMgr::TimerInfo *ThreadpoolMgr::TimerInfosToBeRecycled = NULL; @@ -1189,7 +1200,7 @@ void ThreadpoolMgr::AdjustMaxWorkersActive() DWORD currentTicks = GetTickCount(); LONG totalNumCompletions = Thread::GetTotalThreadPoolCompletionCount(); - LONG numCompletions = totalNumCompletions - PriorCompletedWorkRequests; + LONG numCompletions = totalNumCompletions - VolatileLoad(&PriorCompletedWorkRequests); LARGE_INTEGER startTime = CurrentSampleStartTime; LARGE_INTEGER endTime; @@ -1252,9 +1263,10 @@ void ThreadpoolMgr::AdjustMaxWorkersActive() } PriorCompletedWorkRequests = totalNumCompletions; + NextCompletedWorkRequestsTime = currentTicks + ThreadAdjustmentInterval; + MemoryBarrier(); // flush previous writes (especially NextCompletedWorkRequestsTime) PriorCompletedWorkRequestsTime = currentTicks; - NextCompletedWorkRequestsTime = PriorCompletedWorkRequestsTime + ThreadAdjustmentInterval; - CurrentSampleStartTime = endTime; + CurrentSampleStartTime = endTime;; } } @@ -1271,7 +1283,8 @@ void ThreadpoolMgr::MaybeAddWorkingWorker() _ASSERTE(!CLRThreadpoolHosted()); - ThreadCounter::Counts counts = WorkerCounter.GetCleanCounts(); + // counts volatile read paired with CompareExchangeCounts loop set + ThreadCounter::Counts counts = WorkerCounter.DangerousGetDirtyCounts(); ThreadCounter::Counts newCounts; while (true) { @@ -1325,7 +1338,9 @@ void ThreadpoolMgr::MaybeAddWorkingWorker() // Of course, there's no guarantee *that* will work - but hopefully enough time will have passed // to allow whoever's using all the memory right now to release some. // - counts = WorkerCounter.GetCleanCounts(); + + // counts volatile read paired with CompareExchangeCounts loop set + counts = WorkerCounter.DangerousGetDirtyCounts(); while (true) { // @@ -2286,7 +2301,8 @@ DWORD __stdcall ThreadpoolMgr::WorkerThreadStart(LPVOID lpArgs) #ifdef FEATURE_COMINTEROP if (pThread->SetApartment(Thread::AS_InMTA, TRUE) != Thread::AS_InMTA) { - counts = WorkerCounter.GetCleanCounts(); + // counts volatile read paired with CompareExchangeCounts loop set + counts = WorkerCounter.DangerousGetDirtyCounts(); while (true) { newCounts = counts; @@ -2311,7 +2327,8 @@ DWORD __stdcall ThreadpoolMgr::WorkerThreadStart(LPVOID lpArgs) // make sure there's really work. If not, go back to sleep - counts = WorkerCounter.GetCleanCounts(); + // counts volatile read paired with CompareExchangeCounts loop set + counts = WorkerCounter.DangerousGetDirtyCounts(); while (true) { _ASSERTE(counts.NumActive > 0); @@ -2403,6 +2420,7 @@ DWORD __stdcall ThreadpoolMgr::WorkerThreadStart(LPVOID lpArgs) if (WAIT_OBJECT_0 == result) { foundWork = true; + counts = WorkerCounter.GetCleanCounts(); FireEtwThreadPoolWorkerThreadRetirementStop(counts.NumActive, counts.NumRetired, GetClrInstanceId()); goto Work; @@ -2435,7 +2453,9 @@ DWORD __stdcall ThreadpoolMgr::WorkerThreadStart(LPVOID lpArgs) // if we don't hit zero, then there's another retired thread that will pick up this signal. So it's ok // to exit. // - counts = WorkerCounter.GetCleanCounts(); + + // counts volatile read paired with CompareExchangeCounts loop set + counts = WorkerCounter.DangerousGetDirtyCounts(); while (true) { if (counts.NumRetired == 0) @@ -2491,7 +2511,8 @@ DWORD __stdcall ThreadpoolMgr::WorkerThreadStart(LPVOID lpArgs) DangerousNonHostedSpinLockHolder tal(&ThreadAdjustmentLock); - counts = WorkerCounter.GetCleanCounts(); + // counts volatile read paired with CompareExchangeCounts loop set + counts = WorkerCounter.DangerousGetDirtyCounts(); while (true) { if (counts.NumActive == counts.NumWorking) @@ -3830,7 +3851,9 @@ DWORD __stdcall ThreadpoolMgr::CompletionPortThreadStart(LPVOID lpArgs) // and the state of the rest of the IOCP threads, we need to figure out whether to de-activate (exit) this thread, retire this thread, // or transition to "working." // - oldCounts = CPThreadCounter.GetCleanCounts(); + + // counts volatile read paired with CompareExchangeCounts loop set + oldCounts = CPThreadCounter.DangerousGetDirtyCounts(); newCounts = oldCounts; enterRetirement = false; @@ -3913,7 +3936,8 @@ DWORD __stdcall ThreadpoolMgr::CompletionPortThreadStart(LPVOID lpArgs) // We can now exit; decrement the retired count. while (true) { - oldCounts = CPThreadCounter.GetCleanCounts(); + // counts volatile read paired with CompareExchangeCounts loop set + oldCounts = CPThreadCounter.DangerousGetDirtyCounts(); newCounts = oldCounts; newCounts.NumRetired--; if (oldCounts == CPThreadCounter.CompareExchangeCounts(newCounts, oldCounts)) @@ -3927,7 +3951,8 @@ DWORD __stdcall ThreadpoolMgr::CompletionPortThreadStart(LPVOID lpArgs) // put back into rotation -- we need a thread while (true) { - oldCounts = CPThreadCounter.GetCleanCounts(); + // counts volatile read paired with CompareExchangeCounts loop set + oldCounts = CPThreadCounter.DangerousGetDirtyCounts(); newCounts = oldCounts; newCounts.NumRetired--; newCounts.NumActive++; @@ -3968,7 +3993,8 @@ DWORD __stdcall ThreadpoolMgr::CompletionPortThreadStart(LPVOID lpArgs) //GC event, and some have not. while (true) { - oldCounts = CPThreadCounter.GetCleanCounts(); + // counts volatile read paired with CompareExchangeCounts loop set + oldCounts = CPThreadCounter.DangerousGetDirtyCounts(); newCounts = oldCounts; newCounts.NumWorking--; if (oldCounts == CPThreadCounter.CompareExchangeCounts(newCounts, oldCounts)) @@ -3981,7 +4007,8 @@ DWORD __stdcall ThreadpoolMgr::CompletionPortThreadStart(LPVOID lpArgs) while (true) { - oldCounts = CPThreadCounter.GetCleanCounts(); + // counts volatile read paired with CompareExchangeCounts loop set + oldCounts = CPThreadCounter.DangerousGetDirtyCounts(); newCounts = oldCounts; newCounts.NumWorking++; if (oldCounts == CPThreadCounter.CompareExchangeCounts(newCounts, oldCounts)) @@ -4256,7 +4283,8 @@ void ThreadpoolMgr::GrowCompletionPortThreadpoolIfNeeded() // if thread creation failed, we have to adjust the counts back down. while (true) { - oldCounts = CPThreadCounter.GetCleanCounts(); + // counts volatile read paired with CompareExchangeCounts loop set + oldCounts = CPThreadCounter.DangerousGetDirtyCounts(); newCounts = oldCounts; newCounts.NumActive--; newCounts.NumWorking--; @@ -4686,7 +4714,8 @@ DWORD __stdcall ThreadpoolMgr::GateThreadStart(LPVOID lpArgs) // IOCP threads are created as "active" and "working" while (true) { - oldCounts = CPThreadCounter.GetCleanCounts(); + // counts volatile read paired with CompareExchangeCounts loop set + oldCounts = CPThreadCounter.DangerousGetDirtyCounts(); newCounts = oldCounts; newCounts.NumActive++; newCounts.NumWorking++; @@ -4794,7 +4823,7 @@ BOOL ThreadpoolMgr::SufficientDelaySinceLastDequeue() #define DEQUEUE_DELAY_THRESHOLD (GATE_THREAD_DELAY * 2) - unsigned delay = GetTickCount() - LastDequeueTime; + unsigned delay = GetTickCount() - VolatileLoad(&LastDequeueTime); unsigned tooLong; if(cpuUtilization < CpuUtilizationLow) diff --git a/src/vm/win32threadpool.h b/src/vm/win32threadpool.h index ba800e5073ed..45880d0b0f4c 100644 --- a/src/vm/win32threadpool.h +++ b/src/vm/win32threadpool.h @@ -373,9 +373,17 @@ class ThreadpoolMgr } counts; + // padding to ensure we get our own cache line + BYTE padding[64]; + Counts GetCleanCounts() { LIMITED_METHOD_CONTRACT; +#ifdef _WIN64 + // VolatileLoad x64 bit read is atomic + return DangerousGetDirtyCounts(); +#else // !_WIN64 + // VolatileLoad may result in torn read Counts result; #ifndef DACCESS_COMPILE result.AsLongLong = FastInterlockCompareExchangeLong(&counts.AsLongLong, 0, 0); @@ -384,6 +392,7 @@ class ThreadpoolMgr result.AsLongLong = 0; //prevents prefast warning for DAC builds #endif return result; +#endif // !_WIN64 } // @@ -523,7 +532,7 @@ class ThreadpoolMgr static inline void UpdateLastDequeueTime() { LIMITED_METHOD_CONTRACT; - LastDequeueTime = GetTickCount(); + VolatileStore(&LastDequeueTime, (unsigned int)GetTickCount()); } static BOOL CreateTimerQueueTimer(PHANDLE phNewTimer, @@ -1133,8 +1142,10 @@ class ThreadpoolMgr if (CLRThreadpoolHosted()) return false; - DWORD requiredInterval = NextCompletedWorkRequestsTime - PriorCompletedWorkRequestsTime; - DWORD elapsedInterval = GetTickCount() - PriorCompletedWorkRequestsTime; + DWORD priorTime = PriorCompletedWorkRequestsTime; + MemoryBarrier(); // read fresh value for NextCompletedWorkRequestsTime below + DWORD requiredInterval = NextCompletedWorkRequestsTime - priorTime; + DWORD elapsedInterval = GetTickCount() - priorTime; if (elapsedInterval >= requiredInterval) { ThreadCounter::Counts counts = WorkerCounter.GetCleanCounts(); @@ -1294,13 +1305,13 @@ class ThreadpoolMgr SVAL_DECL(LONG,MinLimitTotalWorkerThreads); // same as MinLimitTotalCPThreads SVAL_DECL(LONG,MaxLimitTotalWorkerThreads); // same as MaxLimitTotalCPThreads - static Volatile LastDequeueTime; // used to determine if work items are getting thread starved + static unsigned int LastDequeueTime; // used to determine if work items are getting thread starved static HillClimbing HillClimbingInstance; - static Volatile PriorCompletedWorkRequests; - static Volatile PriorCompletedWorkRequestsTime; - static Volatile NextCompletedWorkRequestsTime; + static LONG PriorCompletedWorkRequests; + static DWORD PriorCompletedWorkRequestsTime; + static DWORD NextCompletedWorkRequestsTime; static LARGE_INTEGER CurrentSampleStartTime;