This repository has been archived by the owner on Jan 23, 2023. It is now read-only.

Cache pad threadpool queue indices (false sharing)
benaadams committed Jun 23, 2016
1 parent 498504c commit aef37e4
Showing 1 changed file with 41 additions and 34 deletions.
src/mscorlib/src/System/Threading/ThreadPool.cs
@@ -137,6 +137,13 @@ internal struct PaddedWorkItem
public IThreadPoolWorkItem Item;
}

+// Ensure a full cache line is occupied so there is no false sharing with other indices
+[StructLayout(LayoutKind.Explicit, Size = 64)]
+internal struct PaddedIndex
+{
+[FieldOffset(0)]
+public volatile int Index;
+}

internal class WorkStealingQueue
{
@@ -151,14 +158,14 @@ internal class WorkStealingQueue
private const int START_INDEX = 0;
#endif

-private volatile int m_headIndex = START_INDEX;
-private volatile int m_tailIndex = START_INDEX;
+private PaddedIndex m_headIndex = new PaddedIndex() { Index = START_INDEX };
+private PaddedIndex m_tailIndex = new PaddedIndex() { Index = START_INDEX };

private SpinLock m_foreignLock = new SpinLock(false);

public void LocalPush(IThreadPoolWorkItem obj)
{
-int tail = m_tailIndex;
+int tail = m_tailIndex.Index;

// We're going to increment the tail; if we'll overflow, then we need to reset our counts
if (tail == int.MaxValue)
@@ -168,7 +175,7 @@ public void LocalPush(IThreadPoolWorkItem obj)
{
m_foreignLock.Enter(ref lockTaken);

-if (m_tailIndex == int.MaxValue)
+if (m_tailIndex.Index == int.MaxValue)
{
//
// Rather than resetting to zero, we'll just mask off the bits we don't care about.
@@ -180,9 +187,9 @@ public void LocalPush(IThreadPoolWorkItem obj)
// for the head to end up > than the tail, since you can't set any more bits than all of
// them.
//
-m_headIndex = m_headIndex & m_mask;
-m_tailIndex = tail = m_tailIndex & m_mask;
-Contract.Assert(m_headIndex <= m_tailIndex);
+m_headIndex.Index = m_headIndex.Index & m_mask;
+m_tailIndex.Index = tail = m_tailIndex.Index & m_mask;
+Contract.Assert(m_headIndex.Index <= m_tailIndex.Index);
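// (Illustrative note, not part of this diff: with a 64-slot array,
// m_mask == 0x3F. If tail reached int.MaxValue (0x7FFFFFFF) while head
// was 0x7FFFFFF0, masking yields tail == 63 and head == 48: the gap
// (15 queued items) and every item's physical slot (i & m_mask) are
// unchanged; only the exhausted upper bits are discarded.)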
}
}
finally
@@ -193,10 +200,10 @@ public void LocalPush(IThreadPoolWorkItem obj)
}

// When there are at least 2 elements' worth of space, we can take the fast path.
-if (tail < m_headIndex + m_mask)
+if (tail < m_headIndex.Index + m_mask)
{
Volatile.Write(ref m_array[tail & m_mask].Item, obj);
-m_tailIndex = tail + 1;
+m_tailIndex.Index = tail + 1;
}
else
{
@@ -206,8 +213,8 @@ public void LocalPush(IThreadPoolWorkItem obj)
{
m_foreignLock.Enter(ref lockTaken);

-int head = m_headIndex;
-int count = m_tailIndex - m_headIndex;
+int head = m_headIndex.Index;
+int count = m_tailIndex.Index - m_headIndex.Index;

// If there is still space (one left), just add the element.
if (count >= m_mask)
@@ -219,13 +226,13 @@ public void LocalPush(IThreadPoolWorkItem obj)

// Reset the field values, incl. the mask.
m_array = newArray;
-m_headIndex = 0;
-m_tailIndex = tail = count;
+m_headIndex.Index = 0;
+m_tailIndex.Index = tail = count;
m_mask = (m_mask << 1) | 1;
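// (Illustrative note, not part of this diff: growing a 64-slot queue
// doubles it to 128 slots; m_mask goes from 0x3F to 0x7F via
// (0x3F << 1) | 1, and with head reset to 0 the surviving items sit
// in slots 0..count-1 of the new array.)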
}

Volatile.Write(ref m_array[tail & m_mask].Item, obj);
-m_tailIndex = tail + 1;
+m_tailIndex.Index = tail + 1;
}
finally
{
@@ -239,7 +246,7 @@ public void LocalPush(IThreadPoolWorkItem obj)
public bool LocalFindAndPop(IThreadPoolWorkItem obj)
{
// Fast path: check the tail. If equal, we can skip the lock.
-if (m_array[(m_tailIndex - 1) & m_mask].Item == obj)
+if (m_array[(m_tailIndex.Index - 1) & m_mask].Item == obj)
{
IThreadPoolWorkItem unused;
if (LocalPop(out unused))
@@ -258,7 +265,7 @@ public bool LocalFindAndPop(IThreadPoolWorkItem obj)
// problem (although degenerate cases are clearly an issue) because local work
// queues tend to be somewhat shallow in length, and because if we fail to find
// the work item, we are about to block anyway (which is very expensive).
-for (int i = m_tailIndex - 2; i >= m_headIndex; i--)
+for (int i = m_tailIndex.Index - 2; i >= m_headIndex.Index; i--)
{
if (m_array[i & m_mask].Item == obj)
{
@@ -278,10 +285,10 @@ public bool LocalFindAndPop(IThreadPoolWorkItem obj)
// And then check to see if we can fix up the indexes (if we're at
// the edge). If we can't, we just leave nulls in the array and they'll
// get filtered out eventually (but may lead to superfluous resizing).
-if (i == m_tailIndex)
-m_tailIndex -= 1;
-else if (i == m_headIndex)
-m_headIndex += 1;
+if (i == m_tailIndex.Index)
+m_tailIndex.Index -= 1;
+else if (i == m_headIndex.Index)
+m_headIndex.Index += 1;

return true;
}
@@ -302,18 +309,18 @@ public bool LocalPop(out IThreadPoolWorkItem obj)
while (true)
{
// Decrement the tail using a fence to ensure subsequent read doesn't come before.
-int tail = m_tailIndex;
-if (m_headIndex >= tail)
+int tail = m_tailIndex.Index;
+if (m_headIndex.Index >= tail)
{
obj = null;
return false;
}

tail -= 1;
-Interlocked.Exchange(ref m_tailIndex, tail);
+Interlocked.Exchange(ref m_tailIndex.Index, tail);
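// (Illustrative note, not part of this diff: Interlocked.Exchange is a
// full fence, so the read of m_headIndex.Index below cannot be
// reordered before the tail write above. Pop publishes its new tail
// before reading head, and TrySteal publishes its new head before
// reading tail, so at most one of them can claim the last element.)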

// If there is no interaction with a take, we can head down the fast path.
-if (m_headIndex <= tail)
+if (m_headIndex.Index <= tail)
{
int idx = tail & m_mask;
obj = Volatile.Read(ref m_array[idx].Item);
@@ -332,7 +339,7 @@ public bool LocalPop(out IThreadPoolWorkItem obj)
{
m_foreignLock.Enter(ref lockTaken);

-if (m_headIndex <= tail)
+if (m_headIndex.Index <= tail)
{
// Element still available. Take it.
int idx = tail & m_mask;
@@ -347,7 +354,7 @@ public bool LocalPop(out IThreadPoolWorkItem obj)
else
{
// If we encountered a race condition and element was stolen, restore the tail.
-m_tailIndex = tail + 1;
+m_tailIndex.Index = tail + 1;
obj = null;
return false;
}
@@ -372,7 +379,7 @@ private bool TrySteal(out IThreadPoolWorkItem obj, ref bool missedSteal, int mil

while (true)
{
-if (m_headIndex >= m_tailIndex)
+if (m_headIndex.Index >= m_tailIndex.Index)
return false;

bool taken = false;
@@ -382,10 +389,10 @@ private bool TrySteal(out IThreadPoolWorkItem obj, ref bool missedSteal, int mil
if (taken)
{
// Increment head, and ensure read of tail doesn't move before it (fence).
-int head = m_headIndex;
-Interlocked.Exchange(ref m_headIndex, head + 1);
+int head = m_headIndex.Index;
+Interlocked.Exchange(ref m_headIndex.Index, head + 1);

-if (head < m_tailIndex)
+if (head < m_tailIndex.Index)
{
int idx = head & m_mask;
obj = Volatile.Read(ref m_array[idx].Item);
@@ -399,7 +406,7 @@ private bool TrySteal(out IThreadPoolWorkItem obj, ref bool missedSteal, int mil
else
{
// Failed, restore head.
-m_headIndex = head;
+m_headIndex.Index = head;
obj = null;
missedSteal = true;
}
@@ -429,7 +436,7 @@ internal class QueueSegment
// Holds the indexes of the lowest and highest valid elements of the nodes array.
// The low index is in the lower 16 bits, high index is in the upper 16 bits.
// Use GetIndexes and CompareExchangeIndexes to manipulate this.
-private volatile int indexes;
+private PaddedIndex indexes = new PaddedIndex();

// The next segment in the queue.
public volatile QueueSegment Next;
@@ -439,7 +446,7 @@ internal class QueueSegment

void GetIndexes(out int upper, out int lower)
{
-int i = indexes;
+int i = indexes.Index;
upper = (i >> 16) & SixteenBits;
lower = i & SixteenBits;

@@ -463,7 +470,7 @@ bool CompareExchangeIndexes(ref int prevUpper, int newUpper, ref int prevLower,

int oldIndexes = (prevUpper << 16) | (prevLower & SixteenBits);
int newIndexes = (newUpper << 16) | (newLower & SixteenBits);
-int prevIndexes = Interlocked.CompareExchange(ref indexes, newIndexes, oldIndexes);
+int prevIndexes = Interlocked.CompareExchange(ref indexes.Index, newIndexes, oldIndexes);
prevUpper = (prevIndexes >> 16) & SixteenBits;
prevLower = prevIndexes & SixteenBits;
return prevIndexes == oldIndexes;
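// (Illustrative note, not part of this diff: packing upper == 5 and
// lower == 2 yields (5 << 16) | 2 == 0x00050002; GetIndexes recovers
// them by shifting and masking with SixteenBits (0xFFFF). Keeping both
// indices in one int lets a single CompareExchange update them
// atomically, and padding that int keeps the CAS traffic off
// neighbouring fields' cache lines.)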
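
Why the padding matters: stealing threads write m_headIndex while the owning thread writes m_tailIndex, and as adjacent volatile ints the two almost always shared a 64-byte cache line, so every steal invalidated the line the local push/pop path was writing, and vice versa. The sketch below shows the same [StructLayout(LayoutKind.Explicit, Size = 64)] technique on a pair of counters hammered by two threads; it is a minimal illustration, not part of this commit, and the type names and iteration count are made up:

using System;
using System.Diagnostics;
using System.Runtime.InteropServices;
using System.Threading.Tasks;

// Pad each counter out to its own 64-byte cache line, as PaddedIndex does.
[StructLayout(LayoutKind.Explicit, Size = 64)]
struct PaddedCounter
{
    [FieldOffset(0)]
    public int Value;
}

static class FalseSharingDemo
{
    static int[] s_adjacent = new int[2];                   // 4 bytes apart: one shared line
    static PaddedCounter[] s_padded = new PaddedCounter[2]; // 64 bytes apart: one line each

    static long Time(Action a, Action b)
    {
        var sw = Stopwatch.StartNew();
        Task.WaitAll(Task.Run(a), Task.Run(b));
        return sw.ElapsedMilliseconds;
    }

    static void Main()
    {
        const int N = 50_000_000;
        Console.WriteLine("adjacent: {0} ms", Time(
            () => { for (int i = 0; i < N; i++) s_adjacent[0]++; },
            () => { for (int i = 0; i < N; i++) s_adjacent[1]++; }));
        Console.WriteLine("padded:   {0} ms", Time(
            () => { for (int i = 0; i < N; i++) s_padded[0].Value++; },
            () => { for (int i = 0; i < N; i++) s_padded[1].Value++; }));
    }
}

On a multicore machine the padded pair typically finishes several times faster than the adjacent pair; that is the effect this commit is after for the queue indices, where steals (head writes) were ping-ponging the cache line used by local pushes and pops (tail reads and writes).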
