Cache pad threadpool queue indices (prevent false sharing)
benaadams committed Jun 23, 2016
1 parent 63796e5 commit 5057952
Showing 1 changed file with 42 additions and 34 deletions.
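For background: false sharing occurs when two independently written variables land on the same CPU cache line (typically 64 bytes), so every write by one core invalidates the line in the other cores' caches and all of them stall on coherence traffic. In this queue the head index is written by stealing threads and the tail index by the owning thread, which makes the pair a classic candidate. The micro-benchmark below is a sketch only, not part of this commit, and all names in it are invented for illustration; it contrasts two counters on a shared cache line with a padded layout:

// Hypothetical micro-benchmark (not from this commit): two threads increment
// their own counter. When the counters share a cache line, each increment
// invalidates the other core's copy; padding one counter onto its own line
// removes that interference.
using System;
using System.Diagnostics;
using System.Runtime.InteropServices;
using System.Threading;

[StructLayout(LayoutKind.Explicit, Size = 128)]
struct Counters
{
    [FieldOffset(0)]  public long A;       // shares a cache line with B
    [FieldOffset(8)]  public long B;
    [FieldOffset(64)] public long PaddedB; // a full cache line away from A
}

static class FalseSharingDemo
{
    static Counters s_counters;
    const long Iterations = 10_000_000;

    static long Run(bool padded)
    {
        s_counters = default(Counters);
        var sw = Stopwatch.StartNew();
        var t1 = new Thread(() =>
        {
            for (long i = 0; i < Iterations; i++) Interlocked.Increment(ref s_counters.A);
        });
        var t2 = new Thread(() =>
        {
            for (long i = 0; i < Iterations; i++)
            {
                if (padded) Interlocked.Increment(ref s_counters.PaddedB);
                else Interlocked.Increment(ref s_counters.B);
            }
        });
        t1.Start(); t2.Start();
        t1.Join(); t2.Join();
        return sw.ElapsedMilliseconds;
    }

    static void Main()
    {
        Console.WriteLine("shared line: {0} ms", Run(padded: false));
        Console.WriteLine("padded:      {0} ms", Run(padded: true));
    }
}

On most multi-core machines the padded run is several times faster. The commit's PaddedIndex goes a step further than one line of padding: it reserves 128 bytes and places the field at offset 64, so the index shares neither its own cache line nor the adjacent one that hardware prefetchers tend to pull in pairs.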
76 changes: 42 additions & 34 deletions src/mscorlib/src/System/Threading/ThreadPool.cs
@@ -129,6 +129,14 @@ internal void Remove(T e)
}
}

+ // Ensure the index is on its own cache line by occupying two, so no false sharing can happen
+ [StructLayout(LayoutKind.Explicit, Size = 128)]
+ internal struct PaddedIndex
+ {
+ [FieldOffset(64)]
+ public volatile int Index;
+ }

internal class WorkStealingQueue
{
private const int INITIAL_SIZE = 32;
@@ -142,14 +150,14 @@ internal class WorkStealingQueue
private const int START_INDEX = 0;
#endif

- private volatile int m_headIndex = START_INDEX;
- private volatile int m_tailIndex = START_INDEX;
+ private PaddedIndex m_headIndex = new PaddedIndex() { Index = START_INDEX };
+ private PaddedIndex m_tailIndex = new PaddedIndex() { Index = START_INDEX };

private SpinLock m_foreignLock = new SpinLock(false);

public void LocalPush(IThreadPoolWorkItem obj)
{
- int tail = m_tailIndex;
+ int tail = m_tailIndex.Index;

// We're going to increment the tail; if we'll overflow, then we need to reset our counts
if (tail == int.MaxValue)
@@ -159,7 +167,7 @@ public void LocalPush(IThreadPoolWorkItem obj)
{
m_foreignLock.Enter(ref lockTaken);

- if (m_tailIndex == int.MaxValue)
+ if (m_tailIndex.Index == int.MaxValue)
{
//
// Rather than resetting to zero, we'll just mask off the bits we don't care about.
@@ -171,9 +179,9 @@ public void LocalPush(IThreadPoolWorkItem obj)
// for the head to end up greater than the tail, since you can't set any more bits than all of
// them.
//
- m_headIndex = m_headIndex & m_mask;
- m_tailIndex = tail = m_tailIndex & m_mask;
- Contract.Assert(m_headIndex <= m_tailIndex);
+ m_headIndex.Index = m_headIndex.Index & m_mask;
+ m_tailIndex.Index = tail = m_tailIndex.Index & m_mask;
+ Contract.Assert(m_headIndex.Index <= m_tailIndex.Index);
}
}
finally
@@ -184,10 +192,10 @@ public void LocalPush(IThreadPoolWorkItem obj)
}

// When there are at least 2 elements' worth of space, we can take the fast path.
- if (tail < m_headIndex + m_mask)
+ if (tail < m_headIndex.Index + m_mask)
{
Volatile.Write(ref m_array[tail & m_mask], obj);
- m_tailIndex = tail + 1;
+ m_tailIndex.Index = tail + 1;
}
else
{
@@ -197,8 +205,8 @@ public void LocalPush(IThreadPoolWorkItem obj)
{
m_foreignLock.Enter(ref lockTaken);

- int head = m_headIndex;
- int count = m_tailIndex - m_headIndex;
+ int head = m_headIndex.Index;
+ int count = m_tailIndex.Index - m_headIndex.Index;

// If there is still space (one left), just add the element.
if (count >= m_mask)
@@ -210,13 +218,13 @@ public void LocalPush(IThreadPoolWorkItem obj)

// Reset the field values, incl. the mask.
m_array = newArray;
- m_headIndex = 0;
- m_tailIndex = tail = count;
+ m_headIndex.Index = 0;
+ m_tailIndex.Index = tail = count;
m_mask = (m_mask << 1) | 1;
}

Volatile.Write(ref m_array[tail & m_mask], obj);
- m_tailIndex = tail + 1;
+ m_tailIndex.Index = tail + 1;
}
finally
{
@@ -230,7 +238,7 @@ public void LocalPush(IThreadPoolWorkItem obj)
public bool LocalFindAndPop(IThreadPoolWorkItem obj)
{
// Fast path: check the tail. If equal, we can skip the lock.
- if (m_array[(m_tailIndex - 1) & m_mask] == obj)
+ if (m_array[(m_tailIndex.Index - 1) & m_mask] == obj)
{
IThreadPoolWorkItem unused;
if (LocalPop(out unused))
@@ -249,7 +257,7 @@ public bool LocalFindAndPop(IThreadPoolWorkItem obj)
// problem (although degenerate cases are clearly an issue) because local work
// queues tend to be somewhat shallow in length, and because if we fail to find
// the work item, we are about to block anyway (which is very expensive).
- for (int i = m_tailIndex - 2; i >= m_headIndex; i--)
+ for (int i = m_tailIndex.Index - 2; i >= m_headIndex.Index; i--)
{
if (m_array[i & m_mask] == obj)
{
@@ -269,10 +277,10 @@ public bool LocalFindAndPop(IThreadPoolWorkItem obj)
// And then check to see if we can fix up the indexes (if we're at
// the edge). If we can't, we just leave nulls in the array and they'll
// get filtered out eventually (but may lead to superfluous resizing).
- if (i == m_tailIndex)
- m_tailIndex -= 1;
- else if (i == m_headIndex)
- m_headIndex += 1;
+ if (i == m_tailIndex.Index)
+ m_tailIndex.Index -= 1;
+ else if (i == m_headIndex.Index)
+ m_headIndex.Index += 1;

return true;
}
@@ -293,18 +301,18 @@ public bool LocalPop(out IThreadPoolWorkItem obj)
while (true)
{
// Decrement the tail using a fence to ensure subsequent read doesn't come before.
- int tail = m_tailIndex;
- if (m_headIndex >= tail)
+ int tail = m_tailIndex.Index;
+ if (m_headIndex.Index >= tail)
{
obj = null;
return false;
}

tail -= 1;
- Interlocked.Exchange(ref m_tailIndex, tail);
+ Interlocked.Exchange(ref m_tailIndex.Index, tail);

// If there is no interaction with a take, we can head down the fast path.
- if (m_headIndex <= tail)
+ if (m_headIndex.Index <= tail)
{
int idx = tail & m_mask;
obj = Volatile.Read(ref m_array[idx]);
@@ -323,7 +331,7 @@ public bool LocalPop(out IThreadPoolWorkItem obj)
{
m_foreignLock.Enter(ref lockTaken);

- if (m_headIndex <= tail)
+ if (m_headIndex.Index <= tail)
{
// Element still available. Take it.
int idx = tail & m_mask;
@@ -338,7 +346,7 @@ public bool LocalPop(out IThreadPoolWorkItem obj)
else
{
// If we encountered a race condition and element was stolen, restore the tail.
- m_tailIndex = tail + 1;
+ m_tailIndex.Index = tail + 1;
obj = null;
return false;
}
@@ -363,7 +371,7 @@ private bool TrySteal(out IThreadPoolWorkItem obj, ref bool missedSteal, int mil

while (true)
{
- if (m_headIndex >= m_tailIndex)
+ if (m_headIndex.Index >= m_tailIndex.Index)
return false;

bool taken = false;
@@ -373,10 +381,10 @@ private bool TrySteal(out IThreadPoolWorkItem obj, ref bool missedSteal, int mil
if (taken)
{
// Increment head, and ensure read of tail doesn't move before it (fence).
- int head = m_headIndex;
- Interlocked.Exchange(ref m_headIndex, head + 1);
+ int head = m_headIndex.Index;
+ Interlocked.Exchange(ref m_headIndex.Index, head + 1);

- if (head < m_tailIndex)
+ if (head < m_tailIndex.Index)
{
int idx = head & m_mask;
obj = Volatile.Read(ref m_array[idx]);
@@ -390,7 +398,7 @@ private bool TrySteal(out IThreadPoolWorkItem obj, ref bool missedSteal, int mil
else
{
// Failed, restore head.
- m_headIndex = head;
+ m_headIndex.Index = head;
obj = null;
missedSteal = true;
}
@@ -420,7 +428,7 @@ internal class QueueSegment
// Holds the indexes of the lowest and highest valid elements of the nodes array.
// The low index is in the lower 16 bits, high index is in the upper 16 bits.
// Use GetIndexes and CompareExchangeIndexes to manipulate this.
- private volatile int indexes;
+ private PaddedIndex indexes = new PaddedIndex();

// The next segment in the queue.
public volatile QueueSegment Next;
@@ -430,7 +438,7 @@ internal class QueueSegment

void GetIndexes(out int upper, out int lower)
{
- int i = indexes;
+ int i = indexes.Index;
upper = (i >> 16) & SixteenBits;
lower = i & SixteenBits;

@@ -454,7 +462,7 @@ bool CompareExchangeIndexes(ref int prevUpper, int newUpper, ref int prevLower,

int oldIndexes = (prevUpper << 16) | (prevLower & SixteenBits);
int newIndexes = (newUpper << 16) | (newLower & SixteenBits);
- int prevIndexes = Interlocked.CompareExchange(ref indexes, newIndexes, oldIndexes);
+ int prevIndexes = Interlocked.CompareExchange(ref indexes.Index, newIndexes, oldIndexes);
prevUpper = (prevIndexes >> 16) & SixteenBits;
prevLower = prevIndexes & SixteenBits;
return prevIndexes == oldIndexes;
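The overflow handling in LocalPush (the "Rather than resetting to zero, we'll just mask off the bits we don't care about" block) rewards a worked example. Because every array access already goes through index & m_mask, stripping the high bits from both indexes changes neither element's slot nor the head <= tail ordering. A small sketch with assumed values, not library code:

// Worked illustration of the index reset in LocalPush: masking both indexes
// preserves each index's array slot (index & mask) and the head <= tail
// ordering, unlike resetting to zero.
using System;

static class MaskResetDemo
{
    static void Main()
    {
        int mask = 31;               // 32-element array, as with INITIAL_SIZE
        int head = int.MaxValue - 5; // indexes about to overflow
        int tail = int.MaxValue;

        Console.WriteLine("slots before: head={0}, tail={1}", head & mask, tail & mask);

        head &= mask;                // the reset performed under m_foreignLock
        tail &= mask;

        Console.WriteLine("slots after:  head={0}, tail={1}", head & mask, tail & mask);
        Console.WriteLine("ordering preserved: {0}", head <= tail);
    }
}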
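LocalPop and TrySteal coordinate through a Dekker-style handshake rather than a lock on the fast path: each side publishes its own index move with Interlocked.Exchange, a full fence in .NET, before reading the other side's index. The schematic below shows only that handshake; it is simplified and omits the m_foreignLock re-check the real code performs when the indexes collide:

// Schematic (simplified, not the mscorlib implementation) of the fence
// handshake in LocalPop/TrySteal. The real code resolves collisions under
// m_foreignLock; this sketch only shows why the Interlocked.Exchange fences
// stop both sides from claiming the same last element.
using System.Threading;

class DekkerHandshake
{
    int m_head; // advanced by stealing threads
    int m_tail; // advanced and retreated by the owning thread

    // Owner: move the tail down, fence, then examine the head.
    public bool OwnerSeesElement()
    {
        int tail = m_tail - 1;
        Interlocked.Exchange(ref m_tail, tail); // full fence: publish before reading
        return Volatile.Read(ref m_head) <= tail;
    }

    // Thief: move the head up, fence, then examine the tail.
    public bool ThiefSeesElement()
    {
        int head = m_head;
        Interlocked.Exchange(ref m_head, head + 1); // full fence, likewise
        return head < Volatile.Read(ref m_tail);
    }

    // With one element left (head + 1 == tail), the two Exchange operations are
    // totally ordered; whichever runs second is guaranteed to observe the other
    // side's move and return false, so at most one side claims the element.
}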

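Finally, QueueSegment keeps both of its 16-bit indexes inside a single int precisely so CompareExchangeIndexes can advance either one with a single atomic compare-and-swap; the padding change only relocates that int onto its own cache lines. A standalone sketch of the packing pattern, with invented names:

// Standalone sketch (invented names, not the mscorlib source) of packing two
// 16-bit indexes into one int so both can be read and updated atomically.
using System.Threading;

class PackedIndexes
{
    const int SixteenBits = 0xffff;
    int indexes; // upper 16 bits: upper index, lower 16 bits: lower index

    public void Get(out int upper, out int lower)
    {
        int i = Volatile.Read(ref indexes);
        upper = (i >> 16) & SixteenBits;
        lower = i & SixteenBits;
    }

    // Returns true if the CAS succeeded. On failure, prevUpper/prevLower are
    // refreshed with the values another thread installed, so the caller can
    // recompute and retry.
    public bool TryUpdate(ref int prevUpper, int newUpper, ref int prevLower, int newLower)
    {
        int oldPacked = (prevUpper << 16) | (prevLower & SixteenBits);
        int newPacked = (newUpper << 16) | (newLower & SixteenBits);
        int observed = Interlocked.CompareExchange(ref indexes, newPacked, oldPacked);
        prevUpper = (observed >> 16) & SixteenBits;
        prevLower = observed & SixteenBits;
        return observed == oldPacked;
    }
}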