-
Notifications
You must be signed in to change notification settings - Fork 4.9k
Make use of CopyBlock for non-overlapping spans #17063
Changes from all commits
df2bfc8
0dff1c8
15d7217
3836687
01303dc
8a9d8ad
1a5672a
2ccd0d5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -204,12 +204,12 @@ public unsafe void Clear() | |
|
||
var byteLength = (UIntPtr)((uint)length * Unsafe.SizeOf<T>()); | ||
|
||
if ((Unsafe.SizeOf<T>() & (sizeof(IntPtr) - 1)) != 0) | ||
if ((Unsafe.SizeOf<T>() & (sizeof(IntPtr) - 1)) != 0) | ||
{ | ||
if (_pinnable == null) | ||
{ | ||
var ptr = (byte*)_byteOffset.ToPointer(); | ||
|
||
SpanHelpers.ClearLessThanPointerSized(ptr, byteLength); | ||
} | ||
else | ||
|
@@ -311,7 +311,6 @@ public void CopyTo(Span<T> destination) | |
ThrowHelper.ThrowArgumentException_DestinationTooShort(); | ||
} | ||
|
||
|
||
/// <summary> | ||
/// Copies the contents of this span into destination span. If the source | ||
/// and destinations overlap, this method behaves as if the original values in | ||
|
@@ -323,35 +322,78 @@ public void CopyTo(Span<T> destination) | |
/// <param name="destination">The span to copy items into.</param> | ||
public bool TryCopyTo(Span<T> destination) | ||
{ | ||
if ((uint)_length > (uint)destination._length) | ||
int length = _length; | ||
int destLength = destination._length; | ||
|
||
if ((uint)length == 0) | ||
return true; | ||
|
||
if ((uint)length > (uint)destLength) | ||
return false; | ||
|
||
// TODO: This is a tide-over implementation as we plan to add a overlap-safe cpblk-based api to Unsafe. (https://github.com/dotnet/corefx/issues/13427) | ||
unsafe | ||
{ | ||
ref T src = ref DangerousGetPinnableReference(); | ||
ref T dst = ref destination.DangerousGetPinnableReference(); | ||
IntPtr srcMinusDst = Unsafe.ByteOffset<T>(ref dst, ref src); | ||
int length = _length; | ||
|
||
IntPtr srcMinusDst = Unsafe.ByteOffset<T>(ref dst, ref src); | ||
bool srcGreaterThanDst = (sizeof(IntPtr) == sizeof(int)) ? srcMinusDst.ToInt32() >= 0 : srcMinusDst.ToInt64() >= 0; | ||
IntPtr tailDiff; | ||
|
||
if (srcGreaterThanDst) | ||
{ | ||
// Source address greater than or equal to destination address. Can do normal copy. | ||
for (int i = 0; i < length; i++) | ||
// If the start of source is greater than the start of destination, then we need to calculate | ||
// the different between the end of destination relative to the start of source. | ||
tailDiff = Unsafe.ByteOffset<T>(ref Unsafe.Add<T>(ref dst, destLength), ref src); | ||
} | ||
else | ||
{ | ||
// If the start of source is less than the start of destination, then we need to calculate | ||
// the different between the end of source relative to the start of destunation. | ||
tailDiff = Unsafe.ByteOffset<T>(ref Unsafe.Add<T>(ref src, length), ref dst); | ||
} | ||
|
||
// If the source is entirely before or entirely after the destination and the type inside the span is not | ||
// itself a reference type or containing reference types, then we can do a simple block copy of the data. | ||
bool isOverlapped = (sizeof(IntPtr) == sizeof(int)) ? tailDiff.ToInt32() < 0 : tailDiff.ToInt64() < 0; | ||
if (!isOverlapped && !SpanHelpers.IsReferenceOrContainsReferences<T>()) | ||
{ | ||
ref byte dstBytes = ref Unsafe.As<T, byte>(ref dst); | ||
ref byte srcBytes = ref Unsafe.As<T, byte>(ref src); | ||
ulong byteCount = (ulong)length * (ulong)Unsafe.SizeOf<T>(); | ||
ulong index = 0; | ||
|
||
while (index < byteCount) | ||
{ | ||
Unsafe.Add<T>(ref dst, i) = Unsafe.Add<T>(ref src, i); | ||
uint blockSize = byteCount > uint.MaxValue ? uint.MaxValue : (uint)byteCount; | ||
Unsafe.CopyBlock( | ||
ref Unsafe.Add(ref dstBytes, (IntPtr)index), | ||
ref Unsafe.Add(ref srcBytes, (IntPtr)index), | ||
blockSize); | ||
index += blockSize; | ||
} | ||
} | ||
else | ||
{ | ||
// Source address less than destination address. Must do backward copy. | ||
int i = length; | ||
while (i-- != 0) | ||
if (srcGreaterThanDst) | ||
{ | ||
// Source address greater than or equal to destination address. Can do normal copy. | ||
for (int i = 0; i < length; i++) | ||
{ | ||
Unsafe.Add<T>(ref dst, i) = Unsafe.Add<T>(ref src, i); | ||
} | ||
} | ||
else | ||
{ | ||
Unsafe.Add<T>(ref dst, i) = Unsafe.Add<T>(ref src, i); | ||
// Source address less than destination address. Must do backward copy. | ||
int i = length; | ||
while (i-- != 0) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We have for loop above vs while loop here. Is there a reason for this inconsistency? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I never changed that part. Not sure if there is a performance reason it was done. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, the loop above walks memory forward. This one walks memory backwards. Walking memory forward is faster on some hardware as the CPU prefetches memory in the former case and not the latter. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @KrzysztofCwalina I think the question was about for vs while? Is there a reason for was used with one loop but while with the backwards loop? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, got it. In general, I think it's better to use a for loop as it makes it more likely that bounds check optimizations will kick in. Otherwise, the optimizer might not realize that the while loop variable is actually the Length. But, maybe our optimizer is better than I realize? @russellhadley? |
||
{ | ||
Unsafe.Add<T>(ref dst, i) = Unsafe.Add<T>(ref src, i); | ||
} | ||
} | ||
} | ||
|
||
return true; | ||
} | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this be unrolled?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I was initially just fixing the perf for non-overlapping blocks. If you want this unrolled, I can add that as another commit.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Either a different commit or a different PR, but I think we should try to make the gap between overlapping and non-overlapping as small as possible. Otherwise it creates annoying performance traps.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The slower path for overlapping exists in the fast Span as well. Fixing it for fast Span should be higher priority than for the slow Span.