Skip to content

Commit

Permalink
Memcpy1 with 8 unrolled ulongs is much slower, avoiding unrolls of ulongs.
Browse files Browse the repository at this point in the history
  • Loading branch information
CptMoore committed Jan 8, 2025
1 parent ca8ef84 commit 1dc7d4a
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 34 deletions.
36 changes: 15 additions & 21 deletions ModTek/Features/Logging/FastBuffer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -240,12 +240,14 @@ private void EnlargeCapacity(int targetLength)
{
_buffer = null;
_bufferPtr = null;
_isG2 = false;
}
}

_buffer = newBuffer;
_handle = newHandle;
_bufferPtr = newBufferPtr;
_isG2 = false;
}
catch
{
Expand All @@ -254,25 +256,12 @@ private void EnlargeCapacity(int targetLength)
}
}

// from Buffer.memcpy1 and optimized
private static void Memcpy1(byte* dest, byte* src, int size)
internal static readonly MTStopwatch CopyStopwatch = new();
// from Buffer.memcpy1 and optimized to use 64bit/16bit types instead of just 8bit
internal static void Memcpy1(byte* dest, byte* src, int size)
{
{
const int BatchSize = 8 * sizeof(ulong); // 64, 8*8
for (; size >= BatchSize; size -= BatchSize)
{
*(ulong*)dest = *(ulong*)src;
*((ulong*)dest + 1) = *((ulong*)src + 1);
*((ulong*)dest + 2) = *((ulong*)src + 2);
*((ulong*)dest + 3) = *((ulong*)src + 3);
*((ulong*)dest + 4) = *((ulong*)src + 4);
*((ulong*)dest + 5) = *((ulong*)src + 5);
*((ulong*)dest + 6) = *((ulong*)src + 6);
*((ulong*)dest + 7) = *((ulong*)src + 7);
dest += BatchSize;
src += BatchSize;
}
}
// make sure to only measure when there is enough, otherwise measurement is slower than the actual copy
var measurement = size >= 64 ? MTStopwatch.GetTimestamp() : 0;
{
const int BatchSize = sizeof(ulong); // 8
for (; size >= BatchSize; size -= BatchSize)
Expand All @@ -291,9 +280,14 @@ private static void Memcpy1(byte* dest, byte* src, int size)
src += BatchSize;
}
}
if (size <= 0)
return;
*dest = *src;
if (size > 0)
{
*dest = *src;
}
if (measurement > 0)
{
CopyStopwatch.EndMeasurement(measurement, size);
}
}

~FastBuffer()
Expand Down
21 changes: 13 additions & 8 deletions ModTek/Features/Logging/LogStreamImpl/Win32ApiImpl.cs
Original file line number Diff line number Diff line change
Expand Up @@ -41,19 +41,24 @@ public unsafe void Append(byte[] bytes, int offset, int count)
return;
}

var position = AcquirePosition(count);
fixed (byte* numPtr = bytes)
{
Append(numPtr + offset, count);
}
}

private unsafe void Append(byte* bytes, int numBytesToWrite)
{
var position = AcquirePosition(numBytesToWrite);
var overlapped = PrepareOverlap(position);

fixed (byte* numPtr = bytes)
if (WriteFile(_handle, bytes, numBytesToWrite, out var numBytesWritten, &overlapped) != 0)
{
if (WriteFile(_handle, numPtr + offset, count, out var numBytesWritten, &overlapped) != 0)
if (numBytesWritten != numBytesToWrite)
{
if (numBytesWritten != count)
{
throw new IOException($"{numBytesWritten} != {count}");
}
return;
throw new IOException($"{numBytesWritten} != {numBytesToWrite}");
}
return;
}

var errorCode = Marshal.GetLastWin32Error();
Expand Down
1 change: 1 addition & 0 deletions ModTek/Features/Logging/MTLoggerAsyncQueue.cs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ Async internal processing had an average latency of {latencyStats.AverageNanosec
Filters {AppenderFile.FiltersStopWatch.GetStats()}.
Formatter {AppenderFile.FormatterStopWatch.GetStats()}.
UTF8-Fallback {FastBuffer.UTF8FallbackStopwatch.GetStats()}.
Copy buffer per bytes {FastBuffer.CopyStopwatch.GetStats()}.
Write (to OS buffers) {AppenderFile.WriteStopwatch.GetStats()}.
"""
);
Expand Down
4 changes: 2 additions & 2 deletions ModTek/Util/Stopwatch/MTStopwatch.cs
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,9 @@ internal static long GetTimestamp()
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal virtual void EndMeasurement(long start)
internal virtual void EndMeasurement(long start, long delta = 1)
{
AddMeasurement(GetTimestamp() - start, 1);
AddMeasurement(GetTimestamp() - start, delta);
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
Expand Down
6 changes: 3 additions & 3 deletions ModTek/Util/Stopwatch/MTStopwatchWithSampling.cs
Original file line number Diff line number Diff line change
Expand Up @@ -44,17 +44,17 @@ static MTStopwatchWithSampling()
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private bool ShouldMeasure()
internal bool ShouldMeasure()
{
return _random.NextUInt64() <= _sampleIfRandomSmallerOrEqualsTo;
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal override void EndMeasurement(long start)
internal override void EndMeasurement(long start, long delta = 1)
{
if (ShouldMeasure())
{
AddMeasurement((GetTimestamp() - start) * _samplingInterval, _samplingInterval);
AddMeasurement((GetTimestamp() - start) * _samplingInterval, delta * _samplingInterval);
}
}
}

0 comments on commit 1dc7d4a

Please sign in to comment.