Skip to content
This repository has been archived by the owner on Jan 23, 2023. It is now read-only.
/ corefx Public archive

HashCode based on xxHash32 #25013

Merged
merged 11 commits into from
Dec 13, 2017
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions src/System.Runtime/ref/System.Runtime.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3675,6 +3675,59 @@ public void GetObjectData(System.Runtime.Serialization.SerializationInfo info, S
public void SetTarget(T target) { }
public bool TryGetTarget(out T target) { throw null; }
}
[System.Runtime.InteropServices.StructLayoutAttribute(System.Runtime.InteropServices.LayoutKind.Explicit, Size = 8 * 8)]
public partial struct HashCode
{
[System.Runtime.CompilerServices.MethodImplAttribute(System.Runtime.CompilerServices.MethodImplOptions.AggressiveInlining)]
[System.Runtime.TargetedPatchingOptOutAttribute("Performance critical for inlining across NGen images.")]
public int ToHashCode() { throw null; }

#pragma warning disable 0809
[System.ObsoleteAttribute("Use ToHashCode to retrieve the computed hash code.", error: true)]
[System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
public override int GetHashCode() { throw null; }
#pragma warning restore 0809

[System.Runtime.CompilerServices.MethodImplAttribute(System.Runtime.CompilerServices.MethodImplOptions.AggressiveInlining)]
[System.Runtime.TargetedPatchingOptOutAttribute("Performance critical for inlining across NGen images.")]
public void Add<T>(T value) { }

[System.Runtime.CompilerServices.MethodImplAttribute(System.Runtime.CompilerServices.MethodImplOptions.AggressiveInlining)]
[System.Runtime.TargetedPatchingOptOutAttribute("Performance critical for inlining across NGen images.")]
public void Add<T>(T value, System.Collections.Generic.IEqualityComparer<T> comparer) { }

[System.Runtime.CompilerServices.MethodImplAttribute(System.Runtime.CompilerServices.MethodImplOptions.AggressiveInlining)]
[System.Runtime.TargetedPatchingOptOutAttribute("Performance critical for inlining across NGen images.")]
public static int Combine<T1>(T1 value1) { throw null; }

[System.Runtime.CompilerServices.MethodImplAttribute(System.Runtime.CompilerServices.MethodImplOptions.AggressiveInlining)]
[System.Runtime.TargetedPatchingOptOutAttribute("Performance critical for inlining across NGen images.")]
public static int Combine<T1, T2>(T1 value1, T2 value2) { throw null; }

[System.Runtime.CompilerServices.MethodImplAttribute(System.Runtime.CompilerServices.MethodImplOptions.AggressiveInlining)]
[System.Runtime.TargetedPatchingOptOutAttribute("Performance critical for inlining across NGen images.")]
public static int Combine<T1, T2, T3>(T1 value1, T2 value2, T3 value3) { throw null; }

[System.Runtime.CompilerServices.MethodImplAttribute(System.Runtime.CompilerServices.MethodImplOptions.AggressiveInlining)]
[System.Runtime.TargetedPatchingOptOutAttribute("Performance critical for inlining across NGen images.")]
public static int Combine<T1, T2, T3, T4>(T1 value1, T2 value2, T3 value3, T4 value4) { throw null; }

[System.Runtime.CompilerServices.MethodImplAttribute(System.Runtime.CompilerServices.MethodImplOptions.AggressiveInlining)]
[System.Runtime.TargetedPatchingOptOutAttribute("Performance critical for inlining across NGen images.")]
public static int Combine<T1, T2, T3, T4, T5>(T1 value1, T2 value2, T3 value3, T4 value4, T5 value5) { throw null; }

[System.Runtime.CompilerServices.MethodImplAttribute(System.Runtime.CompilerServices.MethodImplOptions.AggressiveInlining)]
[System.Runtime.TargetedPatchingOptOutAttribute("Performance critical for inlining across NGen images.")]
public static int Combine<T1, T2, T3, T4, T5, T6>(T1 value1, T2 value2, T3 value3, T4 value4, T5 value5, T6 value6) { throw null; }

[System.Runtime.CompilerServices.MethodImplAttribute(System.Runtime.CompilerServices.MethodImplOptions.AggressiveInlining)]
[System.Runtime.TargetedPatchingOptOutAttribute("Performance critical for inlining across NGen images.")]
public static int Combine<T1, T2, T3, T4, T5, T6, T7>(T1 value1, T2 value2, T3 value3, T4 value4, T5 value5, T6 value6, T7 value7) { throw null; }

[System.Runtime.CompilerServices.MethodImplAttribute(System.Runtime.CompilerServices.MethodImplOptions.AggressiveInlining)]
[System.Runtime.TargetedPatchingOptOutAttribute("Performance critical for inlining across NGen images.")]
public static int Combine<T1, T2, T3, T4, T5, T6, T7, T8>(T1 value1, T2 value2, T3 value3, T4 value4, T5 value5, T6 value6, T7 value7, T8 value8) { throw null; }
}
}

namespace System.Runtime.ConstrainedExecution
Expand Down
4 changes: 3 additions & 1 deletion src/System.Runtime/src/System.Runtime.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
<ProjectGuid>{56B9D0A9-44D3-488E-8B42-C14A6E30CAB2}</ProjectGuid>
<AssemblyName>System.Runtime</AssemblyName>
<IsPartialFacadeAssembly>true</IsPartialFacadeAssembly>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have realized that this is no longer needed, I'll put it into the next commit that solves the build problem and any additional feedback.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed.

</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'netcoreapp-Unix-Debug|AnyCPU'" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'netcoreapp-Unix-Release|AnyCPU'" />
Expand All @@ -18,6 +19,7 @@
<Compile Include="System\Action.cs" />
<Compile Include="System\Collections\Generic\ISet.cs" />
<Compile Include="System\Function.cs" />
<Compile Include="System\HashCode.cs" />
<Compile Include="System\IO\FileAttributes.cs" />
<Compile Include="System\IO\HandleInheritability.cs" />
<Compile Include="System\LazyOfTTMetadata.cs" />
Expand All @@ -33,4 +35,4 @@
<ReferenceFromRuntime Include="System.Private.CoreLib" />
</ItemGroup>
<Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" />
</Project>
</Project>
250 changes: 250 additions & 0 deletions src/System.Runtime/src/System/HashCode.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,250 @@
// Licensed to the .NET Foundation under one or more agreements.
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please follow the rules from the "Copying Files from Other Projects" section of https://github.com/dotnet/coreclr/blob/master/Documentation/project-docs/contributing.md. In particular, I think that means including the xxHash license here and adding it to the 3rd party notices file. @richlander, please confirm we don't need to do anything else to make this file follow the rules.

// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Collections.Generic;
using System.ComponentModel;
using System.Runtime;
using System.Runtime.CompilerServices;

namespace System
{
public unsafe struct HashCode
{
private const uint Queue = 4;
private const uint Length = 7;

private const uint Prime1 = 2654435761U;
private const uint Prime2 = 2246822519U;
private const uint Prime3 = 3266489917U;
private const uint Prime4 = 0668265263U;
private const uint Prime5 = 0374761393U;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • I've been working with the C# language (and .NET) for more than 13 years now.
  • Today I learned that a 0 prefix doesn't mean octal.

Together, to me, these points say "please remove the leading 0".

(My first thought (given the knowledge that it had compiled) was a quirky behavior that since an 8 and 9 were present that it knew it couldn't be octal, but apparently C# simply removed that confusing part of C, but without removing the "confusing" flag from my head.)


// 8x8 = 64 bytes = 1 x86 cache line
// +----+----+----+----+----+----+----+----+
// | 00 | 01 | 02 | 03 | 04 | 05 | 06 | 07 |
// | xxHash State | Queue | Le |
// +----+----+----+----+----+----+----+----+
private fixed uint _state[8];

[MethodImpl(MethodImplOptions.AggressiveInlining)]
[TargetedPatchingOptOut("Performance critical for inlining across NGen images.")]
private void Add(int value)
{
unchecked
{
var val = (uint)value;

fixed (uint* state = _state)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As a convention we called such fixed pointers pXxx (pState in this example). It makes it easier to see in following code where there is a danger of overruns.

{
var queue = state + Queue;
var position = state[Length] & 0x3;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So this is not really Length but rather LastIndex.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, it seems like this should be LengthIdx, as the array is used as a packed struct (which would be nicer).


if (position < 3)
{
queue[position] = val;
}
else
{

var sentinel =
state[0] | state[1] | state[2] | state[3] | // any of accumulators set?
(state[Length] ^ position); // length greater than 3?

if (sentinel == 0)
{
// Initialize

state[0] = Prime1 + Prime2;
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To avoid HashCode consumers to confuse the hash-code to be suitable for persistence, the initialization should make use of a random, per-app-domain, seed. For XX32, The seed is added to the first three state-vars (state[0] += seed, same for idx 1 and 2), and being subtracted from for the last (state[3] = seed - state[3]).

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No problem. I still have the seed code in my polyfill.

This would break the test vector test (which the build server is having problems with anyway). I can't think of any other way to test the output of this function. Suggestions?

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At my prototype project Haschisch.Kastriert I implemented a version where the seed could be specified explicitly. The tests would use a known seed, and the overloads for the default seed would use random numbers (from the security.cryptography randomnumbergenerator).

state[1] = Prime2;
state[2] = 0;
state[3] = (uint)-Prime1;
}

for (var i = 0; i < 3; i++)
state[i] = Round(state[i], queue[i]);
state[3] = Round(state[3], val);
}

state[Length]++;
}
}
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
[TargetedPatchingOptOut("Performance critical for inlining across NGen images.")]
public int ToHashCode()
{
unchecked
{
fixed (uint* state = _state)
{
var queue = state + Queue;
var position = state[Length] & 0x3;

// Mix the accumulators (if they contain anything)

var hash = state[Length] > 3
? Rol(state[0], 1) + Rol(state[1], 7) + Rol(state[2], 12) + Rol(state[3], 18)
: Prime5;

hash += state[Length] * 4;

// Mix what remains in the queue

for (var i = 0; i < position; i++)
{
hash += queue[i] * Prime3;
hash = Rol(hash, 17) * Prime4;
}

// Final mix

hash ^= hash >> 15;
hash *= Prime2;
hash ^= hash >> 13;
hash *= Prime3;
hash ^= hash >> 16;

return (int)hash;
}
}
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
[TargetedPatchingOptOut("Performance critical for inlining across NGen images.")]
private uint Round(uint seed, uint input)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can be static.

{
unchecked
{
seed += input * Prime2;
seed = Rol(seed, 13);
seed *= Prime1;
return seed;
}
}

# pragma warning disable 0809
[Obsolete("Use ToHashCode to retrieve the computed hash code.", error: true)]
[EditorBrowsable(EditorBrowsableState.Never)]
public override int GetHashCode() => ToHashCode();
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My (and also @tannergooding 's) interpretation of the proposed API was to throw new NotImplementedException(); here. Usually, and also for this implementation, ToHashCode() destroys state, i.e. calling .ToHashCode() twice will return different results. For .GetHashCode() this would be more than just unexpected, it would lead to wrong results for all the usual consumers (Dictionary etc.). Failing with InvalidOperationException might be even better.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Happy to do this, just need a final say on which Exception and what message.

# pragma warning restore 0809

[MethodImpl(MethodImplOptions.AggressiveInlining)]
[TargetedPatchingOptOut("Performance critical for inlining across NGen images.")]
private static uint Rol(uint value, int count)
=> (value << count) | (value >> (32 - count));

[MethodImpl(MethodImplOptions.AggressiveInlining)]
[TargetedPatchingOptOut("Performance critical for inlining across NGen images.")]
public void Add<T>(T value) => Add(value?.GetHashCode() ?? 0);

[MethodImpl(MethodImplOptions.AggressiveInlining)]
[TargetedPatchingOptOut("Performance critical for inlining across NGen images.")]
public void Add<T>(T value, IEqualityComparer<T> comparer)
{
if (comparer is null) throw new ArgumentNullException(nameof(comparer));
Add(comparer.GetHashCode(value));
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
[TargetedPatchingOptOut("Performance critical for inlining across NGen images.")]
public static int Combine<T1>(T1 value1)
{
var hc = new HashCode();
hc.Add(value1);
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've once tried combining hash-codes with generic code, but due to performance (factor 2 to 8) ended up manually unrolling all .Combine() overloads. I see that you've elegantly improved handling the state and queue, so this might improve the situation; but do you have some ballpark estimate how much this costs? (For comparison: this was a larger difference than choosing SipHash 1-3, which would be hash-dos resistant).

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll benchmark post the results in a comment below.

return hc.ToHashCode();
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
[TargetedPatchingOptOut("Performance critical for inlining across NGen images.")]
public static int Combine<T1, T2>(T1 value1, T2 value2)
{
var hc = new HashCode();
hc.Add(value1);
hc.Add(value2);
return hc.ToHashCode();
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
[TargetedPatchingOptOut("Performance critical for inlining across NGen images.")]
public static int Combine<T1, T2, T3>(T1 value1, T2 value2, T3 value3)
{
var hc = new HashCode();
hc.Add(value1);
hc.Add(value2);
hc.Add(value3);
return hc.ToHashCode();
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
[TargetedPatchingOptOut("Performance critical for inlining across NGen images.")]
public static int Combine<T1, T2, T3, T4>(T1 value1, T2 value2, T3 value3, T4 value4)
{
var hc = new HashCode();
hc.Add(value1);
hc.Add(value2);
hc.Add(value3);
hc.Add(value4);
return hc.ToHashCode();
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
[TargetedPatchingOptOut("Performance critical for inlining across NGen images.")]
public static int Combine<T1, T2, T3, T4, T5>(T1 value1, T2 value2, T3 value3, T4 value4, T5 value5)
{
var hc = new HashCode();
hc.Add(value1);
hc.Add(value2);
hc.Add(value3);
hc.Add(value4);
hc.Add(value5);
return hc.ToHashCode();
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
[TargetedPatchingOptOut("Performance critical for inlining across NGen images.")]
public static int Combine<T1, T2, T3, T4, T5, T6>(T1 value1, T2 value2, T3 value3, T4 value4, T5 value5, T6 value6)
{
var hc = new HashCode();
hc.Add(value1);
hc.Add(value2);
hc.Add(value3);
hc.Add(value4);
hc.Add(value5);
hc.Add(value6);
return hc.ToHashCode();
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
[TargetedPatchingOptOut("Performance critical for inlining across NGen images.")]
public static int Combine<T1, T2, T3, T4, T5, T6, T7>(T1 value1, T2 value2, T3 value3, T4 value4, T5 value5, T6 value6, T7 value7)
{
var hc = new HashCode();
hc.Add(value1);
hc.Add(value2);
hc.Add(value3);
hc.Add(value4);
hc.Add(value5);
hc.Add(value6);
hc.Add(value7);
return hc.ToHashCode();
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
[TargetedPatchingOptOut("Performance critical for inlining across NGen images.")]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jkotas is "targetedpatchingoptout" still needed in corfx?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if so when do we apply it...

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jkotas is "targetedpatchingoptout" still needed in corfx?

No. These attributes can be deleted.

public static int Combine<T1, T2, T3, T4, T5, T6, T7, T8>(T1 value1, T2 value2, T3 value3, T4 value4, T5 value5, T6 value6, T7 value7, T8 value8)
{
var hc = new HashCode();
hc.Add(value1);
hc.Add(value2);
hc.Add(value3);
hc.Add(value4);
hc.Add(value5);
hc.Add(value6);
hc.Add(value7);
hc.Add(value8);
return hc.ToHashCode();
}
}
}
28 changes: 28 additions & 0 deletions src/System.Runtime/tests/Performance/Perf.HashCode.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using Microsoft.Xunit.Performance;

namespace System.Tests
{
public class Perf_HashCode
{
[Benchmark]
public void Add_()
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why the trailing underscore?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy/paste (have a look at Perf.Int32). I thought it might be part of some convention.

Removed.

{
foreach (var iteration in Benchmark.Iterations)
using (iteration.StartMeasurement())
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we'd prefer braces for the foreach here.

{
var hc = new HashCode();
for (int i = 0; i < 10000; i++)
{
hc.Add(i); hc.Add(i); hc.Add(i);
hc.Add(i); hc.Add(i); hc.Add(i);
hc.Add(i); hc.Add(i); hc.Add(i);
}
hc.ToHashCode();
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder whether the JIT might notice that nothing useful happens here and remove all of it. You might need to use the result in some way to ensure it doesn't get eliminated.

}
}
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might be good to have benchmarks for a couple of the Combine methods since those are hopefully the ones that get used most and you've already done some performance tuning on them. I don't think you need every combine, but maybe Combine with 2, 4 and 8 inputs.

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
<Compile Include="Perf.Double.cs" />
<Compile Include="Perf.Enum.cs" />
<Compile Include="Perf.Guid.cs" />
<Compile Include="Perf.HashCode.cs" />
<Compile Include="Perf.Object.cs" />
<Compile Include="Perf.String.cs" />
<Compile Include="Perf.TimeSpan.cs" />
Expand Down
1 change: 1 addition & 0 deletions src/System.Runtime/tests/System.Runtime.Tests.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
<Compile Include="$(CommonTestPath)\System\Reflection\MockParameterInfo.cs">
<Link>Common\System\Reflection\MockParameterInfo.cs</Link>
</Compile>
<Compile Include="System\HashCodeTests.cs" />
<Compile Include="TimeZoneInfoTests.cs" />
<Compile Include="Microsoft\Win32\SafeHandles\CriticalHandleZeroOrMinusOneIsInvalid.cs" />
<Compile Include="Microsoft\Win32\SafeHandles\SafeHandleZeroOrMinusOneIsInvalid.cs" />
Expand Down
Loading