From 5fb5b3b845603328bd43247a425d920aa0fee043 Mon Sep 17 00:00:00 2001 From: Jonathan Dickinson Date: Fri, 3 Nov 2017 21:09:04 -0700 Subject: [PATCH 01/10] HashCode based on xxHash32 Works by maintaining the xxHash32 state variables (v1 -> v4, length) as well as a queue of values that fall outside of the block size (16 bytes/4 ints). The seed is initialized to random bytes. Further details, unit tests and history: https://github.com/dotnet/corefx/pull/25013 --- THIRD-PARTY-NOTICES.TXT | 28 ++ .../System.Private.CoreLib.Shared.projitems | 1 + src/mscorlib/shared/System/HashCode.cs | 435 ++++++++++++++++++ 3 files changed, 464 insertions(+) create mode 100644 src/mscorlib/shared/System/HashCode.cs diff --git a/THIRD-PARTY-NOTICES.TXT b/THIRD-PARTY-NOTICES.TXT index 30502be1427b..4a8002db2514 100644 --- a/THIRD-PARTY-NOTICES.TXT +++ b/THIRD-PARTY-NOTICES.TXT @@ -221,3 +221,31 @@ License notice for the Printing Floating-Point Numbers 3. This notice may not be removed or altered from any source distribution. ******************************************************************************/ + +License notice for xxHash +------------------------- + +xxHash Library +Copyright (c) 2012-2014, Yann Collet +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, this + list of conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/src/mscorlib/shared/System.Private.CoreLib.Shared.projitems b/src/mscorlib/shared/System.Private.CoreLib.Shared.projitems index 5b7629f24548..8e832f373032 100644 --- a/src/mscorlib/shared/System.Private.CoreLib.Shared.projitems +++ b/src/mscorlib/shared/System.Private.CoreLib.Shared.projitems @@ -159,6 +159,7 @@ + diff --git a/src/mscorlib/shared/System/HashCode.cs b/src/mscorlib/shared/System/HashCode.cs new file mode 100644 index 000000000000..4e6c419c31b9 --- /dev/null +++ b/src/mscorlib/shared/System/HashCode.cs @@ -0,0 +1,435 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +/* + +The xxHash32 implementation is based on the code published by Yann Collet: +https://raw.githubusercontent.com/Cyan4973/xxHash/5c174cfa4e45a42f94082dc0d4539b39696afea1/xxhash.c + + xxHash - Fast Hash algorithm + Copyright (C) 2012-2016, Yann Collet + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - xxHash homepage: http://www.xxhash.com + - xxHash source repository : https://github.com/Cyan4973/xxHash + +*/ + +using System.Collections.Generic; +using System.ComponentModel; +using System.Runtime.CompilerServices; + +namespace System +{ + // xxHash32 is used for the hash code. + // https://github.com/Cyan4973/xxHash + + public struct HashCode + { + private static readonly uint s_seed = GenerateGlobalSeed(); + + private const uint Prime1 = 2654435761U; + private const uint Prime2 = 2246822519U; + private const uint Prime3 = 3266489917U; + private const uint Prime4 = 668265263U; + private const uint Prime5 = 374761393U; + + private uint _v1, _v2, _v3, _v4; + private uint _queue1, _queue2, _queue3; + private uint _length; + + private static unsafe uint GenerateGlobalSeed() + { + uint result; + Interop.GetRandomBytes((byte*)&result, sizeof(int)); + return result; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static int Combine(T1 value1) + { + var hc1 = (uint)(value1?.GetHashCode() ?? 0); + + uint hash = MixEmptyState(); + hash += 4; + + hash = QueueRound(hash, hc1); + + hash = MixFinal(hash); + return (int)hash; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static int Combine(T1 value1, T2 value2) + { + var hc1 = (uint)(value1?.GetHashCode() ?? 0); + var hc2 = (uint)(value2?.GetHashCode() ?? 0); + + uint hash = MixEmptyState(); + hash += 8; + + hash = QueueRound(hash, hc1); + hash = QueueRound(hash, hc2); + + hash = MixFinal(hash); + return (int)hash; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static int Combine(T1 value1, T2 value2, T3 value3) + { + var hc1 = (uint)(value1?.GetHashCode() ?? 0); + var hc2 = (uint)(value2?.GetHashCode() ?? 0); + var hc3 = (uint)(value3?.GetHashCode() ?? 0); + + uint hash = MixEmptyState(); + hash += 12; + + hash = QueueRound(hash, hc1); + hash = QueueRound(hash, hc2); + hash = QueueRound(hash, hc3); + + hash = MixFinal(hash); + return (int)hash; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static int Combine(T1 value1, T2 value2, T3 value3, T4 value4) + { + var hc1 = (uint)(value1?.GetHashCode() ?? 0); + var hc2 = (uint)(value2?.GetHashCode() ?? 0); + var hc3 = (uint)(value3?.GetHashCode() ?? 0); + var hc4 = (uint)(value4?.GetHashCode() ?? 0); + + Initialize(out uint v1, out uint v2, out uint v3, out uint v4); + + v1 = Round(v1, hc1); + v2 = Round(v2, hc2); + v3 = Round(v3, hc3); + v4 = Round(v4, hc4); + + uint hash = MixState(v1, v2, v3, v4); + hash += 16; + + hash = MixFinal(hash); + return (int)hash; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static int Combine(T1 value1, T2 value2, T3 value3, T4 value4, T5 value5) + { + var hc1 = (uint)(value1?.GetHashCode() ?? 0); + var hc2 = (uint)(value2?.GetHashCode() ?? 0); + var hc3 = (uint)(value3?.GetHashCode() ?? 0); + var hc4 = (uint)(value4?.GetHashCode() ?? 0); + var hc5 = (uint)(value5?.GetHashCode() ?? 0); + + Initialize(out uint v1, out uint v2, out uint v3, out uint v4); + + v1 = Round(v1, hc1); + v2 = Round(v2, hc2); + v3 = Round(v3, hc3); + v4 = Round(v4, hc4); + + uint hash = MixState(v1, v2, v3, v4); + hash += 20; + + hash = QueueRound(hash, hc5); + + hash = MixFinal(hash); + return (int)hash; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static int Combine(T1 value1, T2 value2, T3 value3, T4 value4, T5 value5, T6 value6) + { + var hc1 = (uint)(value1?.GetHashCode() ?? 0); + var hc2 = (uint)(value2?.GetHashCode() ?? 0); + var hc3 = (uint)(value3?.GetHashCode() ?? 0); + var hc4 = (uint)(value4?.GetHashCode() ?? 0); + var hc5 = (uint)(value5?.GetHashCode() ?? 0); + var hc6 = (uint)(value6?.GetHashCode() ?? 0); + + Initialize(out uint v1, out uint v2, out uint v3, out uint v4); + + v1 = Round(v1, hc1); + v2 = Round(v2, hc2); + v3 = Round(v3, hc3); + v4 = Round(v4, hc4); + + uint hash = MixState(v1, v2, v3, v4); + hash += 24; + + hash = QueueRound(hash, hc5); + hash = QueueRound(hash, hc6); + + hash = MixFinal(hash); + return (int)hash; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static int Combine(T1 value1, T2 value2, T3 value3, T4 value4, T5 value5, T6 value6, T7 value7) + { + var hc1 = (uint)(value1?.GetHashCode() ?? 0); + var hc2 = (uint)(value2?.GetHashCode() ?? 0); + var hc3 = (uint)(value3?.GetHashCode() ?? 0); + var hc4 = (uint)(value4?.GetHashCode() ?? 0); + var hc5 = (uint)(value5?.GetHashCode() ?? 0); + var hc6 = (uint)(value6?.GetHashCode() ?? 0); + var hc7 = (uint)(value7?.GetHashCode() ?? 0); + + Initialize(out uint v1, out uint v2, out uint v3, out uint v4); + + v1 = Round(v1, hc1); + v2 = Round(v2, hc2); + v3 = Round(v3, hc3); + v4 = Round(v4, hc4); + + uint hash = MixState(v1, v2, v3, v4); + hash += 28; + + hash = QueueRound(hash, hc5); + hash = QueueRound(hash, hc6); + hash = QueueRound(hash, hc7); + + hash = MixFinal(hash); + return (int)hash; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static int Combine(T1 value1, T2 value2, T3 value3, T4 value4, T5 value5, T6 value6, T7 value7, T8 value8) + { + var hc1 = (uint)(value1?.GetHashCode() ?? 0); + var hc2 = (uint)(value2?.GetHashCode() ?? 0); + var hc3 = (uint)(value3?.GetHashCode() ?? 0); + var hc4 = (uint)(value4?.GetHashCode() ?? 0); + var hc5 = (uint)(value5?.GetHashCode() ?? 0); + var hc6 = (uint)(value6?.GetHashCode() ?? 0); + var hc7 = (uint)(value7?.GetHashCode() ?? 0); + var hc8 = (uint)(value8?.GetHashCode() ?? 0); + + Initialize(out uint v1, out uint v2, out uint v3, out uint v4); + + v1 = Round(v1, hc1); + v2 = Round(v2, hc2); + v3 = Round(v3, hc3); + v4 = Round(v4, hc4); + + v1 = Round(v1, hc5); + v2 = Round(v2, hc6); + v3 = Round(v3, hc7); + v4 = Round(v4, hc8); + + uint hash = MixState(v1, v2, v3, v4); + hash += 32; + + hash = MixFinal(hash); + return (int)hash; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static uint Rol(uint value, int count) + => (value << count) | (value >> (32 - count)); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void Initialize(out uint v1, out uint v2, out uint v3, out uint v4) + { + v1 = s_seed + Prime1 + Prime2; + v2 = s_seed + Prime2; + v3 = s_seed + 0; + v4 = s_seed - Prime1; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static uint Round(uint hash, uint input) + { + hash += input * Prime2; + hash = Rol(hash, 13); + hash *= Prime1; + return hash; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static uint QueueRound(uint hash, uint queuedValue) + { + hash += queuedValue * Prime3; + return Rol(hash, 17) * Prime4; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static uint MixState(uint v1, uint v2, uint v3, uint v4) + { + return Rol(v1, 1) + Rol(v2, 7) + Rol(v3, 12) + Rol(v4, 18); + } + + private static uint MixEmptyState() + { + return s_seed + Prime5; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static uint MixFinal(uint hash) + { + hash ^= hash >> 15; + hash *= Prime2; + hash ^= hash >> 13; + hash *= Prime3; + hash ^= hash >> 16; + return hash; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void Add(T value) + { + Add(value?.GetHashCode() ?? 0); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void Add(T value, IEqualityComparer comparer) + { + if (comparer is null) + comparer = EqualityComparer.Default; + Add(comparer.GetHashCode(value)); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void Add(int value) + { + // Note that x & 0x3 is like mod 3, but faster. + + var val = (uint)value; + uint position = _length & 0x3; + + // xxHash works as follows: + // 0. Initialize immediately. We can't do this in a struct (no default + // ctor). + // 1. Accumulate blocks of length 16 (4 uints) into 4 accumulators. + // 2. Accumulate remaining blocks of length 4 (1 uint) into the hash. + // 3. Accumulate remaining blocks of length 1 into the hash. + + // There is no need for *3 as this type only accepts ints. _queue1, + // _queue2 and _queue3 are basically a buffer so that when ToHashCode is + // called we can execute *2 correctly. That's what first three case + // statements do. + + // We need to initialize the xxHash32 state (_v1 -> _v4) lazily (see *0) + // and the last place that can be done if you look at the original code + // is just before the first block of 16 bytes is mixed in. The xxHash32 + // state is never used for streams containing fewer than 16 bytes. + + // A bloom filter is used to determine whether the default case statement + // has even been executed. To do that we check if the length is smaller + // than 4 (_length ^ position will be non-zero if this is the case). The + // case statement is for values larger than 2, so the check only succeeds + // on exactly 3. + + // To see what's really going on here, have a look at the Combine methods. + + // Switch can't be inlined. + + if (position == 0) + _queue1 = val; + else if (position == 1) + _queue2 = val; + else if (position == 2) + _queue3 = val; + else // == 3 + { + // length smaller than 4? + if ((_length ^ position) == 0) + Initialize(out _v1, out _v2, out _v3, out _v4); + + _v1 = Round(_v1, _queue1); + _v2 = Round(_v2, _queue2); + _v3 = Round(_v3, _queue3); + _v4 = Round(_v4, val); + } + + // Throw for more than uint.MaxValue fields. + _length = checked(_length + 1); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public int ToHashCode() + { + uint position = _length & 0x3; + + // If the length is less than 3, _v1 -> _v4 don't contain + // anything yet. xxHash32 treats this differently. + + uint hash = (_length ^ position) == 0 + ? MixEmptyState() + : MixState(_v1, _v2, _v3, _v4); + + // Multiply by 4 because we've been counting in ints, not + // bytes. + + hash += _length * 4; + + // Mix what remains in the queue + // Switch can't be inlined right now, so use as few + // branches as possible instead. + + if (position > 0) + { + hash = QueueRound(hash, _queue1); + if (position > 1) + { + hash = QueueRound(hash, _queue2); + if (position > 2) + hash = QueueRound(hash, _queue3); + } + } + + hash = MixFinal(hash); + return (int)hash; + } + +# pragma warning disable 0809 + // Obsolete member 'memberA' overrides non-obsolete member 'memberB'. + // Disallowing GetHashCode is by design + + // * We decided to not override GetHashCode() to produce the hash code + // as this would be weird, both naming-wise as well as from a behavioral + // standpoint (GetHashCode() should return the object's hash code, not + // the one being computed). + + // * Even though ToHashCode() can be called safely multiple times on this + // implementation, it is not part of the contract. If the implementation + // has to change in the future we don't want to worry about people who + // might have incorrectly used this type. + + [Obsolete("Use ToHashCode to retrieve the computed hash code.", error: true)] + [EditorBrowsable(EditorBrowsableState.Never)] + [MethodImpl(MethodImplOptions.NoInlining)] + public override int GetHashCode() => throw new NotSupportedException(); +# pragma warning restore 0809 + + } +} From 30d3d3facdaeb4968916b388f1fe314c7379d1bf Mon Sep 17 00:00:00 2001 From: Jonathan Dickinson Date: Sat, 4 Nov 2017 10:37:39 -0700 Subject: [PATCH 02/10] PR Feedback * Removing AgressiveInlining from all public facing methods. * Fixing erroneous comment --- src/mscorlib/shared/System/HashCode.cs | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/src/mscorlib/shared/System/HashCode.cs b/src/mscorlib/shared/System/HashCode.cs index 4e6c419c31b9..733896ed6861 100644 --- a/src/mscorlib/shared/System/HashCode.cs +++ b/src/mscorlib/shared/System/HashCode.cs @@ -71,7 +71,6 @@ private static unsafe uint GenerateGlobalSeed() return result; } - [MethodImpl(MethodImplOptions.AggressiveInlining)] public static int Combine(T1 value1) { var hc1 = (uint)(value1?.GetHashCode() ?? 0); @@ -85,7 +84,6 @@ public static int Combine(T1 value1) return (int)hash; } - [MethodImpl(MethodImplOptions.AggressiveInlining)] public static int Combine(T1 value1, T2 value2) { var hc1 = (uint)(value1?.GetHashCode() ?? 0); @@ -101,7 +99,6 @@ public static int Combine(T1 value1, T2 value2) return (int)hash; } - [MethodImpl(MethodImplOptions.AggressiveInlining)] public static int Combine(T1 value1, T2 value2, T3 value3) { var hc1 = (uint)(value1?.GetHashCode() ?? 0); @@ -119,7 +116,6 @@ public static int Combine(T1 value1, T2 value2, T3 value3) return (int)hash; } - [MethodImpl(MethodImplOptions.AggressiveInlining)] public static int Combine(T1 value1, T2 value2, T3 value3, T4 value4) { var hc1 = (uint)(value1?.GetHashCode() ?? 0); @@ -141,7 +137,6 @@ public static int Combine(T1 value1, T2 value2, T3 value3, T4 va return (int)hash; } - [MethodImpl(MethodImplOptions.AggressiveInlining)] public static int Combine(T1 value1, T2 value2, T3 value3, T4 value4, T5 value5) { var hc1 = (uint)(value1?.GetHashCode() ?? 0); @@ -166,7 +161,6 @@ public static int Combine(T1 value1, T2 value2, T3 value3, T return (int)hash; } - [MethodImpl(MethodImplOptions.AggressiveInlining)] public static int Combine(T1 value1, T2 value2, T3 value3, T4 value4, T5 value5, T6 value6) { var hc1 = (uint)(value1?.GetHashCode() ?? 0); @@ -193,7 +187,6 @@ public static int Combine(T1 value1, T2 value2, T3 value return (int)hash; } - [MethodImpl(MethodImplOptions.AggressiveInlining)] public static int Combine(T1 value1, T2 value2, T3 value3, T4 value4, T5 value5, T6 value6, T7 value7) { var hc1 = (uint)(value1?.GetHashCode() ?? 0); @@ -222,7 +215,6 @@ public static int Combine(T1 value1, T2 value2, T3 v return (int)hash; } - [MethodImpl(MethodImplOptions.AggressiveInlining)] public static int Combine(T1 value1, T2 value2, T3 value3, T4 value4, T5 value5, T6 value6, T7 value7, T8 value8) { var hc1 = (uint)(value1?.GetHashCode() ?? 0); @@ -304,13 +296,11 @@ private static uint MixFinal(uint hash) return hash; } - [MethodImpl(MethodImplOptions.AggressiveInlining)] public void Add(T value) { Add(value?.GetHashCode() ?? 0); } - [MethodImpl(MethodImplOptions.AggressiveInlining)] public void Add(T value, IEqualityComparer comparer) { if (comparer is null) @@ -321,7 +311,7 @@ public void Add(T value, IEqualityComparer comparer) [MethodImpl(MethodImplOptions.AggressiveInlining)] private void Add(int value) { - // Note that x & 0x3 is like mod 3, but faster. + // Note that x & 0x3 is like mod 4, but faster. var val = (uint)value; uint position = _length & 0x3; @@ -375,7 +365,6 @@ private void Add(int value) _length = checked(_length + 1); } - [MethodImpl(MethodImplOptions.AggressiveInlining)] public int ToHashCode() { uint position = _length & 0x3; From af71a1bf03d5caea630c400c2c09f183d5e259cc Mon Sep 17 00:00:00 2001 From: Jonathan Dickinson Date: Sun, 5 Nov 2017 12:02:12 -0800 Subject: [PATCH 03/10] PR Feedback * Adding comment explaining why Combine(value1) performs a hash. * Removing superfluous code and attributes. * Improving perf of Add(..., null). * Moving two lines of code below the giant comment to improve reading locality. * Use mod instead of bit twiddling. --- src/mscorlib/shared/System/HashCode.cs | 29 +++++++++++++++++--------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/src/mscorlib/shared/System/HashCode.cs b/src/mscorlib/shared/System/HashCode.cs index 733896ed6861..e9890523c0da 100644 --- a/src/mscorlib/shared/System/HashCode.cs +++ b/src/mscorlib/shared/System/HashCode.cs @@ -73,6 +73,13 @@ private static unsafe uint GenerateGlobalSeed() public static int Combine(T1 value1) { + // Provide a way of diffusing bits from something with a + // limited input hash space. For example, many enums only + // have a few possible hashes, only using the bottom few bits + // of the code. Some collections are built on the assumption that + // hashes are spread over a larger space, so diffusing the bits + // may help the collection work more efficiently. + var hc1 = (uint)(value1?.GetHashCode() ?? 0); uint hash = MixEmptyState(); @@ -254,7 +261,7 @@ private static void Initialize(out uint v1, out uint v2, out uint v3, out uint v { v1 = s_seed + Prime1 + Prime2; v2 = s_seed + Prime2; - v3 = s_seed + 0; + v3 = s_seed; v4 = s_seed - Prime1; } @@ -304,18 +311,18 @@ public void Add(T value) public void Add(T value, IEqualityComparer comparer) { if (comparer is null) - comparer = EqualityComparer.Default; - Add(comparer.GetHashCode(value)); + { + Add(value); + } + else + { + Add(comparer.GetHashCode(value)); + } } [MethodImpl(MethodImplOptions.AggressiveInlining)] private void Add(int value) { - // Note that x & 0x3 is like mod 4, but faster. - - var val = (uint)value; - uint position = _length & 0x3; - // xxHash works as follows: // 0. Initialize immediately. We can't do this in a struct (no default // ctor). @@ -340,6 +347,9 @@ private void Add(int value) // on exactly 3. // To see what's really going on here, have a look at the Combine methods. + + var val = (uint)value; + uint position = _length % 4; // Switch can't be inlined. @@ -367,7 +377,7 @@ private void Add(int value) public int ToHashCode() { - uint position = _length & 0x3; + uint position = _length % 4; // If the length is less than 3, _v1 -> _v4 don't contain // anything yet. xxHash32 treats this differently. @@ -416,7 +426,6 @@ public int ToHashCode() [Obsolete("Use ToHashCode to retrieve the computed hash code.", error: true)] [EditorBrowsable(EditorBrowsableState.Never)] - [MethodImpl(MethodImplOptions.NoInlining)] public override int GetHashCode() => throw new NotSupportedException(); # pragma warning restore 0809 From 7f80518b3908f4d2366edf0033a2411bfeffb688 Mon Sep 17 00:00:00 2001 From: Jonathan Dickinson Date: Mon, 6 Nov 2017 21:08:38 -0800 Subject: [PATCH 04/10] PR Feedback * Improving some comments. Ensuring comments wrap at column 80. * Fixing GenerateGlobalSeed(). * Reading _length once in methods. * Ignoring overflow. * Simplifying some conditions. * Using goto for fallthrough in ToHashCode(). * Adding Equals as a disallowed method. * Adding exception message string. --- src/mscorlib/Resources/Strings.resx | 3 + src/mscorlib/shared/System/HashCode.cs | 132 ++++++++++++------------- 2 files changed, 69 insertions(+), 66 deletions(-) diff --git a/src/mscorlib/Resources/Strings.resx b/src/mscorlib/Resources/Strings.resx index 0ded8ea294f4..0401f037201a 100644 --- a/src/mscorlib/Resources/Strings.resx +++ b/src/mscorlib/Resources/Strings.resx @@ -3691,4 +3691,7 @@ Release all references before disposing this instance. + + HashCode is a mutable struct and should not be compared with other HashCodes. + diff --git a/src/mscorlib/shared/System/HashCode.cs b/src/mscorlib/shared/System/HashCode.cs index e9890523c0da..fc76b051df84 100644 --- a/src/mscorlib/shared/System/HashCode.cs +++ b/src/mscorlib/shared/System/HashCode.cs @@ -67,18 +67,18 @@ public struct HashCode private static unsafe uint GenerateGlobalSeed() { uint result; - Interop.GetRandomBytes((byte*)&result, sizeof(int)); + Interop.GetRandomBytes((byte*)&result, sizeof(uint)); return result; } public static int Combine(T1 value1) { - // Provide a way of diffusing bits from something with a - // limited input hash space. For example, many enums only - // have a few possible hashes, only using the bottom few bits - // of the code. Some collections are built on the assumption that - // hashes are spread over a larger space, so diffusing the bits - // may help the collection work more efficiently. + // Provide a way of diffusing bits from something with a limited + // input hash space. For example, many enums only have a few + // possible hashes, only using the bottom few bits of the code. Some + // collections are built on the assumption that hashes are spread + // over a larger space, so diffusing the bits may help the + // collection work more efficiently. var hc1 = (uint)(value1?.GetHashCode() ?? 0); @@ -323,33 +323,30 @@ public void Add(T value, IEqualityComparer comparer) [MethodImpl(MethodImplOptions.AggressiveInlining)] private void Add(int value) { - // xxHash works as follows: - // 0. Initialize immediately. We can't do this in a struct (no default - // ctor). + // The original xxHash works as follows: + // 0. Initialize immediately. We can't do this in a struct (no + // default ctor). // 1. Accumulate blocks of length 16 (4 uints) into 4 accumulators. - // 2. Accumulate remaining blocks of length 4 (1 uint) into the hash. + // 2. Accumulate remaining blocks of length 4 (1 uint) into the + // hash. // 3. Accumulate remaining blocks of length 1 into the hash. - // There is no need for *3 as this type only accepts ints. _queue1, - // _queue2 and _queue3 are basically a buffer so that when ToHashCode is - // called we can execute *2 correctly. That's what first three case - // statements do. + // There is no need for #3 as this type only accepts ints. _queue1, + // _queue2 and _queue3 are basically a buffer so that when + // ToHashCode is called we can execute #2 correctly. - // We need to initialize the xxHash32 state (_v1 -> _v4) lazily (see *0) - // and the last place that can be done if you look at the original code - // is just before the first block of 16 bytes is mixed in. The xxHash32 - // state is never used for streams containing fewer than 16 bytes. + // We need to initialize the xxHash32 state (_v1 to _v4) lazily (see + // #0) nd the last place that can be done if you look at the + // original code is just before the first block of 16 bytes is mixed + // in. The xxHash32 state is never used for streams containing fewer + // than 16 bytes. - // A bloom filter is used to determine whether the default case statement - // has even been executed. To do that we check if the length is smaller - // than 4 (_length ^ position will be non-zero if this is the case). The - // case statement is for values larger than 2, so the check only succeeds - // on exactly 3. - - // To see what's really going on here, have a look at the Combine methods. + // To see what's really going on here, have a look at the Combine + // methods. var val = (uint)value; - uint position = _length % 4; + uint previousLength = _length++; + uint position = previousLength % 4; // Switch can't be inlined. @@ -359,10 +356,9 @@ private void Add(int value) _queue2 = val; else if (position == 2) _queue3 = val; - else // == 3 + else // position == 3 { - // length smaller than 4? - if ((_length ^ position) == 0) + if (previousLength == 3) Initialize(out _v1, out _v2, out _v3, out _v4); _v1 = Round(_v1, _queue1); @@ -370,64 +366,68 @@ private void Add(int value) _v3 = Round(_v3, _queue3); _v4 = Round(_v4, val); } - - // Throw for more than uint.MaxValue fields. - _length = checked(_length + 1); } public int ToHashCode() { - uint position = _length % 4; + uint length = _length; + uint position = length % 4; - // If the length is less than 3, _v1 -> _v4 don't contain - // anything yet. xxHash32 treats this differently. + // If the length is less than 4, _v1 to _v4 don't contain anything + // yet. xxHash32 treats this differently. - uint hash = (_length ^ position) == 0 - ? MixEmptyState() - : MixState(_v1, _v2, _v3, _v4); + uint hash; + if (length < 4) + hash = MixEmptyState(); + else + hash = MixState(_v1, _v2, _v3, _v4); - // Multiply by 4 because we've been counting in ints, not - // bytes. + // Multiply by 4 because we've been counting in bytes, not ints. - hash += _length * 4; + hash += length * 4; // Mix what remains in the queue - // Switch can't be inlined right now, so use as few - // branches as possible instead. - if (position > 0) - { - hash = QueueRound(hash, _queue1); - if (position > 1) - { - hash = QueueRound(hash, _queue2); - if (position > 2) - hash = QueueRound(hash, _queue3); - } - } + // Switch can't be inlined right now, so emulate case statement + // fallthrough using goto. + + // position refers to the *next* queue position in this method, so + // position == 1 means that _queue1 is populated; _queue2 would have + // been populated on the next call to Add. + if (position == 0) goto mixFinal; + hash = QueueRound(hash, _queue1); + if (position == 1) goto mixFinal; + hash = QueueRound(hash, _queue2); + if (position == 2) goto mixFinal; + hash = QueueRound(hash, _queue3); + + mixFinal: hash = MixFinal(hash); return (int)hash; } -# pragma warning disable 0809 +#pragma warning disable 0809 // Obsolete member 'memberA' overrides non-obsolete member 'memberB'. - // Disallowing GetHashCode is by design + // Disallowing GetHashCode and Equals is by design // * We decided to not override GetHashCode() to produce the hash code - // as this would be weird, both naming-wise as well as from a behavioral - // standpoint (GetHashCode() should return the object's hash code, not - // the one being computed). + // as this would be weird, both naming-wise as well as from a + // behavioral standpoint (GetHashCode() should return the object's + // hash code, not the one being computed). - // * Even though ToHashCode() can be called safely multiple times on this - // implementation, it is not part of the contract. If the implementation - // has to change in the future we don't want to worry about people who - // might have incorrectly used this type. + // * Even though ToHashCode() can be called safely multiple times on + // this implementation, it is not part of the contract. If the + // implementation has to change in the future we don't want to worry + // about people who might have incorrectly used this type. - [Obsolete("Use ToHashCode to retrieve the computed hash code.", error: true)] + [Obsolete("HashCode is a mutable struct and should not be compared with other HashCodes.", error: true)] [EditorBrowsable(EditorBrowsableState.Never)] - public override int GetHashCode() => throw new NotSupportedException(); -# pragma warning restore 0809 + public override int GetHashCode() => throw new NotSupportedException(SR.HashCode_EqualityNotSupported); + [Obsolete("HashCode is a mutable struct and should not be compared with other HashCodes.", error: true)] + [EditorBrowsable(EditorBrowsableState.Never)] + public override bool Equals(object obj) => throw new NotSupportedException(SR.HashCode_EqualityNotSupported); +#pragma warning restore 0809 } } From 5bc90958101349daa56ace304fed5d7f4f3f60a4 Mon Sep 17 00:00:00 2001 From: Jonathan Dickinson Date: Tue, 7 Nov 2017 13:26:32 -0800 Subject: [PATCH 05/10] PR Feedback * Adding further detail to obsolete and exception message. * Fix spacing issues. * Restore nested if for ToHashCode. --- src/mscorlib/Resources/Strings.resx | 2 +- src/mscorlib/shared/System/HashCode.cs | 36 ++++++++++++++------------ 2 files changed, 21 insertions(+), 17 deletions(-) diff --git a/src/mscorlib/Resources/Strings.resx b/src/mscorlib/Resources/Strings.resx index 0401f037201a..32fbb211c12e 100644 --- a/src/mscorlib/Resources/Strings.resx +++ b/src/mscorlib/Resources/Strings.resx @@ -3692,6 +3692,6 @@ Release all references before disposing this instance. - HashCode is a mutable struct and should not be compared with other HashCodes. + HashCode is a mutable struct and should not be compared with other HashCodes. Use ToHashCode to retrieve the computed hash code. diff --git a/src/mscorlib/shared/System/HashCode.cs b/src/mscorlib/shared/System/HashCode.cs index fc76b051df84..7527c2db6899 100644 --- a/src/mscorlib/shared/System/HashCode.cs +++ b/src/mscorlib/shared/System/HashCode.cs @@ -73,7 +73,7 @@ private static unsafe uint GenerateGlobalSeed() public static int Combine(T1 value1) { - // Provide a way of diffusing bits from something with a limited + // Provide a way of diffusing bits from something with a limited // input hash space. For example, many enums only have a few // possible hashes, only using the bottom few bits of the code. Some // collections are built on the assumption that hashes are spread @@ -345,6 +345,9 @@ private void Add(int value) // methods. var val = (uint)value; + + // Storing the value of _length locally shaves of quite a few bytes + // in the resulting machine code. uint previousLength = _length++; uint position = previousLength % 4; @@ -370,17 +373,15 @@ private void Add(int value) public int ToHashCode() { + // Storing the value of _length locally shaves of quite a few bytes + // in the resulting machine code. uint length = _length; uint position = length % 4; // If the length is less than 4, _v1 to _v4 don't contain anything // yet. xxHash32 treats this differently. - uint hash; - if (length < 4) - hash = MixEmptyState(); - else - hash = MixState(_v1, _v2, _v3, _v4); + uint hash = length < 4 ? MixEmptyState() : MixState(_v1, _v2, _v3, _v4); // Multiply by 4 because we've been counting in bytes, not ints. @@ -395,14 +396,17 @@ public int ToHashCode() // position == 1 means that _queue1 is populated; _queue2 would have // been populated on the next call to Add. - if (position == 0) goto mixFinal; - hash = QueueRound(hash, _queue1); - if (position == 1) goto mixFinal; - hash = QueueRound(hash, _queue2); - if (position == 2) goto mixFinal; - hash = QueueRound(hash, _queue3); - - mixFinal: + if (position > 0) + { + hash = QueueRound(hash, _queue1); + if (position > 1) + { + hash = QueueRound(hash, _queue2); + if (position > 2) + hash = QueueRound(hash, _queue3); + } + } + hash = MixFinal(hash); return (int)hash; } @@ -421,11 +425,11 @@ public int ToHashCode() // implementation has to change in the future we don't want to worry // about people who might have incorrectly used this type. - [Obsolete("HashCode is a mutable struct and should not be compared with other HashCodes.", error: true)] + [Obsolete("HashCode is a mutable struct and should not be compared with other HashCodes. Use ToHashCode to retrieve the computed hash code.", error: true)] [EditorBrowsable(EditorBrowsableState.Never)] public override int GetHashCode() => throw new NotSupportedException(SR.HashCode_EqualityNotSupported); - [Obsolete("HashCode is a mutable struct and should not be compared with other HashCodes.", error: true)] + [Obsolete("HashCode is a mutable struct and should not be compared with other HashCodes. Use ToHashCode to retrieve the computed hash code.", error: true)] [EditorBrowsable(EditorBrowsableState.Never)] public override bool Equals(object obj) => throw new NotSupportedException(SR.HashCode_EqualityNotSupported); #pragma warning restore 0809 From bb44e32f55f9d56a9e7350b57ad2e783c8c67c4d Mon Sep 17 00:00:00 2001 From: Jonathan Dickinson Date: Tue, 7 Nov 2017 18:01:24 -0800 Subject: [PATCH 06/10] PR Feedback * Correcting goto comment. * Using different strings for errors. * Re-ordering a comment so that it makes more sense. * Fixing comment that was still difficult to understand. --- src/mscorlib/Resources/Strings.resx | 5 ++++- src/mscorlib/shared/System/HashCode.cs | 21 +++++++++++---------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/src/mscorlib/Resources/Strings.resx b/src/mscorlib/Resources/Strings.resx index 32fbb211c12e..ef3821abf181 100644 --- a/src/mscorlib/Resources/Strings.resx +++ b/src/mscorlib/Resources/Strings.resx @@ -3691,7 +3691,10 @@ Release all references before disposing this instance. - + HashCode is a mutable struct and should not be compared with other HashCodes. Use ToHashCode to retrieve the computed hash code. + + HashCode is a mutable struct and should not be compared with other HashCodes. + diff --git a/src/mscorlib/shared/System/HashCode.cs b/src/mscorlib/shared/System/HashCode.cs index 7527c2db6899..118133db262e 100644 --- a/src/mscorlib/shared/System/HashCode.cs +++ b/src/mscorlib/shared/System/HashCode.cs @@ -376,6 +376,10 @@ public int ToHashCode() // Storing the value of _length locally shaves of quite a few bytes // in the resulting machine code. uint length = _length; + + // position refers to the *next* queue position in this method, so + // position == 1 means that _queue1 is populated; _queue2 would have + // been populated on the next call to Add. uint position = length % 4; // If the length is less than 4, _v1 to _v4 don't contain anything @@ -383,19 +387,16 @@ public int ToHashCode() uint hash = length < 4 ? MixEmptyState() : MixState(_v1, _v2, _v3, _v4); - // Multiply by 4 because we've been counting in bytes, not ints. + // _length is incremented once per Add(Int32) and is therefore 4 + // times too small (xxHash length is in bytes, not ints). hash += length * 4; // Mix what remains in the queue - // Switch can't be inlined right now, so emulate case statement - // fallthrough using goto. - - // position refers to the *next* queue position in this method, so - // position == 1 means that _queue1 is populated; _queue2 would have - // been populated on the next call to Add. - + // Switch can't be inlined right now, so use as few branches as + // possible by manually excluding impossible scenarios (position > 1 + // is always false if position is not > 0). if (position > 0) { hash = QueueRound(hash, _queue1); @@ -427,9 +428,9 @@ public int ToHashCode() [Obsolete("HashCode is a mutable struct and should not be compared with other HashCodes. Use ToHashCode to retrieve the computed hash code.", error: true)] [EditorBrowsable(EditorBrowsableState.Never)] - public override int GetHashCode() => throw new NotSupportedException(SR.HashCode_EqualityNotSupported); + public override int GetHashCode() => throw new NotSupportedException(SR.HashCode_HashCodeNotSupported); - [Obsolete("HashCode is a mutable struct and should not be compared with other HashCodes. Use ToHashCode to retrieve the computed hash code.", error: true)] + [Obsolete("HashCode is a mutable struct and should not be compared with other HashCodes.", error: true)] [EditorBrowsable(EditorBrowsableState.Never)] public override bool Equals(object obj) => throw new NotSupportedException(SR.HashCode_EqualityNotSupported); #pragma warning restore 0809 From 526ef4a7c999d0eeb29cd96d63f7d6e3d5e82876 Mon Sep 17 00:00:00 2001 From: Jonathan Dickinson Date: Tue, 14 Nov 2017 10:21:32 -0800 Subject: [PATCH 07/10] PR Feedback * Removing AggressiveInlining on big method. * Reducing number of generic instantiations for Add. --- src/mscorlib/shared/System/HashCode.cs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/mscorlib/shared/System/HashCode.cs b/src/mscorlib/shared/System/HashCode.cs index 118133db262e..17861ece4247 100644 --- a/src/mscorlib/shared/System/HashCode.cs +++ b/src/mscorlib/shared/System/HashCode.cs @@ -312,7 +312,7 @@ public void Add(T value, IEqualityComparer comparer) { if (comparer is null) { - Add(value); + Add(value?.GetHashCode() ?? 0); } else { @@ -320,7 +320,6 @@ public void Add(T value, IEqualityComparer comparer) } } - [MethodImpl(MethodImplOptions.AggressiveInlining)] private void Add(int value) { // The original xxHash works as follows: From f8475ca271272ebf51d010847a6f1b775696933f Mon Sep 17 00:00:00 2001 From: Jonathan Dickinson Date: Tue, 14 Nov 2017 13:35:51 -0800 Subject: [PATCH 08/10] PR Feedback * Comment. --- src/mscorlib/shared/System/HashCode.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mscorlib/shared/System/HashCode.cs b/src/mscorlib/shared/System/HashCode.cs index 17861ece4247..d89896026124 100644 --- a/src/mscorlib/shared/System/HashCode.cs +++ b/src/mscorlib/shared/System/HashCode.cs @@ -312,6 +312,8 @@ public void Add(T value, IEqualityComparer comparer) { if (comparer is null) { + // Prevent unessecary generic method instantiation by manually + // inlining. Add(value?.GetHashCode() ?? 0); } else From 80783ceeb585b001ccb430faa2701a2068155422 Mon Sep 17 00:00:00 2001 From: Jonathan Dickinson Date: Tue, 14 Nov 2017 13:36:17 -0800 Subject: [PATCH 09/10] Spelling. --- src/mscorlib/shared/System/HashCode.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mscorlib/shared/System/HashCode.cs b/src/mscorlib/shared/System/HashCode.cs index d89896026124..a1ddcebbcf39 100644 --- a/src/mscorlib/shared/System/HashCode.cs +++ b/src/mscorlib/shared/System/HashCode.cs @@ -312,7 +312,7 @@ public void Add(T value, IEqualityComparer comparer) { if (comparer is null) { - // Prevent unessecary generic method instantiation by manually + // Prevent unnecessary generic method instantiation by manually // inlining. Add(value?.GetHashCode() ?? 0); } From 94250c71bdeb570d70407967037e1723520b99f2 Mon Sep 17 00:00:00 2001 From: Jonathan Dickinson Date: Tue, 14 Nov 2017 17:18:12 -0800 Subject: [PATCH 10/10] PR Feedback Slightly optimize Add(,). --- src/mscorlib/shared/System/HashCode.cs | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/src/mscorlib/shared/System/HashCode.cs b/src/mscorlib/shared/System/HashCode.cs index a1ddcebbcf39..4be57be02a9a 100644 --- a/src/mscorlib/shared/System/HashCode.cs +++ b/src/mscorlib/shared/System/HashCode.cs @@ -310,16 +310,7 @@ public void Add(T value) public void Add(T value, IEqualityComparer comparer) { - if (comparer is null) - { - // Prevent unnecessary generic method instantiation by manually - // inlining. - Add(value?.GetHashCode() ?? 0); - } - else - { - Add(comparer.GetHashCode(value)); - } + Add(comparer != null ? comparer.GetHashCode(value) : (value?.GetHashCode() ?? 0)); } private void Add(int value)