From 55777e8f94b5b1578c707528678cd3073d849d13 Mon Sep 17 00:00:00 2001 From: Jan Krivanek Date: Mon, 18 Mar 2024 15:01:50 +0100 Subject: [PATCH] Reintroduce FNV hashing (#9860) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Revert "Revert "Shorten UTD marker file (#9387)" except Microsoft.Common.Curr…" This reverts commit 5af9301a6d46fd40ab1420467d3291519f62c7e6. * Make FNV hash compatible across endianness * Add StableStringHash intrinsic function overloads * Put StringTools functions references behind changewave * Prevent StableStringHash inlining * Move the changewave description to proper section --- documentation/wiki/ChangeWaves.md | 1 + .../Evaluation/Expander_Tests.cs | 42 ++++- src/Build/Evaluation/Expander.cs | 10 +- src/Build/Evaluation/IntrinsicFunctions.cs | 58 ++++++- .../BinaryLogger/BuildEventArgsWriter.cs | 40 +---- src/StringTools/FowlerNollVo1aHash.cs | 145 ++++++++++++++++++ 6 files changed, 253 insertions(+), 43 deletions(-) create mode 100644 src/StringTools/FowlerNollVo1aHash.cs diff --git a/documentation/wiki/ChangeWaves.md b/documentation/wiki/ChangeWaves.md index 86ff4fe515e..f2ad4211e16 100644 --- a/documentation/wiki/ChangeWaves.md +++ b/documentation/wiki/ChangeWaves.md @@ -33,6 +33,7 @@ A wave of features is set to "rotate out" (i.e. 
become standard functionality) t - [Load NuGet.Frameworks into secondary AppDomain (MSBuild.exe only)](https://github.com/dotnet/msbuild/pull/9446) - [Update Traits when environment has been changed](https://github.com/dotnet/msbuild/pull/9655) - [Exec task does not trim leading whitespaces for ConsoleOutput](https://github.com/dotnet/msbuild/pull/9722) +- [Introduce [MSBuild]::StableStringHash overloads](https://github.com/dotnet/msbuild/issues/9519) - [Keep the encoding of standard output & error consistent with the console code page for ToolTask](https://github.com/dotnet/msbuild/pull/9539) diff --git a/src/Build.UnitTests/Evaluation/Expander_Tests.cs b/src/Build.UnitTests/Evaluation/Expander_Tests.cs index d7744692f25..b7744d6ed4f 100644 --- a/src/Build.UnitTests/Evaluation/Expander_Tests.cs +++ b/src/Build.UnitTests/Evaluation/Expander_Tests.cs @@ -3890,8 +3890,14 @@ public void PropertyStringConstructorConsumingItemMetadata(string metadatumName, result.ShouldBe(metadatumValue); } - [Fact] - public void PropertyFunctionHashCodeSameOnlyIfStringSame() + public static IEnumerable GetHashAlgoTypes() + => Enum.GetNames(typeof(IntrinsicFunctions.StringHashingAlgorithm)) + .Append(null) + .Select(t => new object[] { t }); + + [Theory] + [MemberData(nameof(GetHashAlgoTypes))] + public void PropertyFunctionHashCodeSameOnlyIfStringSame(string hashType) { PropertyDictionary pg = new PropertyDictionary(); Expander expander = new Expander(pg, FileSystems.Default); @@ -3906,8 +3912,9 @@ public void PropertyFunctionHashCodeSameOnlyIfStringSame() "cat12s", "cat1s" }; - int[] hashes = stringsToHash.Select(toHash => - (int)expander.ExpandPropertiesLeaveTypedAndEscaped($"$([MSBuild]::StableStringHash('{toHash}'))", ExpanderOptions.ExpandProperties, MockElementLocation.Instance)) + string hashTypeString = hashType == null ? 
"" : $", '{hashType}'"; + object[] hashes = stringsToHash.Select(toHash => + expander.ExpandPropertiesLeaveTypedAndEscaped($"$([MSBuild]::StableStringHash('{toHash}'{hashTypeString}))", ExpanderOptions.ExpandProperties, MockElementLocation.Instance)) .ToArray(); for (int a = 0; a < hashes.Length; a++) { @@ -3925,6 +3932,33 @@ public void PropertyFunctionHashCodeSameOnlyIfStringSame() } } + [Theory] + [MemberData(nameof(GetHashAlgoTypes))] + public void PropertyFunctionHashCodeReturnsExpectedType(string hashType) + { + PropertyDictionary pg = new PropertyDictionary(); + Expander expander = new Expander(pg, FileSystems.Default); + Type expectedType; + + expectedType = hashType switch + { + null => typeof(int), + "Legacy" => typeof(int), + "Fnv1a32bit" => typeof(int), + "Fnv1a32bitFast" => typeof(int), + "Fnv1a64bit" => typeof(long), + "Fnv1a64bitFast" => typeof(long), + "Sha256" => typeof(string), + _ => throw new ArgumentOutOfRangeException(nameof(hashType)) + }; + + + string hashTypeString = hashType == null ? "" : $", '{hashType}'"; + object hashValue = expander.ExpandPropertiesLeaveTypedAndEscaped($"$([MSBuild]::StableStringHash('FooBar'{hashTypeString}))", ExpanderOptions.ExpandProperties, MockElementLocation.Instance); + + hashValue.ShouldBeOfType(expectedType); + } + [Theory] [InlineData("easycase")] [InlineData("")] diff --git a/src/Build/Evaluation/Expander.cs b/src/Build/Evaluation/Expander.cs index b98f902e994..cc8e5ac2efa 100644 --- a/src/Build/Evaluation/Expander.cs +++ b/src/Build/Evaluation/Expander.cs @@ -4159,7 +4159,15 @@ private bool TryExecuteWellKnownFunction(out object returnVal, object objectInst { if (TryGetArg(args, out string arg0)) { - returnVal = IntrinsicFunctions.StableStringHash(arg0); + // Prevent loading methods refs from StringTools if ChangeWave opted out. + returnVal = ChangeWaves.AreFeaturesEnabled(ChangeWaves.Wave17_10) + ? 
IntrinsicFunctions.StableStringHash(arg0)
+                                    : IntrinsicFunctions.StableStringHashLegacy(arg0);
+                                return true;
+                            }
+                            else if (TryGetArgs(args, out string arg1, out string arg2) && Enum.TryParse<IntrinsicFunctions.StringHashingAlgorithm>(arg2, true, out var hashAlgorithm)) // explicit TEnum: it cannot be inferred from 'out var'
+                            {
+                                returnVal = IntrinsicFunctions.StableStringHash(arg1, hashAlgorithm);
                                 return true;
                             }
                         }
diff --git a/src/Build/Evaluation/IntrinsicFunctions.cs b/src/Build/Evaluation/IntrinsicFunctions.cs
index 37312b8c83e..6f8c5ed00f6 100644
--- a/src/Build/Evaluation/IntrinsicFunctions.cs
+++ b/src/Build/Evaluation/IntrinsicFunctions.cs
@@ -4,6 +4,7 @@
 using System;
 using System.Collections.Generic;
 using System.IO;
+using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
 using System.Runtime.Versioning;
 using System.Text;
@@ -14,6 +15,7 @@
 using Microsoft.Build.Shared;
 using Microsoft.Build.Shared.FileSystem;
 using Microsoft.Build.Utilities;
+using Microsoft.NET.StringTools;
 using Microsoft.Win32;
 
 // Needed for DoesTaskHostExistForParameters
@@ -397,12 +399,60 @@ internal static string ConvertFromBase64(string toDecode)
             return Encoding.UTF8.GetString(Convert.FromBase64String(toDecode));
         }
 
+        internal enum StringHashingAlgorithm
+        {
+            // Legacy way of calculating StableStringHash - which was derived from string GetHashCode
+            Legacy,
+            // FNV-1a 32bit hash
+            Fnv1a32bit,
+            // Custom FNV-1a 32bit hash - optimized for speed by hashing by the whole chars (not individual bytes)
+            Fnv1a32bitFast,
+            // FNV-1a 64bit hash
+            Fnv1a64bit,
+            // Custom FNV-1a 64bit hash - optimized for speed by hashing by the whole chars (not individual bytes)
+            Fnv1a64bitFast,
+            // SHA256 hash - gets the hex string of the hash (with no prefix)
+            Sha256
+        }
+
         /// <summary>
-        /// Hash the string independent of bitness and target framework.
+        /// Legacy implementation that doesn't lead to JIT pulling the new functions from StringTools (so those must not be referenced anywhere in the function body)
+        /// - for cases where the calling code would erroneously load old version of StringTools alongside of the new version of Microsoft.Build.
+        /// Should be removed once Wave17_10 is removed.
         /// </summary>
-        internal static int StableStringHash(string toHash)
-        {
-            return CommunicationsUtilities.GetHashCode(toHash);
+        internal static object StableStringHashLegacy(string toHash)
+            => CommunicationsUtilities.GetHashCode(toHash);
+
+        /// <summary>
+        /// Hash the string independent of bitness, target framework and default codepage of the environment.
+        /// We do not want this to be inlined, as then the Expander would call directly the new overload, and hence
+        /// JIT load the functions from StringTools - so we would not be able to prevent their loading with ChangeWave as we do now.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.NoInlining)]
+        internal static object StableStringHash(string toHash)
+            => StableStringHash(toHash, StringHashingAlgorithm.Legacy);
+
+        internal static object StableStringHash(string toHash, StringHashingAlgorithm algo) =>
+            algo switch
+            {
+                StringHashingAlgorithm.Legacy => CommunicationsUtilities.GetHashCode(toHash),
+                StringHashingAlgorithm.Fnv1a32bit => FowlerNollVo1aHash.ComputeHash32(toHash),
+                StringHashingAlgorithm.Fnv1a32bitFast => FowlerNollVo1aHash.ComputeHash32Fast(toHash),
+                StringHashingAlgorithm.Fnv1a64bit => FowlerNollVo1aHash.ComputeHash64(toHash),
+                StringHashingAlgorithm.Fnv1a64bitFast => FowlerNollVo1aHash.ComputeHash64Fast(toHash),
+                StringHashingAlgorithm.Sha256 => CalculateSha256(toHash),
+                _ => throw new ArgumentOutOfRangeException(nameof(algo), algo, null)
+            };
+
+        private static string CalculateSha256(string toHash)
+        {
+            using var sha = System.Security.Cryptography.SHA256.Create(); // SHA256 is IDisposable - release it when the method exits
+            var hashResult = new StringBuilder();
+            foreach (byte theByte in sha.ComputeHash(Encoding.UTF8.GetBytes(toHash)))
+            {
+                hashResult.Append(theByte.ToString("x2"));
+            }
+            return hashResult.ToString();
         }
 
         /// <summary>
diff --git a/src/Build/Logging/BinaryLogger/BuildEventArgsWriter.cs b/src/Build/Logging/BinaryLogger/BuildEventArgsWriter.cs
index 8a8311e1fd6..a3ce5efa3c5 100644
--- a/src/Build/Logging/BinaryLogger/BuildEventArgsWriter.cs
+++ b/src/Build/Logging/BinaryLogger/BuildEventArgsWriter.cs
@@ -15,6 +15,8 @@
 using Microsoft.Build.Framework;
 using Microsoft.Build.Framework.Profiler;
 using Microsoft.Build.Shared;
+using Microsoft.Build.Utilities;
+using Microsoft.NET.StringTools;
 
 #nullable disable
 
@@ -1260,9 +1262,9 @@ private void Write(IExtendedBuildEventArgs extendedData)
 
         internal readonly struct HashKey : IEquatable<HashKey>
         {
-            private readonly ulong value;
+            private readonly long value;
 
-            private HashKey(ulong i)
+            private HashKey(long i)
             {
                 value = i;
             }
@@ -1275,13 +1277,13 @@ public HashKey(string text)
                 }
                 else
                 {
-                    value = FnvHash64.GetHashCode(text);
+                    value = FowlerNollVo1aHash.ComputeHash64Fast(text);
                 }
             }
 
             public static HashKey Combine(HashKey left, HashKey right)
             {
-                return new HashKey(FnvHash64.Combine(left.value, right.value));
+                return new HashKey(FowlerNollVo1aHash.Combine64(left.value, right.value));
             }
 
             public HashKey Add(HashKey other) => Combine(this, other);
@@ -1311,35 +1313,5 @@ public override string ToString()
                 return value.ToString();
             }
         }
-
-        internal static class FnvHash64
-        {
-            public const ulong Offset = 14695981039346656037;
-            public const ulong Prime = 1099511628211;
-
-            public static ulong GetHashCode(string text)
-            {
-                ulong hash = Offset;
-
-                unchecked
-                {
-                    for (int i = 0; i < text.Length; i++)
-                    {
-                        char ch = text[i];
-                        hash = (hash ^ ch) * Prime;
-                    }
-                }
-
-                return hash;
-            }
-
-            public static ulong Combine(ulong left, ulong right)
-            {
-                unchecked
-                {
-                    return (left ^ right) * Prime;
-                }
-            }
-        }
     }
 }
diff --git a/src/StringTools/FowlerNollVo1aHash.cs b/src/StringTools/FowlerNollVo1aHash.cs
new file mode 100644
index 00000000000..5a9a876e4c0
--- /dev/null
+++ 
b/src/StringTools/FowlerNollVo1aHash.cs
@@ -0,0 +1,145 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.InteropServices;
+using System;
+
+namespace Microsoft.NET.StringTools
+{
+    /// <summary>
+    /// Fowler/Noll/Vo hashing.
+    /// </summary>
+    public static class FowlerNollVo1aHash
+    {
+        // Fowler/Noll/Vo hashing.
+        // http://www.isthe.com/chongo/tech/comp/fnv/
+        // https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function#FNV-1a_hash
+        // http://www.isthe.com/chongo/src/fnv/hash_32a.c
+
+        // 32 bit FNV prime and offset basis for FNV-1a.
+        private const uint fnvPrimeA32Bit = 16777619;
+        private const uint fnvOffsetBasisA32Bit = 2166136261;
+
+        // 64 bit FNV prime and offset basis for FNV-1a (the offset basis is stored as its signed 64 bit bit-pattern).
+        private const long fnvPrimeA64Bit = 1099511628211;
+        private const long fnvOffsetBasisA64Bit = unchecked((long)14695981039346656037);
+
+        /// <summary>
+        /// Computes 32 bit Fowler/Noll/Vo-1a hash of a string, feeding the low byte and then the high byte of each char (regardless of encoding).
+        /// </summary>
+        /// <param name="text">String to be hashed.</param>
+        /// <returns>32 bit signed hash</returns>
+        public static int ComputeHash32(string text)
+        {
+            uint hash = fnvOffsetBasisA32Bit;
+
+            unchecked
+            {
+                for (int i = 0; i < text.Length; i++)
+                {
+                    char ch = text[i];
+                    byte b = (byte)ch;
+                    hash ^= b;
+                    hash *= fnvPrimeA32Bit;
+
+                    b = (byte)(ch >> 8);
+                    hash ^= b;
+                    hash *= fnvPrimeA32Bit;
+                }
+            }
+
+            return unchecked((int)hash);
+        }
+
+        /// <summary>
+        /// Computes 32 bit Fowler/Noll/Vo-1a inspired hash of a string.
+        /// The hashing algorithm processes the data by whole 16bit chars, instead of by bytes.
+        /// This speeds up the hashing process almost by 2x, while not significantly increasing the collision rate.
+        /// Analysis: https://github.com/KirillOsenkov/MSBuildStructuredLog/wiki/String-Hashing#faster-fnv-1a
+        /// </summary>
+        /// <param name="text">String to be hashed.</param>
+        /// <returns>32 bit signed hash</returns>
+        public static int ComputeHash32Fast(string text)
+        {
+            uint hash = fnvOffsetBasisA32Bit;
+
+            unchecked
+            {
+                for (int i = 0; i < text.Length; i++)
+                {
+                    char ch = text[i];
+
+                    hash = (hash ^ ch) * fnvPrimeA32Bit;
+                }
+            }
+
+            return unchecked((int)hash);
+        }
+
+        /// <summary>
+        /// Computes 64 bit Fowler/Noll/Vo-1a inspired hash of a string.
+        /// The hashing algorithm processes the data by whole 16bit chars, instead of by bytes.
+        /// This speeds up the hashing process almost by 2x, while not significantly increasing the collision rate.
+        /// Analysis: https://github.com/KirillOsenkov/MSBuildStructuredLog/wiki/String-Hashing#faster-fnv-1a
+        /// </summary>
+        /// <param name="text">String to be hashed.</param>
+        /// <returns>64 bit signed hash</returns>
+        public static long ComputeHash64Fast(string text)
+        {
+            long hash = fnvOffsetBasisA64Bit;
+
+            unchecked
+            {
+                for (int i = 0; i < text.Length; i++)
+                {
+                    char ch = text[i];
+
+                    hash = (hash ^ ch) * fnvPrimeA64Bit;
+                }
+            }
+
+            return hash;
+        }
+
+        /// <summary>
+        /// Computes 64 bit Fowler/Noll/Vo-1a hash of a string, feeding the low byte and then the high byte of each char (regardless of encoding).
+        /// </summary>
+        /// <param name="text">String to be hashed.</param>
+        /// <returns>64 bit signed hash</returns>
+        public static long ComputeHash64(string text)
+        {
+            long hash = fnvOffsetBasisA64Bit;
+
+            unchecked
+            {
+                for (int i = 0; i < text.Length; i++)
+                {
+                    char ch = text[i];
+                    byte b = (byte)ch;
+                    hash ^= b;
+                    hash *= fnvPrimeA64Bit;
+
+                    b = (byte)(ch >> 8);
+                    hash ^= b;
+                    hash *= fnvPrimeA64Bit;
+                }
+            }
+
+            return hash;
+        }
+
+        /// <summary>
+        /// Combines two 64 bit hashes generated by <see cref="FowlerNollVo1aHash"/> class into one.
+        /// </summary>
+        /// <param name="left">First hash value to be combined.</param>
+        /// <param name="right">Second hash value to be combined.</param>
+        /// <returns>64 bit signed hash: the XOR of both inputs multiplied (unchecked) by the 64 bit FNV prime.</returns>
+        public static long Combine64(long left, long right)
+        {
+            unchecked
+            {
+                return (left ^ right) * fnvPrimeA64Bit;
+            }
+        }
+    }
+}