Skip to content

Commit

Permalink
Reintroduce FNV hashing (#9860)
Browse files Browse the repository at this point in the history
* Revert "Revert "Shorten UTD marker file (#9387)" except Microsoft.Common.Curr…"

This reverts commit 5af9301.

* Make FNV hash compatible across endianness

* Add StableStringHash intrinsic function overloads

* Put StringTools functions references behind changewave

* Prevent StableStringHash inlining

* Move the changewave description to proper section
  • Loading branch information
JanKrivanek committed Mar 18, 2024
1 parent 1a342f9 commit 55777e8
Show file tree
Hide file tree
Showing 6 changed files with 253 additions and 43 deletions.
1 change: 1 addition & 0 deletions documentation/wiki/ChangeWaves.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ A wave of features is set to "rotate out" (i.e. become standard functionality) t
- [Load NuGet.Frameworks into secondary AppDomain (MSBuild.exe only)](https://github.com/dotnet/msbuild/pull/9446)
- [Update Traits when environment has been changed](https://github.com/dotnet/msbuild/pull/9655)
- [Exec task does not trim leading whitespaces for ConsoleOutput](https://github.com/dotnet/msbuild/pull/9722)
- [Introduce [MSBuild]::StableStringHash overloads](https://github.com/dotnet/msbuild/issues/9519)
- [Keep the encoding of standard output & error consistent with the console code page for ToolTask](https://github.com/dotnet/msbuild/pull/9539)


Expand Down
42 changes: 38 additions & 4 deletions src/Build.UnitTests/Evaluation/Expander_Tests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3890,8 +3890,14 @@ public void PropertyStringConstructorConsumingItemMetadata(string metadatumName,
result.ShouldBe(metadatumValue);
}

[Fact]
public void PropertyFunctionHashCodeSameOnlyIfStringSame()
/// <summary>
/// Theory data for the StableStringHash tests: one row per <see cref="IntrinsicFunctions.StringHashingAlgorithm"/>
/// member name, followed by a final row holding null (the tests treat null as "omit the algorithm argument").
/// </summary>
public static IEnumerable<object[]> GetHashAlgoTypes()
{
    foreach (string algorithmName in Enum.GetNames(typeof(IntrinsicFunctions.StringHashingAlgorithm)))
    {
        yield return new object[] { algorithmName };
    }

    yield return new object[] { null };
}

[Theory]
[MemberData(nameof(GetHashAlgoTypes))]
public void PropertyFunctionHashCodeSameOnlyIfStringSame(string hashType)
{
PropertyDictionary<ProjectPropertyInstance> pg = new PropertyDictionary<ProjectPropertyInstance>();
Expander<ProjectPropertyInstance, ProjectItemInstance> expander = new Expander<ProjectPropertyInstance, ProjectItemInstance>(pg, FileSystems.Default);
Expand All @@ -3906,8 +3912,9 @@ public void PropertyFunctionHashCodeSameOnlyIfStringSame()
"cat12s",
"cat1s"
};
int[] hashes = stringsToHash.Select(toHash =>
(int)expander.ExpandPropertiesLeaveTypedAndEscaped($"$([MSBuild]::StableStringHash('{toHash}'))", ExpanderOptions.ExpandProperties, MockElementLocation.Instance))
string hashTypeString = hashType == null ? "" : $", '{hashType}'";
object[] hashes = stringsToHash.Select(toHash =>
expander.ExpandPropertiesLeaveTypedAndEscaped($"$([MSBuild]::StableStringHash('{toHash}'{hashTypeString}))", ExpanderOptions.ExpandProperties, MockElementLocation.Instance))
.ToArray();
for (int a = 0; a < hashes.Length; a++)
{
Expand All @@ -3925,6 +3932,33 @@ public void PropertyFunctionHashCodeSameOnlyIfStringSame()
}
}

/// <summary>
/// Checks the runtime type of the value that [MSBuild]::StableStringHash expands to:
/// int for the single-argument form (null hashType), Legacy and the 32 bit FNV variants,
/// long for the 64 bit FNV variants, and string for Sha256.
/// </summary>
/// <param name="hashType">Algorithm name passed as the second argument, or null to omit that argument.</param>
[Theory]
[MemberData(nameof(GetHashAlgoTypes))]
public void PropertyFunctionHashCodeReturnsExpectedType(string hashType)
{
    // An algorithm name this test does not know about fails loudly instead of being silently skipped.
    Type expectedType = hashType switch
    {
        null => typeof(int),
        "Legacy" => typeof(int),
        "Fnv1a32bit" => typeof(int),
        "Fnv1a32bitFast" => typeof(int),
        "Fnv1a64bit" => typeof(long),
        "Fnv1a64bitFast" => typeof(long),
        "Sha256" => typeof(string),
        _ => throw new ArgumentOutOfRangeException(nameof(hashType))
    };

    // A null hashType exercises the single-argument call; otherwise the name is appended as a quoted second argument.
    string algorithmArgument = hashType == null ? "" : $", '{hashType}'";
    string expression = $"$([MSBuild]::StableStringHash('FooBar'{algorithmArgument}))";

    var properties = new PropertyDictionary<ProjectPropertyInstance>();
    var expander = new Expander<ProjectPropertyInstance, ProjectItemInstance>(properties, FileSystems.Default);

    object hashValue = expander.ExpandPropertiesLeaveTypedAndEscaped(expression, ExpanderOptions.ExpandProperties, MockElementLocation.Instance);

    hashValue.ShouldBeOfType(expectedType);
}

[Theory]
[InlineData("easycase")]
[InlineData("")]
Expand Down
10 changes: 9 additions & 1 deletion src/Build/Evaluation/Expander.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4159,7 +4159,15 @@ private bool TryExecuteWellKnownFunction(out object returnVal, object objectInst
{
if (TryGetArg(args, out string arg0))
{
returnVal = IntrinsicFunctions.StableStringHash(arg0);
// Prevent loading methods refs from StringTools if ChangeWave opted out.
returnVal = ChangeWaves.AreFeaturesEnabled(ChangeWaves.Wave17_10)
? IntrinsicFunctions.StableStringHash(arg0)
: IntrinsicFunctions.StableStringHashLegacy(arg0);
return true;
}
else if (TryGetArgs(args, out string arg1, out string arg2) && Enum.TryParse<IntrinsicFunctions.StringHashingAlgorithm>(arg2, true, out var hashAlgorithm))
{
returnVal = IntrinsicFunctions.StableStringHash(arg1, hashAlgorithm);
return true;
}
}
Expand Down
58 changes: 54 additions & 4 deletions src/Build/Evaluation/IntrinsicFunctions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Versioning;
using System.Text;
Expand All @@ -14,6 +15,7 @@
using Microsoft.Build.Shared;
using Microsoft.Build.Shared.FileSystem;
using Microsoft.Build.Utilities;
using Microsoft.NET.StringTools;
using Microsoft.Win32;

// Needed for DoesTaskHostExistForParameters
Expand Down Expand Up @@ -397,12 +399,60 @@ internal static string ConvertFromBase64(string toDecode)
return Encoding.UTF8.GetString(Convert.FromBase64String(toDecode));
}

/// <summary>
/// Hashing algorithms selectable through the two-argument StableStringHash overload.
/// The type of the produced value differs per algorithm (int, long or string), as noted on each member.
/// </summary>
internal enum StringHashingAlgorithm
{
    /// <summary>
    /// Legacy way of calculating StableStringHash - which was derived from string GetHashCode.
    /// Computed by CommunicationsUtilities.GetHashCode; yields an int.
    /// </summary>
    Legacy,
    /// <summary>
    /// FNV-1a 32bit hash. Yields an int.
    /// </summary>
    Fnv1a32bit,
    /// <summary>
    /// Custom FNV-1a 32bit hash - optimized for speed by hashing by the whole chars (not individual bytes).
    /// Yields an int.
    /// </summary>
    Fnv1a32bitFast,
    /// <summary>
    /// FNV-1a 64bit hash. Yields a long.
    /// </summary>
    Fnv1a64bit,
    /// <summary>
    /// Custom FNV-1a 64bit hash - optimized for speed by hashing by the whole chars (not individual bytes).
    /// Yields a long.
    /// </summary>
    Fnv1a64bitFast,
    /// <summary>
    /// SHA256 hash - gets the hex string of the hash (with no prefix). Yields a string.
    /// </summary>
    Sha256
}

/// <summary>
/// Hash the string independent of bitness and target framework.
/// Legacy implementation that does not cause the JIT to pull in the new functions from StringTools (so those must not be referenced anywhere in the function body)
/// - for cases where the calling code would erroneously load an old version of StringTools alongside the new version of Microsoft.Build.
/// Should be removed once Wave17_10 is removed.
/// </summary>
internal static int StableStringHash(string toHash)
{
return CommunicationsUtilities.GetHashCode(toHash);
internal static object StableStringHashLegacy(string toHash)
=> CommunicationsUtilities.GetHashCode(toHash);

/// <summary>
/// Single-argument form of StableStringHash: hashes the string independently of bitness, target framework
/// and the default codepage of the environment, by forwarding to the algorithm-taking overload
/// with <see cref="StringHashingAlgorithm.Legacy"/>.
/// </summary>
/// <remarks>
/// This method must not be inlined. If it were, the Expander would end up calling the algorithm-taking overload
/// directly, the JIT would load the functions from StringTools, and the ChangeWave check in the Expander
/// could no longer prevent them from being loaded.
/// </remarks>
/// <param name="toHash">String to be hashed.</param>
/// <returns>The hash value produced by the Legacy algorithm, boxed as object.</returns>
[MethodImpl(MethodImplOptions.NoInlining)]
internal static object StableStringHash(string toHash)
{
    return StableStringHash(toHash, StringHashingAlgorithm.Legacy);
}

/// <summary>
/// Hashes the string with the requested algorithm.
/// </summary>
/// <param name="toHash">String to be hashed.</param>
/// <param name="algo">Algorithm to use.</param>
/// <returns>
/// The hash boxed as object; each algorithm keeps the type its own implementation returns
/// (the FNV 32 bit variants return int, the FNV 64 bit variants return long, Sha256 returns a hex string).
/// </returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="algo"/> is not a defined algorithm.</exception>
internal static object StableStringHash(string toHash, StringHashingAlgorithm algo)
{
    // Every branch returns straight into object, so each result is boxed in its own type
    // rather than being widened to a common numeric type first.
    switch (algo)
    {
        case StringHashingAlgorithm.Legacy:
            return CommunicationsUtilities.GetHashCode(toHash);
        case StringHashingAlgorithm.Fnv1a32bit:
            return FowlerNollVo1aHash.ComputeHash32(toHash);
        case StringHashingAlgorithm.Fnv1a32bitFast:
            return FowlerNollVo1aHash.ComputeHash32Fast(toHash);
        case StringHashingAlgorithm.Fnv1a64bit:
            return FowlerNollVo1aHash.ComputeHash64(toHash);
        case StringHashingAlgorithm.Fnv1a64bitFast:
            return FowlerNollVo1aHash.ComputeHash64Fast(toHash);
        case StringHashingAlgorithm.Sha256:
            return CalculateSha256(toHash);
        default:
            throw new ArgumentOutOfRangeException(nameof(algo), algo, null);
    }
}

/// <summary>
/// Computes the SHA256 hash of the UTF-8 encoding of the given string.
/// </summary>
/// <param name="toHash">String to be hashed.</param>
/// <returns>The hash as a lowercase hexadecimal string with no prefix (two characters per hash byte).</returns>
private static string CalculateSha256(string toHash)
{
    // The hash algorithm instance is IDisposable; release it deterministically instead of leaving it to the finalizer.
    using var sha = System.Security.Cryptography.SHA256.Create();
    byte[] digest = sha.ComputeHash(Encoding.UTF8.GetBytes(toHash));

    // Two hex characters per byte, so the final length is known up front.
    var hashResult = new StringBuilder(digest.Length * 2);
    foreach (byte theByte in digest)
    {
        hashResult.Append(theByte.ToString("x2"));
    }

    return hashResult.ToString();
}

/// <summary>
Expand Down
40 changes: 6 additions & 34 deletions src/Build/Logging/BinaryLogger/BuildEventArgsWriter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
using Microsoft.Build.Framework;
using Microsoft.Build.Framework.Profiler;
using Microsoft.Build.Shared;
using Microsoft.Build.Utilities;
using Microsoft.NET.StringTools;

#nullable disable

Expand Down Expand Up @@ -1260,9 +1262,9 @@ private void Write(IExtendedBuildEventArgs extendedData)

internal readonly struct HashKey : IEquatable<HashKey>
{
private readonly ulong value;
private readonly long value;

private HashKey(ulong i)
private HashKey(long i)
{
value = i;
}
Expand All @@ -1275,13 +1277,13 @@ public HashKey(string text)
}
else
{
value = FnvHash64.GetHashCode(text);
value = FowlerNollVo1aHash.ComputeHash64Fast(text);
}
}

public static HashKey Combine(HashKey left, HashKey right)
{
return new HashKey(FnvHash64.Combine(left.value, right.value));
return new HashKey(FowlerNollVo1aHash.Combine64(left.value, right.value));
}

public HashKey Add(HashKey other) => Combine(this, other);
Expand Down Expand Up @@ -1311,35 +1313,5 @@ public override string ToString()
return value.ToString();
}
}

/// <summary>
/// 64 bit FNV-1a style hashing that mixes in one whole char (16 bits) per step.
/// </summary>
internal static class FnvHash64
{
    public const ulong Offset = 14695981039346656037;
    public const ulong Prime = 1099511628211;

    /// <summary>
    /// Hashes the string char by char, starting from <see cref="Offset"/>.
    /// </summary>
    /// <param name="text">String to be hashed.</param>
    /// <returns>64 bit unsigned hash; <see cref="Offset"/> for an empty string.</returns>
    public static ulong GetHashCode(string text)
    {
        ulong accumulator = Offset;

        foreach (char current in text)
        {
            // The multiplication is expected to wrap around.
            accumulator = unchecked((accumulator ^ current) * Prime);
        }

        return accumulator;
    }

    /// <summary>
    /// Combines two hashes into one by XOR-ing them and multiplying by <see cref="Prime"/>.
    /// </summary>
    /// <param name="left">First hash value.</param>
    /// <param name="right">Second hash value.</param>
    /// <returns>The combined 64 bit unsigned hash.</returns>
    public static ulong Combine(ulong left, ulong right)
        => unchecked((left ^ right) * Prime);
}
}
}
145 changes: 145 additions & 0 deletions src/StringTools/FowlerNollVo1aHash.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Runtime.InteropServices;
using System;

namespace Microsoft.NET.StringTools
{
/// <summary>
/// Fowler/Noll/Vo (FNV-1a) string hashing: byte-wise 32 and 64 bit variants, faster char-wise variants,
/// and a combiner for 64 bit hashes.
/// </summary>
public static class FowlerNollVo1aHash
{
    // Fowler/Noll/Vo hashing.
    // http://www.isthe.com/chongo/tech/comp/fnv/
    // https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function#FNV-1a_hash
    // http://www.isthe.com/chongo/src/fnv/hash_32a.c

    // 32 bit FNV prime and offset basis for FNV-1a.
    private const uint fnvPrimeA32Bit = 16777619;
    private const uint fnvOffsetBasisA32Bit = 2166136261;

    // 64 bit FNV prime and offset basis for FNV-1a, kept as signed values.
    // The offset basis does not fit into a positive long, hence the unchecked reinterpreting cast.
    private const long fnvPrimeA64Bit = 1099511628211;
    private const long fnvOffsetBasisA64Bit = unchecked((long)14695981039346656037);

    /// <summary>
    /// Computes 32 bit Fowler/Noll/Vo-1a hash of a string (regardless of encoding).
    /// Each char contributes two bytes: the low 8 bits first, then the high 8 bits.
    /// </summary>
    /// <param name="text">String to be hashed.</param>
    /// <returns>32 bit hash, returned as a signed int.</returns>
    public static int ComputeHash32(string text)
    {
        uint accumulator = fnvOffsetBasisA32Bit;

        unchecked
        {
            foreach (char current in text)
            {
                // The bytes are extracted arithmetically (mask and shift), not by reinterpreting memory.
                accumulator = (accumulator ^ (uint)(current & 0xFF)) * fnvPrimeA32Bit;
                accumulator = (accumulator ^ (uint)(current >> 8)) * fnvPrimeA32Bit;
            }

            return (int)accumulator;
        }
    }

    /// <summary>
    /// Computes 32 bit Fowler/Noll/Vo-1a inspired hash of a string.
    /// The hashing algorithm processes the data by whole 16 bit chars, instead of by bytes.
    /// This speeds up the hashing process almost 2x, while not significantly increasing the collision rate.
    /// Analysis: https://github.com/KirillOsenkov/MSBuildStructuredLog/wiki/String-Hashing#faster-fnv-1a
    /// </summary>
    /// <param name="text">String to be hashed.</param>
    /// <returns>32 bit hash, returned as a signed int.</returns>
    public static int ComputeHash32Fast(string text)
    {
        uint accumulator = fnvOffsetBasisA32Bit;

        unchecked
        {
            foreach (char current in text)
            {
                accumulator ^= current;
                accumulator *= fnvPrimeA32Bit;
            }

            return (int)accumulator;
        }
    }

    /// <summary>
    /// Computes 64 bit Fowler/Noll/Vo-1a inspired hash of a string.
    /// The hashing algorithm processes the data by whole 16 bit chars, instead of by bytes.
    /// This speeds up the hashing process almost 2x, while not significantly increasing the collision rate.
    /// Analysis: https://github.com/KirillOsenkov/MSBuildStructuredLog/wiki/String-Hashing#faster-fnv-1a
    /// </summary>
    /// <param name="text">String to be hashed.</param>
    /// <returns>64 bit hash, returned as a signed long.</returns>
    public static long ComputeHash64Fast(string text)
    {
        long accumulator = fnvOffsetBasisA64Bit;

        unchecked
        {
            foreach (char current in text)
            {
                accumulator ^= current;
                accumulator *= fnvPrimeA64Bit;
            }
        }

        return accumulator;
    }

    /// <summary>
    /// Computes 64 bit Fowler/Noll/Vo-1a hash of a string (regardless of encoding).
    /// Each char contributes two bytes: the low 8 bits first, then the high 8 bits.
    /// </summary>
    /// <param name="text">String to be hashed.</param>
    /// <returns>64 bit hash, returned as a signed long.</returns>
    public static long ComputeHash64(string text)
    {
        long accumulator = fnvOffsetBasisA64Bit;

        unchecked
        {
            foreach (char current in text)
            {
                // The bytes are extracted arithmetically (mask and shift), not by reinterpreting memory.
                accumulator = (accumulator ^ (current & 0xFF)) * fnvPrimeA64Bit;
                accumulator = (accumulator ^ (current >> 8)) * fnvPrimeA64Bit;
            }
        }

        return accumulator;
    }

    /// <summary>
    /// Combines two 64 bit hashes generated by <see cref="FowlerNollVo1aHash"/> class into one.
    /// </summary>
    /// <param name="left">First hash value to be combined.</param>
    /// <param name="right">Second hash value to be combined.</param>
    /// <returns>The XOR of both values multiplied by the 64 bit FNV prime (wrapping on overflow).</returns>
    public static long Combine64(long left, long right)
        => unchecked((left ^ right) * fnvPrimeA64Bit);
}
}

0 comments on commit 55777e8

Please sign in to comment.