Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Shorten UTD marker file #9387

Merged
merged 8 commits into from
Nov 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions src/Build/Evaluation/IntrinsicFunctions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
using Microsoft.Build.Shared;
using Microsoft.Build.Shared.FileSystem;
using Microsoft.Build.Utilities;
using Microsoft.NET.StringTools;
using Microsoft.Win32;

// Needed for DoesTaskHostExistForParameters
Expand Down Expand Up @@ -398,11 +399,11 @@ internal static string ConvertFromBase64(string toDecode)
}

/// <summary>
/// Hash the string independent of bitness and target framework.
/// Hash the string independent of bitness, target framework and default codepage of the environment.
/// </summary>
internal static int StableStringHash(string toHash)
{
return CommunicationsUtilities.GetHashCode(toHash);
return FowlerNollVo1aHash.ComputeHash32(toHash);
}

/// <summary>
Expand Down
40 changes: 6 additions & 34 deletions src/Build/Logging/BinaryLogger/BuildEventArgsWriter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
using Microsoft.Build.Framework;
using Microsoft.Build.Framework.Profiler;
using Microsoft.Build.Shared;
using Microsoft.Build.Utilities;
using Microsoft.NET.StringTools;

#nullable disable

Expand Down Expand Up @@ -1259,9 +1261,9 @@ private void Write(IExtendedBuildEventArgs extendedData)

internal readonly struct HashKey : IEquatable<HashKey>
{
private readonly ulong value;
private readonly long value;

private HashKey(ulong i)
private HashKey(long i)
{
value = i;
}
Expand All @@ -1274,13 +1276,13 @@ public HashKey(string text)
}
else
{
value = FnvHash64.GetHashCode(text);
value = FowlerNollVo1aHash.ComputeHash64Fast(text);
}
}

public static HashKey Combine(HashKey left, HashKey right)
{
return new HashKey(FnvHash64.Combine(left.value, right.value));
return new HashKey(FowlerNollVo1aHash.Combine64(left.value, right.value));
}

public HashKey Add(HashKey other) => Combine(this, other);
Expand Down Expand Up @@ -1310,35 +1312,5 @@ public override string ToString()
return value.ToString();
}
}

internal static class FnvHash64
{
public const ulong Offset = 14695981039346656037;
public const ulong Prime = 1099511628211;

public static ulong GetHashCode(string text)
{
ulong hash = Offset;

unchecked
{
for (int i = 0; i < text.Length; i++)
{
char ch = text[i];
hash = (hash ^ ch) * Prime;
}
}

return hash;
}

public static ulong Combine(ulong left, ulong right)
{
unchecked
{
return (left ^ right) * Prime;
}
}
}
}
}
135 changes: 135 additions & 0 deletions src/StringTools/FowlerNollVo1aHash.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Runtime.InteropServices;
using System;

namespace Microsoft.NET.StringTools
{
/// <summary>
/// Fowler/Noll/Vo hashing.
/// </summary>
public static class FowlerNollVo1aHash
{
// Fowler/Noll/Vo hashing.
// http://www.isthe.com/chongo/tech/comp/fnv/
// https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function#FNV-1a_hash
// http://www.isthe.com/chongo/src/fnv/hash_32a.c

// 32 bit FNV prime and offset basis for FNV-1a.
private const uint fnvPrimeA32Bit = 16777619;
private const uint fnvOffsetBasisA32Bit = 2166136261;

// 64 bit FNV prime and offset basis for FNV-1a.
private const long fnvPrimeA64Bit = 1099511628211;
private const long fnvOffsetBasisA64Bit = unchecked((long)14695981039346656037);

/// <summary>
/// Computes 32 bit Fowler/Noll/Vo-1a hash of a string (regardless of encoding).
/// </summary>
/// <param name="text">String to be hashed.</param>
/// <returns>32 bit signed hash</returns>
public static int ComputeHash32(string text)
{
uint hash = fnvOffsetBasisA32Bit;

#if NET35
unchecked
{
for (int i = 0; i < text.Length; i++)
{
char ch = text[i];
byte b = (byte)ch;
hash ^= b;
hash *= fnvPrimeA32Bit;

b = (byte)(ch >> 8);
hash ^= b;
hash *= fnvPrimeA32Bit;
}
}
#else
ReadOnlySpan<byte> span = MemoryMarshal.Cast<char, byte>(text.AsSpan());
foreach (byte b in span)
{
hash = unchecked((hash ^ b) * fnvPrimeA32Bit);
}
#endif

return unchecked((int)hash);
}

/// <summary>
/// Computes 64 bit Fowler/Noll/Vo-1a hash optimized for ASCII strings.
/// The hashing algorithm considers only the first 8 bits of each character.
/// Analysis: https://github.com/KirillOsenkov/MSBuildStructuredLog/wiki/String-Hashing#faster-fnv-1a
/// </summary>
/// <param name="text">String to be hashed.</param>
/// <returns>64 bit unsigned hash</returns>
public static long ComputeHash64Fast(string text)
{
long hash = fnvOffsetBasisA64Bit;

unchecked
{
for (int i = 0; i < text.Length; i++)
{
char ch = text[i];

hash = (hash ^ ch) * fnvPrimeA64Bit;
}
}

return hash;
}

/// <summary>
/// Computes 64 bit Fowler/Noll/Vo-1a hash of a string (regardless of encoding).
/// </summary>
/// <param name="text">String to be hashed.</param>
/// <returns>64 bit unsigned hash</returns>
public static long ComputeHash64(string text)
{
long hash = fnvOffsetBasisA64Bit;

#if NET35
unchecked
{
for (int i = 0; i < text.Length; i++)
{
char ch = text[i];
byte b = (byte)ch;
hash ^= b;
hash *= fnvPrimeA64Bit;

b = (byte)(ch >> 8);
hash ^= b;
hash *= fnvPrimeA64Bit;
}
}
#else
ReadOnlySpan<byte> span = MemoryMarshal.Cast<char, byte>(text.AsSpan());
foreach (byte b in span)
{
hash = unchecked((hash ^ b) * fnvPrimeA64Bit);
}
Comment on lines +111 to +115

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does MSBuild support big-endian platforms? It looks like this would not be stable between big-endian and little-endian.

Copy link
Member Author

@JanKrivanek JanKrivanek Nov 30, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@KalleOlaviNiemitalo - Thank you for bringing this up!
We're not declaring what can be the expectations of stableness of the hashing between different systems (we actually do not declare anything ... yet :-)) - but it'd certainly be nice to have it more universal.

@uweigand - would you be willing to chime in and share your thoughts on the following?

Basically - since the MemoryMarshal.Cast<char, byte> changes how we fetch data from memory to processor - it'll behave differently between Endinan-ness representations. But how anout the version which manually splits each char to bytes:

for (int i = 0; i < text.Length; i++)
{
    char ch = text[i];
    byte b = (byte)ch;
    hash ^= b;
    hash *= fnvPrimeA64Bit;

    b = (byte)(ch >> 8);
    hash ^= b;
    hash *= fnvPrimeA64Bit;
}

since all this happens in processor after the data are fetched from memory - it should fetch same results regardless of endinan-ness - is that right?

FYI @ladipro (and thanks for all the offline consultation)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes that would work the same in either endianness.

You could also check BitConverter.IsLittleEndian and use MemoryMarshal.Cast on little-endian platforms but fall back to the slower NET35-compatible implementation on big-endian platforms.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

…although I have not measured whether the NET35-compatible implementation actually is slower. In both implementations, each operation on hash depends on the previous one, so it doesn't look like there is much that the CPU could do in parallel. And the strings probably are pretty short anyway.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems to me the code above implements three methods that are (potentially) different. There's the NET35 method in ComputeHash64 (which operates on single bytes at a time, splitting the 2-byte char in an endian-independent way), the !NET35 method in ComputeHash64 (which also operates on single bytes at a time, but splits the 2-byte char in an endian-dependent way), and then there's ComputeHash64Fast (which operates on 2-byte chars at a time, which appears to give a different result from either of the two other methods).

In general, I agree that it would be preferable to use a method that is stable across native endianness. Unless the NET35 method is really slower (and I also suspect it actually isn't), and performance matters at this place, it might be best to just use that method unconditionally?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Re ComputeHash64Fast, this comment seems to be false:

/// The hashing algorithm considers only the first 8 bits of each character.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for all the detailed input! I'll remove the Spans version.

The Spans version is indeed not superior (it has slight to no advantage for large strings, but performs worse for a very short strings). It's nice that it doesn't need unchecked code and is more brief - but given the fact that it can perform worse and that the other version anyways need to be maintained - it's an easy choice.

I'll as well fix the description for the ComputeHash64Fast

#endif

return hash;
}

/// <summary>
/// Combines two 64 bit hashes generated by <see cref="FowlerNollVo1aHash"/> class into one.
/// </summary>
/// <param name="left">First hash value to be combined.</param>
/// <param name="right">Second hash value to be combined.</param>
/// <returns></returns>
public static long Combine64(long left, long right)
{
unchecked
{
return (left ^ right) * fnvPrimeA64Bit;
}
}
}
}
11 changes: 10 additions & 1 deletion src/Tasks/Microsoft.Common.CurrentVersion.targets
Original file line number Diff line number Diff line change
Expand Up @@ -385,11 +385,20 @@ Copyright (C) Microsoft Corporation. All rights reserved.
<PropertyGroup>
<_GenerateBindingRedirectsIntermediateAppConfig>$(IntermediateOutputPath)$(TargetFileName).config</_GenerateBindingRedirectsIntermediateAppConfig>
</PropertyGroup>

<PropertyGroup Condition="'$(MSBuildCopyMarkerName)' == ''">
<MSBuildCopyMarkerName>$(MSBuildProjectFile)</MSBuildCopyMarkerName>
<!-- For a long MSBuildProjectFile let's shorten this to 17 chars - using the first 8 chars of the filename and either the ProjectGuid if it exists -->
<MSBuildCopyMarkerName Condition="'$(MSBuildCopyMarkerName.Length)' &gt; '17' and '$(ProjectGuid)' != ''">$(MSBuildProjectFile.Substring(0,8)).$(ProjectGuid.Substring(1,8))</MSBuildCopyMarkerName>
<!-- or a filename hash if the guid is not present (in such case the filename was not shortened and is still over 17 chars long). -->
<MSBuildCopyMarkerName Condition="'$(MSBuildCopyMarkerName.Length)' &gt; '17'">$(MSBuildProjectFile.Substring(0,8)).$([MSBuild]::StableStringHash($(MSBuildProjectFile)).ToString("X8"))</MSBuildCopyMarkerName>
<MSBuildCopyMarkerName>$(MSBuildCopyMarkerName).Up2Date</MSBuildCopyMarkerName>
</PropertyGroup>

<ItemGroup>
<IntermediateAssembly Include="$(IntermediateOutputPath)$(TargetName)$(TargetExt)"/>
<FinalDocFile Include="@(DocFileItem->'$(OutDir)%(Filename)%(Extension)')"/>
<CopyUpToDateMarker Include="$([MSBuild]::NormalizePath('$(MSBuildProjectDirectory)', '$(IntermediateOutputPath)', '$(MSBuildProjectFile).CopyComplete'))" />
<CopyUpToDateMarker Include="$([MSBuild]::NormalizePath('$(MSBuildProjectDirectory)', '$(IntermediateOutputPath)', '$(MSBuildCopyMarkerName)'))" />
</ItemGroup>

<ItemGroup Condition="'$(ProduceReferenceAssembly)' == 'true'">
Expand Down