Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add fallback keys to caching #2340

Closed
Closed
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/Agent.Listener/Agent.Listener.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
<PackageReference Include="System.IO.FileSystem.AccessControl" Version="4.4.0" />
<PackageReference Include="System.Security.Cryptography.ProtectedData" Version="4.4.0" />
<PackageReference Include="System.ServiceProcess.ServiceController" Version="4.4.0" />
<PackageReference Include="vss-api-netcore" Version="0.5.125-private" />
<PackageReference Include="vss-api-netcore" Version="0.5.126-private" />
</ItemGroup>

<PropertyGroup Condition=" '$(Configuration)' == 'Debug' ">
Expand Down
2 changes: 1 addition & 1 deletion src/Agent.PluginHost/Agent.PluginHost.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
</ItemGroup>
<ItemGroup>
<PackageReference Include="System.Runtime.Loader" Version="4.3.0" />
<PackageReference Include="vss-api-netcore" Version="0.5.125-private" />
<PackageReference Include="vss-api-netcore" Version="0.5.126-private" />
</ItemGroup>

<PropertyGroup Condition=" '$(Configuration)' == 'Debug' ">
Expand Down
2 changes: 1 addition & 1 deletion src/Agent.Plugins/Agent.Plugins.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

<ItemGroup>
<PackageReference Include="azuredevops-testresultparser" Version="1.0.2" />
<PackageReference Include="vss-api-netcore" Version="0.5.125-private" />
<PackageReference Include="vss-api-netcore" Version="0.5.126-private" />
</ItemGroup>

<PropertyGroup Condition=" '$(Configuration)' == 'Debug' ">
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,7 @@
using System.Threading;
using System.Threading.Tasks;
using Microsoft.TeamFoundation.Build.WebApi;
using Microsoft.VisualStudio.Services.BlobStore.Common;
using Microsoft.VisualStudio.Services.Content.Common.Tracing;
using Microsoft.VisualStudio.Services.BlobStore.WebApi;
using Microsoft.VisualStudio.Services.Common;
using Microsoft.VisualStudio.Services.WebApi;
using Microsoft.TeamFoundation.DistributedTask.WebApi;
using Microsoft.VisualStudio.Services.Agent.Util;
using Agent.Sdk;
using System.Text.RegularExpressions;
Expand Down
240 changes: 240 additions & 0 deletions src/Agent.Plugins/PipelineCache/FingerprintCreator.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,240 @@
using Agent.Sdk;
using BuildXL.Cache.ContentStore.Interfaces.Utils;
using Microsoft.VisualStudio.Services.PipelineCache.WebApi;
using Minimatch;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Security.Cryptography;
using System.Text;

[assembly: InternalsVisibleTo("Test")]

namespace Agent.Plugins.PipelineCache
{
public static class FingerprintCreator
{
// True when the current process is running on Windows.
private static readonly bool isWindows = RuntimeInformation.IsOSPlatform(OSPlatform.Windows);
// Only Linux is treated as case-sensitive here; every other platform gets case-insensitive matching.
private static readonly bool isCaseSensitive = RuntimeInformation.IsOSPlatform(OSPlatform.Linux);

// https://github.com/Microsoft/azure-pipelines-task-lib/blob/master/node/docs/findingfiles.md#matchoptions
// Options shared by every minimatch filter built in this class. Case is ignored unless the platform
// is considered case-sensitive, and Windows-style paths are only allowed when running on Windows.
private static readonly Options minimatchOptions = new Options
{
Dot = true,
NoBrace = true,
NoCase = !isCaseSensitive,
AllowWindowsPaths = isWindows,
};

// Characters that mark a key segment or path as containing a glob pattern.
private static readonly char[] GlobChars = new [] { '*', '?', '[', ']' };

// A key segment that both starts and ends with this character is never interpreted as a path.
private const char ForceStringLiteral = '"';

// Looked up once instead of on every character check.
private static readonly char[] InvalidFileNameChars = Path.GetInvalidFileNameChars();

// Returns true when `c` may appear in a path-like key segment: a glob character, a directory or
// volume separator, or any character the platform allows in a file name.
private static bool IsPathyChar(char c)
{
    // Glob characters are accepted even where the platform forbids them in file names.
    if (Array.IndexOf(GlobChars, c) >= 0)
    {
        return true;
    }

    if (c == Path.DirectorySeparatorChar
        || c == Path.AltDirectorySeparatorChar
        || c == Path.VolumeSeparatorChar)
    {
        return true;
    }

    return Array.IndexOf(InvalidFileNameChars, c) < 0;
}

// Decides whether a key segment should be interpreted as a file path / glob (true) or as a plain
// string (false). An empty or null segment is never a path.
internal static bool IsPathy(string keySegment)
{
    // Nothing to inspect: there is no first or last character and no file name.
    if (string.IsNullOrEmpty(keySegment))
    {
        return false;
    }

    char first = keySegment[0];
    char last = keySegment[keySegment.Length - 1];

    // Wrapping a segment in the literal marker forces it to be treated as a string.
    if (first == ForceStringLiteral && last == ForceStringLiteral)
    {
        return false;
    }

    // Every character must be one that can appear in a path or glob.
    if (keySegment.Any(c => !IsPathyChar(c)))
    {
        return false;
    }

    // A path segment must contain a dot, and must not end with one.
    if (!keySegment.Contains("."))
    {
        return false;
    }

    return last != '.';
}

// Builds a predicate for a single minimatch rule that also traces every decision it makes.
// For an include rule (invert == false) the predicate is true when the path matches the rule;
// for an exclude rule (invert == true) it is true when the path does NOT match the rule.
internal static Func<string,bool> CreateMinimatchFilter(AgentTaskPluginExecutionContext context, string rule, bool invert)
{
    Func<string,bool> matchesRule = Minimatcher.CreateFilter(rule, minimatchOptions);

    return candidate =>
    {
        bool isMatch = matchesRule(candidate);
        context.Verbose($"Path `{candidate}` is {(isMatch ? "" : "not")} {(invert ? "excluded" : "included")} because of pattern `{(invert ? "!" : "")}{rule}`.");

        // Keep the path when exactly one of (rule matched, rule is an exclude) holds.
        return invert != isMatch;
    };
}

// Expands `path` to a full path. When `workingDirectory` is supplied it is used as the base for
// the expansion; otherwise the single-argument Path.GetFullPath overload is used.
internal static string MakePathAbsolute(string workingDirectory, string path)
{
    // Normalize to some extent, let minimatch worry about casing
    return workingDirectory != null
        ? Path.GetFullPath(path, workingDirectory)
        : Path.GetFullPath(path);
}

// Combines one include rule with any number of exclude rules into a single predicate.
// A path passes when it matches the include rule and is not rejected by any exclude rule.
// Note: `workingDirectory` is not used by this method.
internal static Func<string,bool> CreateFilter(
    AgentTaskPluginExecutionContext context,
    string workingDirectory,
    string includeRule,
    IEnumerable<string> excludeRules)
{
    Func<string,bool> isIncluded = CreateMinimatchFilter(context, includeRule, invert: false);

    // Built eagerly so that every exclude rule is evaluated here rather than on first use.
    var exclusionChecks = new List<Func<string,bool>>();
    foreach (string excludeRule in excludeRules)
    {
        exclusionChecks.Add(CreateMinimatchFilter(context, excludeRule, invert: true));
    }

    return candidate =>
    {
        if (!isIncluded(candidate))
        {
            return false;
        }

        // Stop at the first exclude rule that rejects the path.
        foreach (Func<string,bool> survives in exclusionChecks)
        {
            if (!survives(candidate))
            {
                return false;
            }
        }

        return true;
    };
}


// Given a globby path, figure out where to start enumerating.
// Room for optimization here e.g.
//  includeGlobPath = /dir/*foo*
//  should map to
//  enumerateRootPath = /dir/
//  enumeratePattern = *foo*
//  enumerateDepth = SearchOption.TopDirectoryOnly
//
// It's ok to come up with a file-enumeration that includes too much as the glob filter
// will filter out the extra, but it's not ok to include too little in the enumeration.
//
// Outputs:
//  enumerateRootPath - directory to start enumerating from
//  enumeratePattern  - file-name pattern to hand to the enumeration (`*` whenever a glob is present)
//  enumerateDepth    - AllDirectories when the glob can reach below the root directory
//
// Throws ArgumentException when the path contains a glob but no directory separator before it.
internal static void DetermineFileEnumerationFromGlob(
    string includeGlobPathAbsolute,
    out string enumerateRootPath,
    out string enumeratePattern,
    out SearchOption enumerateDepth)
{
    int firstGlob = includeGlobPathAbsolute.IndexOfAny(GlobChars);

    // no globbing
    if (firstGlob < 0)
    {
        enumerateRootPath = Path.GetDirectoryName(includeGlobPathAbsolute);
        enumeratePattern = Path.GetFileName(includeGlobPathAbsolute);
        enumerateDepth = SearchOption.TopDirectoryOnly;
        return;
    }

    char[] separators = new [] { Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar };

    // The enumeration root is the last directory that is fully spelled out before the first glob.
    int lastSeparatorBeforeGlob = includeGlobPathAbsolute.Substring(0, firstGlob).LastIndexOfAny(separators);
    if (lastSeparatorBeforeGlob < 0)
    {
        throw new ArgumentException(
            $"Glob path `{includeGlobPathAbsolute}` must contain a directory before its first glob character.",
            nameof(includeGlobPathAbsolute));
    }

    string rootDirectory = includeGlobPathAbsolute.Substring(0, lastSeparatorBeforeGlob);

    // Cutting at the separator turns `/*.txt` into an empty root and `C:\*.txt` into the
    // drive-relative `C:`. Fall back to the real filesystem root in those cases.
    string pathRoot = Path.GetPathRoot(includeGlobPathAbsolute) ?? string.Empty;
    if (rootDirectory.Length < pathRoot.Length && pathRoot.Length <= firstGlob)
    {
        rootDirectory = pathRoot;
    }

    // A glob in a directory component (e.g. `/dir/*/file.txt`) can match files below the root
    // directory even without `**`, so the enumeration has to descend in that case too.
    bool hasRecursive = includeGlobPathAbsolute.Contains("**", StringComparison.Ordinal);
    bool globSpansDirectories = includeGlobPathAbsolute.IndexOfAny(separators, firstGlob) >= 0;

    enumerateRootPath = rootDirectory;
    enumeratePattern = "*";
    enumerateDepth = (hasRecursive || globSpansDirectories)
        ? SearchOption.AllDirectories
        : SearchOption.TopDirectoryOnly;
}

// Resolves each key segment into a fingerprint segment.
//
// A segment that IsPathy() accepts is treated as a comma-separated list of path rules: the first
// rule is the include glob and every later rule must be an exclude glob prefixed with `!`, for
// example `yarn.lock` or `**/yarn.lock,!node_modules/yarn.lock`. The matching files are hashed and
// the segment is replaced by a summary of those hashes. Any other segment is copied through as-is.
//
// Rules that are not fully qualified are expanded against the `system.defaultworkingdirectory`
// variable. When `filePathRoot` is not null, hashed files are recorded relative to it.
//
// Throws ArgumentException for empty, reserved or malformed segments, and FileNotFoundException
// when a path segment matches no files.
public static Fingerprint EvaluateKeyToFingerprint(
    AgentTaskPluginExecutionContext context,
    string filePathRoot,
    IEnumerable<string> keySegments)
{
    string workingDirectoryValue = context.Variables.GetValueOrDefault(
        "system.defaultworkingdirectory" // Constants.Variables.System.DefaultWorkingDirectory
    )?.Value;

    var resolvedSegments = new List<string>();

    // One hasher serves every file and is released when the method finishes, even on failure.
    using (SHA256 sha256 = SHA256.Create())
    {
        foreach (string keySegment in keySegments)
        {
            if (string.IsNullOrEmpty(keySegment))
            {
                // Reject up front: the checks below inspect the first character of the segment.
                throw new ArgumentException("A key segment cannot be empty.");
            }
            else if (keySegment.Equals("*", StringComparison.Ordinal))
            {
                throw new ArgumentException("`*` is a reserved key segment. For path glob, use `./*`.");
            }
            else if (keySegment.Equals(Fingerprint.Wildcard, StringComparison.Ordinal))
            {
                throw new ArgumentException("`**` is a reserved key segment. For path glob, use `./**`.");
            }
            else if (keySegment[0] == '\'')
            {
                throw new ArgumentException("A key segment cannot start with a single-quote character`.");
            }
            else if (keySegment[0] == '`')
            {
                throw new ArgumentException("A key segment cannot start with a backtick character`.");
            }
            else if (IsPathy(keySegment))
            {
                context.Verbose($"Interpretting `{keySegment}` as a path.");

                var segment = new StringBuilder();
                bool foundFile = false;

                // Rules are comma-separated with no surrounding whitespace, e.g.
                //   yarn.lock
                //   **/yarn.lock,!node_modules/yarn.lock
                string[] pathRules = keySegment.Split(new []{','}, StringSplitOptions.RemoveEmptyEntries);
                string rootRule = pathRules.First();
                if (rootRule[0] == '!')
                {
                    throw new ArgumentException("Path glob must start with an include glob.");
                }

                // Only rules that are not fully qualified are expanded against the working directory.
                string workingDirectory = null;
                if (!Path.IsPathFullyQualified(rootRule))
                {
                    workingDirectory = workingDirectoryValue;
                }

                string absoluteRootRule = MakePathAbsolute(workingDirectory, rootRule);
                context.Verbose($"Expanded include rule is `{absoluteRootRule}`.");
                IEnumerable<string> absoluteExcludeRules = pathRules.Skip(1).Select(r => {
                    if (r[0] != '!')
                    {
                        throw new ArgumentException("Path glob must start with an exclude glob.");
                    }
                    return MakePathAbsolute(workingDirectory, r.Substring(1));
                });
                Func<string,bool> filter = CreateFilter(context, workingDirectory, absoluteRootRule, absoluteExcludeRules);

                DetermineFileEnumerationFromGlob(
                    absoluteRootRule,
                    out string enumerateRootPath,
                    out string enumeratePattern,
                    out SearchOption enumerateDepth);

                context.Verbose($"Enumerating starting at root `{enumerateRootPath}` with pattern `{enumeratePattern}`.");

                // NOTE(review): every enumerated file is run through the filter, so a recursive glob
                // such as `**/yarn.lock` visits the whole tree under the root. Room for optimization.
                //
                // The file system does not promise an enumeration order, so sort the matches to keep
                // the resulting fingerprint stable from run to run.
                IEnumerable<string> files = Directory.EnumerateFiles(enumerateRootPath, enumeratePattern, enumerateDepth)
                    .Where(f => filter(f))
                    .Distinct()
                    .OrderBy(f => f, StringComparer.Ordinal);

                foreach (string path in files)
                {
                    foundFile = true;

                    using (var fs = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read))
                    {
                        byte[] hash = sha256.ComputeHash(fs);
                        // Path.GetRelativePath returns 'The relative path, or path if the paths don't share the same root.'
                        string displayPath = filePathRoot == null ? path : Path.GetRelativePath(filePathRoot, path);
                        segment.Append($"\nSHA256({displayPath})=[{fs.Length}]{hash.ToHex()}");
                    }
                }

                if (!foundFile)
                {
                    throw new FileNotFoundException("No files found.");
                }

                // Collapse the per-file hash lines into one short value used as the segment.
                string fileHashString = segment.ToString();
                string fileHashStringHash = SummarizeString(fileHashString);
                context.Output($"File hashes summarized as `{fileHashStringHash}` from BASE64(SHA256(`{fileHashString}`))");
                resolvedSegments.Add(fileHashStringHash);
            }
            else
            {
                context.Verbose($"Interpretting `{keySegment}` as a string.");
                resolvedSegments.Add($"{keySegment}");
            }
        }
    }

    return new Fingerprint() { Segments = resolvedSegments.ToArray() };
}

// Returns the Base64-encoded SHA-256 digest of the UTF-8 bytes of `input`.
internal static string SummarizeString(string input)
{
    // The hasher is disposable; release it as soon as the digest has been computed.
    using (SHA256 sha256 = SHA256.Create())
    {
        byte[] digest = sha256.ComputeHash(Encoding.UTF8.GetBytes(input));
        return Convert.ToBase64String(digest);
    }
}
}
}
Loading