Skip to content

Commit

Permalink
Add regex "unit tests" test project (#65944)
Browse files Browse the repository at this point in the history
* Add regex "unit tests" test project

This follows the convention used by the networking tests, which typically have two distinct test projects per library: functional tests and unit tests.  The former are what we typically refer to as our tests for a library, whereas the latter build product source into the test project in order to directly validate internals (an alternative to this is to use InternalsVisibleTo, but that negatively impacts trimming and makes it more challenging to maintain a property boundary for the functional tests).  All the existing tests are moved unedited into the FunctionalTests, and a new UnitTests project is added with some initial tests for the RegexTreeAnalyzer code. The generator parser tests were also consolidated into the functional tests, as there's no longer a good reason for those few tests to be separate.

I fixed a few bugs in RegexTreeAnalyzer as a result.  In particular, we were over-annotating things as potentially containing captures or backtracking because in the implementation we were using the lookup APIs meant to be used only once all analysis was complete.  This doesn't have a negative functional impact, but it does negatively impact perf of compiled / source generator, which then generate unnecessary code.  We were also incorrectly conflating atomicity conferred by a grandparent with atomicity conferred by a parent; we need MayBacktracks to reflect only the atomicity directly contributed by a node, not by its parent's influence, as we need the parent to be able to understand whether the child might backtrack.

* Add some unit tests for RegexFindOptimizations

* Address PR feedback
  • Loading branch information
stephentoub committed Feb 28, 2022
1 parent 3d6781b commit b4c746b
Show file tree
Hide file tree
Showing 53 changed files with 433 additions and 205 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Text.RegularExpressi
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Text.RegularExpressions", "src\System.Text.RegularExpressions.csproj", "{0409C086-D7CC-43F8-9762-C94FB1E47F5B}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Text.RegularExpressions.Generators.Tests", "tests\System.Text.RegularExpressions.Generators.Tests\System.Text.RegularExpressions.Generators.Tests.csproj", "{32ABFCDA-10FD-4A98-A429-145C28021EBE}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Text.RegularExpressions.Tests", "tests\FunctionalTests\System.Text.RegularExpressions.Tests.csproj", "{8EE1A7C4-3630-4900-8976-9B3ADAFF10DC}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Text.RegularExpressions.Tests", "tests\System.Text.RegularExpressions.Tests.csproj", "{8EE1A7C4-3630-4900-8976-9B3ADAFF10DC}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "System.Text.RegularExpressions.UnitTests", "tests\UnitTests\System.Text.RegularExpressions.UnitTests.csproj", "{A86931EC-34DC-40A8-BD8C-F5E13BDBA903}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "tests", "tests", "{2ACCCAAB-F0CE-4839-82BD-F174861DEA78}"
EndProject
Expand Down Expand Up @@ -53,22 +53,22 @@ Global
{0409C086-D7CC-43F8-9762-C94FB1E47F5B}.Debug|Any CPU.Build.0 = Debug|Any CPU
{0409C086-D7CC-43F8-9762-C94FB1E47F5B}.Release|Any CPU.ActiveCfg = Release|Any CPU
{0409C086-D7CC-43F8-9762-C94FB1E47F5B}.Release|Any CPU.Build.0 = Release|Any CPU
{32ABFCDA-10FD-4A98-A429-145C28021EBE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{32ABFCDA-10FD-4A98-A429-145C28021EBE}.Debug|Any CPU.Build.0 = Debug|Any CPU
{32ABFCDA-10FD-4A98-A429-145C28021EBE}.Release|Any CPU.ActiveCfg = Release|Any CPU
{32ABFCDA-10FD-4A98-A429-145C28021EBE}.Release|Any CPU.Build.0 = Release|Any CPU
{8EE1A7C4-3630-4900-8976-9B3ADAFF10DC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{8EE1A7C4-3630-4900-8976-9B3ADAFF10DC}.Debug|Any CPU.Build.0 = Debug|Any CPU
{8EE1A7C4-3630-4900-8976-9B3ADAFF10DC}.Release|Any CPU.ActiveCfg = Release|Any CPU
{8EE1A7C4-3630-4900-8976-9B3ADAFF10DC}.Release|Any CPU.Build.0 = Release|Any CPU
{A86931EC-34DC-40A8-BD8C-F5E13BDBA903}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{A86931EC-34DC-40A8-BD8C-F5E13BDBA903}.Debug|Any CPU.Build.0 = Debug|Any CPU
{A86931EC-34DC-40A8-BD8C-F5E13BDBA903}.Release|Any CPU.ActiveCfg = Release|Any CPU
{A86931EC-34DC-40A8-BD8C-F5E13BDBA903}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(NestedProjects) = preSolution
{63551298-BFD4-43FC-8465-AC454228B83C} = {2ACCCAAB-F0CE-4839-82BD-F174861DEA78}
{32ABFCDA-10FD-4A98-A429-145C28021EBE} = {2ACCCAAB-F0CE-4839-82BD-F174861DEA78}
{8EE1A7C4-3630-4900-8976-9B3ADAFF10DC} = {2ACCCAAB-F0CE-4839-82BD-F174861DEA78}
{A86931EC-34DC-40A8-BD8C-F5E13BDBA903} = {2ACCCAAB-F0CE-4839-82BD-F174861DEA78}
{08F0E5F4-BBD5-45CC-BB12-BA37A83AD7B6} = {0D20E771-24BD-4F9E-BBD0-60156E8C44FC}
{77CDA838-6489-4816-8847-DE2C7F5E1DCE} = {0D20E771-24BD-4F9E-BBD0-60156E8C44FC}
{3699C8E2-C354-4AED-81DC-ECBAC3EFEB4B} = {0D20E771-24BD-4F9E-BBD0-60156E8C44FC}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,25 +127,17 @@ public RegexFindOptimizations(RegexTree tree, CultureInfo culture)
// The set contains one and only one character, meaning every match starts
// with the same literal value (potentially case-insensitive). Search for that.
FixedDistanceLiteral = (chars[0], 0);
FindMode = (_rightToLeft, set.CaseInsensitive) switch
{
(false, false) => FindNextStartingPositionMode.FixedLiteral_LeftToRight_CaseSensitive,
(false, true) => FindNextStartingPositionMode.FixedLiteral_LeftToRight_CaseInsensitive,
(true, false) => FindNextStartingPositionMode.LeadingLiteral_RightToLeft_CaseSensitive,
(true, true) => FindNextStartingPositionMode.LeadingLiteral_RightToLeft_CaseInsensitive,
};
FindMode = set.CaseInsensitive ?
FindNextStartingPositionMode.LeadingLiteral_RightToLeft_CaseInsensitive :
FindNextStartingPositionMode.LeadingLiteral_RightToLeft_CaseSensitive;
}
else
{
// The set may match multiple characters. Search for that.
FixedDistanceSets = new() { (chars, set.CharClass, 0, set.CaseInsensitive) };
FindMode = (_rightToLeft, set.CaseInsensitive) switch
{
(false, false) => FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseSensitive,
(false, true) => FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseInsensitive,
(true, false) => FindNextStartingPositionMode.LeadingSet_RightToLeft_CaseSensitive,
(true, true) => FindNextStartingPositionMode.LeadingSet_RightToLeft_CaseInsensitive,
};
FindMode = set.CaseInsensitive ?
FindNextStartingPositionMode.LeadingSet_RightToLeft_CaseInsensitive :
FindNextStartingPositionMode.LeadingSet_RightToLeft_CaseSensitive;
_asciiLookups = new uint[1][];
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,17 @@ internal static class RegexTreeAnalyzer
public static AnalysisResults Analyze(RegexCode code)
{
var results = new AnalysisResults(code);
results._complete = TryAnalyze(code.Tree.Root, results, isAtomicBySelfOrParent: true);
results._complete = TryAnalyze(code.Tree.Root, results, isAtomicByAncestor: true);
return results;

static bool TryAnalyze(RegexNode node, AnalysisResults results, bool isAtomicBySelfOrParent)
static bool TryAnalyze(RegexNode node, AnalysisResults results, bool isAtomicByAncestor)
{
if (!StackHelper.TryEnsureSufficientExecutionStack())
{
return false;
}

if (isAtomicBySelfOrParent)
if (isAtomicByAncestor)
{
// We've been told by our parent that we should be considered atomic, so add ourselves
// to the atomic collection.
Expand All @@ -45,14 +45,15 @@ static bool TryAnalyze(RegexNode node, AnalysisResults results, bool isAtomicByS
}

// Update state for certain node types.
bool isAtomicBySelf = false;
switch (node.Kind)
{
// Some node types add atomicity around what they wrap. Set isAtomicBySelfOrParent to true for such nodes
// even if it was false upon entering the method.
case RegexNodeKind.Atomic:
case RegexNodeKind.NegativeLookaround:
case RegexNodeKind.PositiveLookaround:
isAtomicBySelfOrParent = true;
isAtomicBySelf = true;
break;

// Track any nodes that are themselves captures.
Expand All @@ -70,7 +71,7 @@ static bool TryAnalyze(RegexNode node, AnalysisResults results, bool isAtomicByS
// Determine whether the child should be treated as atomic (whether anything
// can backtrack into it), which is influenced by whether this node (the child's
// parent) is considered atomic by itself or by its parent.
bool treatChildAsAtomic = isAtomicBySelfOrParent && node.Kind switch
bool treatChildAsAtomic = (isAtomicByAncestor | isAtomicBySelf) && node.Kind switch
{
// If the parent is atomic, so is the child. That's the whole purpose
// of the Atomic node, and lookarounds are also implicitly atomic.
Expand Down Expand Up @@ -104,14 +105,16 @@ static bool TryAnalyze(RegexNode node, AnalysisResults results, bool isAtomicByS
}

// If the child contains captures, so too does this parent.
if (results.MayContainCapture(child))
if (results._containsCapture.Contains(child))
{
results._containsCapture.Add(node);
}

// If the child might require backtracking into it, so too might the parent,
// unless the parent is itself considered atomic.
if (!isAtomicBySelfOrParent && results.MayBacktrack(child))
// unless the parent is itself considered atomic. Here we don't consider parental
// atomicity, as we need to surface upwards to the parent whether any backtracking
// will be visible from this node to it.
if (!isAtomicBySelf && (results._mayBacktrack?.Contains(child) == true))
{
(results._mayBacktrack ??= new()).Add(node);
}
Expand Down Expand Up @@ -149,7 +152,7 @@ internal sealed class AnalysisResults
/// <summary>Gets the code that was analyzed.</summary>
public RegexCode Code { get; }

/// <summary>Gets whether a node is considered atomic based on itself or its ancestry.</summary>
/// <summary>Gets whether a node is considered atomic based on its ancestry.</summary>
public bool IsAtomicByAncestor(RegexNode node) => _isAtomicByAncestor.Contains(node);

/// <summary>Gets whether a node directly or indirectly contains captures.</summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,12 @@ namespace System.Text.RegularExpressions.Tests
public static class RegexGeneratorHelper
{
private static readonly CSharpParseOptions s_previewParseOptions = CSharpParseOptions.Default.WithLanguageVersion(LanguageVersion.Preview);
private static readonly MetadataReference[] s_refs = CreateReferences();
private static readonly EmitOptions s_emitOptions = new EmitOptions(debugInformationFormat: DebugInformationFormat.Embedded);
private static readonly CSharpGeneratorDriver s_generatorDriver = CSharpGeneratorDriver.Create(new[] { new RegexGenerator().AsSourceGenerator() }, parseOptions: s_previewParseOptions);
private static Compilation? s_compilation;

internal static MetadataReference[] References { get; } = CreateReferences();

private static MetadataReference[] CreateReferences()
{
if (PlatformDetection.IsBrowser)
Expand All @@ -49,6 +50,61 @@ private static MetadataReference[] CreateReferences()
};
}

internal static byte[] CreateAssemblyImage(string source, string assemblyName)
{
CSharpCompilation compilation = CSharpCompilation.Create(
assemblyName,
new[] { CSharpSyntaxTree.ParseText(source, CSharpParseOptions.Default.WithLanguageVersion(LanguageVersion.Preview)) },
References,
new CSharpCompilationOptions(OutputKind.DynamicallyLinkedLibrary));

var ms = new MemoryStream();
if (compilation.Emit(ms).Success)
{
return ms.ToArray();
}

throw new InvalidOperationException();
}

internal static async Task<IReadOnlyList<Diagnostic>> RunGenerator(
string code, bool compile = false, LanguageVersion langVersion = LanguageVersion.Preview, MetadataReference[]? additionalRefs = null, bool allowUnsafe = false, CancellationToken cancellationToken = default)
{
var proj = new AdhocWorkspace()
.AddSolution(SolutionInfo.Create(SolutionId.CreateNewId(), VersionStamp.Create()))
.AddProject("RegexGeneratorTest", "RegexGeneratorTest.dll", "C#")
.WithMetadataReferences(additionalRefs is not null ? References.Concat(additionalRefs) : References)
.WithCompilationOptions(new CSharpCompilationOptions(OutputKind.DynamicallyLinkedLibrary, allowUnsafe: allowUnsafe)
.WithNullableContextOptions(NullableContextOptions.Enable))
.WithParseOptions(new CSharpParseOptions(langVersion))
.AddDocument("RegexGenerator.g.cs", SourceText.From(code, Encoding.UTF8)).Project;

Assert.True(proj.Solution.Workspace.TryApplyChanges(proj.Solution));

Compilation? comp = await proj!.GetCompilationAsync(CancellationToken.None).ConfigureAwait(false);
Debug.Assert(comp is not null);

var generator = new RegexGenerator();
CSharpGeneratorDriver cgd = CSharpGeneratorDriver.Create(new[] { generator.AsSourceGenerator() }, parseOptions: CSharpParseOptions.Default.WithLanguageVersion(langVersion));
GeneratorDriver gd = cgd.RunGenerators(comp!, cancellationToken);
GeneratorDriverRunResult generatorResults = gd.GetRunResult();
if (!compile)
{
return generatorResults.Diagnostics;
}

comp = comp.AddSyntaxTrees(generatorResults.GeneratedTrees.ToArray());
EmitResult results = comp.Emit(Stream.Null, cancellationToken: cancellationToken);
if (!results.Success || results.Diagnostics.Length != 0 || generatorResults.Diagnostics.Length != 0)
{
throw new ArgumentException(
string.Join(Environment.NewLine, results.Diagnostics.Concat(generatorResults.Diagnostics)) + Environment.NewLine +
string.Join(Environment.NewLine, generatorResults.GeneratedTrees.Select(t => t.ToString())));
}

return generatorResults.Diagnostics.Concat(results.Diagnostics).Where(d => d.Severity != DiagnosticSeverity.Hidden).ToArray();
}

internal static async Task<Regex> SourceGenRegexAsync(
string pattern, RegexOptions? options = null, TimeSpan? matchTimeout = null, CancellationToken cancellationToken = default)
{
Expand Down Expand Up @@ -109,7 +165,7 @@ internal static async Task<Regex[]> SourceGenRegexAsync(
var proj = new AdhocWorkspace()
.AddSolution(SolutionInfo.Create(SolutionId.CreateNewId(), VersionStamp.Create()))
.AddProject("Test", "test.dll", "C#")
.WithMetadataReferences(s_refs)
.WithMetadataReferences(References)
.WithCompilationOptions(
new CSharpCompilationOptions(
OutputKind.DynamicallyLinkedLibrary,
Expand Down
Loading

0 comments on commit b4c746b

Please sign in to comment.