Skip to content

Commit

Permalink
Avoid keeping runtext alive in Regex cache (#31736)
Browse files Browse the repository at this point in the history
Null out the runner's and match's references to the input string when a scan finishes, so that a Regex object kept alive in a cache does not also keep a potentially very large input string alive.
  • Loading branch information
stephentoub committed Feb 5, 2020
1 parent ac249c4 commit 4352d7c
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 32 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ internal Capture(string text, int index, int length)
public int Length { get; private protected set; }

/// <summary>The original string</summary>
internal string Text { get; private protected set; }
internal string Text { get; set; }

/// <summary>
/// Returns the value of this Regex Capture.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,8 @@
// methods to push new subpattern match results into (or remove
// backtracked results from) the Match instance.

using System.Collections.Generic;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Globalization;

namespace System.Text.RegularExpressions
{
Expand Down Expand Up @@ -90,13 +88,6 @@ protected internal RegexRunner() { }

protected internal Match? Scan(Regex regex, string text, int textbeg, int textend, int textstart, int prevlen, bool quick, TimeSpan timeout)
{
// Store arguments into fields for derived runner to examine
runregex = regex;
runtext = text;
runtextbeg = textbeg;
runtextend = textend;
runtextpos = runtextstart = textstart;

// Handle timeout argument
_timeout = -1; // (int)Regex.InfiniteMatchTimeout.TotalMilliseconds
bool ignoreTimeout = _ignoreTimeout = Regex.InfiniteMatchTimeout == timeout;
Expand All @@ -114,32 +105,44 @@ protected internal RegexRunner() { }

// Configure the additional value to "bump" the position along each time we loop around
// to call FindFirstChar again, as well as the stopping position for the loop. We generally
// bump by 1 and stop at runtextend, but if we're examining right-to-left, we instead bump
// by -1 and stop at runtextbeg.
int bump = 1, stoppos = runtextend;
if (runregex.RightToLeft)
// bump by 1 and stop at textend, but if we're examining right-to-left, we instead bump
// by -1 and stop at textbeg.
int bump = 1, stoppos = textend;
if (regex.RightToLeft)
{
bump = -1;
stoppos = runtextbeg;
stoppos = textbeg;
}

// Store runtextpos into field, as we may bump it in next check. The remaining arguments
// are stored below once we're past the potential return in the next check.
runtextpos = textstart;

// If previous match was empty or failed, advance by one before matching.
if (prevlen == 0)
{
if (runtextpos == stoppos)
if (textstart == stoppos)
{
return Match.Empty;
}

runtextpos += bump;
}

// Store remaining arguments into fields now that we're going to start the scan.
// These are referenced by the derived runner.
runregex = regex;
runtext = text;
runtextstart = textstart;
runtextbeg = textbeg;
runtextend = textend;

// Main loop: FindFirstChar/Go + bump until the ending position.
bool initialized = false;
while (true)
{
#if DEBUG
if (runregex.IsDebug)
if (regex.IsDebug)
{
Debug.WriteLine("");
Debug.WriteLine($"Search range: from {runtextbeg} to {runtextend}");
Expand All @@ -164,7 +167,7 @@ protected internal RegexRunner() { }
}

#if DEBUG
if (runregex.IsDebug)
if (regex.IsDebug)
{
Debug.WriteLine($"Executing engine starting at {runtextpos}");
Debug.WriteLine("");
Expand All @@ -178,8 +181,11 @@ protected internal RegexRunner() { }
Match match = runmatch!;
if (match._matchcount[0] > 0)
{
runtext = null; // drop reference to text to avoid keeping it alive in a cache

if (quick)
{
runmatch!.Text = null!; // drop reference
return null;
}

Expand All @@ -198,6 +204,8 @@ protected internal RegexRunner() { }
// We failed to match at this position. If we're at the stopping point, we're done.
if (runtextpos == stoppos)
{
runtext = null; // drop reference to text to avoid keeping it alive in a cache
if (runmatch != null) runmatch.Text = null!;
return Match.Empty;
}

Expand All @@ -212,13 +220,6 @@ protected internal RegexRunner() { }
/// </remarks>
internal void Scan<TState>(Regex regex, string text, int textstart, ref TState state, MatchCallback<TState> callback, TimeSpan timeout)
{
// Store arguments into fields for derived runner to examine
runregex = regex;
runtext = text;
runtextbeg = 0;
runtextend = text.Length;
runtextpos = runtextstart = textstart;

// Handle timeout argument
_timeout = -1; // (int)Regex.InfiniteMatchTimeout.TotalMilliseconds
bool ignoreTimeout = _ignoreTimeout = Regex.InfiniteMatchTimeout == timeout;
Expand All @@ -235,21 +236,29 @@ internal void Scan<TState>(Regex regex, string text, int textstart, ref TState s

// Configure the additional value to "bump" the position along each time we loop around
// to call FindFirstChar again, as well as the stopping position for the loop. We generally
// bump by 1 and stop at runtextend, but if we're examining right-to-left, we instead bump
// by -1 and stop at runtextbeg.
int bump = 1, stoppos = runtextend;
if (runregex.RightToLeft)
// bump by 1 and stop at text.Length, but if we're examining right-to-left, we instead bump
// by -1 and stop at 0.
int bump = 1, stoppos = text.Length;
if (regex.RightToLeft)
{
bump = -1;
stoppos = runtextbeg;
stoppos = 0;
}

// Store remaining arguments into fields now that we're going to start the scan.
// These are referenced by the derived runner.
runregex = regex;
runtextstart = runtextpos = textstart;
runtext = text;
runtextend = text.Length;
runtextbeg = 0;

// Main loop: FindFirstChar/Go + bump until the ending position.
bool initialized = false;
while (true)
{
#if DEBUG
if (runregex.IsDebug)
if (regex.IsDebug)
{
Debug.WriteLine("");
Debug.WriteLine($"Search range: from {runtextbeg} to {runtextend}");
Expand All @@ -274,7 +283,7 @@ internal void Scan<TState>(Regex regex, string text, int textstart, ref TState s
}

#if DEBUG
if (runregex.IsDebug)
if (regex.IsDebug)
{
Debug.WriteLine($"Executing engine starting at {runtextpos}");
Debug.WriteLine("");
Expand All @@ -294,6 +303,7 @@ internal void Scan<TState>(Regex regex, string text, int textstart, ref TState s
if (!callback(ref state, match))
{
// If the callback returns false, we're done.
match.Text = runtext = null!; // drop reference to text to avoid keeping it alive in a cache
return;
}

Expand All @@ -305,6 +315,7 @@ internal void Scan<TState>(Regex regex, string text, int textstart, ref TState s
{
if (runtextpos == stoppos)
{
match.Text = runtext = null!; // drop reference to text to avoid keeping it alive in a cache
return;
}

Expand All @@ -324,6 +335,8 @@ internal void Scan<TState>(Regex regex, string text, int textstart, ref TState s
// We failed to match at this position. If we're at the stopping point, we're done.
if (runtextpos == stoppos)
{
runtext = null; // drop reference to text to avoid keeping it alive in a cache
if (runmatch != null) runmatch.Text = null!;
return;
}

Expand Down

0 comments on commit 4352d7c

Please sign in to comment.