From 55c713bffb685993cc8dcb06de1214cfdad94ebc Mon Sep 17 00:00:00 2001 From: shishirchawla Date: Wed, 8 Jan 2020 07:14:43 -0800 Subject: [PATCH] Added new regex option RegexOptions.AnyNewLine --- .../ref/System.Text.RegularExpressions.cs | 1 + .../System/Text/RegularExpressions/Regex.cs | 5 +- .../Text/RegularExpressions/RegexCode.cs | 7 +- .../Text/RegularExpressions/RegexCompiler.cs | 121 +++++++++++++++++- .../Text/RegularExpressions/RegexFCD.cs | 15 +++ .../RegularExpressions/RegexInterpreter.cs | 27 +++- .../Text/RegularExpressions/RegexNode.cs | 3 + .../Text/RegularExpressions/RegexOptions.cs | 1 + .../Text/RegularExpressions/RegexParser.cs | 25 +++- .../Text/RegularExpressions/RegexWriter.cs | 2 + .../tests/Regex.Ctor.Tests.cs | 6 +- .../tests/Regex.Match.Tests.cs | 97 +++++++++++++- .../tests/Regex.MultipleMatches.Tests.cs | 35 ++++- 13 files changed, 324 insertions(+), 21 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/ref/System.Text.RegularExpressions.cs b/src/libraries/System.Text.RegularExpressions/ref/System.Text.RegularExpressions.cs index 8c6bf1e7949c6..a1109df4427d3 100644 --- a/src/libraries/System.Text.RegularExpressions/ref/System.Text.RegularExpressions.cs +++ b/src/libraries/System.Text.RegularExpressions/ref/System.Text.RegularExpressions.cs @@ -243,6 +243,7 @@ public enum RegexOptions RightToLeft = 64, ECMAScript = 256, CultureInvariant = 512, + AnyNewLine = 1024, } public abstract partial class RegexRunner { diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs index cc505492c164a..43f41d89ea3dc 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs @@ -21,7 +21,7 @@ namespace System.Text.RegularExpressions /// public partial class Regex : 
ISerializable { - private const int MaxOptionShift = 10; + private const int MaxOptionShift = 11; protected internal string? pattern; // The string pattern provided protected internal RegexOptions roptions; // the top-level options from the options string @@ -137,7 +137,8 @@ internal static void ValidateOptions(RegexOptions options) #if DEBUG RegexOptions.Debug | #endif - RegexOptions.CultureInvariant)) != 0) + RegexOptions.CultureInvariant | + RegexOptions.AnyNewLine)) != 0) { throw new ArgumentOutOfRangeException(nameof(options)); } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCode.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCode.cs index 8c3da31d7c743..16258668891cd 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCode.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCode.cs @@ -53,7 +53,7 @@ internal sealed class RegexCode public const int Beginning = 18; // \A public const int Start = 19; // \G public const int EndZ = 20; // \Z - public const int End = 21; // \Z + public const int End = 21; // \z public const int Nothing = 22; // Reject! @@ -88,6 +88,9 @@ internal sealed class RegexCode public const int Notoneloopatomic = 44; // lef,back set,min,max (?> . {,n} ) public const int Setloopatomic = 45; // lef,back set,min,max (?> [\d]{,n} ) + public const int AnyEndZ = 46; // \Z + public const int AnyEol = 47; // $ + // Modifiers for alternate modes public const int Mask = 63; // Mask to get unmodified ordinary operator public const int Rtl = 64; // bit to indicate that we're reverse scanning. 
@@ -168,6 +171,7 @@ public static int OpcodeSize(int opcode) case Nothing: case Bol: case Eol: + case AnyEol: case Boundary: case Nonboundary: case ECMABoundary: @@ -175,6 +179,7 @@ public static int OpcodeSize(int opcode) case Beginning: case Start: case EndZ: + case AnyEndZ: case End: case Nullmark: case Setmark: diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs index 222c68f6c57d6..8d105ee64e1ae 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs @@ -876,7 +876,7 @@ protected void GenerateFindFirstChar() } } - if ((_anchors & (RegexFCD.Beginning | RegexFCD.Start | RegexFCD.EndZ | RegexFCD.End)) != 0) + if ((_anchors & (RegexFCD.Beginning | RegexFCD.Start | RegexFCD.EndZ | RegexFCD.AnyEndZ | RegexFCD.End)) != 0) { if (!_code!.RightToLeft) { @@ -908,7 +908,7 @@ protected void GenerateFindFirstChar() MarkLabel(l1); } - if ((_anchors & RegexFCD.EndZ) != 0) + if ((_anchors & (RegexFCD.EndZ | RegexFCD.AnyEndZ)) != 0) { Label l1 = DefineLabel(); Ldthisfld(s_runtextposField); @@ -978,6 +978,60 @@ protected void GenerateFindFirstChar() MarkLabel(l2); } + if ((_anchors & RegexFCD.AnyEndZ) != 0) + { + LocalBuilder diff = _temp1Local; + Label l1 = DefineLabel(); + Label l2 = DefineLabel(); + Label l3 = DefineLabel(); + Ldthisfld(s_runtextendField); + Ldthisfld(s_runtextposField); + Sub(); + Stloc(diff); + Ldloc(diff); + Ldc(2); + Bgt(l1); + Ldloc(diff); + Ldc(2); + Blt(l2); + Ldthisfld(s_runtextField); + Ldthisfld(s_runtextposField); + Callvirt(s_stringGetCharsMethod); + Ldc('\r'); + Bne(l1); + Ldthisfld(s_runtextField); + Ldthisfld(s_runtextposField); + Ldc(1); + Add(); + Callvirt(s_stringGetCharsMethod); + Ldc('\n'); + Bne(l1); + Br(l3); + + MarkLabel(l2); + 
Ldloc(diff); + Ldc(1); + Blt(l3); + Ldthisfld(s_runtextField); + Ldthisfld(s_runtextposField); + Callvirt(s_stringGetCharsMethod); + Ldc('\n'); + Beq(l3); + Ldthisfld(s_runtextField); + Ldthisfld(s_runtextposField); + Callvirt(s_stringGetCharsMethod); + Ldc('\r'); + Beq(l3); + + MarkLabel(l1); + Ldthis(); + Ldthisfld(s_runtextbegField); + Stfld(s_runtextposField); + Ldc(0); + Ret(); + MarkLabel(l3); + } + if ((_anchors & RegexFCD.Start) != 0) { Label l1 = DefineLabel(); @@ -3361,6 +3415,23 @@ private void GenerateOneCode() break; } + case RegexCode.AnyEol: + //: if (Rightchars() > 0 && CharAt(Textpos()) != '\n' && CharAt(Textpos()) != '\r') + //: break Backward; + { + Label l1 = _labels![NextCodepos()]; + Ldloc(_runtextposLocal!); + Ldloc(_runtextendLocal!); + Bge(l1); + Rightchar(); + Ldc('\n'); + Beq(l1); + Rightchar(); + Ldc('\r'); + BneFar(_backtrack); + break; + } + case RegexCode.Boundary: case RegexCode.Nonboundary: //: if (!IsBoundary(Textpos(), _textbeg, _textend)) @@ -3431,6 +3502,52 @@ private void GenerateOneCode() BneFar(_backtrack); break; + case RegexCode.AnyEndZ: + //: if (rightChars > 2) + //: break Backward; + //: if (rightChars == 1 && CharAt(Textpos()) != '\r' && CharAt(Textpos()) != '\n') + //: break Backward; + //: if (rightChars == 2 && (CharAt(Textpos()) != '\r' || CharAt(Textpos()+1) != '\n')) + //: break Backward; + { + LocalBuilder diff = _temp1Local!; + Label l1 = DefineLabel(); + + Ldloc(_runtextendLocal!); + Ldloc(_runtextposLocal!); + Sub(); + Stloc(diff); + Ldloc(diff); + Ldc(2); + BgtFar(_backtrack); + Ldloc(diff); + Ldc(2); + Blt(l1); + Rightchar(); + Ldc('\r'); + BneFar(_backtrack); + Ldloc(_runtextLocal!); + Ldloc(_runtextposLocal!); + Ldc(1); + Add(); + Callvirt(s_stringGetCharsMethod); + Ldc('\n'); + BneFar(_backtrack); + Br(_labels![NextCodepos()]); + + MarkLabel(l1); + Ldloc(diff); + Ldc(1); + Blt(_labels![NextCodepos()]); + Rightchar(); + Ldc('\n'); + Beq(_labels![NextCodepos()]); + Rightchar(); + Ldc('\r'); + 
BneFar(_backtrack); + break; + } + case RegexCode.End: //: if (Rightchars() > 0) //: break Backward; diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexFCD.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexFCD.cs index ff958a4e151ad..fad6455bd1c35 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexFCD.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexFCD.cs @@ -35,6 +35,9 @@ internal ref struct RegexFCD public const int Boundary = 0x0040; public const int ECMABoundary = 0x0080; + public const int AnyEndZ = 0x0100; + public const int AnyEol = 0x0200; + private readonly List _fcStack; private ValueListBuilder _intStack; // must not be readonly private bool _skipAllChildren; // don't process any more children at the current level @@ -130,11 +133,13 @@ public static RegexPrefix Prefix(RegexTree tree) case RegexNode.Bol: case RegexNode.Eol: + case RegexNode.AnyEol: case RegexNode.Boundary: case RegexNode.ECMABoundary: case RegexNode.Beginning: case RegexNode.Start: case RegexNode.EndZ: + case RegexNode.AnyEndZ: case RegexNode.End: case RegexNode.Empty: case RegexNode.Require: @@ -185,11 +190,13 @@ public static int Anchors(RegexTree tree) case RegexNode.Bol: case RegexNode.Eol: + case RegexNode.AnyEol: case RegexNode.Boundary: case RegexNode.ECMABoundary: case RegexNode.Beginning: case RegexNode.Start: case RegexNode.EndZ: + case RegexNode.AnyEndZ: case RegexNode.End: return result | AnchorFromType(curNode.Type); @@ -217,11 +224,13 @@ private static int AnchorFromType(int type) => { RegexNode.Bol => Bol, RegexNode.Eol => Eol, + RegexNode.AnyEol => AnyEol, RegexNode.Boundary => Boundary, RegexNode.ECMABoundary => ECMABoundary, RegexNode.Beginning => Beginning, RegexNode.Start => Start, RegexNode.EndZ => EndZ, + RegexNode.AnyEndZ => AnyEndZ, RegexNode.End => End, _ => 0, }; @@ -244,10 +253,14 @@ 
public static string AnchorDescription(int anchors) sb.Append(", ECMABoundary"); if (0 != (anchors & Eol)) sb.Append(", Eol"); + if (0 != (anchors & AnyEol)) + sb.Append(", AnyEol"); if (0 != (anchors & End)) sb.Append(", End"); if (0 != (anchors & EndZ)) sb.Append(", EndZ"); + if (0 != (anchors & AnyEndZ)) + sb.Append(", AnyEndZ"); if (sb.Length >= 2) return (sb.ToString(2, sb.Length - 2)); @@ -479,6 +492,7 @@ private void CalculateFC(int NodeType, RegexNode node, int CurIndex) case RegexNode.Nothing: case RegexNode.Bol: case RegexNode.Eol: + case RegexNode.AnyEol: case RegexNode.Boundary: case RegexNode.Nonboundary: case RegexNode.ECMABoundary: @@ -486,6 +500,7 @@ private void CalculateFC(int NodeType, RegexNode node, int CurIndex) case RegexNode.Beginning: case RegexNode.Start: case RegexNode.EndZ: + case RegexNode.AnyEndZ: case RegexNode.End: PushFC(new RegexFC(true)); break; diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs index a02da76c76121..4e4ce095ff2d4 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs @@ -387,7 +387,8 @@ private char CharAt(int j) protected override bool FindFirstChar() { - if (0 != (_code.Anchors & (RegexFCD.Beginning | RegexFCD.Start | RegexFCD.EndZ | RegexFCD.End))) + if (0 != (_code.Anchors & (RegexFCD.Beginning | RegexFCD.Start | RegexFCD.EndZ | + RegexFCD.AnyEndZ | RegexFCD.End))) { if (!_code.RightToLeft) { @@ -397,7 +398,7 @@ protected override bool FindFirstChar() runtextpos = runtextend; return false; } - if (0 != (_code.Anchors & RegexFCD.EndZ) && runtextpos < runtextend - 1) + if (0 != (_code.Anchors & (RegexFCD.EndZ | RegexFCD.AnyEndZ)) && runtextpos < runtextend - 1) { runtextpos = runtextend - 
1; } @@ -411,6 +412,11 @@ protected override bool FindFirstChar() if ((0 != (_code.Anchors & RegexFCD.End) && runtextpos < runtextend) || (0 != (_code.Anchors & RegexFCD.EndZ) && (runtextpos < runtextend - 1 || (runtextpos == runtextend - 1 && CharAt(runtextpos) != '\n'))) || + (0 != (_code.Anchors & RegexFCD.AnyEndZ) && (runtextpos < runtextend - 2 || + (runtextpos == runtextend - 2 && (CharAt(runtextpos) != '\r' + || CharAt(runtextpos+1) != '\n')) || + (runtextpos == runtextend - 1 && CharAt(runtextpos) != '\n' + && CharAt(runtextpos) != '\r'))) || (0 != (_code.Anchors & RegexFCD.Start) && runtextpos < runtextstart)) { runtextpos = runtextbeg; @@ -967,6 +973,12 @@ protected override void Go() advance = 0; continue; + case RegexCode.AnyEol: + if (Rightchars() > 0 && CharAt(Textpos()) != '\n' && CharAt(Textpos()) != '\r') + break; + advance = 0; + continue; + case RegexCode.Boundary: if (!IsBoundary(Textpos(), runtextbeg, runtextend)) break; @@ -1009,6 +1021,17 @@ protected override void Go() advance = 0; continue; + case RegexCode.AnyEndZ: + int rightChars = Rightchars(); + if (rightChars > 2) + break; + if (rightChars == 1 && CharAt(Textpos()) != '\r' && CharAt(Textpos()) != '\n') + break; + if (rightChars == 2 && (CharAt(Textpos()) != '\r' || CharAt(Textpos() + 1) != '\n')) + break; + advance = 0; + continue; + case RegexCode.End: if (Rightchars() > 0) break; diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs index dbd3339b5fe5c..a03dad2b6cd11 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs @@ -69,6 +69,7 @@ internal sealed class RegexNode public const int Bol = RegexCode.Bol; // ^ public const int Eol = RegexCode.Eol; // $ + public const int AnyEol = 
RegexCode.AnyEol; // $ public const int Boundary = RegexCode.Boundary; // \b public const int Nonboundary = RegexCode.Nonboundary; // \B public const int ECMABoundary = RegexCode.ECMABoundary; // \b @@ -76,6 +77,7 @@ internal sealed class RegexNode public const int Beginning = RegexCode.Beginning; // \A public const int Start = RegexCode.Start; // \G public const int EndZ = RegexCode.EndZ; // \Z + public const int AnyEndZ = RegexCode.AnyEndZ; // \Z public const int End = RegexCode.End; // \z public const int Oneloopatomic = RegexCode.Oneloopatomic; // c,n (?> a*) @@ -978,6 +980,7 @@ public string Description() if ((Options & RegexOptions.Singleline) != 0) argSb.Append("-S"); if ((Options & RegexOptions.IgnorePatternWhitespace) != 0) argSb.Append("-X"); if ((Options & RegexOptions.ECMAScript) != 0) argSb.Append("-E"); + if ((Options & RegexOptions.AnyNewLine) != 0) argSb.Append("-A"); switch (Type) { diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexOptions.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexOptions.cs index e3a3087bbec63..3e22bf85e4e7f 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexOptions.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexOptions.cs @@ -22,5 +22,6 @@ public enum RegexOptions ECMAScript = 0x0100, // "e" CultureInvariant = 0x0200, + AnyNewLine = 0x0400, // "a", Treat "$" as (?=\r\z|\n\z|\r\n\z|\z) } } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs index 393a468e7b095..715c67a7e1963 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs @@ -374,7 
+374,10 @@ private RegexNode ScanRegex() break; case '$': - AddUnitType(UseOptionM() ? RegexNode.Eol : RegexNode.EndZ); + if (UseOptionA()) + AddUnitType(UseOptionM() ? RegexNode.AnyEol : RegexNode.AnyEndZ); + else + AddUnitType(UseOptionM() ? RegexNode.Eol : RegexNode.EndZ); break; case '.': @@ -384,7 +387,18 @@ private RegexNode ScanRegex() } else { - AddUnitNotone('\n'); + if (UseOptionA()) + { + // Allow everything from RegexCharClass.AnyClass except '\r' and '\n' + RegexCharClass anyClass = RegexCharClass.Parse(RegexCharClass.AnyClass); + RegexCharClass lecc = new RegexCharClass(); // line ending character class + lecc.AddChar('\r'); + lecc.AddChar('\n'); + anyClass.AddSubtraction(lecc); + AddUnitSet(anyClass.ToStringClass()); + } + else + AddUnitNotone('\n'); } break; @@ -1764,6 +1778,7 @@ private static RegexOptions OptionFromCode(char ch) 'd' => RegexOptions.Debug, #endif 'e' => RegexOptions.ECMAScript, + 'a' => RegexOptions.AnyNewLine, _ => 0, }; } @@ -2038,6 +2053,12 @@ private bool IsCaptureSlot(int i) /// True if E option enabling ECMAScript behavior is on. private bool UseOptionE() => (_options & RegexOptions.ECMAScript) != 0; + /// <summary> + /// True if the A option (AnyNewLine) is on, which makes $ recognize "\r\n", "\n" + /// and "\r" as line endings and stops . from matching '\r' or '\n'.
+ /// </summary> + private bool UseOptionA() => (_options & RegexOptions.AnyNewLine) != 0; + private const byte Q = 5; // quantifier private const byte S = 4; // ordinary stopper private const byte Z = 3; // ScanBlank stopper diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexWriter.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexWriter.cs index 7c71cf5ef9378..9bae2735b22d5 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexWriter.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexWriter.cs @@ -489,6 +489,7 @@ private void EmitFragment(int nodetype, RegexNode node, int curIndex) case RegexNode.Nothing: case RegexNode.Bol: case RegexNode.Eol: + case RegexNode.AnyEol: case RegexNode.Boundary: case RegexNode.Nonboundary: case RegexNode.ECMABoundary: @@ -496,6 +497,7 @@ private void EmitFragment(int nodetype, RegexNode node, int curIndex) case RegexNode.Beginning: case RegexNode.Start: case RegexNode.EndZ: + case RegexNode.AnyEndZ: case RegexNode.End: Emit(node.Type); break; diff --git a/src/libraries/System.Text.RegularExpressions/tests/Regex.Ctor.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/Regex.Ctor.Tests.cs index 45986784625a2..94440bf802db2 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/Regex.Ctor.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/Regex.Ctor.Tests.cs @@ -18,7 +18,7 @@ public static IEnumerable Ctor_TestData() yield return new object[] { "foo", RegexOptions.None, Regex.InfiniteMatchTimeout }; yield return new object[] { "foo", RegexOptions.RightToLeft, Regex.InfiniteMatchTimeout }; yield return new object[] { "foo", RegexOptions.Compiled, Regex.InfiniteMatchTimeout }; - yield return new object[] { "foo", RegexOptions.ECMAScript | RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.CultureInvariant, 
Regex.InfiniteMatchTimeout }; + yield return new object[] { "foo", RegexOptions.ECMAScript | RegexOptions.IgnoreCase | RegexOptions.AnyNewLine | RegexOptions.Multiline | RegexOptions.CultureInvariant, Regex.InfiniteMatchTimeout }; yield return new object[] { "foo", RegexOptions.ECMAScript | RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.CultureInvariant | RegexOptions.Compiled, Regex.InfiniteMatchTimeout }; yield return new object[] { "foo", RegexOptions.None, new TimeSpan(1) }; yield return new object[] { "foo", RegexOptions.None, TimeSpan.FromMilliseconds(int.MaxValue - 1) }; @@ -73,8 +73,8 @@ public static void Ctor_Invalid() AssertExtensions.Throws("options", () => new Regex("foo", (RegexOptions)(-1))); AssertExtensions.Throws("options", () => new Regex("foo", (RegexOptions)(-1), new TimeSpan())); - AssertExtensions.Throws("options", () => new Regex("foo", (RegexOptions)0x400)); - AssertExtensions.Throws("options", () => new Regex("foo", (RegexOptions)0x400, new TimeSpan())); + AssertExtensions.Throws("options", () => new Regex("foo", (RegexOptions)0x800)); + AssertExtensions.Throws("options", () => new Regex("foo", (RegexOptions)0x800, new TimeSpan())); AssertExtensions.Throws("options", () => new Regex("foo", RegexOptions.ECMAScript | RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.CultureInvariant | RegexOptions.RightToLeft)); AssertExtensions.Throws("options", () => new Regex("foo", RegexOptions.ECMAScript | RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.CultureInvariant | RegexOptions.ExplicitCapture)); diff --git a/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs index 6566129ed7f92..59612bcbe12f0 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs @@ -330,6 +330,39 @@ public static IEnumerable 
Match_Basic_TestData() // Surrogate pairs splitted up into UTF-16 code units. yield return new object[] { @"(\uD82F[\uDCA0-\uDCA3])", "\uD82F\uDCA2", RegexOptions.CultureInvariant, 0, 2, true, "\uD82F\uDCA2" }; + + // AnyNewLine (with none of the special characters used as line ending) + yield return new object[] { @"line3\nline4$", "line1\nline2\nline3\nline4", RegexOptions.AnyNewLine, 0, 23, true, "line3\nline4" }; + + // AnyNewLine (with '\n' used as line ending) + yield return new object[] { @"line3\nline4$", "line1\nline2\nline3\nline4\n", RegexOptions.AnyNewLine, 0, 24, true, "line3\nline4" }; + + // AnyNewLine (with '\r' used as line ending) + yield return new object[] { @"line3\nline4$", "line1\nline2\nline3\nline4\r", RegexOptions.AnyNewLine, 0, 24, true, "line3\nline4" }; + + // AnyNewLine (with '\r\n' used as line ending) + yield return new object[] { @"line3\nline4$", "line1\nline2\nline3\nline4\r\n", RegexOptions.AnyNewLine, 0, 25, true, "line3\nline4" }; + + // AnyNewLine | Multiline (with none of the special characters used as line ending) + yield return new object[] { @"line3\nline4$", "line1\nline2\nline3\nline4", RegexOptions.Multiline | RegexOptions.AnyNewLine, 0, 23, true, "line3\nline4" }; + + // AnyNewLine | Multiline (with '\n' used as line ending) + yield return new object[] { @"line3\nline4$", "line1\nline2\nline3\nline4\n", RegexOptions.Multiline | RegexOptions.AnyNewLine, 0, 24, true, "line3\nline4" }; + + // AnyNewLine | Multiline (with '\r' used as line ending) + yield return new object[] { @"line3\nline4$", "line1\nline2\nline3\nline4\r", RegexOptions.Multiline | RegexOptions.AnyNewLine, 0, 24, true, "line3\nline4" }; + + // AnyNewLine | Multiline (with '\r\n' used as line ending) + yield return new object[] { @"line3\nline4$", "line1\nline2\nline3\nline4\r\n", RegexOptions.Multiline | RegexOptions.AnyNewLine, 0, 25, true, "line3\nline4" }; + + // AnyNewLine + yield return new object[] { @"$", "line1\nline2\nline3\nline4\r\n", 
RegexOptions.AnyNewLine, 0, 25, true, "" }; + + // AnyNewLine | RightToLeft + yield return new object[] { @"$", "line1\nline2\nline3\nline4\r\n", RegexOptions.RightToLeft | RegexOptions.AnyNewLine, 0, 25, true, "" }; + + // AnyNewLine | Multiline ('.' will match everything except \r and \n) + yield return new object[] { @".*$", "foo\r\nbar", RegexOptions.AnyNewLine | RegexOptions.Multiline, 0, 8, true, "foo" }; } [Theory] @@ -629,7 +662,37 @@ public static IEnumerable Match_Advanced_TestData() } }; - // Mutliline + // AnyEndZ (with '\n' used as line ending) + yield return new object[] + { + "line3\nline4$", "line1\nline2\nline3\nline4\n", RegexOptions.AnyNewLine, 0, 24, + new CaptureData[] + { + new CaptureData("line3\nline4", 12, 11) + } + }; + + // AnyEndZ (with '\r' used as line ending) + yield return new object[] + { + "line3\nline4$", "line1\nline2\nline3\nline4\r", RegexOptions.AnyNewLine, 0, 24, + new CaptureData[] + { + new CaptureData("line3\nline4", 12, 11) + } + }; + + // AnyEndZ (with '\r\n' used as line ending) + yield return new object[] + { + "line3\nline4$", "line1\nline2\nline3\nline4\r\n", RegexOptions.AnyNewLine, 0, 25, + new CaptureData[] + { + new CaptureData("line3\nline4", 12, 11) + } + }; + + // Multiline yield return new object[] { "(line2$\n)line3", "line1\nline2\nline3\n\nline4", RegexOptions.Multiline, 0, 24, @@ -640,7 +703,7 @@ public static IEnumerable Match_Advanced_TestData() } }; - // Mutliline + // Multiline yield return new object[] { "(line2\n^)line3", "line1\nline2\nline3\n\nline4", RegexOptions.Multiline, 0, 24, @@ -651,10 +714,10 @@ public static IEnumerable Match_Advanced_TestData() } }; - // Mutliline + // Multiline yield return new object[] { - "(line3\n$\n)line4", "line1\nline2\nline3\n\nline4", RegexOptions.Multiline, 0, 24, + "(line3\n$\n)line4", "line1\nline2\nline3\n\nline4", RegexOptions.Multiline | RegexOptions.AnyNewLine, 0, 24, new CaptureData[] { new CaptureData("line3\n\nline4", 12, 12), @@ -662,7 +725,29 @@ 
public static IEnumerable Match_Advanced_TestData() } }; - // Mutliline + // Multiline (with '\r\n' used as line ending) + yield return new object[] + { + "(line3$\r\n)line4", "line1\nline2\nline3\r\nline4", RegexOptions.Multiline | RegexOptions.AnyNewLine, 0, 24, + new CaptureData[] + { + new CaptureData("line3\r\nline4", 12, 12), + new CaptureData("line3\r\n", 12, 7) + } + }; + + // Multiline (with '\r' used as line ending) + yield return new object[] + { + "(line3$\r)line4", "line1\nline2\nline3\rline4", RegexOptions.Multiline | RegexOptions.AnyNewLine, 0, 23, + new CaptureData[] + { + new CaptureData("line3\rline4", 12, 11), + new CaptureData("line3\r", 12, 6) + } + }; + + // Multiline yield return new object[] { "(line3\n^\n)line4", "line1\nline2\nline3\n\nline4", RegexOptions.Multiline, 0, 24, @@ -673,7 +758,7 @@ public static IEnumerable Match_Advanced_TestData() } }; - // Mutliline + // Multiline yield return new object[] { "(line2$\n^)line3", "line1\nline2\nline3\n\nline4", RegexOptions.Multiline, 0, 24, diff --git a/src/libraries/System.Text.RegularExpressions/tests/Regex.MultipleMatches.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/Regex.MultipleMatches.Tests.cs index 783c149e6acd4..894c932eb57dd 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/Regex.MultipleMatches.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/Regex.MultipleMatches.Tests.cs @@ -179,9 +179,38 @@ public static IEnumerable Matches_TestData() } }; + yield return new object[] + { + "^line3$\nline4", "line1\nline2\nline3\nline4\nline3\nline4\n", RegexOptions.Multiline, + new CaptureData[] + { + new CaptureData("line3\nline4", 12, 11), + new CaptureData("line3\nline4", 24, 11), + } + }; + + yield return new object[] + { + "^line3$", "line1\nline2\nline3\r\nline4\nline3\nline4\n", RegexOptions.Multiline | RegexOptions.AnyNewLine, + new CaptureData[] + { + new CaptureData("line3", 12, 5), + new CaptureData("line3", 25, 5), + } + }; + + yield 
return new object[] + { + "line3$", "line1\nline2\nline3\r\nline4\nline3\r", RegexOptions.AnyNewLine, + new CaptureData[] + { + new CaptureData("line3", 25, 5), + } + }; + if (!PlatformDetection.IsFullFramework) // missing fix in https://github.com/dotnet/runtime/pull/993 { - yield return new object[] + yield return new object[] { "[^]", "every", RegexOptions.ECMAScript, new CaptureData[] @@ -272,8 +301,8 @@ public void Matches_Invalid() // Options are invalid AssertExtensions.Throws("options", () => Regex.Matches("input", "pattern", (RegexOptions)(-1))); AssertExtensions.Throws("options", () => Regex.Matches("input", "pattern", (RegexOptions)(-1), TimeSpan.FromSeconds(1))); - AssertExtensions.Throws("options", () => Regex.Matches("input", "pattern", (RegexOptions)0x400)); - AssertExtensions.Throws("options", () => Regex.Matches("input", "pattern", (RegexOptions)0x400, TimeSpan.FromSeconds(1))); + AssertExtensions.Throws("options", () => Regex.Matches("input", "pattern", (RegexOptions)0x800)); + AssertExtensions.Throws("options", () => Regex.Matches("input", "pattern", (RegexOptions)0x800, TimeSpan.FromSeconds(1))); // MatchTimeout is invalid AssertExtensions.Throws("matchTimeout", () => Regex.Matches("input", "pattern", RegexOptions.None, TimeSpan.Zero));