microsoft · rbuckton · May 29, 2024 · Apr 25, 2024 · Apr 25, 2024 · Apr 27, 2024
diff --git a/src/compiler/scanner.ts b/src/compiler/scanner.ts
@@ -1556,9 +1556,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
                 tokenFlags |= TokenFlags.ContainsInvalidEscape;
                 if (isRegularExpression || shouldEmitInvalidEscapeError) {
                     const code = parseInt(text.substring(start + 1, pos), 8);
-                    if (isRegularExpression !== "annex-b") {
-                        error(Diagnostics.Octal_escape_sequences_are_not_allowed_Use_the_syntax_0, start, pos - start, "\\x" + code.toString(16).padStart(2, "0"));
-                    }
+                    error(Diagnostics.Octal_escape_sequences_are_not_allowed_Use_the_syntax_0, start, pos - start, "\\x" + code.toString(16).padStart(2, "0"));
                     return String.fromCharCode(code);
                 }
                 return text.substring(start, pos);
@@ -2426,6 +2424,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
             // Quickly get to the end of regex such that we know the flags
             let p = tokenStart + 1;
             let inEscape = false;
+            let namedCaptureGroups = false;
             // Although nested character classes are allowed in Unicode Sets mode,
             // an unescaped slash is nevertheless invalid even in a character class in Unicode mode.
             // Additionally, parsing nested character classes will misinterpret regexes like `/[[]/`
@@ -2469,6 +2468,15 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
                 else if (ch === CharacterCodes.closeBracket) {
                     inCharacterClass = false;
                 }
+                else if (
+                    ch === CharacterCodes.openParen
+                    && charCodeChecked(p + 1) === CharacterCodes.question
+                    && charCodeChecked(p + 2) === CharacterCodes.lessThan
+                    && charCodeChecked(p + 3) !== CharacterCodes.equals
+                    && charCodeChecked(p + 3) !== CharacterCodes.exclamation
+                ) {
+                    namedCaptureGroups = true;
+                }
                 p++;
             }
             const isUnterminated = !!(tokenFlags & TokenFlags.Unterminated);
@@ -2505,7 +2513,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
                 const saveEnd = end;
                 pos = tokenStart + 1;
                 end = endOfBody;
-                scanRegularExpressionWorker(regExpFlags, isUnterminated, /*annexB*/ true);
+                scanRegularExpressionWorker(regExpFlags, isUnterminated, /*annexB*/ true, namedCaptureGroups);
                 tokenStart = saveTokenStart;
                 tokenFlags = saveTokenFlags;
                 pos = savePos;
@@ -2517,7 +2525,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
         return token;
     }
 
-    function scanRegularExpressionWorker(regExpFlags: RegularExpressionFlags, isUnterminated: boolean, annexB: boolean) {
+    function scanRegularExpressionWorker(regExpFlags: RegularExpressionFlags, isUnterminated: boolean, annexB: boolean, namedCaptureGroups: boolean) {
         // Why var? It avoids TDZ checks in the runtime which can be costly.
         // See: https://github.com/microsoft/TypeScript/issues/52924
         /* eslint-disable no-var */
@@ -2527,10 +2535,9 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
         /** Grammar parameter */
         var unicodeMode = !!(regExpFlags & RegularExpressionFlags.UnicodeMode);
 
-        if (unicodeMode) {
-            // Annex B treats any unicode mode as the strict syntax.
-            annexB = false;
-        }
+        // Regular expressions are checked more strictly when either in 'u' or 'v' mode, or
+        // when not using the looser interpretation of the syntax from ECMA-262 Annex B.
+        var anyUnicodeModeOrNonAnnexB = unicodeMode || !annexB;
 
         /** @see {scanClassSetExpression} */
         var mayContainStrings = false;
@@ -2626,7 +2633,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
                                 case CharacterCodes.exclamation:
                                     pos++;
                                     // In Annex B, `(?=Disjunction)` and `(?!Disjunction)` are quantifiable
-                                    isPreviousTermQuantifiable = annexB;
+                                    isPreviousTermQuantifiable = !anyUnicodeModeOrNonAnnexB;
                                     break;
                                 case CharacterCodes.lessThan:
                                     const groupNameStart = pos;
@@ -2675,6 +2682,10 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
                         const digitsStart = pos;
                         scanDigits();
                         const min = tokenValue;
+                        if (!anyUnicodeModeOrNonAnnexB && !min) {
+                            isPreviousTermQuantifiable = true;
+                            break;
+                        }
                         if (charCodeChecked(pos) === CharacterCodes.comma) {
                             pos++;
                             scanDigits();
@@ -2684,26 +2695,32 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
                                     error(Diagnostics.Incomplete_quantifier_Digit_expected, digitsStart, 0);
                                 }
                                 else {
-                                    if (unicodeMode) {
-                                        error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, start, 1, String.fromCharCode(ch));
-                                    }
+                                    error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, start, 1, String.fromCharCode(ch));
                                     isPreviousTermQuantifiable = true;
                                     break;
                                 }
                             }
-                            if (max && Number.parseInt(min) > Number.parseInt(max)) {
+                            else if (max && Number.parseInt(min) > Number.parseInt(max) && (anyUnicodeModeOrNonAnnexB || charCodeChecked(pos) === CharacterCodes.closeBrace)) {
                                 error(Diagnostics.Numbers_out_of_order_in_quantifier, digitsStart, pos - digitsStart);
                             }
                         }
                         else if (!min) {
-                            if (unicodeMode) {
+                            if (anyUnicodeModeOrNonAnnexB) {
                                 error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, start, 1, String.fromCharCode(ch));
                             }
                             isPreviousTermQuantifiable = true;
                             break;
                         }
-                        scanExpectedChar(CharacterCodes.closeBrace);
-                        pos--;
+                        if (charCodeChecked(pos) !== CharacterCodes.closeBrace) {
+                            if (anyUnicodeModeOrNonAnnexB) {
+                                error(Diagnostics._0_expected, pos, 0, String.fromCharCode(CharacterCodes.closeBrace));
+                                pos--;
+                            }
+                            else {
+                                isPreviousTermQuantifiable = true;
+                                break;
+                            }
+                        }
                     // falls through
                     case CharacterCodes.asterisk:
                     case CharacterCodes.plus:
@@ -2744,7 +2761,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
                             // Assume what starting from the character to be outside of the regex
                             return;
                         }
-                        if (unicodeMode || ch === CharacterCodes.closeParen) {
+                        if (anyUnicodeModeOrNonAnnexB || ch === CharacterCodes.closeParen) {
                             error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos, 1, String.fromCharCode(ch));
                         }
                         pos++;
@@ -2801,7 +2818,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
                         scanGroupName(/*isReference*/ true);
                         scanExpectedChar(CharacterCodes.greaterThan);
                     }
-                    else if (unicodeMode) {
+                    else if (namedCaptureGroups) {
                         error(Diagnostics.k_must_be_followed_by_a_capturing_group_name_enclosed_in_angle_brackets, pos - 2, 2);
                     }
                     break;
@@ -2851,10 +2868,10 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
                         pos++;
                         return String.fromCharCode(ch & 0x1f);
                     }
-                    if (unicodeMode) {
+                    if (anyUnicodeModeOrNonAnnexB) {
                         error(Diagnostics.c_must_be_followed_by_an_ASCII_letter, pos - 2, 2);
                     }
-                    else if (atomEscape && annexB) {
+                    else if (atomEscape) {
                         // Annex B treats
                         //
                         //  ExtendedAtom : `\` [lookahead = `c`]
@@ -2887,7 +2904,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
                         return "\\";
                     }
                     pos--;
-                    return scanEscapeSequence(/*shouldEmitInvalidEscapeError*/ unicodeMode, /*isRegularExpression*/ annexB ? "annex-b" : true);
+                    return scanEscapeSequence(/*shouldEmitInvalidEscapeError*/ unicodeMode, /*isRegularExpression*/ anyUnicodeModeOrNonAnnexB || "annex-b");
             }
         }
 
@@ -2936,12 +2953,12 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
                     if (isClassContentExit(ch)) {
                         return;
                     }
-                    if (!minCharacter && !annexB) {
+                    if (!minCharacter && anyUnicodeModeOrNonAnnexB) {
                         error(Diagnostics.A_character_class_range_must_not_be_bounded_by_another_character_class, minStart, pos - 1 - minStart);
                     }
                     const maxStart = pos;
                     const maxCharacter = scanClassAtom();
-                    if (!maxCharacter && !annexB) {
+                    if (!maxCharacter && anyUnicodeModeOrNonAnnexB) {
                         error(Diagnostics.A_character_class_range_must_not_be_bounded_by_another_character_class, maxStart, pos - maxStart);
                         continue;
                     }
@@ -3437,9 +3454,13 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
                             error(Diagnostics.Unicode_property_value_expressions_are_only_available_when_the_Unicode_u_flag_or_the_Unicode_Sets_v_flag_is_set, start, pos - start);
                         }
                     }
-                    else if (unicodeMode) {
+                    else if (anyUnicodeModeOrNonAnnexB) {
                         error(Diagnostics._0_must_be_followed_by_a_Unicode_property_value_expression_enclosed_in_braces, pos - 2, 2, String.fromCharCode(ch));
                     }
+                    else {
+                        pos--;
+                        return false;
+                    }
                     return true;
             }
             return false;
@@ -3481,9 +3502,10 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
             }
         });
         forEach(decimalEscapes, escape => {
-            // in AnnexB, if a DecimalEscape is greater than the number of capturing groups then it is treated as
-            // either a LegacyOctalEscapeSequence or IdentityEscape
-            if (!annexB && escape.value > numberOfCapturingGroups) {
+            // Although a DecimalEscape with a value greater than the number of capturing groups
+            // is treated as either a LegacyOctalEscapeSequence or an IdentityEscape in Annex B,
+            // an error is nevertheless reported since it's most likely a mistake.
+            if (escape.value > numberOfCapturingGroups) {
                 if (numberOfCapturingGroups) {
                     error(Diagnostics.This_backreference_refers_to_a_group_that_does_not_exist_There_are_only_0_capturing_groups_in_this_regular_expression, escape.pos, escape.end - escape.pos, numberOfCapturingGroups);
                 }