-
Notifications
You must be signed in to change notification settings - Fork 12.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Correct Regular Expressions Behavior Related to Annex B #58320
Changes from 6 commits
e049438
358eb30
8facb0a
f5c0b60
cff993f
603c3cf
2e62d25
ed08ef7
8b67d77
b48f0d0
c72f92f
70a3214
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2675,6 +2675,10 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean | |
const digitsStart = pos; | ||
scanDigits(); | ||
const min = tokenValue; | ||
if (annexB && !min) { | ||
isPreviousTermQuantifiable = true; | ||
break; | ||
} | ||
if (charCodeChecked(pos) === CharacterCodes.comma) { | ||
pos++; | ||
scanDigits(); | ||
|
@@ -2684,26 +2688,32 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean | |
error(Diagnostics.Incomplete_quantifier_Digit_expected, digitsStart, 0); | ||
} | ||
else { | ||
if (unicodeMode) { | ||
error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, start, 1, String.fromCharCode(ch)); | ||
} | ||
error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, start, 1, String.fromCharCode(ch)); | ||
isPreviousTermQuantifiable = true; | ||
break; | ||
} | ||
} | ||
if (max && Number.parseInt(min) > Number.parseInt(max)) { | ||
else if (max && Number.parseInt(min) > Number.parseInt(max) && (!annexB || text.charCodeAt(pos) === CharacterCodes.closeBrace)) { | ||
error(Diagnostics.Numbers_out_of_order_in_quantifier, digitsStart, pos - digitsStart); | ||
} | ||
} | ||
else if (!min) { | ||
if (unicodeMode) { | ||
if (!annexB) { | ||
error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, start, 1, String.fromCharCode(ch)); | ||
} | ||
isPreviousTermQuantifiable = true; | ||
break; | ||
} | ||
scanExpectedChar(CharacterCodes.closeBrace); | ||
pos--; | ||
if (charCodeChecked(pos) !== CharacterCodes.closeBrace) { | ||
if (annexB) { | ||
isPreviousTermQuantifiable = true; | ||
break; | ||
} | ||
else { | ||
error(Diagnostics._0_expected, pos, 0, String.fromCharCode(CharacterCodes.closeBrace)); | ||
pos--; | ||
} | ||
} | ||
// falls through | ||
case CharacterCodes.asterisk: | ||
case CharacterCodes.plus: | ||
|
@@ -2744,7 +2754,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean | |
// Assume what starting from the character to be outside of the regex | ||
return; | ||
} | ||
if (unicodeMode || ch === CharacterCodes.closeParen) { | ||
if (!annexB || ch === CharacterCodes.closeParen) { | ||
error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos, 1, String.fromCharCode(ch)); | ||
} | ||
pos++; | ||
|
@@ -2801,7 +2811,10 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean | |
scanGroupName(/*isReference*/ true); | ||
scanExpectedChar(CharacterCodes.greaterThan); | ||
} | ||
else if (unicodeMode) { | ||
else { | ||
// This is actually allowed in Annex B if there are no named capturing groups in the regex, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could we keep track of whether we encountered a
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Oh, didn’t think of this clever but dirty solution.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think it's better to error on
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If then I would personally lean towards linting also
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Every decimal escape in a character class I've ever seen has been a bug, so it makes sense to error for that case. |
||
// but if we were going to suppress these errors, we would have to record the positions of all '\k's | ||
// and defer the errors until after the scanning to know if the regex has any named capturing groups. | ||
error(Diagnostics.k_must_be_followed_by_a_capturing_group_name_enclosed_in_angle_brackets, pos - 2, 2); | ||
} | ||
break; | ||
|
@@ -2851,10 +2864,10 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean | |
pos++; | ||
return String.fromCharCode(ch & 0x1f); | ||
} | ||
if (unicodeMode) { | ||
if (!annexB) { | ||
error(Diagnostics.c_must_be_followed_by_an_ASCII_letter, pos - 2, 2); | ||
} | ||
else if (atomEscape && annexB) { | ||
else if (atomEscape) { | ||
// Annex B treats | ||
// | ||
// ExtendedAtom : `\` [lookahead = `c`] | ||
|
@@ -3437,7 +3450,11 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean | |
error(Diagnostics.Unicode_property_value_expressions_are_only_available_when_the_Unicode_u_flag_or_the_Unicode_Sets_v_flag_is_set, start, pos - start); | ||
} | ||
} | ||
else if (unicodeMode) { | ||
else if (annexB) { | ||
pos--; | ||
return false; | ||
} | ||
else { | ||
error(Diagnostics._0_must_be_followed_by_a_Unicode_property_value_expression_enclosed_in_braces, pos - 2, 2, String.fromCharCode(ch)); | ||
} | ||
return true; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,272 @@ | ||
regularExpressionAnnexB.ts(2,8): error TS1125: Hexadecimal digit expected. | ||
regularExpressionAnnexB.ts(2,12): error TS1510: '\k' must be followed by a capturing group name enclosed in angle brackets. | ||
regularExpressionAnnexB.ts(2,22): error TS1125: Hexadecimal digit expected. | ||
regularExpressionAnnexB.ts(2,28): error TS1125: Hexadecimal digit expected. | ||
regularExpressionAnnexB.ts(3,9): error TS1125: Hexadecimal digit expected. | ||
regularExpressionAnnexB.ts(3,23): error TS1125: Hexadecimal digit expected. | ||
regularExpressionAnnexB.ts(3,29): error TS1125: Hexadecimal digit expected. | ||
regularExpressionAnnexB.ts(7,4): error TS1535: This character cannot be escaped in a regular expression. | ||
regularExpressionAnnexB.ts(7,8): error TS1125: Hexadecimal digit expected. | ||
regularExpressionAnnexB.ts(7,10): error TS1512: '\c' must be followed by an ASCII letter. | ||
regularExpressionAnnexB.ts(7,12): error TS1510: '\k' must be followed by a capturing group name enclosed in angle brackets. | ||
regularExpressionAnnexB.ts(7,14): error TS1535: This character cannot be escaped in a regular expression. | ||
regularExpressionAnnexB.ts(7,18): error TS1535: This character cannot be escaped in a regular expression. | ||
regularExpressionAnnexB.ts(7,22): error TS1125: Hexadecimal digit expected. | ||
regularExpressionAnnexB.ts(7,24): error TS1535: This character cannot be escaped in a regular expression. | ||
regularExpressionAnnexB.ts(7,28): error TS1125: Hexadecimal digit expected. | ||
regularExpressionAnnexB.ts(7,30): error TS1531: '\p' must be followed by a Unicode property value expression enclosed in braces. | ||
regularExpressionAnnexB.ts(8,5): error TS1535: This character cannot be escaped in a regular expression. | ||
regularExpressionAnnexB.ts(8,9): error TS1125: Hexadecimal digit expected. | ||
regularExpressionAnnexB.ts(8,11): error TS1512: '\c' must be followed by an ASCII letter. | ||
regularExpressionAnnexB.ts(8,13): error TS1535: This character cannot be escaped in a regular expression. | ||
regularExpressionAnnexB.ts(8,15): error TS1535: This character cannot be escaped in a regular expression. | ||
regularExpressionAnnexB.ts(8,19): error TS1535: This character cannot be escaped in a regular expression. | ||
regularExpressionAnnexB.ts(8,23): error TS1125: Hexadecimal digit expected. | ||
regularExpressionAnnexB.ts(8,25): error TS1535: This character cannot be escaped in a regular expression. | ||
regularExpressionAnnexB.ts(8,29): error TS1125: Hexadecimal digit expected. | ||
regularExpressionAnnexB.ts(8,31): error TS1531: '\p' must be followed by a Unicode property value expression enclosed in braces. | ||
regularExpressionAnnexB.ts(9,4): error TS1531: '\P' must be followed by a Unicode property value expression enclosed in braces. | ||
regularExpressionAnnexB.ts(9,7): error TS1531: '\P' must be followed by a Unicode property value expression enclosed in braces. | ||
regularExpressionAnnexB.ts(9,9): error TS1516: A character class range must not be bounded by another character class. | ||
regularExpressionAnnexB.ts(23,4): error TS1507: There is nothing available for repetition. | ||
regularExpressionAnnexB.ts(23,8): error TS1507: There is nothing available for repetition. | ||
regularExpressionAnnexB.ts(24,4): error TS1507: There is nothing available for repetition. | ||
regularExpressionAnnexB.ts(24,9): error TS1507: There is nothing available for repetition. | ||
regularExpressionAnnexB.ts(25,4): error TS1507: There is nothing available for repetition. | ||
regularExpressionAnnexB.ts(25,10): error TS1507: There is nothing available for repetition. | ||
regularExpressionAnnexB.ts(26,4): error TS1507: There is nothing available for repetition. | ||
regularExpressionAnnexB.ts(26,5): error TS1506: Numbers out of order in quantifier. | ||
regularExpressionAnnexB.ts(26,10): error TS1507: There is nothing available for repetition. | ||
regularExpressionAnnexB.ts(29,4): error TS1508: Unexpected '{'. Did you mean to escape it with backslash? | ||
regularExpressionAnnexB.ts(30,4): error TS1508: Unexpected '{'. Did you mean to escape it with backslash? | ||
regularExpressionAnnexB.ts(31,4): error TS1507: There is nothing available for repetition. | ||
regularExpressionAnnexB.ts(31,5): error TS1505: Incomplete quantifier. Digit expected. | ||
regularExpressionAnnexB.ts(31,7): error TS1005: '}' expected. | ||
regularExpressionAnnexB.ts(31,8): error TS1507: There is nothing available for repetition. | ||
regularExpressionAnnexB.ts(32,4): error TS1507: There is nothing available for repetition. | ||
regularExpressionAnnexB.ts(32,6): error TS1005: '}' expected. | ||
regularExpressionAnnexB.ts(32,7): error TS1507: There is nothing available for repetition. | ||
regularExpressionAnnexB.ts(33,4): error TS1507: There is nothing available for repetition. | ||
regularExpressionAnnexB.ts(33,7): error TS1005: '}' expected. | ||
regularExpressionAnnexB.ts(33,8): error TS1507: There is nothing available for repetition. | ||
regularExpressionAnnexB.ts(34,4): error TS1507: There is nothing available for repetition. | ||
regularExpressionAnnexB.ts(34,8): error TS1005: '}' expected. | ||
regularExpressionAnnexB.ts(34,9): error TS1507: There is nothing available for repetition. | ||
regularExpressionAnnexB.ts(35,4): error TS1507: There is nothing available for repetition. | ||
regularExpressionAnnexB.ts(35,5): error TS1506: Numbers out of order in quantifier. | ||
regularExpressionAnnexB.ts(35,8): error TS1005: '}' expected. | ||
regularExpressionAnnexB.ts(35,9): error TS1507: There is nothing available for repetition. | ||
regularExpressionAnnexB.ts(36,4): error TS1508: Unexpected '{'. Did you mean to escape it with backslash? | ||
regularExpressionAnnexB.ts(36,5): error TS1508: Unexpected '}'. Did you mean to escape it with backslash? | ||
regularExpressionAnnexB.ts(37,4): error TS1507: There is nothing available for repetition. | ||
regularExpressionAnnexB.ts(37,5): error TS1505: Incomplete quantifier. Digit expected. | ||
regularExpressionAnnexB.ts(37,8): error TS1507: There is nothing available for repetition. | ||
regularExpressionAnnexB.ts(38,4): error TS1507: There is nothing available for repetition. | ||
regularExpressionAnnexB.ts(38,5): error TS1505: Incomplete quantifier. Digit expected. | ||
regularExpressionAnnexB.ts(38,9): error TS1507: There is nothing available for repetition. | ||
regularExpressionAnnexB.ts(39,4): error TS1507: There is nothing available for repetition. | ||
regularExpressionAnnexB.ts(39,8): error TS1507: There is nothing available for repetition. | ||
regularExpressionAnnexB.ts(40,4): error TS1507: There is nothing available for repetition. | ||
regularExpressionAnnexB.ts(40,9): error TS1507: There is nothing available for repetition. | ||
regularExpressionAnnexB.ts(41,4): error TS1507: There is nothing available for repetition. | ||
regularExpressionAnnexB.ts(41,10): error TS1507: There is nothing available for repetition. | ||
regularExpressionAnnexB.ts(42,4): error TS1507: There is nothing available for repetition. | ||
regularExpressionAnnexB.ts(42,5): error TS1506: Numbers out of order in quantifier. | ||
regularExpressionAnnexB.ts(42,10): error TS1507: There is nothing available for repetition. | ||
|
||
|
||
==== regularExpressionAnnexB.ts (75 errors) ==== | ||
const regexes: RegExp[] = [ | ||
/\q\u\i\c\k\_\f\o\x\-\j\u\m\p\s/, | ||
|
||
!!! error TS1125: Hexadecimal digit expected. | ||
~~ | ||
!!! error TS1510: '\k' must be followed by a capturing group name enclosed in angle brackets. | ||
|
||
!!! error TS1125: Hexadecimal digit expected. | ||
|
||
!!! error TS1125: Hexadecimal digit expected. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. These are all valid in Annex B. Even |
||
/[\q\u\i\c\k\_\f\o\x\-\j\u\m\p\s]/, | ||
|
||
!!! error TS1125: Hexadecimal digit expected. | ||
|
||
!!! error TS1125: Hexadecimal digit expected. | ||
|
||
!!! error TS1125: Hexadecimal digit expected. | ||
/\P[\P\w-_]/, | ||
|
||
// Compare to | ||
/\q\u\i\c\k\_\f\o\x\-\j\u\m\p\s/u, | ||
~~ | ||
!!! error TS1535: This character cannot be escaped in a regular expression. | ||
|
||
!!! error TS1125: Hexadecimal digit expected. | ||
~~ | ||
!!! error TS1512: '\c' must be followed by an ASCII letter. | ||
~~ | ||
!!! error TS1510: '\k' must be followed by a capturing group name enclosed in angle brackets. | ||
~~ | ||
!!! error TS1535: This character cannot be escaped in a regular expression. | ||
~~ | ||
!!! error TS1535: This character cannot be escaped in a regular expression. | ||
|
||
!!! error TS1125: Hexadecimal digit expected. | ||
~~ | ||
!!! error TS1535: This character cannot be escaped in a regular expression. | ||
|
||
!!! error TS1125: Hexadecimal digit expected. | ||
~~ | ||
!!! error TS1531: '\p' must be followed by a Unicode property value expression enclosed in braces. | ||
/[\q\u\i\c\k\_\f\o\x\-\j\u\m\p\s]/u, | ||
~~ | ||
!!! error TS1535: This character cannot be escaped in a regular expression. | ||
|
||
!!! error TS1125: Hexadecimal digit expected. | ||
~~ | ||
!!! error TS1512: '\c' must be followed by an ASCII letter. | ||
~~ | ||
!!! error TS1535: This character cannot be escaped in a regular expression. | ||
~~ | ||
!!! error TS1535: This character cannot be escaped in a regular expression. | ||
~~ | ||
!!! error TS1535: This character cannot be escaped in a regular expression. | ||
|
||
!!! error TS1125: Hexadecimal digit expected. | ||
~~ | ||
!!! error TS1535: This character cannot be escaped in a regular expression. | ||
|
||
!!! error TS1125: Hexadecimal digit expected. | ||
~~ | ||
!!! error TS1531: '\p' must be followed by a Unicode property value expression enclosed in braces. | ||
/\P[\P\w-_]/u, | ||
~~ | ||
!!! error TS1531: '\P' must be followed by a Unicode property value expression enclosed in braces. | ||
~~ | ||
!!! error TS1531: '\P' must be followed by a Unicode property value expression enclosed in braces. | ||
~~ | ||
!!! error TS1516: A character class range must not be bounded by another character class. | ||
]; | ||
|
||
const regexesWithBraces: RegExp[] = [ | ||
/{??/, | ||
/{,??/, | ||
/{,1??/, | ||
/{1??/, | ||
/{1,??/, | ||
/{1,2??/, | ||
/{2,1??/, | ||
/{}??/, | ||
/{,}??/, | ||
/{,1}??/, | ||
/{1}??/, | ||
~~~~ | ||
!!! error TS1507: There is nothing available for repetition. | ||
~ | ||
!!! error TS1507: There is nothing available for repetition. | ||
/{1,}??/, | ||
~~~~~ | ||
!!! error TS1507: There is nothing available for repetition. | ||
~ | ||
!!! error TS1507: There is nothing available for repetition. | ||
/{1,2}??/, | ||
~~~~~~ | ||
!!! error TS1507: There is nothing available for repetition. | ||
~ | ||
!!! error TS1507: There is nothing available for repetition. | ||
/{2,1}??/, | ||
~~~~~~ | ||
!!! error TS1507: There is nothing available for repetition. | ||
~~~ | ||
!!! error TS1506: Numbers out of order in quantifier. | ||
~ | ||
!!! error TS1507: There is nothing available for repetition. | ||
|
||
// Compare to | ||
/{??/u, | ||
~ | ||
!!! error TS1508: Unexpected '{'. Did you mean to escape it with backslash? | ||
/{,??/u, | ||
~ | ||
!!! error TS1508: Unexpected '{'. Did you mean to escape it with backslash? | ||
/{,1??/u, | ||
~~~~ | ||
!!! error TS1507: There is nothing available for repetition. | ||
|
||
!!! error TS1505: Incomplete quantifier. Digit expected. | ||
|
||
!!! error TS1005: '}' expected. | ||
~ | ||
!!! error TS1507: There is nothing available for repetition. | ||
/{1??/u, | ||
~~~ | ||
!!! error TS1507: There is nothing available for repetition. | ||
|
||
!!! error TS1005: '}' expected. | ||
~ | ||
!!! error TS1507: There is nothing available for repetition. | ||
/{1,??/u, | ||
~~~~ | ||
!!! error TS1507: There is nothing available for repetition. | ||
|
||
!!! error TS1005: '}' expected. | ||
~ | ||
!!! error TS1507: There is nothing available for repetition. | ||
/{1,2??/u, | ||
~~~~~ | ||
!!! error TS1507: There is nothing available for repetition. | ||
|
||
!!! error TS1005: '}' expected. | ||
~ | ||
!!! error TS1507: There is nothing available for repetition. | ||
/{2,1??/u, | ||
~~~~~ | ||
!!! error TS1507: There is nothing available for repetition. | ||
~~~ | ||
!!! error TS1506: Numbers out of order in quantifier. | ||
|
||
!!! error TS1005: '}' expected. | ||
~ | ||
!!! error TS1507: There is nothing available for repetition. | ||
/{}??/u, | ||
~ | ||
!!! error TS1508: Unexpected '{'. Did you mean to escape it with backslash? | ||
~ | ||
!!! error TS1508: Unexpected '}'. Did you mean to escape it with backslash? | ||
/{,}??/u, | ||
~~~~ | ||
!!! error TS1507: There is nothing available for repetition. | ||
|
||
!!! error TS1505: Incomplete quantifier. Digit expected. | ||
~ | ||
!!! error TS1507: There is nothing available for repetition. | ||
/{,1}??/u, | ||
~~~~~ | ||
!!! error TS1507: There is nothing available for repetition. | ||
|
||
!!! error TS1505: Incomplete quantifier. Digit expected. | ||
~ | ||
!!! error TS1507: There is nothing available for repetition. | ||
/{1}??/u, | ||
~~~~ | ||
!!! error TS1507: There is nothing available for repetition. | ||
~ | ||
!!! error TS1507: There is nothing available for repetition. | ||
/{1,}??/u, | ||
~~~~~ | ||
!!! error TS1507: There is nothing available for repetition. | ||
~ | ||
!!! error TS1507: There is nothing available for repetition. | ||
/{1,2}??/u, | ||
~~~~~~ | ||
!!! error TS1507: There is nothing available for repetition. | ||
~ | ||
!!! error TS1507: There is nothing available for repetition. | ||
/{2,1}??/u, | ||
~~~~~~ | ||
!!! error TS1507: There is nothing available for repetition. | ||
~~~ | ||
!!! error TS1506: Numbers out of order in quantifier. | ||
~ | ||
!!! error TS1507: There is nothing available for repetition. | ||
]; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Though it may be redundant, I think it might be better to still indicate `unicodeMode` here so that someone editing this code in the future doesn't mistakenly think this only applies to non-Annex B code. It may be better to use `unicodeMode || !annexB` and remove the `if (unicodeMode) { annexB = false; }` at the top of `scanRegularExpressionWorker`. The same would go for other uses of `annexB` as well.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are you sure you are really fine with a dozen occurrences of `unicodeMode || !annexB`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No, but
would work.