From 7238055918f38a9591e70a87f06aba176331624a Mon Sep 17 00:00:00 2001 From: Jake Bailey <5341706+jakebailey@users.noreply.github.com> Date: Wed, 17 Apr 2024 08:53:19 -0700 Subject: [PATCH 1/2] Add test --- .../reference/parseReplacementCharacter.errors.txt | 13 +++++++++++++ .../reference/parseReplacementCharacter.js | 12 ++++++++++++ .../reference/parseReplacementCharacter.symbols | 11 +++++++++++ .../reference/parseReplacementCharacter.types | 11 +++++++++++ tests/cases/compiler/parseReplacementCharacter.ts | 7 +++++++ 5 files changed, 54 insertions(+) create mode 100644 tests/baselines/reference/parseReplacementCharacter.errors.txt create mode 100644 tests/baselines/reference/parseReplacementCharacter.js create mode 100644 tests/baselines/reference/parseReplacementCharacter.symbols create mode 100644 tests/baselines/reference/parseReplacementCharacter.types create mode 100644 tests/cases/compiler/parseReplacementCharacter.ts diff --git a/tests/baselines/reference/parseReplacementCharacter.errors.txt b/tests/baselines/reference/parseReplacementCharacter.errors.txt new file mode 100644 index 0000000000000..f88ec120302e5 --- /dev/null +++ b/tests/baselines/reference/parseReplacementCharacter.errors.txt @@ -0,0 +1,13 @@ +parseReplacementCharacter.ts(1,1): error TS1490: File appears to be binary. + + +==== parseReplacementCharacter.ts (1 errors) ==== + "oops �� oops"; + +!!! error TS1490: File appears to be binary. + 'oops �� oops'; + `oops �� oops`; + `${"oops �� oops"}`; + // oops �� oops + /* oops �� oops */ + /** oops �� oops */ \ No newline at end of file diff --git a/tests/baselines/reference/parseReplacementCharacter.js b/tests/baselines/reference/parseReplacementCharacter.js new file mode 100644 index 0000000000000..530621e38a082 --- /dev/null +++ b/tests/baselines/reference/parseReplacementCharacter.js @@ -0,0 +1,12 @@ +//// [tests/cases/compiler/parseReplacementCharacter.ts] //// + +//// [parseReplacementCharacter.ts] +"oops �� oops"; +'oops �� oops'; +`oops �� oops`; +`${"oops �� oops"}`; +// oops �� oops +/* oops �� oops */ +/** oops �� oops */ + +//// [parseReplacementCharacter.js] diff --git a/tests/baselines/reference/parseReplacementCharacter.symbols b/tests/baselines/reference/parseReplacementCharacter.symbols new file mode 100644 index 0000000000000..89ade08029474 --- /dev/null +++ b/tests/baselines/reference/parseReplacementCharacter.symbols @@ -0,0 +1,11 @@ +//// [tests/cases/compiler/parseReplacementCharacter.ts] //// + +=== parseReplacementCharacter.ts === + +"oops �� oops"; +'oops �� oops'; +`oops �� oops`; +`${"oops �� oops"}`; +// oops �� oops +/* oops �� oops */ +/** oops �� oops */ diff --git a/tests/baselines/reference/parseReplacementCharacter.types b/tests/baselines/reference/parseReplacementCharacter.types new file mode 100644 index 0000000000000..89ade08029474 --- /dev/null +++ b/tests/baselines/reference/parseReplacementCharacter.types @@ -0,0 +1,11 @@ +//// [tests/cases/compiler/parseReplacementCharacter.ts] //// + +=== parseReplacementCharacter.ts === + +"oops �� oops"; +'oops �� oops'; +`oops �� oops`; +`${"oops �� oops"}`; +// oops �� oops +/* oops �� oops */ +/** oops �� oops */ diff --git a/tests/cases/compiler/parseReplacementCharacter.ts b/tests/cases/compiler/parseReplacementCharacter.ts new file mode 100644 index 0000000000000..b3094a5c75a58 --- /dev/null +++ b/tests/cases/compiler/parseReplacementCharacter.ts @@ -0,0 +1,7 @@ +"oops �� oops"; +'oops �� oops'; +`oops �� oops`; +`${"oops �� oops"}`; +// oops �� oops +/* oops �� oops */ +/** oops �� oops */ \ No newline at end of file From 5366cc3bdeeec7f059ea1e94f2b2ad8ed27056a2 Mon Sep 17 00:00:00 2001 From: Jake Bailey <5341706+jakebailey@users.noreply.github.com> Date: Wed, 17 Apr 2024 09:02:57 -0700 Subject: [PATCH 2/2] Error on replacement character only outside of string scanning --- src/compiler/scanner.ts | 16 ++++------------ src/compiler/types.ts | 3 +++ .../reference/TransportStream.errors.txt | 16 ++++++++++++++-- tests/baselines/reference/TransportStream.js | 1 + tests/baselines/reference/TransportStream.types | 6 +++++- .../parseReplacementCharacter.errors.txt | 13 ------------- .../reference/parseReplacementCharacter.js | 4 ++++ .../reference/parseReplacementCharacter.types | 15 ++++++++++++++- 8 files changed, 45 insertions(+), 29 deletions(-) delete mode 100644 tests/baselines/reference/parseReplacementCharacter.errors.txt diff --git a/src/compiler/scanner.ts b/src/compiler/scanner.ts index 6f9703d9f5553..7b0d1ebb87d35 100644 --- a/src/compiler/scanner.ts +++ b/src/compiler/scanner.ts @@ -1768,18 +1768,6 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean const ch = codePointAt(text, pos); if (pos === 0) { - // If a file isn't valid text at all, it will usually be apparent - // in the first few characters because UTF-8 decode will fail and produce U+FFFD. - // If that happens, just issue one error and refuse to try to scan further; - // this is likely a binary file that cannot be parsed. - // - // It's safe to slice the text; U+FFFD can only be produced by an invalid decode, - // so even if we cut a surrogate pair in half, they wouldn't be U+FFFD. - if (text.slice(0, 256).includes("\uFFFD")) { - error(Diagnostics.File_appears_to_be_binary); - pos = end; - return token = SyntaxKind.NonTextFileMarkerTrivia; - } // Special handling for shebang if (ch === CharacterCodes.hash && isShebangTrivia(text, pos)) { pos = scanShebangTrivia(text, pos); @@ -2242,6 +2230,10 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean error(Diagnostics.Invalid_character, pos++, charSize(ch)); } return token = SyntaxKind.PrivateIdentifier; + case CharacterCodes.replacementCharacter: + error(Diagnostics.File_appears_to_be_binary, 0, 0); + pos = end; + return token = SyntaxKind.NonTextFileMarkerTrivia; default: const identifierKind = scanIdentifier(ch, languageVersion); if (identifierKind) { diff --git a/src/compiler/types.ts b/src/compiler/types.ts index 7231ec81f7830..d97dc608adcbf 100644 --- a/src/compiler/types.ts +++ b/src/compiler/types.ts @@ -7612,6 +7612,9 @@ export const enum CharacterCodes { mathematicalSpace = 0x205F, ogham = 0x1680, + // Unicode replacement character produced when a byte sequence is invalid + replacementCharacter = 0xFFFD, + _ = 0x5F, $ = 0x24, diff --git a/tests/baselines/reference/TransportStream.errors.txt b/tests/baselines/reference/TransportStream.errors.txt index 43b9b2755aeb9..198c2e2faac05 100644 --- a/tests/baselines/reference/TransportStream.errors.txt +++ b/tests/baselines/reference/TransportStream.errors.txt @@ -1,7 +1,19 @@ TransportStream.ts(1,1): error TS1490: File appears to be binary. +TransportStream.ts(1,1): error TS1434: Unexpected keyword or identifier. +TransportStream.ts(1,1): error TS2304: Cannot find name 'G'. +TransportStream.ts(1,3): error TS1127: Invalid character. +TransportStream.ts(1,4): error TS1128: Declaration or statement expected. -==== TransportStream.ts (1 errors) ==== +==== TransportStream.ts (5 errors) ==== G@�G@�G@� -!!! error TS1490: File appears to be binary. \ No newline at end of file +!!! error TS1490: File appears to be binary. + ~ +!!! error TS1434: Unexpected keyword or identifier. + ~ +!!! error TS2304: Cannot find name 'G'. + ~ +!!! error TS1127: Invalid character. + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +!!! error TS1128: Declaration or statement expected. \ No newline at end of file diff --git a/tests/baselines/reference/TransportStream.js b/tests/baselines/reference/TransportStream.js index f73d86a37caab..19e4cfce936a5 100644 --- a/tests/baselines/reference/TransportStream.js +++ b/tests/baselines/reference/TransportStream.js @@ -4,3 +4,4 @@ G@�G@�G@� //// [TransportStream.js] +G; diff --git a/tests/baselines/reference/TransportStream.types b/tests/baselines/reference/TransportStream.types index e08ae8ea2ccd7..ff8f085c24646 100644 --- a/tests/baselines/reference/TransportStream.types +++ b/tests/baselines/reference/TransportStream.types @@ -1,5 +1,9 @@ //// [tests/cases/compiler/TransportStream.ts] //// === TransportStream.ts === - G@�G@�G@� +>G : any +> : ^^^ +> : any +> : ^^^ + diff --git a/tests/baselines/reference/parseReplacementCharacter.errors.txt b/tests/baselines/reference/parseReplacementCharacter.errors.txt deleted file mode 100644 index f88ec120302e5..0000000000000 --- a/tests/baselines/reference/parseReplacementCharacter.errors.txt +++ /dev/null @@ -1,13 +0,0 @@ -parseReplacementCharacter.ts(1,1): error TS1490: File appears to be binary. - - -==== parseReplacementCharacter.ts (1 errors) ==== - "oops �� oops"; - -!!! error TS1490: File appears to be binary. - 'oops �� oops'; - `oops �� oops`; - `${"oops �� oops"}`; - // oops �� oops - /* oops �� oops */ - /** oops �� oops */ \ No newline at end of file diff --git a/tests/baselines/reference/parseReplacementCharacter.js b/tests/baselines/reference/parseReplacementCharacter.js index 530621e38a082..0516045c27f30 100644 --- a/tests/baselines/reference/parseReplacementCharacter.js +++ b/tests/baselines/reference/parseReplacementCharacter.js @@ -10,3 +10,7 @@ /** oops �� oops */ //// [parseReplacementCharacter.js] +"oops �� oops"; +'oops �� oops'; +"oops \uFFFD\uFFFD oops"; +"".concat("oops �� oops"); diff --git a/tests/baselines/reference/parseReplacementCharacter.types b/tests/baselines/reference/parseReplacementCharacter.types index 89ade08029474..bfd0ca12901a2 100644 --- a/tests/baselines/reference/parseReplacementCharacter.types +++ b/tests/baselines/reference/parseReplacementCharacter.types @@ -1,11 +1,24 @@ //// [tests/cases/compiler/parseReplacementCharacter.ts] //// === parseReplacementCharacter.ts === - "oops �� oops"; +>"oops �� oops" : "oops �� oops" +> : ^^^^^^^^^^^^^^ + 'oops �� oops'; +>'oops �� oops' : "oops �� oops" +> : ^^^^^^^^^^^^^^ + `oops �� oops`; +>`oops �� oops` : "oops �� oops" +> : ^^^^^^^^^^^^^^ + `${"oops �� oops"}`; +>`${"oops �� oops"}` : "oops �� oops" +> : ^^^^^^^^^^^^^^ +>"oops �� oops" : "oops �� oops" +> : ^^^^^^^^^^^^^^ + // oops �� oops /* oops �� oops */ /** oops �� oops */