From 905e4d2e19edc4510afc9023b0e82e3d43c1f49b Mon Sep 17 00:00:00 2001
From: Steffen Forkmann
Date: Tue, 31 Mar 2015 17:12:05 +0200
Subject: [PATCH] Throw error if unicode is in reserved space - fixes #338

---
 src/fsharp/FSComp.txt                          |  1 +
 src/fsharp/lex.fsl                             |  8 ++++++--
 .../E_InvalidUnicodeChar01.fs                  | 18 ++++++++++++++++++
 .../StringsAndCharacters/env.lst               |  1 +
 tests/fsharpqa/Source/test.lst                 |  2 +-
 5 files changed, 27 insertions(+), 3 deletions(-)
 create mode 100644 tests/fsharpqa/Source/Conformance/LexicalAnalysis/StringsAndCharacters/E_InvalidUnicodeChar01.fs

diff --git a/src/fsharp/FSComp.txt b/src/fsharp/FSComp.txt
index 2f43466da65..d338e664abc 100644
--- a/src/fsharp/FSComp.txt
+++ b/src/fsharp/FSComp.txt
@@ -1139,6 +1139,7 @@ lexIndentOffForML,"Consider using a file with extension '.ml' or '.mli' instead"
 1242,parsMissingGreaterThan,"Unmatched '<'. Expected closing '>'"
 1243,parsUnexpectedQuotationOperatorInTypeAliasDidYouMeanVerbatimString,"Unexpected quotation operator '<@' in type definition. If you intend to pass a verbatim string as a static argument to a type provider, put a space between the '<' and '@' characters."
 1244,parsErrorParsingAsOperatorName,"Attempted to parse this as an operator name, but failed"
+1245,lexInvalidUnicodeLiteral,"This is not a valid UTF-16 literal"
 # Fsc.exe resource strings
 fscTooManyErrors,"Exiting - too many errors"
 2001,docfileNoXmlSuffix,"The documentation file has no .xml suffix"
diff --git a/src/fsharp/lex.fsl b/src/fsharp/lex.fsl
index 2c37ee00d93..a39b130b98c 100644
--- a/src/fsharp/lex.fsl
+++ b/src/fsharp/lex.fsl
@@ -752,8 +752,12 @@ and string sargs skip = parse
 
  | unicodeGraphShort
     { let (buf,_fin,m,args) = sargs
-      addUnicodeChar buf (int (unicodeGraphShort (lexemeTrimLeft lexbuf 2)));
-      if not skip then (STRING_TEXT (LexCont.String(!args.ifdefStack,m))) else string sargs skip lexbuf }
+      let c = int (unicodeGraphShort (lexemeTrimLeft lexbuf 2))
+      if c >= 0xD800 && c <= 0xDFFF then // U+D800..U+DFFF: UTF-16 surrogate code units, rejected as literals
+          fail args lexbuf (FSComp.SR.lexInvalidUnicodeLiteral()) (CHAR (char c))
+      else
+          addUnicodeChar buf c;
+          if not skip then (STRING_TEXT (LexCont.String(!args.ifdefStack,m))) else string sargs skip lexbuf }
 
  | unicodeGraphLong
     { let (buf,_fin,m,args) = sargs
diff --git a/tests/fsharpqa/Source/Conformance/LexicalAnalysis/StringsAndCharacters/E_InvalidUnicodeChar01.fs b/tests/fsharpqa/Source/Conformance/LexicalAnalysis/StringsAndCharacters/E_InvalidUnicodeChar01.fs
new file mode 100644
index 00000000000..850578ec864
--- /dev/null
+++ b/tests/fsharpqa/Source/Conformance/LexicalAnalysis/StringsAndCharacters/E_InvalidUnicodeChar01.fs
@@ -0,0 +1,18 @@
+// #Regression #Conformance #LexicalAnalysis
+#light
+
+// Verify error when a short unicode escape in a character or string
+// literal denotes a UTF-16 surrogate code unit (U+D800 to U+DFFF).
+
+//<Expects id="FS1245" status="error">This is not a valid UTF-16 literal</Expects>
+//<Expects id="FS1245" status="error">This is not a valid UTF-16 literal</Expects>
+
+let _ = '\uD7FF' // Ok
+
+let _ = '\uD800'
+
+let _ = "\uDFFF"
+
+let _ = "\uE000" // Ok
+
+exit 1
diff --git a/tests/fsharpqa/Source/Conformance/LexicalAnalysis/StringsAndCharacters/env.lst b/tests/fsharpqa/Source/Conformance/LexicalAnalysis/StringsAndCharacters/env.lst
index 2880802f925..92cac21288a 100644
--- a/tests/fsharpqa/Source/Conformance/LexicalAnalysis/StringsAndCharacters/env.lst
+++ b/tests/fsharpqa/Source/Conformance/LexicalAnalysis/StringsAndCharacters/env.lst
@@ -23,6 +23,7 @@
 	SOURCE=E_ByteStrUnicodeChar01.fs	# E_ByteStrUnicodeChar01.fs
 	SOURCE=E_ByteCharUnicodeChar01.fs	# E_ByteCharUnicodeChar01.fs
 
+	SOURCE=E_InvalidUnicodeChar01.fs	# E_InvalidUnicodeChar01.fs
 	SOURCE=E_MalformedShortUnicode01.fs SCFLAGS="--test:ErrorRanges"	# E_MalformedShortUnicode01.fs
 
 	SOURCE=UnicodeString03.fs	# UnicodeString03.fs
diff --git a/tests/fsharpqa/Source/test.lst b/tests/fsharpqa/Source/test.lst
index d8948a6d9b9..337bf65257a 100644
--- a/tests/fsharpqa/Source/test.lst
+++ b/tests/fsharpqa/Source/test.lst
@@ -172,7 +172,7 @@
 Conformance05	Conformance\LexicalAnalysis\LineDirectives
 Conformance05	Conformance\LexicalAnalysis\NumericLiterals
 Conformance05	Conformance\LexicalAnalysis\Shift\Generics
-Conformance06	Conformance\LexicalAnalysis\StringsAndCharacters
+Conformance06,Smoke	Conformance\LexicalAnalysis\StringsAndCharacters
 Conformance06	Conformance\LexicalAnalysis\SymbolicKeywords
 Conformance06	Conformance\LexicalAnalysis\SymbolicOperators
 Conformance06	Conformance\LexicalAnalysis\Whitespace