From 8c1d485f587b6c2569fcf8623cbb7122a731112a Mon Sep 17 00:00:00 2001 From: IGI-111 Date: Fri, 29 Sep 2023 19:01:45 +0200 Subject: [PATCH] Fix parser panics in edge cases Will partially address #5049 --- sway-parse/src/lib.rs | 45 ++++++++++++++++++++++++++++++++++++++++ sway-parse/src/parser.rs | 2 +- sway-parse/src/token.rs | 21 +++++++++++++++---- 3 files changed, 63 insertions(+), 5 deletions(-) diff --git a/sway-parse/src/lib.rs b/sway-parse/src/lib.rs index 05b75265b21..2022c954d7a 100644 --- a/sway-parse/src/lib.rs +++ b/sway-parse/src/lib.rs @@ -63,3 +63,48 @@ pub fn parse_module_kind( } parser.parse() } + +#[cfg(test)] +mod tests { + use crate::*; + + #[test] + fn parse_invalid() { + // just make sure these do not panic + let _res = parse_file(&Handler::default(), Arc::from("script; fn main(256߄"), None); + let _res = parse_file( + &Handler::default(), + Arc::from( + "script; + fn karr() { + let c: f828 = 0x00000000000000000000000vncifxp; + abi Zezybt { + #[mfzbezc, storage(r# + true } + } + cug", + ), + None, + ); + let _res = parse_file( + &Handler::default(), + Arc::from( + "script; + + corefn main() { + let a: b256 = 0x000>0000000scri s = \"flibrary I24; + + use core::primitives::*; + use std::assert::assert; + + ///\u{7eb}", + ), + None, + ); + let _res = parse_file( + &Handler::default(), + Arc::from("script; \"\u{7eb}\u{7eb}"), + None, + ); + } +} diff --git a/sway-parse/src/parser.rs b/sway-parse/src/parser.rs index a8de5284815..0d5a87333fb 100644 --- a/sway-parse/src/parser.rs +++ b/sway-parse/src/parser.rs @@ -49,8 +49,8 @@ impl<'a, 'e> Parser<'a, 'e> { (self.full_span.end() + 1).saturating_sub(trim_offset), self.full_span.source_id().cloned(), ) + .unwrap_or(Span::dummy()) } - .unwrap(), }; self.emit_error_with_span(kind, span) } diff --git a/sway-parse/src/token.rs b/sway-parse/src/token.rs index 9cb90721b67..1ea53621870 100644 --- a/sway-parse/src/token.rs +++ b/sway-parse/src/token.rs @@ -209,6 +209,15 @@ pub fn lex_commented( character = next_character; index = next_index; } + if !(character.is_xid_start() || character == '_') { + let kind = LexErrorKind::InvalidCharacter { + position: index, + character, + }; + let span = span_one(&l, index, character); + error(l.handler, LexError { kind, span }); + continue; + } } // Don't accept just `_` as an identifier. @@ -466,10 +475,14 @@ fn lex_string( }, ) }; - let (next_index, next_character) = l - .stream - .next() - .ok_or_else(|| unclosed_string_lit(l, l.src.len() - 1))?; + let (next_index, next_character) = l.stream.next().ok_or_else(|| { + // last character may not be a unicode boundary + let mut end = l.src.len() - 1; + while !l.src.is_char_boundary(end) { + end -= 1; + } + unclosed_string_lit(l, end) + })?; parsed.push(match next_character { '\\' => parse_escape_code(l) .map_err(|e| e.unwrap_or_else(|| unclosed_string_lit(l, l.src.len())))?,