From 73dae3fd56df5e0ee192b730d3710c2e276ffec4 Mon Sep 17 00:00:00 2001 From: Victor Lopez Date: Fri, 14 Apr 2023 19:11:09 +0200 Subject: [PATCH] feat: add line number to parsing error Prior to this commit, [ParsingError] tracked the step of the source instead of the line number. This is not friendly to the user as he won't be able to find the location of the error. A common standard is to display the line number, so the user can easily spot the location of the error. This commit introduces such track. It keeps a 1:1 map on [TokenStream] from the token to the parsed line. As in common text editors, line number starts at `1`. It also removes the `pos` from [Token] as its only purpose was to display the step to the user. The introduction of `line` completely supersedes that. closes #574 --- assembly/src/errors.rs | 92 +++++++++++++++++------------------ assembly/src/parsers/mod.rs | 6 +-- assembly/src/parsers/tests.rs | 68 +++++++++++++++++++++++++- assembly/src/tokens/mod.rs | 18 +++---- assembly/src/tokens/stream.rs | 22 ++++++--- 5 files changed, 141 insertions(+), 65 deletions(-) diff --git a/assembly/src/errors.rs b/assembly/src/errors.rs index 87ea78ee0d..a82e352df7 100644 --- a/assembly/src/errors.rs +++ b/assembly/src/errors.rs @@ -139,7 +139,7 @@ impl std::error::Error for AssemblyError {} #[derive(Clone, Eq, PartialEq)] pub struct ParsingError { message: String, - step: usize, + line: usize, op: String, } @@ -150,15 +150,15 @@ impl ParsingError { pub fn empty_source() -> Self { ParsingError { message: "source code cannot be an empty string".to_string(), - step: 0, + line: 0, op: "".to_string(), } } - pub fn unexpected_eof(step: usize) -> Self { + pub fn unexpected_eof(line: usize) -> Self { ParsingError { message: "unexpected EOF".to_string(), - step, + line, op: "".to_string(), } } @@ -166,7 +166,7 @@ impl ParsingError { pub fn unexpected_token(token: &Token, expected: &str) -> Self { ParsingError { message: format!("unexpected token: expected '{expected}' but was '{token}'"), - step: token.pos(), + line: token.line(), op: token.to_string(), } } @@ -176,7 +176,7 @@ impl ParsingError { pub fn duplicate_const_name(token: &Token, label: &str) -> Self { ParsingError { message: format!("duplicate constant name: '{label}'"), - step: token.pos(), + line: token.line(), op: token.to_string(), } } @@ -184,7 +184,7 @@ impl ParsingError { pub fn invalid_const_name(token: &Token, err: LabelError) -> Self { ParsingError { message: format!("invalid constant name: {err}"), - step: token.pos(), + line: token.line(), op: token.to_string(), } } @@ -194,7 +194,7 @@ impl ParsingError { message: format!( "malformed constant `{token}` - invalid value: `{value}` - reason: {reason}" ), - step: token.pos(), + line: token.line(), op: token.to_string(), } } @@ -202,7 +202,7 @@ impl ParsingError { pub fn const_invalid_scope(token: &Token) -> Self { ParsingError { message: format!("invalid constant declaration: `{token}` - constants can only be defined below imports and above procedure / program bodies"), - step: token.pos(), + line: token.line(), op: token.to_string(), } } @@ -210,7 +210,7 @@ impl ParsingError { pub fn const_not_found(token: &Token) -> Self { ParsingError { message: format!("constant used in operation `{token}` not found"), - step: token.pos(), + line: token.line(), op: token.to_string(), } } @@ -220,7 +220,7 @@ impl ParsingError { message: format!( "failed to convert u64 constant used in `{token}` to required type {type_name}" ), - step: token.pos(), + line: token.line(), op: token.to_string(), } } @@ -231,7 +231,7 @@ impl ParsingError { pub fn invalid_op(token: &Token) -> Self { ParsingError { message: format!("instruction '{token}' is invalid"), - step: token.pos(), + line: token.line(), op: token.to_string(), } } @@ -239,7 +239,7 @@ impl ParsingError { pub fn missing_param(token: &Token) -> Self { ParsingError { message: format!("malformed instruction '{token}': missing required parameter"), - step: token.pos(), + line: token.line(), op: token.to_string(), } } @@ -247,7 +247,7 @@ impl ParsingError { pub fn extra_param(token: &Token) -> Self { ParsingError { message: format!("malformed instruction '{token}': too many parameters provided"), - step: token.pos(), + line: token.line(), op: token.to_string(), } } @@ -258,7 +258,7 @@ impl ParsingError { "malformed instruction `{token}`: parameter '{}' is invalid", token.parts()[part_idx] ), - step: token.pos(), + line: token.line(), op: token.to_string(), } } @@ -269,7 +269,7 @@ impl ParsingError { "malformed instruction '{token}', parameter {} is invalid: {reason}", token.parts()[part_idx], ), - step: token.pos(), + line: token.line(), op: token.to_string(), } } @@ -280,7 +280,7 @@ impl ParsingError { pub fn dangling_else(token: &Token) -> Self { ParsingError { message: "else without matching if".to_string(), - step: token.pos(), + line: token.line(), op: token.to_string(), } } @@ -288,7 +288,7 @@ impl ParsingError { pub fn unmatched_if(token: &Token) -> Self { ParsingError { message: "if without matching else/end".to_string(), - step: token.pos(), + line: token.line(), op: token.to_string(), } } @@ -296,7 +296,7 @@ impl ParsingError { pub fn unmatched_while(token: &Token) -> Self { ParsingError { message: "while without matching end".to_string(), - step: token.pos(), + line: token.line(), op: token.to_string(), } } @@ -304,7 +304,7 @@ impl ParsingError { pub fn unmatched_repeat(token: &Token) -> Self { ParsingError { message: "repeat without matching end".to_string(), - step: token.pos(), + line: token.line(), op: token.to_string(), } } @@ -312,7 +312,7 @@ impl ParsingError { pub fn unmatched_else(token: &Token) -> Self { ParsingError { message: "else without matching end".to_string(), - step: token.pos(), + line: token.line(), op: token.to_string(), } } @@ -320,7 +320,7 @@ impl ParsingError { pub fn unmatched_begin(token: &Token) -> Self { ParsingError { message: "begin without matching end".to_string(), - step: token.pos(), + line: token.line(), op: token.to_string(), } } @@ -328,7 +328,7 @@ impl ParsingError { pub fn dangling_ops_after_program(token: &Token) -> Self { ParsingError { message: "dangling instructions after program end".to_string(), - step: token.pos(), + line: token.line(), op: token.to_string(), } } @@ -336,16 +336,16 @@ impl ParsingError { pub fn dangling_ops_after_module(token: &Token) -> Self { ParsingError { message: "dangling instructions after module end".to_string(), - step: token.pos(), + line: token.line(), op: token.to_string(), } } - pub fn dangling_procedure_comment(step: usize) -> Self { + pub fn dangling_procedure_comment(line: usize) -> Self { ParsingError { message: "Procedure comment is not immediately followed by a procedure declaration." .to_string(), - step, + line, op: "".to_string(), } } @@ -353,7 +353,7 @@ impl ParsingError { pub fn not_a_library_module(token: &Token) -> Self { ParsingError { message: "not a module: `begin` instruction found".to_string(), - step: token.pos(), + line: token.line(), op: token.to_string(), } } @@ -364,7 +364,7 @@ impl ParsingError { pub fn duplicate_proc_name(token: &Token, label: &str) -> Self { ParsingError { message: format!("duplicate procedure name: {label}"), - step: token.pos(), + line: token.line(), op: token.to_string(), } } @@ -372,7 +372,7 @@ impl ParsingError { pub fn invalid_proc_name(token: &Token, err: LabelError) -> Self { ParsingError { message: format!("invalid procedure name: {err}"), - step: token.pos(), + line: token.line(), op: token.to_string(), } } @@ -383,7 +383,7 @@ impl ParsingError { "procedure name cannot be longer than {max_len} characters, but was {}", label.len() ), - step: token.pos(), + line: token.line(), op: token.to_string(), } } @@ -391,7 +391,7 @@ impl ParsingError { pub fn invalid_proc_locals(token: &Token, locals: &str) -> Self { ParsingError { message: format!("invalid procedure locals: {locals}"), - step: token.pos(), + line: token.line(), op: token.to_string(), } } @@ -399,7 +399,7 @@ impl ParsingError { pub fn too_many_proc_locals(token: &Token, num_locals: u64, max_locals: u64) -> Self { ParsingError { message: format!("number of procedure locals cannot be greater than {max_locals} characters, but was {num_locals}"), - step: token.pos(), + line: token.line(), op: token.to_string(), } } @@ -407,7 +407,7 @@ impl ParsingError { pub fn unmatched_proc(token: &Token, proc_name: &str) -> Self { ParsingError { message: format!("procedure '{proc_name}' has no matching end"), - step: token.pos(), + line: token.line(), op: token.to_string(), } } @@ -415,7 +415,7 @@ impl ParsingError { pub fn proc_export_not_allowed(token: &Token, label: &str) -> Self { ParsingError { message: format!("exported procedures not allowed in this context: {label}"), - step: token.pos(), + line: token.line(), op: token.to_string(), } } @@ -426,7 +426,7 @@ impl ParsingError { pub fn invalid_proc_invocation(token: &Token, label: &str) -> Self { ParsingError { message: format!("invalid procedure invocation: {label}"), - step: token.pos(), + line: token.line(), op: token.to_string(), } } @@ -434,7 +434,7 @@ impl ParsingError { pub fn syscall_with_module_name(token: &Token) -> Self { ParsingError { message: "invalid syscall: cannot invoke a syscall on a named module".to_string(), - step: token.pos(), + line: token.line(), op: token.to_string(), } } @@ -442,7 +442,7 @@ impl ParsingError { pub fn undefined_local_proc(token: &Token, label: &str) -> Self { ParsingError { message: format!("undefined local procedure: {label}"), - step: token.pos(), + line: token.line(), op: token.to_string(), } } @@ -450,7 +450,7 @@ impl ParsingError { pub fn procedure_module_not_imported(token: &Token, module_name: &str) -> Self { ParsingError { message: format!("module '{module_name}' was not imported"), - step: token.pos(), + line: token.line(), op: token.to_string(), } } @@ -461,7 +461,7 @@ impl ParsingError { pub fn duplicate_module_import(token: &Token, module: &str) -> Self { ParsingError { message: format!("duplicate module import found: {module}"), - step: token.pos(), + line: token.line(), op: token.to_string(), } } @@ -469,7 +469,7 @@ impl ParsingError { pub fn invalid_module_path(token: &Token, module_path: &str) -> Self { ParsingError { message: format!("invalid module import path: {module_path}"), - step: token.pos(), + line: token.line(), op: token.to_string(), } } @@ -477,7 +477,7 @@ impl ParsingError { pub fn import_inside_body(token: &Token) -> Self { ParsingError { message: "import in procedure body".to_string(), - step: token.pos(), + line: token.line(), op: token.to_string(), } } @@ -485,7 +485,7 @@ impl ParsingError { pub fn invalid_library_path(token: &Token, error: LibraryError) -> Self { ParsingError { message: format!("invalid path resolution: {error}"), - step: token.pos(), + line: token.line(), op: token.to_string(), } } @@ -500,20 +500,20 @@ impl ParsingError { &self.op } - pub fn step(&self) -> usize { - self.step + pub const fn line(&self) -> usize { + self.line } } impl fmt::Debug for ParsingError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "parsing error at {}: {}", self.step, self.message) + write!(f, "parsing error at line {}: {}", self.line, self.message) } } impl fmt::Display for ParsingError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "parsing error at {}: {}", self.step, self.message) + write!(f, "parsing error at line {}: {}", self.line, self.message) } } diff --git a/assembly/src/parsers/mod.rs b/assembly/src/parsers/mod.rs index b23456fea3..721840a386 100644 --- a/assembly/src/parsers/mod.rs +++ b/assembly/src/parsers/mod.rs @@ -228,7 +228,8 @@ pub fn parse_program(source: &str) -> Result { context.parse_procedures(&mut tokens, false)?; // make sure program body is present - let next_token = tokens.read().ok_or_else(|| ParsingError::unexpected_eof(tokens.pos()))?; + let next_token = + tokens.read().ok_or_else(|| ParsingError::unexpected_eof(tokens.num_lines()))?; if next_token.parts()[0] != Token::BEGIN { return Err(ParsingError::unexpected_token(next_token, Token::BEGIN)); } @@ -240,9 +241,8 @@ pub fn parse_program(source: &str) -> Result { tokens.advance(); // make sure there is something to be read - let start_pos = tokens.pos(); if tokens.eof() { - return Err(ParsingError::unexpected_eof(start_pos)); + return Err(ParsingError::unexpected_eof(tokens.num_lines())); } let mut body = Vec::::new(); diff --git a/assembly/src/parsers/tests.rs b/assembly/src/parsers/tests.rs index 797cdccac6..2974203517 100644 --- a/assembly/src/parsers/tests.rs +++ b/assembly/src/parsers/tests.rs @@ -2,7 +2,7 @@ use vm_core::Felt; use super::{ parse_module, parse_program, BTreeMap, Instruction, LocalProcMap, ModuleAst, Node, - ProcedureAst, ProcedureId, ProgramAst, + ParsingError, ProcedureAst, ProcedureId, ProgramAst, Token, }; // UNIT TESTS @@ -692,6 +692,72 @@ fn test_ast_program_serde_control_flow() { assert_eq!(program, program_deserialized); } +#[test] +fn assert_parsing_line_unmatched_begin() { + let source = format!("\n\nbegin\npush.1.2\n\nadd mul"); + let err = parse_program(&source).err().unwrap(); + assert_eq!(err, ParsingError::unmatched_begin(&Token::new("begin", 3))); +} + +#[test] +fn assert_parsing_line_extra_param() { + let source = format!("begin add.1.2\nend"); + let err = parse_program(&source).err().unwrap(); + assert_eq!(err, ParsingError::extra_param(&Token::new("add.1.2", 1))); +} + +#[test] +fn assert_parsing_line_invalid_op() { + let source = "\ + begin + repeat.3 + push.1 + push.0.1 + end + + # some comments + + if.true + and + loc_store.0 + else + padw + end + + # more comments + # to test if line is correct + + while.true + push.5.7 + u32checked_add + loc_store.1 + push.0 + end + + repeat.3 + push.2 + u32overflowing_mulx + end + + end"; + let err = parse_program(&source).err().unwrap(); + assert_eq!(err, ParsingError::invalid_op(&Token::new("u32overflowing_mulx", 28))); +} + +#[test] +fn assert_parsing_line_unexpected_eof() { + let source = format!("proc.foo\nadd\nend"); + let err = parse_program(&source).err().unwrap(); + assert_eq!(err, ParsingError::unexpected_eof(3)); +} + +#[test] +fn assert_parsing_line_unexpected_token() { + let source = format!("proc.foo\nadd\nend\n\nmul"); + let err = parse_program(&source).err().unwrap(); + assert_eq!(err, ParsingError::unexpected_token(&Token::new("mul", 5), "begin")); +} + fn assert_program_output(source: &str, procedures: LocalProcMap, body: Vec) { let program = parse_program(source).unwrap(); assert_eq!(program.body, body); diff --git a/assembly/src/tokens/mod.rs b/assembly/src/tokens/mod.rs index 2d3f4f01d2..eeacf1924e 100644 --- a/assembly/src/tokens/mod.rs +++ b/assembly/src/tokens/mod.rs @@ -16,8 +16,8 @@ pub use stream::TokenStream; pub struct Token<'a> { /// The dot-separated parts of a token, e.g. `push.1` is split into `['push', '1']`. parts: Vec<&'a str>, - /// The token position in the token stream - pos: usize, + /// The line number linked to this token + line: usize, } impl<'a> Token<'a> { @@ -46,20 +46,20 @@ impl<'a> Token<'a> { /// /// # Panics /// Panic if the `token` parameter is an empty string. - pub fn new(token: &'a str, pos: usize) -> Self { + pub fn new(token: &'a str, line: usize) -> Self { assert!(!token.is_empty(), "token cannot be an empty string"); Self { parts: token.split('.').collect(), - pos, + line, } } // PUBLIC ACCESSORS // -------------------------------------------------------------------------------------------- - /// Returns the position of this token in the source. - pub fn pos(&self) -> usize { - self.pos + /// Returns the line number of this token in the source. + pub const fn line(&self) -> usize { + self.line } /// Returns the number of parts in this token. @@ -78,11 +78,11 @@ impl<'a> Token<'a> { /// /// # Panics /// Panic is the `token` parameter is an empty string. - pub fn update(&mut self, token: &'a str, pos: usize) { + pub fn update(&mut self, token: &'a str, line: usize) { assert!(!token.is_empty(), "token cannot be an empty string"); self.parts.clear(); token.split('.').for_each(|part| self.parts.push(part)); - self.pos = pos; + self.line = line; } // CONTROL TOKEN PARSERS / VALIDATORS diff --git a/assembly/src/tokens/stream.rs b/assembly/src/tokens/stream.rs index 388f27b74e..5e2749e5b1 100644 --- a/assembly/src/tokens/stream.rs +++ b/assembly/src/tokens/stream.rs @@ -10,6 +10,7 @@ pub const LINE_COMMENT_PREFIX: &str = "#"; #[derive(Debug)] pub struct TokenStream<'a> { tokens: Vec<&'a str>, + lines: Vec, current: Token<'a>, pos: usize, temp: Token<'a>, @@ -26,12 +27,14 @@ impl<'a> TokenStream<'a> { return Err(ParsingError::empty_source()); } let mut tokens = Vec::new(); + let mut lines = Vec::new(); let mut proc_comments = BTreeMap::new(); let mut module_comment = None; - let mut comment_builder = CommentBuilder(None); + let mut line_number = 0; for line in source.lines() { + line_number += 1; let line = line.trim(); if line.starts_with(DOC_COMMENT_PREFIX) { comment_builder.append_line(line); @@ -46,7 +49,7 @@ impl<'a> TokenStream<'a> { } else { // since we already have a module comment, this is a procedure comment // which is followed by a blank line. - return Err(ParsingError::dangling_procedure_comment(tokens.len())); + return Err(ParsingError::dangling_procedure_comment(line_number)); } } } else { @@ -62,19 +65,21 @@ impl<'a> TokenStream<'a> { if token.starts_with(Token::EXPORT) || token.starts_with(Token::PROC) { proc_comments.insert(tokens.len(), comment_builder.take_comment()); } else { - return Err(ParsingError::dangling_procedure_comment(tokens.len())); + return Err(ParsingError::dangling_procedure_comment(line_number)); } } tokens.append(&mut line_tokens); + lines.resize(tokens.len(), line_number); } } if tokens.is_empty() { return Err(ParsingError::empty_source()); } - let current = Token::new(tokens[0], 0); + let current = Token::new(tokens[0], 1); Ok(Self { tokens, + lines, current, pos: 0, temp: Token::default(), @@ -91,6 +96,11 @@ impl<'a> TokenStream<'a> { self.pos } + /// Returns the current lines count for the stream. + pub fn num_lines(&self) -> usize { + self.lines[self.pos.min(self.lines.len().saturating_sub(1))] + } + /// Returns 'true' all tokens from this stream have been read. pub fn eof(&self) -> bool { self.pos == self.tokens.len() @@ -119,7 +129,7 @@ impl<'a> TokenStream<'a> { if pos == self.pos { self.read() } else { - self.temp.update(self.tokens[pos], pos); + self.temp.update(self.tokens[pos], self.lines[pos]); Some(&self.temp) } } @@ -129,7 +139,7 @@ impl<'a> TokenStream<'a> { if !self.eof() { self.pos += 1; if !self.eof() { - self.current.update(self.tokens[self.pos], self.pos); + self.current.update(self.tokens[self.pos], self.lines[self.pos]); } } }