diff --git a/assembly/src/errors.rs b/assembly/src/errors.rs
index a82e352df7..eded24937b 100644
--- a/assembly/src/errors.rs
+++ b/assembly/src/errors.rs
@@ -1,4 +1,4 @@
-use super::{ProcedureId, String, ToString, Token, Vec};
+use super::{ProcedureId, SourceLocation, String, ToString, Token, Vec};
 use core::fmt;
 
 // ASSEMBLY ERROR
@@ -139,7 +139,7 @@ impl std::error::Error for AssemblyError {}
 #[derive(Clone, Eq, PartialEq)]
 pub struct ParsingError {
     message: String,
-    line: usize,
+    location: SourceLocation,
     op: String,
 }
 
@@ -150,15 +150,15 @@ impl ParsingError {
     pub fn empty_source() -> Self {
         ParsingError {
             message: "source code cannot be an empty string".to_string(),
-            line: 0,
+            location: SourceLocation::default(),
             op: "".to_string(),
         }
     }
 
-    pub fn unexpected_eof(line: usize) -> Self {
+    pub fn unexpected_eof(location: SourceLocation) -> Self {
         ParsingError {
             message: "unexpected EOF".to_string(),
-            line,
+            location,
             op: "".to_string(),
         }
     }
@@ -166,7 +166,7 @@ impl ParsingError {
     pub fn unexpected_token(token: &Token, expected: &str) -> Self {
         ParsingError {
             message: format!("unexpected token: expected '{expected}' but was '{token}'"),
-            line: token.line(),
+            location: *token.location(),
             op: token.to_string(),
         }
     }
@@ -176,7 +176,7 @@ impl ParsingError {
     pub fn duplicate_const_name(token: &Token, label: &str) -> Self {
         ParsingError {
             message: format!("duplicate constant name: '{label}'"),
-            line: token.line(),
+            location: *token.location(),
             op: token.to_string(),
         }
     }
@@ -184,7 +184,7 @@ impl ParsingError {
     pub fn invalid_const_name(token: &Token, err: LabelError) -> Self {
         ParsingError {
             message: format!("invalid constant name: {err}"),
-            line: token.line(),
+            location: *token.location(),
             op: token.to_string(),
         }
     }
@@ -194,7 +194,7 @@ impl ParsingError {
             message: format!(
                 "malformed constant `{token}` - invalid value: `{value}` - reason: {reason}"
             ),
-            line: token.line(),
+            location: *token.location(),
             op: token.to_string(),
         }
     }
@@ -202,7 +202,7 @@ impl ParsingError {
     pub fn const_invalid_scope(token: &Token) -> Self {
         ParsingError {
             message: format!("invalid constant declaration: `{token}` - constants can only be defined below imports and above procedure / program bodies"),
-            line: token.line(),
+            location: *token.location(),
             op: token.to_string(),
         }
     }
@@ -210,7 +210,7 @@ impl ParsingError {
     pub fn const_not_found(token: &Token) -> Self {
         ParsingError {
             message: format!("constant used in operation `{token}` not found"),
-            line: token.line(),
+            location: *token.location(),
             op: token.to_string(),
         }
     }
@@ -220,7 +220,7 @@ impl ParsingError {
             message: format!(
                 "failed to convert u64 constant used in `{token}` to required type {type_name}"
             ),
-            line: token.line(),
+            location: *token.location(),
             op: token.to_string(),
         }
     }
@@ -231,7 +231,7 @@ impl ParsingError {
     pub fn invalid_op(token: &Token) -> Self {
         ParsingError {
             message: format!("instruction '{token}' is invalid"),
-            line: token.line(),
+            location: *token.location(),
             op: token.to_string(),
         }
     }
@@ -239,7 +239,7 @@ impl ParsingError {
     pub fn missing_param(token: &Token) -> Self {
         ParsingError {
             message: format!("malformed instruction '{token}': missing required parameter"),
-            line: token.line(),
+            location: *token.location(),
             op: token.to_string(),
         }
     }
@@ -247,7 +247,7 @@ impl ParsingError {
     pub fn extra_param(token: &Token) -> Self {
         ParsingError {
             message: format!("malformed instruction '{token}': too many parameters provided"),
-            line: token.line(),
+            location: *token.location(),
             op: token.to_string(),
         }
     }
@@ -258,7 +258,7 @@ impl ParsingError {
                 "malformed instruction `{token}`: parameter '{}' is invalid",
                 token.parts()[part_idx]
             ),
-            line: token.line(),
+            location: *token.location(),
             op: token.to_string(),
         }
     }
@@ -269,7 +269,7 @@ impl ParsingError {
                 "malformed instruction '{token}', parameter {} is invalid: {reason}",
                 token.parts()[part_idx],
             ),
-            line: token.line(),
+            location: *token.location(),
             op: token.to_string(),
         }
     }
@@ -280,7 +280,7 @@ impl ParsingError {
     pub fn dangling_else(token: &Token) -> Self {
         ParsingError {
             message: "else without matching if".to_string(),
-            line: token.line(),
+            location: *token.location(),
             op: token.to_string(),
         }
     }
@@ -288,7 +288,7 @@ impl ParsingError {
     pub fn unmatched_if(token: &Token) -> Self {
         ParsingError {
             message: "if without matching else/end".to_string(),
-            line: token.line(),
+            location: *token.location(),
             op: token.to_string(),
         }
     }
@@ -296,7 +296,7 @@ impl ParsingError {
     pub fn unmatched_while(token: &Token) -> Self {
         ParsingError {
             message: "while without matching end".to_string(),
-            line: token.line(),
+            location: *token.location(),
             op: token.to_string(),
         }
     }
@@ -304,7 +304,7 @@ impl ParsingError {
     pub fn unmatched_repeat(token: &Token) -> Self {
         ParsingError {
             message: "repeat without matching end".to_string(),
-            line: token.line(),
+            location: *token.location(),
             op: token.to_string(),
         }
     }
@@ -312,7 +312,7 @@ impl ParsingError {
     pub fn unmatched_else(token: &Token) -> Self {
         ParsingError {
             message: "else without matching end".to_string(),
-            line: token.line(),
+            location: *token.location(),
             op: token.to_string(),
         }
     }
@@ -320,7 +320,7 @@ impl ParsingError {
     pub fn unmatched_begin(token: &Token) -> Self {
         ParsingError {
             message: "begin without matching end".to_string(),
-            line: token.line(),
+            location: *token.location(),
             op: token.to_string(),
         }
     }
@@ -328,7 +328,7 @@ impl ParsingError {
     pub fn dangling_ops_after_program(token: &Token) -> Self {
         ParsingError {
             message: "dangling instructions after program end".to_string(),
-            line: token.line(),
+            location: *token.location(),
             op: token.to_string(),
         }
     }
@@ -336,16 +336,16 @@ impl ParsingError {
     pub fn dangling_ops_after_module(token: &Token) -> Self {
         ParsingError {
             message: "dangling instructions after module end".to_string(),
-            line: token.line(),
+            location: *token.location(),
             op: token.to_string(),
         }
     }
 
-    pub fn dangling_procedure_comment(line: usize) -> Self {
+    pub fn dangling_procedure_comment(location: SourceLocation) -> Self {
         ParsingError {
             message: "Procedure comment is not immediately followed by a procedure declaration."
                 .to_string(),
-            line,
+            location,
             op: "".to_string(),
         }
     }
@@ -353,7 +353,7 @@ impl ParsingError {
     pub fn not_a_library_module(token: &Token) -> Self {
         ParsingError {
             message: "not a module: `begin` instruction found".to_string(),
-            line: token.line(),
+            location: *token.location(),
             op: token.to_string(),
         }
     }
@@ -364,7 +364,7 @@ impl ParsingError {
     pub fn duplicate_proc_name(token: &Token, label: &str) -> Self {
         ParsingError {
             message: format!("duplicate procedure name: {label}"),
-            line: token.line(),
+            location: *token.location(),
             op: token.to_string(),
         }
     }
@@ -372,7 +372,7 @@ impl ParsingError {
     pub fn invalid_proc_name(token: &Token, err: LabelError) -> Self {
         ParsingError {
             message: format!("invalid procedure name: {err}"),
-            line: token.line(),
+            location: *token.location(),
             op: token.to_string(),
         }
     }
@@ -383,7 +383,7 @@ impl ParsingError {
                 "procedure name cannot be longer than 47,720 characters, but was {}",
                 label.len()
             ),
-            line: token.line(),
+            location: *token.location(),
             op: token.to_string(),
         }
     }
@@ -391,7 +391,7 @@ impl ParsingError {
     pub fn invalid_proc_locals(token: &Token, locals: &str) -> Self {
         ParsingError {
             message: format!("invalid procedure locals: {locals}"),
-            line: token.line(),
+            location: *token.location(),
             op: token.to_string(),
         }
     }
@@ -399,7 +399,7 @@ impl ParsingError {
     pub fn too_many_proc_locals(token: &Token, num_locals: u64, max_locals: u64) -> Self {
         ParsingError {
             message: format!("number of procedure locals cannot be greater than {max_locals} characters, but was {num_locals}"),
-            line: token.line(),
+            location: *token.location(),
             op: token.to_string(),
         }
     }
@@ -407,7 +407,7 @@ impl ParsingError {
     pub fn unmatched_proc(token: &Token, proc_name: &str) -> Self {
         ParsingError {
             message: format!("procedure '{proc_name}' has no matching end"),
-            line: token.line(),
+            location: *token.location(),
             op: token.to_string(),
         }
     }
@@ -415,7 +415,7 @@ impl ParsingError {
     pub fn proc_export_not_allowed(token: &Token, label: &str) -> Self {
         ParsingError {
             message: format!("exported procedures not allowed in this context: {label}"),
-            line: token.line(),
+            location: *token.location(),
             op: token.to_string(),
         }
     }
@@ -426,7 +426,7 @@ impl ParsingError {
     pub fn invalid_proc_invocation(token: &Token, label: &str) -> Self {
         ParsingError {
             message: format!("invalid procedure invocation: {label}"),
-            line: token.line(),
+            location: *token.location(),
             op: token.to_string(),
         }
     }
@@ -434,7 +434,7 @@ impl ParsingError {
     pub fn syscall_with_module_name(token: &Token) -> Self {
         ParsingError {
             message: "invalid syscall: cannot invoke a syscall on a named module".to_string(),
-            line: token.line(),
+            location: *token.location(),
             op: token.to_string(),
         }
     }
@@ -442,7 +442,7 @@ impl ParsingError {
     pub fn undefined_local_proc(token: &Token, label: &str) -> Self {
         ParsingError {
             message: format!("undefined local procedure: {label}"),
-            line: token.line(),
+            location: *token.location(),
             op: token.to_string(),
         }
     }
@@ -450,7 +450,7 @@ impl ParsingError {
     pub fn procedure_module_not_imported(token: &Token, module_name: &str) -> Self {
         ParsingError {
             message: format!("module '{module_name}' was not imported"),
-            line: token.line(),
+            location: *token.location(),
             op: token.to_string(),
         }
     }
@@ -461,7 +461,7 @@ impl ParsingError {
     pub fn duplicate_module_import(token: &Token, module: &str) -> Self {
         ParsingError {
             message: format!("duplicate module import found: {module}"),
-            line: token.line(),
+            location: *token.location(),
             op: token.to_string(),
         }
     }
@@ -469,7 +469,7 @@ impl ParsingError {
     pub fn invalid_module_path(token: &Token, module_path: &str) -> Self {
         ParsingError {
             message: format!("invalid module import path: {module_path}"),
-            line: token.line(),
+            location: *token.location(),
             op: token.to_string(),
         }
     }
@@ -477,7 +477,7 @@ impl ParsingError {
     pub fn import_inside_body(token: &Token) -> Self {
         ParsingError {
             message: "import in procedure body".to_string(),
-            line: token.line(),
+            location: *token.location(),
             op: token.to_string(),
         }
     }
@@ -485,7 +485,7 @@ impl ParsingError {
     pub fn invalid_library_path(token: &Token, error: LibraryError) -> Self {
         ParsingError {
             message: format!("invalid path resolution: {error}"),
-            line: token.line(),
+            location: *token.location(),
             op: token.to_string(),
         }
     }
@@ -500,20 +500,20 @@ impl ParsingError {
         &self.op
     }
 
-    pub const fn line(&self) -> usize {
-        self.line
+    pub const fn location(&self) -> &SourceLocation {
+        &self.location
     }
 }
 
 impl fmt::Debug for ParsingError {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        write!(f, "parsing error at line {}: {}", self.line, self.message)
+        write!(f, "parsing error at {}: {}", self.location, self.message)
     }
 }
 
 impl fmt::Display for ParsingError {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        write!(f, "parsing error at line {}: {}", self.line, self.message)
+        write!(f, "parsing error at {}: {}", self.location, self.message)
     }
 }
diff --git a/assembly/src/lib.rs b/assembly/src/lib.rs
index cd3e554dff..0cc4784e8c 100644
--- a/assembly/src/lib.rs
+++ b/assembly/src/lib.rs
@@ -31,6 +31,7 @@ pub use vm_core::utils::{
 };
 
 mod tokens;
+pub use tokens::SourceLocation;
 use tokens::{Token, TokenStream};
 
 mod errors;
diff --git a/assembly/src/parsers/mod.rs b/assembly/src/parsers/mod.rs
index 721840a386..0dedb88033 100644
--- a/assembly/src/parsers/mod.rs
+++ b/assembly/src/parsers/mod.rs
@@ -229,7 +229,7 @@ pub fn parse_program(source: &str) -> Result<ProgramAst, ParsingError> {
 
     // make sure program body is present
     let next_token =
-        tokens.read().ok_or_else(|| ParsingError::unexpected_eof(tokens.num_lines()))?;
+        tokens.read().ok_or_else(|| ParsingError::unexpected_eof(*tokens.location()))?;
     if next_token.parts()[0] != Token::BEGIN {
         return Err(ParsingError::unexpected_token(next_token, Token::BEGIN));
     }
@@ -242,7 +242,7 @@ pub fn parse_program(source: &str) -> Result<ProgramAst, ParsingError> {
 
     // make sure there is something to be read
     if tokens.eof() {
-        return Err(ParsingError::unexpected_eof(tokens.num_lines()));
+        return Err(ParsingError::unexpected_eof(*tokens.location()));
     }
 
     let mut body = Vec::<Node>::new();
diff --git a/assembly/src/parsers/tests.rs b/assembly/src/parsers/tests.rs
index 2974203517..b1dec1ad0f 100644
--- a/assembly/src/parsers/tests.rs
+++ b/assembly/src/parsers/tests.rs
@@ -4,6 +4,7 @@ use super::{
     parse_module, parse_program, BTreeMap, Instruction, LocalProcMap, ModuleAst, Node,
     ParsingError, ProcedureAst, ProcedureId, ProgramAst, Token,
 };
+use crate::SourceLocation;
 
 // UNIT TESTS
 // ================================================================================================
@@ -696,14 +697,16 @@ fn test_ast_program_serde_control_flow() {
 fn assert_parsing_line_unmatched_begin() {
     let source = format!("\n\nbegin\npush.1.2\n\nadd mul");
     let err = parse_program(&source).err().unwrap();
-    assert_eq!(err, ParsingError::unmatched_begin(&Token::new("begin", 3)));
+    let location = SourceLocation::new(3, 1);
+    assert_eq!(err, ParsingError::unmatched_begin(&Token::new("begin", location)));
 }
 
 #[test]
 fn assert_parsing_line_extra_param() {
     let source = format!("begin add.1.2\nend");
     let err = parse_program(&source).err().unwrap();
-    assert_eq!(err, ParsingError::extra_param(&Token::new("add.1.2", 1)));
+    let location = SourceLocation::new(1, 7);
+    assert_eq!(err, ParsingError::extra_param(&Token::new("add.1.2", location)));
 }
 
 #[test]
@@ -741,21 +744,24 @@ fn assert_parsing_line_invalid_op() {
     end";
 
     let err = parse_program(&source).err().unwrap();
-    assert_eq!(err, ParsingError::invalid_op(&Token::new("u32overflowing_mulx", 28)));
+    let location = SourceLocation::new(28, 13);
+    assert_eq!(err, ParsingError::invalid_op(&Token::new("u32overflowing_mulx", location)));
 }
 
 #[test]
 fn assert_parsing_line_unexpected_eof() {
     let source = format!("proc.foo\nadd\nend");
     let err = parse_program(&source).err().unwrap();
-    assert_eq!(err, ParsingError::unexpected_eof(3));
+    let location = SourceLocation::new(3, 1);
+    assert_eq!(err, ParsingError::unexpected_eof(location));
 }
 
 #[test]
 fn assert_parsing_line_unexpected_token() {
     let source = format!("proc.foo\nadd\nend\n\nmul");
     let err = parse_program(&source).err().unwrap();
-    assert_eq!(err, ParsingError::unexpected_token(&Token::new("mul", 5), "begin"));
+    let location = SourceLocation::new(5, 1);
+    assert_eq!(err, ParsingError::unexpected_token(&Token::new("mul", location), "begin"));
 }
 
 fn assert_program_output(source: &str, procedures: LocalProcMap, body: Vec<Node>) {
diff --git a/assembly/src/tokens/lines.rs b/assembly/src/tokens/lines.rs
index 27be5779db..bd6394ed8e 100644
--- a/assembly/src/tokens/lines.rs
+++ b/assembly/src/tokens/lines.rs
@@ -226,7 +226,7 @@ impl<'a> LineInfo<'a> {
     /// ```
     ///
     /// `2` is returned.
-    pub const fn _char_offset(&self) -> u32 {
+    pub const fn char_offset(&self) -> u32 {
         self.char_offset
     }
 }
diff --git a/assembly/src/tokens/location.rs b/assembly/src/tokens/location.rs
new file mode 100644
index 0000000000..6927e91f48
--- /dev/null
+++ b/assembly/src/tokens/location.rs
@@ -0,0 +1,59 @@
+use super::LineInfo;
+use core::fmt;
+
+// SOURCE LOCATION
+// ================================================================================================
+
+/// A struct containing information about the location of a source item.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct SourceLocation {
+    // TODO add uri
+    line: u32,
+    column: u32,
+}
+
+impl Default for SourceLocation {
+    fn default() -> Self {
+        Self { line: 1, column: 1 }
+    }
+}
+
+impl From<LineInfo<'_>> for SourceLocation {
+    fn from(info: LineInfo<'_>) -> Self {
+        let line = info.line_number();
+        let column = info.char_offset();
+        Self::new(line, column)
+    }
+}
+
+impl SourceLocation {
+    // CONSTRUCTORS
+    // -------------------------------------------------------------------------------------------
+
+    /// Creates a new instance of [SourceLocation].
+    pub const fn new(line: u32, column: u32) -> Self {
+        Self { line, column }
+    }
+
+    // PUBLIC ACCESSORS
+    // -------------------------------------------------------------------------------------------
+
+    /// Returns the line of the location.
+    pub const fn line(&self) -> u32 {
+        self.line
+    }
+
+    // STATE MUTATORS
+    // -------------------------------------------------------------------------------------------
+
+    /// Moves the column by the given offset.
+    pub fn move_column(&mut self, offset: u32) {
+        self.column += offset;
+    }
+}
+
+impl fmt::Display for SourceLocation {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "[{}:{}]", self.line, self.column)
+    }
+}
diff --git a/assembly/src/tokens/mod.rs b/assembly/src/tokens/mod.rs
index ab42a991aa..dc2fecb1c3 100644
--- a/assembly/src/tokens/mod.rs
+++ b/assembly/src/tokens/mod.rs
@@ -6,6 +6,8 @@ use core::fmt;
 
 mod lines;
 pub use lines::{LineInfo, LinesStream};
+mod location;
+pub use location::SourceLocation;
 mod stream;
 pub use stream::TokenStream;
 
@@ -19,8 +21,8 @@ pub use stream::TokenStream;
 pub struct Token<'a> {
     /// The dot-separated parts of a token, e.g. `push.1` is split into `['push', '1']`.
     parts: Vec<&'a str>,
-    /// The line number linked to this token
-    line: usize,
+    /// Source location linked to this token.
+    location: SourceLocation,
 }
 
 impl<'a> Token<'a> {
@@ -54,20 +56,20 @@ impl<'a> Token<'a> {
     ///
     /// # Panics
     /// Panic if the `token` parameter is an empty string.
-    pub fn new(token: &'a str, line: usize) -> Self {
+    pub fn new(token: &'a str, location: SourceLocation) -> Self {
         assert!(!token.is_empty(), "token cannot be an empty string");
         Self {
             parts: token.split('.').collect(),
-            line,
+            location,
         }
     }
 
     // PUBLIC ACCESSORS
     // --------------------------------------------------------------------------------------------
 
-    /// Returns the line number of this token in the source.
-    pub const fn line(&self) -> usize {
-        self.line
+    /// Returns the [SourceLocation] linked to this [Token].
+    pub const fn location(&self) -> &SourceLocation {
+        &self.location
     }
 
     /// Returns the number of parts in this token.
@@ -86,11 +88,11 @@ impl<'a> Token<'a> {
     ///
     /// # Panics
     /// Panic is the `token` parameter is an empty string.
-    pub fn update(&mut self, token: &'a str, line: usize) {
+    pub fn update(&mut self, token: &'a str, location: SourceLocation) {
         assert!(!token.is_empty(), "token cannot be an empty string");
         self.parts.clear();
         token.split('.').for_each(|part| self.parts.push(part));
-        self.line = line;
+        self.location = location;
     }
 
     // CONTROL TOKEN PARSERS / VALIDATORS
diff --git a/assembly/src/tokens/stream.rs b/assembly/src/tokens/stream.rs
index 6e7354ac5c..5dc189c2bc 100644
--- a/assembly/src/tokens/stream.rs
+++ b/assembly/src/tokens/stream.rs
@@ -1,4 +1,4 @@
-use super::{BTreeMap, LinesStream, ParsingError, String, Token, Vec};
+use super::{BTreeMap, LinesStream, ParsingError, SourceLocation, String, Token, Vec};
 use core::fmt;
 
 // TOKEN STREAM
@@ -7,7 +7,7 @@ use core::fmt;
 #[derive(Debug)]
 pub struct TokenStream<'a> {
     tokens: Vec<&'a str>,
-    lines: Vec<usize>,
+    locations: Vec<SourceLocation>,
     current: Token<'a>,
     pos: usize,
     temp: Token<'a>,
@@ -22,52 +22,80 @@ impl<'a> TokenStream<'a> {
     pub fn new(source: &'a str) -> Result<Self, ParsingError> {
         // initialize the attributes
         let mut tokens = Vec::new();
-        let mut lines = Vec::new();
+        let mut locations = Vec::new();
         let mut proc_comments = BTreeMap::new();
        let mut module_comment = None;
 
         // fetch all tokens
-        for line_info in LinesStream::from(source) {
-            let line_number = line_info.line_number() as usize;
-
-            match line_info.contents() {
-                Some(line) => {
-                    // fill the doc comments for procedures
-                    if line.starts_with(Token::EXPORT) || line.starts_with(Token::PROC) {
-                        let doc_comment = build_comment(line_info.docs());
-                        proc_comments.insert(tokens.len(), doc_comment);
-                    } else if !line_info.docs().is_empty() {
-                        return Err(ParsingError::dangling_procedure_comment(line_number));
-                    }
-
-                    // for each token, skip comments & err when dangling docs; push otherwise
-                    for token in line.split_whitespace() {
-                        if token.starts_with(Token::DOC_COMMENT_PREFIX) {
-                            return Err(ParsingError::dangling_procedure_comment(line_number));
-                        } else if token.starts_with(Token::COMMENT_PREFIX) {
-                            break;
-                        }
-
-                        tokens.push(token);
-                    }
+        for info in LinesStream::from(source) {
+            let offset = info.char_offset();
+            let mut location = SourceLocation::new(info.line_number(), 1 + offset);
+
+            // fetch contents line
+            let mut contents = match info.contents() {
+                // if not first token & has docs without being export or proc, then dangling
+                Some(contents)
+                    if !(tokens.is_empty()
+                        || info.docs().is_empty()
+                        || contents.trim().starts_with(Token::EXPORT)
+                        || contents.trim().starts_with(Token::PROC)) =>
+                {
+                    return Err(ParsingError::dangling_procedure_comment(location));
                 }
-                // if first dangling comment, then module docs
-                // TODO consider using a dedicated symbol for module docs such as `//!`
+                Some(contents) => contents,
+
+                // first dangling comments are module docs
                 None if tokens.is_empty() => {
-                    module_comment = build_comment(line_info.docs());
+                    module_comment = build_comment(info.docs());
+                    continue;
                 }
-                // if has tokens, then dangling docs are illegal
+
+                // other dangling docs are forbidden
                 None => {
-                    return Err(ParsingError::dangling_procedure_comment(
-                        line_info.line_number() as usize
-                    ));
+                    return Err(ParsingError::dangling_procedure_comment(location));
+                }
+            };
+
+            while !contents.is_empty() {
+                // ignore comments; halt if dangling comment
+                if contents.starts_with(Token::DOC_COMMENT_PREFIX) {
+                    return Err(ParsingError::dangling_procedure_comment(location));
+                } else if contents.starts_with(Token::COMMENT_PREFIX) {
+                    break;
                 }
-            }
 
-            // extend lines until it fits the added tokens
-            lines.resize(tokens.len(), line_number);
+                // fill the doc comments for procedures
+                if contents.starts_with(Token::EXPORT) || contents.starts_with(Token::PROC) {
+                    proc_comments.insert(tokens.len(), build_comment(info.docs()));
+                }
+
+                // pick the current token & remainder
+                let (token, remainder) = match contents.split_once(char::is_whitespace) {
+                    Some(split) => split,
+
+                    // last token; push and break
+                    None => {
+                        tokens.push(contents);
+                        locations.push(location);
+                        break;
+                    }
+                };
+
+                // append the token
+                tokens.push(token);
+                locations.push(location);
+
+                // seek next token
+                let n = match remainder.find(|c: char| !c.is_whitespace()) {
+                    Some(n) => n,
+                    None => break,
+                };
+
+                // update the offset; add extra char consumed by `split_once`
+                location.move_column(token.len() as u32 + n as u32 + 1);
+                contents = remainder.split_at(n).1;
+            }
         }
 
         // invalid if no tokens
@@ -75,10 +103,11 @@ impl<'a> TokenStream<'a> {
             return Err(ParsingError::empty_source());
         }
 
-        let current = Token::new(tokens[0], 1);
+        let location = SourceLocation::default();
+        let current = Token::new(tokens[0], location);
         Ok(Self {
             tokens,
-            lines,
+            locations,
             current,
             pos: 0,
             temp: Token::default(),
@@ -95,9 +124,10 @@ impl<'a> TokenStream<'a> {
         self.pos
     }
 
-    /// Returns the current lines count for the stream.
-    pub fn num_lines(&self) -> usize {
-        self.lines[self.pos.min(self.lines.len().saturating_sub(1))]
+    /// Returns the [SourceLocation] linked to the current [Token].
+    pub fn location(&self) -> &SourceLocation {
+        let idx = self.pos.min(self.locations.len().saturating_sub(1));
+        &self.locations[idx]
     }
 
     /// Returns 'true' all tokens from this stream have been read.
@@ -128,7 +158,7 @@ impl<'a> TokenStream<'a> {
         if pos == self.pos {
             self.read()
         } else {
-            self.temp.update(self.tokens[pos], self.lines[pos]);
+            self.temp.update(self.tokens[pos], self.locations[pos]);
             Some(&self.temp)
         }
     }
@@ -138,7 +168,7 @@ impl<'a> TokenStream<'a> {
         if !self.eof() {
             self.pos += 1;
             if !self.eof() {
-                self.current.update(self.tokens[self.pos], self.lines[self.pos]);
+                self.current.update(self.tokens[self.pos], self.locations[self.pos]);
            }
         }
     }
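
Reviewer note (not part of the patch): below is a minimal, self-contained sketch of the column-tracking scheme the diff introduces, with the relevant pieces of `SourceLocation` re-declared locally for illustration only. It mirrors the whitespace-splitting loop in `TokenStream::new`: locations are 1-indexed, and the column is advanced past each token plus the whitespace consumed by `split_once`. It is not the crate's API and makes no claims beyond what the diff shows.

```rust
use core::fmt;

/// Local mirror of the `SourceLocation` added in this diff: 1-indexed line and column.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct SourceLocation {
    line: u32,
    column: u32,
}

impl SourceLocation {
    const fn new(line: u32, column: u32) -> Self {
        Self { line, column }
    }

    /// Moves the column by the given offset, as in the diff.
    fn move_column(&mut self, offset: u32) {
        self.column += offset;
    }
}

impl fmt::Display for SourceLocation {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "[{}:{}]", self.line, self.column)
    }
}

fn main() {
    // Tokenize one source line the way `TokenStream::new` does: split on whitespace
    // and advance the column past the token and the following whitespace run.
    let mut contents = "begin  push.1.2 add end";
    let mut location = SourceLocation::new(3, 1);

    while !contents.is_empty() {
        let (token, remainder) = match contents.split_once(char::is_whitespace) {
            Some(split) => split,
            // last token on the line; report it and stop
            None => {
                println!("{location} {contents}");
                break;
            }
        };
        println!("{location} {token}");

        // find the start of the next token in the remainder
        let n = match remainder.find(|c: char| !c.is_whitespace()) {
            Some(n) => n,
            None => break,
        };

        // skip the token, the char consumed by `split_once`, and any extra whitespace
        location.move_column(token.len() as u32 + n as u32 + 1);
        contents = remainder.split_at(n).1;
    }
    // Prints, one per line: [3:1] begin, [3:8] push.1.2, [3:17] add, [3:21] end
}
```

Storing one `SourceLocation` per token (instead of one line number per token, as before) is what lets `ParsingError` report `[line:column]` in its `Debug`/`Display` output rather than only the line.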