diff --git a/assembly/src/tokens/location.rs b/assembly/src/tokens/location.rs
new file mode 100644
index 0000000000..f27373670b
--- /dev/null
+++ b/assembly/src/tokens/location.rs
@@ -0,0 +1,28 @@
+// SOURCE LOCATION
+// ================================================================================================
+
+/// A struct containing information about the location of a source item.
+#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct SourceLocation {
+    line: u32,
+    column: u32,
+}
+
+impl SourceLocation {
+
+    // CONSTRUCTORS
+    // --------------------------------------------------------------------------------------------
+
+    /// Creates a new instance of [SourceLocation].
+    pub const fn new(line: u32, column: u32) -> Self {
+        Self { line, column }
+    }
+
+    // PUBLIC ACCESSORS
+    // --------------------------------------------------------------------------------------------
+
+    /// Returns the line of the location.
+    pub const fn line(&self) -> u32 {
+        self.line
+    }
+}
diff --git a/assembly/src/tokens/mod.rs b/assembly/src/tokens/mod.rs
index f2535073d1..c880e62fb9 100644
--- a/assembly/src/tokens/mod.rs
+++ b/assembly/src/tokens/mod.rs
@@ -3,8 +3,11 @@ use super::{
 };
 use core::fmt;
 
+mod location;
+pub use location::SourceLocation;
+
 mod tokenizer;
-pub use tokenizer::Tokenizer;
+use tokenizer::Tokenizer;
 
 mod stream;
 pub use stream::TokenStream;
diff --git a/assembly/src/tokens/stream.rs b/assembly/src/tokens/stream.rs
index 4d8c174c2b..fb80747b01 100644
--- a/assembly/src/tokens/stream.rs
+++ b/assembly/src/tokens/stream.rs
@@ -32,25 +32,29 @@ impl<'a> TokenStream<'a> {
         let mut proc_comments = BTreeMap::new();
 
         // fetch all tokens
-        for (token, docs, line) in Tokenizer::from(source) {
-            match token {
+        for source in Tokenizer::from(source) {
+            match source.token() {
                 Some(token) => {
                     if token.starts_with(Token::EXPORT) || token.starts_with(Token::PROC) {
-                        let doc_comment = build_comment(&docs);
+                        let doc_comment = build_comment(source.docs());
                         proc_comments.insert(tokens.len(), doc_comment);
-                    } else if !docs.is_empty() {
-                        return Err(ParsingError::dangling_procedure_comment(line as usize));
+                    } else if !source.docs().is_empty() {
+                        return Err(ParsingError::dangling_procedure_comment(
+                            source.line() as usize
+                        ));
                     }
 
                     tokens.push(token);
-                    lines.push(line as usize);
+                    lines.push(source.line() as usize);
                 }
                 None if tokens.is_empty() => {
-                    module_comment = build_comment(&docs);
+                    module_comment = build_comment(source.docs());
                 }
-                None => return Err(ParsingError::dangling_procedure_comment(line as usize)),
+                None => {
+                    return Err(ParsingError::dangling_procedure_comment(source.line() as usize))
+                }
             }
         }
diff --git a/assembly/src/tokens/tokenizer.rs b/assembly/src/tokens/tokenizer.rs
index cacc758e15..2235848271 100644
--- a/assembly/src/tokens/tokenizer.rs
+++ b/assembly/src/tokens/tokenizer.rs
@@ -1,4 +1,4 @@
-use super::Token;
+use super::{SourceLocation, Token};
 use core::{mem, str::Lines};
 
 // TOKENIZER
@@ -27,7 +27,7 @@ impl<'a> From<&'a str> for Tokenizer<'a> {
 }
 
 impl<'a> Iterator for Tokenizer<'a> {
-    type Item = (Option<&'a str>, Vec<&'a str>, u32);
+    type Item = SourceToken<'a>;
 
     fn next(&mut self) -> Option<Self::Item> {
         // fetch the remainder of the current line
@@ -69,6 +69,7 @@ impl<'a> Iterator for Tokenizer<'a> {
                     self.lines_count += 1;
                     self.column = 1;
 
+                    // only non-empty docs are consumed
                     let doc = doc.trim();
                     if !doc.is_empty() {
                         self.docs.push(doc.trim());
@@ -81,8 +82,15 @@ impl<'a> Iterator for Tokenizer<'a> {
                     line
                 }
                 None => {
+                    let column = line
+                        .find(Token::DOC_COMMENT_PREFIX)
+                        .map(|c| c as u32 + 1)
+                        .unwrap_or(1);
                     let docs = mem::take(&mut self.docs);
-                    return Some((None, docs, self.lines_count.saturating_sub(1)));
+                    let location =
+                        SourceLocation::new(self.lines_count.saturating_sub(1), column);
+                    let source = SourceToken::new(location).with_docs(docs);
+                    return Some(source);
                 }
             };
         }
@@ -111,11 +119,16 @@ impl<'a> Iterator for Tokenizer<'a> {
         //    ```
         //    */
         if trimmed.is_empty() || trimmed.starts_with(Token::LINE_COMMENT_PREFIX) {
+            let column =
+                line.find(Token::DOC_COMMENT_PREFIX).map(|c| c as u32 + 1).unwrap_or(1);
             self.lines_count += 1;
            self.column = 1;
             self.current = self.lines.next();
             let docs = mem::take(&mut self.docs);
-            return Some((None, docs, self.lines_count.saturating_sub(2)));
+            let location =
+                SourceLocation::new(self.lines_count.saturating_sub(2), column);
+            let source = SourceToken::new(location).with_docs(docs);
+            return Some(source);
         }
 
         // skip empty lines & comments until next valid token, including doc comments
@@ -153,8 +166,12 @@ impl<'a> Iterator for Tokenizer<'a> {
            line = match self.lines.next() {
                Some(line) => line,
                None => {
+                    let column =
+                        line.find(Token::DOC_COMMENT_PREFIX).map(|c| c as u32 + 1).unwrap_or(1);
                    let docs = mem::take(&mut self.docs);
-                    return Some((None, docs, self.lines_count.saturating_sub(1)));
+                    let location = SourceLocation::new(self.lines_count.saturating_sub(1), column);
+                    let source = SourceToken::new(location).with_docs(docs);
+                    return Some(source);
                }
            };
        }
@@ -168,6 +185,7 @@ impl<'a> Iterator for Tokenizer<'a> {
         // split the line with the consumed token & update the column for the next token
         let (line, remainder) = line.split_at(end as usize);
         let token = line.split_at(start as usize).1;
+        let column = start + self.column;
         self.column += line.len() as u32;
 
         // if the remainder is not empty, keep it for next iteration
@@ -177,9 +195,68 @@ impl<'a> Iterator for Tokenizer<'a> {
         }
 
         // returns the token with docs
-        let token = Some(token);
         let docs = mem::take(&mut self.docs);
-        Some((token, docs, self.lines_count))
+        let location = SourceLocation::new(self.lines_count, column);
+        let source = SourceToken::new(location).with_token(token).with_docs(docs);
+        Some(source)
+    }
+}
+
+// SOURCE TOKEN
+// ================================================================================================
+
+/// A struct that links a string token with extracted documentation and [SourceLocation].
+#[derive(Debug, Default, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct SourceToken<'a> {
+    docs: Vec<&'a str>,
+    location: SourceLocation,
+    token: Option<&'a str>,
+}
+
+impl<'a> SourceToken<'a> {
+    // CONSTRUCTORS
+    // --------------------------------------------------------------------------------------------
+
+    /// Creates a new instance of [SourceToken].
+    pub fn new(location: SourceLocation) -> Self {
+        Self {
+            docs: [].to_vec(),
+            location,
+            token: None,
+        }
+    }
+
+    /// Replace the extracted token.
+    pub fn with_token(mut self, token: &'a str) -> Self {
+        self.token.replace(token);
+        self
+    }
+
+    /// Replace the extracted documentation for the token.
+    pub fn with_docs<I>(mut self, docs: I) -> Self
+    where
+        I: IntoIterator<Item = &'a str>,
+    {
+        self.docs = docs.into_iter().collect();
+        self
+    }
+
+    // PUBLIC ACCESSORS
+    // --------------------------------------------------------------------------------------------
+
+    /// Returns the underlying docs.
+    pub fn docs(&self) -> &[&'a str] {
+        &self.docs
+    }
+
+    /// Returns the underlying location line.
+    pub const fn line(&self) -> u32 {
+        self.location.line()
+    }
+
+    /// Returns the underlying token, if present.
+    pub const fn token(&self) -> Option<&'a str> {
+        self.token
     }
 }
 
@@ -217,6 +294,12 @@ fn next_token_range(line: &str) -> Option<(u32, u32)> {
 // UNIT TESTS
 // ================================================================================================
 
+#[cfg(test)]
+fn loc<'a>(line: u32, column: u32) -> SourceToken<'a> {
+    let location = SourceLocation::new(line, column);
+    SourceToken::new(location)
+}
+
 #[test]
 fn next_token_range_works() {
     assert_eq!(None, next_token_range(""));
@@ -242,7 +325,7 @@ fn token_lines_single_token() {
         begin
     "#;
     let mut tokens = Tokenizer::from(source);
-    assert_eq!(Some((Some("begin"), [].to_vec(), 2)), tokens.next());
+    assert_eq!(Some(loc(2, 9).with_token("begin")), tokens.next());
     assert_eq!(None, tokens.next());
 }
@@ -250,10 +333,10 @@ fn token_lines_inline_tokens() {
     let source = "begin add mul end";
     let mut tokens = Tokenizer::from(source);
-    assert_eq!(Some((Some("begin"), [].to_vec(), 1)), tokens.next());
-    assert_eq!(Some((Some("add"), [].to_vec(), 1)), tokens.next());
-    assert_eq!(Some((Some("mul"), [].to_vec(), 1)), tokens.next());
-    assert_eq!(Some((Some("end"), [].to_vec(), 1)), tokens.next());
+    assert_eq!(Some(loc(1, 1).with_token("begin")), tokens.next());
+    assert_eq!(Some(loc(1, 7).with_token("add")), tokens.next());
+    assert_eq!(Some(loc(1, 11).with_token("mul")), tokens.next());
+    assert_eq!(Some(loc(1, 15).with_token("end")), tokens.next());
     assert_eq!(None, tokens.next());
 }
@@ -262,10 +345,10 @@ fn token_lines_multiline_tokens() {
     let source = r#"begin add # foo
            mul end"#;
     let mut tokens = Tokenizer::from(source);
-    assert_eq!(Some((Some("begin"), [].to_vec(), 1)), tokens.next());
-    assert_eq!(Some((Some("add"), [].to_vec(), 1)), tokens.next());
-    assert_eq!(Some((Some("mul"), [].to_vec(), 2)), tokens.next());
-    assert_eq!(Some((Some("end"), [].to_vec(), 2)), tokens.next());
+    assert_eq!(Some(loc(1, 1).with_token("begin")), tokens.next());
+    assert_eq!(Some(loc(1, 7).with_token("add")), tokens.next());
+    assert_eq!(Some(loc(2, 13).with_token("mul")), tokens.next());
+    assert_eq!(Some(loc(2, 17).with_token("end")), tokens.next());
     assert_eq!(None, tokens.next());
 }
@@ -282,10 +365,13 @@ fn token_lines_simple_mod_comment() {
     # baz"#;
     let mut tokens = Tokenizer::from(source);
-    assert_eq!(Some((Some("begin"), ["some mod comment"].to_vec(), 2)), tokens.next());
-    assert_eq!(Some((Some("add"), [].to_vec(), 2)), tokens.next());
-    assert_eq!(Some((Some("mul"), [].to_vec(), 6)), tokens.next());
-    assert_eq!(Some((Some("end"), [].to_vec(), 7)), tokens.next());
+    assert_eq!(
+        Some(loc(2, 5).with_token("begin").with_docs(["some mod comment"])),
+        tokens.next()
+    );
+    assert_eq!(Some(loc(2, 11).with_token("add")), tokens.next());
+    assert_eq!(Some(loc(6, 13).with_token("mul")), tokens.next());
+    assert_eq!(Some(loc(7, 7).with_token("end")), tokens.next());
     assert_eq!(None, tokens.next());
 }
@@ -302,11 +388,14 @@ fn token_lines_dangling_comment() {
             #! dangling comment"#;
     let mut tokens = Tokenizer::from(source);
-    assert_eq!(Some((Some("begin"), ["some mod comment"].to_vec(), 2)), tokens.next());
-    assert_eq!(Some((Some("add"), [].to_vec(), 2)), tokens.next());
-    assert_eq!(Some((Some("mul"), [].to_vec(), 6)), tokens.next());
-    assert_eq!(Some((Some("end"), [].to_vec(), 7)), tokens.next());
-    assert_eq!(Some((None, ["dangling comment"].to_vec(), 10)), tokens.next());
+    assert_eq!(
+        Some(loc(2, 5).with_token("begin").with_docs(["some mod comment"])),
+        tokens.next()
+    );
+    assert_eq!(Some(loc(2, 11).with_token("add")), tokens.next());
+    assert_eq!(Some(loc(6, 13).with_token("mul")), tokens.next());
+    assert_eq!(Some(loc(7, 7).with_token("end")), tokens.next());
+    assert_eq!(Some(loc(10, 13).with_docs(["dangling comment"])), tokens.next());
     assert_eq!(None, tokens.next());
 }
@@ -324,15 +413,18 @@ fn token_lines_inline_doc_comment() {
             #! baz"#;
     let mut tokens = Tokenizer::from(source);
-    assert_eq!(Some((Some("begin"), ["some mod comment"].to_vec(), 2)), tokens.next());
-    assert_eq!(Some((Some("add"), [].to_vec(), 2)), tokens.next());
-    assert_eq!(Some((None, ["bar"].to_vec(), 4)), tokens.next());
-    assert_eq!(Some((Some("mul"), [].to_vec(), 6)), tokens.next());
     assert_eq!(
-        Some((Some("end"), ["end doc comment with trailing spaces"].to_vec(), 8)),
+        Some(loc(2, 5).with_token("begin").with_docs(["some mod comment"])),
         tokens.next()
     );
-    assert_eq!(Some((None, ["baz"].to_vec(), 11)), tokens.next());
+    assert_eq!(Some(loc(2, 11).with_token("add")), tokens.next());
+    assert_eq!(Some(loc(4, 1).with_docs(["bar"])), tokens.next());
+    assert_eq!(Some(loc(6, 13).with_token("mul")), tokens.next());
+    assert_eq!(
+        Some(loc(8, 7).with_token("end").with_docs(["end doc comment with trailing spaces"])),
+        tokens.next()
+    );
+    assert_eq!(Some(loc(11, 13).with_docs(["baz"])), tokens.next());
     assert_eq!(None, tokens.next());
 }
@@ -353,21 +445,21 @@ fn token_lines_inline_multiline_doc_comment() {
             #! some dangling doc comment"#;
     let mut tokens = Tokenizer::from(source);
     assert_eq!(
-        Some((Some("begin"), ["some mod comment", "additional docs"].to_vec(), 3)),
+        Some(loc(3, 5).with_token("begin").with_docs(["some mod comment", "additional docs"])),
         tokens.next()
     );
-    assert_eq!(Some((Some("add"), [].to_vec(), 3)), tokens.next());
-    assert_eq!(Some((None, ["bar"].to_vec(), 5)), tokens.next());
-    assert_eq!(Some((Some("mul"), [].to_vec(), 7)), tokens.next());
+    assert_eq!(Some(loc(3, 11).with_token("add")), tokens.next());
+    assert_eq!(Some(loc(5, 1).with_docs(["bar"])), tokens.next());
+    assert_eq!(Some(loc(7, 13).with_token("mul")), tokens.next());
     assert_eq!(
-        Some((
-            Some("end"),
-            ["end doc comment with trailing spaces", "more lines...."].to_vec(),
-            10
-        )),
+        Some(
+            loc(10, 7)
+                .with_token("end")
+                .with_docs(["end doc comment with trailing spaces", "more lines...."])
+        ),
         tokens.next()
     );
-    assert_eq!(Some((None, ["some dangling doc comment"].to_vec(), 13)), tokens.next());
+    assert_eq!(Some(loc(13, 13).with_docs(["some dangling doc comment"])), tokens.next());
     assert_eq!(None, tokens.next());
 }
@@ -380,12 +472,16 @@ fn token_lines_simple_proc() {
     end"#;
     let mut tokens = Tokenizer::from(source);
     assert_eq!(
-        Some((Some("proc.foo"), ["some proc comment", "additional docs"].to_vec(), 3)),
+        Some(
+            loc(3, 5)
+                .with_token("proc.foo")
+                .with_docs(["some proc comment", "additional docs"])
+        ),
         tokens.next()
     );
-    assert_eq!(Some((Some("add"), [].to_vec(), 4)), tokens.next());
-    assert_eq!(Some((Some("mul.5"), [].to_vec(), 4)), tokens.next());
-    assert_eq!(Some((Some("end"), [].to_vec(), 5)), tokens.next());
+    assert_eq!(Some(loc(4, 9).with_token("add")), tokens.next());
+    assert_eq!(Some(loc(4, 13).with_token("mul.5")), tokens.next());
+    assert_eq!(Some(loc(5, 5).with_token("end")), tokens.next());
     assert_eq!(None, tokens.next());
 }
@@ -412,26 +508,26 @@ end
 #! final dangling comment
 "#;
     let mut tokens = Tokenizer::from(source);
     assert_eq!(
-        Some((None, ["Some multiline mod", "docs", "with more than two lines"].to_vec(), 4)),
+        Some(loc(4, 1).with_docs(["Some multiline mod", "docs", "with more than two lines",])),
         tokens.next()
     );
     assert_eq!(
-        Some((
-            Some("export.foo.1"),
-            ["Some multiline proc", "docs", "additional comments"].to_vec(),
-            9
-        )),
+        Some(loc(9, 1).with_token("export.foo.1").with_docs([
+            "Some multiline proc",
+            "docs",
+            "additional comments",
+        ])),
         tokens.next()
     );
-    assert_eq!(Some((Some("loc_load.0"), [].to_vec(), 10)), tokens.next());
-    assert_eq!(Some((Some("end"), [].to_vec(), 11)), tokens.next());
+    assert_eq!(Some(loc(10, 5).with_token("loc_load.0")), tokens.next());
+    assert_eq!(Some(loc(11, 1).with_token("end")), tokens.next());
     assert_eq!(
-        Some((Some("proc.bar.2"), ["More multiline proc", "docs"].to_vec(), 15)),
+        Some(loc(15, 1).with_token("proc.bar.2").with_docs(["More multiline proc", "docs",])),
         tokens.next()
     );
-    assert_eq!(Some((Some("padw"), [].to_vec(), 16)), tokens.next());
-    assert_eq!(Some((Some("end"), [].to_vec(), 17)), tokens.next());
-    assert_eq!(Some((None, ["final dangling comment"].to_vec(), 19)), tokens.next());
+    assert_eq!(Some(loc(16, 5).with_token("padw")), tokens.next());
+    assert_eq!(Some(loc(17, 1).with_token("end")), tokens.next());
+    assert_eq!(Some(loc(19, 1).with_docs(["final dangling comment"])), tokens.next());
     assert_eq!(None, tokens.next());
 }
@@ -440,13 +536,13 @@ fn token_lines_trailing_spaces() {
     let source = r#"
 export.verify
     #=> [main_trace_commitment]
-    exec.random_coin::reseed
+    exec.random_coin::reseed
 end
 "#;
     let mut tokens = Tokenizer::from(source);
-    assert_eq!(Some((Some("export.verify"), [].to_vec(), 2)), tokens.next());
-    assert_eq!(Some((Some("exec.random_coin::reseed"), [].to_vec(), 4)), tokens.next());
-    assert_eq!(Some((Some("end"), [].to_vec(), 5)), tokens.next());
+    assert_eq!(Some(loc(2, 1).with_token("export.verify")), tokens.next());
+    assert_eq!(Some(loc(4, 5).with_token("exec.random_coin::reseed")), tokens.next());
+    assert_eq!(Some(loc(5, 1).with_token("end")), tokens.next());
     assert_eq!(None, tokens.next());
 }
@@ -465,8 +561,11 @@ export.verify
 end
 "#;
     let mut tokens = Tokenizer::from(source);
-    assert_eq!(Some((Some("export.verify"), ["Foo", "Bar", "Baz"].to_vec(), 9)), tokens.next());
-    assert_eq!(Some((Some("push.0"), [].to_vec(), 10)), tokens.next());
-    assert_eq!(Some((Some("end"), [].to_vec(), 11)), tokens.next());
+    assert_eq!(
+        Some(loc(9, 1).with_token("export.verify").with_docs(["Foo", "Bar", "Baz",])),
+        tokens.next()
+    );
+    assert_eq!(Some(loc(10, 5).with_token("push.0")), tokens.next());
+    assert_eq!(Some(loc(11, 1).with_token("end")), tokens.next());
     assert_eq!(None, tokens.next());
 }