From b2acbc5a8024becf2eb67d3ef9600d819d9aa473 Mon Sep 17 00:00:00 2001 From: Victor Lopez Date: Fri, 19 May 2023 18:31:14 +0200 Subject: [PATCH 1/2] feat: add SourceLocation to Node This commit adds source location to parsed nodes, allowing the construction of source mapping. It modifies the [Node] structure based on the discussion that took place here: --- assembly/src/assembler/mod.rs | 33 +- assembly/src/parsers/body.rs | 116 +++++++ assembly/src/parsers/context.rs | 94 ++++-- assembly/src/parsers/mod.rs | 79 +++-- assembly/src/parsers/nodes.rs | 16 +- assembly/src/parsers/serde/deserialization.rs | 26 +- assembly/src/parsers/serde/mod.rs | 4 +- assembly/src/parsers/serde/serialization.rs | 37 ++- assembly/src/parsers/tests.rs | 289 ++++++++++++++---- assembly/src/tokens/stream.rs | 2 +- 10 files changed, 545 insertions(+), 151 deletions(-) create mode 100644 assembly/src/parsers/body.rs diff --git a/assembly/src/assembler/mod.rs b/assembly/src/assembler/mod.rs index 092554857a..e25fd196dc 100644 --- a/assembly/src/assembler/mod.rs +++ b/assembly/src/assembler/mod.rs @@ -204,9 +204,9 @@ impl Assembler { prologue: vec![Operation::Push(num_locals), Operation::FmpUpdate], epilogue: vec![Operation::Push(-num_locals), Operation::FmpUpdate], }; - self.compile_body(proc.body.iter(), context, Some(wrapper))? + self.compile_body(proc.body.nodes().iter(), context, Some(wrapper))? } else { - self.compile_body(proc.body.iter(), context, None)? + self.compile_body(proc.body.nodes().iter(), context, None)? }; context.complete_proc(code_root); @@ -233,47 +233,48 @@ impl Assembler { for node in body { match node.borrow() { - Node::Instruction(instruction) => { - if let Some(block) = - self.compile_instruction(instruction, &mut span, context)? - { + Node::Instruction(inner) => { + if let Some(block) = self.compile_instruction(inner, &mut span, context)? 
{ span.extract_span_into(&mut blocks); blocks.push(block); } } - Node::IfElse(t, f) => { + Node::IfElse { + true_case, + false_case, + } => { span.extract_span_into(&mut blocks); - let t = self.compile_body(t.iter(), context, None)?; + let true_case = self.compile_body(true_case.nodes().iter(), context, None)?; // else is an exception because it is optional; hence, will have to be replaced // by noop span - let f = if !f.is_empty() { - self.compile_body(f.iter(), context, None)? + let false_case = if !false_case.nodes().is_empty() { + self.compile_body(false_case.nodes().iter(), context, None)? } else { CodeBlock::new_span(vec![Operation::Noop]) }; - let block = CodeBlock::new_split(t, f); + let block = CodeBlock::new_split(true_case, false_case); blocks.push(block); } - Node::Repeat(n, nodes) => { + Node::Repeat { times, body } => { span.extract_span_into(&mut blocks); - let block = self.compile_body(nodes.iter(), context, None)?; + let block = self.compile_body(body.nodes().iter(), context, None)?; - for _ in 0..*n { + for _ in 0..*times { blocks.push(block.clone()); } } - Node::While(nodes) => { + Node::While { body } => { span.extract_span_into(&mut blocks); - let block = self.compile_body(nodes.iter(), context, None)?; + let block = self.compile_body(body.nodes().iter(), context, None)?; let block = CodeBlock::new_loop(block); blocks.push(block); diff --git a/assembly/src/parsers/body.rs b/assembly/src/parsers/body.rs new file mode 100644 index 0000000000..7f918ca21e --- /dev/null +++ b/assembly/src/parsers/body.rs @@ -0,0 +1,116 @@ +use super::{Node, SourceLocation}; +use core::{iter, slice}; + +// CODE BODY +// ================================================================================================ + +/// A parsed code container to bind a contiguous sequence of [Node] to their optional +/// [SourceLocation]. +/// +/// Will yield an iterator of each [Node] with its respective [SourceLocation]. 
The iterator will +/// be empty if the [SourceLocation] isn't provided. +#[derive(Clone, Default, Eq, Debug)] +pub struct CodeBody { + nodes: Vec, + locations: Vec, +} + +impl PartialEq for CodeBody { + fn eq(&self, other: &Self) -> bool { + // TODO deserialized node will not restore location, but equality must hold + let nodes = self.nodes == other.nodes; + let locations = self.locations == other.locations; + let left_empty = self.locations.is_empty(); + let right_empty = other.locations.is_empty(); + nodes && (locations || left_empty || right_empty) + } +} + +impl FromIterator for CodeBody { + fn from_iter>(nodes: T) -> Self { + Self { + nodes: nodes.into_iter().collect(), + locations: Vec::new(), + } + } +} + +impl FromIterator<(Node, SourceLocation)> for CodeBody { + fn from_iter>(nodes: T) -> Self { + let (nodes, locations) = nodes.into_iter().unzip(); + Self { nodes, locations } + } +} + +impl CodeBody { + // CONSTRUCTOR + // -------------------------------------------------------------------------------------------- + + /// Creates a new instance with the provided `nodes`. + pub fn new(nodes: N) -> Self + where + N: IntoIterator, + { + Self { + nodes: nodes.into_iter().collect(), + locations: Vec::new(), + } + } + + /// Binds [SourceLocation] to their respective [Node]. + /// + /// It is expected to have the `locations` length equal to the `self.nodes` length. + pub fn with_source_locations(mut self, locations: L) -> Self + where + L: IntoIterator, + { + self.locations = locations.into_iter().collect(); + self + } + + // STATE MUTATORS + // -------------------------------------------------------------------------------------------- + + /// Pushes the provided location to the structure. + /// + /// Locations are expected to map `1:1` to their nodes; except for the block termination that + /// is always the last location. 
+ pub fn push_location(&mut self, location: SourceLocation) { + self.locations.push(location); + } + + /// Replaces the source locations for this instance. + pub fn replace_locations(&mut self, locations: Vec) { + self.locations = locations; + } + + // PUBLIC ACCESSORS + // -------------------------------------------------------------------------------------------- + + /// Returns the [Node] sequence. + pub fn nodes(&self) -> &[Node] { + &self.nodes + } + + /// Returns the [SourceLocation] bound to the nodes of this body structure. + pub fn locations(&self) -> &[SourceLocation] { + &self.locations + } + + // DESTRUCTURING + // -------------------------------------------------------------------------------------------- + + /// Returns the internal parts of this parsed code. + pub fn into_parts(self) -> (Vec, Vec) { + (self.nodes, self.locations) + } +} + +impl<'a> IntoIterator for &'a CodeBody { + type Item = (&'a Node, &'a SourceLocation); + type IntoIter = iter::Zip, slice::Iter<'a, SourceLocation>>; + + fn into_iter(self) -> Self::IntoIter { + self.nodes.iter().zip(self.locations.iter()) + } +} diff --git a/assembly/src/parsers/context.rs b/assembly/src/parsers/context.rs index d63298f1b7..2310db9758 100644 --- a/assembly/src/parsers/context.rs +++ b/assembly/src/parsers/context.rs @@ -1,6 +1,7 @@ use super::{ - adv_ops, field_ops, io_ops, stack_ops, u32_ops, Instruction, LibraryPath, LocalConstMap, - LocalProcMap, Node, ParsingError, ProcedureAst, ProcedureId, Token, TokenStream, MAX_DOCS_LEN, + adv_ops, field_ops, io_ops, stack_ops, u32_ops, CodeBody, Instruction, LibraryPath, + LocalConstMap, LocalProcMap, Node, ParsingError, ProcedureAst, ProcedureId, Token, TokenStream, + MAX_DOCS_LEN, }; use vm_core::utils::{ collections::{BTreeMap, Vec}, @@ -32,15 +33,17 @@ impl ParserContext { fn parse_if(&self, tokens: &mut TokenStream) -> Result { // record start of the if-else block and consume the 'if' token let if_start = tokens.pos(); - tokens.read().expect("no if 
token").validate_if()?; + let if_token = tokens.read().expect("no if token"); + + if_token.validate_if()?; tokens.advance(); // read the `if` clause - let t_branch = self.parse_body(tokens, true)?; + let mut true_case = self.parse_body(tokens, true)?; // build the `else` clause; if the else clause is specified, then parse it; // otherwise, set the `else` to an empty vector - let f_branch = match tokens.read() { + let false_case = match tokens.read() { Some(token) => match token.parts()[0] { Token::ELSE => { // record start of the `else` block and consume the `else` token @@ -49,7 +52,7 @@ impl ParserContext { tokens.advance(); // parse the `false` branch - let f_branch = self.parse_body(tokens, false)?; + let false_case = self.parse_body(tokens, false)?; // consume the `end` token match tokens.read() { @@ -58,7 +61,14 @@ impl ParserContext { Err(ParsingError::unmatched_else(token)) } Some(token) => match token.parts()[0] { - Token::END => token.validate_end(), + Token::END => { + // the end token is duplicated for the if body so consistency is + // maintained over the assumption that a body is always terminated + // with an `end` location - in this case, both `if.true` and `else` + // point to the same `end`. + true_case.push_location(*token.location()); + token.validate_end() + } Token::ELSE => Err(ParsingError::dangling_else(token)), _ => { let token = tokens.read_at(else_start).expect("no else token"); @@ -69,13 +79,13 @@ impl ParserContext { tokens.advance(); // return the `false` branch - f_branch + false_case } Token::END => { // consume the `end` token and return an empty vector token.validate_end()?; tokens.advance(); - Vec::new() + CodeBody::default() } _ => { let token = tokens.read_at(if_start).expect("no if token"); @@ -88,18 +98,22 @@ impl ParserContext { } }; - Ok(Node::IfElse(t_branch, f_branch)) + Ok(Node::IfElse { + true_case, + false_case, + }) } /// Parses a while statement from the provided token stream into an AST node. 
fn parse_while(&self, tokens: &mut TokenStream) -> Result { // record start of the while block and consume the 'while' token let while_start = tokens.pos(); - tokens.read().expect("no while token").validate_while()?; + let while_token = tokens.read().expect("no while token"); + while_token.validate_while()?; tokens.advance(); // read the loop body - let loop_body = self.parse_body(tokens, false)?; + let body = self.parse_body(tokens, false)?; // consume the `end` token match tokens.read() { @@ -118,18 +132,19 @@ impl ParserContext { }?; tokens.advance(); - Ok(Node::While(loop_body)) + Ok(Node::While { body }) } /// Parses a repeat statement from the provided token stream into an AST node. fn parse_repeat(&self, tokens: &mut TokenStream) -> Result { // record start of the repeat block and consume the 'repeat' token let repeat_start = tokens.pos(); - let count = tokens.read().expect("no repeat token").parse_repeat()?; + let repeat_token = tokens.read().expect("no repeat token"); + let times = repeat_token.parse_repeat()?; tokens.advance(); // read the loop body - let loop_body = self.parse_body(tokens, false)?; + let body = self.parse_body(tokens, false)?; // consume the `end` token match tokens.read() { @@ -148,7 +163,7 @@ impl ParserContext { }?; tokens.advance(); - Ok(Node::Repeat(count, loop_body)) + Ok(Node::Repeat { times, body }) } // CALL PARSERS @@ -162,10 +177,12 @@ impl ParserContext { if let Some(module_name) = module_name { let proc_id = self.get_imported_proc_id(proc_name, module_name, token)?; - Ok(Node::Instruction(Instruction::ExecImported(proc_id))) + let inner = Instruction::ExecImported(proc_id); + Ok(Node::Instruction(inner)) } else { let index = self.get_local_proc_index(proc_name, token)?; - Ok(Node::Instruction(Instruction::ExecLocal(index))) + let inner = Instruction::ExecLocal(index); + Ok(Node::Instruction(inner)) } } @@ -177,10 +194,12 @@ impl ParserContext { if let Some(module_name) = module_name { let proc_id = 
self.get_imported_proc_id(proc_name, module_name, token)?; - Ok(Node::Instruction(Instruction::CallImported(proc_id))) + let inner = Instruction::CallImported(proc_id); + Ok(Node::Instruction(inner)) } else { let index = self.get_local_proc_index(proc_name, token)?; - Ok(Node::Instruction(Instruction::CallLocal(index))) + let inner = Instruction::CallLocal(index); + Ok(Node::Instruction(inner)) } } @@ -190,7 +209,8 @@ impl ParserContext { let proc_name = token.parse_syscall()?; let proc_id = ProcedureId::from_kernel_name(proc_name); - Ok(Node::Instruction(Instruction::SysCall(proc_id))) + let inner = Instruction::SysCall(proc_id); + Ok(Node::Instruction(inner)) } // PROCEDURE PARSERS @@ -237,6 +257,7 @@ impl ParserContext { if self.local_procs.contains_key(name.as_str()) { return Err(ParsingError::duplicate_proc_name(header, name.as_str())); } + let start = *header.location(); tokens.advance(); // attach doc comments (if any) to exported procedures @@ -274,7 +295,9 @@ impl ParserContext { tokens.advance(); // build and return the procedure - Ok(ProcedureAst::new(name, num_locals, body, is_export, docs)) + let (nodes, locations) = body.into_parts(); + Ok(ProcedureAst::new(name, num_locals, nodes, is_export, docs) + .with_source_locations(locations, start)) } // BODY PARSER @@ -287,13 +310,24 @@ impl ParserContext { &self, tokens: &mut TokenStream, break_on_else: bool, - ) -> Result, ParsingError> { + ) -> Result { let start_pos = tokens.pos(); let mut nodes = Vec::new(); + let mut locations = Vec::new(); while let Some(token) = tokens.read() { + // locations are tracked inside the body, except for nested block declarations that have + // their locations tracked on the node + if !matches!(token.parts()[0], Token::EXPORT | Token::PROC | Token::BEGIN | Token::ELSE) + { + locations.push(*token.location()); + } + match token.parts()[0] { - Token::IF => nodes.push(self.parse_if(tokens)?), + Token::IF => { + let body = self.parse_if(tokens)?; + nodes.push(body); + } 
Token::ELSE => { token.validate_else()?; if break_on_else { @@ -301,8 +335,14 @@ impl ParserContext { } return Err(ParsingError::dangling_else(token)); } - Token::WHILE => nodes.push(self.parse_while(tokens)?), - Token::REPEAT => nodes.push(self.parse_repeat(tokens)?), + Token::WHILE => { + let body = self.parse_while(tokens)?; + nodes.push(body); + } + Token::REPEAT => { + let body = self.parse_repeat(tokens)?; + nodes.push(body); + } Token::END => { token.validate_end()?; break; @@ -327,7 +367,7 @@ impl ParserContext { return Err(ParsingError::body_too_long(token, nodes.len(), MAX_BODY_LEN)); } - Ok(nodes) + Ok(CodeBody::new(nodes).with_source_locations(locations)) } // HELPER METHODS diff --git a/assembly/src/parsers/mod.rs b/assembly/src/parsers/mod.rs index d402f8c4c3..c1cbc3bc7e 100644 --- a/assembly/src/parsers/mod.rs +++ b/assembly/src/parsers/mod.rs @@ -1,10 +1,12 @@ use super::{ BTreeMap, ByteReader, ByteWriter, Deserializable, DeserializationError, Felt, LabelError, - LibraryPath, ParsingError, ProcedureId, ProcedureName, Serializable, SliceReader, StarkField, - String, ToString, Token, TokenStream, Vec, MAX_LABEL_LEN, + LibraryPath, ParsingError, ProcedureId, ProcedureName, Serializable, SliceReader, + SourceLocation, StarkField, String, ToString, Token, TokenStream, Vec, MAX_LABEL_LEN, }; use core::{fmt::Display, ops::RangeBounds, str::from_utf8}; +mod body; +use body::CodeBody; mod nodes; use crate::utils::bound_into_included_u64; pub use nodes::{Instruction, Node}; @@ -47,11 +49,12 @@ type LocalConstMap = BTreeMap; #[derive(Debug, Clone, PartialEq, Eq)] pub struct ProgramAst { local_procs: Vec, - body: Vec, + body: CodeBody, + start: SourceLocation, } impl ProgramAst { - // AST + // CONSTRUCTORS // -------------------------------------------------------------------------------------------- /// Constructs a [ProgramAst]. 
/// @@ -60,7 +63,25 @@ impl ProgramAst { if local_procs.len() > MAX_LOCL_PROCS { return Err(ParsingError::too_many_module_procs(local_procs.len(), MAX_LOCL_PROCS)); } - Ok(Self { local_procs, body }) + let start = SourceLocation::default(); + let body = CodeBody::new(body); + Ok(Self { + local_procs, + body, + start, + }) + } + + /// Binds the provided `locations` into the ast nodes. + /// + /// The `start` location points to the first node of this block. + pub fn with_source_locations(mut self, locations: L, start: SourceLocation) -> Self + where + L: IntoIterator, + { + self.start = start; + self.body = self.body.with_source_locations(locations); + self } // PARSER @@ -92,6 +113,7 @@ impl ProgramAst { let program_start = tokens.pos(); // consume the 'begin' token let header = tokens.read().expect("missing program header"); + let start = *header.location(); header.validate_begin()?; tokens.advance(); @@ -124,7 +146,8 @@ impl ProgramAst { } let local_procs = sort_procs_into_vec(context.local_procs); - Self::new(local_procs, body) + let (nodes, locations) = body.into_parts(); + Ok(Self::new(local_procs, nodes)?.with_source_locations(locations, start)) } // SERIALIZATION / DESERIALIZATION @@ -141,9 +164,9 @@ impl ProgramAst { target.write_u16(self.local_procs.len() as u16); self.local_procs.write_into(&mut target); - assert!(self.body.len() <= u16::MAX as usize, "too many body instructions"); - target.write_u16(self.body.len() as u16); - self.body.write_into(&mut target); + assert!(self.body.nodes().len() <= u16::MAX as usize, "too many body instructions"); + target.write_u16(self.body.nodes().len() as u16); + self.body.nodes().write_into(&mut target); target } @@ -156,9 +179,8 @@ impl ProgramAst { let local_procs = Deserializable::read_batch_from(&mut source, num_local_procs as usize)?; let body_len = source.read_u16()? 
as usize; - let body = Deserializable::read_batch_from(&mut source, body_len)?; - - match Self::new(local_procs, body) { + let nodes = Deserializable::read_batch_from(&mut source, body_len)?; + match Self::new(local_procs, nodes) { Err(err) => Err(DeserializationError::UnknownError(err.message().clone())), Ok(res) => Ok(res), } @@ -169,7 +191,7 @@ impl ProgramAst { /// Returns local procedures and body nodes of this program. pub fn into_parts(self) -> (Vec, Vec) { - (self.local_procs, self.body) + (self.local_procs, self.body.into_parts().0) } } @@ -326,12 +348,13 @@ pub struct ProcedureAst { pub name: ProcedureName, pub docs: Option, pub num_locals: u16, - pub body: Vec, + pub body: CodeBody, + pub start: SourceLocation, pub is_export: bool, } impl ProcedureAst { - // AST + // CONSTRUCTORS // -------------------------------------------------------------------------------------------- /// Constructs a [ProcedureAst]. /// @@ -343,14 +366,29 @@ impl ProcedureAst { is_export: bool, docs: Option, ) -> Self { + let start = SourceLocation::default(); + let body = CodeBody::new(body); Self { name, docs, num_locals, body, is_export, + start, } } + + /// Binds the provided `locations` into the ast nodes. + /// + /// The `start` location points to the first node of this block. 
+ pub fn with_source_locations(mut self, locations: L, start: SourceLocation) -> Self + where + L: IntoIterator, + { + self.start = start; + self.body = self.body.with_source_locations(locations); + self + } } impl Serializable for ProcedureAst { @@ -372,9 +410,9 @@ impl Serializable for ProcedureAst { target.write_bool(self.is_export); target.write_u16(self.num_locals); - assert!(self.body.len() <= u16::MAX as usize, "too many body instructions"); - target.write_u16(self.body.len() as u16); - self.body.write_into(target); + assert!(self.body.nodes().len() <= u16::MAX as usize, "too many body instructions"); + target.write_u16(self.body.nodes().len() as u16); + self.body.nodes().write_into(target); } } @@ -394,11 +432,14 @@ impl Deserializable for ProcedureAst { let is_export = source.read_bool()?; let num_locals = source.read_u16()?; let body_len = source.read_u16()? as usize; - let body = Deserializable::read_batch_from(source, body_len)?; + let nodes = Deserializable::read_batch_from(source, body_len)?; + let body = CodeBody::new(nodes); + let start = SourceLocation::default(); Ok(Self { name, num_locals, body, + start, is_export, docs, }) diff --git a/assembly/src/parsers/nodes.rs b/assembly/src/parsers/nodes.rs index 07977dfcab..801c8d3da2 100644 --- a/assembly/src/parsers/nodes.rs +++ b/assembly/src/parsers/nodes.rs @@ -1,4 +1,4 @@ -use super::{Felt, ProcedureId, Vec}; +use super::{CodeBody, Felt, ProcedureId, Vec}; use core::fmt; // NODES @@ -8,9 +8,17 @@ use core::fmt; #[derive(Clone, PartialEq, Eq, Debug)] pub enum Node { Instruction(Instruction), - IfElse(Vec, Vec), - Repeat(u32, Vec), - While(Vec), + IfElse { + true_case: CodeBody, + false_case: CodeBody, + }, + Repeat { + times: u32, + body: CodeBody, + }, + While { + body: CodeBody, + }, } /// This holds the list of instructions supported in a Miden program. 
diff --git a/assembly/src/parsers/serde/deserialization.rs b/assembly/src/parsers/serde/deserialization.rs index 6113c74a57..9ce2e78b4a 100644 --- a/assembly/src/parsers/serde/deserialization.rs +++ b/assembly/src/parsers/serde/deserialization.rs @@ -1,6 +1,6 @@ use super::{ - ByteReader, Deserializable, DeserializationError, Felt, Instruction, Node, OpCode, ProcedureId, - MAX_PUSH_INPUTS, + ByteReader, CodeBody, Deserializable, DeserializationError, Felt, Instruction, Node, OpCode, + ProcedureId, MAX_PUSH_INPUTS, }; // NODE DESERIALIZATION @@ -14,30 +14,38 @@ impl Deserializable for Node { source.read_u8()?; let if_block_len = source.read_u16()? as usize; - let if_block = Deserializable::read_batch_from(source, if_block_len)?; + let nodes = Deserializable::read_batch_from(source, if_block_len)?; + let true_case = CodeBody::new(nodes); let else_block_len = source.read_u16()? as usize; - let else_block = Deserializable::read_batch_from(source, else_block_len)?; + let nodes = Deserializable::read_batch_from(source, else_block_len)?; + let false_case = CodeBody::new(nodes); - Ok(Node::IfElse(if_block, else_block)) + Ok(Node::IfElse { + true_case, + false_case, + }) } else if first_byte == OpCode::Repeat as u8 { source.read_u8()?; - let repeat_count = source.read_u32()?; + let times = source.read_u32()?; let nodes_len = source.read_u16()? as usize; let nodes = Deserializable::read_batch_from(source, nodes_len)?; + let body = CodeBody::new(nodes); - Ok(Node::Repeat(repeat_count, nodes)) + Ok(Node::Repeat { times, body }) } else if first_byte == OpCode::While as u8 { source.read_u8()?; let nodes_len = source.read_u16()? 
as usize; let nodes = Deserializable::read_batch_from(source, nodes_len)?; + let body = CodeBody::new(nodes); - Ok(Node::While(nodes)) + Ok(Node::While { body }) } else { - Ok(Node::Instruction(Deserializable::read_from(source)?)) + let inner = Deserializable::read_from(source)?; + Ok(Node::Instruction(inner)) } } } diff --git a/assembly/src/parsers/serde/mod.rs b/assembly/src/parsers/serde/mod.rs index d30fbd09c0..cba80298c5 100644 --- a/assembly/src/parsers/serde/mod.rs +++ b/assembly/src/parsers/serde/mod.rs @@ -1,6 +1,6 @@ use super::{ - ByteReader, ByteWriter, Deserializable, DeserializationError, Felt, Instruction, Node, - ProcedureId, Serializable, + ByteReader, ByteWriter, CodeBody, Deserializable, DeserializationError, Felt, Instruction, + Node, ProcedureId, Serializable, }; use crate::MAX_PUSH_INPUTS; use num_enum::TryFromPrimitive; diff --git a/assembly/src/parsers/serde/serialization.rs b/assembly/src/parsers/serde/serialization.rs index 50cfe75d7e..c23f2c05ac 100644 --- a/assembly/src/parsers/serde/serialization.rs +++ b/assembly/src/parsers/serde/serialization.rs @@ -9,32 +9,37 @@ impl Serializable for Node { // the body parser match self { - Self::Instruction(i) => i.write_into(target), - Self::IfElse(if_clause, else_clause) => { + // TODO this initial implementation will store location only for in-memory compilation + // and will not serialize it. 
+ Self::Instruction(inner) => inner.write_into(target), + Self::IfElse { + true_case, + false_case, + } => { OpCode::IfElse.write_into(target); - assert!(if_clause.len() <= u16::MAX as usize, "too many body nodes"); - target.write_u16(if_clause.len() as u16); - if_clause.write_into(target); + assert!(true_case.nodes().len() <= u16::MAX as usize, "too many body nodes"); + target.write_u16(true_case.nodes().len() as u16); + true_case.nodes().write_into(target); - assert!(else_clause.len() <= u16::MAX as usize, "too many body nodes"); - target.write_u16(else_clause.len() as u16); - else_clause.write_into(target); + assert!(false_case.nodes().len() <= u16::MAX as usize, "too many body nodes"); + target.write_u16(false_case.nodes().len() as u16); + false_case.nodes().write_into(target); } - Self::Repeat(times, nodes) => { + Self::Repeat { times, body } => { OpCode::Repeat.write_into(target); target.write_u32(*times); - assert!(nodes.len() <= u16::MAX as usize, "too many body nodes"); - target.write_u16(nodes.len() as u16); - nodes.write_into(target); + assert!(body.nodes().len() <= u16::MAX as usize, "too many body nodes"); + target.write_u16(body.nodes().len() as u16); + body.nodes().write_into(target); } - Self::While(nodes) => { + Self::While { body } => { OpCode::While.write_into(target); - assert!(nodes.len() <= u16::MAX as usize, "too many body nodes"); - target.write_u16(nodes.len() as u16); - nodes.write_into(target); + assert!(body.nodes().len() <= u16::MAX as usize, "too many body nodes"); + target.write_u16(body.nodes().len() as u16); + body.nodes().write_into(target); } } } diff --git a/assembly/src/parsers/tests.rs b/assembly/src/parsers/tests.rs index f923e580e5..c44d89d918 100644 --- a/assembly/src/parsers/tests.rs +++ b/assembly/src/parsers/tests.rs @@ -1,10 +1,9 @@ -use vm_core::Felt; - use super::{ - BTreeMap, Instruction, LocalProcMap, ModuleAst, Node, ParsingError, ProcedureAst, ProcedureId, - ProgramAst, Token, + BTreeMap, CodeBody, Instruction, 
LocalProcMap, ModuleAst, Node, ParsingError, ProcedureAst, + ProcedureId, ProgramAst, SourceLocation, Token, }; -use crate::SourceLocation; +use core::mem; +use vm_core::Felt; // UNIT TESTS // ================================================================================================ @@ -100,21 +99,40 @@ fn test_ast_parsing_program_proc() { exec.foo exec.bar end"; - let proc_body1: Vec = vec![Node::Instruction(Instruction::LocLoad(0))]; + let mut procedures: LocalProcMap = BTreeMap::new(); procedures.insert( String::from("foo"), ( 0, - ProcedureAst::new(String::from("foo").try_into().unwrap(), 1, proc_body1, false, None), + ProcedureAst::new( + String::from("foo").try_into().unwrap(), + 1, + [Node::Instruction(Instruction::LocLoad(0))].to_vec(), + false, + None, + ) + .with_source_locations( + [SourceLocation::new(2, 9), SourceLocation::new(3, 5)], + SourceLocation::new(1, 1), + ), ), ); - let proc_body2: Vec = vec![Node::Instruction(Instruction::PadW)]; procedures.insert( String::from("bar"), ( 1, - ProcedureAst::new(String::from("bar").try_into().unwrap(), 2, proc_body2, false, None), + ProcedureAst::new( + String::from("bar").try_into().unwrap(), + 2, + [Node::Instruction(Instruction::PadW)].to_vec(), + false, + None, + ) + .with_source_locations( + [SourceLocation::new(5, 9), SourceLocation::new(6, 5)], + SourceLocation::new(4, 5), + ), ), ); let nodes: Vec = vec![ @@ -131,12 +149,21 @@ fn test_ast_parsing_module() { loc_load.0 end"; let mut procedures: LocalProcMap = BTreeMap::new(); - let proc_body: Vec = vec![Node::Instruction(Instruction::LocLoad(0))]; procedures.insert( String::from("foo"), ( 0, - ProcedureAst::new(String::from("foo").try_into().unwrap(), 1, proc_body, true, None), + ProcedureAst::new( + String::from("foo").try_into().unwrap(), + 1, + [Node::Instruction(Instruction::LocLoad(0))].to_vec(), + true, + None, + ) + .with_source_locations( + [SourceLocation::new(2, 9), SourceLocation::new(3, 5)], + SourceLocation::new(1, 1), + ), ), ); 
ProgramAst::parse(source).expect_err("Program should contain body and no export"); @@ -211,34 +238,57 @@ fn test_ast_parsing_module_nested_if() { end"; let mut procedures: LocalProcMap = BTreeMap::new(); - let proc_body: Vec = vec![ + let proc_body_nodes = [ Node::Instruction(Instruction::PushU8(1)), - Node::IfElse( - [ + Node::IfElse { + true_case: CodeBody::new([ Node::Instruction(Instruction::PushU8(0)), Node::Instruction(Instruction::PushU8(1)), - Node::IfElse( - [ + Node::IfElse { + true_case: CodeBody::new([ Node::Instruction(Instruction::PushU8(0)), Node::Instruction(Instruction::Sub), - ] - .to_vec(), - [ + ]) + .with_source_locations([ + SourceLocation::new(7, 17), + SourceLocation::new(8, 17), + SourceLocation::new(12, 13), + ]), + false_case: CodeBody::new([ Node::Instruction(Instruction::PushU8(1)), Node::Instruction(Instruction::Sub), - ] - .to_vec(), - ), - ] - .to_vec(), - vec![], - ), - ]; + ]) + .with_source_locations([ + SourceLocation::new(10, 17), + SourceLocation::new(11, 17), + SourceLocation::new(12, 13), + ]), + }, + ]) + .with_source_locations([ + SourceLocation::new(4, 13), + SourceLocation::new(5, 13), + SourceLocation::new(6, 13), + SourceLocation::new(13, 9), + ]), + false_case: CodeBody::default(), + }, + ] + .to_vec(); + let proc_body_locations = + [SourceLocation::new(2, 9), SourceLocation::new(3, 9), SourceLocation::new(14, 5)]; procedures.insert( String::from("foo"), ( 0, - ProcedureAst::new(String::from("foo").try_into().unwrap(), 0, proc_body, false, None), + ProcedureAst::new( + String::from("foo").try_into().unwrap(), + 0, + proc_body_nodes, + false, + None, + ) + .with_source_locations(proc_body_locations, SourceLocation::new(1, 1)), ), ); ProgramAst::parse(source).expect_err("Program should contain body and no export"); @@ -274,28 +324,60 @@ fn test_ast_parsing_module_sequential_if() { end"; let mut procedures: LocalProcMap = BTreeMap::new(); - let proc_body: Vec = vec![ + let proc_body_nodes = [ 
Node::Instruction(Instruction::PushU8(1)), - Node::IfElse( - [ + Node::IfElse { + true_case: CodeBody::new([ Node::Instruction(Instruction::PushU8(5)), Node::Instruction(Instruction::PushU8(1)), - ] - .to_vec(), - vec![], - ), - Node::IfElse( - [Node::Instruction(Instruction::PushU8(0)), Node::Instruction(Instruction::Sub)] - .to_vec(), - [Node::Instruction(Instruction::PushU8(1)), Node::Instruction(Instruction::Sub)] - .to_vec(), - ), + ]) + .with_source_locations([ + SourceLocation::new(4, 13), + SourceLocation::new(5, 13), + SourceLocation::new(6, 9), + ]), + false_case: CodeBody::default(), + }, + Node::IfElse { + true_case: CodeBody::new([ + Node::Instruction(Instruction::PushU8(0)), + Node::Instruction(Instruction::Sub), + ]) + .with_source_locations([ + SourceLocation::new(8, 13), + SourceLocation::new(9, 13), + SourceLocation::new(13, 9), + ]), + false_case: CodeBody::new([ + Node::Instruction(Instruction::PushU8(1)), + Node::Instruction(Instruction::Sub), + ]) + .with_source_locations([ + SourceLocation::new(11, 13), + SourceLocation::new(12, 13), + SourceLocation::new(13, 9), + ]), + }, + ] + .to_vec(); + let proc_body_locations = [ + SourceLocation::new(2, 9), + SourceLocation::new(3, 9), + SourceLocation::new(7, 9), + SourceLocation::new(14, 5), ]; procedures.insert( String::from("foo"), ( 0, - ProcedureAst::new(String::from("foo").try_into().unwrap(), 0, proc_body, false, None), + ProcedureAst::new( + String::from("foo").try_into().unwrap(), + 0, + proc_body_nodes, + false, + None, + ) + .with_source_locations(proc_body_locations, SourceLocation::new(1, 1)), ), ); ProgramAst::parse(source).expect_err("Program should contain body and no export"); @@ -312,6 +394,49 @@ fn test_ast_parsing_module_sequential_if() { } } +#[test] +fn parsed_while_if_body() { + let source = "\ + begin + push.1 + while.true + mul + end + add + if.true + div + end + mul + end + "; + + let body = ProgramAst::parse(source).unwrap().body; + let expected = CodeBody::new([ + 
Node::Instruction(Instruction::PushU8(1)), + Node::While { + body: CodeBody::new([Node::Instruction(Instruction::Mul)]) + .with_source_locations([SourceLocation::new(4, 13), SourceLocation::new(5, 9)]), + }, + Node::Instruction(Instruction::Add), + Node::IfElse { + true_case: CodeBody::new([Node::Instruction(Instruction::Div)]) + .with_source_locations([SourceLocation::new(8, 13), SourceLocation::new(9, 9)]), + false_case: CodeBody::default(), + }, + Node::Instruction(Instruction::Mul), + ]) + .with_source_locations([ + SourceLocation::new(2, 9), + SourceLocation::new(3, 9), + SourceLocation::new(6, 9), + SourceLocation::new(7, 9), + SourceLocation::new(10, 9), + SourceLocation::new(11, 5), + ]); + + assert_eq!(body, expected); +} + // PROCEDURE IMPORTS // ================================================================================================ @@ -380,14 +505,17 @@ fn test_ast_parsing_simple_docs() { loc_load.0 end"; - let proc_body_foo: Vec = vec![Node::Instruction(Instruction::LocLoad(0))]; let docs_foo = "proc doc".to_string(); let procedure = ProcedureAst::new( String::from("foo").try_into().unwrap(), 1, - proc_body_foo, + [Node::Instruction(Instruction::LocLoad(0))].to_vec(), true, Some(docs_foo), + ) + .with_source_locations( + [SourceLocation::new(3, 9), SourceLocation::new(4, 5)], + SourceLocation::new(2, 5), ); let module = ModuleAst::parse(source).unwrap(); @@ -427,7 +555,6 @@ export.baz.3 push.0 end"; let mut procedures: LocalProcMap = BTreeMap::new(); - let proc_body_foo: Vec = vec![Node::Instruction(Instruction::LocLoad(0))]; let docs_foo = "Test documenation for export procedure foo in parsing test. Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. @@ -441,14 +568,17 @@ of the comments is correctly parsed. There was a bug here earlier." 
ProcedureAst::new( String::from("foo").try_into().unwrap(), 1, - proc_body_foo, + [Node::Instruction(Instruction::LocLoad(0))].to_vec(), true, Some(docs_foo), + ) + .with_source_locations( + [SourceLocation::new(11, 5), SourceLocation::new(12, 1)], + SourceLocation::new(10, 1), ), ), ); - let proc_body_bar: Vec = vec![Node::Instruction(Instruction::PadW)]; procedures.insert( String::from("bar"), ( @@ -456,15 +586,17 @@ of the comments is correctly parsed. There was a bug here earlier." ProcedureAst::new( String::from("bar").try_into().unwrap(), 2, - proc_body_bar, + [Node::Instruction(Instruction::PadW)].to_vec(), false, None, + ) + .with_source_locations( + [SourceLocation::new(18, 5), SourceLocation::new(19, 1)], + SourceLocation::new(17, 1), ), ), ); - let proc_body_baz: Vec = - vec![Node::Instruction(Instruction::PadW), Node::Instruction(Instruction::PushU8(0))]; let docs_baz = "Test documenation for export procedure baz in parsing test. Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna @@ -477,15 +609,24 @@ aliqua." ProcedureAst::new( String::from("baz").try_into().unwrap(), 3, - proc_body_baz, + [Node::Instruction(Instruction::PadW), Node::Instruction(Instruction::PushU8(0))] + .to_vec(), true, Some(docs_baz), + ) + .with_source_locations( + [ + SourceLocation::new(25, 5), + SourceLocation::new(26, 5), + SourceLocation::new(27, 1), + ], + SourceLocation::new(24, 1), ), ), ); ProgramAst::parse(source).expect_err("Program should contain body and no export"); - let module = ModuleAst::parse(source).unwrap(); + let mut module = ModuleAst::parse(source).unwrap(); let module_docs = "Test documenation for the whole module in parsing test. Lorem ipsum dolor sit amet, @@ -507,6 +648,7 @@ of the comments is correctly parsed. There was a bug here earlier." 
let module_serialized = module.to_bytes(); let module_deserialized = ModuleAst::from_bytes(module_serialized.as_slice()).unwrap(); + clear_procs_loc_module(&mut module); assert_eq!(module, module_deserialized); } @@ -583,10 +725,11 @@ fn test_ast_parsing_module_docs_fail() { #[test] fn test_ast_program_serde_simple() { let source = "begin push.0xabc234 push.0 assertz end"; - let program = ProgramAst::parse(source).unwrap(); + let mut program = ProgramAst::parse(source).unwrap(); let program_serialized = program.to_bytes(); let program_deserialized = ProgramAst::from_bytes(program_serialized.as_slice()).unwrap(); + clear_procs_loc_program(&mut program); assert_eq!(program, program_deserialized); } @@ -603,10 +746,11 @@ fn test_ast_program_serde_local_procs() { exec.foo exec.bar end"; - let program = ProgramAst::parse(source).unwrap(); + let mut program = ProgramAst::parse(source).unwrap(); let program_serialized = program.to_bytes(); let program_deserialized = ProgramAst::from_bytes(program_serialized.as_slice()).unwrap(); + clear_procs_loc_program(&mut program); assert_eq!(program, program_deserialized); } @@ -619,10 +763,11 @@ fn test_ast_program_serde_exported_procs() { export.bar.2 padw end"; - let module = ModuleAst::parse(source).unwrap(); + let mut module = ModuleAst::parse(source).unwrap(); let module_serialized = module.to_bytes(); let module_deserialized = ModuleAst::from_bytes(module_serialized.as_slice()).unwrap(); + clear_procs_loc_module(&mut module); assert_eq!(module, module_deserialized); } @@ -656,10 +801,11 @@ fn test_ast_program_serde_control_flow() { end"; - let program = ProgramAst::parse(source).unwrap(); + let mut program = ProgramAst::parse(source).unwrap(); let program_serialized = program.to_bytes(); let program_deserialized = ProgramAst::from_bytes(program_serialized.as_slice()).unwrap(); + clear_procs_loc_program(&mut program); assert_eq!(program, program_deserialized); } @@ -736,7 +882,7 @@ fn assert_parsing_line_unexpected_token() { fn 
assert_program_output(source: &str, procedures: LocalProcMap, body: Vec) { let program = ProgramAst::parse(source).unwrap(); - assert_eq!(program.body, body); + assert_eq!(program.body.nodes(), body); assert_eq!(program.local_procs.len(), procedures.len()); for (i, proc) in program.local_procs.iter().enumerate() { assert_eq!( @@ -748,3 +894,32 @@ fn assert_program_output(source: &str, procedures: LocalProcMap, body: Vec ); } } + +// HELPER FUNCTIONS +// ================================================================================================ + +/// Clears the proc locations. +/// +/// Currently, the locations are not part of the serialized libraries; thus, they have to be +/// cleaned before equality is checked for tests +#[cfg(test)] +fn clear_procs_loc_module(module: &mut ModuleAst) { + module.local_procs.iter_mut().for_each(|m| { + m.body = CodeBody::new(mem::take(&mut m.body).into_parts().0); + m.start = SourceLocation::default(); + }); +} + +/// Clears the proc locations. +/// +/// Currently, the locations are not part of the serialized libraries; thus, they have to be +/// cleaned before equality is checked for tests +#[cfg(test)] +fn clear_procs_loc_program(program: &mut ProgramAst) { + program.start = SourceLocation::default(); + program.local_procs.iter_mut().for_each(|m| { + m.body = CodeBody::new(mem::take(&mut m.body).into_parts().0); + m.start = SourceLocation::default(); + }); + program.body = CodeBody::new(mem::take(&mut program.body).into_parts().0); +} diff --git a/assembly/src/tokens/stream.rs b/assembly/src/tokens/stream.rs index aa98b1c472..534fa20dbf 100644 --- a/assembly/src/tokens/stream.rs +++ b/assembly/src/tokens/stream.rs @@ -70,7 +70,7 @@ impl<'a> TokenStream<'a> { return Err(ParsingError::empty_source()); } - let location = SourceLocation::default(); + let location = locations[0]; let current = Token::new(tokens[0], location); Ok(Self { tokens, From 3f672365a58bddadc9d7ba2868dcf1924f077491 Mon Sep 17 00:00:00 2001 From: Victor 
Lopez Date: Fri, 19 May 2023 18:35:23 +0200 Subject: [PATCH 2/2] feat: add serialization to ast/masl locations --- assembly/src/library/masl.rs | 32 +++++++++- assembly/src/library/mod.rs | 27 ++++++++ assembly/src/library/tests.rs | 57 +++++++++++++++++ assembly/src/parsers/body.rs | 39 +++++++++++- assembly/src/parsers/mod.rs | 104 ++++++++++++++++++++++++++++++- assembly/src/parsers/tests.rs | 106 ++++++++++++++++++++------------ assembly/src/tokens/location.rs | 16 +++++ assembly/src/tokens/mod.rs | 5 +- stdlib/build.rs | 3 +- 9 files changed, 343 insertions(+), 46 deletions(-) create mode 100644 assembly/src/library/tests.rs diff --git a/assembly/src/library/masl.rs b/assembly/src/library/masl.rs index 6237cc50b6..cd0c07aa18 100644 --- a/assembly/src/library/masl.rs +++ b/assembly/src/library/masl.rs @@ -18,6 +18,8 @@ pub struct MaslLibrary { namespace: LibraryNamespace, /// Version of the library. version: Version, + /// Flag defining if locations are serialized with the library. + has_source_locations: bool, /// Available modules. modules: Vec, } @@ -51,9 +53,10 @@ impl MaslLibrary { /// # Errors /// Returns an error if the provided `modules` vector is empty or contains more than /// [u16::MAX] elements. - fn new( + pub(super) fn new( namespace: LibraryNamespace, version: Version, + has_source_locations: bool, modules: Vec, ) -> Result { if modules.is_empty() { @@ -69,9 +72,18 @@ impl MaslLibrary { Ok(Self { namespace, version, + has_source_locations, modules, }) } + + // STATE MUTATORS + // -------------------------------------------------------------------------------------------- + + /// Clears the source locations from this bundle. + pub fn clear_locations(&mut self) { + self.modules.iter_mut().for_each(|m| m.clear_locations()) + } } #[cfg(feature = "std")] @@ -96,6 +108,7 @@ mod use_std { pub fn read_from_dir

( path: P, namespace: LibraryNamespace, + with_source_locations: bool, version: Version, ) -> io::Result where @@ -118,7 +131,7 @@ mod use_std { .map(|(path, ast)| Module { path, ast }) .collect(); - Self::new(namespace, version, modules) + Self::new(namespace, version, with_source_locations, modules) .map_err(|err| io::Error::new(io::ErrorKind::Other, format!("{err}"))) } @@ -229,6 +242,13 @@ impl Serializable for MaslLibrary { .write_into(target); module.ast.write_into(target); }); + + // optionally write the locations into the target. given the modules count is already + // written, we can safely dump the locations structs + target.write_bool(self.has_source_locations); + if self.has_source_locations { + self.modules.iter().for_each(|m| m.write_source_locations(target)); + } } } @@ -247,7 +267,13 @@ impl Deserializable for MaslLibrary { modules.push(Module { path, ast }); } - Self::new(namespace, version, modules) + // for each module, load its locations + let has_source_locations = source.read_bool()?; + if has_source_locations { + modules.iter_mut().try_for_each(|m| m.load_source_locations(source))?; + } + + Self::new(namespace, version, has_source_locations, modules) .map_err(|err| DeserializationError::InvalidValue(format!("{err}"))) } } diff --git a/assembly/src/library/mod.rs b/assembly/src/library/mod.rs index 20ff0c6db9..13b53db4af 100644 --- a/assembly/src/library/mod.rs +++ b/assembly/src/library/mod.rs @@ -10,6 +10,9 @@ pub use masl::MaslLibrary; mod path; pub use path::LibraryPath; +#[cfg(test)] +mod tests; + // LIBRARY // ================================================================================================ @@ -89,6 +92,30 @@ impl Module { LibraryError::inconsistent_namespace(self.path.first(), namespace.as_str()) }) } + + // STATE MUTATORS + // -------------------------------------------------------------------------------------------- + + /// Clears the source locations from this module. 
+ pub fn clear_locations(&mut self) { + self.ast.clear_locations() + } + + // SERIALIZATION / DESERIALIZATION + // -------------------------------------------------------------------------------------------- + + /// Loads the [SourceLocation] of the procedures via [ModuleAst::load_source_locations]. + pub fn load_source_locations( + &mut self, + source: &mut R, + ) -> Result<(), DeserializationError> { + self.ast.load_source_locations(source) + } + + /// Writes the [SourceLocation] of the procedures via [ModuleAst::write_source_locations]. + pub fn write_source_locations(&self, target: &mut W) { + self.ast.write_source_locations(target) + } } impl PartialOrd for Module { diff --git a/assembly/src/library/tests.rs b/assembly/src/library/tests.rs new file mode 100644 index 0000000000..a1183f68c7 --- /dev/null +++ b/assembly/src/library/tests.rs @@ -0,0 +1,57 @@ +use super::{LibraryNamespace, LibraryPath, MaslLibrary, Module, ModuleAst, Version}; +use vm_core::utils::{Deserializable, Serializable, SliceReader}; + +#[test] +fn masl_locations_serialization() { + // declare foo module + let foo = r#" + export.foo + add + end + export.foo_mul + mul + end + "#; + let path = LibraryPath::new("test::foo").unwrap(); + let ast = ModuleAst::parse(foo).unwrap(); + let foo = Module::new(path, ast); + + // declare bar module + let bar = r#" + export.bar + mtree_get + end + export.bar_mul + mul + end + "#; + let path = LibraryPath::new("test::bar").unwrap(); + let ast = ModuleAst::parse(bar).unwrap(); + let bar = Module::new(path, ast); + let modules = [foo, bar].to_vec(); + + // create the bundle with locations + let namespace = LibraryNamespace::new("test").unwrap(); + let version = Version::MIN; + let locations = true; + let bundle = MaslLibrary::new(namespace, version, locations, modules.clone()).unwrap(); + + // serialize/deserialize the bundle + let mut bytes = Vec::new(); + bundle.write_into(&mut bytes); + let deserialized = MaslLibrary::read_from(&mut 
SliceReader::new(&bytes)).unwrap(); + assert_eq!(bundle, deserialized); + + // create the bundle without locations + let namespace = LibraryNamespace::new("test").unwrap(); + let locations = false; + let mut bundle = MaslLibrary::new(namespace, version, locations, modules).unwrap(); + + // serialize/deserialize the bundle + let mut bytes = Vec::new(); + bundle.write_into(&mut bytes); + let deserialized = MaslLibrary::read_from(&mut SliceReader::new(&bytes)).unwrap(); + assert_ne!(bundle, deserialized, "sanity check"); + bundle.clear_locations(); + assert_eq!(bundle, deserialized); +} diff --git a/assembly/src/parsers/body.rs b/assembly/src/parsers/body.rs index 7f918ca21e..bb90358bc9 100644 --- a/assembly/src/parsers/body.rs +++ b/assembly/src/parsers/body.rs @@ -1,4 +1,7 @@ -use super::{Node, SourceLocation}; +use super::{ + ByteReader, ByteWriter, Deserializable, DeserializationError, Node, Serializable, + SourceLocation, +}; use core::{iter, slice}; // CODE BODY @@ -84,6 +87,38 @@ impl CodeBody { self.locations = locations; } + // SERIALIZATION / DESERIALIZATION + // -------------------------------------------------------------------------------------------- + + /// Loads the [SourceLocation] from the `source`. + /// + /// The `source` is expected to provide a locations count equal to the block nodes count + 1, + /// having the last element reserved for its `end` node. This way, the locations count is not + /// expected to be read, as opposed to common vector serialization strategies. + /// + /// This implementation intentionally diverges from [Deserializable] so locations can be + /// optionally stored. + pub fn load_source_locations( + &mut self, + source: &mut R, + ) -> Result<(), DeserializationError> { + self.locations = (0..=self.nodes.len()) + .map(|_| SourceLocation::read_from(source)) + .collect::>()?; + Ok(()) + } + + /// Writes the [SourceLocation] into `target`. + /// + /// The locations will be written directly, without storing the locations count. 
This is the + /// counterpart of [CodeBody::load_source_locations]. + /// + /// This implementation intentionally diverges from [Serializable] so locations can be + /// optionally stored. + pub fn write_source_locations(&self, target: &mut W) { + self.locations.iter().for_each(|l| l.write_into(target)); + } + // PUBLIC ACCESSORS // -------------------------------------------------------------------------------------------- @@ -93,7 +128,7 @@ impl CodeBody { } /// Returns the [SourceLocations] bound to the nodes of this body structure. - pub fn locations(&self) -> &[SourceLocation] { + pub fn source_locations(&self) -> &[SourceLocation] { &self.locations } diff --git a/assembly/src/parsers/mod.rs b/assembly/src/parsers/mod.rs index c1cbc3bc7e..2eea4f890e 100644 --- a/assembly/src/parsers/mod.rs +++ b/assembly/src/parsers/mod.rs @@ -3,7 +3,7 @@ use super::{ LibraryPath, ParsingError, ProcedureId, ProcedureName, Serializable, SliceReader, SourceLocation, StarkField, String, ToString, Token, TokenStream, Vec, MAX_LABEL_LEN, }; -use core::{fmt::Display, ops::RangeBounds, str::from_utf8}; +use core::{fmt::Display, iter, ops::RangeBounds, str::from_utf8}; mod body; use body::CodeBody; @@ -84,6 +84,14 @@ impl ProgramAst { self } + // PUBLIC ACCESSORS + // -------------------------------------------------------------------------------------------- + + /// Returns the [SourceLocation] associated with this program, if present. + pub fn source_locations(&self) -> impl Iterator { + iter::once(&self.start).chain(self.body.source_locations().iter()) + } + // PARSER // -------------------------------------------------------------------------------------------- /// Parses the provided source into a [ProgramAst]. @@ -186,6 +194,31 @@ impl ProgramAst { } } + /// Loads the [SourceLocation] from the `source`. + /// + /// It expects the `start` location at the first position, and will subsequentially load the + /// body via [CodeBody::load_source_locations]. 
Finally, it will load the local procedures via + /// [ProcedureAst::load_source_locations]. + pub fn load_source_locations( + &mut self, + source: &mut R, + ) -> Result<(), DeserializationError> { + self.start = SourceLocation::read_from(source)?; + self.body.load_source_locations(source)?; + self.local_procs.iter_mut().try_for_each(|p| p.load_source_locations(source)) + } + + /// Writes the [SourceLocation] into `target`. + /// + /// It will write the `start` location, and then execute the body serialization via + /// [CodeBody::write_source_locations]. Finally, it will write the local procedures via + /// [ProcedureAst::write_source_locations]. + pub fn write_source_locations(&self, target: &mut W) { + self.start.write_into(target); + self.body.write_source_locations(target); + self.local_procs.iter().for_each(|p| p.write_source_locations(target)) + } + // DESTRUCTURING // -------------------------------------------------------------------------------------------- @@ -274,6 +307,14 @@ impl ModuleAst { self.docs.as_ref() } + // STATE MUTATORS + // -------------------------------------------------------------------------------------------- + + /// Clears the source locations from this module. + pub fn clear_locations(&mut self) { + self.local_procs.iter_mut().for_each(|p| p.clear_locations()) + } + // SERIALIZATION / DESERIALIZATION // -------------------------------------------------------------------------------------------- @@ -289,6 +330,25 @@ impl ModuleAst { let mut source = SliceReader::new(bytes); Self::read_from(&mut source) } + + /// Loads the [SourceLocation] of the procedures via [ProcedureAst::load_source_locations]. + /// + /// The local procedures are expected to have deterministic order from parse. This way, the + /// serialization can be simplified into a contiguous sequence of locations.
+ pub fn load_source_locations( + &mut self, + source: &mut R, + ) -> Result<(), DeserializationError> { + self.local_procs.iter_mut().try_for_each(|p| p.load_source_locations(source)) + } + + /// Writes the [SourceLocation] of the procedures via [ProcedureAst::write_source_locations]. + /// + /// The local procedures are expected to have deterministic order from parse. This way, the + /// serialization can be simplified into a contiguous sequence of locations. + pub fn write_source_locations(&self, target: &mut W) { + self.local_procs.iter().for_each(|p| p.write_source_locations(target)) + } } impl Serializable for ModuleAst { @@ -389,6 +449,48 @@ impl ProcedureAst { self.body = self.body.with_source_locations(locations); self } + + // PUBLIC ACCESSORS + // -------------------------------------------------------------------------------------------- + + /// Returns the [SourceLocation] associated with this procedure, if present. + pub fn source_locations(&self) -> impl Iterator { + iter::once(&self.start).chain(self.body.source_locations().iter()) + } + + // STATE MUTATORS + // -------------------------------------------------------------------------------------------- + + /// Clears the source locations from this Ast. + pub fn clear_locations(&mut self) { + self.start = SourceLocation::default(); + self.body.replace_locations([].to_vec()); + } + + // SERIALIZATION / DESERIALIZATION + // -------------------------------------------------------------------------------------------- + + /// Loads the [SourceLocation] from the `source`. + /// + /// It expects the `start` location at the first position, and will subsequentially load the + /// body via [CodeBody::load_source_locations]. + pub fn load_source_locations( + &mut self, + source: &mut R, + ) -> Result<(), DeserializationError> { + self.start = SourceLocation::read_from(source)?; + self.body.load_source_locations(source)?; + Ok(()) + } + + /// Writes the [SourceLocation] into `target`. 
+ /// + /// It will write the `start` location, and then execute the body serialization via + /// [CodeBody::write_source_locations]. + pub fn write_source_locations(&self, target: &mut W) { + self.start.write_into(target); + self.body.write_source_locations(target); + } } impl Serializable for ProcedureAst { diff --git a/assembly/src/parsers/tests.rs b/assembly/src/parsers/tests.rs index c44d89d918..a36ebaa1ce 100644 --- a/assembly/src/parsers/tests.rs +++ b/assembly/src/parsers/tests.rs @@ -1,9 +1,8 @@ use super::{ - BTreeMap, CodeBody, Instruction, LocalProcMap, ModuleAst, Node, ParsingError, ProcedureAst, - ProcedureId, ProgramAst, SourceLocation, Token, + BTreeMap, CodeBody, Felt, Instruction, LocalProcMap, ModuleAst, Node, ParsingError, + ProcedureAst, ProcedureId, ProgramAst, SourceLocation, Token, }; -use core::mem; -use vm_core::Felt; +use vm_core::utils::SliceReader; // UNIT TESTS // ================================================================================================ @@ -626,7 +625,7 @@ aliqua." ); ProgramAst::parse(source).expect_err("Program should contain body and no export"); - let mut module = ModuleAst::parse(source).unwrap(); + let module = ModuleAst::parse(source).unwrap(); let module_docs = "Test documenation for the whole module in parsing test. Lorem ipsum dolor sit amet, @@ -648,7 +647,7 @@ of the comments is correctly parsed. There was a bug here earlier."
let module_serialized = module.to_bytes(); let module_deserialized = ModuleAst::from_bytes(module_serialized.as_slice()).unwrap(); - clear_procs_loc_module(&mut module); + let module = clear_procs_loc_module(module); assert_eq!(module, module_deserialized); } @@ -725,12 +724,7 @@ fn test_ast_parsing_module_docs_fail() { #[test] fn test_ast_program_serde_simple() { let source = "begin push.0xabc234 push.0 assertz end"; - let mut program = ProgramAst::parse(source).unwrap(); - let program_serialized = program.to_bytes(); - let program_deserialized = ProgramAst::from_bytes(program_serialized.as_slice()).unwrap(); - - clear_procs_loc_program(&mut program); - assert_eq!(program, program_deserialized); + assert_correct_program_serialization(source); } #[test] @@ -746,12 +740,7 @@ fn test_ast_program_serde_local_procs() { exec.foo exec.bar end"; - let mut program = ProgramAst::parse(source).unwrap(); - let program_serialized = program.to_bytes(); - let program_deserialized = ProgramAst::from_bytes(program_serialized.as_slice()).unwrap(); - - clear_procs_loc_program(&mut program); - assert_eq!(program, program_deserialized); + assert_correct_program_serialization(source); } #[test] @@ -763,12 +752,7 @@ fn test_ast_program_serde_exported_procs() { export.bar.2 padw end"; - let mut module = ModuleAst::parse(source).unwrap(); - let module_serialized = module.to_bytes(); - let module_deserialized = ModuleAst::from_bytes(module_serialized.as_slice()).unwrap(); - - clear_procs_loc_module(&mut module); - assert_eq!(module, module_deserialized); + assert_correct_module_serialization(source); } #[test] @@ -800,13 +784,7 @@ fn test_ast_program_serde_control_flow() { end end"; - - let mut program = ProgramAst::parse(source).unwrap(); - let program_serialized = program.to_bytes(); - let program_deserialized = ProgramAst::from_bytes(program_serialized.as_slice()).unwrap(); - - clear_procs_loc_program(&mut program); - assert_eq!(program, program_deserialized); + 
assert_correct_program_serialization(source); } #[test] @@ -902,24 +880,76 @@ fn assert_program_output(source: &str, procedures: LocalProcMap, body: Vec /// /// Currently, the locations are not part of the serialized libraries; thus, they have to be /// cleaned before equality is checked for tests -#[cfg(test)] -fn clear_procs_loc_module(module: &mut ModuleAst) { +fn clear_procs_loc_module(mut module: ModuleAst) -> ModuleAst { module.local_procs.iter_mut().for_each(|m| { - m.body = CodeBody::new(mem::take(&mut m.body).into_parts().0); + m.body.replace_locations([].to_vec()); m.start = SourceLocation::default(); }); + module } /// Clears the proc locations. /// /// Currently, the locations are not part of the serialized libraries; thus, they have to be /// cleaned before equality is checked for tests -#[cfg(test)] -fn clear_procs_loc_program(program: &mut ProgramAst) { +fn clear_procs_loc_program(mut program: ProgramAst) -> ProgramAst { program.start = SourceLocation::default(); program.local_procs.iter_mut().for_each(|m| { - m.body = CodeBody::new(mem::take(&mut m.body).into_parts().0); + m.body.replace_locations([].to_vec()); m.start = SourceLocation::default(); }); - program.body = CodeBody::new(mem::take(&mut program.body).into_parts().0); + program.body.replace_locations([].to_vec()); + program +} + +fn assert_correct_program_serialization(source: &str) { + let program = ProgramAst::parse(source).unwrap(); + + // assert the correct program serialization + let program_serialized = program.to_bytes(); + let mut program_deserialized = ProgramAst::from_bytes(program_serialized.as_slice()).unwrap(); + let clear_program = clear_procs_loc_program(program.clone()); + assert_eq!(clear_program, program_deserialized); + + // assert the correct locations serialization + let mut locations = Vec::new(); + program.write_source_locations(&mut locations); + + // assert empty locations + { + let mut locations = program_deserialized.source_locations(); + let start = 
locations.next().unwrap(); + assert_eq!(start, &SourceLocation::default()); + assert!(locations.next().is_none()); + } + + program_deserialized + .load_source_locations(&mut SliceReader::new(&locations)) + .unwrap(); + assert_eq!(program, program_deserialized); +} + +fn assert_correct_module_serialization(source: &str) { + let module = ModuleAst::parse(source).unwrap(); + let module_serialized = module.to_bytes(); + let mut module_deserialized = ModuleAst::from_bytes(module_serialized.as_slice()).unwrap(); + let clear_module = clear_procs_loc_module(module.clone()); + assert_eq!(clear_module, module_deserialized); + + // assert the correct locations serialization + let mut locations = Vec::new(); + module.write_source_locations(&mut locations); + + // assert module locations are empty + module_deserialized.procs().iter().for_each(|m| { + let mut locations = m.source_locations(); + let start = locations.next().unwrap(); + assert_eq!(start, &SourceLocation::default()); + assert!(locations.next().is_none()); + }); + + module_deserialized + .load_source_locations(&mut SliceReader::new(&locations)) + .unwrap(); + assert_eq!(module, module_deserialized); } diff --git a/assembly/src/tokens/location.rs b/assembly/src/tokens/location.rs index 6ac8914bbd..db789b6f2d 100644 --- a/assembly/src/tokens/location.rs +++ b/assembly/src/tokens/location.rs @@ -1,3 +1,4 @@ +use super::{ByteReader, ByteWriter, Deserializable, DeserializationError, Serializable}; use core::fmt; // SOURCE LOCATION @@ -48,3 +49,18 @@ impl fmt::Display for SourceLocation { write!(f, "[{}:{}]", self.line, self.column) } } + +impl Serializable for SourceLocation { + fn write_into(&self, target: &mut W) { + target.write_u32(self.line); + target.write_u32(self.column); + } +} + +impl Deserializable for SourceLocation { + fn read_from(source: &mut R) -> Result { + let line = source.read_u32()?; + let column = source.read_u32()?; + Ok(Self { line, column }) + } +} diff --git a/assembly/src/tokens/mod.rs 
b/assembly/src/tokens/mod.rs index c3813b652f..e1c1a957a3 100644 --- a/assembly/src/tokens/mod.rs +++ b/assembly/src/tokens/mod.rs @@ -1,4 +1,7 @@ -use super::{BTreeMap, LibraryPath, ParsingError, ProcedureName, String, ToString, Vec}; +use super::{ + BTreeMap, ByteReader, ByteWriter, Deserializable, DeserializationError, LibraryPath, + ParsingError, ProcedureName, Serializable, String, ToString, Vec, +}; use core::fmt; mod lines; diff --git a/stdlib/build.rs b/stdlib/build.rs index 85a6b438da..de99e432ad 100644 --- a/stdlib/build.rs +++ b/stdlib/build.rs @@ -30,7 +30,8 @@ fn main() -> io::Result<()> { let namespace = LibraryNamespace::try_from("std".to_string()).expect("invalid base namespace"); let version = Version::try_from(env!("CARGO_PKG_VERSION")).expect("invalid cargo version"); - let stdlib = MaslLibrary::read_from_dir(ASM_DIR_PATH, namespace, version)?; + let locations = true; // store & load locations by default + let stdlib = MaslLibrary::read_from_dir(ASM_DIR_PATH, namespace, locations, version)?; let docs = stdlib .modules() .map(|module| (module.path.to_string(), module.ast.clone()))