From d964419826de916e8e17f5b942c7b5b7187674ab Mon Sep 17 00:00:00 2001 From: Mahmoud Abumandour Date: Thu, 12 Oct 2023 00:03:16 -0700 Subject: [PATCH 1/6] Refactor a common interface for code generators (preparing for LLVM) --- Cargo.toml | 4 +- src/code_generator.rs | 612 ++---------------------------------------- src/main.rs | 5 +- src/x86_generator.rs | 595 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 619 insertions(+), 597 deletions(-) create mode 100644 src/x86_generator.rs diff --git a/Cargo.toml b/Cargo.toml index 74e64ac..2d1ea25 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,8 @@ edition = "2021" phf = { version = "0.11.2", features = ["macros"] } rstest = "0.18.2" tempfile = "3" +inkwell = { git = "https://github.com/TheDan64/inkwell", branch = "master", features = ["llvm15-0"] } + [dependencies.uuid] version = "1.4.1" @@ -14,4 +16,4 @@ features = [ "v4", # Lets you generate random UUIDs "fast-rng", # Use a faster (but still sufficiently random) RNG "macro-diagnostics", # Enable better diagnostics for compile-time UUIDs -] \ No newline at end of file +] diff --git a/src/code_generator.rs b/src/code_generator.rs index 188ee86..b8079d0 100644 --- a/src/code_generator.rs +++ b/src/code_generator.rs @@ -1,22 +1,7 @@ use crate::ast::ASTNode::*; use crate::ast::*; -use crate::symbol_table; -use crate::symbol_table::*; -use crate::tokens::*; -use uuid::Uuid; - -pub struct CodeGenerator { - symbol_table: SymbolTable, -} - -impl CodeGenerator { - pub fn new() -> Self { - Self { - symbol_table: SymbolTable::new(), - } - } - - pub fn generate(&mut self, root: &ASTNode) -> String { +pub trait CodeGenerator { + fn generate(&mut self, root: &ASTNode) -> String { match root { TranslationUnit(..) => self.generate_translation_unit(root), ReturnStatement(..) => self.generate_return_statement(root), @@ -34,594 +19,33 @@ impl CodeGenerator { } } - fn generate_for(&mut self, node: &ASTNode) -> String { - match node { - For(_, [init, condition, update], body) => { - let stack_top = self.symbol_table.current_scope_stack_top(); - self.symbol_table - .push_scope(symbol_table::Scope::new(stack_top)); - let mut result = String::new(); - result.push_str(&self.generate(init)); - let enter_label = self.unique_label("__FOR_ENTER_"); - result.push_str(&format!("{}:\n", enter_label)); - let mut body = self.generate(body); - body.push_str(&self.generate(update)); - body.push_str(&format!("jmp {}\n", enter_label)); - let condition = self.generate(condition); - - if condition.is_empty() { - result.push_str(&body); - } else { - result.push_str(&self.generate_condition_block( - "__FOR_EXIT_", - &condition, - &body, - )); - } - - self.symbol_table.pop_scope(); - result - } - _ => panic!("Internal error: Expected for statement, found {:?}", node), - } - } - - fn generate_expression_statement(&mut self, node: &ASTNode) -> String { - match node { - ExpressionStatement(expression) => { - if let Expression::Assignment(..) = expression { - self.generate_assignment(expression) - } else { - self.generate_expression(expression) - } - } - _ => panic!( - "Internal error: Expected expression statement, found {:?}", - node - ), - } - } - - fn generate_do_while(&mut self, node: &ASTNode) -> String { - match node { - ASTNode::DoWhile(_, body, _, condition) => { - let mut result = String::new(); - let enter_label = self.unique_label("__DO_WHILE_ENTER_"); - let body_label = self.unique_label("__DO_WHILE_BODY_"); - result.push_str(&format!("jmp {}\n", body_label)); - let mut body = format!("{}:\n{}", body_label, &self.generate(body)); - body.push_str(&format!("jmp {}\n", enter_label)); - let condition = self.generate(condition); - result.push_str(&format!("{}:\n", enter_label)); - result.push_str(&self.generate_condition_block( - "__DO_WHILE_EXIT_", - &condition, - &body, - )); - result - } - _ => panic!("Internal error: Expected do while node, found {:?}", node), - } - } - - fn generate_while(&mut self, while_node: &ASTNode) -> String { - match while_node { - ASTNode::While(_, condition, body) => { - let mut result = String::new(); - let enter_label = self.unique_label("__WHILE_ENTER_"); - let condition = self.generate(condition); - let mut body = self.generate(body); - body.push_str(&format!("jmp {}\n", enter_label)); - - result.push_str(&format!("{}:\n", enter_label)); - result.push_str(&self.generate_condition_block("__WHILE_EXIT_", &condition, &body)); - result - } - _ => panic!( - "Internal error: Expected while node, found {:?}", - while_node - ), - } - } - - fn generate_scope(&mut self, scope: &ASTNode) -> String { - self.generate_scope_with_variables(scope, symbol_table::Scope::default(&self.symbol_table)) - } - - fn generate_scope_with_variables( - &mut self, - scope_node: &ASTNode, - scope: symbol_table::Scope, - ) -> String { - let mut result = String::new(); - match scope_node { - ASTNode::Scope(statements) => { - self.symbol_table.push_scope(scope); - for statement in statements { - result.push_str(self.generate(statement).as_str()); - } - self.symbol_table.pop_scope(); - result - } - _ => panic!(), - } - } - - fn generate_expression(&mut self, expression: &Expression) -> String { - match expression { - Expression::Empty => "".to_string(), - Expression::IntegerLiteral(_) => self.generate_integral_literal(expression), - Expression::Variable(_) => self.generate_variable_expression(expression), - Expression::Binary(_, _, _) => self.generate_binary_expression(expression), - Expression::Unary(_, _) => self.generate_unary_expression(expression), - Expression::Assignment(..) => self.generate_assignment(expression), - Expression::FunctionCall(..) => self.generate_function_call(expression), - Expression::Parenthesized(internal_expression) => { - self.generate_expression(internal_expression) - } - } - } - - fn get_expression_size_in_bytes(exp: &Expression) -> usize { - 4 - } - - fn generate_function_call(&mut self, call: &Expression) -> String { - let mut result = String::new(); - match call { - Expression::FunctionCall(name, parameters) => { - // TODO extract pointer size into a function (and use it in the symbol table too) - let mut push_offset = -16; // return address + rbp - for param in parameters { - let param_size = CodeGenerator::get_expression_size_in_bytes(param); - // Since we're dealing with the stack, subtraction of the offset occurs first - push_offset -= param_size as i32; - result.push_str(&format!( - "{computation}\ - {mov} {result}, {offset}(%rsp)\n", - computation = self.generate_expression(param), - mov = CodeGenerator::mov_mnemonic(param_size), - result = CodeGenerator::get_reg1(param_size), - offset = push_offset - )); - } - result.push_str(&format!("call {}\n", name.value)); - result - } - _ => panic!("Exp"), - } - } - - fn generate_unary_expression(&mut self, expression: &Expression) -> String { - let mut result = String::new(); - match expression { - Expression::Unary(operator, expression) => { - result.push_str(self.generate_expression(&expression).as_str()); - match operator.token_type { - TokenType::Plus => {} - TokenType::Minus => { - result.push_str(&format!("neg {}\n", CodeGenerator::get_reg1(8))); - } - _ => panic!("Unsupported unary operator: {:#?}", operator), - } - } - _ => panic!( - "Internal Error: Expected a unary expression, found: {:#?}", - expression - ), - } - result - } - - fn generate_binary_expression(&mut self, expression: &Expression) -> String { - let mut result = String::new(); - match expression { - Expression::Binary(token, left, right) => { - result.push_str(&self.generate_expression(right)); - result.push_str(format!("push {}\n", CodeGenerator::get_reg1(8)).as_str()); - result.push_str(&self.generate_expression(left)); - result.push_str(format!("pop {}\n", CodeGenerator::get_reg2(8)).as_str()); - - // TODO: Support floating point operations - let reg1 = CodeGenerator::get_reg1(4); - let reg2 = CodeGenerator::get_reg2(4); - match token.token_type { - TokenType::Plus => { - result.push_str(&format!("add {}, {}\n", reg2, reg1)); - } - TokenType::Minus => { - result.push_str(&format!("sub {}, {}\n", reg2, reg1)); - } - TokenType::Star => { - result.push_str(&format!("imul {}, {}\n", reg2, reg1)); - } - TokenType::Slash => { - result.push_str(&format!("push %rax\n")); - result.push_str(&format!("push %rdx\n")); - result.push_str(&format!("mov {}, %eax\n", reg1)); - result.push_str(&format!("mov $0, %edx\n")); - result.push_str(&format!("idiv {}\n", reg2)); - result.push_str(&format!("mov %eax, {}\n", reg1)); - result.push_str(&format!("pop %rdx\n")); - result.push_str(&format!("pop %rax\n")); - } - // TODO: Account for short-circuiting of boolean expressions. - // For now, we evaluate the full expression no matter how it - // is structured. - TokenType::AndAnd => { - result.push_str(&format!("and {}, {}\nand $1, {}\n", reg2, reg1, reg1)); - } - TokenType::BarBar => { - result.push_str(&format!("or {}, {}\nand $1, {}\n", reg2, reg1, reg1)); - } - TokenType::And => { - result.push_str(&format!("and {}, {}\n", reg2, reg1)); - } - TokenType::Bar => { - result.push_str(&format!("or {}, {}\n", reg2, reg1)); - } - TokenType::Caret => { - result.push_str(&format!("xor {}, {}\n", reg2, reg1)); - } - TokenType::EqualsEquals => { - result.push_str(&format!("cmp {}, {}\n", reg2, reg1)); - result.push_str(&format!("sete %bl\n")); - result.push_str(&format!("movzbl %bl, {}\n", reg1)); - } - TokenType::NotEquals => { - result.push_str(&format!("cmp {}, {}\n", reg2, reg1)); - result.push_str(&format!("setne %bl\n")); - result.push_str(&format!("movzbl %bl, %ebx\n")); - } - TokenType::GreaterThan => { - result.push_str(&format!("cmp {}, {}\n", reg2, reg1)); - result.push_str(&format!("setg %bl\n")); - result.push_str(&format!("movzbl %bl, %ebx\n")); - } - TokenType::GreaterThanEquals => { - result.push_str(&format!("cmp {}, {}\n", reg2, reg1)); - result.push_str(&format!("setge %bl\n")); - result.push_str(&format!("movzbl %bl, %ebx\n")); - } - TokenType::LessThan => { - result.push_str(&format!("cmp {}, {}\n", reg2, reg1)); - result.push_str(&format!("setl %bl\n")); - result.push_str(&format!("movzbl %bl, %ebx\n")); - } - TokenType::LessThanEquals => { - result.push_str(&format!("cmp {}, {}\n", reg2, reg1)); - result.push_str(&format!("setle %bl\n")); - result.push_str(&format!("movzbl %bl, %ebx\n")); - } - _ => panic!("Unsupported operator: {:?}", token), - } - } - _ => panic!( - "Internal Error: Expected a binary expression, found: {:#?}", - expression - ), - } - result - } - - fn generate_integral_literal(&self, expression: &Expression) -> String { - match expression { - Expression::IntegerLiteral(token) => { - format!( - "mov ${}, {}\n", - token.value.clone(), - CodeGenerator::get_reg1(8) - ) - } - _ => panic!( - "Internal Error: Expected integral literal, found: {:#?}", - expression - ), - } - } - - fn generate_variable_expression(&self, expression: &Expression) -> String { - match expression { - Expression::Variable(token) => { - let definition = self - .symbol_table - .get(token.value.as_str()) - .unwrap_or_else(|| panic!("Undefined variable: {}", token.value.as_str())); - match definition { - Symbol::Variable { stack_offset, .. } => { - let mov_instruction = CodeGenerator::mov_mnemonic(definition.size()); - format!( - "{} {}(%rbp), {}\n", - mov_instruction, - *stack_offset, - CodeGenerator::get_reg1(definition.size()) - ) - } - _ => panic!(), - } - } - _ => panic!( - "Internal Error: Expected variable expression, found: {:#?}", - expression - ), - } - } - - fn generate_translation_unit(&mut self, node: &ASTNode) -> String { - let mut result = String::new(); - match node { - TranslationUnit(nodes_vector) => { - // TODO Support global variables - // FIXME this is a scope for storing global FUNCTION declarations only - // and should not be used for global variables - self.symbol_table.push_scope(symbol_table::Scope::new(0)); - for node in nodes_vector { - result.push_str(&self.generate(node)); - } - self.symbol_table.pop_scope(); - result - } - _ => panic!("Internal Error: Expected program node, found {:?}", node), - } - } - - fn generate_return_statement(&mut self, node: &ASTNode) -> String { - match node { - ReturnStatement(_, expr_node) => { - let mut result = self.generate(expr_node); - result.push_str(&format!( - "mov %rbp, %rsp\nmov {}, %rax\npop %rbp\nret\n", - CodeGenerator::get_reg1(8) - )); - result - } - _ => panic!("Return: Expected a return node, found {:?}", node), - } - } - - fn generate_return_void() -> &'static str { - "mov %rbp, %rsp\npop %rbp\nret\n" - } - - fn generate_assignment(&mut self, expression: &Expression) -> String { - match expression { - Expression::Assignment(identifier, expr_node) => { - let variable = self.symbol_table.get(&identifier.value).unwrap_or_else(|| { - panic!( - "Assignment: the identifier `{}` is not defined", - identifier.value - ) - }); - let mut variable_stack_offset = 0; - let variable_size = variable.size(); - match variable { - Symbol::Variable { stack_offset, .. } => { - variable_stack_offset = *stack_offset; - } - _ => panic!(), - } - - // TODO support referential assignment - let mov_instruction = CodeGenerator::mov_mnemonic(variable.size()); - let mut result = self.generate_expression(expr_node); - - result.push_str(&format!( - "{} {}, {}(%rbp)\n", - mov_instruction, - CodeGenerator::get_reg1(variable_size), - variable_stack_offset - )); - result - } - _ => panic!(), - } - } - - fn unique_label(&mut self, prefix: &str) -> String { - return format!("{}_{}", prefix, Uuid::new_v4().simple()); - } - - fn generate_condition_block(&mut self, prefix: &str, condition: &str, body: &str) -> String { - let mut results = String::new(); - results.push_str(condition); - results.push_str(&format!("cmp $0, {}\n", CodeGenerator::get_reg1(8))); - let else_label = self.unique_label(prefix); - results.push_str(&format!("je {}\n", else_label)); - results.push_str(body); - results.push_str(&format!("{}:\n", else_label)); - results - } - - fn generate_if_statement(&mut self, node: &ASTNode) -> String { - match node { - ASTNode::If(_, condition, body, else_body) => { - let mut results = String::new(); - let condition = self.generate(condition); - let body = self.generate(body); - results.push_str(&self.generate_condition_block("__IF_", &condition, &body)); - if let Some(else_body) = else_body { - results.push_str(&self.generate(else_body)); - } - results - } - _ => panic!("Internal error: Expected if statement, found {:?}", node), - } - } - - fn generate_variable_declaration(&mut self, node: &ASTNode) -> String { - match node { - VariableDeclaration(variable_type, identifier) => { - if self - .symbol_table - .get_at_current_scope(&identifier.value) - .is_some() - { - panic!( - "Declaration: the identifier `{}` is already in use", - identifier.value - ) - } - - self.symbol_table - .insert_top(&identifier.value, &variable_type.value); - } - _ => panic!("Declaration: Expected declaration node, found {:?}", node), - } - "".to_string() - } - - fn generate_variable_definition(&mut self, node: &ASTNode) -> String { - if let VariableDefinition(type_token, identifier, expression) = node { - self.generate_variable_declaration(&VariableDeclaration( - type_token.clone(), - identifier.clone(), - )); - if let ExpressionNode(expr) = &(**expression) { - self.generate_assignment(&Expression::Assignment( - identifier.clone(), - Box::new(expr.clone()), - )) - } else { - panic!( - "Internal error: Expected expression node, found {:?}", - expression - ) - } - } else { - panic!( - "Internal error: Expected variable definition, found {:?}", - node - ) - } - } - - fn generate_function_declaration(&mut self, node: &ASTNode) -> String { - match node { - FunctionDeclaration(return_type, identifier, func_parameters) => { - if self - .symbol_table - .get_at_current_scope(&identifier.value) - .is_some() - { - panic!( - "Declaration: the function `{}` is already declared", - identifier.value - ) - } - - let mut parameters = Vec::new(); - for node in func_parameters { - if let VariableDeclaration(variable_type, ..) = node { - parameters.push(variable_type.value.clone()); - } else if let FunctionDeclaration(..) = node { - todo!("Support function declaration as a parameter") - } else { - panic!("") - } - } - let symbol = Symbol::Function { - return_type: return_type.value.clone(), - parameters, - }; - - self.symbol_table.insert(&identifier.value, &symbol); - } - _ => panic!("Declaration: Expected declaration node, found {:?}", node), - } - "".to_string() - } - - fn generate_function_definition(&mut self, node: &ASTNode) -> String { - let mut result = String::new(); - - if let FunctionDefinition(return_type, identifier, parameters, body) = node { - self.generate_function_declaration(&FunctionDeclaration( - return_type.clone(), - identifier.clone(), - parameters.clone(), - )); - - let mut scope = symbol_table::Scope::default(&self.symbol_table); - for param in parameters { - if let VariableDeclaration(variable_type, name) = param { - scope.insert_top(name.value.as_str(), &variable_type.value); - } else if let FunctionDeclaration(..) = node { - todo!("Support function declaration as a parameter") - } else { - panic!("") - } - } - - self.symbol_table.reset_largest_offset(); - result.push_str(&self.generate_scope_with_variables(body, scope)); - - // Quick hack. Instead of checking whether the function returns - // at the end, just inject a redundant return - // TODO show warnings in case of mismatched returns - - format!( - ".global {name}\n\ - {name}:\n\ - push %rbp\n\ - mov %rsp, %rbp\n\ - subq ${frame_size}, %rsp\n\ - {body}\ - # Redundant return ---\n\ - {redundant_return}\ - # --------------------\n\ - ", - name = identifier.value, - frame_size = -self.symbol_table.current_largest_offset(), - body = result, - redundant_return = CodeGenerator::generate_return_void() - ) - } else { - panic!( - "Internal error: Expected variable definition, found {:?}", - node - ) - } - } - - fn mov_mnemonic(size: usize) -> &'static str { - match size { - 1 => "movb", - 2 => "movw", - 4 => "movl", - 8 => "movq", - _ => panic!("Unsupported size `{}`.", size), - } - } - - fn get_reg1(size: usize) -> &'static str { - match size { - 1 | 2 | 4 => "%ebx", - 8 => "%rbx", - _ => panic!("Invalid register size: {}", size), - } - } - - fn get_reg2(size: usize) -> &'static str { - match size { - 1 | 2 | 4 => "%ecx", - 8 => "%rcx", - _ => panic!("Invalid register size: {}", size), - } - } + fn generate_translation_unit(&mut self, node: &ASTNode) -> String; + fn generate_return_statement(&mut self, node: &ASTNode) -> String; + fn generate_variable_declaration(&mut self, node: &ASTNode) -> String; + fn generate_variable_definition(&mut self, node: &ASTNode) -> String; + fn generate_function_declaration(&mut self, node: &ASTNode) -> String; + fn generate_function_definition(&mut self, node: &ASTNode) -> String; + fn generate_expression(&mut self, expression: &Expression) -> String; + fn generate_scope(&mut self, scope: &ASTNode) -> String; + fn generate_if_statement(&mut self, node: &ASTNode) -> String; + fn generate_while(&mut self, while_node: &ASTNode) -> String; + fn generate_do_while(&mut self, node: &ASTNode) -> String; + fn generate_expression_statement(&mut self, node: &ASTNode) -> String; + fn generate_for(&mut self, node: &ASTNode) -> String; } #[cfg(test)] mod tests { - - use super::CodeGenerator; + use crate::code_generator::*; use crate::lexer::Lexer; use crate::parser::Parser; use crate::test_utils::*; + use crate::x86_generator::X86CodeGenerator; fn generate_code(src: String) -> String { let tokens = Lexer::new(src).lex(); let ast = Parser::new(tokens).parse(); - let mut generator = CodeGenerator::new(); + let mut generator = X86CodeGenerator::new(); let generated = generator.generate(&ast); return generated; } diff --git a/src/main.rs b/src/main.rs index 667c536..60b049b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -5,6 +5,7 @@ mod parser; mod symbol_table; mod test_utils; mod tokens; +mod x86_generator; -fn main() { -} + +fn main() {} diff --git a/src/x86_generator.rs b/src/x86_generator.rs new file mode 100644 index 0000000..c1ecd12 --- /dev/null +++ b/src/x86_generator.rs @@ -0,0 +1,595 @@ +use crate::ast::ASTNode::*; +use crate::ast::*; +use crate::code_generator::*; +use crate::symbol_table::{self, Symbol, SymbolTable}; +use crate::tokens::*; +use uuid::Uuid; + +pub struct X86CodeGenerator { + symbol_table: SymbolTable, +} + +impl X86CodeGenerator { + pub fn new() -> Self { + Self { + symbol_table: SymbolTable::new(), + } + } + + fn generate_scope_with_variables( + &mut self, + scope_node: &ASTNode, + scope: symbol_table::Scope, + ) -> String { + let mut result = String::new(); + match scope_node { + ASTNode::Scope(statements) => { + self.symbol_table.push_scope(scope); + for statement in statements { + result.push_str(self.generate(statement).as_str()); + } + self.symbol_table.pop_scope(); + result + } + _ => panic!(), + } + } + + fn get_expression_size_in_bytes(exp: &Expression) -> usize { + 4 + } + + fn generate_function_call(&mut self, call: &Expression) -> String { + let mut result = String::new(); + match call { + Expression::FunctionCall(name, parameters) => { + // TODO extract pointer size into a function (and use it in the symbol table too) + let mut push_offset = -16; // return address + rbp + for param in parameters { + let param_size = X86CodeGenerator::get_expression_size_in_bytes(param); + // Since we're dealing with the stack, subtraction of the offset occurs first + push_offset -= param_size as i32; + result.push_str(&format!( + "{computation}\ + {mov} {result}, {offset}(%rsp)\n", + computation = self.generate_expression(param), + mov = X86CodeGenerator::mov_mnemonic(param_size), + result = X86CodeGenerator::get_reg1(param_size), + offset = push_offset + )); + } + result.push_str(&format!("call {}\n", name.value)); + result + } + _ => panic!("Exp"), + } + } + + fn generate_unary_expression(&mut self, expression: &Expression) -> String { + let mut result = String::new(); + match expression { + Expression::Unary(operator, expression) => { + result.push_str(self.generate_expression(&expression).as_str()); + match operator.token_type { + TokenType::Plus => {} + TokenType::Minus => { + result.push_str(&format!("neg {}\n", X86CodeGenerator::get_reg1(8))); + } + _ => panic!("Unsupported unary operator: {:#?}", operator), + } + } + _ => panic!( + "Internal Error: Expected a unary expression, found: {:#?}", + expression + ), + } + result + } + + fn generate_binary_expression(&mut self, expression: &Expression) -> String { + let mut result = String::new(); + match expression { + Expression::Binary(token, left, right) => { + result.push_str(&self.generate_expression(right)); + result.push_str(format!("push {}\n", X86CodeGenerator::get_reg1(8)).as_str()); + result.push_str(&self.generate_expression(left)); + result.push_str(format!("pop {}\n", X86CodeGenerator::get_reg2(8)).as_str()); + + // TODO: Support floating point operations + let reg1 = X86CodeGenerator::get_reg1(4); + let reg2 = X86CodeGenerator::get_reg2(4); + match token.token_type { + TokenType::Plus => { + result.push_str(&format!("add {}, {}\n", reg2, reg1)); + } + TokenType::Minus => { + result.push_str(&format!("sub {}, {}\n", reg2, reg1)); + } + TokenType::Star => { + result.push_str(&format!("imul {}, {}\n", reg2, reg1)); + } + TokenType::Slash => { + result.push_str(&format!("push %rax\n")); + result.push_str(&format!("push %rdx\n")); + result.push_str(&format!("mov {}, %eax\n", reg1)); + result.push_str(&format!("mov $0, %edx\n")); + result.push_str(&format!("idiv {}\n", reg2)); + result.push_str(&format!("mov %eax, {}\n", reg1)); + result.push_str(&format!("pop %rdx\n")); + result.push_str(&format!("pop %rax\n")); + } + // TODO: Account for short-circuiting of boolean expressions. + // For now, we evaluate the full expression no matter how it + // is structured. + TokenType::AndAnd => { + result.push_str(&format!("and {}, {}\nand $1, {}\n", reg2, reg1, reg1)); + } + TokenType::BarBar => { + result.push_str(&format!("or {}, {}\nand $1, {}\n", reg2, reg1, reg1)); + } + TokenType::And => { + result.push_str(&format!("and {}, {}\n", reg2, reg1)); + } + TokenType::Bar => { + result.push_str(&format!("or {}, {}\n", reg2, reg1)); + } + TokenType::Caret => { + result.push_str(&format!("xor {}, {}\n", reg2, reg1)); + } + TokenType::EqualsEquals => { + result.push_str(&format!("cmp {}, {}\n", reg2, reg1)); + result.push_str(&format!("sete %bl\n")); + result.push_str(&format!("movzbl %bl, {}\n", reg1)); + } + TokenType::NotEquals => { + result.push_str(&format!("cmp {}, {}\n", reg2, reg1)); + result.push_str(&format!("setne %bl\n")); + result.push_str(&format!("movzbl %bl, %ebx\n")); + } + TokenType::GreaterThan => { + result.push_str(&format!("cmp {}, {}\n", reg2, reg1)); + result.push_str(&format!("setg %bl\n")); + result.push_str(&format!("movzbl %bl, %ebx\n")); + } + TokenType::GreaterThanEquals => { + result.push_str(&format!("cmp {}, {}\n", reg2, reg1)); + result.push_str(&format!("setge %bl\n")); + result.push_str(&format!("movzbl %bl, %ebx\n")); + } + TokenType::LessThan => { + result.push_str(&format!("cmp {}, {}\n", reg2, reg1)); + result.push_str(&format!("setl %bl\n")); + result.push_str(&format!("movzbl %bl, %ebx\n")); + } + TokenType::LessThanEquals => { + result.push_str(&format!("cmp {}, {}\n", reg2, reg1)); + result.push_str(&format!("setle %bl\n")); + result.push_str(&format!("movzbl %bl, %ebx\n")); + } + _ => panic!("Unsupported operator: {:?}", token), + } + } + _ => panic!( + "Internal Error: Expected a binary expression, found: {:#?}", + expression + ), + } + result + } + + fn generate_integral_literal(&self, expression: &Expression) -> String { + match expression { + Expression::IntegerLiteral(token) => { + format!( + "mov ${}, {}\n", + token.value.clone(), + X86CodeGenerator::get_reg1(8) + ) + } + _ => panic!( + "Internal Error: Expected integral literal, found: {:#?}", + expression + ), + } + } + + fn generate_variable_expression(&self, expression: &Expression) -> String { + match expression { + Expression::Variable(token) => { + let definition = self + .symbol_table + .get(token.value.as_str()) + .unwrap_or_else(|| panic!("Undefined variable: {}", token.value.as_str())); + match definition { + Symbol::Variable { stack_offset, .. } => { + let mov_instruction = X86CodeGenerator::mov_mnemonic(definition.size()); + format!( + "{} {}(%rbp), {}\n", + mov_instruction, + *stack_offset, + X86CodeGenerator::get_reg1(definition.size()) + ) + } + _ => panic!(), + } + } + _ => panic!( + "Internal Error: Expected variable expression, found: {:#?}", + expression + ), + } + } + + fn generate_return_void() -> &'static str { + "mov %rbp, %rsp\npop %rbp\nret\n" + } + + fn generate_assignment(&mut self, expression: &Expression) -> String { + match expression { + Expression::Assignment(identifier, expr_node) => { + let variable = self.symbol_table.get(&identifier.value).unwrap_or_else(|| { + panic!( + "Assignment: the identifier `{}` is not defined", + identifier.value + ) + }); + let mut variable_stack_offset = 0; + let variable_size = variable.size(); + match variable { + Symbol::Variable { stack_offset, .. } => { + variable_stack_offset = *stack_offset; + } + _ => panic!(), + } + + // TODO support referential assignment + let mov_instruction = X86CodeGenerator::mov_mnemonic(variable.size()); + let mut result = self.generate_expression(expr_node); + + result.push_str(&format!( + "{} {}, {}(%rbp)\n", + mov_instruction, + X86CodeGenerator::get_reg1(variable_size), + variable_stack_offset + )); + result + } + _ => panic!(), + } + } + + fn unique_label(&mut self, prefix: &str) -> String { + return format!("{}_{}", prefix, Uuid::new_v4().simple()); + } + + fn generate_condition_block(&mut self, prefix: &str, condition: &str, body: &str) -> String { + let mut results = String::new(); + results.push_str(condition); + results.push_str(&format!("cmp $0, {}\n", X86CodeGenerator::get_reg1(8))); + let else_label = self.unique_label(prefix); + results.push_str(&format!("je {}\n", else_label)); + results.push_str(body); + results.push_str(&format!("{}:\n", else_label)); + results + } + + fn mov_mnemonic(size: usize) -> &'static str { + match size { + 1 => "movb", + 2 => "movw", + 4 => "movl", + 8 => "movq", + _ => panic!("Unsupported size `{}`.", size), + } + } + + fn get_reg1(size: usize) -> &'static str { + match size { + 1 | 2 | 4 => "%ebx", + 8 => "%rbx", + _ => panic!("Invalid register size: {}", size), + } + } + + fn get_reg2(size: usize) -> &'static str { + match size { + 1 | 2 | 4 => "%ecx", + 8 => "%rcx", + _ => panic!("Invalid register size: {}", size), + } + } +} + +impl CodeGenerator for X86CodeGenerator { + fn generate_translation_unit(&mut self, node: &ASTNode) -> String { + let mut result = String::new(); + match node { + TranslationUnit(nodes_vector) => { + // TODO Support global variables + // FIXME this is a scope for storing global FUNCTION declarations only + // and should not be used for global variables + self.symbol_table.push_scope(symbol_table::Scope::new(0)); + for node in nodes_vector { + result.push_str(&self.generate(node)); + } + self.symbol_table.pop_scope(); + result + } + _ => panic!("Internal Error: Expected program node, found {:?}", node), + } + } + + fn generate_return_statement(&mut self, node: &ASTNode) -> String { + match node { + ReturnStatement(_, expr_node) => { + let mut result = self.generate(expr_node); + result.push_str(&format!( + "mov %rbp, %rsp\nmov {}, %rax\npop %rbp\nret\n", + X86CodeGenerator::get_reg1(8) + )); + result + } + _ => panic!("Return: Expected a return node, found {:?}", node), + } + } + + fn generate_variable_declaration(&mut self, node: &ASTNode) -> String { + match node { + VariableDeclaration(variable_type, identifier) => { + if self + .symbol_table + .get_at_current_scope(&identifier.value) + .is_some() + { + panic!( + "Declaration: the identifier `{}` is already in use", + identifier.value + ) + } + + self.symbol_table + .insert_top(&identifier.value, &variable_type.value); + } + _ => panic!("Declaration: Expected declaration node, found {:?}", node), + } + "".to_string() + } + + fn generate_variable_definition(&mut self, node: &ASTNode) -> String { + if let VariableDefinition(type_token, identifier, expression) = node { + self.generate_variable_declaration(&VariableDeclaration( + type_token.clone(), + identifier.clone(), + )); + if let ExpressionNode(expr) = &(**expression) { + self.generate_assignment(&Expression::Assignment( + identifier.clone(), + Box::new(expr.clone()), + )) + } else { + panic!( + "Internal error: Expected expression node, found {:?}", + expression + ) + } + } else { + panic!( + "Internal error: Expected variable definition, found {:?}", + node + ) + } + } + + fn generate_function_declaration(&mut self, node: &ASTNode) -> String { + match node { + FunctionDeclaration(return_type, identifier, func_parameters) => { + if self + .symbol_table + .get_at_current_scope(&identifier.value) + .is_some() + { + panic!( + "Declaration: the function `{}` is already declared", + identifier.value + ) + } + + let mut parameters = Vec::new(); + for node in func_parameters { + if let VariableDeclaration(variable_type, ..) = node { + parameters.push(variable_type.value.clone()); + } else if let FunctionDeclaration(..) = node { + todo!("Support function declaration as a parameter") + } else { + panic!("") + } + } + let symbol = Symbol::Function { + return_type: return_type.value.clone(), + parameters, + }; + + self.symbol_table.insert(&identifier.value, &symbol); + } + _ => panic!("Declaration: Expected declaration node, found {:?}", node), + } + "".to_string() + } + + fn generate_function_definition(&mut self, node: &ASTNode) -> String { + let mut result = String::new(); + + if let FunctionDefinition(return_type, identifier, parameters, body) = node { + self.generate_function_declaration(&FunctionDeclaration( + return_type.clone(), + identifier.clone(), + parameters.clone(), + )); + + let mut scope = symbol_table::Scope::default(&self.symbol_table); + for param in parameters { + if let VariableDeclaration(variable_type, name) = param { + scope.insert_top(name.value.as_str(), &variable_type.value); + } else if let FunctionDeclaration(..) = node { + todo!("Support function declaration as a parameter") + } else { + panic!("") + } + } + + self.symbol_table.reset_largest_offset(); + result.push_str(&self.generate_scope_with_variables(body, scope)); + + // Quick hack. Instead of checking whether the function returns + // at the end, just inject a redundant return + // TODO show warnings in case of mismatched returns + + format!( + ".global {name}\n\ + {name}:\n\ + push %rbp\n\ + mov %rsp, %rbp\n\ + subq ${frame_size}, %rsp\n\ + {body}\ + # Redundant return ---\n\ + {redundant_return}\ + # --------------------\n\ + ", + name = identifier.value, + frame_size = -self.symbol_table.current_largest_offset(), + body = result, + redundant_return = X86CodeGenerator::generate_return_void() + ) + } else { + panic!( + "Internal error: Expected variable definition, found {:?}", + node + ) + } + } + + fn generate_expression(&mut self, expression: &Expression) -> String { + match expression { + Expression::Empty => "".to_string(), + Expression::IntegerLiteral(_) => self.generate_integral_literal(expression), + Expression::Variable(_) => self.generate_variable_expression(expression), + Expression::Binary(_, _, _) => self.generate_binary_expression(expression), + Expression::Unary(_, _) => self.generate_unary_expression(expression), + Expression::Assignment(..) => self.generate_assignment(expression), + Expression::FunctionCall(..) => self.generate_function_call(expression), + Expression::Parenthesized(internal_expression) => { + self.generate_expression(internal_expression) + } + } + } + + fn generate_scope(&mut self, scope: &ASTNode) -> String { + self.generate_scope_with_variables(scope, symbol_table::Scope::default(&self.symbol_table)) + } + + fn generate_if_statement(&mut self, node: &ASTNode) -> String { + match node { + ASTNode::If(_, condition, body, else_body) => { + let mut results = String::new(); + let condition = self.generate(condition); + let body = self.generate(body); + results.push_str(&self.generate_condition_block("__IF_", &condition, &body)); + if let Some(else_body) = else_body { + results.push_str(&self.generate(else_body)); + } + results + } + _ => panic!("Internal error: Expected if statement, found {:?}", node), + } + } + + fn generate_while(&mut self, while_node: &ASTNode) -> String { + match while_node { + ASTNode::While(_, condition, body) => { + let mut result = String::new(); + let enter_label = self.unique_label("__WHILE_ENTER_"); + let condition = self.generate(condition); + let mut body = self.generate(body); + body.push_str(&format!("jmp {}\n", enter_label)); + + result.push_str(&format!("{}:\n", enter_label)); + result.push_str(&self.generate_condition_block("__WHILE_EXIT_", &condition, &body)); + result + } + _ => panic!( + "Internal error: Expected while node, found {:?}", + while_node + ), + } + } + + fn generate_do_while(&mut self, node: &ASTNode) -> String { + match node { + ASTNode::DoWhile(_, body, _, condition) => { + let mut result = String::new(); + let enter_label = self.unique_label("__DO_WHILE_ENTER_"); + let body_label = self.unique_label("__DO_WHILE_BODY_"); + result.push_str(&format!("jmp {}\n", body_label)); + let mut body = format!("{}:\n{}", body_label, &self.generate(body)); + body.push_str(&format!("jmp {}\n", enter_label)); + let condition = self.generate(condition); + result.push_str(&format!("{}:\n", enter_label)); + result.push_str(&self.generate_condition_block( + "__DO_WHILE_EXIT_", + &condition, + &body, + )); + result + } + _ => panic!("Internal error: Expected do while node, found {:?}", node), + } + } + + fn generate_expression_statement(&mut self, node: &ASTNode) -> String { + match node { + ExpressionStatement(expression) => { + if let Expression::Assignment(..) = expression { + self.generate_assignment(expression) + } else { + self.generate_expression(expression) + } + } + _ => panic!( + "Internal error: Expected expression statement, found {:?}", + node + ), + } + } + + fn generate_for(&mut self, node: &ASTNode) -> String { + match node { + For(_, [init, condition, update], body) => { + let stack_top = self.symbol_table.current_scope_stack_top(); + self.symbol_table + .push_scope(symbol_table::Scope::new(stack_top)); + let mut result = String::new(); + result.push_str(&self.generate(init)); + let enter_label = self.unique_label("__FOR_ENTER_"); + result.push_str(&format!("{}:\n", enter_label)); + let mut body = self.generate(body); + body.push_str(&self.generate(update)); + body.push_str(&format!("jmp {}\n", enter_label)); + let condition = self.generate(condition); + + if condition.is_empty() { + result.push_str(&body); + } else { + result.push_str(&self.generate_condition_block( + "__FOR_EXIT_", + &condition, + &body, + )); + } + + self.symbol_table.pop_scope(); + result + } + _ => panic!("Internal error: Expected for statement, found {:?}", node), + } + } +} From bdbaa37ac951a3010d1b1c953b349e1dab40636d Mon Sep 17 00:00:00 2001 From: Mahmoud Abumandour Date: Thu, 12 Oct 2023 00:23:47 -0700 Subject: [PATCH 2/6] Structure the project into modules --- src/{ => code_generation}/code_generator.rs | 14 +++++++------- src/code_generation/mod.rs | 3 +++ src/{ => code_generation}/symbol_table.rs | 5 ++--- src/{ => code_generation}/x86_generator.rs | 10 +++++----- src/{ => lexical_analysis}/lexer.rs | 2 +- src/lexical_analysis/mod.rs | 2 ++ src/{ => lexical_analysis}/tokens.rs | 0 src/main.rs | 13 ++++--------- src/{ => syntax_analysis}/ast.rs | 2 +- src/syntax_analysis/mod.rs | 2 ++ src/{ => syntax_analysis}/parser.rs | 10 +++++----- src/utils/mod.rs | 1 + src/{ => utils}/test_utils.rs | 0 13 files changed, 33 insertions(+), 31 deletions(-) rename src/{ => code_generation}/code_generator.rs (96%) create mode 100644 src/code_generation/mod.rs rename src/{ => code_generation}/symbol_table.rs (95%) rename src/{ => code_generation}/x86_generator.rs (98%) rename src/{ => lexical_analysis}/lexer.rs (99%) create mode 100644 src/lexical_analysis/mod.rs rename src/{ => lexical_analysis}/tokens.rs (100%) rename src/{ => syntax_analysis}/ast.rs (97%) create mode 100644 src/syntax_analysis/mod.rs rename src/{ => syntax_analysis}/parser.rs (99%) create mode 100644 src/utils/mod.rs rename src/{ => utils}/test_utils.rs (100%) diff --git a/src/code_generator.rs b/src/code_generation/code_generator.rs similarity index 96% rename from src/code_generator.rs rename to src/code_generation/code_generator.rs index b8079d0..78081b3 100644 --- a/src/code_generator.rs +++ b/src/code_generation/code_generator.rs @@ -1,5 +1,5 @@ -use crate::ast::ASTNode::*; -use crate::ast::*; +use crate::syntax_analysis::ast::ASTNode::*; +use crate::syntax_analysis::ast::*; pub trait CodeGenerator { fn generate(&mut self, root: &ASTNode) -> String { match root { @@ -36,11 +36,11 @@ pub trait CodeGenerator { #[cfg(test)] mod tests { - use crate::code_generator::*; - use crate::lexer::Lexer; - use crate::parser::Parser; - use crate::test_utils::*; - use crate::x86_generator::X86CodeGenerator; + use crate::code_generation::code_generator::*; + use crate::code_generation::x86_generator::X86CodeGenerator; + use crate::lexical_analysis::lexer::Lexer; + use crate::syntax_analysis::parser::Parser; + use crate::utils::test_utils::*; fn generate_code(src: String) -> String { let tokens = Lexer::new(src).lex(); diff --git a/src/code_generation/mod.rs b/src/code_generation/mod.rs new file mode 100644 index 0000000..c68a72e --- /dev/null +++ b/src/code_generation/mod.rs @@ -0,0 +1,3 @@ +pub mod code_generator; +mod symbol_table; +pub mod x86_generator; diff --git a/src/symbol_table.rs b/src/code_generation/symbol_table.rs similarity index 95% rename from src/symbol_table.rs rename to src/code_generation/symbol_table.rs index e2b6f1a..32c231a 100644 --- a/src/symbol_table.rs +++ b/src/code_generation/symbol_table.rs @@ -1,4 +1,3 @@ -use crate::symbol_table::Symbol::Variable; use std::cmp::min; use std::collections::HashMap; @@ -22,7 +21,7 @@ impl Symbol { pub fn size(&self) -> usize { match self { - Variable { variable_type, .. } => Symbol::get_type_size_in_bytes(variable_type), + Symbol::Variable { variable_type, .. } => Symbol::get_type_size_in_bytes(variable_type), Symbol::Function { .. } => panic!(), } } @@ -57,7 +56,7 @@ impl Scope { pub fn insert_top(&mut self, symbol_name: &str, variable_type: &str) { self.insert( symbol_name, - &Variable { + &Symbol::Variable { variable_type: String::from(variable_type), stack_offset: self.stack_top - (Symbol::get_type_size_in_bytes(variable_type) as isize), diff --git a/src/x86_generator.rs b/src/code_generation/x86_generator.rs similarity index 98% rename from src/x86_generator.rs rename to src/code_generation/x86_generator.rs index c1ecd12..d30a1da 100644 --- a/src/x86_generator.rs +++ b/src/code_generation/x86_generator.rs @@ -1,8 +1,8 @@ -use crate::ast::ASTNode::*; -use crate::ast::*; -use crate::code_generator::*; -use crate::symbol_table::{self, Symbol, SymbolTable}; -use crate::tokens::*; +use crate::code_generation::code_generator::*; +use crate::code_generation::symbol_table::{self, Symbol, SymbolTable}; +use crate::lexical_analysis::tokens::*; +use crate::syntax_analysis::ast::ASTNode::*; +use crate::syntax_analysis::ast::*; use uuid::Uuid; pub struct X86CodeGenerator { diff --git a/src/lexer.rs b/src/lexical_analysis/lexer.rs similarity index 99% rename from src/lexer.rs rename to src/lexical_analysis/lexer.rs index 9c35bb3..b87d814 100644 --- a/src/lexer.rs +++ b/src/lexical_analysis/lexer.rs @@ -1,4 +1,4 @@ -use crate::tokens::*; +use crate::lexical_analysis::tokens::*; pub struct Lexer { src: String, diff --git a/src/lexical_analysis/mod.rs b/src/lexical_analysis/mod.rs new file mode 100644 index 0000000..97b4a58 --- /dev/null +++ b/src/lexical_analysis/mod.rs @@ -0,0 +1,2 @@ +pub mod lexer; +pub mod tokens; diff --git a/src/tokens.rs b/src/lexical_analysis/tokens.rs similarity index 100% rename from src/tokens.rs rename to src/lexical_analysis/tokens.rs diff --git a/src/main.rs b/src/main.rs index 60b049b..c1c7f64 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,11 +1,6 @@ -mod ast; -mod code_generator; -mod lexer; -mod parser; -mod symbol_table; -mod test_utils; -mod tokens; -mod x86_generator; - +mod code_generation; +mod lexical_analysis; +mod syntax_analysis; +mod utils; fn main() {} diff --git a/src/ast.rs b/src/syntax_analysis/ast.rs similarity index 97% rename from src/ast.rs rename to src/syntax_analysis/ast.rs index e59ff1d..8db7841 100644 --- a/src/ast.rs +++ b/src/syntax_analysis/ast.rs @@ -1,4 +1,4 @@ -use crate::tokens::*; +use crate::lexical_analysis::tokens::*; #[derive(Clone, Debug, PartialEq, Eq)] pub enum Expression { diff --git a/src/syntax_analysis/mod.rs b/src/syntax_analysis/mod.rs new file mode 100644 index 0000000..a310c76 --- /dev/null +++ b/src/syntax_analysis/mod.rs @@ -0,0 +1,2 @@ +pub mod ast; +pub mod parser; diff --git a/src/parser.rs b/src/syntax_analysis/parser.rs similarity index 99% rename from src/parser.rs rename to src/syntax_analysis/parser.rs index 230b19e..8546ade 100644 --- a/src/parser.rs +++ b/src/syntax_analysis/parser.rs @@ -1,6 +1,6 @@ -use crate::ast::ASTNode::*; -use crate::ast::*; -use crate::tokens::*; +use crate::lexical_analysis::tokens::*; +use crate::syntax_analysis::ast::ASTNode::*; +use crate::syntax_analysis::ast::*; pub struct Parser { tokens: Vec, @@ -395,8 +395,8 @@ impl Parser { #[cfg(test)] mod tests { use super::*; - use crate::ast::ASTNode::*; - use crate::lexer::Lexer; + use crate::lexical_analysis::lexer::Lexer; + use crate::syntax_analysis::ast::ASTNode::*; #[rstest::rstest] #[case("int x = 55;", TranslationUnit( diff --git a/src/utils/mod.rs b/src/utils/mod.rs new file mode 100644 index 0000000..681d26e --- /dev/null +++ b/src/utils/mod.rs @@ -0,0 +1 @@ +pub mod test_utils; diff --git a/src/test_utils.rs b/src/utils/test_utils.rs similarity index 100% rename from src/test_utils.rs rename to src/utils/test_utils.rs From fa576452fdf1360c718d78b4e81dbda51ef90d4a Mon Sep 17 00:00:00 2001 From: Mahmoud Abumandour Date: Sun, 22 Oct 2023 17:25:23 -0700 Subject: [PATCH 3/6] Handle single-line comments --- src/lexical_analysis/lexer.rs | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/lexical_analysis/lexer.rs b/src/lexical_analysis/lexer.rs index b87d814..c5176f1 100644 --- a/src/lexical_analysis/lexer.rs +++ b/src/lexical_analysis/lexer.rs @@ -41,6 +41,14 @@ impl Lexer { '!' => match_double_char_token!('=', TokenType::NotEquals), '>' => match_double_char_token!('=', TokenType::GreaterThanEquals), '<' => match_double_char_token!('=', TokenType::LessThanEquals), + '/' => { + if self.peek(1) == '/' { + while self.current() != '\n' && self.current() != EOF { + self.advance(); + } + continue; + } + } _ => {} } @@ -194,6 +202,15 @@ mod tests { assert_eq!(TokenType::Eof, tokens[1].token_type); } + #[rstest::rstest] + #[case("// This is a comment ending with EOF")] + #[case("// This is a comment ending with a new line\n")] + fn test_comment(#[case] test_case: String) { + let tokens = Lexer::new(test_case.clone()).lex(); + assert_eq!(1, tokens.len()); + assert_eq!(TokenType::Eof, tokens[0].token_type); + } + #[rstest::rstest] #[case("int x = 55; ", vec![ Token{value: "int".to_string(), token_type: TokenType::Type, pos: 0}, From b0cbe2cb077fd3e9fec98cb2f39664ba8355f9a5 Mon Sep 17 00:00:00 2001 From: Mahmoud Abumandour Date: Sun, 22 Oct 2023 18:41:06 -0700 Subject: [PATCH 4/6] Add initial LLVM support This commit restructures the project into two different code generators (LLVM and native x86). It also adds basic LLVM support (functions & variables). This also adds support for reading test cases from files, which we use to test LLVM code generation. --- src/code_generation/code_generator.rs | 36 +- src/code_generation/llvm/generator.rs | 492 ++++++++++++ src/code_generation/llvm/mod.rs | 2 + src/code_generation/llvm/symbol_table.rs | 63 ++ src/code_generation/mod.rs | 4 +- .../{x86_generator.rs => x86/generator.rs} | 730 +++++++++--------- src/code_generation/x86/mod.rs | 2 + src/code_generation/{ => x86}/symbol_table.rs | 0 src/tests/variables.c | 36 + src/tests/variables_error.c | 36 + src/utils/test_utils.rs | 51 +- 11 files changed, 1059 insertions(+), 393 deletions(-) create mode 100644 src/code_generation/llvm/generator.rs create mode 100644 src/code_generation/llvm/mod.rs create mode 100644 src/code_generation/llvm/symbol_table.rs rename src/code_generation/{x86_generator.rs => x86/generator.rs} (95%) create mode 100644 src/code_generation/x86/mod.rs rename src/code_generation/{ => x86}/symbol_table.rs (100%) create mode 100644 src/tests/variables.c create mode 100644 src/tests/variables_error.c diff --git a/src/code_generation/code_generator.rs b/src/code_generation/code_generator.rs index 78081b3..cea14b9 100644 --- a/src/code_generation/code_generator.rs +++ b/src/code_generation/code_generator.rs @@ -1,43 +1,13 @@ use crate::syntax_analysis::ast::ASTNode::*; use crate::syntax_analysis::ast::*; pub trait CodeGenerator { - fn generate(&mut self, root: &ASTNode) -> String { - match root { - TranslationUnit(..) => self.generate_translation_unit(root), - ReturnStatement(..) => self.generate_return_statement(root), - VariableDeclaration(..) => self.generate_variable_declaration(root), - VariableDefinition(..) => self.generate_variable_definition(root), - FunctionDeclaration(..) => self.generate_function_declaration(root), - FunctionDefinition(..) => self.generate_function_definition(root), - ExpressionNode(expression) => self.generate_expression(expression), - Scope(..) => self.generate_scope(root), - If(..) => self.generate_if_statement(root), - While(..) => self.generate_while(root), - DoWhile(..) => self.generate_do_while(root), - ExpressionStatement(..) => self.generate_expression_statement(root), - For(..) => self.generate_for(root), - } - } - - fn generate_translation_unit(&mut self, node: &ASTNode) -> String; - fn generate_return_statement(&mut self, node: &ASTNode) -> String; - fn generate_variable_declaration(&mut self, node: &ASTNode) -> String; - fn generate_variable_definition(&mut self, node: &ASTNode) -> String; - fn generate_function_declaration(&mut self, node: &ASTNode) -> String; - fn generate_function_definition(&mut self, node: &ASTNode) -> String; - fn generate_expression(&mut self, expression: &Expression) -> String; - fn generate_scope(&mut self, scope: &ASTNode) -> String; - fn generate_if_statement(&mut self, node: &ASTNode) -> String; - fn generate_while(&mut self, while_node: &ASTNode) -> String; - fn generate_do_while(&mut self, node: &ASTNode) -> String; - fn generate_expression_statement(&mut self, node: &ASTNode) -> String; - fn generate_for(&mut self, node: &ASTNode) -> String; + fn generate(&mut self, node: &ASTNode) -> String; } #[cfg(test)] mod tests { use crate::code_generation::code_generator::*; - use crate::code_generation::x86_generator::X86CodeGenerator; + use crate::code_generation::x86::generator; use crate::lexical_analysis::lexer::Lexer; use crate::syntax_analysis::parser::Parser; use crate::utils::test_utils::*; @@ -45,7 +15,7 @@ mod tests { fn generate_code(src: String) -> String { let tokens = Lexer::new(src).lex(); let ast = Parser::new(tokens).parse(); - let mut generator = X86CodeGenerator::new(); + let mut generator = generator::X86CodeGenerator::new(); let generated = generator.generate(&ast); return generated; } diff --git a/src/code_generation/llvm/generator.rs b/src/code_generation/llvm/generator.rs new file mode 100644 index 0000000..2130dd6 --- /dev/null +++ b/src/code_generation/llvm/generator.rs @@ -0,0 +1,492 @@ +use crate::code_generation::code_generator::*; +use crate::code_generation::llvm::symbol_table::*; +use crate::lexical_analysis::tokens::*; +use crate::syntax_analysis::ast::ASTNode::*; +use crate::syntax_analysis::ast::*; +use std::fmt::Pointer; + +use inkwell::basic_block::BasicBlock; +use inkwell::builder::Builder; +use inkwell::context::Context; +use inkwell::module::Module; +use inkwell::types::*; +use inkwell::values::*; + +pub struct LLVMGenerator<'ctx> { + context: &'ctx Context, + builder: Builder<'ctx>, + module: Module<'ctx>, + symbol_table: SymbolTable<'ctx>, +} + +impl<'ctx> LLVMGenerator<'ctx> { + pub fn new(context: &'ctx mut Context) -> Self { + let builder = context.create_builder(); + let module = context.create_module("main"); + let symbol_table = SymbolTable::new(); + + Self { + context, + builder, + module, + symbol_table, + } + } +} + +impl<'ctx> CodeGenerator for LLVMGenerator<'ctx> { + fn generate(&mut self, node: &ASTNode) -> String { + let _ = self.generate_internal(node); + self.module.print_to_string().to_string() + } +} + +impl<'ctx> LLVMGenerator<'ctx> { + fn generate_internal(&mut self, node: &ASTNode) -> impl AnyValue<'ctx> { + match node { + TranslationUnit(..) => self.generate_translation_unit(node).as_any_value_enum(), + ReturnStatement(..) => self.generate_return_statement(node).as_any_value_enum(), + VariableDeclaration(..) => self.generate_variable_declaration(node).as_any_value_enum(), + VariableDefinition(..) => self.generate_variable_definition(node).as_any_value_enum(), + FunctionDeclaration(..) => self.generate_function_declaration(node).as_any_value_enum(), + FunctionDefinition(..) => self.generate_function_definition(node).as_any_value_enum(), + ExpressionNode(expression) => self.generate_expression(expression).as_any_value_enum(), + // Scope(..) => self.generate_scope(node), + // If(..) => self.generate_if_statement(node), + // While(..) => self.generate_while(node), + // DoWhile(..) => self.generate_do_while(node), + // ExpressionStatement(..) => self.generate_expression_statement(node), + // For(..) => self.generate_for(node), + _ => panic!(), + } + } + pub fn generate_translation_unit(&mut self, node: &ASTNode) -> IntValue<'ctx> { + match node { + TranslationUnit(statements) => { + for statement in statements { + self.generate_internal(&statement); + } + } + _ => panic!( + "Internal error: expected translation unit, found: {:?}", + node + ), + } + // FIXME this is a placeholder as this function does not generate LLVM objects. + self.context.i32_type().const_int(0, false) + } + + fn is_in_global_scope(&self) -> bool { + self.symbol_table.scopes.len() == 0 + } + + fn generate_variable_declaration(&mut self, node: &ASTNode) -> PointerValue<'ctx> { + match node { + VariableDeclaration(variable_type, identifier) => { + let variable_type = variable_type.value.as_str(); + if self.is_in_global_scope() { + if let Some(variable) = + self.symbol_table.find_in_global_scope(&identifier.value) + { + return variable.pointer; + } + + let g = self.module.add_global( + self.get_llvm_type_from_string(variable_type), + None, + identifier.value.as_str(), + ); + + // TODO set the alignment depending on the size of the type. + g.set_initializer(&self.get_llvm_type_from_string(variable_type).const_zero()); + + let g = g.as_pointer_value(); + self.symbol_table.insert_global( + &identifier.value, + &Variable { + pointer: g, + variable_type: self.get_llvm_type_from_string(variable_type), + is_initialized: false, + }, + ); + g + } else { + if self + .symbol_table + .find_in_current_scope(&identifier.value) + .is_some() + { + panic!("Variable {} already defined", identifier.value); + } + + let alloca = self.generate_alloca_instruction( + &self.builder.get_insert_block().unwrap(), + identifier.value.as_str(), + variable_type, + ); + self.symbol_table.insert( + &identifier.value, + &Variable { + pointer: alloca, + variable_type: self.get_llvm_type_from_string(variable_type), + is_initialized: false, + }, + ); + alloca + } + } + _ => panic!( + "Internal error: expected variable declaration, found: {:?}", + node + ), + } + } + + fn generate_variable_definition(&mut self, node: &ASTNode) -> PointerValue<'ctx> { + match node { + VariableDefinition(variable_type, identifier, expression) => { + if self.is_in_global_scope() { + let decl = self.symbol_table.find_in_global_scope(&identifier.value); + match decl { + Some(decl) => { + if decl.is_initialized { + panic!("Variable {} already defined", identifier.value); + } else { + let global_var = + self.module.get_global(identifier.value.as_str()).unwrap(); + match expression.as_ref() { + ExpressionNode(expression) => { + let generated_expression = + self.generate_expression(expression); + global_var.set_initializer(&generated_expression); + } + _ => panic!( + "Internal error: expected expression node, found: {:?}", + node + ), + }; + return global_var.as_pointer_value(); + } + } + None => {} + } + + let variable_type = variable_type.value.as_str(); + let g = self.module.add_global( + self.get_llvm_type_from_string(variable_type), + None, + identifier.value.as_str(), + ); + + // FIXME make sure that the initializer is evaluated at compile time. + let g = match expression.as_ref() { + ExpressionNode(expression) => { + let generated_expression = self.generate_expression(expression); + g.set_initializer(&generated_expression); + g + } + _ => panic!( + "Internal error: expected expression node, found: {:?}", + node + ), + }; + + let g = g.as_pointer_value(); + self.symbol_table.insert_global( + &identifier.value, + &Variable { + pointer: g, + variable_type: self.get_llvm_type_from_string(variable_type), + is_initialized: true, + }, + ); + g + } else { + if self + .symbol_table + .find_in_current_scope(&identifier.value) + .is_some() + { + panic!("Variable {} already defined", identifier.value); + } + + let variable_type = variable_type.value.as_str(); + let alloca = self.generate_alloca_instruction( + &self.builder.get_insert_block().unwrap(), + identifier.value.as_str(), + variable_type, + ); + match expression.as_ref() { + ExpressionNode(expression) => { + let generated_expression = self.generate_expression(expression); + self.builder + .build_store(alloca, generated_expression) + .unwrap(); + self.symbol_table.insert( + &identifier.value, + &Variable { + pointer: alloca, + variable_type: self.get_llvm_type_from_string(variable_type), + is_initialized: true, + }, + ); + alloca + } + _ => panic!( + "Internal error: expected expression node, found: {:?}", + node + ), + } + } + } + _ => panic!( + "Internal error: expected variable definition, found: {:?}", + node + ), + } + } + + fn generate_function_declaration(&mut self, node: &ASTNode) -> FunctionValue<'ctx> { + match node { + FunctionDeclaration(return_type, name, params) => { + let mut param_types = Vec::new(); + for param in params { + match param { + VariableDeclaration(type_token, _) => { + param_types.push( + self.get_llvm_type_from_string(type_token.value.as_str()) + .into(), + ); + } + _ => panic!( + "Internal error: expected variable declaration, found: {:?}", + param + ), + } + } + let return_type = self.get_llvm_type_from_string(return_type.value.as_str()); + let function_type = return_type.fn_type(¶m_types, false); + let function = self + .module + .add_function(name.value.as_str(), function_type, None); + for (i, arg) in function.get_param_iter().enumerate() { + match ¶ms[i] { + VariableDeclaration(_, name) => { + arg.set_name(name.value.as_str()); + } + _ => panic!( + "Internal error: expected variable declaration, found: {:?}", + params[i] + ), + } + } + // TODO add the function to the symbol table. + function + } + _ => panic!( + "Internal error: expected function declaration, found: {:?}", + node + ), + } + } + + fn generate_function_definition(&mut self, node: &ASTNode) -> FunctionValue<'ctx> { + match node { + FunctionDefinition(return_type, name, params, body) => { + let declaration = self.generate_function_declaration(&FunctionDeclaration( + return_type.clone(), + name.clone(), + params.clone(), + )); + + self.symbol_table.push_scope(); + + let basic_block = self.context.append_basic_block(declaration, "entry"); + self.builder.position_at_end(basic_block); + + for (i, arg) in declaration.get_param_iter().enumerate() { + match ¶ms[i] { + VariableDeclaration(variable_type, name) => { + let variable_type = variable_type.value.as_str(); + let alloca = self.generate_alloca_instruction( + &basic_block, + name.value.as_str(), + variable_type, + ); + self.builder.build_store(alloca, arg).unwrap(); + self.symbol_table.insert( + &name.value, + &Variable { + pointer: alloca, + variable_type: self.get_llvm_type_from_string(variable_type), + is_initialized: true, + }, + ); + } + _ => panic!( + "Internal error: expected variable declaration, found: {:?}", + params[i] + ), + } + } + + match body.as_ref() { + Scope(statements) => { + for statement in statements { + self.generate_internal(&statement); + } + } + _ => panic!("Internal error: expected scope, found: {:?}", body), + } + self.symbol_table.pop_scope(); + return declaration; + } + _ => panic!(), + } + } + + fn generate_alloca_instruction( + &mut self, + entry_block: &BasicBlock, + name: &str, + variable_type: &str, + ) -> PointerValue<'ctx> { + let builder = self.context.create_builder(); + + match entry_block.get_first_instruction() { + Some(first_instr) => builder.position_before(&first_instr), + None => builder.position_at_end(*entry_block), + } + let llvm_type = self.get_llvm_type_from_string(variable_type); + builder.build_alloca(llvm_type, name).unwrap() + } + + fn generate_return_statement(&mut self, node: &ASTNode) -> InstructionValue<'ctx> { + match node { + ReturnStatement(_, expression_node) => match expression_node.as_ref() { + ExpressionNode(expression) => { + let generated_expression = self.generate_expression(expression); + self.builder + .build_return(Some(&generated_expression)) + .unwrap() + } + _ => panic!( + "Internal error: expected expression node, found: {:?}", + node + ), + }, + _ => panic!( + "Internal error: expected return statement, found: {:?}", + node + ), + } + } + + fn generate_expression(&mut self, expression: &Expression) -> BasicValueEnum<'ctx> { + match expression { + Expression::IntegerLiteral { .. } => self.generate_integer_literal(expression), + Expression::Variable(name) => self.generate_variable_expression(name), + _ => todo!(), + } + } + + fn generate_variable_expression(&mut self, name: &Token) -> BasicValueEnum<'ctx> { + if let Some(variable) = self.symbol_table.find(&name.value) { + self.builder + .build_load( + variable.variable_type, + variable.pointer, + name.value.as_str(), + ) + .unwrap() + } else if let Some(global_variable) = self.symbol_table.find_in_global_scope(&name.value) { + self.builder + .build_load( + global_variable.variable_type, + global_variable.pointer, + name.value.as_str(), + ) + .unwrap() + } else { + panic!("Reference to undefined variable `{}`", name.value); + } + } + + fn generate_integer_literal(&mut self, expression: &Expression) -> BasicValueEnum<'ctx> { + match expression { + Expression::IntegerLiteral(token) => self + .context + .i32_type() + .const_int(token.value.parse().unwrap(), false) + .as_basic_value_enum(), + _ => panic!(), + } + } + + fn generate_scope(&mut self, scope: &ASTNode) -> String { + todo!() + } + + fn generate_if_statement(&mut self, node: &ASTNode) -> String { + todo!() + } + + fn generate_while(&mut self, while_node: &ASTNode) -> String { + todo!() + } + + fn generate_do_while(&mut self, node: &ASTNode) -> String { + todo!() + } + + fn generate_expression_statement(&mut self, node: &ASTNode) -> String { + todo!() + } + + fn generate_for(&mut self, node: &ASTNode) -> String { + todo!() + } + + fn get_llvm_type_from_string(&self, type_str: &str) -> BasicTypeEnum<'ctx> { + match type_str { + "int" => BasicTypeEnum::IntType(self.context.i32_type()), + _ => panic!(), + } + } +} + +#[cfg(test)] +mod tests { + use super::CodeGenerator; + use crate::utils::test_utils::{interpret_llvm_ir, parse_test_file}; + use crate::{code_generation, lexical_analysis, syntax_analysis}; + use inkwell::context::Context; + + fn run_tests_from_file(path: &str) { + let test_cases = parse_test_file(path); + for test_case in test_cases { + let mut context = Context::create(); + let tokens = lexical_analysis::lexer::Lexer::new(test_case.source).lex(); + let ast = syntax_analysis::parser::Parser::new(tokens).parse(); + let generated_ir = + code_generation::llvm::generator::LLVMGenerator::new(&mut context).generate(&ast); + let exit_code = interpret_llvm_ir(&generated_ir); + assert_eq!( + test_case.expected, exit_code, + "Test case: {} -- Expected: {}, found: {}", + test_case.name, test_case.expected, exit_code + ); + } + } + + #[test] + fn test_variable_declarations_and_definitions() { + run_tests_from_file("./src/tests/variables.c"); + } + + #[test] + #[should_panic] + fn test_erroneous_variable_declarations_and_definitions() { + run_tests_from_file("./src/tests/variables_error.c"); + } +} diff --git a/src/code_generation/llvm/mod.rs b/src/code_generation/llvm/mod.rs new file mode 100644 index 0000000..1bc099b --- /dev/null +++ b/src/code_generation/llvm/mod.rs @@ -0,0 +1,2 @@ +pub mod generator; +pub mod symbol_table; diff --git a/src/code_generation/llvm/symbol_table.rs b/src/code_generation/llvm/symbol_table.rs new file mode 100644 index 0000000..9ccdc5d --- /dev/null +++ b/src/code_generation/llvm/symbol_table.rs @@ -0,0 +1,63 @@ +use inkwell::values::PointerValue; +use std::collections::HashMap; + +#[derive(Clone)] +pub struct Variable<'ctx> { + pub pointer: PointerValue<'ctx>, + pub variable_type: inkwell::types::BasicTypeEnum<'ctx>, + pub is_initialized: bool, +} + +pub struct SymbolTable<'ctx> { + pub scopes: Vec>>, + pub globals: HashMap>, +} + +impl<'ctx> SymbolTable<'ctx> { + pub fn new() -> Self { + Self { + scopes: vec![], + globals: HashMap::new(), + } + } + + pub fn push_scope(&mut self) { + self.scopes.push(HashMap::new()); + } + + pub fn pop_scope(&mut self) { + self.scopes.pop(); + } + + pub fn insert_global(&mut self, name: &str, variable: &Variable<'ctx>) { + self.globals.insert(String::from(name), variable.clone()); + } + + pub fn insert(&mut self, name: &str, variable: &Variable<'ctx>) { + self.scopes + .last_mut() + .unwrap() + .insert(String::from(name), variable.clone()); + } + + pub fn find(&self, name: &str) -> Option<&Variable<'ctx>> { + for scope in self.scopes.iter().rev() { + if let Some(value) = scope.get(name) { + return Some(value); + } + } + None + } + + pub fn find_in_current_scope(&self, name: &str) -> Option<&Variable<'ctx>> { + if let Some(scope) = self.scopes.last() { + return scope.get(name); + } else { + None + } + } + + pub fn find_in_global_scope(&self, name: &str) -> Option<&Variable<'ctx>> { + self.globals.get(name) + } +} diff --git a/src/code_generation/mod.rs b/src/code_generation/mod.rs index c68a72e..8350e10 100644 --- a/src/code_generation/mod.rs +++ b/src/code_generation/mod.rs @@ -1,3 +1,3 @@ pub mod code_generator; -mod symbol_table; -pub mod x86_generator; +pub mod llvm; +pub mod x86; diff --git a/src/code_generation/x86_generator.rs b/src/code_generation/x86/generator.rs similarity index 95% rename from src/code_generation/x86_generator.rs rename to src/code_generation/x86/generator.rs index d30a1da..355645a 100644 --- a/src/code_generation/x86_generator.rs +++ b/src/code_generation/x86/generator.rs @@ -1,5 +1,5 @@ use crate::code_generation::code_generator::*; -use crate::code_generation::symbol_table::{self, Symbol, SymbolTable}; +use crate::code_generation::x86::symbol_table::{self, Symbol, SymbolTable}; use crate::lexical_analysis::tokens::*; use crate::syntax_analysis::ast::ASTNode::*; use crate::syntax_analysis::ast::*; @@ -9,6 +9,26 @@ pub struct X86CodeGenerator { symbol_table: SymbolTable, } +impl CodeGenerator for X86CodeGenerator { + fn generate(&mut self, node: &ASTNode) -> String { + match node { + TranslationUnit(..) => self.generate_translation_unit(node), + ReturnStatement(..) => self.generate_return_statement(node), + VariableDeclaration(..) => self.generate_variable_declaration(node), + VariableDefinition(..) => self.generate_variable_definition(node), + FunctionDeclaration(..) => self.generate_function_declaration(node), + FunctionDefinition(..) => self.generate_function_definition(node), + ExpressionNode(expression) => self.generate_expression(expression), + Scope(..) => self.generate_scope(node), + If(..) => self.generate_if_statement(node), + While(..) => self.generate_while(node), + DoWhile(..) => self.generate_do_while(node), + ExpressionStatement(..) => self.generate_expression_statement(node), + For(..) => self.generate_for(node), + } + } +} + impl X86CodeGenerator { pub fn new() -> Self { Self { @@ -34,390 +54,123 @@ impl X86CodeGenerator { _ => panic!(), } } - - fn get_expression_size_in_bytes(exp: &Expression) -> usize { - 4 - } - - fn generate_function_call(&mut self, call: &Expression) -> String { + fn generate_translation_unit(&mut self, node: &ASTNode) -> String { let mut result = String::new(); - match call { - Expression::FunctionCall(name, parameters) => { - // TODO extract pointer size into a function (and use it in the symbol table too) - let mut push_offset = -16; // return address + rbp - for param in parameters { - let param_size = X86CodeGenerator::get_expression_size_in_bytes(param); - // Since we're dealing with the stack, subtraction of the offset occurs first - push_offset -= param_size as i32; - result.push_str(&format!( - "{computation}\ - {mov} {result}, {offset}(%rsp)\n", - computation = self.generate_expression(param), - mov = X86CodeGenerator::mov_mnemonic(param_size), - result = X86CodeGenerator::get_reg1(param_size), - offset = push_offset - )); + match node { + TranslationUnit(nodes_vector) => { + // TODO Support global variables + // FIXME this is a scope for storing global FUNCTION declarations only + // and should not be used for global variables + self.symbol_table.push_scope(symbol_table::Scope::new(0)); + for node in nodes_vector { + result.push_str(&self.generate(node)); } - result.push_str(&format!("call {}\n", name.value)); + self.symbol_table.pop_scope(); result } - _ => panic!("Exp"), + _ => panic!("Internal Error: Expected program node, found {:?}", node), } } - fn generate_unary_expression(&mut self, expression: &Expression) -> String { - let mut result = String::new(); - match expression { - Expression::Unary(operator, expression) => { - result.push_str(self.generate_expression(&expression).as_str()); - match operator.token_type { - TokenType::Plus => {} - TokenType::Minus => { - result.push_str(&format!("neg {}\n", X86CodeGenerator::get_reg1(8))); - } - _ => panic!("Unsupported unary operator: {:#?}", operator), - } + fn generate_return_statement(&mut self, node: &ASTNode) -> String { + match node { + ReturnStatement(_, expr_node) => { + let mut result = self.generate(expr_node); + result.push_str(&format!( + "mov %rbp, %rsp\nmov {}, %rax\npop %rbp\nret\n", + X86CodeGenerator::get_reg1(8) + )); + result } - _ => panic!( - "Internal Error: Expected a unary expression, found: {:#?}", - expression - ), + _ => panic!("Return: Expected a return node, found {:?}", node), } - result } - fn generate_binary_expression(&mut self, expression: &Expression) -> String { - let mut result = String::new(); - match expression { - Expression::Binary(token, left, right) => { - result.push_str(&self.generate_expression(right)); - result.push_str(format!("push {}\n", X86CodeGenerator::get_reg1(8)).as_str()); - result.push_str(&self.generate_expression(left)); - result.push_str(format!("pop {}\n", X86CodeGenerator::get_reg2(8)).as_str()); - - // TODO: Support floating point operations - let reg1 = X86CodeGenerator::get_reg1(4); - let reg2 = X86CodeGenerator::get_reg2(4); - match token.token_type { - TokenType::Plus => { - result.push_str(&format!("add {}, {}\n", reg2, reg1)); - } - TokenType::Minus => { - result.push_str(&format!("sub {}, {}\n", reg2, reg1)); - } - TokenType::Star => { - result.push_str(&format!("imul {}, {}\n", reg2, reg1)); - } - TokenType::Slash => { - result.push_str(&format!("push %rax\n")); - result.push_str(&format!("push %rdx\n")); - result.push_str(&format!("mov {}, %eax\n", reg1)); - result.push_str(&format!("mov $0, %edx\n")); - result.push_str(&format!("idiv {}\n", reg2)); - result.push_str(&format!("mov %eax, {}\n", reg1)); - result.push_str(&format!("pop %rdx\n")); - result.push_str(&format!("pop %rax\n")); - } - // TODO: Account for short-circuiting of boolean expressions. - // For now, we evaluate the full expression no matter how it - // is structured. - TokenType::AndAnd => { - result.push_str(&format!("and {}, {}\nand $1, {}\n", reg2, reg1, reg1)); - } - TokenType::BarBar => { - result.push_str(&format!("or {}, {}\nand $1, {}\n", reg2, reg1, reg1)); - } - TokenType::And => { - result.push_str(&format!("and {}, {}\n", reg2, reg1)); - } - TokenType::Bar => { - result.push_str(&format!("or {}, {}\n", reg2, reg1)); - } - TokenType::Caret => { - result.push_str(&format!("xor {}, {}\n", reg2, reg1)); - } - TokenType::EqualsEquals => { - result.push_str(&format!("cmp {}, {}\n", reg2, reg1)); - result.push_str(&format!("sete %bl\n")); - result.push_str(&format!("movzbl %bl, {}\n", reg1)); - } - TokenType::NotEquals => { - result.push_str(&format!("cmp {}, {}\n", reg2, reg1)); - result.push_str(&format!("setne %bl\n")); - result.push_str(&format!("movzbl %bl, %ebx\n")); - } - TokenType::GreaterThan => { - result.push_str(&format!("cmp {}, {}\n", reg2, reg1)); - result.push_str(&format!("setg %bl\n")); - result.push_str(&format!("movzbl %bl, %ebx\n")); - } - TokenType::GreaterThanEquals => { - result.push_str(&format!("cmp {}, {}\n", reg2, reg1)); - result.push_str(&format!("setge %bl\n")); - result.push_str(&format!("movzbl %bl, %ebx\n")); - } - TokenType::LessThan => { - result.push_str(&format!("cmp {}, {}\n", reg2, reg1)); - result.push_str(&format!("setl %bl\n")); - result.push_str(&format!("movzbl %bl, %ebx\n")); - } - TokenType::LessThanEquals => { - result.push_str(&format!("cmp {}, {}\n", reg2, reg1)); - result.push_str(&format!("setle %bl\n")); - result.push_str(&format!("movzbl %bl, %ebx\n")); - } - _ => panic!("Unsupported operator: {:?}", token), + fn generate_variable_declaration(&mut self, node: &ASTNode) -> String { + match node { + VariableDeclaration(variable_type, identifier) => { + if self + .symbol_table + .get_at_current_scope(&identifier.value) + .is_some() + { + panic!( + "Declaration: the identifier `{}` is already in use", + identifier.value + ) } + + self.symbol_table + .insert_top(&identifier.value, &variable_type.value); } - _ => panic!( - "Internal Error: Expected a binary expression, found: {:#?}", - expression - ), + _ => panic!("Declaration: Expected declaration node, found {:?}", node), } - result + "".to_string() } - fn generate_integral_literal(&self, expression: &Expression) -> String { - match expression { - Expression::IntegerLiteral(token) => { - format!( - "mov ${}, {}\n", - token.value.clone(), - X86CodeGenerator::get_reg1(8) + fn generate_variable_definition(&mut self, node: &ASTNode) -> String { + if let VariableDefinition(type_token, identifier, expression) = node { + self.generate_variable_declaration(&VariableDeclaration( + type_token.clone(), + identifier.clone(), + )); + if let ExpressionNode(expr) = &(**expression) { + self.generate_assignment(&Expression::Assignment( + identifier.clone(), + Box::new(expr.clone()), + )) + } else { + panic!( + "Internal error: Expected expression node, found {:?}", + expression ) } - _ => panic!( - "Internal Error: Expected integral literal, found: {:#?}", - expression - ), + } else { + panic!( + "Internal error: Expected variable definition, found {:?}", + node + ) } } - fn generate_variable_expression(&self, expression: &Expression) -> String { - match expression { - Expression::Variable(token) => { - let definition = self + fn generate_function_declaration(&mut self, node: &ASTNode) -> String { + match node { + FunctionDeclaration(return_type, identifier, func_parameters) => { + if self .symbol_table - .get(token.value.as_str()) - .unwrap_or_else(|| panic!("Undefined variable: {}", token.value.as_str())); - match definition { - Symbol::Variable { stack_offset, .. } => { - let mov_instruction = X86CodeGenerator::mov_mnemonic(definition.size()); - format!( - "{} {}(%rbp), {}\n", - mov_instruction, - *stack_offset, - X86CodeGenerator::get_reg1(definition.size()) - ) + .get_at_current_scope(&identifier.value) + .is_some() + { + panic!( + "Declaration: the function `{}` is already declared", + identifier.value + ) + } + + let mut parameters = Vec::new(); + for node in func_parameters { + if let VariableDeclaration(variable_type, ..) = node { + parameters.push(variable_type.value.clone()); + } else if let FunctionDeclaration(..) = node { + todo!("Support function declaration as a parameter") + } else { + panic!("") } - _ => panic!(), } + let symbol = Symbol::Function { + return_type: return_type.value.clone(), + parameters, + }; + + self.symbol_table.insert(&identifier.value, &symbol); } - _ => panic!( - "Internal Error: Expected variable expression, found: {:#?}", - expression - ), + _ => panic!("Declaration: Expected declaration node, found {:?}", node), } + "".to_string() } - fn generate_return_void() -> &'static str { - "mov %rbp, %rsp\npop %rbp\nret\n" - } - - fn generate_assignment(&mut self, expression: &Expression) -> String { - match expression { - Expression::Assignment(identifier, expr_node) => { - let variable = self.symbol_table.get(&identifier.value).unwrap_or_else(|| { - panic!( - "Assignment: the identifier `{}` is not defined", - identifier.value - ) - }); - let mut variable_stack_offset = 0; - let variable_size = variable.size(); - match variable { - Symbol::Variable { stack_offset, .. } => { - variable_stack_offset = *stack_offset; - } - _ => panic!(), - } - - // TODO support referential assignment - let mov_instruction = X86CodeGenerator::mov_mnemonic(variable.size()); - let mut result = self.generate_expression(expr_node); - - result.push_str(&format!( - "{} {}, {}(%rbp)\n", - mov_instruction, - X86CodeGenerator::get_reg1(variable_size), - variable_stack_offset - )); - result - } - _ => panic!(), - } - } - - fn unique_label(&mut self, prefix: &str) -> String { - return format!("{}_{}", prefix, Uuid::new_v4().simple()); - } - - fn generate_condition_block(&mut self, prefix: &str, condition: &str, body: &str) -> String { - let mut results = String::new(); - results.push_str(condition); - results.push_str(&format!("cmp $0, {}\n", X86CodeGenerator::get_reg1(8))); - let else_label = self.unique_label(prefix); - results.push_str(&format!("je {}\n", else_label)); - results.push_str(body); - results.push_str(&format!("{}:\n", else_label)); - results - } - - fn mov_mnemonic(size: usize) -> &'static str { - match size { - 1 => "movb", - 2 => "movw", - 4 => "movl", - 8 => "movq", - _ => panic!("Unsupported size `{}`.", size), - } - } - - fn get_reg1(size: usize) -> &'static str { - match size { - 1 | 2 | 4 => "%ebx", - 8 => "%rbx", - _ => panic!("Invalid register size: {}", size), - } - } - - fn get_reg2(size: usize) -> &'static str { - match size { - 1 | 2 | 4 => "%ecx", - 8 => "%rcx", - _ => panic!("Invalid register size: {}", size), - } - } -} - -impl CodeGenerator for X86CodeGenerator { - fn generate_translation_unit(&mut self, node: &ASTNode) -> String { - let mut result = String::new(); - match node { - TranslationUnit(nodes_vector) => { - // TODO Support global variables - // FIXME this is a scope for storing global FUNCTION declarations only - // and should not be used for global variables - self.symbol_table.push_scope(symbol_table::Scope::new(0)); - for node in nodes_vector { - result.push_str(&self.generate(node)); - } - self.symbol_table.pop_scope(); - result - } - _ => panic!("Internal Error: Expected program node, found {:?}", node), - } - } - - fn generate_return_statement(&mut self, node: &ASTNode) -> String { - match node { - ReturnStatement(_, expr_node) => { - let mut result = self.generate(expr_node); - result.push_str(&format!( - "mov %rbp, %rsp\nmov {}, %rax\npop %rbp\nret\n", - X86CodeGenerator::get_reg1(8) - )); - result - } - _ => panic!("Return: Expected a return node, found {:?}", node), - } - } - - fn generate_variable_declaration(&mut self, node: &ASTNode) -> String { - match node { - VariableDeclaration(variable_type, identifier) => { - if self - .symbol_table - .get_at_current_scope(&identifier.value) - .is_some() - { - panic!( - "Declaration: the identifier `{}` is already in use", - identifier.value - ) - } - - self.symbol_table - .insert_top(&identifier.value, &variable_type.value); - } - _ => panic!("Declaration: Expected declaration node, found {:?}", node), - } - "".to_string() - } - - fn generate_variable_definition(&mut self, node: &ASTNode) -> String { - if let VariableDefinition(type_token, identifier, expression) = node { - self.generate_variable_declaration(&VariableDeclaration( - type_token.clone(), - identifier.clone(), - )); - if let ExpressionNode(expr) = &(**expression) { - self.generate_assignment(&Expression::Assignment( - identifier.clone(), - Box::new(expr.clone()), - )) - } else { - panic!( - "Internal error: Expected expression node, found {:?}", - expression - ) - } - } else { - panic!( - "Internal error: Expected variable definition, found {:?}", - node - ) - } - } - - fn generate_function_declaration(&mut self, node: &ASTNode) -> String { - match node { - FunctionDeclaration(return_type, identifier, func_parameters) => { - if self - .symbol_table - .get_at_current_scope(&identifier.value) - .is_some() - { - panic!( - "Declaration: the function `{}` is already declared", - identifier.value - ) - } - - let mut parameters = Vec::new(); - for node in func_parameters { - if let VariableDeclaration(variable_type, ..) = node { - parameters.push(variable_type.value.clone()); - } else if let FunctionDeclaration(..) = node { - todo!("Support function declaration as a parameter") - } else { - panic!("") - } - } - let symbol = Symbol::Function { - return_type: return_type.value.clone(), - parameters, - }; - - self.symbol_table.insert(&identifier.value, &symbol); - } - _ => panic!("Declaration: Expected declaration node, found {:?}", node), - } - "".to_string() - } - - fn generate_function_definition(&mut self, node: &ASTNode) -> String { - let mut result = String::new(); + fn generate_function_definition(&mut self, node: &ASTNode) -> String { + let mut result = String::new(); if let FunctionDefinition(return_type, identifier, parameters, body) = node { self.generate_function_declaration(&FunctionDeclaration( @@ -592,4 +345,267 @@ impl CodeGenerator for X86CodeGenerator { _ => panic!("Internal error: Expected for statement, found {:?}", node), } } + fn get_expression_size_in_bytes(exp: &Expression) -> usize { + 4 + } + + fn generate_function_call(&mut self, call: &Expression) -> String { + let mut result = String::new(); + match call { + Expression::FunctionCall(name, parameters) => { + // TODO extract pointer size into a function (and use it in the symbol table too) + let mut push_offset = -16; // return address + rbp + for param in parameters { + let param_size = X86CodeGenerator::get_expression_size_in_bytes(param); + // Since we're dealing with the stack, subtraction of the offset occurs first + push_offset -= param_size as i32; + result.push_str(&format!( + "{computation}\ + {mov} {result}, {offset}(%rsp)\n", + computation = self.generate_expression(param), + mov = X86CodeGenerator::mov_mnemonic(param_size), + result = X86CodeGenerator::get_reg1(param_size), + offset = push_offset + )); + } + result.push_str(&format!("call {}\n", name.value)); + result + } + _ => panic!("Exp"), + } + } + + fn generate_unary_expression(&mut self, expression: &Expression) -> String { + let mut result = String::new(); + match expression { + Expression::Unary(operator, expression) => { + result.push_str(self.generate_expression(&expression).as_str()); + match operator.token_type { + TokenType::Plus => {} + TokenType::Minus => { + result.push_str(&format!("neg {}\n", X86CodeGenerator::get_reg1(8))); + } + _ => panic!("Unsupported unary operator: {:#?}", operator), + } + } + _ => panic!( + "Internal Error: Expected a unary expression, found: {:#?}", + expression + ), + } + result + } + + fn generate_binary_expression(&mut self, expression: &Expression) -> String { + let mut result = String::new(); + match expression { + Expression::Binary(token, left, right) => { + result.push_str(&self.generate_expression(right)); + result.push_str(format!("push {}\n", X86CodeGenerator::get_reg1(8)).as_str()); + result.push_str(&self.generate_expression(left)); + result.push_str(format!("pop {}\n", X86CodeGenerator::get_reg2(8)).as_str()); + + // TODO: Support floating point operations + let reg1 = X86CodeGenerator::get_reg1(4); + let reg2 = X86CodeGenerator::get_reg2(4); + match token.token_type { + TokenType::Plus => { + result.push_str(&format!("add {}, {}\n", reg2, reg1)); + } + TokenType::Minus => { + result.push_str(&format!("sub {}, {}\n", reg2, reg1)); + } + TokenType::Star => { + result.push_str(&format!("imul {}, {}\n", reg2, reg1)); + } + TokenType::Slash => { + result.push_str(&format!("push %rax\n")); + result.push_str(&format!("push %rdx\n")); + result.push_str(&format!("mov {}, %eax\n", reg1)); + result.push_str(&format!("mov $0, %edx\n")); + result.push_str(&format!("idiv {}\n", reg2)); + result.push_str(&format!("mov %eax, {}\n", reg1)); + result.push_str(&format!("pop %rdx\n")); + result.push_str(&format!("pop %rax\n")); + } + // TODO: Account for short-circuiting of boolean expressions. + // For now, we evaluate the full expression no matter how it + // is structured. + TokenType::AndAnd => { + result.push_str(&format!("and {}, {}\nand $1, {}\n", reg2, reg1, reg1)); + } + TokenType::BarBar => { + result.push_str(&format!("or {}, {}\nand $1, {}\n", reg2, reg1, reg1)); + } + TokenType::And => { + result.push_str(&format!("and {}, {}\n", reg2, reg1)); + } + TokenType::Bar => { + result.push_str(&format!("or {}, {}\n", reg2, reg1)); + } + TokenType::Caret => { + result.push_str(&format!("xor {}, {}\n", reg2, reg1)); + } + TokenType::EqualsEquals => { + result.push_str(&format!("cmp {}, {}\n", reg2, reg1)); + result.push_str(&format!("sete %bl\n")); + result.push_str(&format!("movzbl %bl, {}\n", reg1)); + } + TokenType::NotEquals => { + result.push_str(&format!("cmp {}, {}\n", reg2, reg1)); + result.push_str(&format!("setne %bl\n")); + result.push_str(&format!("movzbl %bl, %ebx\n")); + } + TokenType::GreaterThan => { + result.push_str(&format!("cmp {}, {}\n", reg2, reg1)); + result.push_str(&format!("setg %bl\n")); + result.push_str(&format!("movzbl %bl, %ebx\n")); + } + TokenType::GreaterThanEquals => { + result.push_str(&format!("cmp {}, {}\n", reg2, reg1)); + result.push_str(&format!("setge %bl\n")); + result.push_str(&format!("movzbl %bl, %ebx\n")); + } + TokenType::LessThan => { + result.push_str(&format!("cmp {}, {}\n", reg2, reg1)); + result.push_str(&format!("setl %bl\n")); + result.push_str(&format!("movzbl %bl, %ebx\n")); + } + TokenType::LessThanEquals => { + result.push_str(&format!("cmp {}, {}\n", reg2, reg1)); + result.push_str(&format!("setle %bl\n")); + result.push_str(&format!("movzbl %bl, %ebx\n")); + } + _ => panic!("Unsupported operator: {:?}", token), + } + } + _ => panic!( + "Internal Error: Expected a binary expression, found: {:#?}", + expression + ), + } + result + } + + fn generate_integral_literal(&self, expression: &Expression) -> String { + match expression { + Expression::IntegerLiteral(token) => { + format!( + "mov ${}, {}\n", + token.value.clone(), + X86CodeGenerator::get_reg1(8) + ) + } + _ => panic!( + "Internal Error: Expected integral literal, found: {:#?}", + expression + ), + } + } + + fn generate_variable_expression(&self, expression: &Expression) -> String { + match expression { + Expression::Variable(token) => { + let definition = self + .symbol_table + .get(token.value.as_str()) + .unwrap_or_else(|| panic!("Undefined variable: {}", token.value.as_str())); + match definition { + Symbol::Variable { stack_offset, .. } => { + let mov_instruction = X86CodeGenerator::mov_mnemonic(definition.size()); + format!( + "{} {}(%rbp), {}\n", + mov_instruction, + *stack_offset, + X86CodeGenerator::get_reg1(definition.size()) + ) + } + _ => panic!(), + } + } + _ => panic!( + "Internal Error: Expected variable expression, found: {:#?}", + expression + ), + } + } + + fn generate_return_void() -> &'static str { + "mov %rbp, %rsp\npop %rbp\nret\n" + } + + fn generate_assignment(&mut self, expression: &Expression) -> String { + match expression { + Expression::Assignment(identifier, expr_node) => { + let variable = self.symbol_table.get(&identifier.value).unwrap_or_else(|| { + panic!( + "Assignment: the identifier `{}` is not defined", + identifier.value + ) + }); + let mut variable_stack_offset = 0; + let variable_size = variable.size(); + match variable { + Symbol::Variable { stack_offset, .. } => { + variable_stack_offset = *stack_offset; + } + _ => panic!(), + } + + // TODO support referential assignment + let mov_instruction = X86CodeGenerator::mov_mnemonic(variable.size()); + let mut result = self.generate_expression(expr_node); + + result.push_str(&format!( + "{} {}, {}(%rbp)\n", + mov_instruction, + X86CodeGenerator::get_reg1(variable_size), + variable_stack_offset + )); + result + } + _ => panic!(), + } + } + + fn unique_label(&mut self, prefix: &str) -> String { + return format!("{}_{}", prefix, Uuid::new_v4().simple()); + } + + fn generate_condition_block(&mut self, prefix: &str, condition: &str, body: &str) -> String { + let mut results = String::new(); + results.push_str(condition); + results.push_str(&format!("cmp $0, {}\n", X86CodeGenerator::get_reg1(8))); + let else_label = self.unique_label(prefix); + results.push_str(&format!("je {}\n", else_label)); + results.push_str(body); + results.push_str(&format!("{}:\n", else_label)); + results + } + + fn mov_mnemonic(size: usize) -> &'static str { + match size { + 1 => "movb", + 2 => "movw", + 4 => "movl", + 8 => "movq", + _ => panic!("Unsupported size `{}`.", size), + } + } + + fn get_reg1(size: usize) -> &'static str { + match size { + 1 | 2 | 4 => "%ebx", + 8 => "%rbx", + _ => panic!("Invalid register size: {}", size), + } + } + + fn get_reg2(size: usize) -> &'static str { + match size { + 1 | 2 | 4 => "%ecx", + 8 => "%rcx", + _ => panic!("Invalid register size: {}", size), + } + } } diff --git a/src/code_generation/x86/mod.rs b/src/code_generation/x86/mod.rs new file mode 100644 index 0000000..1bc099b --- /dev/null +++ b/src/code_generation/x86/mod.rs @@ -0,0 +1,2 @@ +pub mod generator; +pub mod symbol_table; diff --git a/src/code_generation/symbol_table.rs b/src/code_generation/x86/symbol_table.rs similarity index 100% rename from src/code_generation/symbol_table.rs rename to src/code_generation/x86/symbol_table.rs diff --git a/src/tests/variables.c b/src/tests/variables.c new file mode 100644 index 0000000..ffd2f44 --- /dev/null +++ b/src/tests/variables.c @@ -0,0 +1,36 @@ +// CASE Return Local +// RETURNS 1 + +int main() { + int x = 1; + return x; +} + +// CASE Return Global +// RETURNS 2 + +int d = 2; +int main() { + return d; +} + +// CASE Multiple Global Declarations +// RETURNS 3 + +int d; +int d; +int d = 3; + +int main() { + return d; +} + +// CASE Global and Local Declarations & Definitions +// RETURNS 4 + +int d = 3; + +int main() { + int d = 4; + return d; +} diff --git a/src/tests/variables_error.c b/src/tests/variables_error.c new file mode 100644 index 0000000..3f61b86 --- /dev/null +++ b/src/tests/variables_error.c @@ -0,0 +1,36 @@ +// CASE Multiple Global Definitions +// RETURNS 2 + +int d = 1; +int d = 2; + +int main() { + return d; +} + +// CASE Multiple Local Definitions +// RETURNS 2 + +int main() { + int d = 1; + int d = 2; + return d; +} + +// CASE Multiple Local Declarations +// RETURNS 2 + +int main() { + int d; + int d; + return d; +} + +// CASE Local Declaration and Definition +// RETURNS 2 + +int main() { + int d; + int d = 2; + return d; +} diff --git a/src/utils/test_utils.rs b/src/utils/test_utils.rs index b075b9d..3cb0b02 100644 --- a/src/utils/test_utils.rs +++ b/src/utils/test_utils.rs @@ -1,5 +1,6 @@ +use std::env; use std::fs::{remove_file, File}; -use std::io::Write; +use std::io::{Read, Write}; use std::process::Command; use uuid::Uuid; pub fn expect_exit_code(source: String, expected: i32) -> std::io::Result<()> { @@ -40,3 +41,51 @@ pub fn expect_exit_code(source: String, expected: i32) -> std::io::Result<()> { Ok(()) } + +#[derive(Debug)] +pub struct TestCase { + pub name: String, + pub source: String, + pub expected: i32, +} + +pub fn parse_test_file(path: &str) -> Vec { + let mut file = File::open(path).unwrap(); + let mut contents = String::new(); + file.read_to_string(&mut contents).unwrap(); + let test_strings = contents.split("// CASE ").skip(1).collect::>(); + let mut result = vec![]; + for test_string in test_strings { + let mut lines = test_string.lines(); + let name = lines.next().unwrap().clone().to_string(); + let expected = lines + .next() + .unwrap() + .strip_prefix("// RETURNS ") + .unwrap() + .parse::() + .unwrap(); + let source = lines.collect::>().join("\n"); + result.push(TestCase { + name, + source, + expected, + }); + } + result +} + +pub fn interpret_llvm_ir(ir: &str) -> i32 { + let id = Uuid::new_v4(); + let ir_path = format!("./{}.ll", id); + let mut ir_file = File::create(&ir_path).unwrap(); + ir_file.write_all(ir.as_bytes()).unwrap(); + + let mut output = Command::new("lli") + .arg(&ir_path) + .output() + .expect("Failed to compile generated code"); + let exit_code = output.status; + remove_file(&ir_path).unwrap(); + return exit_code.code().unwrap(); +} From b7645461db44ab5fc65de16a5927ec57ccf01912 Mon Sep 17 00:00:00 2001 From: Osama Ahmad Date: Tue, 24 Oct 2023 20:43:11 +0300 Subject: [PATCH 5/6] CI: install LLVM before running the tests --- .github/workflows/rust.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 31000a2..4023a08 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -15,7 +15,10 @@ jobs: runs-on: ubuntu-latest steps: + - uses: actions/checkout@v3 + - name: Install LLVM + run: sudo apt-get -y install llvm-dev clang - name: Build run: cargo build --verbose - name: Run tests From 965d4350995a185f30cbc973f093f32a9e744cc0 Mon Sep 17 00:00:00 2001 From: Osama Ahmad Date: Tue, 24 Oct 2023 21:01:13 +0300 Subject: [PATCH 6/6] Add -opaque-pointers flag to lli --- src/utils/test_utils.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/utils/test_utils.rs b/src/utils/test_utils.rs index 3cb0b02..cd80e1c 100644 --- a/src/utils/test_utils.rs +++ b/src/utils/test_utils.rs @@ -82,6 +82,7 @@ pub fn interpret_llvm_ir(ir: &str) -> i32 { ir_file.write_all(ir.as_bytes()).unwrap(); let mut output = Command::new("lli") + .arg("-opaque-pointers") .arg(&ir_path) .output() .expect("Failed to compile generated code");