From 9a6b4b230c7f2fa9780713143b16d1923fddc284 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Vili=20Sinerv=C3=A4?=
Date: Fri, 21 Feb 2025 17:05:08 +0200
Subject: [PATCH] Assembly generator, excluding intrinsics

---
 src/compiler.rs                    |  12 +-
 src/compiler/assembly_generator.rs | 260 +++++++++++++++++++++++++++++
 src/compiler/ir.rs                 |  21 ++-
 3 files changed, 285 insertions(+), 8 deletions(-)
 create mode 100644 src/compiler/assembly_generator.rs

diff --git a/src/compiler.rs b/src/compiler.rs
index d390bdd..ece6a00 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -1,13 +1,14 @@
 use std::io;
 
+use assembly_generator::generate_assembly;
 use interpreter::interpret;
-use ir::IrInstruction;
 use ir_generator::generate_ir;
 use parser::parse;
 use symtab::SymTab;
 use tokenizer::tokenize;
 use type_checker::type_check;
 
+mod assembly_generator;
 mod ast;
 mod interpreter;
 mod ir;
@@ -19,20 +20,19 @@ mod tokenizer;
 mod type_checker;
 mod variable;
 
-pub fn compile(code: &str) -> Vec<IrInstruction> {
+pub fn compile(code: &str) -> String {
     let tokens = tokenize(code);
     let mut ast = parse(&tokens);
     type_check(&mut ast, &mut SymTab::new_type_table());
-    generate_ir(&ast)
+    let ir = generate_ir(&ast);
+    generate_assembly(&ir)
 }
 
 pub fn start_compiler() {
     let lines = io::stdin().lines();
     for line in lines.map_while(Result::ok) {
         println!();
-        for instruction in compile(&line) {
-            println!("{instruction}");
-        }
+        println!("{}", compile(&line));
         println!();
     }
 }
diff --git a/src/compiler/assembly_generator.rs b/src/compiler/assembly_generator.rs
new file mode 100644
index 0000000..2ac92a5
--- /dev/null
+++ b/src/compiler/assembly_generator.rs
@@ -0,0 +1,260 @@
+//! Translates the compiler's IR into x86-64 assembly text (AT&T syntax).
+
+use std::collections::{HashMap, HashSet};
+
+use crate::compiler::ir::{IrInstruction, IrVar};
+
+/// Generates the assembly of a whole program as a single `main` function.
+///
+/// Every IR variable that is not a global gets its own 8-byte stack slot (see
+/// [`Locals`]), and each instruction is preceded by a `#` comment with its IR.
+///
+/// # Panics
+///
+/// Panics if a call has more than 6 arguments, if a jump target is not a
+/// label, if a variable has no stack slot, or if a call uses an intrinsic
+/// that is not implemented yet.
+pub fn generate_assembly(instructions: &[IrInstruction]) -> String {
+    const ARG_REGISTERS: [&str; 6] = ["%rdi", "%rsi", "%rdx", "%rcx", "%r8", "%r9"];
+    const INTRINSICS: [&str; 13] = [
+        "+",
+        "*",
+        "-",
+        "/",
+        "%",
+        "<",
+        "<=",
+        ">",
+        ">=",
+        "==",
+        "!=",
+        "unary_not",
+        "unary_-",
+    ];
+    let locals = Locals::new(instructions);
+    // %rsp is 16-byte aligned right after `pushq %rbp`, so an odd number of
+    // 8-byte slots needs 8 bytes of padding around every call (System V ABI).
+    let call_needs_padding = locals.stack_used() % 16 != 0;
+
+    let mut out = String::from(
+        "\t.extern print_int
+\t.extern print_bool
+\t.extern read_int
+\t.global main
+\t.type main, @function
+
+\t.section .text
+
+main:
+\tpushq %rbp
+\tmovq %rsp, %rbp
+",
+    );
+
+    out.push_str(&format!("\tsubq ${}, %rsp\n", locals.stack_used()));
+
+    for instruction in instructions {
+        out.push_str(&format!("\n\t# {}\n", instruction.instruction));
+
+        use crate::compiler::ir::IrInstructionType::*;
+        match &instruction.instruction {
+            LoadBoolConst(val, dest) => {
+                let val = i64::from(*val);
+                out.push_str(&format!("\tmovq ${val}, {}\n", locals.get_ref(dest)));
+            }
+            LoadIntConst(val, dest) => {
+                // `movq` to memory only takes a sign-extended 32-bit immediate;
+                // wider constants have to go through a register.
+                if i32::try_from(*val).is_ok() {
+                    out.push_str(&format!("\tmovq ${val}, {}\n", locals.get_ref(dest)));
+                } else {
+                    out.push_str(&format!("\tmovq ${val}, %rax\n"));
+                    out.push_str(&format!("\tmovq %rax, {}\n", locals.get_ref(dest)));
+                }
+            }
+            Copy(src, dest) => {
+                out.push_str(&format!("\tmovq {}, %rax\n", locals.get_ref(src)));
+                out.push_str(&format!("\tmovq %rax, {}\n", locals.get_ref(dest)));
+            }
+            Call(op_var, arg_vec, output_var) => {
+                assert!(
+                    arg_vec.len() <= ARG_REGISTERS.len(),
+                    "More than 6 args to a function '{op_var}' !"
+                );
+
+                if INTRINSICS.contains(&op_var.name.as_str()) {
+                    handle_intrinsics(&mut out, &locals, op_var, arg_vec, output_var);
+                } else {
+                    if call_needs_padding {
+                        out.push_str("\tsubq $8, %rsp\n");
+                    }
+
+                    for (var, register) in arg_vec.iter().zip(ARG_REGISTERS) {
+                        out.push_str(&format!("\tmovq {}, {register}\n", locals.get_ref(var)));
+                    }
+                    out.push_str(&format!("\tcallq {op_var}\n"));
+                    out.push_str(&format!("\tmovq %rax, {}\n", locals.get_ref(output_var)));
+
+                    if call_needs_padding {
+                        out.push_str("\taddq $8, %rsp\n");
+                    }
+                }
+            }
+            Jump(target) => {
+                let Label(target_name) = &target.instruction else {
+                    panic!("Tried to jump to non-label {target}")
+                };
+                out.push_str(&format!("\tjmp .L{target_name}\n"));
+            }
+            CondJump(cond, jmp_then, jmp_else) => {
+                let Label(then_target) = &jmp_then.instruction else {
+                    panic!("Tried to jump to non-label {jmp_then}")
+                };
+                let Label(else_target) = &jmp_else.instruction else {
+                    panic!("Tried to jump to non-label {jmp_else}")
+                };
+
+                out.push_str(&format!("\tcmpq $0, {}\n", locals.get_ref(cond)));
+                out.push_str(&format!("\tjne .L{then_target}\n"));
+                out.push_str(&format!("\tjmp .L{else_target}\n"));
+            }
+            Label(name) => out.push_str(&format!(".L{name}:\n")),
+        }
+    }
+
+    out.push_str(
+        "
+\tmovq $0, %rax
+\tmovq %rbp, %rsp
+\tpopq %rbp
+\tret",
+    );
+
+    out
+}
+
+/// Emits the instructions of the built-in operator `op_var`.
+///
+/// The result is computed in `%rax` and then stored in `output_var`'s slot.
+/// Only `unary_-` is implemented so far; all other operators hit `todo!()`.
+fn handle_intrinsics(
+    out: &mut String,
+    locals: &Locals,
+    op_var: &IrVar,
+    arg_vec: &[IrVar],
+    output_var: &IrVar,
+) {
+    let out_reg = "%rax";
+    match &*op_var.name {
+        "+" => {
+            todo!();
+        }
+        "*" => {
+            todo!();
+        }
+        "-" => {
+            todo!();
+        }
+        "/" => {
+            todo!();
+        }
+        "%" => {
+            todo!();
+        }
+        "<" => {
+            todo!();
+        }
+        "<=" => {
+            todo!();
+        }
+        ">" => {
+            todo!();
+        }
+        ">=" => {
+            todo!();
+        }
+        "==" => {
+            todo!();
+        }
+        "!=" => {
+            todo!();
+        }
+        "unary_not" => {
+            todo!();
+        }
+        "unary_-" => {
+            out.push_str(&format!(
+                "\tmovq {}, {out_reg}\n",
+                locals.get_ref(&arg_vec[0])
+            ));
+            out.push_str(&format!("\tnegq {out_reg}\n"));
+        }
+        _ => panic!("Unknown intrinsic {op_var}!"),
+    }
+    out.push_str(&format!(
+        "\tmovq {out_reg}, {}\n",
+        locals.get_ref(output_var)
+    ));
+}
+
+/// Stack-frame layout: maps every local IR variable to its `%rbp`-relative slot.
+#[derive(Debug)]
+struct Locals {
+    /// Total size of all slots in bytes.
+    stack_used: i64,
+    /// Assembly operand (e.g. `-8(%rbp)`) of each variable.
+    var_to_location: HashMap<IrVar, String>,
+}
+
+impl Locals {
+    /// Assigns an 8-byte slot to each non-global variable used by `instructions`.
+    pub fn new(instructions: &[IrInstruction]) -> Self {
+        let ir_vars = Self::get_all_ir_vars(instructions);
+
+        let mut stack_used = 0;
+        let mut var_to_location = HashMap::new();
+
+        for var in ir_vars {
+            stack_used += 8;
+            var_to_location.insert(var, format!("-{stack_used}(%rbp)"));
+        }
+
+        Self {
+            var_to_location,
+            stack_used,
+        }
+    }
+
+    /// Returns the assembly operand of `var`; panics if `var` has no slot.
+    pub fn get_ref(&self, var: &IrVar) -> &str {
+        self.var_to_location
+            .get(var)
+            .expect("Tried to use non-existent var in assembly generation!")
+    }
+
+    /// Total size in bytes of all slots.
+    pub fn stack_used(&self) -> i64 {
+        self.stack_used
+    }
+
+    /// Collects the distinct non-global variables, sorted so that the slot
+    /// assignment does not depend on `HashSet` iteration order.
+    fn get_all_ir_vars(instructions: &[IrInstruction]) -> Vec<IrVar> {
+        let mut var_set = HashSet::new();
+        let globals = IrVar::new_global_types()
+            .into_keys()
+            .collect::<HashSet<_>>();
+
+        for instruction in instructions {
+            for var in instruction.get_vars() {
+                if !globals.contains(&var) {
+                    var_set.insert(var);
+                }
+            }
+        }
+
+        let mut var_vec = var_set.into_iter().collect::<Vec<_>>();
+        var_vec.sort();
+        var_vec
+    }
+}
diff --git a/src/compiler/ir.rs b/src/compiler/ir.rs
index 1285e46..be790b6 100644
--- a/src/compiler/ir.rs
+++ b/src/compiler/ir.rs
@@ -2,7 +2,7 @@ use std::{collections::HashMap, fmt};
 
 use crate::compiler::{token::CodeLocation, variable::Type};
 
-#[derive(PartialEq, Clone, Eq, Hash)]
+#[derive(PartialEq, Clone, Eq, Hash, Ord, PartialOrd)]
 pub struct IrVar {
     pub name: String,
 }
@@ -68,6 +68,23 @@ impl IrInstruction {
     pub fn new(loc: CodeLocation, instruction: IrInstructionType) -> Self {
         Self { loc, instruction }
     }
+
+    /// Returns every variable this instruction mentions, including the callee
+    /// of a `Call`; `Jump` and `Label` mention none.
+    pub fn get_vars(&self) -> Vec<IrVar> {
+        use IrInstructionType::*;
+        match &self.instruction {
+            LoadBoolConst(_, var) | LoadIntConst(_, var) | CondJump(var, _, _) => vec![var.clone()],
+            Copy(var1, var2) => vec![var1.clone(), var2.clone()],
+            Call(var1, var_vec, var2) => {
+                let mut var_vec = var_vec.clone();
+                var_vec.push(var1.clone());
+                var_vec.push(var2.clone());
+                var_vec
+            }
+            Jump(_) | Label(_) => vec![],
+        }
+    }
 }
 
 #[derive(Debug, PartialEq, Clone, Eq, Hash)]
@@ -92,7 +109,7 @@ impl fmt::Display for IrInstructionType {
             IrInstructionType::CondJump(cond, then_dest, else_dest) => {
                 format!("CondJump({cond}, {then_dest}, {else_dest})")
             }
-            IrInstructionType::Label(name) => format!("\nLabel({name})"),
+            IrInstructionType::Label(name) => format!("Label({name})"),
         };
 
         write!(f, "{}", string)