1
0
Fork 0

Assembly generator, excluding intrinsics

This commit is contained in:
Vili Sinervä 2025-02-21 17:05:08 +02:00
parent 0c781f84b5
commit 9a6b4b230c
No known key found for this signature in database
GPG key ID: DF8FEAF54EFAC996
3 changed files with 261 additions and 8 deletions

View file

@ -1,13 +1,14 @@
use std::io;
use assembly_generator::generate_assembly;
use interpreter::interpret;
use ir::IrInstruction;
use ir_generator::generate_ir;
use parser::parse;
use symtab::SymTab;
use tokenizer::tokenize;
use type_checker::type_check;
mod assembly_generator;
mod ast;
mod interpreter;
mod ir;
@ -19,20 +20,19 @@ mod tokenizer;
mod type_checker;
mod variable;
pub fn compile(code: &str) -> Vec<IrInstruction> {
pub fn compile(code: &str) -> String {
let tokens = tokenize(code);
let mut ast = parse(&tokens);
type_check(&mut ast, &mut SymTab::new_type_table());
generate_ir(&ast)
let ir = generate_ir(&ast);
generate_assembly(&ir)
}
pub fn start_compiler() {
let lines = io::stdin().lines();
for line in lines.map_while(Result::ok) {
println!();
for instruction in compile(&line) {
println!("{instruction}");
}
println!("{}", compile(&line));
println!();
}
}

View file

@ -0,0 +1,238 @@
use std::collections::{HashMap, HashSet};
use crate::compiler::ir::{IrInstruction, IrVar};
/// Generates x86-64 assembly (AT&T syntax) for a whole program given as IR.
///
/// The output defines a single `main` function consisting of:
/// 1. a prologue that saves `%rbp`, sets up the frame pointer and reserves
///    `locals.stack_used()` bytes of stack for the IR variables,
/// 2. the code for every IR instruction, each preceded by a `#` comment that
///    shows the instruction it was generated from,
/// 3. an epilogue that puts 0 in `%rax`, restores the stack and returns.
///
/// Calls whose callee name is one of the built-in operators are delegated to
/// `handle_intrinsics`; every other call passes up to six arguments in the
/// argument registers and is emitted as a `callq`.
///
/// # Panics
///
/// Panics if a call has more than six arguments, if a jump targets something
/// other than a label, or if an instruction refers to a variable that has no
/// stack location. Any panic raised by `handle_intrinsics` propagates as well.
pub fn generate_assembly(instructions: &Vec<IrInstruction>) -> String {
    use crate::compiler::ir::IrInstructionType::*;

    // Registers used for the first six call arguments, in order.
    const ARG_REGISTERS: [&str; 6] = ["%rdi", "%rsi", "%rdx", "%rcx", "%r8", "%r9"];
    // Callee names that are expanded inline instead of being called.
    const INTRINSICS: [&str; 13] = [
        "+",
        "*",
        "-",
        "/",
        "%",
        "<",
        "<=",
        ">",
        ">=",
        "==",
        "!=",
        "unary_not",
        "unary_-",
    ];

    let locals = Locals::new(instructions);
    // When the locals do not fill a multiple of 16 bytes, every real call is
    // wrapped in an extra 8-byte adjustment so the stack is aligned at the call.
    let needs_padding = locals.stack_used % 16 != 0;

    let mut out = String::from(concat!(
        "\t.extern print_int\n",
        "\t.extern print_bool\n",
        "\t.extern read_int\n",
        "\t.global main\n",
        "\t.type main, @function\n",
        "\t.section .text\n",
        "main:\n",
        "\tpushq %rbp\n",
        "\tmovq %rsp, %rbp\n"
    ));
    out.push_str(&format!("\tsubq ${}, %rsp\n", locals.stack_used()));

    for ir in instructions {
        out.push_str(&format!("\n\t# {}\n", ir.instruction));

        let code = match &ir.instruction {
            LoadBoolConst(val, dest) => {
                format!("\tmovq ${}, {}\n", u8::from(*val), locals.get_ref(dest))
            }
            LoadIntConst(val, dest) => {
                // A `movq` to memory only accepts a sign-extended 32-bit
                // immediate; larger constants have to go through a register.
                if (i64::from(i32::MIN)..=i64::from(i32::MAX)).contains(val) {
                    format!("\tmovq ${val}, {}\n", locals.get_ref(dest))
                } else {
                    format!(
                        "\tmovq ${val}, %rax\n\tmovq %rax, {}\n",
                        locals.get_ref(dest)
                    )
                }
            }
            Copy(src, dest) => format!(
                "\tmovq {}, %rax\n\tmovq %rax, {}\n",
                locals.get_ref(src),
                locals.get_ref(dest)
            ),
            Call(op_var, arg_vec, output_var) => {
                assert!(
                    arg_vec.len() <= 6,
                    "More than 6 args to a function '{op_var}' !"
                );

                let mut call = String::new();
                if INTRINSICS.contains(&op_var.name.as_str()) {
                    handle_intrinsics(&mut call, &locals, op_var, arg_vec, output_var);
                } else {
                    if needs_padding {
                        call.push_str("\tsubq $8, %rsp\n");
                    }
                    // The assertion above guarantees there is a register for
                    // every argument, so zipping never drops one.
                    for (register, var) in ARG_REGISTERS.iter().zip(arg_vec.iter()) {
                        call.push_str(&format!(
                            "\tmovq {}, {}\n",
                            locals.get_ref(var),
                            register
                        ));
                    }
                    call.push_str(&format!("\tcallq {op_var}\n"));
                    call.push_str(&format!("\tmovq %rax, {}\n", locals.get_ref(output_var)));
                    if needs_padding {
                        call.push_str("\taddq $8, %rsp\n");
                    }
                }
                call
            }
            Jump(target) => match &target.instruction {
                Label(target_name) => format!("\tjmp .L{target_name}\n"),
                _ => panic!("Tried to jump to non-label {target}"),
            },
            CondJump(cond, jmp_then, jmp_else) => {
                let (then_target, else_target) =
                    match (&jmp_then.instruction, &jmp_else.instruction) {
                        (Label(then_name), Label(else_name)) => (then_name, else_name),
                        (Label(_), _) => panic!("Tried to jump to non-label {jmp_else}"),
                        _ => panic!("Tried to jump to non-label {jmp_then}"),
                    };
                // Any non-zero condition value takes the "then" branch.
                format!(
                    "\tcmpq $0, {}\n\tjne .L{then_target}\n\tjmp .L{else_target}\n",
                    locals.get_ref(cond)
                )
            }
            Label(name) => format!(".L{name}:\n"),
        };
        out.push_str(&code);
    }

    out.push_str(concat!(
        "\n",
        "\tmovq $0, %rax\n",
        "\tmovq %rbp, %rsp\n",
        "\tpopq %rbp\n",
        "\tret"
    ));
    out
}
/// Emits inline assembly for a built-in operator and stores its result.
///
/// The operator is selected by `op_var.name`. Operands are read from the stack
/// locations of `arg_vec`, the result is computed in `%rax`, and a final
/// `movq` copies it into the location of `output_var`.
///
/// * `+`, `-`, `*` load the first operand into `%rax` and apply
///   `addq`/`subq`/`imulq` with the second.
/// * `/` and `%` sign-extend `%rax` into `%rdx:%rax` with `cqto` and use
///   `idivq`; the quotient ends up in `%rax`, the remainder in `%rdx`.
/// * Comparisons clear `%rax`, compare the operands as signed 64-bit values
///   and set `%al` with the matching `setcc`, yielding 0 or 1. `%rdx` is used
///   as scratch.
/// * `unary_not` flips the lowest bit (booleans are stored as 0 or 1 by
///   `LoadBoolConst`); `unary_-` negates with `negq`.
///
/// # Panics
///
/// Panics if `op_var` is not a known intrinsic, if fewer operands are supplied
/// than the operator needs, or if a variable has no stack location.
fn handle_intrinsics(
    out: &mut String,
    locals: &Locals,
    op_var: &IrVar,
    arg_vec: &[IrVar],
    output_var: &IrVar,
) {
    // The comparison and division sequences below hard-code %rax/%al/%rdx, so
    // this must stay %rax unless those sequences are updated as well.
    let out_reg = "%rax";

    // Assembly reference of the i-th operand.
    let arg = |i: usize| match arg_vec.get(i) {
        Some(var) => locals.get_ref(var),
        None => panic!("Intrinsic {op_var} is missing argument {i}!"),
    };

    // out_reg = arg0 <insn> arg1
    let arithmetic = |insn: &str| {
        format!(
            "\tmovq {}, {out_reg}\n\t{insn} {}, {out_reg}\n",
            arg(0),
            arg(1)
        )
    };
    // Signed division of arg0 by arg1: quotient in %rax, remainder in %rdx.
    let division = || format!("\tmovq {}, %rax\n\tcqto\n\tidivq {}\n", arg(0), arg(1));
    // %rax = (arg0 <cmp> arg1) ? 1 : 0. The xor has to come before the cmp
    // because it overwrites the flags that setcc reads.
    let comparison = |setcc: &str| {
        format!(
            "\txorq %rax, %rax\n\tmovq {}, %rdx\n\tcmpq {}, %rdx\n\t{setcc} %al\n",
            arg(0),
            arg(1)
        )
    };

    let code = match &*op_var.name {
        "+" => arithmetic("addq"),
        "*" => arithmetic("imulq"),
        "-" => arithmetic("subq"),
        "/" => division(),
        "%" => division() + "\tmovq %rdx, %rax\n",
        "<" => comparison("setl"),
        "<=" => comparison("setle"),
        ">" => comparison("setg"),
        ">=" => comparison("setge"),
        "==" => comparison("sete"),
        "!=" => comparison("setne"),
        "unary_not" => format!("\tmovq {}, {out_reg}\n\txorq $1, {out_reg}\n", arg(0)),
        "unary_-" => format!("\tmovq {}, {out_reg}\n\tnegq {out_reg}\n", arg(0)),
        _ => panic!("Unknown intrinsic {op_var}!"),
    };
    out.push_str(&code);

    out.push_str(&format!(
        "\tmovq {out_reg}, {}\n",
        locals.get_ref(output_var)
    ));
}
/// Stack layout of the IR variables used by the generated code.
#[derive(Debug)]
struct Locals {
    /// Total stack space taken by the variables, in bytes.
    stack_used: i64,
    /// Assembly reference (as a string, e.g. `-8(%rbp)`) for each IR variable.
    var_to_location: HashMap<IrVar, String>,
}
impl Locals {
    /// Assigns an 8-byte, `%rbp`-relative stack slot to every non-global IR
    /// variable mentioned in `instructions`.
    ///
    /// Slots are handed out in sorted variable order: the first variable gets
    /// `-8(%rbp)`, the next `-16(%rbp)`, and so on.
    pub fn new(instructions: &[IrInstruction]) -> Self {
        let ir_vars = Self::get_all_ir_vars(instructions);

        let mut stack_used = 0;
        let mut var_to_location = HashMap::with_capacity(ir_vars.len());
        for var in ir_vars {
            stack_used += 8;
            var_to_location.insert(var, format!("-{stack_used}(%rbp)"));
        }

        Self {
            var_to_location,
            stack_used,
        }
    }

    /// Returns the assembly reference of `var`.
    ///
    /// # Panics
    ///
    /// Panics if `var` was not given a slot by [`Locals::new`].
    pub fn get_ref(&self, var: &IrVar) -> &str {
        self.var_to_location
            .get(var)
            .map(String::as_str)
            .expect("Tried to use non-existant var in assembly generation!")
    }

    /// Total number of bytes of stack used by the variable slots.
    pub fn stack_used(&self) -> i64 {
        self.stack_used
    }

    /// Collects the distinct IR variables used by `instructions`, leaving out
    /// the globals reported by `IrVar::new_global_types`, in sorted order.
    fn get_all_ir_vars(instructions: &[IrInstruction]) -> Vec<IrVar> {
        let globals = IrVar::new_global_types()
            .into_keys()
            .collect::<HashSet<IrVar>>();

        let unique = instructions
            .iter()
            .flat_map(|instruction| instruction.get_vars())
            .filter(|var| !globals.contains(var))
            .collect::<HashSet<IrVar>>();

        // Hash-set iteration order is unspecified; sorting makes the slot
        // assignment reproducible. The set has already removed duplicates, so
        // an unstable sort gives the same result as a stable one.
        let mut var_vec = unique.into_iter().collect::<Vec<IrVar>>();
        var_vec.sort_unstable();
        var_vec
    }
}

View file

@ -2,7 +2,7 @@ use std::{collections::HashMap, fmt};
use crate::compiler::{token::CodeLocation, variable::Type};
#[derive(PartialEq, Clone, Eq, Hash)]
#[derive(PartialEq, Clone, Eq, Hash, Ord, PartialOrd)]
pub struct IrVar {
pub name: String,
}
@ -68,6 +68,21 @@ impl IrInstruction {
pub fn new(loc: CodeLocation, instruction: IrInstructionType) -> Self {
Self { loc, instruction }
}
/// Returns the IR variables that appear directly in this instruction.
///
/// For a `Call` the result holds the arguments first, then the callee
/// variable, then the output variable. `Jump` and `Label` carry no variables.
/// The jump targets of a `CondJump` are not inspected; only its condition
/// variable is returned.
pub fn get_vars(&self) -> Vec<IrVar> {
    use IrInstructionType::*;
    // Every variant is listed explicitly so that adding a new instruction
    // type fails to compile here instead of silently reporting no variables.
    match &self.instruction {
        LoadBoolConst(_, var) | LoadIntConst(_, var) | CondJump(var, _, _) => vec![var.clone()],
        Copy(src, dest) => vec![src.clone(), dest.clone()],
        Call(func, args, dest) => args.iter().chain([func, dest]).cloned().collect(),
        Jump(_) | Label(_) => Vec::new(),
    }
}
}
#[derive(Debug, PartialEq, Clone, Eq, Hash)]
@ -92,7 +107,7 @@ impl fmt::Display for IrInstructionType {
IrInstructionType::CondJump(cond, then_dest, else_dest) => {
format!("CondJump({cond}, {then_dest}, {else_dest})")
}
IrInstructionType::Label(name) => format!("\nLabel({name})"),
IrInstructionType::Label(name) => format!("Label({name})"),
};
write!(f, "{}", string)