1
0
Fork 0
This repository has been archived on 2025-03-30. You can view files and clone it, but you cannot make any changes to its state, such as pushing and creating new issues, pull requests or comments.
compiler-course/src/compiler/assembler.rs

326 lines
9.3 KiB
Rust

use std::fs::File;
use std::io::{Read, Write};
use std::process::Command;
use tempfile::TempDir;
/// Assembles `assembly` together with the bundled runtime in
/// `STDLIB_ASM_CODE`, links both into a static executable and returns the
/// executable's bytes.
///
/// All intermediate files (`stdlib.s`, `stdlib.o`, `program.s`, `program.o`
/// and `a.out`) live in a fresh temporary directory that is removed when the
/// function returns.
///
/// The external tools `as` and `ld` must be available on `PATH`.
///
/// # Panics
///
/// Panics if the temporary directory or any temporary file cannot be created
/// or written, if `as` or `ld` cannot be started or exits unsuccessfully (the
/// tool's stderr is included in the panic message), or if the linked
/// executable cannot be read back.
pub fn assemble(assembly: String) -> Vec<u8> {
    /// Runs `command` to completion and panics, naming the tool and quoting
    /// its stderr, if it could not be started or reported failure.
    fn run_tool(name: &str, command: &mut Command) {
        let result = command
            .output()
            .unwrap_or_else(|err| panic!("Could not run '{}' command: {}", name, err));
        if !result.status.success() {
            panic!(
                "'{}' failed ({}):\n{}",
                name,
                result.status,
                String::from_utf8_lossy(&result.stderr)
            );
        }
    }

    let tmp_dir = TempDir::with_prefix("compiler_").expect("Failed to create temp directory");
    let workdir = tmp_dir.path();

    // Keep these as paths: `Command::arg` accepts any `AsRef<OsStr>`, so there
    // is no need to force them through `String` (which would panic on a
    // non-UTF-8 temporary directory).
    let stdlib_asm = workdir.join("stdlib.s");
    let stdlib_obj = workdir.join("stdlib.o");
    let program_asm = workdir.join("program.s");
    let program_obj = workdir.join("program.o");
    let output_file = workdir.join("a.out");

    // Each file handle is dropped (and therefore closed) at the end of its
    // iteration, before the assembler reads the file.
    for (path, source) in [
        (&stdlib_asm, STDLIB_ASM_CODE),
        (&program_asm, assembly.as_str()),
    ] {
        let mut file = File::create(path).expect("Failed to write temp file!");
        file.write_all(source.as_bytes())
            .expect("Can't write to temp file!");
    }

    run_tool(
        "as",
        Command::new("as")
            .arg("-g")
            .arg("-o")
            .arg(&stdlib_obj)
            .arg(&stdlib_asm),
    );
    run_tool(
        "as",
        Command::new("as")
            .arg("-g")
            .arg("-o")
            .arg(&program_obj)
            .arg(&program_asm),
    );
    run_tool(
        "ld",
        Command::new("ld")
            .arg("-o")
            .arg(&output_file)
            .arg("-static")
            .arg(&stdlib_obj)
            .arg(&program_obj),
    );

    // `tmp_dir` must stay alive until the executable has been read, because
    // dropping it deletes the directory.
    let mut output = Vec::new();
    File::open(&output_file)
        .expect("Can't open compiler output!")
        .read_to_end(&mut output)
        .expect("Can't read compiler output!");
    output
}
/// x86-64 (AT&T syntax) assembly source of the small runtime that is
/// assembled and linked into every compiled program.
///
/// It defines the global symbols `_start`, `print_int`, `print_bool` and
/// `read_int`, and expects the program to provide an external `main`.
/// `_start` calls `main` and then exits with status 0. All I/O is done with
/// raw `syscall` instructions (`read` = 0, `write` = 1, `exit` = 60).
///
/// `read_int` saves `%r12` on entry and restores it from its slot at
/// `-8(%rbp)` on return. A plain `popq` would restore the wrong value,
/// because the one-byte input buffer is pushed on top of the saved register.
const STDLIB_ASM_CODE: &str = "
.global _start
.global print_int
.global print_bool
.global read_int
.extern main
.section .text
# BEGIN START (we skip this part when linking with C)
# ***** Function '_start' *****
# Calls function 'main' and halts the program
_start:
call main
movq $60, %rax
xorq %rdi, %rdi
syscall
# END START
# ***** Function 'print_int' *****
# Prints a 64-bit signed integer followed by a newline.
#
# We'll build up the digits to print on the stack.
# We generate the least significant digit first,
# and the stack grows downward, so that works out nicely.
#
# Algorithm:
# push(newline)
# if x < 0:
# negative = true
# x = -x
# while x > 0:
# push(digit for (x % 10))
# x = x / 10
# if negative:
# push(minus sign)
# syscall 'write' with pushed data
# return the original argument
#
# Registers:
# - rdi = our input number, which we divide down as we go
# - rsp = stack pointer, pointing to the next character to emit.
# - rbp = pointer to one after the last byte of our output (which grows downward)
# - r9 = whether the number was negative
# - r10 = a copy of the original input, so we can return it
# - rax, rcx and rdx are used by intermediate computations
print_int:
pushq %rbp # Save previous stack frame pointer
movq %rsp, %rbp # Set stack frame pointer
movq %rdi, %r10 # Back up original input
decq %rsp # Point rsp at first byte of output
# TODO: this non-alignment confuses debuggers. Use a different register?
# Add newline as the last output byte
movb $10, (%rsp) # ASCII newline = 10
decq %rsp
# Check for zero and negative cases
xorq %r9, %r9
xorq %rax, %rax
cmpq $0, %rdi
je .Ljust_zero
jge .Ldigit_loop
incq %r9 # If < 0, set %r9 to 1
.Ldigit_loop:
cmpq $0, %rdi
je .Ldigits_done # Loop done when input = 0
# Divide rdi by 10
movq %rdi, %rax
movq $10, %rcx
cqto
idivq %rcx # Sets rax = quotient and rdx = remainder
movq %rax, %rdi # The quotient becomes our remaining input
cmpq $0, %rdx # If the remainder is negative (because the input is), negate it
jge .Lnot_negative
negq %rdx
.Lnot_negative:
addq $48, %rdx # ASCII '0' = 48. Add the remainder to get the correct digit.
movb %dl, (%rsp) # Store the digit in the output
decq %rsp
jmp .Ldigit_loop
.Ljust_zero:
movb $48, (%rsp) # ASCII '0' = 48
decq %rsp
.Ldigits_done:
# Add minus sign if negative
cmpq $0, %r9
je .Lminus_done
movb $45, (%rsp) # ASCII '-' = 45
decq %rsp
.Lminus_done:
# Call syscall 'write'
movq $1, %rax # rax = syscall number for write
movq $1, %rdi # rdi = file handle for stdout
# rsi = pointer to message
movq %rsp, %rsi
incq %rsi
# rdx = number of bytes
movq %rbp, %rdx
subq %rsp, %rdx
decq %rdx
syscall
# Restore stack registers and return the original input
movq %rbp, %rsp
popq %rbp
movq %r10, %rax
ret
# ***** Function 'print_bool' *****
# Prints either 'true' or 'false', followed by a newline.
print_bool:
pushq %rbp # Save previous stack frame pointer
movq %rsp, %rbp # Set stack frame pointer
movq %rdi, %r10 # Back up original input
cmpq $0, %rdi # See if the argument is false (i.e. 0)
jne .Ltrue
movq $false_str, %rsi # If so, set %rsi to the address of the string for false
movq $false_str_len, %rdx # and %rdx to the length of that string,
jmp .Lwrite
.Ltrue:
movq $true_str, %rsi # otherwise do the same with the string for true.
movq $true_str_len, %rdx
.Lwrite:
# Call syscall 'write'
movq $1, %rax # rax = syscall number for write
movq $1, %rdi # rdi = file handle for stdout
# rsi = pointer to message (already set above)
# rdx = number of bytes (already set above)
syscall
# Restore stack registers and return the original input
movq %rbp, %rsp
popq %rbp
movq %r10, %rax
ret
true_str:
.ascii \"true\\n\"
true_str_len = . - true_str
false_str:
.ascii \"false\\n\"
false_str_len = . - false_str
# ***** Function 'read_int' *****
# Reads an integer from stdin, skipping non-digit characters, until a newline.
#
# To avoid the complexity of buffering, it very inefficiently
# makes a syscall to read each byte.
#
# It crashes the program if input could not be read.
read_int:
pushq %rbp # Save previous stack frame pointer
movq %rsp, %rbp # Set stack frame pointer
pushq %r12 # Back up r12 since it's callee-saved
pushq $0 # Reserve space for input
# (we only write the lowest byte,
# but loading 64-bits at once is easier)
xorq %r9, %r9 # Clear r9 - it'll store the minus sign
xorq %r10, %r10 # Clear r10 - it'll accumulate our output
# Skip r11 - syscalls destroy it
xorq %r12, %r12 # Clear r12 - it'll count the number of input bytes read.
# Loop until a newline or end of input is encountered
.Lloop:
# Call syscall 'read'
xorq %rax, %rax # syscall number for read = 0
xorq %rdi, %rdi # file handle for stdin = 0
movq %rsp, %rsi # rsi = pointer to buffer
movq $1, %rdx # rdx = buffer size
syscall # result in rax = number of bytes read,
# or 0 on end of input, -1 on error
# Check return value: either -1, 0 or 1.
cmpq $0, %rax
jg .Lno_error
je .Lend_of_input
jmp .Lerror
.Lend_of_input:
cmpq $0, %r12
je .Lerror # If we've read no input, it's an error.
jmp .Lend # Otherwise complete reading this input.
.Lno_error:
incq %r12 # Increment input byte counter
movq (%rsp), %r8 # Load input byte to r8
# If the input byte is 10 (newline), exit the loop
cmpq $10, %r8
je .Lend
# If the input byte is 45 (minus sign), negate r9
cmpq $45, %r8
jne .Lnegation_done
xorq $1, %r9
.Lnegation_done:
# If the input byte is not between 48 ('0') and 57 ('9')
# then skip it as a junk character.
cmpq $48, %r8
jl .Lloop
cmpq $57, %r8
jg .Lloop
# Subtract 48 to get a digit 0..9
subq $48, %r8
# Shift the digit onto the result
imulq $10, %r10
addq %r8, %r10
jmp .Lloop
.Lend:
# If it's a negative number, negate the result
cmpq $0, %r9
je .Lfinal_negation_done
neg %r10
.Lfinal_negation_done:
# Restore stack registers and return the result.
# The saved r12 sits just below the frame pointer; the top of the stack
# is the input buffer slot, so popping here would restore the wrong value.
movq -8(%rbp), %r12
movq %rbp, %rsp
popq %rbp
movq %r10, %rax
ret
.Lerror:
# Write error message to stderr with syscall 'write'
movq $1, %rax
movq $2, %rdi
movq $read_int_error_str, %rsi
movq $read_int_error_str_len, %rdx
syscall
# Exit the program
movq $60, %rax # Syscall number for exit = 60.
movq $1, %rdi # Set exit code 1.
syscall
read_int_error_str:
.ascii \"Error: read_int() failed to read input\\n\"
read_int_error_str_len = . - read_int_error_str
";