326 lines
9.3 KiB
Rust
326 lines
9.3 KiB
Rust
use std::fs::File;
|
|
use std::io::{Read, Write};
|
|
use std::process::Command;
|
|
use tempfile::TempDir;
|
|
|
|
pub fn assemble(assembly: String) -> Vec<u8> {
|
|
let tmp_dir = TempDir::with_prefix("compiler_").expect("Failed to create temp directory");
|
|
let workdir = tmp_dir.path();
|
|
|
|
let stdlib_asm = &workdir
|
|
.join("stdlib.s")
|
|
.into_os_string()
|
|
.into_string()
|
|
.unwrap();
|
|
let stdlib_obj = &workdir
|
|
.join("stdlib.o")
|
|
.into_os_string()
|
|
.into_string()
|
|
.unwrap();
|
|
let program_asm = &workdir
|
|
.join("program.s")
|
|
.into_os_string()
|
|
.into_string()
|
|
.unwrap();
|
|
let program_obj = &workdir
|
|
.join("program.o")
|
|
.into_os_string()
|
|
.into_string()
|
|
.unwrap();
|
|
let output_file = &workdir
|
|
.join("a.out")
|
|
.into_os_string()
|
|
.into_string()
|
|
.unwrap();
|
|
|
|
let mut file = File::create(stdlib_asm.clone()).expect("Failed to write temp file!");
|
|
file.write_all(STDLIB_ASM_CODE.as_bytes())
|
|
.expect("Can't write to temp file!");
|
|
|
|
let mut file = File::create(program_asm.clone()).expect("Failed to write temp file!");
|
|
file.write_all(assembly.as_bytes())
|
|
.expect("Can't write to temp file!");
|
|
|
|
let as_out1 = Command::new("as")
|
|
.args(["-g", "-o", stdlib_obj, stdlib_asm])
|
|
.output()
|
|
.expect("Could not run 'as' command!");
|
|
|
|
let as_out2 = Command::new("as")
|
|
.args(["-g", "-o", program_obj, program_asm])
|
|
.output()
|
|
.expect("Could not run 'as' command!");
|
|
|
|
let ld_out = Command::new("ld")
|
|
.args(["-o", output_file, "-static", stdlib_obj, program_obj])
|
|
.output()
|
|
.expect("Could not run 'as' command!");
|
|
|
|
println!("{as_out1:?}");
|
|
println!("{as_out2:?}");
|
|
println!("{ld_out:?}");
|
|
println!("{workdir:?}");
|
|
|
|
let mut file = File::open(output_file).expect("Can't open compiler output!");
|
|
let mut output = Vec::new();
|
|
file.read_to_end(&mut output)
|
|
.expect("Can't read compiler output!");
|
|
output
|
|
}
|
|
|
|
/// Source of the small runtime that `assemble` assembles and links into every program.
///
/// It is GNU assembler input in AT&T syntax and talks to the kernel directly through
/// `syscall` (read = 0, write = 1, exit = 60). It defines:
/// - `_start`: calls `main`, then exits with status 0,
/// - `print_int`: prints a signed 64-bit integer and a newline, returns its argument,
/// - `print_bool`: prints `true` or `false` and a newline, returns its argument,
/// - `read_int`: reads one line from stdin and returns the integer formed by its digits;
///   exits with status 1 if nothing could be read.
///
/// `read_int` uses `r12` as a byte counter and restores the caller's value before returning.
const STDLIB_ASM_CODE: &str = "
    .global _start
    .global print_int
    .global print_bool
    .global read_int
    .extern main
    .section .text

# BEGIN START (we skip this part when linking with C)
# ***** Function '_start' *****
# Calls function 'main' and halts the program

_start:
    call main
    movq $60, %rax
    xorq %rdi, %rdi
    syscall
# END START

# ***** Function 'print_int' *****
# Prints a 64-bit signed integer followed by a newline.
#
# We'll build up the digits to print on the stack.
# We generate the least significant digit first,
# and the stack grows downward, so that works out nicely.
#
# Algorithm:
#     push(newline)
#     if x < 0:
#         negative = true
#         x = -x
#     while x > 0:
#         push(digit for (x % 10))
#         x = x / 10
#     if negative:
#         push(minus sign)
#     syscall 'write' with pushed data
#     return the original argument
#
# Registers:
# - rdi = our input number, which we divide down as we go
# - rsp = stack pointer, pointing to the next character to emit.
# - rbp = pointer to one after the last byte of our output (which grows downward)
# - r9 = whether the number was negative
# - r10 = a copy of the original input, so we can return it
# - rax, rcx and rdx are used by intermediate computations

print_int:
    pushq %rbp               # Save previous stack frame pointer
    movq %rsp, %rbp          # Set stack frame pointer
    movq %rdi, %r10          # Back up original input
    decq %rsp                # Point rsp at first byte of output
                             # TODO: this non-alignment confuses debuggers. Use a different register?

    # Add newline as the last output byte
    movb $10, (%rsp)         # ASCII newline = 10
    decq %rsp

    # Check for zero and negative cases
    xorq %r9, %r9
    xorq %rax, %rax
    cmpq $0, %rdi
    je .Ljust_zero
    jge .Ldigit_loop
    incq %r9                 # If < 0, set %r9 to 1

.Ldigit_loop:
    cmpq $0, %rdi
    je .Ldigits_done         # Loop done when input = 0

    # Divide rdi by 10
    movq %rdi, %rax
    movq $10, %rcx
    cqto
    idivq %rcx               # Sets rax = quotient and rdx = remainder

    movq %rax, %rdi          # The quotient becomes our remaining input
    cmpq $0, %rdx            # If the remainder is negative (because the input is), negate it
    jge .Lnot_negative
    negq %rdx
.Lnot_negative:
    addq $48, %rdx           # ASCII '0' = 48. Add the remainder to get the correct digit.
    movb %dl, (%rsp)         # Store the digit in the output
    decq %rsp
    jmp .Ldigit_loop

.Ljust_zero:
    movb $48, (%rsp)         # ASCII '0' = 48
    decq %rsp

.Ldigits_done:

    # Add minus sign if negative
    cmpq $0, %r9
    je .Lminus_done
    movb $45, (%rsp)         # ASCII '-' = 45
    decq %rsp
.Lminus_done:

    # Call syscall 'write'
    movq $1, %rax            # rax = syscall number for write
    movq $1, %rdi            # rdi = file handle for stdout
    # rsi = pointer to message
    movq %rsp, %rsi
    incq %rsi
    # rdx = number of bytes
    movq %rbp, %rdx
    subq %rsp, %rdx
    decq %rdx
    syscall

    # Restore stack registers and return the original input
    movq %rbp, %rsp
    popq %rbp
    movq %r10, %rax
    ret


# ***** Function 'print_bool' *****
# Prints either 'true' or 'false', followed by a newline.
print_bool:
    pushq %rbp               # Save previous stack frame pointer
    movq %rsp, %rbp          # Set stack frame pointer
    movq %rdi, %r10          # Back up original input

    cmpq $0, %rdi            # See if the argument is false (i.e. 0)
    jne .Ltrue
    movq $false_str, %rsi    # If so, set %rsi to the address of the string for false
    movq $false_str_len, %rdx       # and %rdx to the length of that string,
    jmp .Lwrite
.Ltrue:
    movq $true_str, %rsi     # otherwise do the same with the string for true.
    movq $true_str_len, %rdx

.Lwrite:
    # Call syscall 'write'
    movq $1, %rax            # rax = syscall number for write
    movq $1, %rdi            # rdi = file handle for stdout
    # rsi = pointer to message (already set above)
    # rdx = number of bytes (already set above)
    syscall

    # Restore stack registers and return the original input
    movq %rbp, %rsp
    popq %rbp
    movq %r10, %rax
    ret

true_str:
    .ascii \"true\\n\"
true_str_len = . - true_str
false_str:
    .ascii \"false\\n\"
false_str_len = . - false_str

# ***** Function 'read_int' *****
# Reads an integer from stdin, skipping non-digit characters, until a newline.
#
# To avoid the complexity of buffering, it very inefficiently
# makes a syscall to read each byte.
#
# It crashes the program if input could not be read.
read_int:
    pushq %rbp           # Save previous stack frame pointer
    movq %rsp, %rbp      # Set stack frame pointer
    pushq %r12           # Back up r12 since it's callee-saved
    pushq $0             # Reserve space for input
                         # (we only write the lowest byte,
                         # but loading 64-bits at once is easier)

    xorq %r9, %r9        # Clear r9 - it'll store the minus sign
    xorq %r10, %r10      # Clear r10 - it'll accumulate our output
                         # Skip r11 - syscalls destroy it
    xorq %r12, %r12      # Clear r12 - it'll count the number of input bytes read.

    # Loop until a newline or end of input is encountered
.Lloop:
    # Call syscall 'read'
    xorq %rax, %rax      # syscall number for read = 0
    xorq %rdi, %rdi      # file handle for stdin = 0
    movq %rsp, %rsi      # rsi = pointer to buffer
    movq $1, %rdx        # rdx = buffer size
    syscall              # result in rax = number of bytes read,
                         # or 0 on end of input, -1 on error

    # Check return value: either -1, 0 or 1.
    cmpq $0, %rax
    jg .Lno_error
    je .Lend_of_input
    jmp .Lerror

.Lend_of_input:
    cmpq $0, %r12
    je .Lerror           # If we've read no input, it's an error.
    jmp .Lend            # Otherwise complete reading this input.

.Lno_error:
    incq %r12            # Increment input byte counter
    movq (%rsp), %r8     # Load input byte to r8

    # If the input byte is 10 (newline), exit the loop
    cmpq $10, %r8
    je .Lend

    # If the input byte is 45 (minus sign), negate r9
    cmpq $45, %r8
    jne .Lnegation_done
    xorq $1, %r9
.Lnegation_done:

    # If the input byte is not between 48 ('0') and 57 ('9')
    # then skip it as a junk character.
    cmpq $48, %r8
    jl .Lloop
    cmpq $57, %r8
    jg .Lloop

    # Subtract 48 to get a digit 0..9
    subq $48, %r8

    # Shift the digit onto the result
    imulq $10, %r10
    addq %r8, %r10

    jmp .Lloop

.Lend:
    # If it's a negative number, negate the result
    cmpq $0, %r9
    je .Lfinal_negation_done
    neg %r10
.Lfinal_negation_done:
    # Restore stack registers and return the result
    addq $8, %rsp        # Drop the input buffer so rsp points at the saved r12
    popq %r12            # Restore the caller's r12
    movq %rbp, %rsp
    popq %rbp
    movq %r10, %rax
    ret

.Lerror:
    # Write error message to stderr with syscall 'write'
    movq $1, %rax
    movq $2, %rdi
    movq $read_int_error_str, %rsi
    movq $read_int_error_str_len, %rdx
    syscall

    # Exit the program
    movq $60, %rax       # Syscall number for exit = 60.
    movq $1, %rdi        # Set exit code 1.
    syscall

read_int_error_str:
    .ascii \"Error: read_int() failed to read input\\n\"
read_int_error_str_len = . - read_int_error_str
";
|