diff --git a/.test-gadget/auth_token b/.test-gadget/auth_token new file mode 100644 index 0000000..b0838a7 --- /dev/null +++ b/.test-gadget/auth_token @@ -0,0 +1 @@ +c7cf8994-380d-40c9-b8b9-be52ae7e92e7 \ No newline at end of file diff --git a/.test-gadget/course.json b/.test-gadget/course.json new file mode 100644 index 0000000..6059669 --- /dev/null +++ b/.test-gadget/course.json @@ -0,0 +1,3 @@ +{ + "server_base_url": "https://test-gadget.compilers.how" +} \ No newline at end of file diff --git a/.test-gadget/last_submission_id.txt b/.test-gadget/last_submission_id.txt new file mode 100644 index 0000000..6172cb1 --- /dev/null +++ b/.test-gadget/last_submission_id.txt @@ -0,0 +1 @@ +3dd0c210-4c98-49fc-aa2f-187425635164 \ No newline at end of file diff --git a/.test-gadget/last_submission_rsyncsig.bin b/.test-gadget/last_submission_rsyncsig.bin new file mode 100644 index 0000000..0f9fa40 Binary files /dev/null and b/.test-gadget/last_submission_rsyncsig.bin differ diff --git a/.test-gadget/test-gadget-client-linux b/.test-gadget/test-gadget-client-linux new file mode 100755 index 0000000..4f5aea0 Binary files /dev/null and b/.test-gadget/test-gadget-client-linux differ diff --git a/.test-gadget/test-gadget-client-macos b/.test-gadget/test-gadget-client-macos new file mode 100755 index 0000000..3d9762c Binary files /dev/null and b/.test-gadget/test-gadget-client-macos differ diff --git a/.test-gadget/test-gadget-client-windows.exe b/.test-gadget/test-gadget-client-windows.exe new file mode 100755 index 0000000..a4923c5 Binary files /dev/null and b/.test-gadget/test-gadget-client-windows.exe differ diff --git a/Cargo.lock b/Cargo.lock index f0b3f00..49f5e6d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11,12 +11,60 @@ dependencies = [ "memchr", ] +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + +[[package]] +name = 
"bitflags" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + [[package]] name = "compiler-course" version = "0.1.0" dependencies = [ + "base64", "json", "regex", + "tempfile", +] + +[[package]] +name = "errno" +version = "0.3.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" +dependencies = [ + "libc", + "windows-sys", +] + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "getrandom" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8" +dependencies = [ + "cfg-if", + "libc", + "wasi", + "windows-targets", ] [[package]] @@ -25,12 +73,30 @@ version = "0.12.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "078e285eafdfb6c4b434e0d31e8cfcb5115b651496faca5749b88fafd4f23bfd" +[[package]] +name = "libc" +version = "0.2.170" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "875b3680cb2f8f71bdcf9a30f38d48282f5d3c95cbf9b3fa57269bb5d5c06828" + +[[package]] +name = "linux-raw-sys" +version = "0.4.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" + [[package]] name = "memchr" version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" 
+[[package]] +name = "once_cell" +version = "1.20.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "945462a4b81e43c4e3ba96bd7b49d834c6f61198356aa858733bc4acf3cbe62e" + [[package]] name = "regex" version = "1.11.1" @@ -59,3 +125,121 @@ name = "regex-syntax" version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" + +[[package]] +name = "rustix" +version = "0.38.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys", +] + +[[package]] +name = "tempfile" +version = "3.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e5a0acb1f3f55f65cc4a866c361b2fb2a0ff6366785ae6fbb5f85df07ba230" +dependencies = [ + "cfg-if", + "fastrand", + "getrandom", + "once_cell", + "rustix", + "windows-sys", +] + +[[package]] +name = "wasi" +version = "0.13.3+wasi-0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26816d2e1a4a36a2940b96c5296ce403917633dff8f3440e9b236ed6f6bacad2" +dependencies = [ + "wit-bindgen-rt", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = 
"windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "wit-bindgen-rt" +version = "0.33.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c" +dependencies = [ + "bitflags", +] diff --git a/Cargo.toml b/Cargo.toml index aa8658a..b50c49f 100644 --- a/Cargo.toml +++ 
b/Cargo.toml @@ -4,5 +4,7 @@ version = "0.1.0" edition = "2021" [dependencies] +base64 = "0.22.1" json = "0.12.4" regex = "1.11.1" +tempfile = "3.17.1" diff --git a/src/compiler.rs b/src/compiler.rs index ece6a00..51ea4d6 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -1,6 +1,8 @@ use std::io; +use assembler::assemble; use assembly_generator::generate_assembly; +use base64::{engine::general_purpose, Engine}; use interpreter::interpret; use ir_generator::generate_ir; use parser::parse; @@ -8,6 +10,7 @@ use symtab::SymTab; use tokenizer::tokenize; use type_checker::type_check; +mod assembler; mod assembly_generator; mod ast; mod interpreter; @@ -25,14 +28,15 @@ pub fn compile(code: &str) -> String { let mut ast = parse(&tokens); type_check(&mut ast, &mut SymTab::new_type_table()); let ir = generate_ir(&ast); - generate_assembly(&ir) + let assembly = generate_assembly(&ir); + general_purpose::STANDARD.encode(&assemble(assembly)) } pub fn start_compiler() { let lines = io::stdin().lines(); for line in lines.map_while(Result::ok) { println!(); - println!("{}", compile(&line)); + println!("{:?}", compile(&line)); println!(); } } diff --git a/src/compiler/assembler.rs b/src/compiler/assembler.rs new file mode 100644 index 0000000..c15f660 --- /dev/null +++ b/src/compiler/assembler.rs @@ -0,0 +1,337 @@ +use std::fs::File; +use std::io::{Read, Write}; +use std::process::Command; +use tempfile::TempDir; + +/// Assembles `assembly` together with the bundled runtime in `STDLIB_ASM_CODE`, +/// links both objects into a static executable and returns its raw bytes. +/// +/// All intermediate files live in a fresh temporary directory. The external +/// `as` and `ld` commands must be available on `PATH`. +/// +/// # Panics +/// +/// Panics if a temporary path is not valid UTF-8, if the temporary files cannot be +/// written, if `as` or `ld` cannot be started, or if the linked output cannot be read. +pub fn assemble(assembly: String) -> Vec<u8> { + let tmp_dir = TempDir::with_prefix("compiler_").expect("Failed to create temp directory"); + let workdir = tmp_dir.path(); + + let stdlib_asm = &workdir + .join("stdlib.s") + .into_os_string() + .into_string() + .unwrap(); + let stdlib_obj = &workdir + .join("stdlib.o") + .into_os_string() + .into_string() + .unwrap(); + let program_asm = &workdir + .join("program.s") + .into_os_string() + .into_string() + .unwrap(); + let program_obj = &workdir + .join("program.o") + .into_os_string() + .into_string() + .unwrap(); + let 
output_file = &workdir + .join("a.out") + .into_os_string() + .into_string() + .unwrap(); + + let mut file = File::create(stdlib_asm).expect("Failed to write temp file!"); + file.write_all(STDLIB_ASM_CODE.as_bytes()) + .expect("Can't write to temp file!"); + + let mut file = File::create(program_asm).expect("Failed to write temp file!"); + file.write_all(assembly.as_bytes()) + .expect("Can't write to temp file!"); + + let as_out1 = Command::new("as") + .args(["-g", "-o", stdlib_obj, stdlib_asm]) + .output() + .expect("Could not run 'as' command!"); + + let as_out2 = Command::new("as") + .args(["-g", "-o", program_obj, program_asm]) + .output() + .expect("Could not run 'as' command!"); + + let ld_out = Command::new("ld") + .args(["-o", output_file, "-static", stdlib_obj, program_obj]) + .output() + .expect("Could not run 'ld' command!"); + + // Diagnostics go to stderr so they never mix with the compiler's stdout output. + eprintln!("{as_out1:?}"); + eprintln!("{as_out2:?}"); + eprintln!("{ld_out:?}"); + eprintln!("{workdir:?}"); + + let mut file = File::open(output_file).expect("Can't open compiler output!"); + let mut output = Vec::new(); + file.read_to_end(&mut output) + .expect("Can't read compiler output!"); + output +} + +const STDLIB_ASM_CODE: &str = " + .global _start + .global print_int + .global print_bool + .global read_int + .extern main + .section .text + +# BEGIN START (we skip this part when linking with C) +# ***** Function '_start' ***** +# Calls function 'main' and halts the program + +_start: + call main + movq $60, %rax + xorq %rdi, %rdi + syscall +# END START + +# ***** Function 'print_int' ***** +# Prints a 64-bit signed integer followed by a newline. +# +# We'll build up the digits to print on the stack. +# We generate the least significant digit first, +# and the stack grows downward, so that works out nicely. 
+# +# Algorithm: +# push(newline) +# if x < 0: +# negative = true +# x = -x +# while x > 0: +# push(digit for (x % 10)) +# x = x / 10 +# if negative: +# push(minus sign) +# syscall 'write' with pushed data +# return the original argument +# +# Registers: +# - rdi = our input number, which we divide down as we go +# - rsp = stack pointer, pointing to the next character to emit. +# - rbp = pointer to one after the last byte of our output (which grows downward) +# - r9 = whether the number was negative +# - r10 = a copy of the original input, so we can return it +# - rax, rcx and rdx are used by intermediate computations + +print_int: + pushq %rbp # Save previous stack frame pointer + movq %rsp, %rbp # Set stack frame pointer + movq %rdi, %r10 # Back up original input + decq %rsp # Point rsp at first byte of output + # TODO: this non-alignment confuses debuggers. Use a different register? + + # Add newline as the last output byte + movb $10, (%rsp) # ASCII newline = 10 + decq %rsp + + # Check for zero and negative cases + xorq %r9, %r9 + xorq %rax, %rax + cmpq $0, %rdi + je .Ljust_zero + jge .Ldigit_loop + incq %r9 # If < 0, set %r9 to 1 + +.Ldigit_loop: + cmpq $0, %rdi + je .Ldigits_done # Loop done when input = 0 + + # Divide rdi by 10 + movq %rdi, %rax + movq $10, %rcx + cqto + idivq %rcx # Sets rax = quotient and rdx = remainder + + movq %rax, %rdi # The quotient becomes our remaining input + cmpq $0, %rdx # If the remainder is negative (because the input is), negate it + jge .Lnot_negative + negq %rdx +.Lnot_negative: + addq $48, %rdx # ASCII '0' = 48. Add the remainder to get the correct digit. 
+ movb %dl, (%rsp) # Store the digit in the output + decq %rsp + jmp .Ldigit_loop + +.Ljust_zero: + movb $48, (%rsp) # ASCII '0' = 48 + decq %rsp + +.Ldigits_done: + + # Add minus sign if negative + cmpq $0, %r9 + je .Lminus_done + movb $45, (%rsp) # ASCII '-' = 45 + decq %rsp +.Lminus_done: + + # Call syscall 'write' + movq $1, %rax # rax = syscall number for write + movq $1, %rdi # rdi = file handle for stdout + # rsi = pointer to message + movq %rsp, %rsi + incq %rsi + # rdx = number of bytes + movq %rbp, %rdx + subq %rsp, %rdx + decq %rdx + syscall + + # Restore stack registers and return the original input + movq %rbp, %rsp + popq %rbp + movq %r10, %rax + ret + + +# ***** Function 'print_bool' ***** +# Prints either 'true' or 'false', followed by a newline. +print_bool: + pushq %rbp # Save previous stack frame pointer + movq %rsp, %rbp # Set stack frame pointer + movq %rdi, %r10 # Back up original input + + cmpq $0, %rdi # See if the argument is false (i.e. 0) + jne .Ltrue + movq $false_str, %rsi # If so, set %rsi to the address of the string for false + movq $false_str_len, %rdx # and %rdx to the length of that string, + jmp .Lwrite +.Ltrue: + movq $true_str, %rsi # otherwise do the same with the string for true. + movq $true_str_len, %rdx + +.Lwrite: + # Call syscall 'write' + movq $1, %rax # rax = syscall number for write + movq $1, %rdi # rdi = file handle for stdout + # rsi = pointer to message (already set above) + # rdx = number of bytes (already set above) + syscall + + # Restore stack registers and return the original input + movq %rbp, %rsp + popq %rbp + movq %r10, %rax + ret + +true_str: + .ascii \"true\\n\" +true_str_len = . - true_str +false_str: + .ascii \"false\\n\" +false_str_len = . - false_str + +# ***** Function 'read_int' ***** +# Reads an integer from stdin, skipping non-digit characters, until a newline. +# +# To avoid the complexity of buffering, it very inefficiently +# makes a syscall to read each byte. 
+# +# It crashes the program if input could not be read. +read_int: + pushq %rbp # Save previous stack frame pointer + movq %rsp, %rbp # Set stack frame pointer + pushq %r12 # Back up r12 since it's callee-saved + pushq $0 # Reserve space for input + # (we only write the lowest byte, + # but loading 64-bits at once is easier) + + xorq %r9, %r9 # Clear r9 - it'll store the minus sign + xorq %r10, %r10 # Clear r10 - it'll accumulate our output + # Skip r11 - syscalls destroy it + xorq %r12, %r12 # Clear r12 - it'll count the number of input bytes read. + + # Loop until a newline or end of input is encountered +.Lloop: + # Call syscall 'read' + xorq %rax, %rax # syscall number for read = 0 + xorq %rdi, %rdi # file handle for stdin = 0 + movq %rsp, %rsi # rsi = pointer to buffer + movq $1, %rdx # rdx = buffer size + syscall # result in rax = number of bytes read, + # or 0 on end of input, -1 on error + + # Check return value: either -1, 0 or 1. + cmpq $0, %rax + jg .Lno_error + je .Lend_of_input + jmp .Lerror + +.Lend_of_input: + cmpq $0, %r12 + je .Lerror # If we've read no input, it's an error. + jmp .Lend # Otherwise complete reading this input. + +.Lno_error: + incq %r12 # Increment input byte counter + movq (%rsp), %r8 # Load input byte to r8 + + # If the input byte is 10 (newline), exit the loop + cmpq $10, %r8 + je .Lend + + # If the input byte is 45 (minus sign), negate r9 + cmpq $45, %r8 + jne .Lnegation_done + xorq $1, %r9 +.Lnegation_done: + + # If the input byte is not between 48 ('0') and 57 ('9') + # then skip it as a junk character. 
+ cmpq $48, %r8 + jl .Lloop + cmpq $57, %r8 + jg .Lloop + + # Subtract 48 to get a digit 0..9 + subq $48, %r8 + + # Shift the digit onto the result + imulq $10, %r10 + addq %r8, %r10 + + jmp .Lloop + +.Lend: + # If it's a negative number, negate the result + cmpq $0, %r9 + je .Lfinal_negation_done + neg %r10 +.Lfinal_negation_done: + # Restore stack registers and return the result + movq -8(%rbp), %r12 # Reload saved r12; rsp still points at the input slot, so popq would load the input byte + movq %rbp, %rsp + popq %rbp + movq %r10, %rax + ret + +.Lerror: + # Write error message to stderr with syscall 'write' + movq $1, %rax + movq $2, %rdi + movq $read_int_error_str, %rsi + movq $read_int_error_str_len, %rdx + syscall + + # Exit the program + movq $60, %rax # Syscall number for exit = 60. + movq $1, %rdi # Set exit code 1. + syscall + +read_int_error_str: + .ascii \"Error: read_int() failed to read input\\n\" +read_int_error_str_len = . - read_int_error_str +"; diff --git a/src/server.rs b/src/server.rs index f547183..b8e31bd 100644 --- a/src/server.rs +++ b/src/server.rs @@ -35,8 +35,13 @@ fn handle_connection(mut stream: TcpStream) { "ping" => println!("ping"), "compile" => { let program = json_request["code"].as_str().unwrap(); - compiler::compile(program); - println!("compile code:\n\n{program}\n"); + let output = compiler::compile(program); + + let json_response = json::object! 
{ + "program": output, + }; + let response = json_response.dump(); // Serialize the object; as_str() is None for non-string values + stream.write_all(response.as_bytes()).unwrap(); } _ => panic!("Unexpected command!"), } diff --git a/test-gadget.py b/test-gadget.py new file mode 100755 index 0000000..2aca9e2 --- /dev/null +++ b/test-gadget.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python3 +# Runs the correct test-gadget client program in .test-gadget/test-gadget-client-$PLATFORM + +import os +import platform +import sys +from pathlib import Path + + +def get_platform_binary() -> str: + system = platform.system().lower() + if system == "darwin": + return "test-gadget-client-macos" + elif system == "windows": + return "test-gadget-client-windows.exe" + elif system == "linux": + return "test-gadget-client-linux" + else: + print(f"Unsupported platform: {system}", file=sys.stderr) + sys.exit(1) + + +script_dir = Path(__file__).parent +dist_dir = script_dir / ".test-gadget" +binary = dist_dir / get_platform_binary() + +if not binary.exists(): + print(f"Program not found: {binary}", file=sys.stderr) + sys.exit(1) + +os.execv(str(binary), [str(binary)] + sys.argv[1:])