From 9a13d0b9b64a0fadcaea27526ef71d2bfd884bd1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Vili=20Sinerv=C3=A4?=
Date: Wed, 26 Feb 2025 21:50:08 +0200
Subject: [PATCH] Add error handling for tokenizer errors

---
 src/compiler.rs              |  17 +++--
 src/compiler/parser/tests.rs | 139 ++++++++++++++++++-----------------
 src/compiler/tokenizer.rs    |  46 ++++++++----
 src/compiler/type_checker.rs |  13 ++--
 src/server.rs                |   6 +-
 5 files changed, 125 insertions(+), 96 deletions(-)

diff --git a/src/compiler.rs b/src/compiler.rs
index e55b393..086f558 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -1,4 +1,4 @@
-use std::io;
+use std::{error::Error, io};
 
 use assembler::assemble;
 use assembly_generator::generate_assembly;
@@ -23,22 +23,23 @@ mod tokenizer;
 mod type_checker;
 mod variable;
 
-pub fn compile(code: &str) -> String {
-    let tokens = tokenize(code);
+pub fn compile(code: &str) -> Result<String, Box<dyn Error>> {
+    let tokens = tokenize(code)?;
     let mut ast = parse(&tokens);
     type_check(&mut ast, &mut SymTab::new_type_table());
 
     let ir = generate_ir(&ast);
     let assembly = generate_assembly(&ir);
-    general_purpose::STANDARD.encode(&assemble(assembly))
+    Ok(general_purpose::STANDARD.encode(assemble(assembly)))
 }
 
 pub fn start_compiler() {
     let lines = io::stdin().lines();
     for line in lines.map_while(Result::ok) {
-        println!();
-        println!("{:?}", compile(&line));
-        println!();
+        match compile(&line) {
+            Ok(_) => println!("\nCompilation OK :)\n"),
+            Err(e) => println!("\n{}\n", e),
+        }
     }
 }
 
@@ -47,7 +48,7 @@ pub fn start_interpreter() {
     #[allow(clippy::manual_flatten)]
     for line in lines {
         if let Ok(code) = line {
-            let tokens = tokenize(&code);
+            let tokens = tokenize(&code).unwrap();
             let ast = parse(&tokens);
 
             let val = interpret(&ast, &mut SymTab::new_val_table());
diff --git a/src/compiler/parser/tests.rs b/src/compiler/parser/tests.rs
index 3fb08b1..1dd7426 100644
--- a/src/compiler/parser/tests.rs
+++ b/src/compiler/parser/tests.rs
@@ -139,39 +139,39 @@ fn test_empty() {
 #[test]
 #[should_panic]
 fn test_invalid_start() {
-    parse(&tokenize("1 2 + 3"));
+    parse(&tokenize("1 2 + 3").unwrap());
 }
 
 #[test]
 #[should_panic]
 fn test_invalid_middle() {
-    parse(&tokenize("1 + 2 2 + 3"));
+    parse(&tokenize("1 + 2 2 + 3").unwrap());
 }
 
 #[test]
 #[should_panic]
 fn test_invalid_end() {
-    parse(&tokenize("1 + 2 3"));
+    parse(&tokenize("1 + 2 3").unwrap());
 }
 
 #[test]
 fn test_binary_op_basic() {
-    let result = parse(&tokenize("1 + 23"));
+    let result = parse(&tokenize("1 + 23").unwrap());
     assert_eq!(result, bin_ast!(int_ast_b!(1), "+", int_ast_b!(23)));
 
-    let result = parse(&tokenize("4 - 56"));
+    let result = parse(&tokenize("4 - 56").unwrap());
     assert_eq!(result, bin_ast!(int_ast_b!(4), "-", int_ast_b!(56)));
 
-    let result = parse(&tokenize("1 * 2"));
+    let result = parse(&tokenize("1 * 2").unwrap());
     assert_eq!(result, bin_ast!(int_ast_b!(1), "*", int_ast_b!(2)));
 
-    let result = parse(&tokenize("1 / 2"));
+    let result = parse(&tokenize("1 / 2").unwrap());
     assert_eq!(result, bin_ast!(int_ast_b!(1), "/", int_ast_b!(2)));
 }
 
 #[test]
 fn test_binary_op_all_levels() {
-    let result = parse(&tokenize("1 * 2 + 3 < 4 == 5 and 6 or 7"));
+    let result = parse(&tokenize("1 * 2 + 3 < 4 == 5 and 6 or 7").unwrap());
     assert_eq!(
         result,
         bin_ast!(
@@ -200,16 +200,16 @@ fn test_binary_op_all_levels() {
 
 #[test]
 fn test_binary_op_identifier() {
-    let result = parse(&tokenize("a + 1"));
+    let result = parse(&tokenize("a + 1").unwrap());
     assert_eq!(result, bin_ast!(id_ast_b!("a"), "+", int_ast_b!(1)));
 
-    let result = parse(&tokenize("1 - a"));
+    let result = parse(&tokenize("1 - a").unwrap());
     assert_eq!(result, bin_ast!(int_ast_b!(1), "-", id_ast_b!("a")));
 }
 
 #[test]
 fn test_binary_op_multiple() {
-    let result = parse(&tokenize("1 + 2 - 3"));
+    let result = parse(&tokenize("1 + 2 - 3").unwrap());
     assert_eq!(
         result,
         bin_ast!(
@@ -222,7 +222,7 @@ fn test_binary_op_multiple() {
 
 #[test]
 fn test_binary_op_precedence() {
-    let result = parse(&tokenize("1 + 2 * 3"));
+    let result = parse(&tokenize("1 + 2 * 3").unwrap());
     assert_eq!(
         result,
         bin_ast!(
@@ -232,7 +232,7 @@ fn test_binary_op_precedence() {
         )
     );
 
-    let result = parse(&tokenize("1 - 2 / 3"));
+    let result = parse(&tokenize("1 - 2 / 3").unwrap());
     assert_eq!(
         result,
         bin_ast!(
@@ -245,7 +245,7 @@ fn test_binary_op_precedence() {
 
 #[test]
 fn test_assignment_basic() {
-    let result = parse(&tokenize("a = 1 + 2"));
+    let result = parse(&tokenize("a = 1 + 2").unwrap());
     assert_eq!(
         result,
         bin_ast!(
@@ -258,7 +258,7 @@ fn test_assignment_basic() {
 
 #[test]
 fn test_assignment_chain() {
-    let result = parse(&tokenize("a = b = 1 + 2"));
+    let result = parse(&tokenize("a = b = 1 + 2").unwrap());
     assert_eq!(
         result,
         bin_ast!(
@@ -276,21 +276,21 @@ fn test_assignment_chain() {
 #[test]
 #[should_panic]
 fn test_assignment_invalid() {
-    parse(&tokenize("a ="));
+    parse(&tokenize("a =").unwrap());
 }
 
 #[test]
 fn test_unary_basic() {
-    let result = parse(&tokenize("not x"));
+    let result = parse(&tokenize("not x").unwrap());
     assert_eq!(result, un_ast!("not", id_ast_b!("x")));
 
-    let result = parse(&tokenize("-x"));
+    let result = parse(&tokenize("-x").unwrap());
     assert_eq!(result, un_ast!("-", id_ast_b!("x")));
 
-    let result = parse(&tokenize("-1"));
+    let result = parse(&tokenize("-1").unwrap());
     assert_eq!(result, un_ast!("-", int_ast_b!(1)));
 
-    let result = parse(&tokenize("-1 + 2"));
+    let result = parse(&tokenize("-1 + 2").unwrap());
     assert_eq!(
         result,
         bin_ast!(un_ast_b!("-", int_ast_b!(1)), "+", int_ast_b!(2))
@@ -299,16 +299,16 @@ fn test_unary_basic() {
 
 #[test]
 fn test_unary_chain() {
-    let result = parse(&tokenize("not not x"));
+    let result = parse(&tokenize("not not x").unwrap());
     assert_eq!(result, un_ast!("not", un_ast_b!("not", id_ast_b!("x"))));
 
-    let result = parse(&tokenize("--x"));
+    let result = parse(&tokenize("--x").unwrap());
     assert_eq!(result, un_ast!("-", un_ast_b!("-", id_ast_b!("x"))));
 
-    let result = parse(&tokenize("--1"));
+    let result = parse(&tokenize("--1").unwrap());
     assert_eq!(result, un_ast!("-", un_ast_b!("-", int_ast_b!(1))));
 
-    let result = parse(&tokenize("--1 + 2"));
+    let result = parse(&tokenize("--1 + 2").unwrap());
     assert_eq!(
         result,
         bin_ast!(
@@ -321,7 +321,7 @@ fn test_unary_chain() {
 
 #[test]
 fn test_parenthesized() {
-    let result = parse(&tokenize("(1+2)*3"));
+    let result = parse(&tokenize("(1+2)*3").unwrap());
     assert_eq!(
         result,
         bin_ast!(
@@ -334,7 +334,7 @@ fn test_parenthesized() {
 
 #[test]
 fn test_parenthesized_nested() {
-    let result = parse(&tokenize("((1 - 2))/3"));
+    let result = parse(&tokenize("((1 - 2))/3").unwrap());
     assert_eq!(
         result,
         bin_ast!(
@@ -344,7 +344,7 @@ fn test_parenthesized_nested() {
         )
     );
 
-    let result = parse(&tokenize("((1 + 2)*3) / 4"));
+    let result = parse(&tokenize("((1 + 2)*3) / 4").unwrap());
     assert_eq!(
         result,
         bin_ast!(
@@ -362,12 +362,12 @@ fn test_parenthesized_nested() {
 #[test]
 #[should_panic]
 fn test_parenthesized_mismatched() {
-    parse(&tokenize("(1+2*3"));
+    parse(&tokenize("(1+2*3").unwrap());
 }
 
 #[test]
 fn test_if_then() {
-    let result = parse(&tokenize("if 1 + 2 then 3"));
+    let result = parse(&tokenize("if 1 + 2 then 3").unwrap());
     assert_eq!(
         result,
         con_ast!(
@@ -380,7 +380,7 @@ fn test_if_then() {
 
 #[test]
 fn test_if_then_else() {
-    let result = parse(&tokenize("if a then b + c else 1 * 2"));
+    let result = parse(&tokenize("if a then b + c else 1 * 2").unwrap());
     assert_eq!(
         result,
         con_ast!(
@@ -393,7 +393,7 @@ fn test_if_then_else() {
 
 #[test]
 fn test_if_then_else_embedded() {
-    let result = parse(&tokenize("1 + if true then 2 else 3"));
+    let result = parse(&tokenize("1 + if true then 2 else 3").unwrap());
     assert_eq!(
         result,
         bin_ast!(
@@ -406,7 +406,7 @@ fn test_if_then_else_embedded() {
 
 #[test]
 fn test_if_then_else_nested() {
-    let result = parse(&tokenize("if true then if false then 1 else 2 else 3"));
+    let result = parse(&tokenize("if true then if false then 1 else 2 else 3").unwrap());
     assert_eq!(
         result,
         con_ast!(
@@ -420,15 +420,15 @@ fn test_if_then_else_nested() {
 #[test]
 #[should_panic]
 fn test_if_no_then() {
-    parse(&tokenize("if true"));
+    parse(&tokenize("if true").unwrap());
 }
 
 #[test]
 fn test_func_basic() {
-    let result = parse(&tokenize("f(a, b)"));
+    let result = parse(&tokenize("f(a, b)").unwrap());
     assert_eq!(result, fun_ast!("f", vec![id_ast!("a"), id_ast!("b"),]));
 
-    let result = parse(&tokenize("f(a, 1 + 2)"));
+    let result = parse(&tokenize("f(a, 1 + 2)").unwrap());
     assert_eq!(
         result,
         fun_ast!(
@@ -437,13 +437,13 @@ fn test_func_basic() {
         )
     );
 
-    let result = parse(&tokenize("f()"));
+    let result = parse(&tokenize("f()").unwrap());
     assert_eq!(result, fun_ast!("f", vec![]));
 }
 
 #[test]
 fn test_func_embedded() {
-    let result = parse(&tokenize("1 + f(a)"));
+    let result = parse(&tokenize("1 + f(a)").unwrap());
     assert_eq!(
         result,
         bin_ast!(int_ast_b!(1), "+", fun_ast_b!("f", vec![id_ast!("a")]))
@@ -452,7 +452,7 @@ fn test_func_embedded() {
 
 #[test]
 fn test_func_nested() {
-    let result = parse(&tokenize("f(a, g(b))"));
+    let result = parse(&tokenize("f(a, g(b))").unwrap());
     assert_eq!(
         result,
         fun_ast!("f", vec![id_ast!("a"), fun_ast!("g", vec![id_ast!("b")]),])
@@ -462,18 +462,18 @@ fn test_func_nested() {
 #[test]
 #[should_panic]
 fn test_func_missing_comma() {
-    parse(&tokenize("f(a b)"));
+    parse(&tokenize("f(a b)").unwrap());
 }
 
 #[test]
 #[should_panic]
 fn test_func_missing_close() {
-    parse(&tokenize("f(a"));
+    parse(&tokenize("f(a").unwrap());
 }
 
 #[test]
 fn test_block_basic() {
-    let result = parse(&tokenize("{ a = 1; b; }"));
+    let result = parse(&tokenize("{ a = 1; b; }").unwrap());
     assert_eq!(
         result,
         block_ast!(vec![
@@ -483,7 +483,7 @@ fn test_block_basic() {
         ])
     );
 
-    let result = parse(&tokenize("{ a = 1; b }"));
+    let result = parse(&tokenize("{ a = 1; b }").unwrap());
     assert_eq!(
         result,
         block_ast!(vec![
@@ -495,7 +495,7 @@ fn test_block_basic() {
 
 #[test]
 fn test_block_embedded() {
-    let result = parse(&tokenize("{ 1 + 2 } * 3"));
+    let result = parse(&tokenize("{ 1 + 2 } * 3").unwrap());
     assert_eq!(
         result,
         bin_ast!(
@@ -508,7 +508,7 @@ fn test_block_embedded() {
 
 #[test]
 fn test_block_nested() {
-    let result = parse(&tokenize("{ a = { 1 + 2}}"));
+    let result = parse(&tokenize("{ a = { 1 + 2}}").unwrap());
     assert_eq!(
         result,
         block_ast!(vec![bin_ast!(
@@ -522,21 +522,21 @@ fn test_block_nested() {
 #[test]
 #[should_panic]
 fn test_block_unmatched() {
-    parse(&tokenize("{ a = 1 "));
+    parse(&tokenize("{ a = 1 ").unwrap());
 }
 
 #[test]
 #[should_panic]
 fn test_block_missing_semicolon() {
-    parse(&tokenize("{ a = 1\nb }"));
+    parse(&tokenize("{ a = 1\nb }").unwrap());
 }
 
 #[test]
 fn test_var_basic() {
-    let result = parse(&tokenize("var x = 1"));
+    let result = parse(&tokenize("var x = 1").unwrap());
     assert_eq!(result, var_ast!("x", int_ast_b!(1), None));
 
-    let result = parse(&tokenize("{ var x = 1; x = 2; }"));
+    let result = parse(&tokenize("{ var x = 1; x = 2; }").unwrap());
     assert_eq!(
         result,
         block_ast!(vec![
@@ -549,7 +549,7 @@ fn test_var_basic() {
 
 #[test]
 fn test_var_typed() {
-    let result = parse(&tokenize("var x: Int = 1"));
+    let result = parse(&tokenize("var x: Int = 1").unwrap());
     assert_eq!(
         result,
         var_ast!(
@@ -562,7 +562,7 @@ fn test_var_typed() {
         )
     );
 
-    let result = parse(&tokenize("var x: Bool = true"));
+    let result = parse(&tokenize("var x: Bool = true").unwrap());
     assert_eq!(
         result,
         var_ast!(
@@ -579,18 +579,18 @@ fn test_var_typed() {
 #[test]
 #[should_panic]
 fn test_var_chain() {
-    parse(&tokenize("var x = var y = 1"));
+    parse(&tokenize("var x = var y = 1").unwrap());
 }
 
 #[test]
 #[should_panic]
 fn test_var_embedded() {
-    parse(&tokenize("if true then var x = 3"));
+    parse(&tokenize("if true then var x = 3").unwrap());
 }
 
 #[test]
 fn test_omitting_semicolons() {
-    let result = parse(&tokenize("{ { a } { b } }"));
+    let result = parse(&tokenize("{ { a } { b } }").unwrap());
     assert_eq!(
         result,
         block_ast!(vec![
@@ -599,7 +599,7 @@ fn test_omitting_semicolons() {
         ])
     );
 
-    let result = parse(&tokenize("{ if true then { a } b }"));
+    let result = parse(&tokenize("{ if true then { a } b }").unwrap());
     assert_eq!(
         result,
         block_ast!(vec![
@@ -608,7 +608,7 @@ fn test_omitting_semicolons() {
         ])
     );
 
-    let result = parse(&tokenize("{ if true then { a }; b }"));
+    let result = parse(&tokenize("{ if true then { a }; b }").unwrap());
     assert_eq!(
         result,
         block_ast!(vec![
@@ -617,7 +617,7 @@ fn test_omitting_semicolons() {
         ])
     );
 
-    let result = parse(&tokenize("{ if true then { a } else { b } c }"));
+    let result = parse(&tokenize("{ if true then { a } else { b } c }").unwrap());
     assert_eq!(
         result,
         block_ast!(vec![
@@ -630,7 +630,7 @@ fn test_omitting_semicolons() {
         ])
     );
 
-    let result = parse(&tokenize("x = { { f(a) } { b } }"));
+    let result = parse(&tokenize("x = { { f(a) } { b } }").unwrap());
     assert_eq!(
         result,
         bin_ast!(
@@ -647,12 +647,12 @@ fn test_omitting_semicolons() {
 #[test]
 #[should_panic]
 fn test_omitting_semicolons_invalid() {
-    parse(&tokenize("{ if true then { a } b c }"));
+    parse(&tokenize("{ if true then { a } b c }").unwrap());
 }
 
 #[test]
 fn test_while_do() {
-    let result = parse(&tokenize("while 1 + 2 do 3"));
+    let result = parse(&tokenize("while 1 + 2 do 3").unwrap());
     assert_eq!(
         result,
         while_ast!(bin_ast_b!(int_ast_b!(1), "+", int_ast_b!(2)), int_ast_b!(3))
@@ -661,7 +661,7 @@ fn test_while_do() {
 
 #[test]
 fn test_while_do_embedded() {
-    let result = parse(&tokenize("1 + while true do 2"));
+    let result = parse(&tokenize("1 + while true do 2").unwrap());
     assert_eq!(
         result,
         bin_ast!(
@@ -674,7 +674,7 @@ fn test_while_do_embedded() {
 
 #[test]
 fn test_while_do_nested() {
-    let result = parse(&tokenize("while true do while false do 1"));
+    let result = parse(&tokenize("while true do while false do 1").unwrap());
     assert_eq!(
         result,
         while_ast!(
@@ -687,18 +687,18 @@ fn test_while_do_nested() {
 #[test]
 #[should_panic]
 fn test_while_no_do() {
-    parse(&tokenize("while true"));
+    parse(&tokenize("while true").unwrap());
 }
 
 #[test]
 fn test_multiple_top_levels() {
-    let result = parse(&tokenize("a;"));
+    let result = parse(&tokenize("a;").unwrap());
     assert_eq!(result, block_ast!(vec![id_ast!("a"), empty_ast!()]));
 
-    let result = parse(&tokenize("a; b"));
+    let result = parse(&tokenize("a; b").unwrap());
     assert_eq!(result, block_ast!(vec![id_ast!("a"), id_ast!("b")]));
 
-    let result = parse(&tokenize("{}{}"));
+    let result = parse(&tokenize("{}{}").unwrap());
     assert_eq!(
         result,
         block_ast!(vec![block_ast!(vec![]), block_ast!(vec![])])
@@ -707,8 +707,9 @@ fn test_multiple_top_levels() {
 
 #[test]
 fn test_large() {
-    let result = parse(&tokenize(
-        "
+    let result = parse(
+        &tokenize(
+            "
 {
     while f() do {
         x = 10;
@@ -723,7 +724,9 @@ fn test_large() {
         123
     }
     ",
-    ));
+        )
+        .unwrap(),
+    );
 
     assert_eq!(
         result,
diff --git a/src/compiler/tokenizer.rs b/src/compiler/tokenizer.rs
index e068376..12f1bb0 100644
--- a/src/compiler/tokenizer.rs
+++ b/src/compiler/tokenizer.rs
@@ -1,7 +1,22 @@
+use std::{error::Error, fmt::Display};
+
 use crate::compiler::token::{CodeLocation, Token, TokenType};
 use regex::Regex;
 
-pub fn tokenize(code: &str) -> Vec<Token> {
+#[derive(Debug)]
+pub struct TokenizeError {
+    message: String,
+}
+
+impl Display for TokenizeError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "TokenizerError: {}", self.message)
+    }
+}
+
+impl Error for TokenizeError {}
+
+pub fn tokenize(code: &str) -> Result<Vec<Token>, TokenizeError> {
     // We only want to compile the regexes once
     // The ordering of these is important!
     let regexes = vec![
@@ -48,16 +63,19 @@ pub fn tokenize(code: &str) -> Vec<Token> {
             }
 
             if !valid_token {
-                panic!(
-                    "Invalid token on line {} in position {}",
-                    line_number + 1,
-                    pos + 1
-                );
+                return Err(TokenizeError {
+                    message: format!(
+                        "Invalid token starting with '{}' on line {} in position {}",
+                        &line[pos..pos + 1],
+                        line_number + 1,
+                        pos + 1
+                    ),
+                });
             }
         }
     }
 
-    tokens
+    Ok(tokens)
 }
 
 #[cfg(test)]
@@ -66,7 +84,7 @@ mod tests {
     #[test]
     fn test_tokenize_basic() {
        let loc = CodeLocation::new(usize::MAX, usize::MAX);
-        let result = tokenize("if 3 \n\twhile");
+        let result = tokenize("if 3 \n\twhile").unwrap();
 
         use TokenType::*;
         assert_eq!(
@@ -81,7 +99,7 @@ mod tests {
 
     #[test]
     fn test_tokenize_code_location() {
-        let result = tokenize("if 3\n while");
+        let result = tokenize("if 3\n while").unwrap();
 
         use TokenType::*;
         assert_eq!(
@@ -113,7 +131,7 @@ mod tests {
     #[test]
     fn test_tokenize_comment() {
         let loc = CodeLocation::new(usize::MAX, usize::MAX);
-        let result = tokenize("if 3 \n\n//Comment\n#Another\n\twhile //Comment2");
+        let result = tokenize("if 3 \n\n//Comment\n#Another\n\twhile //Comment2").unwrap();
 
         use TokenType::*;
         assert_eq!(
@@ -129,7 +147,7 @@ mod tests {
     #[test]
     fn test_tokenize_operators_basic() {
         let loc = CodeLocation::new(usize::MAX, usize::MAX);
-        let result = tokenize("var = 1 + 2");
+        let result = tokenize("var = 1 + 2").unwrap();
 
         use TokenType::*;
         assert_eq!(
@@ -147,7 +165,7 @@ mod tests {
     #[test]
     fn test_tokenize_operators_all() {
         let loc = CodeLocation::new(usize::MAX, usize::MAX);
-        let result = tokenize("var 1 + - * 1/2 = == != < <= > >= 2 %");
+        let result = tokenize("var 1 + - * 1/2 = == != < <= > >= 2 %").unwrap();
 
         use TokenType::*;
         assert_eq!(
@@ -177,7 +195,7 @@ mod tests {
     #[test]
     fn test_tokenize_punctuation_basic() {
         let loc = CodeLocation::new(usize::MAX, usize::MAX);
-        let result = tokenize("{var = (1 + 2, 3);:}");
+        let result = tokenize("{var = (1 + 2, 3);:}").unwrap();
 
         use TokenType::*;
         assert_eq!(
@@ -203,6 +221,6 @@ mod tests {
     #[test]
     #[should_panic]
     fn test_tokenize_wrong_token() {
-        tokenize("if 3\n while @");
+        tokenize("if 3\n while @").unwrap();
     }
 }
diff --git a/src/compiler/type_checker.rs b/src/compiler/type_checker.rs
index 62c8906..e4c8faa 100644
--- a/src/compiler/type_checker.rs
+++ b/src/compiler/type_checker.rs
@@ -160,7 +160,10 @@ mod tests {
     use Type::*;
 
     fn get_type(code: &str) -> Type {
-        type_check(&mut parse(&tokenize(code)), &mut SymTab::new_type_table())
+        type_check(
+            &mut parse(&tokenize(code).unwrap()),
+            &mut SymTab::new_type_table(),
+        )
     }
 
     #[test]
@@ -317,14 +320,14 @@ mod tests {
 
     #[test]
     fn test_function() {
-        let mut tokens = tokenize("foo(1)");
+        let mut tokens = tokenize("foo(1)").unwrap();
         let mut ast = parse(&tokens);
         let mut symtab = SymTab::new_type_table();
         symtab.insert("foo", Func(vec![Int], Box::new(Int)));
         let result = type_check(&mut ast, &mut symtab);
         assert_eq!(result, Int);
 
-        tokens = tokenize("foo(1);");
+        tokens = tokenize("foo(1);").unwrap();
         ast = parse(&tokens);
         symtab = SymTab::new_type_table();
         symtab.insert("foo", Func(vec![Int], Box::new(Int)));
@@ -335,7 +338,7 @@ mod tests {
     #[test]
     #[should_panic]
     fn test_function_wrong_arg() {
-        let tokens = tokenize("foo(true)");
+        let tokens = tokenize("foo(true)").unwrap();
         let mut ast = parse(&tokens);
         let mut symtab = SymTab::new_type_table();
         symtab.insert("foo", Func(vec![Int], Box::new(Int)));
@@ -344,7 +347,7 @@ mod tests {
 
     #[test]
     fn test_node_type() {
-        let tokens = tokenize("1");
+        let tokens = tokenize("1").unwrap();
         let mut ast = parse(&tokens);
         let mut symtab = SymTab::new_type_table();
 
diff --git a/src/server.rs b/src/server.rs
index 4f2b416..fd5187f 100644
--- a/src/server.rs
+++ b/src/server.rs
@@ -37,7 +37,11 @@ fn handle_connection(mut stream: TcpStream) {
             let program = json_request["code"].as_str().unwrap();
             let output = compiler::compile(program);
 
-            let response = format!("{{\"program\": \"{output}\"}}");
+            let response = match output {
+                Ok(output) => format!("{{\"program\": \"{output}\"}}"),
+                Err(e) => format!("{{\"error\": \"{e}\"}}"),
+            };
+
             stream.write_all(response.as_bytes()).unwrap();
         }
         _ => panic!("Unexpected command!"),
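
For experimenting with the error-propagation shape this patch introduces without building the whole tree, here is a minimal, self-contained sketch. Everything in it is illustrative: the toy tokenize below rejects '@' instead of running the real regex loop, and only the TokenizeError type, the two Result signatures, and the `?` conversion into Box<dyn Error> mirror the actual change.

use std::{error::Error, fmt::Display};

// Mirrors the new error type in src/compiler/tokenizer.rs.
#[derive(Debug)]
pub struct TokenizeError {
    message: String,
}

impl Display for TokenizeError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "TokenizerError: {}", self.message)
    }
}

impl Error for TokenizeError {}

// Hypothetical mini-tokenizer: treats '@' as the only invalid input,
// standing in for the patch's "no regex matched" case.
fn tokenize(code: &str) -> Result<Vec<String>, TokenizeError> {
    if let Some(pos) = code.find('@') {
        return Err(TokenizeError {
            message: format!("Invalid token starting with '@' in position {}", pos + 1),
        });
    }
    Ok(code.split_whitespace().map(str::to_string).collect())
}

// Like the patched `compile`: `?` converts TokenizeError into Box<dyn Error>
// via the standard blanket From<E: Error> impl.
fn compile(code: &str) -> Result<String, Box<dyn Error>> {
    let tokens = tokenize(code)?;
    Ok(format!("{} token(s)", tokens.len()))
}

fn main() {
    // Mirrors the match in the patched `start_compiler` loop.
    match compile("if 3\n while @") {
        Ok(out) => println!("\n{}\n", out),
        Err(e) => println!("\n{}\n", e), // prints "TokenizerError: Invalid token ..."
    }
}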
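
A note on the design, as far as the diff shows it: returning Box<dyn Error> keeps compile's signature stable if the parser or type checker later gain their own error types, since `?` boxes any E: Error automatically; the trade-off, visible in src/server.rs, is that callers can only format the error (here into a JSON "error" field), not match on concrete variants without downcasting. The interpreter path and the #[should_panic] tests deliberately keep .unwrap(), so a tokenizer error still surfaces as a panic there.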