1
0
Fork 0

Add error handling for tokenizer errors

This commit is contained in:
Vili Sinervä 2025-02-26 21:50:08 +02:00
parent 340de3c984
commit 9a13d0b9b6
No known key found for this signature in database
GPG key ID: DF8FEAF54EFAC996
5 changed files with 125 additions and 96 deletions

View file

@ -1,4 +1,4 @@
use std::io;
use std::{error::Error, io};
use assembler::assemble;
use assembly_generator::generate_assembly;
@ -23,22 +23,23 @@ mod tokenizer;
mod type_checker;
mod variable;
pub fn compile(code: &str) -> String {
let tokens = tokenize(code);
pub fn compile(code: &str) -> Result<String, Box<dyn Error>> {
let tokens = tokenize(code)?;
let mut ast = parse(&tokens);
type_check(&mut ast, &mut SymTab::new_type_table());
let ir = generate_ir(&ast);
let assembly = generate_assembly(&ir);
general_purpose::STANDARD.encode(&assemble(assembly))
Ok(general_purpose::STANDARD.encode(assemble(assembly)))
}
pub fn start_compiler() {
let lines = io::stdin().lines();
for line in lines.map_while(Result::ok) {
println!();
println!("{:?}", compile(&line));
println!();
match compile(&line) {
Ok(_) => println!("\nCompilation OK :)\n"),
Err(e) => println!("\n{}\n", e),
}
}
}
@ -47,7 +48,7 @@ pub fn start_interpreter() {
#[allow(clippy::manual_flatten)]
for line in lines {
if let Ok(code) = line {
let tokens = tokenize(&code);
let tokens = tokenize(&code).unwrap();
let ast = parse(&tokens);
let val = interpret(&ast, &mut SymTab::new_val_table());

View file

@ -139,39 +139,39 @@ fn test_empty() {
#[test]
#[should_panic]
fn test_invalid_start() {
parse(&tokenize("1 2 + 3"));
parse(&tokenize("1 2 + 3").unwrap());
}
#[test]
#[should_panic]
fn test_invalid_middle() {
parse(&tokenize("1 + 2 2 + 3"));
parse(&tokenize("1 + 2 2 + 3").unwrap());
}
#[test]
#[should_panic]
fn test_invalid_end() {
parse(&tokenize("1 + 2 3"));
parse(&tokenize("1 + 2 3").unwrap());
}
#[test]
fn test_binary_op_basic() {
let result = parse(&tokenize("1 + 23"));
let result = parse(&tokenize("1 + 23").unwrap());
assert_eq!(result, bin_ast!(int_ast_b!(1), "+", int_ast_b!(23)));
let result = parse(&tokenize("4 - 56"));
let result = parse(&tokenize("4 - 56").unwrap());
assert_eq!(result, bin_ast!(int_ast_b!(4), "-", int_ast_b!(56)));
let result = parse(&tokenize("1 * 2"));
let result = parse(&tokenize("1 * 2").unwrap());
assert_eq!(result, bin_ast!(int_ast_b!(1), "*", int_ast_b!(2)));
let result = parse(&tokenize("1 / 2"));
let result = parse(&tokenize("1 / 2").unwrap());
assert_eq!(result, bin_ast!(int_ast_b!(1), "/", int_ast_b!(2)));
}
#[test]
fn test_binary_op_all_levels() {
let result = parse(&tokenize("1 * 2 + 3 < 4 == 5 and 6 or 7"));
let result = parse(&tokenize("1 * 2 + 3 < 4 == 5 and 6 or 7").unwrap());
assert_eq!(
result,
bin_ast!(
@ -200,16 +200,16 @@ fn test_binary_op_all_levels() {
#[test]
fn test_binary_op_identifier() {
let result = parse(&tokenize("a + 1"));
let result = parse(&tokenize("a + 1").unwrap());
assert_eq!(result, bin_ast!(id_ast_b!("a"), "+", int_ast_b!(1)));
let result = parse(&tokenize("1 - a"));
let result = parse(&tokenize("1 - a").unwrap());
assert_eq!(result, bin_ast!(int_ast_b!(1), "-", id_ast_b!("a")));
}
#[test]
fn test_binary_op_multiple() {
let result = parse(&tokenize("1 + 2 - 3"));
let result = parse(&tokenize("1 + 2 - 3").unwrap());
assert_eq!(
result,
bin_ast!(
@ -222,7 +222,7 @@ fn test_binary_op_multiple() {
#[test]
fn test_binary_op_precedence() {
let result = parse(&tokenize("1 + 2 * 3"));
let result = parse(&tokenize("1 + 2 * 3").unwrap());
assert_eq!(
result,
bin_ast!(
@ -232,7 +232,7 @@ fn test_binary_op_precedence() {
)
);
let result = parse(&tokenize("1 - 2 / 3"));
let result = parse(&tokenize("1 - 2 / 3").unwrap());
assert_eq!(
result,
bin_ast!(
@ -245,7 +245,7 @@ fn test_binary_op_precedence() {
#[test]
fn test_assignment_basic() {
let result = parse(&tokenize("a = 1 + 2"));
let result = parse(&tokenize("a = 1 + 2").unwrap());
assert_eq!(
result,
bin_ast!(
@ -258,7 +258,7 @@ fn test_assignment_basic() {
#[test]
fn test_assignment_chain() {
let result = parse(&tokenize("a = b = 1 + 2"));
let result = parse(&tokenize("a = b = 1 + 2").unwrap());
assert_eq!(
result,
bin_ast!(
@ -276,21 +276,21 @@ fn test_assignment_chain() {
#[test]
#[should_panic]
fn test_assignment_invalid() {
parse(&tokenize("a ="));
parse(&tokenize("a =").unwrap());
}
#[test]
fn test_unary_basic() {
let result = parse(&tokenize("not x"));
let result = parse(&tokenize("not x").unwrap());
assert_eq!(result, un_ast!("not", id_ast_b!("x")));
let result = parse(&tokenize("-x"));
let result = parse(&tokenize("-x").unwrap());
assert_eq!(result, un_ast!("-", id_ast_b!("x")));
let result = parse(&tokenize("-1"));
let result = parse(&tokenize("-1").unwrap());
assert_eq!(result, un_ast!("-", int_ast_b!(1)));
let result = parse(&tokenize("-1 + 2"));
let result = parse(&tokenize("-1 + 2").unwrap());
assert_eq!(
result,
bin_ast!(un_ast_b!("-", int_ast_b!(1)), "+", int_ast_b!(2))
@ -299,16 +299,16 @@ fn test_unary_basic() {
#[test]
fn test_unary_chain() {
let result = parse(&tokenize("not not x"));
let result = parse(&tokenize("not not x").unwrap());
assert_eq!(result, un_ast!("not", un_ast_b!("not", id_ast_b!("x"))));
let result = parse(&tokenize("--x"));
let result = parse(&tokenize("--x").unwrap());
assert_eq!(result, un_ast!("-", un_ast_b!("-", id_ast_b!("x"))));
let result = parse(&tokenize("--1"));
let result = parse(&tokenize("--1").unwrap());
assert_eq!(result, un_ast!("-", un_ast_b!("-", int_ast_b!(1))));
let result = parse(&tokenize("--1 + 2"));
let result = parse(&tokenize("--1 + 2").unwrap());
assert_eq!(
result,
bin_ast!(
@ -321,7 +321,7 @@ fn test_unary_chain() {
#[test]
fn test_parenthesized() {
let result = parse(&tokenize("(1+2)*3"));
let result = parse(&tokenize("(1+2)*3").unwrap());
assert_eq!(
result,
bin_ast!(
@ -334,7 +334,7 @@ fn test_parenthesized() {
#[test]
fn test_parenthesized_nested() {
let result = parse(&tokenize("((1 - 2))/3"));
let result = parse(&tokenize("((1 - 2))/3").unwrap());
assert_eq!(
result,
bin_ast!(
@ -344,7 +344,7 @@ fn test_parenthesized_nested() {
)
);
let result = parse(&tokenize("((1 + 2)*3) / 4"));
let result = parse(&tokenize("((1 + 2)*3) / 4").unwrap());
assert_eq!(
result,
bin_ast!(
@ -362,12 +362,12 @@ fn test_parenthesized_nested() {
#[test]
#[should_panic]
fn test_parenthesized_mismatched() {
parse(&tokenize("(1+2*3"));
parse(&tokenize("(1+2*3").unwrap());
}
#[test]
fn test_if_then() {
let result = parse(&tokenize("if 1 + 2 then 3"));
let result = parse(&tokenize("if 1 + 2 then 3").unwrap());
assert_eq!(
result,
con_ast!(
@ -380,7 +380,7 @@ fn test_if_then() {
#[test]
fn test_if_then_else() {
let result = parse(&tokenize("if a then b + c else 1 * 2"));
let result = parse(&tokenize("if a then b + c else 1 * 2").unwrap());
assert_eq!(
result,
con_ast!(
@ -393,7 +393,7 @@ fn test_if_then_else() {
#[test]
fn test_if_then_else_embedded() {
let result = parse(&tokenize("1 + if true then 2 else 3"));
let result = parse(&tokenize("1 + if true then 2 else 3").unwrap());
assert_eq!(
result,
bin_ast!(
@ -406,7 +406,7 @@ fn test_if_then_else_embedded() {
#[test]
fn test_if_then_else_nested() {
let result = parse(&tokenize("if true then if false then 1 else 2 else 3"));
let result = parse(&tokenize("if true then if false then 1 else 2 else 3").unwrap());
assert_eq!(
result,
con_ast!(
@ -420,15 +420,15 @@ fn test_if_then_else_nested() {
#[test]
#[should_panic]
fn test_if_no_then() {
parse(&tokenize("if true"));
parse(&tokenize("if true").unwrap());
}
#[test]
fn test_func_basic() {
let result = parse(&tokenize("f(a, b)"));
let result = parse(&tokenize("f(a, b)").unwrap());
assert_eq!(result, fun_ast!("f", vec![id_ast!("a"), id_ast!("b"),]));
let result = parse(&tokenize("f(a, 1 + 2)"));
let result = parse(&tokenize("f(a, 1 + 2)").unwrap());
assert_eq!(
result,
fun_ast!(
@ -437,13 +437,13 @@ fn test_func_basic() {
)
);
let result = parse(&tokenize("f()"));
let result = parse(&tokenize("f()").unwrap());
assert_eq!(result, fun_ast!("f", vec![]));
}
#[test]
fn test_func_embedded() {
let result = parse(&tokenize("1 + f(a)"));
let result = parse(&tokenize("1 + f(a)").unwrap());
assert_eq!(
result,
bin_ast!(int_ast_b!(1), "+", fun_ast_b!("f", vec![id_ast!("a")]))
@ -452,7 +452,7 @@ fn test_func_embedded() {
#[test]
fn test_func_nested() {
let result = parse(&tokenize("f(a, g(b))"));
let result = parse(&tokenize("f(a, g(b))").unwrap());
assert_eq!(
result,
fun_ast!("f", vec![id_ast!("a"), fun_ast!("g", vec![id_ast!("b")]),])
@ -462,18 +462,18 @@ fn test_func_nested() {
#[test]
#[should_panic]
fn test_func_missing_comma() {
parse(&tokenize("f(a b)"));
parse(&tokenize("f(a b)").unwrap());
}
#[test]
#[should_panic]
fn test_func_missing_close() {
parse(&tokenize("f(a"));
parse(&tokenize("f(a").unwrap());
}
#[test]
fn test_block_basic() {
let result = parse(&tokenize("{ a = 1; b; }"));
let result = parse(&tokenize("{ a = 1; b; }").unwrap());
assert_eq!(
result,
block_ast!(vec![
@ -483,7 +483,7 @@ fn test_block_basic() {
])
);
let result = parse(&tokenize("{ a = 1; b }"));
let result = parse(&tokenize("{ a = 1; b }").unwrap());
assert_eq!(
result,
block_ast!(vec![
@ -495,7 +495,7 @@ fn test_block_basic() {
#[test]
fn test_block_embedded() {
let result = parse(&tokenize("{ 1 + 2 } * 3"));
let result = parse(&tokenize("{ 1 + 2 } * 3").unwrap());
assert_eq!(
result,
bin_ast!(
@ -508,7 +508,7 @@ fn test_block_embedded() {
#[test]
fn test_block_nested() {
let result = parse(&tokenize("{ a = { 1 + 2}}"));
let result = parse(&tokenize("{ a = { 1 + 2}}").unwrap());
assert_eq!(
result,
block_ast!(vec![bin_ast!(
@ -522,21 +522,21 @@ fn test_block_nested() {
#[test]
#[should_panic]
fn test_block_unmatched() {
parse(&tokenize("{ a = 1 "));
parse(&tokenize("{ a = 1 ").unwrap());
}
#[test]
#[should_panic]
fn test_block_missing_semicolon() {
parse(&tokenize("{ a = 1\nb }"));
parse(&tokenize("{ a = 1\nb }").unwrap());
}
#[test]
fn test_var_basic() {
let result = parse(&tokenize("var x = 1"));
let result = parse(&tokenize("var x = 1").unwrap());
assert_eq!(result, var_ast!("x", int_ast_b!(1), None));
let result = parse(&tokenize("{ var x = 1; x = 2; }"));
let result = parse(&tokenize("{ var x = 1; x = 2; }").unwrap());
assert_eq!(
result,
block_ast!(vec![
@ -549,7 +549,7 @@ fn test_var_basic() {
#[test]
fn test_var_typed() {
let result = parse(&tokenize("var x: Int = 1"));
let result = parse(&tokenize("var x: Int = 1").unwrap());
assert_eq!(
result,
var_ast!(
@ -562,7 +562,7 @@ fn test_var_typed() {
)
);
let result = parse(&tokenize("var x: Bool = true"));
let result = parse(&tokenize("var x: Bool = true").unwrap());
assert_eq!(
result,
var_ast!(
@ -579,18 +579,18 @@ fn test_var_typed() {
#[test]
#[should_panic]
fn test_var_chain() {
parse(&tokenize("var x = var y = 1"));
parse(&tokenize("var x = var y = 1").unwrap());
}
#[test]
#[should_panic]
fn test_var_embedded() {
parse(&tokenize("if true then var x = 3"));
parse(&tokenize("if true then var x = 3").unwrap());
}
#[test]
fn test_omitting_semicolons() {
let result = parse(&tokenize("{ { a } { b } }"));
let result = parse(&tokenize("{ { a } { b } }").unwrap());
assert_eq!(
result,
block_ast!(vec![
@ -599,7 +599,7 @@ fn test_omitting_semicolons() {
])
);
let result = parse(&tokenize("{ if true then { a } b }"));
let result = parse(&tokenize("{ if true then { a } b }").unwrap());
assert_eq!(
result,
block_ast!(vec![
@ -608,7 +608,7 @@ fn test_omitting_semicolons() {
])
);
let result = parse(&tokenize("{ if true then { a }; b }"));
let result = parse(&tokenize("{ if true then { a }; b }").unwrap());
assert_eq!(
result,
block_ast!(vec![
@ -617,7 +617,7 @@ fn test_omitting_semicolons() {
])
);
let result = parse(&tokenize("{ if true then { a } else { b } c }"));
let result = parse(&tokenize("{ if true then { a } else { b } c }").unwrap());
assert_eq!(
result,
block_ast!(vec![
@ -630,7 +630,7 @@ fn test_omitting_semicolons() {
])
);
let result = parse(&tokenize("x = { { f(a) } { b } }"));
let result = parse(&tokenize("x = { { f(a) } { b } }").unwrap());
assert_eq!(
result,
bin_ast!(
@ -647,12 +647,12 @@ fn test_omitting_semicolons() {
#[test]
#[should_panic]
fn test_omitting_semicolons_invalid() {
parse(&tokenize("{ if true then { a } b c }"));
parse(&tokenize("{ if true then { a } b c }").unwrap());
}
#[test]
fn test_while_do() {
let result = parse(&tokenize("while 1 + 2 do 3"));
let result = parse(&tokenize("while 1 + 2 do 3").unwrap());
assert_eq!(
result,
while_ast!(bin_ast_b!(int_ast_b!(1), "+", int_ast_b!(2)), int_ast_b!(3))
@ -661,7 +661,7 @@ fn test_while_do() {
#[test]
fn test_while_do_embedded() {
let result = parse(&tokenize("1 + while true do 2"));
let result = parse(&tokenize("1 + while true do 2").unwrap());
assert_eq!(
result,
bin_ast!(
@ -674,7 +674,7 @@ fn test_while_do_embedded() {
#[test]
fn test_while_do_nested() {
let result = parse(&tokenize("while true do while false do 1"));
let result = parse(&tokenize("while true do while false do 1").unwrap());
assert_eq!(
result,
while_ast!(
@ -687,18 +687,18 @@ fn test_while_do_nested() {
#[test]
#[should_panic]
fn test_while_no_do() {
parse(&tokenize("while true"));
parse(&tokenize("while true").unwrap());
}
#[test]
fn test_multiple_top_levels() {
let result = parse(&tokenize("a;"));
let result = parse(&tokenize("a;").unwrap());
assert_eq!(result, block_ast!(vec![id_ast!("a"), empty_ast!()]));
let result = parse(&tokenize("a; b"));
let result = parse(&tokenize("a; b").unwrap());
assert_eq!(result, block_ast!(vec![id_ast!("a"), id_ast!("b")]));
let result = parse(&tokenize("{}{}"));
let result = parse(&tokenize("{}{}").unwrap());
assert_eq!(
result,
block_ast!(vec![block_ast!(vec![]), block_ast!(vec![])])
@ -707,8 +707,9 @@ fn test_multiple_top_levels() {
#[test]
fn test_large() {
let result = parse(&tokenize(
"
let result = parse(
&tokenize(
"
{
while f() do {
x = 10;
@ -723,7 +724,9 @@ fn test_large() {
123
}
",
));
)
.unwrap(),
);
assert_eq!(
result,

View file

@ -1,7 +1,22 @@
use std::{error::Error, fmt::Display};
use crate::compiler::token::{CodeLocation, Token, TokenType};
use regex::Regex;
pub fn tokenize(code: &str) -> Vec<Token> {
/// Error value returned by the tokenizer when the input cannot be tokenized.
///
/// It carries a single human-readable description, which is exposed through
/// the `Display` implementation below.
#[derive(Debug)]
pub struct TokenizeError {
    /// Human-readable description of the failure.
    message: String,
}

impl Display for TokenizeError {
    /// Writes a fixed prefix followed by the stored message.
    ///
    /// Width, fill and precision flags supplied by the caller have no effect
    /// on the output.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // NOTE(review): the prefix reads `TokenizerError` although the type is
        // named `TokenizeError` - confirm which spelling is intended.
        f.write_str("TokenizerError: ")?;
        f.write_str(&self.message)
    }
}

// Marker impl so the error can be boxed as `Box<dyn Error>` and propagated
// with `?`; the trait's default methods are used as-is.
impl Error for TokenizeError {}
pub fn tokenize(code: &str) -> Result<Vec<Token>, TokenizeError> {
// We only want to compile the regexes once
// The ordering of these is important!
let regexes = vec![
@ -48,16 +63,19 @@ pub fn tokenize(code: &str) -> Vec<Token> {
}
if !valid_token {
panic!(
"Invalid token on line {} in position {}",
line_number + 1,
pos + 1
);
return Err(TokenizeError {
message: format!(
"Invalid token starting with '{}' on line {} in position {}",
&line[pos..pos + 1],
line_number + 1,
pos + 1
),
});
}
}
}
tokens
Ok(tokens)
}
#[cfg(test)]
@ -66,7 +84,7 @@ mod tests {
#[test]
fn test_tokenize_basic() {
let loc = CodeLocation::new(usize::MAX, usize::MAX);
let result = tokenize("if 3 \n\twhile");
let result = tokenize("if 3 \n\twhile").unwrap();
use TokenType::*;
assert_eq!(
@ -81,7 +99,7 @@ mod tests {
#[test]
fn test_tokenize_code_location() {
let result = tokenize("if 3\n while");
let result = tokenize("if 3\n while").unwrap();
use TokenType::*;
assert_eq!(
@ -113,7 +131,7 @@ mod tests {
#[test]
fn test_tokenize_comment() {
let loc = CodeLocation::new(usize::MAX, usize::MAX);
let result = tokenize("if 3 \n\n//Comment\n#Another\n\twhile //Comment2");
let result = tokenize("if 3 \n\n//Comment\n#Another\n\twhile //Comment2").unwrap();
use TokenType::*;
assert_eq!(
@ -129,7 +147,7 @@ mod tests {
#[test]
fn test_tokenize_operators_basic() {
let loc = CodeLocation::new(usize::MAX, usize::MAX);
let result = tokenize("var = 1 + 2");
let result = tokenize("var = 1 + 2").unwrap();
use TokenType::*;
assert_eq!(
@ -147,7 +165,7 @@ mod tests {
#[test]
fn test_tokenize_operators_all() {
let loc = CodeLocation::new(usize::MAX, usize::MAX);
let result = tokenize("var 1 + - * 1/2 = == != < <= > >= 2 %");
let result = tokenize("var 1 + - * 1/2 = == != < <= > >= 2 %").unwrap();
use TokenType::*;
assert_eq!(
@ -177,7 +195,7 @@ mod tests {
#[test]
fn test_tokenize_punctuation_basic() {
let loc = CodeLocation::new(usize::MAX, usize::MAX);
let result = tokenize("{var = (1 + 2, 3);:}");
let result = tokenize("{var = (1 + 2, 3);:}").unwrap();
use TokenType::*;
assert_eq!(
@ -203,6 +221,6 @@ mod tests {
#[test]
#[should_panic]
fn test_tokenize_wrong_token() {
tokenize("if 3\n while @");
tokenize("if 3\n while @").unwrap();
}
}

View file

@ -160,7 +160,10 @@ mod tests {
use Type::*;
fn get_type(code: &str) -> Type {
type_check(&mut parse(&tokenize(code)), &mut SymTab::new_type_table())
type_check(
&mut parse(&tokenize(code).unwrap()),
&mut SymTab::new_type_table(),
)
}
#[test]
@ -317,14 +320,14 @@ mod tests {
#[test]
fn test_function() {
let mut tokens = tokenize("foo(1)");
let mut tokens = tokenize("foo(1)").unwrap();
let mut ast = parse(&tokens);
let mut symtab = SymTab::new_type_table();
symtab.insert("foo", Func(vec![Int], Box::new(Int)));
let result = type_check(&mut ast, &mut symtab);
assert_eq!(result, Int);
tokens = tokenize("foo(1);");
tokens = tokenize("foo(1);").unwrap();
ast = parse(&tokens);
symtab = SymTab::new_type_table();
symtab.insert("foo", Func(vec![Int], Box::new(Int)));
@ -335,7 +338,7 @@ mod tests {
#[test]
#[should_panic]
fn test_function_wrong_arg() {
let tokens = tokenize("foo(true)");
let tokens = tokenize("foo(true)").unwrap();
let mut ast = parse(&tokens);
let mut symtab = SymTab::new_type_table();
symtab.insert("foo", Func(vec![Int], Box::new(Int)));
@ -344,7 +347,7 @@ mod tests {
#[test]
fn test_node_type() {
let tokens = tokenize("1");
let tokens = tokenize("1").unwrap();
let mut ast = parse(&tokens);
let mut symtab = SymTab::new_type_table();

View file

@ -37,7 +37,11 @@ fn handle_connection(mut stream: TcpStream) {
let program = json_request["code"].as_str().unwrap();
let output = compiler::compile(program);
let response = format!("{{\"program\": \"{output}\"}}");
let response = match output {
Ok(output) => format!("{{\"program\": \"{output}\"}}"),
Err(e) => format!("{{\"error\": \"{e}\"}}"),
};
stream.write_all(response.as_bytes()).unwrap();
}
_ => panic!("Unexpected command!"),