From 02026c42e0e816e710fa379a9f9b4c4a4e93dc75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vili=20Sinerv=C3=A4?= Date: Wed, 26 Feb 2025 22:31:04 +0200 Subject: [PATCH] Add error handling for parser --- src/compiler.rs | 4 +- src/compiler/parser/mod.rs | 287 +++++++++++++++--------- src/compiler/parser/parser_utilities.rs | 48 ++-- src/compiler/parser/tests.rs | 135 +++++------ src/compiler/tokenizer.rs | 16 +- src/compiler/type_checker.rs | 10 +- 6 files changed, 286 insertions(+), 214 deletions(-) diff --git a/src/compiler.rs b/src/compiler.rs index 086f558..c1781c0 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -25,7 +25,7 @@ mod variable; pub fn compile(code: &str) -> Result> { let tokens = tokenize(code)?; - let mut ast = parse(&tokens); + let mut ast = parse(&tokens)?; type_check(&mut ast, &mut SymTab::new_type_table()); let ir = generate_ir(&ast); let assembly = generate_assembly(&ir); @@ -49,7 +49,7 @@ pub fn start_interpreter() { for line in lines { if let Ok(code) = line { let tokens = tokenize(&code).unwrap(); - let ast = parse(&tokens); + let ast = parse(&tokens).unwrap(); let val = interpret(&ast, &mut SymTab::new_val_table()); println!("{}", val); diff --git a/src/compiler/parser/mod.rs b/src/compiler/parser/mod.rs index 4077ba1..446b652 100644 --- a/src/compiler/parser/mod.rs +++ b/src/compiler/parser/mod.rs @@ -2,6 +2,8 @@ mod parser_utilities; #[cfg(test)] mod tests; +use std::{error::Error, fmt::Display}; + use crate::compiler::{ ast::{ AstNode, @@ -12,48 +14,61 @@ use crate::compiler::{ token::{Token, TokenType}, }; -pub fn parse<'source>(tokens: &[Token<'source>]) -> AstNode<'source> { +#[derive(Debug)] +pub struct ParserError { + msg: String, +} + +impl Display for ParserError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "ParserError: {}", self.msg) + } +} + +impl Error for ParserError {} + +pub fn parse<'source>(tokens: &[Token<'source>]) -> Result, ParserError> { let mut pos = 0; - let first_expression = parse_block_level_expressions(&mut pos, tokens); + let first_expression = parse_block_level_expressions(&mut pos, tokens)?; if pos != tokens.len() { let mut expressions = vec![first_expression]; // Blocks don't need to be followed by a semicolon, but can be - if peek(&mut (pos - 1), tokens).text == "}" { - if peek(&mut pos, tokens).text == ";" { - consume_string(&mut pos, tokens, ";"); + if peek(&mut (pos - 1), tokens)?.text == "}" { + if peek(&mut pos, tokens)?.text == ";" { + consume_string(&mut pos, tokens, ";")?; } } else { - consume_string(&mut pos, tokens, ";"); + consume_string(&mut pos, tokens, ";")?; } - while peek(&mut pos, tokens).token_type != TokenType::End { - expressions.push(parse_block_level_expressions(&mut pos, tokens)); + while peek(&mut pos, tokens)?.token_type != TokenType::End { + expressions.push(parse_block_level_expressions(&mut pos, tokens)?); - if peek(&mut pos, tokens).token_type == TokenType::End { + if peek(&mut pos, tokens)?.token_type == TokenType::End { break; } // Blocks don't need to be followed by a semicolon, but can be - if peek(&mut (pos - 1), tokens).text == "}" { - if peek(&mut pos, tokens).text == ";" { - consume_string(&mut pos, tokens, ";"); + if peek(&mut (pos - 1), tokens)?.text == "}" { + if peek(&mut pos, tokens)?.text == ";" { + consume_string(&mut pos, tokens, ";")?; } } else { - consume_string(&mut pos, tokens, ";"); + consume_string(&mut pos, tokens, ";")?; } } - let last_token = peek(&mut (pos - 1), tokens); + let last_token = peek(&mut (pos - 1), tokens)?; if last_token.text == ";" { expressions.push(AstNode::new(last_token.loc, EmptyLiteral())); } - AstNode::new(tokens[0].loc, Block(expressions)) + Ok(AstNode::new(tokens[0].loc, Block(expressions))) } else { - first_expression + Ok(first_expression) } } @@ -62,9 +77,9 @@ pub fn parse<'source>(tokens: &[Token<'source>]) -> AstNode<'source> { fn parse_block_level_expressions<'source>( pos: &mut usize, tokens: &[Token<'source>], -) -> AstNode<'source> { +) -> Result, ParserError> { // Special handling for variable declaration, since it is only allowed in very specifc places - if peek(pos, tokens).text == "var" { + if peek(pos, tokens)?.text == "var" { parse_var_declaration(pos, tokens) } else { parse_expression(0, pos, tokens) @@ -75,7 +90,7 @@ fn parse_expression<'source>( level: usize, pos: &mut usize, tokens: &[Token<'source>], -) -> AstNode<'source> { +) -> Result, ParserError> { const OPS: [&[&str]; 8] = [ &["="], // 0 &["or"], // 1 @@ -90,39 +105,39 @@ fn parse_expression<'source>( match level { 0 => { - let left = parse_expression(level + 1, pos, tokens); - if OPS[level].contains(&peek(pos, tokens).text) { - let operator_token = consume_strings(pos, tokens, OPS[level]); - let right = parse_expression(level, pos, tokens); - AstNode::new( + let left = parse_expression(level + 1, pos, tokens)?; + if OPS[level].contains(&peek(pos, tokens)?.text) { + let operator_token = consume_strings(pos, tokens, OPS[level])?; + let right = parse_expression(level, pos, tokens)?; + Ok(AstNode::new( operator_token.loc, BinaryOp(Box::new(left), operator_token.text, Box::new(right)), - ) + )) } else { - left + Ok(left) } } 1..=6 => { - let mut left = parse_expression(level + 1, pos, tokens); - while OPS[level].contains(&peek(pos, tokens).text) { - let operator_token = consume_strings(pos, tokens, OPS[level]); - let right = parse_expression(level + 1, pos, tokens); + let mut left = parse_expression(level + 1, pos, tokens)?; + while OPS[level].contains(&peek(pos, tokens)?.text) { + let operator_token = consume_strings(pos, tokens, OPS[level])?; + let right = parse_expression(level + 1, pos, tokens)?; left = AstNode::new( operator_token.loc, BinaryOp(Box::new(left), operator_token.text, Box::new(right)), ); } - left + Ok(left) } 7 => { - if OPS[level].contains(&peek(pos, tokens).text) { - let operator_token = consume_strings(pos, tokens, OPS[level]); - let right = parse_expression(level, pos, tokens); - AstNode::new( + if OPS[level].contains(&peek(pos, tokens)?.text) { + let operator_token = consume_strings(pos, tokens, OPS[level])?; + let right = parse_expression(level, pos, tokens)?; + Ok(AstNode::new( operator_token.loc, UnaryOp(operator_token.text, Box::new(right)), - ) + )) } else { parse_expression(level + 1, pos, tokens) } @@ -132,18 +147,23 @@ fn parse_expression<'source>( } } -fn parse_term<'source>(pos: &mut usize, tokens: &[Token<'source>]) -> AstNode<'source> { - let token = peek(pos, tokens); +fn parse_term<'source>( + pos: &mut usize, + tokens: &[Token<'source>], +) -> Result, ParserError> { + let token = peek(pos, tokens)?; match token.token_type { - TokenType::Integer => parse_int_literal(pos, tokens), + TokenType::Integer => Ok(parse_int_literal(pos, tokens)?), TokenType::Identifier => match token.text { "if" => parse_conditional(pos, tokens), "while" => parse_while_loop(pos, tokens), "true" | "false" => parse_bool_literal(pos, tokens), - "var" => panic!("Invalid variable declaration {}", token), + "var" => Err(ParserError { + msg: format!("Invalid variable declaration {}", token), + }), _ => { - if peek(&mut (*pos + 1), tokens).text == "(" { + if peek(&mut (*pos + 1), tokens)?.text == "(" { parse_function(pos, tokens) } else { parse_identifier(pos, tokens) @@ -155,146 +175,191 @@ fn parse_term<'source>(pos: &mut usize, tokens: &[Token<'source>]) -> AstNode<'s "{" => parse_block(pos, tokens), _ => unreachable!(), }, - _ => panic!("Unexpected {}", token), + _ => Err(ParserError { + msg: format!("Unexpected {}", token), + }), } } -fn parse_var_declaration<'source>(pos: &mut usize, tokens: &[Token<'source>]) -> AstNode<'source> { - consume_string(pos, tokens, "var"); - let name_token = consume_type(pos, tokens, TokenType::Identifier); +fn parse_var_declaration<'source>( + pos: &mut usize, + tokens: &[Token<'source>], +) -> Result, ParserError> { + consume_string(pos, tokens, "var")?; + let name_token = consume_type(pos, tokens, TokenType::Identifier)?; let mut type_expr = None; - if peek(pos, tokens).text == ":" { - consume_string(pos, tokens, ":"); + if peek(pos, tokens)?.text == ":" { + consume_string(pos, tokens, ":")?; - let type_token = consume_type(pos, tokens, TokenType::Identifier); + let type_token = consume_type(pos, tokens, TokenType::Identifier)?; type_expr = match type_token.text { "Int" => Some(TypeExpression::Int(type_token.loc)), "Bool" => Some(TypeExpression::Bool(type_token.loc)), - _ => panic! {"Unknown type indicator!"}, + _ => { + return Err(ParserError { + msg: format!("Invalid type specifier {}", type_token.text), + }); + } } } - consume_string(pos, tokens, "="); - let value = parse_expression(0, pos, tokens); - AstNode::new( + consume_string(pos, tokens, "=")?; + let value = parse_expression(0, pos, tokens)?; + Ok(AstNode::new( name_token.loc, VarDeclaration(name_token.text, Box::new(value), type_expr), - ) + )) } -fn parse_conditional<'source>(pos: &mut usize, tokens: &[Token<'source>]) -> AstNode<'source> { - let start = consume_string(pos, tokens, "if"); - let condition = Box::new(parse_expression(0, pos, tokens)); - consume_string(pos, tokens, "then"); - let then_expr = Box::new(parse_expression(0, pos, tokens)); +fn parse_conditional<'source>( + pos: &mut usize, + tokens: &[Token<'source>], +) -> Result, ParserError> { + let start = consume_string(pos, tokens, "if")?; + let condition = Box::new(parse_expression(0, pos, tokens)?); + consume_string(pos, tokens, "then")?; + let then_expr = Box::new(parse_expression(0, pos, tokens)?); - let else_expr = match peek(pos, tokens).text { + let else_expr = match peek(pos, tokens)?.text { "else" => { - consume_string(pos, tokens, "else"); - Some(Box::new(parse_expression(0, pos, tokens))) + consume_string(pos, tokens, "else")?; + Some(Box::new(parse_expression(0, pos, tokens)?)) } _ => None, }; - AstNode::new(start.loc, Conditional(condition, then_expr, else_expr)) + Ok(AstNode::new( + start.loc, + Conditional(condition, then_expr, else_expr), + )) } -fn parse_while_loop<'source>(pos: &mut usize, tokens: &[Token<'source>]) -> AstNode<'source> { - let start = consume_string(pos, tokens, "while"); - let condition = Box::new(parse_expression(0, pos, tokens)); - consume_string(pos, tokens, "do"); - let do_expr = Box::new(parse_expression(0, pos, tokens)); +fn parse_while_loop<'source>( + pos: &mut usize, + tokens: &[Token<'source>], +) -> Result, ParserError> { + let start = consume_string(pos, tokens, "while")?; + let condition = Box::new(parse_expression(0, pos, tokens)?); + consume_string(pos, tokens, "do")?; + let do_expr = Box::new(parse_expression(0, pos, tokens)?); - AstNode::new(start.loc, While(condition, do_expr)) + Ok(AstNode::new(start.loc, While(condition, do_expr))) } -fn parse_parenthesized<'source>(pos: &mut usize, tokens: &[Token<'source>]) -> AstNode<'source> { - consume_string(pos, tokens, "("); - let expression = parse_expression(0, pos, tokens); - consume_string(pos, tokens, ")"); - expression +fn parse_parenthesized<'source>( + pos: &mut usize, + tokens: &[Token<'source>], +) -> Result, ParserError> { + consume_string(pos, tokens, "(")?; + let expression = parse_expression(0, pos, tokens)?; + consume_string(pos, tokens, ")")?; + Ok(expression) } -fn parse_block<'source>(pos: &mut usize, tokens: &[Token<'source>]) -> AstNode<'source> { - let start = consume_string(pos, tokens, "{"); +fn parse_block<'source>( + pos: &mut usize, + tokens: &[Token<'source>], +) -> Result, ParserError> { + let start = consume_string(pos, tokens, "{")?; let mut expressions = Vec::new(); - while peek(pos, tokens).text != "}" { - expressions.push(parse_block_level_expressions(pos, tokens)); + while peek(pos, tokens)?.text != "}" { + expressions.push(parse_block_level_expressions(pos, tokens)?); // Last expression left as return expression, if no semicolon is present - if peek(pos, tokens).text == "}" { + if peek(pos, tokens)?.text == "}" { break; } // Blocks don't need to be followed by a semicolon, but can be - if peek(&mut (*pos - 1), tokens).text == "}" { - if peek(pos, tokens).text == ";" { - consume_string(pos, tokens, ";"); + if peek(&mut (*pos - 1), tokens)?.text == "}" { + if peek(pos, tokens)?.text == ";" { + consume_string(pos, tokens, ";")?; } } else { - consume_string(pos, tokens, ";"); + consume_string(pos, tokens, ";")?; } // If the last expression of the block ended in a semicolon, empty return - let next_token = peek(pos, tokens); + let next_token = peek(pos, tokens)?; if next_token.text == "}" { expressions.push(AstNode::new(next_token.loc, EmptyLiteral())); break; } } - consume_string(pos, tokens, "}"); - AstNode::new(start.loc, Block(expressions)) + consume_string(pos, tokens, "}")?; + Ok(AstNode::new(start.loc, Block(expressions))) } -fn parse_function<'source>(pos: &mut usize, tokens: &[Token<'source>]) -> AstNode<'source> { - let identifier = consume_type(pos, tokens, TokenType::Identifier); - consume_string(pos, tokens, "("); +fn parse_function<'source>( + pos: &mut usize, + tokens: &[Token<'source>], +) -> Result, ParserError> { + let identifier = consume_type(pos, tokens, TokenType::Identifier)?; + consume_string(pos, tokens, "(")?; let mut arguments = Vec::new(); // If/loop used instead of while to show that we will always use break to exit the loop - if peek(pos, tokens).text != ")" { + if peek(pos, tokens)?.text != ")" { loop { - arguments.push(parse_expression(0, pos, tokens)); + arguments.push(parse_expression(0, pos, tokens)?); - match peek(pos, tokens).text { - "," => consume_string(pos, tokens, ","), - _ => break, // Break out of the loop. Intentionally causes a panic with a missing comma + match peek(pos, tokens)?.text { + "," => consume_string(pos, tokens, ",")?, + _ => break, // Break out of the loop. Intentionally causes an error with a missing comma }; } } - consume_string(pos, tokens, ")"); - AstNode::new(identifier.loc, FunCall(identifier.text, arguments)) + consume_string(pos, tokens, ")")?; + Ok(AstNode::new( + identifier.loc, + FunCall(identifier.text, arguments), + )) } -fn parse_int_literal<'source>(pos: &mut usize, tokens: &[Token]) -> AstNode<'source> { - let token = consume_type(pos, tokens, TokenType::Integer); +fn parse_int_literal<'source>( + pos: &mut usize, + tokens: &[Token], +) -> Result, ParserError> { + let token = consume_type(pos, tokens, TokenType::Integer)?; - let expr = IntLiteral( - token - .text - .parse::() - .unwrap_or_else(|_| panic!("Fatal parser error! Invalid value in token {token}")), - ); + let expr = match token.text.parse::() { + Ok(val) => IntLiteral(val), + Err(_) => { + return Err(ParserError { + msg: format!("Invalid value in token {token}"), + }); + } + }; - AstNode::new(token.loc, expr) + Ok(AstNode::new(token.loc, expr)) } -fn parse_bool_literal<'source>(pos: &mut usize, tokens: &[Token]) -> AstNode<'source> { - let token = consume_type(pos, tokens, TokenType::Identifier); +fn parse_bool_literal<'source>( + pos: &mut usize, + tokens: &[Token], +) -> Result, ParserError> { + let token = consume_type(pos, tokens, TokenType::Identifier)?; let expr = match token.text { "true" => BoolLiteral(true), "false" => BoolLiteral(false), - _ => panic!("Fatal parser error! Expected bool literal but found {token}"), + _ => { + return Err(ParserError { + msg: format!("Expected bool literal but found {token}"), + }) + } }; - AstNode::new(token.loc, expr) + Ok(AstNode::new(token.loc, expr)) } -fn parse_identifier<'source>(pos: &mut usize, tokens: &[Token<'source>]) -> AstNode<'source> { - let token = consume_type(pos, tokens, TokenType::Identifier); - AstNode::new(token.loc, Identifier(token.text)) +fn parse_identifier<'source>( + pos: &mut usize, + tokens: &[Token<'source>], +) -> Result, ParserError> { + let token = consume_type(pos, tokens, TokenType::Identifier)?; + Ok(AstNode::new(token.loc, Identifier(token.text))) } diff --git a/src/compiler/parser/parser_utilities.rs b/src/compiler/parser/parser_utilities.rs index 6e6722f..957dcc8 100644 --- a/src/compiler/parser/parser_utilities.rs +++ b/src/compiler/parser/parser_utilities.rs @@ -4,7 +4,7 @@ pub fn consume_string<'source>( pos: &mut usize, tokens: &[Token<'source>], expected_string: &str, -) -> Token<'source> { +) -> Result, ParserError> { consume_strings(pos, tokens, &[expected_string]) } @@ -12,16 +12,15 @@ pub fn consume_strings<'source>( pos: &mut usize, tokens: &[Token<'source>], strings: &[&str], -) -> Token<'source> { - let token = consume(pos, tokens); +) -> Result, ParserError> { + let token = consume(pos, tokens)?; if strings.contains(&token.text) { - token + Ok(token) } else { - panic!( - "Parsing error: expected one of {:?} but found {}", - strings, token - ); + Err(ParserError { + msg: format!("Expected one of {:?} but found {}", strings, token), + }) } } @@ -29,7 +28,7 @@ pub fn consume_type<'source>( pos: &mut usize, tokens: &[Token<'source>], expected_type: TokenType, -) -> Token<'source> { +) -> Result, ParserError> { consume_types(pos, tokens, &[expected_type]) } @@ -37,31 +36,38 @@ pub fn consume_types<'source>( pos: &mut usize, tokens: &[Token<'source>], types: &[TokenType], -) -> Token<'source> { - let token = consume(pos, tokens); +) -> Result, ParserError> { + let token = consume(pos, tokens)?; if types.contains(&token.token_type) { - token + Ok(token) } else { - panic!( - "Parsing error: expected one of {:?} but found {}", - types, token - ); + Err(ParserError { + msg: format!("Expected one of {:?} but found {}", types, token), + }) } } -pub fn consume<'source>(pos: &mut usize, tokens: &[Token<'source>]) -> Token<'source> { +pub fn consume<'source>( + pos: &mut usize, + tokens: &[Token<'source>], +) -> Result, ParserError> { let token = peek(pos, tokens); *pos += 1; token } -pub fn peek<'source>(pos: &mut usize, tokens: &[Token<'source>]) -> Token<'source> { +pub fn peek<'source>( + pos: &mut usize, + tokens: &[Token<'source>], +) -> Result, ParserError> { if let Some(token) = tokens.get(*pos) { - token.clone() + Ok(token.clone()) } else if let Some(last_token) = tokens.get(*pos - 1) { - Token::new("", TokenType::End, last_token.loc) + Ok(Token::new("", TokenType::End, last_token.loc)) } else { - panic!("Input to parser appears to be empty!"); + Err(ParserError { + msg: String::from("Input to parser appears to be empty!"), + }) } } diff --git a/src/compiler/parser/tests.rs b/src/compiler/parser/tests.rs index 1dd7426..474e400 100644 --- a/src/compiler/parser/tests.rs +++ b/src/compiler/parser/tests.rs @@ -133,45 +133,45 @@ macro_rules! while_ast_b { #[test] #[should_panic] fn test_empty() { - parse(&[]); + parse(&[]).unwrap(); } #[test] #[should_panic] fn test_invalid_start() { - parse(&tokenize("1 2 + 3").unwrap()); + parse(&tokenize("1 2 + 3").unwrap()).unwrap(); } #[test] #[should_panic] fn test_invalid_middle() { - parse(&tokenize("1 + 2 2 + 3").unwrap()); + parse(&tokenize("1 + 2 2 + 3").unwrap()).unwrap(); } #[test] #[should_panic] fn test_invalid_end() { - parse(&tokenize("1 + 2 3").unwrap()); + parse(&tokenize("1 + 2 3").unwrap()).unwrap(); } #[test] fn test_binary_op_basic() { - let result = parse(&tokenize("1 + 23").unwrap()); + let result = parse(&tokenize("1 + 23").unwrap()).unwrap(); assert_eq!(result, bin_ast!(int_ast_b!(1), "+", int_ast_b!(23))); - let result = parse(&tokenize("4 - 56").unwrap()); + let result = parse(&tokenize("4 - 56").unwrap()).unwrap(); assert_eq!(result, bin_ast!(int_ast_b!(4), "-", int_ast_b!(56))); - let result = parse(&tokenize("1 * 2").unwrap()); + let result = parse(&tokenize("1 * 2").unwrap()).unwrap(); assert_eq!(result, bin_ast!(int_ast_b!(1), "*", int_ast_b!(2))); - let result = parse(&tokenize("1 / 2").unwrap()); + let result = parse(&tokenize("1 / 2").unwrap()).unwrap(); assert_eq!(result, bin_ast!(int_ast_b!(1), "/", int_ast_b!(2))); } #[test] fn test_binary_op_all_levels() { - let result = parse(&tokenize("1 * 2 + 3 < 4 == 5 and 6 or 7").unwrap()); + let result = parse(&tokenize("1 * 2 + 3 < 4 == 5 and 6 or 7").unwrap()).unwrap(); assert_eq!( result, bin_ast!( @@ -200,16 +200,16 @@ fn test_binary_op_all_levels() { #[test] fn test_binary_op_identifier() { - let result = parse(&tokenize("a + 1").unwrap()); + let result = parse(&tokenize("a + 1").unwrap()).unwrap(); assert_eq!(result, bin_ast!(id_ast_b!("a"), "+", int_ast_b!(1))); - let result = parse(&tokenize("1 - a").unwrap()); + let result = parse(&tokenize("1 - a").unwrap()).unwrap(); assert_eq!(result, bin_ast!(int_ast_b!(1), "-", id_ast_b!("a"))); } #[test] fn test_binary_op_multiple() { - let result = parse(&tokenize("1 + 2 - 3").unwrap()); + let result = parse(&tokenize("1 + 2 - 3").unwrap()).unwrap(); assert_eq!( result, bin_ast!( @@ -222,7 +222,7 @@ fn test_binary_op_multiple() { #[test] fn test_binary_op_precedence() { - let result = parse(&tokenize("1 + 2 * 3").unwrap()); + let result = parse(&tokenize("1 + 2 * 3").unwrap()).unwrap(); assert_eq!( result, bin_ast!( @@ -232,7 +232,7 @@ fn test_binary_op_precedence() { ) ); - let result = parse(&tokenize("1 - 2 / 3").unwrap()); + let result = parse(&tokenize("1 - 2 / 3").unwrap()).unwrap(); assert_eq!( result, bin_ast!( @@ -245,7 +245,7 @@ fn test_binary_op_precedence() { #[test] fn test_assignment_basic() { - let result = parse(&tokenize("a = 1 + 2").unwrap()); + let result = parse(&tokenize("a = 1 + 2").unwrap()).unwrap(); assert_eq!( result, bin_ast!( @@ -258,7 +258,7 @@ fn test_assignment_basic() { #[test] fn test_assignment_chain() { - let result = parse(&tokenize("a = b = 1 + 2").unwrap()); + let result = parse(&tokenize("a = b = 1 + 2").unwrap()).unwrap(); assert_eq!( result, bin_ast!( @@ -276,21 +276,21 @@ fn test_assignment_chain() { #[test] #[should_panic] fn test_assignment_invalid() { - parse(&tokenize("a =").unwrap()); + parse(&tokenize("a =").unwrap()).unwrap(); } #[test] fn test_unary_basic() { - let result = parse(&tokenize("not x").unwrap()); + let result = parse(&tokenize("not x").unwrap()).unwrap(); assert_eq!(result, un_ast!("not", id_ast_b!("x"))); - let result = parse(&tokenize("-x").unwrap()); + let result = parse(&tokenize("-x").unwrap()).unwrap(); assert_eq!(result, un_ast!("-", id_ast_b!("x"))); - let result = parse(&tokenize("-1").unwrap()); + let result = parse(&tokenize("-1").unwrap()).unwrap(); assert_eq!(result, un_ast!("-", int_ast_b!(1))); - let result = parse(&tokenize("-1 + 2").unwrap()); + let result = parse(&tokenize("-1 + 2").unwrap()).unwrap(); assert_eq!( result, bin_ast!(un_ast_b!("-", int_ast_b!(1)), "+", int_ast_b!(2)) @@ -299,16 +299,16 @@ fn test_unary_basic() { #[test] fn test_unary_chain() { - let result = parse(&tokenize("not not x").unwrap()); + let result = parse(&tokenize("not not x").unwrap()).unwrap(); assert_eq!(result, un_ast!("not", un_ast_b!("not", id_ast_b!("x")))); - let result = parse(&tokenize("--x").unwrap()); + let result = parse(&tokenize("--x").unwrap()).unwrap(); assert_eq!(result, un_ast!("-", un_ast_b!("-", id_ast_b!("x")))); - let result = parse(&tokenize("--1").unwrap()); + let result = parse(&tokenize("--1").unwrap()).unwrap(); assert_eq!(result, un_ast!("-", un_ast_b!("-", int_ast_b!(1)))); - let result = parse(&tokenize("--1 + 2").unwrap()); + let result = parse(&tokenize("--1 + 2").unwrap()).unwrap(); assert_eq!( result, bin_ast!( @@ -321,7 +321,7 @@ fn test_unary_chain() { #[test] fn test_parenthesized() { - let result = parse(&tokenize("(1+2)*3").unwrap()); + let result = parse(&tokenize("(1+2)*3").unwrap()).unwrap(); assert_eq!( result, bin_ast!( @@ -334,7 +334,7 @@ fn test_parenthesized() { #[test] fn test_parenthesized_nested() { - let result = parse(&tokenize("((1 - 2))/3").unwrap()); + let result = parse(&tokenize("((1 - 2))/3").unwrap()).unwrap(); assert_eq!( result, bin_ast!( @@ -344,7 +344,7 @@ fn test_parenthesized_nested() { ) ); - let result = parse(&tokenize("((1 + 2)*3) / 4").unwrap()); + let result = parse(&tokenize("((1 + 2)*3) / 4").unwrap()).unwrap(); assert_eq!( result, bin_ast!( @@ -362,12 +362,12 @@ fn test_parenthesized_nested() { #[test] #[should_panic] fn test_parenthesized_mismatched() { - parse(&tokenize("(1+2*3").unwrap()); + parse(&tokenize("(1+2*3").unwrap()).unwrap(); } #[test] fn test_if_then() { - let result = parse(&tokenize("if 1 + 2 then 3").unwrap()); + let result = parse(&tokenize("if 1 + 2 then 3").unwrap()).unwrap(); assert_eq!( result, con_ast!( @@ -380,7 +380,7 @@ fn test_if_then() { #[test] fn test_if_then_else() { - let result = parse(&tokenize("if a then b + c else 1 * 2").unwrap()); + let result = parse(&tokenize("if a then b + c else 1 * 2").unwrap()).unwrap(); assert_eq!( result, con_ast!( @@ -393,7 +393,7 @@ fn test_if_then_else() { #[test] fn test_if_then_else_embedded() { - let result = parse(&tokenize("1 + if true then 2 else 3").unwrap()); + let result = parse(&tokenize("1 + if true then 2 else 3").unwrap()).unwrap(); assert_eq!( result, bin_ast!( @@ -406,7 +406,7 @@ fn test_if_then_else_embedded() { #[test] fn test_if_then_else_nested() { - let result = parse(&tokenize("if true then if false then 1 else 2 else 3").unwrap()); + let result = parse(&tokenize("if true then if false then 1 else 2 else 3").unwrap()).unwrap(); assert_eq!( result, con_ast!( @@ -420,15 +420,15 @@ fn test_if_then_else_nested() { #[test] #[should_panic] fn test_if_no_then() { - parse(&tokenize("if true").unwrap()); + parse(&tokenize("if true").unwrap()).unwrap(); } #[test] fn test_func_basic() { - let result = parse(&tokenize("f(a, b)").unwrap()); + let result = parse(&tokenize("f(a, b)").unwrap()).unwrap(); assert_eq!(result, fun_ast!("f", vec![id_ast!("a"), id_ast!("b"),])); - let result = parse(&tokenize("f(a, 1 + 2)").unwrap()); + let result = parse(&tokenize("f(a, 1 + 2)").unwrap()).unwrap(); assert_eq!( result, fun_ast!( @@ -437,13 +437,13 @@ fn test_func_basic() { ) ); - let result = parse(&tokenize("f()").unwrap()); + let result = parse(&tokenize("f()").unwrap()).unwrap(); assert_eq!(result, fun_ast!("f", vec![])); } #[test] fn test_func_embedded() { - let result = parse(&tokenize("1 + f(a)").unwrap()); + let result = parse(&tokenize("1 + f(a)").unwrap()).unwrap(); assert_eq!( result, bin_ast!(int_ast_b!(1), "+", fun_ast_b!("f", vec![id_ast!("a")])) @@ -452,7 +452,7 @@ fn test_func_embedded() { #[test] fn test_func_nested() { - let result = parse(&tokenize("f(a, g(b))").unwrap()); + let result = parse(&tokenize("f(a, g(b))").unwrap()).unwrap(); assert_eq!( result, fun_ast!("f", vec![id_ast!("a"), fun_ast!("g", vec![id_ast!("b")]),]) @@ -462,18 +462,18 @@ fn test_func_nested() { #[test] #[should_panic] fn test_func_missing_comma() { - parse(&tokenize("f(a b)").unwrap()); + parse(&tokenize("f(a b)").unwrap()).unwrap(); } #[test] #[should_panic] fn test_func_missing_close() { - parse(&tokenize("f(a").unwrap()); + parse(&tokenize("f(a").unwrap()).unwrap(); } #[test] fn test_block_basic() { - let result = parse(&tokenize("{ a = 1; b; }").unwrap()); + let result = parse(&tokenize("{ a = 1; b; }").unwrap()).unwrap(); assert_eq!( result, block_ast!(vec![ @@ -483,7 +483,7 @@ fn test_block_basic() { ]) ); - let result = parse(&tokenize("{ a = 1; b }").unwrap()); + let result = parse(&tokenize("{ a = 1; b }").unwrap()).unwrap(); assert_eq!( result, block_ast!(vec![ @@ -495,7 +495,7 @@ fn test_block_basic() { #[test] fn test_block_embedded() { - let result = parse(&tokenize("{ 1 + 2 } * 3").unwrap()); + let result = parse(&tokenize("{ 1 + 2 } * 3").unwrap()).unwrap(); assert_eq!( result, bin_ast!( @@ -508,7 +508,7 @@ fn test_block_embedded() { #[test] fn test_block_nested() { - let result = parse(&tokenize("{ a = { 1 + 2}}").unwrap()); + let result = parse(&tokenize("{ a = { 1 + 2}}").unwrap()).unwrap(); assert_eq!( result, block_ast!(vec![bin_ast!( @@ -522,21 +522,21 @@ fn test_block_nested() { #[test] #[should_panic] fn test_block_unmatched() { - parse(&tokenize("{ a = 1 ").unwrap()); + parse(&tokenize("{ a = 1 ").unwrap()).unwrap(); } #[test] #[should_panic] fn test_block_missing_semicolon() { - parse(&tokenize("{ a = 1\nb }").unwrap()); + parse(&tokenize("{ a = 1\nb }").unwrap()).unwrap(); } #[test] fn test_var_basic() { - let result = parse(&tokenize("var x = 1").unwrap()); + let result = parse(&tokenize("var x = 1").unwrap()).unwrap(); assert_eq!(result, var_ast!("x", int_ast_b!(1), None)); - let result = parse(&tokenize("{ var x = 1; x = 2; }").unwrap()); + let result = parse(&tokenize("{ var x = 1; x = 2; }").unwrap()).unwrap(); assert_eq!( result, block_ast!(vec![ @@ -549,7 +549,7 @@ fn test_var_basic() { #[test] fn test_var_typed() { - let result = parse(&tokenize("var x: Int = 1").unwrap()); + let result = parse(&tokenize("var x: Int = 1").unwrap()).unwrap(); assert_eq!( result, var_ast!( @@ -562,7 +562,7 @@ fn test_var_typed() { ) ); - let result = parse(&tokenize("var x: Bool = true").unwrap()); + let result = parse(&tokenize("var x: Bool = true").unwrap()).unwrap(); assert_eq!( result, var_ast!( @@ -579,18 +579,18 @@ fn test_var_typed() { #[test] #[should_panic] fn test_var_chain() { - parse(&tokenize("var x = var y = 1").unwrap()); + parse(&tokenize("var x = var y = 1").unwrap()).unwrap(); } #[test] #[should_panic] fn test_var_embedded() { - parse(&tokenize("if true then var x = 3").unwrap()); + parse(&tokenize("if true then var x = 3").unwrap()).unwrap(); } #[test] fn test_omitting_semicolons() { - let result = parse(&tokenize("{ { a } { b } }").unwrap()); + let result = parse(&tokenize("{ { a } { b } }").unwrap()).unwrap(); assert_eq!( result, block_ast!(vec![ @@ -599,7 +599,7 @@ fn test_omitting_semicolons() { ]) ); - let result = parse(&tokenize("{ if true then { a } b }").unwrap()); + let result = parse(&tokenize("{ if true then { a } b }").unwrap()).unwrap(); assert_eq!( result, block_ast!(vec![ @@ -608,7 +608,7 @@ fn test_omitting_semicolons() { ]) ); - let result = parse(&tokenize("{ if true then { a }; b }").unwrap()); + let result = parse(&tokenize("{ if true then { a }; b }").unwrap()).unwrap(); assert_eq!( result, block_ast!(vec![ @@ -617,7 +617,7 @@ fn test_omitting_semicolons() { ]) ); - let result = parse(&tokenize("{ if true then { a } else { b } c }").unwrap()); + let result = parse(&tokenize("{ if true then { a } else { b } c }").unwrap()).unwrap(); assert_eq!( result, block_ast!(vec![ @@ -630,7 +630,7 @@ fn test_omitting_semicolons() { ]) ); - let result = parse(&tokenize("x = { { f(a) } { b } }").unwrap()); + let result = parse(&tokenize("x = { { f(a) } { b } }").unwrap()).unwrap(); assert_eq!( result, bin_ast!( @@ -647,12 +647,12 @@ fn test_omitting_semicolons() { #[test] #[should_panic] fn test_omitting_semicolons_invalid() { - parse(&tokenize("{ if true then { a } b c }").unwrap()); + parse(&tokenize("{ if true then { a } b c }").unwrap()).unwrap(); } #[test] fn test_while_do() { - let result = parse(&tokenize("while 1 + 2 do 3").unwrap()); + let result = parse(&tokenize("while 1 + 2 do 3").unwrap()).unwrap(); assert_eq!( result, while_ast!(bin_ast_b!(int_ast_b!(1), "+", int_ast_b!(2)), int_ast_b!(3)) @@ -661,7 +661,7 @@ fn test_while_do() { #[test] fn test_while_do_embedded() { - let result = parse(&tokenize("1 + while true do 2").unwrap()); + let result = parse(&tokenize("1 + while true do 2").unwrap()).unwrap(); assert_eq!( result, bin_ast!( @@ -674,7 +674,7 @@ fn test_while_do_embedded() { #[test] fn test_while_do_nested() { - let result = parse(&tokenize("while true do while false do 1").unwrap()); + let result = parse(&tokenize("while true do while false do 1").unwrap()).unwrap(); assert_eq!( result, while_ast!( @@ -687,18 +687,18 @@ fn test_while_do_nested() { #[test] #[should_panic] fn test_while_no_do() { - parse(&tokenize("while true").unwrap()); + parse(&tokenize("while true").unwrap()).unwrap(); } #[test] fn test_multiple_top_levels() { - let result = parse(&tokenize("a;").unwrap()); + let result = parse(&tokenize("a;").unwrap()).unwrap(); assert_eq!(result, block_ast!(vec![id_ast!("a"), empty_ast!()])); - let result = parse(&tokenize("a; b").unwrap()); + let result = parse(&tokenize("a; b").unwrap()).unwrap(); assert_eq!(result, block_ast!(vec![id_ast!("a"), id_ast!("b")])); - let result = parse(&tokenize("{}{}").unwrap()); + let result = parse(&tokenize("{}{}").unwrap()).unwrap(); assert_eq!( result, block_ast!(vec![block_ast!(vec![]), block_ast!(vec![])]) @@ -726,7 +726,8 @@ fn test_large() { ", ) .unwrap(), - ); + ) + .unwrap(); assert_eq!( result, diff --git a/src/compiler/tokenizer.rs b/src/compiler/tokenizer.rs index 12f1bb0..d4fb258 100644 --- a/src/compiler/tokenizer.rs +++ b/src/compiler/tokenizer.rs @@ -4,19 +4,19 @@ use crate::compiler::token::{CodeLocation, Token, TokenType}; use regex::Regex; #[derive(Debug)] -pub struct TokenizeError { - message: String, +pub struct TokenizerError { + msg: String, } -impl Display for TokenizeError { +impl Display for TokenizerError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "TokenizerError: {}", self.message) + write!(f, "TokenizerError: {}", self.msg) } } -impl Error for TokenizeError {} +impl Error for TokenizerError {} -pub fn tokenize(code: &str) -> Result, TokenizeError> { +pub fn tokenize(code: &str) -> Result, TokenizerError> { // We only want to compile the regexes once // The ordering of these is important! let regexes = vec![ @@ -63,8 +63,8 @@ pub fn tokenize(code: &str) -> Result, TokenizeError> { } if !valid_token { - return Err(TokenizeError { - message: format!( + return Err(TokenizerError { + msg: format!( "Invalid token starting with '{}' on line {} in position {}", &line[pos..pos + 1], line_number + 1, diff --git a/src/compiler/type_checker.rs b/src/compiler/type_checker.rs index e4c8faa..8cf9568 100644 --- a/src/compiler/type_checker.rs +++ b/src/compiler/type_checker.rs @@ -161,7 +161,7 @@ mod tests { fn get_type(code: &str) -> Type { type_check( - &mut parse(&tokenize(code).unwrap()), + &mut parse(&tokenize(code).unwrap()).unwrap(), &mut SymTab::new_type_table(), ) } @@ -321,14 +321,14 @@ mod tests { #[test] fn test_function() { let mut tokens = tokenize("foo(1)").unwrap(); - let mut ast = parse(&tokens); + let mut ast = parse(&tokens).unwrap(); let mut symtab = SymTab::new_type_table(); symtab.insert("foo", Func(vec![Int], Box::new(Int))); let result = type_check(&mut ast, &mut symtab); assert_eq!(result, Int); tokens = tokenize("foo(1);").unwrap(); - ast = parse(&tokens); + ast = parse(&tokens).unwrap(); symtab = SymTab::new_type_table(); symtab.insert("foo", Func(vec![Int], Box::new(Int))); let result = type_check(&mut ast, &mut symtab); @@ -339,7 +339,7 @@ mod tests { #[should_panic] fn test_function_wrong_arg() { let tokens = tokenize("foo(true)").unwrap(); - let mut ast = parse(&tokens); + let mut ast = parse(&tokens).unwrap(); let mut symtab = SymTab::new_type_table(); symtab.insert("foo", Func(vec![Int], Box::new(Int))); type_check(&mut ast, &mut symtab); @@ -348,7 +348,7 @@ mod tests { #[test] fn test_node_type() { let tokens = tokenize("1").unwrap(); - let mut ast = parse(&tokens); + let mut ast = parse(&tokens).unwrap(); let mut symtab = SymTab::new_type_table(); assert_eq!(ast.node_type, Unit);