1
0
Fork 0

Add error handling for parser

This commit is contained in:
Vili Sinervä 2025-02-26 22:31:04 +02:00
parent 9a13d0b9b6
commit 02026c42e0
No known key found for this signature in database
GPG key ID: DF8FEAF54EFAC996
6 changed files with 286 additions and 214 deletions

View file

@ -25,7 +25,7 @@ mod variable;
pub fn compile(code: &str) -> Result<String, Box<dyn Error>> {
let tokens = tokenize(code)?;
let mut ast = parse(&tokens);
let mut ast = parse(&tokens)?;
type_check(&mut ast, &mut SymTab::new_type_table());
let ir = generate_ir(&ast);
let assembly = generate_assembly(&ir);
@ -49,7 +49,7 @@ pub fn start_interpreter() {
for line in lines {
if let Ok(code) = line {
let tokens = tokenize(&code).unwrap();
let ast = parse(&tokens);
let ast = parse(&tokens).unwrap();
let val = interpret(&ast, &mut SymTab::new_val_table());
println!("{}", val);

View file

@ -2,6 +2,8 @@ mod parser_utilities;
#[cfg(test)]
mod tests;
use std::{error::Error, fmt::Display};
use crate::compiler::{
ast::{
AstNode,
@ -12,48 +14,61 @@ use crate::compiler::{
token::{Token, TokenType},
};
pub fn parse<'source>(tokens: &[Token<'source>]) -> AstNode<'source> {
#[derive(Debug)]
pub struct ParserError {
msg: String,
}
impl Display for ParserError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "ParserError: {}", self.msg)
}
}
impl Error for ParserError {}
pub fn parse<'source>(tokens: &[Token<'source>]) -> Result<AstNode<'source>, ParserError> {
let mut pos = 0;
let first_expression = parse_block_level_expressions(&mut pos, tokens);
let first_expression = parse_block_level_expressions(&mut pos, tokens)?;
if pos != tokens.len() {
let mut expressions = vec![first_expression];
// Blocks don't need to be followed by a semicolon, but can be
if peek(&mut (pos - 1), tokens).text == "}" {
if peek(&mut pos, tokens).text == ";" {
consume_string(&mut pos, tokens, ";");
if peek(&mut (pos - 1), tokens)?.text == "}" {
if peek(&mut pos, tokens)?.text == ";" {
consume_string(&mut pos, tokens, ";")?;
}
} else {
consume_string(&mut pos, tokens, ";");
consume_string(&mut pos, tokens, ";")?;
}
while peek(&mut pos, tokens).token_type != TokenType::End {
expressions.push(parse_block_level_expressions(&mut pos, tokens));
while peek(&mut pos, tokens)?.token_type != TokenType::End {
expressions.push(parse_block_level_expressions(&mut pos, tokens)?);
if peek(&mut pos, tokens).token_type == TokenType::End {
if peek(&mut pos, tokens)?.token_type == TokenType::End {
break;
}
// Blocks don't need to be followed by a semicolon, but can be
if peek(&mut (pos - 1), tokens).text == "}" {
if peek(&mut pos, tokens).text == ";" {
consume_string(&mut pos, tokens, ";");
if peek(&mut (pos - 1), tokens)?.text == "}" {
if peek(&mut pos, tokens)?.text == ";" {
consume_string(&mut pos, tokens, ";")?;
}
} else {
consume_string(&mut pos, tokens, ";");
consume_string(&mut pos, tokens, ";")?;
}
}
let last_token = peek(&mut (pos - 1), tokens);
let last_token = peek(&mut (pos - 1), tokens)?;
if last_token.text == ";" {
expressions.push(AstNode::new(last_token.loc, EmptyLiteral()));
}
AstNode::new(tokens[0].loc, Block(expressions))
Ok(AstNode::new(tokens[0].loc, Block(expressions)))
} else {
first_expression
Ok(first_expression)
}
}
@ -62,9 +77,9 @@ pub fn parse<'source>(tokens: &[Token<'source>]) -> AstNode<'source> {
fn parse_block_level_expressions<'source>(
pos: &mut usize,
tokens: &[Token<'source>],
) -> AstNode<'source> {
) -> Result<AstNode<'source>, ParserError> {
// Special handling for variable declaration, since it is only allowed in very specific places
if peek(pos, tokens).text == "var" {
if peek(pos, tokens)?.text == "var" {
parse_var_declaration(pos, tokens)
} else {
parse_expression(0, pos, tokens)
@ -75,7 +90,7 @@ fn parse_expression<'source>(
level: usize,
pos: &mut usize,
tokens: &[Token<'source>],
) -> AstNode<'source> {
) -> Result<AstNode<'source>, ParserError> {
const OPS: [&[&str]; 8] = [
&["="], // 0
&["or"], // 1
@ -90,39 +105,39 @@ fn parse_expression<'source>(
match level {
0 => {
let left = parse_expression(level + 1, pos, tokens);
if OPS[level].contains(&peek(pos, tokens).text) {
let operator_token = consume_strings(pos, tokens, OPS[level]);
let right = parse_expression(level, pos, tokens);
AstNode::new(
let left = parse_expression(level + 1, pos, tokens)?;
if OPS[level].contains(&peek(pos, tokens)?.text) {
let operator_token = consume_strings(pos, tokens, OPS[level])?;
let right = parse_expression(level, pos, tokens)?;
Ok(AstNode::new(
operator_token.loc,
BinaryOp(Box::new(left), operator_token.text, Box::new(right)),
)
))
} else {
left
Ok(left)
}
}
1..=6 => {
let mut left = parse_expression(level + 1, pos, tokens);
while OPS[level].contains(&peek(pos, tokens).text) {
let operator_token = consume_strings(pos, tokens, OPS[level]);
let right = parse_expression(level + 1, pos, tokens);
let mut left = parse_expression(level + 1, pos, tokens)?;
while OPS[level].contains(&peek(pos, tokens)?.text) {
let operator_token = consume_strings(pos, tokens, OPS[level])?;
let right = parse_expression(level + 1, pos, tokens)?;
left = AstNode::new(
operator_token.loc,
BinaryOp(Box::new(left), operator_token.text, Box::new(right)),
);
}
left
Ok(left)
}
7 => {
if OPS[level].contains(&peek(pos, tokens).text) {
let operator_token = consume_strings(pos, tokens, OPS[level]);
let right = parse_expression(level, pos, tokens);
AstNode::new(
if OPS[level].contains(&peek(pos, tokens)?.text) {
let operator_token = consume_strings(pos, tokens, OPS[level])?;
let right = parse_expression(level, pos, tokens)?;
Ok(AstNode::new(
operator_token.loc,
UnaryOp(operator_token.text, Box::new(right)),
)
))
} else {
parse_expression(level + 1, pos, tokens)
}
@ -132,18 +147,23 @@ fn parse_expression<'source>(
}
}
fn parse_term<'source>(pos: &mut usize, tokens: &[Token<'source>]) -> AstNode<'source> {
let token = peek(pos, tokens);
fn parse_term<'source>(
pos: &mut usize,
tokens: &[Token<'source>],
) -> Result<AstNode<'source>, ParserError> {
let token = peek(pos, tokens)?;
match token.token_type {
TokenType::Integer => parse_int_literal(pos, tokens),
TokenType::Integer => Ok(parse_int_literal(pos, tokens)?),
TokenType::Identifier => match token.text {
"if" => parse_conditional(pos, tokens),
"while" => parse_while_loop(pos, tokens),
"true" | "false" => parse_bool_literal(pos, tokens),
"var" => panic!("Invalid variable declaration {}", token),
"var" => Err(ParserError {
msg: format!("Invalid variable declaration {}", token),
}),
_ => {
if peek(&mut (*pos + 1), tokens).text == "(" {
if peek(&mut (*pos + 1), tokens)?.text == "(" {
parse_function(pos, tokens)
} else {
parse_identifier(pos, tokens)
@ -155,146 +175,191 @@ fn parse_term<'source>(pos: &mut usize, tokens: &[Token<'source>]) -> AstNode<'s
"{" => parse_block(pos, tokens),
_ => unreachable!(),
},
_ => panic!("Unexpected {}", token),
_ => Err(ParserError {
msg: format!("Unexpected {}", token),
}),
}
}
fn parse_var_declaration<'source>(pos: &mut usize, tokens: &[Token<'source>]) -> AstNode<'source> {
consume_string(pos, tokens, "var");
let name_token = consume_type(pos, tokens, TokenType::Identifier);
fn parse_var_declaration<'source>(
pos: &mut usize,
tokens: &[Token<'source>],
) -> Result<AstNode<'source>, ParserError> {
consume_string(pos, tokens, "var")?;
let name_token = consume_type(pos, tokens, TokenType::Identifier)?;
let mut type_expr = None;
if peek(pos, tokens).text == ":" {
consume_string(pos, tokens, ":");
if peek(pos, tokens)?.text == ":" {
consume_string(pos, tokens, ":")?;
let type_token = consume_type(pos, tokens, TokenType::Identifier);
let type_token = consume_type(pos, tokens, TokenType::Identifier)?;
type_expr = match type_token.text {
"Int" => Some(TypeExpression::Int(type_token.loc)),
"Bool" => Some(TypeExpression::Bool(type_token.loc)),
_ => panic! {"Unknown type indicator!"},
_ => {
return Err(ParserError {
msg: format!("Invalid type specifier {}", type_token.text),
});
}
}
}
consume_string(pos, tokens, "=");
let value = parse_expression(0, pos, tokens);
AstNode::new(
consume_string(pos, tokens, "=")?;
let value = parse_expression(0, pos, tokens)?;
Ok(AstNode::new(
name_token.loc,
VarDeclaration(name_token.text, Box::new(value), type_expr),
)
))
}
fn parse_conditional<'source>(pos: &mut usize, tokens: &[Token<'source>]) -> AstNode<'source> {
let start = consume_string(pos, tokens, "if");
let condition = Box::new(parse_expression(0, pos, tokens));
consume_string(pos, tokens, "then");
let then_expr = Box::new(parse_expression(0, pos, tokens));
fn parse_conditional<'source>(
pos: &mut usize,
tokens: &[Token<'source>],
) -> Result<AstNode<'source>, ParserError> {
let start = consume_string(pos, tokens, "if")?;
let condition = Box::new(parse_expression(0, pos, tokens)?);
consume_string(pos, tokens, "then")?;
let then_expr = Box::new(parse_expression(0, pos, tokens)?);
let else_expr = match peek(pos, tokens).text {
let else_expr = match peek(pos, tokens)?.text {
"else" => {
consume_string(pos, tokens, "else");
Some(Box::new(parse_expression(0, pos, tokens)))
consume_string(pos, tokens, "else")?;
Some(Box::new(parse_expression(0, pos, tokens)?))
}
_ => None,
};
AstNode::new(start.loc, Conditional(condition, then_expr, else_expr))
Ok(AstNode::new(
start.loc,
Conditional(condition, then_expr, else_expr),
))
}
fn parse_while_loop<'source>(pos: &mut usize, tokens: &[Token<'source>]) -> AstNode<'source> {
let start = consume_string(pos, tokens, "while");
let condition = Box::new(parse_expression(0, pos, tokens));
consume_string(pos, tokens, "do");
let do_expr = Box::new(parse_expression(0, pos, tokens));
fn parse_while_loop<'source>(
pos: &mut usize,
tokens: &[Token<'source>],
) -> Result<AstNode<'source>, ParserError> {
let start = consume_string(pos, tokens, "while")?;
let condition = Box::new(parse_expression(0, pos, tokens)?);
consume_string(pos, tokens, "do")?;
let do_expr = Box::new(parse_expression(0, pos, tokens)?);
AstNode::new(start.loc, While(condition, do_expr))
Ok(AstNode::new(start.loc, While(condition, do_expr)))
}
fn parse_parenthesized<'source>(pos: &mut usize, tokens: &[Token<'source>]) -> AstNode<'source> {
consume_string(pos, tokens, "(");
let expression = parse_expression(0, pos, tokens);
consume_string(pos, tokens, ")");
expression
fn parse_parenthesized<'source>(
pos: &mut usize,
tokens: &[Token<'source>],
) -> Result<AstNode<'source>, ParserError> {
consume_string(pos, tokens, "(")?;
let expression = parse_expression(0, pos, tokens)?;
consume_string(pos, tokens, ")")?;
Ok(expression)
}
fn parse_block<'source>(pos: &mut usize, tokens: &[Token<'source>]) -> AstNode<'source> {
let start = consume_string(pos, tokens, "{");
fn parse_block<'source>(
pos: &mut usize,
tokens: &[Token<'source>],
) -> Result<AstNode<'source>, ParserError> {
let start = consume_string(pos, tokens, "{")?;
let mut expressions = Vec::new();
while peek(pos, tokens).text != "}" {
expressions.push(parse_block_level_expressions(pos, tokens));
while peek(pos, tokens)?.text != "}" {
expressions.push(parse_block_level_expressions(pos, tokens)?);
// Last expression left as return expression, if no semicolon is present
if peek(pos, tokens).text == "}" {
if peek(pos, tokens)?.text == "}" {
break;
}
// Blocks don't need to be followed by a semicolon, but can be
if peek(&mut (*pos - 1), tokens).text == "}" {
if peek(pos, tokens).text == ";" {
consume_string(pos, tokens, ";");
if peek(&mut (*pos - 1), tokens)?.text == "}" {
if peek(pos, tokens)?.text == ";" {
consume_string(pos, tokens, ";")?;
}
} else {
consume_string(pos, tokens, ";");
consume_string(pos, tokens, ";")?;
}
// If the last expression of the block ended in a semicolon, empty return
let next_token = peek(pos, tokens);
let next_token = peek(pos, tokens)?;
if next_token.text == "}" {
expressions.push(AstNode::new(next_token.loc, EmptyLiteral()));
break;
}
}
consume_string(pos, tokens, "}");
AstNode::new(start.loc, Block(expressions))
consume_string(pos, tokens, "}")?;
Ok(AstNode::new(start.loc, Block(expressions)))
}
fn parse_function<'source>(pos: &mut usize, tokens: &[Token<'source>]) -> AstNode<'source> {
let identifier = consume_type(pos, tokens, TokenType::Identifier);
consume_string(pos, tokens, "(");
fn parse_function<'source>(
pos: &mut usize,
tokens: &[Token<'source>],
) -> Result<AstNode<'source>, ParserError> {
let identifier = consume_type(pos, tokens, TokenType::Identifier)?;
consume_string(pos, tokens, "(")?;
let mut arguments = Vec::new();
// If/loop used instead of while to show that we will always use break to exit the loop
if peek(pos, tokens).text != ")" {
if peek(pos, tokens)?.text != ")" {
loop {
arguments.push(parse_expression(0, pos, tokens));
arguments.push(parse_expression(0, pos, tokens)?);
match peek(pos, tokens).text {
"," => consume_string(pos, tokens, ","),
_ => break, // Break out of the loop. Intentionally causes a panic with a missing comma
match peek(pos, tokens)?.text {
"," => consume_string(pos, tokens, ",")?,
_ => break, // Break out of the loop. Intentionally causes an error with a missing comma
};
}
}
consume_string(pos, tokens, ")");
AstNode::new(identifier.loc, FunCall(identifier.text, arguments))
consume_string(pos, tokens, ")")?;
Ok(AstNode::new(
identifier.loc,
FunCall(identifier.text, arguments),
))
}
fn parse_int_literal<'source>(pos: &mut usize, tokens: &[Token]) -> AstNode<'source> {
let token = consume_type(pos, tokens, TokenType::Integer);
fn parse_int_literal<'source>(
pos: &mut usize,
tokens: &[Token],
) -> Result<AstNode<'source>, ParserError> {
let token = consume_type(pos, tokens, TokenType::Integer)?;
let expr = IntLiteral(
token
.text
.parse::<i128>()
.unwrap_or_else(|_| panic!("Fatal parser error! Invalid value in token {token}")),
);
let expr = match token.text.parse::<i128>() {
Ok(val) => IntLiteral(val),
Err(_) => {
return Err(ParserError {
msg: format!("Invalid value in token {token}"),
});
}
};
AstNode::new(token.loc, expr)
Ok(AstNode::new(token.loc, expr))
}
fn parse_bool_literal<'source>(pos: &mut usize, tokens: &[Token]) -> AstNode<'source> {
let token = consume_type(pos, tokens, TokenType::Identifier);
fn parse_bool_literal<'source>(
pos: &mut usize,
tokens: &[Token],
) -> Result<AstNode<'source>, ParserError> {
let token = consume_type(pos, tokens, TokenType::Identifier)?;
let expr = match token.text {
"true" => BoolLiteral(true),
"false" => BoolLiteral(false),
_ => panic!("Fatal parser error! Expected bool literal but found {token}"),
_ => {
return Err(ParserError {
msg: format!("Expected bool literal but found {token}"),
})
}
};
AstNode::new(token.loc, expr)
Ok(AstNode::new(token.loc, expr))
}
fn parse_identifier<'source>(pos: &mut usize, tokens: &[Token<'source>]) -> AstNode<'source> {
let token = consume_type(pos, tokens, TokenType::Identifier);
AstNode::new(token.loc, Identifier(token.text))
fn parse_identifier<'source>(
pos: &mut usize,
tokens: &[Token<'source>],
) -> Result<AstNode<'source>, ParserError> {
let token = consume_type(pos, tokens, TokenType::Identifier)?;
Ok(AstNode::new(token.loc, Identifier(token.text)))
}

View file

@ -4,7 +4,7 @@ pub fn consume_string<'source>(
pos: &mut usize,
tokens: &[Token<'source>],
expected_string: &str,
) -> Token<'source> {
) -> Result<Token<'source>, ParserError> {
consume_strings(pos, tokens, &[expected_string])
}
@ -12,16 +12,15 @@ pub fn consume_strings<'source>(
pos: &mut usize,
tokens: &[Token<'source>],
strings: &[&str],
) -> Token<'source> {
let token = consume(pos, tokens);
) -> Result<Token<'source>, ParserError> {
let token = consume(pos, tokens)?;
if strings.contains(&token.text) {
token
Ok(token)
} else {
panic!(
"Parsing error: expected one of {:?} but found {}",
strings, token
);
Err(ParserError {
msg: format!("Expected one of {:?} but found {}", strings, token),
})
}
}
@ -29,7 +28,7 @@ pub fn consume_type<'source>(
pos: &mut usize,
tokens: &[Token<'source>],
expected_type: TokenType,
) -> Token<'source> {
) -> Result<Token<'source>, ParserError> {
consume_types(pos, tokens, &[expected_type])
}
@ -37,31 +36,38 @@ pub fn consume_types<'source>(
pos: &mut usize,
tokens: &[Token<'source>],
types: &[TokenType],
) -> Token<'source> {
let token = consume(pos, tokens);
) -> Result<Token<'source>, ParserError> {
let token = consume(pos, tokens)?;
if types.contains(&token.token_type) {
token
Ok(token)
} else {
panic!(
"Parsing error: expected one of {:?} but found {}",
types, token
);
Err(ParserError {
msg: format!("Expected one of {:?} but found {}", types, token),
})
}
}
pub fn consume<'source>(pos: &mut usize, tokens: &[Token<'source>]) -> Token<'source> {
pub fn consume<'source>(
pos: &mut usize,
tokens: &[Token<'source>],
) -> Result<Token<'source>, ParserError> {
let token = peek(pos, tokens);
*pos += 1;
token
}
pub fn peek<'source>(pos: &mut usize, tokens: &[Token<'source>]) -> Token<'source> {
pub fn peek<'source>(
pos: &mut usize,
tokens: &[Token<'source>],
) -> Result<Token<'source>, ParserError> {
if let Some(token) = tokens.get(*pos) {
token.clone()
Ok(token.clone())
} else if let Some(last_token) = tokens.get(*pos - 1) {
Token::new("", TokenType::End, last_token.loc)
Ok(Token::new("", TokenType::End, last_token.loc))
} else {
panic!("Input to parser appears to be empty!");
Err(ParserError {
msg: String::from("Input to parser appears to be empty!"),
})
}
}

View file

@ -133,45 +133,45 @@ macro_rules! while_ast_b {
#[test]
#[should_panic]
fn test_empty() {
parse(&[]);
parse(&[]).unwrap();
}
#[test]
#[should_panic]
fn test_invalid_start() {
parse(&tokenize("1 2 + 3").unwrap());
parse(&tokenize("1 2 + 3").unwrap()).unwrap();
}
#[test]
#[should_panic]
fn test_invalid_middle() {
parse(&tokenize("1 + 2 2 + 3").unwrap());
parse(&tokenize("1 + 2 2 + 3").unwrap()).unwrap();
}
#[test]
#[should_panic]
fn test_invalid_end() {
parse(&tokenize("1 + 2 3").unwrap());
parse(&tokenize("1 + 2 3").unwrap()).unwrap();
}
#[test]
fn test_binary_op_basic() {
let result = parse(&tokenize("1 + 23").unwrap());
let result = parse(&tokenize("1 + 23").unwrap()).unwrap();
assert_eq!(result, bin_ast!(int_ast_b!(1), "+", int_ast_b!(23)));
let result = parse(&tokenize("4 - 56").unwrap());
let result = parse(&tokenize("4 - 56").unwrap()).unwrap();
assert_eq!(result, bin_ast!(int_ast_b!(4), "-", int_ast_b!(56)));
let result = parse(&tokenize("1 * 2").unwrap());
let result = parse(&tokenize("1 * 2").unwrap()).unwrap();
assert_eq!(result, bin_ast!(int_ast_b!(1), "*", int_ast_b!(2)));
let result = parse(&tokenize("1 / 2").unwrap());
let result = parse(&tokenize("1 / 2").unwrap()).unwrap();
assert_eq!(result, bin_ast!(int_ast_b!(1), "/", int_ast_b!(2)));
}
#[test]
fn test_binary_op_all_levels() {
let result = parse(&tokenize("1 * 2 + 3 < 4 == 5 and 6 or 7").unwrap());
let result = parse(&tokenize("1 * 2 + 3 < 4 == 5 and 6 or 7").unwrap()).unwrap();
assert_eq!(
result,
bin_ast!(
@ -200,16 +200,16 @@ fn test_binary_op_all_levels() {
#[test]
fn test_binary_op_identifier() {
let result = parse(&tokenize("a + 1").unwrap());
let result = parse(&tokenize("a + 1").unwrap()).unwrap();
assert_eq!(result, bin_ast!(id_ast_b!("a"), "+", int_ast_b!(1)));
let result = parse(&tokenize("1 - a").unwrap());
let result = parse(&tokenize("1 - a").unwrap()).unwrap();
assert_eq!(result, bin_ast!(int_ast_b!(1), "-", id_ast_b!("a")));
}
#[test]
fn test_binary_op_multiple() {
let result = parse(&tokenize("1 + 2 - 3").unwrap());
let result = parse(&tokenize("1 + 2 - 3").unwrap()).unwrap();
assert_eq!(
result,
bin_ast!(
@ -222,7 +222,7 @@ fn test_binary_op_multiple() {
#[test]
fn test_binary_op_precedence() {
let result = parse(&tokenize("1 + 2 * 3").unwrap());
let result = parse(&tokenize("1 + 2 * 3").unwrap()).unwrap();
assert_eq!(
result,
bin_ast!(
@ -232,7 +232,7 @@ fn test_binary_op_precedence() {
)
);
let result = parse(&tokenize("1 - 2 / 3").unwrap());
let result = parse(&tokenize("1 - 2 / 3").unwrap()).unwrap();
assert_eq!(
result,
bin_ast!(
@ -245,7 +245,7 @@ fn test_binary_op_precedence() {
#[test]
fn test_assignment_basic() {
let result = parse(&tokenize("a = 1 + 2").unwrap());
let result = parse(&tokenize("a = 1 + 2").unwrap()).unwrap();
assert_eq!(
result,
bin_ast!(
@ -258,7 +258,7 @@ fn test_assignment_basic() {
#[test]
fn test_assignment_chain() {
let result = parse(&tokenize("a = b = 1 + 2").unwrap());
let result = parse(&tokenize("a = b = 1 + 2").unwrap()).unwrap();
assert_eq!(
result,
bin_ast!(
@ -276,21 +276,21 @@ fn test_assignment_chain() {
#[test]
#[should_panic]
fn test_assignment_invalid() {
parse(&tokenize("a =").unwrap());
parse(&tokenize("a =").unwrap()).unwrap();
}
#[test]
fn test_unary_basic() {
let result = parse(&tokenize("not x").unwrap());
let result = parse(&tokenize("not x").unwrap()).unwrap();
assert_eq!(result, un_ast!("not", id_ast_b!("x")));
let result = parse(&tokenize("-x").unwrap());
let result = parse(&tokenize("-x").unwrap()).unwrap();
assert_eq!(result, un_ast!("-", id_ast_b!("x")));
let result = parse(&tokenize("-1").unwrap());
let result = parse(&tokenize("-1").unwrap()).unwrap();
assert_eq!(result, un_ast!("-", int_ast_b!(1)));
let result = parse(&tokenize("-1 + 2").unwrap());
let result = parse(&tokenize("-1 + 2").unwrap()).unwrap();
assert_eq!(
result,
bin_ast!(un_ast_b!("-", int_ast_b!(1)), "+", int_ast_b!(2))
@ -299,16 +299,16 @@ fn test_unary_basic() {
#[test]
fn test_unary_chain() {
let result = parse(&tokenize("not not x").unwrap());
let result = parse(&tokenize("not not x").unwrap()).unwrap();
assert_eq!(result, un_ast!("not", un_ast_b!("not", id_ast_b!("x"))));
let result = parse(&tokenize("--x").unwrap());
let result = parse(&tokenize("--x").unwrap()).unwrap();
assert_eq!(result, un_ast!("-", un_ast_b!("-", id_ast_b!("x"))));
let result = parse(&tokenize("--1").unwrap());
let result = parse(&tokenize("--1").unwrap()).unwrap();
assert_eq!(result, un_ast!("-", un_ast_b!("-", int_ast_b!(1))));
let result = parse(&tokenize("--1 + 2").unwrap());
let result = parse(&tokenize("--1 + 2").unwrap()).unwrap();
assert_eq!(
result,
bin_ast!(
@ -321,7 +321,7 @@ fn test_unary_chain() {
#[test]
fn test_parenthesized() {
let result = parse(&tokenize("(1+2)*3").unwrap());
let result = parse(&tokenize("(1+2)*3").unwrap()).unwrap();
assert_eq!(
result,
bin_ast!(
@ -334,7 +334,7 @@ fn test_parenthesized() {
#[test]
fn test_parenthesized_nested() {
let result = parse(&tokenize("((1 - 2))/3").unwrap());
let result = parse(&tokenize("((1 - 2))/3").unwrap()).unwrap();
assert_eq!(
result,
bin_ast!(
@ -344,7 +344,7 @@ fn test_parenthesized_nested() {
)
);
let result = parse(&tokenize("((1 + 2)*3) / 4").unwrap());
let result = parse(&tokenize("((1 + 2)*3) / 4").unwrap()).unwrap();
assert_eq!(
result,
bin_ast!(
@ -362,12 +362,12 @@ fn test_parenthesized_nested() {
#[test]
#[should_panic]
fn test_parenthesized_mismatched() {
parse(&tokenize("(1+2*3").unwrap());
parse(&tokenize("(1+2*3").unwrap()).unwrap();
}
#[test]
fn test_if_then() {
let result = parse(&tokenize("if 1 + 2 then 3").unwrap());
let result = parse(&tokenize("if 1 + 2 then 3").unwrap()).unwrap();
assert_eq!(
result,
con_ast!(
@ -380,7 +380,7 @@ fn test_if_then() {
#[test]
fn test_if_then_else() {
let result = parse(&tokenize("if a then b + c else 1 * 2").unwrap());
let result = parse(&tokenize("if a then b + c else 1 * 2").unwrap()).unwrap();
assert_eq!(
result,
con_ast!(
@ -393,7 +393,7 @@ fn test_if_then_else() {
#[test]
fn test_if_then_else_embedded() {
let result = parse(&tokenize("1 + if true then 2 else 3").unwrap());
let result = parse(&tokenize("1 + if true then 2 else 3").unwrap()).unwrap();
assert_eq!(
result,
bin_ast!(
@ -406,7 +406,7 @@ fn test_if_then_else_embedded() {
#[test]
fn test_if_then_else_nested() {
let result = parse(&tokenize("if true then if false then 1 else 2 else 3").unwrap());
let result = parse(&tokenize("if true then if false then 1 else 2 else 3").unwrap()).unwrap();
assert_eq!(
result,
con_ast!(
@ -420,15 +420,15 @@ fn test_if_then_else_nested() {
#[test]
#[should_panic]
fn test_if_no_then() {
parse(&tokenize("if true").unwrap());
parse(&tokenize("if true").unwrap()).unwrap();
}
#[test]
fn test_func_basic() {
let result = parse(&tokenize("f(a, b)").unwrap());
let result = parse(&tokenize("f(a, b)").unwrap()).unwrap();
assert_eq!(result, fun_ast!("f", vec![id_ast!("a"), id_ast!("b"),]));
let result = parse(&tokenize("f(a, 1 + 2)").unwrap());
let result = parse(&tokenize("f(a, 1 + 2)").unwrap()).unwrap();
assert_eq!(
result,
fun_ast!(
@ -437,13 +437,13 @@ fn test_func_basic() {
)
);
let result = parse(&tokenize("f()").unwrap());
let result = parse(&tokenize("f()").unwrap()).unwrap();
assert_eq!(result, fun_ast!("f", vec![]));
}
#[test]
fn test_func_embedded() {
let result = parse(&tokenize("1 + f(a)").unwrap());
let result = parse(&tokenize("1 + f(a)").unwrap()).unwrap();
assert_eq!(
result,
bin_ast!(int_ast_b!(1), "+", fun_ast_b!("f", vec![id_ast!("a")]))
@ -452,7 +452,7 @@ fn test_func_embedded() {
#[test]
fn test_func_nested() {
let result = parse(&tokenize("f(a, g(b))").unwrap());
let result = parse(&tokenize("f(a, g(b))").unwrap()).unwrap();
assert_eq!(
result,
fun_ast!("f", vec![id_ast!("a"), fun_ast!("g", vec![id_ast!("b")]),])
@ -462,18 +462,18 @@ fn test_func_nested() {
#[test]
#[should_panic]
fn test_func_missing_comma() {
parse(&tokenize("f(a b)").unwrap());
parse(&tokenize("f(a b)").unwrap()).unwrap();
}
#[test]
#[should_panic]
fn test_func_missing_close() {
parse(&tokenize("f(a").unwrap());
parse(&tokenize("f(a").unwrap()).unwrap();
}
#[test]
fn test_block_basic() {
let result = parse(&tokenize("{ a = 1; b; }").unwrap());
let result = parse(&tokenize("{ a = 1; b; }").unwrap()).unwrap();
assert_eq!(
result,
block_ast!(vec![
@ -483,7 +483,7 @@ fn test_block_basic() {
])
);
let result = parse(&tokenize("{ a = 1; b }").unwrap());
let result = parse(&tokenize("{ a = 1; b }").unwrap()).unwrap();
assert_eq!(
result,
block_ast!(vec![
@ -495,7 +495,7 @@ fn test_block_basic() {
#[test]
fn test_block_embedded() {
let result = parse(&tokenize("{ 1 + 2 } * 3").unwrap());
let result = parse(&tokenize("{ 1 + 2 } * 3").unwrap()).unwrap();
assert_eq!(
result,
bin_ast!(
@ -508,7 +508,7 @@ fn test_block_embedded() {
#[test]
fn test_block_nested() {
let result = parse(&tokenize("{ a = { 1 + 2}}").unwrap());
let result = parse(&tokenize("{ a = { 1 + 2}}").unwrap()).unwrap();
assert_eq!(
result,
block_ast!(vec![bin_ast!(
@ -522,21 +522,21 @@ fn test_block_nested() {
#[test]
#[should_panic]
fn test_block_unmatched() {
parse(&tokenize("{ a = 1 ").unwrap());
parse(&tokenize("{ a = 1 ").unwrap()).unwrap();
}
#[test]
#[should_panic]
fn test_block_missing_semicolon() {
parse(&tokenize("{ a = 1\nb }").unwrap());
parse(&tokenize("{ a = 1\nb }").unwrap()).unwrap();
}
#[test]
fn test_var_basic() {
let result = parse(&tokenize("var x = 1").unwrap());
let result = parse(&tokenize("var x = 1").unwrap()).unwrap();
assert_eq!(result, var_ast!("x", int_ast_b!(1), None));
let result = parse(&tokenize("{ var x = 1; x = 2; }").unwrap());
let result = parse(&tokenize("{ var x = 1; x = 2; }").unwrap()).unwrap();
assert_eq!(
result,
block_ast!(vec![
@ -549,7 +549,7 @@ fn test_var_basic() {
#[test]
fn test_var_typed() {
let result = parse(&tokenize("var x: Int = 1").unwrap());
let result = parse(&tokenize("var x: Int = 1").unwrap()).unwrap();
assert_eq!(
result,
var_ast!(
@ -562,7 +562,7 @@ fn test_var_typed() {
)
);
let result = parse(&tokenize("var x: Bool = true").unwrap());
let result = parse(&tokenize("var x: Bool = true").unwrap()).unwrap();
assert_eq!(
result,
var_ast!(
@ -579,18 +579,18 @@ fn test_var_typed() {
#[test]
#[should_panic]
fn test_var_chain() {
parse(&tokenize("var x = var y = 1").unwrap());
parse(&tokenize("var x = var y = 1").unwrap()).unwrap();
}
#[test]
#[should_panic]
fn test_var_embedded() {
parse(&tokenize("if true then var x = 3").unwrap());
parse(&tokenize("if true then var x = 3").unwrap()).unwrap();
}
#[test]
fn test_omitting_semicolons() {
let result = parse(&tokenize("{ { a } { b } }").unwrap());
let result = parse(&tokenize("{ { a } { b } }").unwrap()).unwrap();
assert_eq!(
result,
block_ast!(vec![
@ -599,7 +599,7 @@ fn test_omitting_semicolons() {
])
);
let result = parse(&tokenize("{ if true then { a } b }").unwrap());
let result = parse(&tokenize("{ if true then { a } b }").unwrap()).unwrap();
assert_eq!(
result,
block_ast!(vec![
@ -608,7 +608,7 @@ fn test_omitting_semicolons() {
])
);
let result = parse(&tokenize("{ if true then { a }; b }").unwrap());
let result = parse(&tokenize("{ if true then { a }; b }").unwrap()).unwrap();
assert_eq!(
result,
block_ast!(vec![
@ -617,7 +617,7 @@ fn test_omitting_semicolons() {
])
);
let result = parse(&tokenize("{ if true then { a } else { b } c }").unwrap());
let result = parse(&tokenize("{ if true then { a } else { b } c }").unwrap()).unwrap();
assert_eq!(
result,
block_ast!(vec![
@ -630,7 +630,7 @@ fn test_omitting_semicolons() {
])
);
let result = parse(&tokenize("x = { { f(a) } { b } }").unwrap());
let result = parse(&tokenize("x = { { f(a) } { b } }").unwrap()).unwrap();
assert_eq!(
result,
bin_ast!(
@ -647,12 +647,12 @@ fn test_omitting_semicolons() {
#[test]
#[should_panic]
fn test_omitting_semicolons_invalid() {
parse(&tokenize("{ if true then { a } b c }").unwrap());
parse(&tokenize("{ if true then { a } b c }").unwrap()).unwrap();
}
#[test]
fn test_while_do() {
let result = parse(&tokenize("while 1 + 2 do 3").unwrap());
let result = parse(&tokenize("while 1 + 2 do 3").unwrap()).unwrap();
assert_eq!(
result,
while_ast!(bin_ast_b!(int_ast_b!(1), "+", int_ast_b!(2)), int_ast_b!(3))
@ -661,7 +661,7 @@ fn test_while_do() {
#[test]
fn test_while_do_embedded() {
let result = parse(&tokenize("1 + while true do 2").unwrap());
let result = parse(&tokenize("1 + while true do 2").unwrap()).unwrap();
assert_eq!(
result,
bin_ast!(
@ -674,7 +674,7 @@ fn test_while_do_embedded() {
#[test]
fn test_while_do_nested() {
let result = parse(&tokenize("while true do while false do 1").unwrap());
let result = parse(&tokenize("while true do while false do 1").unwrap()).unwrap();
assert_eq!(
result,
while_ast!(
@ -687,18 +687,18 @@ fn test_while_do_nested() {
#[test]
#[should_panic]
fn test_while_no_do() {
parse(&tokenize("while true").unwrap());
parse(&tokenize("while true").unwrap()).unwrap();
}
#[test]
fn test_multiple_top_levels() {
let result = parse(&tokenize("a;").unwrap());
let result = parse(&tokenize("a;").unwrap()).unwrap();
assert_eq!(result, block_ast!(vec![id_ast!("a"), empty_ast!()]));
let result = parse(&tokenize("a; b").unwrap());
let result = parse(&tokenize("a; b").unwrap()).unwrap();
assert_eq!(result, block_ast!(vec![id_ast!("a"), id_ast!("b")]));
let result = parse(&tokenize("{}{}").unwrap());
let result = parse(&tokenize("{}{}").unwrap()).unwrap();
assert_eq!(
result,
block_ast!(vec![block_ast!(vec![]), block_ast!(vec![])])
@ -726,7 +726,8 @@ fn test_large() {
",
)
.unwrap(),
);
)
.unwrap();
assert_eq!(
result,

View file

@ -4,19 +4,19 @@ use crate::compiler::token::{CodeLocation, Token, TokenType};
use regex::Regex;
#[derive(Debug)]
pub struct TokenizeError {
message: String,
pub struct TokenizerError {
msg: String,
}
impl Display for TokenizeError {
impl Display for TokenizerError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "TokenizerError: {}", self.message)
write!(f, "TokenizerError: {}", self.msg)
}
}
impl Error for TokenizeError {}
impl Error for TokenizerError {}
pub fn tokenize(code: &str) -> Result<Vec<Token>, TokenizeError> {
pub fn tokenize(code: &str) -> Result<Vec<Token>, TokenizerError> {
// We only want to compile the regexes once
// The ordering of these is important!
let regexes = vec![
@ -63,8 +63,8 @@ pub fn tokenize(code: &str) -> Result<Vec<Token>, TokenizeError> {
}
if !valid_token {
return Err(TokenizeError {
message: format!(
return Err(TokenizerError {
msg: format!(
"Invalid token starting with '{}' on line {} in position {}",
&line[pos..pos + 1],
line_number + 1,

View file

@ -161,7 +161,7 @@ mod tests {
fn get_type(code: &str) -> Type {
type_check(
&mut parse(&tokenize(code).unwrap()),
&mut parse(&tokenize(code).unwrap()).unwrap(),
&mut SymTab::new_type_table(),
)
}
@ -321,14 +321,14 @@ mod tests {
#[test]
fn test_function() {
let mut tokens = tokenize("foo(1)").unwrap();
let mut ast = parse(&tokens);
let mut ast = parse(&tokens).unwrap();
let mut symtab = SymTab::new_type_table();
symtab.insert("foo", Func(vec![Int], Box::new(Int)));
let result = type_check(&mut ast, &mut symtab);
assert_eq!(result, Int);
tokens = tokenize("foo(1);").unwrap();
ast = parse(&tokens);
ast = parse(&tokens).unwrap();
symtab = SymTab::new_type_table();
symtab.insert("foo", Func(vec![Int], Box::new(Int)));
let result = type_check(&mut ast, &mut symtab);
@ -339,7 +339,7 @@ mod tests {
#[should_panic]
fn test_function_wrong_arg() {
let tokens = tokenize("foo(true)").unwrap();
let mut ast = parse(&tokens);
let mut ast = parse(&tokens).unwrap();
let mut symtab = SymTab::new_type_table();
symtab.insert("foo", Func(vec![Int], Box::new(Int)));
type_check(&mut ast, &mut symtab);
@ -348,7 +348,7 @@ mod tests {
#[test]
fn test_node_type() {
let tokens = tokenize("1").unwrap();
let mut ast = parse(&tokens);
let mut ast = parse(&tokens).unwrap();
let mut symtab = SymTab::new_type_table();
assert_eq!(ast.node_type, Unit);