diff --git a/src/compiler.rs b/src/compiler.rs
index bc1e972..8be1603 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -1,6 +1,9 @@
+mod ast;
+mod parser;
 mod token;
 mod tokenizer;
 
 pub fn compile(code: &str) {
-    tokenizer::tokenize(code);
+    let tokens = tokenizer::tokenize(code);
+    parser::parse(&tokens);
 }
diff --git a/src/compiler/ast.rs b/src/compiler/ast.rs
new file mode 100644
index 0000000..d2ffdcf
--- /dev/null
+++ b/src/compiler/ast.rs
@@ -0,0 +1,14 @@
+/// Expression node of the syntax tree returned by the parser.
+#[expect(dead_code)]
+#[derive(Debug, PartialEq)]
+pub enum Expression<'source> {
+    IntLiteral(u32),
+    BoolLiteral(bool),
+    // NOTE(review): "Indentifier" looks like a typo for "Identifier"; name kept unchanged here.
+    Indentifier(String),
+    BinaryOp(
+        Box<Expression<'source>>,
+        &'source str,
+        Box<Expression<'source>>,
+    ),
+}
diff --git a/src/compiler/parser.rs b/src/compiler/parser.rs
new file mode 100644
index 0000000..2e1785c
--- /dev/null
+++ b/src/compiler/parser.rs
@@ -0,0 +1,132 @@
+use crate::compiler::{
+    ast::Expression::{self, *},
+    token::{Token, TokenType},
+};
+
+/// Parses `tokens` into a single expression, starting at the first token; panics on bad input.
+pub fn parse<'source>(tokens: &[Token<'source>]) -> Expression<'source> {
+    let mut pos = 0;
+
+    parse_expression(&mut pos, tokens)
+}
+
+/// Returns a copy of the token at `pos`, or a synthetic `End` token carrying the location of
+/// the last real token once `pos` is past the input. Panics only if `tokens` is empty.
+fn peek<'source>(pos: &mut usize, tokens: &[Token<'source>]) -> Token<'source> {
+    if let Some(token) = tokens.get(*pos) {
+        token.clone()
+    } else if let Some(last_token) = tokens.last() {
+        Token::new("", TokenType::End, last_token.loc)
+    } else {
+        panic!("Input to parser appears to be empty!");
+    }
+}
+
+/// Returns the token at `pos` (as `peek` would) and then advances `pos` by one.
+fn next<'source>(pos: &mut usize, tokens: &[Token<'source>]) -> Token<'source> {
+    let token = peek(pos, tokens);
+    *pos += 1;
+    token
+}
+
+/// Consumes the next token and returns it if its type is one of `types`; panics otherwise.
+fn next_expect_types<'source>(
+    pos: &mut usize,
+    tokens: &[Token<'source>],
+    types: &[TokenType],
+) -> Token<'source> {
+    let token = next(pos, tokens);
+
+    if types.contains(&token.token_type) {
+        token
+    } else {
+        panic!(
+            "Parsing error: expected one of {:?} but found {}",
+            types, token
+        );
+    }
+}
+
+/// Consumes the next token and returns it if its text is one of `strings`; panics otherwise.
+fn next_expect_strings<'source>(
+    pos: &mut usize,
+    tokens: &[Token<'source>],
+    strings: &[&str],
+) -> Token<'source> {
+    let token = next(pos, tokens);
+
+    if strings.contains(&token.text) {
+        token
+    } else {
+        panic!(
+            "Parsing error: expected one of {:?} but found {}",
+            strings, token
+        );
+    }
+}
+
+/// Consumes the next token and returns it if its type is `expected_type`; panics otherwise.
+fn next_expect_type<'source>(
+    pos: &mut usize,
+    tokens: &[Token<'source>],
+    expected_type: TokenType,
+) -> Token<'source> {
+    next_expect_types(pos, tokens, &[expected_type])
+}
+
+/// Parses one `Integer` token into an `IntLiteral`; panics if its text is not a valid `u32`.
+fn parse_int_literal<'source>(pos: &mut usize, tokens: &[Token]) -> Expression<'source> {
+    let token = next_expect_type(pos, tokens, TokenType::Integer);
+
+    IntLiteral(
+        token
+            .text
+            .parse::<u32>()
+            .unwrap_or_else(|_| panic!("Fatal parser error! Invalid value in token {token}")),
+    )
+}
+
+/// Parses `<integer> ("+" | "-") <integer>` into a `BinaryOp`.
+fn parse_expression<'source>(pos: &mut usize, tokens: &[Token<'source>]) -> Expression<'source> {
+    let left = parse_int_literal(pos, tokens);
+    let operator_token = next_expect_strings(pos, tokens, &["+", "-"]);
+    let right = parse_int_literal(pos, tokens);
+    Expression::BinaryOp(Box::new(left), operator_token.text, Box::new(right))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::compiler::token::CodeLocation;
+
+    fn new_int(text: &str) -> Token {
+        Token::new(
+            text,
+            TokenType::Integer,
+            CodeLocation::new(usize::MAX, usize::MAX),
+        )
+    }
+
+    fn new_id(text: &str) -> Token {
+        Token::new(
+            text,
+            TokenType::Identifier,
+            CodeLocation::new(usize::MAX, usize::MAX),
+        )
+    }
+
+    #[test]
+    fn test_binary_op_basic() {
+        let result = parse(&vec![new_int("1"), new_id("+"), new_int("23")]);
+        assert_eq!(
+            result,
+            BinaryOp(Box::new(IntLiteral(1)), "+", Box::new(IntLiteral(23)))
+        );
+
+        let result = parse(&vec![new_int("4"), new_id("-"), new_int("56")]);
+        assert_eq!(
+            result,
+            BinaryOp(Box::new(IntLiteral(4)), "-", Box::new(IntLiteral(56)))
+        );
+    }
+}
diff --git a/src/compiler/token.rs b/src/compiler/token.rs
index e5d6fac..fa9f489 100644
--- a/src/compiler/token.rs
+++ b/src/compiler/token.rs
@@ -1,3 +1,5 @@
+use std::fmt;
+
 #[derive(Debug, Copy, Clone)]
 pub struct CodeLocation {
     line: usize,
@@ -10,6 +12,12 @@ impl CodeLocation {
     }
 }
 
+impl fmt::Display for CodeLocation {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{}:{}", self.line, self.char)
+    }
+}
+
 impl PartialEq for CodeLocation {
     fn eq(&self, other: &Self) -> bool {
         let true_match = self.line == other.line && self.char == other.char;
@@ -32,23 +40,21 @@ pub enum TokenType {
     Operator,
     Punctuation,
     Whitespace,
+    End,
 }
 
 impl TokenType {
     pub fn ignore(&self) -> bool {
         use TokenType::*;
-        match self {
-            Whitespace | Comment => true,
-            _ => false,
-        }
+        matches!(self, Whitespace | Comment)
     }
 }
 
-#[derive(Debug, PartialEq)]
+#[derive(Debug, PartialEq, Clone)]
 pub struct Token<'source> {
-    text: &'source str,
-    token_type: TokenType,
-    loc: CodeLocation,
+    pub text: &'source str,
+    pub token_type: TokenType,
+    pub loc: CodeLocation,
 }
 
 impl<'source> Token<'source> {
@@ -60,3 +66,9 @@ impl<'source> Token<'source> {
         }
     }
 }
+
+impl<'source> fmt::Display for Token<'source> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{:?} ``{}`` at {}", self.token_type, self.text, self.loc)
+    }
+}
diff --git a/src/server.rs b/src/server.rs
index 77decf0..f547183 100644
--- a/src/server.rs
+++ b/src/server.rs
@@ -1,5 +1,4 @@
 use crate::compiler;
-use json;
 use std::{
     io::prelude::*,
     net::{IpAddr, TcpListener, TcpStream},