From 28a8ae69be94c34ce0ffe06abdbbb95f73080b7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vili=20Sinerv=C3=A4?= Date: Tue, 21 Jan 2025 18:59:20 +0200 Subject: [PATCH] Add proper line/char location for tokens --- src/compiler/token.rs | 18 +++++++-------- src/compiler/tokenizer.rs | 48 ++++++++++++++++++++++----------------- 2 files changed, 36 insertions(+), 30 deletions(-) diff --git a/src/compiler/token.rs b/src/compiler/token.rs index dae89b9..9d47abe 100644 --- a/src/compiler/token.rs +++ b/src/compiler/token.rs @@ -1,24 +1,24 @@ #[derive(Debug, Copy, Clone)] pub struct CodeLocation { - start: usize, - end: usize, + line: usize, + char: usize, } impl CodeLocation { - pub fn new(start: usize, end: usize) -> Self { - Self { start, end } + pub fn new(line: usize, char: usize) -> Self { + Self { line, char } } } impl PartialEq for CodeLocation { fn eq(&self, other: &Self) -> bool { - let true_match = self.start == other.start && self.end == other.end; + let true_match = self.line == other.line && self.char == other.char; // For testing purposes - let simulated_match = self.start == usize::MAX - || self.end == usize::MAX - || other.start == usize::MAX - || other.end == usize::MAX; + let simulated_match = self.line == usize::MAX + || self.char == usize::MAX + || other.line == usize::MAX + || other.char == usize::MAX; true_match || simulated_match } diff --git a/src/compiler/tokenizer.rs b/src/compiler/tokenizer.rs index 93e2f30..98cdeeb 100644 --- a/src/compiler/tokenizer.rs +++ b/src/compiler/tokenizer.rs @@ -5,7 +5,7 @@ pub fn tokenize(code: &str) -> Vec { // We only want to compile the regexes once // The ordering of these is important! 
let regexes = vec![ - (TokenType::Comment, Regex::new(r"^(\\\\|#).*\n").unwrap()), + (TokenType::Comment, Regex::new(r"^(//|#).*").unwrap()), (TokenType::Whitespace, Regex::new(r"^[\s\t\n]+").unwrap()), ( TokenType::Operator, @@ -21,32 +21,38 @@ pub fn tokenize(code: &str) -> Vec { let mut tokens = Vec::new(); - let mut pos = 0; + for (line_number, line) in code.lines().enumerate() { + let mut pos = 0; - while pos < code.len() { - let mut valid_token = false; + while pos < line.len() { + let mut valid_token = false; - for (token_type, regex_matcher) in &regexes { - let found_match = regex_matcher.find(&code[pos..]); + for (token_type, regex_matcher) in &regexes { + let found_match = regex_matcher.find(&line[pos..]); - if let Some(token) = found_match { - if !token_type.ignore() { - let start = pos + token.start(); - let end = pos + token.end(); - tokens.push(Token::new( - &code[start..end], - *token_type, - CodeLocation::new(start, end), - )); + if let Some(token) = found_match { + if !token_type.ignore() { + let start = pos + token.start(); + let end = pos + token.end(); + tokens.push(Token::new( + &line[start..end], + *token_type, + CodeLocation::new(line_number + 1, start + 1), // 1-indexing + )); + } + + valid_token = true; + pos += token.end(); } - - valid_token = true; - pos += token.end(); } - } - if !valid_token { - panic!("Invalid token at {pos}"); + if !valid_token { + panic!( + "Invalid token on line {} in position {}", + line_number + 1, + pos + 1 + ); + } } }