Add proper line/char location for tokens

2025-01-21 18:59:20 +02:00 · 2025-01-21 18:59:20 +02:00 · 28a8ae69be
commit 28a8ae69be
parent c9ef000cd0
2 changed files with 36 additions and 30 deletions
--- a/src/compiler/token.rs
+++ b/src/compiler/token.rs
@@ -1,24 +1,24 @@
 #[derive(Debug, Copy, Clone)]
 pub struct CodeLocation {
-    start: usize,
-    end: usize,
+    line: usize,
+    char: usize,
 }

 impl CodeLocation {
-    pub fn new(start: usize, end: usize) -> Self {
-        Self { start, end }
+    pub fn new(line: usize, char: usize) -> Self {
+        Self { line, char }
    }
 }

 impl PartialEq for CodeLocation {
    fn eq(&self, other: &Self) -> bool {
-        let true_match = self.start == other.start && self.end == other.end;
+        let true_match = self.line == other.line && self.char == other.char;

        // For testing purposes
-        let simulated_match = self.start == usize::MAX
-            || self.end == usize::MAX
-            || other.start == usize::MAX
-            || other.end == usize::MAX;
+        let simulated_match = self.line == usize::MAX
+            || self.char == usize::MAX
+            || other.line == usize::MAX
+            || other.char == usize::MAX;

        true_match || simulated_match
    }
--- a/src/compiler/tokenizer.rs
+++ b/src/compiler/tokenizer.rs
@@ -5,7 +5,7 @@ pub fn tokenize(code: &str) -> Vec<Token> {
    // We only want to compile the regexes once
    // The ordering of these is important!
    let regexes = vec![
-        (TokenType::Comment, Regex::new(r"^(\\\\|#).*\n").unwrap()),
+        (TokenType::Comment, Regex::new(r"^(//|#).*").unwrap()),
        (TokenType::Whitespace, Regex::new(r"^[\s\t\n]+").unwrap()),
        (
            TokenType::Operator,
@@ -21,22 +21,23 @@ pub fn tokenize(code: &str) -> Vec<Token> {

    let mut tokens = Vec::new();

+    for (line_number, line) in code.lines().enumerate() {
        let mut pos = 0;

-    while pos < code.len() {
+        while pos < line.len() {
            let mut valid_token = false;

            for (token_type, regex_matcher) in &regexes {
-            let found_match = regex_matcher.find(&code[pos..]);
+                let found_match = regex_matcher.find(&line[pos..]);

                if let Some(token) = found_match {
                    if !token_type.ignore() {
                        let start = pos + token.start();
                        let end = pos + token.end();
                        tokens.push(Token::new(
-                        &code[start..end],
+                            &line[start..end],
                            *token_type,
-                        CodeLocation::new(start, end),
+                            CodeLocation::new(line_number + 1, start + 1), // 1-indexing
                        ));
                    }

@@ -46,7 +47,12 @@ pub fn tokenize(code: &str) -> Vec<Token> {
            }

            if !valid_token {
-            panic!("Invalid token at {pos}");
+                panic!(
+                    "Invalid token on line {} in position {}",
+                    line_number + 1,
+                    pos + 1
+                );
+            }
        }
    }