Add proper line/char location for tokens

2025-01-21 18:59:20 +02:00 · 2025-01-21 18:59:20 +02:00 · 28a8ae69be
commit 28a8ae69be
parent c9ef000cd0
2 changed files with 36 additions and 30 deletions
--- a/src/compiler/token.rs
+++ b/src/compiler/token.rs
@@ -1,24 +1,24 @@
 #[derive(Debug, Copy, Clone)]
 pub struct CodeLocation {
-    start: usize,
+    line: usize,
-    end: usize,
+    char: usize,
 }
 impl CodeLocation {
-    pub fn new(start: usize, end: usize) -> Self {
+    pub fn new(line: usize, char: usize) -> Self {
-        Self { start, end }
+        Self { line, char }
    }
 }
 impl PartialEq for CodeLocation {
    fn eq(&self, other: &Self) -> bool {
-        let true_match = self.start == other.start && self.end == other.end;
+        let true_match = self.line == other.line && self.char == other.char;
        // For testing purposes
-        let simulated_match = self.start == usize::MAX
+        let simulated_match = self.line == usize::MAX
-            || self.end == usize::MAX
+            || self.char == usize::MAX
-            || other.start == usize::MAX
+            || other.line == usize::MAX
-            || other.end == usize::MAX;
+            || other.char == usize::MAX;
        true_match || simulated_match
    }
--- a/src/compiler/tokenizer.rs
+++ b/src/compiler/tokenizer.rs
@@ -5,7 +5,7 @@ pub fn tokenize(code: &str) -> Vec<Token> {
    // We only want to compile the regexes once
    // The ordering of these is important!
    let regexes = vec![
-        (TokenType::Comment, Regex::new(r"^(\\\\|#).*\n").unwrap()),
+        (TokenType::Comment, Regex::new(r"^(//|#).*").unwrap()),
        (TokenType::Whitespace, Regex::new(r"^[\s\t\n]+").unwrap()),
        (
            TokenType::Operator,
@@ -21,32 +21,38 @@ pub fn tokenize(code: &str) -> Vec<Token> {
    let mut tokens = Vec::new();
-    let mut pos = 0;
+    for (line_number, line) in code.lines().enumerate() {
+        let mut pos = 0;
-    while pos < code.len() {
+        while pos < line.len() {
-        let mut valid_token = false;
+            let mut valid_token = false;
-        for (token_type, regex_matcher) in &regexes {
+            for (token_type, regex_matcher) in &regexes {
-            let found_match = regex_matcher.find(&code[pos..]);
+                let found_match = regex_matcher.find(&line[pos..]);
-            if let Some(token) = found_match {
+                if let Some(token) = found_match {
-                if !token_type.ignore() {
+                    if !token_type.ignore() {
-                    let start = pos + token.start();
+                        let start = pos + token.start();
-                    let end = pos + token.end();
+                        let end = pos + token.end();
-                    tokens.push(Token::new(
+                        tokens.push(Token::new(
-                        &code[start..end],
+                            &line[start..end],
-                        *token_type,
+                            *token_type,
-                        CodeLocation::new(start, end),
+                            CodeLocation::new(line_number + 1, start + 1), // 1-indexing
-                    ));
+                        ));
+                    }
+                    valid_token = true;
+                    pos += token.end();
                }
-                valid_token = true;
-                pos += token.end();
            }
-        }
-        if !valid_token {
+            if !valid_token {
-            panic!("Invalid token at {pos}");
+                panic!(
+                    "Invalid token on line {} in position {}",
+                    line_number + 1,
+                    pos + 1
+                );
+            }
        }
    }