From 28a8ae69be94c34ce0ffe06abdbbb95f73080b7d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Vili=20Sinerv=C3=A4?= <vili.m.sinerva@gmail.com>
Date: Tue, 21 Jan 2025 18:59:20 +0200
Subject: [PATCH] Add proper line/char location for tokens

---
 src/compiler/token.rs     | 18 +++++++--------
 src/compiler/tokenizer.rs | 48 ++++++++++++++++++++++-----------------
 2 files changed, 36 insertions(+), 30 deletions(-)
diff --git a/src/compiler/token.rs b/src/compiler/token.rs
index dae89b9..9d47abe 100644
--- a/src/compiler/token.rs
+++ b/src/compiler/token.rs
@@ -1,24 +1,24 @@
 #[derive(Debug, Copy, Clone)]
 pub struct CodeLocation {
-    start: usize,
-    end: usize,
+    line: usize, // line number of the token; the tokenizer passes 1-indexed values
+    char: usize, // position within the line; the tokenizer passes a 1-indexed byte offset, not a Unicode character count
 }
 
 impl CodeLocation {
-    pub fn new(start: usize, end: usize) -> Self {
-        Self { start, end }
+    pub fn new(line: usize, char: usize) -> Self { // stores both values exactly as given; no validation or re-indexing
+        Self { line, char }
     }
 }
 
 impl PartialEq for CodeLocation {
     fn eq(&self, other: &Self) -> bool {
-        let true_match = self.start == other.start && self.end == other.end;
+        let true_match = self.line == other.line && self.char == other.char; // exact field-by-field equality
 
         // For testing purposes
-        let simulated_match = self.start == usize::MAX
-            || self.end == usize::MAX
-            || other.start == usize::MAX
-            || other.end == usize::MAX;
+        let simulated_match = self.line == usize::MAX // usize::MAX in any field of either operand acts as a wildcard
+            || self.char == usize::MAX
+            || other.line == usize::MAX
+            || other.char == usize::MAX; // NOTE(review): the wildcard makes == non-transitive; avoid relying on it for dedup/lookup outside tests
 
         true_match || simulated_match
     }
diff --git a/src/compiler/tokenizer.rs b/src/compiler/tokenizer.rs
index 93e2f30..98cdeeb 100644
--- a/src/compiler/tokenizer.rs
+++ b/src/compiler/tokenizer.rs
@@ -5,7 +5,7 @@ pub fn tokenize(code: &str) -> Vec<Token> {
     // We only want to compile the regexes once
     // The ordering of these is important!
     let regexes = vec![
-        (TokenType::Comment, Regex::new(r"^(\\\\|#).*\n").unwrap()),
+        (TokenType::Comment, Regex::new(r"^(//|#).*").unwrap()),
         (TokenType::Whitespace, Regex::new(r"^[\s\t\n]+").unwrap()),
         (
             TokenType::Operator,
@@ -21,32 +21,38 @@ pub fn tokenize(code: &str) -> Vec<Token> {
 
     let mut tokens = Vec::new();
 
-    let mut pos = 0;
+    for (line_number, line) in code.lines().enumerate() {
+        let mut pos = 0;
 
-    while pos < code.len() {
-        let mut valid_token = false;
+        while pos < line.len() {
+            let mut valid_token = false;
 
-        for (token_type, regex_matcher) in &regexes {
-            let found_match = regex_matcher.find(&code[pos..]);
+            for (token_type, regex_matcher) in &regexes {
+                let found_match = regex_matcher.find(&line[pos..]);
 
-            if let Some(token) = found_match {
-                if !token_type.ignore() {
-                    let start = pos + token.start();
-                    let end = pos + token.end();
-                    tokens.push(Token::new(
-                        &code[start..end],
-                        *token_type,
-                        CodeLocation::new(start, end),
-                    ));
+                if let Some(token) = found_match {
+                    if !token_type.ignore() {
+                        let start = pos + token.start();
+                        let end = pos + token.end();
+                        tokens.push(Token::new(
+                            &line[start..end],
+                            *token_type,
+                            CodeLocation::new(line_number + 1, start + 1), // 1-indexing
+                        ));
+                    }
+
+                    valid_token = true;
+                    pos += token.end();
                 }
-
-                valid_token = true;
-                pos += token.end();
             }
-        }
 
-        if !valid_token {
-            panic!("Invalid token at {pos}");
+            if !valid_token {
+                panic!(
+                    "Invalid token on line {} in position {}",
+                    line_number + 1,
+                    pos + 1
+                );
+            }
         }
     }