1
0
Fork 0

Add proper line/char location for tokens

This commit is contained in:
Vili Sinervä 2025-01-21 18:59:20 +02:00
parent c9ef000cd0
commit 28a8ae69be
No known key found for this signature in database
GPG key ID: DF8FEAF54EFAC996
2 changed files with 36 additions and 30 deletions

View file

@ -1,24 +1,24 @@
// NOTE(review): this listing is a diff rendering whose +/- markers were lost, so fields from
// both sides of the change appear together. Judging by the commit title ("Add proper line/char
// location for tokens"), `start`/`end` look like the removed fields and `line`/`char` like
// their replacements — confirm against the repository before relying on this.
/// Location of a token within the source text being tokenized.
#[derive(Debug, Copy, Clone)]
pub struct CodeLocation {
// Presumably the pre-change representation: start and end offsets of the token.
start: usize,
end: usize,
// Presumably the post-change representation. The tokenizer hunk in this commit constructs
// it from `line_number + 1` and `start + 1`, i.e. both values are passed 1-indexed.
line: usize,
char: usize,
}
impl CodeLocation {
// NOTE(review): two versions of `new` are interleaved below because the diff markers were
// lost. The `(start, end)` form appears to be the removed constructor and the `(line, char)`
// form its replacement; they share the closing brace — confirm against the repository.
// Either way the constructor stores its two arguments unchanged; no validation is performed.
pub fn new(start: usize, end: usize) -> Self {
Self { start, end }
pub fn new(line: usize, char: usize) -> Self {
Self { line, char }
}
}
impl PartialEq for CodeLocation {
// Compares two locations, treating `usize::MAX` in any field as a wildcard.
//
// Returns `true` when both fields are equal, or when any field of either operand is
// `usize::MAX`. Because of the wildcard, this relation is not transitive: a location
// containing `usize::MAX` compares equal to two locations that differ from each other.
//
// NOTE(review): the old (`start`/`end`) and new (`line`/`char`) versions of each statement
// are interleaved here because the diff markers were lost — confirm against the repository.
fn eq(&self, other: &Self) -> bool {
// Exact field-by-field comparison (presumably the pre-change line, then the post-change line).
let true_match = self.start == other.start && self.end == other.end;
let true_match = self.line == other.line && self.char == other.char;
// For testing purposes: a `usize::MAX` sentinel in any field on either side makes the
// comparison succeed regardless of the other values.
let simulated_match = self.start == usize::MAX
|| self.end == usize::MAX
|| other.start == usize::MAX
|| other.end == usize::MAX;
let simulated_match = self.line == usize::MAX
|| self.char == usize::MAX
|| other.line == usize::MAX
|| other.char == usize::MAX;
true_match || simulated_match
}

View file

@ -5,7 +5,7 @@ pub fn tokenize(code: &str) -> Vec<Token> {
// We only want to compile the regexes once
// The ordering of these is important!
let regexes = vec![
(TokenType::Comment, Regex::new(r"^(\\\\|#).*\n").unwrap()),
(TokenType::Comment, Regex::new(r"^(//|#).*").unwrap()),
(TokenType::Whitespace, Regex::new(r"^[\s\t\n]+").unwrap()),
(
TokenType::Operator,
@ -21,22 +21,23 @@ pub fn tokenize(code: &str) -> Vec<Token> {
let mut tokens = Vec::new();
for (line_number, line) in code.lines().enumerate() {
let mut pos = 0;
while pos < code.len() {
while pos < line.len() {
let mut valid_token = false;
for (token_type, regex_matcher) in &regexes {
let found_match = regex_matcher.find(&code[pos..]);
let found_match = regex_matcher.find(&line[pos..]);
if let Some(token) = found_match {
if !token_type.ignore() {
let start = pos + token.start();
let end = pos + token.end();
tokens.push(Token::new(
&code[start..end],
&line[start..end],
*token_type,
CodeLocation::new(start, end),
CodeLocation::new(line_number + 1, start + 1), // 1-indexing
));
}
@ -46,7 +47,12 @@ pub fn tokenize(code: &str) -> Vec<Token> {
}
if !valid_token {
panic!("Invalid token at {pos}");
panic!(
"Invalid token on line {} in position {}",
line_number + 1,
pos + 1
);
}
}
}