Add proper line/char location for tokens
This commit is contained in:
parent
c9ef000cd0
commit
28a8ae69be
2 changed files with 36 additions and 30 deletions
|
@ -1,24 +1,24 @@
|
|||
#[derive(Debug, Copy, Clone)]
|
||||
pub struct CodeLocation {
|
||||
start: usize,
|
||||
end: usize,
|
||||
line: usize,
|
||||
char: usize,
|
||||
}
|
||||
|
||||
impl CodeLocation {
|
||||
pub fn new(start: usize, end: usize) -> Self {
|
||||
Self { start, end }
|
||||
pub fn new(line: usize, char: usize) -> Self {
|
||||
Self { line, char }
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialEq for CodeLocation {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
let true_match = self.start == other.start && self.end == other.end;
|
||||
let true_match = self.line == other.line && self.char == other.char;
|
||||
|
||||
// For testing purposes
|
||||
let simulated_match = self.start == usize::MAX
|
||||
|| self.end == usize::MAX
|
||||
|| other.start == usize::MAX
|
||||
|| other.end == usize::MAX;
|
||||
let simulated_match = self.line == usize::MAX
|
||||
|| self.char == usize::MAX
|
||||
|| other.line == usize::MAX
|
||||
|| other.char == usize::MAX;
|
||||
|
||||
true_match || simulated_match
|
||||
}
|
||||
|
|
|
@ -5,7 +5,7 @@ pub fn tokenize(code: &str) -> Vec<Token> {
|
|||
// We only want to compile the regexes once
|
||||
// The ordering of these is important!
|
||||
let regexes = vec![
|
||||
(TokenType::Comment, Regex::new(r"^(\\\\|#).*\n").unwrap()),
|
||||
(TokenType::Comment, Regex::new(r"^(//|#).*").unwrap()),
|
||||
(TokenType::Whitespace, Regex::new(r"^[\s\t\n]+").unwrap()),
|
||||
(
|
||||
TokenType::Operator,
|
||||
|
@ -21,22 +21,23 @@ pub fn tokenize(code: &str) -> Vec<Token> {
|
|||
|
||||
let mut tokens = Vec::new();
|
||||
|
||||
for (line_number, line) in code.lines().enumerate() {
|
||||
let mut pos = 0;
|
||||
|
||||
while pos < code.len() {
|
||||
while pos < line.len() {
|
||||
let mut valid_token = false;
|
||||
|
||||
for (token_type, regex_matcher) in &regexes {
|
||||
let found_match = regex_matcher.find(&code[pos..]);
|
||||
let found_match = regex_matcher.find(&line[pos..]);
|
||||
|
||||
if let Some(token) = found_match {
|
||||
if !token_type.ignore() {
|
||||
let start = pos + token.start();
|
||||
let end = pos + token.end();
|
||||
tokens.push(Token::new(
|
||||
&code[start..end],
|
||||
&line[start..end],
|
||||
*token_type,
|
||||
CodeLocation::new(start, end),
|
||||
CodeLocation::new(line_number + 1, start + 1), // 1-indexing
|
||||
));
|
||||
}
|
||||
|
||||
|
@ -46,7 +47,12 @@ pub fn tokenize(code: &str) -> Vec<Token> {
|
|||
}
|
||||
|
||||
if !valid_token {
|
||||
panic!("Invalid token at {pos}");
|
||||
panic!(
|
||||
"Invalid token on line {} in position {}",
|
||||
line_number + 1,
|
||||
pos + 1
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Reference in a new issue