Add proper line/char location for tokens
This commit is contained in:
parent
c9ef000cd0
commit
28a8ae69be
2 changed files with 36 additions and 30 deletions
|
@ -1,24 +1,24 @@
|
||||||
#[derive(Debug, Copy, Clone)]
|
#[derive(Debug, Copy, Clone)]
|
||||||
pub struct CodeLocation {
|
pub struct CodeLocation {
|
||||||
start: usize,
|
line: usize,
|
||||||
end: usize,
|
char: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl CodeLocation {
|
impl CodeLocation {
|
||||||
pub fn new(start: usize, end: usize) -> Self {
|
pub fn new(line: usize, char: usize) -> Self {
|
||||||
Self { start, end }
|
Self { line, char }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl PartialEq for CodeLocation {
|
impl PartialEq for CodeLocation {
|
||||||
fn eq(&self, other: &Self) -> bool {
|
fn eq(&self, other: &Self) -> bool {
|
||||||
let true_match = self.start == other.start && self.end == other.end;
|
let true_match = self.line == other.line && self.char == other.char;
|
||||||
|
|
||||||
// For testing purposes
|
// For testing purposes
|
||||||
let simulated_match = self.start == usize::MAX
|
let simulated_match = self.line == usize::MAX
|
||||||
|| self.end == usize::MAX
|
|| self.char == usize::MAX
|
||||||
|| other.start == usize::MAX
|
|| other.line == usize::MAX
|
||||||
|| other.end == usize::MAX;
|
|| other.char == usize::MAX;
|
||||||
|
|
||||||
true_match || simulated_match
|
true_match || simulated_match
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,7 +5,7 @@ pub fn tokenize(code: &str) -> Vec<Token> {
|
||||||
// We only want to compile the regexes once
|
// We only want to compile the regexes once
|
||||||
// The ordering of these is important!
|
// The ordering of these is important!
|
||||||
let regexes = vec![
|
let regexes = vec![
|
||||||
(TokenType::Comment, Regex::new(r"^(\\\\|#).*\n").unwrap()),
|
(TokenType::Comment, Regex::new(r"^(//|#).*").unwrap()),
|
||||||
(TokenType::Whitespace, Regex::new(r"^[\s\t\n]+").unwrap()),
|
(TokenType::Whitespace, Regex::new(r"^[\s\t\n]+").unwrap()),
|
||||||
(
|
(
|
||||||
TokenType::Operator,
|
TokenType::Operator,
|
||||||
|
@ -21,22 +21,23 @@ pub fn tokenize(code: &str) -> Vec<Token> {
|
||||||
|
|
||||||
let mut tokens = Vec::new();
|
let mut tokens = Vec::new();
|
||||||
|
|
||||||
|
for (line_number, line) in code.lines().enumerate() {
|
||||||
let mut pos = 0;
|
let mut pos = 0;
|
||||||
|
|
||||||
while pos < code.len() {
|
while pos < line.len() {
|
||||||
let mut valid_token = false;
|
let mut valid_token = false;
|
||||||
|
|
||||||
for (token_type, regex_matcher) in &regexes {
|
for (token_type, regex_matcher) in &regexes {
|
||||||
let found_match = regex_matcher.find(&code[pos..]);
|
let found_match = regex_matcher.find(&line[pos..]);
|
||||||
|
|
||||||
if let Some(token) = found_match {
|
if let Some(token) = found_match {
|
||||||
if !token_type.ignore() {
|
if !token_type.ignore() {
|
||||||
let start = pos + token.start();
|
let start = pos + token.start();
|
||||||
let end = pos + token.end();
|
let end = pos + token.end();
|
||||||
tokens.push(Token::new(
|
tokens.push(Token::new(
|
||||||
&code[start..end],
|
&line[start..end],
|
||||||
*token_type,
|
*token_type,
|
||||||
CodeLocation::new(start, end),
|
CodeLocation::new(line_number + 1, start + 1), // 1-indexing
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -46,7 +47,12 @@ pub fn tokenize(code: &str) -> Vec<Token> {
|
||||||
}
|
}
|
||||||
|
|
||||||
if !valid_token {
|
if !valid_token {
|
||||||
panic!("Invalid token at {pos}");
|
panic!(
|
||||||
|
"Invalid token on line {} in position {}",
|
||||||
|
line_number + 1,
|
||||||
|
pos + 1
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Reference in a new issue