#[cfg(test)]
mod tests;
+pub use crate::cursor::Cursor;
+
use self::LiteralKind::*;
use self::TokenKind::*;
-use crate::cursor::{Cursor, EOF_CHAR};
+use crate::cursor::EOF_CHAR;
use std::convert::TryFrom;
/// Parsed token.
/// Unknown token, not expected by the lexer, e.g. "№"
Unknown,
+
+ /// End of input.
+ Eof,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
None
}
-/// Parses the first token from the provided input string.
-#[inline]
-pub fn first_token(input: &str) -> Token {
- debug_assert!(!input.is_empty());
- Cursor::new(input).advance_token()
-}
-
/// Validates a raw string literal. Used for getting more information about a
/// problem with a `RawStr`/`RawByteStr` with a `None` field.
#[inline]
// NOTE(review): the `///` text and `#[inline]` directly above talk about
// validating a raw string literal, which is not what this function does; they
// look like context carried over from a different item — confirm before
// treating them as documentation for `tokenize`.
//
// Builds a `Cursor` over `input` and returns a lazy iterator (via
// `std::iter::from_fn`) that produces one `Token` per step. The returned
// iterator borrows `input` (the `+ '_` bound), and the closure owns the cursor.
pub fn tokenize(input: &str) -> impl Iterator<Item = Token> + '_ {
let mut cursor = Cursor::new(input);
std::iter::from_fn(move || {
// Removed form: check `is_eof()` up front, call `reset_len_consumed()`, and
// only then lex a token; iteration ended when `is_eof()` returned true.
- if cursor.is_eof() {
- None
- } else {
- cursor.reset_len_consumed();
- Some(cursor.advance_token())
- }
// Added form: always call `advance_token()` and stop the iteration when the
// token's kind is `TokenKind::Eof`, so the `Eof` token itself is never yielded
// to the caller. No explicit reset call is made here any more.
+ let token = cursor.advance_token();
+ if token.kind != TokenKind::Eof { Some(token) } else { None }
})
}
impl Cursor<'_> {
/// Parses a token from the input string.
- fn advance_token(&mut self) -> Token {
- let first_char = self.bump().unwrap();
+ pub fn advance_token(&mut self) -> Token {
+ let first_char = match self.bump() {
+ Some(c) => c,
+ None => return Token::new(TokenKind::Eof, 0),
+ };
let token_kind = match first_char {
// Slash, comment or block comment.
'/' => match self.first() {
('#', c1) if is_id_start(c1) => self.raw_ident(),
('#', _) | ('"', _) => {
let res = self.raw_double_quoted_string(1);
- let suffix_start = self.len_consumed();
+ let suffix_start = self.pos_within_token();
if res.is_ok() {
self.eat_literal_suffix();
}
('\'', _) => {
self.bump();
let terminated = self.single_quoted_string();
- let suffix_start = self.len_consumed();
+ let suffix_start = self.pos_within_token();
if terminated {
self.eat_literal_suffix();
}
('"', _) => {
self.bump();
let terminated = self.double_quoted_string();
- let suffix_start = self.len_consumed();
+ let suffix_start = self.pos_within_token();
if terminated {
self.eat_literal_suffix();
}
('r', '"') | ('r', '#') => {
self.bump();
let res = self.raw_double_quoted_string(2);
- let suffix_start = self.len_consumed();
+ let suffix_start = self.pos_within_token();
if res.is_ok() {
self.eat_literal_suffix();
}
// Numeric literal.
c @ '0'..='9' => {
let literal_kind = self.number(c);
- let suffix_start = self.len_consumed();
+ let suffix_start = self.pos_within_token();
self.eat_literal_suffix();
TokenKind::Literal { kind: literal_kind, suffix_start }
}
// String literal.
'"' => {
let terminated = self.double_quoted_string();
- let suffix_start = self.len_consumed();
+ let suffix_start = self.pos_within_token();
if terminated {
self.eat_literal_suffix();
}
}
_ => Unknown,
};
- Token::new(token_kind, self.len_consumed())
+ let res = Token::new(token_kind, self.pos_within_token());
+ self.reset_pos_within_token();
+ res
}
fn line_comment(&mut self) -> TokenKind {
if !can_be_a_lifetime {
let terminated = self.single_quoted_string();
- let suffix_start = self.len_consumed();
+ let suffix_start = self.pos_within_token();
if terminated {
self.eat_literal_suffix();
}
if self.first() == '\'' {
self.bump();
let kind = Char { terminated: true };
- Literal { kind, suffix_start: self.len_consumed() }
+ Literal { kind, suffix_start: self.pos_within_token() }
} else {
Lifetime { starts_with_number }
}
fn raw_string_unvalidated(&mut self, prefix_len: u32) -> Result<u32, RawStrError> {
debug_assert!(self.prev() == 'r');
- let start_pos = self.len_consumed();
+ let start_pos = self.pos_within_token();
let mut possible_terminator_offset = None;
let mut max_hashes = 0;
// Keep track of possible terminators to give a hint about
// where there might be a missing terminator
possible_terminator_offset =
- Some(self.len_consumed() - start_pos - n_end_hashes + prefix_len);
+ Some(self.pos_within_token() - start_pos - n_end_hashes + prefix_len);
max_hashes = n_end_hashes;
}
}