1 use {SyntaxKind, Token};
11 use self::numbers::scan_number;
14 use self::strings::{is_string_literal_start, scan_byte_char_or_string, scan_char, scan_raw_string,
18 use self::comments::{scan_comment, scan_shebang};
/// Breaks `text` into a `Vec<Token>` by calling `next_token` on the remaining
/// input until nothing is left.
///
/// NOTE(review): only part of this function is visible in this excerpt; the
/// mutable rebinding of `text`, the push into `acc`, and the final return are
/// presumably on the lines not shown here -- confirm against the full file.
20 pub fn tokenize(text: &str) -> Vec<Token> {
// Accumulator for the produced tokens.
22 let mut acc = Vec::new();
// Each iteration lexes one token from the front of the remaining input.
23 while !text.is_empty() {
24 let token = next_token(text);
// Convert the token length to `u32`, then advance `text` past it.
// String slicing panics if the offset is not on a char boundary, so this
// relies on `token.len` always ending on one.
26 let len: u32 = token.len.into();
27 text = &text[len as usize..];
/// Lexes a single token starting at the beginning of `text`.
///
/// # Panics
///
/// Panics if `text` is empty (see the `assert!` below).
///
/// NOTE(review): the construction of the returned `Token` from `kind` and
/// `len` is not visible in this excerpt.
31 pub fn next_token(text: &str) -> Token {
32 assert!(!text.is_empty());
33 let mut ptr = Ptr::new(text);
// Take the first character; the `unwrap` is backed by the non-empty
// assertion above.
34 let c = ptr.bump().unwrap();
// Classify the token; `ptr` is advanced over the rest of it as a side effect.
35 let kind = next_token_inner(c, &mut ptr);
// Consumes `ptr`; presumably yields the length of the text consumed so far
// -- confirm against `Ptr::into_len`.
36 let len = ptr.into_len();
// Determines the `SyntaxKind` of the token whose first character is `c`,
// advancing `ptr` over the remainder of that token. The caller (`next_token`)
// has already bumped `ptr` past `c` before calling this.
//
// NOTE(review): many lines of this function (guards, match headers, arm
// bodies) are not visible in this excerpt; the comments below describe only
// what the visible lines show and hedge the rest.
40 fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind {
// Consume a run of whitespace characters (the guarding condition is not
// visible here).
42 ptr.bump_while(is_whitespace);
// '#' may begin a shebang line and '/' may begin a comment; each helper
// reports whether it recognised one.
47 '#' => if scan_shebang(ptr) {
50 '/' => if let Some(kind) = scan_comment(ptr) {
// `c` starts an identifier only if it is an identifier-start character and
// `c` together with the next two characters is not the start of a string
// literal according to `is_string_literal_start`.
56 let ident_start = is_ident_start(c) && !is_string_literal_start(c, ptr.next(), ptr.nnext());
58 return scan_ident(c, ptr);
// Numeric literal, followed by an optional literal suffix.
62 let kind = scan_number(c, ptr);
63 scan_literal_suffix(ptr);
// Single-character punctuation maps directly to its token kind.
71 '(' => return L_PAREN,
72 ')' => return R_PAREN,
73 '{' => return L_CURLY,
74 '}' => return R_CURLY,
75 '[' => return L_BRACK,
76 ']' => return R_BRACK,
77 '<' => return L_ANGLE,
78 '>' => return R_ANGLE,
82 '?' => return QUESTION,
84 '&' => return AMPERSAND,
90 '%' => return PERCENT,
// Multi-character punctuation: decide based on the next one or two
// characters. NOTE(review): the leading characters these matches belong to
// are not visible; the `..` / `.=` patterns below presumably follow an
// initial '.' -- confirm.
94 return match (ptr.next(), ptr.nnext()) {
95 (Some('.'), Some('.')) => {
100 (Some('.'), Some('=')) => {
113 return match ptr.next() {
122 return match ptr.next() {
135 return match ptr.next() {
144 return if ptr.next_is('>') {
152 // If the character is an ident start not followed by another single
153 // quote, then this is a lifetime name:
155 return if ptr.next_is_p(is_ident_start) && !ptr.nnext_is('\'') {
// Walk over the identifier-continue characters of the name.
157 while ptr.next_is_p(is_ident_continue) {
160 // lifetimes shouldn't end with a single quote
161 // if we find one, then this is an invalid character literal
162 if ptr.next_is('\'') {
164 return CHAR; // TODO: error reporting
// Each literal form below is followed by an optional literal suffix.
169 scan_literal_suffix(ptr);
174 let kind = scan_byte_char_or_string(ptr);
175 scan_literal_suffix(ptr);
180 scan_literal_suffix(ptr);
184 scan_raw_string(ptr);
185 scan_literal_suffix(ptr);
/// Scans an identifier-like token whose first character `c` has already been
/// consumed, and returns its `SyntaxKind`.
///
/// NOTE(review): some match arms and the tail of the function (the keyword
/// and fallback returns) are not visible in this excerpt.
193 fn scan_ident(c: char, ptr: &mut Ptr) -> SyntaxKind {
// Check whether the token consists of `c` alone by looking at the next
// character.
194 let is_single_letter = match ptr.next() {
// The `c` bound in this arm shadows the parameter `c`: it is the *next*
// character, and if it cannot continue an identifier the token is one
// character long.
196 Some(c) if !is_ident_continue(c) => true,
199 if is_single_letter {
// A lone underscore gets its own kind; any other single character is a
// plain identifier.
200 return if c == '_' { UNDERSCORE } else { IDENT };
// Longer identifier: consume every identifier-continue character.
202 ptr.bump_while(is_ident_continue);
// Look the token text up in the keyword table.
203 if let Some(kind) = ident_to_keyword(ptr.current_token_text()) {
/// Consumes an optional suffix directly following a literal: if the next
/// character can start an identifier, identifier-continue characters are
/// bumped.
///
/// NOTE(review): the lines between the `if` and the `bump_while` call, and the
/// closing braces, are not visible in this excerpt -- confirm whether the
/// `bump_while` sits inside the `if` body.
209 fn scan_literal_suffix(ptr: &mut Ptr) {
210 if ptr.next_is_p(is_ident_start) {
213 ptr.bump_while(is_ident_continue);