1 // Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
12 use codemap::{BytePos, CharPos, CodeMap, Pos, Span};
14 use diagnostic::SpanHandler;
15 use ext::tt::transcribe::tt_next_token;
17 use parse::token::{str_to_ident};
20 use std::mem::replace;
21 use std::num::from_str_radix;
25 pub use ext::tt::transcribe::{TtReader, new_tt_reader};
    /// True when the reader has exhausted its input.
    fn is_eof(&self) -> bool;
    /// Return the next token and its span, advancing the reader.
    fn next_token(&mut self) -> TokenAndSpan;
    /// Report a fatal error with the current span.
    fn fatal(&self, &str) -> !;
    /// Report a non-fatal error with the current span.
    /// Return the lookahead token/span without advancing.
    fn peek(&self) -> TokenAndSpan;
#[deriving(Clone, PartialEq, Eq, Show)]
/// A lexed token together with the source span it came from.
pub struct TokenAndSpan {
    /// The token itself.
    pub tok: token::Token,
/// A lexer that reads tokens directly from the source text of a `FileMap`,
/// keeping a one-token lookahead (`peek_tok`/`peek_span`).
pub struct StringReader<'a> {
    // Sink for lexical error/warning diagnostics.
    pub span_diagnostic: &'a SpanHandler,
    // The absolute offset within the codemap of the next character to read
    // The absolute offset within the codemap of the last character read(curr)
    pub last_pos: BytePos,
    // The column of the next character to read
    // The last character to be read
    pub curr: Option<char>,
    // Shared handle to the file being lexed.
    pub filemap: Rc<codemap::FileMap>,
    // Buffered lookahead token, handed out by `next_token`.
    pub peek_tok: token::Token,
// `Reader` implementation that lexes characters straight from source text.
impl<'a> Reader for StringReader<'a> {
62 fn is_eof(&self) -> bool { self.curr.is_none() }
    // return the next token. EFFECT: advances the string_reader.
    fn next_token(&mut self) -> TokenAndSpan {
        // Hand out the buffered lookahead; `UNDERSCORE` is a throwaway
        // placeholder that gets overwritten when the next token is computed.
        let ret_val = TokenAndSpan {
            tok: replace(&mut self.peek_tok, token::UNDERSCORE),
    /// Report a fatal error at the span of the lookahead token.
    fn fatal(&self, m: &str) -> ! {
        self.fatal_span(self.peek_span, m)
    /// Report a non-fatal error at the span of the lookahead token.
    fn err(&self, m: &str) {
        self.err_span(self.peek_span, m)
    /// Return a copy of the buffered lookahead token and span.
    fn peek(&self) -> TokenAndSpan {
        // FIXME(pcwalton): Bad copy!
        tok: self.peek_tok.clone(),
// `Reader` implementation that replays tokens produced by macro
// (token-tree) expansion instead of lexing raw text.
impl<'a> Reader for TtReader<'a> {
    fn is_eof(&self) -> bool {
        // EOF is signalled with the sentinel EOF token, not an Option.
        self.cur_tok == token::EOF
    fn next_token(&mut self) -> TokenAndSpan {
        // Delegate to the transcriber, which walks the expanded token trees.
        let r = tt_next_token(self);
        debug!("TtReader: r={:?}", r);
    fn fatal(&self, m: &str) -> ! {
        self.sp_diag.span_fatal(self.cur_span, m);
    fn err(&self, m: &str) {
        self.sp_diag.span_err(self.cur_span, m);
    fn peek(&self) -> TokenAndSpan {
        tok: self.cur_tok.clone(),
impl<'a> StringReader<'a> {
    /// For comments.rs, which hackily pokes into pos and curr
    pub fn new_raw<'b>(span_diagnostic: &'b SpanHandler,
                       filemap: Rc<codemap::FileMap>) -> StringReader<'b> {
        let mut sr = StringReader {
            span_diagnostic: span_diagnostic,
            // Both positions start at the beginning of the filemap.
            pos: filemap.start_pos,
            last_pos: filemap.start_pos,
            /* dummy values; not read */
            peek_tok: token::EOF,
            peek_span: codemap::DUMMY_SP,
    /// Create a `StringReader` and prime its one-token lookahead.
    pub fn new<'b>(span_diagnostic: &'b SpanHandler,
                   filemap: Rc<codemap::FileMap>) -> StringReader<'b> {
        let mut sr = StringReader::new_raw(span_diagnostic, filemap);
    /// True when the current character is exactly `c`.
    pub fn curr_is(&self, c: char) -> bool {
    /// Report a fatal lexical error with a given span.
    pub fn fatal_span(&self, sp: Span, m: &str) -> ! {
        self.span_diagnostic.span_fatal(sp, m)
    /// Report a lexical error with a given span.
    pub fn err_span(&self, sp: Span, m: &str) {
        self.span_diagnostic.span_err(sp, m)
    /// Report a fatal error spanning [`from_pos`, `to_pos`).
    fn fatal_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) -> ! {
        self.fatal_span(codemap::mk_sp(from_pos, to_pos), m)
    /// Report a lexical error spanning [`from_pos`, `to_pos`).
    fn err_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) {
        self.err_span(codemap::mk_sp(from_pos, to_pos), m)
    /// Report a lexical error spanning [`from_pos`, `to_pos`), appending an
    /// escaped character to the error message
    fn fatal_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) -> ! {
        let mut m = m.to_string();
        // Escape the char so control characters render readably in the message.
        char::escape_default(c, |c| m.push_char(c));
        self.fatal_span_(from_pos, to_pos, m.as_slice());
    /// Report a lexical error spanning [`from_pos`, `to_pos`), appending an
    /// escaped character to the error message
    fn err_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) {
        let mut m = m.to_string();
        char::escape_default(c, |c| m.push_char(c));
        self.err_span_(from_pos, to_pos, m.as_slice());
    /// Report a lexical error spanning [`from_pos`, `to_pos`), appending the
    /// offending string to the error message
    fn fatal_span_verbose(&self, from_pos: BytePos, to_pos: BytePos, mut m: String) -> ! {
        // Slice the offending source text straight out of the filemap.
        let from = self.byte_offset(from_pos).to_uint();
        let to = self.byte_offset(to_pos).to_uint();
        m.push_str(self.filemap.src.as_slice().slice(from, to));
        self.fatal_span_(from_pos, to_pos, m.as_slice());
    /// Advance peek_tok and peek_span to refer to the next token, and
    /// possibly update the interner.
    fn advance_token(&mut self) {
        match self.consume_whitespace_and_comments() {
            // A sugared doc-comment is itself a token: surface it directly.
            self.peek_span = comment.sp;
            self.peek_tok = comment.tok;
            // At end of input, park the lookahead on EOF.
            self.peek_tok = token::EOF;
            // Otherwise lex a real token and record its span.
            let start_bytepos = self.last_pos;
            self.peek_tok = self.next_token_inner();
            self.peek_span = codemap::mk_sp(start_bytepos,
    /// Byte offset of `pos` relative to the start of this filemap's source.
    fn byte_offset(&self, pos: BytePos) -> BytePos {
        (pos - self.filemap.start_pos)
    /// Calls `f` with a string slice of the source text spanning from `start`
    /// up to but excluding `self.last_pos`, meaning the slice does not include
    /// the character `self.curr`.
    pub fn with_str_from<T>(&self, start: BytePos, f: |s: &str| -> T) -> T {
        self.with_str_from_to(start, self.last_pos, f)
    /// Calls `f` with a string slice of the source text spanning from `start`
    /// up to but excluding `end`.
    fn with_str_from_to<T>(&self, start: BytePos, end: BytePos, f: |s: &str| -> T) -> T {
        f(self.filemap.src.as_slice().slice(
            self.byte_offset(start).to_uint(),
            self.byte_offset(end).to_uint()))
    /// Converts CRLF to LF in the given string, raising an error on bare CR.
    fn translate_crlf<'a>(&self, start: BytePos,
                          s: &'a str, errmsg: &'a str) -> str::MaybeOwned<'a> {
        let str::CharRange { ch, next } = s.char_range_at(i);
        // CR followed by LF: must allocate and rewrite via the slow path.
        if next < s.len() && s.char_at(next) == '\n' {
            return translate_crlf_(self, start, s, errmsg, i).into_maybe_owned();
        // Bare CR (no following LF): report it at its exact position.
        let pos = start + BytePos(i as u32);
        let end_pos = start + BytePos(next as u32);
        self.err_span_(pos, end_pos, errmsg);
        // No CRLF found anywhere: the borrowed slice can be returned as-is.
        return s.into_maybe_owned();
    // Out-of-line slow path: builds a fresh String with CRLF collapsed to LF.
    fn translate_crlf_(rdr: &StringReader, start: BytePos,
                       s: &str, errmsg: &str, mut i: uint) -> String {
        let mut buf = String::with_capacity(s.len());
        let str::CharRange { ch, next } = s.char_range_at(i);
        // Copy the run of non-CR text preceding this CR.
        if j < i { buf.push_str(s.slice(j, i)); }
        // A CR not followed by LF is an error in this context too.
        if next >= s.len() || s.char_at(next) != '\n' {
            let pos = start + BytePos(i as u32);
            let end_pos = start + BytePos(next as u32);
            rdr.err_span_(pos, end_pos, errmsg);
        // Append any trailing text after the final CR.
        if j < s.len() { buf.push_str(s.slice_from(j)); }
    /// Advance the StringReader by one character. If a newline is
    /// discovered, add it to the FileMap's list of line start offsets.
    pub fn bump(&mut self) {
        self.last_pos = self.pos;
        let current_byte_offset = self.byte_offset(self.pos).to_uint();
        if current_byte_offset < self.filemap.src.len() {
            assert!(self.curr.is_some());
            let last_char = self.curr.unwrap();
            // Decode the next (possibly multi-byte) UTF-8 character.
            let next = self.filemap
                .char_range_at(current_byte_offset);
            let byte_offset_diff = next.next - current_byte_offset;
            self.pos = self.pos + Pos::from_uint(byte_offset_diff);
            self.curr = Some(next.ch);
            self.col = self.col + CharPos(1u);
            // The '\n' we just stepped past starts a new source line.
            if last_char == '\n' {
                self.filemap.next_line(self.last_pos);
                self.col = CharPos(0u);
            // Record multi-byte chars so byte/char positions can be adjusted.
            if byte_offset_diff > 1 {
                self.filemap.record_multibyte_char(self.last_pos, byte_offset_diff);
    /// Peek one character past `curr` without advancing.
    pub fn nextch(&self) -> Option<char> {
        let offset = self.byte_offset(self.pos).to_uint();
        if offset < self.filemap.src.len() {
            Some(self.filemap.src.as_slice().char_at(offset))
    /// True when the character after `curr` is exactly `c`.
    pub fn nextch_is(&self, c: char) -> bool {
        self.nextch() == Some(c)
    /// Peek two characters past `curr` without advancing.
    pub fn nextnextch(&self) -> Option<char> {
        let offset = self.byte_offset(self.pos).to_uint();
        let s = self.filemap.deref().src.as_slice();
        if offset >= s.len() { return None }
        // Skip over the next char (which may be multi-byte) to reach the one after.
        let str::CharRange { next, .. } = s.char_range_at(offset);
        Some(s.char_at(next))
    /// True when the character two past `curr` is exactly `c`.
    pub fn nextnextch_is(&self, c: char) -> bool {
        self.nextnextch() == Some(c)
    /// PRECONDITION: self.curr is not whitespace
    /// Eats any kind of comment.
    /// Returns a Some(sugared-doc-attr) if one exists, None otherwise
    fn consume_any_line_comment(&mut self) -> Option<TokenAndSpan> {
        // Defensive check: the caller must have consumed whitespace already.
        if c.is_whitespace() {
            self.span_diagnostic.span_err(codemap::mk_sp(self.last_pos, self.last_pos),
                "called consume_any_line_comment, but there was whitespace");
        if self.curr_is('/') {
            match self.nextch() {
                // line comments starting with "///" or "//!" are doc-comments
                if self.curr_is('/') || self.curr_is('!') {
                    let start_bpos = self.pos - BytePos(3);
                    while !self.is_eof() {
                        match self.curr.unwrap() {
                            // CRLF line endings are tolerated inside doc-comments...
                            if self.nextch_is('\n') {
                            // ...but a bare CR is rejected.
                            self.err_span_(self.last_pos, self.pos,
                                "bare CR not allowed in doc-comment");
                    let ret = self.with_str_from(start_bpos, |string| {
                        // but comments with only more "/"s are not
                        if !is_line_non_doc_comment(string) {
                            tok: token::DOC_COMMENT(str_to_ident(string)),
                            sp: codemap::mk_sp(start_bpos, self.last_pos)
                    // Ordinary "//" comment: skip to end of line.
                    while !self.curr_is('\n') && !self.is_eof() { self.bump(); }
                    // Restart whitespace munch.
                    self.consume_whitespace_and_comments()
                Some('*') => { self.bump(); self.bump(); self.consume_block_comment() }
        } else if self.curr_is('#') {
            if self.nextch_is('!') {
                // Parse an inner attribute.
                if self.nextnextch_is('[') {
                    // Otherwise `#!` may be a shebang, which only counts at
                    // the very start of the file.
                    // I guess this is the only way to figure out if
                    // we're at the beginning of the file...
                    let cmap = CodeMap::new();
                    cmap.files.borrow_mut().push(self.filemap.clone());
                    let loc = cmap.lookup_char_pos_adj(self.last_pos);
                    if loc.line == 1u && loc.col == CharPos(0u) {
                        while !self.curr_is('\n') && !self.is_eof() { self.bump(); }
                        return self.consume_whitespace_and_comments();
    /// EFFECT: eats whitespace and comments.
    /// Returns a Some(sugared-doc-attr) if one exists, None otherwise.
    fn consume_whitespace_and_comments(&mut self) -> Option<TokenAndSpan> {
        while is_whitespace(self.curr) { self.bump(); }
        return self.consume_any_line_comment();
    // might return a sugared-doc-attr
    fn consume_block_comment(&mut self) -> Option<TokenAndSpan> {
        // block comments starting with "/**" or "/*!" are doc-comments
        let is_doc_comment = self.curr_is('*') || self.curr_is('!');
        // Back up over the already-consumed "/*" opener for span purposes.
        let start_bpos = self.last_pos - BytePos(2);
        // `level` tracks nesting depth: block comments nest in Rust.
        let mut level: int = 1;
        let mut has_cr = false;
            let msg = if is_doc_comment {
                "unterminated block doc-comment"
                "unterminated block comment"
            let last_bpos = self.last_pos;
            self.fatal_span_(start_bpos, last_bpos, msg);
            let n = self.curr.unwrap();
            // "/*" opens another nesting level, "*/" closes one.
            '/' if self.nextch_is('*') => {
            '*' if self.nextch_is('/') => {
        let res = if is_doc_comment {
            self.with_str_from(start_bpos, |string| {
                // but comments with only "*"s between two "/"s are not
                if !is_block_non_doc_comment(string) {
                    // Normalize CRLF before interning the doc-comment text.
                    let string = if has_cr {
                        self.translate_crlf(start_bpos, string,
                            "bare CR not allowed in block doc-comment")
                    } else { string.into_maybe_owned() };
                    tok: token::DOC_COMMENT(str_to_ident(string.as_slice())),
                    sp: codemap::mk_sp(start_bpos, self.last_pos)
        // restart whitespace munch.
        if res.is_some() { res } else { self.consume_whitespace_and_comments() }
    /// Scan an optional float exponent (`e`/`E`, optional sign, digits).
    /// Returns the exponent text, or None when no exponent is present.
    fn scan_exponent(&mut self, start_bpos: BytePos) -> Option<String> {
        // \x00 hits the `return None` case immediately, so this is fine.
        let mut c = self.curr.unwrap_or('\x00');
        let mut rslt = String::new();
        if c == 'e' || c == 'E' {
            c = self.curr.unwrap_or('\x00');
            if c == '-' || c == '+' {
            let exponent = self.scan_digits(10u);
            if exponent.len() > 0u {
                rslt.push_str(exponent.as_slice());
                // "e" with no digits following is a malformed float literal.
                let last_bpos = self.last_pos;
                self.err_span_(start_bpos, last_bpos, "scan_exponent: bad fp literal");
                rslt.push_str("1"); // arbitrary placeholder exponent
        return None::<String>;
    /// Consume a run of digits in the given radix, skipping `_` separators,
    /// and return the digits as a string (without the underscores).
    fn scan_digits(&mut self, radix: uint) -> String {
        let mut rslt = String::new();
        // Underscores are digit separators: consumed but not recorded.
        if c == Some('_') { self.bump(); continue; }
        match c.and_then(|cc| char::to_digit(cc, radix)) {
            rslt.push_char(c.unwrap());
    /// Reject float literals written in a non-decimal base (0x/0o/0b prefix).
    fn check_float_base(&mut self, start_bpos: BytePos, last_bpos: BytePos, base: uint) {
        16u => self.err_span_(start_bpos, last_bpos,
                              "hexadecimal float literal is not supported"),
        8u => self.err_span_(start_bpos, last_bpos, "octal float literal is not supported"),
        2u => self.err_span_(start_bpos, last_bpos, "binary float literal is not supported"),
    /// Scan a numeric literal starting at `c` (already the current char):
    /// int/uint with optional base prefix and suffix, or a float with
    /// optional fraction, exponent, and f32/f64/f128 suffix.
    fn scan_number(&mut self, c: char) -> token::Token {
        let mut n = self.nextch().unwrap_or('\x00');
        let start_bpos = self.last_pos;
        // Base prefixes: 0x hex, 0o octal, 0b binary.
        if c == '0' && n == 'x' {
        } else if c == '0' && n == 'o' {
        } else if c == '0' && n == 'b' {
        num_str = self.scan_digits(base);
        c = self.curr.unwrap_or('\x00');
        // Integer suffix: 'u'/'i' optionally followed by a width (8/16/32/64).
        if c == 'u' || c == 'i' {
            enum Result { Signed(ast::IntTy), Unsigned(ast::UintTy) }
            let signed = c == 'i';
            if signed { Signed(ast::TyI) }
            else { Unsigned(ast::TyU) }
            c = self.curr.unwrap_or('\x00');
            tp = if signed { Signed(ast::TyI8) }
            else { Unsigned(ast::TyU8) };
            n = self.nextch().unwrap_or('\x00');
            if c == '1' && n == '6' {
                tp = if signed { Signed(ast::TyI16) }
                else { Unsigned(ast::TyU16) };
            } else if c == '3' && n == '2' {
                tp = if signed { Signed(ast::TyI32) }
                else { Unsigned(ast::TyU32) };
            } else if c == '6' && n == '4' {
                tp = if signed { Signed(ast::TyI64) }
                else { Unsigned(ast::TyU64) };
            // Recover from an empty digit string with a placeholder value.
            if num_str.len() == 0u {
                let last_bpos = self.last_pos;
                self.err_span_(start_bpos, last_bpos, "no valid digits found for number");
                num_str = "1".to_string();
            let parsed = match from_str_radix::<u64>(num_str.as_slice(),
                let last_bpos = self.last_pos;
                self.err_span_(start_bpos, last_bpos, "int literal is too large");
            Signed(t) => return token::LIT_INT(parsed as i64, t),
            Unsigned(t) => return token::LIT_UINT(parsed, t)
        let mut is_float = false;
        // A '.' begins a fraction only if not followed by an ident start
        // (method call) or another '.' (range operator).
        if self.curr_is('.') && !(ident_start(self.nextch()) || self.nextch_is('.')) {
            let dec_part = self.scan_digits(10u);
            num_str.push_char('.');
            num_str.push_str(dec_part.as_slice());
        match self.scan_exponent(start_bpos) {
            num_str.push_str(s.as_slice());
        // Explicit float suffix: f32, f64, or f128.
        if self.curr_is('f') {
            c = self.curr.unwrap_or('\x00');
            n = self.nextch().unwrap_or('\x00');
            if c == '3' && n == '2' {
                let last_bpos = self.last_pos;
                self.check_float_base(start_bpos, last_bpos, base);
                return token::LIT_FLOAT(str_to_ident(num_str.as_slice()),
            } else if c == '6' && n == '4' {
                let last_bpos = self.last_pos;
                self.check_float_base(start_bpos, last_bpos, base);
                return token::LIT_FLOAT(str_to_ident(num_str.as_slice()),
                /* FIXME (#2252): if this is out of range for either a
                32-bit or 64-bit float, it won't be noticed till the
            } else if c == '1' && n == '2' && self.nextnextch().unwrap_or('\x00') == '8' {
                let last_bpos = self.last_pos;
                self.check_float_base(start_bpos, last_bpos, base);
                return token::LIT_FLOAT(str_to_ident(num_str.as_slice()), ast::TyF128);
            let last_bpos = self.last_pos;
            self.err_span_(start_bpos, last_bpos, "expected `f32`, `f64` or `f128` suffix");
            // Float with no suffix: type is inferred later.
            let last_bpos = self.last_pos;
            self.check_float_base(start_bpos, last_bpos, base);
            return token::LIT_FLOAT_UNSUFFIXED(str_to_ident(
                num_str.as_slice()));
        // Unsuffixed integer path.
        if num_str.len() == 0u {
            let last_bpos = self.last_pos;
            self.err_span_(start_bpos, last_bpos, "no valid digits found for number");
            num_str = "1".to_string();
        let parsed = match from_str_radix::<u64>(num_str.as_slice(),
            let last_bpos = self.last_pos;
            self.err_span_(start_bpos, last_bpos, "int literal is too large");
        debug!("lexing {} as an unsuffixed integer literal",
        return token::LIT_INT_UNSUFFIXED(parsed as i64);
    /// Scan the hex digits of a numeric escape (`\xHH`, `\uHHHH`, `\UHHHHHHHH`)
    /// and return the decoded character. `delim` is the literal's closing
    /// delimiter, used to detect a too-short escape.
    fn scan_numeric_escape(&mut self, n_hex_digits: uint, delim: char) -> char {
        let mut accum_int = 0u32;
        let start_bpos = self.last_pos;
        for _ in range(0, n_hex_digits) {
            let last_bpos = self.last_pos;
            self.fatal_span_(start_bpos, last_bpos, "unterminated numeric character escape");
            // Hitting the closing delimiter before all digits were read.
            if self.curr_is(delim) {
                let last_bpos = self.last_pos;
                self.err_span_(start_bpos, last_bpos, "numeric character escape is too short");
            let c = self.curr.unwrap_or('\x00');
            // Fold each hex digit into the accumulator; bad digits recover as 0.
            accum_int += c.to_digit(16).unwrap_or_else(|| {
                self.err_span_char(self.last_pos, self.pos,
                    "illegal character in numeric character escape", c);
        // The accumulated value may not be a valid Unicode scalar.
        match char::from_u32(accum_int) {
            let last_bpos = self.last_pos;
            self.err_span_(start_bpos, last_bpos, "illegal numeric character escape");
    /// Scan for a single (possibly escaped) byte or char
    /// in a byte, (non-raw) byte string, char, or (non-raw) string literal.
    /// `start` is the position of `first_source_char`, which is already consumed.
    fn scan_char_or_byte(&mut self, start: BytePos, first_source_char: char,
                         ascii_only: bool, delim: char) -> Option<char> {
        match first_source_char {
            // '\X' for some X must be a character constant:
            let escaped = self.curr;
            let escaped_pos = self.last_pos;
            None => {}, // EOF here is an error that will be checked later.
            return Some(match e {
                // Numeric escapes; \u and \U only exist in char/string literals.
                'x' => self.scan_numeric_escape(2u, delim),
                'u' if !ascii_only => self.scan_numeric_escape(4u, delim),
                'U' if !ascii_only => self.scan_numeric_escape(8u, delim),
                // Backslash-newline in a string: line continuation, eats
                // the following whitespace.
                '\n' if delim == '"' => {
                    self.consume_whitespace();
                '\r' if delim == '"' && self.curr_is('\n') => {
                    self.consume_whitespace();
                let last_pos = self.last_pos;
                    escaped_pos, last_pos,
                    if ascii_only { "unknown byte escape" }
                    else { "unknown character escape" },
            // Characters that must be escaped inside a char/byte literal.
            '\t' | '\n' | '\r' | '\'' if delim == '\'' => {
                let last_pos = self.last_pos;
                if ascii_only { "byte constant must be escaped" }
                else { "character constant must be escaped" },
            if self.curr_is('\n') {
                self.err_span_(start, self.last_pos,
                    "bare CR not allowed in string, use \\r instead");
            // Byte literals only admit ASCII source characters.
            _ => if ascii_only && first_source_char > '\x7F' {
                let last_pos = self.last_pos;
                "byte constant must be ASCII. \
                Use a \\xHH escape for a non-ASCII byte", first_source_char);
        Some(first_source_char)
    /// Lex a binary operator, upgrading it to the compound-assignment form
    /// (`op=`) when an '=' immediately follows.
    fn binop(&mut self, op: token::BinOp) -> token::Token {
        if self.curr_is('=') {
            return token::BINOPEQ(op);
        return token::BINOP(op);
    /// Return the next token from the string, advances the input past that
    /// token, and updates the interner
    fn next_token_inner(&mut self) -> token::Token {
        // Identifier / keyword path — unless the leading char actually begins
        // a raw-string or byte literal.
        if ident_start(c) && match (c.unwrap(), self.nextch(), self.nextnextch()) {
            // Note: r as in r" or r#" is part of a raw string literal,
            // b as in b' is part of a byte literal.
            // They are not identifiers, and are handled further down.
            ('r', Some('"'), _) | ('r', Some('#'), _) |
            ('b', Some('"'), _) | ('b', Some('\''), _) |
            ('b', Some('r'), Some('"')) | ('b', Some('r'), Some('#')) => false,
            let start = self.last_pos;
            while ident_continue(self.curr) {
            return self.with_str_from(start, |string| {
                // `::` after an ident marks it as a module-path segment.
                let is_mod_name = self.curr_is(':') && self.nextch_is(':');
                // FIXME: perform NFKC normalization here. (Issue #2253)
                token::IDENT(str_to_ident(string), is_mod_name)
        // Numeric literal path.
        return self.scan_number(c.unwrap());
        // Single- and multi-char punctuation tokens.
        match c.expect("next_token_inner called at EOF") {
            ';' => { self.bump(); return token::SEMI; }
            ',' => { self.bump(); return token::COMMA; }
            // '.' may begin '..' / '...' (range / ellipsis).
            return if self.curr_is('.') {
                if self.curr_is('.') {
            '(' => { self.bump(); return token::LPAREN; }
            ')' => { self.bump(); return token::RPAREN; }
            '{' => { self.bump(); return token::LBRACE; }
            '}' => { self.bump(); return token::RBRACE; }
            '[' => { self.bump(); return token::LBRACKET; }
            ']' => { self.bump(); return token::RBRACKET; }
            '@' => { self.bump(); return token::AT; }
            '#' => { self.bump(); return token::POUND; }
            '~' => { self.bump(); return token::TILDE; }
            // ':' followed by ':' is the path separator.
            if self.curr_is(':') {
                return token::MOD_SEP;
            '$' => { self.bump(); return token::DOLLAR; }
            // Multi-byte tokens.
            // '=' may begin '==' or '=>'.
            if self.curr_is('=') {
            } else if self.curr_is('>') {
                return token::FAT_ARROW;
            // '!' may begin '!='.
            if self.curr_is('=') {
            } else { return token::NOT; }
            // '<' family: <=, <<, <-, <.
            match self.curr.unwrap_or('\x00') {
                '=' => { self.bump(); return token::LE; }
                '<' => { return self.binop(token::SHL); }
                match self.curr.unwrap_or('\x00') {
                    _ => { return token::LARROW; }
                _ => { return token::LT; }
            // '>' family: >=, >>, >.
            match self.curr.unwrap_or('\x00') {
                '=' => { self.bump(); return token::GE; }
                '>' => { return self.binop(token::SHR); }
                _ => { return token::GT; }
            // Either a character constant 'a' OR a lifetime name 'abc
            let start = self.last_pos;
            // the eof will be picked up by the final `'` check below
            let mut c2 = self.curr.unwrap_or('\x00');
            // If the character is an ident start not followed by another single
            // quote, then this is a lifetime name:
            if ident_start(Some(c2)) && !self.curr_is('\'') {
                while ident_continue(self.curr) {
                // Include the leading `'` in the real identifier, for macro
                // expansion purposes. See #12512 for the gory details of why
                // this is necessary.
                let ident = self.with_str_from(start, |lifetime_name| {
                    str_to_ident(format!("'{}", lifetime_name).as_slice())
                // Conjure up a "keyword checking ident" to make sure that
                // the lifetime name is not a keyword.
                let keyword_checking_ident =
                    self.with_str_from(start, |lifetime_name| {
                        str_to_ident(lifetime_name)
                let keyword_checking_token =
                    &token::IDENT(keyword_checking_ident, false);
                let last_bpos = self.last_pos;
                if token::is_keyword(token::keywords::Self,
                                     keyword_checking_token) {
                    self.err_span_(start,
                        "invalid lifetime name: 'self \
                        is no longer a special lifetime");
                } else if token::is_any_keyword(keyword_checking_token) &&
                    !token::is_keyword(token::keywords::Static,
                                       keyword_checking_token) {
                    self.err_span_(start,
                        "invalid lifetime name");
                return token::LIFETIME(ident);
            // Otherwise it is a character constant:
            c2 = self.scan_char_or_byte(start, c2, /* ascii_only = */ false, '\'').unwrap();
            if !self.curr_is('\'') {
                let last_bpos = self.last_pos;
                self.fatal_span_verbose(
                    // Byte offsetting here is okay because the
                    // character before position `start` is an
                    // ascii single quote.
                    start - BytePos(1), last_bpos,
                    "unterminated character constant".to_string());
            self.bump(); // advance curr past token
            return token::LIT_CHAR(c2);
            // 'b' prefix: byte literal, byte string, or raw byte string.
            return match self.curr {
                Some('\'') => parse_byte(self),
                Some('"') => parse_byte_string(self),
                Some('r') => parse_raw_byte_string(self),
                _ => unreachable!() // Should have been a token::IDENT above.
        // Lex a byte literal b'x' (the `b` and `'` are already consumed).
        fn parse_byte(self_: &mut StringReader) -> token::Token {
            let start = self_.last_pos;
            // the eof will be picked up by the final `'` check below
            let mut c2 = self_.curr.unwrap_or('\x00');
            c2 = self_.scan_char_or_byte(start, c2, /* ascii_only = */ true, '\'').unwrap();
            if !self_.curr_is('\'') {
                // Byte offsetting here is okay because the
                // character before position `start` are an
                // ascii single quote and ascii 'b'.
                let last_pos = self_.last_pos;
                self_.fatal_span_verbose(
                    start - BytePos(2), last_pos,
                    "unterminated byte constant".to_string());
            self_.bump(); // advance curr past token
            return token::LIT_BYTE(c2 as u8);
        // Lex a byte string literal b"..." into a byte vector.
        fn parse_byte_string(self_: &mut StringReader) -> token::Token {
            let start = self_.last_pos;
            let mut value = Vec::new();
            while !self_.curr_is('"') {
                let last_pos = self_.last_pos;
                self_.fatal_span_(start, last_pos,
                                  "unterminated double quote byte string");
                let ch_start = self_.last_pos;
                let ch = self_.curr.unwrap();
                // Decode escapes; ascii_only restricts to byte-sized values.
                self_.scan_char_or_byte(ch_start, ch, /* ascii_only = */ true, '"')
                    .map(|ch| value.push(ch as u8));
            return token::LIT_BINARY(Rc::new(value));
        // Lex a raw byte string literal br#"..."# (no escape processing).
        fn parse_raw_byte_string(self_: &mut StringReader) -> token::Token {
            let start_bpos = self_.last_pos;
            // Count the '#'s in the opening delimiter; the closing quote must
            // be followed by the same number.
            let mut hash_count = 0u;
            while self_.curr_is('#') {
                let last_pos = self_.last_pos;
                self_.fatal_span_(start_bpos, last_pos, "unterminated raw string");
            } else if !self_.curr_is('"') {
                let last_pos = self_.last_pos;
                let ch = self_.curr.unwrap();
                self_.fatal_span_char(start_bpos, last_pos,
                                      "only `#` is allowed in raw string delimitation; \
                                       found illegal character",
            let content_start_bpos = self_.last_pos;
            let mut content_end_bpos;
                let last_pos = self_.last_pos;
                self_.fatal_span_(start_bpos, last_pos, "unterminated raw string")
                // Candidate close quote: require `hash_count` trailing '#'s.
                content_end_bpos = self_.last_pos;
                for _ in range(0, hash_count) {
                    if !self_.curr_is('#') {
                // Raw *byte* strings must be pure ASCII.
                Some(c) => if c > '\x7F' {
                    let last_pos = self_.last_pos;
                    self_.err_span_char(
                        last_pos, last_pos, "raw byte string must be ASCII", c);
            let bytes = self_.with_str_from_to(content_start_bpos,
                                               |s| s.as_bytes().to_owned());
            return token::LIT_BINARY_RAW(Rc::new(bytes), hash_count);
            // Scan a (non-raw) double-quoted string literal.
            let mut accum_str = String::new();
            let start_bpos = self.last_pos;
            while !self.curr_is('"') {
                let last_bpos = self.last_pos;
                self.fatal_span_(start_bpos, last_bpos, "unterminated double quote string");
                let ch_start = self.last_pos;
                let ch = self.curr.unwrap();
                // Each source char may be an escape; scan_char_or_byte decodes it.
                self.scan_char_or_byte(ch_start, ch, /* ascii_only = */ false, '"')
                    .map(|ch| accum_str.push_char(ch));
            return token::LIT_STR(str_to_ident(accum_str.as_slice()));
            // Scan a raw string literal r##"..."## with matching hash counts.
            let start_bpos = self.last_pos;
            let mut hash_count = 0u;
            while self.curr_is('#') {
                let last_bpos = self.last_pos;
                self.fatal_span_(start_bpos, last_bpos, "unterminated raw string");
            } else if !self.curr_is('"') {
                let last_bpos = self.last_pos;
                let curr_char = self.curr.unwrap();
                self.fatal_span_char(start_bpos, last_bpos,
                                     "only `#` is allowed in raw string delimitation; \
                                      found illegal character",
            let content_start_bpos = self.last_pos;
            let mut content_end_bpos;
            let mut has_cr = false;
                let last_bpos = self.last_pos;
                self.fatal_span_(start_bpos, last_bpos, "unterminated raw string");
                //if self.curr_is('"') {
                //content_end_bpos = self.last_pos;
                //for _ in range(0, hash_count) {
                //if !self.curr_is('#') {
                let c = self.curr.unwrap();
                    // Candidate close quote: require `hash_count` trailing '#'s.
                    content_end_bpos = self.last_pos;
                    for _ in range(0, hash_count) {
                        if !self.curr_is('#') {
            // Raw strings do no escape processing, but CRLF is still
            // normalized to LF (bare CR is an error).
            let str_content = self.with_str_from_to(content_start_bpos, content_end_bpos, |string| {
                let string = if has_cr {
                    self.translate_crlf(content_start_bpos, string,
                                        "bare CR not allowed in raw string")
                } else { string.into_maybe_owned() };
                str_to_ident(string.as_slice())
            return token::LIT_STR_RAW(str_content, hash_count);
            // '-' is either '->' or a minus operator.
            if self.nextch_is('>') {
                return token::RARROW;
            } else { return self.binop(token::MINUS); }
            // '&&' vs binary '&'.
            if self.nextch_is('&') {
                return token::ANDAND;
            } else { return self.binop(token::AND); }
            // '||' vs binary '|'.
            match self.nextch() {
                Some('|') => { self.bump(); self.bump(); return token::OROR; }
                _ => { return self.binop(token::OR); }
            '+' => { return self.binop(token::PLUS); }
            '*' => { return self.binop(token::STAR); }
            '/' => { return self.binop(token::SLASH); }
            '^' => { return self.binop(token::CARET); }
            '%' => { return self.binop(token::PERCENT); }
            // Anything else cannot start a token.
            let last_bpos = self.last_pos;
            let bpos = self.pos;
            self.fatal_span_char(last_bpos, bpos, "unknown start of token", c);
    /// Consume whitespace up to the next non-whitespace char or EOF.
    fn consume_whitespace(&mut self) {
        while is_whitespace(self.curr) && !self.is_eof() { self.bump(); }
    /// Read characters up to (and past) the next newline, returning the
    /// line's text without the trailing '\n'.
    fn read_to_eol(&mut self) -> String {
        let mut val = String::new();
        while !self.curr_is('\n') && !self.is_eof() {
            val.push_char(self.curr.unwrap());
        // Consume the newline itself, but do not include it in `val`.
        if self.curr_is('\n') { self.bump(); }
    /// Read a "//" line comment or "#!" shebang line to end of line.
    fn read_one_line_comment(&mut self) -> String {
        let val = self.read_to_eol();
        assert!((val.as_slice()[0] == '/' as u8 && val.as_slice()[1] == '/' as u8)
                || (val.as_slice()[0] == '#' as u8 && val.as_slice()[1] == '!' as u8));
    /// Consume whitespace on the current line only (stops at '\n').
    fn consume_non_eol_whitespace(&mut self) {
        while is_whitespace(self.curr) && !self.curr_is('\n') && !self.is_eof() {
    /// True when the reader is positioned at the start of a comment.
    fn peeking_at_comment(&self) -> bool {
        (self.curr_is('/') && self.nextch_is('/'))
        || (self.curr_is('/') && self.nextch_is('*'))
        // consider shebangs comments, but not inner attributes
        || (self.curr_is('#') && self.nextch_is('!') && !self.nextnextch_is('['))
/// True when `c` is a lexer-recognized whitespace character.
pub fn is_whitespace(c: Option<char>) -> bool {
    match c.unwrap_or('\x00') { // None can be null for now... it's not whitespace
        ' ' | '\n' | '\t' | '\r' => true,
// True when `c` is Some char in the inclusive range [lo, hi].
fn in_range(c: Option<char>, lo: char, hi: char) -> bool {
    Some(c) => lo <= c && c <= hi,
1246 fn is_dec_digit(c: Option<char>) -> bool { return in_range(c, '0', '9'); }
/// True when a "//..." comment is NOT a doc-comment (i.e. "////..." — four
/// or more leading slashes).
pub fn is_line_non_doc_comment(s: &str) -> bool {
    s.starts_with("////")
/// True when a "/*..." comment is NOT a doc-comment (i.e. "/***...").
pub fn is_block_non_doc_comment(s: &str) -> bool {
    s.starts_with("/***")
/// True when `c` may begin an identifier: ASCII letter, or (non-ASCII)
/// a Unicode XID_Start character.
fn ident_start(c: Option<char>) -> bool {
    let c = match c { Some(c) => c, None => return false };
    (c >= 'a' && c <= 'z')
    || (c >= 'A' && c <= 'Z')
    || (c > '\x7f' && char::is_XID_start(c))
/// True when `c` may continue an identifier: ASCII letter or digit, or
/// (non-ASCII) a Unicode XID_Continue character.
fn ident_continue(c: Option<char>) -> bool {
    let c = match c { Some(c) => c, None => return false };
    (c >= 'a' && c <= 'z')
    || (c >= 'A' && c <= 'Z')
    || (c >= '0' && c <= '9')
    || (c > '\x7f' && char::is_XID_continue(c))
    use codemap::{BytePos, CodeMap, Span};
    use parse::token::{str_to_ident};
    // Build a span handler whose diagnostics are discarded (NullWriter),
    // so tests can lex error-free without printing.
    fn mk_sh() -> diagnostic::SpanHandler {
        let emitter = diagnostic::EmitterWriter::new(box util::NullWriter);
        let handler = diagnostic::mk_handler(box emitter);
        diagnostic::mk_span_handler(handler, CodeMap::new())
    // open a string reader for the given string
    fn setup<'a>(span_handler: &'a diagnostic::SpanHandler,
                 teststr: String) -> StringReader<'a> {
        let fm = span_handler.cm.new_filemap("zebra.rs".to_string(), teststr);
        StringReader::new(span_handler, fm)
        // (body of the first smoke test; its `#[test] fn` header is not
        // visible in this excerpt)
        let span_handler = mk_sh();
        let mut string_reader = setup(&span_handler,
            "/* my source file */ \
             fn main() { println!(\"zebra\"); }\n".to_string());
        let id = str_to_ident("fn");
        // First token: the `fn` keyword-as-ident, at bytes 21..23
        // (past the leading block comment and space).
        let tok1 = string_reader.next_token();
        let tok2 = TokenAndSpan{
            tok:token::IDENT(id, false),
            sp:Span {lo:BytePos(21),hi:BytePos(23),expn_info: None}};
        assert_eq!(tok1,tok2);
        // the 'main' id is already read:
        assert_eq!(string_reader.last_pos.clone(), BytePos(28));
        // read another token:
        let tok3 = string_reader.next_token();
        let tok4 = TokenAndSpan{
            tok:token::IDENT(str_to_ident("main"), false),
            sp:Span {lo:BytePos(24),hi:BytePos(28),expn_info: None}};
        assert_eq!(tok3,tok4);
        // the lparen is already read:
        assert_eq!(string_reader.last_pos.clone(), BytePos(29))
    // check that the given reader produces the desired stream
    // of tokens (stop checking after exhausting the expected vec)
    fn check_tokenization (mut string_reader: StringReader, expected: Vec<token::Token> ) {
        for expected_tok in expected.iter() {
            assert_eq!(&string_reader.next_token().tok, expected_tok);
    // make the identifier by looking up the string in the interner
    fn mk_ident (id: &str, is_mod_name: bool) -> token::Token {
        token::IDENT (str_to_ident(id),is_mod_name)
    // Two plain idents separated by a space: neither is a module path segment.
    #[test] fn doublecolonparsing () {
        check_tokenization(setup(&mk_sh(), "a b".to_string()),
                           vec!(mk_ident("a",false),
                                mk_ident("b",false)));
    // `a::b` — the ident before `::` is flagged as a module-path segment.
    #[test] fn dcparsing_2 () {
        check_tokenization(setup(&mk_sh(), "a::b".to_string()),
                           vec!(mk_ident("a",true),
                                mk_ident("b",false)));
    // Space before `::` — `a` is NOT a module-path segment.
    #[test] fn dcparsing_3 () {
        check_tokenization(setup(&mk_sh(), "a ::b".to_string()),
                           vec!(mk_ident("a",false),
                                mk_ident("b",false)));
    // Space after `::` — `a` IS still a module-path segment.
    #[test] fn dcparsing_4 () {
        check_tokenization(setup(&mk_sh(), "a:: b".to_string()),
                           vec!(mk_ident("a",true),
                                mk_ident("b",false)));
    // Plain character literal.
    #[test] fn character_a() {
        assert_eq!(setup(&mk_sh(), "'a'".to_string()).next_token().tok,
                   token::LIT_CHAR('a'));
    // A space is a valid (unescaped) character literal.
    #[test] fn character_space() {
        assert_eq!(setup(&mk_sh(), "' '".to_string()).next_token().tok,
                   token::LIT_CHAR(' '));
    // Escape sequences are decoded in character literals.
    #[test] fn character_escaped() {
        assert_eq!(setup(&mk_sh(), "'\\n'".to_string()).next_token().tok,
                   token::LIT_CHAR('\n'));
    // A `'` followed by an ident (and no closing `'`) lexes as a lifetime,
    // with the leading `'` kept in the interned name.
    #[test] fn lifetime_name() {
        assert_eq!(setup(&mk_sh(), "'abc".to_string()).next_token().tok,
                   token::LIFETIME(token::str_to_ident("'abc")));
    // Raw string: three hashes; content (including quotes/backslash) is
    // preserved verbatim, no escape processing.
    #[test] fn raw_string() {
        assert_eq!(setup(&mk_sh(),
                         "r###\"\"#a\\b\x00c\"\"###".to_string()).next_token()
                   token::LIT_STR_RAW(token::str_to_ident("\"#a\\b\x00c\""), 3));
    // "///" and "/// blah" ARE doc comments; "////" is not.
    #[test] fn line_doc_comments() {
        assert!(!is_line_non_doc_comment("///"));
        assert!(!is_line_non_doc_comment("/// blah"));
        assert!(is_line_non_doc_comment("////"));
    // Block comments nest; the whole nested comment is skipped.
    #[test] fn nested_block_comments() {
        assert_eq!(setup(&mk_sh(),
                         "/* /* */ */'a'".to_string()).next_token().tok,
                   token::LIT_CHAR('a'));