1 // Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
12 use codemap::{BytePos, CharPos, CodeMap, Pos, Span};
14 use diagnostic::SpanHandler;
15 use ext::tt::transcribe::tt_next_token;
17 use parse::token::{str_to_ident};
20 use std::mem::replace;
21 use std::num::from_str_radix;
24 use std::strbuf::StrBuf;
26 pub use ext::tt::transcribe::{TtReader, new_tt_reader};
// NOTE(review): these are the method signatures of the `Reader` trait
// (implemented below for StringReader and TtReader); the
// `pub trait Reader {` header and closing brace are on lines elided
// from this chunk of the file.
29 fn is_eof(&self) -> bool;
// Return the next token and advance past it.
30 fn next_token(&mut self) -> TokenAndSpan;
// Report a fatal lexing error with the given message; never returns.
31 fn fatal(&self, StrBuf) -> !;
// Borrow the diagnostic handler used to report span errors.
32 fn span_diag<'a>(&'a self) -> &'a SpanHandler;
// Return the upcoming token without consuming it.
33 fn peek(&self) -> TokenAndSpan;
// A token paired with its source span. (The span field and the
// closing brace fall on lines elided from this chunk — presumably
// `pub sp: Span,`; TODO confirm against the full file.)
36 #[deriving(Clone, Eq, Show)]
37 pub struct TokenAndSpan {
38 pub tok: token::Token,
// The main lexer: reads characters out of one FileMap and produces
// tokens one lookahead ahead of the caller. Several fields (e.g.
// `pos`, `col`, `peek_span`, and the closing brace) fall on lines
// elided from this chunk.
42 pub struct StringReader<'a> {
43 pub span_diagnostic: &'a SpanHandler,
44 // The absolute offset within the codemap of the next character to read
46 // The absolute offset within the codemap of the last character read(curr)
47 pub last_pos: BytePos,
48 // The column of the next character to read
50 // The last character to be read
51 pub curr: Option<char>,
52 pub filemap: Rc<codemap::FileMap>,
// The already-lexed lookahead token returned by `peek()`; kept in
// sync by string_advance_token.
54 pub peek_tok: token::Token,
58 impl<'a> StringReader<'a> {
// True iff the current character is exactly `c` (body elided here).
59 pub fn curr_is(&self, c: char) -> bool {
// Construct a StringReader and prime it by lexing the first token,
// so `peek()` is valid immediately after construction.
64 pub fn new_string_reader<'a>(span_diagnostic: &'a SpanHandler,
65 filemap: Rc<codemap::FileMap>)
67 let mut r = new_low_level_string_reader(span_diagnostic, filemap);
68 string_advance_token(&mut r); /* fill in peek_* */
73 pub fn new_low_level_string_reader<'a>(span_diagnostic: &'a SpanHandler,
74 filemap: Rc<codemap::FileMap>)
76 // Force the initial reader bump to start on a fresh line
77 let initial_char = '\n';
78 let mut r = StringReader {
79 span_diagnostic: span_diagnostic,
80 pos: filemap.start_pos,
81 last_pos: filemap.start_pos,
83 curr: Some(initial_char),
85 /* dummy values; not read */
87 peek_span: codemap::DUMMY_SP,
93 impl<'a> Reader for StringReader<'a> {
94 fn is_eof(&self) -> bool { is_eof(self) }
95 // return the next token. EFFECT: advances the string_reader.
96 fn next_token(&mut self) -> TokenAndSpan {
// Move the peeked token out cheaply by swapping a placeholder
// (UNDERSCORE) into peek_tok instead of cloning.
97 let ret_val = TokenAndSpan {
98 tok: replace(&mut self.peek_tok, token::UNDERSCORE),
// Refill peek_tok/peek_span so the reader stays one token ahead.
101 string_advance_token(self);
104 fn fatal(&self, m: StrBuf) -> ! {
105 self.span_diagnostic.span_fatal(self.peek_span, m.as_slice())
107 fn span_diag<'a>(&'a self) -> &'a SpanHandler { self.span_diagnostic }
108 fn peek(&self) -> TokenAndSpan {
109 // FIXME(pcwalton): Bad copy!
111 tok: self.peek_tok.clone(),
112 sp: self.peek_span.clone(),
// Reader impl for the token-tree reader used during macro expansion;
// delegates actual token production to tt_next_token.
117 impl<'a> Reader for TtReader<'a> {
118 fn is_eof(&self) -> bool {
119 self.cur_tok == token::EOF
121 fn next_token(&mut self) -> TokenAndSpan {
122 let r = tt_next_token(self);
123 debug!("TtReader: r={:?}", r);
126 fn fatal(&self, m: StrBuf) -> ! {
127 self.sp_diag.span_fatal(self.cur_span, m.as_slice());
129 fn span_diag<'a>(&'a self) -> &'a SpanHandler { self.sp_diag }
130 fn peek(&self) -> TokenAndSpan {
132 tok: self.cur_tok.clone(),
133 sp: self.cur_span.clone(),
138 // report a lexical error spanning [`from_pos`, `to_pos`)
139 fn fatal_span(rdr: &mut StringReader,
// Point peek_span at the offending range so the diagnostic (emitted
// via rdr.fatal, on an elided line) carries the right location.
144 rdr.peek_span = codemap::mk_sp(from_pos, to_pos);
148 // report a lexical error spanning [`from_pos`, `to_pos`), appending an
149 // escaped character to the error message
150 fn fatal_span_char(rdr: &mut StringReader,
// escape_default renders control/non-ASCII chars readably (e.g. \n, \u007f).
158 char::escape_default(c, |c| m.push_char(c));
159 fatal_span(rdr, from_pos, to_pos, m.into_strbuf());
162 // report a lexical error spanning [`from_pos`, `to_pos`), appending the
163 // offending string to the error message
164 fn fatal_span_verbose(rdr: &mut StringReader,
// Slice the raw source text for the span and append it to the message.
171 let from = byte_offset(rdr, from_pos).to_uint();
172 let to = byte_offset(rdr, to_pos).to_uint();
173 m.push_str(rdr.filemap.src.as_slice().slice(from, to));
174 fatal_span(rdr, from_pos, to_pos, m);
177 // EFFECT: advance peek_tok and peek_span to refer to the next token.
178 // EFFECT: update the interner, maybe.
179 fn string_advance_token(r: &mut StringReader) {
// If the whitespace/comment scanner yields a sugared doc-comment,
// that becomes the peeked token; otherwise peek EOF or lex normally.
180 match consume_whitespace_and_comments(r) {
182 r.peek_span = comment.sp;
183 r.peek_tok = comment.tok;
187 r.peek_tok = token::EOF;
189 let start_bytepos = r.last_pos;
190 r.peek_tok = next_token_inner(r);
191 r.peek_span = codemap::mk_sp(start_bytepos,
// Translate an absolute codemap BytePos into an offset relative to
// this filemap's start (usable as an index into `filemap.src`).
198 fn byte_offset(rdr: &StringReader, pos: BytePos) -> BytePos {
199 (pos - rdr.filemap.start_pos)
202 /// Calls `f` with a string slice of the source text spanning from `start`
203 /// up to but excluding `rdr.last_pos`, meaning the slice does not include
204 /// the character `rdr.curr`.
205 pub fn with_str_from<T>(
210 with_str_from_to(rdr, start, rdr.last_pos, f)
213 /// Calls `f` with a string slice of the source text spanning from `start`
214 /// up to but excluding `end`.
215 fn with_str_from_to<T>(
221 f(rdr.filemap.src.as_slice().slice(
222 byte_offset(rdr, start).to_uint(),
223 byte_offset(rdr, end).to_uint()))
226 // EFFECT: advance the StringReader by one character. If a newline is
227 // discovered, add it to the FileMap's list of line start offsets.
228 pub fn bump(rdr: &mut StringReader) {
229 rdr.last_pos = rdr.pos;
230 let current_byte_offset = byte_offset(rdr, rdr.pos).to_uint();
231 if current_byte_offset < rdr.filemap.src.len() {
232 assert!(rdr.curr.is_some());
233 let last_char = rdr.curr.unwrap();
// char_range_at yields the char at this offset plus the byte index
// of the FOLLOWING char, so multibyte UTF-8 advances correctly.
234 let next = rdr.filemap
237 .char_range_at(current_byte_offset);
238 let byte_offset_diff = next.next - current_byte_offset;
239 rdr.pos = rdr.pos + Pos::from_uint(byte_offset_diff);
240 rdr.curr = Some(next.ch);
241 rdr.col = rdr.col + CharPos(1u);
// The character just consumed ended a line: record the new line
// start in the filemap and reset the column counter.
242 if last_char == '\n' {
243 rdr.filemap.next_line(rdr.last_pos);
244 rdr.col = CharPos(0u);
// Characters wider than one byte are recorded so that column/byte
// position translation stays accurate.
247 if byte_offset_diff > 1 {
248 rdr.filemap.record_multibyte_char(rdr.last_pos, byte_offset_diff);
// EOF test for the reader; the body falls on a line elided from this
// chunk (presumably `rdr.curr.is_none()` — TODO confirm).
255 pub fn is_eof(rdr: &StringReader) -> bool {
// Peek one character past `rdr.curr` without advancing.
259 pub fn nextch(rdr: &StringReader) -> Option<char> {
260 let offset = byte_offset(rdr, rdr.pos).to_uint();
261 if offset < rdr.filemap.src.len() {
262 Some(rdr.filemap.src.as_slice().char_at(offset))
267 pub fn nextch_is(rdr: &StringReader, c: char) -> bool {
268 nextch(rdr) == Some(c)
// Peek two characters past `rdr.curr` without advancing.
271 pub fn nextnextch(rdr: &StringReader) -> Option<char> {
272 let offset = byte_offset(rdr, rdr.pos).to_uint();
273 let s = rdr.filemap.deref().src.as_slice();
274 if offset >= s.len() { return None }
// Step over the next char's bytes to find the one after it.
275 let str::CharRange { next, .. } = s.char_range_at(offset);
277 Some(s.char_at(next))
282 pub fn nextnextch_is(rdr: &StringReader, c: char) -> bool {
283 nextnextch(rdr) == Some(c)
// Numeric value (0-15) of a hex digit; handling of non-hex input
// falls on lines elided from this chunk.
286 fn hex_digit_val(c: Option<char>) -> int {
287 let d = c.unwrap_or('\x00');
289 if in_range(c, '0', '9') { return (d as int) - ('0' as int); }
290 if in_range(c, 'a', 'f') { return (d as int) - ('a' as int) + 10; }
291 if in_range(c, 'A', 'F') { return (d as int) - ('A' as int) + 10; }
295 pub fn is_whitespace(c: Option<char>) -> bool {
296 match c.unwrap_or('\x00') { // None can be null for now... it's not whitespace
297 ' ' | '\n' | '\t' | '\r' => true,
// True iff `c` is Some and lies in the inclusive range [lo, hi].
302 fn in_range(c: Option<char>, lo: char, hi: char) -> bool {
304 Some(c) => lo <= c && c <= hi,
309 fn is_dec_digit(c: Option<char>) -> bool { return in_range(c, '0', '9'); }
311 fn is_hex_digit(c: Option<char>) -> bool {
312 return in_range(c, '0', '9') || in_range(c, 'a', 'f') ||
313 in_range(c, 'A', 'F');
316 // EFFECT: eats whitespace and comments.
317 // returns a Some(sugared-doc-attr) if one exists, None otherwise.
318 fn consume_whitespace_and_comments(rdr: &mut StringReader)
319 -> Option<TokenAndSpan> {
320 while is_whitespace(rdr.curr) { bump(rdr); }
321 return consume_any_line_comment(rdr);
// Four or more leading slashes is an ordinary comment, NOT a
// doc-comment ("///" and "//!" are the doc forms).
324 pub fn is_line_non_doc_comment(s: &str) -> bool {
325 s.starts_with("////")
328 // PRECONDITION: rdr.curr is not whitespace
329 // EFFECT: eats any kind of comment.
330 // returns a Some(sugared-doc-attr) if one exists, None otherwise
331 fn consume_any_line_comment(rdr: &mut StringReader)
332 -> Option<TokenAndSpan> {
333 if rdr.curr_is('/') {
338 // line comments starting with "///" or "//!" are doc-comments
339 if rdr.curr_is('/') || rdr.curr_is('!') {
// Span starts 3 bytes back to include the "///" / "//!" prefix.
340 let start_bpos = rdr.pos - BytePos(3);
341 while !rdr.curr_is('\n') && !is_eof(rdr) {
// Intern the comment text and surface it as a DOC_COMMENT token.
344 let ret = with_str_from(rdr, start_bpos, |string| {
345 // but comments with only more "/"s are not
346 if !is_line_non_doc_comment(string) {
348 tok: token::DOC_COMMENT(str_to_ident(string)),
349 sp: codemap::mk_sp(start_bpos, rdr.pos)
// Ordinary "//" comment: skip to end of line and keep scanning.
360 while !rdr.curr_is('\n') && !is_eof(rdr) { bump(rdr); }
362 // Restart whitespace munch.
363 consume_whitespace_and_comments(rdr)
365 Some('*') => { bump(rdr); bump(rdr); consume_block_comment(rdr) }
368 } else if rdr.curr_is('#') {
369 if nextch_is(rdr, '!') {
371 // Parse an inner attribute.
372 if nextnextch_is(rdr, '[') {
// A "#!" that is not "#![" is treated as a shebang line, but only
// when it sits at the very start of the file.
376 // I guess this is the only way to figure out if
377 // we're at the beginning of the file...
378 let cmap = CodeMap::new();
379 cmap.files.borrow_mut().push(rdr.filemap.clone());
380 let loc = cmap.lookup_char_pos_adj(rdr.last_pos);
381 if loc.line == 1u && loc.col == CharPos(0u) {
382 while !rdr.curr_is('\n') && !is_eof(rdr) { bump(rdr); }
383 return consume_whitespace_and_comments(rdr);
// "/***..." (three or more stars) is an ordinary block comment, NOT
// a doc-comment ("/**" and "/*!" are the doc forms).
392 pub fn is_block_non_doc_comment(s: &str) -> bool {
393 s.starts_with("/***")
396 // might return a sugared-doc-attr
397 fn consume_block_comment(rdr: &mut StringReader) -> Option<TokenAndSpan> {
398 // block comments starting with "/**" or "/*!" are doc-comments
399 let is_doc_comment = rdr.curr_is('*') || rdr.curr_is('!');
// Back the span up over the "/**"/"/*!" (3 bytes) or "/*" (2 bytes).
400 let start_bpos = rdr.pos - BytePos(if is_doc_comment {3} else {2});
// `level` tracks nesting depth: Rust block comments nest.
402 let mut level: int = 1;
405 let msg = if is_doc_comment {
406 "unterminated block doc-comment".to_strbuf()
408 "unterminated block comment".to_strbuf()
410 fatal_span(rdr, start_bpos, rdr.last_pos, msg);
411 } else if rdr.curr_is('/') && nextch_is(rdr, '*') {
415 } else if rdr.curr_is('*') && nextch_is(rdr, '/') {
// When the comment was a doc-comment, intern its full text and
// surface it as a DOC_COMMENT token; otherwise fall through.
424 let res = if is_doc_comment {
425 with_str_from(rdr, start_bpos, |string| {
426 // but comments with only "*"s between two "/"s are not
427 if !is_block_non_doc_comment(string) {
429 tok: token::DOC_COMMENT(str_to_ident(string)),
430 sp: codemap::mk_sp(start_bpos, rdr.pos)
440 // restart whitespace munch.
441 if res.is_some() { res } else { consume_whitespace_and_comments(rdr) }
// Scan an optional float exponent: 'e'/'E', an optional sign, then
// decimal digits. Returns the exponent text, or None when the
// current character does not begin an exponent.
444 fn scan_exponent(rdr: &mut StringReader, start_bpos: BytePos) -> Option<StrBuf> {
445 // \x00 hits the `return None` case immediately, so this is fine.
446 let mut c = rdr.curr.unwrap_or('\x00');
447 let mut rslt = StrBuf::new();
448 if c == 'e' || c == 'E' {
451 c = rdr.curr.unwrap_or('\x00');
452 if c == '-' || c == '+' {
456 let exponent = scan_digits(rdr, 10u);
457 if exponent.len() > 0u {
458 rslt.push_str(exponent.as_slice());
// An 'e'/'E' with no following digits is a malformed float literal.
461 fatal_span(rdr, start_bpos, rdr.last_pos,
462 "scan_exponent: bad fp literal".to_strbuf());
465 return None::<StrBuf>;
// Consume a maximal run of digits valid in `radix`, skipping '_'
// separators; returns the digits (without underscores) as text.
469 fn scan_digits(rdr: &mut StringReader, radix: uint) -> StrBuf {
470 let mut rslt = StrBuf::new();
473 if c == Some('_') { bump(rdr); continue; }
// to_digit is None for a char outside the radix: end of the run.
474 match c.and_then(|cc| char::to_digit(cc, radix)) {
476 rslt.push_char(c.unwrap());
// Reject float literals written with a non-decimal base prefix
// (0x/0o/0b); only base 10 floats are supported.
484 fn check_float_base(rdr: &mut StringReader, start_bpos: BytePos, last_bpos: BytePos,
488 fatal_span(rdr, start_bpos, last_bpos,
489 "hexadecimal float literal is not supported".to_strbuf())
491 8u => fatal_span(rdr, start_bpos, last_bpos,
492 "octal float literal is not supported".to_strbuf()),
493 2u => fatal_span(rdr, start_bpos, last_bpos,
494 "binary float literal is not supported".to_strbuf()),
// Lex a numeric literal starting at `c`: handles base prefixes
// (0x/0o/0b), '_' separators, integer suffixes (u/i + width), the
// fractional part, exponents, and float suffixes (f32/f64/f128).
499 fn scan_number(c: char, rdr: &mut StringReader) -> token::Token {
503 let mut n = nextch(rdr).unwrap_or('\x00');
504 let start_bpos = rdr.last_pos;
// Detect a base prefix; the base assignments themselves fall on
// lines elided from this chunk.
505 if c == '0' && n == 'x' {
509 } else if c == '0' && n == 'o' {
513 } else if c == '0' && n == 'b' {
518 num_str = scan_digits(rdr, base);
519 c = rdr.curr.unwrap_or('\x00');
// Integer suffix: 'u'/'i' optionally followed by 8/16/32/64.
521 if c == 'u' || c == 'i' {
522 enum Result { Signed(ast::IntTy), Unsigned(ast::UintTy) }
523 let signed = c == 'i';
525 if signed { Signed(ast::TyI) }
526 else { Unsigned(ast::TyU) }
529 c = rdr.curr.unwrap_or('\x00');
532 tp = if signed { Signed(ast::TyI8) }
533 else { Unsigned(ast::TyU8) };
535 n = nextch(rdr).unwrap_or('\x00');
536 if c == '1' && n == '6' {
539 tp = if signed { Signed(ast::TyI16) }
540 else { Unsigned(ast::TyU16) };
541 } else if c == '3' && n == '2' {
544 tp = if signed { Signed(ast::TyI32) }
545 else { Unsigned(ast::TyU32) };
546 } else if c == '6' && n == '4' {
549 tp = if signed { Signed(ast::TyI64) }
550 else { Unsigned(ast::TyU64) };
552 if num_str.len() == 0u {
553 fatal_span(rdr, start_bpos, rdr.last_pos,
554 "no valid digits found for number".to_strbuf());
// Parse the digits as u64 in the detected base; overflow is fatal.
556 let parsed = match from_str_radix::<u64>(num_str.as_slice(),
559 None => fatal_span(rdr, start_bpos, rdr.last_pos,
560 "int literal is too large".to_strbuf())
564 Signed(t) => return token::LIT_INT(parsed as i64, t),
565 Unsigned(t) => return token::LIT_UINT(parsed, t)
568 let mut is_float = false;
// A '.' begins a fractional part only when NOT followed by an ident
// start (method call like 1.foo()) or another '.' (range 1..2).
569 if rdr.curr_is('.') && !(ident_start(nextch(rdr)) || nextch_is(rdr, '.')) {
572 let dec_part = scan_digits(rdr, 10u);
573 num_str.push_char('.');
574 num_str.push_str(dec_part.as_slice());
576 match scan_exponent(rdr, start_bpos) {
579 num_str.push_str(s.as_slice());
// Float suffix: f32, f64, or f128.
584 if rdr.curr_is('f') {
586 c = rdr.curr.unwrap_or('\x00');
587 n = nextch(rdr).unwrap_or('\x00');
588 if c == '3' && n == '2' {
591 check_float_base(rdr, start_bpos, rdr.last_pos, base);
592 return token::LIT_FLOAT(str_to_ident(num_str.into_owned()),
594 } else if c == '6' && n == '4' {
597 check_float_base(rdr, start_bpos, rdr.last_pos, base);
598 return token::LIT_FLOAT(str_to_ident(num_str.into_owned()),
600 /* FIXME (#2252): if this is out of range for either a
601 32-bit or 64-bit float, it won't be noticed till the
603 } else if c == '1' && n == '2' && nextnextch(rdr).unwrap_or('\x00') == '8' {
607 check_float_base(rdr, start_bpos, rdr.last_pos, base);
608 return token::LIT_FLOAT(str_to_ident(num_str.as_slice()), ast::TyF128);
// An 'f' followed by anything else is an invalid suffix.
610 fatal_span(rdr, start_bpos, rdr.last_pos,
611 "expected `f32`, `f64` or `f128` suffix".to_strbuf());
// Float-shaped literal with no suffix.
614 check_float_base(rdr, start_bpos, rdr.last_pos, base);
615 return token::LIT_FLOAT_UNSUFFIXED(str_to_ident(
616 num_str.into_owned()));
// Plain unsuffixed integer.
618 if num_str.len() == 0u {
619 fatal_span(rdr, start_bpos, rdr.last_pos,
620 "no valid digits found for number".to_strbuf());
622 let parsed = match from_str_radix::<u64>(num_str.as_slice(),
625 None => fatal_span(rdr, start_bpos, rdr.last_pos,
626 "int literal is too large".to_strbuf())
629 debug!("lexing {} as an unsuffixed integer literal",
631 return token::LIT_INT_UNSUFFIXED(parsed as i64);
// Scan exactly `n_hex_digits` hex digits (after a "\x", "\u" or "\U"
// escape) and decode them into the corresponding character.
635 fn scan_numeric_escape(rdr: &mut StringReader, n_hex_digits: uint) -> char {
636 let mut accum_int = 0;
637 let mut i = n_hex_digits;
638 let start_bpos = rdr.last_pos;
639 while i != 0u && !is_eof(rdr) {
641 if !is_hex_digit(n) {
646 "illegal character in numeric character escape".to_strbuf(),
// Fold each digit into the accumulator (the shift-by-4 presumably
// happens on an elided line — TODO confirm against the full file).
651 accum_int += hex_digit_val(n);
654 if i != 0 && is_eof(rdr) {
655 fatal_span(rdr, start_bpos, rdr.last_pos,
656 "unterminated numeric character escape".to_strbuf());
// from_u32 rejects values that are not valid Unicode scalar values.
659 match char::from_u32(accum_int as u32) {
661 None => fatal_span(rdr, start_bpos, rdr.last_pos,
662 "illegal numeric character escape".to_strbuf())
// True iff `c` can begin an identifier: ASCII letters, and beyond
// '\x7f' anything XID_start ('_' handling sits on an elided line).
666 fn ident_start(c: Option<char>) -> bool {
667 let c = match c { Some(c) => c, None => return false };
669 (c >= 'a' && c <= 'z')
670 || (c >= 'A' && c <= 'Z')
672 || (c > '\x7f' && char::is_XID_start(c))
// True iff `c` can continue an identifier (also allows digits).
675 fn ident_continue(c: Option<char>) -> bool {
676 let c = match c { Some(c) => c, None => return false };
678 (c >= 'a' && c <= 'z')
679 || (c >= 'A' && c <= 'Z')
680 || (c >= '0' && c <= '9')
682 || (c > '\x7f' && char::is_XID_continue(c))
685 // return the next token from the string
686 // EFFECT: advances the input past that token
687 // EFFECT: updates the interner
688 fn next_token_inner(rdr: &mut StringReader) -> token::Token {
// --- identifiers and keywords -----------------------------------
689 if ident_start(c) && !nextch_is(rdr, '"') && !nextch_is(rdr, '#') {
691 // Note: r as in r" or r#" is part of a raw string literal,
692 // not an identifier, and is handled further down.
694 let start = rdr.last_pos;
695 while ident_continue(rdr.curr) {
699 return with_str_from(rdr, start, |string| {
// "a::b" marks `a` as a module-path segment.
703 let is_mod_name = rdr.curr_is(':') && nextch_is(rdr, ':');
705 // FIXME: perform NFKC normalization here. (Issue #2253)
706 token::IDENT(str_to_ident(string), is_mod_name)
// --- numeric literals -------------------------------------------
711 return scan_number(c.unwrap(), rdr);
// Helper shared by all binary operators: a trailing '=' turns the
// operator into its compound-assignment form (e.g. + vs +=).
713 fn binop(rdr: &mut StringReader, op: token::BinOp) -> token::Token {
715 if rdr.curr_is('=') {
717 return token::BINOPEQ(op);
718 } else { return token::BINOP(op); }
// --- one-byte and multi-byte punctuation ------------------------
720 match c.expect("next_token_inner called at EOF") {
727 ';' => { bump(rdr); return token::SEMI; }
728 ',' => { bump(rdr); return token::COMMA; }
// '.' / '..' / '...' disambiguation.
731 return if rdr.curr_is('.') {
733 if rdr.curr_is('.') {
743 '(' => { bump(rdr); return token::LPAREN; }
744 ')' => { bump(rdr); return token::RPAREN; }
745 '{' => { bump(rdr); return token::LBRACE; }
746 '}' => { bump(rdr); return token::RBRACE; }
747 '[' => { bump(rdr); return token::LBRACKET; }
748 ']' => { bump(rdr); return token::RBRACKET; }
749 '@' => { bump(rdr); return token::AT; }
750 '#' => { bump(rdr); return token::POUND; }
751 '~' => { bump(rdr); return token::TILDE; }
754 if rdr.curr_is(':') {
756 return token::MOD_SEP;
757 } else { return token::COLON; }
760 '$' => { bump(rdr); return token::DOLLAR; }
766 // Multi-byte tokens.
// '=' / '==' / '=>'
769 if rdr.curr_is('=') {
772 } else if rdr.curr_is('>') {
774 return token::FAT_ARROW;
// '!' / '!='
781 if rdr.curr_is('=') {
784 } else { return token::NOT; }
// '<' / '<=' / '<<' / '<-' / '<->'
788 match rdr.curr.unwrap_or('\x00') {
789 '=' => { bump(rdr); return token::LE; }
790 '<' => { return binop(rdr, token::SHL); }
793 match rdr.curr.unwrap_or('\x00') {
794 '>' => { bump(rdr); return token::DARROW; }
795 _ => { return token::LARROW; }
798 _ => { return token::LT; }
// '>' / '>=' / '>>'
803 match rdr.curr.unwrap_or('\x00') {
804 '=' => { bump(rdr); return token::GE; }
805 '>' => { return binop(rdr, token::SHR); }
806 _ => { return token::GT; }
810 // Either a character constant 'a' OR a lifetime name 'abc
812 let start = rdr.last_pos;
814 // the eof will be picked up by the final `'` check below
815 let mut c2 = rdr.curr.unwrap_or('\x00');
818 // If the character is an ident start not followed by another single
819 // quote, then this is a lifetime name:
820 if ident_start(Some(c2)) && !rdr.curr_is('\'') {
821 while ident_continue(rdr.curr) {
824 let ident = with_str_from(rdr, start, |lifetime_name| {
825 str_to_ident(lifetime_name)
827 let tok = &token::IDENT(ident, false);
// Lifetimes may not shadow keywords, except 'static.
829 if token::is_keyword(token::keywords::Self, tok) {
830 fatal_span(rdr, start, rdr.last_pos,
831 "invalid lifetime name: 'self \
832 is no longer a special lifetime".to_strbuf());
833 } else if token::is_any_keyword(tok) &&
834 !token::is_keyword(token::keywords::Static, tok) {
835 fatal_span(rdr, start, rdr.last_pos,
836 "invalid lifetime name".to_strbuf());
838 return token::LIFETIME(ident);
842 // Otherwise it is a character constant:
845 // '\X' for some X must be a character constant:
846 let escaped = rdr.curr;
847 let escaped_pos = rdr.last_pos;
860 'x' => scan_numeric_escape(rdr, 2u),
861 'u' => scan_numeric_escape(rdr, 4u),
862 'U' => scan_numeric_escape(rdr, 8u),
875 '\t' | '\n' | '\r' | '\'' => {
880 "character constant must be escaped".to_strbuf(),
885 if !rdr.curr_is('\'') {
886 fatal_span_verbose(rdr,
887 // Byte offsetting here is okay because the
888 // character before position `start` is an
889 // ascii single quote.
892 "unterminated character constant".to_strbuf());
894 bump(rdr); // advance curr past token
895 return token::LIT_CHAR(c2);
// --- ordinary double-quoted string literal ----------------------
898 let mut accum_str = StrBuf::new();
899 let start_bpos = rdr.last_pos;
901 while !rdr.curr_is('"') {
903 fatal_span(rdr, start_bpos, rdr.last_pos,
904 "unterminated double quote string".to_strbuf());
907 let ch = rdr.curr.unwrap();
912 fatal_span(rdr, start_bpos, rdr.last_pos,
913 "unterminated double quote string".to_strbuf());
916 let escaped = rdr.curr.unwrap();
917 let escaped_pos = rdr.last_pos;
920 'n' => accum_str.push_char('\n'),
921 'r' => accum_str.push_char('\r'),
922 't' => accum_str.push_char('\t'),
923 '\\' => accum_str.push_char('\\'),
924 '\'' => accum_str.push_char('\''),
925 '"' => accum_str.push_char('"'),
// Backslash-newline: the literal continues on the next line,
// with intervening whitespace swallowed.
926 '\n' => consume_whitespace(rdr),
927 '0' => accum_str.push_char('\x00'),
929 accum_str.push_char(scan_numeric_escape(rdr, 2u));
932 accum_str.push_char(scan_numeric_escape(rdr, 4u));
935 accum_str.push_char(scan_numeric_escape(rdr, 8u));
938 fatal_span_char(rdr, escaped_pos, rdr.last_pos,
939 "unknown string escape".to_strbuf(), c2);
943 _ => accum_str.push_char(ch)
947 return token::LIT_STR(str_to_ident(accum_str.as_slice()));
// --- raw string literal r"..." / r#"..."# -----------------------
950 let start_bpos = rdr.last_pos;
// Count the '#'s after 'r'; the close must match the same count.
952 let mut hash_count = 0u;
953 while rdr.curr_is('#') {
959 fatal_span(rdr, start_bpos, rdr.last_pos,
960 "unterminated raw string".to_strbuf());
961 } else if !rdr.curr_is('"') {
962 fatal_span_char(rdr, start_bpos, rdr.last_pos,
963 "only `#` is allowed in raw string delimitation; \
964 found illegal character".to_strbuf(),
968 let content_start_bpos = rdr.last_pos;
969 let mut content_end_bpos;
972 fatal_span(rdr, start_bpos, rdr.last_pos,
973 "unterminated raw string".to_strbuf());
// A '"' only terminates if followed by `hash_count` '#'s.
975 if rdr.curr_is('"') {
976 content_end_bpos = rdr.last_pos;
977 for _ in range(0, hash_count) {
979 if !rdr.curr_is('#') {
988 let str_content = with_str_from_to(rdr,
992 return token::LIT_STR_RAW(str_content, hash_count);
// --- remaining operators ----------------------------------------
995 if nextch_is(rdr, '>') {
998 return token::RARROW;
999 } else { return binop(rdr, token::MINUS); }
1002 if nextch_is(rdr, '&') {
1005 return token::ANDAND;
1006 } else { return binop(rdr, token::AND); }
1010 Some('|') => { bump(rdr); bump(rdr); return token::OROR; }
1011 _ => { return binop(rdr, token::OR); }
1014 '+' => { return binop(rdr, token::PLUS); }
1015 '*' => { return binop(rdr, token::STAR); }
1016 '/' => { return binop(rdr, token::SLASH); }
1017 '^' => { return binop(rdr, token::CARET); }
1018 '%' => { return binop(rdr, token::PERCENT); }
1020 fatal_span_char(rdr, rdr.last_pos, rdr.pos,
1021 "unknown start of token".to_strbuf(), c);
// Skip whitespace only (no comment handling); used by the
// backslash-newline escape in string literals above.
1026 fn consume_whitespace(rdr: &mut StringReader) {
1027 while is_whitespace(rdr.curr) && !is_eof(rdr) { bump(rdr); }
// NOTE(review): this is the interior of the lexer's test module; the
// `#[cfg(test)] mod test {` header and several `#[test]` attributes
// fall on lines elided from this chunk.
1034 use codemap::{BytePos, CodeMap, Span};
1037 use parse::token::{str_to_ident};
// Build a SpanHandler whose diagnostics go to a null writer.
1040 fn mk_sh() -> diagnostic::SpanHandler {
1041 let emitter = diagnostic::EmitterWriter::new(box util::NullWriter);
1042 let handler = diagnostic::mk_handler(box emitter);
1043 diagnostic::mk_span_handler(handler, CodeMap::new())
1046 // open a string reader for the given string
1047 fn setup<'a>(span_handler: &'a diagnostic::SpanHandler,
1048 teststr: StrBuf) -> StringReader<'a> {
1049 let fm = span_handler.cm.new_filemap("zebra.rs".to_strbuf(), teststr);
1050 new_string_reader(span_handler, fm)
// Lex a small program and check exact tokens, spans and positions.
1054 let span_handler = mk_sh();
1055 let mut string_reader = setup(&span_handler,
1056 "/* my source file */ \
1057 fn main() { println!(\"zebra\"); }\n".to_strbuf());
1058 let id = str_to_ident("fn");
1059 let tok1 = string_reader.next_token();
1060 let tok2 = TokenAndSpan{
1061 tok:token::IDENT(id, false),
1062 sp:Span {lo:BytePos(21),hi:BytePos(23),expn_info: None}};
1063 assert_eq!(tok1,tok2);
1064 // the 'main' id is already read:
1065 assert_eq!(string_reader.last_pos.clone(), BytePos(28));
1066 // read another token:
1067 let tok3 = string_reader.next_token();
1068 let tok4 = TokenAndSpan{
1069 tok:token::IDENT(str_to_ident("main"), false),
1070 sp:Span {lo:BytePos(24),hi:BytePos(28),expn_info: None}};
1071 assert_eq!(tok3,tok4);
1072 // the lparen is already read:
1073 assert_eq!(string_reader.last_pos.clone(), BytePos(29))
1076 // check that the given reader produces the desired stream
1077 // of tokens (stop checking after exhausting the expected vec)
1078 fn check_tokenization (mut string_reader: StringReader, expected: Vec<token::Token> ) {
1079 for expected_tok in expected.iter() {
1080 assert_eq!(&string_reader.next_token().tok, expected_tok);
1084 // make the identifier by looking up the string in the interner
1085 fn mk_ident (id: &str, is_mod_name: bool) -> token::Token {
1086 token::IDENT (str_to_ident(id),is_mod_name)
// `::` tokenization: the is_mod_name flag on the left identifier.
1089 #[test] fn doublecolonparsing () {
1090 check_tokenization(setup(&mk_sh(), "a b".to_strbuf()),
1091 vec!(mk_ident("a",false),
1092 mk_ident("b",false)));
1095 #[test] fn dcparsing_2 () {
1096 check_tokenization(setup(&mk_sh(), "a::b".to_strbuf()),
1097 vec!(mk_ident("a",true),
1099 mk_ident("b",false)));
1102 #[test] fn dcparsing_3 () {
1103 check_tokenization(setup(&mk_sh(), "a ::b".to_strbuf()),
1104 vec!(mk_ident("a",false),
1106 mk_ident("b",false)));
1109 #[test] fn dcparsing_4 () {
1110 check_tokenization(setup(&mk_sh(), "a:: b".to_strbuf()),
1111 vec!(mk_ident("a",true),
1113 mk_ident("b",false)));
// Character-literal lexing.
1116 #[test] fn character_a() {
1117 assert_eq!(setup(&mk_sh(), "'a'".to_strbuf()).next_token().tok,
1118 token::LIT_CHAR('a'));
1121 #[test] fn character_space() {
1122 assert_eq!(setup(&mk_sh(), "' '".to_strbuf()).next_token().tok,
1123 token::LIT_CHAR(' '));
1126 #[test] fn character_escaped() {
1127 assert_eq!(setup(&mk_sh(), "'\\n'".to_strbuf()).next_token().tok,
1128 token::LIT_CHAR('\n'));
1131 #[test] fn lifetime_name() {
1132 assert_eq!(setup(&mk_sh(), "'abc".to_strbuf()).next_token().tok,
1133 token::LIFETIME(token::str_to_ident("abc")));
// Raw string: escapes stay literal, hash count is preserved.
1136 #[test] fn raw_string() {
1137 assert_eq!(setup(&mk_sh(),
1138 "r###\"\"#a\\b\x00c\"\"###".to_strbuf()).next_token()
1140 token::LIT_STR_RAW(token::str_to_ident("\"#a\\b\x00c\""), 3));
1143 #[test] fn line_doc_comments() {
1144 assert!(!is_line_non_doc_comment("///"));
1145 assert!(!is_line_non_doc_comment("/// blah"));
1146 assert!(is_line_non_doc_comment("////"));
// Nested block comments must be skipped as a unit.
1149 #[test] fn nested_block_comments() {
1150 assert_eq!(setup(&mk_sh(),
1151 "/* /* */ */'a'".to_strbuf()).next_token().tok,
1152 token::LIT_CHAR('a'));