1 // Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
12 use codemap::{BytePos, CharPos, CodeMap, Pos, Span};
14 use diagnostic::SpanHandler;
15 use ext::tt::transcribe::{dup_tt_reader, tt_next_token};
17 use parse::token::{str_to_ident};
19 use std::cell::{Cell, RefCell};
21 use std::mem::replace;
22 use std::num::from_str_radix;
26 pub use ext::tt::transcribe::{TtReader, new_tt_reader};
// --- `Reader` trait methods (the `pub trait Reader {` header line is not
// visible in this excerpt). A Reader is an abstract token source; both the
// string lexer and the macro token-tree reader implement it (see below).
29 fn is_eof(&self) -> bool;
// Returns the current token/span and advances to the next one.
30 fn next_token(&self) -> TokenAndSpan;
// Reports a fatal lexical error; never returns.
31 fn fatal(&self, ~str) -> !;
32 fn span_diag<'a>(&'a self) -> &'a SpanHandler;
// Returns the current token/span without advancing.
33 fn peek(&self) -> TokenAndSpan;
// Clones this reader into a new boxed trait object.
34 fn dup(&self) -> ~Reader:;
// A token paired with the source span it was lexed from.
// NOTE(review): the struct's field lines are not visible in this excerpt;
// usage below shows fields `tok: token::Token` and `sp: Span`.
37 #[deriving(Clone, Eq, Show)]
38 pub struct TokenAndSpan {
// Lexer state over a single `FileMap` (one source file). Mutable state is
// held in `Cell`/`RefCell` so lexing can proceed through `&self`.
43 pub struct StringReader<'a> {
44 span_diagnostic: &'a SpanHandler,
45 // The absolute offset within the codemap of the next character to read
// NOTE(review): the `pos` field declaration line is not shown in this
// excerpt (embedded numbering jumps 45 -> 47) — confirm against full file.
47 // The absolute offset within the codemap of the last character read(curr)
48 last_pos: Cell<BytePos>,
49 // The column of the next character to read
// NOTE(review): the `col` field declaration line is likewise not shown.
51 // The last character to be read
52 curr: Cell<Option<char>>,
53 filemap: Rc<codemap::FileMap>,
// One-token lookahead: next token and its span, filled eagerly by
// `string_advance_token`.
55 peek_tok: RefCell<token::Token>,
56 peek_span: RefCell<Span>,
59 impl<'a> StringReader<'a> {
// True when the current (last-read) character is exactly `c`.
60 pub fn curr_is(&self, c: char) -> bool {
61 self.curr.get() == Some(c)
// Creates a ready-to-use `StringReader` and primes the one-token lookahead
// so `peek()` is valid immediately.
65 pub fn new_string_reader<'a>(span_diagnostic: &'a SpanHandler,
66 filemap: Rc<codemap::FileMap>)
68 let r = new_low_level_string_reader(span_diagnostic, filemap);
69 string_advance_token(&r); /* fill in peek_* */
73 /* For comments.rs, which hackily pokes into 'pos' and 'curr' */
// Builds a reader whose lookahead fields hold dummy values; ordinary
// callers should use `new_string_reader`, which also fills in `peek_*`.
74 pub fn new_low_level_string_reader<'a>(span_diagnostic: &'a SpanHandler,
75 filemap: Rc<codemap::FileMap>)
77 // Force the initial reader bump to start on a fresh line
78 let initial_char = '\n';
79 let r = StringReader {
80 span_diagnostic: span_diagnostic,
81 pos: Cell::new(filemap.start_pos),
82 last_pos: Cell::new(filemap.start_pos),
83 col: Cell::new(CharPos(0)),
84 curr: Cell::new(Some(initial_char)),
86 /* dummy values; not read */
87 peek_tok: RefCell::new(token::EOF),
88 peek_span: RefCell::new(codemap::DUMMY_SP),
94 // duplicating the string reader is probably a bad idea, in
95 // that using them will cause interleaved pushes of line
96 // offsets to the underlying filemap...
// Shallow-copies all per-reader state; the FileMap itself is shared (Rc).
97 fn dup_string_reader<'a>(r: &StringReader<'a>) -> StringReader<'a> {
99 span_diagnostic: r.span_diagnostic,
100 pos: Cell::new(r.pos.get()),
101 last_pos: Cell::new(r.last_pos.get()),
102 col: Cell::new(r.col.get()),
103 curr: Cell::new(r.curr.get()),
104 filemap: r.filemap.clone(),
105 peek_tok: r.peek_tok.clone(),
106 peek_span: r.peek_span.clone(),
110 impl<'a> Reader for StringReader<'a> {
111 fn is_eof(&self) -> bool { is_eof(self) }
112 // return the next token. EFFECT: advances the string_reader.
113 fn next_token(&self) -> TokenAndSpan {
114 let ret_val = TokenAndSpan {
// Move the peeked token out, leaving a cheap placeholder behind; the
// call to string_advance_token below refills the lookahead.
115 tok: replace(&mut *self.peek_tok.borrow_mut(), token::UNDERSCORE),
116 sp: self.peek_span.get(),
118 string_advance_token(self);
// Fatal errors are reported at the span of the peeked (current) token.
121 fn fatal(&self, m: ~str) -> ! {
122 self.span_diagnostic.span_fatal(self.peek_span.get(), m)
124 fn span_diag<'a>(&'a self) -> &'a SpanHandler { self.span_diagnostic }
125 fn peek(&self) -> TokenAndSpan {
126 // FIXME(pcwalton): Bad copy!
128 tok: self.peek_tok.get(),
129 sp: self.peek_span.get(),
132 fn dup(&self) -> ~Reader: { ~dup_string_reader(self) as ~Reader: }
// `Reader` over a stream of macro token trees (see ext::tt::transcribe);
// presents the same interface as the string lexer.
135 impl<'a> Reader for TtReader<'a> {
136 fn is_eof(&self) -> bool {
137 *self.cur_tok.borrow() == token::EOF
139 fn next_token(&self) -> TokenAndSpan {
140 let r = tt_next_token(self);
141 debug!("TtReader: r={:?}", r);
144 fn fatal(&self, m: ~str) -> ! {
145 self.sp_diag.span_fatal(self.cur_span.get(), m);
147 fn span_diag<'a>(&'a self) -> &'a SpanHandler { self.sp_diag }
148 fn peek(&self) -> TokenAndSpan {
150 tok: self.cur_tok.get(),
151 sp: self.cur_span.get(),
154 fn dup(&self) -> ~Reader: { ~dup_tt_reader(self) as ~Reader: }
157 // report a lexical error spanning [`from_pos`, `to_pos`)
158 fn fatal_span(rdr: &StringReader,
// Point the peek span at the offending range so `fatal` reports there.
163 rdr.peek_span.set(codemap::mk_sp(from_pos, to_pos));
167 // report a lexical error spanning [`from_pos`, `to_pos`), appending an
168 // escaped character to the error message
169 fn fatal_span_char(rdr: &StringReader,
// Escape the char (e.g. newline -> "\\n") so it is printable in the message.
177 char::escape_default(c, |c| m.push_char(c));
178 fatal_span(rdr, from_pos, to_pos, m);
181 // report a lexical error spanning [`from_pos`, `to_pos`), appending the
182 // offending string to the error message
183 fn fatal_span_verbose(rdr: &StringReader,
// Slice the raw source text between the two positions into the message.
190 let s = rdr.filemap.src.slice(
191 byte_offset(rdr, from_pos).to_uint(),
192 byte_offset(rdr, to_pos).to_uint());
194 fatal_span(rdr, from_pos, to_pos, m);
197 // EFFECT: advance peek_tok and peek_span to refer to the next token.
198 // EFFECT: update the interner, maybe.
199 fn string_advance_token(r: &StringReader) {
// Whitespace and ordinary comments are skipped; a doc comment comes back
// as a token of its own and becomes the lookahead.
200 match consume_whitespace_and_comments(r) {
202 r.peek_span.set(comment.sp);
203 r.peek_tok.set(comment.tok);
// At end of input, park EOF in the lookahead.
207 r.peek_tok.set(token::EOF);
// Otherwise lex one real token, spanned from where it started.
209 let start_bytepos = r.last_pos.get();
210 r.peek_tok.set(next_token_inner(r));
211 r.peek_span.set(codemap::mk_sp(start_bytepos,
// Converts an absolute codemap position into a byte offset into this
// filemap's source string.
218 fn byte_offset(rdr: &StringReader, pos: BytePos) -> BytePos {
219 (pos - rdr.filemap.start_pos)
222 /// Calls `f` with a string slice of the source text spanning from `start`
223 /// up to but excluding `rdr.last_pos`, meaning the slice does not include
224 /// the character `rdr.curr`.
225 pub fn with_str_from<T>(
230 with_str_from_to(rdr, start, rdr.last_pos.get(), f)
233 /// Calls `f` with a string slice of the source text spanning from `start`
234 /// up to but excluding `end`.
235 fn with_str_from_to<T>(
241 f(rdr.filemap.src.slice(
242 byte_offset(rdr, start).to_uint(),
243 byte_offset(rdr, end).to_uint()))
246 // EFFECT: advance the StringReader by one character. If a newline is
247 // discovered, add it to the FileMap's list of line start offsets.
248 pub fn bump(rdr: &StringReader) {
249 rdr.last_pos.set(rdr.pos.get());
250 let current_byte_offset = byte_offset(rdr, rdr.pos.get()).to_uint();
251 if current_byte_offset < rdr.filemap.src.len() {
252 assert!(rdr.curr.get().is_some());
253 let last_char = rdr.curr.get().unwrap();
// char_range_at is UTF-8 aware: `next.next` is the byte index just past
// the decoded character, so multi-byte chars advance `pos` by >1.
254 let next = rdr.filemap.src.char_range_at(current_byte_offset);
255 let byte_offset_diff = next.next - current_byte_offset;
256 rdr.pos.set(rdr.pos.get() + Pos::from_uint(byte_offset_diff));
257 rdr.curr.set(Some(next.ch));
258 rdr.col.set(rdr.col.get() + CharPos(1u));
// A newline was just consumed: record the new line start, reset column.
259 if last_char == '\n' {
260 rdr.filemap.next_line(rdr.last_pos.get());
261 rdr.col.set(CharPos(0u));
// Multi-byte characters are recorded so byte/char position math stays
// consistent elsewhere in the codemap.
264 if byte_offset_diff > 1 {
265 rdr.filemap.record_multibyte_char(rdr.last_pos.get(), byte_offset_diff);
// NOTE(review): the else-branch handling end-of-source (presumably setting
// `curr` to None) is not visible in this excerpt — confirm against full file.
// EOF is represented by `curr == None`.
272 pub fn is_eof(rdr: &StringReader) -> bool {
273 rdr.curr.get().is_none()
// Peeks one character ahead of `curr` without advancing the reader.
276 pub fn nextch(rdr: &StringReader) -> Option<char> {
277 let offset = byte_offset(rdr, rdr.pos.get()).to_uint();
278 if offset < rdr.filemap.src.len() {
279 Some(rdr.filemap.src.char_at(offset))
// True when the character after `curr` is exactly `c`.
284 pub fn nextch_is(rdr: &StringReader, c: char) -> bool {
285 nextch(rdr) == Some(c)
// Peeks two characters ahead of `curr` without advancing the reader.
288 pub fn nextnextch(rdr: &StringReader) -> Option<char> {
289 let offset = byte_offset(rdr, rdr.pos.get()).to_uint();
290 let s = rdr.filemap.deref().src.as_slice();
291 if offset >= s.len() { return None }
// Skip the next character (UTF-8 aware) and read the one after it.
292 let str::CharRange { next, .. } = s.char_range_at(offset);
294 Some(s.char_at(next))
// True when the character two ahead of `curr` is exactly `c`.
299 pub fn nextnextch_is(rdr: &StringReader, c: char) -> bool {
300 nextnextch(rdr) == Some(c)
// Numeric value of a hex digit. `None` maps to '\x00', which matches none
// of the ranges below. NOTE(review): the fall-through for a non-hex-digit
// input is not visible in this excerpt — confirm against the full file.
303 fn hex_digit_val(c: Option<char>) -> int {
304 let d = c.unwrap_or('\x00');
306 if in_range(c, '0', '9') { return (d as int) - ('0' as int); }
307 if in_range(c, 'a', 'f') { return (d as int) - ('a' as int) + 10; }
308 if in_range(c, 'A', 'F') { return (d as int) - ('A' as int) + 10; }
// Whitespace as the lexer defines it: space, newline, tab, carriage return.
312 pub fn is_whitespace(c: Option<char>) -> bool {
313 match c.unwrap_or('\x00') { // None can be null for now... it's not whitespace
314 ' ' | '\n' | '\t' | '\r' => true,
// True when `c` is Some character within the inclusive range [lo, hi].
319 fn in_range(c: Option<char>, lo: char, hi: char) -> bool {
321 Some(c) => lo <= c && c <= hi,
// True when `c` is Some ASCII decimal digit ('0' through '9').
326 fn is_dec_digit(c: Option<char>) -> bool { in_range(c, '0', '9') }
// True when `c` is Some ASCII hexadecimal digit (0-9, a-f, A-F).
328 fn is_hex_digit(c: Option<char>) -> bool {
329 return in_range(c, '0', '9') || in_range(c, 'a', 'f') ||
330 in_range(c, 'A', 'F');
333 // EFFECT: eats whitespace and comments.
334 // returns a Some(sugared-doc-attr) if one exists, None otherwise.
335 fn consume_whitespace_and_comments(rdr: &StringReader)
336 -> Option<TokenAndSpan> {
// Skip whitespace first, then delegate comment handling (which may
// recurse back here after an ordinary comment).
337 while is_whitespace(rdr.curr.get()) { bump(rdr); }
338 return consume_any_line_comment(rdr);
// Four or more leading slashes is an ordinary comment, not a doc comment.
341 pub fn is_line_non_doc_comment(s: &str) -> bool {
342 s.starts_with("////")
345 // PRECONDITION: rdr.curr is not whitespace
346 // EFFECT: eats any kind of comment.
347 // returns a Some(sugared-doc-attr) if one exists, None otherwise
348 fn consume_any_line_comment(rdr: &StringReader)
349 -> Option<TokenAndSpan> {
350 if rdr.curr_is('/') {
355 // line comments starting with "///" or "//!" are doc-comments
356 if rdr.curr_is('/') || rdr.curr_is('!') {
// Back up three bytes so the span covers the whole "///" / "//!" opener.
357 let start_bpos = rdr.pos.get() - BytePos(3);
358 while !rdr.curr_is('\n') && !is_eof(rdr) {
// Doc comments are returned as DOC_COMMENT tokens for the parser.
361 let ret = with_str_from(rdr, start_bpos, |string| {
362 // but comments with only more "/"s are not
363 if !is_line_non_doc_comment(string) {
365 tok: token::DOC_COMMENT(str_to_ident(string)),
366 sp: codemap::mk_sp(start_bpos, rdr.pos.get())
// Ordinary "//" comment: skip to end of line, then resume munching.
377 while !rdr.curr_is('\n') && !is_eof(rdr) { bump(rdr); }
379 // Restart whitespace munch.
380 consume_whitespace_and_comments(rdr)
382 Some('*') => { bump(rdr); bump(rdr); consume_block_comment(rdr) }
385 } else if rdr.curr_is('#') {
386 if nextch_is(rdr, '!') {
388 // Parse an inner attribute.
389 if nextnextch_is(rdr, '[') {
// "#!" not followed by '[' is treated as a shebang line, but only at
// the very start of the file.
393 // I guess this is the only way to figure out if
394 // we're at the beginning of the file...
395 let cmap = CodeMap::new();
396 cmap.files.borrow_mut().push(rdr.filemap.clone());
397 let loc = cmap.lookup_char_pos_adj(rdr.last_pos.get());
398 if loc.line == 1u && loc.col == CharPos(0u) {
399 while !rdr.curr_is('\n') && !is_eof(rdr) { bump(rdr); }
400 return consume_whitespace_and_comments(rdr);
// "/***..." is an ordinary block comment, not a doc comment.
409 pub fn is_block_non_doc_comment(s: &str) -> bool {
410 s.starts_with("/***")
413 // might return a sugared-doc-attr
414 fn consume_block_comment(rdr: &StringReader) -> Option<TokenAndSpan> {
415 // block comments starting with "/**" or "/*!" are doc-comments
416 let is_doc_comment = rdr.curr_is('*') || rdr.curr_is('!');
// Back up over the opener: 3 bytes for "/**"/"/*!", 2 for plain "/*".
417 let start_bpos = rdr.pos.get() - BytePos(if is_doc_comment {3} else {2});
// Nesting depth — block comments nest, so "/*" and "*/" adjust `level`.
419 let mut level: int = 1;
422 let msg = if is_doc_comment {
423 ~"unterminated block doc-comment"
425 ~"unterminated block comment"
427 fatal_span(rdr, start_bpos, rdr.last_pos.get(), msg);
428 } else if rdr.curr_is('/') && nextch_is(rdr, '*') {
432 } else if rdr.curr_is('*') && nextch_is(rdr, '/') {
// A doc block comment becomes a DOC_COMMENT token; otherwise resume
// whitespace/comment munching.
441 let res = if is_doc_comment {
442 with_str_from(rdr, start_bpos, |string| {
443 // but comments with only "*"s between two "/"s are not
444 if !is_block_non_doc_comment(string) {
446 tok: token::DOC_COMMENT(str_to_ident(string)),
447 sp: codemap::mk_sp(start_bpos, rdr.pos.get())
457 // restart whitespace munch.
458 if res.is_some() { res } else { consume_whitespace_and_comments(rdr) }
// Scans an optional float exponent ('e'/'E', optional sign, digits) and
// returns its text, or None when no exponent is present.
461 fn scan_exponent(rdr: &StringReader, start_bpos: BytePos) -> Option<~str> {
462 // \x00 hits the `return None` case immediately, so this is fine.
463 let mut c = rdr.curr.get().unwrap_or('\x00');
465 if c == 'e' || c == 'E' {
468 c = rdr.curr.get().unwrap_or('\x00');
469 if c == '-' || c == '+' {
473 let exponent = scan_digits(rdr, 10u);
474 if exponent.len() > 0u {
475 return Some(rslt + exponent);
// An 'e' with no digits after it is a malformed float literal.
477 fatal_span(rdr, start_bpos, rdr.last_pos.get(),
478 ~"scan_exponent: bad fp literal");
480 } else { return None::<~str>; }
// Scans a run of digits in `radix`, skipping '_' separators; stops at the
// first non-digit and returns what was collected (possibly empty).
483 fn scan_digits(rdr: &StringReader, radix: uint) -> ~str {
486 let c = rdr.curr.get();
487 if c == Some('_') { bump(rdr); continue; }
488 match c.and_then(|cc| char::to_digit(cc, radix)) {
490 rslt.push_char(c.unwrap());
// Rejects float literals written with a non-decimal base prefix
// (0x / 0o / 0b); decimal (or any other `base` value) passes silently.
498 fn check_float_base(rdr: &StringReader, start_bpos: BytePos, last_bpos: BytePos,
501 16u => fatal_span(rdr, start_bpos, last_bpos,
502 ~"hexadecimal float literal is not supported"),
503 8u => fatal_span(rdr, start_bpos, last_bpos,
504 ~"octal float literal is not supported"),
505 2u => fatal_span(rdr, start_bpos, last_bpos,
506 ~"binary float literal is not supported"),
// Lexes a numeric literal whose first character `c` has been read:
// handles 0x/0o/0b base prefixes, integer suffixes (u/i with optional
// 8/16/32/64 width), a fractional part, an exponent, and f32/f64 suffixes.
// NOTE(review): many interior lines are missing from this excerpt
// (embedded numbering jumps repeatedly) — confirm against the full file.
511 fn scan_number(c: char, rdr: &StringReader) -> token::Token {
515 let mut n = nextch(rdr).unwrap_or('\x00');
516 let start_bpos = rdr.last_pos.get();
// Base prefix: 0x hex, 0o octal, 0b binary; otherwise decimal.
517 if c == '0' && n == 'x' {
521 } else if c == '0' && n == 'o' {
525 } else if c == '0' && n == 'b' {
530 num_str = scan_digits(rdr, base);
531 c = rdr.curr.get().unwrap_or('\x00');
// Suffixed integer literal: 'u'/'i' optionally followed by a width.
533 if c == 'u' || c == 'i' {
534 enum Result { Signed(ast::IntTy), Unsigned(ast::UintTy) }
535 let signed = c == 'i';
537 if signed { Signed(ast::TyI) }
538 else { Unsigned(ast::TyU) }
541 c = rdr.curr.get().unwrap_or('\x00');
544 tp = if signed { Signed(ast::TyI8) }
545 else { Unsigned(ast::TyU8) };
547 n = nextch(rdr).unwrap_or('\x00');
548 if c == '1' && n == '6' {
551 tp = if signed { Signed(ast::TyI16) }
552 else { Unsigned(ast::TyU16) };
553 } else if c == '3' && n == '2' {
556 tp = if signed { Signed(ast::TyI32) }
557 else { Unsigned(ast::TyU32) };
558 } else if c == '6' && n == '4' {
561 tp = if signed { Signed(ast::TyI64) }
562 else { Unsigned(ast::TyU64) };
564 if num_str.len() == 0u {
565 fatal_span(rdr, start_bpos, rdr.last_pos.get(),
566 ~"no valid digits found for number");
568 let parsed = match from_str_radix::<u64>(num_str, base as uint) {
570 None => fatal_span(rdr, start_bpos, rdr.last_pos.get(),
571 ~"int literal is too large")
575 Signed(t) => return token::LIT_INT(parsed as i64, t),
576 Unsigned(t) => return token::LIT_UINT(parsed, t)
// Unsuffixed: look for a fractional part. A '.' followed by an ident
// start or another '.' is NOT a decimal point (method call / DOTDOT).
579 let mut is_float = false;
580 if rdr.curr_is('.') && !(ident_start(nextch(rdr)) || nextch_is(rdr, '.')) {
583 let dec_part = scan_digits(rdr, 10u);
584 num_str.push_char('.');
585 num_str.push_str(dec_part);
587 match scan_exponent(rdr, start_bpos) {
590 num_str.push_str(*s);
// Explicit float suffix: must be exactly f32 or f64.
595 if rdr.curr_is('f') {
597 c = rdr.curr.get().unwrap_or('\x00');
598 n = nextch(rdr).unwrap_or('\x00');
599 if c == '3' && n == '2' {
602 check_float_base(rdr, start_bpos, rdr.last_pos.get(), base);
603 return token::LIT_FLOAT(str_to_ident(num_str), ast::TyF32);
604 } else if c == '6' && n == '4' {
607 check_float_base(rdr, start_bpos, rdr.last_pos.get(), base);
608 return token::LIT_FLOAT(str_to_ident(num_str), ast::TyF64);
609 /* FIXME (#2252): if this is out of range for either a
610 32-bit or 64-bit float, it won't be noticed till the
613 fatal_span(rdr, start_bpos, rdr.last_pos.get(),
614 ~"expected `f32` or `f64` suffix");
// Float with no suffix.
618 check_float_base(rdr, start_bpos, rdr.last_pos.get(), base);
619 return token::LIT_FLOAT_UNSUFFIXED(str_to_ident(num_str));
// Plain unsuffixed integer.
621 if num_str.len() == 0u {
622 fatal_span(rdr, start_bpos, rdr.last_pos.get(),
623 ~"no valid digits found for number");
625 let parsed = match from_str_radix::<u64>(num_str, base as uint) {
627 None => fatal_span(rdr, start_bpos, rdr.last_pos.get(),
628 ~"int literal is too large")
631 debug!("lexing {} as an unsuffixed integer literal", num_str);
632 return token::LIT_INT_UNSUFFIXED(parsed as i64);
// Lexes the digits of a \xNN / \uNNNN / \UNNNNNNNN escape (the backslash
// and prefix letter have already been consumed) and decodes the character.
636 fn scan_numeric_escape(rdr: &StringReader, n_hex_digits: uint) -> char {
637 let mut accum_int = 0;
638 let mut i = n_hex_digits;
639 let start_bpos = rdr.last_pos.get();
640 while i != 0u && !is_eof(rdr) {
641 let n = rdr.curr.get();
642 if !is_hex_digit(n) {
643 fatal_span_char(rdr, rdr.last_pos.get(), rdr.pos.get(),
644 ~"illegal character in numeric character escape",
// NOTE(review): the base-16 shift of the accumulator (presumably
// `accum_int *= 16;`) is not visible in this excerpt (numbering jumps
// 644 -> 649) — confirm against the full file.
649 accum_int += hex_digit_val(n);
// Ran out of input before collecting all the required digits.
652 if i != 0 && is_eof(rdr) {
653 fatal_span(rdr, start_bpos, rdr.last_pos.get(),
654 ~"unterminated numeric character escape");
// Reject code points that are not valid Unicode scalar values.
657 match char::from_u32(accum_int as u32) {
659 None => fatal_span(rdr, start_bpos, rdr.last_pos.get(),
660 ~"illegal numeric character escape")
// True when `c` may begin an identifier: an ASCII letter or a non-ASCII
// XID_Start character. NOTE(review): one alternative between lines 668
// and 670 is not visible in this excerpt (presumably the '_' case) —
// confirm against the full file.
664 fn ident_start(c: Option<char>) -> bool {
665 let c = match c { Some(c) => c, None => return false };
667 (c >= 'a' && c <= 'z')
668 || (c >= 'A' && c <= 'Z')
670 || (c > '\x7f' && char::is_XID_start(c))
// True when `c` may continue an identifier: ASCII letter or digit, or a
// non-ASCII XID_Continue character. NOTE(review): one alternative between
// lines 678 and 680 is not visible in this excerpt (presumably the '_'
// case) — confirm against the full file.
673 fn ident_continue(c: Option<char>) -> bool {
674 let c = match c { Some(c) => c, None => return false };
676 (c >= 'a' && c <= 'z')
677 || (c >= 'A' && c <= 'Z')
678 || (c >= '0' && c <= '9')
680 || (c > '\x7f' && char::is_XID_continue(c))
683 // return the next token from the string
684 // EFFECT: advances the input past that token
685 // EFFECT: updates the interner
// NOTE(review): many interior lines of this function are missing from
// this excerpt (embedded numbering jumps) — confirm against the full file.
686 fn next_token_inner(rdr: &StringReader) -> token::Token {
687 let c = rdr.curr.get();
// Identifiers and keywords — but not r"..."/r#"..." raw-string openers.
688 if ident_start(c) && !nextch_is(rdr, '"') && !nextch_is(rdr, '#') {
689 // Note: r as in r" or r#" is part of a raw string literal,
690 // not an identifier, and is handled further down.
692 let start = rdr.last_pos.get();
693 while ident_continue(rdr.curr.get()) {
697 return with_str_from(rdr, start, |string| {
// An identifier directly followed by `::` is flagged as a module name.
701 let is_mod_name = rdr.curr_is(':') && nextch_is(rdr, ':');
703 // FIXME: perform NFKC normalization here. (Issue #2253)
704 token::IDENT(str_to_ident(string), is_mod_name)
// Numeric literals.
709 return scan_number(c.unwrap(), rdr);
// Local helper: lex `op` or `op=` (e.g. `+` vs `+=`).
711 fn binop(rdr: &StringReader, op: token::BinOp) -> token::Token {
713 if rdr.curr_is('=') {
715 return token::BINOPEQ(op);
716 } else { return token::BINOP(op); }
// Single-character and compound punctuation tokens.
718 match c.expect("next_token_inner called at EOF") {
725 ';' => { bump(rdr); return token::SEMI; }
726 ',' => { bump(rdr); return token::COMMA; }
// '.' vs '..' vs '...'.
729 return if rdr.curr_is('.') {
731 if rdr.curr_is('.') {
741 '(' => { bump(rdr); return token::LPAREN; }
742 ')' => { bump(rdr); return token::RPAREN; }
743 '{' => { bump(rdr); return token::LBRACE; }
744 '}' => { bump(rdr); return token::RBRACE; }
745 '[' => { bump(rdr); return token::LBRACKET; }
746 ']' => { bump(rdr); return token::RBRACKET; }
747 '@' => { bump(rdr); return token::AT; }
748 '#' => { bump(rdr); return token::POUND; }
749 '~' => { bump(rdr); return token::TILDE; }
// ':' vs '::'.
752 if rdr.curr_is(':') {
754 return token::MOD_SEP;
755 } else { return token::COLON; }
758 '$' => { bump(rdr); return token::DOLLAR; }
764 // Multi-byte tokens.
// '=' family: EQ / EQEQ / FAT_ARROW.
767 if rdr.curr_is('=') {
770 } else if rdr.curr_is('>') {
772 return token::FAT_ARROW;
// '!' family: NOT / NE.
779 if rdr.curr_is('=') {
782 } else { return token::NOT; }
// '<' family: LE, SHL, LARROW/DARROW, LT.
786 match rdr.curr.get().unwrap_or('\x00') {
787 '=' => { bump(rdr); return token::LE; }
788 '<' => { return binop(rdr, token::SHL); }
791 match rdr.curr.get().unwrap_or('\x00') {
792 '>' => { bump(rdr); return token::DARROW; }
793 _ => { return token::LARROW; }
796 _ => { return token::LT; }
// '>' family: GE, SHR, GT.
801 match rdr.curr.get().unwrap_or('\x00') {
802 '=' => { bump(rdr); return token::GE; }
803 '>' => { return binop(rdr, token::SHR); }
804 _ => { return token::GT; }
808 // Either a character constant 'a' OR a lifetime name 'abc
810 let start = rdr.last_pos.get();
812 // the eof will be picked up by the final `'` check below
813 let mut c2 = rdr.curr.get().unwrap_or('\x00');
816 // If the character is an ident start not followed by another single
817 // quote, then this is a lifetime name:
818 if ident_start(Some(c2)) && !rdr.curr_is('\'') {
819 while ident_continue(rdr.curr.get()) {
822 return with_str_from(rdr, start, |lifetime_name| {
823 let ident = str_to_ident(lifetime_name);
824 let tok = &token::IDENT(ident, false);
// Keywords are not allowed as lifetime names ('static excepted).
826 if token::is_keyword(token::keywords::Self, tok) {
827 fatal_span(rdr, start, rdr.last_pos.get(),
828 ~"invalid lifetime name: 'self is no longer a special lifetime");
829 } else if token::is_any_keyword(tok) &&
830 !token::is_keyword(token::keywords::Static, tok) {
831 fatal_span(rdr, start, rdr.last_pos.get(),
832 ~"invalid lifetime name");
834 token::LIFETIME(ident)
839 // Otherwise it is a character constant:
842 // '\X' for some X must be a character constant:
843 let escaped = rdr.curr.get();
844 let escaped_pos = rdr.last_pos.get();
857 'x' => scan_numeric_escape(rdr, 2u),
858 'u' => scan_numeric_escape(rdr, 4u),
859 'U' => scan_numeric_escape(rdr, 8u),
861 fatal_span_char(rdr, escaped_pos, rdr.last_pos.get(),
862 ~"unknown character escape", c2)
// Bare control characters / quote must be escaped inside '...'.
868 '\t' | '\n' | '\r' | '\'' => {
869 fatal_span_char(rdr, start, rdr.last_pos.get(),
870 ~"character constant must be escaped", c2);
// A character literal must be closed by a single quote.
874 if !rdr.curr_is('\'') {
875 fatal_span_verbose(rdr,
876 // Byte offsetting here is okay because the
877 // character before position `start` is an
878 // ascii single quote.
881 ~"unterminated character constant");
883 bump(rdr); // advance curr past token
884 return token::LIT_CHAR(c2 as u32);
// Ordinary double-quoted string literal, with escape processing.
887 let mut accum_str = ~"";
888 let start_bpos = rdr.last_pos.get();
890 while !rdr.curr_is('"') {
892 fatal_span(rdr, start_bpos, rdr.last_pos.get(),
893 ~"unterminated double quote string");
896 let ch = rdr.curr.get().unwrap();
901 fatal_span(rdr, start_bpos, rdr.last_pos.get(),
902 ~"unterminated double quote string");
905 let escaped = rdr.curr.get().unwrap();
906 let escaped_pos = rdr.last_pos.get();
909 'n' => accum_str.push_char('\n'),
910 'r' => accum_str.push_char('\r'),
911 't' => accum_str.push_char('\t'),
912 '\\' => accum_str.push_char('\\'),
913 '\'' => accum_str.push_char('\''),
914 '"' => accum_str.push_char('"'),
// Escaped newline: nothing is appended; following whitespace is eaten.
915 '\n' => consume_whitespace(rdr),
916 '0' => accum_str.push_char('\x00'),
918 accum_str.push_char(scan_numeric_escape(rdr, 2u));
921 accum_str.push_char(scan_numeric_escape(rdr, 4u));
924 accum_str.push_char(scan_numeric_escape(rdr, 8u));
927 fatal_span_char(rdr, escaped_pos, rdr.last_pos.get(),
928 ~"unknown string escape", c2);
932 _ => accum_str.push_char(ch)
936 return token::LIT_STR(str_to_ident(accum_str));
// Raw string literal: r"..." or r#"..."# with a matching hash count.
939 let start_bpos = rdr.last_pos.get();
941 let mut hash_count = 0u;
942 while rdr.curr_is('#') {
948 fatal_span(rdr, start_bpos, rdr.last_pos.get(),
949 ~"unterminated raw string");
950 } else if !rdr.curr_is('"') {
951 fatal_span_char(rdr, start_bpos, rdr.last_pos.get(),
952 ~"only `#` is allowed in raw string delimitation; \
953 found illegal character",
954 rdr.curr.get().unwrap());
957 let content_start_bpos = rdr.last_pos.get();
958 let mut content_end_bpos;
961 fatal_span(rdr, start_bpos, rdr.last_pos.get(),
962 ~"unterminated raw string");
// A closing quote only ends the string when followed by `hash_count`
// '#' characters.
964 if rdr.curr_is('"') {
965 content_end_bpos = rdr.last_pos.get();
966 for _ in range(0, hash_count) {
968 if !rdr.curr_is('#') {
977 let str_content = with_str_from_to(rdr,
981 return token::LIT_STR_RAW(str_content, hash_count);
// '-' vs '->'.
984 if nextch_is(rdr, '>') {
987 return token::RARROW;
988 } else { return binop(rdr, token::MINUS); }
// '&' vs '&&'.
991 if nextch_is(rdr, '&') {
994 return token::ANDAND;
995 } else { return binop(rdr, token::AND); }
// '|' vs '||'.
999 Some('|') => { bump(rdr); bump(rdr); return token::OROR; }
1000 _ => { return binop(rdr, token::OR); }
1003 '+' => { return binop(rdr, token::PLUS); }
1004 '*' => { return binop(rdr, token::STAR); }
1005 '/' => { return binop(rdr, token::SLASH); }
1006 '^' => { return binop(rdr, token::CARET); }
1007 '%' => { return binop(rdr, token::PERCENT); }
// Anything else cannot start a token.
1009 fatal_span_char(rdr, rdr.last_pos.get(), rdr.pos.get(),
1010 ~"unknown start of token", c);
// Eats whitespace only (no comments); used by string-literal lexing to
// swallow the rest of an escaped-newline continuation.
1015 fn consume_whitespace(rdr: &StringReader) {
1016 while is_whitespace(rdr.curr.get()) && !is_eof(rdr) { bump(rdr); }
1023 use codemap::{BytePos, CodeMap, Span};
1026 use parse::token::{str_to_ident};
// Test helper: builds a SpanHandler whose output is discarded
// (NullWriter), so lexer errors in tests do not print.
1029 fn mk_sh() -> diagnostic::SpanHandler {
1030 let emitter = diagnostic::EmitterWriter::new(~util::NullWriter);
1031 let handler = diagnostic::mk_handler(~emitter);
1032 diagnostic::mk_span_handler(handler, CodeMap::new())
1035 // open a string reader for the given string
1036 fn setup<'a>(span_handler: &'a diagnostic::SpanHandler,
1037 teststr: ~str) -> StringReader<'a> {
// The filename "zebra.rs" is arbitrary; only the contents matter here.
1038 let fm = span_handler.cm.new_filemap(~"zebra.rs", teststr);
1039 new_string_reader(span_handler, fm)
// Body of a lexer smoke test (the `#[test] fn ...` header line is not
// visible in this excerpt): lexes a tiny program, checks the first two
// tokens and the reader position after each.
1043 let span_handler = mk_sh();
1044 let string_reader = setup(&span_handler,
1045 ~"/* my source file */ \
1046 fn main() { println!(\"zebra\"); }\n");
1047 let id = str_to_ident("fn");
1048 let tok1 = string_reader.next_token();
1049 let tok2 = TokenAndSpan{
1050 tok:token::IDENT(id, false),
1051 sp:Span {lo:BytePos(21),hi:BytePos(23),expn_info: None}};
1052 assert_eq!(tok1,tok2);
1053 // the 'main' id is already read:
1054 assert_eq!(string_reader.last_pos.get().clone(), BytePos(28));
1055 // read another token:
1056 let tok3 = string_reader.next_token();
1057 let tok4 = TokenAndSpan{
1058 tok:token::IDENT(str_to_ident("main"), false),
1059 sp:Span {lo:BytePos(24),hi:BytePos(28),expn_info: None}};
1060 assert_eq!(tok3,tok4);
1061 // the lparen is already read:
1062 assert_eq!(string_reader.last_pos.get().clone(), BytePos(29))
1065 // check that the given reader produces the desired stream
1066 // of tokens (stop checking after exhausting the expected vec)
1067 fn check_tokenization (string_reader: StringReader, expected: Vec<token::Token> ) {
1068 for expected_tok in expected.iter() {
1069 assert_eq!(&string_reader.next_token().tok, expected_tok);
1073 // make the identifier by looking up the string in the interner
1074 fn mk_ident (id: &str, is_mod_name: bool) -> token::Token {
1075 token::IDENT (str_to_ident(id),is_mod_name)
// `::` handling: the bool on IDENT marks an identifier immediately
// followed by `::` (a module-path segment) — see `next_token_inner`.
1078 #[test] fn doublecolonparsing () {
1079 check_tokenization(setup(&mk_sh(), ~"a b"),
1080 vec!(mk_ident("a",false),
1081 mk_ident("b",false)));
1084 #[test] fn dcparsing_2 () {
1085 check_tokenization(setup(&mk_sh(), ~"a::b"),
1086 vec!(mk_ident("a",true),
1088 mk_ident("b",false)));
1091 #[test] fn dcparsing_3 () {
1092 check_tokenization(setup(&mk_sh(), ~"a ::b"),
1093 vec!(mk_ident("a",false),
1095 mk_ident("b",false)));
1098 #[test] fn dcparsing_4 () {
1099 check_tokenization(setup(&mk_sh(), ~"a:: b"),
1100 vec!(mk_ident("a",true),
1102 mk_ident("b",false)));
// Character, lifetime, and raw-string literal lexing.
1105 #[test] fn character_a() {
1106 assert_eq!(setup(&mk_sh(), ~"'a'").next_token().tok,
1107 token::LIT_CHAR('a' as u32));
1110 #[test] fn character_space() {
1111 assert_eq!(setup(&mk_sh(), ~"' '").next_token().tok,
1112 token::LIT_CHAR(' ' as u32));
1115 #[test] fn character_escaped() {
1116 assert_eq!(setup(&mk_sh(), ~"'\\n'").next_token().tok,
1117 token::LIT_CHAR('\n' as u32));
1120 #[test] fn lifetime_name() {
1121 assert_eq!(setup(&mk_sh(), ~"'abc").next_token().tok,
1122 token::LIFETIME(token::str_to_ident("abc")));
1125 #[test] fn raw_string() {
1126 assert_eq!(setup(&mk_sh(), ~"r###\"\"#a\\b\x00c\"\"###").next_token().tok,
1127 token::LIT_STR_RAW(token::str_to_ident("\"#a\\b\x00c\""), 3));
// Doc-comment classification ("////" is not a doc comment) and nested
// block comments.
1130 #[test] fn line_doc_comments() {
1131 assert!(!is_line_non_doc_comment("///"));
1132 assert!(!is_line_non_doc_comment("/// blah"));
1133 assert!(is_line_non_doc_comment("////"));
1136 #[test] fn nested_block_comments() {
1137 assert_eq!(setup(&mk_sh(), ~"/* /* */ */'a'").next_token().tok,
1138 token::LIT_CHAR('a' as u32));