1 // Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
12 use codemap::{BytePos, CharPos, CodeMap, Pos, Span};
14 use diagnostic::SpanHandler;
15 use ext::tt::transcribe::{tt_next_token};
16 use ext::tt::transcribe::{dup_tt_reader};
18 use parse::token::{str_to_ident};
20 use std::cast::transmute;
21 use std::cell::{Cell, RefCell};
23 use std::num::from_str_radix;
26 pub use ext::tt::transcribe::{TtReader, new_tt_reader};
// NOTE(review): the enclosing `trait Reader` header is elided from this view;
// these are its required methods, implemented below for StringReader and TtReader.
// True once the reader has consumed its entire input.
fn is_eof(@self) -> bool;
// Return the next token and advance past it.
fn next_token(@self) -> TokenAndSpan;
// Report a fatal error at the current peek span; never returns.
fn fatal(@self, ~str) -> !;
// Access the diagnostic handler used for error reporting.
fn span_diag(@self) -> @SpanHandler;
// Look at the upcoming token without consuming it.
fn peek(@self) -> TokenAndSpan;
// Clone this reader (see the caveat on dup_string_reader below).
fn dup(@self) -> @Reader;
// A lexed token paired with the source span it covers. (Field list is
// elided from this view; usage elsewhere shows fields `tok` and `sp`.)
#[deriving(Clone, Eq)]
pub struct TokenAndSpan {
// Lexer state for scanning the source text of a single FileMap.
// Interior mutability (Cell/RefCell) lets free functions advance the
// reader through a shared @-pointer. (Some fields, e.g. `pos`, `col`,
// `curr`, `src`, are elided from this view but referenced below.)
pub struct StringReader {
    // sink for error/fatal diagnostics
    span_diagnostic: @SpanHandler,
    // The absolute offset within the codemap of the next character to read
    // The absolute offset within the codemap of the last character read(curr)
    last_pos: Cell<BytePos>,
    // The column of the next character to read
    // The last character to be read
    filemap: @codemap::FileMap,
    // one-token lookahead buffer, primed by string_advance_token
    peek_tok: RefCell<token::Token>,
    peek_span: RefCell<Span>,
// Create a string reader over `filemap` with its one-token lookahead
// primed. (Return type and trailing lines elided from this view.)
pub fn new_string_reader(span_diagnostic: @SpanHandler,
                         filemap: @codemap::FileMap)
    let r = new_low_level_string_reader(span_diagnostic, filemap);
    string_advance_token(r); /* fill in peek_* */
/* For comments.rs, which hackily pokes into 'pos' and 'curr' */
// Build a StringReader positioned at the start of the filemap WITHOUT
// priming the lookahead (peek_tok/peek_span hold dummy values).
pub fn new_low_level_string_reader(span_diagnostic: @SpanHandler,
                                   filemap: @codemap::FileMap)
    // Force the initial reader bump to start on a fresh line
    let initial_char = '\n';
    let r = @StringReader {
        span_diagnostic: span_diagnostic,
        pos: Cell::new(filemap.start_pos),
        last_pos: Cell::new(filemap.start_pos),
        col: Cell::new(CharPos(0)),
        curr: Cell::new(initial_char),
        /* dummy values; not read */
        peek_tok: RefCell::new(token::EOF),
        peek_span: RefCell::new(codemap::DUMMY_SP),
// duplicating the string reader is probably a bad idea, in
// that using them will cause interleaved pushes of line
// offsets to the underlying filemap...
// Produces a deep copy of the mutable scanning state (positions, column,
// current char, lookahead); the diagnostic handler is shared.
fn dup_string_reader(r: @StringReader) -> @StringReader {
        span_diagnostic: r.span_diagnostic,
        pos: Cell::new(r.pos.get()),
        last_pos: Cell::new(r.last_pos.get()),
        col: Cell::new(r.col.get()),
        curr: Cell::new(r.curr.get()),
        peek_tok: r.peek_tok.clone(),
        peek_span: r.peek_span.clone(),
impl Reader for StringReader {
    fn is_eof(@self) -> bool { is_eof(self) }
    // return the next token. EFFECT: advances the string_reader.
    fn next_token(@self) -> TokenAndSpan {
        // Hand out the buffered lookahead token, swapping a placeholder
        // (UNDERSCORE) into the buffer. (Struct-literal wrapper lines
        // are elided from this view.)
        let mut peek_tok = self.peek_tok.borrow_mut();
            tok: util::replace(peek_tok.get(), token::UNDERSCORE),
            sp: self.peek_span.get(),
        // refill the lookahead for the following call
        string_advance_token(self);
    fn fatal(@self, m: ~str) -> ! {
        // report at the span of the token about to be returned
        self.span_diagnostic.span_fatal(self.peek_span.get(), m)
    fn span_diag(@self) -> @SpanHandler { self.span_diagnostic }
    fn peek(@self) -> TokenAndSpan {
        // FIXME(pcwalton): Bad copy!
            tok: self.peek_tok.get(),
            sp: self.peek_span.get(),
    fn dup(@self) -> @Reader { dup_string_reader(self) as @Reader }
// Reader over a token-tree stream (macro expansion), delegating to the
// transcriber in ext::tt::transcribe.
impl Reader for TtReader {
    fn is_eof(@self) -> bool {
        // the transcriber signals exhaustion with an EOF token
        let cur_tok = self.cur_tok.borrow();
        *cur_tok.get() == token::EOF
    fn next_token(@self) -> TokenAndSpan {
        let r = tt_next_token(self);
        debug!("TtReader: r={:?}", r);
    fn fatal(@self, m: ~str) -> ! {
        self.sp_diag.span_fatal(self.cur_span.get(), m);
    fn span_diag(@self) -> @SpanHandler { self.sp_diag }
    fn peek(@self) -> TokenAndSpan {
        tok: self.cur_tok.get(),
        sp: self.cur_span.get(),
    fn dup(@self) -> @Reader { dup_tt_reader(self) as @Reader }
// report a lexical error spanning [`from_pos`, `to_pos`)
// (Remaining parameters and the call into rdr.fatal are elided here.)
fn fatal_span(rdr: @StringReader,
    // point the peek span at the offending range so the error is
    // reported against it
    rdr.peek_span.set(codemap::mk_sp(from_pos, to_pos));
// report a lexical error spanning [`from_pos`, `to_pos`), appending an
// escaped character to the error message
fn fatal_span_char(rdr: @StringReader,
    // escape_default renders the char readably (e.g. newline -> "\\n")
    char::escape_default(c, |c| m.push_char(c));
    fatal_span(rdr, from_pos, to_pos, m);
// report a lexical error spanning [`from_pos`, `to_pos`), appending the
// offending string to the error message
fn fatal_span_verbose(rdr: @StringReader,
    // slice the raw source text covered by the bad span
    let s = rdr.src.slice(
        byte_offset(rdr, from_pos).to_uint(),
        byte_offset(rdr, to_pos).to_uint());
    fatal_span(rdr, from_pos, to_pos, m);
// EFFECT: advance peek_tok and peek_span to refer to the next token.
// EFFECT: update the interner, maybe.
fn string_advance_token(r: @StringReader) {
    match consume_whitespace_and_comments(r) {
            // a doc-comment came back as a token in its own right
            r.peek_span.set(comment.sp);
            r.peek_tok.set(comment.tok);
            // at end of input: buffer EOF (arm header elided in this view)
            r.peek_tok.set(token::EOF);
            // otherwise scan a real token and span it from where it began
            let start_bytepos = r.last_pos.get();
            r.peek_tok.set(next_token_inner(r));
            r.peek_span.set(codemap::mk_sp(start_bytepos,
// Convert an absolute codemap position into an offset into this
// reader's source string by subtracting the filemap's start position.
fn byte_offset(rdr: &StringReader, pos: BytePos) -> BytePos {
    (pos - rdr.filemap.start_pos)
/// Calls `f` with a string slice of the source text spanning from `start`
/// up to but excluding `rdr.last_pos`, meaning the slice does not include
/// the character `rdr.curr`.
pub fn with_str_from<T>(
    // delegate to the explicit-end variant, ending at last_pos
    with_str_from_to(rdr, start, rdr.last_pos.get(), f)
/// Calls `f` with a string slice of the source text spanning from `start`
/// up to but excluding `end`.
fn with_str_from_to<T>(
        // positions are codemap-absolute; convert to string offsets
        byte_offset(rdr, start).to_uint(),
        byte_offset(rdr, end).to_uint()))
// EFFECT: advance the StringReader by one character. If a newline is
// discovered, add it to the FileMap's list of line start offsets.
pub fn bump(rdr: &StringReader) {
    rdr.last_pos.set(rdr.pos.get());
    let current_byte_offset = byte_offset(rdr, rdr.pos.get()).to_uint();
    if current_byte_offset < (rdr.src).len() {
        // the EOF sentinel (-1 transmuted to char) must never appear
        // while real input remains
        assert!(rdr.curr.get() != unsafe {
        }); // FIXME: #8971: unsound
        let last_char = rdr.curr.get();
        // char_range_at decodes a full (possibly multi-byte) UTF-8 char
        let next = rdr.src.char_range_at(current_byte_offset);
        let byte_offset_diff = next.next - current_byte_offset;
        rdr.pos.set(rdr.pos.get() + Pos::from_uint(byte_offset_diff));
        rdr.curr.set(next.ch);
        rdr.col.set(rdr.col.get() + CharPos(1u));
        if last_char == '\n' {
            // record the new line's start offset and reset the column
            rdr.filemap.next_line(rdr.last_pos.get());
            rdr.col.set(CharPos(0u));
        if byte_offset_diff > 1 {
            // remember multibyte chars so later column/offset math is right
            rdr.filemap.record_multibyte_char(
                Pos::from_uint(current_byte_offset), byte_offset_diff);
        // past the end of the source: install the EOF sentinel
        // (else-branch header elided in this view)
        rdr.curr.set(unsafe { transmute(-1u32) }); // FIXME: #8971: unsound
// True when the reader's current char is the EOF sentinel set by bump().
pub fn is_eof(rdr: @StringReader) -> bool {
    rdr.curr.get() == unsafe { transmute(-1u32) } // FIXME: #8971: unsound
// Peek at the character after the current one without advancing;
// returns the EOF sentinel when no character follows.
pub fn nextch(rdr: @StringReader) -> char {
    let offset = byte_offset(rdr, rdr.pos.get()).to_uint();
    if offset < (rdr.src).len() {
        return rdr.src.char_at(offset);
    } else { return unsafe { transmute(-1u32) }; } // FIXME: #8971: unsound
// Numeric value (0-15) of an ASCII hex digit in either case.
// (The fall-through for non-hex input is elided from this view.)
fn hex_digit_val(c: char) -> int {
    if in_range(c, '0', '9') { return (c as int) - ('0' as int); }
    if in_range(c, 'a', 'f') { return (c as int) - ('a' as int) + 10; }
    if in_range(c, 'A', 'F') { return (c as int) - ('A' as int) + 10; }
// True for the four ASCII whitespace characters the lexer skips.
pub fn is_whitespace(c: char) -> bool {
    return c == ' ' || c == '\t' || c == '\r' || c == '\n';
// Inclusive range test: lo <= c <= hi.
fn in_range(c: char, lo: char, hi: char) -> bool {
    return lo <= c && c <= hi
300 fn is_dec_digit(c: char) -> bool { return in_range(c, '0', '9'); }
// True for ASCII hex digits in either case.
fn is_hex_digit(c: char) -> bool {
    return in_range(c, '0', '9') || in_range(c, 'a', 'f') ||
        in_range(c, 'A', 'F');
// EFFECT: eats whitespace and comments.
// returns a Some(sugared-doc-attr) if one exists, None otherwise.
fn consume_whitespace_and_comments(rdr: @StringReader)
                                -> Option<TokenAndSpan> {
    while is_whitespace(rdr.curr.get()) { bump(rdr); }
    return consume_any_line_comment(rdr);
// A line comment beginning with four (or more) slashes is an ordinary
// comment, not a doc-comment.
pub fn is_line_non_doc_comment(s: &str) -> bool {
    s.starts_with("////")
// PRECONDITION: rdr.curr is not whitespace
// EFFECT: eats any kind of comment.
// returns a Some(sugared-doc-attr) if one exists, None otherwise
// (Several match/brace lines are elided from this view.)
fn consume_any_line_comment(rdr: @StringReader)
                            -> Option<TokenAndSpan> {
    if rdr.curr.get() == '/' {
        // line comments starting with "///" or "//!" are doc-comments
        if rdr.curr.get() == '/' || rdr.curr.get() == '!' {
            // back up over the three chars already consumed ("//" + marker)
            let start_bpos = rdr.pos.get() - BytePos(3);
            while rdr.curr.get() != '\n' && !is_eof(rdr) {
            let ret = with_str_from(rdr, start_bpos, |string| {
                // but comments with only more "/"s are not
                if !is_line_non_doc_comment(string) {
                    tok: token::DOC_COMMENT(str_to_ident(string)),
                    sp: codemap::mk_sp(start_bpos, rdr.pos.get())
            // ordinary "//" comment: skip to end of line
            while rdr.curr.get() != '\n' && !is_eof(rdr) { bump(rdr); }
            // Restart whitespace munch.
            return consume_whitespace_and_comments(rdr);
        '*' => { bump(rdr); bump(rdr); return consume_block_comment(rdr); }
    } else if rdr.curr.get() == '#' {
        if nextch(rdr) == '!' {
            // I guess this is the only way to figure out if
            // we're at the beginning of the file...
            let cmap = @CodeMap::new();
            let mut files = cmap.files.borrow_mut();
            files.get().push(rdr.filemap);
            let loc = cmap.lookup_char_pos_adj(rdr.last_pos.get());
            // a "#!" on the very first line is a shebang, not a token
            if loc.line == 1u && loc.col == CharPos(0u) {
                while rdr.curr.get() != '\n' && !is_eof(rdr) { bump(rdr); }
                return consume_whitespace_and_comments(rdr);
// A block comment opening with "/***" is an ordinary comment, not a
// doc-comment.
pub fn is_block_non_doc_comment(s: &str) -> bool {
    s.starts_with("/***")
// might return a sugared-doc-attr
// Consumes a (possibly nested) block comment; `level` tracks nesting.
// (Loop headers and several bump() lines are elided from this view.)
fn consume_block_comment(rdr: @StringReader) -> Option<TokenAndSpan> {
    // block comments starting with "/**" or "/*!" are doc-comments
    let is_doc_comment = rdr.curr.get() == '*' || rdr.curr.get() == '!';
    // back up over what was already consumed to span the full comment
    let start_bpos = rdr.pos.get() - BytePos(if is_doc_comment {3} else {2});
    let mut level: int = 1;
        // EOF inside the comment is a fatal error
        let msg = if is_doc_comment {
            ~"unterminated block doc-comment"
            ~"unterminated block comment"
        fatal_span(rdr, start_bpos, rdr.last_pos.get(), msg);
    } else if rdr.curr.get() == '/' && nextch(rdr) == '*' {
        // nested open: "/*" increases the nesting level
    } else if rdr.curr.get() == '*' && nextch(rdr) == '/' {
        // close: "*/" decreases it
    let res = if is_doc_comment {
        with_str_from(rdr, start_bpos, |string| {
            // but comments with only "*"s between two "/"s are not
            if !is_block_non_doc_comment(string) {
                tok: token::DOC_COMMENT(str_to_ident(string)),
                sp: codemap::mk_sp(start_bpos, rdr.pos.get())
    // restart whitespace munch.
    if res.is_some() { res } else { consume_whitespace_and_comments(rdr) }
// Scan an optional float exponent ("e"/"E", optional sign, digits).
// Returns Some(exponent-text) if present, None if there is no exponent;
// fatal if "e" is present but no digits follow. (The `rslt`
// accumulation lines are elided from this view.)
fn scan_exponent(rdr: @StringReader, start_bpos: BytePos) -> Option<~str> {
    let mut c = rdr.curr.get();
    if c == 'e' || c == 'E' {
        if c == '-' || c == '+' {
        // exponent digits are always decimal, regardless of literal base
        let exponent = scan_digits(rdr, 10u);
        if exponent.len() > 0u {
            return Some(rslt + exponent);
        fatal_span(rdr, start_bpos, rdr.last_pos.get(),
                   ~"scan_exponent: bad fp literal");
    } else { return None::<~str>; }
// Consume a run of digits in the given radix, skipping '_' separators,
// and return them as a string. (The loop header and accumulation /
// termination arms are elided from this view.)
fn scan_digits(rdr: @StringReader, radix: uint) -> ~str {
        let c = rdr.curr.get();
        // underscores separate digit groups and are not recorded
        if c == '_' { bump(rdr); continue; }
        match char::to_digit(c, radix) {
// Reject float literals written with a non-decimal base prefix
// (0x / 0o / 0b); base 10 falls through silently.
fn check_float_base(rdr: @StringReader, start_bpos: BytePos, last_bpos: BytePos,
        16u => fatal_span(rdr, start_bpos, last_bpos,
                          ~"hexadecimal float literal is not supported"),
        8u => fatal_span(rdr, start_bpos, last_bpos,
                         ~"octal float literal is not supported"),
        2u => fatal_span(rdr, start_bpos, last_bpos,
                         ~"binary float literal is not supported"),
// Scan a numeric literal beginning with char `c` (already current).
// Handles base prefixes (0x/0o/0b), integer suffixes (u/i with optional
// width), float suffixes (f32/f64), decimal points, and exponents.
// (Many bump()/assignment lines are elided from this view.)
fn scan_number(c: char, rdr: @StringReader) -> token::Token {
    let mut n = nextch(rdr);
    let start_bpos = rdr.last_pos.get();
    // detect a base prefix: 0x = hex, 0o = octal, 0b = binary
    if c == '0' && n == 'x' {
    } else if c == '0' && n == 'o' {
    } else if c == '0' && n == 'b' {
    num_str = scan_digits(rdr, base);
    // --- explicitly suffixed integer literal (u/i, optional width) ---
    if c == 'u' || c == 'i' {
        enum Result { Signed(ast::IntTy), Unsigned(ast::UintTy) }
        let signed = c == 'i';
        // bare "u"/"i" suffix: machine-sized int/uint
        if signed { Signed(ast::TyI) }
        else { Unsigned(ast::TyU) }
        tp = if signed { Signed(ast::TyI8) }
        else { Unsigned(ast::TyU8) };
        if c == '1' && n == '6' {
            tp = if signed { Signed(ast::TyI16) }
            else { Unsigned(ast::TyU16) };
        } else if c == '3' && n == '2' {
            tp = if signed { Signed(ast::TyI32) }
            else { Unsigned(ast::TyU32) };
        } else if c == '6' && n == '4' {
            tp = if signed { Signed(ast::TyI64) }
            else { Unsigned(ast::TyU64) };
        if num_str.len() == 0u {
            fatal_span(rdr, start_bpos, rdr.last_pos.get(),
                       ~"no valid digits found for number");
        let parsed = match from_str_radix::<u64>(num_str, base as uint) {
            None => fatal_span(rdr, start_bpos, rdr.last_pos.get(),
                               ~"int literal is too large")
        Signed(t) => return token::LIT_INT(parsed as i64, t),
        Unsigned(t) => return token::LIT_UINT(parsed, t)
    // --- unsuffixed: may still be a float ---
    let mut is_float = false;
    // a '.' followed by an ident-start char is a method call / range,
    // not a decimal point
    if rdr.curr.get() == '.' && !(ident_start(nextch(rdr)) || nextch(rdr) ==
        let dec_part = scan_digits(rdr, 10u);
        num_str.push_char('.');
        num_str.push_str(dec_part);
    match scan_exponent(rdr, start_bpos) {
        num_str.push_str(*s);
    // --- explicit float suffix: f32 / f64 ---
    if rdr.curr.get() == 'f' {
        if c == '3' && n == '2' {
            check_float_base(rdr, start_bpos, rdr.last_pos.get(), base);
            return token::LIT_FLOAT(str_to_ident(num_str), ast::TyF32);
        } else if c == '6' && n == '4' {
            check_float_base(rdr, start_bpos, rdr.last_pos.get(), base);
            return token::LIT_FLOAT(str_to_ident(num_str), ast::TyF64);
            /* FIXME (#2252): if this is out of range for either a
            32-bit or 64-bit float, it won't be noticed till the
            fatal_span(rdr, start_bpos, rdr.last_pos.get(),
                       ~"expected `f32` or `f64` suffix");
    // float with no suffix
    check_float_base(rdr, start_bpos, rdr.last_pos.get(), base);
    return token::LIT_FLOAT_UNSUFFIXED(str_to_ident(num_str));
    if num_str.len() == 0u {
        fatal_span(rdr, start_bpos, rdr.last_pos.get(),
                   ~"no valid digits found for number");
    let parsed = match from_str_radix::<u64>(num_str, base as uint) {
        None => fatal_span(rdr, start_bpos, rdr.last_pos.get(),
                           ~"int literal is too large")
    debug!("lexing {} as an unsuffixed integer literal", num_str);
    return token::LIT_INT_UNSUFFIXED(parsed as i64);
// Scan exactly `n_hex_digits` hex digits of a numeric escape (\xNN,
// \uNNNN, \UNNNNNNNN) and decode them to a char; fatal on a non-hex
// digit or a value that is not a valid Unicode scalar.
// (Loop header and accumulator-shift lines are elided from this view.)
fn scan_numeric_escape(rdr: @StringReader, n_hex_digits: uint) -> char {
    let mut accum_int = 0;
    let mut i = n_hex_digits;
    let start_bpos = rdr.last_pos.get();
        let n = rdr.curr.get();
        if !is_hex_digit(n) {
            fatal_span_char(rdr, rdr.last_pos.get(), rdr.pos.get(),
                            ~"illegal character in numeric character escape",
        accum_int += hex_digit_val(n);
    // reject surrogate / out-of-range code points
    match char::from_u32(accum_int as u32) {
        None => fatal_span(rdr, start_bpos, rdr.last_pos.get(),
                           ~"illegal numeric character escape")
// True when `c` may begin an identifier: ASCII letter or a non-ASCII
// XID_start char. (One `||` alternative line is elided from this view.)
fn ident_start(c: char) -> bool {
    (c >= 'a' && c <= 'z')
    || (c >= 'A' && c <= 'Z')
    || (c > '\x7f' && char::is_XID_start(c))
// True when `c` may continue an identifier: ASCII alphanumeric or a
// non-ASCII XID_continue char. (One `||` alternative line is elided
// from this view.)
fn ident_continue(c: char) -> bool {
    (c >= 'a' && c <= 'z')
    || (c >= 'A' && c <= 'Z')
    || (c >= '0' && c <= '9')
    || (c > '\x7f' && char::is_XID_continue(c))
// return the next token from the string
// EFFECT: advances the input past that token
// EFFECT: updates the interner
// The big single-token dispatcher: identifiers/keywords, numbers, then a
// match on the first character for punctuation, lifetimes, char/string
// literals, and operators. (Many bump()/match-arm/brace lines are elided
// from this view.)
fn next_token_inner(rdr: @StringReader) -> token::Token {
    let c = rdr.curr.get();
    // --- identifiers ---
    if ident_start(c) && nextch(rdr) != '"' && nextch(rdr) != '#' {
        // Note: r as in r" or r#" is part of a raw string literal,
        // not an identifier, and is handled further down.
        let start = rdr.last_pos.get();
        while ident_continue(rdr.curr.get()) {
        return with_str_from(rdr, start, |string| {
            // a trailing "::" marks a module-path segment
            let is_mod_name = rdr.curr.get() == ':' && nextch(rdr) == ':';
            // FIXME: perform NFKC normalization here. (Issue #2253)
            token::IDENT(str_to_ident(string), is_mod_name)
    // --- numeric literals ---
    return scan_number(c, rdr);
    // helper: a binary operator, possibly fused with '=' (e.g. "+=")
    fn binop(rdr: @StringReader, op: token::BinOp) -> token::Token {
        if rdr.curr.get() == '=' {
            return token::BINOPEQ(op);
        } else { return token::BINOP(op); }
    // --- one-byte punctuation ---
    ';' => { bump(rdr); return token::SEMI; }
    ',' => { bump(rdr); return token::COMMA; }
    // '.', "..", "..." (dot-count disambiguation; some arms elided)
    return if rdr.curr.get() == '.' {
        if rdr.curr.get() == '.' {
    '(' => { bump(rdr); return token::LPAREN; }
    ')' => { bump(rdr); return token::RPAREN; }
    '{' => { bump(rdr); return token::LBRACE; }
    '}' => { bump(rdr); return token::RBRACE; }
    '[' => { bump(rdr); return token::LBRACKET; }
    ']' => { bump(rdr); return token::RBRACKET; }
    '@' => { bump(rdr); return token::AT; }
    '#' => { bump(rdr); return token::POUND; }
    '~' => { bump(rdr); return token::TILDE; }
    // ':' vs "::"
    if rdr.curr.get() == ':' {
        return token::MOD_SEP;
    } else { return token::COLON; }
    '$' => { bump(rdr); return token::DOLLAR; }
    // Multi-byte tokens.
    // '=' / "==" / "=>"
    if rdr.curr.get() == '=' {
    } else if rdr.curr.get() == '>' {
        return token::FAT_ARROW;
    // '!' / "!="
    if rdr.curr.get() == '=' {
    } else { return token::NOT; }
    // '<' family: "<=", "<<", "<-", "<->", '<'
    match rdr.curr.get() {
        '=' => { bump(rdr); return token::LE; }
        '<' => { return binop(rdr, token::SHL); }
        match rdr.curr.get() {
            '>' => { bump(rdr); return token::DARROW; }
            _ => { return token::LARROW; }
        _ => { return token::LT; }
    // '>' family: ">=", ">>", '>'
    match rdr.curr.get() {
        '=' => { bump(rdr); return token::GE; }
        '>' => { return binop(rdr, token::SHR); }
        _ => { return token::GT; }
    // Either a character constant 'a' OR a lifetime name 'abc
    let start = rdr.last_pos.get();
    let mut c2 = rdr.curr.get();
    // If the character is an ident start not followed by another single
    // quote, then this is a lifetime name:
    if ident_start(c2) && rdr.curr.get() != '\'' {
        while ident_continue(rdr.curr.get()) {
        return with_str_from(rdr, start, |lifetime_name| {
            let ident = str_to_ident(lifetime_name);
            let tok = &token::IDENT(ident, false);
            if token::is_keyword(token::keywords::Self, tok) {
                fatal_span(rdr, start, rdr.last_pos.get(),
                           ~"invalid lifetime name: 'self is no longer a special lifetime");
            } else if token::is_any_keyword(tok) &&
                !token::is_keyword(token::keywords::Static, tok) {
                // 'static is the only keyword allowed as a lifetime
                fatal_span(rdr, start, rdr.last_pos.get(),
                           ~"invalid lifetime name");
            token::LIFETIME(ident)
    // Otherwise it is a character constant:
    // '\X' for some X must be a character constant:
    let escaped = rdr.curr.get();
    let escaped_pos = rdr.last_pos.get();
    'n' => { c2 = '\n'; }
    'r' => { c2 = '\r'; }
    't' => { c2 = '\t'; }
    '\\' => { c2 = '\\'; }
    '\'' => { c2 = '\''; }
    '0' => { c2 = '\x00'; }
    'x' => { c2 = scan_numeric_escape(rdr, 2u); }
    'u' => { c2 = scan_numeric_escape(rdr, 4u); }
    'U' => { c2 = scan_numeric_escape(rdr, 8u); }
        fatal_span_char(rdr, escaped_pos, rdr.last_pos.get(),
                        ~"unknown character escape", c2);
    '\t' | '\n' | '\r' | '\'' => {
        fatal_span_char(rdr, start, rdr.last_pos.get(),
                        ~"character constant must be escaped", c2);
    // a char literal must end with a closing quote
    if rdr.curr.get() != '\'' {
        fatal_span_verbose(rdr,
                           // Byte offsetting here is okay because the
                           // character before position `start` is an
                           // ascii single quote.
                           ~"unterminated character constant");
    bump(rdr); // advance curr past token
    return token::LIT_CHAR(c2 as u32);
    // --- double-quoted string literal ---
    let mut accum_str = ~"";
    let start_bpos = rdr.last_pos.get();
    while rdr.curr.get() != '"' {
        fatal_span(rdr, start_bpos, rdr.last_pos.get(),
                   ~"unterminated double quote string");
        let ch = rdr.curr.get();
        let escaped = rdr.curr.get();
        let escaped_pos = rdr.last_pos.get();
        'n' => accum_str.push_char('\n'),
        'r' => accum_str.push_char('\r'),
        't' => accum_str.push_char('\t'),
        '\\' => accum_str.push_char('\\'),
        '\'' => accum_str.push_char('\''),
        '"' => accum_str.push_char('"'),
        // escaped newline: the line continuation eats following whitespace
        '\n' => consume_whitespace(rdr),
        '0' => accum_str.push_char('\x00'),
        accum_str.push_char(scan_numeric_escape(rdr, 2u));
        accum_str.push_char(scan_numeric_escape(rdr, 4u));
        accum_str.push_char(scan_numeric_escape(rdr, 8u));
        fatal_span_char(rdr, escaped_pos, rdr.last_pos.get(),
                        ~"unknown string escape", c2);
        _ => accum_str.push_char(ch)
    return token::LIT_STR(str_to_ident(accum_str));
    // --- raw string literal r"..." / r#"..."# ---
    let start_bpos = rdr.last_pos.get();
    // count '#' marks so the closer must match them exactly
    let mut hash_count = 0u;
    while rdr.curr.get() == '#' {
    if rdr.curr.get() != '"' {
        fatal_span_char(rdr, start_bpos, rdr.last_pos.get(),
                        ~"only `#` is allowed in raw string delimitation; \
                          found illegal character",
    let content_start_bpos = rdr.last_pos.get();
    let mut content_end_bpos;
        fatal_span(rdr, start_bpos, rdr.last_pos.get(),
                   ~"unterminated raw string");
    if rdr.curr.get() == '"' {
        content_end_bpos = rdr.last_pos.get();
        // a real closer needs hash_count '#'s after the quote
        for _ in range(0, hash_count) {
            if rdr.curr.get() != '#' {
    let str_content = with_str_from_to(rdr,
    return token::LIT_STR_RAW(str_content, hash_count);
    // '-' / "->"
    if nextch(rdr) == '>' {
        return token::RARROW;
    } else { return binop(rdr, token::MINUS); }
    // '&' / "&&"
    if nextch(rdr) == '&' {
        return token::ANDAND;
    } else { return binop(rdr, token::AND); }
    // '|' / "||"
    '|' => { bump(rdr); bump(rdr); return token::OROR; }
    _ => { return binop(rdr, token::OR); }
    '+' => { return binop(rdr, token::PLUS); }
    '*' => { return binop(rdr, token::STAR); }
    '/' => { return binop(rdr, token::SLASH); }
    '^' => { return binop(rdr, token::CARET); }
    '%' => { return binop(rdr, token::PERCENT); }
    // anything else is not a legal token start
    fatal_span_char(rdr, rdr.last_pos.get(), rdr.pos.get(),
                    ~"unknown start of token", c);
// Eat whitespace only (no comment handling), stopping at EOF.
// Used for the escaped-newline line continuation in string literals.
fn consume_whitespace(rdr: @StringReader) {
    while is_whitespace(rdr.curr.get()) && !is_eof(rdr) { bump(rdr); }
// NOTE(review): the enclosing test-module header is elided from this view.
use codemap::{BytePos, CodeMap, Span};
use parse::token::{str_to_ident};
// represents a testing reader (incl. both reader and interner)
// (struct header elided in this view)
string_reader: @StringReader
// open a string reader for the given string
fn setup(teststr: @str) -> Env {
    // a fresh codemap with one synthetic file named "zebra.rs"
    let cm = CodeMap::new();
    let fm = cm.new_filemap(@"zebra.rs", teststr);
    diagnostic::mk_span_handler(diagnostic::mk_handler(None),@cm);
    string_reader: new_string_reader(span_handler,fm)
// (test-fn header elided in this view) — checks token text and byte
// spans produced for a small source string.
let Env {string_reader} =
    setup(@"/* my source file */ \
            fn main() { println!(\"zebra\"); }\n");
let id = str_to_ident("fn");
let tok1 = string_reader.next_token();
let tok2 = TokenAndSpan{
    tok:token::IDENT(id, false),
    sp:Span {lo:BytePos(21),hi:BytePos(23),expn_info: None}};
assert_eq!(tok1,tok2);
// the 'main' id is already read:
assert_eq!(string_reader.last_pos.get().clone(), BytePos(28));
// read another token:
let tok3 = string_reader.next_token();
let tok4 = TokenAndSpan{
    tok:token::IDENT(str_to_ident("main"), false),
    sp:Span {lo:BytePos(24),hi:BytePos(28),expn_info: None}};
assert_eq!(tok3,tok4);
// the lparen is already read:
assert_eq!(string_reader.last_pos.get().clone(), BytePos(29))
// check that the given reader produces the desired stream
// of tokens (stop checking after exhausting the expected vec)
fn check_tokenization (env: Env, expected: ~[token::Token]) {
    for expected_tok in expected.iter() {
        let TokenAndSpan {tok:actual_tok, sp: _} =
            env.string_reader.next_token();
        assert_eq!(&actual_tok,expected_tok);

// make the identifier by looking up the string in the interner
fn mk_ident (id: &str, is_mod_name: bool) -> token::Token {
    token::IDENT (str_to_ident(id),is_mod_name)
// The "::" tests: the IDENT token's bool flag records whether the
// identifier is immediately followed by "::" (a module path segment).
#[test] fn doublecolonparsing () {
    // plain identifiers: no mod-path flag
    let env = setup (@"a b");
    check_tokenization (env,
                        ~[mk_ident("a",false),
                          mk_ident("b",false)]);

#[test] fn dcparsing_2 () {
    // "a::b": `a` is flagged as a mod name
    let env = setup (@"a::b");
    check_tokenization (env,
                        ~[mk_ident("a",true),
                          mk_ident("b",false)]);

#[test] fn dcparsing_3 () {
    // space before "::" breaks the mod-name flag on `a`
    let env = setup (@"a ::b");
    check_tokenization (env,
                        ~[mk_ident("a",false),
                          mk_ident("b",false)]);

#[test] fn dcparsing_4 () {
    // space after "::" still flags `a` as a mod name
    let env = setup (@"a:: b");
    check_tokenization (env,
                        ~[mk_ident("a",true),
                          mk_ident("b",false)]);
// Character-literal and lifetime lexing tests.
#[test] fn character_a() {
    let env = setup(@"'a'");
    let TokenAndSpan {tok, sp: _} =
        env.string_reader.next_token();
    assert_eq!(tok,token::LIT_CHAR('a' as u32));

#[test] fn character_space() {
    let env = setup(@"' '");
    let TokenAndSpan {tok, sp: _} =
        env.string_reader.next_token();
    assert_eq!(tok, token::LIT_CHAR(' ' as u32));

#[test] fn character_escaped() {
    // the source contains the two characters backslash-n
    let env = setup(@"'\\n'");
    let TokenAndSpan {tok, sp: _} =
        env.string_reader.next_token();
    assert_eq!(tok, token::LIT_CHAR('\n' as u32));

#[test] fn lifetime_name() {
    // 'abc with no closing quote lexes as a lifetime, not a char
    let env = setup(@"'abc");
    let TokenAndSpan {tok, sp: _} =
        env.string_reader.next_token();
    let id = token::str_to_ident("abc");
    assert_eq!(tok, token::LIFETIME(id));
#[test] fn raw_string() {
    // r###"..."### : content is taken verbatim (backslashes, quotes, NUL)
    let env = setup(@"r###\"\"#a\\b\x00c\"\"###");
    let TokenAndSpan {tok, sp: _} =
        env.string_reader.next_token();
    let id = token::str_to_ident("\"#a\\b\x00c\"");
    assert_eq!(tok, token::LIT_STR_RAW(id, 3));

#[test] fn line_doc_comments() {
    // "///" and "/// blah" are doc-comments; "////" is not
    assert!(!is_line_non_doc_comment("///"));
    assert!(!is_line_non_doc_comment("/// blah"));
    assert!(is_line_non_doc_comment("////"));

#[test] fn nested_block_comments() {
    // nested /* /* */ */ is consumed entirely before the char literal
    let env = setup(@"/* /* */ */'a'");
    let TokenAndSpan {tok, sp: _} =
        env.string_reader.next_token();
    assert_eq!(tok,token::LIT_CHAR('a' as u32));