1 // Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
12 use codemap::{BytePos, CharPos, CodeMap, Pos, Span};
14 use diagnostic::SpanHandler;
15 use ext::tt::transcribe::tt_next_token;
17 use parse::token::{str_to_ident};
20 use std::mem::replace;
21 use std::num::from_str_radix;
24 use std::strbuf::StrBuf;
26 pub use ext::tt::transcribe::{TtReader, new_tt_reader};
// NOTE(review): the enclosing `pub trait Reader {` header is not visible in
// this excerpt; these are the trait's required methods.
/// True when the reader has consumed all of its input.
fn is_eof(&self) -> bool;
/// Return the next token and advance the reader (side-effecting).
fn next_token(&mut self) -> TokenAndSpan;
/// Report a fatal lexical error; never returns.
fn fatal(&self, ~str) -> !;
/// Accessor for the diagnostic (error-reporting) handler.
fn span_diag<'a>(&'a self) -> &'a SpanHandler;
/// Return the buffered lookahead token without advancing.
fn peek(&self) -> TokenAndSpan;
/// A token paired with its source location.
/// (The span field is not visible in this excerpt.)
#[deriving(Clone, Eq, Show)]
pub struct TokenAndSpan {
    // The token itself.
    pub tok: token::Token,
/// A character-level lexer over the source text of one `FileMap`.
/// Maintains one token of lookahead in `peek_tok` (primed by
/// `string_advance_token`).
pub struct StringReader<'a> {
    // Handler used to emit span-attached diagnostics.
    pub span_diagnostic: &'a SpanHandler,
    // The absolute offset within the codemap of the next character to read
    // The absolute offset within the codemap of the last character read(curr)
    pub last_pos: BytePos,
    // The column of the next character to read
    // The last character to be read
    pub curr: Option<char>,
    // The file being lexed.
    pub filemap: Rc<codemap::FileMap>,
    // One-token lookahead buffer.
    pub peek_tok: token::Token,
impl<'a> StringReader<'a> {
    /// True when the current character (`self.curr`) equals `c`.
    pub fn curr_is(&self, c: char) -> bool {
/// Create a `StringReader` for `filemap` and prime it by lexing the first
/// token into the `peek_*` lookahead fields.
pub fn new_string_reader<'a>(span_diagnostic: &'a SpanHandler,
                             filemap: Rc<codemap::FileMap>)
    let mut r = new_low_level_string_reader(span_diagnostic, filemap);
    string_advance_token(&mut r); /* fill in peek_* */
/* For comments.rs, which hackily pokes into 'pos' and 'curr' */
/// Build a `StringReader` positioned at the start of `filemap`, WITHOUT
/// lexing the first token (unlike `new_string_reader`).
pub fn new_low_level_string_reader<'a>(span_diagnostic: &'a SpanHandler,
                                       filemap: Rc<codemap::FileMap>)
    // Force the initial reader bump to start on a fresh line
    let initial_char = '\n';
    let mut r = StringReader {
        span_diagnostic: span_diagnostic,
        // Both positions start at the file's base offset in the codemap.
        pos: filemap.start_pos,
        last_pos: filemap.start_pos,
        curr: Some(initial_char),
        /* dummy values; not read */
        peek_span: codemap::DUMMY_SP,
impl<'a> Reader for StringReader<'a> {
    fn is_eof(&self) -> bool { is_eof(self) }
    // return the next token. EFFECT: advances the string_reader.
    fn next_token(&mut self) -> TokenAndSpan {
        // Hand out the buffered token, leaving UNDERSCORE as a throwaway
        // placeholder, then immediately lex the following token into peek_*.
        let ret_val = TokenAndSpan {
            tok: replace(&mut self.peek_tok, token::UNDERSCORE),
        string_advance_token(self);
    // Errors are reported at the lookahead token's span.
    fn fatal(&self, m: ~str) -> ! {
        self.span_diagnostic.span_fatal(self.peek_span, m)
    fn span_diag<'a>(&'a self) -> &'a SpanHandler { self.span_diagnostic }
    fn peek(&self) -> TokenAndSpan {
        // FIXME(pcwalton): Bad copy!
            tok: self.peek_tok.clone(),
            sp: self.peek_span.clone(),
// The same `Reader` interface over a token-tree reader (used when expanding
// macros), delegating to `tt_next_token`.
impl<'a> Reader for TtReader<'a> {
    fn is_eof(&self) -> bool {
        self.cur_tok == token::EOF
    fn next_token(&mut self) -> TokenAndSpan {
        let r = tt_next_token(self);
        debug!("TtReader: r={:?}", r);
    fn fatal(&self, m: ~str) -> ! {
        self.sp_diag.span_fatal(self.cur_span, m);
    fn span_diag<'a>(&'a self) -> &'a SpanHandler { self.sp_diag }
    fn peek(&self) -> TokenAndSpan {
        tok: self.cur_tok.clone(),
        sp: self.cur_span.clone(),
// report a lexical error spanning [`from_pos`, `to_pos`)
fn fatal_span(rdr: &mut StringReader,
    // Point peek_span at the offending range so the error is reported there.
    rdr.peek_span = codemap::mk_sp(from_pos, to_pos);
// report a lexical error spanning [`from_pos`, `to_pos`), appending an
// escaped character to the error message
fn fatal_span_char(rdr: &mut StringReader,
    let mut m = StrBuf::from_owned_str(m);
    // Append the offending character in escaped form (e.g. \n, \x00).
    char::escape_default(c, |c| m.push_char(c));
    fatal_span(rdr, from_pos, to_pos, m.into_owned());
// report a lexical error spanning [`from_pos`, `to_pos`), appending the
// offending string to the error message
fn fatal_span_verbose(rdr: &mut StringReader,
    let mut m = StrBuf::from_owned_str(m);
    // Slice the raw source text for the offending range and append it.
    let from = byte_offset(rdr, from_pos).to_uint();
    let to = byte_offset(rdr, to_pos).to_uint();
    m.push_str(rdr.filemap.src.slice(from, to));
    fatal_span(rdr, from_pos, to_pos, m.into_owned());
// EFFECT: advance peek_tok and peek_span to refer to the next token.
// EFFECT: update the interner, maybe.
fn string_advance_token(r: &mut StringReader) {
    // Skipping whitespace/comments may itself yield a token (a doc-comment).
    match consume_whitespace_and_comments(r) {
        r.peek_span = comment.sp;
        r.peek_tok = comment.tok;
        // (EOF case) nothing left to lex.
        r.peek_tok = token::EOF;
        // Otherwise lex a real token and record its span.
        let start_bytepos = r.last_pos;
        r.peek_tok = next_token_inner(r);
        r.peek_span = codemap::mk_sp(start_bytepos,
// Convert an absolute codemap position into an offset into this file's
// source string.
fn byte_offset(rdr: &StringReader, pos: BytePos) -> BytePos {
    (pos - rdr.filemap.start_pos)
/// Calls `f` with a string slice of the source text spanning from `start`
/// up to but excluding `rdr.last_pos`, meaning the slice does not include
/// the character `rdr.curr`.
pub fn with_str_from<T>(
    with_str_from_to(rdr, start, rdr.last_pos, f)
/// Calls `f` with a string slice of the source text spanning from `start`
/// up to but excluding `end`.
fn with_str_from_to<T>(
    f(rdr.filemap.src.slice(
        byte_offset(rdr, start).to_uint(),
        byte_offset(rdr, end).to_uint()))
// EFFECT: advance the StringReader by one character. If a newline is
// discovered, add it to the FileMap's list of line start offsets.
pub fn bump(rdr: &mut StringReader) {
    rdr.last_pos = rdr.pos;
    let current_byte_offset = byte_offset(rdr, rdr.pos).to_uint();
    if current_byte_offset < rdr.filemap.src.len() {
        assert!(rdr.curr.is_some());
        let last_char = rdr.curr.unwrap();
        // char_range_at handles multi-byte UTF-8 sequences; `next.next` is
        // the byte index just past the decoded character.
        let next = rdr.filemap.src.char_range_at(current_byte_offset);
        let byte_offset_diff = next.next - current_byte_offset;
        rdr.pos = rdr.pos + Pos::from_uint(byte_offset_diff);
        rdr.curr = Some(next.ch);
        rdr.col = rdr.col + CharPos(1u);
        if last_char == '\n' {
            // The character we just stepped past ended a line: record the
            // new line start and reset the column counter.
            rdr.filemap.next_line(rdr.last_pos);
            rdr.col = CharPos(0u);
        if byte_offset_diff > 1 {
            // Remember multi-byte characters so columns can be adjusted later.
            rdr.filemap.record_multibyte_char(rdr.last_pos, byte_offset_diff);
/// Free-function form of EOF detection for a `StringReader`.
pub fn is_eof(rdr: &StringReader) -> bool {
/// One character of lookahead: the character at `rdr.pos` (i.e. the one
/// after `rdr.curr`), or `None` at end of file.
pub fn nextch(rdr: &StringReader) -> Option<char> {
    let offset = byte_offset(rdr, rdr.pos).to_uint();
    if offset < rdr.filemap.src.len() {
        Some(rdr.filemap.src.char_at(offset))
/// True when the lookahead character equals `c`.
pub fn nextch_is(rdr: &StringReader, c: char) -> bool {
    nextch(rdr) == Some(c)
/// Two characters of lookahead: the character after `nextch`, or `None`
/// if the input ends first.
pub fn nextnextch(rdr: &StringReader) -> Option<char> {
    let offset = byte_offset(rdr, rdr.pos).to_uint();
    let s = rdr.filemap.deref().src.as_slice();
    if offset >= s.len() { return None }
    // Skip over the (possibly multi-byte) next character, then read the one
    // after it.
    let str::CharRange { next, .. } = s.char_range_at(offset);
    Some(s.char_at(next))
/// True when the second-lookahead character equals `c`.
pub fn nextnextch_is(rdr: &StringReader, c: char) -> bool {
    nextnextch(rdr) == Some(c)
/// Numeric value of a hex digit (0-15). `None` is mapped to '\x00', which
/// falls through all three range checks.
fn hex_digit_val(c: Option<char>) -> int {
    let d = c.unwrap_or('\x00');
    if in_range(c, '0', '9') { return (d as int) - ('0' as int); }
    if in_range(c, 'a', 'f') { return (d as int) - ('a' as int) + 10; }
    if in_range(c, 'A', 'F') { return (d as int) - ('A' as int) + 10; }
/// True for space, newline, tab and carriage return. `None` (EOF) is not
/// whitespace.
pub fn is_whitespace(c: Option<char>) -> bool {
    match c.unwrap_or('\x00') { // None can be null for now... it's not whitespace
        ' ' | '\n' | '\t' | '\r' => true,
/// True when `c` is `Some` and lies in the inclusive range [`lo`, `hi`].
fn in_range(c: Option<char>, lo: char, hi: char) -> bool {
    Some(c) => lo <= c && c <= hi,
306 fn is_dec_digit(c: Option<char>) -> bool { return in_range(c, '0', '9'); }
/// True when `c` holds an ASCII hexadecimal digit (either case).
fn is_hex_digit(c: Option<char>) -> bool {
    return in_range(c, '0', '9') || in_range(c, 'a', 'f') ||
        in_range(c, 'A', 'F');
// EFFECT: eats whitespace and comments.
// returns a Some(sugared-doc-attr) if one exists, None otherwise.
fn consume_whitespace_and_comments(rdr: &mut StringReader)
                                   -> Option<TokenAndSpan> {
    while is_whitespace(rdr.curr) { bump(rdr); }
    return consume_any_line_comment(rdr);
/// "////..." (four or more slashes) is an ordinary comment, not a
/// doc-comment, even though it starts with "///".
pub fn is_line_non_doc_comment(s: &str) -> bool {
    s.starts_with("////")
// PRECONDITION: rdr.curr is not whitespace
// EFFECT: eats any kind of comment.
// returns a Some(sugared-doc-attr) if one exists, None otherwise
fn consume_any_line_comment(rdr: &mut StringReader)
                            -> Option<TokenAndSpan> {
    if rdr.curr_is('/') {
        // line comments starting with "///" or "//!" are doc-comments
        if rdr.curr_is('/') || rdr.curr_is('!') {
            // Span starts at the first of the three comment-opening bytes.
            let start_bpos = rdr.pos - BytePos(3);
            while !rdr.curr_is('\n') && !is_eof(rdr) {
            // Turn the comment text into a DOC_COMMENT token carrying the
            // interned comment string.
            let ret = with_str_from(rdr, start_bpos, |string| {
                // but comments with only more "/"s are not
                if !is_line_non_doc_comment(string) {
                        tok: token::DOC_COMMENT(str_to_ident(string)),
                        sp: codemap::mk_sp(start_bpos, rdr.pos)
            // Plain `//` comment: skip to end of line.
            while !rdr.curr_is('\n') && !is_eof(rdr) { bump(rdr); }
            // Restart whitespace munch.
            consume_whitespace_and_comments(rdr)
        Some('*') => { bump(rdr); bump(rdr); consume_block_comment(rdr) }
    } else if rdr.curr_is('#') {
        if nextch_is(rdr, '!') {
            // Parse an inner attribute.
            if nextnextch_is(rdr, '[') {
            // `#!` shebang line: only treated as a comment at the very
            // start of the file.
            // I guess this is the only way to figure out if
            // we're at the beginning of the file...
            let cmap = CodeMap::new();
            cmap.files.borrow_mut().push(rdr.filemap.clone());
            let loc = cmap.lookup_char_pos_adj(rdr.last_pos);
            if loc.line == 1u && loc.col == CharPos(0u) {
                while !rdr.curr_is('\n') && !is_eof(rdr) { bump(rdr); }
                return consume_whitespace_and_comments(rdr);
/// "/***..." is an ordinary block comment, not a doc-comment, even though
/// it starts with "/**".
pub fn is_block_non_doc_comment(s: &str) -> bool {
    s.starts_with("/***")
// might return a sugared-doc-attr
fn consume_block_comment(rdr: &mut StringReader) -> Option<TokenAndSpan> {
    // block comments starting with "/**" or "/*!" are doc-comments
    let is_doc_comment = rdr.curr_is('*') || rdr.curr_is('!');
    // Back up over the already-consumed opener ("/**" or "/*").
    let start_bpos = rdr.pos - BytePos(if is_doc_comment {3} else {2});
    // Block comments nest; `level` tracks the current nesting depth.
    let mut level: int = 1;
        let msg = if is_doc_comment {
            ~"unterminated block doc-comment"
            ~"unterminated block comment"
        fatal_span(rdr, start_bpos, rdr.last_pos, msg);
    } else if rdr.curr_is('/') && nextch_is(rdr, '*') {
    } else if rdr.curr_is('*') && nextch_is(rdr, '/') {
    let res = if is_doc_comment {
        with_str_from(rdr, start_bpos, |string| {
            // but comments with only "*"s between two "/"s are not
            if !is_block_non_doc_comment(string) {
                    tok: token::DOC_COMMENT(str_to_ident(string)),
                    sp: codemap::mk_sp(start_bpos, rdr.pos)
    // restart whitespace munch.
    if res.is_some() { res } else { consume_whitespace_and_comments(rdr) }
/// Scan an optional float exponent ("e"/"E", optional sign, digits).
/// Returns the exponent text, or `None` if the current char is not e/E.
/// Fatal error if "e" is present but not followed by any digits.
fn scan_exponent(rdr: &mut StringReader, start_bpos: BytePos) -> Option<~str> {
    // \x00 hits the `return None` case immediately, so this is fine.
    let mut c = rdr.curr.unwrap_or('\x00');
    let mut rslt = StrBuf::new();
    if c == 'e' || c == 'E' {
        c = rdr.curr.unwrap_or('\x00');
        if c == '-' || c == '+' {
        let exponent = scan_digits(rdr, 10u);
        if exponent.len() > 0u {
            rslt.push_str(exponent);
            return Some(rslt.into_owned());
            // "e" with no digits is a malformed float literal.
            fatal_span(rdr, start_bpos, rdr.last_pos,
                       ~"scan_exponent: bad fp literal");
    } else { return None::<~str>; }
/// Consume a run of digits in the given radix, skipping '_' separators.
/// Stops at the first non-digit and returns the digits collected so far
/// (possibly empty).
fn scan_digits(rdr: &mut StringReader, radix: uint) -> ~str {
    let mut rslt = StrBuf::new();
    // '_' is a digit separator: skip it without recording.
    if c == Some('_') { bump(rdr); continue; }
    match c.and_then(|cc| char::to_digit(cc, radix)) {
        rslt.push_char(c.unwrap()),
        _ => return rslt.into_owned()
/// Reject float literals written in a non-decimal base (hex/octal/binary
/// prefixes are integer-only).
fn check_float_base(rdr: &mut StringReader, start_bpos: BytePos, last_bpos: BytePos,
    16u => fatal_span(rdr, start_bpos, last_bpos,
                      ~"hexadecimal float literal is not supported"),
    8u => fatal_span(rdr, start_bpos, last_bpos,
                     ~"octal float literal is not supported"),
    2u => fatal_span(rdr, start_bpos, last_bpos,
                     ~"binary float literal is not supported"),
/// Lex a numeric literal starting with `c` (already known to be a digit).
/// Handles base prefixes (0x/0o/0b), integer suffixes (u/i + width),
/// float parts (fraction, exponent) and float suffixes (f32/f64).
fn scan_number(c: char, rdr: &mut StringReader) -> token::Token {
    let mut n = nextch(rdr).unwrap_or('\x00');
    let start_bpos = rdr.last_pos;
    // Base prefix detection: 0x (hex), 0o (octal), 0b (binary).
    if c == '0' && n == 'x' {
    } else if c == '0' && n == 'o' {
    } else if c == '0' && n == 'b' {
    num_str = StrBuf::from_owned_str(scan_digits(rdr, base));
    c = rdr.curr.unwrap_or('\x00');
    // Explicit integer suffix: u/i optionally followed by 8/16/32/64.
    if c == 'u' || c == 'i' {
        enum Result { Signed(ast::IntTy), Unsigned(ast::UintTy) }
        let signed = c == 'i';
            if signed { Signed(ast::TyI) }
            else { Unsigned(ast::TyU) }
        c = rdr.curr.unwrap_or('\x00');
            tp = if signed { Signed(ast::TyI8) }
                 else { Unsigned(ast::TyU8) };
        n = nextch(rdr).unwrap_or('\x00');
        if c == '1' && n == '6' {
            tp = if signed { Signed(ast::TyI16) }
                 else { Unsigned(ast::TyU16) };
        } else if c == '3' && n == '2' {
            tp = if signed { Signed(ast::TyI32) }
                 else { Unsigned(ast::TyU32) };
        } else if c == '6' && n == '4' {
            tp = if signed { Signed(ast::TyI64) }
                 else { Unsigned(ast::TyU64) };
        if num_str.len() == 0u {
            fatal_span(rdr, start_bpos, rdr.last_pos,
                       ~"no valid digits found for number");
        // Parse the digits in the detected base; overflow is a fatal error.
        let parsed = match from_str_radix::<u64>(num_str.as_slice(),
            None => fatal_span(rdr, start_bpos, rdr.last_pos,
                               ~"int literal is too large")
            Signed(t) => return token::LIT_INT(parsed as i64, t),
            Unsigned(t) => return token::LIT_UINT(parsed, t)
    let mut is_float = false;
    // A '.' makes this a float — unless it starts an identifier (method
    // call) or a second '.' (range), in which case it is left unconsumed.
    if rdr.curr_is('.') && !(ident_start(nextch(rdr)) || nextch_is(rdr, '.')) {
        let dec_part = scan_digits(rdr, 10u);
        num_str.push_char('.');
        num_str.push_str(dec_part);
    match scan_exponent(rdr, start_bpos) {
            num_str.push_str(*s);
    // Explicit float suffix: f32 or f64.
    if rdr.curr_is('f') {
        c = rdr.curr.unwrap_or('\x00');
        n = nextch(rdr).unwrap_or('\x00');
        if c == '3' && n == '2' {
            check_float_base(rdr, start_bpos, rdr.last_pos, base);
            return token::LIT_FLOAT(str_to_ident(num_str.into_owned()),
        } else if c == '6' && n == '4' {
            check_float_base(rdr, start_bpos, rdr.last_pos, base);
            return token::LIT_FLOAT(str_to_ident(num_str.into_owned()),
            /* FIXME (#2252): if this is out of range for either a
               32-bit or 64-bit float, it won't be noticed till the
            fatal_span(rdr, start_bpos, rdr.last_pos,
                       ~"expected `f32` or `f64` suffix");
        // Float with no suffix: type is inferred later.
        check_float_base(rdr, start_bpos, rdr.last_pos, base);
        return token::LIT_FLOAT_UNSUFFIXED(str_to_ident(
            num_str.into_owned()));
    if num_str.len() == 0u {
        fatal_span(rdr, start_bpos, rdr.last_pos,
                   ~"no valid digits found for number");
    // Unsuffixed integer.
    let parsed = match from_str_radix::<u64>(num_str.as_slice(),
        None => fatal_span(rdr, start_bpos, rdr.last_pos,
                           ~"int literal is too large")
    debug!("lexing {} as an unsuffixed integer literal",
    return token::LIT_INT_UNSUFFIXED(parsed as i64);
/// Lex the digits of a numeric character escape (\xNN, \uNNNN, \UNNNNNNNN)
/// and decode them into the escaped character. `n_hex_digits` is the exact
/// number of hex digits required. Fatal error on a non-hex digit, premature
/// EOF, or a code point that is not a valid `char`.
fn scan_numeric_escape(rdr: &mut StringReader, n_hex_digits: uint) -> char {
    let mut accum_int = 0;
    let mut i = n_hex_digits;
    let start_bpos = rdr.last_pos;
    while i != 0u && !is_eof(rdr) {
        if !is_hex_digit(n) {
            fatal_span_char(rdr, rdr.last_pos, rdr.pos,
                            ~"illegal character in numeric character escape",
        // Accumulate one hex digit per iteration (base-16 shift-and-add).
        accum_int += hex_digit_val(n);
    if i != 0 && is_eof(rdr) {
        fatal_span(rdr, start_bpos, rdr.last_pos,
                   ~"unterminated numeric character escape");
    // Reject surrogate/out-of-range code points.
    match char::from_u32(accum_int as u32) {
        None => fatal_span(rdr, start_bpos, rdr.last_pos,
                           ~"illegal numeric character escape")
/// True when `c` may begin an identifier: ASCII letter, or (beyond ASCII)
/// a XID_Start character. `None` (EOF) never starts an identifier.
fn ident_start(c: Option<char>) -> bool {
    let c = match c { Some(c) => c, None => return false };
    (c >= 'a' && c <= 'z')
        || (c >= 'A' && c <= 'Z')
        || (c > '\x7f' && char::is_XID_start(c))
/// True when `c` may continue an identifier: ASCII letter or digit, or
/// (beyond ASCII) a XID_Continue character. `None` (EOF) never continues.
fn ident_continue(c: Option<char>) -> bool {
    let c = match c { Some(c) => c, None => return false };
    (c >= 'a' && c <= 'z')
        || (c >= 'A' && c <= 'Z')
        || (c >= '0' && c <= '9')
        || (c > '\x7f' && char::is_XID_continue(c))
// return the next token from the string
// EFFECT: advances the input past that token
// EFFECT: updates the interner
fn next_token_inner(rdr: &mut StringReader) -> token::Token {
    // --- Identifiers / keywords ------------------------------------------
    if ident_start(c) && !nextch_is(rdr, '"') && !nextch_is(rdr, '#') {
        // Note: r as in r" or r#" is part of a raw string literal,
        // not an identifier, and is handled further down.
        let start = rdr.last_pos;
        while ident_continue(rdr.curr) {
        return with_str_from(rdr, start, |string| {
            // A trailing "::" marks the identifier as a module-path segment.
            let is_mod_name = rdr.curr_is(':') && nextch_is(rdr, ':');
            // FIXME: perform NFKC normalization here. (Issue #2253)
            token::IDENT(str_to_ident(string), is_mod_name)
    // --- Numeric literals ------------------------------------------------
    return scan_number(c.unwrap(), rdr);
    // Helper: a binary operator possibly followed by '=' (e.g. `+` vs `+=`).
    fn binop(rdr: &mut StringReader, op: token::BinOp) -> token::Token {
        if rdr.curr_is('=') {
            return token::BINOPEQ(op);
        } else { return token::BINOP(op); }
    // --- One-byte and multi-byte punctuation tokens ----------------------
    match c.expect("next_token_inner called at EOF") {
        ';' => { bump(rdr); return token::SEMI; }
        ',' => { bump(rdr); return token::COMMA; }
        // '.' may start `.` / `..` / `...`.
        return if rdr.curr_is('.') {
            if rdr.curr_is('.') {
        '(' => { bump(rdr); return token::LPAREN; }
        ')' => { bump(rdr); return token::RPAREN; }
        '{' => { bump(rdr); return token::LBRACE; }
        '}' => { bump(rdr); return token::RBRACE; }
        '[' => { bump(rdr); return token::LBRACKET; }
        ']' => { bump(rdr); return token::RBRACKET; }
        '@' => { bump(rdr); return token::AT; }
        '#' => { bump(rdr); return token::POUND; }
        '~' => { bump(rdr); return token::TILDE; }
        // ':' may be `:` or `::`.
        if rdr.curr_is(':') {
            return token::MOD_SEP;
        } else { return token::COLON; }
        '$' => { bump(rdr); return token::DOLLAR; }
        // Multi-byte tokens.
        // '=' may be `=` / `==` / `=>`.
        if rdr.curr_is('=') {
        } else if rdr.curr_is('>') {
            return token::FAT_ARROW;
        // '!' may be `!` or `!=`.
        if rdr.curr_is('=') {
        } else { return token::NOT; }
        // '<' family: `<=`, `<<`, `<->`, `<-`, `<`.
        match rdr.curr.unwrap_or('\x00') {
            '=' => { bump(rdr); return token::LE; }
            '<' => { return binop(rdr, token::SHL); }
            match rdr.curr.unwrap_or('\x00') {
                '>' => { bump(rdr); return token::DARROW; }
                _ => { return token::LARROW; }
            _ => { return token::LT; }
        // '>' family: `>=`, `>>`, `>`.
        match rdr.curr.unwrap_or('\x00') {
            '=' => { bump(rdr); return token::GE; }
            '>' => { return binop(rdr, token::SHR); }
            _ => { return token::GT; }
        // Either a character constant 'a' OR a lifetime name 'abc
        let start = rdr.last_pos;
        // the eof will be picked up by the final `'` check below
        let mut c2 = rdr.curr.unwrap_or('\x00');
        // If the character is an ident start not followed by another single
        // quote, then this is a lifetime name:
        if ident_start(Some(c2)) && !rdr.curr_is('\'') {
            while ident_continue(rdr.curr) {
            let ident = with_str_from(rdr, start, |lifetime_name| {
                str_to_ident(lifetime_name)
            let tok = &token::IDENT(ident, false);
            // 'self and keyword lifetimes (except 'static) are rejected.
            if token::is_keyword(token::keywords::Self, tok) {
                fatal_span(rdr, start, rdr.last_pos,
                           ~"invalid lifetime name: 'self is no longer a special lifetime");
            } else if token::is_any_keyword(tok) &&
                !token::is_keyword(token::keywords::Static, tok) {
                fatal_span(rdr, start, rdr.last_pos,
                           ~"invalid lifetime name");
            return token::LIFETIME(ident);
        // Otherwise it is a character constant:
        // '\X' for some X must be a character constant:
        let escaped = rdr.curr;
        let escaped_pos = rdr.last_pos;
        'x' => scan_numeric_escape(rdr, 2u),
        'u' => scan_numeric_escape(rdr, 4u),
        'U' => scan_numeric_escape(rdr, 8u),
        fatal_span_char(rdr, escaped_pos, rdr.last_pos,
                        ~"unknown character escape", c2)
        '\t' | '\n' | '\r' | '\'' => {
            fatal_span_char(rdr, start, rdr.last_pos,
                            ~"character constant must be escaped", c2);
        // The literal must be closed by a single quote.
        if !rdr.curr_is('\'') {
            fatal_span_verbose(rdr,
                               // Byte offsetting here is okay because the
                               // character before position `start` is an
                               // ascii single quote.
                               ~"unterminated character constant");
        bump(rdr); // advance curr past token
        return token::LIT_CHAR(c2 as u32);
        // --- Ordinary string literal "..." with escapes ------------------
        let mut accum_str = StrBuf::new();
        let start_bpos = rdr.last_pos;
        while !rdr.curr_is('"') {
            fatal_span(rdr, start_bpos, rdr.last_pos,
                       ~"unterminated double quote string");
            let ch = rdr.curr.unwrap();
            fatal_span(rdr, start_bpos, rdr.last_pos,
                       ~"unterminated double quote string");
            let escaped = rdr.curr.unwrap();
            let escaped_pos = rdr.last_pos;
            'n' => accum_str.push_char('\n'),
            'r' => accum_str.push_char('\r'),
            't' => accum_str.push_char('\t'),
            '\\' => accum_str.push_char('\\'),
            '\'' => accum_str.push_char('\''),
            '"' => accum_str.push_char('"'),
            // Backslash-newline: a line-continuation; skip the whitespace.
            '\n' => consume_whitespace(rdr),
            '0' => accum_str.push_char('\x00'),
            accum_str.push_char(scan_numeric_escape(rdr, 2u));
            accum_str.push_char(scan_numeric_escape(rdr, 4u));
            accum_str.push_char(scan_numeric_escape(rdr, 8u));
            fatal_span_char(rdr, escaped_pos, rdr.last_pos,
                            ~"unknown string escape", c2);
            _ => accum_str.push_char(ch)
        return token::LIT_STR(str_to_ident(accum_str.as_slice()));
        // --- Raw string literal r#"..."# ---------------------------------
        let start_bpos = rdr.last_pos;
        // Count the leading '#'s; the closer must repeat them exactly.
        let mut hash_count = 0u;
        while rdr.curr_is('#') {
            fatal_span(rdr, start_bpos, rdr.last_pos,
                       ~"unterminated raw string");
        } else if !rdr.curr_is('"') {
            fatal_span_char(rdr, start_bpos, rdr.last_pos,
                            ~"only `#` is allowed in raw string delimitation; \
                              found illegal character",
        let content_start_bpos = rdr.last_pos;
        let mut content_end_bpos;
            fatal_span(rdr, start_bpos, rdr.last_pos,
                       ~"unterminated raw string");
            // A '"' only closes the literal when followed by `hash_count` '#'s.
            if rdr.curr_is('"') {
                content_end_bpos = rdr.last_pos;
                for _ in range(0, hash_count) {
                    if !rdr.curr_is('#') {
        let str_content = with_str_from_to(rdr,
        return token::LIT_STR_RAW(str_content, hash_count);
        // '-' may be `->` or a minus operator.
        if nextch_is(rdr, '>') {
            return token::RARROW;
        } else { return binop(rdr, token::MINUS); }
        // '&' may be `&&` or a binary and.
        if nextch_is(rdr, '&') {
            return token::ANDAND;
        } else { return binop(rdr, token::AND); }
        Some('|') => { bump(rdr); bump(rdr); return token::OROR; }
        _ => { return binop(rdr, token::OR); }
        '+' => { return binop(rdr, token::PLUS); }
        '*' => { return binop(rdr, token::STAR); }
        '/' => { return binop(rdr, token::SLASH); }
        '^' => { return binop(rdr, token::CARET); }
        '%' => { return binop(rdr, token::PERCENT); }
        // Anything else is not a valid token start.
        fatal_span_char(rdr, rdr.last_pos, rdr.pos,
                        ~"unknown start of token", c);
/// Skip whitespace characters (used e.g. after a backslash-newline escape
/// inside a string literal).
fn consume_whitespace(rdr: &mut StringReader) {
    while is_whitespace(rdr.curr) && !is_eof(rdr) { bump(rdr); }
// NOTE(review): the enclosing test-module header is not visible in this
// excerpt; these are the unit-test helpers.
use codemap::{BytePos, CodeMap, Span};
use parse::token::{str_to_ident};

// Build a span handler whose output is discarded (NullWriter), so tests
// don't print diagnostics.
fn mk_sh() -> diagnostic::SpanHandler {
    let emitter = diagnostic::EmitterWriter::new(~util::NullWriter);
    let handler = diagnostic::mk_handler(~emitter);
    diagnostic::mk_span_handler(handler, CodeMap::new())

// open a string reader for the given string
fn setup<'a>(span_handler: &'a diagnostic::SpanHandler,
             teststr: ~str) -> StringReader<'a> {
    let fm = span_handler.cm.new_filemap(~"zebra.rs", teststr);
    new_string_reader(span_handler, fm)
// NOTE(review): the `#[test] fn` header for this test is not visible in
// this excerpt. It checks token identity and spans while lexing a small
// program.
let span_handler = mk_sh();
let mut string_reader = setup(&span_handler,
                              ~"/* my source file */ \
                                fn main() { println!(\"zebra\"); }\n");
let id = str_to_ident("fn");
// First token after the comment should be `fn` at bytes 21..23.
let tok1 = string_reader.next_token();
let tok2 = TokenAndSpan{
    tok:token::IDENT(id, false),
    sp:Span {lo:BytePos(21),hi:BytePos(23),expn_info: None}};
assert_eq!(tok1,tok2);
// the 'main' id is already read:
assert_eq!(string_reader.last_pos.clone(), BytePos(28));
// read another token:
let tok3 = string_reader.next_token();
let tok4 = TokenAndSpan{
    tok:token::IDENT(str_to_ident("main"), false),
    sp:Span {lo:BytePos(24),hi:BytePos(28),expn_info: None}};
assert_eq!(tok3,tok4);
// the lparen is already read:
assert_eq!(string_reader.last_pos.clone(), BytePos(29))
// check that the given reader produces the desired stream
// of tokens (stop checking after exhausting the expected vec)
fn check_tokenization (mut string_reader: StringReader, expected: Vec<token::Token> ) {
    for expected_tok in expected.iter() {
        assert_eq!(&string_reader.next_token().tok, expected_tok);

// make the identifier by looking up the string in the interner
fn mk_ident (id: &str, is_mod_name: bool) -> token::Token {
    token::IDENT (str_to_ident(id),is_mod_name)
// `a b`: two plain identifiers, neither a module path segment.
#[test] fn doublecolonparsing () {
    check_tokenization(setup(&mk_sh(), ~"a b"),
                       vec!(mk_ident("a",false),
                            mk_ident("b",false)));

// `a::b`: `a` is flagged as a mod-path name (followed by `::`).
#[test] fn dcparsing_2 () {
    check_tokenization(setup(&mk_sh(), ~"a::b"),
                       vec!(mk_ident("a",true),
                            mk_ident("b",false)));

// `a ::b`: the space before `::` means `a` is NOT a mod-path name.
#[test] fn dcparsing_3 () {
    check_tokenization(setup(&mk_sh(), ~"a ::b"),
                       vec!(mk_ident("a",false),
                            mk_ident("b",false)));

// `a:: b`: `a` IS a mod-path name; the space after `::` is fine.
#[test] fn dcparsing_4 () {
    check_tokenization(setup(&mk_sh(), ~"a:: b"),
                       vec!(mk_ident("a",true),
                            mk_ident("b",false)));

#[test] fn character_a() {
    assert_eq!(setup(&mk_sh(), ~"'a'").next_token().tok,
               token::LIT_CHAR('a' as u32));

#[test] fn character_space() {
    assert_eq!(setup(&mk_sh(), ~"' '").next_token().tok,
               token::LIT_CHAR(' ' as u32));

#[test] fn character_escaped() {
    assert_eq!(setup(&mk_sh(), ~"'\\n'").next_token().tok,
               token::LIT_CHAR('\n' as u32));

#[test] fn lifetime_name() {
    assert_eq!(setup(&mk_sh(), ~"'abc").next_token().tok,
               token::LIFETIME(token::str_to_ident("abc")));

// Raw strings keep backslashes and quotes verbatim; hash count is 3 here.
#[test] fn raw_string() {
    assert_eq!(setup(&mk_sh(), ~"r###\"\"#a\\b\x00c\"\"###").next_token().tok,
               token::LIT_STR_RAW(token::str_to_ident("\"#a\\b\x00c\""), 3));

#[test] fn line_doc_comments() {
    assert!(!is_line_non_doc_comment("///"));
    assert!(!is_line_non_doc_comment("/// blah"));
    assert!(is_line_non_doc_comment("////"));

// Block comments nest, so the whole `/* /* */ */` is skipped.
#[test] fn nested_block_comments() {
    assert_eq!(setup(&mk_sh(), ~"/* /* */ */'a'").next_token().tok,
               token::LIT_CHAR('a' as u32));