syntax: don't process string/char/byte/binary lits

author Corey Richardson <corey@octayn.net>

Thu, 3 Jul 2014 07:47:30 +0000 (00:47 -0700)

committer Corey Richardson <corey@octayn.net>

Wed, 9 Jul 2014 07:06:29 +0000 (00:06 -0700)
author Corey Richardson <corey@octayn.net>
Thu, 3 Jul 2014 07:47:30 +0000 (00:47 -0700)
committer Corey Richardson <corey@octayn.net>
Wed, 9 Jul 2014 07:06:29 +0000 (00:06 -0700)
diff --git a/src/libsyntax/ext/base.rs b/src/libsyntax/ext/base.rs

index bbf38fd7a9d05c8dd2d86da31fdab81e89e53218..1dbbe3b973c83f7e0b49ce78ced52b2cd828cb84 100644 (file)
--- a/src/libsyntax/ext/base.rs
+++ b/src/libsyntax/ext/base.rs
@@ -579,9 +579,9 @@ pub fn get_single_str_from_tts(cx: &ExtCtxt,
          cx.span_err(sp, format!("{} takes 1 argument.", name).as_slice());
      } else {
          match tts[0] {
-            ast::TTTok(_, token::LIT_STR(ident))
-            | ast::TTTok(_, token::LIT_STR_RAW(ident, _)) => {
-                return Some(token::get_ident(ident).get().to_string())
+            ast::TTTok(_, token::LIT_STR(ident)) => return Some(parse::str_lit(ident.as_str())),
+            ast::TTTok(_, token::LIT_STR_RAW(ident, _)) => {
+                return Some(parse::raw_str_lit(ident.as_str()))
              }
              _ => {
                  cx.span_err(sp,
diff --git a/src/libsyntax/ext/quote.rs b/src/libsyntax/ext/quote.rs

index a3c901904a9484d940b856e4a086b2b59141b3f9..f950a0d3340ff6b62034dda27d9f256cb1f6721b 100644 (file)
--- a/src/libsyntax/ext/quote.rs
+++ b/src/libsyntax/ext/quote.rs
@@ -401,13 +401,13 @@ fn mk_token(cx: &ExtCtxt, sp: Span, tok: &token::Token) -> Gc<ast::Expr> {
          }
  
          LIT_BYTE(i) => {
-            let e_byte = cx.expr_lit(sp, ast::LitByte(i));
+            let e_byte = mk_ident(cx, sp, i);
  
              return cx.expr_call(sp, mk_token_path(cx, sp, "LIT_BYTE"), vec!(e_byte));
          }
  
          LIT_CHAR(i) => {
-            let e_char = cx.expr_lit(sp, ast::LitChar(i));
+            let e_char = mk_ident(cx, sp, i);
  
              return cx.expr_call(sp, mk_token_path(cx, sp, "LIT_CHAR"), vec!(e_char));
          }
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs

index 7a9051c16aed63f5e5a8f096f52e169cef5fe8c9..e1317e0ed35ca3c3d8a4ce6bbae89f3434a6e79f 100644 (file)
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -685,7 +685,7 @@ enum Result { Signed(ast::IntTy), Unsigned(ast::UintTy) }
      }
  
  
-    fn scan_numeric_escape(&mut self, n_hex_digits: uint, delim: char) -> char {
+    fn scan_numeric_escape(&mut self, n_hex_digits: uint, delim: char) -> bool {
          let mut accum_int = 0u32;
          let start_bpos = self.last_pos;
          for _ in range(0, n_hex_digits) {
@@ -709,11 +709,11 @@ fn scan_numeric_escape(&mut self, n_hex_digits: uint, delim: char) -> char {
          }
  
          match char::from_u32(accum_int) {
-            Some(x) => x,
+            Some(_) => true,
              None => {
                  let last_bpos = self.last_pos;
                  self.err_span_(start_bpos, last_bpos, "illegal numeric character escape");
-                '?'
+                false
              }
          }
      }
@@ -721,8 +721,10 @@ fn scan_numeric_escape(&mut self, n_hex_digits: uint, delim: char) -> char {
      /// Scan for a single (possibly escaped) byte or char
      /// in a byte, (non-raw) byte string, char, or (non-raw) string literal.
      /// `start` is the position of `first_source_char`, which is already consumed.
+    ///
+    /// Returns true if there was a valid char/byte, false otherwise.
      fn scan_char_or_byte(&mut self, start: BytePos, first_source_char: char,
-                         ascii_only: bool, delim: char) -> Option<char> {
+                         ascii_only: bool, delim: char) -> bool {
          match first_source_char {
              '\\' => {
                  // '\X' for some X must be a character constant:
@@ -732,24 +734,18 @@ fn scan_char_or_byte(&mut self, start: BytePos, first_source_char: char,
                  match escaped {
                      None => {},  // EOF here is an error that will be checked later.
                      Some(e) => {
-                        return Some(match e {
-                            'n' => '\n',
-                            'r' => '\r',
-                            't' => '\t',
-                            '\\' => '\\',
-                            '\'' => '\'',
-                            '"' => '"',
-                            '0' => '\x00',
+                        return match e {
+                            'n' | 'r' | 't' | '\\' | '\'' | '"' | '0' => true,
                              'x' => self.scan_numeric_escape(2u, delim),
                              'u' if !ascii_only => self.scan_numeric_escape(4u, delim),
                              'U' if !ascii_only => self.scan_numeric_escape(8u, delim),
                              '\n' if delim == '"' => {
                                  self.consume_whitespace();
-                                return None
+                                true
                              },
                              '\r' if delim == '"' && self.curr_is('\n') => {
                                  self.consume_whitespace();
-                                return None
+                                true
                              }
                              c => {
                                  let last_pos = self.last_pos;
@@ -758,9 +754,9 @@ fn scan_char_or_byte(&mut self, start: BytePos, first_source_char: char,
                                      if ascii_only { "unknown byte escape" }
                                      else { "unknown character escape" },
                                      c);
-                                c
+                                false
                              }
-                        })
+                        }
                      }
                  }
              }
@@ -771,14 +767,16 @@ fn scan_char_or_byte(&mut self, start: BytePos, first_source_char: char,
                      if ascii_only { "byte constant must be escaped" }
                      else { "character constant must be escaped" },
                      first_source_char);
+                return false;
              }
              '\r' => {
                  if self.curr_is('\n') {
                      self.bump();
-                    return Some('\n');
+                    return true;
                  } else {
                      self.err_span_(start, self.last_pos,
                                     "bare CR not allowed in string, use \\r instead");
+                    return false;
                  }
              }
              _ => if ascii_only && first_source_char > '\x7F' {
@@ -787,9 +785,10 @@ fn scan_char_or_byte(&mut self, start: BytePos, first_source_char: char,
                      start, last_pos,
                      "byte constant must be ASCII. \
                       Use a \\xHH escape for a non-ASCII byte", first_source_char);
+                return false;
              }
          }
-        Some(first_source_char)
+        true
      }
  
      fn binop(&mut self, op: token::BinOp) -> token::Token {
@@ -924,7 +923,7 @@ fn next_token_inner(&mut self) -> token::Token {
              let start = self.last_pos;
  
              // the eof will be picked up by the final `'` check below
-            let mut c2 = self.curr.unwrap_or('\x00');
+            let c2 = self.curr.unwrap_or('\x00');
              self.bump();
  
              // If the character is an ident start not followed by another single
@@ -967,7 +966,7 @@ fn next_token_inner(&mut self) -> token::Token {
              }
  
              // Otherwise it is a character constant:
-            c2 = self.scan_char_or_byte(start, c2, /* ascii_only = */ false, '\'').unwrap();
+            let valid = self.scan_char_or_byte(start, c2, /* ascii_only = */ false, '\'');
              if !self.curr_is('\'') {
                  let last_bpos = self.last_pos;
                  self.fatal_span_verbose(
@@ -977,8 +976,9 @@ fn next_token_inner(&mut self) -> token::Token {
                                     start - BytePos(1), last_bpos,
                                     "unterminated character constant".to_string());
              }
+            let id = if valid { self.ident_from(start) } else { str_to_ident("0") };
              self.bump(); // advance curr past token
-            return token::LIT_CHAR(c2);
+            return token::LIT_CHAR(id);
            }
            'b' => {
              self.bump();
@@ -991,8 +991,8 @@ fn next_token_inner(&mut self) -> token::Token {
  
            }
            '"' => {
-            let mut accum_str = String::new();
              let start_bpos = self.last_pos;
+            let mut valid = true;
              self.bump();
              while !self.curr_is('"') {
                  if self.is_eof() {
@@ -1003,11 +1003,13 @@ fn next_token_inner(&mut self) -> token::Token {
                  let ch_start = self.last_pos;
                  let ch = self.curr.unwrap();
                  self.bump();
-                self.scan_char_or_byte(ch_start, ch, /* ascii_only = */ false, '"')
-                    .map(|ch| accum_str.push_char(ch));
+                valid &= self.scan_char_or_byte(ch_start, ch, /* ascii_only = */ false, '"');
              }
+            // adjust for the ASCII " at the start of the literal
+            let id = if valid { self.ident_from(start_bpos + BytePos(1)) }
+                     else { str_to_ident("??") };
              self.bump();
-            return token::LIT_STR(str_to_ident(accum_str.as_slice()));
+            return token::LIT_STR(id);
            }
            'r' => {
              let start_bpos = self.last_pos;
@@ -1032,7 +1034,7 @@ fn next_token_inner(&mut self) -> token::Token {
              self.bump();
              let content_start_bpos = self.last_pos;
              let mut content_end_bpos;
-            let mut has_cr = false;
+            let mut valid = true;
              'outer: loop {
                  if self.is_eof() {
                      let last_bpos = self.last_pos;
@@ -1055,23 +1057,26 @@ fn next_token_inner(&mut self) -> token::Token {
                              }
                          }
                          break;
-                    }
+                    },
                      '\r' => {
-                        has_cr = true;
+                        if !self.nextch_is('\n') {
+                            let last_bpos = self.last_pos;
+                            self.err_span_(start_bpos, last_bpos, "bare CR not allowed in raw \
+                                           string, use \\r instead");
+                            valid = false;
+                        }
                      }
                      _ => ()
                  }
                  self.bump();
              }
              self.bump();
-            let str_content = self.with_str_from_to(content_start_bpos, content_end_bpos, |string| {
-                let string = if has_cr {
-                    self.translate_crlf(content_start_bpos, string,
-                                        "bare CR not allowed in raw string")
-                } else { string.into_maybe_owned() };
-                str_to_ident(string.as_slice())
-            });
-            return token::LIT_STR_RAW(str_content, hash_count);
+            let id = if valid {
+                self.ident_from_to(content_start_bpos, content_end_bpos)
+            } else {
+                str_to_ident("??")
+            };
+            return token::LIT_STR_RAW(id, hash_count);
            }
            '-' => {
              if self.nextch_is('>') {
@@ -1145,10 +1150,10 @@ fn scan_byte(&mut self) -> token::Token {
          let start = self.last_pos;
  
          // the eof will be picked up by the final `'` check below
-        let mut c2 = self.curr.unwrap_or('\x00');
+        let c2 = self.curr.unwrap_or('\x00');
          self.bump();
  
-        c2 = self.scan_char_or_byte(start, c2, /* ascii_only = */ true, '\'').unwrap();
+        let valid = self.scan_char_or_byte(start, c2, /* ascii_only = */ true, '\'');
          if !self.curr_is('\'') {
              // Byte offsetting here is okay because the
              // character before position `start` are an
@@ -1158,14 +1163,17 @@ fn scan_byte(&mut self) -> token::Token {
                  start - BytePos(2), last_pos,
                  "unterminated byte constant".to_string());
          }
+
+        let id = if valid { self.ident_from(start) } else { str_to_ident("??") };
          self.bump(); // advance curr past token
-        return token::LIT_BYTE(c2 as u8);
+        return token::LIT_BYTE(id);
      }
  
      fn scan_byte_string(&mut self) -> token::Token {
          self.bump();
          let start = self.last_pos;
-        let mut value = Vec::new();
+        let mut valid = true;
+
          while !self.curr_is('"') {
              if self.is_eof() {
                  let last_pos = self.last_pos;
@@ -1176,11 +1184,11 @@ fn scan_byte_string(&mut self) -> token::Token {
              let ch_start = self.last_pos;
              let ch = self.curr.unwrap();
              self.bump();
-            self.scan_char_or_byte(ch_start, ch, /* ascii_only = */ true, '"')
-                .map(|ch| value.push(ch as u8));
+            valid &= self.scan_char_or_byte(ch_start, ch, /* ascii_only = */ true, '"');
          }
+        let id = if valid { self.ident_from(start) } else { str_to_ident("??") };
          self.bump();
-        return token::LIT_BINARY(Rc::new(value));
+        return token::LIT_BINARY(id);
      }
  
      fn scan_raw_byte_string(&mut self) -> token::Token {
@@ -1231,10 +1239,8 @@ fn scan_raw_byte_string(&mut self) -> token::Token {
              self.bump();
          }
          self.bump();
-        let bytes = self.with_str_from_to(content_start_bpos,
-                                           content_end_bpos,
-                                           |s| s.as_bytes().to_owned());
-        return token::LIT_BINARY_RAW(Rc::new(bytes), hash_count);
+        return token::LIT_BINARY_RAW(self.ident_from_to(content_start_bpos, content_end_bpos),
+                                     hash_count);
      }
  }
  
diff --git a/src/libsyntax/parse/mod.rs b/src/libsyntax/parse/mod.rs

index bea8b6a94d43db7a5b38a75a39c3ac40c5f48943..62750e60bf8ef8dff23baea5e8aefc18c84dc2a1 100644 (file)
--- a/src/libsyntax/parse/mod.rs
+++ b/src/libsyntax/parse/mod.rs
@@ -272,7 +272,239 @@ pub fn maybe_aborted<T>(result: T, mut p: Parser) -> T {
      result
  }
  
+/// Parse a string representing a character literal into its final form.
+/// Rather than just accepting/rejecting a given literal, unescapes it as
+/// well. Can take any slice prefixed by a character escape. Returns the
+/// character and the number of characters consumed.
+pub fn char_lit(lit: &str) -> (char, int) {
+    use std::{num, char};
+
+    let mut chars = lit.chars();
+    let c = match (chars.next(), chars.next()) {
+        (Some(c), None) if c != '\\' => return (c, 1),
+        (Some('\\'), Some(c)) => match c {
+            '"' => Some('"'),
+            'n' => Some('\n'),
+            'r' => Some('\r'),
+            't' => Some('\t'),
+            '\\' => Some('\\'),
+            '\'' => Some('\''),
+            '0' => Some('\0'),
+            _ => { None }
+        },
+        _ => fail!("lexer accepted invalid char escape `{}`", lit)
+    };
+
+    match c {
+        Some(x) => return (x, 2),
+        None => { }
+    }
+
+    let msg = format!("lexer should have rejected a bad character escape {}", lit);
+    let msg2 = msg.as_slice();
+
+    let esc: |uint| -> Option<(char, int)> = |len|
+        num::from_str_radix(lit.slice(2, len), 16)
+        .and_then(char::from_u32)
+        .map(|x| (x, len as int));
+
+    // Unicode escapes
+    return match lit.as_bytes()[1] as char {
+        'x' | 'X' => esc(4),
+        'u' => esc(6),
+        'U' => esc(10),
+        _ => None,
+    }.expect(msg2);
+}
+
+/// Parse a string representing a string literal into its final form. Does
+/// unescaping.
+pub fn str_lit(lit: &str) -> String {
+    debug!("parse_str_lit: given {}", lit.escape_default());
+    let mut res = String::with_capacity(lit.len());
+
+    // FIXME #8372: This could be a for-loop if it didn't borrow the iterator
+    let error = |i| format!("lexer should have rejected {} at {}", lit, i);
+
+    /// Eat everything up to a non-whitespace
+    fn eat<'a>(it: &mut ::std::iter::Peekable<(uint, char), ::std::str::CharOffsets<'a>>) {
+        loop {
+            match it.peek().map(|x| x.val1()) {
+                Some(' ') | Some('\n') | Some('\r') | Some('\t') => {
+                    it.next();
+                },
+                _ => { break; }
+            }
+        }
+    }
+
+    let mut chars = lit.char_indices().peekable();
+    loop {
+        match chars.next() {
+            Some((i, c)) => {
+                let em = error(i);
+                match c {
+                    '\\' => {
+                        if chars.peek().expect(em.as_slice()).val1() == '\n' {
+                            eat(&mut chars);
+                        } else if chars.peek().expect(em.as_slice()).val1() == '\r' {
+                            chars.next();
+                            if chars.peek().expect(em.as_slice()).val1() != '\n' {
+                                fail!("lexer accepted bare CR");
+                            }
+                            eat(&mut chars);
+                        } else {
+                            // otherwise, a normal escape
+                            let (c, n) = char_lit(lit.slice_from(i));
+                            for _ in range(0, n - 1) { // we don't need to move past the first \
+                                chars.next();
+                            }
+                            res.push_char(c);
+                        }
+                    },
+                    '\r' => {
+                        if chars.peek().expect(em.as_slice()).val1() != '\n' {
+                            fail!("lexer accepted bare CR");
+                        }
+                        chars.next();
+                        res.push_char('\n');
+                    }
+                    c => res.push_char(c),
+                }
+            },
+            None => break
+        }
+    }
+
+    res.shrink_to_fit(); // probably not going to do anything, unless there was an escape.
+    debug!("parse_str_lit: returning {}", res);
+    res
+}
+
+/// Parse a string representing a raw string literal into its final form. The
+/// only operation this does is convert embedded CRLF into a single LF.
+pub fn raw_str_lit(lit: &str) -> String {
+    debug!("raw_str_lit: given {}", lit.escape_default());
+    let mut res = String::with_capacity(lit.len());
+
+    // FIXME #8372: This could be a for-loop if it didn't borrow the iterator
+    let mut chars = lit.chars().peekable();
+    loop {
+        match chars.next() {
+            Some(c) => {
+                if c == '\r' {
+                    if *chars.peek().unwrap() != '\n' {
+                        fail!("lexer accepted bare CR");
+                    }
+                    chars.next();
+                    res.push_char('\n');
+                } else {
+                    res.push_char(c);
+                }
+            },
+            None => break
+        }
+    }
  
+    res.shrink_to_fit();
+    res
+}
+
+pub fn float_lit(s: &str) -> ast::Lit_ {
+    debug!("float_lit: {}", s);
+    // FIXME #2252: bounds checking float literals is deferred until trans
+    let s2 = s.chars().filter(|&c| c != '_').collect::<String>();
+    let s = s2.as_slice();
+
+    let mut ty = None;
+
+    if s.ends_with("f32") {
+        ty = Some(ast::TyF32);
+    } else if s.ends_with("f64") {
+        ty = Some(ast::TyF64);
+    }
+
+
+    match ty {
+        Some(t) => {
+            ast::LitFloat(token::intern_and_get_ident(s.slice_to(s.len() - t.suffix_len())), t)
+        },
+        None => ast::LitFloatUnsuffixed(token::intern_and_get_ident(s))
+    }
+}
+
+/// Parse a string representing a byte literal into its final form. Similar to `char_lit`
+pub fn byte_lit(lit: &str) -> (u8, uint) {
+    let err = |i| format!("lexer accepted invalid byte literal {} step {}", lit, i);
+
+    if lit.len() == 1 {
+        (lit.as_bytes()[0], 1)
+    } else {
+        assert!(lit.as_bytes()[0] == b'\\', err(0i));
+        let b = match lit.as_bytes()[1] {
+            b'"' => b'"',
+            b'n' => b'\n',
+            b'r' => b'\r',
+            b't' => b'\t',
+            b'\\' => b'\\',
+            b'\'' => b'\'',
+            b'0' => b'\0',
+            _ => {
+                match ::std::num::from_str_radix::<u64>(lit.slice(2, 4), 16) {
+                    Some(c) =>
+                        if c > 0xFF {
+                            fail!(err(2))
+                        } else {
+                            return (c as u8, 4)
+                        },
+                    None => fail!(err(3))
+                }
+            }
+        };
+        return (b, 2);
+    }
+}
+
+pub fn binary_lit(lit: &str) -> Rc<Vec<u8>> {
+    let mut res = Vec::with_capacity(lit.len());
+
+    // FIXME #8372: This could be a for-loop if it didn't borrow the iterator
+    let error = |i| format!("lexer should have rejected {} at {}", lit, i);
+
+    // binary literals *must* be ASCII, but the escapes don't have to be
+    let mut chars = lit.as_bytes().iter().enumerate().peekable();
+    loop {
+        match chars.next() {
+            Some((i, &c)) => {
+                if c == b'\\' {
+                    if *chars.peek().expect(error(i).as_slice()).val1() == b'\n' {
+                        loop {
+                            // eat everything up to a non-whitespace
+                            match chars.peek().map(|x| *x.val1()) {
+                                Some(b' ') | Some(b'\n') | Some(b'\r') | Some(b'\t') => {
+                                    chars.next();
+                                },
+                                _ => { break; }
+                            }
+                        }
+                    } else {
+                        // otherwise, a normal escape
+                        let (c, n) = byte_lit(lit.slice_from(i));
+                        for _ in range(0, n - 1) { // we don't need to move past the first \
+                            chars.next();
+                        }
+                        res.push(c);
+                    }
+                } else {
+                    res.push(c);
+                }
+            },
+            None => { break; }
+        }
+    }
+
+    Rc::new(res)
+}
  
  #[cfg(test)]
  mod test {
diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs

index 3bf88424891bb2b4e8ab3d8cd536eeff13187e96..553e685bddecef13c24389dd6085881293a03969 100644 (file)
--- a/src/libsyntax/parse/parser.rs
+++ b/src/libsyntax/parse/parser.rs
@@ -61,6 +61,7 @@
  use ast_util;
  use codemap::{Span, BytePos, Spanned, spanned, mk_sp};
  use codemap;
+use parse;
  use parse::attr::ParserAttr;
  use parse::classify;
  use parse::common::{SeqSep, seq_sep_none};
@@ -1543,8 +1544,8 @@ pub fn maybe_parse_fixed_vstore(&mut self) -> Option<Gc<ast::Expr>> {
      /// Matches token_lit = LIT_INT | ...
      pub fn lit_from_token(&mut self, tok: &token::Token) -> Lit_ {
          match *tok {
-            token::LIT_BYTE(i) => LitByte(i),
-            token::LIT_CHAR(i) => LitChar(i),
+            token::LIT_BYTE(i) => LitByte(parse::byte_lit(i.as_str()).val0()),
+            token::LIT_CHAR(i) => LitChar(parse::char_lit(i.as_str()).val0()),
              token::LIT_INT(i, it) => LitInt(i, it),
              token::LIT_UINT(u, ut) => LitUint(u, ut),
              token::LIT_INT_UNSUFFIXED(i) => LitIntUnsuffixed(i),
@@ -1555,13 +1556,17 @@ pub fn lit_from_token(&mut self, tok: &token::Token) -> Lit_ {
                  LitFloatUnsuffixed(self.id_to_interned_str(s))
              }
              token::LIT_STR(s) => {
-                LitStr(self.id_to_interned_str(s), ast::CookedStr)
+                LitStr(token::intern_and_get_ident(parse::str_lit(s.as_str()).as_slice()),
+                       ast::CookedStr)
              }
              token::LIT_STR_RAW(s, n) => {
-                LitStr(self.id_to_interned_str(s), ast::RawStr(n))
+                LitStr(token::intern_and_get_ident(parse::raw_str_lit(s.as_str()).as_slice()),
+                       ast::RawStr(n))
              }
-            token::LIT_BINARY_RAW(ref v, _) |
-            token::LIT_BINARY(ref v) => LitBinary(v.clone()),
+            token::LIT_BINARY(i) =>
+                LitBinary(parse::binary_lit(self.id_to_interned_str(i).get())),
+            token::LIT_BINARY_RAW(i, _) =>
+                LitBinary(Rc::new(i.as_str().as_bytes().iter().map(|&x| x).collect())),
              token::LPAREN => { self.expect(&token::RPAREN); LitNil },
              _ => { self.unexpected_last(tok); }
          }
diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs

index c7aeae04ba2fa0c5ee336bbf9f798fd069f8db37..bb6183b7e9ea37374fb872a8fa168c206da9de2e 100644 (file)
--- a/src/libsyntax/parse/token.rs
+++ b/src/libsyntax/parse/token.rs
@@ -79,8 +79,8 @@ pub enum Token {
      QUESTION,
  
      /* Literals */
-    LIT_BYTE(u8),
-    LIT_CHAR(char),
+    LIT_BYTE(Ident),
+    LIT_CHAR(Ident),
      LIT_INT(i64, ast::IntTy),
      LIT_UINT(u64, ast::UintTy),
      LIT_INT_UNSUFFIXED(i64),
@@ -88,8 +88,8 @@ pub enum Token {
      LIT_FLOAT_UNSUFFIXED(Ident),
      LIT_STR(Ident),
      LIT_STR_RAW(Ident, uint), /* raw str delimited by n hash symbols */
-    LIT_BINARY(Rc<Vec<u8>>),
-    LIT_BINARY_RAW(Rc<Vec<u8>>, uint), /* raw binary str delimited by n hash symbols */
+    LIT_BINARY(Ident),
+    LIT_BINARY_RAW(Ident, uint), /* raw binary str delimited by n hash symbols */
  
      /* Name components */
      /// An identifier contains an "is_mod_name" boolean,
@@ -201,20 +201,10 @@ pub fn to_string(t: &Token) -> String {
  
        /* Literals */
        LIT_BYTE(b) => {
-          let mut res = String::from_str("b'");
-          (b as char).escape_default(|c| {
-              res.push_char(c);
-          });
-          res.push_char('\'');
-          res
+          format!("b'{}'", get_ident(b).get())
        }
        LIT_CHAR(c) => {
-          let mut res = String::from_str("'");
-          c.escape_default(|c| {
-              res.push_char(c);
-          });
-          res.push_char('\'');
-          res
+          format!("'{}'", get_ident(c).get())
        }
        LIT_INT(i, t) => ast_util::int_ty_to_string(t, Some(i)),
        LIT_UINT(u, t) => ast_util::uint_ty_to_string(t, Some(u)),
@@ -235,20 +225,18 @@ pub fn to_string(t: &Token) -> String {
          body
        }
        LIT_STR(s) => {
-          format!("\"{}\"", get_ident(s).get().escape_default())
+          format!("\"{}\"", get_ident(s).get())
        }
        LIT_STR_RAW(s, n) => {
          format!("r{delim}\"{string}\"{delim}",
                   delim="#".repeat(n), string=get_ident(s))
        }
-      LIT_BINARY(ref v) => {
-          format!(
-            "b\"{}\"",
-            v.iter().map(|&b| b as char).collect::<String>().escape_default())
+      LIT_BINARY(v) => {
+          format!("b\"{}\"", get_ident(v).get())
        }
-      LIT_BINARY_RAW(ref s, n) => {
+      LIT_BINARY_RAW(s, n) => {
          format!("br{delim}\"{string}\"{delim}",
-                 delim="#".repeat(n), string=s.as_slice().to_ascii().as_str_ascii())
+                 delim="#".repeat(n), string=get_ident(s).get())
        }
  
        /* Name components */
diff --git a/src/test/run-pass/string-escapes.rs b/src/test/run-pass/string-escapes.rs

new file mode 100644 (file)

index 0000000..7abe827
--- /dev/null
+++ b/src/test/run-pass/string-escapes.rs
@@ -0,0 +1,15 @@
+// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+fn main() {
+    let x = "\\\\\
+    ";
+    assert!(x == r"\\"); // extraneous whitespace stripped
+}
author	Corey Richardson <corey@octayn.net>
	Thu, 3 Jul 2014 07:47:30 +0000 (00:47 -0700)
committer	Corey Richardson <corey@octayn.net>
	Wed, 9 Jul 2014 07:06:29 +0000 (00:06 -0700)
src/libsyntax/ext/base.rs		patch \| blob \| history
src/libsyntax/ext/quote.rs		patch \| blob \| history
src/libsyntax/parse/lexer/mod.rs		patch \| blob \| history
src/libsyntax/parse/mod.rs		patch \| blob \| history
src/libsyntax/parse/parser.rs		patch \| blob \| history
src/libsyntax/parse/token.rs		patch \| blob \| history
src/test/run-pass/string-escapes.rs	[new file with mode: 0644]	patch \| blob