cx.span_err(sp, format!("{} takes 1 argument.", name).as_slice());
} else {
match tts[0] {
- ast::TTTok(_, token::LIT_STR(ident))
- | ast::TTTok(_, token::LIT_STR_RAW(ident, _)) => {
- return Some(token::get_ident(ident).get().to_string())
+ ast::TTTok(_, token::LIT_STR(ident)) => return Some(parse::str_lit(ident.as_str())),
+ ast::TTTok(_, token::LIT_STR_RAW(ident, _)) => {
+ return Some(parse::raw_str_lit(ident.as_str()))
}
_ => {
cx.span_err(sp,
}
LIT_BYTE(i) => {
- let e_byte = cx.expr_lit(sp, ast::LitByte(i));
+ let e_byte = mk_ident(cx, sp, i);
return cx.expr_call(sp, mk_token_path(cx, sp, "LIT_BYTE"), vec!(e_byte));
}
LIT_CHAR(i) => {
- let e_char = cx.expr_lit(sp, ast::LitChar(i));
+ let e_char = mk_ident(cx, sp, i);
return cx.expr_call(sp, mk_token_path(cx, sp, "LIT_CHAR"), vec!(e_char));
}
}
- fn scan_numeric_escape(&mut self, n_hex_digits: uint, delim: char) -> char {
+ fn scan_numeric_escape(&mut self, n_hex_digits: uint, delim: char) -> bool {
let mut accum_int = 0u32;
let start_bpos = self.last_pos;
for _ in range(0, n_hex_digits) {
}
match char::from_u32(accum_int) {
- Some(x) => x,
+ Some(_) => true,
None => {
let last_bpos = self.last_pos;
self.err_span_(start_bpos, last_bpos, "illegal numeric character escape");
- '?'
+ false
}
}
}
/// Scan for a single (possibly escaped) byte or char
/// in a byte, (non-raw) byte string, char, or (non-raw) string literal.
/// `start` is the position of `first_source_char`, which is already consumed.
+ ///
+ /// Returns true if there was a valid char/byte, false otherwise.
fn scan_char_or_byte(&mut self, start: BytePos, first_source_char: char,
- ascii_only: bool, delim: char) -> Option<char> {
+ ascii_only: bool, delim: char) -> bool {
match first_source_char {
'\\' => {
// '\X' for some X must be a character constant:
match escaped {
None => {}, // EOF here is an error that will be checked later.
Some(e) => {
- return Some(match e {
- 'n' => '\n',
- 'r' => '\r',
- 't' => '\t',
- '\\' => '\\',
- '\'' => '\'',
- '"' => '"',
- '0' => '\x00',
+ return match e {
+ 'n' | 'r' | 't' | '\\' | '\'' | '"' | '0' => true,
'x' => self.scan_numeric_escape(2u, delim),
'u' if !ascii_only => self.scan_numeric_escape(4u, delim),
'U' if !ascii_only => self.scan_numeric_escape(8u, delim),
'\n' if delim == '"' => {
self.consume_whitespace();
- return None
+ true
},
'\r' if delim == '"' && self.curr_is('\n') => {
self.consume_whitespace();
- return None
+ true
}
c => {
let last_pos = self.last_pos;
if ascii_only { "unknown byte escape" }
else { "unknown character escape" },
c);
- c
+ false
}
- })
+ }
}
}
}
if ascii_only { "byte constant must be escaped" }
else { "character constant must be escaped" },
first_source_char);
+ return false;
}
'\r' => {
if self.curr_is('\n') {
self.bump();
- return Some('\n');
+ return true;
} else {
self.err_span_(start, self.last_pos,
"bare CR not allowed in string, use \\r instead");
+ return false;
}
}
_ => if ascii_only && first_source_char > '\x7F' {
start, last_pos,
"byte constant must be ASCII. \
Use a \\xHH escape for a non-ASCII byte", first_source_char);
+ return false;
}
}
- Some(first_source_char)
+ true
}
fn binop(&mut self, op: token::BinOp) -> token::Token {
let start = self.last_pos;
// the eof will be picked up by the final `'` check below
- let mut c2 = self.curr.unwrap_or('\x00');
+ let c2 = self.curr.unwrap_or('\x00');
self.bump();
// If the character is an ident start not followed by another single
}
// Otherwise it is a character constant:
- c2 = self.scan_char_or_byte(start, c2, /* ascii_only = */ false, '\'').unwrap();
+ let valid = self.scan_char_or_byte(start, c2, /* ascii_only = */ false, '\'');
if !self.curr_is('\'') {
let last_bpos = self.last_pos;
self.fatal_span_verbose(
start - BytePos(1), last_bpos,
"unterminated character constant".to_string());
}
+ let id = if valid { self.ident_from(start) } else { str_to_ident("0") };
self.bump(); // advance curr past token
- return token::LIT_CHAR(c2);
+ return token::LIT_CHAR(id);
}
'b' => {
self.bump();
}
'"' => {
- let mut accum_str = String::new();
let start_bpos = self.last_pos;
+ let mut valid = true;
self.bump();
while !self.curr_is('"') {
if self.is_eof() {
let ch_start = self.last_pos;
let ch = self.curr.unwrap();
self.bump();
- self.scan_char_or_byte(ch_start, ch, /* ascii_only = */ false, '"')
- .map(|ch| accum_str.push_char(ch));
+ valid &= self.scan_char_or_byte(ch_start, ch, /* ascii_only = */ false, '"');
}
+ // adjust for the ASCII " at the start of the literal
+ let id = if valid { self.ident_from(start_bpos + BytePos(1)) }
+ else { str_to_ident("??") };
self.bump();
- return token::LIT_STR(str_to_ident(accum_str.as_slice()));
+ return token::LIT_STR(id);
}
'r' => {
let start_bpos = self.last_pos;
self.bump();
let content_start_bpos = self.last_pos;
let mut content_end_bpos;
- let mut has_cr = false;
+ let mut valid = true;
'outer: loop {
if self.is_eof() {
let last_bpos = self.last_pos;
}
}
break;
- }
+ },
'\r' => {
- has_cr = true;
+ if !self.nextch_is('\n') {
+ let last_bpos = self.last_pos;
+ self.err_span_(start_bpos, last_bpos, "bare CR not allowed in raw \
+ string, use \\r instead");
+ valid = false;
+ }
}
_ => ()
}
self.bump();
}
self.bump();
- let str_content = self.with_str_from_to(content_start_bpos, content_end_bpos, |string| {
- let string = if has_cr {
- self.translate_crlf(content_start_bpos, string,
- "bare CR not allowed in raw string")
- } else { string.into_maybe_owned() };
- str_to_ident(string.as_slice())
- });
- return token::LIT_STR_RAW(str_content, hash_count);
+ let id = if valid {
+ self.ident_from_to(content_start_bpos, content_end_bpos)
+ } else {
+ str_to_ident("??")
+ };
+ return token::LIT_STR_RAW(id, hash_count);
}
'-' => {
if self.nextch_is('>') {
let start = self.last_pos;
// the eof will be picked up by the final `'` check below
- let mut c2 = self.curr.unwrap_or('\x00');
+ let c2 = self.curr.unwrap_or('\x00');
self.bump();
- c2 = self.scan_char_or_byte(start, c2, /* ascii_only = */ true, '\'').unwrap();
+ let valid = self.scan_char_or_byte(start, c2, /* ascii_only = */ true, '\'');
if !self.curr_is('\'') {
// Byte offsetting here is okay because the
// character before position `start` are an
start - BytePos(2), last_pos,
"unterminated byte constant".to_string());
}
+
+ let id = if valid { self.ident_from(start) } else { str_to_ident("??") };
self.bump(); // advance curr past token
- return token::LIT_BYTE(c2 as u8);
+ return token::LIT_BYTE(id);
}
fn scan_byte_string(&mut self) -> token::Token {
self.bump();
let start = self.last_pos;
- let mut value = Vec::new();
+ let mut valid = true;
+
while !self.curr_is('"') {
if self.is_eof() {
let last_pos = self.last_pos;
let ch_start = self.last_pos;
let ch = self.curr.unwrap();
self.bump();
- self.scan_char_or_byte(ch_start, ch, /* ascii_only = */ true, '"')
- .map(|ch| value.push(ch as u8));
+ valid &= self.scan_char_or_byte(ch_start, ch, /* ascii_only = */ true, '"');
}
+ let id = if valid { self.ident_from(start) } else { str_to_ident("??") };
self.bump();
- return token::LIT_BINARY(Rc::new(value));
+ return token::LIT_BINARY(id);
}
fn scan_raw_byte_string(&mut self) -> token::Token {
self.bump();
}
self.bump();
- let bytes = self.with_str_from_to(content_start_bpos,
- content_end_bpos,
- |s| s.as_bytes().to_owned());
- return token::LIT_BINARY_RAW(Rc::new(bytes), hash_count);
+ return token::LIT_BINARY_RAW(self.ident_from_to(content_start_bpos, content_end_bpos),
+ hash_count);
}
}
result
}
+/// Unescape the source text of a character literal.
+///
+/// `lit` must either be exactly one non-backslash character, or start with a
+/// character escape (anything after the escape is ignored). The lexer is
+/// expected to have validated the text already, so malformed input is treated
+/// as an internal error and fails the task.
+///
+/// Returns the decoded character together with the number of characters of
+/// `lit` that were consumed to produce it.
+pub fn char_lit(lit: &str) -> (char, int) {
+    use std::{num, char};
+
+    let mut rest = lit.chars();
+    let first = rest.next();
+    let second = rest.next();
+
+    // Single-character escapes decode to `Some`; a backslash followed by
+    // anything else yields `None` and is handled as a numeric escape below.
+    let simple = match (first, second) {
+        (Some(c), None) if c != '\\' => return (c, 1),
+        (Some('\\'), Some(kind)) => match kind {
+            '"' => Some('"'),
+            'n' => Some('\n'),
+            'r' => Some('\r'),
+            't' => Some('\t'),
+            '\\' => Some('\\'),
+            '\'' => Some('\''),
+            '0' => Some('\0'),
+            _ => None
+        },
+        _ => fail!("lexer accepted invalid char escape `{}`", lit)
+    };
+
+    if simple.is_some() {
+        return (simple.unwrap(), 2);
+    }
+
+    let bad_escape = format!("lexer should have rejected a bad character escape {}", lit);
+
+    // Numeric escapes: the marker after the backslash fixes the offset at
+    // which the run of hex digits (starting at offset 2) ends.
+    let end = match lit.as_bytes()[1] as char {
+        'x' | 'X' => Some(4u),
+        'u' => Some(6u),
+        'U' => Some(10u),
+        _ => None,
+    };
+
+    let decoded = match end {
+        Some(len) => num::from_str_radix(lit.slice(2, len), 16)
+                         .and_then(char::from_u32)
+                         .map(|x| (x, len as int)),
+        None => None,
+    };
+    decoded.expect(bad_escape.as_slice())
+}
+
+/// Parse a string representing a string literal into its final form. Does
+/// unescaping.
+///
+/// `lit` is the source text of a (non-raw) string literal. Three things are
+/// rewritten while copying it into the result:
+///
+/// * a backslash followed by LF or CRLF is a line continuation: it and all
+///   whitespace after it are dropped;
+/// * any other backslash starts an escape, decoded by `char_lit`;
+/// * CRLF outside an escape becomes a single LF.
+///
+/// The lexer is expected to have rejected malformed literals, so a bare CR or
+/// a trailing backslash here fails the task.
+pub fn str_lit(lit: &str) -> String {
+    debug!("parse_str_lit: given {}", lit.escape_default());
+    let mut res = String::with_capacity(lit.len());
+
+    // Message for the "lexer let something invalid through" failures. It embeds
+    // the whole literal, so it is only built in the arms that can need it
+    // rather than once per character (which was quadratic in `lit.len()`).
+    let error = |i| format!("lexer should have rejected {} at {}", lit, i);
+
+    /// Eat everything up to a non-whitespace
+    fn eat<'a>(it: &mut ::std::iter::Peekable<(uint, char), ::std::str::CharOffsets<'a>>) {
+        loop {
+            match it.peek().map(|x| x.val1()) {
+                Some(' ') | Some('\n') | Some('\r') | Some('\t') => {
+                    it.next();
+                },
+                _ => { break; }
+            }
+        }
+    }
+
+    // FIXME #8372: This could be a for-loop if it didn't borrow the iterator
+    let mut chars = lit.char_indices().peekable();
+    loop {
+        match chars.next() {
+            Some((i, c)) => {
+                match c {
+                    '\\' => {
+                        let em = error(i);
+                        if chars.peek().expect(em.as_slice()).val1() == '\n' {
+                            eat(&mut chars);
+                        } else if chars.peek().expect(em.as_slice()).val1() == '\r' {
+                            chars.next();
+                            if chars.peek().expect(em.as_slice()).val1() != '\n' {
+                                fail!("lexer accepted bare CR");
+                            }
+                            eat(&mut chars);
+                        } else {
+                            // otherwise, a normal escape
+                            let (c, n) = char_lit(lit.slice_from(i));
+                            for _ in range(0, n - 1) { // we don't need to move past the first \
+                                chars.next();
+                            }
+                            res.push_char(c);
+                        }
+                    },
+                    '\r' => {
+                        let em = error(i);
+                        if chars.peek().expect(em.as_slice()).val1() != '\n' {
+                            fail!("lexer accepted bare CR");
+                        }
+                        chars.next();
+                        res.push_char('\n');
+                    }
+                    c => res.push_char(c),
+                }
+            },
+            None => break
+        }
+    }
+
+    res.shrink_to_fit(); // probably not going to do anything, unless there was an escape.
+    debug!("parse_str_lit: returning {}", res);
+    res
+}
+
+/// Produce the value of a raw string literal from its source text.
+///
+/// Raw strings have no escapes; the only transformation is that each CRLF
+/// pair in `lit` becomes a single LF. A CR that is not followed by LF should
+/// already have been rejected by the lexer, so meeting one here fails the task.
+pub fn raw_str_lit(lit: &str) -> String {
+    debug!("raw_str_lit: given {}", lit.escape_default());
+    let mut out = String::with_capacity(lit.len());
+
+    // FIXME #8372: This could be a for-loop if it didn't borrow the iterator
+    let mut rest = lit.chars().peekable();
+    loop {
+        let ch = match rest.next() {
+            Some(ch) => ch,
+            None => break
+        };
+        match ch {
+            '\r' => {
+                if *rest.peek().unwrap() != '\n' {
+                    fail!("lexer accepted bare CR");
+                }
+                // Drop the LF of the pair; a single LF stands in for both.
+                rest.next();
+                out.push_char('\n');
+            }
+            other => out.push_char(other),
+        }
+    }
+    out.shrink_to_fit();
+    out
+}
+
+/// Build the AST literal for the source text of a float literal.
+///
+/// `_` separators are removed first. If what remains ends in `f32` or `f64`,
+/// the result is a `LitFloat` of that type whose interned text has the last
+/// `suffix_len()` bytes cut off; otherwise it is a `LitFloatUnsuffixed`
+/// holding the whole cleaned text.
+pub fn float_lit(s: &str) -> ast::Lit_ {
+    debug!("float_lit: {}", s);
+    // FIXME #2252: bounds checking float literals is deferred until trans
+    let cleaned = s.chars().filter(|&c| c != '_').collect::<String>();
+    let text = cleaned.as_slice();
+
+    let ty = if text.ends_with("f32") {
+        Some(ast::TyF32)
+    } else if text.ends_with("f64") {
+        Some(ast::TyF64)
+    } else {
+        None
+    };
+
+    match ty {
+        None => ast::LitFloatUnsuffixed(token::intern_and_get_ident(text)),
+        Some(t) => {
+            let digits = text.slice_to(text.len() - t.suffix_len());
+            ast::LitFloat(token::intern_and_get_ident(digits), t)
+        }
+    }
+}
+
+/// Unescape the source text of a byte literal. Similar to `char_lit`.
+///
+/// A one-byte `lit` is returned as is. Otherwise `lit` must start with a
+/// backslash: the single-character escapes decode to their byte, and anything
+/// else is read as two hex digits at offsets 2..4. Input the lexer should
+/// have rejected fails the task.
+///
+/// Returns the byte and the number of bytes of `lit` consumed (1, 2 or 4).
+pub fn byte_lit(lit: &str) -> (u8, uint) {
+    let err = |i| format!("lexer accepted invalid byte literal {} step {}", lit, i);
+    let bytes = lit.as_bytes();
+
+    if bytes.len() == 1 {
+        return (bytes[0], 1);
+    }
+
+    assert!(bytes[0] == b'\\', err(0i));
+    let simple = match bytes[1] {
+        b'"' => Some(b'"'),
+        b'n' => Some(b'\n'),
+        b'r' => Some(b'\r'),
+        b't' => Some(b'\t'),
+        b'\\' => Some(b'\\'),
+        b'\'' => Some(b'\''),
+        b'0' => Some(b'\0'),
+        _ => None,
+    };
+
+    match simple {
+        Some(b) => (b, 2),
+        // Not a single-character escape: decode the hex digits.
+        None => match ::std::num::from_str_radix::<u64>(lit.slice(2, 4), 16) {
+            Some(c) if c <= 0xFF => (c as u8, 4),
+            Some(_) => fail!(err(2)),
+            None => fail!(err(3))
+        }
+    }
+}
+
+/// Parse a string representing a (non-raw) byte string literal into its final
+/// form. Does unescaping.
+///
+/// Mirrors `str_lit`, but works on bytes:
+///
+/// * a backslash followed by LF or CRLF is a line continuation: it and all
+///   whitespace after it are dropped;
+/// * any other backslash starts an escape, decoded by `byte_lit`;
+/// * CRLF outside an escape becomes a single LF byte.
+///
+/// The lexer accepts both CRLF forms in byte strings, so they have to be
+/// handled here rather than being passed to `byte_lit` (which would fail) or
+/// copied through with the CR intact. A bare CR or a trailing backslash
+/// should have been rejected by the lexer and fails the task.
+pub fn binary_lit(lit: &str) -> Rc<Vec<u8>> {
+    let mut res = Vec::with_capacity(lit.len());
+
+    let error = |i| format!("lexer should have rejected {} at {}", lit, i);
+
+    // binary literals *must* be ASCII, but the escapes don't have to be
+    // FIXME #8372: This could be a for-loop if it didn't borrow the iterator
+    let mut chars = lit.as_bytes().iter().enumerate().peekable();
+    loop {
+        match chars.next() {
+            Some((i, &c)) => {
+                if c == b'\\' {
+                    let next = *chars.peek().expect(error(i).as_slice()).val1();
+                    if next == b'\r' {
+                        // `\` CR LF continues the line just like `\` LF does;
+                        // step over the CR and insist on the LF.
+                        chars.next();
+                        if *chars.peek().expect(error(i).as_slice()).val1() != b'\n' {
+                            fail!("lexer accepted bare CR");
+                        }
+                    }
+                    if next == b'\n' || next == b'\r' {
+                        loop {
+                            // eat everything up to a non-whitespace
+                            match chars.peek().map(|x| *x.val1()) {
+                                Some(b' ') | Some(b'\n') | Some(b'\r') | Some(b'\t') => {
+                                    chars.next();
+                                },
+                                _ => { break; }
+                            }
+                        }
+                    } else {
+                        // otherwise, a normal escape
+                        let (c, n) = byte_lit(lit.slice_from(i));
+                        for _ in range(0, n - 1) { // we don't need to move past the first \
+                            chars.next();
+                        }
+                        res.push(c);
+                    }
+                } else if c == b'\r' {
+                    // CRLF in the source is a single LF in the value.
+                    if *chars.peek().expect(error(i).as_slice()).val1() != b'\n' {
+                        fail!("lexer accepted bare CR");
+                    }
+                    chars.next();
+                    res.push(b'\n');
+                } else {
+                    res.push(c);
+                }
+            },
+            None => { break; }
+        }
+    }
+
+    Rc::new(res)
+}
#[cfg(test)]
mod test {
use ast_util;
use codemap::{Span, BytePos, Spanned, spanned, mk_sp};
use codemap;
+use parse;
use parse::attr::ParserAttr;
use parse::classify;
use parse::common::{SeqSep, seq_sep_none};
/// Matches token_lit = LIT_INT | ...
pub fn lit_from_token(&mut self, tok: &token::Token) -> Lit_ {
match *tok {
- token::LIT_BYTE(i) => LitByte(i),
- token::LIT_CHAR(i) => LitChar(i),
+ token::LIT_BYTE(i) => LitByte(parse::byte_lit(i.as_str()).val0()),
+ token::LIT_CHAR(i) => LitChar(parse::char_lit(i.as_str()).val0()),
token::LIT_INT(i, it) => LitInt(i, it),
token::LIT_UINT(u, ut) => LitUint(u, ut),
token::LIT_INT_UNSUFFIXED(i) => LitIntUnsuffixed(i),
LitFloatUnsuffixed(self.id_to_interned_str(s))
}
token::LIT_STR(s) => {
- LitStr(self.id_to_interned_str(s), ast::CookedStr)
+ LitStr(token::intern_and_get_ident(parse::str_lit(s.as_str()).as_slice()),
+ ast::CookedStr)
}
token::LIT_STR_RAW(s, n) => {
- LitStr(self.id_to_interned_str(s), ast::RawStr(n))
+ LitStr(token::intern_and_get_ident(parse::raw_str_lit(s.as_str()).as_slice()),
+ ast::RawStr(n))
}
- token::LIT_BINARY_RAW(ref v, _) |
- token::LIT_BINARY(ref v) => LitBinary(v.clone()),
+ token::LIT_BINARY(i) =>
+ LitBinary(parse::binary_lit(self.id_to_interned_str(i).get())),
+ token::LIT_BINARY_RAW(i, _) =>
+ LitBinary(Rc::new(i.as_str().as_bytes().iter().map(|&x| x).collect())),
token::LPAREN => { self.expect(&token::RPAREN); LitNil },
_ => { self.unexpected_last(tok); }
}
QUESTION,
/* Literals */
- LIT_BYTE(u8),
- LIT_CHAR(char),
+ LIT_BYTE(Ident),
+ LIT_CHAR(Ident),
LIT_INT(i64, ast::IntTy),
LIT_UINT(u64, ast::UintTy),
LIT_INT_UNSUFFIXED(i64),
LIT_FLOAT_UNSUFFIXED(Ident),
LIT_STR(Ident),
LIT_STR_RAW(Ident, uint), /* raw str delimited by n hash symbols */
- LIT_BINARY(Rc<Vec<u8>>),
- LIT_BINARY_RAW(Rc<Vec<u8>>, uint), /* raw binary str delimited by n hash symbols */
+ LIT_BINARY(Ident),
+ LIT_BINARY_RAW(Ident, uint), /* raw binary str delimited by n hash symbols */
/* Name components */
/// An identifier contains an "is_mod_name" boolean,
/* Literals */
LIT_BYTE(b) => {
- let mut res = String::from_str("b'");
- (b as char).escape_default(|c| {
- res.push_char(c);
- });
- res.push_char('\'');
- res
+ format!("b'{}'", get_ident(b).get())
}
LIT_CHAR(c) => {
- let mut res = String::from_str("'");
- c.escape_default(|c| {
- res.push_char(c);
- });
- res.push_char('\'');
- res
+ format!("'{}'", get_ident(c).get())
}
LIT_INT(i, t) => ast_util::int_ty_to_string(t, Some(i)),
LIT_UINT(u, t) => ast_util::uint_ty_to_string(t, Some(u)),
body
}
LIT_STR(s) => {
- format!("\"{}\"", get_ident(s).get().escape_default())
+ format!("\"{}\"", get_ident(s).get())
}
LIT_STR_RAW(s, n) => {
format!("r{delim}\"{string}\"{delim}",
delim="#".repeat(n), string=get_ident(s))
}
- LIT_BINARY(ref v) => {
- format!(
- "b\"{}\"",
- v.iter().map(|&b| b as char).collect::<String>().escape_default())
+ LIT_BINARY(v) => {
+ format!("b\"{}\"", get_ident(v).get())
}
- LIT_BINARY_RAW(ref s, n) => {
+ LIT_BINARY_RAW(s, n) => {
format!("br{delim}\"{string}\"{delim}",
- delim="#".repeat(n), string=s.as_slice().to_ascii().as_str_ascii())
+ delim="#".repeat(n), string=get_ident(s).get())
}
/* Name components */
--- /dev/null
+// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// A backslash at the end of a line inside a string literal drops the newline
+// and the whitespace that follows it, so only the two escaped backslashes
+// remain in the value.
+fn main() {
+    let folded = "\\\\\
+    ";
+    let expected = r"\\";
+    assert!(folded == expected); // extraneous whitespace stripped
+}