// share the implementation of the lang-item vs. non-lang-item
// eq_slice.
+/// NOTE: This function is (ab)used in rustc::middle::trans::_match
+/// to compare &[u8] byte slices that are not necessarily valid UTF-8.
#[inline]
fn eq_slice_(a: &str, b: &str) -> bool {
#[allow(ctypes)]
}
/// Bytewise slice equality
+/// NOTE: This function is (ab)used in rustc::middle::trans::_match
+/// to compare &[u8] byte slices that are not necessarily valid UTF-8.
#[cfg(not(test))]
#[lang="str_eq"]
#[inline]
#[allow(unused_variable)]
fn run(&mut self, start: uint, end: uint) -> Vec<Option<uint>> {
let mut matched = false;
- let prefix_bytes: &[u8] = &$prefix_bytes;
+ let prefix_bytes: &[u8] = $prefix_bytes;
let mut clist = &mut Threads::new(self.which);
let mut nlist = &mut Threads::new(self.which);
(&const_float(a), &const_float(b)) => compare_vals(a, b),
(&const_str(ref a), &const_str(ref b)) => compare_vals(a, b),
(&const_bool(a), &const_bool(b)) => compare_vals(a, b),
+ (&const_binary(ref a), &const_binary(ref b)) => compare_vals(a, b),
_ => None
}
}
val: bool_to_i1(result.bcx, result.val)
}
}
- _ => cx.sess().bug("only scalars and strings supported in compare_values"),
+ _ => cx.sess().bug("only strings supported in compare_values"),
},
// Comparisons through a reference are only supported for `&str` and `&[u8]`.
ty::ty_rptr(_, mt) => match ty::get(mt.ty).sty {
ty::ty_str => compare_str(cx, lhs, rhs, rhs_t),
- _ => cx.sess().bug("only scalars and strings supported in compare_values"),
+ ty::ty_vec(mt, _) => match ty::get(mt.ty).sty {
+ ty::ty_uint(ast::TyU8) => {
+ // NOTE: cast &[u8] to &str and abuse the str_eq lang item,
+ // which calls memcmp().
+ let t = ty::mk_str_slice(cx.tcx(), ty::ReStatic, ast::MutImmutable);
+ let lhs = BitCast(cx, lhs, type_of::type_of(cx.ccx(), t).ptr_to());
+ let rhs = BitCast(cx, rhs, type_of::type_of(cx.ccx(), t).ptr_to());
+ compare_str(cx, lhs, rhs, rhs_t)
+ },
+ // Slices of any element type other than u8 cannot be compared here.
+ _ => cx.sess().bug("only byte strings supported in compare_values"),
+ },
+ _ => cx.sess().bug("only strings and byte strings supported in compare_values"),
},
- _ => cx.sess().bug("only scalars and strings supported in compare_values"),
+ _ => cx.sess().bug("only scalars, byte strings, and strings supported in compare_values"),
}
}
}
// text literals
- t::LIT_BYTE(..) | t::LIT_CHAR(..) | t::LIT_STR(..) | t::LIT_STR_RAW(..) => "string",
+ t::LIT_BYTE(..) | t::LIT_BINARY(..) |
+ t::LIT_CHAR(..) | t::LIT_STR(..) | t::LIT_STR_RAW(..) => "string",
// number literals
t::LIT_INT(..) | t::LIT_UINT(..) | t::LIT_INT_UNSUFFIXED(..) |
// Note: r as in r" or r#" is part of a raw string literal,
// b as in b' is part of a byte literal, and b as in b" is part of a
// byte string literal.
// They are not identifiers, and are handled further down.
- ('r', Some('"')) | ('r', Some('#')) | ('b', Some('\'')) => false,
+ ('r', Some('"')) | ('r', Some('#')) |
+ ('b', Some('"')) | ('b', Some('\'')) => false,
_ => true
} {
let start = self.last_pos;
}
'b' => {
self.bump();
- assert!(self.curr_is('\''), "Should have been a token::IDENT");
- self.bump();
- let start = self.last_pos;
-
- // the eof will be picked up by the final `'` check below
- let mut c2 = self.curr.unwrap_or('\x00');
- self.bump();
+ return match self.curr {
+ Some('\'') => parse_byte(self),
+ Some('"') => parse_byte_string(self),
+ _ => unreachable!() // Should have been a token::IDENT above.
+ };
- match c2 {
- '\\' => {
- // '\X' for some X must be a character constant:
- let escaped = self.curr;
- let escaped_pos = self.last_pos;
- self.bump();
- match escaped {
- None => {}
- Some(e) => {
- c2 = match e {
- 'n' => '\n',
- 'r' => '\r',
- 't' => '\t',
- '\\' => '\\',
- '\'' => '\'',
- '"' => '"',
- '0' => '\x00',
- 'x' => self.scan_numeric_escape(2u, '\''),
- c2 => {
- self.err_span_char(escaped_pos, self.last_pos,
- "unknown byte escape", c2);
- c2
+ /// Lexes a byte literal such as `b'a'` or `b'\n'`.
+ ///
+ /// The leading `b` has already been consumed by the caller, so
+ /// `self_.curr` is the opening `'` on entry. Unescaped tab, newline,
+ /// carriage return and `'`, as well as non-ASCII characters, are reported
+ /// as errors. A missing closing `'` is reported via `fatal_span_verbose`.
+ /// Returns `token::LIT_BYTE` carrying the value cast to `u8`.
+ fn parse_byte(self_: &mut StringReader) -> token::Token {
+ // skip the opening `'`
+ self_.bump();
+ let start = self_.last_pos;
+
+ // the eof will be picked up by the final `'` check below
+ let mut c2 = self_.curr.unwrap_or('\x00');
+ self_.bump();
+
+ match c2 {
+ '\\' => {
+ // '\X' for some X must be an escaped byte constant:
+ let escaped = self_.curr;
+ let escaped_pos = self_.last_pos;
+ self_.bump();
+ match escaped {
+ // eof right after the backslash: leave `c2` untouched and let
+ // the closing-quote check below report the error
+ None => {}
+ Some(e) => {
+ c2 = match e {
+ 'n' => '\n',
+ 'r' => '\r',
+ 't' => '\t',
+ '\\' => '\\',
+ '\'' => '\'',
+ '"' => '"',
+ '0' => '\x00',
+ 'x' => self_.scan_numeric_escape(2u, '\''),
+ // unknown escape: report it, then carry on using the
+ // escaped character itself as the value
+ c2 => {
+ self_.err_span_char(
+ escaped_pos, self_.last_pos,
+ "unknown byte escape", c2);
+ c2
+ }
}
}
}
}
+ '\t' | '\n' | '\r' | '\'' => {
+ self_.err_span_char( start, self_.last_pos,
+ "byte constant must be escaped", c2);
+ }
+ _ => if c2 > '\x7F' {
+ self_.err_span_char( start, self_.last_pos,
+ "byte constant must be ASCII. \
+ Use a \\xHH escape for a non-ASCII byte", c2);
+ }
}
- '\t' | '\n' | '\r' | '\'' => {
- self.err_span_char( start, self.last_pos,
- "byte constant must be escaped", c2);
- }
- _ if c2 > '\x7F' => {
- self.err_span_char( start, self.last_pos,
- "byte constant must be ASCII. \
- Use a \\xHH escape for a non-ASCII byte", c2);
+ if !self_.curr_is('\'') {
+ // Byte offsetting here is okay because the
+ // characters before position `start` are an
+ // ascii single quote and an ascii 'b'.
+ self_.fatal_span_verbose(
+ start - BytePos(2), self_.last_pos,
+ "unterminated byte constant".to_string());
}
- _ => {}
+ self_.bump(); // advance curr past token
+ return token::LIT_BYTE(c2 as u8);
+ }
- if !self.curr_is('\'') {
- self.fatal_span_verbose(
- // Byte offsetting here is okay because the
- // character before position `start` are an
- // ascii single quote and ascii 'b'.
- start - BytePos(2), self.last_pos,
- "unterminated byte constant".to_string());
+
+ /// Lexes a byte string literal such as `b"abc\n"`.
+ ///
+ /// The leading `b` has already been consumed by the caller, so
+ /// `self_.curr` is the opening `"` on entry. Returns `token::LIT_BINARY`
+ /// holding the decoded bytes. Reaching end of file before the closing `"`
+ /// is reported through `fatal_span`. Unknown escapes and non-ASCII
+ /// characters are reported as errors and contribute no byte to the result.
+ fn parse_byte_string(self_: &mut StringReader) -> token::Token {
+ // skip the opening `"`
+ self_.bump();
+ let start = self_.last_pos;
+ let mut value = Vec::new();
+ while !self_.curr_is('"') {
+ if self_.is_eof() {
+ self_.fatal_span(start, self_.last_pos,
+ "unterminated double quote byte string");
+ }
+
+ // Remember where `ch` begins: after the bump below `last_pos`
+ // already points past it, and error spans should cover `ch` itself.
+ let ch_pos = self_.last_pos;
+ let ch = self_.curr.unwrap();
+ self_.bump();
+ match ch {
+ '\\' => {
+ if self_.is_eof() {
+ self_.fatal_span(start, self_.last_pos,
+ "unterminated double quote byte string");
+ }
+
+ let escaped = self_.curr.unwrap();
+ let escaped_pos = self_.last_pos;
+ self_.bump();
+ match escaped {
+ 'n' => value.push('\n' as u8),
+ 'r' => value.push('\r' as u8),
+ 't' => value.push('\t' as u8),
+ '\\' => value.push('\\' as u8),
+ '\'' => value.push('\'' as u8),
+ '"' => value.push('"' as u8),
+ // backslash followed by a newline: line continuation,
+ // nothing is added to the value
+ '\n' => self_.consume_whitespace(),
+ '0' => value.push(0),
+ 'x' => {
+ value.push(self_.scan_numeric_escape(2u, '"') as u8);
+ }
+ c2 => {
+ self_.err_span_char(escaped_pos, self_.last_pos,
+ "unknown byte string escape", c2);
+ }
+ }
+ }
+ _ => {
+ if ch <= '\x7F' {
+ value.push(ch as u8)
+ } else {
+ // Span the offending character, matching what parse_byte
+ // does, instead of an empty span located after it.
+ self_.err_span_char(ch_pos, self_.last_pos,
+ "byte string must be ASCII. \
+ Use a \\xHH escape for a non-ASCII byte", ch);
+ }
+ }
+ }
+ }
+ // skip the closing `"`
+ self_.bump();
+ return token::LIT_BINARY(Rc::new(value));
}
- self.bump(); // advance curr past token
- return token::LIT_BYTE(c2 as u8);
}
'"' => {
let mut accum_str = String::new();
use ast::{Ident, NormalFn, Inherited, Item, Item_, ItemStatic};
use ast::{ItemEnum, ItemFn, ItemForeignMod, ItemImpl};
use ast::{ItemMac, ItemMod, ItemStruct, ItemTrait, ItemTy, Lit, Lit_};
-use ast::{LitBool, LitFloat, LitFloatUnsuffixed, LitInt, LitChar, LitByte};
+use ast::{LitBool, LitFloat, LitFloatUnsuffixed, LitInt, LitChar, LitByte, LitBinary};
use ast::{LitIntUnsuffixed, LitNil, LitStr, LitUint, Local, LocalLet};
use ast::{MutImmutable, MutMutable, Mac_, MacInvocTT, Matcher, MatchNonterminal};
use ast::{MatchSeq, MatchTok, Method, MutTy, BiMul, Mutability};
token::LIT_STR_RAW(s, n) => {
LitStr(self.id_to_interned_str(s), ast::RawStr(n))
}
+ token::LIT_BINARY(ref v) => LitBinary(v.clone()),
token::LPAREN => { self.expect(&token::RPAREN); LitNil },
_ => { self.unexpected_last(tok); }
}
LIT_FLOAT_UNSUFFIXED(ast::Ident),
LIT_STR(ast::Ident),
LIT_STR_RAW(ast::Ident, uint), /* raw str delimited by n hash symbols */
+ LIT_BINARY(Rc<Vec<u8>>),
/* Name components */
// an identifier contains an "is_mod_name" boolean,
body
}
LIT_STR(s) => {
- (format!("\"{}\"", get_ident(s).get().escape_default())).to_string()
+ format!("\"{}\"", get_ident(s).get().escape_default())
}
LIT_STR_RAW(s, n) => {
- (format!("r{delim}\"{string}\"{delim}",
- delim="#".repeat(n), string=get_ident(s))).to_string()
+ format!("r{delim}\"{string}\"{delim}",
+ delim="#".repeat(n), string=get_ident(s))
+ }
+ LIT_BINARY(ref v) => {
+ format!(
+ "b\"{}\"",
+ v.iter().map(|&b| b as char).collect::<String>().escape_default())
}
/* Name components */
IDENT(s, _) => get_ident(s).get().to_string(),
LIFETIME(s) => {
- (format!("{}", get_ident(s))).to_string()
+ format!("{}", get_ident(s))
}
UNDERSCORE => "_".to_string(),
LIT_FLOAT_UNSUFFIXED(_) => true,
LIT_STR(_) => true,
LIT_STR_RAW(_, _) => true,
+ LIT_BINARY(_) => true,
POUND => true,
AT => true,
NOT => true,
LIT_FLOAT_UNSUFFIXED(_) => true,
LIT_STR(_) => true,
LIT_STR_RAW(_, _) => true,
+ LIT_BINARY(_) => true,
_ => false
}
}
ast::LitBool(val) => {
if val { word(&mut self.s, "true") } else { word(&mut self.s, "false") }
}
- ast::LitBinary(ref arr) => {
- try!(self.ibox(indent_unit));
- try!(word(&mut self.s, "["));
- try!(self.commasep_cmnt(Inconsistent,
- arr.as_slice(),
- |s, u| {
- word(&mut s.s,
- format!("{}",
- *u).as_slice())
- },
- |_| lit.span));
- try!(word(&mut self.s, "]"));
- self.end()
+ ast::LitBinary(ref v) => {
+ let escaped: String = v.iter().map(|&b| b as char).collect();
+ word(&mut self.s, format!("b\"{}\"", escaped.escape_default()).as_slice())
}
}
}
--- /dev/null
+// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+
+// ignore-tidy-tab
+
+static FOO: &'static [u8] = b"\f"; //~ ERROR unknown byte escape
+
+pub fn main() {
+ b"\f"; //~ ERROR unknown byte escape
+ b"\x0Z"; //~ ERROR illegal character in numeric character escape: Z
+ b"é"; //~ ERROR byte constant must be ASCII
+ b"a //~ ERROR unterminated double quote byte string
+}
+
+
// Compile-fail cases for `concat!`: each invocation below is expected to be
// rejected with the error named in its `//~ ERROR` annotation.
fn main() {
concat!(b'f'); //~ ERROR: cannot concatenate a binary literal
+ concat!(b"foo"); //~ ERROR: cannot concatenate a binary literal
concat!(foo); //~ ERROR: expected a literal
concat!(foo()); //~ ERROR: expected a literal
}
static FOO: u8 = b'\xF0';
+static BAR: &'static [u8] = b"a\xF0\t";
pub fn main() {
assert_eq!(b'a', 97u8);
b'a' .. b'z' => {},
_ => fail!()
}
+
+ assert_eq!(b"a\n\r\t\\\'\"\0\xF0",
+ &[97u8, 10u8, 13u8, 9u8, 92u8, 39u8, 34u8, 0u8, 240u8]);
+ assert_eq!(b"a\
+ b", &[97u8, 98u8]);
+ assert_eq!(BAR, &[97u8, 240u8, 9u8]);
+
+ match &[97u8, 10u8] {
+ b"a\n" => {},
+ _ => fail!(),
+ }
}