1 // Copyright 2013-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 // ignore-lexer-test FIXME #15679
13 //! Operations on ASCII strings and characters
15 #![stable(feature = "rust1", since = "1.0.0")]
22 /// Extension methods for ASCII-subset only operations on owned strings
// Both methods below take `self` by value and return `Self`: the receiver is
// consumed and handed back converted, rather than borrowed and copied.
23 #[unstable(feature = "std_misc",
24 reason = "would prefer to do this in a more general way")]
25 pub trait OwnedAsciiExt {
26 /// Convert the string to ASCII upper case:
27 /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
28 /// but non-ASCII letters are unchanged.
29 fn into_ascii_uppercase(self) -> Self;
31 /// Convert the string to ASCII lower case:
32 /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
33 /// but non-ASCII letters are unchanged.
34 fn into_ascii_lowercase(self) -> Self;
37 /// Extension methods for ASCII-subset only operations on string slices
// The methods declared here fall into three groups: `is_ascii` and
// `eq_ignore_ascii_case` only read through `&self`; `to_ascii_*` read through
// `&self` and return a `Self::Owned`; `make_ascii_*` mutate through
// `&mut self` and return nothing.
38 #[stable(feature = "rust1", since = "1.0.0")]
40 /// Container type for copied ASCII characters.
// NOTE(review): the item this doc comment belongs to is not visible in this
// listing; presumably the `Owned` associated type returned by the
// `to_ascii_*` methods below -- confirm.
41 #[stable(feature = "rust1", since = "1.0.0")]
44 /// Check if within the ASCII range.
45 #[stable(feature = "rust1", since = "1.0.0")]
46 fn is_ascii(&self) -> bool;
48 /// Makes a copy of the string in ASCII upper case:
49 /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
50 /// but non-ASCII letters are unchanged.
51 #[stable(feature = "rust1", since = "1.0.0")]
52 fn to_ascii_uppercase(&self) -> Self::Owned;
54 /// Makes a copy of the string in ASCII lower case:
55 /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
56 /// but non-ASCII letters are unchanged.
57 #[stable(feature = "rust1", since = "1.0.0")]
58 fn to_ascii_lowercase(&self) -> Self::Owned;
60 /// Check that two strings are an ASCII case-insensitive match.
61 /// Same as `to_ascii_lowercase(a) == to_ascii_lowercase(b)`,
62 /// but without allocating and copying temporary strings.
63 #[stable(feature = "rust1", since = "1.0.0")]
64 fn eq_ignore_ascii_case(&self, other: &Self) -> bool;
66 /// Convert this type to its ASCII upper case equivalent in-place.
68 /// See `to_ascii_uppercase` for more information.
69 #[unstable(feature = "ascii")]
70 fn make_ascii_uppercase(&mut self);
72 /// Convert this type to its ASCII lower case equivalent in-place.
74 /// See `to_ascii_lowercase` for more information.
75 #[unstable(feature = "ascii")]
76 fn make_ascii_lowercase(&mut self);
// `AsciiExt` for string slices. Every method here defers to the byte-level
// or owned-`String` implementation instead of re-implementing the mapping.
79 #[stable(feature = "rust1", since = "1.0.0")]
80 impl AsciiExt for str {
// True when every byte of the string is ASCII.
84 fn is_ascii(&self) -> bool {
85 self.bytes().all(|b| b.is_ascii())
// Copies into an owned `String`, then applies the consuming conversion.
89 fn to_ascii_uppercase(&self) -> String {
90 self.to_string().into_ascii_uppercase()
// Copies into an owned `String`, then applies the consuming conversion.
94 fn to_ascii_lowercase(&self) -> String {
95 self.to_string().into_ascii_lowercase()
// Compares the underlying bytes with the `[u8]` implementation.
99 fn eq_ignore_ascii_case(&self, other: &str) -> bool {
100 self.as_bytes().eq_ignore_ascii_case(other.as_bytes())
// Reinterprets the `&mut str` as `&mut [u8]` so the byte-slice
// implementation can rewrite the buffer in place.
// NOTE(review): soundness depends on the byte-level conversion never
// producing invalid UTF-8; the lookup tables it uses leave every byte
// >= 0x80 untouched -- confirm before changing either side.
103 fn make_ascii_uppercase(&mut self) {
104 let me: &mut [u8] = unsafe { mem::transmute(self) };
105 me.make_ascii_uppercase()
// Same reinterpretation as `make_ascii_uppercase`, with the same UTF-8
// invariant to uphold.
108 fn make_ascii_lowercase(&mut self) {
109 let me: &mut [u8] = unsafe { mem::transmute(self) };
110 me.make_ascii_lowercase()
// Consuming conversions for `String`. Each one unwraps the string into its
// byte vector, converts that with the `Vec<u8>` implementation, and rebuilds
// the `String` with `from_utf8_unchecked`, i.e. without re-validating UTF-8.
114 #[unstable(feature = "std_misc",
115 reason = "would prefer to do this in a more general way")]
116 impl OwnedAsciiExt for String {
118 fn into_ascii_uppercase(self) -> String {
119 // Vec<u8>::into_ascii_uppercase() preserves the UTF-8 invariant.
120 unsafe { String::from_utf8_unchecked(self.into_bytes().into_ascii_uppercase()) }
124 fn into_ascii_lowercase(self) -> String {
125 // Vec<u8>::into_ascii_lowercase() preserves the UTF-8 invariant.
126 unsafe { String::from_utf8_unchecked(self.into_bytes().into_ascii_lowercase()) }
// `AsciiExt` for byte slices, built on the per-byte `u8` implementation.
130 #[stable(feature = "rust1", since = "1.0.0")]
131 impl AsciiExt for [u8] {
// Copies of a byte slice are returned as a `Vec<u8>`.
132 type Owned = Vec<u8>;
// True when every byte in the slice is ASCII.
134 fn is_ascii(&self) -> bool {
135 self.iter().all(|b| b.is_ascii())
// Copies into a `Vec<u8>`, then applies the consuming conversion.
139 fn to_ascii_uppercase(&self) -> Vec<u8> {
140 self.to_vec().into_ascii_uppercase()
// Copies into a `Vec<u8>`, then applies the consuming conversion.
144 fn to_ascii_lowercase(&self) -> Vec<u8> {
145 self.to_vec().into_ascii_lowercase()
// The length check comes first: `zip` stops at the shorter input, so without
// it a slice would compare equal to any longer slice it is a prefix of.
149 fn eq_ignore_ascii_case(&self, other: &[u8]) -> bool {
150 self.len() == other.len() &&
151 self.iter().zip(other.iter()).all(|(a, b)| {
152 a.eq_ignore_ascii_case(b)
// NOTE(review): the loop header that binds `byte` is not visible in this
// listing; presumably it visits every byte of `self` mutably -- confirm.
156 fn make_ascii_uppercase(&mut self) {
158 byte.make_ascii_uppercase();
// NOTE(review): same as above, the loop header binding `byte` is not visible.
162 fn make_ascii_lowercase(&mut self) {
164 byte.make_ascii_lowercase();
// Consuming conversions for `Vec<u8>`. The receiver is bound as `mut self`
// so it can be converted in place with the `make_ascii_*` methods.
// NOTE(review): the lines that hand the vector back to the caller are not
// visible in this listing.
169 #[unstable(feature = "std_misc",
170 reason = "would prefer to do this in a more general way")]
171 impl OwnedAsciiExt for Vec<u8> {
173 fn into_ascii_uppercase(mut self) -> Vec<u8> {
174 self.make_ascii_uppercase();
179 fn into_ascii_lowercase(mut self) -> Vec<u8> {
180 self.make_ascii_lowercase();
// `AsciiExt` for a single byte; the slice implementation calls into this
// one for each byte.
185 #[stable(feature = "rust1", since = "1.0.0")]
186 impl AsciiExt for u8 {
// A byte is ASCII when its top bit (value 128) is clear.
189 fn is_ascii(&self) -> bool { *self & 128 == 0 }
// Case conversion is a lookup in a 256-entry table indexed by the byte
// value, so every possible `u8` is a valid index.
191 fn to_ascii_uppercase(&self) -> u8 { ASCII_UPPERCASE_MAP[*self as usize] }
193 fn to_ascii_lowercase(&self) -> u8 { ASCII_LOWERCASE_MAP[*self as usize] }
// Two bytes match when their lower-cased forms are equal.
195 fn eq_ignore_ascii_case(&self, other: &u8) -> bool {
196 self.to_ascii_lowercase() == other.to_ascii_lowercase()
// The in-place variants overwrite the byte with its converted value.
199 fn make_ascii_uppercase(&mut self) { *self = self.to_ascii_uppercase(); }
201 fn make_ascii_lowercase(&mut self) { *self = self.to_ascii_lowercase(); }
// `AsciiExt` for `char`, reusing the `u8` lookup for the actual mapping.
204 #[stable(feature = "rust1", since = "1.0.0")]
205 impl AsciiExt for char {
// NOTE(review): the body of `is_ascii` is not visible in this listing.
208 fn is_ascii(&self) -> bool {
// NOTE(review): `as u8` truncates a `char`, so this cast is presumably only
// reached for ASCII characters; the guard around it is not visible in this
// listing -- confirm.
213 fn to_ascii_uppercase(&self) -> char {
215 (*self as u8).to_ascii_uppercase() as char
// NOTE(review): same caveat as `to_ascii_uppercase` about the `as u8` cast.
222 fn to_ascii_lowercase(&self) -> char {
224 (*self as u8).to_ascii_lowercase() as char
// Two chars match when their lower-cased forms are equal.
231 fn eq_ignore_ascii_case(&self, other: &char) -> bool {
232 self.to_ascii_lowercase() == other.to_ascii_lowercase()
// The in-place variants overwrite the char with its converted value.
236 fn make_ascii_uppercase(&mut self) { *self = self.to_ascii_uppercase(); }
238 fn make_ascii_lowercase(&mut self) { *self = self.to_ascii_lowercase(); }
241 /// An iterator over the escaped version of a byte, constructed via
242 /// `std::ascii::escape_default`.
// NOTE(review): the field list is not visible in this listing. Elsewhere in
// the file the struct is built with a `range` and a `data` field, and the
// iterator yields `data[i]` for each `i` produced by `range`.
243 #[stable(feature = "rust1", since = "1.0.0")]
244 pub struct EscapeDefault {
249 /// Returns a 'default' ASCII and C++11-like literal escape of a `u8`
251 /// The default is chosen with a bias toward producing literals that are
252 /// legal in a variety of languages, including C++11 and similar C-family
253 /// languages. The exact rules are:
255 /// - Tab, CR and LF are escaped as '\t', '\r' and '\n' respectively.
256 /// - Single-quote, double-quote and backslash chars are backslash-escaped.
257 /// - Any other chars in the range [0x20,0x7e] are not escaped.
258 /// - Any other chars are given hex escapes of the form '\xNN'.
259 /// - Unicode escapes are never generated by this function.
260 #[stable(feature = "rust1", since = "1.0.0")]
261 pub fn escape_default(c: u8) -> EscapeDefault {
// Each arm yields a fixed 4-byte buffer plus the number of leading bytes that
// are meaningful; unused trailing slots are filled with 0.
262 let (data, len) = match c {
263 b'\t' => ([b'\\', b't', 0, 0], 2),
264 b'\r' => ([b'\\', b'r', 0, 0], 2),
265 b'\n' => ([b'\\', b'n', 0, 0], 2),
// Backslash and both quote characters lie inside 0x20..0x7e, so these arms
// must come before the printable-range arm to take effect.
266 b'\\' => ([b'\\', b'\\', 0, 0], 2),
267 b'\'' => ([b'\\', b'\'', 0, 0], 2),
268 b'"' => ([b'\\', b'"', 0, 0], 2),
// Remaining printable bytes are emitted unchanged.
269 b'\x20' ... b'\x7e' => ([c, 0, 0, 0], 1),
// Everything else becomes `\x` followed by the high nibble, then the low
// nibble, each passed through `hexify`.
270 _ => ([b'\\', b'x', hexify(c >> 4), hexify(c & 0xf)], 4),
// The iterator walks indices 0..len of `data`.
273 return EscapeDefault { range: (0.. len), data: data };
// Nested helper applied to each 4-bit nibble above.
// NOTE(review): its body is not visible in this listing; presumably it maps
// a value 0..15 to its ASCII hex digit -- confirm.
275 fn hexify(b: u8) -> u8 {
// Forward iteration over the escape bytes.
283 #[stable(feature = "rust1", since = "1.0.0")]
284 impl Iterator for EscapeDefault {
// Take the next index from the front of `range` and yield the byte stored at
// that position in `data`; `None` once the range is exhausted.
286 fn next(&mut self) -> Option<u8> { self.range.next().map(|i| self.data[i]) }
// The remaining length is whatever the index range still has left.
287 fn size_hint(&self) -> (usize, Option<usize>) { self.range.size_hint() }
// Backward iteration over the escape bytes.
289 #[stable(feature = "rust1", since = "1.0.0")]
290 impl DoubleEndedIterator for EscapeDefault {
// Take the next index from the back of `range` and yield the byte stored at
// that position in `data`.
291 fn next_back(&mut self) -> Option<u8> {
292 self.range.next_back().map(|i| self.data[i])
// Empty impl: no methods are overridden, so the trait's provided methods are
// used as they are.
295 #[stable(feature = "rust1", since = "1.0.0")]
296 impl ExactSizeIterator for EscapeDefault {}
// Lookup table used by `u8::to_ascii_lowercase`, indexed by the byte value.
// Entries hold their own index, except that the positions of the uppercase
// ASCII letters hold the matching lowercase letter. Bytes 0x80..=0xff map to
// themselves.
// NOTE(review): a few rows of the table are not visible in this listing.
298 static ASCII_LOWERCASE_MAP: [u8; 256] = [
299 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
300 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
301 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
302 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
303 b' ', b'!', b'"', b'#', b'$', b'%', b'&', b'\'',
304 b'(', b')', b'*', b'+', b',', b'-', b'.', b'/',
305 b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7',
306 b'8', b'9', b':', b';', b'<', b'=', b'>', b'?',
// Uppercase-letter positions: filled with lowercase letters.
309 b'a', b'b', b'c', b'd', b'e', b'f', b'g',
310 b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o',
311 b'p', b'q', b'r', b's', b't', b'u', b'v', b'w',
314 b'[', b'\\', b']', b'^', b'_',
// Lowercase-letter positions: already lowercase, so unchanged.
315 b'`', b'a', b'b', b'c', b'd', b'e', b'f', b'g',
316 b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o',
317 b'p', b'q', b'r', b's', b't', b'u', b'v', b'w',
318 b'x', b'y', b'z', b'{', b'|', b'}', b'~', 0x7f,
// Non-ASCII bytes: identity.
319 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
320 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
321 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
322 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
323 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
324 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
325 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
326 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
327 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
328 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
329 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
330 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
331 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
332 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
333 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
334 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
// Lookup table used by `u8::to_ascii_uppercase`, indexed by the byte value.
// Entries hold their own index, except that the positions of the lowercase
// ASCII letters hold the matching uppercase letter. Bytes 0x80..=0xff map to
// themselves.
// NOTE(review): a few rows of the table are not visible in this listing.
337 static ASCII_UPPERCASE_MAP: [u8; 256] = [
338 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
339 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
340 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
341 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
342 b' ', b'!', b'"', b'#', b'$', b'%', b'&', b'\'',
343 b'(', b')', b'*', b'+', b',', b'-', b'.', b'/',
344 b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7',
345 b'8', b'9', b':', b';', b'<', b'=', b'>', b'?',
// Uppercase-letter positions: already uppercase, so unchanged.
346 b'@', b'A', b'B', b'C', b'D', b'E', b'F', b'G',
347 b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O',
348 b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W',
349 b'X', b'Y', b'Z', b'[', b'\\', b']', b'^', b'_',
// Lowercase-letter positions: filled with uppercase letters.
352 b'A', b'B', b'C', b'D', b'E', b'F', b'G',
353 b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O',
354 b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W',
357 b'{', b'|', b'}', b'~', 0x7f,
// Non-ASCII bytes: identity.
358 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
359 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
360 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
361 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
362 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
363 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
364 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
365 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
366 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
367 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
368 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
369 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
370 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
371 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
372 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
373 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
385 assert!("banana".chars().all(|c| c.is_ascii()));
386 assert!(!"ประเทศไทย中华Việt Nam".chars().all(|c| c.is_ascii()));
// `str::is_ascii`: the empty string and a single ASCII letter pass; a string
// made of the single code point U+2009 does not.
390 fn test_ascii_vec() {
391 assert!("".is_ascii());
392 assert!("a".is_ascii());
393 assert!(!"\u{2009}".is_ascii());
// `str::to_ascii_uppercase`: ASCII letters are upper-cased while non-ASCII
// characters such as `ü`, `ı` and `ß` come back unchanged.
397 fn test_to_ascii_uppercase() {
398 assert_eq!("url()URL()uRl()ürl".to_ascii_uppercase(), "URL()URL()URL()üRL");
399 assert_eq!("hıKß".to_ascii_uppercase(), "HıKß");
// NOTE(review): `i` comes from a loop whose header, and the `else` branch of
// this `if`, are not visible in this listing. The visible branch shifts code
// points in 'a'..='z' to the matching uppercase letter.
402 let upper = if 'a' as u32 <= i && i <= 'z' as u32 { i + 'A' as u32 - 'a' as u32 }
404 assert_eq!((from_u32(i).unwrap()).to_string().to_ascii_uppercase(),
405 (from_u32(upper).unwrap()).to_string());
// `str::to_ascii_lowercase`: ASCII letters are lower-cased while non-ASCII
// characters come back unchanged.
410 fn test_to_ascii_lowercase() {
411 assert_eq!("url()URL()uRl()Ürl".to_ascii_lowercase(), "url()url()url()Ürl");
412 // Dotted capital I, Kelvin sign, Sharp S.
413 assert_eq!("HİKß".to_ascii_lowercase(), "hİKß");
// NOTE(review): `i` comes from a loop whose header, and the `else` branch of
// this `if`, are not visible in this listing. The visible branch shifts code
// points in 'A'..='Z' to the matching lowercase letter.
416 let lower = if 'A' as u32 <= i && i <= 'Z' as u32 { i + 'a' as u32 - 'A' as u32 }
418 assert_eq!((from_u32(i).unwrap()).to_string().to_ascii_lowercase(),
419 (from_u32(lower).unwrap()).to_string());
// `String::into_ascii_uppercase`: same expectations as the borrowing
// `to_ascii_uppercase` test, exercised through the consuming method.
424 fn test_into_ascii_uppercase() {
425 assert_eq!(("url()URL()uRl()ürl".to_string()).into_ascii_uppercase(),
426 "URL()URL()URL()üRL".to_string());
427 assert_eq!(("hıKß".to_string()).into_ascii_uppercase(), "HıKß");
// NOTE(review): `i` comes from a loop whose header, and the `else` branch of
// this `if`, are not visible in this listing.
430 let upper = if 'a' as u32 <= i && i <= 'z' as u32 { i + 'A' as u32 - 'a' as u32 }
432 assert_eq!((from_u32(i).unwrap()).to_string().into_ascii_uppercase(),
433 (from_u32(upper).unwrap()).to_string());
// `String::into_ascii_lowercase`: same expectations as the borrowing
// `to_ascii_lowercase` test, exercised through the consuming method.
438 fn test_into_ascii_lowercase() {
439 assert_eq!(("url()URL()uRl()Ürl".to_string()).into_ascii_lowercase(),
440 "url()url()url()Ürl");
441 // Dotted capital I, Kelvin sign, Sharp S.
442 assert_eq!(("HİKß".to_string()).into_ascii_lowercase(), "hİKß");
// NOTE(review): `i` comes from a loop whose header, and the `else` branch of
// this `if`, are not visible in this listing.
445 let lower = if 'A' as u32 <= i && i <= 'Z' as u32 { i + 'a' as u32 - 'A' as u32 }
447 assert_eq!((from_u32(i).unwrap()).to_string().into_ascii_lowercase(),
448 (from_u32(lower).unwrap()).to_string());
// `str::eq_ignore_ascii_case`: only ASCII case differences are ignored.
453 fn test_eq_ignore_ascii_case() {
454 assert!("url()URL()uRl()Ürl".eq_ignore_ascii_case("url()url()url()Ürl"));
// Non-ASCII letters that differ only by case must NOT compare equal.
455 assert!(!"Ürl".eq_ignore_ascii_case("ürl"));
456 // Dotted capital I, Kelvin sign, Sharp S.
457 assert!("HİKß".eq_ignore_ascii_case("hİKß"));
// Each of those non-ASCII characters must also differ from the ASCII letter
// it resembles.
458 assert!(!"İ".eq_ignore_ascii_case("i"));
459 assert!(!"K".eq_ignore_ascii_case("k"));
460 assert!(!"ß".eq_ignore_ascii_case("s"));
// NOTE(review): `i` comes from a loop whose header, and the `else` branch of
// this `if`, are not visible in this listing.
463 let lower = if 'A' as u32 <= i && i <= 'Z' as u32 { i + 'a' as u32 - 'A' as u32 }
465 assert!((from_u32(i).unwrap()).to_string().eq_ignore_ascii_case(
466 &from_u32(lower).unwrap().to_string()));