1 // Copyright 2013-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 //! Operations on ASCII strings and characters
13 #![stable(feature = "rust1", since = "1.0.0")]
20 /// Extension methods for ASCII-subset only operations on owned strings
21 #[unstable(feature = "owned_ascii_ext",
22 reason = "would prefer to do this in a more general way")]
23 #[deprecated(since = "1.3.0",
24 reason = "hasn't yet proved essential to be in the standard library")]
26 pub trait OwnedAsciiExt {
27 /// Converts the string to ASCII upper case:
28 /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
29 /// but non-ASCII letters are unchanged.
30 fn into_ascii_uppercase(self) -> Self;
32 /// Converts the string to ASCII lower case:
33 /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
34 /// but non-ASCII letters are unchanged.
35 fn into_ascii_lowercase(self) -> Self;
38 /// Extension methods for ASCII-subset only operations on string slices.
39 #[stable(feature = "rust1", since = "1.0.0")]
41 /// Container type for copied ASCII characters.
42 #[stable(feature = "rust1", since = "1.0.0")]
45 /// Checks if within the ASCII range.
50 /// use std::ascii::AsciiExt;
55 /// assert_eq!(true, ascii.is_ascii());
56 /// assert_eq!(false, utf8.is_ascii())
58 #[stable(feature = "rust1", since = "1.0.0")]
59 fn is_ascii(&self) -> bool;
61 /// Makes a copy of the string in ASCII upper case.
63 /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
64 /// but non-ASCII letters are unchanged.
69 /// use std::ascii::AsciiExt;
74 /// assert_eq!('A', ascii.to_ascii_uppercase());
75 /// assert_eq!('❤', utf8.to_ascii_uppercase());
77 #[stable(feature = "rust1", since = "1.0.0")]
78 fn to_ascii_uppercase(&self) -> Self::Owned;
80 /// Makes a copy of the string in ASCII lower case.
82 /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
83 /// but non-ASCII letters are unchanged.
88 /// use std::ascii::AsciiExt;
93 /// assert_eq!('a', ascii.to_ascii_lowercase());
94 /// assert_eq!('❤', utf8.to_ascii_lowercase());
96 #[stable(feature = "rust1", since = "1.0.0")]
97 fn to_ascii_lowercase(&self) -> Self::Owned;
99 /// Checks that two strings are an ASCII case-insensitive match.
101 /// Same as `to_ascii_lowercase(a) == to_ascii_lowercase(b)`,
102 /// but without allocating and copying temporary strings.
107 /// use std::ascii::AsciiExt;
109 /// let ascii1 = 'A';
110 /// let ascii2 = 'a';
111 /// let ascii3 = 'A';
112 /// let ascii4 = 'z';
114 /// assert_eq!(true, ascii1.eq_ignore_ascii_case(&ascii2));
115 /// assert_eq!(true, ascii1.eq_ignore_ascii_case(&ascii3));
116 /// assert_eq!(false, ascii1.eq_ignore_ascii_case(&ascii4));
118 #[stable(feature = "rust1", since = "1.0.0")]
119 fn eq_ignore_ascii_case(&self, other: &Self) -> bool;
121 /// Converts this type to its ASCII upper case equivalent in-place.
123 /// See `to_ascii_uppercase` for more information.
128 /// #![feature(ascii)]
130 /// use std::ascii::AsciiExt;
132 /// let mut ascii = 'a';
134 /// ascii.make_ascii_uppercase();
136 /// assert_eq!('A', ascii);
138 #[unstable(feature = "ascii")]
139 fn make_ascii_uppercase(&mut self);
141 /// Converts this type to its ASCII lower case equivalent in-place.
143 /// See `to_ascii_lowercase` for more information.
148 /// #![feature(ascii)]
150 /// use std::ascii::AsciiExt;
152 /// let mut ascii = 'A';
154 /// ascii.make_ascii_lowercase();
156 /// assert_eq!('a', ascii);
158 #[unstable(feature = "ascii")]
159 fn make_ascii_lowercase(&mut self);
162 #[stable(feature = "rust1", since = "1.0.0")]
163 impl AsciiExt for str {
167 fn is_ascii(&self) -> bool {
168 self.bytes().all(|b| b.is_ascii())
173 fn to_ascii_uppercase(&self) -> String {
174 self.to_string().into_ascii_uppercase()
179 fn to_ascii_lowercase(&self) -> String {
180 self.to_string().into_ascii_lowercase()
184 fn eq_ignore_ascii_case(&self, other: &str) -> bool {
185 self.as_bytes().eq_ignore_ascii_case(other.as_bytes())
188 fn make_ascii_uppercase(&mut self) {
189 let me: &mut [u8] = unsafe { mem::transmute(self) };
190 me.make_ascii_uppercase()
193 fn make_ascii_lowercase(&mut self) {
194 let me: &mut [u8] = unsafe { mem::transmute(self) };
195 me.make_ascii_lowercase()
200 impl OwnedAsciiExt for String {
202 fn into_ascii_uppercase(self) -> String {
203 // Vec<u8>::into_ascii_uppercase() preserves the UTF-8 invariant.
204 unsafe { String::from_utf8_unchecked(self.into_bytes().into_ascii_uppercase()) }
208 fn into_ascii_lowercase(self) -> String {
209 // Vec<u8>::into_ascii_lowercase() preserves the UTF-8 invariant.
210 unsafe { String::from_utf8_unchecked(self.into_bytes().into_ascii_lowercase()) }
214 #[stable(feature = "rust1", since = "1.0.0")]
215 impl AsciiExt for [u8] {
216 type Owned = Vec<u8>;
218 fn is_ascii(&self) -> bool {
219 self.iter().all(|b| b.is_ascii())
224 fn to_ascii_uppercase(&self) -> Vec<u8> {
225 self.to_vec().into_ascii_uppercase()
230 fn to_ascii_lowercase(&self) -> Vec<u8> {
231 self.to_vec().into_ascii_lowercase()
235 fn eq_ignore_ascii_case(&self, other: &[u8]) -> bool {
236 self.len() == other.len() &&
237 self.iter().zip(other).all(|(a, b)| {
238 a.eq_ignore_ascii_case(b)
242 fn make_ascii_uppercase(&mut self) {
244 byte.make_ascii_uppercase();
248 fn make_ascii_lowercase(&mut self) {
250 byte.make_ascii_lowercase();
256 impl OwnedAsciiExt for Vec<u8> {
258 fn into_ascii_uppercase(mut self) -> Vec<u8> {
259 self.make_ascii_uppercase();
264 fn into_ascii_lowercase(mut self) -> Vec<u8> {
265 self.make_ascii_lowercase();
270 #[stable(feature = "rust1", since = "1.0.0")]
271 impl AsciiExt for u8 {
274 fn is_ascii(&self) -> bool { *self & 128 == 0 }
276 fn to_ascii_uppercase(&self) -> u8 { ASCII_UPPERCASE_MAP[*self as usize] }
278 fn to_ascii_lowercase(&self) -> u8 { ASCII_LOWERCASE_MAP[*self as usize] }
280 fn eq_ignore_ascii_case(&self, other: &u8) -> bool {
281 self.to_ascii_lowercase() == other.to_ascii_lowercase()
284 fn make_ascii_uppercase(&mut self) { *self = self.to_ascii_uppercase(); }
286 fn make_ascii_lowercase(&mut self) { *self = self.to_ascii_lowercase(); }
289 #[stable(feature = "rust1", since = "1.0.0")]
290 impl AsciiExt for char {
293 fn is_ascii(&self) -> bool {
298 fn to_ascii_uppercase(&self) -> char {
300 (*self as u8).to_ascii_uppercase() as char
307 fn to_ascii_lowercase(&self) -> char {
309 (*self as u8).to_ascii_lowercase() as char
316 fn eq_ignore_ascii_case(&self, other: &char) -> bool {
317 self.to_ascii_lowercase() == other.to_ascii_lowercase()
321 fn make_ascii_uppercase(&mut self) { *self = self.to_ascii_uppercase(); }
323 fn make_ascii_lowercase(&mut self) { *self = self.to_ascii_lowercase(); }
326 /// An iterator over the escaped version of a byte, constructed via
327 /// `std::ascii::escape_default`.
328 #[stable(feature = "rust1", since = "1.0.0")]
329 pub struct EscapeDefault {
334 /// Returns an iterator that produces an escaped version of a `u8`.
336 /// The default is chosen with a bias toward producing literals that are
337 /// legal in a variety of languages, including C++11 and similar C-family
338 /// languages. The exact rules are:
340 /// - Tab, CR and LF are escaped as '\t', '\r' and '\n' respectively.
341 /// - Single-quote, double-quote and backslash chars are backslash-escaped.
342 /// - Any other chars in the range [0x20,0x7e] are not escaped.
343 /// - Any other chars are given hex escapes of the form '\xNN'.
344 /// - Unicode escapes are never generated by this function.
351 /// let escaped = ascii::escape_default(b'0').next().unwrap();
352 /// assert_eq!(b'0', escaped);
354 /// let mut escaped = ascii::escape_default(b'\t');
356 /// assert_eq!(b'\\', escaped.next().unwrap());
357 /// assert_eq!(b't', escaped.next().unwrap());
359 #[stable(feature = "rust1", since = "1.0.0")]
360 pub fn escape_default(c: u8) -> EscapeDefault {
361 let (data, len) = match c {
362 b'\t' => ([b'\\', b't', 0, 0], 2),
363 b'\r' => ([b'\\', b'r', 0, 0], 2),
364 b'\n' => ([b'\\', b'n', 0, 0], 2),
365 b'\\' => ([b'\\', b'\\', 0, 0], 2),
366 b'\'' => ([b'\\', b'\'', 0, 0], 2),
367 b'"' => ([b'\\', b'"', 0, 0], 2),
368 b'\x20' ... b'\x7e' => ([c, 0, 0, 0], 1),
369 _ => ([b'\\', b'x', hexify(c >> 4), hexify(c & 0xf)], 4),
372 return EscapeDefault { range: (0.. len), data: data };
374 fn hexify(b: u8) -> u8 {
382 #[stable(feature = "rust1", since = "1.0.0")]
383 impl Iterator for EscapeDefault {
385 fn next(&mut self) -> Option<u8> { self.range.next().map(|i| self.data[i]) }
386 fn size_hint(&self) -> (usize, Option<usize>) { self.range.size_hint() }
388 #[stable(feature = "rust1", since = "1.0.0")]
389 impl DoubleEndedIterator for EscapeDefault {
390 fn next_back(&mut self) -> Option<u8> {
391 self.range.next_back().map(|i| self.data[i])
// Marker impl with no overridden methods: the reported length comes from
// `size_hint`, which the `Iterator` impl forwards from the inner index range.
#[stable(feature = "rust1", since = "1.0.0")]
impl ExactSizeIterator for EscapeDefault {}
// Lookup table for ASCII lower-casing, indexed by byte value. Every entry is
// the identity except 0x41..=0x5a ('A'..='Z'), which hold 'a'..='z'.
static ASCII_LOWERCASE_MAP: [u8; 256] = [
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
    0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
    b' ', b'!', b'"', b'#', b'$', b'%', b'&', b'\'',
    b'(', b')', b'*', b'+', b',', b'-', b'.', b'/',
    b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7',
    b'8', b'9', b':', b';', b'<', b'=', b'>', b'?',
    b'@',

          b'a', b'b', b'c', b'd', b'e', b'f', b'g',
    b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o',
    b'p', b'q', b'r', b's', b't', b'u', b'v', b'w',
    b'x', b'y', b'z',

                      b'[', b'\\', b']', b'^', b'_',
    b'`', b'a', b'b', b'c', b'd', b'e', b'f', b'g',
    b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o',
    b'p', b'q', b'r', b's', b't', b'u', b'v', b'w',
    b'x', b'y', b'z', b'{', b'|', b'}', b'~', 0x7f,
    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
    0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
    0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
    0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
    0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
    0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
    0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
    0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
    0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
    0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
    0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
    0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
    0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
    0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
    0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
];
// Lookup table for ASCII upper-casing, indexed by byte value. Every entry is
// the identity except 0x61..=0x7a ('a'..='z'), which hold 'A'..='Z'.
static ASCII_UPPERCASE_MAP: [u8; 256] = [
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
    0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
    b' ', b'!', b'"', b'#', b'$', b'%', b'&', b'\'',
    b'(', b')', b'*', b'+', b',', b'-', b'.', b'/',
    b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7',
    b'8', b'9', b':', b';', b'<', b'=', b'>', b'?',
    b'@', b'A', b'B', b'C', b'D', b'E', b'F', b'G',
    b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O',
    b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W',
    b'X', b'Y', b'Z', b'[', b'\\', b']', b'^', b'_',
    b'`',

          b'A', b'B', b'C', b'D', b'E', b'F', b'G',
    b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O',
    b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W',
    b'X', b'Y', b'Z',

                      b'{', b'|', b'}', b'~', 0x7f,
    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
    0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
    0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
    0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
    0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
    0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
    0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
    0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
    0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
    0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
    0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
    0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
    0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
    0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
    0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
];
// `is_ascii` on byte strings, individual bytes, string slices and chars.
#[test]
fn test_is_ascii() {
    assert!(b"".is_ascii());
    assert!(b"banana\0\x7F".is_ascii());
    assert!(b"banana\0\x7F".iter().all(|b| b.is_ascii()));
    assert!(!b"Vi\xe1\xbb\x87t Nam".is_ascii());
    assert!(!b"Vi\xe1\xbb\x87t Nam".iter().all(|b| b.is_ascii()));
    assert!(!b"\xe1\xbb\x87".iter().any(|b| b.is_ascii()));

    assert!("".is_ascii());
    assert!("banana\0\u{7F}".is_ascii());
    assert!("banana\0\u{7F}".chars().all(|c| c.is_ascii()));
    assert!(!"ประเทศไทย中华Việt Nam".chars().all(|c| c.is_ascii()));
    // This string must contain no ASCII character at all, so the trailing
    // blank is a no-break space, spelled as an escape to keep it visible.
    assert!(!"ประเทศไทย中华ệ\u{a0}".chars().any(|c| c.is_ascii()));
}
// `to_ascii_uppercase` on string slices: fixed samples, then every code point
// below 501 compared against the expected mapping.
#[test]
fn test_to_ascii_uppercase() {
    assert_eq!("url()URL()uRl()ürl".to_ascii_uppercase(), "URL()URL()URL()üRL");
    // Dotless i, Kelvin sign (U+212A, not ASCII 'K'), Sharp S.
    assert_eq!("hı\u{212A}ß".to_ascii_uppercase(), "Hı\u{212A}ß");

    for i in 0..501 {
        let upper = if 'a' as u32 <= i && i <= 'z' as u32 { i + 'A' as u32 - 'a' as u32 }
                    else { i };
        assert_eq!((from_u32(i).unwrap()).to_string().to_ascii_uppercase(),
                   (from_u32(upper).unwrap()).to_string());
    }
}
// `to_ascii_lowercase` on string slices: fixed samples, then every code point
// below 501 compared against the expected mapping.
#[test]
fn test_to_ascii_lowercase() {
    assert_eq!("url()URL()uRl()Ürl".to_ascii_lowercase(), "url()url()url()Ürl");
    // Dotted capital I, Kelvin sign (U+212A, not ASCII 'K'), Sharp S.
    assert_eq!("Hİ\u{212A}ß".to_ascii_lowercase(), "hİ\u{212A}ß");

    for i in 0..501 {
        let lower = if 'A' as u32 <= i && i <= 'Z' as u32 { i + 'a' as u32 - 'A' as u32 }
                    else { i };
        assert_eq!((from_u32(i).unwrap()).to_string().to_ascii_lowercase(),
                   (from_u32(lower).unwrap()).to_string());
    }
}
// `into_ascii_uppercase` on owned strings: fixed samples, then every code
// point below 501 compared against the expected mapping.
#[test]
fn test_into_ascii_uppercase() {
    assert_eq!(("url()URL()uRl()ürl".to_string()).into_ascii_uppercase(),
               "URL()URL()URL()üRL".to_string());
    // Dotless i, Kelvin sign (U+212A, not ASCII 'K'), Sharp S.
    assert_eq!(("hı\u{212A}ß".to_string()).into_ascii_uppercase(), "Hı\u{212A}ß");

    for i in 0..501 {
        let upper = if 'a' as u32 <= i && i <= 'z' as u32 { i + 'A' as u32 - 'a' as u32 }
                    else { i };
        assert_eq!((from_u32(i).unwrap()).to_string().into_ascii_uppercase(),
                   (from_u32(upper).unwrap()).to_string());
    }
}
// `into_ascii_lowercase` on owned strings: fixed samples, then every code
// point below 501 compared against the expected mapping.
#[test]
fn test_into_ascii_lowercase() {
    assert_eq!(("url()URL()uRl()Ürl".to_string()).into_ascii_lowercase(),
               "url()url()url()Ürl");
    // Dotted capital I, Kelvin sign (U+212A, not ASCII 'K'), Sharp S.
    assert_eq!(("Hİ\u{212A}ß".to_string()).into_ascii_lowercase(), "hİ\u{212A}ß");

    for i in 0..501 {
        let lower = if 'A' as u32 <= i && i <= 'Z' as u32 { i + 'a' as u32 - 'A' as u32 }
                    else { i };
        assert_eq!((from_u32(i).unwrap()).to_string().into_ascii_lowercase(),
                   (from_u32(lower).unwrap()).to_string());
    }
}
// In-place lower-casing on bytes, chars, byte vectors and strings.
#[test]
fn test_make_ascii_lower_case() {
    // Binds `$from` mutably, lower-cases it in place, and compares with `$to`.
    macro_rules! test {
        ($from: expr, $to: expr) => {
            {
                let mut x = $from;
                x.make_ascii_lowercase();
                assert_eq!(x, $to);
            }
        }
    }
    test!(b'A', b'a');
    test!(b'a', b'a');
    test!(b'!', b'!');
    test!('A', 'a');
    test!('À', 'À');
    test!('a', 'a');
    test!('!', '!');
    test!(b"H\xc3\x89".to_vec(), b"h\xc3\x89");
    // Dotted capital I, Kelvin sign (U+212A, not ASCII 'K'), Sharp S.
    test!("Hİ\u{212A}ß".to_string(), "hİ\u{212A}ß");
}
// In-place upper-casing on bytes, chars, byte vectors, strings, and a
// sub-slice of a `String`.
#[test]
fn test_make_ascii_upper_case() {
    // Binds `$from` mutably, upper-cases it in place, and compares with `$to`.
    macro_rules! test {
        ($from: expr, $to: expr) => {
            {
                let mut x = $from;
                x.make_ascii_uppercase();
                assert_eq!(x, $to);
            }
        }
    }
    test!(b'a', b'A');
    test!(b'A', b'A');
    test!(b'!', b'!');
    test!('a', 'A');
    test!('à', 'à');
    test!('A', 'A');
    test!('!', '!');
    test!(b"h\xc3\xa9".to_vec(), b"H\xc3\xa9");
    // Dotless i, Kelvin sign (U+212A, not ASCII 'K'), Sharp S.
    test!("hı\u{212A}ß".to_string(), "Hı\u{212A}ß");

    let mut x = "Hello".to_string();
    x[..3].make_ascii_uppercase(); // Test IndexMut on String.
    assert_eq!(x, "HELlo")
}
// `eq_ignore_ascii_case` folds ASCII letters only; non-ASCII look-alikes must
// never compare equal to their ASCII counterparts.
#[test]
fn test_eq_ignore_ascii_case() {
    assert!("url()URL()uRl()Ürl".eq_ignore_ascii_case("url()url()url()Ürl"));
    assert!(!"Ürl".eq_ignore_ascii_case("ürl"));
    // Dotted capital I, Kelvin sign (U+212A, not ASCII 'K'), Sharp S.
    assert!("Hİ\u{212A}ß".eq_ignore_ascii_case("hİ\u{212A}ß"));
    assert!(!"İ".eq_ignore_ascii_case("i"));
    assert!(!"\u{212A}".eq_ignore_ascii_case("k"));
    assert!(!"ß".eq_ignore_ascii_case("s"));

    for i in 0..501 {
        let lower = if 'A' as u32 <= i && i <= 'Z' as u32 { i + 'a' as u32 - 'A' as u32 }
                    else { i };
        assert!((from_u32(i).unwrap()).to_string().eq_ignore_ascii_case(
                &from_u32(lower).unwrap().to_string()));
    }
}