1 // Copyright 2013-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 //! Operations on ASCII strings and characters.
13 #![stable(feature = "rust1", since = "1.0.0")]
/// Extension methods for ASCII-subset only operations on strings, byte
/// sequences, and individual characters and bytes.
///
/// Be aware that operations on seemingly non-ASCII characters can sometimes
/// have unexpected results. Consider this example:
///
/// ```
/// use std::ascii::AsciiExt;
///
/// assert_eq!("cafe\u{301}".to_ascii_uppercase(), "CAFE\u{301}");
/// assert_eq!("caf\u{e9}".to_ascii_uppercase(), "CAF\u{e9}");
/// ```
///
/// In the first example, the lowercase string is represented `"cafe\u{301}"`
/// (the last character is an acute accent [combining character]). Unlike the
/// other characters in the string, the combining character will not get mapped
/// to an uppercase variant, resulting in `"CAFE\u{301}"`. In the second
/// example, the lowercase string is represented `"caf\u{e9}"` (the last
/// character is a single Unicode character representing an 'e' with an acute
/// accent). Since the last character is defined outside the scope of ASCII,
/// it will not get mapped to an uppercase variant, resulting in `"CAF\u{e9}"`.
///
/// [combining character]: https://en.wikipedia.org/wiki/Combining_character
42 #[stable(feature = "rust1", since = "1.0.0")]
44 /// Container type for copied ASCII characters.
45 #[stable(feature = "rust1", since = "1.0.0")]
    /// Checks if the value is within the ASCII range.
    ///
    /// # Examples
    ///
    /// ```
    /// use std::ascii::AsciiExt;
    ///
    /// let ascii = 'a';
    /// let utf8 = '❤';
    ///
    /// assert!(ascii.is_ascii());
    /// assert!(!utf8.is_ascii());
    /// ```
61 #[stable(feature = "rust1", since = "1.0.0")]
62 fn is_ascii(&self) -> bool;
    /// Makes a copy of the value in its ASCII upper case equivalent.
    ///
    /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
    /// but non-ASCII letters are unchanged.
    ///
    /// # Examples
    ///
    /// ```
    /// use std::ascii::AsciiExt;
    ///
    /// let ascii = 'a';
    /// let utf8 = '❤';
    ///
    /// assert_eq!('A', ascii.to_ascii_uppercase());
    /// assert_eq!('❤', utf8.to_ascii_uppercase());
    /// ```
80 #[stable(feature = "rust1", since = "1.0.0")]
81 fn to_ascii_uppercase(&self) -> Self::Owned;
    /// Makes a copy of the value in its ASCII lower case equivalent.
    ///
    /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
    /// but non-ASCII letters are unchanged.
    ///
    /// # Examples
    ///
    /// ```
    /// use std::ascii::AsciiExt;
    ///
    /// let ascii = 'A';
    /// let utf8 = '❤';
    ///
    /// assert_eq!('a', ascii.to_ascii_lowercase());
    /// assert_eq!('❤', utf8.to_ascii_lowercase());
    /// ```
99 #[stable(feature = "rust1", since = "1.0.0")]
100 fn to_ascii_lowercase(&self) -> Self::Owned;
    /// Checks that two values are an ASCII case-insensitive match.
    ///
    /// Same as `to_ascii_lowercase(a) == to_ascii_lowercase(b)`,
    /// but without allocating and copying temporaries.
    ///
    /// # Examples
    ///
    /// ```
    /// use std::ascii::AsciiExt;
    ///
    /// let ascii1 = 'A';
    /// let ascii2 = 'a';
    /// let ascii3 = 'A';
    /// let ascii4 = 'z';
    ///
    /// assert!(ascii1.eq_ignore_ascii_case(&ascii2));
    /// assert!(ascii1.eq_ignore_ascii_case(&ascii3));
    /// assert!(!ascii1.eq_ignore_ascii_case(&ascii4));
    /// ```
121 #[stable(feature = "rust1", since = "1.0.0")]
122 fn eq_ignore_ascii_case(&self, other: &Self) -> bool;
    /// Converts this type to its ASCII upper case equivalent in-place.
    ///
    /// See `to_ascii_uppercase` for more information.
    ///
    /// # Examples
    ///
    /// ```
    /// #![feature(ascii)]
    ///
    /// use std::ascii::AsciiExt;
    ///
    /// let mut ascii = 'a';
    ///
    /// ascii.make_ascii_uppercase();
    ///
    /// assert_eq!('A', ascii);
    /// ```
141 #[unstable(feature = "ascii", issue = "27809")]
142 fn make_ascii_uppercase(&mut self);
    /// Converts this type to its ASCII lower case equivalent in-place.
    ///
    /// See `to_ascii_lowercase` for more information.
    ///
    /// # Examples
    ///
    /// ```
    /// #![feature(ascii)]
    ///
    /// use std::ascii::AsciiExt;
    ///
    /// let mut ascii = 'A';
    ///
    /// ascii.make_ascii_lowercase();
    ///
    /// assert_eq!('a', ascii);
    /// ```
161 #[unstable(feature = "ascii", issue = "27809")]
162 fn make_ascii_lowercase(&mut self);
    /// Converts this type to its ASCII upper case,
    /// consuming the value to avoid allocating memory where `to_ascii_uppercase` would.
    ///
    /// See `to_ascii_uppercase` for more information.
    ///
    /// # Examples
    ///
    /// ```
    /// use std::ascii::AsciiExt;
    ///
    /// let ascii: String = "a".to_owned();
    ///
    /// let upper = ascii.into_ascii_uppercase();
    ///
    /// assert_eq!(upper, "A");
    /// ```
180 #[stable(feature = "into_ascii", since = "1.8.0")]
181 fn into_ascii_uppercase(self) -> Self::Owned where Self: Sized {
182 self.to_ascii_uppercase()
    /// Converts this type to its ASCII lower case,
    /// consuming the value to avoid allocating memory where `to_ascii_lowercase` would.
    ///
    /// See `to_ascii_lowercase` for more information.
    ///
    /// # Examples
    ///
    /// ```
    /// use std::ascii::AsciiExt;
    ///
    /// let ascii: String = "A".to_owned();
    ///
    /// let lower = ascii.into_ascii_lowercase();
    ///
    /// assert_eq!(lower, "a");
    /// ```
201 #[stable(feature = "into_ascii", since = "1.8.0")]
202 fn into_ascii_lowercase(self) -> Self::Owned where Self: Sized {
203 self.to_ascii_lowercase()
207 /// Implement `into_ascii_lowercase` and `into_ascii_uppercase` without memory allocation,
208 /// defer other methods to `str`.
209 #[stable(feature = "into_ascii", since = "1.8.0")]
210 impl AsciiExt for String {
213 #[inline] fn is_ascii(&self) -> bool { (**self).is_ascii() }
214 #[inline] fn to_ascii_uppercase(&self) -> Self { (**self).to_ascii_uppercase() }
215 #[inline] fn to_ascii_lowercase(&self) -> Self { (**self).to_ascii_lowercase() }
216 #[inline] fn eq_ignore_ascii_case(&self, o: &Self) -> bool { (**self).eq_ignore_ascii_case(o) }
217 #[inline] fn make_ascii_uppercase(&mut self) { (**self).make_ascii_uppercase() }
218 #[inline] fn make_ascii_lowercase(&mut self) { (**self).make_ascii_lowercase() }
220 fn into_ascii_lowercase(mut self) -> Self {
222 for byte in self.as_mut_vec() {
223 *byte = byte.to_ascii_lowercase()
229 fn into_ascii_uppercase(mut self) -> Self {
231 for byte in self.as_mut_vec() {
232 *byte = byte.to_ascii_uppercase()
239 /// Implement `into_ascii_lowercase` and `into_ascii_uppercase` without memory allocation,
240 /// defer other methods to `[u8]`.
241 #[stable(feature = "into_ascii", since = "1.8.0")]
242 impl AsciiExt for Vec<u8> {
245 #[inline] fn is_ascii(&self) -> bool { (**self).is_ascii() }
246 #[inline] fn to_ascii_uppercase(&self) -> Self { (**self).to_ascii_uppercase() }
247 #[inline] fn to_ascii_lowercase(&self) -> Self { (**self).to_ascii_lowercase() }
248 #[inline] fn eq_ignore_ascii_case(&self, o: &Self) -> bool { (**self).eq_ignore_ascii_case(o) }
249 #[inline] fn make_ascii_uppercase(&mut self) { (**self).make_ascii_uppercase() }
250 #[inline] fn make_ascii_lowercase(&mut self) { (**self).make_ascii_lowercase() }
252 fn into_ascii_lowercase(mut self) -> Self {
253 for byte in &mut self {
254 *byte = byte.to_ascii_lowercase()
259 fn into_ascii_uppercase(mut self) -> Self {
260 for byte in &mut self {
261 *byte = byte.to_ascii_uppercase()
267 #[stable(feature = "rust1", since = "1.0.0")]
268 impl AsciiExt for str {
272 fn is_ascii(&self) -> bool {
273 self.bytes().all(|b| b.is_ascii())
277 fn to_ascii_uppercase(&self) -> String {
278 let mut bytes = self.as_bytes().to_vec();
279 bytes.make_ascii_uppercase();
280 // make_ascii_uppercase() preserves the UTF-8 invariant.
281 unsafe { String::from_utf8_unchecked(bytes) }
285 fn to_ascii_lowercase(&self) -> String {
286 let mut bytes = self.as_bytes().to_vec();
287 bytes.make_ascii_lowercase();
288 // make_ascii_uppercase() preserves the UTF-8 invariant.
289 unsafe { String::from_utf8_unchecked(bytes) }
293 fn eq_ignore_ascii_case(&self, other: &str) -> bool {
294 self.as_bytes().eq_ignore_ascii_case(other.as_bytes())
297 fn make_ascii_uppercase(&mut self) {
298 let me: &mut [u8] = unsafe { mem::transmute(self) };
299 me.make_ascii_uppercase()
302 fn make_ascii_lowercase(&mut self) {
303 let me: &mut [u8] = unsafe { mem::transmute(self) };
304 me.make_ascii_lowercase()
308 #[stable(feature = "rust1", since = "1.0.0")]
309 impl AsciiExt for [u8] {
310 type Owned = Vec<u8>;
312 fn is_ascii(&self) -> bool {
313 self.iter().all(|b| b.is_ascii())
317 fn to_ascii_uppercase(&self) -> Vec<u8> {
318 let mut me = self.to_vec();
319 me.make_ascii_uppercase();
324 fn to_ascii_lowercase(&self) -> Vec<u8> {
325 let mut me = self.to_vec();
326 me.make_ascii_lowercase();
331 fn eq_ignore_ascii_case(&self, other: &[u8]) -> bool {
332 self.len() == other.len() &&
333 self.iter().zip(other).all(|(a, b)| {
334 a.eq_ignore_ascii_case(b)
338 fn make_ascii_uppercase(&mut self) {
340 byte.make_ascii_uppercase();
344 fn make_ascii_lowercase(&mut self) {
346 byte.make_ascii_lowercase();
351 #[stable(feature = "rust1", since = "1.0.0")]
352 impl AsciiExt for u8 {
355 fn is_ascii(&self) -> bool { *self & 128 == 0 }
357 fn to_ascii_uppercase(&self) -> u8 { ASCII_UPPERCASE_MAP[*self as usize] }
359 fn to_ascii_lowercase(&self) -> u8 { ASCII_LOWERCASE_MAP[*self as usize] }
361 fn eq_ignore_ascii_case(&self, other: &u8) -> bool {
362 self.to_ascii_lowercase() == other.to_ascii_lowercase()
365 fn make_ascii_uppercase(&mut self) { *self = self.to_ascii_uppercase(); }
367 fn make_ascii_lowercase(&mut self) { *self = self.to_ascii_lowercase(); }
370 #[stable(feature = "rust1", since = "1.0.0")]
371 impl AsciiExt for char {
374 fn is_ascii(&self) -> bool {
379 fn to_ascii_uppercase(&self) -> char {
381 (*self as u8).to_ascii_uppercase() as char
388 fn to_ascii_lowercase(&self) -> char {
390 (*self as u8).to_ascii_lowercase() as char
397 fn eq_ignore_ascii_case(&self, other: &char) -> bool {
398 self.to_ascii_lowercase() == other.to_ascii_lowercase()
402 fn make_ascii_uppercase(&mut self) { *self = self.to_ascii_uppercase(); }
404 fn make_ascii_lowercase(&mut self) { *self = self.to_ascii_lowercase(); }
407 /// An iterator over the escaped version of a byte, constructed via
408 /// `std::ascii::escape_default`.
409 #[stable(feature = "rust1", since = "1.0.0")]
pub struct EscapeDefault {
    // Indices into `data` that have not been yielded yet.
    range: Range<usize>,
    // Escaped form of the byte; an escape is 1, 2 or 4 bytes long and unused
    // trailing slots are zero.
    data: [u8; 4],
}
/// Returns an iterator that produces an escaped version of a `u8`.
///
/// The default is chosen with a bias toward producing literals that are
/// legal in a variety of languages, including C++11 and similar C-family
/// languages. The exact rules are:
///
/// - Tab, CR and LF are escaped as `\t`, `\r` and `\n` respectively.
/// - Single-quote, double-quote and backslash chars are backslash-escaped.
/// - Any other chars in the range `[0x20, 0x7e]` are not escaped.
/// - Any other chars are given hex escapes of the form `\xNN`.
/// - Unicode escapes are never generated by this function.
///
/// # Examples
///
/// ```
/// use std::ascii;
///
/// let escaped = ascii::escape_default(b'0').next().unwrap();
/// assert_eq!(b'0', escaped);
///
/// let mut escaped = ascii::escape_default(b'\t');
///
/// assert_eq!(b'\\', escaped.next().unwrap());
/// assert_eq!(b't', escaped.next().unwrap());
/// ```
440 #[stable(feature = "rust1", since = "1.0.0")]
441 pub fn escape_default(c: u8) -> EscapeDefault {
442 let (data, len) = match c {
443 b'\t' => ([b'\\', b't', 0, 0], 2),
444 b'\r' => ([b'\\', b'r', 0, 0], 2),
445 b'\n' => ([b'\\', b'n', 0, 0], 2),
446 b'\\' => ([b'\\', b'\\', 0, 0], 2),
447 b'\'' => ([b'\\', b'\'', 0, 0], 2),
448 b'"' => ([b'\\', b'"', 0, 0], 2),
449 b'\x20' ... b'\x7e' => ([c, 0, 0, 0], 1),
450 _ => ([b'\\', b'x', hexify(c >> 4), hexify(c & 0xf)], 4),
453 return EscapeDefault { range: (0.. len), data: data };
455 fn hexify(b: u8) -> u8 {
463 #[stable(feature = "rust1", since = "1.0.0")]
464 impl Iterator for EscapeDefault {
466 fn next(&mut self) -> Option<u8> { self.range.next().map(|i| self.data[i]) }
467 fn size_hint(&self) -> (usize, Option<usize>) { self.range.size_hint() }
469 #[stable(feature = "rust1", since = "1.0.0")]
470 impl DoubleEndedIterator for EscapeDefault {
471 fn next_back(&mut self) -> Option<u8> {
472 self.range.next_back().map(|i| self.data[i])
475 #[stable(feature = "rust1", since = "1.0.0")]
// No methods to override: `size_hint` is forwarded from the inner index range,
// whose bounds are exact.
impl ExactSizeIterator for EscapeDefault {}
// Lookup table for `u8::to_ascii_lowercase`: entry `i` is the lower case form
// of byte `i`. Only 'A' to 'Z' (0x41 to 0x5a) differ from their index; the
// blank lines and the indentation set that remapped range apart.
static ASCII_LOWERCASE_MAP: [u8; 256] = [
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
    0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
    b' ', b'!', b'"', b'#', b'$', b'%', b'&', b'\'',
    b'(', b')', b'*', b'+', b',', b'-', b'.', b'/',
    b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7',
    b'8', b'9', b':', b';', b'<', b'=', b'>', b'?',
    b'@',

          b'a', b'b', b'c', b'd', b'e', b'f', b'g',
    b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o',
    b'p', b'q', b'r', b's', b't', b'u', b'v', b'w',
    b'x', b'y', b'z',

                      b'[', b'\\', b']', b'^', b'_',
    b'`', b'a', b'b', b'c', b'd', b'e', b'f', b'g',
    b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o',
    b'p', b'q', b'r', b's', b't', b'u', b'v', b'w',
    b'x', b'y', b'z', b'{', b'|', b'}', b'~', 0x7f,
    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
    0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
    0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
    0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
    0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
    0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
    0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
    0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
    0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
    0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
    0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
    0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
    0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
    0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
    0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
];
// Lookup table for `u8::to_ascii_uppercase`: entry `i` is the upper case form
// of byte `i`. Only 'a' to 'z' (0x61 to 0x7a) differ from their index; the
// blank lines and the indentation set that remapped range apart.
static ASCII_UPPERCASE_MAP: [u8; 256] = [
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
    0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
    b' ', b'!', b'"', b'#', b'$', b'%', b'&', b'\'',
    b'(', b')', b'*', b'+', b',', b'-', b'.', b'/',
    b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7',
    b'8', b'9', b':', b';', b'<', b'=', b'>', b'?',
    b'@', b'A', b'B', b'C', b'D', b'E', b'F', b'G',
    b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O',
    b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W',
    b'X', b'Y', b'Z', b'[', b'\\', b']', b'^', b'_',
    b'`',

          b'A', b'B', b'C', b'D', b'E', b'F', b'G',
    b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O',
    b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W',
    b'X', b'Y', b'Z',

                      b'{', b'|', b'}', b'~', 0x7f,
    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
    0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
    0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
    0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
    0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
    0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
    0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
    0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
    0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
    0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
    0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
    0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
    0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
    0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
    0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
];
565 assert!(b"".is_ascii());
566 assert!(b"banana\0\x7F".is_ascii());
567 assert!(b"banana\0\x7F".iter().all(|b| b.is_ascii()));
568 assert!(!b"Vi\xe1\xbb\x87t Nam".is_ascii());
569 assert!(!b"Vi\xe1\xbb\x87t Nam".iter().all(|b| b.is_ascii()));
570 assert!(!b"\xe1\xbb\x87".iter().any(|b| b.is_ascii()));
572 assert!("".is_ascii());
573 assert!("banana\0\u{7F}".is_ascii());
574 assert!("banana\0\u{7F}".chars().all(|c| c.is_ascii()));
575 assert!(!"ประเทศไทย中华Việt Nam".chars().all(|c| c.is_ascii()));
576 assert!(!"ประเทศไทย中华ệ ".chars().any(|c| c.is_ascii()));
580 fn test_to_ascii_uppercase() {
581 assert_eq!("url()URL()uRl()ürl".to_ascii_uppercase(), "URL()URL()URL()üRL");
582 assert_eq!("hıKß".to_ascii_uppercase(), "HıKß");
585 let upper = if 'a' as u32 <= i && i <= 'z' as u32 { i + 'A' as u32 - 'a' as u32 }
587 assert_eq!((from_u32(i).unwrap()).to_string().to_ascii_uppercase(),
588 (from_u32(upper).unwrap()).to_string());
593 fn test_to_ascii_lowercase() {
594 assert_eq!("url()URL()uRl()Ürl".to_ascii_lowercase(), "url()url()url()Ürl");
595 // Dotted capital I, Kelvin sign, Sharp S.
596 assert_eq!("HİKß".to_ascii_lowercase(), "hİKß");
599 let lower = if 'A' as u32 <= i && i <= 'Z' as u32 { i + 'a' as u32 - 'A' as u32 }
601 assert_eq!((from_u32(i).unwrap()).to_string().to_ascii_lowercase(),
602 (from_u32(lower).unwrap()).to_string());
607 fn test_make_ascii_lower_case() {
609 ($from: expr, $to: expr) => {
612 x.make_ascii_lowercase();
624 test!(b"H\xc3\x89".to_vec(), b"h\xc3\x89");
625 test!("HİKß".to_string(), "hİKß");
630 fn test_make_ascii_upper_case() {
632 ($from: expr, $to: expr) => {
635 x.make_ascii_uppercase();
647 test!(b"h\xc3\xa9".to_vec(), b"H\xc3\xa9");
648 test!("hıKß".to_string(), "HıKß");
650 let mut x = "Hello".to_string();
651 x[..3].make_ascii_uppercase(); // Test IndexMut on String.
652 assert_eq!(x, "HELlo")
656 fn test_eq_ignore_ascii_case() {
657 assert!("url()URL()uRl()Ürl".eq_ignore_ascii_case("url()url()url()Ürl"));
658 assert!(!"Ürl".eq_ignore_ascii_case("ürl"));
659 // Dotted capital I, Kelvin sign, Sharp S.
660 assert!("HİKß".eq_ignore_ascii_case("hİKß"));
661 assert!(!"İ".eq_ignore_ascii_case("i"));
662 assert!(!"K".eq_ignore_ascii_case("k"));
663 assert!(!"ß".eq_ignore_ascii_case("s"));
666 let lower = if 'A' as u32 <= i && i <= 'Z' as u32 { i + 'a' as u32 - 'A' as u32 }
668 assert!((from_u32(i).unwrap()).to_string().eq_ignore_ascii_case(
669 &from_u32(lower).unwrap().to_string()));