1 // Copyright 2013-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 //! Operations on ASCII strings and characters
13 #![stable(feature = "rust1", since = "1.0.0")]
20 /// Extension methods for ASCII-subset only operations on string slices.
22 /// Be aware that operations on seemingly non-ASCII characters can sometimes
23 /// have unexpected results. Consider this example:
26 /// use std::ascii::AsciiExt;
28 /// assert_eq!("cafe\u{301}".to_ascii_uppercase(), "CAFE\u{301}"); // combining accent
29 /// assert_eq!("caf\u{e9}".to_ascii_uppercase(), "CAF\u{e9}"); // precomposed 'e' with acute
32 /// In the first example, the lowercase string is represented as `"cafe\u{301}"`
33 /// (the last character is an acute accent [combining character]). Unlike the
34 /// other characters in the string, the combining character will not get mapped
35 /// to an uppercase variant, resulting in `"CAFE\u{301}"`. In the second
36 /// example, the lowercase string is represented as `"caf\u{e9}"` (the last
37 /// character is a single Unicode character representing an 'e' with an acute
38 /// accent). Since the last character is defined outside the scope of ASCII,
39 /// it will not get mapped to an uppercase variant, resulting in `"CAF\u{e9}"`.
41 /// [combining character]: https://en.wikipedia.org/wiki/Combining_character
42 #[stable(feature = "rust1", since = "1.0.0")]
44 /// Container type for copied ASCII characters.
45 #[stable(feature = "rust1", since = "1.0.0")]
48 /// Checks if within the ASCII range.
53 /// use std::ascii::AsciiExt;
58 /// assert_eq!(true, ascii.is_ascii());
59 /// assert_eq!(false, utf8.is_ascii())
61 #[stable(feature = "rust1", since = "1.0.0")]
62 fn is_ascii(&self) -> bool;
64 /// Makes a copy of the string in ASCII upper case.
66 /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
67 /// but non-ASCII letters are unchanged.
72 /// use std::ascii::AsciiExt;
77 /// assert_eq!('A', ascii.to_ascii_uppercase());
78 /// assert_eq!('❤', utf8.to_ascii_uppercase());
80 #[stable(feature = "rust1", since = "1.0.0")]
81 fn to_ascii_uppercase(&self) -> Self::Owned;
83 /// Makes a copy of the string in ASCII lower case.
85 /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
86 /// but non-ASCII letters are unchanged.
91 /// use std::ascii::AsciiExt;
96 /// assert_eq!('a', ascii.to_ascii_lowercase());
97 /// assert_eq!('❤', utf8.to_ascii_lowercase());
99 #[stable(feature = "rust1", since = "1.0.0")]
100 fn to_ascii_lowercase(&self) -> Self::Owned;
102 /// Checks that two strings are an ASCII case-insensitive match.
104 /// Same as `to_ascii_lowercase(a) == to_ascii_lowercase(b)`,
105 /// but without allocating and copying temporary strings.
110 /// use std::ascii::AsciiExt;
112 /// let ascii1 = 'A';
113 /// let ascii2 = 'a';
114 /// let ascii3 = 'A';
115 /// let ascii4 = 'z';
117 /// assert_eq!(true, ascii1.eq_ignore_ascii_case(&ascii2));
118 /// assert_eq!(true, ascii1.eq_ignore_ascii_case(&ascii3));
119 /// assert_eq!(false, ascii1.eq_ignore_ascii_case(&ascii4));
121 #[stable(feature = "rust1", since = "1.0.0")]
122 fn eq_ignore_ascii_case(&self, other: &Self) -> bool;
124 /// Converts this type to its ASCII upper case equivalent in-place.
126 /// See `to_ascii_uppercase` for more information.
131 /// #![feature(ascii)]
133 /// use std::ascii::AsciiExt;
135 /// let mut ascii = 'a';
137 /// ascii.make_ascii_uppercase();
139 /// assert_eq!('A', ascii);
141 #[unstable(feature = "ascii", issue = "27809")]
142 fn make_ascii_uppercase(&mut self);
144 /// Converts this type to its ASCII lower case equivalent in-place.
146 /// See `to_ascii_lowercase` for more information.
151 /// #![feature(ascii)]
153 /// use std::ascii::AsciiExt;
155 /// let mut ascii = 'A';
157 /// ascii.make_ascii_lowercase();
159 /// assert_eq!('a', ascii);
161 #[unstable(feature = "ascii", issue = "27809")]
162 fn make_ascii_lowercase(&mut self);
// `str` implementation: every operation is carried out on the underlying
// bytes and delegates to the `[u8]` / `u8` implementations of this trait.
165 #[stable(feature = "rust1", since = "1.0.0")]
166 impl AsciiExt for str {
// True when every byte of the slice is an ASCII byte.
170 fn is_ascii(&self) -> bool {
171 self.bytes().all(|b| b.is_ascii())
// Copies the bytes, upper-cases the copy in place, then rebuilds a `String`
// from the copy without re-validating it.
175 fn to_ascii_uppercase(&self) -> String {
176 let mut bytes = self.as_bytes().to_vec();
177 bytes.make_ascii_uppercase();
178 // make_ascii_uppercase() preserves the UTF-8 invariant.
179 unsafe { String::from_utf8_unchecked(bytes) }
// Lower-case counterpart of `to_ascii_uppercase`; same copy-then-edit scheme.
183 fn to_ascii_lowercase(&self) -> String {
184 let mut bytes = self.as_bytes().to_vec();
185 bytes.make_ascii_lowercase();
186 // make_ascii_lowercase() preserves the UTF-8 invariant.
187 unsafe { String::from_utf8_unchecked(bytes) }
// Compares the two slices byte-wise through the `[u8]` implementation, so no
// temporary strings are built.
191 fn eq_ignore_ascii_case(&self, other: &str) -> bool {
192 self.as_bytes().eq_ignore_ascii_case(other.as_bytes())
// Views the `&mut str` as `&mut [u8]` so the byte-slice implementation can
// edit it in place.
// NOTE(review): presumably sound for the reason stated in `to_ascii_uppercase`
// (an ASCII case change keeps the bytes valid UTF-8) -- confirm.
195 fn make_ascii_uppercase(&mut self) {
196 let me: &mut [u8] = unsafe { mem::transmute(self) };
197 me.make_ascii_uppercase()
// Same byte-view approach as `make_ascii_uppercase`, lower-casing instead.
200 fn make_ascii_lowercase(&mut self) {
201 let me: &mut [u8] = unsafe { mem::transmute(self) };
202 me.make_ascii_lowercase()
// Byte-slice implementation: applies the per-byte `u8` operations across the
// slice; the owned form is a `Vec<u8>`.
206 #[stable(feature = "rust1", since = "1.0.0")]
207 impl AsciiExt for [u8] {
208 type Owned = Vec<u8>;
// True when every byte in the slice is ASCII.
210 fn is_ascii(&self) -> bool {
211 self.iter().all(|b| b.is_ascii())
// Works on an owned copy so `self` is left untouched.
// NOTE(review): the line returning `me` is not visible in this extract.
215 fn to_ascii_uppercase(&self) -> Vec<u8> {
216 let mut me = self.to_vec();
217 me.make_ascii_uppercase();
// Lower-case counterpart of `to_ascii_uppercase`, also on an owned copy.
222 fn to_ascii_lowercase(&self) -> Vec<u8> {
223 let mut me = self.to_vec();
224 me.make_ascii_lowercase();
// The length check comes first because `zip` stops at the shorter input;
// without it a slice would compare equal to any longer slice it prefixes.
229 fn eq_ignore_ascii_case(&self, other: &[u8]) -> bool {
230 self.len() == other.len() &&
231 self.iter().zip(other).all(|(a, b)| {
232 a.eq_ignore_ascii_case(b)
// Upper-cases in place via the `u8` implementation.
// NOTE(review): the loop header that binds `byte` is not visible in this
// extract; presumably it iterates mutably over `self`.
236 fn make_ascii_uppercase(&mut self) {
238 byte.make_ascii_uppercase();
// Lower-cases in place via the `u8` implementation (loop header likewise not
// visible here).
242 fn make_ascii_lowercase(&mut self) {
244 byte.make_ascii_lowercase();
// Single-byte implementation; the slice and string implementations build on it.
249 #[stable(feature = "rust1", since = "1.0.0")]
250 impl AsciiExt for u8 {
// A byte is ASCII exactly when its high bit (value 128) is clear.
253 fn is_ascii(&self) -> bool { *self & 128 == 0 }
// Table lookups: both maps are declared as `[u8; 256]`, one entry per possible
// byte value, so indexing with a `u8` cannot go out of bounds.
255 fn to_ascii_uppercase(&self) -> u8 { ASCII_UPPERCASE_MAP[*self as usize] }
257 fn to_ascii_lowercase(&self) -> u8 { ASCII_LOWERCASE_MAP[*self as usize] }
// Case-insensitive comparison: both sides are lower-cased, then compared.
259 fn eq_ignore_ascii_case(&self, other: &u8) -> bool {
260 self.to_ascii_lowercase() == other.to_ascii_lowercase()
// In-place variants simply store the converted value back into `self`.
263 fn make_ascii_uppercase(&mut self) { *self = self.to_ascii_uppercase(); }
265 fn make_ascii_lowercase(&mut self) { *self = self.to_ascii_lowercase(); }
// `char` implementation; the case conversions reuse the `u8` implementation.
268 #[stable(feature = "rust1", since = "1.0.0")]
269 impl AsciiExt for char {
// NOTE(review): the body of `is_ascii` is not visible in this extract.
272 fn is_ascii(&self) -> bool {
// NOTE(review): `*self as u8` keeps only the low 8 bits of the code point.
// The condition guarding this cast is not visible in this extract; the trait
// documentation ("non-ASCII letters are unchanged") implies the cast is only
// reached for ASCII characters -- confirm against the full file.
277 fn to_ascii_uppercase(&self) -> char {
279 (*self as u8).to_ascii_uppercase() as char
// Lower-case counterpart; the same note about the unseen guard applies.
286 fn to_ascii_lowercase(&self) -> char {
288 (*self as u8).to_ascii_lowercase() as char
// Case-insensitive comparison: both sides are lower-cased, then compared.
295 fn eq_ignore_ascii_case(&self, other: &char) -> bool {
296 self.to_ascii_lowercase() == other.to_ascii_lowercase()
// In-place variants store the converted character back into `self`.
300 fn make_ascii_uppercase(&mut self) { *self = self.to_ascii_uppercase(); }
302 fn make_ascii_lowercase(&mut self) { *self = self.to_ascii_lowercase(); }
305 /// An iterator over the escaped version of a byte, constructed via
306 /// `std::ascii::escape_default`.
307 #[stable(feature = "rust1", since = "1.0.0")]
308 pub struct EscapeDefault {
313 /// Returns an iterator that produces an escaped version of a `u8`.
315 /// The default is chosen with a bias toward producing literals that are
316 /// legal in a variety of languages, including C++11 and similar C-family
317 /// languages. The exact rules are:
319 /// - Tab, CR and LF are escaped as '\t', '\r' and '\n' respectively.
320 /// - Single-quote, double-quote and backslash chars are backslash-escaped.
321 /// - Any other chars in the range [0x20,0x7e] are not escaped.
322 /// - Any other chars are given hex escapes of the form '\xNN'.
323 /// - Unicode escapes are never generated by this function.
330 /// let escaped = ascii::escape_default(b'0').next().unwrap();
331 /// assert_eq!(b'0', escaped);
333 /// let mut escaped = ascii::escape_default(b'\t');
335 /// assert_eq!(b'\\', escaped.next().unwrap());
336 /// assert_eq!(b't', escaped.next().unwrap());
338 #[stable(feature = "rust1", since = "1.0.0")]
339 pub fn escape_default(c: u8) -> EscapeDefault {
// Every arm yields a fixed 4-byte buffer together with the number of leading
// bytes that are meaningful; unused slots are filled with 0.
340 let (data, len) = match c {
341 b'\t' => ([b'\\', b't', 0, 0], 2),
342 b'\r' => ([b'\\', b'r', 0, 0], 2),
343 b'\n' => ([b'\\', b'n', 0, 0], 2),
344 b'\\' => ([b'\\', b'\\', 0, 0], 2),
345 b'\'' => ([b'\\', b'\'', 0, 0], 2),
346 b'"' => ([b'\\', b'"', 0, 0], 2),
// Remaining printable ASCII passes through unchanged. This arm must stay below
// the backslash and quote arms, whose bytes also lie inside 0x20..0x7e.
347 b'\x20' ... b'\x7e' => ([c, 0, 0, 0], 1),
// Everything else becomes `\xNN`: high nibble first, then low nibble.
348 _ => ([b'\\', b'x', hexify(c >> 4), hexify(c & 0xf)], 4),
// The iterator only walks indices `0..len`, so the zero padding is never
// yielded.
351 return EscapeDefault { range: (0.. len), data: data };
// Helper nested inside `escape_default`.
// NOTE(review): its body is not visible in this extract; from the two call
// sites it receives a 4-bit value and is expected to return the byte of the
// matching hex digit -- confirm digit case against the full file.
353 fn hexify(b: u8) -> u8 {
// Forward iteration: `range` supplies the next index and `data` holds the
// escaped bytes; iteration ends when the range is exhausted.
361 #[stable(feature = "rust1", since = "1.0.0")]
362 impl Iterator for EscapeDefault {
364 fn next(&mut self) -> Option<u8> { self.range.next().map(|i| self.data[i]) }
// The remaining length is whatever the index range still has left.
365 fn size_hint(&self) -> (usize, Option<usize>) { self.range.size_hint() }
// Backward iteration: takes indices from the end of `range`, so the escaped
// bytes can be consumed from either side.
367 #[stable(feature = "rust1", since = "1.0.0")]
368 impl DoubleEndedIterator for EscapeDefault {
369 fn next_back(&mut self) -> Option<u8> {
370 self.range.next_back().map(|i| self.data[i])
// Empty impl: no methods are overridden, so the trait's defaults are used.
// `size_hint` above forwards to the index range built in `escape_default`.
373 #[stable(feature = "rust1", since = "1.0.0")]
374 impl ExactSizeIterator for EscapeDefault {}
// Lookup table used by `u8::to_ascii_lowercase`, indexed by byte value.
// The entries in the upper-case letter positions hold the matching lower-case
// letter; every other visible entry maps to itself, including all bytes from
// 0x80 to 0xff.
// NOTE(review): some rows (for example the `@` entry and the `x`/`y`/`z`
// entries of the converted range) are not visible in this extract.
376 static ASCII_LOWERCASE_MAP: [u8; 256] = [
377 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
378 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
379 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
380 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
381 b' ', b'!', b'"', b'#', b'$', b'%', b'&', b'\'',
382 b'(', b')', b'*', b'+', b',', b'-', b'.', b'/',
383 b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7',
384 b'8', b'9', b':', b';', b'<', b'=', b'>', b'?',
// Upper-case letter positions: filled with the lower-case letters.
387 b'a', b'b', b'c', b'd', b'e', b'f', b'g',
388 b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o',
389 b'p', b'q', b'r', b's', b't', b'u', b'v', b'w',
392 b'[', b'\\', b']', b'^', b'_',
393 b'`', b'a', b'b', b'c', b'd', b'e', b'f', b'g',
394 b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o',
395 b'p', b'q', b'r', b's', b't', b'u', b'v', b'w',
396 b'x', b'y', b'z', b'{', b'|', b'}', b'~', 0x7f,
397 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
398 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
399 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
400 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
401 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
402 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
403 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
404 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
405 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
406 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
407 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
408 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
409 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
410 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
411 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
412 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
// Lookup table used by `u8::to_ascii_uppercase`, indexed by byte value.
// The entries in the lower-case letter positions hold the matching upper-case
// letter; every other visible entry maps to itself, including all bytes from
// 0x80 to 0xff.
// NOTE(review): some rows (for example the backtick entry and the `X`/`Y`/`Z`
// entries of the converted range) are not visible in this extract.
415 static ASCII_UPPERCASE_MAP: [u8; 256] = [
416 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
417 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
418 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
419 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
420 b' ', b'!', b'"', b'#', b'$', b'%', b'&', b'\'',
421 b'(', b')', b'*', b'+', b',', b'-', b'.', b'/',
422 b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7',
423 b'8', b'9', b':', b';', b'<', b'=', b'>', b'?',
424 b'@', b'A', b'B', b'C', b'D', b'E', b'F', b'G',
425 b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O',
426 b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W',
427 b'X', b'Y', b'Z', b'[', b'\\', b']', b'^', b'_',
// Lower-case letter positions: filled with the upper-case letters.
430 b'A', b'B', b'C', b'D', b'E', b'F', b'G',
431 b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O',
432 b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W',
435 b'{', b'|', b'}', b'~', 0x7f,
436 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
437 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
438 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
439 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
440 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
441 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
442 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
443 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
444 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
445 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
446 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
447 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
448 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
449 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
450 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
451 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
463 assert!(b"".is_ascii());
464 assert!(b"banana\0\x7F".is_ascii());
465 assert!(b"banana\0\x7F".iter().all(|b| b.is_ascii()));
466 assert!(!b"Vi\xe1\xbb\x87t Nam".is_ascii());
467 assert!(!b"Vi\xe1\xbb\x87t Nam".iter().all(|b| b.is_ascii()));
468 assert!(!b"\xe1\xbb\x87".iter().any(|b| b.is_ascii()));
470 assert!("".is_ascii());
471 assert!("banana\0\u{7F}".is_ascii());
472 assert!("banana\0\u{7F}".chars().all(|c| c.is_ascii()));
473 assert!(!"ประเทศไทย中华Việt Nam".chars().all(|c| c.is_ascii()));
474 assert!(!"ประเทศไทย中华ệ ".chars().any(|c| c.is_ascii()));
478 fn test_to_ascii_uppercase() {
479 assert_eq!("url()URL()uRl()ürl".to_ascii_uppercase(), "URL()URL()URL()üRL");
480 assert_eq!("hıKß".to_ascii_uppercase(), "HıKß");
483 let upper = if 'a' as u32 <= i && i <= 'z' as u32 { i + 'A' as u32 - 'a' as u32 }
485 assert_eq!((from_u32(i).unwrap()).to_string().to_ascii_uppercase(),
486 (from_u32(upper).unwrap()).to_string());
491 fn test_to_ascii_lowercase() {
492 assert_eq!("url()URL()uRl()Ürl".to_ascii_lowercase(), "url()url()url()Ürl");
493 // Dotted capital I, Kelvin sign, Sharp S.
494 assert_eq!("HİKß".to_ascii_lowercase(), "hİKß");
497 let lower = if 'A' as u32 <= i && i <= 'Z' as u32 { i + 'a' as u32 - 'A' as u32 }
499 assert_eq!((from_u32(i).unwrap()).to_string().to_ascii_lowercase(),
500 (from_u32(lower).unwrap()).to_string());
505 fn test_make_ascii_lower_case() {
507 ($from: expr, $to: expr) => {
510 x.make_ascii_lowercase();
522 test!(b"H\xc3\x89".to_vec(), b"h\xc3\x89");
523 test!("HİKß".to_string(), "hİKß");
528 fn test_make_ascii_upper_case() {
530 ($from: expr, $to: expr) => {
533 x.make_ascii_uppercase();
545 test!(b"h\xc3\xa9".to_vec(), b"H\xc3\xa9");
546 test!("hıKß".to_string(), "HıKß");
548 let mut x = "Hello".to_string();
549 x[..3].make_ascii_uppercase(); // Test IndexMut on String.
550 assert_eq!(x, "HELlo")
554 fn test_eq_ignore_ascii_case() {
555 assert!("url()URL()uRl()Ürl".eq_ignore_ascii_case("url()url()url()Ürl"));
556 assert!(!"Ürl".eq_ignore_ascii_case("ürl"));
557 // Dotted capital I, Kelvin sign, Sharp S.
558 assert!("HİKß".eq_ignore_ascii_case("hİKß"));
559 assert!(!"İ".eq_ignore_ascii_case("i"));
560 assert!(!"K".eq_ignore_ascii_case("k"));
561 assert!(!"ß".eq_ignore_ascii_case("s"));
564 let lower = if 'A' as u32 <= i && i <= 'Z' as u32 { i + 'a' as u32 - 'A' as u32 }
566 assert!((from_u32(i).unwrap()).to_string().eq_ignore_ascii_case(
567 &from_u32(lower).unwrap().to_string()));