1 // Copyright 2013-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 //! Operations on ASCII strings and characters
13 #![stable(feature = "rust1", since = "1.0.0")]
20 /// Extension methods for ASCII-subset only operations on string slices.
21 #[stable(feature = "rust1", since = "1.0.0")]
23 /// Container type for copied ASCII characters.
24 #[stable(feature = "rust1", since = "1.0.0")]
27 /// Checks if the value is within the ASCII range.
32 /// use std::ascii::AsciiExt;
37 /// assert_eq!(true, ascii.is_ascii());
38 /// assert_eq!(false, utf8.is_ascii());
40 #[stable(feature = "rust1", since = "1.0.0")]
41 fn is_ascii(&self) -> bool;
43 /// Makes a copy of the value in its ASCII upper case equivalent.
45 /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
46 /// but non-ASCII letters are unchanged.
51 /// use std::ascii::AsciiExt;
56 /// assert_eq!('A', ascii.to_ascii_uppercase());
57 /// assert_eq!('❤', utf8.to_ascii_uppercase());
59 #[stable(feature = "rust1", since = "1.0.0")]
60 fn to_ascii_uppercase(&self) -> Self::Owned;
62 /// Makes a copy of the value in its ASCII lower case equivalent.
64 /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
65 /// but non-ASCII letters are unchanged.
70 /// use std::ascii::AsciiExt;
75 /// assert_eq!('a', ascii.to_ascii_lowercase());
76 /// assert_eq!('❤', utf8.to_ascii_lowercase());
78 #[stable(feature = "rust1", since = "1.0.0")]
79 fn to_ascii_lowercase(&self) -> Self::Owned;
81 /// Checks that two values are an ASCII case-insensitive match.
83 /// Same as `to_ascii_lowercase(a) == to_ascii_lowercase(b)`,
84 /// but without allocating and copying temporary strings.
89 /// use std::ascii::AsciiExt;
96 /// assert_eq!(true, ascii1.eq_ignore_ascii_case(&ascii2));
97 /// assert_eq!(true, ascii1.eq_ignore_ascii_case(&ascii3));
98 /// assert_eq!(false, ascii1.eq_ignore_ascii_case(&ascii4));
100 #[stable(feature = "rust1", since = "1.0.0")]
101 fn eq_ignore_ascii_case(&self, other: &Self) -> bool;
103 /// Converts this value to its ASCII upper case equivalent in-place.
105 /// See `to_ascii_uppercase` for more information.
110 /// #![feature(ascii)]
112 /// use std::ascii::AsciiExt;
114 /// let mut ascii = 'a';
116 /// ascii.make_ascii_uppercase();
118 /// assert_eq!('A', ascii);
120 #[unstable(feature = "ascii", issue = "27809")]
121 fn make_ascii_uppercase(&mut self);
123 /// Converts this value to its ASCII lower case equivalent in-place.
125 /// See `to_ascii_lowercase` for more information.
130 /// #![feature(ascii)]
132 /// use std::ascii::AsciiExt;
134 /// let mut ascii = 'A';
136 /// ascii.make_ascii_lowercase();
138 /// assert_eq!('a', ascii);
140 #[unstable(feature = "ascii", issue = "27809")]
141 fn make_ascii_lowercase(&mut self);
// `AsciiExt` for string slices. Every method operates on the string's UTF-8
// bytes and defers to the `[u8]` / `u8` implementations.
144 #[stable(feature = "rust1", since = "1.0.0")]
145 impl AsciiExt for str {
// True when every byte is ASCII; an empty string counts as ASCII.
149 fn is_ascii(&self) -> bool {
150 self.bytes().all(|b| b.is_ascii())
// Returns an upper-cased copy: clone the bytes, convert them in place,
// then rebuild the `String` without re-validating.
154 fn to_ascii_uppercase(&self) -> String {
155 let mut bytes = self.as_bytes().to_vec();
156 bytes.make_ascii_uppercase();
157 // SAFETY: make_ascii_uppercase() preserves the UTF-8 invariant: it only rewrites ASCII letters.
158 unsafe { String::from_utf8_unchecked(bytes) }
// Returns a lower-cased copy, built the same way as the upper-case variant.
162 fn to_ascii_lowercase(&self) -> String {
163 let mut bytes = self.as_bytes().to_vec();
164 bytes.make_ascii_lowercase();
165 // SAFETY: make_ascii_lowercase() preserves the UTF-8 invariant: it only rewrites ASCII letters.
166 unsafe { String::from_utf8_unchecked(bytes) }
// Case-insensitive comparison performed directly on the two byte slices.
170 fn eq_ignore_ascii_case(&self, other: &str) -> bool {
171 self.as_bytes().eq_ignore_ascii_case(other.as_bytes())
// In-place upper-casing through a mutable byte view of the string.
174 fn make_ascii_uppercase(&mut self) {
// SAFETY (review note): reinterprets `&mut str` as `&mut [u8]`. The ASCII case
// change only rewrites ASCII letters, so the bytes stay valid UTF-8. This also
// assumes `str` and `[u8]` share the same layout -- TODO confirm.
175 let me: &mut [u8] = unsafe { mem::transmute(self) };
176 me.make_ascii_uppercase()
// In-place lower-casing through a mutable byte view of the string.
179 fn make_ascii_lowercase(&mut self) {
// SAFETY (review note): same reasoning as in `make_ascii_uppercase`: only ASCII
// letters are rewritten, so the UTF-8 invariant holds; layout assumption to confirm.
180 let me: &mut [u8] = unsafe { mem::transmute(self) };
181 me.make_ascii_lowercase()
// `AsciiExt` for byte slices; the owned form is a `Vec<u8>`. Each method applies
// the `u8` implementation to the bytes of the slice.
185 #[stable(feature = "rust1", since = "1.0.0")]
186 impl AsciiExt for [u8] {
187 type Owned = Vec<u8>;
// True when every byte is ASCII; an empty slice counts as ASCII.
189 fn is_ascii(&self) -> bool {
190 self.iter().all(|b| b.is_ascii())
// Copies the slice, then upper-cases the copy in place.
194 fn to_ascii_uppercase(&self) -> Vec<u8> {
195 let mut me = self.to_vec();
196 me.make_ascii_uppercase();
// Copies the slice, then lower-cases the copy in place.
201 fn to_ascii_lowercase(&self) -> Vec<u8> {
202 let mut me = self.to_vec();
203 me.make_ascii_lowercase();
// Compares the lengths first: `zip` stops at the shorter slice, so without the
// length check a strict prefix would compare as equal.
208 fn eq_ignore_ascii_case(&self, other: &[u8]) -> bool {
209 self.len() == other.len() &&
210 self.iter().zip(other).all(|(a, b)| {
211 a.eq_ignore_ascii_case(b)
// Applies the per-byte upper-case conversion in place.
215 fn make_ascii_uppercase(&mut self) {
217 byte.make_ascii_uppercase();
// Applies the per-byte lower-case conversion in place.
221 fn make_ascii_lowercase(&mut self) {
223 byte.make_ascii_lowercase();
// `AsciiExt` for a single byte. Case conversion is a lookup in the 256-entry
// tables `ASCII_UPPERCASE_MAP` / `ASCII_LOWERCASE_MAP`.
228 #[stable(feature = "rust1", since = "1.0.0")]
229 impl AsciiExt for u8 {
// A byte is ASCII exactly when its high bit (128 = 0x80) is clear.
232 fn is_ascii(&self) -> bool { *self & 128 == 0 }
// Table lookup indexed by the byte value; a `u8` index always fits a 256-entry table.
234 fn to_ascii_uppercase(&self) -> u8 { ASCII_UPPERCASE_MAP[*self as usize] }
// Table lookup indexed by the byte value; a `u8` index always fits a 256-entry table.
236 fn to_ascii_lowercase(&self) -> u8 { ASCII_LOWERCASE_MAP[*self as usize] }
// Two bytes match when their lower-cased forms are equal.
238 fn eq_ignore_ascii_case(&self, other: &u8) -> bool {
239 self.to_ascii_lowercase() == other.to_ascii_lowercase()
// Overwrites the byte with its upper-case mapping.
242 fn make_ascii_uppercase(&mut self) { *self = self.to_ascii_uppercase(); }
// Overwrites the byte with its lower-case mapping.
244 fn make_ascii_lowercase(&mut self) { *self = self.to_ascii_lowercase(); }
// `AsciiExt` for `char`; case conversion goes through the `u8` implementation.
247 #[stable(feature = "rust1", since = "1.0.0")]
248 impl AsciiExt for char {
251 fn is_ascii(&self) -> bool {
256 fn to_ascii_uppercase(&self) -> char {
// NOTE(review): `as u8` keeps only the low 8 bits of the code point, so this
// expression is meaningful for ASCII chars only -- presumably it sits behind an
// ASCII check that leaves other chars unchanged; confirm against the full body.
258 (*self as u8).to_ascii_uppercase() as char
265 fn to_ascii_lowercase(&self) -> char {
// NOTE(review): same truncating cast as in `to_ascii_uppercase`; confirm it is
// only reached for ASCII chars.
267 (*self as u8).to_ascii_lowercase() as char
// Two chars match when their ASCII-lower-cased forms are equal.
274 fn eq_ignore_ascii_case(&self, other: &char) -> bool {
275 self.to_ascii_lowercase() == other.to_ascii_lowercase()
// Overwrites the char with its upper-case mapping.
279 fn make_ascii_uppercase(&mut self) { *self = self.to_ascii_uppercase(); }
// Overwrites the char with its lower-case mapping.
281 fn make_ascii_lowercase(&mut self) { *self = self.to_ascii_lowercase(); }
284 /// An iterator over the escaped version of a byte, constructed via
285 /// `std::ascii::escape_default`.
///
/// Yields the bytes of the escape sequence as `u8` items, from a small
/// buffer (`data`) and the range of indices (`range`) still to be produced.
286 #[stable(feature = "rust1", since = "1.0.0")]
287 pub struct EscapeDefault {
292 /// Returns an iterator that produces an escaped version of a `u8`.
294 /// The default is chosen with a bias toward producing literals that are
295 /// legal in a variety of languages, including C++11 and similar C-family
296 /// languages. The exact rules are:
298 /// - Tab, CR and LF are escaped as '\t', '\r' and '\n' respectively.
299 /// - Single-quote, double-quote and backslash chars are backslash-escaped.
300 /// - Any other chars in the range [0x20,0x7e] are not escaped.
301 /// - Any other chars are given hex escapes of the form '\xNN'.
302 /// - Unicode escapes are never generated by this function.
309 /// let escaped = ascii::escape_default(b'0').next().unwrap();
310 /// assert_eq!(b'0', escaped);
312 /// let mut escaped = ascii::escape_default(b'\t');
314 /// assert_eq!(b'\\', escaped.next().unwrap());
315 /// assert_eq!(b't', escaped.next().unwrap());
317 #[stable(feature = "rust1", since = "1.0.0")]
318 pub fn escape_default(c: u8) -> EscapeDefault {
// `data` is a fixed four-byte buffer, zero-padded; `len` is the number of
// leading bytes that belong to the escape sequence (1, 2 or 4).
319 let (data, len) = match c {
320 b'\t' => ([b'\\', b't', 0, 0], 2),
321 b'\r' => ([b'\\', b'r', 0, 0], 2),
322 b'\n' => ([b'\\', b'n', 0, 0], 2),
// Backslash and both quote characters lie inside 0x20..0x7e, so these arms
// must come before the printable-range arm to take effect.
323 b'\\' => ([b'\\', b'\\', 0, 0], 2),
324 b'\'' => ([b'\\', b'\'', 0, 0], 2),
325 b'"' => ([b'\\', b'"', 0, 0], 2),
// Remaining printable ASCII is passed through unchanged.
326 b'\x20' ... b'\x7e' => ([c, 0, 0, 0], 1),
// Everything else becomes `\xNN`: high nibble first, then low nibble.
327 _ => ([b'\\', b'x', hexify(c >> 4), hexify(c & 0xf)], 4),
// The index range `0..len` restricts iteration to the meaningful prefix of `data`.
330 return EscapeDefault { range: (0.. len), data: data };
// Helper for the `\xNN` arm; it is called with a 4-bit value (`c >> 4` or
// `c & 0xf`) -- presumably it returns the matching ASCII hex digit; confirm
// against the full body.
332 fn hexify(b: u8) -> u8 {
// Forward iteration over the escape sequence.
340 #[stable(feature = "rust1", since = "1.0.0")]
341 impl Iterator for EscapeDefault {
// Takes the next index from the front of `range` and returns the byte stored there.
343 fn next(&mut self) -> Option<u8> { self.range.next().map(|i| self.data[i]) }
// Forwards the size hint of the remaining index range.
344 fn size_hint(&self) -> (usize, Option<usize>) { self.range.size_hint() }
// Backward iteration over the escape sequence.
346 #[stable(feature = "rust1", since = "1.0.0")]
347 impl DoubleEndedIterator for EscapeDefault {
// Takes the next index from the back of `range` and returns the byte stored there.
348 fn next_back(&mut self) -> Option<u8> {
349 self.range.next_back().map(|i| self.data[i])
// Marker impl with no overridden methods; `size_hint` above forwards the
// hint of the underlying index range.
352 #[stable(feature = "rust1", since = "1.0.0")]
353 impl ExactSizeIterator for EscapeDefault {}
// Lookup table indexed by byte value, used by `u8::to_ascii_lowercase`.
// The rows at the positions of `A`-`Z` hold the lower-case letters; the other
// entries shown map a byte to itself, including the whole 0x80-0xff half.
355 static ASCII_LOWERCASE_MAP: [u8; 256] = [
356 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
357 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
358 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
359 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
360 b' ', b'!', b'"', b'#', b'$', b'%', b'&', b'\'',
361 b'(', b')', b'*', b'+', b',', b'-', b'.', b'/',
362 b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7',
363 b'8', b'9', b':', b';', b'<', b'=', b'>', b'?',
366 b'a', b'b', b'c', b'd', b'e', b'f', b'g',
367 b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o',
368 b'p', b'q', b'r', b's', b't', b'u', b'v', b'w',
371 b'[', b'\\', b']', b'^', b'_',
372 b'`', b'a', b'b', b'c', b'd', b'e', b'f', b'g',
373 b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o',
374 b'p', b'q', b'r', b's', b't', b'u', b'v', b'w',
375 b'x', b'y', b'z', b'{', b'|', b'}', b'~', 0x7f,
376 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
377 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
378 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
379 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
380 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
381 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
382 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
383 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
384 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
385 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
386 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
387 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
388 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
389 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
390 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
391 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
// Lookup table indexed by byte value, used by `u8::to_ascii_uppercase`.
// The rows at the positions of `a`-`z` hold the upper-case letters; the other
// entries shown map a byte to itself, including the whole 0x80-0xff half.
394 static ASCII_UPPERCASE_MAP: [u8; 256] = [
395 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
396 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
397 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
398 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
399 b' ', b'!', b'"', b'#', b'$', b'%', b'&', b'\'',
400 b'(', b')', b'*', b'+', b',', b'-', b'.', b'/',
401 b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7',
402 b'8', b'9', b':', b';', b'<', b'=', b'>', b'?',
403 b'@', b'A', b'B', b'C', b'D', b'E', b'F', b'G',
404 b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O',
405 b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W',
406 b'X', b'Y', b'Z', b'[', b'\\', b']', b'^', b'_',
409 b'A', b'B', b'C', b'D', b'E', b'F', b'G',
410 b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O',
411 b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W',
414 b'{', b'|', b'}', b'~', 0x7f,
415 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
416 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
417 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
418 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
419 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
420 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
421 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
422 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
423 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
424 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
425 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
426 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
427 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
428 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
429 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
430 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
442 assert!(b"".is_ascii());
443 assert!(b"banana\0\x7F".is_ascii());
444 assert!(b"banana\0\x7F".iter().all(|b| b.is_ascii()));
445 assert!(!b"Vi\xe1\xbb\x87t Nam".is_ascii());
446 assert!(!b"Vi\xe1\xbb\x87t Nam".iter().all(|b| b.is_ascii()));
447 assert!(!b"\xe1\xbb\x87".iter().any(|b| b.is_ascii()));
449 assert!("".is_ascii());
450 assert!("banana\0\u{7F}".is_ascii());
451 assert!("banana\0\u{7F}".chars().all(|c| c.is_ascii()));
452 assert!(!"ประเทศไทย中华Việt Nam".chars().all(|c| c.is_ascii()));
453 assert!(!"ประเทศไทย中华ệ ".chars().any(|c| c.is_ascii()));
// Checks `str::to_ascii_uppercase`: ASCII lower-case letters change, everything else stays.
457 fn test_to_ascii_uppercase() {
458 assert_eq!("url()URL()uRl()ürl".to_ascii_uppercase(), "URL()URL()URL()üRL");
// Dotless i, Kelvin sign, Sharp S: none are ASCII, so only the `h` changes.
459 assert_eq!("hıKß".to_ascii_uppercase(), "HıKß");
// Expected result for code point `i`: `a`-`z` is shifted into the `A`-`Z` range.
462 let upper = if 'a' as u32 <= i && i <= 'z' as u32 { i + 'A' as u32 - 'a' as u32 }
464 assert_eq!((from_u32(i).unwrap()).to_string().to_ascii_uppercase(),
465 (from_u32(upper).unwrap()).to_string());
// Checks `str::to_ascii_lowercase`: ASCII upper-case letters change, everything else stays.
470 fn test_to_ascii_lowercase() {
471 assert_eq!("url()URL()uRl()Ürl".to_ascii_lowercase(), "url()url()url()Ürl");
472 // Dotted capital I, Kelvin sign, Sharp S.
473 assert_eq!("HİKß".to_ascii_lowercase(), "hİKß");
// Expected result for code point `i`: `A`-`Z` is shifted into the `a`-`z` range.
476 let lower = if 'A' as u32 <= i && i <= 'Z' as u32 { i + 'a' as u32 - 'A' as u32 }
478 assert_eq!((from_u32(i).unwrap()).to_string().to_ascii_lowercase(),
479 (from_u32(lower).unwrap()).to_string());
// Checks the in-place `make_ascii_lowercase` through a local `test!` macro
// that takes a starting value and the expected result.
484 fn test_make_ascii_lower_case() {
486 ($from: expr, $to: expr) => {
489 x.make_ascii_lowercase();
// `\xc3\x89` is the UTF-8 encoding of U+00C9; those bytes must stay untouched.
501 test!(b"H\xc3\x89".to_vec(), b"h\xc3\x89");
// Dotted capital I, Kelvin sign, Sharp S: only the ASCII `H` changes.
502 test!("HİKß".to_string(), "hİKß");
// Checks the in-place `make_ascii_uppercase` through a local `test!` macro
// that takes a starting value and the expected result.
507 fn test_make_ascii_upper_case() {
509 ($from: expr, $to: expr) => {
512 x.make_ascii_uppercase();
// `\xc3\xa9` is the UTF-8 encoding of U+00E9; those bytes must stay untouched.
524 test!(b"h\xc3\xa9".to_vec(), b"H\xc3\xa9");
// Dotless i, Kelvin sign, Sharp S: only the ASCII `h` changes.
525 test!("hıKß".to_string(), "HıKß");
// Upper-casing a sub-slice must leave the rest of the string alone.
527 let mut x = "Hello".to_string();
528 x[..3].make_ascii_uppercase(); // Test IndexMut on String.
529 assert_eq!(x, "HELlo")
// Checks `eq_ignore_ascii_case`: only ASCII letters are compared case-insensitively.
533 fn test_eq_ignore_ascii_case() {
534 assert!("url()URL()uRl()Ürl".eq_ignore_ascii_case("url()url()url()Ürl"));
// `Ü` and `ü` are not ASCII, so they are not treated as a case pair.
535 assert!(!"Ürl".eq_ignore_ascii_case("ürl"));
536 // Dotted capital I, Kelvin sign, Sharp S.
537 assert!("HİKß".eq_ignore_ascii_case("hİKß"));
// Non-ASCII look-alikes must not match their ASCII counterparts.
538 assert!(!"İ".eq_ignore_ascii_case("i"));
539 assert!(!"K".eq_ignore_ascii_case("k"));
540 assert!(!"ß".eq_ignore_ascii_case("s"));
// For code point `i`, build the `A`-`Z` to `a`-`z` counterpart and require a match.
543 let lower = if 'A' as u32 <= i && i <= 'Z' as u32 { i + 'a' as u32 - 'A' as u32 }
545 assert!((from_u32(i).unwrap()).to_string().eq_ignore_ascii_case(
546 &from_u32(lower).unwrap().to_string()));