1 // Copyright 2013-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 //! Operations on ASCII strings and characters
13 #![stable(feature = "rust1", since = "1.0.0")]
20 /// Extension methods for ASCII-subset only operations on owned strings
21 #[unstable(feature = "owned_ascii_ext",
22 reason = "would prefer to do this in a more general way")]
23 pub trait OwnedAsciiExt {
24 /// Converts the string to ASCII upper case:
25 /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
26 /// but non-ASCII letters are unchanged.
27 fn into_ascii_uppercase(self) -> Self;
29 /// Converts the string to ASCII lower case:
30 /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
31 /// but non-ASCII letters are unchanged.
32 fn into_ascii_lowercase(self) -> Self;
35 /// Extension methods for ASCII-subset only operations on string slices.
36 #[stable(feature = "rust1", since = "1.0.0")]
38 /// Container type for copied ASCII characters.
39 #[stable(feature = "rust1", since = "1.0.0")]
42 /// Checks if within the ASCII range.
47 /// use std::ascii::AsciiExt;
52 /// assert_eq!(true, ascii.is_ascii());
53 /// assert_eq!(false, utf8.is_ascii())
55 #[stable(feature = "rust1", since = "1.0.0")]
56 fn is_ascii(&self) -> bool;
58 /// Makes a copy of the string in ASCII upper case.
60 /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
61 /// but non-ASCII letters are unchanged.
66 /// use std::ascii::AsciiExt;
71 /// assert_eq!('A', ascii.to_ascii_uppercase());
72 /// assert_eq!('❤', utf8.to_ascii_uppercase());
74 #[stable(feature = "rust1", since = "1.0.0")]
75 fn to_ascii_uppercase(&self) -> Self::Owned;
77 /// Makes a copy of the string in ASCII lower case.
79 /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
80 /// but non-ASCII letters are unchanged.
85 /// use std::ascii::AsciiExt;
90 /// assert_eq!('a', ascii.to_ascii_lowercase());
91 /// assert_eq!('❤', utf8.to_ascii_lowercase());
93 #[stable(feature = "rust1", since = "1.0.0")]
94 fn to_ascii_lowercase(&self) -> Self::Owned;
96 /// Checks that two strings are an ASCII case-insensitive match.
98 /// Same as `to_ascii_lowercase(a) == to_ascii_lowercase(b)`,
99 /// but without allocating and copying temporary strings.
104 /// use std::ascii::AsciiExt;
106 /// let ascii1 = 'A';
107 /// let ascii2 = 'a';
108 /// let ascii3 = 'A';
109 /// let ascii4 = 'z';
111 /// assert_eq!(true, ascii1.eq_ignore_ascii_case(&ascii2));
112 /// assert_eq!(true, ascii1.eq_ignore_ascii_case(&ascii3));
113 /// assert_eq!(false, ascii1.eq_ignore_ascii_case(&ascii4));
115 #[stable(feature = "rust1", since = "1.0.0")]
116 fn eq_ignore_ascii_case(&self, other: &Self) -> bool;
118 /// Converts this type to its ASCII upper case equivalent in-place.
120 /// See `to_ascii_uppercase` for more information.
125 /// # #![feature(ascii)]
126 /// use std::ascii::AsciiExt;
128 /// let mut ascii = 'a';
130 /// ascii.make_ascii_uppercase();
132 /// assert_eq!('A', ascii);
134 #[unstable(feature = "ascii")]
135 fn make_ascii_uppercase(&mut self);
137 /// Converts this type to its ASCII lower case equivalent in-place.
139 /// See `to_ascii_lowercase` for more information.
144 /// # #![feature(ascii)]
145 /// use std::ascii::AsciiExt;
147 /// let mut ascii = 'A';
149 /// ascii.make_ascii_lowercase();
151 /// assert_eq!('a', ascii);
153 #[unstable(feature = "ascii")]
154 fn make_ascii_lowercase(&mut self);
157 #[stable(feature = "rust1", since = "1.0.0")]
158 impl AsciiExt for str {
162 fn is_ascii(&self) -> bool {
163 self.bytes().all(|b| b.is_ascii())
167 fn to_ascii_uppercase(&self) -> String {
168 self.to_string().into_ascii_uppercase()
172 fn to_ascii_lowercase(&self) -> String {
173 self.to_string().into_ascii_lowercase()
177 fn eq_ignore_ascii_case(&self, other: &str) -> bool {
178 self.as_bytes().eq_ignore_ascii_case(other.as_bytes())
181 fn make_ascii_uppercase(&mut self) {
182 let me: &mut [u8] = unsafe { mem::transmute(self) };
183 me.make_ascii_uppercase()
186 fn make_ascii_lowercase(&mut self) {
187 let me: &mut [u8] = unsafe { mem::transmute(self) };
188 me.make_ascii_lowercase()
192 impl OwnedAsciiExt for String {
194 fn into_ascii_uppercase(self) -> String {
195 // Vec<u8>::into_ascii_uppercase() preserves the UTF-8 invariant.
196 unsafe { String::from_utf8_unchecked(self.into_bytes().into_ascii_uppercase()) }
200 fn into_ascii_lowercase(self) -> String {
201 // Vec<u8>::into_ascii_lowercase() preserves the UTF-8 invariant.
202 unsafe { String::from_utf8_unchecked(self.into_bytes().into_ascii_lowercase()) }
206 #[stable(feature = "rust1", since = "1.0.0")]
207 impl AsciiExt for [u8] {
208 type Owned = Vec<u8>;
210 fn is_ascii(&self) -> bool {
211 self.iter().all(|b| b.is_ascii())
215 fn to_ascii_uppercase(&self) -> Vec<u8> {
216 self.to_vec().into_ascii_uppercase()
220 fn to_ascii_lowercase(&self) -> Vec<u8> {
221 self.to_vec().into_ascii_lowercase()
225 fn eq_ignore_ascii_case(&self, other: &[u8]) -> bool {
226 self.len() == other.len() &&
227 self.iter().zip(other).all(|(a, b)| {
228 a.eq_ignore_ascii_case(b)
232 fn make_ascii_uppercase(&mut self) {
234 byte.make_ascii_uppercase();
238 fn make_ascii_lowercase(&mut self) {
240 byte.make_ascii_lowercase();
245 impl OwnedAsciiExt for Vec<u8> {
247 fn into_ascii_uppercase(mut self) -> Vec<u8> {
248 self.make_ascii_uppercase();
253 fn into_ascii_lowercase(mut self) -> Vec<u8> {
254 self.make_ascii_lowercase();
259 #[stable(feature = "rust1", since = "1.0.0")]
260 impl AsciiExt for u8 {
263 fn is_ascii(&self) -> bool { *self & 128 == 0 }
265 fn to_ascii_uppercase(&self) -> u8 { ASCII_UPPERCASE_MAP[*self as usize] }
267 fn to_ascii_lowercase(&self) -> u8 { ASCII_LOWERCASE_MAP[*self as usize] }
269 fn eq_ignore_ascii_case(&self, other: &u8) -> bool {
270 self.to_ascii_lowercase() == other.to_ascii_lowercase()
273 fn make_ascii_uppercase(&mut self) { *self = self.to_ascii_uppercase(); }
275 fn make_ascii_lowercase(&mut self) { *self = self.to_ascii_lowercase(); }
278 #[stable(feature = "rust1", since = "1.0.0")]
279 impl AsciiExt for char {
282 fn is_ascii(&self) -> bool {
287 fn to_ascii_uppercase(&self) -> char {
289 (*self as u8).to_ascii_uppercase() as char
296 fn to_ascii_lowercase(&self) -> char {
298 (*self as u8).to_ascii_lowercase() as char
305 fn eq_ignore_ascii_case(&self, other: &char) -> bool {
306 self.to_ascii_lowercase() == other.to_ascii_lowercase()
310 fn make_ascii_uppercase(&mut self) { *self = self.to_ascii_uppercase(); }
312 fn make_ascii_lowercase(&mut self) { *self = self.to_ascii_lowercase(); }
315 /// An iterator over the escaped version of a byte, constructed via
316 /// `std::ascii::escape_default`.
317 #[stable(feature = "rust1", since = "1.0.0")]
318 pub struct EscapeDefault {
323 /// Returns an iterator that produces an escaped version of a `u8`.
325 /// The default is chosen with a bias toward producing literals that are
326 /// legal in a variety of languages, including C++11 and similar C-family
327 /// languages. The exact rules are:
329 /// - Tab, CR and LF are escaped as '\t', '\r' and '\n' respectively.
330 /// - Single-quote, double-quote and backslash chars are backslash-escaped.
331 /// - Any other chars in the range [0x20,0x7e] are not escaped.
332 /// - Any other chars are given hex escapes of the form '\xNN'.
333 /// - Unicode escapes are never generated by this function.
340 /// let escaped = ascii::escape_default(b'0').next().unwrap();
341 /// assert_eq!(b'0', escaped);
343 /// let mut escaped = ascii::escape_default(b'\t');
345 /// assert_eq!(b'\\', escaped.next().unwrap());
346 /// assert_eq!(b't', escaped.next().unwrap());
348 #[stable(feature = "rust1", since = "1.0.0")]
349 pub fn escape_default(c: u8) -> EscapeDefault {
350 let (data, len) = match c {
351 b'\t' => ([b'\\', b't', 0, 0], 2),
352 b'\r' => ([b'\\', b'r', 0, 0], 2),
353 b'\n' => ([b'\\', b'n', 0, 0], 2),
354 b'\\' => ([b'\\', b'\\', 0, 0], 2),
355 b'\'' => ([b'\\', b'\'', 0, 0], 2),
356 b'"' => ([b'\\', b'"', 0, 0], 2),
357 b'\x20' ... b'\x7e' => ([c, 0, 0, 0], 1),
358 _ => ([b'\\', b'x', hexify(c >> 4), hexify(c & 0xf)], 4),
361 return EscapeDefault { range: (0.. len), data: data };
363 fn hexify(b: u8) -> u8 {
371 #[stable(feature = "rust1", since = "1.0.0")]
372 impl Iterator for EscapeDefault {
374 fn next(&mut self) -> Option<u8> { self.range.next().map(|i| self.data[i]) }
375 fn size_hint(&self) -> (usize, Option<usize>) { self.range.size_hint() }
377 #[stable(feature = "rust1", since = "1.0.0")]
378 impl DoubleEndedIterator for EscapeDefault {
379 fn next_back(&mut self) -> Option<u8> {
380 self.range.next_back().map(|i| self.data[i])
383 #[stable(feature = "rust1", since = "1.0.0")]
384 impl ExactSizeIterator for EscapeDefault {}
// Lookup table for `u8::to_ascii_lowercase`, indexed by byte value.
// Every entry is the identity except 0x41...0x5a ('A'...'Z'), which hold
// the corresponding lower-case letter. The two visually offset groups
// below mark the remapped upper-case slots.
static ASCII_LOWERCASE_MAP: [u8; 256] = [
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
    0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
    b' ', b'!', b'"', b'#', b'$', b'%', b'&', b'\'',
    b'(', b')', b'*', b'+', b',', b'-', b'.', b'/',
    b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7',
    b'8', b'9', b':', b';', b'<', b'=', b'>', b'?',
    b'@',

          b'a', b'b', b'c', b'd', b'e', b'f', b'g',
    b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o',
    b'p', b'q', b'r', b's', b't', b'u', b'v', b'w',
    b'x', b'y', b'z',

                      b'[', b'\\', b']', b'^', b'_',
    b'`', b'a', b'b', b'c', b'd', b'e', b'f', b'g',
    b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o',
    b'p', b'q', b'r', b's', b't', b'u', b'v', b'w',
    b'x', b'y', b'z', b'{', b'|', b'}', b'~', 0x7f,
    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
    0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
    0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
    0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
    0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
    0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
    0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
    0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
    0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
    0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
    0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
    0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
    0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
    0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
    0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
];
// Lookup table for `u8::to_ascii_uppercase`, indexed by byte value.
// Every entry is the identity except 0x61...0x7a ('a'...'z'), which hold
// the corresponding upper-case letter. The two visually offset groups
// below mark the remapped lower-case slots.
static ASCII_UPPERCASE_MAP: [u8; 256] = [
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
    0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
    b' ', b'!', b'"', b'#', b'$', b'%', b'&', b'\'',
    b'(', b')', b'*', b'+', b',', b'-', b'.', b'/',
    b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7',
    b'8', b'9', b':', b';', b'<', b'=', b'>', b'?',
    b'@', b'A', b'B', b'C', b'D', b'E', b'F', b'G',
    b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O',
    b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W',
    b'X', b'Y', b'Z', b'[', b'\\', b']', b'^', b'_',
    b'`',

          b'A', b'B', b'C', b'D', b'E', b'F', b'G',
    b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O',
    b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W',
    b'X', b'Y', b'Z',

                      b'{', b'|', b'}', b'~', 0x7f,
    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
    0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
    0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
    0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
    0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
    0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
    0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
    0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
    0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
    0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
    0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
    0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
    0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
    0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
    0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
];
473 assert!("banana".chars().all(|c| c.is_ascii()));
474 assert!(!"ประเทศไทย中华Việt Nam".chars().all(|c| c.is_ascii()));
// `is_ascii` on string slices: empty and plain ASCII strings pass, a
// string containing U+2009 (a non-ASCII space) does not.
#[test]
fn test_ascii_vec() {
    assert!("".is_ascii());
    assert!("a".is_ascii());
    assert!(!"\u{2009}".is_ascii());
}
// `to_ascii_uppercase` on string slices: ASCII letters are upper-cased,
// everything else is left exactly as it was.
#[test]
fn test_to_ascii_uppercase() {
    assert_eq!("url()URL()uRl()ürl".to_ascii_uppercase(), "URL()URL()URL()üRL");
    // Dotless i, Kelvin sign, sharp s. The Kelvin sign (U+212A) is written
    // as an escape because it is visually identical to ASCII 'K'.
    assert_eq!("hı\u{212A}ß".to_ascii_uppercase(), "Hı\u{212A}ß");

    // NOTE(review): the loop header is not visible in this listing; 0..501
    // covers all of ASCII plus a stretch of non-ASCII code points — confirm
    // the intended bound.
    for i in 0..501 {
        let upper = if 'a' as u32 <= i && i <= 'z' as u32 { i + 'A' as u32 - 'a' as u32 }
                    else { i };
        assert_eq!((from_u32(i).unwrap()).to_string().to_ascii_uppercase(),
                   (from_u32(upper).unwrap()).to_string());
    }
}
// `to_ascii_lowercase` on string slices: ASCII letters are lower-cased,
// everything else is left exactly as it was.
#[test]
fn test_to_ascii_lowercase() {
    assert_eq!("url()URL()uRl()Ürl".to_ascii_lowercase(), "url()url()url()Ürl");
    // Dotted capital I, Kelvin sign, Sharp S. The Kelvin sign (U+212A) is
    // written as an escape because it is visually identical to ASCII 'K',
    // which *would* be lower-cased.
    assert_eq!("Hİ\u{212A}ß".to_ascii_lowercase(), "hİ\u{212A}ß");

    // NOTE(review): the loop header is not visible in this listing; 0..501
    // covers all of ASCII plus a stretch of non-ASCII code points — confirm
    // the intended bound.
    for i in 0..501 {
        let lower = if 'A' as u32 <= i && i <= 'Z' as u32 { i + 'a' as u32 - 'A' as u32 }
                    else { i };
        assert_eq!((from_u32(i).unwrap()).to_string().to_ascii_lowercase(),
                   (from_u32(lower).unwrap()).to_string());
    }
}
// `into_ascii_uppercase` on owned strings: same expectations as the
// borrowing `to_ascii_uppercase`, but consuming the `String`.
#[test]
fn test_into_ascii_uppercase() {
    assert_eq!(("url()URL()uRl()ürl".to_string()).into_ascii_uppercase(),
               "URL()URL()URL()üRL".to_string());
    // Dotless i, Kelvin sign, sharp s. The Kelvin sign (U+212A) is written
    // as an escape because it is visually identical to ASCII 'K'.
    assert_eq!(("hı\u{212A}ß".to_string()).into_ascii_uppercase(), "Hı\u{212A}ß");

    // NOTE(review): the loop header is not visible in this listing; 0..501
    // covers all of ASCII plus a stretch of non-ASCII code points — confirm
    // the intended bound.
    for i in 0..501 {
        let upper = if 'a' as u32 <= i && i <= 'z' as u32 { i + 'A' as u32 - 'a' as u32 }
                    else { i };
        assert_eq!((from_u32(i).unwrap()).to_string().into_ascii_uppercase(),
                   (from_u32(upper).unwrap()).to_string());
    }
}
// `into_ascii_lowercase` on owned strings: same expectations as the
// borrowing `to_ascii_lowercase`, but consuming the `String`.
#[test]
fn test_into_ascii_lowercase() {
    assert_eq!(("url()URL()uRl()Ürl".to_string()).into_ascii_lowercase(),
               "url()url()url()Ürl");
    // Dotted capital I, Kelvin sign, Sharp S. The Kelvin sign (U+212A) is
    // written as an escape because it is visually identical to ASCII 'K',
    // which *would* be lower-cased.
    assert_eq!(("Hİ\u{212A}ß".to_string()).into_ascii_lowercase(), "hİ\u{212A}ß");

    // NOTE(review): the loop header is not visible in this listing; 0..501
    // covers all of ASCII plus a stretch of non-ASCII code points — confirm
    // the intended bound.
    for i in 0..501 {
        let lower = if 'A' as u32 <= i && i <= 'Z' as u32 { i + 'a' as u32 - 'A' as u32 }
                    else { i };
        assert_eq!((from_u32(i).unwrap()).to_string().into_ascii_lowercase(),
                   (from_u32(lower).unwrap()).to_string());
    }
}
// `eq_ignore_ascii_case` on string slices: only ASCII letters are folded,
// so non-ASCII letters that merely *look* like a case pair never match.
#[test]
fn test_eq_ignore_ascii_case() {
    assert!("url()URL()uRl()Ürl".eq_ignore_ascii_case("url()url()url()Ürl"));
    assert!(!"Ürl".eq_ignore_ascii_case("ürl"));
    // Dotted capital I, Kelvin sign, Sharp S. The Kelvin sign (U+212A) is
    // written as an escape because it is visually identical to ASCII 'K',
    // which *would* match 'k'.
    assert!("Hİ\u{212A}ß".eq_ignore_ascii_case("hİ\u{212A}ß"));
    assert!(!"İ".eq_ignore_ascii_case("i"));
    assert!(!"\u{212A}".eq_ignore_ascii_case("k"));
    assert!(!"ß".eq_ignore_ascii_case("s"));

    // NOTE(review): the loop header is not visible in this listing; 0..501
    // covers all of ASCII plus a stretch of non-ASCII code points — confirm
    // the intended bound.
    for i in 0..501 {
        let lower = if 'A' as u32 <= i && i <= 'Z' as u32 { i + 'a' as u32 - 'A' as u32 }
                    else { i };
        assert!((from_u32(i).unwrap()).to_string().eq_ignore_ascii_case(
                &from_u32(lower).unwrap().to_string()));
    }
}