1 // Copyright 2013 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 //! Operations on ASCII strings and characters.
13 use to_str::{ToStr,ToStrConsume};
17 use container::Container;
20 use iterator::{Iterator, IteratorUtil};
21 use vec::{CopyableVector, ImmutableVector, OwnedVector};
22 use to_bytes::IterBytes;
23 use option::{Some, None};
25 /// Datatype to hold one ascii character. It wraps a `u8`, with the highest bit always zero.
///
/// The `chr` field is private. The checked conversions in this file
/// (`to_ascii`, `into_ascii`) assert `is_ascii()` before building a value.
26 #[deriving(Clone, Eq)]
27 pub struct Ascii { priv chr: u8 }
30 /// Converts an ascii character into a `u8`.
32 pub fn to_byte(self) -> u8 {
36 /// Converts an ascii character into a `char`.
38 pub fn to_char(self) -> char {
42 /// Convert to lowercase.
///
/// The byte is looked up in `ASCII_LOWER_MAP`, so only `A`-`Z` change;
/// every other value maps to itself.
44 pub fn to_lower(self) -> Ascii {
45 Ascii{chr: ASCII_LOWER_MAP[self.chr]}
48 /// Convert to uppercase.
///
/// The byte is looked up in `ASCII_UPPER_MAP`, so only `a`-`z` change;
/// every other value maps to itself.
50 pub fn to_upper(self) -> Ascii {
51 Ascii{chr: ASCII_UPPER_MAP[self.chr]}
54 /// Compares two ascii characters for equality, ignoring case.
///
/// Both bytes are folded through `ASCII_LOWER_MAP` before being compared.
56 pub fn eq_ignore_case(self, other: Ascii) -> bool {
57 ASCII_LOWER_MAP[self.chr] == ASCII_LOWER_MAP[other.chr]
// String form of a single `Ascii`: the character wrapped in single quotes,
// built from the three bytes `'`, `chr`, `'`.
61 impl ToStr for Ascii {
63 fn to_str(&self) -> ~str { str::from_bytes(['\'' as u8, self.chr, '\'' as u8]) }
66 /// Trait for converting into an ascii type.
///
/// `T` is the resulting ascii type (`Ascii` or `&[Ascii]` for the
/// implementations in this file).
67 pub trait AsciiCast<T> {
68 /// Convert to an ascii type
///
/// The implementations in this file assert `is_ascii()` before converting.
69 fn to_ascii(&self) -> T;
71 /// Convert to an ascii type, not doing any range asserts
///
/// Unsafe: the caller is responsible for the input being ascii.
72 unsafe fn to_ascii_nocheck(&self) -> T;
74 /// Check if convertible to ascii
75 fn is_ascii(&self) -> bool;
// Borrowed byte slices are viewed as `Ascii` slices with the same lifetime.
78 impl<'self> AsciiCast<&'self[Ascii]> for &'self [u8] {
80 fn to_ascii(&self) -> &'self[Ascii] {
// Validate first; the unchecked cast below is only reached for all-ascii input.
81 assert!(self.is_ascii());
82 unsafe {self.to_ascii_nocheck()}
86 unsafe fn to_ascii_nocheck(&self) -> &'self[Ascii] {
// Reinterprets the slice value as-is.
// NOTE(review): relies on `Ascii` having the same layout as `u8` — confirm.
87 cast::transmute(*self)
91 fn is_ascii(&self) -> bool {
// Bail out on the first byte that is not ascii.
92 for b in self.iter() {
93 if !b.is_ascii() { return false; }
// Borrowed strings are viewed as `Ascii` slices with the same lifetime.
99 impl<'self> AsciiCast<&'self[Ascii]> for &'self str {
101 fn to_ascii(&self) -> &'self[Ascii] {
// Validate first; the unchecked cast below is only reached for all-ascii input.
102 assert!(self.is_ascii());
103 unsafe {self.to_ascii_nocheck()}
107 unsafe fn to_ascii_nocheck(&self) -> &'self[Ascii] {
// Split the string slice into (pointer, length) and rebuild it one element shorter.
// NOTE(review): presumably this drops the string's trailing zero byte
// (`to_str_ascii` appends one when going the other way) — confirm.
108 let (p,len): (*u8, uint) = cast::transmute(*self);
109 cast::transmute((p, len - 1))
113 fn is_ascii(&self) -> bool {
// True only if every byte of the string is ascii.
114 self.byte_iter().all(|b| b.is_ascii())
118 impl AsciiCast<Ascii> for u8 {
120 fn to_ascii(&self) -> Ascii {
121 assert!(self.is_ascii());
122 unsafe {self.to_ascii_nocheck()}
126 unsafe fn to_ascii_nocheck(&self) -> Ascii {
131 fn is_ascii(&self) -> bool {
// Conversion of a single `char` into an `Ascii`.
136 impl AsciiCast<Ascii> for char {
138 fn to_ascii(&self) -> Ascii {
// Validate first; the unchecked cast below is only reached for ascii input.
139 assert!(self.is_ascii());
140 unsafe {self.to_ascii_nocheck()}
144 unsafe fn to_ascii_nocheck(&self) -> Ascii {
// Plain `as u8` cast with no range check.
145 Ascii{ chr: *self as u8 }
149 fn is_ascii(&self) -> bool {
// True when masking with 0x7F leaves the value unchanged, i.e. no bit
// above the low seven is set.
150 *self - ('\x7F' & *self) == '\x00'
154 /// Trait for copyless casting to an ascii vector.
155 pub trait OwnedAsciiCast {
156 /// Take ownership and cast to an ascii vector without trailing zero element.
///
/// The implementations in this file assert `is_ascii()` before casting.
157 fn into_ascii(self) -> ~[Ascii];
159 /// Take ownership and cast to an ascii vector without trailing zero element.
160 /// Does not perform validation checks.
///
/// Unsafe: the caller is responsible for the input being ascii.
161 unsafe fn into_ascii_nocheck(self) -> ~[Ascii];
// Owned byte vectors are converted by consuming `self`.
164 impl OwnedAsciiCast for ~[u8] {
166 fn into_ascii(self) -> ~[Ascii] {
// Validate first; the unchecked cast below is only reached for all-ascii input.
167 assert!(self.is_ascii());
168 unsafe {self.into_ascii_nocheck()}
172 unsafe fn into_ascii_nocheck(self) -> ~[Ascii] {
// Reinterprets the owned vector as-is.
// NOTE(review): relies on `Ascii` having the same layout as `u8` — confirm.
173 cast::transmute(self)
177 impl OwnedAsciiCast for ~str {
179 fn into_ascii(self) -> ~[Ascii] {
180 assert!(self.is_ascii());
181 unsafe {self.into_ascii_nocheck()}
185 unsafe fn into_ascii_nocheck(self) -> ~[Ascii] {
186 let mut r: ~[Ascii] = cast::transmute(self);
192 /// Trait for converting an ascii type to a string. Needed to convert `&[Ascii]` to `~str`
194 /// Convert to a string.
195 fn to_str_ascii(&self) -> ~str;
197 /// Convert to vector representing a lower cased ascii string.
198 fn to_lower(&self) -> ~[Ascii];
200 /// Convert to vector representing an upper cased ascii string.
201 fn to_upper(&self) -> ~[Ascii];
203 /// Compares two Ascii strings ignoring case
204 fn eq_ignore_case(self, other: &[Ascii]) -> bool;
// String-like operations on borrowed `Ascii` slices.
207 impl<'self> AsciiStr for &'self [Ascii] {
// Copies the slice, appends a zero element and reinterprets the buffer as a `~str`.
209 fn to_str_ascii(&self) -> ~str {
210 let mut cpy = self.to_owned();
211 cpy.push(0u8.to_ascii());
212 unsafe {cast::transmute(cpy)}
// Returns a new vector with every element lower-cased.
216 fn to_lower(&self) -> ~[Ascii] {
217 self.map(|a| a.to_lower())
// Returns a new vector with every element upper-cased.
221 fn to_upper(&self) -> ~[Ascii] {
222 self.map(|a| a.to_upper())
// Slices of different length are never equal. `zip` stops at the shorter
// input, so without the length check a slice would compare equal to any
// longer slice that starts with the same characters.
226 fn eq_ignore_case(self, other: &[Ascii]) -> bool {
227 self.len() == other.len() && self.iter().zip(other.iter()).all(|(&a, &b)| a.eq_ignore_case(b))
231 impl ToStrConsume for ~[Ascii] {
233 fn into_str(self) -> ~str {
235 cpy.push(0u8.to_ascii());
236 unsafe {cast::transmute(cpy)}
240 impl IterBytes for Ascii {
242 fn iter_bytes(&self, _lsb0: bool, f: &fn(buf: &[u8]) -> bool) -> bool {
247 /// Trait to convert to an owned byte array by consuming self
248 pub trait ToBytesConsume {
249 /// Converts to an owned byte array by consuming self
250 fn into_bytes(self) -> ~[u8];
// Owned `Ascii` vectors are turned back into bytes by consuming `self`.
253 impl ToBytesConsume for ~[Ascii] {
254 fn into_bytes(self) -> ~[u8] {
// Reinterprets the owned vector as-is.
// NOTE(review): relies on `Ascii` having the same layout as `u8` — confirm.
255 unsafe {cast::transmute(self)}
260 /// Convert the string to ASCII upper case:
261 /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
262 /// but non-ASCII letters are unchanged.
///
/// Returns a new `~str`; the mapping is applied per byte through
/// `ASCII_UPPER_MAP`, in which bytes 0x80 and above map to themselves.
264 pub fn to_ascii_upper(string: &str) -> ~str {
265 map_bytes(string, ASCII_UPPER_MAP)
268 /// Convert the string to ASCII lower case:
269 /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
270 /// but non-ASCII letters are unchanged.
///
/// Returns a new `~str`; the mapping is applied per byte through
/// `ASCII_LOWER_MAP`, in which bytes 0x80 and above map to themselves.
272 pub fn to_ascii_lower(string: &str) -> ~str {
273 map_bytes(string, ASCII_LOWER_MAP)
277 priv fn map_bytes(string: &str, map: &'static [u8]) -> ~str {
278 let len = string.len();
279 let mut result = str::with_capacity(len);
281 do result.as_mut_buf |mut buf, _| {
282 for c in string.as_bytes().iter() {
284 buf = ptr::mut_offset(buf, 1)
287 str::raw::set_len(&mut result, len);
292 /// Check that two strings are an ASCII case-insensitive match.
293 /// Same as `to_ascii_lower(a) == to_ascii_lower(b)`,
294 /// but without allocating and copying temporary strings.
296 pub fn eq_ignore_ascii_case(a: &str, b: &str) -> bool {
// Lengths are compared first because `zip` stops at the shorter input;
// the bytes are then compared pairwise after folding through `ASCII_LOWER_MAP`.
297 a.len() == b.len() && a.as_bytes().iter().zip(b.as_bytes().iter()).all(
298 |(byte_a, byte_b)| ASCII_LOWER_MAP[*byte_a] == ASCII_LOWER_MAP[*byte_b])
// Byte-indexed lookup table for ascii lower-casing: entries 0x41-0x5a
// (`A`-`Z`) hold 0x61-0x7a (`a`-`z`); every other entry holds its own
// index, so bytes 0x80 and above pass through unchanged.
301 priv static ASCII_LOWER_MAP: &'static [u8] = &[
302 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
303 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
304 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
305 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
306 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
307 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
308 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
309 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
310 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
311 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
312 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
313 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
314 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
315 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
316 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
317 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
318 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
319 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
320 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
321 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
322 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
323 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
324 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
325 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
326 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
327 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
328 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
329 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
330 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
331 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
332 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
333 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
// Byte-indexed lookup table for ascii upper-casing: entries 0x61-0x7a
// (`a`-`z`) hold 0x41-0x5a (`A`-`Z`); every other entry holds its own
// index, so bytes 0x80 and above pass through unchanged.
336 priv static ASCII_UPPER_MAP: &'static [u8] = &[
337 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
338 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
339 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
340 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
341 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
342 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
343 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
344 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
345 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
346 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
347 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
348 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
349 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
350 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
351 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
352 0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
353 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
354 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
355 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
356 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
357 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
358 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
359 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
360 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
361 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
362 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
363 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
364 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
365 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
366 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
367 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
368 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
375 use to_bytes::ToBytes;
// Test helper: turns a literal list of byte expressions into a fixed-size
// (`[...]`) or owned (`~[...]`) vector of `Ascii`, constructing each element
// directly with `Ascii{chr: ..}` (no ascii check is performed).
378 macro_rules! v2ascii (
379 ( [$($e:expr),*]) => ( [$(Ascii{chr:$e}),*]);
380 (~[$($e:expr),*]) => (~[$(Ascii{chr:$e}),*]);
385 assert_eq!(65u8.to_ascii().to_byte(), 65u8);
386 assert_eq!(65u8.to_ascii().to_char(), 'A');
387 assert_eq!('A'.to_ascii().to_char(), 'A');
388 assert_eq!('A'.to_ascii().to_byte(), 65u8);
390 assert_eq!('A'.to_ascii().to_lower().to_char(), 'a');
391 assert_eq!('Z'.to_ascii().to_lower().to_char(), 'z');
392 assert_eq!('a'.to_ascii().to_upper().to_char(), 'A');
393 assert_eq!('z'.to_ascii().to_upper().to_char(), 'Z');
395 assert_eq!('@'.to_ascii().to_lower().to_char(), '@');
396 assert_eq!('['.to_ascii().to_lower().to_char(), '[');
397 assert_eq!('`'.to_ascii().to_upper().to_char(), '`');
398 assert_eq!('{'.to_ascii().to_upper().to_char(), '{');
400 assert!("banana".iter().all(|c| c.is_ascii()));
401 assert!(!"ประเทศไทย中华Việt Nam".iter().all(|c| c.is_ascii()));
// Covers the borrowed conversions (`to_ascii` on byte and string data), case
// mapping through `to_lower` / `to_upper`, `eq_ignore_case` on slices and
// `is_ascii` on strings.
405 fn test_ascii_vec() {
406 assert_eq!((&[40u8, 32u8, 59u8]).to_ascii(), v2ascii!([40, 32, 59]));
407 assert_eq!("( ;".to_ascii(), v2ascii!([40, 32, 59]));
408 // FIXME: #5475 borrowchk error, owned vectors do not live long enough
409 // if chained-from directly
410 let v = ~[40u8, 32u8, 59u8]; assert_eq!(v.to_ascii(), v2ascii!([40, 32, 59]));
411 let v = ~"( ;"; assert_eq!(v.to_ascii(), v2ascii!([40, 32, 59]));
// Non-letter characters must survive case mapping unchanged.
413 assert_eq!("abCDef&?#".to_ascii().to_lower().to_str_ascii(), ~"abcdef&?#");
414 assert_eq!("abCDef&?#".to_ascii().to_upper().to_str_ascii(), ~"ABCDEF&?#");
416 assert_eq!("".to_ascii().to_lower().to_str_ascii(), ~"");
417 assert_eq!("YMCA".to_ascii().to_lower().to_str_ascii(), ~"ymca");
418 assert_eq!("abcDEFxyz:.;".to_ascii().to_upper().to_str_ascii(), ~"ABCDEFXYZ:.;");
420 assert!("aBcDeF&?#".to_ascii().eq_ignore_case("AbCdEf&?#".to_ascii()));
422 assert!("".is_ascii());
423 assert!("a".is_ascii());
424 assert!(!"\u2009".is_ascii());
// Owned strings and owned byte vectors both convert to the same `~[Ascii]`.
429 fn test_owned_ascii_vec() {
430 assert_eq!((~"( ;").into_ascii(), v2ascii!(~[40, 32, 59]));
431 assert_eq!((~[40u8, 32u8, 59u8]).into_ascii(), v2ascii!(~[40, 32, 59]));
// `to_str_ascii` on a borrowed `Ascii` vector yields the matching `~str`.
435 fn test_ascii_to_str() { assert_eq!(v2ascii!([40, 32, 59]).to_str_ascii(), ~"( ;"); }
// `into_str` on an owned `Ascii` vector yields the matching `~str`.
438 fn test_ascii_into_str() {
439 assert_eq!(v2ascii!(~[40, 32, 59]).into_str(), ~"( ;");
// Both the `ToBytes` path and the consuming `into_bytes` give back the raw bytes.
443 fn test_ascii_to_bytes() {
444 assert_eq!(v2ascii!(~[40, 32, 59]).to_bytes(false), ~[40u8, 32u8, 59u8]);
445 assert_eq!(v2ascii!(~[40, 32, 59]).into_bytes(), ~[40u8, 32u8, 59u8]);
// A byte slice containing values above 127 must make `to_ascii` fail.
448 #[test] #[should_fail]
449 fn test_ascii_vec_fail_u8_slice() { (&[127u8, 128u8, 255u8]).to_ascii(); }
// A string containing non-ascii characters must make `to_ascii` fail.
451 #[test] #[should_fail]
452 fn test_ascii_vec_fail_str_slice() { "zoä华".to_ascii(); }
// A single byte above 127 must make `to_ascii` fail.
// NOTE(review): despite the `_slice` suffix, the input here is a single `u8`.
454 #[test] #[should_fail]
455 fn test_ascii_fail_u8_slice() { 255u8.to_ascii(); }
// A single non-ascii character must make `to_ascii` fail.
// NOTE(review): despite the `_slice` suffix, the input here is a single `char`.
457 #[test] #[should_fail]
458 fn test_ascii_fail_char_slice() { 'λ'.to_ascii(); }
461 fn test_to_ascii_upper() {
462 assert_eq!(to_ascii_upper("url()URL()uRl()ürl"), ~"URL()URL()URL()üRL");
463 assert_eq!(to_ascii_upper("hıKß"), ~"HıKß");
468 let upper = if 'a' <= c && c <= 'z' { c + 'A' - 'a' } else { c };
469 assert_eq!(to_ascii_upper(from_char(i as char)), from_char(upper))
475 fn test_to_ascii_lower() {
476 assert_eq!(to_ascii_lower("url()URL()uRl()Ürl"), ~"url()url()url()Ürl");
477 // Dotted capital I, Kelvin sign, Sharp S.
478 assert_eq!(to_ascii_lower("HİKß"), ~"hİKß");
483 let lower = if 'A' <= c && c <= 'Z' { c + 'a' - 'A' } else { c };
484 assert_eq!(to_ascii_lower(from_char(i as char)), from_char(lower))
491 fn test_eq_ignore_ascii_case() {
492 assert!(eq_ignore_ascii_case("url()URL()uRl()Ürl", "url()url()url()Ürl"));
493 assert!(!eq_ignore_ascii_case("Ürl", "ürl"));
494 // Dotted capital I, Kelvin sign, Sharp S.
495 assert!(eq_ignore_ascii_case("HİKß", "hİKß"));
496 assert!(!eq_ignore_ascii_case("İ", "i"));
497 assert!(!eq_ignore_ascii_case("K", "k"));
498 assert!(!eq_ignore_ascii_case("ß", "s"));
503 let lower = if 'A' <= c && c <= 'Z' { c + 'a' - 'A' } else { c };
504 assert!(eq_ignore_ascii_case(from_char(i as char), from_char(lower)));