1 // Copyright 2013 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 //! Operations on ASCII strings and characters.
13 use to_str::{ToStr,ToStrConsume};
17 use iterator::{Iterator, IteratorUtil};
18 use vec::{CopyableVector, ImmutableVector, OwnedVector};
19 use to_bytes::IterBytes;
20 use option::{Some, None};
22 /// Datatype to hold one ascii character. It wraps a `u8`, with the highest bit always zero.
/// The wrapped byte is private to this module.
23 #[deriving(Clone, Eq)]
24 pub struct Ascii { priv chr: u8 }
27 /// Converts an ascii character into a `u8`.
29 pub fn to_byte(self) -> u8 {
33 /// Converts an ascii character into a `char`.
35 pub fn to_char(self) -> char {
39 /// Convert to lowercase.
41 pub fn to_lower(self) -> Ascii {
// 65..90 is the byte range of the uppercase letters 'A' through 'Z'.
42 if self.chr >= 65 && self.chr <= 90 {
// Setting bit 0x20 turns an uppercase letter into its lowercase counterpart.
43 Ascii{chr: self.chr | 0x20 }
49 /// Convert to uppercase.
51 pub fn to_upper(self) -> Ascii {
// 97..122 is the byte range of the lowercase letters 'a' through 'z'.
52 if self.chr >= 97 && self.chr <= 122 {
// Clearing bit 0x20 turns a lowercase letter into its uppercase counterpart.
53 Ascii{chr: self.chr & !0x20 }
59 /// Compares two ascii characters for equality, ignoring case.
61 pub fn eq_ignore_case(self, other: Ascii) -> bool {
// Both sides are lowercased first, so letters differing only in case compare equal.
62 self.to_lower().chr == other.to_lower().chr
// Renders the character as a three-byte string: the character wrapped in single quotes.
66 impl ToStr for Ascii {
68 fn to_str(&self) -> ~str { str::from_bytes(['\'' as u8, self.chr, '\'' as u8]) }
71 /// Trait for converting into an ascii type `T`.
72 pub trait AsciiCast<T> {
73 /// Convert to an ascii type. The implementations in this file assert `is_ascii()` first.
74 fn to_ascii(&self) -> T;
76 /// Convert to an ascii type, not doing any range asserts.
/// The caller is responsible for the value actually being ascii.
77 unsafe fn to_ascii_nocheck(&self) -> T;
79 /// Check if convertible to ascii.
80 fn is_ascii(&self) -> bool;
// Borrowed byte slice -> borrowed ascii slice of the same lifetime.
83 impl<'self> AsciiCast<&'self[Ascii]> for &'self [u8] {
// Checked conversion: fails the assertion when any byte is not ascii.
85 fn to_ascii(&self) -> &'self[Ascii] {
86 assert!(self.is_ascii());
87 unsafe {self.to_ascii_nocheck()}
// Unchecked conversion: reinterprets the byte slice as a slice of `Ascii` (a struct wrapping one `u8`).
91 unsafe fn to_ascii_nocheck(&self) -> &'self[Ascii] {
92 cast::transmute(*self)
// Scans the bytes and bails out with `false` at the first non-ascii one.
96 fn is_ascii(&self) -> bool {
97 for b in self.iter() {
98 if !b.is_ascii() { return false; }
// Borrowed string slice -> borrowed ascii slice of the same lifetime.
104 impl<'self> AsciiCast<&'self[Ascii]> for &'self str {
// Checked conversion: fails the assertion when any byte is not ascii.
106 fn to_ascii(&self) -> &'self[Ascii] {
107 assert!(self.is_ascii());
108 unsafe {self.to_ascii_nocheck()}
112 unsafe fn to_ascii_nocheck(&self) -> &'self[Ascii] {
// Take the string slice apart into its (pointer, length) pair.
113 let (p,len): (*u8, uint) = cast::transmute(*self);
// Rebuild a slice that is one element shorter. Presumably this drops the trailing zero byte
// of the string representation (this file pushes a 0 before transmuting a vector to `~str`).
114 cast::transmute((p, len - 1))
// True when every byte of the string is ascii.
118 fn is_ascii(&self) -> bool {
119 self.byte_iter().all(|b| b.is_ascii())
// Single byte -> single `Ascii`.
123 impl AsciiCast<Ascii> for u8 {
// Checked conversion: fails the assertion when the byte is not ascii.
125 fn to_ascii(&self) -> Ascii {
126 assert!(self.is_ascii());
127 unsafe {self.to_ascii_nocheck()}
131 unsafe fn to_ascii_nocheck(&self) -> Ascii {
136 fn is_ascii(&self) -> bool {
// Single `char` -> single `Ascii`.
141 impl AsciiCast<Ascii> for char {
// Checked conversion: fails the assertion when the character is not ascii.
143 fn to_ascii(&self) -> Ascii {
144 assert!(self.is_ascii());
145 unsafe {self.to_ascii_nocheck()}
// Unchecked conversion: casts the char to `u8` without any range check.
149 unsafe fn to_ascii_nocheck(&self) -> Ascii {
150 Ascii{ chr: *self as u8 }
154 fn is_ascii(&self) -> bool {
// Subtracting the low seven bits leaves only the higher bits; the char is ascii when none are set.
155 *self - ('\x7F' & *self) == '\x00'
159 /// Trait for copyless casting of an owned value to an ascii vector.
160 pub trait OwnedAsciiCast {
161 /// Take ownership and cast to an ascii vector without trailing zero element.
/// The implementations in this file assert `is_ascii()` first.
162 fn into_ascii(self) -> ~[Ascii];
164 /// Take ownership and cast to an ascii vector without trailing zero element.
165 /// Does not perform validation checks; the caller must ensure the content is ascii.
166 unsafe fn into_ascii_nocheck(self) -> ~[Ascii];
// Owned byte vector -> owned ascii vector.
169 impl OwnedAsciiCast for ~[u8] {
// Checked conversion: fails the assertion when any byte is not ascii.
171 fn into_ascii(self) -> ~[Ascii] {
172 assert!(self.is_ascii());
173 unsafe {self.into_ascii_nocheck()}
// Unchecked conversion: reinterprets the owned vector in place via `transmute`.
177 unsafe fn into_ascii_nocheck(self) -> ~[Ascii] {
178 cast::transmute(self)
// Owned string -> owned ascii vector.
182 impl OwnedAsciiCast for ~str {
// Checked conversion: fails the assertion when any byte is not ascii.
184 fn into_ascii(self) -> ~[Ascii] {
185 assert!(self.is_ascii());
186 unsafe {self.into_ascii_nocheck()}
190 unsafe fn into_ascii_nocheck(self) -> ~[Ascii] {
// Reinterpret the owned string as an owned vector of `Ascii`.
191 let mut r: ~[Ascii] = cast::transmute(self);
197 /// Trait for converting an ascii type to a string. Needed to convert `&[Ascii]` to `~str`.
199 /// Convert to a string.
200 fn to_str_ascii(&self) -> ~str;
202 /// Convert to a vector representing a lower cased ascii string.
203 fn to_lower(&self) -> ~[Ascii];
205 /// Convert to a vector representing an upper cased ascii string.
206 fn to_upper(&self) -> ~[Ascii];
208 /// Compares two ascii strings, ignoring case.
209 fn eq_ignore_case(self, other: &[Ascii]) -> bool;
// String-like operations on a borrowed slice of `Ascii` values.
212 impl<'self> AsciiStr for &'self [Ascii] {
// Copies the slice, appends a zero element, and reinterprets the resulting vector as a `~str`.
214 fn to_str_ascii(&self) -> ~str {
215 let mut cpy = self.to_owned();
216 cpy.push(0u8.to_ascii());
217 unsafe {cast::transmute(cpy)}
// Returns a new vector with every element lowercased.
221 fn to_lower(&self) -> ~[Ascii] {
222 self.map(|a| a.to_lower())
// Returns a new vector with every element uppercased.
226 fn to_upper(&self) -> ~[Ascii] {
227 self.map(|a| a.to_upper())
// Case-insensitive equality of two ascii slices.
// `zip` stops at the end of the shorter slice, so the lengths are compared first;
// without that check a slice would compare equal to any longer slice that it is a
// case-insensitive prefix of (and the empty slice would equal everything).
231 fn eq_ignore_case(self, other: &[Ascii]) -> bool {
232 self.len() == other.len() && self.iter().zip(other.iter()).all(|(&a, &b)| a.eq_ignore_case(b))
// Consuming conversion of an owned ascii vector into an owned string.
236 impl ToStrConsume for ~[Ascii] {
238 fn into_str(self) -> ~str {
// Append a zero element, then reinterpret the vector as a `~str`.
240 cpy.push(0u8.to_ascii());
241 unsafe {cast::transmute(cpy)}
// Byte iteration for a single `Ascii`; the `_lsb0` flag is unused (underscore-prefixed).
245 impl IterBytes for Ascii {
247 fn iter_bytes(&self, _lsb0: bool, f: &fn(buf: &[u8]) -> bool) -> bool {
252 /// Trait to convert to an owned byte array by consuming self.
253 pub trait ToBytesConsume {
254 /// Converts to an owned byte array by consuming self.
255 fn into_bytes(self) -> ~[u8];
// Owned ascii vector -> owned byte vector, reinterpreted in place via `transmute`.
258 impl ToBytesConsume for ~[Ascii] {
259 fn into_bytes(self) -> ~[u8] {
260 unsafe {cast::transmute(self)}
267 use to_bytes::ToBytes;
// Test helper: builds a fixed (`[..]`) or owned (`~[..]`) vector of `Ascii` directly from
// byte expressions, constructing the struct without going through the checked conversions.
269 macro_rules! v2ascii (
270 ( [$($e:expr),*]) => ( [$(Ascii{chr:$e}),*]);
271 (~[$($e:expr),*]) => (~[$(Ascii{chr:$e}),*]);
// Round trips between `u8`, `char` and `Ascii`.
276 assert_eq!(65u8.to_ascii().to_byte(), 65u8);
277 assert_eq!(65u8.to_ascii().to_char(), 'A');
278 assert_eq!('A'.to_ascii().to_char(), 'A');
279 assert_eq!('A'.to_ascii().to_byte(), 65u8);
// Case conversion at both ends of the alphabet.
281 assert_eq!('A'.to_ascii().to_lower().to_char(), 'a');
282 assert_eq!('Z'.to_ascii().to_lower().to_char(), 'z');
283 assert_eq!('a'.to_ascii().to_upper().to_char(), 'A');
284 assert_eq!('z'.to_ascii().to_upper().to_char(), 'Z');
// Characters directly adjacent to the letter ranges must stay unchanged.
286 assert_eq!('@'.to_ascii().to_lower().to_char(), '@');
287 assert_eq!('['.to_ascii().to_lower().to_char(), '[');
288 assert_eq!('`'.to_ascii().to_upper().to_char(), '`');
289 assert_eq!('{'.to_ascii().to_upper().to_char(), '{');
// Per-character `is_ascii` on an all-ascii and a non-ascii string.
291 assert!("banana".iter().all(|c| c.is_ascii()));
292 assert!(!"ประเทศไทย中华Việt Nam".iter().all(|c| c.is_ascii()));
// Checks the borrowed conversions, case mapping, case-insensitive comparison and `is_ascii` on strings.
296 fn test_ascii_vec() {
297 assert_eq!((&[40u8, 32u8, 59u8]).to_ascii(), v2ascii!([40, 32, 59]));
298 assert_eq!("( ;".to_ascii(), v2ascii!([40, 32, 59]));
299 // FIXME: #5475 borrowchk error, owned vectors do not live long enough
300 // if chained-from directly
301 let v = ~[40u8, 32u8, 59u8]; assert_eq!(v.to_ascii(), v2ascii!([40, 32, 59]));
302 let v = ~"( ;"; assert_eq!(v.to_ascii(), v2ascii!([40, 32, 59]));
// Case mapping must leave non-letters untouched.
304 assert_eq!("abCDef&?#".to_ascii().to_lower().to_str_ascii(), ~"abcdef&?#");
305 assert_eq!("abCDef&?#".to_ascii().to_upper().to_str_ascii(), ~"ABCDEF&?#");
// Includes the empty string as an edge case.
307 assert_eq!("".to_ascii().to_lower().to_str_ascii(), ~"");
308 assert_eq!("YMCA".to_ascii().to_lower().to_str_ascii(), ~"ymca");
309 assert_eq!("abcDEFxyz:.;".to_ascii().to_upper().to_str_ascii(), ~"ABCDEFXYZ:.;");
311 assert!("aBcDeF&?#".to_ascii().eq_ignore_case("AbCdEf&?#".to_ascii()));
313 assert!("".is_ascii());
314 assert!("a".is_ascii());
315 assert!(!"\u2009".is_ascii());
// Checks the consuming conversions from `~str` and `~[u8]`.
320 fn test_owned_ascii_vec() {
321 assert_eq!((~"( ;").into_ascii(), v2ascii!(~[40, 32, 59]));
322 assert_eq!((~[40u8, 32u8, 59u8]).into_ascii(), v2ascii!(~[40, 32, 59]));
// Checks the copying conversion from an ascii vector to `~str`.
326 fn test_ascii_to_str() { assert_eq!(v2ascii!([40, 32, 59]).to_str_ascii(), ~"( ;"); }
// Checks the consuming conversion from an owned ascii vector to `~str`.
329 fn test_ascii_into_str() {
330 assert_eq!(v2ascii!(~[40, 32, 59]).into_str(), ~"( ;");
// Checks both the `ToBytes` path (`to_bytes`) and the consuming `into_bytes` path.
334 fn test_ascii_to_bytes() {
335 assert_eq!(v2ascii!(~[40, 32, 59]).to_bytes(false), ~[40u8, 32u8, 59u8]);
336 assert_eq!(v2ascii!(~[40, 32, 59]).into_bytes(), ~[40u8, 32u8, 59u8]);
// The checked conversions must fail on non-ascii input.
// Byte slice containing values with the high bit set (128, 255).
339 #[test] #[should_fail]
340 fn test_ascii_vec_fail_u8_slice() { (&[127u8, 128u8, 255u8]).to_ascii(); }
// String slice containing non-ascii characters.
342 #[test] #[should_fail]
343 fn test_ascii_vec_fail_str_slice() { "zoä华".to_ascii(); }
// Single byte with the high bit set (despite the `_slice` suffix, this exercises the `u8` impl).
345 #[test] #[should_fail]
346 fn test_ascii_fail_u8_slice() { 255u8.to_ascii(); }
// Single non-ascii char (despite the `_slice` suffix, this exercises the `char` impl).
348 #[test] #[should_fail]
349 fn test_ascii_fail_char_slice() { 'λ'.to_ascii(); }