src/libcoretest/char.rs

   1 // Copyright 2014 The Rust Project Developers. See the COPYRIGHT
   2 // file at the top-level directory of this distribution and at
   3 // http://rust-lang.org/COPYRIGHT.
   4 //
   5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
   6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
   7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
   8 // option. This file may not be copied, modified, or distributed
   9 // except according to those terms.
  10
  11 use std::{char,str};
  12 use std::convert::TryFrom;
  13
  14 #[test]
  15 fn test_convert() {
  16     assert_eq!(u32::from('a'), 0x61);
  17     assert_eq!(char::from(b'\0'), '\0');
  18     assert_eq!(char::from(b'a'), 'a');
  19     assert_eq!(char::from(b'\xFF'), '\u{FF}');
  20     assert_eq!(char::try_from(0_u32), Ok('\0'));
  21     assert_eq!(char::try_from(0x61_u32), Ok('a'));
  22     assert_eq!(char::try_from(0xD7FF_u32), Ok('\u{D7FF}'));
  23     assert!(char::try_from(0xD800_u32).is_err());
  24     assert!(char::try_from(0xDFFF_u32).is_err());
  25     assert_eq!(char::try_from(0xE000_u32), Ok('\u{E000}'));
  26     assert_eq!(char::try_from(0x10FFFF_u32), Ok('\u{10FFFF}'));
  27     assert!(char::try_from(0x110000_u32).is_err());
  28     assert!(char::try_from(0xFFFF_FFFF_u32).is_err());
  29 }
  30
  31 #[test]
  32 fn test_is_lowercase() {
  33     assert!('a'.is_lowercase());
  34     assert!('ö'.is_lowercase());
  35     assert!('ß'.is_lowercase());
  36     assert!(!'Ü'.is_lowercase());
  37     assert!(!'P'.is_lowercase());
  38 }
  39
  40 #[test]
  41 fn test_is_uppercase() {
  42     assert!(!'h'.is_uppercase());
  43     assert!(!'ä'.is_uppercase());
  44     assert!(!'ß'.is_uppercase());
  45     assert!('Ö'.is_uppercase());
  46     assert!('T'.is_uppercase());
  47 }
  48
  49 #[test]
  50 fn test_is_whitespace() {
  51     assert!(' '.is_whitespace());
  52     assert!('\u{2007}'.is_whitespace());
  53     assert!('\t'.is_whitespace());
  54     assert!('\n'.is_whitespace());
  55     assert!(!'a'.is_whitespace());
  56     assert!(!'_'.is_whitespace());
  57     assert!(!'\u{0}'.is_whitespace());
  58 }
  59
  60 #[test]
  61 fn test_to_digit() {
  62     assert_eq!('0'.to_digit(10), Some(0));
  63     assert_eq!('1'.to_digit(2), Some(1));
  64     assert_eq!('2'.to_digit(3), Some(2));
  65     assert_eq!('9'.to_digit(10), Some(9));
  66     assert_eq!('a'.to_digit(16), Some(10));
  67     assert_eq!('A'.to_digit(16), Some(10));
  68     assert_eq!('b'.to_digit(16), Some(11));
  69     assert_eq!('B'.to_digit(16), Some(11));
  70     assert_eq!('z'.to_digit(36), Some(35));
  71     assert_eq!('Z'.to_digit(36), Some(35));
  72     assert_eq!(' '.to_digit(10), None);
  73     assert_eq!('$'.to_digit(36), None);
  74 }
  75
  76 #[test]
  77 fn test_to_lowercase() {
  78     fn lower(c: char) -> Vec<char> {
  79         c.to_lowercase().collect()
  80     }
  81     assert_eq!(lower('A'), ['a']);
  82     assert_eq!(lower('Ö'), ['ö']);
  83     assert_eq!(lower('ß'), ['ß']);
  84     assert_eq!(lower('Ü'), ['ü']);
  85     assert_eq!(lower('💩'), ['💩']);
  86     assert_eq!(lower('Σ'), ['σ']);
  87     assert_eq!(lower('Τ'), ['τ']);
  88     assert_eq!(lower('Ι'), ['ι']);
  89     assert_eq!(lower('Γ'), ['γ']);
  90     assert_eq!(lower('Μ'), ['μ']);
  91     assert_eq!(lower('Α'), ['α']);
  92     assert_eq!(lower('Σ'), ['σ']);
  93     assert_eq!(lower('ǅ'), ['ǆ']);
  94     assert_eq!(lower('ﬁ'), ['ﬁ']);
  95     assert_eq!(lower('İ'), ['i', '\u{307}']);
  96 }
  97
  98 #[test]
  99 fn test_to_uppercase() {
 100     fn upper(c: char) -> Vec<char> {
 101         c.to_uppercase().collect()
 102     }
 103     assert_eq!(upper('a'), ['A']);
 104     assert_eq!(upper('ö'), ['Ö']);
 105     assert_eq!(upper('ß'), ['S', 'S']); // not ẞ: Latin capital letter sharp s
 106     assert_eq!(upper('ü'), ['Ü']);
 107     assert_eq!(upper('💩'), ['💩']);
 108
 109     assert_eq!(upper('σ'), ['Σ']);
 110     assert_eq!(upper('τ'), ['Τ']);
 111     assert_eq!(upper('ι'), ['Ι']);
 112     assert_eq!(upper('γ'), ['Γ']);
 113     assert_eq!(upper('μ'), ['Μ']);
 114     assert_eq!(upper('α'), ['Α']);
 115     assert_eq!(upper('ς'), ['Σ']);
 116     assert_eq!(upper('ǅ'), ['Ǆ']);
 117     assert_eq!(upper('ﬁ'), ['F', 'I']);
 118     assert_eq!(upper('ᾀ'), ['Ἀ', 'Ι']);
 119 }
 120
 121 #[test]
 122 fn test_is_control() {
 123     assert!('\u{0}'.is_control());
 124     assert!('\u{3}'.is_control());
 125     assert!('\u{6}'.is_control());
 126     assert!('\u{9}'.is_control());
 127     assert!('\u{7f}'.is_control());
 128     assert!('\u{92}'.is_control());
 129     assert!(!'\u{20}'.is_control());
 130     assert!(!'\u{55}'.is_control());
 131     assert!(!'\u{68}'.is_control());
 132 }
 133
 134 #[test]
 135 fn test_is_digit() {
 136    assert!('2'.is_numeric());
 137    assert!('7'.is_numeric());
 138    assert!(!'c'.is_numeric());
 139    assert!(!'i'.is_numeric());
 140    assert!(!'z'.is_numeric());
 141    assert!(!'Q'.is_numeric());
 142 }
 143
 144 #[test]
 145 fn test_escape_debug() {
 146     fn string(c: char) -> String {
 147         c.escape_debug().collect()
 148     }
 149     let s = string('\n');
 150     assert_eq!(s, "\\n");
 151     let s = string('\r');
 152     assert_eq!(s, "\\r");
 153     let s = string('\'');
 154     assert_eq!(s, "\\'");
 155     let s = string('"');
 156     assert_eq!(s, "\\\"");
 157     let s = string(' ');
 158     assert_eq!(s, " ");
 159     let s = string('a');
 160     assert_eq!(s, "a");
 161     let s = string('~');
 162     assert_eq!(s, "~");
 163     let s = string('é');
 164     assert_eq!(s, "é");
 165     let s = string('\x00');
 166     assert_eq!(s, "\\u{0}");
 167     let s = string('\x1f');
 168     assert_eq!(s, "\\u{1f}");
 169     let s = string('\x7f');
 170     assert_eq!(s, "\\u{7f}");
 171     let s = string('\u{80}');
 172     assert_eq!(s, "\\u{80}");
 173     let s = string('\u{ff}');
 174     assert_eq!(s, "\u{ff}");
 175     let s = string('\u{11b}');
 176     assert_eq!(s, "\u{11b}");
 177     let s = string('\u{1d4b6}');
 178     assert_eq!(s, "\u{1d4b6}");
 179     let s = string('\u{200b}'); // zero width space
 180     assert_eq!(s, "\\u{200b}");
 181     let s = string('\u{e000}'); // private use 1
 182     assert_eq!(s, "\\u{e000}");
 183     let s = string('\u{100000}'); // private use 2
 184     assert_eq!(s, "\\u{100000}");
 185 }
 186
 187 #[test]
 188 fn test_escape_default() {
 189     fn string(c: char) -> String {
 190         c.escape_default().collect()
 191     }
 192     let s = string('\n');
 193     assert_eq!(s, "\\n");
 194     let s = string('\r');
 195     assert_eq!(s, "\\r");
 196     let s = string('\'');
 197     assert_eq!(s, "\\'");
 198     let s = string('"');
 199     assert_eq!(s, "\\\"");
 200     let s = string(' ');
 201     assert_eq!(s, " ");
 202     let s = string('a');
 203     assert_eq!(s, "a");
 204     let s = string('~');
 205     assert_eq!(s, "~");
 206     let s = string('é');
 207     assert_eq!(s, "\\u{e9}");
 208     let s = string('\x00');
 209     assert_eq!(s, "\\u{0}");
 210     let s = string('\x1f');
 211     assert_eq!(s, "\\u{1f}");
 212     let s = string('\x7f');
 213     assert_eq!(s, "\\u{7f}");
 214     let s = string('\u{80}');
 215     assert_eq!(s, "\\u{80}");
 216     let s = string('\u{ff}');
 217     assert_eq!(s, "\\u{ff}");
 218     let s = string('\u{11b}');
 219     assert_eq!(s, "\\u{11b}");
 220     let s = string('\u{1d4b6}');
 221     assert_eq!(s, "\\u{1d4b6}");
 222     let s = string('\u{200b}'); // zero width space
 223     assert_eq!(s, "\\u{200b}");
 224     let s = string('\u{e000}'); // private use 1
 225     assert_eq!(s, "\\u{e000}");
 226     let s = string('\u{100000}'); // private use 2
 227     assert_eq!(s, "\\u{100000}");
 228 }
 229
 230 #[test]
 231 fn test_escape_unicode() {
 232     fn string(c: char) -> String { c.escape_unicode().collect() }
 233
 234     let s = string('\x00');
 235     assert_eq!(s, "\\u{0}");
 236     let s = string('\n');
 237     assert_eq!(s, "\\u{a}");
 238     let s = string(' ');
 239     assert_eq!(s, "\\u{20}");
 240     let s = string('a');
 241     assert_eq!(s, "\\u{61}");
 242     let s = string('\u{11b}');
 243     assert_eq!(s, "\\u{11b}");
 244     let s = string('\u{1d4b6}');
 245     assert_eq!(s, "\\u{1d4b6}");
 246 }
 247
 248 #[test]
 249 fn test_encode_utf8() {
 250     fn check(input: char, expect: &[u8]) {
 251         let mut buf = [0; 4];
 252         let ptr = buf.as_ptr();
 253         let s = input.encode_utf8(&mut buf);
 254         assert_eq!(s.as_ptr() as usize, ptr as usize);
 255         assert!(str::from_utf8(s.as_bytes()).is_ok());
 256         assert_eq!(s.as_bytes(), expect);
 257     }
 258
 259     check('x', &[0x78]);
 260     check('\u{e9}', &[0xc3, 0xa9]);
 261     check('\u{a66e}', &[0xea, 0x99, 0xae]);
 262     check('\u{1f4a9}', &[0xf0, 0x9f, 0x92, 0xa9]);
 263 }
 264
 265 #[test]
 266 fn test_encode_utf16() {
 267     fn check(input: char, expect: &[u16]) {
 268         let mut buf = [0; 2];
 269         let ptr = buf.as_mut_ptr();
 270         let b = input.encode_utf16(&mut buf);
 271         assert_eq!(b.as_mut_ptr() as usize, ptr as usize);
 272         assert_eq!(b, expect);
 273     }
 274
 275     check('x', &[0x0078]);
 276     check('\u{e9}', &[0x00e9]);
 277     check('\u{a66e}', &[0xa66e]);
 278     check('\u{1f4a9}', &[0xd83d, 0xdca9]);
 279 }
 280
 281 #[test]
 282 fn test_len_utf16() {
 283     assert!('x'.len_utf16() == 1);
 284     assert!('\u{e9}'.len_utf16() == 1);
 285     assert!('\u{a66e}'.len_utf16() == 1);
 286     assert!('\u{1f4a9}'.len_utf16() == 2);
 287 }
 288
 289 #[test]
 290 fn test_decode_utf16() {
 291     fn check(s: &[u16], expected: &[Result<char, u16>]) {
 292         let v = char::decode_utf16(s.iter().cloned())
 293                      .map(|r| r.map_err(|e| e.unpaired_surrogate()))
 294                      .collect::<Vec<_>>();
 295         assert_eq!(v, expected);
 296     }
 297     check(&[0xD800, 0x41, 0x42], &[Err(0xD800), Ok('A'), Ok('B')]);
 298     check(&[0xD800, 0], &[Err(0xD800), Ok('\0')]);
 299 }
 300
 301 #[test]
 302 fn ed_iterator_specializations() {
 303     // Check counting
 304     assert_eq!('\n'.escape_default().count(), 2);
 305     assert_eq!('c'.escape_default().count(), 1);
 306     assert_eq!(' '.escape_default().count(), 1);
 307     assert_eq!('\\'.escape_default().count(), 2);
 308     assert_eq!('\''.escape_default().count(), 2);
 309
 310     // Check nth
 311
 312     // Check that OoB is handled correctly
 313     assert_eq!('\n'.escape_default().nth(2), None);
 314     assert_eq!('c'.escape_default().nth(1), None);
 315     assert_eq!(' '.escape_default().nth(1), None);
 316     assert_eq!('\\'.escape_default().nth(2), None);
 317     assert_eq!('\''.escape_default().nth(2), None);
 318
 319     // Check the first char
 320     assert_eq!('\n'.escape_default().nth(0), Some('\\'));
 321     assert_eq!('c'.escape_default().nth(0), Some('c'));
 322     assert_eq!(' '.escape_default().nth(0), Some(' '));
 323     assert_eq!('\\'.escape_default().nth(0), Some('\\'));
 324     assert_eq!('\''.escape_default().nth(0), Some('\\'));
 325
 326     // Check the second char
 327     assert_eq!('\n'.escape_default().nth(1), Some('n'));
 328     assert_eq!('\\'.escape_default().nth(1), Some('\\'));
 329     assert_eq!('\''.escape_default().nth(1), Some('\''));
 330
 331     // Check the last char
 332     assert_eq!('\n'.escape_default().last(), Some('n'));
 333     assert_eq!('c'.escape_default().last(), Some('c'));
 334     assert_eq!(' '.escape_default().last(), Some(' '));
 335     assert_eq!('\\'.escape_default().last(), Some('\\'));
 336     assert_eq!('\''.escape_default().last(), Some('\''));
 337 }
 338
 339 #[test]
 340 fn eu_iterator_specializations() {
 341     fn check(c: char) {
 342         let len = c.escape_unicode().count();
 343
 344         // Check OoB
 345         assert_eq!(c.escape_unicode().nth(len), None);
 346
 347         // For all possible in-bound offsets
 348         let mut iter = c.escape_unicode();
 349         for offset in 0..len {
 350             // Check last
 351             assert_eq!(iter.clone().last(), Some('}'));
 352
 353             // Check len
 354             assert_eq!(iter.len(), len - offset);
 355
 356             // Check size_hint (= len in ExactSizeIterator)
 357             assert_eq!(iter.size_hint(), (iter.len(), Some(iter.len())));
 358
 359             // Check counting
 360             assert_eq!(iter.clone().count(), len - offset);
 361
 362             // Check nth
 363             assert_eq!(c.escape_unicode().nth(offset), iter.next());
 364         }
 365
 366         // Check post-last
 367         assert_eq!(iter.clone().last(), None);
 368         assert_eq!(iter.clone().count(), 0);
 369     }
 370
 371     check('\u{0}');
 372     check('\u{1}');
 373     check('\u{12}');
 374     check('\u{123}');
 375     check('\u{1234}');
 376     check('\u{12340}');
 377     check('\u{10FFFF}');
 378 }
 379
 380 #[test]
 381 fn test_decode_utf8() {
 382     macro_rules! assert_decode_utf8 {
 383         ($input_bytes: expr, $expected_str: expr) => {
 384             let input_bytes: &[u8] = &$input_bytes;
 385             let s = char::decode_utf8(input_bytes.iter().cloned())
 386                 .map(|r_b| r_b.unwrap_or('\u{FFFD}'))
 387                 .collect::<String>();
 388             assert_eq!(s, $expected_str,
 389                        "input bytes: {:?}, expected str: {:?}, result: {:?}",
 390                        input_bytes, $expected_str, s);
 391             assert_eq!(String::from_utf8_lossy(&$input_bytes), $expected_str);
 392         }
 393     }
 394
 395     assert_decode_utf8!([], "");
 396     assert_decode_utf8!([0x41], "A");
 397     assert_decode_utf8!([0xC1, 0x81], "��");
 398     assert_decode_utf8!([0xE2, 0x99, 0xA5], "♥");
 399     assert_decode_utf8!([0xE2, 0x99, 0xA5, 0x41], "♥A");
 400     assert_decode_utf8!([0xE2, 0x99], "�");
 401     assert_decode_utf8!([0xE2, 0x99, 0x41], "�A");
 402     assert_decode_utf8!([0xC0], "�");
 403     assert_decode_utf8!([0xC0, 0x41], "�A");
 404     assert_decode_utf8!([0x80], "�");
 405     assert_decode_utf8!([0x80, 0x41], "�A");
 406     assert_decode_utf8!([0xFE], "�");
 407     assert_decode_utf8!([0xFE, 0x41], "�A");
 408     assert_decode_utf8!([0xFF], "�");
 409     assert_decode_utf8!([0xFF, 0x41], "�A");
 410     assert_decode_utf8!([0xC0, 0x80], "��");
 411
 412     // Surrogates
 413     assert_decode_utf8!([0xED, 0x9F, 0xBF], "\u{D7FF}");
 414     assert_decode_utf8!([0xED, 0xA0, 0x80], "���");
 415     assert_decode_utf8!([0xED, 0xBF, 0x80], "���");
 416     assert_decode_utf8!([0xEE, 0x80, 0x80], "\u{E000}");
 417
 418     // char::MAX
 419     assert_decode_utf8!([0xF4, 0x8F, 0xBF, 0xBF], "\u{10FFFF}");
 420     assert_decode_utf8!([0xF4, 0x8F, 0xBF, 0x41], "�A");
 421     assert_decode_utf8!([0xF4, 0x90, 0x80, 0x80], "����");
 422
 423     // 5 and 6 bytes sequence
 424     // Part of the original design of UTF-8,
 425     // but invalid now that UTF-8 is artificially restricted to match the range of UTF-16.
 426     assert_decode_utf8!([0xF8, 0x80, 0x80, 0x80, 0x80], "�����");
 427     assert_decode_utf8!([0xFC, 0x80, 0x80, 0x80, 0x80, 0x80], "������");
 428 }