library/core/tests/ascii.rs

   1 use core::char::from_u32;
   2
   3 #[test]
   4 fn test_is_ascii() {
   5     assert!(b"".is_ascii());
   6     assert!(b"banana\0\x7F".is_ascii());
   7     assert!(b"banana\0\x7F".iter().all(|b| b.is_ascii()));
   8     assert!(!b"Vi\xe1\xbb\x87t Nam".is_ascii());
   9     assert!(!b"Vi\xe1\xbb\x87t Nam".iter().all(|b| b.is_ascii()));
  10     assert!(!b"\xe1\xbb\x87".iter().any(|b| b.is_ascii()));
  11
  12     assert!("".is_ascii());
  13     assert!("banana\0\u{7F}".is_ascii());
  14     assert!("banana\0\u{7F}".chars().all(|c| c.is_ascii()));
  15     assert!(!"ประเทศไทย中华Việt Nam".chars().all(|c| c.is_ascii()));
  16     assert!(!"ประเทศไทย中华ệ ".chars().any(|c| c.is_ascii()));
  17 }
  18
  19 #[test]
  20 fn test_to_ascii_uppercase() {
  21     assert_eq!("url()URL()uRl()ürl".to_ascii_uppercase(), "URL()URL()URL()üRL");
  22     assert_eq!("hıKß".to_ascii_uppercase(), "HıKß");
  23
  24     for i in 0..501 {
  25         let upper =
  26             if 'a' as u32 <= i && i <= 'z' as u32 { i + 'A' as u32 - 'a' as u32 } else { i };
  27         assert_eq!(
  28             (from_u32(i).unwrap()).to_string().to_ascii_uppercase(),
  29             (from_u32(upper).unwrap()).to_string()
  30         );
  31     }
  32 }
  33
  34 #[test]
  35 fn test_to_ascii_lowercase() {
  36     assert_eq!("url()URL()uRl()Ürl".to_ascii_lowercase(), "url()url()url()Ürl");
  37     // Dotted capital I, Kelvin sign, Sharp S.
  38     assert_eq!("HİKß".to_ascii_lowercase(), "hİKß");
  39
  40     for i in 0..501 {
  41         let lower =
  42             if 'A' as u32 <= i && i <= 'Z' as u32 { i + 'a' as u32 - 'A' as u32 } else { i };
  43         assert_eq!(
  44             (from_u32(i).unwrap()).to_string().to_ascii_lowercase(),
  45             (from_u32(lower).unwrap()).to_string()
  46         );
  47     }
  48 }
  49
  50 #[test]
  51 fn test_make_ascii_lower_case() {
  52     macro_rules! test {
  53         ($from: expr, $to: expr) => {{
  54             let mut x = $from;
  55             x.make_ascii_lowercase();
  56             assert_eq!(x, $to);
  57         }};
  58     }
  59     test!(b'A', b'a');
  60     test!(b'a', b'a');
  61     test!(b'!', b'!');
  62     test!('A', 'a');
  63     test!('À', 'À');
  64     test!('a', 'a');
  65     test!('!', '!');
  66     test!(b"H\xc3\x89".to_vec(), b"h\xc3\x89");
  67     test!("HİKß".to_string(), "hİKß");
  68 }
  69
  70 #[test]
  71 fn test_make_ascii_upper_case() {
  72     macro_rules! test {
  73         ($from: expr, $to: expr) => {{
  74             let mut x = $from;
  75             x.make_ascii_uppercase();
  76             assert_eq!(x, $to);
  77         }};
  78     }
  79     test!(b'a', b'A');
  80     test!(b'A', b'A');
  81     test!(b'!', b'!');
  82     test!('a', 'A');
  83     test!('à', 'à');
  84     test!('A', 'A');
  85     test!('!', '!');
  86     test!(b"h\xc3\xa9".to_vec(), b"H\xc3\xa9");
  87     test!("hıKß".to_string(), "HıKß");
  88
  89     let mut x = "Hello".to_string();
  90     x[..3].make_ascii_uppercase(); // Test IndexMut on String.
  91     assert_eq!(x, "HELlo")
  92 }
  93
  94 #[test]
  95 fn test_eq_ignore_ascii_case() {
  96     assert!("url()URL()uRl()Ürl".eq_ignore_ascii_case("url()url()url()Ürl"));
  97     assert!(!"Ürl".eq_ignore_ascii_case("ürl"));
  98     // Dotted capital I, Kelvin sign, Sharp S.
  99     assert!("HİKß".eq_ignore_ascii_case("hİKß"));
 100     assert!(!"İ".eq_ignore_ascii_case("i"));
 101     assert!(!"K".eq_ignore_ascii_case("k"));
 102     assert!(!"ß".eq_ignore_ascii_case("s"));
 103
 104     for i in 0..501 {
 105         let lower =
 106             if 'A' as u32 <= i && i <= 'Z' as u32 { i + 'a' as u32 - 'A' as u32 } else { i };
 107         assert!(
 108             (from_u32(i).unwrap())
 109                 .to_string()
 110                 .eq_ignore_ascii_case(&from_u32(lower).unwrap().to_string())
 111         );
 112     }
 113 }
 114
 115 #[test]
 116 fn inference_works() {
 117     let x = "a".to_string();
 118     let _ = x.eq_ignore_ascii_case("A");
 119 }
 120
 121 // Shorthands used by the is_ascii_* tests.
 122 macro_rules! assert_all {
 123     ($what:ident, $($str:tt),+) => {{
 124         $(
 125             for b in $str.chars() {
 126                 if !b.$what() {
 127                     panic!("expected {}({}) but it isn't",
 128                            stringify!($what), b);
 129                 }
 130             }
 131             for b in $str.as_bytes().iter() {
 132                 if !b.$what() {
 133                     panic!("expected {}(0x{:02x})) but it isn't",
 134                            stringify!($what), b);
 135                 }
 136             }
 137         )+
 138     }};
 139     ($what:ident, $($str:tt),+,) => (assert_all!($what,$($str),+))
 140 }
 141 macro_rules! assert_none {
 142     ($what:ident, $($str:tt),+) => {{
 143         $(
 144             for b in $str.chars() {
 145                 if b.$what() {
 146                     panic!("expected not-{}({}) but it is",
 147                            stringify!($what), b);
 148                 }
 149             }
 150             for b in $str.as_bytes().iter() {
 151                 if b.$what() {
 152                     panic!("expected not-{}(0x{:02x})) but it is",
 153                            stringify!($what), b);
 154                 }
 155             }
 156         )+
 157     }};
 158     ($what:ident, $($str:tt),+,) => (assert_none!($what,$($str),+))
 159 }
 160
 161 #[test]
 162 fn test_is_ascii_alphabetic() {
 163     assert_all!(
 164         is_ascii_alphabetic,
 165         "",
 166         "abcdefghijklmnopqrstuvwxyz",
 167         "ABCDEFGHIJKLMNOQPRSTUVWXYZ",
 168     );
 169     assert_none!(
 170         is_ascii_alphabetic,
 171         "0123456789",
 172         "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
 173         " \t\n\x0c\r",
 174         "\x00\x01\x02\x03\x04\x05\x06\x07",
 175         "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
 176         "\x10\x11\x12\x13\x14\x15\x16\x17",
 177         "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
 178         "\x7f",
 179     );
 180 }
 181
 182 #[test]
 183 fn test_is_ascii_uppercase() {
 184     assert_all!(is_ascii_uppercase, "", "ABCDEFGHIJKLMNOQPRSTUVWXYZ",);
 185     assert_none!(
 186         is_ascii_uppercase,
 187         "abcdefghijklmnopqrstuvwxyz",
 188         "0123456789",
 189         "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
 190         " \t\n\x0c\r",
 191         "\x00\x01\x02\x03\x04\x05\x06\x07",
 192         "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
 193         "\x10\x11\x12\x13\x14\x15\x16\x17",
 194         "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
 195         "\x7f",
 196     );
 197 }
 198
 199 #[test]
 200 fn test_is_ascii_lowercase() {
 201     assert_all!(is_ascii_lowercase, "abcdefghijklmnopqrstuvwxyz",);
 202     assert_none!(
 203         is_ascii_lowercase,
 204         "ABCDEFGHIJKLMNOQPRSTUVWXYZ",
 205         "0123456789",
 206         "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
 207         " \t\n\x0c\r",
 208         "\x00\x01\x02\x03\x04\x05\x06\x07",
 209         "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
 210         "\x10\x11\x12\x13\x14\x15\x16\x17",
 211         "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
 212         "\x7f",
 213     );
 214 }
 215
 216 #[test]
 217 fn test_is_ascii_alphanumeric() {
 218     assert_all!(
 219         is_ascii_alphanumeric,
 220         "",
 221         "abcdefghijklmnopqrstuvwxyz",
 222         "ABCDEFGHIJKLMNOQPRSTUVWXYZ",
 223         "0123456789",
 224     );
 225     assert_none!(
 226         is_ascii_alphanumeric,
 227         "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
 228         " \t\n\x0c\r",
 229         "\x00\x01\x02\x03\x04\x05\x06\x07",
 230         "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
 231         "\x10\x11\x12\x13\x14\x15\x16\x17",
 232         "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
 233         "\x7f",
 234     );
 235 }
 236
 237 #[test]
 238 fn test_is_ascii_digit() {
 239     assert_all!(is_ascii_digit, "", "0123456789",);
 240     assert_none!(
 241         is_ascii_digit,
 242         "abcdefghijklmnopqrstuvwxyz",
 243         "ABCDEFGHIJKLMNOQPRSTUVWXYZ",
 244         "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
 245         " \t\n\x0c\r",
 246         "\x00\x01\x02\x03\x04\x05\x06\x07",
 247         "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
 248         "\x10\x11\x12\x13\x14\x15\x16\x17",
 249         "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
 250         "\x7f",
 251     );
 252 }
 253
 254 #[test]
 255 fn test_is_ascii_hexdigit() {
 256     assert_all!(is_ascii_hexdigit, "", "0123456789", "abcdefABCDEF",);
 257     assert_none!(
 258         is_ascii_hexdigit,
 259         "ghijklmnopqrstuvwxyz",
 260         "GHIJKLMNOQPRSTUVWXYZ",
 261         "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
 262         " \t\n\x0c\r",
 263         "\x00\x01\x02\x03\x04\x05\x06\x07",
 264         "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
 265         "\x10\x11\x12\x13\x14\x15\x16\x17",
 266         "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
 267         "\x7f",
 268     );
 269 }
 270
 271 #[test]
 272 fn test_is_ascii_punctuation() {
 273     assert_all!(is_ascii_punctuation, "", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",);
 274     assert_none!(
 275         is_ascii_punctuation,
 276         "abcdefghijklmnopqrstuvwxyz",
 277         "ABCDEFGHIJKLMNOQPRSTUVWXYZ",
 278         "0123456789",
 279         " \t\n\x0c\r",
 280         "\x00\x01\x02\x03\x04\x05\x06\x07",
 281         "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
 282         "\x10\x11\x12\x13\x14\x15\x16\x17",
 283         "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
 284         "\x7f",
 285     );
 286 }
 287
 288 #[test]
 289 fn test_is_ascii_graphic() {
 290     assert_all!(
 291         is_ascii_graphic,
 292         "",
 293         "abcdefghijklmnopqrstuvwxyz",
 294         "ABCDEFGHIJKLMNOQPRSTUVWXYZ",
 295         "0123456789",
 296         "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
 297     );
 298     assert_none!(
 299         is_ascii_graphic,
 300         " \t\n\x0c\r",
 301         "\x00\x01\x02\x03\x04\x05\x06\x07",
 302         "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
 303         "\x10\x11\x12\x13\x14\x15\x16\x17",
 304         "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
 305         "\x7f",
 306     );
 307 }
 308
 309 #[test]
 310 fn test_is_ascii_whitespace() {
 311     assert_all!(is_ascii_whitespace, "", " \t\n\x0c\r",);
 312     assert_none!(
 313         is_ascii_whitespace,
 314         "abcdefghijklmnopqrstuvwxyz",
 315         "ABCDEFGHIJKLMNOQPRSTUVWXYZ",
 316         "0123456789",
 317         "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
 318         "\x00\x01\x02\x03\x04\x05\x06\x07",
 319         "\x08\x0b\x0e\x0f",
 320         "\x10\x11\x12\x13\x14\x15\x16\x17",
 321         "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
 322         "\x7f",
 323     );
 324 }
 325
 326 #[test]
 327 fn test_is_ascii_control() {
 328     assert_all!(
 329         is_ascii_control,
 330         "",
 331         "\x00\x01\x02\x03\x04\x05\x06\x07",
 332         "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
 333         "\x10\x11\x12\x13\x14\x15\x16\x17",
 334         "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
 335         "\x7f",
 336     );
 337     assert_none!(
 338         is_ascii_control,
 339         "abcdefghijklmnopqrstuvwxyz",
 340         "ABCDEFGHIJKLMNOQPRSTUVWXYZ",
 341         "0123456789",
 342         "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
 343         " ",
 344     );
 345 }
 346
 347 // `is_ascii` does a good amount of pointer manipulation and has
 348 // alignment-dependent computation. This is all sanity-checked via
 349 // `debug_assert!`s, so we test various sizes/alignments thoroughly versus an
 350 // "obviously correct" baseline function.
 351 #[test]
 352 fn test_is_ascii_align_size_thoroughly() {
 353     // The "obviously-correct" baseline mentioned above.
 354     fn is_ascii_baseline(s: &[u8]) -> bool {
 355         s.iter().all(|b| b.is_ascii())
 356     }
 357
 358     // Helper to repeat `l` copies of `b0` followed by `l` copies of `b1`.
 359     fn repeat_concat(b0: u8, b1: u8, l: usize) -> Vec<u8> {
 360         use core::iter::repeat;
 361         repeat(b0).take(l).chain(repeat(b1).take(l)).collect()
 362     }
 363
 364     // Miri is too slow
 365     let iter = if cfg!(miri) { 0..20 } else { 0..100 };
 366
 367     for i in iter {
 368         #[cfg(not(miri))]
 369         let cases = &[
 370             b"a".repeat(i),
 371             b"\0".repeat(i),
 372             b"\x7f".repeat(i),
 373             b"\x80".repeat(i),
 374             b"\xff".repeat(i),
 375             repeat_concat(b'a', 0x80u8, i),
 376             repeat_concat(0x80u8, b'a', i),
 377         ];
 378
 379         #[cfg(miri)]
 380         let cases = &[b"a".repeat(i), b"\x80".repeat(i), repeat_concat(b'a', 0x80u8, i)];
 381
 382         for case in cases {
 383             for pos in 0..=case.len() {
 384                 // Potentially misaligned head
 385                 let prefix = &case[pos..];
 386                 assert_eq!(is_ascii_baseline(prefix), prefix.is_ascii(),);
 387
 388                 // Potentially misaligned tail
 389                 let suffix = &case[..case.len() - pos];
 390
 391                 assert_eq!(is_ascii_baseline(suffix), suffix.is_ascii(),);
 392
 393                 // Both head and tail are potentially misaligned
 394                 let mid = &case[(pos / 2)..(case.len() - (pos / 2))];
 395                 assert_eq!(is_ascii_baseline(mid), mid.is_ascii(),);
 396             }
 397         }
 398     }
 399 }
 400
 401 #[test]
 402 fn ascii_const() {
 403     // test that the `is_ascii` methods of `char` and `u8` are usable in a const context
 404
 405     const CHAR_IS_ASCII: bool = 'a'.is_ascii();
 406     assert!(CHAR_IS_ASCII);
 407
 408     const BYTE_IS_ASCII: bool = 97u8.is_ascii();
 409     assert!(BYTE_IS_ASCII);
 410 }
 411
 412 #[test]
 413 fn ascii_ctype_const() {
 414     macro_rules! suite {
 415         ( $( $fn:ident => [$a:ident, $A:ident, $nine:ident, $dot:ident, $space:ident]; )* ) => {
 416             $(
 417                 mod $fn {
 418                     const CHAR_A_LOWER: bool = 'a'.$fn();
 419                     const CHAR_A_UPPER: bool = 'A'.$fn();
 420                     const CHAR_NINE: bool = '9'.$fn();
 421                     const CHAR_DOT: bool = '.'.$fn();
 422                     const CHAR_SPACE: bool = ' '.$fn();
 423
 424                     const U8_A_LOWER: bool = b'a'.$fn();
 425                     const U8_A_UPPER: bool = b'A'.$fn();
 426                     const U8_NINE: bool = b'9'.$fn();
 427                     const U8_DOT: bool = b'.'.$fn();
 428                     const U8_SPACE: bool = b' '.$fn();
 429
 430                     pub fn run() {
 431                         assert_eq!(CHAR_A_LOWER, $a);
 432                         assert_eq!(CHAR_A_UPPER, $A);
 433                         assert_eq!(CHAR_NINE, $nine);
 434                         assert_eq!(CHAR_DOT, $dot);
 435                         assert_eq!(CHAR_SPACE, $space);
 436
 437                         assert_eq!(U8_A_LOWER, $a);
 438                         assert_eq!(U8_A_UPPER, $A);
 439                         assert_eq!(U8_NINE, $nine);
 440                         assert_eq!(U8_DOT, $dot);
 441                         assert_eq!(U8_SPACE, $space);
 442                     }
 443                 }
 444             )*
 445
 446             $( $fn::run(); )*
 447         }
 448     }
 449
 450     suite! {
 451         //                        'a'    'A'    '9'    '.'    ' '
 452         is_ascii_alphabetic   => [true,  true,  false, false, false];
 453         is_ascii_uppercase    => [false, true,  false, false, false];
 454         is_ascii_lowercase    => [true,  false, false, false, false];
 455         is_ascii_alphanumeric => [true,  true,  true,  false, false];
 456         is_ascii_digit        => [false, false, true,  false, false];
 457         is_ascii_hexdigit     => [true,  true,  true,  false, false];
 458         is_ascii_punctuation  => [false, false, false, true,  false];
 459         is_ascii_graphic      => [true,  true,  true,  true,  false];
 460         is_ascii_whitespace   => [false, false, false, false, true];
 461         is_ascii_control      => [false, false, false, false, false];
 462     }
 463 }