1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
14 use str::from_utf8_unchecked_mut;
16 use unicode::printable::is_printable;
17 use unicode::tables::{conversions, derived_property, general_category, property};
21 /// Checks if a `char` is a digit in the given radix.
23 /// A 'radix' here is sometimes also called a 'base'. A radix of two
24 /// indicates a binary number, a radix of ten, decimal, and a radix of
25 /// sixteen, hexadecimal, to give some common values. Arbitrary
26 /// radices are supported.
28 /// Compared to `is_numeric()`, this function only recognizes the characters
29 /// `0-9`, `a-z` and `A-Z`.
31 /// 'Digit' is defined to be only the following characters:
37 /// For a more comprehensive understanding of 'digit', see [`is_numeric`][is_numeric].
39 /// [is_numeric]: #method.is_numeric
43 /// Panics if given a radix larger than 36.
50 /// assert!('1'.is_digit(10));
51 /// assert!('f'.is_digit(16));
52 /// assert!(!'f'.is_digit(10));
55 /// Passing a large radix, causing a panic:
60 /// let result = thread::spawn(|| {
65 /// assert!(result.is_err());
67 #[stable(feature = "rust1", since = "1.0.0")]
69 pub fn is_digit(self, radix: u32) -> bool {
// A char is a digit in this radix exactly when `to_digit` produces a value
// for it; the numeric value itself is discarded.
70 self.to_digit(radix).is_some()
73 /// Converts a `char` to a digit in the given radix.
75 /// A 'radix' here is sometimes also called a 'base'. A radix of two
76 /// indicates a binary number, a radix of ten, decimal, and a radix of
77 /// sixteen, hexadecimal, to give some common values. Arbitrary
78 /// radices are supported.
80 /// 'Digit' is defined to be only the following characters:
88 /// Returns `None` if the `char` does not refer to a digit in the given radix.
92 /// Panics if given a radix larger than 36.
99 /// assert_eq!('1'.to_digit(10), Some(1));
100 /// assert_eq!('f'.to_digit(16), Some(15));
103 /// Passing a non-digit results in failure:
106 /// assert_eq!('f'.to_digit(10), None);
107 /// assert_eq!('z'.to_digit(16), None);
110 /// Passing a large radix, causing a panic:
115 /// let result = thread::spawn(|| {
116 /// '1'.to_digit(37);
119 /// assert!(result.is_err());
121 #[stable(feature = "rust1", since = "1.0.0")]
123 pub fn to_digit(self, radix: u32) -> Option<u32> {
// NOTE(review): the condition guarding this panic is on a line not visible
// in this excerpt; going by the message, it rejects any radix above 36.
125 panic!("to_digit: radix is too high (maximum 36)");
// Translate the character into its numeric value: '0'-'9' map to 0-9, and
// letters of either case map to 10 and up ('a'/'A' is 10, 'z'/'Z' is 35).
127 let val = match self {
128 '0' ..= '9' => self as u32 - '0' as u32,
129 'a' ..= 'z' => self as u32 - 'a' as u32 + 10,
130 'A' ..= 'Z' => self as u32 - 'A' as u32 + 10,
// The value only counts as a digit when it is strictly below the radix.
133 if val < radix { Some(val) }
137 /// Returns an iterator that yields the hexadecimal Unicode escape of a
138 /// character as `char`s.
140 /// This will escape characters with the Rust syntax of the form
141 /// `\u{NNNNNN}` where `NNNNNN` is a hexadecimal representation.
148 /// for c in '❤'.escape_unicode() {
154 /// Using `println!` directly:
157 /// println!("{}", '❤'.escape_unicode());
160 /// Both are equivalent to:
163 /// println!("\\u{{2764}}");
166 /// Using `to_string`:
169 /// assert_eq!('❤'.escape_unicode().to_string(), "\\u{2764}");
171 #[stable(feature = "rust1", since = "1.0.0")]
173 pub fn escape_unicode(self) -> EscapeUnicode {
// NOTE(review): `c` is bound on a line not visible in this excerpt --
// presumably `self as u32`; confirm.
176 // or-ing 1 ensures that for c==0 the code computes that one
177 // digit should be printed and (which is the same) avoids the
178 // (31 - 32) underflow
// `msb` is the bit index of the highest set bit of `c | 1`.
179 let msb = 31 - (c | 1).leading_zeros();
181 // the index of the most significant hex digit
// (one hex digit covers four bits, hence the division by 4)
182 let ms_hex_digit = msb / 4;
// Initial iterator fields: start in the `Backslash` state, with
// `hex_digit_idx` set to the most significant hex digit.
185 state: EscapeUnicodeState::Backslash,
186 hex_digit_idx: ms_hex_digit as usize,
190 /// An extended version of `escape_debug` that optionally permits escaping
191 /// Extended Grapheme codepoints. This allows us to format characters like
192 /// nonspacing marks better when they're at the start of a string.
194 #[unstable(feature = "str_internals", issue = "0")]
196 pub fn escape_debug_ext(self, escape_grapheme_extended: bool) -> EscapeDebug {
// Choose the iterator's starting state from the kind of character. Arms are
// tried top to bottom, so earlier arms take precedence over later ones.
197 let init_state = match self {
// Tab, carriage return and line feed: backslash plus a mnemonic letter.
198 '\t' => EscapeDefaultState::Backslash('t'),
199 '\r' => EscapeDefaultState::Backslash('r'),
200 '\n' => EscapeDefaultState::Backslash('n'),
// Backslash and both quote characters: backslash plus the character itself.
201 '\\' | '\'' | '"' => EscapeDefaultState::Backslash(self),
// Only when the caller opts in: grapheme-extending characters are written as
// Unicode escapes. This arm sits before the `is_printable` arm, so it wins
// even for characters that would otherwise be passed through.
202 _ if escape_grapheme_extended && self.is_grapheme_extended() => {
203 EscapeDefaultState::Unicode(self.escape_unicode())
// Printable characters are emitted unchanged.
205 _ if is_printable(self) => EscapeDefaultState::Char(self),
// Everything else falls back to a Unicode escape.
206 _ => EscapeDefaultState::Unicode(self.escape_unicode()),
// `EscapeDebug` wraps an `EscapeDefault` iterator carrying the chosen state.
208 EscapeDebug(EscapeDefault { state: init_state })
211 /// Returns an iterator that yields the literal escape code of a character
214 /// This will escape the characters similar to the `Debug` implementations
215 /// of `str` or `char`.
222 /// for c in '\n'.escape_debug() {
228 /// Using `println!` directly:
231 /// println!("{}", '\n'.escape_debug());
234 /// Both are equivalent to:
240 /// Using `to_string`:
243 /// assert_eq!('\n'.escape_debug().to_string(), "\\n");
245 #[stable(feature = "char_escape_debug", since = "1.20.0")]
247 pub fn escape_debug(self) -> EscapeDebug {
// Delegate to the extended variant with grapheme-extended escaping enabled.
248 self.escape_debug_ext(true)
251 /// Returns an iterator that yields the literal escape code of a character
254 /// The default is chosen with a bias toward producing literals that are
255 /// legal in a variety of languages, including C++11 and similar C-family
256 /// languages. The exact rules are:
258 /// * Tab is escaped as `\t`.
259 /// * Carriage return is escaped as `\r`.
260 /// * Line feed is escaped as `\n`.
261 /// * Single quote is escaped as `\'`.
262 /// * Double quote is escaped as `\"`.
263 /// * Backslash is escaped as `\\`.
264 /// * Any character in the 'printable ASCII' range `0x20` .. `0x7e`
265 /// inclusive is not escaped.
266 /// * All other characters are given hexadecimal Unicode escapes; see
267 /// [`escape_unicode`][escape_unicode].
269 /// [escape_unicode]: #method.escape_unicode
276 /// for c in '"'.escape_default() {
282 /// Using `println!` directly:
285 /// println!("{}", '"'.escape_default());
289 /// Both are equivalent to:
292 /// println!("\\\"");
295 /// Using `to_string`:
298 /// assert_eq!('"'.escape_default().to_string(), "\\\"");
300 #[stable(feature = "rust1", since = "1.0.0")]
302 pub fn escape_default(self) -> EscapeDefault {
// Choose the iterator's starting state; arms are tried top to bottom.
303 let init_state = match self {
// Tab, carriage return and line feed: backslash plus a mnemonic letter.
304 '\t' => EscapeDefaultState::Backslash('t'),
305 '\r' => EscapeDefaultState::Backslash('r'),
306 '\n' => EscapeDefaultState::Backslash('n'),
// Backslash and both quote characters: backslash plus the character itself.
// This arm must precede the range arm below, which also contains them.
307 '\\' | '\'' | '"' => EscapeDefaultState::Backslash(self),
// Remaining characters in 0x20..=0x7e are emitted as-is.
308 '\x20' ..= '\x7e' => EscapeDefaultState::Char(self),
// Anything outside that range becomes a Unicode escape.
309 _ => EscapeDefaultState::Unicode(self.escape_unicode())
311 EscapeDefault { state: init_state }
314 /// Returns the number of bytes this `char` would need if encoded in UTF-8.
316 /// That number of bytes is always between 1 and 4, inclusive.
323 /// let len = 'A'.len_utf8();
324 /// assert_eq!(len, 1);
326 /// let len = 'ß'.len_utf8();
327 /// assert_eq!(len, 2);
329 /// let len = 'ℝ'.len_utf8();
330 /// assert_eq!(len, 3);
332 /// let len = '💣'.len_utf8();
333 /// assert_eq!(len, 4);
336 /// The `&str` type guarantees that its contents are UTF-8, and so we can compare the length it
337 /// would take if each code point was represented as a `char` vs in the `&str` itself:
341 /// let eastern = '東';
342 /// let capital = '京';
344 /// // both can be represented as three bytes
345 /// assert_eq!(3, eastern.len_utf8());
346 /// assert_eq!(3, capital.len_utf8());
348 /// // as a &str, these two are encoded in UTF-8
349 /// let tokyo = "東京";
351 /// let len = eastern.len_utf8() + capital.len_utf8();
353 /// // we can see that they take six bytes total...
354 /// assert_eq!(6, len);
356 /// // ... just like the &str
357 /// assert_eq!(len, tokyo.len());
359 #[stable(feature = "rust1", since = "1.0.0")]
361 pub fn len_utf8(self) -> usize {
362 let code = self as u32;
// Compare the scalar value against the `MAX_*_B` bounds (defined elsewhere)
// in increasing order; the first bound it falls below selects the branch.
// NOTE(review): the value produced by each branch is on lines not visible in
// this excerpt -- presumably the byte counts 1 through 4; confirm.
363 if code < MAX_ONE_B {
365 } else if code < MAX_TWO_B {
367 } else if code < MAX_THREE_B {
374 /// Returns the number of 16-bit code units this `char` would need if
375 /// encoded in UTF-16.
377 /// See the documentation for [`len_utf8`] for more explanation of this
378 /// concept. This function is a mirror, but for UTF-16 instead of UTF-8.
380 /// [`len_utf8`]: #method.len_utf8
387 /// let n = 'ß'.len_utf16();
388 /// assert_eq!(n, 1);
390 /// let len = '💣'.len_utf16();
391 /// assert_eq!(len, 2);
393 #[stable(feature = "rust1", since = "1.0.0")]
395 pub fn len_utf16(self) -> usize {
396 let ch = self as u32;
// Masking to the low 16 bits leaves the value unchanged exactly when it fits
// in a single 16-bit code unit; any larger value needs two units.
397 if (ch & 0xFFFF) == ch { 1 } else { 2 }
400 /// Encodes this character as UTF-8 into the provided byte buffer,
401 /// and then returns the subslice of the buffer that contains the encoded character.
405 /// Panics if the buffer is not large enough.
406 /// A buffer of length four is large enough to encode any `char`.
410 /// In both of these examples, 'ß' takes two bytes to encode.
413 /// let mut b = [0; 2];
415 /// let result = 'ß'.encode_utf8(&mut b);
417 /// assert_eq!(result, "ß");
419 /// assert_eq!(result.len(), 2);
422 /// A buffer that's too small:
427 /// let result = thread::spawn(|| {
428 /// let mut b = [0; 1];
431 /// 'ß'.encode_utf8(&mut b);
434 /// assert!(result.is_err());
436 #[stable(feature = "unicode_encode_char", since = "1.15.0")]
438 pub fn encode_utf8(self, dst: &mut [u8]) -> &mut str {
439 let code = self as u32;
// Each branch pairs a size class (comparison against a `MAX_*_B` bound) with
// a check that `dst` is long enough. That length check is what keeps the
// unchecked writes inside the branch within bounds.
442 if code < MAX_ONE_B && !dst.is_empty() {
// One byte: the value is stored directly.
443 *dst.get_unchecked_mut(0) = code as u8;
445 } else if code < MAX_TWO_B && dst.len() >= 2 {
// Lead byte: the bits above the low six (masked to 5 bits) OR-ed with the
// two-byte tag; the second byte carries the low six bits plus `TAG_CONT`.
446 *dst.get_unchecked_mut(0) = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
447 *dst.get_unchecked_mut(1) = (code & 0x3F) as u8 | TAG_CONT;
449 } else if code < MAX_THREE_B && dst.len() >= 3 {
// Same scheme: 4 payload bits in the lead byte, then two 6-bit groups.
450 *dst.get_unchecked_mut(0) = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
451 *dst.get_unchecked_mut(1) = (code >> 6 & 0x3F) as u8 | TAG_CONT;
452 *dst.get_unchecked_mut(2) = (code & 0x3F) as u8 | TAG_CONT;
454 } else if dst.len() >= 4 {
// Same scheme: 3 payload bits in the lead byte, then three 6-bit groups.
455 *dst.get_unchecked_mut(0) = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
456 *dst.get_unchecked_mut(1) = (code >> 12 & 0x3F) as u8 | TAG_CONT;
457 *dst.get_unchecked_mut(2) = (code >> 6 & 0x3F) as u8 | TAG_CONT;
458 *dst.get_unchecked_mut(3) = (code & 0x3F) as u8 | TAG_CONT;
// NOTE(review): the `else` introducing this panic is not visible in this
// excerpt. Given the conditions above, it is reached only when `dst` is too
// short for this character (assuming the `MAX_*_B` bounds are increasing).
461 panic!("encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}",
// The required length is recomputed here purely for the message.
462 from_u32_unchecked(code).len_utf8(),
// NOTE(review): `len` is assigned on lines not visible in this excerpt --
// presumably the number of bytes written by the branch taken; confirm.
// The returned `str` is the first `len` bytes of `dst`.
466 from_utf8_unchecked_mut(dst.get_unchecked_mut(..len))
470 /// Encodes this character as UTF-16 into the provided `u16` buffer,
471 /// and then returns the subslice of the buffer that contains the encoded character.
475 /// Panics if the buffer is not large enough.
476 /// A buffer of length 2 is large enough to encode any `char`.
480 /// In both of these examples, '𝕊' takes two `u16`s to encode.
483 /// let mut b = [0; 2];
485 /// let result = '𝕊'.encode_utf16(&mut b);
487 /// assert_eq!(result.len(), 2);
490 /// A buffer that's too small:
495 /// let result = thread::spawn(|| {
496 /// let mut b = [0; 1];
499 /// '𝕊'.encode_utf16(&mut b);
502 /// assert!(result.is_err());
504 #[stable(feature = "unicode_encode_char", since = "1.15.0")]
506 pub fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] {
507 let mut code = self as u32;
// Single-unit case: the value fits in 16 bits and `dst` has room for one
// unit, which makes the unchecked write at index 0 in bounds.
509 if (code & 0xFFFF) == code && !dst.is_empty() {
510 // The BMP falls through (assuming non-surrogate, as it should)
511 *dst.get_unchecked_mut(0) = code as u16;
// Hand back only the one-element prefix that was written.
512 slice::from_raw_parts_mut(dst.as_mut_ptr(), 1)
513 } else if dst.len() >= 2 {
514 // Supplementary planes break into surrogates.
// NOTE(review): `code` is declared `mut`, but the line that modifies it is
// not visible in this excerpt -- presumably an offset is subtracted before
// the split below; confirm.
// First unit: 0xD800 OR-ed with the bits above the low ten.
// Second unit: 0xDC00 OR-ed with the low ten bits.
516 *dst.get_unchecked_mut(0) = 0xD800 | ((code >> 10) as u16);
517 *dst.get_unchecked_mut(1) = 0xDC00 | ((code as u16) & 0x3FF);
// Hand back the two-element prefix that was written.
518 slice::from_raw_parts_mut(dst.as_mut_ptr(), 2)
// NOTE(review): the `else` introducing this panic is not visible in this
// excerpt. Given the conditions above, it is reached only when `dst` is too
// short for this character.
520 panic!("encode_utf16: need {} units to encode U+{:X}, but the buffer has {}",
// The required length is recomputed here purely for the message.
521 from_u32_unchecked(code).len_utf16(),
528 /// Returns true if this `char` is an alphabetic code point, and false if not.
535 /// assert!('a'.is_alphabetic());
536 /// assert!('京'.is_alphabetic());
539 /// // love is many things, but it is not alphabetic
540 /// assert!(!c.is_alphabetic());
542 #[stable(feature = "rust1", since = "1.0.0")]
544 pub fn is_alphabetic(self) -> bool {
// ASCII letters are answered directly, without a table lookup.
546 'a'..='z' | 'A'..='Z' => true,
// Only characters above U+007F are passed to `derived_property::Alphabetic`.
547 c if c > '\x7f' => derived_property::Alphabetic(c),
552 /// Returns true if this `char` satisfies the 'XID_Start' Unicode property, and false
555 /// 'XID_Start' is a Unicode Derived Property specified in
556 /// [UAX #31](http://unicode.org/reports/tr31/#NFKC_Modifications),
557 /// mostly similar to `ID_Start` but modified for closure under `NFKx`.
558 #[unstable(feature = "rustc_private",
559 reason = "mainly needed for compiler internals",
562 pub fn is_xid_start(self) -> bool {
// Delegates to `derived_property::XID_Start` from `unicode::tables`.
563 derived_property::XID_Start(self)
566 /// Returns true if this `char` satisfies the 'XID_Continue' Unicode property, and false
569 /// 'XID_Continue' is a Unicode Derived Property specified in
570 /// [UAX #31](http://unicode.org/reports/tr31/#NFKC_Modifications),
571 /// mostly similar to 'ID_Continue' but modified for closure under NFKx.
572 #[unstable(feature = "rustc_private",
573 reason = "mainly needed for compiler internals",
576 pub fn is_xid_continue(self) -> bool {
// Delegates to `derived_property::XID_Continue` from `unicode::tables`.
577 derived_property::XID_Continue(self)
580 /// Returns true if this `char` is lowercase, and false otherwise.
582 /// 'Lowercase' is defined according to the terms of the Unicode Derived Core
583 /// Property `Lowercase`.
590 /// assert!('a'.is_lowercase());
591 /// assert!('δ'.is_lowercase());
592 /// assert!(!'A'.is_lowercase());
593 /// assert!(!'Δ'.is_lowercase());
595 /// // The various Chinese scripts do not have case, and so:
596 /// assert!(!'中'.is_lowercase());
598 #[stable(feature = "rust1", since = "1.0.0")]
600 pub fn is_lowercase(self) -> bool {
// Characters above U+007F are resolved through `derived_property::Lowercase`.
// NOTE(review): the arms covering the ASCII range are on lines not visible
// in this excerpt.
603 c if c > '\x7f' => derived_property::Lowercase(c),
608 /// Returns true if this `char` is uppercase, and false otherwise.
610 /// 'Uppercase' is defined according to the terms of the Unicode Derived Core
611 /// Property `Uppercase`.
618 /// assert!(!'a'.is_uppercase());
619 /// assert!(!'δ'.is_uppercase());
620 /// assert!('A'.is_uppercase());
621 /// assert!('Δ'.is_uppercase());
623 /// // The various Chinese scripts do not have case, and so:
624 /// assert!(!'中'.is_uppercase());
626 #[stable(feature = "rust1", since = "1.0.0")]
628 pub fn is_uppercase(self) -> bool {
// Characters above U+007F are resolved through `derived_property::Uppercase`.
// NOTE(review): the arms covering the ASCII range are on lines not visible
// in this excerpt.
631 c if c > '\x7f' => derived_property::Uppercase(c),
636 /// Returns true if this `char` is whitespace, and false otherwise.
638 /// 'Whitespace' is defined according to the terms of the Unicode Derived Core
639 /// Property `White_Space`.
646 /// assert!(' '.is_whitespace());
648 /// // a non-breaking space
649 /// assert!('\u{A0}'.is_whitespace());
651 /// assert!(!'越'.is_whitespace());
653 #[stable(feature = "rust1", since = "1.0.0")]
655 pub fn is_whitespace(self) -> bool {
// ASCII whitespace is answered directly: space, plus U+0009..=U+000D
// (tab, line feed, vertical tab, form feed, carriage return).
657 ' ' | '\x09'..='\x0d' => true,
// Only characters above U+007F are passed to `property::White_Space`.
658 c if c > '\x7f' => property::White_Space(c),
663 /// Returns true if this `char` is alphanumeric, and false otherwise.
665 /// 'Alphanumeric'-ness is defined in terms of the Unicode General Categories
666 /// 'Nd', 'Nl', 'No' and the Derived Core Property 'Alphabetic'.
673 /// assert!('٣'.is_alphanumeric());
674 /// assert!('7'.is_alphanumeric());
675 /// assert!('৬'.is_alphanumeric());
676 /// assert!('¾'.is_alphanumeric());
677 /// assert!('①'.is_alphanumeric());
678 /// assert!('K'.is_alphanumeric());
679 /// assert!('و'.is_alphanumeric());
680 /// assert!('藏'.is_alphanumeric());
682 #[stable(feature = "rust1", since = "1.0.0")]
684 pub fn is_alphanumeric(self) -> bool {
// Alphanumeric is the union of the two predicates; `||` short-circuits, so
// `is_numeric` is only consulted when the character is not alphabetic.
685 self.is_alphabetic() || self.is_numeric()
688 /// Returns true if this `char` is a control code point, and false otherwise.
690 /// 'Control code point' is defined in terms of the Unicode General
698 /// // U+009C, STRING TERMINATOR
699 /// assert!('\u{9c}'.is_control());
700 /// assert!(!'q'.is_control());
702 #[stable(feature = "rust1", since = "1.0.0")]
704 pub fn is_control(self) -> bool {
// Delegates to `general_category::Cc` from `unicode::tables`.
705 general_category::Cc(self)
708 /// Returns true if this `char` is an extended grapheme character, and false otherwise.
710 /// 'Extended grapheme character' is defined in terms of the Unicode Shaping and Rendering
711 /// Category `Grapheme_Extend`.
713 pub(crate) fn is_grapheme_extended(self) -> bool {
// Delegates to `derived_property::Grapheme_Extend` from `unicode::tables`.
714 derived_property::Grapheme_Extend(self)
717 /// Returns true if this `char` is numeric, and false otherwise.
719 /// 'Numeric'-ness is defined in terms of the Unicode General Categories
720 /// 'Nd', 'Nl', 'No'.
727 /// assert!('٣'.is_numeric());
728 /// assert!('7'.is_numeric());
729 /// assert!('৬'.is_numeric());
730 /// assert!('¾'.is_numeric());
731 /// assert!('①'.is_numeric());
732 /// assert!(!'K'.is_numeric());
733 /// assert!(!'و'.is_numeric());
734 /// assert!(!'藏'.is_numeric());
736 #[stable(feature = "rust1", since = "1.0.0")]
738 pub fn is_numeric(self) -> bool {
// Characters above U+007F are resolved through `general_category::N`.
// NOTE(review): the arms covering the ASCII range are on lines not visible
// in this excerpt.
741 c if c > '\x7f' => general_category::N(c),
746 /// Returns an iterator that yields the lowercase equivalent of a `char`
747 /// as one or more `char`s.
749 /// If a character does not have a lowercase equivalent, the same character
750 /// will be returned back by the iterator.
752 /// This performs complex unconditional mappings with no tailoring: it maps
753 /// one Unicode character to its lowercase equivalent according to the
754 /// [Unicode database] and the additional complex mappings
755 /// [`SpecialCasing.txt`]. Conditional mappings (based on context or
756 /// language) are not considered here.
758 /// For a full reference, see [here][reference].
760 /// [Unicode database]: ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
762 /// [`SpecialCasing.txt`]: ftp://ftp.unicode.org/Public/UNIDATA/SpecialCasing.txt
764 /// [reference]: http://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992
771 /// for c in 'İ'.to_lowercase() {
777 /// Using `println!` directly:
780 /// println!("{}", 'İ'.to_lowercase());
783 /// Both are equivalent to:
786 /// println!("i\u{307}");
789 /// Using `to_string`:
792 /// assert_eq!('C'.to_lowercase().to_string(), "c");
794 /// // Sometimes the result is more than one character:
795 /// assert_eq!('İ'.to_lowercase().to_string(), "i\u{307}");
797 /// // Characters that do not have both uppercase and lowercase
798 /// // convert into themselves.
799 /// assert_eq!('山'.to_lowercase().to_string(), "山");
801 #[stable(feature = "rust1", since = "1.0.0")]
803 pub fn to_lowercase(self) -> ToLowercase {
// `conversions::to_lower` supplies the mapping for this character;
// `CaseMappingIter::new` turns it into the iterator that `ToLowercase` wraps.
804 ToLowercase(CaseMappingIter::new(conversions::to_lower(self)))
807 /// Returns an iterator that yields the uppercase equivalent of a `char`
808 /// as one or more `char`s.
810 /// If a character does not have an uppercase equivalent, the same character
811 /// will be returned back by the iterator.
813 /// This performs complex unconditional mappings with no tailoring: it maps
814 /// one Unicode character to its uppercase equivalent according to the
815 /// [Unicode database] and the additional complex mappings
816 /// [`SpecialCasing.txt`]. Conditional mappings (based on context or
817 /// language) are not considered here.
819 /// For a full reference, see [here][reference].
821 /// [Unicode database]: ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
823 /// [`SpecialCasing.txt`]: ftp://ftp.unicode.org/Public/UNIDATA/SpecialCasing.txt
825 /// [reference]: http://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992
832 /// for c in 'ß'.to_uppercase() {
838 /// Using `println!` directly:
841 /// println!("{}", 'ß'.to_uppercase());
844 /// Both are equivalent to:
850 /// Using `to_string`:
853 /// assert_eq!('c'.to_uppercase().to_string(), "C");
855 /// // Sometimes the result is more than one character:
856 /// assert_eq!('ß'.to_uppercase().to_string(), "SS");
858 /// // Characters that do not have both uppercase and lowercase
859 /// // convert into themselves.
860 /// assert_eq!('山'.to_uppercase().to_string(), "山");
865 /// In Turkish, the equivalent of 'i' in Latin has five forms instead of two:
867 /// * 'Dotless': I / ı, sometimes written ï
868 /// * 'Dotted': İ / i
870 /// Note that the lowercase dotted 'i' is the same as the Latin. Therefore:
873 /// let upper_i = 'i'.to_uppercase().to_string();
876 /// The value of `upper_i` here relies on the language of the text: if we're
877 /// in `en-US`, it should be `"I"`, but if we're in `tr_TR`, it should
878 /// be `"İ"`. `to_uppercase()` does not take this into account, and so:
881 /// let upper_i = 'i'.to_uppercase().to_string();
883 /// assert_eq!(upper_i, "I");
886 /// holds across languages.
887 #[stable(feature = "rust1", since = "1.0.0")]
889 pub fn to_uppercase(self) -> ToUppercase {
// `conversions::to_upper` supplies the mapping for this character;
// `CaseMappingIter::new` turns it into the iterator that `ToUppercase` wraps.
890 ToUppercase(CaseMappingIter::new(conversions::to_upper(self)))
893 /// Checks if the value is within the ASCII range.
899 /// let non_ascii = '❤';
901 /// assert!(ascii.is_ascii());
902 /// assert!(!non_ascii.is_ascii());
904 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
906 pub fn is_ascii(&self) -> bool {
910 /// Makes a copy of the value in its ASCII upper case equivalent.
912 /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
913 /// but non-ASCII letters are unchanged.
915 /// To uppercase the value in-place, use [`make_ascii_uppercase`].
917 /// To uppercase ASCII characters in addition to non-ASCII characters, use
918 /// [`to_uppercase`].
924 /// let non_ascii = '❤';
926 /// assert_eq!('A', ascii.to_ascii_uppercase());
927 /// assert_eq!('❤', non_ascii.to_ascii_uppercase());
930 /// [`make_ascii_uppercase`]: #method.make_ascii_uppercase
931 /// [`to_uppercase`]: #method.to_uppercase
932 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
934 pub fn to_ascii_uppercase(&self) -> char {
// Round-trip through `u8` to reuse the byte-level ASCII mapping.
// NOTE(review): the `as u8` cast keeps only the low byte, so this line is
// only meaningful for ASCII input; the condition guarding it is on a line
// not visible in this excerpt -- confirm.
936 (*self as u8).to_ascii_uppercase() as char
942 /// Makes a copy of the value in its ASCII lower case equivalent.
944 /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
945 /// but non-ASCII letters are unchanged.
947 /// To lowercase the value in-place, use [`make_ascii_lowercase`].
949 /// To lowercase ASCII characters in addition to non-ASCII characters, use
950 /// [`to_lowercase`].
956 /// let non_ascii = '❤';
958 /// assert_eq!('a', ascii.to_ascii_lowercase());
959 /// assert_eq!('❤', non_ascii.to_ascii_lowercase());
962 /// [`make_ascii_lowercase`]: #method.make_ascii_lowercase
963 /// [`to_lowercase`]: #method.to_lowercase
964 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
966 pub fn to_ascii_lowercase(&self) -> char {
// Round-trip through `u8` to reuse the byte-level ASCII mapping.
// NOTE(review): the `as u8` cast keeps only the low byte, so this line is
// only meaningful for ASCII input; the condition guarding it is on a line
// not visible in this excerpt -- confirm.
968 (*self as u8).to_ascii_lowercase() as char
974 /// Checks that two values are an ASCII case-insensitive match.
976 /// Equivalent to `to_ascii_lowercase(a) == to_ascii_lowercase(b)`.
981 /// let upper_a = 'A';
982 /// let lower_a = 'a';
983 /// let lower_z = 'z';
985 /// assert!(upper_a.eq_ignore_ascii_case(&lower_a));
986 /// assert!(upper_a.eq_ignore_ascii_case(&upper_a));
987 /// assert!(!upper_a.eq_ignore_ascii_case(&lower_z));
989 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
991 pub fn eq_ignore_ascii_case(&self, other: &char) -> bool {
// Fold both sides with `to_ascii_lowercase` and compare the results.
992 self.to_ascii_lowercase() == other.to_ascii_lowercase()
995 /// Converts this type to its ASCII upper case equivalent in-place.
997 /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
998 /// but non-ASCII letters are unchanged.
1000 /// To return a new uppercased value without modifying the existing one, use
1001 /// [`to_ascii_uppercase`].
1006 /// let mut ascii = 'a';
1008 /// ascii.make_ascii_uppercase();
1010 /// assert_eq!('A', ascii);
1013 /// [`to_ascii_uppercase`]: #method.to_ascii_uppercase
1014 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1016 pub fn make_ascii_uppercase(&mut self) {
// Overwrite the value in place with its ASCII-uppercased copy.
1017 *self = self.to_ascii_uppercase();
1020 /// Converts this type to its ASCII lower case equivalent in-place.
1022 /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
1023 /// but non-ASCII letters are unchanged.
1025 /// To return a new lowercased value without modifying the existing one, use
1026 /// [`to_ascii_lowercase`].
1031 /// let mut ascii = 'A';
1033 /// ascii.make_ascii_lowercase();
1035 /// assert_eq!('a', ascii);
1038 /// [`to_ascii_lowercase`]: #method.to_ascii_lowercase
1039 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1041 pub fn make_ascii_lowercase(&mut self) {
// Overwrite the value in place with its ASCII-lowercased copy.
1042 *self = self.to_ascii_lowercase();
1045 /// Checks if the value is an ASCII alphabetic character:
1047 /// - U+0041 'A' ... U+005A 'Z', or
1048 /// - U+0061 'a' ... U+007A 'z'.
1053 /// #![feature(ascii_ctype)]
1055 /// let uppercase_a = 'A';
1056 /// let uppercase_g = 'G';
1060 /// let percent = '%';
1061 /// let space = ' ';
1063 /// let esc: char = 0x1b_u8.into();
1065 /// assert!(uppercase_a.is_ascii_alphabetic());
1066 /// assert!(uppercase_g.is_ascii_alphabetic());
1067 /// assert!(a.is_ascii_alphabetic());
1068 /// assert!(g.is_ascii_alphabetic());
1069 /// assert!(!zero.is_ascii_alphabetic());
1070 /// assert!(!percent.is_ascii_alphabetic());
1071 /// assert!(!space.is_ascii_alphabetic());
1072 /// assert!(!lf.is_ascii_alphabetic());
1073 /// assert!(!esc.is_ascii_alphabetic());
1075 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1077 pub fn is_ascii_alphabetic(&self) -> bool {
// `is_ascii()` is checked first because the `as u8` cast keeps only the low
// byte: without the guard, a non-ASCII char could be mistaken for an ASCII
// one. `&&` short-circuits, so the cast only runs for ASCII values.
1078 self.is_ascii() && (*self as u8).is_ascii_alphabetic()
1081 /// Checks if the value is an ASCII uppercase character:
1082 /// U+0041 'A' ... U+005A 'Z'.
1087 /// #![feature(ascii_ctype)]
1089 /// let uppercase_a = 'A';
1090 /// let uppercase_g = 'G';
1094 /// let percent = '%';
1095 /// let space = ' ';
1097 /// let esc: char = 0x1b_u8.into();
1099 /// assert!(uppercase_a.is_ascii_uppercase());
1100 /// assert!(uppercase_g.is_ascii_uppercase());
1101 /// assert!(!a.is_ascii_uppercase());
1102 /// assert!(!g.is_ascii_uppercase());
1103 /// assert!(!zero.is_ascii_uppercase());
1104 /// assert!(!percent.is_ascii_uppercase());
1105 /// assert!(!space.is_ascii_uppercase());
1106 /// assert!(!lf.is_ascii_uppercase());
1107 /// assert!(!esc.is_ascii_uppercase());
1109 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1111 pub fn is_ascii_uppercase(&self) -> bool {
// `is_ascii()` is checked first because the `as u8` cast keeps only the low
// byte: without the guard, a non-ASCII char could be mistaken for an ASCII
// one. `&&` short-circuits, so the cast only runs for ASCII values.
1112 self.is_ascii() && (*self as u8).is_ascii_uppercase()
1115 /// Checks if the value is an ASCII lowercase character:
1116 /// U+0061 'a' ... U+007A 'z'.
1121 /// #![feature(ascii_ctype)]
1123 /// let uppercase_a = 'A';
1124 /// let uppercase_g = 'G';
1128 /// let percent = '%';
1129 /// let space = ' ';
1131 /// let esc: char = 0x1b_u8.into();
1133 /// assert!(!uppercase_a.is_ascii_lowercase());
1134 /// assert!(!uppercase_g.is_ascii_lowercase());
1135 /// assert!(a.is_ascii_lowercase());
1136 /// assert!(g.is_ascii_lowercase());
1137 /// assert!(!zero.is_ascii_lowercase());
1138 /// assert!(!percent.is_ascii_lowercase());
1139 /// assert!(!space.is_ascii_lowercase());
1140 /// assert!(!lf.is_ascii_lowercase());
1141 /// assert!(!esc.is_ascii_lowercase());
1143 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1145 pub fn is_ascii_lowercase(&self) -> bool {
// `is_ascii()` is checked first because the `as u8` cast keeps only the low
// byte: without the guard, a non-ASCII char could be mistaken for an ASCII
// one. `&&` short-circuits, so the cast only runs for ASCII values.
1146 self.is_ascii() && (*self as u8).is_ascii_lowercase()
1149 /// Checks if the value is an ASCII alphanumeric character:
1151 /// - U+0041 'A' ... U+005A 'Z', or
1152 /// - U+0061 'a' ... U+007A 'z', or
1153 /// - U+0030 '0' ... U+0039 '9'.
1158 /// #![feature(ascii_ctype)]
1160 /// let uppercase_a = 'A';
1161 /// let uppercase_g = 'G';
1165 /// let percent = '%';
1166 /// let space = ' ';
1168 /// let esc: char = 0x1b_u8.into();
1170 /// assert!(uppercase_a.is_ascii_alphanumeric());
1171 /// assert!(uppercase_g.is_ascii_alphanumeric());
1172 /// assert!(a.is_ascii_alphanumeric());
1173 /// assert!(g.is_ascii_alphanumeric());
1174 /// assert!(zero.is_ascii_alphanumeric());
1175 /// assert!(!percent.is_ascii_alphanumeric());
1176 /// assert!(!space.is_ascii_alphanumeric());
1177 /// assert!(!lf.is_ascii_alphanumeric());
1178 /// assert!(!esc.is_ascii_alphanumeric());
1180 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1182 pub fn is_ascii_alphanumeric(&self) -> bool {
// `is_ascii()` is checked first because the `as u8` cast keeps only the low
// byte: without the guard, a non-ASCII char could be mistaken for an ASCII
// one. `&&` short-circuits, so the cast only runs for ASCII values.
1183 self.is_ascii() && (*self as u8).is_ascii_alphanumeric()
1186 /// Checks if the value is an ASCII decimal digit:
1187 /// U+0030 '0' ... U+0039 '9'.
1192 /// #![feature(ascii_ctype)]
1194 /// let uppercase_a = 'A';
1195 /// let uppercase_g = 'G';
1199 /// let percent = '%';
1200 /// let space = ' ';
1202 /// let esc: char = 0x1b_u8.into();
1204 /// assert!(!uppercase_a.is_ascii_digit());
1205 /// assert!(!uppercase_g.is_ascii_digit());
1206 /// assert!(!a.is_ascii_digit());
1207 /// assert!(!g.is_ascii_digit());
1208 /// assert!(zero.is_ascii_digit());
1209 /// assert!(!percent.is_ascii_digit());
1210 /// assert!(!space.is_ascii_digit());
1211 /// assert!(!lf.is_ascii_digit());
1212 /// assert!(!esc.is_ascii_digit());
1214 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1216 pub fn is_ascii_digit(&self) -> bool {
// `is_ascii()` is checked first because the `as u8` cast keeps only the low
// byte: without the guard, a non-ASCII char could be mistaken for an ASCII
// one. `&&` short-circuits, so the cast only runs for ASCII values.
1217 self.is_ascii() && (*self as u8).is_ascii_digit()
1220 /// Checks if the value is an ASCII hexadecimal digit:
1222 /// - U+0030 '0' ... U+0039 '9', or
1223 /// - U+0041 'A' ... U+0046 'F', or
1224 /// - U+0061 'a' ... U+0066 'f'.
1229 /// #![feature(ascii_ctype)]
1231 /// let uppercase_a = 'A';
1232 /// let uppercase_g = 'G';
1236 /// let percent = '%';
1237 /// let space = ' ';
1239 /// let esc: char = 0x1b_u8.into();
1241 /// assert!(uppercase_a.is_ascii_hexdigit());
1242 /// assert!(!uppercase_g.is_ascii_hexdigit());
1243 /// assert!(a.is_ascii_hexdigit());
1244 /// assert!(!g.is_ascii_hexdigit());
1245 /// assert!(zero.is_ascii_hexdigit());
1246 /// assert!(!percent.is_ascii_hexdigit());
1247 /// assert!(!space.is_ascii_hexdigit());
1248 /// assert!(!lf.is_ascii_hexdigit());
1249 /// assert!(!esc.is_ascii_hexdigit());
1251 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1253 pub fn is_ascii_hexdigit(&self) -> bool {
// `is_ascii()` is checked first because the `as u8` cast keeps only the low
// byte: without the guard, a non-ASCII char could be mistaken for an ASCII
// one. `&&` short-circuits, so the cast only runs for ASCII values.
1254 self.is_ascii() && (*self as u8).is_ascii_hexdigit()
1257 /// Checks if the value is an ASCII punctuation character:
1259 /// - U+0021 ... U+002F `! " # $ % & ' ( ) * + , - . /`, or
1260 /// - U+003A ... U+0040 `: ; < = > ? @`, or
1261 /// - U+005B ... U+0060 ``[ \ ] ^ _ ` ``, or
1262 /// - U+007B ... U+007E `{ | } ~`
1267 /// #![feature(ascii_ctype)]
1269 /// let uppercase_a = 'A';
1270 /// let uppercase_g = 'G';
1274 /// let percent = '%';
1275 /// let space = ' ';
1277 /// let esc: char = 0x1b_u8.into();
1279 /// assert!(!uppercase_a.is_ascii_punctuation());
1280 /// assert!(!uppercase_g.is_ascii_punctuation());
1281 /// assert!(!a.is_ascii_punctuation());
1282 /// assert!(!g.is_ascii_punctuation());
1283 /// assert!(!zero.is_ascii_punctuation());
1284 /// assert!(percent.is_ascii_punctuation());
1285 /// assert!(!space.is_ascii_punctuation());
1286 /// assert!(!lf.is_ascii_punctuation());
1287 /// assert!(!esc.is_ascii_punctuation());
1289 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1291 pub fn is_ascii_punctuation(&self) -> bool {
// `is_ascii()` is checked first because the `as u8` cast keeps only the low
// byte: without the guard, a non-ASCII char could be mistaken for an ASCII
// one. `&&` short-circuits, so the cast only runs for ASCII values.
1292 self.is_ascii() && (*self as u8).is_ascii_punctuation()
1295 /// Checks if the value is an ASCII graphic character:
1296 /// U+0021 '!' ... U+007E '~'.
1301 /// #![feature(ascii_ctype)]
1303 /// let uppercase_a = 'A';
1304 /// let uppercase_g = 'G';
1308 /// let percent = '%';
1309 /// let space = ' ';
1311 /// let esc: char = 0x1b_u8.into();
1313 /// assert!(uppercase_a.is_ascii_graphic());
1314 /// assert!(uppercase_g.is_ascii_graphic());
1315 /// assert!(a.is_ascii_graphic());
1316 /// assert!(g.is_ascii_graphic());
1317 /// assert!(zero.is_ascii_graphic());
1318 /// assert!(percent.is_ascii_graphic());
1319 /// assert!(!space.is_ascii_graphic());
1320 /// assert!(!lf.is_ascii_graphic());
1321 /// assert!(!esc.is_ascii_graphic());
1323 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1325 pub fn is_ascii_graphic(&self) -> bool {
// `is_ascii()` is checked first because the `as u8` cast keeps only the low
// byte: without the guard, a non-ASCII char could be mistaken for an ASCII
// one. `&&` short-circuits, so the cast only runs for ASCII values.
1326 self.is_ascii() && (*self as u8).is_ascii_graphic()
1329 /// Checks if the value is an ASCII whitespace character:
1330 /// U+0020 SPACE, U+0009 HORIZONTAL TAB, U+000A LINE FEED,
1331 /// U+000C FORM FEED, or U+000D CARRIAGE RETURN.
1333 /// Rust uses the WhatWG Infra Standard's [definition of ASCII
1334 /// whitespace][infra-aw]. There are several other definitions in
1335 /// wide use. For instance, [the POSIX locale][pct] includes
1336 /// U+000B VERTICAL TAB as well as all the above characters,
1337 /// but—from the very same specification—[the default rule for
1338 /// "field splitting" in the Bourne shell][bfs] considers *only*
1339 /// SPACE, HORIZONTAL TAB, and LINE FEED as whitespace.
1341 /// If you are writing a program that will process an existing
1342 /// file format, check what that format's definition of whitespace is
1343 /// before using this function.
1345 /// [infra-aw]: https://infra.spec.whatwg.org/#ascii-whitespace
1346 /// [pct]: http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap07.html#tag_07_03_01
1347 /// [bfs]: http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_06_05
1352 /// #![feature(ascii_ctype)]
1354 /// let uppercase_a = 'A';
1355 /// let uppercase_g = 'G';
1359 /// let percent = '%';
1360 /// let space = ' ';
1362 /// let esc: char = 0x1b_u8.into();
1364 /// assert!(!uppercase_a.is_ascii_whitespace());
1365 /// assert!(!uppercase_g.is_ascii_whitespace());
1366 /// assert!(!a.is_ascii_whitespace());
1367 /// assert!(!g.is_ascii_whitespace());
1368 /// assert!(!zero.is_ascii_whitespace());
1369 /// assert!(!percent.is_ascii_whitespace());
1370 /// assert!(space.is_ascii_whitespace());
1371 /// assert!(lf.is_ascii_whitespace());
1372 /// assert!(!esc.is_ascii_whitespace());
1374 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1376 pub fn is_ascii_whitespace(&self) -> bool {
// `is_ascii()` is checked first because the `as u8` cast keeps only the low
// byte: without the guard, a non-ASCII char could be mistaken for an ASCII
// one. `&&` short-circuits, so the cast only runs for ASCII values.
1377 self.is_ascii() && (*self as u8).is_ascii_whitespace()
1380 /// Checks if the value is an ASCII control character:
1381 /// U+0000 NUL ... U+001F UNIT SEPARATOR, or U+007F DELETE.
1382 /// Note that most ASCII whitespace characters are control
1383 /// characters, but SPACE is not.
1388 /// #![feature(ascii_ctype)]
1390 /// let uppercase_a = 'A';
1391 /// let uppercase_g = 'G';
1395 /// let percent = '%';
1396 /// let space = ' ';
1398 /// let esc: char = 0x1b_u8.into();
1400 /// assert!(!uppercase_a.is_ascii_control());
1401 /// assert!(!uppercase_g.is_ascii_control());
1402 /// assert!(!a.is_ascii_control());
1403 /// assert!(!g.is_ascii_control());
1404 /// assert!(!zero.is_ascii_control());
1405 /// assert!(!percent.is_ascii_control());
1406 /// assert!(!space.is_ascii_control());
1407 /// assert!(lf.is_ascii_control());
1408 /// assert!(esc.is_ascii_control());
1410 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1412 pub fn is_ascii_control(&self) -> bool {
// `is_ascii()` is checked first because the `as u8` cast keeps only the low
// byte: without the guard, a non-ASCII char could be mistaken for an ASCII
// one. `&&` short-circuits, so the cast only runs for ASCII values.
1413 self.is_ascii() && (*self as u8).is_ascii_control()