//! Utilities for the `char` primitive type.
//!
//! *[See also the `char` primitive type](primitive@char).*
//!
//! The `char` type represents a single character. More specifically, since
//! 'character' isn't a well-defined concept in Unicode, `char` is a '[Unicode
//! scalar value]', which is similar to, but not the same as, a '[Unicode code
//! point]'.
//!
//! [Unicode scalar value]: https://www.unicode.org/glossary/#unicode_scalar_value
//! [Unicode code point]: https://www.unicode.org/glossary/#code_point
//!
//! This module exists for technical reasons; the primary documentation for
//! `char` is directly on [the `char` primitive type][char] itself.
//!
//! This module is the home of the iterator implementations for the iterators
//! implemented on `char`, as well as some useful constants and conversion
//! functions that convert various types to `char`.
20 #![allow(non_snake_case)]
21 #![stable(feature = "core_char", since = "1.2.0")]
28 #[stable(feature = "try_from", since = "1.34.0")]
29 pub use self::convert::CharTryFromError;
30 #[stable(feature = "char_from_str", since = "1.20.0")]
31 pub use self::convert::ParseCharError;
32 #[stable(feature = "decode_utf16", since = "1.9.0")]
33 pub use self::decode::{DecodeUtf16, DecodeUtf16Error};
35 // perma-unstable re-exports
36 #[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
37 pub use self::methods::encode_utf16_raw;
38 #[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
39 pub use self::methods::encode_utf8_raw;
41 use crate::error::Error;
42 use crate::fmt::{self, Write};
43 use crate::iter::FusedIterator;
45 pub(crate) use self::methods::EscapeDebugExtArgs;
// UTF-8 ranges and tags for encoding characters.
//
// The `TAG_*` values are the fixed high-order bits of a UTF-8 byte: `TAG_CONT`
// marks a continuation byte, the others mark the leading byte of a two-,
// three- or four-byte sequence.
const TAG_CONT: u8 = 0b1000_0000;
const TAG_TWO_B: u8 = 0b1100_0000;
const TAG_THREE_B: u8 = 0b1110_0000;
const TAG_FOUR_B: u8 = 0b1111_0000;
// The `MAX_*` values are exclusive upper bounds: a code point strictly below
// `MAX_ONE_B` fits in one byte, below `MAX_TWO_B` in two, below `MAX_THREE_B`
// in three; anything at or above `MAX_THREE_B` needs four bytes.
const MAX_ONE_B: u32 = 0x80;
const MAX_TWO_B: u32 = 0x800;
const MAX_THREE_B: u32 = 0x10000;
/*
    Unicode general categories (abbreviation, long name, description):

    Lu  Uppercase_Letter        an uppercase letter
    Ll  Lowercase_Letter        a lowercase letter
    Lt  Titlecase_Letter        a digraphic character, with first part uppercase
    Lm  Modifier_Letter         a modifier letter
    Lo  Other_Letter            other letters, including syllables and ideographs
    Mn  Nonspacing_Mark         a nonspacing combining mark (zero advance width)
    Mc  Spacing_Mark            a spacing combining mark (positive advance width)
    Me  Enclosing_Mark          an enclosing combining mark
    Nd  Decimal_Number          a decimal digit
    Nl  Letter_Number           a letterlike numeric character
    No  Other_Number            a numeric character of other type
    Pc  Connector_Punctuation   a connecting punctuation mark, like a tie
    Pd  Dash_Punctuation        a dash or hyphen punctuation mark
    Ps  Open_Punctuation        an opening punctuation mark (of a pair)
    Pe  Close_Punctuation       a closing punctuation mark (of a pair)
    Pi  Initial_Punctuation     an initial quotation mark
    Pf  Final_Punctuation       a final quotation mark
    Po  Other_Punctuation       a punctuation mark of other type
    Sm  Math_Symbol             a symbol of primarily mathematical use
    Sc  Currency_Symbol         a currency sign
    Sk  Modifier_Symbol         a non-letterlike modifier symbol
    So  Other_Symbol            a symbol of other type
    Zs  Space_Separator         a space character (of various non-zero widths)
    Zl  Line_Separator          U+2028 LINE SEPARATOR only
    Zp  Paragraph_Separator     U+2029 PARAGRAPH SEPARATOR only
    Cc  Control                 a C0 or C1 control code
    Cf  Format                  a format control character
    Cs  Surrogate               a surrogate code point
    Co  Private_Use             a private-use character
    Cn  Unassigned              a reserved unassigned code point or a noncharacter
*/
89 /// The highest valid code point a `char` can have, `'\u{10FFFF}'`. Use [`char::MAX`] instead.
90 #[stable(feature = "rust1", since = "1.0.0")]
91 pub const MAX: char = char::MAX;
93 /// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a
94 /// decoding error. Use [`char::REPLACEMENT_CHARACTER`] instead.
95 #[stable(feature = "decode_utf16", since = "1.9.0")]
96 pub const REPLACEMENT_CHARACTER: char = char::REPLACEMENT_CHARACTER;
98 /// The version of [Unicode](https://www.unicode.org/) that the Unicode parts of
99 /// `char` and `str` methods are based on. Use [`char::UNICODE_VERSION`] instead.
100 #[stable(feature = "unicode_version", since = "1.45.0")]
101 pub const UNICODE_VERSION: (u8, u8, u8) = char::UNICODE_VERSION;
103 /// Creates an iterator over the UTF-16 encoded code points in `iter`, returning
104 /// unpaired surrogates as `Err`s. Use [`char::decode_utf16`] instead.
105 #[stable(feature = "decode_utf16", since = "1.9.0")]
107 pub fn decode_utf16<I: IntoIterator<Item = u16>>(iter: I) -> DecodeUtf16<I::IntoIter> {
108 self::decode::decode_utf16(iter)
111 /// Converts a `u32` to a `char`. Use [`char::from_u32`] instead.
112 #[stable(feature = "rust1", since = "1.0.0")]
113 #[rustc_const_stable(feature = "const_char_convert", since = "CURRENT_RUSTC_VERSION")]
116 pub const fn from_u32(i: u32) -> Option<char> {
117 self::convert::from_u32(i)
120 /// Converts a `u32` to a `char`, ignoring validity. Use [`char::from_u32_unchecked`].
122 #[stable(feature = "char_from_unchecked", since = "1.5.0")]
123 #[rustc_const_unstable(feature = "const_char_from_u32_unchecked", issue = "89259")]
126 pub const unsafe fn from_u32_unchecked(i: u32) -> char {
127 // SAFETY: the safety contract must be upheld by the caller.
128 unsafe { self::convert::from_u32_unchecked(i) }
131 /// Converts a digit in the given radix to a `char`. Use [`char::from_digit`] instead.
132 #[stable(feature = "rust1", since = "1.0.0")]
133 #[rustc_const_stable(feature = "const_char_convert", since = "CURRENT_RUSTC_VERSION")]
136 pub const fn from_digit(num: u32, radix: u32) -> Option<char> {
137 self::convert::from_digit(num, radix)
140 /// Returns an iterator that yields the hexadecimal Unicode escape of a
141 /// character, as `char`s.
143 /// This `struct` is created by the [`escape_unicode`] method on [`char`]. See
144 /// its documentation for more.
146 /// [`escape_unicode`]: char::escape_unicode
147 #[derive(Clone, Debug)]
148 #[stable(feature = "rust1", since = "1.0.0")]
149 pub struct EscapeUnicode {
151 state: EscapeUnicodeState,
153 // The index of the next hex digit to be printed (0 if none),
154 // i.e., the number of remaining hex digits to be printed;
155 // increasing from the least significant digit: 0x543210
156 hex_digit_idx: usize,
159 // The enum values are ordered so that their representation is the
160 // same as the remaining length (besides the hexadecimal digits). This
161 // likely makes `len()` a single load from memory) and inline-worth.
162 #[derive(Clone, Debug)]
163 enum EscapeUnicodeState {
172 #[stable(feature = "rust1", since = "1.0.0")]
173 impl Iterator for EscapeUnicode {
176 fn next(&mut self) -> Option<char> {
178 EscapeUnicodeState::Backslash => {
179 self.state = EscapeUnicodeState::Type;
182 EscapeUnicodeState::Type => {
183 self.state = EscapeUnicodeState::LeftBrace;
186 EscapeUnicodeState::LeftBrace => {
187 self.state = EscapeUnicodeState::Value;
190 EscapeUnicodeState::Value => {
191 let hex_digit = ((self.c as u32) >> (self.hex_digit_idx * 4)) & 0xf;
192 let c = from_digit(hex_digit, 16).unwrap();
193 if self.hex_digit_idx == 0 {
194 self.state = EscapeUnicodeState::RightBrace;
196 self.hex_digit_idx -= 1;
200 EscapeUnicodeState::RightBrace => {
201 self.state = EscapeUnicodeState::Done;
204 EscapeUnicodeState::Done => None,
209 fn size_hint(&self) -> (usize, Option<usize>) {
215 fn count(self) -> usize {
219 fn last(self) -> Option<char> {
221 EscapeUnicodeState::Done => None,
223 EscapeUnicodeState::RightBrace
224 | EscapeUnicodeState::Value
225 | EscapeUnicodeState::LeftBrace
226 | EscapeUnicodeState::Type
227 | EscapeUnicodeState::Backslash => Some('}'),
232 #[stable(feature = "exact_size_escape", since = "1.11.0")]
233 impl ExactSizeIterator for EscapeUnicode {
235 fn len(&self) -> usize {
236 // The match is a single memory access with no branching
239 EscapeUnicodeState::Done => 0,
240 EscapeUnicodeState::RightBrace => 1,
241 EscapeUnicodeState::Value => 2,
242 EscapeUnicodeState::LeftBrace => 3,
243 EscapeUnicodeState::Type => 4,
244 EscapeUnicodeState::Backslash => 5,
249 #[stable(feature = "fused", since = "1.26.0")]
250 impl FusedIterator for EscapeUnicode {}
252 #[stable(feature = "char_struct_display", since = "1.16.0")]
253 impl fmt::Display for EscapeUnicode {
254 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
255 for c in self.clone() {
262 /// An iterator that yields the literal escape code of a `char`.
264 /// This `struct` is created by the [`escape_default`] method on [`char`]. See
265 /// its documentation for more.
267 /// [`escape_default`]: char::escape_default
268 #[derive(Clone, Debug)]
269 #[stable(feature = "rust1", since = "1.0.0")]
270 pub struct EscapeDefault {
271 state: EscapeDefaultState,
274 #[derive(Clone, Debug)]
275 enum EscapeDefaultState {
279 Unicode(EscapeUnicode),
282 #[stable(feature = "rust1", since = "1.0.0")]
283 impl Iterator for EscapeDefault {
286 fn next(&mut self) -> Option<char> {
288 EscapeDefaultState::Backslash(c) => {
289 self.state = EscapeDefaultState::Char(c);
292 EscapeDefaultState::Char(c) => {
293 self.state = EscapeDefaultState::Done;
296 EscapeDefaultState::Done => None,
297 EscapeDefaultState::Unicode(ref mut iter) => iter.next(),
302 fn size_hint(&self) -> (usize, Option<usize>) {
308 fn count(self) -> usize {
312 fn nth(&mut self, n: usize) -> Option<char> {
314 EscapeDefaultState::Backslash(c) if n == 0 => {
315 self.state = EscapeDefaultState::Char(c);
318 EscapeDefaultState::Backslash(c) if n == 1 => {
319 self.state = EscapeDefaultState::Done;
322 EscapeDefaultState::Backslash(_) => {
323 self.state = EscapeDefaultState::Done;
326 EscapeDefaultState::Char(c) => {
327 self.state = EscapeDefaultState::Done;
329 if n == 0 { Some(c) } else { None }
331 EscapeDefaultState::Done => None,
332 EscapeDefaultState::Unicode(ref mut i) => i.nth(n),
336 fn last(self) -> Option<char> {
338 EscapeDefaultState::Unicode(iter) => iter.last(),
339 EscapeDefaultState::Done => None,
340 EscapeDefaultState::Backslash(c) | EscapeDefaultState::Char(c) => Some(c),
345 #[stable(feature = "exact_size_escape", since = "1.11.0")]
346 impl ExactSizeIterator for EscapeDefault {
347 fn len(&self) -> usize {
349 EscapeDefaultState::Done => 0,
350 EscapeDefaultState::Char(_) => 1,
351 EscapeDefaultState::Backslash(_) => 2,
352 EscapeDefaultState::Unicode(ref iter) => iter.len(),
357 #[stable(feature = "fused", since = "1.26.0")]
358 impl FusedIterator for EscapeDefault {}
360 #[stable(feature = "char_struct_display", since = "1.16.0")]
361 impl fmt::Display for EscapeDefault {
362 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
363 for c in self.clone() {
370 /// An iterator that yields the literal escape code of a `char`.
372 /// This `struct` is created by the [`escape_debug`] method on [`char`]. See its
373 /// documentation for more.
375 /// [`escape_debug`]: char::escape_debug
376 #[stable(feature = "char_escape_debug", since = "1.20.0")]
377 #[derive(Clone, Debug)]
378 pub struct EscapeDebug(EscapeDefault);
380 #[stable(feature = "char_escape_debug", since = "1.20.0")]
381 impl Iterator for EscapeDebug {
383 fn next(&mut self) -> Option<char> {
386 fn size_hint(&self) -> (usize, Option<usize>) {
391 #[stable(feature = "char_escape_debug", since = "1.20.0")]
392 impl ExactSizeIterator for EscapeDebug {}
394 #[stable(feature = "fused", since = "1.26.0")]
395 impl FusedIterator for EscapeDebug {}
397 #[stable(feature = "char_escape_debug", since = "1.20.0")]
398 impl fmt::Display for EscapeDebug {
399 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
400 fmt::Display::fmt(&self.0, f)
404 /// Returns an iterator that yields the lowercase equivalent of a `char`.
406 /// This `struct` is created by the [`to_lowercase`] method on [`char`]. See
407 /// its documentation for more.
409 /// [`to_lowercase`]: char::to_lowercase
410 #[stable(feature = "rust1", since = "1.0.0")]
411 #[derive(Debug, Clone)]
412 pub struct ToLowercase(CaseMappingIter);
414 #[stable(feature = "rust1", since = "1.0.0")]
415 impl Iterator for ToLowercase {
417 fn next(&mut self) -> Option<char> {
420 fn size_hint(&self) -> (usize, Option<usize>) {
425 #[stable(feature = "case_mapping_double_ended", since = "1.59.0")]
426 impl DoubleEndedIterator for ToLowercase {
427 fn next_back(&mut self) -> Option<char> {
432 #[stable(feature = "fused", since = "1.26.0")]
433 impl FusedIterator for ToLowercase {}
435 #[stable(feature = "exact_size_case_mapping_iter", since = "1.35.0")]
436 impl ExactSizeIterator for ToLowercase {}
438 /// Returns an iterator that yields the uppercase equivalent of a `char`.
440 /// This `struct` is created by the [`to_uppercase`] method on [`char`]. See
441 /// its documentation for more.
443 /// [`to_uppercase`]: char::to_uppercase
444 #[stable(feature = "rust1", since = "1.0.0")]
445 #[derive(Debug, Clone)]
446 pub struct ToUppercase(CaseMappingIter);
448 #[stable(feature = "rust1", since = "1.0.0")]
449 impl Iterator for ToUppercase {
451 fn next(&mut self) -> Option<char> {
454 fn size_hint(&self) -> (usize, Option<usize>) {
459 #[stable(feature = "case_mapping_double_ended", since = "1.59.0")]
460 impl DoubleEndedIterator for ToUppercase {
461 fn next_back(&mut self) -> Option<char> {
466 #[stable(feature = "fused", since = "1.26.0")]
467 impl FusedIterator for ToUppercase {}
469 #[stable(feature = "exact_size_case_mapping_iter", since = "1.35.0")]
470 impl ExactSizeIterator for ToUppercase {}
// Holds the zero to three characters of a case mapping that are still to be
// yielded; each variant names how many remain.
#[derive(Debug, Clone)]
enum CaseMappingIter {
    Three(char, char, char),
    Two(char, char),
    One(char),
    Zero,
}

impl CaseMappingIter {
    // Builds the iterator from a fixed three-slot table entry in which unused
    // trailing slots are `'\0'`. The first slot is always kept.
    fn new(chars: [char; 3]) -> CaseMappingIter {
        if chars[2] == '\0' {
            if chars[1] == '\0' {
                CaseMappingIter::One(chars[0]) // Including if chars[0] == '\0'
            } else {
                CaseMappingIter::Two(chars[0], chars[1])
            }
        } else {
            CaseMappingIter::Three(chars[0], chars[1], chars[2])
        }
    }
}

impl Iterator for CaseMappingIter {
    type Item = char;

    // Yields the front character and shrinks to the next smaller variant.
    fn next(&mut self) -> Option<char> {
        match *self {
            CaseMappingIter::Three(a, b, c) => {
                *self = CaseMappingIter::Two(b, c);
                Some(a)
            }
            CaseMappingIter::Two(b, c) => {
                *self = CaseMappingIter::One(c);
                Some(b)
            }
            CaseMappingIter::One(c) => {
                *self = CaseMappingIter::Zero;
                Some(c)
            }
            CaseMappingIter::Zero => None,
        }
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        let size = match self {
            CaseMappingIter::Three(..) => 3,
            CaseMappingIter::Two(..) => 2,
            CaseMappingIter::One(_) => 1,
            CaseMappingIter::Zero => 0,
        };
        (size, Some(size))
    }
}

impl DoubleEndedIterator for CaseMappingIter {
    // Yields the back character and shrinks to the next smaller variant.
    fn next_back(&mut self) -> Option<char> {
        match *self {
            CaseMappingIter::Three(a, b, c) => {
                *self = CaseMappingIter::Two(a, b);
                Some(c)
            }
            CaseMappingIter::Two(b, c) => {
                *self = CaseMappingIter::One(b);
                Some(c)
            }
            CaseMappingIter::One(c) => {
                *self = CaseMappingIter::Zero;
                Some(c)
            }
            CaseMappingIter::Zero => None,
        }
    }
}

impl fmt::Display for CaseMappingIter {
    // Writes the remaining characters front to back without consuming them.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match *self {
            CaseMappingIter::Three(a, b, c) => {
                f.write_char(a)?;
                f.write_char(b)?;
                f.write_char(c)
            }
            CaseMappingIter::Two(b, c) => {
                f.write_char(b)?;
                f.write_char(c)
            }
            CaseMappingIter::One(c) => f.write_char(c),
            CaseMappingIter::Zero => Ok(()),
        }
    }
}
563 #[stable(feature = "char_struct_display", since = "1.16.0")]
564 impl fmt::Display for ToLowercase {
565 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
566 fmt::Display::fmt(&self.0, f)
570 #[stable(feature = "char_struct_display", since = "1.16.0")]
571 impl fmt::Display for ToUppercase {
572 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
573 fmt::Display::fmt(&self.0, f)
577 /// The error type returned when a checked char conversion fails.
578 #[stable(feature = "u8_from_char", since = "1.59.0")]
579 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
580 pub struct TryFromCharError(pub(crate) ());
582 #[stable(feature = "u8_from_char", since = "1.59.0")]
583 impl fmt::Display for TryFromCharError {
584 fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
585 "unicode code point out of range".fmt(fmt)
589 #[stable(feature = "u8_from_char", since = "1.59.0")]
590 impl Error for TryFromCharError {}