3 //! The `char` type represents a single character. More specifically, since
4 //! 'character' isn't a well-defined concept in Unicode, `char` is a '[Unicode
5 //! scalar value]', which is similar to, but not the same as, a '[Unicode code point]'.
8 //! [Unicode scalar value]: https://www.unicode.org/glossary/#unicode_scalar_value
9 //! [Unicode code point]: https://www.unicode.org/glossary/#code_point
11 //! This module exists for technical reasons; the primary documentation for
12 //! `char` is directly on [the `char` primitive type][char] itself.
14 //! This module is the home of the iterator implementations for the iterators
15 //! implemented on `char`, as well as some useful constants and conversion
16 //! functions that convert various types to `char`.
18 #![allow(non_snake_case)]
19 #![stable(feature = "core_char", since = "1.2.0")]
26 #[stable(feature = "char_from_unchecked", since = "1.5.0")]
27 pub use self::convert::from_u32_unchecked;
28 #[stable(feature = "try_from", since = "1.34.0")]
29 pub use self::convert::CharTryFromError;
30 #[stable(feature = "char_from_str", since = "1.20.0")]
31 pub use self::convert::ParseCharError;
32 #[stable(feature = "rust1", since = "1.0.0")]
33 pub use self::convert::{from_digit, from_u32};
34 #[stable(feature = "decode_utf16", since = "1.9.0")]
35 pub use self::decode::{decode_utf16, DecodeUtf16, DecodeUtf16Error};
36 #[stable(feature = "unicode_version", since = "1.45.0")]
37 pub use crate::unicode::UNICODE_VERSION;
39 // perma-unstable re-exports
40 #[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
41 pub use self::methods::encode_utf16_raw;
42 #[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
43 pub use self::methods::encode_utf8_raw;
45 use crate::fmt::{self, Write};
46 use crate::iter::FusedIterator;
48 pub(crate) use self::methods::EscapeDebugExtArgs;
50 // UTF-8 ranges and tags for encoding characters
// Tag values: the fixed high bits of a UTF-8 byte, which identify its role.
// `10xxxxxx` marks a continuation byte.
51 const TAG_CONT: u8 = 0b1000_0000;
// `110xxxxx` leads a two-byte sequence.
52 const TAG_TWO_B: u8 = 0b1100_0000;
// `1110xxxx` leads a three-byte sequence.
53 const TAG_THREE_B: u8 = 0b1110_0000;
// `11110xxx` leads a four-byte sequence.
54 const TAG_FOUR_B: u8 = 0b1111_0000;
// Range limits: 0x80, 0x800 and 0x10000 are the first code points that need
// two, three and four UTF-8 bytes respectively.
// NOTE(review): presumably used as exclusive upper bounds when choosing the
// encoded length -- confirm against the encoder that reads them.
55 const MAX_ONE_B: u32 = 0x80;
56 const MAX_TWO_B: u32 = 0x800;
57 const MAX_THREE_B: u32 = 0x10000;
60 Lu Uppercase_Letter an uppercase letter
61 Ll Lowercase_Letter a lowercase letter
62 Lt Titlecase_Letter a digraphic character, with first part uppercase
63 Lm Modifier_Letter a modifier letter
64 Lo Other_Letter other letters, including syllables and ideographs
65 Mn Nonspacing_Mark a nonspacing combining mark (zero advance width)
66 Mc Spacing_Mark a spacing combining mark (positive advance width)
67 Me Enclosing_Mark an enclosing combining mark
68 Nd Decimal_Number a decimal digit
69 Nl Letter_Number a letterlike numeric character
70 No Other_Number a numeric character of other type
71 Pc Connector_Punctuation a connecting punctuation mark, like a tie
72 Pd Dash_Punctuation a dash or hyphen punctuation mark
73 Ps Open_Punctuation an opening punctuation mark (of a pair)
74 Pe Close_Punctuation a closing punctuation mark (of a pair)
75 Pi Initial_Punctuation an initial quotation mark
76 Pf Final_Punctuation a final quotation mark
77 Po Other_Punctuation a punctuation mark of other type
78 Sm Math_Symbol a symbol of primarily mathematical use
79 Sc Currency_Symbol a currency sign
80 Sk Modifier_Symbol a non-letterlike modifier symbol
81 So Other_Symbol a symbol of other type
82 Zs Space_Separator a space character (of various non-zero widths)
83 Zl Line_Separator U+2028 LINE SEPARATOR only
84 Zp Paragraph_Separator U+2029 PARAGRAPH SEPARATOR only
85 Cc Control a C0 or C1 control code
86 Cf Format a format control character
87 Cs Surrogate a surrogate code point
88 Co Private_Use a private-use character
89 Cn Unassigned a reserved unassigned code point or a noncharacter
92 /// The highest valid code point a `char` can have.
94 /// A [`char`] is a [Unicode Scalar Value], which means that it is a [Code
95 /// Point], but only ones within a certain range. `MAX` is the highest valid
96 /// code point that's a valid [Unicode Scalar Value].
98 /// [Unicode Scalar Value]: https://www.unicode.org/glossary/#unicode_scalar_value
99 /// [Code Point]: https://www.unicode.org/glossary/#code_point
100 #[stable(feature = "rust1", since = "1.0.0")]
101 pub const MAX: char = char::MAX;
103 /// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a decoding error.
106 /// It can occur, for example, when giving ill-formed UTF-8 bytes to
107 /// [`String::from_utf8_lossy`](../../std/string/struct.String.html#method.from_utf8_lossy).
108 #[stable(feature = "decode_utf16", since = "1.9.0")]
109 pub const REPLACEMENT_CHARACTER: char = char::REPLACEMENT_CHARACTER;
111 /// An iterator that yields the hexadecimal Unicode escape of a
112 /// character, as `char`s.
114 /// This `struct` is created by the [`escape_unicode`] method on [`char`]. See
115 /// its documentation for more.
117 /// [`escape_unicode`]: char::escape_unicode
118 #[derive(Clone, Debug)]
119 #[stable(feature = "rust1", since = "1.0.0")]
120 pub struct EscapeUnicode {
122 state: EscapeUnicodeState,
124 // The index of the next hex digit to be printed (0 if none),
125 // i.e., the number of hex digits that remain after the next one;
126 // indices increase from the least significant digit: 0x543210
127 hex_digit_idx: usize,
130 // The enum values are ordered so that their representation is the
131 // same as the remaining length (besides the hexadecimal digits). This
132 // likely makes `len()` a single load from memory and worth inlining.
133 #[derive(Clone, Debug)]
134 enum EscapeUnicodeState {
// Walks a small state machine, advancing one state per call:
// Backslash -> Type -> LeftBrace -> Value (once per hex digit) -> RightBrace -> Done.
143 #[stable(feature = "rust1", since = "1.0.0")]
144 impl Iterator for EscapeUnicode {
147 fn next(&mut self) -> Option<char> {
149 EscapeUnicodeState::Backslash => {
150 self.state = EscapeUnicodeState::Type;
153 EscapeUnicodeState::Type => {
154 self.state = EscapeUnicodeState::LeftBrace;
157 EscapeUnicodeState::LeftBrace => {
158 self.state = EscapeUnicodeState::Value;
161 EscapeUnicodeState::Value => {
// Select the 4-bit nibble at position `hex_digit_idx`
// (position 0 is the least significant nibble of the code point).
162 let hex_digit = ((self.c as u32) >> (self.hex_digit_idx * 4)) & 0xf;
// The mask keeps `hex_digit` within 0..=15, which is always a valid
// base-16 digit, so this `unwrap` cannot fail.
163 let c = from_digit(hex_digit, 16).unwrap();
// Nibble 0 is the final digit: the closing brace comes next.
// Otherwise step down to the next lower nibble and stay in `Value`.
164 if self.hex_digit_idx == 0 {
165 self.state = EscapeUnicodeState::RightBrace;
167 self.hex_digit_idx -= 1;
171 EscapeUnicodeState::RightBrace => {
172 self.state = EscapeUnicodeState::Done;
// Terminal state: the state is left unchanged and `None` is returned.
175 EscapeUnicodeState::Done => None,
180 fn size_hint(&self) -> (usize, Option<usize>) {
186 fn count(self) -> usize {
// Answered from the current state alone, without stepping the iterator:
// every state other than `Done` still has the closing brace ahead of it,
// so the final item is always '}'.
190 fn last(self) -> Option<char> {
192 EscapeUnicodeState::Done => None,
194 EscapeUnicodeState::RightBrace
195 | EscapeUnicodeState::Value
196 | EscapeUnicodeState::LeftBrace
197 | EscapeUnicodeState::Type
198 | EscapeUnicodeState::Backslash => Some('}'),
// Maps each state to the number of states still to be emitted, counting the
// current one: `Backslash` has five ahead of it, `Done` has none.
// NOTE(review): `Value` is counted once here; presumably the digits still
// pending per `hex_digit_idx` are added on top of this figure -- confirm.
203 #[stable(feature = "exact_size_escape", since = "1.11.0")]
204 impl ExactSizeIterator for EscapeUnicode {
206 fn len(&self) -> usize {
207 // The match is a single memory access with no branching
210 EscapeUnicodeState::Done => 0,
211 EscapeUnicodeState::RightBrace => 1,
212 EscapeUnicodeState::Value => 2,
213 EscapeUnicodeState::LeftBrace => 3,
214 EscapeUnicodeState::Type => 4,
215 EscapeUnicodeState::Backslash => 5,
220 #[stable(feature = "fused", since = "1.26.0")]
221 impl FusedIterator for EscapeUnicode {}
// Formats the escape by iterating over its characters.
223 #[stable(feature = "char_struct_display", since = "1.16.0")]
224 impl fmt::Display for EscapeUnicode {
225 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// `fmt` only has `&self`, so a clone is iterated; the original iterator is
// left exactly where it was.
226 for c in self.clone() {
233 /// An iterator that yields the literal escape code of a `char`.
235 /// This `struct` is created by the [`escape_default`] method on [`char`]. See
236 /// its documentation for more.
238 /// [`escape_default`]: char::escape_default
239 #[derive(Clone, Debug)]
240 #[stable(feature = "rust1", since = "1.0.0")]
241 pub struct EscapeDefault {
242 state: EscapeDefaultState,
245 #[derive(Clone, Debug)]
246 enum EscapeDefaultState {
250 Unicode(EscapeUnicode),
// Iterates over the escape sequence held in `state`:
// `Backslash(c)` -> `Char(c)` -> `Done`, while the `Unicode` variant forwards
// every call to the wrapped `EscapeUnicode` iterator.
253 #[stable(feature = "rust1", since = "1.0.0")]
254 impl Iterator for EscapeDefault {
257 fn next(&mut self) -> Option<char> {
259 EscapeDefaultState::Backslash(c) => {
260 self.state = EscapeDefaultState::Char(c);
263 EscapeDefaultState::Char(c) => {
264 self.state = EscapeDefaultState::Done;
// Terminal state: nothing is changed and `None` is returned.
267 EscapeDefaultState::Done => None,
268 EscapeDefaultState::Unicode(ref mut iter) => iter.next(),
273 fn size_hint(&self) -> (usize, Option<usize>) {
279 fn count(self) -> usize {
// Jumps straight to the requested position instead of stepping through
// `next`. The state is always left where the equivalent `next` calls would
// have left it.
283 fn nth(&mut self, n: usize) -> Option<char> {
// From `Backslash`, two items are pending: n == 0 consumes only the first.
285 EscapeDefaultState::Backslash(c) if n == 0 => {
286 self.state = EscapeDefaultState::Char(c);
// n == 1 consumes both pending items.
289 EscapeDefaultState::Backslash(c) if n == 1 => {
290 self.state = EscapeDefaultState::Done;
// Any larger n runs past the end.
293 EscapeDefaultState::Backslash(_) => {
294 self.state = EscapeDefaultState::Done;
// From `Char`, one item is pending: the iterator is exhausted either way,
// and the char is only yielded when it is the one asked for.
297 EscapeDefaultState::Char(c) => {
298 self.state = EscapeDefaultState::Done;
300 if n == 0 { Some(c) } else { None }
302 EscapeDefaultState::Done => None,
303 EscapeDefaultState::Unicode(ref mut i) => i.nth(n),
// The stored char is the final item of both `Backslash(c)` and `Char(c)`,
// so it can be returned without iterating.
307 fn last(self) -> Option<char> {
309 EscapeDefaultState::Unicode(iter) => iter.last(),
310 EscapeDefaultState::Done => None,
311 EscapeDefaultState::Backslash(c) | EscapeDefaultState::Char(c) => Some(c),
// Number of items still pending in each state; the `Unicode` variant defers
// to the length reported by the wrapped iterator.
316 #[stable(feature = "exact_size_escape", since = "1.11.0")]
317 impl ExactSizeIterator for EscapeDefault {
318 fn len(&self) -> usize {
320 EscapeDefaultState::Done => 0,
321 EscapeDefaultState::Char(_) => 1,
322 EscapeDefaultState::Backslash(_) => 2,
323 EscapeDefaultState::Unicode(ref iter) => iter.len(),
328 #[stable(feature = "fused", since = "1.26.0")]
329 impl FusedIterator for EscapeDefault {}
// Formats the escape by iterating over its characters.
331 #[stable(feature = "char_struct_display", since = "1.16.0")]
332 impl fmt::Display for EscapeDefault {
333 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// `fmt` only has `&self`, so a clone is iterated; the original iterator is
// left exactly where it was.
334 for c in self.clone() {
341 /// An iterator that yields the literal escape code of a `char`.
343 /// This `struct` is created by the [`escape_debug`] method on [`char`]. See its
344 /// documentation for more.
346 /// [`escape_debug`]: char::escape_debug
347 #[stable(feature = "char_escape_debug", since = "1.20.0")]
348 #[derive(Clone, Debug)]
349 pub struct EscapeDebug(EscapeDefault);
351 #[stable(feature = "char_escape_debug", since = "1.20.0")]
352 impl Iterator for EscapeDebug {
354 fn next(&mut self) -> Option<char> {
357 fn size_hint(&self) -> (usize, Option<usize>) {
362 #[stable(feature = "char_escape_debug", since = "1.20.0")]
363 impl ExactSizeIterator for EscapeDebug {}
365 #[stable(feature = "fused", since = "1.26.0")]
366 impl FusedIterator for EscapeDebug {}
// `EscapeDebug` is a newtype around `EscapeDefault`; formatting is forwarded
// unchanged to the wrapped value.
368 #[stable(feature = "char_escape_debug", since = "1.20.0")]
369 impl fmt::Display for EscapeDebug {
370 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
371 fmt::Display::fmt(&self.0, f)
375 /// An iterator that yields the lowercase equivalent of a `char`.
377 /// This `struct` is created by the [`to_lowercase`] method on [`char`]. See
378 /// its documentation for more.
380 /// [`to_lowercase`]: char::to_lowercase
381 #[stable(feature = "rust1", since = "1.0.0")]
382 #[derive(Debug, Clone)]
383 pub struct ToLowercase(CaseMappingIter);
385 #[stable(feature = "rust1", since = "1.0.0")]
386 impl Iterator for ToLowercase {
388 fn next(&mut self) -> Option<char> {
391 fn size_hint(&self) -> (usize, Option<usize>) {
396 #[stable(feature = "rust1", since = "1.0.0")]
397 impl DoubleEndedIterator for ToLowercase {
398 fn next_back(&mut self) -> Option<char> {
403 #[stable(feature = "fused", since = "1.26.0")]
404 impl FusedIterator for ToLowercase {}
406 #[stable(feature = "exact_size_case_mapping_iter", since = "1.35.0")]
407 impl ExactSizeIterator for ToLowercase {}
409 /// An iterator that yields the uppercase equivalent of a `char`.
411 /// This `struct` is created by the [`to_uppercase`] method on [`char`]. See
412 /// its documentation for more.
414 /// [`to_uppercase`]: char::to_uppercase
415 #[stable(feature = "rust1", since = "1.0.0")]
416 #[derive(Debug, Clone)]
417 pub struct ToUppercase(CaseMappingIter);
419 #[stable(feature = "rust1", since = "1.0.0")]
420 impl Iterator for ToUppercase {
422 fn next(&mut self) -> Option<char> {
425 fn size_hint(&self) -> (usize, Option<usize>) {
430 #[stable(feature = "rust1", since = "1.0.0")]
431 impl DoubleEndedIterator for ToUppercase {
432 fn next_back(&mut self) -> Option<char> {
437 #[stable(feature = "fused", since = "1.26.0")]
438 impl FusedIterator for ToUppercase {}
440 #[stable(feature = "exact_size_case_mapping_iter", since = "1.35.0")]
441 impl ExactSizeIterator for ToUppercase {}
443 #[derive(Debug, Clone)]
444 enum CaseMappingIter {
445 Three(char, char, char),
451 impl CaseMappingIter {
// Builds the iterator from a fixed three-slot array in which trailing '\0'
// entries mark unused slots. The slots are checked from the back, so the
// chosen variant is determined by the last slot that is not '\0'.
452 fn new(chars: [char; 3]) -> CaseMappingIter {
453 if chars[2] == '\0' {
454 if chars[1] == '\0' {
// The first slot is never treated as padding: an all-NUL array still
// produces a single '\0' item.
455 CaseMappingIter::One(chars[0]) // Including if chars[0] == '\0'
457 CaseMappingIter::Two(chars[0], chars[1])
460 CaseMappingIter::Three(chars[0], chars[1], chars[2])
// Front-to-back iteration. The variant itself records how many chars are
// left, so each step replaces `self` with the next smaller variant.
465 impl Iterator for CaseMappingIter {
467 fn next(&mut self) -> Option<char> {
// The leading char leaves the stored state; the trailing chars are kept.
469 CaseMappingIter::Three(a, b, c) => {
470 *self = CaseMappingIter::Two(b, c);
473 CaseMappingIter::Two(b, c) => {
474 *self = CaseMappingIter::One(c);
477 CaseMappingIter::One(c) => {
478 *self = CaseMappingIter::Zero;
// Terminal state: `self` is left as `Zero`.
481 CaseMappingIter::Zero => None,
// The remaining length is read directly off the variant.
485 fn size_hint(&self) -> (usize, Option<usize>) {
486 let size = match self {
487 CaseMappingIter::Three(..) => 3,
488 CaseMappingIter::Two(..) => 2,
489 CaseMappingIter::One(_) => 1,
490 CaseMappingIter::Zero => 0,
// Back-to-front iteration: the mirror image of `next`. Each step shrinks to
// the next smaller variant, but keeps the leading chars instead of the
// trailing ones.
496 impl DoubleEndedIterator for CaseMappingIter {
497 fn next_back(&mut self) -> Option<char> {
499 CaseMappingIter::Three(a, b, c) => {
500 *self = CaseMappingIter::Two(a, b);
503 CaseMappingIter::Two(b, c) => {
504 *self = CaseMappingIter::One(b);
507 CaseMappingIter::One(c) => {
508 *self = CaseMappingIter::Zero;
// Terminal state: `self` is left as `Zero`.
511 CaseMappingIter::Zero => None,
// Formats the chars still held by the iterator, dispatching on the variant.
516 impl fmt::Display for CaseMappingIter {
517 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
519 CaseMappingIter::Three(a, b, c) => {
524 CaseMappingIter::Two(b, c) => {
// A single char is written directly; its result is the result of `fmt`.
528 CaseMappingIter::One(c) => f.write_char(c),
// Nothing left to write: succeed without touching the formatter.
529 CaseMappingIter::Zero => Ok(()),
// `ToLowercase` is a newtype around `CaseMappingIter`; formatting is
// forwarded unchanged to the wrapped value.
534 #[stable(feature = "char_struct_display", since = "1.16.0")]
535 impl fmt::Display for ToLowercase {
536 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
537 fmt::Display::fmt(&self.0, f)
541 #[stable(feature = "char_struct_display", since = "1.16.0")]
542 impl fmt::Display for ToUppercase {
543 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
544 fmt::Display::fmt(&self.0, f)