3 //! The `char` type represents a single character. More specifically, since
4 //! 'character' isn't a well-defined concept in Unicode, `char` is a '[Unicode
5 //! scalar value]', which is similar to, but not the same as, a '[Unicode code
8 //! [Unicode scalar value]: http://www.unicode.org/glossary/#unicode_scalar_value
9 //! [Unicode code point]: http://www.unicode.org/glossary/#code_point
11 //! This module exists for technical reasons; the primary documentation for
12 //! `char` is directly on [the `char` primitive type](../../std/primitive.char.html)
15 //! This module is the home of the iterator implementations for the iterators
16 //! implemented on `char`, as well as some useful constants and conversion
17 //! functions that convert various types to `char`.
19 #![allow(non_snake_case)]
20 #![stable(feature = "core_char", since = "1.2.0")]
27 #[stable(feature = "rust1", since = "1.0.0")]
28 pub use self::convert::{from_u32, from_digit};
29 #[stable(feature = "char_from_unchecked", since = "1.5.0")]
30 pub use self::convert::from_u32_unchecked;
31 #[stable(feature = "char_from_str", since = "1.20.0")]
32 pub use self::convert::ParseCharError;
33 #[unstable(feature = "try_from", issue = "33417")]
34 pub use self::convert::CharTryFromError;
35 #[stable(feature = "decode_utf16", since = "1.9.0")]
36 pub use self::decode::{decode_utf16, DecodeUtf16, DecodeUtf16Error};
38 // unstable re-exports
39 #[unstable(feature = "unicode_version", issue = "49726")]
40 pub use unicode::tables::UNICODE_VERSION;
41 #[unstable(feature = "unicode_version", issue = "49726")]
42 pub use unicode::version::UnicodeVersion;
44 use fmt::{self, Write};
45 use iter::FusedIterator;
47 // UTF-8 ranges and tags for encoding characters
// Each `TAG_*` value has only its top one to four bits set. They are presumably
// the marker bits for continuation bytes and for 2-, 3- and 4-byte lead bytes;
// the encoder that consumes them is not visible here -- TODO confirm.
48 const TAG_CONT: u8 = 0b1000_0000;
49 const TAG_TWO_B: u8 = 0b1100_0000;
50 const TAG_THREE_B: u8 = 0b1110_0000;
51 const TAG_FOUR_B: u8 = 0b1111_0000;
// Code-point thresholds; presumably the exclusive upper bounds of the 1-, 2-
// and 3-byte encodings -- TODO confirm against the encoder.
52 const MAX_ONE_B: u32 = 0x80;
53 const MAX_TWO_B: u32 = 0x800;
54 const MAX_THREE_B: u32 = 0x10000;
57 Lu Uppercase_Letter an uppercase letter
58 Ll Lowercase_Letter a lowercase letter
59 Lt Titlecase_Letter a digraphic character, with first part uppercase
60 Lm Modifier_Letter a modifier letter
61 Lo Other_Letter other letters, including syllables and ideographs
62 Mn Nonspacing_Mark a nonspacing combining mark (zero advance width)
63 Mc Spacing_Mark a spacing combining mark (positive advance width)
64 Me Enclosing_Mark an enclosing combining mark
65 Nd Decimal_Number a decimal digit
66 Nl Letter_Number a letterlike numeric character
67 No Other_Number a numeric character of other type
68 Pc Connector_Punctuation a connecting punctuation mark, like a tie
69 Pd Dash_Punctuation a dash or hyphen punctuation mark
70 Ps Open_Punctuation an opening punctuation mark (of a pair)
71 Pe Close_Punctuation a closing punctuation mark (of a pair)
72 Pi Initial_Punctuation an initial quotation mark
73 Pf Final_Punctuation a final quotation mark
74 Po Other_Punctuation a punctuation mark of other type
75 Sm Math_Symbol a symbol of primarily mathematical use
76 Sc Currency_Symbol a currency sign
77 Sk Modifier_Symbol a non-letterlike modifier symbol
78 So Other_Symbol a symbol of other type
79 Zs Space_Separator a space character (of various non-zero widths)
80 Zl Line_Separator U+2028 LINE SEPARATOR only
81 Zp Paragraph_Separator U+2029 PARAGRAPH SEPARATOR only
82 Cc Control a C0 or C1 control code
83 Cf Format a format control character
84 Cs Surrogate a surrogate code point
85 Co Private_Use a private-use character
86 Cn Unassigned a reserved unassigned code point or a noncharacter
89 /// The highest valid code point a `char` can have, `U+10FFFF`.
91 /// A [`char`] is a [Unicode Scalar Value], which means that it is a [Code
92 /// Point], but only ones within a certain range. `MAX` is the highest valid
93 /// code point that's a valid [Unicode Scalar Value].
95 /// [`char`]: ../../std/primitive.char.html
96 /// [Unicode Scalar Value]: http://www.unicode.org/glossary/#unicode_scalar_value
97 /// [Code Point]: http://www.unicode.org/glossary/#code_point
98 #[stable(feature = "rust1", since = "1.0.0")]
99 pub const MAX: char = '\u{10ffff}';
101 /// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a
104 /// It can occur, for example, when giving ill-formed UTF-8 bytes to
105 /// [`String::from_utf8_lossy`](../../std/string/struct.String.html#method.from_utf8_lossy).
106 #[stable(feature = "decode_utf16", since = "1.9.0")]
107 pub const REPLACEMENT_CHARACTER: char = '\u{FFFD}';
109 /// An iterator that yields the hexadecimal Unicode escape of a
110 /// character, as `char`s.
112 /// This `struct` is created by the [`escape_unicode`] method on [`char`]. See
113 /// its documentation for more.
115 /// [`escape_unicode`]: ../../std/primitive.char.html#method.escape_unicode
116 /// [`char`]: ../../std/primitive.char.html
117 #[derive(Clone, Debug)]
118 #[stable(feature = "rust1", since = "1.0.0")]
119 pub struct EscapeUnicode {
// Which part of the escape sequence the iterator is currently positioned at.
121 state: EscapeUnicodeState,
123 // The index of the next hex digit to be printed, counted upwards from the
124 // least significant digit: 0x543210. `next()` steps it down to 0, and
125 // `len()` adds it to a per-state constant to get the remaining length.
126 hex_digit_idx: usize,
129 // The enum values are ordered so that their representation is the
130 // same as the remaining length (besides the hexadecimal digits). This
131 // likely makes `len()` a single load from memory and worth inlining.
132 #[derive(Clone, Debug)]
133 enum EscapeUnicodeState {
142 #[stable(feature = "rust1", since = "1.0.0")]
143 impl Iterator for EscapeUnicode {
// Advances the state machine one step per call:
// Backslash -> Type -> LeftBrace -> Value (once per hex digit) -> RightBrace -> Done.
146 fn next(&mut self) -> Option<char> {
148 EscapeUnicodeState::Backslash => {
149 self.state = EscapeUnicodeState::Type;
152 EscapeUnicodeState::Type => {
153 self.state = EscapeUnicodeState::LeftBrace;
156 EscapeUnicodeState::LeftBrace => {
157 self.state = EscapeUnicodeState::Value;
160 EscapeUnicodeState::Value => {
// Select the 4-bit nibble at position `hex_digit_idx` (0 = least significant).
161 let hex_digit = ((self.c as u32) >> (self.hex_digit_idx * 4)) & 0xf;
// The nibble is masked to at most 0xf, so it is always a valid base-16 digit
// and the `unwrap` cannot fail.
162 let c = from_digit(hex_digit, 16).unwrap();
// Index 0 is the last digit to emit; otherwise step down to the next
// less significant one.
163 if self.hex_digit_idx == 0 {
164 self.state = EscapeUnicodeState::RightBrace;
166 self.hex_digit_idx -= 1;
170 EscapeUnicodeState::RightBrace => {
171 self.state = EscapeUnicodeState::Done;
// `Done` never changes the state again, so `None` is returned from then on.
174 EscapeUnicodeState::Done => None,
179 fn size_hint(&self) -> (usize, Option<usize>) {
185 fn count(self) -> usize {
// Returns `None` when already exhausted and `'}'` from every other state.
189 fn last(self) -> Option<char> {
191 EscapeUnicodeState::Done => None,
193 EscapeUnicodeState::RightBrace |
194 EscapeUnicodeState::Value |
195 EscapeUnicodeState::LeftBrace |
196 EscapeUnicodeState::Type |
197 EscapeUnicodeState::Backslash => Some('}'),
202 #[stable(feature = "exact_size_escape", since = "1.11.0")]
203 impl ExactSizeIterator for EscapeUnicode {
// Remaining length = `hex_digit_idx` plus a per-state constant, ranging from
// 0 for `Done` up to 5 for `Backslash`.
205 fn len(&self) -> usize {
206 // The match is a single memory access with no branching
207 self.hex_digit_idx + match self.state {
208 EscapeUnicodeState::Done => 0,
209 EscapeUnicodeState::RightBrace => 1,
210 EscapeUnicodeState::Value => 2,
211 EscapeUnicodeState::LeftBrace => 3,
212 EscapeUnicodeState::Type => 4,
213 EscapeUnicodeState::Backslash => 5,
218 #[stable(feature = "fused", since = "1.26.0")]
// Marker impl with an empty body; `next` keeps returning `None` once the state
// is `Done`.
219 impl FusedIterator for EscapeUnicode {}
221 #[stable(feature = "char_struct_display", since = "1.16.0")]
222 impl fmt::Display for EscapeUnicode {
223 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
// Iterate over a clone: `fmt` only has `&self`, so the iterator being
// formatted is left untouched.
224 for c in self.clone() {
231 /// An iterator that yields the literal escape code of a `char`.
233 /// This `struct` is created by the [`escape_default`] method on [`char`]. See
234 /// its documentation for more.
236 /// [`escape_default`]: ../../std/primitive.char.html#method.escape_default
237 /// [`char`]: ../../std/primitive.char.html
238 #[derive(Clone, Debug)]
239 #[stable(feature = "rust1", since = "1.0.0")]
240 pub struct EscapeDefault {
// Current position in the escape sequence; all iteration logic matches on it.
241 state: EscapeDefaultState
// Internal state of `EscapeDefault`. The `Unicode` variant wraps an
// `EscapeUnicode` iterator, which `EscapeDefault` delegates to while in that state.
244 #[derive(Clone, Debug)]
245 enum EscapeDefaultState {
249 Unicode(EscapeUnicode),
252 #[stable(feature = "rust1", since = "1.0.0")]
253 impl Iterator for EscapeDefault {
// State transitions: `Backslash(c)` -> `Char(c)` -> `Done`. The `Unicode`
// state forwards to the wrapped `EscapeUnicode` iterator instead.
256 fn next(&mut self) -> Option<char> {
258 EscapeDefaultState::Backslash(c) => {
259 self.state = EscapeDefaultState::Char(c);
262 EscapeDefaultState::Char(c) => {
263 self.state = EscapeDefaultState::Done;
266 EscapeDefaultState::Done => None,
267 EscapeDefaultState::Unicode(ref mut iter) => iter.next(),
272 fn size_hint(&self) -> (usize, Option<usize>) {
278 fn count(self) -> usize {
// Specialised `nth`: from `Backslash`, `n == 0` moves on to `Char(c)`, while
// any larger `n` jumps straight to `Done`. `Char` always ends in `Done`, and
// `Unicode` forwards to the wrapped iterator's `nth`.
282 fn nth(&mut self, n: usize) -> Option<char> {
284 EscapeDefaultState::Backslash(c) if n == 0 => {
285 self.state = EscapeDefaultState::Char(c);
288 EscapeDefaultState::Backslash(c) if n == 1 => {
289 self.state = EscapeDefaultState::Done;
292 EscapeDefaultState::Backslash(_) => {
293 self.state = EscapeDefaultState::Done;
296 EscapeDefaultState::Char(c) => {
297 self.state = EscapeDefaultState::Done;
305 EscapeDefaultState::Done => None,
306 EscapeDefaultState::Unicode(ref mut i) => i.nth(n),
// The final item is `c` in both the `Backslash(c)` and `Char(c)` states.
310 fn last(self) -> Option<char> {
312 EscapeDefaultState::Unicode(iter) => iter.last(),
313 EscapeDefaultState::Done => None,
314 EscapeDefaultState::Backslash(c) | EscapeDefaultState::Char(c) => Some(c),
319 #[stable(feature = "exact_size_escape", since = "1.11.0")]
320 impl ExactSizeIterator for EscapeDefault {
// Items left per state: none for `Done`, one for `Char`, two for `Backslash`;
// `Unicode` defers to the wrapped iterator's own `len()`.
321 fn len(&self) -> usize {
323 EscapeDefaultState::Done => 0,
324 EscapeDefaultState::Char(_) => 1,
325 EscapeDefaultState::Backslash(_) => 2,
326 EscapeDefaultState::Unicode(ref iter) => iter.len(),
331 #[stable(feature = "fused", since = "1.26.0")]
// Marker impl with an empty body; `next` returns `None` in the `Done` state
// without changing it.
332 impl FusedIterator for EscapeDefault {}
334 #[stable(feature = "char_struct_display", since = "1.16.0")]
335 impl fmt::Display for EscapeDefault {
336 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
// Iterate over a clone: `fmt` only has `&self`, so the iterator being
// formatted is left untouched.
337 for c in self.clone() {
344 /// An iterator that yields the literal escape code of a `char`.
346 /// This `struct` is created by the [`escape_debug`] method on [`char`]. See its
347 /// documentation for more.
349 /// [`escape_debug`]: ../../std/primitive.char.html#method.escape_debug
350 /// [`char`]: ../../std/primitive.char.html
351 #[stable(feature = "char_escape_debug", since = "1.20.0")]
352 #[derive(Clone, Debug)]
// Newtype over `EscapeDefault`; its trait impls forward to the wrapped value.
353 pub struct EscapeDebug(EscapeDefault);
355 #[stable(feature = "char_escape_debug", since = "1.20.0")]
356 impl Iterator for EscapeDebug {
// Both methods forward directly to the wrapped `EscapeDefault`.
358 fn next(&mut self) -> Option<char> { self.0.next() }
359 fn size_hint(&self) -> (usize, Option<usize>) { self.0.size_hint() }
362 #[stable(feature = "char_escape_debug", since = "1.20.0")]
// Empty body: no `len()` override, so the trait's provided method is used.
363 impl ExactSizeIterator for EscapeDebug { }
365 #[stable(feature = "fused", since = "1.26.0")]
// Marker impl with an empty body; `next` simply forwards to the wrapped
// `EscapeDefault`.
366 impl FusedIterator for EscapeDebug {}
368 #[stable(feature = "char_escape_debug", since = "1.20.0")]
369 impl fmt::Display for EscapeDebug {
370 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
// Delegates to the `Display` impl of the wrapped `EscapeDefault`.
371 fmt::Display::fmt(&self.0, f)
375 /// An iterator that yields the lowercase equivalent of a `char`.
377 /// This `struct` is created by the [`to_lowercase`] method on [`char`]. See
378 /// its documentation for more.
380 /// [`to_lowercase`]: ../../std/primitive.char.html#method.to_lowercase
381 /// [`char`]: ../../std/primitive.char.html
382 #[stable(feature = "rust1", since = "1.0.0")]
383 #[derive(Debug, Clone)]
384 pub struct ToLowercase(CaseMappingIter);
386 #[stable(feature = "rust1", since = "1.0.0")]
387 impl Iterator for ToLowercase {
// NOTE(review): the body is not visible here; presumably it forwards to the
// wrapped `CaseMappingIter` -- confirm.
389 fn next(&mut self) -> Option<char> {
394 #[stable(feature = "fused", since = "1.26.0")]
// Marker impl with an empty body; it adds no methods of its own.
395 impl FusedIterator for ToLowercase {}
397 /// An iterator that yields the uppercase equivalent of a `char`.
399 /// This `struct` is created by the [`to_uppercase`] method on [`char`]. See
400 /// its documentation for more.
402 /// [`to_uppercase`]: ../../std/primitive.char.html#method.to_uppercase
403 /// [`char`]: ../../std/primitive.char.html
404 #[stable(feature = "rust1", since = "1.0.0")]
405 #[derive(Debug, Clone)]
406 pub struct ToUppercase(CaseMappingIter);
408 #[stable(feature = "rust1", since = "1.0.0")]
409 impl Iterator for ToUppercase {
// NOTE(review): the body is not visible here; presumably it forwards to the
// wrapped `CaseMappingIter` -- confirm.
411 fn next(&mut self) -> Option<char> {
416 #[stable(feature = "fused", since = "1.26.0")]
// Marker impl with an empty body; it adds no methods of its own.
417 impl FusedIterator for ToUppercase {}
// Holds the characters that are still to be yielded, at most three. The impls
// in this file use the variants `Three`, `Two`, `One` and `Zero`.
419 #[derive(Debug, Clone)]
420 enum CaseMappingIter {
421 Three(char, char, char),
427 impl CaseMappingIter {
// Builds the iterator from a fixed `[char; 3]`, treating trailing `'\0'`
// entries as padding: `chars[2] == '\0'` drops the third slot, and
// `chars[1] == '\0'` as well drops the second. The first slot is always kept.
428 fn new(chars: [char; 3]) -> CaseMappingIter {
429 if chars[2] == '\0' {
430 if chars[1] == '\0' {
431 CaseMappingIter::One(chars[0]) // Including if chars[0] == '\0'
433 CaseMappingIter::Two(chars[0], chars[1])
436 CaseMappingIter::Three(chars[0], chars[1], chars[2])
441 impl Iterator for CaseMappingIter {
// Each call shrinks the state by one variant (`Three` -> `Two` -> `One` ->
// `Zero`), keeping only the trailing characters; `Zero` returns `None`.
443 fn next(&mut self) -> Option<char> {
445 CaseMappingIter::Three(a, b, c) => {
446 *self = CaseMappingIter::Two(b, c);
449 CaseMappingIter::Two(b, c) => {
450 *self = CaseMappingIter::One(c);
453 CaseMappingIter::One(c) => {
454 *self = CaseMappingIter::Zero;
457 CaseMappingIter::Zero => None,
462 impl fmt::Display for CaseMappingIter {
// One arm per variant; `Zero` has nothing to write and returns `Ok(())`.
463 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
465 CaseMappingIter::Three(a, b, c) => {
470 CaseMappingIter::Two(b, c) => {
474 CaseMappingIter::One(c) => {
477 CaseMappingIter::Zero => Ok(()),
482 #[stable(feature = "char_struct_display", since = "1.16.0")]
483 impl fmt::Display for ToLowercase {
484 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
// Delegates to the `Display` impl of the wrapped `CaseMappingIter`.
485 fmt::Display::fmt(&self.0, f)
489 #[stable(feature = "char_struct_display", since = "1.16.0")]
490 impl fmt::Display for ToUppercase {
491 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
// Delegates to the `Display` impl of the wrapped `CaseMappingIter`.
492 fmt::Display::fmt(&self.0, f)