1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
13 //! The `char` type represents a single character. More specifically, since
14 //! 'character' isn't a well-defined concept in Unicode, `char` is a '[Unicode
15 //! scalar value]', which is similar to, but not the same as, a '[Unicode code
//! point]'.
18 //! [Unicode scalar value]: http://www.unicode.org/glossary/#unicode_scalar_value
19 //! [Unicode code point]: http://www.unicode.org/glossary/#code_point
21 //! This module exists for technical reasons; the primary documentation for
22 //! `char` is directly on [the `char` primitive type](../../std/primitive.char.html)
//! itself.
25 //! This module is the home of the iterator implementations for the iterators
26 //! implemented on `char`, as well as some useful constants and conversion
27 //! functions that convert various types to `char`.
29 #![allow(non_snake_case)]
30 #![stable(feature = "core_char", since = "1.2.0")]
37 #[stable(feature = "rust1", since = "1.0.0")]
38 pub use self::convert::{from_u32, from_digit};
39 #[stable(feature = "char_from_unchecked", since = "1.5.0")]
40 pub use self::convert::from_u32_unchecked;
41 #[stable(feature = "char_from_str", since = "1.20.0")]
42 pub use self::convert::ParseCharError;
43 #[unstable(feature = "try_from", issue = "33417")]
44 pub use self::convert::CharTryFromError;
45 #[stable(feature = "decode_utf16", since = "1.9.0")]
46 pub use self::decode::{decode_utf16, DecodeUtf16, DecodeUtf16Error};
48 // unstable re-exports
49 #[unstable(feature = "unicode_version", issue = "49726")]
50 pub use unicode::tables::UNICODE_VERSION;
51 #[unstable(feature = "unicode_version", issue = "49726")]
52 pub use unicode::version::UnicodeVersion;
54 use fmt::{self, Write};
55 use iter::FusedIterator;
57 // UTF-8 ranges and tags for encoding characters
// The TAG_* values are the fixed high-bit patterns of a UTF-8 byte: a
// continuation byte (10xxxxxx) and the lead byte of a two-, three- and
// four-byte sequence (110xxxxx, 1110xxxx, 11110xxx).
58 const TAG_CONT: u8 = 0b1000_0000;
59 const TAG_TWO_B: u8 = 0b1100_0000;
60 const TAG_THREE_B: u8 = 0b1110_0000;
61 const TAG_FOUR_B: u8 = 0b1111_0000;
// Code point thresholds at which the UTF-8 encoded length grows by one byte.
// NOTE(review): presumably used as exclusive upper bounds (`code < MAX_ONE_B`
// means one byte, and so on); the call sites are not visible here - confirm.
62 const MAX_ONE_B: u32 = 0x80;
63 const MAX_TWO_B: u32 = 0x800;
64 const MAX_THREE_B: u32 = 0x10000;
67 Lu Uppercase_Letter an uppercase letter
68 Ll Lowercase_Letter a lowercase letter
69 Lt Titlecase_Letter a digraphic character, with first part uppercase
70 Lm Modifier_Letter a modifier letter
71 Lo Other_Letter other letters, including syllables and ideographs
72 Mn Nonspacing_Mark a nonspacing combining mark (zero advance width)
73 Mc Spacing_Mark a spacing combining mark (positive advance width)
74 Me Enclosing_Mark an enclosing combining mark
75 Nd Decimal_Number a decimal digit
76 Nl Letter_Number a letterlike numeric character
77 No Other_Number a numeric character of other type
78 Pc Connector_Punctuation a connecting punctuation mark, like a tie
79 Pd Dash_Punctuation a dash or hyphen punctuation mark
80 Ps Open_Punctuation an opening punctuation mark (of a pair)
81 Pe Close_Punctuation a closing punctuation mark (of a pair)
82 Pi Initial_Punctuation an initial quotation mark
83 Pf Final_Punctuation a final quotation mark
84 Po Other_Punctuation a punctuation mark of other type
85 Sm Math_Symbol a symbol of primarily mathematical use
86 Sc Currency_Symbol a currency sign
87 Sk Modifier_Symbol a non-letterlike modifier symbol
88 So Other_Symbol a symbol of other type
89 Zs Space_Separator a space character (of various non-zero widths)
90 Zl Line_Separator U+2028 LINE SEPARATOR only
91 Zp Paragraph_Separator U+2029 PARAGRAPH SEPARATOR only
92 Cc Control a C0 or C1 control code
93 Cf Format a format control character
94 Cs Surrogate a surrogate code point
95 Co Private_Use a private-use character
96 Cn Unassigned a reserved unassigned code point or a noncharacter
99 /// The highest valid code point a `char` can have.
101 /// A [`char`] is a [Unicode Scalar Value], which means that it is a [Code
102 /// Point], but only ones within a certain range. `MAX` is the highest valid
103 /// code point that's a valid [Unicode Scalar Value].
105 /// [`char`]: ../../std/primitive.char.html
106 /// [Unicode Scalar Value]: http://www.unicode.org/glossary/#unicode_scalar_value
107 /// [Code Point]: http://www.unicode.org/glossary/#code_point
108 #[stable(feature = "rust1", since = "1.0.0")]
// The value is U+10FFFF, written as a `char` escape literal.
109 pub const MAX: char = '\u{10ffff}';
111 /// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a
/// decoding error.
///
114 /// It can occur, for example, when giving ill-formed UTF-8 bytes to
115 /// [`String::from_utf8_lossy`](../../std/string/struct.String.html#method.from_utf8_lossy).
116 #[stable(feature = "decode_utf16", since = "1.9.0")]
117 pub const REPLACEMENT_CHARACTER: char = '\u{FFFD}';
119 /// An iterator that yields the hexadecimal Unicode escape of a
120 /// character, as `char`s.
122 /// This `struct` is created by the [`escape_unicode`] method on [`char`]. See
123 /// its documentation for more.
125 /// [`escape_unicode`]: ../../std/primitive.char.html#method.escape_unicode
126 /// [`char`]: ../../std/primitive.char.html
127 #[derive(Clone, Debug)]
128 #[stable(feature = "rust1", since = "1.0.0")]
129 pub struct EscapeUnicode {
// NOTE(review): the iterator implementation also reads `self.c` (the
// character being escaped); its declaration is not visible in this listing.
// Which part of the escape sequence will be produced by the next call to
// `next()`.
131 state: EscapeUnicodeState,
133 // The index of the next hex digit to be printed (0 if none),
134 // i.e. the number of remaining hex digits to be printed;
135 // increasing from the least significant digit: 0x543210
136 hex_digit_idx: usize,
139 // The enum values are ordered so that their representation is the
140 // same as the remaining length (besides the hexadecimal digits). This
141 // likely makes `len()` a single load from memory and inline-worthy.
142 #[derive(Clone, Debug)]
143 enum EscapeUnicodeState {
// NOTE(review): the variant list is not visible in this listing. The states
// referenced elsewhere in the file are, from fewest to most remaining items:
// Done, RightBrace, Value, LeftBrace, Type, Backslash.
152 #[stable(feature = "rust1", since = "1.0.0")]
153 impl Iterator for EscapeUnicode {
// Advances a small state machine:
// Backslash -> Type -> LeftBrace -> Value (once per hex digit) -> RightBrace -> Done.
// NOTE(review): the value yielded by each arm is not visible in this listing.
156 fn next(&mut self) -> Option<char> {
158 EscapeUnicodeState::Backslash => {
159 self.state = EscapeUnicodeState::Type;
162 EscapeUnicodeState::Type => {
163 self.state = EscapeUnicodeState::LeftBrace;
166 EscapeUnicodeState::LeftBrace => {
167 self.state = EscapeUnicodeState::Value;
170 EscapeUnicodeState::Value => {
// Select the 4-bit group at position `hex_digit_idx`, where position 0 is the
// least significant hex digit of the code point.
171 let hex_digit = ((self.c as u32) >> (self.hex_digit_idx * 4)) & 0xf;
// The `& 0xf` mask keeps `hex_digit` below the radix 16, so `from_digit`
// cannot return `None` here.
172 let c = from_digit(hex_digit, 16).unwrap();
// Digit 0 is the last one; after it only the closing brace remains.
173 if self.hex_digit_idx == 0 {
174 self.state = EscapeUnicodeState::RightBrace;
// Otherwise step to the next less significant digit.
// NOTE(review): presumably inside an `else` branch whose line is not shown;
// an unconditional decrement at index 0 would underflow `usize`.
176 self.hex_digit_idx -= 1;
180 EscapeUnicodeState::RightBrace => {
181 self.state = EscapeUnicodeState::Done;
// Exhausted: keep returning `None`.
184 EscapeUnicodeState::Done => None,
// NOTE(review): the bodies of `size_hint` and `count` are not visible here.
189 fn size_hint(&self) -> (usize, Option<usize>) {
195 fn count(self) -> usize {
// Returns the final item without stepping through the sequence: an exhausted
// iterator has none, and in every other state the last item is '}'.
199 fn last(self) -> Option<char> {
201 EscapeUnicodeState::Done => None,
203 EscapeUnicodeState::RightBrace |
204 EscapeUnicodeState::Value |
205 EscapeUnicodeState::LeftBrace |
206 EscapeUnicodeState::Type |
207 EscapeUnicodeState::Backslash => Some('}'),
212 #[stable(feature = "exact_size_escape", since = "1.11.0")]
213 impl ExactSizeIterator for EscapeUnicode {
// Remaining length = `hex_digit_idx` (the hex digits after the current one)
// plus a per-state count of the other items still to be produced. In the
// `Value` state that count is 2: the current digit and the closing brace.
215 fn len(&self) -> usize {
216 // The match is a single memory access with no branching
217 self.hex_digit_idx + match self.state {
218 EscapeUnicodeState::Done => 0,
219 EscapeUnicodeState::RightBrace => 1,
220 EscapeUnicodeState::Value => 2,
221 EscapeUnicodeState::LeftBrace => 3,
222 EscapeUnicodeState::Type => 4,
223 EscapeUnicodeState::Backslash => 5,
// Marker impl: once in the `Done` state, `next()` keeps returning `None`.
228 #[stable(feature = "fused", since = "1.26.0")]
229 impl FusedIterator for EscapeUnicode {}
231 #[stable(feature = "char_struct_display", since = "1.16.0")]
232 impl fmt::Display for EscapeUnicode {
// Formats the items that remain in the iterator.
233 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
// `fmt` only has `&self`, so iterate over a clone and leave `self` untouched.
// NOTE(review): the loop body is not visible here; presumably it writes each
// `char` to `f`.
234 for c in self.clone() {
241 /// An iterator that yields the literal escape code of a `char`.
243 /// This `struct` is created by the [`escape_default`] method on [`char`]. See
244 /// its documentation for more.
246 /// [`escape_default`]: ../../std/primitive.char.html#method.escape_default
247 /// [`char`]: ../../std/primitive.char.html
248 #[derive(Clone, Debug)]
249 #[stable(feature = "rust1", since = "1.0.0")]
250 pub struct EscapeDefault {
// Current position in the escape sequence; see `EscapeDefaultState`.
251 state: EscapeDefaultState
// State of an `EscapeDefault` iterator.
// NOTE(review): only the `Unicode` variant is visible in this listing; the
// iterator code also matches on `Done`, `Char(c)` and `Backslash(c)`.
254 #[derive(Clone, Debug)]
255 enum EscapeDefaultState {
// Delegates to a `\u{...}`-style escape produced by `EscapeUnicode`.
259 Unicode(EscapeUnicode),
262 #[stable(feature = "rust1", since = "1.0.0")]
263 impl Iterator for EscapeDefault {
// State transitions: Backslash(c) -> Char(c) -> Done; the `Unicode` state
// forwards to the wrapped `EscapeUnicode` iterator.
// NOTE(review): the values yielded by the first two arms are not visible here.
266 fn next(&mut self) -> Option<char> {
268 EscapeDefaultState::Backslash(c) => {
269 self.state = EscapeDefaultState::Char(c);
272 EscapeDefaultState::Char(c) => {
273 self.state = EscapeDefaultState::Done;
276 EscapeDefaultState::Done => None,
277 EscapeDefaultState::Unicode(ref mut iter) => iter.next(),
// NOTE(review): the bodies of `size_hint` and `count` are not visible here.
282 fn size_hint(&self) -> (usize, Option<usize>) {
288 fn count(self) -> usize {
// Specialised `nth`: jumps straight to the resulting state instead of calling
// `next()` repeatedly.
292 fn nth(&mut self, n: usize) -> Option<char> {
// n == 0 from the start: behaves like a single `next()`, leaving `c` pending.
294 EscapeDefaultState::Backslash(c) if n == 0 => {
295 self.state = EscapeDefaultState::Char(c);
// n == 1 from the start: both items are consumed.
298 EscapeDefaultState::Backslash(c) if n == 1 => {
299 self.state = EscapeDefaultState::Done;
// n >= 2 from the start: skips past the end of the two-item sequence.
302 EscapeDefaultState::Backslash(_) => {
303 self.state = EscapeDefaultState::Done;
// Only one item is left, so the iterator is exhausted whatever `n` is.
// NOTE(review): the result expression for this arm is not visible here.
306 EscapeDefaultState::Char(c) => {
307 self.state = EscapeDefaultState::Done;
315 EscapeDefaultState::Done => None,
316 EscapeDefaultState::Unicode(ref mut i) => i.nth(n),
// The final item is the escaped character `c` in both two-item states; the
// `Unicode` state defers to `EscapeUnicode::last`.
320 fn last(self) -> Option<char> {
322 EscapeDefaultState::Unicode(iter) => iter.last(),
323 EscapeDefaultState::Done => None,
324 EscapeDefaultState::Backslash(c) | EscapeDefaultState::Char(c) => Some(c),
329 #[stable(feature = "exact_size_escape", since = "1.11.0")]
330 impl ExactSizeIterator for EscapeDefault {
// Number of items still to be yielded: derived directly from the state, or
// from the wrapped `EscapeUnicode` iterator in the `Unicode` state.
331 fn len(&self) -> usize {
333 EscapeDefaultState::Done => 0,
334 EscapeDefaultState::Char(_) => 1,
335 EscapeDefaultState::Backslash(_) => 2,
336 EscapeDefaultState::Unicode(ref iter) => iter.len(),
// Marker impl: in the `Done` state `next()` keeps returning `None`.
341 #[stable(feature = "fused", since = "1.26.0")]
342 impl FusedIterator for EscapeDefault {}
344 #[stable(feature = "char_struct_display", since = "1.16.0")]
345 impl fmt::Display for EscapeDefault {
// Formats the items that remain in the iterator.
346 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
// `fmt` only has `&self`, so iterate over a clone and leave `self` untouched.
// NOTE(review): the loop body is not visible here; presumably it writes each
// `char` to `f`.
347 for c in self.clone() {
354 /// An iterator that yields the literal escape code of a `char`.
356 /// This `struct` is created by the [`escape_debug`] method on [`char`]. See its
357 /// documentation for more.
359 /// [`escape_debug`]: ../../std/primitive.char.html#method.escape_debug
360 /// [`char`]: ../../std/primitive.char.html
361 #[stable(feature = "char_escape_debug", since = "1.20.0")]
362 #[derive(Clone, Debug)]
// Newtype around `EscapeDefault`; the trait impls below forward to field `.0`.
363 pub struct EscapeDebug(EscapeDefault);
365 #[stable(feature = "char_escape_debug", since = "1.20.0")]
366 impl Iterator for EscapeDebug {
// Both methods forward to the wrapped `EscapeDefault`.
368 fn next(&mut self) -> Option<char> { self.0.next() }
369 fn size_hint(&self) -> (usize, Option<usize>) { self.0.size_hint() }
// Empty impl: relies on the trait's provided methods, which build on the
// forwarded `size_hint()`.
372 #[stable(feature = "char_escape_debug", since = "1.20.0")]
373 impl ExactSizeIterator for EscapeDebug { }
// Marker impl; `next()` forwards to `EscapeDefault`, which is itself fused.
375 #[stable(feature = "fused", since = "1.26.0")]
376 impl FusedIterator for EscapeDebug {}
378 #[stable(feature = "char_escape_debug", since = "1.20.0")]
379 impl fmt::Display for EscapeDebug {
// Formats by forwarding to the `Display` impl of the wrapped `EscapeDefault`.
380 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
381 fmt::Display::fmt(&self.0, f)
385 /// An iterator that yields the lowercase equivalent of a `char`.
387 /// This `struct` is created by the [`to_lowercase`] method on [`char`]. See
388 /// its documentation for more.
390 /// [`to_lowercase`]: ../../std/primitive.char.html#method.to_lowercase
391 /// [`char`]: ../../std/primitive.char.html
392 #[stable(feature = "rust1", since = "1.0.0")]
393 #[derive(Debug, Clone)]
// Newtype around the shared `CaseMappingIter` state.
394 pub struct ToLowercase(CaseMappingIter);
396 #[stable(feature = "rust1", since = "1.0.0")]
397 impl Iterator for ToLowercase {
// NOTE(review): the body is not visible in this listing; presumably it
// forwards to the wrapped `CaseMappingIter` - confirm.
399 fn next(&mut self) -> Option<char> {
// Marker impl declaring that the iterator keeps returning `None` once exhausted.
404 #[stable(feature = "fused", since = "1.26.0")]
405 impl FusedIterator for ToLowercase {}
407 /// An iterator that yields the uppercase equivalent of a `char`.
409 /// This `struct` is created by the [`to_uppercase`] method on [`char`]. See
410 /// its documentation for more.
412 /// [`to_uppercase`]: ../../std/primitive.char.html#method.to_uppercase
413 /// [`char`]: ../../std/primitive.char.html
414 #[stable(feature = "rust1", since = "1.0.0")]
415 #[derive(Debug, Clone)]
// Newtype around the shared `CaseMappingIter` state.
416 pub struct ToUppercase(CaseMappingIter);
418 #[stable(feature = "rust1", since = "1.0.0")]
419 impl Iterator for ToUppercase {
// NOTE(review): the body is not visible in this listing; presumably it
// forwards to the wrapped `CaseMappingIter` - confirm.
421 fn next(&mut self) -> Option<char> {
// Marker impl declaring that the iterator keeps returning `None` once exhausted.
426 #[stable(feature = "fused", since = "1.26.0")]
427 impl FusedIterator for ToUppercase {}
// Holds the characters of a case mapping that have not been yielded yet.
// NOTE(review): only `Three` is visible in this listing; the code in this file
// also uses `Two(char, char)`, `One(char)` and `Zero`.
429 #[derive(Debug, Clone)]
430 enum CaseMappingIter {
431 Three(char, char, char),
437 impl CaseMappingIter {
// Builds the iterator from a fixed three-slot array in which trailing '\0'
// entries mark unused slots. The slots are checked from the end, so the
// variant chosen reflects how many leading characters are meaningful.
438 fn new(chars: [char; 3]) -> CaseMappingIter {
439 if chars[2] == '\0' {
440 if chars[1] == '\0' {
// The first slot is always kept, so a mapping to '\0' itself yields '\0'.
441 CaseMappingIter::One(chars[0]) // Including if chars[0] == '\0'
443 CaseMappingIter::Two(chars[0], chars[1])
446 CaseMappingIter::Three(chars[0], chars[1], chars[2])
451 impl Iterator for CaseMappingIter {
// Each call shrinks the state by one character: Three -> Two -> One -> Zero.
// NOTE(review): the yielded values are not visible in this listing; presumably
// the first character of the current variant is returned.
453 fn next(&mut self) -> Option<char> {
455 CaseMappingIter::Three(a, b, c) => {
456 *self = CaseMappingIter::Two(b, c);
459 CaseMappingIter::Two(b, c) => {
460 *self = CaseMappingIter::One(c);
463 CaseMappingIter::One(c) => {
464 *self = CaseMappingIter::Zero;
// Exhausted: keep returning `None`.
467 CaseMappingIter::Zero => None,
472 impl fmt::Display for CaseMappingIter {
// Formats the characters still held by the iterator.
// NOTE(review): the bodies of the first three arms are not visible in this
// listing; presumably each writes its characters to `f` in order.
473 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
475 CaseMappingIter::Three(a, b, c) => {
480 CaseMappingIter::Two(b, c) => {
484 CaseMappingIter::One(c) => {
// Nothing left to write.
487 CaseMappingIter::Zero => Ok(()),
492 #[stable(feature = "char_struct_display", since = "1.16.0")]
493 impl fmt::Display for ToLowercase {
// Formats by forwarding to the `Display` impl of the wrapped `CaseMappingIter`.
494 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
495 fmt::Display::fmt(&self.0, f)
499 #[stable(feature = "char_struct_display", since = "1.16.0")]
500 impl fmt::Display for ToUppercase {
// Formats by forwarding to the `Display` impl of the wrapped `CaseMappingIter`.
501 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
502 fmt::Display::fmt(&self.0, f)