1 //! UTF-8 and UTF-16 decoding iterators
4 use super::from_u32_unchecked;
6 /// An iterator that decodes UTF-16 encoded code points from an iterator of `u16`s.
///
/// Values of this type are returned by `decode_utf16`. Each item is a
/// `Result<char, DecodeUtf16Error>`: `Ok` for a decoded code point, `Err` for an
/// unpaired surrogate.
7 #[stable(feature = "decode_utf16", since = "1.9.0")]
8 #[derive(Clone, Debug)]
9 pub struct DecodeUtf16<I>
10 where I: Iterator<Item = u16>
16 /// An error that can be returned when decoding UTF-16 code points.
///
/// Carries the offending 16-bit code unit in `code`; the decoding iterator fills it
/// with the unpaired surrogate it encountered, and `Display` prints it in hexadecimal.
17 #[stable(feature = "decode_utf16", since = "1.9.0")]
18 #[derive(Debug, Clone, Eq, PartialEq)]
19 pub struct DecodeUtf16Error {
23 /// Creates an iterator over the UTF-16 encoded code points in `iter`,
24 /// returning unpaired surrogates as `Err`s.
///
/// Any `IntoIterator<Item = u16>` is accepted. Code units are pulled from it on
/// demand as the returned `DecodeUtf16` is advanced.
31 /// use std::char::decode_utf16;
34 /// // 𝄞mus<invalid>ic<invalid>
35 /// let v = [0xD834, 0xDD1E, 0x006d, 0x0075,
36 /// 0x0073, 0xDD1E, 0x0069, 0x0063,
39 /// assert_eq!(decode_utf16(v.iter().cloned())
40 /// .map(|r| r.map_err(|e| e.unpaired_surrogate()))
41 /// .collect::<Vec<_>>(),
43 /// Ok('m'), Ok('u'), Ok('s'),
50 /// A lossy decoder can be obtained by replacing `Err` results with the replacement character:
53 /// use std::char::{decode_utf16, REPLACEMENT_CHARACTER};
56 /// // 𝄞mus<invalid>ic<invalid>
57 /// let v = [0xD834, 0xDD1E, 0x006d, 0x0075,
58 /// 0x0073, 0xDD1E, 0x0069, 0x0063,
61 /// assert_eq!(decode_utf16(v.iter().cloned())
62 /// .map(|r| r.unwrap_or(REPLACEMENT_CHARACTER))
63 /// .collect::<String>(),
67 #[stable(feature = "decode_utf16", since = "1.9.0")]
69 pub fn decode_utf16<I: IntoIterator<Item = u16>>(iter: I) -> DecodeUtf16<I::IntoIter> {
// Convert the argument to its iterator form once, at construction time.
71 iter: iter.into_iter(),
76 #[stable(feature = "decode_utf16", since = "1.9.0")]
77 impl<I: Iterator<Item = u16>> Iterator for DecodeUtf16<I> {
78 type Item = Result<char, DecodeUtf16Error>;
// Decodes the next code point. Yields `Ok(char)` on success, `Err` carrying the
// offending code unit when a surrogate is unpaired, and `None` once the input
// is exhausted.
80 fn next(&mut self) -> Option<Result<char, DecodeUtf16Error>> {
// Prefer a unit parked in `self.buf` (`take` leaves `None` behind) ...
81 let u = match self.buf.take() {
// ... otherwise pull a fresh unit; `?` ends iteration when the source is empty.
83 None => self.iter.next()?
// Units outside 0xD800..=0xDFFF are not surrogates and stand for themselves.
86 if u < 0xD800 || 0xDFFF < u {
// SAFETY: `u` is a 16-bit value that was just checked to lie outside the
// surrogate range, so it is a valid Unicode scalar value.
88 Some(Ok(unsafe { from_u32_unchecked(u as u32) }))
89 } else if u >= 0xDC00 {
90 // a trailing surrogate
91 Some(Err(DecodeUtf16Error { code: u }))
// Remaining case: `u` is in 0xD800..=0xDBFF, a leading surrogate that needs
// a trailing surrogate right after it.
93 let u2 = match self.iter.next() {
// Input ended directly after the leading surrogate, so it is unpaired.
96 None => return Some(Err(DecodeUtf16Error { code: u })),
98 if u2 < 0xDC00 || u2 > 0xDFFF {
99 // not a trailing surrogate so we're not a valid
100 // surrogate pair, so rewind to redecode u2 next time.
102 return Some(Err(DecodeUtf16Error { code: u }));
105 // all ok, so let's decode it.
// The leading unit supplies the high 10 bits and the trailing unit the low
// 10 bits; adding 0x1_0000 moves the result into the supplementary planes.
106 let c = (((u - 0xD800) as u32) << 10 | (u2 - 0xDC00) as u32) + 0x1_0000;
// SAFETY: with `u` in 0xD800..=0xDBFF and `u2` in 0xDC00..=0xDFFF, `c` lies in
// 0x1_0000..=0x10_FFFF, which contains no surrogates and is within `char` range.
107 Some(Ok(unsafe { from_u32_unchecked(c) }))
// Estimates how many items remain, based on the inner iterator's own hint.
112 fn size_hint(&self) -> (usize, Option<usize>) {
// NOTE(review): `low`/`high` describe `self.iter` only; a unit parked in
// `self.buf` is not included in them — confirm the returned bounds account for it.
113 let (low, high) = self.iter.size_hint();
114 // we could be entirely valid surrogates (2 elements per
115 // char), or entirely non-surrogates (1 element per char)
120 impl DecodeUtf16Error {
121 /// Returns the unpaired surrogate which caused this error.
///
/// The decoding iterator reports two kinds of unpaired surrogate: a trailing
/// surrogate (0xDC00..=0xDFFF) with no leading surrogate before it, and a leading
/// surrogate that is followed by end of input or by a unit that is not a trailing
/// surrogate.
122 #[stable(feature = "decode_utf16", since = "1.9.0")]
123 pub fn unpaired_surrogate(&self) -> u16 {
128 #[stable(feature = "decode_utf16", since = "1.9.0")]
129 impl fmt::Display for DecodeUtf16Error {
// Writes a fixed message followed by the stored code unit formatted with `{:x}`,
// i.e. lowercase hexadecimal without a `0x` prefix.
130 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
131 write!(f, "unpaired surrogate found: {:x}", self.code)