2 use crate::borrow::Cow;
5 fn code_point_from_u32() {
6 assert!(CodePoint::from_u32(0).is_some());
7 assert!(CodePoint::from_u32(0xD800).is_some());
8 assert!(CodePoint::from_u32(0x10FFFF).is_some());
9 assert!(CodePoint::from_u32(0x110000).is_none());
13 fn code_point_to_u32() {
14 fn c(value: u32) -> CodePoint {
15 CodePoint::from_u32(value).unwrap()
17 assert_eq!(c(0).to_u32(), 0);
18 assert_eq!(c(0xD800).to_u32(), 0xD800);
19 assert_eq!(c(0x10FFFF).to_u32(), 0x10FFFF);
23 fn code_point_from_char() {
24 assert_eq!(CodePoint::from_char('a').to_u32(), 0x61);
25 assert_eq!(CodePoint::from_char('💩').to_u32(), 0x1F4A9);
29 fn code_point_to_string() {
30 assert_eq!(format!("{:?}", CodePoint::from_char('a')), "U+0061");
31 assert_eq!(format!("{:?}", CodePoint::from_char('💩')), "U+1F4A9");
35 fn code_point_to_char() {
36 fn c(value: u32) -> CodePoint {
37 CodePoint::from_u32(value).unwrap()
39 assert_eq!(c(0x61).to_char(), Some('a'));
40 assert_eq!(c(0x1F4A9).to_char(), Some('💩'));
41 assert_eq!(c(0xD800).to_char(), None);
45 fn code_point_to_char_lossy() {
46 fn c(value: u32) -> CodePoint {
47 CodePoint::from_u32(value).unwrap()
49 assert_eq!(c(0x61).to_char_lossy(), 'a');
50 assert_eq!(c(0x1F4A9).to_char_lossy(), '💩');
51 assert_eq!(c(0xD800).to_char_lossy(), '\u{FFFD}');
56 assert_eq!(Wtf8Buf::new().bytes, b"");
60 fn wtf8buf_from_str() {
61 assert_eq!(Wtf8Buf::from_str("").bytes, b"");
62 assert_eq!(Wtf8Buf::from_str("aé 💩").bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9");
66 fn wtf8buf_from_string() {
67 assert_eq!(Wtf8Buf::from_string(String::from("")).bytes, b"");
68 assert_eq!(Wtf8Buf::from_string(String::from("aé 💩")).bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9");
// Exercises `Wtf8Buf::from_wide`: an empty input must give an empty buffer, and a second
// input contains `0xD83D` twice, once on its own and once directly followed by `0xDCA9`.
// NOTE(review): the `assert_eq!(` opener and its closing `);` around the last two lines are
// not visible in this view — confirm they surround them.
72 fn wtf8buf_from_wide() {
73 assert_eq!(Wtf8Buf::from_wide(&[]).bytes, b"");
75 Wtf8Buf::from_wide(&[0x61, 0xE9, 0x20, 0xD83D, 0xD83D, 0xDCA9]).bytes,
// Expected bytes: "aé ", then the first `0xD83D` as the three bytes ED A0 BD, then the four
// bytes F0 9F 92 A9 for the `0xD83D 0xDCA9` pair.
76 b"a\xC3\xA9 \xED\xA0\xBD\xF0\x9F\x92\xA9"
81 fn wtf8buf_push_str() {
82 let mut string = Wtf8Buf::new();
83 assert_eq!(string.bytes, b"");
84 string.push_str("aé 💩");
85 assert_eq!(string.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9");
89 fn wtf8buf_push_char() {
90 let mut string = Wtf8Buf::from_str("aé ");
91 assert_eq!(string.bytes, b"a\xC3\xA9 ");
92 string.push_char('💩');
93 assert_eq!(string.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9");
98 let mut string = Wtf8Buf::from_str("aé ");
99 assert_eq!(string.bytes, b"a\xC3\xA9 ");
100 string.push(CodePoint::from_char('💩'));
101 assert_eq!(string.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9");
103 fn c(value: u32) -> CodePoint {
104 CodePoint::from_u32(value).unwrap()
107 let mut string = Wtf8Buf::new();
108 string.push(c(0xD83D)); // lead
109 string.push(c(0xDCA9)); // trail
110 assert_eq!(string.bytes, b"\xF0\x9F\x92\xA9"); // Magic!
112 let mut string = Wtf8Buf::new();
113 string.push(c(0xD83D)); // lead
114 string.push(c(0x20)); // not surrogate
115 string.push(c(0xDCA9)); // trail
116 assert_eq!(string.bytes, b"\xED\xA0\xBD \xED\xB2\xA9");
118 let mut string = Wtf8Buf::new();
119 string.push(c(0xD800)); // lead
120 string.push(c(0xDBFF)); // lead
121 assert_eq!(string.bytes, b"\xED\xA0\x80\xED\xAF\xBF");
123 let mut string = Wtf8Buf::new();
124 string.push(c(0xD800)); // lead
125 string.push(c(0xE000)); // not surrogate
126 assert_eq!(string.bytes, b"\xED\xA0\x80\xEE\x80\x80");
128 let mut string = Wtf8Buf::new();
129 string.push(c(0xD7FF)); // not surrogate
130 string.push(c(0xDC00)); // trail
131 assert_eq!(string.bytes, b"\xED\x9F\xBF\xED\xB0\x80");
133 let mut string = Wtf8Buf::new();
134 string.push(c(0x61)); // not surrogate, < 3 bytes
135 string.push(c(0xDC00)); // trail
136 assert_eq!(string.bytes, b"\x61\xED\xB0\x80");
138 let mut string = Wtf8Buf::new();
139 string.push(c(0xDC00)); // trail
140 assert_eq!(string.bytes, b"\xED\xB0\x80");
144 fn wtf8buf_push_wtf8() {
145 let mut string = Wtf8Buf::from_str("aé");
146 assert_eq!(string.bytes, b"a\xC3\xA9");
147 string.push_wtf8(Wtf8::from_str(" 💩"));
148 assert_eq!(string.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9");
150 fn w(v: &[u8]) -> &Wtf8 {
151 unsafe { Wtf8::from_bytes_unchecked(v) }
154 let mut string = Wtf8Buf::new();
155 string.push_wtf8(w(b"\xED\xA0\xBD")); // lead
156 string.push_wtf8(w(b"\xED\xB2\xA9")); // trail
157 assert_eq!(string.bytes, b"\xF0\x9F\x92\xA9"); // Magic!
159 let mut string = Wtf8Buf::new();
160 string.push_wtf8(w(b"\xED\xA0\xBD")); // lead
161 string.push_wtf8(w(b" ")); // not surrogate
162 string.push_wtf8(w(b"\xED\xB2\xA9")); // trail
163 assert_eq!(string.bytes, b"\xED\xA0\xBD \xED\xB2\xA9");
165 let mut string = Wtf8Buf::new();
166 string.push_wtf8(w(b"\xED\xA0\x80")); // lead
167 string.push_wtf8(w(b"\xED\xAF\xBF")); // lead
168 assert_eq!(string.bytes, b"\xED\xA0\x80\xED\xAF\xBF");
170 let mut string = Wtf8Buf::new();
171 string.push_wtf8(w(b"\xED\xA0\x80")); // lead
172 string.push_wtf8(w(b"\xEE\x80\x80")); // not surrogate
173 assert_eq!(string.bytes, b"\xED\xA0\x80\xEE\x80\x80");
175 let mut string = Wtf8Buf::new();
176 string.push_wtf8(w(b"\xED\x9F\xBF")); // not surrogate
177 string.push_wtf8(w(b"\xED\xB0\x80")); // trail
178 assert_eq!(string.bytes, b"\xED\x9F\xBF\xED\xB0\x80");
180 let mut string = Wtf8Buf::new();
181 string.push_wtf8(w(b"a")); // not surrogate, < 3 bytes
182 string.push_wtf8(w(b"\xED\xB0\x80")); // trail
183 assert_eq!(string.bytes, b"\x61\xED\xB0\x80");
185 let mut string = Wtf8Buf::new();
186 string.push_wtf8(w(b"\xED\xB0\x80")); // trail
187 assert_eq!(string.bytes, b"\xED\xB0\x80");
// Starts from the buffer for "aé" and expects it to contain only the byte `a` at the end.
// NOTE(review): the statement that shortens the buffer between the two lines below is not
// visible in this view — confirm it is present in the file.
191 fn wtf8buf_truncate() {
192 let mut string = Wtf8Buf::from_str("aé");
194 assert_eq!(string.bytes, b"a");
// Sets up the buffer for "aé" (three bytes: `a`, then the two bytes of `é`).
// NOTE(review): judging by the name, this presumably truncates in the middle of `é` and
// expects a panic; neither the truncating statement nor any attribute is visible in this
// view — confirm.
199 fn wtf8buf_truncate_fail_code_point_boundary() {
200 let mut string = Wtf8Buf::from_str("aé");
// Sets up the buffer for "aé" (three bytes: `a`, then the two bytes of `é`).
// NOTE(review): judging by the name, this presumably truncates to a length beyond the end of
// the buffer and expects a panic; neither the truncating statement nor any attribute is
// visible in this view — confirm.
206 fn wtf8buf_truncate_fail_longer() {
207 let mut string = Wtf8Buf::from_str("aé");
212 fn wtf8buf_into_string() {
213 let mut string = Wtf8Buf::from_str("aé 💩");
214 assert_eq!(string.clone().into_string(), Ok(String::from("aé 💩")));
215 string.push(CodePoint::from_u32(0xD800).unwrap());
216 assert_eq!(string.clone().into_string(), Err(string));
220 fn wtf8buf_into_string_lossy() {
221 let mut string = Wtf8Buf::from_str("aé 💩");
222 assert_eq!(string.clone().into_string_lossy(), String::from("aé 💩"));
223 string.push(CodePoint::from_u32(0xD800).unwrap());
224 assert_eq!(string.clone().into_string_lossy(), String::from("aé 💩�"));
228 fn wtf8buf_from_iterator() {
229 fn f(values: &[u32]) -> Wtf8Buf {
230 values.iter().map(|&c| CodePoint::from_u32(c).unwrap()).collect::<Wtf8Buf>()
232 assert_eq!(f(&[0x61, 0xE9, 0x20, 0x1F4A9]).bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9");
234 assert_eq!(f(&[0xD83D, 0xDCA9]).bytes, b"\xF0\x9F\x92\xA9"); // Magic!
235 assert_eq!(f(&[0xD83D, 0x20, 0xDCA9]).bytes, b"\xED\xA0\xBD \xED\xB2\xA9");
236 assert_eq!(f(&[0xD800, 0xDBFF]).bytes, b"\xED\xA0\x80\xED\xAF\xBF");
237 assert_eq!(f(&[0xD800, 0xE000]).bytes, b"\xED\xA0\x80\xEE\x80\x80");
238 assert_eq!(f(&[0xD7FF, 0xDC00]).bytes, b"\xED\x9F\xBF\xED\xB0\x80");
239 assert_eq!(f(&[0x61, 0xDC00]).bytes, b"\x61\xED\xB0\x80");
240 assert_eq!(f(&[0xDC00]).bytes, b"\xED\xB0\x80");
// Checks `extend` on a `Wtf8Buf`: each case collects an initial sequence of code points,
// extends the buffer with a second sequence, and compares the resulting bytes.
244 fn wtf8buf_extend() {
// Collects `initial` into a buffer, then extends it with `extended`.
// NOTE(review): the tail expression returning the buffer is not visible in this view —
// confirm.
245 fn e(initial: &[u32], extended: &[u32]) -> Wtf8Buf {
// Converts a borrowed raw value into a `CodePoint`; `unwrap` panics if `from_u32` gives `None`.
246 fn c(value: &u32) -> CodePoint {
247 CodePoint::from_u32(*value).unwrap()
249 let mut string = initial.iter().map(c).collect::<Wtf8Buf>();
250 string.extend(extended.iter().map(c));
// Plain scalar values on both sides.
254 assert_eq!(e(&[0x61, 0xE9], &[0x20, 0x1F4A9]).bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9");
// A lead surrogate already in the buffer followed by an extended trail surrogate must come
// out as one four-byte sequence.
256 assert_eq!(e(&[0xD83D], &[0xDCA9]).bytes, b"\xF0\x9F\x92\xA9"); // Magic!
// In the remaining cases each surrogate keeps its own three-byte encoding.
257 assert_eq!(e(&[0xD83D, 0x20], &[0xDCA9]).bytes, b"\xED\xA0\xBD \xED\xB2\xA9");
258 assert_eq!(e(&[0xD800], &[0xDBFF]).bytes, b"\xED\xA0\x80\xED\xAF\xBF");
259 assert_eq!(e(&[0xD800], &[0xE000]).bytes, b"\xED\xA0\x80\xEE\x80\x80");
260 assert_eq!(e(&[0xD7FF], &[0xDC00]).bytes, b"\xED\x9F\xBF\xED\xB0\x80");
261 assert_eq!(e(&[0x61], &[0xDC00]).bytes, b"\x61\xED\xB0\x80");
262 assert_eq!(e(&[], &[0xDC00]).bytes, b"\xED\xB0\x80");
267 let mut string = Wtf8Buf::from_str("a\té \u{7f}💩\r");
268 string.push(CodePoint::from_u32(0xD800).unwrap());
269 assert_eq!(format!("{string:?}"), "\"a\\té \\u{7f}\u{1f4a9}\\r\\u{d800}\"");
273 fn wtf8buf_as_slice() {
274 assert_eq!(Wtf8Buf::from_str("aé").as_slice(), Wtf8::from_str("aé"));
278 fn wtf8buf_show_str() {
279 let text = "a\té 💩\r";
280 let string = Wtf8Buf::from_str(text);
281 assert_eq!(format!("{text:?}"), format!("{string:?}"));
286 assert_eq!(&Wtf8::from_str("").bytes, b"");
287 assert_eq!(&Wtf8::from_str("aé 💩").bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9");
292 assert_eq!(Wtf8::from_str("").len(), 0);
293 assert_eq!(Wtf8::from_str("aé 💩").len(), 8);
298 assert_eq!(&Wtf8::from_str("aé 💩")[1..4].bytes, b"\xC3\xA9 ");
303 fn wtf8_slice_not_code_point_boundary() {
304 let _ = &Wtf8::from_str("aé 💩")[2..4];
308 fn wtf8_slice_from() {
309 assert_eq!(&Wtf8::from_str("aé 💩")[1..].bytes, b"\xC3\xA9 \xF0\x9F\x92\xA9");
314 fn wtf8_slice_from_not_code_point_boundary() {
315 let _ = &Wtf8::from_str("aé 💩")[2..];
320 assert_eq!(&Wtf8::from_str("aé 💩")[..4].bytes, b"a\xC3\xA9 ");
325 fn wtf8_slice_to_not_code_point_boundary() {
326 let _ = &Wtf8::from_str("aé 💩")[5..];
330 fn wtf8_ascii_byte_at() {
331 let slice = Wtf8::from_str("aé 💩");
332 assert_eq!(slice.ascii_byte_at(0), b'a');
333 assert_eq!(slice.ascii_byte_at(1), b'\xFF');
334 assert_eq!(slice.ascii_byte_at(2), b'\xFF');
335 assert_eq!(slice.ascii_byte_at(3), b' ');
336 assert_eq!(slice.ascii_byte_at(4), b'\xFF');
340 fn wtf8_code_points() {
341 fn c(value: u32) -> CodePoint {
342 CodePoint::from_u32(value).unwrap()
344 fn cp(string: &Wtf8Buf) -> Vec<Option<char>> {
345 string.code_points().map(|c| c.to_char()).collect::<Vec<_>>()
347 let mut string = Wtf8Buf::from_str("é ");
348 assert_eq!(cp(&string), [Some('é'), Some(' ')]);
349 string.push(c(0xD83D));
350 assert_eq!(cp(&string), [Some('é'), Some(' '), None]);
351 string.push(c(0xDCA9));
352 assert_eq!(cp(&string), [Some('é'), Some(' '), Some('💩')]);
357 assert_eq!(Wtf8::from_str("").as_str(), Some(""));
358 assert_eq!(Wtf8::from_str("aé 💩").as_str(), Some("aé 💩"));
359 let mut string = Wtf8Buf::new();
360 string.push(CodePoint::from_u32(0xD800).unwrap());
361 assert_eq!(string.as_str(), None);
365 fn wtf8_to_string_lossy() {
366 assert_eq!(Wtf8::from_str("").to_string_lossy(), Cow::Borrowed(""));
367 assert_eq!(Wtf8::from_str("aé 💩").to_string_lossy(), Cow::Borrowed("aé 💩"));
368 let mut string = Wtf8Buf::from_str("aé 💩");
369 string.push(CodePoint::from_u32(0xD800).unwrap());
370 let expected: Cow<'_, str> = Cow::Owned(String::from("aé 💩�"));
371 assert_eq!(string.to_string_lossy(), expected);
376 fn d(b: &[u8]) -> String {
377 (&unsafe { Wtf8::from_bytes_unchecked(b) }).to_string()
380 assert_eq!("", d("".as_bytes()));
381 assert_eq!("aé 💩", d("aé 💩".as_bytes()));
383 let mut string = Wtf8Buf::from_str("aé 💩");
384 string.push(CodePoint::from_u32(0xD800).unwrap());
385 assert_eq!("aé 💩�", d(string.as_inner()));
// Exercises `encode_wide` on a buffer holding "aé ", a lone lead surrogate `0xD83D`, and
// then '💩'.
// NOTE(review): the `assert_eq!(` opener and its closing `);` around the last two lines are
// not visible in this view — confirm they surround them.
389 fn wtf8_encode_wide() {
390 let mut string = Wtf8Buf::from_str("aé ");
391 string.push(CodePoint::from_u32(0xD83D).unwrap());
392 string.push_char('💩');
394 string.encode_wide().collect::<Vec<_>>(),
// Expected units: the lone `0xD83D` as pushed, then '💩' as the pair `0xD83D 0xDCA9`.
395 vec![0x61, 0xE9, 0x20, 0xD83D, 0xD83D, 0xDCA9]
400 fn wtf8_encode_wide_size_hint() {
401 let string = Wtf8Buf::from_str("\u{12345}");
402 let mut iter = string.encode_wide();
403 assert_eq!((1, Some(8)), iter.size_hint());
404 iter.next().unwrap();
405 assert_eq!((1, Some(1)), iter.size_hint());
406 iter.next().unwrap();
407 assert_eq!((0, Some(0)), iter.size_hint());
408 assert!(iter.next().is_none());