// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
12 use std::convert::TryFrom;
/// Checks the conversions between `char` and integer types.
///
/// Covers `u32::from(char)`, `char::from(u8)` and `char::try_from(u32)`,
/// including both edges of the surrogate gap (0xD800..=0xDFFF) and the upper
/// limit of the code space (0x10FFFF).
#[test]
fn test_convert() {
    // Widening a `char` yields its scalar value.
    assert_eq!(u32::from('a'), 0x61);

    // Every byte converts to the code point with the same numeric value.
    let from_byte = [(b'\0', '\0'), (b'a', 'a'), (b'\xFF', '\u{FF}')];
    for &(byte, expected) in from_byte.iter() {
        assert_eq!(char::from(byte), expected);
    }

    // Values on the valid side of each boundary are accepted.
    let accepted = [
        (0_u32, '\0'),
        (0x61, 'a'),
        (0xD7FF, '\u{D7FF}'),
        (0xE000, '\u{E000}'),
        (0x10FFFF, '\u{10FFFF}'),
    ];
    for &(code, expected) in accepted.iter() {
        assert_eq!(char::try_from(code), Ok(expected));
    }

    // Surrogates and values past U+10FFFF are rejected.
    let rejected = [0xD800_u32, 0xDFFF, 0x110000, 0xFFFF_FFFF];
    for &code in rejected.iter() {
        assert!(
            char::try_from(code).is_err(),
            "{:#X} must not convert to a char",
            code
        );
    }
}
/// Checks `char::is_lowercase` on ASCII and non-ASCII Latin letters.
#[test]
fn test_is_lowercase() {
    for &c in ['a', 'ö', 'ß'].iter() {
        assert!(c.is_lowercase(), "{:?} should be lowercase", c);
    }
    for &c in ['Ü', 'P'].iter() {
        assert!(!c.is_lowercase(), "{:?} should not be lowercase", c);
    }
}
/// Checks `char::is_uppercase` on ASCII and non-ASCII Latin letters.
#[test]
fn test_is_uppercase() {
    // `ß` is included because it has no single-character uppercase form.
    for &c in ['h', 'ä', 'ß'].iter() {
        assert!(!c.is_uppercase(), "{:?} should not be uppercase", c);
    }
    for &c in ['Ö', 'T'].iter() {
        assert!(c.is_uppercase(), "{:?} should be uppercase", c);
    }
}
/// Checks `char::is_whitespace` on ASCII blanks, a Unicode space and
/// several non-whitespace characters.
#[test]
fn test_is_whitespace() {
    // U+2007 is FIGURE SPACE, a non-ASCII whitespace character.
    for &c in [' ', '\u{2007}', '\t', '\n'].iter() {
        assert!(c.is_whitespace(), "{:?} should be whitespace", c);
    }
    // NUL is a control character, not whitespace.
    for &c in ['a', '_', '\u{0}'].iter() {
        assert!(!c.is_whitespace(), "{:?} should not be whitespace", c);
    }
}
/// Checks `char::to_digit` across several radices.
///
/// Each row is `(character, radix, expected result)`. Letters are accepted
/// in either case, and characters that are not digits in the given radix
/// yield `None`.
#[test]
fn test_to_digit() {
    let cases = [
        ('0', 10, Some(0)),
        ('1', 2, Some(1)),
        ('2', 3, Some(2)),
        ('9', 10, Some(9)),
        ('a', 16, Some(10)),
        ('A', 16, Some(10)),
        ('b', 16, Some(11)),
        ('B', 16, Some(11)),
        ('z', 36, Some(35)),
        ('Z', 36, Some(35)),
        (' ', 10, None),
        ('$', 36, None),
    ];
    for &(c, radix, expected) in cases.iter() {
        assert_eq!(c.to_digit(radix), expected, "{:?} in radix {}", c, radix);
    }
}
/// Checks `char::to_lowercase`, including mappings that expand to more than
/// one character.
///
/// Non-ASCII inputs are written as `\u{...}` escapes: several of them
/// (Greek capitals, digraphs, ligatures) are visually indistinguishable from
/// ASCII letters or letter pairs, and an accidental substitution would
/// silently change what is being tested.
#[test]
fn test_to_lowercase() {
    /// Collects the full lowercase mapping of `c`.
    fn lower(c: char) -> Vec<char> {
        c.to_lowercase().collect()
    }

    // Latin
    assert_eq!(lower('A'), ['a']);
    assert_eq!(lower('\u{D6}'), ['\u{F6}']); // Ö -> ö
    assert_eq!(lower('\u{DF}'), ['\u{DF}']); // ß is already lowercase
    assert_eq!(lower('\u{DC}'), ['\u{FC}']); // Ü -> ü

    // Characters without case are returned unchanged.
    assert_eq!(lower('\u{1F4A9}'), ['\u{1F4A9}']); // 💩

    // Greek
    assert_eq!(lower('\u{3A3}'), ['\u{3C3}']); // Σ -> σ
    assert_eq!(lower('\u{3A4}'), ['\u{3C4}']); // Τ -> τ
    assert_eq!(lower('\u{399}'), ['\u{3B9}']); // Ι -> ι
    assert_eq!(lower('\u{393}'), ['\u{3B3}']); // Γ -> γ
    assert_eq!(lower('\u{39C}'), ['\u{3BC}']); // Μ -> μ
    assert_eq!(lower('\u{391}'), ['\u{3B1}']); // Α -> α

    // Titlecase digraph U+01C5 lowers to the lowercase digraph U+01C6.
    assert_eq!(lower('\u{1C5}'), ['\u{1C6}']);
    // The "fi" ligature U+FB01 is already lowercase.
    assert_eq!(lower('\u{FB01}'), ['\u{FB01}']);
    // Capital I with dot above (U+0130) expands to `i` + combining dot above.
    assert_eq!(lower('\u{130}'), ['i', '\u{307}']);
}
/// Checks `char::to_uppercase`, including mappings that expand to more than
/// one character.
///
/// Non-ASCII inputs are written as `\u{...}` escapes: several of them
/// (Greek capitals, digraphs, ligatures) are visually indistinguishable from
/// ASCII letters or letter pairs, and an accidental substitution would
/// silently change what is being tested.
#[test]
fn test_to_uppercase() {
    /// Collects the full uppercase mapping of `c`.
    fn upper(c: char) -> Vec<char> {
        c.to_uppercase().collect()
    }

    // Latin
    assert_eq!(upper('a'), ['A']);
    assert_eq!(upper('\u{F6}'), ['\u{D6}']); // ö -> Ö
    assert_eq!(upper('\u{DF}'), ['S', 'S']); // ß -> SS, not ẞ (capital sharp s)
    assert_eq!(upper('\u{FC}'), ['\u{DC}']); // ü -> Ü

    // Characters without case are returned unchanged.
    assert_eq!(upper('\u{1F4A9}'), ['\u{1F4A9}']); // 💩

    // Greek
    assert_eq!(upper('\u{3C3}'), ['\u{3A3}']); // σ -> Σ
    assert_eq!(upper('\u{3C4}'), ['\u{3A4}']); // τ -> Τ
    assert_eq!(upper('\u{3B9}'), ['\u{399}']); // ι -> Ι
    assert_eq!(upper('\u{3B3}'), ['\u{393}']); // γ -> Γ
    assert_eq!(upper('\u{3BC}'), ['\u{39C}']); // μ -> Μ
    assert_eq!(upper('\u{3B1}'), ['\u{391}']); // α -> Α
    assert_eq!(upper('\u{3C2}'), ['\u{3A3}']); // final sigma ς -> Σ

    // Titlecase digraph U+01C5 uppers to the uppercase digraph U+01C4.
    assert_eq!(upper('\u{1C5}'), ['\u{1C4}']);
    // The "fi" ligature U+FB01 expands to two ASCII capitals.
    assert_eq!(upper('\u{FB01}'), ['F', 'I']);
    // U+1F80 expands to U+1F08 followed by capital iota U+0399.
    assert_eq!(upper('\u{1F80}'), ['\u{1F08}', '\u{399}']);
}
/// Checks `char::is_control` on control characters from both the low range
/// and the U+007F..=U+009F range, and on ordinary printable ASCII.
#[test]
fn test_is_control() {
    let control = ['\u{0}', '\u{3}', '\u{6}', '\u{9}', '\u{7f}', '\u{92}'];
    for &c in control.iter() {
        assert!(c.is_control(), "{:?} should be a control character", c);
    }
    // Space, 'U' and 'h'.
    for &c in ['\u{20}', '\u{55}', '\u{68}'].iter() {
        assert!(!c.is_control(), "{:?} should not be a control character", c);
    }
}
/// Checks `char::is_numeric` on ASCII digits and ASCII letters.
#[test]
fn test_is_numeric() {
    for &c in ['2', '7'].iter() {
        assert!(c.is_numeric(), "{:?} should be numeric", c);
    }
    for &c in ['c', 'i', 'z', 'Q'].iter() {
        assert!(!c.is_numeric(), "{:?} should not be numeric", c);
    }
}
/// Checks the output of `char::escape_debug`.
///
/// Each input is passed directly to the assertion instead of going through a
/// repeatedly shadowed `s` binding, so an expectation can never be compared
/// against the result of a previous input.
#[test]
fn test_escape_debug() {
    /// Collects the debug escape of `c` into a `String`.
    fn string(c: char) -> String {
        c.escape_debug().collect()
    }

    // Characters with a dedicated backslash escape.
    assert_eq!(string('\n'), "\\n");
    assert_eq!(string('\r'), "\\r");
    assert_eq!(string('\''), "\\'");
    assert_eq!(string('"'), "\\\"");

    // NUL: newer standard libraries render it as `\0`, older ones as
    // `\u{0}`. Either is a valid escape, so accept both.
    let nul = string('\x00');
    assert!(
        nul == "\\0" || nul == "\\u{0}",
        "unexpected escape for NUL: {}",
        nul
    );

    // Other control characters use the `\u{...}` form.
    assert_eq!(string('\x1f'), "\\u{1f}");
    assert_eq!(string('\x7f'), "\\u{7f}");
    assert_eq!(string('\u{80}'), "\\u{80}");

    // Printable non-ASCII characters are passed through unescaped.
    assert_eq!(string('\u{ff}'), "\u{ff}");
    assert_eq!(string('\u{11b}'), "\u{11b}");
    assert_eq!(string('\u{1d4b6}'), "\u{1d4b6}");

    // Invisible and unassigned-by-design characters are escaped.
    assert_eq!(string('\u{200b}'), "\\u{200b}"); // zero width space
    assert_eq!(string('\u{e000}'), "\\u{e000}"); // private use 1
    assert_eq!(string('\u{100000}'), "\\u{100000}"); // private use 2
}
/// Checks the output of `char::escape_default`.
///
/// Each input is passed directly to the assertion instead of going through a
/// repeatedly shadowed `s` binding, so an expectation can never be compared
/// against the result of a previous input.
#[test]
fn test_escape_default() {
    /// Collects the default escape of `c` into a `String`.
    fn string(c: char) -> String {
        c.escape_default().collect()
    }

    // Characters with a dedicated backslash escape.
    assert_eq!(string('\n'), "\\n");
    assert_eq!(string('\r'), "\\r");
    assert_eq!(string('\''), "\\'");
    assert_eq!(string('"'), "\\\"");

    // Unlike `escape_debug`, everything outside printable ASCII is escaped,
    // printable or not.
    assert_eq!(string('\u{e9}'), "\\u{e9}"); // é
    assert_eq!(string('\x00'), "\\u{0}");
    assert_eq!(string('\x1f'), "\\u{1f}");
    assert_eq!(string('\x7f'), "\\u{7f}");
    assert_eq!(string('\u{80}'), "\\u{80}");
    assert_eq!(string('\u{ff}'), "\\u{ff}");
    assert_eq!(string('\u{11b}'), "\\u{11b}");
    assert_eq!(string('\u{1d4b6}'), "\\u{1d4b6}");
    assert_eq!(string('\u{200b}'), "\\u{200b}"); // zero width space
    assert_eq!(string('\u{e000}'), "\\u{e000}"); // private use 1
    assert_eq!(string('\u{100000}'), "\\u{100000}"); // private use 2
}
/// Checks that `char::escape_unicode` always produces the `\u{...}` form
/// with lowercase hex digits and no leading zeros.
#[test]
fn test_escape_unicode() {
    /// Collects the Unicode escape of `c` into a `String`.
    fn string(c: char) -> String {
        c.escape_unicode().collect()
    }

    // Each row is `(input, expected escape)`.
    let cases = [
        ('\x00', "\\u{0}"),
        ('\n', "\\u{a}"),
        (' ', "\\u{20}"),
        ('a', "\\u{61}"),
        ('\u{11b}', "\\u{11b}"),
        ('\u{1d4b6}', "\\u{1d4b6}"),
    ];
    for &(c, expected) in cases.iter() {
        assert_eq!(string(c), expected);
    }
}
/// Checks `char::encode_utf8` for one-, two-, three- and four-byte
/// encodings.
#[test]
fn test_encode_utf8() {
    /// Encodes `input` into a scratch buffer and verifies that the result
    /// starts at the beginning of that buffer, is valid UTF-8, and equals
    /// `expect`.
    fn check(input: char, expect: &[u8]) {
        let mut buf = [0u8; 4];
        // Record the address before the buffer is mutably borrowed.
        let start = buf.as_ptr() as usize;
        let encoded = input.encode_utf8(&mut buf);
        assert_eq!(encoded.as_ptr() as usize, start);
        assert!(std::str::from_utf8(encoded.as_bytes()).is_ok());
        assert_eq!(encoded.as_bytes(), expect);
    }

    check('x', &[0x78]);
    check('\u{e9}', &[0xc3, 0xa9]);
    check('\u{a66e}', &[0xea, 0x99, 0xae]);
    check('\u{1f4a9}', &[0xf0, 0x9f, 0x92, 0xa9]);
}
/// Checks `char::encode_utf16` for single-unit code points and for a
/// surrogate pair.
#[test]
fn test_encode_utf16() {
    /// Encodes `input` into a scratch buffer and verifies that the result
    /// starts at the beginning of that buffer and equals `expect`.
    fn check(input: char, expect: &[u16]) {
        let mut buf = [0u16; 2];
        // Record the address before the buffer is mutably borrowed.
        let start = buf.as_mut_ptr() as usize;
        let encoded = input.encode_utf16(&mut buf);
        assert_eq!(encoded.as_mut_ptr() as usize, start);
        assert_eq!(&*encoded, expect);
    }

    check('x', &[0x0078]);
    check('\u{e9}', &[0x00e9]);
    check('\u{a66e}', &[0xa66e]);
    check('\u{1f4a9}', &[0xd83d, 0xdca9]);
}
/// Checks `char::len_utf16`: one unit inside the Basic Multilingual Plane,
/// two units (a surrogate pair) above it.
#[test]
fn test_len_utf16() {
    // `assert_eq!` is used so that a failure reports both lengths.
    let cases = [('x', 1), ('\u{e9}', 1), ('\u{a66e}', 1), ('\u{1f4a9}', 2)];
    for &(c, units) in cases.iter() {
        assert_eq!(c.len_utf16(), units, "UTF-16 length of {:?}", c);
    }
}
/// Checks `std::char::decode_utf16` on input containing an unpaired leading
/// surrogate, and on a well-formed surrogate pair.
#[test]
fn test_decode_utf16() {
    /// Decodes `s` and compares against `expected`, where each error is
    /// represented by the unpaired surrogate code unit it reports.
    fn check(s: &[u16], expected: &[Result<char, u16>]) {
        let decoded: Vec<Result<char, u16>> = std::char::decode_utf16(s.iter().cloned())
            .map(|unit| unit.map_err(|err| err.unpaired_surrogate()))
            .collect();
        assert_eq!(decoded, expected);
    }

    // A lone leading surrogate is reported, and decoding resumes with the
    // code unit that follows it.
    check(&[0xD800, 0x41, 0x42], &[Err(0xD800), Ok('A'), Ok('B')]);
    check(&[0xD800, 0], &[Err(0xD800), Ok('\0')]);

    // A well-formed pair decodes to a single supplementary character.
    check(&[0xD83D, 0xDCA9], &[Ok('\u{1F4A9}')]);
}
/// Checks that the `count`, `nth` and `last` methods of the iterator returned
/// by `char::escape_default` agree with the escaped text.
///
/// Each row pairs an input with its expected escape. For every row the test
/// verifies the element count, every in-bounds `nth`, the first out-of-bounds
/// `nth`, and `last`.
#[test]
fn ed_iterator_specializations() {
    let cases = [
        ('\n', "\\n"),
        ('c', "c"),
        (' ', " "),
        ('\\', "\\\\"),
        ('\'', "\\'"),
    ];

    for &(c, escaped) in cases.iter() {
        let expected: Vec<char> = escaped.chars().collect();
        let len = expected.len();

        // Check counting
        assert_eq!(c.escape_default().count(), len);

        // Check that OoB is handled correctly
        assert_eq!(c.escape_default().nth(len), None);

        // Check every in-bounds position (first char, second char, ...).
        // A fresh iterator is used each time because `nth` consumes elements.
        for (offset, &ch) in expected.iter().enumerate() {
            assert_eq!(c.escape_default().nth(offset), Some(ch));
        }

        // Check the last char
        assert_eq!(c.escape_default().last(), expected.last().cloned());
    }
}
/// Checks that the `count`, `nth`, `last`, `len` and `size_hint` methods of
/// the iterator returned by `char::escape_unicode` stay consistent with each
/// other as the iterator is advanced.
#[test]
fn eu_iterator_specializations() {
    /// Runs the consistency checks for a single character.
    fn check(c: char) {
        let len = c.escape_unicode().count();

        // Check OoB
        assert_eq!(c.escape_unicode().nth(len), None);

        // For all possible in-bound offsets
        let mut iter = c.escape_unicode();
        for offset in 0..len {
            // Check last: the escape always ends with the closing brace.
            assert_eq!(iter.clone().last(), Some('}'));

            // Check len
            assert_eq!(iter.len(), len - offset);

            // Check size_hint (= len in ExactSizeIterator)
            assert_eq!(iter.size_hint(), (iter.len(), Some(iter.len())));

            // Check counting
            assert_eq!(iter.clone().count(), len - offset);

            // Check nth: a fresh iterator skipped to `offset` must agree with
            // the iterator that is being advanced one step at a time.
            assert_eq!(c.escape_unicode().nth(offset), iter.next());
        }

        // Check post-last
        assert_eq!(iter.clone().last(), None);
        assert_eq!(iter.clone().count(), 0);
    }

    // One code point for each possible number of hex digits (1 to 6).
    check('\u{0}');
    check('\u{1}');
    check('\u{12}');
    check('\u{123}');
    check('\u{1234}');
    check('\u{12340}');
    check('\u{10FFFF}');
}
381 fn test_decode_utf8() {
382 macro_rules! assert_decode_utf8 {
383 ($input_bytes: expr, $expected_str: expr) => {
384 let input_bytes: &[u8] = &$input_bytes;
385 let s = char::decode_utf8(input_bytes.iter().cloned())
386 .map(|r_b| r_b.unwrap_or('\u{FFFD}'))
387 .collect::<String>();
388 assert_eq!(s, $expected_str,
389 "input bytes: {:?}, expected str: {:?}, result: {:?}",
390 input_bytes, $expected_str, s);
391 assert_eq!(String::from_utf8_lossy(&$input_bytes), $expected_str);
395 assert_decode_utf8!([], "");
396 assert_decode_utf8!([0x41], "A");
397 assert_decode_utf8!([0xC1, 0x81], "��");
398 assert_decode_utf8!([0xE2, 0x99, 0xA5], "♥");
399 assert_decode_utf8!([0xE2, 0x99, 0xA5, 0x41], "♥A");
400 assert_decode_utf8!([0xE2, 0x99], "�");
401 assert_decode_utf8!([0xE2, 0x99, 0x41], "�A");
402 assert_decode_utf8!([0xC0], "�");
403 assert_decode_utf8!([0xC0, 0x41], "�A");
404 assert_decode_utf8!([0x80], "�");
405 assert_decode_utf8!([0x80, 0x41], "�A");
406 assert_decode_utf8!([0xFE], "�");
407 assert_decode_utf8!([0xFE, 0x41], "�A");
408 assert_decode_utf8!([0xFF], "�");
409 assert_decode_utf8!([0xFF, 0x41], "�A");
410 assert_decode_utf8!([0xC0, 0x80], "��");
413 assert_decode_utf8!([0xED, 0x9F, 0xBF], "\u{D7FF}");
414 assert_decode_utf8!([0xED, 0xA0, 0x80], "���");
415 assert_decode_utf8!([0xED, 0xBF, 0x80], "���");
416 assert_decode_utf8!([0xEE, 0x80, 0x80], "\u{E000}");
419 assert_decode_utf8!([0xF4, 0x8F, 0xBF, 0xBF], "\u{10FFFF}");
420 assert_decode_utf8!([0xF4, 0x8F, 0xBF, 0x41], "�A");
421 assert_decode_utf8!([0xF4, 0x90, 0x80, 0x80], "����");
423 // 5 and 6 bytes sequence
424 // Part of the original design of UTF-8,
425 // but invalid now that UTF-8 is artificially restricted to match the range of UTF-16.
426 assert_decode_utf8!([0xF8, 0x80, 0x80, 0x80, 0x80], "�����");
427 assert_decode_utf8!([0xFC, 0x80, 0x80, 0x80, 0x80, 0x80], "������");