/// Encodes this character as UTF-8 into the provided byte buffer.
///
- /// The buffer must be at least 4 bytes long or a runtime failure will
+ /// The buffer must be at least 4 bytes long or a runtime failure may
/// occur.
///
- /// This will then return the number of characters written to the slice.
+ /// Returns the number of bytes written to `dst`.
fn encode_utf8(&self, dst: &mut [u8]) -> uint;
+
+ /// Encodes this character as UTF-16 into the provided `u16` buffer.
+ ///
+ /// The buffer must be at least 2 elements long (enough for a surrogate
+ /// pair) or a runtime failure may occur.
+ ///
+ /// Returns the number of `u16`s written to `dst`.
+ fn encode_utf16(&self, dst: &mut [u16]) -> uint;
}
impl Char for char {
fn len_utf8_bytes(&self) -> uint { len_utf8_bytes(*self) }
- fn encode_utf8<'a>(&self, dst: &'a mut [u8]) -> uint {
+ fn encode_utf8(&self, dst: &mut [u8]) -> uint {
let code = *self as uint;
if code < MAX_ONE_B {
dst[0] = code as u8;
return 4;
}
}
+
+ fn encode_utf16(&self, dst: &mut [u16]) -> uint {
+ let mut ch = *self as uint;
+ if (ch & 0xFFFF_u) == ch {
+ // BMP code points are written as a single unit; the value is expected
+ // not to be a surrogate (D800..DFFF), which the assert below checks.
+ assert!(ch <= 0xD7FF_u || ch >= 0xE000_u);
+ dst[0] = ch as u16;
+ 1
+ } else {
+ // Supplementary planes are split into a high/low surrogate pair (10 bits each).
+ assert!(ch >= 0x1_0000_u && ch <= 0x10_FFFF_u);
+ ch -= 0x1_0000_u;
+ dst[0] = 0xD800_u16 | ((ch >> 10) as u16);
+ dst[1] = 0xDC00_u16 | ((ch as u16) & 0x3FF_u16);
+ 2
+ }
+ }
}
#[cfg(not(test))]
fn to_utf16(&self) -> ~[u16] {
let mut u = ~[];
for ch in self.chars() {
- // Arithmetic with u32 literals is easier on the eyes than chars.
- let mut ch = ch as u32;
-
- if (ch & 0xFFFF_u32) == ch {
- // The BMP falls through (assuming non-surrogate, as it
- // should)
- assert!(ch <= 0xD7FF_u32 || ch >= 0xE000_u32);
- u.push(ch as u16)
- } else {
- // Supplementary planes break into surrogates.
- assert!(ch >= 0x1_0000_u32 && ch <= 0x10_FFFF_u32);
- ch -= 0x1_0000_u32;
- let w1 = 0xD800_u16 | ((ch >> 10) as u16);
- let w2 = 0xDC00_u16 | ((ch as u16) & 0x3FF_u16);
- u.push_all([w1, w2])
- }
+ let mut buf = [0u16, ..2];
+ let n = ch.encode_utf16(buf /* fixed-size array passed as a mutable slice */);
+ u.push_all(buf.slice_to(n));
}
u
}