Closes #14358.
~~The tests are not yet moved to `utf16_iter`, so this probably won't compile. I'm submitting this PR anyway so it can be reviewed and since it was mentioned in #14611.~~ EDIT: Tests now use `utf16_iter`.
This deprecates `.to_utf16`. `x.to_utf16()` should be replaced by either `x.utf16_units().collect::<Vec<u16>>()` (the type annotation can be omitted when the surrounding context already determines the element type), or just `x.utf16_units()` directly, if it can be used in an iterator context.
[breaking-change]
cc @huonw
}
/// Converts to a vector of `u16` encoded as UTF-16.
+ #[deprecated = "use `utf16_units` instead"]
fn to_utf16(&self) -> Vec<u16> {
- let me = self.as_slice();
- let mut u = Vec::new();
- for ch in me.chars() {
- let mut buf = [0u16, ..2];
- let n = ch.encode_utf16(buf /* as mut slice! */);
- u.push_all(buf.slice_to(n));
- }
- u
+ self.as_slice().utf16_units().collect::<Vec<u16>>()
}
/// Given a string, make a new string with repeated copies of it.
for p in pairs.iter() {
let (s, u) = (*p).clone();
+ let s_as_utf16 = s.as_slice().utf16_units().collect::<Vec<u16>>();
+ let u_as_string = from_utf16(u.as_slice()).unwrap();
+
assert!(is_utf16(u.as_slice()));
- assert_eq!(s.to_utf16(), u);
+ assert_eq!(s_as_utf16, u);
- assert_eq!(from_utf16(u.as_slice()).unwrap(), s);
+ assert_eq!(u_as_string, s);
assert_eq!(from_utf16_lossy(u.as_slice()), s);
- assert_eq!(from_utf16(s.to_utf16().as_slice()).unwrap(), s);
- assert_eq!(from_utf16(u.as_slice()).unwrap().to_utf16(), u);
+ assert_eq!(from_utf16(s_as_utf16.as_slice()).unwrap(), s);
+ assert_eq!(u_as_string.as_slice().utf16_units().collect::<Vec<u16>>(), u);
}
}
use mem;
use char;
+use char::Char;
use clone::Clone;
use cmp;
use cmp::{PartialEq, Eq};
use iter::{Filter, Map, Iterator};
use iter::{DoubleEndedIterator, ExactSize};
use iter::range;
-use num::Saturating;
+use num::{CheckedMul, Saturating};
use option::{None, Option, Some};
use raw::Repr;
use slice::ImmutableVector;
}
}
+/// External iterator for a string's UTF16 codeunits.
+/// Use with the `std::iter` module.
+#[deriving(Clone)]
+pub struct Utf16CodeUnits<'a> {
+ // Underlying iterator over the string's `char`s.
+ chars: Chars<'a>,
+ // Trailing surrogate buffered from a char that encoded to two
+ // code units; `0` means nothing is pending (a trailing surrogate
+ // is always in 0xDC00..0xE000, so 0 is a safe sentinel).
+ extra: u16
+}
+
+impl<'a> Iterator<u16> for Utf16CodeUnits<'a> {
+ /// Returns the next UTF-16 code unit: first drains any buffered
+ /// trailing surrogate, then encodes the next `char`.
+ #[inline]
+ fn next(&mut self) -> Option<u16> {
+ if self.extra != 0 {
+ let tmp = self.extra;
+ self.extra = 0;
+ return Some(tmp);
+ }
+
+ let mut buf = [0u16, ..2];
+ self.chars.next().map(|ch| {
+ let n = ch.encode_utf16(buf /* as mut slice! */);
+ // a two-unit encoding buffers the trailing surrogate
+ // (never 0) so the following call can yield it
+ if n == 2 { self.extra = buf[1]; }
+ buf[0]
+ })
+ }
+
+ #[inline]
+ fn size_hint(&self) -> (uint, Option<uint>) {
+ let (low, high) = self.chars.size_hint();
+ // every char gets either one u16 or two u16, so this
+ // iterator is between 1 and 2 times as long as the
+ // underlying one — PLUS one extra unit when a trailing
+ // surrogate is still buffered. Omitting `ext` from the
+ // upper bound would make it one less than the actual
+ // remaining length mid-surrogate-pair, violating the
+ // `size_hint` contract.
+ let ext = if self.extra == 0 { 0 } else { 1 };
+ // `checked_mul(&2)` guarantees an even product <= MAX,
+ // so `n + ext` (ext <= 1) cannot overflow.
+ (low.saturating_add(ext),
+ high.and_then(|n| n.checked_mul(&2)).map(|n| n + ext))
+ }
+}
+
/*
Section: Comparing strings
*/
/// and that it is not reallocated (e.g. by pushing to the
/// string).
fn as_ptr(&self) -> *const u8;
+
+ /// Return an iterator of `u16` over the string encoded as UTF-16.
+ fn utf16_units(&self) -> Utf16CodeUnits<'a>;
}
impl<'a> StrSlice<'a> for &'a str {
fn as_ptr(&self) -> *const u8 {
self.repr().data
}
+
+ #[inline]
+ fn utf16_units(&self) -> Utf16CodeUnits<'a> {
+ Utf16CodeUnits{ chars: self.chars(), extra: 0}
+ }
}
impl<'a> Default for &'a str {
pub mod compat {
use std::intrinsics::{atomic_store_relaxed, transmute};
+ use std::iter::Iterator;
use libc::types::os::arch::extra::{LPCWSTR, HMODULE, LPCSTR, LPVOID};
extern "system" {
// layer (after it's loaded) shouldn't be any slower than a regular DLL
// call.
unsafe fn store_func(ptr: *mut uint, module: &str, symbol: &str, fallback: uint) {
- let module = module.to_utf16().append_one(0);
+ let module: Vec<u16> = module.utf16_units().collect();
+ let module = module.append_one(0);
symbol.with_c_str(|symbol| {
let handle = GetModuleHandleW(module.as_ptr());
let func: uint = transmute(GetProcAddress(handle, symbol));
pub fn to_utf16(s: &CString) -> IoResult<Vec<u16>> {
match s.as_str() {
- Some(s) => Ok(s.to_utf16().append_one(0)),
+ Some(s) => Ok(s.utf16_units().collect::<Vec<u16>>().append_one(0)),
None => Err(IoError {
code: libc::ERROR_INVALID_NAME as uint,
extra: 0,
use libc::funcs::extra::msvcrt::get_osfhandle;
use std::mem;
+ use std::iter::Iterator;
+ use std::str::StrSlice;
if cfg.gid.is_some() || cfg.uid.is_some() {
return Err(IoError {
lpSecurityDescriptor: ptr::mut_null(),
bInheritHandle: 1,
};
- let filename = "NUL".to_utf16().append_one(0);
+ let filename: Vec<u16> = "NUL".utf16_units().collect();
+ let filename = filename.append_one(0);
*slot = libc::CreateFileW(filename.as_ptr(),
access,
libc::FILE_SHARE_READ |
with_envp(cfg.env, |envp| {
with_dirp(cfg.cwd, |dirp| {
- let mut cmd_str = cmd_str.to_utf16().append_one(0);
+ let mut cmd_str: Vec<u16> = cmd_str.as_slice().utf16_units().collect();
+ cmd_str = cmd_str.append_one(0);
let created = CreateProcessW(ptr::null(),
cmd_str.as_mut_ptr(),
ptr::mut_null(),
let kv = format!("{}={}",
pair.ref0().as_str().unwrap(),
pair.ref1().as_str().unwrap());
- blk.push_all(kv.to_utf16().as_slice());
+ blk.extend(kv.as_slice().utf16_units());
blk.push(0);
}
Some(dir) => {
let dir_str = dir.as_str()
.expect("expected workingdirectory to be utf-8 encoded");
- let dir_str = dir_str.to_utf16().append_one(0);
+ let dir_str: Vec<u16> = dir_str.utf16_units().collect();
+ let dir_str = dir_str.append_one(0);
+
cb(dir_str.as_ptr())
},
None => cb(ptr::null())
impl Lock {
pub fn new(p: &Path) -> Lock {
- let p_16 = p.as_str().unwrap().to_utf16().append_one(0);
+ let p_16: Vec<u16> = p.as_str().unwrap().utf16_units().collect();
+ let p_16 = p_16.append_one(0);
let handle = unsafe {
libc::CreateFileW(p_16.as_ptr(),
libc::FILE_GENERIC_READ |
#[cfg(target_os = "win32")]
pub mod dl {
use c_str::ToCStr;
+ use iter::Iterator;
use libc;
use os;
use ptr;
use result::{Ok, Err, Result};
- use str::StrAllocating;
+ use str::StrSlice;
use str;
use string::String;
+ use vec::Vec;
pub unsafe fn open_external<T: ToCStr>(filename: T) -> *mut u8 {
// Windows expects Unicode data
let filename_cstr = filename.to_c_str();
let filename_str = str::from_utf8(filename_cstr.as_bytes_no_nul()).unwrap();
- let filename_str = filename_str.to_utf16().append_one(0);
+ let filename_str: Vec<u16> = filename_str.utf16_units().collect();
+ let filename_str = filename_str.append_one(0);
LoadLibraryW(filename_str.as_ptr() as *const libc::c_void) as *mut u8
}
unsafe {
with_env_lock(|| {
use os::win32::{fill_utf16_buf_and_decode};
- let n = n.to_utf16().append_one(0);
+ let n: Vec<u16> = n.utf16_units().collect();
+ let n = n.append_one(0);
fill_utf16_buf_and_decode(|buf, sz| {
libc::GetEnvironmentVariableW(n.as_ptr(), buf, sz)
})
#[cfg(windows)]
fn _setenv(n: &str, v: &str) {
- let n = n.to_utf16().append_one(0);
- let v = v.to_utf16().append_one(0);
+ let n: Vec<u16> = n.utf16_units().collect();
+ let n = n.append_one(0);
+ let v: Vec<u16> = v.utf16_units().collect();
+ let v = v.append_one(0);
unsafe {
with_env_lock(|| {
libc::SetEnvironmentVariableW(n.as_ptr(), v.as_ptr());
#[cfg(windows)]
fn _unsetenv(n: &str) {
- let n = n.to_utf16().append_one(0);
+ let n: Vec<u16> = n.utf16_units().collect();
+ let n = n.append_one(0);
unsafe {
with_env_lock(|| {
libc::SetEnvironmentVariableW(n.as_ptr(), ptr::null());
#[cfg(windows)]
fn chdir(p: &Path) -> bool {
let p = match p.as_str() {
- Some(s) => s.to_utf16().append_one(0),
+ Some(s) => s.utf16_units().collect::<Vec<u16>>().append_one(0),
None => return false,
};
unsafe {