Closes #14358.
~~The tests are not yet moved to `utf16_units`, so this probably won't compile. I'm submitting this PR anyway so it can be reviewed and since it was mentioned in #14611.~~ EDIT: Tests now use `utf16_units`.
This deprecates `.to_utf16`. `x.to_utf16()` should be replaced by either `x.utf16_units().collect::<Vec<u16>>()` (the type annotation may be optional), or just `x.utf16_units()` directly, if it can be used in an iterator context.
[breaking-change]
cc @huonw
impl<'a> PartialOrd for MaybeOwned<'a> {
#[inline]
- fn lt(&self, other: &MaybeOwned) -> bool {
- self.as_slice().lt(&other.as_slice())
+ fn partial_cmp(&self, other: &MaybeOwned) -> Option<Ordering> {
+ Some(self.cmp(other))
}
}
}
/// Converts to a vector of `u16` encoded as UTF-16.
+ #[deprecated = "use `utf16_units` instead"]
fn to_utf16(&self) -> Vec<u16> {
- let me = self.as_slice();
- let mut u = Vec::new();
- for ch in me.chars() {
- let mut buf = [0u16, ..2];
- let n = ch.encode_utf16(buf /* as mut slice! */);
- u.push_all(buf.slice_to(n));
- }
- u
+ self.as_slice().utf16_units().collect::<Vec<u16>>()
}
/// Given a string, make a new string with repeated copies of it.
assert_eq!("bc", unsafe {raw::slice_bytes("abc", 1, 3)});
assert_eq!("", unsafe {raw::slice_bytes("abc", 1, 1)});
fn a_million_letter_a() -> String {
- let mut i = 0;
+ let mut i = 0u;
let mut rs = String::new();
while i < 100000 {
rs.push_str("aaaaaaaaaa");
rs
}
fn half_a_million_letter_a() -> String {
- let mut i = 0;
+ let mut i = 0u;
let mut rs = String::new();
while i < 100000 {
rs.push_str("aaaaa");
assert_eq!("华", data.slice(30, 33));
fn a_million_letter_x() -> String {
- let mut i = 0;
+ let mut i = 0u;
let mut rs = String::new();
while i < 100000 {
rs.push_str("华华华华华华华华华华");
rs
}
fn half_a_million_letter_x() -> String {
- let mut i = 0;
+ let mut i = 0u;
let mut rs = String::new();
while i < 100000 {
rs.push_str("华华华华华");
for p in pairs.iter() {
let (s, u) = (*p).clone();
+ let s_as_utf16 = s.as_slice().utf16_units().collect::<Vec<u16>>();
+ let u_as_string = from_utf16(u.as_slice()).unwrap();
+
assert!(is_utf16(u.as_slice()));
- assert_eq!(s.to_utf16(), u);
+ assert_eq!(s_as_utf16, u);
- assert_eq!(from_utf16(u.as_slice()).unwrap(), s);
+ assert_eq!(u_as_string, s);
assert_eq!(from_utf16_lossy(u.as_slice()), s);
- assert_eq!(from_utf16(s.to_utf16().as_slice()).unwrap(), s);
- assert_eq!(from_utf16(u.as_slice()).unwrap().to_utf16(), u);
+ assert_eq!(from_utf16(s_as_utf16.as_slice()).unwrap(), s);
+ assert_eq!(u_as_string.as_slice().utf16_units().collect::<Vec<u16>>(), u);
}
}
use mem;
use char;
+ use char::Char;
use clone::Clone;
use cmp;
use cmp::{PartialEq, Eq};
use iter::{Filter, Map, Iterator};
use iter::{DoubleEndedIterator, ExactSize};
use iter::range;
- use num::Saturating;
+ use num::{CheckedMul, Saturating};
use option::{None, Option, Some};
use raw::Repr;
use slice::ImmutableVector;
}
}
+ /// External iterator over the UTF-16 code units (`u16`) of a string's characters.
+ /// Use with the `std::iter` module.
+ #[deriving(Clone)]
+ pub struct Utf16CodeUnits<'a> {
+ chars: Chars<'a>, // characters that have not been encoded yet
+ extra: u16 // second unit of a two-unit encoding, owed to the next `next()` call; 0 means none pending
+ }
+
+ impl<'a> Iterator<u16> for Utf16CodeUnits<'a> {
+     /// Yields the next UTF-16 code unit, or `None` once every character has
+     /// been encoded and no second unit is pending.
+     #[inline]
+     fn next(&mut self) -> Option<u16> {
+         // A previous call encoded a char into two units; hand out the second
+         // one before pulling another char from the underlying iterator.
+         if self.extra != 0 {
+             let tmp = self.extra;
+             self.extra = 0;
+             return Some(tmp);
+         }
+
+         let mut buf = [0u16, ..2];
+         self.chars.next().map(|ch| {
+             let n = ch.encode_utf16(buf /* as mut slice! */);
+             // Stash the second unit so the following call can return it.
+             if n == 2 { self.extra = buf[1]; }
+             buf[0]
+         })
+     }
+
+     /// Returns bounds on the number of code units still to be yielded,
+     /// including a second unit that is pending from the last `next()` call.
+     #[inline]
+     fn size_hint(&self) -> (uint, Option<uint>) {
+         let (low, high) = self.chars.size_hint();
+         // A stashed second unit is still owed to the caller, but its char has
+         // already been consumed from `chars`, so it must be added to both
+         // bounds. Without this the upper bound can be `Some(0)` while `next()`
+         // still returns `Some`.
+         let pending = if self.extra != 0 { 1u } else { 0u };
+         // every char gets either one u16 or two u16,
+         // so this iterator is between 1 or 2 times as
+         // long as the underlying iterator.
+         // `n * 2` is even, so adding the single pending unit cannot overflow
+         // once `checked_mul` has succeeded.
+         (low + pending,
+          high.and_then(|n| n.checked_mul(&2)).map(|n| n + pending))
+     }
+ }
+
/*
Section: Comparing strings
*/
/// Bytewise slice equality
/// NOTE: This function is (ab)used in rustc::middle::trans::_match
/// to compare &[u8] byte slices that are not necessarily valid UTF-8.
-#[cfg(not(test))]
#[lang="str_eq"]
#[inline]
pub fn eq_slice(a: &str, b: &str) -> bool {
eq_slice_(a, b)
}
-/// Bytewise slice equality
-#[cfg(test)]
-#[inline]
-pub fn eq_slice(a: &str, b: &str) -> bool {
- eq_slice_(a, b)
-}
-
/*
Section: Misc
*/
Section: Trait implementations
*/
-#[cfg(not(test))]
#[allow(missing_doc)]
pub mod traits {
use cmp::{Ord, Ordering, Less, Equal, Greater, PartialEq, PartialOrd, Equiv, Eq};
use collections::Collection;
use iter::Iterator;
- use option::{Some, None};
+ use option::{Option, Some, None};
use str::{Str, StrSlice, eq_slice};
impl<'a> Ord for &'a str {
impl<'a> PartialOrd for &'a str {
#[inline]
- fn lt(&self, other: & &'a str) -> bool { self.cmp(other) == Less }
+ fn partial_cmp(&self, other: &&'a str) -> Option<Ordering> {
+ Some(self.cmp(other))
+ }
}
impl<'a, S: Str> Equiv<S> for &'a str {
}
}
-#[cfg(test)]
-pub mod traits {}
-
/// Any string that can be represented as a slice
pub trait Str {
/// Work with `self` as a slice.
/// and that it is not reallocated (e.g. by pushing to the
/// string).
fn as_ptr(&self) -> *const u8;
+
+ /// Return an iterator of `u16` over the string encoded as UTF-16.
+ fn utf16_units(&self) -> Utf16CodeUnits<'a>;
}
impl<'a> StrSlice<'a> for &'a str {
fn as_ptr(&self) -> *const u8 {
self.repr().data
}
+
+ #[inline]
+ fn utf16_units(&self) -> Utf16CodeUnits<'a> {
+ Utf16CodeUnits{ chars: self.chars(), extra: 0}
+ }
}
impl<'a> Default for &'a str {
use libc::funcs::extra::msvcrt::get_osfhandle;
use std::mem;
+ use std::iter::Iterator;
+ use std::str::StrSlice;
if cfg.gid.is_some() || cfg.uid.is_some() {
return Err(IoError {
lpSecurityDescriptor: ptr::mut_null(),
bInheritHandle: 1,
};
- let filename = "NUL".to_utf16().append_one(0);
+ let filename: Vec<u16> = "NUL".utf16_units().collect();
+ let filename = filename.append_one(0);
*slot = libc::CreateFileW(filename.as_ptr(),
access,
libc::FILE_SHARE_READ |
with_envp(cfg.env, |envp| {
with_dirp(cfg.cwd, |dirp| {
- let mut cmd_str = cmd_str.to_utf16().append_one(0);
+ let mut cmd_str: Vec<u16> = cmd_str.as_slice().utf16_units().collect();
+ cmd_str = cmd_str.append_one(0);
let created = CreateProcessW(ptr::null(),
cmd_str.as_mut_ptr(),
ptr::mut_null(),
let kv = format!("{}={}",
pair.ref0().as_str().unwrap(),
pair.ref1().as_str().unwrap());
- blk.push_all(kv.to_utf16().as_slice());
+ blk.extend(kv.as_slice().utf16_units());
blk.push(0);
}
Some(dir) => {
let dir_str = dir.as_str()
.expect("expected workingdirectory to be utf-8 encoded");
- let dir_str = dir_str.to_utf16().append_one(0);
+ let dir_str: Vec<u16> = dir_str.utf16_units().collect();
+ let dir_str = dir_str.append_one(0);
+
cb(dir_str.as_ptr())
},
None => cb(ptr::null())
// which will wake up the other end at some point, so we just allow this
// signal to be coalesced with the pending signals on the pipe.
extern fn sigchld_handler(_signum: libc::c_int) {
- let msg = 1;
+ let msg = 1i;
match unsafe {
libc::write(WRITE_FD, &msg as *const _ as *const libc::c_void, 1)
} {
unsafe {
with_env_lock(|| {
use os::win32::{fill_utf16_buf_and_decode};
- let n = n.to_utf16().append_one(0);
+ let n: Vec<u16> = n.utf16_units().collect();
+ let n = n.append_one(0);
fill_utf16_buf_and_decode(|buf, sz| {
libc::GetEnvironmentVariableW(n.as_ptr(), buf, sz)
})
#[cfg(windows)]
fn _setenv(n: &str, v: &str) {
- let n = n.to_utf16().append_one(0);
- let v = v.to_utf16().append_one(0);
+ let n: Vec<u16> = n.utf16_units().collect();
+ let n = n.append_one(0);
+ let v: Vec<u16> = v.utf16_units().collect();
+ let v = v.append_one(0);
unsafe {
with_env_lock(|| {
libc::SetEnvironmentVariableW(n.as_ptr(), v.as_ptr());
#[cfg(windows)]
fn _unsetenv(n: &str) {
- let n = n.to_utf16().append_one(0);
+ let n: Vec<u16> = n.utf16_units().collect();
+ let n = n.append_one(0);
unsafe {
with_env_lock(|| {
libc::SetEnvironmentVariableW(n.as_ptr(), ptr::null());
#[cfg(windows)]
fn chdir(p: &Path) -> bool {
let p = match p.as_str() {
- Some(s) => s.to_utf16().append_one(0),
+ Some(s) => s.utf16_units().collect::<Vec<u16>>().append_one(0),
None => return false,
};
unsafe {
#[ignore]
fn test_getenv_big() {
let mut s = "".to_string();
- let mut i = 0;
+ let mut i = 0i;
while i < 100 {
s.push_str("aaaaaaaaaa");
i += 1;