Change `os::args()` and `os::env()` to use `str::from_utf8_lossy()`.
Add new functions `os::args_as_bytes()` and `os::env_as_bytes()` to retrieve the args/env as byte vectors instead.
The existing functions still return strings, because string handling is expected to be the common use case.
Fixes #7188.
use vec::{ImmutableVector, MutableVector};
use vec;
use rt::global_heap::malloc_raw;
+use unstable::raw::Slice;
/// The representation of a C String.
///
}
/// Converts the CString into a `&[u8]` without copying.
+ /// Includes the terminating NUL byte.
///
/// # Failure
///
pub fn as_bytes<'a>(&'a self) -> &'a [u8] {
if self.buf.is_null() { fail!("CString is null!"); }
unsafe {
- cast::transmute((self.buf, self.len() + 1))
+ cast::transmute(Slice { data: self.buf, len: self.len() + 1 })
+ }
+ }
+
+ /// Converts the CString into a `&[u8]` without copying.
+ /// Does not include the terminating NUL byte.
+ ///
+ /// # Failure
+ ///
+ /// Fails if the CString is null.
+ #[inline]
+ pub fn as_bytes_no_nul<'a>(&'a self) -> &'a [u8] {
+ if self.buf.is_null() { fail!("CString is null!"); }
+ unsafe {
+ cast::transmute(Slice { data: self.buf, len: self.len() })
}
}
/// Fails if the CString is null.
#[inline]
pub fn as_str<'a>(&'a self) -> Option<&'a str> {
- let buf = self.as_bytes();
- let buf = buf.slice_to(buf.len()-1); // chop off the trailing NUL
+ let buf = self.as_bytes_no_nul();
str::from_utf8(buf)
}
let expected = ["zero", "one"];
let mut it = expected.iter();
let result = from_c_multistring(ptr as *libc::c_char, None, |c| {
- let cbytes = c.as_bytes().slice_to(c.len());
+ let cbytes = c.as_bytes_no_nul();
assert_eq!(cbytes, it.next().unwrap().as_bytes());
});
assert_eq!(result, 2);
assert_eq!(c_str.as_bytes(), bytes!("foo", 0xff, 0));
}
+ #[test]
+ fn test_as_bytes_no_nul() {
+ let c_str = "hello".to_c_str();
+ assert_eq!(c_str.as_bytes_no_nul(), bytes!("hello"));
+ let c_str = "".to_c_str();
+ let exp: &[u8] = [];
+ assert_eq!(c_str.as_bytes_no_nul(), exp);
+ let c_str = bytes!("foo", 0xff).to_c_str();
+ assert_eq!(c_str.as_bytes_no_nul(), bytes!("foo", 0xff));
+ }
+
#[test]
#[should_fail]
fn test_as_bytes_fail() {
c_str.as_bytes();
}
+ #[test]
+ #[should_fail]
+ fn test_as_bytes_no_nul_fail() {
+ let c_str = unsafe { CString::new(ptr::null(), false) };
+ c_str.as_bytes_no_nul();
+ }
+
#[test]
fn test_as_str() {
let c_str = "hello".to_c_str();
#[cfg(unix)]
use c_str::ToCStr;
+#[cfg(windows)]
+use str::OwnedStr;
/// Delegates to the libc close() function, returning the same return value.
pub fn close(fd: int) -> int {
/// Returns a vector of (variable, value) pairs for all the environment
/// variables of the current process.
+///
+/// Invalid UTF-8 bytes are replaced with \uFFFD. See `str::from_utf8_lossy()`
+/// for details.
pub fn env() -> ~[(~str,~str)] {
+ env_as_bytes().move_iter().map(|(k,v)| {
+ let k = str::from_utf8_lossy(k).into_owned();
+ let v = str::from_utf8_lossy(v).into_owned();
+ (k,v)
+ }).collect()
+}
+
+/// Returns a vector of (variable, value) byte-vector pairs for all the
+/// environment variables of the current process.
+pub fn env_as_bytes() -> ~[(~[u8],~[u8])] {
unsafe {
#[cfg(windows)]
- unsafe fn get_env_pairs() -> ~[~str] {
+ unsafe fn get_env_pairs() -> ~[~[u8]] {
use c_str;
use str::StrSlice;
}
let mut result = ~[];
c_str::from_c_multistring(ch as *c_char, None, |cstr| {
- result.push(cstr.as_str().unwrap().to_owned());
+ result.push(cstr.as_bytes_no_nul().to_owned());
});
FreeEnvironmentStringsA(ch);
result
}
#[cfg(unix)]
- unsafe fn get_env_pairs() -> ~[~str] {
+ unsafe fn get_env_pairs() -> ~[~[u8]] {
+ use c_str::CString;
+
extern {
fn rust_env_pairs() -> **c_char;
}
}
let mut result = ~[];
ptr::array_each(environ, |e| {
- let env_pair = str::raw::from_c_str(e);
- debug!("get_env_pairs: {}", env_pair);
+ let env_pair = CString::new(e, false).as_bytes_no_nul().to_owned();
result.push(env_pair);
});
result
}
- fn env_convert(input: ~[~str]) -> ~[(~str, ~str)] {
+ fn env_convert(input: ~[~[u8]]) -> ~[(~[u8], ~[u8])] {
let mut pairs = ~[];
for p in input.iter() {
- let vs: ~[&str] = p.splitn('=', 1).collect();
- debug!("splitting: len: {}", vs.len());
- assert_eq!(vs.len(), 2);
- pairs.push((vs[0].to_owned(), vs[1].to_owned()));
+ let vs: ~[&[u8]] = p.splitn(1, |b| *b == '=' as u8).collect();
+ let key = vs[0].to_owned();
+ let val = (if vs.len() < 2 { ~[] } else { vs[1].to_owned() });
+ pairs.push((key, val));
}
pairs
}
#[cfg(unix)]
/// Fetches the environment variable `n` from the current process, returning
/// None if the variable isn't set.
+///
+/// Any invalid UTF-8 bytes in the value are replaced by \uFFFD. See
+/// `str::from_utf8_lossy()` for details.
+///
+/// # Failure
+///
+/// Fails if `n` has any interior NULs.
pub fn getenv(n: &str) -> Option<~str> {
+ getenv_as_bytes(n).map(|v| str::from_utf8_lossy(v).into_owned())
+}
+
+#[cfg(unix)]
+/// Fetches the value of the environment variable `n` from the current process
+/// as a byte vector, returning None if the variable isn't set.
+///
+/// # Failure
+///
+/// Fails if `n` has any interior NULs.
+pub fn getenv_as_bytes(n: &str) -> Option<~[u8]> {
+ use c_str::CString;
+
unsafe {
with_env_lock(|| {
let s = n.with_c_str(|buf| libc::getenv(buf));
if s.is_null() {
None
} else {
- Some(str::raw::from_c_str(s))
+ Some(CString::new(s, false).as_bytes_no_nul().to_owned())
}
})
}
}
}
+#[cfg(windows)]
+/// Fetches the value of the environment variable `n` from the current process
+/// as a byte vector, returning None if the variable isn't set.
+pub fn getenv_as_bytes(n: &str) -> Option<~[u8]> {
+ getenv(n).map(|s| s.into_bytes())
+}
+
#[cfg(unix)]
/// Sets the environment variable `n` to the value `v` for the currently running
/// process
+///
+/// # Failure
+///
+/// Fails if `n` or `v` have any interior NULs.
pub fn setenv(n: &str, v: &str) {
unsafe {
with_env_lock(|| {
}
/// Remove a variable from the environment entirely
+///
+/// # Failure
+///
+/// Fails (on unix) if `n` has any interior NULs.
pub fn unsetenv(n: &str) {
#[cfg(unix)]
fn _unsetenv(n: &str) {
}
#[cfg(target_os = "macos")]
-unsafe fn load_argc_and_argv(argc: int, argv: **c_char) -> ~[~str] {
+unsafe fn load_argc_and_argv(argc: int, argv: **c_char) -> ~[~[u8]] {
+ use c_str::CString;
+
let mut args = ~[];
for i in range(0u, argc as uint) {
- args.push(str::raw::from_c_str(*argv.offset(i as int)));
+ args.push(CString::new(*argv.offset(i as int), false).as_bytes_no_nul().to_owned())
}
args
}
* Returns a list of the command line arguments.
*/
#[cfg(target_os = "macos")]
-fn real_args() -> ~[~str] {
+fn real_args_as_bytes() -> ~[~[u8]] {
unsafe {
let (argc, argv) = (*_NSGetArgc() as int,
*_NSGetArgv() as **c_char);
#[cfg(target_os = "linux")]
#[cfg(target_os = "android")]
#[cfg(target_os = "freebsd")]
-fn real_args() -> ~[~str] {
+fn real_args_as_bytes() -> ~[~[u8]] {
use rt;
match rt::args::clone() {
}
}
+#[cfg(not(windows))]
+fn real_args() -> ~[~str] {
+ real_args_as_bytes().move_iter().map(|v| str::from_utf8_lossy(v).into_owned()).collect()
+}
+
#[cfg(windows)]
fn real_args() -> ~[~str] {
use vec;
return args;
}
+#[cfg(windows)]
+fn real_args_as_bytes() -> ~[~[u8]] {
+ real_args().move_iter().map(|s| s.into_bytes()).collect()
+}
+
type LPCWSTR = *u16;
#[cfg(windows)]
/// Returns the arguments which this program was started with (normally passed
/// via the command line).
+///
+/// The arguments are interpreted as UTF-8, with invalid bytes replaced with \uFFFD.
+/// See `str::from_utf8_lossy()` for details.
pub fn args() -> ~[~str] {
real_args()
}
+/// Returns the arguments which this program was started with (normally passed
+/// via the command line) as byte vectors.
+pub fn args_as_bytes() -> ~[~[u8]] {
+ real_args_as_bytes()
+}
+
#[cfg(target_os = "macos")]
extern {
// These functions are in crt_externs.h.
impl BytesContainer for CString {
#[inline]
fn container_as_bytes<'a>(&'a self) -> &'a [u8] {
- let s = self.as_bytes();
- s.slice_to(s.len()-1)
+ self.as_bytes_no_nul()
}
}
#[cfg(test)] pub unsafe fn cleanup() { realargs::cleanup() }
/// Take the global arguments from global storage.
-#[cfg(not(test))] pub fn take() -> Option<~[~str]> { imp::take() }
-#[cfg(test)] pub fn take() -> Option<~[~str]> {
+#[cfg(not(test))] pub fn take() -> Option<~[~[u8]]> { imp::take() }
+#[cfg(test)] pub fn take() -> Option<~[~[u8]]> {
match realargs::take() {
realstd::option::Some(a) => Some(a),
realstd::option::None => None,
/// Give the global arguments to global storage.
///
/// It is an error if the arguments already exist.
-#[cfg(not(test))] pub fn put(args: ~[~str]) { imp::put(args) }
-#[cfg(test)] pub fn put(args: ~[~str]) { realargs::put(args) }
+#[cfg(not(test))] pub fn put(args: ~[~[u8]]) { imp::put(args) }
+#[cfg(test)] pub fn put(args: ~[~[u8]]) { realargs::put(args) }
/// Make a clone of the global arguments.
-#[cfg(not(test))] pub fn clone() -> Option<~[~str]> { imp::clone() }
-#[cfg(test)] pub fn clone() -> Option<~[~str]> {
+#[cfg(not(test))] pub fn clone() -> Option<~[~[u8]]> { imp::clone() }
+#[cfg(test)] pub fn clone() -> Option<~[~[u8]]> {
match realargs::clone() {
realstd::option::Some(a) => Some(a),
realstd::option::None => None,
mod imp {
use cast;
use clone::Clone;
- #[cfg(not(test))] use libc;
use option::{Option, Some, None};
use ptr::RawPtr;
use iter::Iterator;
- #[cfg(not(test))] use str;
use unstable::finally::Finally;
use unstable::mutex::{Mutex, MUTEX_INIT};
use mem;
- #[cfg(not(test))] use vec;
static mut global_args_ptr: uint = 0;
static mut lock: Mutex = MUTEX_INIT;
lock.destroy();
}
- pub fn take() -> Option<~[~str]> {
+ pub fn take() -> Option<~[~[u8]]> {
with_lock(|| unsafe {
let ptr = get_global_ptr();
let val = mem::replace(&mut *ptr, None);
- val.as_ref().map(|s: &~~[~str]| (**s).clone())
+ val.as_ref().map(|s: &~~[~[u8]]| (**s).clone())
})
}
- pub fn put(args: ~[~str]) {
+ pub fn put(args: ~[~[u8]]) {
with_lock(|| unsafe {
let ptr = get_global_ptr();
rtassert!((*ptr).is_none());
})
}
- pub fn clone() -> Option<~[~str]> {
+ pub fn clone() -> Option<~[~[u8]]> {
with_lock(|| unsafe {
let ptr = get_global_ptr();
- (*ptr).as_ref().map(|s: &~~[~str]| (**s).clone())
+ (*ptr).as_ref().map(|s: &~~[~[u8]]| (**s).clone())
})
}
})
}
- fn get_global_ptr() -> *mut Option<~~[~str]> {
+ fn get_global_ptr() -> *mut Option<~~[~[u8]]> {
unsafe { cast::transmute(&global_args_ptr) }
}
// Copied from `os`.
#[cfg(not(test))]
- unsafe fn load_argc_and_argv(argc: int, argv: **u8) -> ~[~str] {
+ unsafe fn load_argc_and_argv(argc: int, argv: **u8) -> ~[~[u8]] {
+ use c_str::CString;
+ use {vec, libc};
+ use vec::CloneableVector;
+
vec::from_fn(argc as uint, |i| {
- str::raw::from_c_str(*(argv as **libc::c_char).offset(i as int))
+ let cs = CString::new(*(argv as **libc::c_char).offset(i as int), false);
+ cs.as_bytes_no_nul().to_owned()
})
}
// Preserve the actual global state.
let saved_value = take();
- let expected = ~[~"happy", ~"today?"];
+ let expected = ~[bytes!("happy").to_owned(), bytes!("today?").to_owned()];
put(expected.clone());
assert!(clone() == Some(expected.clone()));
pub fn cleanup() {
}
- pub fn take() -> Option<~[~str]> {
+ pub fn take() -> Option<~[~[u8]]> {
fail!()
}
- pub fn put(_args: ~[~str]) {
+ pub fn put(_args: ~[~[u8]]) {
fail!()
}
- pub fn clone() -> Option<~[~str]> {
+ pub fn clone() -> Option<~[~[u8]]> {
fail!()
}
}