X-Git-Url: https://git.lizzy.rs/?a=blobdiff_plain;f=src%2Fshims%2Fos_str.rs;h=ede29d04d6bc9f4513e50a288a45bc684815b60f;hb=3fedc7b249fd5f9450b6c2d2df28a5b828166ac9;hp=74932ef6ca4ed2ea1cc4efa58f8a87707654cad7;hpb=3504d5266830e99f7cd2b978e91250bf1c35ee45;p=rust.git diff --git a/src/shims/os_str.rs b/src/shims/os_str.rs index 74932ef6ca4..ede29d04d6b 100644 --- a/src/shims/os_str.rs +++ b/src/shims/os_str.rs @@ -9,85 +9,67 @@ #[cfg(windows)] use std::os::windows::ffi::{OsStrExt, OsStringExt}; -use rustc::ty::layout::LayoutOf; +use rustc_middle::ty::layout::LayoutOf; +use rustc_target::abi::{Align, Size}; use crate::*; /// Represent how path separator conversion should be done. -enum Pathconversion { +pub enum PathConversion { HostToTarget, TargetToHost, } -/// Perform path separator conversion if needed. -fn convert_path_separator<'a>( - os_str: &'a OsStr, - target_os: &str, - direction: Pathconversion, -) -> Cow<'a, OsStr> { - #[cfg(windows)] - return if target_os == "windows" { - // Windows-on-Windows, all fine. - Cow::Borrowed(os_str) - } else { - // Unix target, Windows host. - let (from, to) = match direction { - Pathconversion::HostToTarget => ('\\', '/'), - Pathconversion::TargetToHost => ('/', '\\'), - }; - let converted = os_str - .encode_wide() - .map(|wchar| if wchar == from as u16 { to as u16 } else { wchar }) - .collect::>(); - Cow::Owned(OsString::from_wide(&converted)) - }; - #[cfg(unix)] - return if target_os == "windows" { - // Windows target, Unix host. - let (from, to) = match direction { - Pathconversion::HostToTarget => ('/', '\\'), - Pathconversion::TargetToHost => ('\\', '/'), - }; - let converted = os_str - .as_bytes() - .iter() - .map(|&wchar| if wchar == from as u8 { to as u8 } else { wchar }) - .collect::>(); - Cow::Owned(OsString::from_vec(converted)) - } else { - // Unix-on-Unix, all is fine. - Cow::Borrowed(os_str) - }; +#[cfg(unix)] +pub fn os_str_to_bytes<'a, 'tcx>(os_str: &'a OsStr) -> InterpResult<'tcx, &'a [u8]> { + Ok(os_str.as_bytes()) +} + +#[cfg(not(unix))] +pub fn os_str_to_bytes<'a, 'tcx>(os_str: &'a OsStr) -> InterpResult<'tcx, &'a [u8]> { + // On non-unix platforms the best we can do to transform bytes from/to OS strings is to do the + // intermediate transformation into strings. Which invalidates non-utf8 paths that are actually + // valid. + os_str + .to_str() + .map(|s| s.as_bytes()) + .ok_or_else(|| err_unsup_format!("{:?} is not a valid utf-8 string", os_str).into()) +} + +#[cfg(unix)] +pub fn bytes_to_os_str<'a, 'tcx>(bytes: &'a [u8]) -> InterpResult<'tcx, &'a OsStr> { + Ok(OsStr::from_bytes(bytes)) +} +#[cfg(not(unix))] +pub fn bytes_to_os_str<'a, 'tcx>(bytes: &'a [u8]) -> InterpResult<'tcx, &'a OsStr> { + let s = std::str::from_utf8(bytes) + .map_err(|_| err_unsup_format!("{:?} is not a valid utf-8 string", bytes))?; + Ok(OsStr::new(s)) } -impl<'mir, 'tcx> EvalContextExt<'mir, 'tcx> for crate::MiriEvalContext<'mir, 'tcx> {} +impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriEvalContext<'mir, 'tcx> {} pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx> { /// Helper function to read an OsString from a null-terminated sequence of bytes, which is what /// the Unix APIs usually handle. - fn read_os_str_from_c_str<'a>(&'a self, scalar: Scalar) -> InterpResult<'tcx, &'a OsStr> + fn read_os_str_from_c_str<'a>( + &'a self, + ptr: Pointer>, + ) -> InterpResult<'tcx, &'a OsStr> where 'tcx: 'a, 'mir: 'a, { - #[cfg(unix)] - fn bytes_to_os_str<'tcx, 'a>(bytes: &'a [u8]) -> InterpResult<'tcx, &'a OsStr> { - Ok(OsStr::from_bytes(bytes)) - } - #[cfg(not(unix))] - fn bytes_to_os_str<'tcx, 'a>(bytes: &'a [u8]) -> InterpResult<'tcx, &'a OsStr> { - let s = std::str::from_utf8(bytes) - .map_err(|_| err_unsup_format!("{:?} is not a valid utf-8 string", bytes))?; - Ok(OsStr::new(s)) - } - let this = self.eval_context_ref(); - let bytes = this.memory.read_c_str(scalar)?; + let bytes = this.read_c_str(ptr)?; bytes_to_os_str(bytes) } /// Helper function to read an OsString from a 0x0000-terminated sequence of u16, /// which is what the Windows APIs usually handle. - fn read_os_str_from_wide_str<'a>(&'a self, scalar: Scalar) -> InterpResult<'tcx, OsString> + fn read_os_str_from_wide_str<'a>( + &'a self, + ptr: Pointer>, + ) -> InterpResult<'tcx, OsString> where 'tcx: 'a, 'mir: 'a, @@ -103,7 +85,7 @@ pub fn u16vec_to_osstring<'tcx, 'a>(u16_vec: Vec) -> InterpResult<'tcx, OsS Ok(s.into()) } - let u16_vec = self.eval_context_ref().memory.read_wide_str(scalar)?; + let u16_vec = self.eval_context_ref().read_wide_str(ptr)?; u16vec_to_osstring(u16_vec) } @@ -115,24 +97,9 @@ pub fn u16vec_to_osstring<'tcx, 'a>(u16_vec: Vec) -> InterpResult<'tcx, OsS fn write_os_str_to_c_str( &mut self, os_str: &OsStr, - scalar: Scalar, + ptr: Pointer>, size: u64, ) -> InterpResult<'tcx, (bool, u64)> { - #[cfg(unix)] - fn os_str_to_bytes<'tcx, 'a>(os_str: &'a OsStr) -> InterpResult<'tcx, &'a [u8]> { - Ok(os_str.as_bytes()) - } - #[cfg(not(unix))] - fn os_str_to_bytes<'tcx, 'a>(os_str: &'a OsStr) -> InterpResult<'tcx, &'a [u8]> { - // On non-unix platforms the best we can do to transform bytes from/to OS strings is to do the - // intermediate transformation into strings. Which invalidates non-utf8 paths that are actually - // valid. - os_str - .to_str() - .map(|s| s.as_bytes()) - .ok_or_else(|| err_unsup_format!("{:?} is not a valid utf-8 string", os_str).into()) - } - let bytes = os_str_to_bytes(os_str)?; // If `size` is smaller or equal than `bytes.len()`, writing `bytes` plus the required null // terminator to memory using the `ptr` pointer would cause an out-of-bounds access. @@ -142,7 +109,7 @@ fn os_str_to_bytes<'tcx, 'a>(os_str: &'a OsStr) -> InterpResult<'tcx, &'a [u8]> } self.eval_context_mut() .memory - .write_bytes(scalar, bytes.iter().copied().chain(iter::once(0u8)))?; + .write_bytes(ptr, bytes.iter().copied().chain(iter::once(0u8)))?; Ok((true, string_length)) } @@ -154,7 +121,7 @@ fn os_str_to_bytes<'tcx, 'a>(os_str: &'a OsStr) -> InterpResult<'tcx, &'a [u8]> fn write_os_str_to_wide_str( &mut self, os_str: &OsStr, - scalar: Scalar, + ptr: Pointer>, size: u64, ) -> InterpResult<'tcx, (bool, u64)> { #[cfg(windows)] @@ -176,15 +143,24 @@ fn os_str_to_u16vec<'tcx>(os_str: &OsStr) -> InterpResult<'tcx, Vec> { // If `size` is smaller or equal than `bytes.len()`, writing `bytes` plus the required // 0x0000 terminator to memory would cause an out-of-bounds access. let string_length = u64::try_from(u16_vec.len()).unwrap(); - if size <= string_length { + let string_length = string_length.checked_add(1).unwrap(); + if size < string_length { return Ok((false, string_length)); } // Store the UTF-16 string. - self.eval_context_mut() + let size2 = Size::from_bytes(2); + let this = self.eval_context_mut(); + let mut alloc = this .memory - .write_u16s(scalar, u16_vec.into_iter().chain(iter::once(0x0000)))?; - Ok((true, string_length)) + .get_mut(ptr, size2 * string_length, Align::from_bytes(2).unwrap())? + .unwrap(); // not a ZST, so we will get a result + for (offset, wchar) in u16_vec.into_iter().chain(iter::once(0x0000)).enumerate() { + let offset = u64::try_from(offset).unwrap(); + alloc + .write_scalar(alloc_range(size2 * offset, size2), Scalar::from_u16(wchar).into())?; + } + Ok((true, string_length - 1)) } /// Allocate enough memory to store the given `OsStr` as a null-terminated sequence of bytes. @@ -192,14 +168,14 @@ fn alloc_os_str_as_c_str( &mut self, os_str: &OsStr, memkind: MemoryKind, - ) -> Pointer { + ) -> InterpResult<'tcx, Pointer>> { let size = u64::try_from(os_str.len()).unwrap().checked_add(1).unwrap(); // Make space for `0` terminator. let this = self.eval_context_mut(); let arg_type = this.tcx.mk_array(this.tcx.types.u8, size); - let arg_place = this.allocate(this.layout_of(arg_type).unwrap(), memkind); + let arg_place = this.allocate(this.layout_of(arg_type).unwrap(), memkind)?; assert!(self.write_os_str_to_c_str(os_str, arg_place.ptr, size).unwrap().0); - arg_place.ptr.assert_ptr() + Ok(arg_place.ptr) } /// Allocate enough memory to store the given `OsStr` as a null-terminated sequence of `u16`. @@ -207,37 +183,43 @@ fn alloc_os_str_as_wide_str( &mut self, os_str: &OsStr, memkind: MemoryKind, - ) -> Pointer { + ) -> InterpResult<'tcx, Pointer>> { let size = u64::try_from(os_str.len()).unwrap().checked_add(1).unwrap(); // Make space for `0x0000` terminator. let this = self.eval_context_mut(); let arg_type = this.tcx.mk_array(this.tcx.types.u16, size); - let arg_place = this.allocate(this.layout_of(arg_type).unwrap(), memkind); + let arg_place = this.allocate(this.layout_of(arg_type).unwrap(), memkind)?; assert!(self.write_os_str_to_wide_str(os_str, arg_place.ptr, size).unwrap().0); - arg_place.ptr.assert_ptr() + Ok(arg_place.ptr) } /// Read a null-terminated sequence of bytes, and perform path separator conversion if needed. - fn read_path_from_c_str<'a>(&'a self, scalar: Scalar) -> InterpResult<'tcx, Cow<'a, Path>> + fn read_path_from_c_str<'a>( + &'a self, + ptr: Pointer>, + ) -> InterpResult<'tcx, Cow<'a, Path>> where 'tcx: 'a, 'mir: 'a, { let this = self.eval_context_ref(); - let os_str = this.read_os_str_from_c_str(scalar)?; + let os_str = this.read_os_str_from_c_str(ptr)?; - Ok(match convert_path_separator(os_str, &this.tcx.sess.target.target.target_os, Pathconversion::TargetToHost) { + Ok(match this.convert_path_separator(Cow::Borrowed(os_str), PathConversion::TargetToHost) { Cow::Borrowed(x) => Cow::Borrowed(Path::new(x)), Cow::Owned(y) => Cow::Owned(PathBuf::from(y)), }) } /// Read a null-terminated sequence of `u16`s, and perform path separator conversion if needed. - fn read_path_from_wide_str(&self, scalar: Scalar) -> InterpResult<'tcx, PathBuf> { + fn read_path_from_wide_str(&self, ptr: Pointer>) -> InterpResult<'tcx, PathBuf> { let this = self.eval_context_ref(); - let os_str = this.read_os_str_from_wide_str(scalar)?; + let os_str = this.read_os_str_from_wide_str(ptr)?; - Ok(PathBuf::from(&convert_path_separator(&os_str, &this.tcx.sess.target.target.target_os, Pathconversion::TargetToHost))) + Ok(this + .convert_path_separator(Cow::Owned(os_str), PathConversion::TargetToHost) + .into_owned() + .into()) } /// Write a Path to the machine memory (as a null-terminated sequence of bytes), @@ -245,12 +227,13 @@ fn read_path_from_wide_str(&self, scalar: Scalar) -> InterpResult<'tcx, Pat fn write_path_to_c_str( &mut self, path: &Path, - scalar: Scalar, + ptr: Pointer>, size: u64, ) -> InterpResult<'tcx, (bool, u64)> { let this = self.eval_context_mut(); - let os_str = convert_path_separator(path.as_os_str(), &this.tcx.sess.target.target.target_os, Pathconversion::HostToTarget); - this.write_os_str_to_c_str(&os_str, scalar, size) + let os_str = this + .convert_path_separator(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget); + this.write_os_str_to_c_str(&os_str, ptr, size) } /// Write a Path to the machine memory (as a null-terminated sequence of `u16`s), @@ -258,11 +241,54 @@ fn write_path_to_c_str( fn write_path_to_wide_str( &mut self, path: &Path, - scalar: Scalar, + ptr: Pointer>, size: u64, ) -> InterpResult<'tcx, (bool, u64)> { let this = self.eval_context_mut(); - let os_str = convert_path_separator(path.as_os_str(), &this.tcx.sess.target.target.target_os, Pathconversion::HostToTarget); - this.write_os_str_to_wide_str(&os_str, scalar, size) + let os_str = this + .convert_path_separator(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget); + this.write_os_str_to_wide_str(&os_str, ptr, size) + } + + fn convert_path_separator<'a>( + &self, + os_str: Cow<'a, OsStr>, + direction: PathConversion, + ) -> Cow<'a, OsStr> { + let this = self.eval_context_ref(); + let target_os = &this.tcx.sess.target.os; + #[cfg(windows)] + return if target_os == "windows" { + // Windows-on-Windows, all fine. + os_str + } else { + // Unix target, Windows host. + let (from, to) = match direction { + PathConversion::HostToTarget => ('\\', '/'), + PathConversion::TargetToHost => ('/', '\\'), + }; + let converted = os_str + .encode_wide() + .map(|wchar| if wchar == from as u16 { to as u16 } else { wchar }) + .collect::>(); + Cow::Owned(OsString::from_wide(&converted)) + }; + #[cfg(unix)] + return if target_os == "windows" { + // Windows target, Unix host. + let (from, to) = match direction { + PathConversion::HostToTarget => ('/', '\\'), + PathConversion::TargetToHost => ('\\', '/'), + }; + let converted = os_str + .as_bytes() + .iter() + .map(|&wchar| if wchar == from as u8 { to as u8 } else { wchar }) + .collect::>(); + Cow::Owned(OsString::from_vec(converted)) + } else { + // Unix-on-Unix, all is fine. + os_str + }; } }