Auto merge of #2183 - RalfJung:better-provenance-control, r=RalfJung

[rust.git] / src / shims / os_str.rs
diff --git a/src/shims/os_str.rs b/src/shims/os_str.rs

index 704994da4bd46ee92db303faf1c0cf3d2e67060e..d6669b21a731a8fde8b7e59d5d964bb7d3881b1e 100644 (file)
--- a/src/shims/os_str.rs
+++ b/src/shims/os_str.rs
@@ -1,5 +1,4 @@
  use std::borrow::Cow;
-use std::convert::TryFrom;
  use std::ffi::{OsStr, OsString};
  use std::iter;
  use std::path::{Path, PathBuf};
@@ -10,84 +9,66 @@
  use std::os::windows::ffi::{OsStrExt, OsStringExt};
  
  use rustc_middle::ty::layout::LayoutOf;
+use rustc_target::abi::{Align, Size};
  
  use crate::*;
  
  /// Represent how path separator conversion should be done.
-enum Pathconversion {
+pub enum PathConversion {
      HostToTarget,
      TargetToHost,
  }
  
-/// Perform path separator conversion if needed.
-fn convert_path_separator<'a>(
-    os_str: Cow<'a, OsStr>,
-    target_os: &str,
-    direction: Pathconversion,
-) -> Cow<'a, OsStr> {
-    #[cfg(windows)]
-    return if target_os == "windows" {
-        // Windows-on-Windows, all fine.
-        os_str
-    } else {
-        // Unix target, Windows host.
-        let (from, to) = match direction {
-            Pathconversion::HostToTarget => ('\\', '/'),
-            Pathconversion::TargetToHost => ('/', '\\'),
-        };
-        let converted = os_str
-            .encode_wide()
-            .map(|wchar| if wchar == from as u16 { to as u16 } else { wchar })
-            .collect::<Vec<_>>();
-        Cow::Owned(OsString::from_wide(&converted))
-    };
-    #[cfg(unix)]
-    return if target_os == "windows" {
-        // Windows target, Unix host.
-        let (from, to) = match direction {
-            Pathconversion::HostToTarget => ('/', '\\'),
-            Pathconversion::TargetToHost => ('\\', '/'),
-        };
-        let converted = os_str
-            .as_bytes()
-            .iter()
-            .map(|&wchar| if wchar == from as u8 { to as u8 } else { wchar })
-            .collect::<Vec<_>>();
-        Cow::Owned(OsString::from_vec(converted))
-    } else {
-        // Unix-on-Unix, all is fine.
-        os_str
-    };
+#[cfg(unix)]
+pub fn os_str_to_bytes<'a, 'tcx>(os_str: &'a OsStr) -> InterpResult<'tcx, &'a [u8]> {
+    Ok(os_str.as_bytes())
+}
+
+#[cfg(not(unix))]
+pub fn os_str_to_bytes<'a, 'tcx>(os_str: &'a OsStr) -> InterpResult<'tcx, &'a [u8]> {
+    // On non-unix platforms the best we can do to transform bytes from/to OS strings is to go
+    // through an intermediate UTF-8 string. This rejects non-UTF-8 paths that are actually
+    // valid.
+    os_str
+        .to_str()
+        .map(|s| s.as_bytes())
+        .ok_or_else(|| err_unsup_format!("{:?} is not a valid utf-8 string", os_str).into())
  }
  
-impl<'mir, 'tcx> EvalContextExt<'mir, 'tcx> for crate::MiriEvalContext<'mir, 'tcx> {}
+#[cfg(unix)]
+pub fn bytes_to_os_str<'a, 'tcx>(bytes: &'a [u8]) -> InterpResult<'tcx, &'a OsStr> {
+    Ok(OsStr::from_bytes(bytes))
+}
+#[cfg(not(unix))]
+pub fn bytes_to_os_str<'a, 'tcx>(bytes: &'a [u8]) -> InterpResult<'tcx, &'a OsStr> {
+    let s = std::str::from_utf8(bytes)
+        .map_err(|_| err_unsup_format!("{:?} is not a valid utf-8 string", bytes))?;
+    Ok(OsStr::new(s))
+}
+
+impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriEvalContext<'mir, 'tcx> {}
  pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx> {
      /// Helper function to read an OsString from a null-terminated sequence of bytes, which is what
      /// the Unix APIs usually handle.
-    fn read_os_str_from_c_str<'a>(&'a self, scalar: Scalar<Tag>) -> InterpResult<'tcx, &'a OsStr>
+    fn read_os_str_from_c_str<'a>(
+        &'a self,
+        ptr: Pointer<Option<Tag>>,
+    ) -> InterpResult<'tcx, &'a OsStr>
      where
          'tcx: 'a,
          'mir: 'a,
      {
-        #[cfg(unix)]
-        fn bytes_to_os_str<'tcx, 'a>(bytes: &'a [u8]) -> InterpResult<'tcx, &'a OsStr> {
-            Ok(OsStr::from_bytes(bytes))
-        }
-        #[cfg(not(unix))]
-        fn bytes_to_os_str<'tcx, 'a>(bytes: &'a [u8]) -> InterpResult<'tcx, &'a OsStr> {
-            let s = std::str::from_utf8(bytes)
-                .map_err(|_| err_unsup_format!("{:?} is not a valid utf-8 string", bytes))?;
-            Ok(OsStr::new(s))
-        }
-
          let this = self.eval_context_ref();
-        let bytes = this.memory.read_c_str(scalar)?;
+        let bytes = this.read_c_str(ptr)?;
          bytes_to_os_str(bytes)
      }
  
      /// Helper function to read an OsString from a 0x0000-terminated sequence of u16,
      /// which is what the Windows APIs usually handle.
-    fn read_os_str_from_wide_str<'a>(&'a self, scalar: Scalar<Tag>) -> InterpResult<'tcx, OsString>
+    fn read_os_str_from_wide_str<'a>(
+        &'a self,
+        ptr: Pointer<Option<Tag>>,
+    ) -> InterpResult<'tcx, OsString>
      where
          'tcx: 'a,
          'mir: 'a,
@@ -97,13 +78,13 @@ pub fn u16vec_to_osstring<'tcx, 'a>(u16_vec: Vec<u16>) -> InterpResult<'tcx, OsS
              Ok(OsString::from_wide(&u16_vec[..]))
          }
          #[cfg(not(windows))]
-        pub fn u16vec_to_osstring<'tcx, 'a>(u16_vec: Vec<u16>) -> InterpResult<'tcx, OsString> {
+        pub fn u16vec_to_osstring<'tcx>(u16_vec: Vec<u16>) -> InterpResult<'tcx, OsString> {
              let s = String::from_utf16(&u16_vec[..])
                  .map_err(|_| err_unsup_format!("{:?} is not a valid utf-16 string", u16_vec))?;
              Ok(s.into())
          }
  
-        let u16_vec = self.eval_context_ref().memory.read_wide_str(scalar)?;
+        let u16_vec = self.eval_context_ref().read_wide_str(ptr)?;
          u16vec_to_osstring(u16_vec)
      }
  
@@ -115,24 +96,9 @@ pub fn u16vec_to_osstring<'tcx, 'a>(u16_vec: Vec<u16>) -> InterpResult<'tcx, OsS
      fn write_os_str_to_c_str(
          &mut self,
          os_str: &OsStr,
-        scalar: Scalar<Tag>,
+        ptr: Pointer<Option<Tag>>,
          size: u64,
      ) -> InterpResult<'tcx, (bool, u64)> {
-        #[cfg(unix)]
-        fn os_str_to_bytes<'tcx, 'a>(os_str: &'a OsStr) -> InterpResult<'tcx, &'a [u8]> {
-            Ok(os_str.as_bytes())
-        }
-        #[cfg(not(unix))]
-        fn os_str_to_bytes<'tcx, 'a>(os_str: &'a OsStr) -> InterpResult<'tcx, &'a [u8]> {
-            // On non-unix platforms the best we can do to transform bytes from/to OS strings is to do the
-            // intermediate transformation into strings. Which invalidates non-utf8 paths that are actually
-            // valid.
-            os_str
-                .to_str()
-                .map(|s| s.as_bytes())
-                .ok_or_else(|| err_unsup_format!("{:?} is not a valid utf-8 string", os_str).into())
-        }
-
          let bytes = os_str_to_bytes(os_str)?;
          // If `size` is smaller or equal than `bytes.len()`, writing `bytes` plus the required null
          // terminator to memory using the `ptr` pointer would cause an out-of-bounds access.
@@ -141,8 +107,7 @@ fn os_str_to_bytes<'tcx, 'a>(os_str: &'a OsStr) -> InterpResult<'tcx, &'a [u8]>
              return Ok((false, string_length));
          }
          self.eval_context_mut()
-            .memory
-            .write_bytes(scalar, bytes.iter().copied().chain(iter::once(0u8)))?;
+            .write_bytes_ptr(ptr, bytes.iter().copied().chain(iter::once(0u8)))?;
          Ok((true, string_length))
      }
  
@@ -154,7 +119,7 @@ fn os_str_to_bytes<'tcx, 'a>(os_str: &'a OsStr) -> InterpResult<'tcx, &'a [u8]>
      fn write_os_str_to_wide_str(
          &mut self,
          os_str: &OsStr,
-        scalar: Scalar<Tag>,
+        ptr: Pointer<Option<Tag>>,
          size: u64,
      ) -> InterpResult<'tcx, (bool, u64)> {
          #[cfg(windows)]
@@ -176,15 +141,23 @@ fn os_str_to_u16vec<'tcx>(os_str: &OsStr) -> InterpResult<'tcx, Vec<u16>> {
          // If `size` is smaller or equal than `bytes.len()`, writing `bytes` plus the required
          // 0x0000 terminator to memory would cause an out-of-bounds access.
          let string_length = u64::try_from(u16_vec.len()).unwrap();
-        if size <= string_length {
+        let string_length = string_length.checked_add(1).unwrap();
+        if size < string_length {
              return Ok((false, string_length));
          }
  
          // Store the UTF-16 string.
-        self.eval_context_mut()
-            .memory
-            .write_u16s(scalar, u16_vec.into_iter().chain(iter::once(0x0000)))?;
-        Ok((true, string_length))
+        let size2 = Size::from_bytes(2);
+        let this = self.eval_context_mut();
+        let mut alloc = this
+            .get_ptr_alloc_mut(ptr, size2 * string_length, Align::from_bytes(2).unwrap())?
+            .unwrap(); // not a ZST, so we will get a result
+        for (offset, wchar) in u16_vec.into_iter().chain(iter::once(0x0000)).enumerate() {
+            let offset = u64::try_from(offset).unwrap();
+            alloc
+                .write_scalar(alloc_range(size2 * offset, size2), Scalar::from_u16(wchar).into())?;
+        }
+        Ok((true, string_length - 1))
      }
  
      /// Allocate enough memory to store the given `OsStr` as a null-terminated sequence of bytes.
@@ -192,14 +165,14 @@ fn alloc_os_str_as_c_str(
          &mut self,
          os_str: &OsStr,
          memkind: MemoryKind<MiriMemoryKind>,
-    ) -> Pointer<Tag> {
+    ) -> InterpResult<'tcx, Pointer<Option<Tag>>> {
          let size = u64::try_from(os_str.len()).unwrap().checked_add(1).unwrap(); // Make space for `0` terminator.
          let this = self.eval_context_mut();
  
          let arg_type = this.tcx.mk_array(this.tcx.types.u8, size);
-        let arg_place = this.allocate(this.layout_of(arg_type).unwrap(), memkind);
+        let arg_place = this.allocate(this.layout_of(arg_type).unwrap(), memkind)?;
          assert!(self.write_os_str_to_c_str(os_str, arg_place.ptr, size).unwrap().0);
-        arg_place.ptr.assert_ptr()
+        Ok(arg_place.ptr)
      }
  
      /// Allocate enough memory to store the given `OsStr` as a null-terminated sequence of `u16`.
@@ -207,37 +180,43 @@ fn alloc_os_str_as_wide_str(
          &mut self,
          os_str: &OsStr,
          memkind: MemoryKind<MiriMemoryKind>,
-    ) -> Pointer<Tag> {
+    ) -> InterpResult<'tcx, Pointer<Option<Tag>>> {
          let size = u64::try_from(os_str.len()).unwrap().checked_add(1).unwrap(); // Make space for `0x0000` terminator.
          let this = self.eval_context_mut();
  
          let arg_type = this.tcx.mk_array(this.tcx.types.u16, size);
-        let arg_place = this.allocate(this.layout_of(arg_type).unwrap(), memkind);
+        let arg_place = this.allocate(this.layout_of(arg_type).unwrap(), memkind)?;
          assert!(self.write_os_str_to_wide_str(os_str, arg_place.ptr, size).unwrap().0);
-        arg_place.ptr.assert_ptr()
+        Ok(arg_place.ptr)
      }
  
      /// Read a null-terminated sequence of bytes, and perform path separator conversion if needed.
-    fn read_path_from_c_str<'a>(&'a self, scalar: Scalar<Tag>) -> InterpResult<'tcx, Cow<'a, Path>>
+    fn read_path_from_c_str<'a>(
+        &'a self,
+        ptr: Pointer<Option<Tag>>,
+    ) -> InterpResult<'tcx, Cow<'a, Path>>
      where
          'tcx: 'a,
          'mir: 'a,
      {
          let this = self.eval_context_ref();
-        let os_str = this.read_os_str_from_c_str(scalar)?;
+        let os_str = this.read_os_str_from_c_str(ptr)?;
  
-        Ok(match convert_path_separator(Cow::Borrowed(os_str), &this.tcx.sess.target.target.target_os, Pathconversion::TargetToHost) {
+        Ok(match this.convert_path_separator(Cow::Borrowed(os_str), PathConversion::TargetToHost) {
              Cow::Borrowed(x) => Cow::Borrowed(Path::new(x)),
              Cow::Owned(y) => Cow::Owned(PathBuf::from(y)),
          })
      }
  
      /// Read a null-terminated sequence of `u16`s, and perform path separator conversion if needed.
-    fn read_path_from_wide_str(&self, scalar: Scalar<Tag>) -> InterpResult<'tcx, PathBuf> {
+    fn read_path_from_wide_str(&self, ptr: Pointer<Option<Tag>>) -> InterpResult<'tcx, PathBuf> {
          let this = self.eval_context_ref();
-        let os_str = this.read_os_str_from_wide_str(scalar)?;
+        let os_str = this.read_os_str_from_wide_str(ptr)?;
  
-        Ok(convert_path_separator(Cow::Owned(os_str), &this.tcx.sess.target.target.target_os, Pathconversion::TargetToHost).into_owned().into())
+        Ok(this
+            .convert_path_separator(Cow::Owned(os_str), PathConversion::TargetToHost)
+            .into_owned()
+            .into())
      }
  
      /// Write a Path to the machine memory (as a null-terminated sequence of bytes),
@@ -245,12 +224,13 @@ fn read_path_from_wide_str(&self, scalar: Scalar<Tag>) -> InterpResult<'tcx, Pat
      fn write_path_to_c_str(
          &mut self,
          path: &Path,
-        scalar: Scalar<Tag>,
+        ptr: Pointer<Option<Tag>>,
          size: u64,
      ) -> InterpResult<'tcx, (bool, u64)> {
          let this = self.eval_context_mut();
-        let os_str = convert_path_separator(Cow::Borrowed(path.as_os_str()), &this.tcx.sess.target.target.target_os, Pathconversion::HostToTarget);
-        this.write_os_str_to_c_str(&os_str, scalar, size)
+        let os_str = this
+            .convert_path_separator(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget);
+        this.write_os_str_to_c_str(&os_str, ptr, size)
      }
  
      /// Write a Path to the machine memory (as a null-terminated sequence of `u16`s),
@@ -258,11 +238,54 @@ fn write_path_to_c_str(
      fn write_path_to_wide_str(
          &mut self,
          path: &Path,
-        scalar: Scalar<Tag>,
+        ptr: Pointer<Option<Tag>>,
          size: u64,
      ) -> InterpResult<'tcx, (bool, u64)> {
          let this = self.eval_context_mut();
-        let os_str = convert_path_separator(Cow::Borrowed(path.as_os_str()), &this.tcx.sess.target.target.target_os, Pathconversion::HostToTarget);
-        this.write_os_str_to_wide_str(&os_str, scalar, size)
+        let os_str = this
+            .convert_path_separator(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget);
+        this.write_os_str_to_wide_str(&os_str, ptr, size)
+    }
+
+    fn convert_path_separator<'a>(
+        &self,
+        os_str: Cow<'a, OsStr>,
+        direction: PathConversion,
+    ) -> Cow<'a, OsStr> {
+        let this = self.eval_context_ref();
+        let target_os = &this.tcx.sess.target.os;
+        #[cfg(windows)]
+        return if target_os == "windows" {
+            // Windows-on-Windows, all fine.
+            os_str
+        } else {
+            // Unix target, Windows host.
+            let (from, to) = match direction {
+                PathConversion::HostToTarget => ('\\', '/'),
+                PathConversion::TargetToHost => ('/', '\\'),
+            };
+            let converted = os_str
+                .encode_wide()
+                .map(|wchar| if wchar == from as u16 { to as u16 } else { wchar })
+                .collect::<Vec<_>>();
+            Cow::Owned(OsString::from_wide(&converted))
+        };
+        #[cfg(unix)]
+        return if target_os == "windows" {
+            // Windows target, Unix host.
+            let (from, to) = match direction {
+                PathConversion::HostToTarget => ('/', '\\'),
+                PathConversion::TargetToHost => ('\\', '/'),
+            };
+            let converted = os_str
+                .as_bytes()
+                .iter()
+                .map(|&wchar| if wchar == from as u8 { to as u8 } else { wchar })
+                .collect::<Vec<_>>();
+            Cow::Owned(OsString::from_vec(converted))
+        } else {
+            // Unix-on-Unix, all is fine.
+            os_str
+        };
      }
  }