diff --git a/src/data_race.rs b/src/data_race.rs
index 49332721fcbd11757b57c9c9216b7721b5212285..9e5dfd9dbaf0d39596a82932164169fb777ba62d 100644
--- a/src/data_race.rs
+++ b/src/data_race.rs
@@ -1,13 +1,21 @@
 //! Implementation of a data-race detector using Lamport Timestamps / Vector-clocks
-//! based on the Dyamic Race Detection for C++:
+//! based on the Dynamic Race Detection for C++:
 //! https://www.doc.ic.ac.uk/~afd/homepages/papers/pdfs/2017/POPL.pdf
 //! which does not report false-positives when fences are used, and gives better
 //! accuracy in presence of read-modify-write operations.
 //!
+//! The implementation contains modifications to correctly model the changes to the memory model in C++20
+//! regarding the weakening of release sequences: http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2018/p0982r1.html.
+//! Relaxed stores now unconditionally block all currently active release sequences and so per-thread tracking of release
+//! sequences is not needed.
+//!
+//! The implementation also models races with memory allocation and deallocation by internally treating allocation
+//! and deallocation as a type of write for the purposes of data-race detection.
+//!
 //! This does not explore weak memory orders and so can still miss data-races
 //! but should not report false-positives.
 //!
-//! Data-race definiton from(https://en.cppreference.com/w/cpp/language/memory_model#Threads_and_data_races):
+//! Data-race definition from (https://en.cppreference.com/w/cpp/language/memory_model#Threads_and_data_races):
 //! a data race occurs between two memory accesses if they are on different threads, at least one operation
 //! is non-atomic, at least one operation is a write and neither access happens-before the other. Read the link
 //! for full definition.
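
To make the definition above concrete, here is a minimal illustrative sketch (not part of this file) of the kind of program the detector is meant to flag: the two accesses are on different threads, both are non-atomic, one is a write, and no happens-before edge orders them.

```rust
use std::thread;

static mut COUNTER: u64 = 0;

fn main() {
    // The spawned thread writes COUNTER while the main thread reads it.
    // Nothing synchronizes the two accesses, so this is a data race (UB).
    let t = thread::spawn(|| unsafe { COUNTER += 1 });
    let _ = unsafe { COUNTER }; // racy read on the main thread
    t.join().unwrap();
}
```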
@@ -21,7 +29,7 @@
 //! This means that the thread-index can be safely re-used, starting on the next timestamp for the newly created
 //! thread.
 //!
-//! The sequentially consistant ordering corresponds to the ordering that the threads
+//! The sequentially consistent ordering corresponds to the ordering that the threads
 //! are currently scheduled, this means that the data-race detector has no additional
 //! logic for sequentially consistent accesses at the moment since they are indistinguishable
 //! from acquire/release operations. If weak memory orderings are explored then this
@@ -34,7 +42,7 @@
 //!    order exists in which all threads observe all modifications in the same
 //!    order (see Sequentially-consistent ordering below) "
 //! So in the absence of weak memory effects a seq-cst load & a seq-cst store is identical
-//! to a acquire load and a release store given the global sequentially consistent order
+//! to an acquire load and a release store given the global sequentially consistent order
 //! of the schedule.
 //!
 //! The timestamps used in the data-race detector assign each sequence of non-atomic operations
@@ -44,7 +52,7 @@
 //! required for accurate reporting of data-race values.
 //!
 //! As per the paper a thread's timestamp is only incremented after a release operation is performed
-//! so some atomic operations that only perform acquires do not increment the timestamp, due to shared
+//! so some atomic operations that only perform acquires do not increment the timestamp. Due to shared
 //! code some atomic operations may increment the timestamp when not necessary but this has no effect
 //! on the data-race detection code.
 //!
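
As a rough sketch of the vector-clock reasoning described above (using a plain `Vec<u64>` instead of the crate's `VClock` type; the names here are illustrative only): an access happens-before another iff its clock is component-wise less-than-or-equal, and two accesses race when neither ordering holds.

```rust
type Clock = Vec<u64>; // one logical timestamp slot per thread

// Event with clock `a` happens-before event with clock `b` iff
// a[i] <= b[i] at every index, treating missing entries as 0.
fn happens_before(a: &Clock, b: &Clock) -> bool {
    (0..a.len().max(b.len()))
        .all(|i| a.get(i).copied().unwrap_or(0) <= b.get(i).copied().unwrap_or(0))
}

fn main() {
    let write = vec![1, 0]; // thread 0 wrote at its timestamp 1
    let read = vec![0, 2];  // thread 1 read without ever synchronizing
    // Neither access happens-before the other: this pair is a data race.
    assert!(!happens_before(&write, &read) && !happens_before(&read, &write));
}
```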
@@ -57,7 +65,6 @@
     cell::{Cell, Ref, RefCell, RefMut},
     fmt::Debug,
     mem,
-    rc::Rc,
 };
 
 use rustc_data_structures::fx::{FxHashMap, FxHashSet};
 use rustc_target::abi::Size;
 
 use crate::{
-    ImmTy, Immediate, InterpResult, MPlaceTy, MemPlaceMeta, MiriEvalContext, MiriEvalContextExt,
-    OpTy, Pointer, RangeMap, ScalarMaybeUninit, Tag, ThreadId, VClock, VSmallClockMap, VTimestamp,
-    VectorIdx,
+    AllocId, AllocRange, ImmTy, Immediate, InterpResult, MPlaceTy, MemPlaceMeta, MemoryKind,
+    MiriEvalContext, MiriEvalContextExt, MiriMemoryKind, OpTy, Pointer, RangeMap, Scalar,
+    ScalarMaybeUninit, Tag, ThreadId, VClock, VTimestamp, VectorIdx,
 };
 
 pub type AllocExtra = VClockAlloc;
-pub type MemoryExtra = Rc<GlobalState>;
+pub type MemoryExtra = GlobalState;
 
 /// Valid atomic read-write operations, alias of atomic::Ordering (not non-exhaustive).
 #[derive(Copy, Clone, PartialEq, Eq, Debug)]
@@ -122,7 +129,7 @@ struct ThreadClockSet {
     /// thread once it performs an acquire fence.
     fence_acquire: VClock,
 
-    /// The last timesamp of happens-before relations that
+    /// The last timestamp of happens-before relations that
     /// have been released by this thread by a fence.
     fence_release: VClock,
 }
@@ -135,7 +142,7 @@ fn apply_release_fence(&mut self) {
         self.fence_release.clone_from(&self.clock);
     }
 
-    /// Apply the effects of a acquire fence to this
+    /// Apply the effects of an acquire fence to this
     /// set of thread vector clocks.
     #[inline]
     fn apply_acquire_fence(&mut self) {
@@ -185,13 +192,34 @@ struct AtomicMemoryCellClocks {
     /// happen-before a thread if an acquire-load is
     /// performed on the data.
     sync_vector: VClock,
+}
 
-    /// The Hash-Map of all threads for which a release
-    /// sequence exists in the memory cell, required
-    /// since read-modify-write operations do not
-    /// invalidate existing release sequences.
-    /// See page 6 of linked paper.
-    release_sequences: VSmallClockMap,
+/// Type of write operation: allocating memory,
+/// non-atomic writes and deallocating memory
+/// are all treated as writes for the purpose
+/// of the data-race detector.
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+enum WriteType {
+    /// Allocate memory.
+    Allocate,
+
+    /// Standard unsynchronized write.
+    Write,
+
+    /// Deallocate memory.
+    /// Note that when memory is deallocated first, later non-atomic accesses
+    /// will be reported as use-after-free, not as data races.
+    /// (Same for `Allocate` above.)
+    Deallocate,
+}
+impl WriteType {
+    fn get_descriptor(self) -> &'static str {
+        match self {
+            WriteType::Allocate => "Allocate",
+            WriteType::Write => "Write",
+            WriteType::Deallocate => "Deallocate",
+        }
+    }
 }
 
 /// Memory Cell vector clock metadata
@@ -206,8 +234,13 @@ struct MemoryCellClocks {
     /// that performed the last write operation.
     write_index: VectorIdx,
 
+    /// The type of operation that the write index represents,
+    /// either newly allocated memory, a non-atomic write or
+    /// a deallocation of memory.
+    write_type: WriteType,
+
     /// The vector-clock of the timestamp of the last read operation
-    /// performed by a thread since the last write operation occured.
+    /// performed by a thread since the last write operation occurred.
     /// It is reset to zero on each write operation.
     read: VClock,
 
@@ -217,20 +250,19 @@ struct MemoryCellClocks {
     atomic_ops: Option<Box<AtomicMemoryCellClocks>>,
 }
 
-/// Create a default memory cell clocks instance
-/// for uninitialized memory.
-impl Default for MemoryCellClocks {
-    fn default() -> Self {
+impl MemoryCellClocks {
+    /// Create a new set of clocks representing memory allocated
+    /// at a given vector timestamp and index.
+    fn new(alloc: VTimestamp, alloc_index: VectorIdx) -> Self {
         MemoryCellClocks {
             read: VClock::default(),
-            write: 0,
-            write_index: VectorIdx::MAX_INDEX,
+            write: alloc,
+            write_index: alloc_index,
+            write_type: WriteType::Allocate,
             atomic_ops: None,
         }
     }
-}
 
-impl MemoryCellClocks {
     /// Load the internal atomic memory cells if they exist.
     #[inline]
     fn atomic(&self) -> Option<&AtomicMemoryCellClocks> {
@@ -283,8 +315,6 @@ fn store_release(&mut self, clocks: &ThreadClockSet, index: VectorIdx) -> Result
         self.atomic_write_detect(clocks, index)?;
         let atomic = self.atomic_mut();
         atomic.sync_vector.clone_from(&clocks.clock);
-        atomic.release_sequences.clear();
-        atomic.release_sequences.insert(index, &clocks.clock);
         Ok(())
     }
 
@@ -292,12 +322,13 @@ fn store_release(&mut self, clocks: &ThreadClockSet, index: VectorIdx) -> Result
     /// store relaxed semantics.
     fn store_relaxed(&mut self, clocks: &ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> {
         self.atomic_write_detect(clocks, index)?;
+
+        // The handling of release sequences was changed in C++20 and so
+        // the code here is different to the paper since now all relaxed
+        // stores block release sequences. The exception for same-thread
+        // relaxed stores has been removed.
         let atomic = self.atomic_mut();
         atomic.sync_vector.clone_from(&clocks.fence_release);
-        if let Some(release) = atomic.release_sequences.get(index) {
-            atomic.sync_vector.join(release);
-        }
-        atomic.release_sequences.retain_index(index);
         Ok(())
     }
 
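An illustrative sketch of the C++20 behaviour that `store_relaxed` now models (written against the standard library atomics, not this crate's API): once a later relaxed store to the same location blocks the release sequence, an acquire load that reads from it no longer synchronizes with the original release store, even though the relaxed store came from the same thread.

```rust
use std::sync::atomic::{AtomicUsize, Ordering};
use std::thread;

static FLAG: AtomicUsize = AtomicUsize::new(0);
static mut DATA: u64 = 0;

fn writer() {
    unsafe { DATA = 1 };              // non-atomic write
    FLAG.store(1, Ordering::Release); // heads a release sequence
    // Under C++20 (and the model above) this relaxed store blocks the
    // release sequence, even though it is from the same thread.
    FLAG.store(2, Ordering::Relaxed);
}

fn reader() {
    if FLAG.load(Ordering::Acquire) == 2 {
        // The acquire read took its value from the relaxed store, so no
        // synchronizes-with edge exists: this read races with `DATA = 1`.
        let _ = unsafe { DATA };
    }
}

fn main() {
    let t = thread::spawn(writer);
    reader();
    t.join().unwrap();
}
```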
@@ -307,7 +338,6 @@ fn rmw_release(&mut self, clocks: &ThreadClockSet, index: VectorIdx) -> Result<(
         self.atomic_write_detect(clocks, index)?;
         let atomic = self.atomic_mut();
         atomic.sync_vector.join(&clocks.clock);
-        atomic.release_sequences.insert(index, &clocks.clock);
         Ok(())
     }
 
@@ -385,6 +415,7 @@ fn write_race_detect(
         &mut self,
         clocks: &ThreadClockSet,
         index: VectorIdx,
+        write_type: WriteType,
     ) -> Result<(), DataRace> {
         log::trace!("Unsynchronized write with vectors: {:#?} :: {:#?}", self, clocks);
         if self.write <= clocks.clock[self.write_index] && self.read <= clocks.clock {
@@ -396,6 +427,7 @@ fn write_race_detect(
             if race_free {
                 self.write = clocks.clock[index];
                 self.write_index = index;
+                self.write_type = write_type;
                 self.read.set_zero_vector();
                 Ok(())
             } else {
@@ -413,7 +445,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> {
     /// Atomic variant of read_scalar_at_offset.
     fn read_scalar_at_offset_atomic(
         &self,
-        op: OpTy<'tcx, Tag>,
+        op: &OpTy<'tcx, Tag>,
         offset: u64,
         layout: TyAndLayout<'tcx>,
         atomic: AtomicReadOp,
@@ -425,13 +457,13 @@ fn read_scalar_at_offset_atomic(
         // Ensure that the following read at an offset is within bounds.
         assert!(op_place.layout.size >= offset + layout.size);
         let value_place = op_place.offset(offset, MemPlaceMeta::None, layout, this)?;
-        this.read_scalar_atomic(value_place, atomic)
+        this.read_scalar_atomic(&value_place, atomic)
     }
 
     /// Atomic variant of write_scalar_at_offset.
     fn write_scalar_at_offset_atomic(
         &mut self,
-        op: OpTy<'tcx, Tag>,
+        op: &OpTy<'tcx, Tag>,
         offset: u64,
         value: impl Into<ScalarMaybeUninit<Tag>>,
         layout: TyAndLayout<'tcx>,
@@ -444,18 +476,18 @@ fn write_scalar_at_offset_atomic(
         // Ensure that the following write at an offset is within bounds.
         assert!(op_place.layout.size >= offset + layout.size);
         let value_place = op_place.offset(offset, MemPlaceMeta::None, layout, this)?;
-        this.write_scalar_atomic(value.into(), value_place, atomic)
+        this.write_scalar_atomic(value.into(), &value_place, atomic)
     }
 
     /// Perform an atomic read operation at the memory location.
     fn read_scalar_atomic(
         &self,
-        place: MPlaceTy<'tcx, Tag>,
+        place: &MPlaceTy<'tcx, Tag>,
         atomic: AtomicReadOp,
     ) -> InterpResult<'tcx, ScalarMaybeUninit<Tag>> {
         let this = self.eval_context_ref();
-        let scalar = this.allow_data_races_ref(move |this| this.read_scalar(place.into()))?;
-        self.validate_atomic_load(place, atomic)?;
+        let scalar = this.allow_data_races_ref(move |this| this.read_scalar(&place.into()))?;
+        this.validate_atomic_load(place, atomic)?;
         Ok(scalar)
     }
 
@@ -463,31 +495,31 @@ fn read_scalar_atomic(
     fn write_scalar_atomic(
         &mut self,
         val: ScalarMaybeUninit<Tag>,
-        dest: MPlaceTy<'tcx, Tag>,
+        dest: &MPlaceTy<'tcx, Tag>,
         atomic: AtomicWriteOp,
     ) -> InterpResult<'tcx> {
         let this = self.eval_context_mut();
-        this.allow_data_races_mut(move |this| this.write_scalar(val, dest.into()))?;
-        self.validate_atomic_store(dest, atomic)
+        this.allow_data_races_mut(move |this| this.write_scalar(val, &(*dest).into()))?;
+        this.validate_atomic_store(dest, atomic)
     }
 
-    /// Perform a atomic operation on a memory location.
+    /// Perform an atomic operation on a memory location.
     fn atomic_op_immediate(
         &mut self,
-        place: MPlaceTy<'tcx, Tag>,
-        rhs: ImmTy<'tcx, Tag>,
+        place: &MPlaceTy<'tcx, Tag>,
+        rhs: &ImmTy<'tcx, Tag>,
         op: mir::BinOp,
         neg: bool,
         atomic: AtomicRwOp,
     ) -> InterpResult<'tcx, ImmTy<'tcx, Tag>> {
         let this = self.eval_context_mut();
 
-        let old = this.allow_data_races_mut(|this| this.read_immediate(place.into()))?;
+        let old = this.allow_data_races_mut(|this| this.read_immediate(&place.into()))?;
 
         // Atomics wrap around on overflow.
-        let val = this.binary_op(op, old, rhs)?;
-        let val = if neg { this.unary_op(mir::UnOp::Not, val)? } else { val };
-        this.allow_data_races_mut(|this| this.write_immediate(*val, place.into()))?;
+        let val = this.binary_op(op, &old, rhs)?;
+        let val = if neg { this.unary_op(mir::UnOp::Not, &val)? } else { val };
+        this.allow_data_races_mut(|this| this.write_immediate(*val, &(*place).into()))?;
 
         this.validate_atomic_rmw(place, atomic)?;
         Ok(old)
@@ -497,46 +529,86 @@ fn atomic_op_immediate(
     /// scalar value, the old value is returned.
     fn atomic_exchange_scalar(
         &mut self,
-        place: MPlaceTy<'tcx, Tag>,
+        place: &MPlaceTy<'tcx, Tag>,
         new: ScalarMaybeUninit<Tag>,
         atomic: AtomicRwOp,
     ) -> InterpResult<'tcx, ScalarMaybeUninit<Tag>> {
         let this = self.eval_context_mut();
 
-        let old = this.allow_data_races_mut(|this| this.read_scalar(place.into()))?;
-        this.allow_data_races_mut(|this| this.write_scalar(new, place.into()))?;
+        let old = this.allow_data_races_mut(|this| this.read_scalar(&place.into()))?;
+        this.allow_data_races_mut(|this| this.write_scalar(new, &(*place).into()))?;
         this.validate_atomic_rmw(place, atomic)?;
         Ok(old)
     }
 
+    /// Perform an atomic min/max operation on a memory place with a new
+    /// scalar value; the old value is returned.
+    fn atomic_min_max_scalar(
+        &mut self,
+        place: &MPlaceTy<'tcx, Tag>,
+        rhs: ImmTy<'tcx, Tag>,
+        min: bool,
+        atomic: AtomicRwOp,
+    ) -> InterpResult<'tcx, ImmTy<'tcx, Tag>> {
+        let this = self.eval_context_mut();
+
+        let old = this.allow_data_races_mut(|this| this.read_immediate(&place.into()))?;
+        let lt = this.overflowing_binary_op(mir::BinOp::Lt, &old, &rhs)?.0.to_bool()?;
+
+        let new_val = if min {
+            if lt { &old } else { &rhs }
+        } else {
+            if lt { &rhs } else { &old }
+        };
+
+        this.allow_data_races_mut(|this| this.write_immediate(**new_val, &(*place).into()))?;
+
+        this.validate_atomic_rmw(&place, atomic)?;
+
+        // Return the old value.
+        Ok(old)
+    }
+
     /// Perform an atomic compare and exchange at a given memory location.
     /// On success an atomic RMW operation is performed and on failure
-    /// only an atomic read occurs.
+    /// only an atomic read occurs. If `can_fail_spuriously` is true,
+    /// then we treat it as a "compare_exchange_weak" operation, and
+    /// some portion of the time fail even when the values are actually
+    /// identical.
     fn atomic_compare_exchange_scalar(
         &mut self,
-        place: MPlaceTy<'tcx, Tag>,
-        expect_old: ImmTy<'tcx, Tag>,
+        place: &MPlaceTy<'tcx, Tag>,
+        expect_old: &ImmTy<'tcx, Tag>,
         new: ScalarMaybeUninit<Tag>,
         success: AtomicRwOp,
         fail: AtomicReadOp,
+        can_fail_spuriously: bool,
     ) -> InterpResult<'tcx, Immediate<Tag>> {
+        use rand::Rng as _;
         let this = self.eval_context_mut();
 
         // Failure ordering cannot be stronger than success ordering, therefore first attempt
-        // to read with the failure ordering and if successfull then try again with the success
+        // to read with the failure ordering and if successful then try again with the success
         // read ordering and write in the success case.
         // Read as immediate for the sake of `binary_op()`
-        let old = this.allow_data_races_mut(|this| this.read_immediate(place.into()))?;
-
+        let old = this.allow_data_races_mut(|this| this.read_immediate(&(place.into())))?;
         // `binary_op` will bail if either of them is not a scalar.
-        let eq = this.overflowing_binary_op(mir::BinOp::Eq, old, expect_old)?.0;
-        let res = Immediate::ScalarPair(old.to_scalar_or_uninit(), eq.into());
+        let eq = this.overflowing_binary_op(mir::BinOp::Eq, &old, expect_old)?.0;
+        // If the operation would succeed, but is "weak", fail some portion
+        // of the time, based on `rate`.
+        let rate = this.memory.extra.cmpxchg_weak_failure_rate;
+        let cmpxchg_success = eq.to_bool()?
+            && (!can_fail_spuriously || this.memory.extra.rng.get_mut().gen::<f64>() < rate);
+        let res = Immediate::ScalarPair(
+            old.to_scalar_or_uninit(),
+            Scalar::from_bool(cmpxchg_success).into(),
+        );
 
         // Update ptr depending on comparison.
         // if successful, perform a full rw-atomic validation
         // otherwise treat this as an atomic load with the fail ordering.
-        if eq.to_bool()? {
-            this.allow_data_races_mut(|this| this.write_scalar(new, place.into()))?;
+        if cmpxchg_success {
+            this.allow_data_races_mut(|this| this.write_scalar(new, &(*place).into()))?;
             this.validate_atomic_rmw(place, success)?;
         } else {
             this.validate_atomic_load(place, fail)?;
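
For context, a sketch of the caller-side pattern that `can_fail_spuriously` exercises: `compare_exchange_weak` is specified to be usable only in a retry loop, which is why randomly failing a successful comparison at the configured rate is sound.

```rust
use std::sync::atomic::{AtomicU64, Ordering};

// Double `val` atomically, tolerating spurious failures.
fn double(val: &AtomicU64) -> u64 {
    let mut cur = val.load(Ordering::Relaxed);
    loop {
        // The weak variant may fail even when `cur` matches (exactly what
        // `cmpxchg_weak_failure_rate` simulates), so retry on failure.
        match val.compare_exchange_weak(cur, cur * 2, Ordering::AcqRel, Ordering::Relaxed) {
            Ok(old) => return old,
            Err(actual) => cur = actual,
        }
    }
}

fn main() {
    let v = AtomicU64::new(21);
    double(&v);
    assert_eq!(v.load(Ordering::Relaxed), 42);
}
```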
@@ -546,11 +618,11 @@ fn atomic_compare_exchange_scalar(
         Ok(res)
     }
 
-    /// Update the data-race detector for an atomic read occuring at the
+    /// Update the data-race detector for an atomic read occurring at the
     /// associated memory-place and on the current thread.
     fn validate_atomic_load(
         &self,
-        place: MPlaceTy<'tcx, Tag>,
+        place: &MPlaceTy<'tcx, Tag>,
         atomic: AtomicReadOp,
     ) -> InterpResult<'tcx> {
         let this = self.eval_context_ref();
@@ -568,14 +640,14 @@ fn validate_atomic_load(
         )
     }
 
-    /// Update the data-race detector for an atomic write occuring at the
+    /// Update the data-race detector for an atomic write occurring at the
     /// associated memory-place and on the current thread.
     fn validate_atomic_store(
         &mut self,
-        place: MPlaceTy<'tcx, Tag>,
+        place: &MPlaceTy<'tcx, Tag>,
         atomic: AtomicWriteOp,
     ) -> InterpResult<'tcx> {
-        let this = self.eval_context_ref();
+        let this = self.eval_context_mut();
         this.validate_atomic_op(
             place,
             atomic,
@@ -590,17 +662,17 @@ fn validate_atomic_store(
         )
     }
 
-    /// Update the data-race detector for an atomic read-modify-write occuring
+    /// Update the data-race detector for an atomic read-modify-write occurring
     /// at the associated memory place and on the current thread.
     fn validate_atomic_rmw(
         &mut self,
-        place: MPlaceTy<'tcx, Tag>,
+        place: &MPlaceTy<'tcx, Tag>,
         atomic: AtomicRwOp,
     ) -> InterpResult<'tcx> {
         use AtomicRwOp::*;
         let acquire = matches!(atomic, Acquire | AcqRel | SeqCst);
         let release = matches!(atomic, Release | AcqRel | SeqCst);
-        let this = self.eval_context_ref();
+        let this = self.eval_context_mut();
         this.validate_atomic_op(place, atomic, "Atomic RMW", move |memory, clocks, index, _| {
             if acquire {
                 memory.load_acquire(clocks, index)?;
@@ -618,12 +690,12 @@ fn validate_atomic_rmw(
     /// Update the data-race detector for an atomic fence on the current thread.
     fn validate_atomic_fence(&mut self, atomic: AtomicFenceOp) -> InterpResult<'tcx> {
         let this = self.eval_context_mut();
-        if let Some(data_race) = &this.memory.extra.data_race {
+        if let Some(data_race) = &mut this.memory.extra.data_race {
             data_race.maybe_perform_sync_operation(move |index, mut clocks| {
                 log::trace!("Atomic fence on {:?} with ordering {:?}", index, atomic);
 
                 // Apply data-race detection for the current fences
-                // this treats AcqRel and SeqCst as the same as a acquire
+                // this treats AcqRel and SeqCst as the same as an acquire
                 // and release fence applied in the same timestamp.
                 if atomic != AtomicFenceOp::Release {
                     // Either Acquire | AcqRel | SeqCst
@@ -633,8 +705,8 @@ fn validate_atomic_fence(&mut self, atomic: AtomicFenceOp) -> InterpResult<'tcx>
                     // Either Release | AcqRel | SeqCst
                     clocks.apply_release_fence();
                 }
-                
-                // Increment timestamp if hase release semantics
+
+                // Increment timestamp in case of release semantics.
                 Ok(atomic != AtomicFenceOp::Acquire)
             })
         } else {
@@ -648,23 +720,47 @@ fn validate_atomic_fence(&mut self, atomic: AtomicFenceOp) -> InterpResult<'tcx>
 pub struct VClockAlloc {
     /// Assigning each byte a MemoryCellClocks.
     alloc_ranges: RefCell<RangeMap<MemoryCellClocks>>,
-
-    // Pointer to global state.
-    global: MemoryExtra,
 }
 
 impl VClockAlloc {
-    /// Create a new data-race allocation detector.
-    pub fn new_allocation(global: &MemoryExtra, len: Size) -> VClockAlloc {
+    /// Create a new data-race detector for newly allocated memory.
+    pub fn new_allocation(
+        global: &MemoryExtra,
+        len: Size,
+        kind: MemoryKind<MiriMemoryKind>,
+    ) -> VClockAlloc {
+        let (alloc_timestamp, alloc_index) = match kind {
+            // User allocated and stack memory should track allocation.
+            MemoryKind::Machine(
+                MiriMemoryKind::Rust | MiriMemoryKind::C | MiriMemoryKind::WinHeap,
+            )
+            | MemoryKind::Stack => {
+                let (alloc_index, clocks) = global.current_thread_state();
+                let alloc_timestamp = clocks.clock[alloc_index];
+                (alloc_timestamp, alloc_index)
+            }
+            // Other global memory should trace races but be allocated at the 0 timestamp.
+            MemoryKind::Machine(
+                MiriMemoryKind::Global
+                | MiriMemoryKind::Machine
+                | MiriMemoryKind::Env
+                | MiriMemoryKind::ExternStatic
+                | MiriMemoryKind::Tls,
+            )
+            | MemoryKind::CallerLocation => (0, VectorIdx::MAX_INDEX),
+        };
         VClockAlloc {
-            global: Rc::clone(global),
-            alloc_ranges: RefCell::new(RangeMap::new(len, MemoryCellClocks::default())),
+            alloc_ranges: RefCell::new(RangeMap::new(
+                len,
+                MemoryCellClocks::new(alloc_timestamp, alloc_index),
+            )),
         }
     }
 
     // Find an index, if one exists, where the value
     // in `l` is greater than the value in `r`.
     fn find_gt_index(l: &VClock, r: &VClock) -> Option<VectorIdx> {
+        log::trace!("Find index where not {:?} <= {:?}", l, r);
         let l_slice = l.as_slice();
         let r_slice = r.as_slice();
         l_slice
@@ -684,7 +780,7 @@ fn find_gt_index(l: &VClock, r: &VClock) -> Option<VectorIdx> {
                         .enumerate()
                         .find_map(|(idx, &r)| if r == 0 { None } else { Some(idx) })
                         .expect("Invalid VClock Invariant");
-                    Some(idx)
+                    Some(idx + r_slice.len())
                 } else {
                     None
                 }
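
The `idx + r_slice.len()` fix above corrects an off-by-prefix bug: the second search only walks the tail of `l` past `r`'s length, so `enumerate` restarts at zero and the result must be offset back into `l`'s index space. A standalone sketch of the same pattern over plain slices:

```rust
// Find an index where l[i] > r[i], with missing entries treated as 0.
fn find_gt_index(l: &[u64], r: &[u64]) -> Option<usize> {
    // Compare the overlapping prefix element-wise.
    if let Some(idx) = l.iter().zip(r).position(|(a, b)| a > b) {
        return Some(idx);
    }
    // Any non-zero element in l's tail beats r's implicit zeros. Without
    // the `+ r.len()` offset this would return a tail-relative index.
    l.get(r.len()..)
        .and_then(|tail| tail.iter().position(|&v| v != 0))
        .map(|idx| idx + r.len())
}

fn main() {
    assert_eq!(find_gt_index(&[0, 0, 3], &[1, 1]), Some(2)); // not Some(0)
    assert_eq!(find_gt_index(&[0, 1], &[1, 1]), None);
}
```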
@@ -694,9 +790,9 @@ fn find_gt_index(l: &VClock, r: &VClock) -> Option<VectorIdx> {
 
     /// Report a data-race found in the program.
     /// This finds the two racing threads and the type
-    /// of data-race that occured. This will also
+    /// of data-race that occurred. This will also
     /// return info about the memory location the data-race
-    /// occured in.
+    /// occurred in.
     #[cold]
     #[inline(never)]
     fn report_data_race<'tcx>(
@@ -704,8 +800,7 @@ fn report_data_race<'tcx>(
         range: &MemoryCellClocks,
         action: &str,
         is_atomic: bool,
-        pointer: Pointer<Tag>,
-        len: Size,
+        ptr_dbg: Pointer<AllocId>,
     ) -> InterpResult<'tcx> {
         let (current_index, current_clocks) = global.current_thread_state();
         let write_clock;
@@ -715,18 +810,18 @@ fn report_data_race<'tcx>(
             // Convert the write action into the vector clock it
             // represents for diagnostic purposes.
             write_clock = VClock::new_with_index(range.write_index, range.write);
-            ("WRITE", range.write_index, &write_clock)
+            (range.write_type.get_descriptor(), range.write_index, &write_clock)
         } else if let Some(idx) = Self::find_gt_index(&range.read, &current_clocks.clock) {
-            ("READ", idx, &range.read)
+            ("Read", idx, &range.read)
         } else if !is_atomic {
             if let Some(atomic) = range.atomic() {
                 if let Some(idx) = Self::find_gt_index(&atomic.write_vector, &current_clocks.clock)
                 {
-                    ("ATOMIC_STORE", idx, &atomic.write_vector)
+                    ("Atomic Store", idx, &atomic.write_vector)
                 } else if let Some(idx) =
                     Self::find_gt_index(&atomic.read_vector, &current_clocks.clock)
                 {
-                    ("ATOMIC_LOAD", idx, &atomic.read_vector)
+                    ("Atomic Load", idx, &atomic.read_vector)
                 } else {
                     unreachable!(
                         "Failed to report data-race for non-atomic operation: no race found"
@@ -747,40 +842,40 @@ fn report_data_race<'tcx>(
 
         // Throw the data-race detection.
         throw_ub_format!(
-            "Data race detected between {} on {} and {} on {}, memory({:?},offset={},size={})\
-            \n\t\t -current vector clock = {:?}\
-            \n\t\t -conflicting timestamp = {:?}",
+            "Data race detected between {} on {} and {} on {} at {:?} (current vector clock = {:?}, conflicting timestamp = {:?})",
             action,
             current_thread_info,
             other_action,
             other_thread_info,
-            pointer.alloc_id,
-            pointer.offset.bytes(),
-            len.bytes(),
+            ptr_dbg,
             current_clocks.clock,
             other_clock
         )
     }
 
-    /// Detect data-races for an unsychronized read operation, will not perform
+    /// Detect data-races for an unsynchronized read operation; this will not perform
     /// data-race detection if `multi-threaded` is false, either due to no threads
     /// being created or if it is temporarily disabled during a racy read or write
     /// operation for which data-race detection is handled separately, for example
     /// atomic read operations.
-    pub fn read<'tcx>(&self, pointer: Pointer<Tag>, len: Size) -> InterpResult<'tcx> {
-        if self.global.multi_threaded.get() {
-            let (index, clocks) = self.global.current_thread_state();
+    pub fn read<'tcx>(
+        &self,
+        alloc_id: AllocId,
+        range: AllocRange,
+        global: &GlobalState,
+    ) -> InterpResult<'tcx> {
+        if global.multi_threaded.get() {
+            let (index, clocks) = global.current_thread_state();
             let mut alloc_ranges = self.alloc_ranges.borrow_mut();
-            for (_, range) in alloc_ranges.iter_mut(pointer.offset, len) {
+            for (offset, range) in alloc_ranges.iter_mut(range.start, range.size) {
                 if let Err(DataRace) = range.read_race_detect(&*clocks, index) {
                     // Report data-race.
                     return Self::report_data_race(
-                        &self.global,
+                        global,
                         range,
-                        "READ",
+                        "Read",
                         false,
-                        pointer,
-                        len,
+                        Pointer::new(alloc_id, offset),
                     );
                 }
             }
@@ -793,22 +888,22 @@ pub fn read<'tcx>(&self, pointer: Pointer<Tag>, len: Size) -> InterpResult<'tcx>
     // Shared code for detecting data-races on unique access to a section of memory
     fn unique_access<'tcx>(
         &mut self,
-        pointer: Pointer<Tag>,
-        len: Size,
-        action: &str,
+        alloc_id: AllocId,
+        range: AllocRange,
+        write_type: WriteType,
+        global: &mut GlobalState,
     ) -> InterpResult<'tcx> {
-        if self.global.multi_threaded.get() {
-            let (index, clocks) = self.global.current_thread_state();
-            for (_, range) in self.alloc_ranges.get_mut().iter_mut(pointer.offset, len) {
-                if let Err(DataRace) = range.write_race_detect(&*clocks, index) {
+        if global.multi_threaded.get() {
+            let (index, clocks) = global.current_thread_state();
+            for (offset, range) in self.alloc_ranges.get_mut().iter_mut(range.start, range.size) {
+                if let Err(DataRace) = range.write_race_detect(&*clocks, index, write_type) {
                     // Report data-race
                     return Self::report_data_race(
-                        &self.global,
+                        global,
                         range,
-                        action,
+                        write_type.get_descriptor(),
                         false,
-                        pointer,
-                        len,
+                        Pointer::new(alloc_id, offset),
                     );
                 }
             }
@@ -818,27 +913,37 @@ fn unique_access<'tcx>(
         }
     }
 
-    /// Detect data-races for an unsychronized write operation, will not perform
+    /// Detect data-races for an unsynchronized write operation; this will not perform
     /// data-race detection if `multi-threaded` is false, either due to no threads
     /// being created or if it is temporarily disabled during a racy read or write
     /// operation
-    pub fn write<'tcx>(&mut self, pointer: Pointer<Tag>, len: Size) -> InterpResult<'tcx> {
-        self.unique_access(pointer, len, "Write")
+    pub fn write<'tcx>(
+        &mut self,
+        alloc_id: AllocId,
+        range: AllocRange,
+        global: &mut GlobalState,
+    ) -> InterpResult<'tcx> {
+        self.unique_access(alloc_id, range, WriteType::Write, global)
     }
 
-    /// Detect data-races for an unsychronized deallocate operation, will not perform
+    /// Detect data-races for an unsynchronized deallocate operation; this will not perform
     /// data-race detection if `multi-threaded` is false, either due to no threads
     /// being created or if it is temporarily disabled during a racy read or write
     /// operation
-    pub fn deallocate<'tcx>(&mut self, pointer: Pointer<Tag>, len: Size) -> InterpResult<'tcx> {
-        self.unique_access(pointer, len, "Deallocate")
+    pub fn deallocate<'tcx>(
+        &mut self,
+        alloc_id: AllocId,
+        range: AllocRange,
+        global: &mut GlobalState,
+    ) -> InterpResult<'tcx> {
+        self.unique_access(alloc_id, range, WriteType::Deallocate, global)
     }
 }
 
 impl<'mir, 'tcx: 'mir> EvalContextPrivExt<'mir, 'tcx> for MiriEvalContext<'mir, 'tcx> {}
 trait EvalContextPrivExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> {
     // Temporarily allow data-races to occur; this should only be
-    // used if either one of the appropiate `validate_atomic` functions
+    // used if either one of the appropriate `validate_atomic` functions
     // will be called to treat a memory access as atomic or if the memory
     // being accessed should be treated as internal state, that cannot be
     // accessed by the interpreted program.
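
An illustrative sketch of what the new `Deallocate` write type catches: freeing memory that another thread may still be accessing. The detector treats the deallocation as a write to every byte of the allocation, so an unsynchronized concurrent access is reported as a data race rather than only as a use-after-free.

```rust
use std::thread;

fn main() {
    let raw = Box::into_raw(Box::new(0u64)) as usize;
    let t = thread::spawn(move || {
        // Unsynchronized read of the heap allocation...
        unsafe { std::ptr::read(raw as *const u64) }
    });
    // ...racing with its deallocation on the main thread: dropping the
    // re-formed Box is a `Deallocate` write in the model above.
    drop(unsafe { Box::from_raw(raw as *mut u64) });
    t.join().unwrap();
}
```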
@@ -878,15 +983,10 @@ fn allow_data_races_mut<R>(
         result
     }
 
-    /// Generic atomic operation implementation,
-    /// this accesses memory via get_raw instead of
-    /// get_raw_mut, due to issues calling get_raw_mut
-    /// for atomic loads from read-only memory.
-    /// FIXME: is this valid, or should get_raw_mut be used for
-    /// atomic-stores/atomic-rmw?
+    /// Generic atomic operation implementation
     fn validate_atomic_op<A: Debug + Copy>(
         &self,
-        place: MPlaceTy<'tcx, Tag>,
+        place: &MPlaceTy<'tcx, Tag>,
         atomic: A,
         description: &str,
         mut op: impl FnMut(
@@ -899,36 +999,35 @@ fn validate_atomic_op<A: Debug + Copy>(
         let this = self.eval_context_ref();
         if let Some(data_race) = &this.memory.extra.data_race {
             if data_race.multi_threaded.get() {
-                // Load and log the atomic operation.
-                let place_ptr = place.ptr.assert_ptr();
                 let size = place.layout.size;
+                let (alloc_id, base_offset, ptr) = this.memory.ptr_get_alloc(place.ptr)?;
+                // Load and log the atomic operation.
+                // Note that atomic loads are possible even from read-only allocations, so `get_alloc_extra_mut` is not an option.
                 let alloc_meta =
-                    &this.memory.get_raw(place_ptr.alloc_id)?.extra.data_race.as_ref().unwrap();
+                    &this.memory.get_alloc_extra(alloc_id)?.data_race.as_ref().unwrap();
                 log::trace!(
-                    "Atomic op({}) with ordering {:?} on memory({:?}, offset={}, size={})",
+                    "Atomic op({}) with ordering {:?} on {:?} (size={})",
                     description,
                     &atomic,
-                    place_ptr.alloc_id,
-                    place_ptr.offset.bytes(),
+                    ptr,
                     size.bytes()
                 );
 
                 // Perform the atomic operation.
-                let data_race = &alloc_meta.global;
                 data_race.maybe_perform_sync_operation(|index, mut clocks| {
-                    for (_, range) in
-                        alloc_meta.alloc_ranges.borrow_mut().iter_mut(place_ptr.offset, size)
+                    for (offset, range) in
+                        alloc_meta.alloc_ranges.borrow_mut().iter_mut(base_offset, size)
                     {
                         if let Err(DataRace) = op(range, &mut *clocks, index, atomic) {
                             mem::drop(clocks);
                             return VClockAlloc::report_data_race(
-                                &alloc_meta.global,
+                                data_race,
                                 range,
                                 description,
                                 true,
-                                place_ptr,
-                                size,
-                            ).map(|_| true);
+                                Pointer::new(alloc_id, offset),
+                            )
+                            .map(|_| true);
                         }
                     }
 
@@ -938,12 +1037,11 @@ fn validate_atomic_op<A: Debug + Copy>(
 
                 // Log changes to atomic memory.
                 if log::log_enabled!(log::Level::Trace) {
-                    for (_, range) in alloc_meta.alloc_ranges.borrow().iter(place_ptr.offset, size)
+                    for (_offset, range) in alloc_meta.alloc_ranges.borrow().iter(base_offset, size)
                     {
                         log::trace!(
-                            "Updated atomic memory({:?}, offset={}, size={}) to {:#?}",
-                            place.ptr.assert_ptr().alloc_id,
-                            place_ptr.offset.bytes(),
+                            "Updated atomic memory({:?}, size={}) to {:#?}",
+                            ptr,
                             size.bytes(),
                             range.atomic_ops
                         );
@@ -980,6 +1078,7 @@ struct ThreadExtraState {
 /// Global data-race detection state, contains the currently
 /// executing thread as well as the vector-clocks associated
 /// with each of the threads.
+// FIXME: it is probably better to have one large RefCell, than to have so many small ones.
 #[derive(Debug, Clone)]
 pub struct GlobalState {
     /// Set to true once the first additional
@@ -1000,7 +1099,7 @@ pub struct GlobalState {
     /// if a vector index is re-assigned to a new thread.
     vector_info: RefCell<IndexVec<VectorIdx, ThreadId>>,
 
-    /// The mapping of a given thread to assocaited thread metadata.
+    /// The mapping of a given thread to associated thread metadata.
     thread_info: RefCell<IndexVec<ThreadId, ThreadExtraState>>,
 
     /// The current vector index being executed.
@@ -1017,7 +1116,7 @@ pub struct GlobalState {
 
     /// Counts the number of threads that are currently active
     /// if the number of active threads reduces to 1 and then
-    /// a join operation occures with the remaining main thread
+    /// a join operation occurs with the remaining main thread
     /// then multi-threaded execution may be disabled.
     active_thread_count: Cell<usize>,
 
@@ -1033,7 +1132,7 @@ impl GlobalState {
     /// Create a new global state, set up with just thread-id=0
     /// advanced to timestamp = 1.
     pub fn new() -> Self {
-        let global_state = GlobalState {
+        let mut global_state = GlobalState {
             multi_threaded: Cell::new(false),
             vector_clocks: RefCell::new(IndexVec::new()),
             vector_info: RefCell::new(IndexVec::new()),
@@ -1047,9 +1146,9 @@ pub fn new() -> Self {
         // Setup the main-thread since it is not explicitly created:
         // uses vector index and thread-id 0, also the rust runtime gives
         // the main-thread a name of "main".
-        let index = global_state.vector_clocks.borrow_mut().push(ThreadClockSet::default());
-        global_state.vector_info.borrow_mut().push(ThreadId::new(0));
-        global_state.thread_info.borrow_mut().push(ThreadExtraState {
+        let index = global_state.vector_clocks.get_mut().push(ThreadClockSet::default());
+        global_state.vector_info.get_mut().push(ThreadId::new(0));
+        global_state.thread_info.get_mut().push(ThreadExtraState {
             vector_index: Some(index),
             thread_name: Some("main".to_string().into_boxed_str()),
             termination_vector_clock: None,
@@ -1096,7 +1195,7 @@ fn find_vector_index_reuse_candidate(&self) -> Option<VectorIdx> {
     // Hook for thread creation, enables multi-threaded execution and marks
     // the current thread timestamp as happening-before the newly created thread.
     #[inline]
-    pub fn thread_created(&self, thread: ThreadId) {
+    pub fn thread_created(&mut self, thread: ThreadId) {
         let current_index = self.current_index();
 
         // Increment the number of active threads.
@@ -1116,12 +1215,12 @@ pub fn thread_created(&self, thread: ThreadId) {
         let created_index = if let Some(reuse_index) = self.find_vector_index_reuse_candidate() {
             // Now re-configure the re-use candidate, increment the clock
             // for the new sync use of the vector.
-            let mut vector_clocks = self.vector_clocks.borrow_mut();
+            let vector_clocks = self.vector_clocks.get_mut();
             vector_clocks[reuse_index].increment_clock(reuse_index);
 
             // Locate the old thread the vector was associated with and update
             // it to represent the new thread instead.
-            let mut vector_info = self.vector_info.borrow_mut();
+            let vector_info = self.vector_info.get_mut();
             let old_thread = vector_info[reuse_index];
             vector_info[reuse_index] = thread;
 
@@ -1133,15 +1232,17 @@ pub fn thread_created(&self, thread: ThreadId) {
         } else {
             // No vector re-use candidates available, instead create
             // a new vector index.
-            let mut vector_info = self.vector_info.borrow_mut();
+            let vector_info = self.vector_info.get_mut();
             vector_info.push(thread)
         };
 
+        log::trace!("Creating thread = {:?} with vector index = {:?}", thread, created_index);
+
         // Mark the chosen vector index as in use by the thread.
         thread_info[thread].vector_index = Some(created_index);
 
         // Create a thread clock set if applicable.
-        let mut vector_clocks = self.vector_clocks.borrow_mut();
+        let vector_clocks = self.vector_clocks.get_mut();
         if created_index == vector_clocks.next_index() {
             vector_clocks.push(ThreadClockSet::default());
         }
@@ -1160,11 +1261,11 @@ pub fn thread_created(&self, thread: ThreadId) {
     }
 
     /// Hook on a thread join to update the implicit happens-before relation
-    /// between the joined thead and the current thread.
+    /// between the joined thread and the current thread.
     #[inline]
-    pub fn thread_joined(&self, current_thread: ThreadId, join_thread: ThreadId) {
-        let mut clocks_vec = self.vector_clocks.borrow_mut();
-        let thread_info = self.thread_info.borrow();
+    pub fn thread_joined(&mut self, current_thread: ThreadId, join_thread: ThreadId) {
+        let clocks_vec = self.vector_clocks.get_mut();
+        let thread_info = self.thread_info.get_mut();
 
         // Load the vector clock of the current thread.
         let current_index = thread_info[current_thread]
@@ -1178,7 +1279,6 @@ pub fn thread_joined(&self, current_thread: ThreadId, join_thread: ThreadId) {
             .as_ref()
             .expect("Joined with thread but thread has not terminated");
 
-
         // The join thread happens-before the current thread
         // so update the current vector clock.
         // Is not a release operation so the clock is not incremented.
@@ -1194,7 +1294,7 @@ pub fn thread_joined(&self, current_thread: ThreadId, join_thread: ThreadId) {
                 .iter_enumerated()
                 .all(|(idx, clocks)| clocks.clock[idx] <= current_clock.clock[idx])
             {
-                // The all thread termations happen-before the current clock
+                // All thread terminations happen-before the current clock
                 // therefore no data-races can be reported until a new thread
                 // is created, so disable multi-threaded execution.
                 self.multi_threaded.set(false);
@@ -1203,9 +1303,9 @@ pub fn thread_joined(&self, current_thread: ThreadId, join_thread: ThreadId) {
 
         // If the thread is marked as terminated but not joined
         // then move the thread to the re-use set.
-        let mut termination = self.terminated_threads.borrow_mut();
+        let termination = self.terminated_threads.get_mut();
         if let Some(index) = termination.remove(&join_thread) {
-            let mut reuse = self.reuse_candidates.borrow_mut();
+            let reuse = self.reuse_candidates.get_mut();
             reuse.insert(index);
         }
     }
@@ -1213,33 +1313,33 @@ pub fn thread_joined(&self, current_thread: ThreadId, join_thread: ThreadId) {
     /// On thread termination, the vector-clock may be re-used
     /// in the future once all remaining thread-clocks catch
     /// up with the time index of the terminated thread.
-    /// This assiges thread termination with a unique index
+    /// This assigns thread termination with a unique index
     /// which will be used to join the thread.
     /// This should be called strictly before any calls to
     /// `thread_joined`.
     #[inline]
-    pub fn thread_terminated(&self) {
+    pub fn thread_terminated(&mut self) {
         let current_index = self.current_index();
 
         // Increment the clock to a unique termination timestamp.
-        let mut vector_clocks = self.vector_clocks.borrow_mut();
+        let vector_clocks = self.vector_clocks.get_mut();
         let current_clocks = &mut vector_clocks[current_index];
         current_clocks.increment_clock(current_index);
 
         // Load the current thread id for the executing vector.
-        let vector_info = self.vector_info.borrow();
+        let vector_info = self.vector_info.get_mut();
         let current_thread = vector_info[current_index];
 
         // Load the current thread metadata, and move to a terminated
         // vector state. Setting up the vector clock all join operations
         // will use.
-        let mut thread_info = self.thread_info.borrow_mut();
+        let thread_info = self.thread_info.get_mut();
         let current = &mut thread_info[current_thread];
         current.termination_vector_clock = Some(current_clocks.clock.clone());
 
         // Add this thread as a candidate for re-use after a thread join
         // occurs.
-        let mut termination = self.terminated_threads.borrow_mut();
+        let termination = self.terminated_threads.get_mut();
         termination.insert(current_thread, current_index);
 
         // Reduce the number of active threads, now that a thread has
@@ -1266,9 +1366,9 @@ pub fn thread_set_active(&self, thread: ThreadId) {
     /// the thread name is used for improved diagnostics
     /// during a data-race.
     #[inline]
-    pub fn thread_set_name(&self, thread: ThreadId, name: String) {
+    pub fn thread_set_name(&mut self, thread: ThreadId, name: String) {
         let name = name.into_boxed_str();
-        let mut thread_info = self.thread_info.borrow_mut();
+        let thread_info = self.thread_info.get_mut();
         thread_info[thread].thread_name = Some(name);
     }
 
@@ -1318,8 +1418,8 @@ pub fn validate_lock_acquire(&self, lock: &VClock, thread: ThreadId) {
     /// Release a lock handle, express that this happens-before
     /// any subsequent calls to `validate_lock_acquire`.
     /// For normal locks this should be equivalent to `validate_lock_release_shared`
-    /// since an acquire operation should have occured before, however
-    /// for futex & cond-var operations this is not the case and this
+    /// since an acquire operation should have occurred before, however
+    /// for futex & condvar operations this is not the case and this
     /// operation must be used.
     pub fn validate_lock_release(&self, lock: &mut VClock, thread: ThreadId) {
         let (index, mut clocks) = self.load_thread_state_mut(thread);
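
Finally, a sketch of the happens-before edges that `validate_lock_release` and `validate_lock_acquire` model: an unlock releases the current thread's clock into the lock's clock, and the next lock acquires it, which is what makes the mutex version of the earlier counter example race-free.

```rust
use std::sync::Mutex;
use std::thread;

static COUNTER: Mutex<u64> = Mutex::new(0);

fn main() {
    let t = thread::spawn(|| {
        // The unlock at the end of this statement releases the thread's
        // clock into the lock's clock (what validate_lock_release models).
        *COUNTER.lock().unwrap() += 1;
    });
    t.join().unwrap();
    // Locking here joins the released clock (validate_lock_acquire), so
    // this access is ordered after the increment: no data race.
    assert_eq!(*COUNTER.lock().unwrap(), 1);
}
```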