diff --git a/src/data_race.rs b/src/data_race.rs
index 49332721fcbd11757b57c9c9216b7721b5212285..9e5dfd9dbaf0d39596a82932164169fb777ba62d 100644
--- a/src/data_race.rs
+++ b/src/data_race.rs
@@ -1,13 +1,21 @@
 //! Implementation of a data-race detector using Lamport Timestamps / Vector-clocks
-//! based on the Dyamic Race Detection for C++:
+//! based on the Dynamic Race Detection for C++:
 //! https://www.doc.ic.ac.uk/~afd/homepages/papers/pdfs/2017/POPL.pdf
 //! which does not report false-positives when fences are used, and gives better
 //! accuracy in presence of read-modify-write operations.
 //!
+//! The implementation contains modifications to correctly model the changes to the memory model in C++20
+//! regarding the weakening of release sequences: http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2018/p0982r1.html.
+//! Relaxed stores now unconditionally block all currently active release sequences and so per-thread tracking of release
+//! sequences is not needed.
+//!
+//! The implementation also models races with memory allocation and deallocation by internally treating allocation
+//! and deallocation as a type of write for the purposes of data-race detection.
+//!
 //! This does not explore weak memory orders and so can still miss data-races
 //! but should not report false-positives.
 //!
-//! Data-race definiton from(https://en.cppreference.com/w/cpp/language/memory_model#Threads_and_data_races):
+//! Data-race definition from (https://en.cppreference.com/w/cpp/language/memory_model#Threads_and_data_races):
 //! a data race occurs between two memory accesses if they are on different threads, at least one operation
 //! is non-atomic, at least one operation is a write and neither access happens-before the other. Read the link
 //! for full definition.
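
To make the definition above concrete, here is a minimal illustrative sketch (not part of this file) of the kind of program the detector is meant to flag: the two accesses are on different threads, both are non-atomic, one is a write, and no happens-before edge orders them.

```rust
use std::thread;

static mut COUNTER: u64 = 0;

fn main() {
    // The spawned thread writes COUNTER while the main thread reads it.
    // Nothing synchronizes the two accesses, so this is a data race (UB).
    let t = thread::spawn(|| unsafe { COUNTER += 1 });
    let _ = unsafe { COUNTER }; // racy read on the main thread
    t.join().unwrap();
}
```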
@@ -21,7 +29,7 @@
 //! This means that the thread-index can be safely re-used, starting on the next timestamp for the newly created
 //! thread.
 //!
-//! The sequentially consistant ordering corresponds to the ordering that the threads
+//! The sequentially consistent ordering corresponds to the ordering that the threads
 //! are currently scheduled, this means that the data-race detector has no additional
 //! logic for sequentially consistent accesses at the moment since they are indistinguishable
 //! from acquire/release operations. If weak memory orderings are explored then this
@@ -34,7 +42,7 @@
 //!    order exists in which all threads observe all modifications in the same
 //!    order (see Sequentially-consistent ordering below) "
 //! So in the absence of weak memory effects a seq-cst load & a seq-cst store is identical
-//! to a acquire load and a release store given the global sequentially consistent order
+//! to an acquire load and a release store given the global sequentially consistent order
 //! of the schedule.
 //!
 //! The timestamps used in the data-race detector assign each sequence of non-atomic operations
@@ -44,7 +52,7 @@
 //! required for accurate reporting of data-race values.
 //!
 //! As per the paper a thread's timestamp is only incremented after a release operation is performed
-//! so some atomic operations that only perform acquires do not increment the timestamp, due to shared
+//! so some atomic operations that only perform acquires do not increment the timestamp. Due to shared
 //! code some atomic operations may increment the timestamp when not necessary but this has no effect
 //! on the data-race detection code.
 //!
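
As a rough sketch of the vector-clock reasoning described above (using a plain `Vec<u64>` instead of the crate's `VClock` type; the names here are illustrative only): an access happens-before another iff its clock is component-wise less-than-or-equal, and two accesses race when neither ordering holds.

```rust
type Clock = Vec<u64>; // one logical timestamp slot per thread

// Event with clock `a` happens-before event with clock `b` iff
// a[i] <= b[i] at every index, treating missing entries as 0.
fn happens_before(a: &Clock, b: &Clock) -> bool {
    (0..a.len().max(b.len()))
        .all(|i| a.get(i).copied().unwrap_or(0) <= b.get(i).copied().unwrap_or(0))
}

fn main() {
    let write = vec![1, 0]; // thread 0 wrote at its timestamp 1
    let read = vec![0, 2];  // thread 1 read without ever synchronizing
    // Neither access happens-before the other: this pair is a data race.
    assert!(!happens_before(&write, &read) && !happens_before(&read, &write));
}
```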
@@ -57,7 +65,6 @@
     cell::{Cell, Ref, RefCell, RefMut},
     fmt::Debug,
     mem,
-    rc::Rc,
 };
 
 use rustc_data_structures::fx::{FxHashMap, FxHashSet};
 use rustc_target::abi::Size;
 
 use crate::{
-    ImmTy, Immediate, InterpResult, MPlaceTy, MemPlaceMeta, MiriEvalContext, MiriEvalContextExt,
-    OpTy, Pointer, RangeMap, ScalarMaybeUninit, Tag, ThreadId, VClock, VSmallClockMap, VTimestamp,
-    VectorIdx,
+    AllocId, AllocRange, ImmTy, Immediate, InterpResult, MPlaceTy, MemPlaceMeta, MemoryKind,
+    MiriEvalContext, MiriEvalContextExt, MiriMemoryKind, OpTy, Pointer, RangeMap, Scalar,
+    ScalarMaybeUninit, Tag, ThreadId, VClock, VTimestamp, VectorIdx,
 };
 
 pub type AllocExtra = VClockAlloc;
-pub type MemoryExtra = Rc<GlobalState>;
+pub type MemoryExtra = GlobalState;
 
 /// Valid atomic read-write operations, alias of atomic::Ordering (not non-exhaustive).
 #[derive(Copy, Clone, PartialEq, Eq, Debug)]
@@ -122,7 +129,7 @@ struct ThreadClockSet {
     /// thread once it performs an acquire fence.
     fence_acquire: VClock,
 
-    /// The last timesamp of happens-before relations that
+    /// The last timestamp of happens-before relations that
     /// have been released by this thread by a fence.
     fence_release: VClock,
 }
@@ -135,7 +142,7 @@ fn apply_release_fence(&mut self) {
         self.fence_release.clone_from(&self.clock);
     }
 
-    /// Apply the effects of a acquire fence to this
+    /// Apply the effects of an acquire fence to this
     /// set of thread vector clocks.
     #[inline]
     fn apply_acquire_fence(&mut self) {
@@ -185,13 +192,34 @@ struct AtomicMemoryCellClocks {
     /// happen-before a thread if an acquire-load is
     /// performed on the data.
     sync_vector: VClock,
+}
 
-    /// The Hash-Map of all threads for which a release
-    /// sequence exists in the memory cell, required
-    /// since read-modify-write operations do not
-    /// invalidate existing release sequences.
-    /// See page 6 of linked paper.
-    release_sequences: VSmallClockMap,
+/// Type of write operation: allocating memory,
+/// non-atomic writes and deallocating memory
+/// are all treated as writes for the purpose
+/// of the data-race detector.
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+enum WriteType {
+    /// Allocate memory.
+    Allocate,
+
+    /// Standard unsynchronized write.
+    Write,
+
+    /// Deallocate memory.
+    /// Note that when memory is deallocated first, later non-atomic accesses
+    /// will be reported as use-after-free, not as data races.
+    /// (Same for `Allocate` above.)
+    Deallocate,
+}
+impl WriteType {
+    fn get_descriptor(self) -> &'static str {
+        match self {
+            WriteType::Allocate => "Allocate",
+            WriteType::Write => "Write",
+            WriteType::Deallocate => "Deallocate",
+        }
+    }
 }
 
 /// Memory Cell vector clock metadata
@@ -206,8 +234,13 @@ struct MemoryCellClocks {
     /// that performed the last write operation.
     write_index: VectorIdx,
 
+    /// The type of operation that the write index represents,
+    /// either newly allocated memory, a non-atomic write or
+    /// a deallocation of memory.
+    write_type: WriteType,
+
     /// The vector-clock of the timestamp of the last read operation
-    /// performed by a thread since the last write operation occured.
+    /// performed by a thread since the last write operation occurred.
     /// It is reset to zero on each write operation.
     read: VClock,
 
@@ -217,20 +250,19 @@ struct MemoryCellClocks {
     atomic_ops: Option<Box<AtomicMemoryCellClocks>>,
 }
 
-/// Create a default memory cell clocks instance
-/// for uninitialized memory.
-impl Default for MemoryCellClocks {
-    fn default() -> Self {
+impl MemoryCellClocks {
+    /// Create a new set of clocks representing memory allocated
+    /// at a given vector timestamp and index.
+    fn new(alloc: VTimestamp, alloc_index: VectorIdx) -> Self {
         MemoryCellClocks {
             read: VClock::default(),
-            write: 0,
-            write_index: VectorIdx::MAX_INDEX,
+            write: alloc,
+            write_index: alloc_index,
+            write_type: WriteType::Allocate,
             atomic_ops: None,
         }
     }
-}
 
-impl MemoryCellClocks {
     /// Load the internal atomic memory cells if they exist.
     #[inline]
     fn atomic(&self) -> Option<&AtomicMemoryCellClocks> {
@@ -283,8 +315,6 @@ fn store_release(&mut self, clocks: &ThreadClockSet, index: VectorIdx) -> Result
         self.atomic_write_detect(clocks, index)?;
         let atomic = self.atomic_mut();
         atomic.sync_vector.clone_from(&clocks.clock);
-        atomic.release_sequences.clear();
-        atomic.release_sequences.insert(index, &clocks.clock);
         Ok(())
     }
 
@@ -292,12 +322,13 @@ fn store_release(&mut self, clocks: &ThreadClockSet, index: VectorIdx) -> Result
     /// store relaxed semantics.
     fn store_relaxed(&mut self, clocks: &ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> {
         self.atomic_write_detect(clocks, index)?;
+
+        // The handling of release sequences was changed in C++20 and so
+        // the code here is different to the paper since now all relaxed
+        // stores block release sequences. The exception for same-thread
+        // relaxed stores has been removed.
         let atomic = self.atomic_mut();
         atomic.sync_vector.clone_from(&clocks.fence_release);
-        if let Some(release) = atomic.release_sequences.get(index) {
-            atomic.sync_vector.join(release);
-        }
-        atomic.release_sequences.retain_index(index);
         Ok(())
     }
 
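An illustrative sketch of the C++20 behaviour that `store_relaxed` now models (written against the standard library atomics, not this crate's API): once a later relaxed store to the same location blocks the release sequence, an acquire load that reads from it no longer synchronizes with the original release store, even though the relaxed store came from the same thread.

```rust
use std::sync::atomic::{AtomicUsize, Ordering};
use std::thread;

static FLAG: AtomicUsize = AtomicUsize::new(0);
static mut DATA: u64 = 0;

fn writer() {
    unsafe { DATA = 1 };              // non-atomic write
    FLAG.store(1, Ordering::Release); // heads a release sequence
    // Under C++20 (and the model above) this relaxed store blocks the
    // release sequence, even though it is from the same thread.
    FLAG.store(2, Ordering::Relaxed);
}

fn reader() {
    if FLAG.load(Ordering::Acquire) == 2 {
        // The acquire read took its value from the relaxed store, so no
        // synchronizes-with edge exists: this read races with `DATA = 1`.
        let _ = unsafe { DATA };
    }
}

fn main() {
    let t = thread::spawn(writer);
    reader();
    t.join().unwrap();
}
```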
@@ -307,7 +338,6 @@ fn rmw_release(&mut self, clocks: &ThreadClockSet, index: VectorIdx) -> Result<(
         self.atomic_write_detect(clocks, index)?;
         let atomic = self.atomic_mut();
         atomic.sync_vector.join(&clocks.clock);
-        atomic.release_sequences.insert(index, &clocks.clock);
         Ok(())
     }
 
@@ -385,6 +415,7 @@ fn write_race_detect(
         &mut self,
         clocks: &ThreadClockSet,
         index: VectorIdx,
+        write_type: WriteType,
     ) -> Result<(), DataRace> {
         log::trace!("Unsynchronized write with vectors: {:#?} :: {:#?}", self, clocks);
         if self.write <= clocks.clock[self.write_index] && self.read <= clocks.clock {
@@ -396,6 +427,7 @@ fn write_race_detect(
             if race_free {
                 self.write = clocks.clock[index];
                 self.write_index = index;
+                self.write_type = write_type;
                 self.read.set_zero_vector();
                 Ok(())
             } else {
@@ -413,7 +445,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> {
     /// Atomic variant of read_scalar_at_offset.
     fn read_scalar_at_offset_atomic(
         &self,
-        op: OpTy<'tcx, Tag>,
+        op: &OpTy<'tcx, Tag>,
         offset: u64,
         layout: TyAndLayout<'tcx>,
         atomic: AtomicReadOp,
@@ -425,13 +457,13 @@ fn read_scalar_at_offset_atomic(
         // Ensure that the following read at an offset is within bounds.
         assert!(op_place.layout.size >= offset + layout.size);
         let value_place = op_place.offset(offset, MemPlaceMeta::None, layout, this)?;
-        this.read_scalar_atomic(value_place, atomic)
+        this.read_scalar_atomic(&value_place, atomic)
     }
 
     /// Atomic variant of write_scalar_at_offset.
     fn write_scalar_at_offset_atomic(
         &mut self,
-        op: OpTy<'tcx, Tag>,
+        op: &OpTy<'tcx, Tag>,
         offset: u64,
         value: impl Into<ScalarMaybeUninit<Tag>>,
         layout: TyAndLayout<'tcx>,
@@ -444,18 +476,18 @@ fn write_scalar_at_offset_atomic(
         // Ensure that the following write at an offset is within bounds.
         assert!(op_place.layout.size >= offset + layout.size);
         let value_place = op_place.offset(offset, MemPlaceMeta::None, layout, this)?;
-        this.write_scalar_atomic(value.into(), value_place, atomic)
+        this.write_scalar_atomic(value.into(), &value_place, atomic)
     }
 
     /// Perform an atomic read operation at the memory location.
     fn read_scalar_atomic(
         &self,
-        place: MPlaceTy<'tcx, Tag>,
+        place: &MPlaceTy<'tcx, Tag>,
         atomic: AtomicReadOp,
     ) -> InterpResult<'tcx, ScalarMaybeUninit<Tag>> {
         let this = self.eval_context_ref();
-        let scalar = this.allow_data_races_ref(move |this| this.read_scalar(place.into()))?;
-        self.validate_atomic_load(place, atomic)?;
+        let scalar = this.allow_data_races_ref(move |this| this.read_scalar(&place.into()))?;
+        this.validate_atomic_load(place, atomic)?;
         Ok(scalar)
     }
 
@@ -463,31 +495,31 @@ fn read_scalar_atomic(
     fn write_scalar_atomic(
         &mut self,
         val: ScalarMaybeUninit<Tag>,
-        dest: MPlaceTy<'tcx, Tag>,
+        dest: &MPlaceTy<'tcx, Tag>,
         atomic: AtomicWriteOp,
     ) -> InterpResult<'tcx> {
         let this = self.eval_context_mut();
-        this.allow_data_races_mut(move |this| this.write_scalar(val, dest.into()))?;
-        self.validate_atomic_store(dest, atomic)
+        this.allow_data_races_mut(move |this| this.write_scalar(val, &(*dest).into()))?;
+        this.validate_atomic_store(dest, atomic)
     }
 
-    /// Perform a atomic operation on a memory location.
+    /// Perform an atomic operation on a memory location.
     fn atomic_op_immediate(
         &mut self,
-        place: MPlaceTy<'tcx, Tag>,
-        rhs: ImmTy<'tcx, Tag>,
+        place: &MPlaceTy<'tcx, Tag>,
+        rhs: &ImmTy<'tcx, Tag>,
         op: mir::BinOp,
         neg: bool,
         atomic: AtomicRwOp,
     ) -> InterpResult<'tcx, ImmTy<'tcx, Tag>> {
         let this = self.eval_context_mut();
 
-        let old = this.allow_data_races_mut(|this| this.read_immediate(place.into()))?;
+        let old = this.allow_data_races_mut(|this| this.read_immediate(&place.into()))?;
 
         // Atomics wrap around on overflow.
-        let val = this.binary_op(op, old, rhs)?;
-        let val = if neg { this.unary_op(mir::UnOp::Not, val)? } else { val };
-        this.allow_data_races_mut(|this| this.write_immediate(*val, place.into()))?;
+        let val = this.binary_op(op, &old, rhs)?;
+        let val = if neg { this.unary_op(mir::UnOp::Not, &val)? } else { val };
+        this.allow_data_races_mut(|this| this.write_immediate(*val, &(*place).into()))?;
 
         this.validate_atomic_rmw(place, atomic)?;
         Ok(old)
@@ -497,46 +529,86 @@ fn atomic_op_immediate(
     /// scalar value, the old value is returned.
     fn atomic_exchange_scalar(
         &mut self,
-        place: MPlaceTy<'tcx, Tag>,
+        place: &MPlaceTy<'tcx, Tag>,
         new: ScalarMaybeUninit<Tag>,
         atomic: AtomicRwOp,
     ) -> InterpResult<'tcx, ScalarMaybeUninit<Tag>> {
         let this = self.eval_context_mut();
 
-        let old = this.allow_data_races_mut(|this| this.read_scalar(place.into()))?;
-        this.allow_data_races_mut(|this| this.write_scalar(new, place.into()))?;
+        let old = this.allow_data_races_mut(|this| this.read_scalar(&place.into()))?;
+        this.allow_data_races_mut(|this| this.write_scalar(new, &(*place).into()))?;
         this.validate_atomic_rmw(place, atomic)?;
         Ok(old)
     }
 
+    /// Perform an atomic min/max operation on a memory place with a new
+    /// scalar value; the old value is returned.
+    fn atomic_min_max_scalar(
+        &mut self,
+        place: &MPlaceTy<'tcx, Tag>,
+        rhs: ImmTy<'tcx, Tag>,
+        min: bool,
+        atomic: AtomicRwOp,
+    ) -> InterpResult<'tcx, ImmTy<'tcx, Tag>> {
+        let this = self.eval_context_mut();
+
+        let old = this.allow_data_races_mut(|this| this.read_immediate(&place.into()))?;
+        let lt = this.overflowing_binary_op(mir::BinOp::Lt, &old, &rhs)?.0.to_bool()?;
+
+        let new_val = if min {
+            if lt { &old } else { &rhs }
+        } else {
+            if lt { &rhs } else { &old }
+        };
+
+        this.allow_data_races_mut(|this| this.write_immediate(**new_val, &(*place).into()))?;
+
+        this.validate_atomic_rmw(&place, atomic)?;
+
+        // Return the old value.
+        Ok(old)
+    }
+
     /// Perform an atomic compare and exchange at a given memory location.
     /// On success an atomic RMW operation is performed and on failure
-    /// only an atomic read occurs.
+    /// only an atomic read occurs. If `can_fail_spuriously` is true,
+    /// then we treat it as a "compare_exchange_weak" operation, and
+    /// some portion of the time fail even when the values are actually
+    /// identical.
     fn atomic_compare_exchange_scalar(
         &mut self,
-        place: MPlaceTy<'tcx, Tag>,
-        expect_old: ImmTy<'tcx, Tag>,
+        place: &MPlaceTy<'tcx, Tag>,
+        expect_old: &ImmTy<'tcx, Tag>,
         new: ScalarMaybeUninit<Tag>,
         success: AtomicRwOp,
         fail: AtomicReadOp,
+        can_fail_spuriously: bool,
     ) -> InterpResult<'tcx, Immediate<Tag>> {
+        use rand::Rng as _;
         let this = self.eval_context_mut();
 
         // Failure ordering cannot be stronger than success ordering, therefore first attempt
-        // to read with the failure ordering and if successfull then try again with the success
+        // to read with the failure ordering and if successful then try again with the success
         // read ordering and write in the success case.
         // Read as immediate for the sake of `binary_op()`
-        let old = this.allow_data_races_mut(|this| this.read_immediate(place.into()))?;
-
+        let old = this.allow_data_races_mut(|this| this.read_immediate(&(place.into())))?;
         // `binary_op` will bail if either of them is not a scalar.
-        let eq = this.overflowing_binary_op(mir::BinOp::Eq, old, expect_old)?.0;
-        let res = Immediate::ScalarPair(old.to_scalar_or_uninit(), eq.into());
+        let eq = this.overflowing_binary_op(mir::BinOp::Eq, &old, expect_old)?.0;
+        // If the operation would succeed, but is "weak", fail some portion
+        // of the time, based on `rate`.
+        let rate = this.memory.extra.cmpxchg_weak_failure_rate;
+        let cmpxchg_success = eq.to_bool()?
+            && (!can_fail_spuriously || this.memory.extra.rng.get_mut().gen::<f64>() < rate);
+        let res = Immediate::ScalarPair(
+            old.to_scalar_or_uninit(),
+            Scalar::from_bool(cmpxchg_success).into(),
+        );
 
         // Update ptr depending on comparison.
         // if successful, perform a full rw-atomic validation
         // otherwise treat this as an atomic load with the fail ordering.
-        if eq.to_bool()? {
-            this.allow_data_races_mut(|this| this.write_scalar(new, place.into()))?;
+        if cmpxchg_success {
+            this.allow_data_races_mut(|this| this.write_scalar(new, &(*place).into()))?;
             this.validate_atomic_rmw(place, success)?;
         } else {
             this.validate_atomic_load(place, fail)?;
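
For context, a sketch of the caller-side pattern that `can_fail_spuriously` exercises: `compare_exchange_weak` is specified to be usable only in a retry loop, which is why randomly failing a successful comparison at the configured rate is sound.

```rust
use std::sync::atomic::{AtomicU64, Ordering};

// Double `val` atomically, tolerating spurious failures.
fn double(val: &AtomicU64) -> u64 {
    let mut cur = val.load(Ordering::Relaxed);
    loop {
        // The weak variant may fail even when `cur` matches (exactly what
        // `cmpxchg_weak_failure_rate` simulates), so retry on failure.
        match val.compare_exchange_weak(cur, cur * 2, Ordering::AcqRel, Ordering::Relaxed) {
            Ok(old) => return old,
            Err(actual) => cur = actual,
        }
    }
}

fn main() {
    let v = AtomicU64::new(21);
    double(&v);
    assert_eq!(v.load(Ordering::Relaxed), 42);
}
```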
@@ -546,11 +618,11 @@ fn atomic_compare_exchange_scalar(
         Ok(res)
     }
 
-    /// Update the data-race detector for an atomic read occuring at the
+    /// Update the data-race detector for an atomic read occurring at the
     /// associated memory-place and on the current thread.
     fn validate_atomic_load(
         &self,
-        place: MPlaceTy<'tcx, Tag>,
+        place: &MPlaceTy<'tcx, Tag>,
         atomic: AtomicReadOp,
     ) -> InterpResult<'tcx> {
         let this = self.eval_context_ref();
@@ -568,14 +640,14 @@ fn validate_atomic_load(
         )
     }
 
-    /// Update the data-race detector for an atomic write occuring at the
+    /// Update the data-race detector for an atomic write occurring at the
     /// associated memory-place and on the current thread.
     fn validate_atomic_store(
         &mut self,
-        place: MPlaceTy<'tcx, Tag>,
+        place: &MPlaceTy<'tcx, Tag>,
         atomic: AtomicWriteOp,
     ) -> InterpResult<'tcx> {
-        let this = self.eval_context_ref();
+        let this = self.eval_context_mut();
         this.validate_atomic_op(
             place,
             atomic,
@@ -590,17 +662,17 @@ fn validate_atomic_store(
         )
     }
 
-    /// Update the data-race detector for an atomic read-modify-write occuring
+    /// Update the data-race detector for an atomic read-modify-write occurring
     /// at the associated memory place and on the current thread.
     fn validate_atomic_rmw(
         &mut self,
-        place: MPlaceTy<'tcx, Tag>,
+        place: &MPlaceTy<'tcx, Tag>,
         atomic: AtomicRwOp,
     ) -> InterpResult<'tcx> {
         use AtomicRwOp::*;
         let acquire = matches!(atomic, Acquire | AcqRel | SeqCst);
         let release = matches!(atomic, Release | AcqRel | SeqCst);
-        let this = self.eval_context_ref();
+        let this = self.eval_context_mut();
         this.validate_atomic_op(place, atomic, "Atomic RMW", move |memory, clocks, index, _| {
             if acquire {
                 memory.load_acquire(clocks, index)?;
@@ -618,12 +690,12 @@ fn validate_atomic_rmw(
     /// Update the data-race detector for an atomic fence on the current thread.
     fn validate_atomic_fence(&mut self, atomic: AtomicFenceOp) -> InterpResult<'tcx> {
         let this = self.eval_context_mut();
-        if let Some(data_race) = &this.memory.extra.data_race {
+        if let Some(data_race) = &mut this.memory.extra.data_race {
             data_race.maybe_perform_sync_operation(move |index, mut clocks| {
                 log::trace!("Atomic fence on {:?} with ordering {:?}", index, atomic);
 
                 // Apply data-race detection for the current fences
-                // this treats AcqRel and SeqCst as the same as a acquire
+                // this treats AcqRel and SeqCst as the same as an acquire
                 // and release fence applied in the same timestamp.
                 if atomic != AtomicFenceOp::Release {
                     // Either Acquire | AcqRel | SeqCst
@@ -633,8 +705,8 @@ fn validate_atomic_fence(&mut self, atomic: AtomicFenceOp) -> InterpResult<'tcx>
                     // Either Release | AcqRel | SeqCst
                     clocks.apply_release_fence();
                 }
-                
-                // Increment timestamp if hase release semantics
+
+                // Increment timestamp in case of release semantics.
                 Ok(atomic != AtomicFenceOp::Acquire)
             })
         } else {
@@ -648,23 +720,47 @@ fn validate_atomic_fence(&mut self, atomic: AtomicFenceOp) -> InterpResult<'tcx>
 pub struct VClockAlloc {
     /// Assigning each byte a MemoryCellClocks.
     alloc_ranges: RefCell<RangeMap<MemoryCellClocks>>,
-
-    // Pointer to global state.
-    global: MemoryExtra,
 }
 
 impl VClockAlloc {
-    /// Create a new data-race allocation detector.
-    pub fn new_allocation(global: &MemoryExtra, len: Size) -> VClockAlloc {
+    /// Create a new data-race detector for newly allocated memory.
+    pub fn new_allocation(
+        global: &MemoryExtra,
+        len: Size,
+        kind: MemoryKind<MiriMemoryKind>,
+    ) -> VClockAlloc {
+        let (alloc_timestamp, alloc_index) = match kind {
+            // User allocated and stack memory should track allocation.
+            MemoryKind::Machine(
+                MiriMemoryKind::Rust | MiriMemoryKind::C | MiriMemoryKind::WinHeap,
+            )
+            | MemoryKind::Stack => {
+                let (alloc_index, clocks) = global.current_thread_state();
+                let alloc_timestamp = clocks.clock[alloc_index];
+                (alloc_timestamp, alloc_index)
+            }
+            // Other global memory should trace races but be allocated at the 0 timestamp.
+            MemoryKind::Machine(
+                MiriMemoryKind::Global
+                | MiriMemoryKind::Machine
+                | MiriMemoryKind::Env
+                | MiriMemoryKind::ExternStatic
+                | MiriMemoryKind::Tls,
+            )
+            | MemoryKind::CallerLocation => (0, VectorIdx::MAX_INDEX),
+        };
         VClockAlloc {
-            global: Rc::clone(global),
-            alloc_ranges: RefCell::new(RangeMap::new(len, MemoryCellClocks::default())),
+            alloc_ranges: RefCell::new(RangeMap::new(
+                len,
+                MemoryCellClocks::new(alloc_timestamp, alloc_index),
+            )),
         }
     }
 
     // Find an index, if one exists, where the value
     // in `l` is greater than the value in `r`.
     fn find_gt_index(l: &VClock, r: &VClock) -> Option<VectorIdx> {
+        log::trace!("Find index where not {:?} <= {:?}", l, r);
         let l_slice = l.as_slice();
         let r_slice = r.as_slice();
         l_slice
@@ -684,7 +780,7 @@ fn find_gt_index(l: &VClock, r: &VClock) -> Option<VectorIdx> {
                         .enumerate()
                         .find_map(|(idx, &r)| if r == 0 { None } else { Some(idx) })
                         .expect("Invalid VClock Invariant");
-                    Some(idx)
+                    Some(idx + r_slice.len())
                 } else {
                     None
                 }
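
The `idx + r_slice.len()` fix above corrects an off-by-prefix bug: the second search only walks the tail of `l` past `r`'s length, so `enumerate` restarts at zero and the result must be offset back into `l`'s index space. A standalone sketch of the same pattern over plain slices:

```rust
// Find an index where l[i] > r[i], with missing entries treated as 0.
fn find_gt_index(l: &[u64], r: &[u64]) -> Option<usize> {
    // Compare the overlapping prefix element-wise.
    if let Some(idx) = l.iter().zip(r).position(|(a, b)| a > b) {
        return Some(idx);
    }
    // Any non-zero element in l's tail beats r's implicit zeros. Without
    // the `+ r.len()` offset this would return a tail-relative index.
    l.get(r.len()..)
        .and_then(|tail| tail.iter().position(|&v| v != 0))
        .map(|idx| idx + r.len())
}

fn main() {
    assert_eq!(find_gt_index(&[0, 0, 3], &[1, 1]), Some(2)); // not Some(0)
    assert_eq!(find_gt_index(&[0, 1], &[1, 1]), None);
}
```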
@@ -694,9 +790,9 @@ fn find_gt_index(l: &VClock, r: &VClock) -> Option<VectorIdx> {
 
     /// Report a data-race found in the program.
     /// This finds the two racing threads and the type
-    /// of data-race that occured. This will also
+    /// of data-race that occurred. This will also
     /// return info about the memory location the data-race
-    /// occured in.
+    /// occurred in.
     #[cold]
     #[inline(never)]
     fn report_data_race<'tcx>(
@@ -704,8 +800,7 @@ fn report_data_race<'tcx>(
         range: &MemoryCellClocks,
         action: &str,
         is_atomic: bool,
-        pointer: Pointer<Tag>,
-        len: Size,
+        ptr_dbg: Pointer<AllocId>,
     ) -> InterpResult<'tcx> {
         let (current_index, current_clocks) = global.current_thread_state();
         let write_clock;
@@ -715,18 +810,18 @@ fn report_data_race<'tcx>(
             // Convert the write action into the vector clock it
             // represents for diagnostic purposes.
             write_clock = VClock::new_with_index(range.write_index, range.write);
-            ("WRITE", range.write_index, &write_clock)
+            (range.write_type.get_descriptor(), range.write_index, &write_clock)
         } else if let Some(idx) = Self::find_gt_index(&range.read, &current_clocks.clock) {
-            ("READ", idx, &range.read)
+            ("Read", idx, &range.read)
         } else if !is_atomic {
             if let Some(atomic) = range.atomic() {
                 if let Some(idx) = Self::find_gt_index(&atomic.write_vector, &current_clocks.clock)
                 {
-                    ("ATOMIC_STORE", idx, &atomic.write_vector)
+                    ("Atomic Store", idx, &atomic.write_vector)
                 } else if let Some(idx) =
                     Self::find_gt_index(&atomic.read_vector, &current_clocks.clock)
                 {
-                    ("ATOMIC_LOAD", idx, &atomic.read_vector)
+                    ("Atomic Load", idx, &atomic.read_vector)
                 } else {
                     unreachable!(
                         "Failed to report data-race for non-atomic operation: no race found"
@@ -747,40 +842,40 @@ fn report_data_race<'tcx>(
 
         // Throw the data-race detection.
         throw_ub_format!(
-            "Data race detected between {} on {} and {} on {}, memory({:?},offset={},size={})\
-            \n\t\t -current vector clock = {:?}\
-            \n\t\t -conflicting timestamp = {:?}",
+            "Data race detected between {} on {} and {} on {} at {:?} (current vector clock = {:?}, conflicting timestamp = {:?})",
             action,
             current_thread_info,
             other_action,
             other_thread_info,
-            pointer.alloc_id,
-            pointer.offset.bytes(),
-            len.bytes(),
+            ptr_dbg,
             current_clocks.clock,
             other_clock
         )
     }
 
-    /// Detect data-races for an unsychronized read operation, will not perform
+    /// Detect data-races for an unsynchronized read operation; this will not perform
     /// data-race detection if `multi-threaded` is false, either due to no threads
     /// being created or if it is temporarily disabled during a racy read or write
     /// operation for which data-race detection is handled separately, for example
     /// atomic read operations.
-    pub fn read<'tcx>(&self, pointer: Pointer<Tag>, len: Size) -> InterpResult<'tcx> {
-        if self.global.multi_threaded.get() {
-            let (index, clocks) = self.global.current_thread_state();
+    pub fn read<'tcx>(
+        &self,
+        alloc_id: AllocId,
+        range: AllocRange,
+        global: &GlobalState,
+    ) -> InterpResult<'tcx> {
+        if global.multi_threaded.get() {
+            let (index, clocks) = global.current_thread_state();
             let mut alloc_ranges = self.alloc_ranges.borrow_mut();
-            for (_, range) in alloc_ranges.iter_mut(pointer.offset, len) {
+            for (offset, range) in alloc_ranges.iter_mut(range.start, range.size) {
                 if let Err(DataRace) = range.read_race_detect(&*clocks, index) {
                     // Report data-race.
                     return Self::report_data_race(
-                        &self.global,
+                        global,
                         range,
-                        "READ",
+                        "Read",
                         false,
-                        pointer,
-                        len,
+                        Pointer::new(alloc_id, offset),
                     );
                 }
             }
@@ -793,22 +888,22 @@ pub fn read<'tcx>(&self, pointer: Pointer<Tag>, len: Size) -> InterpResult<'tcx>
     // Shared code for detecting data-races on unique access to a section of memory
     fn unique_access<'tcx>(
         &mut self,
-        pointer: Pointer<Tag>,
-        len: Size,
-        action: &str,
+        alloc_id: AllocId,
+        range: AllocRange,
+        write_type: WriteType,
+        global: &mut GlobalState,
     ) -> InterpResult<'tcx> {
-        if self.global.multi_threaded.get() {
-            let (index, clocks) = self.global.current_thread_state();
-            for (_, range) in self.alloc_ranges.get_mut().iter_mut(pointer.offset, len) {
-                if let Err(DataRace) = range.write_race_detect(&*clocks, index) {
+        if global.multi_threaded.get() {
+            let (index, clocks) = global.current_thread_state();
+            for (offset, range) in self.alloc_ranges.get_mut().iter_mut(range.start, range.size) {
+                if let Err(DataRace) = range.write_race_detect(&*clocks, index, write_type) {
                     // Report data-race
                     return Self::report_data_race(
-                        &self.global,
+                        global,
                         range,
-                        action,
+                        write_type.get_descriptor(),
                         false,
-                        pointer,
-                        len,
+                        Pointer::new(alloc_id, offset),
                     );
                 }
             }
@@ -818,27 +913,37 @@ fn unique_access<'tcx>(
         }
     }
 
-    /// Detect data-races for an unsychronized write operation, will not perform
+    /// Detect data-races for an unsynchronized write operation; this will not perform
     /// data-race detection if `multi-threaded` is false, either due to no threads
     /// being created or if it is temporarily disabled during a racy read or write
     /// operation
-    pub fn write<'tcx>(&mut self, pointer: Pointer<Tag>, len: Size) -> InterpResult<'tcx> {
-        self.unique_access(pointer, len, "Write")
+    pub fn write<'tcx>(
+        &mut self,
+        alloc_id: AllocId,
+        range: AllocRange,
+        global: &mut GlobalState,
+    ) -> InterpResult<'tcx> {
+        self.unique_access(alloc_id, range, WriteType::Write, global)
     }
 
-    /// Detect data-races for an unsychronized deallocate operation, will not perform
+    /// Detect data-races for an unsynchronized deallocate operation; this will not perform
     /// data-race detection if `multi-threaded` is false, either due to no threads
     /// being created or if it is temporarily disabled during a racy read or write
     /// operation
-    pub fn deallocate<'tcx>(&mut self, pointer: Pointer<Tag>, len: Size) -> InterpResult<'tcx> {
-        self.unique_access(pointer, len, "Deallocate")
+    pub fn deallocate<'tcx>(
+        &mut self,
+        alloc_id: AllocId,
+        range: AllocRange,
+        global: &mut GlobalState,
+    ) -> InterpResult<'tcx> {
+        self.unique_access(alloc_id, range, WriteType::Deallocate, global)
     }
 }
 
 impl<'mir, 'tcx: 'mir> EvalContextPrivExt<'mir, 'tcx> for MiriEvalContext<'mir, 'tcx> {}
 trait EvalContextPrivExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> {
     // Temporarily allow data-races to occur; this should only be
-    // used if either one of the appropiate `validate_atomic` functions
+    // used if either one of the appropriate `validate_atomic` functions
     // will be called to treat a memory access as atomic or if the memory
     // being accessed should be treated as internal state, that cannot be
     // accessed by the interpreted program.
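
An illustrative sketch of what the new `Deallocate` write type catches: freeing memory that another thread may still be accessing. The detector treats the deallocation as a write to every byte of the allocation, so an unsynchronized concurrent access is reported as a data race rather than only as a use-after-free.

```rust
use std::thread;

fn main() {
    let raw = Box::into_raw(Box::new(0u64)) as usize;
    let t = thread::spawn(move || {
        // Unsynchronized read of the heap allocation...
        unsafe { std::ptr::read(raw as *const u64) }
    });
    // ...racing with its deallocation on the main thread: dropping the
    // re-formed Box is a `Deallocate` write in the model above.
    drop(unsafe { Box::from_raw(raw as *mut u64) });
    t.join().unwrap();
}
```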
@@ -878,15 +983,10 @@ fn allow_data_races_mut<R>(
         result
     }
 
-    /// Generic atomic operation implementation,
-    /// this accesses memory via get_raw instead of
-    /// get_raw_mut, due to issues calling get_raw_mut
-    /// for atomic loads from read-only memory.
-    /// FIXME: is this valid, or should get_raw_mut be used for
-    /// atomic-stores/atomic-rmw?
+    /// Generic atomic operation implementation
     fn validate_atomic_op<A: Debug + Copy>(
         &self,
-        place: MPlaceTy<'tcx, Tag>,
+        place: &MPlaceTy<'tcx, Tag>,
         atomic: A,
         description: &str,
         mut op: impl FnMut(
@@ -899,36 +999,35 @@ fn validate_atomic_op<A: Debug + Copy>(
         let this = self.eval_context_ref();
         if let Some(data_race) = &this.memory.extra.data_race {
             if data_race.multi_threaded.get() {
-                // Load and log the atomic operation.
-                let place_ptr = place.ptr.assert_ptr();
                 let size = place.layout.size;
+                let (alloc_id, base_offset, ptr) = this.memory.ptr_get_alloc(place.ptr)?;
+                // Load and log the atomic operation.
+                // Note that atomic loads are possible even from read-only allocations, so `get_alloc_extra_mut` is not an option.
                 let alloc_meta =
-                    &this.memory.get_raw(place_ptr.alloc_id)?.extra.data_race.as_ref().unwrap();
+                    &this.memory.get_alloc_extra(alloc_id)?.data_race.as_ref().unwrap();
                 log::trace!(
-                    "Atomic op({}) with ordering {:?} on memory({:?}, offset={}, size={})",
+                    "Atomic op({}) with ordering {:?} on {:?} (size={})",
                     description,
                     &atomic,
-                    place_ptr.alloc_id,
-                    place_ptr.offset.bytes(),
+                    ptr,
                     size.bytes()
                 );
 
                 // Perform the atomic operation.
-                let data_race = &alloc_meta.global;
                 data_race.maybe_perform_sync_operation(|index, mut clocks| {
-                    for (_, range) in
-                        alloc_meta.alloc_ranges.borrow_mut().iter_mut(place_ptr.offset, size)
+                    for (offset, range) in
+                        alloc_meta.alloc_ranges.borrow_mut().iter_mut(base_offset, size)
                     {
                         if let Err(DataRace) = op(range, &mut *clocks, index, atomic) {
                             mem::drop(clocks);
                             return VClockAlloc::report_data_race(
-                                &alloc_meta.global,
+                                data_race,
                                 range,
                                 description,
                                 true,
-                                place_ptr,
-                                size,
-                            ).map(|_| true);
+                                Pointer::new(alloc_id, offset),
+                            )
+                            .map(|_| true);
                         }
                     }
 
@@ -938,12 +1037,11 @@ fn validate_atomic_op<A: Debug + Copy>(
 
                 // Log changes to atomic memory.
                 if log::log_enabled!(log::Level::Trace) {
-                    for (_, range) in alloc_meta.alloc_ranges.borrow().iter(place_ptr.offset, size)
+                    for (_offset, range) in alloc_meta.alloc_ranges.borrow().iter(base_offset, size)
                     {
                         log::trace!(
-                            "Updated atomic memory({:?}, offset={}, size={}) to {:#?}",
-                            place.ptr.assert_ptr().alloc_id,
-                            place_ptr.offset.bytes(),
+                            "Updated atomic memory({:?}, size={}) to {:#?}",
+                            ptr,
                             size.bytes(),
                             range.atomic_ops
                         );
@@ -980,6 +1078,7 @@ struct ThreadExtraState {
 /// Global data-race detection state, contains the currently
 /// executing thread as well as the vector-clocks associated
 /// with each of the threads.
+// FIXME: it is probably better to have one large RefCell, than to have so many small ones.
 #[derive(Debug, Clone)]
 pub struct GlobalState {
     /// Set to true once the first additional
@@ -1000,7 +1099,7 @@ pub struct GlobalState {
     /// if a vector index is re-assigned to a new thread.
     vector_info: RefCell<IndexVec<VectorIdx, ThreadId>>,
 
-    /// The mapping of a given thread to assocaited thread metadata.
+    /// The mapping of a given thread to associated thread metadata.
     thread_info: RefCell<IndexVec<ThreadId, ThreadExtraState>>,
 
     /// The current vector index being executed.
@@ -1017,7 +1116,7 @@ pub struct GlobalState {
 
     /// Counts the number of threads that are currently active
     /// if the number of active threads reduces to 1 and then
-    /// a join operation occures with the remaining main thread
+    /// a join operation occurs with the remaining main thread
     /// then multi-threaded execution may be disabled.
     active_thread_count: Cell<usize>,
 
@@ -1033,7 +1132,7 @@ impl GlobalState {
     /// Create a new global state, set up with just thread-id=0
     /// advanced to timestamp = 1.
     pub fn new() -> Self {
-        let global_state = GlobalState {
+        let mut global_state = GlobalState {
             multi_threaded: Cell::new(false),
             vector_clocks: RefCell::new(IndexVec::new()),
             vector_info: RefCell::new(IndexVec::new()),
@@ -1047,9 +1146,9 @@ pub fn new() -> Self {
         // Setup the main-thread since it is not explicitly created:
         // uses vector index and thread-id 0, also the rust runtime gives
         // the main-thread a name of "main".
-        let index = global_state.vector_clocks.borrow_mut().push(ThreadClockSet::default());
-        global_state.vector_info.borrow_mut().push(ThreadId::new(0));
-        global_state.thread_info.borrow_mut().push(ThreadExtraState {
+        let index = global_state.vector_clocks.get_mut().push(ThreadClockSet::default());
+        global_state.vector_info.get_mut().push(ThreadId::new(0));
+        global_state.thread_info.get_mut().push(ThreadExtraState {
             vector_index: Some(index),
             thread_name: Some("main".to_string().into_boxed_str()),
             termination_vector_clock: None,
@@ -1096,7 +1195,7 @@ fn find_vector_index_reuse_candidate(&self) -> Option<VectorIdx> {
     // Hook for thread creation, enables multi-threaded execution and marks
     // the current thread timestamp as happening-before the newly created thread.
     #[inline]
-    pub fn thread_created(&self, thread: ThreadId) {
+    pub fn thread_created(&mut self, thread: ThreadId) {
         let current_index = self.current_index();
 
         // Increment the number of active threads.
@@ -1116,12 +1215,12 @@ pub fn thread_created(&self, thread: ThreadId) {
         let created_index = if let Some(reuse_index) = self.find_vector_index_reuse_candidate() {
             // Now re-configure the re-use candidate, increment the clock
             // for the new sync use of the vector.
-            let mut vector_clocks = self.vector_clocks.borrow_mut();
+            let vector_clocks = self.vector_clocks.get_mut();
             vector_clocks[reuse_index].increment_clock(reuse_index);
 
             // Locate the old thread the vector was associated with and update
             // it to represent the new thread instead.
-            let mut vector_info = self.vector_info.borrow_mut();
+            let vector_info = self.vector_info.get_mut();
             let old_thread = vector_info[reuse_index];
             vector_info[reuse_index] = thread;
 
@@ -1133,15 +1232,17 @@ pub fn thread_created(&self, thread: ThreadId) {
         } else {
             // No vector re-use candidates available, instead create
             // a new vector index.
-            let mut vector_info = self.vector_info.borrow_mut();
+            let vector_info = self.vector_info.get_mut();
             vector_info.push(thread)
         };
 
+        log::trace!("Creating thread = {:?} with vector index = {:?}", thread, created_index);
+
         // Mark the chosen vector index as in use by the thread.
         thread_info[thread].vector_index = Some(created_index);
 
         // Create a thread clock set if applicable.
-        let mut vector_clocks = self.vector_clocks.borrow_mut();
+        let vector_clocks = self.vector_clocks.get_mut();
         if created_index == vector_clocks.next_index() {
             vector_clocks.push(ThreadClockSet::default());
         }
@@ -1160,11 +1261,11 @@ pub fn thread_created(&self, thread: ThreadId) {
     }
 
     /// Hook on a thread join to update the implicit happens-before relation
-    /// between the joined thead and the current thread.
+    /// between the joined thread and the current thread.
     #[inline]
-    pub fn thread_joined(&self, current_thread: ThreadId, join_thread: ThreadId) {
-        let mut clocks_vec = self.vector_clocks.borrow_mut();
-        let thread_info = self.thread_info.borrow();
+    pub fn thread_joined(&mut self, current_thread: ThreadId, join_thread: ThreadId) {
+        let clocks_vec = self.vector_clocks.get_mut();
+        let thread_info = self.thread_info.get_mut();
 
         // Load the vector clock of the current thread.
         let current_index = thread_info[current_thread]
@@ -1178,7 +1279,6 @@ pub fn thread_joined(&self, current_thread: ThreadId, join_thread: ThreadId) {
             .as_ref()
             .expect("Joined with thread but thread has not terminated");
 
-
         // The join thread happens-before the current thread
         // so update the current vector clock.
         // Is not a release operation so the clock is not incremented.
@@ -1194,7 +1294,7 @@ pub fn thread_joined(&self, current_thread: ThreadId, join_thread: ThreadId) {
                 .iter_enumerated()
                 .all(|(idx, clocks)| clocks.clock[idx] <= current_clock.clock[idx])
             {
-                // The all thread termations happen-before the current clock
+                // All thread terminations happen-before the current clock
                 // therefore no data-races can be reported until a new thread
                 // is created, so disable multi-threaded execution.
                 self.multi_threaded.set(false);
@@ -1203,9 +1303,9 @@ pub fn thread_joined(&self, current_thread: ThreadId, join_thread: ThreadId) {
 
         // If the thread is marked as terminated but not joined
         // then move the thread to the re-use set.
-        let mut termination = self.terminated_threads.borrow_mut();
+        let termination = self.terminated_threads.get_mut();
         if let Some(index) = termination.remove(&join_thread) {
-            let mut reuse = self.reuse_candidates.borrow_mut();
+            let reuse = self.reuse_candidates.get_mut();
             reuse.insert(index);
         }
     }
@@ -1213,33 +1313,33 @@ pub fn thread_joined(&self, current_thread: ThreadId, join_thread: ThreadId) {
     /// On thread termination, the vector-clock may be re-used
     /// in the future once all remaining thread-clocks catch
     /// up with the time index of the terminated thread.
-    /// This assiges thread termination with a unique index
+    /// This assigns thread termination with a unique index
     /// which will be used to join the thread.
     /// This should be called strictly before any calls to
     /// `thread_joined`.
     #[inline]
-    pub fn thread_terminated(&self) {
+    pub fn thread_terminated(&mut self) {
         let current_index = self.current_index();
 
         // Increment the clock to a unique termination timestamp.
-        let mut vector_clocks = self.vector_clocks.borrow_mut();
+        let vector_clocks = self.vector_clocks.get_mut();
         let current_clocks = &mut vector_clocks[current_index];
         current_clocks.increment_clock(current_index);
 
         // Load the current thread id for the executing vector.
-        let vector_info = self.vector_info.borrow();
+        let vector_info = self.vector_info.get_mut();
         let current_thread = vector_info[current_index];
 
         // Load the current thread metadata, and move to a terminated
         // vector state. Setting up the vector clock all join operations
         // will use.
-        let mut thread_info = self.thread_info.borrow_mut();
+        let thread_info = self.thread_info.get_mut();
         let current = &mut thread_info[current_thread];
         current.termination_vector_clock = Some(current_clocks.clock.clone());
 
         // Add this thread as a candidate for re-use after a thread join
         // occurs.
-        let mut termination = self.terminated_threads.borrow_mut();
+        let termination = self.terminated_threads.get_mut();
         termination.insert(current_thread, current_index);
 
         // Reduce the number of active threads, now that a thread has
@@ -1266,9 +1366,9 @@ pub fn thread_set_active(&self, thread: ThreadId) {
     /// the thread name is used for improved diagnostics
     /// during a data-race.
     #[inline]
-    pub fn thread_set_name(&self, thread: ThreadId, name: String) {
+    pub fn thread_set_name(&mut self, thread: ThreadId, name: String) {
         let name = name.into_boxed_str();
-        let mut thread_info = self.thread_info.borrow_mut();
+        let thread_info = self.thread_info.get_mut();
         thread_info[thread].thread_name = Some(name);
     }
 
@@ -1318,8 +1418,8 @@ pub fn validate_lock_acquire(&self, lock: &VClock, thread: ThreadId) {
     /// Release a lock handle, express that this happens-before
     /// any subsequent calls to `validate_lock_acquire`.
     /// For normal locks this should be equivalent to `validate_lock_release_shared`
-    /// since an acquire operation should have occured before, however
-    /// for futex & cond-var operations this is not the case and this
+    /// since an acquire operation should have occurred before, however
+    /// for futex & condvar operations this is not the case and this
     /// operation must be used.
     pub fn validate_lock_release(&self, lock: &mut VClock, thread: ThreadId) {
         let (index, mut clocks) = self.load_thread_state_mut(thread);
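
Finally, a sketch of the happens-before edges that `validate_lock_release` and `validate_lock_acquire` model: an unlock releases the current thread's clock into the lock's clock, and the next lock acquires it, which is what makes the mutex version of the earlier counter example race-free.

```rust
use std::sync::Mutex;
use std::thread;

static COUNTER: Mutex<u64> = Mutex::new(0);

fn main() {
    let t = thread::spawn(|| {
        // The unlock at the end of this statement releases the thread's
        // clock into the lock's clock (what validate_lock_release models).
        *COUNTER.lock().unwrap() += 1;
    });
    t.join().unwrap();
    // Locking here joins the released clock (validate_lock_acquire), so
    // this access is ordered after the increment: no data race.
    assert_eq!(*COUNTER.lock().unwrap(), 1);
}
```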