From 9cb6b8da3f6ca2da1139c91754d520bf2d354f31 Mon Sep 17 00:00:00 2001 From: JCTyBlaidd Date: Thu, 5 Nov 2020 03:54:39 +0000 Subject: [PATCH] Split out vector_clock to separate file, general tidy up of some of the code & add support for vector index re-use for multiple threads after termination. --- src/data_race.rs | 1333 +++++++++++++-------------------------- src/lib.rs | 4 + src/shims/intrinsics.rs | 31 +- src/shims/posix/sync.rs | 39 +- src/thread.rs | 4 +- src/vector_clock.rs | 602 ++++++++++++++++++ 6 files changed, 1084 insertions(+), 929 deletions(-) create mode 100644 src/vector_clock.rs diff --git a/src/data_race.rs b/src/data_race.rs index 8e7a3548f5c..e992c5a1d58 100644 --- a/src/data_race.rs +++ b/src/data_race.rs @@ -13,21 +13,21 @@ //! - 1 of the operations happens-before the other operation (see link for definition) use std::{ - fmt::{self, Debug}, cmp::Ordering, rc::Rc, - cell::{Cell, RefCell, Ref, RefMut}, ops::Index, mem + fmt::Debug, rc::Rc, + cell::{Cell, RefCell, Ref, RefMut}, mem }; use rustc_index::vec::{Idx, IndexVec}; use rustc_target::abi::Size; use rustc_middle::ty::layout::TyAndLayout; -use rustc_data_structures::fx::FxHashMap; - -use smallvec::SmallVec; +use rustc_data_structures::fx::FxHashSet; use crate::{ - MiriEvalContext, ThreadId, Tag, MiriEvalContextExt, RangeMap, - MPlaceTy, ImmTy, InterpResult, Pointer, ScalarMaybeUninit, - OpTy, Immediate, MemPlaceMeta + MiriEvalContext, MiriEvalContextExt, + ThreadId, Tag, RangeMap, + InterpResult, Pointer, ScalarMaybeUninit, + MPlaceTy, OpTy, MemPlaceMeta, + VClock, VSmallClockSet, VectorIdx, VTimestamp }; pub type AllocExtra = VClockAlloc; @@ -73,194 +73,136 @@ pub enum AtomicFenceOp { impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for MiriEvalContext<'mir, 'tcx> {} pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { - /// Variant of `read_immediate` that does not perform `data-race` checks. - fn read_immediate_racy(&self, op: MPlaceTy<'tcx, Tag>) -> InterpResult<'tcx, ImmTy<'tcx, Tag>> { + // Temporarily allow data-races to occur, this should only be + // used if either one of the appropiate `validate_atomic` functions + // will be called to treat a memory access as atomic or if the memory + // being accessed should be treated as internal state, that cannot be + // accessed by the interpreted program. + #[inline] + fn allow_data_races_ref(&self, op: impl FnOnce(&MiriEvalContext<'mir, 'tcx>) -> R) -> R { let this = self.eval_context_ref(); let data_race = &*this.memory.extra.data_race; - let old = data_race.multi_threaded.replace(false); - let res = this.read_immediate(op.into()); + let result = op(this); data_race.multi_threaded.set(old); - - res + result } - - /// Variant of `write_immediate` that does not perform `data-race` checks. 
- fn write_immediate_racy( - &mut self, src: Immediate, dest: MPlaceTy<'tcx, Tag> - ) -> InterpResult<'tcx> { + + /// Same as `allow_data_races_ref`, this temporarily disables any data-race detection and + /// so should only be used for atomic operations or internal state that the program cannot + /// access + #[inline] + fn allow_data_races_mut(&mut self, op: impl FnOnce(&mut MiriEvalContext<'mir, 'tcx>) -> R) -> R { let this = self.eval_context_mut(); let data_race = &*this.memory.extra.data_race; let old = data_race.multi_threaded.replace(false); - - let imm = this.write_immediate(src, dest.into()); - + let result = op(this); let data_race = &*this.memory.extra.data_race; data_race.multi_threaded.set(old); - imm + result } - /// Variant of `read_scalar` that does not perform data-race checks. - fn read_scalar_racy( - &self, op: MPlaceTy<'tcx, Tag> - )-> InterpResult<'tcx, ScalarMaybeUninit> { - Ok(self.read_immediate_racy(op)?.to_scalar_or_uninit()) - } - /// Variant of `write_scalar` that does not perform data-race checks. - fn write_scalar_racy( - &mut self, val: ScalarMaybeUninit, dest: MPlaceTy<'tcx, Tag> - ) -> InterpResult<'tcx> { - self.write_immediate_racy(Immediate::Scalar(val.into()), dest) - } - - /// Variant of `read_scalar_at_offset` helper function that does not perform - /// `data-race checks. - fn read_scalar_at_offset_racy( - &self, - op: OpTy<'tcx, Tag>, - offset: u64, - layout: TyAndLayout<'tcx>, - ) -> InterpResult<'tcx, ScalarMaybeUninit> { - let this = self.eval_context_ref(); - let op_place = this.deref_operand(op)?; - let offset = Size::from_bytes(offset); - // Ensure that the following read at an offset is within bounds - assert!(op_place.layout.size >= offset + layout.size); - let value_place = op_place.offset(offset, MemPlaceMeta::None, layout, this)?; - this.read_scalar_racy(value_place.into()) - } - - /// Variant of `write_scalar_at_offfset` helper function that performs - /// an atomic load operation with verification instead fn read_scalar_at_offset_atomic( - &mut self, + &self, op: OpTy<'tcx, Tag>, offset: u64, layout: TyAndLayout<'tcx>, atomic: AtomicReadOp ) -> InterpResult<'tcx, ScalarMaybeUninit> { - let this = self.eval_context_mut(); + let this = self.eval_context_ref(); let op_place = this.deref_operand(op)?; let offset = Size::from_bytes(offset); // Ensure that the following read at an offset is within bounds assert!(op_place.layout.size >= offset + layout.size); let value_place = op_place.offset(offset, MemPlaceMeta::None, layout, this)?; - let res = this.read_scalar_racy(value_place.into())?; - this.validate_atomic_load(value_place, atomic)?; - Ok(res) + this.read_scalar_atomic(value_place, atomic) } - - /// Variant of `write_scalar_at_offfset` helper function that does not perform - /// data-race checks. 
- fn write_scalar_at_offset_racy( + fn write_scalar_at_offset_atomic( &mut self, op: OpTy<'tcx, Tag>, offset: u64, value: impl Into>, layout: TyAndLayout<'tcx>, - ) -> InterpResult<'tcx, ()> { + atomic: AtomicWriteOp + ) -> InterpResult<'tcx> { let this = self.eval_context_mut(); let op_place = this.deref_operand(op)?; let offset = Size::from_bytes(offset); // Ensure that the following read at an offset is within bounds assert!(op_place.layout.size >= offset + layout.size); let value_place = op_place.offset(offset, MemPlaceMeta::None, layout, this)?; - this.write_scalar_racy(value.into(), value_place.into()) + this.write_scalar_atomic(value.into(), value_place, atomic) } - - /// Load the data race allocation state for a given memory place - /// also returns the size and offset of the result in the allocation - /// metadata - /// This is used for atomic loads since unconditionally requesteing - /// mutable access causes issues for read-only memory, which will - /// fail validation on mutable access - fn load_data_race_state_ref<'a>( - &'a self, place: MPlaceTy<'tcx, Tag> - ) -> InterpResult<'tcx, (&'a VClockAlloc, Size, Size)> where 'mir: 'a { - let this = self.eval_context_ref(); - - let ptr = place.ptr.assert_ptr(); - let size = place.layout.size; - let data_race = &this.memory.get_raw(ptr.alloc_id)?.extra.data_race; - - Ok((data_race, size, ptr.offset)) - } - - /// Load the data race allocation state for a given memory place - /// also returns the size and the offset of the result in the allocation - /// metadata - fn load_data_race_state_mut<'a>( - &'a mut self, place: MPlaceTy<'tcx, Tag> - ) -> InterpResult<'tcx, (&'a mut VClockAlloc, Size, Size)> where 'mir: 'a { - let this = self.eval_context_mut(); - - let ptr = place.ptr.assert_ptr(); - let size = place.layout.size; - let data_race = &mut this.memory.get_raw_mut(ptr.alloc_id)?.extra.data_race; - - Ok((data_race, size, ptr.offset)) + fn read_scalar_atomic( + &self, place: MPlaceTy<'tcx, Tag>, atomic: AtomicReadOp + ) -> InterpResult<'tcx, ScalarMaybeUninit> { + let scalar = self.allow_data_races_ref(move |this| { + this.read_scalar(place.into()) + })?; + self.validate_atomic_load(place, atomic)?; + Ok(scalar) + } + fn write_scalar_atomic( + &mut self, val: ScalarMaybeUninit, dest: MPlaceTy<'tcx, Tag>, + atomic: AtomicWriteOp + ) -> InterpResult<'tcx> { + self.allow_data_races_mut(move |this| { + this.write_scalar(val, dest.into()) + })?; + self.validate_atomic_store(dest, atomic) } /// Update the data-race detector for an atomic read occuring at the /// associated memory-place and on the current thread fn validate_atomic_load( - &mut self, place: MPlaceTy<'tcx, Tag>, atomic: AtomicReadOp + &self, place: MPlaceTy<'tcx, Tag>, atomic: AtomicReadOp ) -> InterpResult<'tcx> { - let this = self.eval_context_mut(); + let this = self.eval_context_ref(); let data_race = &*this.memory.extra.data_race; if data_race.multi_threaded.get() { - data_race.advance_vector_clock(); - let ( - alloc, size, offset - ) = this.load_data_race_state_ref(place)?; + // Load an log the atomic operation + // the memory access has to be `get_raw` since otherwise this despite only + // mutating MemoryExtra will still trigger errors on read-only memory + let place_ptr = place.ptr.assert_ptr(); + let size = place.layout.size; + let alloc_meta = &this.memory.get_raw(place_ptr.alloc_id)?.extra.data_race; log::trace!( - "Atomic load on {:?} with ordering {:?}, in memory({:?}, offset={}, size={})", - alloc.global.current_thread(), atomic, - 
place.ptr.assert_ptr().alloc_id, offset.bytes(), size.bytes() + "Atomic op({}) with ordering {:?} on memory({:?}, offset={}, size={})", + "Atomic load", &atomic, place_ptr.alloc_id, place_ptr.offset.bytes(), size.bytes() ); - let current_thread = alloc.global.current_thread(); - let mut current_state = alloc.global.current_thread_state_mut(); - if atomic == AtomicReadOp::Relaxed { - // Perform relaxed atomic load - for (_,range) in alloc.alloc_ranges.borrow_mut().iter_mut(offset, size) { - if range.load_relaxed(&mut *current_state, current_thread) == Err(DataRace) { - mem::drop(current_state); + // Perform the atomic operation + let data_race = &alloc_meta.global; + data_race.maybe_perform_sync_operation(move |index, mut clocks| { + for (_,range) in alloc_meta.alloc_ranges.borrow_mut().iter_mut(place_ptr.offset, size) { + let res = if atomic == AtomicReadOp::Relaxed { + range.load_relaxed(&mut *clocks, index) + }else{ + range.acquire(&mut *clocks, index) + }; + if let Err(DataRace) = res { + mem::drop(clocks); return VClockAlloc::report_data_race( - &alloc.global, range, "ATOMIC_LOAD", true, - place.ptr.assert_ptr(), size + &alloc_meta.global, range, "Atomic load", true, + place_ptr, size ); } } - }else{ - // Perform acquire(or seq-cst) atomic load - for (_,range) in alloc.alloc_ranges.borrow_mut().iter_mut(offset, size) { - if range.acquire(&mut *current_state, current_thread) == Err(DataRace) { - mem::drop(current_state); - return VClockAlloc::report_data_race( - &alloc.global, range, "ATOMIC_LOAD", true, - place.ptr.assert_ptr(), size - ); - } - } - } + Ok(()) + })?; // Log changes to atomic memory if log::log_enabled!(log::Level::Trace) { - for (_,range) in alloc.alloc_ranges.borrow_mut().iter(offset, size) { + for (_,range) in alloc_meta.alloc_ranges.borrow().iter(place_ptr.offset, size) { log::trace!( - " updated atomic memory({:?}, offset={}, size={}) to {:#?}", - place.ptr.assert_ptr().alloc_id, offset.bytes(), size.bytes(), + "Updated atomic memory({:?}, offset={}, size={}) to {:#?}", + place.ptr.assert_ptr().alloc_id, place_ptr.offset.bytes(), size.bytes(), range.atomic_ops ); } } - - mem::drop(current_state); - let data_race = &*this.memory.extra.data_race; - data_race.advance_vector_clock(); } Ok(()) } @@ -271,61 +213,16 @@ fn validate_atomic_store( &mut self, place: MPlaceTy<'tcx, Tag>, atomic: AtomicWriteOp ) -> InterpResult<'tcx> { let this = self.eval_context_mut(); - let data_race = &*this.memory.extra.data_race; - if data_race.multi_threaded.get() { - data_race.advance_vector_clock(); - - let ( - alloc, size, offset - ) = this.load_data_race_state_mut(place)?; - let current_thread = alloc.global.current_thread(); - let mut current_state = alloc.global.current_thread_state_mut(); - log::trace!( - "Atomic store on {:?} with ordering {:?}, in memory({:?}, offset={}, size={})", - current_thread, atomic, - place.ptr.assert_ptr().alloc_id, offset.bytes(), size.bytes() - ); - - if atomic == AtomicWriteOp::Relaxed { - // Perform relaxed atomic store - for (_,range) in alloc.alloc_ranges.get_mut().iter_mut(offset, size) { - if range.store_relaxed(&mut *current_state, current_thread) == Err(DataRace) { - mem::drop(current_state); - return VClockAlloc::report_data_race( - &alloc.global, range, "ATOMIC_STORE", true, - place.ptr.assert_ptr(), size - ); - } - } - }else{ - // Perform release(or seq-cst) atomic store - for (_,range) in alloc.alloc_ranges.get_mut().iter_mut(offset, size) { - if range.release(&mut *current_state, current_thread) == Err(DataRace) { - 
mem::drop(current_state); - return VClockAlloc::report_data_race( - &alloc.global, range, "ATOMIC_STORE", true, - place.ptr.assert_ptr(), size - ); - } - } - } - - // Log changes to atomic memory - if log::log_enabled!(log::Level::Trace) { - for (_,range) in alloc.alloc_ranges.get_mut().iter(offset, size) { - log::trace!( - " updated atomic memory({:?}, offset={}, size={}) to {:#?}", - place.ptr.assert_ptr().alloc_id, offset.bytes(), size.bytes(), - range.atomic_ops - ); + this.validate_atomic_op_mut( + place, atomic, "Atomic Store", + move |memory, clocks, index, atomic| { + if atomic == AtomicWriteOp::Relaxed { + memory.store_relaxed(clocks, index) + }else{ + memory.release(clocks, index) } } - - mem::drop(current_state); - let data_race = &*this.memory.extra.data_race; - data_race.advance_vector_clock(); - } - Ok(()) + ) } /// Update the data-race detector for an atomic read-modify-write occuring @@ -334,97 +231,104 @@ fn validate_atomic_rmw( &mut self, place: MPlaceTy<'tcx, Tag>, atomic: AtomicRWOp ) -> InterpResult<'tcx> { use AtomicRWOp::*; + let acquire = matches!(atomic, Acquire | AcqRel | SeqCst); + let release = matches!(atomic, Release | AcqRel | SeqCst); let this = self.eval_context_mut(); - let data_race = &*this.memory.extra.data_race; - if data_race.multi_threaded.get() { - data_race.advance_vector_clock(); - - let ( - alloc, size, offset - ) = this.load_data_race_state_mut(place)?; - let current_thread = alloc.global.current_thread(); - let mut current_state = alloc.global.current_thread_state_mut(); - log::trace!( - "Atomic RMW on {:?} with ordering {:?}, in memory({:?}, offset={}, size={})", - current_thread, atomic, - place.ptr.assert_ptr().alloc_id, offset.bytes(), size.bytes() - ); - - let acquire = matches!(atomic, Acquire | AcqRel | SeqCst); - let release = matches!(atomic, Release | AcqRel | SeqCst); - for (_,range) in alloc.alloc_ranges.get_mut().iter_mut(offset, size) { - //FIXME: this is probably still slightly wrong due to the quirks - // in the c++11 memory model - let maybe_race = if acquire { - // Atomic RW-Op acquire - range.acquire(&mut *current_state, current_thread) + this.validate_atomic_op_mut( + place, atomic, "Atomic RMW", + move |memory, clocks, index, _| { + if acquire { + memory.acquire(clocks, index)?; }else{ - range.load_relaxed(&mut *current_state, current_thread) - }; - if maybe_race == Err(DataRace) { - mem::drop(current_state); - return VClockAlloc::report_data_race( - &alloc.global, range, "ATOMIC_RMW(LOAD)", true, - place.ptr.assert_ptr(), size - ); + memory.load_relaxed(clocks, index)?; } - let maybe_race = if release { - // Atomic RW-Op release - range.rmw_release(&mut *current_state, current_thread) + if release { + memory.rmw_release(clocks, index) }else{ - range.rmw_relaxed(&mut *current_state, current_thread) - }; - if maybe_race == Err(DataRace) { - mem::drop(current_state); - return VClockAlloc::report_data_race( - &alloc.global, range, "ATOMIC_RMW(STORE)", true, - place.ptr.assert_ptr(), size - ); + memory.rmw_relaxed(clocks, index) } } - - // Log changes to atomic memory - if log::log_enabled!(log::Level::Trace) { - for (_,range) in alloc.alloc_ranges.get_mut().iter(offset, size) { - log::trace!( - " updated atomic memory({:?}, offset={}, size={}) to {:#?}", - place.ptr.assert_ptr().alloc_id, offset.bytes(), size.bytes(), - range.atomic_ops - ); - } - } - - mem::drop(current_state); - let data_race = &*this.memory.extra.data_race; - data_race.advance_vector_clock(); - } - Ok(()) + ) } /// Update the data-race detector for an 
atomic fence on the current thread fn validate_atomic_fence(&mut self, atomic: AtomicFenceOp) -> InterpResult<'tcx> { let this = self.eval_context_mut(); let data_race = &*this.memory.extra.data_race; - if data_race.multi_threaded.get() { - data_race.advance_vector_clock(); - - log::trace!("Atomic fence on {:?} with ordering {:?}", data_race.current_thread(), atomic); + data_race.maybe_perform_sync_operation(move |index, mut clocks| { + log::trace!("Atomic fence on {:?} with ordering {:?}", index, atomic); // Apply data-race detection for the current fences // this treats AcqRel and SeqCst as the same as a acquire // and release fence applied in the same timestamp. if atomic != AtomicFenceOp::Release { // Either Acquire | AcqRel | SeqCst - data_race.current_thread_state_mut().apply_acquire_fence(); + clocks.apply_acquire_fence(); } if atomic != AtomicFenceOp::Acquire { // Either Release | AcqRel | SeqCst - data_race.current_thread_state_mut().apply_release_fence(); + clocks.apply_release_fence(); } + Ok(()) + }) + } +} - data_race.advance_vector_clock(); +impl<'mir, 'tcx: 'mir> EvalContextPrivExt<'mir, 'tcx> for MiriEvalContext<'mir, 'tcx> {} +trait EvalContextPrivExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { + + /// Generic atomic operation implementation, this however + /// cannot be used for the atomic read operation since + /// that requires non mutable memory access to not trigger + /// the writing to read-only memory errors during `get_raw_mut` + fn validate_atomic_op_mut( + &mut self, place: MPlaceTy<'tcx, Tag>, + atomic: A, description: &str, + mut op: impl FnMut( + &mut MemoryCellClocks, &mut ThreadClockSet, VectorIdx, A + ) -> Result<(), DataRace> + ) -> InterpResult<'tcx> { + let this = self.eval_context_mut(); + let data_race = &*this.memory.extra.data_race; + if data_race.multi_threaded.get() { + + // Load an log the atomic operation + let place_ptr = place.ptr.assert_ptr(); + let size = place.layout.size; + let alloc_meta = &mut this.memory.get_raw_mut(place_ptr.alloc_id)?.extra.data_race; + log::trace!( + "Atomic op({}) with ordering {:?} on memory({:?}, offset={}, size={})", + description, &atomic, place_ptr.alloc_id, place_ptr.offset.bytes(), size.bytes() + ); + + // Perform the atomic operation + let data_race = &alloc_meta.global; + data_race.maybe_perform_sync_operation(|index, mut clocks| { + for (_,range) in alloc_meta.alloc_ranges.borrow_mut().iter_mut(place_ptr.offset, size) { + if let Err(DataRace) = op(range, &mut *clocks, index, atomic) { + mem::drop(clocks); + return VClockAlloc::report_data_race( + &alloc_meta.global, range, description, true, + place_ptr, size + ); + } + } + Ok(()) + })?; + + // Log changes to atomic memory + if log::log_enabled!(log::Level::Trace) { + for (_,range) in alloc_meta.alloc_ranges.borrow().iter(place_ptr.offset, size) { + log::trace!( + "Updated atomic memory({:?}, offset={}, size={}) to {:#?}", + place.ptr.assert_ptr().alloc_id, place_ptr.offset.bytes(), size.bytes(), + range.atomic_ops + ); + } + } } Ok(()) } + } /// Handle for locks to express their @@ -439,7 +343,7 @@ pub struct DataRaceLockHandle { } impl DataRaceLockHandle { pub fn set_values(&mut self, other: &Self) { - self.clock.set_values(&other.clock) + self.clock.clone_from(&other.clock) } pub fn reset(&mut self) { self.clock.set_zero_vector(); @@ -447,126 +351,6 @@ pub fn reset(&mut self) { } -/// Avoid an atomic allocation for the common -/// case with atomic operations where the number -/// of active release sequences is small -#[derive(Clone, PartialEq, 
Eq)] -enum AtomicReleaseSequences { - - /// Contains one or no values - /// if empty: (None, reset vector clock) - /// if one: (Some(thread), thread_clock) - ReleaseOneOrEmpty(Option, VClock), - - /// Contains two or more values - /// stored in a hash-map of thread id to - /// vector clocks - ReleaseMany(FxHashMap) -} -impl AtomicReleaseSequences { - - /// Return an empty set of atomic release sequences - #[inline] - fn new() -> AtomicReleaseSequences { - Self::ReleaseOneOrEmpty(None, VClock::default()) - } - - /// Remove all values except for the value stored at `thread` and set - /// the vector clock to the associated `clock` value - #[inline] - fn clear_and_set(&mut self, thread: ThreadId, clock: &VClock) { - match self { - Self::ReleaseOneOrEmpty(id, rel_clock) => { - *id = Some(thread); - rel_clock.set_values(clock); - } - Self::ReleaseMany(_) => { - *self = Self::ReleaseOneOrEmpty(Some(thread), clock.clone()); - } - } - } - - /// Remove all values except for the value stored at `thread` - #[inline] - fn clear_and_retain(&mut self, thread: ThreadId) { - match self { - Self::ReleaseOneOrEmpty(id, rel_clock) => { - // If the id is the same, then reatin the value - // otherwise delete and clear the release vector clock - if *id != Some(thread) { - *id = None; - rel_clock.set_zero_vector(); - } - }, - Self::ReleaseMany(hash_map) => { - // Retain only the thread element, so reduce to size - // of 1 or 0, and move to smaller format - if let Some(clock) = hash_map.remove(&thread) { - *self = Self::ReleaseOneOrEmpty(Some(thread), clock); - }else{ - *self = Self::new(); - } - } - } - } - - /// Insert a release sequence at `thread` with values `clock` - fn insert(&mut self, thread: ThreadId, clock: &VClock) { - match self { - Self::ReleaseOneOrEmpty(id, rel_clock) => { - if id.map_or(true, |id| id == thread) { - *id = Some(thread); - rel_clock.set_values(clock); - }else{ - let mut hash_map = FxHashMap::default(); - hash_map.insert(thread, clock.clone()); - hash_map.insert(id.unwrap(), rel_clock.clone()); - *self = Self::ReleaseMany(hash_map); - } - }, - Self::ReleaseMany(hash_map) => { - hash_map.insert(thread, clock.clone()); - } - } - } - - /// Return the release sequence at `thread` if one exists - #[inline] - fn load(&self, thread: ThreadId) -> Option<&VClock> { - match self { - Self::ReleaseOneOrEmpty(id, clock) => { - if *id == Some(thread) { - Some(clock) - }else{ - None - } - }, - Self::ReleaseMany(hash_map) => { - hash_map.get(&thread) - } - } - } -} - -/// Custom debug implementation to correctly -/// print debug as a logical mapping from threads -/// to vector-clocks -impl Debug for AtomicReleaseSequences { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::ReleaseOneOrEmpty(None,_) => { - f.debug_map().finish() - }, - Self::ReleaseOneOrEmpty(Some(id), clock) => { - f.debug_map().entry(&id, &clock).finish() - }, - Self::ReleaseMany(hash_map) => { - Debug::fmt(hash_map, f) - } - } - } -} - /// Error returned by finding a data race /// should be elaborated upon #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] @@ -576,7 +360,7 @@ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { /// explicitly to reduce memory usage for the /// common case where no atomic operations /// exists on the memory cell -#[derive(Clone, PartialEq, Eq, Debug)] +#[derive(Clone, PartialEq, Eq, Default, Debug)] struct AtomicMemoryCellClocks { /// The clock-vector for the set of atomic read operations @@ -599,7 +383,7 @@ struct AtomicMemoryCellClocks { /// 
sequence exists in the memory cell, required /// since read-modify-write operations do not /// invalidate existing release sequences - release_sequences: AtomicReleaseSequences, + release_sequences: VSmallClockSet, } /// Memory Cell vector clock metadata @@ -609,11 +393,11 @@ struct MemoryCellClocks { /// The vector-clock of the last write, only one value is stored /// since all previous writes happened-before the current write - write: Timestamp, + write: VTimestamp, /// The identifier of the thread that performed the last write /// operation - write_thread: ThreadId, + write_index: VectorIdx, /// The vector-clock of the set of previous reads /// each index is set to the timestamp that the associated @@ -633,7 +417,7 @@ fn default() -> Self { MemoryCellClocks { read: VClock::default(), write: 0, - write_thread: ThreadId::new(u32::MAX as usize), + write_index: VectorIdx::MAX_INDEX, atomic_ops: None } } @@ -654,21 +438,14 @@ fn atomic(&self) -> Option<&AtomicMemoryCellClocks> { /// if it does not exist #[inline] fn atomic_mut(&mut self) -> &mut AtomicMemoryCellClocks { - self.atomic_ops.get_or_insert_with(|| { - Box::new(AtomicMemoryCellClocks { - read_vector: VClock::default(), - write_vector: VClock::default(), - sync_vector: VClock::default(), - release_sequences: AtomicReleaseSequences::new() - }) - }) + self.atomic_ops.get_or_insert_with(Default::default) } /// Update memory cell data-race tracking for atomic /// load acquire semantics, is a no-op if this memory was /// not used previously as atomic memory - fn acquire(&mut self, clocks: &mut ThreadClockSet, thread: ThreadId) -> Result<(), DataRace> { - self.atomic_read_detect(clocks, thread)?; + fn acquire(&mut self, clocks: &mut ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> { + self.atomic_read_detect(clocks, index)?; if let Some(atomic) = self.atomic() { clocks.clock.join(&atomic.sync_vector); } @@ -677,8 +454,8 @@ fn acquire(&mut self, clocks: &mut ThreadClockSet, thread: ThreadId) -> Result<( /// Update memory cell data-race tracking for atomic /// load relaxed semantics, is a no-op if this memory was /// not used previously as atomic memory - fn load_relaxed(&mut self, clocks: &mut ThreadClockSet, thread: ThreadId) -> Result<(), DataRace> { - self.atomic_read_detect(clocks, thread)?; + fn load_relaxed(&mut self, clocks: &mut ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> { + self.atomic_read_detect(clocks, index)?; if let Some(atomic) = self.atomic() { clocks.fence_acquire.join(&atomic.sync_vector); } @@ -688,38 +465,39 @@ fn load_relaxed(&mut self, clocks: &mut ThreadClockSet, thread: ThreadId) -> Res /// Update the memory cell data-race tracking for atomic /// store release semantics - fn release(&mut self, clocks: &ThreadClockSet, thread: ThreadId) -> Result<(), DataRace> { - self.atomic_write_detect(clocks, thread)?; + fn release(&mut self, clocks: &ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> { + self.atomic_write_detect(clocks, index)?; let atomic = self.atomic_mut(); - atomic.sync_vector.set_values(&clocks.clock); - atomic.release_sequences.clear_and_set(thread, &clocks.clock); + atomic.sync_vector.clone_from(&clocks.clock); + atomic.release_sequences.clear(); + atomic.release_sequences.insert(index, &clocks.clock); Ok(()) } /// Update the memory cell data-race tracking for atomic /// store relaxed semantics - fn store_relaxed(&mut self, clocks: &ThreadClockSet, thread: ThreadId) -> Result<(), DataRace> { - self.atomic_write_detect(clocks, thread)?; + fn store_relaxed(&mut self, clocks: 
&ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> { + self.atomic_write_detect(clocks, index)?; let atomic = self.atomic_mut(); - atomic.sync_vector.set_values(&clocks.fence_release); - if let Some(release) = atomic.release_sequences.load(thread) { + atomic.sync_vector.clone_from(&clocks.fence_release); + if let Some(release) = atomic.release_sequences.get(index) { atomic.sync_vector.join(release); } - atomic.release_sequences.clear_and_retain(thread); + atomic.release_sequences.retain_index(index); Ok(()) } /// Update the memory cell data-race tracking for atomic /// store release semantics for RMW operations - fn rmw_release(&mut self, clocks: &ThreadClockSet, thread: ThreadId) -> Result<(), DataRace> { - self.atomic_write_detect(clocks, thread)?; + fn rmw_release(&mut self, clocks: &ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> { + self.atomic_write_detect(clocks, index)?; let atomic = self.atomic_mut(); atomic.sync_vector.join(&clocks.clock); - atomic.release_sequences.insert(thread, &clocks.clock); + atomic.release_sequences.insert(index, &clocks.clock); Ok(()) } /// Update the memory cell data-race tracking for atomic /// store relaxed semantics for RMW operations - fn rmw_relaxed(&mut self, clocks: &ThreadClockSet, thread: ThreadId) -> Result<(), DataRace> { - self.atomic_write_detect(clocks, thread)?; + fn rmw_relaxed(&mut self, clocks: &ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> { + self.atomic_write_detect(clocks, index)?; let atomic = self.atomic_mut(); atomic.sync_vector.join(&clocks.fence_release); Ok(()) @@ -727,11 +505,11 @@ fn rmw_relaxed(&mut self, clocks: &ThreadClockSet, thread: ThreadId) -> Result<( /// Detect data-races with an atomic read, caused by a non-atomic write that does /// not happen-before the atomic-read - fn atomic_read_detect(&mut self, clocks: &ThreadClockSet, thread: ThreadId) -> Result<(), DataRace> { + fn atomic_read_detect(&mut self, clocks: &ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> { log::trace!("Atomic read with vectors: {:#?} :: {:#?}", self, clocks); - if self.write <= clocks.clock[self.write_thread] { + if self.write <= clocks.clock[self.write_index] { let atomic = self.atomic_mut(); - atomic.read_vector.set_at_thread(&clocks.clock, thread); + atomic.read_vector.set_at_index(&clocks.clock, index); Ok(()) }else{ Err(DataRace) @@ -740,11 +518,11 @@ fn atomic_read_detect(&mut self, clocks: &ThreadClockSet, thread: ThreadId) -> R /// Detect data-races with an atomic write, either with a non-atomic read or with /// a non-atomic write: - fn atomic_write_detect(&mut self, clocks: &ThreadClockSet, thread: ThreadId) -> Result<(), DataRace> { + fn atomic_write_detect(&mut self, clocks: &ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> { log::trace!("Atomic write with vectors: {:#?} :: {:#?}", self, clocks); - if self.write <= clocks.clock[self.write_thread] && self.read <= clocks.clock { + if self.write <= clocks.clock[self.write_index] && self.read <= clocks.clock { let atomic = self.atomic_mut(); - atomic.write_vector.set_at_thread(&clocks.clock, thread); + atomic.write_vector.set_at_index(&clocks.clock, index); Ok(()) }else{ Err(DataRace) @@ -753,16 +531,16 @@ fn atomic_write_detect(&mut self, clocks: &ThreadClockSet, thread: ThreadId) -> /// Detect races for non-atomic read operations at the current memory cell /// returns true if a data-race is detected - fn read_race_detect(&mut self, clocks: &ThreadClockSet, thread: ThreadId) -> Result<(), DataRace> { + fn read_race_detect(&mut self, 
clocks: &ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> { log::trace!("Unsynchronized read with vectors: {:#?} :: {:#?}", self, clocks); - if self.write <= clocks.clock[self.write_thread] { + if self.write <= clocks.clock[self.write_index] { let race_free = if let Some(atomic) = self.atomic() { atomic.write_vector <= clocks.clock }else{ true }; if race_free { - self.read.set_at_thread(&clocks.clock, thread); + self.read.set_at_index(&clocks.clock, index); Ok(()) }else{ Err(DataRace) @@ -774,17 +552,17 @@ fn read_race_detect(&mut self, clocks: &ThreadClockSet, thread: ThreadId) -> Res /// Detect races for non-atomic write operations at the current memory cell /// returns true if a data-race is detected - fn write_race_detect(&mut self, clocks: &ThreadClockSet, thread: ThreadId) -> Result<(), DataRace> { + fn write_race_detect(&mut self, clocks: &ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> { log::trace!("Unsynchronized write with vectors: {:#?} :: {:#?}", self, clocks); - if self.write <= clocks.clock[self.write_thread] && self.read <= clocks.clock { + if self.write <= clocks.clock[self.write_index] && self.read <= clocks.clock { let race_free = if let Some(atomic) = self.atomic() { atomic.write_vector <= clocks.clock && atomic.read_vector <= clocks.clock }else{ true }; if race_free { - self.write = clocks.clock[thread]; - self.write_thread = thread; + self.write = clocks.clock[index]; + self.write_index = index; self.read.set_zero_vector(); Ok(()) }else{ @@ -822,7 +600,7 @@ pub fn new_allocation(global: &MemoryExtra, len: Size) -> VClockAlloc { // Find an index, if one exists where the value // in `l` is greater than the value in `r` - fn find_gt_index(l: &VClock, r: &VClock) -> Option { + fn find_gt_index(l: &VClock, r: &VClock) -> Option { let l_slice = l.as_slice(); let r_slice = r.as_slice(); l_slice.iter().zip(r_slice.iter()) @@ -844,7 +622,7 @@ fn find_gt_index(l: &VClock, r: &VClock) -> Option { }else{ None } - }) + }).map(|idx| VectorIdx::new(idx)) } /// Report a data-race found in the program @@ -859,35 +637,29 @@ fn report_data_race<'tcx>( action: &str, is_atomic: bool, pointer: Pointer, len: Size ) -> InterpResult<'tcx> { - let current_thread = global.current_thread(); - let current_state = global.current_thread_state(); - let mut write_clock = VClock::default(); + let (current_index, current_clocks) = global.current_thread_state(); + let write_clock; let ( other_action, other_thread, other_clock - ) = if range.write > current_state.clock[range.write_thread] { - + ) = if range.write > current_clocks.clock[range.write_index] { // Convert the write action into the vector clock it // represents for diagnostic purposes - let wclock = write_clock.get_mut_with_min_len( - current_state.clock.as_slice().len() - .max(range.write_thread.to_u32() as usize + 1) - ); - wclock[range.write_thread.to_u32() as usize] = range.write; - ("WRITE", range.write_thread, write_clock.as_slice()) + write_clock = VClock::new_with_index(range.write_index, range.write); + ("WRITE", range.write_index, &write_clock) }else if let Some(idx) = Self::find_gt_index( - &range.read, ¤t_state.clock + &range.read, ¤t_clocks.clock ){ - ("READ", ThreadId::new(idx), range.read.as_slice()) + ("READ", idx, &range.read) }else if !is_atomic { if let Some(atomic) = range.atomic() { if let Some(idx) = Self::find_gt_index( - &atomic.write_vector, ¤t_state.clock + &atomic.write_vector, ¤t_clocks.clock ) { - ("ATOMIC_STORE", ThreadId::new(idx), atomic.write_vector.as_slice()) + ("ATOMIC_STORE", idx, 
&atomic.write_vector) }else if let Some(idx) = Self::find_gt_index( - &atomic.read_vector, ¤t_state.clock + &atomic.read_vector, ¤t_clocks.clock ) { - ("ATOMIC_LOAD", ThreadId::new(idx), atomic.read_vector.as_slice()) + ("ATOMIC_LOAD", idx, &atomic.read_vector) }else{ unreachable!("Failed to find report data-race for non-atomic operation: no race found") } @@ -899,7 +671,7 @@ fn report_data_race<'tcx>( }; // Load elaborated thread information about the racing thread actions - let current_thread_info = global.print_thread_metadata(current_thread); + let current_thread_info = global.print_thread_metadata(current_index); let other_thread_info = global.print_thread_metadata(other_thread); // Throw the data-race detection @@ -910,7 +682,7 @@ fn report_data_race<'tcx>( action, current_thread_info, other_action, other_thread_info, pointer.alloc_id, pointer.offset.bytes(), len.bytes(), - current_state.clock, + current_clocks.clock, other_clock ) } @@ -921,14 +693,10 @@ fn report_data_race<'tcx>( /// operation pub fn read<'tcx>(&self, pointer: Pointer, len: Size) -> InterpResult<'tcx> { if self.global.multi_threaded.get() { - let current_thread = self.global.current_thread(); - let current_state = self.global.current_thread_state(); - - // The alloc-ranges are not split, however changes are not going to be made - // to the ranges being tested, so this is ok + let (index, clocks) = self.global.current_thread_state(); let mut alloc_ranges = self.alloc_ranges.borrow_mut(); for (_,range) in alloc_ranges.iter_mut(pointer.offset, len) { - if range.read_race_detect(&*current_state, current_thread) == Err(DataRace) { + if range.read_race_detect(&*clocks, index) == Err(DataRace) { // Report data-race return Self::report_data_race( &self.global,range, "READ", false, pointer, len @@ -946,10 +714,9 @@ pub fn read<'tcx>(&self, pointer: Pointer, len: Size) -> InterpResult<'tcx> /// operation pub fn write<'tcx>(&mut self, pointer: Pointer, len: Size) -> InterpResult<'tcx> { if self.global.multi_threaded.get() { - let current_thread = self.global.current_thread(); - let current_state = self.global.current_thread_state(); + let (index, clocks) = self.global.current_thread_state(); for (_,range) in self.alloc_ranges.get_mut().iter_mut(pointer.offset, len) { - if range.write_race_detect(&*current_state, current_thread) == Err(DataRace) { + if range.write_race_detect(&*clocks, index) == Err(DataRace) { // Report data-race return Self::report_data_race( &self.global, range, "WRITE", false, pointer, len @@ -967,10 +734,9 @@ pub fn write<'tcx>(&mut self, pointer: Pointer, len: Size) -> InterpResult< /// operation pub fn deallocate<'tcx>(&mut self, pointer: Pointer, len: Size) -> InterpResult<'tcx> { if self.global.multi_threaded.get() { - let current_thread = self.global.current_thread(); - let current_state = self.global.current_thread_state(); + let (index, clocks) = self.global.current_thread_state(); for (_,range) in self.alloc_ranges.get_mut().iter_mut(pointer.offset, len) { - if range.write_race_detect(&*current_state, current_thread) == Err(DataRace) { + if range.write_race_detect(&*clocks, index) == Err(DataRace) { // Report data-race return Self::report_data_race( &self.global, range, "DEALLOCATE", false, pointer, len @@ -989,6 +755,7 @@ pub fn deallocate<'tcx>(&mut self, pointer: Pointer, len: Size) -> InterpRe /// additional metadata to model atomic fence operations #[derive(Clone, Default, Debug)] struct ThreadClockSet { + /// The increasing clock representing timestamps /// that happen-before this thread. 
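    /// For intuition: `clock[j] = t` can be read as "this thread has observed
    /// everything the thread assigned vector index `j` did up to its local
    /// timestamp `t`", so any action with a smaller timestamp at index `j`
    /// happens-before this thread's next action.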
clock: VClock, @@ -1008,7 +775,7 @@ impl ThreadClockSet { /// set of thread vector clocks #[inline] fn apply_release_fence(&mut self) { - self.fence_release.set_values(&self.clock); + self.fence_release.clone_from(&self.clock); } /// Apply the effects of a acquire fence to this @@ -1021,8 +788,8 @@ fn apply_acquire_fence(&mut self) { /// Increment the happens-before clock at a /// known index #[inline] - fn increment_clock(&mut self, thread: ThreadId) { - self.clock.increment_thread(thread); + fn increment_clock(&mut self, index: VectorIdx) { + self.clock.increment_index(index); } /// Join the happens-before clock with that of @@ -1047,81 +814,178 @@ pub struct GlobalState { /// any data-races multi_threaded: Cell, - /// The current vector clock for all threads - /// this includes threads that have terminated - /// execution - thread_clocks: RefCell>, + /// Mapping of a vector index to a known set of thread + /// clocks, this is not directly mapping from a thread id + /// since it may refer to multiple threads + vector_clocks: RefCell>, + + /// Mapping of a given vector index to the current thread + /// that the execution is representing, this may change + /// if a vector index is re-assigned to a new thread + vector_info: RefCell>, //FIXME: make option + + /// The mapping of a given thread to a known vector clock + thread_info: RefCell, Option>)>>, - /// Thread name cache for better diagnostics on the reporting - /// of a data-race - thread_names: RefCell>>>, + /// The current vector index being executed + current_index: Cell, - /// The current thread being executed, - /// this is mirrored from the scheduler since - /// it is required for loading the current vector - /// clock for data-race detection - current_thread_id: Cell, + /// Potential vector indices that could be re-used on thread creation + /// values are inserted here on thread join events, and can be + /// re-used once the vector clocks of all current threads + /// are equal to the vector clock of the joined thread + reuse_candidates: RefCell>, } impl GlobalState { /// Create a new global state, setup with just thread-id=0 /// advanced to timestamp = 1 pub fn new() -> Self { - let mut vec = IndexVec::new(); - let thread_id = vec.push(ThreadClockSet::default()); - vec[thread_id].increment_clock(thread_id); - GlobalState { + let global_state = GlobalState { multi_threaded: Cell::new(false), - thread_clocks: RefCell::new(vec), - thread_names: RefCell::new(IndexVec::new()), - current_thread_id: Cell::new(thread_id), - } + vector_clocks: RefCell::new(IndexVec::new()), + vector_info: RefCell::new(IndexVec::new()), + thread_info: RefCell::new(IndexVec::new()), + current_index: Cell::new(VectorIdx::new(0)), + reuse_candidates: RefCell::new(FxHashSet::default()), + }; + + // Setup the main-thread since it is not explicitly created: + // uses vector index and thread-id 0, also the rust runtime gives + // the main-thread a name of "main". 
+ let index = global_state.vector_clocks.borrow_mut().push(ThreadClockSet::default()); + global_state.vector_info.borrow_mut().push(ThreadId::new(0)); + global_state.thread_info.borrow_mut().push( + (Some(index), Some("main".to_string().into_boxed_str()) + )); + + global_state } + // Try to find vector index values that can potentially be re-used + // by a new thread instead of a new vector index being created + fn find_vector_index_reuse_candidate(&self) -> Option { + let mut reuse = self.reuse_candidates.borrow_mut(); + let vector_clocks = self.vector_clocks.borrow(); + for &candidate in reuse.iter() { + let target_timestamp = vector_clocks[candidate].clock[candidate]; + if vector_clocks.iter().all(|clock| { + clock.clock[candidate] == target_timestamp + }) { + // All vector clocks for each vector index are equal to + // the target timestamp, therefore since the thread has + // terminated and cannot update the vector clock. + // No more data-races involving this vector index are possible + // so it can be re-used + assert!(reuse.remove(&candidate)); + return Some(candidate) + } + } + None + } // Hook for thread creation, enabled multi-threaded execution and marks // the current thread timestamp as happening-before the current thread #[inline] pub fn thread_created(&self, thread: ThreadId) { + let current_index = self.current_index(); - // Enable multi-threaded execution mode now that there are at least - // two threads + // Enable multi-threaded execution, there are now two threads + // so data-races are now possible. self.multi_threaded.set(true); - let current_thread = self.current_thread_id.get(); - let mut vectors = self.thread_clocks.borrow_mut(); - vectors.ensure_contains_elem(thread, Default::default); - let (current, created) = vectors.pick2_mut(current_thread, thread); - // Pre increment clocks before atomic operation - current.increment_clock(current_thread); + // Load and setup the associated thread metadata + let mut thread_info = self.thread_info.borrow_mut(); + thread_info.ensure_contains_elem(thread, Default::default); + + // Assign a vector index for the thread, attempting to re-use an old + // vector index that can no longer report any data-races if possible + let created_index = if let Some( + reuse_index + ) = self.find_vector_index_reuse_candidate() { + // Now re-configure the re-use candidate, increment the clock + // for the new sync use of the vector + let mut vector_clocks = self.vector_clocks.borrow_mut(); + vector_clocks[reuse_index].increment_clock(reuse_index); + + // Locate the old thread the vector was associated with and update + // it to represent the new thread instead + let mut vector_info = self.vector_info.borrow_mut(); + let old_thread = vector_info[reuse_index]; + vector_info[reuse_index] = thread; + + // Mark the thread the vector index was associated with as no longer + // representing a thread index + thread_info[old_thread].0 = None; + + reuse_index + }else{ + // No vector re-use candidates available, instead create + // a new vector index + let mut vector_info = self.vector_info.borrow_mut(); + vector_info.push(thread) + }; + + // Mark the chosen vector index as in use by the thread + thread_info[thread].0 = Some(created_index); + + // Create a thread clock set if applicable + let mut vector_clocks = self.vector_clocks.borrow_mut(); + if created_index == vector_clocks.next_index() { + vector_clocks.push(ThreadClockSet::default()); + } - // The current thread happens-before the created thread - // so update the created vector clock + // Now load 
the two clocks and configure the initial state + let (current, created) = vector_clocks.pick2_mut(current_index, created_index); + + // Advance the current thread before the synchronized operation + current.increment_clock(current_index); + + // Join the created with current, since the current threads + // previous actions happen-before the created thread created.join_with(current); - // Post increment clocks after atomic operation - current.increment_clock(current_thread); - created.increment_clock(thread); + // Advance both threads after the synchronized operation + current.increment_clock(current_index); + created.increment_clock(created_index); } /// Hook on a thread join to update the implicit happens-before relation - /// between the joined thead and the current thread + /// between the joined thead and the current thread. + /// Called after the join has occured, and hence implicitly also states + /// that the thread must have terminated as well #[inline] pub fn thread_joined(&self, current_thread: ThreadId, join_thread: ThreadId) { - let mut vectors = self.thread_clocks.borrow_mut(); - let (current, join) = vectors.pick2_mut(current_thread, join_thread); + let (current_index, join_index) = { + let thread_info = self.thread_info.borrow(); + let current_index = thread_info[current_thread].0 + .expect("Joining into thread with no assigned vector"); + let join_index = thread_info[join_thread].0 + .expect("Joining thread with no assigned vector"); + (current_index, join_index) + }; + let mut clocks_vec = self.vector_clocks.borrow_mut(); + let (current, join) = clocks_vec.pick2_mut(current_index, join_index); // Pre increment clocks before atomic operation - current.increment_clock(current_thread); - join.increment_clock(join_thread); + current.increment_clock(current_index); + join.increment_clock(join_index); // The join thread happens-before the current thread // so update the current vector clock current.join_with(join); // Post increment clocks after atomic operation - current.increment_clock(current_thread); - join.increment_clock(join_thread); + current.increment_clock(current_index); + join.increment_clock(join_index); + + // The joined thread vector clock is a potential candidate + // for re-use given sufficient time, mark as available once + // threads have been created. 
This is because this function + // is called once join_thread has terminated and such cannot + // update any-more + let mut reuse = self.reuse_candidates.borrow_mut(); + reuse.insert(join_index); } /// Hook for updating the local tracker of the currently @@ -1129,7 +993,10 @@ pub fn thread_joined(&self, current_thread: ThreadId, join_thread: ThreadId) { /// `active_thread` in thread.rs is updated #[inline] pub fn thread_set_active(&self, thread: ThreadId) { - self.current_thread_id.set(thread); + let thread_info = self.thread_info.borrow(); + let vector_idx = thread_info[thread].0 + .expect("Setting thread active with no assigned vector"); + self.current_index.set(vector_idx); } /// Hook for updating the local tracker of the threads name @@ -1137,33 +1004,40 @@ pub fn thread_set_active(&self, thread: ThreadId) { /// the thread name is used for improved diagnostics /// during a data-race #[inline] - pub fn thread_set_name(&self, name: String) { + pub fn thread_set_name(&self, thread: ThreadId, name: String) { let name = name.into_boxed_str(); - let mut names = self.thread_names.borrow_mut(); - let thread = self.current_thread_id.get(); - names.ensure_contains_elem(thread, Default::default); - names[thread] = Some(name); + let mut thread_info = self.thread_info.borrow_mut(); + thread_info[thread].1 = Some(name); } - /// Advance the vector clock for a thread - /// this is called before and after any atomic/synchronizing operations - /// that may manipulate state - #[inline] - fn advance_vector_clock(&self) { - let thread = self.current_thread_id.get(); - let mut vectors = self.thread_clocks.borrow_mut(); - vectors[thread].increment_clock(thread); - - // Log the increment in the atomic vector clock - log::trace!("Atomic vector clock increase for {:?} to {:?}",thread, vectors[thread].clock); + /// Attempt to perform a synchronized operation, this + /// will perform no operation if multi-threading is + /// not currently enabled. 
+ /// Otherwise it will increment the clock for the current + /// vector before and after the operation for data-race + /// detection between any happens-before edges the + /// operation may create + fn maybe_perform_sync_operation<'tcx>( + &self, op: impl FnOnce(VectorIdx, RefMut<'_,ThreadClockSet>) -> InterpResult<'tcx>, + ) -> InterpResult<'tcx> { + if self.multi_threaded.get() { + let (index, mut clocks) = self.current_thread_state_mut(); + clocks.increment_clock(index); + op(index, clocks)?; + let (_, mut clocks) = self.current_thread_state_mut(); + clocks.increment_clock(index); + } + Ok(()) } /// Internal utility to identify a thread stored internally /// returns the id and the name for better diagnostics - fn print_thread_metadata(&self, thread: ThreadId) -> String { - if let Some(Some(name)) = self.thread_names.borrow().get(thread) { + fn print_thread_metadata(&self, vector: VectorIdx) -> String { + let thread = self.vector_info.borrow()[vector]; + let thread_name = &self.thread_info.borrow()[thread].1; + if let Some(name) = thread_name { let name: &str = name; format!("Thread(id = {:?}, name = {:?})", thread.to_u32(), &*name) }else{ @@ -1175,25 +1049,19 @@ fn print_thread_metadata(&self, thread: ThreadId) -> String { /// Acquire a lock, express that the previous call of /// `validate_lock_release` must happen before this pub fn validate_lock_acquire(&self, lock: &DataRaceLockHandle, thread: ThreadId) { - let mut ref_vector = self.thread_clocks.borrow_mut(); - ref_vector[thread].increment_clock(thread); - - let clocks = &mut ref_vector[thread]; + let (index, mut clocks) = self.load_thread_state_mut(thread); + clocks.increment_clock(index); clocks.clock.join(&lock.clock); - - ref_vector[thread].increment_clock(thread); + clocks.increment_clock(index); } /// Release a lock handle, express that this happens-before /// any subsequent calls to `validate_lock_acquire` pub fn validate_lock_release(&self, lock: &mut DataRaceLockHandle, thread: ThreadId) { - let mut ref_vector = self.thread_clocks.borrow_mut(); - ref_vector[thread].increment_clock(thread); - - let clocks = &ref_vector[thread]; - lock.clock.set_values(&clocks.clock); - - ref_vector[thread].increment_clock(thread); + let (index, mut clocks) = self.load_thread_state_mut(thread); + clocks.increment_clock(index); + lock.clock.clone_from(&clocks.clock); + clocks.increment_clock(index); } /// Release a lock handle, express that this happens-before @@ -1201,401 +1069,48 @@ pub fn validate_lock_release(&self, lock: &mut DataRaceLockHandle, thread: Threa /// as any previous calls to this function after any /// `validate_lock_release` calls pub fn validate_lock_release_shared(&self, lock: &mut DataRaceLockHandle, thread: ThreadId) { - let mut ref_vector = self.thread_clocks.borrow_mut(); - ref_vector[thread].increment_clock(thread); - - let clocks = &ref_vector[thread]; + let (index, mut clocks) = self.load_thread_state_mut(thread); + clocks.increment_clock(index); lock.clock.join(&clocks.clock); - - ref_vector[thread].increment_clock(thread); - } - - /// Load the thread clock set associated with the current thread - #[inline] - fn current_thread_state(&self) -> Ref<'_, ThreadClockSet> { - let ref_vector = self.thread_clocks.borrow(); - let thread = self.current_thread_id.get(); - Ref::map(ref_vector, |vector| &vector[thread]) - } - - /// Load the thread clock set associated with the current thread - /// mutably for modification - #[inline] - fn current_thread_state_mut(&self) -> RefMut<'_, ThreadClockSet> { - let ref_vector = 
self.thread_clocks.borrow_mut(); - let thread = self.current_thread_id.get(); - RefMut::map(ref_vector, |vector| &mut vector[thread]) - } - - /// Return the current thread, should be the same - /// as the data-race active thread - #[inline] - fn current_thread(&self) -> ThreadId { - self.current_thread_id.get() + clocks.increment_clock(index); } -} - - -/// The size of the vector-clock to store inline -/// clock vectors larger than this will be stored on the heap -const SMALL_VECTOR: usize = 4; - -/// The type of the time-stamps recorded in the data-race detector -/// set to a type of unsigned integer -type Timestamp = u32; - -/// A vector clock for detecting data-races -/// invariants: -/// - the last element in a VClock must not be 0 -/// -- this means that derive(PartialEq & Eq) is correct -/// -- as there is no implicit zero tail that might be equal -/// -- also simplifies the implementation of PartialOrd -#[derive(Clone, PartialEq, Eq, Default, Debug)] -pub struct VClock(SmallVec<[Timestamp; SMALL_VECTOR]>); - -impl VClock { - /// Load the backing slice behind the clock vector. + /// Load the vector index used by the given thread as well as the set of vector clocks + /// used by the thread #[inline] - fn as_slice(&self) -> &[Timestamp] { - self.0.as_slice() + fn load_thread_state_mut(&self, thread: ThreadId) -> (VectorIdx, RefMut<'_, ThreadClockSet>) { + let index = self.thread_info.borrow()[thread].0 + .expect("Loading thread state for thread with no assigned vector"); + let ref_vector = self.vector_clocks.borrow_mut(); + let clocks = RefMut::map(ref_vector, |vec| &mut vec[index]); + (index, clocks) } - /// Get a mutable slice to the internal vector with minimum `min_len` - /// elements, to preserve invariants this vector must modify - /// the `min_len`-1 nth element to a non-zero value + /// Load the current vector clock in use and the current set of thread clocks + /// in use for the vector #[inline] - fn get_mut_with_min_len(&mut self, min_len: usize) -> &mut [Timestamp] { - if self.0.len() < min_len { - self.0.resize(min_len, 0); - } - assert!(self.0.len() >= min_len); - self.0.as_mut_slice() + fn current_thread_state(&self) -> (VectorIdx, Ref<'_, ThreadClockSet>) { + let index = self.current_index(); + let ref_vector = self.vector_clocks.borrow(); + let clocks = Ref::map(ref_vector, |vec| &vec[index]); + (index, clocks) } - /// Increment the vector clock at a known index + /// Load the current vector clock in use and the current set of thread clocks + /// in use for the vector mutably for modification #[inline] - fn increment_index(&mut self, idx: usize) { - let mut_slice = self.get_mut_with_min_len(idx + 1); - let idx_ref = &mut mut_slice[idx]; - *idx_ref = idx_ref.checked_add(1).expect("Vector clock overflow") + fn current_thread_state_mut(&self) -> (VectorIdx, RefMut<'_, ThreadClockSet>) { + let index = self.current_index(); + let ref_vector = self.vector_clocks.borrow_mut(); + let clocks = RefMut::map(ref_vector, |vec| &mut vec[index]); + (index, clocks) } - // Increment the vector element representing the progress - // of execution in the given thread - #[inline] - pub fn increment_thread(&mut self, thread: ThreadId) { - self.increment_index(thread.to_u32() as usize); - } - - // Join the two vector-clocks together, this - // sets each vector-element to the maximum value - // of that element in either of the two source elements. 
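    // As a quick sketch of the intended behaviour (example values only, using the
    // `from_slice` helper from the tests module below): the shorter clock is
    // treated as zero-extended, so
    //
    //     let mut a = from_slice(&[1, 4, 2]);
    //     a.join(&from_slice(&[3, 1]));
    //     assert_eq!(a, from_slice(&[3, 4, 2]));
    //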
- pub fn join(&mut self, other: &Self) { - let rhs_slice = other.as_slice(); - let lhs_slice = self.get_mut_with_min_len(rhs_slice.len()); - - // Element-wise set to maximum. - for (l, &r) in lhs_slice.iter_mut().zip(rhs_slice.iter()) { - *l = r.max(*l); - } - } - - /// Joins with a thread at a known index - fn set_at_index(&mut self, other: &Self, idx: usize){ - let mut_slice = self.get_mut_with_min_len(idx + 1); - let slice = other.as_slice(); - mut_slice[idx] = slice[idx]; - } - - /// Join with a threads vector clock only at the desired index - /// returns true if the value updated - #[inline] - pub fn set_at_thread(&mut self, other: &Self, thread: ThreadId){ - self.set_at_index(other, thread.to_u32() as usize); - } - - /// Clear the vector to all zeros, stored as an empty internal - /// vector - #[inline] - pub fn set_zero_vector(&mut self) { - self.0.clear(); - } - - /// Set the values stored in this vector clock - /// to the values stored in another. - pub fn set_values(&mut self, new_value: &VClock) { - let new_slice = new_value.as_slice(); - self.0.resize(new_slice.len(), 0); - self.0.copy_from_slice(new_slice); - } -} - - -impl PartialOrd for VClock { - fn partial_cmp(&self, other: &VClock) -> Option { - - // Load the values as slices - let lhs_slice = self.as_slice(); - let rhs_slice = other.as_slice(); - - // Iterate through the combined vector slice - // keeping track of the order that is currently possible to satisfy. - // If an ordering relation is detected to be impossible, then bail and - // directly return None - let mut iter = lhs_slice.iter().zip(rhs_slice.iter()); - let mut order = match iter.next() { - Some((lhs, rhs)) => lhs.cmp(rhs), - None => Ordering::Equal - }; - for (l, r) in iter { - match order { - Ordering::Equal => order = l.cmp(r), - Ordering::Less => if l > r { - return None - }, - Ordering::Greater => if l < r { - return None - } - } - } - - //Now test if either left or right have trailing elements - // by the invariant the trailing elements have at least 1 - // non zero value, so no additional calculation is required - // to determine the result of the PartialOrder - let l_len = lhs_slice.len(); - let r_len = rhs_slice.len(); - match l_len.cmp(&r_len) { - // Equal has no additional elements: return current order - Ordering::Equal => Some(order), - // Right has at least 1 element > than the implicit 0, - // so the only valid values are Ordering::Less or None - Ordering::Less => match order { - Ordering::Less | Ordering::Equal => Some(Ordering::Less), - Ordering::Greater => None - } - // Left has at least 1 element > than the implicit 0, - // so the only valid values are Ordering::Greater or None - Ordering::Greater => match order { - Ordering::Greater | Ordering::Equal => Some(Ordering::Greater), - Ordering::Less => None - } - } - } - - fn lt(&self, other: &VClock) -> bool { - // Load the values as slices - let lhs_slice = self.as_slice(); - let rhs_slice = other.as_slice(); - - // If l_len > r_len then at least one element - // in l_len is > than r_len, therefore the result - // is either Some(Greater) or None, so return false - // early. - let l_len = lhs_slice.len(); - let r_len = rhs_slice.len(); - if l_len <= r_len { - // If any elements on the left are greater than the right - // then the result is None or Some(Greater), both of which - // return false, the earlier test asserts that no elements in the - // extended tail violate this assumption. 
Otherwise l <= r, finally - // the case where the values are potentially equal needs to be considered - // and false returned as well - let mut equal = l_len == r_len; - for (&l, &r) in lhs_slice.iter().zip(rhs_slice.iter()) { - if l > r { - return false - }else if l < r { - equal = false; - } - } - !equal - }else{ - false - } - } - - fn le(&self, other: &VClock) -> bool { - // Load the values as slices - let lhs_slice = self.as_slice(); - let rhs_slice = other.as_slice(); - - // If l_len > r_len then at least one element - // in l_len is > than r_len, therefore the result - // is either Some(Greater) or None, so return false - // early. - let l_len = lhs_slice.len(); - let r_len = rhs_slice.len(); - if l_len <= r_len { - // If any elements on the left are greater than the right - // then the result is None or Some(Greater), both of which - // return false, the earlier test asserts that no elements in the - // extended tail violate this assumption. Otherwise l <= r - !lhs_slice.iter().zip(rhs_slice.iter()).any(|(&l, &r)| l > r) - }else{ - false - } - } - - fn gt(&self, other: &VClock) -> bool { - // Load the values as slices - let lhs_slice = self.as_slice(); - let rhs_slice = other.as_slice(); - - // If r_len > l_len then at least one element - // in r_len is > than l_len, therefore the result - // is either Some(Less) or None, so return false - // early. - let l_len = lhs_slice.len(); - let r_len = rhs_slice.len(); - if l_len >= r_len { - // If any elements on the left are less than the right - // then the result is None or Some(Less), both of which - // return false, the earlier test asserts that no elements in the - // extended tail violate this assumption. Otherwise l >=, finally - // the case where the values are potentially equal needs to be considered - // and false returned as well - let mut equal = l_len == r_len; - for (&l, &r) in lhs_slice.iter().zip(rhs_slice.iter()) { - if l < r { - return false - }else if l > r { - equal = false; - } - } - !equal - }else{ - false - } - } - - fn ge(&self, other: &VClock) -> bool { - // Load the values as slices - let lhs_slice = self.as_slice(); - let rhs_slice = other.as_slice(); - - // If r_len > l_len then at least one element - // in r_len is > than l_len, therefore the result - // is either Some(Less) or None, so return false - // early. - let l_len = lhs_slice.len(); - let r_len = rhs_slice.len(); - if l_len >= r_len { - // If any elements on the left are less than the right - // then the result is None or Some(Less), both of which - // return false, the earlier test asserts that no elements in the - // extended tail violate this assumption. 
Otherwise l >= r - !lhs_slice.iter().zip(rhs_slice.iter()).any(|(&l, &r)| l < r) - }else{ - false - } - } -} - -impl Index for VClock { - type Output = Timestamp; - + /// Return the current thread, should be the same + /// as the data-race active thread #[inline] - fn index(&self, index: ThreadId) -> &Timestamp { - self.as_slice().get(index.to_u32() as usize).unwrap_or(&0) + fn current_index(&self) -> VectorIdx { + self.current_index.get() } } - -/// Test vector clock ordering operations -/// data-race detection is tested in the external -/// test suite -#[cfg(test)] -mod tests { - use super::{VClock, Timestamp}; - use std::cmp::Ordering; - - #[test] - fn test_equal() { - let mut c1 = VClock::default(); - let mut c2 = VClock::default(); - assert_eq!(c1, c2); - c1.increment_index(5); - assert_ne!(c1, c2); - c2.increment_index(53); - assert_ne!(c1, c2); - c1.increment_index(53); - assert_ne!(c1, c2); - c2.increment_index(5); - assert_eq!(c1, c2); - } - - #[test] - fn test_partial_order() { - // Small test - assert_order(&[1], &[1], Some(Ordering::Equal)); - assert_order(&[1], &[2], Some(Ordering::Less)); - assert_order(&[2], &[1], Some(Ordering::Greater)); - assert_order(&[1], &[1,2], Some(Ordering::Less)); - assert_order(&[2], &[1,2], None); - - // Misc tests - assert_order(&[400], &[0, 1], None); - - // Large test - assert_order(&[0,1,2,3,4,5,6,7,8,9,10], &[0,1,2,3,4,5,6,7,8,9,10,0,0,0], Some(Ordering::Equal)); - assert_order(&[0,1,2,3,4,5,6,7,8,9,10], &[0,1,2,3,4,5,6,7,8,9,10,0,1,0], Some(Ordering::Less)); - assert_order(&[0,1,2,3,4,5,6,7,8,9,11], &[0,1,2,3,4,5,6,7,8,9,10,0,0,0], Some(Ordering::Greater)); - assert_order(&[0,1,2,3,4,5,6,7,8,9,11], &[0,1,2,3,4,5,6,7,8,9,10,0,1,0], None); - assert_order(&[0,1,2,3,4,5,6,7,8,9,9 ], &[0,1,2,3,4,5,6,7,8,9,10,0,0,0], Some(Ordering::Less)); - assert_order(&[0,1,2,3,4,5,6,7,8,9,9 ], &[0,1,2,3,4,5,6,7,8,9,10,0,1,0], Some(Ordering::Less)); - } - - fn from_slice(mut slice: &[Timestamp]) -> VClock { - while let Some(0) = slice.last() { - slice = &slice[..slice.len() - 1] - } - VClock(smallvec::SmallVec::from_slice(slice)) - } - - fn assert_order(l: &[Timestamp], r: &[Timestamp], o: Option) { - let l = from_slice(l); - let r = from_slice(r); - - //Test partial_cmp - let compare = l.partial_cmp(&r); - assert_eq!(compare, o, "Invalid comparison\n l: {:?}\n r: {:?}",l,r); - let alt_compare = r.partial_cmp(&l); - assert_eq!(alt_compare, o.map(Ordering::reverse), "Invalid alt comparison\n l: {:?}\n r: {:?}",l,r); - - //Test operatorsm with faster implementations - assert_eq!( - matches!(compare,Some(Ordering::Less)), l < r, - "Invalid (<):\n l: {:?}\n r: {:?}",l,r - ); - assert_eq!( - matches!(compare,Some(Ordering::Less) | Some(Ordering::Equal)), l <= r, - "Invalid (<=):\n l: {:?}\n r: {:?}",l,r - ); - assert_eq!( - matches!(compare,Some(Ordering::Greater)), l > r, - "Invalid (>):\n l: {:?}\n r: {:?}",l,r - ); - assert_eq!( - matches!(compare,Some(Ordering::Greater) | Some(Ordering::Equal)), l >= r, - "Invalid (>=):\n l: {:?}\n r: {:?}",l,r - ); - assert_eq!( - matches!(alt_compare,Some(Ordering::Less)), r < l, - "Invalid alt (<):\n l: {:?}\n r: {:?}",l,r - ); - assert_eq!( - matches!(alt_compare,Some(Ordering::Less) | Some(Ordering::Equal)), r <= l, - "Invalid alt (<=):\n l: {:?}\n r: {:?}",l,r - ); - assert_eq!( - matches!(alt_compare,Some(Ordering::Greater)), r > l, - "Invalid alt (>):\n l: {:?}\n r: {:?}",l,r - ); - assert_eq!( - matches!(alt_compare,Some(Ordering::Greater) | Some(Ordering::Equal)), r >= l, - "Invalid alt (>=):\n l: {:?}\n r: {:?}",l,r 
- ); - } -} diff --git a/src/lib.rs b/src/lib.rs index f384787e4c6..c8c9e70ec3d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -35,6 +35,7 @@ mod stacked_borrows; mod sync; mod thread; +mod vector_clock; // Establish a "crate-wide prelude": we often import `crate::*`. @@ -79,6 +80,9 @@ pub use crate::sync::{ EvalContextExt as SyncEvalContextExt, CondvarId, MutexId, RwLockId }; +pub use crate::vector_clock::{ + VClock, VSmallClockSet, VectorIdx, VTimestamp +}; /// Insert rustc arguments at the beginning of the argument list that Miri wants to be /// set per default, for maximal validation power. diff --git a/src/shims/intrinsics.rs b/src/shims/intrinsics.rs index 2bb15e712c5..50f97af8453 100644 --- a/src/shims/intrinsics.rs +++ b/src/shims/intrinsics.rs @@ -469,8 +469,7 @@ fn atomic_load( let place = this.deref_operand(place)?; // make sure it fits into a scalar; otherwise it cannot be atomic - let val = this.read_scalar_racy(place)?; - this.validate_atomic_load(place, atomic)?; + let val = this.read_scalar_atomic(place, atomic)?; // Check alignment requirements. Atomics must always be aligned to their size, // even if the type they wrap would be less aligned (e.g. AtomicU64 on 32bit must @@ -495,9 +494,7 @@ fn atomic_store(&mut self, args: &[OpTy<'tcx, Tag>], atomic: AtomicWriteOp) -> I this.memory.check_ptr_access(place.ptr, place.layout.size, align)?; // Perform atomic store - this.write_scalar_racy(val, place)?; - - this.validate_atomic_store(place, atomic)?; + this.write_scalar_atomic(val, place, atomic)?; Ok(()) } @@ -527,7 +524,9 @@ fn atomic_op( bug!("Atomic arithmetic operations only work on integer types"); } let rhs = this.read_immediate(rhs)?; - let old = this.read_immediate_racy(place)?; + let old = this.allow_data_races_mut(|this| { + this.read_immediate(place. into()) + })?; // Check alignment requirements. Atomics must always be aligned to their size, // even if the type they wrap would be less aligned (e.g. AtomicU64 on 32bit must @@ -539,7 +538,9 @@ fn atomic_op( // Atomics wrap around on overflow. let val = this.binary_op(op, old, rhs)?; let val = if neg { this.unary_op(mir::UnOp::Not, val)? } else { val }; - this.write_immediate_racy(*val, place)?; + this.allow_data_races_mut(|this| { + this.write_immediate(*val, place.into()) + })?; this.validate_atomic_rmw(place, atomic)?; Ok(()) @@ -553,7 +554,9 @@ fn atomic_exchange( let &[place, new] = check_arg_count(args)?; let place = this.deref_operand(place)?; let new = this.read_scalar(new)?; - let old = this.read_scalar_racy(place)?; + let old = this.allow_data_races_mut(|this| { + this.read_scalar(place.into()) + })?; // Check alignment requirements. Atomics must always be aligned to their size, // even if the type they wrap would be less aligned (e.g. AtomicU64 on 32bit must @@ -562,7 +565,9 @@ fn atomic_exchange( this.memory.check_ptr_access(place.ptr, place.layout.size, align)?; this.write_scalar(old, dest)?; // old value is returned - this.write_scalar_racy(new, place)?; + this.allow_data_races_mut(|this| { + this.write_scalar(new, place.into()) + })?; this.validate_atomic_rmw(place, atomic)?; Ok(()) @@ -583,7 +588,9 @@ fn atomic_compare_exchange( // to read with the failure ordering and if successfull then try again with the success // read ordering and write in the success case. // Read as immediate for the sake of `binary_op()` - let old = this.read_immediate_racy(place)?; + let old = this.allow_data_races_mut(|this| { + this.read_immediate(place.into()) + })?; // Check alignment requirements. 
Atomics must always be aligned to their size, // even if the type they wrap would be less aligned (e.g. AtomicU64 on 32bit must @@ -602,7 +609,9 @@ fn atomic_compare_exchange( // if successful, perform a full rw-atomic validation // otherwise treat this as an atomic load with the fail ordering if eq.to_bool()? { - this.write_scalar_racy(new, place)?; + this.allow_data_races_mut(|this| { + this.write_scalar(new, place.into()) + })?; this.validate_atomic_rmw(place, success)?; } else { this.validate_atomic_load(place, fail)?; diff --git a/src/shims/posix/sync.rs b/src/shims/posix/sync.rs index 332e79071a0..d741ef346e9 100644 --- a/src/shims/posix/sync.rs +++ b/src/shims/posix/sync.rs @@ -62,7 +62,10 @@ fn mutex_get_kind<'mir, 'tcx: 'mir>( mutex_op: OpTy<'tcx, Tag>, ) -> InterpResult<'tcx, ScalarMaybeUninit> { let offset = if ecx.pointer_size().bytes() == 8 { 16 } else { 12 }; - ecx.read_scalar_at_offset_racy(mutex_op, offset, ecx.machine.layouts.i32) + ecx.read_scalar_at_offset_atomic( + mutex_op, offset, ecx.machine.layouts.i32, + AtomicReadOp::SeqCst + ) } fn mutex_set_kind<'mir, 'tcx: 'mir>( @@ -71,14 +74,19 @@ fn mutex_set_kind<'mir, 'tcx: 'mir>( kind: impl Into>, ) -> InterpResult<'tcx, ()> { let offset = if ecx.pointer_size().bytes() == 8 { 16 } else { 12 }; - ecx.write_scalar_at_offset_racy(mutex_op, offset, kind, ecx.machine.layouts.i32) + ecx.write_scalar_at_offset_atomic( + mutex_op, offset, kind, ecx.machine.layouts.i32, + AtomicWriteOp::SeqCst + ) } fn mutex_get_id<'mir, 'tcx: 'mir>( ecx: &MiriEvalContext<'mir, 'tcx>, mutex_op: OpTy<'tcx, Tag>, ) -> InterpResult<'tcx, ScalarMaybeUninit> { - ecx.read_scalar_at_offset_racy(mutex_op, 4, ecx.machine.layouts.u32) + ecx.read_scalar_at_offset_atomic( + mutex_op, 4, ecx.machine.layouts.u32, AtomicReadOp::SeqCst + ) } fn mutex_set_id<'mir, 'tcx: 'mir>( @@ -86,7 +94,10 @@ fn mutex_set_id<'mir, 'tcx: 'mir>( mutex_op: OpTy<'tcx, Tag>, id: impl Into>, ) -> InterpResult<'tcx, ()> { - ecx.write_scalar_at_offset_racy(mutex_op, 4, id, ecx.machine.layouts.u32) + ecx.write_scalar_at_offset_atomic( + mutex_op, 4, id, ecx.machine.layouts.u32, + AtomicWriteOp::SeqCst + ) } fn mutex_get_or_create_id<'mir, 'tcx: 'mir>( @@ -116,7 +127,10 @@ fn rwlock_get_id<'mir, 'tcx: 'mir>( ecx: &MiriEvalContext<'mir, 'tcx>, rwlock_op: OpTy<'tcx, Tag>, ) -> InterpResult<'tcx, ScalarMaybeUninit> { - ecx.read_scalar_at_offset_racy(rwlock_op, 4, ecx.machine.layouts.u32) + ecx.read_scalar_at_offset_atomic( + rwlock_op, 4, ecx.machine.layouts.u32, + AtomicReadOp::SeqCst + ) } fn rwlock_set_id<'mir, 'tcx: 'mir>( @@ -124,7 +138,10 @@ fn rwlock_set_id<'mir, 'tcx: 'mir>( rwlock_op: OpTy<'tcx, Tag>, id: impl Into>, ) -> InterpResult<'tcx, ()> { - ecx.write_scalar_at_offset_racy(rwlock_op, 4, id, ecx.machine.layouts.u32) + ecx.write_scalar_at_offset_atomic( + rwlock_op, 4, id, ecx.machine.layouts.u32, + AtomicWriteOp::SeqCst + ) } fn rwlock_get_or_create_id<'mir, 'tcx: 'mir>( @@ -177,7 +194,10 @@ fn cond_get_id<'mir, 'tcx: 'mir>( ecx: &MiriEvalContext<'mir, 'tcx>, cond_op: OpTy<'tcx, Tag>, ) -> InterpResult<'tcx, ScalarMaybeUninit> { - ecx.read_scalar_at_offset_racy(cond_op, 4, ecx.machine.layouts.u32) + ecx.read_scalar_at_offset_atomic( + cond_op, 4, ecx.machine.layouts.u32, + AtomicReadOp::SeqCst + ) } fn cond_set_id<'mir, 'tcx: 'mir>( @@ -185,7 +205,10 @@ fn cond_set_id<'mir, 'tcx: 'mir>( cond_op: OpTy<'tcx, Tag>, id: impl Into>, ) -> InterpResult<'tcx, ()> { - ecx.write_scalar_at_offset_racy(cond_op, 4, id, ecx.machine.layouts.u32) + ecx.write_scalar_at_offset_atomic( + 
cond_op, 4, id, ecx.machine.layouts.u32,
+        AtomicWriteOp::SeqCst
+    )
 }
 
 fn cond_get_or_create_id<'mir, 'tcx: 'mir>(
diff --git a/src/thread.rs b/src/thread.rs
index 08aeaa4fd09..f94805ae022 100644
--- a/src/thread.rs
+++ b/src/thread.rs
@@ -638,7 +638,9 @@ fn active_thread_stack_mut(&mut self) -> &mut Vec
     fn set_active_thread_name(&mut self, new_thread_name: Vec<u8>) {
         let this = self.eval_context_mut();
         if let Ok(string) = String::from_utf8(new_thread_name.clone()) {
-            this.memory.extra.data_race.thread_set_name(string);
+            this.memory.extra.data_race.thread_set_name(
+                this.machine.threads.active_thread, string
+            );
         }
         this.machine.threads.set_thread_name(new_thread_name);
     }
diff --git a/src/vector_clock.rs b/src/vector_clock.rs
new file mode 100644
index 00000000000..8d05eb1b992
--- /dev/null
+++ b/src/vector_clock.rs
@@ -0,0 +1,602 @@
+use std::{
+    fmt::{self, Debug}, cmp::Ordering, ops::Index,
+    num::TryFromIntError, convert::TryFrom, mem
+};
+use smallvec::SmallVec;
+use rustc_index::vec::Idx;
+use rustc_data_structures::fx::FxHashMap;
+
+/// A vector clock index; each index is associated with a thread id,
+/// but in some cases one vector index may be shared by
+/// multiple thread ids.
+#[derive(Clone, Copy, Debug, PartialOrd, Ord, PartialEq, Eq, Hash)]
+pub struct VectorIdx(u32);
+
+impl VectorIdx {
+    pub fn to_u32(self) -> u32 {
+        self.0
+    }
+    pub const MAX_INDEX: VectorIdx = VectorIdx(u32::MAX);
+}
+
+impl Idx for VectorIdx {
+    fn new(idx: usize) -> Self {
+        VectorIdx(u32::try_from(idx).unwrap())
+    }
+
+    fn index(self) -> usize {
+        usize::try_from(self.0).unwrap()
+    }
+}
+
+impl TryFrom<u64> for VectorIdx {
+    type Error = TryFromIntError;
+    fn try_from(id: u64) -> Result<Self, Self::Error> {
+        u32::try_from(id).map(|id_u32| Self(id_u32))
+    }
+}
+
+impl From<u32> for VectorIdx {
+    fn from(id: u32) -> Self {
+        Self(id)
+    }
+}
+
+
+/// A sparse set of vector clocks, where each vector index
+/// is associated with a vector clock.
+/// Any vector index that has not been assigned is treated
+/// as holding the all-zero vector clock.
+/// Optimized for the common case where only one element is
+/// stored in the set and the rest can be ignored, falling back
+/// to an internal hash-map once more than one element is
+/// assigned at any one time.
+#[derive(Clone)]
+pub struct VSmallClockSet(VSmallClockSetInner);
+
+#[derive(Clone)]
+enum VSmallClockSetInner {
+    /// Zero or one vector elements, the common
+    /// case for the sparse set.
+    /// The all zero vector clock is treated
+    /// as equal to the empty element
+    Small(VectorIdx, VClock),
+
+    /// Hash-map of vector clocks
+    Large(FxHashMap<VectorIdx, VClock>)
+}
+
+impl VSmallClockSet {
+
+    /// Remove all clock vectors from the map, setting them
+    /// to the zero vector
+    pub fn clear(&mut self) {
+        match &mut self.0 {
+            VSmallClockSetInner::Small(_, clock) => {
+                clock.set_zero_vector()
+            }
+            VSmallClockSetInner::Large(hash_map) => {
+                hash_map.clear();
+            }
+        }
+    }
+
+    /// Remove all clock vectors except for the clock vector
+    /// stored at the given index, which is retained
+    pub fn retain_index(&mut self, index: VectorIdx) {
+        match &mut self.0 {
+            VSmallClockSetInner::Small(small_idx, clock) => {
+                if index != *small_idx {
+                    // The zero-vector is considered to equal
+                    // the empty element
+                    clock.set_zero_vector()
+                }
+            },
+            VSmallClockSetInner::Large(hash_map) => {
+                hash_map.retain(|idx,_| {
+                    *idx == index
+                });
+            }
+        }
+    }
+
+    /// Insert the vector clock into the associated vector
+    /// index
+    pub fn insert(&mut self, index: VectorIdx, clock: &VClock) {
+        match &mut self.0 {
+            VSmallClockSetInner::Small(small_idx, small_clock) => {
+                if small_clock.is_zero_vector() {
+                    *small_idx = index;
+                    small_clock.clone_from(clock);
+                }else if !clock.is_zero_vector() {
+                    let mut hash_map = FxHashMap::default();
+                    hash_map.insert(*small_idx, mem::take(small_clock));
+                    hash_map.insert(index, clock.clone());
+                    self.0 = VSmallClockSetInner::Large(hash_map);
+                }
+            },
+            VSmallClockSetInner::Large(hash_map) => {
+                if !clock.is_zero_vector() {
+                    hash_map.insert(index, clock.clone());
+                }
+            }
+        }
+    }
+
+    /// Try to load the vector clock associated with the current
+    /// vector index.
+    pub fn get(&self, index: VectorIdx) -> Option<&VClock> {
+        match &self.0 {
+            VSmallClockSetInner::Small(small_idx, small_clock) => {
+                if *small_idx == index && !small_clock.is_zero_vector() {
+                    Some(small_clock)
+                }else{
+                    None
+                }
+            },
+            VSmallClockSetInner::Large(hash_map) => {
+                hash_map.get(&index)
+            }
+        }
+    }
+}
+
+impl Default for VSmallClockSet {
+    #[inline]
+    fn default() -> Self {
+        VSmallClockSet(
+            VSmallClockSetInner::Small(VectorIdx::new(0), VClock::default())
+        )
+    }
+}
+
+impl Debug for VSmallClockSet {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        // Print the contents of the small vector clock set as the map
+        // of vector index to vector clock that they represent
+        let mut map = f.debug_map();
+        match &self.0 {
+            VSmallClockSetInner::Small(small_idx, small_clock) => {
+                if !small_clock.is_zero_vector() {
+                    map.entry(&small_idx, &small_clock);
+                }
+            },
+            VSmallClockSetInner::Large(hash_map) => {
+                for (idx, elem) in hash_map.iter() {
+                    map.entry(idx, elem);
+                }
+            }
+        }
+        map.finish()
+    }
+}
+impl PartialEq for VSmallClockSet {
+    fn eq(&self, other: &Self) -> bool {
+        use VSmallClockSetInner::*;
+        match (&self.0, &other.0) {
+            (Small(i1, c1), Small(i2, c2)) => {
+                if c1.is_zero_vector() {
+                    // Either they are both zero or they are non-equal
+                    c2.is_zero_vector()
+                }else{
+                    // At least one is non-zero, so the full comparison is correct
+                    i1 == i2 && c1 == c2
+                }
+            }
+            (VSmallClockSetInner::Small(idx, clock), VSmallClockSetInner::Large(hash_map)) |
+            (VSmallClockSetInner::Large(hash_map), VSmallClockSetInner::Small(idx, clock)) => {
+                if hash_map.len() == 0 {
+                    // Equal to the empty hash-map
+                    clock.is_zero_vector()
+                }else if hash_map.len() == 1 {
+                    // Equal to the hash-map with one element
+                    let (hash_idx, hash_clock) = hash_map.iter().next().unwrap();
+                    hash_idx == idx && hash_clock
== clock + }else{ + false + } + } + (Large(map1), Large(map2)) => { + map1 == map2 + } + } + } +} +impl Eq for VSmallClockSet {} + + + +/// The size of the vector-clock to store inline +/// clock vectors larger than this will be stored on the heap +const SMALL_VECTOR: usize = 4; + +/// The type of the time-stamps recorded in the data-race detector +/// set to a type of unsigned integer +pub type VTimestamp = u32; + +/// A vector clock for detecting data-races +/// invariants: +/// - the last element in a VClock must not be 0 +/// -- this means that derive(PartialEq & Eq) is correct +/// -- as there is no implicit zero tail that might be equal +/// -- also simplifies the implementation of PartialOrd +#[derive(PartialEq, Eq, Default, Debug)] +pub struct VClock(SmallVec<[VTimestamp; SMALL_VECTOR]>); + +impl VClock { + + /// Create a new vector-clock containing all zeros except + /// for a value at the given index + pub fn new_with_index(index: VectorIdx, timestamp: VTimestamp) -> VClock { + let len = index.index() + 1; + let mut vec = smallvec::smallvec![0; len]; + vec[index.index()] = timestamp; + VClock(vec) + } + + /// Load the internal timestamp slice in the vector clock + #[inline] + pub fn as_slice(&self) -> &[VTimestamp] { + self.0.as_slice() + } + + /// Get a mutable slice to the internal vector with minimum `min_len` + /// elements, to preserve invariants this vector must modify + /// the `min_len`-1 nth element to a non-zero value + #[inline] + fn get_mut_with_min_len(&mut self, min_len: usize) -> &mut [VTimestamp] { + if self.0.len() < min_len { + self.0.resize(min_len, 0); + } + assert!(self.0.len() >= min_len); + self.0.as_mut_slice() + } + + /// Increment the vector clock at a known index + /// this will panic if the vector index overflows + #[inline] + pub fn increment_index(&mut self, idx: VectorIdx) { + let idx = idx.index(); + let mut_slice = self.get_mut_with_min_len(idx + 1); + let idx_ref = &mut mut_slice[idx]; + *idx_ref = idx_ref.checked_add(1).expect("Vector clock overflow") + } + + // Join the two vector-clocks together, this + // sets each vector-element to the maximum value + // of that element in either of the two source elements. + pub fn join(&mut self, other: &Self) { + let rhs_slice = other.as_slice(); + let lhs_slice = self.get_mut_with_min_len(rhs_slice.len()); + for (l, &r) in lhs_slice.iter_mut().zip(rhs_slice.iter()) { + *l = r.max(*l); + } + } + + /// Set the element at the current index of the vector + pub fn set_at_index(&mut self, other: &Self, idx: VectorIdx) { + let idx = idx.index(); + let mut_slice = self.get_mut_with_min_len(idx + 1); + let slice = other.as_slice(); + mut_slice[idx] = slice[idx]; + } + + /// Set the vector to the all-zero vector + #[inline] + pub fn set_zero_vector(&mut self) { + self.0.clear(); + } + + /// Return if this vector is the all-zero vector + pub fn is_zero_vector(&self) -> bool { + self.0.is_empty() + } +} + +impl Clone for VClock { + fn clone(&self) -> Self { + VClock(self.0.clone()) + } + fn clone_from(&mut self, source: &Self) { + let source_slice = source.as_slice(); + self.0.clear(); + self.0.extend_from_slice(source_slice); + } +} + +impl PartialOrd for VClock { + fn partial_cmp(&self, other: &VClock) -> Option { + + // Load the values as slices + let lhs_slice = self.as_slice(); + let rhs_slice = other.as_slice(); + + // Iterate through the combined vector slice + // keeping track of the order that is currently possible to satisfy. 
+ // If an ordering relation is detected to be impossible, then bail and + // directly return None + let mut iter = lhs_slice.iter().zip(rhs_slice.iter()); + let mut order = match iter.next() { + Some((lhs, rhs)) => lhs.cmp(rhs), + None => Ordering::Equal + }; + for (l, r) in iter { + match order { + Ordering::Equal => order = l.cmp(r), + Ordering::Less => if l > r { + return None + }, + Ordering::Greater => if l < r { + return None + } + } + } + + //Now test if either left or right have trailing elements + // by the invariant the trailing elements have at least 1 + // non zero value, so no additional calculation is required + // to determine the result of the PartialOrder + let l_len = lhs_slice.len(); + let r_len = rhs_slice.len(); + match l_len.cmp(&r_len) { + // Equal has no additional elements: return current order + Ordering::Equal => Some(order), + // Right has at least 1 element > than the implicit 0, + // so the only valid values are Ordering::Less or None + Ordering::Less => match order { + Ordering::Less | Ordering::Equal => Some(Ordering::Less), + Ordering::Greater => None + } + // Left has at least 1 element > than the implicit 0, + // so the only valid values are Ordering::Greater or None + Ordering::Greater => match order { + Ordering::Greater | Ordering::Equal => Some(Ordering::Greater), + Ordering::Less => None + } + } + } + + fn lt(&self, other: &VClock) -> bool { + // Load the values as slices + let lhs_slice = self.as_slice(); + let rhs_slice = other.as_slice(); + + // If l_len > r_len then at least one element + // in l_len is > than r_len, therefore the result + // is either Some(Greater) or None, so return false + // early. + let l_len = lhs_slice.len(); + let r_len = rhs_slice.len(); + if l_len <= r_len { + // If any elements on the left are greater than the right + // then the result is None or Some(Greater), both of which + // return false, the earlier test asserts that no elements in the + // extended tail violate this assumption. Otherwise l <= r, finally + // the case where the values are potentially equal needs to be considered + // and false returned as well + let mut equal = l_len == r_len; + for (&l, &r) in lhs_slice.iter().zip(rhs_slice.iter()) { + if l > r { + return false + }else if l < r { + equal = false; + } + } + !equal + }else{ + false + } + } + + fn le(&self, other: &VClock) -> bool { + // Load the values as slices + let lhs_slice = self.as_slice(); + let rhs_slice = other.as_slice(); + + // If l_len > r_len then at least one element + // in l_len is > than r_len, therefore the result + // is either Some(Greater) or None, so return false + // early. + let l_len = lhs_slice.len(); + let r_len = rhs_slice.len(); + if l_len <= r_len { + // If any elements on the left are greater than the right + // then the result is None or Some(Greater), both of which + // return false, the earlier test asserts that no elements in the + // extended tail violate this assumption. Otherwise l <= r + !lhs_slice.iter().zip(rhs_slice.iter()).any(|(&l, &r)| l > r) + }else{ + false + } + } + + fn gt(&self, other: &VClock) -> bool { + // Load the values as slices + let lhs_slice = self.as_slice(); + let rhs_slice = other.as_slice(); + + // If r_len > l_len then at least one element + // in r_len is > than l_len, therefore the result + // is either Some(Less) or None, so return false + // early. 
+ let l_len = lhs_slice.len(); + let r_len = rhs_slice.len(); + if l_len >= r_len { + // If any elements on the left are less than the right + // then the result is None or Some(Less), both of which + // return false, the earlier test asserts that no elements in the + // extended tail violate this assumption. Otherwise l >=, finally + // the case where the values are potentially equal needs to be considered + // and false returned as well + let mut equal = l_len == r_len; + for (&l, &r) in lhs_slice.iter().zip(rhs_slice.iter()) { + if l < r { + return false + }else if l > r { + equal = false; + } + } + !equal + }else{ + false + } + } + + fn ge(&self, other: &VClock) -> bool { + // Load the values as slices + let lhs_slice = self.as_slice(); + let rhs_slice = other.as_slice(); + + // If r_len > l_len then at least one element + // in r_len is > than l_len, therefore the result + // is either Some(Less) or None, so return false + // early. + let l_len = lhs_slice.len(); + let r_len = rhs_slice.len(); + if l_len >= r_len { + // If any elements on the left are less than the right + // then the result is None or Some(Less), both of which + // return false, the earlier test asserts that no elements in the + // extended tail violate this assumption. Otherwise l >= r + !lhs_slice.iter().zip(rhs_slice.iter()).any(|(&l, &r)| l < r) + }else{ + false + } + } +} + +impl Index for VClock { + type Output = VTimestamp; + + #[inline] + fn index(&self, index: VectorIdx) -> &VTimestamp { + self.as_slice().get(index.to_u32() as usize).unwrap_or(&0) + } +} + + +/// Test vector clock ordering operations +/// data-race detection is tested in the external +/// test suite +#[cfg(test)] +mod tests { + use super::{VClock, VTimestamp, VectorIdx, VSmallClockSet}; + use std::cmp::Ordering; + + #[test] + fn test_equal() { + let mut c1 = VClock::default(); + let mut c2 = VClock::default(); + assert_eq!(c1, c2); + c1.increment_index(VectorIdx(5)); + assert_ne!(c1, c2); + c2.increment_index(VectorIdx(53)); + assert_ne!(c1, c2); + c1.increment_index(VectorIdx(53)); + assert_ne!(c1, c2); + c2.increment_index(VectorIdx(5)); + assert_eq!(c1, c2); + } + + #[test] + fn test_partial_order() { + // Small test + assert_order(&[1], &[1], Some(Ordering::Equal)); + assert_order(&[1], &[2], Some(Ordering::Less)); + assert_order(&[2], &[1], Some(Ordering::Greater)); + assert_order(&[1], &[1,2], Some(Ordering::Less)); + assert_order(&[2], &[1,2], None); + + // Misc tests + assert_order(&[400], &[0, 1], None); + + // Large test + assert_order(&[0,1,2,3,4,5,6,7,8,9,10], &[0,1,2,3,4,5,6,7,8,9,10,0,0,0], Some(Ordering::Equal)); + assert_order(&[0,1,2,3,4,5,6,7,8,9,10], &[0,1,2,3,4,5,6,7,8,9,10,0,1,0], Some(Ordering::Less)); + assert_order(&[0,1,2,3,4,5,6,7,8,9,11], &[0,1,2,3,4,5,6,7,8,9,10,0,0,0], Some(Ordering::Greater)); + assert_order(&[0,1,2,3,4,5,6,7,8,9,11], &[0,1,2,3,4,5,6,7,8,9,10,0,1,0], None); + assert_order(&[0,1,2,3,4,5,6,7,8,9,9 ], &[0,1,2,3,4,5,6,7,8,9,10,0,0,0], Some(Ordering::Less)); + assert_order(&[0,1,2,3,4,5,6,7,8,9,9 ], &[0,1,2,3,4,5,6,7,8,9,10,0,1,0], Some(Ordering::Less)); + } + + fn from_slice(mut slice: &[VTimestamp]) -> VClock { + while let Some(0) = slice.last() { + slice = &slice[..slice.len() - 1] + } + VClock(smallvec::SmallVec::from_slice(slice)) + } + + fn assert_order(l: &[VTimestamp], r: &[VTimestamp], o: Option) { + let l = from_slice(l); + let r = from_slice(r); + + //Test partial_cmp + let compare = l.partial_cmp(&r); + assert_eq!(compare, o, "Invalid comparison\n l: {:?}\n r: {:?}",l,r); + let 
alt_compare = r.partial_cmp(&l); + assert_eq!(alt_compare, o.map(Ordering::reverse), "Invalid alt comparison\n l: {:?}\n r: {:?}",l,r); + + //Test operatorsm with faster implementations + assert_eq!( + matches!(compare,Some(Ordering::Less)), l < r, + "Invalid (<):\n l: {:?}\n r: {:?}",l,r + ); + assert_eq!( + matches!(compare,Some(Ordering::Less) | Some(Ordering::Equal)), l <= r, + "Invalid (<=):\n l: {:?}\n r: {:?}",l,r + ); + assert_eq!( + matches!(compare,Some(Ordering::Greater)), l > r, + "Invalid (>):\n l: {:?}\n r: {:?}",l,r + ); + assert_eq!( + matches!(compare,Some(Ordering::Greater) | Some(Ordering::Equal)), l >= r, + "Invalid (>=):\n l: {:?}\n r: {:?}",l,r + ); + assert_eq!( + matches!(alt_compare,Some(Ordering::Less)), r < l, + "Invalid alt (<):\n l: {:?}\n r: {:?}",l,r + ); + assert_eq!( + matches!(alt_compare,Some(Ordering::Less) | Some(Ordering::Equal)), r <= l, + "Invalid alt (<=):\n l: {:?}\n r: {:?}",l,r + ); + assert_eq!( + matches!(alt_compare,Some(Ordering::Greater)), r > l, + "Invalid alt (>):\n l: {:?}\n r: {:?}",l,r + ); + assert_eq!( + matches!(alt_compare,Some(Ordering::Greater) | Some(Ordering::Equal)), r >= l, + "Invalid alt (>=):\n l: {:?}\n r: {:?}",l,r + ); + } + + #[test] + pub fn test_vclock_set() { + let mut set = VSmallClockSet::default(); + let v1 = from_slice(&[3,0,1]); + let v2 = from_slice(&[4,2,3]); + let v3 = from_slice(&[4,8,3]); + set.insert(VectorIdx(0), &v1); + assert_eq!(set.get(VectorIdx(0)), Some(&v1)); + set.insert(VectorIdx(5), &v2); + assert_eq!(set.get(VectorIdx(0)), Some(&v1)); + assert_eq!(set.get(VectorIdx(5)), Some(&v2)); + set.insert(VectorIdx(53), &v3); + assert_eq!(set.get(VectorIdx(0)), Some(&v1)); + assert_eq!(set.get(VectorIdx(5)), Some(&v2)); + assert_eq!(set.get(VectorIdx(53)), Some(&v3)); + set.retain_index(VectorIdx(53)); + assert_eq!(set.get(VectorIdx(0)), None); + assert_eq!(set.get(VectorIdx(5)), None); + assert_eq!(set.get(VectorIdx(53)), Some(&v3)); + set.clear(); + assert_eq!(set.get(VectorIdx(0)), None); + assert_eq!(set.get(VectorIdx(5)), None); + assert_eq!(set.get(VectorIdx(53)), None); + set.insert(VectorIdx(53), &v3); + assert_eq!(set.get(VectorIdx(0)), None); + assert_eq!(set.get(VectorIdx(5)), None); + assert_eq!(set.get(VectorIdx(53)), Some(&v3)); + } +} -- 2.44.0
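
The new vector_clock.rs owns the happens-before machinery the detector relies on: each thread advances its own clock component on every event, release-style synchronisation publishes the releasing thread's clock, acquire-style synchronisation joins that clock into the acquiring thread's clock, and two accesses race exactly when neither clock is less-or-equal to the other (the case where partial_cmp returns None). A minimal standalone sketch of those semantics, using plain Vec<u32> in place of the SmallVec-backed VClock introduced in this patch:

    // Toy vector clocks: join = element-wise max, increment = bump own component.
    // Missing entries are implicitly zero, mirroring VClock's trimmed-tail invariant.
    fn join(a: &mut Vec<u32>, b: &[u32]) {
        if a.len() < b.len() {
            a.resize(b.len(), 0);
        }
        for (l, &r) in a.iter_mut().zip(b) {
            *l = (*l).max(r);
        }
    }

    fn happens_before(a: &[u32], b: &[u32]) -> bool {
        // a happens-before b iff every component of a is <= the matching component of b.
        (0..a.len().max(b.len()))
            .all(|i| a.get(i).copied().unwrap_or(0) <= b.get(i).copied().unwrap_or(0))
    }

    fn main() {
        let (t0, t1) = (0usize, 1usize);
        let mut c0 = vec![0u32; 2]; // thread 0's clock
        let mut c1 = vec![0u32; 2]; // thread 1's clock

        c0[t0] += 1;        // thread 0 writes x (event A)
        join(&mut c1, &c0); // acquire: thread 1 receives thread 0's clock via a lock/message
        c1[t1] += 1;        // thread 1 reads x (event B)

        assert!(happens_before(&c0, &c1));  // A happens-before B: no race
        assert!(!happens_before(&c1, &c0));
        // Without the join (no synchronisation), the two clocks would be
        // incomparable and the conflicting accesses would be reported as a race.
    }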
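VSmallClockSet's representation choice, one inline (index, clock) pair that spills to a hash map only once a second non-zero clock is stored, is a general small-map optimisation: the all-zero clock doubles as the "empty" marker, so clearing and lookups of unassigned indices never allocate. A generic sketch of the same pattern (illustrative only; the names and trait bounds below are assumptions, not code from this patch):

    use std::collections::HashMap;

    // Sparse map optimised for the common case of at most one real entry.
    // Absent keys are treated as holding V::default().
    enum SmallMap<K, V> {
        One(K, V),           // inline slot; may hold the "empty" default value
        Many(HashMap<K, V>), // spilled representation once a second entry appears
    }

    impl<K: std::hash::Hash + Eq + Copy, V: Default + PartialEq> SmallMap<K, V> {
        fn insert(&mut self, key: K, value: V) {
            match self {
                SmallMap::One(k, v) => {
                    if *v == V::default() {
                        // Inline slot is free: store the new entry there.
                        *k = key;
                        *v = value;
                    } else if value != V::default() {
                        // Second real entry: spill both into a hash map.
                        let mut map = HashMap::new();
                        map.insert(*k, std::mem::take(v));
                        map.insert(key, value);
                        *self = SmallMap::Many(map);
                    }
                }
                SmallMap::Many(map) => {
                    if value != V::default() {
                        map.insert(key, value);
                    }
                }
            }
        }

        fn get(&self, key: K) -> Option<&V> {
            match self {
                SmallMap::One(k, v) if *k == key && *v != V::default() => Some(v),
                SmallMap::One(..) => None,
                SmallMap::Many(map) => map.get(&key),
            }
        }
    }

The trade-off matches the doc comment on VSmallClockSet: most entries in the detector are expected to carry at most one clock at a time, so the inline case avoids any heap allocation on the hot path.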
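The commit message also mentions re-using vector indices for new threads after earlier threads have terminated; the data_race.rs side of that change is not part of the hunks shown above. One plausible shape for such a scheme, sketched purely as an assumption rather than the actual implementation, is a pool of released indices that may only be handed out when the terminated owner's final clock happens-before the creating thread's current clock (plain usize and Vec<u32> stand in for VectorIdx and VClock here):

    // Hypothetical sketch of vector-index reuse after thread termination.
    struct ReusePool {
        // Indices of terminated threads, paired with the clock each thread
        // held when it terminated.
        candidates: Vec<(usize, Vec<u32>)>,
    }

    impl ReusePool {
        fn release(&mut self, index: usize, final_clock: Vec<u32>) {
            // Called once a thread has terminated and been joined/detached.
            self.candidates.push((index, final_clock));
        }

        fn acquire(&mut self, creator_clock: &[u32], next_fresh: &mut usize) -> usize {
            // A candidate index may be reused only if the old thread's final clock
            // happens-before the creating thread's clock, i.e. every event of the
            // old thread is already ordered before the spawn of the new one.
            if let Some(pos) = self.candidates.iter().position(|(_, final_clock)| {
                final_clock
                    .iter()
                    .enumerate()
                    .all(|(i, &t)| t <= creator_clock.get(i).copied().unwrap_or(0))
            }) {
                self.candidates.swap_remove(pos).0
            } else {
                // Otherwise allocate a fresh index, growing every clock vector.
                let idx = *next_fresh;
                *next_fresh += 1;
                idx
            }
        }
    }

Reusing indices this way keeps the clock vectors bounded by the number of concurrently live threads rather than by the total number of threads ever spawned, which is what makes sharing one VectorIdx across multiple ThreadIds (as noted in the VectorIdx doc comment) sound.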