From 9cb6b8da3f6ca2da1139c91754d520bf2d354f31 Mon Sep 17 00:00:00 2001 From: JCTyBlaidd Date: Thu, 5 Nov 2020 03:54:39 +0000 Subject: [PATCH] Split out vector_clock to separate file, general tidy up of some of the code & add support for vector index re-use for multiple threads after termination. --- src/data_race.rs | 1333 +++++++++++++-------------------------- src/lib.rs | 4 + src/shims/intrinsics.rs | 31 +- src/shims/posix/sync.rs | 39 +- src/thread.rs | 4 +- src/vector_clock.rs | 602 ++++++++++++++++++ 6 files changed, 1084 insertions(+), 929 deletions(-) create mode 100644 src/vector_clock.rs diff --git a/src/data_race.rs b/src/data_race.rs index 8e7a3548f5c..e992c5a1d58 100644 --- a/src/data_race.rs +++ b/src/data_race.rs @@ -13,21 +13,21 @@ //! - 1 of the operations happens-before the other operation (see link for definition) use std::{ - fmt::{self, Debug}, cmp::Ordering, rc::Rc, - cell::{Cell, RefCell, Ref, RefMut}, ops::Index, mem + fmt::Debug, rc::Rc, + cell::{Cell, RefCell, Ref, RefMut}, mem }; use rustc_index::vec::{Idx, IndexVec}; use rustc_target::abi::Size; use rustc_middle::ty::layout::TyAndLayout; -use rustc_data_structures::fx::FxHashMap; - -use smallvec::SmallVec; +use rustc_data_structures::fx::FxHashSet; use crate::{ - MiriEvalContext, ThreadId, Tag, MiriEvalContextExt, RangeMap, - MPlaceTy, ImmTy, InterpResult, Pointer, ScalarMaybeUninit, - OpTy, Immediate, MemPlaceMeta + MiriEvalContext, MiriEvalContextExt, + ThreadId, Tag, RangeMap, + InterpResult, Pointer, ScalarMaybeUninit, + MPlaceTy, OpTy, MemPlaceMeta, + VClock, VSmallClockSet, VectorIdx, VTimestamp }; pub type AllocExtra = VClockAlloc; @@ -73,194 +73,136 @@ pub enum AtomicFenceOp { impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for MiriEvalContext<'mir, 'tcx> {} pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { - /// Variant of `read_immediate` that does not perform `data-race` checks. - fn read_immediate_racy(&self, op: MPlaceTy<'tcx, Tag>) -> InterpResult<'tcx, ImmTy<'tcx, Tag>> { + // Temporarily allow data-races to occur, this should only be + // used if either one of the appropiate `validate_atomic` functions + // will be called to treat a memory access as atomic or if the memory + // being accessed should be treated as internal state, that cannot be + // accessed by the interpreted program. + #[inline] + fn allow_data_races_ref(&self, op: impl FnOnce(&MiriEvalContext<'mir, 'tcx>) -> R) -> R { let this = self.eval_context_ref(); let data_race = &*this.memory.extra.data_race; - let old = data_race.multi_threaded.replace(false); - let res = this.read_immediate(op.into()); + let result = op(this); data_race.multi_threaded.set(old); - - res + result } - - /// Variant of `write_immediate` that does not perform `data-race` checks. 
- fn write_immediate_racy( - &mut self, src: Immediate, dest: MPlaceTy<'tcx, Tag> - ) -> InterpResult<'tcx> { + + /// Same as `allow_data_races_ref`, this temporarily disables any data-race detection and + /// so should only be used for atomic operations or internal state that the program cannot + /// access + #[inline] + fn allow_data_races_mut(&mut self, op: impl FnOnce(&mut MiriEvalContext<'mir, 'tcx>) -> R) -> R { let this = self.eval_context_mut(); let data_race = &*this.memory.extra.data_race; let old = data_race.multi_threaded.replace(false); - - let imm = this.write_immediate(src, dest.into()); - + let result = op(this); let data_race = &*this.memory.extra.data_race; data_race.multi_threaded.set(old); - imm + result } - /// Variant of `read_scalar` that does not perform data-race checks. - fn read_scalar_racy( - &self, op: MPlaceTy<'tcx, Tag> - )-> InterpResult<'tcx, ScalarMaybeUninit> { - Ok(self.read_immediate_racy(op)?.to_scalar_or_uninit()) - } - /// Variant of `write_scalar` that does not perform data-race checks. - fn write_scalar_racy( - &mut self, val: ScalarMaybeUninit, dest: MPlaceTy<'tcx, Tag> - ) -> InterpResult<'tcx> { - self.write_immediate_racy(Immediate::Scalar(val.into()), dest) - } - - /// Variant of `read_scalar_at_offset` helper function that does not perform - /// `data-race checks. - fn read_scalar_at_offset_racy( - &self, - op: OpTy<'tcx, Tag>, - offset: u64, - layout: TyAndLayout<'tcx>, - ) -> InterpResult<'tcx, ScalarMaybeUninit> { - let this = self.eval_context_ref(); - let op_place = this.deref_operand(op)?; - let offset = Size::from_bytes(offset); - // Ensure that the following read at an offset is within bounds - assert!(op_place.layout.size >= offset + layout.size); - let value_place = op_place.offset(offset, MemPlaceMeta::None, layout, this)?; - this.read_scalar_racy(value_place.into()) - } - - /// Variant of `write_scalar_at_offfset` helper function that performs - /// an atomic load operation with verification instead fn read_scalar_at_offset_atomic( - &mut self, + &self, op: OpTy<'tcx, Tag>, offset: u64, layout: TyAndLayout<'tcx>, atomic: AtomicReadOp ) -> InterpResult<'tcx, ScalarMaybeUninit> { - let this = self.eval_context_mut(); + let this = self.eval_context_ref(); let op_place = this.deref_operand(op)?; let offset = Size::from_bytes(offset); // Ensure that the following read at an offset is within bounds assert!(op_place.layout.size >= offset + layout.size); let value_place = op_place.offset(offset, MemPlaceMeta::None, layout, this)?; - let res = this.read_scalar_racy(value_place.into())?; - this.validate_atomic_load(value_place, atomic)?; - Ok(res) + this.read_scalar_atomic(value_place, atomic) } - - /// Variant of `write_scalar_at_offfset` helper function that does not perform - /// data-race checks. 
- fn write_scalar_at_offset_racy( + fn write_scalar_at_offset_atomic( &mut self, op: OpTy<'tcx, Tag>, offset: u64, value: impl Into>, layout: TyAndLayout<'tcx>, - ) -> InterpResult<'tcx, ()> { + atomic: AtomicWriteOp + ) -> InterpResult<'tcx> { let this = self.eval_context_mut(); let op_place = this.deref_operand(op)?; let offset = Size::from_bytes(offset); // Ensure that the following read at an offset is within bounds assert!(op_place.layout.size >= offset + layout.size); let value_place = op_place.offset(offset, MemPlaceMeta::None, layout, this)?; - this.write_scalar_racy(value.into(), value_place.into()) + this.write_scalar_atomic(value.into(), value_place, atomic) } - - /// Load the data race allocation state for a given memory place - /// also returns the size and offset of the result in the allocation - /// metadata - /// This is used for atomic loads since unconditionally requesteing - /// mutable access causes issues for read-only memory, which will - /// fail validation on mutable access - fn load_data_race_state_ref<'a>( - &'a self, place: MPlaceTy<'tcx, Tag> - ) -> InterpResult<'tcx, (&'a VClockAlloc, Size, Size)> where 'mir: 'a { - let this = self.eval_context_ref(); - - let ptr = place.ptr.assert_ptr(); - let size = place.layout.size; - let data_race = &this.memory.get_raw(ptr.alloc_id)?.extra.data_race; - - Ok((data_race, size, ptr.offset)) - } - - /// Load the data race allocation state for a given memory place - /// also returns the size and the offset of the result in the allocation - /// metadata - fn load_data_race_state_mut<'a>( - &'a mut self, place: MPlaceTy<'tcx, Tag> - ) -> InterpResult<'tcx, (&'a mut VClockAlloc, Size, Size)> where 'mir: 'a { - let this = self.eval_context_mut(); - - let ptr = place.ptr.assert_ptr(); - let size = place.layout.size; - let data_race = &mut this.memory.get_raw_mut(ptr.alloc_id)?.extra.data_race; - - Ok((data_race, size, ptr.offset)) + fn read_scalar_atomic( + &self, place: MPlaceTy<'tcx, Tag>, atomic: AtomicReadOp + ) -> InterpResult<'tcx, ScalarMaybeUninit> { + let scalar = self.allow_data_races_ref(move |this| { + this.read_scalar(place.into()) + })?; + self.validate_atomic_load(place, atomic)?; + Ok(scalar) + } + fn write_scalar_atomic( + &mut self, val: ScalarMaybeUninit, dest: MPlaceTy<'tcx, Tag>, + atomic: AtomicWriteOp + ) -> InterpResult<'tcx> { + self.allow_data_races_mut(move |this| { + this.write_scalar(val, dest.into()) + })?; + self.validate_atomic_store(dest, atomic) } /// Update the data-race detector for an atomic read occuring at the /// associated memory-place and on the current thread fn validate_atomic_load( - &mut self, place: MPlaceTy<'tcx, Tag>, atomic: AtomicReadOp + &self, place: MPlaceTy<'tcx, Tag>, atomic: AtomicReadOp ) -> InterpResult<'tcx> { - let this = self.eval_context_mut(); + let this = self.eval_context_ref(); let data_race = &*this.memory.extra.data_race; if data_race.multi_threaded.get() { - data_race.advance_vector_clock(); - let ( - alloc, size, offset - ) = this.load_data_race_state_ref(place)?; + // Load an log the atomic operation + // the memory access has to be `get_raw` since otherwise this despite only + // mutating MemoryExtra will still trigger errors on read-only memory + let place_ptr = place.ptr.assert_ptr(); + let size = place.layout.size; + let alloc_meta = &this.memory.get_raw(place_ptr.alloc_id)?.extra.data_race; log::trace!( - "Atomic load on {:?} with ordering {:?}, in memory({:?}, offset={}, size={})", - alloc.global.current_thread(), atomic, - 
place.ptr.assert_ptr().alloc_id, offset.bytes(), size.bytes() + "Atomic op({}) with ordering {:?} on memory({:?}, offset={}, size={})", + "Atomic load", &atomic, place_ptr.alloc_id, place_ptr.offset.bytes(), size.bytes() ); - let current_thread = alloc.global.current_thread(); - let mut current_state = alloc.global.current_thread_state_mut(); - if atomic == AtomicReadOp::Relaxed { - // Perform relaxed atomic load - for (_,range) in alloc.alloc_ranges.borrow_mut().iter_mut(offset, size) { - if range.load_relaxed(&mut *current_state, current_thread) == Err(DataRace) { - mem::drop(current_state); + // Perform the atomic operation + let data_race = &alloc_meta.global; + data_race.maybe_perform_sync_operation(move |index, mut clocks| { + for (_,range) in alloc_meta.alloc_ranges.borrow_mut().iter_mut(place_ptr.offset, size) { + let res = if atomic == AtomicReadOp::Relaxed { + range.load_relaxed(&mut *clocks, index) + }else{ + range.acquire(&mut *clocks, index) + }; + if let Err(DataRace) = res { + mem::drop(clocks); return VClockAlloc::report_data_race( - &alloc.global, range, "ATOMIC_LOAD", true, - place.ptr.assert_ptr(), size + &alloc_meta.global, range, "Atomic load", true, + place_ptr, size ); } } - }else{ - // Perform acquire(or seq-cst) atomic load - for (_,range) in alloc.alloc_ranges.borrow_mut().iter_mut(offset, size) { - if range.acquire(&mut *current_state, current_thread) == Err(DataRace) { - mem::drop(current_state); - return VClockAlloc::report_data_race( - &alloc.global, range, "ATOMIC_LOAD", true, - place.ptr.assert_ptr(), size - ); - } - } - } + Ok(()) + })?; // Log changes to atomic memory if log::log_enabled!(log::Level::Trace) { - for (_,range) in alloc.alloc_ranges.borrow_mut().iter(offset, size) { + for (_,range) in alloc_meta.alloc_ranges.borrow().iter(place_ptr.offset, size) { log::trace!( - " updated atomic memory({:?}, offset={}, size={}) to {:#?}", - place.ptr.assert_ptr().alloc_id, offset.bytes(), size.bytes(), + "Updated atomic memory({:?}, offset={}, size={}) to {:#?}", + place.ptr.assert_ptr().alloc_id, place_ptr.offset.bytes(), size.bytes(), range.atomic_ops ); } } - - mem::drop(current_state); - let data_race = &*this.memory.extra.data_race; - data_race.advance_vector_clock(); } Ok(()) } @@ -271,61 +213,16 @@ fn validate_atomic_store( &mut self, place: MPlaceTy<'tcx, Tag>, atomic: AtomicWriteOp ) -> InterpResult<'tcx> { let this = self.eval_context_mut(); - let data_race = &*this.memory.extra.data_race; - if data_race.multi_threaded.get() { - data_race.advance_vector_clock(); - - let ( - alloc, size, offset - ) = this.load_data_race_state_mut(place)?; - let current_thread = alloc.global.current_thread(); - let mut current_state = alloc.global.current_thread_state_mut(); - log::trace!( - "Atomic store on {:?} with ordering {:?}, in memory({:?}, offset={}, size={})", - current_thread, atomic, - place.ptr.assert_ptr().alloc_id, offset.bytes(), size.bytes() - ); - - if atomic == AtomicWriteOp::Relaxed { - // Perform relaxed atomic store - for (_,range) in alloc.alloc_ranges.get_mut().iter_mut(offset, size) { - if range.store_relaxed(&mut *current_state, current_thread) == Err(DataRace) { - mem::drop(current_state); - return VClockAlloc::report_data_race( - &alloc.global, range, "ATOMIC_STORE", true, - place.ptr.assert_ptr(), size - ); - } - } - }else{ - // Perform release(or seq-cst) atomic store - for (_,range) in alloc.alloc_ranges.get_mut().iter_mut(offset, size) { - if range.release(&mut *current_state, current_thread) == Err(DataRace) { - 
mem::drop(current_state); - return VClockAlloc::report_data_race( - &alloc.global, range, "ATOMIC_STORE", true, - place.ptr.assert_ptr(), size - ); - } - } - } - - // Log changes to atomic memory - if log::log_enabled!(log::Level::Trace) { - for (_,range) in alloc.alloc_ranges.get_mut().iter(offset, size) { - log::trace!( - " updated atomic memory({:?}, offset={}, size={}) to {:#?}", - place.ptr.assert_ptr().alloc_id, offset.bytes(), size.bytes(), - range.atomic_ops - ); + this.validate_atomic_op_mut( + place, atomic, "Atomic Store", + move |memory, clocks, index, atomic| { + if atomic == AtomicWriteOp::Relaxed { + memory.store_relaxed(clocks, index) + }else{ + memory.release(clocks, index) } } - - mem::drop(current_state); - let data_race = &*this.memory.extra.data_race; - data_race.advance_vector_clock(); - } - Ok(()) + ) } /// Update the data-race detector for an atomic read-modify-write occuring @@ -334,97 +231,104 @@ fn validate_atomic_rmw( &mut self, place: MPlaceTy<'tcx, Tag>, atomic: AtomicRWOp ) -> InterpResult<'tcx> { use AtomicRWOp::*; + let acquire = matches!(atomic, Acquire | AcqRel | SeqCst); + let release = matches!(atomic, Release | AcqRel | SeqCst); let this = self.eval_context_mut(); - let data_race = &*this.memory.extra.data_race; - if data_race.multi_threaded.get() { - data_race.advance_vector_clock(); - - let ( - alloc, size, offset - ) = this.load_data_race_state_mut(place)?; - let current_thread = alloc.global.current_thread(); - let mut current_state = alloc.global.current_thread_state_mut(); - log::trace!( - "Atomic RMW on {:?} with ordering {:?}, in memory({:?}, offset={}, size={})", - current_thread, atomic, - place.ptr.assert_ptr().alloc_id, offset.bytes(), size.bytes() - ); - - let acquire = matches!(atomic, Acquire | AcqRel | SeqCst); - let release = matches!(atomic, Release | AcqRel | SeqCst); - for (_,range) in alloc.alloc_ranges.get_mut().iter_mut(offset, size) { - //FIXME: this is probably still slightly wrong due to the quirks - // in the c++11 memory model - let maybe_race = if acquire { - // Atomic RW-Op acquire - range.acquire(&mut *current_state, current_thread) + this.validate_atomic_op_mut( + place, atomic, "Atomic RMW", + move |memory, clocks, index, _| { + if acquire { + memory.acquire(clocks, index)?; }else{ - range.load_relaxed(&mut *current_state, current_thread) - }; - if maybe_race == Err(DataRace) { - mem::drop(current_state); - return VClockAlloc::report_data_race( - &alloc.global, range, "ATOMIC_RMW(LOAD)", true, - place.ptr.assert_ptr(), size - ); + memory.load_relaxed(clocks, index)?; } - let maybe_race = if release { - // Atomic RW-Op release - range.rmw_release(&mut *current_state, current_thread) + if release { + memory.rmw_release(clocks, index) }else{ - range.rmw_relaxed(&mut *current_state, current_thread) - }; - if maybe_race == Err(DataRace) { - mem::drop(current_state); - return VClockAlloc::report_data_race( - &alloc.global, range, "ATOMIC_RMW(STORE)", true, - place.ptr.assert_ptr(), size - ); + memory.rmw_relaxed(clocks, index) } } - - // Log changes to atomic memory - if log::log_enabled!(log::Level::Trace) { - for (_,range) in alloc.alloc_ranges.get_mut().iter(offset, size) { - log::trace!( - " updated atomic memory({:?}, offset={}, size={}) to {:#?}", - place.ptr.assert_ptr().alloc_id, offset.bytes(), size.bytes(), - range.atomic_ops - ); - } - } - - mem::drop(current_state); - let data_race = &*this.memory.extra.data_race; - data_race.advance_vector_clock(); - } - Ok(()) + ) } /// Update the data-race detector for an 
atomic fence on the current thread fn validate_atomic_fence(&mut self, atomic: AtomicFenceOp) -> InterpResult<'tcx> { let this = self.eval_context_mut(); let data_race = &*this.memory.extra.data_race; - if data_race.multi_threaded.get() { - data_race.advance_vector_clock(); - - log::trace!("Atomic fence on {:?} with ordering {:?}", data_race.current_thread(), atomic); + data_race.maybe_perform_sync_operation(move |index, mut clocks| { + log::trace!("Atomic fence on {:?} with ordering {:?}", index, atomic); // Apply data-race detection for the current fences // this treats AcqRel and SeqCst as the same as a acquire // and release fence applied in the same timestamp. if atomic != AtomicFenceOp::Release { // Either Acquire | AcqRel | SeqCst - data_race.current_thread_state_mut().apply_acquire_fence(); + clocks.apply_acquire_fence(); } if atomic != AtomicFenceOp::Acquire { // Either Release | AcqRel | SeqCst - data_race.current_thread_state_mut().apply_release_fence(); + clocks.apply_release_fence(); } + Ok(()) + }) + } +} - data_race.advance_vector_clock(); +impl<'mir, 'tcx: 'mir> EvalContextPrivExt<'mir, 'tcx> for MiriEvalContext<'mir, 'tcx> {} +trait EvalContextPrivExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { + + /// Generic atomic operation implementation, this however + /// cannot be used for the atomic read operation since + /// that requires non mutable memory access to not trigger + /// the writing to read-only memory errors during `get_raw_mut` + fn validate_atomic_op_mut( + &mut self, place: MPlaceTy<'tcx, Tag>, + atomic: A, description: &str, + mut op: impl FnMut( + &mut MemoryCellClocks, &mut ThreadClockSet, VectorIdx, A + ) -> Result<(), DataRace> + ) -> InterpResult<'tcx> { + let this = self.eval_context_mut(); + let data_race = &*this.memory.extra.data_race; + if data_race.multi_threaded.get() { + + // Load an log the atomic operation + let place_ptr = place.ptr.assert_ptr(); + let size = place.layout.size; + let alloc_meta = &mut this.memory.get_raw_mut(place_ptr.alloc_id)?.extra.data_race; + log::trace!( + "Atomic op({}) with ordering {:?} on memory({:?}, offset={}, size={})", + description, &atomic, place_ptr.alloc_id, place_ptr.offset.bytes(), size.bytes() + ); + + // Perform the atomic operation + let data_race = &alloc_meta.global; + data_race.maybe_perform_sync_operation(|index, mut clocks| { + for (_,range) in alloc_meta.alloc_ranges.borrow_mut().iter_mut(place_ptr.offset, size) { + if let Err(DataRace) = op(range, &mut *clocks, index, atomic) { + mem::drop(clocks); + return VClockAlloc::report_data_race( + &alloc_meta.global, range, description, true, + place_ptr, size + ); + } + } + Ok(()) + })?; + + // Log changes to atomic memory + if log::log_enabled!(log::Level::Trace) { + for (_,range) in alloc_meta.alloc_ranges.borrow().iter(place_ptr.offset, size) { + log::trace!( + "Updated atomic memory({:?}, offset={}, size={}) to {:#?}", + place.ptr.assert_ptr().alloc_id, place_ptr.offset.bytes(), size.bytes(), + range.atomic_ops + ); + } + } } Ok(()) } + } /// Handle for locks to express their @@ -439,7 +343,7 @@ pub struct DataRaceLockHandle { } impl DataRaceLockHandle { pub fn set_values(&mut self, other: &Self) { - self.clock.set_values(&other.clock) + self.clock.clone_from(&other.clock) } pub fn reset(&mut self) { self.clock.set_zero_vector(); @@ -447,126 +351,6 @@ pub fn reset(&mut self) { } -/// Avoid an atomic allocation for the common -/// case with atomic operations where the number -/// of active release sequences is small -#[derive(Clone, PartialEq, 
Eq)] -enum AtomicReleaseSequences { - - /// Contains one or no values - /// if empty: (None, reset vector clock) - /// if one: (Some(thread), thread_clock) - ReleaseOneOrEmpty(Option, VClock), - - /// Contains two or more values - /// stored in a hash-map of thread id to - /// vector clocks - ReleaseMany(FxHashMap) -} -impl AtomicReleaseSequences { - - /// Return an empty set of atomic release sequences - #[inline] - fn new() -> AtomicReleaseSequences { - Self::ReleaseOneOrEmpty(None, VClock::default()) - } - - /// Remove all values except for the value stored at `thread` and set - /// the vector clock to the associated `clock` value - #[inline] - fn clear_and_set(&mut self, thread: ThreadId, clock: &VClock) { - match self { - Self::ReleaseOneOrEmpty(id, rel_clock) => { - *id = Some(thread); - rel_clock.set_values(clock); - } - Self::ReleaseMany(_) => { - *self = Self::ReleaseOneOrEmpty(Some(thread), clock.clone()); - } - } - } - - /// Remove all values except for the value stored at `thread` - #[inline] - fn clear_and_retain(&mut self, thread: ThreadId) { - match self { - Self::ReleaseOneOrEmpty(id, rel_clock) => { - // If the id is the same, then reatin the value - // otherwise delete and clear the release vector clock - if *id != Some(thread) { - *id = None; - rel_clock.set_zero_vector(); - } - }, - Self::ReleaseMany(hash_map) => { - // Retain only the thread element, so reduce to size - // of 1 or 0, and move to smaller format - if let Some(clock) = hash_map.remove(&thread) { - *self = Self::ReleaseOneOrEmpty(Some(thread), clock); - }else{ - *self = Self::new(); - } - } - } - } - - /// Insert a release sequence at `thread` with values `clock` - fn insert(&mut self, thread: ThreadId, clock: &VClock) { - match self { - Self::ReleaseOneOrEmpty(id, rel_clock) => { - if id.map_or(true, |id| id == thread) { - *id = Some(thread); - rel_clock.set_values(clock); - }else{ - let mut hash_map = FxHashMap::default(); - hash_map.insert(thread, clock.clone()); - hash_map.insert(id.unwrap(), rel_clock.clone()); - *self = Self::ReleaseMany(hash_map); - } - }, - Self::ReleaseMany(hash_map) => { - hash_map.insert(thread, clock.clone()); - } - } - } - - /// Return the release sequence at `thread` if one exists - #[inline] - fn load(&self, thread: ThreadId) -> Option<&VClock> { - match self { - Self::ReleaseOneOrEmpty(id, clock) => { - if *id == Some(thread) { - Some(clock) - }else{ - None - } - }, - Self::ReleaseMany(hash_map) => { - hash_map.get(&thread) - } - } - } -} - -/// Custom debug implementation to correctly -/// print debug as a logical mapping from threads -/// to vector-clocks -impl Debug for AtomicReleaseSequences { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::ReleaseOneOrEmpty(None,_) => { - f.debug_map().finish() - }, - Self::ReleaseOneOrEmpty(Some(id), clock) => { - f.debug_map().entry(&id, &clock).finish() - }, - Self::ReleaseMany(hash_map) => { - Debug::fmt(hash_map, f) - } - } - } -} - /// Error returned by finding a data race /// should be elaborated upon #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] @@ -576,7 +360,7 @@ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { /// explicitly to reduce memory usage for the /// common case where no atomic operations /// exists on the memory cell -#[derive(Clone, PartialEq, Eq, Debug)] +#[derive(Clone, PartialEq, Eq, Default, Debug)] struct AtomicMemoryCellClocks { /// The clock-vector for the set of atomic read operations @@ -599,7 +383,7 @@ struct AtomicMemoryCellClocks { /// 
sequence exists in the memory cell, required /// since read-modify-write operations do not /// invalidate existing release sequences - release_sequences: AtomicReleaseSequences, + release_sequences: VSmallClockSet, } /// Memory Cell vector clock metadata @@ -609,11 +393,11 @@ struct MemoryCellClocks { /// The vector-clock of the last write, only one value is stored /// since all previous writes happened-before the current write - write: Timestamp, + write: VTimestamp, /// The identifier of the thread that performed the last write /// operation - write_thread: ThreadId, + write_index: VectorIdx, /// The vector-clock of the set of previous reads /// each index is set to the timestamp that the associated @@ -633,7 +417,7 @@ fn default() -> Self { MemoryCellClocks { read: VClock::default(), write: 0, - write_thread: ThreadId::new(u32::MAX as usize), + write_index: VectorIdx::MAX_INDEX, atomic_ops: None } } @@ -654,21 +438,14 @@ fn atomic(&self) -> Option<&AtomicMemoryCellClocks> { /// if it does not exist #[inline] fn atomic_mut(&mut self) -> &mut AtomicMemoryCellClocks { - self.atomic_ops.get_or_insert_with(|| { - Box::new(AtomicMemoryCellClocks { - read_vector: VClock::default(), - write_vector: VClock::default(), - sync_vector: VClock::default(), - release_sequences: AtomicReleaseSequences::new() - }) - }) + self.atomic_ops.get_or_insert_with(Default::default) } /// Update memory cell data-race tracking for atomic /// load acquire semantics, is a no-op if this memory was /// not used previously as atomic memory - fn acquire(&mut self, clocks: &mut ThreadClockSet, thread: ThreadId) -> Result<(), DataRace> { - self.atomic_read_detect(clocks, thread)?; + fn acquire(&mut self, clocks: &mut ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> { + self.atomic_read_detect(clocks, index)?; if let Some(atomic) = self.atomic() { clocks.clock.join(&atomic.sync_vector); } @@ -677,8 +454,8 @@ fn acquire(&mut self, clocks: &mut ThreadClockSet, thread: ThreadId) -> Result<( /// Update memory cell data-race tracking for atomic /// load relaxed semantics, is a no-op if this memory was /// not used previously as atomic memory - fn load_relaxed(&mut self, clocks: &mut ThreadClockSet, thread: ThreadId) -> Result<(), DataRace> { - self.atomic_read_detect(clocks, thread)?; + fn load_relaxed(&mut self, clocks: &mut ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> { + self.atomic_read_detect(clocks, index)?; if let Some(atomic) = self.atomic() { clocks.fence_acquire.join(&atomic.sync_vector); } @@ -688,38 +465,39 @@ fn load_relaxed(&mut self, clocks: &mut ThreadClockSet, thread: ThreadId) -> Res /// Update the memory cell data-race tracking for atomic /// store release semantics - fn release(&mut self, clocks: &ThreadClockSet, thread: ThreadId) -> Result<(), DataRace> { - self.atomic_write_detect(clocks, thread)?; + fn release(&mut self, clocks: &ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> { + self.atomic_write_detect(clocks, index)?; let atomic = self.atomic_mut(); - atomic.sync_vector.set_values(&clocks.clock); - atomic.release_sequences.clear_and_set(thread, &clocks.clock); + atomic.sync_vector.clone_from(&clocks.clock); + atomic.release_sequences.clear(); + atomic.release_sequences.insert(index, &clocks.clock); Ok(()) } /// Update the memory cell data-race tracking for atomic /// store relaxed semantics - fn store_relaxed(&mut self, clocks: &ThreadClockSet, thread: ThreadId) -> Result<(), DataRace> { - self.atomic_write_detect(clocks, thread)?; + fn store_relaxed(&mut self, clocks: 
&ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> { + self.atomic_write_detect(clocks, index)?; let atomic = self.atomic_mut(); - atomic.sync_vector.set_values(&clocks.fence_release); - if let Some(release) = atomic.release_sequences.load(thread) { + atomic.sync_vector.clone_from(&clocks.fence_release); + if let Some(release) = atomic.release_sequences.get(index) { atomic.sync_vector.join(release); } - atomic.release_sequences.clear_and_retain(thread); + atomic.release_sequences.retain_index(index); Ok(()) } /// Update the memory cell data-race tracking for atomic /// store release semantics for RMW operations - fn rmw_release(&mut self, clocks: &ThreadClockSet, thread: ThreadId) -> Result<(), DataRace> { - self.atomic_write_detect(clocks, thread)?; + fn rmw_release(&mut self, clocks: &ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> { + self.atomic_write_detect(clocks, index)?; let atomic = self.atomic_mut(); atomic.sync_vector.join(&clocks.clock); - atomic.release_sequences.insert(thread, &clocks.clock); + atomic.release_sequences.insert(index, &clocks.clock); Ok(()) } /// Update the memory cell data-race tracking for atomic /// store relaxed semantics for RMW operations - fn rmw_relaxed(&mut self, clocks: &ThreadClockSet, thread: ThreadId) -> Result<(), DataRace> { - self.atomic_write_detect(clocks, thread)?; + fn rmw_relaxed(&mut self, clocks: &ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> { + self.atomic_write_detect(clocks, index)?; let atomic = self.atomic_mut(); atomic.sync_vector.join(&clocks.fence_release); Ok(()) @@ -727,11 +505,11 @@ fn rmw_relaxed(&mut self, clocks: &ThreadClockSet, thread: ThreadId) -> Result<( /// Detect data-races with an atomic read, caused by a non-atomic write that does /// not happen-before the atomic-read - fn atomic_read_detect(&mut self, clocks: &ThreadClockSet, thread: ThreadId) -> Result<(), DataRace> { + fn atomic_read_detect(&mut self, clocks: &ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> { log::trace!("Atomic read with vectors: {:#?} :: {:#?}", self, clocks); - if self.write <= clocks.clock[self.write_thread] { + if self.write <= clocks.clock[self.write_index] { let atomic = self.atomic_mut(); - atomic.read_vector.set_at_thread(&clocks.clock, thread); + atomic.read_vector.set_at_index(&clocks.clock, index); Ok(()) }else{ Err(DataRace) @@ -740,11 +518,11 @@ fn atomic_read_detect(&mut self, clocks: &ThreadClockSet, thread: ThreadId) -> R /// Detect data-races with an atomic write, either with a non-atomic read or with /// a non-atomic write: - fn atomic_write_detect(&mut self, clocks: &ThreadClockSet, thread: ThreadId) -> Result<(), DataRace> { + fn atomic_write_detect(&mut self, clocks: &ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> { log::trace!("Atomic write with vectors: {:#?} :: {:#?}", self, clocks); - if self.write <= clocks.clock[self.write_thread] && self.read <= clocks.clock { + if self.write <= clocks.clock[self.write_index] && self.read <= clocks.clock { let atomic = self.atomic_mut(); - atomic.write_vector.set_at_thread(&clocks.clock, thread); + atomic.write_vector.set_at_index(&clocks.clock, index); Ok(()) }else{ Err(DataRace) @@ -753,16 +531,16 @@ fn atomic_write_detect(&mut self, clocks: &ThreadClockSet, thread: ThreadId) -> /// Detect races for non-atomic read operations at the current memory cell /// returns true if a data-race is detected - fn read_race_detect(&mut self, clocks: &ThreadClockSet, thread: ThreadId) -> Result<(), DataRace> { + fn read_race_detect(&mut self, 
clocks: &ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> { log::trace!("Unsynchronized read with vectors: {:#?} :: {:#?}", self, clocks); - if self.write <= clocks.clock[self.write_thread] { + if self.write <= clocks.clock[self.write_index] { let race_free = if let Some(atomic) = self.atomic() { atomic.write_vector <= clocks.clock }else{ true }; if race_free { - self.read.set_at_thread(&clocks.clock, thread); + self.read.set_at_index(&clocks.clock, index); Ok(()) }else{ Err(DataRace) @@ -774,17 +552,17 @@ fn read_race_detect(&mut self, clocks: &ThreadClockSet, thread: ThreadId) -> Res /// Detect races for non-atomic write operations at the current memory cell /// returns true if a data-race is detected - fn write_race_detect(&mut self, clocks: &ThreadClockSet, thread: ThreadId) -> Result<(), DataRace> { + fn write_race_detect(&mut self, clocks: &ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> { log::trace!("Unsynchronized write with vectors: {:#?} :: {:#?}", self, clocks); - if self.write <= clocks.clock[self.write_thread] && self.read <= clocks.clock { + if self.write <= clocks.clock[self.write_index] && self.read <= clocks.clock { let race_free = if let Some(atomic) = self.atomic() { atomic.write_vector <= clocks.clock && atomic.read_vector <= clocks.clock }else{ true }; if race_free { - self.write = clocks.clock[thread]; - self.write_thread = thread; + self.write = clocks.clock[index]; + self.write_index = index; self.read.set_zero_vector(); Ok(()) }else{ @@ -822,7 +600,7 @@ pub fn new_allocation(global: &MemoryExtra, len: Size) -> VClockAlloc { // Find an index, if one exists where the value // in `l` is greater than the value in `r` - fn find_gt_index(l: &VClock, r: &VClock) -> Option { + fn find_gt_index(l: &VClock, r: &VClock) -> Option { let l_slice = l.as_slice(); let r_slice = r.as_slice(); l_slice.iter().zip(r_slice.iter()) @@ -844,7 +622,7 @@ fn find_gt_index(l: &VClock, r: &VClock) -> Option { }else{ None } - }) + }).map(|idx| VectorIdx::new(idx)) } /// Report a data-race found in the program @@ -859,35 +637,29 @@ fn report_data_race<'tcx>( action: &str, is_atomic: bool, pointer: Pointer, len: Size ) -> InterpResult<'tcx> { - let current_thread = global.current_thread(); - let current_state = global.current_thread_state(); - let mut write_clock = VClock::default(); + let (current_index, current_clocks) = global.current_thread_state(); + let write_clock; let ( other_action, other_thread, other_clock - ) = if range.write > current_state.clock[range.write_thread] { - + ) = if range.write > current_clocks.clock[range.write_index] { // Convert the write action into the vector clock it // represents for diagnostic purposes - let wclock = write_clock.get_mut_with_min_len( - current_state.clock.as_slice().len() - .max(range.write_thread.to_u32() as usize + 1) - ); - wclock[range.write_thread.to_u32() as usize] = range.write; - ("WRITE", range.write_thread, write_clock.as_slice()) + write_clock = VClock::new_with_index(range.write_index, range.write); + ("WRITE", range.write_index, &write_clock) }else if let Some(idx) = Self::find_gt_index( - &range.read, ¤t_state.clock + &range.read, ¤t_clocks.clock ){ - ("READ", ThreadId::new(idx), range.read.as_slice()) + ("READ", idx, &range.read) }else if !is_atomic { if let Some(atomic) = range.atomic() { if let Some(idx) = Self::find_gt_index( - &atomic.write_vector, ¤t_state.clock + &atomic.write_vector, ¤t_clocks.clock ) { - ("ATOMIC_STORE", ThreadId::new(idx), atomic.write_vector.as_slice()) + ("ATOMIC_STORE", idx, 
&atomic.write_vector) }else if let Some(idx) = Self::find_gt_index( - &atomic.read_vector, ¤t_state.clock + &atomic.read_vector, ¤t_clocks.clock ) { - ("ATOMIC_LOAD", ThreadId::new(idx), atomic.read_vector.as_slice()) + ("ATOMIC_LOAD", idx, &atomic.read_vector) }else{ unreachable!("Failed to find report data-race for non-atomic operation: no race found") } @@ -899,7 +671,7 @@ fn report_data_race<'tcx>( }; // Load elaborated thread information about the racing thread actions - let current_thread_info = global.print_thread_metadata(current_thread); + let current_thread_info = global.print_thread_metadata(current_index); let other_thread_info = global.print_thread_metadata(other_thread); // Throw the data-race detection @@ -910,7 +682,7 @@ fn report_data_race<'tcx>( action, current_thread_info, other_action, other_thread_info, pointer.alloc_id, pointer.offset.bytes(), len.bytes(), - current_state.clock, + current_clocks.clock, other_clock ) } @@ -921,14 +693,10 @@ fn report_data_race<'tcx>( /// operation pub fn read<'tcx>(&self, pointer: Pointer, len: Size) -> InterpResult<'tcx> { if self.global.multi_threaded.get() { - let current_thread = self.global.current_thread(); - let current_state = self.global.current_thread_state(); - - // The alloc-ranges are not split, however changes are not going to be made - // to the ranges being tested, so this is ok + let (index, clocks) = self.global.current_thread_state(); let mut alloc_ranges = self.alloc_ranges.borrow_mut(); for (_,range) in alloc_ranges.iter_mut(pointer.offset, len) { - if range.read_race_detect(&*current_state, current_thread) == Err(DataRace) { + if range.read_race_detect(&*clocks, index) == Err(DataRace) { // Report data-race return Self::report_data_race( &self.global,range, "READ", false, pointer, len @@ -946,10 +714,9 @@ pub fn read<'tcx>(&self, pointer: Pointer, len: Size) -> InterpResult<'tcx> /// operation pub fn write<'tcx>(&mut self, pointer: Pointer, len: Size) -> InterpResult<'tcx> { if self.global.multi_threaded.get() { - let current_thread = self.global.current_thread(); - let current_state = self.global.current_thread_state(); + let (index, clocks) = self.global.current_thread_state(); for (_,range) in self.alloc_ranges.get_mut().iter_mut(pointer.offset, len) { - if range.write_race_detect(&*current_state, current_thread) == Err(DataRace) { + if range.write_race_detect(&*clocks, index) == Err(DataRace) { // Report data-race return Self::report_data_race( &self.global, range, "WRITE", false, pointer, len @@ -967,10 +734,9 @@ pub fn write<'tcx>(&mut self, pointer: Pointer, len: Size) -> InterpResult< /// operation pub fn deallocate<'tcx>(&mut self, pointer: Pointer, len: Size) -> InterpResult<'tcx> { if self.global.multi_threaded.get() { - let current_thread = self.global.current_thread(); - let current_state = self.global.current_thread_state(); + let (index, clocks) = self.global.current_thread_state(); for (_,range) in self.alloc_ranges.get_mut().iter_mut(pointer.offset, len) { - if range.write_race_detect(&*current_state, current_thread) == Err(DataRace) { + if range.write_race_detect(&*clocks, index) == Err(DataRace) { // Report data-race return Self::report_data_race( &self.global, range, "DEALLOCATE", false, pointer, len @@ -989,6 +755,7 @@ pub fn deallocate<'tcx>(&mut self, pointer: Pointer, len: Size) -> InterpRe /// additional metadata to model atomic fence operations #[derive(Clone, Default, Debug)] struct ThreadClockSet { + /// The increasing clock representing timestamps /// that happen-before this thread. 
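    /// For intuition: `clock[j] = t` can be read as "this thread has observed
    /// everything the thread assigned vector index `j` did up to its local
    /// timestamp `t`", so any action with a smaller timestamp at index `j`
    /// happens-before this thread's next action.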
clock: VClock, @@ -1008,7 +775,7 @@ impl ThreadClockSet { /// set of thread vector clocks #[inline] fn apply_release_fence(&mut self) { - self.fence_release.set_values(&self.clock); + self.fence_release.clone_from(&self.clock); } /// Apply the effects of a acquire fence to this @@ -1021,8 +788,8 @@ fn apply_acquire_fence(&mut self) { /// Increment the happens-before clock at a /// known index #[inline] - fn increment_clock(&mut self, thread: ThreadId) { - self.clock.increment_thread(thread); + fn increment_clock(&mut self, index: VectorIdx) { + self.clock.increment_index(index); } /// Join the happens-before clock with that of @@ -1047,81 +814,178 @@ pub struct GlobalState { /// any data-races multi_threaded: Cell, - /// The current vector clock for all threads - /// this includes threads that have terminated - /// execution - thread_clocks: RefCell>, + /// Mapping of a vector index to a known set of thread + /// clocks, this is not directly mapping from a thread id + /// since it may refer to multiple threads + vector_clocks: RefCell>, + + /// Mapping of a given vector index to the current thread + /// that the execution is representing, this may change + /// if a vector index is re-assigned to a new thread + vector_info: RefCell>, //FIXME: make option + + /// The mapping of a given thread to a known vector clock + thread_info: RefCell, Option>)>>, - /// Thread name cache for better diagnostics on the reporting - /// of a data-race - thread_names: RefCell>>>, + /// The current vector index being executed + current_index: Cell, - /// The current thread being executed, - /// this is mirrored from the scheduler since - /// it is required for loading the current vector - /// clock for data-race detection - current_thread_id: Cell, + /// Potential vector indices that could be re-used on thread creation + /// values are inserted here on thread join events, and can be + /// re-used once the vector clocks of all current threads + /// are equal to the vector clock of the joined thread + reuse_candidates: RefCell>, } impl GlobalState { /// Create a new global state, setup with just thread-id=0 /// advanced to timestamp = 1 pub fn new() -> Self { - let mut vec = IndexVec::new(); - let thread_id = vec.push(ThreadClockSet::default()); - vec[thread_id].increment_clock(thread_id); - GlobalState { + let global_state = GlobalState { multi_threaded: Cell::new(false), - thread_clocks: RefCell::new(vec), - thread_names: RefCell::new(IndexVec::new()), - current_thread_id: Cell::new(thread_id), - } + vector_clocks: RefCell::new(IndexVec::new()), + vector_info: RefCell::new(IndexVec::new()), + thread_info: RefCell::new(IndexVec::new()), + current_index: Cell::new(VectorIdx::new(0)), + reuse_candidates: RefCell::new(FxHashSet::default()), + }; + + // Setup the main-thread since it is not explicitly created: + // uses vector index and thread-id 0, also the rust runtime gives + // the main-thread a name of "main". 
+ let index = global_state.vector_clocks.borrow_mut().push(ThreadClockSet::default()); + global_state.vector_info.borrow_mut().push(ThreadId::new(0)); + global_state.thread_info.borrow_mut().push( + (Some(index), Some("main".to_string().into_boxed_str()) + )); + + global_state } + // Try to find vector index values that can potentially be re-used + // by a new thread instead of a new vector index being created + fn find_vector_index_reuse_candidate(&self) -> Option { + let mut reuse = self.reuse_candidates.borrow_mut(); + let vector_clocks = self.vector_clocks.borrow(); + for &candidate in reuse.iter() { + let target_timestamp = vector_clocks[candidate].clock[candidate]; + if vector_clocks.iter().all(|clock| { + clock.clock[candidate] == target_timestamp + }) { + // All vector clocks for each vector index are equal to + // the target timestamp, therefore since the thread has + // terminated and cannot update the vector clock. + // No more data-races involving this vector index are possible + // so it can be re-used + assert!(reuse.remove(&candidate)); + return Some(candidate) + } + } + None + } // Hook for thread creation, enabled multi-threaded execution and marks // the current thread timestamp as happening-before the current thread #[inline] pub fn thread_created(&self, thread: ThreadId) { + let current_index = self.current_index(); - // Enable multi-threaded execution mode now that there are at least - // two threads + // Enable multi-threaded execution, there are now two threads + // so data-races are now possible. self.multi_threaded.set(true); - let current_thread = self.current_thread_id.get(); - let mut vectors = self.thread_clocks.borrow_mut(); - vectors.ensure_contains_elem(thread, Default::default); - let (current, created) = vectors.pick2_mut(current_thread, thread); - // Pre increment clocks before atomic operation - current.increment_clock(current_thread); + // Load and setup the associated thread metadata + let mut thread_info = self.thread_info.borrow_mut(); + thread_info.ensure_contains_elem(thread, Default::default); + + // Assign a vector index for the thread, attempting to re-use an old + // vector index that can no longer report any data-races if possible + let created_index = if let Some( + reuse_index + ) = self.find_vector_index_reuse_candidate() { + // Now re-configure the re-use candidate, increment the clock + // for the new sync use of the vector + let mut vector_clocks = self.vector_clocks.borrow_mut(); + vector_clocks[reuse_index].increment_clock(reuse_index); + + // Locate the old thread the vector was associated with and update + // it to represent the new thread instead + let mut vector_info = self.vector_info.borrow_mut(); + let old_thread = vector_info[reuse_index]; + vector_info[reuse_index] = thread; + + // Mark the thread the vector index was associated with as no longer + // representing a thread index + thread_info[old_thread].0 = None; + + reuse_index + }else{ + // No vector re-use candidates available, instead create + // a new vector index + let mut vector_info = self.vector_info.borrow_mut(); + vector_info.push(thread) + }; + + // Mark the chosen vector index as in use by the thread + thread_info[thread].0 = Some(created_index); + + // Create a thread clock set if applicable + let mut vector_clocks = self.vector_clocks.borrow_mut(); + if created_index == vector_clocks.next_index() { + vector_clocks.push(ThreadClockSet::default()); + } - // The current thread happens-before the created thread - // so update the created vector clock + // Now load 
the two clocks and configure the initial state + let (current, created) = vector_clocks.pick2_mut(current_index, created_index); + + // Advance the current thread before the synchronized operation + current.increment_clock(current_index); + + // Join the created with current, since the current threads + // previous actions happen-before the created thread created.join_with(current); - // Post increment clocks after atomic operation - current.increment_clock(current_thread); - created.increment_clock(thread); + // Advance both threads after the synchronized operation + current.increment_clock(current_index); + created.increment_clock(created_index); } /// Hook on a thread join to update the implicit happens-before relation - /// between the joined thead and the current thread + /// between the joined thead and the current thread. + /// Called after the join has occured, and hence implicitly also states + /// that the thread must have terminated as well #[inline] pub fn thread_joined(&self, current_thread: ThreadId, join_thread: ThreadId) { - let mut vectors = self.thread_clocks.borrow_mut(); - let (current, join) = vectors.pick2_mut(current_thread, join_thread); + let (current_index, join_index) = { + let thread_info = self.thread_info.borrow(); + let current_index = thread_info[current_thread].0 + .expect("Joining into thread with no assigned vector"); + let join_index = thread_info[join_thread].0 + .expect("Joining thread with no assigned vector"); + (current_index, join_index) + }; + let mut clocks_vec = self.vector_clocks.borrow_mut(); + let (current, join) = clocks_vec.pick2_mut(current_index, join_index); // Pre increment clocks before atomic operation - current.increment_clock(current_thread); - join.increment_clock(join_thread); + current.increment_clock(current_index); + join.increment_clock(join_index); // The join thread happens-before the current thread // so update the current vector clock current.join_with(join); // Post increment clocks after atomic operation - current.increment_clock(current_thread); - join.increment_clock(join_thread); + current.increment_clock(current_index); + join.increment_clock(join_index); + + // The joined thread vector clock is a potential candidate + // for re-use given sufficient time, mark as available once + // threads have been created. 
This is because this function + // is called once join_thread has terminated and such cannot + // update any-more + let mut reuse = self.reuse_candidates.borrow_mut(); + reuse.insert(join_index); } /// Hook for updating the local tracker of the currently @@ -1129,7 +993,10 @@ pub fn thread_joined(&self, current_thread: ThreadId, join_thread: ThreadId) { /// `active_thread` in thread.rs is updated #[inline] pub fn thread_set_active(&self, thread: ThreadId) { - self.current_thread_id.set(thread); + let thread_info = self.thread_info.borrow(); + let vector_idx = thread_info[thread].0 + .expect("Setting thread active with no assigned vector"); + self.current_index.set(vector_idx); } /// Hook for updating the local tracker of the threads name @@ -1137,33 +1004,40 @@ pub fn thread_set_active(&self, thread: ThreadId) { /// the thread name is used for improved diagnostics /// during a data-race #[inline] - pub fn thread_set_name(&self, name: String) { + pub fn thread_set_name(&self, thread: ThreadId, name: String) { let name = name.into_boxed_str(); - let mut names = self.thread_names.borrow_mut(); - let thread = self.current_thread_id.get(); - names.ensure_contains_elem(thread, Default::default); - names[thread] = Some(name); + let mut thread_info = self.thread_info.borrow_mut(); + thread_info[thread].1 = Some(name); } - /// Advance the vector clock for a thread - /// this is called before and after any atomic/synchronizing operations - /// that may manipulate state - #[inline] - fn advance_vector_clock(&self) { - let thread = self.current_thread_id.get(); - let mut vectors = self.thread_clocks.borrow_mut(); - vectors[thread].increment_clock(thread); - - // Log the increment in the atomic vector clock - log::trace!("Atomic vector clock increase for {:?} to {:?}",thread, vectors[thread].clock); + /// Attempt to perform a synchronized operation, this + /// will perform no operation if multi-threading is + /// not currently enabled. 
+ /// Otherwise it will increment the clock for the current + /// vector before and after the operation for data-race + /// detection between any happens-before edges the + /// operation may create + fn maybe_perform_sync_operation<'tcx>( + &self, op: impl FnOnce(VectorIdx, RefMut<'_,ThreadClockSet>) -> InterpResult<'tcx>, + ) -> InterpResult<'tcx> { + if self.multi_threaded.get() { + let (index, mut clocks) = self.current_thread_state_mut(); + clocks.increment_clock(index); + op(index, clocks)?; + let (_, mut clocks) = self.current_thread_state_mut(); + clocks.increment_clock(index); + } + Ok(()) } /// Internal utility to identify a thread stored internally /// returns the id and the name for better diagnostics - fn print_thread_metadata(&self, thread: ThreadId) -> String { - if let Some(Some(name)) = self.thread_names.borrow().get(thread) { + fn print_thread_metadata(&self, vector: VectorIdx) -> String { + let thread = self.vector_info.borrow()[vector]; + let thread_name = &self.thread_info.borrow()[thread].1; + if let Some(name) = thread_name { let name: &str = name; format!("Thread(id = {:?}, name = {:?})", thread.to_u32(), &*name) }else{ @@ -1175,25 +1049,19 @@ fn print_thread_metadata(&self, thread: ThreadId) -> String { /// Acquire a lock, express that the previous call of /// `validate_lock_release` must happen before this pub fn validate_lock_acquire(&self, lock: &DataRaceLockHandle, thread: ThreadId) { - let mut ref_vector = self.thread_clocks.borrow_mut(); - ref_vector[thread].increment_clock(thread); - - let clocks = &mut ref_vector[thread]; + let (index, mut clocks) = self.load_thread_state_mut(thread); + clocks.increment_clock(index); clocks.clock.join(&lock.clock); - - ref_vector[thread].increment_clock(thread); + clocks.increment_clock(index); } /// Release a lock handle, express that this happens-before /// any subsequent calls to `validate_lock_acquire` pub fn validate_lock_release(&self, lock: &mut DataRaceLockHandle, thread: ThreadId) { - let mut ref_vector = self.thread_clocks.borrow_mut(); - ref_vector[thread].increment_clock(thread); - - let clocks = &ref_vector[thread]; - lock.clock.set_values(&clocks.clock); - - ref_vector[thread].increment_clock(thread); + let (index, mut clocks) = self.load_thread_state_mut(thread); + clocks.increment_clock(index); + lock.clock.clone_from(&clocks.clock); + clocks.increment_clock(index); } /// Release a lock handle, express that this happens-before @@ -1201,401 +1069,48 @@ pub fn validate_lock_release(&self, lock: &mut DataRaceLockHandle, thread: Threa /// as any previous calls to this function after any /// `validate_lock_release` calls pub fn validate_lock_release_shared(&self, lock: &mut DataRaceLockHandle, thread: ThreadId) { - let mut ref_vector = self.thread_clocks.borrow_mut(); - ref_vector[thread].increment_clock(thread); - - let clocks = &ref_vector[thread]; + let (index, mut clocks) = self.load_thread_state_mut(thread); + clocks.increment_clock(index); lock.clock.join(&clocks.clock); - - ref_vector[thread].increment_clock(thread); - } - - /// Load the thread clock set associated with the current thread - #[inline] - fn current_thread_state(&self) -> Ref<'_, ThreadClockSet> { - let ref_vector = self.thread_clocks.borrow(); - let thread = self.current_thread_id.get(); - Ref::map(ref_vector, |vector| &vector[thread]) - } - - /// Load the thread clock set associated with the current thread - /// mutably for modification - #[inline] - fn current_thread_state_mut(&self) -> RefMut<'_, ThreadClockSet> { - let ref_vector = 
self.thread_clocks.borrow_mut(); - let thread = self.current_thread_id.get(); - RefMut::map(ref_vector, |vector| &mut vector[thread]) - } - - /// Return the current thread, should be the same - /// as the data-race active thread - #[inline] - fn current_thread(&self) -> ThreadId { - self.current_thread_id.get() + clocks.increment_clock(index); } -} - - -/// The size of the vector-clock to store inline -/// clock vectors larger than this will be stored on the heap -const SMALL_VECTOR: usize = 4; - -/// The type of the time-stamps recorded in the data-race detector -/// set to a type of unsigned integer -type Timestamp = u32; - -/// A vector clock for detecting data-races -/// invariants: -/// - the last element in a VClock must not be 0 -/// -- this means that derive(PartialEq & Eq) is correct -/// -- as there is no implicit zero tail that might be equal -/// -- also simplifies the implementation of PartialOrd -#[derive(Clone, PartialEq, Eq, Default, Debug)] -pub struct VClock(SmallVec<[Timestamp; SMALL_VECTOR]>); - -impl VClock { - /// Load the backing slice behind the clock vector. + /// Load the vector index used by the given thread as well as the set of vector clocks + /// used by the thread #[inline] - fn as_slice(&self) -> &[Timestamp] { - self.0.as_slice() + fn load_thread_state_mut(&self, thread: ThreadId) -> (VectorIdx, RefMut<'_, ThreadClockSet>) { + let index = self.thread_info.borrow()[thread].0 + .expect("Loading thread state for thread with no assigned vector"); + let ref_vector = self.vector_clocks.borrow_mut(); + let clocks = RefMut::map(ref_vector, |vec| &mut vec[index]); + (index, clocks) } - /// Get a mutable slice to the internal vector with minimum `min_len` - /// elements, to preserve invariants this vector must modify - /// the `min_len`-1 nth element to a non-zero value + /// Load the current vector clock in use and the current set of thread clocks + /// in use for the vector #[inline] - fn get_mut_with_min_len(&mut self, min_len: usize) -> &mut [Timestamp] { - if self.0.len() < min_len { - self.0.resize(min_len, 0); - } - assert!(self.0.len() >= min_len); - self.0.as_mut_slice() + fn current_thread_state(&self) -> (VectorIdx, Ref<'_, ThreadClockSet>) { + let index = self.current_index(); + let ref_vector = self.vector_clocks.borrow(); + let clocks = Ref::map(ref_vector, |vec| &vec[index]); + (index, clocks) } - /// Increment the vector clock at a known index + /// Load the current vector clock in use and the current set of thread clocks + /// in use for the vector mutably for modification #[inline] - fn increment_index(&mut self, idx: usize) { - let mut_slice = self.get_mut_with_min_len(idx + 1); - let idx_ref = &mut mut_slice[idx]; - *idx_ref = idx_ref.checked_add(1).expect("Vector clock overflow") + fn current_thread_state_mut(&self) -> (VectorIdx, RefMut<'_, ThreadClockSet>) { + let index = self.current_index(); + let ref_vector = self.vector_clocks.borrow_mut(); + let clocks = RefMut::map(ref_vector, |vec| &mut vec[index]); + (index, clocks) } - // Increment the vector element representing the progress - // of execution in the given thread - #[inline] - pub fn increment_thread(&mut self, thread: ThreadId) { - self.increment_index(thread.to_u32() as usize); - } - - // Join the two vector-clocks together, this - // sets each vector-element to the maximum value - // of that element in either of the two source elements. 
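    // As a quick sketch of the intended behaviour (example values only, using the
    // `from_slice` helper from the tests module below): the shorter clock is
    // treated as zero-extended, so
    //
    //     let mut a = from_slice(&[1, 4, 2]);
    //     a.join(&from_slice(&[3, 1]));
    //     assert_eq!(a, from_slice(&[3, 4, 2]));
    //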
- pub fn join(&mut self, other: &Self) { - let rhs_slice = other.as_slice(); - let lhs_slice = self.get_mut_with_min_len(rhs_slice.len()); - - // Element-wise set to maximum. - for (l, &r) in lhs_slice.iter_mut().zip(rhs_slice.iter()) { - *l = r.max(*l); - } - } - - /// Joins with a thread at a known index - fn set_at_index(&mut self, other: &Self, idx: usize){ - let mut_slice = self.get_mut_with_min_len(idx + 1); - let slice = other.as_slice(); - mut_slice[idx] = slice[idx]; - } - - /// Join with a threads vector clock only at the desired index - /// returns true if the value updated - #[inline] - pub fn set_at_thread(&mut self, other: &Self, thread: ThreadId){ - self.set_at_index(other, thread.to_u32() as usize); - } - - /// Clear the vector to all zeros, stored as an empty internal - /// vector - #[inline] - pub fn set_zero_vector(&mut self) { - self.0.clear(); - } - - /// Set the values stored in this vector clock - /// to the values stored in another. - pub fn set_values(&mut self, new_value: &VClock) { - let new_slice = new_value.as_slice(); - self.0.resize(new_slice.len(), 0); - self.0.copy_from_slice(new_slice); - } -} - - -impl PartialOrd for VClock { - fn partial_cmp(&self, other: &VClock) -> Option { - - // Load the values as slices - let lhs_slice = self.as_slice(); - let rhs_slice = other.as_slice(); - - // Iterate through the combined vector slice - // keeping track of the order that is currently possible to satisfy. - // If an ordering relation is detected to be impossible, then bail and - // directly return None - let mut iter = lhs_slice.iter().zip(rhs_slice.iter()); - let mut order = match iter.next() { - Some((lhs, rhs)) => lhs.cmp(rhs), - None => Ordering::Equal - }; - for (l, r) in iter { - match order { - Ordering::Equal => order = l.cmp(r), - Ordering::Less => if l > r { - return None - }, - Ordering::Greater => if l < r { - return None - } - } - } - - //Now test if either left or right have trailing elements - // by the invariant the trailing elements have at least 1 - // non zero value, so no additional calculation is required - // to determine the result of the PartialOrder - let l_len = lhs_slice.len(); - let r_len = rhs_slice.len(); - match l_len.cmp(&r_len) { - // Equal has no additional elements: return current order - Ordering::Equal => Some(order), - // Right has at least 1 element > than the implicit 0, - // so the only valid values are Ordering::Less or None - Ordering::Less => match order { - Ordering::Less | Ordering::Equal => Some(Ordering::Less), - Ordering::Greater => None - } - // Left has at least 1 element > than the implicit 0, - // so the only valid values are Ordering::Greater or None - Ordering::Greater => match order { - Ordering::Greater | Ordering::Equal => Some(Ordering::Greater), - Ordering::Less => None - } - } - } - - fn lt(&self, other: &VClock) -> bool { - // Load the values as slices - let lhs_slice = self.as_slice(); - let rhs_slice = other.as_slice(); - - // If l_len > r_len then at least one element - // in l_len is > than r_len, therefore the result - // is either Some(Greater) or None, so return false - // early. - let l_len = lhs_slice.len(); - let r_len = rhs_slice.len(); - if l_len <= r_len { - // If any elements on the left are greater than the right - // then the result is None or Some(Greater), both of which - // return false, the earlier test asserts that no elements in the - // extended tail violate this assumption. 
Otherwise l <= r, finally - // the case where the values are potentially equal needs to be considered - // and false returned as well - let mut equal = l_len == r_len; - for (&l, &r) in lhs_slice.iter().zip(rhs_slice.iter()) { - if l > r { - return false - }else if l < r { - equal = false; - } - } - !equal - }else{ - false - } - } - - fn le(&self, other: &VClock) -> bool { - // Load the values as slices - let lhs_slice = self.as_slice(); - let rhs_slice = other.as_slice(); - - // If l_len > r_len then at least one element - // in l_len is > than r_len, therefore the result - // is either Some(Greater) or None, so return false - // early. - let l_len = lhs_slice.len(); - let r_len = rhs_slice.len(); - if l_len <= r_len { - // If any elements on the left are greater than the right - // then the result is None or Some(Greater), both of which - // return false, the earlier test asserts that no elements in the - // extended tail violate this assumption. Otherwise l <= r - !lhs_slice.iter().zip(rhs_slice.iter()).any(|(&l, &r)| l > r) - }else{ - false - } - } - - fn gt(&self, other: &VClock) -> bool { - // Load the values as slices - let lhs_slice = self.as_slice(); - let rhs_slice = other.as_slice(); - - // If r_len > l_len then at least one element - // in r_len is > than l_len, therefore the result - // is either Some(Less) or None, so return false - // early. - let l_len = lhs_slice.len(); - let r_len = rhs_slice.len(); - if l_len >= r_len { - // If any elements on the left are less than the right - // then the result is None or Some(Less), both of which - // return false, the earlier test asserts that no elements in the - // extended tail violate this assumption. Otherwise l >=, finally - // the case where the values are potentially equal needs to be considered - // and false returned as well - let mut equal = l_len == r_len; - for (&l, &r) in lhs_slice.iter().zip(rhs_slice.iter()) { - if l < r { - return false - }else if l > r { - equal = false; - } - } - !equal - }else{ - false - } - } - - fn ge(&self, other: &VClock) -> bool { - // Load the values as slices - let lhs_slice = self.as_slice(); - let rhs_slice = other.as_slice(); - - // If r_len > l_len then at least one element - // in r_len is > than l_len, therefore the result - // is either Some(Less) or None, so return false - // early. - let l_len = lhs_slice.len(); - let r_len = rhs_slice.len(); - if l_len >= r_len { - // If any elements on the left are less than the right - // then the result is None or Some(Less), both of which - // return false, the earlier test asserts that no elements in the - // extended tail violate this assumption. 
Otherwise l >= r - !lhs_slice.iter().zip(rhs_slice.iter()).any(|(&l, &r)| l < r) - }else{ - false - } - } -} - -impl Index for VClock { - type Output = Timestamp; - + /// Return the current thread, should be the same + /// as the data-race active thread #[inline] - fn index(&self, index: ThreadId) -> &Timestamp { - self.as_slice().get(index.to_u32() as usize).unwrap_or(&0) + fn current_index(&self) -> VectorIdx { + self.current_index.get() } } - -/// Test vector clock ordering operations -/// data-race detection is tested in the external -/// test suite -#[cfg(test)] -mod tests { - use super::{VClock, Timestamp}; - use std::cmp::Ordering; - - #[test] - fn test_equal() { - let mut c1 = VClock::default(); - let mut c2 = VClock::default(); - assert_eq!(c1, c2); - c1.increment_index(5); - assert_ne!(c1, c2); - c2.increment_index(53); - assert_ne!(c1, c2); - c1.increment_index(53); - assert_ne!(c1, c2); - c2.increment_index(5); - assert_eq!(c1, c2); - } - - #[test] - fn test_partial_order() { - // Small test - assert_order(&[1], &[1], Some(Ordering::Equal)); - assert_order(&[1], &[2], Some(Ordering::Less)); - assert_order(&[2], &[1], Some(Ordering::Greater)); - assert_order(&[1], &[1,2], Some(Ordering::Less)); - assert_order(&[2], &[1,2], None); - - // Misc tests - assert_order(&[400], &[0, 1], None); - - // Large test - assert_order(&[0,1,2,3,4,5,6,7,8,9,10], &[0,1,2,3,4,5,6,7,8,9,10,0,0,0], Some(Ordering::Equal)); - assert_order(&[0,1,2,3,4,5,6,7,8,9,10], &[0,1,2,3,4,5,6,7,8,9,10,0,1,0], Some(Ordering::Less)); - assert_order(&[0,1,2,3,4,5,6,7,8,9,11], &[0,1,2,3,4,5,6,7,8,9,10,0,0,0], Some(Ordering::Greater)); - assert_order(&[0,1,2,3,4,5,6,7,8,9,11], &[0,1,2,3,4,5,6,7,8,9,10,0,1,0], None); - assert_order(&[0,1,2,3,4,5,6,7,8,9,9 ], &[0,1,2,3,4,5,6,7,8,9,10,0,0,0], Some(Ordering::Less)); - assert_order(&[0,1,2,3,4,5,6,7,8,9,9 ], &[0,1,2,3,4,5,6,7,8,9,10,0,1,0], Some(Ordering::Less)); - } - - fn from_slice(mut slice: &[Timestamp]) -> VClock { - while let Some(0) = slice.last() { - slice = &slice[..slice.len() - 1] - } - VClock(smallvec::SmallVec::from_slice(slice)) - } - - fn assert_order(l: &[Timestamp], r: &[Timestamp], o: Option) { - let l = from_slice(l); - let r = from_slice(r); - - //Test partial_cmp - let compare = l.partial_cmp(&r); - assert_eq!(compare, o, "Invalid comparison\n l: {:?}\n r: {:?}",l,r); - let alt_compare = r.partial_cmp(&l); - assert_eq!(alt_compare, o.map(Ordering::reverse), "Invalid alt comparison\n l: {:?}\n r: {:?}",l,r); - - //Test operatorsm with faster implementations - assert_eq!( - matches!(compare,Some(Ordering::Less)), l < r, - "Invalid (<):\n l: {:?}\n r: {:?}",l,r - ); - assert_eq!( - matches!(compare,Some(Ordering::Less) | Some(Ordering::Equal)), l <= r, - "Invalid (<=):\n l: {:?}\n r: {:?}",l,r - ); - assert_eq!( - matches!(compare,Some(Ordering::Greater)), l > r, - "Invalid (>):\n l: {:?}\n r: {:?}",l,r - ); - assert_eq!( - matches!(compare,Some(Ordering::Greater) | Some(Ordering::Equal)), l >= r, - "Invalid (>=):\n l: {:?}\n r: {:?}",l,r - ); - assert_eq!( - matches!(alt_compare,Some(Ordering::Less)), r < l, - "Invalid alt (<):\n l: {:?}\n r: {:?}",l,r - ); - assert_eq!( - matches!(alt_compare,Some(Ordering::Less) | Some(Ordering::Equal)), r <= l, - "Invalid alt (<=):\n l: {:?}\n r: {:?}",l,r - ); - assert_eq!( - matches!(alt_compare,Some(Ordering::Greater)), r > l, - "Invalid alt (>):\n l: {:?}\n r: {:?}",l,r - ); - assert_eq!( - matches!(alt_compare,Some(Ordering::Greater) | Some(Ordering::Equal)), r >= l, - "Invalid alt (>=):\n l: {:?}\n r: {:?}",l,r 
- ); - } -} diff --git a/src/lib.rs b/src/lib.rs index f384787e4c6..c8c9e70ec3d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -35,6 +35,7 @@ mod stacked_borrows; mod sync; mod thread; +mod vector_clock; // Establish a "crate-wide prelude": we often import `crate::*`. @@ -79,6 +80,9 @@ pub use crate::sync::{ EvalContextExt as SyncEvalContextExt, CondvarId, MutexId, RwLockId }; +pub use crate::vector_clock::{ + VClock, VSmallClockSet, VectorIdx, VTimestamp +}; /// Insert rustc arguments at the beginning of the argument list that Miri wants to be /// set per default, for maximal validation power. diff --git a/src/shims/intrinsics.rs b/src/shims/intrinsics.rs index 2bb15e712c5..50f97af8453 100644 --- a/src/shims/intrinsics.rs +++ b/src/shims/intrinsics.rs @@ -469,8 +469,7 @@ fn atomic_load( let place = this.deref_operand(place)?; // make sure it fits into a scalar; otherwise it cannot be atomic - let val = this.read_scalar_racy(place)?; - this.validate_atomic_load(place, atomic)?; + let val = this.read_scalar_atomic(place, atomic)?; // Check alignment requirements. Atomics must always be aligned to their size, // even if the type they wrap would be less aligned (e.g. AtomicU64 on 32bit must @@ -495,9 +494,7 @@ fn atomic_store(&mut self, args: &[OpTy<'tcx, Tag>], atomic: AtomicWriteOp) -> I this.memory.check_ptr_access(place.ptr, place.layout.size, align)?; // Perform atomic store - this.write_scalar_racy(val, place)?; - - this.validate_atomic_store(place, atomic)?; + this.write_scalar_atomic(val, place, atomic)?; Ok(()) } @@ -527,7 +524,9 @@ fn atomic_op( bug!("Atomic arithmetic operations only work on integer types"); } let rhs = this.read_immediate(rhs)?; - let old = this.read_immediate_racy(place)?; + let old = this.allow_data_races_mut(|this| { + this.read_immediate(place. into()) + })?; // Check alignment requirements. Atomics must always be aligned to their size, // even if the type they wrap would be less aligned (e.g. AtomicU64 on 32bit must @@ -539,7 +538,9 @@ fn atomic_op( // Atomics wrap around on overflow. let val = this.binary_op(op, old, rhs)?; let val = if neg { this.unary_op(mir::UnOp::Not, val)? } else { val }; - this.write_immediate_racy(*val, place)?; + this.allow_data_races_mut(|this| { + this.write_immediate(*val, place.into()) + })?; this.validate_atomic_rmw(place, atomic)?; Ok(()) @@ -553,7 +554,9 @@ fn atomic_exchange( let &[place, new] = check_arg_count(args)?; let place = this.deref_operand(place)?; let new = this.read_scalar(new)?; - let old = this.read_scalar_racy(place)?; + let old = this.allow_data_races_mut(|this| { + this.read_scalar(place.into()) + })?; // Check alignment requirements. Atomics must always be aligned to their size, // even if the type they wrap would be less aligned (e.g. AtomicU64 on 32bit must @@ -562,7 +565,9 @@ fn atomic_exchange( this.memory.check_ptr_access(place.ptr, place.layout.size, align)?; this.write_scalar(old, dest)?; // old value is returned - this.write_scalar_racy(new, place)?; + this.allow_data_races_mut(|this| { + this.write_scalar(new, place.into()) + })?; this.validate_atomic_rmw(place, atomic)?; Ok(()) @@ -583,7 +588,9 @@ fn atomic_compare_exchange( // to read with the failure ordering and if successfull then try again with the success // read ordering and write in the success case. // Read as immediate for the sake of `binary_op()` - let old = this.read_immediate_racy(place)?; + let old = this.allow_data_races_mut(|this| { + this.read_immediate(place.into()) + })?; // Check alignment requirements. 
Atomics must always be aligned to their size, // even if the type they wrap would be less aligned (e.g. AtomicU64 on 32bit must @@ -602,7 +609,9 @@ fn atomic_compare_exchange( // if successful, perform a full rw-atomic validation // otherwise treat this as an atomic load with the fail ordering if eq.to_bool()? { - this.write_scalar_racy(new, place)?; + this.allow_data_races_mut(|this| { + this.write_scalar(new, place.into()) + })?; this.validate_atomic_rmw(place, success)?; } else { this.validate_atomic_load(place, fail)?; diff --git a/src/shims/posix/sync.rs b/src/shims/posix/sync.rs index 332e79071a0..d741ef346e9 100644 --- a/src/shims/posix/sync.rs +++ b/src/shims/posix/sync.rs @@ -62,7 +62,10 @@ fn mutex_get_kind<'mir, 'tcx: 'mir>( mutex_op: OpTy<'tcx, Tag>, ) -> InterpResult<'tcx, ScalarMaybeUninit> { let offset = if ecx.pointer_size().bytes() == 8 { 16 } else { 12 }; - ecx.read_scalar_at_offset_racy(mutex_op, offset, ecx.machine.layouts.i32) + ecx.read_scalar_at_offset_atomic( + mutex_op, offset, ecx.machine.layouts.i32, + AtomicReadOp::SeqCst + ) } fn mutex_set_kind<'mir, 'tcx: 'mir>( @@ -71,14 +74,19 @@ fn mutex_set_kind<'mir, 'tcx: 'mir>( kind: impl Into>, ) -> InterpResult<'tcx, ()> { let offset = if ecx.pointer_size().bytes() == 8 { 16 } else { 12 }; - ecx.write_scalar_at_offset_racy(mutex_op, offset, kind, ecx.machine.layouts.i32) + ecx.write_scalar_at_offset_atomic( + mutex_op, offset, kind, ecx.machine.layouts.i32, + AtomicWriteOp::SeqCst + ) } fn mutex_get_id<'mir, 'tcx: 'mir>( ecx: &MiriEvalContext<'mir, 'tcx>, mutex_op: OpTy<'tcx, Tag>, ) -> InterpResult<'tcx, ScalarMaybeUninit> { - ecx.read_scalar_at_offset_racy(mutex_op, 4, ecx.machine.layouts.u32) + ecx.read_scalar_at_offset_atomic( + mutex_op, 4, ecx.machine.layouts.u32, AtomicReadOp::SeqCst + ) } fn mutex_set_id<'mir, 'tcx: 'mir>( @@ -86,7 +94,10 @@ fn mutex_set_id<'mir, 'tcx: 'mir>( mutex_op: OpTy<'tcx, Tag>, id: impl Into>, ) -> InterpResult<'tcx, ()> { - ecx.write_scalar_at_offset_racy(mutex_op, 4, id, ecx.machine.layouts.u32) + ecx.write_scalar_at_offset_atomic( + mutex_op, 4, id, ecx.machine.layouts.u32, + AtomicWriteOp::SeqCst + ) } fn mutex_get_or_create_id<'mir, 'tcx: 'mir>( @@ -116,7 +127,10 @@ fn rwlock_get_id<'mir, 'tcx: 'mir>( ecx: &MiriEvalContext<'mir, 'tcx>, rwlock_op: OpTy<'tcx, Tag>, ) -> InterpResult<'tcx, ScalarMaybeUninit> { - ecx.read_scalar_at_offset_racy(rwlock_op, 4, ecx.machine.layouts.u32) + ecx.read_scalar_at_offset_atomic( + rwlock_op, 4, ecx.machine.layouts.u32, + AtomicReadOp::SeqCst + ) } fn rwlock_set_id<'mir, 'tcx: 'mir>( @@ -124,7 +138,10 @@ fn rwlock_set_id<'mir, 'tcx: 'mir>( rwlock_op: OpTy<'tcx, Tag>, id: impl Into>, ) -> InterpResult<'tcx, ()> { - ecx.write_scalar_at_offset_racy(rwlock_op, 4, id, ecx.machine.layouts.u32) + ecx.write_scalar_at_offset_atomic( + rwlock_op, 4, id, ecx.machine.layouts.u32, + AtomicWriteOp::SeqCst + ) } fn rwlock_get_or_create_id<'mir, 'tcx: 'mir>( @@ -177,7 +194,10 @@ fn cond_get_id<'mir, 'tcx: 'mir>( ecx: &MiriEvalContext<'mir, 'tcx>, cond_op: OpTy<'tcx, Tag>, ) -> InterpResult<'tcx, ScalarMaybeUninit> { - ecx.read_scalar_at_offset_racy(cond_op, 4, ecx.machine.layouts.u32) + ecx.read_scalar_at_offset_atomic( + cond_op, 4, ecx.machine.layouts.u32, + AtomicReadOp::SeqCst + ) } fn cond_set_id<'mir, 'tcx: 'mir>( @@ -185,7 +205,10 @@ fn cond_set_id<'mir, 'tcx: 'mir>( cond_op: OpTy<'tcx, Tag>, id: impl Into>, ) -> InterpResult<'tcx, ()> { - ecx.write_scalar_at_offset_racy(cond_op, 4, id, ecx.machine.layouts.u32) + ecx.write_scalar_at_offset_atomic( + 
cond_op, 4, id, ecx.machine.layouts.u32,
+        AtomicWriteOp::SeqCst
+    )
 }
 
 fn cond_get_or_create_id<'mir, 'tcx: 'mir>(
diff --git a/src/thread.rs b/src/thread.rs
index 08aeaa4fd09..f94805ae022 100644
--- a/src/thread.rs
+++ b/src/thread.rs
@@ -638,7 +638,9 @@ fn active_thread_stack_mut(&mut self) -> &mut Vec
     fn set_active_thread_name(&mut self, new_thread_name: Vec<u8>) {
         let this = self.eval_context_mut();
         if let Ok(string) = String::from_utf8(new_thread_name.clone()) {
-            this.memory.extra.data_race.thread_set_name(string);
+            this.memory.extra.data_race.thread_set_name(
+                this.machine.threads.active_thread, string
+            );
         }
         this.machine.threads.set_thread_name(new_thread_name);
     }
diff --git a/src/vector_clock.rs b/src/vector_clock.rs
new file mode 100644
index 00000000000..8d05eb1b992
--- /dev/null
+++ b/src/vector_clock.rs
@@ -0,0 +1,602 @@
+use std::{
+    fmt::{self, Debug}, cmp::Ordering, ops::Index,
+    num::TryFromIntError, convert::TryFrom, mem
+};
+use smallvec::SmallVec;
+use rustc_index::vec::Idx;
+use rustc_data_structures::fx::FxHashMap;
+
+/// A vector clock index; each index is associated with a thread id,
+/// but in some cases one vector index may be shared by
+/// multiple thread ids.
+#[derive(Clone, Copy, Debug, PartialOrd, Ord, PartialEq, Eq, Hash)]
+pub struct VectorIdx(u32);
+
+impl VectorIdx {
+    pub fn to_u32(self) -> u32 {
+        self.0
+    }
+    pub const MAX_INDEX: VectorIdx = VectorIdx(u32::MAX);
+}
+
+impl Idx for VectorIdx {
+    fn new(idx: usize) -> Self {
+        VectorIdx(u32::try_from(idx).unwrap())
+    }
+
+    fn index(self) -> usize {
+        usize::try_from(self.0).unwrap()
+    }
+}
+
+impl TryFrom<u64> for VectorIdx {
+    type Error = TryFromIntError;
+    fn try_from(id: u64) -> Result<Self, Self::Error> {
+        u32::try_from(id).map(|id_u32| Self(id_u32))
+    }
+}
+
+impl From<u32> for VectorIdx {
+    fn from(id: u32) -> Self {
+        Self(id)
+    }
+}
+
+
+/// A sparse set of vector clocks, where each vector index
+/// is associated with a vector clock.
+/// Any vector index that has not been assigned is treated
+/// as holding the all-zero vector clock.
+/// Optimized for the common case where only one element is
+/// stored in the set and the rest can be ignored, falling back
+/// to an internal hash-map once more than one element is
+/// assigned at any one time.
+#[derive(Clone)]
+pub struct VSmallClockSet(VSmallClockSetInner);
+
+#[derive(Clone)]
+enum VSmallClockSetInner {
+    /// Zero or one vector elements, the common
+    /// case for the sparse set.
+    /// The all zero vector clock is treated
+    /// as equal to the empty element
+    Small(VectorIdx, VClock),
+
+    /// Hash-map of vector clocks
+    Large(FxHashMap<VectorIdx, VClock>)
+}
+
+impl VSmallClockSet {
+
+    /// Remove all clock vectors from the map, setting them
+    /// to the zero vector
+    pub fn clear(&mut self) {
+        match &mut self.0 {
+            VSmallClockSetInner::Small(_, clock) => {
+                clock.set_zero_vector()
+            }
+            VSmallClockSetInner::Large(hash_map) => {
+                hash_map.clear();
+            }
+        }
+    }
+
+    /// Remove all clock vectors except for the clock vector
+    /// stored at the given index, which is retained
+    pub fn retain_index(&mut self, index: VectorIdx) {
+        match &mut self.0 {
+            VSmallClockSetInner::Small(small_idx, clock) => {
+                if index != *small_idx {
+                    // The zero-vector is considered to equal
+                    // the empty element
+                    clock.set_zero_vector()
+                }
+            },
+            VSmallClockSetInner::Large(hash_map) => {
+                hash_map.retain(|idx,_| {
+                    *idx == index
+                });
+            }
+        }
+    }
+
+    /// Insert the vector clock into the associated vector
+    /// index
+    pub fn insert(&mut self, index: VectorIdx, clock: &VClock) {
+        match &mut self.0 {
+            VSmallClockSetInner::Small(small_idx, small_clock) => {
+                if small_clock.is_zero_vector() {
+                    *small_idx = index;
+                    small_clock.clone_from(clock);
+                }else if !clock.is_zero_vector() {
+                    let mut hash_map = FxHashMap::default();
+                    hash_map.insert(*small_idx, mem::take(small_clock));
+                    hash_map.insert(index, clock.clone());
+                    self.0 = VSmallClockSetInner::Large(hash_map);
+                }
+            },
+            VSmallClockSetInner::Large(hash_map) => {
+                if !clock.is_zero_vector() {
+                    hash_map.insert(index, clock.clone());
+                }
+            }
+        }
+    }
+
+    /// Try to load the vector clock associated with the current
+    /// vector index.
+    pub fn get(&self, index: VectorIdx) -> Option<&VClock> {
+        match &self.0 {
+            VSmallClockSetInner::Small(small_idx, small_clock) => {
+                if *small_idx == index && !small_clock.is_zero_vector() {
+                    Some(small_clock)
+                }else{
+                    None
+                }
+            },
+            VSmallClockSetInner::Large(hash_map) => {
+                hash_map.get(&index)
+            }
+        }
+    }
+}
+
+impl Default for VSmallClockSet {
+    #[inline]
+    fn default() -> Self {
+        VSmallClockSet(
+            VSmallClockSetInner::Small(VectorIdx::new(0), VClock::default())
+        )
+    }
+}
+
+impl Debug for VSmallClockSet {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        // Print the contents of the small vector clock set as the map
+        // of vector index to vector clock that they represent
+        let mut map = f.debug_map();
+        match &self.0 {
+            VSmallClockSetInner::Small(small_idx, small_clock) => {
+                if !small_clock.is_zero_vector() {
+                    map.entry(&small_idx, &small_clock);
+                }
+            },
+            VSmallClockSetInner::Large(hash_map) => {
+                for (idx, elem) in hash_map.iter() {
+                    map.entry(idx, elem);
+                }
+            }
+        }
+        map.finish()
+    }
+}
+impl PartialEq for VSmallClockSet {
+    fn eq(&self, other: &Self) -> bool {
+        use VSmallClockSetInner::*;
+        match (&self.0, &other.0) {
+            (Small(i1, c1), Small(i2, c2)) => {
+                if c1.is_zero_vector() {
+                    // Either they are both zero or they are non-equal
+                    c2.is_zero_vector()
+                }else{
+                    // At least one is non-zero, so the full comparison is correct
+                    i1 == i2 && c1 == c2
+                }
+            }
+            (VSmallClockSetInner::Small(idx, clock), VSmallClockSetInner::Large(hash_map)) |
+            (VSmallClockSetInner::Large(hash_map), VSmallClockSetInner::Small(idx, clock)) => {
+                if hash_map.len() == 0 {
+                    // Equal to the empty hash-map
+                    clock.is_zero_vector()
+                }else if hash_map.len() == 1 {
+                    // Equal to the hash-map with one element
+                    let (hash_idx, hash_clock) = hash_map.iter().next().unwrap();
+                    hash_idx == idx && hash_clock
== clock + }else{ + false + } + } + (Large(map1), Large(map2)) => { + map1 == map2 + } + } + } +} +impl Eq for VSmallClockSet {} + + + +/// The size of the vector-clock to store inline +/// clock vectors larger than this will be stored on the heap +const SMALL_VECTOR: usize = 4; + +/// The type of the time-stamps recorded in the data-race detector +/// set to a type of unsigned integer +pub type VTimestamp = u32; + +/// A vector clock for detecting data-races +/// invariants: +/// - the last element in a VClock must not be 0 +/// -- this means that derive(PartialEq & Eq) is correct +/// -- as there is no implicit zero tail that might be equal +/// -- also simplifies the implementation of PartialOrd +#[derive(PartialEq, Eq, Default, Debug)] +pub struct VClock(SmallVec<[VTimestamp; SMALL_VECTOR]>); + +impl VClock { + + /// Create a new vector-clock containing all zeros except + /// for a value at the given index + pub fn new_with_index(index: VectorIdx, timestamp: VTimestamp) -> VClock { + let len = index.index() + 1; + let mut vec = smallvec::smallvec![0; len]; + vec[index.index()] = timestamp; + VClock(vec) + } + + /// Load the internal timestamp slice in the vector clock + #[inline] + pub fn as_slice(&self) -> &[VTimestamp] { + self.0.as_slice() + } + + /// Get a mutable slice to the internal vector with minimum `min_len` + /// elements, to preserve invariants this vector must modify + /// the `min_len`-1 nth element to a non-zero value + #[inline] + fn get_mut_with_min_len(&mut self, min_len: usize) -> &mut [VTimestamp] { + if self.0.len() < min_len { + self.0.resize(min_len, 0); + } + assert!(self.0.len() >= min_len); + self.0.as_mut_slice() + } + + /// Increment the vector clock at a known index + /// this will panic if the vector index overflows + #[inline] + pub fn increment_index(&mut self, idx: VectorIdx) { + let idx = idx.index(); + let mut_slice = self.get_mut_with_min_len(idx + 1); + let idx_ref = &mut mut_slice[idx]; + *idx_ref = idx_ref.checked_add(1).expect("Vector clock overflow") + } + + // Join the two vector-clocks together, this + // sets each vector-element to the maximum value + // of that element in either of the two source elements. + pub fn join(&mut self, other: &Self) { + let rhs_slice = other.as_slice(); + let lhs_slice = self.get_mut_with_min_len(rhs_slice.len()); + for (l, &r) in lhs_slice.iter_mut().zip(rhs_slice.iter()) { + *l = r.max(*l); + } + } + + /// Set the element at the current index of the vector + pub fn set_at_index(&mut self, other: &Self, idx: VectorIdx) { + let idx = idx.index(); + let mut_slice = self.get_mut_with_min_len(idx + 1); + let slice = other.as_slice(); + mut_slice[idx] = slice[idx]; + } + + /// Set the vector to the all-zero vector + #[inline] + pub fn set_zero_vector(&mut self) { + self.0.clear(); + } + + /// Return if this vector is the all-zero vector + pub fn is_zero_vector(&self) -> bool { + self.0.is_empty() + } +} + +impl Clone for VClock { + fn clone(&self) -> Self { + VClock(self.0.clone()) + } + fn clone_from(&mut self, source: &Self) { + let source_slice = source.as_slice(); + self.0.clear(); + self.0.extend_from_slice(source_slice); + } +} + +impl PartialOrd for VClock { + fn partial_cmp(&self, other: &VClock) -> Option { + + // Load the values as slices + let lhs_slice = self.as_slice(); + let rhs_slice = other.as_slice(); + + // Iterate through the combined vector slice + // keeping track of the order that is currently possible to satisfy. 
+ // If an ordering relation is detected to be impossible, then bail and + // directly return None + let mut iter = lhs_slice.iter().zip(rhs_slice.iter()); + let mut order = match iter.next() { + Some((lhs, rhs)) => lhs.cmp(rhs), + None => Ordering::Equal + }; + for (l, r) in iter { + match order { + Ordering::Equal => order = l.cmp(r), + Ordering::Less => if l > r { + return None + }, + Ordering::Greater => if l < r { + return None + } + } + } + + //Now test if either left or right have trailing elements + // by the invariant the trailing elements have at least 1 + // non zero value, so no additional calculation is required + // to determine the result of the PartialOrder + let l_len = lhs_slice.len(); + let r_len = rhs_slice.len(); + match l_len.cmp(&r_len) { + // Equal has no additional elements: return current order + Ordering::Equal => Some(order), + // Right has at least 1 element > than the implicit 0, + // so the only valid values are Ordering::Less or None + Ordering::Less => match order { + Ordering::Less | Ordering::Equal => Some(Ordering::Less), + Ordering::Greater => None + } + // Left has at least 1 element > than the implicit 0, + // so the only valid values are Ordering::Greater or None + Ordering::Greater => match order { + Ordering::Greater | Ordering::Equal => Some(Ordering::Greater), + Ordering::Less => None + } + } + } + + fn lt(&self, other: &VClock) -> bool { + // Load the values as slices + let lhs_slice = self.as_slice(); + let rhs_slice = other.as_slice(); + + // If l_len > r_len then at least one element + // in l_len is > than r_len, therefore the result + // is either Some(Greater) or None, so return false + // early. + let l_len = lhs_slice.len(); + let r_len = rhs_slice.len(); + if l_len <= r_len { + // If any elements on the left are greater than the right + // then the result is None or Some(Greater), both of which + // return false, the earlier test asserts that no elements in the + // extended tail violate this assumption. Otherwise l <= r, finally + // the case where the values are potentially equal needs to be considered + // and false returned as well + let mut equal = l_len == r_len; + for (&l, &r) in lhs_slice.iter().zip(rhs_slice.iter()) { + if l > r { + return false + }else if l < r { + equal = false; + } + } + !equal + }else{ + false + } + } + + fn le(&self, other: &VClock) -> bool { + // Load the values as slices + let lhs_slice = self.as_slice(); + let rhs_slice = other.as_slice(); + + // If l_len > r_len then at least one element + // in l_len is > than r_len, therefore the result + // is either Some(Greater) or None, so return false + // early. + let l_len = lhs_slice.len(); + let r_len = rhs_slice.len(); + if l_len <= r_len { + // If any elements on the left are greater than the right + // then the result is None or Some(Greater), both of which + // return false, the earlier test asserts that no elements in the + // extended tail violate this assumption. Otherwise l <= r + !lhs_slice.iter().zip(rhs_slice.iter()).any(|(&l, &r)| l > r) + }else{ + false + } + } + + fn gt(&self, other: &VClock) -> bool { + // Load the values as slices + let lhs_slice = self.as_slice(); + let rhs_slice = other.as_slice(); + + // If r_len > l_len then at least one element + // in r_len is > than l_len, therefore the result + // is either Some(Less) or None, so return false + // early. 
+ let l_len = lhs_slice.len(); + let r_len = rhs_slice.len(); + if l_len >= r_len { + // If any elements on the left are less than the right + // then the result is None or Some(Less), both of which + // return false, the earlier test asserts that no elements in the + // extended tail violate this assumption. Otherwise l >=, finally + // the case where the values are potentially equal needs to be considered + // and false returned as well + let mut equal = l_len == r_len; + for (&l, &r) in lhs_slice.iter().zip(rhs_slice.iter()) { + if l < r { + return false + }else if l > r { + equal = false; + } + } + !equal + }else{ + false + } + } + + fn ge(&self, other: &VClock) -> bool { + // Load the values as slices + let lhs_slice = self.as_slice(); + let rhs_slice = other.as_slice(); + + // If r_len > l_len then at least one element + // in r_len is > than l_len, therefore the result + // is either Some(Less) or None, so return false + // early. + let l_len = lhs_slice.len(); + let r_len = rhs_slice.len(); + if l_len >= r_len { + // If any elements on the left are less than the right + // then the result is None or Some(Less), both of which + // return false, the earlier test asserts that no elements in the + // extended tail violate this assumption. Otherwise l >= r + !lhs_slice.iter().zip(rhs_slice.iter()).any(|(&l, &r)| l < r) + }else{ + false + } + } +} + +impl Index for VClock { + type Output = VTimestamp; + + #[inline] + fn index(&self, index: VectorIdx) -> &VTimestamp { + self.as_slice().get(index.to_u32() as usize).unwrap_or(&0) + } +} + + +/// Test vector clock ordering operations +/// data-race detection is tested in the external +/// test suite +#[cfg(test)] +mod tests { + use super::{VClock, VTimestamp, VectorIdx, VSmallClockSet}; + use std::cmp::Ordering; + + #[test] + fn test_equal() { + let mut c1 = VClock::default(); + let mut c2 = VClock::default(); + assert_eq!(c1, c2); + c1.increment_index(VectorIdx(5)); + assert_ne!(c1, c2); + c2.increment_index(VectorIdx(53)); + assert_ne!(c1, c2); + c1.increment_index(VectorIdx(53)); + assert_ne!(c1, c2); + c2.increment_index(VectorIdx(5)); + assert_eq!(c1, c2); + } + + #[test] + fn test_partial_order() { + // Small test + assert_order(&[1], &[1], Some(Ordering::Equal)); + assert_order(&[1], &[2], Some(Ordering::Less)); + assert_order(&[2], &[1], Some(Ordering::Greater)); + assert_order(&[1], &[1,2], Some(Ordering::Less)); + assert_order(&[2], &[1,2], None); + + // Misc tests + assert_order(&[400], &[0, 1], None); + + // Large test + assert_order(&[0,1,2,3,4,5,6,7,8,9,10], &[0,1,2,3,4,5,6,7,8,9,10,0,0,0], Some(Ordering::Equal)); + assert_order(&[0,1,2,3,4,5,6,7,8,9,10], &[0,1,2,3,4,5,6,7,8,9,10,0,1,0], Some(Ordering::Less)); + assert_order(&[0,1,2,3,4,5,6,7,8,9,11], &[0,1,2,3,4,5,6,7,8,9,10,0,0,0], Some(Ordering::Greater)); + assert_order(&[0,1,2,3,4,5,6,7,8,9,11], &[0,1,2,3,4,5,6,7,8,9,10,0,1,0], None); + assert_order(&[0,1,2,3,4,5,6,7,8,9,9 ], &[0,1,2,3,4,5,6,7,8,9,10,0,0,0], Some(Ordering::Less)); + assert_order(&[0,1,2,3,4,5,6,7,8,9,9 ], &[0,1,2,3,4,5,6,7,8,9,10,0,1,0], Some(Ordering::Less)); + } + + fn from_slice(mut slice: &[VTimestamp]) -> VClock { + while let Some(0) = slice.last() { + slice = &slice[..slice.len() - 1] + } + VClock(smallvec::SmallVec::from_slice(slice)) + } + + fn assert_order(l: &[VTimestamp], r: &[VTimestamp], o: Option) { + let l = from_slice(l); + let r = from_slice(r); + + //Test partial_cmp + let compare = l.partial_cmp(&r); + assert_eq!(compare, o, "Invalid comparison\n l: {:?}\n r: {:?}",l,r); + let 
alt_compare = r.partial_cmp(&l); + assert_eq!(alt_compare, o.map(Ordering::reverse), "Invalid alt comparison\n l: {:?}\n r: {:?}",l,r); + + //Test operatorsm with faster implementations + assert_eq!( + matches!(compare,Some(Ordering::Less)), l < r, + "Invalid (<):\n l: {:?}\n r: {:?}",l,r + ); + assert_eq!( + matches!(compare,Some(Ordering::Less) | Some(Ordering::Equal)), l <= r, + "Invalid (<=):\n l: {:?}\n r: {:?}",l,r + ); + assert_eq!( + matches!(compare,Some(Ordering::Greater)), l > r, + "Invalid (>):\n l: {:?}\n r: {:?}",l,r + ); + assert_eq!( + matches!(compare,Some(Ordering::Greater) | Some(Ordering::Equal)), l >= r, + "Invalid (>=):\n l: {:?}\n r: {:?}",l,r + ); + assert_eq!( + matches!(alt_compare,Some(Ordering::Less)), r < l, + "Invalid alt (<):\n l: {:?}\n r: {:?}",l,r + ); + assert_eq!( + matches!(alt_compare,Some(Ordering::Less) | Some(Ordering::Equal)), r <= l, + "Invalid alt (<=):\n l: {:?}\n r: {:?}",l,r + ); + assert_eq!( + matches!(alt_compare,Some(Ordering::Greater)), r > l, + "Invalid alt (>):\n l: {:?}\n r: {:?}",l,r + ); + assert_eq!( + matches!(alt_compare,Some(Ordering::Greater) | Some(Ordering::Equal)), r >= l, + "Invalid alt (>=):\n l: {:?}\n r: {:?}",l,r + ); + } + + #[test] + pub fn test_vclock_set() { + let mut set = VSmallClockSet::default(); + let v1 = from_slice(&[3,0,1]); + let v2 = from_slice(&[4,2,3]); + let v3 = from_slice(&[4,8,3]); + set.insert(VectorIdx(0), &v1); + assert_eq!(set.get(VectorIdx(0)), Some(&v1)); + set.insert(VectorIdx(5), &v2); + assert_eq!(set.get(VectorIdx(0)), Some(&v1)); + assert_eq!(set.get(VectorIdx(5)), Some(&v2)); + set.insert(VectorIdx(53), &v3); + assert_eq!(set.get(VectorIdx(0)), Some(&v1)); + assert_eq!(set.get(VectorIdx(5)), Some(&v2)); + assert_eq!(set.get(VectorIdx(53)), Some(&v3)); + set.retain_index(VectorIdx(53)); + assert_eq!(set.get(VectorIdx(0)), None); + assert_eq!(set.get(VectorIdx(5)), None); + assert_eq!(set.get(VectorIdx(53)), Some(&v3)); + set.clear(); + assert_eq!(set.get(VectorIdx(0)), None); + assert_eq!(set.get(VectorIdx(5)), None); + assert_eq!(set.get(VectorIdx(53)), None); + set.insert(VectorIdx(53), &v3); + assert_eq!(set.get(VectorIdx(0)), None); + assert_eq!(set.get(VectorIdx(5)), None); + assert_eq!(set.get(VectorIdx(53)), Some(&v3)); + } +} -- 2.44.0
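
The new vector_clock.rs owns the happens-before machinery the detector relies on: each thread advances its own clock component on every event, release-style synchronisation publishes the releasing thread's clock, acquire-style synchronisation joins that clock into the acquiring thread's clock, and two accesses race exactly when neither clock is less-or-equal to the other (the case where partial_cmp returns None). A minimal standalone sketch of those semantics, using plain Vec<u32> in place of the SmallVec-backed VClock introduced in this patch:

    // Toy vector clocks: join = element-wise max, increment = bump own component.
    // Missing entries are implicitly zero, mirroring VClock's trimmed-tail invariant.
    fn join(a: &mut Vec<u32>, b: &[u32]) {
        if a.len() < b.len() {
            a.resize(b.len(), 0);
        }
        for (l, &r) in a.iter_mut().zip(b) {
            *l = (*l).max(r);
        }
    }

    fn happens_before(a: &[u32], b: &[u32]) -> bool {
        // a happens-before b iff every component of a is <= the matching component of b.
        (0..a.len().max(b.len()))
            .all(|i| a.get(i).copied().unwrap_or(0) <= b.get(i).copied().unwrap_or(0))
    }

    fn main() {
        let (t0, t1) = (0usize, 1usize);
        let mut c0 = vec![0u32; 2]; // thread 0's clock
        let mut c1 = vec![0u32; 2]; // thread 1's clock

        c0[t0] += 1;        // thread 0 writes x (event A)
        join(&mut c1, &c0); // acquire: thread 1 receives thread 0's clock via a lock/message
        c1[t1] += 1;        // thread 1 reads x (event B)

        assert!(happens_before(&c0, &c1));  // A happens-before B: no race
        assert!(!happens_before(&c1, &c0));
        // Without the join (no synchronisation), the two clocks would be
        // incomparable and the conflicting accesses would be reported as a race.
    }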
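VSmallClockSet's representation choice, one inline (index, clock) pair that spills to a hash map only once a second non-zero clock is stored, is a general small-map optimisation: the all-zero clock doubles as the "empty" marker, so clearing and lookups of unassigned indices never allocate. A generic sketch of the same pattern (illustrative only; the names and trait bounds below are assumptions, not code from this patch):

    use std::collections::HashMap;

    // Sparse map optimised for the common case of at most one real entry.
    // Absent keys are treated as holding V::default().
    enum SmallMap<K, V> {
        One(K, V),           // inline slot; may hold the "empty" default value
        Many(HashMap<K, V>), // spilled representation once a second entry appears
    }

    impl<K: std::hash::Hash + Eq + Copy, V: Default + PartialEq> SmallMap<K, V> {
        fn insert(&mut self, key: K, value: V) {
            match self {
                SmallMap::One(k, v) => {
                    if *v == V::default() {
                        // Inline slot is free: store the new entry there.
                        *k = key;
                        *v = value;
                    } else if value != V::default() {
                        // Second real entry: spill both into a hash map.
                        let mut map = HashMap::new();
                        map.insert(*k, std::mem::take(v));
                        map.insert(key, value);
                        *self = SmallMap::Many(map);
                    }
                }
                SmallMap::Many(map) => {
                    if value != V::default() {
                        map.insert(key, value);
                    }
                }
            }
        }

        fn get(&self, key: K) -> Option<&V> {
            match self {
                SmallMap::One(k, v) if *k == key && *v != V::default() => Some(v),
                SmallMap::One(..) => None,
                SmallMap::Many(map) => map.get(&key),
            }
        }
    }

The trade-off matches the doc comment on VSmallClockSet: most entries in the detector are expected to carry at most one clock at a time, so the inline case avoids any heap allocation on the hot path.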
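The commit message also mentions re-using vector indices for new threads after earlier threads have terminated; the data_race.rs side of that change is not part of the hunks shown above. One plausible shape for such a scheme, sketched purely as an assumption rather than the actual implementation, is a pool of released indices that may only be handed out when the terminated owner's final clock happens-before the creating thread's current clock (plain usize and Vec<u32> stand in for VectorIdx and VClock here):

    // Hypothetical sketch of vector-index reuse after thread termination.
    struct ReusePool {
        // Indices of terminated threads, paired with the clock each thread
        // held when it terminated.
        candidates: Vec<(usize, Vec<u32>)>,
    }

    impl ReusePool {
        fn release(&mut self, index: usize, final_clock: Vec<u32>) {
            // Called once a thread has terminated and been joined/detached.
            self.candidates.push((index, final_clock));
        }

        fn acquire(&mut self, creator_clock: &[u32], next_fresh: &mut usize) -> usize {
            // A candidate index may be reused only if the old thread's final clock
            // happens-before the creating thread's clock, i.e. every event of the
            // old thread is already ordered before the spawn of the new one.
            if let Some(pos) = self.candidates.iter().position(|(_, final_clock)| {
                final_clock
                    .iter()
                    .enumerate()
                    .all(|(i, &t)| t <= creator_clock.get(i).copied().unwrap_or(0))
            }) {
                self.candidates.swap_remove(pos).0
            } else {
                // Otherwise allocate a fresh index, growing every clock vector.
                let idx = *next_fresh;
                *next_fresh += 1;
                idx
            }
        }
    }

Reusing indices this way keeps the clock vectors bounded by the number of concurrently live threads rather than by the total number of threads ever spawned, which is what makes sharing one VectorIdx across multiple ThreadIds (as noted in the VectorIdx doc comment) sound.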