//! The virtual memory representation of the MIR interpreter.

use std::borrow::Cow;
use std::convert::TryFrom;
use std::iter;
use std::ops::{Deref, DerefMut, Range};
use std::ptr;

use rustc_ast::Mutability;
use rustc_data_structures::sorted_map::SortedMap;
use rustc_target::abi::{Align, HasDataLayout, Size};

use super::{
    read_target_uint, write_target_uint, AllocId, InterpError, Pointer, Scalar, ScalarMaybeUninit,
    UndefinedBehaviorInfo, UninitBytesAccess, UnsupportedOpInfo,
};
/// This type represents an Allocation in the Miri/CTFE core engine.
///
/// Its public API is rather low-level, working directly with allocation offsets and a custom error
/// type to account for the lack of an AllocId on this level. The Miri/CTFE core engine `memory`
/// module provides higher-level access.
#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, Hash, TyEncodable, TyDecodable)]
pub struct Allocation<Tag = (), Extra = ()> {
    /// The actual bytes of the allocation.
    /// Note that the bytes of a pointer represent the offset of the pointer.
    bytes: Vec<u8>,
    /// Maps from byte addresses to extra data for each pointer.
    /// Only the first byte of a pointer is inserted into the map; i.e.,
    /// every entry in this map applies to `pointer_size` consecutive bytes starting
    /// at the given offset.
    relocations: Relocations<Tag>,
    /// Denotes which part of this allocation is initialized.
    init_mask: InitMask,
    /// The alignment of the allocation to detect unaligned reads.
    /// (`Align` guarantees that this is a power of two.)
    pub align: Align,
    /// `true` if the allocation is mutable.
    /// Also used by codegen to determine if a static should be put into mutable memory,
    /// which happens for `static mut` and `static` with interior mutability.
    pub mutability: Mutability,
    /// Extra state for the machine.
    pub extra: Extra,
}
/// We have our own error type that does not know about the `AllocId`; that information
/// is added when converting to `InterpError`.
pub enum AllocError {
    /// Encountered a pointer where we needed raw bytes.
    ReadPointerAsBytes,
    /// Using uninitialized data where it is not allowed.
    InvalidUninitBytes(Option<UninitBytesAccess>),
}
pub type AllocResult<T = ()> = Result<T, AllocError>;

impl AllocError {
    pub fn to_interp_error<'tcx>(self, alloc_id: AllocId) -> InterpError<'tcx> {
        match self {
            AllocError::ReadPointerAsBytes => {
                InterpError::Unsupported(UnsupportedOpInfo::ReadPointerAsBytes)
            }
            AllocError::InvalidUninitBytes(info) => InterpError::UndefinedBehavior(
                UndefinedBehaviorInfo::InvalidUninitBytes(info.map(|b| (alloc_id, b))),
            ),
        }
    }
}
/// The information that makes up a memory access: offset and size.
#[derive(Copy, Clone, Debug)]
pub struct AllocRange {
    pub start: Size,
    pub size: Size,
}

/// Free-standing constructor for less syntactic overhead.
pub fn alloc_range(start: Size, size: Size) -> AllocRange {
    AllocRange { start, size }
}

impl AllocRange {
    pub fn end(self) -> Size {
        self.start + self.size // This does overflow checking.
    }

    /// Returns the `subrange` within this range; panics if it is not a subrange.
    pub fn subrange(self, subrange: AllocRange) -> AllocRange {
        let sub_start = self.start + subrange.start;
        let range = alloc_range(sub_start, subrange.size);
        assert!(range.end() <= self.end(), "access outside the bounds for given AllocRange");
        range
    }
}
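
// A minimal usage sketch (not part of the interpreter itself): `subrange` re-bases a range
// that is relative to `self` onto absolute allocation offsets, so a 4-byte access at offset 2
// within a range starting at offset 16 resolves to offsets 18..22.
#[cfg(test)]
mod alloc_range_sketch {
    use super::*;

    #[test]
    fn subrange_rebases_relative_offsets() {
        let outer = alloc_range(Size::from_bytes(16), Size::from_bytes(8));
        let inner = outer.subrange(alloc_range(Size::from_bytes(2), Size::from_bytes(4)));
        assert_eq!(inner.start, Size::from_bytes(18));
        assert_eq!(inner.end(), Size::from_bytes(22));
    }
}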
// The constructors are all without extra; the extra gets added by a machine hook later.
impl<Tag> Allocation<Tag> {
    /// Creates an allocation initialized by the given bytes.
    pub fn from_bytes<'a>(
        slice: impl Into<Cow<'a, [u8]>>,
        align: Align,
        mutability: Mutability,
    ) -> Self {
        let bytes = slice.into().into_owned();
        let size = Size::from_bytes(bytes.len());
        Self {
            bytes,
            relocations: Relocations::new(),
            init_mask: InitMask::new(size, true),
            align,
            mutability,
            extra: (),
        }
    }

    pub fn from_bytes_byte_aligned_immutable<'a>(slice: impl Into<Cow<'a, [u8]>>) -> Self {
        Allocation::from_bytes(slice, Align::ONE, Mutability::Not)
    }

    pub fn uninit(size: Size, align: Align) -> Self {
        Allocation {
            bytes: vec![0; size.bytes_usize()],
            relocations: Relocations::new(),
            init_mask: InitMask::new(size, false),
            align,
            mutability: Mutability::Mut,
            extra: (),
        }
    }
}
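
// Illustrative sketch of the constructors above (not exercised by the engine): a byte-backed
// allocation starts fully initialized, while `uninit` starts with an all-clear init mask.
#[cfg(test)]
mod constructor_sketch {
    use super::*;

    #[test]
    fn from_bytes_is_initialized_and_uninit_is_not() {
        let a: Allocation = Allocation::from_bytes_byte_aligned_immutable(&[1_u8, 2, 3][..]);
        assert_eq!(a.len(), 3);
        assert_eq!(a.size(), Size::from_bytes(3));
        assert_eq!(a.mutability, Mutability::Not);
        assert!(a.init_mask().is_range_initialized(Size::ZERO, a.size()).is_ok());

        let b: Allocation = Allocation::uninit(Size::from_bytes(4), Align::ONE);
        assert!(b.init_mask().is_range_initialized(Size::ZERO, b.size()).is_err());
    }
}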
impl Allocation<()> {
    /// Adds `Tag` and `Extra` fields.
    pub fn with_tags_and_extra<T, E>(
        self,
        mut tagger: impl FnMut(AllocId) -> T,
        extra: E,
    ) -> Allocation<T, E> {
        Allocation {
            bytes: self.bytes,
            relocations: Relocations::from_presorted(
                self.relocations.iter()
                    // The allocations in the relocations (pointers stored *inside* this allocation)
                    // all get the base pointer tag.
                    .map(|&(offset, ((), alloc))| {
                        let tag = tagger(alloc);
                        (offset, (tag, alloc))
                    })
                    .collect(),
            ),
            init_mask: self.init_mask,
            align: self.align,
            mutability: self.mutability,
            extra,
        }
    }
}
/// Raw accessors. Provide access to otherwise private bytes.
impl<Tag, Extra> Allocation<Tag, Extra> {
    pub fn len(&self) -> usize {
        self.bytes.len()
    }
    pub fn size(&self) -> Size {
        Size::from_bytes(self.len())
    }
    /// Looks at a slice which may describe uninitialized bytes or describe a relocation. This differs
    /// from `get_bytes_with_uninit_and_ptr` in that it does no relocation checks (even on the
    /// edges) at all.
    /// This must not be used for reads affecting the interpreter execution.
    pub fn inspect_with_uninit_and_ptr_outside_interpreter(&self, range: Range<usize>) -> &[u8] {
        &self.bytes[range]
    }
    /// Returns the mask indicating which bytes are initialized.
    pub fn init_mask(&self) -> &InitMask {
        &self.init_mask
    }
    /// Returns the relocation list.
    pub fn relocations(&self) -> &Relocations<Tag> {
        &self.relocations
    }
}
/// Byte accessors.
impl<Tag: Copy, Extra> Allocation<Tag, Extra> {
    /// The last argument controls whether we error out when there are uninitialized
    /// or pointer bytes. You should never call this; call `get_bytes` or
    /// `get_bytes_with_uninit_and_ptr` instead.
    ///
    /// This function also guarantees that the resulting pointer will remain stable
    /// even when new allocations are pushed to the `HashMap`. `copy_repeatedly` relies
    /// on that.
    ///
    /// It is the caller's responsibility to check bounds and alignment beforehand.
    fn get_bytes_internal(
        &self,
        cx: &impl HasDataLayout,
        range: AllocRange,
        check_init_and_ptr: bool,
    ) -> AllocResult<&[u8]> {
        if check_init_and_ptr {
            self.check_init(range)?;
            self.check_relocations(cx, range)?;
        } else {
            // We still don't want relocations on the *edges*.
            self.check_relocation_edges(cx, range)?;
        }

        Ok(&self.bytes[range.start.bytes_usize()..range.end().bytes_usize()])
    }
    /// Checks that these bytes are initialized and not pointer bytes, and then returns them
    /// as a slice.
    ///
    /// It is the caller's responsibility to check bounds and alignment beforehand.
    /// Most likely, you want to use the `PlaceTy` and `OperandTy`-based methods
    /// on `InterpCx` instead.
    pub fn get_bytes(&self, cx: &impl HasDataLayout, range: AllocRange) -> AllocResult<&[u8]> {
        self.get_bytes_internal(cx, range, true)
    }

    /// It is the caller's responsibility to handle uninitialized and pointer bytes.
    /// However, this still checks that there are no relocations on the *edges*.
    ///
    /// It is the caller's responsibility to check bounds and alignment beforehand.
    pub fn get_bytes_with_uninit_and_ptr(
        &self,
        cx: &impl HasDataLayout,
        range: AllocRange,
    ) -> AllocResult<&[u8]> {
        self.get_bytes_internal(cx, range, false)
    }
    /// Just calling this already marks everything as defined and removes relocations,
    /// so be sure to actually put data there!
    ///
    /// It is the caller's responsibility to check bounds and alignment beforehand.
    /// Most likely, you want to use the `PlaceTy` and `OperandTy`-based methods
    /// on `InterpCx` instead.
    pub fn get_bytes_mut(&mut self, cx: &impl HasDataLayout, range: AllocRange) -> &mut [u8] {
        self.mark_init(range, true);
        self.clear_relocations(cx, range);

        &mut self.bytes[range.start.bytes_usize()..range.end().bytes_usize()]
    }

    /// A raw pointer variant of `get_bytes_mut` that avoids invalidating existing aliases into this memory.
    pub fn get_bytes_mut_ptr(&mut self, cx: &impl HasDataLayout, range: AllocRange) -> *mut [u8] {
        self.mark_init(range, true);
        self.clear_relocations(cx, range);

        assert!(range.end().bytes_usize() <= self.bytes.len()); // need to do our own bounds-check
        let begin_ptr = self.bytes.as_mut_ptr().wrapping_add(range.start.bytes_usize());
        let len = range.end().bytes_usize() - range.start.bytes_usize();
        ptr::slice_from_raw_parts_mut(begin_ptr, len)
    }
}
/// Reading and writing.
impl<Tag: Copy, Extra> Allocation<Tag, Extra> {
    /// Validates that `ptr.offset` and `ptr.offset + size` do not point to the middle of a
    /// relocation. If `allow_uninit_and_ptr` is `false`, also enforces that the memory in the
    /// given range contains neither relocations nor uninitialized bytes.
    pub fn check_bytes(
        &self,
        cx: &impl HasDataLayout,
        range: AllocRange,
        allow_uninit_and_ptr: bool,
    ) -> AllocResult {
        // Check bounds and relocations on the edges.
        self.get_bytes_with_uninit_and_ptr(cx, range)?;
        // Check uninit and ptr.
        if !allow_uninit_and_ptr {
            self.check_init(range)?;
            self.check_relocations(cx, range)?;
        }
        Ok(())
    }
    /// Reads a *non-ZST* scalar.
    ///
    /// ZSTs can't be read because in order to obtain a `Pointer`, we need to check
    /// for ZSTness anyway due to integer pointers being valid for ZSTs.
    ///
    /// It is the caller's responsibility to check bounds and alignment beforehand.
    /// Most likely, you want to call `InterpCx::read_scalar` instead of this method.
    pub fn read_scalar(
        &self,
        cx: &impl HasDataLayout,
        range: AllocRange,
    ) -> AllocResult<ScalarMaybeUninit<Tag>> {
        // `get_bytes_with_uninit_and_ptr` tests relocation edges.
        let bytes = self.get_bytes_with_uninit_and_ptr(cx, range)?;
        // The uninit check happens *after* we established that the alignment is correct.
        // We must not return `Ok()` for unaligned pointers!
        if self.is_init(range).is_err() {
            // This inflates uninitialized bytes to the entire scalar, even if only a few
            // bytes are uninitialized.
            return Ok(ScalarMaybeUninit::Uninit);
        }
        // Now we do the actual reading.
        let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap();
        // See if we got a pointer.
        if range.size != cx.data_layout().pointer_size {
            // *Now*, we better make sure that the inside is free of relocations too.
            self.check_relocations(cx, range)?;
        } else {
            // Maybe a pointer.
            if let Some(&(tag, alloc_id)) = self.relocations.get(&range.start) {
                let ptr = Pointer::new_with_tag(alloc_id, Size::from_bytes(bits), tag);
                return Ok(ScalarMaybeUninit::Scalar(ptr.into()));
            }
        }
        // We don't. Just return the bits.
        Ok(ScalarMaybeUninit::Scalar(Scalar::from_uint(bits, range.size)))
    }
    /// Writes a *non-ZST* scalar.
    ///
    /// ZSTs can't be written because in order to obtain a `Pointer`, we need to check
    /// for ZSTness anyway due to integer pointers being valid for ZSTs.
    ///
    /// It is the caller's responsibility to check bounds and alignment beforehand.
    /// Most likely, you want to call `InterpCx::write_scalar` instead of this method.
    pub fn write_scalar(
        &mut self,
        cx: &impl HasDataLayout,
        range: AllocRange,
        val: ScalarMaybeUninit<Tag>,
    ) -> AllocResult {
        let val = match val {
            ScalarMaybeUninit::Scalar(scalar) => scalar,
            ScalarMaybeUninit::Uninit => {
                self.mark_init(range, false);
                return Ok(());
            }
        };
        let bytes = match val.to_bits_or_ptr(range.size, cx) {
            Err(val) => u128::from(val.offset.bytes()),
            Ok(data) => data,
        };
        let endian = cx.data_layout().endian;
        let dst = self.get_bytes_mut(cx, range);
        write_target_uint(endian, dst, bytes).unwrap();
        // See if we have to also write a relocation.
        if let Scalar::Ptr(val) = val {
            self.relocations.insert(range.start, (val.tag, val.alloc_id));
        }
        Ok(())
    }
}
/// Relocations.
impl<Tag: Copy, Extra> Allocation<Tag, Extra> {
    /// Returns all relocations overlapping with the given pointer-offset pair.
    pub fn get_relocations(
        &self,
        cx: &impl HasDataLayout,
        range: AllocRange,
    ) -> &[(Size, (Tag, AllocId))] {
        // We have to go back `pointer_size - 1` bytes, as that one would still overlap with
        // the beginning of this range.
        let start = range.start.bytes().saturating_sub(cx.data_layout().pointer_size.bytes() - 1);
        self.relocations.range(Size::from_bytes(start)..range.end())
    }
    /// Checks that there are no relocations overlapping with the given range.
    fn check_relocations(&self, cx: &impl HasDataLayout, range: AllocRange) -> AllocResult {
        if self.get_relocations(cx, range).is_empty() {
            Ok(())
        } else {
            Err(AllocError::ReadPointerAsBytes)
        }
    }
    /// Removes all relocations inside the given range.
    /// If there are relocations overlapping with the edges, they
    /// are removed as well *and* the bytes they cover are marked as
    /// uninitialized. This is a somewhat odd "spooky action at a distance",
    /// but it allows strictly more code to run than if we would just error
    /// immediately in that case.
    fn clear_relocations(&mut self, cx: &impl HasDataLayout, range: AllocRange) {
        // Find the start and end of the given range and its outermost relocations.
        let (first, last) = {
            // Find all relocations overlapping the given range.
            let relocations = self.get_relocations(cx, range);
            if relocations.is_empty() {
                return;
            }

            (
                relocations.first().unwrap().0,
                relocations.last().unwrap().0 + cx.data_layout().pointer_size,
            )
        };
        let start = range.start;
        let end = range.end();

        // Mark parts of the outermost relocations as uninitialized if they partially fall outside the
        // given range.
        if first < start {
            self.init_mask.set_range(first, start, false);
        }
        if last > end {
            self.init_mask.set_range(end, last, false);
        }

        // Forget all the relocations.
        self.relocations.remove_range(first..last);
    }
    /// Errors if there are relocations overlapping with the edges of the
    /// given memory range.
    fn check_relocation_edges(&self, cx: &impl HasDataLayout, range: AllocRange) -> AllocResult {
        self.check_relocations(cx, alloc_range(range.start, Size::ZERO))?;
        self.check_relocations(cx, alloc_range(range.end(), Size::ZERO))?;
        Ok(())
    }
}
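
// Sketch of the `pointer_size - 1` back-off used by `get_relocations` above (illustrative
// only; the 8-byte pointer size is an assumption of this sketch): a relocation whose first
// byte sits at offset 5 still covers offset 8, so a query for an access starting at offset 8
// must begin at 8 - 7 = 1.
#[cfg(test)]
mod relocation_overlap_sketch {
    use super::*;

    #[test]
    fn query_start_is_moved_back_by_pointer_size_minus_one() {
        let pointer_size = Size::from_bytes(8); // assumed pointer size
        let access_start = Size::from_bytes(8);
        let query_start = access_start.bytes().saturating_sub(pointer_size.bytes() - 1);
        assert_eq!(query_start, 1);
        // Offset 5 >= query_start, so a relocation keyed at 5 (covering bytes 5..13) is found.
        assert!(5 >= query_start);
    }
}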
/// Uninitialized bytes.
impl<Tag: Copy, Extra> Allocation<Tag, Extra> {
    /// Checks whether the given range is entirely initialized.
    ///
    /// Returns `Ok(())` if it's initialized. Otherwise returns the range of byte
    /// indexes of the first contiguous uninitialized access.
    fn is_init(&self, range: AllocRange) -> Result<(), Range<Size>> {
        self.init_mask.is_range_initialized(range.start, range.end()) // `Size` addition
    }

    /// Checks that a range of bytes is initialized. If not, returns the `InvalidUninitBytes`
    /// error which will report the first range of bytes that is uninitialized.
    fn check_init(&self, range: AllocRange) -> AllocResult {
        self.is_init(range).or_else(|idx_range| {
            Err(AllocError::InvalidUninitBytes(Some(UninitBytesAccess {
                access_offset: range.start,
                access_size: range.size,
                uninit_offset: idx_range.start,
                uninit_size: idx_range.end - idx_range.start, // `Size` subtraction
            })))
        })
    }

    pub fn mark_init(&mut self, range: AllocRange, is_init: bool) {
        if range.size.bytes() == 0 {
            return;
        }
        self.init_mask.set_range(range.start, range.end(), is_init);
    }
}
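
// A small sketch of the init tracking above (not exercised by the interpreter): a fresh
// `uninit` allocation reports an uninitialized range until `mark_init` flips the bits.
#[cfg(test)]
mod mark_init_sketch {
    use super::*;

    #[test]
    fn mark_init_updates_the_init_mask() {
        let mut alloc: Allocation = Allocation::uninit(Size::from_bytes(4), Align::ONE);
        let end = Size::from_bytes(4);
        assert!(alloc.init_mask().is_range_initialized(Size::ZERO, end).is_err());
        alloc.mark_init(alloc_range(Size::ZERO, end), true);
        assert!(alloc.init_mask().is_range_initialized(Size::ZERO, end).is_ok());
    }
}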
/// Run-length encoding of the uninit mask.
/// Used to copy parts of a mask multiple times to another allocation.
pub struct InitMaskCompressed {
    /// Whether the first range is initialized.
    initial: bool,
    /// The lengths of ranges that are run-length encoded.
    /// The initialization state of the ranges alternates starting with `initial`.
    ranges: smallvec::SmallVec<[u64; 1]>,
}

impl InitMaskCompressed {
    pub fn no_bytes_init(&self) -> bool {
        // The `ranges` are run-length encoded and of alternating initialization state.
        // So if `ranges.len() > 1` then the second block is an initialized range.
        !self.initial && self.ranges.len() == 1
    }
}
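
// Illustrative sketch of the run-length encoding (not part of the engine): decoding the
// `[5, 1, 2, 1, 3, 3, 1]` example used in `compress_uninit_range` below reproduces the
// documented bit pattern 0000010010001110, and `no_bytes_init` is only true for a single
// all-uninitialized run.
#[cfg(test)]
mod init_mask_compressed_sketch {
    use super::*;

    #[test]
    fn single_uninit_run_has_no_bytes_init() {
        let all_uninit =
            InitMaskCompressed { initial: false, ranges: smallvec::smallvec![16] };
        assert!(all_uninit.no_bytes_init());
        let partly_init =
            InitMaskCompressed { initial: false, ranges: smallvec::smallvec![5, 11] };
        assert!(!partly_init.no_bytes_init());
    }

    #[test]
    fn runs_decode_back_to_the_documented_bit_pattern() {
        let runs: [u64; 7] = [5, 1, 2, 1, 3, 3, 1];
        let mut state = false; // corresponds to `initial`
        let mut bits = String::new();
        for &len in &runs {
            for _ in 0..len {
                bits.push(if state { '1' } else { '0' });
            }
            state = !state;
        }
        assert_eq!(bits, "0000010010001110");
    }
}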
/// Transferring the initialization mask to other allocations.
impl<Tag, Extra> Allocation<Tag, Extra> {
    /// Creates a run-length encoding of the initialization mask.
    pub fn compress_uninit_range(&self, src: Pointer<Tag>, size: Size) -> InitMaskCompressed {
        // Since we are copying `size` bytes from `src` to `dest + i * size` (`for i in 0..repeat`),
        // a naive initialization mask copying algorithm would repeatedly have to read the initialization mask from
        // the source and write it to the destination. Even if we optimized the memory accesses,
        // we'd be doing all of this `repeat` times.
        // Therefore we precompute a compressed version of the initialization mask of the source value and
        // then write it back `repeat` times without computing any more information from the source.

        // A precomputed cache for ranges of initialized / uninitialized bits
        // 0000010010001110 will become
        // `[5, 1, 2, 1, 3, 3, 1]`,
        // where each element toggles the state.

        let mut ranges = smallvec::SmallVec::<[u64; 1]>::new();
        let initial = self.init_mask.get(src.offset);
        let mut cur_len = 1;
        let mut cur = initial;

        for i in 1..size.bytes() {
            // FIXME: optimize to bitshift the current uninitialized block's bits and read the top bit.
            if self.init_mask.get(src.offset + Size::from_bytes(i)) == cur {
                cur_len += 1;
            } else {
                ranges.push(cur_len);
                cur_len = 1;
                cur = !cur;
            }
        }

        ranges.push(cur_len);

        InitMaskCompressed { ranges, initial }
    }
    /// Applies multiple instances of the run-length encoding to the initialization mask.
    pub fn mark_compressed_init_range(
        &mut self,
        defined: &InitMaskCompressed,
        dest: Pointer<Tag>,
        size: Size,
        repeat: u64,
    ) {
        // An optimization where we can just overwrite an entire range of initialization
        // bits if they are going to be uniformly `1` or `0`.
        if defined.ranges.len() <= 1 {
            self.init_mask.set_range_inbounds(dest.offset, dest.offset + size * repeat, defined.initial);
            return;
        }

        for mut j in 0..repeat {
            j *= size.bytes();
            j += dest.offset.bytes();
            let mut cur = defined.initial;
            for range in &defined.ranges {
                let old_j = j;
                j += range;
                self.init_mask.set_range_inbounds(Size::from_bytes(old_j), Size::from_bytes(j), cur);
                cur = !cur;
            }
        }
    }
}
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, TyEncodable, TyDecodable)]
pub struct Relocations<Tag = (), Id = AllocId>(SortedMap<Size, (Tag, Id)>);

impl<Tag, Id> Relocations<Tag, Id> {
    pub fn new() -> Self {
        Relocations(SortedMap::new())
    }

    // The caller must guarantee that the given relocations are already sorted
    // by address and contain no duplicates.
    pub fn from_presorted(r: Vec<(Size, (Tag, Id))>) -> Self {
        Relocations(SortedMap::from_presorted_elements(r))
    }
}

impl<Tag> Deref for Relocations<Tag> {
    type Target = SortedMap<Size, (Tag, AllocId)>;

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl<Tag> DerefMut for Relocations<Tag> {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.0
    }
}
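
// Sketch of the `Relocations` map (illustrative only): each entry is keyed by the offset of
// the *first* byte of a pointer and carries its tag and target allocation id. A plain `u64`
// stands in for the allocation id here purely to keep the sketch self-contained.
#[cfg(test)]
mod relocations_sketch {
    use super::*;

    #[test]
    fn entries_are_keyed_by_the_pointer_start_offset() {
        let relocations: Relocations<(), u64> =
            Relocations::from_presorted(vec![(Size::from_bytes(8), ((), 42))]);
        assert_eq!(relocations.0.get(&Size::from_bytes(8)), Some(&((), 42)));
        assert_eq!(relocations.0.get(&Size::from_bytes(9)), None);
    }
}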
/// A partial, owned list of relocations to transfer into another allocation.
pub struct AllocationRelocations<Tag> {
    relative_relocations: Vec<(Size, (Tag, AllocId))>,
}

impl<Tag: Copy, Extra> Allocation<Tag, Extra> {
    pub fn prepare_relocation_copy(
        &self,
        cx: &impl HasDataLayout,
        src: AllocRange,
        dest: Size,
        count: u64,
    ) -> AllocationRelocations<Tag> {
        let relocations = self.get_relocations(cx, src);
        if relocations.is_empty() {
            return AllocationRelocations { relative_relocations: Vec::new() };
        }

        let size = src.size;
        let mut new_relocations = Vec::with_capacity(relocations.len() * (count as usize));
        for i in 0..count {
            new_relocations.extend(relocations.iter().map(|&(offset, reloc)| {
                // Compute the offset for the current repetition.
                let dest_offset = dest + size * i; // `Size` operations
                (
                    // Shift offsets from the source allocation to the destination allocation.
                    (offset + dest_offset) - src.start, // `Size` operations
                    reloc,
                )
            }));
        }

        AllocationRelocations { relative_relocations: new_relocations }
    }
    /// Applies a relocation copy.
    /// The affected range, as defined in the parameters to `prepare_relocation_copy`, is expected
    /// to be clear of relocations.
    pub fn mark_relocation_range(&mut self, relocations: AllocationRelocations<Tag>) {
        self.relocations.insert_presorted(relocations.relative_relocations);
    }
}
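
// Worked example of the offset arithmetic in `prepare_relocation_copy` (illustrative only):
// a relocation at source offset 20, inside a source range starting at offset 16, copied to
// destination offset 100 (first repetition, `i == 0`), lands at 20 + 100 - 16 = 104.
#[cfg(test)]
mod relocation_copy_sketch {
    use super::*;

    #[test]
    fn copied_relocations_are_rebased_onto_the_destination() {
        let src_start = Size::from_bytes(16);
        let reloc_offset = Size::from_bytes(20);
        let dest_offset = Size::from_bytes(100); // `dest + size * i` with `i == 0`
        assert_eq!((reloc_offset + dest_offset) - src_start, Size::from_bytes(104));
    }
}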
////////////////////////////////////////////////////////////////////////////////
// Uninitialized byte tracking
////////////////////////////////////////////////////////////////////////////////

type Block = u64;

/// A bitmask where each bit refers to the byte with the same index. If the bit is `true`, the byte
/// is initialized. If it is `false`, the byte is uninitialized.
#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, Hash, TyEncodable, TyDecodable)]
#[derive(HashStable)]
pub struct InitMask {
    blocks: Vec<Block>,
    len: Size,
}

impl InitMask {
    pub const BLOCK_SIZE: u64 = 64;
    pub fn new(size: Size, state: bool) -> Self {
        let mut m = InitMask { blocks: vec![], len: Size::ZERO };
        m.grow(size, state);
        m
    }

    /// Checks whether the range `start..end` (end-exclusive) is entirely initialized.
    ///
    /// Returns `Ok(())` if it's initialized. Otherwise returns a range of byte
    /// indexes for the first contiguous span of the uninitialized access.
    pub fn is_range_initialized(&self, start: Size, end: Size) -> Result<(), Range<Size>> {
        if end > self.len {
            return Err(self.len..end);
        }
        // FIXME(oli-obk): optimize this for allocations larger than a block.
        let idx = (start.bytes()..end.bytes()).map(Size::from_bytes).find(|&i| !self.get(i));
        match idx {
            Some(idx) => {
                let uninit_end = (idx.bytes()..end.bytes())
                    .map(Size::from_bytes)
                    .find(|&i| self.get(i))
                    .unwrap_or(end);
                Err(idx..uninit_end)
            }
            None => Ok(()),
        }
    }
    pub fn set_range(&mut self, start: Size, end: Size, new_state: bool) {
        let len = self.len;
        if end > len {
            self.grow(end - len, new_state);
        }
        self.set_range_inbounds(start, end, new_state);
    }

    pub fn set_range_inbounds(&mut self, start: Size, end: Size, new_state: bool) {
        let (blocka, bita) = bit_index(start);
        let (blockb, bitb) = bit_index(end);
        if blocka == blockb {
            // First set all bits except the first `bita`,
            // then unset the last `64 - bitb` bits.
            let range = if bitb == 0 {
                u64::MAX << bita
            } else {
                (u64::MAX << bita) & (u64::MAX >> (64 - bitb))
            };
            if new_state {
                self.blocks[blocka] |= range;
            } else {
                self.blocks[blocka] &= !range;
            }
            return;
        }
        // Across block boundaries.
        if new_state {
            // Set `bita..64` to `1`.
            self.blocks[blocka] |= u64::MAX << bita;
            // Set `0..bitb` to `1`.
            if bitb != 0 {
                self.blocks[blockb] |= u64::MAX >> (64 - bitb);
            }
            // Fill in all the other blocks (much faster than one bit at a time).
            for block in (blocka + 1)..blockb {
                self.blocks[block] = u64::MAX;
            }
        } else {
            // Set `bita..64` to `0`.
            self.blocks[blocka] &= !(u64::MAX << bita);
            // Set `0..bitb` to `0`.
            if bitb != 0 {
                self.blocks[blockb] &= !(u64::MAX >> (64 - bitb));
            }
            // Fill in all the other blocks (much faster than one bit at a time).
            for block in (blocka + 1)..blockb {
                self.blocks[block] = 0;
            }
        }
    }
    pub fn get(&self, i: Size) -> bool {
        let (block, bit) = bit_index(i);
        (self.blocks[block] & (1 << bit)) != 0
    }

    pub fn set(&mut self, i: Size, new_state: bool) {
        let (block, bit) = bit_index(i);
        self.set_bit(block, bit, new_state);
    }

    fn set_bit(&mut self, block: usize, bit: usize, new_state: bool) {
        if new_state {
            self.blocks[block] |= 1 << bit;
        } else {
            self.blocks[block] &= !(1 << bit);
        }
    }
    pub fn grow(&mut self, amount: Size, new_state: bool) {
        if amount.bytes() == 0 {
            return;
        }
        let unused_trailing_bits =
            u64::try_from(self.blocks.len()).unwrap() * Self::BLOCK_SIZE - self.len.bytes();
        if amount.bytes() > unused_trailing_bits {
            let additional_blocks = amount.bytes() / Self::BLOCK_SIZE + 1;
            self.blocks.extend(
                // FIXME(oli-obk): optimize this by repeating `new_state as Block`.
                iter::repeat(0).take(usize::try_from(additional_blocks).unwrap()),
            );
        }
        let start = self.len;
        self.len += amount;
        self.set_range_inbounds(start, start + amount, new_state); // `Size` operation
    }
}
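
// Small sketch of the block-based mask above (illustrative only, not used by the engine):
// `new`/`grow` size the bitset in 64-bit blocks, and `set_range_inbounds` flips exactly the
// requested byte range.
#[cfg(test)]
mod init_mask_sketch {
    use super::*;

    #[test]
    fn grow_initializes_the_newly_added_bytes() {
        let mut mask = InitMask::new(Size::from_bytes(4), false);
        mask.grow(Size::from_bytes(8), true);
        assert!(!mask.get(Size::from_bytes(3))); // old bytes keep their state
        assert!(mask.get(Size::from_bytes(4))); // newly grown bytes use `new_state`
        assert!(mask.get(Size::from_bytes(11)));
    }

    #[test]
    fn set_range_inbounds_flips_only_the_requested_range() {
        let mut mask = InitMask::new(Size::from_bytes(16), false);
        mask.set_range_inbounds(Size::from_bytes(3), Size::from_bytes(10), true);
        assert!(!mask.get(Size::from_bytes(2)));
        assert!(mask.get(Size::from_bytes(3)));
        assert!(mask.get(Size::from_bytes(9)));
        assert!(!mask.get(Size::from_bytes(10)));
    }
}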
fn bit_index(bits: Size) -> (usize, usize) {
    let bits = bits.bytes();
    let a = bits / InitMask::BLOCK_SIZE;
    let b = bits % InitMask::BLOCK_SIZE;
    (usize::try_from(a).unwrap(), usize::try_from(b).unwrap())
}
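
// Worked example for `bit_index` (illustrative only): byte offset 70 falls into block
// 70 / 64 = 1 at bit 70 % 64 = 6.
#[cfg(test)]
mod bit_index_sketch {
    use super::*;

    #[test]
    fn byte_offsets_split_into_block_and_bit() {
        assert_eq!(bit_index(Size::from_bytes(0)), (0, 0));
        assert_eq!(bit_index(Size::from_bytes(70)), (1, 6));
    }
}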