From: bors Date: Wed, 11 Jul 2018 16:24:46 +0000 (+0000) Subject: Auto merge of #51702 - ecstatic-morse:infinite-loop-detection, r=oli-obk X-Git-Url: https://git.lizzy.rs/?a=commitdiff_plain;h=d573fe17786b9c2f5a766498d411d54eee5fa19f;hp=66787e05242d86e0bcfa227265559330c45cdc42;p=rust.git Auto merge of #51702 - ecstatic-morse:infinite-loop-detection, r=oli-obk Infinite loop detection for const evaluation Resolves #50637. An `EvalContext` stores the transient state (stack, heap, etc.) of the MIRI virtual machine while it executing code. As long as MIRI only executes pure functions, we can detect if a program is in a state where it will never terminate by periodically taking a "snapshot" of this transient state and comparing it to previous ones. If any two states are exactly equal, the machine must be in an infinite loop. Instead of fully cloning a snapshot every time the detector is run, we store a snapshot's hash. Only when a hash collision occurs do we fully clone the interpreter state. Future snapshots which cause a collision will be compared against this clone, causing the interpreter to abort if they are equal. At the moment, snapshots are not taken until MIRI has progressed a certain amount. After this threshold, snapshots are taken every `DETECTOR_SNAPSHOT_PERIOD` steps. This means that an infinite loop with period `P` will be detected after a maximum of `2 * P * DETECTOR_SNAPSHOT_PERIOD` interpreter steps. The factor of 2 arises because we only clone a snapshot after it causes a hash collision. --- diff --git a/src/librustc/ich/impls_ty.rs b/src/librustc/ich/impls_ty.rs index 5753557a102..a3600c04800 100644 --- a/src/librustc/ich/impls_ty.rs +++ b/src/librustc/ich/impls_ty.rs @@ -549,7 +549,8 @@ fn hash_stable(&self, RemainderByZero | DivisionByZero | GeneratorResumedAfterReturn | - GeneratorResumedAfterPanic => {} + GeneratorResumedAfterPanic | + InfiniteLoop => {} ReferencedConstant(ref err) => err.hash_stable(hcx, hasher), MachineError(ref err) => err.hash_stable(hcx, hasher), FunctionPointerTyMismatch(a, b) => { diff --git a/src/librustc/mir/interpret/error.rs b/src/librustc/mir/interpret/error.rs index 7e2c144e0a7..9125597a727 100644 --- a/src/librustc/mir/interpret/error.rs +++ b/src/librustc/mir/interpret/error.rs @@ -264,6 +264,7 @@ pub enum EvalErrorKind<'tcx, O> { ReferencedConstant(Lrc>), GeneratorResumedAfterReturn, GeneratorResumedAfterPanic, + InfiniteLoop, } pub type EvalResult<'tcx, T = ()> = Result>; @@ -398,6 +399,8 @@ pub fn description(&self) -> &str { RemainderByZero => "attempt to calculate the remainder with a divisor of zero", GeneratorResumedAfterReturn => "generator resumed after completion", GeneratorResumedAfterPanic => "generator resumed after panicking", + InfiniteLoop => + "duplicate interpreter state observed here, const evaluation will never terminate", } } } diff --git a/src/librustc/mir/interpret/mod.rs b/src/librustc/mir/interpret/mod.rs index f5b449d68e7..4164fe3fd93 100644 --- a/src/librustc/mir/interpret/mod.rs +++ b/src/librustc/mir/interpret/mod.rs @@ -36,7 +36,7 @@ macro_rules! err { use std::sync::atomic::{AtomicU32, Ordering}; use std::num::NonZeroU32; -#[derive(Clone, Debug, PartialEq, RustcEncodable, RustcDecodable)] +#[derive(Clone, Debug, PartialEq, Eq, Hash, RustcEncodable, RustcDecodable)] pub enum Lock { NoLock, WriteLock(DynamicLifetime), diff --git a/src/librustc/mir/mod.rs b/src/librustc/mir/mod.rs index cd4b32735e5..fd443d8973c 100644 --- a/src/librustc/mir/mod.rs +++ b/src/librustc/mir/mod.rs @@ -1636,7 +1636,7 @@ fn fmt(&self, fmt: &mut Formatter) -> fmt::Result { } // This is generic so that it can be reused by miri -#[derive(Clone, RustcEncodable, RustcDecodable)] +#[derive(Clone, Hash, PartialEq, Eq, RustcEncodable, RustcDecodable)] pub struct ValidationOperand<'tcx, T> { pub place: T, pub ty: Ty<'tcx>, diff --git a/src/librustc/ty/structural_impls.rs b/src/librustc/ty/structural_impls.rs index c84999a7e59..874dabaf1c9 100644 --- a/src/librustc/ty/structural_impls.rs +++ b/src/librustc/ty/structural_impls.rs @@ -587,6 +587,7 @@ fn lift_to_tcx<'b, 'gcx>(&self, tcx: TyCtxt<'b, 'gcx, 'tcx>) -> Option RemainderByZero, GeneratorResumedAfterReturn => GeneratorResumedAfterReturn, GeneratorResumedAfterPanic => GeneratorResumedAfterPanic, + InfiniteLoop => InfiniteLoop, }) } } diff --git a/src/librustc_mir/interpret/const_eval.rs b/src/librustc_mir/interpret/const_eval.rs index ea09bab5d14..e84132f27cc 100644 --- a/src/librustc_mir/interpret/const_eval.rs +++ b/src/librustc_mir/interpret/const_eval.rs @@ -185,6 +185,7 @@ fn eval_body_using_ecx<'a, 'mir, 'tcx>( Ok((value, ptr, layout.ty)) } +#[derive(Debug, Clone, Eq, PartialEq, Hash)] pub struct CompileTimeEvaluator; impl<'tcx> Into> for ConstEvalError { diff --git a/src/librustc_mir/interpret/eval_context.rs b/src/librustc_mir/interpret/eval_context.rs index 031c75013a2..a92c81e046a 100644 --- a/src/librustc_mir/interpret/eval_context.rs +++ b/src/librustc_mir/interpret/eval_context.rs @@ -1,4 +1,5 @@ use std::fmt::Write; +use std::hash::{Hash, Hasher}; use std::mem; use rustc::hir::def_id::DefId; @@ -9,6 +10,7 @@ use rustc::ty::subst::{Subst, Substs}; use rustc::ty::{self, Ty, TyCtxt, TypeAndMut}; use rustc::ty::query::TyCtxtAt; +use rustc_data_structures::fx::{FxHashSet, FxHasher}; use rustc_data_structures::indexed_vec::{IndexVec, Idx}; use rustc::mir::interpret::{ FrameInfo, GlobalId, Value, Scalar, @@ -41,13 +43,17 @@ pub struct EvalContext<'a, 'mir, 'tcx: 'a + 'mir, M: Machine<'mir, 'tcx>> { /// The maximum number of stack frames allowed pub(crate) stack_limit: usize, - /// The maximum number of terminators that may be evaluated. - /// This prevents infinite loops and huge computations from freezing up const eval. - /// Remove once halting problem is solved. - pub(crate) terminators_remaining: usize, + /// When this value is negative, it indicates the number of interpreter + /// steps *until* the loop detector is enabled. When it is positive, it is + /// the number of steps after the detector has been enabled modulo the loop + /// detector period. + pub(crate) steps_since_detector_enabled: isize, + + pub(crate) loop_detector: InfiniteLoopDetector<'a, 'mir, 'tcx, M>, } /// A stack frame. +#[derive(Clone)] pub struct Frame<'mir, 'tcx: 'mir> { //////////////////////////////////////////////////////////////////////////////// // Function and callsite information @@ -89,6 +95,121 @@ pub struct Frame<'mir, 'tcx: 'mir> { pub stmt: usize, } +impl<'mir, 'tcx: 'mir> Eq for Frame<'mir, 'tcx> {} + +impl<'mir, 'tcx: 'mir> PartialEq for Frame<'mir, 'tcx> { + fn eq(&self, other: &Self) -> bool { + let Frame { + mir: _, + instance, + span: _, + return_to_block, + return_place, + locals, + block, + stmt, + } = self; + + // Some of these are constant during evaluation, but are included + // anyways for correctness. + *instance == other.instance + && *return_to_block == other.return_to_block + && *return_place == other.return_place + && *locals == other.locals + && *block == other.block + && *stmt == other.stmt + } +} + +impl<'mir, 'tcx: 'mir> Hash for Frame<'mir, 'tcx> { + fn hash(&self, state: &mut H) { + let Frame { + mir: _, + instance, + span: _, + return_to_block, + return_place, + locals, + block, + stmt, + } = self; + + instance.hash(state); + return_to_block.hash(state); + return_place.hash(state); + locals.hash(state); + block.hash(state); + stmt.hash(state); + } +} + +/// The virtual machine state during const-evaluation at a given point in time. +type EvalSnapshot<'a, 'mir, 'tcx, M> + = (M, Vec>, Memory<'a, 'mir, 'tcx, M>); + +pub(crate) struct InfiniteLoopDetector<'a, 'mir, 'tcx: 'a + 'mir, M: Machine<'mir, 'tcx>> { + /// The set of all `EvalSnapshot` *hashes* observed by this detector. + /// + /// When a collision occurs in this table, we store the full snapshot in + /// `snapshots`. + hashes: FxHashSet, + + /// The set of all `EvalSnapshot`s observed by this detector. + /// + /// An `EvalSnapshot` will only be fully cloned once it has caused a + /// collision in `hashes`. As a result, the detector must observe at least + /// *two* full cycles of an infinite loop before it triggers. + snapshots: FxHashSet>, +} + +impl<'a, 'mir, 'tcx, M> Default for InfiniteLoopDetector<'a, 'mir, 'tcx, M> + where M: Machine<'mir, 'tcx>, + 'tcx: 'a + 'mir, +{ + fn default() -> Self { + InfiniteLoopDetector { + hashes: FxHashSet::default(), + snapshots: FxHashSet::default(), + } + } +} + +impl<'a, 'mir, 'tcx, M> InfiniteLoopDetector<'a, 'mir, 'tcx, M> + where M: Machine<'mir, 'tcx>, + 'tcx: 'a + 'mir, +{ + /// Returns `true` if the loop detector has not yet observed a snapshot. + pub fn is_empty(&self) -> bool { + self.hashes.is_empty() + } + + pub fn observe_and_analyze( + &mut self, + machine: &M, + stack: &Vec>, + memory: &Memory<'a, 'mir, 'tcx, M>, + ) -> EvalResult<'tcx, ()> { + let snapshot = (machine, stack, memory); + + let mut fx = FxHasher::default(); + snapshot.hash(&mut fx); + let hash = fx.finish(); + + if self.hashes.insert(hash) { + // No collision + return Ok(()) + } + + if self.snapshots.insert((machine.clone(), stack.clone(), memory.clone())) { + // Spurious collision or first cycle + return Ok(()) + } + + // Second cycle + Err(EvalErrorKind::InfiniteLoop.into()) + } +} + #[derive(Clone, Debug, Eq, PartialEq, Hash)] pub enum StackPopCleanup { /// The stackframe existed to compute the initial value of a static/constant, make sure it @@ -173,7 +294,7 @@ fn layout_of(self, ty: Ty<'tcx>) -> Self::TyLayout { } } -const MAX_TERMINATORS: usize = 1_000_000; +const STEPS_UNTIL_DETECTOR_ENABLED: isize = 1_000_000; impl<'a, 'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> EvalContext<'a, 'mir, 'tcx, M> { pub fn new( @@ -189,16 +310,17 @@ pub fn new( memory: Memory::new(tcx, memory_data), stack: Vec::new(), stack_limit: tcx.sess.const_eval_stack_frame_limit, - terminators_remaining: MAX_TERMINATORS, + loop_detector: Default::default(), + steps_since_detector_enabled: -STEPS_UNTIL_DETECTOR_ENABLED, } } pub(crate) fn with_fresh_body R, R>(&mut self, f: F) -> R { let stack = mem::replace(&mut self.stack, Vec::new()); - let terminators_remaining = mem::replace(&mut self.terminators_remaining, MAX_TERMINATORS); + let steps = mem::replace(&mut self.steps_since_detector_enabled, -STEPS_UNTIL_DETECTOR_ENABLED); let r = f(self); self.stack = stack; - self.terminators_remaining = terminators_remaining; + self.steps_since_detector_enabled = steps; r } @@ -538,8 +660,6 @@ pub(super) fn eval_rvalue_into_place( } Aggregate(ref kind, ref operands) => { - self.inc_step_counter_and_check_limit(operands.len()); - let (dest, active_field_index) = match **kind { mir::AggregateKind::Adt(adt_def, variant_index, _, active_field_index) => { self.write_discriminant_value(dest_ty, dest, variant_index)?; diff --git a/src/librustc_mir/interpret/machine.rs b/src/librustc_mir/interpret/machine.rs index 4d04900320f..e2086c57c7c 100644 --- a/src/librustc_mir/interpret/machine.rs +++ b/src/librustc_mir/interpret/machine.rs @@ -2,6 +2,8 @@ //! This separation exists to ensure that no fancy miri features like //! interpreting common C functions leak into CTFE. +use std::hash::Hash; + use rustc::mir::interpret::{AllocId, EvalResult, Scalar, Pointer, AccessKind, GlobalId}; use super::{EvalContext, Place, ValTy, Memory}; @@ -13,9 +15,9 @@ /// Methods of this trait signifies a point where CTFE evaluation would fail /// and some use case dependent behaviour can instead be applied -pub trait Machine<'mir, 'tcx>: Sized { +pub trait Machine<'mir, 'tcx>: Clone + Eq + Hash { /// Additional data that can be accessed via the Memory - type MemoryData; + type MemoryData: Clone + Eq + Hash; /// Additional memory kinds a machine wishes to distinguish from the builtin ones type MemoryKinds: ::std::fmt::Debug + PartialEq + Copy + Clone; diff --git a/src/librustc_mir/interpret/memory.rs b/src/librustc_mir/interpret/memory.rs index 5cf734cce8a..ac4d1c74b8c 100644 --- a/src/librustc_mir/interpret/memory.rs +++ b/src/librustc_mir/interpret/memory.rs @@ -1,4 +1,5 @@ use std::collections::VecDeque; +use std::hash::{Hash, Hasher}; use std::ptr; use rustc::hir::def_id::DefId; @@ -9,7 +10,7 @@ use rustc::mir::interpret::{Pointer, AllocId, Allocation, AccessKind, Value, EvalResult, Scalar, EvalErrorKind, GlobalId, AllocType}; pub use rustc::mir::interpret::{write_target_uint, write_target_int, read_target_uint}; -use rustc_data_structures::fx::{FxHashSet, FxHashMap}; +use rustc_data_structures::fx::{FxHashSet, FxHashMap, FxHasher}; use syntax::ast::Mutability; @@ -19,7 +20,7 @@ // Allocations and pointers //////////////////////////////////////////////////////////////////////////////// -#[derive(Debug, PartialEq, Copy, Clone)] +#[derive(Debug, PartialEq, Eq, Copy, Clone)] pub enum MemoryKind { /// Error if deallocated except during a stack pop Stack, @@ -31,6 +32,7 @@ pub enum MemoryKind { // Top-level interpreter memory //////////////////////////////////////////////////////////////////////////////// +#[derive(Clone)] pub struct Memory<'a, 'mir, 'tcx: 'a + 'mir, M: Machine<'mir, 'tcx>> { /// Additional data required by the Machine pub data: M::MemoryData, @@ -47,6 +49,64 @@ pub struct Memory<'a, 'mir, 'tcx: 'a + 'mir, M: Machine<'mir, 'tcx>> { pub tcx: TyCtxtAt<'a, 'tcx, 'tcx>, } +impl<'a, 'mir, 'tcx, M> Eq for Memory<'a, 'mir, 'tcx, M> + where M: Machine<'mir, 'tcx>, + 'tcx: 'a + 'mir, +{} + +impl<'a, 'mir, 'tcx, M> PartialEq for Memory<'a, 'mir, 'tcx, M> + where M: Machine<'mir, 'tcx>, + 'tcx: 'a + 'mir, +{ + fn eq(&self, other: &Self) -> bool { + let Memory { + data, + alloc_kind, + alloc_map, + cur_frame, + tcx: _, + } = self; + + *data == other.data + && *alloc_kind == other.alloc_kind + && *alloc_map == other.alloc_map + && *cur_frame == other.cur_frame + } +} + +impl<'a, 'mir, 'tcx, M> Hash for Memory<'a, 'mir, 'tcx, M> + where M: Machine<'mir, 'tcx>, + 'tcx: 'a + 'mir, +{ + fn hash(&self, state: &mut H) { + let Memory { + data, + alloc_kind: _, + alloc_map: _, + cur_frame, + tcx: _, + } = self; + + data.hash(state); + cur_frame.hash(state); + + // We ignore some fields which don't change between evaluation steps. + + // Since HashMaps which contain the same items may have different + // iteration orders, we use a commutative operation (in this case + // addition, but XOR would also work), to combine the hash of each + // `Allocation`. + self.allocations() + .map(|allocs| { + let mut h = FxHasher::default(); + allocs.hash(&mut h); + h.finish() + }) + .fold(0u64, |hash, x| hash.wrapping_add(x)) + .hash(state); + } +} + impl<'a, 'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'a, 'mir, 'tcx, M> { pub fn new(tcx: TyCtxtAt<'a, 'tcx, 'tcx>, data: M::MemoryData) -> Self { Memory { @@ -866,7 +926,7 @@ fn copy_undef_mask( for i in 0..size.bytes() { let defined = undef_mask.get(src.offset + Size::from_bytes(i)); - + for j in 0..repeat { dest_allocation.undef_mask.set( dest.offset + Size::from_bytes(i + (size.bytes() * j)), diff --git a/src/librustc_mir/interpret/place.rs b/src/librustc_mir/interpret/place.rs index 51b33fa54b2..bb8e5c99d49 100644 --- a/src/librustc_mir/interpret/place.rs +++ b/src/librustc_mir/interpret/place.rs @@ -7,7 +7,7 @@ use super::{EvalContext, Machine, ValTy}; use interpret::memory::HasMemory; -#[derive(Copy, Clone, Debug)] +#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq)] pub enum Place { /// A place referring to a value allocated in the `Memory` system. Ptr { @@ -24,7 +24,7 @@ pub enum Place { Local { frame: usize, local: mir::Local }, } -#[derive(Copy, Clone, Debug, Eq, PartialEq)] +#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq)] pub enum PlaceExtra { None, Length(u64), diff --git a/src/librustc_mir/interpret/step.rs b/src/librustc_mir/interpret/step.rs index ab15278219f..db90714d0e6 100644 --- a/src/librustc_mir/interpret/step.rs +++ b/src/librustc_mir/interpret/step.rs @@ -8,13 +8,34 @@ use super::{EvalContext, Machine}; impl<'a, 'mir, 'tcx, M: Machine<'mir, 'tcx>> EvalContext<'a, 'mir, 'tcx, M> { - pub fn inc_step_counter_and_check_limit(&mut self, n: usize) { - self.terminators_remaining = self.terminators_remaining.saturating_sub(n); - if self.terminators_remaining == 0 { + pub fn inc_step_counter_and_detect_loops(&mut self) -> EvalResult<'tcx, ()> { + /// The number of steps between loop detector snapshots. + /// Should be a power of two for performance reasons. + const DETECTOR_SNAPSHOT_PERIOD: isize = 256; + + { + let steps = &mut self.steps_since_detector_enabled; + + *steps += 1; + if *steps < 0 { + return Ok(()); + } + + *steps %= DETECTOR_SNAPSHOT_PERIOD; + if *steps != 0 { + return Ok(()); + } + } + + if self.loop_detector.is_empty() { + // First run of the loop detector + // FIXME(#49980): make this warning a lint - self.tcx.sess.span_warn(self.frame().span, "Constant evaluating a complex constant, this might take some time"); - self.terminators_remaining = 1_000_000; + self.tcx.sess.span_warn(self.frame().span, + "Constant evaluating a complex constant, this might take some time"); } + + self.loop_detector.observe_and_analyze(&self.machine, &self.stack, &self.memory) } /// Returns true as long as there are more things to do. @@ -36,7 +57,7 @@ pub fn step(&mut self) -> EvalResult<'tcx, bool> { return Ok(true); } - self.inc_step_counter_and_check_limit(1); + self.inc_step_counter_and_detect_loops()?; let terminator = basic_block.terminator(); assert_eq!(old_frames, self.cur_frame()); diff --git a/src/test/ui/const-eval/infinite_loop.rs b/src/test/ui/const-eval/infinite_loop.rs new file mode 100644 index 00000000000..a1f8ab7f878 --- /dev/null +++ b/src/test/ui/const-eval/infinite_loop.rs @@ -0,0 +1,25 @@ +// Copyright 2018 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#![feature(const_let)] + +fn main() { + // Tests the Collatz conjecture with an incorrect base case (0 instead of 1). + // The value of `n` will loop indefinitely (4 - 2 - 1 - 4). + let _ = [(); { + //~^ WARNING Constant evaluating a complex constant, this might take some time + //~| ERROR could not evaluate repeat length + let mut n = 113383; // #20 in https://oeis.org/A006884 + while n != 0 { //~ ERROR constant contains unimplemented expression type + n = if n % 2 == 0 { n/2 } else { 3*n + 1 }; + } + n + }]; +} diff --git a/src/test/ui/const-eval/infinite_loop.stderr b/src/test/ui/const-eval/infinite_loop.stderr new file mode 100644 index 00000000000..f69aae29203 --- /dev/null +++ b/src/test/ui/const-eval/infinite_loop.stderr @@ -0,0 +1,41 @@ +error[E0019]: constant contains unimplemented expression type + --> $DIR/infinite_loop.rs:20:9 + | +LL | / while n != 0 { //~ ERROR constant contains unimplemented expression type +LL | | n = if n % 2 == 0 { n/2 } else { 3*n + 1 }; +LL | | } + | |_________^ + +warning: Constant evaluating a complex constant, this might take some time + --> $DIR/infinite_loop.rs:16:18 + | +LL | let _ = [(); { + | __________________^ +LL | | //~^ WARNING Constant evaluating a complex constant, this might take some time +LL | | //~| ERROR could not evaluate repeat length +LL | | let mut n = 113383; // #20 in https://oeis.org/A006884 +... | +LL | | n +LL | | }]; + | |_____^ + +error[E0080]: could not evaluate repeat length + --> $DIR/infinite_loop.rs:16:18 + | +LL | let _ = [(); { + | __________________^ +LL | | //~^ WARNING Constant evaluating a complex constant, this might take some time +LL | | //~| ERROR could not evaluate repeat length +LL | | let mut n = 113383; // #20 in https://oeis.org/A006884 +LL | | while n != 0 { //~ ERROR constant contains unimplemented expression type +LL | | n = if n % 2 == 0 { n/2 } else { 3*n + 1 }; + | | ---------- duplicate interpreter state observed here, const evaluation will never terminate +LL | | } +LL | | n +LL | | }]; + | |_____^ + +error: aborting due to 2 previous errors + +Some errors occurred: E0019, E0080. +For more information about an error, try `rustc --explain E0019`.