Skip to content

Commit

Permalink
Auto merge of #51702 - ecstatic-morse:infinite-loop-detection, r=oli-obk
Browse files Browse the repository at this point in the history
Infinite loop detection for const evaluation

Resolves #50637.

An `EvalContext` stores the transient state (stack, heap, etc.) of the MIRI virtual machine while it executing code. As long as MIRI only executes pure functions, we can detect if a program is in a state where it will never terminate by periodically taking a "snapshot" of this transient state and comparing it to previous ones. If any two states are exactly equal, the machine must be in an infinite loop.

Instead of fully cloning a snapshot every time the detector is run, we store a snapshot's hash. Only when a hash collision occurs do we fully clone the interpreter state. Future snapshots which cause a collision will be compared against this clone, causing the interpreter to abort if they are equal.

At the moment, snapshots are not taken until MIRI has progressed a certain amount. After this threshold, snapshots are taken every `DETECTOR_SNAPSHOT_PERIOD` steps. This means that an infinite loop with period `P` will be detected after a maximum of `2 * P * DETECTOR_SNAPSHOT_PERIOD` interpreter steps. The factor of 2 arises because we only clone a snapshot after it causes a hash collision.
  • Loading branch information
bors committed Jul 11, 2018
2 parents 66787e0 + cf5eaa7 commit d573fe1
Show file tree
Hide file tree
Showing 13 changed files with 301 additions and 26 deletions.
3 changes: 2 additions & 1 deletion src/librustc/ich/impls_ty.rs
Original file line number Diff line number Diff line change
Expand Up @@ -549,7 +549,8 @@ for ::mir::interpret::EvalErrorKind<'gcx, O> {
RemainderByZero |
DivisionByZero |
GeneratorResumedAfterReturn |
GeneratorResumedAfterPanic => {}
GeneratorResumedAfterPanic |
InfiniteLoop => {}
ReferencedConstant(ref err) => err.hash_stable(hcx, hasher),
MachineError(ref err) => err.hash_stable(hcx, hasher),
FunctionPointerTyMismatch(a, b) => {
Expand Down
3 changes: 3 additions & 0 deletions src/librustc/mir/interpret/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,7 @@ pub enum EvalErrorKind<'tcx, O> {
ReferencedConstant(Lrc<ConstEvalErr<'tcx>>),
GeneratorResumedAfterReturn,
GeneratorResumedAfterPanic,
InfiniteLoop,
}

pub type EvalResult<'tcx, T = ()> = Result<T, EvalError<'tcx>>;
Expand Down Expand Up @@ -398,6 +399,8 @@ impl<'tcx, O> EvalErrorKind<'tcx, O> {
RemainderByZero => "attempt to calculate the remainder with a divisor of zero",
GeneratorResumedAfterReturn => "generator resumed after completion",
GeneratorResumedAfterPanic => "generator resumed after panicking",
InfiniteLoop =>
"duplicate interpreter state observed here, const evaluation will never terminate",
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/librustc/mir/interpret/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ use ty::codec::TyDecoder;
use std::sync::atomic::{AtomicU32, Ordering};
use std::num::NonZeroU32;

#[derive(Clone, Debug, PartialEq, RustcEncodable, RustcDecodable)]
#[derive(Clone, Debug, PartialEq, Eq, Hash, RustcEncodable, RustcDecodable)]
pub enum Lock {
NoLock,
WriteLock(DynamicLifetime),
Expand Down
2 changes: 1 addition & 1 deletion src/librustc/mir/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1636,7 +1636,7 @@ impl Debug for ValidationOp {
}

// This is generic so that it can be reused by miri
#[derive(Clone, RustcEncodable, RustcDecodable)]
#[derive(Clone, Hash, PartialEq, Eq, RustcEncodable, RustcDecodable)]
pub struct ValidationOperand<'tcx, T> {
pub place: T,
pub ty: Ty<'tcx>,
Expand Down
1 change: 1 addition & 0 deletions src/librustc/ty/structural_impls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -587,6 +587,7 @@ impl<'a, 'tcx, O: Lift<'tcx>> Lift<'tcx> for interpret::EvalErrorKind<'a, O> {
RemainderByZero => RemainderByZero,
GeneratorResumedAfterReturn => GeneratorResumedAfterReturn,
GeneratorResumedAfterPanic => GeneratorResumedAfterPanic,
InfiniteLoop => InfiniteLoop,
})
}
}
Expand Down
1 change: 1 addition & 0 deletions src/librustc_mir/interpret/const_eval.rs
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ fn eval_body_using_ecx<'a, 'mir, 'tcx>(
Ok((value, ptr, layout.ty))
}

#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub struct CompileTimeEvaluator;

impl<'tcx> Into<EvalError<'tcx>> for ConstEvalError {
Expand Down
140 changes: 130 additions & 10 deletions src/librustc_mir/interpret/eval_context.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use std::fmt::Write;
use std::hash::{Hash, Hasher};
use std::mem;

use rustc::hir::def_id::DefId;
Expand All @@ -9,6 +10,7 @@ use rustc::ty::layout::{self, Size, Align, HasDataLayout, IntegerExt, LayoutOf,
use rustc::ty::subst::{Subst, Substs};
use rustc::ty::{self, Ty, TyCtxt, TypeAndMut};
use rustc::ty::query::TyCtxtAt;
use rustc_data_structures::fx::{FxHashSet, FxHasher};
use rustc_data_structures::indexed_vec::{IndexVec, Idx};
use rustc::mir::interpret::{
FrameInfo, GlobalId, Value, Scalar,
Expand Down Expand Up @@ -41,13 +43,17 @@ pub struct EvalContext<'a, 'mir, 'tcx: 'a + 'mir, M: Machine<'mir, 'tcx>> {
/// The maximum number of stack frames allowed
pub(crate) stack_limit: usize,

/// The maximum number of terminators that may be evaluated.
/// This prevents infinite loops and huge computations from freezing up const eval.
/// Remove once halting problem is solved.
pub(crate) terminators_remaining: usize,
/// When this value is negative, it indicates the number of interpreter
/// steps *until* the loop detector is enabled. When it is positive, it is
/// the number of steps after the detector has been enabled modulo the loop
/// detector period.
pub(crate) steps_since_detector_enabled: isize,

pub(crate) loop_detector: InfiniteLoopDetector<'a, 'mir, 'tcx, M>,
}

/// A stack frame.
#[derive(Clone)]
pub struct Frame<'mir, 'tcx: 'mir> {
////////////////////////////////////////////////////////////////////////////////
// Function and callsite information
Expand Down Expand Up @@ -89,6 +95,121 @@ pub struct Frame<'mir, 'tcx: 'mir> {
pub stmt: usize,
}

impl<'mir, 'tcx: 'mir> Eq for Frame<'mir, 'tcx> {}

impl<'mir, 'tcx: 'mir> PartialEq for Frame<'mir, 'tcx> {
fn eq(&self, other: &Self) -> bool {
let Frame {
mir: _,
instance,
span: _,
return_to_block,
return_place,
locals,
block,
stmt,
} = self;

// Some of these are constant during evaluation, but are included
// anyways for correctness.
*instance == other.instance
&& *return_to_block == other.return_to_block
&& *return_place == other.return_place
&& *locals == other.locals
&& *block == other.block
&& *stmt == other.stmt
}
}

impl<'mir, 'tcx: 'mir> Hash for Frame<'mir, 'tcx> {
fn hash<H: Hasher>(&self, state: &mut H) {
let Frame {
mir: _,
instance,
span: _,
return_to_block,
return_place,
locals,
block,
stmt,
} = self;

instance.hash(state);
return_to_block.hash(state);
return_place.hash(state);
locals.hash(state);
block.hash(state);
stmt.hash(state);
}
}

/// The virtual machine state during const-evaluation at a given point in time.
type EvalSnapshot<'a, 'mir, 'tcx, M>
= (M, Vec<Frame<'mir, 'tcx>>, Memory<'a, 'mir, 'tcx, M>);

pub(crate) struct InfiniteLoopDetector<'a, 'mir, 'tcx: 'a + 'mir, M: Machine<'mir, 'tcx>> {
/// The set of all `EvalSnapshot` *hashes* observed by this detector.
///
/// When a collision occurs in this table, we store the full snapshot in
/// `snapshots`.
hashes: FxHashSet<u64>,

/// The set of all `EvalSnapshot`s observed by this detector.
///
/// An `EvalSnapshot` will only be fully cloned once it has caused a
/// collision in `hashes`. As a result, the detector must observe at least
/// *two* full cycles of an infinite loop before it triggers.
snapshots: FxHashSet<EvalSnapshot<'a, 'mir, 'tcx, M>>,
}

impl<'a, 'mir, 'tcx, M> Default for InfiniteLoopDetector<'a, 'mir, 'tcx, M>
where M: Machine<'mir, 'tcx>,
'tcx: 'a + 'mir,
{
fn default() -> Self {
InfiniteLoopDetector {
hashes: FxHashSet::default(),
snapshots: FxHashSet::default(),
}
}
}

impl<'a, 'mir, 'tcx, M> InfiniteLoopDetector<'a, 'mir, 'tcx, M>
where M: Machine<'mir, 'tcx>,
'tcx: 'a + 'mir,
{
/// Returns `true` if the loop detector has not yet observed a snapshot.
pub fn is_empty(&self) -> bool {
self.hashes.is_empty()
}

pub fn observe_and_analyze(
&mut self,
machine: &M,
stack: &Vec<Frame<'mir, 'tcx>>,
memory: &Memory<'a, 'mir, 'tcx, M>,
) -> EvalResult<'tcx, ()> {
let snapshot = (machine, stack, memory);

let mut fx = FxHasher::default();
snapshot.hash(&mut fx);
let hash = fx.finish();

if self.hashes.insert(hash) {
// No collision
return Ok(())
}

if self.snapshots.insert((machine.clone(), stack.clone(), memory.clone())) {
// Spurious collision or first cycle
return Ok(())
}

// Second cycle
Err(EvalErrorKind::InfiniteLoop.into())
}
}

#[derive(Clone, Debug, Eq, PartialEq, Hash)]
pub enum StackPopCleanup {
/// The stackframe existed to compute the initial value of a static/constant, make sure it
Expand Down Expand Up @@ -173,7 +294,7 @@ impl<'c, 'b, 'a, 'mir, 'tcx, M: Machine<'mir, 'tcx>> LayoutOf
}
}

const MAX_TERMINATORS: usize = 1_000_000;
const STEPS_UNTIL_DETECTOR_ENABLED: isize = 1_000_000;

impl<'a, 'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> EvalContext<'a, 'mir, 'tcx, M> {
pub fn new(
Expand All @@ -189,16 +310,17 @@ impl<'a, 'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> EvalContext<'a, 'mir, 'tcx, M
memory: Memory::new(tcx, memory_data),
stack: Vec::new(),
stack_limit: tcx.sess.const_eval_stack_frame_limit,
terminators_remaining: MAX_TERMINATORS,
loop_detector: Default::default(),
steps_since_detector_enabled: -STEPS_UNTIL_DETECTOR_ENABLED,
}
}

pub(crate) fn with_fresh_body<F: FnOnce(&mut Self) -> R, R>(&mut self, f: F) -> R {
let stack = mem::replace(&mut self.stack, Vec::new());
let terminators_remaining = mem::replace(&mut self.terminators_remaining, MAX_TERMINATORS);
let steps = mem::replace(&mut self.steps_since_detector_enabled, -STEPS_UNTIL_DETECTOR_ENABLED);
let r = f(self);
self.stack = stack;
self.terminators_remaining = terminators_remaining;
self.steps_since_detector_enabled = steps;
r
}

Expand Down Expand Up @@ -538,8 +660,6 @@ impl<'a, 'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> EvalContext<'a, 'mir, 'tcx, M
}

Aggregate(ref kind, ref operands) => {
self.inc_step_counter_and_check_limit(operands.len());

let (dest, active_field_index) = match **kind {
mir::AggregateKind::Adt(adt_def, variant_index, _, active_field_index) => {
self.write_discriminant_value(dest_ty, dest, variant_index)?;
Expand Down
6 changes: 4 additions & 2 deletions src/librustc_mir/interpret/machine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
//! This separation exists to ensure that no fancy miri features like
//! interpreting common C functions leak into CTFE.

use std::hash::Hash;

use rustc::mir::interpret::{AllocId, EvalResult, Scalar, Pointer, AccessKind, GlobalId};
use super::{EvalContext, Place, ValTy, Memory};

Expand All @@ -13,9 +15,9 @@ use syntax::ast::Mutability;

/// Methods of this trait signifies a point where CTFE evaluation would fail
/// and some use case dependent behaviour can instead be applied
pub trait Machine<'mir, 'tcx>: Sized {
pub trait Machine<'mir, 'tcx>: Clone + Eq + Hash {
/// Additional data that can be accessed via the Memory
type MemoryData;
type MemoryData: Clone + Eq + Hash;

/// Additional memory kinds a machine wishes to distinguish from the builtin ones
type MemoryKinds: ::std::fmt::Debug + PartialEq + Copy + Clone;
Expand Down
66 changes: 63 additions & 3 deletions src/librustc_mir/interpret/memory.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use std::collections::VecDeque;
use std::hash::{Hash, Hasher};
use std::ptr;

use rustc::hir::def_id::DefId;
Expand All @@ -9,7 +10,7 @@ use rustc::ty::layout::{self, Align, TargetDataLayout, Size};
use rustc::mir::interpret::{Pointer, AllocId, Allocation, AccessKind, Value,
EvalResult, Scalar, EvalErrorKind, GlobalId, AllocType};
pub use rustc::mir::interpret::{write_target_uint, write_target_int, read_target_uint};
use rustc_data_structures::fx::{FxHashSet, FxHashMap};
use rustc_data_structures::fx::{FxHashSet, FxHashMap, FxHasher};

use syntax::ast::Mutability;

Expand All @@ -19,7 +20,7 @@ use super::{EvalContext, Machine};
// Allocations and pointers
////////////////////////////////////////////////////////////////////////////////

#[derive(Debug, PartialEq, Copy, Clone)]
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
pub enum MemoryKind<T> {
/// Error if deallocated except during a stack pop
Stack,
Expand All @@ -31,6 +32,7 @@ pub enum MemoryKind<T> {
// Top-level interpreter memory
////////////////////////////////////////////////////////////////////////////////

#[derive(Clone)]
pub struct Memory<'a, 'mir, 'tcx: 'a + 'mir, M: Machine<'mir, 'tcx>> {
/// Additional data required by the Machine
pub data: M::MemoryData,
Expand All @@ -47,6 +49,64 @@ pub struct Memory<'a, 'mir, 'tcx: 'a + 'mir, M: Machine<'mir, 'tcx>> {
pub tcx: TyCtxtAt<'a, 'tcx, 'tcx>,
}

impl<'a, 'mir, 'tcx, M> Eq for Memory<'a, 'mir, 'tcx, M>
where M: Machine<'mir, 'tcx>,
'tcx: 'a + 'mir,
{}

impl<'a, 'mir, 'tcx, M> PartialEq for Memory<'a, 'mir, 'tcx, M>
where M: Machine<'mir, 'tcx>,
'tcx: 'a + 'mir,
{
fn eq(&self, other: &Self) -> bool {
let Memory {
data,
alloc_kind,
alloc_map,
cur_frame,
tcx: _,
} = self;

*data == other.data
&& *alloc_kind == other.alloc_kind
&& *alloc_map == other.alloc_map
&& *cur_frame == other.cur_frame
}
}

impl<'a, 'mir, 'tcx, M> Hash for Memory<'a, 'mir, 'tcx, M>
where M: Machine<'mir, 'tcx>,
'tcx: 'a + 'mir,
{
fn hash<H: Hasher>(&self, state: &mut H) {
let Memory {
data,
alloc_kind: _,
alloc_map: _,
cur_frame,
tcx: _,
} = self;

data.hash(state);
cur_frame.hash(state);

// We ignore some fields which don't change between evaluation steps.

// Since HashMaps which contain the same items may have different
// iteration orders, we use a commutative operation (in this case
// addition, but XOR would also work), to combine the hash of each
// `Allocation`.
self.allocations()
.map(|allocs| {
let mut h = FxHasher::default();
allocs.hash(&mut h);
h.finish()
})
.fold(0u64, |hash, x| hash.wrapping_add(x))
.hash(state);
}
}

impl<'a, 'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'a, 'mir, 'tcx, M> {
pub fn new(tcx: TyCtxtAt<'a, 'tcx, 'tcx>, data: M::MemoryData) -> Self {
Memory {
Expand Down Expand Up @@ -866,7 +926,7 @@ impl<'a, 'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'a, 'mir, 'tcx, M> {

for i in 0..size.bytes() {
let defined = undef_mask.get(src.offset + Size::from_bytes(i));

for j in 0..repeat {
dest_allocation.undef_mask.set(
dest.offset + Size::from_bytes(i + (size.bytes() * j)),
Expand Down
Loading

0 comments on commit d573fe1

Please sign in to comment.