diff --git a/compiler/rustc_middle/src/arena.rs b/compiler/rustc_middle/src/arena.rs
index e3d7dff3c66bb..37c10b14054c5 100644
--- a/compiler/rustc_middle/src/arena.rs
+++ b/compiler/rustc_middle/src/arena.rs
@@ -61,10 +61,6 @@ macro_rules! arena_types {
             [] dtorck_constraint: rustc_middle::traits::query::DropckConstraint<'tcx>,
             [] candidate_step: rustc_middle::traits::query::CandidateStep<'tcx>,
             [] autoderef_bad_ty: rustc_middle::traits::query::MethodAutoderefBadTy<'tcx>,
-            [] canonical_goal_evaluation:
-                rustc_type_ir::solve::inspect::CanonicalGoalEvaluationStep<
-                    rustc_middle::ty::TyCtxt<'tcx>
-                >,
             [] query_region_constraints: rustc_middle::infer::canonical::QueryRegionConstraints<'tcx>,
             [] type_op_subtype:
                 rustc_middle::infer::canonical::Canonical<'tcx,
diff --git a/compiler/rustc_middle/src/ty/context.rs b/compiler/rustc_middle/src/ty/context.rs
index 8f8fd09c9e4d9..8198b2fdc8930 100644
--- a/compiler/rustc_middle/src/ty/context.rs
+++ b/compiler/rustc_middle/src/ty/context.rs
@@ -107,8 +107,6 @@ impl<'tcx> Interner for TyCtxt<'tcx> {
         self.mk_predefined_opaques_in_body(data)
     }
     type DefiningOpaqueTypes = &'tcx ty::List<LocalDefId>;
-    type CanonicalGoalEvaluationStepRef =
-        &'tcx solve::inspect::CanonicalGoalEvaluationStep<TyCtxt<'tcx>>;
     type CanonicalVars = CanonicalVarInfos<'tcx>;
     fn mk_canonical_var_infos(self, infos: &[ty::CanonicalVarInfo<Self>]) -> Self::CanonicalVars {
         self.mk_canonical_var_infos(infos)
@@ -277,13 +275,6 @@ impl<'tcx> Interner for TyCtxt<'tcx> {
         self.debug_assert_args_compatible(def_id, args);
     }
 
-    fn intern_canonical_goal_evaluation_step(
-        self,
-        step: solve::inspect::CanonicalGoalEvaluationStep<TyCtxt<'tcx>>,
-    ) -> &'tcx solve::inspect::CanonicalGoalEvaluationStep<TyCtxt<'tcx>> {
-        self.arena.alloc(step)
-    }
-
     fn mk_type_list_from_iter<I, T>(self, args: I) -> T::Output
     where
         I: Iterator<Item = T>,
diff --git a/compiler/rustc_next_trait_solver/src/solve/inspect/build.rs b/compiler/rustc_next_trait_solver/src/solve/inspect/build.rs
index a3c21666bd67c..86fb036cd3df8 100644
--- a/compiler/rustc_next_trait_solver/src/solve/inspect/build.rs
+++ b/compiler/rustc_next_trait_solver/src/solve/inspect/build.rs
@@ -5,11 +5,10 @@
 //! see the comment on [ProofTreeBuilder].
 
 use std::marker::PhantomData;
-use std::mem;
 
 use derive_where::derive_where;
 use rustc_type_ir::inherent::*;
-use rustc_type_ir::{self as ty, search_graph, Interner};
+use rustc_type_ir::{self as ty, Interner};
 
 use crate::delegate::SolverDelegate;
 use crate::solve::eval_ctxt::canonical;
@@ -94,31 +93,10 @@ impl<I: Interner> WipGoalEvaluation<I> {
     }
 }
 
-#[derive_where(PartialEq, Eq; I: Interner)]
-pub(in crate::solve) enum WipCanonicalGoalEvaluationKind<I: Interner> {
-    Overflow,
-    CycleInStack,
-    ProvisionalCacheHit,
-    Interned { final_revision: I::CanonicalGoalEvaluationStepRef },
-}
-
-impl<I: Interner> std::fmt::Debug for WipCanonicalGoalEvaluationKind<I> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        match self {
-            Self::Overflow => write!(f, "Overflow"),
-            Self::CycleInStack => write!(f, "CycleInStack"),
-            Self::ProvisionalCacheHit => write!(f, "ProvisionalCacheHit"),
-            Self::Interned { final_revision: _ } => {
-                f.debug_struct("Interned").finish_non_exhaustive()
-            }
-        }
-    }
-}
-
 #[derive_where(PartialEq, Eq, Debug; I: Interner)]
 struct WipCanonicalGoalEvaluation<I: Interner> {
     goal: CanonicalInput<I>,
-    kind: Option<WipCanonicalGoalEvaluationKind<I>>,
+    encountered_overflow: bool,
     /// Only used for uncached goals. After we finished evaluating
     /// the goal, this is interned and moved into `kind`.
     final_revision: Option<WipCanonicalGoalEvaluationStep<I>>,
@@ -127,25 +105,17 @@ struct WipCanonicalGoalEvaluation<I: Interner> {
 
 impl<I: Interner> WipCanonicalGoalEvaluation<I> {
     fn finalize(self) -> inspect::CanonicalGoalEvaluation<I> {
-        // We've already interned the final revision in
-        // `fn finalize_canonical_goal_evaluation`.
-        assert!(self.final_revision.is_none());
-        let kind = match self.kind.unwrap() {
-            WipCanonicalGoalEvaluationKind::Overflow => {
+        inspect::CanonicalGoalEvaluation {
+            goal: self.goal,
+            kind: if self.encountered_overflow {
+                assert!(self.final_revision.is_none());
                 inspect::CanonicalGoalEvaluationKind::Overflow
-            }
-            WipCanonicalGoalEvaluationKind::CycleInStack => {
-                inspect::CanonicalGoalEvaluationKind::CycleInStack
-            }
-            WipCanonicalGoalEvaluationKind::ProvisionalCacheHit => {
-                inspect::CanonicalGoalEvaluationKind::ProvisionalCacheHit
-            }
-            WipCanonicalGoalEvaluationKind::Interned { final_revision } => {
+            } else {
+                let final_revision = self.final_revision.unwrap().finalize();
                 inspect::CanonicalGoalEvaluationKind::Evaluation { final_revision }
-            }
-        };
-
-        inspect::CanonicalGoalEvaluation { goal: self.goal, kind, result: self.result.unwrap() }
+            },
+            result: self.result.unwrap(),
+        }
     }
 }
 
@@ -308,7 +278,7 @@ impl<D: SolverDelegate<Interner = I>, I: Interner> ProofTreeBuilder<D> {
     ) -> ProofTreeBuilder<D> {
         self.nested(|| WipCanonicalGoalEvaluation {
             goal,
-            kind: None,
+            encountered_overflow: false,
             final_revision: None,
             result: None,
         })
@@ -329,11 +299,11 @@ impl<D: SolverDelegate<Interner = I>, I: Interner> ProofTreeBuilder<D> {
         }
     }
 
-    pub fn canonical_goal_evaluation_kind(&mut self, kind: WipCanonicalGoalEvaluationKind<I>) {
+    pub fn canonical_goal_evaluation_overflow(&mut self) {
         if let Some(this) = self.as_mut() {
             match this {
                 DebugSolver::CanonicalGoalEvaluation(canonical_goal_evaluation) => {
-                    assert_eq!(canonical_goal_evaluation.kind.replace(kind), None);
+                    canonical_goal_evaluation.encountered_overflow = true;
                 }
                 _ => unreachable!(),
             };
@@ -547,51 +517,3 @@ impl<D: SolverDelegate<Interner = I>, I: Interner> ProofTreeBuilder<D> {
         }
     }
 }
-
-impl<D, I> search_graph::ProofTreeBuilder<I> for ProofTreeBuilder<D>
-where
-    D: SolverDelegate<Interner = I>,
-    I: Interner,
-{
-    fn try_apply_proof_tree(
-        &mut self,
-        proof_tree: Option<I::CanonicalGoalEvaluationStepRef>,
-    ) -> bool {
-        if !self.is_noop() {
-            if let Some(final_revision) = proof_tree {
-                let kind = WipCanonicalGoalEvaluationKind::Interned { final_revision };
-                self.canonical_goal_evaluation_kind(kind);
-                true
-            } else {
-                false
-            }
-        } else {
-            true
-        }
-    }
-
-    fn on_provisional_cache_hit(&mut self) {
-        self.canonical_goal_evaluation_kind(WipCanonicalGoalEvaluationKind::ProvisionalCacheHit);
-    }
-
-    fn on_cycle_in_stack(&mut self) {
-        self.canonical_goal_evaluation_kind(WipCanonicalGoalEvaluationKind::CycleInStack);
-    }
-
-    fn finalize_canonical_goal_evaluation(
-        &mut self,
-        tcx: I,
-    ) -> Option<I::CanonicalGoalEvaluationStepRef> {
-        self.as_mut().map(|this| match this {
-            DebugSolver::CanonicalGoalEvaluation(evaluation) => {
-                let final_revision = mem::take(&mut evaluation.final_revision).unwrap();
-                let final_revision =
-                    tcx.intern_canonical_goal_evaluation_step(final_revision.finalize());
-                let kind = WipCanonicalGoalEvaluationKind::Interned { final_revision };
-                assert_eq!(evaluation.kind.replace(kind), None);
-                final_revision
-            }
-            _ => unreachable!(),
-        })
-    }
-}
diff --git a/compiler/rustc_next_trait_solver/src/solve/search_graph.rs b/compiler/rustc_next_trait_solver/src/solve/search_graph.rs
index fe053a506e712..81c89fad8e8a0 100644
--- a/compiler/rustc_next_trait_solver/src/solve/search_graph.rs
+++ b/compiler/rustc_next_trait_solver/src/solve/search_graph.rs
@@ -1,12 +1,13 @@
+use std::convert::Infallible;
 use std::marker::PhantomData;
 
 use rustc_type_ir::inherent::*;
-use rustc_type_ir::search_graph::{self, CycleKind, UsageKind};
+use rustc_type_ir::search_graph::{self, PathKind};
 use rustc_type_ir::solve::{CanonicalInput, Certainty, QueryResult};
 use rustc_type_ir::Interner;
 
-use super::inspect::{self, ProofTreeBuilder};
-use super::FIXPOINT_STEP_LIMIT;
+use super::inspect::ProofTreeBuilder;
+use super::{has_no_inference_or_external_constraints, FIXPOINT_STEP_LIMIT};
 use crate::delegate::SolverDelegate;
 
 /// This type is never constructed. We only use it to implement `search_graph::Delegate`
@@ -22,43 +23,48 @@ where
 {
     type Cx = D::Interner;
 
+    const ENABLE_PROVISIONAL_CACHE: bool = true;
+    type ValidationScope = Infallible;
+    fn enter_validation_scope(
+        _cx: Self::Cx,
+        _input: CanonicalInput<I>,
+    ) -> Option<Self::ValidationScope> {
+        None
+    }
+
     const FIXPOINT_STEP_LIMIT: usize = FIXPOINT_STEP_LIMIT;
 
     type ProofTreeBuilder = ProofTreeBuilder<D>;
+    fn inspect_is_noop(inspect: &mut Self::ProofTreeBuilder) -> bool {
+        inspect.is_noop()
+    }
 
+    const DIVIDE_AVAILABLE_DEPTH_ON_OVERFLOW: usize = 4;
     fn recursion_limit(cx: I) -> usize {
         cx.recursion_limit()
     }
 
     fn initial_provisional_result(
         cx: I,
-        kind: CycleKind,
+        kind: PathKind,
         input: CanonicalInput<I>,
     ) -> QueryResult<I> {
         match kind {
-            CycleKind::Coinductive => response_no_constraints(cx, input, Certainty::Yes),
-            CycleKind::Inductive => response_no_constraints(cx, input, Certainty::overflow(false)),
+            PathKind::Coinductive => response_no_constraints(cx, input, Certainty::Yes),
+            PathKind::Inductive => response_no_constraints(cx, input, Certainty::overflow(false)),
         }
     }
 
-    fn reached_fixpoint(
-        cx: I,
-        kind: UsageKind,
+    fn is_initial_provisional_result(
+        cx: Self::Cx,
+        kind: PathKind,
         input: CanonicalInput<I>,
-        provisional_result: Option<QueryResult<I>>,
         result: QueryResult<I>,
     ) -> bool {
-        if let Some(r) = provisional_result {
-            r == result
-        } else {
-            match kind {
-                UsageKind::Single(CycleKind::Coinductive) => {
-                    response_no_constraints(cx, input, Certainty::Yes) == result
-                }
-                UsageKind::Single(CycleKind::Inductive) => {
-                    response_no_constraints(cx, input, Certainty::overflow(false)) == result
-                }
-                UsageKind::Mixed => false,
+        match kind {
+            PathKind::Coinductive => response_no_constraints(cx, input, Certainty::Yes) == result,
+            PathKind::Inductive => {
+                response_no_constraints(cx, input, Certainty::overflow(false)) == result
             }
         }
     }
@@ -68,7 +74,7 @@ where
         inspect: &mut ProofTreeBuilder<D>,
         input: CanonicalInput<I>,
     ) -> QueryResult<I> {
-        inspect.canonical_goal_evaluation_kind(inspect::WipCanonicalGoalEvaluationKind::Overflow);
+        inspect.canonical_goal_evaluation_overflow();
         response_no_constraints(cx, input, Certainty::overflow(true))
     }
 
@@ -76,6 +82,22 @@ where
         response_no_constraints(cx, input, Certainty::overflow(false))
     }
 
+    fn is_ambiguous_result(result: QueryResult<I>) -> bool {
+        result.is_ok_and(|response| {
+            has_no_inference_or_external_constraints(response)
+                && matches!(response.value.certainty, Certainty::Maybe(_))
+        })
+    }
+
+    fn propagate_ambiguity(
+        cx: I,
+        for_input: CanonicalInput<I>,
+        from_result: QueryResult<I>,
+    ) -> QueryResult<I> {
+        let certainty = from_result.unwrap().value.certainty;
+        response_no_constraints(cx, for_input, certainty)
+    }
+
     fn step_is_coinductive(cx: I, input: CanonicalInput<I>) -> bool {
         input.value.goal.predicate.is_coinductive(cx)
     }
diff --git a/compiler/rustc_trait_selection/src/solve/inspect/analyse.rs b/compiler/rustc_trait_selection/src/solve/inspect/analyse.rs
index e8de8457440ff..4e4022830d46e 100644
--- a/compiler/rustc_trait_selection/src/solve/inspect/analyse.rs
+++ b/compiler/rustc_trait_selection/src/solve/inspect/analyse.rs
@@ -332,13 +332,9 @@ impl<'a, 'tcx> InspectGoal<'a, 'tcx> {
 
     pub fn candidates(&'a self) -> Vec<InspectCandidate<'a, 'tcx>> {
         let mut candidates = vec![];
-        let last_eval_step = match self.evaluation_kind {
-            inspect::CanonicalGoalEvaluationKind::Overflow
-            | inspect::CanonicalGoalEvaluationKind::CycleInStack
-            | inspect::CanonicalGoalEvaluationKind::ProvisionalCacheHit => {
-                warn!("unexpected root evaluation: {:?}", self.evaluation_kind);
-                return vec![];
-            }
+        let last_eval_step = match &self.evaluation_kind {
+            // An annoying edge case which can occur if the recursion limit is 0.
+            inspect::CanonicalGoalEvaluationKind::Overflow => return vec![],
             inspect::CanonicalGoalEvaluationKind::Evaluation { final_revision } => final_revision,
         };
 
diff --git a/compiler/rustc_type_ir/src/binder.rs b/compiler/rustc_type_ir/src/binder.rs
index c1f6fb36324ed..8797288070e71 100644
--- a/compiler/rustc_type_ir/src/binder.rs
+++ b/compiler/rustc_type_ir/src/binder.rs
@@ -8,7 +8,7 @@ use derive_where::derive_where;
 use rustc_macros::{HashStable_NoContext, TyDecodable, TyEncodable};
 #[cfg(feature = "nightly")]
 use rustc_serialize::Decodable;
-use tracing::debug;
+use tracing::instrument;
 
 use crate::data_structures::SsoHashSet;
 use crate::fold::{FallibleTypeFolder, TypeFoldable, TypeFolder, TypeSuperFoldable};
@@ -831,28 +831,20 @@ impl<'a, I: Interner> ArgFolder<'a, I> {
     /// As indicated in the diagram, here the same type `&'a i32` is instantiated once, but in the
     /// first case we do not increase the De Bruijn index and in the second case we do. The reason
     /// is that only in the second case have we passed through a fn binder.
+    #[instrument(level = "trace", skip(self), fields(binders_passed = self.binders_passed), ret)]
     fn shift_vars_through_binders<T: TypeFoldable<I>>(&self, val: T) -> T {
-        debug!(
-            "shift_vars(val={:?}, binders_passed={:?}, has_escaping_bound_vars={:?})",
-            val,
-            self.binders_passed,
-            val.has_escaping_bound_vars()
-        );
-
         if self.binders_passed == 0 || !val.has_escaping_bound_vars() {
-            return val;
+            val
+        } else {
+            ty::fold::shift_vars(self.cx, val, self.binders_passed)
         }
-
-        let result = ty::fold::shift_vars(TypeFolder::cx(self), val, self.binders_passed);
-        debug!("shift_vars: shifted result = {:?}", result);
-
-        result
     }
 
     fn shift_region_through_binders(&self, region: I::Region) -> I::Region {
         if self.binders_passed == 0 || !region.has_escaping_bound_vars() {
-            return region;
+            region
+        } else {
+            ty::fold::shift_region(self.cx, region, self.binders_passed)
         }
-        ty::fold::shift_region(self.cx, region, self.binders_passed)
     }
 }
diff --git a/compiler/rustc_type_ir/src/fold.rs b/compiler/rustc_type_ir/src/fold.rs
index d37bacc7d359f..8e3534b0e9eb4 100644
--- a/compiler/rustc_type_ir/src/fold.rs
+++ b/compiler/rustc_type_ir/src/fold.rs
@@ -48,7 +48,7 @@
 use std::mem;
 
 use rustc_index::{Idx, IndexVec};
-use tracing::debug;
+use tracing::instrument;
 
 use crate::data_structures::Lrc;
 use crate::inherent::*;
@@ -417,15 +417,14 @@ pub fn shift_region<I: Interner>(cx: I, region: I::Region, amount: u32) -> I::Re
     }
 }
 
+#[instrument(level = "trace", skip(cx), ret)]
 pub fn shift_vars<I: Interner, T>(cx: I, value: T, amount: u32) -> T
 where
     T: TypeFoldable<I>,
 {
-    debug!("shift_vars(value={:?}, amount={})", value, amount);
-
     if amount == 0 || !value.has_escaping_bound_vars() {
-        return value;
+        value
+    } else {
+        value.fold_with(&mut Shifter::new(cx, amount))
     }
-
-    value.fold_with(&mut Shifter::new(cx, amount))
 }
diff --git a/compiler/rustc_type_ir/src/interner.rs b/compiler/rustc_type_ir/src/interner.rs
index c251540c0fc29..f2492ede4f5ea 100644
--- a/compiler/rustc_type_ir/src/interner.rs
+++ b/compiler/rustc_type_ir/src/interner.rs
@@ -11,7 +11,6 @@ use crate::inherent::*;
 use crate::ir_print::IrPrint;
 use crate::lang_items::TraitSolverLangItem;
 use crate::relate::Relate;
-use crate::solve::inspect::CanonicalGoalEvaluationStep;
 use crate::solve::{
     CanonicalInput, ExternalConstraintsData, PredefinedOpaquesData, QueryResult, SolverMode,
 };
@@ -65,11 +64,6 @@ pub trait Interner:
         + Eq
         + TypeVisitable<Self>
         + SliceLike<Item = Self::LocalDefId>;
-    type CanonicalGoalEvaluationStepRef: Copy
-        + Debug
-        + Hash
-        + Eq
-        + Deref<Target = CanonicalGoalEvaluationStep<Self>>;
 
     type CanonicalVars: Copy
         + Debug
@@ -177,11 +171,6 @@ pub trait Interner:
 
     fn debug_assert_args_compatible(self, def_id: Self::DefId, args: Self::GenericArgs);
 
-    fn intern_canonical_goal_evaluation_step(
-        self,
-        step: CanonicalGoalEvaluationStep<Self>,
-    ) -> Self::CanonicalGoalEvaluationStepRef;
-
     fn mk_type_list_from_iter<I, T>(self, args: I) -> T::Output
     where
         I: Iterator<Item = T>,
@@ -390,7 +379,6 @@ impl<T, R, E> CollectAndApply<T, R> for Result<T, E> {
 }
 
 impl<I: Interner> search_graph::Cx for I {
-    type ProofTree = Option<I::CanonicalGoalEvaluationStepRef>;
     type Input = CanonicalInput<I>;
     type Result = QueryResult<I>;
 
diff --git a/compiler/rustc_type_ir/src/search_graph/global_cache.rs b/compiler/rustc_type_ir/src/search_graph/global_cache.rs
index be4f1069cd167..47f7cefac6ad1 100644
--- a/compiler/rustc_type_ir/src/search_graph/global_cache.rs
+++ b/compiler/rustc_type_ir/src/search_graph/global_cache.rs
@@ -1,18 +1,17 @@
 use derive_where::derive_where;
-use rustc_index::IndexVec;
 
-use super::{AvailableDepth, Cx, StackDepth, StackEntry};
-use crate::data_structures::{HashMap, HashSet};
-
-#[derive_where(Debug, Clone, Copy; X: Cx)]
-struct QueryData<X: Cx> {
-    result: X::Result,
-    proof_tree: X::ProofTree,
-}
+use super::{AvailableDepth, Cx, NestedGoals};
+use crate::data_structures::HashMap;
 
 struct Success<X: Cx> {
-    data: X::Tracked<QueryData<X>>,
     additional_depth: usize,
+    nested_goals: NestedGoals<X>,
+    result: X::Tracked<X::Result>,
+}
+
+struct WithOverflow<X: Cx> {
+    nested_goals: NestedGoals<X>,
+    result: X::Tracked<X::Result>,
 }
 
 /// The cache entry for a given input.
@@ -23,24 +22,15 @@ struct Success<X: Cx> {
 #[derive_where(Default; X: Cx)]
 struct CacheEntry<X: Cx> {
     success: Option<Success<X>>,
-    /// We have to be careful when caching roots of cycles.
-    ///
-    /// See the doc comment of `StackEntry::cycle_participants` for more
-    /// details.
-    nested_goals: HashSet<X::Input>,
-    with_overflow: HashMap<usize, X::Tracked<QueryData<X>>>,
+    with_overflow: HashMap<usize, WithOverflow<X>>,
 }
 
 #[derive_where(Debug; X: Cx)]
 pub(super) struct CacheData<'a, X: Cx> {
     pub(super) result: X::Result,
-    pub(super) proof_tree: X::ProofTree,
     pub(super) additional_depth: usize,
     pub(super) encountered_overflow: bool,
-    // FIXME: This is currently unused, but impacts the design
-    // by requiring a closure for `Cx::with_global_cache`.
-    #[allow(dead_code)]
-    pub(super) nested_goals: &'a HashSet<X::Input>,
+    pub(super) nested_goals: &'a NestedGoals<X>,
 }
 #[derive_where(Default; X: Cx)]
 pub struct GlobalCache<X: Cx> {
@@ -55,20 +45,21 @@ impl<X: Cx> GlobalCache<X> {
         input: X::Input,
 
         result: X::Result,
-        proof_tree: X::ProofTree,
         dep_node: X::DepNodeIndex,
 
         additional_depth: usize,
         encountered_overflow: bool,
-        nested_goals: &HashSet<X::Input>,
+        nested_goals: NestedGoals<X>,
     ) {
-        let data = cx.mk_tracked(QueryData { result, proof_tree }, dep_node);
+        let result = cx.mk_tracked(result, dep_node);
         let entry = self.map.entry(input).or_default();
-        entry.nested_goals.extend(nested_goals);
         if encountered_overflow {
-            entry.with_overflow.insert(additional_depth, data);
+            let with_overflow = WithOverflow { nested_goals, result };
+            let prev = entry.with_overflow.insert(additional_depth, with_overflow);
+            assert!(prev.is_none());
         } else {
-            entry.success = Some(Success { data, additional_depth });
+            let prev = entry.success.replace(Success { additional_depth, nested_goals, result });
+            assert!(prev.is_none());
         }
     }
 
@@ -80,36 +71,37 @@ impl<X: Cx> GlobalCache<X> {
         &'a self,
         cx: X,
         input: X::Input,
-        stack: &IndexVec<StackDepth, StackEntry<X>>,
         available_depth: AvailableDepth,
+        mut candidate_is_applicable: impl FnMut(&NestedGoals<X>) -> bool,
     ) -> Option<CacheData<'a, X>> {
         let entry = self.map.get(&input)?;
-        if stack.iter().any(|e| entry.nested_goals.contains(&e.input)) {
-            return None;
-        }
-
-        if let Some(ref success) = entry.success {
-            if available_depth.cache_entry_is_applicable(success.additional_depth) {
-                let QueryData { result, proof_tree } = cx.get_tracked(&success.data);
+        if let Some(Success { additional_depth, ref nested_goals, ref result }) = entry.success {
+            if available_depth.cache_entry_is_applicable(additional_depth)
+                && candidate_is_applicable(nested_goals)
+            {
                 return Some(CacheData {
-                    result,
-                    proof_tree,
-                    additional_depth: success.additional_depth,
+                    result: cx.get_tracked(&result),
+                    additional_depth,
                     encountered_overflow: false,
-                    nested_goals: &entry.nested_goals,
+                    nested_goals,
                 });
             }
         }
 
-        entry.with_overflow.get(&available_depth.0).map(|e| {
-            let QueryData { result, proof_tree } = cx.get_tracked(e);
-            CacheData {
-                result,
-                proof_tree,
-                additional_depth: available_depth.0,
-                encountered_overflow: true,
-                nested_goals: &entry.nested_goals,
+        let additional_depth = available_depth.0;
+        if let Some(WithOverflow { nested_goals, result }) =
+            entry.with_overflow.get(&additional_depth)
+        {
+            if candidate_is_applicable(nested_goals) {
+                return Some(CacheData {
+                    result: cx.get_tracked(result),
+                    additional_depth,
+                    encountered_overflow: true,
+                    nested_goals,
+                });
             }
-        })
+        }
+
+        None
     }
 }
diff --git a/compiler/rustc_type_ir/src/search_graph/mod.rs b/compiler/rustc_type_ir/src/search_graph/mod.rs
index 4abf99b1ded8a..d47c9e725f350 100644
--- a/compiler/rustc_type_ir/src/search_graph/mod.rs
+++ b/compiler/rustc_type_ir/src/search_graph/mod.rs
@@ -1,19 +1,32 @@
+//! The search graph is responsible for caching and cycle detection in the trait
+//! solver. Making sure that caching doesn't result in soundness bugs or unstable
+//! query results is very challenging and makes this one of the most-involved
+//! self-contained components of the compiler.
+//!
+//! We added fuzzing support to test its correctness. The fuzzers used to verify
+//! the current implementation can be found in <https://github.com/lcnr/search_graph_fuzz>.
+//!
+//! This is just a quick overview of the general design; please check out the relevant
+//! [rustc-dev-guide chapter](https://rustc-dev-guide.rust-lang.org/solve/caching.html) for
+//! more details. Caching is split between a global cache and the per-cycle `provisional_cache`.
+//! The global cache has to be completely unobservable, while the per-cycle cache may impact
+//! behavior as long as the resulting behavior is still correct.
+use std::cmp::Ordering;
+use std::collections::BTreeSet;
 use std::fmt::Debug;
 use std::hash::Hash;
 use std::marker::PhantomData;
-use std::mem;
 
 use derive_where::derive_where;
 use rustc_index::{Idx, IndexVec};
 use tracing::debug;
 
-use crate::data_structures::{HashMap, HashSet};
+use crate::data_structures::HashMap;
 use crate::solve::SolverMode;
 
 mod global_cache;
 use global_cache::CacheData;
 pub use global_cache::GlobalCache;
-mod validate;
 
 /// The search graph does not simply use `Interner` directly
 /// to enable its fuzzing without having to stub the rest of
@@ -22,7 +35,6 @@ mod validate;
 /// about `Input` and `Result` as they are implementation details
 /// of the search graph.
 pub trait Cx: Copy {
-    type ProofTree: Debug + Copy;
     type Input: Debug + Eq + Hash + Copy;
     type Result: Debug + Eq + Hash + Copy;
 
@@ -43,30 +55,41 @@ pub trait Cx: Copy {
     ) -> R;
 }
 
-pub trait ProofTreeBuilder<X: Cx> {
-    fn try_apply_proof_tree(&mut self, proof_tree: X::ProofTree) -> bool;
-    fn on_provisional_cache_hit(&mut self);
-    fn on_cycle_in_stack(&mut self);
-    fn finalize_canonical_goal_evaluation(&mut self, cx: X) -> X::ProofTree;
-}
-
 pub trait Delegate {
     type Cx: Cx;
+    /// Whether to use the provisional cache. Set to `false` by a fuzzer when
+    /// validating the search graph.
+    const ENABLE_PROVISIONAL_CACHE: bool;
+    type ValidationScope;
+    /// Returning `Some` disables the global cache for the current goal.
+    ///
+    /// The `ValidationScope` is used when fuzzing the search graph to track
+    /// for which goals the global cache has been disabled. This is necessary
+    /// as we may otherwise ignore the global cache entry for some goal `G`
+    /// only to later use it, failing to detect a cycle goal and potentially
+    /// changing the result.
+    fn enter_validation_scope(
+        cx: Self::Cx,
+        input: <Self::Cx as Cx>::Input,
+    ) -> Option<Self::ValidationScope>;
+
     const FIXPOINT_STEP_LIMIT: usize;
-    type ProofTreeBuilder: ProofTreeBuilder<Self::Cx>;
 
+    type ProofTreeBuilder;
+    fn inspect_is_noop(inspect: &mut Self::ProofTreeBuilder) -> bool;
+
+    const DIVIDE_AVAILABLE_DEPTH_ON_OVERFLOW: usize;
     fn recursion_limit(cx: Self::Cx) -> usize;
 
     fn initial_provisional_result(
         cx: Self::Cx,
-        kind: CycleKind,
+        kind: PathKind,
         input: <Self::Cx as Cx>::Input,
     ) -> <Self::Cx as Cx>::Result;
-    fn reached_fixpoint(
+    fn is_initial_provisional_result(
         cx: Self::Cx,
-        kind: UsageKind,
+        kind: PathKind,
         input: <Self::Cx as Cx>::Input,
-        provisional_result: Option<<Self::Cx as Cx>::Result>,
         result: <Self::Cx as Cx>::Result,
     ) -> bool;
     fn on_stack_overflow(
@@ -79,6 +102,13 @@ pub trait Delegate {
         input: <Self::Cx as Cx>::Input,
     ) -> <Self::Cx as Cx>::Result;
 
+    fn is_ambiguous_result(result: <Self::Cx as Cx>::Result) -> bool;
+    fn propagate_ambiguity(
+        cx: Self::Cx,
+        for_input: <Self::Cx as Cx>::Input,
+        from_result: <Self::Cx as Cx>::Result,
+    ) -> <Self::Cx as Cx>::Result;
+
     fn step_is_coinductive(cx: Self::Cx, input: <Self::Cx as Cx>::Input) -> bool;
 }
 
@@ -86,19 +116,20 @@ pub trait Delegate {
 /// result. In the case we return an initial provisional result depending
 /// on the kind of cycle.
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
-pub enum CycleKind {
+pub enum PathKind {
     Coinductive,
     Inductive,
 }
 
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum UsageKind {
-    Single(CycleKind),
+    Single(PathKind),
     Mixed,
 }
 impl UsageKind {
     fn merge(self, other: Self) -> Self {
         match (self, other) {
+            (UsageKind::Mixed, _) | (_, UsageKind::Mixed) => UsageKind::Mixed,
             (UsageKind::Single(lhs), UsageKind::Single(rhs)) => {
                 if lhs == rhs {
                     UsageKind::Single(lhs)
@@ -106,11 +137,11 @@ impl UsageKind {
                     UsageKind::Mixed
                 }
             }
-            (UsageKind::Mixed, UsageKind::Mixed)
-            | (UsageKind::Mixed, UsageKind::Single(_))
-            | (UsageKind::Single(_), UsageKind::Mixed) => UsageKind::Mixed,
         }
     }
+    fn and_merge(&mut self, other: Self) {
+        *self = self.merge(other);
+    }
 }
 
 #[derive(Debug, Clone, Copy)]
@@ -132,7 +163,7 @@ impl AvailableDepth {
             }
 
             Some(if last.encountered_overflow {
-                AvailableDepth(last.available_depth.0 / 2)
+                AvailableDepth(last.available_depth.0 / D::DIVIDE_AVAILABLE_DEPTH_ON_OVERFLOW)
             } else {
                 AvailableDepth(last.available_depth.0 - 1)
             })
@@ -148,97 +179,181 @@ impl AvailableDepth {
     }
 }
 
+/// All cycle heads a given goal depends on, ordered by their stack depth.
+///
+/// We therefore pop the cycle heads from highest to lowest.
+#[derive(Clone, Debug, PartialEq, Eq, Default)]
+struct CycleHeads {
+    heads: BTreeSet<StackDepth>,
+}
+
+impl CycleHeads {
+    fn is_empty(&self) -> bool {
+        self.heads.is_empty()
+    }
+
+    fn highest_cycle_head(&self) -> StackDepth {
+        *self.heads.last().unwrap()
+    }
+
+    fn opt_highest_cycle_head(&self) -> Option<StackDepth> {
+        self.heads.last().copied()
+    }
+
+    fn opt_lowest_cycle_head(&self) -> Option<StackDepth> {
+        self.heads.first().copied()
+    }
+
+    fn remove_highest_cycle_head(&mut self) {
+        let last = self.heads.pop_last();
+        debug_assert_ne!(last, None);
+    }
+
+    fn insert(&mut self, head: StackDepth) {
+        self.heads.insert(head);
+    }
+
+    fn merge(&mut self, heads: &CycleHeads) {
+        for &head in heads.heads.iter() {
+            self.insert(head);
+        }
+    }
+
+    /// Update the cycle heads of a goal at depth `this` given the cycle heads
+    /// of a nested goal. This merges the heads after filtering the parent goal
+    /// itself.
+    fn extend_from_child(&mut self, this: StackDepth, child: &CycleHeads) {
+        for &head in child.heads.iter() {
+            match head.cmp(&this) {
+                Ordering::Less => {}
+                Ordering::Equal => continue,
+                Ordering::Greater => unreachable!(),
+            }
+
+            self.insert(head);
+        }
+    }
+}
+
+/// The nested goals of each stack entry and the path from the
+/// stack entry to that nested goal.
+///
+/// We only start tracking nested goals once we've either encountered
+/// overflow or a solver cycle. This is a performance optimization to
+/// avoid tracking nested goals on the happy path.
+///
+/// We use nested goals for two reasons:
+/// - when rebasing provisional cache entries
+/// - when checking whether we have to ignore a global cache entry as reevaluating
+///   it would encounter a cycle or use a provisional cache entry.
+///
+/// We need to disable the global cache if using it would hide a cycle, as
+/// cycles can impact behavior. The cycle ABA may have different final
+/// results from the cycle BAB depending on the cycle root.
+#[derive_where(Debug, Default; X: Cx)]
+struct NestedGoals<X: Cx> {
+    nested_goals: HashMap<X::Input, UsageKind>,
+}
+impl<X: Cx> NestedGoals<X> {
+    fn is_empty(&self) -> bool {
+        self.nested_goals.is_empty()
+    }
+
+    fn insert(&mut self, input: X::Input, path_from_entry: UsageKind) {
+        self.nested_goals.entry(input).or_insert(path_from_entry).and_merge(path_from_entry);
+    }
+
+    fn merge(&mut self, nested_goals: &NestedGoals<X>) {
+        #[allow(rustc::potential_query_instability)]
+        for (input, path_from_entry) in nested_goals.iter() {
+            self.insert(input, path_from_entry);
+        }
+    }
+
+    /// Adds the nested goals of a nested goal, given the path `step_kind` from this goal
+    /// to that nested goal.
+    ///
+    /// If the path from this goal to the nested goal is inductive, the paths from this goal
+    /// to all nested goals of that nested goal are also inductive. Otherwise the paths are
+    /// the same as for the child.
+    fn extend_from_child(&mut self, step_kind: PathKind, nested_goals: &NestedGoals<X>) {
+        #[allow(rustc::potential_query_instability)]
+        for (input, path_from_entry) in nested_goals.iter() {
+            let path_from_entry = match step_kind {
+                PathKind::Coinductive => path_from_entry,
+                PathKind::Inductive => UsageKind::Single(PathKind::Inductive),
+            };
+            self.insert(input, path_from_entry);
+        }
+    }
+
+    #[rustc_lint_query_instability]
+    #[allow(rustc::potential_query_instability)]
+    fn iter(&self) -> impl Iterator<Item = (X::Input, UsageKind)> + '_ {
+        self.nested_goals.iter().map(|(i, p)| (*i, *p))
+    }
+
+    fn get(&self, input: X::Input) -> Option<UsageKind> {
+        self.nested_goals.get(&input).copied()
+    }
+
+    fn contains(&self, input: X::Input) -> bool {
+        self.nested_goals.contains_key(&input)
+    }
+}
+
 rustc_index::newtype_index! {
     #[orderable]
     #[gate_rustc_only]
     pub struct StackDepth {}
 }
 
+/// Stack entries of the evaluation stack. Its fields tend to be lazily
+/// updated when popping a child goal or completely immutable.
 #[derive_where(Debug; X: Cx)]
 struct StackEntry<X: Cx> {
     input: X::Input,
 
+    /// The available depth of a given goal, immutable.
     available_depth: AvailableDepth,
 
     /// The maximum depth reached by this stack entry, only up-to date
     /// for the top of the stack and lazily updated for the rest.
     reached_depth: StackDepth,
 
-    /// Whether this entry is a non-root cycle participant.
-    ///
-    /// We must not move the result of non-root cycle participants to the
-    /// global cache. We store the highest stack depth of a head of a cycle
-    /// this goal is involved in. This necessary to soundly cache its
-    /// provisional result.
-    non_root_cycle_participant: Option<StackDepth>,
+    /// All cycle heads this goal depends on. Lazily updated and only
+    /// up-to date for the top of the stack.
+    heads: CycleHeads,
 
+    /// Whether evaluating this goal encountered overflow. Lazily updated.
     encountered_overflow: bool,
 
+    /// Whether this goal has been used as the root of a cycle. This gets
+    /// eagerly updated when encountering a cycle.
     has_been_used: Option<UsageKind>,
 
-    /// We put only the root goal of a coinductive cycle into the global cache.
-    ///
-    /// If we were to use that result when later trying to prove another cycle
-    /// participant, we can end up with unstable query results.
-    ///
-    /// See tests/ui/next-solver/coinduction/incompleteness-unstable-result.rs for
-    /// an example of where this is needed.
-    ///
-    /// There can  be multiple roots on the same stack, so we need to track
-    /// cycle participants per root:
-    /// ```plain
-    /// A :- B
-    /// B :- A, C
-    /// C :- D
-    /// D :- C
-    /// ```
-    nested_goals: HashSet<X::Input>,
+    /// The nested goals of this goal, see the doc comment of the type.
+    nested_goals: NestedGoals<X>,
+
     /// Starts out as `None` and gets set when rerunning this
     /// goal in case we encounter a cycle.
     provisional_result: Option<X::Result>,
 }
 
-/// The provisional result for a goal which is not on the stack.
-#[derive(Debug)]
-struct DetachedEntry<X: Cx> {
-    /// The head of the smallest non-trivial cycle involving this entry.
-    ///
-    /// Given the following rules, when proving `A` the head for
-    /// the provisional entry of `C` would be `B`.
-    /// ```plain
-    /// A :- B
-    /// B :- C
-    /// C :- A + B + C
-    /// ```
-    head: StackDepth,
-    result: X::Result,
-}
-
-/// Stores the stack depth of a currently evaluated goal *and* already
-/// computed results for goals which depend on other goals still on the stack.
-///
-/// The provisional result may depend on whether the stack above it is inductive
-/// or coinductive. Because of this, we store separate provisional results for
-/// each case. If an provisional entry is not applicable, it may be the case
-/// that we already have provisional result while computing a goal. In this case
-/// we prefer the provisional result to potentially avoid fixpoint iterations.
-/// See tests/ui/traits/next-solver/cycles/mixed-cycles-2.rs for an example.
-///
-/// The provisional cache can theoretically result in changes to the observable behavior,
-/// see tests/ui/traits/next-solver/cycles/provisional-cache-impacts-behavior.rs.
-#[derive_where(Default; X: Cx)]
+/// A provisional result of an already computed goal which depends on other
+/// goals still on the stack.
+#[derive_where(Debug; X: Cx)]
 struct ProvisionalCacheEntry<X: Cx> {
-    stack_depth: Option<StackDepth>,
-    with_inductive_stack: Option<DetachedEntry<X>>,
-    with_coinductive_stack: Option<DetachedEntry<X>>,
-}
-
-impl<X: Cx> ProvisionalCacheEntry<X> {
-    fn is_empty(&self) -> bool {
-        self.stack_depth.is_none()
-            && self.with_inductive_stack.is_none()
-            && self.with_coinductive_stack.is_none()
-    }
+    /// Whether evaluating the goal encountered overflow. This is used to
+    /// disable the cache entry except if the last goal on the stack is
+    /// already involved in this cycle.
+    encountered_overflow: bool,
+    /// All cycle heads this cache entry depends on.
+    heads: CycleHeads,
+    /// The path from the highest cycle head to this goal.
+    path_from_head: PathKind,
+    nested_goals: NestedGoals<X>,
+    result: X::Result,
 }
 
 pub struct SearchGraph<D: Delegate<Cx = X>, X: Cx = <D as Delegate>::Cx> {
@@ -247,7 +362,11 @@ pub struct SearchGraph<D: Delegate<Cx = X>, X: Cx = <D as Delegate>::Cx> {
     ///
     /// An element is *deeper* in the stack if its index is *lower*.
     stack: IndexVec<StackDepth, StackEntry<X>>,
-    provisional_cache: HashMap<X::Input, ProvisionalCacheEntry<X>>,
+    /// The provisional cache contains entries for already computed goals which
+    /// still depend on goals higher-up in the stack. We don't move them to the
+    /// global cache and track them locally instead. A provisional cache entry
+    /// is only valid until the result of one of its cycle heads changes.
+    provisional_cache: HashMap<X::Input, Vec<ProvisionalCacheEntry<X>>>,
 
     _marker: PhantomData<D>,
 }
@@ -266,77 +385,66 @@ impl<D: Delegate<Cx = X>, X: Cx> SearchGraph<D> {
         self.mode
     }
 
-    fn update_parent_goal(&mut self, reached_depth: StackDepth, encountered_overflow: bool) {
-        if let Some(parent) = self.stack.raw.last_mut() {
+    /// Lazily update the stack entry for the parent goal.
+    /// This behavior is shared between actually evaluating goals
+    /// and using existing global cache entries to make sure they
+    /// have the same impact on the remaining evaluation.
+    fn update_parent_goal(
+        cx: X,
+        stack: &mut IndexVec<StackDepth, StackEntry<X>>,
+        reached_depth: StackDepth,
+        heads: &CycleHeads,
+        encountered_overflow: bool,
+        nested_goals: &NestedGoals<X>,
+    ) {
+        if let Some(parent_index) = stack.last_index() {
+            let parent = &mut stack[parent_index];
             parent.reached_depth = parent.reached_depth.max(reached_depth);
             parent.encountered_overflow |= encountered_overflow;
+
+            parent.heads.extend_from_child(parent_index, heads);
+            let step_kind = Self::step_kind(cx, parent.input);
+            parent.nested_goals.extend_from_child(step_kind, nested_goals);
+            // Once we've got goals which encountered overflow or a cycle,
+            // we track all goals whose behavior may depend on these
+            // goals as this change may cause them to now depend on additional
+            // goals, resulting in new cycles. See the dev-guide for examples.
+            if !nested_goals.is_empty() {
+                parent.nested_goals.insert(parent.input, UsageKind::Single(PathKind::Coinductive))
+            }
         }
     }
 
     pub fn is_empty(&self) -> bool {
-        self.stack.is_empty()
+        if self.stack.is_empty() {
+            debug_assert!(self.provisional_cache.is_empty());
+            true
+        } else {
+            false
+        }
     }
 
-    fn stack_coinductive_from(
-        cx: X,
-        stack: &IndexVec<StackDepth, StackEntry<X>>,
-        head: StackDepth,
-    ) -> bool {
-        stack.raw[head.index()..].iter().all(|entry| D::step_is_coinductive(cx, entry.input))
-    }
-
-    // When encountering a solver cycle, the result of the current goal
-    // depends on goals lower on the stack.
-    //
-    // We have to therefore be careful when caching goals. Only the final result
-    // of the cycle root, i.e. the lowest goal on the stack involved in this cycle,
-    // is moved to the global cache while all others are stored in a provisional cache.
-    //
-    // We update both the head of this cycle to rerun its evaluation until
-    // we reach a fixpoint and all other cycle participants to make sure that
-    // their result does not get moved to the global cache.
-    fn tag_cycle_participants(
-        stack: &mut IndexVec<StackDepth, StackEntry<X>>,
-        usage_kind: Option<UsageKind>,
-        head: StackDepth,
-    ) {
-        if let Some(usage_kind) = usage_kind {
-            stack[head].has_been_used =
-                Some(stack[head].has_been_used.map_or(usage_kind, |prev| prev.merge(usage_kind)));
-        }
-        debug_assert!(stack[head].has_been_used.is_some());
-
-        // The current root of these cycles. Note that this may not be the final
-        // root in case a later goal depends on a goal higher up the stack.
-        let mut current_root = head;
-        while let Some(parent) = stack[current_root].non_root_cycle_participant {
-            current_root = parent;
-            debug_assert!(stack[current_root].has_been_used.is_some());
-        }
+    /// The number of goals currently in the search graph. This should only be
+    /// used for debugging purposes.
+    pub fn debug_current_depth(&self) -> usize {
+        self.stack.len()
+    }
 
-        let (stack, cycle_participants) = stack.raw.split_at_mut(head.index() + 1);
-        let current_cycle_root = &mut stack[current_root.as_usize()];
-        for entry in cycle_participants {
-            entry.non_root_cycle_participant = entry.non_root_cycle_participant.max(Some(head));
-            current_cycle_root.nested_goals.insert(entry.input);
-            current_cycle_root.nested_goals.extend(mem::take(&mut entry.nested_goals));
-        }
+    fn step_kind(cx: X, input: X::Input) -> PathKind {
+        if D::step_is_coinductive(cx, input) { PathKind::Coinductive } else { PathKind::Inductive }
     }
 
-    fn clear_dependent_provisional_results(
-        provisional_cache: &mut HashMap<X::Input, ProvisionalCacheEntry<X>>,
+    /// Whether the path from `head` to the current stack entry is inductive or coinductive.
+    fn stack_path_kind(
+        cx: X,
+        stack: &IndexVec<StackDepth, StackEntry<X>>,
         head: StackDepth,
-    ) {
-        #[allow(rustc::potential_query_instability)]
-        provisional_cache.retain(|_, entry| {
-            if entry.with_coinductive_stack.as_ref().is_some_and(|p| p.head == head) {
-                entry.with_coinductive_stack.take();
-            }
-            if entry.with_inductive_stack.as_ref().is_some_and(|p| p.head == head) {
-                entry.with_inductive_stack.take();
-            }
-            !entry.is_empty()
-        });
+    ) -> PathKind {
+        if stack.raw[head.index()..].iter().all(|entry| D::step_is_coinductive(cx, entry.input)) {
+            PathKind::Coinductive
+        } else {
+            PathKind::Inductive
+        }
     }
 
     /// Probably the most involved method of the whole solver.
@@ -348,89 +456,65 @@ impl<D: Delegate<Cx = X>, X: Cx> SearchGraph<D> {
         cx: X,
         input: X::Input,
         inspect: &mut D::ProofTreeBuilder,
-        mut prove_goal: impl FnMut(&mut Self, &mut D::ProofTreeBuilder) -> X::Result,
+        mut evaluate_goal: impl FnMut(&mut Self, &mut D::ProofTreeBuilder) -> X::Result,
     ) -> X::Result {
-        self.check_invariants();
-        // Check for overflow.
         let Some(available_depth) = AvailableDepth::allowed_depth_for_nested::<D>(cx, &self.stack)
         else {
-            if let Some(last) = self.stack.raw.last_mut() {
-                last.encountered_overflow = true;
-            }
-
-            debug!("encountered stack overflow");
-            return D::on_stack_overflow(cx, inspect, input);
+            return self.handle_overflow(cx, input, inspect);
         };
 
-        if let Some(result) = self.lookup_global_cache(cx, input, available_depth, inspect) {
+        // We check the provisional cache before checking the global cache. This simplifies
+        // the implementation as we can avoid worrying about cases where both the global and
+        // provisional cache may apply, e.g. consider the following example
+        //
+        // - xxBA overflow
+        // - A
+        //     - BA cycle
+        //     - CB :x:
+        if let Some(result) = self.lookup_provisional_cache(cx, input) {
             return result;
         }
 
-        // Check whether the goal is in the provisional cache.
-        // The provisional result may rely on the path to its cycle roots,
-        // so we have to check the path of the current goal matches that of
-        // the cache entry.
-        let cache_entry = self.provisional_cache.entry(input).or_default();
-        if let Some(entry) = cache_entry
-            .with_coinductive_stack
-            .as_ref()
-            .filter(|p| Self::stack_coinductive_from(cx, &self.stack, p.head))
-            .or_else(|| {
-                cache_entry
-                    .with_inductive_stack
-                    .as_ref()
-                    .filter(|p| !Self::stack_coinductive_from(cx, &self.stack, p.head))
-            })
-        {
-            debug!("provisional cache hit");
-            // We have a nested goal which is already in the provisional cache, use
-            // its result. We do not provide any usage kind as that should have been
-            // already set correctly while computing the cache entry.
-            inspect.on_provisional_cache_hit();
-            Self::tag_cycle_participants(&mut self.stack, None, entry.head);
-            return entry.result;
-        } else if let Some(stack_depth) = cache_entry.stack_depth {
-            debug!("encountered cycle with depth {stack_depth:?}");
-            // We have a nested goal which directly relies on a goal deeper in the stack.
-            //
-            // We start by tagging all cycle participants, as that's necessary for caching.
-            //
-            // Finally we can return either the provisional response or the initial response
-            // in case we're in the first fixpoint iteration for this goal.
-            inspect.on_cycle_in_stack();
-
-            let is_coinductive_cycle = Self::stack_coinductive_from(cx, &self.stack, stack_depth);
-            let cycle_kind =
-                if is_coinductive_cycle { CycleKind::Coinductive } else { CycleKind::Inductive };
-            Self::tag_cycle_participants(
-                &mut self.stack,
-                Some(UsageKind::Single(cycle_kind)),
-                stack_depth,
-            );
-
-            // Return the provisional result or, if we're in the first iteration,
-            // start with no constraints.
-            return if let Some(result) = self.stack[stack_depth].provisional_result {
-                result
-            } else {
-                D::initial_provisional_result(cx, cycle_kind, input)
-            };
+        // Lookup the global cache unless we're building proof trees or are currently
+        // fuzzing.
+        let validate_cache = if !D::inspect_is_noop(inspect) {
+            None
+        } else if let Some(scope) = D::enter_validation_scope(cx, input) {
+            // When validating the global cache we need to track the goals for which the
+            // global cache has been disabled as it may otherwise change the result for
+            // cyclic goals. We don't care about goals which are not on the current stack
+            // so it's fine to drop their scope eagerly.
+            self.lookup_global_cache_untracked(cx, input, available_depth)
+                .inspect(|expected| debug!(?expected, "validate cache entry"))
+                .map(|r| (scope, r))
+        } else if let Some(result) = self.lookup_global_cache(cx, input, available_depth) {
+            return result;
         } else {
-            // No entry, we push this goal on the stack and try to prove it.
-            let depth = self.stack.next_index();
-            let entry = StackEntry {
-                input,
-                available_depth,
-                reached_depth: depth,
-                non_root_cycle_participant: None,
-                encountered_overflow: false,
-                has_been_used: None,
-                nested_goals: Default::default(),
-                provisional_result: None,
-            };
-            assert_eq!(self.stack.push(entry), depth);
-            cache_entry.stack_depth = Some(depth);
+            None
+        };
+
+        // Detect cycles on the stack. We do this after the global cache lookup to
+        // avoid iterating over the stack in case a goal has already been computed.
+        // This may not have an actual performance impact and we could reorder them
+        // as it may reduce the number of `nested_goals` we need to track.
+        if let Some(result) = self.check_cycle_on_stack(cx, input) {
+            debug_assert!(validate_cache.is_none(), "global cache and cycle on stack");
+            return result;
+        }
+
+        // Unfortunately, it looks like we actually have to compute this goal.
+        let depth = self.stack.next_index();
+        let entry = StackEntry {
+            input,
+            available_depth,
+            reached_depth: depth,
+            heads: Default::default(),
+            encountered_overflow: false,
+            has_been_used: None,
+            nested_goals: Default::default(),
+            provisional_result: None,
         };
+        assert_eq!(self.stack.push(entry), depth);
 
         // This is for global caching, so we properly track query dependencies.
         // Everything that affects the `result` should be performed within this
@@ -439,65 +523,320 @@ impl<D: Delegate<Cx = X>, X: Cx> SearchGraph<D> {
         // must not be added to the global cache. Notably, this is the case for
         // trait solver cycles participants.
         let ((final_entry, result), dep_node) = cx.with_cached_task(|| {
-            for _ in 0..D::FIXPOINT_STEP_LIMIT {
-                match self.fixpoint_step_in_task(cx, input, inspect, &mut prove_goal) {
-                    StepResult::Done(final_entry, result) => return (final_entry, result),
-                    StepResult::HasChanged => debug!("fixpoint changed provisional results"),
-                }
+            self.evaluate_goal_in_task(cx, input, inspect, &mut evaluate_goal)
+        });
+
+        // We've finished computing the goal and have popped it from the stack,
+        // lazily update its parent goal.
+        Self::update_parent_goal(
+            cx,
+            &mut self.stack,
+            final_entry.reached_depth,
+            &final_entry.heads,
+            final_entry.encountered_overflow,
+            &final_entry.nested_goals,
+        );
+
+        // We're now done with this goal. We only add the root of cycles to the global cache.
+        // In case this goal is involved in a larger cycle add it to the provisional cache.
+        if final_entry.heads.is_empty() {
+            if let Some((_scope, expected)) = validate_cache {
+                // Do not try to move a goal into the cache again if we're testing
+                // the global cache.
+                assert_eq!(result, expected, "input={input:?}");
+            } else if D::inspect_is_noop(inspect) {
+                self.insert_global_cache(cx, input, final_entry, result, dep_node)
             }
+        } else if D::ENABLE_PROVISIONAL_CACHE {
+            debug_assert!(validate_cache.is_none());
+            let entry = self.provisional_cache.entry(input).or_default();
+            let StackEntry { heads, nested_goals, encountered_overflow, .. } = final_entry;
+            let path_from_head = Self::stack_path_kind(cx, &self.stack, heads.highest_cycle_head());
+            entry.push(ProvisionalCacheEntry {
+                encountered_overflow,
+                heads,
+                path_from_head,
+                nested_goals,
+                result,
+            });
+        } else {
+            debug_assert!(validate_cache.is_none());
+        }
+
+        result
+    }
+
+    fn handle_overflow(
+        &mut self,
+        cx: X,
+        input: X::Input,
+        inspect: &mut D::ProofTreeBuilder,
+    ) -> X::Result {
+        if let Some(last) = self.stack.raw.last_mut() {
+            last.encountered_overflow = true;
+            // If computing a goal `B` depends on another goal `A` and
+            // `A` has a nested goal which overflows, then computing `B`
+            // at the same depth, but with `A` already on the stack,
+            // would encounter a solver cycle instead, potentially
+            // changing the result.
+            //
+            // We must therefore not use the global cache entry for `B` in that case.
+            // See tests/ui/traits/next-solver/cycles/hidden-by-overflow.rs
+            last.nested_goals.insert(last.input, UsageKind::Single(PathKind::Coinductive));
+        }
 
-            debug!("canonical cycle overflow");
-            let current_entry = self.stack.pop().unwrap();
-            debug_assert!(current_entry.has_been_used.is_none());
-            let result = D::on_fixpoint_overflow(cx, input);
-            (current_entry, result)
+        debug!("encountered stack overflow");
+        D::on_stack_overflow(cx, inspect, input)
+    }
+
+    /// When reevaluating a goal with a changed provisional result, all provisional cache
+    /// entries which depend on this goal get invalidated.
+    fn clear_dependent_provisional_results(&mut self) {
+        let head = self.stack.next_index();
+        #[allow(rustc::potential_query_instability)]
+        self.provisional_cache.retain(|_, entries| {
+            entries.retain(|entry| entry.heads.highest_cycle_head() != head);
+            !entries.is_empty()
         });
+    }
 
-        let proof_tree = inspect.finalize_canonical_goal_evaluation(cx);
+    /// A necessary optimization to handle complex solver cycles. A provisional cache entry
+    /// relies on a set of cycle heads and the path towards these heads. When popping a cycle
+    /// head from the stack after we've finished computing it, we can't be sure that the
+    /// provisional cache entry is still applicable. We need to keep the cache entries to
+    /// prevent hangs.
+    ///
+    /// What we therefore do is check whether the cycle kind of all cycles the goal of a
+    /// provisional cache entry is involved in would stay the same when computing the
+    /// goal without its cycle head on the stack. For more details, see the relevant
+    /// [rustc-dev-guide chapter](https://rustc-dev-guide.rust-lang.org/solve/caching.html).
+    ///
+    /// This can be thought of as rotating the sub-tree of this provisional result and changing
+    /// its entry point while making sure that all paths through this sub-tree stay the same.
+    ///
+    ///
+    /// In case the popped cycle head failed to reach a fixpoint anything which depends on
+    /// its provisional result is invalid. Actually discarding provisional cache entries in
+    /// this case would cause hangs, so we instead change the result of dependent provisional
+    /// cache entries to also be ambiguous. This causes some undesirable ambiguity for nested
+    /// goals whose result doesn't actually depend on this cycle head, but that's acceptable
+    /// to me.
+    fn rebase_provisional_cache_entries(
+        &mut self,
+        cx: X,
+        stack_entry: &StackEntry<X>,
+        mut mutate_result: impl FnMut(X::Input, X::Result) -> X::Result,
+    ) {
+        let head = self.stack.next_index();
+        #[allow(rustc::potential_query_instability)]
+        self.provisional_cache.retain(|&input, entries| {
+            entries.retain_mut(|entry| {
+                let ProvisionalCacheEntry {
+                    encountered_overflow: _,
+                    heads,
+                    path_from_head,
+                    nested_goals,
+                    result,
+                } = entry;
+                if heads.highest_cycle_head() != head {
+                    return true;
+                }
 
-        self.update_parent_goal(final_entry.reached_depth, final_entry.encountered_overflow);
+                // We don't try rebasing if the path from the current head
+                // to the cache entry is not coinductive or if the path from
+                // the cache entry to the current head is not coinductive.
+                //
+                // Both of these constraints could be weakened, but by only
+                // accepting coinductive paths we don't have to worry about
+                // changing the cycle kind of the remaining cycles. We can
+                // extend this in the future once there's a known issue
+                // caused by it.
+                if *path_from_head != PathKind::Coinductive
+                    || nested_goals.get(stack_entry.input).unwrap()
+                        != UsageKind::Single(PathKind::Coinductive)
+                {
+                    return false;
+                }
 
-        // We're now done with this goal. In case this goal is involved in a larger cycle
-        // do not remove it from the provisional cache and update its provisional result.
-        // We only add the root of cycles to the global cache.
-        if let Some(head) = final_entry.non_root_cycle_participant {
-            let coinductive_stack = Self::stack_coinductive_from(cx, &self.stack, head);
+                // Merge the cycle heads of the provisional cache entry and the
+                // popped head. If the popped cycle head was a root, discard all
+                // provisional cache entries which depend on it.
+                heads.remove_highest_cycle_head();
+                heads.merge(&stack_entry.heads);
+                let Some(head) = heads.opt_highest_cycle_head() else {
+                    return false;
+                };
 
-            let entry = self.provisional_cache.get_mut(&input).unwrap();
-            entry.stack_depth = None;
-            if coinductive_stack {
-                entry.with_coinductive_stack = Some(DetachedEntry { head, result });
-            } else {
-                entry.with_inductive_stack = Some(DetachedEntry { head, result });
+                // As we've made sure that the path from the new highest cycle
+                // head to the uses of the popped cycle head are fully coinductive,
+                // we can be sure that the paths to all nested goals of the popped
+                // cycle head remain the same. We can simply merge them.
+                nested_goals.merge(&stack_entry.nested_goals);
+                // We now care about the path from the next highest cycle head to the
+                // provisional cache entry.
+                *path_from_head = Self::stack_path_kind(cx, &self.stack, head);
+                // Mutate the result of the provisional cache entry in case we did
+                // not reach a fixpoint.
+                *result = mutate_result(input, *result);
+                true
+            });
+            !entries.is_empty()
+        });
+    }
+
+    fn lookup_provisional_cache(&mut self, cx: X, input: X::Input) -> Option<X::Result> {
+        if !D::ENABLE_PROVISIONAL_CACHE {
+            return None;
+        }
+
+        let entries = self.provisional_cache.get(&input)?;
+        for &ProvisionalCacheEntry {
+            encountered_overflow,
+            ref heads,
+            path_from_head,
+            ref nested_goals,
+            result,
+        } in entries
+        {
+            let head = heads.highest_cycle_head();
+            if encountered_overflow {
+                // This check is overly strict and very subtle. We need to make sure that if
+                // a global cache entry depends on some goal without adding it to its
+                // `nested_goals`, that goal must never have an applicable provisional
+                // cache entry to avoid incorrectly applying the cache entry.
+                //
+                // As we'd have to otherwise track literally all nested goals, we only
+                // apply provisional cache entries which encountered overflow once the
+                // current goal is already part of the same cycle. This check could be
+                // improved but seems to be good enough for now.
+                let last = self.stack.raw.last().unwrap();
+                if !last.heads.opt_lowest_cycle_head().is_some_and(|lowest| lowest <= head) {
+                    continue;
+                }
             }
-        } else {
-            // When encountering a cycle, both inductive and coinductive, we only
-            // move the root into the global cache. We also store all other cycle
-            // participants involved.
-            //
-            // We must not use the global cache entry of a root goal if a cycle
-            // participant is on the stack. This is necessary to prevent unstable
-            // results. See the comment of `StackEntry::nested_goals` for
-            // more details.
-            self.provisional_cache.remove(&input);
-            let additional_depth = final_entry.reached_depth.as_usize() - self.stack.len();
-            cx.with_global_cache(self.mode, |cache| {
-                cache.insert(
+
+            // A provisional cache entry is only valid if the current path from its
+            // highest cycle head to the goal is the same.
+            if path_from_head == Self::stack_path_kind(cx, &self.stack, head) {
+                // While we don't have to track the full depth of the provisional cache entry,
+                // we do have to increment the required depth by one as we'd have already failed
+                // with overflow otherwise
+                let next_index = self.stack.next_index();
+                let last = &mut self.stack.raw.last_mut().unwrap();
+                let path_from_entry = Self::step_kind(cx, last.input);
+                last.nested_goals.insert(input, UsageKind::Single(path_from_entry));
+
+                Self::update_parent_goal(
                     cx,
-                    input,
-                    result,
-                    proof_tree,
-                    dep_node,
-                    additional_depth,
-                    final_entry.encountered_overflow,
-                    &final_entry.nested_goals,
-                )
-            })
+                    &mut self.stack,
+                    next_index,
+                    heads,
+                    false,
+                    nested_goals,
+                );
+                debug_assert!(self.stack[head].has_been_used.is_some());
+                debug!(?head, ?path_from_head, "provisional cache hit");
+                return Some(result);
+            }
         }
 
-        self.check_invariants();
+        None
+    }
 
-        result
+    /// Even if there is a global cache entry for a given goal, we need to make sure
+    /// evaluating this entry would not have ended up depending on either a goal
+    /// already on the stack or a provisional cache entry.
+    fn candidate_is_applicable(
+        cx: X,
+        stack: &IndexVec<StackDepth, StackEntry<X>>,
+        provisional_cache: &HashMap<X::Input, Vec<ProvisionalCacheEntry<X>>>,
+        nested_goals: &NestedGoals<X>,
+    ) -> bool {
+        // If the global cache entry didn't depend on any nested goals, it always
+        // applies.
+        if nested_goals.is_empty() {
+            return true;
+        }
+
+        // If a nested goal of the global cache entry is on the stack, we would
+        // definitely encounter a cycle.
+        if stack.iter().any(|e| nested_goals.contains(e.input)) {
+            debug!("cache entry not applicable due to stack");
+            return false;
+        }
+
+        // The global cache entry is also invalid if a provisional cache entry
+        // would apply for any of its nested goals.
+        #[allow(rustc::potential_query_instability)]
+        for (input, path_from_global_entry) in nested_goals.iter() {
+            let Some(entries) = provisional_cache.get(&input) else {
+                continue;
+            };
+
+            debug!(?input, ?path_from_global_entry, ?entries, "candidate_is_applicable");
+            // A provisional cache entry is applicable if the path to
+            // its highest cycle head is equal to the expected path.
+            for &ProvisionalCacheEntry {
+                encountered_overflow,
+                ref heads,
+                path_from_head,
+                nested_goals: _,
+                result: _,
+            } in entries.iter()
+            {
+                // We don't have to worry about provisional cache entries which encountered
+                // overflow, see the relevant comment in `lookup_provisional_cache`.
+                if encountered_overflow {
+                    continue;
+                }
+
+                // A provisional cache entry only applies if the path from its highest head
+                // matches the path when encountering the goal.
+                let head = heads.highest_cycle_head();
+                let full_path = match Self::stack_path_kind(cx, stack, head) {
+                    PathKind::Coinductive => path_from_global_entry,
+                    PathKind::Inductive => UsageKind::Single(PathKind::Inductive),
+                };
+
+                match (full_path, path_from_head) {
+                    (UsageKind::Mixed, _)
+                    | (UsageKind::Single(PathKind::Coinductive), PathKind::Coinductive)
+                    | (UsageKind::Single(PathKind::Inductive), PathKind::Inductive) => {
+                        debug!(
+                            ?full_path,
+                            ?path_from_head,
+                            "cache entry not applicable due to matching paths"
+                        );
+                        return false;
+                    }
+                    _ => debug!(?full_path, ?path_from_head, "paths don't match"),
+                }
+            }
+        }
+
+        true
+    }
+
+    /// Used when fuzzing the global cache. Accesses the global cache without
+    /// updating the state of the search graph.
+    fn lookup_global_cache_untracked(
+        &self,
+        cx: X,
+        input: X::Input,
+        available_depth: AvailableDepth,
+    ) -> Option<X::Result> {
+        cx.with_global_cache(self.mode, |cache| {
+            cache
+                .get(cx, input, available_depth, |nested_goals| {
+                    Self::candidate_is_applicable(
+                        cx,
+                        &self.stack,
+                        &self.provisional_cache,
+                        nested_goals,
+                    )
+                })
+                .map(|c| c.result)
+        })
     }
 
     /// Try to fetch a previously computed result from the global cache,
@@ -508,97 +847,206 @@ impl<D: Delegate<Cx = X>, X: Cx> SearchGraph<D> {
         cx: X,
         input: X::Input,
         available_depth: AvailableDepth,
-        inspect: &mut D::ProofTreeBuilder,
     ) -> Option<X::Result> {
         cx.with_global_cache(self.mode, |cache| {
-            let CacheData {
-                result,
-                proof_tree,
-                additional_depth,
-                encountered_overflow,
-                nested_goals: _, // FIXME: consider nested goals here.
-            } = cache.get(cx, input, &self.stack, available_depth)?;
-
-            // If we're building a proof tree and the current cache entry does not
-            // contain a proof tree, we do not use the entry but instead recompute
-            // the goal. We simply overwrite the existing entry once we're done,
-            // caching the proof tree.
-            if !inspect.try_apply_proof_tree(proof_tree) {
-                return None;
-            }
+            let CacheData { result, additional_depth, encountered_overflow, nested_goals } = cache
+                .get(cx, input, available_depth, |nested_goals| {
+                    Self::candidate_is_applicable(
+                        cx,
+                        &self.stack,
+                        &self.provisional_cache,
+                        nested_goals,
+                    )
+                })?;
 
             // Update the reached depth of the current goal to make sure
             // its state is the same regardless of whether we've used the
             // global cache or not.
             let reached_depth = self.stack.next_index().plus(additional_depth);
-            self.update_parent_goal(reached_depth, encountered_overflow);
+            // We don't move cycle participants to the global cache, so the
+            // cycle heads are always empty.
+            let heads = Default::default();
+            Self::update_parent_goal(
+                cx,
+                &mut self.stack,
+                reached_depth,
+                &heads,
+                encountered_overflow,
+                nested_goals,
+            );
 
-            debug!("global cache hit");
+            debug!(?additional_depth, "global cache hit");
             Some(result)
         })
     }
-}
 
-enum StepResult<X: Cx> {
-    Done(StackEntry<X>, X::Result),
-    HasChanged,
-}
+    fn check_cycle_on_stack(&mut self, cx: X, input: X::Input) -> Option<X::Result> {
+        let (head, _stack_entry) = self.stack.iter_enumerated().find(|(_, e)| e.input == input)?;
+        debug!("encountered cycle with depth {head:?}");
+        // We have a nested goal which directly relies on a goal deeper in the stack.
+        //
+        // We start by tagging all cycle participants, as that's necessary for caching.
+        //
+        // Finally we can return either the provisional response or the initial response
+        // in case we're in the first fixpoint iteration for this goal.
+        let path_kind = Self::stack_path_kind(cx, &self.stack, head);
+        let usage_kind = UsageKind::Single(path_kind);
+        self.stack[head].has_been_used =
+            Some(self.stack[head].has_been_used.map_or(usage_kind, |prev| prev.merge(usage_kind)));
+
+        // Subtle: when encountering a cyclic goal, we still first checked for overflow,
+        // so we have to update the reached depth.
+        let next_index = self.stack.next_index();
+        let last_index = self.stack.last_index().unwrap();
+        let last = &mut self.stack[last_index];
+        last.reached_depth = last.reached_depth.max(next_index);
+
+        let path_from_entry = Self::step_kind(cx, last.input);
+        last.nested_goals.insert(input, UsageKind::Single(path_from_entry));
+        last.nested_goals.insert(last.input, UsageKind::Single(PathKind::Coinductive));
+        if last_index != head {
+            last.heads.insert(head);
+        }
+
+        // Return the provisional result or, if we're in the first iteration,
+        // start with no constraints.
+        if let Some(result) = self.stack[head].provisional_result {
+            Some(result)
+        } else {
+            Some(D::initial_provisional_result(cx, path_kind, input))
+        }
+    }
+
+    /// Whether we've reached a fixpoint when evaluating a cycle head.
+    fn reached_fixpoint(
+        &mut self,
+        cx: X,
+        stack_entry: &StackEntry<X>,
+        usage_kind: UsageKind,
+        result: X::Result,
+    ) -> bool {
+        if let Some(prev) = stack_entry.provisional_result {
+            prev == result
+        } else if let UsageKind::Single(kind) = usage_kind {
+            D::is_initial_provisional_result(cx, kind, stack_entry.input, result)
+        } else {
+            false
+        }
+    }
 
-impl<D: Delegate<Cx = X>, X: Cx> SearchGraph<D> {
     /// When we encounter a coinductive cycle, we have to fetch the
     /// result of that cycle while we are still computing it. Because
     /// of this we continuously recompute the cycle until the result
     /// of the previous iteration is equal to the final result, at which
     /// point we are done.
-    fn fixpoint_step_in_task<F>(
+    fn evaluate_goal_in_task(
         &mut self,
         cx: X,
         input: X::Input,
         inspect: &mut D::ProofTreeBuilder,
-        prove_goal: &mut F,
-    ) -> StepResult<X>
-    where
-        F: FnMut(&mut Self, &mut D::ProofTreeBuilder) -> X::Result,
-    {
-        let result = prove_goal(self, inspect);
-        let stack_entry = self.stack.pop().unwrap();
-        debug_assert_eq!(stack_entry.input, input);
-
-        // If the current goal is not the root of a cycle, we are done.
-        let Some(usage_kind) = stack_entry.has_been_used else {
-            return StepResult::Done(stack_entry, result);
-        };
+        mut evaluate_goal: impl FnMut(&mut Self, &mut D::ProofTreeBuilder) -> X::Result,
+    ) -> (StackEntry<X>, X::Result) {
+        let mut i = 0;
+        loop {
+            let result = evaluate_goal(self, inspect);
+            let stack_entry = self.stack.pop().unwrap();
+            debug_assert_eq!(stack_entry.input, input);
 
-        // If it is a cycle head, we have to keep trying to prove it until
-        // we reach a fixpoint. We need to do so for all cycle heads,
-        // not only for the root.
-        //
-        // See tests/ui/traits/next-solver/cycles/fixpoint-rerun-all-cycle-heads.rs
-        // for an example.
-
-        // Start by clearing all provisional cache entries which depend on this
-        // the current goal.
-        Self::clear_dependent_provisional_results(
-            &mut self.provisional_cache,
-            self.stack.next_index(),
-        );
+            // If the current goal is not the root of a cycle, we are done.
+            //
+            // There are no provisional cache entries which depend on this goal.
+            let Some(usage_kind) = stack_entry.has_been_used else {
+                return (stack_entry, result);
+            };
 
-        // Check whether we reached a fixpoint, either because the final result
-        // is equal to the provisional result of the previous iteration, or because
-        // this was only the root of either coinductive or inductive cycles, and the
-        // final result is equal to the initial response for that case.
-        //
-        // If we did not reach a fixpoint, update the provisional result and reevaluate.
-        if D::reached_fixpoint(cx, usage_kind, input, stack_entry.provisional_result, result) {
-            StepResult::Done(stack_entry, result)
-        } else {
-            let depth = self.stack.push(StackEntry {
+            // If it is a cycle head, we have to keep trying to prove it until
+            // we reach a fixpoint. We need to do so for all cycle heads,
+            // not only for the root.
+            //
+            // See tests/ui/traits/next-solver/cycles/fixpoint-rerun-all-cycle-heads.rs
+            // for an example.
+            //
+            // Check whether we reached a fixpoint, either because the final result
+            // is equal to the provisional result of the previous iteration, or because
+            // this was only the root of either coinductive or inductive cycles, and the
+            // final result is equal to the initial response for that case.
+            if self.reached_fixpoint(cx, &stack_entry, usage_kind, result) {
+                self.rebase_provisional_cache_entries(cx, &stack_entry, |_, result| result);
+                return (stack_entry, result);
+            }
+
+            // If computing this goal results in ambiguity with no constraints,
+            // we do not rerun it. It's incredibly difficult to get a different
+            // response in the next iteration in this case. These changes would
+            // likely either be caused by incompleteness or can change the maybe
+            // cause from ambiguity to overflow. Returning ambiguity always
+            // preserves soundness and completeness even if the goal is known
+            // to succeed or fail.
+            //
+            // This prevents exponential blowup affecting multiple major crates.
+            // As we only get to this branch if we haven't yet reached a fixpoint,
+            // we also taint all provisional cache entries which depend on the
+            // current goal.
+            if D::is_ambiguous_result(result) {
+                self.rebase_provisional_cache_entries(cx, &stack_entry, |input, _| {
+                    D::propagate_ambiguity(cx, input, result)
+                });
+                return (stack_entry, result);
+            };
+
+            // If we've reached the fixpoint step limit, we bail with overflow and taint all
+            // provisional cache entries which depend on the current goal.
+            i += 1;
+            if i >= D::FIXPOINT_STEP_LIMIT {
+                debug!("canonical cycle overflow");
+                let result = D::on_fixpoint_overflow(cx, input);
+                self.rebase_provisional_cache_entries(cx, &stack_entry, |input, _| {
+                    D::on_fixpoint_overflow(cx, input)
+                });
+                return (stack_entry, result);
+            }
+
+            // Clear all provisional cache entries which depend on a previous provisional
+            // result of this goal and rerun.
+            self.clear_dependent_provisional_results();
+
+            debug!(?result, "fixpoint changed provisional results");
+            self.stack.push(StackEntry {
                 has_been_used: None,
                 provisional_result: Some(result),
                 ..stack_entry
             });
-            debug_assert_eq!(self.provisional_cache[&input].stack_depth, Some(depth));
-            StepResult::HasChanged
         }
     }
+
+    /// When encountering a cycle, both inductive and coinductive, we only
+    /// move the root into the global cache. We also store all other cycle
+    /// participants involved.
+    ///
+    /// We must not use the global cache entry of a root goal if a cycle
+    /// participant is on the stack. This is necessary to prevent unstable
+    /// results. See the comment of `StackEntry::nested_goals` for
+    /// more details.
+    fn insert_global_cache(
+        &mut self,
+        cx: X,
+        input: X::Input,
+        final_entry: StackEntry<X>,
+        result: X::Result,
+        dep_node: X::DepNodeIndex,
+    ) {
+        let additional_depth = final_entry.reached_depth.as_usize() - self.stack.len();
+        debug!(?final_entry, ?result, "insert global cache");
+        cx.with_global_cache(self.mode, |cache| {
+            cache.insert(
+                cx,
+                input,
+                result,
+                dep_node,
+                additional_depth,
+                final_entry.encountered_overflow,
+                final_entry.nested_goals,
+            )
+        })
+    }
 }
diff --git a/compiler/rustc_type_ir/src/search_graph/validate.rs b/compiler/rustc_type_ir/src/search_graph/validate.rs
deleted file mode 100644
index 1ae806834ba7d..0000000000000
--- a/compiler/rustc_type_ir/src/search_graph/validate.rs
+++ /dev/null
@@ -1,75 +0,0 @@
-use super::*;
-
-impl<D: Delegate<Cx = X>, X: Cx> SearchGraph<D> {
-    #[allow(rustc::potential_query_instability)]
-    pub(super) fn check_invariants(&self) {
-        if !cfg!(debug_assertions) {
-            return;
-        }
-
-        let SearchGraph { mode: _, stack, provisional_cache, _marker } = self;
-        if stack.is_empty() {
-            assert!(provisional_cache.is_empty());
-        }
-
-        for (depth, entry) in stack.iter_enumerated() {
-            let StackEntry {
-                input,
-                available_depth: _,
-                reached_depth: _,
-                non_root_cycle_participant,
-                encountered_overflow: _,
-                has_been_used,
-                ref nested_goals,
-                provisional_result,
-            } = *entry;
-            let cache_entry = provisional_cache.get(&entry.input).unwrap();
-            assert_eq!(cache_entry.stack_depth, Some(depth));
-            if let Some(head) = non_root_cycle_participant {
-                assert!(head < depth);
-                assert!(nested_goals.is_empty());
-                assert_ne!(stack[head].has_been_used, None);
-
-                let mut current_root = head;
-                while let Some(parent) = stack[current_root].non_root_cycle_participant {
-                    current_root = parent;
-                }
-                assert!(stack[current_root].nested_goals.contains(&input));
-            }
-
-            if !nested_goals.is_empty() {
-                assert!(provisional_result.is_some() || has_been_used.is_some());
-                for entry in stack.iter().take(depth.as_usize()) {
-                    assert_eq!(nested_goals.get(&entry.input), None);
-                }
-            }
-        }
-
-        for (&input, entry) in &self.provisional_cache {
-            let ProvisionalCacheEntry { stack_depth, with_coinductive_stack, with_inductive_stack } =
-                entry;
-            assert!(
-                stack_depth.is_some()
-                    || with_coinductive_stack.is_some()
-                    || with_inductive_stack.is_some()
-            );
-
-            if let &Some(stack_depth) = stack_depth {
-                assert_eq!(stack[stack_depth].input, input);
-            }
-
-            let check_detached = |detached_entry: &DetachedEntry<X>| {
-                let DetachedEntry { head, result: _ } = *detached_entry;
-                assert_ne!(stack[head].has_been_used, None);
-            };
-
-            if let Some(with_coinductive_stack) = with_coinductive_stack {
-                check_detached(with_coinductive_stack);
-            }
-
-            if let Some(with_inductive_stack) = with_inductive_stack {
-                check_detached(with_inductive_stack);
-            }
-        }
-    }
-}
diff --git a/compiler/rustc_type_ir/src/solve/inspect.rs b/compiler/rustc_type_ir/src/solve/inspect.rs
index 47d5e0dace71f..099c66f6bdc81 100644
--- a/compiler/rustc_type_ir/src/solve/inspect.rs
+++ b/compiler/rustc_type_ir/src/solve/inspect.rs
@@ -69,9 +69,7 @@ pub struct CanonicalGoalEvaluation<I: Interner> {
 #[derive_where(PartialEq, Eq, Hash, Debug; I: Interner)]
 pub enum CanonicalGoalEvaluationKind<I: Interner> {
     Overflow,
-    CycleInStack,
-    ProvisionalCacheHit,
-    Evaluation { final_revision: I::CanonicalGoalEvaluationStepRef },
+    Evaluation { final_revision: CanonicalGoalEvaluationStep<I> },
 }
 
 #[derive_where(PartialEq, Eq, Hash, Debug; I: Interner)]
diff --git a/compiler/rustc_type_ir/src/solve/mod.rs b/compiler/rustc_type_ir/src/solve/mod.rs
index 444fd01f01281..00fc6ba1c5c8f 100644
--- a/compiler/rustc_type_ir/src/solve/mod.rs
+++ b/compiler/rustc_type_ir/src/solve/mod.rs
@@ -340,11 +340,3 @@ impl MaybeCause {
         }
     }
 }
-
-#[derive_where(PartialEq, Eq, Debug; I: Interner)]
-pub struct CacheData<I: Interner> {
-    pub result: QueryResult<I>,
-    pub proof_tree: Option<I::CanonicalGoalEvaluationStepRef>,
-    pub additional_depth: usize,
-    pub encountered_overflow: bool,
-}