Auto merge of rust-lang#1935 - saethlin:optimize-sb, r=RalfJung

Optimizing Stacked Borrows (part 1?): Cache locations of Tags in a Borrow Stack

Before this PR, a profile of Miri under almost any workload pointed quite squarely at these two regions of code as incredibly hot (each accounting for ~40% of cycles):

https://github.com/rust-lang/miri/blob/dadcbebfbd017aac2358cf652a4bd71a91694edc/src/stacked_borrows.rs#L259-L269

https://github.com/rust-lang/miri/blob/dadcbebfbd017aac2358cf652a4bd71a91694edc/src/stacked_borrows.rs#L362-L369

These loops are one of at least three reasons that Stacked Borrows analysis is super-linear: both are linear in the number of borrows in the stack, and both sit on the most commonly taken paths.

I'm addressing the first loop (which is in `Stack::find_granting`) by adding a very simple LRU-style cache, implemented on a `VecDeque`, which maps recently-looked-up tags to their position in the stack (sketch below). For `Untagged` accesses we fall back to the same linear search as before, but as far as I can tell there are never enough `Untagged` items for this to matter.
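A minimal sketch of that cache idea, under assumed names (`StackCache`, `lookup`, and `insert` are illustrative, not the commit's actual identifiers):

```rust
use std::collections::VecDeque;

/// Illustrative stand-in for Miri's pointer tag type.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
struct SbTag(u64);

/// Maps recently-looked-up tags to their position in the borrow stack.
/// Cached positions must be fixed up or dropped whenever the stack is
/// mutated, since items can move or disappear.
struct StackCache {
    /// Most-recently-used entries live at the front.
    entries: VecDeque<(SbTag, usize)>,
}

impl StackCache {
    /// Kept small so that even a miss costs only a short scan.
    const CAPACITY: usize = 32;

    /// On a hit, move the entry to the front (LRU behavior) and return
    /// the cached stack position.
    fn lookup(&mut self, tag: SbTag) -> Option<usize> {
        let pos = self.entries.iter().position(|&(t, _)| t == tag)?;
        let hit = self.entries.remove(pos).unwrap();
        self.entries.push_front(hit);
        Some(hit.1)
    }

    /// Record a freshly-found tag, evicting the least-recently-used entry.
    fn insert(&mut self, tag: SbTag, stack_idx: usize) {
        if self.entries.len() == Self::CAPACITY {
            self.entries.pop_back();
        }
        self.entries.push_front((tag, stack_idx));
    }
}
```

On a miss, `find_granting` still performs the old linear scan and can then populate the cache with the result.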

I'm addressing the second loop by keeping track of the region of the stack that could contain items granting `Permission::Unique`. This optimization is extremely effective because `Read` accesses tend to dominate, and many trips through this code path can now skip the loop entirely (see the sketch below).
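A sketch of this second bookkeeping trick, again with an assumed field name (`unique_range`): if no `Unique` item can exist above the granting item, a read has nothing to disable:

```rust
use std::ops::Range;

struct Stack {
    // ... borrow items elided ...
    /// Conservative bound: every `Permission::Unique` item lies within this
    /// half-open range of stack indices. Maintained on every mutation.
    unique_range: Range<usize>,
}

impl Stack {
    /// A read access only disables `Unique` items *strictly above* the
    /// granting item, i.e. at indices >= granting_idx + 1. If the tracked
    /// range ends at or below that point, the disabling loop can be skipped.
    fn read_can_skip_disabling(&self, granting_idx: usize) -> bool {
        self.unique_range.end <= granting_idx + 1
    }
}
```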

These optimizations result in pretty enormous improvements:
Without raw pointer tagging, `mse` 34.5s -> 2.4s, `serde1` 5.6s -> 3.6s
With raw pointer tagging, `mse` 35.3s -> 2.4s, `serde1` 5.7s -> 3.6s

And there is hardly any impact on memory usage:
Memory usage on `mse` 844 MB -> 848 MB, `serde1` 184 MB -> 184 MB (jitter on these is a few MB).
bors committed Jul 3, 2022
2 parents 5382f46 + b004a03 commit cfad9d1
Showing 5 changed files with 421 additions and 108 deletions.
6 changes: 6 additions & 0 deletions Cargo.toml
@@ -50,3 +50,9 @@ rustc_private = true
 [[test]]
 name = "compiletest"
 harness = false
+
+[features]
+default = ["stack-cache"]
+# Will be enabled on CI via `--all-features`.
+expensive-debug-assertions = []
+stack-cache = []
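The `stack-cache` feature is on by default, so the optimization can be compiled out for comparison, and `expensive-debug-assertions` (enabled on CI) presumably guards costly consistency checks. A minimal sketch of the kind of `cfg` gating this makes possible; all names here are illustrative stand-ins, not the commit's actual ones:

```rust
#[derive(Clone, Copy, PartialEq, Eq)]
struct SbTag(u64);

#[derive(Default)]
struct StackCache {
    entries: Vec<(SbTag, usize)>, // (tag, stack position) pairs
}

struct Stack {
    tags: Vec<SbTag>,
    // The cache only exists when the feature is enabled.
    #[cfg(feature = "stack-cache")]
    cache: StackCache,
}

impl Stack {
    fn find(&self, tag: SbTag) -> Option<usize> {
        // Fast path: consult the cache when it is compiled in.
        #[cfg(feature = "stack-cache")]
        if let Some(&(_, idx)) = self.cache.entries.iter().find(|&&(t, _)| t == tag) {
            return Some(idx);
        }
        // Slow path: linear scan from the top of the stack.
        self.tags.iter().rposition(|&t| t == tag)
    }
}
```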
4 changes: 2 additions & 2 deletions src/lib.rs
@@ -90,8 +90,8 @@ pub use crate::mono_hash_map::MonoHashMap;
 pub use crate::operator::EvalContextExt as OperatorEvalContextExt;
 pub use crate::range_map::RangeMap;
 pub use crate::stacked_borrows::{
-    CallId, EvalContextExt as StackedBorEvalContextExt, Item, Permission, SbTag, SbTagExtra, Stack,
-    Stacks,
+    stack::Stack, CallId, EvalContextExt as StackedBorEvalContextExt, Item, Permission, SbTag,
+    SbTagExtra, Stacks,
 };
 pub use crate::sync::{CondvarId, EvalContextExt as SyncEvalContextExt, MutexId, RwLockId};
 pub use crate::thread::{
135 changes: 30 additions & 105 deletions src/stacked_borrows.rs
@@ -23,6 +23,9 @@ use crate::*;
 pub mod diagnostics;
 use diagnostics::{AllocHistory, TagHistory};
 
+pub mod stack;
+use stack::Stack;
+
 pub type CallId = NonZeroU64;
 
 // Even reading memory can have effects on the stack, so we need a `RefCell` here.
@@ -111,23 +114,6 @@ impl fmt::Debug for Item {
     }
 }
 
-/// Extra per-location state.
-#[derive(Clone, Debug, PartialEq, Eq)]
-pub struct Stack {
-    /// Used *mostly* as a stack; never empty.
-    /// Invariants:
-    /// * Above a `SharedReadOnly` there can only be more `SharedReadOnly`.
-    /// * No tag occurs in the stack more than once.
-    borrows: Vec<Item>,
-    /// If this is `Some(id)`, then the actual current stack is unknown. This can happen when
-    /// wildcard pointers are used to access this location. What we do know is that `borrows` are at
-    /// the top of the stack, and below it are arbitrarily many items whose `tag` is strictly less
-    /// than `id`.
-    /// When the bottom is unknown, `borrows` always has a `SharedReadOnly` or `Unique` at the bottom;
-    /// we never have the unknown-to-known boundary in an SRW group.
-    unknown_bottom: Option<SbTag>,
-}
-
 /// Extra per-allocation state.
 #[derive(Clone, Debug)]
 pub struct Stacks {
@@ -298,65 +284,10 @@ impl Permission {
 
 /// Core per-location operations: access, dealloc, reborrow.
 impl<'tcx> Stack {
-    /// Find the item granting the given kind of access to the given tag, and return where
-    /// it is on the stack. For wildcard tags, the given index is approximate, but if *no*
-    /// index is given it means the match was *not* in the known part of the stack.
-    /// `Ok(None)` indicates it matched the "unknown" part of the stack.
-    /// `Err` indicates it was not found.
-    fn find_granting(
-        &self,
-        access: AccessKind,
-        tag: SbTagExtra,
-        exposed_tags: &FxHashSet<SbTag>,
-    ) -> Result<Option<usize>, ()> {
-        let SbTagExtra::Concrete(tag) = tag else {
-            // Handle the wildcard case.
-            // Go search the stack for an exposed tag.
-            if let Some(idx) =
-                self.borrows
-                    .iter()
-                    .enumerate() // we also need to know *where* in the stack
-                    .rev() // search top-to-bottom
-                    .find_map(|(idx, item)| {
-                        // If the item fits and *might* be this wildcard, use it.
-                        if item.perm.grants(access) && exposed_tags.contains(&item.tag) {
-                            Some(idx)
-                        } else {
-                            None
-                        }
-                    })
-            {
-                return Ok(Some(idx));
-            }
-            // If we couldn't find it in the stack, check the unknown bottom.
-            return if self.unknown_bottom.is_some() { Ok(None) } else { Err(()) };
-        };
-
-        if let Some(idx) =
-            self.borrows
-                .iter()
-                .enumerate() // we also need to know *where* in the stack
-                .rev() // search top-to-bottom
-                // Return permission of first item that grants access.
-                // We require a permission with the right tag, ensuring U3 and F3.
-                .find_map(|(idx, item)| {
-                    if tag == item.tag && item.perm.grants(access) { Some(idx) } else { None }
-                })
-        {
-            return Ok(Some(idx));
-        }
-
-        // Couldn't find it in the stack; but if there is an unknown bottom it might be there.
-        let found = self.unknown_bottom.is_some_and(|&unknown_limit| {
-            tag.0 < unknown_limit.0 // unknown_limit is an upper bound for what can be in the unknown bottom.
-        });
-        if found { Ok(None) } else { Err(()) }
-    }
-
     /// Find the first write-incompatible item above the given one --
     /// i.e, find the height to which the stack will be truncated when writing to `granting`.
     fn find_first_write_incompatible(&self, granting: usize) -> usize {
-        let perm = self.borrows[granting].perm;
+        let perm = self.get(granting).unwrap().perm;
         match perm {
             Permission::SharedReadOnly => bug!("Cannot use SharedReadOnly for writing"),
             Permission::Disabled => bug!("Cannot use Disabled for anything"),
@@ -367,7 +298,7 @@ impl<'tcx> Stack {
             Permission::SharedReadWrite => {
                 // The SharedReadWrite *just* above us are compatible, to skip those.
                 let mut idx = granting + 1;
-                while let Some(item) = self.borrows.get(idx) {
+                while let Some(item) = self.get(idx) {
                     if item.perm == Permission::SharedReadWrite {
                         // Go on.
                         idx += 1;
@@ -462,16 +393,16 @@ impl<'tcx> Stack {
                 // There is a SRW group boundary between the unknown and the known, so everything is incompatible.
                 0
             };
-            for item in self.borrows.drain(first_incompatible_idx..).rev() {
-                trace!("access: popping item {:?}", item);
+            self.pop_items_after(first_incompatible_idx, |item| {
                 Stack::item_popped(
                     &item,
                     Some((tag, alloc_range, offset, access)),
                     global,
                     alloc_history,
                 )?;
                 alloc_history.log_invalidation(item.tag, alloc_range, current_span);
-            }
+                Ok(())
+            })?;
         } else {
             // On a read, *disable* all `Unique` above the granting item. This ensures U2 for read accesses.
             // The reason this is not following the stack discipline (by removing the first Unique and
@@ -488,44 +419,39 @@ impl<'tcx> Stack {
                 // We are reading from something in the unknown part. That means *all* `Unique` we know about are dead now.
                 0
             };
-            for idx in (first_incompatible_idx..self.borrows.len()).rev() {
-                let item = &mut self.borrows[idx];
-
-                if item.perm == Permission::Unique {
-                    trace!("access: disabling item {:?}", item);
-                    Stack::item_popped(
-                        item,
-                        Some((tag, alloc_range, offset, access)),
-                        global,
-                        alloc_history,
-                    )?;
-                    item.perm = Permission::Disabled;
-                    alloc_history.log_invalidation(item.tag, alloc_range, current_span);
-                }
-            }
+            self.disable_uniques_starting_at(first_incompatible_idx, |item| {
+                Stack::item_popped(
+                    &item,
+                    Some((tag, alloc_range, offset, access)),
+                    global,
+                    alloc_history,
+                )?;
+                alloc_history.log_invalidation(item.tag, alloc_range, current_span);
+                Ok(())
+            })?;
         }
 
         // If this was an approximate action, we now collapse everything into an unknown.
         if granting_idx.is_none() || matches!(tag, SbTagExtra::Wildcard) {
             // Compute the upper bound of the items that remain.
             // (This is why we did all the work above: to reduce the items we have to consider here.)
             let mut max = NonZeroU64::new(1).unwrap();
-            for item in &self.borrows {
+            for i in 0..self.len() {
+                let item = self.get(i).unwrap();
                 // Skip disabled items, they cannot be matched anyway.
                 if !matches!(item.perm, Permission::Disabled) {
                     // We are looking for a strict upper bound, so add 1 to this tag.
                     max = cmp::max(item.tag.0.checked_add(1).unwrap(), max);
                 }
             }
-            if let Some(unk) = self.unknown_bottom {
+            if let Some(unk) = self.unknown_bottom() {
                 max = cmp::max(unk.0, max);
            }
             // Use `max` as new strict upper bound for everything.
             trace!(
                 "access: forgetting stack to upper bound {max} due to wildcard or unknown access"
             );
-            self.borrows.clear();
-            self.unknown_bottom = Some(SbTag(max));
+            self.set_unknown_bottom(SbTag(max));
         }
 
         // Done.
@@ -553,8 +479,9 @@ impl<'tcx> Stack {
             )
         })?;
 
-        // Step 2: Remove all items. Also checks for protectors.
-        for item in self.borrows.drain(..).rev() {
+        // Step 2: Consider all items removed. This checks for protectors.
+        for idx in (0..self.len()).rev() {
+            let item = self.get(idx).unwrap();
             Stack::item_popped(&item, None, global, alloc_history)?;
         }
         Ok(())
@@ -602,8 +529,7 @@ impl<'tcx> Stack {
             // The new thing is SRW anyway, so we cannot push it "on top of the unkown part"
             // (for all we know, it might join an SRW group inside the unknown).
             trace!("reborrow: forgetting stack entirely due to SharedReadWrite reborrow from wildcard or unknown");
-            self.borrows.clear();
-            self.unknown_bottom = Some(global.next_ptr_tag);
+            self.set_unknown_bottom(global.next_ptr_tag);
             return Ok(());
         };
 
@@ -630,19 +556,18 @@ impl<'tcx> Stack {
             // on top of `derived_from`, and we want the new item at the top so that we
             // get the strongest possible guarantees.
             // This ensures U1 and F1.
-            self.borrows.len()
+            self.len()
         };
 
         // Put the new item there. As an optimization, deduplicate if it is equal to one of its new neighbors.
         // `new_idx` might be 0 if we just cleared the entire stack.
-        if self.borrows.get(new_idx) == Some(&new)
-            || (new_idx > 0 && self.borrows[new_idx - 1] == new)
+        if self.get(new_idx) == Some(new) || (new_idx > 0 && self.get(new_idx - 1).unwrap() == new)
         {
             // Optimization applies, done.
             trace!("reborrow: avoiding adding redundant item {:?}", new);
         } else {
             trace!("reborrow: adding item {:?}", new);
-            self.borrows.insert(new_idx, new);
+            self.insert(new_idx, new);
         }
         Ok(())
     }
@@ -654,7 +579,7 @@ impl<'tcx> Stacks {
     /// Creates new stack with initial tag.
     fn new(size: Size, perm: Permission, tag: SbTag) -> Self {
         let item = Item { perm, tag, protector: None };
-        let stack = Stack { borrows: vec![item], unknown_bottom: None };
+        let stack = Stack::new(item);
 
         Stacks {
             stacks: RangeMap::new(size, stack),
5 changes: 4 additions & 1 deletion src/stacked_borrows/diagnostics.rs
@@ -185,7 +185,10 @@ fn operation_summary(
 
 fn error_cause(stack: &Stack, tag: SbTagExtra) -> &'static str {
     if let SbTagExtra::Concrete(tag) = tag {
-        if stack.borrows.iter().any(|item| item.tag == tag && item.perm != Permission::Disabled) {
+        if (0..stack.len())
+            .map(|i| stack.get(i).unwrap())
+            .any(|item| item.tag == tag && item.perm != Permission::Disabled)
+        {
             ", but that tag only grants SharedReadOnly permission for this location"
         } else {
             ", but that tag does not exist in the borrow stack for this location"
[Diff for the fifth changed file, the new src/stacked_borrows/stack.rs, is not rendered in this view.]
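From the call sites in the diff above, the new module evidently replaces direct access to `borrows` with an accessor API (`new`, `get`, `len`, `insert`, `unknown_bottom`, `set_unknown_bottom`, `pop_items_after`, `disable_uniques_starting_at`). A rough reconstruction of that surface over a plain `Vec`, with the cache and unique-range bookkeeping the real file adds left out, and a generic error type standing in for Miri's `InterpResult`:

```rust
use std::num::NonZeroU64;

#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub struct SbTag(pub NonZeroU64);

#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum Permission { Unique, SharedReadWrite, SharedReadOnly, Disabled }

#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub struct Item {
    pub perm: Permission,
    pub tag: SbTag,
    pub protector: Option<NonZeroU64>,
}

pub struct Stack {
    borrows: Vec<Item>,
    unknown_bottom: Option<SbTag>,
    // The real file also keeps the tag-position cache and the `Unique`
    // range here, updating them in the mutating methods below.
}

impl Stack {
    pub fn new(item: Item) -> Self {
        Stack { borrows: vec![item], unknown_bottom: None }
    }

    pub fn len(&self) -> usize {
        self.borrows.len()
    }

    pub fn get(&self, idx: usize) -> Option<Item> {
        self.borrows.get(idx).copied()
    }

    pub fn unknown_bottom(&self) -> Option<SbTag> {
        self.unknown_bottom
    }

    /// Forget the known part of the stack entirely (see the wildcard paths above).
    pub fn set_unknown_bottom(&mut self, tag: SbTag) {
        self.borrows.clear();
        self.unknown_bottom = Some(tag);
    }

    pub fn insert(&mut self, idx: usize, item: Item) {
        self.borrows.insert(idx, item);
    }

    /// Pop everything from `start` upward, visiting items top-to-bottom.
    pub fn pop_items_after<E>(
        &mut self,
        start: usize,
        mut visitor: impl FnMut(Item) -> Result<(), E>,
    ) -> Result<(), E> {
        for item in self.borrows.drain(start..).rev() {
            visitor(item)?;
        }
        Ok(())
    }

    /// Disable every `Unique` item at index `start` or above, visiting each
    /// one top-to-bottom before marking it `Disabled`.
    pub fn disable_uniques_starting_at<E>(
        &mut self,
        start: usize,
        mut visitor: impl FnMut(Item) -> Result<(), E>,
    ) -> Result<(), E> {
        for item in self.borrows[start..].iter_mut().rev() {
            if item.perm == Permission::Unique {
                visitor(*item)?;
                item.perm = Permission::Disabled;
            }
        }
        Ok(())
    }
}
```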
