Commit e5f11af

Auto merge of rust-lang#136115 - Mark-Simulacrum:shard-alloc-id, r=RalfJung
Shard AllocMap Lock

This improves performance on many-seed parallel (-Zthreads=32) Miri executions from using roughly 8 cores to using 27-28 cores, which is about the same as what I see with the data structure proposed in rust-lang#136105. I haven't analyzed this in depth, but I suspect the sharding might actually work out better when we commonly insert "densely", since sharding spreads entries across cache lines while the OnceVec packs locks close together. Of course, we could do something similar with the bitset lock too. Either way, this seems like a very reasonable starting point that solves the problem about equally well on what I can test locally.

r? `@RalfJung`
2 parents bef3c3b + 7f1231c commit e5f11af
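
For context on why sharding helps here: with a single lock around the whole map, all 32 Miri threads serialize on one mutex for every AllocId lookup or insertion. A sharded hash map instead hashes each key to one of several independently locked shards, so threads touching different keys rarely contend. Below is a minimal, self-contained sketch of that idea; it is NOT the actual rustc_data_structures::sharded::ShardedHashMap, and ShardedMap, SHARDS, and get_cloned are illustrative names only:

use std::collections::HashMap;
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
use std::sync::Mutex;

// Toy model of a sharded map: same idea as rustc's ShardedHashMap,
// but deliberately simplified.
const SHARDS: usize = 32;

struct ShardedMap<K, V> {
    shards: Vec<Mutex<HashMap<K, V>>>,
}

impl<K: Hash + Eq, V: Clone> ShardedMap<K, V> {
    fn new() -> Self {
        ShardedMap { shards: (0..SHARDS).map(|_| Mutex::new(HashMap::new())).collect() }
    }

    // Hash the key to pick a shard, so threads touching different keys
    // usually lock different mutexes instead of contending on one.
    fn shard_for(&self, key: &K) -> &Mutex<HashMap<K, V>> {
        let mut h = DefaultHasher::new();
        key.hash(&mut h);
        &self.shards[h.finish() as usize % SHARDS]
    }

    fn insert(&self, key: K, value: V) -> Option<V> {
        self.shard_for(&key).lock().unwrap().insert(key, value)
    }

    fn get_cloned(&self, key: &K) -> Option<V> {
        self.shard_for(key).lock().unwrap().get(key).cloned()
    }
}

fn main() {
    let map: ShardedMap<u64, &str> = ShardedMap::new();
    assert!(map.insert(1, "global alloc").is_none());
    assert_eq!(map.get_cloned(&1), Some("global alloc"));
}

The cache-line point in the commit message follows from this layout: with dense keys, sharding scatters hot entries across shards (and cache lines), whereas a packed lock array keeps them adjacent, so which layout wins depends on the workload.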

2 files changed: +44 -26 lines

compiler/rustc_middle/src/mir/interpret/mod.rs (+42 -24)
@@ -15,7 +15,8 @@ use std::{fmt, io};
 use rustc_abi::{AddressSpace, Align, Endian, HasDataLayout, Size};
 use rustc_ast::{LitKind, Mutability};
 use rustc_data_structures::fx::FxHashMap;
-use rustc_data_structures::sync::Lock;
+use rustc_data_structures::sharded::ShardedHashMap;
+use rustc_data_structures::sync::{AtomicU64, Lock};
 use rustc_hir::def::DefKind;
 use rustc_hir::def_id::{DefId, LocalDefId};
 use rustc_macros::{HashStable, TyDecodable, TyEncodable, TypeFoldable, TypeVisitable};
@@ -389,35 +390,39 @@ pub const CTFE_ALLOC_SALT: usize = 0;

 pub(crate) struct AllocMap<'tcx> {
     /// Maps `AllocId`s to their corresponding allocations.
-    alloc_map: FxHashMap<AllocId, GlobalAlloc<'tcx>>,
+    // Note that this map on rustc workloads seems to be rather dense, but in miri workloads should
+    // be pretty sparse. In #136105 we considered replacing it with a (dense) Vec-based map, but
+    // since there are workloads where it can be sparse we decided to go with sharding for now. At
+    // least up to 32 cores the one workload tested didn't exhibit much difference between the two.
+    //
+    // Should be locked *after* locking dedup if locking both to avoid deadlocks.
+    to_alloc: ShardedHashMap<AllocId, GlobalAlloc<'tcx>>,

     /// Used to deduplicate global allocations: functions, vtables, string literals, ...
     ///
     /// The `usize` is a "salt" used by Miri to make deduplication imperfect, thus better emulating
     /// the actual guarantees.
-    dedup: FxHashMap<(GlobalAlloc<'tcx>, usize), AllocId>,
+    dedup: Lock<FxHashMap<(GlobalAlloc<'tcx>, usize), AllocId>>,

     /// The `AllocId` to assign to the next requested ID.
     /// Always incremented; never gets smaller.
-    next_id: AllocId,
+    next_id: AtomicU64,
 }

 impl<'tcx> AllocMap<'tcx> {
     pub(crate) fn new() -> Self {
         AllocMap {
-            alloc_map: Default::default(),
+            to_alloc: Default::default(),
             dedup: Default::default(),
-            next_id: AllocId(NonZero::new(1).unwrap()),
+            next_id: AtomicU64::new(1),
         }
     }
-    fn reserve(&mut self) -> AllocId {
-        let next = self.next_id;
-        self.next_id.0 = self.next_id.0.checked_add(1).expect(
-            "You overflowed a u64 by incrementing by 1... \
-             You've just earned yourself a free drink if we ever meet. \
-             Seriously, how did you do that?!",
-        );
-        next
+    fn reserve(&self) -> AllocId {
+        // Technically there is a window here where we overflow and then another thread
+        // increments `next_id` *again* and uses it before we panic and tear down the entire session.
+        // We consider this fine since such overflows cannot realistically occur.
+        let next_id = self.next_id.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
+        AllocId(NonZero::new(next_id).unwrap())
     }
 }
@@ -428,26 +433,34 @@ impl<'tcx> TyCtxt<'tcx> {
     /// Make sure to call `set_alloc_id_memory` or `set_alloc_id_same_memory` before returning such
     /// an `AllocId` from a query.
     pub fn reserve_alloc_id(self) -> AllocId {
-        self.alloc_map.lock().reserve()
+        self.alloc_map.reserve()
     }

     /// Reserves a new ID *if* this allocation has not been dedup-reserved before.
     /// Should not be used for mutable memory.
     fn reserve_and_set_dedup(self, alloc: GlobalAlloc<'tcx>, salt: usize) -> AllocId {
-        let mut alloc_map = self.alloc_map.lock();
         if let GlobalAlloc::Memory(mem) = alloc {
             if mem.inner().mutability.is_mut() {
                 bug!("trying to dedup-reserve mutable memory");
             }
         }
         let alloc_salt = (alloc, salt);
-        if let Some(&alloc_id) = alloc_map.dedup.get(&alloc_salt) {
+        // Locking this *before* `to_alloc` also to ensure correct lock order.
+        let mut dedup = self.alloc_map.dedup.lock();
+        if let Some(&alloc_id) = dedup.get(&alloc_salt) {
             return alloc_id;
         }
-        let id = alloc_map.reserve();
+        let id = self.alloc_map.reserve();
         debug!("creating alloc {:?} with id {id:?}", alloc_salt.0);
-        alloc_map.alloc_map.insert(id, alloc_salt.0.clone());
-        alloc_map.dedup.insert(alloc_salt, id);
+        let had_previous = self
+            .alloc_map
+            .to_alloc
+            .lock_shard_by_value(&id)
+            .insert(id, alloc_salt.0.clone())
+            .is_some();
+        // We just reserved, so should always be unique.
+        assert!(!had_previous);
+        dedup.insert(alloc_salt, id);
         id
     }
@@ -497,7 +510,7 @@ impl<'tcx> TyCtxt<'tcx> {
     /// local dangling pointers and allocations in constants/statics.
     #[inline]
     pub fn try_get_global_alloc(self, id: AllocId) -> Option<GlobalAlloc<'tcx>> {
-        self.alloc_map.lock().alloc_map.get(&id).cloned()
+        self.alloc_map.to_alloc.lock_shard_by_value(&id).get(&id).cloned()
     }

     #[inline]
@@ -516,16 +529,21 @@ impl<'tcx> TyCtxt<'tcx> {
     /// Freezes an `AllocId` created with `reserve` by pointing it at an `Allocation`. Trying to
     /// call this function twice, even with the same `Allocation` will ICE the compiler.
     pub fn set_alloc_id_memory(self, id: AllocId, mem: ConstAllocation<'tcx>) {
-        if let Some(old) = self.alloc_map.lock().alloc_map.insert(id, GlobalAlloc::Memory(mem)) {
+        if let Some(old) =
+            self.alloc_map.to_alloc.lock_shard_by_value(&id).insert(id, GlobalAlloc::Memory(mem))
+        {
             bug!("tried to set allocation ID {id:?}, but it was already existing as {old:#?}");
         }
     }

     /// Freezes an `AllocId` created with `reserve` by pointing it at a static item. Trying to
     /// call this function twice, even with the same `DefId` will ICE the compiler.
     pub fn set_nested_alloc_id_static(self, id: AllocId, def_id: LocalDefId) {
-        if let Some(old) =
-            self.alloc_map.lock().alloc_map.insert(id, GlobalAlloc::Static(def_id.to_def_id()))
+        if let Some(old) = self
+            .alloc_map
+            .to_alloc
+            .lock_shard_by_value(&id)
+            .insert(id, GlobalAlloc::Static(def_id.to_def_id()))
         {
             bug!("tried to set allocation ID {id:?}, but it was already existing as {old:#?}");
         }
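
Two concurrency details in this diff are worth spelling out. First, `reserve` no longer needs `&mut self` or any lock: `fetch_add` on an `AtomicU64` already hands out unique values, at the cost of the (harmless) overflow window described in the new comment. Second, `reserve_and_set_dedup` always takes the `dedup` lock before touching a `to_alloc` shard, matching the lock-order comment on the struct, so two threads can never acquire the pair in opposite orders and deadlock. A standalone sketch of the reservation pattern, using illustrative names (IdAllocator is not a rustc type):

use std::num::NonZero;
use std::sync::atomic::{AtomicU64, Ordering};

// Toy model of the lock-free ID reservation: the counter starts at 1,
// so every handed-out value fits in a NonZero<u64>.
struct IdAllocator {
    next_id: AtomicU64,
}

impl IdAllocator {
    fn new() -> Self {
        IdAllocator { next_id: AtomicU64::new(1) }
    }

    // Takes &self, not &mut self: fetch_add makes each returned value
    // unique without any external lock.
    fn reserve(&self) -> NonZero<u64> {
        // Relaxed suffices: we only need uniqueness, not ordering
        // with respect to other memory operations.
        let id = self.next_id.fetch_add(1, Ordering::Relaxed);
        NonZero::new(id).unwrap()
    }
}

fn main() {
    let ids = IdAllocator::new();
    let (a, b) = (ids.reserve(), ids.reserve());
    assert_ne!(a, b);
}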

compiler/rustc_middle/src/ty/context.rs (+2 -2)
@@ -1366,7 +1366,7 @@ pub struct GlobalCtxt<'tcx> {
     pub data_layout: TargetDataLayout,

     /// Stores memory for globals (statics/consts).
-    pub(crate) alloc_map: Lock<interpret::AllocMap<'tcx>>,
+    pub(crate) alloc_map: interpret::AllocMap<'tcx>,

     current_gcx: CurrentGcx,
 }
@@ -1583,7 +1583,7 @@ impl<'tcx> TyCtxt<'tcx> {
             new_solver_evaluation_cache: Default::default(),
             canonical_param_env_cache: Default::default(),
             data_layout,
-            alloc_map: Lock::new(interpret::AllocMap::new()),
+            alloc_map: interpret::AllocMap::new(),
             current_gcx,
         });

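The context.rs change is the other half of the design move: once all of AllocMap's fields synchronize themselves (sharded to_alloc, Lock around dedup, atomic next_id), the outer Lock in GlobalCtxt is redundant, so the field is stored directly and every call site drops one `.lock()`. A toy before/after sketch of that migration, with stand-in types rather than rustc's own:

use std::sync::Mutex;

// Stand-in for AllocMap: all synchronization lives inside the type.
// (rustc uses finer-grained sharded locks instead of one Mutex.)
struct AllocMapLike {
    inner: Mutex<Vec<u64>>,
}

impl AllocMapLike {
    fn new() -> Self {
        AllocMapLike { inner: Mutex::new(Vec::new()) }
    }
    fn insert(&self, id: u64) {
        // Callers need only a shared reference; the lock is internal.
        self.inner.lock().unwrap().push(id);
    }
}

// Before this commit the context held Lock<AllocMap> and every caller
// locked the whole map; after, the field is stored plainly like this.
struct Ctxt {
    alloc_map: AllocMapLike,
}

fn main() {
    let ctxt = Ctxt { alloc_map: AllocMapLike::new() };
    ctxt.alloc_map.insert(1); // no outer ctxt.alloc_map.lock() needed
}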