Skip to content

Commit

Permalink
Rollup merge of rust-lang#40237 - arthurprs:hm-adapt2, r=alexcrichton
Browse files Browse the repository at this point in the history
Reduce size overhead of adaptative hashmap

Exposes a boolean flag in RawTable and use it instead of a bool field in HashMap.

Taking a bit from capacity or length would make overflow handling tricky.

Fixes: rust-lang#40042
  • Loading branch information
Ariel Ben-Yehuda authored Mar 8, 2017
2 parents 68a5a16 + 3273003 commit 2c252ff
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 24 deletions.
24 changes: 8 additions & 16 deletions src/libstd/collections/hash/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -396,8 +396,6 @@ pub struct HashMap<K, V, S = RandomState> {
table: RawTable<K, V>,

resize_policy: DefaultResizePolicy,

long_probes: bool,
}

/// Search for a pre-hashed key.
Expand Down Expand Up @@ -655,7 +653,6 @@ impl<K, V, S> HashMap<K, V, S>
hash_builder: hash_builder,
resize_policy: DefaultResizePolicy::new(),
table: RawTable::new(0),
long_probes: false,
}
}

Expand Down Expand Up @@ -688,7 +685,6 @@ impl<K, V, S> HashMap<K, V, S>
hash_builder: hash_builder,
resize_policy: resize_policy,
table: RawTable::new(raw_cap),
long_probes: false,
}
}

Expand Down Expand Up @@ -746,7 +742,7 @@ impl<K, V, S> HashMap<K, V, S>
let min_cap = self.len().checked_add(additional).expect("reserve overflow");
let raw_cap = self.resize_policy.raw_capacity(min_cap);
self.resize(raw_cap);
} else if self.long_probes && remaining <= self.len() {
} else if self.table.tag() && remaining <= self.len() {
// Probe sequence is too long and table is half full,
// resize early to reduce probing length.
let new_capacity = self.table.capacity() * 2;
Expand All @@ -763,7 +759,6 @@ impl<K, V, S> HashMap<K, V, S>
assert!(self.table.size() <= new_raw_cap);
assert!(new_raw_cap.is_power_of_two() || new_raw_cap == 0);

self.long_probes = false;
let mut old_table = replace(&mut self.table, RawTable::new(new_raw_cap));
let old_size = old_table.size();

Expand Down Expand Up @@ -844,8 +839,7 @@ impl<K, V, S> HashMap<K, V, S>
/// If the key already exists, the hashtable will be returned untouched
/// and a reference to the existing element will be returned.
fn insert_hashed_nocheck(&mut self, hash: SafeHash, k: K, v: V) -> Option<V> {
let entry = search_hashed(&mut self.table, hash, |key| *key == k)
.into_entry(k, &mut self.long_probes);
let entry = search_hashed(&mut self.table, hash, |key| *key == k).into_entry(k);
match entry {
Some(Occupied(mut elem)) => Some(elem.insert(v)),
Some(Vacant(elem)) => {
Expand Down Expand Up @@ -1002,7 +996,7 @@ impl<K, V, S> HashMap<K, V, S>
self.reserve(1);
let hash = self.make_hash(&key);
search_hashed(&mut self.table, hash, |q| q.eq(&key))
.into_entry(key, &mut self.long_probes).expect("unreachable")
.into_entry(key).expect("unreachable")
}

/// Returns the number of elements in the map.
Expand Down Expand Up @@ -1456,7 +1450,7 @@ impl<K, V, M> InternalEntry<K, V, M> {

impl<'a, K, V> InternalEntry<K, V, &'a mut RawTable<K, V>> {
#[inline]
fn into_entry(self, key: K, long_probes: &'a mut bool) -> Option<Entry<'a, K, V>> {
fn into_entry(self, key: K) -> Option<Entry<'a, K, V>> {
match self {
InternalEntry::Occupied { elem } => {
Some(Occupied(OccupiedEntry {
Expand All @@ -1469,7 +1463,6 @@ impl<'a, K, V> InternalEntry<K, V, &'a mut RawTable<K, V>> {
hash: hash,
key: key,
elem: elem,
long_probes: long_probes,
}))
}
InternalEntry::TableIsEmpty => None,
Expand Down Expand Up @@ -1542,7 +1535,6 @@ pub struct VacantEntry<'a, K: 'a, V: 'a> {
hash: SafeHash,
key: K,
elem: VacantEntryState<K, V, &'a mut RawTable<K, V>>,
long_probes: &'a mut bool,
}

#[stable(feature= "debug_hash_map", since = "1.12.0")]
Expand Down Expand Up @@ -2117,15 +2109,15 @@ impl<'a, K: 'a, V: 'a> VacantEntry<'a, K, V> {
#[stable(feature = "rust1", since = "1.0.0")]
pub fn insert(self, value: V) -> &'a mut V {
match self.elem {
NeqElem(bucket, disp) => {
NeqElem(mut bucket, disp) => {
if disp >= DISPLACEMENT_THRESHOLD {
*self.long_probes = true;
bucket.table_mut().set_tag(true);
}
robin_hood(bucket, disp, self.hash, self.key, value)
},
NoElem(bucket, disp) => {
NoElem(mut bucket, disp) => {
if disp >= DISPLACEMENT_THRESHOLD {
*self.long_probes = true;
bucket.table_mut().set_tag(true);
}
bucket.put(self.hash, self.key, value).into_mut_refs().1
},
Expand Down
74 changes: 66 additions & 8 deletions src/libstd/collections/hash/table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,42 @@ type HashUint = usize;

const EMPTY_BUCKET: HashUint = 0;

/// Special `Unique<HashUint>` that uses the lower bit of the pointer
/// to expose a boolean tag.
/// Note: when the pointer is initialized to EMPTY `.ptr()` will return
/// null and the tag functions shouldn't be used.
struct TaggedHashUintPtr(Unique<HashUint>);

impl TaggedHashUintPtr {
#[inline]
unsafe fn new(ptr: *mut HashUint) -> Self {
debug_assert!(ptr as usize & 1 == 0 || ptr as usize == EMPTY as usize);
TaggedHashUintPtr(Unique::new(ptr))
}

#[inline]
fn set_tag(&mut self, value: bool) {
let usize_ptr = &*self.0 as *const *mut HashUint as *mut usize;
unsafe {
if value {
*usize_ptr |= 1;
} else {
*usize_ptr &= !1;
}
}
}

#[inline]
fn tag(&self) -> bool {
(*self.0 as usize) & 1 == 1
}

#[inline]
fn ptr(&self) -> *mut HashUint {
(*self.0 as usize & !1) as *mut HashUint
}
}

/// The raw hashtable, providing safe-ish access to the unzipped and highly
/// optimized arrays of hashes, and key-value pairs.
///
Expand Down Expand Up @@ -72,10 +108,14 @@ const EMPTY_BUCKET: HashUint = 0;
/// around just the "table" part of the hashtable. It enforces some
/// invariants at the type level and employs some performance trickery,
/// but in general is just a tricked out `Vec<Option<(u64, K, V)>>`.
///
/// The hashtable also exposes a special boolean tag. The tag defaults to false
/// when the RawTable is created and is accessible with the `tag` and `set_tag`
/// functions.
pub struct RawTable<K, V> {
capacity: usize,
size: usize,
hashes: Unique<HashUint>,
hashes: TaggedHashUintPtr,

// Because K/V do not appear directly in any of the types in the struct,
// inform rustc that in fact instances of K and V are reachable from here.
Expand Down Expand Up @@ -208,6 +248,10 @@ impl<K, V, M> FullBucket<K, V, M> {
pub fn table(&self) -> &M {
&self.table
}
/// Borrow a mutable reference to the table.
pub fn table_mut(&mut self) -> &mut M {
&mut self.table
}
/// Move out the reference to the table.
pub fn into_table(self) -> M {
self.table
Expand All @@ -227,6 +271,10 @@ impl<K, V, M> EmptyBucket<K, V, M> {
pub fn table(&self) -> &M {
&self.table
}
/// Borrow a mutable reference to the table.
pub fn table_mut(&mut self) -> &mut M {
&mut self.table
}
}

impl<K, V, M> Bucket<K, V, M> {
Expand Down Expand Up @@ -687,7 +735,7 @@ impl<K, V> RawTable<K, V> {
return RawTable {
size: 0,
capacity: 0,
hashes: Unique::new(EMPTY as *mut HashUint),
hashes: TaggedHashUintPtr::new(EMPTY as *mut HashUint),
marker: marker::PhantomData,
};
}
Expand Down Expand Up @@ -728,7 +776,7 @@ impl<K, V> RawTable<K, V> {
RawTable {
capacity: capacity,
size: 0,
hashes: Unique::new(hashes),
hashes: TaggedHashUintPtr::new(hashes),
marker: marker::PhantomData,
}
}
Expand All @@ -737,13 +785,13 @@ impl<K, V> RawTable<K, V> {
let hashes_size = self.capacity * size_of::<HashUint>();
let pairs_size = self.capacity * size_of::<(K, V)>();

let buffer = *self.hashes as *mut u8;
let buffer = self.hashes.ptr() as *mut u8;
let (pairs_offset, _, oflo) =
calculate_offsets(hashes_size, pairs_size, align_of::<(K, V)>());
debug_assert!(!oflo, "capacity overflow");
unsafe {
RawBucket {
hash: *self.hashes,
hash: self.hashes.ptr(),
pair: buffer.offset(pairs_offset as isize) as *const _,
_marker: marker::PhantomData,
}
Expand All @@ -755,7 +803,7 @@ impl<K, V> RawTable<K, V> {
pub fn new(capacity: usize) -> RawTable<K, V> {
unsafe {
let ret = RawTable::new_uninitialized(capacity);
ptr::write_bytes(*ret.hashes, 0, capacity);
ptr::write_bytes(ret.hashes.ptr(), 0, capacity);
ret
}
}
Expand All @@ -774,7 +822,7 @@ impl<K, V> RawTable<K, V> {
fn raw_buckets(&self) -> RawBuckets<K, V> {
RawBuckets {
raw: self.first_bucket_raw(),
hashes_end: unsafe { self.hashes.offset(self.capacity as isize) },
hashes_end: unsafe { self.hashes.ptr().offset(self.capacity as isize) },
marker: marker::PhantomData,
}
}
Expand Down Expand Up @@ -832,6 +880,16 @@ impl<K, V> RawTable<K, V> {
marker: marker::PhantomData,
}
}

/// Set the table tag
pub fn set_tag(&mut self, value: bool) {
self.hashes.set_tag(value)
}

/// Get the table tag
pub fn tag(&self) -> bool {
self.hashes.tag()
}
}

/// A raw iterator. The basis for some other iterators in this module. Although
Expand Down Expand Up @@ -1156,7 +1214,7 @@ unsafe impl<#[may_dangle] K, #[may_dangle] V> Drop for RawTable<K, V> {
debug_assert!(!oflo, "should be impossible");

unsafe {
deallocate(*self.hashes as *mut u8, size, align);
deallocate(self.hashes.ptr() as *mut u8, size, align);
// Remember how everything was allocated out of one buffer
// during initialization? We only need one call to free here.
}
Expand Down

0 comments on commit 2c252ff

Please sign in to comment.