Rollup merge of rust-lang#40561 - arthurprs:hm-adapt2, r=pczarn
Simplify HashMap Bucket interface

> Simplify HashMap Bucket interface
>
> * Store capacity_mask instead of capacity
> * Move bucket index into RawBucket
> * Valid bucket index is now always within [0..table_capacity)
> * Simplify iterators by moving logic into RawBuckets
> * Clone RawTable using RawBucket
> * Make retain aware of the number of elements

The idea was to put idx in RawBucket instead of the other Bucket types and to simplify next() and prev() as much as possible. The rest was a side effect of that change, except maybe the last two items.
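
To make the index handling concrete, here is a minimal sketch of the shape the bullet points above describe (illustrative types only, not the actual libstd internals; the names `TableSketch`, `RawBucketSketch`, and the field layout are assumptions for exposition): the index lives in the raw bucket, the table stores a `capacity_mask` (capacity minus one, with capacity a power of two), and next()/prev() reduce to a mask-and-bump, so the index always stays within `[0, capacity)`:

```rust
// Illustrative sketch only: not the actual libstd types.
struct TableSketch {
    capacity_mask: usize, // capacity - 1; capacity is a power of two
}

struct RawBucketSketch {
    idx: usize, // always kept within [0, capacity)
}

impl RawBucketSketch {
    // Advance to the next bucket, wrapping at the end of the table.
    fn next(&mut self, table: &TableSketch) {
        self.idx = self.idx.wrapping_add(1) & table.capacity_mask;
    }

    // Step back to the previous bucket, wrapping at the start of the table.
    fn prev(&mut self, table: &TableSketch) {
        self.idx = self.idx.wrapping_sub(1) & table.capacity_mask;
    }
}

fn main() {
    let table = TableSketch { capacity_mask: 8 - 1 };
    let mut bucket = RawBucketSketch { idx: 7 };
    bucket.next(&table); // wraps from 7 back to 0
    assert_eq!(bucket.idx, 0);
    bucket.prev(&table); // and back to 7 again
    assert_eq!(bucket.idx, 7);
}
```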

This change makes iteration and other next()/prev()-heavy operations noticeably faster. Clone is way faster.

```
➜  hashmap2 git:(adapt) ✗ cargo benchcmp pre:: adp:: bench.txt
 name                        pre:: ns/iter  adp:: ns/iter  diff ns/iter   diff %
 clone_10_000                74,364         39,736              -34,628  -46.57%
 grow_100_000                8,343,553      8,233,785          -109,768   -1.32%
 grow_10_000                 817,825        723,958             -93,867  -11.48%
 grow_big_value_100_000      18,418,979     17,906,186         -512,793   -2.78%
 grow_big_value_10_000       1,219,242      1,103,334          -115,908   -9.51%
 insert_1000                 74,546         58,343              -16,203  -21.74%
 insert_100_000              6,743,770      6,238,017          -505,753   -7.50%
 insert_10_000               798,079        719,123             -78,956   -9.89%
 insert_1_000_000            275,215,605    266,975,875      -8,239,730   -2.99%
 insert_int_bigvalue_10_000  1,517,387      1,419,838           -97,549   -6.43%
 insert_str_10_000           316,179        278,896             -37,283  -11.79%
 insert_string_10_000        770,927        747,449             -23,478   -3.05%
 iter_keys_100_000           386,099        333,104             -52,995  -13.73%
 iterate_100_000             387,320        355,707             -31,613   -8.16%
 lookup_100_000              206,757        193,063             -13,694   -6.62%
 lookup_100_000_unif         219,366        193,180             -26,186  -11.94%
 lookup_1_000_000            206,456        205,716                -740   -0.36%
 lookup_1_000_000_unif       659,934        629,659             -30,275   -4.59%
 lru_sim                     20,194,334     18,442,149       -1,752,185   -8.68%
 merge_shuffle               1,168,044      1,063,055          -104,989   -8.99%
```

Note 2: I may have messed up porting the diff; let's see what CI says.
Ariel Ben-Yehuda authored Apr 5, 2017 · 2 parents 540fc2c + f07ebd6 · commit 327b9be
Showing 2 changed files with 165 additions and 191 deletions.
src/libstd/collections/hash/map.rs (32 changes: 14 additions & 18 deletions)
```diff
@@ -472,7 +472,7 @@ fn pop_internal<K, V>(starting_bucket: FullBucketMut<K, V>)
     }
 
     // Now we've done all our shifting. Return the value we grabbed earlier.
-    (retkey, retval, gap.into_bucket().into_table())
+    (retkey, retval, gap.into_table())
 }
 
 /// Perform robin hood bucket stealing at the given `bucket`. You must
@@ -485,14 +485,14 @@ fn robin_hood<'a, K: 'a, V: 'a>(bucket: FullBucketMut<'a, K, V>,
                                 mut key: K,
                                 mut val: V)
                                 -> FullBucketMut<'a, K, V> {
-    let start_index = bucket.index();
     let size = bucket.table().size();
-    // Save the *starting point*.
-    let mut bucket = bucket.stash();
+    let raw_capacity = bucket.table().capacity();
     // There can be at most `size - dib` buckets to displace, because
     // in the worst case, there are `size` elements and we already are
     // `displacement` buckets away from the initial one.
-    let idx_end = start_index + size - bucket.displacement();
+    let idx_end = (bucket.index() + size - bucket.displacement()) % raw_capacity;
+    // Save the *starting point*.
+    let mut bucket = bucket.stash();
 
     loop {
         let (old_hash, old_key, old_val) = bucket.replace(hash, key, val);
@@ -568,11 +568,8 @@ impl<K, V, S> HashMap<K, V, S>
     // The caller should ensure that invariants by Robin Hood Hashing hold
     // and that there's space in the underlying table.
     fn insert_hashed_ordered(&mut self, hash: SafeHash, k: K, v: V) {
-        let raw_cap = self.raw_capacity();
         let mut buckets = Bucket::new(&mut self.table, hash);
-        // note that buckets.index() keeps increasing
-        // even if the pointer wraps back to the first bucket.
-        let limit_bucket = buckets.index() + raw_cap;
+        let start_index = buckets.index();
 
         loop {
             // We don't need to compare hashes for value swap.
@@ -585,7 +582,7 @@ impl<K, V, S> HashMap<K, V, S>
                 Full(b) => b.into_bucket(),
             };
             buckets.next();
-            debug_assert!(buckets.index() < limit_bucket);
+            debug_assert!(buckets.index() != start_index);
         }
     }
 }
@@ -1244,24 +1241,25 @@ impl<K, V, S> HashMap<K, V, S>
     pub fn retain<F>(&mut self, mut f: F)
         where F: FnMut(&K, &mut V) -> bool
     {
-        if self.table.capacity() == 0 || self.table.size() == 0 {
+        if self.table.size() == 0 {
             return;
         }
+        let mut elems_left = self.table.size();
         let mut bucket = Bucket::head_bucket(&mut self.table);
         bucket.prev();
-        let tail = bucket.index();
-        loop {
+        let start_index = bucket.index();
+        while elems_left != 0 {
             bucket = match bucket.peek() {
                 Full(mut full) => {
+                    elems_left -= 1;
                     let should_remove = {
                         let (k, v) = full.read_mut();
                         !f(k, v)
                     };
                     if should_remove {
-                        let prev_idx = full.index();
                         let prev_raw = full.raw();
                         let (_, _, t) = pop_internal(full);
-                        Bucket::new_from(prev_raw, prev_idx, t)
+                        Bucket::new_from(prev_raw, t)
                     } else {
                         full.into_bucket()
                     }
@@ -1271,9 +1269,7 @@ impl<K, V, S> HashMap<K, V, S>
                 }
             };
             bucket.prev(); // reverse iteration
-            if bucket.index() == tail {
-                break;
-            }
+            debug_assert!(elems_left == 0 || bucket.index() != start_index);
         }
     }
 }
```
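
For reference, the user-facing contract of `retain` that the reworked loop above has to preserve is unchanged: every entry is visited once, and exactly those for which the closure returns `true` are kept. A small standalone usage example:

```rust
use std::collections::HashMap;

fn main() {
    // Build a small map and keep only the even keys. The patched loop
    // walks the occupied buckets and can stop once `elems_left` hits
    // zero, but the observable result is the same as before.
    let mut map: HashMap<i32, i32> = (0..8).map(|i| (i, i * 10)).collect();
    map.retain(|&k, _v| k % 2 == 0);

    let mut keys: Vec<_> = map.keys().copied().collect();
    keys.sort();
    assert_eq!(keys, vec![0, 2, 4, 6]);
}
```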

[Diff for the second changed file was not loaded in this view.]
