Skip to content

Commit

Permalink
improve AbstractHash performance
Browse files Browse the repository at this point in the history
  • Loading branch information
iverase committed Oct 10, 2024
1 parent 14f0b48 commit 712f022
Show file tree
Hide file tree
Showing 9 changed files with 91 additions and 80 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -12,52 +12,53 @@
import org.elasticsearch.core.Releasables;

/**
* Base implementation for {@link BytesRefHash} and {@link LongHash}, or any class that
* needs to map values to dense ords. This class is not thread-safe.
* Base implementation for {@link BytesRefHash}, {@link LongHash}, {@link LongLongHash} and {@link Int3Hash}
* or any class that needs to map values to dense ords. This class is not thread-safe.
*/
// IDs are internally stored as id + 1 so that 0 encodes for an empty slot
abstract class AbstractHash extends AbstractPagedHashMap {

LongArray ids;

AbstractHash(long capacity, float maxLoadFactor, BigArrays bigArrays) {
super(capacity, maxLoadFactor, bigArrays);
ids = bigArrays.newLongArray(capacity(), true);
ids = bigArrays.newLongArray(capacity(), false);
ids.fill(0L, capacity(), -1L);
}

/**
* Get the id associated with the key at <code>0 &lt;= index &lt; capacity()</code>, or -1 if this slot is unused.
*/
public long id(long index) {
return ids.get(index) - 1;
public final long id(long index) {
return ids.get(index);
}

/**
* Set the id for the key at <code>0 &lt;= index &lt; capacity()</code>.
*/
protected final void setId(long index, long id) {
ids.set(index, id + 1);
}

/**
* Set the id for the key at <code>0 &lt;= index &lt; capacity()</code> and get the previous value, or -1 if this slot is unused.
*/
protected final long getAndSetId(long index, long id) {
return ids.getAndSet(index, id + 1) - 1;
ids.set(index, id);
}

@Override
protected void resize(long capacity) {
ids = bigArrays.resize(ids, capacity);
public void close() {
Releasables.close(ids);
}

@Override
protected boolean used(long bucket) {
return id(bucket) >= 0;
protected final void rehash(long buckets, long newBuckets) {
// grow and reset all ids to -1
ids = bigArrays.resize(ids, newBuckets);
ids.fill(0L, newBuckets, -1L);
// rehash all elements
final long size = size();
for (long i = 0; i < size; ++i) {
rehash(i);
}
}

@Override
public void close() {
Releasables.close(ids);
}
/**
* Rehash the entry associated with the given id.
*/
protected abstract void rehash(long id);

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.common.util;

/**
* Base implementation for {@link LongObjectPagedHashMap} and {@link ObjectObjectPagedHashMap}.
*/
public abstract class AbstractObjectHash extends AbstractPagedHashMap {

    /**
     * Creates a new hash table.
     *
     * @param capacity      the initial number of buckets to allocate
     * @param maxLoadFactor the fraction of buckets that may be filled before the table grows
     * @param bigArrays     the {@link BigArrays} instance used to allocate backing storage
     */
    AbstractObjectHash(long capacity, float maxLoadFactor, BigArrays bigArrays) {
        super(capacity, maxLoadFactor, bigArrays);
    }

    /** Whether the given bucket currently holds an entry. */
    protected abstract boolean used(long bucket);

    /** Remove the entry at the given index and add it back so it moves to its correct slot under the current mask. */
    protected abstract void removeAndAdd(long index);

    /** Resize the backing arrays to the given capacity. */
    protected abstract void resize(long capacity);

    @Override
    protected final void rehash(long buckets, long newBuckets) {
        // Resize arrays first; the table is grown in place, so existing entries stay
        // where they were and are then remapped below.
        resize(newBuckets);
        // First pass: remap in place; most entries land in their final slot directly.
        for (long i = 0; i < buckets; ++i) {
            if (used(i)) {
                removeAndAdd(i);
            }
        }
        // The only entries which have not been put in their final position in the previous loop are those that were stored in a slot that
        // is < slot(key, mask). This only happens when slot(key, mask) returned a slot that was close to the end of the array and collision
        // resolution has put it back in the first slots. This time, collision resolution will have put them at the beginning of the newly
        // allocated slots. Let's re-add them to make sure they are in the right slot. This 2nd loop will typically exit very early.
        for (long i = buckets; i < newBuckets; ++i) {
            if (used(i)) {
                removeAndAdd(i); // add it back
            } else {
                break;
            }
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -54,14 +54,14 @@ static long hash(long value) {
/**
* Return the number of allocated slots to store this hash table.
*/
public long capacity() {
public final long capacity() {
return mask + 1;
}

/**
* Return the number of longs in this hash table.
*/
public long size() {
public final long size() {
return size;
}

Expand All @@ -73,45 +73,24 @@ static long nextSlot(long curSlot, long mask) {
return (curSlot + 1) & mask; // linear probing
}

/** Resize to the given capacity. */
protected abstract void resize(long capacity);

protected abstract boolean used(long bucket);

/** Remove the entry at the given index and add it back */
protected abstract void removeAndAdd(long index);
/**
* Rehash the current hash table after the capacity has been increased from {@code buckets} to {@code newBuckets}.
*/
protected abstract void rehash(long buckets, long newBuckets);

protected final void grow() {
// The difference of this implementation of grow() compared to standard hash tables is that we are growing in-place, which makes
// the re-mapping of keys to slots a bit more tricky.
assert size == maxSize;
final long prevSize = size;
final long buckets = capacity();
// Resize arrays
// compute new sizes
final long newBuckets = buckets << 1;
assert newBuckets == Long.highestOneBit(newBuckets) : newBuckets; // power of 2
resize(newBuckets);
mask = newBuckets - 1;
// First let's remap in-place: most data will be put in its final position directly
for (long i = 0; i < buckets; ++i) {
if (used(i)) {
removeAndAdd(i);
}
}
// The only entries which have not been put in their final position in the previous loop are those that were stored in a slot that
// is < slot(key, mask). This only happens when slot(key, mask) returned a slot that was close to the end of the array and collision
// resolution has put it back in the first slots. This time, collision resolution will have put them at the beginning of the newly
// allocated slots. Let's re-add them to make sure they are in the right slot. This 2nd loop will typically exit very early.
for (long i = buckets; i < newBuckets; ++i) {
if (used(i)) {
removeAndAdd(i); // add it back
} else {
break;
}
}
assert size == prevSize;
maxSize = (long) (newBuckets * maxLoadFactor);
assert size < maxSize;
rehash(buckets, newBuckets);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -222,9 +222,7 @@ public long add(BytesRef key) {
}

@Override
protected void removeAndAdd(long index) {
final long id = getAndSetId(index, -1);
assert id >= 0;
protected void rehash(long id) {
final int code = hashes.get(id);
reset(code, id);
}
Expand Down
10 changes: 2 additions & 8 deletions server/src/main/java/org/elasticsearch/common/util/Int3Hash.java
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,8 @@ private void append(long id, int key1, int key2, int key3) {
keys.set(keyOffset + 2, key3);
}

private void reset(long id) {
@Override
protected void rehash(long id) {
final IntArray keys = this.keys;
final long keyOffset = id * 3;
final int key1 = keys.get(keyOffset);
Expand Down Expand Up @@ -129,13 +130,6 @@ public long add(int key1, int key2, int key3) {
return set(key1, key2, key3, size);
}

@Override
protected void removeAndAdd(long index) {
final long id = getAndSetId(index, -1);
assert id >= 0;
reset(id);
}

@Override
public void close() {
Releasables.close(keys, super::close);
Expand Down
10 changes: 2 additions & 8 deletions server/src/main/java/org/elasticsearch/common/util/LongHash.java
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,8 @@ private void append(long id, long key) {
keys.set(id, key);
}

private void reset(long id) {
@Override
protected void rehash(long id) {
final long key = keys.get(id);
final long slot = slot(hash(key), mask);
for (long index = slot;; index = nextSlot(index, mask)) {
Expand All @@ -107,13 +108,6 @@ public long add(long key) {
return set(key, size);
}

@Override
protected void removeAndAdd(long index) {
final long id = getAndSetId(index, -1);
assert id >= 0;
reset(id);
}

@Override
public void close() {
try (Releasable releasable = keys) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,8 @@ private void append(long id, long key1, long key2) {
keys.set(keyOffset + 1, key2);
}

private void reset(long id) {
@Override
protected void rehash(long id) {
final LongArray keys = this.keys;
final long keyOffset = id * 2;
final long key1 = keys.get(keyOffset);
Expand Down Expand Up @@ -133,13 +134,6 @@ public long add(long key1, long key2) {
return set(key1, key2, size);
}

@Override
protected void removeAndAdd(long index) {
final long id = getAndSetId(index, -1);
assert id >= 0;
reset(id);
}

@Override
public void close() {
Releasables.close(keys, () -> super.close());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
* A hash table from native longs to objects. This implementation resolves collisions
* using open-addressing and does not support null values. This class is not thread-safe.
*/
public final class LongObjectPagedHashMap<T> extends AbstractPagedHashMap implements Iterable<LongObjectPagedHashMap.Cursor<T>> {
public final class LongObjectPagedHashMap<T> extends AbstractObjectHash implements Iterable<LongObjectPagedHashMap.Cursor<T>> {

private LongArray keys;
private ObjectArray<T> values;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
* Note that this class does not track either the actual keys or values. It is responsibility of
* the caller to release those objects if necessary.
*/
public final class ObjectObjectPagedHashMap<K, V> extends AbstractPagedHashMap implements Iterable<ObjectObjectPagedHashMap.Cursor<K, V>> {
public final class ObjectObjectPagedHashMap<K, V> extends AbstractObjectHash implements Iterable<ObjectObjectPagedHashMap.Cursor<K, V>> {

private ObjectArray<K> keys;
private ObjectArray<V> values;
Expand Down

0 comments on commit 712f022

Please sign in to comment.