Skip to content

Commit

Permalink
improve AbstractHash performance
Browse files Browse the repository at this point in the history
  • Loading branch information
iverase committed Oct 10, 2024
1 parent 14f0b48 commit 712f022
Show file tree
Hide file tree
Showing 9 changed files with 91 additions and 80 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -12,52 +12,53 @@
import org.elasticsearch.core.Releasables;

/**
* Base implementation for {@link BytesRefHash} and {@link LongHash}, or any class that
* needs to map values to dense ords. This class is not thread-safe.
* Base implementation for {@link BytesRefHash}, {@link LongHash}, {@link LongLongHash} and {@link Int3Hash}
* or any class that needs to map values to dense ords. This class is not thread-safe.
*/
// IDs are internally stored as id + 1 so that 0 encodes for an empty slot
abstract class AbstractHash extends AbstractPagedHashMap {

LongArray ids;

AbstractHash(long capacity, float maxLoadFactor, BigArrays bigArrays) {
super(capacity, maxLoadFactor, bigArrays);
ids = bigArrays.newLongArray(capacity(), true);
ids = bigArrays.newLongArray(capacity(), false);
ids.fill(0L, capacity(), -1L);
}

/**
* Get the id associated with the key at <code>0 &lt;= index &lt; capacity()</code>, or -1 if this slot is unused.
*/
public long id(long index) {
return ids.get(index) - 1;
public final long id(long index) {
return ids.get(index);
}

/**
* Set the id for the key at <code>0 &lt;= index &lt; capacity()</code>.
*/
protected final void setId(long index, long id) {
ids.set(index, id + 1);
}

/**
* Set the id for the key at <code>0 &lt;= index &lt; capacity()</code> and get the previous value, or -1 if this slot is unused.
*/
protected final long getAndSetId(long index, long id) {
return ids.getAndSet(index, id + 1) - 1;
ids.set(index, id);
}

@Override
protected void resize(long capacity) {
ids = bigArrays.resize(ids, capacity);
public void close() {
Releasables.close(ids);
}

@Override
protected boolean used(long bucket) {
return id(bucket) >= 0;
protected final void rehash(long buckets, long newBuckets) {
// grow and reset all ids to -1
ids = bigArrays.resize(ids, newBuckets);
ids.fill(0L, newBuckets, -1L);
// rehash all elements
final long size = size();
for (long i = 0; i < size; ++i) {
rehash(i);
}
}

@Override
public void close() {
Releasables.close(ids);
}
/**
* Rehash the entry associated with the given id.
*/
protected abstract void rehash(long id);

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.common.util;

/**
* Base implementation for {@link LongObjectPagedHashMap} and {@link ObjectObjectPagedHashMap}.
*/
public abstract class AbstractObjectHash extends AbstractPagedHashMap {

    /**
     * Creates a new hash table.
     *
     * @param capacity      the initial number of buckets to allocate
     * @param maxLoadFactor the fraction of buckets that may be filled before the table grows
     * @param bigArrays     the {@link BigArrays} instance used to allocate backing storage
     */
    AbstractObjectHash(long capacity, float maxLoadFactor, BigArrays bigArrays) {
        super(capacity, maxLoadFactor, bigArrays);
    }

    /** Whether the given bucket currently holds an entry. */
    protected abstract boolean used(long bucket);

    /** Remove the entry at the given index and add it back so it moves to its correct slot under the current mask. */
    protected abstract void removeAndAdd(long index);

    /** Resize the backing arrays to the given capacity. */
    protected abstract void resize(long capacity);

    @Override
    protected final void rehash(long buckets, long newBuckets) {
        // Resize arrays first; the table is grown in place, so existing entries stay
        // where they were and are then remapped below.
        resize(newBuckets);
        // First pass: remap in place; most entries land in their final slot directly.
        for (long i = 0; i < buckets; ++i) {
            if (used(i)) {
                removeAndAdd(i);
            }
        }
        // The only entries which have not been put in their final position in the previous loop are those that were stored in a slot that
        // is < slot(key, mask). This only happens when slot(key, mask) returned a slot that was close to the end of the array and collision
        // resolution has put it back in the first slots. This time, collision resolution will have put them at the beginning of the newly
        // allocated slots. Let's re-add them to make sure they are in the right slot. This 2nd loop will typically exit very early.
        for (long i = buckets; i < newBuckets; ++i) {
            if (used(i)) {
                removeAndAdd(i); // add it back
            } else {
                break;
            }
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -54,14 +54,14 @@ static long hash(long value) {
/**
* Return the number of allocated slots to store this hash table.
*/
public long capacity() {
public final long capacity() {
return mask + 1;
}

/**
* Return the number of longs in this hash table.
*/
public long size() {
public final long size() {
return size;
}

Expand All @@ -73,45 +73,24 @@ static long nextSlot(long curSlot, long mask) {
return (curSlot + 1) & mask; // linear probing
}

/** Resize to the given capacity. */
protected abstract void resize(long capacity);

protected abstract boolean used(long bucket);

/** Remove the entry at the given index and add it back */
protected abstract void removeAndAdd(long index);
/**
* Rehash the current hash table after the capacity has been increased from {@code buckets} to {@code newBuckets}.
*/
protected abstract void rehash(long buckets, long newBuckets);

protected final void grow() {
// The difference of this implementation of grow() compared to standard hash tables is that we are growing in-place, which makes
// the re-mapping of keys to slots a bit more tricky.
assert size == maxSize;
final long prevSize = size;
final long buckets = capacity();
// Resize arrays
// compute new sizes
final long newBuckets = buckets << 1;
assert newBuckets == Long.highestOneBit(newBuckets) : newBuckets; // power of 2
resize(newBuckets);
mask = newBuckets - 1;
// First let's remap in-place: most data will be put in its final position directly
for (long i = 0; i < buckets; ++i) {
if (used(i)) {
removeAndAdd(i);
}
}
// The only entries which have not been put in their final position in the previous loop are those that were stored in a slot that
// is < slot(key, mask). This only happens when slot(key, mask) returned a slot that was close to the end of the array and collision
// resolution has put it back in the first slots. This time, collision resolution will have put them at the beginning of the newly
// allocated slots. Let's re-add them to make sure they are in the right slot. This 2nd loop will typically exit very early.
for (long i = buckets; i < newBuckets; ++i) {
if (used(i)) {
removeAndAdd(i); // add it back
} else {
break;
}
}
assert size == prevSize;
maxSize = (long) (newBuckets * maxLoadFactor);
assert size < maxSize;
rehash(buckets, newBuckets);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -222,9 +222,7 @@ public long add(BytesRef key) {
}

@Override
protected void removeAndAdd(long index) {
final long id = getAndSetId(index, -1);
assert id >= 0;
protected void rehash(long id) {
final int code = hashes.get(id);
reset(code, id);
}
Expand Down
10 changes: 2 additions & 8 deletions server/src/main/java/org/elasticsearch/common/util/Int3Hash.java
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,8 @@ private void append(long id, int key1, int key2, int key3) {
keys.set(keyOffset + 2, key3);
}

private void reset(long id) {
@Override
protected void rehash(long id) {
final IntArray keys = this.keys;
final long keyOffset = id * 3;
final int key1 = keys.get(keyOffset);
Expand Down Expand Up @@ -129,13 +130,6 @@ public long add(int key1, int key2, int key3) {
return set(key1, key2, key3, size);
}

@Override
protected void removeAndAdd(long index) {
final long id = getAndSetId(index, -1);
assert id >= 0;
reset(id);
}

@Override
public void close() {
Releasables.close(keys, super::close);
Expand Down
10 changes: 2 additions & 8 deletions server/src/main/java/org/elasticsearch/common/util/LongHash.java
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,8 @@ private void append(long id, long key) {
keys.set(id, key);
}

private void reset(long id) {
@Override
protected void rehash(long id) {
final long key = keys.get(id);
final long slot = slot(hash(key), mask);
for (long index = slot;; index = nextSlot(index, mask)) {
Expand All @@ -107,13 +108,6 @@ public long add(long key) {
return set(key, size);
}

@Override
protected void removeAndAdd(long index) {
final long id = getAndSetId(index, -1);
assert id >= 0;
reset(id);
}

@Override
public void close() {
try (Releasable releasable = keys) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,8 @@ private void append(long id, long key1, long key2) {
keys.set(keyOffset + 1, key2);
}

private void reset(long id) {
@Override
protected void rehash(long id) {
final LongArray keys = this.keys;
final long keyOffset = id * 2;
final long key1 = keys.get(keyOffset);
Expand Down Expand Up @@ -133,13 +134,6 @@ public long add(long key1, long key2) {
return set(key1, key2, size);
}

@Override
protected void removeAndAdd(long index) {
final long id = getAndSetId(index, -1);
assert id >= 0;
reset(id);
}

@Override
public void close() {
Releasables.close(keys, () -> super.close());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
* A hash table from native longs to objects. This implementation resolves collisions
* using open-addressing and does not support null values. This class is not thread-safe.
*/
public final class LongObjectPagedHashMap<T> extends AbstractPagedHashMap implements Iterable<LongObjectPagedHashMap.Cursor<T>> {
public final class LongObjectPagedHashMap<T> extends AbstractObjectHash implements Iterable<LongObjectPagedHashMap.Cursor<T>> {

private LongArray keys;
private ObjectArray<T> values;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
* Note that this class does not track either the actual keys or values. It is responsibility of
* the caller to release those objects if necessary.
*/
public final class ObjectObjectPagedHashMap<K, V> extends AbstractPagedHashMap implements Iterable<ObjectObjectPagedHashMap.Cursor<K, V>> {
public final class ObjectObjectPagedHashMap<K, V> extends AbstractObjectHash implements Iterable<ObjectObjectPagedHashMap.Cursor<K, V>> {

private ObjectArray<K> keys;
private ObjectArray<V> values;
Expand Down

0 comments on commit 712f022

Please sign in to comment.