From 90e687c4e6e9d32e992c5c11b58940a6bd9fcf11 Mon Sep 17 00:00:00 2001 From: Anton Lamtev Date: Sun, 13 Nov 2022 23:17:09 +0300 Subject: [PATCH] Release 8th homework --- README.md | 9 + build.gradle.kts | 12 +- .../vk/polis/ads/hash/DoubleHashingMap.java | 76 ++++++ .../java/company/vk/polis/ads/hash/Map.java | 62 +++++ .../polis/ads/hash/SeparateChainingMap.java | 85 +++++++ .../vk/polis/ads/hash/package-info.java | 4 + .../vk/polis/ads/hash/HashMapMapImpl.java | 53 +++++ .../company/vk/polis/ads/hash/MapTest.java | 225 ++++++++++++++++++ 8 files changed, 525 insertions(+), 1 deletion(-) create mode 100644 src/main/java/company/vk/polis/ads/hash/DoubleHashingMap.java create mode 100644 src/main/java/company/vk/polis/ads/hash/Map.java create mode 100644 src/main/java/company/vk/polis/ads/hash/SeparateChainingMap.java create mode 100644 src/main/java/company/vk/polis/ads/hash/package-info.java create mode 100644 src/test/java/company/vk/polis/ads/hash/HashMapMapImpl.java create mode 100644 src/test/java/company/vk/polis/ads/hash/MapTest.java diff --git a/README.md b/README.md index 19625b49..81033461 100644 --- a/README.md +++ b/README.md @@ -195,3 +195,12 @@ $ git checkout -b 0831f19beb82ce628e4f7284f8c02ce994dd0eb5 ```commandline ./gradlew test ``` + +## ДЗ 8. Hash tables. 
Дедлайн 22.11.2022 18:29:59 + +* Реализовать в `company.vk.polis.ads.hash.SeparateChainingMap` и `company.vk.polis.ads.hash.DoubleHashingMap` методы, чтобы выполнялись все тесты в `company.vk.polis.ads.hash.MapTest` + +Локально запускать тесты можно через +```commandline +./gradlew test +``` diff --git a/build.gradle.kts b/build.gradle.kts index 60e54bac..dc75396f 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -21,9 +21,14 @@ java { } dependencies { - implementation("org.jetbrains:annotations:20.1.0") + compileOnly("com.google.code.findbugs:jsr305:3.0.2") + val jetbrainsAnno = "org.jetbrains:annotations:23.0.0" + compileOnly(jetbrainsAnno) + + testCompileOnly(jetbrainsAnno) testImplementation("it.unimi.dsi:fastutil:8.5.9") testImplementation("org.junit.jupiter:junit-jupiter-api:5.9.0") + testImplementation("org.junit.jupiter:junit-jupiter-params:5.9.0") testRuntimeOnly("org.junit.jupiter:junit-jupiter-engine:5.9.0") jmh("org.openjdk.jmh:jmh-core:1.35") @@ -38,4 +43,9 @@ tasks.getByName("test") { testLogging { events(PASSED, SKIPPED, FAILED) } + + jvmArgs( + "--add-opens", "java.base/java.lang=ALL-UNNAMED", + "--add-opens", "java.base/java.util=ALL-UNNAMED", + ) } diff --git a/src/main/java/company/vk/polis/ads/hash/DoubleHashingMap.java b/src/main/java/company/vk/polis/ads/hash/DoubleHashingMap.java new file mode 100644 index 00000000..45d57689 --- /dev/null +++ b/src/main/java/company/vk/polis/ads/hash/DoubleHashingMap.java @@ -0,0 +1,76 @@ +package company.vk.polis.ads.hash; + +import java.util.function.BiConsumer; + +import org.jetbrains.annotations.Nullable; + +/** + * Map implementation with double hashing collision resolution approach + * + * @param key + * @param value + */ +public final class DoubleHashingMap implements Map { + // Do not edit these 3 instance fields!!! MapTest reads "keys" by name via reflection. + private K[] keys; + private V[] values; + private boolean[] removed; + + /** + * Creates a new associative array according to expectedMaxSize and loadFactor.
+ * Allocates the initial amount of memory right away, based on expectedMaxSize and loadFactor. + * NOTE: this is the contract to implement; the template body below still allocates zero-length arrays. + * @param expectedMaxSize the expected maximum number of elements in the associative array. + * This means that capacity - the size of the underlying arrays - + * will not grow until the number of elements + * exceeds expectedMaxSize + * @param loadFactor the ratio of the number of elements to the size of the arrays + */ + public DoubleHashingMap(int expectedMaxSize, float loadFactor) { + keys = allocate(0); + values = allocate(0); + removed = new boolean[0]; + } + + @Override + public int size() { + throw new UnsupportedOperationException(); + } + + @Override + public boolean containsKey(K key) { + throw new UnsupportedOperationException(); + } + + @Nullable + @Override + public V get(K key) { + throw new UnsupportedOperationException(); + } + + /** + * If capacity * loadFactor == size() and a new key is about to be added, + * the arrays must be grown + */ + @Nullable + @Override + public V put(K key, V value) { + throw new UnsupportedOperationException(); + } + + @Nullable + @Override + public V remove(K key) { + throw new UnsupportedOperationException(); + } + + @Override + public void forEach(BiConsumer consumer) { + throw new UnsupportedOperationException(); + } + + @SuppressWarnings("unchecked") + private static T[] allocate(int capacity) { + return (T[]) new Object[capacity]; + } +} diff --git a/src/main/java/company/vk/polis/ads/hash/Map.java b/src/main/java/company/vk/polis/ads/hash/Map.java new file mode 100644 index 00000000..e25a61ca --- /dev/null +++ b/src/main/java/company/vk/polis/ads/hash/Map.java @@ -0,0 +1,62 @@ +package company.vk.polis.ads.hash; + +import java.util.function.BiConsumer; + +import org.jetbrains.annotations.Nullable; + +/** + * Map aka Dictionary or Associative array + * + * @param key + * @param value + */ +public interface Map { + int size(); + + default boolean isEmpty() { + return size() == 0; + } + + /** + * 
Checks if key is present. + * + * @param key key + * @return true if key is present and false otherwise + */ + boolean containsKey(K key); + + /** + * Returns value associated with key + * + * @param key key + * @return value associated with key + */ + @Nullable + V get(K key); + + /** + * Puts key and value associated with it + * + * @param key key + * @param value value + * @return old value associated with key if one is present, or null otherwise + */ + @Nullable + V put(K key, V value); + + /** + * Removes value associated with key + * + * @param key key + * @return value removed from map if one was present, or null otherwise + */ + @Nullable + V remove(K key); + + /** + * Iterates over map and passes key-value pairs to consumer + * + * @param consumer object that consumes key-value pairs + */ + void forEach(BiConsumer consumer); +} diff --git a/src/main/java/company/vk/polis/ads/hash/SeparateChainingMap.java b/src/main/java/company/vk/polis/ads/hash/SeparateChainingMap.java new file mode 100644 index 00000000..ce8f114e --- /dev/null +++ b/src/main/java/company/vk/polis/ads/hash/SeparateChainingMap.java @@ -0,0 +1,85 @@ +package company.vk.polis.ads.hash; + +import java.util.function.BiConsumer; + +import org.jetbrains.annotations.Nullable; + +/** + * Map implementation with separate chaining collision resolution approach + * + * @param key + * @param value + */ +public final class SeparateChainingMap implements Map { + // Do not edit this field!!! MapTest reads "array" by name via reflection. + private Node[] array; + + /** + * Creates a new associative array according to expectedMaxSize and loadFactor. + * Allocates the initial amount of memory right away, based on expectedMaxSize and loadFactor. + * NOTE: this is the contract to implement; the template body below allocates a zero-length array and then throws. + * @param expectedMaxSize the expected maximum number of elements in the associative array.
+ * This means that capacity - the size of the array of linked lists - + * will not grow until the number of elements + * exceeds expectedMaxSize + * @param loadFactor the ratio of the number of elements to the size of the array of linked lists + */ + public SeparateChainingMap(int expectedMaxSize, float loadFactor) { + array = allocate(0); + throw new UnsupportedOperationException(); + } + + @Override + public int size() { + throw new UnsupportedOperationException(); + } + + @Override + public boolean containsKey(K key) { + throw new UnsupportedOperationException(); + } + + @Nullable + @Override + public V get(K key) { + throw new UnsupportedOperationException(); + } + + /** + * If capacity * loadFactor == size() and a new key is about to be added, + * the array must be grown + */ + @Nullable + @Override + public V put(K key, V value) { + throw new UnsupportedOperationException(); + } + + @Nullable + @Override + public V remove(K key) { + throw new UnsupportedOperationException(); + } + + @Override + public void forEach(BiConsumer consumer) { + throw new UnsupportedOperationException(); + } + + @SuppressWarnings("unchecked") + private static Node[] allocate(int capacity) { + return (Node[]) new Node[capacity]; + } + + private static final class Node { + K key; + V value; + Node prev; + Node next; + + Node(K key, V value) { + this.key = key; + this.value = value; + } + } +} diff --git a/src/main/java/company/vk/polis/ads/hash/package-info.java b/src/main/java/company/vk/polis/ads/hash/package-info.java new file mode 100644 index 00000000..34fabc86 --- /dev/null +++ b/src/main/java/company/vk/polis/ads/hash/package-info.java @@ -0,0 +1,4 @@ +@ParametersAreNonnullByDefault +package company.vk.polis.ads.hash; + +import javax.annotation.ParametersAreNonnullByDefault; diff --git a/src/test/java/company/vk/polis/ads/hash/HashMapMapImpl.java b/src/test/java/company/vk/polis/ads/hash/HashMapMapImpl.java new file mode 100644 index 00000000..6c814b0a
--- /dev/null +++ b/src/test/java/company/vk/polis/ads/hash/HashMapMapImpl.java @@ -0,0 +1,53 @@ +package company.vk.polis.ads.hash; + +import java.util.HashMap; +import java.util.Objects; +import java.util.function.BiConsumer; +import java.util.function.BiFunction; + +import org.jetbrains.annotations.Nullable; +/** Adapter that exposes a java.util.Map through this package's Map interface; MapTest.baseTest uses it as the expected-side map. Keys (and the value in put) go through Objects.requireNonNull before delegation. */ +final class HashMapMapImpl implements Map { + private final java.util.Map hashMap; + /** Builds the backing map by applying the factory {@code m} to {@code capacity} and {@code loadFactor}. */ + HashMapMapImpl(BiFunction> m, int capacity, float loadFactor) { + hashMap = m.apply(capacity, loadFactor); + } + /** Uses {@code HashMap::new} as the backing-map factory. */ + HashMapMapImpl(int capacity, float loadFactor) { + this(HashMap::new, capacity, loadFactor); + } + + @Override + public int size() { + return hashMap.size(); + } + + @Override + public boolean containsKey(K key) { + return hashMap.containsKey(Objects.requireNonNull(key)); + } + + @Nullable + @Override + public V get(K key) { + return hashMap.get(Objects.requireNonNull(key)); + } + + @Nullable + @Override + public V put(K key, V value) { + return hashMap.put(Objects.requireNonNull(key), Objects.requireNonNull(value)); + } + + @Nullable + @Override + public V remove(K key) { + return hashMap.remove(Objects.requireNonNull(key)); + } + + @Override + public void forEach(BiConsumer consumer) { + hashMap.forEach(consumer); + } +} diff --git a/src/test/java/company/vk/polis/ads/hash/MapTest.java b/src/test/java/company/vk/polis/ads/hash/MapTest.java new file mode 100644 index 00000000..aecd836a --- /dev/null +++ b/src/test/java/company/vk/polis/ads/hash/MapTest.java @@ -0,0 +1,225 @@ +package company.vk.polis.ads.hash; + +import java.lang.reflect.Field; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.UUID; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ThreadLocalRandom; +import java.util.function.BiFunction; +import java.util.function.Function; +import java.util.stream.DoubleStream; +import java.util.stream.IntStream; + +import org.jetbrains.annotations.NotNull; +import
org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; +/** Tests for SeparateChainingMap and DoubleHashingMap: comparison against HashMapMapImpl, capacity growth read via reflection, and keys that all share one hash code. */ +class MapTest { + @Test + void testSeparateChainingBase() { + baseTest(HashMapMapImpl::new, SeparateChainingMap::new); + } + + @Test + void testDoubleHashingBase() { + baseTest(HashMapMapImpl::new, DoubleHashingMap::new); + } + + @ParameterizedTest + @MethodSource("loadFactors") + void testSeparateChainingLoadFactorAndCapacity(double loadFactor) { + testLoadFactorAndCapacity(SeparateChainingMap::new, loadFactor, MapTest::separateChainingMapCapacity); + } + + @ParameterizedTest + @MethodSource("loadFactors") + void testDoubleHashingLoadFactorAndCapacity(double loadFactor) { + testLoadFactorAndCapacity(DoubleHashingMap::new, loadFactor, MapTest::doubleHashingMapCapacity); + } + + @Test + void testSeparateChainingCollisions() { + testCollisions(SeparateChainingMap::new); + } + + @Test + void testDoubleHashingCollisions() { + testCollisions(DoubleHashingMap::new); + } + + @Disabled + @Test + void testHashMapBase() { + baseTest(HashMapMapImpl::new, (a, b) -> new HashMapMapImpl<>(LinkedHashMap::new, a, b)); + baseTest(HashMapMapImpl::new, (a, b) -> new HashMapMapImpl<>(ConcurrentHashMap::new, a, b)); + } + + @Disabled + @Test + void testHashMapLoadFactorAndCapacity() { + var map = new HashMapMapImpl<>(16, 0.75f); + map.put(new Object(), new Object()); + assertEquals(16, MapTest.hashMapCapacity(map)); + } + + @Disabled + @Test + void testHashMapCollisions() { + testCollisions(HashMapMapImpl::new); + } + /** Load factors supplied to the parameterized tests through {@code @MethodSource("loadFactors")}. */ + private static DoubleStream loadFactors() { + return DoubleStream.of(0.6f, 0.7f, 0.75f, 0.8f); + } + /** Applies the same random put/re-put/remove sequence to a reference map and the map under test, asserting equal return values and sizes after each step and equal contents at the end. */ + private static void baseTest(BiFunction>
expectedProducer, + BiFunction> actualProducer) { + int expectedMaxSize = 100_000; + float loadFactor = 0.8f; + var expectedMapCapacity = (int) (expectedMaxSize * loadFactor); + var expected = expectedProducer.apply(expectedMapCapacity * 2, loadFactor); + var actual = actualProducer.apply(expectedMaxSize * 2, loadFactor); + record Kv(UUID k, UUID v) implements Comparable { + @Override + public int compareTo(@NotNull Kv o) { + return k.compareTo(o.k); + } + } + var duplicatesByKey = new ArrayList(); + IntStream.range(0, expectedMaxSize).forEach(__ -> { + var key = randomUUID(); + var value = randomUUID(); + assertEquals(expected.put(key, value), actual.put(key, value), "Bug in put"); + assertEquals(expected.size(), actual.size(), "Bug in put"); + if (randomBoolean()) { + duplicatesByKey.add(new Kv(key, value)); + } + }); + duplicatesByKey.forEach(kv -> { + if (randomBoolean()) { + assertEquals(expected.put(kv.k, kv.v), actual.put(kv.k, kv.v), "Bug in put"); + assertEquals(expected.size(), actual.size(), "Bug in put"); + } else { + var newValue = randomUUID(); + assertEquals(expected.put(kv.k, newValue), actual.put(kv.k, newValue), "Bug in put"); + assertEquals(expected.size(), actual.size(), "Bug in put"); + } + }); + duplicatesByKey.forEach(kv -> { + assertEquals(expected.remove(kv.k), actual.remove(kv.k), "Bug in remove"); + assertEquals(expected.size(), actual.size(), "Bug in remove"); + var key = randomUUID(); + assertEquals(expected.remove(key), actual.remove(key), "Bug in remove"); + assertEquals(expected.size(), actual.size(), "Bug in remove"); + }); + var expectedSet = new HashSet<>(); + expected.forEach((k, v) -> expectedSet.add(new Kv(k, v))); + var actualSet = new HashSet<>(); + actual.forEach((k, v) -> actualSet.add(new Kv(k, v))); + assertTrue(expectedSet.containsAll(actualSet), "Bug somewhere in put/delete/forEach"); + assertEquals(expectedSet.size(), actualSet.size(), "Bug somewhere in put/delete/forEach"); + } + /** Checks that the capacity reported by {@code capacityExtractor} stays the same for the first expectedMaxSize insertions and differs after one more. */ + private static void
testLoadFactorAndCapacity(BiFunction> mapProducer, + double loadFactor, + Function, Integer> capacityExtractor) { + var capacity = 100; + var expectedMaxSize = (int) (capacity * loadFactor); + var map = mapProducer.apply(expectedMaxSize, (float) loadFactor); + var expectedCapacity = capacityExtractor.apply(map); + // Insert exactly expectedMaxSize random entries + IntStream.range(0, expectedMaxSize).forEach(__ -> map.put(randomUUID(), randomUUID())); + var actualCapacity = capacityExtractor.apply(map); + // The capacity must still equal the one measured right after construction + assertEquals(expectedCapacity, actualCapacity, "Capacity has been changed despite the loadFactor and expectedMaxSize"); + // Insert one more entry, going past expectedMaxSize + map.put(randomUUID(), randomUUID()); + actualCapacity = capacityExtractor.apply(map); + // The backing array length must now differ from the initial one + assertNotEquals(expectedCapacity, actualCapacity, "Capacity has not been changed despite the loadFactor and expectedMaxSize"); + } + /** Fills a map with elementCount / 4 plain Object keys plus elementCount / 4 * 3 ConstHashObject keys, then checks every ConstHashObject key is found and, at random, removable. */ + private static void testCollisions(BiFunction> mapProducer) { + var elementCount = 30_000; + var map = mapProducer.apply(elementCount, 0.75f); + IntStream.range(0, elementCount / 4).forEach(__ -> map.put(new Object(), randomUUID())); + var duplicatesByHash = new ArrayList(elementCount / 4 * 3); + IntStream.range(0, elementCount / 4 * 3).forEach(__ -> { + final var random = ConstHashObject.random(); + duplicatesByHash.add(random); + map.put(random, randomUUID()); + }); + duplicatesByHash.forEach(dup -> { + assertTrue(map.containsKey(dup), "Incorrect collision resolution implementation"); + if (randomBoolean()) { + assertNotNull(map.remove(dup), "Incorrect collision resolution implementation"); + } + }); + } + /** Key type whose hashCode() always returns 1, so all instances collide by hash; equals() is not overridden here. */ + record ConstHashObject(String s, Long l, int i) { + static ConstHashObject random() { + return new ConstHashObject( + UUID.randomUUID().toString(), + ThreadLocalRandom.current().nextLong(), + ThreadLocalRandom.current().nextInt() + ); + } + + @Override + public int hashCode() { + return 1; + } + } + + private static int
hashMapCapacity(Map map) { + return arrayFieldLength(declaredField(map, "hashMap"), "table"); + } + + private static int separateChainingMapCapacity(Map map) { + return arrayFieldLength(map, "array"); + } + + private static int doubleHashingMapCapacity(Map map) { + return arrayFieldLength(map, "keys"); + } + /** Reads, via reflection, the field {@code name} declared directly on the runtime class of {@code o}; a missing field raises AssertionError (the NoSuchFieldException is not attached as cause). */ + @SuppressWarnings("unchecked") + private static T declaredField(Object o, String name) { + final Field field; + try { + field = o.getClass().getDeclaredField(name); + } catch (NoSuchFieldException e) { + throw new AssertionError("Field renaming is not permitted"); + } + field.setAccessible(true); + final T value; + try { + value = (T) field.get(o); + } catch (IllegalAccessException e) { + throw new RuntimeException(e); + } + return value; + } + /** Returns the length of the Object[] stored in field {@code name} of {@code o}. */ + private static int arrayFieldLength(Object o, String name) { + return ((Object[]) declaredField(o, name)).length; + } + + private static boolean randomBoolean() { + return ThreadLocalRandom.current().nextBoolean(); + } + + private static UUID randomUUID() { + return UUID.randomUUID(); + } +}