From d8a3521a654c2b2d9a22c5dceefe10091877a8d0 Mon Sep 17 00:00:00 2001 From: Fabian Kovacs <1553491+awildturtok@users.noreply.github.com> Date: Fri, 5 Nov 2021 15:36:22 +0100 Subject: [PATCH 01/10] Implement our own iterator for Bucket.Entry: the stream based implementation was surprisingly slow --- .../concepts/tree/ConceptTreeConnector.java | 11 +--- .../conquery/models/events/Bucket.java | 54 ++++++++++++++++--- .../conquery/models/events/BucketEntry.java | 13 ----- .../conquery/models/events/CBlock.java | 23 ++++---- 4 files changed, 62 insertions(+), 39 deletions(-) delete mode 100644 backend/src/main/java/com/bakdata/conquery/models/events/BucketEntry.java diff --git a/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/tree/ConceptTreeConnector.java b/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/tree/ConceptTreeConnector.java index a882441d72..a6e187f57f 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/tree/ConceptTreeConnector.java +++ b/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/tree/ConceptTreeConnector.java @@ -2,24 +2,17 @@ import java.util.ArrayList; import java.util.List; -import java.util.Map; import javax.annotation.CheckForNull; import javax.validation.Valid; import com.bakdata.conquery.io.jackson.serializer.NsIdRef; +import com.bakdata.conquery.models.datasets.Column; +import com.bakdata.conquery.models.datasets.Table; import com.bakdata.conquery.models.datasets.concepts.Connector; import com.bakdata.conquery.models.datasets.concepts.conditions.CTCondition; import com.bakdata.conquery.models.datasets.concepts.filters.Filter; -import com.bakdata.conquery.models.datasets.Column; -import com.bakdata.conquery.models.datasets.Table; -import com.bakdata.conquery.models.events.Bucket; -import com.bakdata.conquery.models.events.BucketEntry; -import com.bakdata.conquery.models.events.CBlock; import com.bakdata.conquery.models.events.MajorTypeId; -import com.bakdata.conquery.models.events.stores.root.StringStore; -import com.bakdata.conquery.models.exceptions.ConceptConfigurationException; -import com.bakdata.conquery.util.CalculatedValue; import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonManagedReference; import io.dropwizard.validation.ValidationMethod; diff --git a/backend/src/main/java/com/bakdata/conquery/models/events/Bucket.java b/backend/src/main/java/com/bakdata/conquery/models/events/Bucket.java index 52503f812a..d8a61871cd 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/events/Bucket.java +++ b/backend/src/main/java/com/bakdata/conquery/models/events/Bucket.java @@ -3,9 +3,9 @@ import java.math.BigDecimal; import java.util.Collection; import java.util.HashMap; +import java.util.Iterator; import java.util.Map; import java.util.Set; -import java.util.stream.IntStream; import javax.validation.constraints.Min; import javax.validation.constraints.NotNull; @@ -31,6 +31,8 @@ import com.bakdata.conquery.models.identifiable.ids.specific.BucketId; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonIgnore; +import com.google.common.collect.AbstractIterator; +import lombok.Data; import lombok.Getter; import lombok.RequiredArgsConstructor; import lombok.Setter; @@ -94,13 +96,10 @@ public boolean containsEntity(int entity) { return getEntityStart(entity) != -1; } - public Iterable entries() { - return () -> entities() - .stream() - .flatMap(entity -> IntStream.range(getEntityStart(entity), getEntityEnd(entity)) - .mapToObj(e -> new BucketEntry(entity, e)) - ) - .iterator(); + + + public Iterable entries() { + return () -> new BucketIterator(entities.iterator()); } public int getEntityStart(int entityId) { @@ -193,4 +192,43 @@ public Map calculateMap(int event) { public Dataset getDataset() { return getTable().getDataset(); } + + /** + * Implementation of an Iterator of a Bucket: Iterate all Entities and all their Events. + */ + @RequiredArgsConstructor + private class BucketIterator extends AbstractIterator { + + private final Iterator entities; + + private int entity = -1; + private int position = -1; + + @Override + protected Entry computeNext() { + // Initialize entity, or advance to next entity + if(entity == -1 || position >= getEntityEnd(entity)){ + + if(!entities.hasNext()){ + return endOfData(); + } + + entity = entities.next(); + position = getEntityStart(entity); + } + + // Advance to next position of entity. + return new Entry(entity, position++); + } + } + + /** + * Container class for co-Iteration of Entity and their Events. + */ + @Data + @ToString + public static class Entry { + private final int entity; + private final int event; + } } diff --git a/backend/src/main/java/com/bakdata/conquery/models/events/BucketEntry.java b/backend/src/main/java/com/bakdata/conquery/models/events/BucketEntry.java deleted file mode 100644 index dc1c62c1fb..0000000000 --- a/backend/src/main/java/com/bakdata/conquery/models/events/BucketEntry.java +++ /dev/null @@ -1,13 +0,0 @@ -package com.bakdata.conquery.models.events; - -import lombok.Data; -import lombok.Getter; -import lombok.RequiredArgsConstructor; -import lombok.ToString; - -@RequiredArgsConstructor @Getter -@Data @ToString -public class BucketEntry { - private final int entity; - private final int event; -} diff --git a/backend/src/main/java/com/bakdata/conquery/models/events/CBlock.java b/backend/src/main/java/com/bakdata/conquery/models/events/CBlock.java index 450c8f80a5..ce01cecb68 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/events/CBlock.java +++ b/backend/src/main/java/com/bakdata/conquery/models/events/CBlock.java @@ -1,5 +1,11 @@ package com.bakdata.conquery.models.events; +import java.util.Arrays; +import java.util.List; +import java.util.Map; + +import javax.validation.constraints.NotNull; + import com.bakdata.conquery.io.jackson.serializer.CBlockDeserializer; import com.bakdata.conquery.io.jackson.serializer.NsIdRef; import com.bakdata.conquery.models.common.daterange.CDateRange; @@ -8,7 +14,11 @@ import com.bakdata.conquery.models.datasets.Table; import com.bakdata.conquery.models.datasets.concepts.Concept; import com.bakdata.conquery.models.datasets.concepts.Connector; -import com.bakdata.conquery.models.datasets.concepts.tree.*; +import com.bakdata.conquery.models.datasets.concepts.tree.ConceptTreeCache; +import com.bakdata.conquery.models.datasets.concepts.tree.ConceptTreeChild; +import com.bakdata.conquery.models.datasets.concepts.tree.ConceptTreeConnector; +import com.bakdata.conquery.models.datasets.concepts.tree.TreeChildPrefixIndex; +import com.bakdata.conquery.models.datasets.concepts.tree.TreeConcept; import com.bakdata.conquery.models.events.stores.root.StringStore; import com.bakdata.conquery.models.exceptions.ConceptConfigurationException; import com.bakdata.conquery.models.identifiable.IdentifiableImpl; @@ -24,11 +34,6 @@ import lombok.Setter; import lombok.extern.slf4j.Slf4j; -import javax.validation.constraints.NotNull; -import java.util.Arrays; -import java.util.List; -import java.util.Map; - /** * Metadata for connection of {@link Bucket} and {@link Concept} *

@@ -173,7 +178,7 @@ else if(treeConcept.countElements() == 1){ final int[] root = treeConcept.getPrefix(); - for (BucketEntry entry : bucket.entries()) { + for (Bucket.Entry entry : bucket.entries()) { try { final int event = entry.getEvent(); @@ -239,7 +244,7 @@ else if(treeConcept.countElements() == 1){ */ private static long[] calculateConceptElementPathBloomFilter(int bucketSize, Bucket bucket, int[][] mostSpecificChildren) { long[] includedConcepts = new long[bucketSize]; - for (BucketEntry entry : bucket.entries()) { + for (Bucket.Entry entry : bucket.entries()) { final int[] mostSpecificChild = mostSpecificChildren[entry.getEvent()]; for (int i = 0; i < mostSpecificChild.length; i++) { @@ -280,7 +285,7 @@ private static CDateRange[] calculateEntityDateIndices(Bucket bucket, int bucket continue; } - for (BucketEntry entry : bucket.entries()) { + for (Bucket.Entry entry : bucket.entries()) { if (!bucket.has(entry.getEvent(), column)) { continue; } From bcff18c36d5a25ea7f5a3cb5ae362487f09a5a1b Mon Sep 17 00:00:00 2001 From: Fabian Kovacs <1553491+awildturtok@users.noreply.github.com> Date: Fri, 5 Nov 2021 16:45:31 +0100 Subject: [PATCH 02/10] We can also just inline all those Iterations as they are quite obvious --- .../conquery/models/events/Bucket.java | 47 ---------------- .../conquery/models/events/CBlock.java | 55 +++++++++++-------- 2 files changed, 33 insertions(+), 69 deletions(-) diff --git a/backend/src/main/java/com/bakdata/conquery/models/events/Bucket.java b/backend/src/main/java/com/bakdata/conquery/models/events/Bucket.java index d8a61871cd..63cee77efc 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/events/Bucket.java +++ b/backend/src/main/java/com/bakdata/conquery/models/events/Bucket.java @@ -3,7 +3,6 @@ import java.math.BigDecimal; import java.util.Collection; import java.util.HashMap; -import java.util.Iterator; import java.util.Map; import java.util.Set; @@ -31,8 +30,6 @@ import com.bakdata.conquery.models.identifiable.ids.specific.BucketId; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonIgnore; -import com.google.common.collect.AbstractIterator; -import lombok.Data; import lombok.Getter; import lombok.RequiredArgsConstructor; import lombok.Setter; @@ -97,11 +94,6 @@ public boolean containsEntity(int entity) { } - - public Iterable entries() { - return () -> new BucketIterator(entities.iterator()); - } - public int getEntityStart(int entityId) { return start[getEntityIndex(entityId)]; } @@ -192,43 +184,4 @@ public Map calculateMap(int event) { public Dataset getDataset() { return getTable().getDataset(); } - - /** - * Implementation of an Iterator of a Bucket: Iterate all Entities and all their Events. - */ - @RequiredArgsConstructor - private class BucketIterator extends AbstractIterator { - - private final Iterator entities; - - private int entity = -1; - private int position = -1; - - @Override - protected Entry computeNext() { - // Initialize entity, or advance to next entity - if(entity == -1 || position >= getEntityEnd(entity)){ - - if(!entities.hasNext()){ - return endOfData(); - } - - entity = entities.next(); - position = getEntityStart(entity); - } - - // Advance to next position of entity. - return new Entry(entity, position++); - } - } - - /** - * Container class for co-Iteration of Entity and their Events. - */ - @Data - @ToString - public static class Entry { - private final int entity; - private final int event; - } } diff --git a/backend/src/main/java/com/bakdata/conquery/models/events/CBlock.java b/backend/src/main/java/com/bakdata/conquery/models/events/CBlock.java index ce01cecb68..09fa19c5ed 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/events/CBlock.java +++ b/backend/src/main/java/com/bakdata/conquery/models/events/CBlock.java @@ -49,6 +49,7 @@ public class CBlock extends IdentifiableImpl implements NamespacedIden /** * Estimate the memory usage of CBlocks. + * * @param depthEstimate estimate of depth of mostSpecificChildren */ public static long estimateMemoryBytes(long entities, long entries, double depthEstimate) { @@ -76,7 +77,7 @@ public static long estimateMemoryBytes(long entities, long entries, double depth private final int root; /** - * Crude Bloomfilter for Concept inclusion per Entity: Each set bit denotes that the concept (with localId <= 64) or a descendant of that concept (with localId > 64) is present for the entity in this Bucket. + * Crude Bloomfilter for Concept inclusion per Entity: Each set bit denotes that the concept (with localId <= 64) or a descendant of that concept (with localId > 64) is present for the entity in this Bucket. */ private final long[] includedConceptElementsPerEntity; @@ -162,7 +163,7 @@ private static int[][] calculateSpecificChildrenPaths(Bucket bucket, ConceptTree treeConcept.initializeIdCache(stringStore, bucket.getImp()); } // No column only possible if we have just one tree element! - else if(treeConcept.countElements() == 1){ + else if (treeConcept.countElements() == 1) { stringStore = null; } else { @@ -178,10 +179,12 @@ else if(treeConcept.countElements() == 1){ final int[] root = treeConcept.getPrefix(); - for (Bucket.Entry entry : bucket.entries()) { - try { - final int event = entry.getEvent(); + for (int _event = 0; _event < bucket.getNumberOfEvents(); _event++) { + + // We need to copy so the closure is satisfied. + final int event = _event; + try { // Events without values are omitted // Events can also be filtered, allowing a single table to be used by multiple connectors. if (column != null && !bucket.has(event, column)) { @@ -206,8 +209,8 @@ else if(treeConcept.countElements() == 1){ } ConceptTreeChild child = cache == null - ? treeConcept.findMostSpecificChild(stringValue, rowMap) - : cache.findMostSpecificChild(valueIndex, stringValue, rowMap); + ? treeConcept.findMostSpecificChild(stringValue, rowMap) + : cache.findMostSpecificChild(valueIndex, stringValue, rowMap); // All unresolved elements resolve to the root. if (child == null) { @@ -219,7 +222,7 @@ else if(treeConcept.countElements() == 1){ mostSpecificChildren[event] = child.getPrefix(); } catch (ConceptConfigurationException ex) { - log.error("Failed to resolve event " + bucket + "-" + entry.getEvent() + " against concept " + treeConcept, ex); + log.error("Failed to resolve event " + bucket + "-" + event + " against concept " + treeConcept, ex); } } @@ -244,15 +247,21 @@ else if(treeConcept.countElements() == 1){ */ private static long[] calculateConceptElementPathBloomFilter(int bucketSize, Bucket bucket, int[][] mostSpecificChildren) { long[] includedConcepts = new long[bucketSize]; - for (Bucket.Entry entry : bucket.entries()) { - final int[] mostSpecificChild = mostSpecificChildren[entry.getEvent()]; - for (int i = 0; i < mostSpecificChild.length; i++) { + for (int entity : bucket.getEntities()) { + for (int event = bucket.getEntityStart(entity); event < bucket.getEntityEnd(entity); event++) { + + final int[] mostSpecificChild = mostSpecificChildren[event]; - final long mask = calculateBitMask(i, mostSpecificChild); - includedConcepts[entry.getEntity() - bucketSize*bucket.getBucket()] |= mask; + for (int i = 0; i < mostSpecificChild.length; i++) { + + final long mask = calculateBitMask(i, mostSpecificChild); + + includedConcepts[bucket.getEntityIndex(entity)] |= mask; + } } } + return includedConcepts; } @@ -267,11 +276,10 @@ public static long calculateBitMask(int pathIndex, int[] mostSpecificChild) { if (mostSpecificChild[pathIndex] < 64) { return 1L << mostSpecificChild[pathIndex]; } - return calculateBitMask(pathIndex-1, mostSpecificChild); + return calculateBitMask(pathIndex - 1, mostSpecificChild); } - /** * For every included entity, calculate min and max and store them as statistics in the CBlock. */ @@ -285,16 +293,19 @@ private static CDateRange[] calculateEntityDateIndices(Bucket bucket, int bucket continue; } - for (Bucket.Entry entry : bucket.entries()) { - if (!bucket.has(entry.getEvent(), column)) { - continue; - } + for (int entity : bucket.getEntities()) { + for (int event = bucket.getEntityStart(entity); event < bucket.getEntityEnd(entity); event++) { + if (!bucket.has(event, column)) { + continue; + } + + CDateRange range = bucket.getAsDateRange(event, column); - CDateRange range = bucket.getAsDateRange(entry.getEvent(), column); + final int index = bucket.getEntityIndex(entity); - final int index = bucket.getEntityIndex(entry.getEntity()); + spans[index] = spans[index].spanClosed(range); + } - spans[index] = spans[index].spanClosed(range); } } From 72213cf3a64c7377495c77090874e54dc96c8d0f Mon Sep 17 00:00:00 2001 From: Fabian Kovacs <1553491+awildturtok@users.noreply.github.com> Date: Mon, 8 Nov 2021 09:48:50 +0100 Subject: [PATCH 03/10] code style feedback --- .../conquery/models/events/CBlock.java | 32 ++++++++++++------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/backend/src/main/java/com/bakdata/conquery/models/events/CBlock.java b/backend/src/main/java/com/bakdata/conquery/models/events/CBlock.java index 09fa19c5ed..78978e9ea3 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/events/CBlock.java +++ b/backend/src/main/java/com/bakdata/conquery/models/events/CBlock.java @@ -179,10 +179,8 @@ else if (treeConcept.countElements() == 1) { final int[] root = treeConcept.getPrefix(); - for (int _event = 0; _event < bucket.getNumberOfEvents(); _event++) { + for (int event = 0; event < bucket.getNumberOfEvents(); event++) { - // We need to copy so the closure is satisfied. - final int event = _event; try { // Events without values are omitted @@ -200,7 +198,9 @@ else if (treeConcept.countElements() == 1) { } // Lazy evaluation of map to avoid allocations if possible. - final CalculatedValue> rowMap = new CalculatedValue<>(() -> bucket.calculateMap(event)); + // Copy event for closure. + final int _event = event; + final CalculatedValue> rowMap = new CalculatedValue<>(() -> bucket.calculateMap(_event)); if ((connector.getCondition() != null && !connector.getCondition().matches(stringValue, rowMap))) { @@ -249,15 +249,25 @@ private static long[] calculateConceptElementPathBloomFilter(int bucketSize, Buc long[] includedConcepts = new long[bucketSize]; for (int entity : bucket.getEntities()) { - for (int event = bucket.getEntityStart(entity); event < bucket.getEntityEnd(entity); event++) { + + final int entityIndex = bucket.getEntityIndex(entity); + final int end = bucket.getEntityEnd(entity); + + for (int event = bucket.getEntityStart(entity); event < end; event++) { final int[] mostSpecificChild = mostSpecificChildren[event]; + children: for (int i = 0; i < mostSpecificChild.length; i++) { final long mask = calculateBitMask(i, mostSpecificChild); + final long newConcepts = includedConcepts[entityIndex] | mask; + + if (newConcepts == mask) { + break children; + } - includedConcepts[bucket.getEntityIndex(entity)] |= mask; + includedConcepts[entityIndex] = newConcepts; } } } @@ -273,7 +283,7 @@ public static long calculateBitMask(int pathIndex, int[] mostSpecificChild) { if (pathIndex < 0) { return 0; } - if (mostSpecificChild[pathIndex] < 64) { + if (mostSpecificChild[pathIndex] < Long.SIZE) { return 1L << mostSpecificChild[pathIndex]; } return calculateBitMask(pathIndex - 1, mostSpecificChild); @@ -294,15 +304,15 @@ private static CDateRange[] calculateEntityDateIndices(Bucket bucket, int bucket } for (int entity : bucket.getEntities()) { - for (int event = bucket.getEntityStart(entity); event < bucket.getEntityEnd(entity); event++) { + final int index = bucket.getEntityIndex(entity); + final int end = bucket.getEntityEnd(entity); + + for (int event = bucket.getEntityStart(entity); event < end; event++) { if (!bucket.has(event, column)) { continue; } CDateRange range = bucket.getAsDateRange(event, column); - - final int index = bucket.getEntityIndex(entity); - spans[index] = spans[index].spanClosed(range); } From 9ecb8c10752e6b06c0f847ab3cb8afce3e670207 Mon Sep 17 00:00:00 2001 From: Fabian Kovacs <1553491+awildturtok@users.noreply.github.com> Date: Mon, 8 Nov 2021 11:17:59 +0100 Subject: [PATCH 04/10] Fix a bug, where Concept was not initialised in test --- .../io/jackson/serializer/CBlockDeserializer.java | 15 ++++++++++----- .../conquery/models/events/CBlockTest.java | 6 +++++- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/backend/src/main/java/com/bakdata/conquery/io/jackson/serializer/CBlockDeserializer.java b/backend/src/main/java/com/bakdata/conquery/io/jackson/serializer/CBlockDeserializer.java index f8c9ba2009..31241e6468 100644 --- a/backend/src/main/java/com/bakdata/conquery/io/jackson/serializer/CBlockDeserializer.java +++ b/backend/src/main/java/com/bakdata/conquery/io/jackson/serializer/CBlockDeserializer.java @@ -1,10 +1,18 @@ package com.bakdata.conquery.io.jackson.serializer; +import java.io.IOException; +import java.util.Optional; + import com.bakdata.conquery.models.datasets.concepts.Connector; import com.bakdata.conquery.models.datasets.concepts.tree.TreeConcept; import com.bakdata.conquery.models.events.CBlock; import com.fasterxml.jackson.core.JsonParser; -import com.fasterxml.jackson.databind.*; +import com.fasterxml.jackson.databind.BeanDescription; +import com.fasterxml.jackson.databind.BeanProperty; +import com.fasterxml.jackson.databind.DeserializationContext; +import com.fasterxml.jackson.databind.JavaType; +import com.fasterxml.jackson.databind.JsonDeserializer; +import com.fasterxml.jackson.databind.JsonMappingException; import com.fasterxml.jackson.databind.deser.ContextualDeserializer; import com.fasterxml.jackson.databind.deser.ResolvableDeserializer; import com.fasterxml.jackson.databind.jsontype.TypeDeserializer; @@ -12,9 +20,6 @@ import lombok.NoArgsConstructor; import lombok.extern.slf4j.Slf4j; -import java.io.IOException; -import java.util.Optional; - @Slf4j @AllArgsConstructor @NoArgsConstructor public class CBlockDeserializer extends JsonDeserializer implements ContextualDeserializer { @@ -27,7 +32,7 @@ public CBlock deserialize(JsonParser p, DeserializationContext ctxt) throws IOEx TreeConcept concept = block.getConnector().getConcept(); - if(concept != null && block.getMostSpecificChildren() != null) { + if(block.getMostSpecificChildren() != null) { // deduplicate concrete paths after loading from disk. for (int event = 0; event < block.getMostSpecificChildren().length; event++) { diff --git a/backend/src/test/java/com/bakdata/conquery/models/events/CBlockTest.java b/backend/src/test/java/com/bakdata/conquery/models/events/CBlockTest.java index 394afa9573..4eba0d3919 100644 --- a/backend/src/test/java/com/bakdata/conquery/models/events/CBlockTest.java +++ b/backend/src/test/java/com/bakdata/conquery/models/events/CBlockTest.java @@ -13,9 +13,11 @@ import com.bakdata.conquery.models.events.stores.root.ColumnStore; import com.bakdata.conquery.models.exceptions.JSONException; import com.bakdata.conquery.models.identifiable.CentralRegistry; +import lombok.SneakyThrows; import org.junit.jupiter.api.Test; class CBlockTest { + @SneakyThrows @Test public void serialize() throws IOException, JSONException { final CentralRegistry registry = new CentralRegistry(); @@ -40,7 +42,9 @@ public void serialize() throws IOException, JSONException { final Import imp = new Import(table); imp.setName("import"); - final Bucket bucket = new Bucket(0, 0, 10, new ColumnStore[0], Collections.emptySet(),new int[10], new int[10], imp); + concept.initElements(); + + final Bucket bucket = new Bucket(0, 0, 10, new ColumnStore[0], Collections.emptySet(), new int[10], new int[10], imp); final CBlock cBlock = CBlock.createCBlock(connector, bucket, 10); From 594f28e0a6507731fe593031f0acab5394be9499 Mon Sep 17 00:00:00 2001 From: Fabian Kovacs <1553491+awildturtok@users.noreply.github.com> Date: Mon, 8 Nov 2021 11:56:30 +0100 Subject: [PATCH 05/10] don't exit early, didn't work as anticipated --- .../java/com/bakdata/conquery/models/events/CBlock.java | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/backend/src/main/java/com/bakdata/conquery/models/events/CBlock.java b/backend/src/main/java/com/bakdata/conquery/models/events/CBlock.java index 78978e9ea3..7767050d45 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/events/CBlock.java +++ b/backend/src/main/java/com/bakdata/conquery/models/events/CBlock.java @@ -257,17 +257,11 @@ private static long[] calculateConceptElementPathBloomFilter(int bucketSize, Buc final int[] mostSpecificChild = mostSpecificChildren[event]; - children: for (int i = 0; i < mostSpecificChild.length; i++) { final long mask = calculateBitMask(i, mostSpecificChild); - final long newConcepts = includedConcepts[entityIndex] | mask; - if (newConcepts == mask) { - break children; - } - - includedConcepts[entityIndex] = newConcepts; + includedConcepts[entityIndex] |= mask; } } } From 6002b5a3b6df27906ac65c03ced6045e66dd85bf Mon Sep 17 00:00:00 2001 From: awildturtok <1553491+awildturtok@users.noreply.github.com> Date: Wed, 10 Nov 2021 14:32:58 +0100 Subject: [PATCH 06/10] Update backend/src/main/java/com/bakdata/conquery/models/events/CBlock.java Co-authored-by: MT <12283268+thoniTUB@users.noreply.github.com> --- .../main/java/com/bakdata/conquery/models/events/CBlock.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/src/main/java/com/bakdata/conquery/models/events/CBlock.java b/backend/src/main/java/com/bakdata/conquery/models/events/CBlock.java index 7767050d45..638f55b4f2 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/events/CBlock.java +++ b/backend/src/main/java/com/bakdata/conquery/models/events/CBlock.java @@ -222,7 +222,7 @@ else if (treeConcept.countElements() == 1) { mostSpecificChildren[event] = child.getPrefix(); } catch (ConceptConfigurationException ex) { - log.error("Failed to resolve event " + bucket + "-" + event + " against concept " + treeConcept, ex); + log.error("Failed to resolve event {}-{} against concept {}", bucket, event, treeConcept, ex); } } From 690e91980f78af9f7f8c4b592641214e6bab7fa0 Mon Sep 17 00:00:00 2001 From: Fabian Kovacs <1553491+awildturtok@users.noreply.github.com> Date: Wed, 10 Nov 2021 17:19:50 +0100 Subject: [PATCH 07/10] we unroll spanClosed to avoid a lot of allocations and some funky business --- .../conquery/models/events/CBlock.java | 49 +++++++++++++++++-- 1 file changed, 46 insertions(+), 3 deletions(-) diff --git a/backend/src/main/java/com/bakdata/conquery/models/events/CBlock.java b/backend/src/main/java/com/bakdata/conquery/models/events/CBlock.java index 638f55b4f2..f184eb9dd0 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/events/CBlock.java +++ b/backend/src/main/java/com/bakdata/conquery/models/events/CBlock.java @@ -286,12 +286,20 @@ public static long calculateBitMask(int pathIndex, int[] mostSpecificChild) { /** * For every included entity, calculate min and max and store them as statistics in the CBlock. + * + * @implNote This is an unrolled implementation of {@link CDateRange#spanClosed(CDateRange)}. */ private static CDateRange[] calculateEntityDateIndices(Bucket bucket, int bucketSize) { - CDateRange[] spans = new CDateRange[bucketSize]; - Arrays.fill(spans, CDateRange.all()); + int[] mins = new int[bucketSize]; + int[] maxs = new int[bucketSize]; + + // First initialize to an illegal state that's easy on our comparisons + Arrays.fill(mins, Integer.MAX_VALUE); + Arrays.fill(maxs, Integer.MAX_VALUE); Table table = bucket.getTable(); + + for (Column column : table.getColumns()) { if (!column.getType().isDateCompatible()) { continue; @@ -307,12 +315,47 @@ private static CDateRange[] calculateEntityDateIndices(Bucket bucket, int bucket } CDateRange range = bucket.getAsDateRange(event, column); - spans[index] = spans[index].spanClosed(range); + + if (range.hasLowerBound()) { + final int minValue = range.getMinValue(); + + maxs[index] = Math.max(maxs[index], minValue); + mins[index] = Math.min(mins[index], minValue); + } + + if (range.hasUpperBound()) { + final int maxValue = range.getMaxValue(); + + maxs[index] = Math.max(maxs[index], maxValue); + mins[index] = Math.min(mins[index], maxValue); + } } } } + CDateRange[] spans = new CDateRange[bucketSize]; + + for (int index = 0; index < bucketSize; index++) { + if (mins[index] == Integer.MAX_VALUE && maxs[index] == Integer.MIN_VALUE) { + spans[index] = CDateRange.all(); + continue; + } + + if (mins[index] == Integer.MAX_VALUE) { + spans[index] = CDateRange.atMost(maxs[index]); + continue; + } + + if (maxs[index] == Integer.MAX_VALUE) { + spans[index] = CDateRange.atLeast(mins[index]); + continue; + } + + spans[index] = CDateRange.of(mins[index], maxs[index]); + } + + return spans; } } From 7aca965052df3727db1b46947553927d5fafce59 Mon Sep 17 00:00:00 2001 From: awildturtok <1553491+awildturtok@users.noreply.github.com> Date: Mon, 15 Nov 2021 15:47:46 +0100 Subject: [PATCH 08/10] Update backend/src/main/java/com/bakdata/conquery/models/events/CBlock.java Co-authored-by: MT <12283268+thoniTUB@users.noreply.github.com> --- .../main/java/com/bakdata/conquery/models/events/CBlock.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/src/main/java/com/bakdata/conquery/models/events/CBlock.java b/backend/src/main/java/com/bakdata/conquery/models/events/CBlock.java index f184eb9dd0..e426adfa86 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/events/CBlock.java +++ b/backend/src/main/java/com/bakdata/conquery/models/events/CBlock.java @@ -295,7 +295,7 @@ private static CDateRange[] calculateEntityDateIndices(Bucket bucket, int bucket // First initialize to an illegal state that's easy on our comparisons Arrays.fill(mins, Integer.MAX_VALUE); - Arrays.fill(maxs, Integer.MAX_VALUE); + Arrays.fill(maxs, Integer.MIN_VALUE); Table table = bucket.getTable(); From 784af87a4524e29ff83f1dec9acf1cf12bcdea2d Mon Sep 17 00:00:00 2001 From: awildturtok <1553491+awildturtok@users.noreply.github.com> Date: Mon, 15 Nov 2021 16:20:37 +0100 Subject: [PATCH 09/10] Update backend/src/main/java/com/bakdata/conquery/models/events/CBlock.java --- .../main/java/com/bakdata/conquery/models/events/CBlock.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/src/main/java/com/bakdata/conquery/models/events/CBlock.java b/backend/src/main/java/com/bakdata/conquery/models/events/CBlock.java index e426adfa86..63193ab545 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/events/CBlock.java +++ b/backend/src/main/java/com/bakdata/conquery/models/events/CBlock.java @@ -347,7 +347,7 @@ private static CDateRange[] calculateEntityDateIndices(Bucket bucket, int bucket continue; } - if (maxs[index] == Integer.MAX_VALUE) { + if (maxs[index] == Integer.MIN_VALUE) { spans[index] = CDateRange.atLeast(mins[index]); continue; } From 87ef5374da5a2e3c5a60e678bafdf289d80e6035 Mon Sep 17 00:00:00 2001 From: Fabian Kovacs <1553491+awildturtok@users.noreply.github.com> Date: Tue, 16 Nov 2021 16:59:50 +0100 Subject: [PATCH 10/10] roll some allocations back simplifying the code --- .../conquery/models/events/CBlock.java | 55 ++++++++++--------- 1 file changed, 29 insertions(+), 26 deletions(-) diff --git a/backend/src/main/java/com/bakdata/conquery/models/events/CBlock.java b/backend/src/main/java/com/bakdata/conquery/models/events/CBlock.java index 63193ab545..42b434aed7 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/events/CBlock.java +++ b/backend/src/main/java/com/bakdata/conquery/models/events/CBlock.java @@ -290,12 +290,11 @@ public static long calculateBitMask(int pathIndex, int[] mostSpecificChild) { * @implNote This is an unrolled implementation of {@link CDateRange#spanClosed(CDateRange)}. */ private static CDateRange[] calculateEntityDateIndices(Bucket bucket, int bucketSize) { - int[] mins = new int[bucketSize]; - int[] maxs = new int[bucketSize]; + CDateRange[] spans = new CDateRange[bucketSize]; + + Arrays.fill(spans, CDateRange.all()); // First initialize to an illegal state that's easy on our comparisons - Arrays.fill(mins, Integer.MAX_VALUE); - Arrays.fill(maxs, Integer.MIN_VALUE); Table table = bucket.getTable(); @@ -309,6 +308,11 @@ private static CDateRange[] calculateEntityDateIndices(Bucket bucket, int bucket final int index = bucket.getEntityIndex(entity); final int end = bucket.getEntityEnd(entity); + // We unroll spanClosed for the whole bucket/entity, this avoids costly + int max = Integer.MIN_VALUE; + int min = Integer.MAX_VALUE; + + for (int event = bucket.getEntityStart(entity); event < end; event++) { if (!bucket.has(event, column)) { continue; @@ -319,43 +323,42 @@ private static CDateRange[] calculateEntityDateIndices(Bucket bucket, int bucket if (range.hasLowerBound()) { final int minValue = range.getMinValue(); - maxs[index] = Math.max(maxs[index], minValue); - mins[index] = Math.min(mins[index], minValue); + max = Math.max(max, minValue); + min = Math.min(min, minValue); } if (range.hasUpperBound()) { final int maxValue = range.getMaxValue(); - maxs[index] = Math.max(maxs[index], maxValue); - mins[index] = Math.min(mins[index], maxValue); + max = Math.max(max, maxValue); + min = Math.min(min, maxValue); } } - } - } - CDateRange[] spans = new CDateRange[bucketSize]; - - for (int index = 0; index < bucketSize; index++) { - if (mins[index] == Integer.MAX_VALUE && maxs[index] == Integer.MIN_VALUE) { - spans[index] = CDateRange.all(); - continue; + spans[index] = createClosed(max, min, spans[index]); } + } - if (mins[index] == Integer.MAX_VALUE) { - spans[index] = CDateRange.atMost(maxs[index]); - continue; - } + return spans; + } - if (maxs[index] == Integer.MIN_VALUE) { - spans[index] = CDateRange.atLeast(mins[index]); - continue; - } + /** + * Helper method for calculateEntityDateIndices, swapping {@link Integer#MIN_VALUE}/{@link Integer#MAX_VALUE} for higher performance. + */ + private static CDateRange createClosed(int max, int min, CDateRange in) { + if(max == Integer.MIN_VALUE && min == Integer.MAX_VALUE){ + return in; + } - spans[index] = CDateRange.of(mins[index], maxs[index]); + if (max == Integer.MIN_VALUE){ + return in.spanClosed(CDateRange.atLeast(min)); } + if (min == Integer.MAX_VALUE) { + return in.spanClosed(CDateRange.atMost(max)); + } - return spans; + return in.spanClosed(CDateRange.of(min, max)); } }