Skip to content

Commit

Permalink
[Iceberg] Add histogram statistic support
Browse files Browse the repository at this point in the history
Utilizes the sketch_kll function to generate histograms and store them
in the Iceberg table's Puffin files for table-level statistics storage.

Histograms are always collected by ANALYZE, but they are not used by the
cost calculator unless enabled via optimizer.use-histograms
  • Loading branch information
ZacBlanco committed Sep 5, 2024
1 parent e702e26 commit 6413d50
Show file tree
Hide file tree
Showing 34 changed files with 1,465 additions and 312 deletions.
196 changes: 195 additions & 1 deletion presto-common/src/main/java/com/facebook/presto/common/Utils.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,15 @@
import com.facebook.presto.common.predicate.Primitives;
import com.facebook.presto.common.type.Type;

import javax.annotation.Nullable;

import java.util.Arrays;
import java.util.function.Supplier;

import static com.facebook.presto.common.type.TypeUtils.readNativeValue;
import static com.facebook.presto.common.type.TypeUtils.writeNativeValue;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;

public final class Utils
{
Expand All @@ -30,7 +37,7 @@ private Utils()
public static Block nativeValueToBlock(Type type, Object object)
{
if (object != null && !Primitives.wrap(type.getJavaType()).isInstance(object)) {
throw new IllegalArgumentException(String.format("Object '%s' does not match type %s", object, type.getJavaType()));
throw new IllegalArgumentException(format("Object '%s' does not match type %s", object, type.getJavaType()));
}
BlockBuilder blockBuilder = type.createBlockBuilder(null, 1);
writeNativeValue(type, blockBuilder, object);
Expand All @@ -48,4 +55,191 @@ public static void checkArgument(boolean expression)
throw new IllegalArgumentException();
}
}

/**
 * Verifies a precondition on a caller-supplied argument.
 * <p>
 * The failure message is produced with {@link String#format(String, Object...)}. When the
 * template and the arguments do not agree (for example a literal {@code %} in the template, or
 * fewer arguments than placeholders), the raw template followed by the arguments is reported
 * instead, so the precondition failure is not replaced by a formatting error.
 *
 * @param expression the condition that must hold
 * @param message a {@code String.format} template describing the failure
 * @param args values substituted into {@code message}
 * @throws IllegalArgumentException if {@code expression} is false
 */
public static void checkArgument(boolean expression, String message, Object... args)
{
    if (expression) {
        return;
    }

    String description;
    try {
        description = format(message, args);
    }
    catch (java.util.IllegalFormatException e) {
        // A malformed template must not hide the precondition failure itself
        description = (args == null || args.length == 0) ? message : message + " " + Arrays.toString(args);
    }
    throw new IllegalArgumentException(description);
}

/**
 * Wraps {@code delegate} in a supplier that remembers the instance obtained from the first
 * call to {@code get()} and hands that same instance back on later calls. A supplier that is
 * already a {@code MemoizingSupplier} is returned as is rather than being wrapped again.
 */
public static <T> Supplier<T> memoizedSupplier(Supplier<T> delegate)
{
    if (!(delegate instanceof MemoizingSupplier)) {
        return new MemoizingSupplier<>(delegate);
    }
    return delegate;
}

/**
 * Supplier that calls its delegate the first time {@code get()} completes, stores the result,
 * and returns the stored result on later calls.
 * <p>
 * The first computation runs inside {@code synchronized (this)}, and the {@code initialized}
 * flag is checked both before and after taking the lock. If {@code delegate.get()} throws,
 * {@code initialized} is left false, so a later call invokes the delegate again.
 */
static class MemoizingSupplier<T>
implements Supplier<T>
{
// Set to null once a value has been computed (see get())
volatile Supplier<T> delegate;
// Written only after "value" has been assigned
volatile boolean initialized;
// "value" does not need to be volatile; visibility piggy-backs
// on volatile read of "initialized".
@Nullable T value;

/**
 * @param delegate the supplier to evaluate lazily; must not be null
 */
MemoizingSupplier(Supplier<T> delegate)
{
this.delegate = requireNonNull(delegate);
}

/**
 * Returns the memoized value, computing it through the delegate if that has not happened yet.
 */
@Override
public T get()
{
// A 2-field variant of Double Checked Locking.
if (!initialized) {
synchronized (this) {
// Re-check under the lock: another thread may have finished while this one waited
if (!initialized) {
T t = delegate.get();
// Order matters: publish the value before raising the volatile flag
value = t;
initialized = true;
// Release the delegate to GC.
delegate = null;
return t;
}
}
}
return value;
}

/**
 * Describes the delegate while it is still referenced, otherwise the value it returned.
 */
@Override
public String toString()
{
// Read the volatile field once so the null check and the use see the same reference
Supplier<T> delegate = this.delegate;
return "Suppliers.memoize("
+ (delegate == null ? "<supplier that returned " + value + ">" : delegate)
+ ")";
}
}

/**
 * Creates a {@link ToStringHelper} labelled with the simple class name of {@code self}.
 *
 * @param self the object whose runtime class supplies the label
 */
public static ToStringHelper toStringHelper(Object self)
{
    String simpleName = self.getClass().getSimpleName();
    return new ToStringHelper(simpleName);
}

/**
 * Creates a {@link ToStringHelper} labelled with the given name.
 *
 * @param className the label printed in front of the braces; must not be null
 */
public static ToStringHelper toStringHelper(String className)
{
    ToStringHelper helper = new ToStringHelper(className);
    return helper;
}

/**
 * Collects name/value pairs and renders them as {@code ClassName{name1=value1, name2=value2}}.
 * <p>
 * Entries are printed in the order they were added. Array values are rendered by content via
 * {@link Arrays#deepToString(Object[])}. After {@link #omitNullValues()} has been called,
 * entries whose value is null are left out of the output.
 */
public static final class ToStringHelper
{
    private final String className;
    // Sentinel head of a singly linked list; real entries start at holderHead.next
    private final ValueHolder holderHead = new ValueHolder();
    private ValueHolder holderTail = holderHead;
    private boolean omitNullValues;

    private ToStringHelper(String className)
    {
        this.className = requireNonNull(className);
    }

    /**
     * Makes {@link #toString()} skip every entry whose value is null.
     *
     * @return this helper
     */
    public ToStringHelper omitNullValues()
    {
        omitNullValues = true;
        return this;
    }

    /**
     * Appends a {@code name=value} entry.
     *
     * @param name the entry name; must not be null
     * @param value the entry value; may be null
     * @return this helper
     * @throws NullPointerException if {@code name} is null (no entry is added in that case)
     */
    public ToStringHelper add(String name, @Nullable Object value)
    {
        return addHolder(name, value);
    }

    /** Appends a {@code name=value} entry for a {@code boolean} value. */
    public ToStringHelper add(String name, boolean value)
    {
        return addHolder(name, String.valueOf(value));
    }

    /** Appends a {@code name=value} entry for a {@code char} value. */
    public ToStringHelper add(String name, char value)
    {
        return addHolder(name, String.valueOf(value));
    }

    /** Appends a {@code name=value} entry for a {@code double} value. */
    public ToStringHelper add(String name, double value)
    {
        return addHolder(name, String.valueOf(value));
    }

    /** Appends a {@code name=value} entry for a {@code float} value. */
    public ToStringHelper add(String name, float value)
    {
        return addHolder(name, String.valueOf(value));
    }

    /** Appends a {@code name=value} entry for an {@code int} value. */
    public ToStringHelper add(String name, int value)
    {
        return addHolder(name, String.valueOf(value));
    }

    /** Appends a {@code name=value} entry for a {@code long} value. */
    public ToStringHelper add(String name, long value)
    {
        return addHolder(name, String.valueOf(value));
    }

    /**
     * Renders the label followed by the collected entries in braces, separated by {@code ", "}.
     */
    @Override
    public String toString()
    {
        // create a copy to keep it consistent in case value changes
        boolean omitNullValuesSnapshot = omitNullValues;
        String nextSeparator = "";
        StringBuilder builder = new StringBuilder(32).append(className).append('{');
        for (ValueHolder valueHolder = holderHead.next;
                valueHolder != null;
                valueHolder = valueHolder.next) {
            Object value = valueHolder.value;
            if (!omitNullValuesSnapshot || value != null) {
                builder.append(nextSeparator);
                nextSeparator = ", ";

                if (valueHolder.name != null) {
                    builder.append(valueHolder.name).append('=');
                }
                if (value != null && value.getClass().isArray()) {
                    // Wrap the array so deepToString handles primitive and nested arrays alike,
                    // then strip the brackets contributed by the wrapper
                    Object[] objectArray = {value};
                    String arrayString = Arrays.deepToString(objectArray);
                    builder.append(arrayString, 1, arrayString.length() - 1);
                }
                else {
                    builder.append(value);
                }
            }
        }
        return builder.append('}').toString();
    }

    // Links a fresh, empty holder at the tail of the list and returns it
    private ValueHolder addHolder()
    {
        ValueHolder valueHolder = new ValueHolder();
        holderTail.next = valueHolder;
        holderTail = valueHolder;
        return valueHolder;
    }

    // Appends an entry without a name; it has no caller within this class
    private ToStringHelper addHolder(@Nullable Object value)
    {
        ValueHolder valueHolder = addHolder();
        valueHolder.value = value;
        return this;
    }

    // Appends a named entry
    private ToStringHelper addHolder(String name, @Nullable Object value)
    {
        // Validate before linking, so a null name cannot leave a nameless entry in the list
        requireNonNull(name);
        ValueHolder valueHolder = addHolder();
        valueHolder.value = value;
        valueHolder.name = name;
        return this;
    }

    // Node of the singly linked entry list
    private static final class ValueHolder
    {
        @Nullable String name;
        @Nullable Object value;
        @Nullable ValueHolder next;
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,11 @@ public Object getValue()
return Utils.blockToNativeValue(type, valueBlock.get());
}

/**
 * Returns the native value held by this marker, or an empty {@link Optional} when the marker
 * has no value block (or the decoded native value is null).
 */
public Optional<Object> getObjectValue()
{
    if (!valueBlock.isPresent()) {
        return Optional.empty();
    }
    return Optional.ofNullable(Utils.blockToNativeValue(type, valueBlock.get()));
}

public Object getPrintableValue(SqlFunctionProperties properties)
{
if (!valueBlock.isPresent()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,16 @@ public boolean equals(Object obj)
Objects.equals(this.high, other.high);
}

/**
 * Renders this range in interval notation: a square bracket for a bound of
 * {@code Marker.Bound.EXACTLY}, a parenthesis otherwise, with the two endpoints separated by
 * {@code ".."}. An endpoint without a value is printed as negative or positive infinity.
 */
@Override
public String toString()
{
    StringBuilder text = new StringBuilder();
    text.append(low.getBound() == Marker.Bound.EXACTLY ? "[" : "(");
    text.append(low.getObjectValue().orElse(Double.NEGATIVE_INFINITY));
    text.append("..");
    text.append(high.getObjectValue().orElse(Double.POSITIVE_INFINITY));
    text.append(high.getBound() == Marker.Bound.EXACTLY ? "]" : ")");
    return text.toString();
}

private void appendQuotedValue(StringBuilder buffer, Marker marker, SqlFunctionProperties properties)
{
buffer.append('"');
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,28 @@ public Object getSingleValue()
return lowIndexedRanges.values().iterator().next().getSingleValue();
}

/**
 * Restricts this set to the given span. Ranges fully contained in {@code span} are kept
 * unchanged, ranges that only overlap it are replaced by their intersection with
 * {@code span}, and all other ranges are dropped.
 *
 * @param span the range which the new set should span
 * @return a new {@link SortedRangeSet} holding the retained ranges
 */
public SortedRangeSet subRangeSet(Range span)
{
    Builder clipped = new Builder(type);

    for (Range candidate : getOrderedRanges()) {
        boolean fullyInside = span.contains(candidate);
        if (!fullyInside && !span.overlaps(candidate)) {
            // Nothing in common with the span
            continue;
        }
        clipped.add(fullyInside ? candidate : candidate.intersect(span));
    }

    return clipped.build();
}

@Override
public boolean containsValue(Object value)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@
import com.google.common.collect.Iterables;
import org.testng.annotations.Test;

import java.util.Arrays;
import java.util.stream.Collectors;

import static com.facebook.presto.common.type.BigintType.BIGINT;
import static com.facebook.presto.common.type.BooleanType.BOOLEAN;
import static com.facebook.presto.common.type.DoubleType.DOUBLE;
Expand Down Expand Up @@ -500,6 +503,65 @@ public void testCanonicalize()
assertDifferentSet(SortedRangeSet.all(BIGINT), SortedRangeSet.all(BOOLEAN), true);
}

@Test
public void testSubRangeSet()
{
    Range below = Range.lessThan(BIGINT, 10L);
    Range atMost = Range.lessThanOrEqual(BIGINT, 10L);
    Range above = Range.greaterThan(BIGINT, 10L);
    Range atLeast = Range.greaterThanOrEqual(BIGINT, 10L);

    // set and span share no value: the result must be empty
    assertEquals(SortedRangeSet.of(below).subRangeSet(above).getOrderedRanges().size(), 0);
    assertEquals(SortedRangeSet.of(above).subRangeSet(below).getOrderedRanges().size(), 0);
    assertEquals(SortedRangeSet.of(atLeast).subRangeSet(below).getOrderedRanges().size(), 0);
    assertEquals(SortedRangeSet.of(atMost).subRangeSet(above).getOrderedRanges().size(), 0);

    // set and span meet at an inclusive bound: exactly that single value remains
    assertEquals(SortedRangeSet.of(atMost).subRangeSet(atLeast).getOrderedRanges().size(), 1);
    assertEquals(SortedRangeSet.of(atLeast).subRangeSet(atMost).getOrderedRanges().size(), 1);
    assertEquals(
            SortedRangeSet.of(atMost).subRangeSet(atLeast).getOrderedRanges().get(0),
            Range.range(BIGINT, 10L, true, 10L, true));

    Range span = Range.range(BIGINT, -20L, true, 20L, true);
    Range farBelow = Range.lessThan(BIGINT, -10L);
    Range clippedLow = Range.range(BIGINT, -20L, true, -10L, false);
    Range clippedHigh = Range.range(BIGINT, 10L, false, 20L, true);

    // two unbounded ranges are each clipped to the span
    assertEquals(
            SortedRangeSet.of(farBelow, above).subRangeSet(span).getOrderedRanges(),
            Arrays.stream(new Range[] {clippedLow, clippedHigh})
                    .collect(Collectors.toList()));

    // a range lying entirely inside the span is kept untouched
    Range inner = Range.range(BIGINT, -5L, true, 5L, true);
    assertEquals(
            SortedRangeSet.of(farBelow, above, inner).subRangeSet(span).getOrderedRanges(),
            Arrays.stream(new Range[] {clippedLow, inner, clippedHigh})
                    .collect(Collectors.toList()));
}

private void assertSameSet(SortedRangeSet set1, SortedRangeSet set2, boolean removeSafeConstants)
throws Exception
{
Expand Down
15 changes: 8 additions & 7 deletions presto-iceberg/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -510,19 +510,16 @@
<artifactId>presto-cache</artifactId>
<scope>compile</scope>
</dependency>

<dependency>
<groupId>com.facebook.presto</groupId>
<artifactId>presto-main</artifactId>
<scope>test</scope>
</dependency>

<dependency>
<groupId>com.facebook.presto</groupId>
<artifactId>presto-parser</artifactId>
<scope>test</scope>
<groupId>com.facebook.presto</groupId>
<artifactId>presto-parser</artifactId>
<scope>test</scope>
</dependency>

<dependency>
<groupId>com.facebook.presto</groupId>
<artifactId>presto-analyzer</artifactId>
Expand Down Expand Up @@ -597,7 +594,7 @@
<dependency>
<groupId>org.apache.iceberg</groupId>
<artifactId>iceberg-core</artifactId>
<version>1.5.0</version>
<version>${dep.iceberg.version}</version>
<classifier>tests</classifier>
<scope>test</scope>
<exclusions>
Expand Down Expand Up @@ -627,6 +624,10 @@
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-math3</artifactId>
</dependency>
</dependencies>

<build>
Expand Down
Loading

0 comments on commit 6413d50

Please sign in to comment.