Skip to content

Commit

Permalink
Treat low confidence, zero estimations as unknown during joins
Browse files Browse the repository at this point in the history
  • Loading branch information
abhinavmuk04 committed Jul 8, 2024
1 parent 5b1cde7 commit 959e198
Show file tree
Hide file tree
Showing 3 changed files with 133 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ public final class SystemSessionProperties
public static final String FAST_INEQUALITY_JOINS = "fast_inequality_joins";
public static final String QUERY_PRIORITY = "query_priority";
public static final String CONFIDENCE_BASED_BROADCAST_ENABLED = "confidence_based_broadcast_enabled";
public static final String TREAT_LOW_CONFIDENCE_ZERO_ESTIMATION_AS_UNKNOWN_ENABLED = "treat_low_confidence_zero_estimation_unknown_enabled";
public static final String SPILL_ENABLED = "spill_enabled";
public static final String JOIN_SPILL_ENABLED = "join_spill_enabled";
public static final String AGGREGATION_SPILL_ENABLED = "aggregation_spill_enabled";
Expand Down Expand Up @@ -429,6 +430,11 @@ public SystemSessionProperties(
"Enable confidence based broadcasting when enabled",
false,
false),
booleanProperty(
TREAT_LOW_CONFIDENCE_ZERO_ESTIMATION_AS_UNKNOWN_ENABLED,
"Treat low confidence zero estimations as unknowns during joins when enabled",
false,
false),
booleanProperty(
DISTRIBUTED_INDEX_JOIN,
"Distribute index joins on join keys instead of executing inline",
Expand Down Expand Up @@ -2030,6 +2036,11 @@ public static boolean confidenceBasedBroadcastEnabled(Session session)
return session.getSystemProperty(CONFIDENCE_BASED_BROADCAST_ENABLED, Boolean.class);
}

public static boolean treatLowConfidenceZeroEstimationAsUnknownEnabled(Session session)
{
return session.getSystemProperty(TREAT_LOW_CONFIDENCE_ZERO_ESTIMATION_AS_UNKNOWN_ENABLED, Boolean.class);
}

public static int getHashPartitionCount(Session session)
{
return session.getSystemProperty(HASH_PARTITION_COUNT, Integer.class);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import com.facebook.presto.Session;
import com.facebook.presto.cost.CostComparator;
import com.facebook.presto.cost.LocalCostEstimate;
import com.facebook.presto.cost.PlanCostEstimate;
import com.facebook.presto.cost.PlanNodeStatsEstimate;
import com.facebook.presto.cost.StatsProvider;
import com.facebook.presto.cost.TaskCountEstimator;
Expand Down Expand Up @@ -47,9 +48,11 @@
import static com.facebook.presto.SystemSessionProperties.getJoinMaxBroadcastTableSize;
import static com.facebook.presto.SystemSessionProperties.isSizeBasedJoinDistributionTypeEnabled;
import static com.facebook.presto.SystemSessionProperties.isUseBroadcastJoinWhenBuildSizeSmallProbeSizeUnknownEnabled;
import static com.facebook.presto.SystemSessionProperties.treatLowConfidenceZeroEstimationAsUnknownEnabled;
import static com.facebook.presto.cost.CostCalculatorWithEstimatedExchanges.calculateJoinCostWithoutOutput;
import static com.facebook.presto.spi.plan.JoinDistributionType.PARTITIONED;
import static com.facebook.presto.spi.plan.JoinDistributionType.REPLICATED;
import static com.facebook.presto.spi.statistics.SourceInfo.ConfidenceLevel.LOW;
import static com.facebook.presto.sql.analyzer.FeaturesConfig.JoinDistributionType.AUTOMATIC;
import static com.facebook.presto.sql.planner.iterative.ConfidenceBasedBroadcastUtil.confidenceBasedBroadcast;
import static com.facebook.presto.sql.planner.iterative.rule.JoinSwappingUtils.isBelowBroadcastLimit;
Expand Down Expand Up @@ -261,6 +264,13 @@ private static boolean mustReplicate(JoinNode joinNode, Context context)

private PlanNodeWithCost getJoinNodeWithCost(Context context, JoinNode possibleJoinNode)
{
PlanNodeStatsEstimate buildSideStats = context.getStatsProvider().getStats(possibleJoinNode.getRight());
boolean isBuildSideStatsUnknown = buildSideStats.getOutputRowCount() == 0 && buildSideStats.confidenceLevel() == LOW;

if (treatLowConfidenceZeroEstimationAsUnknownEnabled(context.getSession()) && isBuildSideStatsUnknown) {
return new PlanNodeWithCost(PlanCostEstimate.unknown(), possibleJoinNode);
}

StatsProvider stats = context.getStatsProvider();
boolean replicated = possibleJoinNode.getDistributionType().get().equals(REPLICATED);
/*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
import static com.facebook.presto.SystemSessionProperties.CONFIDENCE_BASED_BROADCAST_ENABLED;
import static com.facebook.presto.SystemSessionProperties.JOIN_DISTRIBUTION_TYPE;
import static com.facebook.presto.SystemSessionProperties.JOIN_MAX_BROADCAST_TABLE_SIZE;
import static com.facebook.presto.SystemSessionProperties.TREAT_LOW_CONFIDENCE_ZERO_ESTIMATION_AS_UNKNOWN_ENABLED;
import static com.facebook.presto.SystemSessionProperties.USE_BROADCAST_WHEN_BUILDSIZE_SMALL_PROBESIDE_UNKNOWN;
import static com.facebook.presto.common.type.BigintType.BIGINT;
import static com.facebook.presto.common.type.VarcharType.createUnboundedVarcharType;
Expand Down Expand Up @@ -460,6 +461,117 @@ public void testLeftAndRightHighConfidenceLeftSmaller()
values(ImmutableMap.of("A1", 0, "A2", 1))));
}

@Test
public void testJoinWithLowConfidenceZeroStatistics()
{
int aRows = 100;
int bRows = 0;
assertDetermineJoinDistributionType()
.setSystemProperty(JOIN_DISTRIBUTION_TYPE, JoinDistributionType.AUTOMATIC.name())
.setSystemProperty(TREAT_LOW_CONFIDENCE_ZERO_ESTIMATION_AS_UNKNOWN_ENABLED, "TRUE")
.overrideStats("valuesA", PlanNodeStatsEstimate.builder()
.setOutputRowCount(aRows)
.addVariableStatistics(ImmutableMap.of(new VariableReferenceExpression(Optional.empty(), "A1", BIGINT), new VariableStatsEstimate(0, 100, 0, 6400, 100)))
.build())
.overrideStats("valuesB", PlanNodeStatsEstimate.builder()
.setOutputRowCount(bRows)
.addVariableStatistics(ImmutableMap.of(new VariableReferenceExpression(Optional.empty(), "A1", BIGINT), new VariableStatsEstimate(0, 100, 0, 6400, 100)))
.setConfidence(LOW)
.build())
.on(p -> {
VariableReferenceExpression a1 = p.variable("A1", BIGINT);
VariableReferenceExpression b1 = p.variable("B1", BIGINT);
return p.join(
INNER,
p.values(new PlanNodeId("valuesA"), aRows, a1),
p.values(new PlanNodeId("valuesB"), bRows, b1),
ImmutableList.of(new EquiJoinClause(a1, b1)),
ImmutableList.of(a1, b1),
Optional.empty());
})
.matches(join(
INNER,
ImmutableList.of(equiJoinClause("B1", "A1")),
Optional.empty(),
Optional.of(REPLICATED),
values(ImmutableMap.of("B1", 0)),
values(ImmutableMap.of("A1", 0))));
}

@Test
public void testJoinWithLowConfidenceZeroStatisticsOnLeft()
{
int aRows = 0;
int bRows = 100;
assertDetermineJoinDistributionType()
.setSystemProperty(JOIN_DISTRIBUTION_TYPE, JoinDistributionType.AUTOMATIC.name())
.setSystemProperty(TREAT_LOW_CONFIDENCE_ZERO_ESTIMATION_AS_UNKNOWN_ENABLED, "TRUE")
.overrideStats("valuesA", PlanNodeStatsEstimate.builder()
.setConfidence(LOW)
.setOutputRowCount(aRows)
.addVariableStatistics(ImmutableMap.of(new VariableReferenceExpression(Optional.empty(), "A1", BIGINT), new VariableStatsEstimate(0, 100, 0, 6400, 100)))
.build())
.overrideStats("valuesB", PlanNodeStatsEstimate.builder()
.setOutputRowCount(bRows)
.addVariableStatistics(ImmutableMap.of(new VariableReferenceExpression(Optional.empty(), "A1", BIGINT), new VariableStatsEstimate(0, 100, 0, 6400, 100)))
.build())
.on(p -> {
VariableReferenceExpression a1 = p.variable("A1", BIGINT);
VariableReferenceExpression b1 = p.variable("B1", BIGINT);
return p.join(
INNER,
p.values(new PlanNodeId("valuesA"), aRows, a1),
p.values(new PlanNodeId("valuesB"), bRows, b1),
ImmutableList.of(new EquiJoinClause(a1, b1)),
ImmutableList.of(a1, b1),
Optional.empty());
})
.matches(join(
INNER,
ImmutableList.of(equiJoinClause("B1", "A1")),
Optional.empty(),
Optional.of(REPLICATED),
values(ImmutableMap.of("B1", 0)),
values(ImmutableMap.of("A1", 0))));
}

@Test
public void testJoinWithHighConfidenceZeroStatistics()
{
int aRows = 100;
int bRows = 0;
assertDetermineJoinDistributionType()
.setSystemProperty(JOIN_DISTRIBUTION_TYPE, JoinDistributionType.AUTOMATIC.name())
.setSystemProperty(TREAT_LOW_CONFIDENCE_ZERO_ESTIMATION_AS_UNKNOWN_ENABLED, "TRUE")
.overrideStats("valuesA", PlanNodeStatsEstimate.builder()
.setOutputRowCount(aRows)
.addVariableStatistics(ImmutableMap.of(new VariableReferenceExpression(Optional.empty(), "A1", BIGINT), new VariableStatsEstimate(0, 100, 0, 6400, 100)))
.build())
.overrideStats("valuesB", PlanNodeStatsEstimate.builder()
.setOutputRowCount(bRows)
.addVariableStatistics(ImmutableMap.of(new VariableReferenceExpression(Optional.empty(), "A1", BIGINT), new VariableStatsEstimate(0, 100, 0, 6400, 100)))
.setConfidence(HIGH)
.build())
.on(p -> {
VariableReferenceExpression a1 = p.variable("A1", BIGINT);
VariableReferenceExpression b1 = p.variable("B1", BIGINT);
return p.join(
INNER,
p.values(new PlanNodeId("valuesA"), aRows, a1),
p.values(new PlanNodeId("valuesB"), bRows, b1),
ImmutableList.of(new EquiJoinClause(a1, b1)),
ImmutableList.of(a1, b1),
Optional.empty());
})
.matches(join(
INNER,
ImmutableList.of(equiJoinClause("A1", "B1")),
Optional.empty(),
Optional.of(REPLICATED),
values(ImmutableMap.of("A1", 0)),
values(ImmutableMap.of("B1", 0))));
}

@Test
public void testFlipAndReplicateWhenOneTableMuchSmaller()
{
Expand Down

0 comments on commit 959e198

Please sign in to comment.