Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve heuristic of using dictionary in project and filter #15266

Merged
merged 1 commit into from
Dec 1, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ public SelectedPositions filter(ConnectorSession session, Page page)

if (block instanceof RunLengthEncodedBlock) {
Block value = ((RunLengthEncodedBlock) block).getValue();
Optional<boolean[]> selectedPosition = processDictionary(session, value);
Optional<boolean[]> selectedPosition = processDictionary(session, value, block.getPositionCount());
// single value block is always considered effective, but the processing could have thrown
// in that case we fallback and process again so the correct error message sent
if (selectedPosition.isPresent()) {
Expand All @@ -71,7 +71,7 @@ public SelectedPositions filter(ConnectorSession session, Page page)
if (block instanceof DictionaryBlock) {
DictionaryBlock dictionaryBlock = (DictionaryBlock) block;
// Attempt to process the dictionary. If dictionary is processing has not been considered effective, an empty response will be returned
Optional<boolean[]> selectedDictionaryPositions = processDictionary(session, dictionaryBlock.getDictionary());
Optional<boolean[]> selectedDictionaryPositions = processDictionary(session, dictionaryBlock.getDictionary(), block.getPositionCount());
// record the usage count regardless of dictionary processing choice, so we have stats for next time
lastDictionaryUsageCount += page.getPositionCount();
// if dictionary was processed, produce a dictionary block; otherwise do normal processing
Expand All @@ -83,17 +83,17 @@ public SelectedPositions filter(ConnectorSession session, Page page)
return filter.filter(session, new Page(block));
}

private Optional<boolean[]> processDictionary(ConnectorSession session, Block dictionary)
private Optional<boolean[]> processDictionary(ConnectorSession session, Block dictionary, int blockPositionsCount)
{
if (lastInputDictionary == dictionary) {
return lastOutputDictionary;
}

// Process dictionary if:
// this is the first block
// there is only entry in the dictionary
// dictionary positions count is not greater than block positions count
// the last dictionary was used for more positions than were in the dictionary
boolean shouldProcessDictionary = lastInputDictionary == null || dictionary.getPositionCount() == 1 || lastDictionaryUsageCount >= lastInputDictionary.getPositionCount();
boolean shouldProcessDictionary = lastInputDictionary == null || dictionary.getPositionCount() <= blockPositionsCount || lastDictionaryUsageCount >= lastInputDictionary.getPositionCount();

lastDictionaryUsageCount = 0;
lastInputDictionary = dictionary;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -216,10 +216,10 @@ else if (block instanceof DictionaryBlock) {
}

// Try use dictionary processing first; if it fails, fall back to the generic case
dictionaryProcessingProjectionWork = createDictionaryBlockProjection(dictionary);
dictionaryProcessingProjectionWork = createDictionaryBlockProjection(dictionary, block.getPositionCount());
}

private Work<Block> createDictionaryBlockProjection(Optional<Block> dictionary)
private Work<Block> createDictionaryBlockProjection(Optional<Block> dictionary, int blockPositionsCount)
{
if (dictionary.isEmpty()) {
lastOutputDictionary = Optional.empty();
Expand All @@ -232,10 +232,10 @@ private Work<Block> createDictionaryBlockProjection(Optional<Block> dictionary)
}

// Process dictionary if:
// there is only one entry in the dictionary
// dictionary positions count is not greater than block positions count
// this is the first block
// the last dictionary was used for more positions than were in the dictionary
boolean shouldProcessDictionary = dictionary.get().getPositionCount() == 1 || lastInputDictionary == null || lastDictionaryUsageCount >= lastInputDictionary.getPositionCount();
boolean shouldProcessDictionary = dictionary.get().getPositionCount() <= blockPositionsCount || lastInputDictionary == null || lastDictionaryUsageCount >= lastInputDictionary.getPositionCount();

// record the usage count regardless of dictionary processing choice, so we have stats for next time
lastDictionaryUsageCount = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,11 @@
import java.util.Arrays;
import java.util.stream.IntStream;

import static io.airlift.testing.Assertions.assertInstanceOf;
import static io.trino.block.BlockAssertions.createLongSequenceBlock;
import static io.trino.block.BlockAssertions.createLongsBlock;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;

public class TestDictionaryAwarePageFilter
{
Expand Down Expand Up @@ -105,8 +105,8 @@ public void testDictionaryBlockProcessingWithUnusedFailure()
// match some
testFilter(createDictionaryBlockWithUnusedEntries(20, 100), DictionaryBlock.class);

// match none
testFilter(createDictionaryBlockWithUnusedEntries(20, 0), DictionaryBlock.class);
// match none, blockSize must be > 1 to actually create a DictionaryBlock
testFilter(createDictionaryBlockWithUnusedEntries(20, 2), DictionaryBlock.class);

// match all
testFilter(DictionaryBlock.create(100, createLongsBlock(4, 5, -1), new int[100]), DictionaryBlock.class);
Expand All @@ -120,21 +120,26 @@ public void testDictionaryProcessingEnableDisable()

Block ineffectiveBlock = createDictionaryBlock(100, 20);
Block effectiveBlock = createDictionaryBlock(10, 100);
Block anotherIneffectiveBlock = createDictionaryBlock(100, 25);

// function will always processes the first dictionary
// function will always process the first dictionary
nestedFilter.setExpectedType(LongArrayBlock.class);
testFilter(filter, ineffectiveBlock, true);

// last dictionary not effective, so dictionary processing is disabled
// last dictionary not effective and incoming dictionary is also ineffective, so dictionary processing is disabled
nestedFilter.setExpectedType(DictionaryBlock.class);
testFilter(filter, effectiveBlock, true);
testFilter(filter, anotherIneffectiveBlock, true);

// last dictionary not effective, so dictionary processing is enabled again
for (int i = 0; i < 5; i++) {
// Increase usage count of large ineffective dictionary with multiple pages of small positions count
testFilter(filter, anotherIneffectiveBlock, true);
}
// last dictionary effective, so dictionary processing is enabled
nestedFilter.setExpectedType(LongArrayBlock.class);
testFilter(filter, ineffectiveBlock, true);

// last dictionary not effective, so dictionary processing is disabled again
nestedFilter.setExpectedType(DictionaryBlock.class);
// last dictionary not effective, but incoming dictionary is effective, so dictionary processing stays enabled
nestedFilter.setExpectedType(LongArrayBlock.class);
testFilter(filter, effectiveBlock, true);
}

Expand Down Expand Up @@ -286,6 +291,13 @@ public SelectedPositions filter(ConnectorSession session, Page page)
selectedPositions.add(position);
}
}

// verify the input block is the expected type (this is to assure that
// dictionary processing enabled and disabled as expected)
// this check is performed last so that dictionary processing that fails
// is not checked (only the fall back processing is checked)
assertInstanceOf(block, expectedType);

if (selectedPositions.isEmpty()) {
return SelectedPositions.positionsRange(0, 0);
}
Expand All @@ -298,12 +310,6 @@ public SelectedPositions filter(ConnectorSession session, Page page)
selectedPositions.add(-1);
}

// verify the input block is the expected type (this is to assure that
// dictionary processing enabled and disabled as expected)
// this check is performed last so that dictionary processing that fails
// is not checked (only the fall back processing is checked)
assertTrue(expectedType.isInstance(block));

return SelectedPositions.positionsList(selectedPositions.elements(), 3, selectedPositions.size() - 6);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,18 +147,23 @@ public void testDictionaryProcessingEnableDisable(boolean forceYield, boolean pr
{
DictionaryAwarePageProjection projection = createProjection(produceLazyBlock);

// function will always processes the first dictionary
// function will always process the first dictionary
Block ineffectiveBlock = createDictionaryBlock(100, 20);
testProjectRange(ineffectiveBlock, DictionaryBlock.class, projection, forceYield, produceLazyBlock);
testProjectFastReturnIgnoreYield(ineffectiveBlock, projection, produceLazyBlock);
// dictionary processing can reuse the last dictionary
// in this case, we don't even check yield signal; make yieldForce to false
testProjectList(ineffectiveBlock, DictionaryBlock.class, projection, false, produceLazyBlock);

// last dictionary not effective, so dictionary processing is disabled
Block effectiveBlock = createDictionaryBlock(10, 100);
testProjectRange(effectiveBlock, LongArrayBlock.class, projection, forceYield, produceLazyBlock);
testProjectList(effectiveBlock, LongArrayBlock.class, projection, forceYield, produceLazyBlock);
// last dictionary not effective, and incoming dictionary is also not effective, so dictionary processing is disabled
Block anotherIneffectiveBlock = createDictionaryBlock(100, 25);
testProjectRange(anotherIneffectiveBlock, LongArrayBlock.class, projection, forceYield, produceLazyBlock);
testProjectList(anotherIneffectiveBlock, LongArrayBlock.class, projection, forceYield, produceLazyBlock);

for (int i = 0; i < 15; i++) {
// Increase usage count of large ineffective dictionary with multiple pages of small positions count
testProjectRange(anotherIneffectiveBlock, LongArrayBlock.class, projection, forceYield, produceLazyBlock);
}

// last dictionary effective, so dictionary processing is enabled again
testProjectRange(ineffectiveBlock, DictionaryBlock.class, projection, forceYield, produceLazyBlock);
Expand All @@ -167,9 +172,10 @@ public void testDictionaryProcessingEnableDisable(boolean forceYield, boolean pr
// in this case, we don't even check yield signal; make yieldForce to false
testProjectList(ineffectiveBlock, DictionaryBlock.class, projection, false, produceLazyBlock);

// last dictionary not effective, so dictionary processing is disabled again
testProjectRange(effectiveBlock, LongArrayBlock.class, projection, forceYield, produceLazyBlock);
testProjectList(effectiveBlock, LongArrayBlock.class, projection, forceYield, produceLazyBlock);
// last dictionary not effective, but incoming dictionary is effective, so dictionary processing stays enabled
Block effectiveBlock = createDictionaryBlock(10, 100);
testProjectRange(effectiveBlock, DictionaryBlock.class, projection, forceYield, produceLazyBlock);
testProjectFastReturnIgnoreYield(effectiveBlock, projection, produceLazyBlock);
}

@Test
Expand Down