Skip to content

Commit

Permalink
Fix crash in HashTable::groupProbe (facebookincubator#1302)
Browse files Browse the repository at this point in the history
Summary:
The actual fix is to replace

```
lookup.normalizedKeys.resize(numRows);
```

with

```
lookup.normalizedKeys.resize(lookup.rows.back() + 1);
```

Since this logic is duplicated in groupProbe and joinProbe, this PR extracts it into a helper function.

Added a test. Before the fix the test would crash in ASAN mode:

```
==1665828==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x631001514800 at pc 0x7fb451ebad8f bp 0x7ffe2d8b1130 sp 0x7ffe2d8b1128
WRITE of size 8 at 0x631001514800 thread T0
SCARINESS: 42 (8-byte-write-heap-buffer-overflow)
    #0 0x7fb451ebad8e in facebook::velox::exec::HashTable<false>::groupProbe(facebook::velox::exec::HashLookup&) velox/exec/HashTable.cpp:373
    facebookincubator#1 0x8bcb57 in HashTableTest::insertGroups(facebook::velox::RowVector const&, facebook::velox::SelectivityVector const&, facebook::velox::exec::HashLookup&, facebook::velox::exec::HashTable<false>&) velox/exec/tests/HashTableTest.cpp:186
```

Pull Request resolved: facebookincubator#1302

Reviewed By: kgpai

Differential Revision: D35205256

Pulled By: mbasmanova

fbshipit-source-id: b3719df55513192f64b01997d293807310ebad51
  • Loading branch information
mbasmanova authored and artem.malyshev committed May 31, 2022
1 parent 0758019 commit 0c760e0
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 30 deletions.
44 changes: 19 additions & 25 deletions velox/exec/HashTable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,16 @@ inline uint64_t mixNormalizedKey(uint64_t k, uint8_t bits) {
auto h = (k ^ ((k >> 32))) * prime1;
return h + (h >> bits) * prime2 + (h >> (2 * bits)) * prime3;
}

void populateNormalizedKeys(HashLookup& lookup, int8_t sizeBits) {
lookup.normalizedKeys.resize(lookup.rows.back() + 1);
auto hashes = lookup.hashes.data();
for (auto row : lookup.rows) {
auto hash = hashes[row];
lookup.normalizedKeys[row] = hash; // NOLINT
hashes[row] = mixNormalizedKey(hash, sizeBits);
}
}
} // namespace

template <bool ignoreNullKeys>
Expand All @@ -359,20 +369,10 @@ void HashTable<ignoreNullKeys>::groupProbe(HashLookup& lookup) {
return;
}
// Do size-based rehash before mixing hashes from normalized keys
// because The
// size of the table affects the mixing.
// because the size of the table affects the mixing.
checkSize(lookup.rows.size());
if (hashMode_ == HashMode::kNormalizedKey) {
auto numRows = lookup.rows.size();
lookup.normalizedKeys.resize(numRows);
auto rows = lookup.rows.data();
for (int i = 0; i < numRows; ++i) {
auto row = rows[i];
auto hashes = lookup.hashes.data();
auto hash = hashes[row];
lookup.normalizedKeys[row] = hash; // NOLINT
hashes[row] = mixNormalizedKey(hash, sizeBits_);
}
populateNormalizedKeys(lookup, sizeBits_);
}
ProbeState state1;
ProbeState state2;
Expand Down Expand Up @@ -410,12 +410,12 @@ void HashTable<ignoreNullKeys>::groupProbe(HashLookup& lookup) {

template <bool ignoreNullKeys>
void HashTable<ignoreNullKeys>::arrayGroupProbe(HashLookup& lookup) {
VELOX_DCHECK(!lookup.hashes.empty());
VELOX_DCHECK(!lookup.hits.empty());

int32_t numProbes = lookup.rows.size();
const vector_size_t* rows = lookup.rows.data();
assert(!lookup.hashes.empty());
assert(!lookup.hits.empty());
auto hashes = lookup.hashes.data();
assert(!lookup.hits.empty());
auto groups = lookup.hits.data();
int32_t i = 0;
if (process::hasAvx2() && simd::isDense(rows, numProbes)) {
Expand Down Expand Up @@ -471,13 +471,7 @@ void HashTable<ignoreNullKeys>::joinProbe(HashLookup& lookup) {
return;
}
if (hashMode_ == HashMode::kNormalizedKey) {
lookup.normalizedKeys.resize(lookup.rows.back() + 1);
auto hashes = lookup.hashes.data();
for (auto row : lookup.rows) {
auto hash = hashes[row];
lookup.normalizedKeys[row] = hash; // NOLINT
hashes[row] = mixNormalizedKey(hash, sizeBits_);
}
populateNormalizedKeys(lookup, sizeBits_);
}
int32_t probeIndex = 0;
int32_t numProbes = lookup.rows.size();
Expand Down Expand Up @@ -619,8 +613,8 @@ void HashTable<ignoreNullKeys>::insertForGroupBy(
if (hashMode_ == HashMode::kArray) {
for (auto i = 0; i < numGroups; ++i) {
auto index = hashes[i];
VELOX_CHECK(index < size_);
VELOX_CHECK(table_[index] == nullptr);
VELOX_CHECK_LT(index, size_);
VELOX_CHECK_NULL(table_[index]);
table_[index] = groups[i];
}
} else {
Expand Down Expand Up @@ -746,7 +740,7 @@ void HashTable<ignoreNullKeys>::insertForJoin(
if (hashMode_ == HashMode::kArray) {
for (auto i = 0; i < numGroups; ++i) {
auto index = hashes[i];
VELOX_CHECK(index < size_);
VELOX_CHECK_LT(index, size_);
arrayPushRow(groups[i], index);
}
return;
Expand Down
44 changes: 39 additions & 5 deletions velox/exec/tests/HashTableTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,27 +149,38 @@ class HashTableTest : public testing::Test {
const RowVector& input,
HashLookup& lookup,
HashTable<false>& table) {
const SelectivityVector rows(input.size());
insertGroups(input, rows, lookup, table);
}

void insertGroups(
const RowVector& input,
const SelectivityVector& rows,
HashLookup& lookup,
HashTable<false>& table) {
lookup.reset(rows.end());
lookup.rows.clear();
rows.applyToSelected([&](auto row) { lookup.rows.push_back(row); });

auto& hashers = table.hashers();
SelectivityVector activeRows(input.size());
auto mode = table.hashMode();
bool rehash = false;
for (int32_t i = 0; i < hashers.size(); ++i) {
auto key = input.childAt(hashers[i]->channel());
if (mode != BaseHashTable::HashMode::kHash) {
if (!hashers[i]->computeValueIds(*key, activeRows, lookup.hashes)) {
if (!hashers[i]->computeValueIds(*key, rows, lookup.hashes)) {
rehash = true;
}
} else {
hashers[i]->hash(*key, activeRows, i > 0, lookup.hashes);
hashers[i]->hash(*key, rows, i > 0, lookup.hashes);
}
}
std::iota(lookup.rows.begin(), lookup.rows.end(), 0);

if (rehash) {
if (table.hashMode() != BaseHashTable::HashMode::kHash) {
table.decideHashMode(input.size());
}
insertGroups(input, lookup, table);
insertGroups(input, rows, lookup, table);
return;
}
table.groupProbe(lookup);
Expand Down Expand Up @@ -538,3 +549,26 @@ TEST_F(HashTableTest, enableRangeWhereCan) {
lookup->reset(data->size());
insertGroups(*data, *lookup, *table);
}

TEST_F(HashTableTest, arrayProbeNormalizedKey) {
auto table = createHashTableForAggregation(ROW({"a"}, {BIGINT()}), 1);
auto lookup = std::make_unique<HashLookup>(table->hashers());

for (auto i = 0; i < 200; ++i) {
auto data = vectorMaker_->rowVector({
vectorMaker_->flatVector<int64_t>(
10'000, [&](auto row) { return i * 10'000 + row; }),
});

SelectivityVector rows(5'000);
insertGroups(*data, rows, *lookup, *table);

rows.resize(10'000);
rows.clearAll();
rows.setValidRange(5'000, 10'000, true);
rows.updateBounds();
insertGroups(*data, rows, *lookup, *table);
}

ASSERT_TRUE(table->hashMode() == BaseHashTable::HashMode::kNormalizedKey);
}

0 comments on commit 0c760e0

Please sign in to comment.