Skip to content

Commit

Permalink
Fix prepareForReuse for string vectors inside array/map/struct (#7465)
Browse files Browse the repository at this point in the history
Summary:
BaseVector::prepareForReuse used to clear all string buffers for string vectors
inside a complex types vector, effectively prevented reuse of the string vectors.

This happened because BaseVector::prepareForReuse for complex vectors calls
BaseVector::resize(0) for child vectors and FlatVector<StringView>::resize(0) used
to clear all string buffers.

A fix is to modify FlatVector<StringView>::resize(0) to keep at most one string
buffer, just like FlatVector<StringView>::prepareForReuse() does.

Pull Request resolved: #7465

Reviewed By: spershin

Differential Revision: D51097758

Pulled By: mbasmanova

fbshipit-source-id: 7ab1e44c1bc5d4ada391668b4b30a52b845ab6be
  • Loading branch information
mbasmanova authored and facebook-github-bot committed Nov 8, 2023
1 parent e3d28a1 commit 42b604b
Show file tree
Hide file tree
Showing 6 changed files with 74 additions and 17 deletions.
2 changes: 1 addition & 1 deletion velox/vector/FlatVector-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -437,7 +437,7 @@ void FlatVector<T>::resize(vector_size_t newSize, bool setNotNull) {
SimpleVector<StringView>::resizeIsAsciiIfNotEmpty(newSize, false);
}
if (newSize == 0) {
clearStringBuffers();
keepAtMostOneStringBuffer();
}
} else {
resizeValues(newSize, std::nullopt);
Expand Down
13 changes: 1 addition & 12 deletions velox/vector/FlatVector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,18 +89,7 @@ void FlatVector<StringView>::prepareForReuse() {
rawValues_ = nullptr;
}

// Check string buffers. Keep at most one singly-referenced buffer if it is
// not too large.
if (!stringBuffers_.empty()) {
auto& firstBuffer = stringBuffers_.front();
if (firstBuffer->isMutable() &&
firstBuffer->capacity() <= kMaxStringSizeForReuse) {
firstBuffer->setSize(0);
setStringBuffers({firstBuffer});
} else {
clearStringBuffers();
}
}
keepAtMostOneStringBuffer();

// Clear the StringViews to avoid referencing freed memory.
if (rawValues_) {
Expand Down
17 changes: 17 additions & 0 deletions velox/vector/FlatVector.h
Original file line number Diff line number Diff line change
Expand Up @@ -500,6 +500,23 @@ class FlatVector final : public SimpleVector<T> {
vector_size_t newSize,
const std::optional<T>& initialValue);

// Check string buffers. Keep at most one singly-referenced buffer if it is
// not too large.
void keepAtMostOneStringBuffer() {
if (stringBuffers_.empty()) {
return;
}

auto& firstBuffer = stringBuffers_.front();
if (firstBuffer->isMutable() &&
firstBuffer->capacity() <= kMaxStringSizeForReuse) {
firstBuffer->setSize(0);
setStringBuffers({firstBuffer});
} else {
clearStringBuffers();
}
}

// Contiguous values.
// If strings, these are velox::StringViews into memory held by
// 'stringBuffers_'
Expand Down
34 changes: 34 additions & 0 deletions velox/vector/tests/VectorPrepareForReuseTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,40 @@ TEST_F(VectorPrepareForReuseTest, arrays) {
ASSERT_EQ(originalSize, vector->retainedSize());
}

TEST_F(VectorPrepareForReuseTest, arrayOfStrings) {
VectorPtr vector = makeArrayVector<std::string>(
1'000,
[](auto /*row*/) { return 1; },
[](auto row, auto index) {
return std::string(20 + index, 'a' + row % 5);
});
auto originalSize = vector->retainedSize();
BaseVector* originalVector = vector.get();

MemoryAllocationChecker allocationChecker(pool());
BaseVector::prepareForReuse(vector, vector->size());
ASSERT_EQ(originalVector, vector.get());
ASSERT_EQ(originalSize, vector->retainedSize());

auto* arrayVector = vector->as<ArrayVector>();
for (auto i = 0; i < 1'000; i++) {
ASSERT_EQ(0, arrayVector->sizeAt(i));
ASSERT_EQ(0, arrayVector->offsetAt(i));
}

// Cannot use BaseVector::copy because it is too smart and acquired string
// buffers instead of copying the strings.
auto* elementsVector = arrayVector->elements()->as<FlatVector<StringView>>();
elementsVector->resize(1'000);
for (auto i = 0; i < 1'000; i++) {
arrayVector->setOffsetAndSize(i, i, 1);
std::string newValue(21, 'b' + i % 7);
elementsVector->set(i, StringView(newValue));
}

ASSERT_EQ(originalSize, vector->retainedSize());
}

TEST_F(VectorPrepareForReuseTest, dataDependentFlags) {
auto size = 10;

Expand Down
17 changes: 17 additions & 0 deletions velox/vector/tests/VectorTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1649,6 +1649,23 @@ TEST_F(VectorTest, resizeStringAsciiness) {
ASSERT_FALSE(stringVector->isAscii(rows));
}

TEST_F(VectorTest, resizeZeroString) {
auto vector = makeFlatVector<std::string>(
{"This is a string",
"This is another string",
"This is the third string"});
ASSERT_EQ(1, vector->stringBuffers().size());
ASSERT_LT(0, vector->stringBuffers()[0]->size());

const auto capacity = vector->stringBuffers()[0]->capacity();
ASSERT_GT(capacity, 0);

vector->resize(0);
ASSERT_EQ(1, vector->stringBuffers().size());
ASSERT_EQ(0, vector->stringBuffers()[0]->size());
ASSERT_EQ(capacity, vector->stringBuffers()[0]->capacity());
}

TEST_F(VectorTest, copyNoRows) {
{
auto source = makeFlatVector<int32_t>({1, 2, 3});
Expand Down
8 changes: 4 additions & 4 deletions velox/vector/tests/utils/VectorMaker.h
Original file line number Diff line number Diff line change
Expand Up @@ -367,16 +367,16 @@ class VectorMaker {
auto numElements =
createOffsetsAndSizes(size, sizeAt, isNullAt, &nulls, &offsets, &sizes);

auto flatVector =
BaseVector::create<FlatVector<T>>(type->childAt(0), numElements, pool_);
auto flatVector = BaseVector::create<FlatVector<EvalType<T>>>(
type->childAt(0), numElements, pool_);
vector_size_t currentIndex = 0;
for (vector_size_t i = 0; i < size; ++i) {
if (isNullAt && isNullAt(i)) {
continue;
}
for (vector_size_t j = 0; j < sizeAt(i); ++j) {
auto ret = valueAt(i, j);
flatVector->set(currentIndex, valueAt(i, j));
auto value = valueAt(i, j);
flatVector->set(currentIndex, EvalType<T>(value));
currentIndex++;
}
}
Expand Down

0 comments on commit 42b604b

Please sign in to comment.