Skip to content

Commit

Permalink
Fix memory leak issue when running window fuzzer test (facebookincuba…
Browse files Browse the repository at this point in the history
…tor#10847)

Summary:
We got the following exception when running window fuzzer test with `ROWS between k0 PRECEDING and -2157330091482954703 FOLLOWING` frame in ubuntu 20.04 os.
```
I0822 23:45:06.386909 2118850 WindowFuzzer.cpp:277] ==============================> Done with iteration 228
I0822 23:45:06.387009 2118850 WindowFuzzer.cpp:217] ==============================> Started iteration 229 (seed: 2266604503)
I0822 23:45:06.413928 2118850 AggregationFuzzerBase.cpp:427] Executing query plan:
-- Window[1][partition by [p0, p1] order by [s0 ASC NULLS FIRST, s1 DESC NULLS FIRST, row_number ASC NULLS LAST] w0 := approx_percentile(ROW["c0"],ROW["c1"],ROW["c2"]) ROWS between k0 FOLLOWING and -8268856359831993385 FOLLOWING] -> c0:DOUBLE, c1:BIGINT, c2:DOUBLE, s0:ROW<f0:BOOLEAN,f1:INTERVAL DAY TO SECOND,f2:BOOLEAN,f3:VARCHAR,f4:INTEGER>, s1:SMALLINT, p0:ARRAY<DOUBLE>, p1:ARRAY<DOUBLE>, k0:INTEGER, row_number:BIGINT, w0:DOUBLE
  -- Values[0][1000 rows in 10 vectors] -> c0:DOUBLE, c1:BIGINT, c2:DOUBLE, s0:ROW<f0:BOOLEAN,f1:INTERVAL DAY TO SECOND,f2:BOOLEAN,f3:VARCHAR,f4:INTEGER>, s1:SMALLINT, p0:ARRAY<DOUBLE>, p1:ARRAY<DOUBLE>, k0:INTEGER, row_number:BIGINT
I0822 23:45:06.414655 2121801 Task.cpp:1920] Terminating task test_cursor 1170 with state Failed after running for 0ms
I0822 23:45:06.414741 2121801 Task.cpp:1155] All drivers (1) finished for task test_cursor 1170 after running for 0ms
E0822 23:45:06.414860 2121801 MemoryPool.cpp:448] [MEM] Memory leak (Used memory): Memory Pool[op.1.0.0.Window LEAF root[query.TaskCursorQuery_1169.1619] parent[node.1] MALLOC track-usage thread-safe]<unlimited max capacity unlimited capacity used 128B available 1023.88KB reservation [used 128B, reserved 1.00MB, min 0B] counters [allocs 9, frees 8, reserves 0, releases 1, collisions 0])>
E0822 23:45:06.414930 2121801 Exceptions.h:67] Line: /mnt/DP_disk3/jk/projects/fb-velox/velox/velox/common/memory/MemoryArbitrator.cpp:100, Function:removePool, Expression: pool->reservedBytes() == 0 (1048576 vs. 0), Source: RUNTIME, ErrorCode: INVALID_STATE
terminate called after throwing an instance of 'facebook::velox::VeloxRuntimeError'
  what():  Exception: VeloxRuntimeError
Error Source: RUNTIME
Error Code: INVALID_STATE
Reason: (1048576 vs. 0)
Retriable: False
Expression: pool->reservedBytes() == 0
Function: removePool
File: /mnt/DP_disk3/jk/projects/fb-velox/velox/velox/common/memory/MemoryArbitrator.cpp
Line: 100
Stack trace:
# 0  facebook::velox::VeloxException::VeloxException(char const*, unsigned long, char const*, std::basic_string_view<char, std::char_traits<char> >, std::basic_string_view<char, std::char_traits<char> >, std::basic_string_view<char, std::char_traits<char> >, std::basic_string_view<char, std::char_traits<char> >, bool, facebook::velox::VeloxException::Type, std::basic_string_view<char, std::char_traits<char> >)
# 1  void facebook::velox::detail::veloxCheckFail<facebook::velox::VeloxRuntimeError, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&>(facebook::velox::detail::VeloxCheckFailArgs const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)
# 2  facebook::velox::memory::(anonymous namespace)::NoopArbitrator::removePool(facebook::velox::memory::MemoryPool*)
# 3  facebook::velox::memory::MemoryManager::dropPool(facebook::velox::memory::MemoryPool*)
# 4  facebook::velox::memory::MemoryPoolImpl::~MemoryPoolImpl()
# 5  std::_Sp_counted_ptr<facebook::velox::core::QueryCtx*, (__gnu_cxx::_Lock_policy)2>::_M_dispose()
# 6  std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() [clone .part.0]
# 7  facebook::velox::exec::Task::~Task()
# 8  std::_Sp_counted_ptr<facebook::velox::exec::Task*, (__gnu_cxx::_Lock_policy)2>::_M_dispose()
# 9  std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() [clone .part.0]
# 10 std::_Sp_counted_ptr<facebook::velox::exec::Driver*, (__gnu_cxx::_Lock_policy)2>::_M_dispose()
# 11 std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() [clone .part.0]
# 12 unsigned long folly::detail::function::DispatchSmall::exec<facebook::velox::exec::Driver::enqueue(std::shared_ptr<facebook::velox::exec::Driver>)::{lambda()https://github.com/facebookincubator/velox/issues/1}>(folly::detail::function::Op, folly::detail::function::Data*, folly::detail::function::Data)
# 13 folly::ThreadPoolExecutor::runTask(std::shared_ptr<folly::ThreadPoolExecutor::Thread> const&, folly::ThreadPoolExecutor::Task&&)
# 14 folly::CPUThreadPoolExecutor::threadRun(std::shared_ptr<folly::ThreadPoolExecutor::Thread>)
# 15 void folly::detail::function::call_<std::_Bind<void (folly::ThreadPoolExecutor::*(folly::ThreadPoolExecutor*, std::shared_ptr<folly::ThreadPoolExecutor::Thread>))(std::shared_ptr<folly::ThreadPoolExecutor::Thread>)>, true, false, void>(, folly::detail::function::Data&)
# 16 0x00000000000d6df3
# 17 start_thread
# 18 clone

*** Aborted at 1724341506 (Unix time, try 'date -d 1724341506') ***
*** Signal 6 (SIGABRT) (0x2054c2) received by PID 2118850 (pthread TID 0x7f7e077fe700) (linux TID 2121801) (maybe from PID 2118850, UID 0) (code: -6), stack trace: ***
    @ 0000000007449637 folly::symbolizer::(anonymous namespace)::signalHandler(int, siginfo_t*, void*)
                       /mnt/DP_disk3/jk/projects/gluten/ep/build-velox/build/velox_ep/folly/folly/experimental/symbolizer/SignalHandler.cpp:453
    @ 000000000001441f (unknown)
    @ 000000000004300b gsignal
    @ 0000000000022858 abort
    @ 000000000009e8d0 (unknown)
    @ 00000000000aa37b (unknown)
    @ 00000000000a9358 (unknown)
    @ 00000000000a9d10 __gxx_personality_v0
    @ 0000000000003f88 __libunwind_Unwind_Resume
    @ 00000000023e7d28 facebook::velox::memory::MemoryManager::dropPool(facebook::velox::memory::MemoryPool*) [clone .cold]
    @ 0000000007395017 facebook::velox::memory::MemoryPoolImpl::~MemoryPoolImpl()
    @ 00000000071d6a98 std::_Sp_counted_ptr<facebook::velox::core::QueryCtx*, (__gnu_cxx::_Lock_policy)2>::_M_dispose()
    @ 00000000024332d9 std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() [clone .part.0]
    @ 0000000006c75269 facebook::velox::exec::Task::~Task()
    @ 0000000006c92975 std::_Sp_counted_ptr<facebook::velox::exec::Task*, (__gnu_cxx::_Lock_policy)2>::_M_dispose()
    @ 00000000024332d9 std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() [clone .part.0]
    @ 0000000006d0fdd3 std::_Sp_counted_ptr<facebook::velox::exec::Driver*, (__gnu_cxx::_Lock_policy)2>::_M_dispose()
    @ 00000000024332d9 std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() [clone .part.0]
    @ 0000000006ce647f unsigned long folly::detail::function::DispatchSmall::exec<facebook::velox::exec::Driver::enqueue(std::shared_ptr<facebook::velox::exec::Driver>)::{lambda()https://github.com/facebookincubator/velox/issues/1}>(folly::detail::function::Op, folly::detail::function::Data*, folly::detail::function::Data)
    @ 000000000743f420 folly::ThreadPoolExecutor::runTask(std::shared_ptr<folly::ThreadPoolExecutor::Thread> const&, folly::ThreadPoolExecutor::Task&&)
                       /mnt/DP_disk3/jk/projects/gluten/ep/build-velox/build/velox_ep/folly/folly/Function.h:639
                       -> /mnt/DP_disk3/jk/projects/gluten/ep/build-velox/build/velox_ep/folly/folly/executors/ThreadPoolExecutor.cpp
    @ 0000000007427d25 folly::CPUThreadPoolExecutor::threadRun(std::shared_ptr<folly::ThreadPoolExecutor::Thread>)
                       /mnt/DP_disk3/jk/projects/gluten/ep/build-velox/build/velox_ep/folly/folly/executors/CPUThreadPoolExecutor.cpp:350
    @ 0000000007445d2d void folly::detail::function::call_<std::_Bind<void (folly::ThreadPoolExecutor::*(folly::ThreadPoolExecutor*, std::shared_ptr<folly::ThreadPoolExecutor::Thread>))(std::shared_ptr<folly::ThreadPoolExecutor::Thread>)>, true, false, void>(, folly::detail::function::Data&)
                       /usr/include/c++/9/bits/invoke.h:73
                       -> /mnt/DP_disk3/jk/projects/gluten/ep/build-velox/build/velox_ep/folly/folly/executors/ThreadPoolExecutor.cpp
    @ 00000000000d6df3 (unknown)
    @ 0000000000008608 start_thread
    @ 000000000011f352 clone
Aborted (core dumped)
```

We discovered that the memory leak occurs during the creation of the frame.

```
Detected total of 1 leaked allocations:
======== Leaked memory from 1 total allocations of 96B total size ========
# 0  facebook::velox::memory::MemoryPoolImpl::recordAllocDbg(void const*, unsigned
# 1  facebook::velox::memory::MemoryPoolImpl::allocate(long)
# 2  boost::intrusive_ptr<facebook::velox::Buffer> facebook::velox::AlignedBuffer::e<int>(unsigned long, facebook::velox::memory::MemoryPool*, std::optional<int> cons
# 3  std::shared_ptr<facebook::velox::BaseVector> facebook::velox::createEmpty<(facvelox::TypeKind)3>(int, facebook::velox::memory::MemoryPool*, std::shared_ptr<faceblox::Type const> const&)
# 4  facebook::velox::BaseVector::createInternal(std::shared_ptr<facebook::velox::Tst> const&, int, facebook::velox::memory::MemoryPool*)::{lambda()https://github.com/facebookincubator/velox/issues/1}::operator()() {lambda()https://github.com/facebookincubator/velox/issues/1}::operator()() const
# 5  facebook::velox::BaseVector::createInternal(std::shared_ptr<facebook::velox::Tst> const&, int, facebook::velox::memory::MemoryPool*)::{lambda()https://github.com/facebookincubator/velox/issues/1}::operator()()
# 6  facebook::velox::BaseVector::createInternal(std::shared_ptr<facebook::velox::Tst> const&, int, facebook::velox::memory::MemoryPool*)
# 7  std::shared_ptr<facebook::velox::BaseVector> facebook::velox::BaseVector::creabook::velox::BaseVector>(std::shared_ptr<facebook::velox::Type const> const&, int, k::velox::memory::MemoryPool*)
# 8  facebook::velox::exec::Window::createWindowFrame(std::shared_ptr<facebook::vele::WindowNode const> const&, facebook::velox::core::WindowNode::Frame const&, std::ptr<facebook::velox::RowType const> const&)::{lambda(std::shared_ptr<facebook::velo::ITypedExpr const> const&)https://github.com/facebookincubator/velox/issues/1}::operator()(std::shared_ptr<facebook::velox::core::Ipr const> const&) const
# 9  facebook::velox::exec::Window::createWindowFrame(std::shared_ptr<facebook::vele::WindowNode const> const&, facebook::velox::core::WindowNode::Frame const&, std::ptr<facebook::velox::RowType const> const&)
# 10 facebook::velox::exec::Window::createWindowFunctions()
# 11 facebook::velox::exec::Window::initialize()
# 12 facebook::velox::exec::Driver::initializeOperators()
# 13 facebook::velox::exec::Driver::runInternal(std::shared_ptr<facebook::velox::exver>&, std::shared_ptr<facebook::velox::exec::BlockingState>&, std::shared_ptr<faceelox::RowVector>&)
# 14 facebook::velox::exec::Driver::run(std::shared_ptr<facebook::velox::exec::Driv
# 15 facebook::velox::exec::Driver::enqueue(std::shared_ptr<facebook::velox::exec::)::{lambda()https://github.com/facebookincubator/velox/issues/1}::operator()() const
# 16 void folly::detail::function::call_<facebook::velox::exec::Driver::enqueue(stdd_ptr<facebook::velox::exec::Driver>)::{lambda()facebookincubator#1}, true, false, void>(, folly::deunction::Data&)
# 17 folly::ThreadPoolExecutor::runTask(std::shared_ptr<folly::ThreadPoolExecutor:: const&, folly::ThreadPoolExecutor::Task&&)
# 18 folly::CPUThreadPoolExecutor::threadRun(std::shared_ptr<folly::ThreadPoolExecuread>)
# 19 void folly::detail::function::call_<std::_Bind<void (folly::ThreadPoolExecutorly::ThreadPoolExecutor*, std::shared_ptr<folly::ThreadPoolExecutor::Thread>))(std::ptr<folly::ThreadPoolExecutor::Thread>)>, true, false, void>(, folly::detail::functta&)
# 20 folly::detail::function::FunctionTraits<void ()>::operator()()
# 21 folly::NamedThreadFactory::newThread(folly::Function<void ()>&&)::{lambda()https://github.com/facebookincubator/velox/issues/1}tor()()
# 22 void std::__invoke_impl<void, folly::NamedThreadFactory::newThread(folly::Funcid ()>&&)::{lambda()https://github.com/facebookincubator/velox/issues/1}>(std::__invoke_other, folly::NamedThreadFactory::newThread(Function<void ()>&&)::{lambda()facebookincubator#1}&&)
# 23 std::__invoke_result<folly::NamedThreadFactory::newThread(folly::Function<void::{lambda()https://github.com/facebookincubator/velox/issues/1}>::type std::__invoke<folly::NamedThreadFactory::newThread(folly::Funoid ()>&&)::{lambda()https://github.com/facebookincubator/velox/issues/1}>(std::__invoke_result&&, (folly::NamedThreadFactory::newThlly::Function<void ()>&&)::{lambda()facebookincubator#1}&&)...)
# 24 void std::thread::_Invoker<std::tuple<folly::NamedThreadFactory::newThread(folction<void ()>&&)::{lambda()facebookincubator#1}> >::_M_invoke<0ul>(std::_Index_tuple<0ul>)
# 25 std::thread::_Invoker<std::tuple<folly::NamedThreadFactory::newThread(folly::F<void ()>&&)::{lambda()facebookincubator#1}> >::operator()()
# 26 std::thread::_State_impl<std::thread::_Invoker<std::tuple<folly::NamedThreadFanewThread(folly::Function<void ()>&&)::{lambda()facebookincubator#1}> > >::_M_run()
# 27 0x00000000000d6df3
# 28 start_thread
# 29 clone
```

The frame start has already been established and a buffer has been allocated [here](https://github.com/facebookincubator/velox/blob/main/velox/exec/Window.cpp#L144). However, when creating the frame end, the offset is negative, which triggers an error [here](https://github.com/facebookincubator/velox/blob/main/velox/exec/Window.cpp#L137). At this point, the buffer allocated for the frame start has not been released in time, resulting in the reported memory leak. This PR ensures that the expected error check for negative frame does not result in a memory leak.

Pull Request resolved: facebookincubator#10847

Reviewed By: Yuhta

Differential Revision: D61894144

Pulled By: xiaoxmeng

fbshipit-source-id: 859d95a37cf1481f1af7f8127e7544ef6ea4a402
  • Loading branch information
JkSelf authored and Joe-Abraham committed Sep 2, 2024
1 parent 8dcf455 commit d35910c
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 2 deletions.
6 changes: 4 additions & 2 deletions velox/exec/Window.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -153,12 +153,14 @@ Window::WindowFrame Window::createWindowFrame(
}
};

auto startFrameArg = createFrameChannelArg(frame.startValue);
auto endFrameArg = createFrameChannelArg(frame.endValue);
return WindowFrame(
{frame.type,
frame.startType,
frame.endType,
createFrameChannelArg(frame.startValue),
createFrameChannelArg(frame.endValue)});
std::move(startFrameArg),
std::move(endFrameArg)});
}

void Window::createWindowFunctions() {
Expand Down
64 changes: 64 additions & 0 deletions velox/exec/tests/WindowTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -439,5 +439,69 @@ TEST_F(WindowTest, duplicateOrOverlappingKeys) {
"Sorting keys must be unique and not overlap with partitioning keys. Found duplicate key: b");
}

TEST_F(WindowTest, nagativeFrameArg) {
const vector_size_t size = 1'000;

auto sizeAt = [](vector_size_t row) { return row % 5; };
auto keyAt = [](vector_size_t row) { return row % 11; };
auto valueAt = [](vector_size_t row) { return row % 13; };
auto keys = makeArrayVector<float>(size, sizeAt, keyAt);
auto data = makeRowVector(
{"c0", "c1", "p0", "p1", "k0", "row_number"},
{
// Payload.
makeFlatVector<float>(size, [](auto row) { return row; }),
makeFlatVector<float>(size, [](auto row) { return row; }),
// Partition key.
keys,
makeFlatVector<std::string>(
size, [](auto row) { return fmt::format("{}", row + 20); }),
makeFlatVector<int32_t>(size, [](auto row) { return row; }),
// Sorting key.
makeFlatVector<int64_t>(size, [](auto row) { return row; }),
});

createDuckDbTable({data});

struct {
std::string fragmentStart;
std::string fragmentEnd;

std::string debugString() const {
if (fragmentStart[0] == '-') {
return fmt::format(
"Window frame {} offset must not be negative", fragmentStart);
} else {
return fmt::format(
"Window frame {} offset must not be negative", fragmentEnd);
}
}
} testSettings[] = {
{"k0", "-1"}, // Negative end
{"-1", "k0"}, // Negative start
{"-1", "-3"} // Negative start, negative end
};
for (const auto& testData : testSettings) {
SCOPED_TRACE(testData.debugString());
const auto& startOffset = testData.fragmentStart;
const auto& endOffset = testData.fragmentEnd;
auto plan =
PlanBuilder()
.values(split(data, 10))
.window({fmt::format(
"regr_count(c0, c1) over (partition by p0, p1 order by row_number ROWS between {} PRECEDING and {} FOLLOWING)",
startOffset,
endOffset)})
.planNode();
VELOX_ASSERT_USER_THROW(
AssertQueryBuilder(plan, duckDbQueryRunner_)
.assertResults(fmt::format(
"SELECT *, regr_count(c0, c1) over (partition by p0, p1 order by row_number ROWS between {} PRECEDING and {} FOLLOWING) from tmp",
startOffset,
endOffset)),
testData.debugString());
}
}

} // namespace
} // namespace facebook::velox::exec

0 comments on commit d35910c

Please sign in to comment.