Skip to content

Commit

Permalink
test alternative plans in WindowFuzzer (facebookincubator#8359)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: facebookincubator#8359

Allow the window fuzzer to test the following alternative plans and ensure that their results are the same as those of the basic plan `values(input).window(...)`.
* values(input).orderBy(allKeys, false).streamingWindow(...)
* tableScan(inputRowType).localPartition(partitionKeys).window(...)
* tableScan(inputRowType).orderBy(allKeys, false).streamingWindow(...)

Also, this diff allows the fuzzer to specify IGNORE NULLS in window function calls with a 50% chance.

This is part of facebookincubator#7754.

Reviewed By: kgpai

Differential Revision: D52720371

fbshipit-source-id: 447c1ef2be4a455718194c2177f471885ec03ba2
  • Loading branch information
kagamiori authored and facebook-github-bot committed Mar 13, 2024
1 parent cb583e2 commit 81c609a
Show file tree
Hide file tree
Showing 7 changed files with 138 additions and 6 deletions.
3 changes: 3 additions & 0 deletions velox/core/PlanNode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1273,6 +1273,9 @@ void addWindowFunction(
std::stringstream& stream,
const WindowNode::Function& windowFunction) {
stream << windowFunction.functionCall->toString() << " ";
if (windowFunction.ignoreNulls) {
stream << "IGNORE NULLS ";
}
auto frame = windowFunction.frame;
if (frame.startType == WindowNode::BoundType::kUnboundedFollowing) {
VELOX_USER_FAIL("Window frame start cannot be UNBOUNDED FOLLOWING");
Expand Down
6 changes: 5 additions & 1 deletion velox/exec/fuzzer/AggregationFuzzerBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -601,7 +601,8 @@ std::string makeFunctionCall(
const std::string& name,
const std::vector<std::string>& argNames,
bool sortedInputs,
bool distinctInputs) {
bool distinctInputs,
bool ignoreNulls) {
std::ostringstream call;
call << name << "(";

Expand All @@ -613,6 +614,9 @@ std::string makeFunctionCall(
} else {
call << args;
}
if (ignoreNulls) {
call << " IGNORE NULLS";
}
call << ")";

return call.str();
Expand Down
3 changes: 2 additions & 1 deletion velox/exec/fuzzer/AggregationFuzzerBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,8 @@ std::string makeFunctionCall(
const std::string& name,
const std::vector<std::string>& argNames,
bool sortedInputs = false,
bool distinctInputs = false);
bool distinctInputs = false,
bool ignoreNulls = false);

// Returns a list of column names from c0 to cn.
std::vector<std::string> makeNames(size_t n);
Expand Down
3 changes: 2 additions & 1 deletion velox/exec/fuzzer/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -55,4 +55,5 @@ target_link_libraries(
velox_vector_fuzzer
velox_exec_test_lib
velox_expression_test_utility
velox_aggregation_fuzzer_base)
velox_aggregation_fuzzer_base
velox_temp_path)
15 changes: 14 additions & 1 deletion velox/exec/fuzzer/PrestoQueryRunner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,19 @@ std::string toAggregateCallSql(
return sql.str();
}

// Renders a window function call as SQL text of the form "name(inputs)",
// followed by " IGNORE NULLS" when 'ignoreNulls' is set. The null-treatment
// clause goes after the closing parenthesis of the argument list.
std::string toWindowCallSql(
    const core::CallTypedExprPtr& call,
    bool ignoreNulls = false) {
  std::stringstream out;

  // Function name and parenthesized argument list.
  out << call->name() << "(";
  toCallInputsSql(call->inputs(), out);
  out << ")";

  // Optional null-treatment clause.
  out << (ignoreNulls ? " IGNORE NULLS" : "");

  return out.str();
}

bool isSupportedDwrfType(const TypePtr& type) {
if (type->isDate() || type->isIntervalDayTime() || type->isUnKnown()) {
return false;
Expand Down Expand Up @@ -454,7 +467,7 @@ std::optional<std::string> PrestoQueryRunner::toSql(
const auto& functions = windowNode->windowFunctions();
for (auto i = 0; i < functions.size(); ++i) {
appendComma(i, sql);
sql << toCallSql(functions[i].functionCall);
sql << toWindowCallSql(functions[i].functionCall, functions[i].ignoreNulls);

sql << " OVER (";

Expand Down
105 changes: 103 additions & 2 deletions velox/exec/fuzzer/WindowFuzzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "velox/exec/fuzzer/WindowFuzzer.h"

#include "velox/exec/tests/utils/PlanBuilder.h"
#include "velox/exec/tests/utils/TempDirectoryPath.h"

DEFINE_bool(
enable_window_reference_verification,
Expand All @@ -36,6 +37,20 @@ void logVectors(const std::vector<RowVectorPtr>& vectors) {
}
}

// Returns true if the window function 'name' accepts IGNORE NULLS. The match
// is an exact, case-sensitive lookup against the list below.
bool supportIgnoreNulls(const std::string& name) {
  // Complete list of functions that support ignore nulls. Aggregation
  // functions used in window operations do not support it, see
  // https://github.com/prestodb/presto/issues/21304.
  static const std::unordered_set<std::string> kIgnoreNullsFunctions{
      "first_value",
      "last_value",
      "nth_value",
      "lead",
      "lag",
  };
  return kIgnoreNullsFunctions.find(name) != kIgnoreNullsFunctions.end();
}

} // namespace

void WindowFuzzer::addWindowFunctionSignatures(
Expand Down Expand Up @@ -128,7 +143,10 @@ void WindowFuzzer::go() {
std::vector<TypePtr> argTypes = signature.args;
std::vector<std::string> argNames = makeNames(argTypes.size());

auto call = makeFunctionCall(signature.name, argNames, false);
bool ignoreNulls =
supportIgnoreNulls(signature.name) && vectorFuzzer_.coinToss(0.5);
auto call =
makeFunctionCall(signature.name, argNames, false, false, ignoreNulls);

std::vector<SortingKeyAndOrder> sortingKeysAndOrders;
// 50% chance without order-by clause.
Expand Down Expand Up @@ -184,6 +202,78 @@ void WindowFuzzer::go(const std::string& /*planPath*/) {
VELOX_NYI();
}

// Builds alternative plans that evaluate the same window function call as the
// basic values(input).window(...) plan and hands each of them to testPlan()
// together with 'expected'. The candidates, in the order they are tested:
//   1. values(input).orderBy(allKeys).streamingWindow(...)
//   2. tableScan(inputRowType).localPartition(partitionKeys).window(...)
//   3. tableScan(inputRowType).orderBy(allKeys).streamingWindow(...)
// The streaming variants (1 and 3) are only added when there is at least one
// partition or sorting key. The table-scan variants (2 and 3) are only added
// when isTableScanSupported() accepts the input row type.
void WindowFuzzer::testAlternativePlans(
    const std::vector<std::string>& partitionKeys,
    const std::vector<SortingKeyAndOrder>& sortingKeysAndOrders,
    const std::string& frame,
    const std::string& functionCall,
    const std::vector<RowVectorPtr>& input,
    bool customVerification,
    const velox::test::ResultOrError& expected) {
  // Ordering used by the streaming plans: partition keys first (nulls first),
  // then the sorting keys with their own order and nulls order.
  std::vector<std::string> allKeys;
  for (const auto& partitionKey : partitionKeys) {
    allKeys.push_back(partitionKey + " NULLS FIRST");
  }
  for (const auto& sortingKey : sortingKeysAndOrders) {
    allKeys.push_back(folly::to<std::string>(
        sortingKey.key_,
        " ",
        sortingKey.order_,
        " ",
        sortingKey.nullsOrder_));
  }
  const bool hasKeys = !allKeys.empty();

  // The window expression is identical for every alternative plan.
  const std::string windowCall =
      fmt::format("{} over ({})", functionCall, frame);

  std::vector<AggregationFuzzerBase::PlanWithSplits> plans;

  // Streaming window from values.
  if (hasKeys) {
    plans.push_back(
        {PlanBuilder()
             .values(input)
             .orderBy(allKeys, false)
             .streamingWindow({windowCall})
             .planNode(),
         {}});
  }

  // With TableScan.
  // NOTE(review): presumably makeSplits() writes the input under this
  // directory, so it has to stay alive until all plans below have run --
  // confirm before narrowing its scope.
  auto directory = exec::test::TempDirectoryPath::create();
  const auto inputRowType = asRowType(input[0]->type());
  if (isTableScanSupported(inputRowType)) {
    auto splits = makeSplits(input, directory->path);

    plans.push_back(
        {PlanBuilder()
             .tableScan(inputRowType)
             .localPartition(partitionKeys)
             .window({windowCall})
             .planNode(),
         splits});

    if (hasKeys) {
      plans.push_back(
          {PlanBuilder()
               .tableScan(inputRowType)
               .orderBy(allKeys, false)
               .streamingWindow({windowCall})
               .planNode(),
           splits});
    }
  }

  // Run every candidate against the result of the basic plan.
  for (const auto& candidate : plans) {
    testPlan(
        candidate,
        false,
        false,
        customVerification,
        /*customVerifiers*/ {},
        expected);
  }
}

bool WindowFuzzer::verifyWindow(
const std::vector<std::string>& partitionKeys,
const std::vector<SortingKeyAndOrder>& sortingKeysAndOrders,
Expand All @@ -200,8 +290,10 @@ bool WindowFuzzer::verifyWindow(
if (persistAndRunOnce_) {
persistReproInfo({{plan, {}}}, reproPersistPath_);
}

velox::test::ResultOrError resultOrError;
try {
auto resultOrError = execute(plan);
resultOrError = execute(plan);
if (resultOrError.exceptionPtr) {
++stats_.numFailed;
}
Expand Down Expand Up @@ -229,6 +321,15 @@ bool WindowFuzzer::verifyWindow(
++stats_.numVerificationSkipped;
}

testAlternativePlans(
partitionKeys,
sortingKeysAndOrders,
frame,
functionCall,
input,
customVerification,
resultOrError);

return resultOrError.exceptionPtr != nullptr;
} catch (...) {
if (!reproPersistPath_.empty()) {
Expand Down
9 changes: 9 additions & 0 deletions velox/exec/fuzzer/WindowFuzzer.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,15 @@ class WindowFuzzer : public AggregationFuzzerBase {
bool customVerification,
bool enableWindowVerification);

// Tests alternative plans for the window function call described by
// 'partitionKeys', 'sortingKeysAndOrders', 'frame' and 'functionCall' over
// 'input', checking each of them against 'expected'. The alternatives are
// streaming-window and table-scan based variants of the basic
// values(input).window(...) plan.
// @param partitionKeys Partition-by keys of the window.
// @param sortingKeysAndOrders Order-by keys with their sort and nulls order.
// @param frame Text placed inside "over (...)" in the window expression.
// @param functionCall Text of the window function call.
// @param input Input data fed to each plan.
// @param customVerification Forwarded to testPlan() for every plan.
// @param expected Result of the basic plan that the alternatives are
// compared with.
void testAlternativePlans(
const std::vector<std::string>& partitionKeys,
const std::vector<SortingKeyAndOrder>& sortingKeysAndOrders,
const std::string& frame,
const std::string& functionCall,
const std::vector<RowVectorPtr>& input,
bool customVerification,
const velox::test::ResultOrError& expected);

const std::unordered_set<std::string> orderDependentFunctions_;

struct Stats : public AggregationFuzzerBase::Stats {
Expand Down

0 comments on commit 81c609a

Please sign in to comment.