Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for smallint to PrefixSort #10946

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions velox/exec/PrefixSort.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ FOLLY_ALWAYS_INLINE void extractRowColumnToPrefix(
char* const row,
char* const prefix) {
switch (typeKind) {
case TypeKind::SMALLINT: {
encodeRowColumn<int16_t>(prefixSortLayout, index, rowColumn, row, prefix);
return;
}
case TypeKind::INTEGER: {
encodeRowColumn<int32_t>(prefixSortLayout, index, rowColumn, row, prefix);
return;
Expand Down
73 changes: 72 additions & 1 deletion velox/exec/benchmarks/PrefixSortBenchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,30 @@ class PrefixSortBenchmark {
}
}

std::vector<RowTypePtr> smallintRowTypes(bool noPayload) {
if (noPayload) {
return {
ROW({SMALLINT()}),
ROW({SMALLINT(), SMALLINT()}),
ROW({SMALLINT(), SMALLINT(), SMALLINT()}),
ROW({SMALLINT(), SMALLINT(), SMALLINT(), SMALLINT()}),
};
} else {
return {
ROW({SMALLINT(), VARCHAR(), VARCHAR()}),
ROW({SMALLINT(), SMALLINT(), VARCHAR(), VARCHAR()}),
ROW({SMALLINT(), SMALLINT(), SMALLINT(), VARCHAR(), VARCHAR()}),
ROW(
{SMALLINT(),
SMALLINT(),
SMALLINT(),
SMALLINT(),
VARCHAR(),
VARCHAR()}),
};
}
}

void bigint(
bool noPayload,
int numIterations,
Expand Down Expand Up @@ -296,6 +320,49 @@ class PrefixSortBenchmark {
"no-payloads", "varchar", batchSizes, rowTypes, numKeys, iterations);
}

void smallint(
bool noPayload,
int numIterations,
const std::vector<vector_size_t>& batchSizes) {
std::vector<RowTypePtr> rowTypes = smallintRowTypes(noPayload);
std::vector<int> numKeys = {1, 2, 3, 4};
benchmark(
noPayload ? "no-payload" : "payload",
"smallint",
batchSizes,
rowTypes,
numKeys,
numIterations);
}

void smallSmallint() {
// For small dateset, iterations need to be large enough to ensure that the
// benchmark runs for enough time.
const auto iterations = 100'000;
const std::vector<vector_size_t> batchSizes = {10, 50, 100, 500};
smallint(true, iterations, batchSizes);
}

void smallSmallintWithPayload() {
const auto iterations = 100'000;
const std::vector<vector_size_t> batchSizes = {10, 50, 100, 500};
smallint(false, iterations, batchSizes);
}

void largeSmallint() {
const auto iterations = 10;
const std::vector<vector_size_t> batchSizes = {
1'000, 10'000, 100'000, 1'000'000};
smallint(true, iterations, batchSizes);
}

void largeSmallintWithPayloads() {
const auto iterations = 10;
const std::vector<vector_size_t> batchSizes = {
1'000, 10'000, 100'000, 1'000'000};
smallint(false, iterations, batchSizes);
}

private:
std::vector<std::unique_ptr<TestCase>> testCases_;
memory::MemoryPool* pool_;
Expand All @@ -316,7 +383,11 @@ int main(int argc, char** argv) {
bm.largeBigintWithPayloads();
bm.smallBigintWithPayload();
bm.largeVarchar();
folly::runBenchmarks();
bm.smallSmallint();
bm.largeSmallint();
bm.smallSmallintWithPayload();
bm.largeSmallintWithPayloads();

folly::runBenchmarks();
return 0;
}
24 changes: 23 additions & 1 deletion velox/exec/prefixsort/PrefixSortEncoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ class PrefixSortEncoder {
}

/// @tparam T Type of value. Supported type are: uint64_t, int64_t, uint32_t,
/// int32_t, float, double, Timestamp. TODO Add support for int16_t, uint16_t.
/// int32_t, int16_t, uint16_t, float, double, Timestamp.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need support for uint16_t?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just followed the logic of int32/uint32 to support int16/uint16. Its only a specific explanation focused on the function itself.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@skadilover Do we need support for unsigned integer types? Is there any use case for these?

Copy link
Contributor

@skadilover skadilover Sep 9, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They are used internal, singed ints are always converted to usigned ints

template <>
FOLLY_ALWAYS_INLINE void PrefixSortEncoder::encodeNoNulls(
    int64_t value,
    char* dest) const {
  encodeNoNulls((uint64_t)(value ^ (1ull << 63)), dest);
}

template <typename T>
FOLLY_ALWAYS_INLINE void encodeNoNulls(T value, char* dest) const;

Expand All @@ -71,6 +71,9 @@ class PrefixSortEncoder {
FOLLY_ALWAYS_INLINE static std::optional<uint32_t> encodedSize(
TypeKind typeKind) {
switch ((typeKind)) {
case ::facebook::velox::TypeKind::SMALLINT: {
return 3;
}
case ::facebook::velox::TypeKind::INTEGER: {
return 5;
}
Expand Down Expand Up @@ -147,6 +150,25 @@ FOLLY_ALWAYS_INLINE void PrefixSortEncoder::encodeNoNulls(
encodeNoNulls((uint64_t)(value ^ (1ull << 63)), dest);
}

/// Logic is as same as int32_t.
template <>
FOLLY_ALWAYS_INLINE void PrefixSortEncoder::encodeNoNulls(
uint16_t value,
char* dest) const {
auto& v = *reinterpret_cast<uint16_t*>(dest);
v = __builtin_bswap16(value);
if (!ascending_) {
v = ~v;
}
}

template <>
FOLLY_ALWAYS_INLINE void PrefixSortEncoder::encodeNoNulls(
int16_t value,
char* dest) const {
encodeNoNulls(static_cast<uint16_t>(value ^ (1u << 15)), dest);
}

namespace detail {
/// Convert double to a uint64_t, their value comparison semantics remain
/// consistent.
Expand Down
17 changes: 17 additions & 0 deletions velox/exec/prefixsort/tests/PrefixEncoderTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,17 @@ TEST_F(PrefixEncoderTest, encode) {
testEncode<int32_t>(0x11223344, (char*)&ascExpected, (char*)&descExpected);
}

{
uint16_t ascExpected = 0x2211;
uint16_t descExpected = 0xddee;
testEncode<uint16_t>(0x1122, (char*)&ascExpected, (char*)&descExpected);
}
{
int16_t ascExpected = 0x2291;
int16_t descExpected = 0xdd6e;
testEncode<int16_t>(0x1122, (char*)&ascExpected, (char*)&descExpected);
}

{
uint32_t ascExpected = 0x0050c3c7;
uint32_t descExpected = 0xffaf3c38;
Expand Down Expand Up @@ -330,13 +341,19 @@ TEST_F(PrefixEncoderTest, encode) {
TEST_F(PrefixEncoderTest, compare) {
testCompare<uint64_t>();
testCompare<uint32_t>();
testCompare<uint16_t>();
testCompare<int64_t>();
testCompare<int32_t>();
testCompare<int16_t>();
testCompare<float>();
testCompare<double>();
testCompare<Timestamp>();
}

TEST_F(PrefixEncoderTest, fuzzySmallInt) {
testFuzz<TypeKind::SMALLINT>();
}

TEST_F(PrefixEncoderTest, fuzzyInteger) {
testFuzz<TypeKind::INTEGER>();
}
Expand Down
Loading