From 862c7a615fa612cacde67b03858178f9999eccd3 Mon Sep 17 00:00:00 2001
From: "hengjiang.ly" <hengjiang.ly@alibaba-inc.com>
Date: Fri, 2 Feb 2024 11:59:59 -0800
Subject: [PATCH] Add null support to PrefixSortEncoder (#8350)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Summary:
When encoding values for PrefixSort, we cannot simply equate null with the max or min value, because then null could not be distinguished from an actual max/min value (e.g. in a nulls-first scenario). Instead, a null byte is added to the normalized encoding.
The changes:
1. Add compare flags (ascending, nullsFirst) and null awareness (isNull) to the encode() method.
2. Add a new method, encodeNoNulls, to handle values that are known to be non-null; it supports uint64_t/int64_t/uint32_t/int32_t/float/double/Timestamp.

Design doc: https://docs.google.com/document/d/1wa1lbbR-bhf0eg1mSaH7JUzeG7vhwz94a6ElUTK0J8k/edit?usp=sharing

Part of https://github.com/facebookincubator/velox/issues/6766

Pull Request resolved: https://github.com/facebookincubator/velox/pull/8350

Reviewed By: pedroerp

Differential Revision: D53348718

Pulled By: mbasmanova

fbshipit-source-id: 6f7887fb9d09b17af6a786de82fc00e116156a62
---
 velox/exec/prefixsort/PrefixSortEncoder.h     | 181 ++++++++-
 velox/exec/prefixsort/tests/CMakeLists.txt    |   3 +-
 .../prefixsort/tests/PrefixEncoderTest.cpp    | 361 ++++++++++++++++++
 .../tests/utils/EncoderTestUtils.cpp          |   4 +-
 4 files changed, 531 insertions(+), 18 deletions(-)
 create mode 100644 velox/exec/prefixsort/tests/PrefixEncoderTest.cpp
diff --git a/velox/exec/prefixsort/PrefixSortEncoder.h b/velox/exec/prefixsort/PrefixSortEncoder.h
index d8a0cd8c923f..7408390ecabf 100644
--- a/velox/exec/prefixsort/PrefixSortEncoder.h
+++ b/velox/exec/prefixsort/PrefixSortEncoder.h
@@ -22,33 +22,184 @@
 #include "velox/common/base/BitUtil.h"
 #include "velox/common/base/Exceptions.h"
 #include "velox/common/base/SimdUtil.h"
+#include "velox/type/Timestamp.h"
 
 namespace facebook::velox::exec::prefixsort {
 
 /// Provides encode/decode methods for PrefixSort.
 class PrefixSortEncoder {
  public:
-  /// 1. Only int64_t is supported now.
-  /// 2. Encoding is compatible with sorting ascending with no nulls.
+  PrefixSortEncoder(bool ascending, bool nullsFirst)
+      : ascending_(ascending), nullsFirst_(nullsFirst) {}
+
+  /// Encodes a nullable value of a native primitive type (uint64_t, int64_t,
+  /// uint32_t, int32_t, float, double, Timestamp) into 'dest', which must
+  /// have room for 1 + sizeof(T) bytes. TODO: Add support for strings.
+  /// 1. dest[0] is the null byte: 0 if (nulls first and value is null) or
+  ///    (nulls last and value is not null), otherwise 1.
+  /// 2. The remaining sizeof(T) bytes hold the encoded value:
+  ///    - If value is null, they are filled with zero bytes (0x00); the null
+  ///      byte alone decides how a null compares against a non-null value.
+  ///    - If value is not null, they are written by encodeNoNulls.
   template <typename T>
-  static FOLLY_ALWAYS_INLINE void encode(T value, char* row);
+  FOLLY_ALWAYS_INLINE void encode(std::optional<T> value, char* dest) const {
+    if (!value.has_value()) {
+      dest[0] = nullsFirst_ ? 0 : 1;
+      simd::memset(dest + 1, 0, sizeof(T));
+      return;
+    }
+    dest[0] = nullsFirst_ ? 1 : 0;
+    encodeNoNulls(*value, dest + 1);
+  }
 
- private:
-  FOLLY_ALWAYS_INLINE static uint8_t flipSignBit(uint8_t byte) {
-    return byte ^ 128;
+  /// @tparam T Type of value. Supported types: uint64_t, int64_t, uint32_t,
+  /// int32_t, float, double, Timestamp. TODO: Add int16_t, uint16_t.
+  template <typename T>
+  FOLLY_ALWAYS_INLINE void encodeNoNulls(T value, char* dest) const;
+
+  bool isAscending() const {
+    return ascending_;
   }
+
+  bool isNullsFirst() const {
+    return nullsFirst_;
+  }
+
+ private:
+  const bool ascending_;
+  const bool nullsFirst_;
 };
 
-/// Assuming that value is little-endian encoded, we encode it as follows to
-/// make sure encoding is compatible with sorting ascending:
-/// 1 Reverse each byte, for example, 0xaabbccdd becomes 0xddccbbaa.
-/// 2 Flip the sign bit.
-/// The decode logic is exactly the opposite of the above approach.
+/// Assumes a little-endian host: an unsigned integer 0xaabbccdd is stored in
+/// memory as the bytes [0xdd, 0xcc, 0xbb, 0xaa]. Byte-swapping it yields
+/// [0xaa, 0xbb, 0xcc, 0xdd], so comparing two encoded buffers byte by byte
+/// from first to last (memcmp) gives the same result as comparing the values.
+/// For any two unsigned integers a < b <==> ~a > ~b, so all bits are inverted
+/// for descending order. The result is stored with memcpy because 'dest' is
+/// not guaranteed to be aligned for uint32_t: encode() passes 'dest + 1',
+/// and a store through a reinterpret_cast'ed pointer would be undefined.
+template <>
+FOLLY_ALWAYS_INLINE void PrefixSortEncoder::encodeNoNulls(
+    uint32_t value,
+    char* dest) const {
+  uint32_t encoded = __builtin_bswap32(value);
+  if (!ascending_) {
+    encoded = ~encoded;
+  }
+  simd::memcpy(dest, &encoded, sizeof(encoded));
+}
+
+/// Signed integers are encoded by flipping the sign bit and then encoding the
+/// result as an unsigned integer. For two values with the same sign, the
+/// two's complement bit patterns already order like unsigned integers and
+/// the flip changes both sign bits alike, so their relative order is kept.
+/// For a non-negative vs. a negative value, the flip sets the sign bit of
+/// the former and clears it for the latter, so non-negative sorts higher.
+template <>
+FOLLY_ALWAYS_INLINE void PrefixSortEncoder::encodeNoNulls(
+    int32_t value,
+    char* dest) const {
+  encodeNoNulls(static_cast<uint32_t>(value ^ (1u << 31)), dest);
+}
+
+/// Same logic as uint32_t. Stores via memcpy since 'dest' may be unaligned.
+template <>
+FOLLY_ALWAYS_INLINE void PrefixSortEncoder::encodeNoNulls(
+    uint64_t value,
+    char* dest) const {
+  uint64_t encoded = __builtin_bswap64(value);
+  if (!ascending_) {
+    encoded = ~encoded;
+  }
+  simd::memcpy(dest, &encoded, sizeof(encoded));
+}
+
+template <>
+FOLLY_ALWAYS_INLINE void PrefixSortEncoder::encodeNoNulls(
+    int64_t value,
+    char* dest) const {
+  encodeNoNulls(static_cast<uint64_t>(value ^ (1ull << 63)), dest);
+}
+
+namespace detail {
+/// Maps a double to a uint64_t such that comparing the results as unsigned
+/// integers orders them like the doubles: -Inf < finite < +Inf < NaN.
+static FOLLY_ALWAYS_INLINE uint64_t encodeDouble(double value) {
+  // +0.0 and -0.0 compare equal, so both get the same code.
+  if (value == 0) {
+    return 1ull << 63;
+  }
+  // NaN gets the largest code.
+  if (std::isnan(value)) {
+    return std::numeric_limits<uint64_t>::max();
+  }
+  // +Infinity gets the second largest code.
+  if (value > std::numeric_limits<double>::max()) {
+    return std::numeric_limits<uint64_t>::max() - 1;
+  }
+  // -Infinity gets the smallest code.
+  if (value < -std::numeric_limits<double>::max()) {
+    return 0;
+  }
+  // Copy the bits out with memcpy: reading a double through a uint64_t*
+  // violates strict aliasing.
+  uint64_t encoded;
+  simd::memcpy(&encoded, &value, sizeof(encoded));
+  // Non-negative values: set the sign bit so they sort above all negatives.
+  // Negative values: invert all bits, which clears the sign bit and makes a
+  // larger magnitude (a smaller value) produce a smaller code.
+  constexpr uint64_t kSignBit = 1ull << 63;
+  return (encoded & kSignBit) == 0 ? (encoded | kSignBit) : ~encoded;
+}
+
+// Same scheme as encodeDouble, applied to the 32 bits of a float: the result
+// compared as uint32_t orders like the floats, with NaN as the largest code.
+static FOLLY_ALWAYS_INLINE uint32_t encodeFloat(float value) {
+  if (value == 0) {
+    return 1u << 31;
+  }
+  if (std::isnan(value)) {
+    return std::numeric_limits<uint32_t>::max();
+  }
+  if (value > std::numeric_limits<float>::max()) {
+    return std::numeric_limits<uint32_t>::max() - 1;
+  }
+  if (value < -std::numeric_limits<float>::max()) {
+    return 0;
+  }
+  // memcpy avoids the strict-aliasing violation of reading through uint32_t*.
+  uint32_t encoded;
+  simd::memcpy(&encoded, &value, sizeof(encoded));
+  // Non-negative: set the sign bit. Negative: invert all bits.
+  constexpr uint32_t kSignBit = 1u << 31;
+  return (encoded & kSignBit) == 0 ? (encoded | kSignBit) : ~encoded;
+}
+} // namespace detail
+
+/// encodeDouble() preserves value order, so encode its result as a uint64_t.
+template <>
+FOLLY_ALWAYS_INLINE void PrefixSortEncoder::encodeNoNulls(
+    double value,
+    char* dest) const {
+  const uint64_t orderPreserving = detail::encodeDouble(value);
+  encodeNoNulls(orderPreserving, dest);
+}
+
+template <>
+FOLLY_ALWAYS_INLINE void PrefixSortEncoder::encodeNoNulls(
+    float value,
+    char* dest) const {
+  encodeNoNulls<uint32_t>(detail::encodeFloat(value), dest);
+}
+
+/// Timestamps are ordered by seconds first and then by nanos, so the encoded
+/// seconds are written first and the encoded nanos follow at offset 8.
 template <>
-FOLLY_ALWAYS_INLINE void PrefixSortEncoder::encode(int64_t value, char* row) {
-  const auto v = __builtin_bswap64(static_cast<uint64_t>(value));
-  simd::memcpy(row, &v, sizeof(int64_t));
-  row[0] = flipSignBit(row[0]);
+FOLLY_ALWAYS_INLINE void PrefixSortEncoder::encodeNoNulls(
+    Timestamp value,
+    char* dest) const {
+  encodeNoNulls(value.getSeconds(), dest);
+  encodeNoNulls(value.getNanos(), dest + 8);
 }
 
 } // namespace facebook::velox::exec::prefixsort
diff --git a/velox/exec/prefixsort/tests/CMakeLists.txt b/velox/exec/prefixsort/tests/CMakeLists.txt
index 2ce2b2e9975b..cb25e4c6ba6a 100644
--- a/velox/exec/prefixsort/tests/CMakeLists.txt
+++ b/velox/exec/prefixsort/tests/CMakeLists.txt
@@ -13,7 +13,8 @@
 # limitations under the License.
 add_subdirectory(utils)
 
-add_executable(velox_exec_prefixsort_test PrefixSortAlgorithmTest.cpp)
+add_executable(velox_exec_prefixsort_test PrefixSortAlgorithmTest.cpp
+                                          PrefixEncoderTest.cpp)
 
 add_test(
   NAME velox_exec_prefixsort_test
diff --git a/velox/exec/prefixsort/tests/PrefixEncoderTest.cpp b/velox/exec/prefixsort/tests/PrefixEncoderTest.cpp
new file mode 100644
index 000000000000..3d9343646869
--- /dev/null
+++ b/velox/exec/prefixsort/tests/PrefixEncoderTest.cpp
@@ -0,0 +1,361 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+#include "velox/exec/prefixsort/PrefixSortEncoder.h"
+#include "velox/vector/fuzzer/VectorFuzzer.h"
+#include "velox/vector/tests/VectorTestUtils.h"
+
+namespace facebook::velox::exec::prefixsort::test {
+
+namespace {
+// 'std::numeric_limits<Timestamp>' does not provide is_integer, is_signed or
+// quiet_NaN(), so tests go through TypeLimits, which is specialized for it.
+template <typename T>
+struct TypeLimits {
+  static const bool isFloat = !std::numeric_limits<T>::is_integer;
+  static const bool isSigned = std::numeric_limits<T>::is_signed;
+
+  // Lowest finite value of T. std::numeric_limits<T>::min() is the smallest
+  // positive normalized value for floating point types, so use -max() there.
+  static FOLLY_ALWAYS_INLINE T min() {
+    if (isFloat) {
+      return -std::numeric_limits<T>::max();
+    }
+    return std::numeric_limits<T>::min();
+  }
+
+  // Middle of the value range: 0 for signed types, max / 2 for unsigned ones.
+  static FOLLY_ALWAYS_INLINE T mid() {
+    return isSigned ? static_cast<T>(0) : std::numeric_limits<T>::max() / 2;
+  }
+
+  static FOLLY_ALWAYS_INLINE T nan() {
+    return std::numeric_limits<T>::quiet_NaN();
+  }
+};
+
+template <>
+struct TypeLimits<Timestamp> {
+  static const bool isFloat = false;
+  static const bool isSigned = true;
+  static FOLLY_ALWAYS_INLINE Timestamp min() {
+    return std::numeric_limits<Timestamp>::min();
+  }
+
+  static FOLLY_ALWAYS_INLINE Timestamp mid() {
+    return Timestamp();
+  }
+
+  // Timestamp has no NaN; defined only so generic test code compiles.
+  static FOLLY_ALWAYS_INLINE Timestamp nan() {
+    VELOX_UNSUPPORTED("Timestamp not support nan()");
+  }
+};
+} // namespace
+
+class PrefixEncoderTest : public testing::Test,
+                          public velox::test::VectorTestBase {
+ public:
+  template <typename T>
+  void testEncodeNoNull(T value, char* expectedAsc, char* expectedDesc) {
+    char actual[sizeof(T)];
+    ascNullsFirstEncoder_.encodeNoNulls(value, actual);
+    ASSERT_EQ(std::memcmp(actual, expectedAsc, sizeof(T)), 0);
+    descNullsFirstEncoder_.encodeNoNulls(value, actual);
+    ASSERT_EQ(std::memcmp(actual, expectedDesc, sizeof(T)), 0);
+  }
+
+  // Checks encode() for a null and for 'testValue' with all four encoders.
+  // 'expectedAsc' / 'expectedDesc' hold the expected sizeof(T) value bytes.
+  template <typename T>
+  void testEncodeWithNull(T testValue, char* expectedAsc, char* expectedDesc) {
+    constexpr size_t kEncodedSize = sizeof(T) + 1;
+    const std::optional<T> missing = std::nullopt;
+    const std::optional<T> present = testValue;
+    char actual[kEncodedSize];
+    // A null encodes as the null byte followed by sizeof(T) zero bytes.
+    char nullFirst[kEncodedSize] = {};
+    char nullLast[kEncodedSize] = {};
+    nullLast[0] = 1;
+
+    ascNullsFirstEncoder_.encode(missing, actual);
+    ASSERT_EQ(std::memcmp(nullFirst, actual, kEncodedSize), 0);
+    ascNullsLastEncoder_.encode(missing, actual);
+    ASSERT_EQ(std::memcmp(nullLast, actual, kEncodedSize), 0);
+
+    // A non-null value encodes as the null byte (1 when nulls are first, 0
+    // when nulls are last) followed by the expected value bytes.
+    ascNullsFirstEncoder_.encode(present, actual);
+    ASSERT_EQ(actual[0], 1);
+    ASSERT_EQ(std::memcmp(actual + 1, expectedAsc, sizeof(T)), 0);
+    ascNullsLastEncoder_.encode(present, actual);
+    ASSERT_EQ(actual[0], 0);
+    ASSERT_EQ(std::memcmp(actual + 1, expectedAsc, sizeof(T)), 0);
+    descNullsFirstEncoder_.encode(present, actual);
+    ASSERT_EQ(actual[0], 1);
+    ASSERT_EQ(std::memcmp(actual + 1, expectedDesc, sizeof(T)), 0);
+    descNullsLastEncoder_.encode(present, actual);
+    ASSERT_EQ(actual[0], 0);
+    ASSERT_EQ(std::memcmp(actual + 1, expectedDesc, sizeof(T)), 0);
+  }
+
+  template <typename T>
+  void testEncode(T value, char* expectedAsc, char* expectedDesc) {
+    testEncodeNoNull<T>(value, expectedAsc, expectedDesc);
+    testEncodeWithNull<T>(value, expectedAsc, expectedDesc);
+  }
+
+  // Checks that an encoded null orders before (nulls first) or after (nulls
+  // last) encoded non-null values, including NaN for floating point types.
+  template <typename T>
+  void testNullCompare() {
+    const std::optional<T> nullValue = std::nullopt;
+    const std::optional<T> max = std::numeric_limits<T>::max();
+    const std::optional<T> min = std::numeric_limits<T>::min();
+    char encodedNull[sizeof(T) + 1];
+    char encodedMax[sizeof(T) + 1];
+    char encodedMin[sizeof(T) + 1];
+
+    auto encodeAll = [&](const PrefixSortEncoder& encoder) {
+      encoder.encode(nullValue, encodedNull);
+      encoder.encode(min, encodedMin);
+      encoder.encode(max, encodedMax);
+    };
+
+    auto compare = [](const char* left, const char* right) {
+      return std::memcmp(left, right, sizeof(T) + 1);
+    };
+
+    // Nulls first: NULL < non-NULL.
+    encodeAll(ascNullsFirstEncoder_);
+    ASSERT_LT(compare(encodedNull, encodedMin), 0);
+    encodeAll(descNullsFirstEncoder_);
+    ASSERT_LT(compare(encodedNull, encodedMin), 0);
+
+    // Nulls last: NULL > non-NULL.
+    encodeAll(ascNullsLastEncoder_);
+    ASSERT_GT(compare(encodedNull, encodedMax), 0);
+    encodeAll(descNullsLastEncoder_);
+    ASSERT_GT(compare(encodedNull, encodedMax), 0);
+
+    // For float / double: max < NaN, and NULL < NaN when nulls are first.
+    if (TypeLimits<T>::isFloat) {
+      std::optional<T> nan = TypeLimits<T>::nan();
+      char encodedNaN[sizeof(T) + 1];
+      ascNullsFirstEncoder_.encode(nan, encodedNaN);
+      ascNullsFirstEncoder_.encode(max, encodedMax);
+      ASSERT_GT(compare(encodedNaN, encodedMax), 0);
+
+      ascNullsFirstEncoder_.encode(nullValue, encodedNull);
+      ASSERT_LT(compare(encodedNull, encodedNaN), 0);
+    }
+  }
+
+  template <typename T>
+  void testValidValueCompare() {
+    std::optional<T> max = std::numeric_limits<T>::max();
+    std::optional<T> min = TypeLimits<T>::min();
+    std::optional<T> mid = TypeLimits<T>::mid();
+    char encodedMax[sizeof(T) + 1];
+    char encodedMin[sizeof(T) + 1];
+    char encodedMid[sizeof(T) + 1];
+    auto encode = [&](auto& encoder) {
+      encoder.encode(mid, encodedMid);
+      encoder.encode(min, encodedMin);
+      encoder.encode(max, encodedMax);
+    };
+
+    auto compare = [](char* left, char* right) {
+      return std::memcmp(left, right, sizeof(T) + 1);
+    };
+
+    encode(ascNullsFirstEncoder_);
+    // ASC: min < mid < max.
+    ASSERT_GT(compare(encodedMid, encodedMin), 0);
+    ASSERT_LT(compare(encodedMid, encodedMax), 0);
+
+    encode(descNullsFirstEncoder_);
+    // DESC: max < mid < min.
+    ASSERT_LT(compare(encodedMid, encodedMin), 0);
+    ASSERT_GT(compare(encodedMid, encodedMax), 0);
+
+    encode(ascNullsLastEncoder_);
+    // ASC: min < mid < max.
+    ASSERT_GT(compare(encodedMid, encodedMin), 0);
+    ASSERT_LT(compare(encodedMid, encodedMax), 0);
+
+    encode(descNullsLastEncoder_);
+    // DESC: max < mid < min.
+    ASSERT_LT(compare(encodedMid, encodedMin), 0);
+    ASSERT_GT(compare(encodedMid, encodedMax), 0);
+  }
+
+  template <typename T>
+  void testCompare() {
+    testNullCompare<T>();
+    testValidValueCompare<T>();
+  }
+
+  // Encodes pairs of fuzzed, nullable values of Kind's native type with each
+  // encoder and checks that memcmp over the encodings agrees with compare().
+  template <TypeKind Kind>
+  void testFuzz() {
+    using ValueDataType = typename TypeTraits<Kind>::NativeType;
+    const int kVectorSize = 1024;
+
+    // Normalize memcmp's result to -1 / 0 / 1 so that it can be checked
+    // against the result of the vector's compare method with ASSERT_EQ.
+    auto compare = [](char* left, char* right) {
+      const auto result = std::memcmp(left, right, sizeof(ValueDataType) + 1);
+      return result < 0 ? -1 : (result > 0 ? 1 : 0);
+    };
+
+    auto test = [&](const PrefixSortEncoder& encoder) {
+      TypePtr type = TypeTraits<Kind>::ImplType::create();
+      VectorFuzzer fuzzer({.vectorSize = kVectorSize, .nullRatio = 0.1}, pool());
+
+      CompareFlags compareFlag = {
+          encoder.isNullsFirst(),
+          encoder.isAscending(),
+          false,
+          CompareFlags::NullHandlingMode::kNullAsValue};
+      SCOPED_TRACE(compareFlag.toString());
+      const auto leftVector =
+          std::dynamic_pointer_cast<FlatVector<ValueDataType>>(
+              fuzzer.fuzzFlat(type, kVectorSize));
+      const auto rightVector =
+          std::dynamic_pointer_cast<FlatVector<ValueDataType>>(
+              fuzzer.fuzzFlat(type, kVectorSize));
+
+      char leftEncoded[sizeof(ValueDataType) + 1];
+      char rightEncoded[sizeof(ValueDataType) + 1];
+
+      for (auto row = 0; row < kVectorSize; ++row) {
+        const auto leftValue = leftVector->isNullAt(row)
+            ? std::nullopt
+            : std::optional<ValueDataType>(leftVector->valueAt(row));
+        const auto rightValue = rightVector->isNullAt(row)
+            ? std::nullopt
+            : std::optional<ValueDataType>(rightVector->valueAt(row));
+        encoder.encode(leftValue, leftEncoded);
+        encoder.encode(rightValue, rightEncoded);
+        const auto expected =
+            leftVector->compare(rightVector.get(), row, row, compareFlag).value();
+        ASSERT_EQ(compare(leftEncoded, rightEncoded), expected);
+      }
+    };
+
+    test(ascNullsFirstEncoder_);
+    test(ascNullsLastEncoder_);
+    test(descNullsFirstEncoder_);
+    test(descNullsLastEncoder_);
+  }
+
+ protected:
+  static void SetUpTestCase() {
+    memory::MemoryManager::testingSetInstance({});
+  }
+
+ private:
+  const PrefixSortEncoder ascNullsFirstEncoder_ = {true, true};
+  const PrefixSortEncoder ascNullsLastEncoder_ = {true, false};
+  const PrefixSortEncoder descNullsFirstEncoder_ = {false, true};
+  const PrefixSortEncoder descNullsLastEncoder_ = {false, false};
+};
+
+TEST_F(PrefixEncoderTest, encode) {
+  {
+    uint64_t ascExpected = 0x8877665544332211;
+    uint64_t descExpected = 0x778899aabbccddee;
+    testEncode<uint64_t>(
+        0x1122334455667788, (char*)&ascExpected, (char*)&descExpected);
+  }
+
+  {
+    int64_t ascExpected = 0x8877665544332291;
+    int64_t descExpected = 0x778899aabbccdd6e;
+    testEncode<int64_t>(
+        0x1122334455667788, (char*)&ascExpected, (char*)&descExpected);
+  }
+  {
+    uint32_t ascExpected = 0x44332211;
+    uint32_t descExpected = 0xbbccddee;
+    testEncode<uint32_t>(0x11223344, (char*)&ascExpected, (char*)&descExpected);
+  }
+  {
+    int32_t ascExpected = 0x44332291;
+    int32_t descExpected = 0xbbccdd6e;
+    testEncode<int32_t>(0x11223344, (char*)&ascExpected, (char*)&descExpected);
+  }
+
+  {
+    uint32_t ascExpected = 0x0050c3c7;
+    uint32_t descExpected = 0xffaf3c38;
+    testEncode<float>(100000.00, (char*)&ascExpected, (char*)&descExpected);
+  }
+
+  {
+    uint64_t ascExpected = 0x00000000006af8c0;
+    uint64_t descExpected = 0xffffffffff95073f;
+    testEncode<double>(100000.00, (char*)&ascExpected, (char*)&descExpected);
+  }
+
+  {
+    Timestamp value = Timestamp(0x000000011223344, 0x000000011223344);
+    uint64_t ascExpected[2];
+    uint64_t descExpected[2];
+    ascExpected[0] = 0x4433221100000080;
+    ascExpected[1] = 0x4433221100000000;
+    descExpected[0] = 0xbbccddeeffffff7f;
+    descExpected[1] = 0xbbccddeeffffffff;
+    testEncode<Timestamp>(value, (char*)ascExpected, (char*)descExpected);
+  }
+}
+
+TEST_F(PrefixEncoderTest, compare) {
+  testCompare<uint64_t>();
+  testCompare<uint32_t>();
+  testCompare<int64_t>();
+  testCompare<int32_t>();
+  testCompare<float>();
+  testCompare<double>();
+  testCompare<Timestamp>();
+}
+
+TEST_F(PrefixEncoderTest, fuzzyInteger) {
+  testFuzz<TypeKind::INTEGER>();
+}
+
+TEST_F(PrefixEncoderTest, fuzzyBigint) {
+  testFuzz<TypeKind::BIGINT>();
+}
+
+TEST_F(PrefixEncoderTest, fuzzyReal) {
+  testFuzz<TypeKind::REAL>();
+}
+
+TEST_F(PrefixEncoderTest, fuzzyDouble) {
+  testFuzz<TypeKind::DOUBLE>();
+}
+
+TEST_F(PrefixEncoderTest, fuzzyTimestamp) {
+  testFuzz<TypeKind::TIMESTAMP>();
+}
+
+} // namespace facebook::velox::exec::prefixsort::test
diff --git a/velox/exec/prefixsort/tests/utils/EncoderTestUtils.cpp b/velox/exec/prefixsort/tests/utils/EncoderTestUtils.cpp
index e475364d2607..d1773def0a3f 100644
--- a/velox/exec/prefixsort/tests/utils/EncoderTestUtils.cpp
+++ b/velox/exec/prefixsort/tests/utils/EncoderTestUtils.cpp
@@ -30,9 +30,9 @@ void decodeNoNulls(char* encoded, int64_t& value) {
 } // namespace
 
 void encodeInPlace(std::vector<int64_t>& data) {
+  static const PrefixSortEncoder encoder{true, true}; // asc, nulls first
   for (auto i = 0; i < data.size(); i++) {
-    PrefixSortEncoder::encode(
-        data[i], (char*)data.data() + i * sizeof(int64_t));
+    encoder.encodeNoNulls(data[i], reinterpret_cast<char*>(&data[i]));
   }
 }