Skip to content

Commit

Permalink
BitUnpacking for continuous values
Browse files Browse the repository at this point in the history
  • Loading branch information
yingsu00 committed Aug 18, 2022
1 parent e459926 commit 59c9fbf
Show file tree
Hide file tree
Showing 8 changed files with 7,431 additions and 1,139 deletions.
708 changes: 708 additions & 0 deletions velox/dwio/common/BitUnpacking.h

Large diffs are not rendered by default.

1,644 changes: 1,644 additions & 0 deletions velox/dwio/common/tests/BitUnpackingBenchmark.cpp

Large diffs are not rendered by default.

133 changes: 133 additions & 0 deletions velox/dwio/common/tests/BitUnpackingTest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "velox/dwio/common/BitUnpacking.h"

#include <arrow/util/rle_encoding.h> // @manual
#include <gtest/gtest.h>

#include <random>

using namespace facebook::velox;
using namespace facebook::velox::dwio::common;

/// Test helper that bit-packs kNumValues pseudo-random values with Arrow's
/// BitWriter, decodes them with facebook::velox::dwio::common::unpack(), and
/// compares the result against Arrow's BitReader decoding the same bytes.
///
/// T is the integer type of the decoded output values.
template <typename T>
class BitUnpackingTest {
 public:
  /// Allocates the buffers once; they are reused by every testUnpack() call.
  /// The input buffer holds kNumValues * 4 bytes and each output buffer holds
  /// kNumValues * 4 elements of T.
  BitUnpackingTest() {
    inputBuffer.resize(kNumValues * 4, 0);
    outputBuffer.resize(kNumValues * 4, 0);
    expectedOutputBuffer.resize(kNumValues * 4, 0);
  }

  /// Writes kNumValues pseudo-random values, each 'bitWidth' bits wide, into
  /// 'inputBuf' using Arrow's BitWriter.
  ///
  /// @param bitWidth Number of bits per packed value; must be less than 64.
  /// @param inputBuf Destination buffer for the packed bits.
  /// @param inputBytes Number of bytes of 'inputBuf' the writer may use.
  void populateInputBuffer(
      uint8_t bitWidth,
      uint8_t* inputBuf,
      uint32_t inputBytes) {
    // A default-constructed engine uses a fixed seed, so every call produces
    // the same reproducible sequence.
    std::default_random_engine engine;
    // A 64-bit distribution keeps the upper bound representable for any bit
    // width below 64, instead of relying on 'long'/'int' being wide enough.
    std::uniform_int_distribution<uint64_t> distribution(
        0, (uint64_t{1} << bitWidth) - 1);

    arrow::bit_util::BitWriter bitWriter(inputBuf, inputBytes);
    for (uint32_t i = 0; i < kNumValues; ++i) {
      bitWriter.PutValue(distribution(engine), bitWidth);
    }
    bitWriter.Flush(true);
  }

  /// Returns the number of bytes needed to hold kNumValues values of
  /// 'bitWidth' bits each, rounded up to a whole byte.
  uint32_t bytes(uint8_t bitWidth) {
    return (kNumValues * bitWidth + 7) / 8;
  }

  /// Packs random values of 'bitWidth' bits, decodes them with both unpack()
  /// and Arrow's BitReader, and asserts that every decoded value matches.
  void testUnpack(uint8_t bitWidth) {
    // The buffers are shared across calls; clear the outputs so results left
    // over from a previous bit width cannot mask a decoder that wrote nothing.
    outputBuffer.assign(outputBuffer.size(), 0);
    expectedOutputBuffer.assign(expectedOutputBuffer.size(), 0);

    populateInputBuffer(bitWidth, inputBuffer.data(), bytes(bitWidth));

    const uint8_t* inputIter = inputBuffer.data();
    T* outputIter = outputBuffer.data();
    facebook::velox::dwio::common::unpack(
        bitWidth, inputIter, bytes(bitWidth), kNumValues, outputIter);

    // Start again from the beginning of the input for the reference decoder.
    inputIter = inputBuffer.data();
    T* expectedOutputIter = expectedOutputBuffer.data();
    arrow::bit_util::BitReader bitReader(inputIter, bytes(bitWidth));
    bitReader.GetBatch(bitWidth, expectedOutputIter, kNumValues);

    // Assert on every value. The previous version broke out of the loop on the
    // first mismatch before reaching the assertion, so it could never fail.
    for (uint32_t i = 0; i < kNumValues; ++i) {
      ASSERT_EQ(outputBuffer[i], expectedOutputBuffer[i])
          << "Mismatch at value " << i << " for bitWidth "
          << static_cast<uint32_t>(bitWidth);
    }
  }

  /// Number of values packed and unpacked per test. It is a multiple of 8, so
  /// the packed data ends on a byte boundary for every bit width.
  static constexpr uint32_t kNumValues = 1024;

  /// Bit-packed input produced by populateInputBuffer().
  std::vector<uint8_t> inputBuffer;
  /// Values decoded by unpack().
  std::vector<T> outputBuffer;
  /// Values decoded by Arrow's BitReader, used as the reference result.
  std::vector<T> expectedOutputBuffer;
};

// Exercises unpacking into uint8_t outputs for every bit width from 1 to 8.
TEST(BitUnpackingTest, uint8) {
  BitUnpackingTest<uint8_t> fixture;
  for (uint8_t width = 1; width <= 8; ++width) {
    fixture.testUnpack(width);
  }
}

// Exercises unpacking into uint16_t outputs for every bit width from 1 to 16.
TEST(BitUnpackingTest, uint16) {
  BitUnpackingTest<uint16_t> fixture;
  for (uint8_t width = 1; width <= 16; ++width) {
    fixture.testUnpack(width);
  }
}

// Exercises unpacking into uint32_t outputs for every bit width from 1 to 16.
// NOTE(review): widths 17..32 are not covered here; confirm whether they
// should be exercised for 32-bit outputs.
TEST(BitUnpackingTest, uint32) {
  BitUnpackingTest<uint32_t> fixture;
  for (uint8_t width = 1; width <= 16; ++width) {
    fixture.testUnpack(width);
  }
}
17 changes: 17 additions & 0 deletions velox/dwio/common/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,20 @@ target_link_libraries(
${ZSTD}
${ZLIB_LIBRARIES}
${TEST_LINK_LIBS})

# The bit unpacking benchmark tests require arrow and duckdb
# NOTE(review): only VELOX_ENABLE_ARROW is checked below although the benchmark
# target also links duckdb; confirm duckdb is always available when this option
# is on.
if(VELOX_ENABLE_ARROW)
# Appends the AVX, AVX2 and BMI2 instruction-set flags to the C++ compile flags.
# NOTE(review): CMAKE_CXX_FLAGS is not target-specific, so this may also affect
# other targets in this directory; confirm that is intended.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx2 -mbmi2 -mavx")

# Unit test executable, registered with CTest under the same name.
add_executable(velox_dwio_common_bitunpacking_test BitUnpackingTest.cpp)
add_test(velox_dwio_common_bitunpacking_test
velox_dwio_common_bitunpacking_test)
target_link_libraries(velox_dwio_common_bitunpacking_test arrow
${VELOX_LINK_LIBS} gtest gtest_main)

# Benchmark executable; it is built here but not registered with add_test.
add_executable(velox_dwio_common_bitunpacking_benchmark
BitUnpackingBenchmark.cpp)
target_link_libraries(
velox_dwio_common_bitunpacking_benchmark velox_dwio_common arrow duckdb
${FOLLY} ${FOLLY_BENCHMARK})
endif()
Loading

0 comments on commit 59c9fbf

Please sign in to comment.