Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add ArrowBitsUnpackInt32() #278

Merged
merged 4 commits into from
Aug 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions r/src/materialize_int.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,19 @@ static inline int nanoarrow_materialize_int(struct ArrayViewSlice* src,
}
break;
case NANOARROW_TYPE_BOOL:
ArrowBitsUnpackInt32(
src->array_view->buffer_views[1].data.as_uint8 + raw_src_offset, raw_src_offset,
dst->length, result + dst->offset);

// Set any nulls to NA_LOGICAL
if (is_valid != NULL && src->array_view->array->null_count != 0) {
for (R_xlen_t i = 0; i < dst->length; i++) {
if (!ArrowBitGet(is_valid, raw_src_offset + i)) {
result[dst->offset + i] = NA_LOGICAL;
}
}
}
break;
case NANOARROW_TYPE_INT8:
case NANOARROW_TYPE_UINT8:
case NANOARROW_TYPE_INT16:
Expand Down
5 changes: 2 additions & 3 deletions r/src/materialize_lgl.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,8 @@ static int nanoarrow_materialize_lgl(struct ArrayViewSlice* src, struct VectorSl
}
break;
case NANOARROW_TYPE_BOOL:
for (R_xlen_t i = 0; i < dst->length; i++) {
result[dst->offset + i] = ArrowBitGet(data_buffer, src->offset + i);
}
ArrowBitsUnpackInt32(data_buffer, raw_src_offset, dst->length,
result + dst->offset);

// Set any nulls to NA_LOGICAL
if (is_valid != NULL && src->array_view->array->null_count != 0) {
Expand Down
59 changes: 54 additions & 5 deletions src/nanoarrow/buffer_inline.h
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,18 @@ static inline int64_t _ArrowBytesForBits(int64_t bits) {
return (bits >> 3) + ((bits & 7) != 0);
}

static inline void _ArrowBitmapUnpackInt8(const uint8_t word, int8_t* out) {
// Expand the eight bits of word into out[0]..out[7], least-significant bit
// first. Each output element is written as exactly 0 or 1.
static inline void _ArrowBitsUnpackInt8(const uint8_t word, int8_t* out) {
  out[0] = (word & 0x01) != 0;
  out[1] = (word & 0x02) != 0;
  out[2] = (word & 0x04) != 0;
  out[3] = (word & 0x08) != 0;
  out[4] = (word & 0x10) != 0;
  out[5] = (word & 0x20) != 0;
  out[6] = (word & 0x40) != 0;
  out[7] = (word & 0x80) != 0;
}

static inline void _ArrowBitsUnpackInt32(const uint8_t word, int32_t* out) {
out[0] = (word >> 0) & 1;
out[1] = (word >> 1) & 1;
out[2] = (word >> 2) & 1;
Expand All @@ -247,8 +258,47 @@ static inline int8_t ArrowBitGet(const uint8_t* bits, int64_t i) {
return (bits[i >> 3] >> (i & 0x07)) & 1;
}

static inline void ArrowBitmapUnpackInt8Unsafe(const uint8_t* bits, int64_t start_offset,
int64_t length, int8_t* out) {
// Unpack `length` bits of `bits`, beginning at bit index `start_offset`, into
// `out` as one int8_t (0 or 1) per bit. Nothing is written when length is 0.
static inline void ArrowBitsUnpackInt8(const uint8_t* bits, int64_t start_offset,
                                       int64_t length, int8_t* out) {
  if (length == 0) {
    return;
  }

  const int64_t end_offset = start_offset + length;
  const int64_t first_byte = start_offset / 8;
  const int64_t last_byte = (end_offset - 1) / 8;
  const int64_t lead_shift = start_offset % 8;

  // The whole range sits inside one byte: read it bit by bit.
  if (first_byte == last_byte) {
    const uint8_t* only_byte = &bits[first_byte];
    int k = 0;
    while (k < length) {
      out[k] = ArrowBitGet(only_byte, k + lead_shift);
      k++;
    }

    return;
  }

  // Leading byte: from the starting bit up to the end of that byte.
  const uint8_t* head_byte = &bits[first_byte];
  for (int64_t bit = lead_shift; bit < 8; bit++) {
    *out++ = ArrowBitGet(head_byte, bit);
  }

  // Whole bytes strictly between the first and the last: eight values each.
  int64_t byte_i = first_byte + 1;
  while (byte_i < last_byte) {
    _ArrowBitsUnpackInt8(bits[byte_i], out);
    out += 8;
    byte_i++;
  }

  // Trailing byte: a range ending on a byte boundary uses all eight bits.
  int tail_bits = end_offset % 8;
  if (tail_bits == 0) {
    tail_bits = 8;
  }
  const uint8_t* tail_byte = &bits[last_byte];
  for (int bit = 0; bit < tail_bits; bit++) {
    *out++ = ArrowBitGet(tail_byte, bit);
  }
}

static inline void ArrowBitsUnpackInt32(const uint8_t* bits, int64_t start_offset,
int64_t length, int32_t* out) {
if (length == 0) {
return;
}
Expand All @@ -261,7 +311,6 @@ static inline void ArrowBitmapUnpackInt8Unsafe(const uint8_t* bits, int64_t star
const int64_t bytes_last_valid = i_last_valid / 8;

if (bytes_begin == bytes_last_valid) {
// count bits within a single byte
for (int i = 0; i < length; i++) {
out[i] = ArrowBitGet(&bits[bytes_begin], i + i_begin % 8);
}
Expand All @@ -276,7 +325,7 @@ static inline void ArrowBitmapUnpackInt8Unsafe(const uint8_t* bits, int64_t star

// middle bytes
for (int64_t i = bytes_begin + 1; i < bytes_last_valid; i++) {
_ArrowBitmapUnpackInt8(bits[i], out);
_ArrowBitsUnpackInt32(bits[i], out);
out += 8;
}

Expand Down
82 changes: 50 additions & 32 deletions src/nanoarrow/buffer_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -272,83 +272,101 @@ TEST(BitmapTest, BitmapTestElement) {
}

template <int offset, int length>
void TestArrowBitmapUnpackInt8Unsafe(const uint8_t* bitmap, int8_t* out,
std::vector<uint8_t> expected) {
ArrowBitmapUnpackInt8Unsafe(bitmap, offset, length, out);
for (int i = 0; i < expected.size(); i++) {
void TestArrowBitmapUnpackUnsafe(const uint8_t* bitmap, std::vector<int8_t> expected) {
int8_t out[length];
int32_t out32[length];
memset(out, 0, sizeof(out));
memset(out32, 0, sizeof(out32));

ASSERT_EQ(length, expected.size());

ArrowBitsUnpackInt8(bitmap, offset, length, out);
for (int i = 0; i < length; i++) {
EXPECT_EQ(out[i], expected[i]);
}

ArrowBitsUnpackInt32(bitmap, offset, length, out32);
for (int i = 0; i < length; i++) {
EXPECT_EQ(out32[i], expected[i]);
}
}

TEST(BitmapTest, BitmapTestBitmapUnpackInt8Unsafe) {
TEST(BitmapTest, BitmapTestBitmapUnpack) {
uint8_t bitmap[3];
int8_t result[sizeof(bitmap) * 8];
int64_t n_values = sizeof(bitmap) * 8;
int8_t result[n_values];
int32_t result32[n_values];

// Basic test of a validity buffer that is all true
memset(bitmap, 0xff, sizeof(bitmap));
ArrowBitmapUnpackInt8Unsafe(bitmap, 0, sizeof(result), result);
for (int i = 0; i < sizeof(result); i++) {
memset(result, 0, sizeof(result));
memset(result32, 0, sizeof(result32));

ArrowBitsUnpackInt8(bitmap, 0, sizeof(result), result);
for (int i = 0; i < n_values; i++) {
EXPECT_EQ(result[i], 1);
}

ArrowBitsUnpackInt32(bitmap, 0, sizeof(result), result32);
for (int i = 0; i < n_values; i++) {
EXPECT_EQ(result32[i], 1);
}

// Ensure that the first byte/middle byte/last byte logic is correct
// Note that TestArrowBitmapUnpackUnsafe tests both the int8 and int32 versions
bitmap[0] = 0x93; // 10010011
bitmap[1] = 0x55; // 01010101
bitmap[2] = 0xaa; // 10101010

// offset 0, length boundary, one byte
TestArrowBitmapUnpackInt8Unsafe<0, 8>(bitmap, result, {1, 1, 0, 0, 1, 0, 0, 1});
TestArrowBitmapUnpackUnsafe<0, 8>(bitmap, {1, 1, 0, 0, 1, 0, 0, 1});

// offset 0, length boundary, different bytes
TestArrowBitmapUnpackInt8Unsafe<0, 16>(
bitmap, result, {1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0});
TestArrowBitmapUnpackUnsafe<0, 16>(bitmap,
{1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0});

// offset 0, length non-boundary, one byte
TestArrowBitmapUnpackInt8Unsafe<0, 5>(bitmap, result, {1, 1, 0, 0, 1});
TestArrowBitmapUnpackUnsafe<0, 5>(bitmap, {1, 1, 0, 0, 1});

// offset boundary, length boundary, one byte
TestArrowBitmapUnpackInt8Unsafe<8, 8>(bitmap, result, {1, 0, 1, 0, 1, 0, 1, 0});
TestArrowBitmapUnpackUnsafe<8, 8>(bitmap, {1, 0, 1, 0, 1, 0, 1, 0});

// offset boundary, length boundary, different bytes
TestArrowBitmapUnpackInt8Unsafe<8, 16>(
bitmap, result, {1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1});
TestArrowBitmapUnpackUnsafe<8, 16>(bitmap,
{1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1});

// offset boundary, length non-boundary, one byte
TestArrowBitmapUnpackInt8Unsafe<8, 5>(bitmap, result, {1, 0, 1, 0, 1});
TestArrowBitmapUnpackUnsafe<8, 5>(bitmap, {1, 0, 1, 0, 1});

// offset boundary, length non-boundary, different bytes
TestArrowBitmapUnpackInt8Unsafe<8, 13>(bitmap, result,
{1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0});
TestArrowBitmapUnpackUnsafe<8, 13>(bitmap, {1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0});

// offset non-boundary, length boundary, one byte
TestArrowBitmapUnpackInt8Unsafe<3, 5>(bitmap, result, {0, 1, 0, 0, 1});
TestArrowBitmapUnpackUnsafe<3, 5>(bitmap, {0, 1, 0, 0, 1});

// offset non-boundary, length boundary, different bytes
TestArrowBitmapUnpackInt8Unsafe<3, 13>(bitmap, result,
{0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0});
TestArrowBitmapUnpackUnsafe<3, 13>(bitmap, {0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0});

// offset non-boundary, length non-boundary, one byte
TestArrowBitmapUnpackInt8Unsafe<3, 3>(bitmap, result, {0, 1, 0});
TestArrowBitmapUnpackUnsafe<3, 3>(bitmap, {0, 1, 0});

// offset non-boundary, length non-boundary, different bytes
TestArrowBitmapUnpackInt8Unsafe<3, 11>(bitmap, result,
{0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0});
TestArrowBitmapUnpackUnsafe<3, 11>(bitmap, {0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0});

// offset non-boundary non-first byte, length boundary, one byte
TestArrowBitmapUnpackInt8Unsafe<11, 5>(bitmap, result, {0, 1, 0, 1, 0});
TestArrowBitmapUnpackUnsafe<11, 5>(bitmap, {0, 1, 0, 1, 0});

// offset non-boundary non-first byte, length boundary, different bytes
TestArrowBitmapUnpackInt8Unsafe<11, 13>(bitmap, result,
{0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1});
TestArrowBitmapUnpackUnsafe<11, 13>(bitmap, {0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1});

// offset non-boundary non-first byte, length non-boundary, one byte
TestArrowBitmapUnpackInt8Unsafe<11, 3>(bitmap, result, {0, 1, 0});
TestArrowBitmapUnpackUnsafe<11, 3>(bitmap, {0, 1, 0});

// offset non-boundary non-first byte, length non-boundary, different bytes
TestArrowBitmapUnpackInt8Unsafe<11, 11>(bitmap, result,
{0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1});
TestArrowBitmapUnpackUnsafe<11, 11>(bitmap, {0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1});

// non-boundary, three byte span
TestArrowBitmapUnpackInt8Unsafe<7, 11>(bitmap, result,
{1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1});
TestArrowBitmapUnpackUnsafe<7, 11>(bitmap, {1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1});
}

TEST(BitmapTest, BitmapTestSetTo) {
Expand Down
12 changes: 8 additions & 4 deletions src/nanoarrow/nanoarrow.h
Original file line number Diff line number Diff line change
Expand Up @@ -680,6 +680,14 @@ static inline void ArrowBitsSetTo(uint8_t* bits, int64_t start_offset, int64_t l
/// \brief Count true values in a bitmap
static inline int64_t ArrowBitCountSet(const uint8_t* bits, int64_t i_from, int64_t i_to);

/// \brief Extract int8 boolean values from a range in a bitmap
///
/// Writes one int8_t (0 or 1) to out for each of the length bits that begin at
/// bit index start_offset in bits. Nothing is written when length is zero.
/// out is written at indices 0 through length - 1, so it must have room for
/// at least length elements.
static inline void ArrowBitsUnpackInt8(const uint8_t* bits, int64_t start_offset,
int64_t length, int8_t* out);

/// \brief Extract int32 boolean values from a range in a bitmap
///
/// Writes one int32_t (0 or 1) to out for each of the length bits that begin
/// at bit index start_offset in bits. Nothing is written when length is zero.
/// out is written at indices 0 through length - 1, so it must have room for
/// at least length elements.
static inline void ArrowBitsUnpackInt32(const uint8_t* bits, int64_t start_offset,
int64_t length, int32_t* out);

/// \brief Initialize an ArrowBitmap
///
/// Initialize the builder's buffer, empty its cache, and reset the size to zero
Expand Down Expand Up @@ -716,10 +724,6 @@ static inline ArrowErrorCode ArrowBitmapAppend(struct ArrowBitmap* bitmap,
static inline void ArrowBitmapAppendUnsafe(struct ArrowBitmap* bitmap,
uint8_t bits_are_set, int64_t length);

/// \brief Extract boolean values from a range in a bitmap
static inline void ArrowBitmapUnpackInt8Unsafe(const uint8_t* bits, int64_t start_offset,
int64_t length, int8_t* out);

/// \brief Append boolean values encoded as int8_t to a bitmap
///
/// The values must all be 0 or 1.
Expand Down
Loading