Skip to content

Commit

Permalink
Fix out of bounds read in bit chunk iterator (#416)
Browse files Browse the repository at this point in the history
* Fix out of bounds read in bit chunk iterator

* Add comment why reading one additional byte is enough
  • Loading branch information
jhorstmann authored and alamb committed Jun 8, 2021
1 parent e3d6792 commit 7b9f8de
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 9 deletions.
19 changes: 19 additions & 0 deletions arrow/benches/boolean_kernels.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,25 @@ fn add_benchmark(c: &mut Criterion) {
c.bench_function("and", |b| b.iter(|| bench_and(&array1, &array2)));
c.bench_function("or", |b| b.iter(|| bench_or(&array1, &array2)));
c.bench_function("not", |b| b.iter(|| bench_not(&array1)));

let array1_slice = array1.slice(1, size - 1);
let array1_slice = array1_slice
.as_any()
.downcast_ref::<BooleanArray>()
.unwrap();
let array2_slice = array2.slice(1, size - 1);
let array2_slice = array2_slice
.as_any()
.downcast_ref::<BooleanArray>()
.unwrap();

c.bench_function("and_sliced", |b| {
b.iter(|| bench_and(&array1_slice, &array2_slice))
});
c.bench_function("or_sliced", |b| {
b.iter(|| bench_or(&array1_slice, &array2_slice))
});
c.bench_function("not_sliced", |b| b.iter(|| bench_not(&array1_slice)));
}

criterion_group!(benches, add_benchmark);
Expand Down
36 changes: 27 additions & 9 deletions arrow/src/util/bit_chunk_iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,10 @@ impl<'a> BitChunks<'a> {
let byte_offset = offset / 8;
let bit_offset = offset % 8;

let chunk_bits = 8 * std::mem::size_of::<u64>();

let chunk_len = len / chunk_bits;
let remainder_len = len & (chunk_bits - 1);
// number of complete u64 chunks
let chunk_len = len / 64;
// number of remaining bits
let remainder_len = len % 64;

BitChunks::<'a> {
buffer: &buffer[byte_offset..],
Expand Down Expand Up @@ -137,14 +137,18 @@ impl Iterator for BitChunkIterator<'_> {
// so when reading as u64 on a big-endian machine, the bytes need to be swapped
let current = unsafe { std::ptr::read_unaligned(raw_data.add(index)).to_le() };

let combined = if self.bit_offset == 0 {
let bit_offset = self.bit_offset;

let combined = if bit_offset == 0 {
current
} else {
let next =
unsafe { std::ptr::read_unaligned(raw_data.add(index + 1)).to_le() };
// the constructor ensures that bit_offset is in 0..8
// that means we need to read at most one additional byte to fill in the high bits
let next = unsafe {
std::ptr::read_unaligned(raw_data.add(index + 1) as *const u8) as u64
};

current >> self.bit_offset
| (next & ((1 << self.bit_offset) - 1)) << (64 - self.bit_offset)
(current >> bit_offset) | (next << (64 - bit_offset))
};

self.index = index + 1;
Expand Down Expand Up @@ -254,4 +258,18 @@ mod tests {
bitchunks.remainder_bits()
);
}

#[test]
fn test_iter_remainder_out_of_bounds() {
// allocating a full page should trigger a fault when reading out of bounds
const ALLOC_SIZE: usize = 4 * 1024;
let input = vec![0xFF_u8; ALLOC_SIZE];

let buffer: Buffer = Buffer::from(input);

let bitchunks = buffer.bit_chunks(57, ALLOC_SIZE * 8 - 57);

assert_eq!(u64::MAX, bitchunks.iter().last().unwrap());
assert_eq!(0x7F, bitchunks.remainder_bits());
}
}

0 comments on commit 7b9f8de

Please sign in to comment.