Skip to content

Commit

Permalink
Add set_bits fuzz test (#6394)
Browse files Browse the repository at this point in the history
* Implement set_bits fuzz test

* Update arrow-buffer/src/util/bit_mask.rs

* Update arrow-buffer/src/util/bit_mask.rs

* fix import
  • Loading branch information
alamb committed Sep 20, 2024
1 parent 8ab18fd commit 669d405
Showing 1 changed file with 210 additions and 100 deletions.
310 changes: 210 additions & 100 deletions arrow-buffer/src/util/bit_mask.rs
Original file line number Diff line number Diff line change
Expand Up @@ -162,126 +162,236 @@ unsafe fn or_write_u64_bytes(data: &mut [u8], offset: usize, chunk: u64) {
#[cfg(test)]
mod tests {
use super::*;
use crate::bit_util::{get_bit, set_bit, unset_bit};
use rand::prelude::StdRng;
use rand::{Fill, Rng, SeedableRng};
use std::fmt::Display;

/// Copies 64 bits from the start of the source to a byte-aligned write
/// offset (bit 8), so the eight source bytes must land unchanged in
/// destination bytes 1..=8. The source contains 24 zero bits.
#[test]
fn test_set_bits_aligned() {
    // A single table-driven check: `SetBitsTest::verify` calls `set_bits` and
    // asserts both the resulting bytes and the returned null count, so no
    // separate hand-rolled call with its own asserts is needed.
    SetBitsTest {
        write_data: vec![0; 10],
        data: vec![
            0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, 0b11100111,
            0b10100101,
        ],
        offset_write: 8,
        offset_read: 0,
        len: 64,
        expected_data: vec![
            0, 0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011,
            0b11100111, 0b10100101, 0,
        ],
        expected_null_count: 24,
    }
    .verify();
}

/// Copies 64 bits from the start of the source to write offset 3, which is
/// not byte-aligned, so every source byte straddles two destination bytes.
/// The source contains 24 zero bits.
#[test]
fn test_set_bits_unaligned_destination_start() {
    // A single table-driven check: `SetBitsTest::verify` calls `set_bits` and
    // asserts both the resulting bytes and the returned null count, so no
    // separate hand-rolled call with its own asserts is needed.
    SetBitsTest {
        write_data: vec![0; 10],
        data: vec![
            0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, 0b11100111,
            0b10100101,
        ],
        offset_write: 3,
        offset_read: 0,
        len: 64,
        expected_data: vec![
            0b00111000, 0b00101111, 0b11001101, 0b11011100, 0b01011110, 0b00011111, 0b00111110,
            0b00101111, 0b00000101, 0b00000000,
        ],
        expected_null_count: 24,
    }
    .verify();
}

/// Copies only 62 bits to a byte-aligned write offset (bit 8), so the copy
/// stops two bits short of a byte boundary: the top two bits of the last
/// source byte must not be written. The copied range contains 23 zero bits.
#[test]
fn test_set_bits_unaligned_destination_end() {
    // All inputs live in the `SetBitsTest` literal; the previously declared
    // standalone locals were never read and have been dropped.
    SetBitsTest {
        write_data: vec![0; 10],
        data: vec![
            0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, 0b11100111,
            0b10100101,
        ],
        offset_write: 8,
        offset_read: 0,
        len: 62,
        expected_data: vec![
            0, 0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011,
            0b11100111, 0b00100101, 0,
        ],
        expected_null_count: 23,
    }
    .verify();
}

let len = 62;
/// Copies 95 bits where neither the read offset (5) nor the write offset (3)
/// falls on a byte boundary, and the length is not a whole number of bytes.
#[test]
fn test_set_bits_unaligned() {
    // 18 source bytes; only bits 5..100 are read
    let source = vec![
        0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, 0b11100111,
        0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, 0b11100111, 0b10100101,
        0b10011001, 0b11011011, 0b11101011, 0b11000011,
    ];
    // destination contents expected once the copy has completed
    let expected = vec![
        0b01111000, 0b01101001, 0b11100110, 0b11110110, 0b11111010, 0b11110000, 0b01111001,
        0b01101001, 0b11100110, 0b11110110, 0b11111010, 0b11110000, 0b00000001,
    ];

    let case = SetBitsTest {
        write_data: vec![0; 13],
        data: source,
        offset_write: 3,
        offset_read: 5,
        len: 95,
        expected_data: expected,
        expected_null_count: 35,
    };
    case.verify();
}

let expected_data: &[u8] = &[
0, 0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, 0b11100111,
0b00100101, 0,
];
let expected_null_count = 23;
let result = set_bits(
destination.as_mut_slice(),
source,
destination_offset,
source_offset,
len,
);
/// Checks `set_bits` against 100 randomly generated cases, comparing each
/// result with the bit-by-bit expectation computed by `SetBitsTest::regen`.
#[test]
fn set_bits_fuzz() {
    // the generator is seeded with a constant
    let mut rng = StdRng::seed_from_u64(42);
    // one instance is reused so its buffers are recycled between rounds
    let mut case = SetBitsTest::new();
    (0..100).for_each(|_| {
        case.regen(&mut rng);
        case.verify();
    });
}

assert_eq!(destination, expected_data);
assert_eq!(result, expected_null_count);
/// One `set_bits` test case: the inputs passed to `set_bits` together with
/// the output it is expected to produce.
#[derive(Debug, Default)]
struct SetBitsTest {
/// target write data
write_data: Vec<u8>,
/// source data
data: Vec<u8>,
/// bit offset into `write_data` at which the first bit is written
offset_write: usize,
/// bit offset into `data` from which the first bit is read
offset_read: usize,
/// number of bits to copy
len: usize,
/// the expected contents of write_data after the test
expected_data: Vec<u8>,
/// the expected number of nulls copied at the end of the test
/// (`regen` counts one null per copied bit that is unset)
expected_null_count: usize,
}

#[test]
fn test_set_bits_unaligned() {
let mut destination: Vec<u8> = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
let source: &[u8] = &[
0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, 0b11100111,
0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, 0b11100111, 0b10100101,
0b10011001, 0b11011011, 0b11101011, 0b11000011,
];
/// Displays a byte slice as groups of eight binary digits, for example
/// "01010101 10101010". Every group is followed by one space and the whole
/// output ends with one additional space.
struct BinaryFormatter<'a>(&'a [u8]);

impl<'a> Display for BinaryFormatter<'a> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // zero-padded 8 digit binary per byte, stopping at the first write error
        self.0
            .iter()
            .try_for_each(|byte| write!(f, "{:08b} ", byte))?;
        // trailing separator
        write!(f, " ")
    }
}

let destination_offset = 3;
let source_offset = 5;
/// Multi-line rendering used in assertion failure messages: the three byte
/// buffers are shown in binary, the remaining fields as plain numbers.
impl Display for SetBitsTest {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // pull the fields out once so each output line below stays short
        let Self {
            write_data,
            data,
            offset_write,
            offset_read,
            len,
            expected_data,
            expected_null_count,
        } = self;

        writeln!(f, "SetBitsTest {{")?;
        // byte buffers, rendered as binary
        writeln!(f, " write_data: {}", BinaryFormatter(write_data))?;
        writeln!(f, " data: {}", BinaryFormatter(data))?;
        writeln!(f, " expected_data: {}", BinaryFormatter(expected_data))?;
        // scalar parameters
        writeln!(f, " offset_write: {}", offset_write)?;
        writeln!(f, " offset_read: {}", offset_read)?;
        writeln!(f, " len: {}", len)?;
        writeln!(f, " expected_null_count: {}", expected_null_count)?;
        writeln!(f, "}}")
    }
}

let len = 95;
impl SetBitsTest {
/// create a new instance of FuzzData
fn new() -> Self {
Self::default()
}

let expected_data: &[u8] = &[
0b01111000, 0b01101001, 0b11100110, 0b11110110, 0b11111010, 0b11110000, 0b01111001,
0b01101001, 0b11100110, 0b11110110, 0b11111010, 0b11110000, 0b00000001,
];
let expected_null_count = 35;
let result = set_bits(
destination.as_mut_slice(),
source,
destination_offset,
source_offset,
len,
);
/// Overwrites every field of this instance with a freshly generated random
/// case and the output that copying `len` bits is expected to produce
fn regen(&mut self, rng: &mut StdRng) {
    // Layout of the source (read) buffer:
    // ------------------+-----------------+-------
    // .. offset_read .. | data            | ...
    // ------------------+-----------------+-------
    //
    // Layout of the destination (write) buffer:
    // -------------------+-----------------+-------
    // .. offset_write .. | (data to write) | ...
    // -------------------+-----------------+-------

    // how many bits get copied
    let len: usize = rng.gen_range(0..=200);

    // destination: starting bit, the whole bytes needed to cover that offset
    // (rounded up), and some slack after the copied range
    let offset_write_bits: usize = rng.gen_range(0..=200);
    let offset_write_bytes = offset_write_bits / 8 + usize::from(offset_write_bits % 8 != 0);
    let extra_write_data_bytes: usize = rng.gen_range(0..=5); // small range so 0 shows up often

    // source: slack after the read range, starting bit, and the whole bytes
    // needed to cover that offset (rounded up)
    let extra_read_data_bytes: usize = rng.gen_range(0..=5); // small range so 0 shows up often
    let offset_read_bits: usize = rng.gen_range(0..=200);
    let offset_read_bytes = offset_read_bits / 8 + usize::from(offset_read_bits % 8 != 0);

    // Destination buffer, all zeros. `len` counts bits but is added as a byte
    // count, so the buffer is larger than strictly required.
    self.write_data.clear();
    self.write_data
        .resize(offset_write_bytes + len + extra_write_data_bytes, 0);

    // Interestingly set_bits seems to assume the output is already zeroed:
    // the fuzz tests fail when the destination is filled with random bytes
    // first, i.e. when the following line is uncommented
    //self.write_data.try_fill(rng).unwrap();
    self.offset_write = offset_write_bits;

    // Source buffer: sized like the destination, then filled with random bytes
    self.data
        .resize(offset_read_bytes + len + extra_read_data_bytes, 0);
    self.data.try_fill(rng).unwrap();
    self.offset_read = offset_read_bits;

    self.len = len;

    // Generate the expected output (not efficient): start from a copy of the
    // destination and transfer the bits one at a time
    self.expected_data.clear();
    self.expected_data.extend_from_slice(&self.write_data);

    let mut nulls = 0;
    for i in 0..len {
        let target = self.offset_write + i;
        if get_bit(&self.data, self.offset_read + i) {
            set_bit(&mut self.expected_data, target);
        } else {
            // an unset source bit counts as one null
            unset_bit(&mut self.expected_data, target);
            nulls += 1;
        }
    }
    self.expected_null_count = nulls;
}

assert_eq!(destination, expected_data);
assert_eq!(result, expected_null_count);
/// Runs `set_bits` with this case's parameters on a copy of `write_data` and
/// asserts that both the written bytes and the returned null count match the
/// expected values
fn verify(&self) {
    // work on a copy so the same case can be verified repeatedly
    let mut actual = self.write_data.clone();
    let null_count = set_bits(
        actual.as_mut_slice(),
        self.data.as_slice(),
        self.offset_write,
        self.offset_read,
        self.len,
    );

    // the whole case is included in the failure message to aid reproduction
    assert_eq!(actual, self.expected_data, "self: {}", self);
    assert_eq!(null_count, self.expected_null_count, "self: {}", self);
}
}

#[test]
Expand Down

0 comments on commit 669d405

Please sign in to comment.