Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix undefined behavior in FFI and enable MIRI checks on CI #323

Merged
merged 1 commit into from
May 23, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -259,8 +259,9 @@ jobs:
export MIRIFLAGS="-Zmiri-disable-isolation"
cargo miri setup
cargo clean
# Ignore MIRI errors until we can get a clean run
cargo miri test || true
# Currently only the arrow crate is tested with miri
# IO related tests and some unsupported tests are skipped
cargo miri test -p arrow -- --skip csv --skip ipc --skip json
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is epic. Nice work @roee88 !


lint:
name: Lint
Expand Down
1 change: 1 addition & 0 deletions arrow/src/array/raw_pointer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ mod tests {

#[test]
#[should_panic(expected = "memory is not aligned")]
#[cfg_attr(miri, ignore)] // sometimes does not panic as expected
fn test_primitive_array_alignment() {
let bytes = vec![0u8, 1u8];
unsafe { RawPtrBox::<u64>::new(bytes.as_ptr().offset(1)) };
Expand Down
1 change: 1 addition & 0 deletions arrow/src/compute/kernels/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3521,6 +3521,7 @@ mod tests {
}

#[test]
#[cfg_attr(miri, ignore)] // running forever
fn test_can_cast_types() {
// this function attempts to ensure that can_cast_types stays
// in sync with cast. It simply tries all combinations of
Expand Down
1 change: 1 addition & 0 deletions arrow/src/compute/kernels/cast_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,7 @@ mod tests {
}

#[test]
#[cfg_attr(miri, ignore)] // unsupported operation: can't call foreign function: mktime
fn string_to_timestamp_no_timezone() -> Result<()> {
// This test is designed to succeed in regardless of the local
// timezone the test machine is running. Thus it is still
Expand Down
2 changes: 2 additions & 0 deletions arrow/src/compute/kernels/length.rs
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ mod tests {
}

#[test]
#[cfg_attr(miri, ignore)] // running forever
fn length_test_string() -> Result<()> {
length_cases()
.into_iter()
Expand All @@ -170,6 +171,7 @@ mod tests {
}

#[test]
#[cfg_attr(miri, ignore)] // running forever
fn length_test_large_string() -> Result<()> {
length_cases()
.into_iter()
Expand Down
31 changes: 16 additions & 15 deletions arrow/src/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -153,27 +153,30 @@ impl FFI_ArrowSchema {
_ => vec![],
};
// note: this cannot be done along with the above because the above is fallible and this op leaks.
let mut children_ptr = children_vec
let children_ptr = children_vec
.into_iter()
.map(Box::into_raw)
.collect::<Box<_>>();
let n_children = children_ptr.len() as i64;
let children = children_ptr.as_mut_ptr();

let flags = field.is_nullable() as i64 * 2;

let mut private = Box::new(SchemaPrivateData {
field,
children_ptr,
});

// <https://arrow.apache.org/docs/format/CDataInterface.html#c.ArrowSchema>
Ok(FFI_ArrowSchema {
format: CString::new(format).unwrap().into_raw(),
name: CString::new(name).unwrap().into_raw(),
metadata: std::ptr::null_mut(),
flags: field.is_nullable() as i64 * 2,
flags,
n_children,
children,
children: private.children_ptr.as_mut_ptr(),
dictionary: std::ptr::null_mut(),
release: Some(release_schema),
private_data: Box::into_raw(Box::new(SchemaPrivateData {
field,
children_ptr,
})) as *mut ::std::os::raw::c_void,
private_data: Box::into_raw(private) as *mut ::std::os::raw::c_void,
})
}

Expand Down Expand Up @@ -448,27 +451,25 @@ impl FFI_ArrowArray {
.collect::<Vec<_>>();
let n_buffers = buffers.len() as i64;

let mut buffers_ptr = buffers
let buffers_ptr = buffers
.iter()
.map(|maybe_buffer| match maybe_buffer {
// note that `raw_data` takes into account the buffer's offset
Some(b) => b.as_ptr() as *const std::os::raw::c_void,
None => std::ptr::null(),
})
.collect::<Box<[_]>>();
let pointer = buffers_ptr.as_mut_ptr();

let mut children = data
let children = data
.child_data()
.iter()
.map(|child| Box::into_raw(Box::new(FFI_ArrowArray::new(child))))
.collect::<Box<_>>();
let children_ptr = children.as_mut_ptr();
let n_children = children.len() as i64;

// create the private data owning everything.
// any other data must be added here, e.g. via a struct, to track lifetime.
let private_data = Box::new(PrivateData {
let mut private_data = Box::new(PrivateData {
buffers,
buffers_ptr,
children,
Expand All @@ -480,8 +481,8 @@ impl FFI_ArrowArray {
offset: data.offset() as i64,
n_buffers,
n_children,
buffers: pointer,
children: children_ptr,
buffers: private_data.buffers_ptr.as_mut_ptr(),
children: private_data.children.as_mut_ptr(),
dictionary: std::ptr::null_mut(),
release: Some(release_array),
private_data: Box::into_raw(private_data) as *mut ::std::os::raw::c_void,
Expand Down
4 changes: 4 additions & 0 deletions arrow/src/util/bit_chunk_iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ mod tests {
}

#[test]
#[cfg_attr(miri, ignore)] // error: Undefined Behavior: using uninitialized data, but this operation requires initialized memory
fn test_iter_unaligned() {
let input: &[u8] = &[
0b00000000, 0b00000001, 0b00000010, 0b00000100, 0b00001000, 0b00010000,
Expand All @@ -205,6 +206,7 @@ mod tests {
}

#[test]
#[cfg_attr(miri, ignore)] // error: Undefined Behavior: using uninitialized data, but this operation requires initialized memory
fn test_iter_unaligned_remainder_1_byte() {
let input: &[u8] = &[
0b00000000, 0b00000001, 0b00000010, 0b00000100, 0b00001000, 0b00010000,
Expand All @@ -226,6 +228,7 @@ mod tests {
}

#[test]
#[cfg_attr(miri, ignore)] // error: Undefined Behavior: using uninitialized data, but this operation requires initialized memory
fn test_iter_unaligned_remainder_bits_across_bytes() {
let input: &[u8] = &[0b00111111, 0b11111100];
let buffer: Buffer = Buffer::from(input);
Expand All @@ -239,6 +242,7 @@ mod tests {
}

#[test]
#[cfg_attr(miri, ignore)] // error: Undefined Behavior: using uninitialized data, but this operation requires initialized memory
fn test_iter_unaligned_remainder_bits_large() {
let input: &[u8] = &[
0b11111111, 0b00000000, 0b11111111, 0b00000000, 0b11111111, 0b00000000,
Expand Down
1 change: 1 addition & 0 deletions arrow/src/util/integration_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -722,6 +722,7 @@ mod tests {
}

#[test]
#[cfg_attr(miri, ignore)] // running forever
fn test_arrow_data_equality() {
let secs_tz = Some("Europe/Budapest".to_string());
let millis_tz = Some("America/New_York".to_string());
Expand Down