Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix validity checks for char #3853

Merged
merged 1 commit into from
Jan 25, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 106 additions & 24 deletions kani-compiler/src/kani_middle/transform/check_values.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ use stable_mir::mir::{
Statement, StatementKind, Terminator, TerminatorKind,
};
use stable_mir::target::{MachineInfo, MachineSize};
use stable_mir::ty::{AdtKind, IndexedVal, MirConst, RigidTy, Ty, TyKind, UintTy};
use stable_mir::ty::{AdtKind, IndexedVal, MirConst, RigidTy, Span, Ty, TyKind, UintTy};
use std::fmt::Debug;
use strum_macros::AsRefStr;
use tracing::{debug, trace};
Expand Down Expand Up @@ -164,7 +164,18 @@ pub struct ValidValueReq {
/// Size of this requirement.
size: MachineSize,
/// The range restriction is represented by a Scalar.
valid_range: WrappingRange,
valid_range: ValidityRange,
}

#[derive(Clone, Debug, Eq, PartialEq, Hash)]
enum ValidityRange {
/// The value validity fits in a single value range.
/// This includes cases where the full range is covered.
Single(WrappingRange),
/// The validity includes more than one value range.
/// Currently, this is only the case for `char`, which has two ranges.
/// If more cases come up, we could turn this into a vector instead.
Multiple([WrappingRange; 2]),
}

// TODO: Optimize checks by merging requirements whenever possible.
Expand All @@ -180,44 +191,88 @@ impl ValidValueReq {
/// It's not possible to define a `rustc_layout_scalar_valid_range_*` to any other structure.
/// Note that this annotation only applies to the first scalar in the layout.
pub fn try_from_ty(machine_info: &MachineInfo, ty: Ty) -> Option<ValidValueReq> {
let shape = ty.layout().unwrap().shape();
match shape.abi {
ValueAbi::Scalar(Scalar::Initialized { value, valid_range })
| ValueAbi::ScalarPair(Scalar::Initialized { value, valid_range }, _) => {
Some(ValidValueReq { offset: 0, size: value.size(machine_info), valid_range })
if ty.kind().is_char() {
Some(ValidValueReq {
offset: 0,
size: MachineSize::from_bits(size_of::<char>() * 8),
valid_range: ValidityRange::Multiple([
WrappingRange { start: 0, end: 0xD7FF },
WrappingRange { start: 0xE000, end: char::MAX.into() },
]),
})
} else {
let shape = ty.layout().unwrap().shape();
match shape.abi {
ValueAbi::Scalar(Scalar::Initialized { value, valid_range })
| ValueAbi::ScalarPair(Scalar::Initialized { value, valid_range }, _) => {
Some(ValidValueReq {
offset: 0,
size: value.size(machine_info),
valid_range: ValidityRange::Single(valid_range),
})
}
ValueAbi::Scalar(_)
| ValueAbi::ScalarPair(_, _)
| ValueAbi::Uninhabited
| ValueAbi::Vector { .. }
| ValueAbi::Aggregate { .. } => None,
}
ValueAbi::Scalar(_)
| ValueAbi::ScalarPair(_, _)
| ValueAbi::Uninhabited
| ValueAbi::Vector { .. }
| ValueAbi::Aggregate { .. } => None,
}
}

/// Check if range is full.
pub fn is_full(&self) -> bool {
self.valid_range.is_full(self.size).unwrap()
if let ValidityRange::Single(valid_range) = self.valid_range {
valid_range.is_full(self.size).unwrap()
} else {
false
}
}

/// Check if this range contains `other` range.
///
/// I.e., `scalar_2` ⊆ `scalar_1`
pub fn contains(&self, other: &ValidValueReq) -> bool {
assert_eq!(self.size, other.size);
match (self.valid_range.wraps_around(), other.valid_range.wraps_around()) {
(true, true) | (false, false) => {
self.valid_range.start <= other.valid_range.start
&& self.valid_range.end >= other.valid_range.end
match (&self.valid_range, &other.valid_range) {
(ValidityRange::Single(this_range), ValidityRange::Single(other_range)) => {
range_contains(this_range, other_range, self.size)
}
(ValidityRange::Multiple(this_ranges), ValidityRange::Single(other_range)) => {
range_contains(&this_ranges[0], other_range, self.size)
|| range_contains(&this_ranges[1], other_range, self.size)
}
(ValidityRange::Single(this_range), ValidityRange::Multiple(other_ranges)) => {
range_contains(this_range, &other_ranges[0], self.size)
&& range_contains(this_range, &other_ranges[1], self.size)
}
(true, false) => {
self.valid_range.start <= other.valid_range.start
|| self.valid_range.end >= other.valid_range.end
(ValidityRange::Multiple(this_ranges), ValidityRange::Multiple(other_ranges)) => {
let contains = (range_contains(&this_ranges[0], &other_ranges[0], self.size)
|| range_contains(&this_ranges[1], &other_ranges[0], self.size))
&& (range_contains(&this_ranges[0], &other_ranges[1], self.size)
|| range_contains(&this_ranges[1], &other_ranges[1], self.size));
// Multiple today only cover `char` case.
debug_assert!(
contains,
"Expected validity of `char` for Multiple ranges. Found: {self:?}, {other:?}"
);
contains
}
(false, true) => self.is_full(),
}
}
}

/// Check if range `r1` contains range `r2`.
///
/// I.e., `r2` ⊆ `r1`
fn range_contains(r1: &WrappingRange, r2: &WrappingRange, sz: MachineSize) -> bool {
match (r1.wraps_around(), r2.wraps_around()) {
(true, true) | (false, false) => r1.start <= r2.start && r1.end >= r2.end,
(true, false) => r1.start <= r2.start || r1.end >= r2.end,
(false, true) => r1.is_full(sz).unwrap(),
}
}

#[derive(AsRefStr, Clone, Debug)]
enum SourceOp {
/// Validity checks are done on a byte level when the Rvalue can generate invalid value.
Expand Down Expand Up @@ -763,8 +818,6 @@ pub fn build_limits(
let span = source.span(body.blocks());
debug!(?req, ?rvalue_ptr, ?span, "build_limits");
let primitive_ty = uint_ty(req.size.bytes());
let start_const = body.new_uint_operand(req.valid_range.start, primitive_ty, span);
let end_const = body.new_uint_operand(req.valid_range.end, primitive_ty, span);
let orig_ptr = if req.offset != 0 {
let start_ptr =
move_local(body.insert_assignment(rvalue_ptr, source, InsertPosition::Before));
Expand Down Expand Up @@ -799,6 +852,35 @@ pub fn build_limits(
InsertPosition::Before,
);
let value = Operand::Copy(Place { local: value_ptr, projection: vec![ProjectionElem::Deref] });
match &req.valid_range {
ValidityRange::Single(range) => {
build_single_limit(body, range, source, span, primitive_ty, value)
}
ValidityRange::Multiple([range1, range2]) => {
// Build `let valid = range1.contains(value) || range2.contains(value);
let cond1 = build_single_limit(body, range1, source, span, primitive_ty, value.clone());
let cond2 = build_single_limit(body, range2, source, span, primitive_ty, value);
body.insert_binary_op(
BinOp::BitOr,
move_local(cond1),
move_local(cond2),
source,
InsertPosition::Before,
)
}
}
}

fn build_single_limit(
body: &mut MutableBody,
range: &WrappingRange,
source: &mut SourceInstruction,
span: Span,
primitive_ty: UintTy,
value: Operand,
) -> Local {
let start_const = body.new_uint_operand(range.start, primitive_ty, span);
let end_const = body.new_uint_operand(range.end, primitive_ty, span);
let start_result = body.insert_binary_op(
BinOp::Ge,
value.clone(),
Expand All @@ -808,7 +890,7 @@ pub fn build_limits(
);
let end_result =
body.insert_binary_op(BinOp::Le, value, end_const, source, InsertPosition::Before);
if req.valid_range.wraps_around() {
if range.wraps_around() {
// valid >= start || valid <= end
body.insert_binary_op(
BinOp::BitOr,
Expand Down
37 changes: 37 additions & 0 deletions tests/expected/valid-value-checks/char_validity.expected
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
Checking harness check_invalid_char_unit_wrapper_should_fail...
- Status: UNREACHABLE\
- Description: ""Unreachable code: Expected invalid char wrapper detection""
Failed Checks: Undefined Behavior: Invalid value of type `TwoFields<char, ()>`
Failed Checks: Undefined Behavior: Invalid value of type `TwoFields<(), char>`
VERIFICATION:- FAILED

Checking harness check_invalid_char_nonzero_wrapper_should_fail...
- Status: UNREACHABLE\
- Description: ""Unreachable code: Expected invalid char / NonZero detection""
Failed Checks: Undefined Behavior: Invalid value of type `TwoFields<char, std::num::NonZero<u32>>`
VERIFICATION:- FAILED

Checking harness check_invalid_char_should_fail...
- Status: UNREACHABLE\
- Description: ""Unreachable code: Expected invalid char detection""
Failed Checks: Undefined Behavior: Invalid value of type `char`
VERIFICATION:- FAILED


Checking harness check_valid_mixed_wrapper...
VERIFICATION:- SUCCESSFUL

Checking harness check_valid_char_wrappers...
VERIFICATION:- SUCCESSFUL

Checking harness cannot_dereference_invalid_char...
VERIFICATION:- SUCCESSFUL

Checking harness check_char_ok...
VERIFICATION:- SUCCESSFUL

Summary:
Verification failed for - check_invalid_char_unit_wrapper_should_fail
Verification failed for - check_invalid_char_nonzero_wrapper_should_fail
Verification failed for - check_invalid_char_should_fail
Complete - 4 successfully verified harnesses, 3 failures, 7 total.
112 changes: 112 additions & 0 deletions tests/expected/valid-value-checks/char_validity.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
// Copyright Kani Contributors
// SPDX-License-Identifier: Apache-2.0 OR MIT
// kani-flags: -Z valid-value-checks -Z mem-predicates
//! Check that Kani can correctly identify value validity of `char` and structures with `char`.
//! Note that we use `black_box` hint to ensure the logic doesn't get removed as dead code.

use std::num::NonZeroU32;

#[repr(C)]
#[derive(Copy, Clone, kani::Arbitrary)]
struct OneField<T>(T);

#[repr(C)]
#[derive(Copy, Clone, kani::Arbitrary)]
struct TwoFields<T, U>(T, U);

#[repr(C)]
#[derive(Copy, Clone, kani::Arbitrary)]
struct ThreeFields<T, U, V>(T, U, V);

/// Check that valid u32's are all identified as valid.
#[kani::proof]
fn check_char_ok() {
let val = kani::any_where(|v: &u32| char::from_u32(*v).is_some());
assert!(kani::mem::can_dereference(&val as *const _ as *const char));
let c1: char = unsafe { std::mem::transmute(val) };
let c2 = unsafe { char::from_u32_unchecked(val) };
let c3 = char::from_u32(val).unwrap();
assert_eq!(c1, c2);
assert_eq!(c2, c3);
}

/// Check that all invalid u32's identified as invalid.
#[kani::proof]
fn cannot_dereference_invalid_char() {
let val = kani::any_where(|v: &u32| char::from_u32(*v).is_none());
assert!(!kani::mem::can_dereference(&val as *const _ as *const char));
}

/// Check that transmuting from invalid u32's trigger a UB check.
#[kani::proof]
fn check_invalid_char_should_fail() {
let val = kani::any_where(|v: &u32| char::from_u32(*v).is_none());
let _ = if kani::any() {
unsafe { char::from_u32_unchecked(val) }
} else {
unsafe { std::mem::transmute(val) }
};
assert!(false, "Unreachable code: Expected invalid char detection");
}

#[kani::proof]
fn check_valid_char_wrappers() {
let v1 = kani::any_where(|v: &u32| char::from_u32(*v).is_some());
let v2 = kani::any_where(|v: &u32| char::from_u32(*v).is_some());
let v3 = kani::any_where(|v: &u32| char::from_u32(*v).is_some());
assert!(kani::mem::can_dereference(&OneField(v1) as *const _ as *const OneField<char>));
assert!(kani::mem::can_dereference(
&TwoFields(v1, v2) as *const _ as *const TwoFields<char, char>
));
assert!(kani::mem::can_dereference(
&ThreeFields(v1, v2, v3) as *const _ as *const ThreeFields<char, char, char>
));
}

/// Ensure that we correctly identify validity of a structure with fields with different
/// requirements.
#[kani::proof]
fn check_valid_mixed_wrapper() {
let unicode = kani::any_where(|v: &u32| char::from_u32(*v).is_some());
let non_zero = kani::any_where(|v: &u32| *v != 0);
assert!(kani::mem::can_dereference(
&TwoFields(unicode, non_zero) as *const _ as *const TwoFields<char, NonZeroU32>
));
assert!(kani::mem::can_dereference(
&TwoFields(non_zero, unicode) as *const _ as *const TwoFields<NonZeroU32, char>
));
assert!(kani::mem::can_dereference(
&TwoFields((), unicode) as *const _ as *const TwoFields<(), char>
));
}

/// Check that transmuting from invalid wrappers trigger UB check failure.
#[kani::proof]
fn check_invalid_char_nonzero_wrapper_should_fail() {
let unicode = kani::any_where(|v: &u32| char::from_u32(*v).is_some());
let non_unicode = kani::any_where(|v: &u32| char::from_u32(*v).is_none());
let non_zero = kani::any_where(|v: &u32| *v != 0);
let var: TwoFields<char, NonZeroU32> = if kani::any() {
unsafe { std::mem::transmute(TwoFields(non_unicode, non_zero)) }
} else {
unsafe { std::mem::transmute(TwoFields(unicode, 0)) }
};
// Ensure the condition above does not get pruned.
std::hint::black_box(var);
assert!(false, "Unreachable code: Expected invalid char / NonZero detection");
}

/// Check that transmuting from invalid wrappers trigger UB check failure independent
/// on the position of the unit field.
#[kani::proof]
fn check_invalid_char_unit_wrapper_should_fail() {
let non_unicode = kani::any_where(|v: &u32| char::from_u32(*v).is_none());
if kani::any() {
let var: TwoFields<char, ()> = unsafe { std::mem::transmute(TwoFields(non_unicode, ())) };
std::hint::black_box(var);
} else {
let var: TwoFields<(), char> = unsafe { std::mem::transmute(TwoFields((), non_unicode)) };
std::hint::black_box(var);
}
assert!(false, "Unreachable code: Expected invalid char wrapper detection");
}
Loading