Skip to content

Commit

Permalink
Auto merge of #87123 - RalfJung:miri-provenance-overhaul, r=oli-obk
Browse files Browse the repository at this point in the history
CTFE/Miri engine Pointer type overhaul

This fixes the long-standing problem that we are using `Scalar` as a type to represent pointers that might be integer values (since they point to a ZST). The main problem is that with int-to-ptr casts, there are multiple ways to represent the same pointer as a `Scalar` and it is unclear if "normalization" (i.e., the cast) already happened or not. This leads to ugly methods like `force_mplace_ptr` and `force_op_ptr`.
Another problem this solves is that in Miri, it would make a lot more sense to have the `Pointer::offset` field represent the full absolute address (instead of being relative to the `AllocId`). This means we can do ptr-to-int casts without access to any machine state, and it means that the overflow checks on pointer arithmetic are (finally!) accurate.

To solve this, the `Pointer` type is made entirely parametric over the provenance, so that we can use `Pointer<AllocId>` inside `Scalar` but use `Pointer<Option<AllocId>>` when accessing memory (where `None` represents the case that we could not figure out an `AllocId`; in that case the `offset` is an absolute address). Moreover, the `Provenance` trait determines if a pointer with a given provenance can be cast to an integer by simply dropping the provenance.

I hope this can be read commit-by-commit, but the first commit does the bulk of the work. It introduces some FIXMEs that are resolved later.
Fixes rust-lang/miri#841
Miri PR: rust-lang/miri#1851
r? `@oli-obk`
  • Loading branch information
bors committed Jul 17, 2021
2 parents f502bd3 + efbee50 commit c78ebb7
Show file tree
Hide file tree
Showing 106 changed files with 1,311 additions and 1,401 deletions.
21 changes: 11 additions & 10 deletions compiler/rustc_codegen_cranelift/src/constant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -193,20 +193,21 @@ pub(crate) fn codegen_const_value<'tcx>(
place.to_cvalue(fx)
}
}
Scalar::Ptr(ptr) => {
let alloc_kind = fx.tcx.get_global_alloc(ptr.alloc_id);
Scalar::Ptr(ptr, _size) => {
let (alloc_id, offset) = ptr.into_parts(); // we know the `offset` is relative
let alloc_kind = fx.tcx.get_global_alloc(alloc_id);
let base_addr = match alloc_kind {
Some(GlobalAlloc::Memory(alloc)) => {
let data_id = data_id_for_alloc_id(
&mut fx.constants_cx,
fx.module,
ptr.alloc_id,
alloc_id,
alloc.mutability,
);
let local_data_id =
fx.module.declare_data_in_func(data_id, &mut fx.bcx.func);
if fx.clif_comments.enabled() {
fx.add_comment(local_data_id, format!("{:?}", ptr.alloc_id));
fx.add_comment(local_data_id, format!("{:?}", alloc_id));
}
fx.bcx.ins().global_value(fx.pointer_type, local_data_id)
}
Expand All @@ -226,10 +227,10 @@ pub(crate) fn codegen_const_value<'tcx>(
}
fx.bcx.ins().global_value(fx.pointer_type, local_data_id)
}
None => bug!("missing allocation {:?}", ptr.alloc_id),
None => bug!("missing allocation {:?}", alloc_id),
};
let val = if ptr.offset.bytes() != 0 {
fx.bcx.ins().iadd_imm(base_addr, i64::try_from(ptr.offset.bytes()).unwrap())
let val = if offset.bytes() != 0 {
fx.bcx.ins().iadd_imm(base_addr, i64::try_from(offset.bytes()).unwrap())
} else {
base_addr
};
Expand Down Expand Up @@ -406,7 +407,7 @@ fn define_all_allocs(tcx: TyCtxt<'_>, module: &mut dyn Module, cx: &mut Constant
let bytes = alloc.inspect_with_uninit_and_ptr_outside_interpreter(0..alloc.len()).to_vec();
data_ctx.define(bytes.into_boxed_slice());

for &(offset, (_tag, reloc)) in alloc.relocations().iter() {
for &(offset, alloc_id) in alloc.relocations().iter() {
let addend = {
let endianness = tcx.data_layout.endian;
let offset = offset.bytes() as usize;
Expand All @@ -417,7 +418,7 @@ fn define_all_allocs(tcx: TyCtxt<'_>, module: &mut dyn Module, cx: &mut Constant
read_target_uint(endianness, bytes).unwrap()
};

let reloc_target_alloc = tcx.get_global_alloc(reloc).unwrap();
let reloc_target_alloc = tcx.get_global_alloc(alloc_id).unwrap();
let data_id = match reloc_target_alloc {
GlobalAlloc::Function(instance) => {
assert_eq!(addend, 0);
Expand All @@ -427,7 +428,7 @@ fn define_all_allocs(tcx: TyCtxt<'_>, module: &mut dyn Module, cx: &mut Constant
continue;
}
GlobalAlloc::Memory(target_alloc) => {
data_id_for_alloc_id(cx, module, reloc, target_alloc.mutability)
data_id_for_alloc_id(cx, module, alloc_id, target_alloc.mutability)
}
GlobalAlloc::Static(def_id) => {
if tcx.codegen_fn_attrs(def_id).flags.contains(CodegenFnAttrFlags::THREAD_LOCAL)
Expand Down
9 changes: 5 additions & 4 deletions compiler/rustc_codegen_llvm/src/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -243,16 +243,17 @@ impl ConstMethods<'tcx> for CodegenCx<'ll, 'tcx> {
self.const_bitcast(llval, llty)
}
}
Scalar::Ptr(ptr) => {
let (base_addr, base_addr_space) = match self.tcx.global_alloc(ptr.alloc_id) {
Scalar::Ptr(ptr, _size) => {
let (alloc_id, offset) = ptr.into_parts();
let (base_addr, base_addr_space) = match self.tcx.global_alloc(alloc_id) {
GlobalAlloc::Memory(alloc) => {
let init = const_alloc_to_llvm(self, alloc);
let value = match alloc.mutability {
Mutability::Mut => self.static_addr_of_mut(init, alloc.align, None),
_ => self.static_addr_of(init, alloc.align, None),
};
if !self.sess().fewer_names() {
llvm::set_value_name(value, format!("{:?}", ptr.alloc_id).as_bytes());
llvm::set_value_name(value, format!("{:?}", alloc_id).as_bytes());
}
(value, AddressSpace::DATA)
}
Expand All @@ -269,7 +270,7 @@ impl ConstMethods<'tcx> for CodegenCx<'ll, 'tcx> {
let llval = unsafe {
llvm::LLVMConstInBoundsGEP(
self.const_bitcast(base_addr, self.type_i8p_ext(base_addr_space)),
&self.const_usize(ptr.offset.bytes()),
&self.const_usize(offset.bytes()),
1,
)
};
Expand Down
9 changes: 6 additions & 3 deletions compiler/rustc_codegen_llvm/src/consts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use rustc_codegen_ssa::traits::*;
use rustc_hir::def_id::DefId;
use rustc_middle::middle::codegen_fn_attrs::{CodegenFnAttrFlags, CodegenFnAttrs};
use rustc_middle::mir::interpret::{
read_target_uint, Allocation, ErrorHandled, GlobalAlloc, Pointer,
read_target_uint, Allocation, ErrorHandled, GlobalAlloc, Pointer, Scalar as InterpScalar,
};
use rustc_middle::mir::mono::MonoItem;
use rustc_middle::ty::{self, Instance, Ty};
Expand All @@ -25,7 +25,7 @@ pub fn const_alloc_to_llvm(cx: &CodegenCx<'ll, '_>, alloc: &Allocation) -> &'ll
let pointer_size = dl.pointer_size.bytes() as usize;

let mut next_offset = 0;
for &(offset, ((), alloc_id)) in alloc.relocations().iter() {
for &(offset, alloc_id) in alloc.relocations().iter() {
let offset = offset.bytes();
assert_eq!(offset as usize as u64, offset);
let offset = offset as usize;
Expand Down Expand Up @@ -55,7 +55,10 @@ pub fn const_alloc_to_llvm(cx: &CodegenCx<'ll, '_>, alloc: &Allocation) -> &'ll
};

llvals.push(cx.scalar_to_backend(
Pointer::new(alloc_id, Size::from_bytes(ptr_offset)).into(),
InterpScalar::from_pointer(
Pointer::new(alloc_id, Size::from_bytes(ptr_offset)),
&cx.tcx,
),
&Scalar { value: Primitive::Pointer, valid_range: 0..=!0 },
cx.type_i8p_ext(address_space),
));
Expand Down
8 changes: 4 additions & 4 deletions compiler/rustc_codegen_ssa/src/mir/operand.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,10 @@ impl<'a, 'tcx, V: CodegenObject> OperandRef<'tcx, V> {
Abi::ScalarPair(ref a, _) => a,
_ => bug!("from_const: invalid ScalarPair layout: {:#?}", layout),
};
let a = Scalar::from(Pointer::new(
bx.tcx().create_memory_alloc(data),
Size::from_bytes(start),
));
let a = Scalar::from_pointer(
Pointer::new(bx.tcx().create_memory_alloc(data), Size::from_bytes(start)),
&bx.tcx(),
);
let a_llval = bx.scalar_to_backend(
a,
a_scalar,
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_middle/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
#![feature(iter_zip)]
#![feature(thread_local_const_init)]
#![feature(try_reserve)]
#![feature(nonzero_ops)]
#![recursion_limit = "512"]

#[macro_use]
Expand Down
104 changes: 56 additions & 48 deletions compiler/rustc_middle/src/mir/interpret/allocation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
use std::borrow::Cow;
use std::convert::TryFrom;
use std::iter;
use std::ops::{Deref, DerefMut, Range};
use std::ops::{Deref, Range};
use std::ptr;

use rustc_ast::Mutability;
Expand All @@ -25,7 +25,7 @@ use crate::ty;
/// module provides higher-level access.
#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, Hash, TyEncodable, TyDecodable)]
#[derive(HashStable)]
pub struct Allocation<Tag = (), Extra = ()> {
pub struct Allocation<Tag = AllocId, Extra = ()> {
/// The actual bytes of the allocation.
/// Note that the bytes of a pointer represent the offset of the pointer.
bytes: Vec<u8>,
Expand Down Expand Up @@ -154,26 +154,32 @@ impl<Tag> Allocation<Tag> {
}
}

impl Allocation<()> {
/// Add Tag and Extra fields
pub fn with_tags_and_extra<T, E>(
impl Allocation {
/// Convert Tag and add Extra fields
pub fn convert_tag_add_extra<Tag, Extra>(
self,
mut tagger: impl FnMut(AllocId) -> T,
extra: E,
) -> Allocation<T, E> {
cx: &impl HasDataLayout,
extra: Extra,
mut tagger: impl FnMut(Pointer<AllocId>) -> Pointer<Tag>,
) -> Allocation<Tag, Extra> {
// Compute new pointer tags, which also adjusts the bytes.
let mut bytes = self.bytes;
let mut new_relocations = Vec::with_capacity(self.relocations.0.len());
let ptr_size = cx.data_layout().pointer_size.bytes_usize();
let endian = cx.data_layout().endian;
for &(offset, alloc_id) in self.relocations.iter() {
let idx = offset.bytes_usize();
let ptr_bytes = &mut bytes[idx..idx + ptr_size];
let bits = read_target_uint(endian, ptr_bytes).unwrap();
let (ptr_tag, ptr_offset) =
tagger(Pointer::new(alloc_id, Size::from_bytes(bits))).into_parts();
write_target_uint(endian, ptr_bytes, ptr_offset.bytes().into()).unwrap();
new_relocations.push((offset, ptr_tag));
}
// Create allocation.
Allocation {
bytes: self.bytes,
relocations: Relocations::from_presorted(
self.relocations
.iter()
// The allocations in the relocations (pointers stored *inside* this allocation)
// all get the base pointer tag.
.map(|&(offset, ((), alloc))| {
let tag = tagger(alloc);
(offset, (tag, alloc))
})
.collect(),
),
bytes,
relocations: Relocations::from_presorted(new_relocations),
init_mask: self.init_mask,
align: self.align,
mutability: self.mutability,
Expand Down Expand Up @@ -279,6 +285,9 @@ impl<Tag: Copy, Extra> Allocation<Tag, Extra> {
/// A raw pointer variant of `get_bytes_mut` that avoids invalidating existing aliases into this memory.
pub fn get_bytes_mut_ptr(&mut self, cx: &impl HasDataLayout, range: AllocRange) -> *mut [u8] {
self.mark_init(range, true);
// This also clears relocations that just overlap with the written range. So writing to some
// byte can de-initialize its neighbors! See
// <https://github.com/rust-lang/rust/issues/87184> for details.
self.clear_relocations(cx, range);

assert!(range.end().bytes_usize() <= self.bytes.len()); // need to do our own bounds-check
Expand Down Expand Up @@ -321,7 +330,11 @@ impl<Tag: Copy, Extra> Allocation<Tag, Extra> {
cx: &impl HasDataLayout,
range: AllocRange,
) -> AllocResult<ScalarMaybeUninit<Tag>> {
// `get_bytes_unchecked` tests relocation edges.
// `get_bytes_with_uninit_and_ptr` tests relocation edges.
// We deliberately error when loading data that partially has provenance, or partially
// initialized data (that's the check below), into a scalar. The LLVM semantics of this are
// unclear so we are conservative. See <https://github.com/rust-lang/rust/issues/69488> for
// further discussion.
let bytes = self.get_bytes_with_uninit_and_ptr(cx, range)?;
// Uninit check happens *after* we established that the alignment is correct.
// We must not return `Ok()` for unaligned pointers!
Expand All @@ -339,9 +352,9 @@ impl<Tag: Copy, Extra> Allocation<Tag, Extra> {
self.check_relocations(cx, range)?;
} else {
// Maybe a pointer.
if let Some(&(tag, alloc_id)) = self.relocations.get(&range.start) {
let ptr = Pointer::new_with_tag(alloc_id, Size::from_bytes(bits), tag);
return Ok(ScalarMaybeUninit::Scalar(ptr.into()));
if let Some(&prov) = self.relocations.get(&range.start) {
let ptr = Pointer::new(prov, Size::from_bytes(bits));
return Ok(ScalarMaybeUninit::from_pointer(ptr, cx));
}
}
// We don't. Just return the bits.
Expand Down Expand Up @@ -371,18 +384,23 @@ impl<Tag: Copy, Extra> Allocation<Tag, Extra> {
}
};

let bytes = match val.to_bits_or_ptr(range.size, cx) {
Err(val) => u128::from(val.offset.bytes()),
Ok(data) => data,
// `to_bits_or_ptr_internal` is the right method because we just want to store this data
// as-is into memory.
let (bytes, provenance) = match val.to_bits_or_ptr_internal(range.size) {
Err(val) => {
let (provenance, offset) = val.into_parts();
(u128::from(offset.bytes()), Some(provenance))
}
Ok(data) => (data, None),
};

let endian = cx.data_layout().endian;
let dst = self.get_bytes_mut(cx, range);
write_target_uint(endian, dst, bytes).unwrap();

// See if we have to also write a relocation.
if let Scalar::Ptr(val) = val {
self.relocations.insert(range.start, (val.tag, val.alloc_id));
if let Some(provenance) = provenance {
self.relocations.0.insert(range.start, provenance);
}

Ok(())
Expand All @@ -392,11 +410,7 @@ impl<Tag: Copy, Extra> Allocation<Tag, Extra> {
/// Relocations.
impl<Tag: Copy, Extra> Allocation<Tag, Extra> {
/// Returns all relocations overlapping with the given pointer-offset pair.
pub fn get_relocations(
&self,
cx: &impl HasDataLayout,
range: AllocRange,
) -> &[(Size, (Tag, AllocId))] {
pub fn get_relocations(&self, cx: &impl HasDataLayout, range: AllocRange) -> &[(Size, Tag)] {
// We have to go back `pointer_size - 1` bytes, as that one would still overlap with
// the beginning of this range.
let start = range.start.bytes().saturating_sub(cx.data_layout().pointer_size.bytes() - 1);
Expand Down Expand Up @@ -446,7 +460,7 @@ impl<Tag: Copy, Extra> Allocation<Tag, Extra> {
}

// Forget all the relocations.
self.relocations.remove_range(first..last);
self.relocations.0.remove_range(first..last);
}

/// Errors if there are relocations overlapping with the edges of the
Expand Down Expand Up @@ -582,39 +596,33 @@ impl<Tag, Extra> Allocation<Tag, Extra> {
}
}

/// Relocations.
/// "Relocations" stores the provenance information of pointers stored in memory.
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, TyEncodable, TyDecodable)]
pub struct Relocations<Tag = (), Id = AllocId>(SortedMap<Size, (Tag, Id)>);
pub struct Relocations<Tag = AllocId>(SortedMap<Size, Tag>);

impl<Tag, Id> Relocations<Tag, Id> {
impl<Tag> Relocations<Tag> {
pub fn new() -> Self {
Relocations(SortedMap::new())
}

// The caller must guarantee that the given relocations are already sorted
// by address and contain no duplicates.
pub fn from_presorted(r: Vec<(Size, (Tag, Id))>) -> Self {
pub fn from_presorted(r: Vec<(Size, Tag)>) -> Self {
Relocations(SortedMap::from_presorted_elements(r))
}
}

impl<Tag> Deref for Relocations<Tag> {
type Target = SortedMap<Size, (Tag, AllocId)>;
type Target = SortedMap<Size, Tag>;

fn deref(&self) -> &Self::Target {
&self.0
}
}

impl<Tag> DerefMut for Relocations<Tag> {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}

/// A partial, owned list of relocations to transfer into another allocation.
pub struct AllocationRelocations<Tag> {
relative_relocations: Vec<(Size, (Tag, AllocId))>,
relative_relocations: Vec<(Size, Tag)>,
}

impl<Tag: Copy, Extra> Allocation<Tag, Extra> {
Expand Down Expand Up @@ -652,7 +660,7 @@ impl<Tag: Copy, Extra> Allocation<Tag, Extra> {
/// The affected range, as defined in the parameters to `prepare_relocation_copy` is expected
/// to be clear of relocations.
pub fn mark_relocation_range(&mut self, relocations: AllocationRelocations<Tag>) {
self.relocations.insert_presorted(relocations.relative_relocations);
self.relocations.0.insert_presorted(relocations.relative_relocations);
}
}

Expand Down
Loading

0 comments on commit c78ebb7

Please sign in to comment.