From 51bed0f4185f93e8f6c4a5edd216b5c6e1793a12 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Tue, 1 Aug 2023 12:09:23 +0200 Subject: [PATCH 01/35] UnsafeAliased: port text from hackmd --- text/0000-unsafe-aliased.md | 337 ++++++++++++++++++++++++++++++++++++ 1 file changed, 337 insertions(+) create mode 100644 text/0000-unsafe-aliased.md diff --git a/text/0000-unsafe-aliased.md b/text/0000-unsafe-aliased.md new file mode 100644 index 00000000000..fe790d0a7de --- /dev/null +++ b/text/0000-unsafe-aliased.md @@ -0,0 +1,337 @@ +# `unsafe_aliased` + +- Feature Name: `unsafe_aliased` +- Start Date: 2022-11-05 +- RFC PR: [rust-lang/rfcs#0000](https://github.com/rust-lang/rfcs/pull/0000) +- Rust Issue: [rust-lang/rust#0000](https://github.com/rust-lang/rust/issues/0000) + +# Summary +[summary]: #summary + +Add a type `UnsafeAliased` that acts on `&mut` in a similar way to how `UnsafeCell` acts on `&`: it opts-out of the aliasing guarantees. +However, `&mut UnsafeAliased` can still be `mem::swap`ed, so this is not a free ticket for arbitrary aliasing of mutable references. +Abstractions built on top of `UnsafeAliased` must ensure proper encapsulation when handing such aliases references to clients (e.g. via pinning). + +This type is then used in generator lowering, finally fixing [#63818](https://github.com/rust-lang/rust/issues/63818). + +# Motivation +[motivation]: #motivation + +Let's say you want to write a type with a self-referential pointer: + +```rust +#![feature(pin_macro)] +#![feature(negative_impls)] +use std::ptr; +use std::pin::{pin, Pin}; + +pub struct S { + data: i32, + ptr_to_data: *mut i32, +} + +impl !Unpin for S {} + +impl S { + pub fn new() -> Self { + S { data: 42, ptr_to_data: ptr::null_mut() } + } + + pub fn get_data(self: Pin<&mut Self>) -> i32 { + // SAFETY: We're not moving anything. + let this = unsafe { Pin::get_unchecked_mut(self) }; + if this.ptr_to_data.is_null() { + this.ptr_to_data = ptr::addr_of_mut!(this.data); + } + // SAFETY: if the pointer is non-null, then we are pinned and it points to the `data` field. + unsafe { this.ptr_to_data.read() } + } + + pub fn set_data(self: Pin<&mut Self>, data: i32) { + // SAFETY: We're not moving anything. + let this = unsafe { Pin::get_unchecked_mut(self) }; + if this.ptr_to_data.is_null() { + this.ptr_to_data = ptr::addr_of_mut!(this.data); + } + // SAFETY: if the pointer is non-null, then we are pinned and it points to the `data` field. + unsafe { this.ptr_to_data.write(data) } + } +} + +fn main() { + let mut s = pin!(S::new()); + s.as_mut().set_data(42); + println!("{}", s.as_mut().get_data()); +} +``` + +This kind of code is implicitly generated by rustc all the time when an `async fn` has a local variable of reference type that is live across a yield point. +The problem is that this code has UB under our aliasing rules: the `&mut S` inside the `self` argument of `get_data` aliases with `ptr_to_data`! + +This simple code only has UB under Stacked Borrows but not under the LLVM aliasing rules; more complex variants of this -- still in the realm of what `async fn` generates -- also have UB under the LLVM aliasing rules. + +
+ +A more complex variant + +```rust +#![feature(pin_macro)] +#![feature(negative_impls)] +use std::ptr; +use std::pin::{pin, Pin}; +use std::task::Poll; + +pub struct S { + state: i32, + data: i32, + ptr_to_data: *mut i32, +} + +impl !Unpin for S {} + +impl S { + pub fn new() -> Self { + S { state: 0, data: 42, ptr_to_data: ptr::null_mut() } + } + + fn poll(self: Pin<&mut Self>) -> Poll<()> { + // SAFETY: We're not moving anything. + let this = unsafe { Pin::get_unchecked_mut(self) }; + match this.state { + 0 => { + // The first time, set up the pointer. + this.ptr_to_data = ptr::addr_of_mut!(this.data); + // Now yield. + this.state += 1; + Poll::Pending + } + 1 => { + // After coming back from the yield, write to the pointer. + unsafe { this.ptr_to_data.write(42) }; + // And read our local variable `data`. + // THIS IS UB! `this` is derived from the `noalias` pointer + // `self` but we did a write to `this.data` in the previous + // line when writing to `ptr_to_data`. The compiler is allowed + // to reorder this and the previous line and then the output + // would change. + println!("{}", this.data); + // Done! + Poll::Ready(()) + } + _ => unreachable!(), + } + } +} + +fn main() { + let mut s = pin!(S::new()); + while let Poll::Pending = s.as_mut().poll() {} +} +``` + +
+ +
+ +Beyond self-referential types, a similar problem also comes up with intrusive linked lists: the nodes of such a list often live on the stack frames of the functions participating in the list, but also have incoming pointers from other list elements. +When a function takes a mutable reference to its stack-allocated node, that will alias the pointers from the neighboring elements. +`Pin` is used to ensure that the list elements don't just move elsewhere (which would invalidate those incoming pointers), but there still is the problem that an `&mut Node` is actually not a unique pointer due to these aliases -- so we need a way for the to opt-out of the aliasing rules. + +
+ +The goal of this RFC is to offer a way of writing such self-referential types and intrusive collections without UB. +We don't want to change the rules for mutable references in general (that would also affect all the code that doesn't do anything self-referential), instad we want to be able to tell the compiler that this code is doing funky aliasing and that should be taken into account for optimizations. + +# Guide-level explanation +[guide-level-explanation]: #guide-level-explanation + +To write this code in a UB-free way, wrap the fields that are *targets* of self-referential pointers in an `UnsafeAliased`: + +```rust +pub struct S { + data: UnsafeAliased, //