Reducing Raw Pointer Footguns: Preventing Reference Aliasing Violations at Compile Time

Hello, I am currently experimenting with building a tool to make the experience of using raw pointers safer. The primary goal is to provide compile-time errors if there is a reference aliasing violation. My tests were successful: the tool managed to produce compile-time errors.

I then thought it would be great if the std library had methods like this, perhaps with better code than this, but with the same objective of catching aliasing violations at compile time. Currently, the as_ref() and as_mut() methods in std do not provide compile time errors when such violations occur

The other methods listed above, as well as similar ones, could also be created with more refined code. This implementation serves to demonstrate that it is possible to provide compile-time safety for the raw pointer operations covered here. Overall, this reduces the 'footguns' associated with raw pointers, which makes unsafe Rust more manageable.

The code :

use std::marker::PhantomData;
use std::ops::Deref;

/// Accessor interface shared by the guard types that wrap one raw pointer to `T`.
///
/// The pointer can be read or replaced as either `*const T` or `*mut T`, and an
/// implementor can be built from either pointer kind. None of the methods is
/// `unsafe`; they only move pointer values around and never dereference them.
pub trait RawPointer<T: ?Sized> {
    /// Returns the wrapped pointer typed as `*const T`.
    fn immutable_pointer(&self) -> *const T;
    /// Returns the wrapped pointer typed as `*mut T`.
    fn mutable_pointer(&self) -> *mut T;
    /// Replaces the wrapped pointer with `ptr`.
    fn set_immutable_pointer(&mut self, ptr: *const T);
    /// Replaces the wrapped pointer with `ptr`.
    fn set_mutable_pointer(&mut self, ptr: *mut T);
    /// Builds an implementor from a `*const T`.
    fn from_immutable_pointer(ptr: *const T) -> Self;
    /// Builds an implementor from a `*mut T`.
    fn from_mutable_pointer(ptr: *mut T) -> Self;
}

/// Guard around a `*mut T` whose lifetime `'a` is tied to a `&'a mut T` marker.
pub struct AliasingGuardMut<'a, T: ?Sized> {
    /// The wrapped raw pointer.
    ptr: *mut T,
    /// Address of the first byte of the originating slice. `Some` only when the
    /// guard comes from `from_mutable_slice` (or was cast from such a guard).
    start_addr: Option<usize>,
    /// Address one past the last byte of the originating slice; `Some` under the
    /// same conditions as `start_addr`.
    end_addr: Option<usize>,
    /// Zero-sized marker: for borrow checking and variance the guard is treated
    /// as if it stored a `&'a mut T`.
    _marker: PhantomData<&'a mut T>,
}

/// Guard around a `*const T` whose lifetime `'a` is tied to a `&'a T` marker.
pub struct AliasingGuardConst<'a, T: ?Sized> {
    /// The wrapped raw pointer.
    ptr: *const T,
    /// Optional lower address bound. The constructors of this type visible in
    /// this listing always store `None` here.
    start_addr: Option<usize>,
    /// Optional upper address bound; `None` under the same conditions as `start_addr`.
    end_addr: Option<usize>,
    /// Zero-sized marker: for borrow checking and variance the guard is treated
    /// as if it stored a `&'a T`.
    _marker: PhantomData<&'a T>,
}

// `RawPointer` accessors for the mutable guard. The guard stores a `*mut T`, so the
// `*const T` entry points cast constness away/back and reuse the `*mut T` ones.
impl<'a, T: ?Sized> RawPointer<T> for AliasingGuardMut<'a, T> {
    fn immutable_pointer(&self) -> *const T {
        self.mutable_pointer() as *const T
    }

    fn mutable_pointer(&self) -> *mut T {
        self.ptr
    }

    fn set_immutable_pointer(&mut self, ptr: *const T) {
        self.set_mutable_pointer(ptr as *mut T);
    }

    fn set_mutable_pointer(&mut self, ptr: *mut T) {
        self.ptr = ptr;
    }

    fn from_immutable_pointer(ptr: *const T) -> Self {
        Self::from_mutable_pointer(ptr as *mut T)
    }

    fn from_mutable_pointer(ptr: *mut T) -> Self {
        // Goes through the inherent constructor, which runs the debug-only checks.
        Self::from_pointer(ptr)
    }
}

// `RawPointer` accessors for the shared guard. The guard stores a `*const T`, so the
// `*mut T` entry points are plain casts over the `*const T` ones.
impl<'a, T: ?Sized> RawPointer<T> for AliasingGuardConst<'a, T> {
    fn immutable_pointer(&self) -> *const T {
        self.ptr
    }

    fn mutable_pointer(&self) -> *mut T {
        // Only the pointer type changes here; nothing is dereferenced.
        self.immutable_pointer() as *mut T
    }

    fn set_immutable_pointer(&mut self, ptr: *const T) {
        self.ptr = ptr;
    }

    fn set_mutable_pointer(&mut self, ptr: *mut T) {
        self.set_immutable_pointer(ptr as *const T);
    }

    fn from_immutable_pointer(ptr: *const T) -> Self {
        // Goes through the inherent constructor, which runs the debug-only checks.
        Self::from_pointer(ptr)
    }

    fn from_mutable_pointer(ptr: *mut T) -> Self {
        Self::from_immutable_pointer(ptr as *const T)
    }
}

/// Debug-only check that the address of `ptr` is a multiple of its pointee's alignment.
///
/// In builds without `debug_assertions` this function does nothing at all. In
/// builds with them it panics with the offending address and the required
/// alignment when the address is misaligned.
///
/// NOTE(review): reading the alignment of a `?Sized` pointee goes through
/// `align_of_val`, which needs a reference. The pointer must therefore already
/// be valid to dereference when this is called in a debug build; this function
/// cannot verify that itself.
fn check_alignment<T: ?Sized>(ptr: *const T) {
    // The reference below is only needed to feed `debug_assert!`. Skipping the
    // whole body when assertions are off avoids materialising a reference from
    // an unchecked pointer for no benefit in release builds.
    if cfg!(debug_assertions) {
        let addr = ptr as *const () as usize;
        // SAFETY: not provable here; callers are expected to pass a pointer
        // that is valid for reads (see the NOTE(review) above).
        let align = unsafe { align_of_val(&*ptr) };
        debug_assert!(addr % align == 0, "Pointer address {} is not aligned to {}", addr, align);
    }
}

impl<'a, T: ?Sized> AliasingGuardConst<'a, T> {
    /// Wraps a shared reference. The guard borrows `value` for `'a` and records
    /// no address bounds.
    pub fn from_reference(value: &'a T) -> Self {
        Self {
            ptr: value as *const T,
            start_addr: None,
            end_addr: None,
            _marker: PhantomData,
        }
    }

    /// Wraps a raw pointer and records no address bounds.
    ///
    /// In debug builds the pointer is asserted to be non-null and is passed to
    /// `check_alignment`; in release builds nothing is checked.
    ///
    /// NOTE(review): this is a safe function, yet `'a` is chosen freely by the
    /// caller and the pointer is dereferenced later by other safe methods.
    /// Callers must only pass pointers that stay valid for `'a`.
    pub fn from_pointer(value: *const T) -> Self {
        debug_assert!(!value.is_null());
        check_alignment(value);

        Self {
            ptr: value,
            start_addr: None,
            end_addr: None,
            _marker: PhantomData,
        }
    }

    /// Reinterprets the guard as pointing to a `U` at the same address, keeping
    /// the recorded bounds. No size or alignment check is performed.
    fn cast_guard<U>(self) -> AliasingGuardConst<'a, U> {
        AliasingGuardConst {
            ptr: self.ptr.cast::<U>(),
            start_addr: self.start_addr,
            end_addr: self.end_addr,
            _marker: PhantomData,
        }
    }

    /// Reinterprets the pointer as `*const U` and moves it by `count` elements of `U`.
    ///
    /// Compilation fails if `U` requires stricter alignment than `T`. In debug
    /// builds the resulting pointer is asserted to be non-null and is passed to
    /// `check_alignment`, exactly as `from_pointer` would do.
    ///
    /// The arithmetic uses `wrapping_offset`, so computing the new address is
    /// never undefined behaviour by itself, and the recorded bounds are carried
    /// over (as `cast_guard` does) instead of being reset.
    ///
    /// NOTE(review): no bounds check happens here; the caller is still
    /// responsible for the result pointing at a live `U`.
    fn cast_offset<U>(self, count: isize) -> AliasingGuardConst<'a, U>
    where
        T: Sized,
        U: Sized
    {
        const {
            if std::mem::align_of::<U>() > std::mem::align_of::<T>() {
                panic!("Alignment mismatch: Target type has stricter alignment");
            }
        }

        let new_ptr = self.ptr.cast::<U>().wrapping_offset(count);

        // Same debug-only validation that `from_pointer` performs.
        debug_assert!(!new_ptr.is_null());
        check_alignment(new_ptr);

        AliasingGuardConst {
            ptr: new_ptr,
            start_addr: self.start_addr,
            end_addr: self.end_addr,
            _marker: PhantomData,
        }
    }
}

impl<'a, T: ?Sized> AliasingGuardMut<'a, T> {
    /// Wraps an exclusive reference. The guard borrows `value` mutably for `'a`
    /// and records no address bounds.
    pub fn from_reference(value: &'a mut T) -> Self {
        Self {
            ptr: value as *mut T,
            start_addr: None,
            end_addr: None,
            _marker: PhantomData,
        }
    }

    /// Wraps a raw pointer and records no address bounds.
    ///
    /// In debug builds the pointer is asserted to be non-null and is passed to
    /// `check_alignment`; in release builds nothing is checked.
    ///
    /// NOTE(review): this is a safe function, yet `'a` is chosen freely by the
    /// caller and the pointer is dereferenced later by other safe methods.
    /// Callers must only pass pointers that stay valid and unaliased for `'a`.
    pub fn from_pointer(value: *mut T) -> Self {
        debug_assert!(!value.is_null());
        check_alignment(value);

        Self {
            ptr: value,
            start_addr: None,
            end_addr: None,
            _marker: PhantomData,
        }
    }

    /// Reinterprets the guard as pointing to a `U` at the same address, keeping
    /// the recorded bounds. No size or alignment check is performed.
    fn cast_guard<U>(self) -> AliasingGuardMut<'a, U> {
        AliasingGuardMut {
            ptr: self.ptr.cast::<U>(),
            start_addr: self.start_addr,
            end_addr: self.end_addr,
            _marker: PhantomData,
        }
    }

    /// Reinterprets the pointer as `*mut U` and moves it by `count` elements of `U`.
    ///
    /// Compilation fails if `U` requires stricter alignment than `T`. In debug
    /// builds the resulting pointer is asserted to be non-null and is passed to
    /// `check_alignment`, exactly as `from_pointer` would do.
    ///
    /// The arithmetic uses `wrapping_offset`, so computing the new address is
    /// never undefined behaviour by itself. The recorded slice bounds are
    /// carried over (as `cast_guard` does), so a guard created by
    /// `from_mutable_slice` keeps its `bound_checked_*` protection after the cast.
    ///
    /// NOTE(review): no bounds check happens here; the caller is still
    /// responsible for the result pointing at a live `U`.
    fn cast_offset<U>(self, count: isize) -> AliasingGuardMut<'a, U>
    where
        T: Sized,
        U: Sized
    {
        const {
            if std::mem::align_of::<U>() > std::mem::align_of::<T>() {
                panic!("Alignment mismatch: Target type has stricter alignment");
            }
        }

        let new_ptr = self.ptr.cast::<U>().wrapping_offset(count);

        // Same debug-only validation that `from_pointer` performs.
        debug_assert!(!new_ptr.is_null());
        check_alignment(new_ptr);

        AliasingGuardMut {
            ptr: new_ptr,
            start_addr: self.start_addr,
            end_addr: self.end_addr,
            _marker: PhantomData,
        }
    }
}

impl<'a, T: Sized> AliasingGuardMut<'a, T> {
    /// Builds a guard pointing at the first element of `slice` and records the
    /// slice's byte range (`start..end`, end exclusive) for later bounds checks.
    pub fn from_mutable_slice(slice: &'a mut [T]) -> Self {
        let len = slice.len();
        let ptr = slice.as_mut_ptr();
        let start_addr = ptr as usize;
        let end_addr = start_addr + (len * size_of::<T>());

        Self {
            ptr,
            start_addr: Some(start_addr),
            end_addr: Some(end_addr),
            _marker: PhantomData,
        }
    }

    /// Consumes the guard and returns one moved by `count` elements.
    ///
    /// # Panics
    ///
    /// If bounds were recorded and the new address is outside `start..end`.
    /// Without recorded bounds no check is made.
    pub fn bound_checked_offset(self, count: isize) -> Self {
        // `wrapping_offset` rather than `offset`: `offset` is undefined behaviour
        // as soon as the result leaves the allocation, which would happen
        // *before* the assertion below gets a chance to reject the request.
        let new_ptr = self.ptr.wrapping_offset(count);
        let new_addr = new_ptr as usize;

        if let (Some(start_addr), Some(end_addr)) = (self.start_addr, self.end_addr) {
            assert!(
                new_addr >= start_addr && new_addr < end_addr,
                "Out of Bounds: Offset {} is ouside of the location (Addr: {} - {})",
                count, start_addr, end_addr
            );
        }

        Self {
            ptr: new_ptr,
            start_addr: self.start_addr,
            end_addr: self.end_addr,
            _marker: PhantomData,
        }
    }

    /// Moves the guard in place by `count` elements.
    ///
    /// # Panics
    ///
    /// If bounds were recorded and the new address is outside `start..end`; the
    /// guard is left unchanged in that case. Without recorded bounds no check
    /// is made.
    pub fn bound_checked_advance(&mut self, count: isize) {
        let new_ptr = self.ptr.wrapping_offset(count);
        let new_addr = new_ptr as usize;

        if let (Some(start_addr), Some(end_addr)) = (self.start_addr, self.end_addr) {
            if new_addr < start_addr || new_addr >= end_addr {
                panic!(
                    "Out of Bounds: Advance by {} elements is outside the allocated range!\n\
                    Valid range: {} - {}\n\
                    Target address: {}",
                    count, start_addr, end_addr, new_addr
                );
            }
        }

        // Only commit the move once the check has passed.
        self.ptr = new_ptr;
    }
}

/// Reference-producing and casting helpers layered on top of [`RawPointer`].
///
/// Every method has a default body, so implementors only need an empty `impl`.
/// The reference-returning methods tie the result to the borrow of `self`
/// (`&self` for shared results, `&mut self` for exclusive ones), which is what
/// lets the borrow checker reject overlapping uses made through one guard.
///
/// NOTE(review): the type-changing casts only check size and alignment. They
/// do not check that the bytes form a valid `U`, so they are not sound for
/// arbitrary `T`/`U` pairs even though they are safe functions.
pub trait AliasingGuardExt<'a, T: ?Sized>: RawPointer<T> + Sized {

    /// Returns `&mut T` to the pointee, borrowed from `&mut self`.
    ///
    /// NOTE(review): this trait is also implemented for `AliasingGuardConst`,
    /// whose `mutable_pointer` is a cast from `*const T`; obtaining `&mut T`
    /// from such a guard is only valid if the pointee is actually mutable and
    /// unaliased.
    fn mutable_reference(&mut self) -> &mut T {
        // SAFETY: relies on the guard having been constructed from a pointer
        // that is valid and exclusively owned for the guard's lifetime.
        unsafe { &mut *self.mutable_pointer() }
    }

    /// Returns `&T` to the pointee, borrowed from `&self`.
    fn immutable_reference(&self) -> &T {
        // SAFETY: relies on the guard having been constructed from a pointer
        // that is valid for reads for the guard's lifetime.
        unsafe { &*self.immutable_pointer() }
    }

    /// Consumes the guard, ending its borrow before the enclosing scope does.
    fn close(self) {

    }

    /// Returns the wrapped pointer cast to `*mut U`; the guard stays usable.
    fn cast_mutable_pointer<U>(&self) -> *mut U {
        self.mutable_pointer().cast::<U>()
    }

    /// Consumes the guard and returns the wrapped pointer cast to `*mut U`.
    fn cast_mutable_pointer_and_close<U>(self) -> *mut U {
        self.mutable_pointer().cast::<U>()
    }

    /// Reinterprets the pointee as `&U`.
    ///
    /// Compilation fails unless `T` and `U` have the same size and `U` needs
    /// no stricter alignment than `T`.
    fn reference_different_type<U>(&self) -> &U
    where
        T: Sized,
        U: Sized
    {
        const {
            if size_of::<T>() != size_of::<U>() {
                panic!("Size mismatch: Source and target types must have the same size in bytes.");
            }
            if align_of::<T>() < align_of::<U>() {
                panic!("Alignment mismatch: Target type requires stricter alignment than source type.");
            }
        }

        // SAFETY: size and alignment are checked above; validity of the bytes
        // as a `U` is NOT checked (see the trait-level NOTE(review)).
        unsafe { &*self.immutable_pointer().cast::<U>() }
    }

    /// Reinterprets the pointee as `&mut U`.
    ///
    /// Compilation fails unless `T` and `U` have the same size and `U` needs
    /// no stricter alignment than `T`.
    fn mutable_reference_different_type<U>(&mut self) -> &mut U
    where
        T: Sized,
        U: Sized
    {
        const {
            if size_of::<T>() != size_of::<U>() {
                panic!("Size mismatch: Source and target types must have the same size in bytes.");
            }
            if align_of::<T>() < align_of::<U>() {
                panic!("Alignment mismatch: Target type requires stricter alignment than source type.");
            }
        }

        // SAFETY: size and alignment are checked above; validity of the bytes
        // as a `U` is NOT checked (see the trait-level NOTE(review)).
        unsafe { &mut *self.mutable_pointer().cast::<U>() }
    }

    /// Reinterprets the pointee as `&[U; N]`.
    ///
    /// Compilation fails unless `size_of::<T>() == size_of::<U>() * N` and `U`
    /// needs no stricter alignment than `T`.
    fn cast_immutable_reference_array<U, const N: usize>(&self) -> &[U; N]
    where
        T: Sized,
        U: Sized,
    {
        const {
            let total_target_size = size_of::<U>() * N;
            if size_of::<T>() != total_target_size {
                panic!("Size mismatch: The source type size does not match the total size of the requested array.");
            }
            if align_of::<T>() < align_of::<U>() {
                panic!("Alignment mismatch: Target element type requires stricter alignment than source type.");
            }
        }

        // SAFETY: total size and alignment are checked above; validity of the
        // bytes as `U` values is NOT checked.
        unsafe { &*self.immutable_pointer().cast::<[U; N]>() }
    }

    /// Reinterprets the start of the pointee as a `&[U]` of `len` elements.
    ///
    /// Compilation fails if `U` needs stricter alignment than `T`.
    ///
    /// # Panics
    ///
    /// If `len * size_of::<U>()` exceeds `size_of::<T>()` or overflows `usize`.
    fn cast_immutable_reference_slice<U>(&self, len: usize) -> &[U]
    where
        T: Sized,
        U: Sized,
    {
        const {
            if align_of::<T>() < align_of::<U>() {
                panic!("Alignment mismatch: Target element type requires stricter alignment.");
            }
        }

        // `checked_mul`: a plain `*` wraps in release builds, which would let
        // an absurd `len` slip past the size check below.
        let fits = match len.checked_mul(size_of::<U>()) {
            Some(bytes) => bytes <= size_of::<T>(),
            None => false,
        };
        assert!(
            fits,
            "Runtime Error: Requested slice length exceeds source memory size."
        );

        // SAFETY: byte length and alignment are checked above; validity of the
        // bytes as `U` values is NOT checked.
        unsafe { std::slice::from_raw_parts(self.immutable_pointer().cast::<U>(), len) }
    }

    /// Reinterprets the pointee as `&mut [U; N]`.
    ///
    /// Compilation fails unless `size_of::<T>() == size_of::<U>() * N` and `U`
    /// needs no stricter alignment than `T`.
    fn cast_mutable_reference_array<U, const N: usize>(&mut self) -> &mut [U; N]
    where
        T: Sized,
        U: Sized,
    {
        const {
            let total_target_size = size_of::<U>() * N;
            if size_of::<T>() != total_target_size {
                panic!("Size mismatch: The source type size does not match the total size of the requested array.");
            }
            if align_of::<T>() < align_of::<U>() {
                panic!("Alignment mismatch: Target element type requires stricter alignment than source type.");
            }
        }

        // SAFETY: total size and alignment are checked above; validity of the
        // bytes as `U` values is NOT checked.
        unsafe { &mut *self.mutable_pointer().cast::<[U; N]>() }
    }

    /// Reinterprets the start of the pointee as a `&mut [U]` of `len` elements.
    ///
    /// Compilation fails if `U` needs stricter alignment than `T`.
    ///
    /// # Panics
    ///
    /// If `len * size_of::<U>()` exceeds `size_of::<T>()` or overflows `usize`.
    fn cast_mutable_reference_slice<U>(&mut self, len: usize) -> &mut [U]
    where
        T: Sized,
        U: Sized,
    {
        const {
            if align_of::<T>() < align_of::<U>() {
                panic!("Alignment mismatch: Target element type requires stricter alignment.");
            }
        }

        // `checked_mul`: a plain `*` wraps in release builds, which would let
        // an absurd `len` slip past the size check below.
        let fits = match len.checked_mul(size_of::<U>()) {
            Some(bytes) => bytes <= size_of::<T>(),
            None => false,
        };
        assert!(
            fits,
            "Runtime Error: Requested slice length exceeds source memory size."
        );

        // SAFETY: byte length and alignment are checked above; validity of the
        // bytes as `U` values is NOT checked.
        unsafe { std::slice::from_raw_parts_mut(self.mutable_pointer().cast::<U>(), len) }
    }

    /// Consumes the guard and returns one moved by `count` elements, rebuilt
    /// through `from_immutable_pointer`.
    ///
    /// # Safety
    ///
    /// Same contract as the raw pointer `offset` method: the starting and
    /// resulting pointers must lie within (or one past the end of) the same
    /// allocation.
    unsafe fn offset(self, count: isize) -> Self
    where
        T: Sized,
        Self: RawPointer<T>
    {
        // SAFETY: forwarded to the caller, see `# Safety`.
        let new_ptr = unsafe { self.immutable_pointer().offset(count) };
        Self::from_immutable_pointer(new_ptr)
    }

    /// Moves the guard in place by `count` elements.
    ///
    /// # Safety
    ///
    /// Same contract as the raw pointer `offset` method: the starting and
    /// resulting pointers must lie within (or one past the end of) the same
    /// allocation.
    unsafe fn advance(&mut self, count: isize)
    where T: Sized
    {
        // SAFETY: forwarded to the caller, see `# Safety`.
        self.set_mutable_pointer(unsafe { self.mutable_pointer().offset(count) });
    }

}

// Both guard types take every `AliasingGuardExt` method from its default body;
// neither impl overrides anything.
impl<'a, T: ?Sized> AliasingGuardExt<'_, T> for AliasingGuardMut<'a, T> {}
impl<'a, T: ?Sized> AliasingGuardExt<'_, T> for AliasingGuardConst<'a, T> {}


// `*guard` reads the pointee directly; a null pointer panics instead of being dereferenced.
impl<'a, T: ?Sized> Deref for AliasingGuardConst<'a, T> {
    type Target = T;

    fn deref(&self) -> &T {
        let raw = self.ptr;
        if raw.is_null() {
            panic!("Attempted to dereference a null AliasingGuardConst");
        }

        // SAFETY: non-null was just checked; validity for reads relies on how
        // the guard was constructed.
        unsafe { &*raw }
    }
}

// `*guard` reads the pointee directly; a null pointer panics instead of being dereferenced.
impl<'a, T: ?Sized> Deref for AliasingGuardMut<'a, T> {
    type Target = T;

    fn deref(&self) -> &T {
        let raw = self.ptr;
        if raw.is_null() {
            panic!("Attempted to dereference a null AliasingGuardMut");
        }

        // SAFETY: non-null was just checked; validity for reads relies on how
        // the guard was constructed.
        unsafe { &*raw }
    }
}

// Demo driver. It first shows the aliasing UB that `NonNull::as_mut` lets through,
// then routes the same patterns through the guards. Statements that are left
// commented out are the ones the author reports as rejected, either by the
// compiler or by a runtime panic; uncomment one at a time to see the diagnostic.
fn main() {
    let mut a = String::from("hello");
    let mut ptr = std::ptr::NonNull::new(&raw mut a).unwrap();
    
    unsafe {
        // Two `&mut String` are created from the same pointer and both stay live.
        let tes = ptr.as_mut();
        let tes2 = ptr.as_mut();
        
        // this will compile but it causes UB, checked in Miri
        *tes = String::from("hello 2");
        println!("{}", *tes2);
    }

    let mut guard = AliasingGuardMut::from_reference(&mut a);

    
    // `b` holds a mutable borrow of `guard` for as long as `b` is used.
    let b = guard.mutable_reference();
    *b = String::from("reference from raw ptr");

    // these will give compile time error
    // (`b` is used again below, so `guard` is still mutably borrowed here)
    
    //let c_illegal = guard.immutable_reference();
    //let d_illegal = guard.mutable_reference();

    *b = String::from("reference from raw ptr 2");
    
    // Last use of `b` was above, so a shared borrow of `guard` is accepted now.
    let e = guard.immutable_reference();
    
    // this will give compile time error
    // (`e` is used below, so `guard` is still immutably borrowed here)
    
    // let f_illegal = guard.mutable_reference();

    println!("{}", *e);

    // drop(guard) or guard.close() to close the guard without waiting an end of scope
    
    println!("{}", a);
    
    //guard.close();
    
    let mut guard = AliasingGuardMut::from_reference(&mut a);

    // these will cause compile time error
    // (rejected by the size/alignment checks in the `const` blocks of each method)
    
    //let g_illegal = guard.reference_different_type::<&i64>();
    //let h_illegal = guard.mutable_reference_different_type::<&mut i64>();
 
    //let i_illegal = guard.cast_immutable_reference_array::<i64, 1024>();
    //let j_illegal = guard.cast_mutable_reference_array::<i64, 1024>();
    
    // these will cause runtime panic
    // (the requested byte length exceeds `size_of::<String>()`)
    
    //let k_illegal = guard.cast_immutable_reference_slice::<i64>(1024);
    //let l_illegal = guard.cast_mutable_reference_slice::<i64>(1024);
    
    let mut numbers = [10u32, 20, 30, 40];
    
    // A guard built from a slice records the slice's address range.
    let mut guard = AliasingGuardMut::from_mutable_slice(&mut numbers);
    
    // move pointer to point index 1
    guard.bound_checked_advance(1); 
    println!("Index 1: {}", *guard); // print 20
    
    // this will cause runtime panic
    // (the target address is outside the recorded range)
    
    //guard.bound_checked_advance(100); 
    
    // this will cause compile time error
    // (`u64` requires stricter alignment than `u32`)
    
    //let illegal_m = guard.cast_offset::<u64>(1);

    
}

UPDATE :

The new code is too long to be included here as code block. I uploaded to github with readme.md included for explanation here:

Aren't .mutable_reference() and .immutable_reference() just piggy-backing off of Rust's normal & and &mut semantics, just applied to guard instead of a? It doesn't look like your compile-time checks add anything beyond & and &mut.

Also.... just looking around, a lot of the methods you wrote are just mem::transmute in a "safe" trench coat. (That is, they're unsound. Size and align are far from the only aspects of a type that matter.)

Taking a step back and looking at the big picture, even if you solve those problems, your desired goal (compile-time errors for aliasing violations) is impossible in general. There is simply some information that you cannot know until runtime, and tools like Miri exist to test for UB, including aliasing violations, at runtime.

The purpose of raw pointers is to be used in places where compile-time checks are overly restrictive.

Granted, with a sufficiently complicated type system and sufficiently verbose annotations, you probably could encode a lot more invariants into the type system for compile-time checks. But speaking from experience as somebody who does very complicated manipulation of lifetimes, unsafe traits, higher-kinded trait bounds, etc... encoding too many invariants into the type system becomes a massive hassle that brushes against the edges of what's possible in Rust, and I'd imagine that most people aren't willing to deal with all that, all in exchange for... moving relatively-simple unsafe code into an overengineered system which likely uses plenty of unsafe internally? Probably a bit anticlimactic, even if it can be useful to remove unsafe from downstream code.

IMO, the best way to reduce the footguns associated with raw pointers is to better teach people Rust's "operational semantics". Piecing together a good mental model of how unsafe Rust works is currently... difficult. You have to do a lot of searching for information yourself, on random docs and forums and conversations.

3 Likes

Yes, that's why we can guarantee a compile-time error whenever there's a reference aliasing violation: we're leveraging the checks already present in safe Rust. The mechanism is similar to RefCell::borrow_mut, with the difference being that it works purely at compile time. In safe Rust, references receive this compile-time check, whereas references originating from unsafe raw pointers do not; there we're required to follow the aliasing rules manually, which is prone to human error since there are no checks. My motivation for creating this is that many people say unsafe Rust is difficult because of the aliasing rules. By using a new intermediate struct, here called AliasingGuard, which can be thought of as having the same utility as the guard returned by RefCell::borrow_mut but at compile time, we can bridge the gap between unsafe raw pointers and safe references to gain the compile-time checks found in safe Rust. Since following the aliasing rules manually in unsafe Rust essentially means replicating safe Rust's reference rules by hand, this bridge removes that manual burden, because the automatic aliasing checks of safe Rust are already at work. The & and &mut checks cannot be disabled, even when things are only known at runtime; they are encoded in the type system, so this is guaranteed.

The aliasing rules are mandatory for all references, including those converted from raw pointers. Violating this is UB, so it's not an optional choice, but a requirement

  • If there is an active immutable reference to A, we can not create a mutable reference to A
  • If there is an active mutable reference to A, we can not create another mutable reference to A

The purpose here is to prevent the incorrect conversion of a raw pointer into a reference that violates the reference aliasing rules. It isn't for checking aliasing rules for raw pointers, because raw pointers themselves don't have aliasing rules; references do. We cannot avoid the restrictiveness of references even if we create them from raw pointers; we just have to follow the rules manually, so it is like coding in C and C++, where things rely on human discipline.

So, simply by using an intermediate struct and methods that utilize & and &mut rules, we can prevent reference aliasing rule violations, for references generated from raw pointers, at compile time

For other methods, we just need to add whatever checks are missing, this is optional but good to create. It's not a reason to stop. The goal of this discussion is to identify what’s missing, determine if those gaps can be filled, and keep exploring until we hit a wall, at which point we can pause and see if there's a better approach

What is most clear is the compile time error method for the aliasing rules mentioned above. Rust's goal is to move as many errors as possible to compile time, that’s the whole reason we have the borrow checker, to prevent mistakes at compile time. One aspect of this now is gradually giving unsafe Rust better ways to prevent mistakes, specifically regarding raw pointer to reference conversions that violate aliasing rules in this case. It's like having a way to prevent data races at compile time so we don't need Miri. If something can be checked at compile time, it's better than relying on Miri, because Miri needs to trigger every execution path to achieve 100% coverage, which is time consuming for large codebases. If there's a massive code update, we'd also need a massive update to the tests to ensure they run and maintain 100% coverage. With compile time checks, none of that is necessary. This is why Rust's advantage as a language with extensive compile time checks is so significant

If you want to dereference a raw pointer, even without materializing a reference, there are still aliasing rules you need to follow. They're just looser and finer-grained than the rules for & and &mut references. A write through one raw pointer can still invalidate other raw pointers (such that reading or writing through those pointers would be UB). (See the std::ptr module's docs.)

In part no doubt because Rust hasn't fully defined its aliasing rules yet. Seems to be several years out still.

2 Likes

All unsafe usages in your example code are missing safety comments proving they are indeed safe. If you try to add them you should see why your proposed API is unsound.

4 Likes

I've gone through the documentation and you're right. We can't create a const pointer while a &mut is active, nor can we read from pointers while that mutable reference exists. Writing through a mut pointer is also not allowed while any & or &mut is active, and creating a &mut invalidates existing raw pointers.

I've updated the design to provide compile time error for that cases and separated the code

use std::marker::PhantomData;
use std::ptr::NonNull;

/// Exclusive guard over a `T` that was mutably borrowed for `'a`.
///
/// The pointee is stored as a `NonNull<T>`. The `PhantomData<&'a mut T>` marker
/// makes the guard behave, for borrow checking and variance, as if it held the
/// original `&'a mut T`: the borrowed value cannot be touched while the guard is
/// alive, and the guard is invariant over `T`.
pub struct AliasingGuardMut<'a, T: ?Sized> {
    ptr: NonNull<T>,
    _marker: PhantomData<&'a mut T>,
}

impl<'a, T: ?Sized> AliasingGuardMut<'a, T> {
    /// Creates a guard from an exclusive reference.
    ///
    /// No `unsafe` is needed: a `&mut T` is always non-null, so it converts to
    /// `NonNull<T>` directly.
    #[inline(always)]
    pub fn from_reference(value: &'a mut T) -> Self {
        let ptr = NonNull::from(value);
        Self {
            ptr,
            _marker: PhantomData,
        }
    }

    /// Shared access to the pointee for as long as `self` is shared-borrowed.
    #[inline(always)]
    pub fn immutable_reference(&self) -> &T {
        // SAFETY: `ptr` came from a `&'a mut T`, so it is aligned, initialised
        // and valid for `'a`. The result borrows `self`, so no `&mut T` can be
        // obtained through this guard while it is alive.
        unsafe { self.ptr.as_ref() }
    }

    /// Exclusive access to the pointee for as long as `self` is exclusively borrowed.
    ///
    /// Raw pointers obtained earlier through the unsafe `as_ptr` may still exist;
    /// using them while this reference is alive is the caller's responsibility.
    #[inline(always)]
    pub fn mutable_reference(&mut self) -> &mut T {
        // SAFETY: `ptr` came from a `&'a mut T` that this guard stands in for,
        // and `&mut self` rules out any other reference handed out by the guard.
        unsafe { self.ptr.as_mut() }
    }

    /// Runs `f` with a shared reference to the pointee and returns its result.
    #[inline(always)]
    pub fn with_immutable_reference<R>(&self, f: impl FnOnce(&T) -> R) -> R {
        let shared = self.immutable_reference();
        f(shared)
    }

    /// Runs `f` with an exclusive reference to the pointee and returns its result.
    #[inline(always)]
    pub fn with_mutable_reference<R>(&mut self, f: impl FnOnce(&mut T) -> R) -> R {
        let exclusive = self.mutable_reference();
        f(exclusive)
    }

    /// Runs `f` with the raw pointer typed as `*const T`.
    ///
    /// Taking `&self` means the call is rejected while a `&mut T` obtained from
    /// this guard is still in use. The pointer is `Copy`, so nothing stops `f`
    /// from returning it; dereferencing it is `unsafe` and up to the closure.
    #[inline(always)]
    pub fn with_immutable_pointer<R>(&self, f: impl FnOnce(*const T) -> R) -> R {
        let raw: *const T = self.ptr.as_ptr();
        f(raw)
    }

    /// Runs `f` with the raw pointer typed as `*mut T`.
    ///
    /// Taking `&mut self` means the call is rejected while any reference
    /// obtained from this guard is still in use. The pointer is `Copy`, so
    /// nothing stops `f` from returning it; dereferencing it is `unsafe` and up
    /// to the closure.
    #[inline(always)]
    pub fn with_mutable_pointer<R>(&mut self, f: impl FnOnce(*mut T) -> R) -> R {
        let raw: *mut T = self.ptr.as_ptr();
        f(raw)
    }

    /// Escape hatch: hands out the raw pointer without any scoping.
    ///
    /// # Safety
    ///
    /// Once the pointer leaves the guard, the guard can no longer enforce
    /// anything about it. The caller must ensure that:
    /// - the pointer is not written through while a `&` or `&mut` to the same
    ///   memory is active;
    /// - the pointer is not read through while a `&mut` to the same memory is
    ///   active;
    /// - the pointer is not used after a later `&mut` to the same memory has
    ///   invalidated it, or after the pointee itself is gone.
    #[inline(always)]
    pub unsafe fn as_ptr(&mut self) -> *mut T {
        self.ptr.as_ptr()
    }

    /// Consumes the guard, releasing the borrow before the end of the scope.
    #[inline(always)]
    pub fn close(self) {}
}

// Demo driver for the guard. Every commented-out statement is one the author
// reports as rejected by the borrow checker because a reference obtained from
// `guard` is still in use at that point.
fn main() {
    let mut a = String::from("hello");
   
    let mut guard = AliasingGuardMut::from_reference(&mut a);

    // `b` keeps `guard` mutably borrowed until its last use below.
    let b = guard.mutable_reference();
    *b = String::from("reference from raw ptr");
    
    // these will give compile time error
    
    //let c_illegal = guard.immutable_reference();
    //let d_illegal = guard.mutable_reference();

    *b = String::from("reference from raw ptr 2");
    
    // `b` is no longer used, so a shared borrow is accepted.
    let e = guard.immutable_reference();
    
    // this will give compile time error
    
    // let f_illegal = guard.mutable_reference();
    
    println!("{}", *e);
    
    // `g` keeps `guard` mutably borrowed until the assignment through it below,
    // so every closure-based accessor in between is rejected as well.
    let g = guard.mutable_reference();

    /* this will give compile time error
    guard.with_immutable_pointer(|ptr| {
        println!("{}", unsafe { &*ptr });
    });
    */
    
    /* this will give compile time error
    guard.with_mutable_pointer(|ptr| unsafe {
        *ptr = String::from("hello");
    });
    */
    
    /* this will give compile time error
    guard.with_immutable_reference(|reff| {
        println!("{}", *reff);
    });
    */
    
    /* this will give compile time error
    guard.with_mutable_reference(|reff| {
        *reff = String::from("hello");
    });
    */
    
    *g = String::from("reference from raw ptr 3");

    // drop(guard) or guard.close() to close the guard without waiting an end of scope
    
    println!("{}", a);
    
    
}

Honestly, I wasn't knowledgeable enough to comment on this properly at first, but I’ve been digging deeper and feel like I have a better grasp now. I’ve refactored the code to separate the concerns and added safety comments to the unsafe blocks in the comment

If any of my comments are off :] please let me know. The goal here isn't to present a perfect implementation, but rather to spark some ideas and get a discussion going on this topic, so that everybody with more expertise can improve it, others might know what I do not know, and then we can all improve it together

I don’t have time to fully elaborate at the moment, but I believe &'a Cell<T> might have semantics you’d be interested in. See Cell::from_mut.

I think there are two reasonable possible semantics for your AliasingGuardMut. Either they are unique, or they are not.

The former case is practically identical to &'a mut T (though there is still a range of flexibility in exactly when you assert uniqueness… which even Rust’s &'a mut T has not yet decided on).

In the latter case, the guards aren’t unique, so you can’t obtain a &'a mut T from an AliasingGuardMut<'a, T> (which requires asserting uniqueness). However, you can still read and write a T from/to the pointee; so long as there’s no data races, you don’t need unique access over a T to mutate it. It’d even be fine to go all the way and make AliasableGuardMut<'a, T> Copy.

This sounds familiar… read and write a T at a given instant, but don’t obtain a &mut T or &T… prohibit multithreading… the pointee lives for 'a and the pointer is Copy… that’s just &'a Cell<T>.

2 Likes

The design for AliasingGuardMut is to have a fully unique semantics, not aliasable semantics like Cell

The current API is structured so the guard behaves conceptually like an owned &mut T:

  • the guard itself is unique
  • obtaining &mut T requires &mut self
  • immutable and mutable references are mutually exclusive through the borrow checker
  • closure based raw pointer APIs prevent raw pointer access from overlapping with active & or &mut references in safe code

Because of that, the design preserves the important guarantees carried by &mut:

  • exclusive mutable access to prevent aliasing based race conditions in safe code
  • noalias information for the optimizer

This is also the reason the design differs from Cell / UnsafeCell semantics

Cell allows aliasable mutation by design. Since aliasable writes are legal with Cell, the compiler can not rely on strong noalias assumptions and therefore must be more conservative with optimizations

In contrast, this guard is designed to preserve uniqueness semantics so the compiler can still perform aggressive optimizations similarly to normal &mut usage

The unsafe as_ptr escape hatch exists only for advanced use cases where closure scoped raw pointer access is insufficient. Once a raw pointer escapes, the compiler and the guard can no longer enforce uniqueness automatically, which is why that API is explicitly unsafe

It looks to me like, as far as borrow checking and safe code goes, this is just equivalent to &'a mut T, and for unsafe code, the functions like with_mutable_pointer() provide no additional guarantees, only a tiny bit of documentation (because raw pointers are always Copy, and thus can be copied out of your FnOnce callbacks).

Can you provide an example usage where this provides useful guarantees that could not be easier achieved by using &'a mut T and converting to a raw pointer as needed for each use?

3 Likes

To understand how this is useful, we must understand that when we are faced with a case that mixes raw pointers and references to the same memory, it is highly prone to human error leading to UB due to aliasing violations, which is why many people cite unsafe Rust as difficult. For example, when the current data is a raw pointer and there are multiple operations from other code or a third-party library, some of which accept raw pointers and some of which accept references, we need to convert the pointer to references. This is exactly where the human error occurs, because converting a raw pointer to references and using them must comply with Rust's aliasing rules.

Therefore, in conclusion, this is useful wherever we need to convert raw pointer to multiple references. Because its purpose is to bridge that conversion safely, there is no silent UB if Rust's aliasing is violated, the compiler immediately provides an error message at compile time, meaning it prevents UB code

Whereas converting naked raw pointer to multiple naked references has no UB prevention against aliasing rule violations

Converting raw pointers to references is widely used, for example, *mut T as &mut T and *mut T as &T. Because by converting it to a reference, we do not carry the raw pointer everywhere, raw pointers are unsafe, so we do not want to carry them everywhere. At a certain point, we want to convert it to reference so that the subsequent code can be written in safe Rust

Sorry, I may not have been clear enough. Can you please provide an example of usage — that is, example code which uses AliasingGuardMut, and benefits from it — that is, has a stronger static check than could have been achieved by using raw pointers instead, and could not have been written using &'a mut T alone?

2 Likes

For the with_mutable_pointer method, the purpose is to prevent human-error UB caused by aliasing violations, namely writing through a raw pointer while there is an active & or &mut, and reading through a raw pointer while there is an active &mut.

For example:


// Mixes a raw pointer and a `&mut` reference to the same `String`,
// then writes through both.
fn main() {
    let mut a = String::from("hello");
    
    // extract a raw pointer
    let ptr = &raw mut a;
    
    // extract a reference
    let reff = &mut a;
    
    // using the raw pointer in any operation
    unsafe { *ptr = String::from("hello 2") };
    
    // using the reference in any operation
    *reff = String::from("hello 3");
}

That causes UB because it violates the aliasing rules

error: Undefined Behavior: trying to retag from <441> for Unique permission at alloc315[0x0], but that tag does not exist in the borrow stack for this location
   --> /playground/.rustup/toolchains/nightly-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/ptr/mod.rs:825:1
    |
825 | / pub(crate) const unsafe fn drop_glue<T: PointeeSized>(_: &mut T)
826 | | where
827 | |     T: [const] Destruct,
    | |________________________^ this error occurs as part of function-entry retag at alloc315[0x0..0x18]
    |
    = help: this indicates a potential bug in the program: it performed an invalid operation, but the Stacked Borrows rules it violated are still experimental
    = help: see https://github.com/rust-lang/unsafe-code-guidelines/blob/master/wip/stacked-borrows.md for further information
help: <441> was created by a SharedReadWrite retag at offsets [0x0..0x18]

That UB cannot happen with the guard:

// Same scenario routed through `AliasingGuardMut`: the reference and the
// raw-pointer access are both obtained from the guard.
fn main() {
    let mut a = String::from("hello");
   
    let mut guard = AliasingGuardMut::from_reference(&mut a);

    // extract a reference
    let b = guard.mutable_reference();
    
    // Using the mutable raw pointer in any operation while the reference is
    // still active is rejected by the compiler with the error "cannot borrow
    // as mutable because it is also borrowed as mutable", because
    // `with_mutable_pointer` takes `&mut self`. This prevents the aliasing
    // between the mutable pointer and the reference that would cause UB.
    guard.with_mutable_pointer(|ptr| unsafe {
        *ptr = String::from("hello");
    });
    
    // using the reference in any operation
    *b = String::from("hello 3");
}

The one in my reply is an example where the with_mutable_pointer method prevents UB due to an aliasing-rule violation. Wait, I am currently writing more examples, including examples for the other methods.

This program could have equally well been written using the safe code *reff = String::from(...), so it doesn’t demonstrate the benefit of AliasingGuardMut<'a, T>. Can you provide an example program that

  • requires unsafe code,
  • uses AliasingGuardMut<'a, T> in a place that cannot be &'a mut T, and
  • uses AliasingGuardMut<'a, T> instead of raw pointers in a way that there is some bug it would prevent?

I am particularly concerned that calling with_mutable_pointer() allows straightforwardly making a copy of the pointer, or deriving other pointers from that pointer. Therefore, AliasingGuardMut can't guarantee anything except for the regions of code where with_mutable_pointer() (or as_ptr()) is not called. But in those regions, what does having AliasingGuardMut get you that having &mut does not?

2 Likes

All of the following code has UB that comes from human mistakes that violate the aliasing rules. What we call a mistake is when we do something accidentally, either because we don't know better or because we simply slipped up. And because the code still compiles and runs, we think everything is totally fine, when actually it is UB.

fn a(b: &mut Vec<i32>) {
    *b = vec![1];
}

// some other code that must take pointer
// eg FFI code, low level code that takes pointer
// or 3rd party library code that takes pointer
// we can't assume we can just change this to reference
// because we are simulating code that "must take raw pointer"
// any code, eg they are present in FFI code, low level code, 3rd lib code
fn b(b: *mut Vec<i32>) {
    unsafe { *b = vec![1] }
}

fn main() {
    let mut s = vec![1, 2, 3];
    
    // we create the raw pointer
    let input = &raw mut s;
    
    // some operations
    // this contains the human mistakes, violating the aliasing rules
    
    println!("{:?}", s);
    
    a(&mut s);
    
    b(input);
    
    // another inline operation
    println!("{:?}", s);   
}
struct Holder {
    ptr: *mut i32,
}

impl Holder {
    /// Stores `val` through the raw pointer held in `self.ptr`.
    ///
    /// This is a safe `fn` taking `&self`, so the compiler does not relate the
    /// write to any reference that may currently point at the same `i32`.
    fn write(&self, val: i32) {
        // No check is made here that `self.ptr` is valid or unaliased.
        unsafe {
            *self.ptr = val;
        }
    }
}

fn main() {
    let mut x = 5;

    let h = Holder {
        ptr: &mut x,
    };

    let r = &mut x;

    h.write(10);

    *r += 1;
}
// anything that saves pointer to heap
struct Arena<T> {
    ptr: *mut T,
}

impl<T> Arena<T> {
    /// Reborrows the stored raw pointer as `&'a mut T`.
    ///
    /// The lifetime `'a` is chosen freely by the caller and is not tied to
    /// `&self`, so the borrow checker cannot stop two calls from producing
    /// two live `&mut T` to the same value.
    ///
    /// # Safety
    ///
    /// `self.ptr` is not checked here; the caller is responsible for it being
    /// valid and for not creating overlapping mutable references.
    unsafe fn get_mut<'a>(&self) -> &'a mut T {
        &mut *self.ptr
    }
}

fn main() {
    let mut value = String::from("hello");

    let arena = Arena {
        ptr: &mut value,
    };

    let a = unsafe { arena.get_mut() };

    let b = unsafe { arena.get_mut() };

    a.push_str(" world");
    b.push_str(" !!!");
}
use std::ptr::NonNull;

struct Node {
    next: Option<NonNull<Node>>,
    value: i32,
}

struct List {
    head: Option<NonNull<Node>>,
}

impl List {
    /// Creates an empty list with no head node.
    fn new() -> Self {
        Self {
            head: None,
        }
    }

    /// Links `node` in front of the current head and makes it the new head.
    ///
    /// # Safety
    ///
    /// `node` is dereferenced and passed to `NonNull::new_unchecked`, so it
    /// must be non-null and valid for writes; neither is checked here.
    unsafe fn push_front(&mut self, node: *mut Node) {
        (*node).next = self.head;
        self.head = Some(NonNull::new_unchecked(node));
    }

    /// Returns a mutable reference to the head node, or `None` when the list
    /// is empty.
    ///
    /// # Safety
    ///
    /// The returned lifetime `'a` is not tied to `&mut self`, so repeated
    /// calls can hand out several `&mut Node` to the same node. The caller
    /// must prevent that and must ensure the head pointer is still valid.
    unsafe fn first_mut<'a>(&mut self) -> Option<&'a mut Node> {
        self.head.map(|ptr| &mut *ptr.as_ptr())
    }
}

fn increment(node: &mut Node) {
    node.value += 1;
}

fn main() {
    let mut node = Box::new(Node {
        next: None,
        value: 10,
    });

    let mut list = List::new();

    unsafe {
        list.push_front(&mut *node);
    }

    unsafe {
        let a = list.first_mut().unwrap();

        increment(a);

        let b = list.first_mut().unwrap();

        b.value += 10;

        println!("{}", a.value);
    }
}
use std::ptr::NonNull;

struct Buffer {
    ptr: NonNull<Vec<i32>>,
}

impl Buffer {
    /// Captures the address of `vec` as a `NonNull` pointer.
    ///
    /// `Buffer` has no lifetime parameter, so the borrow of `vec` ends when
    /// this call returns and the compiler no longer tracks the stored pointer.
    fn new(vec: &mut Vec<i32>) -> Self {
        Self {
            ptr: NonNull::from(vec),
        }
    }

    /// Reborrows the stored pointer as `&'a mut Vec<i32>`.
    ///
    /// # Safety
    ///
    /// The lifetime `'a` is not tied to `&mut self`, so repeated calls can
    /// return several live `&mut Vec<i32>` to the same vector. The caller must
    /// prevent that and must ensure the pointed-to vector is still alive.
    unsafe fn get_mut<'a>(&mut self) -> &'a mut Vec<i32> {
        &mut *self.ptr.as_ptr()
    }
}

fn append_data(vec: &mut Vec<i32>) {
    vec.push(100);
}

fn main() {
    let mut data = vec![1, 2, 3];

    let mut buffer = Buffer::new(&mut data);

    unsafe {
        let a = buffer.get_mut();

        append_data(a);

        let b = buffer.get_mut();

        b.push(200);

        a.push(300);

        println!("{:?}", a);
    }
}
use std::ptr::NonNull;

struct SlotMap<T> {
    slots: Vec<NonNull<T>>,
}

impl<T> SlotMap<T> {
    /// Creates a map with no slots.
    fn new() -> Self {
        Self {
            slots: Vec::new(),
        }
    }

    /// Appends a pointer to `value` as a new slot.
    ///
    /// Only the address is kept; the `&mut T` borrow itself is not stored, so
    /// the compiler does not track how long the pointer stays valid.
    fn insert(&mut self, value: &mut T) {
        self.slots.push(NonNull::from(value));
    }

    /// Reborrows the pointer stored at `index` as `&'a mut T`.
    ///
    /// Indexing `self.slots` panics when `index` is out of bounds.
    ///
    /// # Safety
    ///
    /// The lifetime `'a` is not tied to `&mut self`, so repeated calls with
    /// the same `index` can return several live `&mut T` to one value. The
    /// caller must prevent that and must ensure the pointee is still alive.
    unsafe fn get_mut<'a>(&mut self, index: usize) -> &'a mut T {
        &mut *self.slots[index].as_ptr()
    }
}

fn update_user(user: &mut String) {
    user.push_str(" updated");
}

fn main() {
    let mut user = String::from("alice");

    let mut map = SlotMap::new();

    map.insert(&mut user);

    unsafe {
        let current = map.get_mut(0);

        update_user(current);

        let another = map.get_mut(0);

        another.push_str(" !!!");

        println!("{}", current);
    }
}

However, if we use AliasingGuard, we get a no-aliasing guarantee, well-defined lifetimes, and compile-time errors. Here is how AliasingGuard can help:


fn a(b: &mut Vec<i32>) {
    *b = vec![1];
}

// some other code that must take pointer
// eg FFI code, low level code that takes pointer
// or 3rd party library code that takes pointer
fn b(b: *mut Vec<i32>) {
    unsafe { *b = vec![1] }
}

fn main() {
    let mut s = vec![1, 2, 3];

    let mut guard = AliasingGuardMut::from_reference(&mut s);
    
    // no UB, because everything is enforced to have well defined lifetime
    
    guard.with_mutable_reference(|reff| {
        a(reff);
    });

    guard.with_mutable_pointer(|ptr| {
        b(ptr);
    });
}
struct Holder<'a> {
    guard: AliasingGuardMut<'a, i32>,
}

impl<'a> Holder<'a> {
    fn write(&mut self, val: i32) {
        self.guard.with_mutable_pointer(|ptr| unsafe {
            *ptr = val;
        });
    }
}

fn main() {
    let mut x = 5;

    let mut h = Holder {
        guard: AliasingGuardMut::from_reference(&mut x),
    };

    let r = h.guard.mutable_reference();

    h.write(10);

    *r += 1;
}
struct Arena<'a, T> {
    guard: AliasingGuardMut<'a, T>,
}

impl<'a, T> Arena<'a, T> {
    fn get_mut(&self) -> &mut T {
        self.guard.mutable_reference()
    }
}

fn main() {
    let mut value = String::from("hello");

    let mut arena = Arena {
        guard: AliasingGuardMut::from_reference(&mut value),
    };

    let a = arena.get_mut();

    let b = arena.get_mut();

    a.push_str(" world");
    b.push_str(" !!!");
}
use std::ptr::NonNull;

struct Node {
    next: Option<NonNull<Node>>,
    value: i32,
}

struct List<'a> {
    head: Option<NonNull<Node>>,
    guard: Option<AliasingGuardMut<'a, Node>>,
}

impl<'a> List<'a> {
    fn new() -> Self {
        Self {
            head: None,
            guard: None,
        }
    }

    unsafe fn push_front(&mut self, node: &'a mut Node) {
        node.next = self.head;
        self.head = Some(NonNull::from(&mut *node));
        self.guard = Some(AliasingGuardMut::from_reference(node));
    }

    fn first_mut(&mut self) -> Option<&mut Node> {
        self.guard.as_mut().map(|guard| {
            guard.mutable_reference()
        })
    }
}

fn increment(node: &mut Node) {
    node.value += 1;
}

fn main() {
    let mut node = Box::new(Node {
        next: None,
        value: 10,
    });

    let mut list = List::new();

    unsafe {
        list.push_front(&mut *node);
    }

    let a = list.first_mut().unwrap();

    increment(a);

    let b = list.first_mut().unwrap();

    b.value += 10;

    println!("{}", a.value);
}
struct Buffer<'a> {
    guard: AliasingGuardMut<'a, Vec<i32>>,
}

impl<'a> Buffer<'a> {
    fn new(vec: &'a mut Vec<i32>) -> Self {
        Self {
            guard: AliasingGuardMut::from_reference(vec),
        }
    }

    fn get_mut(&mut self) -> &mut Vec<i32> {
        self.guard.mutable_reference()
    }
}

fn append_data(vec: &mut Vec<i32>) {
    vec.push(100);
}

fn main() {
    let mut data = vec![1, 2, 3];

    let mut buffer = Buffer::new(&mut data);

    let a = buffer.get_mut();

    append_data(a);

    let b = buffer.get_mut();

    b.push(200);

    a.push(300);

    println!("{:?}", a);
}
struct SlotMap<'a, T> {
    slot: Option<AliasingGuardMut<'a, T>>,
}

impl<'a, T> SlotMap<'a, T> {
    fn new() -> Self {
        Self {
            slot: None,
        }
    }

    fn insert(&mut self, value: &'a mut T) {
        self.slot = Some(AliasingGuardMut::from_reference(value));
    }

    fn get_mut(&mut self, _: usize) -> &mut T {
        self.slot
            .as_mut()
            .unwrap()
            .mutable_reference()
    }
}

fn update_user(user: &mut String) {
    user.push_str(" updated");
}

fn main() {
    let mut user = String::from("alice");

    let mut map = SlotMap::new();

    map.insert(&mut user);

    let current = map.get_mut(0);

    update_user(current);

    let another = map.get_mut(0);

    another.push_str(" !!!");

    println!("{}", current);
}

The point of the example is not the specific body of the code itself. The raw pointer operation is only a simulation of real world code or low level operations that fundamentally require raw pointers and can not be changed into references. Eg FFI calls, low level code, intrusive data structures, 3rd party libs that take raw pointers

It demonstrates how fragile things become once raw pointers and references start interacting. A tiny ordering mistake silently becomes UB without any compile-time error.

The purpose of AliasingGuard is to wrap the raw pointer before it is actually used, and then to use with_mutable_pointer for every operation that needs the pointer.

Not like this:

let mut leaked: *mut i32 = std::ptr::null_mut();

guard.with_mutable_pointer(|ptr| {
    leaked = ptr;
});

let r = guard.mutable_reference();

unsafe {
    *leaked = 123;
}

But like this:

let mut leaked: *mut i32 = std::ptr::null_mut();

guard.with_mutable_pointer(|ptr| {
    leaked = ptr;
});

let r = guard.mutable_reference();

// Route the write through the guard rather than through a copied pointer.
// Dereferencing the raw pointer requires an `unsafe` block.
guard.with_mutable_pointer(|ptr| unsafe {
    *ptr = 3;
});

Because once we wrap it, we opt into routing pointer operations through the guard whenever possible, which should cover the majority of cases

Maybe you could show examples of code that genuinely can not be expressed through the guard API, so we can investigate whether the code can be improved further. That way, we can gradually build a stronger safety around raw pointer usage. For now I'm trying to add sub guard :]

Meanwhile, the unsafe as_ptr() exists to revert to the unrestricted raw-pointer style, without the guard managing aliasing anymore. In other words, the guard explicitly steps aside because we asked to turn the guard system off entirely by escaping the pointer. It exists to support edge cases, not yet discovered, where with_mutable_pointer() is insufficient.

Here's how all your AliasingGuardMut examples can be written with simply a &mut reference, without raw pointers! So while AliasingGuardMut may be safer than using raw pointers, it doesn't seem to enable anything that &mut references can't already do. So where is the actual benefit of using AliasingGuardMut? Note that your examples even contained cases of unsoundness and reference-aliasing violations, which I have marked with comments, so AliasingGuardMut does not seem to prevent even those.

fn a(b: &mut Vec<i32>) {
    *b = vec![1];
}

// some other code that must take pointer
// eg FFI code, low level code that takes pointer
// or 3rd party library code that takes pointer
fn b(b: *mut Vec<i32>) {
    unsafe { *b = vec![1] }
}

fn main() {
    let mut s = vec![1, 2, 3];

    let reff = &mut s;
    a(reff);
    b(reff);
}
    guard: &'a mut i32
}

impl<'a> Holder<'a> {
    fn write(&mut self, val: i32) {
        *self.guard = val;
    }
}

fn main() {
    let mut x = 5;

    let mut h = Holder {
        guard: &mut x,
    };

    let r = &mut *h.guard;

    h.write(10);

    *r += 1;
}
struct Arena<'a, T> {
    guard: &'a mut T,
}

impl<'a, T> Arena<'a, T> {
    fn get_mut(&self) -> &mut T {
        // This is a compile time error.
        // I hope the existing self.guard.mutable_reference() was a compile time error too
        // or it would have been unsound (worse than a raw pointer because it didn't even require unsafe!)
        &mut *self.guard
    }
}

fn main() {
    let mut value = String::from("hello");

    let mut arena = Arena {
        guard: &mut value,
    };

    let a = arena.get_mut();

    let b = arena.get_mut();

    a.push_str(" world");
    b.push_str(" !!!");
}
use std::ptr::NonNull;

struct Node {
    next: Option<NonNull<Node>>,
    value: i32,
}

struct List<'a> {
    head: Option<NonNull<Node>>,
    guard: Option<&'a mut Node>,
}

impl<'a> List<'a> {
    fn new() -> Self {
        Self {
            head: None,
            guard: None,
        }
    }

    unsafe fn push_front(&mut self, node: &'a mut Node) {
        node.next = self.head;
        self.head = Some(NonNull::from(&mut *node));
        // Note that both the new and the old code invalidate the pointer in `self.head`
        // I'm not sure how you intended to use it, but it shows that AliasingGuardMut also has footguns
        // self.guard = Some(AliasingGuardMut::from_reference(node));
        self.guard = Some(node);
    }

    fn first_mut(&mut self) -> Option<&mut Node> {
        self.guard.as_mut()
    }
}

fn increment(node: &mut Node) {
    node.value += 1;
}

fn main() {
    let mut node = Box::new(Node {
        next: None,
        value: 10,
    });

    let mut list = List::new();

    unsafe {
        list.push_front(&mut *node);
    }

    let a = list.first_mut().unwrap();

    increment(a);

    let b = list.first_mut().unwrap();

    b.value += 10;

    println!("{}", a.value);
}
struct Buffer<'a> {
    guard: &'a mut Vec<i32>,
}

impl<'a> Buffer<'a> {
    fn new(vec: &'a mut Vec<i32>) -> Self {
        Self {
            guard: vec,
        }
    }

    fn get_mut(&mut self) -> &mut Vec<i32> {
        &mut *self.guard
    }
}

fn append_data(vec: &mut Vec<i32>) {
    vec.push(100);
}

fn main() {
    let mut data = vec![1, 2, 3];

    let mut buffer = Buffer::new(&mut data);

    let a = buffer.get_mut();

    append_data(a);

    let b = buffer.get_mut();

    b.push(200);

    a.push(300);

    println!("{:?}", a);
}
struct SlotMap<'a, T> {
    slot: Option<&'a mut T>,
}

impl<'a, T> SlotMap<'a, T> {
    fn new() -> Self {
        Self {
            slot: None,
        }
    }

    fn insert(&mut self, value: &'a mut T) {
        self.slot = Some(value);
    }

    fn get_mut(&mut self, _: usize) -> &mut T {
        self.slot
            .as_mut()
            .unwrap()
    }
}

fn update_user(user: &mut String) {
    user.push_str(" updated");
}

fn main() {
    let mut user = String::from("alice");

    let mut map = SlotMap::new();

    map.insert(&mut user);

    let current = map.get_mut(0);

    update_user(current);

    let another = map.get_mut(0);

    another.push_str(" !!!");

    println!("{}", current);
}
2 Likes