Using the `store` method of `AtomicU64` gives me about 150 lines (I wonder what I'm doing differently), but the reason the IR is so bloated is that `AtomicU64::store` can be used in multiple ways, depending on the ordering you pass to it.
Let me clarify. The `AtomicU64::store` method simply forwards to the `atomic_store` function; nothing special here.
/// Stores `val` into this atomic using the memory ordering `order`.
///
/// This is a thin wrapper: all of the work is forwarded to `atomic_store`.
pub fn store(&self, val: $int_type, order: Ordering) {
// `self.v.get()` supplies the destination pointer that `atomic_store` writes through.
unsafe { atomic_store(self.v.get(), val, order); }
}
This translates to rather straightforward assembly.
core::sync::atomic::AtomicU64::store:
sub rsp, 40
mov qword ptr [rsp + 32], rdi
mov rax, qword ptr [rsp + 32]
mov rdi, rax
mov qword ptr [rsp + 24], rsi
mov byte ptr [rsp + 23], dl
call qword ptr [rip + core::cell::UnsafeCell<T>::get@GOTPCREL]
mov qword ptr [rsp + 8], rax
mov rdi, qword ptr [rsp + 8]
mov rsi, qword ptr [rsp + 24]
mov al, byte ptr [rsp + 23]
movzx edx, al
call qword ptr [rip + core::sync::atomic::atomic_store@GOTPCREL]
add rsp, 40
ret
core::cell::UnsafeCell<T>::get:
mov rax, rdi
ret
The real deal is `atomic_store`, however.
/// Writes `val` through `dst` using the store intrinsic that matches `order`.
///
/// # Panics
///
/// Panics if `order` is `Acquire` or `AcqRel`, since neither is a valid
/// ordering for a store.
unsafe fn atomic_store<T>(dst: *mut T, val: T, order: Ordering) {
// `order` is an ordinary runtime argument here, so every arm of this match
// (including both panicking ones) ends up in the generated code.
match order {
Release => intrinsics::atomic_store_rel(dst, val),
Relaxed => intrinsics::atomic_store_relaxed(dst, val),
SeqCst => intrinsics::atomic_store(dst, val),
Acquire => panic!("there is no such thing as an acquire store"),
AcqRel => panic!("there is no such thing as an acquire/release store"),
}
}
There is no inlining in an unoptimized build, so the compiler cannot propagate the ordering you passed into `atomic_store` and has to compile code for every arm of the match. In addition, two of the arms panic, which means unwinding, which means even more code bloat to handle a possible unwind.
The generated assembly code has a jump table for all possible variants (the Rust compiler told LLVM to generate one, so that's what it did). However, because optimizations are disabled, the compiler did not bother optimizing the branches behind the jump table or merging similar code, so they are somewhat verbose.
core::sync::atomic::atomic_store:
sub rsp, 56
mov byte ptr [rsp + 31], dl
mov byte ptr [rsp + 39], 0
mov byte ptr [rsp + 39], 1
movzx eax, byte ptr [rsp + 31]
mov ecx, eax
mov qword ptr [rsp + 16], rsi
mov qword ptr [rsp + 8], rdi
mov qword ptr [rsp], rcx
lea rax, [rip + .LJTI1_0]
mov rcx, qword ptr [rsp]
movsxd rdx, dword ptr [rax + 4*rcx]
add rdx, rax
jmp rdx
.LBB1_1:
mov rdi, qword ptr [rsp + 40]
call _Unwind_Resume@PLT
ud2
.LBB1_2:
lea rdi, [rip + .L__unnamed_1]
lea rdx, [rip + .L__unnamed_2]
mov rax, qword ptr [rip + core::panicking::panic@GOTPCREL]
mov esi, 50
call rax
jmp .LBB1_14
ud2
.LBB1_4:
mov byte ptr [rsp + 39], 0
mov rax, qword ptr [rsp + 8]
mov rcx, qword ptr [rsp + 16]
mov qword ptr [rax], rcx
jmp .LBB1_11
.LBB1_6:
mov byte ptr [rsp + 39], 0
mov rax, qword ptr [rsp + 8]
mov rcx, qword ptr [rsp + 16]
mov qword ptr [rax], rcx
jmp .LBB1_11
.LBB1_8:
mov byte ptr [rsp + 39], 0
mov rax, qword ptr [rsp + 16]
mov rcx, qword ptr [rsp + 8]
xchg qword ptr [rcx], rax
jmp .LBB1_11
.LBB1_10:
lea rdi, [rip + .L__unnamed_3]
lea rdx, [rip + .L__unnamed_4]
mov rax, qword ptr [rip + core::panicking::panic@GOTPCREL]
mov esi, 42
call rax
jmp .LBB1_14
.LBB1_11:
add rsp, 56
ret
.LBB1_12:
mov byte ptr [rsp + 39], 0
jmp .LBB1_1
.LBB1_13:
test byte ptr [rsp + 39], 1
jne .LBB1_12
jmp .LBB1_1
.LBB1_14:
ud2
mov qword ptr [rsp + 40], rax
mov dword ptr [rsp + 48], edx
jmp .LBB1_13
.LJTI1_0:
.long .LBB1_6-.LJTI1_0
.long .LBB1_4-.LJTI1_0
.long .LBB1_10-.LJTI1_0
.long .LBB1_2-.LJTI1_0
.long .LBB1_8-.LJTI1_0
.L__unnamed_1:
.ascii "there is no such thing as an acquire/release store"
.L__unnamed_5:
.ascii "src/libcore/sync/atomic.rs"
.L__unnamed_2:
.quad .L__unnamed_5
.asciz "\032\000\000\000\000\000\000\000]\b\000\000\023\000\000"
.L__unnamed_3:
.ascii "there is no such thing as an acquire store"
.L__unnamed_4:
.quad .L__unnamed_5
.asciz "\032\000\000\000\000\000\000\000\\\b\000\000\024\000\000"
When optimized in release mode (still assuming the `Ordering` is not known at compile time), the code looks much more readable.
push rax
movzx eax, dl
lea rcx, [rip + .LJTI0_0]
movsxd rax, dword ptr [rcx + 4*rax]
add rax, rcx
jmp rax
.LBB0_4:
mov qword ptr [rdi], rsi
pop rax
ret
.LBB0_2:
xchg qword ptr [rdi], rsi
pop rax
ret
.LBB0_3:
lea rdi, [rip + .L__unnamed_1]
lea rdx, [rip + .L__unnamed_2]
mov esi, 42
call qword ptr [rip + core::panicking::panic@GOTPCREL]
ud2
.LBB0_1:
lea rdi, [rip + .L__unnamed_3]
lea rdx, [rip + .L__unnamed_4]
mov esi, 50
call qword ptr [rip + core::panicking::panic@GOTPCREL]
ud2
.LJTI0_0:
.long .LBB0_4-.LJTI0_0
.long .LBB0_4-.LJTI0_0
.long .LBB0_3-.LJTI0_0
.long .LBB0_1-.LJTI0_0
.long .LBB0_2-.LJTI0_0
.L__unnamed_3:
.ascii "there is no such thing as an acquire/release store"
.L__unnamed_5:
.ascii "src/libcore/sync/atomic.rs"
.L__unnamed_4:
.quad .L__unnamed_5
.asciz "\032\000\000\000\000\000\000\000]\b\000\000\023\000\000"
.L__unnamed_1:
.ascii "there is no such thing as an acquire store"
.L__unnamed_2:
.quad .L__unnamed_5
.asciz "\032\000\000\000\000\000\000\000\\\b\000\000\024\000\000"
How does that affect the compilation of other parts of Rust code in development mode? It doesn't, really. The pattern used by atomics, where a parameter selects which function should actually be used, isn't seen often.
That said, there are many issues with the quality of the code generated in development mode (it's bad), but atomics are an edge case of sorts.