As a variant of the Rule of least power I'd like to use a boxed slice instead of a Vec when I don't want to change its length after it's built.
This is the asm if I use a regular Vec:
pub fn foo(n: usize) -> Vec<u32> {
vec![0; n]
}
example::foo:
push r15
push r14
push r12
push rbx
push rax
mov ecx, 4
xor ebx, ebx
mov rax, rsi
mul rcx
mov r12, rax
setno al
jo .LBB2_6
mov r14, rsi
mov r15, rdi
mov bl, al
shl rbx, 2
test r12, r12
je .LBB2_3
mov rdi, r12
mov rsi, rbx
call qword ptr [rip + __rust_alloc_zeroed@GOTPCREL]
test rax, rax
je .LBB2_7
.LBB2_5:
shr r12, 2
mov qword ptr [r15], rax
mov qword ptr [r15 + 8], r12
mov qword ptr [r15 + 16], r14
mov rax, r15
add rsp, 8
pop rbx
pop r12
pop r14
pop r15
ret
.LBB2_3:
mov rax, rbx
test rax, rax
jne .LBB2_5
.LBB2_7:
mov rdi, r12
mov rsi, rbx
call alloc::raw_vec::RawVec<T,A>::allocate_in::{{closure}}
ud2
.LBB2_6:
call alloc::raw_vec::RawVec<T,A>::allocate_in::{{closure}}
ud2
If the length if a compile-time constant then there's this often better solution:
#![feature(box_syntax)]
pub fn spam1() -> Box<[u32]> {
box [0; 1000]
}
pub fn spam2() -> Box<[u32; 1000]> {
box [0; 1000]
}
Their asm is very similar, the slice needs the length too (the mov edx, 1000
instruction):
example::spam1:
push rbx
mov edi, 4000
mov esi, 4
call qword ptr [rip + __rust_alloc@GOTPCREL]
test rax, rax
je .LBB4_1
mov rbx, rax
mov edx, 4000
mov rdi, rax
xor esi, esi
call qword ptr [rip + memset@GOTPCREL]
mov edx, 1000
mov rax, rbx
pop rbx
ret
.LBB4_1:
mov edi, 4000
mov esi, 4
call qword ptr [rip + alloc::alloc::handle_alloc_error@GOTPCREL]
ud2
example::spam2:
push rbx
mov edi, 4000
mov esi, 4
call qword ptr [rip + __rust_alloc@GOTPCREL]
test rax, rax
je .LBB5_1
mov rbx, rax
mov edx, 4000
mov rdi, rax
xor esi, esi
call qword ptr [rip + memset@GOTPCREL]
mov rax, rbx
pop rbx
ret
.LBB5_1:
mov edi, 4000
mov esi, 4
call qword ptr [rip + alloc::alloc::handle_alloc_error@GOTPCREL]
ud2
But often I can't use that because the length is a run-time value. I could write:
pub fn bar(n: usize) -> Box<[u32]> {
vec![0; n].into()
}
example::bar:
push r15
push r14
push rbx
mov ecx, 4
xor ebx, ebx
mov rax, rdi
mul rcx
mov r15, rax
setno al
jo .LBB3_13
mov r14, rdi
mov bl, al
shl rbx, 2
test r15, r15
je .LBB3_3
mov rdi, r15
mov rsi, rbx
call qword ptr [rip + __rust_alloc_zeroed@GOTPCREL]
test rax, rax
je .LBB3_14
.LBB3_5:
mov rcx, r15
shr rcx, 2
cmp rcx, r14
je .LBB3_12
jb .LBB3_15
test r15, r15
je .LBB3_12
lea rbx, [4*r14]
cmp r15, rbx
je .LBB3_12
mov edx, 4
mov rdi, rax
mov rsi, r15
test rbx, rbx
je .LBB3_10
mov rcx, rbx
call qword ptr [rip + __rust_realloc@GOTPCREL]
test rax, rax
jne .LBB3_12
mov esi, 4
mov rdi, rbx
call qword ptr [rip + alloc::alloc::handle_alloc_error@GOTPCREL]
ud2
.LBB3_3:
mov rax, rbx
test rax, rax
jne .LBB3_5
.LBB3_14:
mov rdi, r15
mov rsi, rbx
call alloc::raw_vec::RawVec<T,A>::allocate_in::{{closure}}
ud2
.LBB3_10:
call qword ptr [rip + __rust_dealloc@GOTPCREL]
mov eax, 4
.LBB3_12:
mov rdx, r14
pop rbx
pop r14
pop r15
ret
.LBB3_13:
call alloc::raw_vec::RawVec<T,A>::allocate_in::{{closure}}
ud2
.LBB3_15:
lea rdi, [rip + .L__unnamed_1]
lea rdx, [rip + .L__unnamed_2]
mov esi, 36
call qword ptr [rip + core::panicking::panic@GOTPCREL]
ud2
The asm of bar() is messy, I think it should't be more complex than the asm of foo() (and I think this messy stuff is actually slowing down my code a bit). Currently the implementation of the Vec -> Boxed slice is:
pub fn into_boxed_slice(mut self) -> Box<[T]> {
unsafe {
self.shrink_to_fit();
let me = ManuallyDrop::new(self);
let buf = ptr::read(&me.buf);
let len = me.len();
buf.into_box(len).assume_init()
}
}
That in general is good, but for a Vec that was just allocated I think that's overkill. Can this problem be solved in future allowing the creation of dynamically sized boxed slices in a single go?
pub fn bar2(n: usize) -> Box<[u32]> {
box [0; n]
}
This syntax in bar2() can't be used because it's the same for the allocation of a boxed array (but see spam1/spam2), so some other syntax should be used. Better solutions are welcome.