Hi, this code shows two versions of a short function that computes totient values from zero to a given N, based on a variant of a sieve. The version 1 uses a Vec and a for loop, the second a boxed array and a while loop. Version 2 works with a compile-time constant length N. So it's reasonable for the version 2 to be a little more optimizable. But here the second version avoids all but one array bound test and the division by zero test.
So I think such difference is excessive. So I think Rustc should add a Vec/slice length range analisys pass to propagate the length of vectors in simple cases (like this, here the Vec length never changes after its creation). Another optimization pass could be added to rustc for the for-step_by that here is pessimizing the code compared to the while loop. for loops are very common and I think they are worth having a more focused optimization before high-level semantics gets lots by LLVM.
#[inline(never)]
pub fn euler_phi1(n: usize) -> Vec<u32> {
let mut phi = vec![0_u32; n + 1];
phi[1] = 1;
for p in (3 .. n + 1).step_by(2) {
if phi[p] == 0 {
let mut i = (n / p - 1) | 1;
while i > 0 {
if phi[i] != 0 {
let mut x = i * p;
let mut f = phi[i] * (p as u32 ^ 1);
while x <= n {
phi[x] = f;
x *= p;
f = f.wrapping_mul(p as u32);
}
}
if i <= 2 { break; }
i -= 2;
}
}
}
for i in 1 .. n / 2 + 1 {
phi[i << 1] = if (i & 1) != 0 { phi[i] } else { phi[i] << 1 };
}
phi
}
const N: usize = 100_000; // Input.
type Phis = Box<[u32; N + 1]>;
#[inline(never)]
pub fn euler_phi2() -> Phis {
let mut phi: Phis = vec![0_u32; N + 1]
.into_boxed_slice()
.try_into()
.unwrap();
phi[1] = 1;
let mut p = 3;
while p < N + 1 {
if phi[p] == 0 {
let mut i = (N / p - 1) | 1;
while i > 0 {
if phi[i] != 0 { // Bound test.
let mut x = i * p;
let mut f = phi[i] * (p as u32 ^ 1);
while x <= N {
phi[x] = f;
x *= p;
f = f.wrapping_mul(p as u32);
}
}
if i <= 2 { break; }
i -= 2;
}
}
p += 2;
}
for i in 1 .. N / 2 + 1 {
phi[i << 1] = if (i & 1) != 0 { phi[i] } else { phi[i] << 1 };
}
phi
}
fn main() {
let p1 = euler_phi1(N);
let p2 = euler_phi2();
assert_eq!(&p1[..], &p2[..]);
}
euler_phi1:
push r15
push r14
push r13
push r12
push rbx
lea r15, [rsi + 1]
mov ecx, 4
xor r12d, r12d
mov rax, r15
mul rcx
mov r13, rax
setno al
jo .LBB0_47
mov rbx, rsi
mov r14, rdi
mov r12b, al
shl r12, 2
test r13, r13
je .LBB0_2
mov rdi, r13
mov rsi, r12
call qword ptr [rip + __rust_alloc_zeroed@GOTPCREL]
mov rcx, rax
test rcx, rcx
je .LBB0_48
.LBB0_5:
mov qword ptr [r14], rcx
mov qword ptr [r14 + 8], r15
mov qword ptr [r14 + 16], r15
cmp r15, 1
jbe .LBB0_6
mov dword ptr [rcx + 4], 1
mov r8d, 3
xor r9d, r9d
.LBB0_8:
test r9b, 1
je .LBB0_15
lea rax, [r8 + 1]
inc r8
je .LBB0_16
cmp r8, r15
jae .LBB0_16
inc rax
mov rdi, r8
mov r8, rax
jmp .LBB0_12
.LBB0_15:
cmp r8, r15
mov rax, r8
adc rax, 0
mov rdi, r8
cmp r8, r15
mov r8, rax
jae .LBB0_16
.LBB0_12:
cmp rdi, r15
jae .LBB0_13
mov r9b, 1
cmp dword ptr [rcx + 4*rdi], 0
jne .LBB0_8
test rdi, rdi
je .LBB0_33
mov rax, rbx
or rax, rdi
shr rax, 32
je .LBB0_24
mov rax, rbx
xor edx, edx
div rdi
dec rax
or rax, 1
cmp rax, r15
jb .LBB0_27
jmp .LBB0_34
.LBB0_24:
mov eax, ebx
xor edx, edx
div edi
dec rax
or rax, 1
cmp rax, r15
jae .LBB0_34
.LBB0_27:
mov r10d, edi
xor r10d, 1
.LBB0_28:
mov edx, dword ptr [rcx + 4*rax]
test edx, edx
je .LBB0_37
mov rsi, rax
imul rsi, rdi
cmp rsi, rbx
ja .LBB0_37
imul edx, r10d
.LBB0_31:
cmp rsi, r15
jae .LBB0_32
mov dword ptr [rcx + 4*rsi], edx
imul edx, edi
imul rsi, rdi
cmp rsi, rbx
jbe .LBB0_31
.LBB0_37:
cmp rax, 3
jb .LBB0_8
add rax, -2
cmp rax, r15
jb .LBB0_28
.LBB0_34:
lea rdx, [rip + .Lanon.f5a6578b9a75e6d29108a9d55f85dd98.4]
mov rdi, rax
mov rsi, r15
call qword ptr [rip + core::panicking::panic_bounds_check@GOTPCREL]
ud2
.LBB0_32:
lea rdx, [rip + .Lanon.f5a6578b9a75e6d29108a9d55f85dd98.5]
mov rdi, rsi
mov rsi, r15
call qword ptr [rip + core::panicking::panic_bounds_check@GOTPCREL]
ud2
.LBB0_2:
mov rcx, r12
test rcx, rcx
jne .LBB0_5
.LBB0_48:
mov rdi, r13
mov rsi, r12
call qword ptr [rip + alloc::alloc::handle_alloc_error@GOTPCREL]
ud2
.LBB0_16:
cmp rbx, 2
jb .LBB0_46
shr rbx
mov edi, 1
.LBB0_18:
test dil, 1
jne .LBB0_40
cmp rdi, r15
jae .LBB0_20
mov edx, dword ptr [rcx + 4*rdi]
add edx, edx
jmp .LBB0_43
.LBB0_40:
cmp rdi, r15
jae .LBB0_41
mov edx, dword ptr [rcx + 4*rdi]
.LBB0_43:
lea rax, [rdi + rdi]
cmp rax, r15
jae .LBB0_44
lea rsi, [rdi + 1]
mov dword ptr [rcx + 4*rax], edx
cmp rdi, rbx
mov rdi, rsi
jne .LBB0_18
.LBB0_46:
mov rax, r14
pop rbx
pop r12
pop r13
pop r14
pop r15
ret
.LBB0_13:
lea rdx, [rip + .Lanon.f5a6578b9a75e6d29108a9d55f85dd98.2]
mov rsi, r15
call qword ptr [rip + core::panicking::panic_bounds_check@GOTPCREL]
ud2
.LBB0_44:
lea rdx, [rip + .Lanon.f5a6578b9a75e6d29108a9d55f85dd98.8]
mov rdi, rax
mov rsi, r15
call qword ptr [rip + core::panicking::panic_bounds_check@GOTPCREL]
ud2
.LBB0_33:
lea rdi, [rip + str.0]
lea rdx, [rip + .Lanon.f5a6578b9a75e6d29108a9d55f85dd98.3]
mov esi, 25
call qword ptr [rip + core::panicking::panic@GOTPCREL]
ud2
.LBB0_20:
lea rdx, [rip + .Lanon.f5a6578b9a75e6d29108a9d55f85dd98.7]
mov rsi, r15
call qword ptr [rip + core::panicking::panic_bounds_check@GOTPCREL]
ud2
.LBB0_41:
lea rdx, [rip + .Lanon.f5a6578b9a75e6d29108a9d55f85dd98.6]
mov rsi, r15
call qword ptr [rip + core::panicking::panic_bounds_check@GOTPCREL]
ud2
.LBB0_6:
lea rdx, [rip + .Lanon.f5a6578b9a75e6d29108a9d55f85dd98.1]
mov edi, 1
mov rsi, r15
call qword ptr [rip + core::panicking::panic_bounds_check@GOTPCREL]
ud2
.LBB0_47:
call qword ptr [rip + alloc::raw_vec::capacity_overflow@GOTPCREL]
ud2
example::euler_phi2:
push rax
mov edi, 400004
mov esi, 4
call qword ptr [rip + __rust_alloc_zeroed@GOTPCREL]
test rax, rax
je .LBB1_16
mov rcx, rax
mov dword ptr [rax + 4], 1
mov esi, 3
jmp .LBB1_2
.LBB1_3:
lea rax, [rsi + 2]
cmp rsi, 99999
mov rsi, rax
jae .LBB1_4
.LBB1_2:
cmp dword ptr [rcx + 4*rsi], 0
jne .LBB1_3
mov eax, 100000
xor edx, edx
div esi
dec rax
or rax, 1
cmp rax, 100000
ja .LBB1_15
mov r8d, esi
xor r8d, 1
.LBB1_9:
mov edi, dword ptr [rcx + 4*rax]
test edi, edi
je .LBB1_13
mov rdx, rax
imul rdx, rsi
cmp rdx, 100000
ja .LBB1_13
imul edi, r8d
.LBB1_12:
mov dword ptr [rcx + 4*rdx], edi
imul edi, esi
imul rdx, rsi
cmp rdx, 100001
jb .LBB1_12
.LBB1_13:
cmp rax, 3
jb .LBB1_3
add rax, -2
cmp rax, 100001
jb .LBB1_9
.LBB1_15:
lea rdx, [rip + .Lanon.f5a6578b9a75e6d29108a9d55f85dd98.9]
mov esi, 100001
mov rdi, rax
call qword ptr [rip + core::panicking::panic_bounds_check@GOTPCREL]
ud2
.LBB1_4:
mov rax, -50000
.LBB1_5:
mov edx, eax
neg dl
and dl, 1
shlx esi, dword ptr [rcx + 4*rax + 200004], edx
mov dword ptr [rcx + 8*rax + 400008], esi
lea esi, [rax + 82]
not sil
and sil, 1
shlx esi, dword ptr [rcx + 4*rax + 200008], esi
mov dword ptr [rcx + 8*rax + 400016], esi
shlx esi, dword ptr [rcx + 4*rax + 200012], edx
mov dword ptr [rcx + 8*rax + 400024], esi
lea esi, [rax + 84]
not sil
and sil, 1
shlx esi, dword ptr [rcx + 4*rax + 200016], esi
mov dword ptr [rcx + 8*rax + 400032], esi
shlx edx, dword ptr [rcx + 4*rax + 200020], edx
mov dword ptr [rcx + 8*rax + 400040], edx
add rax, 5
jne .LBB1_5
mov rax, rcx
pop rcx
ret
.LBB1_16:
mov edi, 400004
mov esi, 4
call qword ptr [rip + alloc::alloc::handle_alloc_error@GOTPCREL]
ud2
.Lanon.f5a6578b9a75e6d29108a9d55f85dd98.0:
.ascii "/app/example.rs"
.Lanon.f5a6578b9a75e6d29108a9d55f85dd98.1:
.quad .Lanon.f5a6578b9a75e6d29108a9d55f85dd98.0
.asciz "\017\000\000\000\000\000\000\000\004\000\000\000\005\000\000"
.Lanon.f5a6578b9a75e6d29108a9d55f85dd98.2:
.quad .Lanon.f5a6578b9a75e6d29108a9d55f85dd98.0
.asciz "\017\000\000\000\000\000\000\000\007\000\000\000\f\000\000"
.Lanon.f5a6578b9a75e6d29108a9d55f85dd98.3:
.quad .Lanon.f5a6578b9a75e6d29108a9d55f85dd98.0
.asciz "\017\000\000\000\000\000\000\000\b\000\000\000\032\000\000"
str.0:
.ascii "attempt to divide by zero"
.Lanon.f5a6578b9a75e6d29108a9d55f85dd98.4:
.quad .Lanon.f5a6578b9a75e6d29108a9d55f85dd98.0
.asciz "\017\000\000\000\000\000\000\000\n\000\000\000\024\000\000"
.Lanon.f5a6578b9a75e6d29108a9d55f85dd98.5:
.quad .Lanon.f5a6578b9a75e6d29108a9d55f85dd98.0
.asciz "\017\000\000\000\000\000\000\000\017\000\000\000\031\000\000"
.Lanon.f5a6578b9a75e6d29108a9d55f85dd98.6:
.quad .Lanon.f5a6578b9a75e6d29108a9d55f85dd98.0
.asciz "\017\000\000\000\000\000\000\000\034\000\000\000)\000\000"
.Lanon.f5a6578b9a75e6d29108a9d55f85dd98.7:
.quad .Lanon.f5a6578b9a75e6d29108a9d55f85dd98.0
.asciz "\017\000\000\000\000\000\000\000\034\000\000\0009\000\000"
.Lanon.f5a6578b9a75e6d29108a9d55f85dd98.8:
.quad .Lanon.f5a6578b9a75e6d29108a9d55f85dd98.0
.asciz "\017\000\000\000\000\000\000\000\034\000\000\000\t\000\000"
.Lanon.f5a6578b9a75e6d29108a9d55f85dd98.9:
.quad .Lanon.f5a6578b9a75e6d29108a9d55f85dd98.0
.asciz "\017\000\000\000\000\000\000\0002\000\000\000\024\000\000"