My wish list for Rust 2024 and beyond

I just gave an example with "?". For Rust this symbol is already reserved, but we could replace it with "§" or "?!" :laughing: (but I understand your point of view :+1:)

The Flat style always remains better by ~5%.

In my test I ran a loop of 10000000 iterations, just to be sure the noise is negligible.

For me, running cargo run --release, all three benchmarks (from 2e71828's playground) had this identical assembly:

; Loop body; the label "70" is the target of the `jne` at the bottom.
; Load two 16-byte values from the stack (two 64-bit lanes each).
70: movdqu xmm0,XMMWORD PTR [rsp]
    movdqu xmm1,XMMWORD PTR [rsp+0x10]
    ; xmm1 = lane-wise 64-bit sum of the two loads
    paddq  xmm1,xmm0
    ; shuffle 0xee copies the high 64-bit lane of xmm1 into the low lane of xmm0 ...
    pshufd xmm0,xmm1,0xee
    ; ... so the low lane of xmm0 becomes the sum of all four 64-bit values
    paddq  xmm0,xmm1
    movq   r8,xmm0
    ; add the current values of rcx and rdi to that sum
    add    r8,rcx
    add    r8,rdi
    ; rdi advances by one per iteration
    inc    rdi
    ; write the result to the stack slot at [rsp+0x20]
    mov    QWORD PTR [rsp+0x20],r8
    ; keep looping until rdi equals r15
    cmp    r15,rdi
    jne    70

So it is to be expected (though not guaranteed) that they also perform the same.

I'm not convinced the vectorization is ideal in this case, but that's beside the point. The benchmark is very artificial anyway.

5 Likes

I found a difference (release mode) in my test :thinking:

FLAT style :

; Listing for the "flat" variant: records Instant::now, iterates a Range,
; stores one result per iteration at [rsp+32], then calls Instant::duration_since.
; NOTE(review): operands are printed comma-separated ("qword, ptr, [rsp, +, 8]");
; this looks like a tool rendering of "qword ptr [rsp + 8]" - confirm.
; NOTE(review): the calls to into_iter / Range::next and the stack round-trips
; suggest this listing was produced without optimizations - confirm the build profile.
fn_test::test_perf_cpp_style:
 ; reserve 136 bytes of stack for locals
 sub     rsp, 136
 ; save incoming rdi (presumably the iteration-count argument - confirm against the Rust source)
 mov     qword, ptr, [rsp, +, 8], rdi
 ; zero the slot that the loop body later overwrites with its result
 mov     qword, ptr, [rsp, +, 32], 0
 ; first Instant::now(); its rax/edx results are kept at [rsp+16] / [rsp+28]
 call    qword, ptr, [rip, +, _ZN3std4time7Instant3now17hcaf082b521a24e93E@GOTPCREL]
 mov     rdi, qword, ptr, [rsp, +, 8]
 mov     qword, ptr, [rsp, +, 16], rax
 mov     dword, ptr, [rsp, +, 28], edx
 ; build the pair (0, saved rdi) at [rsp+40]/[rsp+48] and pass it to into_iter
 mov     qword, ptr, [rsp, +, 40], 0
 mov     qword, ptr, [rsp, +, 48], rdi
 mov     rdi, qword, ptr, [rsp, +, 40]
 mov     rsi, qword, ptr, [rsp, +, 48]
 call    <I as core::iter::traits::collect::IntoIterator>::into_iter
 ; iterator state returned in rax:rdx lives at [rsp+56]/[rsp+64]
 mov     qword, ptr, [rsp, +, 56], rax
 mov     qword, ptr, [rsp, +, 64], rdx
; Loop head: ask the Range iterator for its next value.
.LBB16_1:
 lea     rdi, [rsp, +, 56]
 call    core::iter::range::<impl core::iter::traits::iterator::Iterator for core::ops::range::Range<A>>::next
 mov     qword, ptr, [rsp, +, 80], rdx
 mov     qword, ptr, [rsp, +, 72], rax
 ; nonzero rax branches to the loop body (presumably the Some case - confirm)
 cmp     qword, ptr, [rsp, +, 72], 0
 jne     .LBB16_3
 ; Fall-through = loop finished: take a second Instant::now() and store it at [rsp+120]
 mov     rax, qword, ptr, [rip, +, _ZN3std4time7Instant3now17hcaf082b521a24e93E@GOTPCREL]
 call    rax
 ; rsi/edx = the first instant saved at [rsp+16]/[rsp+28]
 mov     rsi, qword, ptr, [rsp, +, 16]
 mov     ecx, edx
 mov     edx, dword, ptr, [rsp, +, 28]
 mov     dword, ptr, [rsp, +, 128], ecx
 mov     qword, ptr, [rsp, +, 120], rax
 ; duration_since(&second_instant, first_instant); its result is what the function returns
 mov     rax, qword, ptr, [rip, +, _ZN3std4time7Instant14duration_since17h04c83e08d82e773aE@GOTPCREL]
 lea     rdi, [rsp, +, 120]
 call    rax
 add     rsp, 136
 ret
; Loop body: rcx = value stored at [rsp+80] by the `next` call above.
.LBB16_3:
 mov     rcx, qword, ptr, [rsp, +, 80]
 ; compute rcx+1, rcx+3, rcx+3, rcx+2 in rdi, rsi, rdx, rax
 mov     rdi, rcx
 add     rdi, 1
 mov     rsi, rcx
 add     rsi, 3
 mov     rdx, rcx
 add     rdx, 3
 mov     rax, rcx
 add     rax, 2
 ; spill the four values to [rsp+88] .. [rsp+112]
 mov     qword, ptr, [rsp, +, 88], rdi
 mov     qword, ptr, [rsp, +, 96], rsi
 mov     qword, ptr, [rsp, +, 104], rdx
 mov     qword, ptr, [rsp, +, 112], rax
 ; result = [rsp+88] + [rsp+96] + rcx; the slots at +104/+112 are not read in this listing
 mov     rax, qword, ptr, [rsp, +, 88]
 add     rax, qword, ptr, [rsp, +, 96]
 add     rax, rcx
 mov     qword, ptr, [rsp, +, 32], rax
 jmp     .LBB16_1

Rust Style

; Listing for the "Rust style" variant: records Instant::now, iterates a Range,
; stores one result per iteration at [rsp+32], then calls Instant::duration_since.
; NOTE(review): operands are printed comma-separated ("qword, ptr, [rsp, +, 8]");
; this looks like a tool rendering of "qword ptr [rsp + 8]" - confirm.
; NOTE(review): the calls to into_iter / Range::next and the stack round-trips
; suggest this listing was produced without optimizations - confirm the build profile.
fn_test::test_perf_rust_style:
 ; reserve 152 bytes of stack for locals
 sub     rsp, 152
 ; save incoming rdi (presumably the iteration-count argument - confirm against the Rust source)
 mov     qword, ptr, [rsp, +, 8], rdi
 ; zero the slot that the loop body later overwrites with its result
 mov     qword, ptr, [rsp, +, 32], 0
 ; first Instant::now(); its rax/edx results are kept at [rsp+16] / [rsp+28]
 call    qword, ptr, [rip, +, _ZN3std4time7Instant3now17hcaf082b521a24e93E@GOTPCREL]
 mov     rdi, qword, ptr, [rsp, +, 8]
 mov     qword, ptr, [rsp, +, 16], rax
 mov     dword, ptr, [rsp, +, 28], edx
 ; build the pair (0, saved rdi) at [rsp+40]/[rsp+48] and pass it to into_iter
 mov     qword, ptr, [rsp, +, 40], 0
 mov     qword, ptr, [rsp, +, 48], rdi
 mov     rdi, qword, ptr, [rsp, +, 40]
 mov     rsi, qword, ptr, [rsp, +, 48]
 call    <I as core::iter::traits::collect::IntoIterator>::into_iter
 ; iterator state returned in rax:rdx lives at [rsp+56]/[rsp+64]
 mov     qword, ptr, [rsp, +, 56], rax
 mov     qword, ptr, [rsp, +, 64], rdx
; Loop head: ask the Range iterator for its next value.
.LBB15_1:
 lea     rdi, [rsp, +, 56]
 call    core::iter::range::<impl core::iter::traits::iterator::Iterator for core::ops::range::Range<A>>::next
 mov     qword, ptr, [rsp, +, 80], rdx
 mov     qword, ptr, [rsp, +, 72], rax
 ; nonzero rax branches to the loop body (presumably the Some case - confirm)
 cmp     qword, ptr, [rsp, +, 72], 0
 jne     .LBB15_3
 ; Fall-through = loop finished: take a second Instant::now() and store it at [rsp+136]
 mov     rax, qword, ptr, [rip, +, _ZN3std4time7Instant3now17hcaf082b521a24e93E@GOTPCREL]
 call    rax
 ; rsi/edx = the first instant saved at [rsp+16]/[rsp+28]
 mov     rsi, qword, ptr, [rsp, +, 16]
 mov     ecx, edx
 mov     edx, dword, ptr, [rsp, +, 28]
 mov     dword, ptr, [rsp, +, 144], ecx
 mov     qword, ptr, [rsp, +, 136], rax
 ; duration_since(&second_instant, first_instant); its result is what the function returns
 mov     rax, qword, ptr, [rip, +, _ZN3std4time7Instant14duration_since17h04c83e08d82e773aE@GOTPCREL]
 lea     rdi, [rsp, +, 136]
 call    rax
 add     rsp, 152
 ret
; Loop body: rcx = value stored at [rsp+80] by the `next` call above.
.LBB15_3:
 mov     rcx, qword, ptr, [rsp, +, 80]
 ; first pair: rcx+1 and rcx+3, parked in temporaries at [rsp+120]/[rsp+128]
 mov     rdx, rcx
 add     rdx, 1
 mov     rax, rcx
 add     rax, 3
 mov     qword, ptr, [rsp, +, 120], rdx
 mov     qword, ptr, [rsp, +, 128], rax
 ; second pair: rcx+3 and rcx+2, kept in rdx/rax
 mov     rdx, rcx
 add     rdx, 3
 mov     rax, rcx
 add     rax, 2
 ; reload the first pair from the temporaries before spilling all four values;
 ; this store/reload round-trip is the extra work in this listing's loop body
 mov     rdi, qword, ptr, [rsp, +, 120]
 mov     rsi, qword, ptr, [rsp, +, 128]
 ; spill the four values to [rsp+88] .. [rsp+112]
 mov     qword, ptr, [rsp, +, 88], rdi
 mov     qword, ptr, [rsp, +, 96], rsi
 mov     qword, ptr, [rsp, +, 104], rdx
 mov     qword, ptr, [rsp, +, 112], rax
 ; result = [rsp+88] + [rsp+96] + rcx; the slots at +104/+112 are not read in this listing
 mov     rax, qword, ptr, [rsp, +, 88]
 add     rax, qword, ptr, [rsp, +, 96]
 add     rax, rcx
 mov     qword, ptr, [rsp, +, 32], rax
 jmp     .LBB15_1

The Rust-style version has more instructions, so that justifies the 5%.