On the Rust Discord server, the question came up whether to prefer `write!(f, "literal")` or `f.write_str("literal")` when writing a string literal to something that implements `std::fmt::Write`.
While this looks like a purely stylistic choice, a quick comparison indicates that the function call produces much simpler machine code than the macro:
use std::fmt::Write;
/// Writes the literal `"aaaa"` into `f` through the `write!` macro.
///
/// The formatting result is deliberately discarded; this function exists
/// only so the code generated for the macro form can be inspected.
/// `#[inline(never)]` asks the compiler to keep it as a separate function.
#[inline(never)]
fn write_via_mac<T>(mut f: T)
where
    T: Write,
{
    let _discarded: std::fmt::Result = write!(f, "aaaa");
}
/// Writes the literal `"aaaa"` into `f` by calling `write_str` directly.
///
/// The result is deliberately discarded; this function exists only so the
/// code generated for the plain method call can be inspected.
/// `#[inline(never)]` asks the compiler to keep it as a separate function.
#[inline(never)]
fn write_via_fun<T>(mut f: T)
where
    T: Write,
{
    let _discarded: std::fmt::Result = f.write_str("aaaa");
}
/// Calls both writer variants on the same `String` so each one is
/// instantiated for `&mut String` and shows up in the generated assembly.
fn main() {
    let mut buffer = String::new();

    // Macro form first, then the direct method call, both appending to `buffer`.
    write_via_mac(&mut buffer);
    write_via_fun(&mut buffer);
}
The relevant assembly:
playground::write_via_mac:
sub rsp, 56
mov qword ptr [rsp], rdi
lea rax, [rip + .Lbyte_str.j]
mov qword ptr [rsp + 8], rax
mov qword ptr [rsp + 16], 1
mov qword ptr [rsp + 24], 0
lea rax, [rip + .Lbyte_str.k]
mov qword ptr [rsp + 40], rax
mov qword ptr [rsp + 48], 0
lea rsi, [rip + .Lvtable.7]
mov rdi, rsp
lea rdx, [rsp + 8]
call core::fmt::write@PLT
add rsp, 56
ret
playground::write_via_fun:
push r14
push rbx
push rax
mov r14, rdi
mov rsi, qword ptr [r14 + 8]
mov rbx, qword ptr [r14 + 16]
mov rax, rsi
sub rax, rbx
cmp rax, 4
jae .LBB13_1
add rbx, 4
jb .LBB13_9
lea rax, [rsi + rsi]
cmp rbx, rax
cmovb rbx, rax
test rsi, rsi
je .LBB13_5
mov rdi, qword ptr [r14]
mov edx, 1
mov rcx, rbx
call __rust_realloc@PLT
test rax, rax
je .LBB13_10
.LBB13_8:
mov qword ptr [r14], rax
mov qword ptr [r14 + 8], rbx
mov rbx, qword ptr [r14 + 16]
jmp .LBB13_2
.LBB13_1:
mov rax, qword ptr [r14]
.LBB13_2:
lea rcx, [rbx + 4]
mov qword ptr [r14 + 16], rcx
mov dword ptr [rax + rbx], 1633771873
add rsp, 8
pop rbx
pop r14
ret
.LBB13_5:
mov esi, 1
mov rdi, rbx
call __rust_alloc@PLT
test rax, rax
jne .LBB13_8
.LBB13_10:
call <alloc::alloc::Global as core::alloc::Alloc>::oom
ud2
.LBB13_9:
call alloc::raw_vec::capacity_overflow@PLT
ud2
And the playground link.
My two questions would be a) whether I did something wrong in this comparison and b) whether it’d be possible and worthwhile to optimize `write!(f, "literal")` to give the same performance as the corresponding function call.