Missing optimization of write!() with string literal


#1

On the Rust Discord server, the question came up whether to prefer write!(f, "literal") or f.write_str("literal") when writing a string literal to something that implements std::fmt::Write.

While this looks like a purely stylistic choice, a quick comparison suggests that the function call produces much simpler machine code than the macro:

use std::fmt::Write;

/// Writes the literal `"aaaa"` to `f` by way of the `write!` macro.
///
/// The `fmt::Result` produced by the macro is intentionally discarded.
/// `#[inline(never)]` asks the compiler to keep this function out of line.
#[inline(never)]
fn write_via_mac<T>(mut f: T)
where
    T: Write,
{
    let _ignored = write!(f, "aaaa");
}

/// Writes the literal `"aaaa"` to `f` by calling `Write::write_str` directly.
///
/// The returned `fmt::Result` is intentionally discarded.
/// `#[inline(never)]` asks the compiler to keep this function out of line.
#[inline(never)]
fn write_via_fun<T>(mut f: T)
where
    T: Write,
{
    let _ignored = f.write_str("aaaa");
}

/// Calls both writer variants once, each on the same `String`.
fn main() {
    // One buffer is shared; each call receives it as `&mut String`.
    let mut buf = String::new();
    write_via_mac(&mut buf);
    write_via_fun(&mut buf);
}

The relevant assembly:

playground::write_via_mac:
	sub	rsp, 56
	mov	qword ptr [rsp], rdi
	lea	rax, [rip + .Lbyte_str.j]
	mov	qword ptr [rsp + 8], rax
	mov	qword ptr [rsp + 16], 1
	mov	qword ptr [rsp + 24], 0
	lea	rax, [rip + .Lbyte_str.k]
	mov	qword ptr [rsp + 40], rax
	mov	qword ptr [rsp + 48], 0
	lea	rsi, [rip + .Lvtable.7]
	mov	rdi, rsp
	lea	rdx, [rsp + 8]
	call	core::fmt::write@PLT
	add	rsp, 56
	ret

playground::write_via_fun:
	push	r14
	push	rbx
	push	rax
	mov	r14, rdi
	mov	rsi, qword ptr [r14 + 8]
	mov	rbx, qword ptr [r14 + 16]
	mov	rax, rsi
	sub	rax, rbx
	cmp	rax, 4
	jae	.LBB13_1
	add	rbx, 4
	jb	.LBB13_9
	lea	rax, [rsi + rsi]
	cmp	rbx, rax
	cmovb	rbx, rax
	test	rsi, rsi
	je	.LBB13_5
	mov	rdi, qword ptr [r14]
	mov	edx, 1
	mov	rcx, rbx
	call	__rust_realloc@PLT
	test	rax, rax
	je	.LBB13_10

.LBB13_8:
	mov	qword ptr [r14], rax
	mov	qword ptr [r14 + 8], rbx
	mov	rbx, qword ptr [r14 + 16]
	jmp	.LBB13_2

.LBB13_1:
	mov	rax, qword ptr [r14]

.LBB13_2:
	lea	rcx, [rbx + 4]
	mov	qword ptr [r14 + 16], rcx
	mov	dword ptr [rax + rbx], 1633771873
	add	rsp, 8
	pop	rbx
	pop	r14
	ret

.LBB13_5:
	mov	esi, 1
	mov	rdi, rbx
	call	__rust_alloc@PLT
	test	rax, rax
	jne	.LBB13_8

.LBB13_10:
	call	<alloc::alloc::Global as core::alloc::Alloc>::oom
	ud2

.LBB13_9:
	call	alloc::raw_vec::capacity_overflow@PLT
	ud2

And the playground link.

My two questions would be a) whether I did something wrong in this comparison and b) whether it’d be possible and worthwhile to optimize write!(f, "literal") to give the same performance as the corresponding function call.


#2

I’d like to see this as well, for write!, print!, and println!. GCC and Clang have similar optimizations: they’ll turn a printf of a constant string ending in a \n into a call to puts, and a single-character printf into putchar. Rust should do the same thing for a call to write!, print!, or println! with a string literal.


#3

Even without literals, using write! can be wasteful. For instance, compare write!(f, "{}", s), where s is a &str, with f.write_str(s).