Missing optimization of write!() with string literal


On the Rust Discord server, the question came up whether to prefer write!(f, "literal") or f.write_str("literal") when writing a string literal to something that implements std::fmt::Write.

While this looks like a purely stylistic choice, a quick comparison indicates that the function call produces much simpler machine code than the macro:

use std::fmt::Write;

/// Writes the literal `"aaaa"` to `f` using the `write!` macro.
///
/// This is the macro-based half of the comparison; `write_via_fun` is its
/// counterpart that calls `write_str` directly.
fn write_via_mac<T: Write>(mut f: T) {
    // The `fmt::Result` is discarded on purpose: only the generated code
    // matters for this comparison, not the outcome of the write.
    let _ = write!(f, "aaaa");
}

/// Writes the literal `"aaaa"` to `f` by calling `Write::write_str` directly.
///
/// This is the plain-function half of the comparison; `write_via_mac` is its
/// counterpart that goes through the `write!` macro.
fn write_via_fun<T: Write>(mut f: T) {
    // The `fmt::Result` is discarded on purpose: only the generated code
    // matters for this comparison, not the outcome of the write.
    let _ = f.write_str("aaaa");
}

fn main() {
    let mut s = String::new();
    write_via_mac(&mut s);
    write_via_fun(&mut s);

The relevant assembly, first for write_via_mac and then for write_via_fun:

	sub	rsp, 56
	mov	qword ptr [rsp], rdi
	lea	rax, [rip + .Lbyte_str.j]
	mov	qword ptr [rsp + 8], rax
	mov	qword ptr [rsp + 16], 1
	mov	qword ptr [rsp + 24], 0
	lea	rax, [rip + .Lbyte_str.k]
	mov	qword ptr [rsp + 40], rax
	mov	qword ptr [rsp + 48], 0
	lea	rsi, [rip + .Lvtable.7]
	mov	rdi, rsp
	lea	rdx, [rsp + 8]
	call	core::fmt::write@PLT
	add	rsp, 56

	push	r14
	push	rbx
	push	rax
	mov	r14, rdi
	mov	rsi, qword ptr [r14 + 8]
	mov	rbx, qword ptr [r14 + 16]
	mov	rax, rsi
	sub	rax, rbx
	cmp	rax, 4
	jae	.LBB13_1
	add	rbx, 4
	jb	.LBB13_9
	lea	rax, [rsi + rsi]
	cmp	rbx, rax
	cmovb	rbx, rax
	test	rsi, rsi
	je	.LBB13_5
	mov	rdi, qword ptr [r14]
	mov	edx, 1
	mov	rcx, rbx
	call	__rust_realloc@PLT
	test	rax, rax
	je	.LBB13_10

	mov	qword ptr [r14], rax
	mov	qword ptr [r14 + 8], rbx
	mov	rbx, qword ptr [r14 + 16]
	jmp	.LBB13_2

	mov	rax, qword ptr [r14]

	lea	rcx, [rbx + 4]
	mov	qword ptr [r14 + 16], rcx
	mov	dword ptr [rax + rbx], 1633771873
	add	rsp, 8
	pop	rbx
	pop	r14

	mov	esi, 1
	mov	rdi, rbx
	call	__rust_alloc@PLT
	test	rax, rax
	jne	.LBB13_8

	call	<alloc::alloc::Global as core::alloc::Alloc>::oom

	call	alloc::raw_vec::capacity_overflow@PLT

And the playground link.

My two questions would be a) whether I did something wrong in this comparison and b) whether it’d be possible and worthwhile to optimize write!(f, "literal") to give the same performance as the corresponding function call.


I’d like to see this too, for write!, print!, and println!. GCC and Clang have a similar optimization for printf: a call whose format string is a constant with no format specifiers and ends in \n becomes a call to puts, and a single-character printf becomes putchar. Rust should do the same thing for a call to write!, print!, or println! with a string literal.


Even without literals, using write! can be wasteful. For instance, compare write!(f, "{}", s) (where s is a &str), with f.write_str(s).