/// Appends `num_bytes_to_fill` bytes to `buffer` by repeating the last
/// `min(num_bytes_to_fill, repeating_fragment_len)` bytes already present
/// (run-length / back-reference expansion).
///
/// Rather than copying one fragment per iteration, the freshly-decoded output
/// is itself reused as the copy source, so the amount copied per call doubles
/// each round — only O(log(num_bytes_to_fill / repeating_fragment_len)) calls
/// to `extend_from_within` in total.
///
/// # Panics
/// Panics if `buffer` holds fewer than
/// `min(num_bytes_to_fill, repeating_fragment_len)` bytes to seed the repeat.
fn decode_rle_lib_optim(
    buffer: &mut Vec<u8>,
    repeating_fragment_len: usize,
    num_bytes_to_fill: usize,
) {
    // Reserve the full amount once so the repeated extends never reallocate.
    buffer.reserve(num_bytes_to_fill);

    let mut left_to_fill = num_bytes_to_fill;
    let mut fill_size = left_to_fill.min(repeating_fragment_len);
    let copied_data_start = buffer.len();

    // Initial copy so that the vec looks like
    //   o o o o o [ slice ] # # # # #
    // `extend_from_within` (stable since Rust 1.53) safely copies a range of
    // the Vec onto its own end — no unsafe, no split-borrow helper needed.
    // NOTE(review): for a partial fill (num_bytes_to_fill < fragment length)
    // this copies the fragment's *suffix*, matching the original code;
    // confirm against the naive decoder whether LZ77-style semantics
    // (a prefix of the fragment) were intended instead.
    buffer.extend_from_within(copied_data_start - fill_size..copied_data_start);
    left_to_fill -= fill_size;

    // Now we can double the items we copy each time:
    //   #1: o o o o o [ slice ] # # # # # # # # # # # # # # # # # # ...
    //   #2: o o o o o [ slice ] [ slice ] # # # # # # # # # # # # # ...
    //   #3: o o o o o [ slice ] [ slice ] [ slice ] [ slice ] # # # ...
    //   #4: ....
    while left_to_fill > 0 {
        // Clamp so the final round copies exactly the bytes still missing.
        fill_size = left_to_fill.min(fill_size);
        buffer.extend_from_within(copied_data_start..copied_data_start + fill_size);
        left_to_fill -= fill_size;
        fill_size *= 2;
    }
}
With this change, I get consistent benchmark results across all buffer sizes:
repeating_fragment_len == 1
test bench_decode_rle_lib_naive ... bench: 7,336 ns/iter (+/- 1,211)
test bench_decode_rle_lib_opt ... bench: 1,426 ns/iter (+/- 642)
test bench_decode_rle_naive ... bench: 6,291 ns/iter (+/- 636)
test bench_decode_rle_vuln ... bench: 4,221 ns/iter (+/- 631)
repeating_fragment_len == 512
test bench_decode_rle_lib_naive ... bench: 7,234 ns/iter (+/- 951)
test bench_decode_rle_lib_opt ... bench: 1,518 ns/iter (+/- 557)
test bench_decode_rle_naive ... bench: 6,642 ns/iter (+/- 433)
test bench_decode_rle_vuln ... bench: 3,543 ns/iter (+/- 306)