Here is a small app:
//! ```cargo
//! [dependencies]
//! libc = "0.2.151"
//! rand = "0.8.5"
//!
//! [target.'cfg(windows)'.dependencies]
//! winapi = { version = "0.3.9", features = ["winbase"] }
//! ```
use rand::prelude::*;
use std::fs::{File, OpenOptions};
use std::io::{Seek, SeekFrom};
use std::{env, io};
/// Size in bytes of one sector of the file (1 GiB); `main` reads one chunk from every sector
/// on each pass.
const SECTOR_SIZE: usize = 1 << 30;
/// Number of bytes fetched by each individual read.
const CHUNK_SIZE: usize = 32;
/// Endlessly reads one random `CHUNK_SIZE`-byte chunk from every full `SECTOR_SIZE` sector of
/// the file whose path is given as the first command-line argument, printing a progress line
/// every 10 000 passes.
///
/// # Errors
///
/// Returns an error if the path argument is missing, if the file is smaller than one sector,
/// or if opening, advising or reading the file fails.
fn main() -> io::Result<()> {
    // A missing argument is a user mistake, not a bug: report it instead of panicking.
    let path = env::args().nth(1).ok_or_else(|| {
        io::Error::new(
            io::ErrorKind::InvalidInput,
            "usage: <program> <path to large file>",
        )
    })?;

    // Seeking to the end yields the file length; only whole sectors are sampled.
    let sectors = (File::open(&path)?.seek(SeekFrom::End(0))? / SECTOR_SIZE as u64) as usize;
    if sectors == 0 {
        // Without this check the loop below would spin forever without reading anything.
        return Err(io::Error::new(
            io::ErrorKind::InvalidInput,
            "file is smaller than one sector, nothing to read",
        ));
    }

    // The random-access hint is applied both at open time (effective on Windows) and on the
    // opened handle (effective on Linux/macOS).
    let file = OpenOptions::new()
        .read(true)
        .advise_random_access()
        .open(&path)?;
    file.advise_random_access()?;

    // One destination buffer per sector, reused on every pass.
    let mut result = vec![[0u8; CHUNK_SIZE]; sectors];
    let mut rng = thread_rng();

    // An explicit `u64` counter avoids the overflow of the default `i32` on long runs.
    for i in 0u64.. {
        for (sector, chunk) in result.iter_mut().enumerate() {
            let sector_offset = sector * SECTOR_SIZE;
            // Pick a chunk-aligned position inside the sector.
            let offset_within_sector = rng.gen_range(0..SECTOR_SIZE / CHUNK_SIZE) * CHUNK_SIZE;
            file.read_at(chunk, sector_offset + offset_within_sector)?;
        }
        if i > 0 && i % 10_000 == 0 {
            println!("{i} iterations");
        }
    }
    Ok(())
}
/// Positional reads through a shared reference; implementors must be `Send + Sync`.
trait ReadAtSync
where
    Self: Send + Sync,
{
    /// Fills `buf` with bytes read starting at the given `offset`.
    fn read_at(&self, buf: &mut [u8], offset: usize) -> io::Result<()>;
}
impl ReadAtSync for File {
fn read_at(&self, buf: &mut [u8], offset: usize) -> io::Result<()> {
self.read_exact_at(buf, offset as u64)
}
}
/// Convenience extension that exposes file-opening options in a cross-platform way.
trait OpenOptionsExt {
    /// Hints to the OS/file system that the file will be accessed randomly, so read-ahead is
    /// undesirable.
    ///
    /// This only has an effect on Windows; for other operating systems see [`FileExt`].
    fn advise_random_access(&mut self) -> &mut Self;
}
impl OpenOptionsExt for OpenOptions {
    /// No open-time hint is applied outside Windows, so the options are returned unchanged.
    ///
    /// Gated on `not(windows)` rather than on individual operating systems so that the impl
    /// is complete (and therefore compiles) on every target.
    #[cfg(not(windows))]
    fn advise_random_access(&mut self) -> &mut Self {
        self
    }

    /// Adds `FILE_FLAG_RANDOM_ACCESS` to the custom flags used when the file is opened.
    #[cfg(windows)]
    fn advise_random_access(&mut self) -> &mut Self {
        // Imported anonymously: the std trait shares its name with the trait implemented here.
        use std::os::windows::fs::OpenOptionsExt as _;
        self.custom_flags(winapi::um::winbase::FILE_FLAG_RANDOM_ACCESS)
    }
}
/// Convenience extension for opened files: suggesting a random access pattern and doing
/// cross-platform exact reads at an offset.
trait FileExt {
    /// Hints to the OS/file system that the file will be accessed randomly, so read-ahead is
    /// undesirable.
    ///
    /// On Windows this can only be requested when the file is opened, see [`OpenOptionsExt`].
    fn advise_random_access(&self) -> io::Result<()>;

    /// Reads exactly `buf.len()` bytes starting at `offset`.
    fn read_exact_at(&self, buf: &mut [u8], offset: u64) -> io::Result<()>;
}
impl FileExt for File {
    /// Applies `POSIX_FADV_RANDOM` to the whole file (offset 0, length 0).
    #[cfg(target_os = "linux")]
    fn advise_random_access(&self) -> io::Result<()> {
        use std::os::unix::io::AsRawFd;
        // SAFETY: the descriptor is borrowed from `self`, so it stays open for the duration of
        // the call, and no pointers are passed.
        let err = unsafe { libc::posix_fadvise(self.as_raw_fd(), 0, 0, libc::POSIX_FADV_RANDOM) };
        // `posix_fadvise` returns the error number directly instead of setting `errno`.
        if err != 0 {
            Err(io::Error::from_raw_os_error(err))
        } else {
            Ok(())
        }
    }

    /// Turns read-ahead off for this descriptor via `F_RDAHEAD`.
    #[cfg(target_os = "macos")]
    fn advise_random_access(&self) -> io::Result<()> {
        use std::os::unix::io::AsRawFd;
        // SAFETY: the descriptor is borrowed from `self`, so it stays open for the duration of
        // the call, and the argument is a plain integer.
        let ret = unsafe { libc::fcntl(self.as_raw_fd(), libc::F_RDAHEAD, 0) };
        // `fcntl` reports failure with -1 only; any other value means success.
        if ret == -1 {
            Err(io::Error::last_os_error())
        } else {
            Ok(())
        }
    }

    /// No-op: on Windows the hint has to be given at open time (see [`OpenOptionsExt`]), and
    /// no hint is implemented here for the remaining targets.
    #[cfg(not(any(target_os = "linux", target_os = "macos")))]
    fn advise_random_access(&self) -> io::Result<()> {
        Ok(())
    }

    /// Forwards to the standard library's positional exact read.
    #[cfg(unix)]
    fn read_exact_at(&self, buf: &mut [u8], offset: u64) -> io::Result<()> {
        std::os::unix::fs::FileExt::read_exact_at(self, buf, offset)
    }

    /// Repeats `seek_read` until `buf` is full, retrying interrupted reads and failing with
    /// `UnexpectedEof` if the file ends first.
    #[cfg(windows)]
    fn read_exact_at(&self, mut buf: &mut [u8], mut offset: u64) -> io::Result<()> {
        while !buf.is_empty() {
            match std::os::windows::fs::FileExt::seek_read(self, buf, offset) {
                // End of file reached before the buffer was filled.
                Ok(0) => {
                    return Err(io::Error::new(
                        io::ErrorKind::UnexpectedEof,
                        "failed to fill whole buffer",
                    ));
                }
                Ok(n) => {
                    // Advance past the bytes that were just read.
                    buf = &mut buf[n..];
                    offset += n as u64;
                }
                Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {
                    // Try again
                }
                Err(e) => {
                    return Err(e);
                }
            }
        }
        Ok(())
    }
}
It reads small chunks of a large file at random offsets in a loop.
It can be saved as `file.rs` and then run with `cargo +nightly -Zscript file.rs D:\large-file.bin`, where `large-file.bin` is 500G in my case.
What happens afterwards on Windows is that the application is reported as using 0.5MiB of RAM:
However, total Windows memory usage grows by ~1GiB within a minute or two (sometimes less, sometimes more), and I can't find where it goes. This looks like either a bug in Windows or something fishy happening in the Rust standard library, but either way, tracking it down is beyond my knowledge.
Screenshot of task manager while application is running and after application is stopped:
The example above is cross-platform and doesn't have such issues on Linux. The problem occurs on both Windows 10 and Windows 11.
This is a reduced example of a real-world app, where a single app sometimes manages tens of terabytes of large files. I would appreciate any hints.