I've been experimenting with sans-io parsers, and they all end up requiring hand-written state machines. So I decided to try the unstable coroutines feature, since it generates the state machines for me.
It works pretty well! However, using coroutines recursively is a bit painful. It might be useful to look into a `.yield` postfix that automatically yields from the outer coroutine whenever the inner coroutine returns `CoroutineState::Yielded` (provided the `Resume` and `Yield` types are compatible).
Feedback is very welcome!
(Also as a gist)
#![feature(coroutines, coroutine_trait)]
use futures_lite::{AsyncReadExt, AsyncSeekExt};
use std::{
io::{Read, Seek},
ops::{Coroutine, CoroutineState},
};
/// A type that can be parsed from a file.
///
/// Parsing is expressed as a coroutine so the same implementation can be
/// driven by blocking or asynchronous I/O: the parser itself never touches a
/// reader, it only inspects the [`IoBuffer`] it is resumed with.
pub trait Parser: Sized {
/// Read this type at the given position.
///
/// Users should use the [`SyncParser`] and/or the [`AsyncParser`], instead
/// of this trait. This trait is only for implementers.
///
/// The returned coroutine is resumed with an [`IoBuffer`]. When the buffer
/// does not hold the bytes the parser needs, the coroutine yields the
/// buffer back (with its `request` field describing what to load) and
/// expects to be resumed with a refilled buffer. It completes with the
/// parsed value.
///
/// `position` is the absolute byte offset in the file at which the value
/// starts.
///
/// After the coroutine is finished, `position` **must** be at the
/// end of the type. It's not unsafe, but your parser will fail at some
/// point.
fn read_at_inner(
position: &mut usize,
) -> impl Coroutine<IoBuffer, Yield = IoBuffer, Return = Self>;
}
/// A parser for the FLAC file format, currently only parses the magic number.
impl Parser for Flac {
    /// Parse a [`Flac`] starting at `position`.
    ///
    /// Delegates to the `u32` parser for the magic number, forwarding every
    /// buffer it yields to the caller and handing the refilled buffer back.
    ///
    /// # Panics
    ///
    /// The returned coroutine panics when the four bytes read are not `fLaC`.
    fn read_at_inner(
        position: &mut usize,
    ) -> impl Coroutine<IoBuffer, Yield = IoBuffer, Return = Self> {
        #[coroutine]
        static |mut buffer: IoBuffer| {
            // This would be much nicer:
            // let magic = u32::read_at(position).yield;
            let mut cor = std::pin::pin!(u32::read_at_inner(position));
            let magic = loop {
                match cor.as_mut().resume(buffer) {
                    // The inner parser needs more data: pass its request up
                    // and resume it with whatever the driver gives back.
                    CoroutineState::Yielded(buf) => buffer = yield buf,
                    CoroutineState::Complete(t) => break t,
                }
            };
            // The `u32` parser decodes the bytes as little-endian, so the
            // original file bytes must be recovered with `to_le_bytes`.
            // Native-endian would reject valid files on big-endian hosts.
            assert_eq!(&magic.to_le_bytes(), b"fLaC", "Magic is wrong!");
            Flac { magic }
        }
    }
}
/// Trait that implements a synchronous interface for [`Parser`] types.
///
/// Automatically implemented for every type.
pub trait SyncParser: Parser {
    /// Parse `Self` from `reader`, starting at the absolute offset `position`.
    ///
    /// Drives the coroutine returned by [`Parser::read_at_inner`]: before each
    /// resume the buffer is filled from the offset named in its `request`
    /// field. The buffer starts at 16 bytes and grows whenever the parser
    /// requests more bytes than it currently holds.
    ///
    /// # Panics
    ///
    /// Panics if seeking fails or if the reader cannot fill the whole buffer
    /// (for example when the input ends early).
    fn read_at(mut reader: impl Read + Seek, position: &mut usize) -> Self {
        let mut buffer = IoBuffer {
            buffer: vec![0; 16],
            buffer_start: *position,
            request: (*position, 0),
        };
        let mut cor = std::pin::pin!(Self::read_at_inner(position));
        loop {
            let (start, wanted) = buffer.request;
            // Honour the requested size; otherwise a parser asking for more
            // than the current buffer length could never be satisfied.
            if wanted > buffer.buffer.len() {
                buffer.buffer.resize(wanted, 0);
            }
            reader
                .seek(std::io::SeekFrom::Start(start as u64))
                .expect("failed to seek to the requested position");
            reader
                .read_exact(&mut buffer.buffer)
                .expect("failed to fill the read buffer");
            buffer.buffer_start = start;
            match cor.as_mut().resume(buffer) {
                CoroutineState::Yielded(buf) => buffer = buf,
                CoroutineState::Complete(t) => break t,
            }
        }
    }
}
impl<T: Parser> SyncParser for T {}
/// Trait that implements an asynchronous interface for [`Parser`] types.
///
/// Automatically implemented for every type.
pub trait AsyncParser: Parser {
    /// Parse `Self` from `reader`, starting at the absolute offset `position`.
    ///
    /// Drives the coroutine returned by [`Parser::read_at_inner`]: before each
    /// resume the buffer is filled from the offset named in its `request`
    /// field. The buffer starts at 16 bytes and grows whenever the parser
    /// requests more bytes than it currently holds.
    ///
    /// # Panics
    ///
    /// Panics if seeking fails or if the reader cannot fill the whole buffer
    /// (for example when the input ends early).
    #[allow(async_fn_in_trait, reason = "Just a demo")]
    async fn read_at(
        mut reader: impl AsyncReadExt + AsyncSeekExt + Unpin,
        position: &mut usize,
    ) -> Self {
        let mut buffer = IoBuffer {
            buffer: vec![0; 16],
            buffer_start: *position,
            request: (*position, 0),
        };
        let mut cor = std::pin::pin!(Self::read_at_inner(position));
        loop {
            let (start, wanted) = buffer.request;
            // Honour the requested size; otherwise a parser asking for more
            // than the current buffer length could never be satisfied.
            if wanted > buffer.buffer.len() {
                buffer.buffer.resize(wanted, 0);
            }
            reader
                .seek(std::io::SeekFrom::Start(start as u64))
                .await
                .expect("failed to seek to the requested position");
            reader
                .read_exact(&mut buffer.buffer)
                .await
                .expect("failed to fill the read buffer");
            buffer.buffer_start = start;
            match cor.as_mut().resume(buffer) {
                CoroutineState::Yielded(buf) => buffer = buf,
                CoroutineState::Complete(t) => break t,
            }
        }
    }
}
impl<T: Parser> AsyncParser for T {}
// Supporting types
/// Byte offset measured from the start of the file.
type AbsolutePosition = usize;
/// Number of bytes a parser asks the driver to make available.
type BytesRequested = usize;
/// A read buffer used by the [`Parser`] coroutine to read bytes.
#[derive(Debug)]
pub struct IoBuffer {
    /// The internal buffer.
    buffer: Vec<u8>,
    /// The absolute position in the file for which the buffer is a match.
    buffer_start: AbsolutePosition,
    /// When not enough bytes are available set the position and amount of bytes needed.
    pub request: (AbsolutePosition, BytesRequested),
}
impl IoBuffer {
    /// Read `N` bytes at position.
    ///
    /// Returns `Some` with a copy of the bytes when the range
    /// `position..position + N` lies entirely inside the buffer. `position`
    /// itself is never modified; advancing it is left to the caller.
    ///
    /// Returns `None` when the range starts before the buffer or extends past
    /// its end. In that case the `request` field is set to `(*position, N)`
    /// so the driver knows what to load next.
    pub fn read_bytes<const N: usize>(&mut self, position: &mut usize) -> Option<[u8; N]> {
        // `checked_sub` covers "position precedes the buffer", `checked_add`
        // guards against overflow, and `get` covers "range runs past the end".
        let found = (*position)
            .checked_sub(self.buffer_start)
            .and_then(|start| self.buffer.get(start..start.checked_add(N)?))
            .and_then(|bytes| <[u8; N]>::try_from(bytes).ok());
        if found.is_none() {
            self.request = (*position, N);
        }
        found
    }
}
/// Implementation for u32.
///
/// The library would contain implementations for all primitive (and some "complex") types.
/// So a user would almost never have to implement the loop themselves.
impl Parser for u32 {
    /// Parse a little-endian `u32` at `position`, advancing `position` by
    /// four bytes once the value has been read.
    fn read_at_inner(
        position: &mut usize,
    ) -> impl Coroutine<IoBuffer, Yield = IoBuffer, Return = Self> {
        #[coroutine]
        |mut buffer: IoBuffer| loop {
            // Not enough data yet: hand the buffer (now carrying the request)
            // back to the driver and retry with whatever it resumes us with.
            let Some(raw) = buffer.read_bytes::<4>(position) else {
                buffer = yield buffer;
                continue;
            };
            *position += 4;
            break u32::from_le_bytes(raw);
        }
    }
}
/// Parsed representation of a FLAC file; currently only the magic number.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Flac {
    /// The four magic bytes, decoded as a little-endian `u32`.
    pub magic: u32,
}