Isn't this the same as your example? (Where's the overhead?)
fn dfs_iterative(graph: &[&[usize]], mark: &mut [bool], mut i: usize) {
let mut stack = vec![];
let mut it = graph[i].iter();
loop {
if let Some(&j) = it.next() {
if !mark[j] {
stack.push((i, it));
i = j;
it = graph[j].iter();
}
} else {
mark[i] = true;
match stack.pop() {
None => return,
Some((j, other)) => {
i = j;
it = other;
}
}
}
}
}