♻️ refactor pipeline architecture and fix macOS memory detection
- Replace WorkerPool-based pipelines with typed `Pipe` abstraction in obipipeline
- Introduce Pipe/PipeIter for composable, sourceless/sink-less pipelines
- Update partition and superkmer commands to use new Pipe API via make_pipe!
- Remove Arc<Mutex<...>> patterns; simplify state management
- Fix macOS available_memory() returning 0 by falling back to half total memory in dereplicate()
- Remove unused `format: "zstd"` field from partition.meta
This commit is contained in:
@@ -1,5 +1,7 @@
|
||||
mod scheduler;
|
||||
|
||||
pub use scheduler::Pipe;
|
||||
pub use scheduler::PipeIter;
|
||||
pub use scheduler::Pipeline;
|
||||
pub use scheduler::PipelineError;
|
||||
pub use scheduler::SharedFlatFn;
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
use crossbeam_channel::{Receiver, Select, Sender, bounded};
|
||||
use std::error::Error;
|
||||
use std::fmt;
|
||||
use std::marker::PhantomData;
|
||||
use std::sync::Arc;
|
||||
use std::thread;
|
||||
|
||||
@@ -371,6 +372,115 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
// ── Pipe ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Typed, composable iterator transformer.
|
||||
///
|
||||
/// A `Pipe<D, In, Out>` is a pure description of pipeline stages — no threads,
|
||||
/// no channels, no scheduler. Call `.apply(iter, n_workers, capacity)` to start
|
||||
/// execution and get back a `PipeIter<Out>`.
|
||||
///
|
||||
/// Compose two pipes with `.then()`: the resulting `Pipe` holds the concatenated
|
||||
/// stage list, so a single scheduler is created when `.apply()` is eventually called.
|
||||
pub struct Pipe<D, In, Out> {
|
||||
stages: Vec<Stage<D>>,
|
||||
wrap: Arc<dyn Fn(In) -> D + Send + Sync>,
|
||||
unwrap: Arc<dyn Fn(D) -> Out + Send + Sync>,
|
||||
_phantom: PhantomData<(In, Out)>,
|
||||
}
|
||||
|
||||
impl<D, In, Out> Pipe<D, In, Out> {
|
||||
/// Build a `Pipe` from stages and wrap/unwrap converters.
|
||||
/// Prefer the `make_pipe!` macro.
|
||||
pub fn new(
|
||||
stages: Vec<Stage<D>>,
|
||||
wrap: Arc<dyn Fn(In) -> D + Send + Sync>,
|
||||
unwrap: Arc<dyn Fn(D) -> Out + Send + Sync>,
|
||||
) -> Self {
|
||||
Self { stages, wrap, unwrap, _phantom: PhantomData }
|
||||
}
|
||||
|
||||
/// Concatenate stages from two pipes into one.
|
||||
///
|
||||
/// Requires `Out` of `self` == `In` of `other`. The single scheduler
|
||||
/// created at `.apply()` time sees the full combined stage list.
|
||||
pub fn then<Next>(self, other: Pipe<D, Out, Next>) -> Pipe<D, In, Next> {
|
||||
Pipe {
|
||||
stages: self.stages.into_iter().chain(other.stages).collect(),
|
||||
wrap: self.wrap,
|
||||
unwrap: other.unwrap,
|
||||
_phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<D, In, Out> Pipe<D, In, Out>
|
||||
where
|
||||
D: Send + Sync + 'static,
|
||||
In: Send + 'static,
|
||||
Out: Send + 'static,
|
||||
{
|
||||
/// Run the pipeline in a background thread; returns an iterator over the output.
|
||||
pub fn apply(
|
||||
self,
|
||||
input: impl Iterator<Item = In> + Send + 'static,
|
||||
n_workers: usize,
|
||||
capacity: usize,
|
||||
) -> PipeIter<Out> {
|
||||
let wrap = Arc::clone(&self.wrap);
|
||||
let unwrap = Arc::clone(&self.unwrap);
|
||||
|
||||
let mut iter = input;
|
||||
let source: SourceFn<D> = Box::new(move || match iter.next() {
|
||||
Some(x) => Ok(wrap(x)),
|
||||
None => Err(PipelineError::EndOfStream),
|
||||
});
|
||||
|
||||
let (out_tx, out_rx) = bounded::<Out>(capacity);
|
||||
let sink: SinkFn<D> = Box::new(move |data: D| {
|
||||
out_tx.send(unwrap(data)).map_err(|_| {
|
||||
PipelineError::StepError(Box::new(std::io::Error::new(
|
||||
std::io::ErrorKind::BrokenPipe,
|
||||
"output channel closed",
|
||||
)))
|
||||
})
|
||||
});
|
||||
|
||||
let pipeline = Pipeline::new(source, self.stages, sink);
|
||||
let handle = thread::spawn(move || {
|
||||
WorkerPool::new(pipeline, n_workers, capacity).run();
|
||||
});
|
||||
|
||||
PipeIter { rx: out_rx, handle: Some(handle) }
|
||||
}
|
||||
}
|
||||
|
||||
// ── PipeIter ──────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Iterator over the output of `Pipe::apply()`.
|
||||
pub struct PipeIter<Out> {
|
||||
rx: Receiver<Out>,
|
||||
handle: Option<thread::JoinHandle<()>>,
|
||||
}
|
||||
|
||||
impl<Out> Iterator for PipeIter<Out> {
|
||||
type Item = Out;
|
||||
|
||||
fn next(&mut self) -> Option<Out> {
|
||||
self.rx.recv().ok()
|
||||
}
|
||||
}
|
||||
|
||||
impl<Out> Drop for PipeIter<Out> {
|
||||
fn drop(&mut self) {
|
||||
// Drain buffered items so the scheduler can unblock if the channel is full.
|
||||
while self.rx.try_recv().is_ok() {}
|
||||
if let Some(h) = self.handle.take() {
|
||||
let _ = h.join();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Envoie `data` au stage `stage_idx`.
|
||||
/// Pour un `Transform`, empile une `WorkerTask::Transform`.
|
||||
/// Pour un `Flat`, incrémente `flat_workers_active` et empile une `WorkerTask::Flat`.
|
||||
@@ -684,3 +794,85 @@ macro_rules! make_pipeline {
|
||||
)
|
||||
};
|
||||
}
|
||||
|
||||
/// Builds a typed `Pipe<D, In, Out>` — a pipeline description with neither
/// source nor sink.
///
/// The first line names the data enum and the input/output item types. Each
/// following line declares one stage as
/// `marker func : InVariant => OutVariant,`. The comma after every stage,
/// including the last one, is required by the matcher.
///
/// Syntax:
/// ```ignore
/// make_pipe! {
///     MyData : InType => OutType,
///     |   func : InVariant => OutVariant,   // transform 1→1
///     |?  func : InVariant => OutVariant,   // transform 1→1 fallible
///     ||  func : InVariant => OutVariant,   // flat transform 1→N
///     ||? func : InVariant => OutVariant,   // flat transform 1→N fallible
/// }
/// ```
///
/// The expansion calls `Pipe::new` with the collected stages, an input
/// converter that wraps each item in the first stage's `InVariant`, and an
/// output converter that extracts the payload of the last stage's
/// `OutVariant`. The output converter hits `unreachable!` if it receives any
/// other variant.
#[macro_export]
macro_rules! make_pipe {
    // ── Entry: first stage is `|` (transform 1→1) ────────────────────────
    (
        $data:ident : $src:ty => $dst:ty,
        | $func:tt : $head:ident => $next:ident, $($tail:tt)*
    ) => {
        $crate::make_pipe!(
            @build $data : $src => $dst, $head,
            [$crate::make_transform!($data, $func, $head, $next),],
            $next, $($tail)*
        )
    };
    // ── Entry: first stage is `|?` (fallible transform 1→1) ──────────────
    (
        $data:ident : $src:ty => $dst:ty,
        |? $func:tt : $head:ident => $next:ident, $($tail:tt)*
    ) => {
        $crate::make_pipe!(
            @build $data : $src => $dst, $head,
            [$crate::make_transform_fallible!($data, $func, $head, $next),],
            $next, $($tail)*
        )
    };
    // ── Entry: first stage is `||` (flat transform 1→N) ──────────────────
    (
        $data:ident : $src:ty => $dst:ty,
        || $func:tt : $head:ident => $next:ident, $($tail:tt)*
    ) => {
        $crate::make_pipe!(
            @build $data : $src => $dst, $head,
            [$crate::make_flat_transform!($data, $func, $head, $next),],
            $next, $($tail)*
        )
    };
    // ── Entry: first stage is `||?` (fallible flat transform 1→N) ────────
    (
        $data:ident : $src:ty => $dst:ty,
        ||? $func:tt : $head:ident => $next:ident, $($tail:tt)*
    ) => {
        $crate::make_pipe!(
            @build $data : $src => $dst, $head,
            [$crate::make_flat_transform_fallible!($data, $func, $head, $next),],
            $next, $($tail)*
        )
    };

    // ── Accumulation: `|` ────────────────────────────────────────────────
    // `$head` is the first stage's input variant, `$stages` the stages built
    // so far, `$last` the output variant of the most recent stage.
    (
        @build $data:ident : $src:ty => $dst:ty, $head:ident,
        [$($stages:tt)*], $last:ident,
        | $func:tt : $from:ident => $into:ident, $($tail:tt)*
    ) => {
        $crate::make_pipe!(
            @build $data : $src => $dst, $head,
            [$($stages)* $crate::make_transform!($data, $func, $from, $into),],
            $into, $($tail)*
        )
    };
    // ── Accumulation: `|?` ───────────────────────────────────────────────
    (
        @build $data:ident : $src:ty => $dst:ty, $head:ident,
        [$($stages:tt)*], $last:ident,
        |? $func:tt : $from:ident => $into:ident, $($tail:tt)*
    ) => {
        $crate::make_pipe!(
            @build $data : $src => $dst, $head,
            [$($stages)* $crate::make_transform_fallible!($data, $func, $from, $into),],
            $into, $($tail)*
        )
    };
    // ── Accumulation: `||` ───────────────────────────────────────────────
    (
        @build $data:ident : $src:ty => $dst:ty, $head:ident,
        [$($stages:tt)*], $last:ident,
        || $func:tt : $from:ident => $into:ident, $($tail:tt)*
    ) => {
        $crate::make_pipe!(
            @build $data : $src => $dst, $head,
            [$($stages)* $crate::make_flat_transform!($data, $func, $from, $into),],
            $into, $($tail)*
        )
    };
    // ── Accumulation: `||?` ──────────────────────────────────────────────
    (
        @build $data:ident : $src:ty => $dst:ty, $head:ident,
        [$($stages:tt)*], $last:ident,
        ||? $func:tt : $from:ident => $into:ident, $($tail:tt)*
    ) => {
        $crate::make_pipe!(
            @build $data : $src => $dst, $head,
            [$($stages)* $crate::make_flat_transform_fallible!($data, $func, $from, $into),],
            $into, $($tail)*
        )
    };

    // ── Termination: no stage left, emit the `Pipe` ──────────────────────
    (
        @build $data:ident : $src:ty => $dst:ty, $head:ident,
        [$($stages:tt)*], $last:ident $(,)?
    ) => {
        $crate::Pipe::new(
            vec![$($stages)*],
            // Wrap each input item in the first stage's input variant.
            ::std::sync::Arc::new(|item: $src| $data::$head(item)),
            // Extract the payload of the last stage's output variant.
            ::std::sync::Arc::new(|wrapped: $data| -> $dst {
                match wrapped {
                    $data::$last(item) => item,
                    _ => ::std::unreachable!("unexpected pipeline data variant in make_pipe!"),
                }
            }),
        )
    };
}
|
||||
|
||||
Reference in New Issue
Block a user