feat: add pipeline concurrency throttling and HPC build docs
Introduces a counting semaphore-based throttling mechanism to limit concurrent file I/O and pipeline processing. Replaces custom path wrappers with standardized `Throttled` types across `obikmer` and `obikpartitionner`, ensuring RAII-based resource cleanup and explicit backpressure. Additionally, documents how to redirect Cargo build artifacts to local scratch storage on HPC filesystems to prevent compilation slowdowns.
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
mod scheduler;
|
||||
pub mod throttle;
|
||||
|
||||
pub use scheduler::Pipe;
|
||||
pub use scheduler::PipeIter;
|
||||
@@ -10,6 +11,10 @@ pub use scheduler::SinkFn;
|
||||
pub use scheduler::SourceFn;
|
||||
pub use scheduler::Stage;
|
||||
pub use scheduler::WorkerPool;
|
||||
pub use throttle::Throttle;
|
||||
pub use throttle::ThrottleGuard;
|
||||
pub use throttle::Throttled;
|
||||
pub use throttle::throttle;
|
||||
|
||||
/// Re-export de `crossbeam_channel::Sender` utilisé dans les macros flat transform.
|
||||
/// Permet aux macros `make_flat_transform!` / `make_flat_transform_fallible!` d'utiliser
|
||||
|
||||
@@ -0,0 +1,86 @@
|
||||
use std::sync::{Arc, Condvar, Mutex};
|
||||
|
||||
// ── Throttle ──────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Counting semaphore: limits how many items from a source are in-flight
|
||||
/// simultaneously through the Flat stage of a pipeline.
|
||||
///
|
||||
/// Slots are acquired in the source thread before an item is emitted, and
|
||||
/// released when the corresponding `ThrottleGuard` is dropped (i.e. when the
|
||||
/// Flat worker finishes processing the item). Acquisition must never happen
|
||||
/// inside a worker — only in the source thread — to prevent deadlocks.
|
||||
pub struct Throttle {
|
||||
count: Mutex<usize>,
|
||||
condvar: Condvar,
|
||||
max: usize,
|
||||
}
|
||||
|
||||
impl Throttle {
|
||||
pub fn new(max: usize) -> Self {
|
||||
Self { count: Mutex::new(0), condvar: Condvar::new(), max }
|
||||
}
|
||||
|
||||
pub fn acquire(&self) {
|
||||
let mut count = self.count.lock().unwrap();
|
||||
while *count >= self.max {
|
||||
count = self.condvar.wait(count).unwrap();
|
||||
}
|
||||
*count += 1;
|
||||
}
|
||||
|
||||
fn release(&self) {
|
||||
let mut count = self.count.lock().unwrap();
|
||||
*count -= 1;
|
||||
self.condvar.notify_one();
|
||||
}
|
||||
}
|
||||
|
||||
// ── ThrottleGuard ─────────────────────────────────────────────────────────────
|
||||
|
||||
/// RAII guard: releases one slot in the `Throttle` when dropped.
|
||||
pub struct ThrottleGuard(Arc<Throttle>);
|
||||
|
||||
impl Drop for ThrottleGuard {
|
||||
fn drop(&mut self) {
|
||||
self.0.release();
|
||||
}
|
||||
}
|
||||
|
||||
// ── Throttled<T> ──────────────────────────────────────────────────────────────
|
||||
|
||||
/// An item paired with its throttle guard.
|
||||
///
|
||||
/// The guard keeps a slot acquired until this value is dropped. In a Flat
|
||||
/// pipeline stage, carry the guard inside the worker closure until the item
|
||||
/// is fully processed, then let it drop.
|
||||
pub struct Throttled<T> {
|
||||
pub item: T,
|
||||
pub guard: ThrottleGuard,
|
||||
}
|
||||
|
||||
// ── throttle() ────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Wrap `source` so that at most `max_concurrent` items are emitted before
|
||||
/// earlier ones have been fully processed (i.e. their `ThrottleGuard` dropped).
|
||||
///
|
||||
/// Acquisition blocks the source thread until a slot is available. This must
|
||||
/// be called in the source thread, never inside a pipeline worker.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```ignore
|
||||
/// let throttled = obipipeline::throttle(file_paths, n_workers - 1);
|
||||
/// // Use `throttled` as the pipeline source; carry `item.guard` through the
|
||||
/// // Flat stage and let it drop when the file is fully read.
|
||||
/// ```
|
||||
pub fn throttle<I>(source: I, max_concurrent: usize) -> impl Iterator<Item = Throttled<I::Item>>
|
||||
where
|
||||
I: Iterator,
|
||||
I::Item: Send + 'static,
|
||||
{
|
||||
let t = Arc::new(Throttle::new(max_concurrent));
|
||||
source.map(move |item| {
|
||||
t.acquire();
|
||||
Throttled { item, guard: ThrottleGuard(Arc::clone(&t)) }
|
||||
})
|
||||
}
|
||||
Reference in New Issue
Block a user