feat: add merge operation specs and partition progress bar

Added implementation specifications for the `merge` operation, detailing parallel partition processing, I/O paths, and k-mer matrix aggregation across multiple indexes. Integrated an `indicatif` progress bar into the `rayon` parallel dereplication loop to report position, throughput, ETA, and the duration of the most recently completed partition.
This commit is contained in:
Eric Coissac
2026-05-20 21:18:19 +02:00
parent 7d1b62ddf3
commit bfa436ad15
2 changed files with 27 additions and 1 deletions
+16 -1
View File
@@ -219,19 +219,34 @@ impl KmerPartition {
let n_threads = rayon::current_num_threads().max(1) as u64;
let available_per_thread = available / n_threads;
let pb = ProgressBar::new(self.n_partitions as u64);
pb.set_style(
ProgressStyle::with_template(
"dereplicating [{bar:40}] {pos}/{len} ({percent}%) {per_sec} eta {eta} {msg}",
)
.unwrap()
.progress_chars("█▌░"),
);
let results: Vec<SKResult<()>> = (0..self.n_partitions)
.into_par_iter()
.map(|i| {
let dir = self.part_dir(i);
if !dir.exists() {
pb.inc(1);
return Ok(());
}
let raw_path = dir.join(format!("raw.{SK_EXT}"));
let t = Instant::now();
let n_buckets = optimal_buckets(&raw_path, available_per_thread);
dereplicate_partition(&dir, level, n_buckets)
let result = dereplicate_partition(&dir, level, n_buckets);
pb.set_message(format!("last {:.0}ms", t.elapsed().as_millis()));
pb.inc(1);
result
})
.collect();
pb.finish_and_clear();
for r in results {
r?;
}