feat: add merge operation specs and partition progress bar
Add implementation specifications for the `merge` operation, detailing parallel partition processing, I/O paths, and k-mer matrix aggregation across multiple indexes. Integrate an `indicatif` progress bar into the `rayon` parallel loop that reports position, percentage complete, throughput, ETA, and the duration of the most recently completed partition.
This commit is contained in:
@@ -219,19 +219,34 @@ impl KmerPartition {
|
||||
let n_threads = rayon::current_num_threads().max(1) as u64;
|
||||
let available_per_thread = available / n_threads;
|
||||
|
||||
let pb = ProgressBar::new(self.n_partitions as u64);
|
||||
pb.set_style(
|
||||
ProgressStyle::with_template(
|
||||
"dereplicating [{bar:40}] {pos}/{len} ({percent}%) {per_sec} eta {eta} {msg}",
|
||||
)
|
||||
.unwrap()
|
||||
.progress_chars("█▌░"),
|
||||
);
|
||||
|
||||
let results: Vec<SKResult<()>> = (0..self.n_partitions)
|
||||
.into_par_iter()
|
||||
.map(|i| {
|
||||
let dir = self.part_dir(i);
|
||||
if !dir.exists() {
|
||||
pb.inc(1);
|
||||
return Ok(());
|
||||
}
|
||||
let raw_path = dir.join(format!("raw.{SK_EXT}"));
|
||||
let t = Instant::now();
|
||||
let n_buckets = optimal_buckets(&raw_path, available_per_thread);
|
||||
dereplicate_partition(&dir, level, n_buckets)
|
||||
let result = dereplicate_partition(&dir, level, n_buckets);
|
||||
pb.set_message(format!("last {:.0}ms", t.elapsed().as_millis()));
|
||||
pb.inc(1);
|
||||
result
|
||||
})
|
||||
.collect();
|
||||
|
||||
pb.finish_and_clear();
|
||||
for r in results {
|
||||
r?;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user