From 4677d6f177f653dc12961228b0e3daf271140d7f Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Wed, 3 Jun 2026 15:30:44 +0200 Subject: [PATCH 1/2] refactor: improve resource cleanup and index packing Explicitly close file handles and remove temporary artifacts after serialization to prevent disk space leaks. Additionally, compact internal matrix structures immediately upon loading the KmerIndex to improve memory efficiency and prepare for downstream operations. --- src/obicompactvec/src/bitmatrix.rs | 4 ++++ src/obicompactvec/src/intmatrix.rs | 4 ++++ src/obikindex/src/rebuild.rs | 4 +++- 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/obicompactvec/src/bitmatrix.rs b/src/obicompactvec/src/bitmatrix.rs index ead53af..5fd426b 100644 --- a/src/obicompactvec/src/bitmatrix.rs +++ b/src/obicompactvec/src/bitmatrix.rs @@ -190,6 +190,10 @@ pub fn pack_bit_matrix(dir: &Path) -> io::Result<()> { file.write_all(&(n_cols as u64).to_le_bytes())?; for &off in &offsets { file.write_all(&off.to_le_bytes())?; } for data in &col_files { file.write_all(data)?; } + drop(file); + + for c in 0..n_cols { fs::remove_file(col_path(dir, c))?; } + fs::remove_file(dir.join("meta.json"))?; Ok(()) } diff --git a/src/obicompactvec/src/intmatrix.rs b/src/obicompactvec/src/intmatrix.rs index 5fa1cc4..d1421e5 100644 --- a/src/obicompactvec/src/intmatrix.rs +++ b/src/obicompactvec/src/intmatrix.rs @@ -293,6 +293,10 @@ pub fn pack_compact_int_matrix(dir: &Path) -> io::Result<()> { file.write_all(&(n_cols as u64).to_le_bytes())?; for &off in &offsets { file.write_all(&off.to_le_bytes())?; } for data in &col_files { file.write_all(data)?; } + drop(file); + + for c in 0..n_cols { fs::remove_file(col_path(dir, c))?; } + fs::remove_file(dir.join("meta.json"))?; Ok(()) } diff --git a/src/obikindex/src/rebuild.rs b/src/obikindex/src/rebuild.rs index 95dc4b8..c7ac4fd 100644 --- a/src/obikindex/src/rebuild.rs +++ b/src/obikindex/src/rebuild.rs @@ -112,6 +112,8 @@ impl KmerIndex { // Write SENTINEL_INDEXED — output is ready to use. fs::File::create(output.join(SENTINEL_INDEXED))?; - KmerIndex::open(output) + let idx = KmerIndex::open(output)?; + idx.pack_matrices()?; + Ok(idx) } } -- 2.52.0 From 02cb30c0efc4a23f578a95ccf53d27f5176754e5 Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Wed, 3 Jun 2026 15:33:15 +0200 Subject: [PATCH 2/2] feat: add obisys crate for standardized CLI progress reporting This commit introduces the `obisys` crate, which wraps `indicatif` to provide reusable `spinner` and `progress_bar` utilities with consistent styling and tick intervals. It refactors progress reporting across `obikindex`, `obikpartitionner`, and `obikmer` to use these shared functions, eliminating inline UI configuration and ensuring uniform terminal feedback. --- src/Cargo.lock | 2 ++ src/obikindex/src/index.rs | 9 ++---- src/obikindex/src/merge.rs | 40 ++++++--------------------- src/obikindex/src/rebuild.rs | 11 ++------ src/obikindex/src/reindex.rs | 14 ++-------- src/obikmer/src/steps/scatter.rs | 12 ++------ src/obikpartitionner/Cargo.toml | 1 + src/obikpartitionner/src/partition.rs | 20 ++------------ src/obisys/Cargo.toml | 5 ++-- src/obisys/src/lib.rs | 34 ++++++++++++++++++++++- 10 files changed, 59 insertions(+), 89 deletions(-) diff --git a/src/Cargo.lock b/src/Cargo.lock index 2a23bcf..fe3f9a1 100644 --- a/src/Cargo.lock +++ b/src/Cargo.lock @@ -1561,6 +1561,7 @@ dependencies = [ "obiread", "obiskbuilder", "obiskio", + "obisys", "ptr_hash", "rayon", "remove_dir_all", @@ -1659,6 +1660,7 @@ dependencies = [ name = "obisys" version = "0.1.0" dependencies = [ + "indicatif", "libc", "sysinfo", ] diff --git a/src/obikindex/src/index.rs b/src/obikindex/src/index.rs index b785c1a..2c58aed 100644 --- a/src/obikindex/src/index.rs +++ b/src/obikindex/src/index.rs @@ -4,10 +4,9 @@ use std::path::{Path, PathBuf}; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::{Arc, Mutex}; -use indicatif::{ProgressBar, ProgressStyle}; use obikpartitionner::{KmerPartition, KmerSpectrum}; use obilayeredmap; -use obisys::{Reporter, Stage}; +use obisys::{Reporter, Stage, progress_bar}; use rayon::prelude::*; use tracing::info; @@ -155,11 +154,7 @@ impl KmerIndex { let block_bits = self.meta.config.block_bits; let total_kmers = AtomicUsize::new(0); - let pb = Arc::new(Mutex::new( - ProgressBar::new(n as u64).with_style( - ProgressStyle::with_template("index — [{bar:20}] {pos}/{len} | {msg}").unwrap(), - ), - )); + let pb = Arc::new(Mutex::new(progress_bar("index", n as u64, "partitions"))); (0..n).into_par_iter().for_each(|i| { match self.partition.build_index_layer(i, min_ab, max_ab, with_counts, &evidence, block_bits) { diff --git a/src/obikindex/src/merge.rs b/src/obikindex/src/merge.rs index dc129d8..d2e95da 100644 --- a/src/obikindex/src/merge.rs +++ b/src/obikindex/src/merge.rs @@ -2,10 +2,7 @@ use std::collections::HashMap; use std::fs; use std::io; use std::path::Path; -use std::time::Duration; - -use indicatif::{ProgressBar, ProgressStyle}; -use obisys::{Reporter, Stage}; +use obisys::{Reporter, Stage, progress_bar, spinner}; use rayon::prelude::*; use tracing::info; @@ -124,7 +121,8 @@ impl KmerIndex { sources[0].meta.genomes.len(), ); let t = Stage::start("bootstrap"); - let pb = spinner("bootstrap — copying index …"); + let pb = spinner("bootstrap"); + pb.set_message("copying index …"); copy_dir_all(&sources[0].root_path, output)?; // Rewrite index.meta with final genome labels and the effective mode. @@ -146,7 +144,8 @@ impl KmerIndex { // Drop the spectrums/ that were copied from source_0 and rebuild from scratch. info!("rebuilding spectrums for {} source(s)", sources.len()); let t = Stage::start("spectrums"); - let pb = spinner("spectrums — copying …"); + let pb = spinner("spectrums"); + pb.set_message("copying …"); let spectrums_dir = output.join("spectrums"); if spectrums_dir.exists() { fs::remove_dir_all(&spectrums_dir)?; @@ -172,7 +171,7 @@ impl KmerIndex { n_partitions, n_src_genomes, n_dst_genomes, ); let t = Stage::start("merge_partitions"); - let pb = partition_bar(n_partitions as u64); + let pb = progress_bar("merge", n_partitions as u64, "partitions"); let dst_partition = &dst.partition; let block_bits = dst.meta.config.block_bits; @@ -199,7 +198,8 @@ impl KmerIndex { // ── Pack matrices after merge ───────────────────────────────────────── { let t = Stage::start("pack"); - let pb = spinner("pack — consolidating column files …"); + let pb = spinner("pack"); + pb.set_message("consolidating column files …"); let dst2 = KmerIndex::open(output)?; dst2.pack_matrices()?; pb.finish_and_clear(); @@ -285,30 +285,6 @@ fn remove_dirs_named(root: &Path, name: &str) -> io::Result<()> { Ok(()) } -fn spinner(msg: &'static str) -> ProgressBar { - let pb = ProgressBar::new_spinner(); - pb.set_style( - ProgressStyle::with_template("{spinner} {msg} {elapsed}") - .unwrap() - .tick_strings(&["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"]), - ); - pb.set_message(msg); - pb.enable_steady_tick(Duration::from_millis(100)); - pb -} - -fn partition_bar(n: u64) -> ProgressBar { - let pb = ProgressBar::new(n); - pb.set_style( - ProgressStyle::with_template( - "{spinner} merge — {bar:40.cyan/blue} {pos}/{len} partitions {elapsed}", - ) - .unwrap() - .tick_strings(&["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"]), - ); - pb.enable_steady_tick(Duration::from_millis(100)); - pb -} fn format_evidence(ev: &IndexMode) -> String { match ev { diff --git a/src/obikindex/src/rebuild.rs b/src/obikindex/src/rebuild.rs index c7ac4fd..6948209 100644 --- a/src/obikindex/src/rebuild.rs +++ b/src/obikindex/src/rebuild.rs @@ -1,11 +1,9 @@ use std::fs; use std::io; use std::path::Path; -use std::time::Duration; -use indicatif::{ProgressBar, ProgressStyle}; use obikpartitionner::{KmerFilter, KmerPartition, MergeMode}; -use obisys::{Reporter, Stage}; +use obisys::{Reporter, Stage, progress_bar}; use rayon::prelude::*; use tracing::info; @@ -80,12 +78,7 @@ impl KmerIndex { ); let t = Stage::start("rebuild"); - let pb = ProgressBar::new(n_partitions as u64).with_style( - ProgressStyle::with_template("rebuild — [{bar:20}] {pos}/{len} | {msg}") - .unwrap() - .progress_chars("=> "), - ); - pb.enable_steady_tick(Duration::from_millis(100)); + let pb = progress_bar("rebuild", n_partitions as u64, "partitions"); let src_partition = &src.partition; let block_bits = meta.config.block_bits; diff --git a/src/obikindex/src/reindex.rs b/src/obikindex/src/reindex.rs index ac1c42c..878d51a 100644 --- a/src/obikindex/src/reindex.rs +++ b/src/obikindex/src/reindex.rs @@ -1,11 +1,8 @@ use std::fs; use std::path::Path; -use std::time::Duration; - -use indicatif::{ProgressBar, ProgressStyle}; use obilayeredmap::{IndexMode, layer::Layer}; use obilayeredmap::meta::PartitionMeta; -use obisys::{Reporter, Stage}; +use obisys::{Reporter, Stage, progress_bar}; use rayon::prelude::*; use tracing::info; @@ -46,14 +43,7 @@ impl KmerIndex { ); let t = Stage::start("reindex"); - let pb = ProgressBar::new(n as u64).with_style( - ProgressStyle::with_template( - "reindex — [{bar:20}] {pos}/{len} | {msg}", - ) - .unwrap() - .tick_strings(&["⠋","⠙","⠹","⠸","⠼","⠴","⠦","⠧","⠇","⠏"]), - ); - pb.enable_steady_tick(Duration::from_millis(80)); + let pb = progress_bar("reindex", n as u64, "partitions"); let errors: Vec = (0..n) .into_par_iter() diff --git a/src/obikmer/src/steps/scatter.rs b/src/obikmer/src/steps/scatter.rs index cb5a14b..8888839 100644 --- a/src/obikmer/src/steps/scatter.rs +++ b/src/obikmer/src/steps/scatter.rs @@ -1,9 +1,9 @@ use std::path::PathBuf; use std::sync::atomic::{AtomicU32, AtomicU64, Ordering}; use std::sync::Arc; -use std::time::{Duration, Instant}; +use std::time::Instant; -use indicatif::{ProgressBar, ProgressStyle}; +use obisys::spinner; use obiread::NucPage; use obikpartitionner::KmerPartition; use obisys::{Reporter, Stage}; @@ -83,13 +83,7 @@ pub fn scatter( } : NucPage => Batch, }; - let pb = ProgressBar::new_spinner(); - pb.set_style( - ProgressStyle::with_template("{spinner} scatter — {msg} {elapsed}") - .unwrap() - .tick_strings(&["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"]), - ); - pb.enable_steady_tick(Duration::from_millis(100)); + let pb = spinner("scatter"); let mut total_bases: u64 = 0; let mut ema_rate: f64 = 0.0; diff --git a/src/obikpartitionner/Cargo.toml b/src/obikpartitionner/Cargo.toml index d62fdf4..e131573 100644 --- a/src/obikpartitionner/Cargo.toml +++ b/src/obikpartitionner/Cargo.toml @@ -28,3 +28,4 @@ memmap2 = "0.9.10" obicompactvec = { path = "../obicompactvec" } ptr_hash = "1.1" indicatif = "0.17" +obisys = { path = "../obisys" } diff --git a/src/obikpartitionner/src/partition.rs b/src/obikpartitionner/src/partition.rs index ea7f810..321f6cf 100644 --- a/src/obikpartitionner/src/partition.rs +++ b/src/obikpartitionner/src/partition.rs @@ -5,7 +5,7 @@ use std::path::{Path, PathBuf}; use std::time::Instant; use tracing::debug; -use indicatif::{ProgressBar, ProgressStyle}; +use obisys::progress_bar; use cacheline_ef::{CachelineEf, CachelineEfVec}; use epserde::ser::Serialize as EpSerialize; @@ -219,14 +219,7 @@ impl KmerPartition { let n_threads = rayon::current_num_threads().max(1) as u64; let available_per_thread = available / n_threads; - let pb = ProgressBar::new(self.n_partitions as u64); - pb.set_style( - ProgressStyle::with_template( - "dereplicating [{bar:40}] {pos}/{len} ({percent}%) {per_sec} eta {eta} {msg}", - ) - .unwrap() - .progress_chars("█▌░"), - ); + let pb = progress_bar("dereplication", self.n_partitions as u64, "partitions"); let results: Vec> = (0..self.n_partitions) .into_par_iter() @@ -274,14 +267,7 @@ impl KmerPartition { let n_threads = rayon::current_num_threads().max(1) as u64; let chunk_kmers = chunk_size_from_ram(available / n_threads); - let pb = ProgressBar::new(self.n_partitions as u64); - pb.set_style( - ProgressStyle::with_template( - "counting [{bar:40}] {pos}/{len} ({percent}%) {per_sec} eta {eta} {msg}", - ) - .unwrap() - .progress_chars("█▌░"), - ); + let pb = progress_bar("counting", self.n_partitions as u64, "partitions"); let results: Vec> = (0..self.n_partitions) .into_par_iter() diff --git a/src/obisys/Cargo.toml b/src/obisys/Cargo.toml index 89d7cb5..eabb9e6 100644 --- a/src/obisys/Cargo.toml +++ b/src/obisys/Cargo.toml @@ -4,5 +4,6 @@ version = "0.1.0" edition = "2024" [dependencies] -libc = "0.2" -sysinfo = "0.33" +libc = "0.2" +sysinfo = "0.33" +indicatif = "0.17" diff --git a/src/obisys/src/lib.rs b/src/obisys/src/lib.rs index d352a39..49b80c0 100644 --- a/src/obisys/src/lib.rs +++ b/src/obisys/src/lib.rs @@ -1,5 +1,37 @@ use std::fmt; -use std::time::Instant; +use std::time::{Duration, Instant}; + +use indicatif::{ProgressBar, ProgressStyle}; + +const BRAILLE: &[&str] = &["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"]; + +/// Spinner with the standard project look: `⠋ label — msg 0s`. +/// Caller updates the message with `pb.set_message(...)`. +pub fn spinner(label: &str) -> ProgressBar { + let pb = ProgressBar::new_spinner(); + pb.set_style( + ProgressStyle::with_template(&format!("{{spinner}} {label} — {{msg}} {{elapsed}}")) + .unwrap() + .tick_strings(BRAILLE), + ); + pb.enable_steady_tick(Duration::from_millis(100)); + pb +} + +/// Progress bar with the standard project look: +/// `⠋ label — [████░░░░] pos/len unit elapsed`. +pub fn progress_bar(label: &str, n: u64, unit: &str) -> ProgressBar { + let pb = ProgressBar::new(n); + pb.set_style( + ProgressStyle::with_template(&format!( + "{{spinner}} {label} — {{bar:40.cyan/blue}} {{pos}}/{{len}} {unit} {{elapsed}}" + )) + .unwrap() + .tick_strings(BRAILLE), + ); + pb.enable_steady_tick(Duration::from_millis(100)); + pb +} use libc::{RUSAGE_SELF, getrusage, rusage, timeval}; use sysinfo::System; -- 2.52.0