3 Commits

Author SHA1 Message Date
coissac edc18b4908 Merge pull request 'Push rrwpnquuzsvr' (#15) from push-rrwpnquuzsvr into main
Reviewed-on: #15
2026-06-03 17:04:25 +00:00
Eric Coissac 02cb30c0ef feat: add obisys crate for standardized CLI progress reporting
This commit introduces the `obisys` crate, which wraps `indicatif` to provide reusable `spinner` and `progress_bar` utilities with consistent styling and tick intervals. It refactors progress reporting across `obikindex`, `obikpartitionner`, and `obikmer` to use these shared functions, eliminating inline UI configuration and ensuring uniform terminal feedback.
2026-06-03 19:03:59 +02:00
Eric Coissac 4677d6f177 refactor: improve resource cleanup and index packing
Explicitly close file handles and remove temporary artifacts after serialization to prevent disk space leaks. Additionally, compact internal matrix structures immediately upon loading the KmerIndex to improve memory efficiency and prepare for downstream operations.
2026-06-03 15:35:56 +02:00
12 changed files with 70 additions and 90 deletions
+2
View File
@@ -1561,6 +1561,7 @@ dependencies = [
"obiread", "obiread",
"obiskbuilder", "obiskbuilder",
"obiskio", "obiskio",
"obisys",
"ptr_hash", "ptr_hash",
"rayon", "rayon",
"remove_dir_all", "remove_dir_all",
@@ -1659,6 +1660,7 @@ dependencies = [
name = "obisys" name = "obisys"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"indicatif",
"libc", "libc",
"sysinfo", "sysinfo",
] ]
+4
View File
@@ -190,6 +190,10 @@ pub fn pack_bit_matrix(dir: &Path) -> io::Result<()> {
file.write_all(&(n_cols as u64).to_le_bytes())?; file.write_all(&(n_cols as u64).to_le_bytes())?;
for &off in &offsets { file.write_all(&off.to_le_bytes())?; } for &off in &offsets { file.write_all(&off.to_le_bytes())?; }
for data in &col_files { file.write_all(data)?; } for data in &col_files { file.write_all(data)?; }
drop(file);
for c in 0..n_cols { fs::remove_file(col_path(dir, c))?; }
fs::remove_file(dir.join("meta.json"))?;
Ok(()) Ok(())
} }
+4
View File
@@ -293,6 +293,10 @@ pub fn pack_compact_int_matrix(dir: &Path) -> io::Result<()> {
file.write_all(&(n_cols as u64).to_le_bytes())?; file.write_all(&(n_cols as u64).to_le_bytes())?;
for &off in &offsets { file.write_all(&off.to_le_bytes())?; } for &off in &offsets { file.write_all(&off.to_le_bytes())?; }
for data in &col_files { file.write_all(data)?; } for data in &col_files { file.write_all(data)?; }
drop(file);
for c in 0..n_cols { fs::remove_file(col_path(dir, c))?; }
fs::remove_file(dir.join("meta.json"))?;
Ok(()) Ok(())
} }
+2 -7
View File
@@ -4,10 +4,9 @@ use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::{Arc, Mutex}; use std::sync::{Arc, Mutex};
use indicatif::{ProgressBar, ProgressStyle};
use obikpartitionner::{KmerPartition, KmerSpectrum}; use obikpartitionner::{KmerPartition, KmerSpectrum};
use obilayeredmap; use obilayeredmap;
use obisys::{Reporter, Stage}; use obisys::{Reporter, Stage, progress_bar};
use rayon::prelude::*; use rayon::prelude::*;
use tracing::info; use tracing::info;
@@ -155,11 +154,7 @@ impl KmerIndex {
let block_bits = self.meta.config.block_bits; let block_bits = self.meta.config.block_bits;
let total_kmers = AtomicUsize::new(0); let total_kmers = AtomicUsize::new(0);
let pb = Arc::new(Mutex::new( let pb = Arc::new(Mutex::new(progress_bar("index", n as u64, "partitions")));
ProgressBar::new(n as u64).with_style(
ProgressStyle::with_template("index — [{bar:20}] {pos}/{len} | {msg}").unwrap(),
),
));
(0..n).into_par_iter().for_each(|i| { (0..n).into_par_iter().for_each(|i| {
match self.partition.build_index_layer(i, min_ab, max_ab, with_counts, &evidence, block_bits) { match self.partition.build_index_layer(i, min_ab, max_ab, with_counts, &evidence, block_bits) {
+8 -32
View File
@@ -2,10 +2,7 @@ use std::collections::HashMap;
use std::fs; use std::fs;
use std::io; use std::io;
use std::path::Path; use std::path::Path;
use std::time::Duration; use obisys::{Reporter, Stage, progress_bar, spinner};
use indicatif::{ProgressBar, ProgressStyle};
use obisys::{Reporter, Stage};
use rayon::prelude::*; use rayon::prelude::*;
use tracing::info; use tracing::info;
@@ -124,7 +121,8 @@ impl KmerIndex {
sources[0].meta.genomes.len(), sources[0].meta.genomes.len(),
); );
let t = Stage::start("bootstrap"); let t = Stage::start("bootstrap");
let pb = spinner("bootstrap — copying index …"); let pb = spinner("bootstrap");
pb.set_message("copying index …");
copy_dir_all(&sources[0].root_path, output)?; copy_dir_all(&sources[0].root_path, output)?;
// Rewrite index.meta with final genome labels and the effective mode. // Rewrite index.meta with final genome labels and the effective mode.
@@ -146,7 +144,8 @@ impl KmerIndex {
// Drop the spectrums/ that were copied from source_0 and rebuild from scratch. // Drop the spectrums/ that were copied from source_0 and rebuild from scratch.
info!("rebuilding spectrums for {} source(s)", sources.len()); info!("rebuilding spectrums for {} source(s)", sources.len());
let t = Stage::start("spectrums"); let t = Stage::start("spectrums");
let pb = spinner("spectrums — copying …"); let pb = spinner("spectrums");
pb.set_message("copying …");
let spectrums_dir = output.join("spectrums"); let spectrums_dir = output.join("spectrums");
if spectrums_dir.exists() { if spectrums_dir.exists() {
fs::remove_dir_all(&spectrums_dir)?; fs::remove_dir_all(&spectrums_dir)?;
@@ -172,7 +171,7 @@ impl KmerIndex {
n_partitions, n_src_genomes, n_dst_genomes, n_partitions, n_src_genomes, n_dst_genomes,
); );
let t = Stage::start("merge_partitions"); let t = Stage::start("merge_partitions");
let pb = partition_bar(n_partitions as u64); let pb = progress_bar("merge", n_partitions as u64, "partitions");
let dst_partition = &dst.partition; let dst_partition = &dst.partition;
let block_bits = dst.meta.config.block_bits; let block_bits = dst.meta.config.block_bits;
@@ -199,7 +198,8 @@ impl KmerIndex {
// ── Pack matrices after merge ───────────────────────────────────────── // ── Pack matrices after merge ─────────────────────────────────────────
{ {
let t = Stage::start("pack"); let t = Stage::start("pack");
let pb = spinner("pack — consolidating column files …"); let pb = spinner("pack");
pb.set_message("consolidating column files …");
let dst2 = KmerIndex::open(output)?; let dst2 = KmerIndex::open(output)?;
dst2.pack_matrices()?; dst2.pack_matrices()?;
pb.finish_and_clear(); pb.finish_and_clear();
@@ -285,30 +285,6 @@ fn remove_dirs_named(root: &Path, name: &str) -> io::Result<()> {
Ok(()) Ok(())
} }
fn spinner(msg: &'static str) -> ProgressBar {
let pb = ProgressBar::new_spinner();
pb.set_style(
ProgressStyle::with_template("{spinner} {msg} {elapsed}")
.unwrap()
.tick_strings(&["", "", "", "", "", "", "", "", "", ""]),
);
pb.set_message(msg);
pb.enable_steady_tick(Duration::from_millis(100));
pb
}
fn partition_bar(n: u64) -> ProgressBar {
let pb = ProgressBar::new(n);
pb.set_style(
ProgressStyle::with_template(
"{spinner} merge — {bar:40.cyan/blue} {pos}/{len} partitions {elapsed}",
)
.unwrap()
.tick_strings(&["", "", "", "", "", "", "", "", "", ""]),
);
pb.enable_steady_tick(Duration::from_millis(100));
pb
}
fn format_evidence(ev: &IndexMode) -> String { fn format_evidence(ev: &IndexMode) -> String {
match ev { match ev {
+5 -10
View File
@@ -1,11 +1,9 @@
use std::fs; use std::fs;
use std::io; use std::io;
use std::path::Path; use std::path::Path;
use std::time::Duration;
use indicatif::{ProgressBar, ProgressStyle};
use obikpartitionner::{KmerFilter, KmerPartition, MergeMode}; use obikpartitionner::{KmerFilter, KmerPartition, MergeMode};
use obisys::{Reporter, Stage}; use obisys::{Reporter, Stage, progress_bar};
use rayon::prelude::*; use rayon::prelude::*;
use tracing::info; use tracing::info;
@@ -80,12 +78,7 @@ impl KmerIndex {
); );
let t = Stage::start("rebuild"); let t = Stage::start("rebuild");
let pb = ProgressBar::new(n_partitions as u64).with_style( let pb = progress_bar("rebuild", n_partitions as u64, "partitions");
ProgressStyle::with_template("rebuild — [{bar:20}] {pos}/{len} | {msg}")
.unwrap()
.progress_chars("=> "),
);
pb.enable_steady_tick(Duration::from_millis(100));
let src_partition = &src.partition; let src_partition = &src.partition;
let block_bits = meta.config.block_bits; let block_bits = meta.config.block_bits;
@@ -112,6 +105,8 @@ impl KmerIndex {
// Write SENTINEL_INDEXED — output is ready to use. // Write SENTINEL_INDEXED — output is ready to use.
fs::File::create(output.join(SENTINEL_INDEXED))?; fs::File::create(output.join(SENTINEL_INDEXED))?;
KmerIndex::open(output) let idx = KmerIndex::open(output)?;
idx.pack_matrices()?;
Ok(idx)
} }
} }
+2 -12
View File
@@ -1,11 +1,8 @@
use std::fs; use std::fs;
use std::path::Path; use std::path::Path;
use std::time::Duration;
use indicatif::{ProgressBar, ProgressStyle};
use obilayeredmap::{IndexMode, layer::Layer}; use obilayeredmap::{IndexMode, layer::Layer};
use obilayeredmap::meta::PartitionMeta; use obilayeredmap::meta::PartitionMeta;
use obisys::{Reporter, Stage}; use obisys::{Reporter, Stage, progress_bar};
use rayon::prelude::*; use rayon::prelude::*;
use tracing::info; use tracing::info;
@@ -46,14 +43,7 @@ impl KmerIndex {
); );
let t = Stage::start("reindex"); let t = Stage::start("reindex");
let pb = ProgressBar::new(n as u64).with_style( let pb = progress_bar("reindex", n as u64, "partitions");
ProgressStyle::with_template(
"reindex — [{bar:20}] {pos}/{len} | {msg}",
)
.unwrap()
.tick_strings(&["","","","","","","","","",""]),
);
pb.enable_steady_tick(Duration::from_millis(80));
let errors: Vec<String> = (0..n) let errors: Vec<String> = (0..n)
.into_par_iter() .into_par_iter()
+3 -9
View File
@@ -1,9 +1,9 @@
use std::path::PathBuf; use std::path::PathBuf;
use std::sync::atomic::{AtomicU32, AtomicU64, Ordering}; use std::sync::atomic::{AtomicU32, AtomicU64, Ordering};
use std::sync::Arc; use std::sync::Arc;
use std::time::{Duration, Instant}; use std::time::Instant;
use indicatif::{ProgressBar, ProgressStyle}; use obisys::spinner;
use obiread::NucPage; use obiread::NucPage;
use obikpartitionner::KmerPartition; use obikpartitionner::KmerPartition;
use obisys::{Reporter, Stage}; use obisys::{Reporter, Stage};
@@ -83,13 +83,7 @@ pub fn scatter(
} : NucPage => Batch, } : NucPage => Batch,
}; };
let pb = ProgressBar::new_spinner(); let pb = spinner("scatter");
pb.set_style(
ProgressStyle::with_template("{spinner} scatter — {msg} {elapsed}")
.unwrap()
.tick_strings(&["", "", "", "", "", "", "", "", "", ""]),
);
pb.enable_steady_tick(Duration::from_millis(100));
let mut total_bases: u64 = 0; let mut total_bases: u64 = 0;
let mut ema_rate: f64 = 0.0; let mut ema_rate: f64 = 0.0;
+1
View File
@@ -28,3 +28,4 @@ memmap2 = "0.9.10"
obicompactvec = { path = "../obicompactvec" } obicompactvec = { path = "../obicompactvec" }
ptr_hash = "1.1" ptr_hash = "1.1"
indicatif = "0.17" indicatif = "0.17"
obisys = { path = "../obisys" }
+3 -17
View File
@@ -5,7 +5,7 @@ use std::path::{Path, PathBuf};
use std::time::Instant; use std::time::Instant;
use tracing::debug; use tracing::debug;
use indicatif::{ProgressBar, ProgressStyle}; use obisys::progress_bar;
use cacheline_ef::{CachelineEf, CachelineEfVec}; use cacheline_ef::{CachelineEf, CachelineEfVec};
use epserde::ser::Serialize as EpSerialize; use epserde::ser::Serialize as EpSerialize;
@@ -219,14 +219,7 @@ impl KmerPartition {
let n_threads = rayon::current_num_threads().max(1) as u64; let n_threads = rayon::current_num_threads().max(1) as u64;
let available_per_thread = available / n_threads; let available_per_thread = available / n_threads;
let pb = ProgressBar::new(self.n_partitions as u64); let pb = progress_bar("dereplication", self.n_partitions as u64, "partitions");
pb.set_style(
ProgressStyle::with_template(
"dereplicating [{bar:40}] {pos}/{len} ({percent}%) {per_sec} eta {eta} {msg}",
)
.unwrap()
.progress_chars("█▌░"),
);
let results: Vec<SKResult<()>> = (0..self.n_partitions) let results: Vec<SKResult<()>> = (0..self.n_partitions)
.into_par_iter() .into_par_iter()
@@ -274,14 +267,7 @@ impl KmerPartition {
let n_threads = rayon::current_num_threads().max(1) as u64; let n_threads = rayon::current_num_threads().max(1) as u64;
let chunk_kmers = chunk_size_from_ram(available / n_threads); let chunk_kmers = chunk_size_from_ram(available / n_threads);
let pb = ProgressBar::new(self.n_partitions as u64); let pb = progress_bar("counting", self.n_partitions as u64, "partitions");
pb.set_style(
ProgressStyle::with_template(
"counting [{bar:40}] {pos}/{len} ({percent}%) {per_sec} eta {eta} {msg}",
)
.unwrap()
.progress_chars("█▌░"),
);
let results: Vec<SKResult<()>> = (0..self.n_partitions) let results: Vec<SKResult<()>> = (0..self.n_partitions)
.into_par_iter() .into_par_iter()
+1
View File
@@ -6,3 +6,4 @@ edition = "2024"
[dependencies] [dependencies]
libc = "0.2" libc = "0.2"
sysinfo = "0.33" sysinfo = "0.33"
indicatif = "0.17"
+33 -1
View File
@@ -1,5 +1,37 @@
use std::fmt; use std::fmt;
use std::time::Instant; use std::time::{Duration, Instant};
use indicatif::{ProgressBar, ProgressStyle};
const BRAILLE: &[&str] = &["", "", "", "", "", "", "", "", "", ""];
/// Spinner with the standard project look: `⠋ label — msg 0s`.
/// Caller updates the message with `pb.set_message(...)`.
pub fn spinner(label: &str) -> ProgressBar {
let pb = ProgressBar::new_spinner();
pb.set_style(
ProgressStyle::with_template(&format!("{{spinner}} {label}{{msg}} {{elapsed}}"))
.unwrap()
.tick_strings(BRAILLE),
);
pb.enable_steady_tick(Duration::from_millis(100));
pb
}
/// Progress bar with the standard project look:
/// `⠋ label — [████░░░░] pos/len unit elapsed`.
pub fn progress_bar(label: &str, n: u64, unit: &str) -> ProgressBar {
let pb = ProgressBar::new(n);
pb.set_style(
ProgressStyle::with_template(&format!(
"{{spinner}} {label}{{bar:40.cyan/blue}} {{pos}}/{{len}} {unit} {{elapsed}}"
))
.unwrap()
.tick_strings(BRAILLE),
);
pb.enable_steady_tick(Duration::from_millis(100));
pb
}
use libc::{RUSAGE_SELF, getrusage, rusage, timeval}; use libc::{RUSAGE_SELF, getrusage, rusage, timeval};
use sysinfo::System; use sysinfo::System;