Push ruqusmkoyvwm #16

Merged
coissac merged 10 commits from push-ruqusmkoyvwm into main 2026-06-05 08:41:08 +00:00
Showing only changes of commit 712a03a3a6 - Show all commits
+40 -29
View File
@@ -1,54 +1,65 @@
use std::io::{self, BufWriter, Write}; use std::io::{self, BufWriter, Write};
use std::path::PathBuf; use std::path::PathBuf;
use std::sync::Mutex;
use clap::Args; use clap::Args;
use obidebruinj::GraphDeBruijn;
use obifastwrite::write_unitig; use obifastwrite::write_unitig;
use obikindex::KmerIndex; use obikindex::KmerIndex;
use obikseq::set_k;
use obiskio::UnitigFileReader;
use rayon::prelude::*;
use tracing::info; use tracing::info;
use super::predicate::FilterArgs;
#[derive(Args)] #[derive(Args)]
pub struct UnitigArgs { pub struct UnitigArgs {
/// Index directory produced by the `index` command /// Index directory
pub index: PathBuf, pub index: PathBuf,
#[command(flatten)]
pub filter: FilterArgs,
} }
pub fn run(args: UnitigArgs) { pub fn run(args: UnitigArgs) {
let idx = KmerIndex::open(&args.index).unwrap_or_else(|e| { let idx = KmerIndex::open(&args.index).unwrap_or_else(|e| {
eprintln!("error opening index: {e}"); eprintln!("error opening index: {e}");
std::process::exit(1) std::process::exit(1);
}); });
let k = idx.kmer_size(); let k = idx.kmer_size();
set_k(k); let n = idx.n_partitions();
let n = idx.n_partitions(); let n_genomes = idx.meta().genomes.len().max(1);
info!("dumping unitigs from {n} partitions (k={k})"); let use_counts = idx.meta().config.with_counts;
let stdout = Mutex::new(BufWriter::new(io::stdout())); info!("unitig: building de Bruijn graph from {n} partition(s) (k={k})");
(0..n).into_par_iter().for_each(|i| { let filters = args.filter.build_filters(&idx.meta().genomes);
let path = idx.layer_unitigs_path(i, 0);
if !path.exists() {
return;
}
// open_sequential: works with and without .idx (approx or exact index) // ── Collect all filtered kmers into a single de Bruijn graph ─────────────
let reader = UnitigFileReader::open_sequential(&path).unwrap_or_else(|e| { let mut g = GraphDeBruijn::new();
eprintln!("error opening unitigs (partition {i}): {e}");
std::process::exit(1)
});
for (j, unitig) in reader.iter_unitigs() { for i in 0..n {
let mut out = stdout.lock().unwrap(); idx.partition()
write_unitig(&unitig, k, i, j, &mut *out).unwrap_or_else(|e| { .iter_partition_kmers(i, use_counts, n_genomes, &filters, |kmer, _row| {
eprintln!("write error: {e}"); g.push(kmer);
std::process::exit(1) })
.unwrap_or_else(|e| {
eprintln!("error reading partition {i}: {e}");
std::process::exit(1);
}); });
} }
});
stdout.into_inner().unwrap().flush().expect("flush error"); info!("unitig: {} distinct k-mers", g.len());
g.compute_degrees();
// ── Enumerate unitigs and write as FASTA ──────────────────────────────────
let stdout = io::stdout();
let mut out = BufWriter::new(stdout.lock());
for (j, unitig) in g.iter_unitig().enumerate() {
write_unitig(&unitig, k, 0, j, &mut out).unwrap_or_else(|e| {
eprintln!("write error: {e}");
std::process::exit(1);
});
}
out.flush().expect("flush error");
} }