Push ruqusmkoyvwm #16
@@ -1,54 +1,65 @@
|
||||
use std::io::{self, BufWriter, Write};
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Mutex;
|
||||
|
||||
use clap::Args;
|
||||
use obidebruinj::GraphDeBruijn;
|
||||
use obifastwrite::write_unitig;
|
||||
use obikindex::KmerIndex;
|
||||
use obikseq::set_k;
|
||||
use obiskio::UnitigFileReader;
|
||||
use rayon::prelude::*;
|
||||
use tracing::info;
|
||||
|
||||
use super::predicate::FilterArgs;
|
||||
|
||||
#[derive(Args)]
|
||||
pub struct UnitigArgs {
|
||||
/// Index directory produced by the `index` command
|
||||
/// Index directory
|
||||
pub index: PathBuf,
|
||||
|
||||
#[command(flatten)]
|
||||
pub filter: FilterArgs,
|
||||
}
|
||||
|
||||
pub fn run(args: UnitigArgs) {
|
||||
let idx = KmerIndex::open(&args.index).unwrap_or_else(|e| {
|
||||
eprintln!("error opening index: {e}");
|
||||
std::process::exit(1)
|
||||
std::process::exit(1);
|
||||
});
|
||||
|
||||
let k = idx.kmer_size();
|
||||
set_k(k);
|
||||
let n = idx.n_partitions();
|
||||
info!("dumping unitigs from {n} partitions (k={k})");
|
||||
let n_genomes = idx.meta().genomes.len().max(1);
|
||||
let use_counts = idx.meta().config.with_counts;
|
||||
|
||||
let stdout = Mutex::new(BufWriter::new(io::stdout()));
|
||||
info!("unitig: building de Bruijn graph from {n} partition(s) (k={k})");
|
||||
|
||||
(0..n).into_par_iter().for_each(|i| {
|
||||
let path = idx.layer_unitigs_path(i, 0);
|
||||
if !path.exists() {
|
||||
return;
|
||||
let filters = args.filter.build_filters(&idx.meta().genomes);
|
||||
|
||||
// ── Collect all filtered kmers into a single de Bruijn graph ─────────────
|
||||
let mut g = GraphDeBruijn::new();
|
||||
|
||||
for i in 0..n {
|
||||
idx.partition()
|
||||
.iter_partition_kmers(i, use_counts, n_genomes, &filters, |kmer, _row| {
|
||||
g.push(kmer);
|
||||
})
|
||||
.unwrap_or_else(|e| {
|
||||
eprintln!("error reading partition {i}: {e}");
|
||||
std::process::exit(1);
|
||||
});
|
||||
}
|
||||
|
||||
// open_sequential: works with and without .idx (approx or exact index)
|
||||
let reader = UnitigFileReader::open_sequential(&path).unwrap_or_else(|e| {
|
||||
eprintln!("error opening unitigs (partition {i}): {e}");
|
||||
std::process::exit(1)
|
||||
});
|
||||
info!("unitig: {} distinct k-mers", g.len());
|
||||
g.compute_degrees();
|
||||
|
||||
for (j, unitig) in reader.iter_unitigs() {
|
||||
let mut out = stdout.lock().unwrap();
|
||||
write_unitig(&unitig, k, i, j, &mut *out).unwrap_or_else(|e| {
|
||||
// ── Enumerate unitigs and write as FASTA ──────────────────────────────────
|
||||
let stdout = io::stdout();
|
||||
let mut out = BufWriter::new(stdout.lock());
|
||||
|
||||
for (j, unitig) in g.iter_unitig().enumerate() {
|
||||
write_unitig(&unitig, k, 0, j, &mut out).unwrap_or_else(|e| {
|
||||
eprintln!("write error: {e}");
|
||||
std::process::exit(1)
|
||||
std::process::exit(1);
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
stdout.into_inner().unwrap().flush().expect("flush error");
|
||||
out.flush().expect("flush error");
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user