Push ruqusmkoyvwm #16
@@ -1,54 +1,65 @@
|
|||||||
use std::io::{self, BufWriter, Write};
|
use std::io::{self, BufWriter, Write};
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
use std::sync::Mutex;
|
|
||||||
|
|
||||||
use clap::Args;
|
use clap::Args;
|
||||||
|
use obidebruinj::GraphDeBruijn;
|
||||||
use obifastwrite::write_unitig;
|
use obifastwrite::write_unitig;
|
||||||
use obikindex::KmerIndex;
|
use obikindex::KmerIndex;
|
||||||
use obikseq::set_k;
|
|
||||||
use obiskio::UnitigFileReader;
|
|
||||||
use rayon::prelude::*;
|
|
||||||
use tracing::info;
|
use tracing::info;
|
||||||
|
|
||||||
|
use super::predicate::FilterArgs;
|
||||||
|
|
||||||
#[derive(Args)]
|
#[derive(Args)]
|
||||||
pub struct UnitigArgs {
|
pub struct UnitigArgs {
|
||||||
/// Index directory produced by the `index` command
|
/// Index directory
|
||||||
pub index: PathBuf,
|
pub index: PathBuf,
|
||||||
|
|
||||||
|
#[command(flatten)]
|
||||||
|
pub filter: FilterArgs,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn run(args: UnitigArgs) {
|
pub fn run(args: UnitigArgs) {
|
||||||
let idx = KmerIndex::open(&args.index).unwrap_or_else(|e| {
|
let idx = KmerIndex::open(&args.index).unwrap_or_else(|e| {
|
||||||
eprintln!("error opening index: {e}");
|
eprintln!("error opening index: {e}");
|
||||||
std::process::exit(1)
|
std::process::exit(1);
|
||||||
});
|
});
|
||||||
|
|
||||||
let k = idx.kmer_size();
|
let k = idx.kmer_size();
|
||||||
set_k(k);
|
|
||||||
let n = idx.n_partitions();
|
let n = idx.n_partitions();
|
||||||
info!("dumping unitigs from {n} partitions (k={k})");
|
let n_genomes = idx.meta().genomes.len().max(1);
|
||||||
|
let use_counts = idx.meta().config.with_counts;
|
||||||
|
|
||||||
let stdout = Mutex::new(BufWriter::new(io::stdout()));
|
info!("unitig: building de Bruijn graph from {n} partition(s) (k={k})");
|
||||||
|
|
||||||
(0..n).into_par_iter().for_each(|i| {
|
let filters = args.filter.build_filters(&idx.meta().genomes);
|
||||||
let path = idx.layer_unitigs_path(i, 0);
|
|
||||||
if !path.exists() {
|
// ── Collect all filtered kmers into a single de Bruijn graph ─────────────
|
||||||
return;
|
let mut g = GraphDeBruijn::new();
|
||||||
|
|
||||||
|
for i in 0..n {
|
||||||
|
idx.partition()
|
||||||
|
.iter_partition_kmers(i, use_counts, n_genomes, &filters, |kmer, _row| {
|
||||||
|
g.push(kmer);
|
||||||
|
})
|
||||||
|
.unwrap_or_else(|e| {
|
||||||
|
eprintln!("error reading partition {i}: {e}");
|
||||||
|
std::process::exit(1);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
// open_sequential: works with and without .idx (approx or exact index)
|
info!("unitig: {} distinct k-mers", g.len());
|
||||||
let reader = UnitigFileReader::open_sequential(&path).unwrap_or_else(|e| {
|
g.compute_degrees();
|
||||||
eprintln!("error opening unitigs (partition {i}): {e}");
|
|
||||||
std::process::exit(1)
|
|
||||||
});
|
|
||||||
|
|
||||||
for (j, unitig) in reader.iter_unitigs() {
|
// ── Enumerate unitigs and write as FASTA ──────────────────────────────────
|
||||||
let mut out = stdout.lock().unwrap();
|
let stdout = io::stdout();
|
||||||
write_unitig(&unitig, k, i, j, &mut *out).unwrap_or_else(|e| {
|
let mut out = BufWriter::new(stdout.lock());
|
||||||
|
|
||||||
|
for (j, unitig) in g.iter_unitig().enumerate() {
|
||||||
|
write_unitig(&unitig, k, 0, j, &mut out).unwrap_or_else(|e| {
|
||||||
eprintln!("write error: {e}");
|
eprintln!("write error: {e}");
|
||||||
std::process::exit(1)
|
std::process::exit(1);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
});
|
|
||||||
|
|
||||||
stdout.into_inner().unwrap().flush().expect("flush error");
|
out.flush().expect("flush error");
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user