refactor: extract obikindex crate and remove deprecated CLI commands
Extracted core indexing logic, state tracking, and metadata management into a new `obikindex` crate. Refactored the `index` and `unitig` commands to leverage the `KmerIndex` abstraction and state-driven pipeline transitions. Removed obsolete CLI subcommands (`count`, `fasta`, `longtig`, `partition`) and their associated pipeline steps. Updated FASTA writing utilities for single-line output and deterministic identifiers, and refreshed workspace dependencies.
This commit is contained in:
@@ -1,12 +1,10 @@
|
||||
//use ahash::RandomState;
|
||||
use hashbrown::HashMap;
|
||||
use obifastwrite::write_unitig;
|
||||
use obikseq::k;
|
||||
use obikseq::unitig::Unitig;
|
||||
use obikseq::{CanonicalKmer, Kmer, Sequence};
|
||||
use std::cell::Cell;
|
||||
use std::fmt;
|
||||
use std::io;
|
||||
use xxhash_rust::xxh3::Xxh3Builder;
|
||||
|
||||
// ── Types ─────────────────────────────────────────────────────────────────────
|
||||
@@ -293,59 +291,6 @@ impl GraphDeBruijn {
|
||||
Some(oriented)
|
||||
}
|
||||
|
||||
fn next_longtig_kmer(&self, kmer: Kmer) -> Option<Kmer> {
|
||||
let canonical = kmer.canonical();
|
||||
let node = self.nodes.get(&canonical)?.get();
|
||||
|
||||
let direct = kmer.raw() == canonical.raw();
|
||||
|
||||
if (direct && node.n_right_neighbours() == 0) || (!direct && node.n_left_neighbours() == 0)
|
||||
{
|
||||
return None;
|
||||
}
|
||||
|
||||
let next_c: CanonicalKmer = if direct {
|
||||
if node.can_extend_right() {
|
||||
canonical
|
||||
.into_kmer()
|
||||
.push_right(node.right_nuc())
|
||||
.canonical()
|
||||
} else {
|
||||
self.iter_right_neighbors(canonical)
|
||||
.filter(|n| !self.is_visited(n).unwrap_or(true))
|
||||
.next()?
|
||||
}
|
||||
} else {
|
||||
if node.can_extend_left() {
|
||||
canonical.into_kmer().push_left(node.left_nuc()).canonical()
|
||||
} else {
|
||||
self.iter_left_neighbors(canonical)
|
||||
.filter(|n| !self.is_visited(n).unwrap_or(true))
|
||||
.next()?
|
||||
}
|
||||
};
|
||||
|
||||
let cell = self.nodes.get(&next_c)?;
|
||||
let next_node = cell.get();
|
||||
if next_node.is_visited() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let oriented = oriented_next(kmer, next_c);
|
||||
let ndirect = oriented.raw() == next_c.raw();
|
||||
|
||||
if (ndirect && next_node.n_right_neighbours() > 1)
|
||||
|| (!ndirect && next_node.n_left_neighbours() > 1)
|
||||
{
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut updated = next_node;
|
||||
updated.set_visited();
|
||||
cell.set(updated);
|
||||
Some(oriented)
|
||||
}
|
||||
|
||||
fn iter_unitig_kmers(&self, start: Kmer) -> UnitigIter<'_> {
|
||||
UnitigIter {
|
||||
graph: self,
|
||||
@@ -353,13 +298,6 @@ impl GraphDeBruijn {
|
||||
}
|
||||
}
|
||||
|
||||
fn iter_longtig_kmers(&self, start: Kmer) -> LongtigIter<'_> {
|
||||
LongtigIter {
|
||||
graph: self,
|
||||
current: Some(start),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn iter_unitig(&self) -> impl Iterator<Item = Unitig> + '_ {
|
||||
let k = k();
|
||||
self.start_iter().map(move |(start, first_next)| {
|
||||
@@ -373,36 +311,6 @@ impl GraphDeBruijn {
|
||||
})
|
||||
}
|
||||
|
||||
pub fn iter_longtig(&self) -> impl Iterator<Item = Unitig> + '_ {
|
||||
let k = k();
|
||||
self.start_iter().map(move |(start, first_next)| {
|
||||
let mut nucs: Vec<u8> = (0..k).map(|i| start.nucleotide(i)).collect();
|
||||
if let Some(next_c) = first_next {
|
||||
for kmer in self.iter_longtig_kmers(next_c) {
|
||||
nucs.push(kmer.nucleotide(k - 1));
|
||||
}
|
||||
}
|
||||
Unitig::from_nucleotides(&nucs)
|
||||
})
|
||||
}
|
||||
|
||||
/// Write all unitigs to `out` in FASTA format.
|
||||
///
|
||||
/// Calls [`obifastwrite::write_unitig`] for each unitig produced by
|
||||
/// [`iter_unitig`]. Stops and returns the first I/O error encountered.
|
||||
pub fn write_fasta<W: io::Write>(&self, out: &mut W, unitig: bool) -> io::Result<()> {
|
||||
if unitig {
|
||||
for unitig in self.iter_unitig() {
|
||||
write_unitig(&unitig, k(), out)?;
|
||||
}
|
||||
} else {
|
||||
for unitig in self.iter_longtig() {
|
||||
write_unitig(&unitig, k(), out)?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Returns the number of entries in `self.nodes`, i.e. how many nodes the
/// graph currently holds.
pub fn len(&self) -> usize {
|
||||
self.nodes.len()
|
||||
}
|
||||
@@ -516,23 +424,6 @@ impl Iterator for UnitigIter<'_> {
|
||||
}
|
||||
}
|
||||
|
||||
// ── LongtigIter ──────────────────────────────────────────────────────────────
|
||||
|
||||
struct LongtigIter<'a> {
|
||||
graph: &'a GraphDeBruijn,
|
||||
current: Option<Kmer>,
|
||||
}
|
||||
|
||||
impl Iterator for LongtigIter<'_> {
|
||||
type Item = Kmer;
|
||||
|
||||
fn next(&mut self) -> Option<Kmer> {
|
||||
let current = self.current?;
|
||||
self.current = self.graph.next_longtig_kmer(current);
|
||||
Some(current)
|
||||
}
|
||||
}
|
||||
|
||||
// ── helpers ───────────────────────────────────────────────────────────────────
|
||||
|
||||
fn oriented_next(from: Kmer, to: CanonicalKmer) -> Kmer {
|
||||
|
||||
Reference in New Issue
Block a user