refactor: extract obikindex crate and remove deprecated CLI commands

Extracted core indexing logic, state tracking, and metadata management into a new `obikindex` crate. Refactored the `index` and `unitig` commands to leverage the `KmerIndex` abstraction and state-driven pipeline transitions. Removed obsolete CLI subcommands (`count`, `fasta`, `longtig`, `partition`) and their associated pipeline steps. Updated FASTA writing utilities for single-line output and deterministic identifiers, and refreshed workspace dependencies.
This commit is contained in:
Eric Coissac
2026-05-20 18:21:05 +02:00
parent f8cfb493b8
commit 17c9e076bd
24 changed files with 792 additions and 1593 deletions
-109
View File
@@ -1,12 +1,10 @@
//use ahash::RandomState;
use hashbrown::HashMap;
use obifastwrite::write_unitig;
use obikseq::k;
use obikseq::unitig::Unitig;
use obikseq::{CanonicalKmer, Kmer, Sequence};
use std::cell::Cell;
use std::fmt;
use std::io;
use xxhash_rust::xxh3::Xxh3Builder;
// ── Types ─────────────────────────────────────────────────────────────────────
@@ -293,59 +291,6 @@ impl GraphDeBruijn {
Some(oriented)
}
/// Advance one step along a longtig, starting from the oriented k-mer `kmer`.
///
/// The direction of travel is derived from the orientation of `kmer`
/// relative to its canonical form: when both share the same raw value the
/// walk uses the node's right side, otherwise its left side.
///
/// Returns `None` (ending the walk) when:
/// - `kmer` is not present in the graph,
/// - the current node has no neighbour on the side of travel,
/// - no unvisited neighbour can be found at a branching node,
/// - the chosen successor is missing from the graph or already visited,
/// - the successor, once oriented, has more than one neighbour on its own
///   outgoing side.
///
/// On success the successor is flagged as visited and returned in the
/// orientation computed by `oriented_next`.
fn next_longtig_kmer(&self, kmer: Kmer) -> Option<Kmer> {
    let canon = kmer.canonical();
    let here = self.nodes.get(&canon)?.get();
    // True when `kmer` is already in canonical orientation.
    let forward = kmer.raw() == canon.raw();

    // Nothing to follow on the side we are travelling towards.
    let dead_end = if forward {
        here.n_right_neighbours() == 0
    } else {
        here.n_left_neighbours() == 0
    };
    if dead_end {
        return None;
    }

    // Pick the successor: use the node's recorded extension nucleotide when
    // it reports it can extend (presumably a single neighbour on that side
    // -- confirm against the node type); otherwise take the first neighbour
    // not yet visited. A neighbour whose visited state is unknown is
    // treated as visited.
    let successor: CanonicalKmer = if forward && here.can_extend_right() {
        canon.into_kmer().push_right(here.right_nuc()).canonical()
    } else if forward {
        self.iter_right_neighbors(canon)
            .find(|n| !self.is_visited(n).unwrap_or(true))?
    } else if here.can_extend_left() {
        canon.into_kmer().push_left(here.left_nuc()).canonical()
    } else {
        self.iter_left_neighbors(canon)
            .find(|n| !self.is_visited(n).unwrap_or(true))?
    };

    let slot = self.nodes.get(&successor)?;
    let candidate = slot.get();
    if candidate.is_visited() {
        return None;
    }

    let oriented = oriented_next(kmer, successor);
    let successor_forward = oriented.raw() == successor.raw();
    // Stop in front of a successor that branches on its outgoing side; it is
    // deliberately left unvisited in that case.
    let branches_ahead = if successor_forward {
        candidate.n_right_neighbours() > 1
    } else {
        candidate.n_left_neighbours() > 1
    };
    if branches_ahead {
        return None;
    }

    // Nodes live in `Cell`s: update a copy and store it back.
    let mut marked = candidate;
    marked.set_visited();
    slot.set(marked);
    Some(oriented)
}
fn iter_unitig_kmers(&self, start: Kmer) -> UnitigIter<'_> {
UnitigIter {
graph: self,
@@ -353,13 +298,6 @@ impl GraphDeBruijn {
}
}
/// Build a [`LongtigIter`] over this graph whose first yielded k-mer is `start`.
fn iter_longtig_kmers(&self, start: Kmer) -> LongtigIter<'_> {
    let current = Some(start);
    LongtigIter {
        graph: self,
        current,
    }
}
pub fn iter_unitig(&self) -> impl Iterator<Item = Unitig> + '_ {
let k = k();
self.start_iter().map(move |(start, first_next)| {
@@ -373,36 +311,6 @@ impl GraphDeBruijn {
})
}
/// Iterate over the longtigs of the graph, one per start point yielded by
/// `start_iter`.
///
/// Each sequence begins with the `k` nucleotides of the start k-mer. When
/// the start point comes with a follow-up k-mer, the walk produced by
/// `iter_longtig_kmers` is appended, contributing the last nucleotide of
/// every k-mer it yields.
pub fn iter_longtig(&self) -> impl Iterator<Item = Unitig> + '_ {
    let ksize = k();
    self.start_iter().map(move |(seed, follow_up)| {
        // Seed the sequence with the full start k-mer.
        let mut bases: Vec<u8> = (0..ksize).map(|pos| seed.nucleotide(pos)).collect();
        if let Some(first) = follow_up {
            // Every further k-mer overlaps the previous one by k-1 bases,
            // so only its last nucleotide is new.
            bases.extend(
                self.iter_longtig_kmers(first)
                    .map(|step| step.nucleotide(ksize - 1)),
            );
        }
        Unitig::from_nucleotides(&bases)
    })
}
/// Write all unitigs to `out` in FASTA format.
///
/// Calls [`obifastwrite::write_unitig`] for each unitig produced by
/// [`iter_unitig`]. Stops and returns the first I/O error encountered.
pub fn write_fasta<W: io::Write>(&self, out: &mut W, unitig: bool) -> io::Result<()> {
if unitig {
for unitig in self.iter_unitig() {
write_unitig(&unitig, k(), out)?;
}
} else {
for unitig in self.iter_longtig() {
write_unitig(&unitig, k(), out)?;
}
}
Ok(())
}
/// Return the number of entries stored in the graph's node map.
pub fn len(&self) -> usize {
self.nodes.len()
}
@@ -516,23 +424,6 @@ impl Iterator for UnitigIter<'_> {
}
}
// ── LongtigIter ──────────────────────────────────────────────────────────────
/// Iterator state for walking a longtig k-mer by k-mer.
struct LongtigIter<'a> {
// Graph being traversed; successors are obtained from it.
graph: &'a GraphDeBruijn,
// Next k-mer to yield, or `None` once the walk has ended.
current: Option<Kmer>,
}
impl Iterator for LongtigIter<'_> {
    type Item = Kmer;

    /// Yield the pending k-mer and queue its successor.
    ///
    /// The successor is computed with `GraphDeBruijn::next_longtig_kmer`;
    /// once that returns `None`, the following call ends the iteration.
    fn next(&mut self) -> Option<Kmer> {
        let emitted = self.current.take()?;
        self.current = self.graph.next_longtig_kmer(emitted);
        Some(emitted)
    }
}
// ── helpers ───────────────────────────────────────────────────────────────────
fn oriented_next(from: Kmer, to: CanonicalKmer) -> Kmer {