Push pvqkqxlkkwry #17
+203
-11
@@ -3,8 +3,9 @@ use hashbrown::HashMap;
|
||||
use obikseq::k;
|
||||
use obikseq::{CanonicalKmer, Kmer, Sequence};
|
||||
use rayon::prelude::*;
|
||||
use std::arch::naked_asm;
|
||||
use std::fmt;
|
||||
use std::mem::needs_drop;
|
||||
use std::os::unix::raw::gid_t;
|
||||
use std::sync::atomic::{AtomicU8, Ordering};
|
||||
use xxhash_rust::xxh3::Xxh3Builder;
|
||||
|
||||
@@ -175,6 +176,156 @@ impl fmt::Display for Node {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct WalkState {
|
||||
kmer: CanonicalKmer,
|
||||
node: Node,
|
||||
direct: bool,
|
||||
}
|
||||
|
||||
impl WalkState {
|
||||
pub fn new(kmer: CanonicalKmer, node: Node, direct: bool) -> Self {
|
||||
debug_assert!(!node.is_visited(), "Cannot walk over a visited node");
|
||||
Self { kmer, node, direct }
|
||||
}
|
||||
|
||||
pub fn leavable(&self, graph: &GraphDeBruijn) -> bool {
|
||||
WalkState {
|
||||
kmer: self.kmer,
|
||||
node: self.node,
|
||||
direct: !self.direct,
|
||||
}
|
||||
.reachable(graph)
|
||||
}
|
||||
|
||||
pub fn reachable(&self, graph: &GraphDeBruijn) -> bool {
|
||||
if self.direct {
|
||||
if self.node.can_extend_left() {
|
||||
let next = self.kmer.into_kmer().push_left(self.node.left_nuc());
|
||||
let cnext = next.canonical();
|
||||
let dnext = next.raw() == cnext.raw();
|
||||
let next_node = Node(
|
||||
graph
|
||||
.nodes
|
||||
.get(&cnext)
|
||||
.unwrap()
|
||||
.load(std::sync::atomic::Ordering::Relaxed),
|
||||
);
|
||||
!next_node.is_visited()
|
||||
&& if dnext {
|
||||
next_node.can_extend_right()
|
||||
} else {
|
||||
next_node.can_extend_left()
|
||||
}
|
||||
} else {
|
||||
false
|
||||
}
|
||||
} else {
|
||||
if self.node.can_extend_right() {
|
||||
let next = self.kmer.into_kmer().push_right(self.node.right_nuc());
|
||||
let cnext = next.canonical();
|
||||
let dnext = next.raw() == cnext.raw();
|
||||
let next_node = Node(
|
||||
graph
|
||||
.nodes
|
||||
.get(&cnext)
|
||||
.unwrap()
|
||||
.load(std::sync::atomic::Ordering::Relaxed),
|
||||
);
|
||||
!next_node.is_visited()
|
||||
&& if dnext {
|
||||
next_node.can_extend_left()
|
||||
} else {
|
||||
next_node.can_extend_right()
|
||||
}
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn walk(&self, graph: &GraphDeBruijn) -> Option<WalkState> {
|
||||
if self.direct {
|
||||
if self.node.can_extend_right() {
|
||||
let next = self.kmer.into_kmer().push_right(self.node.right_nuc());
|
||||
let cnext = next.canonical();
|
||||
let dnext = next.raw() == cnext.raw();
|
||||
let next_node = Node(
|
||||
graph
|
||||
.nodes
|
||||
.get(&cnext)
|
||||
.unwrap()
|
||||
.load(std::sync::atomic::Ordering::Relaxed),
|
||||
);
|
||||
if next_node.is_visited() {
|
||||
None
|
||||
} else {
|
||||
if dnext {
|
||||
if next_node.can_extend_left() {
|
||||
Some(WalkState {
|
||||
kmer: cnext,
|
||||
node: next_node,
|
||||
direct: dnext,
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
if next_node.can_extend_right() {
|
||||
Some(WalkState {
|
||||
kmer: cnext,
|
||||
node: next_node,
|
||||
direct: dnext,
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if self.node.can_extend_left() {
|
||||
let next = self.kmer.into_kmer().push_left(self.node.left_nuc());
|
||||
let cnext = next.canonical();
|
||||
let dnext = next.raw() == cnext.raw();
|
||||
let next_node = Node(
|
||||
graph
|
||||
.nodes
|
||||
.get(&cnext)
|
||||
.unwrap()
|
||||
.load(std::sync::atomic::Ordering::Relaxed),
|
||||
);
|
||||
if next_node.is_visited() {
|
||||
None
|
||||
} else {
|
||||
if dnext {
|
||||
if next_node.can_extend_right() {
|
||||
Some(WalkState {
|
||||
kmer: cnext,
|
||||
node: next_node,
|
||||
direct: dnext,
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
if next_node.can_extend_left() {
|
||||
Some(WalkState {
|
||||
kmer: cnext,
|
||||
node: next_node,
|
||||
direct: dnext,
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── GraphDeBruijn ─────────────────────────────────────────────────────────────
|
||||
|
||||
pub struct GraphDeBruijn {
|
||||
@@ -260,6 +411,52 @@ impl GraphDeBruijn {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn start_walk(&self, kmer: CanonicalKmer) -> WalkState {
|
||||
let node = Node(
|
||||
self.nodes
|
||||
.get(&kmer)
|
||||
.expect("Cannot start a walk from not indexed kmer")
|
||||
.load(Ordering::Relaxed),
|
||||
);
|
||||
debug_assert!(
|
||||
!node.is_visited(),
|
||||
"Cannot start a walk from a visited node"
|
||||
);
|
||||
WalkState::new(kmer, node, true)
|
||||
}
|
||||
|
||||
pub fn try_start_walk(&self, kmer: CanonicalKmer) -> Option<WalkState> {
|
||||
let node = Node(self.nodes.get(&kmer)?.load(Ordering::Relaxed));
|
||||
if node.is_visited() {
|
||||
return None;
|
||||
}
|
||||
Some(WalkState::new(kmer, node, true))
|
||||
}
|
||||
|
||||
pub fn walk(&self, step: WalkState) -> Option<WalkState> {
|
||||
if !step.leavable(self) {
|
||||
return None;
|
||||
}
|
||||
let node = step.node;
|
||||
let kmer = step.kmer.into_kmer();
|
||||
let n_kmer = if step.direct {
|
||||
kmer.push_right(node.right_nuc())
|
||||
} else {
|
||||
kmer.push_left(node.left_nuc())
|
||||
};
|
||||
let n_ckmer = n_kmer.canonical();
|
||||
let n_direct = n_ckmer.raw() == n_kmer.raw();
|
||||
let n_node = if let Some(node_val) = self.nodes.get(&n_ckmer) {
|
||||
Node(node_val.load(Ordering::Relaxed))
|
||||
} else {
|
||||
unreachable!()
|
||||
};
|
||||
if n_node.is_visited() {
|
||||
return None;
|
||||
}
|
||||
Some(WalkState::new(n_ckmer, n_node, n_direct))
|
||||
}
|
||||
|
||||
fn next_unitig_kmer(&self, kmer: Kmer) -> Option<Kmer> {
|
||||
let canonical = kmer.canonical();
|
||||
let node = Node(self.nodes.get(&canonical)?.load(Ordering::Relaxed));
|
||||
@@ -454,17 +651,12 @@ impl GraphDeBruijn {
|
||||
}
|
||||
|
||||
fn is_start(&self, query: CanonicalKmer, node: Node) -> bool {
|
||||
if !node.can_extend_left() {
|
||||
return true;
|
||||
!WalkState {
|
||||
kmer: query,
|
||||
node,
|
||||
direct: true,
|
||||
}
|
||||
let pred = query.into_kmer().push_left(node.left_nuc()).canonical();
|
||||
self.nodes
|
||||
.get(&pred)
|
||||
.map(|a| {
|
||||
let pred_node = Node(a.load(Ordering::Acquire));
|
||||
!pred_node.can_extend_right()
|
||||
})
|
||||
.unwrap_or(false)
|
||||
.reachable(self)
|
||||
}
|
||||
|
||||
pub fn try_for_each_unitig<E, F>(&self, f: F) -> Result<(), E>
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use super::*;
|
||||
use obikseq::{k, set_k};
|
||||
use obikseq::{k, set_k, unitig::Unitig};
|
||||
|
||||
// Build a graph from an ASCII sequence, inserting all canonical k-mers.
|
||||
fn graph_from_ascii(seq: &[u8]) -> GraphDeBruijn {
|
||||
@@ -22,6 +22,16 @@ fn canonical_kmers(seq: &[u8]) -> Vec<CanonicalKmer> {
|
||||
v
|
||||
}
|
||||
|
||||
fn collect_unitigs(g: &GraphDeBruijn) -> Vec<Unitig> {
|
||||
let mut unitigs = Vec::new();
|
||||
g.try_for_each_unitig(|nuc_iter| -> Result<(), std::convert::Infallible> {
|
||||
unitigs.push(nuc_iter.collect());
|
||||
Ok(())
|
||||
})
|
||||
.unwrap();
|
||||
unitigs
|
||||
}
|
||||
|
||||
// ── push / canonicalisation ───────────────────────────────────────────────
|
||||
|
||||
#[test]
|
||||
@@ -76,7 +86,7 @@ fn degrees_linear_chain_extensions() {
|
||||
let seq = b"AAAAGGGG";
|
||||
let g = graph_from_ascii(seq);
|
||||
g.compute_degrees_and_mark_starts();
|
||||
let unitigs: Vec<Unitig> = g.iter_unitig().collect();
|
||||
let unitigs: Vec<Unitig> = collect_unitigs(&g);
|
||||
assert_eq!(unitigs.len(), 1, "linear chain → exactly one unitig");
|
||||
// seql = k + (n_kmers - 1) = 5 + 3 = 8 = seq.len()
|
||||
assert_eq!(
|
||||
@@ -123,7 +133,7 @@ fn unitig_roundtrip_linear() {
|
||||
}
|
||||
|
||||
println!("Les unitig:");
|
||||
let unitigs: Vec<Unitig> = g.iter_unitig().collect();
|
||||
let unitigs: Vec<Unitig> = collect_unitigs(&g);
|
||||
for unitig in &unitigs {
|
||||
println!("{}", String::from_utf8_lossy(&unitig.to_ascii()));
|
||||
}
|
||||
@@ -149,7 +159,7 @@ fn unitig_roundtrip_longer_sequence() {
|
||||
let seq = b"ACGTGGCTATCGAC";
|
||||
let g = graph_from_ascii(seq);
|
||||
g.compute_degrees_and_mark_starts();
|
||||
let unitigs: Vec<Unitig> = g.iter_unitig().collect();
|
||||
let unitigs: Vec<Unitig> = collect_unitigs(&g);
|
||||
let mut got = kmers_from_unitigs(&unitigs);
|
||||
let mut expected = canonical_kmers(seq);
|
||||
got.sort_unstable();
|
||||
@@ -166,7 +176,7 @@ fn unitig_isolated_node() {
|
||||
let mut g = GraphDeBruijn::new();
|
||||
g.push(kmer.canonical());
|
||||
g.compute_degrees_and_mark_starts();
|
||||
let unitigs: Vec<Unitig> = g.iter_unitig().collect();
|
||||
let unitigs: Vec<Unitig> = collect_unitigs(&g);
|
||||
assert_eq!(unitigs.len(), 1);
|
||||
assert_eq!(unitigs[0].seql(), k);
|
||||
}
|
||||
@@ -191,7 +201,7 @@ fn unitig_two_truly_distinct_isolated_nodes() {
|
||||
g.push(Kmer::from_ascii(b"AAAAC").unwrap().canonical());
|
||||
g.push(Kmer::from_ascii(b"GGGGT").unwrap().canonical());
|
||||
g.compute_degrees_and_mark_starts();
|
||||
let unitigs: Vec<Unitig> = g.iter_unitig().collect();
|
||||
let unitigs: Vec<Unitig> = collect_unitigs(&g);
|
||||
// Each isolated node → one unitig of length k
|
||||
assert_eq!(unitigs.len(), 2);
|
||||
assert!(unitigs.iter().all(|u| u.seql() == k));
|
||||
@@ -206,7 +216,7 @@ fn no_kmer_lost_or_duplicated() {
|
||||
let seq = b"ACGTACGTACGTTTTTACGTACGT";
|
||||
let g = graph_from_ascii(seq);
|
||||
g.compute_degrees_and_mark_starts();
|
||||
let unitigs: Vec<Unitig> = g.iter_unitig().collect();
|
||||
let unitigs: Vec<Unitig> = collect_unitigs(&g);
|
||||
let got = kmers_from_unitigs(&unitigs);
|
||||
let expected = canonical_kmers(seq);
|
||||
assert_eq!(
|
||||
@@ -231,7 +241,7 @@ fn cycle_kmers_not_lost() {
|
||||
let seq = b"ACGTACGT";
|
||||
let g = graph_from_ascii(seq);
|
||||
g.compute_degrees_and_mark_starts();
|
||||
let unitigs: Vec<Unitig> = g.iter_unitig().collect();
|
||||
let unitigs: Vec<Unitig> = collect_unitigs(&g);
|
||||
let got = kmers_from_unitigs(&unitigs);
|
||||
let expected = canonical_kmers(seq);
|
||||
assert_eq!(got.len(), expected.len(), "cycle k-mers lost");
|
||||
@@ -274,7 +284,7 @@ fn branching_graph_no_kmer_lost_or_duplicated() {
|
||||
insert(b"TTCGTGGCTA"); // J-I-F (different J prefix)
|
||||
|
||||
g.compute_degrees_and_mark_starts();
|
||||
let unitigs: Vec<Unitig> = g.iter_unitig().collect();
|
||||
let unitigs: Vec<Unitig> = collect_unitigs(&g);
|
||||
|
||||
// Collect all k-mers from unitigs.
|
||||
let got = kmers_from_unitigs(&unitigs);
|
||||
|
||||
Reference in New Issue
Block a user