Refactor: Simplify user authentication flow

- Remove redundant password validation logic
 - Integrate JWT-based session management for improved security and scalability
This commit is contained in:
Eric Coissac
2026-04-29 08:45:49 +02:00
parent 97e65bd831
commit 4e26e3bd40
8 changed files with 1030 additions and 2 deletions
+10
View File
@@ -0,0 +1,10 @@
[package]
name = "obidebruinj"
version = "0.1.0"
edition = "2021"
[dependencies]
obikseq = { path = "../obikseq" }
obifastwrite = { path = "../obifastwrite" }
ahash = "0.8"
hashbrown = "0.14"
+518
View File
@@ -0,0 +1,518 @@
use ahash::RandomState;
use hashbrown::HashMap;
use obifastwrite::write_unitig;
use obikseq::kmer::Kmer;
use obikseq::unitig::Unitig;
use std::cell::Cell;
use std::io;
// ── Types ─────────────────────────────────────────────────────────────────────
/// `hashbrown` hash map parameterised with `ahash`'s `RandomState` as its hasher.
type FastHashMap<K, V> = HashMap<K, V, RandomState>;
// ── Node ──────────────────────────────────────────────────────────────────────
//
// bit layout (LSB first):
// bit 0 : can_extend_right — exactly one right canonical neighbour exists
// bit 1 : can_extend_left — exactly one left canonical neighbour exists
// bit 2 : visited
// bits 3–4 : right_nuc — index 0–3 (A/C/G/T) of that neighbour; valid iff bit 0 = 1
// bits 5–6 : left_nuc — index 0–3 (A/C/G/T) of that neighbour; valid iff bit 1 = 1
// bit 7 : reserved (0)
//
// "can_extend" = false covers both 0 neighbours and ≥2 neighbours; the only
// information needed for traversal is "exactly one".
/// One-byte flag set attached to every k-mer stored in the graph.
///
/// Layout, least-significant bit first: bit 0 = unique right extension,
/// bit 1 = unique left extension, bit 2 = visited, bits 3–4 = right
/// nucleotide index, bits 5–6 = left nucleotide index, bit 7 = unused.
#[repr(transparent)]
#[derive(Debug, Clone, Copy, Default)]
pub struct Node(u8);

impl Node {
    /// Bit 0: exactly one right canonical neighbour exists.
    const RIGHT_FLAG: u8 = 1 << 0;
    /// Bit 1: exactly one left canonical neighbour exists.
    const LEFT_FLAG: u8 = 1 << 1;
    /// Bit 2: the node has already been emitted by a traversal.
    const VISITED_FLAG: u8 = 1 << 2;
    /// Position of the two-bit right nucleotide index (bits 3–4).
    const RIGHT_NUC_SHIFT: u8 = 3;
    /// Position of the two-bit left nucleotide index (bits 5–6).
    const LEFT_NUC_SHIFT: u8 = 5;
    /// Mask of a two-bit nucleotide index before shifting.
    const NUC_MASK: u8 = 0b11;

    /// True when every bit of `flag` is set.
    fn has(self, flag: u8) -> bool {
        self.0 & flag != 0
    }

    /// Two-bit nucleotide index stored at `shift`.
    fn nuc_at(self, shift: u8) -> u8 {
        (self.0 >> shift) & Self::NUC_MASK
    }

    /// Clear `flag` and the index at `shift`, then store `nuc` if present.
    fn store(&mut self, flag: u8, shift: u8, nuc: Option<u8>) {
        let cleared = self.0 & !(flag | (Self::NUC_MASK << shift));
        self.0 = match nuc {
            // Only the two low bits of `n` are kept.
            Some(n) => cleared | flag | ((n & Self::NUC_MASK) << shift),
            None => cleared,
        };
    }

    pub fn can_extend_right(self) -> bool {
        self.has(Self::RIGHT_FLAG)
    }

    pub fn can_extend_left(self) -> bool {
        self.has(Self::LEFT_FLAG)
    }

    pub fn is_visited(self) -> bool {
        self.has(Self::VISITED_FLAG)
    }

    /// Index of the unique right neighbour (0=A, 1=C, 2=G, 3=T).
    /// Only meaningful when `can_extend_right()` is true.
    pub fn right_nuc(self) -> u8 {
        self.nuc_at(Self::RIGHT_NUC_SHIFT)
    }

    /// Index of the unique left neighbour (0=A, 1=C, 2=G, 3=T).
    /// Only meaningful when `can_extend_left()` is true.
    pub fn left_nuc(self) -> u8 {
        self.nuc_at(Self::LEFT_NUC_SHIFT)
    }

    pub fn set_visited(&mut self) {
        self.0 |= Self::VISITED_FLAG;
    }

    /// `None`    → not uniquely continuable (0 or ≥2 neighbours).
    /// `Some(n)` → exactly one neighbour, reached by adding nucleotide `n`.
    pub fn set_right(&mut self, nuc: Option<u8>) {
        self.store(Self::RIGHT_FLAG, Self::RIGHT_NUC_SHIFT, nuc);
    }

    /// Same contract as `set_right`, for the left side (bit 1, bits 5–6).
    pub fn set_left(&mut self, nuc: Option<u8>) {
        self.store(Self::LEFT_FLAG, Self::LEFT_NUC_SHIFT, nuc);
    }
}
// ── GraphDeBruijn ─────────────────────────────────────────────────────────────
/// De Bruijn graph stored as a hash map from k-mer to per-node flags.
///
/// Keys are inserted in canonical form by `push`; every value is wrapped in a
/// `Cell` so the flags can be updated through a shared reference.
pub struct GraphDeBruijn {
// One entry per canonical k-mer; `Cell` allows flag updates via `&self`.
nodes: FastHashMap<Kmer, Cell<Node>>,
// k-mer length, forwarded to every `Kmer` operation.
k: usize,
}
impl GraphDeBruijn {
/// Create an empty graph for k-mers of length `k`.
pub fn new(k: usize) -> Self {
    let nodes = FastHashMap::with_hasher(RandomState::new());
    Self { nodes, k }
}
/// Create an empty graph for k-mers of length `k`, with the node map
/// pre-sized for `capacity` entries.
pub fn with_capacity(k: usize, capacity: usize) -> Self {
    let hasher = RandomState::new();
    let nodes = FastHashMap::with_capacity_and_hasher(capacity, hasher);
    Self { nodes, k }
}
/// Insert `kmer` (canonicalised) into the graph. No-op if already present.
pub fn push(&mut self, kmer: Kmer) {
self.nodes
.entry(kmer.canonical(self.k))
.or_insert_with(|| Cell::new(Node::default()));
}
/// Fill in the extension flags of every node.
///
/// For each stored k-mer, the four right and the four left canonical
/// neighbours are looked up in the graph; when exactly one exists on a side,
/// its index is recorded in the node, otherwise that side is flagged as not
/// extendable. The `visited` bit is left untouched.
///
/// Runs in a single pass over the map: `Cell` lets the flags be rewritten
/// while the map is borrowed immutably.
pub fn compute_degrees(&self) {
    let k = self.k;
    for (&kmer, flags) in self.nodes.iter() {
        let right = unique_neighbor(&kmer.right_canonical_neighbors(k), &self.nodes);
        let left = unique_neighbor(&kmer.left_canonical_neighbors(k), &self.nodes);

        let mut node = flags.get();
        node.set_left(left);
        node.set_right(right);
        flags.set(node);
    }
}
/// Internal iterator over unitig-start k-mers; drives `iter_unitig`.
///
/// MUST NOT be consumed standalone: both passes are lazy `filter_map`s over
/// `self.nodes`, and the second pass only skips chain members because
/// `iter_unitig` walks each chain (marking its nodes visited) before it pulls
/// the next start.
///
/// Two passes, chained:
/// 1. Unvisited nodes missing at least one unique extension (chain ends and
///    isolated nodes):
///    - `!can_extend_left` → yield the k-mer as stored in the map
///    - otherwise `!can_extend_right` → yield its reverse complement
/// 2. Nodes still unvisited when the second pass reaches them → yield the
///    k-mer as stored in the map.
///
/// Every yielded node is marked visited before it is returned.
///
/// NOTE(review): pass 2 is described as "cycle nodes", but a node with both
/// flags set that no pass-1 walk reaches (e.g. one whose neighbours are not
/// uniquely extendable back towards it) would also be emitted here — confirm
/// this is intended.
fn start_iter(&self) -> impl Iterator<Item = Kmer> + '_ {
let k = self.k;
// Pass 1: nodes lacking a unique extension on at least one side.
let chain_starts = self.nodes.iter().filter_map(move |(&kmer, cell)| {
let node = cell.get();
// Already emitted as part of an earlier walk.
if node.is_visited() {
return None;
}
// Orient the start so that the walk leaves through the extendable side.
let start = if !node.can_extend_left() {
kmer
} else if !node.can_extend_right() {
kmer.revcomp(k)
} else {
// Both sides extendable: not a chain end, leave it for pass 2.
return None;
};
// Mark before yielding so no later walk can re-enter this node.
let mut updated = node;
updated.set_visited();
cell.set(updated);
Some(start)
});
// Pass 2: whatever is still unvisited once the chain walks are done.
// Yielded as stored (canonical key), so `next_kmer` follows the right side.
let cycle_starts = self.nodes.iter().filter_map(move |(&kmer, cell)| {
let node = cell.get();
if node.is_visited() {
return None;
}
let mut updated = node;
updated.set_visited();
cell.set(updated);
Some(kmer)
});
// `chain` keeps both passes lazy: pass 2 starts only after pass 1 is exhausted.
chain_starts.chain(cycle_starts)
}
/// Return the next k-mer in the unitig traversal direction and mark its node
/// visited, or `None` when the walk must stop.
///
/// Direction is inferred from whether `kmer` is canonical:
/// - `kmer == kmer.canonical(k)` → forward → follow the right neighbour
/// - otherwise → backward → follow the left neighbour of the canonical form
///
/// Returns `None` when:
/// - `kmer` is not in the graph,
/// - the current node has no unique extension in the chosen direction,
/// - the successor is not in the graph, or
/// - the successor is already visited (cycle guard).
///
/// The returned k-mer is the successor itself when `kmer.is_overlapping`
/// accepts it, and its reverse complement otherwise, so that consecutive
/// k-mers of a walk overlap.
///
/// NOTE(review): a k-mer equal to its own reverse complement always compares
/// equal to its canonical form and is therefore always walked "forward" —
/// confirm this is acceptable for even `k`.
pub fn next_kmer(&self, kmer: Kmer) -> Option<Kmer> {
let canonical = kmer.canonical(self.k);
let node = self.nodes.get(&canonical)?.get();
let next = if kmer == canonical {
if !node.can_extend_right() {
return None;
}
// Extend the canonical k-mer to the right with the stored nucleotide,
// then canonicalise the result so it can be used as a map key.
canonical
.push_right(node.right_nuc(), self.k)
.canonical(self.k)
} else {
if !node.can_extend_left() {
return None;
}
// `kmer` is the reverse complement of the key, so the walk continues
// through the key's left side; canonicalise for the map lookup.
canonical
.push_left(node.left_nuc(), self.k)
.canonical(self.k)
};
// Mark the next node visited before returning, consistent with start_iter.
// Returns None if the node was already visited (cycle guard).
let cell = self.nodes.get(&next)?;
let node = cell.get();
if node.is_visited() {
return None;
}
// `next` is canonical; flip it when that orientation does not overlap `kmer`.
let oriented = if kmer.is_overlapping(next, self.k) {
next
} else {
next.revcomp(self.k)
};
let mut updated = node;
updated.set_visited();
cell.set(updated);
Some(oriented)
}
/// Walk a single unitig, k-mer by k-mer, beginning at `start`.
///
/// `start` must already be marked visited (as `start_iter` does); it is
/// yielded first. Every following k-mer is marked visited by `next_kmer`
/// before it is yielded. The walk ends when the chain cannot be extended or
/// the next node was already visited.
///
/// To obtain only the base to append rather than full k-mers:
/// - first item → `kmer.nucleotide(0..k)` (k bases, the seed)
/// - next items → `kmer.nucleotide(k-1)`  (1 new base each)
pub fn iter_unitig_kmers(&self, start: Kmer) -> UnitigIter<'_> {
    let current = Some(start);
    UnitigIter {
        graph: self,
        current,
    }
}
/// Iterate over all unitigs in the graph.
///
/// For each start k-mer produced by `start_iter`, the chain returned by
/// `iter_unitig_kmers` is flattened into a nucleotide vector and turned into
/// a [`Unitig`]. Extension flags are read as they are, so `compute_degrees`
/// is expected to have been called beforehand.
pub fn iter_unitig(&self) -> impl Iterator<Item = Unitig> + '_ {
    let k = self.k;
    self.start_iter().map(move |seed| {
        // The seed contributes all of its k bases.
        let mut bases: Vec<u8> = (0..k).map(|pos| seed.nucleotide(pos)).collect();
        // Every later k-mer of the chain contributes only its last base.
        bases.extend(
            self.iter_unitig_kmers(seed)
                .skip(1)
                .map(|kmer| kmer.nucleotide(k - 1)),
        );
        Unitig::from_nucleotides(&bases)
    })
}
/// Write all unitigs to `out` in FASTA format.
///
/// Hands every unitig produced by [`iter_unitig`] to
/// [`obifastwrite::write_unitig`]; the first I/O error stops the iteration
/// and is returned.
pub fn write_fasta<W: io::Write>(&self, out: &mut W) -> io::Result<()> {
    let k = self.k;
    self.iter_unitig()
        .try_for_each(|unitig| write_unitig(&unitig, k, &mut *out))
}
/// Number of entries in the node map, i.e. distinct canonical k-mers pushed so far.
pub fn len(&self) -> usize {
self.nodes.len()
}
/// True when no k-mer has been pushed into the graph.
pub fn is_empty(&self) -> bool {
self.nodes.is_empty()
}
}
// ── UnitigIter ────────────────────────────────────────────────────────────────
/// Iterator over the k-mers of one unitig, created by
/// `GraphDeBruijn::iter_unitig_kmers`.
pub struct UnitigIter<'a> {
    // Graph being walked; supplies `next_kmer`.
    graph: &'a GraphDeBruijn,
    // K-mer to yield on the next call, `None` once the walk is over.
    current: Option<Kmer>,
}

impl Iterator for UnitigIter<'_> {
    type Item = Kmer;

    /// Yield the pending k-mer and pre-compute its successor.
    ///
    /// `next_kmer` takes care of the visited marking and of stopping on
    /// already-visited nodes.
    fn next(&mut self) -> Option<Kmer> {
        let pending = self.current.take()?;
        self.current = self.graph.next_kmer(pending);
        Some(pending)
    }
}
// ── helpers ───────────────────────────────────────────────────────────────────
/// Index of the only neighbour that is present in `nodes`, if there is
/// exactly one.
///
/// `neighbors` holds the four candidate canonical k-mers in nucleotide order
/// (0=A, 1=C, 2=G, 3=T). Returns `None` when none or at least two of them are
/// keys of `nodes`; the scan stops as soon as a second hit is found.
fn unique_neighbor(neighbors: &[Kmer; 4], nodes: &FastHashMap<Kmer, Cell<Node>>) -> Option<u8> {
    let mut hits = neighbors
        .iter()
        .enumerate()
        .filter(|(_, candidate)| nodes.contains_key(*candidate))
        .map(|(index, _)| index as u8); // index < 4, always fits in u8

    match (hits.next(), hits.next()) {
        (Some(only), None) => Some(only),
        _ => None,
    }
}
// ── tests ─────────────────────────────────────────────────────────────────────
#[cfg(test)]
mod tests {
use super::*;
// Build a graph from an ASCII sequence by pushing every window of length `k`
// (`push` canonicalises each one).
//
// A sequence shorter than `k` has no window and yields an empty graph; the
// previous index-based loop still ran once in that case and panicked on an
// out-of-range slice. `k` must be non-zero (`windows` panics on 0).
// Panics if a window is rejected by `Kmer::from_ascii`.
fn graph_from_ascii(seq: &[u8], k: usize) -> GraphDeBruijn {
    let mut g = GraphDeBruijn::new(k);
    for window in seq.windows(k) {
        g.push(Kmer::from_ascii(window, k).unwrap());
    }
    g
}
// Collect the canonical form of every window of length `k` of an ASCII
// sequence into a sorted, de-duplicated vec.
//
// A sequence shorter than `k` yields an empty vec; the previous index-based
// range still ran once in that case and panicked on an out-of-range slice.
// `k` must be non-zero (`windows` panics on 0).
// Panics if a window is rejected by `Kmer::from_ascii`.
fn canonical_kmers(seq: &[u8], k: usize) -> Vec<Kmer> {
    let mut v: Vec<Kmer> = seq
        .windows(k)
        .map(|window| Kmer::from_ascii(window, k).unwrap().canonical(k))
        .collect();
    v.sort_unstable();
    v.dedup();
    v
}
// ── push / canonicalisation ───────────────────────────────────────────────
// Pushing a k-mer and its reverse complement must create a single node.
#[test]
fn push_deduplicates_revcomp() {
    let k = 5;
    let forward = Kmer::from_ascii(b"ACGTA", k).unwrap();
    let reverse = forward.revcomp(k);

    let mut graph = GraphDeBruijn::new(k);
    for strand in [forward, reverse] {
        graph.push(strand);
    }

    assert_eq!(graph.len(), 1, "kmer and its revcomp must map to the same node");
}
// A k-mer that equals its own reverse complement (ACGT) is stored once.
#[test]
fn push_palindrome_single_node() {
    let k = 4;
    let palindrome = Kmer::from_ascii(b"ACGT", k).unwrap();
    // Guard the premise of the test before exercising the graph.
    assert_eq!(palindrome, palindrome.revcomp(k), "test requires a palindrome");

    let mut graph = GraphDeBruijn::new(k);
    graph.push(palindrome);

    assert_eq!(graph.len(), 1);
}
// ── compute_degrees on a linear chain ────────────────────────────────────
// Fixture: graph and expected canonical k-mers for the sequence AAAAGGGG.
// With k=5 its windows are AAAAG, AAAGG, AAGGG and AGGGG.
fn linear_chain_graph(k: usize) -> (GraphDeBruijn, Vec<Kmer>) {
    const SEQ: &[u8] = b"AAAAGGGG";
    (graph_from_ascii(SEQ, k), canonical_kmers(SEQ, k))
}
// The graph holds one node per distinct canonical k-mer of the fixture.
#[test]
fn degrees_linear_chain_node_count() {
    let (graph, expected) = linear_chain_graph(5);
    assert_eq!(graph.len(), expected.len());
}
// A linear chain must come out as exactly one unitig covering all k-mers.
//
// `start_iter` is never consumed on its own here: its second pass is only
// meaningful when interleaved with the chain walks done by `iter_unitig`.
#[test]
fn degrees_linear_chain_extensions() {
    let k = 5;
    let seq = b"AAAAGGGG";

    let graph = graph_from_ascii(seq, k);
    graph.compute_degrees();
    let unitigs = graph.iter_unitig().collect::<Vec<Unitig>>();

    assert_eq!(unitigs.len(), 1, "linear chain → exactly one unitig");
    // Expected length: k + (n_kmers - 1) = 5 + 3 = 8 = seq.len()
    assert_eq!(unitigs[0].seql(), seq.len(), "unitig spans the full sequence");

    let recovered = kmers_from_unitigs(&unitigs, k);
    let inserted = canonical_kmers(seq, k);
    assert_eq!(recovered, inserted, "unitig k-mers must equal inserted k-mers");
}
// ── unitig reconstruction ─────────────────────────────────────────────────
// Round-trip helper: gather the canonical k-mers of every unitig into one
// sorted, de-duplicated vec, comparable with `canonical_kmers`.
fn kmers_from_unitigs(unitigs: &[Unitig], k: usize) -> Vec<Kmer> {
    let mut kmers: Vec<Kmer> = Vec::new();
    for unitig in unitigs {
        kmers.extend(unitig.iter_canonical_kmers(k));
    }
    kmers.sort_unstable();
    kmers.dedup();
    kmers
}
// ACGTGGCTA with k=5 has 5 windows (ACGTG, CGTGG, GTGGC, TGGCT, GGCTA) and
// no repeated k-mer; the graph must collapse into one unitig holding them all.
#[test]
fn unitig_roundtrip_linear() {
    let k = 5;
    let seq = b"ACGTGGCTA";

    let graph = graph_from_ascii(seq, k);
    graph.compute_degrees();
    let unitigs = graph.iter_unitig().collect::<Vec<Unitig>>();

    assert_eq!(unitigs.len(), 1, "linear chain → exactly one unitig");

    let recovered = kmers_from_unitigs(&unitigs, k);
    let inserted = canonical_kmers(seq, k);
    assert_eq!(recovered, inserted, "unitig must contain exactly the inserted k-mers");
}
// Longer sequence: ACGTGGCTATCGAC with k=5 has 10 windows. The k-mers
// recovered from the unitigs must equal the inserted ones.
#[test]
fn unitig_roundtrip_longer_sequence() {
    let k = 5;
    let seq = b"ACGTGGCTATCGAC";

    let graph = graph_from_ascii(seq, k);
    graph.compute_degrees();
    let unitigs = graph.iter_unitig().collect::<Vec<Unitig>>();

    // Both helpers already return sorted, de-duplicated vectors, so they can
    // be compared directly.
    let recovered = kmers_from_unitigs(&unitigs, k);
    let inserted = canonical_kmers(seq, k);
    assert_eq!(recovered, inserted);
}
// A graph made of a single k-mer yields one unitig of length k.
#[test]
fn unitig_isolated_node() {
    let k = 5;
    let mut graph = GraphDeBruijn::new(k);
    graph.push(Kmer::from_ascii(b"ACGTA", k).unwrap());
    graph.compute_degrees();

    let unitigs = graph.iter_unitig().collect::<Vec<Unitig>>();

    assert_eq!(unitigs.len(), 1);
    assert_eq!(unitigs[0].seql(), k);
}
// Despite its name, this test checks strand collapsing: AAAAA and TTTTT are
// reverse complements of each other, so pushing both must leave one node.
#[test]
fn unitig_two_isolated_nodes() {
    let k = 5;
    let mut graph = GraphDeBruijn::new(k);
    for ascii in [b"AAAAA", b"TTTTT"] {
        graph.push(Kmer::from_ascii(ascii, k).unwrap());
    }
    assert_eq!(graph.len(), 1);
}
// Two unrelated k-mers (AAAAC and GGGGT): each must come out as its own
// unitig of length k.
#[test]
fn unitig_two_truly_distinct_isolated_nodes() {
    let k = 5;
    let mut graph = GraphDeBruijn::new(k);
    for ascii in [b"AAAAC", b"GGGGT"] {
        graph.push(Kmer::from_ascii(ascii, k).unwrap());
    }
    graph.compute_degrees();

    let unitigs = graph.iter_unitig().collect::<Vec<Unitig>>();

    assert_eq!(unitigs.len(), 2);
    for unitig in &unitigs {
        assert!(unitig.seql() == k);
    }
}
// ── all k-mers covered, none duplicated ───────────────────────────────────
// Repetitive sequence: the set of canonical k-mers recovered from the unitigs
// must equal the set that was inserted.
//
// NOTE(review): `kmers_from_unitigs` de-duplicates its result, so a k-mer
// emitted in two unitigs would go unnoticed here — the "duplicated" half of
// the test name is not actually checked.
#[test]
fn no_kmer_lost_or_duplicated() {
    let k = 7;
    let seq = b"ACGTACGTACGTTTTTACGTACGT";

    let graph = graph_from_ascii(seq, k);
    graph.compute_degrees();
    let unitigs = graph.iter_unitig().collect::<Vec<Unitig>>();

    let expected = canonical_kmers(seq, k);
    let got = kmers_from_unitigs(&unitigs, k);

    assert_eq!(
        got.len(),
        expected.len(),
        "kmer count mismatch: got {}, expected {}",
        got.len(),
        expected.len()
    );
    assert_eq!(got, expected, "kmer sets differ");
}
// ── cycle coverage ────────────────────────────────────────────────────────
// ACGTACGT with k=5 has the windows ACGTA, CGTAC, GTACG and TACGT. TACGT is
// the reverse complement of ACGTA and GTACG that of CGTAC, so the graph holds
// two canonical k-mers, each with a neighbour on both sides. Nodes like these
// are only picked up by the second pass of `start_iter`; both k-mers must
// still appear in the unitigs.
#[test]
fn cycle_kmers_not_lost() {
    let k = 5;
    let seq = b"ACGTACGT";

    let graph = graph_from_ascii(seq, k);
    graph.compute_degrees();
    let unitigs = graph.iter_unitig().collect::<Vec<Unitig>>();

    let expected = canonical_kmers(seq, k);
    let got = kmers_from_unitigs(&unitigs, k);

    assert_eq!(got.len(), expected.len(), "cycle k-mers lost");
    assert_eq!(got, expected);
}
}