♻️ refactor(obikpartitionner): replace low-level I/O with obiskio::SKFileWriter

- Replace `limits` module and raw binary I/O with a new high-level abstraction using obiskio::SKFileWriter
- Remove `niffler` dependency and compression logic (Gzip/Zstd/Lz4/Bgzf)
- Simplify PartitionManager to manage partitioned file writers based on kmer hashing
  * Uses `n_partition_bits` for bitmask-based partition selection (2^n partitions)
- Add obiskio as a local dependency
Note: This is likely part of aligning with unified I/O primitives in the obiskio crate.
This commit is contained in:
Eric Coissac
2026-04-26 14:58:41 +02:00
parent c09d17401d
commit eaf893174f
4 changed files with 43 additions and 1 deletions
+4
View File
@@ -781,6 +781,9 @@ dependencies = [
[[package]]
name = "obikpartitionner"
version = "0.1.0"
dependencies = [
"obiskio",
]
[[package]]
name = "obikrope"
@@ -796,6 +799,7 @@ version = "0.1.0"
dependencies = [
"bitvec",
"criterion2",
"xxhash-rust",
]
[[package]]
+1
View File
@@ -5,6 +5,7 @@ edition = "2024"
[dependencies]
bitvec = "1"
xxhash-rust = { version = "0.8.15", features = ["xxh3", "const_xxh3"] }
[dev-dependencies]
criterion2 = { version = "3", features = ["cargo_bench_support"] }
+17
View File
@@ -49,6 +49,15 @@ impl std::error::Error for KmerError {}
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Kmer(u64);
/// Scramble a 64-bit value with two xor-shift / multiply rounds and a final
/// xor-shift.
///
/// The shift amounts (30, 27, 31) and the two multipliers are the ones used by
/// the SplitMix64 output function. Every step is invertible (xor with a right
/// shift of itself, multiplication by an odd constant modulo 2^64), so the
/// mapping is a bijection on `u64`; note that `0` is mapped to `0`.
#[inline]
fn mix64(x: u64) -> u64 {
    const FIRST_MULTIPLIER: u64 = 0xbf58_476d_1ce4_e5b9;
    const SECOND_MULTIPLIER: u64 = 0x94d0_49bb_1331_11eb;

    let mut z = x;
    // Wrapping multiplications: overflow is expected and part of the mixing.
    z = (z ^ (z >> 30)).wrapping_mul(FIRST_MULTIPLIER);
    z = (z ^ (z >> 27)).wrapping_mul(SECOND_MULTIPLIER);
    z ^ (z >> 31)
}
impl Kmer {
/// Wrap a raw left-aligned u64 value as a Kmer.
#[inline]
@@ -144,6 +153,14 @@ impl Kmer {
let rc = self.revcomp(k);
if self.0 <= rc.0 { *self } else { rc }
}
/// Return a hash of this kmer.
///
/// Uses the canonical form of the kmer to compute the hash.
#[inline]
pub fn hash(&self, k: usize) -> u64 {
mix64(self.canonical(k).0)
}
}
// ── tests ─────────────────────────────────────────────────────────────────────
+21 -1
View File
@@ -4,6 +4,7 @@ use crate::encoding::{DEC4, encode_base};
use crate::kmer::{Kmer, KmerError};
use crate::revcomp_lookup::REVCOMP4;
use bitvec::prelude::*;
use xxhash_rust::xxh3::xxh3_64;
// ── SuperKmerHeader ───────────────────────────────────────────────────────────
@@ -312,6 +313,15 @@ impl SuperKmer {
///
/// Returns `true` if already canonical (no change), `false` if revcomp was applied.
pub fn canonical(&mut self) -> bool {
    // Decide the orientation first, without mutating anything.
    let already_canonical = self.is_canonical();
    if !already_canonical {
        // Not canonical: replace the sequence in place by its reverse complement.
        self.revcomp();
    }
    // `true` means "nothing was changed", `false` means "revcomp was applied".
    already_canonical
}
/// Returns `true` if this super-kmer is in canonical form (lexicographic minimum of forward and revcomp).
pub fn is_canonical(&self) -> bool {
let seql = self.seql();
for i in 0..seql {
let fwd = self.nucleotide(i);
@@ -320,12 +330,22 @@ impl SuperKmer {
return true;
}
if fwd > rev {
self.revcomp();
return false;
}
}
true
}
/// Returns the 64-bit XXH3 hash of this super-kmer's sequence, taken in its
/// canonical orientation.
///
/// If `is_canonical()` is `true`, the stored `seq` is hashed directly.
/// Otherwise a temporary clone is reverse-complemented and the clone's `seq`
/// is hashed instead; `self` is never modified. The intent is that a
/// super-kmer and its reverse complement hash to the same value.
///
/// Only `seq` is fed to the hash function; no other field of the super-kmer
/// takes part in the result.
///
/// The non-canonical path pays for one clone of the super-kmer per call.
pub fn hash(&self) -> u64 {
    if self.is_canonical() {
        xxh3_64(&self.seq)
    } else {
        // `revcomp` works in place, so flip a private copy rather than `self`.
        let mut flipped = self.clone();
        flipped.revcomp();
        xxh3_64(&flipped.seq)
    }
}
}
// ── helpers ───────────────────────────────────────────────────────────────────