♻️ refactor(obikpartitionner): replace low-level I/O with obiskio::SKFileWriter
- Replace the `limits` module and raw binary I/O with a new high-level abstraction built on `obiskio::SKFileWriter`.
- Remove the `niffler` dependency and the compression logic (Gzip/Zstd/Lz4/Bgzf).
- Simplify `PartitionManager` so that it manages partitioned file writers selected by kmer hashing.
  * Uses `n_partition_bits` for bitmask-based partition selection (2^n partitions).
- Add `obiskio` as a local dependency.

Note: This is likely part of aligning with the unified I/O primitives in the `obiskio` crate.
This commit is contained in:
Generated
+4
@@ -781,6 +781,9 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "obikpartitionner"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"obiskio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "obikrope"
|
||||
@@ -796,6 +799,7 @@ version = "0.1.0"
|
||||
dependencies = [
|
||||
"bitvec",
|
||||
"criterion2",
|
||||
"xxhash-rust",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
||||
@@ -5,6 +5,7 @@ edition = "2024"
|
||||
|
||||
[dependencies]
|
||||
bitvec = "1"
|
||||
xxhash-rust = { version = "0.8.15", features = ["xxh3", "const_xxh3"] }
|
||||
|
||||
[dev-dependencies]
|
||||
criterion2 = { version = "3", features = ["cargo_bench_support"] }
|
||||
|
||||
@@ -49,6 +49,15 @@ impl std::error::Error for KmerError {}
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct Kmer(u64);
|
||||
|
||||
/// Scrambles a 64-bit value into a well-mixed 64-bit value.
///
/// Applies three xor-shift steps (by 30, 27 and 31 bits) interleaved with two
/// wrapping multiplications; the shifts and multipliers are those of the
/// SplitMix64 output finalizer. All arithmetic wraps, so the function cannot
/// panic on overflow. Note that `mix64(0)` is `0`.
///
/// Declared `const` so it can also be evaluated in constant contexts.
#[inline]
const fn mix64(x: u64) -> u64 {
    let x = (x ^ (x >> 30)).wrapping_mul(0xbf58_476d_1ce4_e5b9);
    let x = (x ^ (x >> 27)).wrapping_mul(0x94d0_49bb_1331_11eb);
    x ^ (x >> 31)
}
|
||||
|
||||
impl Kmer {
|
||||
/// Wrap a raw left-aligned u64 value as a Kmer.
|
||||
#[inline]
|
||||
@@ -144,6 +153,14 @@ impl Kmer {
|
||||
let rc = self.revcomp(k);
|
||||
if self.0 <= rc.0 { *self } else { rc }
|
||||
}
|
||||
|
||||
/// Return a hash of this kmer.
|
||||
///
|
||||
/// Uses the canonical form of the kmer to compute the hash.
|
||||
#[inline]
|
||||
pub fn hash(&self, k: usize) -> u64 {
|
||||
mix64(self.canonical(k).0)
|
||||
}
|
||||
}
|
||||
|
||||
// ── tests ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
@@ -4,6 +4,7 @@ use crate::encoding::{DEC4, encode_base};
|
||||
use crate::kmer::{Kmer, KmerError};
|
||||
use crate::revcomp_lookup::REVCOMP4;
|
||||
use bitvec::prelude::*;
|
||||
use xxhash_rust::xxh3::xxh3_64;
|
||||
|
||||
// ── SuperKmerHeader ───────────────────────────────────────────────────────────
|
||||
|
||||
@@ -312,6 +313,15 @@ impl SuperKmer {
|
||||
///
|
||||
/// Returns `true` if already canonical (no change), `false` if revcomp was applied.
|
||||
pub fn canonical(&mut self) -> bool {
|
||||
if self.is_canonical() {
|
||||
return true;
|
||||
}
|
||||
self.revcomp();
|
||||
false
|
||||
}
|
||||
|
||||
/// Returns `true` if this super-kmer is in canonical form (lexicographic minimum of forward and revcomp).
|
||||
pub fn is_canonical(&self) -> bool {
|
||||
let seql = self.seql();
|
||||
for i in 0..seql {
|
||||
let fwd = self.nucleotide(i);
|
||||
@@ -320,12 +330,22 @@ impl SuperKmer {
|
||||
return true;
|
||||
}
|
||||
if fwd > rev {
|
||||
self.revcomp();
|
||||
return false;
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
/// Returns the XXH3 hash of the super-kmer sequence.
|
||||
pub fn hash(&self) -> u64 {
|
||||
if self.is_canonical() {
|
||||
return xxh3_64(&self.seq);
|
||||
} else {
|
||||
let mut rev = self.clone();
|
||||
rev.revcomp();
|
||||
return xxh3_64(&rev.seq);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── helpers ───────────────────────────────────────────────────────────────────
|
||||
|
||||
Reference in New Issue
Block a user