♻️ refactor(obikpartitionner): replace low-level I/O with obiskio::SKFileWriter
- Replace the `limits` module and the raw binary I/O with a new high-level abstraction built on `obiskio::SKFileWriter`.
- Remove the `niffler` dependency and the compression logic (Gzip/Zstd/Lz4/Bgzf).
- Simplify `PartitionManager` so that it manages partitioned file writers selected by k-mer hashing.
  * Uses `n_partition_bits` for bitmask-based partition selection (2^n partitions).
- Add `obiskio` as a local dependency.

Note: this is likely part of aligning with the unified I/O primitives in the `obiskio` crate.
This commit is contained in:
Generated
+4
@@ -781,6 +781,9 @@ dependencies = [
|
|||||||
[[package]]
|
[[package]]
|
||||||
name = "obikpartitionner"
|
name = "obikpartitionner"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
|
dependencies = [
|
||||||
|
"obiskio",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "obikrope"
|
name = "obikrope"
|
||||||
@@ -796,6 +799,7 @@ version = "0.1.0"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"bitvec",
|
"bitvec",
|
||||||
"criterion2",
|
"criterion2",
|
||||||
|
"xxhash-rust",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ edition = "2024"
|
|||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
bitvec = "1"
|
bitvec = "1"
|
||||||
|
xxhash-rust = { version = "0.8.15", features = ["xxh3", "const_xxh3"] }
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
criterion2 = { version = "3", features = ["cargo_bench_support"] }
|
criterion2 = { version = "3", features = ["cargo_bench_support"] }
|
||||||
|
|||||||
@@ -49,6 +49,15 @@ impl std::error::Error for KmerError {}
|
|||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
pub struct Kmer(u64);
|
pub struct Kmer(u64);
|
||||||
|
|
||||||
|
/// Scramble a 64-bit value with the SplitMix64 finalizer.
///
/// Three xor-shift steps (by 30, 27 and 31 bits) are interleaved with two
/// wrapping multiplications by odd constants. Every step is invertible, so
/// distinct inputs always give distinct outputs; note that `0` maps to `0`.
#[inline]
fn mix64(x: u64) -> u64 {
    let mut h = x;
    h ^= h >> 30;
    h = h.wrapping_mul(0xbf58476d1ce4e5b9);
    h ^= h >> 27;
    h = h.wrapping_mul(0x94d049bb133111eb);
    h ^= h >> 31;
    h
}
|
||||||
|
|
||||||
impl Kmer {
|
impl Kmer {
|
||||||
/// Wrap a raw left-aligned u64 value as a Kmer.
|
/// Wrap a raw left-aligned u64 value as a Kmer.
|
||||||
#[inline]
|
#[inline]
|
||||||
@@ -144,6 +153,14 @@ impl Kmer {
|
|||||||
let rc = self.revcomp(k);
|
let rc = self.revcomp(k);
|
||||||
if self.0 <= rc.0 { *self } else { rc }
|
if self.0 <= rc.0 { *self } else { rc }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Return a hash of this kmer.
|
||||||
|
///
|
||||||
|
/// Uses the canonical form of the kmer to compute the hash.
|
||||||
|
#[inline]
|
||||||
|
pub fn hash(&self, k: usize) -> u64 {
|
||||||
|
mix64(self.canonical(k).0)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── tests ─────────────────────────────────────────────────────────────────────
|
// ── tests ─────────────────────────────────────────────────────────────────────
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ use crate::encoding::{DEC4, encode_base};
|
|||||||
use crate::kmer::{Kmer, KmerError};
|
use crate::kmer::{Kmer, KmerError};
|
||||||
use crate::revcomp_lookup::REVCOMP4;
|
use crate::revcomp_lookup::REVCOMP4;
|
||||||
use bitvec::prelude::*;
|
use bitvec::prelude::*;
|
||||||
|
use xxhash_rust::xxh3::xxh3_64;
|
||||||
|
|
||||||
// ── SuperKmerHeader ───────────────────────────────────────────────────────────
|
// ── SuperKmerHeader ───────────────────────────────────────────────────────────
|
||||||
|
|
||||||
@@ -312,6 +313,15 @@ impl SuperKmer {
|
|||||||
///
|
///
|
||||||
/// Returns `true` if already canonical (no change), `false` if revcomp was applied.
|
/// Returns `true` if already canonical (no change), `false` if revcomp was applied.
|
||||||
pub fn canonical(&mut self) -> bool {
|
pub fn canonical(&mut self) -> bool {
|
||||||
|
if self.is_canonical() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
self.revcomp();
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns `true` if this super-kmer is in canonical form (lexicographic minimum of forward and revcomp).
|
||||||
|
pub fn is_canonical(&self) -> bool {
|
||||||
let seql = self.seql();
|
let seql = self.seql();
|
||||||
for i in 0..seql {
|
for i in 0..seql {
|
||||||
let fwd = self.nucleotide(i);
|
let fwd = self.nucleotide(i);
|
||||||
@@ -320,12 +330,22 @@ impl SuperKmer {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
if fwd > rev {
|
if fwd > rev {
|
||||||
self.revcomp();
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
true
|
true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the XXH3 hash of the super-kmer sequence.
|
||||||
|
pub fn hash(&self) -> u64 {
|
||||||
|
if self.is_canonical() {
|
||||||
|
return xxh3_64(&self.seq);
|
||||||
|
} else {
|
||||||
|
let mut rev = self.clone();
|
||||||
|
rev.revcomp();
|
||||||
|
return xxh3_64(&rev.seq);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── helpers ───────────────────────────────────────────────────────────────────
|
// ── helpers ───────────────────────────────────────────────────────────────────
|
||||||
|
|||||||
Reference in New Issue
Block a user