feat: add seq_hash() and refactor canonical hashing
Introduce `.seq_hash(&self)` for direct XXH3-64 hashing of the packed sequence bytes, and remove the legacy `.hash()` method, which conditionally canonicalized the super-kmer via revcomp before hashing. Also update the partitioning logic to use `sk.seq_hash()` and deduplicate imports.
This commit is contained in:
@@ -160,7 +160,7 @@ impl Kmer {
|
||||
///
|
||||
/// Uses the canonical form of the kmer to compute the hash.
|
||||
#[inline]
|
||||
pub fn hash(&self, k: usize) -> u64 {
|
||||
pub fn seq_hash(&self, k: usize) -> u64 {
|
||||
mix64(self.canonical(k).0)
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
pub trait Sequence {
|
||||
fn len(&self) -> usize;
|
||||
fn sequence(&self) -> &[u8];
|
||||
fn revcomp(&self) -> Self;
|
||||
fn canonical(&self) -> Self;
|
||||
fn seq_hash(&self) -> u64;
|
||||
}
|
||||
|
||||
@@ -155,7 +155,9 @@ impl SuperKmer {
|
||||
}
|
||||
|
||||
/// Reverse-complement this super-kmer in place.
|
||||
pub fn revcomp(&mut self) {
|
||||
///
|
||||
/// This method is only used internally by the build method.
|
||||
fn revcomp(&mut self) {
|
||||
let seql = self.len();
|
||||
let n = byte_len(seql);
|
||||
|
||||
@@ -280,7 +282,7 @@ impl SuperKmer {
|
||||
/// Put this super-kmer in canonical form (lexicographic minimum of forward and revcomp).
|
||||
///
|
||||
/// Returns `true` if already canonical (no change), `false` if revcomp was applied.
|
||||
pub fn canonical(&mut self) -> bool {
|
||||
fn canonical(&mut self) -> bool {
|
||||
if self.is_canonical() {
|
||||
return true;
|
||||
}
|
||||
@@ -289,7 +291,7 @@ impl SuperKmer {
|
||||
}
|
||||
|
||||
/// Returns `true` if this super-kmer is in canonical form (lexicographic minimum of forward and revcomp).
|
||||
pub fn is_canonical(&self) -> bool {
|
||||
fn is_canonical(&self) -> bool {
|
||||
let seql = self.len();
|
||||
for i in 0..seql {
|
||||
let fwd = self.nucleotide(i);
|
||||
@@ -314,15 +316,9 @@ impl SuperKmer {
|
||||
self.iter_kmers(k).map(move |km| km.canonical(k))
|
||||
}
|
||||
|
||||
/// Returns the XXH3 hash of the super-kmer sequence.
|
||||
pub fn hash(&self) -> u64 {
|
||||
if self.is_canonical() {
|
||||
return xxh3_64(&self.seq);
|
||||
} else {
|
||||
let mut rev = self.clone();
|
||||
rev.revcomp();
|
||||
return xxh3_64(&rev.seq);
|
||||
}
|
||||
/// Returns the XXH3-64 hash of the packed sequence bytes.
|
||||
pub fn seq_hash(&self) -> u64 {
|
||||
xxh3_64(&self.seq)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user