Refactor k-mer encoding functions to use 'canonical' terminology

This commit refactors all k-mer encoding and normalization functions to consistently use 'canonical' instead of 'normalized' terminology. This includes renaming functions such as EncodeNormalizedKmer to EncodeCanonicalKmer, IterNormalizedKmers to IterCanonicalKmers, and NormalizeKmer to CanonicalKmer. The change aligns the API with biological conventions, where the 'canonical' form of a k-mer is the lexicographically smaller of the k-mer and its reverse complement. All related documentation and examples have been updated accordingly. The commit also updates the version file with a new commit hash.
This commit is contained in:
Eric Coissac
2026-02-05 16:14:24 +01:00
parent 16f72e6305
commit 09ac15a76b
4 changed files with 50 additions and 50 deletions

View File

@@ -44,9 +44,9 @@ func (ks *KmerSet) AddKmerCode(kmer uint64) {
ks.bitmap.Add(kmer)
}
// AddNormalizedKmerCode adds a normalized encoded k-mer to the set.
func (ks *KmerSet) AddNormalizedKmerCode(kmer uint64) {
canonical := NormalizeKmer(kmer, ks.k)
// AddCanonicalKmerCode adds a canonical encoded k-mer to the set.
// The given code is passed through CanonicalKmer together with the set's k,
// and the resulting value is what gets inserted into the bitmap.
func (ks *KmerSet) AddCanonicalKmerCode(kmer uint64) {
canonical := CanonicalKmer(kmer, ks.k)
ks.bitmap.Add(canonical)
}
@@ -58,11 +58,11 @@ func (ks *KmerSet) AddKmer(seq []byte) {
ks.bitmap.Add(kmer)
}
// AddNormalizedKmer adds a normalized k-mer to the set by encoding the sequence.
// AddCanonicalKmer adds a canonical k-mer to the set by encoding the sequence.
// The sequence must contain exactly k nucleotides.
// Zero-allocation: encodes directly into canonical form without creating an intermediate slice.
func (ks *KmerSet) AddNormalizedKmer(seq []byte) {
canonical := EncodeNormalizedKmer(seq, ks.k)
func (ks *KmerSet) AddCanonicalKmer(seq []byte) {
canonical := EncodeCanonicalKmer(seq, ks.k)
ks.bitmap.Add(canonical)
}
@@ -70,7 +70,7 @@ func (ks *KmerSet) AddNormalizedKmer(seq []byte) {
// Uses an iterator to avoid allocating an intermediate vector.
func (ks *KmerSet) AddSequence(seq *obiseq.BioSequence) {
rawSeq := seq.Sequence()
for canonical := range IterNormalizedKmers(rawSeq, ks.k) {
for canonical := range IterCanonicalKmers(rawSeq, ks.k) {
// Each value yielded by the iterator is inserted into the set's bitmap.
ks.bitmap.Add(canonical)
}
}