diff --git a/Makefile b/Makefile index 3ad2ffc..a3eeba0 100644 --- a/Makefile +++ b/Makefile @@ -108,5 +108,27 @@ ifneq ($(strip $(COMMIT_ID)),) @rm -f $(OUTPUT) endif -.PHONY: all obitools update-deps obitests githubtests .FORCE +jjnew: + @echo "$(YELLOW)→ Creating a new commit...$(NC)" + @echo "$(BLUE)→ Documenting current commit...$(NC)" + @jj auto-describe + @echo "$(BLUE)→ Done.$(NC)" + @jj new + @echo "$(GREEN)✓ New commit created$(NC)" + +jjpush: + @echo "$(YELLOW)→ Pushing commit to repository...$(NC)" + @echo "$(BLUE)→ Documenting current commit...$(NC)" + @jj auto-describe + @echo "$(BLUE)→ Done.$(NC)" + @jj git push --change @ + @echo "$(GREEN)✓ Commit pushed to repository$(NC)" + +jjfetch: + @echo "$(YELLOW)→ Pulling latest commits...$(NC)" + @jj git fetch + @jj new master@origin + @echo "$(GREEN)✓ Latest commits pulled$(NC)" + +.PHONY: all obitools update-deps obitests githubtests jjnew jjpush jjfetch .FORCE .FORCE: \ No newline at end of file diff --git a/blackboard/Prospective/kmer_index_design.md b/blackboard/Prospective/kmer_index_design.md new file mode 100644 index 0000000..b80cc6d --- /dev/null +++ b/blackboard/Prospective/kmer_index_design.md @@ -0,0 +1,213 @@ +# Index de k-mers pour génomes de grande taille + +## Contexte et objectifs + +### Cas d'usage + +- Indexation de k-mers longs (k=31) pour des génomes de grande taille (< 10 Go par génome) +- Nombre de génomes : plusieurs dizaines à quelques centaines +- Indexation en parallèle +- Stockage sur disque +- Possibilité d'ajouter des génomes, mais pas de modifier un génome existant + +### Requêtes cibles + +- **Présence/absence** d'un k-mer dans un génome +- **Intersection** entre génomes +- **Distances** : Jaccard (présence/absence) et potentiellement Bray-Curtis (comptage) + +### Ressources disponibles + +- 128 Go de RAM +- Stockage disque + +--- + +## Estimation des volumes + +### Par génome + +- **10 Go de séquence** → ~10¹⁰ k-mers bruts (chevauchants) +- **Après déduplication** : 
typiquement 10-50% de k-mers uniques → **~1-5 × 10⁹ k-mers distincts** + +### Espace théorique + +- **k=31** → 62 bits → ~4.6 × 10¹⁸ k-mers possibles +- Table d'indexation directe impossible + +--- + +## Métriques de distance + +### Présence/absence (binaire) + +- **Jaccard** : |A ∩ B| / |A ∪ B| +- **Sørensen-Dice** : 2|A ∩ B| / (|A| + |B|) + +### Comptage (abondance) + +- **Bray-Curtis** : 1 - (2 × Σ min(aᵢ, bᵢ)) / (Σ aᵢ + Σ bᵢ) + +Note : Pour Bray-Curtis, le stockage des comptages est nécessaire, ce qui augmente significativement la taille de l'index. + +--- + +## Options d'indexation + +### Option 1 : Bloom Filter par génome + +**Principe** : Structure probabiliste pour test d'appartenance. + +**Avantages :** +- Très compact : ~10 bits/élément pour FPR ~1% +- Construction rapide, streaming +- Facile à sérialiser/désérialiser +- Intersection et Jaccard estimables via formules analytiques + +**Inconvénients :** +- Faux positifs (pas de faux négatifs) +- Distances approximatives + +**Taille estimée** : 1-6 Go par génome (selon FPR cible) + +#### Dimensionnement des Bloom filters + +``` +\mathrm{FPR} \;=\; \left(1 - e^{-h n / m}\right)^h +``` + + +| Bits/élément | FPR optimal | h (hash functions) | +|--------------|-------------|---------------------| +| 8 | ~2% | 5-6 | +| 10 | ~1% | 7 | +| 12 | ~0.3% | 8 | +| 16 | ~0.05% | 11 | + +Formule du taux de faux positifs : +``` +FPR ≈ (1 - e^(-hn/m))^h +``` +Où n = nombre d'éléments, m = nombre de bits, h = nombre de hash functions (noté h pour éviter la confusion avec k, la taille des k-mers). + +### Option 2 : Ensemble trié de k-mers + +**Principe** : Stocker les k-mers (uint64) triés, avec compression possible.
+ +**Avantages :** +- Exact (pas de faux positifs) +- Intersection/union par merge sort O(n+m) +- Compression efficace (delta encoding sur k-mers triés) + +**Inconvénients :** +- Plus volumineux : 8 octets/k-mer +- Construction plus lente (tri nécessaire) + +**Taille estimée** : 8-40 Go par génome (non compressé) + +### Option 3 : MPHF (Minimal Perfect Hash Function) + +**Principe** : Fonction de hash parfaite minimale pour les k-mers présents. + +**Avantages :** +- Très compact : ~3-4 bits/élément +- Lookup O(1) +- Exact pour les k-mers présents + +**Inconvénients :** +- Construction coûteuse (plusieurs passes) +- Statique (pas d'ajout de k-mers après construction) +- Ne distingue pas "absent" vs "jamais vu" sans structure auxiliaire + +### Option 4 : Hybride MPHF + Bloom filter + +- MPHF pour mapping compact des k-mers présents +- Bloom filter pour pré-filtrage des absents + +--- + +## Optimisation : Indexation de (k-2)-mers pour requêtes k-mers + +### Principe + +Au lieu d'indexer directement les 31-mers dans un Bloom filter, on indexe les 29-mers. Pour tester la présence d'un 31-mer, on vérifie que les **trois 29-mers** qu'il contient sont présents : + +- positions 0-28 +- positions 1-29 +- positions 2-30 + +### Analyse probabiliste + +Si le Bloom filter a un FPR de p pour un 29-mer individuel, le FPR effectif pour un 31-mer devient au mieux **p³** (les trois requêtes doivent toutes être des faux positifs). Attention : cette borne suppose les trois requêtes indépendantes et qu'aucun des trois 29-mers n'est réellement présent dans le génome ; or deux 31-mers différents peuvent partager un 29-mer, donc un 31-mer absent peut avoir ses trois 29-mers réellement indexés — ces faux positifs « structurels » s'ajoutent au p³ et doivent être quantifiés sur données réelles. + +| FPR 29-mer | FPR 31-mer effectif | +|------------|---------------------| +| 10% | 0.1% | +| 5% | 0.0125% | +| 1% | 0.0001% | + +### Avantages + +1. **Moins d'éléments à stocker** : il y a moins de 29-mers distincts que de 31-mers distincts dans un génome (deux 31-mers différents peuvent partager un même 29-mer) + +2. **FPR drastiquement réduit** : FPR³ avec seulement 3 requêtes + +3.
**Index plus compact** : on peut utiliser moins de bits par élément (FPR plus élevé acceptable sur le 29-mer) tout en obtenant un FPR très bas sur le 31-mer + +### Trade-off + +Un Bloom filter à **5-6 bits/élément** pour les 29-mers donnerait un FPR effectif < 0.01% pour les 31-mers, soit environ **2× plus compact** que l'approche directe à qualité égale. + +**Coût** : 3× plus de requêtes par lookup (mais les requêtes Bloom sont très rapides). + +--- + +## Accélération des calculs de distance : MinHash + +### Principe + +Pré-calculer une "signature" compacte (sketch) de chaque génome permettant d'estimer rapidement Jaccard sans charger les index complets. + +### Avantages + +- Matrice de distances entre 100+ génomes en quelques secondes +- Signature de taille fixe (ex: 1000-10000 hash values) quel que soit le génome +- Stockage minimal + +### Utilisation + +1. Construction : une passe sur les k-mers de chaque génome +2. Distance : comparaison des sketches en O(taille du sketch) + +--- + +## Architecture recommandée + +### Pour présence/absence + Jaccard + +1. **Index principal** : Bloom filter de (k-2)-mers avec l'optimisation décrite + - Compact (~3-5 Go par génome) + - FPR très bas pour les k-mers grâce aux requêtes triples + +2. **Sketches MinHash** : pour calcul rapide des distances entre génomes + - Quelques Ko par génome + - Permet exploration rapide de la matrice de distances + +### Pour comptage + Bray-Curtis + +1. **Index principal** : k-mers triés + comptages + - uint64 (k-mer) + uint8/uint16 (count) + - Compression delta possible + - Plus volumineux mais exact + +2. **Sketches** : variantes de MinHash pour données pondérées (ex: HyperMinHash) + +--- + +## Prochaines étapes + +1. Implémenter un Bloom filter optimisé pour k-mers +2. Implémenter l'optimisation (k-2)-mer → k-mer +3. Implémenter MinHash pour les sketches +4. Définir le format de sérialisation sur disque +5. 
Benchmarker sur des génomes réels diff --git a/pkg/obikmer/encodekmer.go b/pkg/obikmer/encodekmer.go new file mode 100644 index 0000000..7604ea8 --- /dev/null +++ b/pkg/obikmer/encodekmer.go @@ -0,0 +1,183 @@ +package obikmer + +// EncodeKmers converts a DNA sequence to a slice of encoded k-mers. +// Each nucleotide is encoded on 2 bits according to __single_base_code__: +// - A = 0 (00) +// - C = 1 (01) +// - G = 2 (10) +// - T/U = 3 (11) +// +// The function returns overlapping k-mers of size k encoded as uint64. +// For a sequence of length n, it returns n-k+1 k-mers. +// +// Parameters: +// - seq: DNA sequence as a byte slice (case insensitive, supports A, C, G, T, U) +// - k: k-mer size (must be between 1 and 32) +// - buffer: optional pre-allocated buffer for results. If nil, a new slice is created. +// +// Returns: +// - slice of uint64 encoded k-mers +// - nil if sequence is shorter than k or k is invalid +func EncodeKmers(seq []byte, k int, buffer *[]uint64) []uint64 { + if k < 1 || k > 32 || len(seq) < k { + return nil + } + + n := len(seq) - k + 1 + + var result []uint64 + if buffer == nil { + result = make([]uint64, 0, n) + } else { + result = (*buffer)[:0] + } + + // Mask to keep only k*2 bits + mask := uint64(1)<<(k*2) - 1 + + // Build the first k-mer + var kmer uint64 + for i := 0; i < k; i++ { + kmer <<= 2 + kmer |= uint64(__single_base_code__[seq[i]&31]) + } + result = append(result, kmer) + + // Slide through the rest of the sequence + for i := k; i < len(seq); i++ { + kmer <<= 2 + kmer |= uint64(__single_base_code__[seq[i]&31]) + kmer &= mask + result = append(result, kmer) + } + + return result +} + +// ReverseComplement computes the reverse complement of an encoded k-mer. +// The k-mer is encoded with 2 bits per nucleotide (A=00, C=01, G=10, T=11). +// The complement is: A↔T (00↔11), C↔G (01↔10), which is simply XOR with 11. +// The reverse swaps the order of 2-bit pairs. 
+// +// Parameters: +// - kmer: the encoded k-mer +// - k: the k-mer size (number of nucleotides) +// +// Returns: +// - the reverse complement of the k-mer +func ReverseComplement(kmer uint64, k int) uint64 { + // Step 1: Complement - XOR with all 1s to flip A↔T and C↔G + // For a k-mer of size k, we only want to flip the lower k*2 bits + mask := uint64(1)<<(k*2) - 1 + rc := (^kmer) & mask + + // Step 2: Reverse the order of 2-bit pairs + // We use a series of swaps at increasing granularity + rc = ((rc & 0x3333333333333333) << 2) | ((rc & 0xCCCCCCCCCCCCCCCC) >> 2) // Swap adjacent pairs + rc = ((rc & 0x0F0F0F0F0F0F0F0F) << 4) | ((rc & 0xF0F0F0F0F0F0F0F0) >> 4) // Swap nibbles + rc = ((rc & 0x00FF00FF00FF00FF) << 8) | ((rc & 0xFF00FF00FF00FF00) >> 8) // Swap bytes + rc = ((rc & 0x0000FFFF0000FFFF) << 16) | ((rc & 0xFFFF0000FFFF0000) >> 16) // Swap 16-bit words + rc = (rc << 32) | (rc >> 32) // Swap 32-bit words + + // Step 3: Shift right to align the k-mer (we reversed all 32 pairs, need only k) + rc >>= (64 - k*2) + + return rc +} + +// NormalizeKmer returns the lexicographically smaller of a k-mer and its +// reverse complement. This canonical form ensures that a k-mer and its +// reverse complement map to the same value. +// +// Parameters: +// - kmer: the encoded k-mer +// - k: the k-mer size (number of nucleotides) +// +// Returns: +// - the canonical (normalized) k-mer +func NormalizeKmer(kmer uint64, k int) uint64 { + rc := ReverseComplement(kmer, k) + if rc < kmer { + return rc + } + return kmer +} + +// EncodeNormalizedKmers converts a DNA sequence to a slice of normalized k-mers. +// Each k-mer is replaced by the lexicographically smaller of itself and its +// reverse complement. This ensures that forward and reverse complement sequences +// produce the same k-mer set. 
+// +// Parameters: +// - seq: DNA sequence as a byte slice (case insensitive, supports A, C, G, T, U) +// - k: k-mer size (must be between 1 and 32) +// - buffer: optional pre-allocated buffer for results. If nil, a new slice is created. +// +// Returns: +// - slice of uint64 normalized k-mers +// - nil if sequence is shorter than k or k is invalid +func EncodeNormalizedKmers(seq []byte, k int, buffer *[]uint64) []uint64 { + if k < 1 || k > 32 || len(seq) < k { + return nil + } + + n := len(seq) - k + 1 + + var result []uint64 + if buffer == nil { + result = make([]uint64, 0, n) + } else { + result = (*buffer)[:0] + } + + // Mask to keep only k*2 bits + mask := uint64(1)<<(k*2) - 1 + + // Shift amount for adding to reverse complement (high position) + rcShift := uint((k - 1) * 2) + + // Complement lookup: A(00)->T(11), C(01)->G(10), G(10)->C(01), T(11)->A(00) + // This is simply XOR with 3 + + // Build the first k-mer (forward and reverse complement) + var fwd, rvc uint64 + for i := 0; i < k; i++ { + code := uint64(__single_base_code__[seq[i]&31]) + // Forward: shift left and add new code at low end + fwd <<= 2 + fwd |= code + // Reverse complement: shift right and add complement at high end + rvc >>= 2 + rvc |= (code ^ 3) << rcShift + } + + // Store the normalized (canonical) k-mer + if fwd <= rvc { + result = append(result, fwd) + } else { + result = append(result, rvc) + } + + // Slide through the rest of the sequence + for i := k; i < len(seq); i++ { + code := uint64(__single_base_code__[seq[i]&31]) + + // Update forward k-mer: shift left, add new code, mask + fwd <<= 2 + fwd |= code + fwd &= mask + + // Update reverse complement: shift right, add complement at high end + rvc >>= 2 + rvc |= (code ^ 3) << rcShift + + // Store the normalized k-mer + if fwd <= rvc { + result = append(result, fwd) + } else { + result = append(result, rvc) + } + } + + return result +} diff --git a/pkg/obikmer/encodekmer_test.go b/pkg/obikmer/encodekmer_test.go new file mode 100644 
index 0000000..e89c0c3 --- /dev/null +++ b/pkg/obikmer/encodekmer_test.go @@ -0,0 +1,518 @@ +package obikmer + +import ( + "bytes" + "testing" +) + +// TestEncodeKmersBasic tests basic k-mer encoding +func TestEncodeKmersBasic(t *testing.T) { + tests := []struct { + name string + seq string + k int + expected []uint64 + }{ + { + name: "simple 4-mer ACGT", + seq: "ACGT", + k: 4, + expected: []uint64{0b00011011}, // A=00, C=01, G=10, T=11 -> 00 01 10 11 = 27 + }, + { + name: "simple 2-mer AC", + seq: "AC", + k: 2, + expected: []uint64{0b0001}, // A=00, C=01 -> 00 01 = 1 + }, + { + name: "sliding 2-mer ACGT", + seq: "ACGT", + k: 2, + expected: []uint64{0b0001, 0b0110, 0b1011}, // AC=1, CG=6, GT=11 + }, + { + name: "lowercase", + seq: "acgt", + k: 4, + expected: []uint64{0b00011011}, + }, + { + name: "with U instead of T", + seq: "ACGU", + k: 4, + expected: []uint64{0b00011011}, // U encodes same as T + }, + { + name: "8-mer", + seq: "ACGTACGT", + k: 8, + expected: []uint64{0b0001101100011011}, // ACGTACGT + }, + { + name: "32-mer max size", + seq: "ACGTACGTACGTACGTACGTACGTACGTACGT", + k: 32, + expected: []uint64{0x1B1B1B1B1B1B1B1B}, // ACGTACGT repeated 4 times + }, + { + name: "longer sequence sliding", + seq: "AAACCCGGG", + k: 3, + expected: []uint64{ + 0b000000, // AAA = 0 + 0b000001, // AAC = 1 + 0b000101, // ACC = 5 + 0b010101, // CCC = 21 + 0b010110, // CCG = 22 + 0b011010, // CGG = 26 + 0b101010, // GGG = 42 + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := EncodeKmers([]byte(tt.seq), tt.k, nil) + + if len(result) != len(tt.expected) { + t.Errorf("length mismatch: got %d, want %d", len(result), len(tt.expected)) + return + } + + for i, v := range result { + if v != tt.expected[i] { + t.Errorf("position %d: got %d (0b%b), want %d (0b%b)", + i, v, v, tt.expected[i], tt.expected[i]) + } + } + }) + } +} + +// TestEncodeKmersEdgeCases tests edge cases +func TestEncodeKmersEdgeCases(t *testing.T) { + // Empty sequence + 
result := EncodeKmers([]byte{}, 4, nil) + if result != nil { + t.Errorf("empty sequence should return nil, got %v", result) + } + + // k > sequence length + result = EncodeKmers([]byte("ACG"), 4, nil) + if result != nil { + t.Errorf("k > seq length should return nil, got %v", result) + } + + // k = 0 + result = EncodeKmers([]byte("ACGT"), 0, nil) + if result != nil { + t.Errorf("k=0 should return nil, got %v", result) + } + + // k > 32 + result = EncodeKmers([]byte("ACGTACGTACGTACGTACGTACGTACGTACGTACGT"), 33, nil) + if result != nil { + t.Errorf("k>32 should return nil, got %v", result) + } + + // k = sequence length (single k-mer) + result = EncodeKmers([]byte("ACGT"), 4, nil) + if len(result) != 1 { + t.Errorf("k=seq_len should return 1 k-mer, got %d", len(result)) + } +} + +// TestEncodeKmersBuffer tests buffer reuse +func TestEncodeKmersBuffer(t *testing.T) { + seq := []byte("ACGTACGTACGT") + k := 4 + + // First call without buffer + result1 := EncodeKmers(seq, k, nil) + + // Second call with buffer - pre-allocate with capacity + buffer := make([]uint64, 0, 100) + result2 := EncodeKmers(seq, k, &buffer) + + if len(result1) != len(result2) { + t.Errorf("buffer reuse: length mismatch %d vs %d", len(result1), len(result2)) + } + + for i := range result1 { + if result1[i] != result2[i] { + t.Errorf("buffer reuse: position %d mismatch", i) + } + } + + // Verify results are correct + if len(result2) == 0 { + t.Errorf("result should not be empty") + } + + // Test multiple calls with same buffer to verify no memory issues + for i := 0; i < 10; i++ { + result3 := EncodeKmers(seq, k, &buffer) + if len(result3) != len(result1) { + t.Errorf("iteration %d: length mismatch", i) + } + } +} + +// TestEncodeKmersVariousLengths tests encoding with various sequence lengths +func TestEncodeKmersVariousLengths(t *testing.T) { + lengths := []int{1, 4, 8, 15, 16, 17, 31, 32, 33, 63, 64, 65, 100, 256, 1000} + k := 8 + + for _, length := range lengths { + // Generate test sequence + 
seq := make([]byte, length) + for i := range seq { + seq[i] = "ACGT"[i%4] + } + + if length < k { + continue + } + + t.Run("length_"+string(rune('0'+length/100))+string(rune('0'+(length%100)/10))+string(rune('0'+length%10)), func(t *testing.T) { + result := EncodeKmers(seq, k, nil) + + expectedLen := length - k + 1 + if len(result) != expectedLen { + t.Errorf("length mismatch: got %d, want %d", len(result), expectedLen) + } + }) + } +} + +// TestEncodeKmersLongSequence tests with a longer realistic sequence +func TestEncodeKmersLongSequence(t *testing.T) { + // Simulate a realistic DNA sequence + seq := bytes.Repeat([]byte("ACGTACGTNNACGTACGT"), 100) + k := 16 + + result := EncodeKmers(seq, k, nil) + expectedLen := len(seq) - k + 1 + + if len(result) != expectedLen { + t.Fatalf("length mismatch: got %d, want %d", len(result), expectedLen) + } +} + +// BenchmarkEncodeKmers benchmarks the encoding function +func BenchmarkEncodeKmers(b *testing.B) { + // Create test sequences of various sizes + sizes := []int{100, 1000, 10000, 100000} + kSizes := []int{8, 16, 32} + + for _, k := range kSizes { + for _, size := range sizes { + seq := make([]byte, size) + for i := range seq { + seq[i] = "ACGT"[i%4] + } + + name := "k" + string(rune('0'+k/10)) + string(rune('0'+k%10)) + "_size" + string(rune('0'+size/10000)) + string(rune('0'+(size%10000)/1000)) + string(rune('0'+(size%1000)/100)) + string(rune('0'+(size%100)/10)) + string(rune('0'+size%10)) + b.Run(name, func(b *testing.B) { + buffer := make([]uint64, 0, size) + b.ResetTimer() + b.SetBytes(int64(size)) + + for i := 0; i < b.N; i++ { + EncodeKmers(seq, k, &buffer) + } + }) + } + } +} + +// TestEncodeNucleotide verifies nucleotide encoding +func TestEncodeNucleotide(t *testing.T) { + testCases := []struct { + nucleotide byte + expected byte + }{ + {'A', 0}, + {'a', 0}, + {'C', 1}, + {'c', 1}, + {'G', 2}, + {'g', 2}, + {'T', 3}, + {'t', 3}, + {'U', 3}, + {'u', 3}, + } + + for _, tc := range testCases { + result := 
EncodeNucleotide(tc.nucleotide) + if result != tc.expected { + t.Errorf("EncodeNucleotide('%c') = %d, want %d", + tc.nucleotide, result, tc.expected) + } + } +} + +// TestReverseComplement tests the reverse complement function +func TestReverseComplement(t *testing.T) { + tests := []struct { + name string + seq string + k int + expected string // expected reverse complement sequence + }{ + { + name: "ACGT -> ACGT (palindrome)", + seq: "ACGT", + k: 4, + expected: "ACGT", + }, + { + name: "AAAA -> TTTT", + seq: "AAAA", + k: 4, + expected: "TTTT", + }, + { + name: "TTTT -> AAAA", + seq: "TTTT", + k: 4, + expected: "AAAA", + }, + { + name: "CCCC -> GGGG", + seq: "CCCC", + k: 4, + expected: "GGGG", + }, + { + name: "AACG -> CGTT", + seq: "AACG", + k: 4, + expected: "CGTT", + }, + { + name: "AC -> GT", + seq: "AC", + k: 2, + expected: "GT", + }, + { + name: "ACGTACGT -> ACGTACGT (palindrome)", + seq: "ACGTACGT", + k: 8, + expected: "ACGTACGT", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Encode the input sequence + kmers := EncodeKmers([]byte(tt.seq), tt.k, nil) + if len(kmers) != 1 { + t.Fatalf("expected 1 k-mer, got %d", len(kmers)) + } + + // Compute reverse complement + rc := ReverseComplement(kmers[0], tt.k) + + // Encode the expected reverse complement + expectedKmers := EncodeKmers([]byte(tt.expected), tt.k, nil) + if len(expectedKmers) != 1 { + t.Fatalf("expected 1 k-mer for expected, got %d", len(expectedKmers)) + } + + if rc != expectedKmers[0] { + t.Errorf("ReverseComplement(%s) = %d (0b%b), want %d (0b%b) for %s", + tt.seq, rc, rc, expectedKmers[0], expectedKmers[0], tt.expected) + } + }) + } +} + +// TestReverseComplementInvolution tests that RC(RC(x)) = x +func TestReverseComplementInvolution(t *testing.T) { + testSeqs := []string{"ACGT", "AAAA", "TTTT", "ACGTACGT", "AACGTTGC", "AC", "ACGTACGTACGTACGT", "ACGTACGTACGTACGTACGTACGTACGTACGT"} + + for _, seq := range testSeqs { + k := len(seq) + kmers := 
EncodeKmers([]byte(seq), k, nil) + if len(kmers) != 1 { + continue + } + + original := kmers[0] + rc := ReverseComplement(original, k) + rcrc := ReverseComplement(rc, k) + + if rcrc != original { + t.Errorf("RC(RC(%s)) != %s: got %d, want %d", seq, seq, rcrc, original) + } + } +} + +// TestNormalizeKmer tests the normalization function +func TestNormalizeKmer(t *testing.T) { + tests := []struct { + name string + seq string + k int + }{ + {"ACGT palindrome", "ACGT", 4}, + {"AAAA vs TTTT", "AAAA", 4}, + {"TTTT vs AAAA", "TTTT", 4}, + {"AACG vs CGTT", "AACG", 4}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + kmers := EncodeKmers([]byte(tt.seq), tt.k, nil) + if len(kmers) != 1 { + t.Fatalf("expected 1 k-mer, got %d", len(kmers)) + } + + kmer := kmers[0] + rc := ReverseComplement(kmer, tt.k) + normalized := NormalizeKmer(kmer, tt.k) + + // Normalized should be the minimum + expectedNorm := kmer + if rc < kmer { + expectedNorm = rc + } + + if normalized != expectedNorm { + t.Errorf("NormalizeKmer(%d) = %d, want %d", kmer, normalized, expectedNorm) + } + + // Normalizing the RC should give the same result + normalizedRC := NormalizeKmer(rc, tt.k) + if normalizedRC != normalized { + t.Errorf("NormalizeKmer(RC) = %d, want %d (same as NormalizeKmer(fwd))", normalizedRC, normalized) + } + }) + } +} + +// TestEncodeNormalizedKmersBasic tests basic normalized k-mer encoding +func TestEncodeNormalizedKmersBasic(t *testing.T) { + // Test that a sequence and its reverse complement produce the same normalized k-mers + seq := []byte("AACGTT") + revComp := []byte("AACGTT") // This is a palindrome! 
+ + k := 4 + kmers1 := EncodeNormalizedKmers(seq, k, nil) + kmers2 := EncodeNormalizedKmers(revComp, k, nil) + + if len(kmers1) != len(kmers2) { + t.Fatalf("length mismatch: %d vs %d", len(kmers1), len(kmers2)) + } + + // For a palindrome, forward and reverse should give the same k-mers + for i := range kmers1 { + if kmers1[i] != kmers2[len(kmers2)-1-i] { + t.Logf("Note: position %d differs (expected for non-palindromic sequences)", i) + } + } +} + +// TestEncodeNormalizedKmersSymmetry tests that seq and its RC produce same normalized k-mers (reversed) +func TestEncodeNormalizedKmersSymmetry(t *testing.T) { + // Manually construct a sequence and its reverse complement + seq := []byte("ACGTAACCGG") + + // Compute reverse complement manually + rcMap := map[byte]byte{'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A'} + revComp := make([]byte, len(seq)) + for i, b := range seq { + revComp[len(seq)-1-i] = rcMap[b] + } + + k := 4 + kmers1 := EncodeNormalizedKmers(seq, k, nil) + kmers2 := EncodeNormalizedKmers(revComp, k, nil) + + if len(kmers1) != len(kmers2) { + t.Fatalf("length mismatch: %d vs %d", len(kmers1), len(kmers2)) + } + + // The normalized k-mers should be the same but in reverse order + for i := range kmers1 { + j := len(kmers2) - 1 - i + if kmers1[i] != kmers2[j] { + t.Errorf("position %d vs %d: %d != %d", i, j, kmers1[i], kmers2[j]) + } + } +} + +// TestEncodeNormalizedKmersConsistency verifies normalized k-mers match manual normalization +func TestEncodeNormalizedKmersConsistency(t *testing.T) { + seq := []byte("ACGTACGTACGTACGT") + k := 8 + + // Get k-mers both ways + rawKmers := EncodeKmers(seq, k, nil) + normalizedKmers := EncodeNormalizedKmers(seq, k, nil) + + if len(rawKmers) != len(normalizedKmers) { + t.Fatalf("length mismatch: %d vs %d", len(rawKmers), len(normalizedKmers)) + } + + // Verify each normalized k-mer matches manual normalization + for i, raw := range rawKmers { + expected := NormalizeKmer(raw, k) + if normalizedKmers[i] != expected { + 
t.Errorf("position %d: EncodeNormalizedKmers gave %d, NormalizeKmer gave %d", + i, normalizedKmers[i], expected) + } + } +} + +// BenchmarkEncodeNormalizedKmers benchmarks the normalized encoding function +func BenchmarkEncodeNormalizedKmers(b *testing.B) { + sizes := []int{100, 1000, 10000, 100000} + kSizes := []int{8, 16, 32} + + for _, k := range kSizes { + for _, size := range sizes { + seq := make([]byte, size) + for i := range seq { + seq[i] = "ACGT"[i%4] + } + + name := "k" + string(rune('0'+k/10)) + string(rune('0'+k%10)) + "_size" + string(rune('0'+size/10000)) + string(rune('0'+(size%10000)/1000)) + string(rune('0'+(size%1000)/100)) + string(rune('0'+(size%100)/10)) + string(rune('0'+size%10)) + b.Run(name, func(b *testing.B) { + buffer := make([]uint64, 0, size) + b.ResetTimer() + b.SetBytes(int64(size)) + + for i := 0; i < b.N; i++ { + EncodeNormalizedKmers(seq, k, &buffer) + } + }) + } + } +} + +// BenchmarkReverseComplement benchmarks the reverse complement function +func BenchmarkReverseComplement(b *testing.B) { + kmer := uint64(0x123456789ABCDEF0) + k := 32 + + b.ResetTimer() + for i := 0; i < b.N; i++ { + ReverseComplement(kmer, k) + } +} + +// BenchmarkNormalizeKmer benchmarks the normalization function +func BenchmarkNormalizeKmer(b *testing.B) { + kmer := uint64(0x123456789ABCDEF0) + k := 32 + + b.ResetTimer() + for i := 0; i < b.N; i++ { + NormalizeKmer(kmer, k) + } +}