Refactor k-mer normalization functions and add quorum operations

This commit refactors the k-mer normalization functions, renaming them from 'NormalizeKmer' to 'CanonicalKmer' to better reflect their purpose of returning canonical k-mers. It also introduces new quorum operations (AtLeast, AtMost, Exactly) for k-mer set groups, along with comprehensive tests and benchmarks. The version commit hash has also been updated.
2026-06-24 17:51:00 +00:00 · 2026-02-05 17:11:14 +01:00
parent a43e6258be
commit aa2e94dd6f
4 changed files with 671 additions and 41 deletions
@@ -352,8 +352,8 @@ func TestReverseComplementInvolution(t *testing.T) {
 	}
 }
-// TestNormalizeKmer tests the normalization function
+// TestCanonicalKmer tests the normalization function
-func TestNormalizeKmer(t *testing.T) {
+func TestCanonicalKmer(t *testing.T) {
 	tests := []struct {
 		name string
 		seq  string
@@ -374,7 +374,7 @@ func TestNormalizeKmer(t *testing.T) {
 			kmer := kmers[0]
 			rc := ReverseComplement(kmer, tt.k)
-			normalized := NormalizeKmer(kmer, tt.k)
+			normalized := CanonicalKmer(kmer, tt.k)
 			// Normalized should be the minimum
 			expectedNorm := kmer
@@ -383,27 +383,27 @@ func TestNormalizeKmer(t *testing.T) {
 			}
 			if normalized != expectedNorm {
-				t.Errorf("NormalizeKmer(%d) = %d, want %d", kmer, normalized, expectedNorm)
+				t.Errorf("CanonicalKmer(%d) = %d, want %d", kmer, normalized, expectedNorm)
 			}
 			// Normalizing the RC should give the same result
-			normalizedRC := NormalizeKmer(rc, tt.k)
+			normalizedRC := CanonicalKmer(rc, tt.k)
 			if normalizedRC != normalized {
-				t.Errorf("NormalizeKmer(RC) = %d, want %d (same as NormalizeKmer(fwd))", normalizedRC, normalized)
+				t.Errorf("CanonicalKmer(RC) = %d, want %d (same as CanonicalKmer(fwd))", normalizedRC, normalized)
 			}
 		})
 	}
 }
-// TestEncodeNormalizedKmersBasic tests basic normalized k-mer encoding
+// TestEncodeCanonicalKmersBasic tests basic normalized k-mer encoding
-func TestEncodeNormalizedKmersBasic(t *testing.T) {
+func TestEncodeCanonicalKmersBasic(t *testing.T) {
 	// Test that a sequence and its reverse complement produce the same normalized k-mers
 	seq := []byte("AACGTT")
 	revComp := []byte("AACGTT") // This is a palindrome!
 	k := 4
-	kmers1 := EncodeNormalizedKmers(seq, k, nil)
+	kmers1 := EncodeCanonicalKmers(seq, k, nil)
-	kmers2 := EncodeNormalizedKmers(revComp, k, nil)
+	kmers2 := EncodeCanonicalKmers(revComp, k, nil)
 	if len(kmers1) != len(kmers2) {
 		t.Fatalf("length mismatch: %d vs %d", len(kmers1), len(kmers2))
@@ -417,8 +417,8 @@ func TestEncodeNormalizedKmersBasic(t *testing.T) {
 	}
 }
-// TestEncodeNormalizedKmersSymmetry tests that seq and its RC produce same normalized k-mers (reversed)
+// TestEncodeCanonicalKmersSymmetry tests that seq and its RC produce same normalized k-mers (reversed)
-func TestEncodeNormalizedKmersSymmetry(t *testing.T) {
+func TestEncodeCanonicalKmersSymmetry(t *testing.T) {
 	// Manually construct a sequence and its reverse complement
 	seq := []byte("ACGTAACCGG")
@@ -430,8 +430,8 @@ func TestEncodeNormalizedKmersSymmetry(t *testing.T) {
 	}
 	k := 4
-	kmers1 := EncodeNormalizedKmers(seq, k, nil)
+	kmers1 := EncodeCanonicalKmers(seq, k, nil)
-	kmers2 := EncodeNormalizedKmers(revComp, k, nil)
+	kmers2 := EncodeCanonicalKmers(revComp, k, nil)
 	if len(kmers1) != len(kmers2) {
 		t.Fatalf("length mismatch: %d vs %d", len(kmers1), len(kmers2))
@@ -446,14 +446,14 @@ func TestEncodeNormalizedKmersSymmetry(t *testing.T) {
 	}
 }
-// TestEncodeNormalizedKmersConsistency verifies normalized k-mers match manual normalization
+// TestEncodeCanonicalKmersConsistency verifies normalized k-mers match manual normalization
-func TestEncodeNormalizedKmersConsistency(t *testing.T) {
+func TestEncodeCanonicalKmersConsistency(t *testing.T) {
 	seq := []byte("ACGTACGTACGTACGT")
 	k := 8
 	// Get k-mers both ways
 	rawKmers := EncodeKmers(seq, k, nil)
-	normalizedKmers := EncodeNormalizedKmers(seq, k, nil)
+	normalizedKmers := EncodeCanonicalKmers(seq, k, nil)
 	if len(rawKmers) != len(normalizedKmers) {
 		t.Fatalf("length mismatch: %d vs %d", len(rawKmers), len(normalizedKmers))
@@ -461,16 +461,16 @@ func TestEncodeNormalizedKmersConsistency(t *testing.T) {
 	// Verify each normalized k-mer matches manual normalization
 	for i, raw := range rawKmers {
-		expected := NormalizeKmer(raw, k)
+		expected := CanonicalKmer(raw, k)
 		if normalizedKmers[i] != expected {
-			t.Errorf("position %d: EncodeNormalizedKmers gave %d, NormalizeKmer gave %d",
+			t.Errorf("position %d: EncodeCanonicalKmers gave %d, CanonicalKmer gave %d",
 				i, normalizedKmers[i], expected)
 		}
 	}
 }
-// BenchmarkEncodeNormalizedKmers benchmarks the normalized encoding function
+// BenchmarkEncodeCanonicalKmers benchmarks the normalized encoding function
-func BenchmarkEncodeNormalizedKmers(b *testing.B) {
+func BenchmarkEncodeCanonicalKmers(b *testing.B) {
 	sizes := []int{100, 1000, 10000, 100000}
 	kSizes := []int{8, 16, 31}
@@ -488,7 +488,7 @@ func BenchmarkEncodeNormalizedKmers(b *testing.B) {
 				b.SetBytes(int64(size))
 				for i := 0; i < b.N; i++ {
-					EncodeNormalizedKmers(seq, k, &buffer)
+					EncodeCanonicalKmers(seq, k, &buffer)
 				}
 			})
 		}
@@ -506,14 +506,14 @@ func BenchmarkReverseComplement(b *testing.B) {
 	}
 }
-// BenchmarkNormalizeKmer benchmarks the normalization function
+// BenchmarkCanonicalKmer benchmarks the normalization function
-func BenchmarkNormalizeKmer(b *testing.B) {
+func BenchmarkCanonicalKmer(b *testing.B) {
 	kmer := uint64(0x06C6C6C6C6C6C6C6)
 	k := 31
 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
-		NormalizeKmer(kmer, k)
+		CanonicalKmer(kmer, k)
 	}
 }
@@ -730,7 +730,7 @@ func TestExtractSuperKmersCanonical(t *testing.T) {
 	for i, sk := range result {
 		// Verify the minimizer is indeed canonical (equal to its normalized form)
-		normalized := NormalizeKmer(sk.Minimizer, m)
+		normalized := CanonicalKmer(sk.Minimizer, m)
 		if sk.Minimizer != normalized {
 			t.Errorf("super k-mer %d: minimizer %d is not canonical (normalized: %d)",
 				i, sk.Minimizer, normalized)
@@ -886,8 +886,8 @@ func TestKmerErrorMarkersWithRealKmers(t *testing.T) {
 			}
 			// Verify normalization works with error bits cleared
-			normalized1 := NormalizeKmer(originalKmer, k)
+			normalized1 := CanonicalKmer(originalKmer, k)
-			normalized2 := NormalizeKmer(ClearKmerError(marked), k)
+			normalized2 := CanonicalKmer(ClearKmerError(marked), k)
 			if normalized1 != normalized2 {
 				t.Errorf("Normalization affected by error bits")
 			}
@@ -977,8 +977,8 @@ func TestReverseComplementPreservesErrorBits(t *testing.T) {
 	}
 }
-// TestNormalizeKmerWithErrorBits tests that NormalizeKmer works with error bits
+// TestCanonicalKmerWithErrorBits tests that CanonicalKmer works with error bits
-func TestNormalizeKmerWithErrorBits(t *testing.T) {
+func TestCanonicalKmerWithErrorBits(t *testing.T) {
 	seq := []byte("ACGTACGTACGTACGTACGTACGTACGTACG")
 	k := 31
@@ -995,7 +995,7 @@ func TestNormalizeKmerWithErrorBits(t *testing.T) {
 			marked := SetKmerError(originalKmer, errCode)
 			// Normalize should work on the sequence part
-			normalized := NormalizeKmer(marked, k)
+			normalized := CanonicalKmer(marked, k)
 			// Error bits should be preserved
 			if GetKmerError(normalized) != errCode {
@@ -1004,7 +1004,7 @@ func TestNormalizeKmerWithErrorBits(t *testing.T) {
 			// The sequence part should be normalized
 			cleanNormalized := ClearKmerError(normalized)
-			expectedNormalized := NormalizeKmer(ClearKmerError(marked), k)
+			expectedNormalized := CanonicalKmer(ClearKmerError(marked), k)
 			if cleanNormalized != expectedNormalized {
 				t.Errorf("Normalization incorrect with error bits present")
@@ -1081,19 +1081,19 @@ func TestIterKmers(t *testing.T) {
 	}
 }
-// TestIterNormalizedKmers tests the normalized k-mer iterator
+// TestIterCanonicalKmers tests the normalized k-mer iterator
-func TestIterNormalizedKmers(t *testing.T) {
+func TestIterCanonicalKmers(t *testing.T) {
 	seq := []byte("ACGTACGTACGT")
 	k := 6
 	// Collect k-mers via iterator
 	var iterKmers []uint64
-	for kmer := range IterNormalizedKmers(seq, k) {
+	for kmer := range IterCanonicalKmers(seq, k) {
 		iterKmers = append(iterKmers, kmer)
 	}
 	// Compare with slice-based version
-	sliceKmers := EncodeNormalizedKmers(seq, k, nil)
+	sliceKmers := EncodeCanonicalKmers(seq, k, nil)
 	if len(iterKmers) != len(sliceKmers) {
 		t.Errorf("length mismatch: iter=%d, slice=%d", len(iterKmers), len(sliceKmers))
@@ -1151,8 +1151,8 @@ func BenchmarkIterKmers(b *testing.B) {
 	})
 }
-// BenchmarkIterNormalizedKmers benchmarks the normalized iterator
+// BenchmarkIterCanonicalKmers benchmarks the normalized iterator
-func BenchmarkIterNormalizedKmers(b *testing.B) {
+func BenchmarkIterCanonicalKmers(b *testing.B) {
 	seq := make([]byte, 10000)
 	for i := range seq {
 		seq[i] = "ACGT"[i%4]
@@ -1163,7 +1163,7 @@ func BenchmarkIterNormalizedKmers(b *testing.B) {
 		b.ResetTimer()
 		for i := 0; i < b.N; i++ {
 			count := 0
-			for range IterNormalizedKmers(seq, k) {
+			for range IterCanonicalKmers(seq, k) {
 				count++
 			}
 		}
@@ -1173,7 +1173,7 @@ func BenchmarkIterNormalizedKmers(b *testing.B) {
 		var buffer []uint64
 		b.ResetTimer()
 		for i := 0; i < b.N; i++ {
-			buffer = EncodeNormalizedKmers(seq, k, &buffer)
+			buffer = EncodeCanonicalKmers(seq, k, &buffer)
 		}
 	})
 }
@@ -0,0 +1,235 @@
 package obikmer
 import (
 	"container/heap"
 	"github.com/RoaringBitmap/roaring/roaring64"
 )
 // heapItem represents an element in the min-heap for k-way merge
 type heapItem struct {
 	value uint64
 	idx   int
 }
 // kmerMinHeap implements heap.Interface for k-way merge algorithm
 type kmerMinHeap []heapItem
 func (h kmerMinHeap) Len() int           { return len(h) }
 func (h kmerMinHeap) Less(i, j int) bool { return h[i].value < h[j].value }
 func (h kmerMinHeap) Swap(i, j int)      { h[i], h[j] = h[j], h[i] }
 func (h *kmerMinHeap) Push(x interface{}) {
 	*h = append(*h, x.(heapItem))
 }
 func (h *kmerMinHeap) Pop() interface{} {
 	old := *h
 	n := len(old)
 	x := old[n-1]
 	*h = old[0 : n-1]
 	return x
 }
 // QuorumAtLeast returns k-mers present in at least q sets
 //
 // Algorithm: K-way merge with min-heap counting
 //
 // The algorithm processes all k-mers in sorted order using a min-heap:
 //
 //  1. Initialize one iterator per non-empty set
 //  2. Build a min-heap of (value, set_index) pairs, one per iterator
 //  3. While heap is not empty:
 //     a. Extract the minimum value v from heap
 //     b. Pop ALL heap items with value == v (counting occurrences)
 //     c. If count >= q, add v to result
 //     d. Advance each popped iterator and re-insert into heap if valid
 //
 // This ensures each unique k-mer is counted exactly once across all sets.
 //
 // Time complexity: O(M log N)
 //   - M = sum of all set cardinalities (total k-mer occurrences)
 //   - N = number of sets
 //   - Each k-mer occurrence is inserted/extracted from heap once: O(M) operations
 //   - Each heap operation costs O(log N)
 //
 // Space complexity: O(N)
 //   - Heap contains at most N elements (one per set iterator)
 //   - Output bitmap size depends on quorum result
 //
 // Special cases (optimized):
 //   - q <= 0: returns empty set
 //   - q == 1: delegates to Union() (native OR operations)
 //   - q == n: delegates to Intersect() (native AND operations)
 //   - q > n: returns empty set (impossible to satisfy)
 func (ksg *KmerSetGroup) QuorumAtLeast(q int) *KmerSet {
 	n := len(ksg.sets)
 	// Edge cases
 	if q <= 0 || n == 0 {
 		return NewKmerSet(ksg.k)
 	}
 	if q > n {
 		return NewKmerSet(ksg.k)
 	}
 	if q == 1 {
 		return ksg.Union()
 	}
 	if q == n {
 		return ksg.Intersect()
 	}
 	// Initialize iterators for all non-empty sets
 	iterators := make([]roaring64.IntIterable64, 0, n)
 	iterIndices := make([]int, 0, n)
 	for i, set := range ksg.sets {
 		if set.Len() > 0 {
 			iter := set.bitmap.Iterator()
 			if iter.HasNext() {
 				iterators = append(iterators, iter)
 				iterIndices = append(iterIndices, i)
 			}
 		}
 	}
 	if len(iterators) == 0 {
 		return NewKmerSet(ksg.k)
 	}
 	// Initialize heap with first value from each iterator
 	h := make(kmerMinHeap, len(iterators))
 	for i, iter := range iterators {
 		h[i] = heapItem{value: iter.Next(), idx: i}
 	}
 	heap.Init(&h)
 	// Result bitmap
 	result := roaring64.New()
 	// K-way merge with counting
 	for len(h) > 0 {
 		minVal := h[0].value
 		count := 0
 		activeIndices := make([]int, 0, len(h))
 		// Pop all elements with same value (count occurrences)
 		for len(h) > 0 && h[0].value == minVal {
 			item := heap.Pop(&h).(heapItem)
 			count++
 			activeIndices = append(activeIndices, item.idx)
 		}
 		// Add to result if quorum reached
 		if count >= q {
 			result.Add(minVal)
 		}
 		// Advance iterators and re-insert into heap
 		for _, iterIdx := range activeIndices {
 			if iterators[iterIdx].HasNext() {
 				heap.Push(&h, heapItem{
 					value: iterators[iterIdx].Next(),
 					idx:   iterIdx,
 				})
 			}
 		}
 	}
 	return NewKmerSetFromBitmap(ksg.k, result)
 }
 // QuorumAtMost returns k-mers present in at most q sets
 //
 // Algorithm: Uses the mathematical identity
 //   AtMost(q) = Union() - AtLeast(q+1)
 //
 // Proof:
 //   - Union() contains all k-mers present in at least 1 set
 //   - AtLeast(q+1) contains all k-mers present in q+1 or more sets
 //   - Their difference contains only k-mers present in at most q sets
 //
 // Implementation:
 //  1. Compute U = Union()
 //  2. Compute A = QuorumAtLeast(q+1)
 //  3. Return U - A using bitmap AndNot operation
 //
 // Time complexity: O(M log N)
 //   - Union(): O(M) with native OR operations
 //   - QuorumAtLeast(q+1): O(M log N)
 //   - AndNot: O(|U|) where |U| <= M
 //   - Total: O(M log N)
 //
 // Space complexity: O(N)
 //   - Inherited from QuorumAtLeast heap
 //
 // Special cases:
 //   - q <= 0: returns empty set
 //   - q >= n: returns Union() (all k-mers are in at most n sets)
 func (ksg *KmerSetGroup) QuorumAtMost(q int) *KmerSet {
 	n := len(ksg.sets)
 	// Edge cases
 	if q <= 0 {
 		return NewKmerSet(ksg.k)
 	}
 	if q >= n {
 		return ksg.Union()
 	}
 	// Compute Union() - AtLeast(q+1)
 	union := ksg.Union()
 	atLeastQ1 := ksg.QuorumAtLeast(q + 1)
 	// Difference: elements in union but not in atLeastQ1
 	result := union.bitmap.Clone()
 	result.AndNot(atLeastQ1.bitmap)
 	return NewKmerSetFromBitmap(ksg.k, result)
 }
 // QuorumExactly returns k-mers present in exactly q sets
 //
 // Algorithm: Uses the mathematical identity
 //   Exactly(q) = AtLeast(q) - AtLeast(q+1)
 //
 // Proof:
 //   - AtLeast(q) contains all k-mers present in q or more sets
 //   - AtLeast(q+1) contains all k-mers present in q+1 or more sets
 //   - Their difference contains only k-mers present in exactly q sets
 //
 // Implementation:
 //  1. Compute A = QuorumAtLeast(q)
 //  2. Compute B = QuorumAtLeast(q+1)
 //  3. Return A - B using bitmap AndNot operation
 //
 // Time complexity: O(M log N)
 //   - Two calls to QuorumAtLeast: 2 * O(M log N)
 //   - One AndNot operation: O(|A|) where |A| <= M
 //   - Total: O(M log N) since AndNot is dominated by merge operations
 //
 // Space complexity: O(N)
 //   - Inherited from QuorumAtLeast heap
 //   - Two temporary bitmaps for intermediate results
 //
 // Special cases:
 //   - q <= 0: returns empty set
 //   - q > n: returns empty set (impossible to have k-mer in more than n sets)
 func (ksg *KmerSetGroup) QuorumExactly(q int) *KmerSet {
 	n := len(ksg.sets)
 	// Edge cases
 	if q <= 0 || q > n {
 		return NewKmerSet(ksg.k)
 	}
 	// Compute AtLeast(q) - AtLeast(q+1)
 	aq := ksg.QuorumAtLeast(q)
 	aq1 := ksg.QuorumAtLeast(q + 1)
 	// Difference: elements in aq but not in aq1
 	result := aq.bitmap.Clone()
 	result.AndNot(aq1.bitmap)
 	return NewKmerSetFromBitmap(ksg.k, result)
 }
@@ -0,0 +1,395 @@
 package obikmer
 import (
 	"testing"
 )
 // TestQuorumAtLeastEdgeCases tests edge cases for QuorumAtLeast
 func TestQuorumAtLeastEdgeCases(t *testing.T) {
 	k := 5
 	// Test group with all empty sets
 	emptyGroup := NewKmerSetGroup(k, 3)
 	result := emptyGroup.QuorumAtLeast(1)
 	if result.Len() != 0 {
 		t.Errorf("Empty sets: expected 0 k-mers, got %d", result.Len())
 	}
 	// Test q <= 0
 	group := NewKmerSetGroup(k, 3)
 	result = group.QuorumAtLeast(0)
 	if result.Len() != 0 {
 		t.Errorf("q=0: expected 0 k-mers, got %d", result.Len())
 	}
 	result = group.QuorumAtLeast(-1)
 	if result.Len() != 0 {
 		t.Errorf("q=-1: expected 0 k-mers, got %d", result.Len())
 	}
 	// Test q > n
 	group.Get(0).AddKmerCode(1)
 	result = group.QuorumAtLeast(10)
 	if result.Len() != 0 {
 		t.Errorf("q>n: expected 0 k-mers, got %d", result.Len())
 	}
 }
 // TestQuorumAtLeastQ1 tests q=1 (should equal Union)
 func TestQuorumAtLeastQ1(t *testing.T) {
 	k := 5
 	group := NewKmerSetGroup(k, 3)
 	// Add different k-mers to each set
 	group.Get(0).AddKmerCode(1)
 	group.Get(0).AddKmerCode(2)
 	group.Get(1).AddKmerCode(2)
 	group.Get(1).AddKmerCode(3)
 	group.Get(2).AddKmerCode(3)
 	group.Get(2).AddKmerCode(4)
 	quorum := group.QuorumAtLeast(1)
 	union := group.Union()
 	if quorum.Len() != union.Len() {
 		t.Errorf("QuorumAtLeast(1) length %d != Union length %d", quorum.Len(), union.Len())
 	}
 	// Check all elements match
 	for kmer := uint64(1); kmer <= 4; kmer++ {
 		if quorum.Contains(kmer) != union.Contains(kmer) {
 			t.Errorf("Mismatch for k-mer %d", kmer)
 		}
 	}
 }
 // TestQuorumAtLeastQN tests q=n (should equal Intersect)
 func TestQuorumAtLeastQN(t *testing.T) {
 	k := 5
 	group := NewKmerSetGroup(k, 3)
 	// Add some common k-mers and some unique
 	for i := 0; i < 3; i++ {
 		group.Get(i).AddKmerCode(10) // common to all
 		group.Get(i).AddKmerCode(20) // common to all
 	}
 	group.Get(0).AddKmerCode(1) // unique to set 0
 	group.Get(1).AddKmerCode(2) // unique to set 1
 	quorum := group.QuorumAtLeast(3)
 	intersect := group.Intersect()
 	if quorum.Len() != intersect.Len() {
 		t.Errorf("QuorumAtLeast(n) length %d != Intersect length %d", quorum.Len(), intersect.Len())
 	}
 	if quorum.Len() != 2 {
 		t.Errorf("Expected 2 common k-mers, got %d", quorum.Len())
 	}
 	if !quorum.Contains(10) || !quorum.Contains(20) {
 		t.Error("Missing common k-mers")
 	}
 	if quorum.Contains(1) || quorum.Contains(2) {
 		t.Error("Unique k-mers should not be in result")
 	}
 }
 // TestQuorumAtLeastGeneral tests general quorum values
 func TestQuorumAtLeastGeneral(t *testing.T) {
 	k := 5
 	group := NewKmerSetGroup(k, 5)
 	// Setup: k-mer i appears in i sets (for i=1..5)
 	// k-mer 1: in set 0
 	// k-mer 2: in sets 0,1
 	// k-mer 3: in sets 0,1,2
 	// k-mer 4: in sets 0,1,2,3
 	// k-mer 5: in sets 0,1,2,3,4 (all)
 	for kmer := uint64(1); kmer <= 5; kmer++ {
 		for setIdx := 0; setIdx < int(kmer); setIdx++ {
 			group.Get(setIdx).AddKmerCode(kmer)
 		}
 	}
 	tests := []struct {
 		q        int
 		expected map[uint64]bool
 	}{
 		{1, map[uint64]bool{1: true, 2: true, 3: true, 4: true, 5: true}},
 		{2, map[uint64]bool{2: true, 3: true, 4: true, 5: true}},
 		{3, map[uint64]bool{3: true, 4: true, 5: true}},
 		{4, map[uint64]bool{4: true, 5: true}},
 		{5, map[uint64]bool{5: true}},
 	}
 	for _, tt := range tests {
 		result := group.QuorumAtLeast(tt.q)
 		if result.Len() != uint64(len(tt.expected)) {
 			t.Errorf("q=%d: expected %d k-mers, got %d", tt.q, len(tt.expected), result.Len())
 		}
 		for kmer := uint64(1); kmer <= 5; kmer++ {
 			shouldContain := tt.expected[kmer]
 			doesContain := result.Contains(kmer)
 			if shouldContain != doesContain {
 				t.Errorf("q=%d, k-mer=%d: expected contains=%v, got %v", tt.q, kmer, shouldContain, doesContain)
 			}
 		}
 	}
 }
 // TestQuorumExactlyBasic tests QuorumExactly basic functionality
 func TestQuorumExactlyBasic(t *testing.T) {
 	k := 5
 	group := NewKmerSetGroup(k, 5)
 	// Setup: k-mer i appears in exactly i sets
 	for kmer := uint64(1); kmer <= 5; kmer++ {
 		for setIdx := 0; setIdx < int(kmer); setIdx++ {
 			group.Get(setIdx).AddKmerCode(kmer)
 		}
 	}
 	tests := []struct {
 		q        int
 		expected []uint64
 	}{
 		{1, []uint64{1}},
 		{2, []uint64{2}},
 		{3, []uint64{3}},
 		{4, []uint64{4}},
 		{5, []uint64{5}},
 	}
 	for _, tt := range tests {
 		result := group.QuorumExactly(tt.q)
 		if result.Len() != uint64(len(tt.expected)) {
 			t.Errorf("q=%d: expected %d k-mers, got %d", tt.q, len(tt.expected), result.Len())
 		}
 		for _, kmer := range tt.expected {
 			if !result.Contains(kmer) {
 				t.Errorf("q=%d: missing k-mer %d", tt.q, kmer)
 			}
 		}
 	}
 }
 // TestQuorumIdentity tests the mathematical identity: Exactly(q) = AtLeast(q) - AtLeast(q+1)
 func TestQuorumIdentity(t *testing.T) {
 	k := 5
 	group := NewKmerSetGroup(k, 4)
 	// Add random distribution
 	group.Get(0).AddKmerCode(1)
 	group.Get(0).AddKmerCode(2)
 	group.Get(0).AddKmerCode(3)
 	group.Get(1).AddKmerCode(2)
 	group.Get(1).AddKmerCode(3)
 	group.Get(1).AddKmerCode(4)
 	group.Get(2).AddKmerCode(3)
 	group.Get(2).AddKmerCode(4)
 	group.Get(3).AddKmerCode(4)
 	for q := 1; q <= 4; q++ {
 		exactly := group.QuorumExactly(q)
 		atLeast := group.QuorumAtLeast(q)
 		atLeastPlus1 := group.QuorumAtLeast(q + 1)
 		// Verify: every element in exactly(q) is in atLeast(q)
 		iter := exactly.Iterator()
 		for iter.HasNext() {
 			kmer := iter.Next()
 			if !atLeast.Contains(kmer) {
 				t.Errorf("q=%d: k-mer %d in Exactly but not in AtLeast", q, kmer)
 			}
 			if atLeastPlus1.Contains(kmer) {
 				t.Errorf("q=%d: k-mer %d in Exactly but also in AtLeast(q+1)", q, kmer)
 			}
 		}
 	}
 }
 // TestQuorumDisjointSets tests quorum on completely disjoint sets
 func TestQuorumDisjointSets(t *testing.T) {
 	k := 5
 	group := NewKmerSetGroup(k, 3)
 	// Each set has unique k-mers
 	group.Get(0).AddKmerCode(1)
 	group.Get(1).AddKmerCode(2)
 	group.Get(2).AddKmerCode(3)
 	// q=1 should give all
 	result := group.QuorumAtLeast(1)
 	if result.Len() != 3 {
 		t.Errorf("Disjoint sets q=1: expected 3, got %d", result.Len())
 	}
 	// q=2 should give none
 	result = group.QuorumAtLeast(2)
 	if result.Len() != 0 {
 		t.Errorf("Disjoint sets q=2: expected 0, got %d", result.Len())
 	}
 }
 // TestQuorumIdenticalSets tests quorum on identical sets
 func TestQuorumIdenticalSets(t *testing.T) {
 	k := 5
 	group := NewKmerSetGroup(k, 3)
 	// All sets have same k-mers
 	for i := 0; i < 3; i++ {
 		group.Get(i).AddKmerCode(10)
 		group.Get(i).AddKmerCode(20)
 		group.Get(i).AddKmerCode(30)
 	}
 	// Any q <= n should give all k-mers
 	for q := 1; q <= 3; q++ {
 		result := group.QuorumAtLeast(q)
 		if result.Len() != 3 {
 			t.Errorf("Identical sets q=%d: expected 3, got %d", q, result.Len())
 		}
 	}
 }
 // TestQuorumLargeNumbers tests with large k-mer values
 func TestQuorumLargeNumbers(t *testing.T) {
 	k := 21
 	group := NewKmerSetGroup(k, 3)
 	// Use large uint64 values (actual k-mer encodings)
 	largeKmers := []uint64{
 		0x1234567890ABCDEF,
 		0xFEDCBA0987654321,
 		0xAAAAAAAAAAAAAAAA,
 	}
 	// Add to multiple sets
 	for i := 0; i < 3; i++ {
 		for j := 0; j <= i; j++ {
 			group.Get(j).AddKmerCode(largeKmers[i])
 		}
 	}
 	result := group.QuorumAtLeast(2)
 	if result.Len() != 2 {
 		t.Errorf("Large numbers q=2: expected 2, got %d", result.Len())
 	}
 	if !result.Contains(largeKmers[1]) || !result.Contains(largeKmers[2]) {
 		t.Error("Large numbers: wrong k-mers in result")
 	}
 }
 // TestQuorumAtMostBasic tests QuorumAtMost basic functionality
 func TestQuorumAtMostBasic(t *testing.T) {
 	k := 5
 	group := NewKmerSetGroup(k, 5)
 	// Setup: k-mer i appears in exactly i sets
 	for kmer := uint64(1); kmer <= 5; kmer++ {
 		for setIdx := 0; setIdx < int(kmer); setIdx++ {
 			group.Get(setIdx).AddKmerCode(kmer)
 		}
 	}
 	tests := []struct {
 		q        int
 		expected []uint64
 	}{
 		{0, []uint64{}},                          // at most 0: none
 		{1, []uint64{1}},                         // at most 1: only k-mer 1
 		{2, []uint64{1, 2}},                      // at most 2: k-mers 1,2
 		{3, []uint64{1, 2, 3}},                   // at most 3: k-mers 1,2,3
 		{4, []uint64{1, 2, 3, 4}},                // at most 4: k-mers 1,2,3,4
 		{5, []uint64{1, 2, 3, 4, 5}},             // at most 5: all k-mers
 		{10, []uint64{1, 2, 3, 4, 5}},            // at most 10: all k-mers
 	}
 	for _, tt := range tests {
 		result := group.QuorumAtMost(tt.q)
 		if result.Len() != uint64(len(tt.expected)) {
 			t.Errorf("q=%d: expected %d k-mers, got %d", tt.q, len(tt.expected), result.Len())
 		}
 		for _, kmer := range tt.expected {
 			if !result.Contains(kmer) {
 				t.Errorf("q=%d: missing k-mer %d", tt.q, kmer)
 			}
 		}
 	}
 }
 // TestQuorumComplementIdentity tests that AtLeast and AtMost are complementary
 func TestQuorumComplementIdentity(t *testing.T) {
 	k := 5
 	group := NewKmerSetGroup(k, 4)
 	// Add random distribution
 	group.Get(0).AddKmerCode(1)
 	group.Get(0).AddKmerCode(2)
 	group.Get(0).AddKmerCode(3)
 	group.Get(1).AddKmerCode(2)
 	group.Get(1).AddKmerCode(3)
 	group.Get(1).AddKmerCode(4)
 	group.Get(2).AddKmerCode(3)
 	group.Get(2).AddKmerCode(4)
 	group.Get(3).AddKmerCode(4)
 	union := group.Union()
 	for q := 1; q < 4; q++ {
 		atMost := group.QuorumAtMost(q)
 		atLeast := group.QuorumAtLeast(q + 1)
 		// Verify: AtMost(q) ∪ AtLeast(q+1) = Union()
 		combined := atMost.Union(atLeast)
 		if combined.Len() != union.Len() {
 			t.Errorf("q=%d: AtMost(q) ∪ AtLeast(q+1) has %d k-mers, Union has %d",
 				q, combined.Len(), union.Len())
 		}
 		// Verify: AtMost(q) ∩ AtLeast(q+1) = ∅
 		overlap := atMost.Intersect(atLeast)
 		if overlap.Len() != 0 {
 			t.Errorf("q=%d: AtMost(q) and AtLeast(q+1) overlap with %d k-mers",
 				q, overlap.Len())
 		}
 	}
 }
 // BenchmarkQuorumAtLeast benchmarks quorum operations
 func BenchmarkQuorumAtLeast(b *testing.B) {
 	k := 21
 	n := 10
 	group := NewKmerSetGroup(k, n)
 	// Populate with realistic data
 	for i := 0; i < n; i++ {
 		for j := uint64(0); j < 10000; j++ {
 			if (j % uint64(n)) <= uint64(i) {
 				group.Get(i).AddKmerCode(j)
 			}
 		}
 	}
 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
 		_ = group.QuorumAtLeast(5)
 	}
 }
@@ -8,7 +8,7 @@ import (
 // corresponds to the last commit, and not the one when the file will be
 // commited
-var _Commit = "09ac15a"
+var _Commit = "a43e625"
 var _Version = "Release 4.4.0"
 // Version returns the version of the obitools package.