package obikmer

import (
	"fmt"
	"testing"
)

// kmerBenchSink receives the results computed inside the benchmarks so the
// compiler cannot treat the benchmarked calls as dead code and remove them.
var kmerBenchSink int

// TestEncodeDecodeKmer checks that EncodeKmer produces the expected integer
// code for each k-mer and that DecodeKmer turns that code back into the
// original k-mer when given the k-mer length.
func TestEncodeDecodeKmer(t *testing.T) {
	tests := []struct {
		kmer string
		code int
	}{
		{"a", 0},
		{"c", 1},
		{"g", 2},
		{"t", 3},
		{"aa", 0},
		{"ac", 1},
		{"ca", 4},
		{"acgt", 27},  // 0b00011011
		{"cgta", 108}, // 0b01101100
		{"tttt", 255}, // 0b11111111
	}

	for _, tt := range tests {
		t.Run(tt.kmer, func(t *testing.T) {
			// Encoding direction.
			encoded := EncodeKmer(tt.kmer)
			if encoded != tt.code {
				t.Errorf("EncodeKmer(%q) = %d, want %d", tt.kmer, encoded, tt.code)
			}

			// Decoding direction.
			decoded := DecodeKmer(tt.code, len(tt.kmer))
			if decoded != tt.kmer {
				t.Errorf("DecodeKmer(%d, %d) = %q, want %q", tt.code, len(tt.kmer), decoded, tt.kmer)
			}
		})
	}
}

// TestNormalizeInt checks NormalizeInt against hand-written expectations:
// every circular rotation of a k-mer must normalize to the code of the
// expected canonical rotation.
func TestNormalizeInt(t *testing.T) {
	tests := []struct {
		name     string
		kmer     string
		expected string
	}{
		// k=1
		{"k=1 a", "a", "a"},
		{"k=1 c", "c", "c"},

		// k=2
		{"k=2 ca", "ca", "ac"},
		{"k=2 ac", "ac", "ac"},
		{"k=2 ta", "ta", "at"},

		// k=4 - all rotations of "acgt"
		{"k=4 acgt", "acgt", "acgt"},
		{"k=4 cgta", "cgta", "acgt"},
		{"k=4 gtac", "gtac", "acgt"},
		{"k=4 tacg", "tacg", "acgt"},

		// k=4 - rotations of "tgca"
		{"k=4 tgca", "tgca", "atgc"},
		{"k=4 gcat", "gcat", "atgc"},
		{"k=4 catg", "catg", "atgc"},
		{"k=4 atgc", "atgc", "atgc"},

		// k=3 - rotations of "atg"
		{"k=3 atg", "atg", "atg"},
		{"k=3 tga", "tga", "atg"},
		{"k=3 gat", "gat", "atg"},

		// k=6
		{"k=6 aaaaaa", "aaaaaa", "aaaaaa"},
		{"k=6 tttttt", "tttttt", "tttttt"},

		// k>6 (computed on the fly)
		{"k=7 aaaaaaa", "aaaaaaa", "aaaaaaa"},
		{"k=7 tgcatgc", "tgcatgc", "atgctgc"},
		{"k=7 gcatgct", "gcatgct", "atgctgc"},
		{"k=8 acgtacgt", "acgtacgt", "acgtacgt"},
		{"k=8 gtacgtac", "gtacgtac", "acgtacgt"},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			kmerCode := EncodeKmer(tt.kmer)
			expectedCode := EncodeKmer(tt.expected)

			result := NormalizeInt(kmerCode, len(tt.kmer))
			if result != expectedCode {
				// Decode the result so the failure message is readable.
				resultKmer := DecodeKmer(result, len(tt.kmer))
				t.Errorf("NormalizeInt(%q) = %q (code %d), want %q (code %d)",
					tt.kmer, resultKmer, result, tt.expected, expectedCode)
			}
		})
	}
}

// TestNormalizeIntConsistencyWithString verifies that NormalizeInt gives the
// same result as the string-based Normalize for every k-mer of size 1 to 4
// (the size is capped so the test does not become too slow).
func TestNormalizeIntConsistencyWithString(t *testing.T) {
	bases := []byte{'a', 'c', 'g', 't'}

	// testKmers recursively extends current by one base at a time, checking
	// every non-empty prefix it builds, until maxSize is reached.
	var testKmers func(current string, maxSize int)
	testKmers = func(current string, maxSize int) {
		if len(current) > 0 {
			// Compare the two normalization paths on the same k-mer.
			normalizedStr := Normalize(current)
			normalizedStrCode := EncodeKmer(normalizedStr)

			kmerCode := EncodeKmer(current)
			normalizedInt := NormalizeInt(kmerCode, len(current))

			if normalizedInt != normalizedStrCode {
				normalizedIntStr := DecodeKmer(normalizedInt, len(current))
				t.Errorf("Inconsistency for %q: Normalize=%q (code %d), NormalizeInt=%q (code %d)",
					current, normalizedStr, normalizedStrCode, normalizedIntStr, normalizedInt)
			}
		}

		if len(current) < maxSize {
			for _, base := range bases {
				testKmers(current+string(base), maxSize)
			}
		}
	}

	testKmers("", 4) // Test up to k=4 to keep the runtime reasonable
}

// TestCircularRotations checks that all circular rotations of a k-mer
// normalize to the same canonical code.
func TestCircularRotations(t *testing.T) {
	tests := []struct {
		kmers     []string
		canonical string
	}{
		{[]string{"atg", "tga", "gat"}, "atg"},
		{[]string{"acgt", "cgta", "gtac", "tacg"}, "acgt"},
		{[]string{"tgca", "gcat", "catg", "atgc"}, "atgc"},
	}

	for _, tt := range tests {
		canonicalCode := EncodeKmer(tt.canonical)

		for _, kmer := range tt.kmers {
			kmerCode := EncodeKmer(kmer)
			result := NormalizeInt(kmerCode, len(kmer))

			if result != canonicalCode {
				resultKmer := DecodeKmer(result, len(kmer))
				t.Errorf("NormalizeInt(%q) = %q, want %q", kmer, resultKmer, tt.canonical)
			}
		}
	}
}

// BenchmarkNormalizeIntSmall measures NormalizeInt for k<=6 (per the original
// author's note, this path uses the lookup table).
func BenchmarkNormalizeIntSmall(b *testing.B) {
	kmer := "acgtac"
	kmerCode := EncodeKmer(kmer)
	kmerSize := len(kmer)

	var result int
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		result = NormalizeInt(kmerCode, kmerSize)
	}
	// Publish the result so the loop body cannot be optimized away.
	kmerBenchSink = result
}

// BenchmarkNormalizeIntLarge measures NormalizeInt for k>6 (per the original
// author's note, this path is computed on the fly).
func BenchmarkNormalizeIntLarge(b *testing.B) {
	kmer := "acgtacgtac"
	kmerCode := EncodeKmer(kmer)
	kmerSize := len(kmer)

	var result int
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		result = NormalizeInt(kmerCode, kmerSize)
	}
	// Publish the result so the loop body cannot be optimized away.
	kmerBenchSink = result
}

// BenchmarkEncodeKmer measures EncodeKmer on an 8-base k-mer.
func BenchmarkEncodeKmer(b *testing.B) {
	kmer := "acgtacgt"

	var result int
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		result = EncodeKmer(kmer)
	}
	// Publish the result so the loop body cannot be optimized away.
	kmerBenchSink = result
}

// TestCanonicalKmerCount checks CanonicalKmerCount against hardcoded counts
// for k=1 to 6, then cross-checks it against the number of distinct canonical
// codes stored in LexicographicNormalizationInt for the same sizes.
func TestCanonicalKmerCount(t *testing.T) {
	// Exact counts for k=1 to 6.
	tests := []struct {
		k        int
		expected int
	}{
		{1, 4},
		{2, 10},
		{3, 24},
		{4, 70},
		{5, 208},
		{6, 700},
	}

	for _, tt := range tests {
		t.Run(fmt.Sprintf("k=%d", tt.k), func(t *testing.T) {
			result := CanonicalKmerCount(tt.k)
			if result != tt.expected {
				t.Errorf("CanonicalKmerCount(%d) = %d, want %d", tt.k, result, tt.expected)
			}
		})
	}

	// Verify the counts match the table contents.
	for k := 1; k <= 6; k++ {
		// Collect the distinct canonical codes present in the table for k.
		uniqueCodes := make(map[int]bool)
		for _, canonicalCode := range LexicographicNormalizationInt[k] {
			uniqueCodes[canonicalCode] = true
		}

		expected := len(uniqueCodes)
		result := CanonicalKmerCount(k)
		if result != expected {
			t.Errorf("CanonicalKmerCount(%d) = %d, but table has %d unique canonical codes",
				k, result, expected)
		}
	}
}

// TestNecklaceCountFormula verifies that necklaceCount(k, 4) (described by
// the original author as Moreau's formula) reproduces the hardcoded values
// for k=1 to 6. Larger k are covered by TestNecklaceCountByBruteForce.
func TestNecklaceCountFormula(t *testing.T) {
	tests := []struct {
		k        int
		expected int
	}{
		{1, 4},
		{2, 10},
		{3, 24},
		{4, 70},
		{5, 208},
		{6, 700},
	}

	for _, tt := range tests {
		t.Run(fmt.Sprintf("k=%d", tt.k), func(t *testing.T) {
			result := necklaceCount(tt.k, 4)
			if result != tt.expected {
				t.Errorf("necklaceCount(%d, 4) = %d, want %d", tt.k, result, tt.expected)
			}
		})
	}
}

// TestNecklaceCountByBruteForce verifies necklaceCount for k=7 and k=8 by
// generating all 4^k k-mer codes, normalizing each with NormalizeInt, and
// counting the distinct normalized codes.
func TestNecklaceCountByBruteForce(t *testing.T) {
	bases := []byte{'a', 'c', 'g', 't'}

	for _, k := range []int{7, 8} {
		t.Run(fmt.Sprintf("k=%d", k), func(t *testing.T) {
			unique := make(map[int]bool)

			// generate builds every code of length k by appending one base
			// (shifted in as 2 bits) per recursion level.
			var generate func(current int, depth int)
			generate = func(current int, depth int) {
				if depth == k {
					// Normalize and add to the set.
					normalized := NormalizeInt(current, k)
					unique[normalized] = true
					return
				}

				for _, base := range bases {
					newCode := (current << 2) | int(EncodeNucleotide(base))
					generate(newCode, depth+1)
				}
			}

			generate(0, 0)

			bruteForceCount := len(unique)
			formulaCount := necklaceCount(k, 4)

			if bruteForceCount != formulaCount {
				t.Errorf("For k=%d: brute force count = %d, formula count = %d",
					k, bruteForceCount, formulaCount)
				// Do not emit the "matches" log line below on a mismatch.
				return
			}

			t.Logf("k=%d: unique canonical k-mers = %d (formula matches brute force)", k, bruteForceCount)
		})
	}
}

// TestEulerTotient checks eulerTotient against known values of φ(n).
func TestEulerTotient(t *testing.T) {
	tests := []struct {
		n        int
		expected int
	}{
		{1, 1},
		{2, 1},
		{3, 2},
		{4, 2},
		{5, 4},
		{6, 2},
		{7, 6},
		{8, 4},
		{9, 6},
		{10, 4},
		{12, 4},
		{15, 8},
		{20, 8},
	}

	for _, tt := range tests {
		t.Run(fmt.Sprintf("φ(%d)", tt.n), func(t *testing.T) {
			result := eulerTotient(tt.n)
			if result != tt.expected {
				t.Errorf("eulerTotient(%d) = %d, want %d", tt.n, result, tt.expected)
			}
		})
	}
}

// TestDivisors checks that divisors returns the expected list, comparing
// both the length and each element in order.
func TestDivisors(t *testing.T) {
	tests := []struct {
		n        int
		expected []int
	}{
		{1, []int{1}},
		{2, []int{1, 2}},
		{6, []int{1, 2, 3, 6}},
		{12, []int{1, 2, 3, 4, 6, 12}},
		{15, []int{1, 3, 5, 15}},
		{20, []int{1, 2, 4, 5, 10, 20}},
	}

	for _, tt := range tests {
		t.Run(fmt.Sprintf("divisors(%d)", tt.n), func(t *testing.T) {
			result := divisors(tt.n)

			if len(result) != len(tt.expected) {
				t.Errorf("divisors(%d) = %v, want %v", tt.n, result, tt.expected)
				return
			}

			// Report only the first mismatch; the full slices are printed.
			for i := range result {
				if result[i] != tt.expected[i] {
					t.Errorf("divisors(%d) = %v, want %v", tt.n, result, tt.expected)
					return
				}
			}
		})
	}
}