Files
obitools4/pkg/obikmer/skm_test.go

177 lines
3.4 KiB
Go
Raw Permalink Normal View History

package obikmer
import (
"os"
"path/filepath"
"testing"
)
// TestSkmRoundTrip extracts super-kmers from a fixed sequence, writes them to
// a .skm file, and verifies that reading the file back yields the same number
// of super-kmers with the same nucleotides, in the same order.
func TestSkmRoundTrip(t *testing.T) {
	const (
		kmerSize      = 21
		minimizerSize = 9
	)

	skmPath := filepath.Join(t.TempDir(), "test.skm")

	// Reference data: super-kmers extracted from a known sequence.
	input := []byte("ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT")
	written := ExtractSuperKmers(input, kmerSize, minimizerSize, nil)
	if len(written) == 0 {
		t.Fatal("no super-kmers extracted")
	}

	// Serialize every super-kmer, then close the writer before reading.
	writer, err := NewSkmWriter(skmPath)
	if err != nil {
		t.Fatal(err)
	}
	for i := range written {
		if writeErr := writer.Write(written[i]); writeErr != nil {
			t.Fatal(writeErr)
		}
	}
	if closeErr := writer.Close(); closeErr != nil {
		t.Fatal(closeErr)
	}

	// Deserialize and compare record by record.
	reader, err := NewSkmReader(skmPath)
	if err != nil {
		t.Fatal(err)
	}
	defer reader.Close()

	count := 0
	for decoded, more := reader.Next(); more; decoded, more = reader.Next() {
		if count >= len(written) {
			t.Fatal("read more super-kmers than written")
		}
		reference := written[count]

		if len(decoded.Sequence) != len(reference.Sequence) {
			t.Fatalf("super-kmer %d: length mismatch: got %d, want %d",
				count, len(decoded.Sequence), len(reference.Sequence))
		}

		// Setting bit 0x20 folds ASCII letters to lowercase, making the
		// comparison case insensitive (the decoder produces lowercase).
		for pos, base := range decoded.Sequence {
			gotBase := base | 0x20
			wantBase := reference.Sequence[pos] | 0x20
			if gotBase != wantBase {
				t.Fatalf("super-kmer %d pos %d: got %c, want %c", count, pos, gotBase, wantBase)
			}
		}
		count++
	}

	if count != len(written) {
		t.Fatalf("read %d super-kmers, want %d", count, len(written))
	}
}
// TestSkmEmptyFile checks that a .skm file produced by a writer that was
// closed without writing anything yields no super-kmer when read back.
func TestSkmEmptyFile(t *testing.T) {
	emptyPath := filepath.Join(t.TempDir(), "empty.skm")

	// Open and immediately close the writer: no record is ever written.
	writer, err := NewSkmWriter(emptyPath)
	if err != nil {
		t.Fatal(err)
	}
	if closeErr := writer.Close(); closeErr != nil {
		t.Fatal(closeErr)
	}

	// The very first read must already report the end of the data.
	reader, err := NewSkmReader(emptyPath)
	if err != nil {
		t.Fatal(err)
	}
	defer reader.Close()

	if _, found := reader.Next(); found {
		t.Fatal("expected no super-kmers in empty file")
	}
}
// TestSkmSingleBase writes several very short super-kmers (1 to 5 bases) to a
// single .skm file and checks that each one is read back with its exact
// length and nucleotides, and that nothing else follows them.
//
// The lengths are chosen to exercise padding of the encoded data: they
// include values just below, equal to and just above four bases.
func TestSkmSingleBase(t *testing.T) {
	dir := t.TempDir()
	path := filepath.Join(dir, "single.skm")

	// Test with sequences of various lengths to check padding.
	sequences := [][]byte{
		[]byte("A"),
		[]byte("AC"),
		[]byte("ACG"),
		[]byte("ACGT"),
		[]byte("ACGTA"),
	}

	w, err := NewSkmWriter(path)
	if err != nil {
		t.Fatal(err)
	}
	for _, seq := range sequences {
		sk := SuperKmer{Sequence: seq}
		if err := w.Write(sk); err != nil {
			t.Fatal(err)
		}
	}
	if err := w.Close(); err != nil {
		t.Fatal(err)
	}

	r, err := NewSkmReader(path)
	if err != nil {
		t.Fatal(err)
	}
	defer r.Close()

	for i, expected := range sequences {
		sk, ok := r.Next()
		if !ok {
			t.Fatalf("expected super-kmer %d, got EOF", i)
		}
		if len(sk.Sequence) != len(expected) {
			t.Fatalf("sk %d: length %d, want %d (got %q, want %q)",
				i, len(sk.Sequence), len(expected), sk.Sequence, expected)
		}
		// OR-ing with 0x20 folds ASCII letters to lowercase so the
		// comparison ignores the case of the decoded nucleotides.
		for j := range sk.Sequence {
			got := sk.Sequence[j] | 0x20
			want := expected[j] | 0x20
			if got != want {
				t.Fatalf("sk %d pos %d: got %c, want %c", i, j, got, want)
			}
		}
	}

	// The reader must be exhausted now: padding bits of the last record must
	// not be decoded as an additional super-kmer.
	if extra, ok := r.Next(); ok {
		t.Fatalf("expected EOF after %d super-kmers, got extra record of length %d",
			len(sequences), len(extra.Sequence))
	}
}
// TestSkmFileSize writes a single 10-base super-kmer and checks the size of
// the resulting file on disk.
//
// Expected layout, as asserted below: a 2-byte length followed by
// ceil(10/4) = 3 data bytes, i.e. 5 bytes in total.
func TestSkmFileSize(t *testing.T) {
	target := filepath.Join(t.TempDir(), "size.skm")

	// One record of known length: 10 bases.
	record := SuperKmer{Sequence: []byte("ACGTACGTAC")}

	writer, err := NewSkmWriter(target)
	if err != nil {
		t.Fatal(err)
	}
	if writeErr := writer.Write(record); writeErr != nil {
		t.Fatal(writeErr)
	}
	// Close before stat-ing so that everything has reached the file.
	if closeErr := writer.Close(); closeErr != nil {
		t.Fatal(closeErr)
	}

	stat, err := os.Stat(target)
	if err != nil {
		t.Fatal(err)
	}
	if size := stat.Size(); size != 5 {
		t.Fatalf("file size: got %d, want 5", size)
	}
}