Files
obitools4/pkg/obikmer/kdi_writer.go

114 lines
2.4 KiB
Go
Raw Normal View History

package obikmer
import (
"bufio"
"encoding/binary"
"os"
)
// kdiMagic is the four-byte signature placed at the very start of every
// .kdi file: the ASCII letters "KDI" followed by the byte 0x01.
var kdiMagic = [...]byte{'K', 'D', 'I', 0x01}
// KdiWriter streams an ordered run of uint64 k-mers into a .kdi file,
// storing every value after the first as a varint-encoded difference from
// its predecessor.
//
// On-disk layout:
//
//	[magic:   4 bytes "KDI\x01"]
//	[count:   uint64 LE]  number of k-mers, filled in by Close
//	[first:   uint64 LE]  first k-mer, absolute (absent if nothing was written)
//	[delta_1: varint]     arr[1] - arr[0]
//	[delta_2: varint]     arr[2] - arr[1]
//	...
//
// Callers are responsible for supplying k-mers in strictly increasing
// order; the writer does not check it.
type KdiWriter struct {
	// w buffers all sequential output going to file.
	w *bufio.Writer
	// file is the underlying handle; Close uses it to patch the count
	// field in the header and to release the descriptor.
	file *os.File
	// count is the number of k-mers accepted by Write so far.
	count uint64
	// prev is the most recently written k-mer, the base for the next delta.
	prev uint64
	// first stays true until the first k-mer has been written.
	first bool
	// path is the file path the writer was created with.
	path string
}
// NewKdiWriter creates (or truncates) the file at path and returns a
// KdiWriter positioned right after the header.
//
// The header, made of the magic bytes followed by an all-zero 8-byte count
// slot, is queued in a 64 KiB buffered writer straight away. The real count
// replaces the zeros when Close is called.
//
// An error is returned if the file cannot be created or the header cannot
// be queued; in the latter case the file handle is closed before returning.
func NewKdiWriter(path string) (*KdiWriter, error) {
	file, err := os.Create(path)
	if err != nil {
		return nil, err
	}

	// Magic first, then eight zero bytes standing in for the count.
	header := make([]byte, len(kdiMagic)+8)
	copy(header, kdiMagic[:])

	buffered := bufio.NewWriterSize(file, 65536)
	if _, err = buffered.Write(header); err != nil {
		file.Close()
		return nil, err
	}

	kw := &KdiWriter{
		w:     buffered,
		file:  file,
		first: true,
		path:  path,
	}
	return kw, nil
}
// Write appends one k-mer to the output.
//
// The first k-mer is stored verbatim as a little-endian uint64; each later
// one is stored as the varint-encoded difference from the previous k-mer.
// K-mers are expected in strictly increasing order. The order is not
// verified here, and the difference is computed with unsigned wrap-around
// arithmetic.
//
// If the underlying write fails the error is returned and the writer's
// state (previous value, count) is left unchanged.
func (kw *KdiWriter) Write(kmer uint64) error {
	if kw.first {
		// Absolute value: there is no predecessor to diff against yet.
		var abs [8]byte
		binary.LittleEndian.PutUint64(abs[:], kmer)
		if _, err := kw.w.Write(abs[:]); err != nil {
			return err
		}
		kw.first = false
	} else if _, err := EncodeVarint(kw.w, kmer-kw.prev); err != nil {
		return err
	}

	// Shared bookkeeping once the value has been queued successfully.
	kw.prev = kmer
	kw.count++
	return nil
}
// Count reports how many k-mers have been accepted by Write so far.
// This is the value Close stores in the file header.
func (kw *KdiWriter) Count() uint64 {
return kw.count
}
// Close finalizes the file: it flushes any buffered k-mer data, overwrites
// the count placeholder in the header with the number of k-mers written,
// and closes the underlying file.
//
// If flushing or patching the header fails, the file is still closed and
// the first error encountered is returned. Otherwise the result of closing
// the file is returned.
func (kw *KdiWriter) Close() error {
	// The count slot sits immediately after the 4-byte magic.
	const countOffset = 4

	// All sequential data must reach the file before the header is patched.
	if err := kw.w.Flush(); err != nil {
		kw.file.Close()
		return err
	}

	var countBuf [8]byte
	binary.LittleEndian.PutUint64(countBuf[:], kw.count)

	// A positioned write patches the header in one step, without a separate
	// Seek call and its bare numeric whence argument.
	if _, err := kw.file.WriteAt(countBuf[:], countOffset); err != nil {
		kw.file.Close()
		return err
	}
	return kw.file.Close()
}