Files
obitools4/pkg/obikmer/kmer_set_disk.go

945 lines
25 KiB
Go
Raw Normal View History

package obikmer
import (
"fmt"
"io"
"iter"
"os"
"path"
"path/filepath"
"sort"
"sync"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidist"
"github.com/pelletier/go-toml/v2"
)
// MetadataFormat identifies a metadata serialization format.
// Only TOML is currently used for disk-based indices; the remaining
// values are kept so that existing CLI options continue to work.
type MetadataFormat int

// Known metadata formats. FormatTOML is the zero value.
const (
	FormatTOML MetadataFormat = iota
	FormatYAML
	FormatJSON
)

// String returns the file extension associated with the format.
// Every value other than FormatYAML and FormatJSON, including unknown
// ones, maps to "toml".
func (f MetadataFormat) String() string {
	if f == FormatYAML {
		return "yaml"
	}
	if f == FormatJSON {
		return "json"
	}
	return "toml"
}
// KmerSetGroup is a disk-based collection of N k-mer sets sharing the same
// k, m, and partition count P. After construction (via KmerSetGroupBuilder),
// the k-mer data is treated as immutable and operations are streaming
// (partition by partition). Note that identifiers, metadata and the list of
// sets can still be changed through the mutator methods defined in this
// file (SetId, SetSetID, SetAttribute, SetSetMetadata, RemoveSetByID, ...).
//
// On disk, each set s lives in <path>/set_<s>/ and each partition p of that
// set in part_<pppp>.kdi (see partitionPath); group information is stored in
// <path>/metadata.toml.
//
// A KmerSetGroup with Size()==1 is effectively a KmerSet (singleton).
type KmerSetGroup struct {
path string // root directory
id string // user-assigned identifier
k int // k-mer size
m int // minimizer size
partitions int // number of partitions P
n int // number of sets N
setsIDs []string // IDs of individual sets, indexed by set number
counts []uint64 // total k-mer count per set (sum over partitions)
setsMetadata []map[string]interface{} // per-set user metadata, indexed by set number
Metadata map[string]interface{} // group-level user metadata (exported, mutable)
}
// diskMetadata is the TOML-serializable structure for metadata.toml.
// It mirrors the fields of KmerSetGroup: OpenKmerSetGroup decodes it into a
// group and saveMetadata encodes a group back into it.
type diskMetadata struct {
ID string `toml:"id,omitempty"` // group identifier
K int `toml:"k"` // k-mer size
M int `toml:"m"` // minimizer size
Partitions int `toml:"partitions"` // number of partitions P
Type string `toml:"type"` // written as "KmerSetGroup" by saveMetadata; not checked on open
Size int `toml:"size"` // number of sets N
SetsIDs []string `toml:"sets_ids,omitempty"` // per-set identifiers
Counts []uint64 `toml:"counts,omitempty"` // per-set k-mer totals
SetsMetadata []map[string]interface{} `toml:"sets_metadata,omitempty"` // per-set user metadata
UserMetadata map[string]interface{} `toml:"user_metadata,omitempty"` // group-level user metadata
}
// OpenKmerSetGroup loads the index rooted at directory by decoding its
// metadata.toml file.
//
// Fields missing from the metadata are filled with defaults: an empty
// group-level metadata map, n empty set IDs, and n empty per-set metadata
// maps. When the metadata carries no counts, per-set totals are rebuilt by
// summing Count() over every partition file that can be opened; partition
// files that fail to open are skipped without error.
//
// An error is returned only when metadata.toml cannot be opened or decoded.
func OpenKmerSetGroup(directory string) (*KmerSetGroup, error) {
	file, err := os.Open(filepath.Join(directory, "metadata.toml"))
	if err != nil {
		return nil, fmt.Errorf("obikmer: open metadata: %w", err)
	}
	defer file.Close()

	var stored diskMetadata
	if err = toml.NewDecoder(file).Decode(&stored); err != nil {
		return nil, fmt.Errorf("obikmer: decode metadata: %w", err)
	}

	group := &KmerSetGroup{
		path:         directory,
		id:           stored.ID,
		k:            stored.K,
		m:            stored.M,
		partitions:   stored.Partitions,
		n:            stored.Size,
		setsIDs:      stored.SetsIDs,
		counts:       stored.Counts,
		setsMetadata: stored.SetsMetadata,
		Metadata:     stored.UserMetadata,
	}

	if group.Metadata == nil {
		group.Metadata = make(map[string]interface{})
	}
	if group.setsIDs == nil {
		group.setsIDs = make([]string, group.n)
	}
	if group.setsMetadata == nil {
		perSet := make([]map[string]interface{}, group.n)
		for i := range perSet {
			perSet[i] = make(map[string]interface{})
		}
		group.setsMetadata = perSet
	}
	if group.counts != nil {
		return group, nil
	}

	// No counts recorded: rebuild them from the partition files.
	totals := make([]uint64, group.n)
	for set := range totals {
		for part := 0; part < group.partitions; part++ {
			reader, openErr := NewKdiReader(group.partitionPath(set, part))
			if openErr != nil {
				continue
			}
			totals[set] += reader.Count()
			reader.Close()
		}
	}
	group.counts = totals
	return group, nil
}
// NewFilteredKmerSetGroup builds an in-memory KmerSetGroup from values the
// caller has already computed. It is used by the filter command to describe
// a new group after partitions have been filtered.
//
// The slices are stored as given (not copied), the group identifier is left
// empty and the group-level metadata starts as an empty map. Nothing is read
// from or written to disk, and the returned error is always nil.
func NewFilteredKmerSetGroup(
	directory string, k, m, partitions, n int,
	setsIDs []string, counts []uint64,
	setsMetadata []map[string]interface{},
) (*KmerSetGroup, error) {
	group := new(KmerSetGroup)
	group.path = directory
	group.k = k
	group.m = m
	group.partitions = partitions
	group.n = n
	group.setsIDs = setsIDs
	group.counts = counts
	group.setsMetadata = setsMetadata
	group.Metadata = make(map[string]interface{})
	return group, nil
}
// SaveMetadata writes the metadata.toml file. This is useful after
// modifying attributes or IDs on an already-finalized index, since the
// in-memory setters (SetId, SetSetID, SetAttribute, SetSetMetadata, ...)
// do not write to disk themselves.
// It is the exported entry point for saveMetadata and returns its error.
func (ksg *KmerSetGroup) SaveMetadata() error {
return ksg.saveMetadata()
}
// saveMetadata serializes the group's current in-memory state to
// <path>/metadata.toml, replacing any existing file (internal).
//
// The returned error is the first failure among creating the file, encoding
// the TOML document, and closing the file. The close error is reported
// because a failure at that point can mean the data never reached disk.
func (ksg *KmerSetGroup) saveMetadata() (err error) {
	meta := diskMetadata{
		ID:           ksg.id,
		K:            ksg.k,
		M:            ksg.m,
		Partitions:   ksg.partitions,
		Type:         "KmerSetGroup",
		Size:         ksg.n,
		SetsIDs:      ksg.setsIDs,
		Counts:       ksg.counts,
		SetsMetadata: ksg.setsMetadata,
		UserMetadata: ksg.Metadata,
	}

	f, err := os.Create(filepath.Join(ksg.path, "metadata.toml"))
	if err != nil {
		return err
	}
	defer func() {
		// Keep the encode error if there is one; otherwise surface Close's.
		if cerr := f.Close(); err == nil {
			err = cerr
		}
	}()

	return toml.NewEncoder(f).Encode(meta)
}
// partitionPath returns the file path for partition partIndex of set
// setIndex: <root>/set_<setIndex>/part_<partIndex>.kdi, where the partition
// index is zero-padded to at least four digits. The indices are not
// validated and the file is not checked for existence.
func (ksg *KmerSetGroup) partitionPath(setIndex, partIndex int) string {
return filepath.Join(ksg.path, fmt.Sprintf("set_%d", setIndex),
fmt.Sprintf("part_%04d.kdi", partIndex))
}
// Path returns the root directory of the index, i.e. the directory that
// holds metadata.toml and the set_<i> sub-directories.
func (ksg *KmerSetGroup) Path() string {
return ksg.path
}
// K returns the k-mer size shared by every set of the group.
func (ksg *KmerSetGroup) K() int {
return ksg.k
}
// M returns the minimizer size shared by every set of the group.
func (ksg *KmerSetGroup) M() int {
return ksg.m
}
// Partitions returns the number of partitions P into which each set of the
// group is split on disk.
func (ksg *KmerSetGroup) Partitions() int {
return ksg.partitions
}
// Size returns the number of sets N held by the group.
func (ksg *KmerSetGroup) Size() int {
return ksg.n
}
// Id returns the group identifier (the empty string when none was assigned).
func (ksg *KmerSetGroup) Id() string {
return ksg.id
}
// SetId sets the group identifier in memory only. The change is NOT written
// to disk by this method; call SaveMetadata afterwards to persist it.
func (ksg *KmerSetGroup) SetId(id string) {
ksg.id = id
}
// Len reports k-mer counts taken from the group's recorded per-set totals.
//
// Called without argument, it returns the sum over all sets. Called with a
// set index, it returns the count of that set, or 0 when the index is out of
// range. Only the first argument is considered; any further ones are ignored.
func (ksg *KmerSetGroup) Len(setIndex ...int) uint64 {
	if len(setIndex) > 0 {
		which := setIndex[0]
		if which >= 0 && which < ksg.n {
			return ksg.counts[which]
		}
		return 0
	}

	var sum uint64
	for i := range ksg.counts {
		sum += ksg.counts[i]
	}
	return sum
}
// Contains reports whether kmer is present in the set at setIndex.
// It returns false when setIndex is out of range.
//
// Every partition of the set is searched concurrently, one goroutine per
// partition, because the partition holding a k-mer cannot be derived from
// the k-mer value alone (it depends on its minimizer context). Within a
// partition the reader is positioned with SeekTo and then scanned forward
// until the k-mer is found, a larger value is met, or the data ends; this
// relies on values being stored in ascending order. NewKdiIndexedReader and
// SeekTo presumably use the .kdx sparse index when available — confirm
// against their implementation.
//
// A partition that cannot be opened or positioned is treated as not
// containing the k-mer.
func (ksg *KmerSetGroup) Contains(setIndex int, kmer uint64) bool {
	if setIndex < 0 || setIndex >= ksg.n {
		return false
	}

	// searchPartition reports whether kmer occurs in one partition file.
	searchPartition := func(part int) bool {
		r, err := NewKdiIndexedReader(ksg.partitionPath(setIndex, part))
		if err != nil {
			return false
		}
		defer r.Close()

		// Jump close to the target, then scan linearly from there.
		if err := r.SeekTo(kmer); err != nil {
			return false
		}
		for {
			v, ok := r.Next()
			if !ok || v > kmer {
				return false
			}
			if v == kmer {
				return true
			}
		}
	}

	// The channel has one slot per worker and every worker sends exactly
	// once, so no send can block. Returning early therefore never strands a
	// goroutine and no draining is needed.
	found := make(chan bool, ksg.partitions)
	for p := 0; p < ksg.partitions; p++ {
		go func(part int) {
			found <- searchPartition(part)
		}(p)
	}

	for i := 0; i < ksg.partitions; i++ {
		if <-found {
			return true
		}
	}
	return false
}
// Iterator returns a sequence over the k-mers of the set at setIndex.
//
// When the sequence is consumed, every partition file of the set is opened;
// files that cannot be opened and partitions reporting a zero count are
// skipped. The remaining readers are combined with NewKWayMerge and values
// are yielded in the order that merge produces them.
// NOTE(review): the previous comment stated that ordering is only guaranteed
// within a partition and pointed to iteratorSorted for a global order —
// confirm what NewKWayMerge guarantees.
//
// The sequence is empty when setIndex is out of range or no partition holds
// data. The merge is closed when iteration ends or the consumer stops early.
func (ksg *KmerSetGroup) Iterator(setIndex int) iter.Seq[uint64] {
	return func(yield func(uint64) bool) {
		if setIndex < 0 || setIndex >= ksg.n {
			return
		}

		open := make([]*KdiReader, 0, ksg.partitions)
		for part := 0; part < ksg.partitions; part++ {
			reader, err := NewKdiReader(ksg.partitionPath(setIndex, part))
			if err != nil {
				continue
			}
			if reader.Count() > 0 {
				open = append(open, reader)
				continue
			}
			reader.Close()
		}
		if len(open) == 0 {
			return
		}

		merged := NewKWayMerge(open)
		defer merged.Close()

		for kmer, _, ok := merged.Next(); ok; kmer, _, ok = merged.Next() {
			if !yield(kmer) {
				return
			}
		}
	}
}
// ==============================
// Attribute API (compatible with old API)
// ==============================
// HasAttribute checks if a metadata key exists in the group-level Metadata
// map. It looks only at that map: the built-in keys "id" and "k" served by
// GetAttribute are reported as present only if they were also stored in
// Metadata.
func (ksg *KmerSetGroup) HasAttribute(key string) bool {
_, ok := ksg.Metadata[key]
return ok
}
// GetAttribute looks up a group attribute by name.
//
// The keys "id" and "k" are built in and always found: they return the group
// identifier and the k-mer size respectively. Any other key is looked up in
// the group-level Metadata map, and the boolean tells whether it was present.
func (ksg *KmerSetGroup) GetAttribute(key string) (interface{}, bool) {
	if key == "id" {
		return ksg.Id(), true
	}
	if key == "k" {
		return ksg.K(), true
	}
	stored, present := ksg.Metadata[key]
	return stored, present
}
// SetAttribute assigns a group attribute by name.
//
// The key "id" updates the group identifier and requires a string value; it
// panics for any other value type. The key "k" always panics because the
// k-mer size cannot be changed. Every other key is stored in the group-level
// Metadata map. Nothing is written to disk.
func (ksg *KmerSetGroup) SetAttribute(key string, value interface{}) {
	if key == "k" {
		panic("k is immutable")
	}
	if key != "id" {
		ksg.Metadata[key] = value
		return
	}

	id, isString := value.(string)
	if !isString {
		panic(fmt.Sprintf("id must be a string, got %T", value))
	}
	ksg.SetId(id)
}
// DeleteAttribute removes a metadata attribute from the group-level
// Metadata map. Deleting a key that is not present is a no-op. The change is
// made in memory only; nothing is written to disk here.
func (ksg *KmerSetGroup) DeleteAttribute(key string) {
delete(ksg.Metadata, key)
}
// GetIntAttribute returns the attribute named key converted to int.
//
// Values of dynamic type int, int64 and float64 are accepted (float64 is
// truncated by the int conversion). The boolean is false when the attribute
// does not exist or holds any other type.
func (ksg *KmerSetGroup) GetIntAttribute(key string) (int, bool) {
	raw, found := ksg.GetAttribute(key)
	if !found {
		return 0, false
	}
	if n, ok := raw.(int); ok {
		return n, true
	}
	if n, ok := raw.(int64); ok {
		return int(n), true
	}
	if f, ok := raw.(float64); ok {
		return int(f), true
	}
	return 0, false
}
// GetStringAttribute returns the attribute named key as a string.
//
// String values are returned unchanged; values of any other type are
// rendered with the %v format verb. The boolean is false only when the
// attribute does not exist.
func (ksg *KmerSetGroup) GetStringAttribute(key string) (string, bool) {
	raw, found := ksg.GetAttribute(key)
	if !found {
		return "", false
	}
	switch text := raw.(type) {
	case string:
		return text, true
	default:
		return fmt.Sprintf("%v", text), true
	}
}
// ==============================
// Jaccard metrics (streaming, disk-based)
// ==============================
// JaccardDistanceMatrix computes a pairwise Jaccard distance matrix for all
// sets in the group.
//
// Rows and columns are labelled with the set IDs, falling back to "set_<i>"
// for sets without an ID. For each pair (i, j) with i < j the distance is
// 1 - |A∩B| / |A∪B|, where both sizes are accumulated over all partitions;
// a pair whose union is empty gets distance 1.0. Only the i < j entries are
// set on the returned matrix.
//
// Partitions are processed concurrently, one goroutine per partition. Each
// goroutine loads that partition's k-mers for every set into memory, so
// peak memory grows with the partition size times the number of sets.
// Partition files that cannot be opened are treated as empty. The pairwise
// intersection is a linear merge and therefore assumes each partition file
// yields its k-mers in strictly ascending order.
func (ksg *KmerSetGroup) JaccardDistanceMatrix() *obidist.DistMatrix {
	n := ksg.n

	labels := make([]string, n)
	for i := range labels {
		if i < len(ksg.setsIDs) && ksg.setsIDs[i] != "" {
			labels[i] = ksg.setsIDs[i]
		} else {
			labels[i] = fmt.Sprintf("set_%d", i)
		}
	}
	dm := obidist.NewDistMatrixWithLabels(labels)

	// Global accumulators; only the upper triangle (i < j) is used.
	intersections := make([][]uint64, n)
	unions := make([][]uint64, n)
	for i := 0; i < n; i++ {
		intersections[i] = make([]uint64, n)
		unions[i] = make([]uint64, n)
	}

	// countShared returns the number of values common to two ascending
	// sequences, using a two-pointer merge.
	countShared := func(a, b []uint64) uint64 {
		var shared uint64
		ai, bi := 0, 0
		for ai < len(a) && bi < len(b) {
			switch {
			case a[ai] == b[bi]:
				shared++
				ai++
				bi++
			case a[ai] < b[bi]:
				ai++
			default:
				bi++
			}
		}
		return shared
	}

	var mu sync.Mutex
	var wg sync.WaitGroup
	for p := 0; p < ksg.partitions; p++ {
		wg.Add(1)
		go func(part int) {
			defer wg.Done()

			// Load this partition for every set. Each file is opened once
			// and closed as soon as it has been read, including the empty
			// ones, so no file handle outlives this loop.
			setKmers := make([][]uint64, n)
			loaded := false
			for s := 0; s < n; s++ {
				r, err := NewKdiReader(ksg.partitionPath(s, part))
				if err != nil {
					// Unreadable partition: contributes no k-mers.
					continue
				}
				if r.Count() > 0 {
					kmers := make([]uint64, 0, r.Count())
					for {
						v, ok := r.Next()
						if !ok {
							break
						}
						kmers = append(kmers, v)
					}
					setKmers[s] = kmers
					loaded = true
				}
				r.Close()
			}
			if !loaded {
				return
			}

			// Compute this partition's contribution without holding the
			// lock, then merge it into the global accumulators.
			localInter := make([][]uint64, n)
			localUnion := make([][]uint64, n)
			for i := 0; i < n; i++ {
				localInter[i] = make([]uint64, n)
				localUnion[i] = make([]uint64, n)
			}
			for i := 0; i < n; i++ {
				for j := i + 1; j < n; j++ {
					a, b := setKmers[i], setKmers[j]
					inter := countShared(a, b)
					localInter[i][j] = inter
					localUnion[i][j] = uint64(len(a)) + uint64(len(b)) - inter
				}
			}

			mu.Lock()
			for i := 0; i < n; i++ {
				for j := i + 1; j < n; j++ {
					intersections[i][j] += localInter[i][j]
					unions[i][j] += localUnion[i][j]
				}
			}
			mu.Unlock()
		}(p)
	}
	wg.Wait()

	// Turn the accumulated counts into distances.
	for i := 0; i < n-1; i++ {
		for j := i + 1; j < n; j++ {
			u := unions[i][j]
			if u == 0 {
				dm.Set(i, j, 1.0)
			} else {
				dm.Set(i, j, 1.0-float64(intersections[i][j])/float64(u))
			}
		}
	}
	return dm
}
// JaccardSimilarityMatrix computes a pairwise Jaccard similarity matrix for
// all sets in the group.
//
// It runs JaccardDistanceMatrix and stores 1 - distance for every pair
// (i, j) with i < j in a similarity matrix labelled with the set IDs
// ("set_<i>" for sets without an ID).
func (ksg *KmerSetGroup) JaccardSimilarityMatrix() *obidist.DistMatrix {
	count := ksg.n

	names := make([]string, 0, count)
	for idx := 0; idx < count; idx++ {
		if idx >= len(ksg.setsIDs) || ksg.setsIDs[idx] == "" {
			names = append(names, fmt.Sprintf("set_%d", idx))
			continue
		}
		names = append(names, ksg.setsIDs[idx])
	}

	// The similarity is derived from the distance computation.
	distances := ksg.JaccardDistanceMatrix()
	similarities := obidist.NewSimilarityMatrixWithLabels(names)
	for row := 0; row < count-1; row++ {
		for col := row + 1; col < count; col++ {
			similarities.Set(row, col, 1.0-distances.Get(row, col))
		}
	}
	return similarities
}
// ==============================
// Set ID accessors
// ==============================
// SetsIDs returns a copy of the per-set string identifiers, in set-index
// order. The returned slice is never nil and can be modified by the caller
// without affecting the group.
func (ksg *KmerSetGroup) SetsIDs() []string {
out := make([]string, len(ksg.setsIDs))
copy(out, ksg.setsIDs)
return out
}
// SetIDOf returns the string ID of the set stored at index, or "" when the
// index lies outside [0, Size()).
func (ksg *KmerSetGroup) SetIDOf(index int) string {
	if index >= 0 && index < ksg.n {
		return ksg.setsIDs[index]
	}
	return ""
}
// SetSetID assigns id to the set stored at index. Indices outside
// [0, Size()) are silently ignored. The change is made in memory only.
func (ksg *KmerSetGroup) SetSetID(index int, id string) {
	if index < 0 || index >= ksg.n {
		return
	}
	ksg.setsIDs[index] = id
}
// IndexOfSetID returns the index of the first set whose ID equals id, or -1
// when no set carries that ID.
func (ksg *KmerSetGroup) IndexOfSetID(id string) int {
	for i := 0; i < len(ksg.setsIDs); i++ {
		if ksg.setsIDs[i] == id {
			return i
		}
	}
	return -1
}
// MatchSetIDs returns the indices of all sets whose ID matches at least one
// of the given glob patterns, sorted in ascending order and without
// duplicates. Matching is done with path.Match, so *, ? and [...] are
// supported.
//
// A malformed pattern yields an error as soon as path.Match reports it.
// Patterns are only evaluated against existing set IDs, so a group without
// sets never reports a pattern error. The returned slice is empty (not nil)
// when nothing matches.
func (ksg *KmerSetGroup) MatchSetIDs(patterns []string) ([]int, error) {
	hits := make(map[int]struct{})
	for _, glob := range patterns {
		for idx := range ksg.setsIDs {
			ok, err := path.Match(glob, ksg.setsIDs[idx])
			if err != nil {
				return nil, fmt.Errorf("obikmer: invalid glob pattern %q: %w", glob, err)
			}
			if ok {
				hits[idx] = struct{}{}
			}
		}
	}

	indices := make([]int, 0, len(hits))
	for idx := range hits {
		indices = append(indices, idx)
	}
	sort.Ints(indices)
	return indices, nil
}
// ==============================
// Per-set metadata accessors
// ==============================
// GetSetMetadata looks up key in the metadata of the set at setIndex.
// The boolean is false when the index is out of range or the key is absent.
func (ksg *KmerSetGroup) GetSetMetadata(setIndex int, key string) (interface{}, bool) {
	if setIndex >= 0 && setIndex < ksg.n {
		value, present := ksg.setsMetadata[setIndex][key]
		return value, present
	}
	return nil, false
}
// SetSetMetadata stores value under key in the metadata of the set at
// setIndex, creating that set's metadata map if it does not exist yet.
// Out-of-range indices are silently ignored. The change is in memory only.
func (ksg *KmerSetGroup) SetSetMetadata(setIndex int, key string, value interface{}) {
	if setIndex < 0 || setIndex >= ksg.n {
		return
	}
	entry := ksg.setsMetadata[setIndex]
	if entry == nil {
		entry = make(map[string]interface{})
		ksg.setsMetadata[setIndex] = entry
	}
	entry[key] = value
}
// DeleteSetMetadata removes key from the metadata of the set at setIndex.
// Out-of-range indices and absent keys are silently ignored.
func (ksg *KmerSetGroup) DeleteSetMetadata(setIndex int, key string) {
	if inRange := setIndex >= 0 && setIndex < ksg.n; inRange {
		delete(ksg.setsMetadata[setIndex], key)
	}
}
// AllSetMetadata returns a shallow copy of the metadata map of the set at
// setIndex: the map itself is new, the values are shared with the group.
// It returns nil when the index is out of range.
func (ksg *KmerSetGroup) AllSetMetadata(setIndex int) map[string]interface{} {
	if setIndex < 0 || setIndex >= ksg.n {
		return nil
	}
	src := ksg.setsMetadata[setIndex]
	dup := make(map[string]interface{}, len(src))
	for name := range src {
		dup[name] = src[name]
	}
	return dup
}
// ==============================
// Exported partition path and compatibility
// ==============================
// PartitionPath returns the file path for partition partIndex of set setIndex.
// It is the exported form of partitionPath; the indices are not validated
// and the file is not checked for existence.
func (ksg *KmerSetGroup) PartitionPath(setIndex, partIndex int) string {
return ksg.partitionPath(setIndex, partIndex)
}
// SpectrumPath returns the path to the spectrum.bin file for the given set,
// i.e. <root>/set_<setIndex>/spectrum.bin. The index is not validated and
// the file is not checked for existence.
func (ksg *KmerSetGroup) SpectrumPath(setIndex int) string {
return filepath.Join(ksg.path, fmt.Sprintf("set_%d", setIndex), "spectrum.bin")
}
// Spectrum loads the k-mer frequency spectrum stored for the given set.
//
// It returns (nil, nil) when the spectrum file does not exist. In every
// other case, including other stat failures, the result of ReadSpectrum on
// the spectrum path is returned.
func (ksg *KmerSetGroup) Spectrum(setIndex int) (*KmerSpectrum, error) {
	specFile := ksg.SpectrumPath(setIndex)
	_, statErr := os.Stat(specFile)
	if os.IsNotExist(statErr) {
		return nil, nil
	}
	return ReadSpectrum(specFile)
}
// IsCompatibleWith returns true if the other group has the same k, m, and partitions.
// The number of sets, identifiers and metadata are not compared.
// other must be non-nil: its fields are read directly.
func (ksg *KmerSetGroup) IsCompatibleWith(other *KmerSetGroup) bool {
return ksg.k == other.k && ksg.m == other.m && ksg.partitions == other.partitions
}
// ==============================
// Set management operations
// ==============================
// NewEmptyCompatible creates a KmerSetGroup with no sets at destDir, using
// the same k, m, and partition count as this group, and writes its
// metadata.toml.
//
// The directory is created with MkdirAll, so an existing directory is
// accepted and an existing metadata.toml in it is overwritten. The group
// identifier and group-level metadata are not copied from the receiver.
func (ksg *KmerSetGroup) NewEmptyCompatible(destDir string) (*KmerSetGroup, error) {
	mkErr := os.MkdirAll(destDir, 0755)
	if mkErr != nil {
		return nil, fmt.Errorf("obikmer: create directory: %w", mkErr)
	}

	empty := &KmerSetGroup{
		path:       destDir,
		k:          ksg.k,
		m:          ksg.m,
		partitions: ksg.partitions,
	}
	empty.setsIDs = []string{}
	empty.counts = []uint64{}
	empty.setsMetadata = []map[string]interface{}{}
	empty.Metadata = make(map[string]interface{})

	saveErr := empty.saveMetadata()
	if saveErr != nil {
		return nil, fmt.Errorf("obikmer: write metadata: %w", saveErr)
	}
	return empty, nil
}
// RemoveSetByID deletes the set carrying the given ID from the group.
//
// The steps are, in order: remove the set's directory, rename every
// following set_<i> directory to set_<i-1>, drop the set's entry from the
// in-memory ID, count and metadata slices, and rewrite metadata.toml.
// An error is returned when the ID is unknown or any file-system step
// fails; a failure part-way through leaves the directory in an
// intermediate state, since earlier steps are not rolled back.
func (ksg *KmerSetGroup) RemoveSetByID(id string) error {
	target := ksg.IndexOfSetID(id)
	if target < 0 {
		return fmt.Errorf("obikmer: set ID %q not found", id)
	}

	// dirOf returns the directory holding set number i.
	dirOf := func(i int) string {
		return filepath.Join(ksg.path, fmt.Sprintf("set_%d", i))
	}

	if err := os.RemoveAll(dirOf(target)); err != nil {
		return fmt.Errorf("obikmer: remove set directory: %w", err)
	}

	// Shift the following sets down by one to keep numbering contiguous.
	for next := target + 1; next < ksg.n; next++ {
		if err := os.Rename(dirOf(next), dirOf(next-1)); err != nil {
			return fmt.Errorf("obikmer: rename set_%d to set_%d: %w", next, next-1, err)
		}
	}

	after := target + 1
	ksg.setsIDs = append(ksg.setsIDs[:target], ksg.setsIDs[after:]...)
	ksg.counts = append(ksg.counts[:target], ksg.counts[after:]...)
	ksg.setsMetadata = append(ksg.setsMetadata[:target], ksg.setsMetadata[after:]...)
	ksg.n--

	return ksg.saveMetadata()
}
// CopySetsByIDTo copies the sets named by ids into the KmerSetGroup located
// at destDir and returns that destination group.
//
// All source IDs are resolved first; an unknown ID aborts before anything is
// written. If destDir has a metadata.toml that can be stat'ed, the existing
// group is opened and must share k, m, and partition count with the source;
// otherwise a new empty compatible group is created there.
//
// Each set is appended at the end of the destination. When the destination
// already holds a set with the same ID, an error is returned unless force is
// true, in which case the existing set is removed first. For every set, all
// partition files are copied, together with their .kdx index and the
// spectrum.bin file when those exist. The recorded count and a copy of the
// per-set metadata travel with the set. The destination metadata is written
// once all sets have been copied.
func (ksg *KmerSetGroup) CopySetsByIDTo(ids []string, destDir string, force bool) (*KmerSetGroup, error) {
	// Resolve every source ID up front.
	sources := make([]int, 0, len(ids))
	for _, id := range ids {
		found := ksg.IndexOfSetID(id)
		if found < 0 {
			return nil, fmt.Errorf("obikmer: source set ID %q not found", id)
		}
		sources = append(sources, found)
	}

	// Open the destination group, or create it when it has no metadata yet.
	var (
		dest *KmerSetGroup
		err  error
	)
	if _, statErr := os.Stat(filepath.Join(destDir, "metadata.toml")); statErr != nil {
		dest, err = ksg.NewEmptyCompatible(destDir)
		if err != nil {
			return nil, err
		}
	} else {
		dest, err = OpenKmerSetGroup(destDir)
		if err != nil {
			return nil, fmt.Errorf("obikmer: open destination: %w", err)
		}
		if !ksg.IsCompatibleWith(dest) {
			return nil, fmt.Errorf("obikmer: incompatible groups: source (k=%d, m=%d, P=%d) vs dest (k=%d, m=%d, P=%d)",
				ksg.k, ksg.m, ksg.partitions, dest.k, dest.m, dest.partitions)
		}
	}

	for pos, from := range sources {
		setID := ids[pos]

		// Resolve an ID clash in the destination.
		if dest.IndexOfSetID(setID) >= 0 {
			if !force {
				return nil, fmt.Errorf("obikmer: set ID %q already exists in destination (use force to replace)", setID)
			}
			if err := dest.RemoveSetByID(setID); err != nil {
				return nil, fmt.Errorf("obikmer: remove existing set %q in destination: %w", setID, err)
			}
		}

		// The copied set becomes the last set of the destination.
		to := dest.n
		toDir := filepath.Join(destDir, fmt.Sprintf("set_%d", to))
		if err := os.MkdirAll(toDir, 0755); err != nil {
			return nil, fmt.Errorf("obikmer: create dest set dir: %w", err)
		}

		// Partition files, each followed by its optional .kdx index.
		for part := 0; part < ksg.partitions; part++ {
			fromKdi := ksg.partitionPath(from, part)
			toKdi := dest.partitionPath(to, part)
			if err := copyFile(fromKdi, toKdi); err != nil {
				return nil, fmt.Errorf("obikmer: copy partition %d of set %q: %w", part, setID, err)
			}

			fromKdx := KdxPathForKdi(fromKdi)
			if _, statErr := os.Stat(fromKdx); statErr != nil {
				continue
			}
			if err := copyFile(fromKdx, KdxPathForKdi(toKdi)); err != nil {
				return nil, fmt.Errorf("obikmer: copy index %d of set %q: %w", part, setID, err)
			}
		}

		// Optional spectrum file.
		fromSpec := ksg.SpectrumPath(from)
		if _, statErr := os.Stat(fromSpec); statErr == nil {
			if err := copyFile(fromSpec, filepath.Join(toDir, "spectrum.bin")); err != nil {
				return nil, fmt.Errorf("obikmer: copy spectrum of set %q: %w", setID, err)
			}
		}

		// Register the new set in the destination's in-memory state.
		dest.setsIDs = append(dest.setsIDs, setID)
		dest.counts = append(dest.counts, ksg.counts[from])
		meta := ksg.AllSetMetadata(from)
		if meta == nil {
			meta = make(map[string]interface{})
		}
		dest.setsMetadata = append(dest.setsMetadata, meta)
		dest.n++
	}

	if err := dest.saveMetadata(); err != nil {
		return nil, fmt.Errorf("obikmer: save destination metadata: %w", err)
	}
	return dest, nil
}
// copyFile copies the contents of the file at src into a file at dst,
// creating dst or truncating it if it already exists.
//
// It returns the first error encountered while opening src, creating dst,
// copying the data, or closing dst. When the copy or the close fails, the
// incomplete dst is removed so that a truncated file is never left behind
// looking like a valid copy.
func copyFile(src, dst string) (err error) {
	in, err := os.Open(src)
	if err != nil {
		return err
	}
	defer in.Close()

	out, err := os.Create(dst)
	if err != nil {
		return err
	}
	defer func() {
		// A close failure on a written file can mean lost data: report it
		// unless an earlier error is already being returned.
		if cerr := out.Close(); err == nil {
			err = cerr
		}
		if err != nil {
			// Best-effort cleanup; the original error is the one that matters.
			_ = os.Remove(dst)
		}
	}()

	_, err = io.Copy(out, in)
	return err
}