Files
obitools4/pkg/obiseq/merge.go

167 lines
3.1 KiB
Go
Raw Normal View History

package obiseq
import (
	"fmt"
	"log"
	"reflect"
	"strings"
)
// StatsOnValues is a tally table: it maps the string form of an annotation
// value to the number of times that value has been counted.
type StatsOnValues map[string]int
// HasStatsOn reports whether sequence carries a "merged_<key>" annotation.
//
// It returns false without touching the annotations when HasAnnotation
// reports that the sequence has none. The type of the stored value is not
// checked, only the presence of the annotation key.
func (sequence BioSequence) HasStatsOn(key string) bool {
	if sequence.HasAnnotation() {
		_, found := sequence.Annotations()["merged_"+key]
		return found
	}
	return false
}
2022-02-18 10:00:42 +01:00
func (sequence BioSequence) StatsOn(key string, na string) StatsOnValues {
mkey := "merged_" + key
annotations := sequence.Annotations()
istat, ok := annotations[mkey]
var stats StatsOnValues
var newstat bool
if ok {
switch istat := istat.(type) {
case StatsOnValues:
stats = istat
newstat = false
default:
stats = make(StatsOnValues, 100)
annotations[mkey] = stats
newstat = true
}
} else {
stats = make(StatsOnValues, 100)
annotations[mkey] = stats
newstat = true
}
2022-02-18 10:00:42 +01:00
if newstat && sequence.StatsPlusOne(key, sequence, na) {
delete(sequence.Annotations(), key)
}
return stats
}
2022-02-18 10:00:42 +01:00
func (sequence BioSequence) StatsPlusOne(key string, toAdd BioSequence, na string) bool {
sval := na
stats := sequence.StatsOn(key,na)
retval := false
if toAdd.HasAnnotation() {
value, ok := toAdd.Annotations()[key]
if ok {
switch value := value.(type) {
case string:
sval = value
case int,
uint8, uint16, uint32, uint64,
int8, int16, int32, int64, bool:
sval = fmt.Sprint(value)
default:
log.Fatalf("Trying to make stats on a none string, integer or boolean value (%v)", value)
}
2022-02-18 10:00:42 +01:00
retval = true
}
2022-02-18 10:00:42 +01:00
}
2022-02-18 10:00:42 +01:00
old, ok := stats[sval]
if !ok {
old = 0
}
stats[sval] = old + 1
return retval
}
// Merge adds every tally of toMerged to the matching entry of stats, creating
// entries that do not exist yet, and returns stats.
//
// The receiver is modified in place; toMerged is only read.
func (stats StatsOnValues) Merge(toMerged StatsOnValues) StatsOnValues {
	for value, n := range toMerged {
		// Reading an absent entry yields 0, so += covers new entries too.
		stats[value] += n
	}
	return stats
}
2022-02-18 10:00:42 +01:00
func (sequence BioSequence) Merge(tomerge BioSequence, na string, inplace bool, statsOn ...string) BioSequence {
if !inplace {
sequence = sequence.Copy()
}
if sequence.HasQualities() {
sequence.SetQualities(nil)
}
annotation := sequence.Annotations()
2022-02-18 10:00:42 +01:00
count := sequence.Count() + tomerge.Count()
2022-02-18 10:00:42 +01:00
for _, key := range statsOn {
if tomerge.HasStatsOn(key) {
2022-02-18 10:00:42 +01:00
smk := sequence.StatsOn(key,na)
mmk := tomerge.StatsOn(key,na)
smk.Merge(mmk)
} else {
2022-02-18 10:00:42 +01:00
sequence.StatsPlusOne(key, tomerge,na)
}
}
if tomerge.HasAnnotation() {
ma := tomerge.Annotations()
for k, va := range annotation {
if !strings.HasPrefix(k, "merged_") {
vm, ok := ma[k]
if !ok || vm != va {
delete(annotation, k)
}
}
}
} else {
for k := range annotation {
if !strings.HasPrefix(k, "merged_") {
delete(annotation, k)
}
}
}
annotation["count"] = count
return sequence
}
2022-02-18 10:00:42 +01:00
func (sequences BioSequenceSlice) Merge(na string, statsOn ...string) BioSequenceSlice {
seq := sequences[0]
seq.SetQualities(nil)
seq.Annotations()["count"] = 1
2022-02-18 10:00:42 +01:00
for _, toMerge := range sequences[1:] {
seq.Merge(toMerge, na, true, statsOn...)
toMerge.Recycle()
}
2022-02-18 10:00:42 +01:00
return sequences[0:1]
}
2022-02-18 10:00:42 +01:00
func MergeSliceWorker(na string, statsOn ...string) SeqSliceWorker {
worker := func(sequences BioSequenceSlice) BioSequenceSlice {
2022-02-18 10:00:42 +01:00
return sequences.Merge(na, statsOn...)
}
return worker
}