2022-02-14 00:01:01 +01:00
|
|
|
package obiseq
|
|
|
|
|
|
|
|
import (
	"fmt"
	"log"
	"reflect"
	"strings"
)
|
|
|
|
|
|
|
|
// StatsOnValues is a table of occurrence counts keyed by the string form of
// an annotation value.
type StatsOnValues map[string]int
|
|
|
|
|
|
|
|
func (sequence BioSequence) HasStatsOn(key string) bool {
|
|
|
|
if !sequence.HasAnnotation() {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
mkey := "merged_" + key
|
|
|
|
annotations := sequence.Annotations()
|
|
|
|
_, ok := annotations[mkey]
|
|
|
|
|
|
|
|
return ok
|
|
|
|
}
|
|
|
|
|
2022-02-18 10:00:42 +01:00
|
|
|
func (sequence BioSequence) StatsOn(key string, na string) StatsOnValues {
|
2022-02-14 00:01:01 +01:00
|
|
|
mkey := "merged_" + key
|
|
|
|
annotations := sequence.Annotations()
|
|
|
|
istat, ok := annotations[mkey]
|
|
|
|
|
|
|
|
var stats StatsOnValues
|
|
|
|
var newstat bool
|
|
|
|
|
|
|
|
if ok {
|
|
|
|
switch istat := istat.(type) {
|
|
|
|
case StatsOnValues:
|
|
|
|
stats = istat
|
|
|
|
newstat = false
|
|
|
|
default:
|
|
|
|
stats = make(StatsOnValues, 100)
|
|
|
|
annotations[mkey] = stats
|
|
|
|
newstat = true
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
stats = make(StatsOnValues, 100)
|
|
|
|
annotations[mkey] = stats
|
|
|
|
newstat = true
|
|
|
|
}
|
|
|
|
|
2022-02-18 10:00:42 +01:00
|
|
|
if newstat && sequence.StatsPlusOne(key, sequence, na) {
|
2022-02-14 00:01:01 +01:00
|
|
|
delete(sequence.Annotations(), key)
|
|
|
|
}
|
|
|
|
|
|
|
|
return stats
|
|
|
|
}
|
|
|
|
|
2022-02-18 10:00:42 +01:00
|
|
|
func (sequence BioSequence) StatsPlusOne(key string, toAdd BioSequence, na string) bool {
|
|
|
|
sval := na
|
|
|
|
stats := sequence.StatsOn(key,na)
|
|
|
|
retval := false
|
|
|
|
|
2022-02-14 00:01:01 +01:00
|
|
|
if toAdd.HasAnnotation() {
|
|
|
|
value, ok := toAdd.Annotations()[key]
|
|
|
|
|
|
|
|
if ok {
|
|
|
|
|
|
|
|
switch value := value.(type) {
|
|
|
|
case string:
|
|
|
|
sval = value
|
|
|
|
case int,
|
|
|
|
uint8, uint16, uint32, uint64,
|
|
|
|
int8, int16, int32, int64, bool:
|
|
|
|
sval = fmt.Sprint(value)
|
|
|
|
default:
|
|
|
|
log.Fatalf("Trying to make stats on a none string, integer or boolean value (%v)", value)
|
|
|
|
}
|
2022-02-18 10:00:42 +01:00
|
|
|
retval = true
|
2022-02-14 00:01:01 +01:00
|
|
|
}
|
2022-02-18 10:00:42 +01:00
|
|
|
|
2022-02-14 00:01:01 +01:00
|
|
|
}
|
|
|
|
|
2022-02-18 10:00:42 +01:00
|
|
|
old, ok := stats[sval]
|
|
|
|
if !ok {
|
|
|
|
old = 0
|
|
|
|
}
|
|
|
|
stats[sval] = old + 1
|
|
|
|
|
|
|
|
return retval
|
2022-02-14 00:01:01 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
func (stats StatsOnValues) Merge(toMerged StatsOnValues) StatsOnValues {
|
|
|
|
for k, val := range toMerged {
|
|
|
|
old, ok := stats[k]
|
|
|
|
if !ok {
|
|
|
|
old = 0
|
|
|
|
}
|
|
|
|
stats[k] = old + val
|
|
|
|
}
|
|
|
|
|
|
|
|
return stats
|
|
|
|
}
|
|
|
|
|
2022-02-18 10:00:42 +01:00
|
|
|
func (sequence BioSequence) Merge(tomerge BioSequence, na string, inplace bool, statsOn ...string) BioSequence {
|
2022-02-14 00:01:01 +01:00
|
|
|
if !inplace {
|
|
|
|
sequence = sequence.Copy()
|
|
|
|
}
|
|
|
|
|
2022-02-15 00:47:02 +01:00
|
|
|
if sequence.HasQualities() {
|
|
|
|
sequence.SetQualities(nil)
|
|
|
|
}
|
|
|
|
|
2022-02-14 00:01:01 +01:00
|
|
|
annotation := sequence.Annotations()
|
|
|
|
|
2022-02-18 10:00:42 +01:00
|
|
|
count := sequence.Count() + tomerge.Count()
|
2022-02-14 00:01:01 +01:00
|
|
|
|
2022-02-18 10:00:42 +01:00
|
|
|
for _, key := range statsOn {
|
2022-02-14 00:01:01 +01:00
|
|
|
if tomerge.HasStatsOn(key) {
|
2022-02-18 10:00:42 +01:00
|
|
|
smk := sequence.StatsOn(key,na)
|
|
|
|
mmk := tomerge.StatsOn(key,na)
|
2022-02-14 00:01:01 +01:00
|
|
|
smk.Merge(mmk)
|
|
|
|
} else {
|
2022-02-18 10:00:42 +01:00
|
|
|
sequence.StatsPlusOne(key, tomerge,na)
|
2022-02-14 00:01:01 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-02-15 00:47:02 +01:00
|
|
|
if tomerge.HasAnnotation() {
|
|
|
|
ma := tomerge.Annotations()
|
|
|
|
for k, va := range annotation {
|
|
|
|
if !strings.HasPrefix(k, "merged_") {
|
|
|
|
vm, ok := ma[k]
|
|
|
|
if !ok || vm != va {
|
|
|
|
delete(annotation, k)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
for k := range annotation {
|
|
|
|
if !strings.HasPrefix(k, "merged_") {
|
|
|
|
delete(annotation, k)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
annotation["count"] = count
|
|
|
|
|
2022-02-14 00:01:01 +01:00
|
|
|
return sequence
|
|
|
|
}
|
2022-02-15 00:47:02 +01:00
|
|
|
|
2022-02-18 10:00:42 +01:00
|
|
|
func (sequences BioSequenceSlice) Merge(na string, statsOn ...string) BioSequenceSlice {
|
|
|
|
seq := sequences[0]
|
|
|
|
seq.SetQualities(nil)
|
|
|
|
seq.Annotations()["count"] = 1
|
2022-02-15 00:47:02 +01:00
|
|
|
|
2022-02-18 10:00:42 +01:00
|
|
|
for _, toMerge := range sequences[1:] {
|
|
|
|
seq.Merge(toMerge, na, true, statsOn...)
|
|
|
|
toMerge.Recycle()
|
2022-02-15 00:47:02 +01:00
|
|
|
}
|
|
|
|
|
2022-02-18 10:00:42 +01:00
|
|
|
return sequences[0:1]
|
2022-02-15 00:47:02 +01:00
|
|
|
}
|
|
|
|
|
2022-02-18 10:00:42 +01:00
|
|
|
func MergeSliceWorker(na string, statsOn ...string) SeqSliceWorker {
|
2022-02-15 00:47:02 +01:00
|
|
|
|
|
|
|
worker := func(sequences BioSequenceSlice) BioSequenceSlice {
|
2022-02-18 10:00:42 +01:00
|
|
|
return sequences.Merge(na, statsOn...)
|
2022-02-15 00:47:02 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
return worker
|
|
|
|
}
|