package obiseq import ( "fmt" "strings" "git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils" log "github.com/sirupsen/logrus" ) type StatsOnValues map[string]int func (sequence *BioSequence) HasStatsOn(key string) bool { if !sequence.HasAnnotation() { return false } mkey := "merged_" + key annotations := sequence.Annotations() _, ok := annotations[mkey] return ok } func (sequence *BioSequence) StatsOn(key string, na string) StatsOnValues { mkey := "merged_" + key annotations := sequence.Annotations() istat, ok := annotations[mkey] var stats StatsOnValues var newstat bool if ok { switch istat := istat.(type) { case StatsOnValues: stats = istat newstat = false case map[string]interface{}: stats = make(StatsOnValues, len(istat)) var err error for k, v := range istat { stats[k], err = goutils.InterfaceToInt(v) if err != nil { log.Panicf("In sequence %s : %s stat tag not only containing integer values %s", sequence.Id(), mkey, istat) } } default: stats = make(StatsOnValues, 100) annotations[mkey] = stats newstat = true } } else { stats = make(StatsOnValues, 100) annotations[mkey] = stats newstat = true } if newstat && sequence.StatsPlusOne(key, sequence, na) { delete(sequence.Annotations(), key) } return stats } func (sequence *BioSequence) StatsPlusOne(key string, toAdd *BioSequence, na string) bool { sval := na stats := sequence.StatsOn(key, na) retval := false if toAdd.HasAnnotation() { value, ok := toAdd.Annotations()[key] if ok { switch value := value.(type) { case string: sval = value case int, uint8, uint16, uint32, uint64, int8, int16, int32, int64, bool: sval = fmt.Sprint(value) default: log.Fatalf("Trying to make stats on a none string, integer or boolean value (%v)", value) } retval = true } } old, ok := stats[sval] if !ok { old = 0 } stats[sval] = old + 1 return retval } func (stats StatsOnValues) Merge(toMerged StatsOnValues) StatsOnValues { for k, val := range toMerged { old, ok := stats[k] if !ok { old = 0 } stats[k] = old + val } return stats } func (sequence *BioSequence) Merge(tomerge *BioSequence, na string, inplace bool, statsOn ...string) *BioSequence { if !inplace { sequence = sequence.Copy() } if sequence.HasQualities() { sequence.SetQualities(nil) } annotation := sequence.Annotations() count := sequence.Count() + tomerge.Count() for _, key := range statsOn { if tomerge.HasStatsOn(key) { smk := sequence.StatsOn(key, na) mmk := tomerge.StatsOn(key, na) smk.Merge(mmk) } else { sequence.StatsPlusOne(key, tomerge, na) } } if tomerge.HasAnnotation() { ma := tomerge.Annotations() for k, va := range annotation { if !strings.HasPrefix(k, "merged_") { vm, ok := ma[k] if !ok || vm != va { delete(annotation, k) } } } } else { for k := range annotation { if !strings.HasPrefix(k, "merged_") { delete(annotation, k) } } } annotation["count"] = count return sequence } func (sequences BioSequenceSlice) Merge(na string, statsOn []string) *BioSequence { seq := sequences[0] //sequences[0] = nil seq.SetQualities(nil) if len(sequences) == 1 { seq.Annotations()["count"] = 1 for _, v := range statsOn { seq.StatsOn(v, na) } } else { for k, toMerge := range sequences[1:] { seq.Merge(toMerge, na, true, statsOn...) toMerge.Recycle() sequences[1+k] = nil } } sequences.Recycle() return seq }