mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
Finalize the correct merging of statistic of occurrences, in I expect every situation
This commit is contained in:
@ -10,12 +10,20 @@ import (
|
|||||||
|
|
||||||
type StatsOnValues map[string]int
|
type StatsOnValues map[string]int
|
||||||
|
|
||||||
|
func StatsOnSlotName(key string) string {
|
||||||
|
return "merged_" + key
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
Tests if the sequence has already a slot summarizing statistics
|
||||||
|
of occurrence for a given attribute.
|
||||||
|
*/
|
||||||
func (sequence *BioSequence) HasStatsOn(key string) bool {
|
func (sequence *BioSequence) HasStatsOn(key string) bool {
|
||||||
if !sequence.HasAnnotation() {
|
if !sequence.HasAnnotation() {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
mkey := "merged_" + key
|
mkey := StatsOnSlotName(key)
|
||||||
annotations := sequence.Annotations()
|
annotations := sequence.Annotations()
|
||||||
_, ok := annotations[mkey]
|
_, ok := annotations[mkey]
|
||||||
|
|
||||||
@ -23,7 +31,7 @@ func (sequence *BioSequence) HasStatsOn(key string) bool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (sequence *BioSequence) StatsOn(key string, na string) StatsOnValues {
|
func (sequence *BioSequence) StatsOn(key string, na string) StatsOnValues {
|
||||||
mkey := "merged_" + key
|
mkey := StatsOnSlotName(key)
|
||||||
annotations := sequence.Annotations()
|
annotations := sequence.Annotations()
|
||||||
istat, ok := annotations[mkey]
|
istat, ok := annotations[mkey]
|
||||||
|
|
||||||
@ -66,6 +74,7 @@ func (sequence *BioSequence) StatsOn(key string, na string) StatsOnValues {
|
|||||||
|
|
||||||
func (sequence *BioSequence) StatsPlusOne(key string, toAdd *BioSequence, na string) bool {
|
func (sequence *BioSequence) StatsPlusOne(key string, toAdd *BioSequence, na string) bool {
|
||||||
sval := na
|
sval := na
|
||||||
|
annotations := sequence.Annotations()
|
||||||
stats := sequence.StatsOn(key, na)
|
stats := sequence.StatsOn(key, na)
|
||||||
retval := false
|
retval := false
|
||||||
|
|
||||||
@ -93,8 +102,8 @@ func (sequence *BioSequence) StatsPlusOne(key string, toAdd *BioSequence, na str
|
|||||||
if !ok {
|
if !ok {
|
||||||
old = 0
|
old = 0
|
||||||
}
|
}
|
||||||
stats[sval] = old + 1
|
stats[sval] = old + toAdd.Count()
|
||||||
|
annotations[StatsOnSlotName(key)] = stats
|
||||||
return retval
|
return retval
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -110,6 +119,10 @@ func (stats StatsOnValues) Merge(toMerged StatsOnValues) StatsOnValues {
|
|||||||
return stats
|
return stats
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
|
||||||
|
Merges two sequences
|
||||||
|
*/
|
||||||
func (sequence *BioSequence) Merge(tomerge *BioSequence, na string, inplace bool, statsOn ...string) *BioSequence {
|
func (sequence *BioSequence) Merge(tomerge *BioSequence, na string, inplace bool, statsOn ...string) *BioSequence {
|
||||||
if !inplace {
|
if !inplace {
|
||||||
sequence = sequence.Copy()
|
sequence = sequence.Copy()
|
||||||
@ -119,7 +132,7 @@ func (sequence *BioSequence) Merge(tomerge *BioSequence, na string, inplace bool
|
|||||||
sequence.SetQualities(nil)
|
sequence.SetQualities(nil)
|
||||||
}
|
}
|
||||||
|
|
||||||
annotation := sequence.Annotations()
|
annotations := sequence.Annotations()
|
||||||
|
|
||||||
count := sequence.Count() + tomerge.Count()
|
count := sequence.Count() + tomerge.Count()
|
||||||
|
|
||||||
@ -127,7 +140,8 @@ func (sequence *BioSequence) Merge(tomerge *BioSequence, na string, inplace bool
|
|||||||
if tomerge.HasStatsOn(key) {
|
if tomerge.HasStatsOn(key) {
|
||||||
smk := sequence.StatsOn(key, na)
|
smk := sequence.StatsOn(key, na)
|
||||||
mmk := tomerge.StatsOn(key, na)
|
mmk := tomerge.StatsOn(key, na)
|
||||||
smk.Merge(mmk)
|
|
||||||
|
annotations[StatsOnSlotName(key)] = smk.Merge(mmk)
|
||||||
} else {
|
} else {
|
||||||
sequence.StatsPlusOne(key, tomerge, na)
|
sequence.StatsPlusOne(key, tomerge, na)
|
||||||
}
|
}
|
||||||
@ -135,27 +149,35 @@ func (sequence *BioSequence) Merge(tomerge *BioSequence, na string, inplace bool
|
|||||||
|
|
||||||
if tomerge.HasAnnotation() {
|
if tomerge.HasAnnotation() {
|
||||||
ma := tomerge.Annotations()
|
ma := tomerge.Annotations()
|
||||||
for k, va := range annotation {
|
for k, va := range annotations {
|
||||||
if !strings.HasPrefix(k, "merged_") {
|
if !strings.HasPrefix(k, "merged_") {
|
||||||
vm, ok := ma[k]
|
vm, ok := ma[k]
|
||||||
if !ok || vm != va {
|
if !ok || vm != va {
|
||||||
delete(annotation, k)
|
delete(annotations, k)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for k := range annotation {
|
for k := range annotations {
|
||||||
if !strings.HasPrefix(k, "merged_") {
|
if !strings.HasPrefix(k, "merged_") {
|
||||||
delete(annotation, k)
|
delete(annotations, k)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
annotation["count"] = count
|
annotations["count"] = count
|
||||||
|
|
||||||
return sequence
|
return sequence
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
Merges a set of sequence into a single sequence.
|
||||||
|
|
||||||
|
The function assumes that every sequence in the batch is
|
||||||
|
identical in term of sequence. Actually the function only
|
||||||
|
aggregates the annotations of the different sequences to be merged
|
||||||
|
|
||||||
|
Quality information is lost during the merge procedure.
|
||||||
|
*/
|
||||||
func (sequences BioSequenceSlice) Merge(na string, statsOn []string) *BioSequence {
|
func (sequences BioSequenceSlice) Merge(na string, statsOn []string) *BioSequence {
|
||||||
seq := sequences[0]
|
seq := sequences[0]
|
||||||
//sequences[0] = nil
|
//sequences[0] = nil
|
||||||
|
Reference in New Issue
Block a user