obimultiplex saves unassigned sequence

This commit is contained in:
2022-02-01 23:25:19 +01:00
parent e9cdfd7e03
commit 98a4363d22
6 changed files with 189 additions and 0 deletions

View File

@@ -372,3 +372,70 @@ func (iterator IBioSequenceBatch) PairWith(reverse IBioSequenceBatch, sizes ...i
return newIter
}
// DivideOn splits the sequences delivered by the iterator into two new
// iterators according to predicate: the first returned iterator receives the
// sequences for which predicate returns true, the second receives the others.
//
// Sequences are regrouped into batches of at most size sequences; each output
// numbers its batches independently, starting at 0. The optional first value
// of sizes replaces iterator.BufferSize() as the size passed to
// MakeIBioSequenceBatch for both outputs; extra values are ignored.
//
// The split runs in background goroutines, and both output channels are
// closed once the input is exhausted and the remaining partial batches have
// been sent.
func (iterator IBioSequenceBatch) DivideOn(predicate SequencePredicate,
	size int, sizes ...int) (IBioSequenceBatch, IBioSequenceBatch) {
	buffsize := iterator.BufferSize()

	// The optional size is the FIRST variadic value; reading index 1 here
	// would panic when a single value is supplied.
	if len(sizes) > 0 {
		buffsize = sizes[0]
	}

	trueIter := MakeIBioSequenceBatch(buffsize)
	falseIter := MakeIBioSequenceBatch(buffsize)

	// One producer (the dispatch goroutine below) is registered on each output.
	trueIter.Add(1)
	falseIter.Add(1)

	// Closer: waits for the producer to finish on both outputs before
	// closing their channels.
	go func() {
		trueIter.Wait()
		falseIter.Wait()
		close(trueIter.Channel())
		close(falseIter.Channel())
	}()

	// Dispatcher: routes every sequence to the matching pending slice and
	// emits a batch each time a slice reaches the requested size.
	go func() {
		trueOrder := 0
		falseOrder := 0
		iterator = iterator.SortBatches()

		trueSlice := make(BioSequenceSlice, 0, size)
		falseSlice := make(BioSequenceSlice, 0, size)

		for iterator.Next() {
			seqs := iterator.Get()
			for _, s := range seqs.slice {
				if predicate(s) {
					trueSlice = append(trueSlice, s)
				} else {
					falseSlice = append(falseSlice, s)
				}

				if len(trueSlice) == size {
					trueIter.Channel() <- MakeBioSequenceBatch(trueOrder, trueSlice...)
					trueOrder++
					trueSlice = make(BioSequenceSlice, 0, size)
				}

				if len(falseSlice) == size {
					falseIter.Channel() <- MakeBioSequenceBatch(falseOrder, falseSlice...)
					falseOrder++
					falseSlice = make(BioSequenceSlice, 0, size)
				}
			}
		}

		// Flush the partially filled batches left over at end of input.
		if len(trueSlice) > 0 {
			trueIter.Channel() <- MakeBioSequenceBatch(trueOrder, trueSlice...)
		}

		if len(falseSlice) > 0 {
			falseIter.Channel() <- MakeBioSequenceBatch(falseOrder, falseSlice...)
		}

		trueIter.Done()
		falseIter.Done()
	}()

	return trueIter, falseIter
}

77
pkg/obiseq/predicate.go Normal file
View File

@@ -0,0 +1,77 @@
package obiseq
// SequencePredicate is a function that tests a BioSequence and reports the
// outcome as a boolean.
type SequencePredicate func(BioSequence) bool
// And returns a predicate that is true only when both predicate1 and
// predicate2 are true for the tested sequence. predicate2 is not evaluated
// when predicate1 is false.
func (predicate1 SequencePredicate) And(predicate2 SequencePredicate) SequencePredicate {
	return func(seq BioSequence) bool {
		if !predicate1(seq) {
			return false
		}
		return predicate2(seq)
	}
}
// Or returns a predicate that is true when at least one of predicate1 and
// predicate2 is true for the tested sequence. predicate2 is not evaluated
// when predicate1 is true.
func (predicate1 SequencePredicate) Or(predicate2 SequencePredicate) SequencePredicate {
	return func(seq BioSequence) bool {
		if predicate1(seq) {
			return true
		}
		return predicate2(seq)
	}
}
// Xor returns a predicate that is true when exactly one of predicate1 and
// predicate2 is true for the tested sequence. Both predicates are always
// evaluated, predicate1 first.
func (predicate1 SequencePredicate) Xor(predicate2 SequencePredicate) SequencePredicate {
	return func(seq BioSequence) bool {
		first := predicate1(seq)
		second := predicate2(seq)

		// For booleans, "exactly one is true" is the same as "they differ".
		return first != second
	}
}
// Not returns a predicate giving the logical negation of predicate1.
func (predicate1 SequencePredicate) Not() SequencePredicate {
	return func(seq BioSequence) bool {
		if predicate1(seq) {
			return false
		}
		return true
	}
}
// HasAttribute returns a predicate that is true when the tested sequence
// reports having annotations and name is a key of those annotations.
func HasAttribute(name string) SequencePredicate {
	return func(seq BioSequence) bool {
		// Annotations() is only consulted when HasAnnotation() is true.
		if !seq.HasAnnotation() {
			return false
		}

		_, found := seq.Annotations()[name]
		return found
	}
}
// MoreAbundantThan returns a predicate that is true when the Count() of the
// tested sequence is strictly greater than count.
func MoreAbundantThan(count int) SequencePredicate {
	return func(seq BioSequence) bool {
		return count < seq.Count()
	}
}
// IsLongerOrEqualTo returns a predicate that is true when the Length() of the
// tested sequence is greater than or equal to length.
func IsLongerOrEqualTo(length int) SequencePredicate {
	return func(seq BioSequence) bool {
		return length <= seq.Length()
	}
}
// IsShorterOrEqualTo returns a predicate that is true when the Length() of
// the tested sequence is less than or equal to length.
func IsShorterOrEqualTo(length int) SequencePredicate {
	return func(seq BioSequence) bool {
		return length >= seq.Length()
	}
}