Work on iterators and recycling of biosequences

This commit is contained in:
2022-01-14 23:11:36 +01:00
parent ef66ca4972
commit e8fff6477b
22 changed files with 350 additions and 111 deletions
+35 -21
View File
@@ -4,6 +4,7 @@ import (
"log"
"math"
"os"
"runtime"
"time"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obialign"
@@ -11,34 +12,40 @@ import (
"github.com/schollz/progressbar/v3"
)
func __abs__(x int) int {
func _Abs(x int) int {
if x < 0 {
return -x
}
return x
}
func JoinPairedSequence(seqA, seqB obiseq.BioSequence) obiseq.BioSequence {
js := make([]byte, seqA.Length(), seqA.Length()+seqB.Length()+10)
jq := make([]byte, seqA.Length(), seqA.Length()+seqB.Length()+10)
func JoinPairedSequence(seqA, seqB obiseq.BioSequence, inplace bool) obiseq.BioSequence {
copy(js, seqA.Sequence())
copy(jq, seqA.Qualities())
if !inplace {
seqA = seqA.Copy()
}
js = append(js, '.', '.', '.', '.', '.', '.', '.', '.', '.', '.')
jq = append(jq, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
seqA.WriteString("..........")
seqA.Write(seqB.Sequence())
js = append(js, seqB.Sequence()...)
jq = append(jq, seqB.Qualities()...)
seqA.WriteQualities(obiseq.Quality{0, 0, 0, 0, 0, 0, 0, 0, 0, 0})
seqA.WriteQualities(seqB.Qualities())
rep := obiseq.MakeBioSequence(seqA.Id(), js, seqA.Definition())
rep.SetQualities(jq)
return rep
return seqA
}
// AssemblePESequences assembles two paired sequences following
// the obipairing strategy implemented in obialign.PEAlign using
// the gap and delta parametters.
// If the length of the overlap between both sequences is less than
// overlap_min, The alignment is substituted by a simple pasting
// of the sequences with a strech of 10 dots in between them.
// the quality of the dots is set to 0.
// If the inplace parameter is set to true, the seqA and seqB are
// destroyed during the assembling process and cannot be reuse later on.
func AssemblePESequences(seqA, seqB obiseq.BioSequence,
gap, delta, overlap_min int, with_stats bool,
inplace bool,
arena_align obialign.PEAlignArena,
arena_cons obialign.BuildAlignArena,
arena_qual obialign.BuildAlignArena) obiseq.BioSequence {
@@ -53,7 +60,7 @@ func AssemblePESequences(seqA, seqB obiseq.BioSequence,
right = path[len(path)-2]
}
lcons := cons.Length()
ali_length := lcons - __abs__(left) - __abs__(right)
ali_length := lcons - _Abs(left) - _Abs(right)
if ali_length >= overlap_min {
if with_stats {
@@ -85,14 +92,22 @@ func AssemblePESequences(seqA, seqB obiseq.BioSequence,
annot["seq_ab_match"] = match
annot["score_norm"] = score_norm
if inplace {
(&seqA).Recycle()
(&seqB).Recycle()
}
}
} else {
cons = JoinPairedSequence(seqA, seqB)
cons = JoinPairedSequence(seqA, seqB, inplace)
if with_stats {
annot := cons.Annotations()
annot["mode"] = "join"
}
if inplace {
(&seqB).Recycle()
}
}
return cons
@@ -101,7 +116,7 @@ func AssemblePESequences(seqA, seqB obiseq.BioSequence,
func IAssemblePESequencesBatch(iterator obiseq.IPairedBioSequenceBatch,
gap, delta, overlap_min int, with_stats bool, sizes ...int) obiseq.IBioSequenceBatch {
nworkers := 7
nworkers := runtime.NumCPU() - 1
buffsize := iterator.BufferSize()
if len(sizes) > 0 {
@@ -148,13 +163,11 @@ func IAssemblePESequencesBatch(iterator obiseq.IPairedBioSequenceBatch,
processed := 0
for i, A := range batch.Forward() {
B := batch.Reverse()[i]
cons[i] = AssemblePESequences(A, B, 2, 5, 20, true, arena, barena1, barena2)
cons[i] = AssemblePESequences(A, B, 2, 5, 20, true, true, arena, barena1, barena2)
if i%59 == 0 {
bar.Add(59)
processed += 59
}
A.Destroy()
B.Destroy()
}
bar.Add(batch.Length() - processed)
newIter.Channel() <- obiseq.MakeBioSequenceBatch(
@@ -169,9 +182,10 @@ func IAssemblePESequencesBatch(iterator obiseq.IPairedBioSequenceBatch,
log.Printf("Start of the sequence Pairing")
for i := 0; i < nworkers; i++ {
for i := 0; i < nworkers-1; i++ {
go f(iterator.Split(), i)
}
go f(iterator, nworkers-1)
return newIter