Patch a bug for multiple amplicon per sequence.

Former-commit-id: b252d2de8e1a85d65c2951aa1958ee038e35741d
This commit is contained in:
2023-03-31 15:10:25 +02:00
parent 84b3e4d097
commit 3f69fa41d6
4 changed files with 42 additions and 33 deletions

View File

@ -188,7 +188,6 @@ func MakeApatSequence(sequence *obiseq.BioSequence, circular bool, recycle ...Ap
// copy the data into the buffer, by converting it to a Go array
p := unsafe.Pointer(unsafe.SliceData(sequence.Sequence()))
pseqc := C.new_apatseq((*C.char)(p), C.int32_t(ic), C.int32_t(seqlen),
(*C.Seq)(out),
&errno, &errmsg)
@ -200,6 +199,7 @@ func MakeApatSequence(sequence *obiseq.BioSequence, circular bool, recycle ...Ap
C.free(p)
// atomic.AddInt64(&_AllocatedApaSequences, -1)
}
return NilApatSequence, errors.New(message)
}

View File

@ -201,7 +201,6 @@ func OptionBatchSize(size int) WithOption {
}
func _Pcr(seq ApatSequence,
sequence *obiseq.BioSequence,
opt Options) obiseq.BioSequenceSlice {
results := make(obiseq.BioSequenceSlice, 0, 10)
@ -218,7 +217,7 @@ func _Pcr(seq ApatSequence,
length := seq.Len() - begin
if opt.pointer.maxLength > 0 {
length = forwardMatches[len(forwardMatches)-1][2] - begin + opt.MaxLength() + reverse.Len()
length = forwardMatches[len(forwardMatches)-1][1] - begin + opt.MaxLength() + reverse.Len()
}
if opt.Circular() {
@ -254,26 +253,27 @@ func _Pcr(seq ApatSequence,
if length > 0 && // For when primers touch or overlap
(opt.MinLength() == 0 || length >= opt.MinLength()) &&
(opt.MaxLength() == 0 || length <= opt.MaxLength()) {
amplicon, _ := sequence.Subsequence(fm[1], rm[0], opt.pointer.circular)
amplicon, _ := seq.pointer.reference.Subsequence(fm[1], rm[0], opt.pointer.circular)
log.Debugf("seq length : %d capacity : %d",amplicon.Len(),cap(amplicon.Sequence()))
annot := amplicon.Annotations()
obiutils.MustFillMap(annot, sequence.Annotations())
obiutils.MustFillMap(annot, seq.pointer.reference.Annotations())
annot["forward_primer"] = forward.String()
match, _ := sequence.Subsequence(fm[0], fm[1], opt.pointer.circular)
match, _ := seq.pointer.reference.Subsequence(fm[0], fm[1], opt.pointer.circular)
annot["forward_match"] = match.String()
match.Recycle()
annot["forward_error"] = erri
annot["reverse_primer"] = reverse.String()
match, _ = sequence.Subsequence(rm[0], rm[1], opt.pointer.circular)
match, _ = seq.pointer.reference.Subsequence(rm[0], rm[1], opt.pointer.circular)
match = match.ReverseComplement(true)
annot["reverse_match"] = match.String()
match.Recycle()
annot["reverse_error"] = errj
annot["direction"] = "forward"
// log.Debugf("amplicon sequence capacity : %d", cap(amplicon.Sequence()))
@ -287,13 +287,14 @@ func _Pcr(seq ApatSequence,
}
forwardMatches = reverse.FindAllIndex(seq, 0, -1)
if forwardMatches != nil {
begin := forwardMatches[0][0]
length := seq.Len() - begin
if opt.pointer.maxLength > 0 {
length = forwardMatches[len(forwardMatches)-1][2] - begin + opt.MaxLength() + reverse.Len()
length = forwardMatches[len(forwardMatches)-1][1] - begin + opt.MaxLength() + reverse.Len()
}
if opt.Circular() {
@ -302,6 +303,7 @@ func _Pcr(seq ApatSequence,
}
reverseMatches := cfwd.FindAllIndex(seq, begin, length)
if reverseMatches != nil {
for _, fm := range forwardMatches {
@ -329,14 +331,14 @@ func _Pcr(seq ApatSequence,
if length > 0 && // For when primers touch or overlap
(opt.MinLength() == 0 || length >= opt.MinLength()) &&
(opt.MaxLength() == 0 || length <= opt.MaxLength()) {
amplicon, _ := sequence.Subsequence(fm[1], rm[0], opt.pointer.circular)
amplicon, _ := seq.pointer.reference.Subsequence(fm[1], rm[0], opt.pointer.circular)
amplicon = amplicon.ReverseComplement(true)
annot := amplicon.Annotations()
obiutils.MustFillMap(annot, sequence.Annotations())
obiutils.MustFillMap(annot, seq.pointer.reference.Annotations())
annot["forward_primer"] = forward.String()
match, _ := sequence.Subsequence(rm[0], rm[1], opt.pointer.circular)
match, _ := seq.pointer.reference.Subsequence(rm[0], rm[1], opt.pointer.circular)
match.ReverseComplement(true)
annot["forward_match"] = match.String()
match.Recycle()
@ -344,11 +346,13 @@ func _Pcr(seq ApatSequence,
annot["forward_error"] = errj
annot["reverse_primer"] = reverse.String()
match, _ = sequence.Subsequence(fm[0], fm[1], opt.pointer.circular)
match, _ = seq.pointer.reference.Subsequence(fm[0], fm[1], opt.pointer.circular)
annot["reverse_match"] = match.String()
match.Recycle()
annot["reverse_error"] = erri
annot["direction"] = "reverse"
results = append(results, amplicon)
// log.Debugf("amplicon sequence capacity : %d", cap(amplicon.Sequence()))
}
@ -372,7 +376,7 @@ func PCRSim(sequence *obiseq.BioSequence, options ...WithOption) obiseq.BioSeque
seq, _ := MakeApatSequence(sequence, opt.Circular())
defer seq.Free()
results := _Pcr(seq, sequence, opt)
results := _Pcr(seq, opt)
return results
}
@ -388,16 +392,16 @@ func _PCRSlice(sequences obiseq.BioSequenceSlice,
// if AllocatedApaSequences() == 0 {
// log.Panicln("Bizarre....")
// }
amplicons := _Pcr(seq, sequences[0], options)
amplicons := _Pcr(seq, options)
if len(amplicons) > 0 {
results = append(results, amplicons...)
}
log.Debugf("Number of sequences in the slice : %d",len(sequences))
for _, sequence := range sequences[1:] {
seq, _ = MakeApatSequence(sequence, options.Circular(), seq)
amplicons = _Pcr(seq, sequence, options)
amplicons = _Pcr(seq, options)
if len(amplicons) > 0 {
results = append(results, amplicons...)
}

View File

@ -18,7 +18,7 @@ import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils"
)
var _FileChunkSize = 1 << 26
var _FileChunkSize = 1 << 28
type _FileChunk struct {
raw io.Reader
@ -197,7 +197,7 @@ func _ReadFlatFileChunk(reader io.Reader, readers chan _FileChunk) {
}
// Create an extended buffer to read from if the end of the last entry is not found in the current buffer
extbuff := make([]byte, 1<<20)
extbuff := make([]byte, 1<<22)
buff = buff[:l]
end := 0
ic := 0
@ -260,7 +260,7 @@ func ReadEMBL(reader io.Reader, options ...WithOption) obiiter.IBioSequence {
// for j := 0; j < opt.ParallelWorkers(); j++ {
for j := 0; j < nworkers; j++ {
go _ParseEmblFile(opt.Source(),entry_channel, newIter)
go _ParseEmblFile(opt.Source(), entry_channel, newIter)
}
go _ReadFlatFileChunk(reader, entry_channel)

View File

@ -11,7 +11,6 @@ func IFragments(minsize, length, overlap, size, nworkers int) Pipeable {
step := length - overlap
ifrg := func(iterator IBioSequence) IBioSequence {
order := obiutils.AtomicCounter()
newiter := MakeIBioSequence()
iterator = iterator.SortBatches()
@ -22,39 +21,45 @@ func IFragments(minsize, length, overlap, size, nworkers int) Pipeable {
}()
f := func(iterator IBioSequence, id int) {
news := obiseq.MakeBioSequenceSlice()
for iterator.Next() {
news := obiseq.MakeBioSequenceSlice()
sl := iterator.Get()
for _, s := range sl.Slice() {
if s.Len() <= minsize {
news = append(news, s)
} else {
for i := 0; i < s.Len(); i += step {
end := obiutils.MinInt(i+length, s.Len())
fusion:=false
if (s.Len() - end) < step {
end = s.Len()
fusion = true
}
frg, err := s.Subsequence(i, end, false)
if err != nil {
log.Panicln(err)
}
news = append(news, frg)
if len(news) >= size {
newiter.Push(MakeBioSequenceBatch(order(), news))
news = obiseq.MakeBioSequenceSlice()
// if len(news) >= size {
// newiter.Push(MakeBioSequenceBatch(order(), news))
// news = obiseq.MakeBioSequenceSlice()
// }
if fusion {
i = s.Len()
}
}
s.Recycle()
}
if len(news) >= size {
o := order()
newiter.Push(MakeBioSequenceBatch(o, news))
news = obiseq.MakeBioSequenceSlice()
}
} // End of the slice loop
newiter.Push(MakeBioSequenceBatch(sl.Order(), news))
sl.Recycle(false)
} // End of the iterator loop
if len(news) > 0 {
newiter.Push(MakeBioSequenceBatch(order(), news))
}
// if len(news) > 0 {
// newiter.Push(MakeBioSequenceBatch(order(), news))
// }
newiter.Done()
@ -65,7 +70,7 @@ func IFragments(minsize, length, overlap, size, nworkers int) Pipeable {
}
go f(iterator, 0)
return newiter
return newiter.SortBatches().Rebatch(size)
}
return ifrg