Patch a bug for multiple amplicon per sequence.

Former-commit-id: b252d2de8e1a85d65c2951aa1958ee038e35741d
This commit is contained in:
2023-03-31 15:10:25 +02:00
parent 84b3e4d097
commit 3f69fa41d6
4 changed files with 42 additions and 33 deletions

View File

@ -188,7 +188,6 @@ func MakeApatSequence(sequence *obiseq.BioSequence, circular bool, recycle ...Ap
// copy the data into the buffer, by converting it to a Go array // copy the data into the buffer, by converting it to a Go array
p := unsafe.Pointer(unsafe.SliceData(sequence.Sequence())) p := unsafe.Pointer(unsafe.SliceData(sequence.Sequence()))
pseqc := C.new_apatseq((*C.char)(p), C.int32_t(ic), C.int32_t(seqlen), pseqc := C.new_apatseq((*C.char)(p), C.int32_t(ic), C.int32_t(seqlen),
(*C.Seq)(out), (*C.Seq)(out),
&errno, &errmsg) &errno, &errmsg)
@ -200,6 +199,7 @@ func MakeApatSequence(sequence *obiseq.BioSequence, circular bool, recycle ...Ap
C.free(p) C.free(p)
// atomic.AddInt64(&_AllocatedApaSequences, -1) // atomic.AddInt64(&_AllocatedApaSequences, -1)
} }
return NilApatSequence, errors.New(message) return NilApatSequence, errors.New(message)
} }

View File

@ -201,7 +201,6 @@ func OptionBatchSize(size int) WithOption {
} }
func _Pcr(seq ApatSequence, func _Pcr(seq ApatSequence,
sequence *obiseq.BioSequence,
opt Options) obiseq.BioSequenceSlice { opt Options) obiseq.BioSequenceSlice {
results := make(obiseq.BioSequenceSlice, 0, 10) results := make(obiseq.BioSequenceSlice, 0, 10)
@ -218,7 +217,7 @@ func _Pcr(seq ApatSequence,
length := seq.Len() - begin length := seq.Len() - begin
if opt.pointer.maxLength > 0 { if opt.pointer.maxLength > 0 {
length = forwardMatches[len(forwardMatches)-1][2] - begin + opt.MaxLength() + reverse.Len() length = forwardMatches[len(forwardMatches)-1][1] - begin + opt.MaxLength() + reverse.Len()
} }
if opt.Circular() { if opt.Circular() {
@ -254,26 +253,27 @@ func _Pcr(seq ApatSequence,
if length > 0 && // For when primers touch or overlap if length > 0 && // For when primers touch or overlap
(opt.MinLength() == 0 || length >= opt.MinLength()) && (opt.MinLength() == 0 || length >= opt.MinLength()) &&
(opt.MaxLength() == 0 || length <= opt.MaxLength()) { (opt.MaxLength() == 0 || length <= opt.MaxLength()) {
amplicon, _ := sequence.Subsequence(fm[1], rm[0], opt.pointer.circular) amplicon, _ := seq.pointer.reference.Subsequence(fm[1], rm[0], opt.pointer.circular)
log.Debugf("seq length : %d capacity : %d",amplicon.Len(),cap(amplicon.Sequence())) log.Debugf("seq length : %d capacity : %d",amplicon.Len(),cap(amplicon.Sequence()))
annot := amplicon.Annotations() annot := amplicon.Annotations()
obiutils.MustFillMap(annot, sequence.Annotations()) obiutils.MustFillMap(annot, seq.pointer.reference.Annotations())
annot["forward_primer"] = forward.String() annot["forward_primer"] = forward.String()
match, _ := sequence.Subsequence(fm[0], fm[1], opt.pointer.circular) match, _ := seq.pointer.reference.Subsequence(fm[0], fm[1], opt.pointer.circular)
annot["forward_match"] = match.String() annot["forward_match"] = match.String()
match.Recycle() match.Recycle()
annot["forward_error"] = erri annot["forward_error"] = erri
annot["reverse_primer"] = reverse.String() annot["reverse_primer"] = reverse.String()
match, _ = sequence.Subsequence(rm[0], rm[1], opt.pointer.circular) match, _ = seq.pointer.reference.Subsequence(rm[0], rm[1], opt.pointer.circular)
match = match.ReverseComplement(true) match = match.ReverseComplement(true)
annot["reverse_match"] = match.String() annot["reverse_match"] = match.String()
match.Recycle() match.Recycle()
annot["reverse_error"] = errj annot["reverse_error"] = errj
annot["direction"] = "forward"
// log.Debugf("amplicon sequence capacity : %d", cap(amplicon.Sequence())) // log.Debugf("amplicon sequence capacity : %d", cap(amplicon.Sequence()))
@ -287,13 +287,14 @@ func _Pcr(seq ApatSequence,
} }
forwardMatches = reverse.FindAllIndex(seq, 0, -1) forwardMatches = reverse.FindAllIndex(seq, 0, -1)
if forwardMatches != nil { if forwardMatches != nil {
begin := forwardMatches[0][0] begin := forwardMatches[0][0]
length := seq.Len() - begin length := seq.Len() - begin
if opt.pointer.maxLength > 0 { if opt.pointer.maxLength > 0 {
length = forwardMatches[len(forwardMatches)-1][2] - begin + opt.MaxLength() + reverse.Len() length = forwardMatches[len(forwardMatches)-1][1] - begin + opt.MaxLength() + reverse.Len()
} }
if opt.Circular() { if opt.Circular() {
@ -303,6 +304,7 @@ func _Pcr(seq ApatSequence,
reverseMatches := cfwd.FindAllIndex(seq, begin, length) reverseMatches := cfwd.FindAllIndex(seq, begin, length)
if reverseMatches != nil { if reverseMatches != nil {
for _, fm := range forwardMatches { for _, fm := range forwardMatches {
@ -329,14 +331,14 @@ func _Pcr(seq ApatSequence,
if length > 0 && // For when primers touch or overlap if length > 0 && // For when primers touch or overlap
(opt.MinLength() == 0 || length >= opt.MinLength()) && (opt.MinLength() == 0 || length >= opt.MinLength()) &&
(opt.MaxLength() == 0 || length <= opt.MaxLength()) { (opt.MaxLength() == 0 || length <= opt.MaxLength()) {
amplicon, _ := sequence.Subsequence(fm[1], rm[0], opt.pointer.circular) amplicon, _ := seq.pointer.reference.Subsequence(fm[1], rm[0], opt.pointer.circular)
amplicon = amplicon.ReverseComplement(true) amplicon = amplicon.ReverseComplement(true)
annot := amplicon.Annotations() annot := amplicon.Annotations()
obiutils.MustFillMap(annot, sequence.Annotations()) obiutils.MustFillMap(annot, seq.pointer.reference.Annotations())
annot["forward_primer"] = forward.String() annot["forward_primer"] = forward.String()
match, _ := sequence.Subsequence(rm[0], rm[1], opt.pointer.circular) match, _ := seq.pointer.reference.Subsequence(rm[0], rm[1], opt.pointer.circular)
match.ReverseComplement(true) match.ReverseComplement(true)
annot["forward_match"] = match.String() annot["forward_match"] = match.String()
match.Recycle() match.Recycle()
@ -344,11 +346,13 @@ func _Pcr(seq ApatSequence,
annot["forward_error"] = errj annot["forward_error"] = errj
annot["reverse_primer"] = reverse.String() annot["reverse_primer"] = reverse.String()
match, _ = sequence.Subsequence(fm[0], fm[1], opt.pointer.circular) match, _ = seq.pointer.reference.Subsequence(fm[0], fm[1], opt.pointer.circular)
annot["reverse_match"] = match.String() annot["reverse_match"] = match.String()
match.Recycle() match.Recycle()
annot["reverse_error"] = erri annot["reverse_error"] = erri
annot["direction"] = "reverse"
results = append(results, amplicon) results = append(results, amplicon)
// log.Debugf("amplicon sequence capacity : %d", cap(amplicon.Sequence())) // log.Debugf("amplicon sequence capacity : %d", cap(amplicon.Sequence()))
} }
@ -372,7 +376,7 @@ func PCRSim(sequence *obiseq.BioSequence, options ...WithOption) obiseq.BioSeque
seq, _ := MakeApatSequence(sequence, opt.Circular()) seq, _ := MakeApatSequence(sequence, opt.Circular())
defer seq.Free() defer seq.Free()
results := _Pcr(seq, sequence, opt) results := _Pcr(seq, opt)
return results return results
} }
@ -388,16 +392,16 @@ func _PCRSlice(sequences obiseq.BioSequenceSlice,
// if AllocatedApaSequences() == 0 { // if AllocatedApaSequences() == 0 {
// log.Panicln("Bizarre....") // log.Panicln("Bizarre....")
// } // }
amplicons := _Pcr(seq, sequences[0], options) amplicons := _Pcr(seq, options)
if len(amplicons) > 0 { if len(amplicons) > 0 {
results = append(results, amplicons...) results = append(results, amplicons...)
} }
log.Debugf("Number of sequences in the slice : %d",len(sequences))
for _, sequence := range sequences[1:] { for _, sequence := range sequences[1:] {
seq, _ = MakeApatSequence(sequence, options.Circular(), seq) seq, _ = MakeApatSequence(sequence, options.Circular(), seq)
amplicons = _Pcr(seq, sequence, options) amplicons = _Pcr(seq, options)
if len(amplicons) > 0 { if len(amplicons) > 0 {
results = append(results, amplicons...) results = append(results, amplicons...)
} }

View File

@ -18,7 +18,7 @@ import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils"
) )
var _FileChunkSize = 1 << 26 var _FileChunkSize = 1 << 28
type _FileChunk struct { type _FileChunk struct {
raw io.Reader raw io.Reader
@ -197,7 +197,7 @@ func _ReadFlatFileChunk(reader io.Reader, readers chan _FileChunk) {
} }
// Create an extended buffer to read from if the end of the last entry is not found in the current buffer // Create an extended buffer to read from if the end of the last entry is not found in the current buffer
extbuff := make([]byte, 1<<20) extbuff := make([]byte, 1<<22)
buff = buff[:l] buff = buff[:l]
end := 0 end := 0
ic := 0 ic := 0
@ -260,7 +260,7 @@ func ReadEMBL(reader io.Reader, options ...WithOption) obiiter.IBioSequence {
// for j := 0; j < opt.ParallelWorkers(); j++ { // for j := 0; j < opt.ParallelWorkers(); j++ {
for j := 0; j < nworkers; j++ { for j := 0; j < nworkers; j++ {
go _ParseEmblFile(opt.Source(),entry_channel, newIter) go _ParseEmblFile(opt.Source(), entry_channel, newIter)
} }
go _ReadFlatFileChunk(reader, entry_channel) go _ReadFlatFileChunk(reader, entry_channel)

View File

@ -11,7 +11,6 @@ func IFragments(minsize, length, overlap, size, nworkers int) Pipeable {
step := length - overlap step := length - overlap
ifrg := func(iterator IBioSequence) IBioSequence { ifrg := func(iterator IBioSequence) IBioSequence {
order := obiutils.AtomicCounter()
newiter := MakeIBioSequence() newiter := MakeIBioSequence()
iterator = iterator.SortBatches() iterator = iterator.SortBatches()
@ -22,39 +21,45 @@ func IFragments(minsize, length, overlap, size, nworkers int) Pipeable {
}() }()
f := func(iterator IBioSequence, id int) { f := func(iterator IBioSequence, id int) {
news := obiseq.MakeBioSequenceSlice()
for iterator.Next() { for iterator.Next() {
news := obiseq.MakeBioSequenceSlice()
sl := iterator.Get() sl := iterator.Get()
for _, s := range sl.Slice() { for _, s := range sl.Slice() {
if s.Len() <= minsize { if s.Len() <= minsize {
news = append(news, s) news = append(news, s)
} else { } else {
for i := 0; i < s.Len(); i += step { for i := 0; i < s.Len(); i += step {
end := obiutils.MinInt(i+length, s.Len()) end := obiutils.MinInt(i+length, s.Len())
fusion:=false
if (s.Len() - end) < step {
end = s.Len()
fusion = true
}
frg, err := s.Subsequence(i, end, false) frg, err := s.Subsequence(i, end, false)
if err != nil { if err != nil {
log.Panicln(err) log.Panicln(err)
} }
news = append(news, frg) news = append(news, frg)
if len(news) >= size { // if len(news) >= size {
newiter.Push(MakeBioSequenceBatch(order(), news)) // newiter.Push(MakeBioSequenceBatch(order(), news))
news = obiseq.MakeBioSequenceSlice() // news = obiseq.MakeBioSequenceSlice()
// }
if fusion {
i = s.Len()
} }
} }
s.Recycle() s.Recycle()
} }
if len(news) >= size {
o := order()
newiter.Push(MakeBioSequenceBatch(o, news))
news = obiseq.MakeBioSequenceSlice()
}
} // End of the slice loop } // End of the slice loop
newiter.Push(MakeBioSequenceBatch(sl.Order(), news))
sl.Recycle(false) sl.Recycle(false)
} // End of the iterator loop } // End of the iterator loop
if len(news) > 0 { // if len(news) > 0 {
newiter.Push(MakeBioSequenceBatch(order(), news)) // newiter.Push(MakeBioSequenceBatch(order(), news))
} // }
newiter.Done() newiter.Done()
@ -65,7 +70,7 @@ func IFragments(minsize, length, overlap, size, nworkers int) Pipeable {
} }
go f(iterator, 0) go f(iterator, 0)
return newiter return newiter.SortBatches().Rebatch(size)
} }
return ifrg return ifrg