mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
Patch a bug for multiple amplicon per sequence.
Former-commit-id: b252d2de8e1a85d65c2951aa1958ee038e35741d
This commit is contained in:
@ -188,7 +188,6 @@ func MakeApatSequence(sequence *obiseq.BioSequence, circular bool, recycle ...Ap
|
||||
|
||||
// copy the data into the buffer, by converting it to a Go array
|
||||
p := unsafe.Pointer(unsafe.SliceData(sequence.Sequence()))
|
||||
|
||||
pseqc := C.new_apatseq((*C.char)(p), C.int32_t(ic), C.int32_t(seqlen),
|
||||
(*C.Seq)(out),
|
||||
&errno, &errmsg)
|
||||
@ -200,6 +199,7 @@ func MakeApatSequence(sequence *obiseq.BioSequence, circular bool, recycle ...Ap
|
||||
C.free(p)
|
||||
// atomic.AddInt64(&_AllocatedApaSequences, -1)
|
||||
}
|
||||
|
||||
return NilApatSequence, errors.New(message)
|
||||
}
|
||||
|
||||
|
@ -201,7 +201,6 @@ func OptionBatchSize(size int) WithOption {
|
||||
}
|
||||
|
||||
func _Pcr(seq ApatSequence,
|
||||
sequence *obiseq.BioSequence,
|
||||
opt Options) obiseq.BioSequenceSlice {
|
||||
results := make(obiseq.BioSequenceSlice, 0, 10)
|
||||
|
||||
@ -218,7 +217,7 @@ func _Pcr(seq ApatSequence,
|
||||
length := seq.Len() - begin
|
||||
|
||||
if opt.pointer.maxLength > 0 {
|
||||
length = forwardMatches[len(forwardMatches)-1][2] - begin + opt.MaxLength() + reverse.Len()
|
||||
length = forwardMatches[len(forwardMatches)-1][1] - begin + opt.MaxLength() + reverse.Len()
|
||||
}
|
||||
|
||||
if opt.Circular() {
|
||||
@ -254,26 +253,27 @@ func _Pcr(seq ApatSequence,
|
||||
if length > 0 && // For when primers touch or overlap
|
||||
(opt.MinLength() == 0 || length >= opt.MinLength()) &&
|
||||
(opt.MaxLength() == 0 || length <= opt.MaxLength()) {
|
||||
amplicon, _ := sequence.Subsequence(fm[1], rm[0], opt.pointer.circular)
|
||||
amplicon, _ := seq.pointer.reference.Subsequence(fm[1], rm[0], opt.pointer.circular)
|
||||
log.Debugf("seq length : %d capacity : %d",amplicon.Len(),cap(amplicon.Sequence()))
|
||||
annot := amplicon.Annotations()
|
||||
obiutils.MustFillMap(annot, sequence.Annotations())
|
||||
obiutils.MustFillMap(annot, seq.pointer.reference.Annotations())
|
||||
|
||||
annot["forward_primer"] = forward.String()
|
||||
|
||||
match, _ := sequence.Subsequence(fm[0], fm[1], opt.pointer.circular)
|
||||
match, _ := seq.pointer.reference.Subsequence(fm[0], fm[1], opt.pointer.circular)
|
||||
annot["forward_match"] = match.String()
|
||||
match.Recycle()
|
||||
|
||||
annot["forward_error"] = erri
|
||||
|
||||
annot["reverse_primer"] = reverse.String()
|
||||
match, _ = sequence.Subsequence(rm[0], rm[1], opt.pointer.circular)
|
||||
match, _ = seq.pointer.reference.Subsequence(rm[0], rm[1], opt.pointer.circular)
|
||||
match = match.ReverseComplement(true)
|
||||
annot["reverse_match"] = match.String()
|
||||
match.Recycle()
|
||||
|
||||
annot["reverse_error"] = errj
|
||||
annot["direction"] = "forward"
|
||||
|
||||
// log.Debugf("amplicon sequence capacity : %d", cap(amplicon.Sequence()))
|
||||
|
||||
@ -287,13 +287,14 @@ func _Pcr(seq ApatSequence,
|
||||
}
|
||||
|
||||
forwardMatches = reverse.FindAllIndex(seq, 0, -1)
|
||||
|
||||
if forwardMatches != nil {
|
||||
|
||||
begin := forwardMatches[0][0]
|
||||
length := seq.Len() - begin
|
||||
|
||||
if opt.pointer.maxLength > 0 {
|
||||
length = forwardMatches[len(forwardMatches)-1][2] - begin + opt.MaxLength() + reverse.Len()
|
||||
length = forwardMatches[len(forwardMatches)-1][1] - begin + opt.MaxLength() + reverse.Len()
|
||||
}
|
||||
|
||||
if opt.Circular() {
|
||||
@ -302,6 +303,7 @@ func _Pcr(seq ApatSequence,
|
||||
}
|
||||
|
||||
reverseMatches := cfwd.FindAllIndex(seq, begin, length)
|
||||
|
||||
|
||||
if reverseMatches != nil {
|
||||
for _, fm := range forwardMatches {
|
||||
@ -329,14 +331,14 @@ func _Pcr(seq ApatSequence,
|
||||
if length > 0 && // For when primers touch or overlap
|
||||
(opt.MinLength() == 0 || length >= opt.MinLength()) &&
|
||||
(opt.MaxLength() == 0 || length <= opt.MaxLength()) {
|
||||
amplicon, _ := sequence.Subsequence(fm[1], rm[0], opt.pointer.circular)
|
||||
amplicon, _ := seq.pointer.reference.Subsequence(fm[1], rm[0], opt.pointer.circular)
|
||||
amplicon = amplicon.ReverseComplement(true)
|
||||
|
||||
annot := amplicon.Annotations()
|
||||
obiutils.MustFillMap(annot, sequence.Annotations())
|
||||
obiutils.MustFillMap(annot, seq.pointer.reference.Annotations())
|
||||
annot["forward_primer"] = forward.String()
|
||||
|
||||
match, _ := sequence.Subsequence(rm[0], rm[1], opt.pointer.circular)
|
||||
match, _ := seq.pointer.reference.Subsequence(rm[0], rm[1], opt.pointer.circular)
|
||||
match.ReverseComplement(true)
|
||||
annot["forward_match"] = match.String()
|
||||
match.Recycle()
|
||||
@ -344,11 +346,13 @@ func _Pcr(seq ApatSequence,
|
||||
annot["forward_error"] = errj
|
||||
|
||||
annot["reverse_primer"] = reverse.String()
|
||||
match, _ = sequence.Subsequence(fm[0], fm[1], opt.pointer.circular)
|
||||
match, _ = seq.pointer.reference.Subsequence(fm[0], fm[1], opt.pointer.circular)
|
||||
annot["reverse_match"] = match.String()
|
||||
match.Recycle()
|
||||
|
||||
annot["reverse_error"] = erri
|
||||
annot["direction"] = "reverse"
|
||||
|
||||
results = append(results, amplicon)
|
||||
// log.Debugf("amplicon sequence capacity : %d", cap(amplicon.Sequence()))
|
||||
}
|
||||
@ -372,7 +376,7 @@ func PCRSim(sequence *obiseq.BioSequence, options ...WithOption) obiseq.BioSeque
|
||||
seq, _ := MakeApatSequence(sequence, opt.Circular())
|
||||
defer seq.Free()
|
||||
|
||||
results := _Pcr(seq, sequence, opt)
|
||||
results := _Pcr(seq, opt)
|
||||
|
||||
return results
|
||||
}
|
||||
@ -388,16 +392,16 @@ func _PCRSlice(sequences obiseq.BioSequenceSlice,
|
||||
// if AllocatedApaSequences() == 0 {
|
||||
// log.Panicln("Bizarre....")
|
||||
// }
|
||||
amplicons := _Pcr(seq, sequences[0], options)
|
||||
amplicons := _Pcr(seq, options)
|
||||
|
||||
if len(amplicons) > 0 {
|
||||
results = append(results, amplicons...)
|
||||
}
|
||||
|
||||
log.Debugf("Number of sequences in the slice : %d",len(sequences))
|
||||
for _, sequence := range sequences[1:] {
|
||||
seq, _ = MakeApatSequence(sequence, options.Circular(), seq)
|
||||
amplicons = _Pcr(seq, sequence, options)
|
||||
amplicons = _Pcr(seq, options)
|
||||
|
||||
if len(amplicons) > 0 {
|
||||
results = append(results, amplicons...)
|
||||
}
|
||||
|
@ -18,7 +18,7 @@ import (
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils"
|
||||
)
|
||||
|
||||
var _FileChunkSize = 1 << 26
|
||||
var _FileChunkSize = 1 << 28
|
||||
|
||||
type _FileChunk struct {
|
||||
raw io.Reader
|
||||
@ -197,7 +197,7 @@ func _ReadFlatFileChunk(reader io.Reader, readers chan _FileChunk) {
|
||||
}
|
||||
|
||||
// Create an extended buffer to read from if the end of the last entry is not found in the current buffer
|
||||
extbuff := make([]byte, 1<<20)
|
||||
extbuff := make([]byte, 1<<22)
|
||||
buff = buff[:l]
|
||||
end := 0
|
||||
ic := 0
|
||||
@ -260,7 +260,7 @@ func ReadEMBL(reader io.Reader, options ...WithOption) obiiter.IBioSequence {
|
||||
|
||||
// for j := 0; j < opt.ParallelWorkers(); j++ {
|
||||
for j := 0; j < nworkers; j++ {
|
||||
go _ParseEmblFile(opt.Source(),entry_channel, newIter)
|
||||
go _ParseEmblFile(opt.Source(), entry_channel, newIter)
|
||||
}
|
||||
|
||||
go _ReadFlatFileChunk(reader, entry_channel)
|
||||
|
@ -11,7 +11,6 @@ func IFragments(minsize, length, overlap, size, nworkers int) Pipeable {
|
||||
step := length - overlap
|
||||
|
||||
ifrg := func(iterator IBioSequence) IBioSequence {
|
||||
order := obiutils.AtomicCounter()
|
||||
newiter := MakeIBioSequence()
|
||||
iterator = iterator.SortBatches()
|
||||
|
||||
@ -22,39 +21,45 @@ func IFragments(minsize, length, overlap, size, nworkers int) Pipeable {
|
||||
}()
|
||||
|
||||
f := func(iterator IBioSequence, id int) {
|
||||
news := obiseq.MakeBioSequenceSlice()
|
||||
for iterator.Next() {
|
||||
news := obiseq.MakeBioSequenceSlice()
|
||||
sl := iterator.Get()
|
||||
for _, s := range sl.Slice() {
|
||||
|
||||
if s.Len() <= minsize {
|
||||
news = append(news, s)
|
||||
} else {
|
||||
for i := 0; i < s.Len(); i += step {
|
||||
end := obiutils.MinInt(i+length, s.Len())
|
||||
fusion:=false
|
||||
if (s.Len() - end) < step {
|
||||
end = s.Len()
|
||||
fusion = true
|
||||
}
|
||||
frg, err := s.Subsequence(i, end, false)
|
||||
|
||||
if err != nil {
|
||||
log.Panicln(err)
|
||||
}
|
||||
news = append(news, frg)
|
||||
if len(news) >= size {
|
||||
newiter.Push(MakeBioSequenceBatch(order(), news))
|
||||
news = obiseq.MakeBioSequenceSlice()
|
||||
// if len(news) >= size {
|
||||
// newiter.Push(MakeBioSequenceBatch(order(), news))
|
||||
// news = obiseq.MakeBioSequenceSlice()
|
||||
// }
|
||||
if fusion {
|
||||
i = s.Len()
|
||||
}
|
||||
}
|
||||
s.Recycle()
|
||||
}
|
||||
if len(news) >= size {
|
||||
o := order()
|
||||
newiter.Push(MakeBioSequenceBatch(o, news))
|
||||
news = obiseq.MakeBioSequenceSlice()
|
||||
}
|
||||
} // End of the slice loop
|
||||
newiter.Push(MakeBioSequenceBatch(sl.Order(), news))
|
||||
sl.Recycle(false)
|
||||
} // End of the iterator loop
|
||||
|
||||
if len(news) > 0 {
|
||||
newiter.Push(MakeBioSequenceBatch(order(), news))
|
||||
}
|
||||
// if len(news) > 0 {
|
||||
// newiter.Push(MakeBioSequenceBatch(order(), news))
|
||||
// }
|
||||
|
||||
newiter.Done()
|
||||
|
||||
@ -65,7 +70,7 @@ func IFragments(minsize, length, overlap, size, nworkers int) Pipeable {
|
||||
}
|
||||
go f(iterator, 0)
|
||||
|
||||
return newiter
|
||||
return newiter.SortBatches().Rebatch(size)
|
||||
}
|
||||
|
||||
return ifrg
|
||||
|
Reference in New Issue
Block a user