mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
Patch a bug for multiple amplicon per sequence.
Former-commit-id: b252d2de8e1a85d65c2951aa1958ee038e35741d
This commit is contained in:
@ -188,7 +188,6 @@ func MakeApatSequence(sequence *obiseq.BioSequence, circular bool, recycle ...Ap
|
|||||||
|
|
||||||
// copy the data into the buffer, by converting it to a Go array
|
// copy the data into the buffer, by converting it to a Go array
|
||||||
p := unsafe.Pointer(unsafe.SliceData(sequence.Sequence()))
|
p := unsafe.Pointer(unsafe.SliceData(sequence.Sequence()))
|
||||||
|
|
||||||
pseqc := C.new_apatseq((*C.char)(p), C.int32_t(ic), C.int32_t(seqlen),
|
pseqc := C.new_apatseq((*C.char)(p), C.int32_t(ic), C.int32_t(seqlen),
|
||||||
(*C.Seq)(out),
|
(*C.Seq)(out),
|
||||||
&errno, &errmsg)
|
&errno, &errmsg)
|
||||||
@ -200,6 +199,7 @@ func MakeApatSequence(sequence *obiseq.BioSequence, circular bool, recycle ...Ap
|
|||||||
C.free(p)
|
C.free(p)
|
||||||
// atomic.AddInt64(&_AllocatedApaSequences, -1)
|
// atomic.AddInt64(&_AllocatedApaSequences, -1)
|
||||||
}
|
}
|
||||||
|
|
||||||
return NilApatSequence, errors.New(message)
|
return NilApatSequence, errors.New(message)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -201,7 +201,6 @@ func OptionBatchSize(size int) WithOption {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func _Pcr(seq ApatSequence,
|
func _Pcr(seq ApatSequence,
|
||||||
sequence *obiseq.BioSequence,
|
|
||||||
opt Options) obiseq.BioSequenceSlice {
|
opt Options) obiseq.BioSequenceSlice {
|
||||||
results := make(obiseq.BioSequenceSlice, 0, 10)
|
results := make(obiseq.BioSequenceSlice, 0, 10)
|
||||||
|
|
||||||
@ -218,7 +217,7 @@ func _Pcr(seq ApatSequence,
|
|||||||
length := seq.Len() - begin
|
length := seq.Len() - begin
|
||||||
|
|
||||||
if opt.pointer.maxLength > 0 {
|
if opt.pointer.maxLength > 0 {
|
||||||
length = forwardMatches[len(forwardMatches)-1][2] - begin + opt.MaxLength() + reverse.Len()
|
length = forwardMatches[len(forwardMatches)-1][1] - begin + opt.MaxLength() + reverse.Len()
|
||||||
}
|
}
|
||||||
|
|
||||||
if opt.Circular() {
|
if opt.Circular() {
|
||||||
@ -254,26 +253,27 @@ func _Pcr(seq ApatSequence,
|
|||||||
if length > 0 && // For when primers touch or overlap
|
if length > 0 && // For when primers touch or overlap
|
||||||
(opt.MinLength() == 0 || length >= opt.MinLength()) &&
|
(opt.MinLength() == 0 || length >= opt.MinLength()) &&
|
||||||
(opt.MaxLength() == 0 || length <= opt.MaxLength()) {
|
(opt.MaxLength() == 0 || length <= opt.MaxLength()) {
|
||||||
amplicon, _ := sequence.Subsequence(fm[1], rm[0], opt.pointer.circular)
|
amplicon, _ := seq.pointer.reference.Subsequence(fm[1], rm[0], opt.pointer.circular)
|
||||||
log.Debugf("seq length : %d capacity : %d",amplicon.Len(),cap(amplicon.Sequence()))
|
log.Debugf("seq length : %d capacity : %d",amplicon.Len(),cap(amplicon.Sequence()))
|
||||||
annot := amplicon.Annotations()
|
annot := amplicon.Annotations()
|
||||||
obiutils.MustFillMap(annot, sequence.Annotations())
|
obiutils.MustFillMap(annot, seq.pointer.reference.Annotations())
|
||||||
|
|
||||||
annot["forward_primer"] = forward.String()
|
annot["forward_primer"] = forward.String()
|
||||||
|
|
||||||
match, _ := sequence.Subsequence(fm[0], fm[1], opt.pointer.circular)
|
match, _ := seq.pointer.reference.Subsequence(fm[0], fm[1], opt.pointer.circular)
|
||||||
annot["forward_match"] = match.String()
|
annot["forward_match"] = match.String()
|
||||||
match.Recycle()
|
match.Recycle()
|
||||||
|
|
||||||
annot["forward_error"] = erri
|
annot["forward_error"] = erri
|
||||||
|
|
||||||
annot["reverse_primer"] = reverse.String()
|
annot["reverse_primer"] = reverse.String()
|
||||||
match, _ = sequence.Subsequence(rm[0], rm[1], opt.pointer.circular)
|
match, _ = seq.pointer.reference.Subsequence(rm[0], rm[1], opt.pointer.circular)
|
||||||
match = match.ReverseComplement(true)
|
match = match.ReverseComplement(true)
|
||||||
annot["reverse_match"] = match.String()
|
annot["reverse_match"] = match.String()
|
||||||
match.Recycle()
|
match.Recycle()
|
||||||
|
|
||||||
annot["reverse_error"] = errj
|
annot["reverse_error"] = errj
|
||||||
|
annot["direction"] = "forward"
|
||||||
|
|
||||||
// log.Debugf("amplicon sequence capacity : %d", cap(amplicon.Sequence()))
|
// log.Debugf("amplicon sequence capacity : %d", cap(amplicon.Sequence()))
|
||||||
|
|
||||||
@ -287,13 +287,14 @@ func _Pcr(seq ApatSequence,
|
|||||||
}
|
}
|
||||||
|
|
||||||
forwardMatches = reverse.FindAllIndex(seq, 0, -1)
|
forwardMatches = reverse.FindAllIndex(seq, 0, -1)
|
||||||
|
|
||||||
if forwardMatches != nil {
|
if forwardMatches != nil {
|
||||||
|
|
||||||
begin := forwardMatches[0][0]
|
begin := forwardMatches[0][0]
|
||||||
length := seq.Len() - begin
|
length := seq.Len() - begin
|
||||||
|
|
||||||
if opt.pointer.maxLength > 0 {
|
if opt.pointer.maxLength > 0 {
|
||||||
length = forwardMatches[len(forwardMatches)-1][2] - begin + opt.MaxLength() + reverse.Len()
|
length = forwardMatches[len(forwardMatches)-1][1] - begin + opt.MaxLength() + reverse.Len()
|
||||||
}
|
}
|
||||||
|
|
||||||
if opt.Circular() {
|
if opt.Circular() {
|
||||||
@ -303,6 +304,7 @@ func _Pcr(seq ApatSequence,
|
|||||||
|
|
||||||
reverseMatches := cfwd.FindAllIndex(seq, begin, length)
|
reverseMatches := cfwd.FindAllIndex(seq, begin, length)
|
||||||
|
|
||||||
|
|
||||||
if reverseMatches != nil {
|
if reverseMatches != nil {
|
||||||
for _, fm := range forwardMatches {
|
for _, fm := range forwardMatches {
|
||||||
|
|
||||||
@ -329,14 +331,14 @@ func _Pcr(seq ApatSequence,
|
|||||||
if length > 0 && // For when primers touch or overlap
|
if length > 0 && // For when primers touch or overlap
|
||||||
(opt.MinLength() == 0 || length >= opt.MinLength()) &&
|
(opt.MinLength() == 0 || length >= opt.MinLength()) &&
|
||||||
(opt.MaxLength() == 0 || length <= opt.MaxLength()) {
|
(opt.MaxLength() == 0 || length <= opt.MaxLength()) {
|
||||||
amplicon, _ := sequence.Subsequence(fm[1], rm[0], opt.pointer.circular)
|
amplicon, _ := seq.pointer.reference.Subsequence(fm[1], rm[0], opt.pointer.circular)
|
||||||
amplicon = amplicon.ReverseComplement(true)
|
amplicon = amplicon.ReverseComplement(true)
|
||||||
|
|
||||||
annot := amplicon.Annotations()
|
annot := amplicon.Annotations()
|
||||||
obiutils.MustFillMap(annot, sequence.Annotations())
|
obiutils.MustFillMap(annot, seq.pointer.reference.Annotations())
|
||||||
annot["forward_primer"] = forward.String()
|
annot["forward_primer"] = forward.String()
|
||||||
|
|
||||||
match, _ := sequence.Subsequence(rm[0], rm[1], opt.pointer.circular)
|
match, _ := seq.pointer.reference.Subsequence(rm[0], rm[1], opt.pointer.circular)
|
||||||
match.ReverseComplement(true)
|
match.ReverseComplement(true)
|
||||||
annot["forward_match"] = match.String()
|
annot["forward_match"] = match.String()
|
||||||
match.Recycle()
|
match.Recycle()
|
||||||
@ -344,11 +346,13 @@ func _Pcr(seq ApatSequence,
|
|||||||
annot["forward_error"] = errj
|
annot["forward_error"] = errj
|
||||||
|
|
||||||
annot["reverse_primer"] = reverse.String()
|
annot["reverse_primer"] = reverse.String()
|
||||||
match, _ = sequence.Subsequence(fm[0], fm[1], opt.pointer.circular)
|
match, _ = seq.pointer.reference.Subsequence(fm[0], fm[1], opt.pointer.circular)
|
||||||
annot["reverse_match"] = match.String()
|
annot["reverse_match"] = match.String()
|
||||||
match.Recycle()
|
match.Recycle()
|
||||||
|
|
||||||
annot["reverse_error"] = erri
|
annot["reverse_error"] = erri
|
||||||
|
annot["direction"] = "reverse"
|
||||||
|
|
||||||
results = append(results, amplicon)
|
results = append(results, amplicon)
|
||||||
// log.Debugf("amplicon sequence capacity : %d", cap(amplicon.Sequence()))
|
// log.Debugf("amplicon sequence capacity : %d", cap(amplicon.Sequence()))
|
||||||
}
|
}
|
||||||
@ -372,7 +376,7 @@ func PCRSim(sequence *obiseq.BioSequence, options ...WithOption) obiseq.BioSeque
|
|||||||
seq, _ := MakeApatSequence(sequence, opt.Circular())
|
seq, _ := MakeApatSequence(sequence, opt.Circular())
|
||||||
defer seq.Free()
|
defer seq.Free()
|
||||||
|
|
||||||
results := _Pcr(seq, sequence, opt)
|
results := _Pcr(seq, opt)
|
||||||
|
|
||||||
return results
|
return results
|
||||||
}
|
}
|
||||||
@ -388,16 +392,16 @@ func _PCRSlice(sequences obiseq.BioSequenceSlice,
|
|||||||
// if AllocatedApaSequences() == 0 {
|
// if AllocatedApaSequences() == 0 {
|
||||||
// log.Panicln("Bizarre....")
|
// log.Panicln("Bizarre....")
|
||||||
// }
|
// }
|
||||||
amplicons := _Pcr(seq, sequences[0], options)
|
amplicons := _Pcr(seq, options)
|
||||||
|
|
||||||
if len(amplicons) > 0 {
|
if len(amplicons) > 0 {
|
||||||
results = append(results, amplicons...)
|
results = append(results, amplicons...)
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Debugf("Number of sequences in the slice : %d",len(sequences))
|
|
||||||
for _, sequence := range sequences[1:] {
|
for _, sequence := range sequences[1:] {
|
||||||
seq, _ = MakeApatSequence(sequence, options.Circular(), seq)
|
seq, _ = MakeApatSequence(sequence, options.Circular(), seq)
|
||||||
amplicons = _Pcr(seq, sequence, options)
|
amplicons = _Pcr(seq, options)
|
||||||
|
|
||||||
if len(amplicons) > 0 {
|
if len(amplicons) > 0 {
|
||||||
results = append(results, amplicons...)
|
results = append(results, amplicons...)
|
||||||
}
|
}
|
||||||
|
@ -18,7 +18,7 @@ import (
|
|||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils"
|
||||||
)
|
)
|
||||||
|
|
||||||
var _FileChunkSize = 1 << 26
|
var _FileChunkSize = 1 << 28
|
||||||
|
|
||||||
type _FileChunk struct {
|
type _FileChunk struct {
|
||||||
raw io.Reader
|
raw io.Reader
|
||||||
@ -197,7 +197,7 @@ func _ReadFlatFileChunk(reader io.Reader, readers chan _FileChunk) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Create an extended buffer to read from if the end of the last entry is not found in the current buffer
|
// Create an extended buffer to read from if the end of the last entry is not found in the current buffer
|
||||||
extbuff := make([]byte, 1<<20)
|
extbuff := make([]byte, 1<<22)
|
||||||
buff = buff[:l]
|
buff = buff[:l]
|
||||||
end := 0
|
end := 0
|
||||||
ic := 0
|
ic := 0
|
||||||
@ -260,7 +260,7 @@ func ReadEMBL(reader io.Reader, options ...WithOption) obiiter.IBioSequence {
|
|||||||
|
|
||||||
// for j := 0; j < opt.ParallelWorkers(); j++ {
|
// for j := 0; j < opt.ParallelWorkers(); j++ {
|
||||||
for j := 0; j < nworkers; j++ {
|
for j := 0; j < nworkers; j++ {
|
||||||
go _ParseEmblFile(opt.Source(),entry_channel, newIter)
|
go _ParseEmblFile(opt.Source(), entry_channel, newIter)
|
||||||
}
|
}
|
||||||
|
|
||||||
go _ReadFlatFileChunk(reader, entry_channel)
|
go _ReadFlatFileChunk(reader, entry_channel)
|
||||||
|
@ -11,7 +11,6 @@ func IFragments(minsize, length, overlap, size, nworkers int) Pipeable {
|
|||||||
step := length - overlap
|
step := length - overlap
|
||||||
|
|
||||||
ifrg := func(iterator IBioSequence) IBioSequence {
|
ifrg := func(iterator IBioSequence) IBioSequence {
|
||||||
order := obiutils.AtomicCounter()
|
|
||||||
newiter := MakeIBioSequence()
|
newiter := MakeIBioSequence()
|
||||||
iterator = iterator.SortBatches()
|
iterator = iterator.SortBatches()
|
||||||
|
|
||||||
@ -22,39 +21,45 @@ func IFragments(minsize, length, overlap, size, nworkers int) Pipeable {
|
|||||||
}()
|
}()
|
||||||
|
|
||||||
f := func(iterator IBioSequence, id int) {
|
f := func(iterator IBioSequence, id int) {
|
||||||
news := obiseq.MakeBioSequenceSlice()
|
|
||||||
for iterator.Next() {
|
for iterator.Next() {
|
||||||
|
news := obiseq.MakeBioSequenceSlice()
|
||||||
sl := iterator.Get()
|
sl := iterator.Get()
|
||||||
for _, s := range sl.Slice() {
|
for _, s := range sl.Slice() {
|
||||||
|
|
||||||
if s.Len() <= minsize {
|
if s.Len() <= minsize {
|
||||||
news = append(news, s)
|
news = append(news, s)
|
||||||
} else {
|
} else {
|
||||||
for i := 0; i < s.Len(); i += step {
|
for i := 0; i < s.Len(); i += step {
|
||||||
end := obiutils.MinInt(i+length, s.Len())
|
end := obiutils.MinInt(i+length, s.Len())
|
||||||
|
fusion:=false
|
||||||
|
if (s.Len() - end) < step {
|
||||||
|
end = s.Len()
|
||||||
|
fusion = true
|
||||||
|
}
|
||||||
frg, err := s.Subsequence(i, end, false)
|
frg, err := s.Subsequence(i, end, false)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Panicln(err)
|
log.Panicln(err)
|
||||||
}
|
}
|
||||||
news = append(news, frg)
|
news = append(news, frg)
|
||||||
if len(news) >= size {
|
// if len(news) >= size {
|
||||||
newiter.Push(MakeBioSequenceBatch(order(), news))
|
// newiter.Push(MakeBioSequenceBatch(order(), news))
|
||||||
news = obiseq.MakeBioSequenceSlice()
|
// news = obiseq.MakeBioSequenceSlice()
|
||||||
|
// }
|
||||||
|
if fusion {
|
||||||
|
i = s.Len()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
s.Recycle()
|
s.Recycle()
|
||||||
}
|
}
|
||||||
if len(news) >= size {
|
|
||||||
o := order()
|
|
||||||
newiter.Push(MakeBioSequenceBatch(o, news))
|
|
||||||
news = obiseq.MakeBioSequenceSlice()
|
|
||||||
}
|
|
||||||
} // End of the slice loop
|
} // End of the slice loop
|
||||||
|
newiter.Push(MakeBioSequenceBatch(sl.Order(), news))
|
||||||
sl.Recycle(false)
|
sl.Recycle(false)
|
||||||
} // End of the iterator loop
|
} // End of the iterator loop
|
||||||
|
|
||||||
if len(news) > 0 {
|
// if len(news) > 0 {
|
||||||
newiter.Push(MakeBioSequenceBatch(order(), news))
|
// newiter.Push(MakeBioSequenceBatch(order(), news))
|
||||||
}
|
// }
|
||||||
|
|
||||||
newiter.Done()
|
newiter.Done()
|
||||||
|
|
||||||
@ -65,7 +70,7 @@ func IFragments(minsize, length, overlap, size, nworkers int) Pipeable {
|
|||||||
}
|
}
|
||||||
go f(iterator, 0)
|
go f(iterator, 0)
|
||||||
|
|
||||||
return newiter
|
return newiter.SortBatches().Rebatch(size)
|
||||||
}
|
}
|
||||||
|
|
||||||
return ifrg
|
return ifrg
|
||||||
|
Reference in New Issue
Block a user