diff --git a/pkg/obiapat/pattern.go b/pkg/obiapat/pattern.go index 6016d93..33773c6 100644 --- a/pkg/obiapat/pattern.go +++ b/pkg/obiapat/pattern.go @@ -188,7 +188,6 @@ func MakeApatSequence(sequence *obiseq.BioSequence, circular bool, recycle ...Ap // copy the data into the buffer, by converting it to a Go array p := unsafe.Pointer(unsafe.SliceData(sequence.Sequence())) - pseqc := C.new_apatseq((*C.char)(p), C.int32_t(ic), C.int32_t(seqlen), (*C.Seq)(out), &errno, &errmsg) @@ -200,6 +199,7 @@ func MakeApatSequence(sequence *obiseq.BioSequence, circular bool, recycle ...Ap C.free(p) // atomic.AddInt64(&_AllocatedApaSequences, -1) } + return NilApatSequence, errors.New(message) } diff --git a/pkg/obiapat/pcr.go b/pkg/obiapat/pcr.go index a99792c..f090482 100644 --- a/pkg/obiapat/pcr.go +++ b/pkg/obiapat/pcr.go @@ -201,7 +201,6 @@ func OptionBatchSize(size int) WithOption { } func _Pcr(seq ApatSequence, - sequence *obiseq.BioSequence, opt Options) obiseq.BioSequenceSlice { results := make(obiseq.BioSequenceSlice, 0, 10) @@ -218,7 +217,7 @@ func _Pcr(seq ApatSequence, length := seq.Len() - begin if opt.pointer.maxLength > 0 { - length = forwardMatches[len(forwardMatches)-1][2] - begin + opt.MaxLength() + reverse.Len() + length = forwardMatches[len(forwardMatches)-1][1] - begin + opt.MaxLength() + reverse.Len() } if opt.Circular() { @@ -254,26 +253,27 @@ func _Pcr(seq ApatSequence, if length > 0 && // For when primers touch or overlap (opt.MinLength() == 0 || length >= opt.MinLength()) && (opt.MaxLength() == 0 || length <= opt.MaxLength()) { - amplicon, _ := sequence.Subsequence(fm[1], rm[0], opt.pointer.circular) + amplicon, _ := seq.pointer.reference.Subsequence(fm[1], rm[0], opt.pointer.circular) log.Debugf("seq length : %d capacity : %d",amplicon.Len(),cap(amplicon.Sequence())) annot := amplicon.Annotations() - obiutils.MustFillMap(annot, sequence.Annotations()) + obiutils.MustFillMap(annot, seq.pointer.reference.Annotations()) annot["forward_primer"] = forward.String() - match, _ := sequence.Subsequence(fm[0], fm[1], opt.pointer.circular) + match, _ := seq.pointer.reference.Subsequence(fm[0], fm[1], opt.pointer.circular) annot["forward_match"] = match.String() match.Recycle() annot["forward_error"] = erri annot["reverse_primer"] = reverse.String() - match, _ = sequence.Subsequence(rm[0], rm[1], opt.pointer.circular) + match, _ = seq.pointer.reference.Subsequence(rm[0], rm[1], opt.pointer.circular) match = match.ReverseComplement(true) annot["reverse_match"] = match.String() match.Recycle() annot["reverse_error"] = errj + annot["direction"] = "forward" // log.Debugf("amplicon sequence capacity : %d", cap(amplicon.Sequence())) @@ -287,13 +287,14 @@ func _Pcr(seq ApatSequence, } forwardMatches = reverse.FindAllIndex(seq, 0, -1) + if forwardMatches != nil { begin := forwardMatches[0][0] length := seq.Len() - begin if opt.pointer.maxLength > 0 { - length = forwardMatches[len(forwardMatches)-1][2] - begin + opt.MaxLength() + reverse.Len() + length = forwardMatches[len(forwardMatches)-1][1] - begin + opt.MaxLength() + reverse.Len() } if opt.Circular() { @@ -302,6 +303,7 @@ func _Pcr(seq ApatSequence, } reverseMatches := cfwd.FindAllIndex(seq, begin, length) + if reverseMatches != nil { for _, fm := range forwardMatches { @@ -329,14 +331,14 @@ func _Pcr(seq ApatSequence, if length > 0 && // For when primers touch or overlap (opt.MinLength() == 0 || length >= opt.MinLength()) && (opt.MaxLength() == 0 || length <= opt.MaxLength()) { - amplicon, _ := sequence.Subsequence(fm[1], rm[0], opt.pointer.circular) + amplicon, _ := seq.pointer.reference.Subsequence(fm[1], rm[0], opt.pointer.circular) amplicon = amplicon.ReverseComplement(true) annot := amplicon.Annotations() - obiutils.MustFillMap(annot, sequence.Annotations()) + obiutils.MustFillMap(annot, seq.pointer.reference.Annotations()) annot["forward_primer"] = forward.String() - match, _ := sequence.Subsequence(rm[0], rm[1], opt.pointer.circular) + match, _ := seq.pointer.reference.Subsequence(rm[0], rm[1], opt.pointer.circular) match.ReverseComplement(true) annot["forward_match"] = match.String() match.Recycle() @@ -344,11 +346,13 @@ func _Pcr(seq ApatSequence, annot["forward_error"] = errj annot["reverse_primer"] = reverse.String() - match, _ = sequence.Subsequence(fm[0], fm[1], opt.pointer.circular) + match, _ = seq.pointer.reference.Subsequence(fm[0], fm[1], opt.pointer.circular) annot["reverse_match"] = match.String() match.Recycle() annot["reverse_error"] = erri + annot["direction"] = "reverse" + results = append(results, amplicon) // log.Debugf("amplicon sequence capacity : %d", cap(amplicon.Sequence())) } @@ -372,7 +376,7 @@ func PCRSim(sequence *obiseq.BioSequence, options ...WithOption) obiseq.BioSeque seq, _ := MakeApatSequence(sequence, opt.Circular()) defer seq.Free() - results := _Pcr(seq, sequence, opt) + results := _Pcr(seq, opt) return results } @@ -388,16 +392,16 @@ func _PCRSlice(sequences obiseq.BioSequenceSlice, // if AllocatedApaSequences() == 0 { // log.Panicln("Bizarre....") // } - amplicons := _Pcr(seq, sequences[0], options) + amplicons := _Pcr(seq, options) if len(amplicons) > 0 { results = append(results, amplicons...) } - log.Debugf("Number of sequences in the slice : %d",len(sequences)) for _, sequence := range sequences[1:] { seq, _ = MakeApatSequence(sequence, options.Circular(), seq) - amplicons = _Pcr(seq, sequence, options) + amplicons = _Pcr(seq, options) + if len(amplicons) > 0 { results = append(results, amplicons...) } diff --git a/pkg/obiformats/embl_read.go b/pkg/obiformats/embl_read.go index 0ff9a6a..73db113 100644 --- a/pkg/obiformats/embl_read.go +++ b/pkg/obiformats/embl_read.go @@ -18,7 +18,7 @@ import ( "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils" ) -var _FileChunkSize = 1 << 26 +var _FileChunkSize = 1 << 28 type _FileChunk struct { raw io.Reader @@ -197,7 +197,7 @@ func _ReadFlatFileChunk(reader io.Reader, readers chan _FileChunk) { } // Create an extended buffer to read from if the end of the last entry is not found in the current buffer - extbuff := make([]byte, 1<<20) + extbuff := make([]byte, 1<<22) buff = buff[:l] end := 0 ic := 0 @@ -260,7 +260,7 @@ func ReadEMBL(reader io.Reader, options ...WithOption) obiiter.IBioSequence { // for j := 0; j < opt.ParallelWorkers(); j++ { for j := 0; j < nworkers; j++ { - go _ParseEmblFile(opt.Source(),entry_channel, newIter) + go _ParseEmblFile(opt.Source(), entry_channel, newIter) } go _ReadFlatFileChunk(reader, entry_channel) diff --git a/pkg/obiiter/fragment.go b/pkg/obiiter/fragment.go index 3857c3b..785e4d9 100644 --- a/pkg/obiiter/fragment.go +++ b/pkg/obiiter/fragment.go @@ -11,7 +11,6 @@ func IFragments(minsize, length, overlap, size, nworkers int) Pipeable { step := length - overlap ifrg := func(iterator IBioSequence) IBioSequence { - order := obiutils.AtomicCounter() newiter := MakeIBioSequence() iterator = iterator.SortBatches() @@ -22,39 +21,45 @@ func IFragments(minsize, length, overlap, size, nworkers int) Pipeable { }() f := func(iterator IBioSequence, id int) { - news := obiseq.MakeBioSequenceSlice() for iterator.Next() { + news := obiseq.MakeBioSequenceSlice() sl := iterator.Get() for _, s := range sl.Slice() { + if s.Len() <= minsize { news = append(news, s) } else { for i := 0; i < s.Len(); i += step { end := obiutils.MinInt(i+length, s.Len()) + fusion:=false + if (s.Len() - end) < step { + end = s.Len() + fusion = true + } frg, err := s.Subsequence(i, end, false) + if err != nil { log.Panicln(err) } news = append(news, frg) - if len(news) >= size { - newiter.Push(MakeBioSequenceBatch(order(), news)) - news = obiseq.MakeBioSequenceSlice() + // if len(news) >= size { + // newiter.Push(MakeBioSequenceBatch(order(), news)) + // news = obiseq.MakeBioSequenceSlice() + // } + if fusion { + i = s.Len() } } s.Recycle() } - if len(news) >= size { - o := order() - newiter.Push(MakeBioSequenceBatch(o, news)) - news = obiseq.MakeBioSequenceSlice() - } } // End of the slice loop + newiter.Push(MakeBioSequenceBatch(sl.Order(), news)) sl.Recycle(false) } // End of the iterator loop - if len(news) > 0 { - newiter.Push(MakeBioSequenceBatch(order(), news)) - } + // if len(news) > 0 { + // newiter.Push(MakeBioSequenceBatch(order(), news)) + // } newiter.Done() @@ -65,7 +70,7 @@ func IFragments(minsize, length, overlap, size, nworkers int) Pipeable { } go f(iterator, 0) - return newiter + return newiter.SortBatches().Rebatch(size) } return ifrg