From e6b87ecd02502b4b4014e12596d131efb596e0eb Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Sat, 22 Jun 2024 21:01:53 +0200 Subject: [PATCH] Reduce memory allocation events Former-commit-id: fbdb2afc857b02adc2593e2278d3bd838e99b0b2 --- cmd/obitools/obicount/main.go | 1 + cmd/obitools/obipairing/main.go | 3 + cmd/obitools/obitag/main.go | 4 +- pkg/obialign/alignment.go | 37 +++++++------ pkg/obialign/pairedendalign.go | 17 +++++- pkg/obiformats/embl_read.go | 6 +- pkg/obiformats/fastaseq_read.go | 4 +- pkg/obiformats/fastqseq_read.go | 6 +- pkg/obiformats/fastseq_write_fastq.go | 17 +----- pkg/obiformats/genbank_read.go | 4 +- pkg/obiformats/seqfile_chunck_read.go | 5 +- pkg/obikmer/encodefourmer.go | 13 +++-- pkg/obioptions/options.go | 64 ++++++++++++++++++++-- pkg/obioptions/version.go | 2 +- pkg/obiseq/biosequence.go | 20 ++++++- pkg/obiseq/pool.go | 14 +++-- pkg/obitools/obiconvert/sequence_writer.go | 5 +- pkg/obitools/obipairing/pairing.go | 16 ++++-- pkg/obitools/obitagpcr/pcrtag.go | 3 +- 19 files changed, 166 insertions(+), 75 deletions(-) diff --git a/cmd/obitools/obicount/main.go b/cmd/obitools/obicount/main.go index 32ee842..2b0927d 100644 --- a/cmd/obitools/obicount/main.go +++ b/cmd/obitools/obicount/main.go @@ -35,6 +35,7 @@ func main() { _, args := optionParser(os.Args) + obioptions.SetStrictReadWorker(min(4, obioptions.CLIParallelWorkers())) fs, err := obiconvert.CLIReadBioSequences(args...) if err != nil { diff --git a/cmd/obitools/obipairing/main.go b/cmd/obitools/obipairing/main.go index 9948a3c..9e9ff02 100644 --- a/cmd/obitools/obipairing/main.go +++ b/cmd/obitools/obipairing/main.go @@ -32,6 +32,9 @@ func main() { optionParser := obioptions.GenerateOptionParser(obipairing.OptionSet) optionParser(os.Args) + + obioptions.SetStrictReadWorker(2) + obioptions.SetStrictWriteWorker(2) pairs, err := obipairing.CLIPairedSequence() if err != nil { diff --git a/cmd/obitools/obitag/main.go b/cmd/obitools/obitag/main.go index 9495318..f48fb72 100644 --- a/cmd/obitools/obitag/main.go +++ b/cmd/obitools/obitag/main.go @@ -33,7 +33,9 @@ func main() { // defer trace.Stop() obioptions.SetWorkerPerCore(2) - obioptions.SetReadWorkerPerCore(0.5) + obioptions.SetStrictReadWorker(1) + obioptions.SetStrictWriteWorker(1) + obioptions.SetBatchSize(10) optionParser := obioptions.GenerateOptionParser(obitag.OptionSet) diff --git a/pkg/obialign/alignment.go b/pkg/obialign/alignment.go index 360146f..48a482e 100644 --- a/pkg/obialign/alignment.go +++ b/pkg/obialign/alignment.go @@ -124,25 +124,26 @@ func BuildAlignment(seqA, seqB *obiseq.BioSequence, // In that case arenas will be allocated by the function but, they will not // be reusable for other alignments and desallocated at the BuildQualityConsensus // return. -func BuildQualityConsensus(seqA, seqB *obiseq.BioSequence, path []int, statOnMismatch bool) (*obiseq.BioSequence, int) { +func BuildQualityConsensus(seqA, seqB *obiseq.BioSequence, path []int, statOnMismatch bool, + arenaAlign PEAlignArena) (*obiseq.BioSequence, int) { - bufferSA := obiseq.GetSlice(seqA.Len()) - bufferSB := obiseq.GetSlice(seqB.Len()) - defer obiseq.RecycleSlice(&bufferSB) + bufferSA := arenaAlign.pointer.aligneSeqA + bufferSB := arenaAlign.pointer.aligneSeqB + // defer obiseq.RecycleSlice(&bufferSB) - bufferQA := obiseq.GetSlice(seqA.Len()) - bufferQB := obiseq.GetSlice(seqB.Len()) - defer obiseq.RecycleSlice(&bufferQB) + bufferQA := arenaAlign.pointer.aligneQualA + bufferQB := arenaAlign.pointer.aligneQualB + // defer obiseq.RecycleSlice(&bufferQB) _BuildAlignment(seqA.Sequence(), seqB.Sequence(), path, ' ', - &bufferSA, &bufferSB) + bufferSA, bufferSB) // log.Printf("#1 %s--> la : %d,%p lb : %d,%p qa : %d,%p qb : %d,%p\n", stamp, // len(*bufferSA), bufferSA, len(*bufferSB), bufferSB, // len(*bufferQA), bufferQA, len(*bufferQB), bufferQB) _BuildAlignment(seqA.Qualities(), seqB.Qualities(), path, byte(0), - &bufferQA, &bufferQB) + bufferQA, bufferQB) // log.Printf("#2 %s--> la : %d,%p lb : %d,%p qa : %d,%p qb : %d,%p\n", stamp, // len(*bufferSA), bufferSA, len(*bufferSB), bufferSB, @@ -157,10 +158,10 @@ func BuildQualityConsensus(seqA, seqB *obiseq.BioSequence, path []int, statOnMis match := 0 - for i, qA = range bufferQA { - nA := bufferSA[i] - nB := bufferSB[i] - qB = bufferQB[i] + for i, qA = range *bufferQA { + nA := (*bufferSA)[i] + nB := (*bufferSB)[i] + qB = (*bufferQB)[i] if statOnMismatch && nA != nB && nA != ' ' && nB != ' ' { mismatches[strings.ToUpper(fmt.Sprintf("(%c:%02d)->(%c:%02d)", nA, qA, nB, qB))] = i + 1 @@ -171,13 +172,13 @@ func BuildQualityConsensus(seqA, seqB *obiseq.BioSequence, path []int, statOnMis qm = qB } if qB > qA { - bufferSA[i] = bufferSB[i] + (*bufferSA)[i] = (*bufferSB)[i] qM = qB qm = qA } if qB == qA && nA != nB { nuc := _FourBitsBaseCode[nA&31] | _FourBitsBaseCode[nB&31] - bufferSA[i] = _FourBitsBaseDecode[nuc] + (*bufferSA)[i] = _FourBitsBaseDecode[nuc] } q := qA + qB @@ -195,15 +196,15 @@ func BuildQualityConsensus(seqA, seqB *obiseq.BioSequence, path []int, statOnMis q = 90 } - bufferQA[i] = q + (*bufferQA)[i] = q } consSeq := obiseq.NewBioSequence( seqA.Id(), - bufferSA, + *bufferSA, seqA.Definition(), ) - consSeq.SetQualities(bufferQA) + consSeq.SetQualities(*bufferQA) if statOnMismatch && len(mismatches) > 0 { consSeq.SetAttribute("pairing_mismatches", mismatches) diff --git a/pkg/obialign/pairedendalign.go b/pkg/obialign/pairedendalign.go index dbe429e..6817aac 100644 --- a/pkg/obialign/pairedendalign.go +++ b/pkg/obialign/pairedendalign.go @@ -13,6 +13,10 @@ type _PeAlignArena struct { path []int fastIndex [][]int fastBuffer []byte + aligneSeqA *[]byte + aligneSeqB *[]byte + aligneQualA *[]byte + aligneQualB *[]byte } // PEAlignArena defines memory arena usable by the @@ -30,12 +34,21 @@ var NilPEAlignArena = PEAlignArena{nil} // MakePEAlignArena makes a new arena for the alignment of two paired sequences // of maximum length indicated by lseqA and lseqB. func MakePEAlignArena(lseqA, lseqB int) PEAlignArena { + aligneSeqA := make([]byte, 0, lseqA+lseqB) + aligneSeqB := make([]byte, 0, lseqA+lseqB) + aligneQualA := make([]byte, 0, lseqA+lseqB) + aligneQualB := make([]byte, 0, lseqA+lseqB) + a := _PeAlignArena{ scoreMatrix: make([]int, 0, (lseqA+1)*(lseqB+1)), pathMatrix: make([]int, 0, (lseqA+1)*(lseqB+1)), path: make([]int, 2*(lseqA+lseqB)), fastIndex: make([][]int, 256), fastBuffer: make([]byte, 0, lseqA), + aligneSeqA: &aligneSeqA, + aligneSeqB: &aligneSeqB, + aligneQualA: &aligneQualA, + aligneQualB: &aligneQualB, } return PEAlignArena{&a} @@ -352,7 +365,7 @@ func PERightAlign(seqA, seqB *obiseq.BioSequence, gap, scale float64, func PEAlign(seqA, seqB *obiseq.BioSequence, gap, scale float64, fastAlign bool, delta int, fastScoreRel bool, - arena PEAlignArena) (int, []int, int, int, float64) { + arena PEAlignArena, shift_buff *map[int]int) (int, []int, int, int, float64) { var score, shift int var startA, startB int var partLen, over int @@ -374,7 +387,7 @@ func PEAlign(seqA, seqB *obiseq.BioSequence, &arena.pointer.fastIndex, &arena.pointer.fastBuffer) - shift, fastCount, fastScore = obikmer.FastShiftFourMer(index, seqA.Len(), seqB, fastScoreRel, nil) + shift, fastCount, fastScore = obikmer.FastShiftFourMer(index, shift_buff, seqA.Len(), seqB, fastScoreRel, nil) if shift > 0 { over = seqA.Len() - shift diff --git a/pkg/obiformats/embl_read.go b/pkg/obiformats/embl_read.go index 79da6b0..88490f4 100644 --- a/pkg/obiformats/embl_read.go +++ b/pkg/obiformats/embl_read.go @@ -169,7 +169,9 @@ func _ParseEmblFile(source string, input ChannelSeqFileChunk, func ReadEMBL(reader io.Reader, options ...WithOption) obiiter.IBioSequence { opt := MakeOptions(options) - entry_channel := ReadSeqFileChunk(reader, _EndOfLastEntry) + buff := make([]byte, 1024*1024*1024*256) + + entry_channel := ReadSeqFileChunk(reader, buff, _EndOfLastEntry) newIter := obiiter.MakeIBioSequence() nworkers := opt.ParallelWorkers() @@ -179,7 +181,7 @@ func ReadEMBL(reader io.Reader, options ...WithOption) obiiter.IBioSequence { newIter.Add(1) go _ParseEmblFile(opt.Source(), entry_channel, newIter, opt.WithFeatureTable(), - opt.BatchSize(), + opt.BatchSize(), opt.TotalSeqSize()) } diff --git a/pkg/obiformats/fastaseq_read.go b/pkg/obiformats/fastaseq_read.go index b828b3d..4eb6656 100644 --- a/pkg/obiformats/fastaseq_read.go +++ b/pkg/obiformats/fastaseq_read.go @@ -228,7 +228,9 @@ func ReadFasta(reader io.Reader, options ...WithOption) (obiiter.IBioSequence, e nworker := opt.ParallelWorkers() - chkchan := ReadSeqFileChunk(reader, _EndOfLastFastaEntry) + buff := make([]byte, 1024*1024*1024) + + chkchan := ReadSeqFileChunk(reader, buff, _EndOfLastFastaEntry) chunck_order := obiutils.AtomicCounter() for i := 0; i < nworker; i++ { diff --git a/pkg/obiformats/fastqseq_read.go b/pkg/obiformats/fastqseq_read.go index 511fb1c..1bfcb92 100644 --- a/pkg/obiformats/fastqseq_read.go +++ b/pkg/obiformats/fastqseq_read.go @@ -112,7 +112,7 @@ func _storeSequenceQuality(bytes *bytes.Buffer, out *obiseq.BioSequence, quality } for i := 0; i < len(q); i++ { - q[i] = q[i] - quality_shift + q[i] -= quality_shift } out.SetQualities(q) } @@ -309,7 +309,9 @@ func ReadFastq(reader io.Reader, options ...WithOption) (obiiter.IBioSequence, e nworker := opt.ParallelWorkers() chunkorder := obiutils.AtomicCounter() - chkchan := ReadSeqFileChunk(reader, _EndOfLastFastqEntry) + buff := make([]byte, 1024*1024*1024) + + chkchan := ReadSeqFileChunk(reader, buff, _EndOfLastFastqEntry) for i := 0; i < nworker; i++ { out.Add(1) diff --git a/pkg/obiformats/fastseq_write_fastq.go b/pkg/obiformats/fastseq_write_fastq.go index abd6feb..aa946bf 100644 --- a/pkg/obiformats/fastseq_write_fastq.go +++ b/pkg/obiformats/fastseq_write_fastq.go @@ -46,17 +46,8 @@ func FormatFastqBatch(batch obiiter.BioSequenceBatch, for _, seq := range batch.Slice() { if seq.Len() > 0 { fs := FormatFastq(seq, formater) - lb := bs.Len() - n, _ := bs.WriteString(fs) - - if n < len(fs) { - log.Panicln("FormatFastqBatch: Cannot write all FASTQ sequences") - } + bs.WriteString(fs) bs.WriteString("\n") - - if bs.Len()-lb < len(fs)+1 { - log.Panicln("FormatFastqBatch: Cannot write all FASTQ sequences correctly") - } } else { if skipEmpty { log.Warnf("Sequence %s is empty and skiped in output", seq.Id()) @@ -69,12 +60,6 @@ func FormatFastqBatch(batch obiiter.BioSequenceBatch, chunk := bs.Bytes() - chunk = chunk[:bs.Len()] - - if chunk[0] != '@' { - log.Panicln("FormatFastqBatch: FASTQ format error") - } - return chunk } diff --git a/pkg/obiformats/genbank_read.go b/pkg/obiformats/genbank_read.go index e4dbbaf..3017995 100644 --- a/pkg/obiformats/genbank_read.go +++ b/pkg/obiformats/genbank_read.go @@ -233,7 +233,9 @@ func ReadGenbank(reader io.Reader, options ...WithOption) obiiter.IBioSequence { opt := MakeOptions(options) // entry_channel := make(chan _FileChunk) - entry_channel := ReadSeqFileChunk(reader, _EndOfLastEntry) + buff := make([]byte, 1024*1024*1024*256) + + entry_channel := ReadSeqFileChunk(reader, buff, _EndOfLastEntry) newIter := obiiter.MakeIBioSequence() nworkers := opt.ParallelWorkers() diff --git a/pkg/obiformats/seqfile_chunck_read.go b/pkg/obiformats/seqfile_chunck_read.go index 84f994d..2eb57c4 100644 --- a/pkg/obiformats/seqfile_chunck_read.go +++ b/pkg/obiformats/seqfile_chunck_read.go @@ -33,10 +33,10 @@ type LastSeqRecord func([]byte) int // Returns: // None func ReadSeqFileChunk(reader io.Reader, + buff []byte, splitter LastSeqRecord) ChannelSeqFileChunk { var err error var fullbuff []byte - var buff []byte chunk_channel := make(ChannelSeqFileChunk) @@ -46,8 +46,7 @@ func ReadSeqFileChunk(reader io.Reader, i := 0 // Initialize the buffer to the size of a chunk of data - fullbuff = make([]byte, _FileChunkSize, _FileChunkSize*2) - buff = fullbuff + fullbuff = buff // Read from the reader until the buffer is full or the end of the file is reached l, err = io.ReadFull(reader, buff) diff --git a/pkg/obikmer/encodefourmer.go b/pkg/obikmer/encodefourmer.go index 9836390..4196cf3 100644 --- a/pkg/obikmer/encodefourmer.go +++ b/pkg/obikmer/encodefourmer.go @@ -99,20 +99,20 @@ func Index4mer(seq *obiseq.BioSequence, index *[][]int, buffer *[]byte) [][]int // FastShiftFourMer runs a Fast algorithm (similar to the one used in FASTA) to compare two sequences. // The returned values are two integer values. The shift between both the sequences and the count of // matching 4mer when this shift is applied between both the sequences. -func FastShiftFourMer(index [][]int, lindex int, seq *obiseq.BioSequence, relscore bool, buffer *[]byte) (int, int, float64) { +func FastShiftFourMer(index [][]int, shifts *map[int]int, lindex int, seq *obiseq.BioSequence, relscore bool, buffer *[]byte) (int, int, float64) { iternal_buffer := Encode4mer(seq, buffer) - shifts := make(map[int]int, 3*seq.Len()) + // shifts := make(map[int]int, 3*seq.Len()) for pos, code := range iternal_buffer { for _, refpos := range index[code] { shift := refpos - pos - count, ok := shifts[shift] + count, ok := (*shifts)[shift] if ok { - shifts[shift] = count + 1 + (*shifts)[shift] = count + 1 } else { - shifts[shift] = 1 + (*shifts)[shift] = 1 } } } @@ -121,7 +121,8 @@ func FastShiftFourMer(index [][]int, lindex int, seq *obiseq.BioSequence, relsco maxcount := 0 maxscore := -1.0 - for shift, count := range shifts { + for shift, count := range *shifts { + delete((*shifts), shift) score := float64(count) if relscore { over := -shift diff --git a/pkg/obioptions/options.go b/pkg/obioptions/options.go index af7321b..a5cdc14 100644 --- a/pkg/obioptions/options.go +++ b/pkg/obioptions/options.go @@ -15,11 +15,13 @@ import ( var _Debug = false var _WorkerPerCore = 2.0 -var _ReadWorkerPerCore = 1.0 +var _ReadWorkerPerCore = 0.5 +var _WriteWorkerPerCore = 0.25 var _StrictReadWorker = 0 +var _StrictWriteWorker = 0 var _ParallelFilesRead = 0 var _MaxAllowedCPU = runtime.NumCPU() -var _BatchSize = 5000 +var _BatchSize = 2000 var _Pprof = false var _Quality_Shift_Input = byte(33) var _Quality_Shift_Output = byte(33) @@ -175,12 +177,37 @@ func CLIParallelWorkers() int { // Returns an integer representing the number of parallel workers. func CLIReadParallelWorkers() int { if StrictReadWorker() == 0 { - return int(float64(CLIMaxCPU()) * ReadWorkerPerCore()) + n := int(float64(CLIMaxCPU()) * ReadWorkerPerCore()) + if n == 0 { + n = 1 + } + return n } else { return StrictReadWorker() } } +// CLIWriteParallelWorkers returns the number of parallel workers used for +// writing files. +// +// The number of parallel workers is determined by the command line option +// --max-cpu|-m and the environment variable OBIMAXCPU. This number is +// multiplied by the variable _WriteWorkerPerCore. +// +// No parameters. +// Returns an integer representing the number of parallel workers. +func CLIWriteParallelWorkers() int { + if StrictWriteWorker() == 0 { + n := int(float64(CLIMaxCPU()) * WriteWorkerPerCore()) + if n == 0 { + n = 1 + } + return n + } else { + return StrictWriteWorker() + } +} + // CLIMaxCPU returns the maximum number of CPU cores allowed. // // The maximum number of CPU cores is determined by the command line option @@ -247,6 +274,15 @@ func ReadWorkerPerCore() float64 { return _ReadWorkerPerCore } +// WriteWorkerPerCore returns the number of worker per CPU core for +// computing the result. +// +// No parameters. +// Returns a float64 representing the number of worker per CPU core. +func WriteWorkerPerCore() float64 { + return _WriteWorkerPerCore +} + // SetBatchSize sets the size of the sequence batches. // // n - an integer representing the size of the sequence batches. @@ -318,13 +354,33 @@ func StrictReadWorker() int { return _StrictReadWorker } +// SetWriteWorker sets the number of workers for writing files. +// +// The number of worker dedicated to writing files is determined +// as the number of allowed CPU cores multiplied by number of write workers per core. +// Setting the number of write workers using this function allows to decouple the number +// of write workers from the number of CPU cores. +// +// n - an integer representing the number of workers to be set. +func SetStrictWriteWorker(n int) { + _StrictWriteWorker = n +} + +// WriteWorker returns the number of workers for writing files. +// +// No parameters. +// Returns an integer representing the number of workers. +func StrictWriteWorker() int { + return _StrictWriteWorker +} + // ParallelFilesRead returns the number of files to be read in parallel. // // No parameters. // Returns an integer representing the number of files to be read. func ParallelFilesRead() int { if _ParallelFilesRead == 0 { - return CLIParallelWorkers() + return CLIReadParallelWorkers() } else { return _ParallelFilesRead } diff --git a/pkg/obioptions/version.go b/pkg/obioptions/version.go index cda2792..2989ca6 100644 --- a/pkg/obioptions/version.go +++ b/pkg/obioptions/version.go @@ -7,7 +7,7 @@ import ( // TODO: The version number is extracted from git. This induces that the version // corresponds to the last commit, and not the one when the file will be // commited -var _Commit = "612868a" +var _Commit = "bcaa264" var _Version = "Release 4.2.0" // Version returns the version of the obitools package. diff --git a/pkg/obiseq/biosequence.go b/pkg/obiseq/biosequence.go index 04d4185..92ae400 100644 --- a/pkg/obiseq/biosequence.go +++ b/pkg/obiseq/biosequence.go @@ -12,6 +12,7 @@ package obiseq import ( "crypto/md5" + "slices" "sync" "sync/atomic" @@ -418,12 +419,15 @@ func (s *BioSequence) SetFeatures(feature []byte) { s.feature = feature } -// Setting the sequence of the BioSequence. +// SetSequence sets the sequence of the BioSequence. +// +// Parameters: +// - sequence: a byte slice representing the sequence to be set. func (s *BioSequence) SetSequence(sequence []byte) { if s.sequence != nil { RecycleSlice(&s.sequence) } - s.sequence = CopySlice(obiutils.InPlaceToLower(sequence)) + s.sequence = obiutils.InPlaceToLower(CopySlice(sequence)) } // Setting the qualities of the BioSequence. @@ -507,3 +511,15 @@ func (s *BioSequence) Composition() map[byte]int { return counts } + +func (s *BioSequence) Grow(length int) { + if s.sequence == nil { + s.sequence = GetSlice(length) + } else { + s.sequence = slices.Grow(s.sequence, length) + } + + if s.qualities != nil { + s.qualities = slices.Grow(s.qualities, length) + } +} diff --git a/pkg/obiseq/pool.go b/pkg/obiseq/pool.go index aa6cb1c..efe0bda 100644 --- a/pkg/obiseq/pool.go +++ b/pkg/obiseq/pool.go @@ -84,7 +84,7 @@ func CopySlice(src []byte) []byte { var BioSequenceAnnotationPool = sync.Pool{ New: func() interface{} { - bs := make(Annotation, 5) + bs := make(Annotation, 1) return &bs }, } @@ -105,15 +105,17 @@ func RecycleAnnotation(a *Annotation) { // // It returns an Annotation. func GetAnnotation(values ...Annotation) Annotation { - a := Annotation(nil) + a := (*Annotation)(nil) - for a == nil { - a = *(BioSequenceAnnotationPool.Get().(*Annotation)) + for a == nil || (*a == nil) { + a = BioSequenceAnnotationPool.Get().(*Annotation) } + annot := *a + if len(values) > 0 { - obiutils.MustFillMap(a, values[0]) + obiutils.MustFillMap(annot, values[0]) } - return a + return annot } diff --git a/pkg/obitools/obiconvert/sequence_writer.go b/pkg/obitools/obiconvert/sequence_writer.go index 52e2e50..172dbc5 100644 --- a/pkg/obitools/obiconvert/sequence_writer.go +++ b/pkg/obitools/obiconvert/sequence_writer.go @@ -53,10 +53,7 @@ func CLIWriteBioSequences(iterator obiiter.IBioSequence, opts = append(opts, obiformats.OptionsFastSeqHeaderFormat(obiformats.FormatFastSeqJsonHeader)) } - nworkers := obioptions.CLIParallelWorkers() / 4 - if nworkers < 2 { - nworkers = 2 - } + nworkers := obioptions.CLIWriteParallelWorkers() opts = append(opts, obiformats.OptionsParallelWorkers(nworkers)) opts = append(opts, obiformats.OptionsBatchSize(obioptions.CLIBatchSize())) diff --git a/pkg/obitools/obipairing/pairing.go b/pkg/obitools/obipairing/pairing.go index 42bf653..0ceb7be 100644 --- a/pkg/obitools/obipairing/pairing.go +++ b/pkg/obitools/obipairing/pairing.go @@ -55,6 +55,8 @@ func JoinPairedSequence(seqA, seqB *obiseq.BioSequence, inplace bool) *obiseq.Bi seqA = seqA.Copy() } + seqA.Grow(seqB.Len() + 10) + seqA.WriteString("..........") seqA.Write(seqB.Sequence()) @@ -108,13 +110,16 @@ func JoinPairedSequence(seqA, seqB *obiseq.BioSequence, inplace bool) *obiseq.Bi func AssemblePESequences(seqA, seqB *obiseq.BioSequence, gap, scale float64, delta, minOverlap int, minIdentity float64, withStats bool, inplace bool, fastAlign, fastModeRel bool, - arenaAlign obialign.PEAlignArena) *obiseq.BioSequence { + arenaAlign obialign.PEAlignArena, shifh_buff *map[int]int) *obiseq.BioSequence { - score, path, fastcount, over, fastscore := obialign.PEAlign(seqA, seqB, + score, path, fastcount, over, fastscore := obialign.PEAlign( + seqA, seqB, gap, scale, fastAlign, delta, fastModeRel, - arenaAlign) - cons, match := obialign.BuildQualityConsensus(seqA, seqB, path, true) + arenaAlign, shifh_buff, + ) + + cons, match := obialign.BuildQualityConsensus(seqA, seqB, path, true, arenaAlign) left := path[0] right := 0 @@ -238,6 +243,7 @@ func IAssemblePESequencesBatch(iterator obiiter.IBioSequence, f := func(iterator obiiter.IBioSequence, wid int) { arena := obialign.MakePEAlignArena(150, 150) + shifts := make(map[int]int) for iterator.Next() { batch := iterator.Get() @@ -246,7 +252,7 @@ func IAssemblePESequencesBatch(iterator obiiter.IBioSequence, B := A.PairedWith() cons[i] = AssemblePESequences(A, B.ReverseComplement(true), gap, scale, - delta, minOverlap, minIdentity, withStats, true, fastAlign, fastModeRel, arena) + delta, minOverlap, minIdentity, withStats, true, fastAlign, fastModeRel, arena, &shifts) } newIter.Push(obiiter.MakeBioSequenceBatch( batch.Order(), diff --git a/pkg/obitools/obitagpcr/pcrtag.go b/pkg/obitools/obitagpcr/pcrtag.go index d7eb6c2..82d799c 100644 --- a/pkg/obitools/obitagpcr/pcrtag.go +++ b/pkg/obitools/obitagpcr/pcrtag.go @@ -37,6 +37,7 @@ func IPCRTagPESequencesBatch(iterator obiiter.IBioSequence, f := func(iterator obiiter.IBioSequence, wid int) { arena := obialign.MakePEAlignArena(150, 150) var err error + shifts := make(map[int]int) for iterator.Next() { batch := iterator.Get() @@ -46,7 +47,7 @@ func IPCRTagPESequencesBatch(iterator obiiter.IBioSequence, A.Copy(), B.ReverseComplement(false), gap, scale, delta, minOverlap, minIdentity, withStats, true, - fastAlign, fastScoreRel, arena, + fastAlign, fastScoreRel, arena, &shifts, ) consensus, err = ngsfilter.ExtractBarcode(consensus, true)