From 251d3be9239431d55c4f62f10f8ac6d901f1d8b2 Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Mon, 24 Jan 2022 17:26:30 +0100 Subject: [PATCH] Add automatic garbage collection on ApatPattern --- cmd/obitools/obicount/main.go | 6 +-- cmd/obitools/obifind/main.go | 8 ++-- cmd/obitools/obipairing/main.go | 4 +- pkg/obiapat/pattern.go | 60 ++++++++++++++++++------ pkg/obiapat/pcr.go | 74 ++++++++++++------------------ pkg/obitools/obicount/options.go | 6 +-- pkg/obitools/obifind/iterator.go | 4 +- pkg/obitools/obifind/options.go | 20 ++++---- pkg/obitools/obipairing/options.go | 20 +++++--- pkg/obitools/obipairing/pairing.go | 9 ++-- 10 files changed, 119 insertions(+), 92 deletions(-) diff --git a/cmd/obitools/obicount/main.go b/cmd/obitools/obicount/main.go index e99ea10..d4d04e8 100644 --- a/cmd/obitools/obicount/main.go +++ b/cmd/obitools/obicount/main.go @@ -49,15 +49,15 @@ func main() { (&s).Recycle() } - if obicount.IsPrintingVariantCount() { + if obicount.CLIIsPrintingVariantCount() { fmt.Printf(" %d", nvariant) } - if obicount.IsPrintingReadCount() { + if obicount.CLIIsPrintingReadCount() { fmt.Printf(" %d", nread) } - if obicount.IsPrintingSymbolCount() { + if obicount.CLIIsPrintingSymbolCount() { fmt.Printf(" %d", nsymbol) } diff --git a/cmd/obitools/obifind/main.go b/cmd/obitools/obifind/main.go index 5ff44ff..126b024 100644 --- a/cmd/obitools/obifind/main.go +++ b/cmd/obitools/obifind/main.go @@ -22,13 +22,13 @@ func main() { } switch { - case obifind.RequestsPathForTaxid() >= 0: - taxonomy, err := obifind.LoadSelectedTaxonomy() + case obifind.CLIRequestsPathForTaxid() >= 0: + taxonomy, err := obifind.CLILoadSelectedTaxonomy() if err != nil { fmt.Printf("%+v", err) } - taxon, err := taxonomy.Taxon(obifind.RequestsPathForTaxid()) + taxon, err := taxonomy.Taxon(obifind.CLIRequestsPathForTaxid()) if err != nil { fmt.Printf("%+v", err) @@ -44,7 +44,7 @@ func main() { fmt.Sprintf("path:%d", taxon.Taxid())) case len(args) == 0: - taxonomy, err := 
obifind.LoadSelectedTaxonomy() + taxonomy, err := obifind.CLILoadSelectedTaxonomy() if err != nil { fmt.Printf("%+v", err) } diff --git a/cmd/obitools/obipairing/main.go b/cmd/obitools/obipairing/main.go index 2a29d30..ab28c6b 100644 --- a/cmd/obitools/obipairing/main.go +++ b/cmd/obitools/obipairing/main.go @@ -7,6 +7,7 @@ import ( "git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obimultiplex" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obipairing" ) @@ -28,7 +29,7 @@ func main() { trace.Start(ftrace) defer trace.Stop() - optionParser := obioptions.GenerateOptionParser(obipairing.OptionSet) + optionParser := obioptions.GenerateOptionParser(obimultiplex.OptionSet) optionParser(os.Args) pairs, _ := obipairing.IBatchPairedSequence() @@ -36,6 +37,7 @@ func main() { obipairing.GapPenality(), obipairing.Delta(), obipairing.MinOverlap(), + obipairing.MinIdentity(), obipairing.WithStats(), obioptions.ParallelWorkers(), ) diff --git a/pkg/obiapat/pattern.go b/pkg/obiapat/pattern.go index ee3ff33..d54423a 100644 --- a/pkg/obiapat/pattern.go +++ b/pkg/obiapat/pattern.go @@ -8,6 +8,7 @@ package obiapat import "C" import ( "errors" + "runtime" "unsafe" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" @@ -17,10 +18,14 @@ var _MaxPatLen = int(C.MAX_PAT_LEN) // ApatPattern stores a regular pattern usable by the // Apat algorithm functions and methods -type ApatPattern struct { +type _ApatPattern struct { pointer *C.Pattern } +type ApatPattern struct { + pointer *_ApatPattern +} + // ApatSequence stores sequence in structure usable by the // Apat algorithm functions and methods type ApatSequence struct { @@ -58,15 +63,22 @@ func MakeApatPattern(pattern string, errormax int) (ApatPattern, error) { var errno C.int32_t var errmsg *C.char - ap := C.buildPattern(cpattern, cerrormax, &errno, &errmsg) + apc := 
C.buildPattern(cpattern, cerrormax, &errno, &errmsg) - if ap == nil { + if apc == nil { message := C.GoString(errmsg) C.free(unsafe.Pointer(errmsg)) return NilApatPattern, errors.New(message) } - return ApatPattern{pointer: ap}, nil + ap := _ApatPattern{apc} + + runtime.SetFinalizer(&ap, func(p *_ApatPattern) { + // log.Printf("Finaliser called on %s\n", C.GoString(p.pointer.cpat)) + C.free(unsafe.Pointer(p.pointer)) + }) + + return ApatPattern{pointer: &ap}, nil } // ReverseComplement method builds a new ApatPattern @@ -75,38 +87,56 @@ func MakeApatPattern(pattern string, errormax int) (ApatPattern, error) { func (pattern ApatPattern) ReverseComplement() (ApatPattern, error) { var errno C.int32_t var errmsg *C.char - ap := C.complementPattern((*C.Pattern)(pattern.pointer), &errno, &errmsg) + apc := C.complementPattern((*C.Pattern)(pattern.pointer.pointer), &errno, &errmsg) - if ap == nil { + if apc == nil { message := C.GoString(errmsg) C.free(unsafe.Pointer(errmsg)) return ApatPattern{nil}, errors.New(message) } - return ApatPattern{pointer: ap}, nil + ap := _ApatPattern{apc} + + runtime.SetFinalizer(&ap, func(p *_ApatPattern) { + // log.Printf("Finaliser called on %s\n", C.GoString(p.pointer.cpat)) + C.free(unsafe.Pointer(p.pointer)) + }) + + return ApatPattern{pointer: &ap}, nil } // String method casts the ApatPattern to a Go String. func (pattern ApatPattern) String() string { - return C.GoString(pattern.pointer.cpat) + return C.GoString(pattern.pointer.pointer.cpat) } // Length method returns the length of the matched pattern. func (pattern ApatPattern) Length() int { - return int(pattern.pointer.patlen) + return int(pattern.pointer.pointer.patlen) } -// Free method ensure that the C structure wrapped is -// desallocated +// Release the C allocated memory of an ApatPattern instance. +// +// The method ensures that the C structure wrapped in +// an ApatPattern instance is released. 
Normally this +// action is handled by a finalizer and the call +// to the Free method is not mandatory. Nevertheless, +// if you choose to call this method, it will disconnect +// the finalizer associated to the ApatPattern instance +// to avoid double freeing. +// func (pattern ApatPattern) Free() { - C.free(unsafe.Pointer(pattern.pointer)) + // log.Printf("Free called on %s\n", C.GoString(pattern.pointer.pointer.cpat)) + C.free(unsafe.Pointer(pattern.pointer.pointer)) + runtime.SetFinalizer(pattern.pointer, nil) + pattern.pointer = nil } // Print method prints the ApatPattern to the standard output. // This is mainly a debug method. func (pattern ApatPattern) Print() { - C.PrintDebugPattern(C.PatternPtr(pattern.pointer)) + C.PrintDebugPattern(C.PatternPtr(pattern.pointer.pointer)) } // MakeApatSequence casts an obiseq.BioSequence to an ApatSequence. @@ -197,7 +227,7 @@ func (pattern ApatPattern) FindAllIndex(sequence ApatSequence, limits ...int) (l } nhits := int(C.ManberAll(sequence.pointer, - pattern.pointer, + pattern.pointer.pointer, 0, C.int32_t(begin), C.int32_t(length+C.MAX_PAT_LEN))) @@ -208,7 +238,7 @@ func (pattern ApatPattern) FindAllIndex(sequence ApatSequence, limits ...int) (l stktmp := (*[1 << 30]int32)(unsafe.Pointer(sequence.pointer.hitpos[0].val)) errtmp := (*[1 << 30]int32)(unsafe.Pointer(sequence.pointer.hiterr[0].val)) - patlen := int(pattern.pointer.patlen) + patlen := int(pattern.pointer.pointer.patlen) for i := 0; i < nhits; i++ { start := int(stktmp[i]) diff --git a/pkg/obiapat/pcr.go b/pkg/obiapat/pcr.go index 32a9e7a..b6e17e0 100644 --- a/pkg/obiapat/pcr.go +++ b/pkg/obiapat/pcr.go @@ -217,24 +217,6 @@ func OptionBatchSize(size int) WithOption { return f } -func (options Options) Free() { - if options.pointer.forward.pointer != nil { - options.pointer.forward.Free() - } - - if options.pointer.cfwd.pointer != nil { - options.pointer.cfwd.Free() - } - - if options.pointer.reverse.pointer != nil { - options.pointer.reverse.Free() 
- } - - if options.pointer.crev.pointer != nil { - options.pointer.crev.Free() - } -} - func _Pcr(seq ApatSequence, sequence obiseq.BioSequence, opt Options) obiseq.BioSequenceSlice { @@ -397,7 +379,6 @@ func _Pcr(seq ApatSequence, func PCR(sequence obiseq.BioSequence, options ...WithOption) obiseq.BioSequenceSlice { opt := MakeOptions(options) - defer opt.Free() seq, _ := MakeApatSequence(sequence, opt.Circular()) defer seq.Free() @@ -407,6 +388,33 @@ func PCR(sequence obiseq.BioSequence, options ...WithOption) obiseq.BioSequenceS return results } +func _PCRSlice(sequences obiseq.BioSequenceSlice, + options Options) obiseq.BioSequenceSlice { + + results := make(obiseq.BioSequenceSlice, 0, len(sequences)) + + if len(sequences) > 0 { + seq, _ := MakeApatSequence(sequences[0], options.Circular()) + amplicons := _Pcr(seq, sequences[0], options) + + if len(amplicons) > 0 { + results = append(results, amplicons...) + } + + for _, sequence := range sequences[1:] { + seq, _ = MakeApatSequence(sequence, options.Circular(), seq) + amplicons = _Pcr(seq, sequence, options) + if len(amplicons) > 0 { + results = append(results, amplicons...) + } + } + + seq.Free() + } + + return results +} + // PCRSlice runs the PCR simulation algorithm on a set of // obiseq.BioSequence instances grouped in a obiseq.BioSequenceSlice. // PCR parameters are @@ -415,39 +423,17 @@ func PCR(sequence obiseq.BioSequence, options ...WithOption) obiseq.BioSequenceS func PCRSlice(sequences obiseq.BioSequenceSlice, options ...WithOption) obiseq.BioSequenceSlice { - results := make(obiseq.BioSequenceSlice, 0, len(sequences)) - opt := MakeOptions(options) - defer opt.Free() - - if len(sequences) > 0 { - seq, _ := MakeApatSequence(sequences[0], opt.Circular()) - amplicons := _Pcr(seq, sequences[0], opt) - - if len(amplicons) > 0 { - results = append(results, amplicons...) 
- } - - for _, sequence := range sequences[1:] { - seq, _ = MakeApatSequence(sequence, opt.Circular(), seq) - amplicons = _Pcr(seq, sequence, opt) - if len(amplicons) > 0 { - results = append(results, amplicons...) - } - } - - seq.Free() - } - - return results + return _PCRSlice(sequences, opt) } // PCRSliceWorker is a worker function builder which produce // job function usable by the obiseq.MakeISliceWorker function. func PCRSliceWorker(options ...WithOption) obiseq.SeqSliceWorker { + opt := MakeOptions(options) worker := func(sequences obiseq.BioSequenceSlice) obiseq.BioSequenceSlice { - return PCRSlice(sequences, options...) + return _PCRSlice(sequences, opt) } return worker diff --git a/pkg/obitools/obicount/options.go b/pkg/obitools/obicount/options.go index 257e4cc..7c1b6a0 100644 --- a/pkg/obitools/obicount/options.go +++ b/pkg/obitools/obicount/options.go @@ -28,21 +28,21 @@ func OptionSet(options *getoptions.GetOpt) { // Returns true if the number of reads described in the // file has to be printed. -func IsPrintingReadCount() bool { +func CLIIsPrintingReadCount() bool { return __read_count__ || !(__read_count__ || __variant_count__ || __symbol_count__) } // Returns true if the number of sequence variants described in the // file has to be printed. -func IsPrintingVariantCount() bool { +func CLIIsPrintingVariantCount() bool { return __variant_count__ || !(__read_count__ || __variant_count__ || __symbol_count__) } // Returns true if the number of symbols (sum of the sequence lengths) // described in the file has to be printed. 
-func IsPrintingSymbolCount() bool { +func CLIIsPrintingSymbolCount() bool { return __symbol_count__ || !(__read_count__ || __variant_count__ || __symbol_count__) } diff --git a/pkg/obitools/obifind/iterator.go b/pkg/obitools/obifind/iterator.go index cf437cd..bdbd9cc 100644 --- a/pkg/obitools/obifind/iterator.go +++ b/pkg/obitools/obifind/iterator.go @@ -22,7 +22,7 @@ func IFilterRankRestriction() func(*obitax.ITaxonSet) *obitax.ITaxonSet { } func ITaxonNameMatcher() (func(string) *obitax.ITaxonSet, error) { - taxonomy, err := LoadSelectedTaxonomy() + taxonomy, err := CLILoadSelectedTaxonomy() if err != nil { return nil, err @@ -37,7 +37,7 @@ func ITaxonNameMatcher() (func(string) *obitax.ITaxonSet, error) { func ITaxonRestrictions() (func(*obitax.ITaxonSet) *obitax.ITaxonSet, error) { - clades, err := TaxonomicalRestrictions() + clades, err := CLITaxonomicalRestrictions() if err != nil { return nil, err diff --git a/pkg/obitools/obifind/options.go b/pkg/obitools/obifind/options.go index 7cb19de..6a114be 100644 --- a/pkg/obitools/obifind/options.go +++ b/pkg/obitools/obifind/options.go @@ -44,16 +44,16 @@ func LoadTaxonomyOptionSet(options *getoptions.GetOpt, required, alternatiive bo options.Description("Restrict output to some subclades.")) } -func SelectedNCBITaxDump() string { +func CLISelectedNCBITaxDump() string { return __taxdump__ } -func AreAlternativeNamesSelected() bool { +func CLIAreAlternativeNamesSelected() bool { return __alternative_name__ } -func TaxonomicalRestrictions() (*obitax.TaxonSet, error) { - taxonomy, err := LoadSelectedTaxonomy() +func CLITaxonomicalRestrictions() (*obitax.TaxonSet, error) { + taxonomy, err := CLILoadSelectedTaxonomy() if err != nil { return nil, err @@ -73,12 +73,12 @@ func TaxonomicalRestrictions() (*obitax.TaxonSet, error) { return &ts, nil } -func LoadSelectedTaxonomy() (*obitax.Taxonomy, error) { - if SelectedNCBITaxDump() != "" { +func CLILoadSelectedTaxonomy() (*obitax.Taxonomy, error) { + if 
CLISelectedNCBITaxDump() != "" { if __selected_taxonomy__ == nil { var err error - __selected_taxonomy__, err = ncbitaxdump.LoadNCBITaxDump(SelectedNCBITaxDump(), - !AreAlternativeNamesSelected()) + __selected_taxonomy__, err = ncbitaxdump.LoadNCBITaxDump(CLISelectedNCBITaxDump(), + !CLIAreAlternativeNamesSelected()) if err != nil { return nil, err } @@ -105,10 +105,10 @@ func OptionSet(options *getoptions.GetOpt) { options.Description("Restrict to the given taxonomic rank.")) } -func RequestsPathForTaxid() int { +func CLIRequestsPathForTaxid() int { return __taxid_path__ } -func RequestsSonsForTaxid() int { +func CLIRequestsSonsForTaxid() int { return __taxid_sons__ } diff --git a/pkg/obitools/obipairing/options.go b/pkg/obitools/obipairing/options.go index ed63f80..0a70b21 100644 --- a/pkg/obitools/obipairing/options.go +++ b/pkg/obitools/obipairing/options.go @@ -12,6 +12,7 @@ var _Delta = 5 var _MinOverlap = 20 var _GapPenality = float64(2.0) var _WithoutStats = false +var _MinIdentity = 0.9 func PairingOptionSet(options *getoptions.GetOpt) { options.StringSliceVar(&_ForwardFiles, "forward-reads", @@ -22,16 +23,19 @@ func PairingOptionSet(options *getoptions.GetOpt) { 1, 1000, options.Alias("R"), options.Description("The file names containing the reverse reads")) - options.IntVar(&_Delta, "delta", 5, + options.IntVar(&_Delta, "delta", _Delta, options.Alias("D"), - options.Description("Length added to the fast detected overlap for the precise alignement (default 5).")) - options.IntVar(&_MinOverlap, "min-overlap", 20, + options.Description("Length added to the fast detected overlap for the precise alignement")) + options.IntVar(&_MinOverlap, "min-overlap", _MinOverlap, options.Alias("O"), - options.Description("Minimum ovelap between both the reads to consider the aligment (default 20).")) - options.Float64Var(&_GapPenality, "gap-penality", 2, + options.Description("Minimum ovelap between both the reads to consider the aligment")) + 
options.Float64Var(&_MinIdentity, "min-identity", _MinIdentity, + options.Alias("O"), + options.Description("Minimum identity between ovelaped regions of the reads to consider the aligment")) + options.Float64Var(&_GapPenality, "gap-penality", _GapPenality, options.Alias("G"), options.Description("Gap penality expressed as the multiply factor applied to the mismatch score between two nucleotides with a quality of 40 (default 2).")) - options.BoolVar(&_WithoutStats, "without-stat", false, + options.BoolVar(&_WithoutStats, "without-stat", _WithoutStats, options.Alias("S"), options.Description("Remove alignment statistics from the produced consensus sequences.")) } @@ -65,6 +69,10 @@ func MinOverlap() int { return _MinOverlap } +func MinIdentity() float64 { + return _MinIdentity +} + func GapPenality() float64 { return _GapPenality } diff --git a/pkg/obitools/obipairing/pairing.go b/pkg/obitools/obipairing/pairing.go index 3cd861c..fac1553 100644 --- a/pkg/obitools/obipairing/pairing.go +++ b/pkg/obitools/obipairing/pairing.go @@ -105,7 +105,7 @@ func JoinPairedSequence(seqA, seqB obiseq.BioSequence, inplace bool) obiseq.BioS // input sequence. // func AssemblePESequences(seqA, seqB obiseq.BioSequence, - gap float64, delta, overlapMin int, withStats bool, + gap float64, delta, minOverlap int, minIdentity float64,withStats bool, inplace bool, arenaAlign obialign.PEAlignArena) obiseq.BioSequence { @@ -119,8 +119,9 @@ func AssemblePESequences(seqA, seqB obiseq.BioSequence, } lcons := cons.Length() aliLength := lcons - _Abs(left) - _Abs(right) + identity := float64(match)/float64(aliLength) - if aliLength >= overlapMin { + if aliLength >= minOverlap && identity >= minIdentity { if withStats { annot := cons.Annotations() annot["mode"] = "alignment" @@ -203,7 +204,7 @@ func AssemblePESequences(seqA, seqB obiseq.BioSequence, // each pair of processed sequences produces one sequence in the result iterator. 
// func IAssemblePESequencesBatch(iterator obiseq.IPairedBioSequenceBatch, - gap float64, delta, minOverlap int, withStats bool, sizes ...int) obiseq.IBioSequenceBatch { + gap float64, delta, minOverlap int, minIdentity float64, withStats bool, sizes ...int) obiseq.IBioSequenceBatch { nworkers := runtime.NumCPU() * 3 / 2 buffsize := iterator.BufferSize() @@ -246,7 +247,7 @@ func IAssemblePESequencesBatch(iterator obiseq.IPairedBioSequenceBatch, processed := 0 for i, A := range batch.Forward() { B := batch.Reverse()[i] - cons[i] = AssemblePESequences(A, B, gap, delta, minOverlap, withStats, true, arena) + cons[i] = AssemblePESequences(A, B, gap, delta, minOverlap, minIdentity, withStats, true, arena) if i%59 == 0 { bar.Add(59) processed += 59