From ff4022290204f27930ae88e3b3c1f6fe3761b055 Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Fri, 14 Jan 2022 16:10:19 +0100 Subject: [PATCH] Code reefactoring --- pkg/obialign/pairedendalign.go | 62 +++++++++++----------- pkg/obiapat/pcr.go | 8 +-- pkg/obiformats/embl_read.go | 9 ---- pkg/obiformats/fastseq_read.go | 3 +- pkg/obiformats/ncbitaxdump/read.go | 13 +++-- pkg/obioptions/options.go | 4 +- pkg/obiseq/join.go | 19 +++---- pkg/obitax/iterator.go | 2 +- pkg/obitax/path.go | 3 +- pkg/obitax/taxonomy.go | 13 ++--- pkg/obitools/obiconvert/sequence_reader.go | 20 +++---- pkg/obitools/obifind/options.go | 2 +- pkg/obitools/obipairing/pairing.go | 2 + 13 files changed, 68 insertions(+), 92 deletions(-) diff --git a/pkg/obialign/pairedendalign.go b/pkg/obialign/pairedendalign.go index 725c828..01f2854 100644 --- a/pkg/obialign/pairedendalign.go +++ b/pkg/obialign/pairedendalign.go @@ -272,9 +272,9 @@ func PEAlign(seqA, seqB obiseq.BioSequence, arena PEAlignArena) (int, []int) { var score, shift int var startA, startB int - var part_len, over int - var raw_seqA, qual_seqA []byte - var raw_seqB, qual_seqB []byte + var partLen, over int + var rawSeqA, qualSeqA []byte + var rawSeqB, qualSeqB []byte var extra5, extra3 int if !_InitializedDnaScore { @@ -286,7 +286,7 @@ func PEAlign(seqA, seqB obiseq.BioSequence, &arena.pointer.fastIndex, &arena.pointer.fastBuffer) - shift, fast_score := obikmer.FastShiftFourMer(index, seqB, nil) + shift, fastScore := obikmer.FastShiftFourMer(index, seqB, nil) if shift > 0 { over = seqA.Length() - shift @@ -294,7 +294,7 @@ func PEAlign(seqA, seqB obiseq.BioSequence, over = seqB.Length() + shift } - if fast_score+3 < over { + if fastScore+3 < over { if shift > 0 { startA = shift - delta if startA < 0 { @@ -302,14 +302,14 @@ func PEAlign(seqA, seqB obiseq.BioSequence, } extra5 = -startA startB = 0 - raw_seqA = seqA.Sequence()[startA:] - qual_seqA = seqA.Qualities()[startA:] - part_len = len(raw_seqA) - raw_seqB = seqB.Sequence()[0:part_len] - qual_seqB = seqB.Qualities()[0:part_len] - extra3 = seqB.Length() - part_len + rawSeqA = seqA.Sequence()[startA:] + qualSeqA = seqA.Qualities()[startA:] + partLen = len(rawSeqA) + rawSeqB = seqB.Sequence()[0:partLen] + qualSeqB = seqB.Qualities()[0:partLen] + extra3 = seqB.Length() - partLen score = _FillMatrixPeLeftAlign( - raw_seqA, qual_seqA, raw_seqB, qual_seqB, gap, + rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap, &arena.pointer.scoreMatrix, &arena.pointer.pathMatrix) } else { @@ -319,20 +319,20 @@ func PEAlign(seqA, seqB obiseq.BioSequence, startB = 0 } extra5 = startB - raw_seqB = seqB.Sequence()[startB:] - qual_seqB = seqB.Qualities()[startB:] - part_len = len(raw_seqB) - raw_seqA = seqA.Sequence()[:part_len] - qual_seqA = seqA.Qualities()[:part_len] - extra3 = part_len - seqA.Length() + rawSeqB = seqB.Sequence()[startB:] + qualSeqB = seqB.Qualities()[startB:] + partLen = len(rawSeqB) + rawSeqA = seqA.Sequence()[:partLen] + qualSeqA = seqA.Qualities()[:partLen] + extra3 = partLen - seqA.Length() score = _FillMatrixPeRightAlign( - raw_seqA, qual_seqA, raw_seqB, qual_seqB, gap, + rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap, &arena.pointer.scoreMatrix, &arena.pointer.pathMatrix) } arena.pointer.path = _Backtracking(arena.pointer.pathMatrix, - len(raw_seqA), len(raw_seqB), + len(rawSeqA), len(rawSeqB), &arena.pointer.path) } else { @@ -340,27 +340,27 @@ func PEAlign(seqA, seqB obiseq.BioSequence, startA = shift startB = 0 extra5 = -startA - qual_seqA = seqA.Qualities()[startA:] - part_len = len(qual_seqA) - qual_seqB = seqB.Qualities()[0:part_len] - extra3 = seqB.Length() - part_len + qualSeqA = seqA.Qualities()[startA:] + partLen = len(qualSeqA) + qualSeqB = seqB.Qualities()[0:partLen] + extra3 = seqB.Length() - partLen score = 0 } else { startA = 0 startB = -shift extra5 = startB - qual_seqB = seqB.Qualities()[startB:] - part_len = len(qual_seqB) - extra3 = part_len - seqA.Length() - qual_seqA = seqA.Qualities()[:part_len] + qualSeqB = seqB.Qualities()[startB:] + partLen = len(qualSeqB) + extra3 = partLen - seqA.Length() + qualSeqA = seqA.Qualities()[:partLen] } score = 0 - for i, qualA := range qual_seqA { - qualB := qual_seqB[i] + for i, qualA := range qualSeqA { + qualB := qualSeqB[i] score += _NucScorePartMatchMatch[qualA][qualB] } arena.pointer.path = arena.pointer.path[:0] - arena.pointer.path = append(arena.pointer.path, 0, part_len) + arena.pointer.path = append(arena.pointer.path, 0, partLen) } arena.pointer.path[0] += extra5 diff --git a/pkg/obiapat/pcr.go b/pkg/obiapat/pcr.go index 5f4467e..dd9c865 100644 --- a/pkg/obiapat/pcr.go +++ b/pkg/obiapat/pcr.go @@ -195,7 +195,7 @@ func __pcr__(seq ApatSequence, sequence obiseq.BioSequence, match, _ := sequence.Subsequence(fm[0], fm[1], opt.pointer.circular) annot["forward_match"] = match.String() - match.Revoke() + match.Destroy() annot["forward_error"] = erri @@ -203,7 +203,7 @@ func __pcr__(seq ApatSequence, sequence obiseq.BioSequence, match, _ = sequence.Subsequence(rm[0], rm[1], opt.pointer.circular) match = match.ReverseComplement(true) annot["reverse_match"] = match.String() - match.Revoke() + match.Destroy() annot["reverse_error"] = errj results = append(results, amplicon) @@ -269,14 +269,14 @@ func __pcr__(seq ApatSequence, sequence obiseq.BioSequence, match, _ := sequence.Subsequence(rm[0], rm[1], opt.pointer.circular) match.ReverseComplement(true) annot["forward_match"] = match.String() - match.Revoke() + match.Destroy() annot["forward_error"] = errj annot["reverse_primer"] = reverse.String() match, _ = sequence.Subsequence(fm[0], fm[1], opt.pointer.circular) annot["reverse_match"] = match.String() - match.Revoke() + match.Destroy() annot["reverse_error"] = erri results = append(results, amplicon) diff --git a/pkg/obiformats/embl_read.go b/pkg/obiformats/embl_read.go index 3cff6cb..eee11fd 100644 --- a/pkg/obiformats/embl_read.go +++ b/pkg/obiformats/embl_read.go @@ -16,15 +16,6 @@ import ( var __FILE_CHUNK_SIZE__ = 1 << 20 -func __slice_grow__(slice []string) []string { - return slice -} - -type __embl_chunk__ struct { - entries [][]string - order int -} - type __file_chunk__ struct { raw io.Reader order int diff --git a/pkg/obiformats/fastseq_read.go b/pkg/obiformats/fastseq_read.go index 89b3231..cb2078e 100644 --- a/pkg/obiformats/fastseq_read.go +++ b/pkg/obiformats/fastseq_read.go @@ -7,7 +7,6 @@ package obiformats import "C" import ( - "errors" "fmt" "log" "os" @@ -89,7 +88,7 @@ func ReadFastSeqBatchFromFile(filename string, options ...WithOption) (obiseq.IB err = nil if pointer == nil { - err = errors.New(fmt.Sprintf("Cannot open file %s", filename)) + err = fmt.Errorf("cannot open file %s", filename) return obiseq.NilIBioSequenceBatch, err } diff --git a/pkg/obiformats/ncbitaxdump/read.go b/pkg/obiformats/ncbitaxdump/read.go index 75a873b..484f79d 100644 --- a/pkg/obiformats/ncbitaxdump/read.go +++ b/pkg/obiformats/ncbitaxdump/read.go @@ -3,7 +3,6 @@ package ncbitaxdump import ( "bufio" "encoding/csv" - "errors" "fmt" "io" "log" @@ -95,8 +94,8 @@ func LoadNCBITaxDump(directory string, onlysn bool) (*obitax.Taxonomy, error) { nodefile, err := os.Open(path.Join(directory, "nodes.dmp")) if err != nil { - return nil, errors.New(fmt.Sprintf("Cannot open nodes file from '%s'", - directory)) + return nil, fmt.Errorf("cannot open nodes file from '%s'", + directory) } defer nodefile.Close() @@ -112,8 +111,8 @@ func LoadNCBITaxDump(directory string, onlysn bool) (*obitax.Taxonomy, error) { namefile, nerr := os.Open(path.Join(directory, "names.dmp")) if nerr != nil { - return nil, errors.New(fmt.Sprintf("Cannot open names file from '%s'", - directory)) + return nil, fmt.Errorf("cannot open names file from '%s'", + directory) } defer namefile.Close() @@ -128,8 +127,8 @@ func LoadNCBITaxDump(directory string, onlysn bool) (*obitax.Taxonomy, error) { aliasfile, aerr := os.Open(path.Join(directory, "merged.dmp")) if aerr != nil { - return nil, errors.New(fmt.Sprintf("Cannot open merged file from '%s'", - directory)) + return nil, fmt.Errorf("cannot open merged file from '%s'", + directory) } defer aliasfile.Close() diff --git a/pkg/obioptions/options.go b/pkg/obioptions/options.go index 0d5a971..90c6511 100644 --- a/pkg/obioptions/options.go +++ b/pkg/obioptions/options.go @@ -8,7 +8,6 @@ import ( ) var __debug__ = false -var __profiling__ = "" type ArgumentParser func([]string) (*getoptions.GetOpt, []string, error) @@ -16,7 +15,6 @@ func GenerateOptionParser(optionset ...func(*getoptions.GetOpt)) ArgumentParser options := getoptions.New() options.Bool("help", false, options.Alias("h", "?")) options.BoolVar(&__debug__, "debug", false) - // options.StringVar(&__profiling__, "profile", "") for _, o := range optionset { o(options) @@ -27,7 +25,7 @@ func GenerateOptionParser(optionset ...func(*getoptions.GetOpt)) ArgumentParser remaining, err := options.Parse(args[1:]) if options.Called("help") { - fmt.Fprintf(os.Stderr, options.Help()) + fmt.Fprint(os.Stderr, options.Help()) os.Exit(1) } return options, remaining, err diff --git a/pkg/obiseq/join.go b/pkg/obiseq/join.go index b022397..9f6d21e 100644 --- a/pkg/obiseq/join.go +++ b/pkg/obiseq/join.go @@ -1,19 +1,12 @@ package obiseq -import "git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils" +func (sequence BioSequence) Join(seq2 BioSequence, inplace bool) BioSequence { -func (sequence BioSequence) Join(seq2 BioSequence, copy_annot bool) (BioSequence, error) { - - new_seq := MakeEmptyBioSequence() - new_seq.SetId(sequence.Id()) - new_seq.SetDefinition(sequence.Definition()) - - new_seq.Write(sequence.Sequence()) - new_seq.Write(seq2.Sequence()) - - if copy_annot { - goutils.CopyMap(new_seq.Annotations(), sequence.Annotations()) + if !inplace { + sequence = sequence.Copy() } - return new_seq, nil + sequence.Write(seq2.Sequence()) + + return sequence } diff --git a/pkg/obitax/iterator.go b/pkg/obitax/iterator.go index da1441b..e610bcb 100644 --- a/pkg/obitax/iterator.go +++ b/pkg/obitax/iterator.go @@ -39,7 +39,7 @@ func (set *TaxonSlice) Iterator() *ITaxonSet { return i } -func (taxonmy *Taxonomy) iterator() *ITaxonSet { +func (taxonmy *Taxonomy) Iterator() *ITaxonSet { return taxonmy.nodes.Iterator() } diff --git a/pkg/obitax/path.go b/pkg/obitax/path.go index 100cf08..e44daee 100644 --- a/pkg/obitax/path.go +++ b/pkg/obitax/path.go @@ -1,7 +1,6 @@ package obitax import ( - "errors" "fmt" ) @@ -14,7 +13,7 @@ func (taxon *TaxNode) Path() (*TaxonSlice, error) { taxon = taxon.pparent if taxon == nil { - return nil, errors.New(fmt.Sprint("Taxonomy must be reindexed")) + return nil, fmt.Errorf("Taxonomy must be reindexed") } path = append(path, taxon) diff --git a/pkg/obitax/taxonomy.go b/pkg/obitax/taxonomy.go index 28e6b05..8c5c956 100644 --- a/pkg/obitax/taxonomy.go +++ b/pkg/obitax/taxonomy.go @@ -1,7 +1,6 @@ package obitax import ( - "errors" "fmt" "log" ) @@ -42,15 +41,11 @@ func (taxonomy *Taxonomy) Length() int { return len(*taxonomy.nodes) } -func (taxonomy *Taxonomy) Iterator() *ITaxonSet { - return taxonomy.nodes.Iterator() -} - func (taxonomy *Taxonomy) AddNewTaxa(taxid, parent int, rank string, replace bool, init bool) (*TaxNode, error) { if !replace { _, ok := (*taxonomy.nodes)[taxid] if ok { - return nil, errors.New(fmt.Sprintf("Trying to add taxoon %d already present in the taxonomy", taxid)) + return nil, fmt.Errorf("trying to add taxoon %d already present in the taxonomy", taxid) } } @@ -66,7 +61,7 @@ func (taxonomy *Taxonomy) Taxon(taxid int) (*TaxNode, error) { if !ok { a, aok := taxonomy.alias[taxid] if !aok { - return nil, errors.New(fmt.Sprintf("Taxid %d is not part of the taxonomy", taxid)) + return nil, fmt.Errorf("Taxid %d is not part of the taxonomy", taxid) } log.Printf("Taxid %d is deprecated and must be replaced by %d", taxid, a.taxid) t = a @@ -109,9 +104,9 @@ func (taxonomy *Taxonomy) ReindexParent() error { for _, taxon := range *taxonomy.nodes { taxon.pparent, ok = (*taxonomy.nodes)[taxon.parent] if !ok { - return errors.New(fmt.Sprintf("Parent %d of taxon %d is not defined in taxonomy", + return fmt.Errorf("Parent %d of taxon %d is not defined in taxonomy", taxon.taxid, - taxon.parent)) + taxon.parent) } } diff --git a/pkg/obitools/obiconvert/sequence_reader.go b/pkg/obitools/obiconvert/sequence_reader.go index e6b9cb3..b76ea05 100644 --- a/pkg/obitools/obiconvert/sequence_reader.go +++ b/pkg/obitools/obiconvert/sequence_reader.go @@ -17,24 +17,24 @@ func __expand_list_of_files__(check_ext bool, filenames ...string) ([]string, er err = filepath.Walk(fn, func(path string, info os.FileInfo, err error) error { - + var e error for info.Mode()&os.ModeSymlink == os.ModeSymlink { - path, err = filepath.EvalSymlinks(path) - if err != nil { - return err + path, e = filepath.EvalSymlinks(path) + if e != nil { + return e } - info, err = os.Stat(path) - if err != nil { - return err + info, e = os.Stat(path) + if e != nil { + return e } } if info.IsDir() { if path != fn { - subdir, err := __expand_list_of_files__(true, path) - if err != nil { - return err + subdir, e := __expand_list_of_files__(true, path) + if e != nil { + return e } list_of_files = append(list_of_files, subdir...) } else { diff --git a/pkg/obitools/obifind/options.go b/pkg/obitools/obifind/options.go index 9375d42..7cb19de 100644 --- a/pkg/obitools/obifind/options.go +++ b/pkg/obitools/obifind/options.go @@ -86,7 +86,7 @@ func LoadSelectedTaxonomy() (*obitax.Taxonomy, error) { return __selected_taxonomy__, nil } - return nil, errors.New("No NCBII taxdump selected using option -t|--taxdump") + return nil, errors.New("no NCBI taxdump selected using option -t|--taxdump") } func OptionSet(options *getoptions.GetOpt) { diff --git a/pkg/obitools/obipairing/pairing.go b/pkg/obitools/obipairing/pairing.go index c910fb8..4dc73f3 100644 --- a/pkg/obitools/obipairing/pairing.go +++ b/pkg/obitools/obipairing/pairing.go @@ -153,6 +153,8 @@ func IAssemblePESequencesBatch(iterator obiseq.IPairedBioSequenceBatch, bar.Add(59) processed += 59 } + A.Destroy() + B.Destroy() } bar.Add(batch.Length() - processed) new_iter.Channel() <- obiseq.MakeBioSequenceBatch(