From c0ecaf90ab2f45f0d7cf78e23e4f3bf03ffea95a Mon Sep 17 00:00:00 2001
From: Eric Coissac
Date: Tue, 22 Apr 2025 18:35:51 +0200
Subject: [PATCH] Add the --number option to obiannotate

---
 cmd/obitools/obiannotate/main.go    |  5 ++
 pkg/obiiter/numbering.go            | 73 +++++++++++++++++++++++++++++
 pkg/obioptions/version.go           |  2 +-
 pkg/obiseq/language.go              |  2 +-
 pkg/obitools/obiannotate/options.go |  9 ++++
 5 files changed, 89 insertions(+), 2 deletions(-)
 create mode 100644 pkg/obiiter/numbering.go

diff --git a/cmd/obitools/obiannotate/main.go b/cmd/obitools/obiannotate/main.go
index b83dfc2..6006073 100644
--- a/cmd/obitools/obiannotate/main.go
+++ b/cmd/obitools/obiannotate/main.go
@@ -42,6 +42,11 @@ func main() {
 	obiconvert.OpenSequenceDataErrorMessage(args, err)

 	annotator := obiannotate.CLIAnnotationPipeline()
+
+	if obiannotate.CLIHasSetNumberFlag() {
+		sequences = sequences.NumberSequences(1, !obiconvert.CLINoInputOrder())
+	}
+
 	obiconvert.CLIWriteBioSequences(sequences.Pipe(annotator), true)

 	obiutils.WaitForLastPipe()
diff --git a/pkg/obiiter/numbering.go b/pkg/obiiter/numbering.go
new file mode 100644
index 0000000..4908b0d
--- /dev/null
+++ b/pkg/obiiter/numbering.go
@@ -0,0 +1,73 @@
+package obiiter
+
+import (
+	"sync/atomic"
+
+	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
+)
+
+// NumberSequences returns a new iterator delivering the batches of iter after
+// tagging every sequence with a "seq_number" attribute.
+//
+// Numbers are consecutive integers beginning at start. Each batch reserves a
+// contiguous range of numbers, so no number is handed out twice even when
+// several workers run concurrently. On a paired iterator, the sequence
+// returned by PairedWith() receives the same number as its partner.
+//
+// When forceReordering is true the batches first go through SortBatches and a
+// single worker numbers them; otherwise obidefault.ParallelWorkers() workers
+// number the batches as they receive them.
+func (iter IBioSequence) NumberSequences(start int, forceReordering bool) IBioSequence {
+	// nextFirst is the number reserved for the first sequence of the next
+	// batch to be numbered.
+	nextFirst := &atomic.Int64{}
+	nextFirst.Store(int64(start))
+
+	nworkers := obidefault.ParallelWorkers()
+	if forceReordering {
+		iter = iter.SortBatches()
+		nworkers = 1
+	}
+
+	newIter := MakeIBioSequence()
+	newIter.Add(nworkers)
+
+	isPaired := iter.IsPaired()
+	if isPaired {
+		newIter.MarkAsPaired()
+	}
+
+	number := func(source IBioSequence) {
+		for source.Next() {
+			batch := source.Get()
+			seqs := batch.Slice()
+
+			// One atomic Add reserves the whole range of this batch; it
+			// returns the end of the range, so step back by the batch size
+			// to get the first number.
+			first := int(nextFirst.Add(int64(len(seqs)))) - len(seqs)
+
+			for i, seq := range seqs {
+				num := first + i
+				seq.SetAttribute("seq_number", num)
+				if isPaired {
+					seq.PairedWith().SetAttribute("seq_number", num)
+				}
+			}
+			newIter.Push(batch)
+		}
+		newIter.Done()
+	}
+
+	// Run WaitAndClose in the background so the caller gets newIter right away.
+	go func() {
+		newIter.WaitAndClose()
+	}()
+
+	for i := 1; i < nworkers; i++ {
+		go number(iter.Split())
+	}
+	go number(iter)
+
+	return newIter
+}
diff --git a/pkg/obioptions/version.go b/pkg/obioptions/version.go
index 4415d4f..f18b5eb 100644
--- a/pkg/obioptions/version.go
+++ b/pkg/obioptions/version.go
@@ -8,7 +8,7 @@ import (
 // corresponds to the last commit, and not the one when the file will be
 // commited

-var _Commit = "0aec5ba"
+var _Commit = "a57cfda"
 var _Version = "Release 4.4.0"

 // Version returns the version of the obitools package.
diff --git a/pkg/obiseq/language.go b/pkg/obiseq/language.go
index 9292092..40e1a75 100644
--- a/pkg/obiseq/language.go
+++ b/pkg/obiseq/language.go
@@ -145,7 +145,7 @@ var OBILang = gval.NewLanguage(
 		ismap := obiutils.IsAMap(args[0])
 		return ismap, nil
 	}),
-	gval.Function("printf", func(args ...interface{}) (interface{}, error) {
+	gval.Function("sprintf", func(args ...interface{}) (interface{}, error) {
 		text := fmt.Sprintf(args[0].(string), args[1:]...)
 		return text, nil
 	}),
diff --git a/pkg/obitools/obiannotate/options.go b/pkg/obitools/obiannotate/options.go
index b3c02c6..437e124 100644
--- a/pkg/obitools/obiannotate/options.go
+++ b/pkg/obitools/obiannotate/options.go
@@ -32,6 +32,7 @@ var _cut = ""
 var _taxonomicPath = false
 var _withRank = false
 var _withScientificName = false
+var _withNumbering = false

 func SequenceAnnotationOptionSet(options *getoptions.GetOpt) {
 	// options.BoolVar(&_addRank, "seq-rank", _addRank,
@@ -46,6 +47,10 @@ func SequenceAnnotationOptionSet(options *getoptions.GetOpt) {
 		options.Description("Adds attribute with seq_length as a key and sequence length as a value."),
 	)

+	options.BoolVar(&_withNumbering, "number", _withNumbering,
+		options.Description("Adds an attribute with seq_number as a key and an ordinal number starting from 1 as a value."),
+	)
+
 	options.StringVar(&_ahoCorazick, "aho-corasick", _ahoCorazick,
 		options.Description("Adds an aho-corasick attribut with the count of matches of the provided patterns."))

@@ -203,6 +208,10 @@ func CLIHasSetLengthFlag() bool {
 	return _setSeqLength
 }

+func CLIHasSetNumberFlag() bool {
+	return _withNumbering
+}
+
 func CLIHasClearAllFlag() bool {
 	return _clearAll
 }