Add the --number option to obiannotate

This commit is contained in:
Eric Coissac
2025-04-22 18:35:51 +02:00
parent a57cfda675
commit c0ecaf90ab
5 changed files with 78 additions and 2 deletions

View File

@ -42,6 +42,11 @@ func main() {
obiconvert.OpenSequenceDataErrorMessage(args, err) obiconvert.OpenSequenceDataErrorMessage(args, err)
annotator := obiannotate.CLIAnnotationPipeline() annotator := obiannotate.CLIAnnotationPipeline()
if obiannotate.CLIHasSetNumberFlag() {
sequences = sequences.NumberSequences(1, !obiconvert.CLINoInputOrder())
}
obiconvert.CLIWriteBioSequences(sequences.Pipe(annotator), true) obiconvert.CLIWriteBioSequences(sequences.Pipe(annotator), true)
obiutils.WaitForLastPipe() obiutils.WaitForLastPipe()

62
pkg/obiiter/numbering.go Normal file
View File

@ -0,0 +1,62 @@
package obiiter
import (
"sync"
"sync/atomic"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
)
// NumberSequences returns a new iterator whose sequences each carry a
// "seq_number" attribute holding a running ordinal.
//
// Numbering begins at start and advances by one per sequence; every batch is
// assigned one contiguous range of numbers. When forceReordering is true the
// batches are first passed through SortBatches and a single worker does the
// numbering. Otherwise obidefault.ParallelWorkers() workers number batches
// concurrently, each batch taking its range at the moment a worker picks it up.
//
// When the input iterator is paired, the returned iterator is marked as paired
// and the mate of every sequence receives the same "seq_number" value.
func (iter IBioSequence) NumberSequences(start int, forceReordering bool) IBioSequence {
	var counterLock sync.Mutex
	var nextNumber atomic.Int64
	nextNumber.Store(int64(start))

	nworkers := obidefault.ParallelWorkers()
	if forceReordering {
		iter = iter.SortBatches()
		nworkers = 1
	}

	numbered := MakeIBioSequence()
	numbered.Add(nworkers)

	paired := iter.IsPaired()
	if paired {
		numbered.MarkAsPaired()
	}

	// reserve hands out the first number of a contiguous range of count
	// numbers and moves the shared counter past that range.
	reserve := func(count int) int {
		counterLock.Lock()
		defer counterLock.Unlock()

		first := int(nextNumber.Load())
		nextNumber.Store(int64(first + count))
		return first
	}

	// worker numbers every batch it reads from source and forwards the batch
	// to the output iterator.
	worker := func(source IBioSequence) {
		for source.Next() {
			batch := source.Get()
			sequences := batch.Slice()
			first := reserve(len(sequences))

			for offset, sequence := range sequences {
				ordinal := first + offset
				sequence.SetAttribute("seq_number", ordinal)
				if paired {
					sequence.PairedWith().SetAttribute("seq_number", ordinal)
				}
			}

			numbered.Push(batch)
		}

		numbered.Done()
	}

	go numbered.WaitAndClose()

	// The extra workers each read from their own split of the input; the
	// last worker consumes the input iterator itself.
	for extra := 1; extra < nworkers; extra++ {
		go worker(iter.Split())
	}
	go worker(iter)

	return numbered
}

View File

@ -8,7 +8,7 @@ import (
// corresponds to the last commit, and not the one when the file will be // corresponds to the last commit, and not the one when the file will be
// commited // commited
var _Commit = "0aec5ba" var _Commit = "a57cfda"
var _Version = "Release 4.4.0" var _Version = "Release 4.4.0"
// Version returns the version of the obitools package. // Version returns the version of the obitools package.

View File

@ -145,7 +145,7 @@ var OBILang = gval.NewLanguage(
ismap := obiutils.IsAMap(args[0]) ismap := obiutils.IsAMap(args[0])
return ismap, nil return ismap, nil
}), }),
gval.Function("printf", func(args ...interface{}) (interface{}, error) { gval.Function("sprintf", func(args ...interface{}) (interface{}, error) {
text := fmt.Sprintf(args[0].(string), args[1:]...) text := fmt.Sprintf(args[0].(string), args[1:]...)
return text, nil return text, nil
}), }),

View File

@ -32,6 +32,7 @@ var _cut = ""
var _taxonomicPath = false var _taxonomicPath = false
var _withRank = false var _withRank = false
var _withScientificName = false var _withScientificName = false
var _withNumbering = false
func SequenceAnnotationOptionSet(options *getoptions.GetOpt) { func SequenceAnnotationOptionSet(options *getoptions.GetOpt) {
// options.BoolVar(&_addRank, "seq-rank", _addRank, // options.BoolVar(&_addRank, "seq-rank", _addRank,
@ -46,6 +47,10 @@ func SequenceAnnotationOptionSet(options *getoptions.GetOpt) {
options.Description("Adds attribute with seq_length as a key and sequence length as a value."), options.Description("Adds attribute with seq_length as a key and sequence length as a value."),
) )
options.BoolVar(&_withNumbering, "number", _withNumbering,
options.Description("Adds an attribute with seq_number as a key and an ordinal number starting from 1 as a value."),
)
options.StringVar(&_ahoCorazick, "aho-corasick", _ahoCorazick, options.StringVar(&_ahoCorazick, "aho-corasick", _ahoCorazick,
options.Description("Adds an aho-corasick attribut with the count of matches of the provided patterns.")) options.Description("Adds an aho-corasick attribut with the count of matches of the provided patterns."))
@ -203,6 +208,10 @@ func CLIHasSetLengthFlag() bool {
return _setSeqLength return _setSeqLength
} }
// CLIHasSetNumberFlag reports the current value of the _withNumbering flag,
// the variable bound to the "number" command line option.
func CLIHasSetNumberFlag() bool {
	numbering := _withNumbering
	return numbering
}
func CLIHasClearAllFlag() bool { func CLIHasClearAllFlag() bool {
return _clearAll return _clearAll
} }