second version of obidistribute and a first buggy version of obiuniq

This commit is contained in:
2022-02-15 00:47:02 +01:00
parent b931321ba1
commit 3586ecc483
15 changed files with 402 additions and 21 deletions

View File

@@ -0,0 +1,36 @@
package obiuniq
import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
"github.com/DavidGamba/go-getoptions"
)
// Storage for the obiuniq-specific command-line options. UniqueOptionSet
// binds these slices to the option parser.
var (
	// _StatsOn receives the values given to the "merge" (-m) option.
	_StatsOn = make([]string, 0, 10)

	// _Keys receives the values given to the "category-attribute" (-c) option.
	_Keys = make([]string, 0, 10)
)
// UniqueOptionSet declares the options specific to obiuniq on the given
// option parser:
//
//   - "merge" (alias "m") stores its values in _StatsOn;
//   - "category-attribute" (alias "c") stores its values in _Keys.
//
// Both are string-slice options registered with the same pair of bounds
// (1 and 1000), passed as the min/max arguments of StringSliceVar.
func UniqueOptionSet(options *getoptions.GetOpt) {
	const (
		minValues = 1
		maxValues = 1000
	)

	mergeAlias := options.Alias("m")
	mergeHelp := options.Description("Adds a merged attribute containing the list of sequence record ids merged within this group.")
	options.StringSliceVar(&_StatsOn, "merge", minValues, maxValues, mergeAlias, mergeHelp)

	categoryAlias := options.Alias("c")
	categoryHelp := options.Description("Adds one attribute to the list of attributes used to define sequence groups (this option can be used several times).")
	options.StringSliceVar(&_Keys, "category-attribute", minValues, maxValues, categoryAlias, categoryHelp)
}
// OptionSet registers on options every option declared for the obiuniq
// command: first the options declared by obiconvert.OptionSet, then the
// obiuniq-specific ones declared by UniqueOptionSet.
func OptionSet(options *getoptions.GetOpt) {
obiconvert.OptionSet(options)
UniqueOptionSet(options)
}
// CLIStatsOn returns the values collected for the "merge" (-m) command-line
// option. The package-level slice itself is returned, not a copy.
func CLIStatsOn() []string {
	mergeAttributes := _StatsOn
	return mergeAttributes
}
// CLIKeys returns the values collected for the "category-attribute" (-c)
// command-line option. The package-level slice itself is returned, not a
// copy.
func CLIKeys() []string {
	categoryAttributes := _Keys
	return categoryAttributes
}

View File

@@ -0,0 +1,28 @@
package obiuniq
import (
"log"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obichunk"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
)
// Unique builds the obiuniq processing pipeline on top of sequences and
// returns the resulting iterator.
//
// The input is first passed through obichunk.ISequenceChunk (called with the
// fixed arguments 100 and 2); if that step reports an error the program is
// terminated through log.Fatalf. The chunked iterator is then processed by
// the slice worker built by obiseq.UniqueSliceWorker from the "merge" and
// "category-attribute" option values, using the worker count and buffer size
// configured in obioptions.
//
// NOTE(review): the meaning of the 100 and 2 arguments is defined by
// obichunk.ISequenceChunk and is not visible from here — confirm there.
func Unique(sequences obiseq.IBioSequenceBatch) obiseq.IBioSequenceBatch {
	chunks, err := obichunk.ISequenceChunk(sequences, 100, 2)
	if err != nil {
		log.Fatalf("error in spliting the dataset : %v", err)
	}

	// Read the whole command-line configuration before building the worker.
	mergeAttributes, groupKeys := CLIStatsOn(), CLIKeys()
	nWorkers, bufferSize := obioptions.CLIParallelWorkers(), obioptions.CLIBufferSize()

	worker := obiseq.UniqueSliceWorker(mergeAttributes, groupKeys...)

	return chunks.MakeISliceWorker(worker, nWorkers, bufferSize)
}