mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
New, more functional version of obiuniq, but it has a memory leak.
This commit is contained in:
@ -9,18 +9,25 @@ var _StatsOn = make([]string, 0, 10)
|
||||
// _Keys receives the attribute names given with --category-attribute (-c);
// they are the attributes used to define sequence groups.
var _Keys = make([]string, 0, 10)
|
||||
// _OnDisk is bound to the --on-disk flag (disk cache during dereplication).
// NOTE(review): initialised to false here, but the flag is registered with a
// default of true — confirm which value is intended.
var _OnDisk = false
|
||||
// _chunks is the default for --chunk-count: in how many chunks the dataset is
// pre-divided.
var _chunks = 100
|
||||
// _NAValue is the default for --na-value: the value used when the classifier
// tag is not defined for a sequence.
var _NAValue = "NA"
|
||||
|
||||
func UniqueOptionSet(options *getoptions.GetOpt) {
|
||||
options.StringSliceVar(&_StatsOn, "merge",
|
||||
1, 1000,
|
||||
1, 1,
|
||||
options.Alias("m"),
|
||||
options.Description("Adds a merged attribute containing the list of sequence record ids merged within this group."))
|
||||
|
||||
options.StringSliceVar(&_Keys, "category-attribute",
|
||||
1, 1000,
|
||||
1, 1,
|
||||
options.Alias("c"),
|
||||
options.Description("Adds one attribute to the list of attributes used to define sequence groups (this option can be used several times)."))
|
||||
|
||||
options.StringVar(&_NAValue, "na-value", _NAValue,
|
||||
options.Description("Value used when the classifier tag is not defined for a sequence."))
|
||||
|
||||
options.BoolVar(&_OnDisk, "on-disk", true,
|
||||
options.Description("Allows for using a disk cache during the dereplication process. "))
|
||||
|
||||
options.IntVar(&_chunks, "chunk-count", _chunks,
|
||||
options.Description("In how many chunk the dataset is pre-devided for speeding up the process."))
|
||||
|
||||
@ -52,3 +59,7 @@ func CLINumberOfChunks() int {
|
||||
|
||||
return _chunks
|
||||
}
|
||||
|
||||
// CLINAValue returns the current value of the package-level _NAValue
// variable, which is bound to the --na-value command line option.
func CLINAValue() string {
|
||||
return _NAValue
|
||||
}
|
||||
|
@ -10,29 +10,41 @@ import (
|
||||
|
||||
// Unique runs the dereplication of the sequences provided by the input
// iterator and returns the resulting iterator.
//
// NOTE(review): this span comes from a rendered diff in which removed and
// added lines are interleaved without +/- markers. The comments below only
// describe the visible calls; confirm against the raw diff which statements
// belong to the current revision.
func Unique(sequences obiseq.IBioSequenceBatch) obiseq.IBioSequenceBatch {
|
||||
|
||||
// NOTE(review): classifier, newIter, err and the ISequenceChunk* calls below
// presumably belong to the previous implementation — confirm.
classifier := obiseq.HashClassifier(CLINumberOfChunks())
|
||||
var newIter obiseq.IBioSequenceBatch
|
||||
var err error
|
||||
// Accumulates the obichunk options handed to IUniqueSequence.
options := make([]obichunk.WithOption, 0, 30)
|
||||
|
||||
options = append(options,
|
||||
obichunk.OptionBatchCount(CLINumberOfChunks()),
|
||||
)
|
||||
|
||||
// Select in-memory or on-disk processing from the command line setting.
if CLIUniqueInMemory() {
|
||||
log.Printf("Running dereplication in memory on %d chunks", CLINumberOfChunks())
|
||||
newIter, err = obichunk.ISequenceChunk(sequences, classifier, 2)
|
||||
options = append(options, obichunk.OptionSortOnMemory())
|
||||
} else {
|
||||
log.Printf("Running dereplication on disk with %d chunks", CLINumberOfChunks())
|
||||
newIter, err = obichunk.ISequenceChunkOnDisk(sequences, classifier, 2)
|
||||
options = append(options, obichunk.OptionSortOnDisk())
|
||||
}
|
||||
|
||||
options = append(options,
|
||||
obichunk.OptionStatOn(CLIStatsOn()...))
|
||||
|
||||
options = append(options,
|
||||
obichunk.OptionSubCategory(CLIKeys()...))
|
||||
|
||||
options = append(options,
|
||||
obichunk.OptionsParallelWorkers(
|
||||
obioptions.CLIParallelWorkers()),
|
||||
obichunk.OptionsBufferSize(
|
||||
obioptions.CLIBufferSize()),
|
||||
obichunk.OptionsBatchSize(
|
||||
obioptions.CLIBatchSize()),
|
||||
obichunk.OptionNAValue(CLINAValue()),
|
||||
)
|
||||
|
||||
// Build the dereplicated iterator from the collected options.
iUnique, err := obichunk.IUniqueSequence(sequences, options...)
|
||||
|
||||
// Any error is fatal: the process is terminated through the logger.
if err != nil {
|
||||
log.Fatalf("error in spliting the dataset : %v", err)
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
statsOn := CLIStatsOn()
|
||||
keys := CLIKeys()
|
||||
parallelWorkers := obioptions.CLIParallelWorkers()
|
||||
buffSize := obioptions.CLIBufferSize()
|
||||
|
||||
// NOTE(review): the MakeISliceWorker call and `return newIter` presumably
// belong to the previous implementation — confirm against the raw diff.
newIter = newIter.MakeISliceWorker(obiseq.UniqueSliceWorker(statsOn, keys...),
|
||||
parallelWorkers, buffSize)
|
||||
|
||||
return newIter
|
||||
return iUnique
|
||||
}
|
||||
|
Reference in New Issue
Block a user