mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-12-09 01:00:26 +00:00
A few debugging fixes in obidistribute and some progress on obiunique, but the -c and -m options are still not working
This commit is contained in:
@@ -7,6 +7,8 @@ import (
|
||||
|
||||
// Package-level storage filled in by the command-line option parser
// (see UniqueOptionSet) and exposed through the CLI* accessors.
var (
	// _StatsOn receives the values of the "merge" option.
	_StatsOn = make([]string, 0, 10)

	// _Keys is the list returned by CLIKeys.
	_Keys = make([]string, 0, 10)

	// _OnDisk is bound to the "on-disk" option and returned by
	// CLIUniqueInMemory.
	// NOTE(review): the option is registered with a default of true, which
	// differs from this initial value — confirm which one takes effect.
	_OnDisk = false

	// _chunks is bound to the "chunk-count" option; 100 is its default.
	_chunks = 100
)
|
||||
|
||||
func UniqueOptionSet(options *getoptions.GetOpt) {
|
||||
options.StringSliceVar(&_StatsOn, "merge",
|
||||
@@ -17,6 +19,10 @@ func UniqueOptionSet(options *getoptions.GetOpt) {
|
||||
1, 1000,
|
||||
options.Alias("c"),
|
||||
options.Description("Adds one attribute to the list of attributes used to define sequence groups (this option can be used several times)."))
|
||||
options.BoolVar(&_OnDisk, "on-disk", true,
|
||||
options.Description("Allows for using a disk cache during the dereplication process. "))
|
||||
options.IntVar(&_chunks, "chunk-count", _chunks,
|
||||
options.Description("In how many chunk the dataset is pre-devided for speeding up the process."))
|
||||
|
||||
}
|
||||
|
||||
@@ -34,3 +40,15 @@ func CLIStatsOn() []string {
|
||||
func CLIKeys() []string {
|
||||
return _Keys
|
||||
}
|
||||
|
||||
func CLIUniqueInMemory() bool {
|
||||
return _OnDisk
|
||||
}
|
||||
|
||||
func CLINumberOfChunks() int {
|
||||
if _chunks <= 1 {
|
||||
return 1
|
||||
}
|
||||
|
||||
return _chunks
|
||||
}
|
||||
|
||||
@@ -10,7 +10,17 @@ import (
|
||||
|
||||
func Unique(sequences obiseq.IBioSequenceBatch) obiseq.IBioSequenceBatch {
|
||||
|
||||
newIter, err := obichunk.ISequenceChunk(sequences, 100, 2)
|
||||
classifier := obiseq.HashClassifier(CLINumberOfChunks())
|
||||
var newIter obiseq.IBioSequenceBatch
|
||||
var err error
|
||||
|
||||
if CLIUniqueInMemory() {
|
||||
log.Printf("Running dereplication in memory on %d chunks", CLINumberOfChunks())
|
||||
newIter, err = obichunk.ISequenceChunk(sequences, classifier, 2)
|
||||
} else {
|
||||
log.Printf("Running dereplication on disk with %d chunks", CLINumberOfChunks())
|
||||
newIter, err = obichunk.ISequenceChunkOnDisk(sequences, classifier, 2)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("error in spliting the dataset : %v", err)
|
||||
@@ -23,6 +33,6 @@ func Unique(sequences obiseq.IBioSequenceBatch) obiseq.IBioSequenceBatch {
|
||||
|
||||
newIter = newIter.MakeISliceWorker(obiseq.UniqueSliceWorker(statsOn, keys...),
|
||||
parallelWorkers, buffSize)
|
||||
|
||||
|
||||
return newIter
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user