Some debugging of obidistribute and some progress on obiunique, but the -c and -m options are still not working

This commit is contained in:
2022-02-15 10:49:01 +01:00
parent 3586ecc483
commit ce226acac0
4 changed files with 110 additions and 8 deletions

View File

@@ -7,6 +7,8 @@ import (
// Package-level storage for the command line options of the unique command.
var (
	// _StatsOn receives the values of the "merge" option.
	_StatsOn = make([]string, 0, 10)

	// _Keys is the list returned by CLIKeys.
	// NOTE(review): presumably the attributes defining sequence groups
	// (the "-c" option) — confirm against the full UniqueOptionSet.
	_Keys = make([]string, 0, 10)

	// _OnDisk is the target of the "on-disk" boolean option.
	_OnDisk = false

	// _chunks is the target of the "chunk-count" option; 100 is the default.
	_chunks = 100
)
func UniqueOptionSet(options *getoptions.GetOpt) {
options.StringSliceVar(&_StatsOn, "merge",
@@ -17,6 +19,10 @@ func UniqueOptionSet(options *getoptions.GetOpt) {
1, 1000,
options.Alias("c"),
options.Description("Adds one attribute to the list of attributes used to define sequence groups (this option can be used several times)."))
options.BoolVar(&_OnDisk, "on-disk", true,
options.Description("Allows for using a disk cache during the dereplication process. "))
options.IntVar(&_chunks, "chunk-count", _chunks,
options.Description("In how many chunk the dataset is pre-devided for speeding up the process."))
}
@@ -34,3 +40,15 @@ func CLIStatsOn() []string {
// CLIKeys returns the package-level _Keys slice as filled in by the command
// line options. The slice itself is returned, not a copy.
func CLIKeys() []string {
	keys := _Keys
	return keys
}
// CLIUniqueInMemory returns the current value of the package-level _OnDisk
// flag, unchanged.
//
// NOTE(review): the function name says "in memory" while the value handed
// back is the "on-disk" flag without negation. The option is registered with
// a default of true, so the two inversions may cancel each other depending on
// how the option library treats a boolean flag's default — confirm before
// changing either side.
func CLIUniqueInMemory() bool {
	flag := _OnDisk
	return flag
}
// CLINumberOfChunks returns the chunk count requested on the command line,
// clamped to a minimum of 1: any configured value of 1 or less yields 1.
func CLINumberOfChunks() int {
	if n := _chunks; n > 1 {
		return n
	}
	return 1
}

View File

@@ -10,7 +10,17 @@ import (
func Unique(sequences obiseq.IBioSequenceBatch) obiseq.IBioSequenceBatch {
newIter, err := obichunk.ISequenceChunk(sequences, 100, 2)
classifier := obiseq.HashClassifier(CLINumberOfChunks())
var newIter obiseq.IBioSequenceBatch
var err error
if CLIUniqueInMemory() {
log.Printf("Running dereplication in memory on %d chunks", CLINumberOfChunks())
newIter, err = obichunk.ISequenceChunk(sequences, classifier, 2)
} else {
log.Printf("Running dereplication on disk with %d chunks", CLINumberOfChunks())
newIter, err = obichunk.ISequenceChunkOnDisk(sequences, classifier, 2)
}
if err != nil {
log.Fatalf("error in spliting the dataset : %v", err)
@@ -23,6 +33,6 @@ func Unique(sequences obiseq.IBioSequenceBatch) obiseq.IBioSequenceBatch {
newIter = newIter.MakeISliceWorker(obiseq.UniqueSliceWorker(statsOn, keys...),
parallelWorkers, buffSize)
return newIter
}