Files
obitools4/pkg/obitools/obiuniq/unique.go
2025-01-27 17:12:45 +01:00

67 lines
1.6 KiB
Go

package obiuniq
import (
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obichunk"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
)
// CLIUnique runs the dereplication of sequences using the settings exposed
// by the CLI* accessors of this package.
//
// It assembles the list of obichunk options (number of chunks, in-memory or
// on-disk sorting, singleton filtering, statistics keys, sub-category keys,
// parallel workers, batch size and NA value) and forwards them, together
// with the input iterator, to obichunk.IUniqueSequence.
//
// The iterator returned by obichunk.IUniqueSequence is returned as is. If
// that call reports an error, the error is logged with log.Fatal.
func CLIUnique(sequences obiiter.IBioSequence) obiiter.IBioSequence {
	// Read the chunk count once so that the value logged below is the one
	// actually handed to obichunk.
	nChunks := CLINumberOfChunks()

	// At most eight options are appended below.
	options := make([]obichunk.WithOption, 0, 8)
	options = append(options, obichunk.OptionBatchCount(nChunks))

	// Data splitting is done either in memory or on disk, depending on
	// what CLIUniqueInMemory() reports.
	if CLIUniqueInMemory() {
		log.Printf("Running dereplication in memory on %d chunks", nChunks)
		options = append(options, obichunk.OptionSortOnMemory())
	} else {
		log.Printf("Running dereplication on disk with %d chunks", nChunks)
		options = append(options, obichunk.OptionSortOnDisk())
	}

	// Sequences observed a single time in the dataset are kept in the
	// output unless CLINoSingleton() is set (--no-singleton).
	if CLINoSingleton() {
		log.Print("Removing singletons from the output")
		options = append(options, obichunk.OptionsNoSingleton())
	} else {
		log.Print("Keep singletons in the output")
	}

	options = append(options,
		obichunk.OptionStatOn(CLIStatsOn()...),
		obichunk.OptionSubCategory(CLIKeys()...),
		obichunk.OptionsParallelWorkers(obidefault.ParallelWorkers()),
		obichunk.OptionsBatchSize(obidefault.BatchSize()),
		obichunk.OptionNAValue(CLINAValue()),
	)

	iUnique, err := obichunk.IUniqueSequence(sequences, options...)
	if err != nil {
		log.Fatal(err)
	}

	return iUnique
}