From abcf02e4883b93a60c81e082efdbea608355f77c Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Thu, 24 Feb 2022 12:14:52 +0100 Subject: [PATCH] Start to use leveled log --- cmd/obitools/obicount/main.go | 2 +- cmd/obitools/obimultiplex/main.go | 2 +- cmd/obitools/obipairing/main.go | 2 +- cmd/obitools/obipcr/main.go | 2 +- cmd/obitools/obiuniq/main.go | 2 +- pkg/obialign/pairedendalign.go | 2 +- pkg/obiapat/pcr.go | 2 +- pkg/obichunk/chunk_on_disk.go | 10 +++++++--- pkg/obichunk/chunks.go | 2 +- pkg/obichunk/options.go | 22 +++++++++++++++++++++ pkg/obichunk/subchunks.go | 5 +++-- pkg/obichunk/unique.go | 17 ++++++++++++++-- pkg/obiformats/dispatcher.go | 2 +- pkg/obiformats/ecopcr_read.go | 2 +- pkg/obiformats/embl_read.go | 2 +- pkg/obiformats/fastseq_json_header.go | 2 +- pkg/obiformats/fastseq_read.go | 11 ++++++----- pkg/obiformats/fastseq_write_fasta.go | 6 ++++-- pkg/obiformats/fastseq_write_fastq.go | 6 ++++-- pkg/obiformats/ncbitaxdump/read.go | 2 +- pkg/obiformats/universal_read.go | 7 ++++--- pkg/obiformats/universal_write.go | 2 +- pkg/obiiter/batchiterator.go | 11 ++++++----- pkg/obiiter/pairedbatchiterator.go | 2 +- pkg/obiiter/speed.go | 23 ++++++++++++++++------ pkg/obiiter/workers.go | 6 +++--- pkg/obingslibrary/match.go | 2 +- pkg/obioptions/options.go | 14 ++++++++++++- pkg/obiseq/biosequence.go | 5 +++-- pkg/obiseq/class.go | 2 +- pkg/obiseq/merge.go | 2 +- pkg/obiseq/predicate.go | 2 +- pkg/obitax/issuubcladeof.go | 2 +- pkg/obitax/taxonomy.go | 2 +- pkg/obitools/obiconvert/sequence_reader.go | 2 +- pkg/obitools/obiconvert/sequence_writer.go | 2 +- pkg/obitools/obidistribute/distribute.go | 2 +- pkg/obitools/obidistribute/options.go | 2 +- pkg/obitools/obimultiplex/demultiplex.go | 2 +- pkg/obitools/obipairing/pairing.go | 2 +- pkg/obitools/obipcr/options.go | 2 +- pkg/obitools/obiuniq/options.go | 15 ++++++++++++-- pkg/obitools/obiuniq/unique.go | 9 ++++++++- 43 files changed, 156 insertions(+), 67 deletions(-) diff --git a/cmd/obitools/obicount/main.go b/cmd/obitools/obicount/main.go index 90aaa14..3447c4d 100644 --- a/cmd/obitools/obicount/main.go +++ b/cmd/obitools/obicount/main.go @@ -34,7 +34,7 @@ func main() { _, args, _ := optionParser(os.Args) fs, _ := obiconvert.ReadBioSequencesBatch(args...) - nread, nvariant, nsymbol := fs.Count(true) + nvariant, nread, nsymbol := fs.Count(true) if obicount.CLIIsPrintingVariantCount() { fmt.Printf(" %d", nvariant) diff --git a/cmd/obitools/obimultiplex/main.go b/cmd/obitools/obimultiplex/main.go index 2805e3d..154ea8d 100644 --- a/cmd/obitools/obimultiplex/main.go +++ b/cmd/obitools/obimultiplex/main.go @@ -1,7 +1,7 @@ package main import ( - "log" + log "github.com/sirupsen/logrus" "os" "runtime/pprof" diff --git a/cmd/obitools/obipairing/main.go b/cmd/obitools/obipairing/main.go index ae10040..358a5a3 100644 --- a/cmd/obitools/obipairing/main.go +++ b/cmd/obitools/obipairing/main.go @@ -1,7 +1,7 @@ package main import ( - "log" + log "github.com/sirupsen/logrus" "os" "runtime/trace" diff --git a/cmd/obitools/obipcr/main.go b/cmd/obitools/obipcr/main.go index aec8621..5ecf59f 100644 --- a/cmd/obitools/obipcr/main.go +++ b/cmd/obitools/obipcr/main.go @@ -1,7 +1,7 @@ package main import ( - "log" + log "github.com/sirupsen/logrus" "os" "runtime/trace" diff --git a/cmd/obitools/obiuniq/main.go b/cmd/obitools/obiuniq/main.go index f2d6b82..9c52568 100644 --- a/cmd/obitools/obiuniq/main.go +++ b/cmd/obitools/obiuniq/main.go @@ -1,7 +1,7 @@ package main import ( - "log" + log "github.com/sirupsen/logrus" "os" "runtime/pprof" diff --git a/pkg/obialign/pairedendalign.go b/pkg/obialign/pairedendalign.go index 0ab4e63..1ae870b 100644 --- a/pkg/obialign/pairedendalign.go +++ b/pkg/obialign/pairedendalign.go @@ -1,7 +1,7 @@ package obialign import ( - "log" + log "github.com/sirupsen/logrus" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obikmer" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" diff --git a/pkg/obiapat/pcr.go b/pkg/obiapat/pcr.go index d2e2cd2..c8c85a0 100644 --- a/pkg/obiapat/pcr.go +++ b/pkg/obiapat/pcr.go @@ -1,7 +1,7 @@ package obiapat import ( - "log" + log "github.com/sirupsen/logrus" "git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter" diff --git a/pkg/obichunk/chunk_on_disk.go b/pkg/obichunk/chunk_on_disk.go index 6529f0c..1c15cc5 100644 --- a/pkg/obichunk/chunk_on_disk.go +++ b/pkg/obichunk/chunk_on_disk.go @@ -3,10 +3,11 @@ package obichunk import ( "io/fs" "io/ioutil" - "log" "os" "path/filepath" + log "github.com/sirupsen/logrus" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiformats" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" @@ -55,7 +56,7 @@ func ISequenceChunkOnDisk(iterator obiiter.IBioSequenceBatch, go func() { defer func() { os.RemoveAll(dir) - log.Println("Clear the cache directory") + log.Debugln("Clear the cache directory") }() newIter.Wait() @@ -68,7 +69,8 @@ func ISequenceChunkOnDisk(iterator obiiter.IBioSequenceBatch, ) fileNames := find(dir, ".fastx") - log.Println("batch count ", len(fileNames)) + nbatch := len(fileNames) + log.Infof("Data splitted over %d batches", nbatch) go func() { @@ -88,6 +90,8 @@ func ISequenceChunkOnDisk(iterator obiiter.IBioSequenceBatch, } newIter.Push(obiiter.MakeBioSequenceBatch(order, chunck)) + log.Infof("Start processing of batch %d/%d : %d sequences", + order, nbatch, len(chunck)) } diff --git a/pkg/obichunk/chunks.go b/pkg/obichunk/chunks.go index 8d04597..527a474 100644 --- a/pkg/obichunk/chunks.go +++ b/pkg/obichunk/chunks.go @@ -1,7 +1,7 @@ package obichunk import ( - "log" + log "github.com/sirupsen/logrus" "sync" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter" diff --git a/pkg/obichunk/options.go b/pkg/obichunk/options.go index 41d53a5..e49a614 100644 --- a/pkg/obichunk/options.go +++ b/pkg/obichunk/options.go @@ -9,6 +9,7 @@ type __options__ struct { bufferSize int batchSize int parallelWorkers int + noSingleton bool } type Options struct { @@ -27,6 +28,7 @@ func MakeOptions(setters []WithOption) Options { bufferSize: 2, batchSize: 5000, parallelWorkers: 4, + noSingleton: false, } opt := Options{&o} @@ -79,6 +81,10 @@ func (opt Options) SortOnDisk() bool { return opt.pointer.cacheOnDisk } +func (opt Options) NoSingleton() bool { + return opt.pointer.noSingleton +} + func OptionSortOnDisk() WithOption { f := WithOption(func(opt Options) { opt.pointer.cacheOnDisk = true @@ -149,3 +155,19 @@ func OptionsBufferSize(size int) WithOption { return f } + +func OptionsNoSingleton() WithOption { + f := WithOption(func(opt Options) { + opt.pointer.noSingleton = true + }) + + return f +} + +func OptionsWithSingleton() WithOption { + f := WithOption(func(opt Options) { + opt.pointer.noSingleton = false + }) + + return f +} diff --git a/pkg/obichunk/subchunks.go b/pkg/obichunk/subchunks.go index bd5253a..dbce3d6 100644 --- a/pkg/obichunk/subchunks.go +++ b/pkg/obichunk/subchunks.go @@ -1,10 +1,11 @@ package obichunk import ( - "log" "sort" "sync/atomic" + log "github.com/sirupsen/logrus" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" ) @@ -100,7 +101,7 @@ func ISequenceSubChunk(iterator obiiter.IBioSequenceBatch, classifier.Reset() if cap(ordered) < batch.Length() { - log.Println("Allocate a new ordered sequences : ", batch.Length()) + log.Debugln("Allocate a new ordered sequences : ", batch.Length()) ordered = make([]sSS, batch.Length()) } else { ordered = ordered[:batch.Length()] diff --git a/pkg/obichunk/unique.go b/pkg/obichunk/unique.go index 68eabfe..33c3e65 100644 --- a/pkg/obichunk/unique.go +++ b/pkg/obichunk/unique.go @@ -3,6 +3,8 @@ package obichunk import ( "sync" + log "github.com/sirupsen/logrus" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" ) @@ -16,6 +18,10 @@ func IUniqueSequence(iterator obiiter.IBioSequenceBatch, iUnique := obiiter.MakeIBioSequenceBatch(opts.BufferSize()) + iterator = iterator.Speed("Splitting data set") + + log.Infoln("Starting data splitting") + if opts.SortOnDisk() { nworkers = 1 iterator, err = ISequenceChunkOnDisk(iterator, @@ -36,6 +42,8 @@ func IUniqueSequence(iterator obiiter.IBioSequenceBatch, } } + log.Infoln("End of the data splitting") + iUnique.Add(nworkers) go func() { @@ -83,7 +91,12 @@ func IUniqueSequence(iterator obiiter.IBioSequenceBatch, batch := input.Get() if icat < 0 || len(batch.Slice()) == 1 { - iUnique.Push(batch.Reorder(nextOrder())) + if opts.NoSingleton() && len(batch.Slice()) == 1 && batch.Slice()[0].Count() == 1 { + batch.Slice()[0].Recycle() + batch.Recycle() + } else { + iUnique.Push(batch.Reorder(nextOrder())) + } } else { next.Push(batch.Reorder(o)) o++ @@ -111,5 +124,5 @@ func IUniqueSequence(iterator obiiter.IBioSequenceBatch, opts.BufferSize(), ) - return iMerged.Speed(), nil + return iMerged.Speed("Variants identified"), nil } diff --git a/pkg/obiformats/dispatcher.go b/pkg/obiformats/dispatcher.go index 4428f6a..d2ca27e 100644 --- a/pkg/obiformats/dispatcher.go +++ b/pkg/obiformats/dispatcher.go @@ -2,7 +2,7 @@ package obiformats import ( "fmt" - "log" + log "github.com/sirupsen/logrus" "sync" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter" diff --git a/pkg/obiformats/ecopcr_read.go b/pkg/obiformats/ecopcr_read.go index 74192f0..c035e9f 100644 --- a/pkg/obiformats/ecopcr_read.go +++ b/pkg/obiformats/ecopcr_read.go @@ -5,7 +5,7 @@ import ( "encoding/csv" "fmt" "io" - "log" + log "github.com/sirupsen/logrus" "os" "strconv" "strings" diff --git a/pkg/obiformats/embl_read.go b/pkg/obiformats/embl_read.go index 2cb966f..ecebd46 100644 --- a/pkg/obiformats/embl_read.go +++ b/pkg/obiformats/embl_read.go @@ -5,7 +5,7 @@ import ( "bytes" "compress/gzip" "io" - "log" + log "github.com/sirupsen/logrus" "os" "strconv" "strings" diff --git a/pkg/obiformats/fastseq_json_header.go b/pkg/obiformats/fastseq_json_header.go index ec3fefc..0515fde 100644 --- a/pkg/obiformats/fastseq_json_header.go +++ b/pkg/obiformats/fastseq_json_header.go @@ -1,7 +1,7 @@ package obiformats import ( - "log" + log "github.com/sirupsen/logrus" "strings" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" diff --git a/pkg/obiformats/fastseq_read.go b/pkg/obiformats/fastseq_read.go index 459bc4d..3f5dccb 100644 --- a/pkg/obiformats/fastseq_read.go +++ b/pkg/obiformats/fastseq_read.go @@ -8,10 +8,11 @@ import "C" import ( "fmt" - "log" "os" "unsafe" + log "github.com/sirupsen/logrus" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/cutils" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" @@ -61,7 +62,7 @@ func _FastseqReader(seqfile C.fast_kseq_p, slice = append(slice, rep) ii++ if ii >= batch_size { - // log.Printf("\n==> Pushing sequence batch\n") + //log.Printf("\n==> Pushing sequence batch\n") // start := time.Now() iterator.Push(obiiter.MakeBioSequenceBatch(i, slice)) @@ -100,7 +101,7 @@ func ReadFastSeqBatchFromFile(filename string, options ...WithOption) (obiiter.I fi, err := os.Stat(filename) if err == nil { size = fi.Size() - log.Printf("File size of %s is %d bytes\n", filename, size) + log.Debugf("File size of %s is %d bytes\n", filename, size) } else { size = -1 } @@ -110,10 +111,10 @@ func ReadFastSeqBatchFromFile(filename string, options ...WithOption) (obiiter.I go func() { newIter.WaitAndClose() - log.Println("End of the fastq file reading") + log.Debugln("End of the fastq file reading") }() - log.Println("Start of the fastq file reading") + log.Debugln("Start of the fastq file reading") go _FastseqReader(pointer, newIter, opt.BatchSize()) parser := opt.ParseFastSeqHeader() diff --git a/pkg/obiformats/fastseq_write_fasta.go b/pkg/obiformats/fastseq_write_fasta.go index f9eb602..7573777 100644 --- a/pkg/obiformats/fastseq_write_fasta.go +++ b/pkg/obiformats/fastseq_write_fasta.go @@ -4,10 +4,11 @@ import ( "bytes" "fmt" "io" - "log" "os" "strings" + log "github.com/sirupsen/logrus" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" ) @@ -114,6 +115,7 @@ func WriteFastaBatch(iterator obiiter.IBioSequenceBatch, go func() { newIter.WaitAndClose() close(chunkchan) + log.Debugln("End of the fasta file writing") }() ff := func(iterator obiiter.IBioSequenceBatch) { @@ -128,7 +130,7 @@ func WriteFastaBatch(iterator obiiter.IBioSequenceBatch, newIter.Done() } - log.Println("Start of the fasta file writing") + log.Debugln("Start of the fasta file writing") go ff(iterator) for i := 0; i < nwriters-1; i++ { go ff(iterator.Split()) diff --git a/pkg/obiformats/fastseq_write_fastq.go b/pkg/obiformats/fastseq_write_fastq.go index 9520a18..83e72de 100644 --- a/pkg/obiformats/fastseq_write_fastq.go +++ b/pkg/obiformats/fastseq_write_fastq.go @@ -4,10 +4,11 @@ import ( "bytes" "fmt" "io" - "log" "os" "time" + log "github.com/sirupsen/logrus" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" ) @@ -114,6 +115,7 @@ func WriteFastqBatch(iterator obiiter.IBioSequenceBatch, time.Sleep(time.Millisecond) } close(chunkchan) + log.Debugln("End of the fastq file writing") }() ff := func(iterator obiiter.IBioSequenceBatch) { @@ -129,7 +131,7 @@ func WriteFastqBatch(iterator obiiter.IBioSequenceBatch, newIter.Done() } - log.Println("Start of the fastq file writing") + log.Debugln("Start of the fastq file writing") go ff(iterator) for i := 0; i < nwriters-1; i++ { go ff(iterator.Split()) diff --git a/pkg/obiformats/ncbitaxdump/read.go b/pkg/obiformats/ncbitaxdump/read.go index 484f79d..a6c5d46 100644 --- a/pkg/obiformats/ncbitaxdump/read.go +++ b/pkg/obiformats/ncbitaxdump/read.go @@ -5,7 +5,7 @@ import ( "encoding/csv" "fmt" "io" - "log" + log "github.com/sirupsen/logrus" "os" "path" "strconv" diff --git a/pkg/obiformats/universal_read.go b/pkg/obiformats/universal_read.go index 0679e9f..ad42fb4 100644 --- a/pkg/obiformats/universal_read.go +++ b/pkg/obiformats/universal_read.go @@ -4,10 +4,11 @@ import ( "bufio" "compress/gzip" "io" - "log" "os" "strings" + log "github.com/sirupsen/logrus" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter" ) @@ -57,7 +58,7 @@ func ReadSequencesBatchFromFile(filename string, if err != nil { file.Seek(0, 0) } else { - log.Printf("File %s is gz compressed ", filename) + log.Debugf("File %s is gz compressed ", filename) reader = greader } @@ -72,7 +73,7 @@ func ReadSequencesBatchFromFile(filename string, } filetype := GuessSeqFileType(string(tag)) - log.Printf("File guessed format : %s (tag: %s)", + log.Debug("File guessed format : %s (tag: %s)", filetype, (strings.Split(string(tag), "\n"))[0]) reader = breader diff --git a/pkg/obiformats/universal_write.go b/pkg/obiformats/universal_write.go index 310444f..b19f92d 100644 --- a/pkg/obiformats/universal_write.go +++ b/pkg/obiformats/universal_write.go @@ -3,7 +3,7 @@ package obiformats import ( "fmt" "io" - "log" + log "github.com/sirupsen/logrus" "os" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter" diff --git a/pkg/obiiter/batchiterator.go b/pkg/obiiter/batchiterator.go index b5d1d9f..aa913cf 100644 --- a/pkg/obiiter/batchiterator.go +++ b/pkg/obiiter/batchiterator.go @@ -2,11 +2,12 @@ package obiiter import ( "fmt" - "log" "sync" "sync/atomic" "time" + log "github.com/sirupsen/logrus" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" "github.com/tevino/abool/v2" ) @@ -413,7 +414,7 @@ func (iterator IBioSequenceBatch) Rebatch(size int, sizes ...int) IBioSequenceBa func (iterator IBioSequenceBatch) Recycle() { - log.Println("Start recycling of Bioseq objects") + log.Debugln("Start recycling of Bioseq objects") recycled := 0 for iterator.Next() { // iterator.Get() @@ -424,7 +425,7 @@ func (iterator IBioSequenceBatch) Recycle() { } batch.Recycle() } - log.Printf("End of the recycling of %d Bioseq objects", recycled) + log.Debugf("End of the recycling of %d Bioseq objects", recycled) } func (iterator IBioSequenceBatch) Count(recycle bool) (int, int, int) { @@ -432,7 +433,7 @@ func (iterator IBioSequenceBatch) Count(recycle bool) (int, int, int) { reads := 0 nucleotides := 0 - log.Println("Start counting of Bioseq objects") + log.Debugln("Start counting of Bioseq objects") for iterator.Next() { // iterator.Get() batch := iterator.Get() @@ -447,7 +448,7 @@ func (iterator IBioSequenceBatch) Count(recycle bool) (int, int, int) { } batch.Recycle() } - log.Printf("End of the counting of %d Bioseq objects", variants) + log.Debugf("End of the counting of %d Bioseq objects", variants) return variants, reads, nucleotides } diff --git a/pkg/obiiter/pairedbatchiterator.go b/pkg/obiiter/pairedbatchiterator.go index 8222264..4a9fbc7 100644 --- a/pkg/obiiter/pairedbatchiterator.go +++ b/pkg/obiiter/pairedbatchiterator.go @@ -1,7 +1,7 @@ package obiiter import ( - "log" + log "github.com/sirupsen/logrus" "sync" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" diff --git a/pkg/obiiter/speed.go b/pkg/obiiter/speed.go index 3c9fd4b..0b49fd2 100644 --- a/pkg/obiiter/speed.go +++ b/pkg/obiiter/speed.go @@ -6,7 +6,7 @@ import ( "github.com/schollz/progressbar/v3" ) -func (iterator IBioSequenceBatch) Speed() IBioSequenceBatch { +func (iterator IBioSequenceBatch) Speed(message ...string) IBioSequenceBatch { newIter := MakeIBioSequenceBatch() newIter.Add(1) @@ -15,13 +15,25 @@ func (iterator IBioSequenceBatch) Speed() IBioSequenceBatch { newIter.WaitAndClose() }() - bar := progressbar.NewOptions( - -1, + pbopt := make([]progressbar.Option, 0, 5) + pbopt = append(pbopt, progressbar.OptionSetWriter(os.Stderr), progressbar.OptionSetWidth(15), progressbar.OptionShowCount(), progressbar.OptionShowIts(), - progressbar.OptionSetDescription("[Sequence Processing]")) + ) + + if len(message) > 0 { + pbopt = append(pbopt, + progressbar.OptionSetDescription(message[0]), + ) + } else { + pbopt = append(pbopt, + progressbar.OptionSetDescription("[Sequence Processing]"), + ) + } + + bar := progressbar.NewOptions(-1, pbopt...) go func() { @@ -38,11 +50,10 @@ func (iterator IBioSequenceBatch) Speed() IBioSequenceBatch { return newIter } - func SpeedPipe() Pipeable { f := func(iterator IBioSequenceBatch) IBioSequenceBatch { return iterator.Speed() } return f -} \ No newline at end of file +} diff --git a/pkg/obiiter/workers.go b/pkg/obiiter/workers.go index fa9fb4c..290e2ef 100644 --- a/pkg/obiiter/workers.go +++ b/pkg/obiiter/workers.go @@ -1,7 +1,7 @@ package obiiter import ( - "log" + log "github.com/sirupsen/logrus" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" ) @@ -39,7 +39,7 @@ func (iterator IBioSequenceBatch) MakeIWorker(worker SeqWorker, sizes ...int) IB go func() { newIter.WaitAndClose() - log.Println("End of the batch workers") + log.Debugln("End of the batch workers") }() @@ -54,7 +54,7 @@ func (iterator IBioSequenceBatch) MakeIWorker(worker SeqWorker, sizes ...int) IB newIter.Done() } - log.Println("Start of the batch workers") + log.Debugln("Start of the batch workers") for i := 0; i < nworkers-1; i++ { go f(iterator.Split()) } diff --git a/pkg/obingslibrary/match.go b/pkg/obingslibrary/match.go index 820ff64..54cf94a 100644 --- a/pkg/obingslibrary/match.go +++ b/pkg/obingslibrary/match.go @@ -3,7 +3,7 @@ package obingslibrary import ( "errors" "fmt" - "log" + log "github.com/sirupsen/logrus" "strings" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiapat" diff --git a/pkg/obioptions/options.go b/pkg/obioptions/options.go index 54d8d99..0207f1e 100644 --- a/pkg/obioptions/options.go +++ b/pkg/obioptions/options.go @@ -2,10 +2,11 @@ package obioptions import ( "fmt" - "log" "os" "runtime" + log "github.com/sirupsen/logrus" + "github.com/DavidGamba/go-getoptions" ) @@ -43,10 +44,21 @@ func GenerateOptionParser(optionset ...func(*getoptions.GetOpt)) ArgumentParser log.Printf("CPU number limited to %d", _MaxAllowedCPU) } + if options.Called("no-singleton") { + log.Printf("No singleton option set") + } + if options.Called("help") { fmt.Fprint(os.Stderr, options.Help()) os.Exit(1) } + + log.SetLevel(log.InfoLevel) + if options.Called("debug") { + log.SetLevel(log.DebugLevel) + log.Debugln("Switch to debug level logging") + } + return options, remaining, err } } diff --git a/pkg/obiseq/biosequence.go b/pkg/obiseq/biosequence.go index b5f15cf..318bfc2 100644 --- a/pkg/obiseq/biosequence.go +++ b/pkg/obiseq/biosequence.go @@ -2,9 +2,10 @@ package obiseq import ( "crypto/md5" - "log" "sync/atomic" + log "github.com/sirupsen/logrus" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils" ) @@ -15,7 +16,7 @@ var _MaxInMemSeq = int32(0) var _BioLogRate = int(100000) func LogBioSeqStatus() { - log.Printf("@@@@>>>> Created seq : %d Destroyed : %d In Memory : %d", _NewSeq, _RecycleSeq, _InMemSeq) + log.Debugf("Created seq : %d Destroyed : %d In Memory : %d", _NewSeq, _RecycleSeq, _InMemSeq) } type Quality []uint8 diff --git a/pkg/obiseq/class.go b/pkg/obiseq/class.go index 1b0f0e5..cdefd36 100644 --- a/pkg/obiseq/class.go +++ b/pkg/obiseq/class.go @@ -3,7 +3,7 @@ package obiseq import ( "fmt" "hash/crc32" - "log" + log "github.com/sirupsen/logrus" "strconv" "sync" ) diff --git a/pkg/obiseq/merge.go b/pkg/obiseq/merge.go index d73e3f6..ffe548c 100644 --- a/pkg/obiseq/merge.go +++ b/pkg/obiseq/merge.go @@ -2,7 +2,7 @@ package obiseq import ( "fmt" - "log" + log "github.com/sirupsen/logrus" "strings" ) diff --git a/pkg/obiseq/predicate.go b/pkg/obiseq/predicate.go index 324cc07..3b786ca 100644 --- a/pkg/obiseq/predicate.go +++ b/pkg/obiseq/predicate.go @@ -2,7 +2,7 @@ package obiseq import ( "context" - "log" + log "github.com/sirupsen/logrus" "github.com/PaesslerAG/gval" ) diff --git a/pkg/obitax/issuubcladeof.go b/pkg/obitax/issuubcladeof.go index fbb1849..b188478 100644 --- a/pkg/obitax/issuubcladeof.go +++ b/pkg/obitax/issuubcladeof.go @@ -1,7 +1,7 @@ package obitax import ( - "log" + log "github.com/sirupsen/logrus" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" ) diff --git a/pkg/obitax/taxonomy.go b/pkg/obitax/taxonomy.go index 8c5c956..5f24437 100644 --- a/pkg/obitax/taxonomy.go +++ b/pkg/obitax/taxonomy.go @@ -2,7 +2,7 @@ package obitax import ( "fmt" - "log" + log "github.com/sirupsen/logrus" ) type TaxName struct { diff --git a/pkg/obitools/obiconvert/sequence_reader.go b/pkg/obitools/obiconvert/sequence_reader.go index 152efb2..0b0071e 100644 --- a/pkg/obitools/obiconvert/sequence_reader.go +++ b/pkg/obitools/obiconvert/sequence_reader.go @@ -1,7 +1,7 @@ package obiconvert import ( - "log" + log "github.com/sirupsen/logrus" "os" "path/filepath" "strings" diff --git a/pkg/obitools/obiconvert/sequence_writer.go b/pkg/obitools/obiconvert/sequence_writer.go index 2b01900..7c75dd7 100644 --- a/pkg/obitools/obiconvert/sequence_writer.go +++ b/pkg/obitools/obiconvert/sequence_writer.go @@ -1,7 +1,7 @@ package obiconvert import ( - "log" + log "github.com/sirupsen/logrus" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiformats" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter" diff --git a/pkg/obitools/obidistribute/distribute.go b/pkg/obitools/obidistribute/distribute.go index eda5d34..0dacdf2 100644 --- a/pkg/obitools/obidistribute/distribute.go +++ b/pkg/obitools/obidistribute/distribute.go @@ -1,7 +1,7 @@ package obidistribute import ( - "log" + log "github.com/sirupsen/logrus" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiformats" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter" diff --git a/pkg/obitools/obidistribute/options.go b/pkg/obitools/obidistribute/options.go index 42c63c0..3349428 100644 --- a/pkg/obitools/obidistribute/options.go +++ b/pkg/obitools/obidistribute/options.go @@ -2,7 +2,7 @@ package obidistribute import ( "fmt" - "log" + log "github.com/sirupsen/logrus" "strings" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" diff --git a/pkg/obitools/obimultiplex/demultiplex.go b/pkg/obitools/obimultiplex/demultiplex.go index 99f8f8d..30e4e23 100644 --- a/pkg/obitools/obimultiplex/demultiplex.go +++ b/pkg/obitools/obimultiplex/demultiplex.go @@ -1,7 +1,7 @@ package obimultiplex import ( - "log" + log "github.com/sirupsen/logrus" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obingslibrary" diff --git a/pkg/obitools/obipairing/pairing.go b/pkg/obitools/obipairing/pairing.go index 986f203..a061256 100644 --- a/pkg/obitools/obipairing/pairing.go +++ b/pkg/obitools/obipairing/pairing.go @@ -1,7 +1,7 @@ package obipairing import ( - "log" + log "github.com/sirupsen/logrus" "math" "os" "runtime" diff --git a/pkg/obitools/obipcr/options.go b/pkg/obitools/obipcr/options.go index 79b04cb..a7f888a 100644 --- a/pkg/obitools/obipcr/options.go +++ b/pkg/obitools/obipcr/options.go @@ -1,7 +1,7 @@ package obipcr import ( - "log" + log "github.com/sirupsen/logrus" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiapat" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert" diff --git a/pkg/obitools/obiuniq/options.go b/pkg/obitools/obiuniq/options.go index b06f4d8..7caa82b 100644 --- a/pkg/obitools/obiuniq/options.go +++ b/pkg/obitools/obiuniq/options.go @@ -10,22 +10,29 @@ var _Keys = make([]string, 0, 10) var _OnDisk = false var _chunks = 100 var _NAValue = "NA" +var _NoSingleton = false func UniqueOptionSet(options *getoptions.GetOpt) { options.StringSliceVar(&_StatsOn, "merge", 1, 1, options.Alias("m"), + options.ArgName("KEY"), options.Description("Adds a merged attribute containing the list of sequence record ids merged within this group.")) options.StringSliceVar(&_Keys, "category-attribute", 1, 1, options.Alias("c"), + options.ArgName("CATEGORY"), options.Description("Adds one attribute to the list of attributes used to define sequence groups (this option can be used several times).")) options.StringVar(&_NAValue, "na-value", _NAValue, + options.ArgName("NA_NAME"), options.Description("Value used when the classifier tag is not defined for a sequence.")) - options.BoolVar(&_OnDisk, "on-disk", true, + options.BoolVar(&_NoSingleton, "no-singleton", _NoSingleton, + options.Description("If set, sequences occurring a single time in the data set are discarded.")) + + options.BoolVar(&_OnDisk, "on-disk", _OnDisk, options.Description("Allows for using a disk cache during the dereplication process. ")) options.IntVar(&_chunks, "chunk-count", _chunks, @@ -49,7 +56,7 @@ func CLIKeys() []string { } func CLIUniqueInMemory() bool { - return _OnDisk + return !_OnDisk } func CLINumberOfChunks() int { @@ -63,3 +70,7 @@ func CLINumberOfChunks() int { func CLINAValue() string { return _NAValue } + +func CLINoSingleton() bool { + return _NoSingleton +} diff --git a/pkg/obitools/obiuniq/unique.go b/pkg/obitools/obiuniq/unique.go index f468103..c3d6b77 100644 --- a/pkg/obitools/obiuniq/unique.go +++ b/pkg/obitools/obiuniq/unique.go @@ -1,7 +1,7 @@ package obiuniq import ( - "log" + log "github.com/sirupsen/logrus" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obichunk" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter" @@ -24,6 +24,13 @@ func Unique(sequences obiiter.IBioSequenceBatch) obiiter.IBioSequenceBatch { options = append(options, obichunk.OptionSortOnDisk()) } + if CLINoSingleton() { + log.Printf("Removing sigletons from the output") + options = append(options, obichunk.OptionsNoSingleton()) + } else { + log.Printf("Keep sigletons in the output") + } + options = append(options, obichunk.OptionStatOn(CLIStatsOn()...))