Start to use leveled log

This commit is contained in:
2022-02-24 12:14:52 +01:00
parent f18cc034bb
commit abcf02e488
43 changed files with 156 additions and 67 deletions

View File

@ -34,7 +34,7 @@ func main() {
_, args, _ := optionParser(os.Args)
fs, _ := obiconvert.ReadBioSequencesBatch(args...)
nread, nvariant, nsymbol := fs.Count(true)
nvariant, nread, nsymbol := fs.Count(true)
if obicount.CLIIsPrintingVariantCount() {
fmt.Printf(" %d", nvariant)

View File

@ -1,7 +1,7 @@
package main
import (
"log"
log "github.com/sirupsen/logrus"
"os"
"runtime/pprof"

View File

@ -1,7 +1,7 @@
package main
import (
"log"
log "github.com/sirupsen/logrus"
"os"
"runtime/trace"

View File

@ -1,7 +1,7 @@
package main
import (
"log"
log "github.com/sirupsen/logrus"
"os"
"runtime/trace"

View File

@ -1,7 +1,7 @@
package main
import (
"log"
log "github.com/sirupsen/logrus"
"os"
"runtime/pprof"

View File

@ -1,7 +1,7 @@
package obialign
import (
"log"
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obikmer"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"

View File

@ -1,7 +1,7 @@
package obiapat
import (
"log"
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"

View File

@ -3,10 +3,11 @@ package obichunk
import (
"io/fs"
"io/ioutil"
"log"
"os"
"path/filepath"
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiformats"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
@ -55,7 +56,7 @@ func ISequenceChunkOnDisk(iterator obiiter.IBioSequenceBatch,
go func() {
defer func() {
os.RemoveAll(dir)
log.Println("Clear the cache directory")
log.Debugln("Clear the cache directory")
}()
newIter.Wait()
@ -68,7 +69,8 @@ func ISequenceChunkOnDisk(iterator obiiter.IBioSequenceBatch,
)
fileNames := find(dir, ".fastx")
log.Println("batch count ", len(fileNames))
// Keep the batch count: it is reused when reporting per-batch progress.
nbatch := len(fileNames)
log.Infof("Data split over %d batches", nbatch)
go func() {
@ -88,6 +90,8 @@ func ISequenceChunkOnDisk(iterator obiiter.IBioSequenceBatch,
}
newIter.Push(obiiter.MakeBioSequenceBatch(order, chunck))
log.Infof("Start processing of batch %d/%d : %d sequences",
order, nbatch, len(chunck))
}

View File

@ -1,7 +1,7 @@
package obichunk
import (
"log"
log "github.com/sirupsen/logrus"
"sync"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"

View File

@ -9,6 +9,7 @@ type __options__ struct {
bufferSize int
batchSize int
parallelWorkers int
noSingleton bool
}
type Options struct {
@ -27,6 +28,7 @@ func MakeOptions(setters []WithOption) Options {
bufferSize: 2,
batchSize: 5000,
parallelWorkers: 4,
noSingleton: false,
}
opt := Options{&o}
@ -79,6 +81,10 @@ func (opt Options) SortOnDisk() bool {
return opt.pointer.cacheOnDisk
}
// NoSingleton reports the value of the noSingleton flag held by the
// underlying option set.
func (opt Options) NoSingleton() bool {
	settings := opt.pointer
	return settings.noSingleton
}
func OptionSortOnDisk() WithOption {
f := WithOption(func(opt Options) {
opt.pointer.cacheOnDisk = true
@ -149,3 +155,19 @@ func OptionsBufferSize(size int) WithOption {
return f
}
// OptionsNoSingleton returns a WithOption setter that switches on the
// noSingleton flag of the Options it is applied to.
func OptionsNoSingleton() WithOption {
	return WithOption(func(target Options) {
		target.pointer.noSingleton = true
	})
}
// OptionsWithSingleton returns a WithOption setter that switches off the
// noSingleton flag of the Options it is applied to.
func OptionsWithSingleton() WithOption {
	return WithOption(func(target Options) {
		target.pointer.noSingleton = false
	})
}

View File

@ -1,10 +1,11 @@
package obichunk
import (
"log"
"sort"
"sync/atomic"
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
)
@ -100,7 +101,7 @@ func ISequenceSubChunk(iterator obiiter.IBioSequenceBatch,
classifier.Reset()
if cap(ordered) < batch.Length() {
log.Println("Allocate a new ordered sequences : ", batch.Length())
log.Debugln("Allocate a new ordered sequences : ", batch.Length())
ordered = make([]sSS, batch.Length())
} else {
ordered = ordered[:batch.Length()]

View File

@ -3,6 +3,8 @@ package obichunk
import (
"sync"
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
)
@ -16,6 +18,10 @@ func IUniqueSequence(iterator obiiter.IBioSequenceBatch,
iUnique := obiiter.MakeIBioSequenceBatch(opts.BufferSize())
iterator = iterator.Speed("Splitting data set")
log.Infoln("Starting data splitting")
if opts.SortOnDisk() {
nworkers = 1
iterator, err = ISequenceChunkOnDisk(iterator,
@ -36,6 +42,8 @@ func IUniqueSequence(iterator obiiter.IBioSequenceBatch,
}
}
log.Infoln("End of the data splitting")
iUnique.Add(nworkers)
go func() {
@ -83,7 +91,12 @@ func IUniqueSequence(iterator obiiter.IBioSequenceBatch,
batch := input.Get()
if icat < 0 || len(batch.Slice()) == 1 {
iUnique.Push(batch.Reorder(nextOrder()))
if opts.NoSingleton() && len(batch.Slice()) == 1 && batch.Slice()[0].Count() == 1 {
batch.Slice()[0].Recycle()
batch.Recycle()
} else {
iUnique.Push(batch.Reorder(nextOrder()))
}
} else {
next.Push(batch.Reorder(o))
o++
@ -111,5 +124,5 @@ func IUniqueSequence(iterator obiiter.IBioSequenceBatch,
opts.BufferSize(),
)
return iMerged.Speed(), nil
return iMerged.Speed("Variants identified"), nil
}

View File

@ -2,7 +2,7 @@ package obiformats
import (
"fmt"
"log"
log "github.com/sirupsen/logrus"
"sync"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"

View File

@ -5,7 +5,7 @@ import (
"encoding/csv"
"fmt"
"io"
"log"
log "github.com/sirupsen/logrus"
"os"
"strconv"
"strings"

View File

@ -5,7 +5,7 @@ import (
"bytes"
"compress/gzip"
"io"
"log"
log "github.com/sirupsen/logrus"
"os"
"strconv"
"strings"

View File

@ -1,7 +1,7 @@
package obiformats
import (
"log"
log "github.com/sirupsen/logrus"
"strings"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"

View File

@ -8,10 +8,11 @@ import "C"
import (
"fmt"
"log"
"os"
"unsafe"
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/cutils"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
@ -61,7 +62,7 @@ func _FastseqReader(seqfile C.fast_kseq_p,
slice = append(slice, rep)
ii++
if ii >= batch_size {
// log.Printf("\n==> Pushing sequence batch\n")
//log.Printf("\n==> Pushing sequence batch\n")
// start := time.Now()
iterator.Push(obiiter.MakeBioSequenceBatch(i, slice))
@ -100,7 +101,7 @@ func ReadFastSeqBatchFromFile(filename string, options ...WithOption) (obiiter.I
fi, err := os.Stat(filename)
if err == nil {
size = fi.Size()
log.Printf("File size of %s is %d bytes\n", filename, size)
log.Debugf("File size of %s is %d bytes\n", filename, size)
} else {
size = -1
}
@ -110,10 +111,10 @@ func ReadFastSeqBatchFromFile(filename string, options ...WithOption) (obiiter.I
go func() {
newIter.WaitAndClose()
log.Println("End of the fastq file reading")
log.Debugln("End of the fastq file reading")
}()
log.Println("Start of the fastq file reading")
log.Debugln("Start of the fastq file reading")
go _FastseqReader(pointer, newIter, opt.BatchSize())
parser := opt.ParseFastSeqHeader()

View File

@ -4,10 +4,11 @@ import (
"bytes"
"fmt"
"io"
"log"
"os"
"strings"
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
)
@ -114,6 +115,7 @@ func WriteFastaBatch(iterator obiiter.IBioSequenceBatch,
go func() {
newIter.WaitAndClose()
close(chunkchan)
log.Debugln("End of the fasta file writing")
}()
ff := func(iterator obiiter.IBioSequenceBatch) {
@ -128,7 +130,7 @@ func WriteFastaBatch(iterator obiiter.IBioSequenceBatch,
newIter.Done()
}
log.Println("Start of the fasta file writing")
log.Debugln("Start of the fasta file writing")
go ff(iterator)
for i := 0; i < nwriters-1; i++ {
go ff(iterator.Split())

View File

@ -4,10 +4,11 @@ import (
"bytes"
"fmt"
"io"
"log"
"os"
"time"
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
)
@ -114,6 +115,7 @@ func WriteFastqBatch(iterator obiiter.IBioSequenceBatch,
time.Sleep(time.Millisecond)
}
close(chunkchan)
log.Debugln("End of the fastq file writing")
}()
ff := func(iterator obiiter.IBioSequenceBatch) {
@ -129,7 +131,7 @@ func WriteFastqBatch(iterator obiiter.IBioSequenceBatch,
newIter.Done()
}
log.Println("Start of the fastq file writing")
log.Debugln("Start of the fastq file writing")
go ff(iterator)
for i := 0; i < nwriters-1; i++ {
go ff(iterator.Split())

View File

@ -5,7 +5,7 @@ import (
"encoding/csv"
"fmt"
"io"
"log"
log "github.com/sirupsen/logrus"
"os"
"path"
"strconv"

View File

@ -4,10 +4,11 @@ import (
"bufio"
"compress/gzip"
"io"
"log"
"os"
"strings"
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
)
@ -57,7 +58,7 @@ func ReadSequencesBatchFromFile(filename string,
if err != nil {
file.Seek(0, 0)
} else {
log.Printf("File %s is gz compressed ", filename)
log.Debugf("File %s is gz compressed ", filename)
reader = greader
}
@ -72,7 +73,7 @@ func ReadSequencesBatchFromFile(filename string,
}
filetype := GuessSeqFileType(string(tag))
log.Printf("File guessed format : %s (tag: %s)",
// Debugf, not Debug: the message carries format verbs.
log.Debugf("File guessed format : %s (tag: %s)",
filetype, (strings.Split(string(tag), "\n"))[0])
reader = breader

View File

@ -3,7 +3,7 @@ package obiformats
import (
"fmt"
"io"
"log"
log "github.com/sirupsen/logrus"
"os"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"

View File

@ -2,11 +2,12 @@ package obiiter
import (
"fmt"
"log"
"sync"
"sync/atomic"
"time"
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
"github.com/tevino/abool/v2"
)
@ -413,7 +414,7 @@ func (iterator IBioSequenceBatch) Rebatch(size int, sizes ...int) IBioSequenceBa
func (iterator IBioSequenceBatch) Recycle() {
log.Println("Start recycling of Bioseq objects")
log.Debugln("Start recycling of Bioseq objects")
recycled := 0
for iterator.Next() {
// iterator.Get()
@ -424,7 +425,7 @@ func (iterator IBioSequenceBatch) Recycle() {
}
batch.Recycle()
}
log.Printf("End of the recycling of %d Bioseq objects", recycled)
log.Debugf("End of the recycling of %d Bioseq objects", recycled)
}
func (iterator IBioSequenceBatch) Count(recycle bool) (int, int, int) {
@ -432,7 +433,7 @@ func (iterator IBioSequenceBatch) Count(recycle bool) (int, int, int) {
reads := 0
nucleotides := 0
log.Println("Start counting of Bioseq objects")
log.Debugln("Start counting of Bioseq objects")
for iterator.Next() {
// iterator.Get()
batch := iterator.Get()
@ -447,7 +448,7 @@ func (iterator IBioSequenceBatch) Count(recycle bool) (int, int, int) {
}
batch.Recycle()
}
log.Printf("End of the counting of %d Bioseq objects", variants)
log.Debugf("End of the counting of %d Bioseq objects", variants)
return variants, reads, nucleotides
}

View File

@ -1,7 +1,7 @@
package obiiter
import (
"log"
log "github.com/sirupsen/logrus"
"sync"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"

View File

@ -6,7 +6,7 @@ import (
"github.com/schollz/progressbar/v3"
)
func (iterator IBioSequenceBatch) Speed() IBioSequenceBatch {
func (iterator IBioSequenceBatch) Speed(message ...string) IBioSequenceBatch {
newIter := MakeIBioSequenceBatch()
newIter.Add(1)
@ -15,13 +15,25 @@ func (iterator IBioSequenceBatch) Speed() IBioSequenceBatch {
newIter.WaitAndClose()
}()
bar := progressbar.NewOptions(
-1,
pbopt := make([]progressbar.Option, 0, 5)
pbopt = append(pbopt,
progressbar.OptionSetWriter(os.Stderr),
progressbar.OptionSetWidth(15),
progressbar.OptionShowCount(),
progressbar.OptionShowIts(),
progressbar.OptionSetDescription("[Sequence Processing]"))
)
if len(message) > 0 {
pbopt = append(pbopt,
progressbar.OptionSetDescription(message[0]),
)
} else {
pbopt = append(pbopt,
progressbar.OptionSetDescription("[Sequence Processing]"),
)
}
bar := progressbar.NewOptions(-1, pbopt...)
go func() {
@ -38,11 +50,10 @@ func (iterator IBioSequenceBatch) Speed() IBioSequenceBatch {
return newIter
}
// SpeedPipe returns a Pipeable that applies Speed, called without any
// message argument, to the iterator it receives.
func SpeedPipe() Pipeable {
	return func(input IBioSequenceBatch) IBioSequenceBatch {
		return input.Speed()
	}
}
}

View File

@ -1,7 +1,7 @@
package obiiter
import (
"log"
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
)
@ -39,7 +39,7 @@ func (iterator IBioSequenceBatch) MakeIWorker(worker SeqWorker, sizes ...int) IB
go func() {
newIter.WaitAndClose()
log.Println("End of the batch workers")
log.Debugln("End of the batch workers")
}()
@ -54,7 +54,7 @@ func (iterator IBioSequenceBatch) MakeIWorker(worker SeqWorker, sizes ...int) IB
newIter.Done()
}
log.Println("Start of the batch workers")
log.Debugln("Start of the batch workers")
for i := 0; i < nworkers-1; i++ {
go f(iterator.Split())
}

View File

@ -3,7 +3,7 @@ package obingslibrary
import (
"errors"
"fmt"
"log"
log "github.com/sirupsen/logrus"
"strings"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiapat"

View File

@ -2,10 +2,11 @@ package obioptions
import (
"fmt"
"log"
"os"
"runtime"
log "github.com/sirupsen/logrus"
"github.com/DavidGamba/go-getoptions"
)
@ -43,10 +44,21 @@ func GenerateOptionParser(optionset ...func(*getoptions.GetOpt)) ArgumentParser
log.Printf("CPU number limited to %d", _MaxAllowedCPU)
}
if options.Called("no-singleton") {
log.Printf("No singleton option set")
}
if options.Called("help") {
fmt.Fprint(os.Stderr, options.Help())
os.Exit(1)
}
log.SetLevel(log.InfoLevel)
if options.Called("debug") {
log.SetLevel(log.DebugLevel)
log.Debugln("Switch to debug level logging")
}
return options, remaining, err
}
}

View File

@ -2,9 +2,10 @@ package obiseq
import (
"crypto/md5"
"log"
"sync/atomic"
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
)
@ -15,7 +16,7 @@ var _MaxInMemSeq = int32(0)
var _BioLogRate = int(100000)
func LogBioSeqStatus() {
log.Printf("@@@@>>>> Created seq : %d Destroyed : %d In Memory : %d", _NewSeq, _RecycleSeq, _InMemSeq)
log.Debugf("Created seq : %d Destroyed : %d In Memory : %d", _NewSeq, _RecycleSeq, _InMemSeq)
}
type Quality []uint8

View File

@ -3,7 +3,7 @@ package obiseq
import (
"fmt"
"hash/crc32"
"log"
log "github.com/sirupsen/logrus"
"strconv"
"sync"
)

View File

@ -2,7 +2,7 @@ package obiseq
import (
"fmt"
"log"
log "github.com/sirupsen/logrus"
"strings"
)

View File

@ -2,7 +2,7 @@ package obiseq
import (
"context"
"log"
log "github.com/sirupsen/logrus"
"github.com/PaesslerAG/gval"
)

View File

@ -1,7 +1,7 @@
package obitax
import (
"log"
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
)

View File

@ -2,7 +2,7 @@ package obitax
import (
"fmt"
"log"
log "github.com/sirupsen/logrus"
)
type TaxName struct {

View File

@ -1,7 +1,7 @@
package obiconvert
import (
"log"
log "github.com/sirupsen/logrus"
"os"
"path/filepath"
"strings"

View File

@ -1,7 +1,7 @@
package obiconvert
import (
"log"
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiformats"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"

View File

@ -1,7 +1,7 @@
package obidistribute
import (
"log"
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiformats"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"

View File

@ -2,7 +2,7 @@ package obidistribute
import (
"fmt"
"log"
log "github.com/sirupsen/logrus"
"strings"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"

View File

@ -1,7 +1,7 @@
package obimultiplex
import (
"log"
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obingslibrary"

View File

@ -1,7 +1,7 @@
package obipairing
import (
"log"
log "github.com/sirupsen/logrus"
"math"
"os"
"runtime"

View File

@ -1,7 +1,7 @@
package obipcr
import (
"log"
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiapat"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"

View File

@ -10,22 +10,29 @@ var _Keys = make([]string, 0, 10)
var _OnDisk = false
var _chunks = 100
var _NAValue = "NA"
var _NoSingleton = false
func UniqueOptionSet(options *getoptions.GetOpt) {
options.StringSliceVar(&_StatsOn, "merge",
1, 1,
options.Alias("m"),
options.ArgName("KEY"),
options.Description("Adds a merged attribute containing the list of sequence record ids merged within this group."))
options.StringSliceVar(&_Keys, "category-attribute",
1, 1,
options.Alias("c"),
options.ArgName("CATEGORY"),
options.Description("Adds one attribute to the list of attributes used to define sequence groups (this option can be used several times)."))
options.StringVar(&_NAValue, "na-value", _NAValue,
options.ArgName("NA_NAME"),
options.Description("Value used when the classifier tag is not defined for a sequence."))
options.BoolVar(&_OnDisk, "on-disk", true,
options.BoolVar(&_NoSingleton, "no-singleton", _NoSingleton,
options.Description("If set, sequences occurring a single time in the data set are discarded."))
options.BoolVar(&_OnDisk, "on-disk", _OnDisk,
options.Description("Allows for using a disk cache during the dereplication process. "))
options.IntVar(&_chunks, "chunk-count", _chunks,
@ -49,7 +56,7 @@ func CLIKeys() []string {
}
func CLIUniqueInMemory() bool {
return _OnDisk
return !_OnDisk
}
func CLINumberOfChunks() int {
@ -63,3 +70,7 @@ func CLINumberOfChunks() int {
// CLINAValue returns the current value of _NAValue, the variable bound
// to the "na-value" command line option.
func CLINAValue() string {
	naValue := _NAValue
	return naValue
}
// CLINoSingleton returns the current value of _NoSingleton, the variable
// bound to the "no-singleton" command line option.
func CLINoSingleton() bool {
	discardSingletons := _NoSingleton
	return discardSingletons
}

View File

@ -1,7 +1,7 @@
package obiuniq
import (
"log"
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obichunk"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
@ -24,6 +24,13 @@ func Unique(sequences obiiter.IBioSequenceBatch) obiiter.IBioSequenceBatch {
options = append(options, obichunk.OptionSortOnDisk())
}
// Report how singletons are handled; the option is only added when they
// have to be removed.
if CLINoSingleton() {
	log.Printf("Removing singletons from the output")
	options = append(options, obichunk.OptionsNoSingleton())
} else {
	log.Printf("Keep singletons in the output")
}
options = append(options,
obichunk.OptionStatOn(CLIStatsOn()...))