diff --git a/doc/man/test.md b/doc/man/test.md new file mode 100644 index 0000000..cc93897 --- /dev/null +++ b/doc/man/test.md @@ -0,0 +1,20 @@ +% obigrep(1) +% Eric Coissac +% February 2023 +# NAME + +obigrep -- filters sequence files according to numerous conditions + +# SYNOPSIS + +**snowplough**␣\[**-h**\]␣\[**--configfile**␣*CONFIGFILE*\]␣\[**--sort**\]␣\[**--vim**\]␣*keyword*␣\[*keyword␣...*\]␣\[**--configfile**␣*CONFIGFILE*\]␣\[**--sort**\]␣\[**--vim**\]␣*keyword*␣\[*keyword␣...*\]␣\[**--configfile**␣*CONFIGFILE*\]␣\[**--sort**\]␣\[**--vim**\]␣*keyword*␣\[*keyword␣...*\] + +# DESCRIPTION + +**snowplough** is a simple-minded tool which creates indices and body-searches vast quantities of ServiceNow tickets, which is useful to find out how someone before you solved one particular problem. + +# GENERAL OPTIONS + +**-h**, **--help** + +: Display a friendly help message. \ No newline at end of file diff --git a/pkg/goutils/minmax.go b/pkg/goutils/minmax.go deleted file mode 100644 index a467b6f..0000000 --- a/pkg/goutils/minmax.go +++ /dev/null @@ -1,37 +0,0 @@ -package goutils - -func MinInt(x, y int) int { - if x < y { - return x - } - return y -} - -func MaxInt(x, y int) int { - if x < y { - return y - } - return x -} - -func MinMaxInt(x, y int) (int,int) { - if x < y { - return x,y - } - return y,x -} - - -func MinUInt16(x, y uint16) uint16 { - if x < y { - return x - } - return y -} - -func MaxUInt16(x, y uint16) uint16 { - if x < y { - return y - } - return x -} diff --git a/pkg/obialign/fastlcs.go b/pkg/obialign/fastlcs.go index 54bb1bb..66b9da5 100644 --- a/pkg/obialign/fastlcs.go +++ b/pkg/obialign/fastlcs.go @@ -1,7 +1,7 @@ package obialign // import ( -// "git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils" +// "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils" // "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" // ) @@ -113,11 +113,11 @@ var _notavail = encodeValues(0, 30000, false) // // in_matrix := false // x1 := y - lB + extra 
// x2 := extra - y -// xs := goutils.MaxInt(goutils.MaxInt(x1, x2), 0) +// xs := obiutils.MaxInt(obiutils.MaxInt(x1, x2), 0) // x1 = y + extra // x2 = lA + extra - y -// xf := goutils.MinInt(goutils.MinInt(x1, x2), even-1) + 1 +// xf := obiutils.MinInt(obiutils.MinInt(x1, x2), even-1) + 1 // for x := xs; x < xf; x++ { @@ -174,11 +174,11 @@ var _notavail = encodeValues(0, 30000, false) // // . 9 10 + 2 - 1 // x1 = y - lB + extra + even // x2 = extra - y + even - 1 -// xs = goutils.MaxInt(goutils.MaxInt(x1, x2), even) +// xs = obiutils.MaxInt(obiutils.MaxInt(x1, x2), even) // x1 = y + extra + even // x2 = lA + extra - y + even - 1 -// xf = goutils.MinInt(goutils.MinInt(x1, x2), width-1) + 1 +// xf = obiutils.MinInt(obiutils.MinInt(x1, x2), width-1) + 1 // for x := xs; x < xf; x++ { diff --git a/pkg/obialign/fastlcsegf.go b/pkg/obialign/fastlcsegf.go index fc51ca5..84f764c 100644 --- a/pkg/obialign/fastlcsegf.go +++ b/pkg/obialign/fastlcsegf.go @@ -1,8 +1,8 @@ package obialign import ( - "git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils" log "github.com/sirupsen/logrus" ) @@ -109,11 +109,11 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[ // in_matrix := false x1 := y - lB + extra x2 := extra - y - xs := goutils.MaxInt(goutils.MaxInt(x1, x2), 0) + xs := obiutils.MaxInt(obiutils.MaxInt(x1, x2), 0) x1 = y + extra x2 = lA + extra - y - xf := goutils.MinInt(goutils.MinInt(x1, x2), even-1) + 1 + xf := obiutils.MinInt(obiutils.MinInt(x1, x2), even-1) + 1 for x := xs; x < xf; x++ { @@ -201,11 +201,11 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[ // . 
9 10 + 2 - 1 x1 = y - lB + extra + even x2 = extra - y + even - 1 - xs = goutils.MaxInt(goutils.MaxInt(x1, x2), even) + xs = obiutils.MaxInt(obiutils.MaxInt(x1, x2), even) x1 = y + extra + even x2 = lA + extra - y + even - 1 - xf = goutils.MinInt(goutils.MinInt(x1, x2), width-1) + 1 + xf = obiutils.MinInt(obiutils.MinInt(x1, x2), width-1) + 1 for x := xs; x < xf; x++ { diff --git a/pkg/obiapat/pattern.go b/pkg/obiapat/pattern.go index ec36bf1..5f86ab1 100644 --- a/pkg/obiapat/pattern.go +++ b/pkg/obiapat/pattern.go @@ -13,9 +13,9 @@ import ( log "github.com/sirupsen/logrus" - "git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obialign" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils" ) var _MaxPatLen = int(C.MAX_PAT_LEN) @@ -355,8 +355,8 @@ func (pattern ApatPattern) BestMatch(sequence ApatSequence, begin, length int) ( start = best[0] - nerr end = best[0] + int(pattern.pointer.pointer.patlen) + nerr - start = goutils.MaxInt(start, 0) - end = goutils.MinInt(end, sequence.Len()) + start = obiutils.MaxInt(start, 0) + end = obiutils.MinInt(end, sequence.Len()) cpattern := (*[1 << 30]byte)(unsafe.Pointer(pattern.pointer.pointer.cpat)) cseq := (*[1 << 30]byte)(unsafe.Pointer(sequence.pointer.pointer.cseq)) @@ -374,7 +374,7 @@ func (pattern ApatPattern) BestMatch(sequence ApatSequence, begin, length int) ( nerr = lali - score start = best[0] + int(pattern.pointer.pointer.patlen) - lali - end = start + lali + end = start + lali log.Debugln("results", score, lali, start, nerr) return } diff --git a/pkg/obiapat/pcr.go b/pkg/obiapat/pcr.go index 864f6e3..d9966e0 100644 --- a/pkg/obiapat/pcr.go +++ b/pkg/obiapat/pcr.go @@ -3,8 +3,8 @@ package obiapat import ( log "github.com/sirupsen/logrus" - "git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" + 
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils" ) type _Options struct { @@ -210,7 +210,7 @@ func _Pcr(seq ApatSequence, reverse := opt.pointer.reverse crev := opt.pointer.crev - forwardMatches := forward.FindAllIndex(seq,0,-1) + forwardMatches := forward.FindAllIndex(seq, 0, -1) if len(forwardMatches) > 0 { @@ -256,7 +256,7 @@ func _Pcr(seq ApatSequence, (opt.MaxLength() == 0 || length <= opt.MaxLength()) { amplicon, _ := sequence.Subsequence(fm[1], rm[0], opt.pointer.circular) annot := amplicon.Annotations() - goutils.MustFillMap(annot, sequence.Annotations()) + obiutils.MustFillMap(annot, sequence.Annotations()) annot["forward_primer"] = forward.String() match, _ := sequence.Subsequence(fm[0], fm[1], opt.pointer.circular) @@ -284,7 +284,7 @@ func _Pcr(seq ApatSequence, } } - forwardMatches = reverse.FindAllIndex(seq,0,-1) + forwardMatches = reverse.FindAllIndex(seq, 0, -1) if forwardMatches != nil { begin := forwardMatches[0][0] @@ -331,7 +331,7 @@ func _Pcr(seq ApatSequence, amplicon = amplicon.ReverseComplement(true) annot := amplicon.Annotations() - goutils.MustFillMap(annot, sequence.Annotations()) + obiutils.MustFillMap(annot, sequence.Annotations()) annot["forward_primer"] = forward.String() match, _ := sequence.Subsequence(rm[0], rm[1], opt.pointer.circular) diff --git a/pkg/obiformats/batch_of_files_reader.go b/pkg/obiformats/batch_of_files_reader.go index 520d989..4395daa 100644 --- a/pkg/obiformats/batch_of_files_reader.go +++ b/pkg/obiformats/batch_of_files_reader.go @@ -3,8 +3,8 @@ package obiformats import ( "log" - "git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils" ) func ReadSequencesBatchFromFiles(filenames []string, @@ -17,7 +17,7 @@ func ReadSequencesBatchFromFiles(filenames []string, } batchiter := obiiter.MakeIBioSequence(0) - nextCounter := goutils.AtomicCounter() + nextCounter := obiutils.AtomicCounter() 
batchiter.Add(concurrent_readers) diff --git a/pkg/obiformats/csv_writer.go b/pkg/obiformats/csv_writer.go index b4b590a..28fe2f6 100644 --- a/pkg/obiformats/csv_writer.go +++ b/pkg/obiformats/csv_writer.go @@ -9,9 +9,9 @@ import ( "sync" "time" - "git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils" log "github.com/sirupsen/logrus" ) @@ -52,7 +52,7 @@ func CSVRecord(sequence *obiseq.BioSequence, opt Options) []string { value = opt.CSVNAValue() } - svalue, _ := goutils.InterfaceToString(value) + svalue, _ := obiutils.InterfaceToString(value) record = append(record, svalue) } @@ -132,7 +132,7 @@ func WriteCSV(iterator obiiter.IBioSequence, options ...WithOption) (obiiter.IBioSequence, error) { opt := MakeOptions(options) - file, _ = goutils.CompressStream(file, opt.CompressedFile(), opt.CloseFile()) + file, _ = obiutils.CompressStream(file, opt.CompressedFile(), opt.CloseFile()) newIter := obiiter.MakeIBioSequence() diff --git a/pkg/obiformats/fastseq_json_header.go b/pkg/obiformats/fastseq_json_header.go index a921707..3fddf8d 100644 --- a/pkg/obiformats/fastseq_json_header.go +++ b/pkg/obiformats/fastseq_json_header.go @@ -6,8 +6,8 @@ import ( log "github.com/sirupsen/logrus" - "git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils" "github.com/goccy/go-json" ) @@ -71,7 +71,7 @@ func FormatFastSeqJsonHeader(sequence *obiseq.BioSequence) string { annotations := sequence.Annotations() if len(annotations) > 0 { - text, err := goutils.JsonMarshal(sequence.Annotations()) + text, err := obiutils.JsonMarshal(sequence.Annotations()) if err != nil { panic(err) diff --git a/pkg/obiformats/fastseq_obi_header.go b/pkg/obiformats/fastseq_obi_header.go index faa92ca..173d78f 100644 
--- a/pkg/obiformats/fastseq_obi_header.go +++ b/pkg/obiformats/fastseq_obi_header.go @@ -9,8 +9,8 @@ import ( "strconv" "strings" - "git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils" "github.com/goccy/go-json" ) @@ -305,7 +305,7 @@ func FormatFastSeqOBIHeader(sequence *obiseq.BioSequence) string { map[string]string, map[string]interface{}, obiseq.StatsOnValues: - tv, err := goutils.JsonMarshal(t) + tv, err := obiutils.JsonMarshal(t) if err != nil { log.Fatalf("Cannot convert %v value", value) } diff --git a/pkg/obiformats/fastseq_write_fasta.go b/pkg/obiformats/fastseq_write_fasta.go index 1e6e079..d9d1b88 100644 --- a/pkg/obiformats/fastseq_write_fasta.go +++ b/pkg/obiformats/fastseq_write_fasta.go @@ -11,9 +11,9 @@ import ( log "github.com/sirupsen/logrus" - "git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils" ) func min(x, y int) int { @@ -70,7 +70,7 @@ func WriteFasta(iterator obiiter.IBioSequence, opt := MakeOptions(options) iterator = iterator.Rebatch(10000) - file, _ = goutils.CompressStream(file, opt.CompressedFile(), opt.CloseFile()) + file, _ = obiutils.CompressStream(file, opt.CompressedFile(), opt.CloseFile()) newIter := obiiter.MakeIBioSequence() diff --git a/pkg/obiformats/fastseq_write_fastq.go b/pkg/obiformats/fastseq_write_fastq.go index e987987..eaf62ce 100644 --- a/pkg/obiformats/fastseq_write_fastq.go +++ b/pkg/obiformats/fastseq_write_fastq.go @@ -10,9 +10,9 @@ import ( log "github.com/sirupsen/logrus" - "git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils" ) func 
FormatFastq(seq *obiseq.BioSequence, quality_shift int, formater FormatHeader) string { @@ -61,7 +61,7 @@ func WriteFastq(iterator obiiter.IBioSequence, opt := MakeOptions(options) - file, _ = goutils.CompressStream(file, opt.CompressedFile(), opt.CloseFile()) + file, _ = obiutils.CompressStream(file, opt.CompressedFile(), opt.CloseFile()) newIter := obiiter.MakeIBioSequence() diff --git a/pkg/obiiter/batchiterator.go b/pkg/obiiter/batchiterator.go index 15705aa..5219f8f 100644 --- a/pkg/obiiter/batchiterator.go +++ b/pkg/obiiter/batchiterator.go @@ -10,8 +10,8 @@ import ( log "github.com/sirupsen/logrus" - "git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils" "github.com/tevino/abool/v2" ) @@ -375,7 +375,7 @@ func (iterator IBioSequence) Pool(iterators ...IBioSequence) IBioSequence { allPaired = allPaired && i.IsPaired() } - nextCounter := goutils.AtomicCounter() + nextCounter := obiutils.AtomicCounter() newIter := MakeIBioSequence() newIter.Add(niterator) diff --git a/pkg/obiiter/sequence_workers.go b/pkg/obiiter/sequence_workers.go index 302ae9d..9457b5b 100644 --- a/pkg/obiiter/sequence_workers.go +++ b/pkg/obiiter/sequence_workers.go @@ -2,7 +2,7 @@ package obiiter // func MakeSetAttributeWorker(rank string) obiiter.SeqWorker { -// if !goutils.Contains(taxonomy.RankList(), rank) { +// if !obiutils.Contains(taxonomy.RankList(), rank) { // log.Fatalf("%s is not a valid rank (allowed ranks are %v)", // rank, // taxonomy.RankList()) @@ -14,4 +14,4 @@ package obiiter // } // return w -// } \ No newline at end of file +// } diff --git a/pkg/obikmer/counting.go b/pkg/obikmer/counting.go index 1a2fb9a..ec80b90 100644 --- a/pkg/obikmer/counting.go +++ b/pkg/obikmer/counting.go @@ -3,8 +3,8 @@ package obikmer import ( "math" - "git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" + 
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils" ) type Table4mer [256]uint16 @@ -32,7 +32,7 @@ func Count4Mer(seq *obiseq.BioSequence, buffer *[]byte, counts *Table4mer) *Tabl func Common4Mer(count1, count2 *Table4mer) int { sum := 0 for i := 0; i < 256; i++ { - sum += int(goutils.MinUInt16((*count1)[i], (*count2)[i])) + sum += int(obiutils.MinUInt16((*count1)[i], (*count2)[i])) } return sum } @@ -48,7 +48,7 @@ func Sum4Mer(count *Table4mer) int { func LCS4MerBounds(count1, count2 *Table4mer) (int, int) { s1 := Sum4Mer(count1) s2 := Sum4Mer(count2) - smin := goutils.MinInt(s1, s2) + smin := obiutils.MinInt(s1, s2) cw := Common4Mer(count1, count2) @@ -65,11 +65,11 @@ func LCS4MerBounds(count1, count2 *Table4mer) (int, int) { func Error4MerBounds(count1, count2 *Table4mer) (int, int) { s1 := Sum4Mer(count1) s2 := Sum4Mer(count2) - smax := goutils.MaxInt(s1, s2) + smax := obiutils.MaxInt(s1, s2) cw := Common4Mer(count1, count2) - errorMax := smax - cw + 2* int(math.Floor(float64(cw+5)/8.0)) + errorMax := smax - cw + 2*int(math.Floor(float64(cw+5)/8.0)) errorMin := int(math.Ceil(float64(errorMax) / 4.0)) return errorMin, errorMax diff --git a/pkg/obingslibrary/match.go b/pkg/obingslibrary/match.go index 24bb4bf..0193101 100644 --- a/pkg/obingslibrary/match.go +++ b/pkg/obingslibrary/match.go @@ -7,9 +7,9 @@ import ( log "github.com/sirupsen/logrus" - "git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiapat" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils" ) type DemultiplexMatch struct { @@ -104,16 +104,14 @@ func (marker *Marker) Compile(forward, reverse string, maxError int, allowsIndel return nil } - - func (marker *Marker) Match(sequence *obiseq.BioSequence) *DemultiplexMatch { aseq, _ := obiapat.MakeApatSequence(sequence, false) - start, end, nerr ,matched := marker.forward.BestMatch(aseq, marker.taglength,-1) + start, end, 
nerr, matched := marker.forward.BestMatch(aseq, marker.taglength, -1) if matched { sseq := sequence.String() direct := sseq[start:end] - tagstart := goutils.MaxInt(start - marker.taglength,0) + tagstart := obiutils.MaxInt(start-marker.taglength, 0) ftag := strings.ToLower(sseq[tagstart:start]) m := DemultiplexMatch{ @@ -125,15 +123,15 @@ func (marker *Marker) Match(sequence *obiseq.BioSequence) *DemultiplexMatch { Error: nil, } - start, end, nerr ,matched = marker.creverse.BestMatch(aseq, start,-1) + start, end, nerr, matched = marker.creverse.BestMatch(aseq, start, -1) if matched { // extracting primer matches - reverse, _ := sequence.Subsequence(start,end, false) + reverse, _ := sequence.Subsequence(start, end, false) defer reverse.Recycle() reverse = reverse.ReverseComplement(true) - endtag := goutils.MinInt(end+marker.taglength,sequence.Len()) + endtag := obiutils.MinInt(end+marker.taglength, sequence.Len()) rtag, err := sequence.Subsequence(end, endtag, false) defer rtag.Recycle() srtag := "" @@ -165,14 +163,13 @@ func (marker *Marker) Match(sequence *obiseq.BioSequence) *DemultiplexMatch { return &m } - - start, end, nerr ,matched = marker.reverse.BestMatch(aseq, marker.taglength,-1) + start, end, nerr, matched = marker.reverse.BestMatch(aseq, marker.taglength, -1) if matched { sseq := sequence.String() reverse := strings.ToLower(sseq[start:end]) - tagstart := goutils.MaxInt(start - marker.taglength,0) + tagstart := obiutils.MaxInt(start-marker.taglength, 0) rtag := strings.ToLower(sseq[tagstart:start]) m := DemultiplexMatch{ @@ -184,16 +181,16 @@ func (marker *Marker) Match(sequence *obiseq.BioSequence) *DemultiplexMatch { Error: nil, } - start, end, nerr ,matched = marker.cforward.BestMatch(aseq, end,-1) + start, end, nerr, matched = marker.cforward.BestMatch(aseq, end, -1) if matched { - direct, _ := sequence.Subsequence(start,end, false) + direct, _ := sequence.Subsequence(start, end, false) defer direct.Recycle() direct = direct.ReverseComplement(true) - 
endtag := goutils.MinInt(end+marker.taglength,sequence.Len()) - ftag, err := sequence.Subsequence(end,endtag, false) + endtag := obiutils.MinInt(end+marker.taglength, sequence.Len()) + ftag, err := sequence.Subsequence(end, endtag, false) defer ftag.Recycle() sftag := "" if err != nil { diff --git a/pkg/obiseq/attributes.go b/pkg/obiseq/attributes.go index f78ea6a..f34287f 100644 --- a/pkg/obiseq/attributes.go +++ b/pkg/obiseq/attributes.go @@ -4,7 +4,7 @@ import ( "fmt" "strconv" - "git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils" log "github.com/sirupsen/logrus" ) @@ -17,6 +17,7 @@ func (s *BioSequence) HasAttribute(key string) bool { return ok } + // A method that returns the value of the key in the annotation map. func (s *BioSequence) GetAttribute(key string) (interface{}, bool) { var val interface{} @@ -43,7 +44,7 @@ func (s *BioSequence) GetIntAttribute(key string) (int, bool) { v, ok := s.GetAttribute(key) if ok { - val, err = goutils.InterfaceToInt(v) + val, err = obiutils.InterfaceToInt(v) ok = err == nil } @@ -52,15 +53,15 @@ func (s *BioSequence) GetIntAttribute(key string) (int, bool) { // Deleting the key from the annotation map. func (s *BioSequence) DeleteAttribute(key string) { - delete(s.Annotations(),key) + delete(s.Annotations(), key) } - + // Renaming the key in the annotation map. func (s *BioSequence) RenameAttribute(newName, oldName string) { - val,ok := s.GetAttribute(oldName) + val, ok := s.GetAttribute(oldName) - if (ok) { - s.SetAttribute(newName,val) + if ok { + s.SetAttribute(newName, val) s.DeleteAttribute(oldName) } } @@ -73,14 +74,13 @@ func (s *BioSequence) GetNumericAttribute(key string) (float64, bool) { v, ok := s.GetAttribute(key) if ok { - val, err = goutils.InterfaceToFloat64(v) + val, err = obiutils.InterfaceToFloat64(v) ok = err == nil } return val, ok } - // A method that returns the value of the key in the annotation map. 
func (s *BioSequence) GetStringAttribute(key string) (string, bool) { var val string @@ -101,7 +101,7 @@ func (s *BioSequence) GetBoolAttribute(key string) (bool, bool) { v, ok := s.GetAttribute(key) if ok { - val, err = goutils.InterfaceToBool(v) + val, err = obiutils.InterfaceToBool(v) ok = err == nil } @@ -115,7 +115,7 @@ func (s *BioSequence) GetIntMap(key string) (map[string]int, bool) { v, ok := s.GetAttribute(key) if ok { - val, err = goutils.InterfaceToIntMap(v) + val, err = obiutils.InterfaceToIntMap(v) ok = err == nil } @@ -139,7 +139,6 @@ func (s *BioSequence) SetCount(count int) { annot["count"] = count } - // Returning the taxid of the sequence. func (s *BioSequence) Taxid() int { taxid, ok := s.GetIntAttribute("taxid") @@ -157,7 +156,6 @@ func (s *BioSequence) SetTaxid(taxid int) { annot["taxid"] = taxid } - func (s *BioSequence) OBITagRefIndex() map[int]string { var val map[int]string @@ -179,7 +177,7 @@ func (s *BioSequence) OBITagRefIndex() map[int]string { log.Panicln(err) } - val[score], err = goutils.InterfaceToString(v) + val[score], err = obiutils.InterfaceToString(v) if err != nil { log.Panicln(err) } diff --git a/pkg/obiseq/class.go b/pkg/obiseq/class.go index 84c7b4c..c2a6ae4 100644 --- a/pkg/obiseq/class.go +++ b/pkg/obiseq/class.go @@ -6,7 +6,7 @@ import ( "strconv" "sync" - "git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils" log "github.com/sirupsen/logrus" ) @@ -135,7 +135,7 @@ func DualAnnotationClassifier(key1, key2 string, na string) *BioSequenceClassifi locke.Lock() defer locke.Unlock() - jb, _ := goutils.JsonMarshal([2]string{val1, val2}) + jb, _ := obiutils.JsonMarshal([2]string{val1, val2}) json := string(jb) k, ok := encode[json] diff --git a/pkg/obiseq/language.go b/pkg/obiseq/language.go index 473a52a..0f3f61c 100644 --- a/pkg/obiseq/language.go +++ b/pkg/obiseq/language.go @@ -5,7 +5,7 @@ import ( "log" "strings" - 
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils" "github.com/PaesslerAG/gval" ) @@ -136,11 +136,11 @@ func minFloatMap(values map[string]float64) float64 { var OBILang = gval.NewLanguage( gval.Full(), gval.Function("len", func(args ...interface{}) (interface{}, error) { - length := goutils.Len(args[0]) + length := obiutils.Len(args[0]) return (float64)(length), nil }), gval.Function("ismap", func(args ...interface{}) (interface{}, error) { - ismap := goutils.IsAMap(args[0]) + ismap := obiutils.IsAMap(args[0]) return ismap, nil }), gval.Function("printf", func(args ...interface{}) (interface{}, error) { @@ -152,7 +152,7 @@ var OBILang = gval.NewLanguage( return text, nil }), gval.Function("int", func(args ...interface{}) (interface{}, error) { - val, err := goutils.InterfaceToInt(args[0]) + val, err := obiutils.InterfaceToInt(args[0]) if err != nil { log.Fatalf("%v cannot be converted to an integer value", args[0]) @@ -160,7 +160,7 @@ var OBILang = gval.NewLanguage( return val, nil }), gval.Function("numeric", func(args ...interface{}) (interface{}, error) { - val, err := goutils.InterfaceToFloat64(args[0]) + val, err := obiutils.InterfaceToFloat64(args[0]) if err != nil { log.Fatalf("%v cannot be converted to a numeric value", args[0]) @@ -168,7 +168,7 @@ var OBILang = gval.NewLanguage( return val, nil }), gval.Function("bool", func(args ...interface{}) (interface{}, error) { - val, err := goutils.InterfaceToBool(args[0]) + val, err := obiutils.InterfaceToBool(args[0]) if err != nil { log.Fatalf("%v cannot be converted to a boolan value", args[0]) @@ -189,4 +189,3 @@ var OBILang = gval.NewLanguage( gval.Function("composition", func(args ...interface{}) (interface{}, error) { return (args[0].(*BioSequence)).Composition(), nil })) - diff --git a/pkg/obiseq/merge.go b/pkg/obiseq/merge.go index 8a28e36..c84b232 100644 --- a/pkg/obiseq/merge.go +++ b/pkg/obiseq/merge.go @@ -5,7 +5,7 @@ import ( 
"reflect" "strings" - "git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils" log "github.com/sirupsen/logrus" ) @@ -53,7 +53,7 @@ func (sequence *BioSequence) StatsOn(key string, na string) StatsOnValues { newstat = false var err error for k, v := range istat { - stats[k], err = goutils.InterfaceToInt(v) + stats[k], err = obiutils.InterfaceToInt(v) if err != nil { log.Panicf("In sequence %s : %s stat tag not only containing integer values %s", sequence.Id(), mkey, istat) diff --git a/pkg/obiseq/pool.go b/pkg/obiseq/pool.go index dc67014..c03c6f7 100644 --- a/pkg/obiseq/pool.go +++ b/pkg/obiseq/pool.go @@ -4,7 +4,7 @@ import ( "log" "sync" - "git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils" ) var _BioSequenceByteSlicePool = sync.Pool{ @@ -72,7 +72,7 @@ func GetAnnotation(values ...Annotation) Annotation { } if len(values) > 0 { - goutils.MustFillMap(a, values[0]) + obiutils.MustFillMap(a, values[0]) } return a diff --git a/pkg/obitax/sequence_predicate.go b/pkg/obitax/sequence_predicate.go index 3ca6d94..b65e362 100644 --- a/pkg/obitax/sequence_predicate.go +++ b/pkg/obitax/sequence_predicate.go @@ -3,8 +3,8 @@ package obitax import ( log "github.com/sirupsen/logrus" - "git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils" ) func (taxonomy *Taxonomy) IsAValidTaxon(withAutoCorrection ...bool) obiseq.SequencePredicate { @@ -59,7 +59,7 @@ func (taxonomy *Taxonomy) IsSubCladeOf(taxid int) obiseq.SequencePredicate { func (taxonomy *Taxonomy) HasRequiredRank(rank string) obiseq.SequencePredicate { - if !goutils.Contains(taxonomy.RankList(), rank) { + if !obiutils.Contains(taxonomy.RankList(), rank) { log.Fatalf("%s is not a valid rank (allowed ranks are %v)", rank, taxonomy.RankList()) diff --git 
a/pkg/obitax/sequence_workers.go b/pkg/obitax/sequence_workers.go index 6ca6186..ee9b690 100644 --- a/pkg/obitax/sequence_workers.go +++ b/pkg/obitax/sequence_workers.go @@ -1,14 +1,14 @@ package obitax import ( - "git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils" log "github.com/sirupsen/logrus" ) func (taxonomy *Taxonomy) MakeSetTaxonAtRankWorker(rank string) obiseq.SeqWorker { - if !goutils.Contains(taxonomy.RankList(), rank) { + if !obiutils.Contains(taxonomy.RankList(), rank) { log.Fatalf("%s is not a valid rank (allowed ranks are %v)", rank, taxonomy.RankList()) diff --git a/pkg/obitools/obiclean/obiclean.go b/pkg/obitools/obiclean/obiclean.go index 3cdd847..3e589b8 100644 --- a/pkg/obitools/obiclean/obiclean.go +++ b/pkg/obitools/obiclean/obiclean.go @@ -4,10 +4,10 @@ import ( "fmt" "os" - "git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils" "github.com/schollz/progressbar/v3" log "github.com/sirupsen/logrus" ) @@ -120,7 +120,7 @@ func HeadCount(sequence *obiseq.BioSequence) int { value := 0 if ok { - value, err = goutils.InterfaceToInt(value) + value, err = obiutils.InterfaceToInt(value) if err != nil { log.Panic("obiclean_headcount attribute of sequence %s must be an integer value not : %v", sequence.Id(), ivalue) } @@ -136,7 +136,7 @@ func InternalCount(sequence *obiseq.BioSequence) int { value := 0 if ok { - value, err = goutils.InterfaceToInt(value) + value, err = obiutils.InterfaceToInt(value) if err != nil { log.Panic("obiclean_internalcount attribute of sequence %s must be an integer value not : %v", sequence.Id(), ivalue) } @@ -152,7 +152,7 @@ func SingletonCount(sequence 
*obiseq.BioSequence) int { value := 0 if ok { - value, err = goutils.InterfaceToInt(value) + value, err = obiutils.InterfaceToInt(value) if err != nil { log.Panic("obiclean_samplecount attribute of sequence %s must be an integer value not : %v", sequence.Id(), ivalue) } @@ -271,7 +271,7 @@ func Weight(sequence *obiseq.BioSequence) map[string]int { case map[string]interface{}: weight = make(map[string]int) for k, v := range iobistatus { - weight[k], err = goutils.InterfaceToInt(v) + weight[k], err = obiutils.InterfaceToInt(v) if err != nil { log.Panicf("Weight value %v cannnot be casted to an integer value\n", v) } diff --git a/pkg/obitools/obicsv/options.go b/pkg/obitools/obicsv/options.go index c4119d6..19c62e1 100644 --- a/pkg/obitools/obicsv/options.go +++ b/pkg/obitools/obicsv/options.go @@ -1,8 +1,8 @@ package obicsv import ( - "git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils" "github.com/DavidGamba/go-getoptions" ) @@ -103,18 +103,18 @@ func CLIToBeKeptAttributes() []string { _keepOnly = append(_keepOnly, _softAttributes["obipairing"]...) 
} - if i := goutils.LookFor(_keepOnly, "count"); i >= 0 { - _keepOnly = goutils.RemoveIndex(_keepOnly, i) + if i := obiutils.LookFor(_keepOnly, "count"); i >= 0 { + _keepOnly = obiutils.RemoveIndex(_keepOnly, i) _outputCount = true } - if i := goutils.LookFor(_keepOnly, "taxid"); i >= 0 { - _keepOnly = goutils.RemoveIndex(_keepOnly, i) + if i := obiutils.LookFor(_keepOnly, "taxid"); i >= 0 { + _keepOnly = obiutils.RemoveIndex(_keepOnly, i) _outputTaxon = true } - if i := goutils.LookFor(_keepOnly, "scientific_name"); i >= 0 { - _keepOnly = goutils.RemoveIndex(_keepOnly, i) + if i := obiutils.LookFor(_keepOnly, "scientific_name"); i >= 0 { + _keepOnly = obiutils.RemoveIndex(_keepOnly, i) _outputTaxon = true } diff --git a/pkg/obitools/obigrep/options.go b/pkg/obitools/obigrep/options.go index 2c4a5f7..932bf5a 100644 --- a/pkg/obitools/obigrep/options.go +++ b/pkg/obitools/obigrep/options.go @@ -5,11 +5,11 @@ import ( log "github.com/sirupsen/logrus" - "git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiformats/ncbitaxdump" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obitax" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils" "github.com/DavidGamba/go-getoptions" ) @@ -345,7 +345,7 @@ func CLIIdPatternPredicate() obiseq.SequencePredicate { func CLIIdListPredicate() obiseq.SequencePredicate { if _IdList != "" { - ids, err := goutils.ReadLines(_IdList) + ids, err := obiutils.ReadLines(_IdList) if err != nil { log.Fatalf("cannot read the id file %s : %v", _IdList, err) diff --git a/pkg/obitools/obirefidx/obirefidx.go b/pkg/obitools/obirefidx/obirefidx.go index bb3dbb9..e1e8aa7 100644 --- a/pkg/obitools/obirefidx/obirefidx.go +++ b/pkg/obitools/obirefidx/obirefidx.go @@ -5,7 +5,6 @@ import ( "log" "os" - "git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils" 
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obialign" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obikmer" @@ -13,6 +12,7 @@ import ( "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obitax" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obifind" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils" "github.com/schollz/progressbar/v3" ) @@ -38,7 +38,7 @@ func IndexSequence(seqidx int, // log.Println("Redone : ",r,"/",t,"(",w,")") - o := goutils.IntOrder(score) + o := obiutils.IntOrder(score) current_taxid, err := taxo.Taxon(references[o[0]].Taxid()) current_score := score[o[0]] @@ -121,7 +121,7 @@ func IndexReferenceDB(iterator obiiter.IBioSequence) obiiter.IBioSequence { indexed := obiiter.MakeIBioSequence() go func() { for i := 0; i < len(references); i += 10 { - limits <- [2]int{i, goutils.MinInt(i+10, len(references))} + limits <- [2]int{i, obiutils.MinInt(i+10, len(references))} } close(limits) }() diff --git a/pkg/obitools/obitag/obitag.go b/pkg/obitools/obitag/obitag.go index 7f6e916..27950cb 100644 --- a/pkg/obitools/obitag/obitag.go +++ b/pkg/obitools/obitag/obitag.go @@ -6,7 +6,6 @@ import ( "strconv" "strings" - "git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obialign" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obikmer" @@ -15,6 +14,7 @@ import ( "git.metabarcoding.org/lecasofts/go/obitools/pkg/obitax" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obifind" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obirefidx" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils" ) func FindClosests(sequence *obiseq.BioSequence, @@ -31,7 +31,7 @@ func FindClosests(sequence *obiseq.BioSequence, cw[i] = obikmer.Common4Mer(seqwords, ref) } - o := goutils.ReverseIntOrder(cw) + o := 
obiutils.ReverseIntOrder(cw) // mcw := 100000 // for _, i := range o { @@ -57,11 +57,11 @@ func FindClosests(sequence *obiseq.BioSequence, for i, j := range o { ref := references[j] - lmin, lmax := goutils.MinMaxInt(sequence.Len(), ref.Len()) + lmin, lmax := obiutils.MinMaxInt(sequence.Len(), ref.Len()) atMost := lmax - lmin + int(math.Ceil(float64(lmin-3-cw[j])/4.0)) - 2 if i == 0 { - maxe = goutils.MaxInt(sequence.Len(), ref.Len()) + maxe = obiutils.MaxInt(sequence.Len(), ref.Len()) } // log.Println(sequence.Id(),cw[j], maxe) diff --git a/pkg/goutils/goutils.go b/pkg/obiutils/goutils.go similarity index 99% rename from pkg/goutils/goutils.go rename to pkg/obiutils/goutils.go index 136668d..36c603b 100644 --- a/pkg/goutils/goutils.go +++ b/pkg/obiutils/goutils.go @@ -1,4 +1,4 @@ -package goutils +package obiutils import ( "bufio" @@ -13,7 +13,6 @@ import ( "github.com/barkimedes/go-deepcopy" ) - // InterfaceToInt converts a interface{} to an integer value if possible. // If not a "NotAnInteger" error is returned via the err // return value and val is set to 0. 
@@ -197,7 +196,6 @@ func InterfaceToFloat64Map(i interface{}) (val map[string]float64, err error) { return } - // NotABoolean defines a new type of Error : "NotABoolean" type NotABoolean struct { message string diff --git a/pkg/goutils/gzipfile.go b/pkg/obiutils/gzipfile.go similarity index 98% rename from pkg/goutils/gzipfile.go rename to pkg/obiutils/gzipfile.go index e026b9a..b535b4c 100644 --- a/pkg/goutils/gzipfile.go +++ b/pkg/obiutils/gzipfile.go @@ -1,4 +1,4 @@ -package goutils +package obiutils import ( "bufio" diff --git a/pkg/obiutils/minmax.go b/pkg/obiutils/minmax.go new file mode 100644 index 0000000..d5bec7a --- /dev/null +++ b/pkg/obiutils/minmax.go @@ -0,0 +1,83 @@ +package obiutils + +import "golang.org/x/exp/constraints" + +// MinInt returns the smaller of x and y. +func MinInt(x, y int) int { + if x < y { + return x + } + return y +} + +// MaxInt returns the larger of x and y. +func MaxInt(x, y int) int { + if x < y { + return y + } + return x +} + +// MinMaxInt returns x and y reordered as (smaller, larger). +func MinMaxInt(x, y int) (int, int) { + if x < y { + return x, y + } + return y, x +} + +// MinUInt16 returns the smaller of x and y. +func MinUInt16(x, y uint16) uint16 { + if x < y { + return x + } + return y +} + +// MaxUInt16 returns the larger of x and y. +func MaxUInt16(x, y uint16) uint16 { + if x < y { + return y + } + return x +} + +// MinSlice returns the smallest element of vec. +// It panics with "empty slice" when vec has no elements. +func MinSlice[T constraints.Ordered](vec []T) T { + if len(vec) == 0 { + panic("empty slice") + } + min := vec[0] + for _, v := range vec { + if v < min { + min = v + } + } + return min +} + +// MaxSlice returns the largest element of vec. +// It panics with "empty slice" when vec has no elements. +func MaxSlice[T constraints.Ordered](vec []T) T { + if len(vec) == 0 { + panic("empty slice") + } + max := vec[0] + for _, v := range vec { + if v > max { + max = v + } + } + return max +} + +// RangeSlice returns the smallest and the largest element of vec, found in a single pass. +// It panics with "empty slice" when vec has no elements. +func RangeSlice[T constraints.Ordered](vec []T) (min, max T) { + if len(vec) == 0 { + panic("empty slice") + } + + min = vec[0] + max = vec[0] + for _, v := range vec { + if v > max { + max = v + } + if v < min { + min = v + } + } + + return +} diff --git a/pkg/goutils/ranks.go b/pkg/obiutils/ranks.go similarity index 56% rename from pkg/goutils/ranks.go rename to pkg/obiutils/ranks.go index a68a109..a378735 100644 --- a/pkg/goutils/ranks.go +++ 
b/pkg/obiutils/ranks.go @@ -1,6 +1,8 @@ -package goutils +package obiutils -import "sort" +import ( + "sort" +) // intRanker is a helper type for the rank function. type intRanker struct { @@ -29,7 +31,7 @@ func IntOrder(data []int) []int { } sort.Sort(rk) - + return r } @@ -49,6 +51,36 @@ func ReverseIntOrder(data []int) []int { } sort.Sort(sort.Reverse(rk)) - + + return r +} + +// Ranker pairs a sortable collection x with an index list r, so that sorting the Ranker permutes r instead of x. +type Ranker[T sort.Interface] struct { + x T // Data to be ranked. + r []int // A list of indexes into x that reflects rank order after sorting. +} + +// Ranker satisfies sort.Interface without reordering x: Less compares elements of x through r, and Swap only exchanges entries of r. +func (r Ranker[_]) Len() int { return len(r.r) } +func (r Ranker[T]) Less(i, j int) bool { return r.x.Less(r.r[i], r.r[j]) } +func (r Ranker[_]) Swap(i, j int) { r.r[i], r.r[j] = r.r[j], r.r[i] } + +// Order returns the indexes of data sorted according to data.Less; data is only read through Len and Less, its Swap method is never called. +// It returns nil when data.Len() is 0. +func Order[T sort.Interface](data T) []int { + ldata := data.Len() + if ldata == 0 { + return nil + } + r := make([]int, ldata) + rk := Ranker[T]{ + x: data, + r: r, + } + + for i := 0; i < ldata; i++ { + rk.r[i] = i + } + + sort.Sort(rk) + return r } diff --git a/pkg/goutils/slices.go b/pkg/obiutils/slices.go similarity index 83% rename from pkg/goutils/slices.go rename to pkg/obiutils/slices.go index 738abe5..c5860fa 100644 --- a/pkg/goutils/slices.go +++ b/pkg/obiutils/slices.go @@ -1,5 +1,4 @@ -package goutils - +package obiutils func Contains[T comparable](arr []T, x T) bool { for _, v := range arr { @@ -20,5 +19,5 @@ func LookFor[T comparable](arr []T, x T) int { } func RemoveIndex[T comparable](s []T, index int) []T { - return append(s[:index], s[index+1:]...) + return append(s[:index], s[index+1:]...) }