diff --git a/pkg/obialign/locatepattern.go b/pkg/obialign/locatepattern.go index f188605..d6057ee 100644 --- a/pkg/obialign/locatepattern.go +++ b/pkg/obialign/locatepattern.go @@ -1,30 +1,72 @@ package obialign +import log "github.com/sirupsen/logrus" + +// buffIndex converts a pair of coordinates (i, j) into a linear index in a matrix +// stored row by row with width columns. Coordinates start at -1, while the linear index +// is 0-based. The function first adds 1 to both coordinates to make +// sure the (-1,-1) coordinate is at position 0 in the matrix, and then computes +// the linear index by multiplying the first coordinate by the width and adding +// the second coordinate. func buffIndex(i, j, width int) int { return (i+1)*width + (j + 1) } + +// LocatePattern is a function to locate a pattern in a sequence. +// +// It uses a dynamic programming approach to build a matrix of scores. +// The score at each cell is the maximum of the score of the cell +// above it minus the gap penalty (a deletion), the score of the cell to its +// left minus the gap penalty (an insertion), and the score of the cell +// diagonally above it plus the match score. +// +// The score of a match is 0 if the two characters are the same, +// and -1 if they are different. +// +// The function returns the start and end positions of the best +// match, as well as the number of errors in the best match. func LocatePattern(pattern, sequence []byte) (int, int, int) { + // Pattern spreads over the columns + // Sequence spreads over the rows width := len(pattern) + 1 buffsize := (len(pattern) + 1) * (len(sequence) + 1) buffer := make([]int, buffsize) + + if len(pattern) >= len(sequence) { + log.Panicf("Pattern %s must be shorter than sequence %s", pattern, sequence) + } + + // The path matrix keeps track of the best path through the matrix + // 0 : indicates the diagonal path + // 1 : indicates the up path + // -1 : indicates the left path path := make([]int, buffsize) + // Initialize the first row of the matrix for j := 0; j < len(pattern); j++ { idx := buffIndex(-1, j, width) buffer[idx] = -j - 1 path[idx] = -1 } + // Initialize the first column of the matrix + // Alignment is endgap free so first column = 0 + // to allow the primer to shift freely along the sequence for i := -1; i < len(sequence); i++ { idx := buffIndex(i, -1, width) buffer[idx] = 0 path[idx] = +1 } + // Fill the matrix except the last column, + // where gaps must be free too.
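// Aside (not part of the patch): a minimal usage sketch of LocatePattern as
// documented above. The import path mirrors the module layout visible in this
// diff, and the printed values are only the expected result for an exact,
// internal match of the pattern.

package main

import (
	"fmt"

	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obialign"
)

func main() {
	pattern := []byte("acgt") // the pattern must be shorter than the sequence
	sequence := []byte("ttacgttt")

	from, to, errs := obialign.LocatePattern(pattern, sequence)
	fmt.Println(from, to, errs) // expected: 2 6 0 (the end position is exclusive)
}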
path[0] = 0 jmax := len(pattern) - 1 for i := 0; i < len(sequence); i++ { for j := 0; j < jmax; j++ { + + // Mismatch score = -1 + // Match score = 0 match := -1 if _samenuc(pattern[j], sequence[i]) { match = 0 @@ -33,6 +75,8 @@ func LocatePattern(pattern, sequence []byte) (int, int, int) { idx := buffIndex(i, j, width) diag := buffer[buffIndex(i-1, j-1, width)] + match + + // Each gap costs -1 left := buffer[buffIndex(i, j-1, width)] - 1 up := buffer[buffIndex(i-1, j, width)] - 1 @@ -51,9 +95,12 @@ func LocatePattern(pattern, sequence []byte) (int, int, int) { } } + // Fill the last column, where the up gap is free for i := 0; i < len(sequence); i++ { idx := buffIndex(i, jmax, width) + // Mismatch score = -1 + // Match score = 0 match := -1 if _samenuc(pattern[jmax], sequence[i]) { match = 0 @@ -65,6 +112,7 @@ func LocatePattern(pattern, sequence []byte) (int, int, int) { score := max(diag, up, left) buffer[idx] = score + switch { case score == left: path[idx] = -1 @@ -75,11 +123,13 @@ func LocatePattern(pattern, sequence []byte) (int, int, int) { } } + // Backtracking of the alignment + i := len(sequence) - 1 j := jmax end := -1 lali := 0 - for i > -1 && j > 0 { + for j > 0 { // was: i > -1 && j > 0 lali++ switch path[buffIndex(i, j, width)] { case 0: @@ -100,5 +150,9 @@ func LocatePattern(pattern, sequence []byte) (int, int, int) { } } + + // log.Warnf("from : %d to: %d error: %d match: %v", + // i, end+1, -buffer[buffIndex(len(sequence)-1, len(pattern)-1, width)], + // string(sequence[i:(end+1)])) return i, end + 1, -buffer[buffIndex(len(sequence)-1, len(pattern)-1, width)] } diff --git a/pkg/obiapat/obiapat.c b/pkg/obiapat/obiapat.c index 38b9f56..094b65c 100644 --- a/pkg/obiapat/obiapat.c +++ b/pkg/obiapat/obiapat.c @@ -137,6 +137,28 @@ char *reverseSequence(char *str,char isPattern) return str; } +/* -------------------------------------------- */ +/* lowercase sequence */ +/* -------------------------------------------- */ + +#define IS_UPPER(c) (((c) >= 'A') && ((c) <= 'Z')) +#define TO_LOWER(c) ((c) - 'A' + 'a') + +char *LowerSequence(char *seq) +{ + char *cseq; + + for (cseq = seq ; *cseq ; cseq++) + if (IS_UPPER(*cseq)) + *cseq = TO_LOWER(*cseq); + + return seq; +} + +#undef IS_UPPER +#undef TO_LOWER + + char *ecoComplementPattern(char *nucAcSeq) { return reverseSequence(LXBioSeqComplement(nucAcSeq),1); @@ -165,6 +187,7 @@ void UpperSequence(char *seq) + /* -------------------------------------------- */ /* encode sequence */ /* IS_UPPER is slightly faster than isupper */ diff --git a/pkg/obiapat/pattern.go b/pkg/obiapat/pattern.go index 3cea666..6900c62 100644 --- a/pkg/obiapat/pattern.go +++ b/pkg/obiapat/pattern.go @@ -9,6 +9,7 @@ import "C" import ( "errors" "runtime" + "strings" "unsafe" log "github.com/sirupsen/logrus" @@ -114,7 +115,7 @@ func (pattern ApatPattern) ReverseComplement() (ApatPattern, error) { C.free(unsafe.Pointer(errmsg)) return ApatPattern{nil}, errors.New(message) } - spat := C.GoString(apc.cpat) + spat := strings.ToLower(C.GoString(apc.cpat)) ap := _ApatPattern{apc, spat} runtime.SetFinalizer(&ap, func(p *_ApatPattern) { @@ -366,6 +367,18 @@ func (pattern ApatPattern) BestMatch(sequence ApatSequence, begin, length int) ( return } +// FilterBestMatch filters the best non-overlapping matches of a given pattern in a sequence. +// +// It takes the following parameters: +// - pattern: the pattern to search for (ApatPattern). +// - sequence: the sequence to search in (ApatSequence). +// - begin: the starting index of the search (int).
+// - length: the length of the search (int). + // + // It returns a slice of [3]int representing the locations of all non-overlapping matches in the sequence. + // The first two values of the [3]int indicate respectively the start and the end position of + // the match. Following the Go convention the end position is not included in the + // match. The third value indicates the number of errors detected for this occurrence. func (pattern ApatPattern) FilterBestMatch(sequence ApatSequence, begin, length int) (loc [][3]int) { res := pattern.FindAllIndex(sequence, begin, length) filtered := make([][3]int, 0, len(res)) @@ -424,13 +437,15 @@ func (pattern ApatPattern) FilterBestMatch(sequence ApatSequence, begin, length func (pattern ApatPattern) AllMatches(sequence ApatSequence, begin, length int) (loc [][3]int) { res := pattern.FilterBestMatch(sequence, begin, length) + j := 0 for _, m := range res { // Recompute the start and end position of the match // when the pattern allows for indels if m[2] > 0 && pattern.pointer.pointer.hasIndel { - start := m[0] - m[2] + // log.Warnf("Locating indel on sequence %s[%s]", sequence.pointer.reference.Id(), pattern.String()) + start := m[0] - m[2]*2 start = max(start, 0) - end := start + int(pattern.pointer.pointer.patlen) + 2*m[2] + end := start + int(pattern.pointer.pointer.patlen) + 4*m[2] end = min(end, sequence.Len()) // 1 << 30 = 1,073,741,824 = 1Gb // It's a virtual array mapping the sequence to the pattern @@ -439,18 +454,21 @@ func (pattern ApatPattern) AllMatches(sequence ApatSequence, begin, length int) cpattern := (*[1 << 30]byte)(unsafe.Pointer(pattern.pointer.pointer.cpat)) frg := sequence.pointer.reference.Sequence()[start:end] - begin, end, score := obialign.LocatePattern((*cpattern)[0:int(pattern.pointer.pointer.patlen)], frg) + pb, pe, score := obialign.LocatePattern((*cpattern)[0:int(pattern.pointer.pointer.patlen)], frg) // olderr := m[2] m[2] = score - m[0] = start + begin - m[1] = start + end + m[0] = start + pb + m[1] = start + pe + // log.Warnf("seq[%d@%d:%d] %d: %s %d - %s:%s:%s", i, m[0], m[1], olderr, sequence.pointer.reference.Id(), score, // frg, (*cpattern)[0:int(pattern.pointer.pointer.patlen)], sequence.pointer.reference.Sequence()[m[0]:m[1]]) } + + if int(pattern.pointer.pointer.maxerr) >= m[2] { + res[j] = m + j++ + } } - - // log.Debugf("All matches : %v", res) - - return res + return res[0:j] } diff --git a/pkg/obiformats/ngsfilter_read.go b/pkg/obiformats/ngsfilter_read.go index ef51e37..2e5b2d0 100644 --- a/pkg/obiformats/ngsfilter_read.go +++ b/pkg/obiformats/ngsfilter_read.go @@ -642,6 +642,8 @@ func ReadCSVNGSFilter(reader io.Reader) (*obingslibrary.NGSLibrary, error) { } + ngsfilter.CheckPrimerUnicity() + for i := 0; i < len(params); i++ { param := params[i][1] if len(params[i]) < 3 { diff --git a/pkg/obiformats/universal_read.go b/pkg/obiformats/universal_read.go index 2caea86..07cad24 100644 --- a/pkg/obiformats/universal_read.go +++ b/pkg/obiformats/universal_read.go @@ -3,6 +3,8 @@ package obiformats import ( "bufio" "bytes" + "encoding/csv" + "errors" "io" "path" "regexp" @@ -39,6 +41,31 @@ type SequenceReader func(reader io.Reader, options ...WithOption) (obiiter.IBioS // - io.Reader: A modified reader with the read data. // - error: Any error encountered during the process.
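// Aside (not part of the patch): a self-contained sketch showing how the
// [3]int triplets documented for FilterBestMatch and AllMatches are meant to
// be read: m[0] is the start, m[1] the exclusive end, m[2] the error count.
// The sample values are illustrative only.

package main

import "fmt"

func reportMatches(seq []byte, matches [][3]int) {
	for _, m := range matches {
		start, end, errs := m[0], m[1], m[2]
		fmt.Printf("match %q at [%d,%d) with %d error(s)\n", seq[start:end], start, end, errs)
	}
}

func main() {
	reportMatches([]byte("ttacgttt"), [][3]int{{2, 6, 0}})
}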
func OBIMimeTypeGuesser(stream io.Reader) (*mimetype.MIME, io.Reader, error) { + csv := func(in []byte, limit uint32) bool { + in = dropLastLine(in, limit) + + br := bytes.NewReader(in) + r := csv.NewReader(br) + r.Comma = ',' + r.ReuseRecord = true + r.LazyQuotes = true + r.Comment = '#' + + lines := 0 + for { + _, err := r.Read() + if errors.Is(err, io.EOF) { + break + } + if err != nil { + return false + } + lines++ + } + + return r.FieldsPerRecord > 1 && lines > 1 + } + fastaDetector := func(raw []byte, limit uint32) bool { ok, err := regexp.Match("^>[^ ]", raw) return ok && err == nil @@ -70,12 +97,14 @@ func OBIMimeTypeGuesser(stream io.Reader) (*mimetype.MIME, io.Reader, error) { mimetype.Lookup("text/plain").Extend(ecoPCR2Detector, "text/ecopcr2", ".ecopcr") mimetype.Lookup("text/plain").Extend(genbankDetector, "text/genbank", ".seq") mimetype.Lookup("text/plain").Extend(emblDetector, "text/embl", ".dat") + mimetype.Lookup("text/plain").Extend(csv, "text/csv", ".csv") mimetype.Lookup("application/octet-stream").Extend(fastaDetector, "text/fasta", ".fasta") mimetype.Lookup("application/octet-stream").Extend(fastqDetector, "text/fastq", ".fastq") mimetype.Lookup("application/octet-stream").Extend(ecoPCR2Detector, "text/ecopcr2", ".ecopcr") mimetype.Lookup("application/octet-stream").Extend(genbankDetector, "text/genbank", ".seq") mimetype.Lookup("application/octet-stream").Extend(emblDetector, "text/embl", ".dat") + mimetype.Lookup("application/octet-stream").Extend(csv, "text/csv", ".csv") // Create a buffer to store the read data buf := make([]byte, 1024*128) @@ -87,6 +116,7 @@ func OBIMimeTypeGuesser(stream io.Reader) (*mimetype.MIME, io.Reader, error) { // Detect the MIME type using the mimetype library mimeType := mimetype.Detect(buf) + if mimeType == nil { return nil, nil, err } diff --git a/pkg/obifp/uint128_test.go b/pkg/obifp/uint128_test.go new file mode 100644 index 0000000..bc834ee --- /dev/null +++ b/pkg/obifp/uint128_test.go @@ -0,0 +1,250 @@ +package obifp + +import ( + "math" + "reflect" + + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestUint128_Add(t *testing.T) { + u := Uint128{w1: 1, w0: 2} + v := Uint128{w1: 3, w0: 4} + w := u.Add(v) + assert.Equal(t, Uint128{w1: 4, w0: 6}, w) + + u = Uint128{w1: 0, w0: 0} + v = Uint128{w1: 0, w0: 0} + w = u.Add(v) + assert.Equal(t, Uint128{w1: 0, w0: 0}, w) + + u = Uint128{w1: 0, w0: math.MaxUint64} + v = Uint128{w1: 0, w0: 1} + w = u.Add(v) + assert.Equal(t, Uint128{w1: 1, w0: 0}, w) +} + +func TestUint128_Add64(t *testing.T) { + u := Uint128{w1: 1, w0: 2} + v := uint64(3) + w := u.Add64(v) + assert.Equal(t, Uint128{w1: 1, w0: 5}, w) + + u = Uint128{w1: 0, w0: 0} + v = uint64(0) + w = u.Add64(v) + assert.Equal(t, Uint128{w1: 0, w0: 0}, w) + + u = Uint128{w1: 0, w0: math.MaxUint64} + v = uint64(1) + w = u.Add64(v) + assert.Equal(t, Uint128{w1: 1, w0: 0}, w) +} + +func TestUint128_Sub(t *testing.T) { + u := Uint128{w1: 4, w0: 6} + v := Uint128{w1: 3, w0: 4} + w := u.Sub(v) + assert.Equal(t, Uint128{w1: 1, w0: 2}, w) + + u = Uint128{w1: 0, w0: 0} + v = Uint128{w1: 0, w0: 0} + w = u.Sub(v) + assert.Equal(t, Uint128{w1: 0, w0: 0}, w) + + u = Uint128{w1: 1, w0: 0} + v = Uint128{w1: 0, w0: 1} + w = u.Sub(v) + assert.Equal(t, Uint128{w1: 0, w0: math.MaxUint64}, w) +} + +func TestUint128_Mul64(t *testing.T) { + u := Uint128{w1: 1, w0: 2} + v := uint64(3) + w := u.Mul64(v) + + if w.w1 != 3 || w.w0 != 6 { + t.Errorf("Mul64(%v, %v) = %v, want %v", u, v, w, Uint128{w1: 3, w0: 6}) + } + + u = Uint128{w1: 0, w0: 0} + v = 
uint64(0) + w = u.Mul64(v) + + if w.w1 != 0 || w.w0 != 0 { + t.Errorf("Mul64(%v, %v) = %v, want %v", u, v, w, Uint128{w1: 0, w0: 0}) + } + + u = Uint128{w1: 0, w0: math.MaxUint64} + v = uint64(2) + w = u.Mul64(v) + + if w.w1 != 1 || w.w0 != 18446744073709551614 { + t.Errorf("Mul64(%v, %v) = %v, want %v", u, v, w, Uint128{w1: 1, w0: 18446744073709551614}) + } + +} + +func TestUint128_Mul(t *testing.T) { + tests := []struct { + name string + u Uint128 + v Uint128 + expected Uint128 + }{ + { + name: "simple multiplication", + u: Uint128{w1: 1, w0: 2}, + v: Uint128{w1: 3, w0: 4}, + expected: Uint128{w1: 10, w0: 8}, + }, + { + name: "multiplication with overflow", + u: Uint128{w1: 0, w0: math.MaxUint64}, + v: Uint128{w1: 0, w0: 2}, + expected: Uint128{w1: 1, w0: 18446744073709551614}, + }, + { + name: "multiplication with zero", + u: Uint128{w1: 0, w0: 0}, + v: Uint128{w1: 0, w0: 0}, + expected: Uint128{w1: 0, w0: 0}, + }, + { + name: "multiplication with large numbers", + u: Uint128{w1: 100, w0: 200}, + v: Uint128{w1: 300, w0: 400}, + expected: Uint128{w1: 100000, w0: 80000}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + actual := tt.u.Mul(tt.v) + if !reflect.DeepEqual(actual, tt.expected) { + t.Errorf("Mul(%v, %v) = %v, want %v", tt.u, tt.v, actual, tt.expected) + } + }) + } +} + +func TestUint128_QuoRem(t *testing.T) { + u := Uint128{w1: 3, w0: 8} + v := Uint128{w1: 0, w0: 4} + q, r := u.QuoRem(v) + assert.Equal(t, Uint128{w1: 0, w0: 2}, q) + assert.Equal(t, Uint128{w1: 0, w0: 0}, r) +} + +func TestUint128_QuoRem64(t *testing.T) { + u := Uint128{w1: 0, w0: 6} + v := uint64(3) + q, r := u.QuoRem64(v) + assert.Equal(t, Uint128{w1: 0, w0: 2}, q) + assert.Equal(t, uint64(0), r) +} + +func TestUint128_Div(t *testing.T) { + u := Uint128{w1: 3, w0: 8} + v := Uint128{w1: 0, w0: 4} + q := u.Div(v) + assert.Equal(t, Uint128{w1: 0, w0: 2}, q) +} + +func TestUint128_Div64(t *testing.T) { + u := Uint128{w1: 0, w0: 6} + v := uint64(3) + q := u.Div64(v) + assert.Equal(t, Uint128{w1: 0, w0: 2}, q) +} + +func TestUint128_Mod(t *testing.T) { + u := Uint128{w1: 3, w0: 8} + v := Uint128{w1: 0, w0: 4} + r := u.Mod(v) + assert.Equal(t, Uint128{w1: 0, w0: 0}, r) +} + +func TestUint128_Mod64(t *testing.T) { + u := Uint128{w1: 0, w0: 6} + v := uint64(3) + r := u.Mod64(v) + assert.Equal(t, uint64(0), r) +} + +func TestUint128_Cmp(t *testing.T) { + u := Uint128{w1: 1, w0: 2} + v := Uint128{w1: 3, w0: 4} + assert.Equal(t, -1, u.Cmp(v)) +} + +func TestUint128_Cmp64(t *testing.T) { + u := Uint128{w1: 1, w0: 2} + v := uint64(3) + assert.Equal(t, -1, u.Cmp64(v)) +} + +func TestUint128_Equals(t *testing.T) { + u := Uint128{w1: 1, w0: 2} + v := Uint128{w1: 1, w0: 2} + assert.Equal(t, true, u.Equals(v)) +} + +func TestUint128_LessThan(t *testing.T) { + u := Uint128{w1: 1, w0: 2} + v := Uint128{w1: 3, w0: 4} + assert.Equal(t, true, u.LessThan(v)) +} + +func TestUint128_GreaterThan(t *testing.T) { + u := Uint128{w1: 1, w0: 2} + v := Uint128{w1: 3, w0: 4} + assert.Equal(t, false, u.GreaterThan(v)) +} + +func TestUint128_LessThanOrEqual(t *testing.T) { + u := Uint128{w1: 1, w0: 2} + v := Uint128{w1: 3, w0: 4} + assert.Equal(t, true, u.LessThanOrEqual(v)) +} + +func TestUint128_GreaterThanOrEqual(t *testing.T) { + u := Uint128{w1: 1, w0: 2} + v := Uint128{w1: 3, w0: 4} + assert.Equal(t, false, u.GreaterThanOrEqual(v)) +} + +func TestUint128_And(t *testing.T) { + u := Uint128{w1: 1, w0: 2} + v := Uint128{w1: 3, w0: 4} + w := u.And(v) + assert.Equal(t, Uint128{w1: 1, w0: 0}, w) +} + +func 
TestUint128_Or(t *testing.T) { + u := Uint128{w1: 1, w0: 2} + v := Uint128{w1: 3, w0: 4} + w := u.Or(v) + assert.Equal(t, Uint128{w1: 3, w0: 6}, w) +} + +func TestUint128_Xor(t *testing.T) { + u := Uint128{w1: 1, w0: 2} + v := Uint128{w1: 3, w0: 4} + w := u.Xor(v) + assert.Equal(t, Uint128{w1: 2, w0: 6}, w) +} + +func TestUint128_Not(t *testing.T) { + u := Uint128{w1: 1, w0: 2} + w := u.Not() + assert.Equal(t, Uint128{w1: math.MaxUint64 - 1, w0: math.MaxUint64 - 2}, w) +} + +func TestUint128_AsUint64(t *testing.T) { + u := Uint128{w1: 0, w0: 5} + v := u.AsUint64() + assert.Equal(t, uint64(5), v) +} diff --git a/pkg/obikmer/debruijn.go b/pkg/obikmer/debruijn.go index 8ed0fc6..a707ca7 100644 --- a/pkg/obikmer/debruijn.go +++ b/pkg/obikmer/debruijn.go @@ -10,6 +10,7 @@ import ( "slices" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obistats" log "github.com/sirupsen/logrus" ) @@ -312,10 +313,48 @@ func (g *DeBruijnGraph) LongestPath(max_length int) []uint64 { return path } -func (g *DeBruijnGraph) LongestConsensus(id string) (*obiseq.BioSequence, error) { +func (g *DeBruijnGraph) LongestConsensus(id string, min_cov float64) (*obiseq.BioSequence, error) { + if g.Len() == 0 { + return nil, fmt.Errorf("graph is empty") + } //path := g.LongestPath(max_length) path := g.HaviestPath() - s := g.DecodePath(path) + + spath := path + + if min_cov > 0 { + wp := make([]uint, len(path)) + + for i, n := range path { + wp[i] = g.graph[n] + } + + mp := uint(float64(obistats.Mode(wp))*min_cov + 0.5) + + from := 0 + for i, n := range path { + if g.graph[n] < mp { + from = i + 1 + } else { + break + } + } + + to := len(path) + + for i := len(path) - 1; i >= 0; i-- { + n := path[i] + if g.graph[n] < mp { + to = i + } else { + break + } + } + + spath = path[from:to] + } + + s := g.DecodePath(spath) if len(s) > 0 { seq := obiseq.NewBioSequence( @@ -387,6 +426,48 @@ func (g *DeBruijnGraph) Weight(index uint64) int { return int(val) } +// WeightMode returns the mode weight of the nodes in the DeBruijnGraph. +// +// It iterates over each count in the graph map and updates the max value if the current count is greater. +// Finally, it returns the mode weight as an integer. +// +// Returns: +// - int: the mode weight value. +func (g *DeBruijnGraph) WeightMode() int { + dist := make(map[uint]int) + + for _, w := range g.graph { + dist[w]++ + } + + maxi := 0 + wmax := uint(0) + + for k, v := range dist { + if v > maxi && k > 1 { + maxi = v + wmax = k + } + } + + return int(wmax) +} + +// WeightMean returns the mean weight of the nodes in the DeBruijnGraph. +// +// Returns: +// - float64: the mean weight of the nodes in the graph. +func (g *DeBruijnGraph) WeightMean() float64 { + sum := 0 + + for _, w := range g.graph { + i := int(w) + sum += i + } + + return float64(sum) / float64(len(g.graph)) +} + // append appends a sequence of nucleotides to the DeBruijnGraph. 
// // Parameters: @@ -637,7 +718,7 @@ func (g *DeBruijnGraph) HaviestPath() []uint64 { } if slices.Contains(heaviestPath, currentNode) { - log.Fatalf("Cycle detected %v -> %v (%v) len(%v)", heaviestPath, currentNode, startNodes, len(heaviestPath)) + log.Panicf("Cycle detected %v -> %v (%v) len(%v), graph: %v", heaviestPath, currentNode, startNodes, len(heaviestPath), g.Len()) return nil } diff --git a/pkg/obilua/lua.go b/pkg/obilua/lua.go index 1855e44..833751d 100644 --- a/pkg/obilua/lua.go +++ b/pkg/obilua/lua.go @@ -20,6 +20,8 @@ import ( func NewInterpreter() *lua.LState { lua := lua.NewState() + registerMutexType(lua) + RegisterObilib(lua) RegisterObiContext(lua) @@ -117,7 +119,7 @@ func LuaWorker(proto *lua.FunctionProto) obiseq.SeqWorker { case *obiseq.BioSequenceSlice: return *val, err default: - return nil, fmt.Errorf("worker function doesn't return the correct type") + return nil, fmt.Errorf("worker function doesn't return the correct type %T", val) } } diff --git a/pkg/obilua/lua_obicontext.go b/pkg/obilua/lua_obicontext.go index d3ee691..9551b4b 100644 --- a/pkg/obilua/lua_obicontext.go +++ b/pkg/obilua/lua_obicontext.go @@ -36,6 +36,8 @@ func obicontextGetSet(interpreter *lua.LState) int { __lua_obicontext.Store(key, float64(val)) case lua.LString: __lua_obicontext.Store(key, string(val)) + case *lua.LUserData: + __lua_obicontext.Store(key, val.Value) case *lua.LTable: __lua_obicontext.Store(key, Table2Interface(interpreter, val)) default: diff --git a/pkg/obilua/lua_push_interface.go b/pkg/obilua/lua_push_interface.go index 39e338d..2e57ff1 100644 --- a/pkg/obilua/lua_push_interface.go +++ b/pkg/obilua/lua_push_interface.go @@ -1,6 +1,8 @@ package obilua import ( + "sync" + log "github.com/sirupsen/logrus" lua "github.com/yuin/gopher-lua" @@ -46,6 +48,8 @@ func pushInterfaceToLua(L *lua.LState, val interface{}) { pushSliceBoolToLua(L, v) case nil: L.Push(lua.LNil) + case *sync.Mutex: + pushMutexToLua(L, v) default: log.Fatalf("Cannot deal with value (%T) : %v", val, val) } diff --git a/pkg/obilua/mutex.go b/pkg/obilua/mutex.go new file mode 100644 index 0000000..fa15442 --- /dev/null +++ b/pkg/obilua/mutex.go @@ -0,0 +1,63 @@ +package obilua + +import ( + lua "github.com/yuin/gopher-lua" + + "sync" +) + +const luaMutexTypeName = "Mutex" + +func registerMutexType(luaState *lua.LState) { + mutexType := luaState.NewTypeMetatable(luaMutexTypeName) + luaState.SetGlobal(luaMutexTypeName, mutexType) + + luaState.SetField(mutexType, "new", luaState.NewFunction(newMutex)) + + luaState.SetField(mutexType, "__index", + luaState.SetFuncs(luaState.NewTable(), + mutexMethods)) +} + +func mutex2Lua(interpreter *lua.LState, mutex *sync.Mutex) lua.LValue { + ud := interpreter.NewUserData() + ud.Value = mutex + interpreter.SetMetatable(ud, interpreter.GetTypeMetatable(luaMutexTypeName)) + + return ud +} + +func pushMutexToLua(luaState *lua.LState, mutex *sync.Mutex) { + luaState.Push(mutex2Lua(luaState, mutex)) +} +func newMutex(luaState *lua.LState) int { + m := &sync.Mutex{} + pushMutexToLua(luaState, m) + return 1 +} + +var mutexMethods = map[string]lua.LGFunction{ + "lock": mutexLock, + "unlock": mutexUnlock, +} + +func checkMutex(L *lua.LState) *sync.Mutex { + ud := L.CheckUserData(1) + if mutex, ok := ud.Value.(*sync.Mutex); ok { + return mutex + } + L.ArgError(1, "Mutex expected") + return nil +} + +func mutexLock(L *lua.LState) int { + mutex := checkMutex(L) + mutex.Lock() + return 0 +} + +func mutexUnlock(L *lua.LState) int { + mutex := checkMutex(L) + mutex.Unlock() + return 0 +} 
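The Mutex userdata type registered above is only reachable through the Lua state built by NewInterpreter. Below is a minimal, hypothetical sketch of how a caller could exercise it; the obilua import path is assumed from this repository's layout, and the Lua snippet only uses the Mutex.new, lock and unlock entry points defined in mutex.go.

package main

import (
	"log"

	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obilua"
)

func main() {
	L := obilua.NewInterpreter() // registerMutexType is called inside NewInterpreter
	defer L.Close()

	// Lua-side view of the userdata type registered above.
	if err := L.DoString(`
		local m = Mutex.new()
		m:lock()
		-- a critical section shared between workers would go here
		m:unlock()
	`); err != nil {
		log.Fatal(err)
	}
}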
diff --git a/pkg/obingslibrary/marker.go b/pkg/obingslibrary/marker.go index b5ea341..01f66b7 100644 --- a/pkg/obingslibrary/marker.go +++ b/pkg/obingslibrary/marker.go @@ -2,9 +2,10 @@ package obingslibrary import ( "fmt" - "log" "strings" + log "github.com/sirupsen/logrus" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiapat" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils" diff --git a/pkg/obingslibrary/multimatch.go b/pkg/obingslibrary/multimatch.go index fd69ddc..02e7572 100644 --- a/pkg/obingslibrary/multimatch.go +++ b/pkg/obingslibrary/multimatch.go @@ -3,7 +3,7 @@ package obingslibrary import ( "fmt" "math" - "sort" + "slices" log "github.com/sirupsen/logrus" @@ -87,6 +87,8 @@ func lookForTag(seq string, delimiter byte) string { i := len(seq) - 1 + // log.Warnf("Provided fragment : %s", string(seq)) + for i >= 0 && seq[i] != delimiter { i-- } @@ -107,6 +109,7 @@ func lookForTag(seq string, delimiter byte) string { return "" } + // log.Warnf("extracted : %s", string(seq[begin:end])) return seq[begin:end] } @@ -172,11 +175,12 @@ func (marker *Marker) beginDelimitedTagExtractor( begin int, forward bool) string { - taglength := marker.Forward_spacer + marker.Forward_tag_length + // log.Warn("beginDelimitedTagExtractor") + taglength := 2*marker.Forward_spacer + marker.Forward_tag_length delimiter := marker.Forward_tag_delimiter if !forward { - taglength = marker.Reverse_spacer + marker.Reverse_tag_length + taglength = 2*marker.Reverse_spacer + marker.Reverse_tag_length delimiter = marker.Reverse_tag_delimiter } @@ -240,6 +244,7 @@ func (marker *Marker) endDelimitedTagExtractor( sequence *obiseq.BioSequence, end int, forward bool) string { + // log.Warn("endDelimitedTagExtractor") taglength := marker.Reverse_spacer + marker.Reverse_tag_length delimiter := marker.Reverse_tag_delimiter @@ -335,14 +340,17 @@ func (marker *Marker) beginTagExtractor( sequence *obiseq.BioSequence, begin int, forward bool) string { + // log.Warnf("Forward : %v -> %d %c", forward, marker.Forward_spacer, marker.Forward_tag_delimiter) + // log.Warnf("Forward : %v -> %d %c", forward, marker.Reverse_spacer, marker.Reverse_tag_delimiter) if forward { if marker.Forward_tag_delimiter == 0 { return marker.beginFixedTagExtractor(sequence, begin, forward) } else { if marker.Forward_tag_indels == 0 { + // log.Warnf("Delimited tag for forward primers %s", marker.forward.String()) return marker.beginDelimitedTagExtractor(sequence, begin, forward) } else { - // log.Warn("Rescue tag for forward primers") + // log.Warnf("Rescue tag for forward primers %s", marker.forward.String()) return marker.beginRescueTagExtractor(sequence, begin, forward) } } @@ -351,9 +359,10 @@ func (marker *Marker) beginTagExtractor( return marker.beginFixedTagExtractor(sequence, begin, forward) } else { if marker.Reverse_tag_indels == 0 { + // log.Warnf("Delimited tag for reverse/complement primers %s", marker.creverse.String()) return marker.beginDelimitedTagExtractor(sequence, begin, forward) } else { - // log.Warn("Rescue tag for reverse/complement primers") + // log.Warnf("Rescue tag for reverse/complement primers %s", marker.creverse.String()) return marker.beginRescueTagExtractor(sequence, begin, forward) } } @@ -369,9 +378,10 @@ func (marker *Marker) endTagExtractor( return marker.endFixedTagExtractor(sequence, end, forward) } else { if marker.Reverse_tag_indels == 0 { + // log.Warnf("Delimited tag for reverse primers %s", marker.reverse.String()) 
return marker.endDelimitedTagExtractor(sequence, end, forward) } else { - // log.Warn("Rescue tag for reverse primers") + // log.Warnf("Rescue tag for reverse primers %s", marker.reverse.String()) return marker.endRescueTagExtractor(sequence, end, forward) } } @@ -380,9 +390,10 @@ func (marker *Marker) endTagExtractor( return marker.endFixedTagExtractor(sequence, end, forward) } else { if marker.Forward_tag_indels == 0 { + // log.Warnf("Delimited tag for forward/complement primers %s", marker.cforward.String()) return marker.endDelimitedTagExtractor(sequence, end, forward) } else { - // log.Warn("Rescue tag for forward/complement primers") + // log.Warnf("Rescue tag for forward/complement primers %s", marker.cforward.String()) return marker.endRescueTagExtractor(sequence, end, forward) } } @@ -609,9 +620,7 @@ func (library *NGSLibrary) ExtractMultiBarcode(sequence *obiseq.BioSequence) (ob } if len(matches) > 0 { - sort.Slice(matches, func(i, j int) bool { - return matches[i].Begin < matches[j].Begin - }) + slices.SortFunc(matches, func(a, b PrimerMatch) int { return a.Begin - b.Begin }) state := 0 var from PrimerMatch diff --git a/pkg/obingslibrary/ngslibrary.go b/pkg/obingslibrary/ngslibrary.go index 4c8e7ab..c310a27 100644 --- a/pkg/obingslibrary/ngslibrary.go +++ b/pkg/obingslibrary/ngslibrary.go @@ -36,8 +36,9 @@ func MakeNGSLibrary() NGSLibrary { } func (library *NGSLibrary) GetMarker(forward, reverse string) (*Marker, bool) { - pair := PrimerPair{strings.ToLower(forward), - strings.ToLower(reverse)} + forward = strings.ToLower(forward) + reverse = strings.ToLower(reverse) + pair := PrimerPair{forward, reverse} marker, ok := (library.Markers)[pair] if ok { diff --git a/pkg/obioptions/version.go b/pkg/obioptions/version.go index dc37a88..ba4d997 100644 --- a/pkg/obioptions/version.go +++ b/pkg/obioptions/version.go @@ -7,7 +7,7 @@ import ( // TODO: The version number is extracted from git. This induces that the version // corresponds to the last commit, and not the one when the file will be // commited -var _Commit = "31bfc88" +var _Commit = "373464c" var _Version = "Release 4.2.0" // Version returns the version of the obitools package. diff --git a/pkg/obiseq/attributes.go b/pkg/obiseq/attributes.go index 10ee503..de4ea31 100644 --- a/pkg/obiseq/attributes.go +++ b/pkg/obiseq/attributes.go @@ -72,8 +72,8 @@ func (s *BioSequence) HasAttribute(key string) bool { ok := s.annotations != nil if ok { - defer s.AnnotationsUnlock() s.AnnotationsLock() + defer s.AnnotationsUnlock() _, ok = s.annotations[key] } @@ -112,8 +112,8 @@ func (s *BioSequence) GetAttribute(key string) (interface{}, bool) { ok := s.annotations != nil if ok { - defer s.AnnotationsUnlock() s.AnnotationsLock() + defer s.AnnotationsUnlock() val, ok = s.annotations[key] } @@ -144,8 +144,8 @@ func (s *BioSequence) SetAttribute(key string, value interface{}) { annot := s.Annotations() - defer s.AnnotationsUnlock() s.AnnotationsLock() + defer s.AnnotationsUnlock() annot[key] = value } @@ -205,8 +205,8 @@ func (s *BioSequence) GetFloatAttribute(key string) (float64, bool) { // No return value. 
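// Aside (not part of the patch): the attributes.go hunks above reorder each
// pair of calls so the lock is taken before its release is deferred, which is
// the conventional Go idiom. A tiny, standalone illustration of that pattern
// (all names below are hypothetical):

package main

import "sync"

type store struct {
	mu   sync.Mutex
	data map[string]int
}

func (s *store) set(k string, v int) {
	s.mu.Lock()         // acquire first...
	defer s.mu.Unlock() // ...then schedule the release for when set returns
	s.data[k] = v
}

func main() {
	s := &store{data: map[string]int{}}
	s.set("count", 1)
}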
func (s *BioSequence) DeleteAttribute(key string) { if s.annotations != nil { - defer s.AnnotationsUnlock() s.AnnotationsLock() + defer s.AnnotationsUnlock() delete(s.annotations, key) } } diff --git a/pkg/obiseq/biosequence.go b/pkg/obiseq/biosequence.go index 00d2bd9..bb0533e 100644 --- a/pkg/obiseq/biosequence.go +++ b/pkg/obiseq/biosequence.go @@ -65,7 +65,7 @@ type BioSequence struct { feature []byte paired *BioSequence // A pointer to the paired sequence annotations Annotation - annot_lock sync.Mutex + annot_lock *sync.Mutex } // NewEmptyBioSequence creates a new BioSequence object with an empty sequence. @@ -90,7 +90,7 @@ func NewEmptyBioSequence(preallocate int) *BioSequence { feature: nil, paired: nil, annotations: nil, - annot_lock: sync.Mutex{}, + annot_lock: &sync.Mutex{}, } } diff --git a/pkg/obiseq/biosequence_test.go b/pkg/obiseq/biosequence_test.go index 400a92a..5e51203 100644 --- a/pkg/obiseq/biosequence_test.go +++ b/pkg/obiseq/biosequence_test.go @@ -187,7 +187,7 @@ func TestCopy(t *testing.T) { "annotation1": "value1", "annotation2": "value2", }, - annot_lock: sync.Mutex{}, + annot_lock: &sync.Mutex{}, } newSeq := seq.Copy() diff --git a/pkg/obiseq/merge.go b/pkg/obiseq/merge.go index 26163ae..7af3daf 100644 --- a/pkg/obiseq/merge.go +++ b/pkg/obiseq/merge.go @@ -91,8 +91,7 @@ func (sequence *BioSequence) HasStatsOn(key string) bool { // - StatsOnValues func (sequence *BioSequence) StatsOn(desc StatsOnDescription, na string) StatsOnValues { mkey := StatsOnSlotName(desc.Name) - annotations := sequence.Annotations() - istat, ok := annotations[mkey] + istat, ok := sequence.GetAttribute(mkey) var stats StatsOnValues var newstat bool @@ -117,39 +116,40 @@ func (sequence *BioSequence) StatsOn(desc StatsOnDescription, na string) StatsOn } } default: - stats = make(StatsOnValues, 10) - annotations[mkey] = stats + stats = make(StatsOnValues) + sequence.SetAttribute(mkey, stats) newstat = true } } else { - stats = make(StatsOnValues, 10) - annotations[mkey] = stats + stats = make(StatsOnValues) + sequence.SetAttribute(mkey, stats) newstat = true } - if newstat && sequence.StatsPlusOne(desc, sequence, na) { - delete(sequence.Annotations(), desc.Key) + if newstat { + sequence.StatsPlusOne(desc, sequence, na) } return stats } -// StatsPlusOne adds the count of the sequence toAdd to the count of the key in the stats. +// StatsPlusOne updates the statistics on the given attribute (desc) on the receiver BioSequence +// with the value of the attribute on the toAdd BioSequence. 
// // Parameters: -// - key: the attribute key (string) to be summarized -// - toAdd: the BioSequence to add to the stats +// - desc: StatsOnDescription of the attribute to be updated +// - toAdd: the BioSequence containing the attribute to be updated // - na: the value to be used if the attribute is not present +// // Return type: -// - bool +// - bool: true if the update was successful, false otherwise func (sequence *BioSequence) StatsPlusOne(desc StatsOnDescription, toAdd *BioSequence, na string) bool { sval := na - annotations := sequence.Annotations() stats := sequence.StatsOn(desc, na) retval := false if toAdd.HasAnnotation() { - value, ok := toAdd.Annotations()[desc.Key] + value, ok := toAdd.GetAttribute(desc.Key) if ok { @@ -178,8 +178,9 @@ func (sequence *BioSequence) StatsPlusOne(desc StatsOnDescription, toAdd *BioSeq if !ok { old = 0 } + stats[sval] = old + desc.Weight(toAdd) - annotations[StatsOnSlotName(desc.Name)] = stats // TODO: check if this is necessary + sequence.SetAttribute(StatsOnSlotName(desc.Name), stats) // TODO: check if this is necessary return retval } @@ -263,7 +264,7 @@ func (sequence *BioSequence) Merge(tomerge *BioSequence, na string, inplace bool } } - annotations["count"] = count + sequence.SetCount(count) return sequence } @@ -282,7 +283,7 @@ func (sequences BioSequenceSlice) Merge(na string, statsOn StatsOnDescriptions) seq.SetQualities(nil) if len(sequences) == 1 { - seq.Annotations()["count"] = seq.Count() + seq.SetCount(seq.Count()) for _, desc := range statsOn { seq.StatsOn(desc, na) } @@ -296,5 +297,4 @@ func (sequences BioSequenceSlice) Merge(na string, statsOn StatsOnDescriptions) sequences.Recycle(false) return seq - } diff --git a/pkg/obistats/minmax.go b/pkg/obistats/minmax.go index a88318f..420ab73 100644 --- a/pkg/obistats/minmax.go +++ b/pkg/obistats/minmax.go @@ -1,11 +1,9 @@ package obistats - - -func Max[T int64 | int32 | int16 | int8 | int | float32 | float64] (data []T) T { +func Max[T int64 | int32 | int16 | int8 | int | float32 | float64](data []T) T { m := data[0] - for _,v := range data { + for _, v := range data { if v > m { m = v } @@ -14,14 +12,34 @@ func Max[T int64 | int32 | int16 | int8 | int | float32 | float64] (data []T) T return m } -func Min[T int64 | int32 | int16 | int8 | int | float32 | float64] (data []T) T { +func Min[T int64 | int32 | int16 | int8 | int | uint64 | uint32 | uint16 | uint8 | uint | float32 | float64](data []T) T { m := data[0] - for _,v := range data { + for _, v := range data { if v < m { m = v } } return m -} \ No newline at end of file +} + +func Mode[T int64 | int32 | int16 | int8 | int | uint64 | uint32 | uint16 | uint8 | uint](data []T) T { + ds := make(map[T]int) + + for _, v := range data { + ds[v]++ + } + + md := T(0) + maxocc := 0 + + for v, occ := range ds { + if occ > maxocc { + maxocc = occ + md = v + } + } + + return md +} diff --git a/pkg/obitools/obiconsensus/obiconsensus.go b/pkg/obitools/obiconsensus/obiconsensus.go index c4a147f..127bb75 100644 --- a/pkg/obitools/obiconsensus/obiconsensus.go +++ b/pkg/obitools/obiconsensus/obiconsensus.go @@ -25,8 +25,19 @@ import ( func BuildConsensus(seqs obiseq.BioSequenceSlice, consensus_id string, kmer_size int, + filter_out float64, save_graph bool, dirname string) (*obiseq.BioSequence, error) { + if seqs.Len() == 0 { + return nil, fmt.Errorf("no sequence provided") + } + + if seqs.Len() == 1 { + seq := seqs[0].Copy() + seq.SetAttribute("obiconsensus_consensus", false) + return seq, nil + } + if save_graph { if dirname == "" { dirname = "." 
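// Aside (not part of the patch): a quick usage sketch of the obistats.Mode
// helper added in minmax.go above; LongestConsensus uses it to turn the modal
// node weight into a coverage threshold. The import path is assumed from this
// repository's layout and the sample values are illustrative only.

package main

import (
	"fmt"

	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obistats"
)

func main() {
	coverage := []uint{1, 4, 4, 4, 5, 4, 3, 4, 2}

	fmt.Println(obistats.Mode(coverage)) // 4, the most frequent node weight
	fmt.Println(obistats.Min(coverage))  // 1
}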
@@ -104,7 +115,7 @@ func BuildConsensus(seqs obiseq.BioSequenceSlice, log.Debugf("Graph size : %d\n", graph.Len()) total_kmer := graph.Len() - seq, err := graph.LongestConsensus(consensus_id) + seq, err := graph.LongestConsensus(consensus_id, filter_out) sumCount := 0 @@ -112,7 +123,7 @@ func BuildConsensus(seqs obiseq.BioSequenceSlice, for _, s := range seqs { sumCount += s.Count() } - + seq.SetAttribute("obiconsensus_consensus", true) seq.SetAttribute("obiconsensus_weight", sumCount) seq.SetAttribute("obiconsensus_seq_length", seq.Len()) seq.SetAttribute("obiconsensus_kmer_size", kmer_size) @@ -136,6 +147,10 @@ func SampleWeight(seqs *obiseq.BioSequenceSlice, sample, sample_key string) func stats := (*seqs)[i].StatsOn(obiseq.MakeStatsOnDescription(sample_key), "NA") + if stats == nil { + log.Panicf("Sample %s not found in sequence %d", sample, i) + } + if value, ok := stats[sample]; ok { return float64(value) } @@ -292,16 +307,16 @@ func MinionDenoise(graph *obigraph.Graph[*obiseq.BioSequence, Mutation], pack[degree] = v clean, err = BuildConsensus(pack, fmt.Sprintf("%s_consensus", v.Id()), - kmer_size, + kmer_size, CLILowCoverage(), CLISaveGraphToFiles(), CLIGraphFilesDirectory()) if err != nil { log.Warning(err) - clean = (*graph.Vertices)[i] + clean = (*graph.Vertices)[i].Copy() clean.SetAttribute("obiconsensus_consensus", false) - } else { - clean.SetAttribute("obiconsensus_consensus", true) + } + pack.Recycle(false) } else { @@ -318,8 +333,9 @@ func MinionDenoise(graph *obigraph.Graph[*obiseq.BioSequence, Mutation], annotations := v.Annotations() + staton := obiseq.StatsOnSlotName(sample_key) for k, v := range annotations { - if !clean.HasAttribute(k) { + if !clean.HasAttribute(k) && k != staton { clean.SetAttribute(k, v) } } @@ -334,6 +350,83 @@ func MinionDenoise(graph *obigraph.Graph[*obiseq.BioSequence, Mutation], return denoised } + +func MinionClusterDenoise(graph *obigraph.Graph[*obiseq.BioSequence, Mutation], + sample_key string, kmer_size int) obiseq.BioSequenceSlice { + denoised := obiseq.MakeBioSequenceSlice() + seqs := (*obiseq.BioSequenceSlice)(graph.Vertices) + weight := SampleWeight(seqs, graph.Name, sample_key) + seqWeights := make([]float64, len(*seqs)) + + // Compute weights for each vertex as the sum of the weights of its neighbors + + log.Info("") + log.Infof("Sample %s: Computing weights", graph.Name) + for i := range *seqs { + w := weight(i) + for _, j := range graph.Neighbors(i) { + w += weight(j) + } + + seqWeights[i] = w + } + + log.Infof("Sample %s: Done computing weights", graph.Name) + + log.Infof("Sample %s: Clustering", graph.Name) + // Look for vertex not having a neighbor with a higher weight + for i := range *seqs { + v := (*seqs)[i] + head := true + neighbors := graph.Neighbors(i) + for _, j := range neighbors { + if seqWeights[i] < seqWeights[j] { + head = false + continue + } + } + + if head { + pack := obiseq.MakeBioSequenceSlice(len(neighbors) + 1) + for k, j := range neighbors { + pack[k] = (*seqs)[j] + } + pack[len(neighbors)] = v + + clean, err := BuildConsensus(pack, + fmt.Sprintf("%s_consensus", v.Id()), + kmer_size, CLILowCoverage(), + CLISaveGraphToFiles(), CLIGraphFilesDirectory()) + + if err != nil { + log.Warning(err) + clean = (*graph.Vertices)[i].Copy() + clean.SetAttribute("obiconsensus_consensus", false) + } + pack.Recycle(false) + + clean.SetAttribute(sample_key, graph.Name) + + annotations := v.Annotations() + clean.SetCount(int(weight(i))) + + staton := obiseq.StatsOnSlotName(sample_key) + + for k, v := range annotations { + if 
!clean.HasAttribute(k) && k != staton { + clean.SetAttribute(k, v) + } + } + + denoised = append(denoised, clean) + } + } + + log.Infof("Sample %s: Done clustering", graph.Name) + + return denoised +} + func CLIOBIMinion(itertator obiiter.IBioSequence) obiiter.IBioSequence { dirname := CLIGraphFilesDirectory() newIter := obiiter.MakeIBioSequence() @@ -395,9 +488,17 @@ func CLIOBIMinion(itertator obiiter.IBioSequence) obiiter.IBioSequence { false, 1, 0, 3) } - denoised := MinionDenoise(graph, - CLISampleAttribute(), - CLIKmerSize()) + var denoised obiseq.BioSequenceSlice + + if CLICluterDenoise() { + denoised = MinionClusterDenoise(graph, + CLISampleAttribute(), + CLIKmerSize()) + } else { + denoised = MinionDenoise(graph, + CLISampleAttribute(), + CLIKmerSize()) + } newIter.Push(obiiter.MakeBioSequenceBatch(source, sample_order, denoised)) @@ -411,9 +512,14 @@ func CLIOBIMinion(itertator obiiter.IBioSequence) obiiter.IBioSequence { newIter.WaitAndClose() }() - obiuniq.AddStatsOn(CLISampleAttribute()) - // obiuniq.AddStatsOn("sample:obiconsensus_weight") - obiuniq.SetUniqueInMemory(false) - obiuniq.SetNoSingleton(CLINoSingleton()) - return obiuniq.CLIUnique(newIter).Pipe(obiiter.WorkerPipe(obiannotate.AddSeqLengthWorker(), false)) + res := newIter + if CLIUnique() { + obiuniq.AddStatsOn(CLISampleAttribute()) + // obiuniq.AddStatsOn("sample:obiconsensus_weight") + obiuniq.SetUniqueInMemory(false) + obiuniq.SetNoSingleton(CLINoSingleton()) + res = obiuniq.CLIUnique(newIter) + } + + return res.Pipe(obiiter.WorkerPipe(obiannotate.AddSeqLengthWorker(), false)) } diff --git a/pkg/obitools/obiconsensus/options.go b/pkg/obitools/obiconsensus/options.go index 4efde43..48d11d5 100644 --- a/pkg/obitools/obiconsensus/options.go +++ b/pkg/obitools/obiconsensus/options.go @@ -8,8 +8,6 @@ import ( var _distStepMax = 1 var _sampleAttribute = "sample" -var _ratioMax = 1.0 - var _clusterMode = false var _onlyHead = false @@ -20,6 +18,10 @@ var _NoSingleton = false var _saveGraph = "__@@NOSAVE@@__" var _saveRatio = "__@@NOSAVE@@__" +var _lowCoverage = 0.0 + +var _unique = false + // ObiminionOptionSet sets the options for obiminion. // // options: The options for configuring obiminion. @@ -50,6 +52,19 @@ func ObiminionOptionSet(options *getoptions.GetOpt) { options.BoolVar(&_NoSingleton, "no-singleton", _NoSingleton, options.Description("If set, sequences occurring a single time in the data set are discarded.")) + options.BoolVar(&_clusterMode, "cluster", _clusterMode, + options.Alias("C"), + options.Description("Switch obiconsensus into its clustering mode."), + ) + + options.BoolVar(&_unique, "unique", _unique, + options.Alias("U"), + options.Description("If set, sequences are dereplicated on the output (obiuniq)."), + ) + + options.Float64Var(&_lowCoverage, "low-coverage", _lowCoverage, + options.Description("If the coverage of a sample is lower than this value, it will be discarded."), + ) } // OptionSet sets up the options for the obiminion package. @@ -129,4 +144,16 @@ func CLIKmerSize() int { // Returns a boolean value indicating whether or not singleton sequences should be discarded. func CLINoSingleton() bool { return _NoSingleton -} \ No newline at end of file +} + +func CLICluterDenoise() bool { + return _clusterMode +} + +func CLIUnique() bool { + return _unique +} + +func CLILowCoverage() float64 { + return _lowCoverage +}
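The new --cluster, --unique and --low-coverage flags follow the same go-getoptions registration pattern as the existing options. A standalone sketch of that pattern is shown below; the import path is assumed to be github.com/DavidGamba/go-getoptions (suggested by the getoptions package name), and the descriptions are placeholders, not the patch's wording.

package main

import (
	"fmt"
	"os"

	"github.com/DavidGamba/go-getoptions"
)

func main() {
	var cluster bool
	var lowCoverage float64

	opt := getoptions.New()
	opt.BoolVar(&cluster, "cluster", false,
		opt.Alias("C"),
		opt.Description("Switch the sketch into its clustering mode."))
	opt.Float64Var(&lowCoverage, "low-coverage", 0.0,
		opt.Description("Coverage threshold used by the sketch."))

	remaining, err := opt.Parse(os.Args[1:])
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}

	fmt.Println("cluster:", cluster, "low-coverage:", lowCoverage, "args:", remaining)
}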