correction of several small bugs

This commit is contained in:
Eric Coissac
2024-09-03 06:08:07 -03:00
parent 373464cb06
commit 65ae82622e
22 changed files with 770 additions and 79 deletions

View File

@ -1,30 +1,72 @@
package obialign
import log "github.com/sirupsen/logrus"
// buffIndex converts a pair of coordinates (i, j) into a linear index in a
// row-major matrix whose rows are of length width. The coordinates are
// (-1)-indexed, while the linear index is 0-indexed. The function first adds 1
// to both coordinates so that the (-1,-1) coordinate maps to position 0 in the
// matrix, and then computes the linear index by multiplying the first
// coordinate by the width and adding the second coordinate.
func buffIndex(i, j, width int) int {
return (i+1)*width + (j + 1)
}
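// A small worked example of the mapping (illustration only, not part of the
// commit), using width = 5, i.e. a 4-nucleotide pattern:
//
//   buffIndex(-1, -1, 5) == 0    virtual top-left corner of the matrix
//   buffIndex(-1,  3, 5) == 4    last cell of the virtual first row
//   buffIndex( 2, -1, 5) == 15   virtual first column, third sequence row
//   buffIndex( 2,  3, 5) == 19   (2+1)*5 + (3+1)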
// LocatePattern is a function to locate a pattern in a sequence.
//
// It uses a dynamic programming approach to build a matrix of scores.
// The score at each cell is the maximum of the score of the cell
// above it (representing a deletion), the score of the cell to its
// left (representing an insertion), and the score of the cell
// diagonally above it (representing a match).
//
// The score of a match is 0 if the two characters are the same,
// and -1 if they are different.
//
// The function returns the start and end positions of the best
// match, as well as the number of errors in the best match.
func LocatePattern(pattern, sequence []byte) (int, int, int) {
// Pattern spreads over the columns
// Sequence spreads over the rows
width := len(pattern) + 1
buffsize := (len(pattern) + 1) * (len(sequence) + 1)
buffer := make([]int, buffsize)
if len(pattern) >= len(sequence) {
log.Panicf("Pattern %s must be shorter than sequence %s", pattern, sequence)
}
// The path matrix keeps track of the best path through the matrix
// 0 : indicates the diagonal path
// 1 : indicates the up path
// -1 : indicates the left path
path := make([]int, buffsize)
// Initialize the first row of the matrix
for j := 0; j < len(pattern); j++ {
idx := buffIndex(-1, j, width)
buffer[idx] = -j - 1
path[idx] = -1
}
// Initialize the first column of the matrix
// The alignment is end-gap free, so the first column is set to 0
// to allow the primer to shift freely along the sequence
for i := -1; i < len(sequence); i++ {
idx := buffIndex(i, -1, width)
buffer[idx] = 0
path[idx] = +1
}
// Fills the matrix except the last column
// where gaps must be free too.
path[0] = 0
jmax := len(pattern) - 1
for i := 0; i < len(sequence); i++ {
for j := 0; j < jmax; j++ {
// Mismatch score = -1
// Match score = 0
match := -1
if _samenuc(pattern[j], sequence[i]) {
match = 0
@ -33,6 +75,8 @@ func LocatePattern(pattern, sequence []byte) (int, int, int) {
idx := buffIndex(i, j, width)
diag := buffer[buffIndex(i-1, j-1, width)] + match
// Each gap cost -1
left := buffer[buffIndex(i, j-1, width)] - 1
up := buffer[buffIndex(i-1, j, width)] - 1
@ -51,9 +95,12 @@ func LocatePattern(pattern, sequence []byte) (int, int, int) {
}
}
// Fills the last column, where up gaps are free
for i := 0; i < len(sequence); i++ {
idx := buffIndex(i, jmax, width)
// Mismatch score = -1
// Match score = 0
match := -1
if _samenuc(pattern[jmax], sequence[i]) {
match = 0
@ -65,6 +112,7 @@ func LocatePattern(pattern, sequence []byte) (int, int, int) {
score := max(diag, up, left)
buffer[idx] = score
switch {
case score == left:
path[idx] = -1
@ -75,11 +123,13 @@ func LocatePattern(pattern, sequence []byte) (int, int, int) {
}
}
// Backtracking of the alignment
i := len(sequence) - 1
j := jmax
end := -1
lali := 0
for i > -1 && j > 0 {
for j > 0 { // was: i > -1 && j > 0
lali++
switch path[buffIndex(i, j, width)] {
case 0:
@ -100,5 +150,9 @@ func LocatePattern(pattern, sequence []byte) (int, int, int) {
}
}
// log.Warnf("from : %d to: %d error: %d match: %v",
// i, end+1, -buffer[buffIndex(len(sequence)-1, len(pattern)-1, width)],
// string(sequence[i:(end+1)]))
return i, end + 1, -buffer[buffIndex(len(sequence)-1, len(pattern)-1, width)]
}
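A minimal usage sketch, not part of the commit; the obialign import path is inferred from the other obitools4 packages appearing elsewhere in this diff:

package main

import (
	"fmt"

	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obialign"
)

func main() {
	pattern := []byte("acgtac") // must be strictly shorter than the sequence
	sequence := []byte("ttttacgtacgttttttttt")
	// start and end delimit the best hit of the pattern in the sequence
	// (end excluded); nerr is the number of errors of that alignment.
	start, end, nerr := obialign.LocatePattern(pattern, sequence)
	fmt.Println(start, end, nerr)
}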

View File

@ -137,6 +137,28 @@ char *reverseSequence(char *str,char isPattern)
return str;
}
/* -------------------------------------------- */
/* lowercase sequence */
/* -------------------------------------------- */
#define IS_UPPER(c) (((c) >= 'A') && ((c) <= 'Z'))
#define TO_LOWER(c) ((c) - 'A' + 'a')
char *LowerSequence(char *seq)
{
char *cseq;
for (cseq = seq ; *cseq ; cseq++)
if (IS_UPPER(*cseq))
*cseq = TO_LOWER(*cseq);
return seq;
}
#undef IS_UPPER
#undef TO_LOWER
char *ecoComplementPattern(char *nucAcSeq)
{
return reverseSequence(LXBioSeqComplement(nucAcSeq),1);
@ -165,6 +187,7 @@ void UpperSequence(char *seq)
/* -------------------------------------------- */
/* encode sequence */
/* IS_UPPER is slightly faster than isupper */

View File

@ -9,6 +9,7 @@ import "C"
import (
"errors"
"runtime"
"strings"
"unsafe"
log "github.com/sirupsen/logrus"
@ -114,7 +115,7 @@ func (pattern ApatPattern) ReverseComplement() (ApatPattern, error) {
C.free(unsafe.Pointer(errmsg))
return ApatPattern{nil}, errors.New(message)
}
spat := C.GoString(apc.cpat)
spat := strings.ToLower(C.GoString(apc.cpat))
ap := _ApatPattern{apc, spat}
runtime.SetFinalizer(&ap, func(p *_ApatPattern) {
@ -366,6 +367,18 @@ func (pattern ApatPattern) BestMatch(sequence ApatSequence, begin, length int) (
return
}
// FilterBestMatch filters the best non-overlapping matches of a given pattern in a sequence.
//
// It takes the following parameters:
// - pattern: the pattern to search for (ApatPattern).
// - sequence: the sequence to search in (ApatSequence).
// - begin: the starting index of the search (int).
// - length: the length of the search (int).
//
// It returns a slice of [3]int representing the locations of all non-overlapping matches in the sequence.
// The first two values of the [3]int indicate respectively the start and end positions of
// the match. Following the Go convention, the end position is not included in the
// match. The third value indicates the number of errors detected for this occurrence.
func (pattern ApatPattern) FilterBestMatch(sequence ApatSequence, begin, length int) (loc [][3]int) {
res := pattern.FindAllIndex(sequence, begin, length)
filtered := make([][3]int, 0, len(res))
@ -424,13 +437,15 @@ func (pattern ApatPattern) FilterBestMatch(sequence ApatSequence, begin, length
func (pattern ApatPattern) AllMatches(sequence ApatSequence, begin, length int) (loc [][3]int) {
res := pattern.FilterBestMatch(sequence, begin, length)
j := 0
for _, m := range res {
// Recompute the start and end position of the match
// when the pattern allows for indels
if m[2] > 0 && pattern.pointer.pointer.hasIndel {
start := m[0] - m[2]
// log.Warnf("Locating indel on sequence %s[%s]", sequence.pointer.reference.Id(), pattern.String())
start := m[0] - m[2]*2
start = max(start, 0)
end := start + int(pattern.pointer.pointer.patlen) + 2*m[2]
end := start + int(pattern.pointer.pointer.patlen) + 4*m[2]
end = min(end, sequence.Len())
// 1 << 30 = 1,073,741,824 = 1 GB
// It's a virtual array mapping the sequence to the pattern
@ -439,18 +454,21 @@ func (pattern ApatPattern) AllMatches(sequence ApatSequence, begin, length int)
cpattern := (*[1 << 30]byte)(unsafe.Pointer(pattern.pointer.pointer.cpat))
frg := sequence.pointer.reference.Sequence()[start:end]
begin, end, score := obialign.LocatePattern((*cpattern)[0:int(pattern.pointer.pointer.patlen)], frg)
pb, pe, score := obialign.LocatePattern((*cpattern)[0:int(pattern.pointer.pointer.patlen)], frg)
// olderr := m[2]
m[2] = score
m[0] = start + begin
m[1] = start + end
m[0] = start + pb
m[1] = start + pe
// log.Warnf("seq[%d@%d:%d] %d: %s %d - %s:%s:%s", i, m[0], m[1], olderr, sequence.pointer.reference.Id(), score,
// frg, (*cpattern)[0:int(pattern.pointer.pointer.patlen)], sequence.pointer.reference.Sequence()[m[0]:m[1]])
}
if int(pattern.pointer.pointer.maxerr) >= m[2] {
res[j] = m
j++
}
}
// log.Debugf("All matches : %v", res)
return res
return res[0:j]
}
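A sketch of how the [3]int locations documented above are typically consumed (illustration only; pattern and sequence stand for hypothetical, already-built ApatPattern and ApatSequence values):

// pattern: ApatPattern, sequence: ApatSequence (both hypothetical here).
for _, m := range pattern.AllMatches(sequence, 0, sequence.Len()) {
	start, end, nerr := m[0], m[1], m[2]
	// end is excluded, following the Go convention described above
	fmt.Printf("match [%d:%d) with %d error(s)\n", start, end, nerr)
}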

View File

@ -642,6 +642,8 @@ func ReadCSVNGSFilter(reader io.Reader) (*obingslibrary.NGSLibrary, error) {
}
ngsfilter.CheckPrimerUnicity()
for i := 0; i < len(params); i++ {
param := params[i][1]
if len(params[i]) < 3 {

View File

@ -3,6 +3,8 @@ package obiformats
import (
"bufio"
"bytes"
"encoding/csv"
"errors"
"io"
"path"
"regexp"
@ -39,6 +41,31 @@ type SequenceReader func(reader io.Reader, options ...WithOption) (obiiter.IBioS
// - io.Reader: A modified reader with the read data.
// - error: Any error encountered during the process.
func OBIMimeTypeGuesser(stream io.Reader) (*mimetype.MIME, io.Reader, error) {
csv := func(in []byte, limit uint32) bool {
in = dropLastLine(in, limit)
br := bytes.NewReader(in)
r := csv.NewReader(br)
r.Comma = ','
r.ReuseRecord = true
r.LazyQuotes = true
r.Comment = '#'
lines := 0
for {
_, err := r.Read()
if errors.Is(err, io.EOF) {
break
}
if err != nil {
return false
}
lines++
}
return r.FieldsPerRecord > 1 && lines > 1
}
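// For illustration (not part of the commit), a buffer like the following
// satisfies the detector above: it parses with more than one field per record
// and more than one data line,
//
//   id,count
//   seq_1,12
//   seq_2,7
//
// whereas a FASTA header such as ">seq_1 count=12" contains no comma, yields a
// single field per record, and is therefore rejected.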
fastaDetector := func(raw []byte, limit uint32) bool {
ok, err := regexp.Match("^>[^ ]", raw)
return ok && err == nil
@ -70,12 +97,14 @@ func OBIMimeTypeGuesser(stream io.Reader) (*mimetype.MIME, io.Reader, error) {
mimetype.Lookup("text/plain").Extend(ecoPCR2Detector, "text/ecopcr2", ".ecopcr")
mimetype.Lookup("text/plain").Extend(genbankDetector, "text/genbank", ".seq")
mimetype.Lookup("text/plain").Extend(emblDetector, "text/embl", ".dat")
mimetype.Lookup("text/plain").Extend(csv, "text/csv", ".csv")
mimetype.Lookup("application/octet-stream").Extend(fastaDetector, "text/fasta", ".fasta")
mimetype.Lookup("application/octet-stream").Extend(fastqDetector, "text/fastq", ".fastq")
mimetype.Lookup("application/octet-stream").Extend(ecoPCR2Detector, "text/ecopcr2", ".ecopcr")
mimetype.Lookup("application/octet-stream").Extend(genbankDetector, "text/genbank", ".seq")
mimetype.Lookup("application/octet-stream").Extend(emblDetector, "text/embl", ".dat")
mimetype.Lookup("application/octet-stream").Extend(csv, "text/csv", ".csv")
// Create a buffer to store the read data
buf := make([]byte, 1024*128)
@ -87,6 +116,7 @@ func OBIMimeTypeGuesser(stream io.Reader) (*mimetype.MIME, io.Reader, error) {
// Detect the MIME type using the mimetype library
mimeType := mimetype.Detect(buf)
if mimeType == nil {
return nil, nil, err
}

250
pkg/obifp/uint128_test.go Normal file
View File

@ -0,0 +1,250 @@
package obifp
import (
"math"
"reflect"
"testing"
"github.com/stretchr/testify/assert"
)
func TestUint128_Add(t *testing.T) {
u := Uint128{w1: 1, w0: 2}
v := Uint128{w1: 3, w0: 4}
w := u.Add(v)
assert.Equal(t, Uint128{w1: 4, w0: 6}, w)
u = Uint128{w1: 0, w0: 0}
v = Uint128{w1: 0, w0: 0}
w = u.Add(v)
assert.Equal(t, Uint128{w1: 0, w0: 0}, w)
u = Uint128{w1: 0, w0: math.MaxUint64}
v = Uint128{w1: 0, w0: 1}
w = u.Add(v)
assert.Equal(t, Uint128{w1: 1, w0: 0}, w)
}
func TestUint128_Add64(t *testing.T) {
u := Uint128{w1: 1, w0: 2}
v := uint64(3)
w := u.Add64(v)
assert.Equal(t, Uint128{w1: 1, w0: 5}, w)
u = Uint128{w1: 0, w0: 0}
v = uint64(0)
w = u.Add64(v)
assert.Equal(t, Uint128{w1: 0, w0: 0}, w)
u = Uint128{w1: 0, w0: math.MaxUint64}
v = uint64(1)
w = u.Add64(v)
assert.Equal(t, Uint128{w1: 1, w0: 0}, w)
}
func TestUint128_Sub(t *testing.T) {
u := Uint128{w1: 4, w0: 6}
v := Uint128{w1: 3, w0: 4}
w := u.Sub(v)
assert.Equal(t, Uint128{w1: 1, w0: 2}, w)
u = Uint128{w1: 0, w0: 0}
v = Uint128{w1: 0, w0: 0}
w = u.Sub(v)
assert.Equal(t, Uint128{w1: 0, w0: 0}, w)
u = Uint128{w1: 1, w0: 0}
v = Uint128{w1: 0, w0: 1}
w = u.Sub(v)
assert.Equal(t, Uint128{w1: 0, w0: math.MaxUint64}, w)
}
func TestUint128_Mul64(t *testing.T) {
u := Uint128{w1: 1, w0: 2}
v := uint64(3)
w := u.Mul64(v)
if w.w1 != 3 || w.w0 != 6 {
t.Errorf("Mul64(%v, %v) = %v, want %v", u, v, w, Uint128{w1: 3, w0: 6})
}
u = Uint128{w1: 0, w0: 0}
v = uint64(0)
w = u.Mul64(v)
if w.w1 != 0 || w.w0 != 0 {
t.Errorf("Mul64(%v, %v) = %v, want %v", u, v, w, Uint128{w1: 0, w0: 0})
}
u = Uint128{w1: 0, w0: math.MaxUint64}
v = uint64(2)
w = u.Mul64(v)
if w.w1 != 1 || w.w0 != 18446744073709551614 {
t.Errorf("Mul64(%v, %v) = %v, want %v", u, v, w, Uint128{w1: 1, w0: 18446744073709551614})
}
}
func TestUint128_Mul(t *testing.T) {
tests := []struct {
name string
u Uint128
v Uint128
expected Uint128
}{
{
name: "simple multiplication",
u: Uint128{w1: 1, w0: 2},
v: Uint128{w1: 3, w0: 4},
expected: Uint128{w1: 10, w0: 8},
},
{
name: "multiplication with overflow",
u: Uint128{w1: 0, w0: math.MaxUint64},
v: Uint128{w1: 0, w0: 2},
expected: Uint128{w1: 1, w0: 18446744073709551614},
},
{
name: "multiplication with zero",
u: Uint128{w1: 0, w0: 0},
v: Uint128{w1: 0, w0: 0},
expected: Uint128{w1: 0, w0: 0},
},
{
name: "multiplication with large numbers",
u: Uint128{w1: 100, w0: 200},
v: Uint128{w1: 300, w0: 400},
expected: Uint128{w1: 100000, w0: 80000},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
actual := tt.u.Mul(tt.v)
if !reflect.DeepEqual(actual, tt.expected) {
t.Errorf("Mul(%v, %v) = %v, want %v", tt.u, tt.v, actual, tt.expected)
}
})
}
}
func TestUint128_QuoRem(t *testing.T) {
u := Uint128{w1: 3, w0: 8}
v := Uint128{w1: 0, w0: 4}
q, r := u.QuoRem(v)
assert.Equal(t, Uint128{w1: 0, w0: 2}, q)
assert.Equal(t, Uint128{w1: 0, w0: 0}, r)
}
func TestUint128_QuoRem64(t *testing.T) {
u := Uint128{w1: 0, w0: 6}
v := uint64(3)
q, r := u.QuoRem64(v)
assert.Equal(t, Uint128{w1: 0, w0: 2}, q)
assert.Equal(t, uint64(0), r)
}
func TestUint128_Div(t *testing.T) {
u := Uint128{w1: 3, w0: 8}
v := Uint128{w1: 0, w0: 4}
q := u.Div(v)
assert.Equal(t, Uint128{w1: 0, w0: 2}, q)
}
func TestUint128_Div64(t *testing.T) {
u := Uint128{w1: 0, w0: 6}
v := uint64(3)
q := u.Div64(v)
assert.Equal(t, Uint128{w1: 0, w0: 2}, q)
}
func TestUint128_Mod(t *testing.T) {
u := Uint128{w1: 3, w0: 8}
v := Uint128{w1: 0, w0: 4}
r := u.Mod(v)
assert.Equal(t, Uint128{w1: 0, w0: 0}, r)
}
func TestUint128_Mod64(t *testing.T) {
u := Uint128{w1: 0, w0: 6}
v := uint64(3)
r := u.Mod64(v)
assert.Equal(t, uint64(0), r)
}
func TestUint128_Cmp(t *testing.T) {
u := Uint128{w1: 1, w0: 2}
v := Uint128{w1: 3, w0: 4}
assert.Equal(t, -1, u.Cmp(v))
}
func TestUint128_Cmp64(t *testing.T) {
u := Uint128{w1: 1, w0: 2}
v := uint64(3)
assert.Equal(t, -1, u.Cmp64(v))
}
func TestUint128_Equals(t *testing.T) {
u := Uint128{w1: 1, w0: 2}
v := Uint128{w1: 1, w0: 2}
assert.Equal(t, true, u.Equals(v))
}
func TestUint128_LessThan(t *testing.T) {
u := Uint128{w1: 1, w0: 2}
v := Uint128{w1: 3, w0: 4}
assert.Equal(t, true, u.LessThan(v))
}
func TestUint128_GreaterThan(t *testing.T) {
u := Uint128{w1: 1, w0: 2}
v := Uint128{w1: 3, w0: 4}
assert.Equal(t, false, u.GreaterThan(v))
}
func TestUint128_LessThanOrEqual(t *testing.T) {
u := Uint128{w1: 1, w0: 2}
v := Uint128{w1: 3, w0: 4}
assert.Equal(t, true, u.LessThanOrEqual(v))
}
func TestUint128_GreaterThanOrEqual(t *testing.T) {
u := Uint128{w1: 1, w0: 2}
v := Uint128{w1: 3, w0: 4}
assert.Equal(t, false, u.GreaterThanOrEqual(v))
}
func TestUint128_And(t *testing.T) {
u := Uint128{w1: 1, w0: 2}
v := Uint128{w1: 3, w0: 4}
w := u.And(v)
assert.Equal(t, Uint128{w1: 1, w0: 0}, w)
}
func TestUint128_Or(t *testing.T) {
u := Uint128{w1: 1, w0: 2}
v := Uint128{w1: 3, w0: 4}
w := u.Or(v)
assert.Equal(t, Uint128{w1: 3, w0: 6}, w)
}
func TestUint128_Xor(t *testing.T) {
u := Uint128{w1: 1, w0: 2}
v := Uint128{w1: 3, w0: 4}
w := u.Xor(v)
assert.Equal(t, Uint128{w1: 2, w0: 6}, w)
}
func TestUint128_Not(t *testing.T) {
u := Uint128{w1: 1, w0: 2}
w := u.Not()
assert.Equal(t, Uint128{w1: math.MaxUint64 - 1, w0: math.MaxUint64 - 2}, w)
}
func TestUint128_AsUint64(t *testing.T) {
u := Uint128{w1: 0, w0: 5}
v := u.AsUint64()
assert.Equal(t, uint64(5), v)
}

View File

@ -10,6 +10,7 @@ import (
"slices"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obistats"
log "github.com/sirupsen/logrus"
)
@ -312,10 +313,48 @@ func (g *DeBruijnGraph) LongestPath(max_length int) []uint64 {
return path
}
func (g *DeBruijnGraph) LongestConsensus(id string) (*obiseq.BioSequence, error) {
func (g *DeBruijnGraph) LongestConsensus(id string, min_cov float64) (*obiseq.BioSequence, error) {
if g.Len() == 0 {
return nil, fmt.Errorf("graph is empty")
}
//path := g.LongestPath(max_length)
path := g.HaviestPath()
s := g.DecodePath(path)
spath := path
if min_cov > 0 {
wp := make([]uint, len(path))
for i, n := range path {
wp[i] = g.graph[n]
}
mp := uint(float64(obistats.Mode(wp))*min_cov + 0.5)
from := 0
for i, n := range path {
if g.graph[n] < mp {
from = i + 1
} else {
break
}
}
to := len(path)
for i := len(path) - 1; i >= 0; i-- {
n := path[i]
if g.graph[n] < mp {
to = i
} else {
break
}
}
spath = path[from:to]
}
s := g.DecodePath(spath)
if len(s) > 0 {
seq := obiseq.NewBioSequence(
@ -387,6 +426,48 @@ func (g *DeBruijnGraph) Weight(index uint64) int {
return int(val)
}
// WeightMode returns the mode weight of the nodes in the DeBruijnGraph.
//
// It builds a histogram of the node weights and returns the most frequent
// weight, ignoring the weight value 1.
//
// Returns:
// - int: the mode weight value.
func (g *DeBruijnGraph) WeightMode() int {
dist := make(map[uint]int)
for _, w := range g.graph {
dist[w]++
}
maxi := 0
wmax := uint(0)
for k, v := range dist {
if v > maxi && k > 1 {
maxi = v
wmax = k
}
}
return int(wmax)
}
// WeightMean returns the mean weight of the nodes in the DeBruijnGraph.
//
// Returns:
// - float64: the mean weight of the nodes in the graph.
func (g *DeBruijnGraph) WeightMean() float64 {
sum := 0
for _, w := range g.graph {
i := int(w)
sum += i
}
return float64(sum) / float64(len(g.graph))
}
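As a small standalone illustration (not part of the commit) of the two summaries defined above:

// weights          : 1 1 1 2 2 3 3 3
// histogram (dist) : {1:3, 2:2, 3:3}
// WeightMode()     : 3    (weight 1 is skipped by the k > 1 test)
// WeightMean()     : 16 / 8 = 2.0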
// append appends a sequence of nucleotides to the DeBruijnGraph.
//
// Parameters:
@ -637,7 +718,7 @@ func (g *DeBruijnGraph) HaviestPath() []uint64 {
}
if slices.Contains(heaviestPath, currentNode) {
log.Fatalf("Cycle detected %v -> %v (%v) len(%v)", heaviestPath, currentNode, startNodes, len(heaviestPath))
log.Panicf("Cycle detected %v -> %v (%v) len(%v), graph: %v", heaviestPath, currentNode, startNodes, len(heaviestPath), g.Len())
return nil
}

View File

@ -20,6 +20,8 @@ import (
func NewInterpreter() *lua.LState {
lua := lua.NewState()
registerMutexType(lua)
RegisterObilib(lua)
RegisterObiContext(lua)
@ -117,7 +119,7 @@ func LuaWorker(proto *lua.FunctionProto) obiseq.SeqWorker {
case *obiseq.BioSequenceSlice:
return *val, err
default:
return nil, fmt.Errorf("worker function doesn't return the correct type")
return nil, fmt.Errorf("worker function doesn't return the correct type %T", val)
}
}

View File

@ -36,6 +36,8 @@ func obicontextGetSet(interpreter *lua.LState) int {
__lua_obicontext.Store(key, float64(val))
case lua.LString:
__lua_obicontext.Store(key, string(val))
case *lua.LUserData:
__lua_obicontext.Store(key, val.Value)
case *lua.LTable:
__lua_obicontext.Store(key, Table2Interface(interpreter, val))
default:

View File

@ -1,6 +1,8 @@
package obilua
import (
"sync"
log "github.com/sirupsen/logrus"
lua "github.com/yuin/gopher-lua"
@ -46,6 +48,8 @@ func pushInterfaceToLua(L *lua.LState, val interface{}) {
pushSliceBoolToLua(L, v)
case nil:
L.Push(lua.LNil)
case *sync.Mutex:
pushMutexToLua(L, v)
default:
log.Fatalf("Cannot deal with value (%T) : %v", val, val)
}

63
pkg/obilua/mutex.go Normal file
View File

@ -0,0 +1,63 @@
package obilua
import (
lua "github.com/yuin/gopher-lua"
"sync"
)
const luaMutexTypeName = "Mutex"
func registerMutexType(luaState *lua.LState) {
mutexType := luaState.NewTypeMetatable(luaMutexTypeName)
luaState.SetGlobal(luaMutexTypeName, mutexType)
luaState.SetField(mutexType, "new", luaState.NewFunction(newMutex))
luaState.SetField(mutexType, "__index",
luaState.SetFuncs(luaState.NewTable(),
mutexMethods))
}
func mutex2Lua(interpreter *lua.LState, mutex *sync.Mutex) lua.LValue {
ud := interpreter.NewUserData()
ud.Value = mutex
interpreter.SetMetatable(ud, interpreter.GetTypeMetatable(luaMutexTypeName))
return ud
}
func pushMutexToLua(luaState *lua.LState, mutex *sync.Mutex) {
luaState.Push(mutex2Lua(luaState, mutex))
}
func newMutex(luaState *lua.LState) int {
m := &sync.Mutex{}
pushMutexToLua(luaState, m)
return 1
}
var mutexMethods = map[string]lua.LGFunction{
"lock": mutexLock,
"unlock": mutexUnlock,
}
func checkMutex(L *lua.LState) *sync.Mutex {
ud := L.CheckUserData(1)
if mutex, ok := ud.Value.(*sync.Mutex); ok {
return mutex
}
L.ArgError(1, "Mutex expected")
return nil
}
func mutexLock(L *lua.LState) int {
mutex := checkMutex(L)
mutex.Lock()
return 0
}
func mutexUnlock(L *lua.LState) int {
mutex := checkMutex(L)
mutex.Unlock()
return 0
}
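A minimal sketch of driving the new Mutex type from a Lua chunk hosted in Go (illustration only; it assumes the state comes from NewInterpreter, which calls registerMutexType as shown elsewhere in this diff, and that logrus is available as log):

L := NewInterpreter() // obilua.NewInterpreter() when used from outside the package
defer L.Close()
if err := L.DoString(`
    local m = Mutex.new()
    m:lock()
    -- protected section shared between Lua workers
    m:unlock()
`); err != nil {
	log.Fatal(err)
}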

View File

@ -2,9 +2,10 @@ package obingslibrary
import (
"fmt"
"log"
"strings"
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiapat"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"

View File

@ -3,7 +3,7 @@ package obingslibrary
import (
"fmt"
"math"
"sort"
"slices"
log "github.com/sirupsen/logrus"
@ -87,6 +87,8 @@ func lookForTag(seq string, delimiter byte) string {
i := len(seq) - 1
// log.Warnf("Provided fragment : %s", string(seq))
for i >= 0 && seq[i] != delimiter {
i--
}
@ -107,6 +109,7 @@ func lookForTag(seq string, delimiter byte) string {
return ""
}
// log.Warnf("extracted : %s", string(seq[begin:end]))
return seq[begin:end]
}
@ -172,11 +175,12 @@ func (marker *Marker) beginDelimitedTagExtractor(
begin int,
forward bool) string {
taglength := marker.Forward_spacer + marker.Forward_tag_length
// log.Warn("beginDelimitedTagExtractor")
taglength := 2*marker.Forward_spacer + marker.Forward_tag_length
delimiter := marker.Forward_tag_delimiter
if !forward {
taglength = marker.Reverse_spacer + marker.Reverse_tag_length
taglength = 2*marker.Reverse_spacer + marker.Reverse_tag_length
delimiter = marker.Reverse_tag_delimiter
}
@ -240,6 +244,7 @@ func (marker *Marker) endDelimitedTagExtractor(
sequence *obiseq.BioSequence,
end int,
forward bool) string {
// log.Warn("endDelimitedTagExtractor")
taglength := marker.Reverse_spacer + marker.Reverse_tag_length
delimiter := marker.Reverse_tag_delimiter
@ -335,14 +340,17 @@ func (marker *Marker) beginTagExtractor(
sequence *obiseq.BioSequence,
begin int,
forward bool) string {
// log.Warnf("Forward : %v -> %d %c", forward, marker.Forward_spacer, marker.Forward_tag_delimiter)
// log.Warnf("Forward : %v -> %d %c", forward, marker.Reverse_spacer, marker.Reverse_tag_delimiter)
if forward {
if marker.Forward_tag_delimiter == 0 {
return marker.beginFixedTagExtractor(sequence, begin, forward)
} else {
if marker.Forward_tag_indels == 0 {
// log.Warnf("Delimited tag for forward primers %s", marker.forward.String())
return marker.beginDelimitedTagExtractor(sequence, begin, forward)
} else {
// log.Warn("Rescue tag for forward primers")
// log.Warnf("Rescue tag for forward primers %s", marker.forward.String())
return marker.beginRescueTagExtractor(sequence, begin, forward)
}
}
@ -351,9 +359,10 @@ func (marker *Marker) beginTagExtractor(
return marker.beginFixedTagExtractor(sequence, begin, forward)
} else {
if marker.Reverse_tag_indels == 0 {
// log.Warnf("Delimited tag for reverse/complement primers %s", marker.creverse.String())
return marker.beginDelimitedTagExtractor(sequence, begin, forward)
} else {
// log.Warn("Rescue tag for reverse/complement primers")
// log.Warnf("Rescue tag for reverse/complement primers %s", marker.creverse.String())
return marker.beginRescueTagExtractor(sequence, begin, forward)
}
}
@ -369,9 +378,10 @@ func (marker *Marker) endTagExtractor(
return marker.endFixedTagExtractor(sequence, end, forward)
} else {
if marker.Reverse_tag_indels == 0 {
// log.Warnf("Delimited tag for reverse primers %s", marker.reverse.String())
return marker.endDelimitedTagExtractor(sequence, end, forward)
} else {
// log.Warn("Rescue tag for reverse primers")
// log.Warnf("Rescue tag for reverse primers %s", marker.reverse.String())
return marker.endRescueTagExtractor(sequence, end, forward)
}
}
@ -380,9 +390,10 @@ func (marker *Marker) endTagExtractor(
return marker.endFixedTagExtractor(sequence, end, forward)
} else {
if marker.Forward_tag_indels == 0 {
// log.Warnf("Delimited tag for forward/complement primers %s", marker.cforward.String())
return marker.endDelimitedTagExtractor(sequence, end, forward)
} else {
// log.Warn("Rescue tag for forward/complement primers")
// log.Warnf("Rescue tag for forward/complement primers %s", marker.cforward.String())
return marker.endRescueTagExtractor(sequence, end, forward)
}
}
@ -609,9 +620,7 @@ func (library *NGSLibrary) ExtractMultiBarcode(sequence *obiseq.BioSequence) (ob
}
if len(matches) > 0 {
sort.Slice(matches, func(i, j int) bool {
return matches[i].Begin < matches[j].Begin
})
slices.SortFunc(matches, func(a, b PrimerMatch) int { return a.Begin - b.Begin })
state := 0
var from PrimerMatch

View File

@ -36,8 +36,9 @@ func MakeNGSLibrary() NGSLibrary {
}
func (library *NGSLibrary) GetMarker(forward, reverse string) (*Marker, bool) {
pair := PrimerPair{strings.ToLower(forward),
strings.ToLower(reverse)}
forward = strings.ToLower(forward)
reverse = strings.ToLower(reverse)
pair := PrimerPair{forward, reverse}
marker, ok := (library.Markers)[pair]
if ok {

View File

@ -7,7 +7,7 @@ import (
// TODO: The version number is extracted from git. This means that the version
// corresponds to the last commit, and not to the one in which the file will be
// committed
var _Commit = "31bfc88"
var _Commit = "373464c"
var _Version = "Release 4.2.0"
// Version returns the version of the obitools package.

View File

@ -72,8 +72,8 @@ func (s *BioSequence) HasAttribute(key string) bool {
ok := s.annotations != nil
if ok {
defer s.AnnotationsUnlock()
s.AnnotationsLock()
defer s.AnnotationsUnlock()
_, ok = s.annotations[key]
}
@ -112,8 +112,8 @@ func (s *BioSequence) GetAttribute(key string) (interface{}, bool) {
ok := s.annotations != nil
if ok {
defer s.AnnotationsUnlock()
s.AnnotationsLock()
defer s.AnnotationsUnlock()
val, ok = s.annotations[key]
}
@ -144,8 +144,8 @@ func (s *BioSequence) SetAttribute(key string, value interface{}) {
annot := s.Annotations()
defer s.AnnotationsUnlock()
s.AnnotationsLock()
defer s.AnnotationsUnlock()
annot[key] = value
}
@ -205,8 +205,8 @@ func (s *BioSequence) GetFloatAttribute(key string) (float64, bool) {
// No return value.
func (s *BioSequence) DeleteAttribute(key string) {
if s.annotations != nil {
defer s.AnnotationsUnlock()
s.AnnotationsLock()
defer s.AnnotationsUnlock()
delete(s.annotations, key)
}
}

View File

@ -65,7 +65,7 @@ type BioSequence struct {
feature []byte
paired *BioSequence // A pointer to the paired sequence
annotations Annotation
annot_lock sync.Mutex
annot_lock *sync.Mutex
}
// NewEmptyBioSequence creates a new BioSequence object with an empty sequence.
@ -90,7 +90,7 @@ func NewEmptyBioSequence(preallocate int) *BioSequence {
feature: nil,
paired: nil,
annotations: nil,
annot_lock: sync.Mutex{},
annot_lock: &sync.Mutex{},
}
}

View File

@ -187,7 +187,7 @@ func TestCopy(t *testing.T) {
"annotation1": "value1",
"annotation2": "value2",
},
annot_lock: sync.Mutex{},
annot_lock: &sync.Mutex{},
}
newSeq := seq.Copy()

View File

@ -91,8 +91,7 @@ func (sequence *BioSequence) HasStatsOn(key string) bool {
// - StatsOnValues
func (sequence *BioSequence) StatsOn(desc StatsOnDescription, na string) StatsOnValues {
mkey := StatsOnSlotName(desc.Name)
annotations := sequence.Annotations()
istat, ok := annotations[mkey]
istat, ok := sequence.GetAttribute(mkey)
var stats StatsOnValues
var newstat bool
@ -117,39 +116,40 @@ func (sequence *BioSequence) StatsOn(desc StatsOnDescription, na string) StatsOn
}
}
default:
stats = make(StatsOnValues, 10)
annotations[mkey] = stats
stats = make(StatsOnValues)
sequence.SetAttribute(mkey, stats)
newstat = true
}
} else {
stats = make(StatsOnValues, 10)
annotations[mkey] = stats
stats = make(StatsOnValues)
sequence.SetAttribute(mkey, stats)
newstat = true
}
if newstat && sequence.StatsPlusOne(desc, sequence, na) {
delete(sequence.Annotations(), desc.Key)
if newstat {
sequence.StatsPlusOne(desc, sequence, na)
}
return stats
}
// StatsPlusOne adds the count of the sequence toAdd to the count of the key in the stats.
// StatsPlusOne updates the statistics on the given attribute (desc) on the receiver BioSequence
// with the value of the attribute on the toAdd BioSequence.
//
// Parameters:
// - key: the attribute key (string) to be summarized
// - toAdd: the BioSequence to add to the stats
// - desc: StatsOnDescription of the attribute to be updated
// - toAdd: the BioSequence containing the attribute to be updated
// - na: the value to be used if the attribute is not present
//
// Return type:
// - bool
// - bool: true if the update was successful, false otherwise
func (sequence *BioSequence) StatsPlusOne(desc StatsOnDescription, toAdd *BioSequence, na string) bool {
sval := na
annotations := sequence.Annotations()
stats := sequence.StatsOn(desc, na)
retval := false
if toAdd.HasAnnotation() {
value, ok := toAdd.Annotations()[desc.Key]
value, ok := toAdd.GetAttribute(desc.Key)
if ok {
@ -178,8 +178,9 @@ func (sequence *BioSequence) StatsPlusOne(desc StatsOnDescription, toAdd *BioSeq
if !ok {
old = 0
}
stats[sval] = old + desc.Weight(toAdd)
annotations[StatsOnSlotName(desc.Name)] = stats // TODO: check if this is necessary
sequence.SetAttribute(StatsOnSlotName(desc.Name), stats) // TODO: check if this is necessary
return retval
}
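A sketch of the intended usage (illustration only; merged and toAdd are hypothetical *obiseq.BioSequence values, and MakeStatsOnDescription is used as in the obiconsensus changes later in this diff):

desc := obiseq.MakeStatsOnDescription("sample")
merged.StatsOn(desc, "NA")             // creates the per-sample stats slot if needed
merged.StatsPlusOne(desc, toAdd, "NA") // adds toAdd's weight under its "sample" value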
@ -263,7 +264,7 @@ func (sequence *BioSequence) Merge(tomerge *BioSequence, na string, inplace bool
}
}
annotations["count"] = count
sequence.SetCount(count)
return sequence
}
@ -282,7 +283,7 @@ func (sequences BioSequenceSlice) Merge(na string, statsOn StatsOnDescriptions)
seq.SetQualities(nil)
if len(sequences) == 1 {
seq.Annotations()["count"] = seq.Count()
seq.SetCount(seq.Count())
for _, desc := range statsOn {
seq.StatsOn(desc, na)
}
@ -296,5 +297,4 @@ func (sequences BioSequenceSlice) Merge(na string, statsOn StatsOnDescriptions)
sequences.Recycle(false)
return seq
}

View File

@ -1,11 +1,9 @@
package obistats
func Max[T int64 | int32 | int16 | int8 | int | float32 | float64] (data []T) T {
func Max[T int64 | int32 | int16 | int8 | int | float32 | float64](data []T) T {
m := data[0]
for _,v := range data {
for _, v := range data {
if v > m {
m = v
}
@ -14,14 +12,34 @@ func Max[T int64 | int32 | int16 | int8 | int | float32 | float64] (data []T) T
return m
}
func Min[T int64 | int32 | int16 | int8 | int | float32 | float64] (data []T) T {
func Min[T int64 | int32 | int16 | int8 | int | uint64 | uint32 | uint16 | uint8 | uint | float32 | float64](data []T) T {
m := data[0]
for _,v := range data {
for _, v := range data {
if v < m {
m = v
}
}
return m
}
}
func Mode[T int64 | int32 | int16 | int8 | int | uint64 | uint32 | uint16 | uint8 | uint](data []T) T {
ds := make(map[T]int)
for _, v := range data {
ds[v]++
}
md := T(0)
maxocc := 0
for v, occ := range ds {
if occ > maxocc {
maxocc = occ
md = v
}
}
return md
}
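Illustration only (not part of the commit): the generic helpers accept any of the listed integer or float element types.

counts := []int{4, 7, 7, 2, 7, 4}
fmt.Println(obistats.Mode(counts)) // 7, the most frequent value
fmt.Println(obistats.Min(counts))  // 2
fmt.Println(obistats.Max(counts))  // 7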

View File

@ -25,8 +25,19 @@ import (
func BuildConsensus(seqs obiseq.BioSequenceSlice,
consensus_id string,
kmer_size int,
filter_out float64,
save_graph bool, dirname string) (*obiseq.BioSequence, error) {
if seqs.Len() == 0 {
return nil, fmt.Errorf("no sequence provided")
}
if seqs.Len() == 1 {
seq := seqs[0].Copy()
seq.SetAttribute("obiconsensus_consensus", false)
return seq, nil
}
if save_graph {
if dirname == "" {
dirname = "."
@ -104,7 +115,7 @@ func BuildConsensus(seqs obiseq.BioSequenceSlice,
log.Debugf("Graph size : %d\n", graph.Len())
total_kmer := graph.Len()
seq, err := graph.LongestConsensus(consensus_id)
seq, err := graph.LongestConsensus(consensus_id, filter_out)
sumCount := 0
@ -112,7 +123,7 @@ func BuildConsensus(seqs obiseq.BioSequenceSlice,
for _, s := range seqs {
sumCount += s.Count()
}
seq.SetAttribute("obiconsensus_consensus", true)
seq.SetAttribute("obiconsensus_weight", sumCount)
seq.SetAttribute("obiconsensus_seq_length", seq.Len())
seq.SetAttribute("obiconsensus_kmer_size", kmer_size)
@ -136,6 +147,10 @@ func SampleWeight(seqs *obiseq.BioSequenceSlice, sample, sample_key string) func
stats := (*seqs)[i].StatsOn(obiseq.MakeStatsOnDescription(sample_key), "NA")
if stats == nil {
log.Panicf("Sample %s not found in sequence %d", sample, i)
}
if value, ok := stats[sample]; ok {
return float64(value)
}
@ -292,16 +307,16 @@ func MinionDenoise(graph *obigraph.Graph[*obiseq.BioSequence, Mutation],
pack[degree] = v
clean, err = BuildConsensus(pack,
fmt.Sprintf("%s_consensus", v.Id()),
kmer_size,
kmer_size, CLILowCoverage(),
CLISaveGraphToFiles(), CLIGraphFilesDirectory())
if err != nil {
log.Warning(err)
clean = (*graph.Vertices)[i]
clean = (*graph.Vertices)[i].Copy()
clean.SetAttribute("obiconsensus_consensus", false)
} else {
clean.SetAttribute("obiconsensus_consensus", true)
}
pack.Recycle(false)
} else {
@ -318,8 +333,9 @@ func MinionDenoise(graph *obigraph.Graph[*obiseq.BioSequence, Mutation],
annotations := v.Annotations()
staton := obiseq.StatsOnSlotName(sample_key)
for k, v := range annotations {
if !clean.HasAttribute(k) {
if !clean.HasAttribute(k) && k != staton {
clean.SetAttribute(k, v)
}
}
@ -334,6 +350,83 @@ func MinionDenoise(graph *obigraph.Graph[*obiseq.BioSequence, Mutation],
return denoised
}
func MinionClusterDenoise(graph *obigraph.Graph[*obiseq.BioSequence, Mutation],
sample_key string, kmer_size int) obiseq.BioSequenceSlice {
denoised := obiseq.MakeBioSequenceSlice()
seqs := (*obiseq.BioSequenceSlice)(graph.Vertices)
weight := SampleWeight(seqs, graph.Name, sample_key)
seqWeights := make([]float64, len(*seqs))
// Compute the weight of each vertex as its own weight plus the sum of the weights of its neighbors
log.Info("")
log.Infof("Sample %s: Computing weights", graph.Name)
for i := range *seqs {
w := weight(i)
for _, j := range graph.Neighbors(i) {
w += weight(j)
}
seqWeights[i] = w
}
log.Infof("Sample %s: Done computing weights", graph.Name)
log.Infof("Sample %s: Clustering", graph.Name)
// Look for vertices that do not have a neighbor with a higher weight
for i := range *seqs {
v := (*seqs)[i]
head := true
neighbors := graph.Neighbors(i)
for _, j := range neighbors {
if seqWeights[i] < seqWeights[j] {
head = false
continue
}
}
if head {
pack := obiseq.MakeBioSequenceSlice(len(neighbors) + 1)
for k, j := range neighbors {
pack[k] = (*seqs)[j]
}
pack[len(neighbors)] = v
clean, err := BuildConsensus(pack,
fmt.Sprintf("%s_consensus", v.Id()),
kmer_size, CLILowCoverage(),
CLISaveGraphToFiles(), CLIGraphFilesDirectory())
if err != nil {
log.Warning(err)
clean = (*graph.Vertices)[i].Copy()
clean.SetAttribute("obiconsensus_consensus", false)
}
pack.Recycle(false)
clean.SetAttribute(sample_key, graph.Name)
annotations := v.Annotations()
clean.SetCount(int(weight(i)))
staton := obiseq.StatsOnSlotName(sample_key)
for k, v := range annotations {
if !clean.HasAttribute(k) && k != staton {
clean.SetAttribute(k, v)
}
}
denoised = append(denoised, clean)
}
}
log.Infof("Sample %s: Done clustering", graph.Name)
return denoised
}
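A tiny worked example of the head selection above (illustration only, with hypothetical weights and edges):

// raw weights {10, 3, 5}, edges (0,1) and (1,2)
//
// vertex  raw weight  neighbors  aggregated weight   head?
//   0        10         {1}       10 + 3     = 13    no  (13 < 18)
//   1         3         {0, 2}    3 + 10 + 5 = 18    yes
//   2         5         {1}       5 + 3      = 8     no  (8 < 18)
//
// Only vertex 1 seeds a consensus, built from the pack made of vertex 1 and
// its neighbors 0 and 2.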
func CLIOBIMinion(itertator obiiter.IBioSequence) obiiter.IBioSequence {
dirname := CLIGraphFilesDirectory()
newIter := obiiter.MakeIBioSequence()
@ -395,9 +488,17 @@ func CLIOBIMinion(itertator obiiter.IBioSequence) obiiter.IBioSequence {
false, 1, 0, 3)
}
denoised := MinionDenoise(graph,
CLISampleAttribute(),
CLIKmerSize())
var denoised obiseq.BioSequenceSlice
if CLICluterDenoise() {
denoised = MinionClusterDenoise(graph,
CLISampleAttribute(),
CLIKmerSize())
} else {
denoised = MinionDenoise(graph,
CLISampleAttribute(),
CLIKmerSize())
}
newIter.Push(obiiter.MakeBioSequenceBatch(source, sample_order, denoised))
@ -411,9 +512,14 @@ func CLIOBIMinion(itertator obiiter.IBioSequence) obiiter.IBioSequence {
newIter.WaitAndClose()
}()
obiuniq.AddStatsOn(CLISampleAttribute())
// obiuniq.AddStatsOn("sample:obiconsensus_weight")
obiuniq.SetUniqueInMemory(false)
obiuniq.SetNoSingleton(CLINoSingleton())
return obiuniq.CLIUnique(newIter).Pipe(obiiter.WorkerPipe(obiannotate.AddSeqLengthWorker(), false))
res := newIter
if CLIUnique() {
obiuniq.AddStatsOn(CLISampleAttribute())
// obiuniq.AddStatsOn("sample:obiconsensus_weight")
obiuniq.SetUniqueInMemory(false)
obiuniq.SetNoSingleton(CLINoSingleton())
res = obiuniq.CLIUnique(newIter)
}
return res.Pipe(obiiter.WorkerPipe(obiannotate.AddSeqLengthWorker(), false))
}

View File

@ -8,8 +8,6 @@ import (
var _distStepMax = 1
var _sampleAttribute = "sample"
var _ratioMax = 1.0
var _clusterMode = false
var _onlyHead = false
@ -20,6 +18,10 @@ var _NoSingleton = false
var _saveGraph = "__@@NOSAVE@@__"
var _saveRatio = "__@@NOSAVE@@__"
var _lowCoverage = 0.0
var _unique = false
// ObiminionOptionSet sets the options for obiminion.
//
// options: The options for configuring obiminion.
@ -50,6 +52,19 @@ func ObiminionOptionSet(options *getoptions.GetOpt) {
options.BoolVar(&_NoSingleton, "no-singleton", _NoSingleton,
options.Description("If set, sequences occurring a single time in the data set are discarded."))
options.BoolVar(&_clusterMode, "cluster", _clusterMode,
options.Alias("C"),
options.Description("Switch obiconsensus into its clustering mode."),
)
options.BoolVar(&_unique, "unique", _unique,
options.Alias("U"),
options.Description("If set, sequences are dereplicated on the output (obiuniq)."),
)
options.Float64Var(&_lowCoverage, "low-coverage", _lowCoverage,
options.Description("If the coverage of a sample is lower than this value, it will be discarded."),
)
}
// OptionSet sets up the options for the obiminion package.
@ -129,4 +144,16 @@ func CLIKmerSize() int {
// Returns a boolean value indicating whether or not singleton sequences should be discarded.
func CLINoSingleton() bool {
return _NoSingleton
}
}
func CLICluterDenoise() bool {
return _clusterMode
}
func CLIUnique() bool {
return _unique
}
func CLILowCoverage() float64 {
return _lowCoverage
}