Add obiminion first version

Former-commit-id: aa5ace7bd4d2266333715fca7094d1c3cbbb5e6d
This commit is contained in:
Eric Coissac
2024-05-14 08:16:12 +02:00
parent 9e63013bc2
commit 017030bcce
24 changed files with 1599 additions and 469 deletions

View File

@ -0,0 +1,33 @@
package main
import (
"os"
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiminion"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
)
// main is the entry point of the obiminion command.
//
// It parses the command-line options, reads the input sequences,
// runs the obiminion denoising procedure on them, writes the
// resulting sequences, and finally waits for the output pipeline
// to drain before exiting.
func main() {
	optionParser := obioptions.GenerateOptionParser(obiminion.OptionSet)

	_, args := optionParser(os.Args)

	sequences, err := obiconvert.CLIReadBioSequences(args...)
	if err != nil {
		log.Errorf("Cannot open file (%v)", err)
		os.Exit(1)
	}

	denoised := obiminion.CLIOBIMinion(sequences)

	obiconvert.CLIWriteBioSequences(denoised, true)
	obiiter.WaitForLastPipe()
}

View File

@ -43,7 +43,7 @@ func main() {
os.Exit(1)
}
unique := obiuniq.Unique(sequences)
unique := obiuniq.CLIUnique(sequences)
obiconvert.CLIWriteBioSequences(unique, true)
obiiter.WaitForLastPipe()

View File

@ -58,182 +58,3 @@ var _empty = encodeValues(0, 0, false)
var _out = encodeValues(0, 30000, true)
var _notavail = encodeValues(0, 30000, false)
// func FastLCSScore(seqA, seqB *obiseq.BioSequence, maxError int, buffer *[]uint64) (int, int) {
// lA := seqA.Len()
// lB := seqB.Len()
// // Ensure that A is the longest
// if lA < lB {
// seqA, seqB = seqB, seqA
// lA, lB = lB, lA
// }
// if maxError == -1 {
// maxError = lA * 2
// }
// delta := lA - lB
// // The difference of length is larger the maximum allowed errors
// if delta > maxError {
// return -1, -1
// }
// // Doit-on vraiment diviser par deux ??? pas certain
// extra := (maxError - delta) + 1
// even := 1 + delta + 2*extra
// width := 2*even - 1
// if buffer == nil {
// var local []uint64
// buffer = &local
// }
// if cap(*buffer) < 2*width {
// *buffer = make([]uint64, 3*width)
// }
// previous := (*buffer)[0:width]
// current := (*buffer)[width:(2 * width)]
// previous[extra] = _empty
// previous[extra+even] = encodeValues(0, 1, false)
// previous[extra+even-1] = encodeValues(0, 1, false)
// N := lB + ((delta) >> 1)
// bA := seqA.Sequence()
// bB := seqB.Sequence()
// // log.Println("N = ", N)
// for y := 1; y <= N; y++ {
// // in_matrix := false
// x1 := y - lB + extra
// x2 := extra - y
// xs := obiutils.MaxInt(obiutils.MaxInt(x1, x2), 0)
// x1 = y + extra
// x2 = lA + extra - y
// xf := obiutils.MinInt(obiutils.MinInt(x1, x2), even-1) + 1
// for x := xs; x < xf; x++ {
// i := y - x + extra
// j := y + x - extra
// var Sdiag, Sleft, Sup uint64
// switch {
// case i == 0:
// Sup = _notavail
// Sdiag = _notavail
// Sleft = encodeValues(0, j-1, false)
// case j == 0:
// Sup = encodeValues(0, i-1, false)
// Sdiag = _notavail
// Sleft = _notavail
// default:
// Sdiag = previous[x]
// if bA[j-1] == bB[i-1] {
// Sdiag = _incscore(Sdiag)
// }
// if x < (even - 1) {
// Sup = previous[x+even]
// } else {
// Sup = _out
// }
// if x > 0 {
// Sleft = previous[x+even-1]
// } else {
// Sleft = _out
// }
// }
// var score uint64
// switch {
// case Sdiag >= Sup && Sdiag >= Sleft:
// score = Sdiag
// case Sup >= Sleft:
// score = Sup
// default:
// score = Sleft
// }
// if _isout(Sdiag) || _isout(Sup) || _isout(Sleft) {
// score = _setout(score)
// }
// current[x] = _incpath(score)
// }
// // . 9 10 + 2 - 1
// x1 = y - lB + extra + even
// x2 = extra - y + even - 1
// xs = obiutils.MaxInt(obiutils.MaxInt(x1, x2), even)
// x1 = y + extra + even
// x2 = lA + extra - y + even - 1
// xf = obiutils.MinInt(obiutils.MinInt(x1, x2), width-1) + 1
// for x := xs; x < xf; x++ {
// i := y - x + extra + even
// j := y + x - extra - even + 1
// var Sdiag, Sleft, Sup uint64
// switch {
// case i == 0:
// Sup = _notavail
// Sdiag = _notavail
// Sleft = encodeValues(0, j-1, false)
// case j == 0:
// Sup = encodeValues(0, i-1, false)
// Sdiag = _notavail
// Sleft = _notavail
// default:
// Sdiag = previous[x]
// if bA[j-1] == bB[i-1] {
// Sdiag = _incscore(Sdiag)
// }
// Sleft = current[x-even]
// Sup = current[x-even+1]
// }
// var score uint64
// switch {
// case Sdiag >= Sup && Sdiag >= Sleft:
// score = Sdiag
// case Sup >= Sleft:
// score = Sup
// default:
// score = Sleft
// }
// if _isout(Sdiag) || _isout(Sup) || _isout(Sleft) {
// score = _setout(score)
// }
// current[x] = _incpath(score)
// }
// previous, current = current, previous
// }
// s, l, o := decodeValues(previous[(delta%2)*even+extra+(delta>>1)])
// if o {
// return -1, -1
// }
// return s, l
// }

View File

@ -130,11 +130,11 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[
// in_matrix := false
x1 := y - lB + extra
x2 := extra - y
xs := obiutils.MaxInt(obiutils.MaxInt(x1, x2), 0)
xs := obiutils.Max(obiutils.Max(x1, x2), 0)
x1 = y + extra
x2 = lA + extra - y
xf := obiutils.MinInt(obiutils.MinInt(x1, x2), even-1) + 1
xf := obiutils.Min(obiutils.Min(x1, x2), even-1) + 1
for x := xs; x < xf; x++ {
@ -222,11 +222,11 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[
// . 9 10 + 2 - 1
x1 = y - lB + extra + even
x2 = extra - y + even - 1
xs = obiutils.MaxInt(obiutils.MaxInt(x1, x2), even)
xs = obiutils.Max(obiutils.Max(x1, x2), even)
x1 = y + extra + even
x2 = lA + extra - y + even - 1
xf = obiutils.MinInt(obiutils.MinInt(x1, x2), width-1) + 1
xf = obiutils.Min(obiutils.Min(x1, x2), width-1) + 1
for x := xs; x < xf; x++ {
@ -348,16 +348,15 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[
// - Matching: 1
// - Mismatch or gap: 0
//
// Compared to FastLCSScoreByte the length of the shortest alignment returned does not include the end-gaps.
// Parameters:
// - seqA: The first bio sequence.
// - seqB: The second bio sequence.
// - maxError: The maximum allowed error between the sequences. If set to -1, no limit is applied.
// - buffer: A pointer to a uint64 slice to store intermediate results. If nil, a new slice is created.
//
// if buffer != nil, the buffer is used to store intermediate results.
// Otherwise, a new buffer is allocated.
//
// seqA: The first bio sequence.
// seqB: The second bio sequence.
// maxError: The maximum allowed error between the sequences.
// buffer: A buffer to store intermediate results.
// Returns the score of the longest common subsequence and the length of the shortest alignment corresponding.
// Returns:
// - The score of the longest common subsequence.
// - The length of the shortest alignment corresponding to the LCS.
func FastLCSEGFScore(seqA, seqB *obiseq.BioSequence, maxError int, buffer *[]uint64) (int, int) {
return FastLCSEGFScoreByte(seqA.Sequence(), seqB.Sequence(), maxError, true, buffer)
}
@ -372,14 +371,16 @@ func FastLCSEGFScore(seqA, seqB *obiseq.BioSequence, maxError int, buffer *[]uin
// - Matching: 1
// - Mismatch or gap: 0
//
// if buffer != nil, the buffer is used to store intermediate results.
// Otherwise, a new buffer is allocated.
// Parameters:
// - seqA: The first bio sequence.
// - seqB: The second bio sequence.
// - maxError: The maximum allowed error between the sequences. If set to -1, no limit is applied.
// - buffer: A pointer to a uint64 slice to store intermediate results. If nil, a new slice is created.
//
// seqA: The first bio sequence.
// seqB: The second bio sequence.
// maxError: The maximum allowed error between the sequences.
// buffer: A buffer to store intermediate results.
// Returns the score of the longest common subsequence and the length of the shortest alignment corresponding.
// Returns:
// - The score of the longest common subsequence.
// - The length of the shortest alignment corresponding to the LCS.
func FastLCSScore(seqA, seqB *obiseq.BioSequence, maxError int, buffer *[]uint64) (int, int) {
return FastLCSEGFScoreByte(seqA.Sequence(), seqB.Sequence(), maxError, false, buffer)
}

View File

@ -348,8 +348,8 @@ func (pattern ApatPattern) BestMatch(sequence ApatSequence, begin, length int) (
start = best[0] - nerr
end = best[0] + int(pattern.pointer.pointer.patlen) + nerr
start = obiutils.MaxInt(start, 0)
end = obiutils.MinInt(end, sequence.Len())
start = obiutils.Max(start, 0)
end = obiutils.Min(end, sequence.Len())
cpattern := (*[1 << 30]byte)(unsafe.Pointer(pattern.pointer.pointer.cpat))
frg := sequence.pointer.reference.Sequence()[start:end]
@ -387,8 +387,8 @@ func (pattern ApatPattern) AllMatches(sequence ApatSequence, begin, length int)
if m[2] > 0 && pattern.pointer.pointer.hasIndel {
start := m[0] - m[2]
end := m[0] + int(pattern.pointer.pointer.patlen) + m[2]
start = obiutils.MaxInt(start, 0)
end = obiutils.MinInt(end, sequence.Len())
start = obiutils.Max(start, 0)
end = obiutils.Min(end, sequence.Len())
cpattern := (*[1 << 30]byte)(unsafe.Pointer(pattern.pointer.pointer.cpat))
frg := sequence.pointer.reference.Sequence()[start:end]

327
pkg/obigraph/graph.go Normal file
View File

@ -0,0 +1,327 @@
package obigraph
import (
"bytes"
"fmt"
"io"
"math"
"os"
"text/template"
log "github.com/sirupsen/logrus"
)
// Edge represents a single edge of a graph connecting vertex From to
// vertex To, with an optional payload of type T attached to it.
type Edge[T any] struct {
	From int
	To   int
	Data *T
}

// Edges stores the adjacency of a graph as a two-level map:
// source vertex index -> destination vertex index -> edge payload.
type Edges[T any] map[int]map[int]*T

// Graph is a generic graph whose vertices carry values of type V and
// whose edges carry payloads of type T. Vertices are referenced by
// their index in the Vertices slice.
type Graph[V any, T any] struct {
	Name         string
	Vertices     *[]V
	Edges        *Edges[T] // forward adjacency
	ReverseEdges *Edges[T] // reversed adjacency, used to find parents
	VertexWeight func(int) float64
	VertexId     func(int) string
	EdgeWeight   func(int, int) float64
}

// NewEdges allocates an empty edge set and returns a pointer to it.
func NewEdges[T any]() *Edges[T] {
	edges := Edges[T]{}
	return &edges
}

// AddEdge records an edge going from vertex `from` to vertex `to`,
// attaching `data` to it. The inner destination map is created lazily
// on the first edge leaving `from`.
func (e *Edges[T]) AddEdge(from, to int, data *T) {
	targets, exists := (*e)[from]
	if !exists {
		targets = make(map[int]*T)
		(*e)[from] = targets
	}
	targets[to] = data
}
// NewGraph creates a new graph with the specified name and vertices.
//
// Parameters:
//   - name: a string representing the name of the graph.
//   - vertices: a pointer to the slice of vertices of type V.
//
// Returns:
//   - *Graph[V, T]: the newly created graph.
//
// The weight and id accessors are initialized with neutral defaults
// (every vertex/edge weighs 1.0, vertex i is labelled "Vi"); callers
// may override them after construction.
func NewGraph[V, T any](name string, vertices *[]V) *Graph[V, T] {
	defaultVertexWeight := func(int) float64 { return 1.0 }
	defaultEdgeWeight := func(int, int) float64 { return 1.0 }
	defaultVertexId := func(i int) string { return fmt.Sprintf("V%d", i) }

	return &Graph[V, T]{
		Name:         name,
		Vertices:     vertices,
		Edges:        NewEdges[T](),
		ReverseEdges: NewEdges[T](),
		VertexWeight: defaultVertexWeight,
		EdgeWeight:   defaultEdgeWeight,
		VertexId:     defaultVertexId,
	}
}
// AddEdge adds an undirected edge between two vertices in the graph.
// The edge is registered in both orientations in both the forward and
// the reverse adjacency maps, so Neighbors and Parents stay symmetric.
//
// Parameters:
//   - from: the index of the starting vertex.
//   - to: the index of the ending vertex.
//   - data: a pointer to the data associated with the edge.
//
// If either index is out of range, an error is logged and the edge is
// not inserted (previously the edge was inserted anyway, leaving the
// graph referencing nonexistent vertices).
func (g *Graph[V, T]) AddEdge(from, to int, data *T) {
	lv := len(*g.Vertices)
	if from < 0 || to < 0 || from >= lv || to >= lv {
		log.Errorf("out of bounds vertex index: %d or %d (max: %d)", from, to, lv-1)
		return
	}
	g.Edges.AddEdge(from, to, data)
	g.Edges.AddEdge(to, from, data)
	g.ReverseEdges.AddEdge(to, from, data)
	g.ReverseEdges.AddEdge(from, to, data)
}
// AddDirectedEdge adds a directed edge from one vertex to another in the graph.
// The edge is stored as from->to in the forward adjacency map and as
// to->from in the reverse adjacency map.
//
// Parameters:
//   - from: an integer representing the index of the starting vertex.
//   - to: an integer representing the index of the ending vertex.
//   - data: a pointer to the data associated with the edge.
//
// If either index is out of range, an error is logged and the edge is
// not inserted (previously the edge was inserted anyway, leaving the
// graph referencing nonexistent vertices).
func (g *Graph[V, T]) AddDirectedEdge(from, to int, data *T) {
	lv := len(*g.Vertices)
	if from < 0 || to < 0 || from >= lv || to >= lv {
		log.Errorf("out of bounds vertex index: %d or %d (max: %d)", from, to, lv-1)
		return
	}
	g.Edges.AddEdge(from, to, data)
	g.ReverseEdges.AddEdge(to, from, data)
}
// SetAsDirectedEdge converts an existing undirected edge into a directed
// edge going from `from` to `to`.
//
// Parameters:
//   - from: an integer representing the index of the starting vertex.
//   - to: an integer representing the index of the ending vertex.
//
// After the call the adjacency maps match what AddDirectedEdge would
// have produced: Edges keeps from->to only, ReverseEdges keeps to->from
// only. (The previous implementation deleted both orientations from
// Edges, removing the edge entirely, and never touched ReverseEdges.)
// If no edge exists between the two vertices, an error is logged.
func (g *Graph[V, T]) SetAsDirectedEdge(from, to int) {
	lv := len(*g.Vertices)
	if from < 0 || to < 0 || from >= lv || to >= lv {
		log.Errorf("out of bounds vertex index: %d or %d (max: %d)", from, to, lv-1)
		return
	}

	if _, ok := (*g.Edges)[from][to]; ok {
		// Drop only the reversed orientation so the forward edge survives.
		if _, ok := (*g.Edges)[to][from]; ok {
			delete((*g.Edges)[to], from)
			delete((*g.ReverseEdges)[from], to)
		}
		return
	}
	log.Error("no edge from ", from, " to ", to)
}
// Neighbors returns the indices of the vertices directly reachable from
// the given vertex through the forward adjacency map.
//
// Parameters:
//   - v: an integer representing the index of the vertex.
//
// Returns:
//   - []int: the neighbor vertex indices, or nil when the vertex has no
//     outgoing edges. The order of the result is unspecified (map order).
func (g *Graph[V, T]) Neighbors(v int) []int {
	targets, ok := (*g.Edges)[v]
	if !ok {
		return nil
	}
	result := make([]int, 0, len(targets))
	for target := range targets {
		result = append(result, target)
	}
	return result
}
// Degree returns the number of outgoing edges of a vertex.
//
// Parameters:
//   - v: an integer representing the index of the vertex.
//
// Returns:
//   - an integer representing the degree of the vertex (0 when the
//     vertex has no entry in the adjacency map — len of a missing map
//     entry is 0).
func (g *Graph[V, T]) Degree(v int) int {
	return len((*g.Edges)[v])
}
// Parents returns the indices of the vertices having an edge pointing
// to the given vertex, as recorded in the reverse adjacency map.
//
// Parameters:
//   - v: an integer representing the index of the vertex.
//
// Returns:
//   - []int: the parent vertex indices, or nil when the vertex has no
//     incoming edges. The order of the result is unspecified (map order).
func (g *Graph[V, T]) Parents(v int) []int {
	sources, ok := (*g.ReverseEdges)[v]
	if !ok {
		return nil
	}
	result := make([]int, 0, len(sources))
	for source := range sources {
		result = append(result, source)
	}
	return result
}
// ParentDegree returns the number of incoming edges of a vertex, i.e.
// the number of its parents in the reverse adjacency map.
//
// Parameters:
//   - v: an integer representing the index of the vertex.
//
// Returns:
//   - an integer representing the in-degree of the vertex (0 when the
//     vertex has no entry in the reverse adjacency map).
func (g *Graph[V, T]) ParentDegree(v int) int {
	return len((*g.ReverseEdges)[v])
}
// gml_graph bundles a graph together with the rendering options so it
// can be passed as a single value to the GML text/template in Gml.
type gml_graph[V any, T any] struct {
	Graph       *Graph[V, T]
	As_directed bool    // render as a directed graph
	Min_degree  int     // vertices below this degree are skipped
	Threshold   float64 // vertex weight above which the node is a circle
	Scale       int     // scaling factor applied to node sizes
}
// Gml generates a GML representation of the graph.
//
// Parameters:
//   - as_directed: whether the graph should be rendered as directed.
//   - min_degree: vertices whose degree is below this value are skipped.
//   - threshold: vertex weight at or above which a node is drawn as a
//     circle instead of a rectangle.
//   - scale: scaling factor applied to the node sizes.
//
// Returns:
//   - string: the GML representation of the graph.
//
// Fix: the edge section previously compared the vertex *indices*
// ($source / $target) with Min_degree while the node section compared
// the vertex degree, so edges could reference nodes that were filtered
// out. Both sections now filter on (Degree v).
func (g *Graph[V, T]) Gml(as_directed bool, min_degree int, threshold float64, scale int) string {
	var gml bytes.Buffer

	data := gml_graph[V, T]{
		Graph:       g,
		As_directed: as_directed,
		Min_degree:  min_degree,
		Threshold:   threshold,
		Scale:       scale,
	}

	digraphTpl := template.New("gml_digraph")
	digraph := ` {{$context := .}}
graph [
	comment "{{ if $context.As_directed }}Directed graph{{ else }}Undirected graph{{ end }} {{ Name }}"
	directed {{ if $context.As_directed }}1{{ else }}0{{ end }}
	{{range $index, $data:= $context.Graph.Vertices}}
	{{ if (ge (Degree $index) $context.Min_degree)}}
	node [ id {{$index}}
		graphics [
			type "{{ Shape $index }}"
			h {{ Sqrt (VertexWeight $index) }}
			w {{ Sqrt (VertexWeight $index) }}
		]
	]
	{{ end }}
	{{ end }}
	{{range $source, $data:= $context.Graph.Edges}}
	{{range $target, $edge:= $data}}
	{{ if and (ge (Degree $source) $context.Min_degree) (ge (Degree $target) $context.Min_degree) (or $context.As_directed (lt $source $target))}}
	edge [ source {{$source}}
		target {{$target}}
		color "#00FF00"
	]
	{{ end }}
	{{ end }}
	{{ end }}
]
`

	tmpl, err := digraphTpl.Funcs(template.FuncMap{
		// Node size: scaled integer square root of the vertex weight.
		"Sqrt":         func(i float64) int { return scale * int(math.Floor(math.Sqrt(i))) },
		"Name":         func() string { return g.Name },
		"VertexId":     func(i int) string { return g.VertexId(i) },
		"Degree":       func(i int) int { return g.Degree(i) },
		"VertexWeight": func(i int) float64 { return g.VertexWeight(i) },
		"Shape": func(i int) string {
			if g.VertexWeight(i) >= threshold {
				return "circle"
			}
			return "rectangle"
		},
	}).Parse(digraph)

	if err != nil {
		panic(err)
	}

	err = tmpl.Execute(&gml, data)
	if err != nil {
		panic(err)
	}

	return gml.String()
}
// WriteGml writes the GML representation of the graph to an io.Writer.
//
// Parameters:
//   - w: the io.Writer to write the GML representation to.
//   - as_directed: whether the graph should be rendered as directed.
//   - min_degree: vertices below this degree are skipped.
//   - threshold: the threshold value controlling node shapes.
//   - scale: the scaling factor applied to node sizes.
//
// The function panics if the write fails.
func (g *Graph[V, T]) WriteGml(w io.Writer, as_directed bool, min_degree int, threshold float64, scale int) {
	gml := g.Gml(as_directed, min_degree, threshold, scale)
	if _, err := w.Write([]byte(gml)); err != nil {
		panic(err)
	}
}
// WriteGmlFile writes the graph in GML format to the specified file.
//
// Parameters:
//   - filename: the name of the file to write the GML representation to.
//   - as_directed: whether the graph should be rendered as directed.
//   - min_degree: vertices below this degree are skipped.
//   - threshold: the threshold value controlling node shapes.
//   - scale: the scaling factor applied to node sizes.
//
// The file is created (or truncated) and closed before returning.
// The function panics if the file cannot be created.
func (g *Graph[V, T]) WriteGmlFile(filename string, as_directed bool, min_degree int, threshold float64, scale int) {
	file, err := os.Create(filename)
	if err != nil {
		panic(err)
	}
	defer file.Close()

	g.WriteGml(file, as_directed, min_degree, threshold, scale)
}

104
pkg/obigraph/graphbuffer.go Normal file
View File

@ -0,0 +1,104 @@
package obigraph
import (
"io"
"os"
)
// GraphBuffer wraps a Graph with a channel of edges so that several
// goroutines can add edges concurrently: producers send Edge values on
// Channel and a single consumer goroutine (started by NewGraphBuffer)
// performs the actual insertions into Graph.
type GraphBuffer[V, T any] struct {
	Graph   *Graph[V, T]
	Channel chan Edge[T]
}
// NewGraphBuffer creates a new GraphBuffer with the given name and vertices.
//
// Parameters:
//   - name: the name of the underlying graph.
//   - vertices: a pointer to the slice of vertices backing the graph.
//
// Returns:
//   - *GraphBuffer[V, T]: the newly created GraphBuffer.
//
// A consumer goroutine is started that serializes every edge received
// on Channel into the graph; it terminates when Close() closes the
// channel.
func NewGraphBuffer[V, T any](name string, vertices *[]V) *GraphBuffer[V, T] {
	b := &GraphBuffer[V, T]{
		Graph:   NewGraph[V, T](name, vertices),
		Channel: make(chan Edge[T]),
	}

	go func() {
		for e := range b.Channel {
			b.Graph.AddEdge(e.From, e.To, e.Data)
		}
	}()

	return b
}
// AddEdge adds an edge to the GraphBuffer by sending it on the channel;
// the consumer goroutine performs the actual insertion into the graph.
//
// Parameters:
//   - from: the index of the starting vertex.
//   - to: the index of the ending vertex.
//   - data: a pointer to the data associated with the edge.
func (g *GraphBuffer[V, T]) AddEdge(from, to int, data *T) {
	edge := Edge[T]{From: from, To: to, Data: data}
	g.Channel <- edge
}
// AddDirectedEdge adds a directed edge from one vertex to another in the GraphBuffer.
//
// Parameters:
//   - from: the index of the starting vertex.
//   - to: the index of the ending vertex.
//   - data: a pointer to the data associated with the edge.
//
// NOTE(review): the Edge sent on the channel carries no direction flag
// and the consumer goroutine in NewGraphBuffer always calls the
// undirected Graph.AddEdge, so this method currently behaves exactly
// like AddEdge — confirm whether directed insertion was intended.
func (g *GraphBuffer[V, T]) AddDirectedEdge(from, to int, data *T) {
	g.Channel <- Edge[T]{
		From: from,
		To:   to,
		Data: data,
	}
}
// Gml generates a GML representation of the buffered graph by
// delegating to the underlying Graph.Gml.
//
// Parameters:
//   - as_directed: whether the graph should be rendered as directed.
//   - min_degree: vertices below this degree are skipped.
//   - threshold: the threshold value controlling node shapes.
//   - scale: the scaling factor applied to node sizes.
//
// Returns:
//   - string: the GML representation of the graph.
func (g *GraphBuffer[V, T]) Gml(as_directed bool, min_degree int, threshold float64, scale int) string {
	return g.Graph.Gml(as_directed, min_degree, threshold, scale)
}
// WriteGmlFile writes the buffered graph in GML format to the named file,
// delegating to the underlying Graph.WriteGmlFile (the previous body
// duplicated that method line for line).
//
// Parameters:
//   - filename: the name of the file to write the GML representation to.
//   - as_directed: whether the graph should be rendered as directed.
//   - min_degree: vertices below this degree are skipped.
//   - threshold: the threshold value controlling node shapes.
//   - scale: the scaling factor applied to node sizes.
//
// NOTE(review): edges still queued on Channel are not flushed before
// writing; callers presumably Close() the buffer first — confirm.
func (g *GraphBuffer[V, T]) WriteGmlFile(filename string, as_directed bool, min_degree int, threshold float64, scale int) {
	g.Graph.WriteGmlFile(filename, as_directed, min_degree, threshold, scale)
}
// WriteGml writes the GML representation of the buffered graph to an
// io.Writer, delegating to the underlying Graph.WriteGml (the previous
// body duplicated that method's write-and-panic logic).
//
// Parameters:
//   - w: the io.Writer to write the GML representation to.
//   - as_directed: whether the graph should be rendered as directed.
//   - min_degree: vertices below this degree are skipped.
//   - threshold: the threshold value controlling node shapes.
//   - scale: the scaling factor applied to node sizes.
func (g *GraphBuffer[V, T]) WriteGml(w io.Writer, as_directed bool, min_degree int, threshold float64, scale int) {
	g.Graph.WriteGml(w, as_directed, min_degree, threshold, scale)
}
// Close closes the GraphBuffer by closing its channel, which lets the
// consumer goroutine started in NewGraphBuffer terminate once it has
// drained the remaining edges.
//
// No parameters.
func (g *GraphBuffer[V, T]) Close() {
	close(g.Channel)
}

View File

@ -30,7 +30,7 @@ func IFragments(minsize, length, overlap, size, nworkers int) Pipeable {
news = append(news, s)
} else {
for i := 0; i < s.Len(); i += step {
end := obiutils.MinInt(i+length, s.Len())
end := obiutils.Min(i+length, s.Len())
fusion := false
if (s.Len() - end) < step {
end = s.Len()

View File

@ -9,9 +9,10 @@ import (
type Table4mer [256]uint16
func Count4Mer(seq *obiseq.BioSequence, buffer *[]byte, counts *Table4mer) *Table4mer {
iternal_buffer := Encode4mer(seq, buffer)
func Count4Mer(seq *obiseq.BioSequence, buffer *[]byte, counts *Table4mer) *Table4mer {
iternal_buffer := Encode4mer(seq, buffer) // The slice of 4-mer codes
if counts == nil {
var w Table4mer
@ -19,7 +20,7 @@ func Count4Mer(seq *obiseq.BioSequence, buffer *[]byte, counts *Table4mer) *Tabl
}
// Every cells of the counter is set to zero
for i := 0; i < 256; i++ {
for i := 0; i < 256; i++ { // 256 is the number of possible 4-mer codes
(*counts)[i] = 0
}
@ -32,7 +33,7 @@ func Count4Mer(seq *obiseq.BioSequence, buffer *[]byte, counts *Table4mer) *Tabl
func Common4Mer(count1, count2 *Table4mer) int {
sum := 0
for i := 0; i < 256; i++ {
sum += int(obiutils.MinUInt16((*count1)[i], (*count2)[i]))
sum += int(obiutils.Min((*count1)[i], (*count2)[i]))
}
return sum
}
@ -48,7 +49,7 @@ func Sum4Mer(count *Table4mer) int {
func LCS4MerBounds(count1, count2 *Table4mer) (int, int) {
s1 := Sum4Mer(count1)
s2 := Sum4Mer(count2)
smin := obiutils.MinInt(s1, s2)
smin := obiutils.Min(s1, s2)
cw := Common4Mer(count1, count2)
@ -65,7 +66,7 @@ func LCS4MerBounds(count1, count2 *Table4mer) (int, int) {
func Error4MerBounds(count1, count2 *Table4mer) (int, int) {
s1 := Sum4Mer(count1)
s2 := Sum4Mer(count2)
smax := obiutils.MaxInt(s1, s2)
smax := obiutils.Max(s1, s2)
cw := Common4Mer(count1, count2)

View File

@ -2,13 +2,16 @@ package obikmer
import (
"bytes"
"container/heap"
"fmt"
"math"
"math/bits"
"slices"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
"github.com/daichi-m/go18ds/sets/linkedhashset"
"github.com/daichi-m/go18ds/stacks/arraystack"
log "github.com/sirupsen/logrus"
)
type KmerIdx32 uint32
@ -49,36 +52,60 @@ type KmerIdx_t interface {
}
type DeBruijnGraph struct {
kmersize int
kmermask uint64
prevc uint64
kmersize int // k-mer size
kmermask uint64 // mask used to set to 0 the bits that are not in the k-mer
prevc uint64 //
prevg uint64
prevt uint64
graph map[uint64]uint
graph map[uint64]uint // Kmer are encoded as uint64 with 2 bits per character
}
// MakeDeBruijnGraph creates a De Bruijn Graph with the specified k-mer size.
//
// Parameters:
//
// kmersize int - the size of the k-mers
//
// Returns:
//
// *DeBruijnGraph - a pointer to the created De Bruijn's Graph
func MakeDeBruijnGraph(kmersize int) *DeBruijnGraph {
g := DeBruijnGraph{
kmersize: kmersize,
kmermask: ^(^uint64(0) << (uint64(kmersize+1) * 2)),
prevc: uint64(1) << (uint64(kmersize) * 2),
prevg: uint64(2) << (uint64(kmersize) * 2),
prevt: uint64(3) << (uint64(kmersize) * 2),
kmermask: ^(^uint64(0) << (uint64(kmersize) * 2)), // k-mer mask used to set to 0 the bits that are not in the k-mer
prevc: uint64(1) << (uint64(kmersize-1) * 2),
prevg: uint64(2) << (uint64(kmersize-1) * 2),
prevt: uint64(3) << (uint64(kmersize-1) * 2),
graph: make(map[uint64]uint),
}
return &g
}
// KmerSize returns the size of the k-mers in the DeBruijn graph.
//
// This function takes no parameters.
// It returns an integer representing the size of the k-mers.
func (g *DeBruijnGraph) KmerSize() int {
return g.kmersize
}
// Len returns the length of the graph.
//
// This function takes no parameters.
// It returns an integer representing the number of nodes in the graph.
func (g *DeBruijnGraph) Len() int {
return len(g.graph)
}
func (g *DeBruijnGraph) MaxLink() int {
// MaxWeight returns the maximum weight of a node from the DeBruijn's Graph.
//
// It iterates over each count in the graph map and updates the max value if the current count is greater.
// Finally, it returns the maximum weight as an integer.
//
// Returns:
// - int: the maximum weight value.
func (g *DeBruijnGraph) MaxWeight() int {
max := uint(0)
for _, count := range g.graph {
if count > max {
@ -89,8 +116,12 @@ func (g *DeBruijnGraph) MaxLink() int {
return int(max)
}
func (g *DeBruijnGraph) LinkSpectrum() []int {
max := g.MaxLink()
// WeightSpectrum calculates the weight spectrum of nodes in the DeBruijn's graph.
//
// No parameters.
// Returns an array of integers representing the weight spectrum.
func (g *DeBruijnGraph) WeightSpectrum() []int {
max := g.MaxWeight()
spectrum := make([]int, max+1)
for _, count := range g.graph {
spectrum[int(count)]++
@ -99,7 +130,10 @@ func (g *DeBruijnGraph) LinkSpectrum() []int {
return spectrum
}
func (g *DeBruijnGraph) FilterMin(min int) {
// FilterMinWeight filters the DeBruijnGraph by removing nodes with weight less than the specified minimum.
//
// min: an integer representing the minimum count threshold.
func (g *DeBruijnGraph) FilterMinWeight(min int) {
umin := uint(min)
for idx, count := range g.graph {
if count < umin {
@ -109,8 +143,12 @@ func (g *DeBruijnGraph) FilterMin(min int) {
}
func (g *DeBruijnGraph) Previouses(index uint64) []uint64 {
if _, ok := g.graph[index]; !ok {
log.Panicf("k-mer %s (index %d) is not in graph", g.DecodeNode(index), index)
}
rep := make([]uint64, 0, 4)
index = index >> 2
index >>= 2
if _, ok := g.graph[index]; ok {
rep = append(rep, index)
@ -135,6 +173,10 @@ func (g *DeBruijnGraph) Previouses(index uint64) []uint64 {
}
func (g *DeBruijnGraph) Nexts(index uint64) []uint64 {
if _, ok := g.graph[index]; !ok {
log.Panicf("k-mer %s (index %d) is not in graph", g.DecodeNode(index), index)
}
rep := make([]uint64, 0, 4)
index = (index << 2) & g.kmermask
@ -160,11 +202,11 @@ func (g *DeBruijnGraph) Nexts(index uint64) []uint64 {
return rep
}
func (g *DeBruijnGraph) MaxNext(index uint64) (uint64, bool) {
func (g *DeBruijnGraph) MaxNext(index uint64) (uint64, int, bool) {
ns := g.Nexts(index)
if len(ns) == 0 {
return uint64(0), false
return uint64(0), 0, false
}
max := uint(0)
@ -177,7 +219,34 @@ func (g *DeBruijnGraph) MaxNext(index uint64) (uint64, bool) {
}
}
return rep, true
return rep, int(max), true
}
func (g *DeBruijnGraph) Heads() []uint64 {
rep := make([]uint64, 0, 10)
for k := range g.graph {
if len(g.Previouses(k)) == 0 {
rep = append(rep, k)
}
}
return rep
}
func (g *DeBruijnGraph) MaxHead() (uint64, int, bool) {
rep := uint64(0)
max := uint(0)
found := false
for k, w := range g.graph {
if len(g.Previouses(k)) == 0 && w > max {
rep = k
max = w
found = true
}
}
return rep, int(max), found
}
func (g *DeBruijnGraph) MaxPath() []uint64 {
@ -185,17 +254,17 @@ func (g *DeBruijnGraph) MaxPath() []uint64 {
ok := false
idx := uint64(0)
idx, ok = g.MaxHead()
idx, _, ok = g.MaxHead()
for ok {
path = append(path, idx)
idx, ok = g.MaxNext(idx)
idx, _, ok = g.MaxNext(idx)
}
return path
}
func (g *DeBruijnGraph) LongestPath() []uint64 {
func (g *DeBruijnGraph) LongestPath(max_length int) []uint64 {
var path []uint64
wmax := uint(0)
ok := true
@ -209,7 +278,11 @@ func (g *DeBruijnGraph) LongestPath() []uint64 {
nw := g.graph[idx]
w += nw
lp = append(lp, idx)
idx, ok = g.MaxNext(idx)
idx, _, ok = g.MaxNext(idx)
if max_length > 0 && len(lp) > max_length {
ok = false
w = 0
}
}
if w > wmax {
@ -221,8 +294,9 @@ func (g *DeBruijnGraph) LongestPath() []uint64 {
return path
}
func (g *DeBruijnGraph) LongestConsensus(id string) (*obiseq.BioSequence, error) {
path := g.LongestPath()
func (g *DeBruijnGraph) LongestConsensus(id string, max_length int) (*obiseq.BioSequence, error) {
//path := g.LongestPath(max_length)
path := g.HaviestPath()
s := g.DecodePath(path)
if len(s) > 0 {
@ -238,37 +312,10 @@ func (g *DeBruijnGraph) LongestConsensus(id string) (*obiseq.BioSequence, error)
return nil, fmt.Errorf("cannot identify optimum path")
}
func (g *DeBruijnGraph) Heads() []uint64 {
rep := make([]uint64, 0, 10)
for k := range g.graph {
if len(g.Previouses(k)) == 0 {
rep = append(rep, k)
}
}
return rep
}
func (g *DeBruijnGraph) MaxHead() (uint64, bool) {
rep := uint64(0)
max := uint(0)
found := false
for k, w := range g.graph {
if len(g.Previouses(k)) == 0 && w > max {
rep = k
found = true
}
}
return rep, found
}
func (g *DeBruijnGraph) DecodeNode(index uint64) string {
rep := make([]byte, g.kmersize)
index >>= 2
for i := g.kmersize - 1; i >= 0; i-- {
rep[i], _ = decode[index&3]
rep[i] = decode[index&3]
index >>= 2
}
@ -282,7 +329,7 @@ func (g *DeBruijnGraph) DecodePath(path []uint64) string {
if len(path) > 0 {
buf.WriteString(g.DecodeNode(path[0]))
for _, idx := range path {
for _, idx := range path[1:] {
buf.WriteByte(decode[idx&3])
}
}
@ -307,6 +354,13 @@ func (g *DeBruijnGraph) BestConsensus(id string) (*obiseq.BioSequence, error) {
return nil, fmt.Errorf("cannot identify optimum path")
}
// Weight returns the weight of the node at the given index in the DeBruijnGraph.
//
// Parameters:
// - index: the index of the node in the graph.
//
// Returns:
// - int: the weight of the node.
func (g *DeBruijnGraph) Weight(index uint64) int {
val, ok := g.graph[index]
if !ok {
@ -315,59 +369,64 @@ func (g *DeBruijnGraph) Weight(index uint64) int {
return int(val)
}
// append appends a sequence of nucleotides to the DeBruijnGraph.
//
// Parameters:
// - sequence: a byte slice representing the sequence of nucleotides to append.
// - current: the current node in the graph to which the sequence will be appended.
// - weight: the weight of the added nodes.
func (graph *DeBruijnGraph) append(sequence []byte, current uint64, weight int) {
for i := 0; i < len(sequence); i++ {
current <<= 2
current &= graph.kmermask
b := iupac[sequence[i]]
if len(b) == 1 {
current |= b[0]
graph.graph[current] = uint(graph.Weight(current) + weight)
} else {
for j := 0; j < len(b); j++ {
current &= ^uint64(3)
current |= b[j]
graph.graph[current] = uint(graph.Weight(current) + weight)
graph.append(sequence[(i+1):], current, weight)
}
if len(sequence) == 0 {
return
}
current <<= 2
current &= graph.kmermask
b := iupac[sequence[0]]
current |= b[0]
graph.graph[current] = uint(graph.Weight(current) + weight)
graph.append(sequence[1:], current, weight)
for j := 1; j < len(b); j++ {
current &= ^uint64(3)
current |= b[j]
graph.graph[current] = uint(graph.Weight(current) + weight)
graph.append(sequence[1:], current, weight)
}
}
func (graph *DeBruijnGraph) Push(sequence *obiseq.BioSequence) {
key := uint64(0)
s := sequence.Sequence()
w := sequence.Count()
init := make([]uint64, 0, 16)
var f func(start int, key uint64)
f = func(start int, key uint64) {
for i := start; i < graph.kmersize; i++ {
s := sequence.Sequence() // Get the sequence as a byte slice
w := sequence.Count() // Get the weight of the sequence
var initFirstKmer func(start int, key uint64)
// Initialize the first k-mer
// start is the index of the nucleotide in the k-mer to add
// key is the value of the k-mer index before adding the start nucleotide
initFirstKmer = func(start int, key uint64) {
if start == 0 {
key = 0
}
if start < graph.kmersize {
key <<= 2
b := iupac[s[i]]
if len(b) == 1 {
key |= b[0]
} else {
for j := 0; j < len(b); j++ {
b := iupac[s[start]]
for _, code := range b {
key &= ^uint64(3)
key |= b[j]
f(i+1, key)
key |= code
initFirstKmer(start+1, key)
}
return
} else {
graph.graph[key] = uint(graph.Weight(key) + w)
graph.append(s[graph.kmersize:], key, w)
}
}
init = append(init, key&graph.kmermask)
}
if sequence.Len() > graph.kmersize {
f(0, key)
for _, idx := range init {
graph.append(s[graph.kmersize:], idx, w)
}
initFirstKmer(0, 0)
}
}
@ -381,32 +440,38 @@ func (graph *DeBruijnGraph) Gml() string {
`)
nodeidx := make(map[uint64]int)
nodeid := 0
for idx := range graph.graph {
nodeid++
nodeidx[idx] = nodeid
n := graph.Nexts(idx)
p := graph.Previouses(idx)
if len(n) == 0 || len(p) == 0 {
node := graph.DecodeNode(idx)
buffer.WriteString(
fmt.Sprintf("node [ id \"%s\" ]\n", node),
fmt.Sprintf("node [ id \"%d\" \n label \"%s\" ]\n", nodeid, node),
)
n := graph.Nexts(uint64(idx))
if len(n) == 0 {
idx <<= 2
idx &= graph.kmermask
node := graph.DecodeNode(idx)
} else {
buffer.WriteString(
fmt.Sprintf("node [ id \"%s\" \n label \"%s\" ]\n", node, node),
fmt.Sprintf("node [ id \"%d\" ]\n", nodeid),
)
}
}
for idx, weight := range graph.graph {
src := graph.DecodeNode(idx)
label := decode[idx&3]
idx <<= 2
idx &= graph.kmermask
dst := graph.DecodeNode(idx)
for idx := range graph.graph {
srcid := nodeidx[idx]
n := graph.Nexts(idx)
for _, dst := range n {
dstid := nodeidx[dst]
label := decode[dst&3]
weight := graph.Weight(dst)
buffer.WriteString(
fmt.Sprintf(`edge [ source "%s"
target "%s"
fmt.Sprintf(`edge [ source "%d"
target "%d"
color "#00FF00"
label "%c[%d]"
graphics [
@ -416,9 +481,11 @@ func (graph *DeBruijnGraph) Gml() string {
]
]
`, src, dst, label, weight, math.Log(float64(weight))),
`, srcid, dstid, label, weight, math.Sqrt(float64(weight))),
)
}
}
buffer.WriteString("]\n")
return buffer.String()
@ -472,3 +539,151 @@ func (g *DeBruijnGraph) HammingDistance(kmer1, kmer2 uint64) int {
ident &= 0x5555555555555555 & g.kmermask
return bits.OnesCount64(ident)
}
// UInt64Heap is a min-heap of uint64 values implementing heap.Interface,
// so it can be driven by the container/heap package.
type UInt64Heap []uint64

// Len returns the number of values currently held by the heap.
func (h UInt64Heap) Len() int { return len(h) }

// Less reports whether the value at index i sorts before the value at index j.
func (h UInt64Heap) Less(i, j int) bool { return h[i] < h[j] }

// Swap exchanges the values stored at indices i and j.
func (h UInt64Heap) Swap(i, j int) { h[i], h[j] = h[j], h[i] }

// Push appends x to the heap's backing slice. A pointer receiver is
// required because the slice's length changes, not just its contents.
func (h *UInt64Heap) Push(x any) {
	*h = append(*h, x.(uint64))
}

// Pop removes and returns the last element of the backing slice (the
// container/heap package has already swapped the minimum there).
// Pointer receiver for the same reason as Push.
func (h *UInt64Heap) Pop() any {
	last := len(*h) - 1
	x := (*h)[last]
	*h = (*h)[:last]
	return x
}
// HaviestPath returns the sequence of k-mer nodes forming the heaviest
// path (maximum cumulative node weight) through the graph, or nil when
// the graph contains a cycle.
//
// The search is a Dijkstra-like relaxation started from every head node
// (seeded by g.Heads()), maximising summed weights instead of minimising
// distances. The graph is verified acyclic first, so the relaxation
// terminates.
//
// NOTE(review): the value 0 is used both as the "no predecessor" marker
// in prevNodes and as a potentially valid k-mer code; the log.Warn calls
// below track occurrences of node 0 — confirm 0 can never be a real node.
func (g *DeBruijnGraph) HaviestPath() []uint64 {
	if g.HasCycle() {
		return nil
	}
	// Initialize the distance array and visited set
	distances := make(map[uint64]int)
	visited := make(map[uint64]bool)
	prevNodes := make(map[uint64]uint64)
	heaviestNode := uint64(0)
	heaviestWeight := 0
	queue := &UInt64Heap{}
	heap.Init(queue)
	startNodes := make(map[uint64]struct{})
	// Seed the queue with every head node, using its own weight as the
	// initial cumulative weight.
	for _, n := range g.Heads() {
		startNodes[n] = struct{}{}
		heap.Push(queue, n)
		distances[n] = g.Weight(n)
		prevNodes[n] = 0
		visited[n] = false
	}
	// Priority queue to keep track of nodes to visit.
	// NOTE(review): UInt64Heap orders by k-mer value, not by cumulative
	// weight; correctness relies on nodes being re-pushed each time their
	// distance improves (see the relaxation below).
	for len(*queue) > 0 {
		// Get the node with the smallest distance
		currentNode := heap.Pop(queue).(uint64)
		// If the current node has already been visited, skip it
		if visited[currentNode] {
			continue
		}
		// Mark the node as visited
		visited[currentNode] = true
		weight := distances[currentNode]
		// Update the heaviest node
		if weight > heaviestWeight {
			heaviestWeight = weight
			heaviestNode = currentNode
		}
		if currentNode == 0 {
			log.Warn("current node is 0")
		}
		// Update the distance of the neighbors
		nextNodes := g.Nexts(currentNode)
		for _, nextNode := range nextNodes {
			if nextNode == 0 {
				log.Warn("next node is 0")
			}
			weight := g.Weight(nextNode) + distances[currentNode]
			if distances[nextNode] < weight {
				distances[nextNode] = weight
				prevNodes[nextNode] = currentNode
				visited[nextNode] = false
				heap.Push(queue, nextNode)
				// Keep track of the node with the heaviest weight
				if weight > heaviestWeight {
					heaviestWeight = weight
					heaviestNode = nextNode
				}
			}
		}
	}
	log.Infof("Heaviest node: %d [%v]", heaviestNode, heaviestWeight)
	// Reconstruct the path from the start node to the heaviest node found
	// by following the predecessor links backwards; stop when a start node
	// is reached or a node repeats (which would indicate a cycle).
	heaviestPath := make([]uint64, 0)
	currentNode := heaviestNode
	for _, ok := startNodes[currentNode]; !ok && !slices.Contains(heaviestPath, currentNode); _, ok = startNodes[currentNode] {
		heaviestPath = append(heaviestPath, currentNode)
		//log.Infof("Current node: %d <- %d", currentNode, prevNodes[currentNode])
		currentNode = prevNodes[currentNode]
	}
	// Defensive check: should be unreachable since HasCycle() returned false.
	if slices.Contains(heaviestPath, currentNode) {
		log.Fatalf("Cycle detected %v -> %v (%v) len(%v)", heaviestPath, currentNode, startNodes, len(heaviestPath))
		return nil
	}
	heaviestPath = append(heaviestPath, currentNode)
	// Reverse the path so it runs from the start node to the heaviest node.
	slices.Reverse(heaviestPath)
	return heaviestPath
}
// HasCycle reports whether the graph contains at least one directed cycle.
//
// It performs a depth-first search from every not-yet-seen node while
// tracking the nodes of the current recursion stack: reaching a node that
// is still on that stack means a back edge exists, i.e. a cycle.
func (g *DeBruijnGraph) HasCycle() bool {
	seen := make(map[uint64]bool)
	onStack := make(map[uint64]bool)

	// visit explores the subgraph below node and reports whether a
	// back edge (cycle) is reachable from it.
	var visit func(node uint64) bool
	visit = func(node uint64) bool {
		seen[node] = true
		onStack[node] = true

		for _, succ := range g.Nexts(node) {
			if seen[succ] {
				if onStack[succ] {
					return true
				}
				continue
			}
			if visit(succ) {
				return true
			}
		}

		onStack[node] = false
		return false
	}

	// Run the DFS from every node not reached by a previous traversal.
	for node := range g.graph {
		if !seen[node] && visit(node) {
			return true
		}
	}

	return false
}

View File

@ -130,7 +130,7 @@ func (marker *Marker) Match(sequence *obiseq.BioSequence) *DemultiplexMatch {
sseq := sequence.String()
direct := sseq[start:end]
tagstart := obiutils.MaxInt(start-marker.taglength, 0)
tagstart := obiutils.Max(start-marker.taglength, 0)
ftag := strings.ToLower(sseq[tagstart:start])
m := DemultiplexMatch{
@ -150,7 +150,7 @@ func (marker *Marker) Match(sequence *obiseq.BioSequence) *DemultiplexMatch {
reverse, _ := sequence.Subsequence(start, end, false)
defer reverse.Recycle()
reverse = reverse.ReverseComplement(true)
endtag := obiutils.MinInt(end+marker.taglength, sequence.Len())
endtag := obiutils.Min(end+marker.taglength, sequence.Len())
rtag, err := sequence.Subsequence(end, endtag, false)
defer rtag.Recycle()
srtag := ""
@ -201,7 +201,7 @@ func (marker *Marker) Match(sequence *obiseq.BioSequence) *DemultiplexMatch {
sseq := sequence.String()
reverse := strings.ToLower(sseq[start:end])
tagstart := obiutils.MaxInt(start-marker.taglength, 0)
tagstart := obiutils.Max(start-marker.taglength, 0)
rtag := strings.ToLower(sseq[tagstart:start])
m := DemultiplexMatch{
@ -221,7 +221,7 @@ func (marker *Marker) Match(sequence *obiseq.BioSequence) *DemultiplexMatch {
defer direct.Recycle()
direct = direct.ReverseComplement(true)
endtag := obiutils.MinInt(end+marker.taglength, sequence.Len())
endtag := obiutils.Min(end+marker.taglength, sequence.Len())
ftag, err := sequence.Subsequence(end, endtag, false)
defer ftag.Recycle()
sftag := ""

View File

@ -38,13 +38,7 @@ import (
// return float64(m)
// }
// func minIntVector(values []int) float64 {
// m := values[0]
// for _, v := range values {
// if v < m {
// m = v
// }
// }
// return float64(m)
// }

View File

@ -27,7 +27,7 @@ func SuffixLess(suffixarray SuffixArray) func(i, j int) bool {
sj := suffixarray.Suffixes[j]
bj := (*suffixarray.Sequences)[int(sj.Idx)].Sequence()[sj.Pos:]
l := obiutils.MinInt(len(bi), len(bj))
l := obiutils.Min(len(bi), len(bj))
p := 0
for p < l && bi[p] == bj[p] {
p++
@ -92,7 +92,7 @@ func (suffixarray *SuffixArray) CommonSuffix() []int {
si := suffixarray.Suffixes[i]
bi := (*suffixarray.Sequences)[int(si.Idx)].Sequence()[si.Pos:]
l := obiutils.MinInt(len(bi), len(bp))
l := obiutils.Min(len(bi), len(bp))
p := 0
for p < l && bi[p] == bp[p] {
p++

View File

@ -4,92 +4,93 @@ import (
"fmt"
"os"
"path"
"sort"
"slices"
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiformats"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obikmer"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obisuffix"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
)
func BuildConsensus(seqs obiseq.BioSequenceSlice,
consensus_id string,
kmer_size int, quorum float64,
min_depth float64,
max_length int,
save_graph bool, dirname string) (*obiseq.BioSequence, error) {
if save_graph {
if dirname == "" {
dirname = "."
}
if stat, err := os.Stat(dirname); err != nil || !stat.IsDir() {
// path does not exist or is not directory
os.RemoveAll(dirname)
err := os.Mkdir(dirname, 0755)
if err != nil {
log.Panicf("Cannot create directory %s for saving graphs", dirname)
}
}
fasta, err := os.Create(path.Join(dirname, fmt.Sprintf("%s.fasta", consensus_id)))
if err == nil {
defer fasta.Close()
fasta.Write(obiformats.FormatFastaBatch(obiiter.MakeBioSequenceBatch(0, seqs), obiformats.FormatFastSeqJsonHeader, false))
fasta.Close()
}
}
log.Printf("Number of reads : %d\n", len(seqs))
if kmer_size < 0 {
longest := make([]int, len(seqs))
for i := range seqs {
s := seqs[i : i+1]
for i, seq := range seqs {
s := obiseq.BioSequenceSlice{seq}
sa := obisuffix.BuildSuffixArray(&s)
longest[i] = obiutils.MaxSlice(sa.CommonSuffix())
longest[i] = slices.Max(sa.CommonSuffix())
}
o := obiutils.Order(sort.IntSlice(longest))
i := int(float64(len(seqs)) * quorum)
// o := obiutils.Order(sort.IntSlice(longest))
// i := int(float64(len(seqs)) * quorum)
kmer_size = longest[o[i]] + 1
// if i >= len(o) {
// i = len(o) - 1
// }
kmer_size = slices.Max(longest) + 1
// kmer_size = longest[o[i]] + 1
log.Printf("estimated kmer size : %d", kmer_size)
}
graph := obikmer.MakeDeBruijnGraph(kmer_size)
var graph *obikmer.DeBruijnGraph
for {
graph = obikmer.MakeDeBruijnGraph(kmer_size)
for _, s := range seqs {
graph.Push(s)
}
log.Printf("Graph size : %d\n", graph.Len())
total_kmer := graph.Len()
threshold := 0
switch {
case min_depth < 0:
spectrum := graph.LinkSpectrum()
cum := make(map[int]int)
spectrum[1] = 0
for i := 2; i < len(spectrum); i++ {
spectrum[i] += spectrum[i-1]
cum[spectrum[i]]++
}
max := 0
kmax := 0
for k, obs := range cum {
if obs > max {
max = obs
kmax = k
}
}
for i, total := range spectrum {
if total == kmax {
threshold = i
if !graph.HasCycle() {
break
}
}
threshold /= 2
case min_depth >= 1:
threshold = int(min_depth)
default:
threshold = int(float64(len(seqs)) * min_depth)
}
graph.FilterMin(threshold)
log.Printf("Graph size : %d\n", graph.Len())
kmer_size++
log.Infof("Cycle detected, increasing kmer size to %d\n", kmer_size)
}
if save_graph {
file, err := os.Create(path.Join(dirname,
fmt.Sprintf("%s.gml", seqs[0].Source())))
fmt.Sprintf("%s_raw_consensus.gml", consensus_id)))
if err != nil {
fmt.Println(err)
@ -99,14 +100,72 @@ func BuildConsensus(seqs obiseq.BioSequenceSlice,
}
}
id := seqs[0].Source()
if id == "" {
id = seqs[0].Id()
}
seq, err := graph.LongestConsensus(id)
log.Printf("Graph size : %d\n", graph.Len())
total_kmer := graph.Len()
// threshold := 0
// switch {
// case min_depth < 0:
// spectrum := graph.WeightSpectrum()
// cum := make(map[int]int)
// spectrum[1] = 0
// for i := 2; i < len(spectrum); i++ {
// spectrum[i] += spectrum[i-1]
// cum[spectrum[i]]++
// }
// max := 0
// kmax := 0
// for k, obs := range cum {
// if obs > max {
// max = obs
// kmax = k
// }
// }
// for i, total := range spectrum {
// if total == kmax {
// threshold = i
// break
// }
// }
// threshold /= 2
// if threshold < 1 {
// threshold = 1
// }
// log.Info("Estimated kmer_min_occur = ", threshold)
// case min_depth >= 1:
// threshold = int(min_depth)
// default:
// threshold = int(float64(len(seqs)) * min_depth)
// }
// graph.FilterMinWeight(threshold)
// log.Printf("Graph size : %d\n", graph.Len())
// if save_graph {
// file, err := os.Create(path.Join(dirname,
// fmt.Sprintf("%s_consensus.gml", consensus_id)))
// if err != nil {
// fmt.Println(err)
// } else {
// file.WriteString(graph.Gml())
// file.Close()
// }
// }
seq, err := graph.LongestConsensus(consensus_id, max_length)
sumCount := 0
if seq != nil {
for _, s := range seqs {
sumCount += s.Count()
}
@ -114,14 +173,60 @@ func BuildConsensus(seqs obiseq.BioSequenceSlice,
seq.SetCount(sumCount)
seq.SetAttribute("seq_length", seq.Len())
seq.SetAttribute("kmer_size", kmer_size)
seq.SetAttribute("kmer_min_occur", threshold)
seq.SetAttribute("kmer_max_occur", graph.MaxLink())
//seq.SetAttribute("kmer_min_occur", threshold)
seq.SetAttribute("kmer_max_occur", graph.MaxWeight())
seq.SetAttribute("filtered_graph_size", graph.Len())
seq.SetAttribute("full_graph_size", total_kmer)
}
return seq, err
}
// func BuildConsensusWithTimeout(seqs obiseq.BioSequenceSlice,
// kmer_size int, quorum float64,
// min_depth float64,
// save_graph bool, dirname string, timeout time.Duration) (*obiseq.BioSequence, error) {
// ctx, cancel := context.WithTimeout(context.Background(), timeout)
// defer cancel()
// consensus := func() *obiseq.BioSequence {
// cons, err := BuildConsensus(seqs, kmer_size, quorum, min_depth, save_graph, dirname,)
// if err != nil {
// cons = nil
// }
// return cons
// }
// computation := func() <-chan *obiseq.BioSequence {
// result := make(chan *obiseq.BioSequence)
// go func() {
// select {
// case <-ctx.Done():
// result <- nil
// default:
// result <- consensus()
// }
// }()
// return result
// }
// calcResult := computation()
// select {
// case result := <-calcResult:
// if result == nil {
// return nil, fmt.Errorf("cannot compute consensus")
// }
// return result, nil
// case <-ctx.Done():
// return nil, fmt.Errorf("compute consensus timeout, exiting")
// }
// }
func Consensus(iterator obiiter.IBioSequence) obiiter.IBioSequence {
newIter := obiiter.MakeIBioSequence()
size := 10
@ -153,10 +258,19 @@ func Consensus(iterator obiiter.IBioSequence) obiiter.IBioSequence {
for iterator.Next() {
seqs := iterator.Get()
consensus, err := BuildConsensus(seqs.Slice(),
sequences := seqs.Slice()
id := sequences[0].Source()
if id == "" {
id = sequences[0].Id()
}
consensus, err := BuildConsensus(sequences,
id,
CLIKmerSize(), CLIThreshold(),
CLIKmerDepth(),
CLISaveGraphToFiles(), CLIGraphFilesDirectory(),
CLIMaxConsensusLength(),
CLISaveGraphToFiles(),
CLIGraphFilesDirectory(),
)
if err == nil {

View File

@ -9,6 +9,7 @@ var _saveGraph = "__@@NOSAVE@@__"
var _kmerSize = -1
var _threshold = 0.99
var _mindepth = -1.0
var _consensus_max_length = -1
func ObiconsensusOptionSet(options *getoptions.GetOpt) {
@ -38,6 +39,12 @@ func ObiconsensusOptionSet(options *getoptions.GetOpt) {
"Default value = -1, which means that the DEPTH is estimated from the data"),
)
options.IntVar(&_consensus_max_length, "consensus-max-length", _consensus_max_length,
options.ArgName("LENGTH"),
options.Description("Maximum length of the consensus sequence. "+
"Default value = -1, which means that no limit is applied"),
)
}
func OptionSet(options *getoptions.GetOpt) {
@ -67,3 +74,7 @@ func CLIKmerDepth() float64 {
func CLIThreshold() float64 {
return _threshold
}
func CLIMaxConsensusLength() int {
return _consensus_max_length
}

View File

@ -0,0 +1,290 @@
package obiminion
import (
"fmt"
"os"
"sync"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obialign"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obigraph"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiannotate"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconsensus"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiuniq"
"github.com/schollz/progressbar/v3"
log "github.com/sirupsen/logrus"
)
// SampleWeight calculates the weight of a sample based on the statistics of a sequence.
//
// Parameters:
//   - seqs: a pointer to BioSequenceSlice representing the sequences (*BioSequenceSlice)
//   - sample: the sample for which the weight is calculated (string)
//   - sample_key: the key used to access the sample's statistics (string)
//
// Return type: a function that takes an integer index and returns the weight of
// the sample at that index as a float64 (func(int) float64). The returned
// function yields 0 when the sequence has no statistics entry for the sample.
func SampleWeight(seqs *obiseq.BioSequenceSlice, sample, sample_key string) func(int) float64 {
	f := func(i int) float64 {
		// Look up the per-sample statistics of the i-th sequence;
		// "NA" is the fallback bucket used by StatsOn.
		stats := (*seqs)[i].StatsOn(sample_key, "NA")
		if value, ok := stats[sample]; ok {
			return float64(value)
		}
		return 0
	}
	return f
}
// SeqBySamples groups the sequences by the samples they occur in.
//
// A sequence carrying merged statistics for sample_key is added to the
// bucket of every sample present in those statistics; otherwise the plain
// string attribute sample_key decides the single bucket. Sequences with
// neither are silently skipped.
//
// Parameters:
//   - seqs: a BioSequenceSlice representing the sequences
//   - sample_key: the attribute name identifying the sample (string)
//
// Return type:
//   - map[string]*BioSequenceSlice: a map indexed by sample names, each
//     entry pointing to the slice of sequences observed in that sample.
func SeqBySamples(seqs obiseq.BioSequenceSlice, sample_key string) map[string]*obiseq.BioSequenceSlice {
	samples := make(map[string]*obiseq.BioSequenceSlice)

	// add registers sequence s under sample name k, creating the
	// bucket on first use. The map stores pointers, so appending
	// through the pointer needs no re-assignment into the map.
	add := func(k string, s *obiseq.BioSequence) {
		if seqset, ok := samples[k]; ok {
			*seqset = append(*seqset, s)
		} else {
			samples[k] = &obiseq.BioSequenceSlice{s}
		}
	}

	for _, s := range seqs {
		if s.HasStatsOn(sample_key) {
			// Merged sequence: one entry per sample listed in its stats.
			stats := s.StatsOn(sample_key, "NA")
			for k := range stats {
				add(k, s)
			}
		} else if k, ok := s.GetStringAttribute(sample_key); ok {
			// Plain sequence: a single sample given as a string attribute.
			add(k, s)
		}
	}

	return samples
}
// Mutation describes a single difference observed between two sequence
// variants linked by an edge of the difference graph.
type Mutation struct {
	Position int // position of the difference, as reported by the aligner
	SeqA byte // nucleotide observed in the first sequence of the pair
	SeqB byte // nucleotide observed in the second sequence of the pair
	Ratio float64 // abundance ratio of the two sequences (weight of A / weight of B) — TODO confirm orientation
}
// BuildDiffSeqGraph builds a graph connecting sequences differing by at
// most distmax differences.
//
// Vertices are the sequences of seqs; vertex weights come from
// SampleWeight(seqs, name, name_key). An edge carries a Mutation value.
// Edge computation over all i<j pairs is distributed on nworkers
// goroutines fed through the iseq channel; a progress bar is shown when
// enabled on the command line.
//
// Parameters:
//   - name: the sample name, also used to label the progress bar
//   - name_key: the attribute key giving access to per-sample statistics
//   - seqs: the sequences to connect
//   - distmax: the maximum number of differences joining two sequences
//   - nworkers: the number of concurrent edge-computing goroutines
//
// Returns the built graph.
func BuildDiffSeqGraph(name, name_key string,
	seqs *obiseq.BioSequenceSlice,
	distmax, nworkers int) *obigraph.Graph[*obiseq.BioSequence, Mutation] {
	graph := obigraph.NewGraphBuffer[*obiseq.BioSequence, Mutation](name, (*[]*obiseq.BioSequence)(seqs))
	iseq := make(chan int)
	defer graph.Close()
	ls := len(*seqs)
	sw := SampleWeight(seqs, name, name_key)
	graph.Graph.VertexWeight = sw
	waiting := sync.WaitGroup{}
	waiting.Add(nworkers)
	bar := (*progressbar.ProgressBar)(nil)
	if obiconvert.CLIProgressBar() {
		pbopt := make([]progressbar.Option, 0, 5)
		pbopt = append(pbopt,
			progressbar.OptionSetWriter(os.Stderr),
			progressbar.OptionSetWidth(15),
			progressbar.OptionShowIts(),
			progressbar.OptionSetPredictTime(true),
			progressbar.OptionSetDescription(fmt.Sprintf("[Build graph] on %s", name)),
		)
		bar = progressbar.NewOptions(len(*seqs), pbopt...)
	}
	// computeEdges consumes sequence indices from iseq and compares each
	// against all following sequences, adding an edge when they differ by
	// at most distmax positions.
	computeEdges := func() {
		defer waiting.Done()
		for i := range iseq {
			s1 := (*seqs)[i]
			for j := i + 1; j < ls; j++ {
				s2 := (*seqs)[j]
				// NOTE(review): sw(j) may be 0 for a sequence absent from
				// the sample's statistics, making ratio +Inf — confirm
				// callers guarantee positive weights.
				ratio := sw(i) / sw(j)
				ok, pos, a1, a2 := obialign.D1Or0(s1, s2)
				if ok >= 0 {
					// The two sequences differ by at most one position.
					graph.AddEdge(i, j, &Mutation{pos, a1, a2, ratio})
				} else if distmax > 1 {
					// Fall back on an LCS alignment for larger distances.
					// NOTE(review): pos/a1/a2 come from the failed D1Or0
					// call above — confirm they are meaningful here.
					lcs, lali := obialign.FastLCSScore(s1, s2, distmax, nil)
					dist := lali - lcs
					if lcs > 0 && dist <= distmax {
						// log.Infof("Seq %s and %s: LCSScore: %d, dist: %d\n", s1.Id(), s2.Id(), lcs, dist)
						graph.AddEdge(i, j, &Mutation{pos, a1, a2, ratio})
					}
				}
			}
			if bar != nil {
				bar.Add(1)
			}
		}
	}
	for i := 0; i < nworkers; i++ {
		go computeEdges()
	}
	// Feed every sequence index to the workers, then wait for completion.
	for i := 0; i < ls; i++ {
		iseq <- i
	}
	close(iseq)
	waiting.Wait()
	return graph.Graph
}
// MinionDenoise produces a denoised sequence for every vertex of the
// difference graph.
//
// A vertex with more than 4 neighbours is replaced by the consensus of
// itself and its neighbours (obiconsensus.BuildConsensus); on consensus
// failure, or for low-degree vertices, the original sequence is kept.
// Each returned sequence gets the attribute "obiminion_consensus"
// (true/false), its count set to the vertex weight, and sample_key set to
// the graph (sample) name.
//
// Parameters:
//   - graph: the difference graph built for one sample
//   - sample_key: the attribute name identifying the sample
//   - kmer_size, max_length, threshold, depth: forwarded to BuildConsensus
//
// Returns the slice of denoised sequences, one per vertex, in vertex order.
func MinionDenoise(graph *obigraph.Graph[*obiseq.BioSequence, Mutation],
	sample_key string, kmer_size int, max_length int, threshold float64, depth float64) obiseq.BioSequenceSlice {
	denoised := obiseq.MakeBioSequenceSlice(len(*graph.Vertices))
	for i, v := range *graph.Vertices {
		var err error
		var clean *obiseq.BioSequence
		degree := graph.Degree(i)
		if degree > 4 {
			// Gather the vertex and all its neighbours as consensus input.
			pack := obiseq.MakeBioSequenceSlice(degree + 1)
			for k,j := range graph.Neighbors(i) {
				pack[k] = (*graph.Vertices)[j]
			}
			pack[degree] = v
			clean, err = obiconsensus.BuildConsensus(pack,
				fmt.Sprintf("%s_consensus", v.Id()),
				kmer_size,
				threshold,
				depth, max_length,
				CLISaveGraphToFiles(), CLIGraphFilesDirectory())
			if err != nil {
				// Consensus failed: keep the original sequence.
				// NOTE(review): this aliases the shared vertex object and
				// mutates it below (SetAttribute/SetCount), unlike the
				// low-degree branch which builds a copy — confirm intended.
				log.Warning(err)
				clean = (*graph.Vertices)[i]
				clean.SetAttribute("obiminion_consensus", false)
			} else {
				clean.SetAttribute("obiminion_consensus", true)
			}
			pack.Recycle(false)
		} else {
			// Low-degree vertex: keep a copy of the original sequence.
			clean = obiseq.NewBioSequence(v.Id(), v.Sequence(), v.Definition())
			clean.SetAttribute("obiminion_consensus", false)
		}
		clean.SetCount(int(graph.VertexWeight(i)))
		clean.SetAttribute(sample_key, graph.Name)
		denoised[i] = clean
	}
	return denoised
}
// CLIOBIMinion denoises the input sequence dataset sample by sample.
//
// The whole dataset is loaded in memory and partitioned by sample
// (SeqBySamples). For each sample a pairwise-difference graph is built
// (BuildDiffSeqGraph), its sequences are denoised (MinionDenoise), and
// the per-sample results are finally dereplicated with obiuniq before a
// sequence-length annotation is added.
//
// iterator: the iterator providing the input sequences.
// Returns an iterator over the denoised, dereplicated sequences.
func CLIOBIMinion(iterator obiiter.IBioSequence) obiiter.IBioSequence {
	dirname := CLIGraphFilesDirectory()
	newIter := obiiter.MakeIBioSequence()

	db := iterator.Load()

	log.Infof("Sequence dataset of %d sequences loaded\n", len(db))

	samples := SeqBySamples(db, CLISampleAttribute())

	db.Recycle(false)

	log.Infof("Dataset composed of %d samples\n", len(samples))

	if CLIMaxConsensusLength() > 0 {
		log.Infof("Maximum consensus length: %d\n", CLIMaxConsensusLength())
	}

	if CLISaveGraphToFiles() {
		if stat, err := os.Stat(dirname); err != nil || !stat.IsDir() {
			// path does not exist or is not a directory: recreate it
			os.RemoveAll(dirname)
			err := os.Mkdir(dirname, 0755)

			if err != nil {
				log.Panicf("Cannot create directory %s for saving graphs", dirname)
			}
		}
	}

	bar := (*progressbar.ProgressBar)(nil)

	if obiconvert.CLIProgressBar() {
		pbopt := make([]progressbar.Option, 0, 5)
		pbopt = append(pbopt,
			progressbar.OptionSetWriter(os.Stderr),
			progressbar.OptionSetWidth(15),
			progressbar.OptionShowIts(),
			progressbar.OptionSetPredictTime(true),
			progressbar.OptionSetDescription("[Filter graph on abundance ratio]"),
		)

		bar = progressbar.NewOptions(len(samples), pbopt...)
	}

	newIter.Add(1)

	// Process each sample in its own batch, pushing results downstream.
	go func() {
		sample_order := 0
		for sample, seqs := range samples {
			graph := BuildDiffSeqGraph(sample,
				CLISampleAttribute(),
				seqs,
				CLIDistStepMax(),
				obioptions.CLIParallelWorkers())

			if bar != nil {
				bar.Add(1)
			}

			if CLISaveGraphToFiles() {
				graph.WriteGmlFile(fmt.Sprintf("%s/%s.gml",
					CLIGraphFilesDirectory(),
					sample),
					false, 1, 0, 3)
			}

			denoised := MinionDenoise(graph,
				CLISampleAttribute(),
				CLIKmerSize(),
				CLIMaxConsensusLength(),
				CLIThreshold(),
				CLIKmerDepth())

			newIter.Push(obiiter.MakeBioSequenceBatch(sample_order, denoised))
			sample_order++
		}

		newIter.Done()
	}()

	go func() {
		newIter.WaitAndClose()
	}()

	// Dereplicate the denoised sequences, merging per-sample statistics.
	obiuniq.AddStatsOn(CLISampleAttribute())
	obiuniq.SetUniqueInMemory(false)
	obiuniq.SetNoSingleton(CLINoSingleton())
	return obiuniq.CLIUnique(newIter).Pipe(obiiter.WorkerPipe(obiannotate.AddSeqLengthWorker(), false))
}

View File

@ -0,0 +1,179 @@
package obiminion
import (
	"fmt"

	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"

	"github.com/DavidGamba/go-getoptions"
)
var _distStepMax = 1 // -d/--distance: maximum differences between two variant sequences
var _sampleAttribute = "sample" // -s/--sample: attribute holding the sample name
var _ratioMax = 1.0 // upper bound on abundance ratio — appears unused in this option set, TODO confirm
var _minEvalRate = 1000 // --min-eval-rate: minimum abundance to evaluate mutation rate
var _clusterMode = false // not settable from this option set — TODO confirm still needed
var _onlyHead = false // not settable from this option set — TODO confirm still needed
var _kmerSize = -1 // --kmer-size: -1 means the kmer size is estimated from the data
var _threshold = 1.0 // --threshold: ratio used to determine the optimal kmer size
var _mindepth = -1.0 // --min-depth: -1 means the depth is estimated from the data
var _consensus_max_length = 1000 // --consensus-max-length: maximum consensus length (-1 disables the limit)
var _NoSingleton = false // --no-singleton: discard sequences occurring a single time
var _saveGraph = "__@@NOSAVE@@__" // --save-graph: output directory; sentinel value means "do not save"
var _saveRatio = "__@@NOSAVE@@__" // --save-ratio: output file; sentinel value means "do not save"
// ObiminionOptionSet declares the command-line options specific to obiminion.
//
// options: the option parser the flags are registered on.
func ObiminionOptionSet(options *getoptions.GetOpt) {
	// Default values are injected into the help text with fmt.Sprintf:
	// go-getoptions does not format Description strings itself.
	options.StringVar(&_sampleAttribute, "sample", _sampleAttribute,
		options.Alias("s"),
		options.Description(fmt.Sprintf("Attribute containing sample descriptions (default %s).", _sampleAttribute)))

	options.IntVar(&_distStepMax, "distance", _distStepMax,
		options.Alias("d"),
		options.Description(fmt.Sprintf("Maximum numbers of differences between two variant sequences (default: %d).", _distStepMax)))

	options.IntVar(&_minEvalRate, "min-eval-rate", _minEvalRate,
		options.Description("Minimum abundance of a sequence to be used to evaluate mutation rate."))

	options.StringVar(&_saveGraph, "save-graph", _saveGraph,
		options.Description("Creates a directory containing the set of DAG used by the obiclean clustering algorithm. "+
			"The graph files follow the graphml format."),
	)

	options.StringVar(&_saveRatio, "save-ratio", _saveRatio,
		options.Description("Creates a file containing the set of abundance ratio on the graph edge. "+
			"The ratio file follows the csv format."),
	)

	options.IntVar(&_kmerSize, "kmer-size", _kmerSize,
		options.ArgName("SIZE"),
		options.Description("The size of the kmer used to build the consensus. "+
			"Default value = -1, which means that the kmer size is estimated from the data"),
	)

	options.Float64Var(&_threshold, "threshold", _threshold,
		options.ArgName("RATIO"),
		options.Description("A threshold between 0 and 1 used to determine the optimal "+
			"kmer size"),
	)

	options.Float64Var(&_mindepth, "min-depth", _mindepth,
		options.ArgName("DEPTH"),
		options.Description("if DEPTH is between 0 and 1, it corresponds to fraction of the "+
			"reads in which a kmer must occurs to be conserved in the graph. If DEPTH is greater "+
			"than 1, indicate the minimum count of occurrence for a kmer to be kept. "+
			"Default value = -1, which means that the DEPTH is estimated from the data"),
	)

	options.IntVar(&_consensus_max_length, "consensus-max-length", _consensus_max_length,
		options.ArgName("LENGTH"),
		options.Description(fmt.Sprintf("Maximum length of the consensus sequence. "+
			"A value of -1 means that no limit is applied (default: %d).", _consensus_max_length)),
	)

	options.BoolVar(&_NoSingleton, "no-singleton", _NoSingleton,
		options.Description("If set, sequences occurring a single time in the data set are discarded."))
}
// OptionSet declares the full option set of the obiminion command:
// the shared input and output options of obiconvert, plus the
// obiminion-specific options.
//
// It takes a pointer to a getoptions.GetOpt object as a parameter.
// It does not return any value.
func OptionSet(options *getoptions.GetOpt) {
	obiconvert.InputOptionSet(options)
	obiconvert.OutputOptionSet(options)
	ObiminionOptionSet(options)
}
// CLIDistStepMax returns the maximum distance between two sequences.
//
// The value of the distance is set by the user with the `-d` flag.
//
// No parameters.
// Returns an integer.
func CLIDistStepMax() int {
	return _distStepMax
}
// CLISampleAttribute returns the name of the attribute used to store sample name.
//
// The value of the sample attribute is set by the user with the `-s` flag.
//
// No parameters.
// Returns a string.
func CLISampleAttribute() string {
	return _sampleAttribute
}
// CLIMinCountToEvalMutationRate returns the minimum number of reads that
// must be observed before the mutation rate can be evaluated.
func CLIMinCountToEvalMutationRate() int {
	return _minEvalRate
}
// ClusterMode reports whether the cluster mode is enabled.
// NOTE(review): no flag in this option set changes _clusterMode — confirm intended.
func ClusterMode() bool {
	return _clusterMode
}
// OnlyHead reports whether only head sequences must be kept.
// NOTE(review): no flag in this option set changes _onlyHead — confirm intended.
func OnlyHead() bool {
	return _onlyHead
}
// CLISaveGraphToFiles returns true if the obiclean graphs must be saved.
func CLISaveGraphToFiles() bool {
	return _saveGraph != "__@@NOSAVE@@__"
}
// CLIGraphFilesDirectory returns the directory where the graph files are saved.
func CLIGraphFilesDirectory() string {
	return _saveGraph
}
// IsSaveRatioTable returns true if the table of abundance ratios must be saved.
func IsSaveRatioTable() bool {
	return _saveRatio != "__@@NOSAVE@@__"
}
// RatioTableFilename returns the name of the file that stores the ratio table.
func RatioTableFilename() string {
	return _saveRatio
}
// CLIKmerSize returns the value of the kmer size to use for building the consensus.
//
// The value of the kmer size is set by the user with the `-k` flag.
// The value -1 means that the kmer size is estimated as the minimum value that
// insure that no kmer are present more than one time in a sequence.
//
// No parameters.
// Returns an integer value.
func CLIKmerSize() int {
	return _kmerSize
}
// CLIKmerDepth returns the minimum depth for a kmer to be kept in the
// consensus graph (--min-depth). -1 means estimated from the data.
func CLIKmerDepth() float64 {
	return _mindepth
}
// CLIThreshold returns the ratio used to determine the optimal kmer size
// (--threshold).
func CLIThreshold() float64 {
	return _threshold
}
// CLIMaxConsensusLength returns the maximum allowed length of a consensus
// sequence (--consensus-max-length).
func CLIMaxConsensusLength() int {
	return _consensus_max_length
}
// CLINoSingleton returns a boolean value indicating whether or not singleton sequences should be discarded.
//
// No parameters.
// Returns a boolean value indicating whether or not singleton sequences should be discarded.
func CLINoSingleton() bool {
	return _NoSingleton
}

View File

@ -47,8 +47,8 @@ func CLIPCR(iterator obiiter.IBioSequence) (obiiter.IBioSequence, error) {
frags := obiiter.IFragments(
CLIMaxLength()*1000,
CLIMaxLength()*100,
CLIMaxLength()+obiutils.MaxInt(len(CLIForwardPrimer()),
len(CLIReversePrimer()))+obiutils.MinInt(len(CLIForwardPrimer()),
CLIMaxLength()+obiutils.Max(len(CLIForwardPrimer()),
len(CLIReversePrimer()))+obiutils.Min(len(CLIForwardPrimer()),
len(CLIReversePrimer()))/2,
100,
obioptions.CLIParallelWorkers(),

View File

@ -63,7 +63,7 @@ func IndexSequence(seqidx int,
if lca[order] == ancestor {
// nseq[i]++
if mini != -1 {
wordmin = obiutils.MaxInt(sequence.Len(), references[order].Len()) - 3 - 4*mini
wordmin = obiutils.Max(sequence.Len(), references[order].Len()) - 3 - 4*mini
}
if cw[order] < wordmin {
@ -189,7 +189,7 @@ func IndexReferenceDB(iterator obiiter.IBioSequence) obiiter.IBioSequence {
indexed := obiiter.MakeIBioSequence()
go func() {
for i := 0; i < len(references); i += 10 {
limits <- [2]int{i, obiutils.MinInt(i+10, len(references))}
limits <- [2]int{i, obiutils.Min(i+10, len(references))}
}
close(limits)
}()

View File

@ -110,7 +110,7 @@ func FindClosests(sequence *obiseq.BioSequence,
d, _, _, _ := obialign.D1Or0(sequence, references[order])
if d >= 0 {
score = d
alilength = obiutils.MaxInt(sequence.Len(), ref.Len())
alilength = obiutils.Max(sequence.Len(), ref.Len())
lcs = alilength - score
}
} else {

View File

@ -12,6 +12,11 @@ var _chunks = 100
var _NAValue = "NA"
var _NoSingleton = false
// UniqueOptionSet sets up unique options for the obiuniq command.
//
// It configures various options such as merging attributes, category attributes,
// defining the NA value, handling singleton sequences, choosing between in-memory
// or disk storage, and specifying the chunk count for dataset division.
func UniqueOptionSet(options *getoptions.GetOpt) {
options.StringSliceVar(&_StatsOn, "merge",
1, 1,
@ -40,25 +45,67 @@ func UniqueOptionSet(options *getoptions.GetOpt) {
}
// OptionSet adds to the basic option set every options declared for
// the obiuniq command
//
// It takes a pointer to a GetOpt struct as its parameter and does not return anything.
func OptionSet(options *getoptions.GetOpt) {
obiconvert.OptionSet(options)
UniqueOptionSet(options)
}
// CLIStatsOn returns the list of variables on witch statistics are computed.
//
// It does not take any parameters.
// It returns a slice of strings representing the statistics on values.
func CLIStatsOn() []string {
return _StatsOn
}
// SetStatsOn sets the list of variables on witch statistics are computed.
//
// It takes a slice of strings as its parameter and does not return anything.
func SetStatsOn(statsOn []string) {
_StatsOn = statsOn
}
// AddStatsOn adds a variable to the list of variables on witch statistics are computed.
//
// Parameters:
// - statsOn: variadic strings representing the statistics to be added.
func AddStatsOn(statsOn ...string) {
_StatsOn = append(_StatsOn, statsOn...)
}
// CLIKeys returns the keys used to distinguished among identical sequences.
//
// It does not take any parameters.
// It returns a slice of strings representing the keys used by the CLI.
func CLIKeys() []string {
return _Keys
}
// CLIUniqueInMemory returns if the unique function is running in memory only.
//
// It does not take any parameters.
// It returns a boolean value indicating whether the function is running in memory or not.
func CLIUniqueInMemory() bool {
return _InMemory
}
// SetUniqueInMemory sets whether the unique function is running in memory or not.
//
// inMemory bool - A boolean value indicating whether the function is running in memory.
// No return value.
func SetUniqueInMemory(inMemory bool) {
_InMemory = inMemory
}
// CLINumberOfChunks returns the number of chunks used for the first bucket sort step used by the unique function.
//
// It does not take any parameters.
// It returns an integer representing the number of chunks.
func CLINumberOfChunks() int {
if _chunks <= 1 {
return 1
@ -67,10 +114,40 @@ func CLINumberOfChunks() int {
return _chunks
}
// SetNumberOfChunks sets the number of chunks used for the first bucket sort step used by the unique function.
//
// chunks int - The number of chunks to be set.
// No return value.
func SetNumberOfChunks(chunks int) {
_chunks = chunks
}
// CLINAValue returns the value used as a placeholder for missing values.
//
// No parameters.
// Return type: string.
func CLINAValue() string {
return _NAValue
}
// SetNAValue sets the NA value to the specified string.
//
// value string - The value to set as the NA value.
func SetNAValue(value string) {
_NAValue = value
}
// CLINoSingleton returns a boolean value indicating whether or not singleton sequences should be discarded.
//
// No parameters.
// Returns a boolean value indicating whether or not singleton sequences should be discarded.
func CLINoSingleton() bool {
return _NoSingleton
}
// SetNoSingleton sets the boolean value indicating whether or not singleton sequences should be discarded.
//
// noSingleton bool - The boolean value to set for _NoSingleton.
func SetNoSingleton(noSingleton bool) {
_NoSingleton = noSingleton
}

View File

@ -8,7 +8,7 @@ import (
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
)
func Unique(sequences obiiter.IBioSequence) obiiter.IBioSequence {
func CLIUnique(sequences obiiter.IBioSequence) obiiter.IBioSequence {
options := make([]obichunk.WithOption, 0, 30)

View File

@ -1,69 +1,32 @@
package obiutils
import "golang.org/x/exp/constraints"
import (
"golang.org/x/exp/constraints"
)
func MinInt(x, y int) int {
func Min[T constraints.Ordered](x, y T) T {
if x < y {
return x
}
return y
}
func MaxInt(x, y int) int {
func Max[T constraints.Ordered](x, y T) T {
if x < y {
return y
}
return x
}
func MinMaxInt(x, y int) (int, int) {
func MinMax[T constraints.Ordered](x, y T) (T, T) {
if x < y {
return x, y
}
return y, x
}
func MinUInt16(x, y uint16) uint16 {
if x < y {
return x
}
return y
}
func MaxUInt16(x, y uint16) uint16 {
if x < y {
return y
}
return x
}
func MinSlice[T constraints.Ordered](vec []T) T {
if len(vec) == 0 {
panic("empty slice")
}
min := vec[0]
for _, v := range vec {
if v < min {
min = v
}
}
return min
}
func MaxSlice[T constraints.Ordered](vec []T) T {
if len(vec) == 0 {
panic("empty slice")
}
max := vec[0]
for _, v := range vec {
if v > max {
max = v
}
}
return max
}
func RangeSlice[T constraints.Ordered](vec []T) (min, max T) {
func MinMaxSlice[T constraints.Ordered](vec []T) (min, max T) {
if len(vec) == 0 {
panic("empty slice")
}

View File

@ -13,8 +13,8 @@ import (
// Zero is a zero-valued uint128.
var Zero Uint128
// Max is the largest possible uint128 value.
var Max = New(math.MaxUint64, math.MaxUint64)
// MaxUint128 is the largest possible uint128 value.
var MaxUint128 = New(math.MaxUint64, math.MaxUint64)
// A Uint128 is an unsigned 128-bit number.
type Uint128 struct {