mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-12-10 01:30:27 +00:00
A first version of obimicroasm...
This commit is contained in:
@@ -8,9 +8,12 @@ import (
|
||||
"math/bits"
|
||||
"os"
|
||||
"slices"
|
||||
"sort"
|
||||
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obistats"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||
"github.com/ef-ds/deque/v2"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
@@ -89,12 +92,18 @@ type DeBruijnGraph struct {
|
||||
//
|
||||
// *DeBruijnGraph - a pointer to the created De Bruijn's Graph
|
||||
func MakeDeBruijnGraph(kmersize int) *DeBruijnGraph {
|
||||
if kmersize > 31 {
|
||||
log.Panicf("k-mer size %d is too large", kmersize)
|
||||
}
|
||||
|
||||
kmermask := (^uint64(0) << (uint64(kmersize) * 2))
|
||||
|
||||
g := DeBruijnGraph{
|
||||
kmersize: kmersize,
|
||||
kmermask: ^(^uint64(0) << (uint64(kmersize) * 2)), // k-mer mask used to set to 0 the bits that are not in the k-mer
|
||||
prevc: uint64(1) << (uint64(kmersize-1) * 2),
|
||||
prevg: uint64(2) << (uint64(kmersize-1) * 2),
|
||||
prevt: uint64(3) << (uint64(kmersize-1) * 2),
|
||||
kmermask: kmermask, // k-mer mask used to set to 1 the bits that are not in the k-mer
|
||||
prevc: (uint64(1) << (uint64(kmersize-1) * 2)) | kmermask,
|
||||
prevg: (uint64(2) << (uint64(kmersize-1) * 2)) | kmermask,
|
||||
prevt: (uint64(3) << (uint64(kmersize-1) * 2)) | kmermask,
|
||||
graph: make(map[uint64]uint),
|
||||
}
|
||||
|
||||
@@ -161,19 +170,34 @@ func (g *DeBruijnGraph) FilterMinWeight(min int) {
|
||||
}
|
||||
}
|
||||
|
||||
// FilterMinWeight filters the DeBruijnGraph by removing nodes with weight less than the specified minimum.
|
||||
//
|
||||
// min: an integer representing the minimum count threshold.
|
||||
func (g *DeBruijnGraph) FilterMaxWeight(min int) {
|
||||
umin := uint(min)
|
||||
for idx, count := range g.graph {
|
||||
if count > umin {
|
||||
delete(g.graph, idx)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (g *DeBruijnGraph) Previouses(index uint64) []uint64 {
|
||||
if _, ok := g.graph[index]; !ok {
|
||||
log.Panicf("k-mer %s (index %d) is not in graph", g.DecodeNode(index), index)
|
||||
}
|
||||
|
||||
rep := make([]uint64, 0, 4)
|
||||
|
||||
index &= ^g.kmermask
|
||||
index >>= 2
|
||||
|
||||
if _, ok := g.graph[index]; ok {
|
||||
rep = append(rep, index)
|
||||
key := index | g.kmermask
|
||||
if _, ok := g.graph[key]; ok {
|
||||
rep = append(rep, key)
|
||||
}
|
||||
|
||||
key := index | g.prevc
|
||||
key = index | g.prevc
|
||||
if _, ok := g.graph[key]; ok {
|
||||
rep = append(rep, key)
|
||||
}
|
||||
@@ -197,7 +221,7 @@ func (g *DeBruijnGraph) Nexts(index uint64) []uint64 {
|
||||
}
|
||||
|
||||
rep := make([]uint64, 0, 4)
|
||||
index = (index << 2) & g.kmermask
|
||||
index = (index << 2) | g.kmermask
|
||||
|
||||
if _, ok := g.graph[index]; ok {
|
||||
rep = append(rep, index)
|
||||
@@ -268,6 +292,33 @@ func (g *DeBruijnGraph) MaxHead() (uint64, int, bool) {
|
||||
return rep, int(max), found
|
||||
}
|
||||
|
||||
func (g *DeBruijnGraph) Terminals() []uint64 {
|
||||
rep := make([]uint64, 0, 10)
|
||||
|
||||
for k := range g.graph {
|
||||
if len(g.Nexts(k)) == 0 {
|
||||
rep = append(rep, k)
|
||||
}
|
||||
}
|
||||
|
||||
return rep
|
||||
}
|
||||
|
||||
func (g *DeBruijnGraph) MaxTerminal() (uint64, int, bool) {
|
||||
rep := uint64(0)
|
||||
max := uint(0)
|
||||
found := false
|
||||
for k, w := range g.graph {
|
||||
if len(g.Nexts(k)) == 0 && w > max {
|
||||
rep = k
|
||||
max = w
|
||||
found = true
|
||||
}
|
||||
}
|
||||
|
||||
return rep, int(max), found
|
||||
}
|
||||
|
||||
func (g *DeBruijnGraph) MaxPath() []uint64 {
|
||||
path := make([]uint64, 0, 1000)
|
||||
ok := false
|
||||
@@ -318,7 +369,11 @@ func (g *DeBruijnGraph) LongestConsensus(id string, min_cov float64) (*obiseq.Bi
|
||||
return nil, fmt.Errorf("graph is empty")
|
||||
}
|
||||
//path := g.LongestPath(max_length)
|
||||
path := g.HaviestPath()
|
||||
path, err := g.HaviestPath(nil, nil, false)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
spath := path
|
||||
|
||||
@@ -481,7 +536,7 @@ func (graph *DeBruijnGraph) append(sequence []byte, current uint64, weight int)
|
||||
}
|
||||
|
||||
current <<= 2
|
||||
current &= graph.kmermask
|
||||
current |= graph.kmermask
|
||||
b := iupac[sequence[0]]
|
||||
current |= b[0]
|
||||
graph.graph[current] = uint(graph.Weight(current) + weight)
|
||||
@@ -495,6 +550,36 @@ func (graph *DeBruijnGraph) append(sequence []byte, current uint64, weight int)
|
||||
}
|
||||
}
|
||||
|
||||
// func (graph *DeBruijnGraph) search(current uint64, extension []byte, path []uint64, error,errormax int) ([]uint64,error) {
|
||||
|
||||
// path = append(path, current)
|
||||
|
||||
// if len(extension) == 0 {
|
||||
// return path,nil
|
||||
// }
|
||||
|
||||
// current <<= 2
|
||||
// current &= graph.kmermask
|
||||
// b := iupac[extension[0]]
|
||||
|
||||
// newPath := path
|
||||
// if len(b) > 1 {
|
||||
// newPath = slices.Clone(path)
|
||||
// }
|
||||
|
||||
// current |= b[0]
|
||||
|
||||
// _, ok := graph.graph[current]
|
||||
// if ok {
|
||||
// newPath = append(newPath, current)
|
||||
// }
|
||||
// rep, err := graph.search(current, extension[1:], newPath, error,errormax)
|
||||
// if err != nil {
|
||||
// return path,err
|
||||
// }
|
||||
|
||||
// }
|
||||
|
||||
// Push appends a BioSequence to the DeBruijnGraph.
|
||||
//
|
||||
// Parameters:
|
||||
@@ -523,6 +608,7 @@ func (graph *DeBruijnGraph) Push(sequence *obiseq.BioSequence) {
|
||||
initFirstKmer(start+1, key)
|
||||
}
|
||||
} else {
|
||||
key |= graph.kmermask
|
||||
graph.graph[key] = uint(graph.Weight(key) + w)
|
||||
graph.append(s[graph.kmersize:], key, w)
|
||||
}
|
||||
@@ -533,6 +619,110 @@ func (graph *DeBruijnGraph) Push(sequence *obiseq.BioSequence) {
|
||||
}
|
||||
}
|
||||
|
||||
func (graph *DeBruijnGraph) search(sequence []byte, mismatch, errormax int) []uint64 {
|
||||
var initFirstKmer func(start int, key uint64) []uint64
|
||||
|
||||
initFirstKmer = func(start int, key uint64) []uint64 {
|
||||
if start == graph.kmersize {
|
||||
key |= graph.kmermask
|
||||
if _, ok := graph.graph[key]; ok {
|
||||
return []uint64{key}
|
||||
} else {
|
||||
return []uint64{}
|
||||
}
|
||||
}
|
||||
|
||||
keys := make([]uint64, 0, 1000)
|
||||
|
||||
if start == 0 {
|
||||
key = 0
|
||||
}
|
||||
|
||||
key <<= 2
|
||||
b := iupac[sequence[start]]
|
||||
|
||||
for _, code := range b {
|
||||
key &= ^uint64(3)
|
||||
key |= code
|
||||
keys = append(keys, initFirstKmer(start+1, key)...)
|
||||
}
|
||||
|
||||
// w := []string{}
|
||||
// for _, k := range keys {
|
||||
// w = append(w, graph.DecodeNode(k))
|
||||
// }
|
||||
// // log.Warnf("For %s found %d matches : %v", sequence, len(keys), w)
|
||||
|
||||
return keys
|
||||
}
|
||||
|
||||
rep := initFirstKmer(0, 0)
|
||||
|
||||
return rep
|
||||
}
|
||||
|
||||
func (graph *DeBruijnGraph) Search(sequence *obiseq.BioSequence, errormax int) []uint64 {
|
||||
|
||||
s := sequence.Sequence() // Get the sequence as a byte slice
|
||||
|
||||
if len(s) < graph.KmerSize() {
|
||||
s = slices.Clone(s)
|
||||
for len(s) < graph.KmerSize() {
|
||||
s = append(s, 'n')
|
||||
}
|
||||
}
|
||||
|
||||
log.Warnf("searching for %s", s)
|
||||
keys := graph.search(s, 0, errormax)
|
||||
|
||||
for mismatch := 1; mismatch <= errormax; mismatch++ {
|
||||
log.Warnf("searching with %d error for %s", mismatch, s)
|
||||
for probe := range IterateOneError(s[0:graph.kmersize]) {
|
||||
keys = append(keys,
|
||||
graph.search(probe, mismatch, errormax)...,
|
||||
)
|
||||
}
|
||||
}
|
||||
keys = obiutils.Unique(keys)
|
||||
|
||||
return keys
|
||||
}
|
||||
|
||||
func (graph *DeBruijnGraph) BackSearch(sequence *obiseq.BioSequence, errormax int) []uint64 {
|
||||
lkmer := graph.KmerSize()
|
||||
|
||||
s := sequence.Sequence() // Get the sequence as a byte slice
|
||||
|
||||
if len(s) < lkmer {
|
||||
sn := []byte{}
|
||||
ls := len(s)
|
||||
for ls < lkmer {
|
||||
sn = append(sn, 'n')
|
||||
ls++
|
||||
}
|
||||
s = append(sn, s...)
|
||||
} else {
|
||||
s = s[(len(s) - lkmer):]
|
||||
}
|
||||
|
||||
log.Warnf("back-searching for %s", s)
|
||||
|
||||
keys := graph.search(s, 0, errormax)
|
||||
|
||||
for mismatch := 1; mismatch <= errormax; mismatch++ {
|
||||
log.Warnf("searching with %d error for %s", mismatch, s)
|
||||
for probe := range IterateOneError(s[0:graph.kmersize]) {
|
||||
// log.Warnf("searching with %d error for %s", mismatch, probe)
|
||||
keys = append(keys,
|
||||
graph.search(probe, mismatch, errormax)...,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
keys = obiutils.Unique(keys)
|
||||
return keys
|
||||
}
|
||||
|
||||
func (graph *DeBruijnGraph) Gml() string {
|
||||
buffer := bytes.NewBuffer(make([]byte, 0, 1000))
|
||||
|
||||
@@ -614,7 +804,7 @@ func (graph *DeBruijnGraph) WriteGml(filename string) error {
|
||||
func (g *DeBruijnGraph) HammingDistance(kmer1, kmer2 uint64) int {
|
||||
ident := ^((kmer1 & kmer2) | (^kmer1 & ^kmer2))
|
||||
ident |= (ident >> 1)
|
||||
ident &= 0x5555555555555555 & g.kmermask
|
||||
ident &= 0x5555555555555555 & ^g.kmermask
|
||||
return bits.OnesCount64(ident)
|
||||
}
|
||||
|
||||
@@ -638,11 +828,23 @@ func (h *UInt64Heap) Pop() any {
|
||||
return x
|
||||
}
|
||||
|
||||
func (g *DeBruijnGraph) HaviestPath() []uint64 {
|
||||
func (g *DeBruijnGraph) HaviestPath(starts, stops []uint64, backPath bool) ([]uint64, error) {
|
||||
|
||||
if g.HasCycle() {
|
||||
return nil
|
||||
// if g.HasCycle() {
|
||||
// return nil, fmt.Errorf("graph has a cycle")
|
||||
// }
|
||||
|
||||
following := g.Nexts
|
||||
|
||||
if backPath {
|
||||
following = g.Previouses
|
||||
}
|
||||
|
||||
stopNodes := make(map[uint64]bool, len(stops))
|
||||
for _, n := range stops {
|
||||
stopNodes[n] = true
|
||||
}
|
||||
|
||||
// Initialize the distance array and visited set
|
||||
distances := make(map[uint64]int)
|
||||
visited := make(map[uint64]bool)
|
||||
@@ -654,7 +856,11 @@ func (g *DeBruijnGraph) HaviestPath() []uint64 {
|
||||
heap.Init(queue)
|
||||
|
||||
startNodes := make(map[uint64]struct{})
|
||||
for _, n := range g.Heads() {
|
||||
if starts == nil {
|
||||
starts = g.Heads()
|
||||
}
|
||||
|
||||
for _, n := range starts {
|
||||
startNodes[n] = struct{}{}
|
||||
heap.Push(queue, n)
|
||||
distances[n] = g.Weight(n)
|
||||
@@ -686,7 +892,11 @@ func (g *DeBruijnGraph) HaviestPath() []uint64 {
|
||||
log.Warn("current node is 0")
|
||||
}
|
||||
// Update the distance of the neighbors
|
||||
nextNodes := g.Nexts(currentNode)
|
||||
|
||||
nextNodes := following(currentNode)
|
||||
if _, ok := stopNodes[currentNode]; ok {
|
||||
nextNodes = []uint64{}
|
||||
}
|
||||
for _, nextNode := range nextNodes {
|
||||
if nextNode == 0 {
|
||||
log.Warn("next node is 0")
|
||||
@@ -718,16 +928,178 @@ func (g *DeBruijnGraph) HaviestPath() []uint64 {
|
||||
}
|
||||
|
||||
if slices.Contains(heaviestPath, currentNode) {
|
||||
log.Panicf("Cycle detected %v -> %v (%v) len(%v), graph: %v", heaviestPath, currentNode, startNodes, len(heaviestPath), g.Len())
|
||||
return nil
|
||||
return nil, fmt.Errorf("cycle detected in heaviest path")
|
||||
}
|
||||
|
||||
heaviestPath = append(heaviestPath, currentNode)
|
||||
|
||||
// Reverse the path
|
||||
slices.Reverse(heaviestPath)
|
||||
if !backPath {
|
||||
slices.Reverse(heaviestPath)
|
||||
}
|
||||
|
||||
return heaviestPath
|
||||
return heaviestPath, nil
|
||||
}
|
||||
|
||||
func (g *DeBruijnGraph) HaviestPathDSU(starts, stops []uint64, backPath bool) ([]uint64, error) {
|
||||
// Collect and sort edges
|
||||
type Edge struct {
|
||||
weight float64
|
||||
u, v uint64
|
||||
}
|
||||
edges := make([]Edge, 0)
|
||||
|
||||
// Function to get next nodes (either Nexts or Previouses based on backPath)
|
||||
following := g.Nexts
|
||||
previouses := g.Previouses
|
||||
if backPath {
|
||||
following = g.Previouses
|
||||
previouses = g.Nexts
|
||||
}
|
||||
|
||||
// Collect all edges
|
||||
for u := range g.graph {
|
||||
for _, v := range following(u) {
|
||||
edges = append(edges, Edge{
|
||||
weight: float64(min(g.Weight(u), g.Weight(v))),
|
||||
u: u,
|
||||
v: v,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Sort edges by weight in descending order
|
||||
sort.Slice(edges, func(i, j int) bool {
|
||||
return edges[i].weight > edges[j].weight
|
||||
})
|
||||
|
||||
// Initialize disjoint set data structure
|
||||
parent := make(map[uint64]uint64)
|
||||
for u := range g.graph {
|
||||
parent[u] = u
|
||||
}
|
||||
|
||||
// Find with path compression
|
||||
var find func(uint64) uint64
|
||||
find = func(node uint64) uint64 {
|
||||
if parent[node] != node {
|
||||
parent[node] = find(parent[node])
|
||||
}
|
||||
return parent[node]
|
||||
}
|
||||
|
||||
// Union function that returns true if cycle is detected
|
||||
union := func(u, v uint64) bool {
|
||||
rootU := find(u)
|
||||
rootV := find(v)
|
||||
if rootU == rootV {
|
||||
return true // Cycle detected
|
||||
}
|
||||
parent[rootV] = rootU
|
||||
return false
|
||||
}
|
||||
|
||||
// If no specific starts provided, use graph heads
|
||||
if starts == nil {
|
||||
if !backPath {
|
||||
starts = g.Heads()
|
||||
} else {
|
||||
starts = g.Terminals()
|
||||
}
|
||||
}
|
||||
|
||||
// If no specific stops provided, use graph terminals
|
||||
if stops == nil {
|
||||
if !backPath {
|
||||
stops = g.Terminals()
|
||||
} else {
|
||||
stops = g.Heads()
|
||||
}
|
||||
}
|
||||
|
||||
// Convert stops to a map for O(1) lookup
|
||||
stopNodes := make(map[uint64]bool)
|
||||
for _, stop := range stops {
|
||||
stopNodes[stop] = false
|
||||
}
|
||||
|
||||
var path []uint64
|
||||
maxCapacity := math.Inf(-1)
|
||||
stopEdge := []Edge{}
|
||||
|
||||
// Process edges in descending order of weight
|
||||
for _, edge := range edges {
|
||||
if stopNodes[edge.u] {
|
||||
continue // Skip edges from stop nodes
|
||||
}
|
||||
|
||||
if in, ok := stopNodes[edge.v]; ok {
|
||||
if !in {
|
||||
stopEdge = append(stopEdge, edge)
|
||||
stopNodes[edge.v] = true
|
||||
}
|
||||
}
|
||||
|
||||
if union(edge.u, edge.v) {
|
||||
continue // Skip if creates cycle
|
||||
}
|
||||
|
||||
pathFound := false
|
||||
for _, sedge := range stopEdge {
|
||||
// Check if any start-stop pair is connected
|
||||
fv := find(sedge.v)
|
||||
for _, s := range starts {
|
||||
fs := find(s)
|
||||
// log.Warnf("Start: %d, Stop: %d", fs, fv)
|
||||
if fs == fv {
|
||||
pathFound = true
|
||||
maxCapacity = edge.weight
|
||||
|
||||
// Reconstruct path
|
||||
current := sedge.v
|
||||
path = []uint64{current}
|
||||
for current != s {
|
||||
oldcurrent := current
|
||||
// log.Warnf("Start: %d, Current: %d, Previous: %v", s, current, previouses(current))
|
||||
for _, prev := range previouses(current) {
|
||||
if find(prev) == fs {
|
||||
path = append(path, prev)
|
||||
current = prev
|
||||
break
|
||||
}
|
||||
}
|
||||
if current == oldcurrent {
|
||||
log.Fatalf("We are stuck")
|
||||
}
|
||||
|
||||
}
|
||||
// log.Warnf("Built path: %v", path)
|
||||
break
|
||||
}
|
||||
}
|
||||
if pathFound {
|
||||
break
|
||||
}
|
||||
}
|
||||
if pathFound {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// log.Warnf("Stop edge: %v", stopEdge)
|
||||
|
||||
// Process edges in descending order of weight
|
||||
|
||||
if path == nil {
|
||||
return nil, fmt.Errorf("no valid path found")
|
||||
}
|
||||
|
||||
if !backPath {
|
||||
slices.Reverse(path)
|
||||
}
|
||||
log.Warnf("Max capacity: %5.0f: %v", maxCapacity, g.DecodePath(path))
|
||||
|
||||
return path, nil
|
||||
}
|
||||
|
||||
func (g *DeBruijnGraph) HasCycle() bool {
|
||||
@@ -765,3 +1137,59 @@ func (g *DeBruijnGraph) HasCycle() bool {
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// HasCycleInDegree détecte la présence d'un cycle dans le graphe en utilisant la méthode des degrés entrants.
|
||||
// Cette méthode est basée sur le tri topologique : si on ne peut pas trier tous les nœuds,
|
||||
// alors il y a un cycle.
|
||||
//
|
||||
// Returns:
|
||||
// - bool: true si le graphe contient un cycle, false sinon
|
||||
func (g *DeBruijnGraph) HasCycleInDegree() bool {
|
||||
// Créer une map pour stocker les degrés entrants de chaque nœud
|
||||
inDegree := make(map[uint64]int)
|
||||
|
||||
// Initialiser les degrés entrants à 0 pour tous les nœuds
|
||||
for node := range g.graph {
|
||||
inDegree[node] = 0
|
||||
}
|
||||
|
||||
// Calculer les degrés entrants
|
||||
for node := range g.graph {
|
||||
for _, next := range g.Nexts(node) {
|
||||
inDegree[next]++
|
||||
}
|
||||
}
|
||||
|
||||
// Créer une deque pour stocker les nœuds avec un degré entrant de 0
|
||||
queue := deque.Deque[uint64]{}
|
||||
|
||||
// Ajouter tous les nœuds avec un degré entrant de 0 à la deque
|
||||
for node := range g.graph {
|
||||
if inDegree[node] == 0 {
|
||||
queue.PushBack(node)
|
||||
}
|
||||
}
|
||||
|
||||
visited := 0 // Compteur de nœuds visités
|
||||
|
||||
// Parcours BFS
|
||||
for queue.Len() > 0 {
|
||||
// Retirer le premier nœud de la deque
|
||||
node, _ := queue.PopFront()
|
||||
visited++
|
||||
|
||||
// Pour chaque nœud adjacent
|
||||
for _, next := range g.Nexts(node) {
|
||||
// Réduire son degré entrant
|
||||
inDegree[next]--
|
||||
|
||||
// Si le degré entrant devient 0, l'ajouter à la deque
|
||||
if inDegree[next] == 0 {
|
||||
queue.PushBack(next)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// S'il y a un cycle, on n'aura pas pu visiter tous les nœuds
|
||||
return visited != len(g.graph)
|
||||
}
|
||||
|
||||
45
pkg/obikmer/oneerror.go
Normal file
45
pkg/obikmer/oneerror.go
Normal file
@@ -0,0 +1,45 @@
|
||||
package obikmer
|
||||
|
||||
import (
|
||||
"iter"
|
||||
"slices"
|
||||
)
|
||||
|
||||
var baseError = map[byte]byte{
|
||||
'a': 'b',
|
||||
'c': 'd',
|
||||
'g': 'h',
|
||||
't': 'v',
|
||||
'r': 'y',
|
||||
'y': 'r',
|
||||
's': 'w',
|
||||
'w': 's',
|
||||
'k': 'm',
|
||||
'm': 'k',
|
||||
'd': 'c',
|
||||
'v': 't',
|
||||
'h': 'g',
|
||||
'b': 'a',
|
||||
}
|
||||
|
||||
type BytesItem []byte
|
||||
|
||||
func IterateOneError(kmer []byte) iter.Seq[BytesItem] {
|
||||
lkmer := len(kmer)
|
||||
return func(yield func(BytesItem) bool) {
|
||||
for p := 0; p < lkmer; p++ {
|
||||
for p < lkmer && kmer[p] == 'n' {
|
||||
p++
|
||||
}
|
||||
|
||||
if p < lkmer {
|
||||
nkmer := slices.Clone(kmer)
|
||||
nkmer[p] = baseError[kmer[p]]
|
||||
if !yield(nkmer) {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@@ -8,7 +8,7 @@ import (
|
||||
// corresponds to the last commit, and not the one when the file will be
|
||||
// commited
|
||||
|
||||
var _Commit = "6a8061c"
|
||||
var _Commit = "4774438"
|
||||
var _Version = "Release 4.2.0"
|
||||
|
||||
// Version returns the version of the obitools package.
|
||||
|
||||
@@ -25,7 +25,7 @@ func (s *BioSequence) UnPair() {
|
||||
}
|
||||
|
||||
func (s *BioSequenceSlice) IsPaired() bool {
|
||||
return (*s)[0].paired != nil
|
||||
return s != nil && s.Len() > 0 && (*s)[0].paired != nil
|
||||
}
|
||||
|
||||
func (s *BioSequenceSlice) PairedWith() *BioSequenceSlice {
|
||||
|
||||
520
pkg/obitools/obimicroasm/microasm.go
Normal file
520
pkg/obitools/obimicroasm/microasm.go
Normal file
@@ -0,0 +1,520 @@
|
||||
package obimicroasm
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path"
|
||||
"slices"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiapat"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiformats"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obikmer"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obisuffix"
|
||||
)
|
||||
|
||||
func BuildFilterOnPatternReadPairWorker(
|
||||
forward, reverse string,
|
||||
errormax int,
|
||||
cutReads bool,
|
||||
) obiseq.SeqWorker {
|
||||
forwardPatternDir, err := obiapat.MakeApatPattern(forward, errormax, false)
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("Cannot compile forward primer %s : %v", forward, err)
|
||||
}
|
||||
|
||||
reverse_rev := obiseq.NewBioSequence("fp", []byte(reverse), "").ReverseComplement(true).String()
|
||||
reveresePatternRev, err := obiapat.MakeApatPattern(reverse_rev, errormax, false)
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("Cannot compile reverse complement reverse primer %s : %v", reverse, err)
|
||||
}
|
||||
|
||||
matchRead := func(sequence *obiseq.BioSequence) *obiseq.BioSequence {
|
||||
var aseq obiapat.ApatSequence
|
||||
var err error
|
||||
var read, match *obiseq.BioSequence
|
||||
|
||||
aseq, err = obiapat.MakeApatSequence(sequence, false)
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("Cannot prepare apat sequence from %s : %v", sequence.Id(), err)
|
||||
}
|
||||
|
||||
start, end, nerr, matched := forwardPatternDir.BestMatch(aseq, 0, aseq.Len())
|
||||
|
||||
if matched {
|
||||
read = sequence
|
||||
|
||||
if cutReads {
|
||||
read, err = sequence.Subsequence(start, sequence.Len(), false)
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("Cannot cut, on forward, forward read %s [%d,%d] : %v",
|
||||
sequence.Id(), start, sequence.Len(), err)
|
||||
}
|
||||
}
|
||||
|
||||
read.SetAttribute("forward_primer", forward)
|
||||
match, _ = sequence.Subsequence(start, end, false)
|
||||
read.SetAttribute("forward_match", match.String())
|
||||
read.SetAttribute("forward_error", nerr)
|
||||
|
||||
aseq, err = obiapat.MakeApatSequence(read, false, aseq)
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("Cannot prepare apat sequence from %s : %v", sequence.Id(), err)
|
||||
}
|
||||
|
||||
start, end, nerr, matched = reveresePatternRev.BestMatch(aseq, 0, aseq.Len())
|
||||
|
||||
if matched {
|
||||
|
||||
frread := read
|
||||
|
||||
if cutReads {
|
||||
frread, err = read.Subsequence(0, end, false)
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("Cannot xxx cut, on reverse, forward read %s [%d,%d] : %v",
|
||||
sequence.Id(), start, read.Len(), err)
|
||||
}
|
||||
}
|
||||
|
||||
frread.SetAttribute("reverse_primer", reverse)
|
||||
match, _ = read.Subsequence(start, end, false)
|
||||
frread.SetAttribute("reverse_match", match.ReverseComplement(true).String())
|
||||
frread.SetAttribute("reverse_error", nerr)
|
||||
|
||||
read = frread
|
||||
// log.Warnf("Forward-Reverse primer matched on %s : %d\n%s", read.Id(), read.Len(),
|
||||
// obiformats.FormatFasta(read, obiformats.FormatFastSeqJsonHeader))
|
||||
}
|
||||
|
||||
} else {
|
||||
start, end, nerr, matched = reveresePatternRev.BestMatch(aseq, 0, aseq.Len())
|
||||
|
||||
if matched {
|
||||
read = sequence
|
||||
if cutReads {
|
||||
read, err = sequence.Subsequence(0, end, false)
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("Cannot yyy cut, on reverse, forward read %s [%d,%d] : %v",
|
||||
sequence.Id(), 0, end, err)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
read.SetAttribute("reverse_primer", reverse)
|
||||
match, _ = read.Subsequence(start, end, false)
|
||||
read.SetAttribute("reverse_match", match.ReverseComplement(true).String())
|
||||
read.SetAttribute("reverse_error", nerr)
|
||||
} else {
|
||||
read = nil
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return read
|
||||
}
|
||||
|
||||
w := func(sequence *obiseq.BioSequence) (result obiseq.BioSequenceSlice, err error) {
|
||||
result = obiseq.MakeBioSequenceSlice()
|
||||
|
||||
paired := sequence.PairedWith()
|
||||
sequence.UnPair()
|
||||
|
||||
read := matchRead(sequence)
|
||||
|
||||
if read == nil {
|
||||
sequence = sequence.ReverseComplement(true)
|
||||
read = matchRead(sequence)
|
||||
}
|
||||
|
||||
if read != nil {
|
||||
result = append(result, read)
|
||||
}
|
||||
|
||||
if paired != nil {
|
||||
read = matchRead(paired)
|
||||
|
||||
if read == nil {
|
||||
read = matchRead(paired.ReverseComplement(true))
|
||||
}
|
||||
|
||||
if read != nil {
|
||||
result = append(result, read)
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
return w
|
||||
}
|
||||
|
||||
func ExtractOnPatterns(iter obiiter.IBioSequence,
|
||||
forward, reverse string,
|
||||
errormax int,
|
||||
cutReads bool,
|
||||
) obiseq.BioSequenceSlice {
|
||||
|
||||
matched := iter.MakeIWorker(
|
||||
BuildFilterOnPatternReadPairWorker(forward, reverse, errormax, cutReads),
|
||||
false,
|
||||
)
|
||||
|
||||
rep := obiseq.MakeBioSequenceSlice()
|
||||
|
||||
for matched.Next() {
|
||||
frgs := matched.Get()
|
||||
rep = append(rep, frgs.Slice()...)
|
||||
}
|
||||
|
||||
return rep
|
||||
}
|
||||
|
||||
func BuildPCRProduct(seqs obiseq.BioSequenceSlice,
|
||||
consensus_id string,
|
||||
kmer_size int,
|
||||
forward, reverse string,
|
||||
backtrack bool,
|
||||
save_graph bool, dirname string) (*obiseq.BioSequence, error) {
|
||||
|
||||
from := obiseq.NewBioSequence("forward", []byte(forward), "")
|
||||
to := obiseq.NewBioSequence("reverse", []byte(CLIReversePrimer()), "").ReverseComplement(true)
|
||||
|
||||
if backtrack {
|
||||
from, to = to, from
|
||||
}
|
||||
|
||||
if seqs.Len() == 0 {
|
||||
return nil, fmt.Errorf("no sequence provided")
|
||||
}
|
||||
|
||||
if save_graph {
|
||||
if dirname == "" {
|
||||
dirname = "."
|
||||
}
|
||||
|
||||
if stat, err := os.Stat(dirname); err != nil || !stat.IsDir() {
|
||||
// path does not exist or is not directory
|
||||
os.RemoveAll(dirname)
|
||||
err := os.Mkdir(dirname, 0755)
|
||||
|
||||
if err != nil {
|
||||
log.Panicf("Cannot create directory %s for saving graphs", dirname)
|
||||
}
|
||||
}
|
||||
|
||||
fasta, err := os.Create(path.Join(dirname, fmt.Sprintf("%s_consensus.fasta", consensus_id)))
|
||||
|
||||
if err == nil {
|
||||
defer fasta.Close()
|
||||
fasta.Write(obiformats.FormatFastaBatch(obiiter.MakeBioSequenceBatch(
|
||||
fmt.Sprintf("%s_consensus", consensus_id),
|
||||
0,
|
||||
seqs,
|
||||
),
|
||||
obiformats.FormatFastSeqJsonHeader, false).Bytes())
|
||||
fasta.Close()
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
log.Debugf("Number of reads : %d\n", len(seqs))
|
||||
|
||||
if kmer_size < 0 {
|
||||
longest := make([]int, len(seqs))
|
||||
|
||||
for i, seq := range seqs {
|
||||
s := obiseq.BioSequenceSlice{seq}
|
||||
sa := obisuffix.BuildSuffixArray(&s)
|
||||
longest[i] = slices.Max(sa.CommonSuffix())
|
||||
}
|
||||
|
||||
// spectrum := map[int]int{}
|
||||
// for _, s := range longest {
|
||||
// spectrum[s]++
|
||||
// }
|
||||
|
||||
// log.Warnf("spectum kmer size : %v", spectrum)
|
||||
|
||||
kmer_size = slices.Max(longest) + 1
|
||||
log.Infof("estimated kmer size : %d", kmer_size)
|
||||
}
|
||||
|
||||
var graph *obikmer.DeBruijnGraph
|
||||
|
||||
var hp []uint64
|
||||
var err error
|
||||
var starts []uint64
|
||||
var stops []uint64
|
||||
|
||||
for {
|
||||
graph = obikmer.MakeDeBruijnGraph(kmer_size)
|
||||
|
||||
for _, s := range seqs {
|
||||
graph.Push(s)
|
||||
}
|
||||
|
||||
if !backtrack {
|
||||
starts = graph.Search(from, CLIAllowedMismatch())
|
||||
stops = graph.BackSearch(to, CLIAllowedMismatch())
|
||||
} else {
|
||||
starts = graph.BackSearch(from, CLIAllowedMismatch())
|
||||
stops = graph.Search(to, CLIAllowedMismatch())
|
||||
}
|
||||
|
||||
log.Infof("Found %d starts", len(starts))
|
||||
pweight := map[int]int{}
|
||||
for _, s := range starts {
|
||||
w := graph.Weight(s)
|
||||
pweight[w]++
|
||||
log.Warnf("Starts : %s (%d)\n", graph.DecodeNode(s), w)
|
||||
}
|
||||
|
||||
log.Infof("Found %d stops", len(stops))
|
||||
for _, s := range stops {
|
||||
w := graph.Weight(s)
|
||||
pweight[w]++
|
||||
log.Warnf("Stop : %s (%d)\n", graph.DecodeNode(s), w)
|
||||
}
|
||||
|
||||
log.Infof("Weight spectrum : %v", pweight)
|
||||
|
||||
wmax := 0
|
||||
sw := 0
|
||||
for w := range pweight {
|
||||
sw += w
|
||||
if w > wmax {
|
||||
wmax = w
|
||||
}
|
||||
}
|
||||
|
||||
graph.FilterMinWeight(int(sw / len(pweight)))
|
||||
graph.FilterMaxWeight(int(wmax * 2))
|
||||
|
||||
log.Infof("Minimum coverage : %d", int(sw/len(pweight)))
|
||||
log.Infof("Maximum coverage : %d", int(wmax*2))
|
||||
|
||||
if !graph.HasCycleInDegree() {
|
||||
break
|
||||
}
|
||||
|
||||
kmer_size++
|
||||
|
||||
if kmer_size > 31 {
|
||||
break
|
||||
}
|
||||
|
||||
SetKmerSize(kmer_size)
|
||||
log.Warnf("Cycle detected, increasing kmer size to %d\n", kmer_size)
|
||||
}
|
||||
|
||||
if !backtrack {
|
||||
starts = graph.Search(from, CLIAllowedMismatch())
|
||||
stops = graph.BackSearch(to, CLIAllowedMismatch())
|
||||
} else {
|
||||
starts = graph.BackSearch(from, CLIAllowedMismatch())
|
||||
stops = graph.Search(to, CLIAllowedMismatch())
|
||||
}
|
||||
|
||||
hp, err = graph.HaviestPath(starts, stops, backtrack)
|
||||
|
||||
log.Debugf("Graph size : %d\n", graph.Len())
|
||||
|
||||
maxw := graph.MaxWeight()
|
||||
modew := graph.WeightMode()
|
||||
meanw := graph.WeightMean()
|
||||
specw := graph.WeightSpectrum()
|
||||
kmer := graph.KmerSize()
|
||||
|
||||
log.Warnf("Weigh mode: %d Weigth mean : %4.1f Weigth max : %d, kmer = %d", modew, meanw, maxw, kmer)
|
||||
log.Warn(specw)
|
||||
|
||||
if save_graph {
|
||||
|
||||
file, err := os.Create(path.Join(dirname,
|
||||
fmt.Sprintf("%s_consensus.gml", consensus_id)))
|
||||
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
} else {
|
||||
file.WriteString(graph.Gml())
|
||||
file.Close()
|
||||
}
|
||||
}
|
||||
|
||||
if err == nil {
|
||||
s := graph.DecodePath(hp)
|
||||
|
||||
seq := obiseq.NewBioSequence(consensus_id, []byte(s), "")
|
||||
|
||||
total_kmer := graph.Len()
|
||||
sumCount := 0
|
||||
|
||||
if seq != nil {
|
||||
for _, s := range seqs {
|
||||
sumCount += s.Count()
|
||||
}
|
||||
seq.SetAttribute("obiconsensus_consensus", true)
|
||||
seq.SetAttribute("obiconsensus_weight", sumCount)
|
||||
seq.SetAttribute("obiconsensus_seq_length", seq.Len())
|
||||
seq.SetAttribute("obiconsensus_kmer_size", kmer_size)
|
||||
seq.SetAttribute("obiconsensus_kmer_max_occur", graph.MaxWeight())
|
||||
seq.SetAttribute("obiconsensus_filtered_graph_size", graph.Len())
|
||||
seq.SetAttribute("obiconsensus_full_graph_size", total_kmer)
|
||||
}
|
||||
|
||||
log.Warnf("Consensus sequence : \n%s", obiformats.FormatFasta(seq, obiformats.FormatFastSeqJsonHeader))
|
||||
|
||||
return seq, nil
|
||||
|
||||
}
|
||||
|
||||
return nil, err
|
||||
}
|
||||
|
||||
func CLIAssemblePCR() *obiseq.BioSequence {
|
||||
|
||||
pairs, err := CLIPairedSequence()
|
||||
|
||||
if err != nil {
|
||||
log.Errorf("Cannot open file (%v)", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
matched := ExtractOnPatterns(pairs,
|
||||
CLIForwardPrimer(),
|
||||
CLIReversePrimer(),
|
||||
CLIAllowedMismatch(),
|
||||
true,
|
||||
)
|
||||
|
||||
seq, err := BuildPCRProduct(
|
||||
matched,
|
||||
CLIGraphFilesDirectory(),
|
||||
CLIKmerSize(),
|
||||
CLIForwardPrimer(),
|
||||
CLIReversePrimer(),
|
||||
false,
|
||||
CLISaveGraphToFiles(),
|
||||
CLIGraphFilesDirectory())
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("Cannot build the consensus sequence : %v", err)
|
||||
|
||||
}
|
||||
|
||||
forwardPatternDir, err := obiapat.MakeApatPattern(
|
||||
CLIForwardPrimer(),
|
||||
CLIAllowedMismatch(),
|
||||
false)
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("Cannot compile forward primer %s : %v", CLIForwardPrimer(), err)
|
||||
}
|
||||
|
||||
reverse_rev := obiseq.NewBioSequence("fp", []byte(CLIReversePrimer()), "").ReverseComplement(true).String()
|
||||
reveresePatternRev, err := obiapat.MakeApatPattern(reverse_rev, CLIAllowedMismatch(), false)
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("Cannot compile reverse complement reverse primer %s : %v", CLIReversePrimer(), err)
|
||||
}
|
||||
|
||||
aseq, err := obiapat.MakeApatSequence(seq, false)
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("Cannot build apat sequence: %v", err)
|
||||
}
|
||||
|
||||
fstart, fend, fnerr, hasfw := forwardPatternDir.BestMatch(aseq, 0, aseq.Len())
|
||||
rstart, rend, rnerr, hasrev := reveresePatternRev.BestMatch(aseq, 0, aseq.Len())
|
||||
|
||||
for hasfw && !hasrev {
|
||||
var rseq *obiseq.BioSequence
|
||||
rseq, err = BuildPCRProduct(
|
||||
matched,
|
||||
CLIGraphFilesDirectory(),
|
||||
CLIKmerSize(),
|
||||
CLIForwardPrimer(),
|
||||
CLIReversePrimer(),
|
||||
true,
|
||||
CLISaveGraphToFiles(),
|
||||
CLIGraphFilesDirectory())
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("Cannot build Reverse PCR sequence: %v", err)
|
||||
}
|
||||
|
||||
kmerSize, _ := seq.GetIntAttribute("obiconsensus_kmer_size")
|
||||
fp, _ := seq.Subsequence(seq.Len()-kmerSize, seq.Len(), false)
|
||||
rp, _ := rseq.Subsequence(0, kmerSize, false)
|
||||
rp = rp.ReverseComplement(true)
|
||||
|
||||
pairs, err := CLIPairedSequence()
|
||||
|
||||
if err != nil {
|
||||
log.Errorf("Cannot open file (%v)", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
nmatched := ExtractOnPatterns(pairs,
|
||||
fp.String(),
|
||||
rp.String(),
|
||||
CLIAllowedMismatch(),
|
||||
true,
|
||||
)
|
||||
|
||||
in := map[string]bool{}
|
||||
|
||||
for _, s := range matched {
|
||||
in[s.String()] = true
|
||||
}
|
||||
|
||||
for _, s := range nmatched {
|
||||
if !in[s.String()] {
|
||||
matched = append(matched, s)
|
||||
}
|
||||
}
|
||||
|
||||
seq, err = BuildPCRProduct(
|
||||
matched,
|
||||
CLIGraphFilesDirectory(),
|
||||
CLIKmerSize(),
|
||||
CLIForwardPrimer(),
|
||||
CLIReversePrimer(),
|
||||
false,
|
||||
CLISaveGraphToFiles(),
|
||||
CLIGraphFilesDirectory())
|
||||
|
||||
aseq, err := obiapat.MakeApatSequence(seq, false)
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("Cannot build apat sequence: %v", err)
|
||||
}
|
||||
fstart, fend, fnerr, hasfw = forwardPatternDir.BestMatch(aseq, 0, aseq.Len())
|
||||
rstart, rend, rnerr, hasrev = reveresePatternRev.BestMatch(aseq, 0, aseq.Len())
|
||||
|
||||
}
|
||||
|
||||
marker, _ := seq.Subsequence(fstart, rend, false)
|
||||
|
||||
marker.SetAttribute("forward_primer", CLIForwardPrimer())
|
||||
match, _ := seq.Subsequence(fstart, fend, false)
|
||||
marker.SetAttribute("forward_match", match.String())
|
||||
marker.SetAttribute("forward_error", fnerr)
|
||||
|
||||
marker.SetAttribute("reverse_primer", CLIReversePrimer())
|
||||
match, _ = seq.Subsequence(rstart, rend, false)
|
||||
marker.SetAttribute("reverse_match", match.ReverseComplement(true).String())
|
||||
marker.SetAttribute("reverse_error", rnerr)
|
||||
|
||||
return marker
|
||||
}
|
||||
139
pkg/obitools/obimicroasm/options.go
Normal file
139
pkg/obitools/obimicroasm/options.go
Normal file
@@ -0,0 +1,139 @@
|
||||
package obimicroasm
|
||||
|
||||
import (
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiapat"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||
"github.com/DavidGamba/go-getoptions"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
var _ForwardFile = ""
|
||||
var _ReverseFile = ""
|
||||
var _ForwardPrimer string
|
||||
var _ReversePrimer string
|
||||
var _AllowedMismatch = 0
|
||||
var _kmerSize = -1
|
||||
|
||||
var _saveGraph = "__@@NOSAVE@@__"
|
||||
|
||||
func MicroAsmOptionSet(options *getoptions.GetOpt) {
|
||||
options.StringVar(&_ForwardFile, "forward-reads", "",
|
||||
options.Alias("F"),
|
||||
options.ArgName("FILENAME_F"),
|
||||
options.Required("You must provide at a forward file"),
|
||||
options.Description("The file names containing the forward reads"))
|
||||
options.StringVar(&_ReverseFile, "reverse-reads", "",
|
||||
options.Alias("R"),
|
||||
options.ArgName("FILENAME_R"),
|
||||
options.Required("You must provide a reverse file"),
|
||||
options.Description("The file names containing the reverse reads"))
|
||||
options.StringVar(&_ForwardPrimer, "forward", "",
|
||||
options.Required("You must provide a forward primer"),
|
||||
options.Description("The forward primer used for the electronic PCR."))
|
||||
|
||||
options.StringVar(&_ReversePrimer, "reverse", "",
|
||||
options.Required("You must provide a reverse primer"),
|
||||
options.Description("The reverse primer used for the electronic PCR."))
|
||||
|
||||
options.IntVar(&_AllowedMismatch, "allowed-mismatches", 0,
|
||||
options.Alias("e"),
|
||||
options.Description("Maximum number of mismatches allowed for each primer."))
|
||||
options.IntVar(&_kmerSize, "kmer-size", _kmerSize,
|
||||
options.ArgName("SIZE"),
|
||||
options.Description("The size of the kmer used to build the consensus. "+
|
||||
"Default value = -1, which means that the kmer size is estimated from the data"),
|
||||
)
|
||||
|
||||
options.StringVar(&_saveGraph, "save-graph", _saveGraph,
|
||||
options.Description("Creates a directory containing the set of DAG used by the obiclean clustering algorithm. "+
|
||||
"The graph files follow the graphml format."),
|
||||
)
|
||||
|
||||
}
|
||||
|
||||
func OptionSet(options *getoptions.GetOpt) {
|
||||
obiconvert.OptionSet(options)
|
||||
MicroAsmOptionSet(options)
|
||||
}
|
||||
|
||||
// CLIForwardPrimer returns the sequence of the forward primer as indicated by the
|
||||
// --forward command line option
|
||||
func CLIForwardPrimer() string {
|
||||
pattern, err := obiapat.MakeApatPattern(_ForwardPrimer, _AllowedMismatch, false)
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("%+v", err)
|
||||
}
|
||||
|
||||
pattern.Free()
|
||||
|
||||
return _ForwardPrimer
|
||||
}
|
||||
|
||||
// CLIReversePrimer returns the sequence of the reverse primer as indicated by the
|
||||
// --reverse command line option
|
||||
func CLIReversePrimer() string {
|
||||
pattern, err := obiapat.MakeApatPattern(_ReversePrimer, _AllowedMismatch, false)
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("%+v", err)
|
||||
}
|
||||
|
||||
pattern.Free()
|
||||
|
||||
return _ReversePrimer
|
||||
}
|
||||
|
||||
// CLIAllowedMismatch returns the allowed mistmatch count between each
|
||||
// primer and the sequences as indicated by the
|
||||
// --allowed-mismatches|-e command line option
|
||||
func CLIAllowedMismatch() int {
|
||||
return _AllowedMismatch
|
||||
}
|
||||
|
||||
func CLIPairedSequence() (obiiter.IBioSequence, error) {
|
||||
forward, err := obiconvert.CLIReadBioSequences(_ForwardFile)
|
||||
if err != nil {
|
||||
return obiiter.NilIBioSequence, err
|
||||
}
|
||||
|
||||
reverse, err := obiconvert.CLIReadBioSequences(_ReverseFile)
|
||||
if err != nil {
|
||||
return obiiter.NilIBioSequence, err
|
||||
}
|
||||
|
||||
paired := forward.PairTo(reverse)
|
||||
|
||||
return paired, nil
|
||||
}
|
||||
|
||||
func CLIForwardFile() string {
|
||||
return _ForwardFile
|
||||
}
|
||||
|
||||
// Returns true it the obliclean graphs must be saved
|
||||
func CLISaveGraphToFiles() bool {
|
||||
return _saveGraph != "__@@NOSAVE@@__"
|
||||
}
|
||||
|
||||
// It returns the directory where the graph files are saved
|
||||
func CLIGraphFilesDirectory() string {
|
||||
return _saveGraph
|
||||
}
|
||||
|
||||
// CLIKmerSize returns the value of the kmer size to use for building the consensus.
|
||||
//
|
||||
// The value of the kmer size is set by the user with the `-k` flag.
|
||||
// The value -1 means that the kmer size is estimated as the minimum value that
|
||||
// insure that no kmer are present more than one time in a sequence.
|
||||
//
|
||||
// No parameters.
|
||||
// Returns an integer value.
|
||||
func CLIKmerSize() int {
|
||||
return _kmerSize
|
||||
}
|
||||
|
||||
func SetKmerSize(kmerSize int) {
|
||||
_kmerSize = kmerSize
|
||||
}
|
||||
20
pkg/obiutils/unique.go
Normal file
20
pkg/obiutils/unique.go
Normal file
@@ -0,0 +1,20 @@
|
||||
package obiutils
|
||||
|
||||
// Unique returns a new slice containing only unique values from the input slice.
|
||||
// The order of elements in the output slice is not guaranteed to match the input order.
|
||||
//
|
||||
// Parameters:
|
||||
// - slice: The input slice containing potentially duplicate values
|
||||
//
|
||||
// Returns:
|
||||
// - A new slice containing only unique values
|
||||
func Unique[T comparable](slice []T) []T {
|
||||
// Create a map to track unique values
|
||||
seen := Set[T]{}
|
||||
|
||||
for _, v := range slice {
|
||||
seen.Add(v)
|
||||
}
|
||||
|
||||
return seen.Members()
|
||||
}
|
||||
Reference in New Issue
Block a user