mirror of https://github.com/metabarcoding/obitools4.git (synced 2025-06-29 16:20:46 +00:00)
correction of several small bugs
@@ -1,30 +1,72 @@
 package obialign
 
+import log "github.com/sirupsen/logrus"
+
+// buffIndex converts a pair of coordinates (i, j) into a linear index in a matrix
+// of size width x width. The coordinates are (-1)-indexed, and the linear index
+// is 0-indexed as well. The function first adds 1 to both coordinates to make
+// sure the (-1,-1) coordinate is at position 0 in the matrix, and then computes
+// the linear index by multiplying the first coordinate by the width and adding
+// the second coordinate.
 func buffIndex(i, j, width int) int {
     return (i+1)*width + (j + 1)
 }
 
+// LocatePattern is a function to locate a pattern in a sequence.
+//
+// It uses a dynamic programming approach to build a matrix of scores.
+// The score at each cell is the maximum of the score of the cell
+// above it (representing a deletion), the score of the cell to its
+// left (representing an insertion), and the score of the cell
+// diagonally above it (representing a match).
+//
+// The score of a match is 0 if the two characters are the same,
+// and -1 if they are different.
+//
+// The function returns the start and end positions of the best
+// match, as well as the number of errors in the best match.
 func LocatePattern(pattern, sequence []byte) (int, int, int) {
+    // Pattern spreads over the columns
+    // Sequence spreads over the rows
     width := len(pattern) + 1
     buffsize := (len(pattern) + 1) * (len(sequence) + 1)
     buffer := make([]int, buffsize)
 
+    if len(pattern) >= len(sequence) {
+        log.Panicf("Pattern %s must be shorter than sequence %s", pattern, sequence)
+    }
+
+    // The path matrix keeps track of the best path through the matrix
+    // 0 : indicate the diagonal path
+    // 1 : indicate the up path
+    // -1 : indicate the left path
     path := make([]int, buffsize)
 
+    // Initialize the first row of the matrix
     for j := 0; j < len(pattern); j++ {
         idx := buffIndex(-1, j, width)
         buffer[idx] = -j - 1
         path[idx] = -1
     }
 
+    // Initialize the first column of the matrix
+    // Alignment is endgap free so first column = 0
+    // to allow primer to shift freely along the sequence
     for i := -1; i < len(sequence); i++ {
         idx := buffIndex(i, -1, width)
         buffer[idx] = 0
         path[idx] = +1
     }
 
+    // Fills the matrix except the last column
+    // where gaps must be free too.
     path[0] = 0
     jmax := len(pattern) - 1
     for i := 0; i < len(sequence); i++ {
         for j := 0; j < jmax; j++ {
 
+            // Mismatch score = -1
+            // Match score = 0
             match := -1
             if _samenuc(pattern[j], sequence[i]) {
                 match = 0
@@ -33,6 +75,8 @@ func LocatePattern(pattern, sequence []byte) (int, int, int) {
             idx := buffIndex(i, j, width)
 
             diag := buffer[buffIndex(i-1, j-1, width)] + match
+
+            // Each gap cost -1
             left := buffer[buffIndex(i, j-1, width)] - 1
             up := buffer[buffIndex(i-1, j, width)] - 1
 
@@ -51,9 +95,12 @@ func LocatePattern(pattern, sequence []byte) (int, int, int) {
         }
     }
 
+    // Fills the last column considering the free up gap
     for i := 0; i < len(sequence); i++ {
         idx := buffIndex(i, jmax, width)
 
+        // Mismatch score = -1
+        // Match score = 0
         match := -1
         if _samenuc(pattern[jmax], sequence[i]) {
             match = 0
@@ -65,6 +112,7 @@ func LocatePattern(pattern, sequence []byte) (int, int, int) {
 
         score := max(diag, up, left)
         buffer[idx] = score
+
         switch {
         case score == left:
             path[idx] = -1
@@ -75,11 +123,13 @@ func LocatePattern(pattern, sequence []byte) (int, int, int) {
         }
     }
 
+    // Backtracking of the alignment
+
     i := len(sequence) - 1
     j := jmax
     end := -1
     lali := 0
-    for i > -1 && j > 0 {
+    for j > 0 { // was: i > -1 && j > 0
         lali++
         switch path[buffIndex(i, j, width)] {
         case 0:
@@ -100,5 +150,9 @@ func LocatePattern(pattern, sequence []byte) (int, int, int) {
         }
 
     }
 
+    // log.Warnf("from : %d to: %d error: %d match: %v",
+    //     i, end+1, -buffer[buffIndex(len(sequence)-1, len(pattern)-1, width)],
+    //     string(sequence[i:(end+1)]))
     return i, end + 1, -buffer[buffIndex(len(sequence)-1, len(pattern)-1, width)]
 }
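
As an illustration only (not part of the commit), a minimal sketch of how the three values returned by LocatePattern could be consumed; the example sequences and the obialign import path are assumptions:

package main

import (
    "fmt"

    "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obialign"
)

func main() {
    // The pattern must be strictly shorter than the sequence,
    // otherwise the guard added in this commit calls log.Panicf.
    sequence := []byte("ttagccctaaacataaacattcaataaacaagaatg")
    pattern := []byte("ccctaaacataaacatt")

    // Returns the start and end of the best endgap-free match of the
    // pattern in the sequence, plus the number of errors of that match.
    from, to, errs := obialign.LocatePattern(pattern, sequence)
    fmt.Printf("best match: sequence[%d:%d] with %d error(s)\n", from, to, errs)
}
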
@@ -137,6 +137,28 @@ char *reverseSequence(char *str,char isPattern)
   return str;
 }
 
+/* -------------------------------------------- */
+/* lowercase sequence                            */
+/* -------------------------------------------- */
+
+#define IS_UPPER(c) (((c) >= 'A') && ((c) <= 'A'))
+#define TO_LOWER(c) ((c) - 'A' + 'a')
+
+char *LowerSequence(char *seq)
+{
+  char *cseq;
+
+  for (cseq = seq ; *cseq ; cseq++)
+    if (IS_UPPER(*cseq))
+      *cseq = TO_LOWER(*cseq);
+
+  return seq;
+}
+
+#undef IS_UPPER
+#undef TO_LOWER
+
+
 char *ecoComplementPattern(char *nucAcSeq)
 {
   return reverseSequence(LXBioSeqComplement(nucAcSeq),1);
@@ -165,6 +187,7 @@ void UpperSequence(char *seq)
 
 
+
 
 /* -------------------------------------------- */
 /* encode sequence                               */
 /* IS_UPPER is slightly faster than isupper      */
@@ -9,6 +9,7 @@ import "C"
 import (
     "errors"
     "runtime"
+    "strings"
     "unsafe"
 
     log "github.com/sirupsen/logrus"
@@ -114,7 +115,7 @@ func (pattern ApatPattern) ReverseComplement() (ApatPattern, error) {
         C.free(unsafe.Pointer(errmsg))
         return ApatPattern{nil}, errors.New(message)
     }
-    spat := C.GoString(apc.cpat)
+    spat := strings.ToLower(C.GoString(apc.cpat))
     ap := _ApatPattern{apc, spat}
 
     runtime.SetFinalizer(&ap, func(p *_ApatPattern) {
@@ -366,6 +367,18 @@ func (pattern ApatPattern) BestMatch(sequence ApatSequence, begin, length int) (
     return
 }
 
+// FilterBestMatch filters the best non overlapping matches of a given pattern in a sequence.
+//
+// It takes the following parameters:
+// - pattern: the pattern to search for (ApatPattern).
+// - sequence: the sequence to search in (ApatSequence).
+// - begin: the starting index of the search (int).
+// - length: the length of the search (int).
+//
+// It returns a slice of [3]int representing the locations of all non-overlapping matches in the sequence.
+// The two firsts values of the [3]int indicate respectively the start and the end position of
+// match. Following the GO convention the end position is not included in the
+// match. The third value indicates the number of error detected for this occurrence.
 func (pattern ApatPattern) FilterBestMatch(sequence ApatSequence, begin, length int) (loc [][3]int) {
     res := pattern.FindAllIndex(sequence, begin, length)
     filtered := make([][3]int, 0, len(res))
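
A hypothetical helper (not in the commit) showing how the [3]int triplets documented above could be consumed; it only assumes the FilterBestMatch signature and the start / excluded-end / error-count convention:

package main

import (
    "fmt"

    "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiapat"
)

// reportMatches prints every non-overlapping match of pat in seq following the
// documented [3]int convention: start, excluded end, number of errors.
func reportMatches(pat obiapat.ApatPattern, seq obiapat.ApatSequence) {
    for _, m := range pat.FilterBestMatch(seq, 0, seq.Len()) {
        fmt.Printf("match at [%d:%d) with %d error(s)\n", m[0], m[1], m[2])
    }
}
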
@@ -424,13 +437,15 @@ func (pattern ApatPattern) FilterBestMatch(sequence ApatSequence, begin, length
 func (pattern ApatPattern) AllMatches(sequence ApatSequence, begin, length int) (loc [][3]int) {
     res := pattern.FilterBestMatch(sequence, begin, length)
 
+    j := 0
     for _, m := range res {
         // Recompute the start and end position of the match
         // when the pattern allows for indels
         if m[2] > 0 && pattern.pointer.pointer.hasIndel {
-            start := m[0] - m[2]
+            // log.Warnf("Locating indel on sequence %s[%s]", sequence.pointer.reference.Id(), pattern.String())
+            start := m[0] - m[2]*2
             start = max(start, 0)
-            end := start + int(pattern.pointer.pointer.patlen) + 2*m[2]
+            end := start + int(pattern.pointer.pointer.patlen) + 4*m[2]
             end = min(end, sequence.Len())
             // 1 << 30 = 1,073,741,824 = 1Gb
             // It's a virtual array mapping the sequence to the pattern
@@ -439,18 +454,21 @@ func (pattern ApatPattern) AllMatches(sequence ApatSequence, begin, length int)
             cpattern := (*[1 << 30]byte)(unsafe.Pointer(pattern.pointer.pointer.cpat))
             frg := sequence.pointer.reference.Sequence()[start:end]
 
-            begin, end, score := obialign.LocatePattern((*cpattern)[0:int(pattern.pointer.pointer.patlen)], frg)
+            pb, pe, score := obialign.LocatePattern((*cpattern)[0:int(pattern.pointer.pointer.patlen)], frg)
 
             // olderr := m[2]
             m[2] = score
-            m[0] = start + begin
-            m[1] = start + end
+            m[0] = start + pb
+            m[1] = start + pe
 
             // log.Warnf("seq[%d@%d:%d] %d: %s %d - %s:%s:%s", i, m[0], m[1], olderr, sequence.pointer.reference.Id(), score,
             //     frg, (*cpattern)[0:int(pattern.pointer.pointer.patlen)], sequence.pointer.reference.Sequence()[m[0]:m[1]])
         }
-    }
 
-    // log.Debugf("All matches : %v", res)
-
-    return res
+        if int(pattern.pointer.pointer.maxerr) >= m[2] {
+            res[j] = m
+            j++
+        }
+    }
+
+    return res[0:j]
 }
|
@ -642,6 +642,8 @@ func ReadCSVNGSFilter(reader io.Reader) (*obingslibrary.NGSLibrary, error) {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ngsfilter.CheckPrimerUnicity()
|
||||||
|
|
||||||
for i := 0; i < len(params); i++ {
|
for i := 0; i < len(params); i++ {
|
||||||
param := params[i][1]
|
param := params[i][1]
|
||||||
if len(params[i]) < 3 {
|
if len(params[i]) < 3 {
|
||||||
|
@@ -3,6 +3,8 @@ package obiformats
 import (
     "bufio"
     "bytes"
+    "encoding/csv"
+    "errors"
     "io"
     "path"
     "regexp"
@@ -39,6 +41,31 @@ type SequenceReader func(reader io.Reader, options ...WithOption) (obiiter.IBioS
 // - io.Reader: A modified reader with the read data.
 // - error: Any error encountered during the process.
 func OBIMimeTypeGuesser(stream io.Reader) (*mimetype.MIME, io.Reader, error) {
+    csv := func(in []byte, limit uint32) bool {
+        in = dropLastLine(in, limit)
+
+        br := bytes.NewReader(in)
+        r := csv.NewReader(br)
+        r.Comma = ','
+        r.ReuseRecord = true
+        r.LazyQuotes = true
+        r.Comment = '#'
+
+        lines := 0
+        for {
+            _, err := r.Read()
+            if errors.Is(err, io.EOF) {
+                break
+            }
+            if err != nil {
+                return false
+            }
+            lines++
+        }
+
+        return r.FieldsPerRecord > 1 && lines > 1
+    }
+
     fastaDetector := func(raw []byte, limit uint32) bool {
         ok, err := regexp.Match("^>[^ ]", raw)
         return ok && err == nil
@@ -70,12 +97,14 @@ func OBIMimeTypeGuesser(stream io.Reader) (*mimetype.MIME, io.Reader, error) {
     mimetype.Lookup("text/plain").Extend(ecoPCR2Detector, "text/ecopcr2", ".ecopcr")
     mimetype.Lookup("text/plain").Extend(genbankDetector, "text/genbank", ".seq")
     mimetype.Lookup("text/plain").Extend(emblDetector, "text/embl", ".dat")
+    mimetype.Lookup("text/plain").Extend(csv, "text/csv", ".csv")
 
     mimetype.Lookup("application/octet-stream").Extend(fastaDetector, "text/fasta", ".fasta")
     mimetype.Lookup("application/octet-stream").Extend(fastqDetector, "text/fastq", ".fastq")
     mimetype.Lookup("application/octet-stream").Extend(ecoPCR2Detector, "text/ecopcr2", ".ecopcr")
     mimetype.Lookup("application/octet-stream").Extend(genbankDetector, "text/genbank", ".seq")
     mimetype.Lookup("application/octet-stream").Extend(emblDetector, "text/embl", ".dat")
+    mimetype.Lookup("application/octet-stream").Extend(csv, "text/csv", ".csv")
 
     // Create a buffer to store the read data
     buf := make([]byte, 1024*128)
@@ -87,6 +116,7 @@ func OBIMimeTypeGuesser(stream io.Reader) (*mimetype.MIME, io.Reader, error) {
 
     // Detect the MIME type using the mimetype library
     mimeType := mimetype.Detect(buf)
+
     if mimeType == nil {
         return nil, nil, err
     }
pkg/obifp/uint128_test.go (new file, 250 lines)
@@ -0,0 +1,250 @@
package obifp

import (
    "math"
    "reflect"

    "testing"

    "github.com/stretchr/testify/assert"
)

func TestUint128_Add(t *testing.T) {
    u := Uint128{w1: 1, w0: 2}
    v := Uint128{w1: 3, w0: 4}
    w := u.Add(v)
    assert.Equal(t, Uint128{w1: 4, w0: 6}, w)

    u = Uint128{w1: 0, w0: 0}
    v = Uint128{w1: 0, w0: 0}
    w = u.Add(v)
    assert.Equal(t, Uint128{w1: 0, w0: 0}, w)

    u = Uint128{w1: 0, w0: math.MaxUint64}
    v = Uint128{w1: 0, w0: 1}
    w = u.Add(v)
    assert.Equal(t, Uint128{w1: 1, w0: 0}, w)
}

func TestUint128_Add64(t *testing.T) {
    u := Uint128{w1: 1, w0: 2}
    v := uint64(3)
    w := u.Add64(v)
    assert.Equal(t, Uint128{w1: 1, w0: 5}, w)

    u = Uint128{w1: 0, w0: 0}
    v = uint64(0)
    w = u.Add64(v)
    assert.Equal(t, Uint128{w1: 0, w0: 0}, w)

    u = Uint128{w1: 0, w0: math.MaxUint64}
    v = uint64(1)
    w = u.Add64(v)
    assert.Equal(t, Uint128{w1: 1, w0: 0}, w)
}

func TestUint128_Sub(t *testing.T) {
    u := Uint128{w1: 4, w0: 6}
    v := Uint128{w1: 3, w0: 4}
    w := u.Sub(v)
    assert.Equal(t, Uint128{w1: 1, w0: 2}, w)

    u = Uint128{w1: 0, w0: 0}
    v = Uint128{w1: 0, w0: 0}
    w = u.Sub(v)
    assert.Equal(t, Uint128{w1: 0, w0: 0}, w)

    u = Uint128{w1: 1, w0: 0}
    v = Uint128{w1: 0, w0: 1}
    w = u.Sub(v)
    assert.Equal(t, Uint128{w1: 0, w0: math.MaxUint64}, w)
}

func TestUint128_Mul64(t *testing.T) {
    u := Uint128{w1: 1, w0: 2}
    v := uint64(3)
    w := u.Mul64(v)

    if w.w1 != 3 || w.w0 != 6 {
        t.Errorf("Mul64(%v, %v) = %v, want %v", u, v, w, Uint128{w1: 3, w0: 6})
    }

    u = Uint128{w1: 0, w0: 0}
    v = uint64(0)
    w = u.Mul64(v)

    if w.w1 != 0 || w.w0 != 0 {
        t.Errorf("Mul64(%v, %v) = %v, want %v", u, v, w, Uint128{w1: 0, w0: 0})
    }

    u = Uint128{w1: 0, w0: math.MaxUint64}
    v = uint64(2)
    w = u.Mul64(v)

    if w.w1 != 1 || w.w0 != 18446744073709551614 {
        t.Errorf("Mul64(%v, %v) = %v, want %v", u, v, w, Uint128{w1: 1, w0: 18446744073709551614})
    }
}

func TestUint128_Mul(t *testing.T) {
    tests := []struct {
        name     string
        u        Uint128
        v        Uint128
        expected Uint128
    }{
        {
            name:     "simple multiplication",
            u:        Uint128{w1: 1, w0: 2},
            v:        Uint128{w1: 3, w0: 4},
            expected: Uint128{w1: 10, w0: 8},
        },
        {
            name:     "multiplication with overflow",
            u:        Uint128{w1: 0, w0: math.MaxUint64},
            v:        Uint128{w1: 0, w0: 2},
            expected: Uint128{w1: 1, w0: 18446744073709551614},
        },
        {
            name:     "multiplication with zero",
            u:        Uint128{w1: 0, w0: 0},
            v:        Uint128{w1: 0, w0: 0},
            expected: Uint128{w1: 0, w0: 0},
        },
        {
            name:     "multiplication with large numbers",
            u:        Uint128{w1: 100, w0: 200},
            v:        Uint128{w1: 300, w0: 400},
            expected: Uint128{w1: 100000, w0: 80000},
        },
    }

    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            actual := tt.u.Mul(tt.v)
            if !reflect.DeepEqual(actual, tt.expected) {
                t.Errorf("Mul(%v, %v) = %v, want %v", tt.u, tt.v, actual, tt.expected)
            }
        })
    }
}

func TestUint128_QuoRem(t *testing.T) {
    u := Uint128{w1: 3, w0: 8}
    v := Uint128{w1: 0, w0: 4}
    q, r := u.QuoRem(v)
    assert.Equal(t, Uint128{w1: 0, w0: 2}, q)
    assert.Equal(t, Uint128{w1: 0, w0: 0}, r)
}

func TestUint128_QuoRem64(t *testing.T) {
    u := Uint128{w1: 0, w0: 6}
    v := uint64(3)
    q, r := u.QuoRem64(v)
    assert.Equal(t, Uint128{w1: 0, w0: 2}, q)
    assert.Equal(t, uint64(0), r)
}

func TestUint128_Div(t *testing.T) {
    u := Uint128{w1: 3, w0: 8}
    v := Uint128{w1: 0, w0: 4}
    q := u.Div(v)
    assert.Equal(t, Uint128{w1: 0, w0: 2}, q)
}

func TestUint128_Div64(t *testing.T) {
    u := Uint128{w1: 0, w0: 6}
    v := uint64(3)
    q := u.Div64(v)
    assert.Equal(t, Uint128{w1: 0, w0: 2}, q)
}

func TestUint128_Mod(t *testing.T) {
    u := Uint128{w1: 3, w0: 8}
    v := Uint128{w1: 0, w0: 4}
    r := u.Mod(v)
    assert.Equal(t, Uint128{w1: 0, w0: 0}, r)
}

func TestUint128_Mod64(t *testing.T) {
    u := Uint128{w1: 0, w0: 6}
    v := uint64(3)
    r := u.Mod64(v)
    assert.Equal(t, uint64(0), r)
}

func TestUint128_Cmp(t *testing.T) {
    u := Uint128{w1: 1, w0: 2}
    v := Uint128{w1: 3, w0: 4}
    assert.Equal(t, -1, u.Cmp(v))
}

func TestUint128_Cmp64(t *testing.T) {
    u := Uint128{w1: 1, w0: 2}
    v := uint64(3)
    assert.Equal(t, -1, u.Cmp64(v))
}

func TestUint128_Equals(t *testing.T) {
    u := Uint128{w1: 1, w0: 2}
    v := Uint128{w1: 1, w0: 2}
    assert.Equal(t, true, u.Equals(v))
}

func TestUint128_LessThan(t *testing.T) {
    u := Uint128{w1: 1, w0: 2}
    v := Uint128{w1: 3, w0: 4}
    assert.Equal(t, true, u.LessThan(v))
}

func TestUint128_GreaterThan(t *testing.T) {
    u := Uint128{w1: 1, w0: 2}
    v := Uint128{w1: 3, w0: 4}
    assert.Equal(t, false, u.GreaterThan(v))
}

func TestUint128_LessThanOrEqual(t *testing.T) {
    u := Uint128{w1: 1, w0: 2}
    v := Uint128{w1: 3, w0: 4}
    assert.Equal(t, true, u.LessThanOrEqual(v))
}

func TestUint128_GreaterThanOrEqual(t *testing.T) {
    u := Uint128{w1: 1, w0: 2}
    v := Uint128{w1: 3, w0: 4}
    assert.Equal(t, false, u.GreaterThanOrEqual(v))
}

func TestUint128_And(t *testing.T) {
    u := Uint128{w1: 1, w0: 2}
    v := Uint128{w1: 3, w0: 4}
    w := u.And(v)
    assert.Equal(t, Uint128{w1: 1, w0: 0}, w)
}

func TestUint128_Or(t *testing.T) {
    u := Uint128{w1: 1, w0: 2}
    v := Uint128{w1: 3, w0: 4}
    w := u.Or(v)
    assert.Equal(t, Uint128{w1: 3, w0: 6}, w)
}

func TestUint128_Xor(t *testing.T) {
    u := Uint128{w1: 1, w0: 2}
    v := Uint128{w1: 3, w0: 4}
    w := u.Xor(v)
    assert.Equal(t, Uint128{w1: 2, w0: 6}, w)
}

func TestUint128_Not(t *testing.T) {
    u := Uint128{w1: 1, w0: 2}
    w := u.Not()
    assert.Equal(t, Uint128{w1: math.MaxUint64 - 1, w0: math.MaxUint64 - 2}, w)
}

func TestUint128_AsUint64(t *testing.T) {
    u := Uint128{w1: 0, w0: 5}
    v := u.AsUint64()
    assert.Equal(t, uint64(5), v)
}
@@ -10,6 +10,7 @@ import (
     "slices"
 
     "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
+    "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obistats"
     log "github.com/sirupsen/logrus"
 )
 
@@ -312,10 +313,48 @@ func (g *DeBruijnGraph) LongestPath(max_length int) []uint64 {
     return path
 }
 
-func (g *DeBruijnGraph) LongestConsensus(id string) (*obiseq.BioSequence, error) {
+func (g *DeBruijnGraph) LongestConsensus(id string, min_cov float64) (*obiseq.BioSequence, error) {
+    if g.Len() == 0 {
+        return nil, fmt.Errorf("graph is empty")
+    }
     //path := g.LongestPath(max_length)
     path := g.HaviestPath()
-    s := g.DecodePath(path)
+
+    spath := path
+
+    if min_cov > 0 {
+        wp := make([]uint, len(path))
+
+        for i, n := range path {
+            wp[i] = g.graph[n]
+        }
+
+        mp := uint(float64(obistats.Mode(wp))*min_cov + 0.5)
+
+        from := 0
+        for i, n := range path {
+            if g.graph[n] < mp {
+                from = i + 1
+            } else {
+                break
+            }
+        }
+
+        to := len(path)
+
+        for i := len(path) - 1; i >= 0; i-- {
+            n := path[i]
+            if g.graph[n] < mp {
+                to = i
+            } else {
+                break
+            }
+        }
+
+        spath = path[from:to]
+    }
+
+    s := g.DecodePath(spath)
+
     if len(s) > 0 {
         seq := obiseq.NewBioSequence(
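
The new min_cov branch trims the heaviest path at both ends wherever node coverage falls below min_cov times the modal coverage (mp). A standalone hedged sketch of that trimming logic on a plain weight slice, not the DeBruijnGraph code itself:

package main

import "fmt"

// trim returns the sub-range [from, to) of weights whose low-coverage ends
// have been stripped, mirroring the min_cov logic above: only leading and
// trailing values below the threshold are removed, internal dips are kept.
func trim(weights []uint, threshold uint) (int, int) {
    from := 0
    for i, w := range weights {
        if w < threshold {
            from = i + 1
        } else {
            break
        }
    }
    to := len(weights)
    for i := len(weights) - 1; i >= 0; i-- {
        if weights[i] < threshold {
            to = i
        } else {
            break
        }
    }
    return from, to
}

func main() {
    // With min_cov = 0.5 and a modal coverage of 10, the threshold is 5.
    weights := []uint{1, 2, 10, 9, 4, 10, 10, 3}
    from, to := trim(weights, 5)
    fmt.Println(weights[from:to]) // [10 9 4 10 10]
}
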
@@ -387,6 +426,48 @@ func (g *DeBruijnGraph) Weight(index uint64) int {
     return int(val)
 }
 
+// WeightMode returns the mode weight of the nodes in the DeBruijnGraph.
+//
+// It iterates over each count in the graph map and updates the max value if the current count is greater.
+// Finally, it returns the mode weight as an integer.
+//
+// Returns:
+// - int: the mode weight value.
+func (g *DeBruijnGraph) WeightMode() int {
+    dist := make(map[uint]int)
+
+    for _, w := range g.graph {
+        dist[w]++
+    }
+
+    maxi := 0
+    wmax := uint(0)
+
+    for k, v := range dist {
+        if v > maxi && k > 1 {
+            maxi = v
+            wmax = k
+        }
+    }
+
+    return int(wmax)
+}
+
+// WeightMean returns the mean weight of the nodes in the DeBruijnGraph.
+//
+// Returns:
+// - float64: the mean weight of the nodes in the graph.
+func (g *DeBruijnGraph) WeightMean() float64 {
+    sum := 0
+
+    for _, w := range g.graph {
+        i := int(w)
+        sum += i
+    }
+
+    return float64(sum) / float64(len(g.graph))
+}
+
 // append appends a sequence of nucleotides to the DeBruijnGraph.
 //
 // Parameters:
@@ -637,7 +718,7 @@ func (g *DeBruijnGraph) HaviestPath() []uint64 {
         }
 
         if slices.Contains(heaviestPath, currentNode) {
-            log.Fatalf("Cycle detected %v -> %v (%v) len(%v)", heaviestPath, currentNode, startNodes, len(heaviestPath))
+            log.Panicf("Cycle detected %v -> %v (%v) len(%v), graph: %v", heaviestPath, currentNode, startNodes, len(heaviestPath), g.Len())
             return nil
         }
 
@@ -20,6 +20,8 @@ import (
 func NewInterpreter() *lua.LState {
     lua := lua.NewState()
 
+    registerMutexType(lua)
+
     RegisterObilib(lua)
     RegisterObiContext(lua)
 
@@ -117,7 +119,7 @@ func LuaWorker(proto *lua.FunctionProto) obiseq.SeqWorker {
         case *obiseq.BioSequenceSlice:
             return *val, err
         default:
-            return nil, fmt.Errorf("worker function doesn't return the correct type")
+            return nil, fmt.Errorf("worker function doesn't return the correct type %T", val)
         }
     }
 
@@ -36,6 +36,8 @@ func obicontextGetSet(interpreter *lua.LState) int {
         __lua_obicontext.Store(key, float64(val))
     case lua.LString:
         __lua_obicontext.Store(key, string(val))
+    case *lua.LUserData:
+        __lua_obicontext.Store(key, val.Value)
     case *lua.LTable:
         __lua_obicontext.Store(key, Table2Interface(interpreter, val))
     default:
@@ -1,6 +1,8 @@
 package obilua
 
 import (
+    "sync"
+
     log "github.com/sirupsen/logrus"
 
     lua "github.com/yuin/gopher-lua"
@@ -46,6 +48,8 @@ func pushInterfaceToLua(L *lua.LState, val interface{}) {
         pushSliceBoolToLua(L, v)
     case nil:
         L.Push(lua.LNil)
+    case *sync.Mutex:
+        pushMutexToLua(L, v)
     default:
         log.Fatalf("Cannot deal with value (%T) : %v", val, val)
     }
pkg/obilua/mutex.go (new file, 63 lines)
@@ -0,0 +1,63 @@
package obilua

import (
    lua "github.com/yuin/gopher-lua"

    "sync"
)

const luaMutexTypeName = "Mutex"

func registerMutexType(luaState *lua.LState) {
    mutexType := luaState.NewTypeMetatable(luaMutexTypeName)
    luaState.SetGlobal(luaMutexTypeName, mutexType)

    luaState.SetField(mutexType, "new", luaState.NewFunction(newMutex))

    luaState.SetField(mutexType, "__index",
        luaState.SetFuncs(luaState.NewTable(),
            mutexMethods))
}

func mutex2Lua(interpreter *lua.LState, mutex *sync.Mutex) lua.LValue {
    ud := interpreter.NewUserData()
    ud.Value = mutex
    interpreter.SetMetatable(ud, interpreter.GetTypeMetatable(luaMutexTypeName))

    return ud
}

func pushMutexToLua(luaState *lua.LState, mutex *sync.Mutex) {
    luaState.Push(mutex2Lua(luaState, mutex))
}

func newMutex(luaState *lua.LState) int {
    m := &sync.Mutex{}
    pushMutexToLua(luaState, m)
    return 1
}

var mutexMethods = map[string]lua.LGFunction{
    "lock":   mutexLock,
    "unlock": mutexUnlock,
}

func checkMutex(L *lua.LState) *sync.Mutex {
    ud := L.CheckUserData(1)
    if mutex, ok := ud.Value.(*sync.Mutex); ok {
        return mutex
    }
    L.ArgError(1, "Mutex expected")
    return nil
}

func mutexLock(L *lua.LState) int {
    mutex := checkMutex(L)
    mutex.Lock()
    return 0
}

func mutexUnlock(L *lua.LState) int {
    mutex := checkMutex(L)
    mutex.Unlock()
    return 0
}
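
A hypothetical in-package test (not part of the commit) sketching how the new Lua Mutex type is expected to be driven from a script: Mutex.new(), m:lock() and m:unlock() are the names wired up by registerMutexType and mutexMethods above, the test itself is an assumption:

package obilua

import (
    "testing"

    lua "github.com/yuin/gopher-lua"
)

func TestMutexFromLua(t *testing.T) {
    L := lua.NewState()
    defer L.Close()

    // Expose the "Mutex" type added in this commit to the interpreter.
    registerMutexType(L)

    if err := L.DoString(`
        local m = Mutex.new()
        m:lock()
        -- critical section
        m:unlock()
    `); err != nil {
        t.Fatal(err)
    }
}
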
@@ -2,9 +2,10 @@ package obingslibrary
 
 import (
     "fmt"
-    "log"
     "strings"
 
+    log "github.com/sirupsen/logrus"
+
     "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiapat"
     "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
     "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
@@ -3,7 +3,7 @@ package obingslibrary
 import (
     "fmt"
     "math"
-    "sort"
+    "slices"
 
     log "github.com/sirupsen/logrus"
 
@@ -87,6 +87,8 @@ func lookForTag(seq string, delimiter byte) string {
 
     i := len(seq) - 1
 
+    // log.Warnf("Provided fragment : %s", string(seq))
+
     for i >= 0 && seq[i] != delimiter {
         i--
     }
@@ -107,6 +109,7 @@ func lookForTag(seq string, delimiter byte) string {
         return ""
     }
 
+    // log.Warnf("extracted : %s", string(seq[begin:end]))
     return seq[begin:end]
 }
 
@@ -172,11 +175,12 @@ func (marker *Marker) beginDelimitedTagExtractor(
     begin int,
     forward bool) string {
 
-    taglength := marker.Forward_spacer + marker.Forward_tag_length
+    // log.Warn("beginDelimitedTagExtractor")
+    taglength := 2*marker.Forward_spacer + marker.Forward_tag_length
     delimiter := marker.Forward_tag_delimiter
 
     if !forward {
-        taglength = marker.Reverse_spacer + marker.Reverse_tag_length
+        taglength = 2*marker.Reverse_spacer + marker.Reverse_tag_length
         delimiter = marker.Reverse_tag_delimiter
     }
 
@@ -240,6 +244,7 @@ func (marker *Marker) endDelimitedTagExtractor(
     sequence *obiseq.BioSequence,
     end int,
     forward bool) string {
+    // log.Warn("endDelimitedTagExtractor")
 
     taglength := marker.Reverse_spacer + marker.Reverse_tag_length
     delimiter := marker.Reverse_tag_delimiter
@@ -335,14 +340,17 @@ func (marker *Marker) beginTagExtractor(
     sequence *obiseq.BioSequence,
     begin int,
     forward bool) string {
+    // log.Warnf("Forward : %v -> %d %c", forward, marker.Forward_spacer, marker.Forward_tag_delimiter)
+    // log.Warnf("Forward : %v -> %d %c", forward, marker.Reverse_spacer, marker.Reverse_tag_delimiter)
     if forward {
         if marker.Forward_tag_delimiter == 0 {
             return marker.beginFixedTagExtractor(sequence, begin, forward)
         } else {
             if marker.Forward_tag_indels == 0 {
+                // log.Warnf("Delimited tag for forward primers %s", marker.forward.String())
                 return marker.beginDelimitedTagExtractor(sequence, begin, forward)
             } else {
-                // log.Warn("Rescue tag for forward primers")
+                // log.Warnf("Rescue tag for forward primers %s", marker.forward.String())
                 return marker.beginRescueTagExtractor(sequence, begin, forward)
             }
         }
@@ -351,9 +359,10 @@ func (marker *Marker) beginTagExtractor(
             return marker.beginFixedTagExtractor(sequence, begin, forward)
         } else {
             if marker.Reverse_tag_indels == 0 {
+                // log.Warnf("Delimited tag for reverse/complement primers %s", marker.creverse.String())
                 return marker.beginDelimitedTagExtractor(sequence, begin, forward)
             } else {
-                // log.Warn("Rescue tag for reverse/complement primers")
+                // log.Warnf("Rescue tag for reverse/complement primers %s", marker.creverse.String())
                 return marker.beginRescueTagExtractor(sequence, begin, forward)
             }
         }
@@ -369,9 +378,10 @@ func (marker *Marker) endTagExtractor(
             return marker.endFixedTagExtractor(sequence, end, forward)
         } else {
             if marker.Reverse_tag_indels == 0 {
+                // log.Warnf("Delimited tag for reverse primers %s", marker.reverse.String())
                 return marker.endDelimitedTagExtractor(sequence, end, forward)
             } else {
-                // log.Warn("Rescue tag for reverse primers")
+                // log.Warnf("Rescue tag for reverse primers %s", marker.reverse.String())
                 return marker.endRescueTagExtractor(sequence, end, forward)
             }
         }
@@ -380,9 +390,10 @@ func (marker *Marker) endTagExtractor(
             return marker.endFixedTagExtractor(sequence, end, forward)
         } else {
             if marker.Forward_tag_indels == 0 {
+                // log.Warnf("Delimited tag for forward/complement primers %s", marker.cforward.String())
                 return marker.endDelimitedTagExtractor(sequence, end, forward)
             } else {
-                // log.Warn("Rescue tag for forward/complement primers")
+                // log.Warnf("Rescue tag for forward/complement primers %s", marker.cforward.String())
                 return marker.endRescueTagExtractor(sequence, end, forward)
             }
         }
@@ -609,9 +620,7 @@ func (library *NGSLibrary) ExtractMultiBarcode(sequence *obiseq.BioSequence) (ob
     }
 
     if len(matches) > 0 {
-        sort.Slice(matches, func(i, j int) bool {
-            return matches[i].Begin < matches[j].Begin
-        })
+        slices.SortFunc(matches, func(a, b PrimerMatch) int { return a.Begin - b.Begin })
 
         state := 0
         var from PrimerMatch
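
The sort.Slice call is replaced by slices.SortFunc, which takes a three-way comparison instead of a less-than predicate. A hedged standalone sketch of the difference; the PrimerMatch struct here is a stand-in carrying only the Begin field used by the comparator:

package main

import (
    "fmt"
    "slices"
    "sort"
)

type PrimerMatch struct{ Begin int }

func main() {
    a := []PrimerMatch{{5}, {2}, {9}}
    b := append([]PrimerMatch(nil), a...)

    // sort.Slice takes a less-than predicate over indices.
    sort.Slice(a, func(i, j int) bool { return a[i].Begin < a[j].Begin })

    // slices.SortFunc (Go 1.21+) takes a comparison returning <0, 0 or >0,
    // which is the form used in the new ExtractMultiBarcode code.
    slices.SortFunc(b, func(x, y PrimerMatch) int { return x.Begin - y.Begin })

    fmt.Println(a, b) // both sorted by Begin: [{2} {5} {9}] [{2} {5} {9}]
}
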
@@ -36,8 +36,9 @@ func MakeNGSLibrary() NGSLibrary {
 }
 
 func (library *NGSLibrary) GetMarker(forward, reverse string) (*Marker, bool) {
-    pair := PrimerPair{strings.ToLower(forward),
-        strings.ToLower(reverse)}
+    forward = strings.ToLower(forward)
+    reverse = strings.ToLower(reverse)
+    pair := PrimerPair{forward, reverse}
     marker, ok := (library.Markers)[pair]
 
     if ok {
@@ -7,7 +7,7 @@ import (
 // TODO: The version number is extracted from git. This induces that the version
 // corresponds to the last commit, and not the one when the file will be
 // commited
-var _Commit = "31bfc88"
+var _Commit = "373464c"
 var _Version = "Release 4.2.0"
 
 // Version returns the version of the obitools package.
@@ -72,8 +72,8 @@ func (s *BioSequence) HasAttribute(key string) bool {
     ok := s.annotations != nil
 
     if ok {
-        defer s.AnnotationsUnlock()
         s.AnnotationsLock()
+        defer s.AnnotationsUnlock()
         _, ok = s.annotations[key]
     }
 
@@ -112,8 +112,8 @@ func (s *BioSequence) GetAttribute(key string) (interface{}, bool) {
     ok := s.annotations != nil
 
     if ok {
-        defer s.AnnotationsUnlock()
         s.AnnotationsLock()
+        defer s.AnnotationsUnlock()
         val, ok = s.annotations[key]
     }
 
@@ -144,8 +144,8 @@ func (s *BioSequence) SetAttribute(key string, value interface{}) {
 
     annot := s.Annotations()
 
-    defer s.AnnotationsUnlock()
     s.AnnotationsLock()
+    defer s.AnnotationsUnlock()
     annot[key] = value
 }
 
@@ -205,8 +205,8 @@ func (s *BioSequence) GetFloatAttribute(key string) (float64, bool) {
 // No return value.
 func (s *BioSequence) DeleteAttribute(key string) {
     if s.annotations != nil {
-        defer s.AnnotationsUnlock()
         s.AnnotationsLock()
+        defer s.AnnotationsUnlock()
         delete(s.annotations, key)
     }
 }
|
@ -65,7 +65,7 @@ type BioSequence struct {
|
|||||||
feature []byte
|
feature []byte
|
||||||
paired *BioSequence // A pointer to the paired sequence
|
paired *BioSequence // A pointer to the paired sequence
|
||||||
annotations Annotation
|
annotations Annotation
|
||||||
annot_lock sync.Mutex
|
annot_lock *sync.Mutex
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewEmptyBioSequence creates a new BioSequence object with an empty sequence.
|
// NewEmptyBioSequence creates a new BioSequence object with an empty sequence.
|
||||||
@ -90,7 +90,7 @@ func NewEmptyBioSequence(preallocate int) *BioSequence {
|
|||||||
feature: nil,
|
feature: nil,
|
||||||
paired: nil,
|
paired: nil,
|
||||||
annotations: nil,
|
annotations: nil,
|
||||||
annot_lock: sync.Mutex{},
|
annot_lock: &sync.Mutex{},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
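
Storing the lock as *sync.Mutex means that copies of the struct share one lock instead of silently duplicating it (copying a struct that embeds sync.Mutex by value copies the lock state, which go vet flags as copylocks). A minimal standalone sketch of the difference; the field names here are illustrative, not the BioSequence struct itself:

package main

import (
    "fmt"
    "sync"
)

// byValue embeds the mutex directly: a struct copy gets its own,
// independent copy of the lock, so the copy no longer guards the same data.
type byValue struct{ mu sync.Mutex }

// byPointer stores a *sync.Mutex: copies of the struct share the same lock.
type byPointer struct{ mu *sync.Mutex }

func main() {
    a := byPointer{mu: &sync.Mutex{}}
    b := a // b.mu and a.mu point to the same mutex

    a.mu.Lock()
    locked := b.mu.TryLock() // false: the shared lock is already held
    fmt.Println("copy sees the lock as held:", !locked)
    a.mu.Unlock()
}
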
@@ -187,7 +187,7 @@ func TestCopy(t *testing.T) {
             "annotation1": "value1",
             "annotation2": "value2",
         },
-        annot_lock: sync.Mutex{},
+        annot_lock: &sync.Mutex{},
     }
 
     newSeq := seq.Copy()
@@ -91,8 +91,7 @@ func (sequence *BioSequence) HasStatsOn(key string) bool {
 // - StatsOnValues
 func (sequence *BioSequence) StatsOn(desc StatsOnDescription, na string) StatsOnValues {
     mkey := StatsOnSlotName(desc.Name)
-    annotations := sequence.Annotations()
-    istat, ok := annotations[mkey]
+    istat, ok := sequence.GetAttribute(mkey)
 
     var stats StatsOnValues
     var newstat bool
@@ -117,39 +116,40 @@ func (sequence *BioSequence) StatsOn(desc StatsOnDescription, na string) StatsOn
             }
         }
     default:
-        stats = make(StatsOnValues, 10)
-        annotations[mkey] = stats
+        stats = make(StatsOnValues)
+        sequence.SetAttribute(mkey, stats)
         newstat = true
     }
     } else {
-        stats = make(StatsOnValues, 10)
-        annotations[mkey] = stats
+        stats = make(StatsOnValues)
+        sequence.SetAttribute(mkey, stats)
         newstat = true
     }
 
-    if newstat && sequence.StatsPlusOne(desc, sequence, na) {
-        delete(sequence.Annotations(), desc.Key)
+    if newstat {
+        sequence.StatsPlusOne(desc, sequence, na)
     }
 
     return stats
 }
 
-// StatsPlusOne adds the count of the sequence toAdd to the count of the key in the stats.
+// StatsPlusOne updates the statistics on the given attribute (desc) on the receiver BioSequence
+// with the value of the attribute on the toAdd BioSequence.
 //
 // Parameters:
-// - key: the attribute key (string) to be summarized
-// - toAdd: the BioSequence to add to the stats
+// - desc: StatsOnDescription of the attribute to be updated
+// - toAdd: the BioSequence containing the attribute to be updated
 // - na: the value to be used if the attribute is not present
+//
 // Return type:
-// - bool
+// - bool: true if the update was successful, false otherwise
 func (sequence *BioSequence) StatsPlusOne(desc StatsOnDescription, toAdd *BioSequence, na string) bool {
     sval := na
-    annotations := sequence.Annotations()
     stats := sequence.StatsOn(desc, na)
     retval := false
 
     if toAdd.HasAnnotation() {
-        value, ok := toAdd.Annotations()[desc.Key]
+        value, ok := toAdd.GetAttribute(desc.Key)
 
         if ok {
 
@@ -178,8 +178,9 @@ func (sequence *BioSequence) StatsPlusOne(desc StatsOnDescription, toAdd *BioSeq
     if !ok {
         old = 0
     }
+
     stats[sval] = old + desc.Weight(toAdd)
-    annotations[StatsOnSlotName(desc.Name)] = stats // TODO: check if this is necessary
+    sequence.SetAttribute(StatsOnSlotName(desc.Name), stats) // TODO: check if this is necessary
     return retval
 }
 
@@ -263,7 +264,7 @@ func (sequence *BioSequence) Merge(tomerge *BioSequence, na string, inplace bool
         }
     }
 
-    annotations["count"] = count
+    sequence.SetCount(count)
     return sequence
 }
 
@@ -282,7 +283,7 @@ func (sequences BioSequenceSlice) Merge(na string, statsOn StatsOnDescriptions)
     seq.SetQualities(nil)
 
     if len(sequences) == 1 {
-        seq.Annotations()["count"] = seq.Count()
+        seq.SetCount(seq.Count())
         for _, desc := range statsOn {
             seq.StatsOn(desc, na)
         }
@@ -296,5 +297,4 @@ func (sequences BioSequenceSlice) Merge(na string, statsOn StatsOnDescriptions)
 
     sequences.Recycle(false)
     return seq
-
 }
@@ -1,7 +1,5 @@
 package obistats
 
-
-
 func Max[T int64 | int32 | int16 | int8 | int | float32 | float64](data []T) T {
     m := data[0]
 
@@ -14,7 +12,7 @@ func Max[T int64 | int32 | int16 | int8 | int | float32 | float64] (data []T) T
     return m
 }
 
-func Min[T int64 | int32 | int16 | int8 | int | float32 | float64] (data []T) T {
+func Min[T int64 | int32 | int16 | int8 | int | uint64 | uint32 | uint16 | uint8 | uint | float32 | float64](data []T) T {
     m := data[0]
 
     for _, v := range data {
@@ -25,3 +23,23 @@ func Min[T int64 | int32 | int16 | int8 | int | float32 | float64] (data []T) T
 
     return m
 }
+
+func Mode[T int64 | int32 | int16 | int8 | int | uint64 | uint32 | uint16 | uint8 | uint](data []T) T {
+    ds := make(map[T]int)
+
+    for _, v := range data {
+        ds[v]++
+    }
+
+    md := T(0)
+    maxocc := 0
+
+    for v, occ := range ds {
+        if occ > maxocc {
+            maxocc = occ
+            md = v
+        }
+    }
+
+    return md
+}
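
The new generic Mode helper returns the most frequent value in a slice; it is what the min_cov trimming above relies on. A quick illustrative use (ties between equally frequent values are resolved by map iteration order, so the winner among ties is unspecified):

package main

import (
    "fmt"

    "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obistats"
)

func main() {
    coverages := []uint{1, 7, 7, 7, 3, 3, 12}
    fmt.Println(obistats.Mode(coverages)) // 7, the most frequent value
}
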
@@ -25,8 +25,19 @@ import (
 func BuildConsensus(seqs obiseq.BioSequenceSlice,
     consensus_id string,
     kmer_size int,
+    filter_out float64,
     save_graph bool, dirname string) (*obiseq.BioSequence, error) {
 
+    if seqs.Len() == 0 {
+        return nil, fmt.Errorf("no sequence provided")
+    }
+
+    if seqs.Len() == 1 {
+        seq := seqs[0].Copy()
+        seq.SetAttribute("obiconsensus_consensus", false)
+        return seq, nil
+    }
+
     if save_graph {
         if dirname == "" {
             dirname = "."
@@ -104,7 +115,7 @@ func BuildConsensus(seqs obiseq.BioSequenceSlice,
     log.Debugf("Graph size : %d\n", graph.Len())
     total_kmer := graph.Len()
 
-    seq, err := graph.LongestConsensus(consensus_id)
+    seq, err := graph.LongestConsensus(consensus_id, filter_out)
 
     sumCount := 0
 
@@ -112,7 +123,7 @@ func BuildConsensus(seqs obiseq.BioSequenceSlice,
     for _, s := range seqs {
         sumCount += s.Count()
     }
-
+    seq.SetAttribute("obiconsensus_consensus", true)
     seq.SetAttribute("obiconsensus_weight", sumCount)
     seq.SetAttribute("obiconsensus_seq_length", seq.Len())
     seq.SetAttribute("obiconsensus_kmer_size", kmer_size)
@@ -136,6 +147,10 @@ func SampleWeight(seqs *obiseq.BioSequenceSlice, sample, sample_key string) func
 
     stats := (*seqs)[i].StatsOn(obiseq.MakeStatsOnDescription(sample_key), "NA")
 
+    if stats == nil {
+        log.Panicf("Sample %s not found in sequence %d", sample, i)
+    }
+
     if value, ok := stats[sample]; ok {
         return float64(value)
     }
@@ -292,16 +307,16 @@ func MinionDenoise(graph *obigraph.Graph[*obiseq.BioSequence, Mutation],
             pack[degree] = v
             clean, err = BuildConsensus(pack,
                 fmt.Sprintf("%s_consensus", v.Id()),
-                kmer_size,
+                kmer_size, CLILowCoverage(),
                 CLISaveGraphToFiles(), CLIGraphFilesDirectory())
 
             if err != nil {
                 log.Warning(err)
-                clean = (*graph.Vertices)[i]
+                clean = (*graph.Vertices)[i].Copy()
                 clean.SetAttribute("obiconsensus_consensus", false)
-            } else {
-                clean.SetAttribute("obiconsensus_consensus", true)
             }
 
             pack.Recycle(false)
 
         } else {
@@ -318,8 +333,9 @@ func MinionDenoise(graph *obigraph.Graph[*obiseq.BioSequence, Mutation],
 
         annotations := v.Annotations()
 
+        staton := obiseq.StatsOnSlotName(sample_key)
         for k, v := range annotations {
-            if !clean.HasAttribute(k) {
+            if !clean.HasAttribute(k) && k != staton {
                 clean.SetAttribute(k, v)
             }
         }
@@ -334,6 +350,83 @@ func MinionDenoise(graph *obigraph.Graph[*obiseq.BioSequence, Mutation],
 
     return denoised
 }
+
+func MinionClusterDenoise(graph *obigraph.Graph[*obiseq.BioSequence, Mutation],
+    sample_key string, kmer_size int) obiseq.BioSequenceSlice {
+    denoised := obiseq.MakeBioSequenceSlice()
+    seqs := (*obiseq.BioSequenceSlice)(graph.Vertices)
+    weight := SampleWeight(seqs, graph.Name, sample_key)
+    seqWeights := make([]float64, len(*seqs))
+
+    // Compute weights for each vertex as the sum of the weights of its neighbors
+
+    log.Info("")
+    log.Infof("Sample %s: Computing weights", graph.Name)
+    for i := range *seqs {
+        w := weight(i)
+        for _, j := range graph.Neighbors(i) {
+            w += weight(j)
+        }
+
+        seqWeights[i] = w
+    }
+
+    log.Infof("Sample %s: Done computing weights", graph.Name)
+
+    log.Infof("Sample %s: Clustering", graph.Name)
+    // Look for vertex not having a neighbor with a higher weight
+    for i := range *seqs {
+        v := (*seqs)[i]
+        head := true
+        neighbors := graph.Neighbors(i)
+        for _, j := range neighbors {
+            if seqWeights[i] < seqWeights[j] {
+                head = false
+                continue
+            }
+        }
+
+        if head {
+            pack := obiseq.MakeBioSequenceSlice(len(neighbors) + 1)
+            for k, j := range neighbors {
+                pack[k] = (*seqs)[j]
+            }
+            pack[len(neighbors)] = v
+
+            clean, err := BuildConsensus(pack,
+                fmt.Sprintf("%s_consensus", v.Id()),
+                kmer_size, CLILowCoverage(),
+                CLISaveGraphToFiles(), CLIGraphFilesDirectory())
+
+            if err != nil {
+                log.Warning(err)
+                clean = (*graph.Vertices)[i].Copy()
+                clean.SetAttribute("obiconsensus_consensus", false)
+            }
+            pack.Recycle(false)
+
+            clean.SetAttribute(sample_key, graph.Name)
+
+            annotations := v.Annotations()
+            clean.SetCount(int(weight(i)))
+
+            staton := obiseq.StatsOnSlotName(sample_key)
+
+            for k, v := range annotations {
+                if !clean.HasAttribute(k) && k != staton {
+                    clean.SetAttribute(k, v)
+                }
+            }
+
+            denoised = append(denoised, clean)
+        }
+    }
+
+    log.Infof("Sample %s: Done clustering", graph.Name)
+
+    return denoised
+}
+
 func CLIOBIMinion(itertator obiiter.IBioSequence) obiiter.IBioSequence {
     dirname := CLIGraphFilesDirectory()
     newIter := obiiter.MakeIBioSequence()
@@ -395,9 +488,17 @@ func CLIOBIMinion(itertator obiiter.IBioSequence) obiiter.IBioSequence {
                 false, 1, 0, 3)
         }
 
-        denoised := MinionDenoise(graph,
+        var denoised obiseq.BioSequenceSlice
+
+        if CLICluterDenoise() {
+            denoised = MinionClusterDenoise(graph,
                 CLISampleAttribute(),
                 CLIKmerSize())
+        } else {
+            denoised = MinionDenoise(graph,
+                CLISampleAttribute(),
+                CLIKmerSize())
+        }
 
         newIter.Push(obiiter.MakeBioSequenceBatch(source, sample_order, denoised))
 
@@ -411,9 +512,14 @@ func CLIOBIMinion(itertator obiiter.IBioSequence) obiiter.IBioSequence {
         newIter.WaitAndClose()
     }()
 
+    res := newIter
+    if CLIUnique() {
         obiuniq.AddStatsOn(CLISampleAttribute())
         // obiuniq.AddStatsOn("sample:obiconsensus_weight")
         obiuniq.SetUniqueInMemory(false)
         obiuniq.SetNoSingleton(CLINoSingleton())
-    return obiuniq.CLIUnique(newIter).Pipe(obiiter.WorkerPipe(obiannotate.AddSeqLengthWorker(), false))
+        res = obiuniq.CLIUnique(newIter)
+    }
+
+    return res.Pipe(obiiter.WorkerPipe(obiannotate.AddSeqLengthWorker(), false))
 }
@@ -8,8 +8,6 @@ import (
 var _distStepMax = 1
 var _sampleAttribute = "sample"
 
-var _ratioMax = 1.0
-
 var _clusterMode = false
 var _onlyHead = false
 
@@ -20,6 +18,10 @@ var _NoSingleton = false
 var _saveGraph = "__@@NOSAVE@@__"
 var _saveRatio = "__@@NOSAVE@@__"
 
+var _lowCoverage = 0.0
+
+var _unique = false
+
 // ObiminionOptionSet sets the options for obiminion.
 //
 // options: The options for configuring obiminion.
@@ -50,6 +52,19 @@ func ObiminionOptionSet(options *getoptions.GetOpt) {
     options.BoolVar(&_NoSingleton, "no-singleton", _NoSingleton,
         options.Description("If set, sequences occurring a single time in the data set are discarded."))
 
+    options.BoolVar(&_clusterMode, "cluster", _clusterMode,
+        options.Alias("C"),
+        options.Description("Switch obiconsensus into its clustering mode."),
+    )
+
+    options.BoolVar(&_unique, "unique", _unique,
+        options.Alias("U"),
+        options.Description("If set, sequences are dereplicated on the output (obiuniq)."),
+    )
+
+    options.Float64Var(&_lowCoverage, "low-coverage", _lowCoverage,
+        options.Description("If the coverage of a sample is lower than this value, it will be discarded."),
+    )
 }
 
 // OptionSet sets up the options for the obiminion package.
@@ -130,3 +145,15 @@ func CLIKmerSize() int {
 func CLINoSingleton() bool {
     return _NoSingleton
 }
+
+func CLICluterDenoise() bool {
+    return _clusterMode
+}
+
+func CLIUnique() bool {
+    return _unique
+}
+
+func CLILowCoverage() float64 {
+    return _lowCoverage
+}