mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 08:10:45 +00:00
Switch to the built-in min and max functions and remove the versions from obiutils
Former-commit-id: 8c4558921b0d0c266b070f16e83813de6e6d4a0f
This commit is contained in:
@ -2,7 +2,6 @@ package obialign
|
||||
|
||||
import (
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||
)
|
||||
|
||||
var _iupac = [26]byte{
|
||||
@ -130,11 +129,11 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[
|
||||
// in_matrix := false
|
||||
x1 := y - lB + extra
|
||||
x2 := extra - y
|
||||
xs := obiutils.Max(obiutils.Max(x1, x2), 0)
|
||||
xs := max(x1, x2, 0)
|
||||
|
||||
x1 = y + extra
|
||||
x2 = lA + extra - y
|
||||
xf := obiutils.Min(obiutils.Min(x1, x2), even-1) + 1
|
||||
xf := min(x1, x2, even-1) + 1
|
||||
|
||||
for x := xs; x < xf; x++ {
|
||||
|
||||
@ -222,11 +221,11 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[
|
||||
// . 9 10 + 2 - 1
|
||||
x1 = y - lB + extra + even
|
||||
x2 = extra - y + even - 1
|
||||
xs = obiutils.Max(obiutils.Max(x1, x2), even)
|
||||
xs = max(x1, x2, even)
|
||||
|
||||
x1 = y + extra + even
|
||||
x2 = lA + extra - y + even - 1
|
||||
xf = obiutils.Min(obiutils.Min(x1, x2), width-1) + 1
|
||||
xf = min(x1, x2, width-1) + 1
|
||||
|
||||
for x := xs; x < xf; x++ {
|
||||
|
||||
@ -383,4 +382,3 @@ func FastLCSEGFScore(seqA, seqB *obiseq.BioSequence, maxError int, buffer *[]uin
|
||||
// FastLCSScore is a convenience wrapper around FastLCSEGFScoreByte.
//
// It forwards the raw sequences of seqA and seqB (seqA.Sequence() and
// seqB.Sequence()), together with maxError and buffer, and always passes
// false for the endgapfree argument.
//
// The two returned integers are those returned by FastLCSEGFScoreByte,
// unchanged.
// NOTE(review): presumably the LCS score and the alignment length — confirm
// against FastLCSEGFScoreByte.
func FastLCSScore(seqA, seqB *obiseq.BioSequence, maxError int, buffer *[]uint64) (int, int) {
|
||||
return FastLCSEGFScoreByte(seqA.Sequence(), seqB.Sequence(), maxError, false, buffer)
|
||||
}
|
||||
|
||||
|
@ -15,7 +15,6 @@ import (
|
||||
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obialign"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||
)
|
||||
|
||||
var _MaxPatLen = int(C.MAX_PAT_LEN)
|
||||
@ -53,26 +52,24 @@ var NilApatPattern = ApatPattern{nil}
|
||||
// type.
|
||||
var NilApatSequence = ApatSequence{nil}
|
||||
|
||||
// MakeApatPattern creates an ApatPattern object based on the given pattern, error maximum and allowsIndel flag.
|
||||
// MakeApatPattern creates an ApatPattern object based on the given pattern, error maximum, and allowsIndel flag.
|
||||
//
|
||||
// The pattern is a short DNA sequence (up to 64 symboles).
|
||||
// Parameters:
|
||||
// The pattern is a short DNA sequence (up to 64 symbols).
|
||||
// Ambiguities can be represented either using IUPAC symbols,
|
||||
// or using the [...] classical in regular pattern grammar.
|
||||
// For example, the ambiguity A/T can be indicated using W
|
||||
// or [AT]. A nucleotide can be negated by preceding it with
|
||||
// a '!'. The APAT algorithm allows for error during the
|
||||
// matching process. The maximum number of tolerated error
|
||||
// is indicated at the construction of the pattern using
|
||||
// the errormax parameter. Some positions can be marked as not
|
||||
// a '!'. The pattern is converted to uppercase.
|
||||
// Some positions can be marked as not
|
||||
// allowed for mismatches. They have to be signaled using a '#'
|
||||
// sign after the corresponding nucleotide.
|
||||
//
|
||||
// Parameters:
|
||||
// pattern: The input pattern string.
|
||||
// errormax: The maximum number of errors allowed.
|
||||
// allowsIndel: A flag indicating whether indels are allowed or not.
|
||||
// errormax is the maximum number of errors allowed in the pattern.
|
||||
//
|
||||
// Returns an ApatPattern object and an error.
|
||||
// allowsIndel is a flag indicating whether indels are allowed in the pattern.
|
||||
//
|
||||
// Returns an ApatPattern object and an error if the pattern is invalid.
|
||||
func MakeApatPattern(pattern string, errormax int, allowsIndel bool) (ApatPattern, error) {
|
||||
cpattern := C.CString(pattern)
|
||||
defer C.free(unsafe.Pointer(cpattern))
|
||||
@ -264,6 +261,7 @@ func (sequence ApatSequence) Free() {
|
||||
// values of the [3]int indicate respectively the start and the end position of
|
||||
// the match. Following the GO convention the end position is not included in the
|
||||
// match. The third value indicates the number of errors detected for this occurrence.
|
||||
|
||||
func (pattern ApatPattern) FindAllIndex(sequence ApatSequence, begin, length int) (loc [][3]int) {
|
||||
if begin < 0 {
|
||||
begin = 0
|
||||
@ -348,8 +346,8 @@ func (pattern ApatPattern) BestMatch(sequence ApatSequence, begin, length int) (
|
||||
|
||||
start = best[0] - nerr
|
||||
end = best[0] + int(pattern.pointer.pointer.patlen) + nerr
|
||||
start = obiutils.Max(start, 0)
|
||||
end = obiutils.Min(end, sequence.Len())
|
||||
start = max(start, 0)
|
||||
end = min(end, sequence.Len())
|
||||
|
||||
cpattern := (*[1 << 30]byte)(unsafe.Pointer(pattern.pointer.pointer.cpat))
|
||||
frg := sequence.pointer.reference.Sequence()[start:end]
|
||||
@ -377,6 +375,22 @@ func (pattern ApatPattern) BestMatch(sequence ApatSequence, begin, length int) (
|
||||
// return int(_AllocatedApaSequences)
|
||||
// }
|
||||
|
||||
// AllMatches finds all matches of a given pattern in a sequence.
|
||||
//
|
||||
// It only works if the pattern is a pure IUPAC sequence without
|
||||
// supplementary characters normally allowed : ! and #.
|
||||
//
|
||||
// It takes the following parameters:
|
||||
// - pattern: the pattern to search for (ApatPattern).
|
||||
// - sequence: the sequence to search in (ApatSequence).
|
||||
// - begin: the starting index of the search (int).
|
||||
// - length: the length of the search (int).
|
||||
//
|
||||
// It returns a slice of [3]int representing the locations of all matches in the sequence.
|
||||
// The AllMatches method returns a slice of [3]int. The first two
|
||||
// values of the [3]int indicate respectively the start and the end position of
|
||||
// the match. Following the GO convention the end position is not included in the
|
||||
// match. The third value indicates the number of errors detected for this occurrence.
|
||||
func (pattern ApatPattern) AllMatches(sequence ApatSequence, begin, length int) (loc [][3]int) {
|
||||
res := pattern.FindAllIndex(sequence, begin, length)
|
||||
|
||||
@ -384,12 +398,17 @@ func (pattern ApatPattern) AllMatches(sequence ApatSequence, begin, length int)
|
||||
buffer := sbuffer[:]
|
||||
|
||||
for _, m := range res {
|
||||
// Recompute the start and end position of the match
|
||||
// when the pattern allows for indels
|
||||
if m[2] > 0 && pattern.pointer.pointer.hasIndel {
|
||||
start := m[0] - m[2]
|
||||
end := m[0] + int(pattern.pointer.pointer.patlen) + m[2]
|
||||
start = obiutils.Max(start, 0)
|
||||
end = obiutils.Min(end, sequence.Len())
|
||||
|
||||
start = max(start, 0)
|
||||
end = min(end, sequence.Len())
|
||||
// 1 << 30 = 1,073,741,824 = 1Gb
|
||||
// It's a virtual array mapping the sequence to the pattern
|
||||
// in the C code.
|
||||
// No allocations are done here.
|
||||
cpattern := (*[1 << 30]byte)(unsafe.Pointer(pattern.pointer.pointer.cpat))
|
||||
frg := sequence.pointer.reference.Sequence()[start:end]
|
||||
|
||||
|
@ -100,87 +100,6 @@ func _EndOfLastFastqEntry(buffer []byte) int {
|
||||
return cut
|
||||
}
|
||||
|
||||
// lastFastqCut splits buffer in two at the start of the last entry that the
// backward scan below recognises as a FASTQ record.
//
// The buffer is read from its last byte towards its first byte by a small
// state machine:
//
//	0: looking for a '+' character (remembered in restart)
//	1: the byte preceding the '+' must be an end of line
//	2: skipping separators until the last character of the sequence
//	3: walking back over the sequence until an end of line
//	4: skipping end-of-line bytes between the sequence and the header
//	5: walking back over the header line until a '@' (remembered in cut)
//	6: the byte preceding the '@' must be an end of line
//	7: a record start has been validated, the scan stops
//
// Whenever a state rejects the current byte, the machine returns to state 0
// and the scan resumes from the byte just before the rejected '+'.
//
// Returns:
//   - when state 7 is reached: buffer[:cut] and a copy of buffer[cut:],
//     where cut is the index of the validated '@';
//   - otherwise: an empty slice and buffer itself.
func lastFastqCut(buffer []byte) ([]byte, []byte) {
|
||||
imax := len(buffer)
|
||||
cut := imax
|
||||
state := 0
|
||||
restart := imax - 1
|
||||
// Backward scan: stops as soon as state 7 is reached or the buffer is exhausted.
for i := restart; i >= 0 && state < 7; i-- {
|
||||
C := buffer[i]
|
||||
is_end_of_line := C == '\r' || C == '\n'
|
||||
is_space := C == ' ' || C == '\t'
|
||||
is_sep := is_space || is_end_of_line
|
||||
|
||||
switch state {
|
||||
case 0:
|
||||
if C == '+' {
|
||||
// Potential start of the quality part, step 1
|
||||
state = 1
|
||||
// Remember where this candidate '+' is so the scan can rewind to it
restart = i
|
||||
}
|
||||
case 1:
|
||||
if is_end_of_line {
|
||||
// Potential start of the quality part, step 2
|
||||
state = 2
|
||||
} else {
|
||||
// It was not the start of the quality part
|
||||
state = 0
|
||||
// Rewind: after the loop's i--, the scan resumes at restart-1 in state 0
i = restart
|
||||
}
|
||||
case 2:
|
||||
if is_sep {
|
||||
// Potential start of the quality part, step 2 (stay in the same state)
|
||||
state = 2
|
||||
} else if (C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z') || C == '-' || C == '.' || C == '[' || C == ']' {
|
||||
// End of the sequence (its last character, since the scan goes backwards)
|
||||
state = 3
|
||||
} else {
|
||||
// It was not the start of the quality part
|
||||
state = 0
|
||||
i = restart
|
||||
}
|
||||
case 3:
|
||||
if is_end_of_line {
|
||||
// Entering the header line
|
||||
state = 4
|
||||
} else if (C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z') || C == '-' || C == '.' || C == '[' || C == ']' {
|
||||
// Progressing along the sequence
|
||||
state = 3
|
||||
} else {
|
||||
// It was not the sequence part
|
||||
state = 0
|
||||
i = restart
|
||||
}
|
||||
case 4:
|
||||
// Skip consecutive end-of-line bytes; the first other byte belongs to the header line
if is_end_of_line {
|
||||
state = 4
|
||||
} else {
|
||||
state = 5
|
||||
}
|
||||
case 5:
|
||||
if is_end_of_line {
|
||||
// It was not the header line
|
||||
state = 0
|
||||
i = restart
|
||||
} else if C == '@' {
|
||||
// Candidate start of the record; any other byte keeps the machine in state 5
state = 6
|
||||
cut = i
|
||||
}
|
||||
case 6:
|
||||
// The candidate '@' is accepted only if it is preceded by an end of line
if is_end_of_line {
|
||||
state = 7
|
||||
} else {
|
||||
state = 0
|
||||
i = restart
|
||||
}
|
||||
}
|
||||
}
|
||||
if state == 7 {
|
||||
// The tail is cloned, so it does not share memory with buffer[:cut]
return buffer[:cut], bytes.Clone(buffer[cut:])
|
||||
}
|
||||
// No record start was validated: nothing is cut, everything is returned as the remainder
return []byte{}, buffer
|
||||
}
|
||||
|
||||
func _ParseFastqFile(source string,
|
||||
input ChannelSeqFileChunk,
|
||||
out obiiter.IBioSequence,
|
||||
|
@ -4,7 +4,6 @@ import (
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||
)
|
||||
|
||||
func IFragments(minsize, length, overlap, size, nworkers int) Pipeable {
|
||||
@ -30,7 +29,7 @@ func IFragments(minsize, length, overlap, size, nworkers int) Pipeable {
|
||||
news = append(news, s)
|
||||
} else {
|
||||
for i := 0; i < s.Len(); i += step {
|
||||
end := obiutils.Min(i+length, s.Len())
|
||||
end := min(i+length, s.Len())
|
||||
fusion := false
|
||||
if (s.Len() - end) < step {
|
||||
end = s.Len()
|
||||
|
@ -4,13 +4,10 @@ import (
|
||||
"math"
|
||||
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||
)
|
||||
|
||||
type Table4mer [256]uint16
|
||||
|
||||
|
||||
|
||||
func Count4Mer(seq *obiseq.BioSequence, buffer *[]byte, counts *Table4mer) *Table4mer {
|
||||
iternal_buffer := Encode4mer(seq, buffer) // The slice of 4-mer codes
|
||||
|
||||
@ -33,7 +30,7 @@ func Count4Mer(seq *obiseq.BioSequence, buffer *[]byte, counts *Table4mer) *Tabl
|
||||
func Common4Mer(count1, count2 *Table4mer) int {
|
||||
sum := 0
|
||||
for i := 0; i < 256; i++ {
|
||||
sum += int(obiutils.Min((*count1)[i], (*count2)[i]))
|
||||
sum += int(min((*count1)[i], (*count2)[i]))
|
||||
}
|
||||
return sum
|
||||
}
|
||||
@ -49,7 +46,7 @@ func Sum4Mer(count *Table4mer) int {
|
||||
func LCS4MerBounds(count1, count2 *Table4mer) (int, int) {
|
||||
s1 := Sum4Mer(count1)
|
||||
s2 := Sum4Mer(count2)
|
||||
smin := obiutils.Min(s1, s2)
|
||||
smin := min(s1, s2)
|
||||
|
||||
cw := Common4Mer(count1, count2)
|
||||
|
||||
@ -66,7 +63,7 @@ func LCS4MerBounds(count1, count2 *Table4mer) (int, int) {
|
||||
func Error4MerBounds(count1, count2 *Table4mer) (int, int) {
|
||||
s1 := Sum4Mer(count1)
|
||||
s2 := Sum4Mer(count2)
|
||||
smax := obiutils.Max(s1, s2)
|
||||
smax := max(s1, s2)
|
||||
|
||||
cw := Common4Mer(count1, count2)
|
||||
|
||||
|
@ -10,7 +10,6 @@ import (
|
||||
"slices"
|
||||
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
@ -472,7 +471,7 @@ func (graph *DeBruijnGraph) Gml() string {
|
||||
n := graph.Nexts(idx)
|
||||
for _, dst := range n {
|
||||
dstid := nodeidx[dst]
|
||||
weight := obiutils.Min(graph.Weight(dst), weight)
|
||||
weight := min(graph.Weight(dst), weight)
|
||||
label := decode[dst&3]
|
||||
buffer.WriteString(
|
||||
fmt.Sprintf(`edge [ source "%d"
|
||||
|
@ -9,7 +9,6 @@ import (
|
||||
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiapat"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||
)
|
||||
|
||||
type DemultiplexMatch struct {
|
||||
@ -130,7 +129,7 @@ func (marker *Marker) Match(sequence *obiseq.BioSequence) *DemultiplexMatch {
|
||||
|
||||
sseq := sequence.String()
|
||||
direct := sseq[start:end]
|
||||
tagstart := obiutils.Max(start-marker.taglength, 0)
|
||||
tagstart := max(start-marker.taglength, 0)
|
||||
ftag := strings.ToLower(sseq[tagstart:start])
|
||||
|
||||
m := DemultiplexMatch{
|
||||
@ -150,7 +149,7 @@ func (marker *Marker) Match(sequence *obiseq.BioSequence) *DemultiplexMatch {
|
||||
reverse, _ := sequence.Subsequence(start, end, false)
|
||||
defer reverse.Recycle()
|
||||
reverse = reverse.ReverseComplement(true)
|
||||
endtag := obiutils.Min(end+marker.taglength, sequence.Len())
|
||||
endtag := min(end+marker.taglength, sequence.Len())
|
||||
rtag, err := sequence.Subsequence(end, endtag, false)
|
||||
defer rtag.Recycle()
|
||||
srtag := ""
|
||||
@ -201,7 +200,7 @@ func (marker *Marker) Match(sequence *obiseq.BioSequence) *DemultiplexMatch {
|
||||
sseq := sequence.String()
|
||||
|
||||
reverse := strings.ToLower(sseq[start:end])
|
||||
tagstart := obiutils.Max(start-marker.taglength, 0)
|
||||
tagstart := max(start-marker.taglength, 0)
|
||||
rtag := strings.ToLower(sseq[tagstart:start])
|
||||
|
||||
m := DemultiplexMatch{
|
||||
@ -221,7 +220,7 @@ func (marker *Marker) Match(sequence *obiseq.BioSequence) *DemultiplexMatch {
|
||||
defer direct.Recycle()
|
||||
direct = direct.ReverseComplement(true)
|
||||
|
||||
endtag := obiutils.Min(end+marker.taglength, sequence.Len())
|
||||
endtag := min(end+marker.taglength, sequence.Len())
|
||||
ftag, err := sequence.Subsequence(end, endtag, false)
|
||||
defer ftag.Recycle()
|
||||
sftag := ""
|
||||
|
@ -6,7 +6,6 @@ import (
|
||||
"sort"
|
||||
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||
)
|
||||
|
||||
type Suffix struct {
|
||||
@ -27,7 +26,7 @@ func SuffixLess(suffixarray SuffixArray) func(i, j int) bool {
|
||||
sj := suffixarray.Suffixes[j]
|
||||
bj := (*suffixarray.Sequences)[int(sj.Idx)].Sequence()[sj.Pos:]
|
||||
|
||||
l := obiutils.Min(len(bi), len(bj))
|
||||
l := min(len(bi), len(bj))
|
||||
p := 0
|
||||
for p < l && bi[p] == bj[p] {
|
||||
p++
|
||||
@ -92,7 +91,7 @@ func (suffixarray *SuffixArray) CommonSuffix() []int {
|
||||
si := suffixarray.Suffixes[i]
|
||||
bi := (*suffixarray.Sequences)[int(si.Idx)].Sequence()[si.Pos:]
|
||||
|
||||
l := obiutils.Min(len(bi), len(bp))
|
||||
l := min(len(bi), len(bp))
|
||||
p := 0
|
||||
for p < l && bi[p] == bp[p] {
|
||||
p++
|
||||
|
@ -4,7 +4,6 @@ import (
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiapat"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
@ -47,8 +46,8 @@ func CLIPCR(iterator obiiter.IBioSequence) (obiiter.IBioSequence, error) {
|
||||
frags := obiiter.IFragments(
|
||||
CLIMaxLength()*1000,
|
||||
CLIMaxLength()*100,
|
||||
CLIMaxLength()+obiutils.Max(len(CLIForwardPrimer()),
|
||||
len(CLIReversePrimer()))+obiutils.Min(len(CLIForwardPrimer()),
|
||||
CLIMaxLength()+max(len(CLIForwardPrimer()),
|
||||
len(CLIReversePrimer()))+min(len(CLIForwardPrimer()),
|
||||
len(CLIReversePrimer()))/2,
|
||||
100,
|
||||
obioptions.CLIParallelWorkers(),
|
||||
|
@ -63,7 +63,7 @@ func IndexSequence(seqidx int,
|
||||
if lca[order] == ancestor {
|
||||
// nseq[i]++
|
||||
if mini != -1 {
|
||||
wordmin = obiutils.Max(sequence.Len(), references[order].Len()) - 3 - 4*mini
|
||||
wordmin = max(sequence.Len(), references[order].Len()) - 3 - 4*mini
|
||||
}
|
||||
|
||||
if cw[order] < wordmin {
|
||||
@ -189,7 +189,7 @@ func IndexReferenceDB(iterator obiiter.IBioSequence) obiiter.IBioSequence {
|
||||
indexed := obiiter.MakeIBioSequence()
|
||||
go func() {
|
||||
for i := 0; i < len(references); i += 10 {
|
||||
limits <- [2]int{i, obiutils.Min(i+10, len(references))}
|
||||
limits <- [2]int{i, min(i+10, len(references))}
|
||||
}
|
||||
close(limits)
|
||||
}()
|
||||
|
@ -110,7 +110,7 @@ func FindClosests(sequence *obiseq.BioSequence,
|
||||
d, _, _, _ := obialign.D1Or0(sequence, references[order])
|
||||
if d >= 0 {
|
||||
score = d
|
||||
alilength = obiutils.Max(sequence.Len(), ref.Len())
|
||||
alilength = max(sequence.Len(), ref.Len())
|
||||
lcs = alilength - score
|
||||
}
|
||||
} else {
|
||||
@ -294,8 +294,8 @@ func CLIAssignTaxonomy(iterator obiiter.IBioSequence,
|
||||
j++
|
||||
} else {
|
||||
log.Warnf("Taxid %d is not described in the taxonomy."+
|
||||
" Sequence %s is discared from the reference database",
|
||||
seq.Taxid(), seq.Id())
|
||||
" Sequence %s is discared from the reference database",
|
||||
seq.Taxid(), seq.Id())
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -4,20 +4,6 @@ import (
|
||||
"golang.org/x/exp/constraints"
|
||||
)
|
||||
|
||||
// Min returns x when x < y, and y otherwise.
//
// T can be any type satisfying constraints.Ordered.
func Min[T constraints.Ordered](x, y T) T {
|
||||
if x < y {
|
||||
return x
|
||||
}
|
||||
return y
|
||||
}
|
||||
|
||||
// Max returns y when x < y, and x otherwise.
//
// T can be any type satisfying constraints.Ordered.
func Max[T constraints.Ordered](x, y T) T {
|
||||
if x < y {
|
||||
return y
|
||||
}
|
||||
return x
|
||||
}
|
||||
|
||||
func MinMax[T constraints.Ordered](x, y T) (T, T) {
|
||||
if x < y {
|
||||
return x, y
|
||||
@ -25,7 +11,6 @@ func MinMax[T constraints.Ordered](x, y T) (T, T) {
|
||||
return y, x
|
||||
}
|
||||
|
||||
|
||||
func MinMaxSlice[T constraints.Ordered](vec []T) (min, max T) {
|
||||
if len(vec) == 0 {
|
||||
panic("empty slice")
|
||||
|
Reference in New Issue
Block a user