Switch to the built-in min and max functions and remove the versions from obiutils

Former-commit-id: 8c4558921b0d0c266b070f16e83813de6e6d4a0f
This commit is contained in:
Eric Coissac
2024-05-30 08:27:24 +02:00
parent 98b3bc2a8c
commit dd9307a4cd
12 changed files with 58 additions and 145 deletions

View File

@ -2,7 +2,6 @@ package obialign
import (
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
)
var _iupac = [26]byte{
@ -130,11 +129,11 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[
// in_matrix := false
x1 := y - lB + extra
x2 := extra - y
xs := obiutils.Max(obiutils.Max(x1, x2), 0)
xs := max(x1, x2, 0)
x1 = y + extra
x2 = lA + extra - y
xf := obiutils.Min(obiutils.Min(x1, x2), even-1) + 1
xf := min(x1, x2, even-1) + 1
for x := xs; x < xf; x++ {
@ -222,11 +221,11 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[
// . 9 10 + 2 - 1
x1 = y - lB + extra + even
x2 = extra - y + even - 1
xs = obiutils.Max(obiutils.Max(x1, x2), even)
xs = max(x1, x2, even)
x1 = y + extra + even
x2 = lA + extra - y + even - 1
xf = obiutils.Min(obiutils.Min(x1, x2), width-1) + 1
xf = min(x1, x2, width-1) + 1
for x := xs; x < xf; x++ {
@ -383,4 +382,3 @@ func FastLCSEGFScore(seqA, seqB *obiseq.BioSequence, maxError int, buffer *[]uin
// FastLCSScore forwards the raw sequences of seqA and seqB to
// FastLCSEGFScoreByte with the endgapfree flag set to false. maxError and
// buffer are passed through unchanged, and the two int results of that call
// are returned as they are.
func FastLCSScore(seqA, seqB *obiseq.BioSequence, maxError int, buffer *[]uint64) (int, int) {
	rawA, rawB := seqA.Sequence(), seqB.Sequence()
	return FastLCSEGFScoreByte(rawA, rawB, maxError, false, buffer)
}

View File

@ -15,7 +15,6 @@ import (
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obialign"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
)
var _MaxPatLen = int(C.MAX_PAT_LEN)
@ -53,26 +52,24 @@ var NilApatPattern = ApatPattern{nil}
// type.
var NilApatSequence = ApatSequence{nil}
// MakeApatPattern creates an ApatPattern object based on the given pattern, error maximum and allowsIndel flag.
// MakeApatPattern creates an ApatPattern object based on the given pattern, error maximum, and allowsIndel flag.
//
// The pattern is a short DNA sequence (up to 64 symboles).
// Parameters:
// The pattern is a short DNA sequence (up to 64 symbols).
// Ambiguities can be represented either using IUPAC symbols,
// or using the [...] classical in regular pattern grammar.
// For example, the ambiguity A/T can be indicated using W
// or [AT]. A nucleotide can be negated by preceding it with
// a '!'. The APAT algorithm allows for error during the
// matching process. The maximum number of tolerated error
// is indicated at the construction of the pattern using
// the errormax parameter. Some positions can be marked as not
// a '!'. The pattern is converted to uppercase.
// Some positions can be marked as not
// allowed for mismatches. They have to be signaled using a '#'
// sign after the corresponding nucleotide.
//
// Parameters:
// pattern: The input pattern string.
// errormax: The maximum number of errors allowed.
// allowsIndel: A flag indicating whether indels are allowed or not.
// errormax is the maximum number of errors allowed in the pattern.
//
// Returns an ApatPattern object and an error.
// allowsIndel is a flag indicating whether indels are allowed in the pattern.
//
// Returns an ApatPattern object and an error if the pattern is invalid.
func MakeApatPattern(pattern string, errormax int, allowsIndel bool) (ApatPattern, error) {
cpattern := C.CString(pattern)
defer C.free(unsafe.Pointer(cpattern))
@ -264,6 +261,7 @@ func (sequence ApatSequence) Free() {
// values of the [3]int indicate respectively the start and the end position of
// the match. Following the GO convention the end position is not included in the
// match. The third value indicates the number of error detected for this occurrence.
func (pattern ApatPattern) FindAllIndex(sequence ApatSequence, begin, length int) (loc [][3]int) {
if begin < 0 {
begin = 0
@ -348,8 +346,8 @@ func (pattern ApatPattern) BestMatch(sequence ApatSequence, begin, length int) (
start = best[0] - nerr
end = best[0] + int(pattern.pointer.pointer.patlen) + nerr
start = obiutils.Max(start, 0)
end = obiutils.Min(end, sequence.Len())
start = max(start, 0)
end = min(end, sequence.Len())
cpattern := (*[1 << 30]byte)(unsafe.Pointer(pattern.pointer.pointer.cpat))
frg := sequence.pointer.reference.Sequence()[start:end]
@ -377,6 +375,22 @@ func (pattern ApatPattern) BestMatch(sequence ApatSequence, begin, length int) (
// return int(_AllocatedApaSequences)
// }
// AllMatches finds all matches of a given pattern in a sequence.
//
// It only works if the pattern is a pure IUPAC sequence without
// the supplementary characters normally allowed: ! and #.
//
// It takes the following parameters:
// - pattern: the pattern to search for (ApatPattern).
// - sequence: the sequence to search in (ApatSequence).
// - begin: the starting index of the search (int).
// - length: the length of the search (int).
//
// It returns a slice of [3]int representing the locations of all matches in the sequence.
// The AllMatches method returns a slice of [3]int. The first two
// values of each [3]int indicate respectively the start and the end position of
// the match. Following the Go convention, the end position is not included in the
// match. The third value indicates the number of errors detected for this occurrence.
func (pattern ApatPattern) AllMatches(sequence ApatSequence, begin, length int) (loc [][3]int) {
res := pattern.FindAllIndex(sequence, begin, length)
@ -384,12 +398,17 @@ func (pattern ApatPattern) AllMatches(sequence ApatSequence, begin, length int)
buffer := sbuffer[:]
for _, m := range res {
// Recompute the start and end position of the match
// when the pattern allows for indels
if m[2] > 0 && pattern.pointer.pointer.hasIndel {
start := m[0] - m[2]
end := m[0] + int(pattern.pointer.pointer.patlen) + m[2]
start = obiutils.Max(start, 0)
end = obiutils.Min(end, sequence.Len())
start = max(start, 0)
end = min(end, sequence.Len())
// 1 << 30 = 1,073,741,824 = 1Gb
// It's a virtual array mapping the sequence to the pattern
// in the C code.
// No allocations are done here.
cpattern := (*[1 << 30]byte)(unsafe.Pointer(pattern.pointer.pointer.cpat))
frg := sequence.pointer.reference.Sequence()[start:end]

View File

@ -100,87 +100,6 @@ func _EndOfLastFastqEntry(buffer []byte) int {
return cut
}
// lastFastqCut scans buffer backwards looking for the start of a FASTQ
// record and splits the buffer at that position.
//
// The backward scan expects, in this order: a '+' character, an end of line,
// optional blanks, a run of sequence symbols (letters, '-', '.', '[', ']'),
// one or more ends of line, a header line containing an '@', and finally an
// end of line immediately before that '@'. Whenever an expectation fails, the
// scan resumes just before the last '+' that was tried.
//
// When such a position is found, it returns buffer[:cut] and a copy of
// buffer[cut:], where cut is the index of the '@'. Otherwise it returns an
// empty slice and buffer itself.
func lastFastqCut(buffer []byte) ([]byte, []byte) {
	// States of the backward scan; the numeric order matches the order in
	// which the parts of a record are met when reading from the end.
	const (
		seekPlus       = iota // looking for a '+'
		afterPlus             // '+' seen, an end of line must precede it
		beforeSequence        // skipping separators in front of the '+' line
		inSequence            // walking back over the sequence symbols
		beforeHeader          // skipping the line break(s) before the sequence
		inHeader              // walking back over the header looking for '@'
		atHeaderStart         // '@' seen, an end of line must precede it
		found                 // a complete record start has been located
	)

	isEOL := func(c byte) bool { return c == '\r' || c == '\n' }
	isBlank := func(c byte) bool { return c == ' ' || c == '\t' || isEOL(c) }
	isSeqSymbol := func(c byte) bool {
		switch {
		case c >= 'a' && c <= 'z', c >= 'A' && c <= 'Z':
			return true
		}
		return c == '-' || c == '.' || c == '[' || c == ']'
	}

	splitAt := len(buffer)
	anchor := len(buffer) - 1 // position of the last '+' candidate
	state := seekPlus

	for pos := anchor; pos >= 0 && state < found; pos-- {
		c := buffer[pos]
		rewind := false

		switch state {
		case seekPlus:
			if c == '+' {
				// Potential start of the quality part
				anchor = pos
				state = afterPlus
			}
		case afterPlus:
			if isEOL(c) {
				state = beforeSequence
			} else {
				rewind = true
			}
		case beforeSequence:
			switch {
			case isBlank(c):
				// still in front of the '+' line
			case isSeqSymbol(c):
				// last symbol of the sequence
				state = inSequence
			default:
				rewind = true
			}
		case inSequence:
			switch {
			case isEOL(c):
				// entering the header line
				state = beforeHeader
			case isSeqSymbol(c):
				// progressing along the sequence
			default:
				rewind = true
			}
		case beforeHeader:
			if !isEOL(c) {
				state = inHeader
			}
		case inHeader:
			switch {
			case isEOL(c):
				// it was not the header line
				rewind = true
			case c == '@':
				splitAt = pos
				state = atHeaderStart
			}
		case atHeaderStart:
			if isEOL(c) {
				state = found
			} else {
				rewind = true
			}
		}

		if rewind {
			// The candidate '+' was not the quality separator: restart the
			// search just before it (the loop decrement moves past it).
			state = seekPlus
			pos = anchor
		}
	}

	if state != found {
		return []byte{}, buffer
	}

	return buffer[:splitAt], bytes.Clone(buffer[splitAt:])
}
func _ParseFastqFile(source string,
input ChannelSeqFileChunk,
out obiiter.IBioSequence,

View File

@ -4,7 +4,6 @@ import (
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
)
func IFragments(minsize, length, overlap, size, nworkers int) Pipeable {
@ -30,7 +29,7 @@ func IFragments(minsize, length, overlap, size, nworkers int) Pipeable {
news = append(news, s)
} else {
for i := 0; i < s.Len(); i += step {
end := obiutils.Min(i+length, s.Len())
end := min(i+length, s.Len())
fusion := false
if (s.Len() - end) < step {
end = s.Len()

View File

@ -4,13 +4,10 @@ import (
"math"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
)
type Table4mer [256]uint16
func Count4Mer(seq *obiseq.BioSequence, buffer *[]byte, counts *Table4mer) *Table4mer {
iternal_buffer := Encode4mer(seq, buffer) // The slice of 4-mer codes
@ -33,7 +30,7 @@ func Count4Mer(seq *obiseq.BioSequence, buffer *[]byte, counts *Table4mer) *Tabl
func Common4Mer(count1, count2 *Table4mer) int {
sum := 0
for i := 0; i < 256; i++ {
sum += int(obiutils.Min((*count1)[i], (*count2)[i]))
sum += int(min((*count1)[i], (*count2)[i]))
}
return sum
}
@ -49,7 +46,7 @@ func Sum4Mer(count *Table4mer) int {
func LCS4MerBounds(count1, count2 *Table4mer) (int, int) {
s1 := Sum4Mer(count1)
s2 := Sum4Mer(count2)
smin := obiutils.Min(s1, s2)
smin := min(s1, s2)
cw := Common4Mer(count1, count2)
@ -66,7 +63,7 @@ func LCS4MerBounds(count1, count2 *Table4mer) (int, int) {
func Error4MerBounds(count1, count2 *Table4mer) (int, int) {
s1 := Sum4Mer(count1)
s2 := Sum4Mer(count2)
smax := obiutils.Max(s1, s2)
smax := max(s1, s2)
cw := Common4Mer(count1, count2)

View File

@ -10,7 +10,6 @@ import (
"slices"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
log "github.com/sirupsen/logrus"
)
@ -472,7 +471,7 @@ func (graph *DeBruijnGraph) Gml() string {
n := graph.Nexts(idx)
for _, dst := range n {
dstid := nodeidx[dst]
weight := obiutils.Min(graph.Weight(dst), weight)
weight := min(graph.Weight(dst), weight)
label := decode[dst&3]
buffer.WriteString(
fmt.Sprintf(`edge [ source "%d"

View File

@ -9,7 +9,6 @@ import (
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiapat"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
)
type DemultiplexMatch struct {
@ -130,7 +129,7 @@ func (marker *Marker) Match(sequence *obiseq.BioSequence) *DemultiplexMatch {
sseq := sequence.String()
direct := sseq[start:end]
tagstart := obiutils.Max(start-marker.taglength, 0)
tagstart := max(start-marker.taglength, 0)
ftag := strings.ToLower(sseq[tagstart:start])
m := DemultiplexMatch{
@ -150,7 +149,7 @@ func (marker *Marker) Match(sequence *obiseq.BioSequence) *DemultiplexMatch {
reverse, _ := sequence.Subsequence(start, end, false)
defer reverse.Recycle()
reverse = reverse.ReverseComplement(true)
endtag := obiutils.Min(end+marker.taglength, sequence.Len())
endtag := min(end+marker.taglength, sequence.Len())
rtag, err := sequence.Subsequence(end, endtag, false)
defer rtag.Recycle()
srtag := ""
@ -201,7 +200,7 @@ func (marker *Marker) Match(sequence *obiseq.BioSequence) *DemultiplexMatch {
sseq := sequence.String()
reverse := strings.ToLower(sseq[start:end])
tagstart := obiutils.Max(start-marker.taglength, 0)
tagstart := max(start-marker.taglength, 0)
rtag := strings.ToLower(sseq[tagstart:start])
m := DemultiplexMatch{
@ -221,7 +220,7 @@ func (marker *Marker) Match(sequence *obiseq.BioSequence) *DemultiplexMatch {
defer direct.Recycle()
direct = direct.ReverseComplement(true)
endtag := obiutils.Min(end+marker.taglength, sequence.Len())
endtag := min(end+marker.taglength, sequence.Len())
ftag, err := sequence.Subsequence(end, endtag, false)
defer ftag.Recycle()
sftag := ""

View File

@ -6,7 +6,6 @@ import (
"sort"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
)
type Suffix struct {
@ -27,7 +26,7 @@ func SuffixLess(suffixarray SuffixArray) func(i, j int) bool {
sj := suffixarray.Suffixes[j]
bj := (*suffixarray.Sequences)[int(sj.Idx)].Sequence()[sj.Pos:]
l := obiutils.Min(len(bi), len(bj))
l := min(len(bi), len(bj))
p := 0
for p < l && bi[p] == bj[p] {
p++
@ -92,7 +91,7 @@ func (suffixarray *SuffixArray) CommonSuffix() []int {
si := suffixarray.Suffixes[i]
bi := (*suffixarray.Sequences)[int(si.Idx)].Sequence()[si.Pos:]
l := obiutils.Min(len(bi), len(bp))
l := min(len(bi), len(bp))
p := 0
for p < l && bi[p] == bp[p] {
p++

View File

@ -4,7 +4,6 @@ import (
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiapat"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
log "github.com/sirupsen/logrus"
)
@ -47,8 +46,8 @@ func CLIPCR(iterator obiiter.IBioSequence) (obiiter.IBioSequence, error) {
frags := obiiter.IFragments(
CLIMaxLength()*1000,
CLIMaxLength()*100,
CLIMaxLength()+obiutils.Max(len(CLIForwardPrimer()),
len(CLIReversePrimer()))+obiutils.Min(len(CLIForwardPrimer()),
CLIMaxLength()+max(len(CLIForwardPrimer()),
len(CLIReversePrimer()))+min(len(CLIForwardPrimer()),
len(CLIReversePrimer()))/2,
100,
obioptions.CLIParallelWorkers(),

View File

@ -63,7 +63,7 @@ func IndexSequence(seqidx int,
if lca[order] == ancestor {
// nseq[i]++
if mini != -1 {
wordmin = obiutils.Max(sequence.Len(), references[order].Len()) - 3 - 4*mini
wordmin = max(sequence.Len(), references[order].Len()) - 3 - 4*mini
}
if cw[order] < wordmin {
@ -189,7 +189,7 @@ func IndexReferenceDB(iterator obiiter.IBioSequence) obiiter.IBioSequence {
indexed := obiiter.MakeIBioSequence()
go func() {
for i := 0; i < len(references); i += 10 {
limits <- [2]int{i, obiutils.Min(i+10, len(references))}
limits <- [2]int{i, min(i+10, len(references))}
}
close(limits)
}()

View File

@ -110,7 +110,7 @@ func FindClosests(sequence *obiseq.BioSequence,
d, _, _, _ := obialign.D1Or0(sequence, references[order])
if d >= 0 {
score = d
alilength = obiutils.Max(sequence.Len(), ref.Len())
alilength = max(sequence.Len(), ref.Len())
lcs = alilength - score
}
} else {

View File

@ -4,20 +4,6 @@ import (
"golang.org/x/exp/constraints"
)
// Min returns x when x < y, and y otherwise.
func Min[T constraints.Ordered](x, y T) T {
	smallest := y
	if x < y {
		smallest = x
	}
	return smallest
}
// Max returns y when x < y, and x otherwise.
func Max[T constraints.Ordered](x, y T) T {
	largest := x
	if x < y {
		largest = y
	}
	return largest
}
func MinMax[T constraints.Ordered](x, y T) (T, T) {
if x < y {
return x, y
@ -25,7 +11,6 @@ func MinMax[T constraints.Ordered](x, y T) (T, T) {
return y, x
}
func MinMaxSlice[T constraints.Ordered](vec []T) (min, max T) {
if len(vec) == 0 {
panic("empty slice")