Add obiminion first version

Former-commit-id: aa5ace7bd4d2266333715fca7094d1c3cbbb5e6d
This commit is contained in:
Eric Coissac
2024-05-14 08:16:12 +02:00
parent 9e63013bc2
commit 017030bcce
24 changed files with 1599 additions and 469 deletions

View File

@ -58,182 +58,3 @@ var _empty = encodeValues(0, 0, false)
var _out = encodeValues(0, 30000, true)
var _notavail = encodeValues(0, 30000, false)
// func FastLCSScore(seqA, seqB *obiseq.BioSequence, maxError int, buffer *[]uint64) (int, int) {
// lA := seqA.Len()
// lB := seqB.Len()
// // Ensure that A is the longest
// if lA < lB {
// seqA, seqB = seqB, seqA
// lA, lB = lB, lA
// }
// if maxError == -1 {
// maxError = lA * 2
// }
// delta := lA - lB
// // The difference in length is larger than the maximum allowed number of errors
// if delta > maxError {
// return -1, -1
// }
// // Should we really divide by two??? Not sure
// extra := (maxError - delta) + 1
// even := 1 + delta + 2*extra
// width := 2*even - 1
// if buffer == nil {
// var local []uint64
// buffer = &local
// }
// if cap(*buffer) < 2*width {
// *buffer = make([]uint64, 3*width)
// }
// previous := (*buffer)[0:width]
// current := (*buffer)[width:(2 * width)]
// previous[extra] = _empty
// previous[extra+even] = encodeValues(0, 1, false)
// previous[extra+even-1] = encodeValues(0, 1, false)
// N := lB + ((delta) >> 1)
// bA := seqA.Sequence()
// bB := seqB.Sequence()
// // log.Println("N = ", N)
// for y := 1; y <= N; y++ {
// // in_matrix := false
// x1 := y - lB + extra
// x2 := extra - y
// xs := obiutils.MaxInt(obiutils.MaxInt(x1, x2), 0)
// x1 = y + extra
// x2 = lA + extra - y
// xf := obiutils.MinInt(obiutils.MinInt(x1, x2), even-1) + 1
// for x := xs; x < xf; x++ {
// i := y - x + extra
// j := y + x - extra
// var Sdiag, Sleft, Sup uint64
// switch {
// case i == 0:
// Sup = _notavail
// Sdiag = _notavail
// Sleft = encodeValues(0, j-1, false)
// case j == 0:
// Sup = encodeValues(0, i-1, false)
// Sdiag = _notavail
// Sleft = _notavail
// default:
// Sdiag = previous[x]
// if bA[j-1] == bB[i-1] {
// Sdiag = _incscore(Sdiag)
// }
// if x < (even - 1) {
// Sup = previous[x+even]
// } else {
// Sup = _out
// }
// if x > 0 {
// Sleft = previous[x+even-1]
// } else {
// Sleft = _out
// }
// }
// var score uint64
// switch {
// case Sdiag >= Sup && Sdiag >= Sleft:
// score = Sdiag
// case Sup >= Sleft:
// score = Sup
// default:
// score = Sleft
// }
// if _isout(Sdiag) || _isout(Sup) || _isout(Sleft) {
// score = _setout(score)
// }
// current[x] = _incpath(score)
// }
// // . 9 10 + 2 - 1
// x1 = y - lB + extra + even
// x2 = extra - y + even - 1
// xs = obiutils.MaxInt(obiutils.MaxInt(x1, x2), even)
// x1 = y + extra + even
// x2 = lA + extra - y + even - 1
// xf = obiutils.MinInt(obiutils.MinInt(x1, x2), width-1) + 1
// for x := xs; x < xf; x++ {
// i := y - x + extra + even
// j := y + x - extra - even + 1
// var Sdiag, Sleft, Sup uint64
// switch {
// case i == 0:
// Sup = _notavail
// Sdiag = _notavail
// Sleft = encodeValues(0, j-1, false)
// case j == 0:
// Sup = encodeValues(0, i-1, false)
// Sdiag = _notavail
// Sleft = _notavail
// default:
// Sdiag = previous[x]
// if bA[j-1] == bB[i-1] {
// Sdiag = _incscore(Sdiag)
// }
// Sleft = current[x-even]
// Sup = current[x-even+1]
// }
// var score uint64
// switch {
// case Sdiag >= Sup && Sdiag >= Sleft:
// score = Sdiag
// case Sup >= Sleft:
// score = Sup
// default:
// score = Sleft
// }
// if _isout(Sdiag) || _isout(Sup) || _isout(Sleft) {
// score = _setout(score)
// }
// current[x] = _incpath(score)
// }
// previous, current = current, previous
// }
// s, l, o := decodeValues(previous[(delta%2)*even+extra+(delta>>1)])
// if o {
// return -1, -1
// }
// return s, l
// }

View File

@ -130,11 +130,11 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[
// in_matrix := false
x1 := y - lB + extra
x2 := extra - y
xs := obiutils.MaxInt(obiutils.MaxInt(x1, x2), 0)
xs := obiutils.Max(obiutils.Max(x1, x2), 0)
x1 = y + extra
x2 = lA + extra - y
xf := obiutils.MinInt(obiutils.MinInt(x1, x2), even-1) + 1
xf := obiutils.Min(obiutils.Min(x1, x2), even-1) + 1
for x := xs; x < xf; x++ {
@ -222,11 +222,11 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[
// . 9 10 + 2 - 1
x1 = y - lB + extra + even
x2 = extra - y + even - 1
xs = obiutils.MaxInt(obiutils.MaxInt(x1, x2), even)
xs = obiutils.Max(obiutils.Max(x1, x2), even)
x1 = y + extra + even
x2 = lA + extra - y + even - 1
xf = obiutils.MinInt(obiutils.MinInt(x1, x2), width-1) + 1
xf = obiutils.Min(obiutils.Min(x1, x2), width-1) + 1
for x := xs; x < xf; x++ {
@ -348,16 +348,15 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[
// - Matching: 1
// - Mismatch or gap: 0
//
// Compared to FastLCSScoreByte the length of the shortest alignment returned does not include the end-gaps.
// Parameters:
// - seqA: The first bio sequence.
// - seqB: The second bio sequence.
// - maxError: The maximum allowed error between the sequences. If set to -1, no limit is applied.
// - buffer: A pointer to a uint64 slice to store intermediate results. If nil, a new slice is created.
//
// if buffer != nil, the buffer is used to store intermediate results.
// Otherwise, a new buffer is allocated.
//
// seqA: The first bio sequence.
// seqB: The second bio sequence.
// maxError: The maximum allowed error between the sequences.
// buffer: A buffer to store intermediate results.
// Returns the score of the longest common subsequence and the length of the shortest alignment corresponding.
// Returns:
// - The score of the longest common subsequence.
// - The length of the shortest alignment corresponding to the LCS.
func FastLCSEGFScore(seqA, seqB *obiseq.BioSequence, maxError int, buffer *[]uint64) (int, int) {
	// This wrapper always requests the end-gap-free variant of the
	// byte-level routine.
	const endGapFree = true

	// Extract the raw sequences in the same order as the arguments are given.
	bytesA := seqA.Sequence()
	bytesB := seqB.Sequence()

	return FastLCSEGFScoreByte(bytesA, bytesB, maxError, endGapFree, buffer)
}
@ -372,14 +371,16 @@ func FastLCSEGFScore(seqA, seqB *obiseq.BioSequence, maxError int, buffer *[]uin
// - Matching: 1
// - Mismatch or gap: 0
//
// if buffer != nil, the buffer is used to store intermediate results.
// Otherwise, a new buffer is allocated.
// Parameters:
// - seqA: The first bio sequence.
// - seqB: The second bio sequence.
// - maxError: The maximum allowed error between the sequences. If set to -1, no limit is applied.
// - buffer: A pointer to a uint64 slice to store intermediate results. If nil, a new slice is created.
//
// seqA: The first bio sequence.
// seqB: The second bio sequence.
// maxError: The maximum allowed error between the sequences.
// buffer: A buffer to store intermediate results.
// Returns the score of the longest common subsequence and the length of the shortest alignment corresponding.
// Returns:
// - The score of the longest common subsequence.
// - The length of the shortest alignment corresponding to the LCS.
func FastLCSScore(seqA, seqB *obiseq.BioSequence, maxError int, buffer *[]uint64) (int, int) {
	// This wrapper calls the byte-level routine with the end-gap-free
	// flag turned off.
	const endGapFree = false

	// Extract the raw sequences in the same order as the arguments are given.
	bytesA := seqA.Sequence()
	bytesB := seqB.Sequence()

	return FastLCSEGFScoreByte(bytesA, bytesB, maxError, endGapFree, buffer)
}