mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
Add some doc and switch to the parallel gzip library
Former-commit-id: 2c1187001f989ba3de5895f516d4c8b54d52a4c4
This commit is contained in:
@ -29,20 +29,39 @@ var _NucPartMatch [32][32]float64
|
||||
var _NucScorePartMatchMatch [100][100]int
|
||||
var _NucScorePartMatchMismatch [100][100]int
|
||||
|
||||
// _MatchRatio calculates the match ratio between two bytes.
|
||||
//
|
||||
// It takes two parameters, a and b, which are bytes to be compared.
|
||||
// The function returns a float64 value representing the match ratio.
|
||||
func _MatchRatio(a, b byte) float64 {
|
||||
// count of common bits
|
||||
cm := _FourBitsCount[a&b&15]
|
||||
|
||||
// count of bits in a
|
||||
ca := _FourBitsCount[a&15]
|
||||
|
||||
// count of bits in b
|
||||
cb := _FourBitsCount[b&15]
|
||||
|
||||
// check if any of the counts is zero
|
||||
if cm == 0 || ca == 0 || cb == 0 {
|
||||
return float64(0)
|
||||
}
|
||||
|
||||
// calculate the match ratio
|
||||
return float64(cm) / float64(ca) / float64(cb)
|
||||
}
|
||||
|
||||
// _Logaddexp calculates the logarithm of the sum of exponentials of two given numbers.
|
||||
//
|
||||
// Parameters:
|
||||
//
|
||||
// a - the first number (float64)
|
||||
// b - the second number (float64)
|
||||
//
|
||||
// Returns:
|
||||
//
|
||||
// float64 - the result of the calculation
|
||||
func _Logaddexp(a, b float64) float64 {
|
||||
if a > b {
|
||||
a, b = b, a
|
||||
@ -51,6 +70,15 @@ func _Logaddexp(a, b float64) float64 {
|
||||
return b + math.Log1p(math.Exp(a-b))
|
||||
}
|
||||
|
||||
// _MatchScoreRatio calculates the match score ratio between two bytes.
|
||||
//
|
||||
// Parameters:
|
||||
// - a: the first byte
|
||||
// - b: the second byte
|
||||
//
|
||||
// Returns:
|
||||
// - float64: the match score ratio when a match is observed
|
||||
// - float64: the match score ratio when a mismatch is observed
|
||||
func _MatchScoreRatio(a, b byte) (float64, float64) {
|
||||
|
||||
l2 := math.Log(2)
|
||||
|
@ -31,6 +31,28 @@ func _samenuc(a, b byte) bool {
|
||||
}
|
||||
return a == b
|
||||
}
|
||||
|
||||
// FastLCSEGFScoreByte calculates the score of the Longest Common Subsequence (LCS) between two byte slices.
|
||||
//
|
||||
// The score is calculated using the following scoring matrix:
|
||||
// - Match : +1
|
||||
// - Mismatch and gap: 0
|
||||
//
|
||||
// The LCS is calculated using the Needleman-Wunsch algorithm.
|
||||
// At the same time the length of the shortest path between the two sequences is calculated.
|
||||
// If the endgapfree flag is set to true, the returned length does not include the end gaps.
|
||||
// If the number of mismatches or gaps is larger than the maximum allowed error, -1 is returned for both.
|
||||
//
|
||||
// Parameters:
|
||||
// - bA: The first byte slice.
|
||||
// - bB: The second byte slice.
|
||||
// - maxError: The maximum allowed error. If set to -1, no limit is applied.
|
||||
// - endgapfree: A boolean flag indicating whether the LCS should be end-gap free.
|
||||
// - buffer: A pointer to a uint64 slice to store intermediate results. If nil, a new slice is created.
|
||||
//
|
||||
// Returns:
|
||||
// - The score of the LCS.
|
||||
// - The length of the shortest alignment path (end gaps excluded when endgapfree is true).
|
||||
func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[]uint64) (int, int) {
|
||||
|
||||
lA := len(bA)
|
||||
@ -316,10 +338,48 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[
|
||||
return s, l
|
||||
}
|
||||
|
||||
// FastLCSEGFScore calculates the score of the longest common subsequence between two bio sequences in end-gap-free mode.
|
||||
//
|
||||
// if maxError > 0, the maximum allowed error between the sequences is maxError.
|
||||
// Otherwise, no error checking is done.
|
||||
// If the actual number of errors is larger than maxError, -1 is returned for both values.
|
||||
//
|
||||
// The score matrix is:
|
||||
// - Matching: 1
|
||||
// - Mismatch or gap: 0
|
||||
//
|
||||
// Compared to FastLCSScoreByte the length of the shortest alignment returned does not include the end-gaps.
|
||||
//
|
||||
// if buffer != nil, the buffer is used to store intermediate results.
|
||||
// Otherwise, a new buffer is allocated.
|
||||
//
|
||||
// seqA: The first bio sequence.
|
||||
// seqB: The second bio sequence.
|
||||
// maxError: The maximum allowed error between the sequences.
|
||||
// buffer: A buffer to store intermediate results.
|
||||
// Returns the score of the longest common subsequence and the length of the shortest alignment corresponding.
|
||||
func FastLCSEGFScore(seqA, seqB *obiseq.BioSequence, maxError int, buffer *[]uint64) (int, int) {
|
||||
return FastLCSEGFScoreByte(seqA.Sequence(), seqB.Sequence(), maxError, true, buffer)
|
||||
}
|
||||
|
||||
// FastLCSScore calculates the score of the longest common subsequence between two bio sequences.
|
||||
//
|
||||
// if maxError > 0, the maximum allowed error between the sequences is maxError.
|
||||
// Otherwise, no error checking is done.
|
||||
// If the actual number of errors is larger than maxError, -1 is returned for both values.
|
||||
//
|
||||
// The score matrix is:
|
||||
// - Matching: 1
|
||||
// - Mismatch or gap: 0
|
||||
//
|
||||
// if buffer != nil, the buffer is used to store intermediate results.
|
||||
// Otherwise, a new buffer is allocated.
|
||||
//
|
||||
// seqA: The first bio sequence.
|
||||
// seqB: The second bio sequence.
|
||||
// maxError: The maximum allowed error between the sequences.
|
||||
// buffer: A buffer to store intermediate results.
|
||||
// Returns the score of the longest common subsequence and the length of the shortest alignment corresponding.
|
||||
func FastLCSScore(seqA, seqB *obiseq.BioSequence, maxError int, buffer *[]uint64) (int, int) {
|
||||
return FastLCSEGFScoreByte(seqA.Sequence(), seqB.Sequence(), maxError, false, buffer)
|
||||
}
|
||||
|
Reference in New Issue
Block a user