Files
obitools4/pkg/obialign/alignment.go

194 lines
5.1 KiB
Go
Raw Normal View History

// obialign : function for aligning two sequences
//
// The obialign package provides a set of functions
// for aligning two objects of type obiseq.BioSequence.
2022-01-13 23:27:39 +01:00
package obialign
import (
"math"
"sync"
2022-01-13 23:27:39 +01:00
2022-01-13 23:43:01 +01:00
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
2022-01-13 23:27:39 +01:00
)
// _BuildAlignArenaPool hands out reusable scratch buffers to BuildAlignment
// and BuildQualityConsensus. Each new entry is a pointer to an empty byte
// slice created with a capacity of 300 bytes.
var _BuildAlignArenaPool = sync.Pool{
	New: func() interface{} {
		buffer := new([]byte)
		*buffer = make([]byte, 0, 300)

		return buffer
	},
}
// _BuildAlignment materializes the two aligned sequences described by path.
//
// path is read as consecutive (indel, match) pairs:
//   - a negative indel copies -indel symbols of seqA to bufferA and appends
//     the same number of gap symbols to bufferB;
//   - a positive indel copies indel symbols of seqB to bufferB and appends
//     the same number of gap symbols to bufferA;
//   - a positive match copies match symbols from both sequences.
//
// Zero values in either position are ignored. bufferA and bufferB are
// truncated to length zero before being filled, so their previous content
// is discarded while their capacity is reused.
//
// A path holding an odd number of elements ends on an indel that has no
// companion match: this last indel is applied and the function returns,
// instead of reading past the end of path.
//
// The function panics with a slice-bounds error if path asks for more
// symbols than seqA or seqB contain.
func _BuildAlignment(seqA, seqB []byte, path []int, gap byte, bufferA, bufferB *[]byte) {
	*bufferA = (*bufferA)[:0]
	*bufferB = (*bufferB)[:0]

	posA, posB := 0, 0

	for i := 0; i < len(path); i += 2 {
		indel := path[i]

		switch {
		case indel < 0:
			// Symbols of seqA facing gaps in B.
			n := -indel
			*bufferA = append(*bufferA, seqA[posA:posA+n]...)
			for j := 0; j < n; j++ {
				*bufferB = append(*bufferB, gap)
			}
			posA += n
		case indel > 0:
			// Symbols of seqB facing gaps in A.
			*bufferB = append(*bufferB, seqB[posB:posB+indel]...)
			for j := 0; j < indel; j++ {
				*bufferA = append(*bufferA, gap)
			}
			posB += indel
		}

		// Odd-length path: there is no match step after the last indel.
		if i+1 >= len(path) {
			break
		}

		if match := path[i+1]; match > 0 {
			*bufferA = append(*bufferA, seqA[posA:posA+match]...)
			*bufferB = append(*bufferB, seqB[posB:posB+match]...)
			posA += match
			posB += match
		}
	}
}
2022-01-14 15:15:26 +01:00
// BuildAlignment builds the aligned sequences from an alignemnt path
// returned by one of the alignment procedure.
// The user has to provide both sequences (seqA and seqB), the alignment
// path (path), the symbole used to materialiaze gaps (gap) which is
// usually the dash '-', and a BuildAlignArena (arena). It is always possible
// to provide the NilBuildAlignArena instance for this last parameter.
// In that case an arena will be allocated by the function but, it will not
// be reusable for other alignments and desallocated at the BuildAlignment
// return.
2022-01-13 23:27:39 +01:00
func BuildAlignment(seqA, seqB obiseq.BioSequence,
path []int, gap byte) (obiseq.BioSequence, obiseq.BioSequence) {
2022-01-13 23:27:39 +01:00
bufferSA := _BuildAlignArenaPool.Get().(*[]byte)
defer _BuildAlignArenaPool.Put(bufferSA)
bufferSB := _BuildAlignArenaPool.Get().(*[]byte)
defer _BuildAlignArenaPool.Put(bufferSB)
2022-01-13 23:27:39 +01:00
_BuildAlignment(seqA.Sequence(), seqB.Sequence(), path, gap,
bufferSA,
bufferSB)
2022-01-13 23:27:39 +01:00
seqA = obiseq.MakeBioSequence(seqA.Id(),
*bufferSA,
2022-01-13 23:27:39 +01:00
seqA.Definition())
seqB = obiseq.MakeBioSequence(seqB.Id(),
*bufferSB,
2022-01-13 23:27:39 +01:00
seqB.Definition())
return seqA, seqB
}
// func _logSlice(x *[]byte) {
// l := len(*x)
// if l > 10 {
// l = 10
// }
// log.Printf("%v (%10s): slice=%p array=%p cap=%d len=%d\n", (*x)[:l], string((*x)[:l]), x, (*x), cap(*x), len(*x))
// }
2022-01-14 15:15:26 +01:00
// BuildQualityConsensus builds the consensus sequences corresponding to an
// alignement between two sequences.
// The consensus is built from an alignemnt path returned by one of the
// alignment procedure and the quality score associated to the sequence.
// In case of mismatches the nucleotide with the best score is conserved
// in the consensus. In case of score equality, an IUPAC symbol correesponding
// to the ambiguity is used.
// The user has to provide both sequences (seqA and seqB), the alignment
// path (path), and two BuildAlignArena (arena1 and arena2). It is always possible
// to provide the NilBuildAlignArena instance for these two last parameters.
// In that case arenas will be allocated by the function but, they will not
// be reusable for other alignments and desallocated at the BuildQualityConsensus
// return.
func BuildQualityConsensus(seqA, seqB obiseq.BioSequence, path []int) (obiseq.BioSequence, int) {
2022-01-13 23:27:39 +01:00
bufferSA := _BuildAlignArenaPool.Get().(*[]byte)
defer _BuildAlignArenaPool.Put(bufferSA)
2022-01-13 23:27:39 +01:00
bufferSB := _BuildAlignArenaPool.Get().(*[]byte)
defer _BuildAlignArenaPool.Put(bufferSB)
2022-01-13 23:27:39 +01:00
bufferQA := _BuildAlignArenaPool.Get().(*[]byte)
defer _BuildAlignArenaPool.Put(bufferQA)
2022-01-13 23:27:39 +01:00
bufferQB := _BuildAlignArenaPool.Get().(*[]byte)
defer _BuildAlignArenaPool.Put(bufferQB)
_BuildAlignment(seqA.Sequence(), seqB.Sequence(), path, ' ',
bufferSA, bufferSB)
// log.Printf("#1 %s--> la : %d,%p lb : %d,%p qa : %d,%p qb : %d,%p\n", stamp,
// len(*bufferSA), bufferSA, len(*bufferSB), bufferSB,
// len(*bufferQA), bufferQA, len(*bufferQB), bufferQB)
_BuildAlignment(seqA.Qualities(), seqB.Qualities(), path, byte(0),
bufferQA, bufferQB)
// log.Printf("#2 %s--> la : %d,%p lb : %d,%p qa : %d,%p qb : %d,%p\n", stamp,
// len(*bufferSA), bufferSA, len(*bufferSB), bufferSB,
// len(*bufferQA), bufferQA, len(*bufferQB), bufferQB)
// log.Printf("#3 %s--> la : %d lb : %d, qa : %d qb : %d\n", stamp, len(sA), len(sB), len(qsA), len(qsB))
2022-01-13 23:27:39 +01:00
var qA, qB byte
var qM, qm byte
var i int
match := 0
for i, qA = range *bufferQA {
nA := (*bufferSA)[i]
nB := (*bufferSB)[i]
qB = (*bufferQB)[i]
2022-01-13 23:27:39 +01:00
if qA > qB {
qM = qA
qm = qB
}
if qB > qA {
(*bufferSA)[i] = (*bufferSB)[i]
2022-01-13 23:27:39 +01:00
qM = qB
qm = qA
}
if qB == qA && nA != nB {
nuc := _FourBitsBaseCode[nA&31] | _FourBitsBaseCode[nB&31]
(*bufferSA)[i] = _FourBitsBaseDecode[nuc]
2022-01-13 23:27:39 +01:00
}
q := qA + qB
if qA > 0 && qB > 0 {
if nA != nB {
2022-01-13 23:27:39 +01:00
q = qM - byte(math.Log10(1-math.Pow(10, -float64(qm)/30))*10+0.5)
}
if nA == nB {
2022-01-13 23:27:39 +01:00
match++
}
}
if q > 90 {
q = 90
}
(*bufferQA)[i] = q
2022-01-13 23:27:39 +01:00
}
consSeq := obiseq.MakeBioSequence(
seqA.Id(),
(*bufferSA),
seqA.Definition(),
)
consSeq.SetSequence((*bufferQA))
2022-01-13 23:27:39 +01:00
return consSeq, match
2022-01-13 23:27:39 +01:00
}