mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
83 lines
2.0 KiB
Go
83 lines
2.0 KiB
Go
package obialign
|
|
|
|
import (
|
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
|
)
|
|
|
|
// _FourBitsBaseCode maps a letter, reduced to its five low bits
// (letter & 31, so that upper and lower case letters share one entry),
// to a four-bit set of nucleotides: bit 0 is A, bit 1 is C, bit 2 is G
// and bit 3 is T. An IUPAC ambiguity code has the bit of every base it
// stands for set to 1.
//
// The table has 32 entries. Every entry that is not listed below
// (index 0, the letters that are not IUPAC nucleotide codes, and
// indices 27 to 30) is left at 0b0000.
var _FourBitsBaseCode = []byte{
	'A' & 31: 0b0001, // Adenine
	'B' & 31: 0b1110, // C or G or T
	'C' & 31: 0b0010, // Cytosine
	'D' & 31: 0b1101, // A or G or T
	'G' & 31: 0b0100, // Guanine
	'H' & 31: 0b1011, // A or C or T
	'K' & 31: 0b1100, // G or T
	'M' & 31: 0b0011, // A or C
	'N' & 31: 0b1111, // any base
	'R' & 31: 0b0101, // A or G
	'S' & 31: 0b0110, // G or C
	'T' & 31: 0b1000, // Thymine
	'U' & 31: 0b1000, // Uracil, encoded like Thymine
	'V' & 31: 0b0111, // A or C or G
	'W' & 31: 0b1001, // A or T
	'Y' & 31: 0b1010, // C or T

	// Explicit last slot: keeps the table 32 entries long so that any
	// value of the form x & 31 is a valid index.
	31: 0b0000,
}
|
|
|
|
// _FourBitsBaseDecode is the reverse of the four-bit encoding: it is
// indexed by a four-bit nucleotide set (bit 0 is A, bit 1 is C, bit 2
// is G, bit 3 is T) and holds the lower case IUPAC letter for that set.
// The empty set 0b0000 is rendered as '.'.
var _FourBitsBaseDecode = []byte{
	0b0000: '.', // no base
	0b0001: 'a', // A
	0b0010: 'c', // C
	0b0011: 'm', // A or C
	0b0100: 'g', // G
	0b0101: 'r', // A or G
	0b0110: 's', // C or G
	0b0111: 'v', // A or C or G
	0b1000: 't', // T
	0b1001: 'w', // A or T
	0b1010: 'y', // C or T
	0b1011: 'h', // A or C or T
	0b1100: 'k', // G or T
	0b1101: 'd', // A or G or T
	0b1110: 'b', // C or G or T
	0b1111: 'n', // any base
}
|
|
|
|
// Encode4bits encodes each nucleotide of a sequence into a binary
|
|
// code where the four low weigth bit of a byte correspond respectively
|
|
// to the four nucleotides A, C, G, T. Simple bases A, C, G, T are therefore
|
|
// represented by a code with only a single bit on, when anbiguous symboles
|
|
// like R, D or N have the bits corresponding to each nucleotide represented
|
|
// by the ambiguity set to 1.
|
|
// A byte slice can be provided (buffer) to preveent allocation of a new
|
|
// memory chunk by th function.
|
|
func Encode4bits(seq *obiseq.BioSequence, buffer []byte) []byte {
|
|
length := seq.Len()
|
|
rawseq := seq.Sequence()
|
|
|
|
if buffer == nil {
|
|
buffer = make([]byte, 0, length)
|
|
} else {
|
|
buffer = buffer[:0]
|
|
}
|
|
|
|
var code byte
|
|
|
|
for _, nuc := range rawseq {
|
|
if nuc == '.' || nuc == '-' {
|
|
code = 0
|
|
} else {
|
|
code = _FourBitsBaseCode[nuc&31]
|
|
}
|
|
buffer = append(buffer, code)
|
|
}
|
|
|
|
return buffer
|
|
}
|