Cython alignment library
This commit is contained in:
166
python/obitools3/align/__init__.py
Normal file
166
python/obitools3/align/__init__.py
Normal file
@ -0,0 +1,166 @@
|
||||
from obitools3.dms.obiseq import Nuc_Seq_Stored, Nuc_Seq
|
||||
from functools import reduce
|
||||
import struct
|
||||
from copy import deepcopy
|
||||
|
||||
|
||||
class GappedPositionException(Exception):
|
||||
pass
|
||||
|
||||
class AlignedSequence(Nuc_Seq): # TODO discuss. can be both Nuc_Seq or Nuc_Seq_Stored....
|
||||
|
||||
def __init__(self, seq) :
|
||||
self.wrapped = seq
|
||||
self._length=len(seq)
|
||||
self._gaps=[[self._length,0]]
|
||||
|
||||
def clone(self):
|
||||
seq = AlignedSequence(self.wrapped)
|
||||
seq._gaps=deepcopy(self._gaps)
|
||||
seq._length=reduce(lambda x,y:x+y, (z[0]+z[1] for z in self._gaps),0)
|
||||
return seq
|
||||
|
||||
def setGaps(self, value):
|
||||
'''
|
||||
Set gap vector to an AlignedSequence.
|
||||
|
||||
Gap vector describes the gap positions on a sequence.
|
||||
It is a gap of couple. The first couple member is the count
|
||||
of sequence letter, the second one is the gap length.
|
||||
@param value: a list of length 2 list describing gap positions
|
||||
@type value: list of couple
|
||||
'''
|
||||
assert isinstance(value, list),'Gap vector must be a list'
|
||||
assert reduce(lambda x,y: x and y,
|
||||
(isinstance(z, list) and len(z)==2 for z in value),
|
||||
True),"Value must be a list of length 2 list"
|
||||
|
||||
lseq = reduce(lambda x,y:x+y, (z[0] for z in value),0)
|
||||
assert lseq==len(self.wrapped),"Gap vector incompatible with the sequence"
|
||||
self._gaps = value
|
||||
self._length=reduce(lambda x,y:x+y, (z[0]+z[1] for z in value),0)
|
||||
|
||||
def getGaps(self):
|
||||
return tuple(self._gaps)
|
||||
gaps = property(getGaps, setGaps, None, "Gaps's Docstring")
|
||||
|
||||
def _getIndice(self,pos):
|
||||
i=0
|
||||
cpos=0
|
||||
for s,g in self._gaps:
|
||||
cpos+=s
|
||||
if cpos>pos:
|
||||
return i,pos-cpos+s
|
||||
cpos+=g
|
||||
if cpos>pos:
|
||||
return i,-pos+cpos-g-1
|
||||
i+=1
|
||||
raise IndexError
|
||||
|
||||
def getId(self):
|
||||
d = self.id or ("%s_ALN" % self.wrapped.id)
|
||||
return d
|
||||
|
||||
def __len__(self):
|
||||
return self._length
|
||||
|
||||
def get_str(self):
|
||||
return ''.join([x.decode('ascii') for x in self])
|
||||
|
||||
def __iter__(self):
|
||||
def isymb():
|
||||
cpos=0
|
||||
for s,g in self._gaps:
|
||||
for x in range(s):
|
||||
yield (self.wrapped[cpos+x])
|
||||
for x in range(g):
|
||||
yield b"-"
|
||||
cpos+=s
|
||||
return isymb()
|
||||
|
||||
def _posInWrapped(self,position):
|
||||
i,s=self._getIndice(position)
|
||||
if s<0:
|
||||
raise GappedPositionException
|
||||
value=self._gaps
|
||||
p=reduce(lambda x,y:x+y, (z[0] for z in value[:i]),0)+s
|
||||
return p
|
||||
|
||||
def get_symbol_at(self,position):
|
||||
try:
|
||||
return self.wrapped.get_symbol_at(self._posInWrapped(position))
|
||||
except GappedPositionException:
|
||||
return b"-"
|
||||
|
||||
def insertGap(self,position,count=1):
|
||||
if position==self._length:
|
||||
idx=len(self._gaps)-1
|
||||
p=-1
|
||||
else:
|
||||
idx,p = self._getIndice(position)
|
||||
|
||||
if p >= 0:
|
||||
self._gaps.insert(idx, [p,count])
|
||||
self._gaps[idx+1][0]-=p
|
||||
else:
|
||||
self._gaps[idx][1]+=count
|
||||
self._length=reduce(lambda x,y:x+y, (z[0]+z[1] for z in self._gaps),0)
|
||||
|
||||
|
||||
class Alignment(list):
|
||||
|
||||
def _assertData(self,data):
|
||||
# assert isinstance(data, Nuc_Seq_Stored),'You must only add bioseq to an alignement' TODO
|
||||
if hasattr(self, '_alignlen'):
|
||||
assert self._alignlen==len(data),'All aligned sequences must have the same length'
|
||||
else:
|
||||
self._alignlen=len(data)
|
||||
return data
|
||||
|
||||
def clone(self):
|
||||
ali = Alignment(x.clone() for x in self)
|
||||
return ali
|
||||
|
||||
def append(self,data):
|
||||
data = self._assertData(data)
|
||||
list.append(self,data)
|
||||
|
||||
def __setitem__(self,index,data):
|
||||
|
||||
data = self._assertData(data)
|
||||
list.__setitem__(self,index,data)
|
||||
|
||||
def getSite(self,key):
|
||||
if isinstance(key,int):
|
||||
return [x[key] for x in self]
|
||||
|
||||
def insertGap(self,position,count=1):
|
||||
for s in self:
|
||||
s.insertGap(position,count)
|
||||
|
||||
def isFullGapSite(self,key):
|
||||
return reduce(lambda x,y: x and y,(z==b"-" for z in self.getSite(key)),True)
|
||||
|
||||
def isGappedSite(self,key):
|
||||
return b"-" in self.getSite(key)
|
||||
|
||||
def __str__(self):
|
||||
l = len(self[0])
|
||||
rep=""
|
||||
idmax = max(len(x.id) for x in self)+2
|
||||
template= "%%-%ds %%-60s" % idmax
|
||||
for p in range(0,l,60):
|
||||
for s in self:
|
||||
rep+= (template % (s.id,s[p:p+60])).strip() + '\n'
|
||||
rep+="\n"
|
||||
return rep
|
||||
|
||||
|
||||
def columnIterator(alignment):
|
||||
lali = len(alignment[0])
|
||||
for p in range(lali):
|
||||
c = []
|
||||
for x in alignment:
|
||||
c.append(x[p])
|
||||
yield c
|
||||
|
12
python/obitools3/align/_assemble.pxd
Normal file
12
python/obitools3/align/_assemble.pxd
Normal file
@ -0,0 +1,12 @@
|
||||
#cython: language_level=3
|
||||
|
||||
from ._nws cimport *
|
||||
|
||||
cdef class DirectAssemble(NWS):
|
||||
cdef double ysmax
|
||||
cdef int ymax
|
||||
|
||||
cdef double doAlignment(self) except? 0
|
||||
|
||||
cdef class ReverseAssemble(DirectAssemble):
|
||||
pass
|
168
python/obitools3/align/_assemble.pyx
Normal file
168
python/obitools3/align/_assemble.pyx
Normal file
@ -0,0 +1,168 @@
|
||||
#cython: language_level=3
|
||||
|
||||
'''
|
||||
Created on 6 Nov. 2009
|
||||
|
||||
@author: coissac
|
||||
'''
|
||||
|
||||
|
||||
cdef class DirectAssemble(NWS):
|
||||
|
||||
def __init__(self,match=4,mismatch=-6,opengap=-8,extgap=-2):
|
||||
NWS.__init__(self,match,mismatch,opengap,extgap)
|
||||
self.ysmax=0
|
||||
self.ymax=0
|
||||
|
||||
cdef double doAlignment(self) except? 0:
|
||||
cdef int i # vertical index
|
||||
cdef int j # horizontal index
|
||||
cdef int idx
|
||||
cdef int idx0
|
||||
cdef int idx1
|
||||
cdef int jump
|
||||
cdef int delta
|
||||
cdef double score
|
||||
cdef double scoremax
|
||||
cdef int path
|
||||
|
||||
|
||||
if self.needToCompute:
|
||||
self.allocate()
|
||||
self.reset()
|
||||
self.ysmax=0
|
||||
self.ymax=0
|
||||
|
||||
for j in range(1,self.hSeq.length+1):
|
||||
idx = self.index(j,0)
|
||||
self.matrix.matrix[idx].score = 0
|
||||
self.matrix.matrix[idx].path = j
|
||||
|
||||
for i in range(1,self.vSeq.length+1):
|
||||
idx = self.index(0,i)
|
||||
self.matrix.matrix[idx].score = self._opengap + (self._extgap * (i-1))
|
||||
self.matrix.matrix[idx].path = -i
|
||||
|
||||
idx0=self.index(-1,0)
|
||||
idx1=self.index(0,1)
|
||||
for i in range(1,self.vSeq.length+1):
|
||||
idx0+=1
|
||||
idx1+=1
|
||||
for j in range(1,self.hSeq.length+1):
|
||||
|
||||
# 1 - came from diagonal
|
||||
#idx = self.index(j-1,i-1)
|
||||
idx = idx0
|
||||
#print("ASSEMBLE computing cell : %d,%d --> %d/%d" % (j,i,self.index(j,i),self.matrix.msize),)
|
||||
scoremax = self.matrix.matrix[idx].score + \
|
||||
self.matchScore(j,i)
|
||||
path = 0
|
||||
|
||||
#print("so=%f sd=%f sm=%f" % (self.matrix.matrix[idx].score,self.matchScore(j,i),scoremax),)
|
||||
|
||||
# 2 - open horizontal gap
|
||||
# idx = self.index(j-1,i)
|
||||
idx = idx1 - 1
|
||||
score = self.matrix.matrix[idx].score+ \
|
||||
self._opengap
|
||||
if score > scoremax :
|
||||
scoremax = score
|
||||
path = +1
|
||||
|
||||
# 3 - open vertical gap
|
||||
# idx = self.index(j,i-1)
|
||||
idx = idx0 + 1
|
||||
score = self.matrix.matrix[idx].score + \
|
||||
self._opengap
|
||||
if score > scoremax :
|
||||
scoremax = score
|
||||
path = -1
|
||||
|
||||
# 4 - extend horizontal gap
|
||||
jump = self.matrix.bestHJump[i]
|
||||
if jump >= 0:
|
||||
idx = self.index(jump,i)
|
||||
delta = j-jump
|
||||
score = self.matrix.matrix[idx].score + \
|
||||
self._extgap * delta
|
||||
if score > scoremax :
|
||||
scoremax = score
|
||||
path = delta+1
|
||||
|
||||
# 5 - extend vertical gap
|
||||
jump = self.matrix.bestVJump[j]
|
||||
if jump >= 0:
|
||||
idx = self.index(j,jump)
|
||||
delta = i-jump
|
||||
score = self.matrix.matrix[idx].score + \
|
||||
self._extgap * delta
|
||||
if score > scoremax :
|
||||
scoremax = score
|
||||
path = -delta-1
|
||||
|
||||
# idx = self.index(j,i)
|
||||
idx = idx1
|
||||
self.matrix.matrix[idx].score = scoremax
|
||||
self.matrix.matrix[idx].path = path
|
||||
|
||||
if path == -1:
|
||||
self.matrix.bestVJump[j]=i
|
||||
elif path == +1 :
|
||||
self.matrix.bestHJump[i]=j
|
||||
|
||||
if j==self.hSeq.length and scoremax > self.ysmax:
|
||||
self.ysmax=scoremax
|
||||
self.ymax=i
|
||||
idx0+=1
|
||||
idx1+=1
|
||||
|
||||
self.sequenceChanged=False
|
||||
self.scoreChanged=False
|
||||
|
||||
return self.ysmax
|
||||
|
||||
cdef void backtrack(self):
|
||||
#cdef list path=[]
|
||||
cdef int i
|
||||
cdef int j
|
||||
cdef int p
|
||||
|
||||
self.doAlignment()
|
||||
i=self.ymax
|
||||
j=self.hSeq.length
|
||||
self.path=allocatePath(i,j+1,self.path)
|
||||
|
||||
if self.ymax<self.vSeq.length:
|
||||
self.path.path[self.path.length]=self.ymax-self.vSeq.length
|
||||
self.path.length+=1
|
||||
|
||||
while (i or j):
|
||||
p=self.matrix.matrix[self.index(j,i)].path
|
||||
self.path.path[self.path.length]=p
|
||||
self.path.length+=1
|
||||
#path.append(p)
|
||||
if p==0:
|
||||
i-=1
|
||||
j-=1
|
||||
elif p < 0:
|
||||
i+=p
|
||||
else:
|
||||
j-=p
|
||||
|
||||
#path.reverse()
|
||||
#reversePath(self.path)
|
||||
self.path.hStart=0
|
||||
self.path.vStart=0
|
||||
#return 0,0,path
|
||||
|
||||
|
||||
cdef class ReverseAssemble(DirectAssemble):
|
||||
|
||||
property seqB:
|
||||
def __get__(self):
|
||||
return self.verticalSeq.wrapped
|
||||
|
||||
def __set__(self, seq):
|
||||
self.sequenceChanged=True
|
||||
self.verticalSeq=seq.reverse_complement
|
||||
self.vSeq=allocateSequence(self.verticalSeq,self.vSeq)
|
92
python/obitools3/align/_dynamic.pxd
Normal file
92
python/obitools3/align/_dynamic.pxd
Normal file
@ -0,0 +1,92 @@
|
||||
#cython: language_level=3
|
||||
|
||||
cdef import from "stdlib.h":
|
||||
void* malloc(int size) except NULL
|
||||
void* realloc(void* chunk,int size) except NULL
|
||||
void free(void* chunk)
|
||||
|
||||
cdef import from "string.h":
|
||||
void bzero(void *s, size_t n)
|
||||
void memset(void* chunk,int car,int length)
|
||||
void memcpy(void* s1, void* s2, int n)
|
||||
|
||||
cdef struct AlignCell :
|
||||
double score
|
||||
int path
|
||||
|
||||
cdef struct AlignMatrix :
|
||||
AlignCell* matrix
|
||||
int* bestVJump
|
||||
int* bestHJump
|
||||
int msize
|
||||
int vsize
|
||||
int hsize
|
||||
|
||||
|
||||
|
||||
cdef AlignMatrix* allocateMatrix(int hsize, int vsize,AlignMatrix *matrix=?)
|
||||
|
||||
cdef void freeMatrix(AlignMatrix* matrix)
|
||||
|
||||
cdef void resetMatrix(AlignMatrix* matrix)
|
||||
|
||||
|
||||
cdef struct alignSequence:
|
||||
long length
|
||||
long buffsize
|
||||
bint hasQuality
|
||||
char* sequence
|
||||
double* quality
|
||||
|
||||
cdef alignSequence* allocateSequence(object bioseq, alignSequence* seq=?) except *
|
||||
|
||||
cdef void freeSequence(alignSequence* seq)
|
||||
|
||||
cdef struct alignPath:
|
||||
long length
|
||||
long buffsize
|
||||
long vStart
|
||||
long hStart
|
||||
long *path
|
||||
|
||||
cdef alignPath* allocatePath(long l1,long l2,alignPath* path=?)
|
||||
|
||||
cdef void reversePath(alignPath* path)
|
||||
|
||||
|
||||
cdef void freePath(alignPath* path)
|
||||
|
||||
|
||||
cdef int bitCount(int x)
|
||||
cpdef bint iupacMatch(unsigned char a, unsigned char b)
|
||||
cpdef double iupacPartialMatch(unsigned char a, unsigned char b)
|
||||
cpdef unsigned char encodeBase(unsigned char lettre)
|
||||
|
||||
cdef class DynamicProgramming:
|
||||
cdef AlignMatrix* matrix
|
||||
|
||||
cdef object horizontalSeq
|
||||
cdef object verticalSeq
|
||||
|
||||
cdef alignSequence* hSeq
|
||||
cdef alignSequence* vSeq
|
||||
cdef alignPath* path
|
||||
|
||||
cdef double _opengap
|
||||
cdef double _extgap
|
||||
|
||||
cdef object alignment
|
||||
|
||||
cdef bint sequenceChanged
|
||||
cdef bint scoreChanged
|
||||
|
||||
cdef int _vlen(self)
|
||||
cdef int _hlen(self)
|
||||
cdef int allocate(self) except -1
|
||||
cdef double doAlignment(self) except? 0
|
||||
cdef void reset(self)
|
||||
cdef inline int index(self, int x, int y)
|
||||
cdef inline bint _needToCompute(self)
|
||||
cdef void backtrack(self)
|
||||
cdef void clean(self)
|
||||
|
361
python/obitools3/align/_dynamic.pyx
Normal file
361
python/obitools3/align/_dynamic.pyx
Normal file
@ -0,0 +1,361 @@
|
||||
#cython: language_level=3
|
||||
|
||||
'''
|
||||
Created on 14 sept. 2009
|
||||
|
||||
@author: coissac
|
||||
'''
|
||||
|
||||
from obitools3.align import AlignedSequence
|
||||
from obitools3.align import Alignment
|
||||
from copy import deepcopy
|
||||
|
||||
|
||||
######
|
||||
#
|
||||
# Import standard memory management function to improve
|
||||
# efficiency of the alignment code
|
||||
#
|
||||
#
|
||||
|
||||
|
||||
cdef AlignMatrix* allocateMatrix(int hsize, int vsize,AlignMatrix *matrix=NULL):
|
||||
|
||||
vsize+=1
|
||||
hsize+=1
|
||||
|
||||
if matrix is NULL:
|
||||
matrix = <AlignMatrix*>malloc(sizeof(AlignMatrix))
|
||||
matrix.vsize=0
|
||||
matrix.hsize=0
|
||||
matrix.msize=0
|
||||
matrix.matrix=NULL
|
||||
matrix.bestVJump=NULL
|
||||
matrix.bestHJump=NULL
|
||||
|
||||
if hsize > matrix.hsize:
|
||||
matrix.bestVJump = <int*>realloc(matrix.bestVJump,hsize * sizeof(int))
|
||||
matrix.hsize=hsize
|
||||
|
||||
if vsize > matrix.vsize:
|
||||
matrix.bestHJump = <int*>realloc(matrix.bestHJump,vsize * sizeof(int))
|
||||
matrix.vsize=vsize
|
||||
|
||||
if (hsize * vsize) > matrix.msize:
|
||||
matrix.msize = hsize * vsize
|
||||
matrix.matrix = <AlignCell*>realloc(matrix.matrix, matrix.msize * sizeof(AlignCell))
|
||||
|
||||
return matrix
|
||||
|
||||
cdef void freeMatrix(AlignMatrix* matrix):
|
||||
if matrix is not NULL:
|
||||
if matrix.matrix is not NULL:
|
||||
free(matrix.matrix)
|
||||
if matrix.bestVJump is not NULL:
|
||||
free(matrix.bestVJump)
|
||||
if matrix.bestHJump is not NULL:
|
||||
free(matrix.bestHJump)
|
||||
free(matrix)
|
||||
|
||||
cdef void resetMatrix(AlignMatrix* matrix):
|
||||
if matrix is not NULL:
|
||||
if matrix.matrix is not NULL:
|
||||
bzero(<void*>matrix.matrix, matrix.msize * sizeof(AlignCell))
|
||||
if matrix.bestHJump is not NULL:
|
||||
memset(<void*>matrix.bestHJump,255,matrix.vsize * sizeof(int))
|
||||
if matrix.bestVJump is not NULL:
|
||||
memset(<void*>matrix.bestVJump,255,matrix.hsize * sizeof(int))
|
||||
|
||||
|
||||
cdef alignSequence* allocateSequence(object bioseq, alignSequence* seq=NULL) except *:
|
||||
cdef bytes strseq
|
||||
cdef int i
|
||||
|
||||
if seq is NULL:
|
||||
seq = <alignSequence*>malloc(sizeof(alignSequence))
|
||||
seq.length=0
|
||||
seq.buffsize=0
|
||||
seq.sequence=NULL
|
||||
seq.quality=NULL
|
||||
seq.hasQuality=False
|
||||
|
||||
seq.length=len(bioseq)
|
||||
if seq.length > seq.buffsize:
|
||||
#seq.sequence = <char*>realloc(seq.sequence,sizeof(char)*seq.length)
|
||||
#seq.quality = <double*>realloc(seq.quality,sizeof(double)*seq.length)
|
||||
seq.buffsize = seq.length
|
||||
|
||||
# TODO optimisable...
|
||||
#strseq = bioseq.seq.lower()
|
||||
#memcpy(seq.sequence,<char*>strseq,seq.length)
|
||||
seq.sequence = bioseq.seq
|
||||
|
||||
return seq
|
||||
|
||||
cdef void freeSequence(alignSequence* seq):
|
||||
if seq is not NULL:
|
||||
#if seq.sequence is not NULL:
|
||||
# free(<void*>seq.sequence)
|
||||
#if seq.quality is not NULL:
|
||||
# free(<void*>seq.quality)
|
||||
free(seq)
|
||||
|
||||
cdef alignPath* allocatePath(long l1,long l2,alignPath* path=NULL):
|
||||
cdef long length=l1+l2
|
||||
|
||||
if path is NULL:
|
||||
path = <alignPath*>malloc(sizeof(alignPath))
|
||||
path.length=0
|
||||
path.buffsize=0
|
||||
path.path=NULL
|
||||
|
||||
if length > path.buffsize:
|
||||
path.buffsize=length
|
||||
path.path=<long*>realloc(path.path,sizeof(long)*length)
|
||||
|
||||
path.length=0
|
||||
path.vStart=0
|
||||
path.hStart=0
|
||||
|
||||
return path
|
||||
|
||||
cdef void reversePath(alignPath* path):
|
||||
cdef long i
|
||||
cdef long j
|
||||
|
||||
j=path.length
|
||||
for i in range(<long>(path.length/2)): # TODO not sure about the cast
|
||||
j-=1
|
||||
path.path[i],path.path[j]=path.path[j],path.path[i]
|
||||
|
||||
cdef void freePath(alignPath* path):
|
||||
if path is not NULL:
|
||||
if path.path is not NULL:
|
||||
free(<void*>path.path)
|
||||
free(<void*>path)
|
||||
|
||||
|
||||
cdef int aascii = ord(b'a')
|
||||
cdef int _basecode[26]
|
||||
|
||||
cdef int bitCount(int x):
|
||||
cdef int i=0
|
||||
while(x):
|
||||
i+=1
|
||||
x&=x-1
|
||||
return i
|
||||
|
||||
cpdef bint iupacMatch(unsigned char a, unsigned char b):
|
||||
cdef bint m
|
||||
|
||||
if a==42: # * ascii code
|
||||
a=110 # n ascii code
|
||||
|
||||
if b==42: # * ascii code
|
||||
b=110 # n ascii code
|
||||
|
||||
m = _basecode[a - aascii] & _basecode[b - aascii]
|
||||
return m
|
||||
|
||||
cpdef unsigned char encodeBase(unsigned char lettre):
|
||||
return _basecode[lettre - aascii]
|
||||
|
||||
cpdef double iupacPartialMatch(unsigned char a, unsigned char b):
|
||||
cdef int codeA
|
||||
cdef int codeB
|
||||
cdef int good
|
||||
cdef int all
|
||||
cdef double partial
|
||||
|
||||
if a==42: # * ascii code
|
||||
a=110 # n ascii code
|
||||
|
||||
if b==42: # * ascii code
|
||||
b=110 # n ascii code
|
||||
|
||||
codeA = _basecode[a - aascii]
|
||||
codeB = _basecode[b - aascii]
|
||||
good = bitCount(codeA & codeB)
|
||||
all = bitCount(codeA) * bitCount(codeB)
|
||||
partial= <double>good / all
|
||||
|
||||
return partial
|
||||
|
||||
|
||||
cdef class DynamicProgramming:
|
||||
|
||||
def __init__(self,opengap,extgap):
|
||||
self.sequenceChanged=True
|
||||
self.scoreChanged=True
|
||||
|
||||
self.matrix=NULL
|
||||
self.hSeq=NULL
|
||||
self.vSeq=NULL
|
||||
self.path=NULL
|
||||
|
||||
self.horizontalSeq=None
|
||||
self.verticalSeq=None
|
||||
|
||||
self._opengap=opengap
|
||||
self._extgap=extgap
|
||||
|
||||
cdef int _vlen(self):
|
||||
return self.vSeq.length
|
||||
|
||||
cdef int _hlen(self):
|
||||
return self.hSeq.length
|
||||
|
||||
cdef int allocate(self) except -1:
|
||||
|
||||
assert self.horizontalSeq is not None,'Sequence A must be set'
|
||||
assert self.verticalSeq is not None,'Sequence B must be set'
|
||||
|
||||
cdef long lenH=self._hlen()
|
||||
cdef long lenV=self._vlen()
|
||||
|
||||
self.matrix=allocateMatrix(lenH,lenV,self.matrix)
|
||||
return 0
|
||||
|
||||
|
||||
cdef double doAlignment(self) except? 0:
|
||||
pass
|
||||
|
||||
cdef bint _needToCompute(self):
|
||||
return self.scoreChanged or self.sequenceChanged
|
||||
|
||||
cdef void backtrack(self):
|
||||
pass
|
||||
|
||||
property seqA:
|
||||
def __get__(self):
|
||||
return self.horizontalSeq
|
||||
|
||||
def __set__(self, seq):
|
||||
self.sequenceChanged = True
|
||||
self.horizontalSeq = seq
|
||||
self.hSeq = allocateSequence(self.horizontalSeq, self.hSeq)
|
||||
|
||||
property seqB:
|
||||
def __get__(self):
|
||||
return self.verticalSeq
|
||||
|
||||
def __set__(self, seq):
|
||||
self.sequenceChanged = True
|
||||
self.verticalSeq = seq
|
||||
self.vSeq = allocateSequence(self.verticalSeq, self.vSeq)
|
||||
|
||||
property opengap:
|
||||
def __get__(self):
|
||||
return self._opengap
|
||||
|
||||
def __set__(self,opengap):
|
||||
self._opengap=opengap
|
||||
self.scoreChanged=True
|
||||
|
||||
property extgap:
|
||||
def __get__(self):
|
||||
return self._extgap
|
||||
|
||||
def __set__(self,extgap):
|
||||
self._extgap=extgap
|
||||
self.scoreChanged=True
|
||||
|
||||
property needToCompute:
|
||||
def __get__(self):
|
||||
return self.scoreChanged or self.sequenceChanged
|
||||
|
||||
property score:
|
||||
def __get__(self):
|
||||
return self.doAlignment()
|
||||
|
||||
cdef void reset(self):
|
||||
self.scoreChanged=True
|
||||
resetMatrix(self.matrix)
|
||||
|
||||
cdef inline int index(self, int x, int y):
|
||||
return (self._hlen()+1) * y + x
|
||||
|
||||
cdef void clean(self):
|
||||
freeMatrix(self.matrix)
|
||||
freeSequence(self.hSeq)
|
||||
freeSequence(self.vSeq)
|
||||
freePath(self.path)
|
||||
|
||||
def __dealloc__(self):
|
||||
self.clean()
|
||||
|
||||
def __call__(self):
|
||||
cdef list hgaps=[]
|
||||
cdef list vgaps=[]
|
||||
cdef list b
|
||||
cdef int hp=0
|
||||
cdef int vp=0
|
||||
cdef int lenh=0
|
||||
cdef int lenv=0
|
||||
cdef int h,v,p
|
||||
cdef int i
|
||||
cdef object ali
|
||||
cdef double score
|
||||
|
||||
if self._needToCompute():
|
||||
score = self.doAlignment()
|
||||
self.backtrack()
|
||||
for i in range(self.path.length-1,-1,-1):
|
||||
p=self.path.path[i]
|
||||
if p==0:
|
||||
hp+=1
|
||||
vp+=1
|
||||
lenh+=1
|
||||
lenv+=1
|
||||
elif p>0:
|
||||
hp+=p
|
||||
lenh+=p
|
||||
vgaps.append([vp,p])
|
||||
vp=0
|
||||
else:
|
||||
vp-=p
|
||||
lenv-=p
|
||||
hgaps.append([hp,-p])
|
||||
hp=0
|
||||
|
||||
if hp:
|
||||
hgaps.append([hp,0])
|
||||
if vp:
|
||||
vgaps.append([vp,0])
|
||||
|
||||
if lenh < self._hlen():
|
||||
hseq=self.horizontalSeq[self.path.hStart:self.path.hStart+lenh]
|
||||
else:
|
||||
hseq=self.horizontalSeq
|
||||
|
||||
hseq=AlignedSequence(hseq)
|
||||
hseq.gaps=hgaps
|
||||
|
||||
if lenv < self._vlen():
|
||||
vseq=self.verticalSeq[self.path.vStart:self.path.vStart+lenv]
|
||||
else:
|
||||
vseq=self.verticalSeq
|
||||
|
||||
vseq=AlignedSequence(vseq)
|
||||
vseq.gaps=vgaps
|
||||
|
||||
ali=Alignment()
|
||||
ali.append(hseq)
|
||||
ali.append(vseq)
|
||||
|
||||
ali.score=score
|
||||
self.alignment=ali
|
||||
ali=self.alignment.clone()
|
||||
ali.score=self.alignment.score
|
||||
return ali
|
||||
|
||||
|
||||
|
||||
|
||||
# initialize iupac carray
|
||||
|
||||
__basecode=[1,14,2,13,0,0,4,11,0,0,12,0,3,15,0,0,0,5,6,8,8,7,9,0,10,0]
|
||||
for i in range(26):
|
||||
_basecode[i]=__basecode[i]
|
||||
|
||||
|
12
python/obitools3/align/_nws.pxd
Normal file
12
python/obitools3/align/_nws.pxd
Normal file
@ -0,0 +1,12 @@
|
||||
#cython: language_level=3
|
||||
|
||||
from ._dynamic cimport *
|
||||
|
||||
cdef class NWS(DynamicProgramming):
|
||||
cdef double _match
|
||||
cdef double _mismatch
|
||||
|
||||
cdef double matchScore(self,int h, int v)
|
||||
cdef double doAlignment(self) except? 0
|
||||
|
||||
|
161
python/obitools3/align/_nws.pyx
Normal file
161
python/obitools3/align/_nws.pyx
Normal file
@ -0,0 +1,161 @@
|
||||
#cython: language_level=3
|
||||
|
||||
'''
|
||||
Created on 6 Nov. 2009
|
||||
|
||||
@author: coissac
|
||||
'''
|
||||
|
||||
|
||||
cdef class NWS(DynamicProgramming):
|
||||
|
||||
def __init__(self,match=4,mismatch=-6,opengap=-8,extgap=-2):
|
||||
DynamicProgramming.__init__(self,opengap,extgap)
|
||||
self._match=match
|
||||
self._mismatch=mismatch
|
||||
|
||||
cdef double matchScore(self,int h, int v):
|
||||
cdef double score
|
||||
score = iupacPartialMatch(self.hSeq.sequence[h-1],self.vSeq.sequence[v-1])
|
||||
return score * self._match + (1-score) * self._mismatch
|
||||
|
||||
cdef double doAlignment(self) except? 0:
|
||||
cdef int i # vertical index
|
||||
cdef int j # horizontal index
|
||||
cdef int idx
|
||||
cdef int jump
|
||||
cdef int delta
|
||||
cdef double score
|
||||
cdef double scoremax
|
||||
cdef int path
|
||||
|
||||
|
||||
if self.needToCompute:
|
||||
self.allocate()
|
||||
self.reset()
|
||||
|
||||
for j in range(1,self._hlen()+1):
|
||||
idx = self.index(j,0)
|
||||
self.matrix.matrix[idx].score = self._opengap + (self._extgap * (j-1))
|
||||
self.matrix.matrix[idx].path = j
|
||||
|
||||
for i in range(1,self._vlen()+1):
|
||||
idx = self.index(0,i)
|
||||
self.matrix.matrix[idx].score = self._opengap + (self._extgap * (i-1))
|
||||
self.matrix.matrix[idx].path = -i
|
||||
|
||||
for i in range(1,self._vlen()+1):
|
||||
for j in range(1,self._hlen()+1):
|
||||
|
||||
# 1 - came from diagonal
|
||||
idx = self.index(j-1,i-1)
|
||||
# print "computing cell : %d,%d --> %d/%d" % (j,i,self.index(j,i),self.matrix.msize),
|
||||
scoremax = self.matrix.matrix[idx].score + \
|
||||
self.matchScore(j,i)
|
||||
path = 0
|
||||
|
||||
# print "so=%f sd=%f sm=%f" % (self.matrix.matrix[idx].score,self.matchScore(j,i),scoremax),
|
||||
|
||||
# 2 - open horizontal gap
|
||||
idx = self.index(j-1,i)
|
||||
score = self.matrix.matrix[idx].score + \
|
||||
self._opengap
|
||||
if score > scoremax :
|
||||
scoremax = score
|
||||
path = +1
|
||||
|
||||
# 3 - open vertical gap
|
||||
idx = self.index(j,i-1)
|
||||
score = self.matrix.matrix[idx].score + \
|
||||
self._opengap
|
||||
if score > scoremax :
|
||||
scoremax = score
|
||||
path = -1
|
||||
|
||||
# 4 - extend horizontal gap
|
||||
jump = self.matrix.bestHJump[i]
|
||||
if jump >= 0:
|
||||
idx = self.index(jump,i)
|
||||
delta = j-jump
|
||||
score = self.matrix.matrix[idx].score + \
|
||||
self._extgap * delta
|
||||
if score > scoremax :
|
||||
scoremax = score
|
||||
path = delta+1
|
||||
|
||||
# 5 - extend vertical gap
|
||||
jump = self.matrix.bestVJump[j]
|
||||
if jump >= 0:
|
||||
idx = self.index(j,jump)
|
||||
delta = i-jump
|
||||
score = self.matrix.matrix[idx].score + \
|
||||
self._extgap * delta
|
||||
if score > scoremax :
|
||||
scoremax = score
|
||||
path = -delta-1
|
||||
|
||||
idx = self.index(j,i)
|
||||
self.matrix.matrix[idx].score = scoremax
|
||||
self.matrix.matrix[idx].path = path
|
||||
|
||||
if path == -1:
|
||||
self.matrix.bestVJump[j]=i
|
||||
elif path == +1 :
|
||||
self.matrix.bestHJump[i]=j
|
||||
|
||||
self.sequenceChanged=False
|
||||
self.scoreChanged=False
|
||||
|
||||
idx = self.index(self._hlen(),self._vlen())
|
||||
return self.matrix.matrix[idx].score
|
||||
|
||||
cdef void backtrack(self):
|
||||
#cdef list path=[]
|
||||
cdef int i
|
||||
cdef int j
|
||||
cdef int p
|
||||
|
||||
self.doAlignment()
|
||||
i=self._vlen()
|
||||
j=self._hlen()
|
||||
self.path=allocatePath(i,j,self.path)
|
||||
|
||||
while (i or j):
|
||||
p=self.matrix.matrix[self.index(j,i)].path
|
||||
self.path.path[self.path.length]=p
|
||||
self.path.length+=1
|
||||
#path.append(p)
|
||||
if p==0:
|
||||
i-=1
|
||||
j-=1
|
||||
elif p < 0:
|
||||
i+=p
|
||||
else:
|
||||
j-=p
|
||||
|
||||
#path.reverse()
|
||||
#reversePath(self.path)
|
||||
self.path.hStart=0
|
||||
self.path.vStart=0
|
||||
|
||||
#return 0,0,path
|
||||
|
||||
property match:
|
||||
def __get__(self):
|
||||
return self._match
|
||||
|
||||
def __set__(self,match):
|
||||
self._match=match
|
||||
self.scoreChanged=True
|
||||
|
||||
property mismatch:
|
||||
def __get__(self):
|
||||
return self._mismatch
|
||||
|
||||
def __set__(self,mismatch):
|
||||
self._mismatch=mismatch
|
||||
self.scoreChanged=True
|
||||
|
||||
|
||||
|
||||
|
98
python/obitools3/align/_qsassemble.pyx
Normal file
98
python/obitools3/align/_qsassemble.pyx
Normal file
@ -0,0 +1,98 @@
|
||||
#cython: language_level=3
|
||||
|
||||
'''
|
||||
Created on 6 Nov. 2009
|
||||
|
||||
@author: coissac
|
||||
'''
|
||||
|
||||
from ._dynamic cimport *
|
||||
from ._assemble cimport DirectAssemble
|
||||
import array
|
||||
|
||||
|
||||
cdef class QSolexaDirectAssemble(DirectAssemble):
|
||||
|
||||
cdef double* hError
|
||||
cdef double* vError
|
||||
cdef object a
|
||||
|
||||
|
||||
def __init__(self,match=4,mismatch=-4,opengap=-8,extgap=-2):
|
||||
"""
|
||||
Rapport entre score de match et mismatch:
|
||||
si mismatch = - match / 3
|
||||
alors quand scrore temps vers 0 et qu'il est impossible de decider
|
||||
pas de penalisation (s'=0)
|
||||
si mismatch < - match / 3 la non decidabilite est penalisee.
|
||||
"""
|
||||
DirectAssemble.__init__(self,match,mismatch,opengap,extgap)
|
||||
|
||||
|
||||
cdef double matchScore(self,int h, int v):
|
||||
cdef double score
|
||||
cdef double smatch
|
||||
cdef double smismatch
|
||||
cdef double hok=1-self.hError[h-1]
|
||||
cdef double vok=1-self.vError[v-1]
|
||||
score=iupacPartialMatch(self.hSeq.sequence[h-1],self.vSeq.sequence[v-1])
|
||||
smatch=((4*hok*vok-hok-vok)*(self._match-self._mismatch)+self._match+2*self._mismatch)/3
|
||||
smismatch=((hok+vok-4*hok*vok)*(self._match-self._mismatch)+2*self._match+7*self._mismatch)/9
|
||||
return smatch * score + smismatch * (1. - score)
|
||||
|
||||
property seqA:
|
||||
def __get__(self):
|
||||
return self.horizontalSeq
|
||||
|
||||
def __set__(self, seq):
|
||||
cdef object oaddresse,olength
|
||||
assert hasattr(seq, "quality_array"),"You must use sequence with quality indices"
|
||||
self.sequenceChanged=True
|
||||
self.horizontalSeq=seq
|
||||
self.hSeq=allocateSequence(self.horizontalSeq,self.hSeq)
|
||||
(oaddress,olength)=seq.quality_array.buffer_info()
|
||||
self.hError=<double*><unsigned long int>oaddress
|
||||
|
||||
property seqB:
|
||||
def __get__(self):
|
||||
return self.verticalSeq
|
||||
|
||||
def __set__(self, seq):
|
||||
cdef object oaddresse,olength
|
||||
assert hasattr(seq, "quality_array"),"You must use sequence with quality indices"
|
||||
self.sequenceChanged=True
|
||||
self.verticalSeq=seq
|
||||
self.vSeq=allocateSequence(self.verticalSeq,self.vSeq)
|
||||
(oaddress,olength)=seq.quality_array.buffer_info()
|
||||
self.vError=<double*><unsigned long int>oaddress
|
||||
|
||||
|
||||
cdef class QSolexaReverseAssemble(QSolexaDirectAssemble):
|
||||
|
||||
cdef double matchScore(self,int h, int v):
|
||||
cdef double score
|
||||
cdef double smatch
|
||||
cdef double smismatch
|
||||
cdef double hok=1-self.hError[h-1]
|
||||
cdef double vok=1-self.vError[self.vSeq.length - v]
|
||||
score=iupacPartialMatch(self.hSeq.sequence[h-1],self.vSeq.sequence[v-1])
|
||||
smatch=((4*hok*vok-hok-vok)*(self._match-self._mismatch)+self._match+2*self._mismatch)/3
|
||||
smismatch=((hok+vok-4*hok*vok)*(self._match-self._mismatch)+2*self._match+7*self._mismatch)/9
|
||||
return smatch * score + smismatch * (1. - score)
|
||||
|
||||
|
||||
property seqB:
|
||||
|
||||
def __get__(self):
|
||||
return self.verticalSeq.wrapped
|
||||
|
||||
def __set__(self, seq):
|
||||
cdef object oaddresse,olength
|
||||
assert hasattr(seq, "quality_array"),"You must use sequence with quality indices"
|
||||
self.sequenceChanged=True
|
||||
self.verticalSeq=seq.reverse_complement
|
||||
self.vSeq=allocateSequence(self.verticalSeq,self.vSeq)
|
||||
self.a = array.array('d', list(seq.quality_array))
|
||||
(oaddress,olength) = self.a.buffer_info()
|
||||
self.vError = <double*><unsigned long int>oaddress
|
||||
|
96
python/obitools3/align/_qsrassemble.pyx
Normal file
96
python/obitools3/align/_qsrassemble.pyx
Normal file
@ -0,0 +1,96 @@
|
||||
#cython: language_level=3
|
||||
|
||||
'''
|
||||
Created on 6 Nov. 2009
|
||||
|
||||
@author: coissac
|
||||
'''
|
||||
|
||||
from ._dynamic cimport *
|
||||
from ._rassemble cimport RightDirectAssemble
|
||||
from copy import deepcopy
|
||||
import array
|
||||
|
||||
|
||||
cdef class QSolexaRightDirectAssemble(RightDirectAssemble):
|
||||
|
||||
cdef double* hError
|
||||
cdef double* vError
|
||||
cdef object a
|
||||
|
||||
def __init__(self,match=4,mismatch=-4,opengap=-8,extgap=-2):
|
||||
"""
|
||||
Rapport entre score de match et mismatch:
|
||||
si mismatch = - match / 3
|
||||
alors quand scrore temps vers 0 et qu'il est impossible de decider
|
||||
pas de penalisation (s'=0)
|
||||
si mismatch < - match / 3 la non decidabilite est penalisee.
|
||||
"""
|
||||
RightDirectAssemble.__init__(self,match,mismatch,opengap,extgap)
|
||||
|
||||
|
||||
cdef double matchScore(self,int h, int v):
|
||||
cdef double score
|
||||
cdef double smatch
|
||||
cdef double smismatch
|
||||
cdef double hok=1-self.hError[h-1]
|
||||
cdef double vok=1-self.vError[v-1]
|
||||
score=iupacPartialMatch(self.hSeq.sequence[h-1],self.vSeq.sequence[v-1])
|
||||
smatch=((4*hok*vok-hok-vok)*(self._match-self._mismatch)+self._match+2*self._mismatch)/3
|
||||
smismatch=((hok+vok-4*hok*vok)*(self._match-self._mismatch)+2*self._match+7*self._mismatch)/9
|
||||
return smatch * score + smismatch * (1. - score)
|
||||
|
||||
property seqA:
|
||||
def __get__(self):
|
||||
return self.horizontalSeq
|
||||
|
||||
def __set__(self, seq):
|
||||
cdef object oaddresse,olength
|
||||
assert hasattr(seq, "quality"),"You must use sequence with quality indices"
|
||||
self.sequenceChanged=True
|
||||
self.horizontalSeq=seq
|
||||
self.hSeq=allocateSequence(self.horizontalSeq,self.hSeq)
|
||||
(oaddress,olength)=seq.quality_array.buffer_info()
|
||||
self.hError=<double*><unsigned long int>oaddress
|
||||
|
||||
property seqB:
|
||||
def __get__(self):
|
||||
return self.verticalSeq
|
||||
|
||||
def __set__(self, seq):
|
||||
cdef object oaddresse,olength
|
||||
assert hasattr(seq, "quality"),"You must use sequence with quality indices"
|
||||
self.sequenceChanged=True
|
||||
self.verticalSeq=seq
|
||||
self.vSeq=allocateSequence(self.verticalSeq,self.vSeq)
|
||||
(oaddress,olength)=seq.quality_array.buffer_info()
|
||||
self.vError=<double*><unsigned long int>oaddress
|
||||
|
||||
|
||||
cdef class QSolexaRightReverseAssemble(QSolexaRightDirectAssemble):
|
||||
|
||||
cdef double matchScore(self,int h, int v):
|
||||
cdef double score
|
||||
cdef double smatch
|
||||
cdef double smismatch
|
||||
cdef double hok=1-self.hError[h-1]
|
||||
cdef double vok=1-self.vError[self.vSeq.length - v]
|
||||
score=iupacPartialMatch(self.hSeq.sequence[h-1],self.vSeq.sequence[v-1])
|
||||
smatch=((4*hok*vok-hok-vok)*(self._match-self._mismatch)+self._match+2*self._mismatch)/3
|
||||
smismatch=((hok+vok-4*hok*vok)*(self._match-self._mismatch)+2*self._match+7*self._mismatch)/9
|
||||
return smatch * score + smismatch * (1. - score)
|
||||
|
||||
|
||||
property seqB:
|
||||
def __get__(self):
|
||||
return self.verticalSeq.wrapped
|
||||
|
||||
def __set__(self, seq):
|
||||
cdef object oaddresse,olength
|
||||
assert hasattr(seq, "quality"),"You must use sequence with quality indices"
|
||||
self.sequenceChanged=True
|
||||
self.verticalSeq=seq.reverse_complement
|
||||
self.vSeq=allocateSequence(self.verticalSeq,self.vSeq)
|
||||
self.a = array.array('d', list(seq.quality_array))
|
||||
(oaddress,olength) = self.a.buffer_info()
|
||||
self.vError = <double*><unsigned long int>oaddress
|
12
python/obitools3/align/_rassemble.pxd
Normal file
12
python/obitools3/align/_rassemble.pxd
Normal file
@ -0,0 +1,12 @@
|
||||
#cython: language_level=3
|
||||
|
||||
from ._nws cimport *
|
||||
|
||||
cdef class RightDirectAssemble(NWS):
|
||||
cdef double xsmax
|
||||
cdef int xmax
|
||||
|
||||
cdef double doAlignment(self) except? 0
|
||||
|
||||
cdef class RightReverseAssemble(RightDirectAssemble):
|
||||
pass
|
158
python/obitools3/align/_rassemble.pyx
Normal file
158
python/obitools3/align/_rassemble.pyx
Normal file
@ -0,0 +1,158 @@
|
||||
#cython: language_level=3
|
||||
|
||||
'''
|
||||
Created on 6 Nov. 2009
|
||||
|
||||
@author: coissac
|
||||
'''
|
||||
|
||||
from ._rassemble cimport *
|
||||
|
||||
|
||||
cdef class RightDirectAssemble(NWS):
|
||||
|
||||
def __init__(self,match=4,mismatch=-6,opengap=-8,extgap=-2):
|
||||
NWS.__init__(self,match,mismatch,opengap,extgap)
|
||||
self.xsmax=0
|
||||
self.xmax=0
|
||||
|
||||
cdef double doAlignment(self) except? 0:
|
||||
cdef int i # vertical index
|
||||
cdef int j # horizontal index
|
||||
cdef int idx
|
||||
cdef int jump
|
||||
cdef int delta
|
||||
cdef double score
|
||||
cdef double scoremax
|
||||
cdef int path
|
||||
|
||||
|
||||
if self.needToCompute:
|
||||
self.allocate()
|
||||
self.reset()
|
||||
self.xsmax=0
|
||||
self.xmax=0
|
||||
|
||||
for j in range(1,self.hSeq.length+1):
|
||||
idx = self.index(j,0)
|
||||
self.matrix.matrix[idx].score = self._opengap + (self._extgap * (j-1))
|
||||
self.matrix.matrix[idx].path = j
|
||||
|
||||
for i in range(1,self.vSeq.length+1):
|
||||
idx = self.index(0,i)
|
||||
self.matrix.matrix[idx].score = 0
|
||||
self.matrix.matrix[idx].path = -i
|
||||
|
||||
for i in range(1,self.vSeq.length+1):
|
||||
for j in range(1,self.hSeq.length+1):
|
||||
|
||||
# 1 - came from diagonal
|
||||
idx = self.index(j-1,i-1)
|
||||
#print("computing cell : %d,%d --> %d/%d" % (j,i,self.index(j,i),self.matrix.msize),)
|
||||
scoremax = self.matrix.matrix[idx].score + \
|
||||
self.matchScore(j,i)
|
||||
path = 0
|
||||
|
||||
#print("so=%f sd=%f sm=%f" % (self.matrix.matrix[idx].score,self.matchScore(j,i),scoremax),)
|
||||
|
||||
# 2 - open horizontal gap
|
||||
idx = self.index(j-1,i)
|
||||
score = self.matrix.matrix[idx].score+ \
|
||||
self._opengap
|
||||
if score > scoremax :
|
||||
scoremax = score
|
||||
path = +1
|
||||
|
||||
# 3 - open vertical gap
|
||||
idx = self.index(j,i-1)
|
||||
score = self.matrix.matrix[idx].score + \
|
||||
self._opengap
|
||||
if score > scoremax :
|
||||
scoremax = score
|
||||
path = -1
|
||||
|
||||
# 4 - extend horizontal gap
|
||||
jump = self.matrix.bestHJump[i]
|
||||
if jump >= 0:
|
||||
idx = self.index(jump,i)
|
||||
delta = j-jump
|
||||
score = self.matrix.matrix[idx].score + \
|
||||
self._extgap * delta
|
||||
if score > scoremax :
|
||||
scoremax = score
|
||||
path = delta+1
|
||||
|
||||
# 5 - extend vertical gap
|
||||
jump = self.matrix.bestVJump[j]
|
||||
if jump >= 0:
|
||||
idx = self.index(j,jump)
|
||||
delta = i-jump
|
||||
score = self.matrix.matrix[idx].score + \
|
||||
self._extgap * delta
|
||||
if score > scoremax :
|
||||
scoremax = score
|
||||
path = -delta-1
|
||||
|
||||
idx = self.index(j,i)
|
||||
self.matrix.matrix[idx].score = scoremax
|
||||
self.matrix.matrix[idx].path = path
|
||||
|
||||
if path == -1:
|
||||
self.matrix.bestVJump[j]=i
|
||||
elif path == +1 :
|
||||
self.matrix.bestHJump[i]=j
|
||||
|
||||
if i==self.vSeq.length and scoremax > self.xsmax:
|
||||
self.xsmax=scoremax
|
||||
self.xmax=j
|
||||
|
||||
self.sequenceChanged=False
|
||||
self.scoreChanged=False
|
||||
|
||||
return self.xsmax
|
||||
|
||||
cdef void backtrack(self):
|
||||
cdef list path=[]
|
||||
cdef int i
|
||||
cdef int j
|
||||
cdef int p
|
||||
|
||||
self.doAlignment()
|
||||
j=self.xmax
|
||||
i=self.vSeq.length
|
||||
self.path=allocatePath(i,j+1,self.path)
|
||||
|
||||
if self.xmax<self.hSeq.length:
|
||||
self.path.path[self.path.length]=self.hSeq.length-self.xmax
|
||||
self.path.length+=1
|
||||
|
||||
while (i or j):
|
||||
p=self.matrix.matrix[self.index(j,i)].path
|
||||
self.path.path[self.path.length]=p
|
||||
self.path.length+=1
|
||||
#path.append(p)
|
||||
if p==0:
|
||||
i-=1
|
||||
j-=1
|
||||
elif p < 0:
|
||||
i+=p
|
||||
else:
|
||||
j-=p
|
||||
|
||||
#path.reverse()
|
||||
self.path.hStart=0
|
||||
self.path.vStart=0
|
||||
#reversePath(self.path)
|
||||
#return 0,0,path
|
||||
|
||||
|
||||
cdef class RightReverseAssemble(RightDirectAssemble):
|
||||
|
||||
property seqB:
|
||||
def __get__(self):
|
||||
return self.verticalSeq.wrapped
|
||||
|
||||
def __set__(self, seq):
|
||||
self.sequenceChanged=True
|
||||
self.verticalSeq=seq.reverse_complement
|
||||
self.vSeq=allocateSequence(self.verticalSeq,self.vSeq)
|
217
python/obitools3/align/_solexapairend.pyx
Normal file
217
python/obitools3/align/_solexapairend.pyx
Normal file
@ -0,0 +1,217 @@
|
||||
#cython: language_level=3
|
||||
|
||||
from cpython cimport array
|
||||
|
||||
from .solexapairend import iterOnAligment
|
||||
from math import log10
|
||||
|
||||
|
||||
cdef class IterOnConsensus:
|
||||
|
||||
cdef object _ali
|
||||
cdef int __seqASingle
|
||||
cdef int __seqBSingle
|
||||
cdef int __seqABMatch
|
||||
cdef int __seqAMismatch
|
||||
cdef int __seqBMismatch
|
||||
cdef int __seqAInsertion
|
||||
cdef int __seqBInsertion
|
||||
cdef int __seqADeletion
|
||||
cdef int __seqBDeletion
|
||||
cdef object __ioa
|
||||
cdef bint __firstSeqB
|
||||
|
||||
def __cinit__(self,ali):
|
||||
self._ali=ali
|
||||
self.__seqASingle=0
|
||||
self.__seqBSingle=0
|
||||
self.__seqABMatch=0
|
||||
self.__seqAMismatch=0
|
||||
self.__seqBMismatch=0
|
||||
self.__seqAInsertion=0
|
||||
self.__seqBInsertion=0
|
||||
self.__seqADeletion=0
|
||||
self.__seqBDeletion=0
|
||||
|
||||
self.__ioa = iterOnAligment(self._ali)
|
||||
self.__firstSeqB=False
|
||||
|
||||
def get_seqASingle(self):
|
||||
return self.__seqASingle
|
||||
|
||||
|
||||
def get_seqBSingle(self):
|
||||
return self.__seqBSingle
|
||||
|
||||
|
||||
def get_seqABMatch(self):
|
||||
return self.__seqABMatch
|
||||
|
||||
|
||||
def get_seqAMismatch(self):
|
||||
return self.__seqAMismatch
|
||||
|
||||
|
||||
def get_seqBMismatch(self):
|
||||
return self.__seqBMismatch
|
||||
|
||||
|
||||
def get_seqAInsertion(self):
|
||||
return self.__seqAInsertion
|
||||
|
||||
|
||||
def get_seqBInsertion(self):
|
||||
return self.__seqBInsertion
|
||||
|
||||
|
||||
def get_seqADeletion(self):
|
||||
return self.__seqADeletion
|
||||
|
||||
|
||||
def get_seqBDeletion(self):
|
||||
return self.__seqBDeletion
|
||||
|
||||
def __next__(self):
|
||||
cdef bytes snuc0
|
||||
cdef bytes snuc1
|
||||
cdef char* nuc0
|
||||
cdef char* nuc1
|
||||
cdef char* dash=b"-"
|
||||
cdef double score0
|
||||
cdef double score1
|
||||
cdef double h0
|
||||
cdef double h1
|
||||
|
||||
while(1):
|
||||
|
||||
snuc0,score0,snuc1,score1 = self.__ioa.__next__()
|
||||
nuc0=snuc0
|
||||
nuc1=snuc1
|
||||
if nuc0[0]==nuc1[0]:
|
||||
if nuc1[0]!=dash[0]:
|
||||
self.__firstSeqB=True
|
||||
self.__seqABMatch+=1
|
||||
self.__seqBSingle=0
|
||||
return (nuc0,score0*score1)
|
||||
else:
|
||||
h0 = score0 * (1-score1/3)
|
||||
h1 = score1 * (1-score0/3)
|
||||
if h0 < h1:
|
||||
|
||||
if nuc0[0]!=dash[0]:
|
||||
self.__seqBSingle=0
|
||||
if nuc1[0]==dash[0]:
|
||||
if self.__firstSeqB:
|
||||
self.__seqAInsertion+=1
|
||||
else:
|
||||
self.__seqASingle+=1
|
||||
else:
|
||||
self.__firstSeqB=True
|
||||
self.__seqAMismatch+=1
|
||||
if score1==1.0: # Free end gap
|
||||
return (nuc0,score0)
|
||||
else:
|
||||
return (nuc0,h0)
|
||||
else:
|
||||
self.__seqADeletion+=1
|
||||
else:
|
||||
if nuc1[0]!=dash[0]:
|
||||
self.__firstSeqB=True
|
||||
if nuc0[0]==dash[0]:
|
||||
self.__seqBInsertion+=1
|
||||
self.__seqBSingle+=1
|
||||
else:
|
||||
self.__seqBMismatch+=1
|
||||
self.__seqBSingle=0
|
||||
if score0==1.0: # Free end gap
|
||||
return (nuc1,score1)
|
||||
else:
|
||||
return (nuc1,h1)
|
||||
else:
|
||||
self.__seqBSingle=0
|
||||
self.__seqBDeletion+=1
|
||||
|
||||
|
||||
def __iter__(self):
|
||||
return self
|
||||
|
||||
seqASingle = property(get_seqASingle, None, None, "direct's docstring")
|
||||
seqBSingle = property(get_seqBSingle, None, None, "reverse's docstring")
|
||||
seqABMatch = property(get_seqABMatch, None, None, "idem's docstring")
|
||||
seqAMismatch = property(get_seqAMismatch, None, None, "mismatchdirect's docstring")
|
||||
seqBMismatch = property(get_seqBMismatch, None, None, "mismatchreverse's docstring")
|
||||
seqAInsertion = property(get_seqAInsertion, None, None, "insertdirect's docstring")
|
||||
seqBInsertion = property(get_seqBInsertion, None, None, "insertreverse's docstring")
|
||||
seqADeletion = property(get_seqADeletion, None, None, "deletedirect's docstring")
|
||||
seqBDeletion = property(get_seqBDeletion, None, None, "deletereverse's docstring")
|
||||
|
||||
|
||||
def buildConsensus(ali, seq):
|
||||
cdef list quality
|
||||
cdef char aseq[1000]
|
||||
cdef int i=0
|
||||
cdef int j=0
|
||||
cdef char* cnuc
|
||||
cdef bytes nuc
|
||||
cdef double score
|
||||
cdef bytes sseq
|
||||
cdef double p
|
||||
|
||||
quality = []
|
||||
|
||||
if len(ali[0])>999: # TODO why?
|
||||
raise AssertionError,"Too long alignemnt"
|
||||
|
||||
ic=IterOnConsensus(ali)
|
||||
|
||||
for nuc,score in ic:
|
||||
cnuc=nuc
|
||||
quality.append(score)
|
||||
aseq[i]=cnuc[0]
|
||||
i+=1
|
||||
|
||||
aseq[i]=0
|
||||
|
||||
sseq=aseq
|
||||
|
||||
# Reconvert quality from array of probabilities to array of raw quality values
|
||||
i=0
|
||||
for p in quality:
|
||||
quality[i] = min(42, round(-10*log10(p)))
|
||||
i+=1
|
||||
|
||||
seq.set(ali[0].wrapped.id+b"_CONS", sseq, quality=quality, tags=ali[0].wrapped)
|
||||
|
||||
if hasattr(ali, "direction"):
|
||||
seq[b'direction']=ali.direction
|
||||
if hasattr(ali, "counter"):
|
||||
seq[b'alignement_id']=ali.counter
|
||||
seq[b'seq_a_single']=ic.seqASingle
|
||||
seq[b'seq_b_single']=ic.seqBSingle
|
||||
seq[b'seq_ab_match']=ic.seqABMatch
|
||||
seq[b'seq_a_mismatch']=ic.seqAMismatch
|
||||
seq[b'seq_b_mismatch']=ic.seqBMismatch
|
||||
seq[b'seq_a_insertion']=ic.seqAInsertion
|
||||
seq[b'seq_b_insertion']=ic.seqBInsertion-ic.seqBSingle
|
||||
seq[b'seq_a_deletion']=ic.seqADeletion
|
||||
seq[b'seq_b_deletion']=ic.seqBDeletion
|
||||
seq[b'score']=ali.score
|
||||
seq[b'ali_length']=len(seq)-ic.seqASingle-ic.seqBSingle
|
||||
if seq[b'ali_length']>0:
|
||||
seq[b'score_norm']=float(ali.score)/seq[b'ali_length']
|
||||
seq[b'mode']=b'alignment'
|
||||
return seq
|
||||
|
||||
|
||||
def buildJoinedSequence(ali, reverse, seq):
|
||||
forward = ali[0].wrapped
|
||||
s = forward.seq + reverse.seq
|
||||
quality = forward.quality
|
||||
quality.extend(reverse.quality)
|
||||
seq.set(forward.id +b"_PairedEnd", s, definition=forward.definition, quality=quality, tags=forward)
|
||||
seq[b"score"]=ali.score
|
||||
seq[b"ali_dir"]=ali.direction
|
||||
seq[b"mode"]=b"joined"
|
||||
seq[b"pairedend_limit"]=len(forward)
|
||||
return seq
|
||||
|
54
python/obitools3/align/solexapairend.py
Normal file
54
python/obitools3/align/solexapairend.py
Normal file
@ -0,0 +1,54 @@
|
||||
#cython: language_level=3
|
||||
|
||||
'''
|
||||
Created on 17 mai 2010
|
||||
|
||||
@author: coissac
|
||||
'''
|
||||
|
||||
from obitools3.align import columnIterator
|
||||
|
||||
|
||||
def iterOnAligment(ali):
|
||||
pos0=0
|
||||
pos1=0
|
||||
begin0=False
|
||||
end0=False
|
||||
begin1=False
|
||||
end1=False
|
||||
|
||||
for nuc0,nuc1 in columnIterator(ali):
|
||||
if nuc0==b"-":
|
||||
if begin0:
|
||||
if not end0:
|
||||
score0 = ( ali[0].wrapped.quality_array[pos0-1]
|
||||
+ali[0].wrapped.quality_array[pos0]
|
||||
)/2
|
||||
else:
|
||||
score0 = 1.
|
||||
else:
|
||||
score0 = 0.
|
||||
else:
|
||||
begin0=True
|
||||
score0 = ali[0].wrapped.quality_array[pos0]
|
||||
pos0+=1
|
||||
end0= pos0==len(ali[0].wrapped)
|
||||
|
||||
if nuc1==b"-":
|
||||
if begin1:
|
||||
if not end1:
|
||||
score1 = ( ali[1].wrapped.quality_array[pos1-1]
|
||||
+ali[1].wrapped.quality_array[pos1]
|
||||
)/2
|
||||
else:
|
||||
score1 = 0.
|
||||
else:
|
||||
score1 = 1.
|
||||
else:
|
||||
begin1=True
|
||||
score1 = ali[1].wrapped.quality_array[pos1]
|
||||
pos1+=1
|
||||
end1= pos1==len(ali[1].wrapped)
|
||||
|
||||
result = (nuc0,score0,nuc1,score1)
|
||||
yield result
|
Reference in New Issue
Block a user