Alignpairedend: added alignment using shifting with best kmer similarity
(low level layer in C and Cython API)
This commit is contained in:
@ -3,6 +3,18 @@
|
||||
from cpython cimport array
|
||||
|
||||
from .solexapairend import iterOnAligment
|
||||
from .shifted_ali cimport Ali_shifted
|
||||
|
||||
from obitools3.dms.capi.obiview cimport Obiview_p, QUALITY_COLUMN, \
|
||||
obi_set_qual_int_with_elt_idx_and_col_p_in_view, \
|
||||
obi_set_str_with_elt_idx_and_col_p_in_view
|
||||
|
||||
from obitools3.dms.capi.obidmscolumn cimport OBIDMS_column_p
|
||||
|
||||
from obitools3.dms.view.view cimport View
|
||||
from obitools3.dms.column.column cimport Column
|
||||
|
||||
|
||||
from math import log10
|
||||
|
||||
|
||||
@ -146,7 +158,7 @@ cdef class IterOnConsensus:
|
||||
seqBDeletion = property(get_seqBDeletion, None, None, "deletereverse's docstring")
|
||||
|
||||
|
||||
def buildConsensus(ali, seq):
|
||||
def buildConsensus(ali, seq, ref_tags=None):
|
||||
cdef list quality
|
||||
cdef char aseq[1000]
|
||||
cdef int i=0
|
||||
@ -156,61 +168,82 @@ def buildConsensus(ali, seq):
|
||||
cdef double score
|
||||
cdef bytes sseq
|
||||
cdef double p
|
||||
cdef OBIDMS_column_p col_p
|
||||
cdef Obiview_p view_p
|
||||
cdef View view
|
||||
cdef Column column
|
||||
|
||||
quality = []
|
||||
|
||||
if type(ali) == Ali_shifted:
|
||||
view = seq.view
|
||||
view_p = view.pointer()
|
||||
column = view[QUALITY_COLUMN]
|
||||
col_p = column.pointer()
|
||||
# Doesn't work: Cython coerces uint8_t* to bytes, so nothing is read/kept past the first 0 value
|
||||
#obi_set_qual_int_with_elt_idx_and_col_p_in_view(view_p, col_p, seq.index, 0, ali.consensus_qual, ali.consensus_len)
|
||||
seq.set(ref_tags.id+b"_CONS", ali.consensus_seq, quality=ali.consensus_qual, tags=ref_tags)
|
||||
seq[b'ali_length'] = ali.consensus_len
|
||||
seq[b'score_norm']=float(ali.score)/ali.consensus_len
|
||||
seq[b'shift']=ali.shift
|
||||
else:
|
||||
if len(ali[0])>999: # TODO why?
|
||||
raise AssertionError,"Too long alignemnt"
|
||||
|
||||
if len(ali[0])>999: # TODO why?
|
||||
raise AssertionError,"Too long alignemnt"
|
||||
|
||||
ic=IterOnConsensus(ali)
|
||||
|
||||
for nuc,score in ic:
|
||||
cnuc=nuc
|
||||
quality.append(score)
|
||||
aseq[i]=cnuc[0]
|
||||
i+=1
|
||||
|
||||
aseq[i]=0
|
||||
|
||||
sseq=aseq
|
||||
|
||||
# Reconvert quality from array of probabilities to array of raw quality values
|
||||
i=0
|
||||
for p in quality:
|
||||
quality[i] = min(42, round(-10*log10(p)))
|
||||
i+=1
|
||||
ic=IterOnConsensus(ali)
|
||||
|
||||
seq.set(ali[0].wrapped.id+b"_CONS", sseq, quality=quality, tags=ali[0].wrapped)
|
||||
for nuc,score in ic:
|
||||
cnuc=nuc
|
||||
quality.append(score)
|
||||
aseq[i]=cnuc[0]
|
||||
i+=1
|
||||
|
||||
aseq[i]=0
|
||||
|
||||
sseq=aseq
|
||||
|
||||
# Reconvert quality from array of probabilities to array of raw quality values
|
||||
i=0
|
||||
for p in quality:
|
||||
quality[i] = min(42, round(-10*log10(p)))
|
||||
i+=1
|
||||
|
||||
seq.set(ali[0].wrapped.id+b"_CONS", sseq, quality=quality, tags=ali[0].wrapped)
|
||||
|
||||
if hasattr(ali, "counter"):
|
||||
seq[b'alignement_id']=ali.counter
|
||||
seq[b'seq_a_single']=ic.seqASingle
|
||||
seq[b'seq_b_single']=ic.seqBSingle
|
||||
seq[b'seq_ab_match']=ic.seqABMatch
|
||||
seq[b'seq_a_mismatch']=ic.seqAMismatch
|
||||
seq[b'seq_b_mismatch']=ic.seqBMismatch
|
||||
seq[b'seq_a_insertion']=ic.seqAInsertion
|
||||
seq[b'seq_b_insertion']=ic.seqBInsertion-ic.seqBSingle
|
||||
seq[b'seq_a_deletion']=ic.seqADeletion
|
||||
seq[b'seq_b_deletion']=ic.seqBDeletion
|
||||
seq[b'ali_length']=len(seq)-ic.seqASingle-ic.seqBSingle
|
||||
if seq[b'ali_length']>0:
|
||||
seq[b'score_norm']=float(ali.score)/seq[b'ali_length']
|
||||
|
||||
if hasattr(ali, "direction"):
|
||||
seq[b'direction']=ali.direction
|
||||
if hasattr(ali, "counter"):
|
||||
seq[b'alignement_id']=ali.counter
|
||||
seq[b'seq_a_single']=ic.seqASingle
|
||||
seq[b'seq_b_single']=ic.seqBSingle
|
||||
seq[b'seq_ab_match']=ic.seqABMatch
|
||||
seq[b'seq_a_mismatch']=ic.seqAMismatch
|
||||
seq[b'seq_b_mismatch']=ic.seqBMismatch
|
||||
seq[b'seq_a_insertion']=ic.seqAInsertion
|
||||
seq[b'seq_b_insertion']=ic.seqBInsertion-ic.seqBSingle
|
||||
seq[b'seq_a_deletion']=ic.seqADeletion
|
||||
seq[b'seq_b_deletion']=ic.seqBDeletion
|
||||
seq[b'ali_direction']=ali.direction
|
||||
seq[b'score']=ali.score
|
||||
seq[b'ali_length']=len(seq)-ic.seqASingle-ic.seqBSingle
|
||||
if seq[b'ali_length']>0:
|
||||
seq[b'score_norm']=float(ali.score)/seq[b'ali_length']
|
||||
seq[b'mode']=b'alignment'
|
||||
|
||||
return seq
|
||||
|
||||
|
||||
def buildJoinedSequence(ali, reverse, seq, forward=None):
    """Fill ``seq`` with the forward read directly followed by the reverse read.

    No consensus is computed: the nucleotides and the quality values of both
    reads are simply concatenated (forward first).

    Parameters:
        ali: alignment object; ``ali.score`` and ``ali.direction`` are copied
            into the tags of ``seq``. ``ali[0].wrapped`` is only read when
            ``forward`` is not given.
        reverse: reverse read; its ``seq`` and ``quality`` attributes are
            appended to those of the forward read.
        seq: output sequence object, filled through ``seq.set(...)`` and item
            assignment, then returned.
        forward: optional forward read. When ``None`` (the default), the
            forward read is taken from ``ali[0].wrapped``.

    Returns:
        The same ``seq`` object, with id ``<forward id>_PairedEnd`` and the
        ``score``, ``ali_direction``, ``mode`` and ``pairedend_limit`` tags set.
    """
    if forward is None:
        forward = ali[0].wrapped

    # Copy before extending so the forward read's own quality list is not
    # modified as a side effect of building the joined sequence.
    quality = list(forward.quality)
    quality.extend(reverse.quality)

    seq.set(forward.id + b"_PairedEnd",
            forward.seq + reverse.seq,
            definition=forward.definition,
            quality=quality,
            tags=forward)

    seq[b"score"] = ali.score
    seq[b"ali_direction"] = ali.direction
    seq[b"mode"] = b"joined"
    # Position in the joined sequence where the forward read ends.
    seq[b"pairedend_limit"] = len(forward)
    return seq
|
||||
|
Reference in New Issue
Block a user