Fixed the new alignpairedend to work after ngsfilter with the 9879847

possible cases
This commit is contained in:
Celine Mercier
2019-02-17 18:32:35 +01:00
parent 4ddd1a1c37
commit e026e9ec83
9 changed files with 151 additions and 83 deletions

View File

@ -15,16 +15,12 @@ from obitools3.libalign._qsrassemble import QSolexaRightReverseAssemble
from obitools3.libalign._solexapairend import buildConsensus, buildJoinedSequence
from obitools3.dms.obiseq cimport Nuc_Seq
from obitools3.libalign.shifted_ali cimport Kmer_similarity, Ali_shifted
from obitools3.commands.ngsfilter import REVERSE_SEQ_COLUMN_NAME, REVERSE_QUALITY_COLUMN_NAME
import sys
import os
REVERSE_SEQ_COLUMN_NAME = b"REVERSE_SEQUENCE"
REVERSE_QUALITY_COLUMN_NAME = b"REVERSE_QUALITY"
__title__="Aligns paired-ended reads"
@ -106,8 +102,7 @@ def alignmentIterator(entries, aligner):
else:
seqF = Nuc_Seq.new_from_stored(entries[i])
seqR = Nuc_Seq(seqF.id, seqF[REVERSE_SEQ_COLUMN_NAME], quality=seqF[REVERSE_QUALITY_COLUMN_NAME])
seqF.pop(REVERSE_SEQ_COLUMN_NAME)
seqF.pop(REVERSE_QUALITY_COLUMN_NAME)
seqR.index = i
ali = aligner(seqF, seqR)
@ -196,7 +191,7 @@ def run(config):
reverse = entries[1]
aligner = Kmer_similarity(forward, view2=reverse, kmer_size=config['alignpairedend']['kmersize'])
else:
aligner = Kmer_similarity(entries, kmer_size=config['alignpairedend']['kmersize'])
aligner = Kmer_similarity(entries, column2=entries[REVERSE_SEQ_COLUMN_NAME], qual_column2=entries[REVERSE_QUALITY_COLUMN_NAME], kmer_size=config['alignpairedend']['kmersize'])
ba = alignmentIterator(entries, aligner)

View File

@ -32,6 +32,8 @@ cdef extern from "kmer_similarity.h" nogil:
OBIDMS_column_p column2,
index_t idx2,
index_t elt_idx2,
OBIDMS_column_p qual_col1,
OBIDMS_column_p qual_col2,
uint8_t kmer_size,
int32_t* kmer_pos_array,
int32_t* kmer_pos_array_height_p,

View File

@ -5,6 +5,7 @@ from .view.view cimport Line
cdef class Seq(dict) :
cdef int _index
cpdef object clone(self)
cpdef str get_str(self)
cpdef get_symbol_at(self, int pos)
@ -22,6 +23,7 @@ cdef class Nuc_Seq(Seq) :
cdef class Seq_Stored(Line) :
cpdef get_symbol_at(self, int pos)
cpdef get_slice(self, slice slice_to_get)
@ -31,6 +33,7 @@ cdef class Nuc_Seq_Stored(Seq_Stored) :
cdef Nuc_Seq _reverse_complement
cdef object _quality_array
cdef bytes _seq
cpdef set(self, object id, object seq, object definition=*, object quality=*, int offset=*, object tags=*)
cpdef set_quality_int(self, list new_qual)
cpdef set_quality_char(self, object new_qual, int offset=*)

View File

@ -40,6 +40,7 @@ cdef class Seq(dict) :
self.id = id
self.seq = seq
self.definition = definition
self._index = -1
if tags is not None :
for k in tags:
k_b = tobytes(k)
@ -54,6 +55,7 @@ cdef class Seq(dict) :
# Build a new Seq object from a stored sequence (Seq_Stored).
# The id, seq, definition and quality are read from seq_to_clone, and
# seq_to_clone itself is handed to the constructor as the source of tags.
# Returns the newly created Seq.
def new_from_stored(Seq_Stored seq_to_clone) :
cdef Seq new_seq
new_seq = Seq(seq_to_clone.id, seq_to_clone.seq, definition=seq_to_clone.definition, quality=seq_to_clone.quality, tags=seq_to_clone)
# Copy the stored sequence's _index onto the new object
# (presumably so the copy can still be located in its view -- TODO confirm).
new_seq._index = seq_to_clone._index
return new_seq
def __contains__(self, object key):
@ -128,7 +130,16 @@ cdef class Seq(dict) :
if new_definition is not None:
new_definition = tobytes(new_definition)
self[DEFINITION_COLUMN] = new_definition
# sequence index (for reference in a view eventually) property getter and setter
# Getter: returns the value currently held in the _index attribute.
@property
def index(self): # @ReservedAssignment
return self._index
# Setter: stores idx (declared as a C int in the signature) into _index.
@index.setter
def index(self, int idx): # @DuplicatedSignature
self._index = idx
cdef class Nuc_Seq(Seq) :
@ -160,6 +171,7 @@ cdef class Nuc_Seq(Seq) :
# Build a new Nuc_Seq object from a stored nucleotide sequence (Nuc_Seq_Stored).
# The id, seq, definition and quality are read from seq_to_clone, and
# seq_to_clone itself is handed to the constructor as the source of tags.
# Returns the newly created Nuc_Seq.
def new_from_stored(Nuc_Seq_Stored seq_to_clone) :
cdef Nuc_Seq new_seq
new_seq = Nuc_Seq(seq_to_clone.id, seq_to_clone.seq, definition=seq_to_clone.definition, quality=seq_to_clone.quality, tags=seq_to_clone)
# Copy the stored sequence's index (read through its `index` attribute) onto the new object
# (presumably so the copy can still be located in its view -- TODO confirm).
new_seq._index = seq_to_clone.index
return new_seq
# is_revcomp property getter and setter (boolean indicating whether the sequence was created by reverse complementing another sequence)

View File

@ -13,7 +13,7 @@ from obitools3.dms.capi.obidmscolumn cimport OBIDMS_column_p
from obitools3.dms.view.view cimport View
from obitools3.dms.column.column cimport Column
from obitools3.commands.ngsfilter import REVERSE_SEQ_COLUMN_NAME, REVERSE_QUALITY_COLUMN_NAME
from math import log10
@ -182,7 +182,7 @@ def buildConsensus(ali, seq, ref_tags=None):
col_p = column.pointer()
# doesn't work because uint8_t* are forced into bytes by cython (nothing read/kept beyond 0 values)
#obi_set_qual_int_with_elt_idx_and_col_p_in_view(view_p, col_p, seq.index, 0, ali.consensus_qual, ali.consensus_len)
seq.set(ref_tags.id+b"_CONS", ali.consensus_seq, quality=ali.consensus_qual, tags=ref_tags)
seq.set(ref_tags.id+b"_CONS", ali.consensus_seq, quality=ali.consensus_qual)
seq[b'ali_length'] = ali.consensus_len
seq[b'score_norm']=float(ali.score)/ali.consensus_len
seq[b'shift']=ali.shift
@ -208,7 +208,7 @@ def buildConsensus(ali, seq, ref_tags=None):
quality[i] = min(42, round(-10*log10(p)))
i+=1
seq.set(ali[0].wrapped.id+b"_CONS", sseq, quality=quality, tags=ali[0].wrapped)
seq.set(ali[0].wrapped.id+b"_CONS", sseq, quality=quality)
if hasattr(ali, "counter"):
seq[b'alignement_id']=ali.counter
@ -224,12 +224,18 @@ def buildConsensus(ali, seq, ref_tags=None):
seq[b'ali_length']=len(seq)-ic.seqASingle-ic.seqBSingle
if seq[b'ali_length']>0:
seq[b'score_norm']=float(ali.score)/seq[b'ali_length']
ref_tags = ali[0].wrapped
if hasattr(ali, "direction"):
seq[b'ali_direction']=ali.direction
seq[b'score']=ali.score
seq[b'mode']=b'alignment'
for tag in ref_tags:
if tag != REVERSE_SEQ_COLUMN_NAME and tag != REVERSE_QUALITY_COLUMN_NAME:
seq[tag] = ref_tags[tag]
return seq
@ -241,10 +247,13 @@ def buildJoinedSequence(ali, reverse, seq, forward=None):
s = forward.seq + reverse.seq
quality = forward.quality
quality.extend(reverse.quality)
seq.set(forward.id +b"_PairedEnd", s, definition=forward.definition, quality=quality, tags=forward)
seq.set(forward.id +b"_PairedEnd", s, definition=forward.definition, quality=quality)
seq[b"score"]=ali.score
seq[b"ali_direction"]=ali.direction
seq[b"mode"]=b"joined"
seq[b"pairedend_limit"]=len(forward)
for tag in forward:
if tag != REVERSE_SEQ_COLUMN_NAME and tag != REVERSE_QUALITY_COLUMN_NAME:
seq[tag] = forward[tag]
return seq

View File

@ -24,7 +24,9 @@ cdef class Kmer_similarity:
cdef int32_t shift_count_array_height_a[1]
cdef Obiview_p view1_p
cdef OBIDMS_column_p column1_p
cdef OBIDMS_column_p qual_col1_p
cdef Obiview_p view2_p
cdef OBIDMS_column_p column2_p
cdef OBIDMS_column_p qual_col2_p
cdef bint build_consensus
cpdef free(self)

View File

@ -1,8 +1,7 @@
#cython: language_level=3
from obitools3.dms.capi.obiview cimport NUC_SEQUENCE_COLUMN
from obitools3.dms.capi.obiview cimport NUC_SEQUENCE_COLUMN, QUALITY_COLUMN
from obitools3.dms.obiseq cimport Nuc_Seq_Stored
from obitools3.dms.capi.kmer_similarity cimport kmer_similarity, Obi_ali_p, obi_free_shifted_ali
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
from obitools3.dms.column.column cimport Column
@ -52,13 +51,14 @@ cdef class Ali_shifted:
# Returns the `direction` member of the object obtained from self.pointer().
# NOTE(review): the accessor decorator, if any, sits above this hunk and is not visible here.
def direction(self):
return self.pointer().direction
cpdef free(self):
cpdef free(self): # TODO allocated memory could be kept
obi_free_shifted_ali(self.pointer())
cdef class Kmer_similarity:
def __init__(self, View_NUC_SEQS view1, Column column1=None, View_NUC_SEQS view2=None, Column column2=None, uint8_t kmer_size=3, build_consensus=True) :
def __init__(self, View_NUC_SEQS view1, Column column1=None, Column qual_column1=None, View_NUC_SEQS view2=None, Column column2=None, Column qual_column2=None, uint8_t kmer_size=3, build_consensus=True) :
cdef Column default_seq_col
cdef Column default_qual_col
if kmer_size < 1 or kmer_size > 3:
raise Exception("Kmer size to compute kmer similarity must be >=1 or <=4")
self.kmer_pos_array_p = NULL
@ -89,13 +89,28 @@ cdef class Kmer_similarity:
raise Exception("Kmer similarity with no sequence column given must be given a NUC_SEQS view")
default_seq_col = view2[NUC_SEQUENCE_COLUMN]
self.column2_p = default_seq_col.pointer()
if qual_column1 is not None:
self.qual_col1_p = qual_column1.pointer()
else:
if type(view1) != View_NUC_SEQS:
raise Exception("Kmer similarity with no quality column given must be given a NUC_SEQS view")
default_qual_col = view1[QUALITY_COLUMN]
self.qual_col1_p = default_qual_col.pointer()
if qual_column2 is not None:
self.qual_col2_p = qual_column2.pointer()
else:
if type(view2) != View_NUC_SEQS:
raise Exception("Kmer similarity with no quality column given must be given a NUC_SEQS view")
default_qual_col = view2[QUALITY_COLUMN]
self.qual_col2_p = default_qual_col.pointer()
def __call__(self, Nuc_Seq_Stored seq1, Nuc_Seq_Stored seq2):
def __call__(self, object seq1, object seq2):
cdef Obi_ali_p ali_p
cdef Ali_shifted ali
ali_p = kmer_similarity(self.view1_p, self.column1_p, seq1.index, 0, \
self.view2_p, self.column2_p, seq2.index, 0, \
self.qual_col1_p, self.qual_col2_p, \
self.kmer_size, \
self.kmer_pos_array_p, self.kmer_pos_array_height_a, \
self.shift_array_p, self.shift_array_height_a, \