ngsfilter and alignpairedend: paired-end reads are now correctly

reversed and labeled to be aligned correctly by alignpairedend
This commit is contained in:
Celine Mercier
2019-07-23 18:56:51 +02:00
parent 1759302829
commit d99702f56f
7 changed files with 39 additions and 5 deletions

View File

@ -188,9 +188,16 @@ def run(config):
if type(entries) == list:
forward = entries[0]
reverse = entries[1]
aligner = Kmer_similarity(forward, view2=reverse, kmer_size=config['alignpairedend']['kmersize'])
aligner = Kmer_similarity(forward, \
view2=reverse, \
kmer_size=config['alignpairedend']['kmersize'], \
reversed_column=None)
else:
aligner = Kmer_similarity(entries, column2=entries[REVERSE_SEQ_COLUMN_NAME], qual_column2=entries[REVERSE_QUALITY_COLUMN_NAME], kmer_size=config['alignpairedend']['kmersize'])
aligner = Kmer_similarity(entries, \
column2=entries[REVERSE_SEQ_COLUMN_NAME], \
qual_column2=entries[REVERSE_QUALITY_COLUMN_NAME], \
kmer_size=config['alignpairedend']['kmersize'], \
reversed_column=entries[b'reversed']) # column created by the ngsfilter tool
ba = alignmentIterator(entries, aligner)

View File

@ -400,6 +400,8 @@ cdef tuple annotate(sequences, infos, verbose=False):
final_sequence = final_sequence[0:reversematch[1][1]]
else:
cut_seq = sequenceR[reversematch[1][2]:]
if not directmatch[0].forward:
cut_seq = cut_seq.reverse_complement
final_sequence[REVERSE_SEQ_COLUMN_NAME] = cut_seq.seq # used by alignpairedend tool
final_sequence[REVERSE_QUALITY_COLUMN_NAME] = cut_seq.quality # used by alignpairedend tool
@ -422,8 +424,9 @@ cdef tuple annotate(sequences, infos, verbose=False):
samples = infos[reversematch[3]]
if not directmatch[0].forward and not not_aligned: # don't reverse complement if not_aligned
if not directmatch[0].forward:
final_sequence = final_sequence.reverse_complement
final_sequence[b'reversed'] = True # used by the alignpairedend tool (in kmer_similarity.c)
sample=None

View File

@ -34,6 +34,7 @@ cdef extern from "kmer_similarity.h" nogil:
index_t elt_idx2,
OBIDMS_column_p qual_col1,
OBIDMS_column_p qual_col2,
OBIDMS_column_p reversed_col,
uint8_t kmer_size,
int32_t** kmer_pos_array,
int32_t* kmer_pos_array_height_p,

View File

@ -28,5 +28,6 @@ cdef class Kmer_similarity:
cdef Obiview_p view2_p
cdef OBIDMS_column_p column2_p
cdef OBIDMS_column_p qual_col2_p
cdef OBIDMS_column_p reversed_col_p
cdef bint build_consensus
cpdef free(self)

View File

@ -56,7 +56,9 @@ cdef class Ali_shifted:
cdef class Kmer_similarity:
def __init__(self, View_NUC_SEQS view1, Column column1=None, Column qual_column1=None, View_NUC_SEQS view2=None, Column column2=None, Column qual_column2=None, uint8_t kmer_size=3, build_consensus=True) :
def __init__(self, View_NUC_SEQS view1, Column column1=None, Column qual_column1=None, \
View_NUC_SEQS view2=None, Column column2=None, Column qual_column2=None, \
uint8_t kmer_size=3, bint build_consensus=True, Column reversed_column=None) :
cdef Column default_seq_col
cdef Column default_qual_col
if kmer_size < 1 or kmer_size > 3:
@ -103,6 +105,10 @@ cdef class Kmer_similarity:
raise Exception("Kmer similarity with no quality column given must be given a NUC_SEQS view")
default_qual_col = view2[QUALITY_COLUMN]
self.qual_col2_p = default_qual_col.pointer()
if reversed_column is None:
self.reversed_col_p = NULL
else:
self.reversed_col_p = reversed_column.pointer()
def __call__(self, object seq1, object seq2):
@ -111,6 +117,7 @@ cdef class Kmer_similarity:
ali_p = kmer_similarity(self.view1_p, self.column1_p, seq1.index, 0, \
self.view2_p, self.column2_p, seq2.index, 0, \
self.qual_col1_p, self.qual_col2_p, \
self.reversed_col_p, \
self.kmer_size, \
&(self.kmer_pos_array_p), self.kmer_pos_array_height_a, \
&(self.shift_array_p), self.shift_array_height_a, \