ngsfilter and alignpairedend: paired-end reads are now correctly
reversed and labeled to be aligned correctly by alignpairedend
This commit is contained in:
@ -188,9 +188,16 @@ def run(config):
|
|||||||
if type(entries) == list:
|
if type(entries) == list:
|
||||||
forward = entries[0]
|
forward = entries[0]
|
||||||
reverse = entries[1]
|
reverse = entries[1]
|
||||||
aligner = Kmer_similarity(forward, view2=reverse, kmer_size=config['alignpairedend']['kmersize'])
|
aligner = Kmer_similarity(forward, \
|
||||||
|
view2=reverse, \
|
||||||
|
kmer_size=config['alignpairedend']['kmersize'], \
|
||||||
|
reversed_column=None)
|
||||||
else:
|
else:
|
||||||
aligner = Kmer_similarity(entries, column2=entries[REVERSE_SEQ_COLUMN_NAME], qual_column2=entries[REVERSE_QUALITY_COLUMN_NAME], kmer_size=config['alignpairedend']['kmersize'])
|
aligner = Kmer_similarity(entries, \
|
||||||
|
column2=entries[REVERSE_SEQ_COLUMN_NAME], \
|
||||||
|
qual_column2=entries[REVERSE_QUALITY_COLUMN_NAME], \
|
||||||
|
kmer_size=config['alignpairedend']['kmersize'], \
|
||||||
|
reversed_column=entries[b'reversed']) # column created by the ngsfilter tool
|
||||||
|
|
||||||
ba = alignmentIterator(entries, aligner)
|
ba = alignmentIterator(entries, aligner)
|
||||||
|
|
||||||
|
@ -400,6 +400,8 @@ cdef tuple annotate(sequences, infos, verbose=False):
|
|||||||
final_sequence = final_sequence[0:reversematch[1][1]]
|
final_sequence = final_sequence[0:reversematch[1][1]]
|
||||||
else:
|
else:
|
||||||
cut_seq = sequenceR[reversematch[1][2]:]
|
cut_seq = sequenceR[reversematch[1][2]:]
|
||||||
|
if not directmatch[0].forward:
|
||||||
|
cut_seq = cut_seq.reverse_complement
|
||||||
final_sequence[REVERSE_SEQ_COLUMN_NAME] = cut_seq.seq # used by alignpairedend tool
|
final_sequence[REVERSE_SEQ_COLUMN_NAME] = cut_seq.seq # used by alignpairedend tool
|
||||||
final_sequence[REVERSE_QUALITY_COLUMN_NAME] = cut_seq.quality # used by alignpairedend tool
|
final_sequence[REVERSE_QUALITY_COLUMN_NAME] = cut_seq.quality # used by alignpairedend tool
|
||||||
|
|
||||||
@ -422,8 +424,9 @@ cdef tuple annotate(sequences, infos, verbose=False):
|
|||||||
|
|
||||||
samples = infos[reversematch[3]]
|
samples = infos[reversematch[3]]
|
||||||
|
|
||||||
if not directmatch[0].forward and not not_aligned: # don't reverse complement if not_aligned
|
if not directmatch[0].forward:
|
||||||
final_sequence = final_sequence.reverse_complement
|
final_sequence = final_sequence.reverse_complement
|
||||||
|
final_sequence[b'reversed'] = True # used by the alignpairedend tool (in kmer_similarity.c)
|
||||||
|
|
||||||
sample=None
|
sample=None
|
||||||
|
|
||||||
|
@ -34,6 +34,7 @@ cdef extern from "kmer_similarity.h" nogil:
|
|||||||
index_t elt_idx2,
|
index_t elt_idx2,
|
||||||
OBIDMS_column_p qual_col1,
|
OBIDMS_column_p qual_col1,
|
||||||
OBIDMS_column_p qual_col2,
|
OBIDMS_column_p qual_col2,
|
||||||
|
OBIDMS_column_p reversed_col,
|
||||||
uint8_t kmer_size,
|
uint8_t kmer_size,
|
||||||
int32_t** kmer_pos_array,
|
int32_t** kmer_pos_array,
|
||||||
int32_t* kmer_pos_array_height_p,
|
int32_t* kmer_pos_array_height_p,
|
||||||
|
@ -28,5 +28,6 @@ cdef class Kmer_similarity:
|
|||||||
cdef Obiview_p view2_p
|
cdef Obiview_p view2_p
|
||||||
cdef OBIDMS_column_p column2_p
|
cdef OBIDMS_column_p column2_p
|
||||||
cdef OBIDMS_column_p qual_col2_p
|
cdef OBIDMS_column_p qual_col2_p
|
||||||
|
cdef OBIDMS_column_p reversed_col_p
|
||||||
cdef bint build_consensus
|
cdef bint build_consensus
|
||||||
cpdef free(self)
|
cpdef free(self)
|
@ -56,7 +56,9 @@ cdef class Ali_shifted:
|
|||||||
|
|
||||||
|
|
||||||
cdef class Kmer_similarity:
|
cdef class Kmer_similarity:
|
||||||
def __init__(self, View_NUC_SEQS view1, Column column1=None, Column qual_column1=None, View_NUC_SEQS view2=None, Column column2=None, Column qual_column2=None, uint8_t kmer_size=3, build_consensus=True) :
|
def __init__(self, View_NUC_SEQS view1, Column column1=None, Column qual_column1=None, \
|
||||||
|
View_NUC_SEQS view2=None, Column column2=None, Column qual_column2=None, \
|
||||||
|
uint8_t kmer_size=3, bint build_consensus=True, Column reversed_column=None) :
|
||||||
cdef Column default_seq_col
|
cdef Column default_seq_col
|
||||||
cdef Column default_qual_col
|
cdef Column default_qual_col
|
||||||
if kmer_size < 1 or kmer_size > 3:
|
if kmer_size < 1 or kmer_size > 3:
|
||||||
@ -103,6 +105,10 @@ cdef class Kmer_similarity:
|
|||||||
raise Exception("Kmer similarity with no quality column given must be given a NUC_SEQS view")
|
raise Exception("Kmer similarity with no quality column given must be given a NUC_SEQS view")
|
||||||
default_qual_col = view2[QUALITY_COLUMN]
|
default_qual_col = view2[QUALITY_COLUMN]
|
||||||
self.qual_col2_p = default_qual_col.pointer()
|
self.qual_col2_p = default_qual_col.pointer()
|
||||||
|
if reversed_column is None:
|
||||||
|
self.reversed_col_p = NULL
|
||||||
|
else:
|
||||||
|
self.reversed_col_p = reversed_column.pointer()
|
||||||
|
|
||||||
|
|
||||||
def __call__(self, object seq1, object seq2):
|
def __call__(self, object seq1, object seq2):
|
||||||
@ -111,6 +117,7 @@ cdef class Kmer_similarity:
|
|||||||
ali_p = kmer_similarity(self.view1_p, self.column1_p, seq1.index, 0, \
|
ali_p = kmer_similarity(self.view1_p, self.column1_p, seq1.index, 0, \
|
||||||
self.view2_p, self.column2_p, seq2.index, 0, \
|
self.view2_p, self.column2_p, seq2.index, 0, \
|
||||||
self.qual_col1_p, self.qual_col2_p, \
|
self.qual_col1_p, self.qual_col2_p, \
|
||||||
|
self.reversed_col_p, \
|
||||||
self.kmer_size, \
|
self.kmer_size, \
|
||||||
&(self.kmer_pos_array_p), self.kmer_pos_array_height_a, \
|
&(self.kmer_pos_array_p), self.kmer_pos_array_height_a, \
|
||||||
&(self.shift_array_p), self.shift_array_height_a, \
|
&(self.shift_array_p), self.shift_array_height_a, \
|
||||||
|
@ -65,7 +65,7 @@ void obi_free_shifted_ali(Obi_ali_p ali)
|
|||||||
|
|
||||||
Obi_ali_p kmer_similarity(Obiview_p view1, OBIDMS_column_p column1, index_t idx1, index_t elt_idx1,
|
Obi_ali_p kmer_similarity(Obiview_p view1, OBIDMS_column_p column1, index_t idx1, index_t elt_idx1,
|
||||||
Obiview_p view2, OBIDMS_column_p column2, index_t idx2, index_t elt_idx2,
|
Obiview_p view2, OBIDMS_column_p column2, index_t idx2, index_t elt_idx2,
|
||||||
OBIDMS_column_p qual_col1, OBIDMS_column_p qual_col2,
|
OBIDMS_column_p qual_col1, OBIDMS_column_p qual_col2, OBIDMS_column_p reversed_column,
|
||||||
uint8_t kmer_size,
|
uint8_t kmer_size,
|
||||||
int32_t** kmer_pos_array_p, int32_t* kmer_pos_array_height_p,
|
int32_t** kmer_pos_array_p, int32_t* kmer_pos_array_height_p,
|
||||||
int32_t** shift_array_p, int32_t* shift_array_height_p,
|
int32_t** shift_array_p, int32_t* shift_array_height_p,
|
||||||
@ -78,6 +78,7 @@ Obi_ali_p kmer_similarity(Obiview_p view1, OBIDMS_column_p column1, index_t idx1
|
|||||||
Obi_ali_p ali = NULL;
|
Obi_ali_p ali = NULL;
|
||||||
int i, j;
|
int i, j;
|
||||||
bool switched_seqs;
|
bool switched_seqs;
|
||||||
|
bool reversed;
|
||||||
int score = 0;
|
int score = 0;
|
||||||
Obi_blob_p blob1 = NULL;
|
Obi_blob_p blob1 = NULL;
|
||||||
Obi_blob_p blob2 = NULL;
|
Obi_blob_p blob2 = NULL;
|
||||||
@ -189,6 +190,14 @@ Obi_ali_p kmer_similarity(Obiview_p view1, OBIDMS_column_p column1, index_t idx1
|
|||||||
seq2 = reverse_complement_sequence(seq2);
|
seq2 = reverse_complement_sequence(seq2);
|
||||||
blob2 = obi_seq_to_blob(seq2);
|
blob2 = obi_seq_to_blob(seq2);
|
||||||
|
|
||||||
|
// Check if the sequences have been reverse-complemented by the ngsfilter tool
|
||||||
|
if (reversed_column != NULL)
|
||||||
|
reversed = obi_get_bool_with_elt_idx_and_col_p_in_view(view1, reversed_column, idx1, 0); // assuming that reversed_column is in view1 is dirty but faster
|
||||||
|
else
|
||||||
|
reversed = false;
|
||||||
|
if (reversed)
|
||||||
|
switched_seqs = !switched_seqs;
|
||||||
|
|
||||||
// Save total length for the shift counts array
|
// Save total length for the shift counts array
|
||||||
total_len = len1 + len2 + 1; // +1 for shift 0
|
total_len = len1 + len2 + 1; // +1 for shift 0
|
||||||
|
|
||||||
|
@ -78,6 +78,11 @@ void obi_free_shifted_ali(Obi_ali_p ali);
|
|||||||
* @param column2 A pointer on the column containing the second sequence.
|
* @param column2 A pointer on the column containing the second sequence.
|
||||||
* @param idx2 The index of the second sequence in view2.
|
* @param idx2 The index of the second sequence in view2.
|
||||||
* @param elt_idx2 The element index of the second sequence in column2.
|
* @param elt_idx2 The element index of the second sequence in column2.
|
||||||
|
* @param qual_col1 A pointer on the column containing the quality associated with the first sequence (in view1).
|
||||||
|
* @param qual_col2 A pointer on the column containing the quality associated with the second sequence (in view2).
|
||||||
|
* @param reversed_column A pointer on the column containing a boolean indicating whether the sequences correspond
|
||||||
|
* to paired-end reads that might have been reversed before aligning them
|
||||||
|
* (e.g. when the ngsfilter tool is used before alignpairedend).
|
||||||
* @param kmer_size The kmer length to use. Must be >= 1 <= 4.
|
* @param kmer_size The kmer length to use. Must be >= 1 <= 4.
|
||||||
* @param kmer_pos_array The array used to store kmer positions. If NULL, allocated by the function.
|
* @param kmer_pos_array The array used to store kmer positions. If NULL, allocated by the function.
|
||||||
* If needed, reallocated to a bigger size.
|
* If needed, reallocated to a bigger size.
|
||||||
@ -109,6 +114,7 @@ Obi_ali_p kmer_similarity(Obiview_p view1,
|
|||||||
index_t elt_idx2,
|
index_t elt_idx2,
|
||||||
OBIDMS_column_p qual_col1,
|
OBIDMS_column_p qual_col1,
|
||||||
OBIDMS_column_p qual_col2,
|
OBIDMS_column_p qual_col2,
|
||||||
|
OBIDMS_column_p reversed_column,
|
||||||
uint8_t kmer_size,
|
uint8_t kmer_size,
|
||||||
int32_t** kmer_pos_array_p,
|
int32_t** kmer_pos_array_p,
|
||||||
int32_t* kmer_pos_array_height_p,
|
int32_t* kmer_pos_array_height_p,
|
||||||
|
Reference in New Issue
Block a user