diff --git a/python/obitools3/commands/alignpairedend.pyx b/python/obitools3/commands/alignpairedend.pyx index dab3483..2530f63 100755 --- a/python/obitools3/commands/alignpairedend.pyx +++ b/python/obitools3/commands/alignpairedend.pyx @@ -188,9 +188,16 @@ def run(config): if type(entries) == list: forward = entries[0] reverse = entries[1] - aligner = Kmer_similarity(forward, view2=reverse, kmer_size=config['alignpairedend']['kmersize']) + aligner = Kmer_similarity(forward, \ + view2=reverse, \ + kmer_size=config['alignpairedend']['kmersize'], \ + reversed_column=None) else: - aligner = Kmer_similarity(entries, column2=entries[REVERSE_SEQ_COLUMN_NAME], qual_column2=entries[REVERSE_QUALITY_COLUMN_NAME], kmer_size=config['alignpairedend']['kmersize']) + aligner = Kmer_similarity(entries, \ + column2=entries[REVERSE_SEQ_COLUMN_NAME], \ + qual_column2=entries[REVERSE_QUALITY_COLUMN_NAME], \ + kmer_size=config['alignpairedend']['kmersize'], \ + reversed_column=entries[b'reversed']) # column created by the ngsfilter tool ba = alignmentIterator(entries, aligner) diff --git a/python/obitools3/commands/ngsfilter.pyx b/python/obitools3/commands/ngsfilter.pyx index 073855d..67e69ab 100755 --- a/python/obitools3/commands/ngsfilter.pyx +++ b/python/obitools3/commands/ngsfilter.pyx @@ -400,6 +400,8 @@ cdef tuple annotate(sequences, infos, verbose=False): final_sequence = final_sequence[0:reversematch[1][1]] else: cut_seq = sequenceR[reversematch[1][2]:] + if not directmatch[0].forward: + cut_seq = cut_seq.reverse_complement final_sequence[REVERSE_SEQ_COLUMN_NAME] = cut_seq.seq # used by alignpairedend tool final_sequence[REVERSE_QUALITY_COLUMN_NAME] = cut_seq.quality # used by alignpairedend tool @@ -422,8 +424,9 @@ cdef tuple annotate(sequences, infos, verbose=False): samples = infos[reversematch[3]] - if not directmatch[0].forward and not not_aligned: # don't reverse complement if not_aligned + if not directmatch[0].forward: final_sequence = final_sequence.reverse_complement + final_sequence[b'reversed'] = True # used by the alignpairedend tool (in kmer_similarity.c) sample=None diff --git a/python/obitools3/dms/capi/kmer_similarity.pxd b/python/obitools3/dms/capi/kmer_similarity.pxd index f3fb77f..6858768 100755 --- a/python/obitools3/dms/capi/kmer_similarity.pxd +++ b/python/obitools3/dms/capi/kmer_similarity.pxd @@ -34,6 +34,7 @@ cdef extern from "kmer_similarity.h" nogil: index_t elt_idx2, OBIDMS_column_p qual_col1, OBIDMS_column_p qual_col2, + OBIDMS_column_p reversed_col, uint8_t kmer_size, int32_t** kmer_pos_array, int32_t* kmer_pos_array_height_p, diff --git a/python/obitools3/libalign/shifted_ali.pxd b/python/obitools3/libalign/shifted_ali.pxd index 4e45bf9..4eaa4fb 100755 --- a/python/obitools3/libalign/shifted_ali.pxd +++ b/python/obitools3/libalign/shifted_ali.pxd @@ -28,5 +28,6 @@ cdef class Kmer_similarity: cdef Obiview_p view2_p cdef OBIDMS_column_p column2_p cdef OBIDMS_column_p qual_col2_p + cdef OBIDMS_column_p reversed_col_p cdef bint build_consensus cpdef free(self) \ No newline at end of file diff --git a/python/obitools3/libalign/shifted_ali.pyx b/python/obitools3/libalign/shifted_ali.pyx index 7a247ad..35e1830 100755 --- a/python/obitools3/libalign/shifted_ali.pyx +++ b/python/obitools3/libalign/shifted_ali.pyx @@ -56,7 +56,9 @@ cdef class Ali_shifted: cdef class Kmer_similarity: - def __init__(self, View_NUC_SEQS view1, Column column1=None, Column qual_column1=None, View_NUC_SEQS view2=None, Column column2=None, Column qual_column2=None, uint8_t kmer_size=3, build_consensus=True) : + def __init__(self, View_NUC_SEQS view1, Column column1=None, Column qual_column1=None, \ + View_NUC_SEQS view2=None, Column column2=None, Column qual_column2=None, \ + uint8_t kmer_size=3, bint build_consensus=True, Column reversed_column=None) : cdef Column default_seq_col cdef Column default_qual_col if kmer_size < 1 or kmer_size > 3: @@ -103,6 +105,10 @@ cdef class Kmer_similarity: raise Exception("Kmer similarity with no quality column given must be given a NUC_SEQS view") default_qual_col = view2[QUALITY_COLUMN] self.qual_col2_p = default_qual_col.pointer() + if reversed_column is None: + self.reversed_col_p = NULL + else: + self.reversed_col_p = reversed_column.pointer() def __call__(self, object seq1, object seq2): @@ -111,6 +117,7 @@ cdef class Kmer_similarity: ali_p = kmer_similarity(self.view1_p, self.column1_p, seq1.index, 0, \ self.view2_p, self.column2_p, seq2.index, 0, \ self.qual_col1_p, self.qual_col2_p, \ + self.reversed_col_p, \ self.kmer_size, \ &(self.kmer_pos_array_p), self.kmer_pos_array_height_a, \ &(self.shift_array_p), self.shift_array_height_a, \ diff --git a/src/kmer_similarity.c b/src/kmer_similarity.c index a9787a6..2097460 100755 --- a/src/kmer_similarity.c +++ b/src/kmer_similarity.c @@ -65,7 +65,7 @@ void obi_free_shifted_ali(Obi_ali_p ali) Obi_ali_p kmer_similarity(Obiview_p view1, OBIDMS_column_p column1, index_t idx1, index_t elt_idx1, Obiview_p view2, OBIDMS_column_p column2, index_t idx2, index_t elt_idx2, - OBIDMS_column_p qual_col1, OBIDMS_column_p qual_col2, + OBIDMS_column_p qual_col1, OBIDMS_column_p qual_col2, OBIDMS_column_p reversed_column, uint8_t kmer_size, int32_t** kmer_pos_array_p, int32_t* kmer_pos_array_height_p, int32_t** shift_array_p, int32_t* shift_array_height_p, @@ -78,6 +78,7 @@ Obi_ali_p kmer_similarity(Obiview_p view1, OBIDMS_column_p column1, index_t idx1 Obi_ali_p ali = NULL; int i, j; bool switched_seqs; + bool reversed; int score = 0; Obi_blob_p blob1 = NULL; Obi_blob_p blob2 = NULL; @@ -189,6 +190,14 @@ Obi_ali_p kmer_similarity(Obiview_p view1, OBIDMS_column_p column1, index_t idx1 seq2 = reverse_complement_sequence(seq2); blob2 = obi_seq_to_blob(seq2); + // Check if the sequences have been reverse-complemented by the ngsfilter tool + if (reversed_column != NULL) + reversed = obi_get_bool_with_elt_idx_and_col_p_in_view(view1, reversed_column, idx1, 0); // assuming that reversed_column is in view1 is dirty but faster + else + reversed = false; + if (reversed) + switched_seqs = !switched_seqs; + // Save total length for the shift counts array total_len = len1 + len2 + 1; // +1 for shift 0 diff --git a/src/kmer_similarity.h b/src/kmer_similarity.h index 738fccf..5bac38c 100755 --- a/src/kmer_similarity.h +++ b/src/kmer_similarity.h @@ -78,6 +78,11 @@ void obi_free_shifted_ali(Obi_ali_p ali); * @param column2 A pointer on the column containing the second sequence. * @param idx2 The index of the second sequence in view2. * @param elt_idx2 The element index of the second sequence in column2. + * @param qual_col1 A pointer on the column containing the quality associated with the first sequence (in view1). + * @param qual_col2 A pointer on the column containing the quality associated with the second sequence (in view2). + * @param reversed_column A pointer on the column containing a boolean indicating whether the sequences correspond + * to paired-end reads that might have been reversed before aligning them + * (e.g. when the ngsfilter tool is used before alignpairedend). * @param kmer_size The kmer length to use. Must be >= 1 <= 4. * @param kmer_pos_array The array used to store kmer positions. If NULL, allocated by the function. * If needed, reallocated to a bigger size. @@ -109,6 +114,7 @@ Obi_ali_p kmer_similarity(Obiview_p view1, index_t elt_idx2, OBIDMS_column_p qual_col1, OBIDMS_column_p qual_col2, + OBIDMS_column_p reversed_column, uint8_t kmer_size, int32_t** kmer_pos_array_p, int32_t* kmer_pos_array_height_p,