Fixed the new alignpairedend to work after ngsfilter with the 9879847
possible cases
This commit is contained in:
@ -15,16 +15,12 @@ from obitools3.libalign._qsrassemble import QSolexaRightReverseAssemble
|
||||
from obitools3.libalign._solexapairend import buildConsensus, buildJoinedSequence
|
||||
from obitools3.dms.obiseq cimport Nuc_Seq
|
||||
from obitools3.libalign.shifted_ali cimport Kmer_similarity, Ali_shifted
|
||||
|
||||
from obitools3.commands.ngsfilter import REVERSE_SEQ_COLUMN_NAME, REVERSE_QUALITY_COLUMN_NAME
|
||||
|
||||
import sys
|
||||
import os
|
||||
|
||||
|
||||
REVERSE_SEQ_COLUMN_NAME = b"REVERSE_SEQUENCE"
|
||||
REVERSE_QUALITY_COLUMN_NAME = b"REVERSE_QUALITY"
|
||||
|
||||
|
||||
__title__="Aligns paired-ended reads"
|
||||
|
||||
|
||||
@ -106,8 +102,7 @@ def alignmentIterator(entries, aligner):
|
||||
else:
|
||||
seqF = Nuc_Seq.new_from_stored(entries[i])
|
||||
seqR = Nuc_Seq(seqF.id, seqF[REVERSE_SEQ_COLUMN_NAME], quality=seqF[REVERSE_QUALITY_COLUMN_NAME])
|
||||
seqF.pop(REVERSE_SEQ_COLUMN_NAME)
|
||||
seqF.pop(REVERSE_QUALITY_COLUMN_NAME)
|
||||
seqR.index = i
|
||||
|
||||
ali = aligner(seqF, seqR)
|
||||
|
||||
@ -196,7 +191,7 @@ def run(config):
|
||||
reverse = entries[1]
|
||||
aligner = Kmer_similarity(forward, view2=reverse, kmer_size=config['alignpairedend']['kmersize'])
|
||||
else:
|
||||
aligner = Kmer_similarity(entries, kmer_size=config['alignpairedend']['kmersize'])
|
||||
aligner = Kmer_similarity(entries, column2=entries[REVERSE_SEQ_COLUMN_NAME], qual_column2=entries[REVERSE_QUALITY_COLUMN_NAME], kmer_size=config['alignpairedend']['kmersize'])
|
||||
|
||||
ba = alignmentIterator(entries, aligner)
|
||||
|
||||
|
@ -32,6 +32,8 @@ cdef extern from "kmer_similarity.h" nogil:
|
||||
OBIDMS_column_p column2,
|
||||
index_t idx2,
|
||||
index_t elt_idx2,
|
||||
OBIDMS_column_p qual_col1,
|
||||
OBIDMS_column_p qual_col2,
|
||||
uint8_t kmer_size,
|
||||
int32_t* kmer_pos_array,
|
||||
int32_t* kmer_pos_array_height_p,
|
||||
|
@ -5,6 +5,7 @@ from .view.view cimport Line
|
||||
|
||||
cdef class Seq(dict) :
|
||||
|
||||
cdef int _index
|
||||
cpdef object clone(self)
|
||||
cpdef str get_str(self)
|
||||
cpdef get_symbol_at(self, int pos)
|
||||
@ -22,6 +23,7 @@ cdef class Nuc_Seq(Seq) :
|
||||
|
||||
|
||||
cdef class Seq_Stored(Line) :
|
||||
|
||||
cpdef get_symbol_at(self, int pos)
|
||||
cpdef get_slice(self, slice slice_to_get)
|
||||
|
||||
@ -31,6 +33,7 @@ cdef class Nuc_Seq_Stored(Seq_Stored) :
|
||||
cdef Nuc_Seq _reverse_complement
|
||||
cdef object _quality_array
|
||||
cdef bytes _seq
|
||||
|
||||
cpdef set(self, object id, object seq, object definition=*, object quality=*, int offset=*, object tags=*)
|
||||
cpdef set_quality_int(self, list new_qual)
|
||||
cpdef set_quality_char(self, object new_qual, int offset=*)
|
||||
|
@ -40,6 +40,7 @@ cdef class Seq(dict) :
|
||||
self.id = id
|
||||
self.seq = seq
|
||||
self.definition = definition
|
||||
self._index = -1
|
||||
if tags is not None :
|
||||
for k in tags:
|
||||
k_b = tobytes(k)
|
||||
@ -54,6 +55,7 @@ cdef class Seq(dict) :
|
||||
def new_from_stored(Seq_Stored seq_to_clone) :
|
||||
cdef Seq new_seq
|
||||
new_seq = Seq(seq_to_clone.id, seq_to_clone.seq, definition=seq_to_clone.definition, quality=seq_to_clone.quality, tags=seq_to_clone)
|
||||
new_seq._index = seq_to_clone._index
|
||||
return new_seq
|
||||
|
||||
def __contains__(self, object key):
|
||||
@ -128,7 +130,16 @@ cdef class Seq(dict) :
|
||||
if new_definition is not None:
|
||||
new_definition = tobytes(new_definition)
|
||||
self[DEFINITION_COLUMN] = new_definition
|
||||
|
||||
|
||||
# sequence index (for reference in a view eventually) property getter and setter
|
||||
@property
|
||||
def index(self): # @ReservedAssignment
|
||||
return self._index
|
||||
|
||||
@index.setter
|
||||
def index(self, int idx): # @DuplicatedSignature
|
||||
self._index = idx
|
||||
|
||||
|
||||
cdef class Nuc_Seq(Seq) :
|
||||
|
||||
@ -160,6 +171,7 @@ cdef class Nuc_Seq(Seq) :
|
||||
def new_from_stored(Nuc_Seq_Stored seq_to_clone) :
|
||||
cdef Nuc_Seq new_seq
|
||||
new_seq = Nuc_Seq(seq_to_clone.id, seq_to_clone.seq, definition=seq_to_clone.definition, quality=seq_to_clone.quality, tags=seq_to_clone)
|
||||
new_seq._index = seq_to_clone.index
|
||||
return new_seq
|
||||
|
||||
# is_revcomp property getter and setter (boolean indicating whether the sequence was created by reverse complementing another sequence)
|
||||
|
@ -13,7 +13,7 @@ from obitools3.dms.capi.obidmscolumn cimport OBIDMS_column_p
|
||||
|
||||
from obitools3.dms.view.view cimport View
|
||||
from obitools3.dms.column.column cimport Column
|
||||
|
||||
from obitools3.commands.ngsfilter import REVERSE_SEQ_COLUMN_NAME, REVERSE_QUALITY_COLUMN_NAME
|
||||
|
||||
from math import log10
|
||||
|
||||
@ -182,7 +182,7 @@ def buildConsensus(ali, seq, ref_tags=None):
|
||||
col_p = column.pointer()
|
||||
# doesn't work because uint8_t* are forced into bytes by cython (nothing read/kept beyond 0 values)
|
||||
#obi_set_qual_int_with_elt_idx_and_col_p_in_view(view_p, col_p, seq.index, 0, ali.consensus_qual, ali.consensus_len)
|
||||
seq.set(ref_tags.id+b"_CONS", ali.consensus_seq, quality=ali.consensus_qual, tags=ref_tags)
|
||||
seq.set(ref_tags.id+b"_CONS", ali.consensus_seq, quality=ali.consensus_qual)
|
||||
seq[b'ali_length'] = ali.consensus_len
|
||||
seq[b'score_norm']=float(ali.score)/ali.consensus_len
|
||||
seq[b'shift']=ali.shift
|
||||
@ -208,7 +208,7 @@ def buildConsensus(ali, seq, ref_tags=None):
|
||||
quality[i] = min(42, round(-10*log10(p)))
|
||||
i+=1
|
||||
|
||||
seq.set(ali[0].wrapped.id+b"_CONS", sseq, quality=quality, tags=ali[0].wrapped)
|
||||
seq.set(ali[0].wrapped.id+b"_CONS", sseq, quality=quality)
|
||||
|
||||
if hasattr(ali, "counter"):
|
||||
seq[b'alignement_id']=ali.counter
|
||||
@ -224,12 +224,18 @@ def buildConsensus(ali, seq, ref_tags=None):
|
||||
seq[b'ali_length']=len(seq)-ic.seqASingle-ic.seqBSingle
|
||||
if seq[b'ali_length']>0:
|
||||
seq[b'score_norm']=float(ali.score)/seq[b'ali_length']
|
||||
|
||||
ref_tags = ali[0].wrapped
|
||||
|
||||
if hasattr(ali, "direction"):
|
||||
seq[b'ali_direction']=ali.direction
|
||||
seq[b'score']=ali.score
|
||||
seq[b'mode']=b'alignment'
|
||||
|
||||
|
||||
for tag in ref_tags:
|
||||
if tag != REVERSE_SEQ_COLUMN_NAME and tag != REVERSE_QUALITY_COLUMN_NAME:
|
||||
seq[tag] = ref_tags[tag]
|
||||
|
||||
return seq
|
||||
|
||||
|
||||
@ -241,10 +247,13 @@ def buildJoinedSequence(ali, reverse, seq, forward=None):
|
||||
s = forward.seq + reverse.seq
|
||||
quality = forward.quality
|
||||
quality.extend(reverse.quality)
|
||||
seq.set(forward.id +b"_PairedEnd", s, definition=forward.definition, quality=quality, tags=forward)
|
||||
seq.set(forward.id +b"_PairedEnd", s, definition=forward.definition, quality=quality)
|
||||
seq[b"score"]=ali.score
|
||||
seq[b"ali_direction"]=ali.direction
|
||||
seq[b"mode"]=b"joined"
|
||||
seq[b"pairedend_limit"]=len(forward)
|
||||
for tag in forward:
|
||||
if tag != REVERSE_SEQ_COLUMN_NAME and tag != REVERSE_QUALITY_COLUMN_NAME:
|
||||
seq[tag] = forward[tag]
|
||||
return seq
|
||||
|
||||
|
@ -24,7 +24,9 @@ cdef class Kmer_similarity:
|
||||
cdef int32_t shift_count_array_height_a[1]
|
||||
cdef Obiview_p view1_p
|
||||
cdef OBIDMS_column_p column1_p
|
||||
cdef OBIDMS_column_p qual_col1_p
|
||||
cdef Obiview_p view2_p
|
||||
cdef OBIDMS_column_p column2_p
|
||||
cdef OBIDMS_column_p qual_col2_p
|
||||
cdef bint build_consensus
|
||||
cpdef free(self)
|
@ -1,8 +1,7 @@
|
||||
#cython: language_level=3
|
||||
|
||||
from obitools3.dms.capi.obiview cimport NUC_SEQUENCE_COLUMN
|
||||
from obitools3.dms.capi.obiview cimport NUC_SEQUENCE_COLUMN, QUALITY_COLUMN
|
||||
|
||||
from obitools3.dms.obiseq cimport Nuc_Seq_Stored
|
||||
from obitools3.dms.capi.kmer_similarity cimport kmer_similarity, Obi_ali_p, obi_free_shifted_ali
|
||||
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
||||
from obitools3.dms.column.column cimport Column
|
||||
@ -52,13 +51,14 @@ cdef class Ali_shifted:
|
||||
def direction(self):
|
||||
return self.pointer().direction
|
||||
|
||||
cpdef free(self):
|
||||
cpdef free(self): # TODO allocated memory could be kept
|
||||
obi_free_shifted_ali(self.pointer())
|
||||
|
||||
|
||||
cdef class Kmer_similarity:
|
||||
def __init__(self, View_NUC_SEQS view1, Column column1=None, View_NUC_SEQS view2=None, Column column2=None, uint8_t kmer_size=3, build_consensus=True) :
|
||||
def __init__(self, View_NUC_SEQS view1, Column column1=None, Column qual_column1=None, View_NUC_SEQS view2=None, Column column2=None, Column qual_column2=None, uint8_t kmer_size=3, build_consensus=True) :
|
||||
cdef Column default_seq_col
|
||||
cdef Column default_qual_col
|
||||
if kmer_size < 1 or kmer_size > 3:
|
||||
raise Exception("Kmer size to compute kmer similarity must be >=1 or <=4")
|
||||
self.kmer_pos_array_p = NULL
|
||||
@ -89,13 +89,28 @@ cdef class Kmer_similarity:
|
||||
raise Exception("Kmer similarity with no sequence column given must be given a NUC_SEQS view")
|
||||
default_seq_col = view2[NUC_SEQUENCE_COLUMN]
|
||||
self.column2_p = default_seq_col.pointer()
|
||||
if qual_column1 is not None:
|
||||
self.qual_col1_p = qual_column1.pointer()
|
||||
else:
|
||||
if type(view1) != View_NUC_SEQS:
|
||||
raise Exception("Kmer similarity with no quality column given must be given a NUC_SEQS view")
|
||||
default_qual_col = view1[QUALITY_COLUMN]
|
||||
self.qual_col1_p = default_qual_col.pointer()
|
||||
if qual_column2 is not None:
|
||||
self.qual_col2_p = qual_column2.pointer()
|
||||
else:
|
||||
if type(view2) != View_NUC_SEQS:
|
||||
raise Exception("Kmer similarity with no quality column given must be given a NUC_SEQS view")
|
||||
default_qual_col = view2[QUALITY_COLUMN]
|
||||
self.qual_col2_p = default_qual_col.pointer()
|
||||
|
||||
|
||||
|
||||
def __call__(self, Nuc_Seq_Stored seq1, Nuc_Seq_Stored seq2):
|
||||
def __call__(self, object seq1, object seq2):
|
||||
cdef Obi_ali_p ali_p
|
||||
cdef Ali_shifted ali
|
||||
ali_p = kmer_similarity(self.view1_p, self.column1_p, seq1.index, 0, \
|
||||
self.view2_p, self.column2_p, seq2.index, 0, \
|
||||
self.qual_col1_p, self.qual_col2_p, \
|
||||
self.kmer_size, \
|
||||
self.kmer_pos_array_p, self.kmer_pos_array_height_a, \
|
||||
self.shift_array_p, self.shift_array_height_a, \
|
||||
|
Reference in New Issue
Block a user