alignpairedend: improved/fixed the output tags for the alignment score
and lengths. Removed minimum score option
This commit is contained in:
@ -39,12 +39,13 @@ def addOptions(parser):
|
||||
type=str,
|
||||
help="URI to the reverse reads if they are in a different view than the forward reads")
|
||||
|
||||
group.add_argument('--score-min',
|
||||
action="store", dest="alignpairedend:smin",
|
||||
metavar="#.###",
|
||||
default=None,
|
||||
type=float,
|
||||
help="Minimum score for keeping alignments")
|
||||
# group.add_argument('--score-min',
|
||||
# action="store", dest="alignpairedend:smin",
|
||||
# metavar="#.###",
|
||||
# default=None,
|
||||
# type=float,
|
||||
# help="Minimum score for keeping alignments. "
|
||||
# "(for kmer alignment) The score is an approximation of the number of nucleotides matching in the overlap of the alignment.")
|
||||
|
||||
# group.add_argument('-A', '--true-ali',
|
||||
# action="store_true", dest="alignpairedend:trueali",
|
||||
@ -174,11 +175,6 @@ def run(config):
|
||||
|
||||
Column.new_column(view, QUALITY_COLUMN, OBI_QUAL) #TODO output URI quality option?
|
||||
|
||||
if 'smin' in config['alignpairedend']:
|
||||
smin = config['alignpairedend']['smin']
|
||||
else:
|
||||
smin = 0
|
||||
|
||||
# Initialize the progress bar
|
||||
pb = ProgressBar(entries_len, config, seconde=5)
|
||||
|
||||
@ -217,7 +213,7 @@ def run(config):
|
||||
else:
|
||||
seqF = forward[i]
|
||||
|
||||
if ali.score > smin and ali.consensus_len > 0 :
|
||||
if ali.overlap_len > 0 :
|
||||
buildConsensus(ali, consensus, seqF)
|
||||
else:
|
||||
if not two_views:
|
||||
@ -225,9 +221,7 @@ def run(config):
|
||||
else:
|
||||
seqR = reverse[i]
|
||||
buildJoinedSequence(ali, seqR, consensus, forward=seqF)
|
||||
|
||||
consensus[b"smin"] = smin
|
||||
|
||||
|
||||
if kmer_ali :
|
||||
ali.free()
|
||||
|
||||
|
@ -183,8 +183,9 @@ def buildConsensus(ali, seq, ref_tags=None):
|
||||
# doesn't work because uint8_t* are forced into bytes by cython (nothing read/kept beyond 0 values)
|
||||
#obi_set_qual_int_with_elt_idx_and_col_p_in_view(view_p, col_p, seq.index, 0, ali.consensus_qual, ali.consensus_len)
|
||||
seq.set(ref_tags.id+b"_CONS", ali.consensus_seq, quality=ali.consensus_qual)
|
||||
seq[b'ali_length'] = ali.consensus_len
|
||||
seq[b'score_norm']=float(ali.score)/ali.consensus_len
|
||||
seq[b"seq_length"] = ali.consensus_len
|
||||
seq[b"overlap_length"] = ali.overlap_len
|
||||
seq[b'score_norm']=round(float(ali.score)/ali.overlap_len, 3)
|
||||
seq[b'shift']=ali.shift
|
||||
else:
|
||||
if len(ali[0])>999: # TODO why?
|
||||
@ -256,9 +257,10 @@ def buildJoinedSequence(ali, reverse, seq, forward=None):
|
||||
seq[b"ali_direction"]=None
|
||||
seq[b"mode"]=b"joined"
|
||||
seq[b"pairedend_limit"]=len(forward)
|
||||
seq[b"ali_length"] = ali.consensus_len
|
||||
seq[b"seq_length"] = ali.consensus_len
|
||||
seq[b"overlap_length"] = ali.overlap_len
|
||||
if ali.consensus_len > 0:
|
||||
seq[b"score_norm"]=float(ali.score)/ali.consensus_len
|
||||
seq[b'score_norm']=round(float(ali.score)/ali.overlap_len, 3)
|
||||
else:
|
||||
seq[b"score_norm"]=0.0
|
||||
|
||||
|
@ -414,7 +414,10 @@ Obi_ali_p kmer_similarity(Obiview_p view1, OBIDMS_column_p column1, index_t idx1
|
||||
}
|
||||
|
||||
if (max_common_kmers > 0)
|
||||
score = max_common_kmers + kmer_size - 1; // aka the number of nucleotides in the longest stretch of kmers perfectly matching
|
||||
score = max_common_kmers + kmer_size - 1; // aka an approximation of the number of nucleotides matching in the overlap of the alignment.
|
||||
// It's an approximation because one mismatch produces kmer_size kmer mismatches if in the middle of the overlap,
|
||||
// and less for mismatches located towards the ends of the overlap. The case where there are the most mismatches is assumed,
|
||||
// meaning that the score will be often underestimated and never overestimated.
|
||||
else
|
||||
score = 0;
|
||||
abs_shift = abs(best_shift);
|
||||
|
@ -27,7 +27,11 @@
|
||||
* @brief Alignment structure, with informations about the similarity and to rebuild the alignment.
|
||||
*/
|
||||
typedef struct Obi_ali {
|
||||
int score; /**< Alignment score, corresponding to the number of nucleotides in the longest stretch of kmers perfectly matching.
|
||||
int score; /**< Alignment score, corresponding to an approximation of the number of
|
||||
* nucleotides matching in the overlap of the alignment.
|
||||
* It's an approximation because one mismatch produces kmer_size kmer mismatches if in the middle of the overlap,
|
||||
* and less for mismatches located towards the ends of the overlap. The case where there are the most mismatches is assumed,
|
||||
* meaning that the score will be often underestimated and never overestimated.
|
||||
*/
|
||||
int consensus_length; /**< Length of the final consensus sequence.
|
||||
*/
|
||||
|
Reference in New Issue
Block a user