Fixed a bug in the kmer similarity computation: the possibility that the two
sequences had been switched was not taken into account.
This commit is contained in:
@ -72,7 +72,9 @@ Obi_ali_p kmer_similarity(Obiview_p view1, OBIDMS_column_p column1, index_t idx1
|
|||||||
bool build_consensus)
|
bool build_consensus)
|
||||||
{
|
{
|
||||||
Obi_ali_p ali = NULL;
|
Obi_ali_p ali = NULL;
|
||||||
int i, j;
|
int i, j;
|
||||||
|
bool switched_seqs;
|
||||||
|
bool left_ali;
|
||||||
OBIDMS_column_p qual_col1;
|
OBIDMS_column_p qual_col1;
|
||||||
OBIDMS_column_p qual_col2;
|
OBIDMS_column_p qual_col2;
|
||||||
double score = 0.0;
|
double score = 0.0;
|
||||||
@ -99,6 +101,7 @@ Obi_ali_p kmer_similarity(Obiview_p view1, OBIDMS_column_p column1, index_t idx1
|
|||||||
int32_t kmer_count;
|
int32_t kmer_count;
|
||||||
int32_t best_shift_idx;
|
int32_t best_shift_idx;
|
||||||
int32_t best_shift;
|
int32_t best_shift;
|
||||||
|
int32_t abs_shift;
|
||||||
int32_t max_common_kmers;
|
int32_t max_common_kmers;
|
||||||
int32_t overlap_len;
|
int32_t overlap_len;
|
||||||
int32_t consensus_len;
|
int32_t consensus_len;
|
||||||
@ -137,10 +140,12 @@ Obi_ali_p kmer_similarity(Obiview_p view1, OBIDMS_column_p column1, index_t idx1
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Choose the shortest sequence to save kmer positions in array
|
// Choose the shortest sequence to save kmer positions in array
|
||||||
|
switched_seqs = false;
|
||||||
len1 = blob1->length_decoded_value;
|
len1 = blob1->length_decoded_value;
|
||||||
len2 = blob2->length_decoded_value;
|
len2 = blob2->length_decoded_value;
|
||||||
if (len2 > len1)
|
if (len2 > len1)
|
||||||
{
|
{
|
||||||
|
switched_seqs = true;
|
||||||
temp_len = len1;
|
temp_len = len1;
|
||||||
len1 = len2;
|
len1 = len2;
|
||||||
len2 = temp_len;
|
len2 = temp_len;
|
||||||
@ -355,13 +360,25 @@ Obi_ali_p kmer_similarity(Obiview_p view1, OBIDMS_column_p column1, index_t idx1
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
abs_shift = abs(best_shift);
|
||||||
|
|
||||||
// Save result in Obi_ali structure
|
// Save result in Obi_ali structure
|
||||||
ali->score = score;
|
ali->score = score;
|
||||||
ali->consensus_length = 0;
|
ali->consensus_length = 0;
|
||||||
ali->overlap_length = overlap_len;
|
ali->overlap_length = overlap_len;
|
||||||
ali->shift = best_shift;
|
ali->shift = abs_shift;
|
||||||
ali->consensus_seq = NULL;
|
ali->consensus_seq = NULL;
|
||||||
ali->consensus_qual = NULL;
|
ali->consensus_qual = NULL;
|
||||||
|
if (((best_shift < 0) && (!switched_seqs)) || ((best_shift > 0) && switched_seqs))
|
||||||
|
{
|
||||||
|
left_ali = true;
|
||||||
|
strcpy(ali->direction, "left");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
left_ali = false;
|
||||||
|
strcpy(ali->direction, "right");
|
||||||
|
}
|
||||||
|
|
||||||
// Build the consensus sequence if asked
|
// Build the consensus sequence if asked
|
||||||
if (build_consensus)
|
if (build_consensus)
|
||||||
@ -400,7 +417,7 @@ Obi_ali_p kmer_similarity(Obiview_p view1, OBIDMS_column_p column1, index_t idx1
|
|||||||
if (seq1 == NULL)
|
if (seq1 == NULL)
|
||||||
seq1 = obi_blob_to_seq(blob1);
|
seq1 = obi_blob_to_seq(blob1);
|
||||||
|
|
||||||
if (best_shift > 0)
|
if (left_ali)
|
||||||
consensus_len = len1 + len2 - overlap_len;
|
consensus_len = len1 + len2 - overlap_len;
|
||||||
else
|
else
|
||||||
consensus_len = overlap_len;
|
consensus_len = overlap_len;
|
||||||
@ -427,26 +444,33 @@ Obi_ali_p kmer_similarity(Obiview_p view1, OBIDMS_column_p column1, index_t idx1
|
|||||||
ali->consensus_seq = consensus_seq;
|
ali->consensus_seq = consensus_seq;
|
||||||
ali->consensus_qual = consensus_qual;
|
ali->consensus_qual = consensus_qual;
|
||||||
|
|
||||||
// Compute consensus-relative shift for each sequence and store direction
|
// Compute consensus-relative shift for each sequence
|
||||||
if (best_shift >= 0)
|
if (best_shift > 0)
|
||||||
{
|
{
|
||||||
shift1 = 0;
|
shift1 = best_shift;
|
||||||
shift2 = best_shift;
|
shift2 = 0;
|
||||||
strcpy(ali->direction, "left");
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
shift1 = -(best_shift);
|
shift1 = 0;
|
||||||
shift2 = 0;
|
shift2 = -(best_shift);
|
||||||
strcpy(ali->direction, "right");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// If positive shift, copy first part of second sequence
|
// If left alignment, copy first part of first sequence
|
||||||
if (best_shift > 0)
|
if (left_ali)
|
||||||
{
|
{
|
||||||
strncpy(consensus_seq, seq2, best_shift);
|
if (switched_seqs)
|
||||||
memcpy(consensus_qual, qual2, best_shift*sizeof(uint8_t));
|
{
|
||||||
cons_shift = best_shift;
|
strncpy(consensus_seq, seq2, abs_shift);
|
||||||
|
memcpy(consensus_qual, qual2, abs_shift*sizeof(uint8_t));
|
||||||
|
cons_shift = abs_shift;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
strncpy(consensus_seq, seq1, abs_shift);
|
||||||
|
memcpy(consensus_qual, qual1, abs_shift*sizeof(uint8_t));
|
||||||
|
cons_shift = abs_shift;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
cons_shift = 0;
|
cons_shift = 0;
|
||||||
@ -461,11 +485,19 @@ Obi_ali_p kmer_similarity(Obiview_p view1, OBIDMS_column_p column1, index_t idx1
|
|||||||
consensus_qual[pos+cons_shift] = round((qual1[pos+shift1] + qual2[pos+shift2])/2); // TODO maybe use the (p1*(1-p2/3)) formula (but parenthesis bug???)
|
consensus_qual[pos+cons_shift] = round((qual1[pos+shift1] + qual2[pos+shift2])/2); // TODO maybe use the (p1*(1-p2/3)) formula (but parenthesis bug???)
|
||||||
}
|
}
|
||||||
|
|
||||||
// If positive shift, copy last part of first sequence
|
// If left alignment, copy last part of first sequence
|
||||||
if (best_shift > 0)
|
if (left_ali)
|
||||||
{
|
{
|
||||||
strncpy(consensus_seq+cons_shift+overlap_len, seq1+overlap_len, len1-overlap_len);
|
if (switched_seqs)
|
||||||
memcpy(consensus_qual+cons_shift+overlap_len, qual1+overlap_len, (len1-overlap_len)*sizeof(uint8_t));
|
{
|
||||||
|
strncpy(consensus_seq+cons_shift+overlap_len, seq1+overlap_len, len1-overlap_len);
|
||||||
|
memcpy(consensus_qual+cons_shift+overlap_len, qual1+overlap_len, (len1-overlap_len)*sizeof(uint8_t));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
strncpy(consensus_seq+cons_shift+overlap_len, seq2+overlap_len, len2-overlap_len);
|
||||||
|
memcpy(consensus_qual+cons_shift+overlap_len, qual2+overlap_len, (len2-overlap_len)*sizeof(uint8_t));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
consensus_seq[consensus_len] = '\0';
|
consensus_seq[consensus_len] = '\0';
|
||||||
|
Reference in New Issue
Block a user