/****************************************************************************
 * Header file for Kmer similarity computation functions                    *
 ****************************************************************************/
/**
 * @file kmer_similarity.h
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 * @date January 7th 2019
 * @brief Header file for Kmer similarity computation functions.
 */
# ifndef KMER_SIMILARITY_H_
# define KMER_SIMILARITY_H_
# include <stdio.h>
# include "obitypes.h"
# include "obidmscolumn.h"
# include "obiview.h"
/* NOTE(review): ARRAY_LENGTH is not used by any declaration visible in this header;
 * presumably it sizes a buffer in the implementation file -- confirm and document its unit. */
# define ARRAY_LENGTH (256)
/**
 * @brief Alignment structure, with information about the similarity and the data needed to rebuild the alignment.
 */
typedef struct Obi_ali {
    int      score;             /**< Alignment score, corresponding to an approximation of the number of
                                 *   nucleotides matching in the overlap of the alignment.
                                 *   It is an approximation because one mismatch produces kmer_size kmer
                                 *   mismatches if located in the middle of the overlap, and fewer for
                                 *   mismatches located towards the ends of the overlap. The case where there
                                 *   are the most mismatches is assumed, meaning that the score will often be
                                 *   underestimated and never overestimated.
                                 */
    int      consensus_length;  /**< Length of the final consensus sequence.
                                 */
    int      overlap_length;    /**< Length of the overlap between the aligned sequences.
                                 */
    char    *consensus_seq;     /**< Consensus sequence built so as to reconstruct a paired-end read.
                                 */
    uint8_t *consensus_qual;    /**< Consensus quality built so as to reconstruct a paired-end read.
                                 */
    int      shift;             /**< Shift chosen to align the sequences.
                                 */
    char     direction[6];      /**< Alignment direction (positive/right or negative/left shift).
                                 */
} Obi_ali_t, *Obi_ali_p;
/**
 * @brief Releases an Obi_ali_p structure together with all of its elements.
 *
 * @warning The pointer passed in must not be used once this function has returned.
 *
 * @param ali Pointer on the Obi_ali_p structure to free.
 *
 * @since January 2019
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
void obi_free_shifted_ali(Obi_ali_p ali);
/**
 * @brief Function computing the kmer similarity of two sequences stored in views.
 *
 * The similarity is computed this way: the positions of identical kmers in both sequences are
 * compared, and the most represented shift is chosen. The similarity is then calculated as:
 * kmer_similarity = number_of_common_kmers_with_the_chosen_shift + kmer_size - 1
 *
 * @warning Several pointers and structures passed to or returned by the function have to be freed by the caller:
 *             - the array pointed to by kmer_pos_array_p
 *             - the array pointed to by shift_array_p
 *             - the array pointed to by shift_count_array_p
 *             - the Obi_ali_p structure returned
 *
 * @param view1 A pointer on the view containing the first sequence.
 * @param column1 A pointer on the column containing the first sequence.
 * @param idx1 The index of the first sequence in view1.
 * @param elt_idx1 The element index of the first sequence in column1.
 * @param view2 A pointer on the view containing the second sequence.
 * @param column2 A pointer on the column containing the second sequence.
 * @param idx2 The index of the second sequence in view2.
 * @param elt_idx2 The element index of the second sequence in column2.
 * @param qual_col1 A pointer on the column containing the quality associated with the first sequence (in view1).
 * @param qual_col2 A pointer on the column containing the quality associated with the second sequence (in view2).
 * @param reversed_column A pointer on the column containing a boolean indicating whether the sequences correspond
 *                        to paired-end reads that might have been reversed before aligning them
 *                        (e.g. when the ngsfilter tool is used before alignpairedend).
 * @param kmer_size The kmer length to use. Must be >= 1 and <= 4.
 * @param kmer_pos_array_p A pointer on the array used to store kmer positions. If the array is NULL, it is
 *                         allocated by the function. If needed, it is reallocated to a bigger size.
 * @param kmer_pos_array_height_p A pointer on an integer corresponding to the size (number of elements)
 *                                allocated for the kmer position array. Updated by the function as needed.
 * @param shift_array_p A pointer on the array used to store kmer shifts. If the array is NULL, it is
 *                      allocated by the function. If needed, it is reallocated to a bigger size.
 * @param shift_array_height_p A pointer on an integer corresponding to the size (number of elements)
 *                             allocated for the shift array. Updated by the function as needed.
 * @param shift_count_array_p A pointer on the array used to store shift counts. If the array is NULL, it is
 *                            allocated by the function. If needed, it is reallocated to a bigger size.
 * @param shift_count_array_height_p A pointer on an integer corresponding to the size (number of elements)
 *                                   allocated for the shift count array. Updated by the function as needed.
 * @param build_consensus A boolean indicating whether the function should build the consensus sequence and
 *                        quality so as to reconstruct a paired-end read.
 *                        TODO: option to build the consensus without quality?
 *
 * @returns A pointer on an Obi_ali_p structure containing the results.
 * @retval NULL if an error occurred.
 *
 * @since January 2019
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
Obi_ali_p kmer_similarity(Obiview_p view1,
                          OBIDMS_column_p column1,
                          index_t idx1,
                          index_t elt_idx1,
                          Obiview_p view2,
                          OBIDMS_column_p column2,
                          index_t idx2,
                          index_t elt_idx2,
                          OBIDMS_column_p qual_col1,
                          OBIDMS_column_p qual_col2,
                          OBIDMS_column_p reversed_column,
                          uint8_t kmer_size,
                          int32_t **kmer_pos_array_p,
                          int32_t *kmer_pos_array_height_p,
                          int32_t **shift_array_p,
                          int32_t *shift_array_height_p,
                          int32_t **shift_count_array_p,
                          int32_t *shift_count_array_height_p,
                          bool build_consensus);
# endif /* KMER_SIMILARITY_H_ */