2016-05-11 16:36:23 +02:00
|
|
|
/****************************************************************************
|
|
|
|
* Sequence alignment functions header file *
|
|
|
|
****************************************************************************/
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @file obi_align.h
|
|
|
|
* @author Celine Mercier
|
|
|
|
* @date May 11th 2016
|
|
|
|
* @brief Header file for the functions handling the alignment of DNA sequences.
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
#ifndef OBI_ALIGN_H_
|
|
|
|
#define OBI_ALIGN_H_
|
|
|
|
|
|
|
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdbool.h>
|
|
|
|
|
|
|
|
#include "obidms.h"
|
|
|
|
#include "obidmscolumn.h"
|
|
|
|
#include "obitypes.h"
|
|
|
|
|
|
|
|
|
2016-08-10 14:51:02 +02:00
|
|
|
/**
|
|
|
|
* @brief Aligns a NUC_SEQ column with itself.
|
|
|
|
*
|
|
|
|
* @param seq_view A pointer to the view where the column to align is.
|
|
|
|
* @param seq_column A pointer to the OBI_SEQ column to align.
|
|
|
|
* @param score_view A pointer to the view to write the outputs to.
|
|
|
|
* @param id1_column A pointer to the OBI_STR column in score_view where the id of the first sequence should be written.
|
|
|
|
* @param id2_column A pointer to the OBI_STR column in score_view where the id of the second sequence should be written.
|
|
|
|
* @param score_column A pointer to the OBI_FLOAT column in score_view where the alignment score should be written.
|
|
|
|
* @param threshold Score threshold. If the score is normalized and expressed in similarity, it is an identity, e.g. 0.95
|
|
|
|
* for an identity of 95%. If the score is normalized and expressed in distance, it is (1.0 - identity),
|
|
|
|
* e.g. 0.05 for an identity of 95%. If the score is not normalized and expressed in similarity, it is
|
|
|
|
* the length of the Longest Common Subsequence. If the score is not normalized and expressed in distance,
|
|
|
|
* it is (reference length - LCS length). Only sequence pairs with a similarity above the threshold are printed.
|
|
|
|
* @param normalize Whether the score should be normalized with the reference sequence length.
|
|
|
|
* @param reference The reference length. 0: The alignment length; 1: The longest sequence's length; 2: The shortest sequence's length.
|
|
|
|
* @param similarity_mode Whether the score should be expressed in similarity (true) or distance (false).
|
|
|
|
*
|
|
|
|
* @returns A value indicating the success of the operation.
|
|
|
|
* @retval 0 if the operation was successfully completed.
|
|
|
|
* @retval -1 if an error occurred.
|
|
|
|
*
|
|
|
|
* @since May 2016
|
|
|
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
|
|
|
*/
|
|
|
|
int obi_align_one_column(Obiview_p seq_view, OBIDMS_column_p seq_column,
|
|
|
|
Obiview_p score_view, OBIDMS_column_p id1_column, OBIDMS_column_p id2_column, OBIDMS_column_p score_column,
|
|
|
|
double threshold, bool normalize, int reference, bool similarity_mode);
|
|
|
|
|
2016-05-11 16:36:23 +02:00
|
|
|
|
|
|
|
/**
|
|
|
|
* @brief Aligns two sequence columns with each other (prototype currently commented out).
|
|
|
|
*
|
|
|
|
* TODO
|
|
|
|
*
|
|
|
|
*/
|
2016-08-10 14:51:02 +02:00
|
|
|
//int obi_align_two_columns(Obiview_p seq_view, OBIDMS_column_p seq_column_1, OBIDMS_column_p seq_column_2,
|
|
|
|
// Obiview_p score_view, OBIDMS_column_p score_column,
|
|
|
|
// double threshold, bool normalize, int reference, bool similarity_mode);
|
2016-05-11 16:36:23 +02:00
|
|
|
|
|
|
|
|
|
|
|
#endif /* OBI_ALIGN_H_ */
|
|
|
|
|