Alignment: API rework. 'obi align' is now 'obi lcs', and the results are

now written to columns automatically created in the output view, all
optimally handled at the C level.
This commit is contained in:
Celine Mercier
2016-12-12 11:58:59 +01:00
parent fa4e4ffaff
commit 8afb1644e9
11 changed files with 579 additions and 272 deletions

View File

@ -24,15 +24,53 @@
#include "obitypes.h"
/**
 * @brief Names and comments of the columns that are automatically created in the output view when aligning.
 *
 * Every output column is described by a pair of macros: <X>_COLUMN_NAME and <X>_COLUMN_COMMENTS.
 * Each comments string currently repeats the column name.
 *
 * @since December 2016
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */

/* Identifiers of the two sequences of a pair (presumably first and second aligned sequence; confirm in the implementation). */
#define ID1_COLUMN_NAME             "ID1"
#define ID1_COLUMN_COMMENTS         "ID1"
#define ID2_COLUMN_NAME             "ID2"
#define ID2_COLUMN_COMMENTS         "ID2"

/* The two sequences themselves (presumably only filled when the sequences are requested in the output; confirm). */
#define SEQ1_COLUMN_NAME            "SEQ1"
#define SEQ1_COLUMN_COMMENTS        "SEQ1"
#define SEQ2_COLUMN_NAME            "SEQ2"
#define SEQ2_COLUMN_COMMENTS        "SEQ2"

/* Counts of the two sequences (presumably only filled when the counts are requested in the output; confirm). */
#define COUNT1_COLUMN_NAME          "COUNT1"
#define COUNT1_COLUMN_COMMENTS      "COUNT1"
#define COUNT2_COLUMN_NAME          "COUNT2"
#define COUNT2_COLUMN_COMMENTS      "COUNT2"

/* NOTE(review): presumably the indices of the two sequences in the input view; confirm against the implementation. */
#define IDX1_COLUMN_NAME            "IDX1"
#define IDX1_COLUMN_COMMENTS        "IDX1"
#define IDX2_COLUMN_NAME            "IDX2"
#define IDX2_COLUMN_COMMENTS        "IDX2"

/* Alignment results (judging by the names: LCS length, alignment length and score). */
#define LCS_LENGTH_COLUMN_NAME      "LCS_LENGTH"
#define LCS_LENGTH_COLUMN_COMMENTS  "LCS_LENGTH"
#define ALI_LENGTH_COLUMN_NAME      "ALI_LENGTH"
#define ALI_LENGTH_COLUMN_COMMENTS  "ALI_LENGTH"
#define SCORE_COLUMN_NAME           "SCORE"
#define SCORE_COLUMN_COMMENTS       "SCORE"
/**
* @brief Aligns a NUC_SEQ column with itself.
*
* @param seq_view A pointer on the view where the column to align is.
* @param seq_column A pointer on the OBI_SEQ column to align.
* @param score_view A pointer on the view to write the outputs to.
* @param id1_column A pointer on the OBI_STR column in score_view where the id of the first sequence should be written.
* @param id2_column A pointer on the OBI_STR column in score_view where the id of the second sequence should be written.
* @param score_column A pointer on the OBI_FLOAT column in score_view where the alignment score should be written.
* Note: The columns where the results are written are automatically named and created.
*
* @param dms A pointer on an OBIDMS.
* @param seq_view_name The name of the view where the column to align is.
* @param seq_column_name The name of the OBI_SEQ column in the input view to align.
* If "" (empty string), and the input view is of type NUC_SEQS_VIEW, the associated "NUC_SEQ" column is aligned.
* @param seq_elt_name The name of the element in the column corresponding to the sequence to align, if the column has multiple
* elements per line.
* @param id_column_name The name of the column in the input view containing the identifiers of the sequences to align.
* If "" (empty string), and the input view is of type NUC_SEQS_VIEW, the associated "ID" column is used.
* @param output_view_name The name of the output view where the results should be written (should not already exist).
* @param output_view_comments The comments that should be associated with the output view.
* @param print_seq A boolean indicating whether the aligned sequences should be copied in the output view.
* @param print_count A boolean indicating whether the aligned sequence counts should be copied in the output view.
* @param threshold Score threshold. If the score is normalized and expressed in similarity, it is an identity, e.g. 0.95
* for an identity of 95%. If the score is normalized and expressed in distance, it is (1.0 - identity),
* e.g. 0.05 for an identity of 95%. If the score is not normalized and expressed in similarity, it is
@ -49,9 +87,12 @@
* @since May 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_align_one_column(
        Obiview_p       seq_view,       /* view where the column to align is */
        OBIDMS_column_p seq_column,     /* sequence column to align */
        const char*     seq_name,       /* NOTE(review): presumably the element name within seq_column; confirm */
        Obiview_p       score_view,     /* view the outputs are written to */
        OBIDMS_column_p id1_column,     /* OBI_STR column of score_view receiving the id of the first sequence */
        OBIDMS_column_p id2_column,     /* OBI_STR column of score_view receiving the id of the second sequence */
        OBIDMS_column_p score_column,   /* OBI_FLOAT column of score_view receiving the alignment score */
        double          threshold,      /* score threshold */
        bool            normalize,
        int             reference,
        bool            similarity_mode);
int obi_lcs_align_one_column(
        OBIDMS_p    dms,                    /* the OBIDMS to work in */
        const char* seq_view_name,          /* name of the view where the column to align is */
        const char* seq_column_name,        /* name of the sequence column to align ("" for the view's "NUC_SEQ" column) */
        const char* seq_elt_name,           /* element name, for columns with multiple elements per line */
        const char* id_column_name,         /* name of the column holding the sequence identifiers */
        const char* output_view_name,       /* name of the output view (should not already exist) */
        const char* output_view_comments,   /* comments associated with the output view */
        bool        print_seq,              /* copy the aligned sequences in the output view */
        bool        print_count,            /* copy the aligned sequence counts in the output view */
        double      threshold,              /* score threshold */
        bool        normalize,
        int         reference,
        bool        similarity_mode);
/**