diff --git a/src/obi_align.c b/src/obi_lcs.c similarity index 88% rename from src/obi_align.c rename to src/obi_lcs.c index 036d59b..0f52911 100755 --- a/src/obi_align.c +++ b/src/obi_lcs.c @@ -3,7 +3,7 @@ ****************************************************************************/ /** - * @file obi_align.c + * @file obi_lcs.c * @author Celine Mercier (celine.mercier@metabarcoding.org) * @date May 4th 2016 * @brief Functions handling LCS sequence alignments. @@ -18,7 +18,7 @@ #include #include -#include "obi_align.h" +#include "obi_lcs.h" #include "obidebug.h" #include "obierrno.h" #include "obitypes.h" @@ -121,11 +121,11 @@ static int print_alignment_result(Obiview_p output_view, OBIDMS_column_p seq2_column, index_t seq1_idx, index_t seq2_idx, -// bool print_count, -// OBIDMS_column_p count1_column, -// OBIDMS_column_p count2_column, -// int count1, -// int count2, + bool print_count, + OBIDMS_column_p count1_column, + OBIDMS_column_p count2_column, + int count1, + int count2, OBIDMS_column_p ali_length_column, int ali_length, OBIDMS_column_p lcs_length_column, @@ -231,22 +231,22 @@ static int create_alignment_output_columns(Obiview_p output_view, return -1; } } -// if (print_count) // TODO count columns not implemented yet -// { -// // Create the column for the count of the first sequences aligned -// if (obi_view_add_column(output_view, COUNT1_COLUMN_NAME, -1, NULL, OBI_INT, 0, 1, NULL, false, false, false, NULL, NULL, -1, COUNT1_COLUMN_COMMENTS, true) < 0) -// { -// obidebug(1, "\nError creating the first column for the sequence counts when aligning"); -// return -1; -// } -// -// // Create the column for the count of the second sequences aligned -// if (obi_view_add_column(output_view, COUNT2_COLUMN_NAME, -1, NULL, OBI_INT, 0, 1, NULL, false, false, false, NULL, NULL, -1, COUNT2_COLUMN_COMMENTS, true) < 0) -// { -// obidebug(1, "\nError creating the second column for the sequence counts when aligning"); -// return -1; -// } -// } + if (print_count) + { + // Create the column for the count of the first sequences aligned + if (obi_view_add_column(output_view, COUNT1_COLUMN_NAME, -1, NULL, OBI_INT, 0, 1, NULL, false, false, false, NULL, NULL, -1, COUNT1_COLUMN_COMMENTS, true) < 0) + { + obidebug(1, "\nError creating the first column for the sequence counts when aligning"); + return -1; + } + + // Create the column for the count of the second sequences aligned + if (obi_view_add_column(output_view, COUNT2_COLUMN_NAME, -1, NULL, OBI_INT, 0, 1, NULL, false, false, false, NULL, NULL, -1, COUNT2_COLUMN_COMMENTS, true) < 0) + { + obidebug(1, "\nError creating the second column for the sequence counts when aligning"); + return -1; + } + } return 0; } @@ -267,11 +267,11 @@ static int print_alignment_result(Obiview_p output_view, OBIDMS_column_p seq2_column, index_t seq1_idx, index_t seq2_idx, -// bool print_count, -// OBIDMS_column_p count1_column, -// OBIDMS_column_p count2_column, -// int count1, -// int count2, + bool print_count, + OBIDMS_column_p count1_column, + OBIDMS_column_p count2_column, + int count1, + int count2, OBIDMS_column_p ali_length_column, int ali_length, OBIDMS_column_p lcs_length_column, @@ -322,21 +322,21 @@ static int print_alignment_result(Obiview_p output_view, } } -// // Write the counts if needed // TODO count columns not implemented yet -// if (print_count) -// { -// if (obi_set_index_with_elt_idx_and_col_p_in_view(output_view, count1_column, line, 0, count1) < 0) -// { -// obidebug(1, "\nError writing count1 in a column"); -// return -1; -// } -// -// if (obi_set_index_with_elt_idx_and_col_p_in_view(output_view, count2_column, line, 0, count2) < 0) -// { -// obidebug(1, "\nError writing count2 in a column"); -// return -1; -// } -// } + // Write the counts if needed + if (print_count) + { + if (obi_set_int_with_elt_idx_and_col_p_in_view(output_view, count1_column, line, 0, count1) < 0) + { + obidebug(1, "\nError writing count1 in a column"); + return -1; + } + + if (obi_set_int_with_elt_idx_and_col_p_in_view(output_view, count2_column, line, 0, count2) < 0) + { + obidebug(1, "\nError writing count2 in a column"); + return -1; + } + } // Write the alignment length if it was computed if ((reference == ALILEN) && (normalize || !similarity_mode)) @@ -385,9 +385,13 @@ static int print_alignment_result(Obiview_p output_view, **********************************************************************/ -int obi_lcs_align_one_column(OBIDMS_p dms, const char* seq_view_name, const char* seq_column_name, const char* seq_elt_name, +int obi_lcs_align_one_column(const char* dms_name, + const char* seq_view_name, + const char* seq_column_name, + const char* seq_elt_name, const char* id_column_name, - const char* output_view_name, const char* output_view_comments, + const char* output_view_name, + const char* output_view_comments, bool print_seq, bool print_count, double threshold, bool normalize, int reference, bool similarity_mode, int thread_count) @@ -396,6 +400,7 @@ int obi_lcs_align_one_column(OBIDMS_p dms, const char* seq_view_name, const char index_t seq_count; index_t id1_idx, id2_idx; index_t seq1_idx, seq2_idx; + int count1, count2; double score; int lcs_length; int ali_length; @@ -405,16 +410,18 @@ int obi_lcs_align_one_column(OBIDMS_p dms, const char* seq_view_name, const char int lcs_min; index_t seq_elt_idx; + OBIDMS_p dms = NULL; Obiview_p seq_view = NULL; Obiview_p output_view = NULL; OBIDMS_column_p iseq_column = NULL; + OBIDMS_column_p i_count_column = NULL; OBIDMS_column_p id_column = NULL; OBIDMS_column_p id1_column = NULL; OBIDMS_column_p id2_column = NULL; OBIDMS_column_p seq1_column = NULL; OBIDMS_column_p seq2_column = NULL; - //OBIDMS_column_p count1_column = NULL; - //OBIDMS_column_p count2_column = NULL; + OBIDMS_column_p count1_column = NULL; + OBIDMS_column_p count2_column = NULL; OBIDMS_column_p idx1_column = NULL; OBIDMS_column_p idx2_column = NULL; OBIDMS_column_p lcs_length_column = NULL; @@ -423,6 +430,14 @@ int obi_lcs_align_one_column(OBIDMS_p dms, const char* seq_view_name, const char k = 0; + // Open DMS + dms = obi_open_dms(dms_name); + if (dms == NULL) + { + obidebug(1, "\nError opening the DMS"); + return -1; + } + // Open input view seq_view = obi_open_view(dms, seq_view_name); if (seq_view == NULL) @@ -494,6 +509,17 @@ int obi_lcs_align_one_column(OBIDMS_p dms, const char* seq_view_name, const char return -1; } + // Open the input count column + if (print_count) + { + i_count_column = obi_view_get_column(seq_view, COUNT_COLUMN); + if (i_count_column == NULL) + { + obidebug(1, "\nError getting the input COUNT column"); + return -1; + } + } + // Create the output view output_view = obi_new_view(dms, output_view_name, NULL, NULL, output_view_comments); if (output_view == NULL) @@ -521,11 +547,11 @@ int obi_lcs_align_one_column(OBIDMS_p dms, const char* seq_view_name, const char seq1_column = obi_view_get_column(output_view, SEQ1_COLUMN_NAME); seq2_column = obi_view_get_column(output_view, SEQ2_COLUMN_NAME); } -// if (print_count) // TODO count columns not implemented yet -// { -// count1_column = obi_view_get_column(seq_view, COUNT1_COLUMN_NAME); -// count2_column = obi_view_get_column(seq_view, COUNT2_COLUMN_NAME); -// } + if (print_count) + { + count1_column = obi_view_get_column(output_view, COUNT1_COLUMN_NAME); + count2_column = obi_view_get_column(output_view, COUNT2_COLUMN_NAME); + } // Build kmer tables ktable = hash_seq_column(seq_view, iseq_column, seq_elt_idx); @@ -597,11 +623,18 @@ int obi_lcs_align_one_column(OBIDMS_p dms, const char* seq_view_name, const char // Get second id idx id2_idx = obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, id_column, j, 0); + // Get counts // TODO use array for efficiency? + if (print_count) + { + count1 = obi_get_int_with_elt_idx_and_col_p_in_view(seq_view, i_count_column, i, 0); + count2 = obi_get_int_with_elt_idx_and_col_p_in_view(seq_view, i_count_column, j, 0); + } + if (print_alignment_result(output_view, k, idx1_column, idx2_column, i, j, id1_column, id2_column, id1_idx, id2_idx, print_seq, seq1_column, seq2_column, seq1_idx, seq2_idx, - //print_count, count1_column, count2_column, count1, count2, + print_count, count1_column, count2_column, count1, count2, ali_length_column, ali_length, lcs_length_column, lcs_length, score_column, score, @@ -625,13 +658,19 @@ int obi_lcs_align_one_column(OBIDMS_p dms, const char* seq_view_name, const char return -1; } + if (obi_close_dms(dms, false) < 0) + { + obidebug(1, "\nError closing the DMS after aligning"); + return -1; + } + free_kmer_tables(ktable, seq_count); return 0; } -int obi_lcs_align_two_columns(OBIDMS_p dms, +int obi_lcs_align_two_columns(const char* dms_name, const char* seq1_view_name, const char* seq2_view_name, const char* seq1_column_name, @@ -640,7 +679,8 @@ int obi_lcs_align_two_columns(OBIDMS_p dms, const char* seq2_elt_name, const char* id1_column_name, const char* id2_column_name, - const char* output_view_name, const char* output_view_comments, + const char* output_view_name, + const char* output_view_comments, bool print_seq, bool print_count, double threshold, bool normalize, int reference, bool similarity_mode) { @@ -649,6 +689,7 @@ int obi_lcs_align_two_columns(OBIDMS_p dms, index_t seq2_count; index_t id1_idx, id2_idx; index_t seq1_idx, seq2_idx; + int count1, count2; double score; int lcs_length; int ali_length; @@ -660,6 +701,7 @@ int obi_lcs_align_two_columns(OBIDMS_p dms, index_t seq2_elt_idx; bool same_indexer; + OBIDMS_p dms = NULL; Obiview_p seq1_view = NULL; Obiview_p seq2_view = NULL; Obiview_p output_view = NULL; @@ -667,12 +709,14 @@ int obi_lcs_align_two_columns(OBIDMS_p dms, OBIDMS_column_p i_seq2_column = NULL; OBIDMS_column_p i_id1_column = NULL; OBIDMS_column_p i_id2_column = NULL; + OBIDMS_column_p i_count1_column = NULL; + OBIDMS_column_p i_count2_column = NULL; OBIDMS_column_p id1_column = NULL; OBIDMS_column_p id2_column = NULL; OBIDMS_column_p seq1_column = NULL; OBIDMS_column_p seq2_column = NULL; - //OBIDMS_column_p count1_column = NULL; - //OBIDMS_column_p count2_column = NULL; + OBIDMS_column_p count1_column = NULL; + OBIDMS_column_p count2_column = NULL; OBIDMS_column_p idx1_column = NULL; OBIDMS_column_p idx2_column = NULL; OBIDMS_column_p lcs_length_column = NULL; @@ -681,6 +725,14 @@ int obi_lcs_align_two_columns(OBIDMS_p dms, k = 0; + // Open DMS + dms = obi_open_dms(dms_name); + if (dms == NULL) + { + obidebug(1, "\nError opening the DMS to align"); + return -1; + } + // Open the first input view seq1_view = obi_open_view(dms, seq1_view_name); if (seq1_view == NULL) @@ -835,6 +887,23 @@ int obi_lcs_align_two_columns(OBIDMS_p dms, return -1; } + // Open the input count columns + if (print_count) + { + i_count1_column = obi_view_get_column(seq1_view, COUNT_COLUMN); + if (i_count1_column == NULL) + { + obidebug(1, "\nError getting the first input COUNT column"); + return -1; + } + i_count2_column = obi_view_get_column(seq2_view, COUNT_COLUMN); + if (i_count2_column == NULL) + { + obidebug(1, "\nError getting the second input COUNT column"); + return -1; + } + } + // Create the output view output_view = obi_new_view(dms, output_view_name, NULL, NULL, output_view_comments); if (output_view == NULL) @@ -862,11 +931,11 @@ int obi_lcs_align_two_columns(OBIDMS_p dms, seq1_column = obi_view_get_column(output_view, SEQ1_COLUMN_NAME); seq2_column = obi_view_get_column(output_view, SEQ2_COLUMN_NAME); } -// if (print_count) // TODO count columns not implemented yet -// { -// count1_column = obi_view_get_column(seq_view, COUNT1_COLUMN_NAME); -// count2_column = obi_view_get_column(seq_view, COUNT2_COLUMN_NAME); -// } + if (print_count) + { + count1_column = obi_view_get_column(output_view, COUNT1_COLUMN_NAME); + count2_column = obi_view_get_column(output_view, COUNT2_COLUMN_NAME); + } // Check if the sequence columns share the same indexer (allows for quick checking of sequence equality) if (strcmp((i_seq1_column->header)->indexer_name, (i_seq2_column->header)->indexer_name) == 0) @@ -949,11 +1018,18 @@ int obi_lcs_align_two_columns(OBIDMS_p dms, // Get second id idx id2_idx = obi_get_index_with_elt_idx_and_col_p_in_view(seq2_view, i_id2_column, j, 0); + // Get counts // TODO use array for efficiency? + if (print_count) + { + count1 = obi_get_int_with_elt_idx_and_col_p_in_view(seq1_view, i_count1_column, i, 0); + count2 = obi_get_int_with_elt_idx_and_col_p_in_view(seq2_view, i_count2_column, j, 0); + } + if (print_alignment_result(output_view, k, idx1_column, idx2_column, i, j, id1_column, id2_column, id1_idx, id2_idx, print_seq, seq1_column, seq2_column, seq1_idx, seq2_idx, - //print_count, count1_column, count2_column, count1, count2, + print_count, count1_column, count2_column, count1, count2, ali_length_column, ali_length, lcs_length_column, lcs_length, score_column, score, @@ -986,6 +1062,12 @@ int obi_lcs_align_two_columns(OBIDMS_p dms, return -1; } + if (obi_close_dms(dms, false) < 0) + { + obidebug(1, "\nError closing the DMS after aligning"); + return -1; + } + free_kmer_tables(ktable, seq1_count + seq2_count); return 0; diff --git a/src/obi_align.h b/src/obi_lcs.h similarity index 93% rename from src/obi_align.h rename to src/obi_lcs.h index 059c528..6ecb69f 100755 --- a/src/obi_align.h +++ b/src/obi_lcs.h @@ -3,15 +3,15 @@ ****************************************************************************/ /** - * @file obi_align.h + * @file obi_lcs.h * @author Celine Mercier (celine.mercier@metabarcoding.org) * @date May 11th 2016 * @brief Header file for the functions handling the LCS alignment of DNA sequences. */ -#ifndef OBI_ALIGN_H_ -#define OBI_ALIGN_H_ +#ifndef OBI_LCS_H_ +#define OBI_LCS_H_ #include @@ -59,7 +59,7 @@ * * Note: The columns where the results are written are automatically named and created. * - * @param dms A pointer on an OBIDMS. + * @param dms_name The path of the DMS. * @param seq_view_name The name of the view where the column to align is. * @param seq_column_name The name of the OBI_SEQ column in the input view to align. * If "" (empty string), and the input view is of type NUC_SEQS_VIEW, the associated "NUC_SEQ" column is aligned. @@ -87,10 +87,13 @@ * @since May 2016 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ -int obi_lcs_align_one_column(OBIDMS_p dms, - const char* seq_view_name, const char* seq_column_name, const char* seq_elt_name, +int obi_lcs_align_one_column(const char* dms_name, + const char* seq_view_name, + const char* seq_column_name, + const char* seq_elt_name, const char* id_column_name, - const char* output_view_name, const char* output_view_comments, + const char* output_view_name, + const char* output_view_comments, bool print_seq, bool print_count, double threshold, bool normalize, int reference, bool similarity_mode, int thread_count); @@ -103,7 +106,7 @@ int obi_lcs_align_one_column(OBIDMS_p dms, * * Note: The columns where the results are written are automatically named and created. * - * @param dms A pointer on an OBIDMS. + * @param dms_name The path of the DMS. * @param seq1_view_name The name of the view where the first column to align is. * @param seq2_view_name The name of the view where the second column to align is ("" if it is the same view as the first one). * @param seq1_column_name The name of the first OBI_SEQ column in the input view to align. @@ -138,7 +141,7 @@ int obi_lcs_align_one_column(OBIDMS_p dms, * @since December 2016 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ -int obi_lcs_align_two_columns(OBIDMS_p dms, +int obi_lcs_align_two_columns(const char* dms_name, const char* seq1_view_name, const char* seq2_view_name, const char* seq1_column_name, @@ -147,10 +150,11 @@ int obi_lcs_align_two_columns(OBIDMS_p dms, const char* seq2_elt_name, const char* id1_column_name, const char* id2_column_name, - const char* output_view_name, const char* output_view_comments, + const char* output_view_name, + const char* output_view_comments, bool print_seq, bool print_count, double threshold, bool normalize, int reference, bool similarity_mode); -#endif /* OBI_ALIGN_H_ */ +#endif /* OBI_LCS_H_ */