Alignment: updated functions to align columns (LCS)
This commit is contained in:
@ -3,7 +3,7 @@
|
||||
****************************************************************************/
|
||||
|
||||
/**
|
||||
* @file obi_align.c
|
||||
* @file obi_lcs.c
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
* @date May 4th 2016
|
||||
* @brief Functions handling LCS sequence alignments.
|
||||
@ -18,7 +18,7 @@
|
||||
#include <stdio.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "obi_align.h"
|
||||
#include "obi_lcs.h"
|
||||
#include "obidebug.h"
|
||||
#include "obierrno.h"
|
||||
#include "obitypes.h"
|
||||
@ -121,11 +121,11 @@ static int print_alignment_result(Obiview_p output_view,
|
||||
OBIDMS_column_p seq2_column,
|
||||
index_t seq1_idx,
|
||||
index_t seq2_idx,
|
||||
// bool print_count,
|
||||
// OBIDMS_column_p count1_column,
|
||||
// OBIDMS_column_p count2_column,
|
||||
// int count1,
|
||||
// int count2,
|
||||
bool print_count,
|
||||
OBIDMS_column_p count1_column,
|
||||
OBIDMS_column_p count2_column,
|
||||
int count1,
|
||||
int count2,
|
||||
OBIDMS_column_p ali_length_column,
|
||||
int ali_length,
|
||||
OBIDMS_column_p lcs_length_column,
|
||||
@ -231,22 +231,22 @@ static int create_alignment_output_columns(Obiview_p output_view,
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
// if (print_count) // TODO count columns not implemented yet
|
||||
// {
|
||||
// // Create the column for the count of the first sequences aligned
|
||||
// if (obi_view_add_column(output_view, COUNT1_COLUMN_NAME, -1, NULL, OBI_INT, 0, 1, NULL, false, false, false, NULL, NULL, -1, COUNT1_COLUMN_COMMENTS, true) < 0)
|
||||
// {
|
||||
// obidebug(1, "\nError creating the first column for the sequence counts when aligning");
|
||||
// return -1;
|
||||
// }
|
||||
//
|
||||
// // Create the column for the count of the second sequences aligned
|
||||
// if (obi_view_add_column(output_view, COUNT2_COLUMN_NAME, -1, NULL, OBI_INT, 0, 1, NULL, false, false, false, NULL, NULL, -1, COUNT2_COLUMN_COMMENTS, true) < 0)
|
||||
// {
|
||||
// obidebug(1, "\nError creating the second column for the sequence counts when aligning");
|
||||
// return -1;
|
||||
// }
|
||||
// }
|
||||
if (print_count)
|
||||
{
|
||||
// Create the column for the count of the first sequences aligned
|
||||
if (obi_view_add_column(output_view, COUNT1_COLUMN_NAME, -1, NULL, OBI_INT, 0, 1, NULL, false, false, false, NULL, NULL, -1, COUNT1_COLUMN_COMMENTS, true) < 0)
|
||||
{
|
||||
obidebug(1, "\nError creating the first column for the sequence counts when aligning");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Create the column for the count of the second sequences aligned
|
||||
if (obi_view_add_column(output_view, COUNT2_COLUMN_NAME, -1, NULL, OBI_INT, 0, 1, NULL, false, false, false, NULL, NULL, -1, COUNT2_COLUMN_COMMENTS, true) < 0)
|
||||
{
|
||||
obidebug(1, "\nError creating the second column for the sequence counts when aligning");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -267,11 +267,11 @@ static int print_alignment_result(Obiview_p output_view,
|
||||
OBIDMS_column_p seq2_column,
|
||||
index_t seq1_idx,
|
||||
index_t seq2_idx,
|
||||
// bool print_count,
|
||||
// OBIDMS_column_p count1_column,
|
||||
// OBIDMS_column_p count2_column,
|
||||
// int count1,
|
||||
// int count2,
|
||||
bool print_count,
|
||||
OBIDMS_column_p count1_column,
|
||||
OBIDMS_column_p count2_column,
|
||||
int count1,
|
||||
int count2,
|
||||
OBIDMS_column_p ali_length_column,
|
||||
int ali_length,
|
||||
OBIDMS_column_p lcs_length_column,
|
||||
@ -322,21 +322,21 @@ static int print_alignment_result(Obiview_p output_view,
|
||||
}
|
||||
}
|
||||
|
||||
// // Write the counts if needed // TODO count columns not implemented yet
|
||||
// if (print_count)
|
||||
// {
|
||||
// if (obi_set_index_with_elt_idx_and_col_p_in_view(output_view, count1_column, line, 0, count1) < 0)
|
||||
// {
|
||||
// obidebug(1, "\nError writing count1 in a column");
|
||||
// return -1;
|
||||
// }
|
||||
//
|
||||
// if (obi_set_index_with_elt_idx_and_col_p_in_view(output_view, count2_column, line, 0, count2) < 0)
|
||||
// {
|
||||
// obidebug(1, "\nError writing count2 in a column");
|
||||
// return -1;
|
||||
// }
|
||||
// }
|
||||
// Write the counts if needed
|
||||
if (print_count)
|
||||
{
|
||||
if (obi_set_int_with_elt_idx_and_col_p_in_view(output_view, count1_column, line, 0, count1) < 0)
|
||||
{
|
||||
obidebug(1, "\nError writing count1 in a column");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (obi_set_int_with_elt_idx_and_col_p_in_view(output_view, count2_column, line, 0, count2) < 0)
|
||||
{
|
||||
obidebug(1, "\nError writing count2 in a column");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// Write the alignment length if it was computed
|
||||
if ((reference == ALILEN) && (normalize || !similarity_mode))
|
||||
@ -385,9 +385,13 @@ static int print_alignment_result(Obiview_p output_view,
|
||||
**********************************************************************/
|
||||
|
||||
|
||||
int obi_lcs_align_one_column(OBIDMS_p dms, const char* seq_view_name, const char* seq_column_name, const char* seq_elt_name,
|
||||
int obi_lcs_align_one_column(const char* dms_name,
|
||||
const char* seq_view_name,
|
||||
const char* seq_column_name,
|
||||
const char* seq_elt_name,
|
||||
const char* id_column_name,
|
||||
const char* output_view_name, const char* output_view_comments,
|
||||
const char* output_view_name,
|
||||
const char* output_view_comments,
|
||||
bool print_seq, bool print_count,
|
||||
double threshold, bool normalize, int reference, bool similarity_mode,
|
||||
int thread_count)
|
||||
@ -396,6 +400,7 @@ int obi_lcs_align_one_column(OBIDMS_p dms, const char* seq_view_name, const char
|
||||
index_t seq_count;
|
||||
index_t id1_idx, id2_idx;
|
||||
index_t seq1_idx, seq2_idx;
|
||||
int count1, count2;
|
||||
double score;
|
||||
int lcs_length;
|
||||
int ali_length;
|
||||
@ -405,16 +410,18 @@ int obi_lcs_align_one_column(OBIDMS_p dms, const char* seq_view_name, const char
|
||||
int lcs_min;
|
||||
index_t seq_elt_idx;
|
||||
|
||||
OBIDMS_p dms = NULL;
|
||||
Obiview_p seq_view = NULL;
|
||||
Obiview_p output_view = NULL;
|
||||
OBIDMS_column_p iseq_column = NULL;
|
||||
OBIDMS_column_p i_count_column = NULL;
|
||||
OBIDMS_column_p id_column = NULL;
|
||||
OBIDMS_column_p id1_column = NULL;
|
||||
OBIDMS_column_p id2_column = NULL;
|
||||
OBIDMS_column_p seq1_column = NULL;
|
||||
OBIDMS_column_p seq2_column = NULL;
|
||||
//OBIDMS_column_p count1_column = NULL;
|
||||
//OBIDMS_column_p count2_column = NULL;
|
||||
OBIDMS_column_p count1_column = NULL;
|
||||
OBIDMS_column_p count2_column = NULL;
|
||||
OBIDMS_column_p idx1_column = NULL;
|
||||
OBIDMS_column_p idx2_column = NULL;
|
||||
OBIDMS_column_p lcs_length_column = NULL;
|
||||
@ -423,6 +430,14 @@ int obi_lcs_align_one_column(OBIDMS_p dms, const char* seq_view_name, const char
|
||||
|
||||
k = 0;
|
||||
|
||||
// Open DMS
|
||||
dms = obi_open_dms(dms_name);
|
||||
if (dms == NULL)
|
||||
{
|
||||
obidebug(1, "\nError opening the DMS");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Open input view
|
||||
seq_view = obi_open_view(dms, seq_view_name);
|
||||
if (seq_view == NULL)
|
||||
@ -494,6 +509,17 @@ int obi_lcs_align_one_column(OBIDMS_p dms, const char* seq_view_name, const char
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Open the input count column
|
||||
if (print_count)
|
||||
{
|
||||
i_count_column = obi_view_get_column(seq_view, COUNT_COLUMN);
|
||||
if (i_count_column == NULL)
|
||||
{
|
||||
obidebug(1, "\nError getting the input COUNT column");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// Create the output view
|
||||
output_view = obi_new_view(dms, output_view_name, NULL, NULL, output_view_comments);
|
||||
if (output_view == NULL)
|
||||
@ -521,11 +547,11 @@ int obi_lcs_align_one_column(OBIDMS_p dms, const char* seq_view_name, const char
|
||||
seq1_column = obi_view_get_column(output_view, SEQ1_COLUMN_NAME);
|
||||
seq2_column = obi_view_get_column(output_view, SEQ2_COLUMN_NAME);
|
||||
}
|
||||
// if (print_count) // TODO count columns not implemented yet
|
||||
// {
|
||||
// count1_column = obi_view_get_column(seq_view, COUNT1_COLUMN_NAME);
|
||||
// count2_column = obi_view_get_column(seq_view, COUNT2_COLUMN_NAME);
|
||||
// }
|
||||
if (print_count)
|
||||
{
|
||||
count1_column = obi_view_get_column(output_view, COUNT1_COLUMN_NAME);
|
||||
count2_column = obi_view_get_column(output_view, COUNT2_COLUMN_NAME);
|
||||
}
|
||||
|
||||
// Build kmer tables
|
||||
ktable = hash_seq_column(seq_view, iseq_column, seq_elt_idx);
|
||||
@ -597,11 +623,18 @@ int obi_lcs_align_one_column(OBIDMS_p dms, const char* seq_view_name, const char
|
||||
// Get second id idx
|
||||
id2_idx = obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, id_column, j, 0);
|
||||
|
||||
// Get counts // TODO use array for efficiency?
|
||||
if (print_count)
|
||||
{
|
||||
count1 = obi_get_int_with_elt_idx_and_col_p_in_view(seq_view, i_count_column, i, 0);
|
||||
count2 = obi_get_int_with_elt_idx_and_col_p_in_view(seq_view, i_count_column, j, 0);
|
||||
}
|
||||
|
||||
if (print_alignment_result(output_view, k,
|
||||
idx1_column, idx2_column, i, j,
|
||||
id1_column, id2_column, id1_idx, id2_idx,
|
||||
print_seq, seq1_column, seq2_column, seq1_idx, seq2_idx,
|
||||
//print_count, count1_column, count2_column, count1, count2,
|
||||
print_count, count1_column, count2_column, count1, count2,
|
||||
ali_length_column, ali_length,
|
||||
lcs_length_column, lcs_length,
|
||||
score_column, score,
|
||||
@ -625,13 +658,19 @@ int obi_lcs_align_one_column(OBIDMS_p dms, const char* seq_view_name, const char
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (obi_close_dms(dms, false) < 0)
|
||||
{
|
||||
obidebug(1, "\nError closing the DMS after aligning");
|
||||
return -1;
|
||||
}
|
||||
|
||||
free_kmer_tables(ktable, seq_count);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int obi_lcs_align_two_columns(OBIDMS_p dms,
|
||||
int obi_lcs_align_two_columns(const char* dms_name,
|
||||
const char* seq1_view_name,
|
||||
const char* seq2_view_name,
|
||||
const char* seq1_column_name,
|
||||
@ -640,7 +679,8 @@ int obi_lcs_align_two_columns(OBIDMS_p dms,
|
||||
const char* seq2_elt_name,
|
||||
const char* id1_column_name,
|
||||
const char* id2_column_name,
|
||||
const char* output_view_name, const char* output_view_comments,
|
||||
const char* output_view_name,
|
||||
const char* output_view_comments,
|
||||
bool print_seq, bool print_count,
|
||||
double threshold, bool normalize, int reference, bool similarity_mode)
|
||||
{
|
||||
@ -649,6 +689,7 @@ int obi_lcs_align_two_columns(OBIDMS_p dms,
|
||||
index_t seq2_count;
|
||||
index_t id1_idx, id2_idx;
|
||||
index_t seq1_idx, seq2_idx;
|
||||
int count1, count2;
|
||||
double score;
|
||||
int lcs_length;
|
||||
int ali_length;
|
||||
@ -660,6 +701,7 @@ int obi_lcs_align_two_columns(OBIDMS_p dms,
|
||||
index_t seq2_elt_idx;
|
||||
bool same_indexer;
|
||||
|
||||
OBIDMS_p dms = NULL;
|
||||
Obiview_p seq1_view = NULL;
|
||||
Obiview_p seq2_view = NULL;
|
||||
Obiview_p output_view = NULL;
|
||||
@ -667,12 +709,14 @@ int obi_lcs_align_two_columns(OBIDMS_p dms,
|
||||
OBIDMS_column_p i_seq2_column = NULL;
|
||||
OBIDMS_column_p i_id1_column = NULL;
|
||||
OBIDMS_column_p i_id2_column = NULL;
|
||||
OBIDMS_column_p i_count1_column = NULL;
|
||||
OBIDMS_column_p i_count2_column = NULL;
|
||||
OBIDMS_column_p id1_column = NULL;
|
||||
OBIDMS_column_p id2_column = NULL;
|
||||
OBIDMS_column_p seq1_column = NULL;
|
||||
OBIDMS_column_p seq2_column = NULL;
|
||||
//OBIDMS_column_p count1_column = NULL;
|
||||
//OBIDMS_column_p count2_column = NULL;
|
||||
OBIDMS_column_p count1_column = NULL;
|
||||
OBIDMS_column_p count2_column = NULL;
|
||||
OBIDMS_column_p idx1_column = NULL;
|
||||
OBIDMS_column_p idx2_column = NULL;
|
||||
OBIDMS_column_p lcs_length_column = NULL;
|
||||
@ -681,6 +725,14 @@ int obi_lcs_align_two_columns(OBIDMS_p dms,
|
||||
|
||||
k = 0;
|
||||
|
||||
// Open DMS
|
||||
dms = obi_open_dms(dms_name);
|
||||
if (dms == NULL)
|
||||
{
|
||||
obidebug(1, "\nError opening the DMS to align");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Open the first input view
|
||||
seq1_view = obi_open_view(dms, seq1_view_name);
|
||||
if (seq1_view == NULL)
|
||||
@ -835,6 +887,23 @@ int obi_lcs_align_two_columns(OBIDMS_p dms,
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Open the input count columns
|
||||
if (print_count)
|
||||
{
|
||||
i_count1_column = obi_view_get_column(seq1_view, COUNT_COLUMN);
|
||||
if (i_count1_column == NULL)
|
||||
{
|
||||
obidebug(1, "\nError getting the first input COUNT column");
|
||||
return -1;
|
||||
}
|
||||
i_count2_column = obi_view_get_column(seq2_view, COUNT_COLUMN);
|
||||
if (i_count2_column == NULL)
|
||||
{
|
||||
obidebug(1, "\nError getting the second input COUNT column");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// Create the output view
|
||||
output_view = obi_new_view(dms, output_view_name, NULL, NULL, output_view_comments);
|
||||
if (output_view == NULL)
|
||||
@ -862,11 +931,11 @@ int obi_lcs_align_two_columns(OBIDMS_p dms,
|
||||
seq1_column = obi_view_get_column(output_view, SEQ1_COLUMN_NAME);
|
||||
seq2_column = obi_view_get_column(output_view, SEQ2_COLUMN_NAME);
|
||||
}
|
||||
// if (print_count) // TODO count columns not implemented yet
|
||||
// {
|
||||
// count1_column = obi_view_get_column(seq_view, COUNT1_COLUMN_NAME);
|
||||
// count2_column = obi_view_get_column(seq_view, COUNT2_COLUMN_NAME);
|
||||
// }
|
||||
if (print_count)
|
||||
{
|
||||
count1_column = obi_view_get_column(output_view, COUNT1_COLUMN_NAME);
|
||||
count2_column = obi_view_get_column(output_view, COUNT2_COLUMN_NAME);
|
||||
}
|
||||
|
||||
// Check if the sequence columns share the same indexer (allows for quick checking of sequence equality)
|
||||
if (strcmp((i_seq1_column->header)->indexer_name, (i_seq2_column->header)->indexer_name) == 0)
|
||||
@ -949,11 +1018,18 @@ int obi_lcs_align_two_columns(OBIDMS_p dms,
|
||||
// Get second id idx
|
||||
id2_idx = obi_get_index_with_elt_idx_and_col_p_in_view(seq2_view, i_id2_column, j, 0);
|
||||
|
||||
// Get counts // TODO use array for efficiency?
|
||||
if (print_count)
|
||||
{
|
||||
count1 = obi_get_int_with_elt_idx_and_col_p_in_view(seq1_view, i_count1_column, i, 0);
|
||||
count2 = obi_get_int_with_elt_idx_and_col_p_in_view(seq2_view, i_count2_column, j, 0);
|
||||
}
|
||||
|
||||
if (print_alignment_result(output_view, k,
|
||||
idx1_column, idx2_column, i, j,
|
||||
id1_column, id2_column, id1_idx, id2_idx,
|
||||
print_seq, seq1_column, seq2_column, seq1_idx, seq2_idx,
|
||||
//print_count, count1_column, count2_column, count1, count2,
|
||||
print_count, count1_column, count2_column, count1, count2,
|
||||
ali_length_column, ali_length,
|
||||
lcs_length_column, lcs_length,
|
||||
score_column, score,
|
||||
@ -986,6 +1062,12 @@ int obi_lcs_align_two_columns(OBIDMS_p dms,
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (obi_close_dms(dms, false) < 0)
|
||||
{
|
||||
obidebug(1, "\nError closing the DMS after aligning");
|
||||
return -1;
|
||||
}
|
||||
|
||||
free_kmer_tables(ktable, seq1_count + seq2_count);
|
||||
|
||||
return 0;
|
@ -3,15 +3,15 @@
|
||||
****************************************************************************/
|
||||
|
||||
/**
|
||||
* @file obi_align.h
|
||||
* @file obi_lcs.h
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
* @date May 11th 2016
|
||||
* @brief Header file for the functions handling the LCS alignment of DNA sequences.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef OBI_ALIGN_H_
|
||||
#define OBI_ALIGN_H_
|
||||
#ifndef OBI_LCS_H_
|
||||
#define OBI_LCS_H_
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
@ -59,7 +59,7 @@
|
||||
*
|
||||
* Note: The columns where the results are written are automatically named and created.
|
||||
*
|
||||
* @param dms A pointer to an OBIDMS.
|
||||
* @param dms_name The path of the DMS.
|
||||
* @param seq_view_name The name of the view where the column to align is.
|
||||
* @param seq_column_name The name of the OBI_SEQ column in the input view to align.
|
||||
* If "" (empty string), and the input view is of type NUC_SEQS_VIEW, the associated "NUC_SEQ" column is aligned.
|
||||
@ -87,10 +87,13 @@
|
||||
* @since May 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int obi_lcs_align_one_column(OBIDMS_p dms,
|
||||
const char* seq_view_name, const char* seq_column_name, const char* seq_elt_name,
|
||||
int obi_lcs_align_one_column(const char* dms_name,
|
||||
const char* seq_view_name,
|
||||
const char* seq_column_name,
|
||||
const char* seq_elt_name,
|
||||
const char* id_column_name,
|
||||
const char* output_view_name, const char* output_view_comments,
|
||||
const char* output_view_name,
|
||||
const char* output_view_comments,
|
||||
bool print_seq, bool print_count,
|
||||
double threshold, bool normalize, int reference, bool similarity_mode,
|
||||
int thread_count);
|
||||
@ -103,7 +106,7 @@ int obi_lcs_align_one_column(OBIDMS_p dms,
|
||||
*
|
||||
* Note: The columns where the results are written are automatically named and created.
|
||||
*
|
||||
* @param dms A pointer to an OBIDMS.
|
||||
* @param dms_name The path of the DMS.
|
||||
* @param seq1_view_name The name of the view where the first column to align is.
|
||||
* @param seq2_view_name The name of the view where the second column to align is ("" if it is the same view as the first one).
|
||||
* @param seq1_column_name The name of the first OBI_SEQ column in the input view to align.
|
||||
@ -138,7 +141,7 @@ int obi_lcs_align_one_column(OBIDMS_p dms,
|
||||
* @since December 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int obi_lcs_align_two_columns(OBIDMS_p dms,
|
||||
int obi_lcs_align_two_columns(const char* dms_name,
|
||||
const char* seq1_view_name,
|
||||
const char* seq2_view_name,
|
||||
const char* seq1_column_name,
|
||||
@ -147,10 +150,11 @@ int obi_lcs_align_two_columns(OBIDMS_p dms,
|
||||
const char* seq2_elt_name,
|
||||
const char* id1_column_name,
|
||||
const char* id2_column_name,
|
||||
const char* output_view_name, const char* output_view_comments,
|
||||
const char* output_view_name,
|
||||
const char* output_view_comments,
|
||||
bool print_seq, bool print_count,
|
||||
double threshold, bool normalize, int reference, bool similarity_mode);
|
||||
|
||||
|
||||
#endif /* OBI_ALIGN_H_ */
|
||||
#endif /* OBI_LCS_H_ */
|
||||
|
Reference in New Issue
Block a user