1st version of obi align command and reworked functions that handle
column alignment
This commit is contained in:
128
python/obitools3/commands/align.pyx
Normal file
128
python/obitools3/commands/align.pyx
Normal file
@ -0,0 +1,128 @@
|
|||||||
|
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||||
|
from obitools3.obidms._obidms import OBIDMS, OBIView # TODO cimport doesn't work
|
||||||
|
|
||||||
|
|
||||||
|
import time
|
||||||
|
|
||||||
|
__title__="Aligns one sequence column with itself or two sequence columns"
|
||||||
|
|
||||||
|
|
||||||
|
default_config = { 'inputview' : None,
|
||||||
|
'skip' : 0,
|
||||||
|
'only' : None,
|
||||||
|
'skiperror' : False,
|
||||||
|
'moltype' : 'nuc',
|
||||||
|
}
|
||||||
|
|
||||||
|
def addOptions(parser):
|
||||||
|
|
||||||
|
# TODO put this common group somewhere else but I don't know where
|
||||||
|
group=parser.add_argument_group('DMS and view options')
|
||||||
|
|
||||||
|
group.add_argument('--default-dms','-d',
|
||||||
|
action="store", dest="obi:defaultdms",
|
||||||
|
metavar='<DMS NAME>',
|
||||||
|
default=None,
|
||||||
|
type=str,
|
||||||
|
help="Name of the default DMS for reading and writing data.")
|
||||||
|
|
||||||
|
group.add_argument('--input-view','-i',
|
||||||
|
action="store", dest="obi:inputview",
|
||||||
|
metavar='<INPUT VIEW NAME>',
|
||||||
|
default=None,
|
||||||
|
type=str,
|
||||||
|
help="Name of the input view, either raw if the view is in the default DMS,"
|
||||||
|
" or in the form 'dms:view' if it is in another DMS.")
|
||||||
|
|
||||||
|
# TODO eventually 2nd view, or 2nd column?
|
||||||
|
|
||||||
|
group.add_argument('--output-view','-o',
|
||||||
|
action="store", dest="obi:outputview",
|
||||||
|
metavar='<OUTPUT VIEW NAME>',
|
||||||
|
default=None,
|
||||||
|
type=str,
|
||||||
|
help="Name of the output view, either raw if the view is in the default DMS,"
|
||||||
|
" or in the form 'dms:view' if it is in another DMS.")
|
||||||
|
|
||||||
|
|
||||||
|
group=parser.add_argument_group('obi align specific options')
|
||||||
|
|
||||||
|
group.add_argument('--lcs','-C',
|
||||||
|
action="store", dest="align:alitype",
|
||||||
|
metavar='<ALIGNMENT TYPE>',
|
||||||
|
default='lcs',
|
||||||
|
type=str,
|
||||||
|
help="Compute alignment using the LCS method.")
|
||||||
|
|
||||||
|
group.add_argument('--threshold','-t',
|
||||||
|
action="store", dest="align:threshold",
|
||||||
|
metavar='<THRESHOLD>',
|
||||||
|
default=0.0,
|
||||||
|
type=float,
|
||||||
|
help="Score threshold. If the score is normalized and expressed in similarity (default),"
|
||||||
|
" it is an identity, e.g. 0.95 for an identity of 95%%. If the score is normalized"
|
||||||
|
" and expressed in distance, it is (1.0 - identity), e.g. 0.05 for an identity of 95%%."
|
||||||
|
" If the score is not normalized and expressed in similarity, it is the length of the"
|
||||||
|
" Longest Common Subsequence. If the score is not normalized and expressed in distance,"
|
||||||
|
" it is (reference length - LCS length)."
|
||||||
|
" Only sequence pairs with a similarity above <THRESHOLD> are printed. Default: 0.00"
|
||||||
|
" (no threshold).")
|
||||||
|
|
||||||
|
group.add_argument('--longest_length','-L',
|
||||||
|
action="store_const", dest="align:reflength",
|
||||||
|
default="ali",
|
||||||
|
const="longest",
|
||||||
|
help="The reference length is the length of the longest sequence."
|
||||||
|
" Default: the reference length is the length of the alignment.")
|
||||||
|
|
||||||
|
group.add_argument('--shortest_length','-l',
|
||||||
|
action="store_const", dest="align:reflength",
|
||||||
|
default="ali",
|
||||||
|
const="shortest",
|
||||||
|
help="The reference length is the length of the shortest sequence."
|
||||||
|
" Default: the reference length is the length of the alignment.")
|
||||||
|
|
||||||
|
group.add_argument('--raw','-r',
|
||||||
|
action="store_false", dest="align:normalize",
|
||||||
|
default=True,
|
||||||
|
help="Raw score, not normalized. Default: score is normalized with the reference sequence length.")
|
||||||
|
|
||||||
|
group.add_argument('--distance','-D',
|
||||||
|
action="store_false", dest="align:similarity",
|
||||||
|
default=True,
|
||||||
|
help="Score is expressed in distance. Default: score is expressed in similarity.")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def run(config):
|
||||||
|
|
||||||
|
#pb = ProgressBar(1, config, seconde=5) # TODO
|
||||||
|
|
||||||
|
# Open DMS
|
||||||
|
d = OBIDMS(config['obi']['defaultdms'])
|
||||||
|
|
||||||
|
# Open input view 1
|
||||||
|
iview = d.open_view(config['obi']['inputview'])
|
||||||
|
|
||||||
|
# TODO Open input view 2 if there is one
|
||||||
|
|
||||||
|
# Create output view if necessary
|
||||||
|
if config['obi']['outputview'] is not None :
|
||||||
|
oview = d.new_view(config['obi']['outputview'])
|
||||||
|
else :
|
||||||
|
oview = None
|
||||||
|
|
||||||
|
# TODO Take other alignment types into account when they'll be implemented
|
||||||
|
|
||||||
|
# Call cython alignment function
|
||||||
|
iview.align(output_view=oview)
|
||||||
|
|
||||||
|
print("Done.")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -77,6 +77,14 @@ cdef class OBIView_NUC_SEQS(OBIView):
|
|||||||
cdef OBIDMS_column qualities
|
cdef OBIDMS_column qualities
|
||||||
|
|
||||||
cpdef delete_column(self, str column_name)
|
cpdef delete_column(self, str column_name)
|
||||||
|
cpdef align(self,
|
||||||
|
OBIView iview2=*,
|
||||||
|
object output_view=*,
|
||||||
|
double threshold=*,
|
||||||
|
bint normalize=*,
|
||||||
|
int reference=*,
|
||||||
|
bint similarity_mode=*
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
cdef class OBIView_line :
|
cdef class OBIView_line :
|
||||||
|
@ -11,6 +11,8 @@ from .capi.obidmscolumn cimport obi_close_column, \
|
|||||||
|
|
||||||
from .capi.obiutils cimport obi_format_date
|
from .capi.obiutils cimport obi_format_date
|
||||||
|
|
||||||
|
from .capi.obialign cimport obi_align_one_column
|
||||||
|
|
||||||
from .capi.obitypes cimport const_char_p, \
|
from .capi.obitypes cimport const_char_p, \
|
||||||
OBIType_t, \
|
OBIType_t, \
|
||||||
OBI_INT, \
|
OBI_INT, \
|
||||||
@ -455,6 +457,7 @@ cdef class OBIView :
|
|||||||
to_print = to_print + str(line) + "\n"
|
to_print = to_print + str(line) + "\n"
|
||||||
return to_print
|
return to_print
|
||||||
|
|
||||||
|
|
||||||
#############################################
|
#############################################
|
||||||
|
|
||||||
cdef class OBIView_NUC_SEQS(OBIView):
|
cdef class OBIView_NUC_SEQS(OBIView):
|
||||||
@ -526,6 +529,75 @@ cdef class OBIView_NUC_SEQS(OBIView):
|
|||||||
self[line_idx][key] = sequence_obj[key]
|
self[line_idx][key] = sequence_obj[key]
|
||||||
|
|
||||||
|
|
||||||
|
# TODO
|
||||||
|
cpdef align(self, OBIView iview2=None, object output_view=None,
|
||||||
|
double threshold=0.0, bint normalize=True, int reference=0, bint similarity_mode=True) :
|
||||||
|
|
||||||
|
cdef OBIView iview1
|
||||||
|
cdef OBIView oview
|
||||||
|
|
||||||
|
cdef Obiview_p iview1_p
|
||||||
|
cdef Obiview_p iview2_p
|
||||||
|
cdef Obiview_p oview_p
|
||||||
|
|
||||||
|
cdef OBIDMS_column icol1
|
||||||
|
cdef OBIDMS_column_p icol1_p
|
||||||
|
cdef OBIDMS_column_p* icol1_pp
|
||||||
|
|
||||||
|
cdef OBIDMS_column id1_col
|
||||||
|
cdef OBIDMS_column_p id1_col_p
|
||||||
|
cdef OBIDMS_column_p* id1_col_pp
|
||||||
|
|
||||||
|
cdef OBIDMS_column id2_col
|
||||||
|
cdef OBIDMS_column_p id2_col_p
|
||||||
|
cdef OBIDMS_column_p* id2_col_pp
|
||||||
|
|
||||||
|
cdef OBIDMS_column ocol
|
||||||
|
cdef OBIDMS_column_p ocol_p
|
||||||
|
cdef OBIDMS_column_p* ocol_pp
|
||||||
|
|
||||||
|
cdef str id1_col_name
|
||||||
|
cdef str id2_col_name
|
||||||
|
cdef str score_col_name
|
||||||
|
|
||||||
|
id1_col_name = "ID1" # TODO discuss names, aliases
|
||||||
|
id2_col_name = "ID2"
|
||||||
|
score_col_name = "score"
|
||||||
|
|
||||||
|
iview1= self
|
||||||
|
iview1_p = iview1.pointer
|
||||||
|
icol1 = iview1[bytes2str(NUC_SEQUENCE_COLUMN)]
|
||||||
|
icol1_pp = icol1.pointer
|
||||||
|
icol1_p = icol1_pp[0]
|
||||||
|
|
||||||
|
# Create the output view if needed
|
||||||
|
if output_view is None :
|
||||||
|
oview = self.dms.new_view("alignment_score_view") # TODO discuss
|
||||||
|
elif type(output_view) == str :
|
||||||
|
oview = self.dms.new_view(output_view)
|
||||||
|
else :
|
||||||
|
oview = output_view
|
||||||
|
|
||||||
|
oview.add_column(id1_col_name, type='OBI_STR', create=True)
|
||||||
|
oview.add_column(id2_col_name, type='OBI_STR', create=True)
|
||||||
|
oview.add_column(score_col_name, type='OBI_FLOAT', create=True)
|
||||||
|
|
||||||
|
oview_p = oview.pointer
|
||||||
|
ocol = oview[score_col_name]
|
||||||
|
ocol_pp = ocol.pointer
|
||||||
|
ocol_p = ocol_pp[0]
|
||||||
|
|
||||||
|
id1_col = oview[id1_col_name]
|
||||||
|
id2_col = oview[id2_col_name]
|
||||||
|
id1_col_pp = id1_col.pointer
|
||||||
|
id2_col_pp = id2_col.pointer
|
||||||
|
id1_col_p = id1_col_pp[0]
|
||||||
|
id2_col_p = id2_col_pp[0]
|
||||||
|
|
||||||
|
if obi_align_one_column(iview1_p, icol1_p, oview_p, id1_col_p, id2_col_p, ocol_p, threshold, normalize, reference, similarity_mode) < 0 :
|
||||||
|
raise Exception("Error aligning sequences")
|
||||||
|
|
||||||
|
|
||||||
#############################################
|
#############################################
|
||||||
|
|
||||||
cdef class OBIView_line :
|
cdef class OBIView_line :
|
||||||
|
@ -8,17 +8,6 @@ cdef class OBIDMS_column_seq(OBIDMS_column):
|
|||||||
cpdef object get_line(self, index_t line_nb)
|
cpdef object get_line(self, index_t line_nb)
|
||||||
cpdef set_line(self, index_t line_nb, object value)
|
cpdef set_line(self, index_t line_nb, object value)
|
||||||
|
|
||||||
# TO DISCUSS :
|
|
||||||
# I'am not sure that this method has to be declared here
|
|
||||||
# Alignment must be declared outside of the sequence object
|
|
||||||
cpdef align(self,
|
|
||||||
OBIView score_view,
|
|
||||||
OBIDMS_column score_column,
|
|
||||||
double threshold = *,
|
|
||||||
bint normalize = *,
|
|
||||||
int reference = *,
|
|
||||||
bint similarity_mode = *)
|
|
||||||
|
|
||||||
|
|
||||||
cdef class OBIDMS_column_multi_elts_seq(OBIDMS_column_multi_elts):
|
cdef class OBIDMS_column_multi_elts_seq(OBIDMS_column_multi_elts):
|
||||||
cpdef object get_item(self, index_t line_nb, str element_name)
|
cpdef object get_item(self, index_t line_nb, str element_name)
|
||||||
|
@ -38,18 +38,6 @@ cdef class OBIDMS_column_seq(OBIDMS_column):
|
|||||||
if obi_set_seq_with_elt_idx_and_col_p_in_view(self.view.pointer, (self.pointer)[0], line_nb, 0, str2bytes(value)) < 0:
|
if obi_set_seq_with_elt_idx_and_col_p_in_view(self.view.pointer, (self.pointer)[0], line_nb, 0, str2bytes(value)) < 0:
|
||||||
raise Exception("Problem setting a value in a column")
|
raise Exception("Problem setting a value in a column")
|
||||||
|
|
||||||
# TODO choose alignment type (lcs or other) with supplementary argument
|
|
||||||
cpdef align(self,
|
|
||||||
OBIView score_view,
|
|
||||||
OBIDMS_column score_column,
|
|
||||||
double threshold = 0.0,
|
|
||||||
bint normalize = True,
|
|
||||||
int reference = 0, # TODO
|
|
||||||
bint similarity_mode = True):
|
|
||||||
if (obi_align_one_column(self.view.pointer, (self.pointer)[0], score_view.pointer, (score_column.pointer)[0], threshold, normalize, reference, similarity_mode) < 0) :
|
|
||||||
raise Exception("An error occurred while aligning sequences")
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
cdef class OBIDMS_column_multi_elts_seq(OBIDMS_column_multi_elts):
|
cdef class OBIDMS_column_multi_elts_seq(OBIDMS_column_multi_elts):
|
||||||
|
|
||||||
|
@ -6,5 +6,14 @@ from ..capi.obidmscolumn cimport OBIDMS_column_p
|
|||||||
|
|
||||||
cdef extern from "obi_align.h" nogil:
|
cdef extern from "obi_align.h" nogil:
|
||||||
|
|
||||||
int obi_align_one_column(Obiview_p seq_view, OBIDMS_column_p seq_column, Obiview_p score_view, OBIDMS_column_p score_column, double threshold, bint normalize, int reference, bint similarity_mode)
|
int obi_align_one_column(Obiview_p seq_view,
|
||||||
|
OBIDMS_column_p seq_column,
|
||||||
|
Obiview_p score_view,
|
||||||
|
OBIDMS_column_p id1_column,
|
||||||
|
OBIDMS_column_p id2_column,
|
||||||
|
OBIDMS_column_p score_column,
|
||||||
|
double threshold,
|
||||||
|
bint normalize,
|
||||||
|
int reference,
|
||||||
|
bint similarity_mode)
|
||||||
|
|
||||||
|
135
src/obi_align.c
135
src/obi_align.c
@ -25,20 +25,25 @@
|
|||||||
|
|
||||||
|
|
||||||
// TODO
|
// TODO
|
||||||
// use openMP pragmas : garder scores en memoire et ecrire a la fin? (normalement c bon avec index)
|
// use openMP pragmas
|
||||||
// tout ecrire en stdint?
|
// option pour ecrire en stdint?
|
||||||
// check NUC_SEQS and score type (int or float if normalize)
|
// check NUC_SEQS view type? and score type (int or float if normalize)
|
||||||
// what's with multiple sequence/line columns?
|
// what's with multiple sequences/line columns?
|
||||||
// make function that put blobs in int16
|
// make function that put blobs in int16
|
||||||
|
|
||||||
|
|
||||||
int obi_align_one_column(Obiview_p seq_view, OBIDMS_column_p seq_column, Obiview_p score_view, OBIDMS_column_p score_column, double threshold, bool normalize, int reference, bool similarity_mode)
|
int obi_align_one_column(Obiview_p seq_view, OBIDMS_column_p seq_column,
|
||||||
|
Obiview_p score_view, OBIDMS_column_p id1_column, OBIDMS_column_p id2_column, OBIDMS_column_p score_column,
|
||||||
|
double threshold, bool normalize, int reference, bool similarity_mode)
|
||||||
{
|
{
|
||||||
index_t i, j, k;
|
index_t i, j, k;
|
||||||
index_t seq_count;
|
index_t seq_count;
|
||||||
char* seq1;
|
char* seq1;
|
||||||
char* seq2;
|
char* seq2;
|
||||||
|
const char* id1;
|
||||||
|
const char* id2;
|
||||||
double score;
|
double score;
|
||||||
|
OBIDMS_column_p id_column;
|
||||||
|
|
||||||
k = 0;
|
k = 0;
|
||||||
|
|
||||||
@ -57,6 +62,9 @@ int obi_align_one_column(Obiview_p seq_view, OBIDMS_column_p seq_column, Obiview
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Get the ID column pointer
|
||||||
|
id_column = obi_view_get_column(seq_view, ID_COLUMN);
|
||||||
|
|
||||||
seq_count = (seq_column->header)->lines_used;
|
seq_count = (seq_column->header)->lines_used;
|
||||||
|
|
||||||
for (i=0; i < (seq_count - 1); i++)
|
for (i=0; i < (seq_count - 1); i++)
|
||||||
@ -65,8 +73,8 @@ int obi_align_one_column(Obiview_p seq_view, OBIDMS_column_p seq_column, Obiview
|
|||||||
{
|
{
|
||||||
//fprintf(stderr, "\ni=%lld, j=%lld, k=%lld", i, j, k);
|
//fprintf(stderr, "\ni=%lld, j=%lld, k=%lld", i, j, k);
|
||||||
|
|
||||||
seq1 = obi_column_get_obiseq_with_elt_idx_in_view(seq_view, seq_column, i, 0);
|
seq1 = obi_get_seq_with_elt_idx_and_col_p_in_view(seq_view, seq_column, i, 0);
|
||||||
seq2 = obi_column_get_obiseq_with_elt_idx_in_view(seq_view, seq_column, j, 0);
|
seq2 = obi_get_seq_with_elt_idx_and_col_p_in_view(seq_view, seq_column, j, 0);
|
||||||
|
|
||||||
if ((seq1 == NULL) || (seq2 == NULL))
|
if ((seq1 == NULL) || (seq2 == NULL))
|
||||||
{
|
{
|
||||||
@ -74,12 +82,32 @@ int obi_align_one_column(Obiview_p seq_view, OBIDMS_column_p seq_column, Obiview
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO filter
|
// TODO kmer filter
|
||||||
|
|
||||||
|
// Compute alignment score
|
||||||
score = generic_sse_banded_lcs_align(seq1, seq2, threshold, normalize, reference, similarity_mode);
|
score = generic_sse_banded_lcs_align(seq1, seq2, threshold, normalize, reference, similarity_mode);
|
||||||
|
|
||||||
|
// Get sequence ids
|
||||||
|
id1 = obi_get_str_with_elt_idx_and_col_p_in_view(seq_view, id_column, i, 0);
|
||||||
|
id2 = obi_get_str_with_elt_idx_and_col_p_in_view(seq_view, id_column, j, 0);
|
||||||
|
|
||||||
|
// Write sequence ids in output view
|
||||||
|
if (obi_set_str_with_elt_idx_and_col_p_in_view(score_view, id1_column, k, 0, id1) < 0)
|
||||||
|
{
|
||||||
|
obidebug(1, "\nError writing id1 in a column");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (obi_set_str_with_elt_idx_and_col_p_in_view(score_view, id2_column, k, 0, id2) < 0)
|
||||||
|
{
|
||||||
|
obidebug(1, "\nError writing id2 in a column");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write score in output view
|
||||||
if (normalize)
|
if (normalize)
|
||||||
{
|
{
|
||||||
if (obi_column_set_obifloat_with_elt_idx_in_view(score_view, score_column, k, 0, (obifloat_t) score) < 0)
|
if (obi_set_float_with_elt_idx_and_col_p_in_view(score_view, score_column, k, 0, (obifloat_t) score) < 0)
|
||||||
{
|
{
|
||||||
obidebug(1, "\nError writing alignment score in a column");
|
obidebug(1, "\nError writing alignment score in a column");
|
||||||
return -1;
|
return -1;
|
||||||
@ -87,13 +115,16 @@ int obi_align_one_column(Obiview_p seq_view, OBIDMS_column_p seq_column, Obiview
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (obi_column_set_obiint_with_elt_idx_in_view(score_view, score_column, k, 0, (obiint_t) score) < 0)
|
if (obi_set_int_with_elt_idx_and_col_p_in_view(score_view, score_column, k, 0, (obiint_t) score) < 0)
|
||||||
{
|
{
|
||||||
obidebug(1, "\nError writing alignment score in a column");
|
obidebug(1, "\nError writing alignment score in a column");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
free(seq1);
|
||||||
|
free(seq2);
|
||||||
|
|
||||||
k++;
|
k++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -102,8 +133,82 @@ int obi_align_one_column(Obiview_p seq_view, OBIDMS_column_p seq_column, Obiview
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int obi_align_two_columns(OBIDMS_column_p seq_column_1, OBIDMS_column_p seq_column_2, OBIDMS_column_p score_column, double threshold, bool normalize, int reference, bool similarity_mode)
|
// TODO discuss if 2 input views or 2 columns or both possible
|
||||||
{
|
//int obi_align_two_columns(Obiview_p seq_view, OBIDMS_column_p seq_column_1, OBIDMS_column_p seq_column_2, // TODO it's implied both seq columns are in the same view but maybe it shouldn't
|
||||||
// TODO
|
// Obiview_p score_view, OBIDMS_column_p score_column,
|
||||||
return 0;
|
// double threshold, bool normalize, int reference, bool similarity_mode)
|
||||||
}
|
//{
|
||||||
|
// index_t i, j, k;
|
||||||
|
// index_t seq_count_1;
|
||||||
|
// index_t seq_count_2;
|
||||||
|
// char* seq1;
|
||||||
|
// char* seq2;
|
||||||
|
// double score;
|
||||||
|
//
|
||||||
|
// k = 0;
|
||||||
|
//
|
||||||
|
// if (((seq_column_1->header)->returned_data_type != OBI_SEQ) || ((seq_column_2->header)->returned_data_type != OBI_SEQ))
|
||||||
|
// {
|
||||||
|
// obi_set_errno(OBI_ALIGN_ERROR);
|
||||||
|
// obidebug(1, "\nTrying to align a column of a different type than OBI_SEQ");
|
||||||
|
// return -1;
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// if ((normalize && ((score_column->header)->returned_data_type != OBI_FLOAT)) ||
|
||||||
|
// (!normalize && ((score_column->header)->returned_data_type != OBI_INT)))
|
||||||
|
// {
|
||||||
|
// obi_set_errno(OBI_ALIGN_ERROR);
|
||||||
|
// obidebug(1, "\nTrying to store alignment scores in a column of an inappropriate type");
|
||||||
|
// return -1;
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// seq_count_1 = (seq_column_1->header)->lines_used;
|
||||||
|
// seq_count_2 = (seq_column_2->header)->lines_used;
|
||||||
|
//
|
||||||
|
// for (i=0; i < (seq_count_1 - 1); i++)
|
||||||
|
// {
|
||||||
|
// for (j=0; j < seq_count_2; j++)
|
||||||
|
// {
|
||||||
|
// //fprintf(stderr, "\ni=%lld, j=%lld, k=%lld", i, j, k);
|
||||||
|
//
|
||||||
|
// seq1 = obi_get_seq_with_elt_idx_and_col_p_in_view(seq_view, seq_column_1, i, 0);
|
||||||
|
// seq2 = obi_get_seq_with_elt_idx_and_col_p_in_view(seq_view, seq_column_2, j, 0);
|
||||||
|
//
|
||||||
|
// if ((seq1 == NULL) || (seq2 == NULL))
|
||||||
|
// {
|
||||||
|
// obidebug(1, "\nError retrieving sequences to align");
|
||||||
|
// return -1;
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// // TODO kmer filter
|
||||||
|
//
|
||||||
|
// score = generic_sse_banded_lcs_align(seq1, seq2, threshold, normalize, reference, similarity_mode);
|
||||||
|
//
|
||||||
|
// if (normalize)
|
||||||
|
// {
|
||||||
|
// if (obi_set_float_with_elt_idx_and_col_p_in_view(score_view, score_column, k, 0, (obifloat_t) score) < 0)
|
||||||
|
// {
|
||||||
|
// obidebug(1, "\nError writing alignment score in a column");
|
||||||
|
// return -1;
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// else
|
||||||
|
// {
|
||||||
|
// if (obi_set_int_with_elt_idx_and_col_p_in_view(score_view, score_column, k, 0, (obiint_t) score) < 0)
|
||||||
|
// {
|
||||||
|
// obidebug(1, "\nError writing alignment score in a column");
|
||||||
|
// return -1;
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// free(seq1);
|
||||||
|
// free(seq2);
|
||||||
|
//
|
||||||
|
// k++;
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// return 0;
|
||||||
|
//}
|
||||||
|
|
||||||
|
|
||||||
|
@ -23,6 +23,35 @@
|
|||||||
#include "obitypes.h"
|
#include "obitypes.h"
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Aligns a NUC_SEQ column with itself.
|
||||||
|
*
|
||||||
|
* @param seq_view A pointer on the view where the column to align is.
|
||||||
|
* @param seq_column A pointer on the OBI_SEQ column to align.
|
||||||
|
* @param score_view A pointer on the view to write the outputs to.
|
||||||
|
* @param id1_column A pointer on the OBI_STR column in score_view where the id of the first sequence should be written.
|
||||||
|
* @param id2_column A pointer on the OBI_STR column in score_view where the id of the second sequence should be written.
|
||||||
|
* @param score_column A pointer on the OBI_FLOAT column in score_view where the alignment score should be written.
|
||||||
|
* @param threshold Score threshold. If the score is normalized and expressed in similarity, it is an identity, e.g. 0.95
|
||||||
|
* for an identity of 95%. If the score is normalized and expressed in distance, it is (1.0 - identity),
|
||||||
|
* e.g. 0.05 for an identity of 95%. If the score is not normalized and expressed in similarity, it is
|
||||||
|
* the length of the Longest Common Subsequence. If the score is not normalized and expressed in distance,
|
||||||
|
* it is (reference length - LCS length). Only sequence pairs with a similarity above the threshold are printed.
|
||||||
|
* @param normalize Whether the score should be normalized with the reference sequence length.
|
||||||
|
* @param reference The reference length. 0: The alignement length; 1: The longest sequence's length; 2: The shortest sequence's length.
|
||||||
|
* @param similarity_mode Whether the score should be expressed in similarity (true) or distance (false).
|
||||||
|
*
|
||||||
|
* @returns A value indicating the success of the operation.
|
||||||
|
* @retval 0 if the operation was successfully completed.
|
||||||
|
* @retval -1 if an error occurred.
|
||||||
|
*
|
||||||
|
* @since May 2016
|
||||||
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
|
*/
|
||||||
|
int obi_align_one_column(Obiview_p seq_view, OBIDMS_column_p seq_column,
|
||||||
|
Obiview_p score_view, OBIDMS_column_p id1_column, OBIDMS_column_p id2_column, OBIDMS_column_p score_column,
|
||||||
|
double threshold, bool normalize, int reference, bool similarity_mode);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief
|
* @brief
|
||||||
@ -30,7 +59,9 @@
|
|||||||
* TODO
|
* TODO
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
int obi_align_one_column(Obiview_p seq_view, OBIDMS_column_p seq_column, Obiview_p score_view, OBIDMS_column_p score_column, double threshold, bool normalize, int reference, bool similarity_mode);
|
//int obi_align_two_columns(Obiview_p seq_view, OBIDMS_column_p seq_column_1, OBIDMS_column_p seq_column_2,
|
||||||
|
// Obiview_p score_view, OBIDMS_column_p score_column,
|
||||||
|
// double threshold, bool normalize, int reference, bool similarity_mode);
|
||||||
|
|
||||||
|
|
||||||
#endif /* OBI_ALIGN_H_ */
|
#endif /* OBI_ALIGN_H_ */
|
||||||
|
Reference in New Issue
Block a user