Compare commits
8 Commits
v3.0.0-bet
...
v3.0.0-bet
Author | SHA1 | Date | |
---|---|---|---|
974528b2e6 | |||
1b346b54f9 | |||
058f2ad8b3 | |||
60bfd3ae8d | |||
67bdee105a | |||
0f745e0113 | |||
da8de52ba4 | |||
4d36538c6e |
@ -190,58 +190,50 @@ def sequenceTaggerGenerator(config, taxo=None):
|
|||||||
seq['seq_rank']=counter[0]
|
seq['seq_rank']=counter[0]
|
||||||
|
|
||||||
for i,v in toSet:
|
for i,v in toSet:
|
||||||
#try:
|
try:
|
||||||
if taxo is not None:
|
if taxo is not None:
|
||||||
environ = {'taxonomy' : taxo, 'sequence':seq, 'counter':counter[0], 'math':math}
|
environ = {'taxonomy' : taxo, 'sequence':seq, 'counter':counter[0], 'math':math}
|
||||||
else:
|
else:
|
||||||
environ = {'sequence':seq, 'counter':counter[0], 'math':math}
|
environ = {'sequence':seq, 'counter':counter[0], 'math':math}
|
||||||
val = eval(v, environ, seq)
|
val = eval(v, environ, seq)
|
||||||
#except Exception,e: # TODO discuss usefulness of this
|
except Exception: # set string if not a valid expression
|
||||||
# if options.onlyValid:
|
val = v
|
||||||
# raise e
|
|
||||||
# val = v
|
|
||||||
seq[i]=val
|
seq[i]=val
|
||||||
|
|
||||||
if length:
|
if length:
|
||||||
seq['seq_length']=len(seq)
|
seq['seq_length']=len(seq)
|
||||||
|
|
||||||
if newId is not None:
|
if newId is not None:
|
||||||
# try:
|
try:
|
||||||
if taxo is not None:
|
if taxo is not None:
|
||||||
environ = {'taxonomy' : taxo, 'sequence':seq, 'counter':counter[0], 'math':math}
|
environ = {'taxonomy' : taxo, 'sequence':seq, 'counter':counter[0], 'math':math}
|
||||||
else:
|
else:
|
||||||
environ = {'sequence':seq, 'counter':counter[0], 'math':math}
|
environ = {'sequence':seq, 'counter':counter[0], 'math':math}
|
||||||
val = eval(newId, environ, seq)
|
val = eval(newId, environ, seq)
|
||||||
# except Exception,e:
|
except Exception: # set string if not a valid expression
|
||||||
# if options.onlyValid:
|
val = newId
|
||||||
# raise e
|
|
||||||
# val = newId
|
|
||||||
seq.id=val
|
seq.id=val
|
||||||
|
|
||||||
if newDef is not None:
|
if newDef is not None:
|
||||||
# try:
|
try:
|
||||||
if taxo is not None:
|
if taxo is not None:
|
||||||
environ = {'taxonomy' : taxo, 'sequence':seq, 'counter':counter[0], 'math':math}
|
environ = {'taxonomy' : taxo, 'sequence':seq, 'counter':counter[0], 'math':math}
|
||||||
else:
|
else:
|
||||||
environ = {'sequence':seq, 'counter':counter[0], 'math':math}
|
environ = {'sequence':seq, 'counter':counter[0], 'math':math}
|
||||||
val = eval(newDef, environ, seq)
|
val = eval(newDef, environ, seq)
|
||||||
# except Exception,e:
|
except Exception: # set string if not a valid expression
|
||||||
# if options.onlyValid:
|
val = newDef
|
||||||
# raise e
|
|
||||||
# val = newDef
|
|
||||||
seq.definition=val
|
seq.definition=val
|
||||||
#
|
|
||||||
if newSeq is not None:
|
if newSeq is not None:
|
||||||
# try:
|
try:
|
||||||
if taxo is not None:
|
if taxo is not None:
|
||||||
environ = {'taxonomy' : taxo, 'sequence':seq, 'counter':counter[0], 'math':math}
|
environ = {'taxonomy' : taxo, 'sequence':seq, 'counter':counter[0], 'math':math}
|
||||||
else:
|
else:
|
||||||
environ = {'sequence':seq, 'counter':counter[0], 'math':math}
|
environ = {'sequence':seq, 'counter':counter[0], 'math':math}
|
||||||
val = eval(newSeq, environ, seq)
|
val = eval(newSeq, environ, seq)
|
||||||
# except Exception,e:
|
except Exception: # set string if not a valid expression
|
||||||
# if options.onlyValid:
|
val = newSeq
|
||||||
# raise e
|
|
||||||
# val = newSeq
|
|
||||||
seq.seq=val
|
seq.seq=val
|
||||||
if 'seq_length' in seq:
|
if 'seq_length' in seq:
|
||||||
seq['seq_length']=len(seq)
|
seq['seq_length']=len(seq)
|
||||||
@ -251,15 +243,14 @@ def sequenceTaggerGenerator(config, taxo=None):
|
|||||||
seq.view.delete_column(QUALITY_COLUMN)
|
seq.view.delete_column(QUALITY_COLUMN)
|
||||||
|
|
||||||
if run is not None:
|
if run is not None:
|
||||||
# try:
|
try:
|
||||||
if taxo is not None:
|
if taxo is not None:
|
||||||
environ = {'taxonomy' : taxo, 'sequence':seq, 'counter':counter[0], 'math':math}
|
environ = {'taxonomy' : taxo, 'sequence':seq, 'counter':counter[0], 'math':math}
|
||||||
else:
|
else:
|
||||||
environ = {'sequence':seq, 'counter':counter[0], 'math':math}
|
environ = {'sequence':seq, 'counter':counter[0], 'math':math}
|
||||||
eval(run, environ, seq)
|
eval(run, environ, seq)
|
||||||
# except Exception,e:
|
except Exception,e:
|
||||||
# if options.onlyValid:
|
raise e
|
||||||
# raise e
|
|
||||||
|
|
||||||
return sequenceTagger
|
return sequenceTagger
|
||||||
|
|
||||||
|
@ -59,13 +59,23 @@ def run(config):
|
|||||||
# Check that the input view has the type NUC_SEQS if needed # TODO discuss, maybe bool property
|
# Check that the input view has the type NUC_SEQS if needed # TODO discuss, maybe bool property
|
||||||
if (output[2] == Nuc_Seq) and (iview.type != b"NUC_SEQS_VIEW") : # Nuc_Seq_Stored? TODO
|
if (output[2] == Nuc_Seq) and (iview.type != b"NUC_SEQS_VIEW") : # Nuc_Seq_Stored? TODO
|
||||||
raise Exception("Error: the view to export in fasta or fastq format is not a NUC_SEQS view")
|
raise Exception("Error: the view to export in fasta or fastq format is not a NUC_SEQS view")
|
||||||
|
|
||||||
|
if config['obi']['only'] is not None:
|
||||||
|
withoutskip = min(input[4], config['obi']['only'])
|
||||||
|
else:
|
||||||
|
withoutskip = input[4]
|
||||||
|
|
||||||
|
if config['obi']['skip'] is not None:
|
||||||
|
skip = min(input[4], config['obi']['skip'])
|
||||||
|
else:
|
||||||
|
skip = 0
|
||||||
|
|
||||||
# Initialize the progress bar
|
# Initialize the progress bar
|
||||||
if config['obi']['noprogressbar']:
|
if config['obi']['noprogressbar']:
|
||||||
pb = None
|
pb = None
|
||||||
else:
|
else:
|
||||||
pb = ProgressBar(len(iview), config, seconde=5)
|
pb = ProgressBar(withoutskip - skip, config, seconde=5)
|
||||||
|
|
||||||
i=0
|
i=0
|
||||||
for seq in iview :
|
for seq in iview :
|
||||||
PyErr_CheckSignals()
|
PyErr_CheckSignals()
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
major = 3
|
major = 3
|
||||||
minor = 0
|
minor = 0
|
||||||
serial= '0-beta14'
|
serial= '0-beta15'
|
||||||
|
|
||||||
version ="%d.%02d.%s" % (major,minor,serial)
|
version ="%d.%02d.%s" % (major,minor,serial)
|
||||||
|
@ -157,7 +157,7 @@ int build_reference_db(const char* dms_name,
|
|||||||
ecotx_t* lca_2 = NULL;
|
ecotx_t* lca_2 = NULL;
|
||||||
ecotx_t* lca = NULL;
|
ecotx_t* lca = NULL;
|
||||||
index_t idx1, idx2;
|
index_t idx1, idx2;
|
||||||
index_t i, j, k;
|
index_t i, j, k, count;
|
||||||
int32_t taxid_array_length;
|
int32_t taxid_array_length;
|
||||||
int32_t score_array_length;
|
int32_t score_array_length;
|
||||||
int32_t taxid_array_writable_length;
|
int32_t taxid_array_writable_length;
|
||||||
@ -185,6 +185,7 @@ int build_reference_db(const char* dms_name,
|
|||||||
matrix_view_name = strcpy(matrix_view_name, o_view_name);
|
matrix_view_name = strcpy(matrix_view_name, o_view_name);
|
||||||
strcat(matrix_view_name, "_matrix");
|
strcat(matrix_view_name, "_matrix");
|
||||||
|
|
||||||
|
fprintf(stderr, "Aligning queries with reference database...\n");
|
||||||
if (obi_lcs_align_one_column(dms_name,
|
if (obi_lcs_align_one_column(dms_name,
|
||||||
refs_view_name,
|
refs_view_name,
|
||||||
"",
|
"",
|
||||||
@ -320,13 +321,19 @@ int build_reference_db(const char* dms_name,
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
count = (matrix_with_lca_view->infos)->line_count;
|
||||||
|
fprintf(stderr, "Computing LCAs...\n");
|
||||||
|
|
||||||
// Compute all the LCAs
|
// Compute all the LCAs
|
||||||
// For each pair
|
// For each pair
|
||||||
for (i=0; i<(matrix_with_lca_view->infos)->line_count; i++)
|
for (i=0; i<count; i++)
|
||||||
{
|
{
|
||||||
if (! keep_running)
|
if (! keep_running)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
|
if (i%1000 == 0)
|
||||||
|
fprintf(stderr,"\rDone : %f %% ", (i / (float) count)*100);
|
||||||
|
|
||||||
// Read all taxids associated with the first sequence and compute their LCA
|
// Read all taxids associated with the first sequence and compute their LCA
|
||||||
// Read line index
|
// Read line index
|
||||||
idx1 = obi_get_int_with_elt_idx_and_col_p_in_view(matrix_with_lca_view, matrix_idx1_column, i, 0);
|
idx1 = obi_get_int_with_elt_idx_and_col_p_in_view(matrix_with_lca_view, matrix_idx1_column, i, 0);
|
||||||
@ -363,6 +370,7 @@ int build_reference_db(const char* dms_name,
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
fprintf(stderr,"\rDone : 100 %% \n");
|
||||||
|
|
||||||
// Clone refs view, add 2 arrays columns for lca and score, compute and write them
|
// Clone refs view, add 2 arrays columns for lca and score, compute and write them
|
||||||
|
|
||||||
@ -442,13 +450,18 @@ int build_reference_db(const char* dms_name,
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fprintf(stderr, "Building LCA arrays...\n");
|
||||||
|
|
||||||
// For each sequence, look for all its alignments in the matrix, and for each different LCA taxid/score, order them and write them
|
// For each sequence, look for all its alignments in the matrix, and for each different LCA taxid/score, order them and write them
|
||||||
// Going through matrix once, filling refs arrays on the go for efficiency
|
// Going through matrix once, filling refs arrays on the go for efficiency
|
||||||
for (i=0; i<(matrix_with_lca_view->infos)->line_count; i++)
|
for (i=0; i<count; i++)
|
||||||
{
|
{
|
||||||
if (! keep_running)
|
if (! keep_running)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
|
if (i%1000 == 0)
|
||||||
|
fprintf(stderr,"\rDone : %f %% ", (i / (float) count)*100);
|
||||||
|
|
||||||
// Read ref line indexes
|
// Read ref line indexes
|
||||||
idx1 = obi_get_int_with_elt_idx_and_col_p_in_view(matrix_with_lca_view, matrix_idx1_column, i, 0);
|
idx1 = obi_get_int_with_elt_idx_and_col_p_in_view(matrix_with_lca_view, matrix_idx1_column, i, 0);
|
||||||
idx2 = obi_get_int_with_elt_idx_and_col_p_in_view(matrix_with_lca_view, matrix_idx2_column, i, 0);
|
idx2 = obi_get_int_with_elt_idx_and_col_p_in_view(matrix_with_lca_view, matrix_idx2_column, i, 0);
|
||||||
@ -464,6 +477,8 @@ int build_reference_db(const char* dms_name,
|
|||||||
// Read alignment score
|
// Read alignment score
|
||||||
score = obi_get_float_with_elt_idx_and_col_p_in_view(matrix_with_lca_view, matrix_score_column, i, 0);
|
score = obi_get_float_with_elt_idx_and_col_p_in_view(matrix_with_lca_view, matrix_score_column, i, 0);
|
||||||
|
|
||||||
|
//fprintf(stderr, "\n\ntaxid_lca=%d, score=%f, idx1=%d, idx2=%d", taxid_lca, score, idx1, idx2);
|
||||||
|
|
||||||
///////////////// Compute for first sequence \\\\\\\\\\\\\\\\\\\\\\\ (TODO function)
|
///////////////// Compute for first sequence \\\\\\\\\\\\\\\\\\\\\\\ (TODO function)
|
||||||
|
|
||||||
// Read arrays
|
// Read arrays
|
||||||
@ -480,9 +495,11 @@ int build_reference_db(const char* dms_name,
|
|||||||
// return -1;
|
// return -1;
|
||||||
// }
|
// }
|
||||||
|
|
||||||
|
//fprintf(stderr, "\n1st sequence");
|
||||||
// If empty, add values
|
// If empty, add values
|
||||||
if (taxid_array_length == 0)
|
if (taxid_array_length == 0)
|
||||||
{
|
{
|
||||||
|
//fprintf(stderr, "\nEmpty, add value");
|
||||||
if (obi_set_array_with_col_p_in_view(o_view, final_lca_taxid_a_column, idx1, &taxid_lca, (uint8_t) (obi_sizeof(OBI_INT) * 8), 1) < 0)
|
if (obi_set_array_with_col_p_in_view(o_view, final_lca_taxid_a_column, idx1, &taxid_lca, (uint8_t) (obi_sizeof(OBI_INT) * 8), 1) < 0)
|
||||||
{
|
{
|
||||||
obidebug(1, "\nError setting a LCA taxid array in a column when building a reference database");
|
obidebug(1, "\nError setting a LCA taxid array in a column when building a reference database");
|
||||||
@ -496,6 +513,8 @@ int build_reference_db(const char* dms_name,
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
//fprintf(stderr, "\nNot empty");
|
||||||
|
|
||||||
j = 0;
|
j = 0;
|
||||||
modified = false;
|
modified = false;
|
||||||
while (j < taxid_array_length)
|
while (j < taxid_array_length)
|
||||||
@ -509,6 +528,9 @@ int build_reference_db(const char* dms_name,
|
|||||||
memcpy(score_array_writable, score_array, score_array_length*sizeof(obifloat_t));
|
memcpy(score_array_writable, score_array, score_array_length*sizeof(obifloat_t));
|
||||||
modified = true;
|
modified = true;
|
||||||
|
|
||||||
|
//fprintf(stderr, "\nSame LCA, replace %d and %f with %d and %f", lca_taxid_array_writable[j],
|
||||||
|
// score_array_writable[j], taxid_lca, score);
|
||||||
|
|
||||||
// Better score for the same LCA, replace this LCA/score pair
|
// Better score for the same LCA, replace this LCA/score pair
|
||||||
lca_taxid_array_writable[j] = taxid_lca;
|
lca_taxid_array_writable[j] = taxid_lca;
|
||||||
score_array_writable[j] = score;
|
score_array_writable[j] = score;
|
||||||
@ -535,6 +557,8 @@ int build_reference_db(const char* dms_name,
|
|||||||
{
|
{
|
||||||
if (score > score_array[j])
|
if (score > score_array[j])
|
||||||
{
|
{
|
||||||
|
//fprintf(stderr, "\nInsert new");
|
||||||
|
|
||||||
memcpy(lca_taxid_array_writable, lca_taxid_array, taxid_array_length*sizeof(obiint_t));
|
memcpy(lca_taxid_array_writable, lca_taxid_array, taxid_array_length*sizeof(obiint_t));
|
||||||
memcpy(score_array_writable, score_array, score_array_length*sizeof(obifloat_t));
|
memcpy(score_array_writable, score_array, score_array_length*sizeof(obifloat_t));
|
||||||
modified = true;
|
modified = true;
|
||||||
@ -579,10 +603,15 @@ int build_reference_db(const char* dms_name,
|
|||||||
memcpy(score_array_writable, score_array, score_array_length*sizeof(obifloat_t));
|
memcpy(score_array_writable, score_array, score_array_length*sizeof(obifloat_t));
|
||||||
modified = true;
|
modified = true;
|
||||||
|
|
||||||
|
//fprintf(stderr, "\nAppend at the end");
|
||||||
|
|
||||||
// Append LCA
|
// Append LCA
|
||||||
lca_taxid_array_writable[taxid_array_writable_length] = taxid_lca;
|
lca_taxid_array_writable[taxid_array_writable_length] = taxid_lca;
|
||||||
score_array_writable[score_array_writable_length] = score;
|
score_array_writable[score_array_writable_length] = score;
|
||||||
|
|
||||||
|
taxid_array_writable_length++;
|
||||||
|
score_array_writable_length++;
|
||||||
|
|
||||||
// Remove the previous (children) LCAs from the array if their score is equal or lower
|
// Remove the previous (children) LCAs from the array if their score is equal or lower
|
||||||
while ((j>0) && (score_array_writable[j-1] <= score))
|
while ((j>0) && (score_array_writable[j-1] <= score))
|
||||||
{
|
{
|
||||||
@ -603,6 +632,13 @@ int build_reference_db(const char* dms_name,
|
|||||||
// Write new arrays
|
// Write new arrays
|
||||||
if (modified)
|
if (modified)
|
||||||
{
|
{
|
||||||
|
// fprintf(stderr, "\n\nnew array:");
|
||||||
|
// for (k=0;k<taxid_array_writable_length;k++)
|
||||||
|
// {
|
||||||
|
// lca = obi_taxo_get_taxon_with_taxid(tax, lca_taxid_array_writable[k]);
|
||||||
|
// fprintf(stderr, "\nLCA=%d, %s, score=%f", lca_taxid_array_writable[k], lca->name, score_array_writable[k]);
|
||||||
|
// }
|
||||||
|
|
||||||
if (obi_set_array_with_col_p_in_view(o_view, final_lca_taxid_a_column, idx1, lca_taxid_array_writable, (uint8_t) (obi_sizeof(OBI_INT) * 8), taxid_array_writable_length) < 0)
|
if (obi_set_array_with_col_p_in_view(o_view, final_lca_taxid_a_column, idx1, lca_taxid_array_writable, (uint8_t) (obi_sizeof(OBI_INT) * 8), taxid_array_writable_length) < 0)
|
||||||
{
|
{
|
||||||
obidebug(1, "\nError setting a LCA taxid array in a column when building a reference database");
|
obidebug(1, "\nError setting a LCA taxid array in a column when building a reference database");
|
||||||
@ -632,9 +668,13 @@ int build_reference_db(const char* dms_name,
|
|||||||
// return -1;
|
// return -1;
|
||||||
// }
|
// }
|
||||||
|
|
||||||
|
//fprintf(stderr, "\n2nd sequence");
|
||||||
|
|
||||||
// If empty, add values
|
// If empty, add values
|
||||||
if (taxid_array_length == 0)
|
if (taxid_array_length == 0)
|
||||||
{
|
{
|
||||||
|
//fprintf(stderr, "\nEmpty, add value");
|
||||||
|
|
||||||
if (obi_set_array_with_col_p_in_view(o_view, final_lca_taxid_a_column, idx2, &taxid_lca, (uint8_t) (obi_sizeof(OBI_INT) * 8), 1) < 0)
|
if (obi_set_array_with_col_p_in_view(o_view, final_lca_taxid_a_column, idx2, &taxid_lca, (uint8_t) (obi_sizeof(OBI_INT) * 8), 1) < 0)
|
||||||
{
|
{
|
||||||
obidebug(1, "\nError setting a LCA taxid array in a column when building a reference database");
|
obidebug(1, "\nError setting a LCA taxid array in a column when building a reference database");
|
||||||
@ -648,6 +688,8 @@ int build_reference_db(const char* dms_name,
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
//fprintf(stderr, "\nNot empty");
|
||||||
|
|
||||||
j = 0;
|
j = 0;
|
||||||
modified = false;
|
modified = false;
|
||||||
while (j < taxid_array_length)
|
while (j < taxid_array_length)
|
||||||
@ -661,6 +703,9 @@ int build_reference_db(const char* dms_name,
|
|||||||
memcpy(score_array_writable, score_array, score_array_length*sizeof(obifloat_t));
|
memcpy(score_array_writable, score_array, score_array_length*sizeof(obifloat_t));
|
||||||
modified = true;
|
modified = true;
|
||||||
|
|
||||||
|
//fprintf(stderr, "\nSame LCA, replace %d and %f with %d and %f", lca_taxid_array_writable[j],
|
||||||
|
// score_array_writable[j], taxid_lca, score);
|
||||||
|
|
||||||
// Better score for the same LCA, replace this LCA/score pair
|
// Better score for the same LCA, replace this LCA/score pair
|
||||||
lca_taxid_array_writable[j] = taxid_lca;
|
lca_taxid_array_writable[j] = taxid_lca;
|
||||||
score_array_writable[j] = score;
|
score_array_writable[j] = score;
|
||||||
@ -687,6 +732,8 @@ int build_reference_db(const char* dms_name,
|
|||||||
{
|
{
|
||||||
if (score > score_array[j])
|
if (score > score_array[j])
|
||||||
{
|
{
|
||||||
|
//fprintf(stderr, "\nInsert new");
|
||||||
|
|
||||||
memcpy(lca_taxid_array_writable, lca_taxid_array, taxid_array_length*sizeof(obiint_t));
|
memcpy(lca_taxid_array_writable, lca_taxid_array, taxid_array_length*sizeof(obiint_t));
|
||||||
memcpy(score_array_writable, score_array, score_array_length*sizeof(obifloat_t));
|
memcpy(score_array_writable, score_array, score_array_length*sizeof(obifloat_t));
|
||||||
modified = true;
|
modified = true;
|
||||||
@ -727,6 +774,8 @@ int build_reference_db(const char* dms_name,
|
|||||||
|
|
||||||
if (j == taxid_array_length) // same or parent LCA not found, need to be appended at the end
|
if (j == taxid_array_length) // same or parent LCA not found, need to be appended at the end
|
||||||
{
|
{
|
||||||
|
//fprintf(stderr, "\nAppend at the end");
|
||||||
|
|
||||||
memcpy(lca_taxid_array_writable, lca_taxid_array, taxid_array_length*sizeof(obiint_t));
|
memcpy(lca_taxid_array_writable, lca_taxid_array, taxid_array_length*sizeof(obiint_t));
|
||||||
memcpy(score_array_writable, score_array, score_array_length*sizeof(obifloat_t));
|
memcpy(score_array_writable, score_array, score_array_length*sizeof(obifloat_t));
|
||||||
modified = true;
|
modified = true;
|
||||||
@ -735,6 +784,9 @@ int build_reference_db(const char* dms_name,
|
|||||||
lca_taxid_array_writable[taxid_array_writable_length] = taxid_lca;
|
lca_taxid_array_writable[taxid_array_writable_length] = taxid_lca;
|
||||||
score_array_writable[score_array_writable_length] = score;
|
score_array_writable[score_array_writable_length] = score;
|
||||||
|
|
||||||
|
taxid_array_writable_length++;
|
||||||
|
score_array_writable_length++;
|
||||||
|
|
||||||
// Remove the previous (children) LCAs from the array if their score is equal or lower
|
// Remove the previous (children) LCAs from the array if their score is equal or lower
|
||||||
while ((j>0) && (score_array_writable[j-1] <= score))
|
while ((j>0) && (score_array_writable[j-1] <= score))
|
||||||
{
|
{
|
||||||
@ -769,11 +821,17 @@ int build_reference_db(const char* dms_name,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
fprintf(stderr,"\rDone : 100 %% \n");
|
||||||
|
|
||||||
|
fprintf(stderr, "Writing results...\n");
|
||||||
|
count = (o_view->infos)->line_count;
|
||||||
// Fill empty LCA informations (because filling from potentially sparse alignment matrix) with the sequence taxid
|
// Fill empty LCA informations (because filling from potentially sparse alignment matrix) with the sequence taxid
|
||||||
score=1.0; // technically getting LCA of identical sequences
|
score=1.0; // technically getting LCA of identical sequences
|
||||||
for (i=0; i<(o_view->infos)->line_count; i++)
|
for (i=0; i<count; i++)
|
||||||
{
|
{
|
||||||
|
if (i%1000 == 0)
|
||||||
|
fprintf(stderr,"\rDone : %f %% ", (i / (float) count)*100);
|
||||||
|
|
||||||
obi_get_array_with_col_p_in_view(o_view, final_lca_taxid_a_column, i, &taxid_array_length);
|
obi_get_array_with_col_p_in_view(o_view, final_lca_taxid_a_column, i, &taxid_array_length);
|
||||||
if (taxid_array_length == 0) // no LCA set
|
if (taxid_array_length == 0) // no LCA set
|
||||||
{
|
{
|
||||||
@ -799,6 +857,7 @@ int build_reference_db(const char* dms_name,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
fprintf(stderr,"\rDone : 100 %% \n");
|
||||||
|
|
||||||
// Add information about the threshold used to build the DB
|
// Add information about the threshold used to build the DB
|
||||||
snprintf(threshold_str, 5, "%f", threshold);
|
snprintf(threshold_str, 5, "%f", threshold);
|
||||||
@ -858,7 +917,6 @@ int build_reference_db(const char* dms_name,
|
|||||||
free(matrix_view_name);
|
free(matrix_view_name);
|
||||||
free(matrix_with_lca_view_name);
|
free(matrix_with_lca_view_name);
|
||||||
|
|
||||||
fprintf(stderr,"\rDone : 100 %% \n");
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1061,7 +1061,7 @@ int obi_ecopcr(const char* i_dms_name,
|
|||||||
length = 0;
|
length = 0;
|
||||||
if (posj > posi)
|
if (posj > posi)
|
||||||
length = posj - posi - o1->patlen - o2->patlen;
|
length = posj - posi - o1->patlen - o2->patlen;
|
||||||
if (posj < posi)
|
else if (circular > 0)
|
||||||
length = posj + apatseq->seqlen - posi - o1->patlen - o2->patlen;
|
length = posj + apatseq->seqlen - posi - o1->patlen - o2->patlen;
|
||||||
if ((length>0) && // For when primers touch or overlap
|
if ((length>0) && // For when primers touch or overlap
|
||||||
(!min_len || (length >= min_len)) &&
|
(!min_len || (length >= min_len)) &&
|
||||||
@ -1151,7 +1151,7 @@ int obi_ecopcr(const char* i_dms_name,
|
|||||||
length = 0;
|
length = 0;
|
||||||
if (posj > posi)
|
if (posj > posi)
|
||||||
length = posj - posi + 1 - o2->patlen - o1->patlen; /* - o1->patlen : deleted by <EC> (prior to the OBITools3) */
|
length = posj - posi + 1 - o2->patlen - o1->patlen; /* - o1->patlen : deleted by <EC> (prior to the OBITools3) */
|
||||||
if (posj < posi)
|
else if (circular > 0)
|
||||||
length = posj + apatseq->seqlen - posi - o1->patlen - o2->patlen;
|
length = posj + apatseq->seqlen - posi - o1->patlen - o2->patlen;
|
||||||
if ((length>0) && // For when primers touch or overlap
|
if ((length>0) && // For when primers touch or overlap
|
||||||
(!min_len || (length >= min_len)) &&
|
(!min_len || (length >= min_len)) &&
|
||||||
@ -1232,7 +1232,7 @@ int obi_ecopcr(const char* i_dms_name,
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
fprintf(stderr,"\rDone : 100 %% ");
|
fprintf(stderr,"\rDone : 100 %% \n");
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -455,7 +455,7 @@ int obi_ecotag(const char* dms_name,
|
|||||||
|
|
||||||
for (i=0; i < query_count; i++)
|
for (i=0; i < query_count; i++)
|
||||||
{
|
{
|
||||||
if (i%100 == 0)
|
if (i%1000 == 0)
|
||||||
fprintf(stderr,"\rDone : %f %% ", (i / (float) query_count)*100);
|
fprintf(stderr,"\rDone : %f %% ", (i / (float) query_count)*100);
|
||||||
|
|
||||||
best_match_count = 0;
|
best_match_count = 0;
|
||||||
@ -562,7 +562,7 @@ int obi_ecotag(const char* dms_name,
|
|||||||
score_array = obi_get_array_with_col_p_in_view(ref_view, score_a_column, best_match_idx, &lca_array_length);
|
score_array = obi_get_array_with_col_p_in_view(ref_view, score_a_column, best_match_idx, &lca_array_length);
|
||||||
|
|
||||||
k = 0;
|
k = 0;
|
||||||
while ((k < lca_array_length) && (score_array[k] >= ecotag_threshold))
|
while ((k < lca_array_length) && (score_array[k] >= best_score))
|
||||||
k++;
|
k++;
|
||||||
|
|
||||||
if (k>0)
|
if (k>0)
|
||||||
@ -570,12 +570,12 @@ int obi_ecotag(const char* dms_name,
|
|||||||
lca_array = obi_get_array_with_col_p_in_view(ref_view, lca_taxid_a_column, best_match_idx, &lca_array_length);
|
lca_array = obi_get_array_with_col_p_in_view(ref_view, lca_taxid_a_column, best_match_idx, &lca_array_length);
|
||||||
if (j>0)
|
if (j>0)
|
||||||
{
|
{
|
||||||
lca = obi_taxo_get_taxon_with_taxid(taxonomy, lca_taxid);
|
// lca = obi_taxo_get_taxon_with_taxid(taxonomy, lca_taxid);
|
||||||
if (lca == NULL)
|
// if (lca == NULL)
|
||||||
{
|
// {
|
||||||
obidebug(1, "\nError getting a taxon from a taxid when doing taxonomic assignment");
|
// obidebug(1, "\nError getting a taxon from a taxid when doing taxonomic assignment");
|
||||||
return -1;
|
// return -1;
|
||||||
}
|
// }
|
||||||
lca_in_array = obi_taxo_get_taxon_with_taxid(taxonomy, lca_array[k-1]);
|
lca_in_array = obi_taxo_get_taxon_with_taxid(taxonomy, lca_array[k-1]);
|
||||||
if (lca_in_array == NULL)
|
if (lca_in_array == NULL)
|
||||||
{
|
{
|
||||||
|
@ -1974,6 +1974,10 @@ int obi_enlarge_column(OBIDMS_column_p column)
|
|||||||
// Calculate the new file size
|
// Calculate the new file size
|
||||||
old_line_count = (column->header)->line_count;
|
old_line_count = (column->header)->line_count;
|
||||||
new_line_count = ceil((double) old_line_count * (double) COLUMN_GROWTH_FACTOR);
|
new_line_count = ceil((double) old_line_count * (double) COLUMN_GROWTH_FACTOR);
|
||||||
|
if (new_line_count > old_line_count+100000)
|
||||||
|
new_line_count = old_line_count+100000;
|
||||||
|
else if (new_line_count < old_line_count+1000)
|
||||||
|
new_line_count = old_line_count+1000;
|
||||||
|
|
||||||
if (new_line_count > MAXIMUM_LINE_COUNT)
|
if (new_line_count > MAXIMUM_LINE_COUNT)
|
||||||
{
|
{
|
||||||
|
@ -34,7 +34,7 @@
|
|||||||
#define NB_ELTS_MAX_IF_DEFAULT_NAME (1000000) /**< The maximum number of elements per line if the default element names
|
#define NB_ELTS_MAX_IF_DEFAULT_NAME (1000000) /**< The maximum number of elements per line if the default element names
|
||||||
* are used ("0\01\02\0...\0n"), considering ELEMENTS_NAMES_MAX. // TODO not up to date
|
* are used ("0\01\02\0...\0n"), considering ELEMENTS_NAMES_MAX. // TODO not up to date
|
||||||
*/
|
*/
|
||||||
#define COLUMN_GROWTH_FACTOR (1.3) /**< The growth factor when a column is enlarged.
|
#define COLUMN_GROWTH_FACTOR (2) /**< The growth factor when a column is enlarged.
|
||||||
*/
|
*/
|
||||||
#define MAXIMUM_LINE_COUNT (1000000000) /**< The maximum line count for the data of a column (1E9). //TODO
|
#define MAXIMUM_LINE_COUNT (1000000000) /**< The maximum line count for the data of a column (1E9). //TODO
|
||||||
*/
|
*/
|
||||||
|
@ -686,6 +686,9 @@ int calculateSizeToAllocate(int maxLen, int LCSmin)
|
|||||||
size *= 3;
|
size *= 3;
|
||||||
size += 16;
|
size += 16;
|
||||||
|
|
||||||
|
size += 10; // band-aid for memory bug I don't understand (triggered on specific db on ubuntu)
|
||||||
|
// bug might have to do with the way different systems behave when aligning the address in obi_get_memory_aligned_on_16
|
||||||
|
|
||||||
return(size*sizeof(int16_t));
|
return(size*sizeof(int16_t));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user