i loop parallelized: bad

This commit is contained in:
Celine Mercier
2019-05-25 18:37:56 +02:00
parent a04588da31
commit debf59b266

View File

@ -183,6 +183,7 @@ int obi_clean(const char* dms_name,
char status;
byte_t* alignment_result_array = NULL;
byte_t* ali_result_array = NULL;
byte_t ali_result;
int* complete_sample_count_array = NULL;
@ -211,6 +212,7 @@ int obi_clean(const char* dms_name,
bool stop = false;
int max_threads = 1;
int thread_id = 0;
#ifdef _OPENMP
max_threads = omp_get_max_threads();
@ -375,7 +377,7 @@ int obi_clean(const char* dms_name,
// Allocate alignment result array (byte at 0 if not aligned yet,
// 1 if sequence at index has a similarity above the threshold with the current sequence,
// 2 if sequence at index has a similarity below the threshold with the current sequence)
alignment_result_array = (byte_t*) calloc(seq_count, sizeof(byte_t));
alignment_result_array = (byte_t*) calloc(thread_count*seq_count, sizeof(byte_t));
if (alignment_result_array == NULL)
{
obi_set_errno(OBI_MALLOC_ERROR);
@ -399,126 +401,133 @@ int obi_clean(const char* dms_name,
}
}
#pragma omp parallel shared(seq_count, blob_array, complete_sample_count_array, alignment_result_array, stop) \
private(ali_result_array, thread_id, i, j, blob1, blob2, s1_count, s2_count, \
sample_count_array, sample, yes, no, above_threshold, ali_result, score)
for (i=0; i< (seq_count-1); i++)
{
if (i%1000 == 0)
#ifdef _OPENMP
thread_id = omp_get_thread_num();
ali_result_array = alignment_result_array+thread_id;
#endif
#pragma omp for schedule(dynamic, 100)
for (i=0; i< (seq_count-1); i++)
{
p = (i/(float)seq_count)*100;
fprintf(stderr,"\rDone : %f %% ",p);
}
// Get first sequence
blob1 = blob_array[i];
// blob1 = obi_get_blob_with_elt_idx_and_col_p_in_view(i_view, iseq_column, i, 0); // slower
if (blob1 == NULL)
{
obidebug(1, "\nError retrieving sequences to align");
return -1;
}
for (sample=0; sample < sample_count; sample++)
{
sample_count_array = complete_sample_count_array+(sample*seq_count);
// Get count for this sample
s1_count = sample_count_array[i];
//s1_count = obi_get_int_with_elt_idx_and_col_p_in_view(i_view, sample_column, i, sample); // slower
#pragma omp parallel for shared(i, seq_count, s1_count, sample, blob_array, sample_count_array, alignment_result_array, stop) \
private(j, blob2, s2_count, yes, no, above_threshold, ali_result, score)
for (j=i+1; j < seq_count; j++)
if (i%1000 == 0)
{
// Get second sequence
blob2 = blob_array[j];
// blob2 = obi_get_blob_with_elt_idx_and_col_p_in_view(i_view, iseq_column, j, 0); // slower
if (blob2 == NULL)
{
obidebug(1, "\nError retrieving sequences to align");
stop = true;
}
p = (i/(float)(seq_count/(float)thread_count))*100;
fprintf(stderr,"\rDone : %f %% ",p);
}
// Get first sequence
blob1 = blob_array[i];
// blob1 = obi_get_blob_with_elt_idx_and_col_p_in_view(i_view, iseq_column, i, 0); // slower
if (blob1 == NULL)
{
obidebug(1, "\nError retrieving sequences to align");
stop = true;
}
for (sample=0; sample < sample_count; sample++)
{
sample_count_array = complete_sample_count_array+(sample*seq_count);
// Get count for this sample
s2_count = sample_count_array[j];
//s2_count = obi_get_int_with_elt_idx_and_col_p_in_view(i_view, sample_column, j, sample); // slower
s1_count = sample_count_array[i];
//s1_count = obi_get_int_with_elt_idx_and_col_p_in_view(i_view, sample_column, i, sample); // slower
// Check all ratios
if (((s1_count!=OBIInt_NA && s2_count!=OBIInt_NA) && (s1_count>0 && s2_count>0)) &&
((((s1_count >= s2_count) && (((double) s2_count / (double) s1_count) <= max_ratio))) ||
(((s2_count >= s1_count) && (((double) s1_count / (double) s2_count) <= max_ratio)))))
for (j=i+1; j < seq_count; j++)
{
yes = 0;
no = 0;
above_threshold = false;
ali_result = alignment_result_array[j];
if (ali_result > 0) // already aligned
// Get second sequence
blob2 = blob_array[j];
// blob2 = obi_get_blob_with_elt_idx_and_col_p_in_view(i_view, iseq_column, j, 0); // slower
if (blob2 == NULL)
{
if (ali_result == 2)
no = 1;
else if (ali_result == 1)
yes = 1;
obidebug(1, "\nError retrieving sequences to align");
stop = true;
}
else // never compared before
// Get count for this sample
s2_count = sample_count_array[j];
//s2_count = obi_get_int_with_elt_idx_and_col_p_in_view(i_view, sample_column, j, sample); // slower
// Check all ratios
if (((s1_count!=OBIInt_NA && s2_count!=OBIInt_NA) && (s1_count>0 && s2_count>0)) &&
((((s1_count >= s2_count) && (((double) s2_count / (double) s1_count) <= max_ratio))) ||
(((s2_count >= s1_count) && (((double) s1_count / (double) s2_count) <= max_ratio)))))
{
// Check if the sequences are identical in a quick way (same index in the same indexer)
if (obi_get_index_with_elt_idx_and_col_p_in_view(i_view, iseq_column, i, 0) == obi_get_index_with_elt_idx_and_col_p_in_view(i_view, iseq_column, j, 0))
above_threshold = true;
else // the sequences aren't identical
yes = 0;
no = 0;
above_threshold = false;
ali_result = ali_result_array[j];
if (ali_result > 0) // already aligned
{
// kmer filter
align_filters(ktable, blob1, blob2, i, j, threshold, normalize, reference, similarity_mode, &score, &lcs_min, false);
// Compute alignment score if filter passed
if (score == -1.0)
score = obiblob_sse_banded_lcs_align(blob1, blob2, threshold, normalize, reference, similarity_mode, &lcs_length, &ali_length);
above_threshold = ((score >= 0) && (score <= threshold));
if (ali_result == 2)
no = 1;
else if (ali_result == 1)
yes = 1;
}
}
if (yes || above_threshold)
{
if (yes == 0)
// Set ali result as above the threshold (value 1)
alignment_result_array[j] = 1;
// Might be worth having arrays to read values too for some datasets but unlikely
// label as head or internal
if (s1_count >= s2_count)
else // never compared before
{
if (obi_get_char_with_elt_idx_and_col_p_in_view(o_view, status_column, i, sample) == 's') // seq can become head ONLY if it's a singleton
// Check if the sequences are identical in a quick way (same index in the same indexer)
if (obi_get_index_with_elt_idx_and_col_p_in_view(i_view, iseq_column, i, 0) == obi_get_index_with_elt_idx_and_col_p_in_view(i_view, iseq_column, j, 0))
above_threshold = true;
else // the sequences aren't identical
{
if (obi_set_char_with_elt_idx_and_col_p_in_view(o_view, status_column, i, sample, 'h') < 0)
stop = true;
// kmer filter
align_filters(ktable, blob1, blob2, i, j, threshold, normalize, reference, similarity_mode, &score, &lcs_min, false);
// Compute alignment score if filter passed
if (score == -1.0)
score = obiblob_sse_banded_lcs_align(blob1, blob2, threshold, normalize, reference, similarity_mode, &lcs_length, &ali_length);
above_threshold = ((score >= 0) && (score <= threshold));
}
// Otherwise it's an internal (do nothing)
// Label other sequence as internal no matter what
if (obi_set_char_with_elt_idx_and_col_p_in_view(o_view, status_column, j, sample, 'i') < 0)
stop = true;
}
else // Same thing but with sequences switched
if (yes || above_threshold)
{
if (obi_get_char_with_elt_idx_and_col_p_in_view(o_view, status_column, j, sample) == 's') // seq can become head ONLY if it's a singleton
if (yes == 0)
// Set ali result as above the threshold (value 1)
ali_result_array[j] = 1;
// Might be worth having arrays to read values too for some datasets but unlikely
// label as head or internal
if (s1_count >= s2_count)
{
if (obi_get_char_with_elt_idx_and_col_p_in_view(o_view, status_column, i, sample) == 's') // seq can become head ONLY if it's a singleton
{
if (obi_set_char_with_elt_idx_and_col_p_in_view(o_view, status_column, j, sample, 'h') < 0)
if (obi_set_char_with_elt_idx_and_col_p_in_view(o_view, status_column, i, sample, 'h') < 0)
stop = true;
}
if (obi_set_char_with_elt_idx_and_col_p_in_view(o_view, status_column, i, sample, 'i') < 0)
stop = true;
// Otherwise it's an internal (do nothing)
// Label other sequence as internal no matter what
if (obi_set_char_with_elt_idx_and_col_p_in_view(o_view, status_column, j, sample, 'i') < 0)
stop = true;
}
else // Same thing but with sequences switched
{
if (obi_get_char_with_elt_idx_and_col_p_in_view(o_view, status_column, j, sample) == 's') // seq can become head ONLY if it's a singleton
{
if (obi_set_char_with_elt_idx_and_col_p_in_view(o_view, status_column, j, sample, 'h') < 0)
stop = true;
}
if (obi_set_char_with_elt_idx_and_col_p_in_view(o_view, status_column, i, sample, 'i') < 0)
stop = true;
}
}
else if (no == 0)
// Set ali result as above the threshold (value 2)
ali_result_array[j] = 2;
}
else if (no == 0)
// Set ali result as above the threshold (value 2)
alignment_result_array[j] = 2;
}
}
if (stop)
return -1;
// Reset ali result array to 0
memset(ali_result_array, 0, seq_count);
}
// Reset ali result array to 0
memset(alignment_result_array, 0, seq_count);
}
free_kmer_tables(ktable, seq_count);
@ -528,6 +537,9 @@ int obi_clean(const char* dms_name,
fprintf(stderr, "\n");
if (stop)
return -1;
if (heads_only)
{
line_selection = malloc((o_view->infos)->line_count * sizeof(index_t));