diff --git a/src/obi_clean.c b/src/obi_clean.c index 648118b..a611603 100755 --- a/src/obi_clean.c +++ b/src/obi_clean.c @@ -183,7 +183,6 @@ int obi_clean(const char* dms_name, char status; byte_t* alignment_result_array = NULL; - byte_t* ali_result_array = NULL; byte_t ali_result; int* complete_sample_count_array = NULL; @@ -212,7 +211,6 @@ int obi_clean(const char* dms_name, bool stop = false; int max_threads = 1; - int thread_id = 0; #ifdef _OPENMP max_threads = omp_get_max_threads(); @@ -377,7 +375,7 @@ int obi_clean(const char* dms_name, // Allocate alignment result array (byte at 0 if not aligned yet, // 1 if sequence at index has a similarity above the threshold with the current sequence, // 2 if sequence at index has a similarity below the threshold with the current sequence) - alignment_result_array = (byte_t*) calloc(thread_count*seq_count, sizeof(byte_t)); + alignment_result_array = (byte_t*) calloc(seq_count, sizeof(byte_t)); if (alignment_result_array == NULL) { obi_set_errno(OBI_MALLOC_ERROR); @@ -401,43 +399,36 @@ int obi_clean(const char* dms_name, } } - #pragma omp parallel shared(seq_count, blob_array, complete_sample_count_array, alignment_result_array, stop) \ - private(ali_result_array, thread_id, i, j, blob1, blob2, s1_count, s2_count, \ - sample_count_array, sample, yes, no, above_threshold, ali_result, score) + for (i=0; i< (seq_count-1); i++) { - - #ifdef _OPENMP - thread_id = omp_get_thread_num(); - ali_result_array = alignment_result_array+thread_id; - #endif - - #pragma omp for schedule(dynamic, 100) - for (i=0; i< (seq_count-1); i++) + if (i%1000 == 0) { - if (i%1000 == 0) + p = (i/(float)seq_count)*100; + fprintf(stderr,"\rDone : %f %% ",p); + } + + // Get first sequence + blob1 = blob_array[i]; +// blob1 = obi_get_blob_with_elt_idx_and_col_p_in_view(i_view, iseq_column, i, 0); // slower + if (blob1 == NULL) + { + obidebug(1, "\nError retrieving sequences to align"); + return -1; + } + + for (sample=0; sample < sample_count; sample++) + { + sample_count_array = complete_sample_count_array+(sample*seq_count); + + // Get count for this sample + s1_count = sample_count_array[i]; + //s1_count = obi_get_int_with_elt_idx_and_col_p_in_view(i_view, sample_column, i, sample); // slower + + #pragma omp parallel shared(i, seq_count, s1_count, sample, blob_array, sample_count_array, alignment_result_array, stop) \ + private(j, blob2, s2_count, yes, no, above_threshold, ali_result, score) { - p = (i/(float)(seq_count/(float)thread_count))*100; - fprintf(stderr,"\rDone : %f %% ",p); - } - - // Get first sequence - blob1 = blob_array[i]; - // blob1 = obi_get_blob_with_elt_idx_and_col_p_in_view(i_view, iseq_column, i, 0); // slower - if (blob1 == NULL) - { - obidebug(1, "\nError retrieving sequences to align"); - stop = true; - } - - for (sample=0; sample < sample_count; sample++) - { - sample_count_array = complete_sample_count_array+(sample*seq_count); - - // Get count for this sample - s1_count = sample_count_array[i]; - //s1_count = obi_get_int_with_elt_idx_and_col_p_in_view(i_view, sample_column, i, sample); // slower - + #pragma omp for schedule(dynamic, 100) for (j=i+1; j < seq_count; j++) { // Get second sequence @@ -462,7 +453,7 @@ int obi_clean(const char* dms_name, yes = 0; no = 0; above_threshold = false; - ali_result = ali_result_array[j]; + ali_result = alignment_result_array[j]; if (ali_result > 0) // already aligned { if (ali_result == 2) @@ -492,42 +483,47 @@ int obi_clean(const char* dms_name, { if (yes == 0) // Set ali result as above the threshold (value 1) - ali_result_array[j] = 1; + alignment_result_array[j] = 1; // Might be worth having arrays to read values too for some datasets but unlikely // label as head or internal - if (s1_count >= s2_count) + #pragma omp critical { - if (obi_get_char_with_elt_idx_and_col_p_in_view(o_view, status_column, i, sample) == 's') // seq can become head ONLY if it's a singleton + if (s1_count >= s2_count) { - if (obi_set_char_with_elt_idx_and_col_p_in_view(o_view, status_column, i, sample, 'h') < 0) - stop = true; - } - // Otherwise it's an internal (do nothing) - // Label other sequence as internal no matter what - if (obi_set_char_with_elt_idx_and_col_p_in_view(o_view, status_column, j, sample, 'i') < 0) - stop = true; - } - else // Same thing but with sequences switched - { - if (obi_get_char_with_elt_idx_and_col_p_in_view(o_view, status_column, j, sample) == 's') // seq can become head ONLY if it's a singleton + if (obi_get_char_with_elt_idx_and_col_p_in_view(o_view, status_column, i, sample) == 's') // seq can become head ONLY if it's a singleton { - if (obi_set_char_with_elt_idx_and_col_p_in_view(o_view, status_column, j, sample, 'h') < 0) + if (obi_set_char_with_elt_idx_and_col_p_in_view(o_view, status_column, i, sample, 'h') < 0) stop = true; } - if (obi_set_char_with_elt_idx_and_col_p_in_view(o_view, status_column, i, sample, 'i') < 0) - stop = true; + // Otherwise it's an internal (do nothing) + // Label other sequence as internal no matter what + if (obi_set_char_with_elt_idx_and_col_p_in_view(o_view, status_column, j, sample, 'i') < 0) + stop = true; + } + else // Same thing but with sequences switched + { + if (obi_get_char_with_elt_idx_and_col_p_in_view(o_view, status_column, j, sample) == 's') // seq can become head ONLY if it's a singleton + { + if (obi_set_char_with_elt_idx_and_col_p_in_view(o_view, status_column, j, sample, 'h') < 0) + stop = true; + } + if (obi_set_char_with_elt_idx_and_col_p_in_view(o_view, status_column, i, sample, 'i') < 0) + stop = true; + } } } else if (no == 0) // Set ali result as above the threshold (value 2) - ali_result_array[j] = 2; + alignment_result_array[j] = 2; } } } - // Reset ali result array to 0 - memset(ali_result_array, 0, seq_count); + if (stop) + return -1; } + // Reset ali result array to 0 + memset(alignment_result_array, 0, seq_count); } free_kmer_tables(ktable, seq_count); @@ -537,9 +533,6 @@ int obi_clean(const char* dms_name, fprintf(stderr, "\n"); - if (stop) - return -1; - if (heads_only) { line_selection = malloc((o_view->infos)->line_count * sizeof(index_t));