From a04588da31d962515ce827ec2ad8dd1ceef52efd Mon Sep 17 00:00:00 2001
From: Celine Mercier
Date: Fri, 24 May 2019 16:51:04 +0200
Subject: [PATCH] openmp on j loop (i loop probably better)

---
Review notes (ignored by git-am; everything between the separator above
and the first diff header is commentary):

What the patch does
  * clean.pyx passes -1 instead of 1 as the last argument of obi_clean().
    In obi_clean.c, -1 (and now any value < 1) selects
    omp_get_max_threads().
  * obi_clean.c switches the guard from the private OMP_SUPPORT macro to
    the compiler-provided _OPENMP macro and parallelises the inner j loop
    with "#pragma omp parallel for".
  * Every "return -1" that sat inside the j loop becomes "stop = true";
    "stop" is tested once after the loop.

Changes made during review
  * "#include" had lost its header name; restored to <omp.h>.
  * omp_set_num_threads(4) ignored the requested thread count although
    the next line prints thread_count; it now receives thread_count.
  * thread_count values below 1 fall back to the maximum instead of being
    handed to omp_set_num_threads().
  * The blob2 == NULL branch now skips the rest of the iteration with
    "continue", so the code below it never runs with a NULL blob (the
    original "return -1" guaranteed that).
  * Hunk headers and diffstat updated for the one extra inserted line.
    The "index" line of src/obi_clean.c still carries the original
    post-image hash.

Open points
  * NOTE(review): "stop" is written by several threads without
    synchronisation. All writers store the same value, but it is still
    formally a data race; consider "#pragma omp atomic write".
  * NOTE(review): iterations with different j read and then write
    status_column for the same row i. Presumably the obi_get_char/
    obi_set_char view helpers are not thread-safe; confirm, or protect
    the test-and-set with "#pragma omp critical".
  * NOTE(review): context-line indentation was reconstructed from a
    whitespace-collapsed copy; regenerate against the tree if the patch
    does not apply cleanly.

 python/obitools3/commands/clean.pyx |  2 +-
 src/obi_clean.c                     | 33 +++++++++++++++++++++++++--------
 2 files changed, 26 insertions(+), 9 deletions(-)

diff --git a/python/obitools3/commands/clean.pyx b/python/obitools3/commands/clean.pyx
index 9c22761..b69fb23 100755
--- a/python/obitools3/commands/clean.pyx
+++ b/python/obitools3/commands/clean.pyx
@@ -101,7 +101,7 @@ def run(config):
     comments = View.print_config(config, "clean", command_line, input_dms_name=[i_dms_name], input_view_name=[i_view_name])
 
     if obi_clean(tobytes(i_dms_name), tobytes(i_view_name), tobytes(config['clean']['sample-tag-name']), tobytes(o_view_name), comments, \
-                 config['clean']['distance'], config['clean']['ratio'], config['clean']['heads-only'], 1) < 0:
+                 config['clean']['distance'], config['clean']['ratio'], config['clean']['heads-only'], -1) < 0:
         raise Exception("Error running obiclean")
 
     # If the input and output DMS are not the same, export result view to output DMS
diff --git a/src/obi_clean.c b/src/obi_clean.c
index 53d9a56..9cda879 100755
--- a/src/obi_clean.c
+++ b/src/obi_clean.c
@@ -9,8 +9,7 @@
  * @brief Functions tagging a set of sequences for PCR/sequencing errors identification.
  */
 
-//#define OMP_SUPPORT // TODO
-#ifdef OMP_SUPPORT
+#ifdef _OPENMP
 #include <omp.h>
 #endif
 
@@ -209,6 +208,18 @@ int obi_clean(const char* dms_name,
 	int reference = 0;
 	bool similarity_mode = false;
 
+	bool stop = false;
+
+	int max_threads = 1;
+
+	#ifdef _OPENMP
+	max_threads = omp_get_max_threads();
+	if ((thread_count < 1) || (thread_count > max_threads)) // TODO doc
+		thread_count = max_threads;
+	omp_set_num_threads(thread_count);
+	fprintf(stderr, "Running on %d thread(s)\n", thread_count);
+	#endif
+
 	// Open DMS
 	dms = obi_dms(dms_name);
 	if (dms == NULL)
@@ -414,7 +425,10 @@ int obi_clean(const char* dms_name,
 			s1_count = sample_count_array[i];
 			//s1_count = obi_get_int_with_elt_idx_and_col_p_in_view(i_view, sample_column, i, sample); // slower
 
-			for (j=i+1; j < seq_count; j++) // TODO parallelize this loop?
+			#pragma omp parallel for shared(i, seq_count, s1_count, sample, blob_array, sample_count_array, alignment_result_array, stop) \
+					private(j, blob2, s2_count, yes, no, above_threshold, ali_result, score)
+
+			for (j=i+1; j < seq_count; j++)
 			{
 				// Get second sequence
 				blob2 = blob_array[j];
@@ -422,7 +436,8 @@ int obi_clean(const char* dms_name,
 				if (blob2 == NULL)
 				{
 					obidebug(1, "\nError retrieving sequences to align");
-					return -1;
+					stop = true;
+					continue; // 'return' is not allowed inside the parallel loop; skip this pair instead
 				}
 
 				// Get count for this sample
@@ -477,22 +492,22 @@ int obi_clean(const char* dms_name,
 						if (obi_get_char_with_elt_idx_and_col_p_in_view(o_view, status_column, i, sample) == 's') // seq can become head ONLY if it's a singleton
 						{
 							if (obi_set_char_with_elt_idx_and_col_p_in_view(o_view, status_column, i, sample, 'h') < 0)
-								return -1;
+								stop = true;
 						}
 						// Otherwise it's an internal (do nothing)
 						// Label other sequence as internal no matter what
 						if (obi_set_char_with_elt_idx_and_col_p_in_view(o_view, status_column, j, sample, 'i') < 0)
-							return -1;
+							stop = true;
 					}
 					else // Same thing but with sequences switched
 					{
 						if (obi_get_char_with_elt_idx_and_col_p_in_view(o_view, status_column, j, sample) == 's') // seq can become head ONLY if it's a singleton
 						{
 							if (obi_set_char_with_elt_idx_and_col_p_in_view(o_view, status_column, j, sample, 'h') < 0)
-								return -1;
+								stop = true;
 						}
 						if (obi_set_char_with_elt_idx_and_col_p_in_view(o_view, status_column, i, sample, 'i') < 0)
-							return -1;
+							stop = true;
 					}
 				}
 				else if (no == 0)
@@ -500,6 +515,8 @@ int obi_clean(const char* dms_name,
 					alignment_result_array[j] = 2;
 				}
 			}
+			if (stop)
+				return -1;
 		}
 		// Reset ali result array to 0
 		memset(alignment_result_array, 0, seq_count);