From 6819d0461579e0006a6f5139860281bd3d5d4cd8 Mon Sep 17 00:00:00 2001 From: Celine Mercier Date: Wed, 29 Nov 2023 16:24:31 +1300 Subject: [PATCH] Testing RAM instead of mmap for blob alignment --- src/obi_lcs.c | 75 +++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 64 insertions(+), 11 deletions(-) mode change 100755 => 100644 src/obi_lcs.c diff --git a/src/obi_lcs.c b/src/obi_lcs.c old mode 100755 new mode 100644 index 6108167..011d7fb --- a/src/obi_lcs.c +++ b/src/obi_lcs.c @@ -10,8 +10,9 @@ */ //#define OMP_SUPPORT // TODO -#ifdef OMP_SUPPORT +#ifdef _OPENMP #include +#include #endif #include @@ -407,10 +408,15 @@ int obi_lcs_align_one_column(const char* dms_name, int lcs_length; int ali_length; Kmer_table_p ktable; + Obi_blob_p blob1_mapped; + Obi_blob_p blob2_mapped; Obi_blob_p blob1; - Obi_blob_p blob2; + Obi_blob_p blob2; + int blob1_size; + int blob2_size; int lcs_min; index_t seq_elt_idx; + bool stop = false; OBIDMS_p dms = NULL; Obiview_p seq_view = NULL; @@ -568,10 +574,16 @@ int obi_lcs_align_one_column(const char* dms_name, seq_count = (seq_view->infos)->line_count; - #ifdef OMP_SUPPORT - omp_set_num_threads(thread_count); - #pragma omp parallel for - #endif +// #ifdef _OPENMP +// int max_threads = omp_get_max_threads(); +// if ((thread_count == -1) || (thread_count > max_threads)) +// thread_count = max_threads; +// omp_set_num_threads(thread_count); +// fprintf(stderr, "Running on %d thread(s)\n", thread_count); +// pthread_mutex_t mutex; +// if (pthread_mutex_init(&mutex, NULL) < 0) +// return -1; +// #endif for (i=0; i < (seq_count - 1); i++) { @@ -585,23 +597,45 @@ int obi_lcs_align_one_column(const char* dms_name, id1_idx = obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, id_column, i, 0); // TODO Could there be multiple IDs per line? // Get first sequence and its index seq1_idx = obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, i, seq_elt_idx); - blob1 = obi_get_blob_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, i, seq_elt_idx); + blob1_mapped = obi_get_blob_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, i, seq_elt_idx); + if (blob1_mapped == NULL) + { + obidebug(1, "\nError retrieving sequences to align"); + return -1; + } + blob1_size = obi_blob_sizeof(blob1_mapped); + blob1 = (Obi_blob_p) malloc(blob1_size); if (blob1 == NULL) { obidebug(1, "\nError retrieving sequences to align"); return -1; } + blob1 = memcpy(blob1, blob1_mapped, blob1_size); +//#pragma omp parallel shared(blob1_mapped, blob1, blob1_size, id1_idx, seq1_idx, stop, seq_count, output_view, k, ktable, \ +// idx1_column, idx2_column, i, id1_column, id2_column, print_seq, seq1_column, seq2_column, \ +// print_count, count1_column, count2_column, ali_length_column, lcs_length_column, score_column, reference, normalize, similarity_mode) \ +// private(blob2_mapped, blob2, blob2_size, j, id2_idx, seq2_idx, count2, count1, score, ali_length, lcs_length) +//{ +// #pragma omp for schedule(dynamic, seq_count/thread_count + (seq_count % thread_count != 0)) // Avoid 0 which blocks the program for (j=i+1; j < seq_count; j++) { // Get second sequence and its index seq2_idx = obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, j, seq_elt_idx); - blob2 = obi_get_blob_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, j, seq_elt_idx); + blob2_mapped = obi_get_blob_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, j, seq_elt_idx); + if (blob2_mapped == NULL) + { + obidebug(1, "\nError retrieving sequences to align"); + stop = true; + } + blob2_size = obi_blob_sizeof(blob2_mapped); + blob2 = (Obi_blob_p) malloc(blob2_size); if (blob2 == NULL) { obidebug(1, "\nError retrieving sequences to align"); - return -1; + stop = true; } + blob2 = memcpy(blob2, blob2_mapped, blob2_size); // Check if the sequences are identical in a quick way (same index in the same indexer) if (obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, i, seq_elt_idx) == obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, j, seq_elt_idx)) @@ -624,6 +658,8 @@ int obi_lcs_align_one_column(const char* dms_name, score = obiblob_sse_banded_lcs_align(blob1, blob2, threshold, normalize, reference, similarity_mode, &lcs_length, &ali_length); } + free(blob2); + if ((score >= 0) && (((normalize || similarity_mode) && (score >= threshold)) || ((!similarity_mode && !normalize) && (score <= threshold)))) { // Print result @@ -637,6 +673,10 @@ int obi_lcs_align_one_column(const char* dms_name, count2 = obi_get_int_with_elt_idx_and_col_p_in_view(seq_view, i_count_column, j, 0); } +// #ifdef _OPENMP +// if (pthread_mutex_lock(&mutex) < 0) +// stop = true; +// #endif if (print_alignment_result(output_view, k, idx1_column, idx2_column, i, j, id1_column, id2_column, id1_idx, id2_idx, @@ -646,15 +686,24 @@ int obi_lcs_align_one_column(const char* dms_name, lcs_length_column, lcs_length, score_column, score, reference, normalize, similarity_mode) < 0) - return -1; + stop = true; +// #ifdef _OPENMP +// if (pthread_mutex_unlock(&mutex) < 0) +// stop = true; +// #endif k++; } } +//} + free(blob1); } fprintf(stderr,"\rDone : 100 %% \n"); + //fprintf(stderr,"\nstop=%d\n", stop); + + // Close views if (obi_save_and_close_view(seq_view) < 0) { @@ -675,6 +724,11 @@ int obi_lcs_align_one_column(const char* dms_name, free_kmer_tables(ktable, seq_count); +// #ifdef _OPENMP +// if (pthread_mutex_destroy(&mutex) < 0) +// return -1; +// #endif + return 0; } @@ -1087,4 +1141,3 @@ int obi_lcs_align_two_columns(const char* dms_name, return 0; } -