diff --git a/src/obi_clean.c b/src/obi_clean.c old mode 100644 new mode 100755 index 08e491f..c5ed3bd --- a/src/obi_clean.c +++ b/src/obi_clean.c @@ -185,8 +185,12 @@ int obi_clean(const char* dms_name, int ind_sample_count; char status; - void** yes_trees; - void** no_trees; + void** yes_trees = NULL; + void** no_trees = NULL; + + int* complete_sample_count_array = NULL; + int* sample_count_array = NULL; + Obi_blob_p* blob_array = NULL; OBIDMS_p dms = NULL; Obiview_p i_view = NULL; @@ -332,6 +336,34 @@ int obi_clean(const char* dms_name, seq_count = (i_view->infos)->line_count; + // Allocate arrays for sample counts otherwise reading in mapped files takes longer + complete_sample_count_array = (int*) malloc(seq_count * sample_count * sizeof(int)); + if (complete_sample_count_array == NULL) + { + obi_set_errno(OBI_MALLOC_ERROR); + obidebug(1, "\nError allocating memory for the array of sample counts, size: %lld", seq_count * sample_count * sizeof(int)); + return -1; + } + for (sample=0; sample < sample_count; sample++) + { + sample_count_array = complete_sample_count_array+(sample*seq_count); + for (i=0; i0 && s2_count>0)) && @@ -453,6 +490,7 @@ int obi_clean(const char* dms_name, } } + // Might be worth having arrays to read values too for some datasets but unlikely // label as head or internal if (s1_count >= s2_count) { @@ -493,6 +531,8 @@ int obi_clean(const char* dms_name, free_kmer_tables(ktable, seq_count); free(index_array); + free(complete_sample_count_array); + free(blob_array); free(yes_trees); free(no_trees);