Compare commits

..

1 Commits

Author SHA1 Message Date
6819d04615 Testing RAM instead of mmap for blob alignment 2023-11-29 16:24:31 +13:00
5 changed files with 75 additions and 24 deletions

View File

@ -325,9 +325,8 @@ cdef class Taxonomy(OBIWrapper) :
cdef Taxon taxon
try:
taxon = self.get_taxon_by_taxid(taxid)
except Exception as e:
print('\n'+e, file=sys.stderr)
return
except:
raise StopIteration
if taxon is not None:
while taxon.taxid != 1:
yield taxon
@ -335,7 +334,7 @@ cdef class Taxonomy(OBIWrapper) :
taxon = taxon.parent
yield taxon
else:
return
raise StopIteration
def is_ancestor(self, int ancestor_taxid, int taxid):

View File

@ -23,17 +23,16 @@ cdef class TabFormat:
@cython.boundscheck(False)
def __call__(self, object data):
cdef object ktags
cdef set ktags
cdef list tags = [key for key in data]
line = []
if self.tags != None and self.tags:
ktags = list(self.tags)
if self.tags is not None and self.tags:
ktags = self.tags
else:
ktags = list(set(tags))
ktags.sort()
ktags = set(tags)
if self.header and self.first_line:
for k in ktags:
if k in tags:

View File

@ -280,7 +280,7 @@ def open_uri(uri,
iseq = urib
objclass = bytes
else: # TODO update uopen to be able to write?
if 'outputformat' in config['obi'] and config['obi']['outputformat'] == b'metabaR':
if config['obi']['outputformat'] == b'metabaR':
if 'metabarprefix' not in config['obi']:
raise Exception("Prefix needed when exporting for metabaR (--metabaR-prefix option)")
else:

View File

@ -1,5 +1,5 @@
major = 3
minor = 0
serial= '1b26'
serial= '1b25'
version ="%d.%d.%s" % (major,minor,serial)

75
src/obi_lcs.c Executable file → Normal file
View File

@ -10,8 +10,9 @@
*/
//#define OMP_SUPPORT // TODO
#ifdef OMP_SUPPORT
#ifdef _OPENMP
#include <omp.h>
#include <pthread.h>
#endif
#include <stdlib.h>
@ -407,10 +408,15 @@ int obi_lcs_align_one_column(const char* dms_name,
int lcs_length;
int ali_length;
Kmer_table_p ktable;
Obi_blob_p blob1_mapped;
Obi_blob_p blob2_mapped;
Obi_blob_p blob1;
Obi_blob_p blob2;
Obi_blob_p blob2;
int blob1_size;
int blob2_size;
int lcs_min;
index_t seq_elt_idx;
bool stop = false;
OBIDMS_p dms = NULL;
Obiview_p seq_view = NULL;
@ -568,10 +574,16 @@ int obi_lcs_align_one_column(const char* dms_name,
seq_count = (seq_view->infos)->line_count;
#ifdef OMP_SUPPORT
omp_set_num_threads(thread_count);
#pragma omp parallel for
#endif
// #ifdef _OPENMP
// int max_threads = omp_get_max_threads();
// if ((thread_count == -1) || (thread_count > max_threads))
// thread_count = max_threads;
// omp_set_num_threads(thread_count);
// fprintf(stderr, "Running on %d thread(s)\n", thread_count);
// pthread_mutex_t mutex;
// if (pthread_mutex_init(&mutex, NULL) < 0)
// return -1;
// #endif
for (i=0; i < (seq_count - 1); i++)
{
@ -585,23 +597,45 @@ int obi_lcs_align_one_column(const char* dms_name,
id1_idx = obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, id_column, i, 0); // TODO Could there be multiple IDs per line?
// Get first sequence and its index
seq1_idx = obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, i, seq_elt_idx);
blob1 = obi_get_blob_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, i, seq_elt_idx);
blob1_mapped = obi_get_blob_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, i, seq_elt_idx);
if (blob1_mapped == NULL)
{
obidebug(1, "\nError retrieving sequences to align");
return -1;
}
blob1_size = obi_blob_sizeof(blob1_mapped);
blob1 = (Obi_blob_p) malloc(blob1_size);
if (blob1 == NULL)
{
obidebug(1, "\nError retrieving sequences to align");
return -1;
}
blob1 = memcpy(blob1, blob1_mapped, blob1_size);
//#pragma omp parallel shared(blob1_mapped, blob1, blob1_size, id1_idx, seq1_idx, stop, seq_count, output_view, k, ktable, \
// idx1_column, idx2_column, i, id1_column, id2_column, print_seq, seq1_column, seq2_column, \
// print_count, count1_column, count2_column, ali_length_column, lcs_length_column, score_column, reference, normalize, similarity_mode) \
// private(blob2_mapped, blob2, blob2_size, j, id2_idx, seq2_idx, count2, count1, score, ali_length, lcs_length)
//{
// #pragma omp for schedule(dynamic, seq_count/thread_count + (seq_count % thread_count != 0)) // Avoid 0 which blocks the program
for (j=i+1; j < seq_count; j++)
{
// Get second sequence and its index
seq2_idx = obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, j, seq_elt_idx);
blob2 = obi_get_blob_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, j, seq_elt_idx);
blob2_mapped = obi_get_blob_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, j, seq_elt_idx);
if (blob2_mapped == NULL)
{
obidebug(1, "\nError retrieving sequences to align");
stop = true;
}
blob2_size = obi_blob_sizeof(blob2_mapped);
blob2 = (Obi_blob_p) malloc(blob2_size);
if (blob2 == NULL)
{
obidebug(1, "\nError retrieving sequences to align");
return -1;
stop = true;
}
blob2 = memcpy(blob2, blob2_mapped, blob2_size);
// Check if the sequences are identical in a quick way (same index in the same indexer)
if (obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, i, seq_elt_idx) == obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, j, seq_elt_idx))
@ -624,6 +658,8 @@ int obi_lcs_align_one_column(const char* dms_name,
score = obiblob_sse_banded_lcs_align(blob1, blob2, threshold, normalize, reference, similarity_mode, &lcs_length, &ali_length);
}
free(blob2);
if ((score >= 0) && (((normalize || similarity_mode) && (score >= threshold)) || ((!similarity_mode && !normalize) && (score <= threshold))))
{ // Print result
@ -637,6 +673,10 @@ int obi_lcs_align_one_column(const char* dms_name,
count2 = obi_get_int_with_elt_idx_and_col_p_in_view(seq_view, i_count_column, j, 0);
}
// #ifdef _OPENMP
// if (pthread_mutex_lock(&mutex) < 0)
// stop = true;
// #endif
if (print_alignment_result(output_view, k,
idx1_column, idx2_column, i, j,
id1_column, id2_column, id1_idx, id2_idx,
@ -646,15 +686,24 @@ int obi_lcs_align_one_column(const char* dms_name,
lcs_length_column, lcs_length,
score_column, score,
reference, normalize, similarity_mode) < 0)
return -1;
stop = true;
// #ifdef _OPENMP
// if (pthread_mutex_unlock(&mutex) < 0)
// stop = true;
// #endif
k++;
}
}
//}
free(blob1);
}
fprintf(stderr,"\rDone : 100 %% \n");
//fprintf(stderr,"\nstop=%d\n", stop);
// Close views
if (obi_save_and_close_view(seq_view) < 0)
{
@ -675,6 +724,11 @@ int obi_lcs_align_one_column(const char* dms_name,
free_kmer_tables(ktable, seq_count);
// #ifdef _OPENMP
// if (pthread_mutex_destroy(&mutex) < 0)
// return -1;
// #endif
return 0;
}
@ -1087,4 +1141,3 @@ int obi_lcs_align_two_columns(const char* dms_name,
return 0;
}