Compare commits
4 Commits
v3.0.1b22
...
adria_buil
Author | SHA1 | Date | |
---|---|---|---|
6819d04615 | |||
8a1f844645 | |||
791ccfb92e | |||
1c9a906f5b |
@ -24,6 +24,9 @@ from cpython.exc cimport PyErr_CheckSignals
|
|||||||
from io import BufferedWriter
|
from io import BufferedWriter
|
||||||
|
|
||||||
|
|
||||||
|
MAX_PAT_LEN = 31
|
||||||
|
|
||||||
|
|
||||||
__title__="Assign sequence records to the corresponding experiment/sample based on DNA tags and primers"
|
__title__="Assign sequence records to the corresponding experiment/sample based on DNA tags and primers"
|
||||||
|
|
||||||
|
|
||||||
@ -84,6 +87,8 @@ class Primer:
|
|||||||
@type direct:
|
@type direct:
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
assert len(sequence) <= MAX_PAT_LEN, "Primer %s is too long, 31 bp max" % sequence
|
||||||
|
|
||||||
assert sequence not in Primer.collection \
|
assert sequence not in Primer.collection \
|
||||||
or Primer.collection[sequence]==taglength, \
|
or Primer.collection[sequence]==taglength, \
|
||||||
"Primer %s must always be used with tags of the same length" % sequence
|
"Primer %s must always be used with tags of the same length" % sequence
|
||||||
|
@ -103,7 +103,11 @@ def fastqWithQualityIterator(lineiterator,
|
|||||||
yield seq
|
yield seq
|
||||||
|
|
||||||
read+=1
|
read+=1
|
||||||
|
try:
|
||||||
hline = next(i)
|
hline = next(i)
|
||||||
|
except StopIteration:
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def fastqWithoutQualityIterator(lineiterator,
|
def fastqWithoutQualityIterator(lineiterator,
|
||||||
@ -174,5 +178,7 @@ def fastqWithoutQualityIterator(lineiterator,
|
|||||||
yield seq
|
yield seq
|
||||||
|
|
||||||
read+=1
|
read+=1
|
||||||
|
try:
|
||||||
hline = next(i)
|
hline = next(i)
|
||||||
|
except StopIteration:
|
||||||
|
return
|
||||||
|
@ -99,7 +99,10 @@ def tabIterator(lineiterator,
|
|||||||
|
|
||||||
read+=1
|
read+=1
|
||||||
|
|
||||||
|
try:
|
||||||
line = next(iterator)
|
line = next(iterator)
|
||||||
|
except StopIteration:
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
|||||||
major = 3
|
major = 3
|
||||||
minor = 0
|
minor = 0
|
||||||
serial= '1b22'
|
serial= '1b25'
|
||||||
|
|
||||||
version ="%d.%d.%s" % (major,minor,serial)
|
version ="%d.%d.%s" % (major,minor,serial)
|
||||||
|
@ -365,8 +365,6 @@ static int print_seq(Obiview_p i_view, Obiview_p o_view,
|
|||||||
|
|
||||||
int32_t i;
|
int32_t i;
|
||||||
|
|
||||||
// TODO add check for primer longer than MAX_PAT_LEN (32)
|
|
||||||
|
|
||||||
// Get sequence id
|
// Get sequence id
|
||||||
seq_id = obi_get_str_with_elt_idx_and_col_p_in_view(i_view, i_id_column, i_idx, 0);
|
seq_id = obi_get_str_with_elt_idx_and_col_p_in_view(i_view, i_id_column, i_idx, 0);
|
||||||
|
|
||||||
@ -751,6 +749,20 @@ int obi_ecopcr(const char* i_dms_name,
|
|||||||
o1c = complementPattern(o1);
|
o1c = complementPattern(o1);
|
||||||
o2c = complementPattern(o2);
|
o2c = complementPattern(o2);
|
||||||
|
|
||||||
|
// check for primers equal or longer than MAX_PAT_LEN (32)
|
||||||
|
if (strlen(primer1) >= MAX_PAT_LEN)
|
||||||
|
{
|
||||||
|
obi_set_errno(OBI_ECOPCR_ERROR);
|
||||||
|
obidebug(1, "\nError: first primer is too long, needs to be < 32bp (%s)", primer1);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (strlen(primer2) >= MAX_PAT_LEN)
|
||||||
|
{
|
||||||
|
obi_set_errno(OBI_ECOPCR_ERROR);
|
||||||
|
obidebug(1, "\nError: second primer is too long, needs to be < 32bp (%s)", primer2);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
// Open input DMS
|
// Open input DMS
|
||||||
i_dms = obi_open_dms(i_dms_name, false);
|
i_dms = obi_open_dms(i_dms_name, false);
|
||||||
if (i_dms == NULL)
|
if (i_dms == NULL)
|
||||||
|
73
src/obi_lcs.c
Executable file → Normal file
73
src/obi_lcs.c
Executable file → Normal file
@ -10,8 +10,9 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
//#define OMP_SUPPORT // TODO
|
//#define OMP_SUPPORT // TODO
|
||||||
#ifdef OMP_SUPPORT
|
#ifdef _OPENMP
|
||||||
#include <omp.h>
|
#include <omp.h>
|
||||||
|
#include <pthread.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
@ -407,10 +408,15 @@ int obi_lcs_align_one_column(const char* dms_name,
|
|||||||
int lcs_length;
|
int lcs_length;
|
||||||
int ali_length;
|
int ali_length;
|
||||||
Kmer_table_p ktable;
|
Kmer_table_p ktable;
|
||||||
|
Obi_blob_p blob1_mapped;
|
||||||
|
Obi_blob_p blob2_mapped;
|
||||||
Obi_blob_p blob1;
|
Obi_blob_p blob1;
|
||||||
Obi_blob_p blob2;
|
Obi_blob_p blob2;
|
||||||
|
int blob1_size;
|
||||||
|
int blob2_size;
|
||||||
int lcs_min;
|
int lcs_min;
|
||||||
index_t seq_elt_idx;
|
index_t seq_elt_idx;
|
||||||
|
bool stop = false;
|
||||||
|
|
||||||
OBIDMS_p dms = NULL;
|
OBIDMS_p dms = NULL;
|
||||||
Obiview_p seq_view = NULL;
|
Obiview_p seq_view = NULL;
|
||||||
@ -568,10 +574,16 @@ int obi_lcs_align_one_column(const char* dms_name,
|
|||||||
|
|
||||||
seq_count = (seq_view->infos)->line_count;
|
seq_count = (seq_view->infos)->line_count;
|
||||||
|
|
||||||
#ifdef OMP_SUPPORT
|
// #ifdef _OPENMP
|
||||||
omp_set_num_threads(thread_count);
|
// int max_threads = omp_get_max_threads();
|
||||||
#pragma omp parallel for
|
// if ((thread_count == -1) || (thread_count > max_threads))
|
||||||
#endif
|
// thread_count = max_threads;
|
||||||
|
// omp_set_num_threads(thread_count);
|
||||||
|
// fprintf(stderr, "Running on %d thread(s)\n", thread_count);
|
||||||
|
// pthread_mutex_t mutex;
|
||||||
|
// if (pthread_mutex_init(&mutex, NULL) < 0)
|
||||||
|
// return -1;
|
||||||
|
// #endif
|
||||||
|
|
||||||
for (i=0; i < (seq_count - 1); i++)
|
for (i=0; i < (seq_count - 1); i++)
|
||||||
{
|
{
|
||||||
@ -585,23 +597,45 @@ int obi_lcs_align_one_column(const char* dms_name,
|
|||||||
id1_idx = obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, id_column, i, 0); // TODO Could there be multiple IDs per line?
|
id1_idx = obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, id_column, i, 0); // TODO Could there be multiple IDs per line?
|
||||||
// Get first sequence and its index
|
// Get first sequence and its index
|
||||||
seq1_idx = obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, i, seq_elt_idx);
|
seq1_idx = obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, i, seq_elt_idx);
|
||||||
blob1 = obi_get_blob_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, i, seq_elt_idx);
|
blob1_mapped = obi_get_blob_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, i, seq_elt_idx);
|
||||||
|
if (blob1_mapped == NULL)
|
||||||
|
{
|
||||||
|
obidebug(1, "\nError retrieving sequences to align");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
blob1_size = obi_blob_sizeof(blob1_mapped);
|
||||||
|
blob1 = (Obi_blob_p) malloc(blob1_size);
|
||||||
if (blob1 == NULL)
|
if (blob1 == NULL)
|
||||||
{
|
{
|
||||||
obidebug(1, "\nError retrieving sequences to align");
|
obidebug(1, "\nError retrieving sequences to align");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
blob1 = memcpy(blob1, blob1_mapped, blob1_size);
|
||||||
|
|
||||||
|
//#pragma omp parallel shared(blob1_mapped, blob1, blob1_size, id1_idx, seq1_idx, stop, seq_count, output_view, k, ktable, \
|
||||||
|
// idx1_column, idx2_column, i, id1_column, id2_column, print_seq, seq1_column, seq2_column, \
|
||||||
|
// print_count, count1_column, count2_column, ali_length_column, lcs_length_column, score_column, reference, normalize, similarity_mode) \
|
||||||
|
// private(blob2_mapped, blob2, blob2_size, j, id2_idx, seq2_idx, count2, count1, score, ali_length, lcs_length)
|
||||||
|
//{
|
||||||
|
// #pragma omp for schedule(dynamic, seq_count/thread_count + (seq_count % thread_count != 0)) // Avoid 0 which blocks the program
|
||||||
for (j=i+1; j < seq_count; j++)
|
for (j=i+1; j < seq_count; j++)
|
||||||
{
|
{
|
||||||
// Get second sequence and its index
|
// Get second sequence and its index
|
||||||
seq2_idx = obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, j, seq_elt_idx);
|
seq2_idx = obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, j, seq_elt_idx);
|
||||||
blob2 = obi_get_blob_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, j, seq_elt_idx);
|
blob2_mapped = obi_get_blob_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, j, seq_elt_idx);
|
||||||
|
if (blob2_mapped == NULL)
|
||||||
|
{
|
||||||
|
obidebug(1, "\nError retrieving sequences to align");
|
||||||
|
stop = true;
|
||||||
|
}
|
||||||
|
blob2_size = obi_blob_sizeof(blob2_mapped);
|
||||||
|
blob2 = (Obi_blob_p) malloc(blob2_size);
|
||||||
if (blob2 == NULL)
|
if (blob2 == NULL)
|
||||||
{
|
{
|
||||||
obidebug(1, "\nError retrieving sequences to align");
|
obidebug(1, "\nError retrieving sequences to align");
|
||||||
return -1;
|
stop = true;
|
||||||
}
|
}
|
||||||
|
blob2 = memcpy(blob2, blob2_mapped, blob2_size);
|
||||||
|
|
||||||
// Check if the sequences are identical in a quick way (same index in the same indexer)
|
// Check if the sequences are identical in a quick way (same index in the same indexer)
|
||||||
if (obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, i, seq_elt_idx) == obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, j, seq_elt_idx))
|
if (obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, i, seq_elt_idx) == obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, j, seq_elt_idx))
|
||||||
@ -624,6 +658,8 @@ int obi_lcs_align_one_column(const char* dms_name,
|
|||||||
score = obiblob_sse_banded_lcs_align(blob1, blob2, threshold, normalize, reference, similarity_mode, &lcs_length, &ali_length);
|
score = obiblob_sse_banded_lcs_align(blob1, blob2, threshold, normalize, reference, similarity_mode, &lcs_length, &ali_length);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
free(blob2);
|
||||||
|
|
||||||
if ((score >= 0) && (((normalize || similarity_mode) && (score >= threshold)) || ((!similarity_mode && !normalize) && (score <= threshold))))
|
if ((score >= 0) && (((normalize || similarity_mode) && (score >= threshold)) || ((!similarity_mode && !normalize) && (score <= threshold))))
|
||||||
{ // Print result
|
{ // Print result
|
||||||
|
|
||||||
@ -637,6 +673,10 @@ int obi_lcs_align_one_column(const char* dms_name,
|
|||||||
count2 = obi_get_int_with_elt_idx_and_col_p_in_view(seq_view, i_count_column, j, 0);
|
count2 = obi_get_int_with_elt_idx_and_col_p_in_view(seq_view, i_count_column, j, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// #ifdef _OPENMP
|
||||||
|
// if (pthread_mutex_lock(&mutex) < 0)
|
||||||
|
// stop = true;
|
||||||
|
// #endif
|
||||||
if (print_alignment_result(output_view, k,
|
if (print_alignment_result(output_view, k,
|
||||||
idx1_column, idx2_column, i, j,
|
idx1_column, idx2_column, i, j,
|
||||||
id1_column, id2_column, id1_idx, id2_idx,
|
id1_column, id2_column, id1_idx, id2_idx,
|
||||||
@ -646,15 +686,24 @@ int obi_lcs_align_one_column(const char* dms_name,
|
|||||||
lcs_length_column, lcs_length,
|
lcs_length_column, lcs_length,
|
||||||
score_column, score,
|
score_column, score,
|
||||||
reference, normalize, similarity_mode) < 0)
|
reference, normalize, similarity_mode) < 0)
|
||||||
return -1;
|
stop = true;
|
||||||
|
// #ifdef _OPENMP
|
||||||
|
// if (pthread_mutex_unlock(&mutex) < 0)
|
||||||
|
// stop = true;
|
||||||
|
// #endif
|
||||||
|
|
||||||
k++;
|
k++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
//}
|
||||||
|
free(blob1);
|
||||||
}
|
}
|
||||||
|
|
||||||
fprintf(stderr,"\rDone : 100 %% \n");
|
fprintf(stderr,"\rDone : 100 %% \n");
|
||||||
|
|
||||||
|
//fprintf(stderr,"\nstop=%d\n", stop);
|
||||||
|
|
||||||
|
|
||||||
// Close views
|
// Close views
|
||||||
if (obi_save_and_close_view(seq_view) < 0)
|
if (obi_save_and_close_view(seq_view) < 0)
|
||||||
{
|
{
|
||||||
@ -675,6 +724,11 @@ int obi_lcs_align_one_column(const char* dms_name,
|
|||||||
|
|
||||||
free_kmer_tables(ktable, seq_count);
|
free_kmer_tables(ktable, seq_count);
|
||||||
|
|
||||||
|
// #ifdef _OPENMP
|
||||||
|
// if (pthread_mutex_destroy(&mutex) < 0)
|
||||||
|
// return -1;
|
||||||
|
// #endif
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1087,4 +1141,3 @@ int obi_lcs_align_two_columns(const char* dms_name,
|
|||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user