Compare commits

..

5 Commits

7 changed files with 148 additions and 26 deletions

View File

@ -24,6 +24,9 @@ from cpython.exc cimport PyErr_CheckSignals
from io import BufferedWriter from io import BufferedWriter
MAX_PAT_LEN = 31
__title__="Assign sequence records to the corresponding experiment/sample based on DNA tags and primers" __title__="Assign sequence records to the corresponding experiment/sample based on DNA tags and primers"
@ -84,6 +87,8 @@ class Primer:
@type direct: @type direct:
''' '''
assert len(sequence) <= MAX_PAT_LEN, "Primer %s is too long, 31 bp max" % sequence
assert sequence not in Primer.collection \ assert sequence not in Primer.collection \
or Primer.collection[sequence]==taglength, \ or Primer.collection[sequence]==taglength, \
"Primer %s must always be used with tags of the same length" % sequence "Primer %s must always be used with tags of the same length" % sequence
@ -271,7 +276,7 @@ cdef tuple annotate(sequences, infos, no_tags, verbose=False):
if not_aligned: if not_aligned:
sequences[1] = sequences[1].clone() sequences[1] = sequences[1].clone()
sequences[0][REVERSE_SEQUENCE_COLUMN] = sequences[1].seq # used by alignpairedend tool sequences[0][REVERSE_SEQUENCE_COLUMN] = sequences[1].seq # used by alignpairedend tool
sequences[0][REVERSE_QUALITY_COLUMN] = sequences[1].quality # used by alignpairedend tool sequences[0][REVERSE_QUALITY_COLUMN] = sequences[1].quality # used by alignpairedend tool
for seq in sequences: for seq in sequences:

View File

@ -5,7 +5,9 @@ from obitools3.apps.config import logger
from obitools3.dms import DMS from obitools3.dms import DMS
from obitools3.apps.optiongroups import addMinimalInputOption from obitools3.apps.optiongroups import addMinimalInputOption
from obitools3.dms.view.view cimport View from obitools3.dms.view.view cimport View
from obitools3.utils cimport tostr
import os import os
import shutil
__title__="Delete a view" __title__="Delete a view"
@ -30,15 +32,56 @@ def run(config):
else: else:
raise NotImplementedError() raise NotImplementedError()
dms = input[0]
# Get the path to the view file to remove # Get the path to the view file to remove
path = input[0].full_path # dms path path = dms.full_path # dms path
path+=b"/VIEWS/" view_path=path+b"/VIEWS/"
path+=view.name view_path+=view.name
path+=b".obiview" view_path+=b".obiview"
to_remove = {}
# For each column:
for col_alias in view.keys():
col = view[col_alias]
col_name = col.original_name
col_version = col.version
col_type = col.data_type
col_ref = (col_name, col_version)
# build file name and AVL file names
col_file_name = f"{tostr(path)}/{tostr(col.original_name)}.obicol/{tostr(col.original_name)}@{col.version}.odc"
if col_type in [b'OBI_CHAR', b'OBI_QUAL', b'OBI_STR', b'OBI_SEQ']:
avl_file_name = f"{tostr(path)}/OBIBLOB_INDEXERS/{tostr(col.original_name)}_{col.version}_indexer"
else:
avl_file_name = None
to_remove[col_ref] = [col_file_name, avl_file_name]
# For each view:
do_not_remove = []
for vn in dms:
v = dms[vn]
# ignore the one being deleted
if v.name != view.name:
# check that none of the column is referenced, if referenced, remove from list to remove
cols = [(v[c].original_name, v[c].version) for c in v.keys()]
for col_ref in to_remove:
if col_ref in cols:
do_not_remove.append(col_ref)
for nr in do_not_remove:
to_remove.pop(nr)
# Close the view and the DMS # Close the view and the DMS
view.close() view.close()
input[0].close(force=True) input[0].close(force=True)
# Rm #print(to_remove)
os.remove(path)
# rm AFTER view and DMS close
os.remove(view_path)
for col in to_remove:
os.remove(to_remove[col][0])
if to_remove[col][1] is not None:
shutil.rmtree(to_remove[col][1])

View File

@ -103,7 +103,11 @@ def fastqWithQualityIterator(lineiterator,
yield seq yield seq
read+=1 read+=1
hline = next(i) try:
hline = next(i)
except StopIteration:
return
def fastqWithoutQualityIterator(lineiterator, def fastqWithoutQualityIterator(lineiterator,
@ -174,5 +178,7 @@ def fastqWithoutQualityIterator(lineiterator,
yield seq yield seq
read+=1 read+=1
hline = next(i) try:
hline = next(i)
except StopIteration:
return

View File

@ -99,7 +99,10 @@ def tabIterator(lineiterator,
read+=1 read+=1
line = next(iterator) try:
line = next(iterator)
except StopIteration:
return

View File

@ -1,5 +1,5 @@
major = 3 major = 3
minor = 0 minor = 0
serial= '1b21' serial= '1b25'
version ="%d.%d.%s" % (major,minor,serial) version ="%d.%d.%s" % (major,minor,serial)

View File

@ -365,8 +365,6 @@ static int print_seq(Obiview_p i_view, Obiview_p o_view,
int32_t i; int32_t i;
// TODO add check for primer longer than MAX_PAT_LEN (32)
// Get sequence id // Get sequence id
seq_id = obi_get_str_with_elt_idx_and_col_p_in_view(i_view, i_id_column, i_idx, 0); seq_id = obi_get_str_with_elt_idx_and_col_p_in_view(i_view, i_id_column, i_idx, 0);
@ -751,6 +749,20 @@ int obi_ecopcr(const char* i_dms_name,
o1c = complementPattern(o1); o1c = complementPattern(o1);
o2c = complementPattern(o2); o2c = complementPattern(o2);
// check for primers equal or longer than MAX_PAT_LEN (32)
if (strlen(primer1) >= MAX_PAT_LEN)
{
obi_set_errno(OBI_ECOPCR_ERROR);
obidebug(1, "\nError: first primer is too long, needs to be < 32bp (%s)", primer1);
return -1;
}
if (strlen(primer2) >= MAX_PAT_LEN)
{
obi_set_errno(OBI_ECOPCR_ERROR);
obidebug(1, "\nError: second primer is too long, needs to be < 32bp (%s)", primer2);
return -1;
}
// Open input DMS // Open input DMS
i_dms = obi_open_dms(i_dms_name, false); i_dms = obi_open_dms(i_dms_name, false);
if (i_dms == NULL) if (i_dms == NULL)

75
src/obi_lcs.c Executable file → Normal file
View File

@ -10,8 +10,9 @@
*/ */
//#define OMP_SUPPORT // TODO //#define OMP_SUPPORT // TODO
#ifdef OMP_SUPPORT #ifdef _OPENMP
#include <omp.h> #include <omp.h>
#include <pthread.h>
#endif #endif
#include <stdlib.h> #include <stdlib.h>
@ -407,10 +408,15 @@ int obi_lcs_align_one_column(const char* dms_name,
int lcs_length; int lcs_length;
int ali_length; int ali_length;
Kmer_table_p ktable; Kmer_table_p ktable;
Obi_blob_p blob1_mapped;
Obi_blob_p blob2_mapped;
Obi_blob_p blob1; Obi_blob_p blob1;
Obi_blob_p blob2; Obi_blob_p blob2;
int blob1_size;
int blob2_size;
int lcs_min; int lcs_min;
index_t seq_elt_idx; index_t seq_elt_idx;
bool stop = false;
OBIDMS_p dms = NULL; OBIDMS_p dms = NULL;
Obiview_p seq_view = NULL; Obiview_p seq_view = NULL;
@ -568,10 +574,16 @@ int obi_lcs_align_one_column(const char* dms_name,
seq_count = (seq_view->infos)->line_count; seq_count = (seq_view->infos)->line_count;
#ifdef OMP_SUPPORT // #ifdef _OPENMP
omp_set_num_threads(thread_count); // int max_threads = omp_get_max_threads();
#pragma omp parallel for // if ((thread_count == -1) || (thread_count > max_threads))
#endif // thread_count = max_threads;
// omp_set_num_threads(thread_count);
// fprintf(stderr, "Running on %d thread(s)\n", thread_count);
// pthread_mutex_t mutex;
// if (pthread_mutex_init(&mutex, NULL) < 0)
// return -1;
// #endif
for (i=0; i < (seq_count - 1); i++) for (i=0; i < (seq_count - 1); i++)
{ {
@ -585,23 +597,45 @@ int obi_lcs_align_one_column(const char* dms_name,
id1_idx = obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, id_column, i, 0); // TODO Could there be multiple IDs per line? id1_idx = obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, id_column, i, 0); // TODO Could there be multiple IDs per line?
// Get first sequence and its index // Get first sequence and its index
seq1_idx = obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, i, seq_elt_idx); seq1_idx = obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, i, seq_elt_idx);
blob1 = obi_get_blob_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, i, seq_elt_idx); blob1_mapped = obi_get_blob_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, i, seq_elt_idx);
if (blob1_mapped == NULL)
{
obidebug(1, "\nError retrieving sequences to align");
return -1;
}
blob1_size = obi_blob_sizeof(blob1_mapped);
blob1 = (Obi_blob_p) malloc(blob1_size);
if (blob1 == NULL) if (blob1 == NULL)
{ {
obidebug(1, "\nError retrieving sequences to align"); obidebug(1, "\nError retrieving sequences to align");
return -1; return -1;
} }
blob1 = memcpy(blob1, blob1_mapped, blob1_size);
//#pragma omp parallel shared(blob1_mapped, blob1, blob1_size, id1_idx, seq1_idx, stop, seq_count, output_view, k, ktable, \
// idx1_column, idx2_column, i, id1_column, id2_column, print_seq, seq1_column, seq2_column, \
// print_count, count1_column, count2_column, ali_length_column, lcs_length_column, score_column, reference, normalize, similarity_mode) \
// private(blob2_mapped, blob2, blob2_size, j, id2_idx, seq2_idx, count2, count1, score, ali_length, lcs_length)
//{
// #pragma omp for schedule(dynamic, seq_count/thread_count + (seq_count % thread_count != 0)) // Avoid 0 which blocks the program
for (j=i+1; j < seq_count; j++) for (j=i+1; j < seq_count; j++)
{ {
// Get second sequence and its index // Get second sequence and its index
seq2_idx = obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, j, seq_elt_idx); seq2_idx = obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, j, seq_elt_idx);
blob2 = obi_get_blob_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, j, seq_elt_idx); blob2_mapped = obi_get_blob_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, j, seq_elt_idx);
if (blob2_mapped == NULL)
{
obidebug(1, "\nError retrieving sequences to align");
stop = true;
}
blob2_size = obi_blob_sizeof(blob2_mapped);
blob2 = (Obi_blob_p) malloc(blob2_size);
if (blob2 == NULL) if (blob2 == NULL)
{ {
obidebug(1, "\nError retrieving sequences to align"); obidebug(1, "\nError retrieving sequences to align");
return -1; stop = true;
} }
blob2 = memcpy(blob2, blob2_mapped, blob2_size);
// Check if the sequences are identical in a quick way (same index in the same indexer) // Check if the sequences are identical in a quick way (same index in the same indexer)
if (obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, i, seq_elt_idx) == obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, j, seq_elt_idx)) if (obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, i, seq_elt_idx) == obi_get_index_with_elt_idx_and_col_p_in_view(seq_view, iseq_column, j, seq_elt_idx))
@ -624,6 +658,8 @@ int obi_lcs_align_one_column(const char* dms_name,
score = obiblob_sse_banded_lcs_align(blob1, blob2, threshold, normalize, reference, similarity_mode, &lcs_length, &ali_length); score = obiblob_sse_banded_lcs_align(blob1, blob2, threshold, normalize, reference, similarity_mode, &lcs_length, &ali_length);
} }
free(blob2);
if ((score >= 0) && (((normalize || similarity_mode) && (score >= threshold)) || ((!similarity_mode && !normalize) && (score <= threshold)))) if ((score >= 0) && (((normalize || similarity_mode) && (score >= threshold)) || ((!similarity_mode && !normalize) && (score <= threshold))))
{ // Print result { // Print result
@ -637,6 +673,10 @@ int obi_lcs_align_one_column(const char* dms_name,
count2 = obi_get_int_with_elt_idx_and_col_p_in_view(seq_view, i_count_column, j, 0); count2 = obi_get_int_with_elt_idx_and_col_p_in_view(seq_view, i_count_column, j, 0);
} }
// #ifdef _OPENMP
// if (pthread_mutex_lock(&mutex) < 0)
// stop = true;
// #endif
if (print_alignment_result(output_view, k, if (print_alignment_result(output_view, k,
idx1_column, idx2_column, i, j, idx1_column, idx2_column, i, j,
id1_column, id2_column, id1_idx, id2_idx, id1_column, id2_column, id1_idx, id2_idx,
@ -646,15 +686,24 @@ int obi_lcs_align_one_column(const char* dms_name,
lcs_length_column, lcs_length, lcs_length_column, lcs_length,
score_column, score, score_column, score,
reference, normalize, similarity_mode) < 0) reference, normalize, similarity_mode) < 0)
return -1; stop = true;
// #ifdef _OPENMP
// if (pthread_mutex_unlock(&mutex) < 0)
// stop = true;
// #endif
k++; k++;
} }
} }
//}
free(blob1);
} }
fprintf(stderr,"\rDone : 100 %% \n"); fprintf(stderr,"\rDone : 100 %% \n");
//fprintf(stderr,"\nstop=%d\n", stop);
// Close views // Close views
if (obi_save_and_close_view(seq_view) < 0) if (obi_save_and_close_view(seq_view) < 0)
{ {
@ -675,6 +724,11 @@ int obi_lcs_align_one_column(const char* dms_name,
free_kmer_tables(ktable, seq_count); free_kmer_tables(ktable, seq_count);
// #ifdef _OPENMP
// if (pthread_mutex_destroy(&mutex) < 0)
// return -1;
// #endif
return 0; return 0;
} }
@ -1087,4 +1141,3 @@ int obi_lcs_align_two_columns(const char* dms_name,
return 0; return 0;
} }