Compare commits

...

11 Commits

9 changed files with 96 additions and 25 deletions

View File

@ -24,6 +24,9 @@ from cpython.exc cimport PyErr_CheckSignals
from io import BufferedWriter from io import BufferedWriter
MAX_PAT_LEN = 31
__title__="Assign sequence records to the corresponding experiment/sample based on DNA tags and primers" __title__="Assign sequence records to the corresponding experiment/sample based on DNA tags and primers"
@ -84,6 +87,8 @@ class Primer:
@type direct: @type direct:
''' '''
assert len(sequence) <= MAX_PAT_LEN, "Primer %s is too long, 31 bp max" % sequence
assert sequence not in Primer.collection \ assert sequence not in Primer.collection \
or Primer.collection[sequence]==taglength, \ or Primer.collection[sequence]==taglength, \
"Primer %s must always be used with tags of the same length" % sequence "Primer %s must always be used with tags of the same length" % sequence
@ -271,7 +276,7 @@ cdef tuple annotate(sequences, infos, no_tags, verbose=False):
if not_aligned: if not_aligned:
sequences[1] = sequences[1].clone() sequences[1] = sequences[1].clone()
sequences[0][REVERSE_SEQUENCE_COLUMN] = sequences[1].seq # used by alignpairedend tool sequences[0][REVERSE_SEQUENCE_COLUMN] = sequences[1].seq # used by alignpairedend tool
sequences[0][REVERSE_QUALITY_COLUMN] = sequences[1].quality # used by alignpairedend tool sequences[0][REVERSE_QUALITY_COLUMN] = sequences[1].quality # used by alignpairedend tool
for seq in sequences: for seq in sequences:
@ -299,7 +304,7 @@ cdef tuple annotate(sequences, infos, no_tags, verbose=False):
directmatch.append((p, p(seq, same_sequence=not new_seq, pattern=pattern), seq, p)) directmatch.append((p, p(seq, same_sequence=not new_seq, pattern=pattern), seq, p))
new_seq = False new_seq = False
pattern+=1 pattern+=1
# Choose match closer to the start of (one of the) sequence(s) # Choose match closer to the start of (one of the) sequence(s)
directmatch = sorted(directmatch, key=sortMatch) directmatch = sorted(directmatch, key=sortMatch)
all_direct_matches = directmatch all_direct_matches = directmatch

View File

@ -5,7 +5,9 @@ from obitools3.apps.config import logger
from obitools3.dms import DMS from obitools3.dms import DMS
from obitools3.apps.optiongroups import addMinimalInputOption from obitools3.apps.optiongroups import addMinimalInputOption
from obitools3.dms.view.view cimport View from obitools3.dms.view.view cimport View
from obitools3.utils cimport tostr
import os import os
import shutil
__title__="Delete a view" __title__="Delete a view"
@ -30,15 +32,56 @@ def run(config):
else: else:
raise NotImplementedError() raise NotImplementedError()
dms = input[0]
# Get the path to the view file to remove # Get the path to the view file to remove
path = input[0].full_path # dms path path = dms.full_path # dms path
path+=b"/VIEWS/" view_path=path+b"/VIEWS/"
path+=view.name view_path+=view.name
path+=b".obiview" view_path+=b".obiview"
to_remove = {}
# For each column:
for col_alias in view.keys():
col = view[col_alias]
col_name = col.original_name
col_version = col.version
col_type = col.data_type
col_ref = (col_name, col_version)
# build file name and AVL file names
col_file_name = f"{tostr(path)}/{tostr(col.original_name)}.obicol/{tostr(col.original_name)}@{col.version}.odc"
if col_type in [b'OBI_CHAR', b'OBI_QUAL', b'OBI_STR', b'OBI_SEQ']:
avl_file_name = f"{tostr(path)}/OBIBLOB_INDEXERS/{tostr(col.original_name)}_{col.version}_indexer"
else:
avl_file_name = None
to_remove[col_ref] = [col_file_name, avl_file_name]
# For each view:
do_not_remove = []
for vn in dms:
v = dms[vn]
# ignore the one being deleted
if v.name != view.name:
# check that none of the column is referenced, if referenced, remove from list to remove
cols = [(v[c].original_name, v[c].version) for c in v.keys()]
for col_ref in to_remove:
if col_ref in cols:
do_not_remove.append(col_ref)
for nr in do_not_remove:
to_remove.pop(nr)
# Close the view and the DMS # Close the view and the DMS
view.close() view.close()
input[0].close(force=True) input[0].close(force=True)
# Rm #print(to_remove)
os.remove(path)
# rm AFTER view and DMS close
os.remove(view_path)
for col in to_remove:
os.remove(to_remove[col][0])
if to_remove[col][1] is not None:
shutil.rmtree(to_remove[col][1])

View File

@ -325,8 +325,9 @@ cdef class Taxonomy(OBIWrapper) :
cdef Taxon taxon cdef Taxon taxon
try: try:
taxon = self.get_taxon_by_taxid(taxid) taxon = self.get_taxon_by_taxid(taxid)
except: except Exception as e:
raise StopIteration print('\n'+e, file=sys.stderr)
return
if taxon is not None: if taxon is not None:
while taxon.taxid != 1: while taxon.taxid != 1:
yield taxon yield taxon
@ -334,7 +335,7 @@ cdef class Taxonomy(OBIWrapper) :
taxon = taxon.parent taxon = taxon.parent
yield taxon yield taxon
else: else:
raise StopIteration return
def is_ancestor(self, int ancestor_taxid, int taxid): def is_ancestor(self, int ancestor_taxid, int taxid):

View File

@ -23,16 +23,17 @@ cdef class TabFormat:
@cython.boundscheck(False) @cython.boundscheck(False)
def __call__(self, object data): def __call__(self, object data):
cdef set ktags cdef object ktags
cdef list tags = [key for key in data] cdef list tags = [key for key in data]
line = [] line = []
if self.tags != None and self.tags:
if self.tags is not None and self.tags: ktags = list(self.tags)
ktags = self.tags
else: else:
ktags = set(tags) ktags = list(set(tags))
ktags.sort()
if self.header and self.first_line: if self.header and self.first_line:
for k in ktags: for k in ktags:
if k in tags: if k in tags:

View File

@ -103,7 +103,11 @@ def fastqWithQualityIterator(lineiterator,
yield seq yield seq
read+=1 read+=1
hline = next(i) try:
hline = next(i)
except StopIteration:
return
def fastqWithoutQualityIterator(lineiterator, def fastqWithoutQualityIterator(lineiterator,
@ -174,5 +178,7 @@ def fastqWithoutQualityIterator(lineiterator,
yield seq yield seq
read+=1 read+=1
hline = next(i) try:
hline = next(i)
except StopIteration:
return

View File

@ -99,7 +99,10 @@ def tabIterator(lineiterator,
read+=1 read+=1
line = next(iterator) try:
line = next(iterator)
except StopIteration:
return

View File

@ -280,7 +280,7 @@ def open_uri(uri,
iseq = urib iseq = urib
objclass = bytes objclass = bytes
else: # TODO update uopen to be able to write? else: # TODO update uopen to be able to write?
if config['obi']['outputformat'] == b'metabaR': if 'outputformat' in config['obi'] and config['obi']['outputformat'] == b'metabaR':
if 'metabarprefix' not in config['obi']: if 'metabarprefix' not in config['obi']:
raise Exception("Prefix needed when exporting for metabaR (--metabaR-prefix option)") raise Exception("Prefix needed when exporting for metabaR (--metabaR-prefix option)")
else: else:

View File

@ -1,5 +1,5 @@
major = 3 major = 3
minor = 0 minor = 0
serial= '1b21' serial= '1b26'
version ="%d.%d.%s" % (major,minor,serial) version ="%d.%d.%s" % (major,minor,serial)

View File

@ -365,8 +365,6 @@ static int print_seq(Obiview_p i_view, Obiview_p o_view,
int32_t i; int32_t i;
// TODO add check for primer longer than MAX_PAT_LEN (32)
// Get sequence id // Get sequence id
seq_id = obi_get_str_with_elt_idx_and_col_p_in_view(i_view, i_id_column, i_idx, 0); seq_id = obi_get_str_with_elt_idx_and_col_p_in_view(i_view, i_id_column, i_idx, 0);
@ -751,6 +749,20 @@ int obi_ecopcr(const char* i_dms_name,
o1c = complementPattern(o1); o1c = complementPattern(o1);
o2c = complementPattern(o2); o2c = complementPattern(o2);
// check for primers equal or longer than MAX_PAT_LEN (32)
if (strlen(primer1) >= MAX_PAT_LEN)
{
obi_set_errno(OBI_ECOPCR_ERROR);
obidebug(1, "\nError: first primer is too long, needs to be < 32bp (%s)", primer1);
return -1;
}
if (strlen(primer2) >= MAX_PAT_LEN)
{
obi_set_errno(OBI_ECOPCR_ERROR);
obidebug(1, "\nError: second primer is too long, needs to be < 32bp (%s)", primer2);
return -1;
}
// Open input DMS // Open input DMS
i_dms = obi_open_dms(i_dms_name, false); i_dms = obi_open_dms(i_dms_name, false);
if (i_dms == NULL) if (i_dms == NULL)