Compare commits

..

12 Commits

11 changed files with 79 additions and 41 deletions

View File

@ -161,8 +161,7 @@ def obi_eval(compiled_expr, loc_env, line):
return obi_eval_result return obi_eval_result
def Filter_generator(options, tax_filter): def Filter_generator(options, tax_filter, i_view):
#taxfilter = taxonomyFilterGenerator(options)
# Initialize conditions # Initialize conditions
predicates = None predicates = None
@ -171,6 +170,9 @@ def Filter_generator(options, tax_filter):
attributes = None attributes = None
if "attributes" in options and len(options["attributes"]) > 0: if "attributes" in options and len(options["attributes"]) > 0:
attributes = options["attributes"] attributes = options["attributes"]
for attribute in attributes:
if attribute not in i_view:
return None
lmax = None lmax = None
if "lmax" in options: if "lmax" in options:
lmax = options["lmax"] lmax = options["lmax"]
@ -196,6 +198,8 @@ def Filter_generator(options, tax_filter):
if "attribute_patterns" in options and len(options["attribute_patterns"]) > 0: if "attribute_patterns" in options and len(options["attribute_patterns"]) > 0:
for p in options["attribute_patterns"]: for p in options["attribute_patterns"]:
attribute, pattern = p.split(":", 1) attribute, pattern = p.split(":", 1)
if attribute not in i_view:
return None
attribute_patterns[tobytes(attribute)] = re.compile(tobytes(pattern)) attribute_patterns[tobytes(attribute)] = re.compile(tobytes(pattern))
def filter(line, loc_env): def filter(line, loc_env):
@ -324,21 +328,29 @@ def run(config):
# Apply filter # Apply filter
tax_filter = Taxonomy_filter_generator(taxo, config["grep"]) tax_filter = Taxonomy_filter_generator(taxo, config["grep"])
filter = Filter_generator(config["grep"], tax_filter) filter = Filter_generator(config["grep"], tax_filter, i_view)
selection = Line_selection(i_view) selection = Line_selection(i_view)
for i in range(len(i_view)):
PyErr_CheckSignals()
pb(i)
line = i_view[i]
loc_env = {"sequence": line, "line": line, "taxonomy": taxo, "obi_eval_result": False} if filter is None and config["grep"]["invert_selection"]: # all sequences are selected: filter is None if no line will be selected because some columns don't exist
for i in range(len(i_view)):
good = filter(line, loc_env) PyErr_CheckSignals()
pb(i)
if good :
selection.append(i) selection.append(i)
pb(i, force=True) elif filter is not None : # filter is None if no line will be selected because some columns don't exist
for i in range(len(i_view)):
PyErr_CheckSignals()
pb(i)
line = i_view[i]
loc_env = {"sequence": line, "line": line, "taxonomy": taxo, "obi_eval_result": False}
good = filter(line, loc_env)
if good :
selection.append(i)
pb(len(i_view), force=True)
print("", file=sys.stderr) print("", file=sys.stderr)
# Create output view with the line selection # Create output view with the line selection

View File

@ -34,9 +34,10 @@ def run(config):
if input[2] == DMS and not config['ls']['longformat']: if input[2] == DMS and not config['ls']['longformat']:
dms = input[0] dms = input[0]
l = [] l = []
for view in input[0]: for viewname in input[0]:
l.append(tostr(view) + "\t(Date created: " + str(bytes2str_object(dms[view].comments["Date created"]))+")") view = dms[viewname]
dms[view].close() l.append(tostr(viewname) + "\t(Date created: " + str(bytes2str_object(view.comments["Date created"]))+")")
view.close()
l.sort() l.sort()
for v in l: for v in l:
print(v) print(v)

View File

@ -479,6 +479,8 @@ cdef tuple annotate(sequences, infos, no_tags, verbose=False):
if not directmatch[0].forward: if not directmatch[0].forward:
sequences[0] = sequences[0].reverse_complement sequences[0] = sequences[0].reverse_complement
sequences[0][b'reversed'] = True # used by the alignpairedend tool (in kmer_similarity.c) sequences[0][b'reversed'] = True # used by the alignpairedend tool (in kmer_similarity.c)
else:
sequences[0][b'reversed'] = False # used by the alignpairedend tool (in kmer_similarity.c)
sample=None sample=None
if not no_tags: if not no_tags:
@ -506,7 +508,7 @@ cdef tuple annotate(sequences, infos, no_tags, verbose=False):
sample=None sample=None
if sample is None: if sample is None:
sequences[0][b'error']=b"No tags found" sequences[0][b'error']=b"No sample with that tag combination"
return False, sequences[0] return False, sequences[0]
sequences[0].update(sample) sequences[0].update(sample)

View File

@ -591,10 +591,11 @@ def run(config):
# Initialize the progress bar # Initialize the progress bar
pb = ProgressBar(len(entries), config, seconde=5) pb = ProgressBar(len(entries), config, seconde=5)
try: if len(entries) > 0:
uniq_sequences(entries, o_view, pb, config, mergedKeys_list=config['uniq']['merge'], taxonomy=taxo, mergeIds=config['uniq']['mergeids'], categories=config['uniq']['categories'], max_elts=config['obi']['maxelts']) try:
except Exception, e: uniq_sequences(entries, o_view, pb, config, mergedKeys_list=config['uniq']['merge'], taxonomy=taxo, mergeIds=config['uniq']['mergeids'], categories=config['uniq']['categories'], max_elts=config['obi']['maxelts'])
raise RollbackException("obi uniq error, rollbacking view: "+str(e), o_view) except Exception, e:
raise RollbackException("obi uniq error, rollbacking view: "+str(e), o_view)
print("", file=sys.stderr) print("", file=sys.stderr)

View File

@ -22,6 +22,7 @@ cdef class Column(OBIWrapper) :
cdef inline OBIDMS_column_p pointer(self) cdef inline OBIDMS_column_p pointer(self)
cdef read_elements_names(self) cdef read_elements_names(self)
cpdef list keys(self)
@staticmethod @staticmethod
cdef type get_column_class(obitype_t obitype, bint multi_elts, bint tuples) cdef type get_column_class(obitype_t obitype, bint multi_elts, bint tuples)

View File

@ -323,6 +323,9 @@ cdef class Column(OBIWrapper) :
free(elts_names_b) free(elts_names_b)
return elts_names_list return elts_names_list
cpdef list keys(self):
return self._elements_names
# Column alias property getter and setter # Column alias property getter and setter
@property @property

View File

@ -227,7 +227,9 @@ cdef class DMS(OBIWrapper):
cdef str s cdef str s
s="" s=""
for view_name in self.keys(): for view_name in self.keys():
s = s + repr(self.get_view(view_name)) + "\n" view = self.get_view(view_name)
s = s + repr(view) + "\n"
view.close()
return s return s

View File

@ -3,7 +3,7 @@
cimport cython cimport cython
from obitools3.dms.view.view cimport Line from obitools3.dms.view.view cimport Line
from obitools3.utils cimport bytes2str_object, str2bytes, tobytes from obitools3.utils cimport bytes2str_object, str2bytes, tobytes
from obitools3.dms.column.column cimport Column_line from obitools3.dms.column.column cimport Column_line, Column_multi_elts
cdef class TabFormat: cdef class TabFormat:
@ -25,18 +25,28 @@ cdef class TabFormat:
for k in self.tags: for k in self.tags:
if self.header and self.first_line: if self.header and self.first_line:
value = tobytes(k) if isinstance(data.view[k], Column_multi_elts):
for k2 in data.view[k].keys():
line.append(tobytes(k)+b':'+tobytes(k2))
else:
line.append(tobytes(k))
else: else:
value = data[k] value = data[k]
if value is not None: if isinstance(data.view[k], Column_multi_elts):
if type(value) == Column_line: if value is None: # all keys at None
value = value.bytes() for k2 in data.view[k].keys(): # TODO could be much more efficient
line.append(self.NAString)
else: else:
value = str2bytes(str(bytes2str_object(value))) # genius programming for k2 in data.view[k].keys(): # TODO could be much more efficient
if value is None: if value[k2] is not None:
value = self.NAString line.append(str2bytes(str(bytes2str_object(value[k2])))) # genius programming
else:
line.append(value) line.append(self.NAString)
else:
if value is not None:
line.append(str2bytes(str(bytes2str_object(value))))
else:
line.append(self.NAString)
if self.first_line: if self.first_line:
self.first_line = False self.first_line = False

View File

@ -166,7 +166,9 @@ cdef object bytes2str_object(object value): # Only works if complex types are d
value[k] = bytes2str(v) value[k] = bytes2str(v)
if type(k) == bytes: if type(k) == bytes:
value[bytes2str(k)] = value.pop(k) value[bytes2str(k)] = value.pop(k)
elif isinstance(value, list): elif isinstance(value, list) or isinstance(value, tuple):
if isinstance(value, tuple):
value = list(value)
for i in range(len(value)): for i in range(len(value)):
if isinstance(value[i], list) or isinstance(value[i], dict): if isinstance(value[i], list) or isinstance(value[i], dict):
value[i] = bytes2str_object(value[i]) value[i] = bytes2str_object(value[i])

View File

@ -1,5 +1,5 @@
major = 3 major = 3
minor = 0 minor = 0
serial= '0b24' serial= '0b26'
version ="%d.%d.%s" % (major,minor,serial) version ="%d.%d.%s" % (major,minor,serial)

View File

@ -1350,6 +1350,8 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
} }
strncpy(header->indexer_name, final_indexer_name, INDEXER_MAX_NAME); strncpy(header->indexer_name, final_indexer_name, INDEXER_MAX_NAME);
} }
else
new_column->indexer = NULL;
// Fill the data with NA values // Fill the data with NA values
obi_ini_to_NA_values(new_column, 0, nb_lines); obi_ini_to_NA_values(new_column, 0, nb_lines);
@ -1558,6 +1560,8 @@ OBIDMS_column_p obi_open_column(OBIDMS_p dms,
return NULL; return NULL;
} }
} }
else
column->indexer = NULL;
if (close(column_file_descriptor) < 0) if (close(column_file_descriptor) < 0)
{ {
@ -1694,7 +1698,7 @@ int obi_close_column(OBIDMS_column_p column)
ret_val = -1; ret_val = -1;
// If it's a tuple column or the data type is OBI_STR, OBI_SEQ or OBI_QUAL, the associated indexer is closed // If it's a tuple column or the data type is OBI_STR, OBI_SEQ or OBI_QUAL, the associated indexer is closed
if (((column->header)->tuples) || (((column->header)->returned_data_type == OBI_STR) || ((column->header)->returned_data_type == OBI_SEQ) || ((column->header)->returned_data_type == OBI_QUAL))) if ((column->indexer) != NULL)
if (obi_close_indexer(column->indexer) < 0) if (obi_close_indexer(column->indexer) < 0)
ret_val = -1; ret_val = -1;