Compare commits

..

7 Commits

8 changed files with 65 additions and 30 deletions

View File

@ -4,7 +4,7 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.dms import DMS from obitools3.dms import DMS
from obitools3.dms.view.view cimport View from obitools3.dms.view.view cimport View
from obitools3.uri.decode import open_uri from obitools3.uri.decode import open_uri
from obitools3.apps.optiongroups import addMinimalOutputOption from obitools3.apps.optiongroups import addMinimalOutputOption, addNoProgressBarOption
from obitools3.dms.view import RollbackException from obitools3.dms.view import RollbackException
from obitools3.apps.config import logger from obitools3.apps.config import logger
from obitools3.utils cimport str2bytes from obitools3.utils cimport str2bytes
@ -28,6 +28,7 @@ __title__="Concatenate views."
def addOptions(parser): def addOptions(parser):
addMinimalOutputOption(parser) addMinimalOutputOption(parser)
addNoProgressBarOption(parser)
group=parser.add_argument_group('obi cat specific options') group=parser.add_argument_group('obi cat specific options')
@ -47,9 +48,9 @@ def run(config):
logger("info", "obi cat") logger("info", "obi cat")
# Open the views to concatenate # Check the views to concatenate
iview_list = []
idms_list = [] idms_list = []
iview_list = []
total_len = 0 total_len = 0
remove_qual = False remove_qual = False
remove_rev_qual = False remove_rev_qual = False
@ -67,8 +68,9 @@ def run(config):
if REVERSE_QUALITY_COLUMN not in i_view: # same as above for reverse quality if REVERSE_QUALITY_COLUMN not in i_view: # same as above for reverse quality
remove_rev_qual = True remove_rev_qual = True
total_len += len(i_view) total_len += len(i_view)
iview_list.append(i_view)
idms_list.append(i_dms) idms_list.append(i_dms)
iview_list.append(i_view.name)
i_view.close()
# Open the output: only the DMS # Open the output: only the DMS
output = open_uri(config['obi']['outputURI'], output = open_uri(config['obi']['outputURI'],
@ -97,8 +99,10 @@ def run(config):
# Initialize multiple elements columns # Initialize multiple elements columns
if type(output_0)==BufferedWriter: if type(output_0)==BufferedWriter:
dict_cols = {} dict_cols = {}
for v in iview_list: for v_uri in config["cat"]["views_to_cat"]:
v = open_uri(v_uri)[1]
for coln in v.keys(): for coln in v.keys():
col = v[coln]
if v[coln].nb_elements_per_line > 1: if v[coln].nb_elements_per_line > 1:
if coln not in dict_cols: if coln not in dict_cols:
dict_cols[coln] = {} dict_cols[coln] = {}
@ -108,6 +112,7 @@ def run(config):
else: else:
dict_cols[coln]['eltnames'] = set(v[coln].elements_names + list(dict_cols[coln]['eltnames'])) dict_cols[coln]['eltnames'] = set(v[coln].elements_names + list(dict_cols[coln]['eltnames']))
dict_cols[coln]['nbelts'] = len(dict_cols[coln]['eltnames']) dict_cols[coln]['nbelts'] = len(dict_cols[coln]['eltnames'])
v.close()
for coln in dict_cols: for coln in dict_cols:
Column.new_column(o_view, coln, dict_cols[coln]['obitype'], Column.new_column(o_view, coln, dict_cols[coln]['obitype'],
nb_elements_per_line=dict_cols[coln]['nbelts'], elements_names=list(dict_cols[coln]['eltnames'])) nb_elements_per_line=dict_cols[coln]['nbelts'], elements_names=list(dict_cols[coln]['eltnames']))
@ -119,7 +124,8 @@ def run(config):
pb = None pb = None
i = 0 i = 0
for v in iview_list: for v_uri in config["cat"]["views_to_cat"]:
v = open_uri(v_uri)[1]
for entry in v: for entry in v:
PyErr_CheckSignals() PyErr_CheckSignals()
if pb is not None: if pb is not None:
@ -130,6 +136,7 @@ def run(config):
else: else:
o_view[i] = entry o_view[i] = entry
i+=1 i+=1
v.close()
# Deletes quality columns if needed # Deletes quality columns if needed
if type(output_0)!=BufferedWriter: if type(output_0)!=BufferedWriter:
@ -144,7 +151,7 @@ def run(config):
# Save command config in DMS comments # Save command config in DMS comments
command_line = " ".join(sys.argv[1:]) command_line = " ".join(sys.argv[1:])
o_view.write_config(config, "cat", command_line, input_dms_name=[d.name for d in idms_list], input_view_name=[v.name for v in iview_list]) o_view.write_config(config, "cat", command_line, input_dms_name=[d.name for d in idms_list], input_view_name=[vname for vname in iview_list])
o_dms.record_command_line(command_line) o_dms.record_command_line(command_line)
#print("\n\nOutput view:\n````````````", file=sys.stderr) #print("\n\nOutput view:\n````````````", file=sys.stderr)

View File

@ -354,6 +354,9 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, di
key = mergedKeys[k] key = mergedKeys[k]
merged_col_name = mergedKeys_m[k] merged_col_name = mergedKeys_m[k]
if merged_infos[merged_col_name]['nb_elts'] == 1:
raise Exception("Can't merge information from a tag with only one element (e.g. one sample ; don't use -m option)")
if merged_col_name in view: if merged_col_name in view:
i_col = view[merged_col_name] i_col = view[merged_col_name]
else: else:

View File

@ -5,6 +5,7 @@ from obitools3.dms.view.view cimport Line
from obitools3.utils cimport bytes2str_object, str2bytes, tobytes from obitools3.utils cimport bytes2str_object, str2bytes, tobytes
from obitools3.dms.column.column cimport Column_line, Column_multi_elts from obitools3.dms.column.column cimport Column_line, Column_multi_elts
import sys
cdef class TabFormat: cdef class TabFormat:
@ -26,18 +27,22 @@ cdef class TabFormat:
if self.header and self.first_line: if self.header and self.first_line:
if isinstance(data.view[k], Column_multi_elts): if isinstance(data.view[k], Column_multi_elts):
for k2 in data.view[k].keys(): keys = data.view[k].keys()
keys.sort()
for k2 in keys:
line.append(tobytes(k)+b':'+tobytes(k2)) line.append(tobytes(k)+b':'+tobytes(k2))
else: else:
line.append(tobytes(k)) line.append(tobytes(k))
else: else:
value = data[k] value = data[k]
if isinstance(data.view[k], Column_multi_elts): if isinstance(data.view[k], Column_multi_elts):
keys = data.view[k].keys()
keys.sort()
if value is None: # all keys at None if value is None: # all keys at None
for k2 in data.view[k].keys(): # TODO could be much more efficient for k2 in keys: # TODO could be much more efficient
line.append(self.NAString) line.append(self.NAString)
else: else:
for k2 in data.view[k].keys(): # TODO could be much more efficient for k2 in keys: # TODO could be much more efficient
if value[k2] is not None: if value[k2] is not None:
line.append(str2bytes(str(bytes2str_object(value[k2])))) # genius programming line.append(str2bytes(str(bytes2str_object(value[k2])))) # genius programming
else: else:

View File

@ -2,7 +2,7 @@
from obitools3.dms.capi.obitypes cimport obitype_t, index_t from obitools3.dms.capi.obitypes cimport obitype_t, index_t
cpdef bytes format_separator(bytes format) cpdef bytes format_uniq_pattern(bytes format)
cpdef int count_entries(file, bytes format) cpdef int count_entries(file, bytes format)
cdef obi_errno_to_exception(index_t line_nb=*, object elt_id=*, str error_message=*) cdef obi_errno_to_exception(index_t line_nb=*, object elt_id=*, str error_message=*)

View File

@ -24,11 +24,11 @@ import glob
import gzip import gzip
cpdef bytes format_separator(bytes format): cpdef bytes format_uniq_pattern(bytes format):
if format == b"fasta": if format == b"fasta":
return b"\n>" return b"\n>"
elif format == b"fastq": elif format == b"fastq":
return b"\n@" return b"\n\+\n"
elif format == b"ngsfilter" or format == b"tabular": elif format == b"ngsfilter" or format == b"tabular":
return b"\n" return b"\n"
elif format == b"genbank" or format == b"embl": elif format == b"genbank" or format == b"embl":
@ -42,7 +42,7 @@ cpdef bytes format_separator(bytes format):
cpdef int count_entries(file, bytes format): cpdef int count_entries(file, bytes format):
try: try:
sep = format_separator(format) sep = format_uniq_pattern(format)
if sep is None: if sep is None:
return -1 return -1
sep = re.compile(sep) sep = re.compile(sep)
@ -72,7 +72,7 @@ cpdef int count_entries(file, bytes format):
return -1 return -1
mmapped_file = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) mmapped_file = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
total_count += len(re.findall(sep, mmapped_file)) total_count += len(re.findall(sep, mmapped_file))
if format != b"ngsfilter" and format != b"tabular" and format != b"embl" and format != b"genbank": if format != b"ngsfilter" and format != b"tabular" and format != b"embl" and format != b"genbank" and format != b"fastq":
total_count += 1 # adding +1 for 1st entry because separators include \n (ngsfilter and tabular already count one more because of last \n) total_count += 1 # adding +1 for 1st entry because separators include \n (ngsfilter and tabular already count one more because of last \n)
except: except:

View File

@ -1,5 +1,5 @@
major = 3 major = 3
minor = 0 minor = 0
serial= '0b31' serial= '0b34'
version ="%d.%d.%s" % (major,minor,serial) version ="%d.%d.%s" % (major,minor,serial)

View File

@ -1725,17 +1725,33 @@ int obi_close_column(OBIDMS_column_p column)
int obi_clone_column_indexer(OBIDMS_column_p column) int obi_clone_column_indexer(OBIDMS_column_p column)
{ {
char* new_indexer_name; char* new_indexer_name;
int i;
new_indexer_name = obi_build_indexer_name((column->header)->name, (column->header)->version); i=0;
while (true) // find avl name not already used
{
new_indexer_name = obi_build_indexer_name((column->header)->name, ((column->header)->version)+i);
if (new_indexer_name == NULL) if (new_indexer_name == NULL)
return -1; return -1;
column->indexer = obi_clone_indexer(column->indexer, new_indexer_name); // TODO Need to lock this somehow? column->indexer = obi_clone_indexer(column->indexer, new_indexer_name); // TODO Need to lock this somehow?
if (column->indexer == NULL) if (column->indexer == NULL)
{ {
if (errno == EEXIST)
{
free(new_indexer_name);
i++;
}
else
{
free(new_indexer_name);
obidebug(1, "\nError cloning a column's indexer to make it writable"); obidebug(1, "\nError cloning a column's indexer to make it writable");
return -1; return -1;
} }
}
else
break;
}
strcpy((column->header)->indexer_name, new_indexer_name); strcpy((column->header)->indexer_name, new_indexer_name);
@ -2415,16 +2431,20 @@ char* obi_get_formatted_elements_names(OBIDMS_column_p column)
} }
char* obi_column_formatted_infos(OBIDMS_column_p column) char* obi_column_formatted_infos(OBIDMS_column_p column, bool detailed)
{ {
char* column_infos; char* column_infos = NULL;
char* elt_names; char* elt_names = NULL;
char* column_name = NULL;
column_infos = malloc(1024 * sizeof(char)); // should be in view.c because alias exists in the context of view
column_infos = malloc(2048 * sizeof(char)); // TODO
elt_names = obi_get_formatted_elements_names(column); elt_names = obi_get_formatted_elements_names(column);
// "column_name, data type: OBI_TYPE, element names: [formatted element names](, all comments)"
free(elt_names); free(elt_names);
return column_infos; return column_infos;
} }