Compare commits
5 Commits
Author | SHA1 | Date | |
---|---|---|---|
f99fc13b75 | |||
1da6aac1b8 | |||
159803b40a | |||
7dcbc34017 | |||
db2202c8b4 |
@ -354,6 +354,9 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, di
|
|||||||
key = mergedKeys[k]
|
key = mergedKeys[k]
|
||||||
merged_col_name = mergedKeys_m[k]
|
merged_col_name = mergedKeys_m[k]
|
||||||
|
|
||||||
|
if merged_infos[merged_col_name]['nb_elts'] == 1:
|
||||||
|
raise Exception("Can't merge information from a tag with only one element (e.g. one sample ; don't use -m option)")
|
||||||
|
|
||||||
if merged_col_name in view:
|
if merged_col_name in view:
|
||||||
i_col = view[merged_col_name]
|
i_col = view[merged_col_name]
|
||||||
else:
|
else:
|
||||||
|
@ -5,6 +5,7 @@ from obitools3.dms.view.view cimport Line
|
|||||||
from obitools3.utils cimport bytes2str_object, str2bytes, tobytes
|
from obitools3.utils cimport bytes2str_object, str2bytes, tobytes
|
||||||
from obitools3.dms.column.column cimport Column_line, Column_multi_elts
|
from obitools3.dms.column.column cimport Column_line, Column_multi_elts
|
||||||
|
|
||||||
|
import sys
|
||||||
|
|
||||||
cdef class TabFormat:
|
cdef class TabFormat:
|
||||||
|
|
||||||
@ -26,18 +27,22 @@ cdef class TabFormat:
|
|||||||
|
|
||||||
if self.header and self.first_line:
|
if self.header and self.first_line:
|
||||||
if isinstance(data.view[k], Column_multi_elts):
|
if isinstance(data.view[k], Column_multi_elts):
|
||||||
for k2 in data.view[k].keys():
|
keys = data.view[k].keys()
|
||||||
|
keys.sort()
|
||||||
|
for k2 in keys:
|
||||||
line.append(tobytes(k)+b':'+tobytes(k2))
|
line.append(tobytes(k)+b':'+tobytes(k2))
|
||||||
else:
|
else:
|
||||||
line.append(tobytes(k))
|
line.append(tobytes(k))
|
||||||
else:
|
else:
|
||||||
value = data[k]
|
value = data[k]
|
||||||
if isinstance(data.view[k], Column_multi_elts):
|
if isinstance(data.view[k], Column_multi_elts):
|
||||||
|
keys = data.view[k].keys()
|
||||||
|
keys.sort()
|
||||||
if value is None: # all keys at None
|
if value is None: # all keys at None
|
||||||
for k2 in data.view[k].keys(): # TODO could be much more efficient
|
for k2 in keys: # TODO could be much more efficient
|
||||||
line.append(self.NAString)
|
line.append(self.NAString)
|
||||||
else:
|
else:
|
||||||
for k2 in data.view[k].keys(): # TODO could be much more efficient
|
for k2 in keys: # TODO could be much more efficient
|
||||||
if value[k2] is not None:
|
if value[k2] is not None:
|
||||||
line.append(str2bytes(str(bytes2str_object(value[k2])))) # genius programming
|
line.append(str2bytes(str(bytes2str_object(value[k2])))) # genius programming
|
||||||
else:
|
else:
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
from obitools3.dms.capi.obitypes cimport obitype_t, index_t
|
from obitools3.dms.capi.obitypes cimport obitype_t, index_t
|
||||||
|
|
||||||
cpdef bytes format_separator(bytes format)
|
cpdef bytes format_uniq_pattern(bytes format)
|
||||||
cpdef int count_entries(file, bytes format)
|
cpdef int count_entries(file, bytes format)
|
||||||
|
|
||||||
cdef obi_errno_to_exception(index_t line_nb=*, object elt_id=*, str error_message=*)
|
cdef obi_errno_to_exception(index_t line_nb=*, object elt_id=*, str error_message=*)
|
||||||
|
@ -24,11 +24,11 @@ import glob
|
|||||||
import gzip
|
import gzip
|
||||||
|
|
||||||
|
|
||||||
cpdef bytes format_separator(bytes format):
|
cpdef bytes format_uniq_pattern(bytes format):
|
||||||
if format == b"fasta":
|
if format == b"fasta":
|
||||||
return b"\n>"
|
return b"\n>"
|
||||||
elif format == b"fastq":
|
elif format == b"fastq":
|
||||||
return b"\n@"
|
return b"\n\+\n"
|
||||||
elif format == b"ngsfilter" or format == b"tabular":
|
elif format == b"ngsfilter" or format == b"tabular":
|
||||||
return b"\n"
|
return b"\n"
|
||||||
elif format == b"genbank" or format == b"embl":
|
elif format == b"genbank" or format == b"embl":
|
||||||
@ -42,7 +42,7 @@ cpdef bytes format_separator(bytes format):
|
|||||||
cpdef int count_entries(file, bytes format):
|
cpdef int count_entries(file, bytes format):
|
||||||
|
|
||||||
try:
|
try:
|
||||||
sep = format_separator(format)
|
sep = format_uniq_pattern(format)
|
||||||
if sep is None:
|
if sep is None:
|
||||||
return -1
|
return -1
|
||||||
sep = re.compile(sep)
|
sep = re.compile(sep)
|
||||||
@ -72,7 +72,7 @@ cpdef int count_entries(file, bytes format):
|
|||||||
return -1
|
return -1
|
||||||
mmapped_file = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
|
mmapped_file = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
|
||||||
total_count += len(re.findall(sep, mmapped_file))
|
total_count += len(re.findall(sep, mmapped_file))
|
||||||
if format != b"ngsfilter" and format != b"tabular" and format != b"embl" and format != b"genbank":
|
if format != b"ngsfilter" and format != b"tabular" and format != b"embl" and format != b"genbank" and format != b"fastq":
|
||||||
total_count += 1 # adding +1 for 1st entry because separators include \n (ngsfilter and tabular already count one more because of last \n)
|
total_count += 1 # adding +1 for 1st entry because separators include \n (ngsfilter and tabular already count one more because of last \n)
|
||||||
|
|
||||||
except:
|
except:
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
major = 3
|
major = 3
|
||||||
minor = 0
|
minor = 0
|
||||||
serial= '0b31'
|
serial= '0b32'
|
||||||
|
|
||||||
version ="%d.%d.%s" % (major,minor,serial)
|
version ="%d.%d.%s" % (major,minor,serial)
|
||||||
|
@ -1725,17 +1725,33 @@ int obi_close_column(OBIDMS_column_p column)
|
|||||||
int obi_clone_column_indexer(OBIDMS_column_p column)
|
int obi_clone_column_indexer(OBIDMS_column_p column)
|
||||||
{
|
{
|
||||||
char* new_indexer_name;
|
char* new_indexer_name;
|
||||||
|
int i;
|
||||||
|
|
||||||
new_indexer_name = obi_build_indexer_name((column->header)->name, (column->header)->version);
|
i=0;
|
||||||
|
while (true) // find avl name not already used
|
||||||
|
{
|
||||||
|
new_indexer_name = obi_build_indexer_name((column->header)->name, ((column->header)->version)+i);
|
||||||
if (new_indexer_name == NULL)
|
if (new_indexer_name == NULL)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
column->indexer = obi_clone_indexer(column->indexer, new_indexer_name); // TODO Need to lock this somehow?
|
column->indexer = obi_clone_indexer(column->indexer, new_indexer_name); // TODO Need to lock this somehow?
|
||||||
if (column->indexer == NULL)
|
if (column->indexer == NULL)
|
||||||
{
|
{
|
||||||
|
if (errno == EEXIST)
|
||||||
|
{
|
||||||
|
free(new_indexer_name);
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
free(new_indexer_name);
|
||||||
obidebug(1, "\nError cloning a column's indexer to make it writable");
|
obidebug(1, "\nError cloning a column's indexer to make it writable");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
strcpy((column->header)->indexer_name, new_indexer_name);
|
strcpy((column->header)->indexer_name, new_indexer_name);
|
||||||
|
|
||||||
@ -2415,16 +2431,20 @@ char* obi_get_formatted_elements_names(OBIDMS_column_p column)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
char* obi_column_formatted_infos(OBIDMS_column_p column)
|
char* obi_column_formatted_infos(OBIDMS_column_p column, bool detailed)
|
||||||
{
|
{
|
||||||
char* column_infos;
|
char* column_infos = NULL;
|
||||||
char* elt_names;
|
char* elt_names = NULL;
|
||||||
|
char* column_name = NULL;
|
||||||
column_infos = malloc(1024 * sizeof(char));
|
// should be in view.c because alias exists in the context of view
|
||||||
|
column_infos = malloc(2048 * sizeof(char)); // TODO
|
||||||
|
|
||||||
elt_names = obi_get_formatted_elements_names(column);
|
elt_names = obi_get_formatted_elements_names(column);
|
||||||
|
|
||||||
|
|
||||||
|
// "column_name, data type: OBI_TYPE, element names: [formatted element names](, all comments)"
|
||||||
|
|
||||||
|
|
||||||
free(elt_names);
|
free(elt_names);
|
||||||
return column_infos;
|
return column_infos;
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user