Compare commits
10 Commits
Author | SHA1 | Date | |
---|---|---|---|
9ace9989c4 | |||
a3ebe5f118 | |||
9100e14899 | |||
ccda0661ce | |||
aab59f2214 | |||
ade1107b42 | |||
9c7d24406f | |||
03bc9915f2 | |||
24b1dab573 | |||
7593673f3f |
@ -36,8 +36,7 @@ def addOptions(parser):
|
|||||||
dest="clean:sample-tag-name",
|
dest="clean:sample-tag-name",
|
||||||
metavar="<SAMPLE TAG NAME>",
|
metavar="<SAMPLE TAG NAME>",
|
||||||
type=str,
|
type=str,
|
||||||
default="merged_sample",
|
help="Name of the tag where merged sample count informations are kept (typically generated by obi uniq, usually MERGED_sample, default: None).")
|
||||||
help="Name of the tag where sample counts are kept.")
|
|
||||||
|
|
||||||
group.add_argument('--ratio', '-r',
|
group.add_argument('--ratio', '-r',
|
||||||
action="store", dest="clean:ratio",
|
action="store", dest="clean:ratio",
|
||||||
@ -107,6 +106,9 @@ def run(config):
|
|||||||
command_line = " ".join(sys.argv[1:])
|
command_line = " ".join(sys.argv[1:])
|
||||||
comments = View.print_config(config, "clean", command_line, input_dms_name=[i_dms_name], input_view_name=[i_view_name])
|
comments = View.print_config(config, "clean", command_line, input_dms_name=[i_dms_name], input_view_name=[i_view_name])
|
||||||
|
|
||||||
|
if 'sample-tag-name' not in config['clean']:
|
||||||
|
config['clean']['sample-tag-name'] = ""
|
||||||
|
|
||||||
if obi_clean(i_dms.name_with_full_path, tobytes(i_view_name), tobytes(config['clean']['sample-tag-name']), tobytes(o_view_name), comments, \
|
if obi_clean(i_dms.name_with_full_path, tobytes(i_view_name), tobytes(config['clean']['sample-tag-name']), tobytes(o_view_name), comments, \
|
||||||
config['clean']['distance'], config['clean']['ratio'], config['clean']['heads-only'], config['clean']['thread-count']) < 0:
|
config['clean']['distance'], config['clean']['ratio'], config['clean']['heads-only'], config['clean']['thread-count']) < 0:
|
||||||
raise Exception("Error running obiclean")
|
raise Exception("Error running obiclean")
|
||||||
|
@ -22,7 +22,7 @@ def addOptions(parser):
|
|||||||
group.add_argument('-s','--sequence',
|
group.add_argument('-s','--sequence',
|
||||||
action="store_true", dest="count:sequence",
|
action="store_true", dest="count:sequence",
|
||||||
default=False,
|
default=False,
|
||||||
help="Prints only the number of sequence records.")
|
help="Prints only the number of sequence records (much faster, default: False).")
|
||||||
|
|
||||||
group.add_argument('-a','--all',
|
group.add_argument('-a','--all',
|
||||||
action="store_true", dest="count:all",
|
action="store_true", dest="count:all",
|
||||||
|
@ -479,6 +479,8 @@ cdef tuple annotate(sequences, infos, no_tags, verbose=False):
|
|||||||
if not directmatch[0].forward:
|
if not directmatch[0].forward:
|
||||||
sequences[0] = sequences[0].reverse_complement
|
sequences[0] = sequences[0].reverse_complement
|
||||||
sequences[0][b'reversed'] = True # used by the alignpairedend tool (in kmer_similarity.c)
|
sequences[0][b'reversed'] = True # used by the alignpairedend tool (in kmer_similarity.c)
|
||||||
|
else:
|
||||||
|
sequences[0][b'reversed'] = False # used by the alignpairedend tool (in kmer_similarity.c)
|
||||||
|
|
||||||
sample=None
|
sample=None
|
||||||
if not no_tags:
|
if not no_tags:
|
||||||
|
@ -307,6 +307,7 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, di
|
|||||||
for x in categories :
|
for x in categories :
|
||||||
catl.append(i_seq[x])
|
catl.append(i_seq[x])
|
||||||
|
|
||||||
|
#unique_id = tuple(catl) + (i_seq_col[i],)
|
||||||
unique_id = tuple(catl) + (i_seq_col.get_line_idx(i),)
|
unique_id = tuple(catl) + (i_seq_col.get_line_idx(i),)
|
||||||
#unique_id = tuple(i_seq[x] for x in categories) + (seq_col.get_line_idx(i),) # The line that cython can't read properly
|
#unique_id = tuple(i_seq[x] for x in categories) + (seq_col.get_line_idx(i),) # The line that cython can't read properly
|
||||||
|
|
||||||
@ -453,6 +454,7 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, di
|
|||||||
merged_dict[mkey] = {}
|
merged_dict[mkey] = {}
|
||||||
|
|
||||||
for i_idx in merged_sequences:
|
for i_idx in merged_sequences:
|
||||||
|
PyErr_CheckSignals()
|
||||||
pb(total_treated)
|
pb(total_treated)
|
||||||
|
|
||||||
i_id = i_id_col[i_idx]
|
i_id = i_id_col[i_idx]
|
||||||
|
@ -22,6 +22,7 @@ cdef class Column(OBIWrapper) :
|
|||||||
|
|
||||||
cdef inline OBIDMS_column_p pointer(self)
|
cdef inline OBIDMS_column_p pointer(self)
|
||||||
cdef read_elements_names(self)
|
cdef read_elements_names(self)
|
||||||
|
cpdef list keys(self)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
cdef type get_column_class(obitype_t obitype, bint multi_elts, bint tuples)
|
cdef type get_column_class(obitype_t obitype, bint multi_elts, bint tuples)
|
||||||
|
@ -323,6 +323,9 @@ cdef class Column(OBIWrapper) :
|
|||||||
free(elts_names_b)
|
free(elts_names_b)
|
||||||
return elts_names_list
|
return elts_names_list
|
||||||
|
|
||||||
|
cpdef list keys(self):
|
||||||
|
return self._elements_names
|
||||||
|
|
||||||
|
|
||||||
# Column alias property getter and setter
|
# Column alias property getter and setter
|
||||||
@property
|
@property
|
||||||
|
@ -3,7 +3,7 @@
|
|||||||
cimport cython
|
cimport cython
|
||||||
from obitools3.dms.view.view cimport Line
|
from obitools3.dms.view.view cimport Line
|
||||||
from obitools3.utils cimport bytes2str_object, str2bytes, tobytes
|
from obitools3.utils cimport bytes2str_object, str2bytes, tobytes
|
||||||
from obitools3.dms.column.column cimport Column_line
|
from obitools3.dms.column.column cimport Column_line, Column_multi_elts
|
||||||
|
|
||||||
|
|
||||||
cdef class TabFormat:
|
cdef class TabFormat:
|
||||||
@ -25,18 +25,28 @@ cdef class TabFormat:
|
|||||||
for k in self.tags:
|
for k in self.tags:
|
||||||
|
|
||||||
if self.header and self.first_line:
|
if self.header and self.first_line:
|
||||||
value = tobytes(k)
|
if isinstance(data.view[k], Column_multi_elts):
|
||||||
|
for k2 in data.view[k].keys():
|
||||||
|
line.append(tobytes(k)+b':'+tobytes(k2))
|
||||||
|
else:
|
||||||
|
line.append(tobytes(k))
|
||||||
else:
|
else:
|
||||||
value = data[k]
|
value = data[k]
|
||||||
if value is not None:
|
if isinstance(data.view[k], Column_multi_elts):
|
||||||
if type(value) == Column_line:
|
if value is None: # all keys at None
|
||||||
value = value.bytes()
|
for k2 in data.view[k].keys(): # TODO could be much more efficient
|
||||||
|
line.append(self.NAString)
|
||||||
else:
|
else:
|
||||||
value = str2bytes(str(bytes2str_object(value))) # genius programming
|
for k2 in data.view[k].keys(): # TODO could be much more efficient
|
||||||
if value is None:
|
if value[k2] is not None:
|
||||||
value = self.NAString
|
line.append(str2bytes(str(bytes2str_object(value[k2])))) # genius programming
|
||||||
|
else:
|
||||||
line.append(value)
|
line.append(self.NAString)
|
||||||
|
else:
|
||||||
|
if value is not None:
|
||||||
|
line.append(str2bytes(str(bytes2str_object(value))))
|
||||||
|
else:
|
||||||
|
line.append(self.NAString)
|
||||||
|
|
||||||
if self.first_line:
|
if self.first_line:
|
||||||
self.first_line = False
|
self.first_line = False
|
||||||
|
@ -166,7 +166,9 @@ cdef object bytes2str_object(object value): # Only works if complex types are d
|
|||||||
value[k] = bytes2str(v)
|
value[k] = bytes2str(v)
|
||||||
if type(k) == bytes:
|
if type(k) == bytes:
|
||||||
value[bytes2str(k)] = value.pop(k)
|
value[bytes2str(k)] = value.pop(k)
|
||||||
elif isinstance(value, list):
|
elif isinstance(value, list) or isinstance(value, tuple):
|
||||||
|
if isinstance(value, tuple):
|
||||||
|
value = list(value)
|
||||||
for i in range(len(value)):
|
for i in range(len(value)):
|
||||||
if isinstance(value[i], list) or isinstance(value[i], dict):
|
if isinstance(value[i], list) or isinstance(value[i], dict):
|
||||||
value[i] = bytes2str_object(value[i])
|
value[i] = bytes2str_object(value[i])
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
major = 3
|
major = 3
|
||||||
minor = 0
|
minor = 0
|
||||||
serial= '0b25'
|
serial= '0b27'
|
||||||
|
|
||||||
version ="%d.%d.%s" % (major,minor,serial)
|
version ="%d.%d.%s" % (major,minor,serial)
|
||||||
|
@ -246,7 +246,16 @@ int obi_clean(const char* dms_name,
|
|||||||
|
|
||||||
// Open the sample column if there is one
|
// Open the sample column if there is one
|
||||||
if ((strcmp(sample_column_name, "") == 0) || (sample_column_name == NULL))
|
if ((strcmp(sample_column_name, "") == 0) || (sample_column_name == NULL))
|
||||||
sample_column = NULL;
|
{
|
||||||
|
fprintf(stderr, "Info: No sample information provided, assuming one sample.\n");
|
||||||
|
sample_column = obi_view_get_column(i_view, COUNT_COLUMN);
|
||||||
|
if (sample_column == NULL)
|
||||||
|
{
|
||||||
|
obidebug(1, "\nError getting the COUNT column");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
sample_count = 1;
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
sample_column = obi_view_get_column(i_view, sample_column_name);
|
sample_column = obi_view_get_column(i_view, sample_column_name);
|
||||||
@ -255,6 +264,13 @@ int obi_clean(const char* dms_name,
|
|||||||
obidebug(1, "\nError getting the sample column");
|
obidebug(1, "\nError getting the sample column");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
sample_count = (sample_column->header)->nb_elements_per_line;
|
||||||
|
// Check that the sample column is a merged column containing all the sample information
|
||||||
|
if (sample_count == 1)
|
||||||
|
{
|
||||||
|
obidebug(1, "\n\nError: If a sample column is provided, it must contain 'merged' sample counts as built by obi uniq with the -m option\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create the output view, or a temporary one if heads only
|
// Create the output view, or a temporary one if heads only
|
||||||
@ -279,8 +295,6 @@ int obi_clean(const char* dms_name,
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
sample_count = (sample_column->header)->nb_elements_per_line;
|
|
||||||
|
|
||||||
// Create the output columns
|
// Create the output columns
|
||||||
if (create_output_columns(o_view, sample_column, sample_count) < 0)
|
if (create_output_columns(o_view, sample_column, sample_count) < 0)
|
||||||
{
|
{
|
||||||
@ -549,7 +563,7 @@ int obi_clean(const char* dms_name,
|
|||||||
|
|
||||||
if (heads_only)
|
if (heads_only)
|
||||||
{
|
{
|
||||||
line_selection = malloc((o_view->infos)->line_count * sizeof(index_t));
|
line_selection = malloc((((o_view->infos)->line_count) + 1) * sizeof(index_t));
|
||||||
if (line_selection == NULL)
|
if (line_selection == NULL)
|
||||||
{
|
{
|
||||||
obi_set_errno(OBI_MALLOC_ERROR);
|
obi_set_errno(OBI_MALLOC_ERROR);
|
||||||
|
@ -52,7 +52,8 @@
|
|||||||
*
|
*
|
||||||
* @param dms A pointer on an OBIDMS.
|
* @param dms A pointer on an OBIDMS.
|
||||||
* @param i_view_name The name of the input view.
|
* @param i_view_name The name of the input view.
|
||||||
* @param sample_column_name The name of the OBI_STR column in the input view where the sample information is kept.
|
* @param sample_column_name The name of the column in the input view where the sample information is kept.
|
||||||
|
* Must contain merged sample information as built by the obi uniq tool (checked by the function).
|
||||||
* NULL or "" (empty string) if there is no sample information.
|
* NULL or "" (empty string) if there is no sample information.
|
||||||
* @param o_view_name The name of the output view where the results should be written (should not already exist).
|
* @param o_view_name The name of the output view where the results should be written (should not already exist).
|
||||||
* @param o_view_comments The comments that should be associated with the output view.
|
* @param o_view_comments The comments that should be associated with the output view.
|
||||||
|
16
src/obiavl.c
16
src/obiavl.c
@ -2259,7 +2259,13 @@ index_t obi_avl_add(OBIDMS_avl_p avl, Obi_blob_p value)
|
|||||||
parent = next;
|
parent = next;
|
||||||
|
|
||||||
// Compare the crc of the value with the crc of the current node
|
// Compare the crc of the value with the crc of the current node
|
||||||
comp = (current_node->crc64) - crc;
|
//comp = (current_node->crc64) - crc;
|
||||||
|
if ((current_node->crc64) == crc)
|
||||||
|
comp = 0;
|
||||||
|
else if ((current_node->crc64) > crc)
|
||||||
|
comp = 1;
|
||||||
|
else
|
||||||
|
comp = -1;
|
||||||
|
|
||||||
if (comp == 0)
|
if (comp == 0)
|
||||||
{ // check if really same value
|
{ // check if really same value
|
||||||
@ -2354,7 +2360,13 @@ index_t obi_avl_find(OBIDMS_avl_p avl, Obi_blob_p value)
|
|||||||
current_node = (avl->tree)+next;
|
current_node = (avl->tree)+next;
|
||||||
|
|
||||||
// Compare the crc of the value with the crc of the current node
|
// Compare the crc of the value with the crc of the current node
|
||||||
comp = (current_node->crc64) - crc;
|
//comp = (current_node->crc64) - crc;
|
||||||
|
if ((current_node->crc64) == crc)
|
||||||
|
comp = 0;
|
||||||
|
else if ((current_node->crc64) > crc)
|
||||||
|
comp = 1;
|
||||||
|
else
|
||||||
|
comp = -1;
|
||||||
|
|
||||||
if (comp == 0)
|
if (comp == 0)
|
||||||
{ // Check if really same value
|
{ // Check if really same value
|
||||||
|
Reference in New Issue
Block a user