Compare commits

...

11 Commits

24 changed files with 555 additions and 97 deletions

View File

@ -4,7 +4,7 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.dms import DMS from obitools3.dms import DMS
from obitools3.dms.view.view cimport View from obitools3.dms.view.view cimport View
from obitools3.uri.decode import open_uri from obitools3.uri.decode import open_uri
from obitools3.apps.optiongroups import addMinimalOutputOption from obitools3.apps.optiongroups import addMinimalOutputOption, addNoProgressBarOption
from obitools3.dms.view import RollbackException from obitools3.dms.view import RollbackException
from obitools3.apps.config import logger from obitools3.apps.config import logger
from obitools3.utils cimport str2bytes from obitools3.utils cimport str2bytes
@ -28,6 +28,7 @@ __title__="Concatenate views."
def addOptions(parser): def addOptions(parser):
addMinimalOutputOption(parser) addMinimalOutputOption(parser)
addNoProgressBarOption(parser)
group=parser.add_argument_group('obi cat specific options') group=parser.add_argument_group('obi cat specific options')
@ -47,9 +48,9 @@ def run(config):
logger("info", "obi cat") logger("info", "obi cat")
# Open the views to concatenate # Check the views to concatenate
iview_list = []
idms_list = [] idms_list = []
iview_list = []
total_len = 0 total_len = 0
remove_qual = False remove_qual = False
remove_rev_qual = False remove_rev_qual = False
@ -67,8 +68,9 @@ def run(config):
if REVERSE_QUALITY_COLUMN not in i_view: # same as above for reverse quality if REVERSE_QUALITY_COLUMN not in i_view: # same as above for reverse quality
remove_rev_qual = True remove_rev_qual = True
total_len += len(i_view) total_len += len(i_view)
iview_list.append(i_view)
idms_list.append(i_dms) idms_list.append(i_dms)
iview_list.append(i_view.name)
i_view.close()
# Open the output: only the DMS # Open the output: only the DMS
output = open_uri(config['obi']['outputURI'], output = open_uri(config['obi']['outputURI'],
@ -97,8 +99,10 @@ def run(config):
# Initialize multiple elements columns # Initialize multiple elements columns
if type(output_0)==BufferedWriter: if type(output_0)==BufferedWriter:
dict_cols = {} dict_cols = {}
for v in iview_list: for v_uri in config["cat"]["views_to_cat"]:
v = open_uri(v_uri)[1]
for coln in v.keys(): for coln in v.keys():
col = v[coln]
if v[coln].nb_elements_per_line > 1: if v[coln].nb_elements_per_line > 1:
if coln not in dict_cols: if coln not in dict_cols:
dict_cols[coln] = {} dict_cols[coln] = {}
@ -108,6 +112,7 @@ def run(config):
else: else:
dict_cols[coln]['eltnames'] = set(v[coln].elements_names + list(dict_cols[coln]['eltnames'])) dict_cols[coln]['eltnames'] = set(v[coln].elements_names + list(dict_cols[coln]['eltnames']))
dict_cols[coln]['nbelts'] = len(dict_cols[coln]['eltnames']) dict_cols[coln]['nbelts'] = len(dict_cols[coln]['eltnames'])
v.close()
for coln in dict_cols: for coln in dict_cols:
Column.new_column(o_view, coln, dict_cols[coln]['obitype'], Column.new_column(o_view, coln, dict_cols[coln]['obitype'],
nb_elements_per_line=dict_cols[coln]['nbelts'], elements_names=list(dict_cols[coln]['eltnames'])) nb_elements_per_line=dict_cols[coln]['nbelts'], elements_names=list(dict_cols[coln]['eltnames']))
@ -119,7 +124,8 @@ def run(config):
pb = None pb = None
i = 0 i = 0
for v in iview_list: for v_uri in config["cat"]["views_to_cat"]:
v = open_uri(v_uri)[1]
for entry in v: for entry in v:
PyErr_CheckSignals() PyErr_CheckSignals()
if pb is not None: if pb is not None:
@ -130,6 +136,7 @@ def run(config):
else: else:
o_view[i] = entry o_view[i] = entry
i+=1 i+=1
v.close()
# Deletes quality columns if needed # Deletes quality columns if needed
if type(output_0)!=BufferedWriter: if type(output_0)!=BufferedWriter:
@ -144,7 +151,7 @@ def run(config):
# Save command config in DMS comments # Save command config in DMS comments
command_line = " ".join(sys.argv[1:]) command_line = " ".join(sys.argv[1:])
o_view.write_config(config, "cat", command_line, input_dms_name=[d.name for d in idms_list], input_view_name=[v.name for v in iview_list]) o_view.write_config(config, "cat", command_line, input_dms_name=[d.name for d in idms_list], input_view_name=[vname for vname in iview_list])
o_dms.record_command_line(command_line) o_dms.record_command_line(command_line)
#print("\n\nOutput view:\n````````````", file=sys.stderr) #print("\n\nOutput view:\n````````````", file=sys.stderr)

View File

@ -41,6 +41,17 @@ def addOptions(parser):
help="Minimum identity to consider for assignment, as a normalized identity, e.g. 0.95 for an identity of 95%%. " help="Minimum identity to consider for assignment, as a normalized identity, e.g. 0.95 for an identity of 95%%. "
"Default: 0.00 (no threshold).") "Default: 0.00 (no threshold).")
group.add_argument('--minimum-circle','-c',
action="store", dest="ecotag:bubble_threshold",
metavar='<CIRCLE_THRESHOLD>',
default=0.99,
type=float,
help="Minimum identity considered for the assignment circle "
"(sequence is assigned to the LCA of all sequences within a similarity circle of the best matches; "
"the threshold for this circle is the highest value between <CIRCLE_THRESHOLD> and the best assignment score found). "
"Give value as a normalized identity, e.g. 0.95 for an identity of 95%%. "
"Default: 0.99.")
def run(config): def run(config):
DMS.obi_atexit() DMS.obi_atexit()
@ -66,9 +77,8 @@ def run(config):
ref_view_name = ref[1] ref_view_name = ref[1]
# Check that the threshold demanded is greater than or equal to the threshold used to build the reference database # Check that the threshold demanded is greater than or equal to the threshold used to build the reference database
if config['ecotag']['threshold'] < eval(ref_dms[ref_view_name].comments["ref_db_threshold"]) : if config['ecotag']['bubble_threshold'] < eval(ref_dms[ref_view_name].comments["ref_db_threshold"]) :
print("Error: The threshold demanded (%f) is lower than the threshold used to build the reference database (%f).", raise Exception(f"Error: The threshold demanded ({config['ecotag']['bubble_threshold']}) is lower than the threshold used to build the reference database ({float(ref_dms[ref_view_name].comments['ref_db_threshold'])}).")
config['ecotag']['threshold'], ref_dms[ref_view_name].comments["ref_db_threshold"])
# Open the output: only the DMS # Open the output: only the DMS
output = open_uri(config['obi']['outputURI'], output = open_uri(config['obi']['outputURI'],
@ -113,8 +123,9 @@ def run(config):
if obi_ecotag(i_dms.name_with_full_path, tobytes(i_view_name), \ if obi_ecotag(i_dms.name_with_full_path, tobytes(i_view_name), \
ref_dms.name_with_full_path, tobytes(ref_view_name), \ ref_dms.name_with_full_path, tobytes(ref_view_name), \
taxo_dms.name_with_full_path, tobytes(taxonomy_name), \ taxo_dms.name_with_full_path, tobytes(taxonomy_name), \
tobytes(o_view_name), comments, tobytes(o_view_name), comments, \
config['ecotag']['threshold']) < 0: config['ecotag']['threshold'], \
config['ecotag']['bubble_threshold']) < 0:
raise Exception("Error running ecotag") raise Exception("Error running ecotag")
# If the input and output DMS are not the same, export result view to output DMS # If the input and output DMS are not the same, export result view to output DMS

View File

@ -31,27 +31,11 @@ def run(config):
input = open_uri(config['obi']['inputURI']) input = open_uri(config['obi']['inputURI'])
if input is None: if input is None:
raise Exception("Could not read input") raise Exception("Could not read input")
if input[2] == DMS and not config['ls']['longformat']:
dms = input[0] # Print representation
l = [] if config['ls']['longformat']:
for viewname in input[0]: print(input[1].repr_longformat())
view = dms[viewname]
l.append(tostr(viewname) + "\t(Date created: " + str(bytes2str_object(view.comments["Date created"]))+")")
view.close()
l.sort()
for v in l:
print(v)
else: else:
print(repr(input[1])) print(repr(input[1]))
if input[2] == DMS:
taxolist = ["\n### Taxonomies:"]
for t in Taxonomy.list_taxos(input[0]):
taxolist.append("\t"+tostr(t))
if len(taxolist) > 1:
for t in taxolist:
print(t)
if config['ls']['longformat'] and len(input[1].comments) > 0:
print("\n### Comments:")
print(str(input[1].comments))
input[0].close(force=True) input[0].close(force=True)

View File

@ -354,6 +354,9 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, di
key = mergedKeys[k] key = mergedKeys[k]
merged_col_name = mergedKeys_m[k] merged_col_name = mergedKeys_m[k]
if merged_infos[merged_col_name]['nb_elts'] == 1:
raise Exception("Can't merge information from a tag with only one element (e.g. one sample ; don't use -m option)")
if merged_col_name in view: if merged_col_name in view:
i_col = view[merged_col_name] i_col = view[merged_col_name]
else: else:

View File

@ -34,6 +34,7 @@ cdef extern from "obidms.h" nogil:
int obi_close_dms(OBIDMS_p dms, bint force) int obi_close_dms(OBIDMS_p dms, bint force)
char* obi_dms_get_dms_path(OBIDMS_p dms) char* obi_dms_get_dms_path(OBIDMS_p dms)
char* obi_dms_get_full_path(OBIDMS_p dms, const_char_p path_name) char* obi_dms_get_full_path(OBIDMS_p dms, const_char_p path_name)
char* obi_dms_formatted_infos(OBIDMS_p dms, bint detailed)
void obi_close_atexit() void obi_close_atexit()
obiversion_t obi_import_column(const char* dms_path_1, const char* dms_path_2, const char* column_name, obiversion_t version_number) obiversion_t obi_import_column(const char* dms_path_1, const char* dms_path_2, const char* column_name, obiversion_t version_number)

View File

@ -70,3 +70,6 @@ cdef extern from "obidmscolumn.h" nogil:
int obi_column_write_comments(OBIDMS_column_p column, const char* comments) int obi_column_write_comments(OBIDMS_column_p column, const char* comments)
int obi_column_add_comment(OBIDMS_column_p column, const char* key, const char* value) int obi_column_add_comment(OBIDMS_column_p column, const char* key, const char* value)
char* obi_column_formatted_infos(OBIDMS_column_p column, bint detailed)

View File

@ -11,4 +11,5 @@ cdef extern from "obi_ecotag.h" nogil:
const char* taxonomy_name, const char* taxonomy_name,
const char* output_view_name, const char* output_view_name,
const char* output_view_comments, const char* output_view_comments,
double ecotag_threshold) double ecotag_threshold,
double bubble_threshold)

View File

@ -110,6 +110,10 @@ cdef extern from "obiview.h" nogil:
int obi_view_create_column_alias(Obiview_p view, const_char_p current_name, const_char_p alias) int obi_view_create_column_alias(Obiview_p view, const_char_p current_name, const_char_p alias)
char* obi_view_formatted_infos(Obiview_p view, bint detailed)
char* obi_view_formatted_infos_one_line(Obiview_p view)
int obi_view_write_comments(Obiview_p view, const_char_p comments) int obi_view_write_comments(Obiview_p view, const_char_p comments)
int obi_view_add_comment(Obiview_p view, const_char_p key, const_char_p value) int obi_view_add_comment(Obiview_p view, const_char_p key, const_char_p value)

View File

@ -302,15 +302,24 @@ cdef class Column(OBIWrapper) :
@OBIWrapper.checkIsActive @OBIWrapper.checkIsActive
def __repr__(self) : def __repr__(self) :
cdef bytes s cdef str s
#cdef char* s_b cdef char* sc
#cdef str s_str cdef OBIDMS_column_p pointer = self.pointer()
#s_b = obi_column_formatted_infos(self.pointer()) sc = obi_column_formatted_infos(pointer, False)
#s_str = bytes2str(s_b) s = bytes2str(sc)
#free(s_b) free(sc)
s = self._alias + b", data type: " + self.data_type return s
#return s_str
return bytes2str(s)
@OBIWrapper.checkIsActive
def repr_longformat(self) :
cdef str s
cdef char* sc
cdef OBIDMS_column_p pointer = self.pointer()
sc = obi_column_formatted_infos(pointer, True)
s = bytes2str(sc)
free(sc)
return s
def close(self): # TODO discuss, can't be called bc then bug when closing view that tries to close it in C def close(self): # TODO discuss, can't be called bc then bug when closing view that tries to close it in C

View File

@ -10,7 +10,8 @@ from .capi.obidms cimport obi_open_dms, \
obi_dms_exists, \ obi_dms_exists, \
obi_dms_get_full_path, \ obi_dms_get_full_path, \
obi_close_atexit, \ obi_close_atexit, \
obi_dms_write_comments obi_dms_write_comments, \
obi_dms_formatted_infos
from .capi.obitypes cimport const_char_p from .capi.obitypes cimport const_char_p
@ -32,6 +33,8 @@ from .object import OBIWrapper
import json import json
import time import time
from libc.stdlib cimport free
cdef class DMS(OBIWrapper): cdef class DMS(OBIWrapper):
@ -223,13 +226,24 @@ cdef class DMS(OBIWrapper):
@OBIWrapper.checkIsActive @OBIWrapper.checkIsActive
def __repr__(self): def __repr__(self) :
cdef str s cdef str s
s="" cdef char* sc
for view_name in self.keys(): cdef OBIDMS_p pointer = self.pointer()
view = self.get_view(view_name) sc = obi_dms_formatted_infos(pointer, False)
s = s + repr(view) + "\n" s = bytes2str(sc)
view.close() free(sc)
return s
@OBIWrapper.checkIsActive
def repr_longformat(self) :
cdef str s
cdef char* sc
cdef OBIDMS_p pointer = self.pointer()
sc = obi_dms_formatted_infos(pointer, True)
s = bytes2str(sc)
free(sc)
return s return s

View File

@ -19,7 +19,9 @@ from ..capi.obiview cimport Alias_column_pair_p, \
obi_view_delete_column, \ obi_view_delete_column, \
obi_view_create_column_alias, \ obi_view_create_column_alias, \
obi_view_write_comments, \ obi_view_write_comments, \
obi_delete_view obi_delete_view, \
obi_view_formatted_infos, \
obi_view_formatted_infos_one_line
from ..capi.obidmscolumn cimport OBIDMS_column_p from ..capi.obidmscolumn cimport OBIDMS_column_p
from ..capi.obidms cimport OBIDMS_p from ..capi.obidms cimport OBIDMS_p
@ -59,6 +61,8 @@ import pkgutil
import json import json
import sys import sys
from libc.stdlib cimport free
cdef class View(OBIWrapper) : cdef class View(OBIWrapper) :
@ -186,15 +190,22 @@ cdef class View(OBIWrapper) :
@OBIWrapper.checkIsActive @OBIWrapper.checkIsActive
def __repr__(self) : def __repr__(self) :
cdef str s cdef str s
if self.read_only: # can read date cdef char* sc
s = "#View name:\n{name:s}\n#Date created:\n{date:s}\n#Line count:\n{line_count:d}\n#Columns:\n".format(name = bytes2str(self.name), cdef Obiview_p pointer = self.pointer()
line_count = self.line_count, sc = obi_view_formatted_infos(pointer, False)
date = str(bytes2str_object(self.comments["Date created"]))) s = bytes2str(sc)
else: free(sc)
s = "#View name:\n{name:s}\n#Line count:\n{line_count:d}\n#Columns:\n".format(name = bytes2str(self.name), return s
line_count = self.line_count)
for column_name in self.keys() :
s = s + repr(self[column_name]) + '\n' @OBIWrapper.checkIsActive
def repr_longformat(self) :
cdef str s
cdef char* sc
cdef Obiview_p pointer = self.pointer()
sc = obi_view_formatted_infos(pointer, True)
s = bytes2str(sc)
free(sc)
return s return s

View File

@ -5,6 +5,7 @@ from obitools3.dms.view.view cimport Line
from obitools3.utils cimport bytes2str_object, str2bytes, tobytes from obitools3.utils cimport bytes2str_object, str2bytes, tobytes
from obitools3.dms.column.column cimport Column_line, Column_multi_elts from obitools3.dms.column.column cimport Column_line, Column_multi_elts
import sys
cdef class TabFormat: cdef class TabFormat:
@ -26,18 +27,22 @@ cdef class TabFormat:
if self.header and self.first_line: if self.header and self.first_line:
if isinstance(data.view[k], Column_multi_elts): if isinstance(data.view[k], Column_multi_elts):
for k2 in data.view[k].keys(): keys = data.view[k].keys()
keys.sort()
for k2 in keys:
line.append(tobytes(k)+b':'+tobytes(k2)) line.append(tobytes(k)+b':'+tobytes(k2))
else: else:
line.append(tobytes(k)) line.append(tobytes(k))
else: else:
value = data[k] value = data[k]
if isinstance(data.view[k], Column_multi_elts): if isinstance(data.view[k], Column_multi_elts):
keys = data.view[k].keys()
keys.sort()
if value is None: # all keys at None if value is None: # all keys at None
for k2 in data.view[k].keys(): # TODO could be much more efficient for k2 in keys: # TODO could be much more efficient
line.append(self.NAString) line.append(self.NAString)
else: else:
for k2 in data.view[k].keys(): # TODO could be much more efficient for k2 in keys: # TODO could be much more efficient
if value[k2] is not None: if value[k2] is not None:
line.append(str2bytes(str(bytes2str_object(value[k2])))) # genius programming line.append(str2bytes(str(bytes2str_object(value[k2])))) # genius programming
else: else:

View File

@ -2,7 +2,7 @@
from obitools3.dms.capi.obitypes cimport obitype_t, index_t from obitools3.dms.capi.obitypes cimport obitype_t, index_t
cpdef bytes format_separator(bytes format) cpdef bytes format_uniq_pattern(bytes format)
cpdef int count_entries(file, bytes format) cpdef int count_entries(file, bytes format)
cdef obi_errno_to_exception(index_t line_nb=*, object elt_id=*, str error_message=*) cdef obi_errno_to_exception(index_t line_nb=*, object elt_id=*, str error_message=*)

View File

@ -24,11 +24,11 @@ import glob
import gzip import gzip
cpdef bytes format_separator(bytes format): cpdef bytes format_uniq_pattern(bytes format):
if format == b"fasta": if format == b"fasta":
return b"\n>" return b"\n>"
elif format == b"fastq": elif format == b"fastq":
return b"\n@" return b"\n\+\n"
elif format == b"ngsfilter" or format == b"tabular": elif format == b"ngsfilter" or format == b"tabular":
return b"\n" return b"\n"
elif format == b"genbank" or format == b"embl": elif format == b"genbank" or format == b"embl":
@ -42,7 +42,7 @@ cpdef bytes format_separator(bytes format):
cpdef int count_entries(file, bytes format): cpdef int count_entries(file, bytes format):
try: try:
sep = format_separator(format) sep = format_uniq_pattern(format)
if sep is None: if sep is None:
return -1 return -1
sep = re.compile(sep) sep = re.compile(sep)
@ -72,7 +72,7 @@ cpdef int count_entries(file, bytes format):
return -1 return -1
mmapped_file = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) mmapped_file = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
total_count += len(re.findall(sep, mmapped_file)) total_count += len(re.findall(sep, mmapped_file))
if format != b"ngsfilter" and format != b"tabular" and format != b"embl" and format != b"genbank": if format != b"ngsfilter" and format != b"tabular" and format != b"embl" and format != b"genbank" and format != b"fastq":
total_count += 1 # adding +1 for 1st entry because separators include \n (ngsfilter and tabular already count one more because of last \n) total_count += 1 # adding +1 for 1st entry because separators include \n (ngsfilter and tabular already count one more because of last \n)
except: except:

View File

@ -1,5 +1,5 @@
major = 3 major = 3
minor = 0 minor = 0
serial= '0b31' serial= '0b36'
version ="%d.%d.%s" % (major,minor,serial) version ="%d.%d.%s" % (major,minor,serial)

View File

@ -218,7 +218,8 @@ int obi_ecotag(const char* dms_name,
const char* taxonomy_name, const char* taxonomy_name,
const char* output_view_name, const char* output_view_name,
const char* output_view_comments, const char* output_view_comments,
double ecotag_threshold) // TODO different threshold for the similarity sphere around ref seqs double ecotag_threshold,
double bubble_threshold)
{ {
// For each sequence // For each sequence
@ -239,6 +240,7 @@ int obi_ecotag(const char* dms_name,
index_t query_seq_idx, ref_seq_idx; index_t query_seq_idx, ref_seq_idx;
double score, best_score; double score, best_score;
double threshold; double threshold;
double lca_threshold;
int lcs_length; int lcs_length;
int ali_length; int ali_length;
Kmer_table_p ktable; Kmer_table_p ktable;
@ -389,10 +391,10 @@ int obi_ecotag(const char* dms_name,
return -1; return -1;
} }
free(db_threshold_str); free(db_threshold_str);
if (ecotag_threshold < db_threshold) if (bubble_threshold < db_threshold)
{ {
fprintf(stderr, "\nError: The threshold demanded (%f) is lower than the threshold used to build the reference database (%f).\n\n", fprintf(stderr, "\nError: The threshold demanded (%f) is lower than the threshold used to build the reference database (%f).\n\n",
ecotag_threshold, db_threshold); bubble_threshold, db_threshold);
return -1; return -1;
} }
@ -597,11 +599,16 @@ int obi_ecotag(const char* dms_name,
{ {
best_match_idx = best_match_array[j]; best_match_idx = best_match_array[j];
// Find the LCA for the chosen threshold // Find the LCA for the highest threshold between best_score and the chosen bubble threshold
score_array = obi_get_array_with_col_p_in_view(ref_view, score_a_column, best_match_idx, &lca_array_length); score_array = obi_get_array_with_col_p_in_view(ref_view, score_a_column, best_match_idx, &lca_array_length);
if (bubble_threshold < best_score)
lca_threshold = best_score;
else
lca_threshold = bubble_threshold;
k = 0; k = 0;
while ((k < lca_array_length) && (score_array[k] >= best_score)) while ((k < lca_array_length) && (score_array[k] >= lca_threshold))
k++; k++;
if (k>0) if (k>0)

View File

@ -42,12 +42,14 @@
* @param output_view_name The name to give to the output view. * @param output_view_name The name to give to the output view.
* @param output_view_comments The comments to associate to the output view. * @param output_view_comments The comments to associate to the output view.
* @param ecotag_threshold The threshold at which to assign. * @param ecotag_threshold The threshold at which to assign.
* @param bubble_threshold The threshold at which to look for an LCA (i.e. minimum identity considered for the assignment circle);
* the threshold actually used will be the highest between this value and the best assignment score found.
* *
* The algorithm works like this: * The algorithm works like this:
* For each query sequence: * For each query sequence:
* Align with reference database * Align with reference database
* Keep the indices of all the best matches * Keep the indices of all the best matches
* For each kept index, get the LCA at that threshold as stored in the reference database, then the LCA of those LCAs * For each kept index, get the LCA at the highest threshold between bubble_threshold and the best assignment score found (as stored in the reference database), then the LCA of those LCAs
* Write result (max score, threshold, taxid and scientific name of the LCA assigned, list of the ids of the best matches) * Write result (max score, threshold, taxid and scientific name of the LCA assigned, list of the ids of the best matches)
* *
* @returns A value indicating the success of the operation. * @returns A value indicating the success of the operation.
@ -65,7 +67,8 @@ int obi_ecotag(const char* dms_name,
const char* taxonomy_name, const char* taxonomy_name,
const char* output_view_name, const char* output_view_name,
const char* output_view_comments, const char* output_view_comments,
double ecotag_threshold); double ecotag_threshold,
double bubble_threshold);
#endif /* OBI_ECOTAG_H_ */ #endif /* OBI_ECOTAG_H_ */

View File

@ -1409,6 +1409,107 @@ DIR* opendir_in_dms(OBIDMS_p dms, const char* path_name)
} }
char* obi_dms_formatted_infos(OBIDMS_p dms, bool detailed)
{
char* dms_infos = NULL;
char* view_infos = NULL;
char* view_name = NULL;
char* tax_name = NULL;
char* all_tax_dir_path = NULL;
int i;
struct dirent* dp;
Obiview_p view;
// DMS name
dms_infos = (char*) malloc((strlen("# DMS name: ")+strlen(dms->dms_name)+strlen("\n# Views:\n")+1) * sizeof(char));
if (dms_infos == NULL)
{
obidebug(1, "\nError allocating memory for DMS formatted infos");
return NULL;
}
strcpy(dms_infos, "# DMS name: ");
strcat(dms_infos, dms->dms_name);
strcat(dms_infos, "\n# Views:\n");
// Go through views and get their infos
rewinddir(dms->view_directory);
while ((dp = readdir(dms->view_directory)) != NULL)
{
if ((dp->d_name)[0] == '.')
continue;
i=0;
while ((dp->d_name)[i] != '.')
i++;
view_name = (char*) malloc((i+1) * sizeof(char));
if (view_name == NULL)
{
obi_set_errno(OBI_MALLOC_ERROR);
obidebug(1, "\nError allocating memory for a view name when getting formatted DMS infos: file %s", dp->d_name);
return NULL;
}
strncpy(view_name, dp->d_name, i);
view_name[i] = '\0';
view = obi_open_view(dms, view_name);
if (view == NULL)
{
obidebug(1, "\nError opening a view to get DMS formatted infos");
return NULL;
}
if (detailed)
view_infos = obi_view_formatted_infos(view, detailed);
else
view_infos = obi_view_formatted_infos_one_line(view);
if (view_infos == NULL)
{
obidebug(1, "\nError getting a view infos to get DMS formatted infos");
return NULL;
}
dms_infos = realloc(dms_infos, (strlen(dms_infos)+strlen(view_infos)+1) * sizeof(char));
if (dms_infos == NULL)
{
obidebug(1, "\nError reallocating memory for DMS formatted infos");
return NULL;
}
strcat(dms_infos, view_infos);
if (obi_save_and_close_view(view) < 0)
{
obidebug(1, "\nError closing view while getting DMS formatted infos");
return NULL;
}
if (detailed)
{
dms_infos = realloc(dms_infos, (strlen(dms_infos)+2) * sizeof(char));
strcat(dms_infos, "\n");
}
}
// Add taxonomies
dms_infos = realloc(dms_infos, (strlen(dms_infos)+strlen("\n# Taxonomies:\n")+1) * sizeof(char));
if (dms_infos == NULL)
{
obidebug(1, "\nError reallocating memory for DMS formatted infos");
return NULL;
}
strcat(dms_infos, "# Taxonomies:\n");
rewinddir(dms->tax_directory);
while ((dp = readdir(dms->tax_directory)) != NULL)
{
if ((dp->d_name)[0] == '.')
continue;
tax_name = dp->d_name;
dms_infos = realloc(dms_infos, (strlen(dms_infos)+strlen(" # ")+strlen(view_infos)+1) * sizeof(char));
if (dms_infos == NULL)
{
obidebug(1, "\nError reallocating memory for DMS formatted infos");
return NULL;
}
strcat(dms_infos, " # ");
strcat(dms_infos, tax_name);
}
return dms_infos;
}
// TODO move somewhere else maybe // TODO move somewhere else maybe
// TODO discuss arguments // TODO discuss arguments
obiversion_t obi_import_column(const char* dms_path_1, const char* dms_path_2, const char* column_name, obiversion_t version_number) obiversion_t obi_import_column(const char* dms_path_1, const char* dms_path_2, const char* column_name, obiversion_t version_number)

View File

@ -459,6 +459,23 @@ char* obi_dms_get_full_path(OBIDMS_p dms, const char* path_name);
DIR* opendir_in_dms(OBIDMS_p dms, const char* path_name); DIR* opendir_in_dms(OBIDMS_p dms, const char* path_name);
/**
* @brief Returns the informations of a DMS with a human readable format (dms name, taxonomies and view infos).
*
* @warning The returned pointer has to be freed by the caller.
*
* @param column A pointer on a DMS.
* @param detailed Whether the informations should contain detailed view infos.
*
* @returns A pointer on a character array where the formatted DMS informations are stored.
* @retval NULL if an error occurred.
*
* @since September 2020
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
char* obi_dms_formatted_infos(OBIDMS_p dms, bool detailed);
/** /**
* @brief Imports a column, copying it from a DMS to another DMS, and returns the version of the column in the destination DMS. * @brief Imports a column, copying it from a DMS to another DMS, and returns the version of the column in the destination DMS.
* *

View File

@ -1725,17 +1725,33 @@ int obi_close_column(OBIDMS_column_p column)
int obi_clone_column_indexer(OBIDMS_column_p column) int obi_clone_column_indexer(OBIDMS_column_p column)
{ {
char* new_indexer_name; char* new_indexer_name;
int i;
new_indexer_name = obi_build_indexer_name((column->header)->name, (column->header)->version); i=0;
while (true) // find avl name not already used
{
new_indexer_name = obi_build_indexer_name((column->header)->name, ((column->header)->version)+i);
if (new_indexer_name == NULL) if (new_indexer_name == NULL)
return -1; return -1;
column->indexer = obi_clone_indexer(column->indexer, new_indexer_name); // TODO Need to lock this somehow? column->indexer = obi_clone_indexer(column->indexer, new_indexer_name); // TODO Need to lock this somehow?
if (column->indexer == NULL) if (column->indexer == NULL)
{ {
if (errno == EEXIST)
{
free(new_indexer_name);
i++;
}
else
{
free(new_indexer_name);
obidebug(1, "\nError cloning a column's indexer to make it writable"); obidebug(1, "\nError cloning a column's indexer to make it writable");
return -1; return -1;
} }
}
else
break;
}
strcpy((column->header)->indexer_name, new_indexer_name); strcpy((column->header)->indexer_name, new_indexer_name);
@ -2415,17 +2431,81 @@ char* obi_get_formatted_elements_names(OBIDMS_column_p column)
} }
char* obi_column_formatted_infos(OBIDMS_column_p column) char* obi_column_formatted_infos(OBIDMS_column_p column, bool detailed)
{ {
char* column_infos; char* column_infos = NULL;
char* elt_names; char* elt_names = NULL;
char* data_type_str = NULL;
column_infos = malloc(1024 * sizeof(char)); char* comments = NULL;
// Get element names informations
elt_names = obi_get_formatted_elements_names(column); elt_names = obi_get_formatted_elements_names(column);
if (elt_names == NULL)
{
obidebug(1, "\nError getting formatted elements names for formatted columns infos");
return NULL;
}
// Get data type informations
data_type_str = name_data_type((column->header)->returned_data_type);
if (data_type_str == NULL)
{
obidebug(1, "\nError getting formatted data type for formatted columns infos");
return NULL;
}
// Get commments if detailed informations required
if (detailed)
comments = (column->header)->comments;
// Build the string of formatted infos, allocating memory as needed
// Data type
column_infos = (char*) malloc((strlen("data type: ")+strlen(data_type_str)+1) * sizeof(char));
if (column_infos == NULL)
{
obi_set_errno(OBI_MALLOC_ERROR);
obidebug(1, "\nError allocating memory for formatted column infos");
return NULL;
}
strcpy(column_infos, "data type: ");
strcat(column_infos, data_type_str);
// Element names if more than 1
if ((column->header)->nb_elements_per_line > 1)
{
column_infos = realloc(column_infos, (strlen(column_infos)+strlen(", elements: ")+strlen(elt_names)+1) * sizeof(char));
if (column_infos == NULL)
{
obi_set_errno(OBI_MALLOC_ERROR);
obidebug(1, "\nError allocating memory for formatted column infos");
return NULL;
}
strcat(column_infos, ", elements: ");
strcat(column_infos, elt_names);
}
if (detailed && (strlen(comments)>2)) // Add all comments if required and not empty
{
column_infos = realloc(column_infos, (strlen(column_infos)+strlen("\nComments:\n")+strlen(comments)+1) * sizeof(char));
if (column_infos == NULL)
{
obi_set_errno(OBI_MALLOC_ERROR);
obidebug(1, "\nError allocating memory for formatted column infos");
return NULL;
}
strcat(column_infos, "\nComments:\n");
strcat(column_infos, comments);
}
// "data type: OBI_TYPE, element names: [formatted element names](, all comments)"
free(elt_names); free(elt_names);
free(data_type_str);
return column_infos; return column_infos;
} }
@ -2472,7 +2552,6 @@ int obi_column_prepare_to_set_value(OBIDMS_column_p column, index_t line_nb, ind
} }
int obi_column_prepare_to_get_value(OBIDMS_column_p column, index_t line_nb) int obi_column_prepare_to_get_value(OBIDMS_column_p column, index_t line_nb)
{ {
if ((line_nb+1) > ((column->header)->line_count)) if ((line_nb+1) > ((column->header)->line_count))

View File

@ -505,12 +505,37 @@ index_t obi_column_get_element_index_from_name(OBIDMS_column_p column, const cha
char* obi_get_elements_names(OBIDMS_column_p column); char* obi_get_elements_names(OBIDMS_column_p column);
// TODO /**
//char* obi_get_formatted_elements_names(OBIDMS_column_p column); * @brief Recovers the elements names of the lines of a column with a human readable format ("0; 1; 2; ...; n\0").
*
* @warning The returned pointer has to be freed by the caller.
*
* @param column A pointer on an OBIDMS column.
*
* @returns A pointer on a character array where the elements names are stored.
* @retval NULL if an error occurred.
*
* @since September 2020
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
char* obi_get_formatted_elements_names(OBIDMS_column_p column);
// TODO /**
//char* obi_column_formatted_infos(OBIDMS_column_p column); * @brief Returns the informations of a column with a human readable format (data type, element names, comments).
*
* @warning The returned pointer has to be freed by the caller.
*
* @param column A pointer on an OBIDMS column.
* @param detailed Whether the informations should contain column comments or just data type and element names.
*
* @returns A pointer on a character array where the formatted column informations are stored.
* @retval NULL if an error occurred.
*
* @since September 2020
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
char* obi_column_formatted_infos(OBIDMS_column_p column, bool detailed);
/** /**

View File

@ -17,6 +17,7 @@
#include <sys/mman.h> #include <sys/mman.h>
#include <inttypes.h> #include <inttypes.h>
#include <math.h> #include <math.h>
#include <time.h>
//#include <ctype.h> //#include <ctype.h>
#include "obiview.h" #include "obiview.h"
@ -1185,6 +1186,7 @@ static int close_view(Obiview_p view)
obidebug(1, "\nError getting a column to close from the linked list of column pointers of a view"); obidebug(1, "\nError getting a column to close from the linked list of column pointers of a view");
return -1; return -1;
} }
if (obi_close_column(column) < 0) if (obi_close_column(column) < 0)
{ {
obidebug(1, "\nError closing a column while closing a view"); obidebug(1, "\nError closing a column while closing a view");
@ -2603,6 +2605,144 @@ int obi_view_create_column_alias(Obiview_p view, const char* current_name, const
} }
char* obi_view_formatted_infos(Obiview_p view, bool detailed)
{
int i;
char* view_infos = NULL;
char* view_name = NULL;
time_t creation_date = NULL;
char* creation_date_str = NULL;
index_t line_count;
char line_count_str[256];
OBIDMS_column_p column;
char* column_alias = NULL;
char* column_infos = NULL;
char* comments = NULL;
// View name
view_name = (view->infos)->name;
view_infos = (char*) malloc((strlen("# View name:\n")+strlen(view_name)+1) * sizeof(char));
strcpy(view_infos, "# View name:\n");
strcat(view_infos, view_name);
// Date created
if (view->read_only) // Date not saved until view is finished writing
{
creation_date = (view->infos)->creation_date;
creation_date_str = ctime(&creation_date);
view_infos = realloc(view_infos, (strlen(view_infos)+strlen("\n# Date created:\n")+strlen(creation_date_str)+1) * sizeof(char));
strcat(view_infos, "\n# Date created:\n");
strcat(view_infos, creation_date_str);
}
// Line count
line_count = (view->infos)->line_count;
snprintf(line_count_str, sizeof line_count_str, "%zu", line_count);
view_infos = realloc(view_infos, (strlen(view_infos)+strlen("\n# Line count:\n")+strlen(line_count_str)+1) * sizeof(char));
strcat(view_infos, "# Line count:\n");
strcat(view_infos, line_count_str);
// Columns: go through each, print their alias then their infos
view_infos = realloc(view_infos, (strlen(view_infos)+strlen("\n# Columns:")+1) * sizeof(char));
strcat(view_infos, "\n# Columns:");
for (i=0; i<((view->infos)->column_count); i++)
{
column = *((OBIDMS_column_p*)ll_get(view->columns, i));
if (column == NULL)
{
obidebug(1, "\nError getting a column from the linked list of column pointers of a view to format view infos");
return NULL;
}
// Column alias
column_alias = (((view->infos)->column_references)[i]).alias;
view_infos = realloc(view_infos, (strlen(view_infos)+strlen("\n")+strlen(column_alias)+strlen(", ")+1) * sizeof(char));
strcat(view_infos, "\n");
strcat(view_infos, column_alias);
strcat(view_infos, ", ");
// Column infos
column_infos = obi_column_formatted_infos(column, detailed);
if (column_infos == NULL)
{
obidebug(1, "\nError getting column infos to format view infos");
return NULL;
}
view_infos = realloc(view_infos, (strlen(view_infos)+strlen(column_infos)+1) * sizeof(char));
strcat(view_infos, column_infos);
free(column_infos);
}
// Get commments if detailed informations required
if (detailed)
{
comments = (view->infos)->comments;
if (strlen(comments)>2) // Add all comments if not empty
{
view_infos = realloc(view_infos, (strlen(view_infos)+strlen("\n# Comments:\n")+strlen(comments)+1) * sizeof(char));
if (view_infos == NULL)
{
obi_set_errno(OBI_MALLOC_ERROR);
obidebug(1, "\nError allocating memory for formatted view infos");
return NULL;
}
strcat(view_infos, "\n# Comments:\n");
strcat(view_infos, comments);
}
}
view_infos = realloc(view_infos, (strlen(view_infos)+2) * sizeof(char));
strcat(view_infos, "\n");
return view_infos;
}
char* obi_view_formatted_infos_one_line(Obiview_p view)
{
int i;
char* view_infos = NULL;
char* view_name = NULL;
time_t creation_date = NULL;
char* creation_date_str = NULL;
index_t line_count;
char line_count_str[256];
// View name
view_name = (view->infos)->name;
view_infos = (char*) malloc((strlen(" # ")+strlen(view_name)+2) * sizeof(char));
strcpy(view_infos, " # ");
strcat(view_infos, view_name);
strcat(view_infos, ":");
// Date created
if (view->read_only) // Date not saved until view is finished writing
{
creation_date = (view->infos)->creation_date;
creation_date_str = ctime(&creation_date);
// Delete \n added by ctime
creation_date_str[strlen(creation_date_str)-1] = '\0';
view_infos = realloc(view_infos, (strlen(view_infos)+strlen(" Date created: ")+strlen(creation_date_str)+1) * sizeof(char));
strcat(view_infos, " Date created: ");
strcat(view_infos, creation_date_str);
}
// Line count
line_count = (view->infos)->line_count;
snprintf(line_count_str, sizeof line_count_str, "%zu", line_count);
view_infos = realloc(view_infos, (strlen(view_infos)+strlen(" ; Line count: ")+strlen(line_count_str)+1) * sizeof(char));
strcat(view_infos, " ; Line count: ");
strcat(view_infos, line_count_str);
view_infos = realloc(view_infos, (strlen(view_infos)+2) * sizeof(char));
strcat(view_infos, "\n");
return view_infos;
}
int obi_view_write_comments(Obiview_p view, const char* comments) int obi_view_write_comments(Obiview_p view, const char* comments)
{ {
size_t new_size; size_t new_size;

View File

@ -519,6 +519,39 @@ OBIDMS_column_p* obi_view_get_pointer_on_column_in_view(Obiview_p view, const ch
int obi_view_create_column_alias(Obiview_p view, const char* current_name, const char* alias); int obi_view_create_column_alias(Obiview_p view, const char* current_name, const char* alias);
/**
* @brief Returns the informations of a view with a human readable format (view name, date created, line count, column informations, comments).
*
* @warning The returned pointer has to be freed by the caller.
*
* @param column A pointer on a view.
* @param detailed Whether the informations should contain view comments.
*
* @returns A pointer on a character array where the formatted view informations are stored.
* @retval NULL if an error occurred.
*
* @since September 2020
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
char* obi_view_formatted_infos(Obiview_p view, bool detailed);
/**
* @brief Returns the informations of a view with a human readable format on one line (view name, date created, line count).
*
* @warning The returned pointer has to be freed by the caller.
*
* @param column A pointer on a view.
*
* @returns A pointer on a character array where the formatted view informations are stored.
* @retval NULL if an error occurred.
*
* @since September 2020
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
char* obi_view_formatted_infos_one_line(Obiview_p view);
/** /**
* @brief Internal function writing new comments in a view file. * @brief Internal function writing new comments in a view file.
* *