Compare commits

...

11 Commits

24 changed files with 555 additions and 97 deletions

View File

@ -4,7 +4,7 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.dms import DMS
from obitools3.dms.view.view cimport View
from obitools3.uri.decode import open_uri
from obitools3.apps.optiongroups import addMinimalOutputOption
from obitools3.apps.optiongroups import addMinimalOutputOption, addNoProgressBarOption
from obitools3.dms.view import RollbackException
from obitools3.apps.config import logger
from obitools3.utils cimport str2bytes
@ -28,6 +28,7 @@ __title__="Concatenate views."
def addOptions(parser):
addMinimalOutputOption(parser)
addNoProgressBarOption(parser)
group=parser.add_argument_group('obi cat specific options')
@ -47,9 +48,9 @@ def run(config):
logger("info", "obi cat")
# Open the views to concatenate
iview_list = []
# Check the views to concatenate
idms_list = []
iview_list = []
total_len = 0
remove_qual = False
remove_rev_qual = False
@ -67,8 +68,9 @@ def run(config):
if REVERSE_QUALITY_COLUMN not in i_view: # same as above for reverse quality
remove_rev_qual = True
total_len += len(i_view)
iview_list.append(i_view)
idms_list.append(i_dms)
iview_list.append(i_view.name)
i_view.close()
# Open the output: only the DMS
output = open_uri(config['obi']['outputURI'],
@ -97,8 +99,10 @@ def run(config):
# Initialize multiple elements columns
if type(output_0)==BufferedWriter:
dict_cols = {}
for v in iview_list:
for v_uri in config["cat"]["views_to_cat"]:
v = open_uri(v_uri)[1]
for coln in v.keys():
col = v[coln]
if v[coln].nb_elements_per_line > 1:
if coln not in dict_cols:
dict_cols[coln] = {}
@ -108,6 +112,7 @@ def run(config):
else:
dict_cols[coln]['eltnames'] = set(v[coln].elements_names + list(dict_cols[coln]['eltnames']))
dict_cols[coln]['nbelts'] = len(dict_cols[coln]['eltnames'])
v.close()
for coln in dict_cols:
Column.new_column(o_view, coln, dict_cols[coln]['obitype'],
nb_elements_per_line=dict_cols[coln]['nbelts'], elements_names=list(dict_cols[coln]['eltnames']))
@ -119,7 +124,8 @@ def run(config):
pb = None
i = 0
for v in iview_list:
for v_uri in config["cat"]["views_to_cat"]:
v = open_uri(v_uri)[1]
for entry in v:
PyErr_CheckSignals()
if pb is not None:
@ -130,6 +136,7 @@ def run(config):
else:
o_view[i] = entry
i+=1
v.close()
# Deletes quality columns if needed
if type(output_0)!=BufferedWriter:
@ -144,7 +151,7 @@ def run(config):
# Save command config in DMS comments
command_line = " ".join(sys.argv[1:])
o_view.write_config(config, "cat", command_line, input_dms_name=[d.name for d in idms_list], input_view_name=[v.name for v in iview_list])
o_view.write_config(config, "cat", command_line, input_dms_name=[d.name for d in idms_list], input_view_name=[vname for vname in iview_list])
o_dms.record_command_line(command_line)
#print("\n\nOutput view:\n````````````", file=sys.stderr)

View File

@ -41,6 +41,17 @@ def addOptions(parser):
help="Minimum identity to consider for assignment, as a normalized identity, e.g. 0.95 for an identity of 95%%. "
"Default: 0.00 (no threshold).")
group.add_argument('--minimum-circle','-c',
action="store", dest="ecotag:bubble_threshold",
metavar='<CIRCLE_THRESHOLD>',
default=0.99,
type=float,
help="Minimum identity considered for the assignment circle "
"(sequence is assigned to the LCA of all sequences within a similarity circle of the best matches; "
"the threshold for this circle is the highest value between <CIRCLE_THRESHOLD> and the best assignment score found). "
"Give value as a normalized identity, e.g. 0.95 for an identity of 95%%. "
"Default: 0.99.")
def run(config):
DMS.obi_atexit()
@ -66,9 +77,8 @@ def run(config):
ref_view_name = ref[1]
# Check that the threshold demanded is greater than or equal to the threshold used to build the reference database
if config['ecotag']['threshold'] < eval(ref_dms[ref_view_name].comments["ref_db_threshold"]) :
print("Error: The threshold demanded (%f) is lower than the threshold used to build the reference database (%f).",
config['ecotag']['threshold'], ref_dms[ref_view_name].comments["ref_db_threshold"])
if config['ecotag']['bubble_threshold'] < eval(ref_dms[ref_view_name].comments["ref_db_threshold"]) :
raise Exception(f"Error: The threshold demanded ({config['ecotag']['bubble_threshold']}) is lower than the threshold used to build the reference database ({float(ref_dms[ref_view_name].comments['ref_db_threshold'])}).")
# Open the output: only the DMS
output = open_uri(config['obi']['outputURI'],
@ -113,8 +123,9 @@ def run(config):
if obi_ecotag(i_dms.name_with_full_path, tobytes(i_view_name), \
ref_dms.name_with_full_path, tobytes(ref_view_name), \
taxo_dms.name_with_full_path, tobytes(taxonomy_name), \
tobytes(o_view_name), comments,
config['ecotag']['threshold']) < 0:
tobytes(o_view_name), comments, \
config['ecotag']['threshold'], \
config['ecotag']['bubble_threshold']) < 0:
raise Exception("Error running ecotag")
# If the input and output DMS are not the same, export result view to output DMS

View File

@ -89,7 +89,7 @@ def run(config):
if pb is not None:
pb(i, force=True)
print("", file=sys.stderr)
print("", file=sys.stderr)
# TODO save command in input dms?

View File

@ -31,27 +31,11 @@ def run(config):
input = open_uri(config['obi']['inputURI'])
if input is None:
raise Exception("Could not read input")
if input[2] == DMS and not config['ls']['longformat']:
dms = input[0]
l = []
for viewname in input[0]:
view = dms[viewname]
l.append(tostr(viewname) + "\t(Date created: " + str(bytes2str_object(view.comments["Date created"]))+")")
view.close()
l.sort()
for v in l:
print(v)
# Print representation
if config['ls']['longformat']:
print(input[1].repr_longformat())
else:
print(repr(input[1]))
if input[2] == DMS:
taxolist = ["\n### Taxonomies:"]
for t in Taxonomy.list_taxos(input[0]):
taxolist.append("\t"+tostr(t))
if len(taxolist) > 1:
for t in taxolist:
print(t)
if config['ls']['longformat'] and len(input[1].comments) > 0:
print("\n### Comments:")
print(str(input[1].comments))
input[0].close(force=True)

View File

@ -354,6 +354,9 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, di
key = mergedKeys[k]
merged_col_name = mergedKeys_m[k]
if merged_infos[merged_col_name]['nb_elts'] == 1:
raise Exception("Can't merge information from a tag with only one element (e.g. one sample ; don't use -m option)")
if merged_col_name in view:
i_col = view[merged_col_name]
else:

View File

@ -34,6 +34,7 @@ cdef extern from "obidms.h" nogil:
int obi_close_dms(OBIDMS_p dms, bint force)
char* obi_dms_get_dms_path(OBIDMS_p dms)
char* obi_dms_get_full_path(OBIDMS_p dms, const_char_p path_name)
char* obi_dms_formatted_infos(OBIDMS_p dms, bint detailed)
void obi_close_atexit()
obiversion_t obi_import_column(const char* dms_path_1, const char* dms_path_2, const char* column_name, obiversion_t version_number)

View File

@ -70,3 +70,6 @@ cdef extern from "obidmscolumn.h" nogil:
int obi_column_write_comments(OBIDMS_column_p column, const char* comments)
int obi_column_add_comment(OBIDMS_column_p column, const char* key, const char* value)
char* obi_column_formatted_infos(OBIDMS_column_p column, bint detailed)

View File

@ -11,4 +11,5 @@ cdef extern from "obi_ecotag.h" nogil:
const char* taxonomy_name,
const char* output_view_name,
const char* output_view_comments,
double ecotag_threshold)
double ecotag_threshold,
double bubble_threshold)

View File

@ -103,13 +103,17 @@ cdef extern from "obiview.h" nogil:
bint create)
int obi_view_delete_column(Obiview_p view, const_char_p column_name, bint delete_file)
OBIDMS_column_p obi_view_get_column(Obiview_p view, const_char_p column_name)
OBIDMS_column_p* obi_view_get_pointer_on_column_in_view(Obiview_p view, const_char_p column_name)
int obi_view_create_column_alias(Obiview_p view, const_char_p current_name, const_char_p alias)
char* obi_view_formatted_infos(Obiview_p view, bint detailed)
char* obi_view_formatted_infos_one_line(Obiview_p view)
int obi_view_write_comments(Obiview_p view, const_char_p comments)
int obi_view_add_comment(Obiview_p view, const_char_p key, const_char_p value)

View File

@ -302,15 +302,24 @@ cdef class Column(OBIWrapper) :
@OBIWrapper.checkIsActive
def __repr__(self) :
    """
    Return the short human readable description of the column, as built by
    the C function obi_column_formatted_infos() with detailed=False.

    Raises an Exception if the C layer fails to build the description.
    """
    cdef str s
    cdef char* sc
    cdef OBIDMS_column_p pointer = self.pointer()
    sc = obi_column_formatted_infos(pointer, False)
    # The C function returns NULL on error: converting or freeing it blindly would crash
    if sc == NULL:
        raise Exception("Error getting the formatted infos of a column")
    try:
        s = bytes2str(sc)
    finally:
        # The buffer is allocated by the C layer and owned by the caller
        free(sc)
    return s
@OBIWrapper.checkIsActive
def repr_longformat(self) :
    """
    Return the detailed human readable description of the column, as built by
    the C function obi_column_formatted_infos() with detailed=True.

    Raises an Exception if the C layer fails to build the description.
    """
    cdef str s
    cdef char* sc
    cdef OBIDMS_column_p pointer = self.pointer()
    sc = obi_column_formatted_infos(pointer, True)
    # The C function returns NULL on error: converting or freeing it blindly would crash
    if sc == NULL:
        raise Exception("Error getting the detailed formatted infos of a column")
    try:
        s = bytes2str(sc)
    finally:
        # The buffer is allocated by the C layer and owned by the caller
        free(sc)
    return s
def close(self): # TODO discuss, can't be called bc then bug when closing view that tries to close it in C

View File

@ -10,7 +10,8 @@ from .capi.obidms cimport obi_open_dms, \
obi_dms_exists, \
obi_dms_get_full_path, \
obi_close_atexit, \
obi_dms_write_comments
obi_dms_write_comments, \
obi_dms_formatted_infos
from .capi.obitypes cimport const_char_p
@ -32,6 +33,8 @@ from .object import OBIWrapper
import json
import time
from libc.stdlib cimport free
cdef class DMS(OBIWrapper):
@ -223,13 +226,24 @@ cdef class DMS(OBIWrapper):
@OBIWrapper.checkIsActive
def __repr__(self) :
    """
    Return the short human readable description of the DMS, as built by
    the C function obi_dms_formatted_infos() with detailed=False.

    Raises an Exception if the C layer fails to build the description.
    """
    cdef str s
    cdef char* sc
    cdef OBIDMS_p pointer = self.pointer()
    sc = obi_dms_formatted_infos(pointer, False)
    # The C function returns NULL on error: converting or freeing it blindly would crash
    if sc == NULL:
        raise Exception("Error getting the formatted infos of a DMS")
    try:
        s = bytes2str(sc)
    finally:
        # The buffer is allocated by the C layer and owned by the caller
        free(sc)
    return s
@OBIWrapper.checkIsActive
def repr_longformat(self) :
    """
    Return the detailed human readable description of the DMS, as built by
    the C function obi_dms_formatted_infos() with detailed=True.

    Raises an Exception if the C layer fails to build the description.
    """
    cdef str s
    cdef char* sc
    cdef OBIDMS_p pointer = self.pointer()
    sc = obi_dms_formatted_infos(pointer, True)
    # The C function returns NULL on error: converting or freeing it blindly would crash
    if sc == NULL:
        raise Exception("Error getting the detailed formatted infos of a DMS")
    try:
        s = bytes2str(sc)
    finally:
        # The buffer is allocated by the C layer and owned by the caller
        free(sc)
    return s

View File

@ -19,7 +19,9 @@ from ..capi.obiview cimport Alias_column_pair_p, \
obi_view_delete_column, \
obi_view_create_column_alias, \
obi_view_write_comments, \
obi_delete_view
obi_delete_view, \
obi_view_formatted_infos, \
obi_view_formatted_infos_one_line
from ..capi.obidmscolumn cimport OBIDMS_column_p
from ..capi.obidms cimport OBIDMS_p
@ -59,6 +61,8 @@ import pkgutil
import json
import sys
from libc.stdlib cimport free
cdef class View(OBIWrapper) :
@ -186,15 +190,22 @@ cdef class View(OBIWrapper) :
@OBIWrapper.checkIsActive
def __repr__(self) :
    """
    Return the short human readable description of the view, as built by
    the C function obi_view_formatted_infos() with detailed=False.

    Raises an Exception if the C layer fails to build the description.
    """
    cdef str s
    cdef char* sc
    cdef Obiview_p pointer = self.pointer()
    sc = obi_view_formatted_infos(pointer, False)
    # The C function returns NULL on error: converting or freeing it blindly would crash
    if sc == NULL:
        raise Exception("Error getting the formatted infos of a view")
    try:
        s = bytes2str(sc)
    finally:
        # The buffer is allocated by the C layer and owned by the caller
        free(sc)
    return s
@OBIWrapper.checkIsActive
def repr_longformat(self) :
    """
    Return the detailed human readable description of the view, as built by
    the C function obi_view_formatted_infos() with detailed=True.

    Raises an Exception if the C layer fails to build the description.
    """
    cdef str s
    cdef char* sc
    cdef Obiview_p pointer = self.pointer()
    sc = obi_view_formatted_infos(pointer, True)
    # The C function returns NULL on error: converting or freeing it blindly would crash
    if sc == NULL:
        raise Exception("Error getting the detailed formatted infos of a view")
    try:
        s = bytes2str(sc)
    finally:
        # The buffer is allocated by the C layer and owned by the caller
        free(sc)
    return s

View File

@ -5,6 +5,7 @@ from obitools3.dms.view.view cimport Line
from obitools3.utils cimport bytes2str_object, str2bytes, tobytes
from obitools3.dms.column.column cimport Column_line, Column_multi_elts
import sys
cdef class TabFormat:
@ -26,18 +27,22 @@ cdef class TabFormat:
if self.header and self.first_line:
if isinstance(data.view[k], Column_multi_elts):
for k2 in data.view[k].keys():
keys = data.view[k].keys()
keys.sort()
for k2 in keys:
line.append(tobytes(k)+b':'+tobytes(k2))
else:
line.append(tobytes(k))
else:
value = data[k]
if isinstance(data.view[k], Column_multi_elts):
keys = data.view[k].keys()
keys.sort()
if value is None: # all keys at None
for k2 in data.view[k].keys(): # TODO could be much more efficient
for k2 in keys: # TODO could be much more efficient
line.append(self.NAString)
else:
for k2 in data.view[k].keys(): # TODO could be much more efficient
for k2 in keys: # TODO could be much more efficient
if value[k2] is not None:
line.append(str2bytes(str(bytes2str_object(value[k2])))) # genius programming
else:

View File

@ -2,7 +2,7 @@
from obitools3.dms.capi.obitypes cimport obitype_t, index_t
cpdef bytes format_separator(bytes format)
cpdef bytes format_uniq_pattern(bytes format)
cpdef int count_entries(file, bytes format)
cdef obi_errno_to_exception(index_t line_nb=*, object elt_id=*, str error_message=*)

View File

@ -24,11 +24,11 @@ import glob
import gzip
cpdef bytes format_separator(bytes format):
cpdef bytes format_uniq_pattern(bytes format):
if format == b"fasta":
return b"\n>"
elif format == b"fastq":
return b"\n@"
return b"\n\+\n"
elif format == b"ngsfilter" or format == b"tabular":
return b"\n"
elif format == b"genbank" or format == b"embl":
@ -42,7 +42,7 @@ cpdef bytes format_separator(bytes format):
cpdef int count_entries(file, bytes format):
try:
sep = format_separator(format)
sep = format_uniq_pattern(format)
if sep is None:
return -1
sep = re.compile(sep)
@ -72,7 +72,7 @@ cpdef int count_entries(file, bytes format):
return -1
mmapped_file = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
total_count += len(re.findall(sep, mmapped_file))
if format != b"ngsfilter" and format != b"tabular" and format != b"embl" and format != b"genbank":
if format != b"ngsfilter" and format != b"tabular" and format != b"embl" and format != b"genbank" and format != b"fastq":
total_count += 1 # adding +1 for 1st entry because separators include \n (ngsfilter and tabular already count one more because of last \n)
except:

View File

@ -1,5 +1,5 @@
major = 3
minor = 0
serial= '0b31'
serial= '0b36'
version ="%d.%d.%s" % (major,minor,serial)

View File

@ -218,7 +218,8 @@ int obi_ecotag(const char* dms_name,
const char* taxonomy_name,
const char* output_view_name,
const char* output_view_comments,
double ecotag_threshold) // TODO different threshold for the similarity sphere around ref seqs
double ecotag_threshold,
double bubble_threshold)
{
// For each sequence
@ -239,6 +240,7 @@ int obi_ecotag(const char* dms_name,
index_t query_seq_idx, ref_seq_idx;
double score, best_score;
double threshold;
double lca_threshold;
int lcs_length;
int ali_length;
Kmer_table_p ktable;
@ -389,10 +391,10 @@ int obi_ecotag(const char* dms_name,
return -1;
}
free(db_threshold_str);
if (ecotag_threshold < db_threshold)
if (bubble_threshold < db_threshold)
{
fprintf(stderr, "\nError: The threshold demanded (%f) is lower than the threshold used to build the reference database (%f).\n\n",
ecotag_threshold, db_threshold);
bubble_threshold, db_threshold);
return -1;
}
@ -597,11 +599,16 @@ int obi_ecotag(const char* dms_name,
{
best_match_idx = best_match_array[j];
// Find the LCA for the chosen threshold
// Find the LCA for the highest threshold between best_score and the chosen bubble threshold
score_array = obi_get_array_with_col_p_in_view(ref_view, score_a_column, best_match_idx, &lca_array_length);
if (bubble_threshold < best_score)
lca_threshold = best_score;
else
lca_threshold = bubble_threshold;
k = 0;
while ((k < lca_array_length) && (score_array[k] >= best_score))
while ((k < lca_array_length) && (score_array[k] >= lca_threshold))
k++;
if (k>0)

View File

@ -42,12 +42,14 @@
* @param output_view_name The name to give to the output view.
* @param output_view_comments The comments to associate to the output view.
* @param ecotag_threshold The threshold at which to assign.
* @param bubble_threshold The threshold at which to look for an LCA (i.e. minimum identity considered for the assignment circle);
* the threshold actually used will be the highest between this value and the best assignment score found.
*
* The algorithm works like this:
* For each query sequence:
* Align with reference database
* Keep the indices of all the best matches
* For each kept index, get the LCA at that threshold as stored in the reference database, then the LCA of those LCAs
* For each kept index, get the LCA at the highest threshold between bubble_threshold and the best assignment score found (as stored in the reference database), then the LCA of those LCAs
* Write result (max score, threshold, taxid and scientific name of the LCA assigned, list of the ids of the best matches)
*
* @returns A value indicating the success of the operation.
@ -65,7 +67,8 @@ int obi_ecotag(const char* dms_name,
const char* taxonomy_name,
const char* output_view_name,
const char* output_view_comments,
double ecotag_threshold);
double ecotag_threshold,
double bubble_threshold);
#endif /* OBI_ECOTAG_H_ */

View File

@ -1409,6 +1409,107 @@ DIR* opendir_in_dms(OBIDMS_p dms, const char* path_name)
}
char* obi_dms_formatted_infos(OBIDMS_p dms, bool detailed)
{
char* dms_infos = NULL;
char* view_infos = NULL;
char* view_name = NULL;
char* tax_name = NULL;
char* all_tax_dir_path = NULL;
int i;
struct dirent* dp;
Obiview_p view;
// DMS name
dms_infos = (char*) malloc((strlen("# DMS name: ")+strlen(dms->dms_name)+strlen("\n# Views:\n")+1) * sizeof(char));
if (dms_infos == NULL)
{
obidebug(1, "\nError allocating memory for DMS formatted infos");
return NULL;
}
strcpy(dms_infos, "# DMS name: ");
strcat(dms_infos, dms->dms_name);
strcat(dms_infos, "\n# Views:\n");
// Go through views and get their infos
rewinddir(dms->view_directory);
while ((dp = readdir(dms->view_directory)) != NULL)
{
if ((dp->d_name)[0] == '.')
continue;
i=0;
while ((dp->d_name)[i] != '.')
i++;
view_name = (char*) malloc((i+1) * sizeof(char));
if (view_name == NULL)
{
obi_set_errno(OBI_MALLOC_ERROR);
obidebug(1, "\nError allocating memory for a view name when getting formatted DMS infos: file %s", dp->d_name);
return NULL;
}
strncpy(view_name, dp->d_name, i);
view_name[i] = '\0';
view = obi_open_view(dms, view_name);
if (view == NULL)
{
obidebug(1, "\nError opening a view to get DMS formatted infos");
return NULL;
}
if (detailed)
view_infos = obi_view_formatted_infos(view, detailed);
else
view_infos = obi_view_formatted_infos_one_line(view);
if (view_infos == NULL)
{
obidebug(1, "\nError getting a view infos to get DMS formatted infos");
return NULL;
}
dms_infos = realloc(dms_infos, (strlen(dms_infos)+strlen(view_infos)+1) * sizeof(char));
if (dms_infos == NULL)
{
obidebug(1, "\nError reallocating memory for DMS formatted infos");
return NULL;
}
strcat(dms_infos, view_infos);
if (obi_save_and_close_view(view) < 0)
{
obidebug(1, "\nError closing view while getting DMS formatted infos");
return NULL;
}
if (detailed)
{
dms_infos = realloc(dms_infos, (strlen(dms_infos)+2) * sizeof(char));
strcat(dms_infos, "\n");
}
}
// Add taxonomies
dms_infos = realloc(dms_infos, (strlen(dms_infos)+strlen("\n# Taxonomies:\n")+1) * sizeof(char));
if (dms_infos == NULL)
{
obidebug(1, "\nError reallocating memory for DMS formatted infos");
return NULL;
}
strcat(dms_infos, "# Taxonomies:\n");
rewinddir(dms->tax_directory);
while ((dp = readdir(dms->tax_directory)) != NULL)
{
if ((dp->d_name)[0] == '.')
continue;
tax_name = dp->d_name;
dms_infos = realloc(dms_infos, (strlen(dms_infos)+strlen(" # ")+strlen(view_infos)+1) * sizeof(char));
if (dms_infos == NULL)
{
obidebug(1, "\nError reallocating memory for DMS formatted infos");
return NULL;
}
strcat(dms_infos, " # ");
strcat(dms_infos, tax_name);
}
return dms_infos;
}
// TODO move somewhere else maybe
// TODO discuss arguments
obiversion_t obi_import_column(const char* dms_path_1, const char* dms_path_2, const char* column_name, obiversion_t version_number)

View File

@ -459,6 +459,23 @@ char* obi_dms_get_full_path(OBIDMS_p dms, const char* path_name);
DIR* opendir_in_dms(OBIDMS_p dms, const char* path_name);
/**
* @brief Returns the informations of a DMS with a human readable format (dms name, taxonomies and view infos).
*
* @warning The returned pointer has to be freed by the caller.
*
* @param dms A pointer on a DMS.
* @param detailed Whether the informations should contain detailed view infos.
*
* @returns A pointer on a character array where the formatted DMS informations are stored.
* @retval NULL if an error occurred.
*
* @since September 2020
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
char* obi_dms_formatted_infos(OBIDMS_p dms, bool detailed);
/**
* @brief Imports a column, copying it from a DMS to another DMS, and returns the version of the column in the destination DMS.
*

View File

@ -1725,16 +1725,32 @@ int obi_close_column(OBIDMS_column_p column)
int obi_clone_column_indexer(OBIDMS_column_p column)
{
char* new_indexer_name;
int i;
new_indexer_name = obi_build_indexer_name((column->header)->name, (column->header)->version);
if (new_indexer_name == NULL)
return -1;
column->indexer = obi_clone_indexer(column->indexer, new_indexer_name); // TODO Need to lock this somehow?
if (column->indexer == NULL)
i=0;
while (true) // find avl name not already used
{
obidebug(1, "\nError cloning a column's indexer to make it writable");
return -1;
new_indexer_name = obi_build_indexer_name((column->header)->name, ((column->header)->version)+i);
if (new_indexer_name == NULL)
return -1;
column->indexer = obi_clone_indexer(column->indexer, new_indexer_name); // TODO Need to lock this somehow?
if (column->indexer == NULL)
{
if (errno == EEXIST)
{
free(new_indexer_name);
i++;
}
else
{
free(new_indexer_name);
obidebug(1, "\nError cloning a column's indexer to make it writable");
return -1;
}
}
else
break;
}
strcpy((column->header)->indexer_name, new_indexer_name);
@ -2415,17 +2431,81 @@ char* obi_get_formatted_elements_names(OBIDMS_column_p column)
}
/*
 * Builds a human readable description of a column:
 * "data type: OBI_TYPE(, elements: [formatted element names])(\nComments:\n[all comments])"
 *
 * The element names are only listed if the column has more than one element
 * per line. The comments are only added if detailed is true and if they are
 * longer than 2 characters.
 *
 * Returns a newly allocated string that the caller must free, or NULL if an
 * error occurred.
 */
char* obi_column_formatted_infos(OBIDMS_column_p column, bool detailed)
{
	char*  column_infos  = NULL;
	char*  elt_names     = NULL;
	char*  data_type_str = NULL;
	char*  comments      = NULL;
	bool   with_elements;
	bool   with_comments;
	size_t infos_size;

	// Get element names informations
	elt_names = obi_get_formatted_elements_names(column);
	if (elt_names == NULL)
	{
		obidebug(1, "\nError getting formatted elements names for formatted columns infos");
		return NULL;
	}

	// Get data type informations
	data_type_str = name_data_type((column->header)->returned_data_type);
	if (data_type_str == NULL)
	{
		obidebug(1, "\nError getting formatted data type for formatted columns infos");
		free(elt_names);
		return NULL;
	}

	// Get comments if detailed informations required
	if (detailed)
		comments = (column->header)->comments;

	with_elements = ((column->header)->nb_elements_per_line > 1);
	with_comments = ((comments != NULL) && (strlen(comments) > 2));

	// Compute the final size first, so that a single allocation is needed and
	// there is only one failure point to clean up after
	infos_size = strlen("data type: ") + strlen(data_type_str) + 1;
	if (with_elements)
		infos_size += strlen(", elements: ") + strlen(elt_names);
	if (with_comments)
		infos_size += strlen("\nComments:\n") + strlen(comments);

	column_infos = (char*) malloc(infos_size * sizeof(char));
	if (column_infos == NULL)
	{
		obi_set_errno(OBI_MALLOC_ERROR);
		obidebug(1, "\nError allocating memory for formatted column infos");
		free(elt_names);
		free(data_type_str);
		return NULL;
	}

	// Data type
	strcpy(column_infos, "data type: ");
	strcat(column_infos, data_type_str);

	// Element names if more than 1
	if (with_elements)
	{
		strcat(column_infos, ", elements: ");
		strcat(column_infos, elt_names);
	}

	// All comments if required and not empty
	if (with_comments)
	{
		strcat(column_infos, "\nComments:\n");
		strcat(column_infos, comments);
	}

	free(elt_names);
	free(data_type_str);

	return column_infos;
}
@ -2472,7 +2552,6 @@ int obi_column_prepare_to_set_value(OBIDMS_column_p column, index_t line_nb, ind
}
int obi_column_prepare_to_get_value(OBIDMS_column_p column, index_t line_nb)
{
if ((line_nb+1) > ((column->header)->line_count))

View File

@ -505,12 +505,37 @@ index_t obi_column_get_element_index_from_name(OBIDMS_column_p column, const cha
char* obi_get_elements_names(OBIDMS_column_p column);
// TODO
//char* obi_get_formatted_elements_names(OBIDMS_column_p column);
/**
* @brief Recovers the elements names of the lines of a column with a human readable format ("0; 1; 2; ...; n\0").
*
* @warning The returned pointer has to be freed by the caller.
*
* @param column A pointer on an OBIDMS column.
*
* @returns A pointer on a character array where the elements names are stored.
* @retval NULL if an error occurred.
*
* @since September 2020
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
char* obi_get_formatted_elements_names(OBIDMS_column_p column);
// TODO
//char* obi_column_formatted_infos(OBIDMS_column_p column);
/**
* @brief Returns the informations of a column with a human readable format (data type, element names, comments).
*
* @warning The returned pointer has to be freed by the caller.
*
* @param column A pointer on an OBIDMS column.
* @param detailed Whether the informations should contain column comments or just data type and element names.
*
* @returns A pointer on a character array where the formatted column informations are stored.
* @retval NULL if an error occurred.
*
* @since September 2020
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
char* obi_column_formatted_infos(OBIDMS_column_p column, bool detailed);
/**

View File

@ -17,6 +17,7 @@
#include <sys/mman.h>
#include <inttypes.h>
#include <math.h>
#include <time.h>
//#include <ctype.h>
#include "obiview.h"
@ -1185,6 +1186,7 @@ static int close_view(Obiview_p view)
obidebug(1, "\nError getting a column to close from the linked list of column pointers of a view");
return -1;
}
if (obi_close_column(column) < 0)
{
obidebug(1, "\nError closing a column while closing a view");
@ -2603,6 +2605,144 @@ int obi_view_create_column_alias(Obiview_p view, const char* current_name, const
}
char* obi_view_formatted_infos(Obiview_p view, bool detailed)
{
int i;
char* view_infos = NULL;
char* view_name = NULL;
time_t creation_date = NULL;
char* creation_date_str = NULL;
index_t line_count;
char line_count_str[256];
OBIDMS_column_p column;
char* column_alias = NULL;
char* column_infos = NULL;
char* comments = NULL;
// View name
view_name = (view->infos)->name;
view_infos = (char*) malloc((strlen("# View name:\n")+strlen(view_name)+1) * sizeof(char));
strcpy(view_infos, "# View name:\n");
strcat(view_infos, view_name);
// Date created
if (view->read_only) // Date not saved until view is finished writing
{
creation_date = (view->infos)->creation_date;
creation_date_str = ctime(&creation_date);
view_infos = realloc(view_infos, (strlen(view_infos)+strlen("\n# Date created:\n")+strlen(creation_date_str)+1) * sizeof(char));
strcat(view_infos, "\n# Date created:\n");
strcat(view_infos, creation_date_str);
}
// Line count
line_count = (view->infos)->line_count;
snprintf(line_count_str, sizeof line_count_str, "%zu", line_count);
view_infos = realloc(view_infos, (strlen(view_infos)+strlen("\n# Line count:\n")+strlen(line_count_str)+1) * sizeof(char));
strcat(view_infos, "# Line count:\n");
strcat(view_infos, line_count_str);
// Columns: go through each, print their alias then their infos
view_infos = realloc(view_infos, (strlen(view_infos)+strlen("\n# Columns:")+1) * sizeof(char));
strcat(view_infos, "\n# Columns:");
for (i=0; i<((view->infos)->column_count); i++)
{
column = *((OBIDMS_column_p*)ll_get(view->columns, i));
if (column == NULL)
{
obidebug(1, "\nError getting a column from the linked list of column pointers of a view to format view infos");
return NULL;
}
// Column alias
column_alias = (((view->infos)->column_references)[i]).alias;
view_infos = realloc(view_infos, (strlen(view_infos)+strlen("\n")+strlen(column_alias)+strlen(", ")+1) * sizeof(char));
strcat(view_infos, "\n");
strcat(view_infos, column_alias);
strcat(view_infos, ", ");
// Column infos
column_infos = obi_column_formatted_infos(column, detailed);
if (column_infos == NULL)
{
obidebug(1, "\nError getting column infos to format view infos");
return NULL;
}
view_infos = realloc(view_infos, (strlen(view_infos)+strlen(column_infos)+1) * sizeof(char));
strcat(view_infos, column_infos);
free(column_infos);
}
// Get commments if detailed informations required
if (detailed)
{
comments = (view->infos)->comments;
if (strlen(comments)>2) // Add all comments if not empty
{
view_infos = realloc(view_infos, (strlen(view_infos)+strlen("\n# Comments:\n")+strlen(comments)+1) * sizeof(char));
if (view_infos == NULL)
{
obi_set_errno(OBI_MALLOC_ERROR);
obidebug(1, "\nError allocating memory for formatted view infos");
return NULL;
}
strcat(view_infos, "\n# Comments:\n");
strcat(view_infos, comments);
}
}
view_infos = realloc(view_infos, (strlen(view_infos)+2) * sizeof(char));
strcat(view_infos, "\n");
return view_infos;
}
char* obi_view_formatted_infos_one_line(Obiview_p view)
{
int i;
char* view_infos = NULL;
char* view_name = NULL;
time_t creation_date = NULL;
char* creation_date_str = NULL;
index_t line_count;
char line_count_str[256];
// View name
view_name = (view->infos)->name;
view_infos = (char*) malloc((strlen(" # ")+strlen(view_name)+2) * sizeof(char));
strcpy(view_infos, " # ");
strcat(view_infos, view_name);
strcat(view_infos, ":");
// Date created
if (view->read_only) // Date not saved until view is finished writing
{
creation_date = (view->infos)->creation_date;
creation_date_str = ctime(&creation_date);
// Delete \n added by ctime
creation_date_str[strlen(creation_date_str)-1] = '\0';
view_infos = realloc(view_infos, (strlen(view_infos)+strlen(" Date created: ")+strlen(creation_date_str)+1) * sizeof(char));
strcat(view_infos, " Date created: ");
strcat(view_infos, creation_date_str);
}
// Line count
line_count = (view->infos)->line_count;
snprintf(line_count_str, sizeof line_count_str, "%zu", line_count);
view_infos = realloc(view_infos, (strlen(view_infos)+strlen(" ; Line count: ")+strlen(line_count_str)+1) * sizeof(char));
strcat(view_infos, " ; Line count: ");
strcat(view_infos, line_count_str);
view_infos = realloc(view_infos, (strlen(view_infos)+2) * sizeof(char));
strcat(view_infos, "\n");
return view_infos;
}
int obi_view_write_comments(Obiview_p view, const char* comments)
{
size_t new_size;

View File

@ -519,6 +519,39 @@ OBIDMS_column_p* obi_view_get_pointer_on_column_in_view(Obiview_p view, const ch
int obi_view_create_column_alias(Obiview_p view, const char* current_name, const char* alias);
/**
* @brief Returns the informations of a view with a human readable format (view name, date created, line count, column informations, comments).
*
* @warning The returned pointer has to be freed by the caller.
*
* @param view A pointer on a view.
* @param detailed Whether the informations should contain view comments.
*
* @returns A pointer on a character array where the formatted view informations are stored.
* @retval NULL if an error occurred.
*
* @since September 2020
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
char* obi_view_formatted_infos(Obiview_p view, bool detailed);
/**
* @brief Returns the informations of a view with a human readable format on one line (view name, date created, line count).
*
* @warning The returned pointer has to be freed by the caller.
*
* @param view A pointer on a view.
*
* @returns A pointer on a character array where the formatted view informations are stored.
* @retval NULL if an error occurred.
*
* @since September 2020
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
char* obi_view_formatted_infos_one_line(Obiview_p view);
/**
* @brief Internal function writing new comments in a view file.
*