Compare commits
18 Commits
Author | SHA1 | Date | |
---|---|---|---|
c4696ac865 | |||
11a0945a9b | |||
f23c40c905 | |||
f99fc13b75 | |||
1da6aac1b8 | |||
159803b40a | |||
7dcbc34017 | |||
db2202c8b4 | |||
d33ff97846 | |||
1dcdf69f1f | |||
dec114eed6 | |||
f36691053b | |||
f2aa5fcf8b | |||
bccb3e6874 | |||
f5a17bea68 | |||
e28507639a | |||
e6feac93fe | |||
50b292b489 |
@ -4,7 +4,7 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||
from obitools3.dms import DMS
|
||||
from obitools3.dms.view.view cimport View
|
||||
from obitools3.uri.decode import open_uri
|
||||
from obitools3.apps.optiongroups import addMinimalOutputOption
|
||||
from obitools3.apps.optiongroups import addMinimalOutputOption, addNoProgressBarOption
|
||||
from obitools3.dms.view import RollbackException
|
||||
from obitools3.apps.config import logger
|
||||
from obitools3.utils cimport str2bytes
|
||||
@ -28,6 +28,7 @@ __title__="Concatenate views."
|
||||
def addOptions(parser):
|
||||
|
||||
addMinimalOutputOption(parser)
|
||||
addNoProgressBarOption(parser)
|
||||
|
||||
group=parser.add_argument_group('obi cat specific options')
|
||||
|
||||
@ -47,9 +48,9 @@ def run(config):
|
||||
|
||||
logger("info", "obi cat")
|
||||
|
||||
# Open the views to concatenate
|
||||
iview_list = []
|
||||
# Check the views to concatenate
|
||||
idms_list = []
|
||||
iview_list = []
|
||||
total_len = 0
|
||||
remove_qual = False
|
||||
remove_rev_qual = False
|
||||
@ -67,8 +68,9 @@ def run(config):
|
||||
if REVERSE_QUALITY_COLUMN not in i_view: # same as above for reverse quality
|
||||
remove_rev_qual = True
|
||||
total_len += len(i_view)
|
||||
iview_list.append(i_view)
|
||||
idms_list.append(i_dms)
|
||||
iview_list.append(i_view.name)
|
||||
i_view.close()
|
||||
|
||||
# Open the output: only the DMS
|
||||
output = open_uri(config['obi']['outputURI'],
|
||||
@ -97,8 +99,10 @@ def run(config):
|
||||
# Initialize multiple elements columns
|
||||
if type(output_0)==BufferedWriter:
|
||||
dict_cols = {}
|
||||
for v in iview_list:
|
||||
for v_uri in config["cat"]["views_to_cat"]:
|
||||
v = open_uri(v_uri)[1]
|
||||
for coln in v.keys():
|
||||
col = v[coln]
|
||||
if v[coln].nb_elements_per_line > 1:
|
||||
if coln not in dict_cols:
|
||||
dict_cols[coln] = {}
|
||||
@ -108,6 +112,7 @@ def run(config):
|
||||
else:
|
||||
dict_cols[coln]['eltnames'] = set(v[coln].elements_names + list(dict_cols[coln]['eltnames']))
|
||||
dict_cols[coln]['nbelts'] = len(dict_cols[coln]['eltnames'])
|
||||
v.close()
|
||||
for coln in dict_cols:
|
||||
Column.new_column(o_view, coln, dict_cols[coln]['obitype'],
|
||||
nb_elements_per_line=dict_cols[coln]['nbelts'], elements_names=list(dict_cols[coln]['eltnames']))
|
||||
@ -119,7 +124,8 @@ def run(config):
|
||||
pb = None
|
||||
|
||||
i = 0
|
||||
for v in iview_list:
|
||||
for v_uri in config["cat"]["views_to_cat"]:
|
||||
v = open_uri(v_uri)[1]
|
||||
for entry in v:
|
||||
PyErr_CheckSignals()
|
||||
if pb is not None:
|
||||
@ -130,6 +136,7 @@ def run(config):
|
||||
else:
|
||||
o_view[i] = entry
|
||||
i+=1
|
||||
v.close()
|
||||
|
||||
# Deletes quality columns if needed
|
||||
if type(output_0)!=BufferedWriter:
|
||||
@ -144,7 +151,7 @@ def run(config):
|
||||
|
||||
# Save command config in DMS comments
|
||||
command_line = " ".join(sys.argv[1:])
|
||||
o_view.write_config(config, "cat", command_line, input_dms_name=[d.name for d in idms_list], input_view_name=[v.name for v in iview_list])
|
||||
o_view.write_config(config, "cat", command_line, input_dms_name=[d.name for d in idms_list], input_view_name=[vname for vname in iview_list])
|
||||
o_dms.record_command_line(command_line)
|
||||
|
||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||
|
@ -41,6 +41,17 @@ def addOptions(parser):
|
||||
help="Minimum identity to consider for assignment, as a normalized identity, e.g. 0.95 for an identity of 95%%. "
|
||||
"Default: 0.00 (no threshold).")
|
||||
|
||||
group.add_argument('--minimum-circle','-c',
|
||||
action="store", dest="ecotag:bubble_threshold",
|
||||
metavar='<CIRCLE_THRESHOLD>',
|
||||
default=0.99,
|
||||
type=float,
|
||||
help="Minimum identity considered for the assignment circle "
|
||||
"(sequence is assigned to the LCA of all sequences within a similarity circle of the best matches; "
|
||||
"the threshold for this circle is the highest value between <CIRCLE_THRESHOLD> and the best assignment score found). "
|
||||
"Give value as a normalized identity, e.g. 0.95 for an identity of 95%%. "
|
||||
"Default: 0.99.")
|
||||
|
||||
def run(config):
|
||||
|
||||
DMS.obi_atexit()
|
||||
@ -66,9 +77,8 @@ def run(config):
|
||||
ref_view_name = ref[1]
|
||||
|
||||
# Check that the threshold demanded is greater than or equal to the threshold used to build the reference database
|
||||
if config['ecotag']['threshold'] < eval(ref_dms[ref_view_name].comments["ref_db_threshold"]) :
|
||||
print("Error: The threshold demanded (%f) is lower than the threshold used to build the reference database (%f).",
|
||||
config['ecotag']['threshold'], ref_dms[ref_view_name].comments["ref_db_threshold"])
|
||||
if config['ecotag']['bubble_threshold'] < eval(ref_dms[ref_view_name].comments["ref_db_threshold"]) :
|
||||
raise Exception(f"Error: The threshold demanded ({config['ecotag']['bubble_threshold']}) is lower than the threshold used to build the reference database ({float(ref_dms[ref_view_name].comments['ref_db_threshold'])}).")
|
||||
|
||||
# Open the output: only the DMS
|
||||
output = open_uri(config['obi']['outputURI'],
|
||||
@ -113,8 +123,9 @@ def run(config):
|
||||
if obi_ecotag(i_dms.name_with_full_path, tobytes(i_view_name), \
|
||||
ref_dms.name_with_full_path, tobytes(ref_view_name), \
|
||||
taxo_dms.name_with_full_path, tobytes(taxonomy_name), \
|
||||
tobytes(o_view_name), comments,
|
||||
config['ecotag']['threshold']) < 0:
|
||||
tobytes(o_view_name), comments, \
|
||||
config['ecotag']['threshold'], \
|
||||
config['ecotag']['bubble_threshold']) < 0:
|
||||
raise Exception("Error running ecotag")
|
||||
|
||||
# If the input and output DMS are not the same, export result view to output DMS
|
||||
|
@ -89,7 +89,7 @@ def run(config):
|
||||
|
||||
if pb is not None:
|
||||
pb(i, force=True)
|
||||
print("", file=sys.stderr)
|
||||
print("", file=sys.stderr)
|
||||
|
||||
# TODO save command in input dms?
|
||||
|
||||
|
@ -77,6 +77,11 @@ def addOptions(parser):
|
||||
help="Do a first readthrough of the dataset if it contains huge dictionaries (more than 100 keys) for "
|
||||
"a much faster import. This option is not recommended and will slow down the import in any other case.")
|
||||
|
||||
group.add_argument('--space-priority',
|
||||
action="store_true", dest="import:space_priority",
|
||||
default=False,
|
||||
help="If importing a view into another DMS, do it by importing each line, saving disk space if the original view "
|
||||
"has a line selection associated.")
|
||||
|
||||
def run(config):
|
||||
|
||||
@ -142,7 +147,7 @@ def run(config):
|
||||
else:
|
||||
v = None
|
||||
|
||||
if config['obi']['taxdump'] or isinstance(input[1], View):
|
||||
if config['obi']['taxdump'] or (isinstance(input[1], View) and not config['import']['space_priority']):
|
||||
dms_only=True
|
||||
else:
|
||||
dms_only=False
|
||||
@ -170,12 +175,15 @@ def run(config):
|
||||
logger("info", "Done.")
|
||||
return
|
||||
|
||||
# If importing a view between two DMS, use C API
|
||||
if isinstance(input[1], View):
|
||||
# If importing a view between two DMS and not wanting to save space if line selection in original view, use C API
|
||||
if isinstance(input[1], View) and not config['import']['space_priority']:
|
||||
if obi_import_view(input[0].name_with_full_path, o_dms.name_with_full_path, input[1].name, tobytes((config['obi']['outputURI'].split('/'))[-1])) < 0 :
|
||||
input[0].close(force=True)
|
||||
output[0].close(force=True)
|
||||
raise Exception("Error importing a view in a DMS")
|
||||
o_dms.record_command_line(" ".join(sys.argv[1:]))
|
||||
o_dms.close()
|
||||
input[0].close(force=True)
|
||||
output[0].close(force=True)
|
||||
logger("info", "Done.")
|
||||
return
|
||||
|
||||
|
@ -23,6 +23,7 @@ from obitools3.dms.capi.obiview cimport NUC_SEQUENCE_COLUMN, \
|
||||
import shutil
|
||||
import string
|
||||
import random
|
||||
import sys
|
||||
from cpython.exc cimport PyErr_CheckSignals
|
||||
|
||||
|
||||
@ -366,7 +367,7 @@ def random_new_view(config, infos, first=False):
|
||||
infos['view'] = View_NUC_SEQS.new(infos['dms'], random_unique_name(infos), comments=random_comments(config)) # TODO quality column
|
||||
else :
|
||||
infos['view'] = View.new(infos['dms'], random_unique_name(infos), comments=random_comments(config)) # TODO quality column
|
||||
|
||||
infos['view'].write_config(config, "test", infos["command_line"], input_dms_name=[infos['dms'].name], input_view_name=["random"])
|
||||
print_test(config, repr(infos['view']))
|
||||
if v_to_clone is not None :
|
||||
if line_selection is None:
|
||||
@ -441,7 +442,7 @@ def addOptions(parser):
|
||||
default=20,
|
||||
type=int,
|
||||
help="Maximum length of tuples. "
|
||||
"Default: 200")
|
||||
"Default: 50")
|
||||
|
||||
group.add_argument('--max_ini_col_count','-o',
|
||||
action="store", dest="test:maxinicolcount",
|
||||
@ -457,7 +458,7 @@ def addOptions(parser):
|
||||
default=10000,
|
||||
type=int,
|
||||
help="Maximum number of lines in a column. "
|
||||
"Default: 10000")
|
||||
"Default: 1000")
|
||||
|
||||
group.add_argument('--max_elts_per_line','-e',
|
||||
action="store", dest="test:maxelts",
|
||||
@ -497,7 +498,8 @@ def run(config):
|
||||
(b"OBI_SEQ", False): random_seq, (b"OBI_SEQ", True): random_seq_tuples,
|
||||
(b"OBI_STR", False): random_bytes, (b"OBI_STR", True): random_bytes_tuples
|
||||
},
|
||||
'tests': [test_set_and_get, test_add_col, test_delete_col, test_col_alias, test_new_view]
|
||||
'tests': [test_set_and_get, test_add_col, test_delete_col, test_col_alias, test_new_view],
|
||||
'command_line': " ".join(sys.argv[1:])
|
||||
}
|
||||
|
||||
# TODO ???
|
||||
|
@ -354,6 +354,9 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, di
|
||||
key = mergedKeys[k]
|
||||
merged_col_name = mergedKeys_m[k]
|
||||
|
||||
if merged_infos[merged_col_name]['nb_elts'] == 1:
|
||||
raise Exception("Can't merge information from a tag with only one element (e.g. one sample ; don't use -m option)")
|
||||
|
||||
if merged_col_name in view:
|
||||
i_col = view[merged_col_name]
|
||||
else:
|
||||
|
@ -11,4 +11,5 @@ cdef extern from "obi_ecotag.h" nogil:
|
||||
const char* taxonomy_name,
|
||||
const char* output_view_name,
|
||||
const char* output_view_comments,
|
||||
double ecotag_threshold)
|
||||
double ecotag_threshold,
|
||||
double bubble_threshold)
|
||||
|
@ -40,7 +40,8 @@ from obitools3.utils cimport tobytes, \
|
||||
from obitools3.dms.column import typed_column
|
||||
|
||||
from libc.stdlib cimport free
|
||||
|
||||
from libc.string cimport strcpy
|
||||
|
||||
import importlib
|
||||
import inspect
|
||||
import pkgutil
|
||||
@ -97,6 +98,7 @@ cdef class Column(OBIWrapper) :
|
||||
object alias=b""):
|
||||
# TODO indexer_name?
|
||||
|
||||
cdef Column column
|
||||
cdef bytes column_name_b = tobytes(column_name)
|
||||
cdef bytes alias_b = tobytes(alias)
|
||||
cdef bytes comments_b = str2bytes(json.dumps(bytes2str_object(comments)))
|
||||
@ -132,13 +134,14 @@ cdef class Column(OBIWrapper) :
|
||||
raise RuntimeError("Cannot create column %s in view %s: trying to create quality column but no NUC_SEQ column to associate it with in the view" % (bytes2str(column_name_b),
|
||||
bytes2str(view.name)))
|
||||
associated_column_name_b = NUC_SEQUENCE_COLUMN
|
||||
associated_column_version = view[NUC_SEQUENCE_COLUMN].version
|
||||
associated_column_version = view[NUC_SEQUENCE_COLUMN].version
|
||||
elif column_name == REVERSE_QUALITY_COLUMN:
|
||||
if REVERSE_SEQUENCE_COLUMN not in view:
|
||||
raise RuntimeError("Cannot create column %s in view %s: trying to create reverse quality column but no REVERSE_SEQUENCE column to associate it with in the view" % (bytes2str(column_name_b),
|
||||
bytes2str(view.name)))
|
||||
associated_column_name_b = REVERSE_SEQUENCE_COLUMN
|
||||
associated_column_version = view[REVERSE_SEQUENCE_COLUMN].version
|
||||
|
||||
|
||||
if (obi_view_add_column(view = view.pointer(),
|
||||
column_name = column_name_b,
|
||||
@ -158,8 +161,19 @@ cdef class Column(OBIWrapper) :
|
||||
create = True)<0):
|
||||
raise RuntimeError("Cannot create column %s in view %s" % (bytes2str(column_name_b),
|
||||
bytes2str(view.name)))
|
||||
|
||||
return Column.open(view, alias_b)
|
||||
|
||||
column = Column.open(view, alias_b)
|
||||
|
||||
# Automatically associate nuc sequence column to quality column if necessary
|
||||
if data_type == OBI_QUAL:
|
||||
if column_name == QUALITY_COLUMN:
|
||||
view[NUC_SEQUENCE_COLUMN].associated_column_name = column.name
|
||||
view[NUC_SEQUENCE_COLUMN].associated_column_version = column.version
|
||||
elif column_name == REVERSE_QUALITY_COLUMN:
|
||||
view[REVERSE_SEQUENCE_COLUMN].associated_column_name = column.name
|
||||
view[REVERSE_SEQUENCE_COLUMN].associated_column_version = column.version
|
||||
|
||||
return column
|
||||
|
||||
|
||||
@staticmethod
|
||||
@ -407,6 +421,31 @@ cdef class Column(OBIWrapper) :
|
||||
raise OBIDeactivatedInstanceError()
|
||||
return obi_format_date(self.pointer().header.creation_date)
|
||||
|
||||
|
||||
# associated_column name property getter and setter
|
||||
@property
|
||||
def associated_column_name(self):
|
||||
if not self.active() :
|
||||
raise OBIDeactivatedInstanceError()
|
||||
return self.pointer().header.associated_column.column_name
|
||||
|
||||
@associated_column_name.setter
|
||||
def associated_column_name(self, object new_name):
|
||||
strcpy(self.pointer().header.associated_column.column_name, tobytes(new_name))
|
||||
|
||||
|
||||
# associated_column version property getter and setter
|
||||
@property
|
||||
def associated_column_version(self):
|
||||
if not self.active() :
|
||||
raise OBIDeactivatedInstanceError()
|
||||
return self.pointer().header.associated_column.version
|
||||
|
||||
@associated_column_version.setter
|
||||
def associated_column_version(self, int new_version):
|
||||
self.pointer().header.associated_column.version = new_version
|
||||
|
||||
|
||||
# comments property getter
|
||||
@property
|
||||
def comments(self):
|
||||
|
@ -7,6 +7,7 @@ cdef dict __VIEW_CLASS__= {}
|
||||
from libc.stdlib cimport malloc
|
||||
|
||||
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||
from obitools3.version import version
|
||||
|
||||
from ..capi.obiview cimport Alias_column_pair_p, \
|
||||
obi_new_view, \
|
||||
@ -183,9 +184,15 @@ cdef class View(OBIWrapper) :
|
||||
|
||||
|
||||
@OBIWrapper.checkIsActive
|
||||
def __repr__(self) :
|
||||
cdef str s = "#View name:\n{name:s}\n#Line count:\n{line_count:d}\n#Columns:\n".format(name = bytes2str(self.name),
|
||||
line_count = self.line_count)
|
||||
def __repr__(self) :
|
||||
cdef str s
|
||||
if self.read_only: # can read date
|
||||
s = "#View name:\n{name:s}\n#Date created:\n{date:s}\n#Line count:\n{line_count:d}\n#Columns:\n".format(name = bytes2str(self.name),
|
||||
line_count = self.line_count,
|
||||
date = str(bytes2str_object(self.comments["Date created"])))
|
||||
else:
|
||||
s = "#View name:\n{name:s}\n#Line count:\n{line_count:d}\n#Columns:\n".format(name = bytes2str(self.name),
|
||||
line_count = self.line_count)
|
||||
for column_name in self.keys() :
|
||||
s = s + repr(self[column_name]) + '\n'
|
||||
return s
|
||||
@ -434,6 +441,7 @@ cdef class View(OBIWrapper) :
|
||||
for i in range(len(input_view_name)):
|
||||
input_str.append(tostr(input_dms_name[i])+"/"+tostr(input_view_name[i]))
|
||||
comments["input_str"] = input_str
|
||||
comments["version"] = version
|
||||
return bytes2str_object(comments)
|
||||
|
||||
|
||||
|
@ -5,6 +5,7 @@ from obitools3.dms.view.view cimport Line
|
||||
from obitools3.utils cimport bytes2str_object, str2bytes, tobytes
|
||||
from obitools3.dms.column.column cimport Column_line, Column_multi_elts
|
||||
|
||||
import sys
|
||||
|
||||
cdef class TabFormat:
|
||||
|
||||
@ -26,18 +27,22 @@ cdef class TabFormat:
|
||||
|
||||
if self.header and self.first_line:
|
||||
if isinstance(data.view[k], Column_multi_elts):
|
||||
for k2 in data.view[k].keys():
|
||||
keys = data.view[k].keys()
|
||||
keys.sort()
|
||||
for k2 in keys:
|
||||
line.append(tobytes(k)+b':'+tobytes(k2))
|
||||
else:
|
||||
line.append(tobytes(k))
|
||||
else:
|
||||
value = data[k]
|
||||
if isinstance(data.view[k], Column_multi_elts):
|
||||
keys = data.view[k].keys()
|
||||
keys.sort()
|
||||
if value is None: # all keys at None
|
||||
for k2 in data.view[k].keys(): # TODO could be much more efficient
|
||||
for k2 in keys: # TODO could be much more efficient
|
||||
line.append(self.NAString)
|
||||
else:
|
||||
for k2 in data.view[k].keys(): # TODO could be much more efficient
|
||||
for k2 in keys: # TODO could be much more efficient
|
||||
if value[k2] is not None:
|
||||
line.append(str2bytes(str(bytes2str_object(value[k2])))) # genius programming
|
||||
else:
|
||||
|
@ -259,7 +259,7 @@ def buildJoinedSequence(ali, reverse, seq, forward=None):
|
||||
seq[b"pairedend_limit"]=len(forward)
|
||||
seq[b"seq_length"] = ali.consensus_len
|
||||
seq[b"overlap_length"] = ali.overlap_len
|
||||
if ali.consensus_len > 0:
|
||||
if ali.overlap_len > 0:
|
||||
seq[b'score_norm']=round(float(ali.score)/ali.overlap_len, 3)
|
||||
else:
|
||||
seq[b"score_norm"]=0.0
|
||||
|
@ -276,11 +276,11 @@ def open_uri(uri,
|
||||
iseq = urib
|
||||
objclass = bytes
|
||||
else: # TODO update uopen to be able to write?
|
||||
if urip.path == b'-':
|
||||
if not urip.path or urip.path == b'-':
|
||||
file = sys.stdout.buffer
|
||||
elif urip.path :
|
||||
else:
|
||||
file = open(urip.path, 'wb')
|
||||
|
||||
|
||||
if file is not None:
|
||||
qualifiers=parse_qs(urip.query)
|
||||
|
||||
|
@ -2,7 +2,7 @@
|
||||
|
||||
from obitools3.dms.capi.obitypes cimport obitype_t, index_t
|
||||
|
||||
cpdef bytes format_separator(bytes format)
|
||||
cpdef bytes format_uniq_pattern(bytes format)
|
||||
cpdef int count_entries(file, bytes format)
|
||||
|
||||
cdef obi_errno_to_exception(index_t line_nb=*, object elt_id=*, str error_message=*)
|
||||
|
@ -24,11 +24,11 @@ import glob
|
||||
import gzip
|
||||
|
||||
|
||||
cpdef bytes format_separator(bytes format):
|
||||
cpdef bytes format_uniq_pattern(bytes format):
|
||||
if format == b"fasta":
|
||||
return b"\n>"
|
||||
elif format == b"fastq":
|
||||
return b"\n@"
|
||||
return b"\n\+\n"
|
||||
elif format == b"ngsfilter" or format == b"tabular":
|
||||
return b"\n"
|
||||
elif format == b"genbank" or format == b"embl":
|
||||
@ -42,7 +42,7 @@ cpdef bytes format_separator(bytes format):
|
||||
cpdef int count_entries(file, bytes format):
|
||||
|
||||
try:
|
||||
sep = format_separator(format)
|
||||
sep = format_uniq_pattern(format)
|
||||
if sep is None:
|
||||
return -1
|
||||
sep = re.compile(sep)
|
||||
@ -72,7 +72,7 @@ cpdef int count_entries(file, bytes format):
|
||||
return -1
|
||||
mmapped_file = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
|
||||
total_count += len(re.findall(sep, mmapped_file))
|
||||
if format != b"ngsfilter" and format != b"tabular" and format != b"embl" and format != b"genbank":
|
||||
if format != b"ngsfilter" and format != b"tabular" and format != b"embl" and format != b"genbank" and format != b"fastq":
|
||||
total_count += 1 # adding +1 for 1st entry because separators include \n (ngsfilter and tabular already count one more because of last \n)
|
||||
|
||||
except:
|
||||
|
@ -1,5 +1,5 @@
|
||||
major = 3
|
||||
minor = 0
|
||||
serial= '0b28'
|
||||
serial= '0b35'
|
||||
|
||||
version ="%d.%d.%s" % (major,minor,serial)
|
||||
|
@ -218,7 +218,8 @@ int obi_ecotag(const char* dms_name,
|
||||
const char* taxonomy_name,
|
||||
const char* output_view_name,
|
||||
const char* output_view_comments,
|
||||
double ecotag_threshold) // TODO different threshold for the similarity sphere around ref seqs
|
||||
double ecotag_threshold,
|
||||
double bubble_threshold)
|
||||
{
|
||||
|
||||
// For each sequence
|
||||
@ -239,6 +240,7 @@ int obi_ecotag(const char* dms_name,
|
||||
index_t query_seq_idx, ref_seq_idx;
|
||||
double score, best_score;
|
||||
double threshold;
|
||||
double lca_threshold;
|
||||
int lcs_length;
|
||||
int ali_length;
|
||||
Kmer_table_p ktable;
|
||||
@ -389,10 +391,10 @@ int obi_ecotag(const char* dms_name,
|
||||
return -1;
|
||||
}
|
||||
free(db_threshold_str);
|
||||
if (ecotag_threshold < db_threshold)
|
||||
if (bubble_threshold < db_threshold)
|
||||
{
|
||||
fprintf(stderr, "\nError: The threshold demanded (%f) is lower than the threshold used to build the reference database (%f).\n\n",
|
||||
ecotag_threshold, db_threshold);
|
||||
bubble_threshold, db_threshold);
|
||||
return -1;
|
||||
}
|
||||
|
||||
@ -597,11 +599,16 @@ int obi_ecotag(const char* dms_name,
|
||||
{
|
||||
best_match_idx = best_match_array[j];
|
||||
|
||||
// Find the LCA for the chosen threshold
|
||||
// Find the LCA for the highest threshold between best_score and the chosen bubble threshold
|
||||
score_array = obi_get_array_with_col_p_in_view(ref_view, score_a_column, best_match_idx, &lca_array_length);
|
||||
|
||||
if (bubble_threshold < best_score)
|
||||
lca_threshold = best_score;
|
||||
else
|
||||
lca_threshold = bubble_threshold;
|
||||
|
||||
k = 0;
|
||||
while ((k < lca_array_length) && (score_array[k] >= best_score))
|
||||
while ((k < lca_array_length) && (score_array[k] >= lca_threshold))
|
||||
k++;
|
||||
|
||||
if (k>0)
|
||||
|
@ -42,12 +42,14 @@
|
||||
* @param output_view_name The name to give to the output view.
|
||||
* @param output_view_comments The comments to associate to the output view.
|
||||
* @param ecotag_threshold The threshold at which to assign.
|
||||
* @param bubble_threshold The threshold at which to look for an LCA (i.e. minimum identity considered for the assignment circle);
|
||||
* the threshold actually used will be the highest between this value and the best assignment score found.
|
||||
*
|
||||
* The algorithm works like this:
|
||||
* For each query sequence:
|
||||
* Align with reference database
|
||||
* Keep the indices of all the best matches
|
||||
* For each kept index, get the LCA at that threshold as stored in the reference database, then the LCA of those LCAs
|
||||
* For each kept index, get the LCA at the highest threshold between bubble_threshold and the best assignment score found (as stored in the reference database), then the LCA of those LCAs
|
||||
* Write result (max score, threshold, taxid and scientific name of the LCA assigned, list of the ids of the best matches)
|
||||
*
|
||||
* @returns A value indicating the success of the operation.
|
||||
@ -65,7 +67,8 @@ int obi_ecotag(const char* dms_name,
|
||||
const char* taxonomy_name,
|
||||
const char* output_view_name,
|
||||
const char* output_view_comments,
|
||||
double ecotag_threshold);
|
||||
double ecotag_threshold,
|
||||
double bubble_threshold);
|
||||
|
||||
|
||||
#endif /* OBI_ECOTAG_H_ */
|
||||
|
@ -1659,6 +1659,12 @@ int obi_import_view(const char* dms_path_1, const char* dms_path_2, const char*
|
||||
else // Non-typed view
|
||||
view_2 = obi_new_view(dms_2, view_name_2, NULL, NULL, (view_1->infos)->comments);
|
||||
|
||||
if (view_2 == NULL)
|
||||
{
|
||||
obidebug(1, "\nError creating the new view to import a view in a DMS");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Import line count
|
||||
view_2->infos->line_count = view_1->infos->line_count;
|
||||
|
||||
|
@ -1312,19 +1312,10 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Store the associated column reference if needed // TODO discuss cases
|
||||
if (data_type == OBI_QUAL)
|
||||
// Store the associated column reference if needed
|
||||
if ((associated_column_name != NULL) && (*associated_column_name != '\0'))
|
||||
{
|
||||
if ((associated_column_name == NULL) || (*associated_column_name == '\0'))
|
||||
{
|
||||
obidebug(1, "\nError: The name of the associated column when creating a new column is NULL");
|
||||
munmap(new_column->header, header_size);
|
||||
close(column_file_descriptor);
|
||||
free(new_column);
|
||||
return NULL;
|
||||
}
|
||||
strcpy((header->associated_column).column_name, associated_column_name);
|
||||
|
||||
if (associated_column_version == -1)
|
||||
{
|
||||
obidebug(1, "\nError: The version of the associated column when creating a new column is not defined");
|
||||
@ -1336,6 +1327,7 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
|
||||
(header->associated_column).version = associated_column_version;
|
||||
}
|
||||
|
||||
|
||||
// If the data type is OBI_STR, OBI_SEQ or OBI_QUAL, the associated obi_indexer is opened or created
|
||||
if ((returned_data_type == OBI_STR) || (returned_data_type == OBI_SEQ) || (returned_data_type == OBI_QUAL) || tuples)
|
||||
{
|
||||
@ -1733,16 +1725,32 @@ int obi_close_column(OBIDMS_column_p column)
|
||||
int obi_clone_column_indexer(OBIDMS_column_p column)
|
||||
{
|
||||
char* new_indexer_name;
|
||||
int i;
|
||||
|
||||
new_indexer_name = obi_build_indexer_name((column->header)->name, (column->header)->version);
|
||||
if (new_indexer_name == NULL)
|
||||
return -1;
|
||||
|
||||
column->indexer = obi_clone_indexer(column->indexer, new_indexer_name); // TODO Need to lock this somehow?
|
||||
if (column->indexer == NULL)
|
||||
i=0;
|
||||
while (true) // find avl name not already used
|
||||
{
|
||||
obidebug(1, "\nError cloning a column's indexer to make it writable");
|
||||
return -1;
|
||||
new_indexer_name = obi_build_indexer_name((column->header)->name, ((column->header)->version)+i);
|
||||
if (new_indexer_name == NULL)
|
||||
return -1;
|
||||
|
||||
column->indexer = obi_clone_indexer(column->indexer, new_indexer_name); // TODO Need to lock this somehow?
|
||||
if (column->indexer == NULL)
|
||||
{
|
||||
if (errno == EEXIST)
|
||||
{
|
||||
free(new_indexer_name);
|
||||
i++;
|
||||
}
|
||||
else
|
||||
{
|
||||
free(new_indexer_name);
|
||||
obidebug(1, "\nError cloning a column's indexer to make it writable");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
strcpy((column->header)->indexer_name, new_indexer_name);
|
||||
@ -2423,16 +2431,20 @@ char* obi_get_formatted_elements_names(OBIDMS_column_p column)
|
||||
}
|
||||
|
||||
|
||||
char* obi_column_formatted_infos(OBIDMS_column_p column)
|
||||
char* obi_column_formatted_infos(OBIDMS_column_p column, bool detailed)
|
||||
{
|
||||
char* column_infos;
|
||||
char* elt_names;
|
||||
|
||||
column_infos = malloc(1024 * sizeof(char));
|
||||
char* column_infos = NULL;
|
||||
char* elt_names = NULL;
|
||||
char* column_name = NULL;
|
||||
// should be in view.c because alias exists in the context of view
|
||||
column_infos = malloc(2048 * sizeof(char)); // TODO
|
||||
|
||||
elt_names = obi_get_formatted_elements_names(column);
|
||||
|
||||
|
||||
// "column_name, data type: OBI_TYPE, element names: [formatted element names](, all comments)"
|
||||
|
||||
|
||||
free(elt_names);
|
||||
return column_infos;
|
||||
}
|
||||
|
@ -254,11 +254,15 @@ static int update_lines(Obiview_p view, index_t line_count);
|
||||
/**
|
||||
* @brief Internal function to clone a column in the context of a view.
|
||||
*
|
||||
* Used to edit a closed column.
|
||||
*
|
||||
* Clones with the right line selection and replaces the cloned columns with the new ones in the view.
|
||||
* If there is a line selection, all columns have to be cloned, otherwise only the column of interest is cloned.
|
||||
*
|
||||
* @param view A pointer on the view.
|
||||
* @param column_name The name of the column in the view that should be cloned.
|
||||
* @param clone_associated Whether to clone the associated column
|
||||
* (should always be true except when calling from the function itself to avoid infinite recursion).
|
||||
*
|
||||
* @returns A pointer on the new column.
|
||||
* @retval NULL if an error occurred.
|
||||
@ -266,7 +270,7 @@ static int update_lines(Obiview_p view, index_t line_count);
|
||||
* @since February 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
static OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name);
|
||||
static OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name, bool clone_associated);
|
||||
|
||||
|
||||
/**
|
||||
@ -845,7 +849,7 @@ static int update_lines(Obiview_p view, index_t line_count)
|
||||
// Clone the column first if needed
|
||||
if (!(column->writable))
|
||||
{
|
||||
column = clone_column_in_view(view, (((view->infos)->column_references)[i]).alias);
|
||||
column = clone_column_in_view(view, (((view->infos)->column_references)[i]).alias, true);
|
||||
if (column == NULL)
|
||||
{
|
||||
obidebug(1, "\nError cloning a column in a view when updating its line count");
|
||||
@ -870,12 +874,14 @@ static int update_lines(Obiview_p view, index_t line_count)
|
||||
}
|
||||
|
||||
|
||||
static OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name)
|
||||
static OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name, bool clone_associated)
|
||||
{
|
||||
int i;
|
||||
int i, j;
|
||||
OBIDMS_column_p column = NULL;
|
||||
OBIDMS_column_p new_column = NULL;
|
||||
OBIDMS_column_p column_buffer;
|
||||
OBIDMS_column_p associated_cloned_column = NULL;
|
||||
char* associated_column_alias = NULL;
|
||||
|
||||
// Check that the view is not read-only
|
||||
if (view->read_only)
|
||||
@ -916,11 +922,62 @@ static OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_n
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Look for associated column to clone and reassociate
|
||||
if ((column_buffer->header->associated_column).column_name[0] != '\0')
|
||||
{
|
||||
// Get the associated column alias
|
||||
j=0;
|
||||
while (((strcmp((((view->infos)->column_references)[j]).column_refs.column_name, (column_buffer->header->associated_column).column_name)) ||
|
||||
((((view->infos)->column_references)[j]).column_refs.version != (column_buffer->header->associated_column).version)) &&
|
||||
j<(view->infos)->column_count) // TODO function for that
|
||||
j++;
|
||||
|
||||
if (j == (view->infos)->column_count) // not found
|
||||
{
|
||||
obi_set_errno(OBIVIEW_ERROR);
|
||||
obidebug(1, "\nCould not find associated column when cloning a column for editing");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// No line selection: only this column is cloned, clone and reassociate the associated column
|
||||
if ((view->line_selection == NULL) && clone_associated)
|
||||
{
|
||||
associated_column_alias = (((view->infos)->column_references)[j]).alias;
|
||||
// Clone the associated column
|
||||
associated_cloned_column = clone_column_in_view(view, associated_column_alias, false);
|
||||
// Reassociate both ways
|
||||
strcpy((associated_cloned_column->header->associated_column).column_name, column->header->name);
|
||||
(associated_cloned_column->header->associated_column).version = column->header->version;
|
||||
strcpy((column->header->associated_column).column_name, associated_cloned_column->header->name);
|
||||
(column->header->associated_column).version = associated_cloned_column->header->version;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Line selection: all columns are cloned, check if associated column has been cloned previously (it precedes this one in the list) to reassociate
|
||||
if (j < i)
|
||||
{
|
||||
// Get pointer to associated column
|
||||
associated_cloned_column = *((OBIDMS_column_p*)ll_get(view->columns, j));
|
||||
if (associated_cloned_column == NULL)
|
||||
{
|
||||
obi_set_errno(OBIVIEW_ERROR);
|
||||
obidebug(1, "\nError getting a column to clone from the linked list of column pointers of a view");
|
||||
return NULL;
|
||||
}
|
||||
// Reassociate both ways
|
||||
strcpy((associated_cloned_column->header->associated_column).column_name, column->header->name);
|
||||
(associated_cloned_column->header->associated_column).version = column->header->version;
|
||||
strcpy((column->header->associated_column).column_name, associated_cloned_column->header->name);
|
||||
(column->header->associated_column).version = associated_cloned_column->header->version;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Close old cloned column
|
||||
obi_close_column(column_buffer);
|
||||
|
||||
if (!strcmp((((view->infos)->column_references)[i]).alias, column_name))
|
||||
// Found the column to return
|
||||
// Get the column to return
|
||||
new_column = column;
|
||||
}
|
||||
}
|
||||
@ -1193,7 +1250,7 @@ static int prepare_to_set_value_in_column(Obiview_p view, OBIDMS_column_p* colum
|
||||
return -1;
|
||||
}
|
||||
|
||||
(*column_pp) = clone_column_in_view(view, column_name);
|
||||
(*column_pp) = clone_column_in_view(view, column_name, true);
|
||||
if ((*column_pp) == NULL)
|
||||
{
|
||||
obidebug(1, "\nError trying to clone a column to modify it");
|
||||
@ -1844,6 +1901,7 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v
|
||||
{
|
||||
Obiview_p view;
|
||||
OBIDMS_column_p associated_nuc_column;
|
||||
OBIDMS_column_p associated_qual_column;
|
||||
int nb_predicates;
|
||||
|
||||
if (view_to_clone != NULL)
|
||||
@ -1896,6 +1954,10 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v
|
||||
obidebug(1, "Error adding an obligatory column in a nucleotide sequences view");
|
||||
return NULL;
|
||||
}
|
||||
// Associating both ways: associating nuc sequences column to quality column
|
||||
associated_qual_column = obi_view_get_column(view, QUALITY_COLUMN);
|
||||
strcpy((associated_nuc_column->header->associated_column).column_name, associated_qual_column->header->name);
|
||||
(associated_nuc_column->header->associated_column).version = associated_qual_column->header->version;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1922,7 +1984,7 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v
|
||||
(view->predicate_functions)[(view->nb_predicates)] = view_has_nuc_sequence_column;
|
||||
(view->predicate_functions)[(view->nb_predicates) + 1] = view_has_id_column;
|
||||
(view->predicate_functions)[(view->nb_predicates) + 2] = view_has_definition_column;
|
||||
// if (quality_column) # TODO discuss. Commented bc for example with obi annotate, clone view so clone predicate, then modify seq, so quality is deleted, and predicate boom
|
||||
// if (quality_column) # TODO fix by triggering predicate deleting if quality deleting. Commented bc for example with obi annotate, clone view so clone predicate, then modify seq, so quality is deleted, and predicate boom
|
||||
// (view->predicate_functions)[(view->nb_predicates) + 3] = view_has_quality_column;
|
||||
|
||||
view->nb_predicates = nb_predicates;
|
||||
@ -2212,7 +2274,7 @@ Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name)
|
||||
|
||||
// TODO return a pointer on the column?
|
||||
int obi_view_add_column(Obiview_p view,
|
||||
char* column_name,
|
||||
char* column_name,
|
||||
obiversion_t version_number,
|
||||
const char* alias,
|
||||
OBIType_t data_type,
|
||||
|
@ -406,7 +406,7 @@ Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name);
|
||||
* @param associated_column_name The name of the associated column if there is one (otherwise NULL or ""), if the column is created.
|
||||
* @param associated_column_version The version of the associated column if there is one (otherwise -1), if the column is created.
|
||||
* @param comments Optional comments associated with the column if it is created (NULL or "" if no comments associated).
|
||||
* @param create Whether the column should be created (create == true) or opened (create == false).
|
||||
* @param create Whether the column should be created (create == true) or already exists (create == false).
|
||||
*
|
||||
* @returns A value indicating the success of the operation.
|
||||
* @retval 0 if the operation was successfully completed.
|
||||
@ -416,7 +416,7 @@ Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name);
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int obi_view_add_column(Obiview_p view,
|
||||
char* column_name,
|
||||
char* column_name,
|
||||
obiversion_t version_number,
|
||||
const char* alias,
|
||||
OBIType_t data_type,
|
||||
|
Reference in New Issue
Block a user