Compare commits
33 Commits
v3.0.0-bet
...
v3.0.0-bet
Author | SHA1 | Date | |
---|---|---|---|
974528b2e6 | |||
1b346b54f9 | |||
058f2ad8b3 | |||
60bfd3ae8d | |||
67bdee105a | |||
0f745e0113 | |||
da8de52ba4 | |||
4d36538c6e | |||
8d0b17d87d | |||
343999a627 | |||
e9a40630e9 | |||
8dbcd3025a | |||
4cf635d001 | |||
b7e7cc232a | |||
b6ab792ceb | |||
ddea5a2964 | |||
30852ab7d5 | |||
4d0299904e | |||
eef5156d95 | |||
e62c991bbc | |||
1218eed7fd | |||
cd9cea8c97 | |||
98cfb70d73 | |||
b9f68c76c8 | |||
0b98371688 | |||
f0d152fcbd | |||
8019dee68e | |||
0b4a234671 | |||
d32cfdcce5 | |||
219c0d6fdc | |||
dc9f897917 | |||
bb72682f7d | |||
52920c3c71 |
@ -1,4 +1,3 @@
|
|||||||
#/usr/bin/env bash
|
|
||||||
|
|
||||||
_obi_comp ()
|
_obi_comp ()
|
||||||
{
|
{
|
@ -266,9 +266,9 @@ def run(config):
|
|||||||
# If the input and the output DMS are different, delete the temporary result view in the input DMS
|
# If the input and the output DMS are different, delete the temporary result view in the input DMS
|
||||||
if i_dms != o_dms:
|
if i_dms != o_dms:
|
||||||
View.delete_view(i_dms, o_view_name)
|
View.delete_view(i_dms, o_view_name)
|
||||||
o_dms.close()
|
o_dms.close(force=True)
|
||||||
|
|
||||||
i_dms.close()
|
i_dms.close(force=True)
|
||||||
|
|
||||||
logger("info", "Done.")
|
logger("info", "Done.")
|
||||||
|
|
@ -247,10 +247,10 @@ def run(config):
|
|||||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||||
#print(repr(view), file=sys.stderr)
|
#print(repr(view), file=sys.stderr)
|
||||||
|
|
||||||
input[0].close()
|
input[0].close(force=True)
|
||||||
if two_views:
|
if two_views:
|
||||||
rinput[0].close()
|
rinput[0].close(force=True)
|
||||||
output[0].close()
|
output[0].close(force=True)
|
||||||
|
|
||||||
logger("info", "Done.")
|
logger("info", "Done.")
|
||||||
|
|
@ -190,58 +190,50 @@ def sequenceTaggerGenerator(config, taxo=None):
|
|||||||
seq['seq_rank']=counter[0]
|
seq['seq_rank']=counter[0]
|
||||||
|
|
||||||
for i,v in toSet:
|
for i,v in toSet:
|
||||||
#try:
|
try:
|
||||||
if taxo is not None:
|
if taxo is not None:
|
||||||
environ = {'taxonomy' : taxo, 'sequence':seq, 'counter':counter[0], 'math':math}
|
environ = {'taxonomy' : taxo, 'sequence':seq, 'counter':counter[0], 'math':math}
|
||||||
else:
|
else:
|
||||||
environ = {'sequence':seq, 'counter':counter[0], 'math':math}
|
environ = {'sequence':seq, 'counter':counter[0], 'math':math}
|
||||||
val = eval(v, environ, seq)
|
val = eval(v, environ, seq)
|
||||||
#except Exception,e: # TODO discuss usefulness of this
|
except Exception: # set string if not a valid expression
|
||||||
# if options.onlyValid:
|
val = v
|
||||||
# raise e
|
|
||||||
# val = v
|
|
||||||
seq[i]=val
|
seq[i]=val
|
||||||
|
|
||||||
if length:
|
if length:
|
||||||
seq['seq_length']=len(seq)
|
seq['seq_length']=len(seq)
|
||||||
|
|
||||||
if newId is not None:
|
if newId is not None:
|
||||||
# try:
|
try:
|
||||||
if taxo is not None:
|
if taxo is not None:
|
||||||
environ = {'taxonomy' : taxo, 'sequence':seq, 'counter':counter[0], 'math':math}
|
environ = {'taxonomy' : taxo, 'sequence':seq, 'counter':counter[0], 'math':math}
|
||||||
else:
|
else:
|
||||||
environ = {'sequence':seq, 'counter':counter[0], 'math':math}
|
environ = {'sequence':seq, 'counter':counter[0], 'math':math}
|
||||||
val = eval(newId, environ, seq)
|
val = eval(newId, environ, seq)
|
||||||
# except Exception,e:
|
except Exception: # set string if not a valid expression
|
||||||
# if options.onlyValid:
|
val = newId
|
||||||
# raise e
|
|
||||||
# val = newId
|
|
||||||
seq.id=val
|
seq.id=val
|
||||||
|
|
||||||
if newDef is not None:
|
if newDef is not None:
|
||||||
# try:
|
try:
|
||||||
if taxo is not None:
|
if taxo is not None:
|
||||||
environ = {'taxonomy' : taxo, 'sequence':seq, 'counter':counter[0], 'math':math}
|
environ = {'taxonomy' : taxo, 'sequence':seq, 'counter':counter[0], 'math':math}
|
||||||
else:
|
else:
|
||||||
environ = {'sequence':seq, 'counter':counter[0], 'math':math}
|
environ = {'sequence':seq, 'counter':counter[0], 'math':math}
|
||||||
val = eval(newDef, environ, seq)
|
val = eval(newDef, environ, seq)
|
||||||
# except Exception,e:
|
except Exception: # set string if not a valid expression
|
||||||
# if options.onlyValid:
|
val = newDef
|
||||||
# raise e
|
|
||||||
# val = newDef
|
|
||||||
seq.definition=val
|
seq.definition=val
|
||||||
#
|
|
||||||
if newSeq is not None:
|
if newSeq is not None:
|
||||||
# try:
|
try:
|
||||||
if taxo is not None:
|
if taxo is not None:
|
||||||
environ = {'taxonomy' : taxo, 'sequence':seq, 'counter':counter[0], 'math':math}
|
environ = {'taxonomy' : taxo, 'sequence':seq, 'counter':counter[0], 'math':math}
|
||||||
else:
|
else:
|
||||||
environ = {'sequence':seq, 'counter':counter[0], 'math':math}
|
environ = {'sequence':seq, 'counter':counter[0], 'math':math}
|
||||||
val = eval(newSeq, environ, seq)
|
val = eval(newSeq, environ, seq)
|
||||||
# except Exception,e:
|
except Exception: # set string if not a valid expression
|
||||||
# if options.onlyValid:
|
val = newSeq
|
||||||
# raise e
|
|
||||||
# val = newSeq
|
|
||||||
seq.seq=val
|
seq.seq=val
|
||||||
if 'seq_length' in seq:
|
if 'seq_length' in seq:
|
||||||
seq['seq_length']=len(seq)
|
seq['seq_length']=len(seq)
|
||||||
@ -251,15 +243,14 @@ def sequenceTaggerGenerator(config, taxo=None):
|
|||||||
seq.view.delete_column(QUALITY_COLUMN)
|
seq.view.delete_column(QUALITY_COLUMN)
|
||||||
|
|
||||||
if run is not None:
|
if run is not None:
|
||||||
# try:
|
try:
|
||||||
if taxo is not None:
|
if taxo is not None:
|
||||||
environ = {'taxonomy' : taxo, 'sequence':seq, 'counter':counter[0], 'math':math}
|
environ = {'taxonomy' : taxo, 'sequence':seq, 'counter':counter[0], 'math':math}
|
||||||
else:
|
else:
|
||||||
environ = {'sequence':seq, 'counter':counter[0], 'math':math}
|
environ = {'sequence':seq, 'counter':counter[0], 'math':math}
|
||||||
eval(run, environ, seq)
|
eval(run, environ, seq)
|
||||||
# except Exception,e:
|
except Exception,e:
|
||||||
# if options.onlyValid:
|
raise e
|
||||||
# raise e
|
|
||||||
|
|
||||||
return sequenceTagger
|
return sequenceTagger
|
||||||
|
|
||||||
@ -379,7 +370,7 @@ def run(config):
|
|||||||
# If the input and the output DMS are different, delete the temporary imported view used to create the final view
|
# If the input and the output DMS are different, delete the temporary imported view used to create the final view
|
||||||
if i_dms != o_dms:
|
if i_dms != o_dms:
|
||||||
View.delete_view(o_dms, imported_view_name)
|
View.delete_view(o_dms, imported_view_name)
|
||||||
o_dms.close()
|
o_dms.close(force=True)
|
||||||
i_dms.close()
|
i_dms.close(force=True)
|
||||||
|
|
||||||
logger("info", "Done.")
|
logger("info", "Done.")
|
||||||
|
@ -97,9 +97,9 @@ def run(config):
|
|||||||
# If the input and the output DMS are different, delete the temporary result view in the input DMS
|
# If the input and the output DMS are different, delete the temporary result view in the input DMS
|
||||||
if i_dms != o_dms:
|
if i_dms != o_dms:
|
||||||
View.delete_view(i_dms, o_view_name)
|
View.delete_view(i_dms, o_view_name)
|
||||||
o_dms.close()
|
o_dms.close(force=True)
|
||||||
|
|
||||||
i_dms.close()
|
i_dms.close(force=True)
|
||||||
|
|
||||||
logger("info", "Done.")
|
logger("info", "Done.")
|
||||||
|
|
||||||
|
@ -86,7 +86,24 @@ def run(config):
|
|||||||
if not remove_rev_qual:
|
if not remove_rev_qual:
|
||||||
Column.new_column(o_view, REVERSE_SEQUENCE_COLUMN, OBI_SEQ)
|
Column.new_column(o_view, REVERSE_SEQUENCE_COLUMN, OBI_SEQ)
|
||||||
Column.new_column(o_view, REVERSE_QUALITY_COLUMN, OBI_QUAL, associated_column_name=REVERSE_SEQUENCE_COLUMN, associated_column_version=o_view[REVERSE_SEQUENCE_COLUMN].version)
|
Column.new_column(o_view, REVERSE_QUALITY_COLUMN, OBI_QUAL, associated_column_name=REVERSE_SEQUENCE_COLUMN, associated_column_version=o_view[REVERSE_SEQUENCE_COLUMN].version)
|
||||||
|
|
||||||
|
# Initialize multiple elements columns
|
||||||
|
dict_cols = {}
|
||||||
|
for v in iview_list:
|
||||||
|
for coln in v.keys():
|
||||||
|
if v[coln].nb_elements_per_line > 1:
|
||||||
|
if coln not in dict_cols:
|
||||||
|
dict_cols[coln] = {}
|
||||||
|
dict_cols[coln]['eltnames'] = set(v[coln].elements_names)
|
||||||
|
dict_cols[coln]['nbelts'] = v[coln].nb_elements_per_line
|
||||||
|
dict_cols[coln]['obitype'] = v[coln].data_type_int
|
||||||
|
else:
|
||||||
|
dict_cols[coln]['eltnames'] = set(v[coln].elements_names + list(dict_cols[coln]['eltnames']))
|
||||||
|
dict_cols[coln]['nbelts'] = len(dict_cols[coln]['eltnames'])
|
||||||
|
for coln in dict_cols:
|
||||||
|
Column.new_column(o_view, coln, dict_cols[coln]['obitype'],
|
||||||
|
nb_elements_per_line=dict_cols[coln]['nbelts'], elements_names=list(dict_cols[coln]['eltnames']))
|
||||||
|
|
||||||
# Initialize the progress bar
|
# Initialize the progress bar
|
||||||
pb = ProgressBar(total_len, config, seconde=5)
|
pb = ProgressBar(total_len, config, seconde=5)
|
||||||
|
|
||||||
@ -116,7 +133,7 @@ def run(config):
|
|||||||
#print(repr(view), file=sys.stderr)
|
#print(repr(view), file=sys.stderr)
|
||||||
|
|
||||||
for d in idms_list:
|
for d in idms_list:
|
||||||
d.close()
|
d.close(force=True)
|
||||||
o_dms.close()
|
o_dms.close(force=True)
|
||||||
|
|
||||||
logger("info", "Done.")
|
logger("info", "Done.")
|
||||||
|
@ -124,8 +124,8 @@ def run(config):
|
|||||||
# If the input and the output DMS are different, delete the temporary result view in the input DMS
|
# If the input and the output DMS are different, delete the temporary result view in the input DMS
|
||||||
if i_dms != o_dms:
|
if i_dms != o_dms:
|
||||||
View.delete_view(i_dms, o_view_name)
|
View.delete_view(i_dms, o_view_name)
|
||||||
o_dms.close()
|
o_dms.close(force=True)
|
||||||
|
|
||||||
i_dms.close()
|
i_dms.close(force=True)
|
||||||
|
|
||||||
logger("info", "Done.")
|
logger("info", "Done.")
|
@ -56,3 +56,5 @@ def run(config):
|
|||||||
print(count2)
|
print(count2)
|
||||||
else:
|
else:
|
||||||
print(count1)
|
print(count1)
|
||||||
|
|
||||||
|
input[0].close(force=True)
|
||||||
|
@ -35,13 +35,13 @@ def addOptions(parser):
|
|||||||
action="store", dest="ecopcr:primer1",
|
action="store", dest="ecopcr:primer1",
|
||||||
metavar='<PRIMER>',
|
metavar='<PRIMER>',
|
||||||
type=str,
|
type=str,
|
||||||
help="Forward primer.")
|
help="Forward primer, length must be less than or equal to 32")
|
||||||
|
|
||||||
group.add_argument('--primer2', '-R',
|
group.add_argument('--primer2', '-R',
|
||||||
action="store", dest="ecopcr:primer2",
|
action="store", dest="ecopcr:primer2",
|
||||||
metavar='<PRIMER>',
|
metavar='<PRIMER>',
|
||||||
type=str,
|
type=str,
|
||||||
help="Reverse primer.")
|
help="Reverse primer, length must be less than or equal to 32")
|
||||||
|
|
||||||
group.add_argument('--error', '-e',
|
group.add_argument('--error', '-e',
|
||||||
action="store", dest="ecopcr:error",
|
action="store", dest="ecopcr:error",
|
||||||
@ -203,6 +203,7 @@ def run(config):
|
|||||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||||
#print(repr(o_dms[o_view_name]), file=sys.stderr)
|
#print(repr(o_dms[o_view_name]), file=sys.stderr)
|
||||||
|
|
||||||
o_dms.close()
|
i_dms.close(force=True)
|
||||||
|
o_dms.close(force=True)
|
||||||
|
|
||||||
logger("info", "Done.")
|
logger("info", "Done.")
|
||||||
|
@ -64,10 +64,10 @@ def run(config):
|
|||||||
ref_view_name = ref[1]
|
ref_view_name = ref[1]
|
||||||
|
|
||||||
# Check that the threshold demanded is greater than or equal to the threshold used to build the reference database
|
# Check that the threshold demanded is greater than or equal to the threshold used to build the reference database
|
||||||
if config['ecotag']['threshold'] < eval(i_dms[ref_view_name].comments["ref_db_threshold"]) :
|
if config['ecotag']['threshold'] < eval(ref_dms[ref_view_name].comments["ref_db_threshold"]) :
|
||||||
print("Error: The threshold demanded (%f) is lower than the threshold used to build the reference database (%f).",
|
print("Error: The threshold demanded (%f) is lower than the threshold used to build the reference database (%f).",
|
||||||
config['ecotag']['threshold'], i_dms[ref_view_name].comments["ref_db_threshold"])
|
config['ecotag']['threshold'], ref_dms[ref_view_name].comments["ref_db_threshold"])
|
||||||
|
|
||||||
# Open the output: only the DMS
|
# Open the output: only the DMS
|
||||||
output = open_uri(config['obi']['outputURI'],
|
output = open_uri(config['obi']['outputURI'],
|
||||||
input=False,
|
input=False,
|
||||||
@ -126,9 +126,11 @@ def run(config):
|
|||||||
# If the input and the output DMS are different, delete the temporary result view in the input DMS
|
# If the input and the output DMS are different, delete the temporary result view in the input DMS
|
||||||
if i_dms != o_dms:
|
if i_dms != o_dms:
|
||||||
View.delete_view(i_dms, o_view_name)
|
View.delete_view(i_dms, o_view_name)
|
||||||
o_dms.close()
|
o_dms.close(force=True)
|
||||||
|
|
||||||
i_dms.close()
|
taxo_dms.close(force=True)
|
||||||
|
ref_dms.close(force=True)
|
||||||
|
i_dms.close(force=True)
|
||||||
|
|
||||||
logger("info", "Done.")
|
logger("info", "Done.")
|
||||||
|
|
||||||
|
@ -59,13 +59,23 @@ def run(config):
|
|||||||
# Check that the input view has the type NUC_SEQS if needed # TODO discuss, maybe bool property
|
# Check that the input view has the type NUC_SEQS if needed # TODO discuss, maybe bool property
|
||||||
if (output[2] == Nuc_Seq) and (iview.type != b"NUC_SEQS_VIEW") : # Nuc_Seq_Stored? TODO
|
if (output[2] == Nuc_Seq) and (iview.type != b"NUC_SEQS_VIEW") : # Nuc_Seq_Stored? TODO
|
||||||
raise Exception("Error: the view to export in fasta or fastq format is not a NUC_SEQS view")
|
raise Exception("Error: the view to export in fasta or fastq format is not a NUC_SEQS view")
|
||||||
|
|
||||||
|
if config['obi']['only'] is not None:
|
||||||
|
withoutskip = min(input[4], config['obi']['only'])
|
||||||
|
else:
|
||||||
|
withoutskip = input[4]
|
||||||
|
|
||||||
|
if config['obi']['skip'] is not None:
|
||||||
|
skip = min(input[4], config['obi']['skip'])
|
||||||
|
else:
|
||||||
|
skip = 0
|
||||||
|
|
||||||
# Initialize the progress bar
|
# Initialize the progress bar
|
||||||
if config['obi']['noprogressbar']:
|
if config['obi']['noprogressbar']:
|
||||||
pb = None
|
pb = None
|
||||||
else:
|
else:
|
||||||
pb = ProgressBar(len(iview), config, seconde=5)
|
pb = ProgressBar(withoutskip - skip, config, seconde=5)
|
||||||
|
|
||||||
i=0
|
i=0
|
||||||
for seq in iview :
|
for seq in iview :
|
||||||
PyErr_CheckSignals()
|
PyErr_CheckSignals()
|
||||||
@ -86,7 +96,7 @@ def run(config):
|
|||||||
if not BrokenPipeError and not IOError:
|
if not BrokenPipeError and not IOError:
|
||||||
output_object.close()
|
output_object.close()
|
||||||
iview.close()
|
iview.close()
|
||||||
input[0].close()
|
input[0].close(force=True)
|
||||||
|
|
||||||
logger("info", "Done.")
|
logger("info", "Done.")
|
||||||
|
|
||||||
|
@ -370,7 +370,7 @@ def run(config):
|
|||||||
# If the input and the output DMS are different, delete the temporary imported view used to create the final view
|
# If the input and the output DMS are different, delete the temporary imported view used to create the final view
|
||||||
if i_dms != o_dms:
|
if i_dms != o_dms:
|
||||||
View.delete_view(i_dms, o_view_name)
|
View.delete_view(i_dms, o_view_name)
|
||||||
o_dms.close()
|
o_dms.close(force=True)
|
||||||
i_dms.close()
|
i_dms.close(force=True)
|
||||||
|
|
||||||
logger("info", "Done.")
|
logger("info", "Done.")
|
||||||
|
@ -103,7 +103,7 @@ def run(config):
|
|||||||
# If the input and the output DMS are different, delete the temporary imported view used to create the final view
|
# If the input and the output DMS are different, delete the temporary imported view used to create the final view
|
||||||
if i_dms != o_dms:
|
if i_dms != o_dms:
|
||||||
View.delete_view(i_dms, o_view_name)
|
View.delete_view(i_dms, o_view_name)
|
||||||
o_dms.close()
|
o_dms.close(force=True)
|
||||||
i_dms.close()
|
i_dms.close(force=True)
|
||||||
|
|
||||||
logger("info", "Done.")
|
logger("info", "Done.")
|
||||||
|
@ -54,4 +54,5 @@ def run(config):
|
|||||||
print(bytes2str(entries.ascii_history))
|
print(bytes2str(entries.ascii_history))
|
||||||
else:
|
else:
|
||||||
raise Exception("ASCII history only available for views")
|
raise Exception("ASCII history only available for views")
|
||||||
|
|
||||||
|
input[0].close(force=True)
|
||||||
|
@ -11,6 +11,7 @@ from obitools3.dms.column.column cimport Column
|
|||||||
from obitools3.dms.obiseq cimport Nuc_Seq
|
from obitools3.dms.obiseq cimport Nuc_Seq
|
||||||
from obitools3.dms import DMS
|
from obitools3.dms import DMS
|
||||||
from obitools3.dms.taxo.taxo cimport Taxonomy
|
from obitools3.dms.taxo.taxo cimport Taxonomy
|
||||||
|
from obitools3.files.uncompress cimport CompressedFile
|
||||||
|
|
||||||
|
|
||||||
from obitools3.utils cimport tobytes, \
|
from obitools3.utils cimport tobytes, \
|
||||||
@ -65,6 +66,14 @@ def addOptions(parser):
|
|||||||
addTaxdumpInputOption(parser)
|
addTaxdumpInputOption(parser)
|
||||||
addMinimalOutputOption(parser)
|
addMinimalOutputOption(parser)
|
||||||
|
|
||||||
|
group = parser.add_argument_group('obi import specific options')
|
||||||
|
|
||||||
|
group.add_argument('--preread',
|
||||||
|
action="store_true", dest="import:preread",
|
||||||
|
default=False,
|
||||||
|
help="Do a first readthrough of the dataset if it contains huge dictionaries (more than 100 keys) for "
|
||||||
|
"a much faster import.")
|
||||||
|
|
||||||
|
|
||||||
def run(config):
|
def run(config):
|
||||||
|
|
||||||
@ -169,8 +178,6 @@ def run(config):
|
|||||||
|
|
||||||
if entry_count >= 0:
|
if entry_count >= 0:
|
||||||
pb = ProgressBar(entry_count, config, seconde=5)
|
pb = ProgressBar(entry_count, config, seconde=5)
|
||||||
|
|
||||||
entries = input[1]
|
|
||||||
|
|
||||||
NUC_SEQS_view = False
|
NUC_SEQS_view = False
|
||||||
if isinstance(output[1], View) :
|
if isinstance(output[1], View) :
|
||||||
@ -188,6 +195,60 @@ def run(config):
|
|||||||
|
|
||||||
dcols = {}
|
dcols = {}
|
||||||
|
|
||||||
|
# First read through the entries to prepare columns with dictionaries as they are very time-expensive to rewrite
|
||||||
|
if config['import']['preread']:
|
||||||
|
logger("info", "First readthrough...")
|
||||||
|
entries = input[1]
|
||||||
|
i = 0
|
||||||
|
dict_dict = {}
|
||||||
|
for entry in entries:
|
||||||
|
PyErr_CheckSignals()
|
||||||
|
|
||||||
|
if entry is None: # error or exception handled at lower level, not raised because Python generators can't resume after any exception is raised
|
||||||
|
if config['obi']['skiperror']:
|
||||||
|
i-=1
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
raise Exception("obi import error in first readthrough")
|
||||||
|
|
||||||
|
if pb is not None:
|
||||||
|
pb(i)
|
||||||
|
elif not i%50000:
|
||||||
|
logger("info", "Read %d entries", i)
|
||||||
|
|
||||||
|
for tag in entry :
|
||||||
|
if type(entry[tag]) == dict :
|
||||||
|
if tag in dict_dict:
|
||||||
|
dict_dict[tag][0].update(entry[tag].keys())
|
||||||
|
else:
|
||||||
|
dict_dict[tag] = [set(entry[tag].keys()), get_obitype(entry[tag])]
|
||||||
|
i+=1
|
||||||
|
|
||||||
|
if pb is not None:
|
||||||
|
pb(i, force=True)
|
||||||
|
print("", file=sys.stderr)
|
||||||
|
|
||||||
|
for tag in dict_dict:
|
||||||
|
dcols[tag] = (Column.new_column(view, tag, dict_dict[tag][1], \
|
||||||
|
nb_elements_per_line=len(dict_dict[tag][0]), \
|
||||||
|
elements_names=list(dict_dict[tag][0])), \
|
||||||
|
value_obitype)
|
||||||
|
|
||||||
|
|
||||||
|
# Reinitialize the input
|
||||||
|
if isinstance(input[0], CompressedFile):
|
||||||
|
input_is_file = True
|
||||||
|
if entry_count >= 0:
|
||||||
|
pb = ProgressBar(entry_count, config, seconde=5)
|
||||||
|
try:
|
||||||
|
input[0].close()
|
||||||
|
except AttributeError:
|
||||||
|
pass
|
||||||
|
input = open_uri(config['obi']['inputURI'], force_file=input_is_file)
|
||||||
|
if input is None:
|
||||||
|
raise Exception("Could not open input URI")
|
||||||
|
|
||||||
|
entries = input[1]
|
||||||
i = 0
|
i = 0
|
||||||
for entry in entries :
|
for entry in entries :
|
||||||
|
|
||||||
|
@ -46,5 +46,5 @@ def run(config):
|
|||||||
process.wait()
|
process.wait()
|
||||||
|
|
||||||
iview.close()
|
iview.close()
|
||||||
input[0].close()
|
input[0].close(force=True)
|
||||||
|
|
||||||
|
@ -36,6 +36,7 @@ def run(config):
|
|||||||
l = []
|
l = []
|
||||||
for view in input[0]:
|
for view in input[0]:
|
||||||
l.append(tostr(view) + "\t(Date created: " + str(bytes2str_object(dms[view].comments["Date created"]))+")")
|
l.append(tostr(view) + "\t(Date created: " + str(bytes2str_object(dms[view].comments["Date created"]))+")")
|
||||||
|
dms[view].close()
|
||||||
l.sort()
|
l.sort()
|
||||||
for v in l:
|
for v in l:
|
||||||
print(v)
|
print(v)
|
||||||
@ -51,4 +52,5 @@ def run(config):
|
|||||||
if config['ls']['longformat'] and len(input[1].comments) > 0:
|
if config['ls']['longformat'] and len(input[1].comments) > 0:
|
||||||
print("\n### Comments:")
|
print("\n### Comments:")
|
||||||
print(str(input[1].comments))
|
print(str(input[1].comments))
|
||||||
|
|
||||||
|
input[0].close(force=True)
|
||||||
|
@ -42,7 +42,8 @@ def addOptions(parser):
|
|||||||
metavar="<URI>",
|
metavar="<URI>",
|
||||||
type=str,
|
type=str,
|
||||||
default=None,
|
default=None,
|
||||||
help="URI to the view containing the samples definition (with tags, primers, sample names,...)")
|
help="URI to the view containing the samples definition (with tags, primers, sample names,...)"
|
||||||
|
"Warning: primer lengths must be less than or equal to 32")
|
||||||
|
|
||||||
group.add_argument('-R', '--reverse-reads',
|
group.add_argument('-R', '--reverse-reads',
|
||||||
action="store", dest="ngsfilter:reverse",
|
action="store", dest="ngsfilter:reverse",
|
||||||
@ -172,6 +173,13 @@ cdef read_info_view(info_view, max_errors=2, verbose=False, not_aligned=False):
|
|||||||
primer_list = []
|
primer_list = []
|
||||||
i=0
|
i=0
|
||||||
for p in info_view:
|
for p in info_view:
|
||||||
|
|
||||||
|
# Check primer length: should not be longer than 32, the max allowed by the apat lib
|
||||||
|
if len(p[b'forward_primer']) > 32:
|
||||||
|
raise RollbackException("Error: primers can not be longer than 32bp, rollbacking views")
|
||||||
|
if len(p[b'reverse_primer']) > 32:
|
||||||
|
raise RollbackException("Error: primers can not be longer than 32bp, rollbacking views")
|
||||||
|
|
||||||
forward=Primer(p[b'forward_primer'],
|
forward=Primer(p[b'forward_primer'],
|
||||||
len(p[b'forward_tag']) if (b'forward_tag' in p and p[b'forward_tag']!=None) else None,
|
len(p[b'forward_tag']) if (b'forward_tag' in p and p[b'forward_tag']!=None) else None,
|
||||||
True,
|
True,
|
||||||
@ -594,7 +602,13 @@ def run(config):
|
|||||||
pb = ProgressBar(entries_len, config, seconde=5)
|
pb = ProgressBar(entries_len, config, seconde=5)
|
||||||
|
|
||||||
# Check and store primers and tags
|
# Check and store primers and tags
|
||||||
infos, primer_list = read_info_view(info_view, max_errors=config['ngsfilter']['error'], verbose=False, not_aligned=not_aligned) # TODO obi verbose option
|
try:
|
||||||
|
infos, primer_list = read_info_view(info_view, max_errors=config['ngsfilter']['error'], verbose=False, not_aligned=not_aligned) # TODO obi verbose option
|
||||||
|
except RollbackException, e:
|
||||||
|
if unidentified is not None:
|
||||||
|
raise RollbackException("obi ngsfilter error, rollbacking views: "+str(e), o_view, unidentified)
|
||||||
|
else:
|
||||||
|
raise RollbackException("obi ngsfilter error, rollbacking view: "+str(e), o_view)
|
||||||
|
|
||||||
aligner = Primer_search(primer_list, config['ngsfilter']['error'])
|
aligner = Primer_search(primer_list, config['ngsfilter']['error'])
|
||||||
|
|
||||||
@ -652,11 +666,11 @@ def run(config):
|
|||||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||||
#print(repr(o_view), file=sys.stderr)
|
#print(repr(o_view), file=sys.stderr)
|
||||||
|
|
||||||
input[0].close()
|
input[0].close(force=True)
|
||||||
output[0].close()
|
output[0].close(force=True)
|
||||||
info_input[0].close()
|
info_input[0].close(force=True)
|
||||||
if unidentified is not None:
|
if unidentified is not None:
|
||||||
unidentified_input[0].close()
|
unidentified_input[0].close(force=True)
|
||||||
aligner.free()
|
aligner.free()
|
||||||
|
|
||||||
logger("info", "Done.")
|
logger("info", "Done.")
|
||||||
|
@ -141,7 +141,7 @@ def run(config):
|
|||||||
# If the input and the output DMS are different, delete the temporary imported view used to create the final view
|
# If the input and the output DMS are different, delete the temporary imported view used to create the final view
|
||||||
if i_dms != o_dms:
|
if i_dms != o_dms:
|
||||||
View.delete_view(i_dms, o_view_name)
|
View.delete_view(i_dms, o_view_name)
|
||||||
o_dms.close()
|
o_dms.close(force=True)
|
||||||
i_dms.close()
|
i_dms.close(force=True)
|
||||||
|
|
||||||
logger("info", "Done.")
|
logger("info", "Done.")
|
||||||
|
@ -251,7 +251,7 @@ def run(config):
|
|||||||
for i in range(len(sorted_stats)):
|
for i in range(len(sorted_stats)):
|
||||||
c = sorted_stats[i][0]
|
c = sorted_stats[i][0]
|
||||||
for v in c:
|
for v in c:
|
||||||
if v is not None:
|
if type(v) == bytes:
|
||||||
print(pcat % tostr(v)+"\t", end="")
|
print(pcat % tostr(v)+"\t", end="")
|
||||||
else:
|
else:
|
||||||
print(pcat % str(v)+"\t", end="")
|
print(pcat % str(v)+"\t", end="")
|
||||||
@ -268,6 +268,6 @@ def run(config):
|
|||||||
print("%7d" %catcount[c], end="")
|
print("%7d" %catcount[c], end="")
|
||||||
print("%9d" %totcount[c])
|
print("%9d" %totcount[c])
|
||||||
|
|
||||||
input[0].close()
|
input[0].close(force=True)
|
||||||
|
|
||||||
logger("info", "Done.")
|
logger("info", "Done.")
|
||||||
|
@ -106,7 +106,7 @@ def run(config):
|
|||||||
# If the input and the output DMS are different, delete the temporary imported view used to create the final view
|
# If the input and the output DMS are different, delete the temporary imported view used to create the final view
|
||||||
if i_dms != o_dms:
|
if i_dms != o_dms:
|
||||||
View.delete_view(i_dms, o_view_name)
|
View.delete_view(i_dms, o_view_name)
|
||||||
o_dms.close()
|
o_dms.close(force=True)
|
||||||
i_dms.close()
|
i_dms.close(force=True)
|
||||||
|
|
||||||
logger("info", "Done.")
|
logger("info", "Done.")
|
||||||
|
@ -529,7 +529,7 @@ def run(config):
|
|||||||
test_taxo(config, infos)
|
test_taxo(config, infos)
|
||||||
|
|
||||||
infos['view'].close()
|
infos['view'].close()
|
||||||
infos['dms'].close()
|
infos['dms'].close(force=True)
|
||||||
shutil.rmtree(config['obi']['defaultdms']+'.obidms', ignore_errors=True)
|
shutil.rmtree(config['obi']['defaultdms']+'.obidms', ignore_errors=True)
|
||||||
|
|
||||||
print("Done.")
|
print("Done.")
|
||||||
|
@ -94,16 +94,16 @@ cdef class DMS(OBIWrapper):
|
|||||||
return dms
|
return dms
|
||||||
|
|
||||||
|
|
||||||
def close(self) :
|
def close(self, force=False) :
|
||||||
'''
|
'''
|
||||||
Closes the DMS instance and free the associated memory
|
Closes the DMS instance and free the associated memory (no counter, closing is final)
|
||||||
|
|
||||||
The `close` method is automatically called by the object destructor.
|
The `close` method is automatically called by the object destructor.
|
||||||
'''
|
'''
|
||||||
cdef OBIDMS_p pointer = self.pointer()
|
cdef OBIDMS_p pointer = self.pointer()
|
||||||
if self.active() :
|
if self.active() :
|
||||||
OBIWrapper.close(self)
|
OBIWrapper.close(self)
|
||||||
if (obi_close_dms(pointer, False)) < 0 :
|
if (obi_close_dms(pointer, force=force)) < 0 :
|
||||||
raise Exception("Problem closing an OBIDMS")
|
raise Exception("Problem closing an OBIDMS")
|
||||||
|
|
||||||
|
|
||||||
@ -254,7 +254,8 @@ cdef class DMS(OBIWrapper):
|
|||||||
# bash command history property getter
|
# bash command history property getter
|
||||||
@property
|
@property
|
||||||
def bash_history(self):
|
def bash_history(self):
|
||||||
s = b"#!/bin/bash\n\n"
|
#s = b"#!${bash}/bin/bash\n\n"
|
||||||
|
s = b""
|
||||||
first = True
|
first = True
|
||||||
for command in self.command_line_history:
|
for command in self.command_line_history:
|
||||||
s+=b"#"
|
s+=b"#"
|
||||||
|
@ -526,7 +526,7 @@ cdef class View(OBIWrapper) :
|
|||||||
# bash command history property getter
|
# bash command history property getter
|
||||||
@property
|
@property
|
||||||
def bash_history(self):
|
def bash_history(self):
|
||||||
s = b"#!/bin/bash\n\n"
|
s = b""
|
||||||
first = True
|
first = True
|
||||||
for level in self.view_history:
|
for level in self.view_history:
|
||||||
command_list = [level[input][b"command_line"] for input in level.keys()]
|
command_list = [level[input][b"command_line"] for input in level.keys()]
|
||||||
|
@ -156,6 +156,9 @@ def emblIterator_file(lineiterator,
|
|||||||
yield seq
|
yield seq
|
||||||
read+=1
|
read+=1
|
||||||
|
|
||||||
|
# Last sequence
|
||||||
|
seq = emblParser(entry)
|
||||||
|
|
||||||
yield seq
|
yield seq
|
||||||
|
|
||||||
free(entry)
|
free(entry)
|
||||||
|
@ -153,6 +153,9 @@ def genbankIterator_file(lineiterator,
|
|||||||
yield seq
|
yield seq
|
||||||
read+=1
|
read+=1
|
||||||
|
|
||||||
|
# Last sequence
|
||||||
|
seq = genbankParser(entry)
|
||||||
|
|
||||||
yield seq
|
yield seq
|
||||||
|
|
||||||
free(entry)
|
free(entry)
|
||||||
|
7
python/obitools3/uri/decode.pyx
Executable file → Normal file
7
python/obitools3/uri/decode.pyx
Executable file → Normal file
@ -171,7 +171,8 @@ Reads an URI and returns a tuple containing:
|
|||||||
def open_uri(uri,
|
def open_uri(uri,
|
||||||
bint input=True,
|
bint input=True,
|
||||||
type newviewtype=View,
|
type newviewtype=View,
|
||||||
dms_only=False):
|
dms_only=False,
|
||||||
|
force_file=False):
|
||||||
|
|
||||||
cdef bytes urib = tobytes(uri)
|
cdef bytes urib = tobytes(uri)
|
||||||
cdef bytes scheme
|
cdef bytes scheme
|
||||||
@ -195,9 +196,9 @@ def open_uri(uri,
|
|||||||
if 'obi' not in config:
|
if 'obi' not in config:
|
||||||
config['obi']={}
|
config['obi']={}
|
||||||
|
|
||||||
try:
|
if not force_file and "defaultdms" in config["obi"]:
|
||||||
default_dms=config["obi"]["defaultdms"]
|
default_dms=config["obi"]["defaultdms"]
|
||||||
except KeyError:
|
else:
|
||||||
default_dms=None
|
default_dms=None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
@ -72,7 +72,7 @@ cpdef int count_entries(file, bytes format):
|
|||||||
return -1
|
return -1
|
||||||
mmapped_file = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
|
mmapped_file = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
|
||||||
total_count += len(re.findall(sep, mmapped_file))
|
total_count += len(re.findall(sep, mmapped_file))
|
||||||
if format != b"ngsfilter" and format != b"tabular":
|
if format != b"ngsfilter" and format != b"tabular" and format != b"embl" and format != b"genbank":
|
||||||
total_count += 1 # adding +1 for 1st entry because separators include \n (ngsfilter and tabular already count one more because of last \n)
|
total_count += 1 # adding +1 for 1st entry because separators include \n (ngsfilter and tabular already count one more because of last \n)
|
||||||
|
|
||||||
except:
|
except:
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
major = 3
|
major = 3
|
||||||
minor = 0
|
minor = 0
|
||||||
serial= '0-beta9'
|
serial= '0-beta15'
|
||||||
|
|
||||||
version ="%d.%02d.%s" % (major,minor,serial)
|
version ="%d.%02d.%s" % (major,minor,serial)
|
||||||
|
@ -157,7 +157,7 @@ int build_reference_db(const char* dms_name,
|
|||||||
ecotx_t* lca_2 = NULL;
|
ecotx_t* lca_2 = NULL;
|
||||||
ecotx_t* lca = NULL;
|
ecotx_t* lca = NULL;
|
||||||
index_t idx1, idx2;
|
index_t idx1, idx2;
|
||||||
index_t i, j, k;
|
index_t i, j, k, count;
|
||||||
int32_t taxid_array_length;
|
int32_t taxid_array_length;
|
||||||
int32_t score_array_length;
|
int32_t score_array_length;
|
||||||
int32_t taxid_array_writable_length;
|
int32_t taxid_array_writable_length;
|
||||||
@ -185,6 +185,7 @@ int build_reference_db(const char* dms_name,
|
|||||||
matrix_view_name = strcpy(matrix_view_name, o_view_name);
|
matrix_view_name = strcpy(matrix_view_name, o_view_name);
|
||||||
strcat(matrix_view_name, "_matrix");
|
strcat(matrix_view_name, "_matrix");
|
||||||
|
|
||||||
|
fprintf(stderr, "Aligning queries with reference database...\n");
|
||||||
if (obi_lcs_align_one_column(dms_name,
|
if (obi_lcs_align_one_column(dms_name,
|
||||||
refs_view_name,
|
refs_view_name,
|
||||||
"",
|
"",
|
||||||
@ -320,13 +321,19 @@ int build_reference_db(const char* dms_name,
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
count = (matrix_with_lca_view->infos)->line_count;
|
||||||
|
fprintf(stderr, "Computing LCAs...\n");
|
||||||
|
|
||||||
// Compute all the LCAs
|
// Compute all the LCAs
|
||||||
// For each pair
|
// For each pair
|
||||||
for (i=0; i<(matrix_with_lca_view->infos)->line_count; i++)
|
for (i=0; i<count; i++)
|
||||||
{
|
{
|
||||||
if (! keep_running)
|
if (! keep_running)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
|
if (i%1000 == 0)
|
||||||
|
fprintf(stderr,"\rDone : %f %% ", (i / (float) count)*100);
|
||||||
|
|
||||||
// Read all taxids associated with the first sequence and compute their LCA
|
// Read all taxids associated with the first sequence and compute their LCA
|
||||||
// Read line index
|
// Read line index
|
||||||
idx1 = obi_get_int_with_elt_idx_and_col_p_in_view(matrix_with_lca_view, matrix_idx1_column, i, 0);
|
idx1 = obi_get_int_with_elt_idx_and_col_p_in_view(matrix_with_lca_view, matrix_idx1_column, i, 0);
|
||||||
@ -363,6 +370,7 @@ int build_reference_db(const char* dms_name,
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
fprintf(stderr,"\rDone : 100 %% \n");
|
||||||
|
|
||||||
// Clone refs view, add 2 arrays columns for lca and score, compute and write them
|
// Clone refs view, add 2 arrays columns for lca and score, compute and write them
|
||||||
|
|
||||||
@ -442,13 +450,18 @@ int build_reference_db(const char* dms_name,
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fprintf(stderr, "Building LCA arrays...\n");
|
||||||
|
|
||||||
// For each sequence, look for all its alignments in the matrix, and for each different LCA taxid/score, order them and write them
|
// For each sequence, look for all its alignments in the matrix, and for each different LCA taxid/score, order them and write them
|
||||||
// Going through matrix once, filling refs arrays on the go for efficiency
|
// Going through matrix once, filling refs arrays on the go for efficiency
|
||||||
for (i=0; i<(matrix_with_lca_view->infos)->line_count; i++)
|
for (i=0; i<count; i++)
|
||||||
{
|
{
|
||||||
if (! keep_running)
|
if (! keep_running)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
|
if (i%1000 == 0)
|
||||||
|
fprintf(stderr,"\rDone : %f %% ", (i / (float) count)*100);
|
||||||
|
|
||||||
// Read ref line indexes
|
// Read ref line indexes
|
||||||
idx1 = obi_get_int_with_elt_idx_and_col_p_in_view(matrix_with_lca_view, matrix_idx1_column, i, 0);
|
idx1 = obi_get_int_with_elt_idx_and_col_p_in_view(matrix_with_lca_view, matrix_idx1_column, i, 0);
|
||||||
idx2 = obi_get_int_with_elt_idx_and_col_p_in_view(matrix_with_lca_view, matrix_idx2_column, i, 0);
|
idx2 = obi_get_int_with_elt_idx_and_col_p_in_view(matrix_with_lca_view, matrix_idx2_column, i, 0);
|
||||||
@ -464,6 +477,8 @@ int build_reference_db(const char* dms_name,
|
|||||||
// Read alignment score
|
// Read alignment score
|
||||||
score = obi_get_float_with_elt_idx_and_col_p_in_view(matrix_with_lca_view, matrix_score_column, i, 0);
|
score = obi_get_float_with_elt_idx_and_col_p_in_view(matrix_with_lca_view, matrix_score_column, i, 0);
|
||||||
|
|
||||||
|
//fprintf(stderr, "\n\ntaxid_lca=%d, score=%f, idx1=%d, idx2=%d", taxid_lca, score, idx1, idx2);
|
||||||
|
|
||||||
///////////////// Compute for first sequence \\\\\\\\\\\\\\\\\\\\\\\ (TODO function)
|
///////////////// Compute for first sequence \\\\\\\\\\\\\\\\\\\\\\\ (TODO function)
|
||||||
|
|
||||||
// Read arrays
|
// Read arrays
|
||||||
@ -480,9 +495,11 @@ int build_reference_db(const char* dms_name,
|
|||||||
// return -1;
|
// return -1;
|
||||||
// }
|
// }
|
||||||
|
|
||||||
|
//fprintf(stderr, "\n1st sequence");
|
||||||
// If empty, add values
|
// If empty, add values
|
||||||
if (taxid_array_length == 0)
|
if (taxid_array_length == 0)
|
||||||
{
|
{
|
||||||
|
//fprintf(stderr, "\nEmpty, add value");
|
||||||
if (obi_set_array_with_col_p_in_view(o_view, final_lca_taxid_a_column, idx1, &taxid_lca, (uint8_t) (obi_sizeof(OBI_INT) * 8), 1) < 0)
|
if (obi_set_array_with_col_p_in_view(o_view, final_lca_taxid_a_column, idx1, &taxid_lca, (uint8_t) (obi_sizeof(OBI_INT) * 8), 1) < 0)
|
||||||
{
|
{
|
||||||
obidebug(1, "\nError setting a LCA taxid array in a column when building a reference database");
|
obidebug(1, "\nError setting a LCA taxid array in a column when building a reference database");
|
||||||
@ -496,6 +513,8 @@ int build_reference_db(const char* dms_name,
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
//fprintf(stderr, "\nNot empty");
|
||||||
|
|
||||||
j = 0;
|
j = 0;
|
||||||
modified = false;
|
modified = false;
|
||||||
while (j < taxid_array_length)
|
while (j < taxid_array_length)
|
||||||
@ -509,6 +528,9 @@ int build_reference_db(const char* dms_name,
|
|||||||
memcpy(score_array_writable, score_array, score_array_length*sizeof(obifloat_t));
|
memcpy(score_array_writable, score_array, score_array_length*sizeof(obifloat_t));
|
||||||
modified = true;
|
modified = true;
|
||||||
|
|
||||||
|
//fprintf(stderr, "\nSame LCA, replace %d and %f with %d and %f", lca_taxid_array_writable[j],
|
||||||
|
// score_array_writable[j], taxid_lca, score);
|
||||||
|
|
||||||
// Better score for the same LCA, replace this LCA/score pair
|
// Better score for the same LCA, replace this LCA/score pair
|
||||||
lca_taxid_array_writable[j] = taxid_lca;
|
lca_taxid_array_writable[j] = taxid_lca;
|
||||||
score_array_writable[j] = score;
|
score_array_writable[j] = score;
|
||||||
@ -535,6 +557,8 @@ int build_reference_db(const char* dms_name,
|
|||||||
{
|
{
|
||||||
if (score > score_array[j])
|
if (score > score_array[j])
|
||||||
{
|
{
|
||||||
|
//fprintf(stderr, "\nInsert new");
|
||||||
|
|
||||||
memcpy(lca_taxid_array_writable, lca_taxid_array, taxid_array_length*sizeof(obiint_t));
|
memcpy(lca_taxid_array_writable, lca_taxid_array, taxid_array_length*sizeof(obiint_t));
|
||||||
memcpy(score_array_writable, score_array, score_array_length*sizeof(obifloat_t));
|
memcpy(score_array_writable, score_array, score_array_length*sizeof(obifloat_t));
|
||||||
modified = true;
|
modified = true;
|
||||||
@ -579,10 +603,15 @@ int build_reference_db(const char* dms_name,
|
|||||||
memcpy(score_array_writable, score_array, score_array_length*sizeof(obifloat_t));
|
memcpy(score_array_writable, score_array, score_array_length*sizeof(obifloat_t));
|
||||||
modified = true;
|
modified = true;
|
||||||
|
|
||||||
|
//fprintf(stderr, "\nAppend at the end");
|
||||||
|
|
||||||
// Append LCA
|
// Append LCA
|
||||||
lca_taxid_array_writable[taxid_array_writable_length] = taxid_lca;
|
lca_taxid_array_writable[taxid_array_writable_length] = taxid_lca;
|
||||||
score_array_writable[score_array_writable_length] = score;
|
score_array_writable[score_array_writable_length] = score;
|
||||||
|
|
||||||
|
taxid_array_writable_length++;
|
||||||
|
score_array_writable_length++;
|
||||||
|
|
||||||
// Remove the previous (children) LCAs from the array if their score is equal or lower
|
// Remove the previous (children) LCAs from the array if their score is equal or lower
|
||||||
while ((j>0) && (score_array_writable[j-1] <= score))
|
while ((j>0) && (score_array_writable[j-1] <= score))
|
||||||
{
|
{
|
||||||
@ -603,6 +632,13 @@ int build_reference_db(const char* dms_name,
|
|||||||
// Write new arrays
|
// Write new arrays
|
||||||
if (modified)
|
if (modified)
|
||||||
{
|
{
|
||||||
|
// fprintf(stderr, "\n\nnew array:");
|
||||||
|
// for (k=0;k<taxid_array_writable_length;k++)
|
||||||
|
// {
|
||||||
|
// lca = obi_taxo_get_taxon_with_taxid(tax, lca_taxid_array_writable[k]);
|
||||||
|
// fprintf(stderr, "\nLCA=%d, %s, score=%f", lca_taxid_array_writable[k], lca->name, score_array_writable[k]);
|
||||||
|
// }
|
||||||
|
|
||||||
if (obi_set_array_with_col_p_in_view(o_view, final_lca_taxid_a_column, idx1, lca_taxid_array_writable, (uint8_t) (obi_sizeof(OBI_INT) * 8), taxid_array_writable_length) < 0)
|
if (obi_set_array_with_col_p_in_view(o_view, final_lca_taxid_a_column, idx1, lca_taxid_array_writable, (uint8_t) (obi_sizeof(OBI_INT) * 8), taxid_array_writable_length) < 0)
|
||||||
{
|
{
|
||||||
obidebug(1, "\nError setting a LCA taxid array in a column when building a reference database");
|
obidebug(1, "\nError setting a LCA taxid array in a column when building a reference database");
|
||||||
@ -632,9 +668,13 @@ int build_reference_db(const char* dms_name,
|
|||||||
// return -1;
|
// return -1;
|
||||||
// }
|
// }
|
||||||
|
|
||||||
|
//fprintf(stderr, "\n2nd sequence");
|
||||||
|
|
||||||
// If empty, add values
|
// If empty, add values
|
||||||
if (taxid_array_length == 0)
|
if (taxid_array_length == 0)
|
||||||
{
|
{
|
||||||
|
//fprintf(stderr, "\nEmpty, add value");
|
||||||
|
|
||||||
if (obi_set_array_with_col_p_in_view(o_view, final_lca_taxid_a_column, idx2, &taxid_lca, (uint8_t) (obi_sizeof(OBI_INT) * 8), 1) < 0)
|
if (obi_set_array_with_col_p_in_view(o_view, final_lca_taxid_a_column, idx2, &taxid_lca, (uint8_t) (obi_sizeof(OBI_INT) * 8), 1) < 0)
|
||||||
{
|
{
|
||||||
obidebug(1, "\nError setting a LCA taxid array in a column when building a reference database");
|
obidebug(1, "\nError setting a LCA taxid array in a column when building a reference database");
|
||||||
@ -648,6 +688,8 @@ int build_reference_db(const char* dms_name,
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
//fprintf(stderr, "\nNot empty");
|
||||||
|
|
||||||
j = 0;
|
j = 0;
|
||||||
modified = false;
|
modified = false;
|
||||||
while (j < taxid_array_length)
|
while (j < taxid_array_length)
|
||||||
@ -661,6 +703,9 @@ int build_reference_db(const char* dms_name,
|
|||||||
memcpy(score_array_writable, score_array, score_array_length*sizeof(obifloat_t));
|
memcpy(score_array_writable, score_array, score_array_length*sizeof(obifloat_t));
|
||||||
modified = true;
|
modified = true;
|
||||||
|
|
||||||
|
//fprintf(stderr, "\nSame LCA, replace %d and %f with %d and %f", lca_taxid_array_writable[j],
|
||||||
|
// score_array_writable[j], taxid_lca, score);
|
||||||
|
|
||||||
// Better score for the same LCA, replace this LCA/score pair
|
// Better score for the same LCA, replace this LCA/score pair
|
||||||
lca_taxid_array_writable[j] = taxid_lca;
|
lca_taxid_array_writable[j] = taxid_lca;
|
||||||
score_array_writable[j] = score;
|
score_array_writable[j] = score;
|
||||||
@ -687,6 +732,8 @@ int build_reference_db(const char* dms_name,
|
|||||||
{
|
{
|
||||||
if (score > score_array[j])
|
if (score > score_array[j])
|
||||||
{
|
{
|
||||||
|
//fprintf(stderr, "\nInsert new");
|
||||||
|
|
||||||
memcpy(lca_taxid_array_writable, lca_taxid_array, taxid_array_length*sizeof(obiint_t));
|
memcpy(lca_taxid_array_writable, lca_taxid_array, taxid_array_length*sizeof(obiint_t));
|
||||||
memcpy(score_array_writable, score_array, score_array_length*sizeof(obifloat_t));
|
memcpy(score_array_writable, score_array, score_array_length*sizeof(obifloat_t));
|
||||||
modified = true;
|
modified = true;
|
||||||
@ -727,6 +774,8 @@ int build_reference_db(const char* dms_name,
|
|||||||
|
|
||||||
if (j == taxid_array_length) // same or parent LCA not found, need to be appended at the end
|
if (j == taxid_array_length) // same or parent LCA not found, need to be appended at the end
|
||||||
{
|
{
|
||||||
|
//fprintf(stderr, "\nAppend at the end");
|
||||||
|
|
||||||
memcpy(lca_taxid_array_writable, lca_taxid_array, taxid_array_length*sizeof(obiint_t));
|
memcpy(lca_taxid_array_writable, lca_taxid_array, taxid_array_length*sizeof(obiint_t));
|
||||||
memcpy(score_array_writable, score_array, score_array_length*sizeof(obifloat_t));
|
memcpy(score_array_writable, score_array, score_array_length*sizeof(obifloat_t));
|
||||||
modified = true;
|
modified = true;
|
||||||
@ -735,6 +784,9 @@ int build_reference_db(const char* dms_name,
|
|||||||
lca_taxid_array_writable[taxid_array_writable_length] = taxid_lca;
|
lca_taxid_array_writable[taxid_array_writable_length] = taxid_lca;
|
||||||
score_array_writable[score_array_writable_length] = score;
|
score_array_writable[score_array_writable_length] = score;
|
||||||
|
|
||||||
|
taxid_array_writable_length++;
|
||||||
|
score_array_writable_length++;
|
||||||
|
|
||||||
// Remove the previous (children) LCAs from the array if their score is equal or lower
|
// Remove the previous (children) LCAs from the array if their score is equal or lower
|
||||||
while ((j>0) && (score_array_writable[j-1] <= score))
|
while ((j>0) && (score_array_writable[j-1] <= score))
|
||||||
{
|
{
|
||||||
@ -769,11 +821,17 @@ int build_reference_db(const char* dms_name,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
fprintf(stderr,"\rDone : 100 %% \n");
|
||||||
|
|
||||||
|
fprintf(stderr, "Writing results...\n");
|
||||||
|
count = (o_view->infos)->line_count;
|
||||||
// Fill empty LCA informations (because filling from potentially sparse alignment matrix) with the sequence taxid
|
// Fill empty LCA informations (because filling from potentially sparse alignment matrix) with the sequence taxid
|
||||||
score=1.0; // technically getting LCA of identical sequences
|
score=1.0; // technically getting LCA of identical sequences
|
||||||
for (i=0; i<(o_view->infos)->line_count; i++)
|
for (i=0; i<count; i++)
|
||||||
{
|
{
|
||||||
|
if (i%1000 == 0)
|
||||||
|
fprintf(stderr,"\rDone : %f %% ", (i / (float) count)*100);
|
||||||
|
|
||||||
obi_get_array_with_col_p_in_view(o_view, final_lca_taxid_a_column, i, &taxid_array_length);
|
obi_get_array_with_col_p_in_view(o_view, final_lca_taxid_a_column, i, &taxid_array_length);
|
||||||
if (taxid_array_length == 0) // no LCA set
|
if (taxid_array_length == 0) // no LCA set
|
||||||
{
|
{
|
||||||
@ -799,6 +857,7 @@ int build_reference_db(const char* dms_name,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
fprintf(stderr,"\rDone : 100 %% \n");
|
||||||
|
|
||||||
// Add information about the threshold used to build the DB
|
// Add information about the threshold used to build the DB
|
||||||
snprintf(threshold_str, 5, "%f", threshold);
|
snprintf(threshold_str, 5, "%f", threshold);
|
||||||
@ -858,7 +917,6 @@ int build_reference_db(const char* dms_name,
|
|||||||
free(matrix_view_name);
|
free(matrix_view_name);
|
||||||
free(matrix_with_lca_view_name);
|
free(matrix_with_lca_view_name);
|
||||||
|
|
||||||
fprintf(stderr,"\rDone : 100 %% \n");
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
48
src/obi_ecopcr.c
Executable file → Normal file
48
src/obi_ecopcr.c
Executable file → Normal file
@ -105,7 +105,8 @@ static int create_output_columns(Obiview_p o_view, bool kingdom_mode);
|
|||||||
* @param o_temp1_column A pointer on the output column for the temperature for the first primer.
|
* @param o_temp1_column A pointer on the output column for the temperature for the first primer.
|
||||||
* @param o_temp2_column A pointer on the output column for the temperature for the second primer.
|
* @param o_temp2_column A pointer on the output column for the temperature for the second primer.
|
||||||
*
|
*
|
||||||
* @retval 0 if the operation was successfully completed.
|
* @retval 0 if the sequence was skipped (taxid not found, warning printed).
|
||||||
|
* @retval 1 if the sequence was successfully printed to the output.
|
||||||
* @retval -1 if an error occurred.
|
* @retval -1 if an error occurred.
|
||||||
*
|
*
|
||||||
* @since July 2018
|
* @since July 2018
|
||||||
@ -366,6 +367,17 @@ static int print_seq(Obiview_p i_view, Obiview_p o_view,
|
|||||||
|
|
||||||
// TODO add check for primer longer than MAX_PAT_LEN (32)
|
// TODO add check for primer longer than MAX_PAT_LEN (32)
|
||||||
|
|
||||||
|
// Get sequence id
|
||||||
|
seq_id = obi_get_str_with_elt_idx_and_col_p_in_view(i_view, i_id_column, i_idx, 0);
|
||||||
|
|
||||||
|
// Get the taxon structure
|
||||||
|
main_taxon = obi_taxo_get_taxon_with_taxid(taxonomy, taxid);
|
||||||
|
if (main_taxon == NULL)
|
||||||
|
{
|
||||||
|
obidebug(1, "\nWarning: error reading the taxonomic information of a sequence. Seq id: %s, taxid: %d. Probably deprecated taxid. Skipping this sequence.", seq_id, taxid);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
ldelta = (pos1 <= keep_nucleotides)?pos1:keep_nucleotides;
|
ldelta = (pos1 <= keep_nucleotides)?pos1:keep_nucleotides;
|
||||||
rdelta = ((pos2+keep_nucleotides)>=seq_len)?seq_len-pos2:keep_nucleotides;
|
rdelta = ((pos2+keep_nucleotides)>=seq_len)?seq_len-pos2:keep_nucleotides;
|
||||||
|
|
||||||
@ -431,16 +443,7 @@ static int print_seq(Obiview_p i_view, Obiview_p o_view,
|
|||||||
if (isnan(tm2))
|
if (isnan(tm2))
|
||||||
tm2 = OBIFloat_NA;
|
tm2 = OBIFloat_NA;
|
||||||
|
|
||||||
// Get the taxon structure
|
|
||||||
main_taxon = obi_taxo_get_taxon_with_taxid(taxonomy, taxid);
|
|
||||||
if (main_taxon == NULL)
|
|
||||||
{
|
|
||||||
obidebug(1, "\nError reading the taxonomic information of a sequence");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write sequence id
|
// Write sequence id
|
||||||
seq_id = obi_get_str_with_elt_idx_and_col_p_in_view(i_view, i_id_column, i_idx, 0);
|
|
||||||
if (obi_set_str_with_elt_idx_and_col_p_in_view(o_view, o_id_column, o_idx, 0, seq_id) < 0)
|
if (obi_set_str_with_elt_idx_and_col_p_in_view(o_view, o_id_column, o_idx, 0, seq_id) < 0)
|
||||||
{
|
{
|
||||||
obidebug(1, "\nError writing the sequence id");
|
obidebug(1, "\nError writing the sequence id");
|
||||||
@ -629,7 +632,7 @@ static int print_seq(Obiview_p i_view, Obiview_p o_view,
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -698,6 +701,7 @@ int obi_ecopcr(const char* i_dms_name,
|
|||||||
|
|
||||||
obiint_t taxid;
|
obiint_t taxid;
|
||||||
char* sequence;
|
char* sequence;
|
||||||
|
int printed;
|
||||||
|
|
||||||
SeqPtr apatseq=NULL;
|
SeqPtr apatseq=NULL;
|
||||||
int32_t o1Hits;
|
int32_t o1Hits;
|
||||||
@ -1057,14 +1061,14 @@ int obi_ecopcr(const char* i_dms_name,
|
|||||||
length = 0;
|
length = 0;
|
||||||
if (posj > posi)
|
if (posj > posi)
|
||||||
length = posj - posi - o1->patlen - o2->patlen;
|
length = posj - posi - o1->patlen - o2->patlen;
|
||||||
if (posj < posi)
|
else if (circular > 0)
|
||||||
length = posj + apatseq->seqlen - posi - o1->patlen - o2->patlen;
|
length = posj + apatseq->seqlen - posi - o1->patlen - o2->patlen;
|
||||||
if ((length>0) && // For when primers touch or overlap
|
if ((length>0) && // For when primers touch or overlap
|
||||||
(!min_len || (length >= min_len)) &&
|
(!min_len || (length >= min_len)) &&
|
||||||
(!max_len || (length <= max_len)))
|
(!max_len || (length <= max_len)))
|
||||||
{
|
{
|
||||||
// Print the found amplicon
|
// Print the found amplicon
|
||||||
if (print_seq(i_view, o_view,
|
printed = print_seq(i_view, o_view,
|
||||||
i_idx, o_idx,
|
i_idx, o_idx,
|
||||||
taxonomy,
|
taxonomy,
|
||||||
sequence,
|
sequence,
|
||||||
@ -1090,12 +1094,14 @@ int obi_ecopcr(const char* i_dms_name,
|
|||||||
o_strand_column,
|
o_strand_column,
|
||||||
o_primer1_column, o_primer2_column,
|
o_primer1_column, o_primer2_column,
|
||||||
o_error1_column, o_error2_column,
|
o_error1_column, o_error2_column,
|
||||||
o_temp1_column, o_temp2_column) < 0)
|
o_temp1_column, o_temp2_column);
|
||||||
|
if (printed < 0)
|
||||||
{
|
{
|
||||||
obidebug(1, "\nError writing the ecopcr result");
|
obidebug(1, "\nError writing the ecopcr result");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
o_idx++;
|
else if (printed > 0)
|
||||||
|
o_idx++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1145,14 +1151,14 @@ int obi_ecopcr(const char* i_dms_name,
|
|||||||
length = 0;
|
length = 0;
|
||||||
if (posj > posi)
|
if (posj > posi)
|
||||||
length = posj - posi + 1 - o2->patlen - o1->patlen; /* - o1->patlen : deleted by <EC> (prior to the OBITools3) */
|
length = posj - posi + 1 - o2->patlen - o1->patlen; /* - o1->patlen : deleted by <EC> (prior to the OBITools3) */
|
||||||
if (posj < posi)
|
else if (circular > 0)
|
||||||
length = posj + apatseq->seqlen - posi - o1->patlen - o2->patlen;
|
length = posj + apatseq->seqlen - posi - o1->patlen - o2->patlen;
|
||||||
if ((length>0) && // For when primers touch or overlap
|
if ((length>0) && // For when primers touch or overlap
|
||||||
(!min_len || (length >= min_len)) &&
|
(!min_len || (length >= min_len)) &&
|
||||||
(!max_len || (length <= max_len)))
|
(!max_len || (length <= max_len)))
|
||||||
{
|
{
|
||||||
// Print the found amplicon
|
// Print the found amplicon
|
||||||
if (print_seq(i_view, o_view,
|
printed = print_seq(i_view, o_view,
|
||||||
i_idx, o_idx,
|
i_idx, o_idx,
|
||||||
taxonomy,
|
taxonomy,
|
||||||
sequence,
|
sequence,
|
||||||
@ -1178,12 +1184,14 @@ int obi_ecopcr(const char* i_dms_name,
|
|||||||
o_strand_column,
|
o_strand_column,
|
||||||
o_primer1_column, o_primer2_column,
|
o_primer1_column, o_primer2_column,
|
||||||
o_error1_column, o_error2_column,
|
o_error1_column, o_error2_column,
|
||||||
o_temp1_column, o_temp2_column) < 0)
|
o_temp1_column, o_temp2_column);
|
||||||
|
if (printed < 0)
|
||||||
{
|
{
|
||||||
obidebug(1, "\nError writing the ecopcr result");
|
obidebug(1, "\nError writing the ecopcr result");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
o_idx++;
|
else if (printed > 0)
|
||||||
|
o_idx++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1224,7 +1232,7 @@ int obi_ecopcr(const char* i_dms_name,
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
fprintf(stderr,"\rDone : 100 %% ");
|
fprintf(stderr,"\rDone : 100 %% \n");
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -81,8 +81,8 @@
|
|||||||
* @param o_dms_name The path to the output DMS.
|
* @param o_dms_name The path to the output DMS.
|
||||||
* @param o_view_name The name of the output view.
|
* @param o_view_name The name of the output view.
|
||||||
* @param o_view_comments The comments to associate with the output view.
|
* @param o_view_comments The comments to associate with the output view.
|
||||||
* @param primer1 The first primer.
|
* @param primer1 The first primer, length must be less than or equal to 32 (because of apat lib limitation).
|
||||||
* @param primer2 The second primer.
|
* @param primer2 The second primer, length must be less than or equal to 32 (because of apat lib limitation).
|
||||||
* @param error_max The maximum number of errors allowed per primer for amplification.
|
* @param error_max The maximum number of errors allowed per primer for amplification.
|
||||||
* @param min_len The minimum length of an amplicon.
|
* @param min_len The minimum length of an amplicon.
|
||||||
* @param max_len The maximum length of an amplicon.
|
* @param max_len The maximum length of an amplicon.
|
||||||
|
@ -100,35 +100,35 @@ int print_assignment_result(Obiview_p output_view, index_t line,
|
|||||||
static int create_output_columns(Obiview_p o_view)
|
static int create_output_columns(Obiview_p o_view)
|
||||||
{
|
{
|
||||||
// Score column
|
// Score column
|
||||||
if (obi_view_add_column(o_view, ECOTAG_SCORE_COLUMN_NAME, -1, NULL, OBI_FLOAT, 0, 1, NULL, false, false, false, NULL, NULL, -1, ECOTAG_SCORE_COLUMN_NAME, true) < 0)
|
if (obi_view_add_column(o_view, ECOTAG_SCORE_COLUMN_NAME, -1, NULL, OBI_FLOAT, 0, 1, NULL, false, false, false, NULL, NULL, -1, "{}", true) < 0)
|
||||||
{
|
{
|
||||||
obidebug(1, "\nError creating the column for the score in ecotag");
|
obidebug(1, "\nError creating the column for the score in ecotag");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Assigned taxid column
|
// Assigned taxid column
|
||||||
if (obi_view_add_column(o_view, ECOTAG_TAXID_COLUMN_NAME, -1, NULL, OBI_INT, 0, 1, NULL, false, false, false, NULL, NULL, -1, ECOTAG_TAXID_COLUMN_NAME, true) < 0)
|
if (obi_view_add_column(o_view, ECOTAG_TAXID_COLUMN_NAME, -1, NULL, OBI_INT, 0, 1, NULL, false, false, false, NULL, NULL, -1, "{}", true) < 0)
|
||||||
{
|
{
|
||||||
obidebug(1, "\nError creating the column for the assigned taxid in ecotag");
|
obidebug(1, "\nError creating the column for the assigned taxid in ecotag");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Assigned scientific name column
|
// Assigned scientific name column
|
||||||
if (obi_view_add_column(o_view, ECOTAG_NAME_COLUMN_NAME, -1, NULL, OBI_STR, 0, 1, NULL, false, false, false, NULL, NULL, -1, ECOTAG_NAME_COLUMN_NAME, true) < 0)
|
if (obi_view_add_column(o_view, ECOTAG_NAME_COLUMN_NAME, -1, NULL, OBI_STR, 0, 1, NULL, false, false, false, NULL, NULL, -1, "{}", true) < 0)
|
||||||
{
|
{
|
||||||
obidebug(1, "\nError creating the column for the assigned scientific name in ecotag");
|
obidebug(1, "\nError creating the column for the assigned scientific name in ecotag");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Assignement status column
|
// Assignement status column
|
||||||
if (obi_view_add_column(o_view, ECOTAG_STATUS_COLUMN_NAME, -1, NULL, OBI_BOOL, 0, 1, NULL, false, false, false, NULL, NULL, -1, ECOTAG_STATUS_COLUMN_NAME, true) < 0)
|
if (obi_view_add_column(o_view, ECOTAG_STATUS_COLUMN_NAME, -1, NULL, OBI_BOOL, 0, 1, NULL, false, false, false, NULL, NULL, -1, "{}", true) < 0)
|
||||||
{
|
{
|
||||||
obidebug(1, "\nError creating the column for the assignment status in ecotag");
|
obidebug(1, "\nError creating the column for the assignment status in ecotag");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Column for array of best match ids
|
// Column for array of best match ids
|
||||||
if (obi_view_add_column(o_view, ECOTAG_BEST_MATCH_IDS_COLUMN_NAME, -1, NULL, OBI_STR, 0, 1, NULL, false, true, false, NULL, NULL, -1, ECOTAG_BEST_MATCH_IDS_COLUMN_NAME, true) < 0)
|
if (obi_view_add_column(o_view, ECOTAG_BEST_MATCH_IDS_COLUMN_NAME, -1, NULL, OBI_STR, 0, 1, NULL, false, true, false, NULL, NULL, -1, "{}", true) < 0)
|
||||||
{
|
{
|
||||||
obidebug(1, "\nError creating the column for the array of ids of the best match in ecotag");
|
obidebug(1, "\nError creating the column for the array of ids of the best match in ecotag");
|
||||||
return -1;
|
return -1;
|
||||||
@ -455,7 +455,7 @@ int obi_ecotag(const char* dms_name,
|
|||||||
|
|
||||||
for (i=0; i < query_count; i++)
|
for (i=0; i < query_count; i++)
|
||||||
{
|
{
|
||||||
if (i%100 == 0)
|
if (i%1000 == 0)
|
||||||
fprintf(stderr,"\rDone : %f %% ", (i / (float) query_count)*100);
|
fprintf(stderr,"\rDone : %f %% ", (i / (float) query_count)*100);
|
||||||
|
|
||||||
best_match_count = 0;
|
best_match_count = 0;
|
||||||
@ -562,7 +562,7 @@ int obi_ecotag(const char* dms_name,
|
|||||||
score_array = obi_get_array_with_col_p_in_view(ref_view, score_a_column, best_match_idx, &lca_array_length);
|
score_array = obi_get_array_with_col_p_in_view(ref_view, score_a_column, best_match_idx, &lca_array_length);
|
||||||
|
|
||||||
k = 0;
|
k = 0;
|
||||||
while ((k < lca_array_length) && (score_array[k] >= ecotag_threshold))
|
while ((k < lca_array_length) && (score_array[k] >= best_score))
|
||||||
k++;
|
k++;
|
||||||
|
|
||||||
if (k>0)
|
if (k>0)
|
||||||
@ -570,12 +570,12 @@ int obi_ecotag(const char* dms_name,
|
|||||||
lca_array = obi_get_array_with_col_p_in_view(ref_view, lca_taxid_a_column, best_match_idx, &lca_array_length);
|
lca_array = obi_get_array_with_col_p_in_view(ref_view, lca_taxid_a_column, best_match_idx, &lca_array_length);
|
||||||
if (j>0)
|
if (j>0)
|
||||||
{
|
{
|
||||||
lca = obi_taxo_get_taxon_with_taxid(taxonomy, lca_taxid);
|
// lca = obi_taxo_get_taxon_with_taxid(taxonomy, lca_taxid);
|
||||||
if (lca == NULL)
|
// if (lca == NULL)
|
||||||
{
|
// {
|
||||||
obidebug(1, "\nError getting a taxon from a taxid when doing taxonomic assignment");
|
// obidebug(1, "\nError getting a taxon from a taxid when doing taxonomic assignment");
|
||||||
return -1;
|
// return -1;
|
||||||
}
|
// }
|
||||||
lca_in_array = obi_taxo_get_taxon_with_taxid(taxonomy, lca_array[k-1]);
|
lca_in_array = obi_taxo_get_taxon_with_taxid(taxonomy, lca_array[k-1]);
|
||||||
if (lca_in_array == NULL)
|
if (lca_in_array == NULL)
|
||||||
{
|
{
|
||||||
|
@ -2376,9 +2376,10 @@ int read_merged_dmp(const char* taxdump, OBIDMS_taxonomy_p tax, int32_t* delnode
|
|||||||
// and the deleted taxids with no current reference. An element of the list is composed of the taxid, and the index
|
// and the deleted taxids with no current reference. An element of the list is composed of the taxid, and the index
|
||||||
// of the taxon in the taxa structure, or -1 for deleted taxids.
|
// of the taxon in the taxa structure, or -1 for deleted taxids.
|
||||||
// Creating the merged list requires to merge the 3 ordered lists into one.
|
// Creating the merged list requires to merge the 3 ordered lists into one.
|
||||||
while (((nT < (tax->taxa)->count) && ((tax->taxa)->taxon[nT].taxid < old_taxid)) || ((nD >= 0) && (delnodes[nD] < old_taxid)))
|
while (((nT < (tax->taxa)->count) && ((tax->taxa)->taxon[nT].taxid < old_taxid)) ||
|
||||||
|
((nD >= 0) && (delnodes[nD] < old_taxid)))
|
||||||
{
|
{
|
||||||
if ((tax->taxa)->taxon[nT].taxid < delnodes[nD])
|
if ((nT < (tax->taxa)->count) && (tax->taxa)->taxon[nT].taxid < delnodes[nD])
|
||||||
{ // Add element from taxa list
|
{ // Add element from taxa list
|
||||||
// Enlarge structure if needed
|
// Enlarge structure if needed
|
||||||
if (n == buffer_size)
|
if (n == buffer_size)
|
||||||
@ -2401,7 +2402,7 @@ int read_merged_dmp(const char* taxdump, OBIDMS_taxonomy_p tax, int32_t* delnode
|
|||||||
nT++;
|
nT++;
|
||||||
n++;
|
n++;
|
||||||
}
|
}
|
||||||
else if (delnodes[nD] < (tax->taxa)->taxon[nT].taxid)
|
else
|
||||||
{ // Add element from deleted taxids list
|
{ // Add element from deleted taxids list
|
||||||
// Enlarge structure if needed
|
// Enlarge structure if needed
|
||||||
if (n == buffer_size)
|
if (n == buffer_size)
|
||||||
@ -3036,12 +3037,12 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo
|
|||||||
|
|
||||||
strcpy(tax->tax_name, taxonomy_name);
|
strcpy(tax->tax_name, taxonomy_name);
|
||||||
|
|
||||||
buffer_size = 2048;
|
|
||||||
|
|
||||||
taxonomy_path = get_taxonomy_path(dms, taxonomy_name);
|
taxonomy_path = get_taxonomy_path(dms, taxonomy_name);
|
||||||
if (taxonomy_path == NULL)
|
if (taxonomy_path == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
buffer_size = strlen(taxonomy_path) + strlen(taxonomy_name) + 6;
|
||||||
|
|
||||||
// Read ranks
|
// Read ranks
|
||||||
ranks_file_name = (char*) malloc(buffer_size*sizeof(char));
|
ranks_file_name = (char*) malloc(buffer_size*sizeof(char));
|
||||||
if (ranks_file_name == NULL)
|
if (ranks_file_name == NULL)
|
||||||
|
@ -1973,7 +1973,11 @@ int obi_enlarge_column(OBIDMS_column_p column)
|
|||||||
|
|
||||||
// Calculate the new file size
|
// Calculate the new file size
|
||||||
old_line_count = (column->header)->line_count;
|
old_line_count = (column->header)->line_count;
|
||||||
new_line_count = old_line_count * COLUMN_GROWTH_FACTOR;
|
new_line_count = ceil((double) old_line_count * (double) COLUMN_GROWTH_FACTOR);
|
||||||
|
if (new_line_count > old_line_count+100000)
|
||||||
|
new_line_count = old_line_count+100000;
|
||||||
|
else if (new_line_count < old_line_count+1000)
|
||||||
|
new_line_count = old_line_count+1000;
|
||||||
|
|
||||||
if (new_line_count > MAXIMUM_LINE_COUNT)
|
if (new_line_count > MAXIMUM_LINE_COUNT)
|
||||||
{
|
{
|
||||||
@ -2381,6 +2385,54 @@ char* obi_get_elements_names(OBIDMS_column_p column)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Builds a printable, "; "-separated list of the element names of a column.
 *
 * Each name is read from the column header's elements_names buffer at the
 * offset given by the matching entry of elements_names_idx, and is followed
 * by "; " in the output. The last space is then overwritten by the string
 * terminator, so the result looks like "name1; name2;".
 *
 * @param column The column whose element names are formatted.
 *
 * @returns A newly allocated, NUL-terminated string that the caller must
 *          free, or NULL if the allocation failed (obi_errno is then set to
 *          OBI_MALLOC_ERROR). A column with no elements yields an empty
 *          string.
 */
char* obi_get_formatted_elements_names(OBIDMS_column_p column)
{
	char*       elements_names;
	const char* name;
	size_t      needed;
	size_t      len;
	size_t      j;
	int         i;
	int         elt_idx;

	// First pass: measure exactly what will be written (each name plus "; ")
	// instead of relying on how elements_names_length is defined in the header.
	needed = 0;
	for (i=0; i < (column->header)->nb_elements_per_line; i++)
	{
		elt_idx = ((column->header)->elements_names_idx)[i];
		needed = needed + strlen(((column->header)->elements_names)+elt_idx) + 2;
	}

	// Always keep room for the terminator, even when there is no element at all
	if (needed == 0)
		needed = 1;

	elements_names = (char*) malloc(needed * sizeof(char));
	if (elements_names == NULL)
	{
		obi_set_errno(OBI_MALLOC_ERROR);
		obidebug(1, "\nError allocating memory for elements names");
		return NULL;
	}

	// Second pass: copy each name followed by its "; " separator
	j = 0;
	for (i=0; i < (column->header)->nb_elements_per_line; i++)
	{
		elt_idx = ((column->header)->elements_names_idx)[i];
		name = ((column->header)->elements_names)+elt_idx;
		len = strlen(name);
		memcpy(elements_names+j, name, len*sizeof(char));
		j = j + len;
		elements_names[j] = ';';
		j++;
		elements_names[j] = ' ';
		j++;
	}

	// Terminate the string over the last space; with no element, j is 0 and
	// writing at j - 1 would land before the start of the buffer.
	if (j > 0)
		elements_names[j - 1] = '\0';
	else
		elements_names[0] = '\0';

	return elements_names;
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Returns a buffer meant to hold a formatted description of a column.
 *
 * TODO: the formatting itself is not implemented yet. The element names are
 * fetched and released without being used, and the returned buffer holds an
 * empty string.
 *
 * @param column The column to describe.
 *
 * @returns A newly allocated, NUL-terminated string of capacity 1024 bytes
 *          that the caller must free, or NULL if an allocation failed.
 */
char* obi_column_formatted_infos(OBIDMS_column_p column)
{
	char* column_infos;
	char* elt_names;

	column_infos = malloc(1024 * sizeof(char));
	if (column_infos == NULL)
	{
		obi_set_errno(OBI_MALLOC_ERROR);
		obidebug(1, "\nError allocating memory for column infos");
		return NULL;
	}

	// Hand back a valid (empty) string rather than uninitialised memory
	column_infos[0] = '\0';

	elt_names = obi_get_formatted_elements_names(column);
	if (elt_names == NULL)
	{
		// The helper has already reported the error; do not leak the buffer
		free(column_infos);
		return NULL;
	}

	// TODO: write the column informations (including elt_names) into column_infos

	free(elt_names);
	return column_infos;
}
|
||||||
|
|
||||||
|
|
||||||
int obi_column_prepare_to_set_value(OBIDMS_column_p column, index_t line_nb, index_t elt_idx)
|
int obi_column_prepare_to_set_value(OBIDMS_column_p column, index_t line_nb, index_t elt_idx)
|
||||||
{
|
{
|
||||||
|
@ -505,6 +505,14 @@ index_t obi_column_get_element_index_from_name(OBIDMS_column_p column, const cha
|
|||||||
char* obi_get_elements_names(OBIDMS_column_p column);
|
char* obi_get_elements_names(OBIDMS_column_p column);
|
||||||
|
|
||||||
|
|
||||||
|
// TODO
|
||||||
|
//char* obi_get_formatted_elements_names(OBIDMS_column_p column);
|
||||||
|
|
||||||
|
|
||||||
|
// TODO
|
||||||
|
//char* obi_column_formatted_infos(OBIDMS_column_p column);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Prepares a column to set a value.
|
* @brief Prepares a column to set a value.
|
||||||
*
|
*
|
||||||
|
@ -1407,7 +1407,7 @@ static char* view_check_qual_match_seqs(Obiview_p view)
|
|||||||
// Test that the lengths of the quality and the sequence are equal
|
// Test that the lengths of the quality and the sequence are equal
|
||||||
if ((size_t)qual_len != strlen(seq))
|
if ((size_t)qual_len != strlen(seq))
|
||||||
{
|
{
|
||||||
obidebug(1, "\nError checking the predicate for view %s: The sequences and sequence quality arrays match.", (view->infos)->name);
|
obidebug(1, "\nError checking the predicate for view %s: The sequences and sequence quality arrays match (index %lld, seq=%s, quality length = %d).", (view->infos)->name, j, seq, qual_len);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -686,6 +686,9 @@ int calculateSizeToAllocate(int maxLen, int LCSmin)
|
|||||||
size *= 3;
|
size *= 3;
|
||||||
size += 16;
|
size += 16;
|
||||||
|
|
||||||
|
size += 10; // band-aid for memory bug I don't understand (triggered on specific db on ubuntu)
|
||||||
|
// bug might have to do with the way different systems behave when aligning the address in obi_get_memory_aligned_on_16
|
||||||
|
|
||||||
return(size*sizeof(int16_t));
|
return(size*sizeof(int16_t));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user