Compare commits

...

36 Commits

Author SHA1 Message Date
24a737aa55 switch to version 3.0.0b28 2020-07-24 16:10:10 +02:00
8aa455ad8a Python: made all commands handle output to buffer object (e.g. stdout) 2020-07-24 16:09:48 +02:00
46ca693ca9 Cython: View: new method to print a view to a buffer (e.g. stdout) 2020-07-24 16:03:23 +02:00
9a9afde113 Cython: progress bar: set default refresh rate to 5 seconds 2020-07-24 15:29:12 +02:00
8dd403a118 grep: now prints the number of entries grepped 2020-07-13 17:08:13 +02:00
9672f01c6a alignpairedend: improved/fixed the output tags for the alignment score
and lengths. Removed minimum score option
2020-07-13 15:59:50 +02:00
ed9549acfb ngsfilter: unidentified sequences are now stored untrimmed 2020-07-13 15:56:40 +02:00
9ace9989c4 Switch to version 3.0.0b27 2020-07-07 16:47:21 +02:00
a3ebe5f118 C: AVL trees: fixed a bug where storing the difference between 2 crc64
values in an int64 would mess trees up resulting in failed data
dereplication
2020-07-07 16:47:00 +02:00
9100e14899 obi uniq: quick fix for bug where some sequences are not correctly
dereplicated
2020-07-03 17:36:57 +02:00
ccda0661ce small help documentation improvement 2020-07-01 18:20:38 +02:00
aab59f2214 obi clean: fixed a memory bug, fixed the behaviour when there is no sample info,
and added checks, warnings and error handling for when sample info is not
dereplicated
2020-07-01 18:17:47 +02:00
ade1107b42 switch to version 3.0.0b26 2020-06-17 18:56:07 +02:00
9c7d24406f export: dictionaries are now formatted like in the original OBITools
when exporting in tabular format and tuple formatting is cleaner
2020-06-17 18:55:46 +02:00
03bc9915f2 Cython: utils: added handling of tuples to bytes2str_object function 2020-06-17 18:54:14 +02:00
24b1dab573 Cython: Columns: added a keys() method that returns all element names 2020-06-17 18:53:41 +02:00
7593673f3f ngsfilter: now setting 'reversed' tag to False instead of None when
false
2020-06-17 18:52:35 +02:00
aa01236cae switch to version 3.0.0b25 2020-06-13 21:48:49 +02:00
49b8810a76 C: made indexer opening/closing cleaner 2020-06-13 21:47:03 +02:00
7a39df54c0 ls: fixed an issue where big DMS couldn't be read by ls 2020-06-13 21:45:22 +02:00
09e483b0d6 switch to temporary version 3.0.0b24a 2020-06-10 17:47:56 +02:00
14a2579173 uniq: now outputs an empty view if input view is empty instead of
displaying an error
2020-06-10 17:47:26 +02:00
36a8aaa92e grep: now creating empty views instead of displaying an error when
selecting on a nonexistent column/tag
2020-06-10 16:57:42 +02:00
a17eb445c2 ngsfilter: made one of the tag error messages more accurate 2020-06-10 16:27:36 +02:00
e4a32788c2 Switch to version 3.0.0b24 2020-06-09 14:36:58 +02:00
2442cc80bf Cython: View: fixed bash history display 2020-06-09 14:36:37 +02:00
aa836b2ace uniq: improved progress bar of second browsing 2020-06-09 14:36:02 +02:00
8776ce22e6 C: fixed a bug where indexers referring to tuples of certain types were
not properly closed and imported
2020-06-09 14:34:43 +02:00
4aa772c405 ecotag: Added list of taxids for all best matches (closes #80) 2020-06-09 14:33:14 +02:00
b0b96ac37a version 3.0.0b23a 2020-06-05 16:10:24 +02:00
687e42ad22 C: kmer alignment: fixed a bug where scores of 0 were at
(0+kmer_length-1) (and now setting alignment direction to None if score
is 0)
2020-06-05 16:09:33 +02:00
5fbbb6d304 alignpairedend: fixed a bug when rebuilding joined (unaligned) sequences
where only the forward sequence was kept
2020-06-05 16:06:43 +02:00
359a9fe237 Switch to version 3.0.0b23 2020-06-04 15:35:03 +02:00
f9b6851f75 Python: correctly flagged some mandatory options as required 2020-06-04 15:34:24 +02:00
29a2652bbf Fixed installation on Ubuntu without pip 2020-06-04 15:06:35 +02:00
2a2c233936 obi import: fixed a bug when skipping an entry 2020-05-29 21:19:42 +02:00
42 changed files with 808 additions and 310 deletions

View File

@ -30,12 +30,12 @@ cdef class ProgressBar:
off_t maxi,
dict config={},
str head="",
double seconde=0.1,
double seconds=5,
cut=False):
self.starttime = self.clock()
self.lasttime = self.starttime
self.tickcount = <clock_t> (seconde * CLOCKS_PER_SEC)
self.tickcount = <clock_t> (seconds * CLOCKS_PER_SEC)
self.freq = 1
self.cycle = 0
self.arrow = 0

View File

@ -4,7 +4,7 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.dms import DMS
from obitools3.dms.view.view cimport View
from obitools3.uri.decode import open_uri
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
from obitools3.dms.view import RollbackException
from obitools3.apps.config import logger
from obitools3.utils cimport tobytes, str2bytes
@ -12,6 +12,9 @@ from obitools3.utils cimport tobytes, str2bytes
from obitools3.dms.capi.obilcsalign cimport obi_lcs_align_one_column, \
obi_lcs_align_two_columns
from io import BufferedWriter
from cpython.exc cimport PyErr_CheckSignals
import time
import sys
@ -23,6 +26,7 @@ def addOptions(parser):
addMinimalInputOption(parser)
addMinimalOutputOption(parser)
addNoProgressBarOption(parser)
group=parser.add_argument_group('obi align specific options')
@ -201,21 +205,21 @@ def run(config):
if output is None:
raise Exception("Could not create output")
o_dms = output[0]
output_0 = output[0]
o_dms_name = o_dms.name
final_o_view_name = output[1]
# If the input and output DMS are not the same, align creating a temporary view in the input dms that will be exported to
# the right DMS and deleted in the other afterwards.
if i_dms != o_dms:
temporary_view_name = final_o_view_name
i=0
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
temporary_view_name = final_o_view_name+b"_"+str2bytes(str(i))
i+=1
o_view_name = temporary_view_name
else:
o_view_name = final_o_view_name
# If stdout output or the input and output DMS are not the same, align creating a temporary view in the input dms that will be exported to
# the right DMS and deleted in the other afterwards.
if i_dms != o_dms or type(output_0)==BufferedWriter:
if type(output_0)==BufferedWriter:
o_dms = i_dms
o_view_name = b"temp"
while o_view_name in i_dms: # Making sure view name is unique in input DMS
o_view_name = o_view_name+b"_"+str2bytes(str(i))
i+=1
# Save command config in View comments
command_line = " ".join(sys.argv[1:])
@ -263,8 +267,15 @@ def run(config):
View.delete_view(i_dms, i_view_name_2)
i_dms_2.close()
# stdout output: write to buffer
if type(output_0)==BufferedWriter:
logger("info", "Printing to output...")
o_view = o_dms[o_view_name]
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
o_view.close()
# If the input and the output DMS are different, delete the temporary result view in the input DMS
if i_dms != o_dms:
if i_dms != o_dms or type(output_0)==BufferedWriter:
View.delete_view(i_dms, o_view_name)
o_dms.close(force=True)

View File

@ -6,7 +6,7 @@ from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
from obitools3.dms.column.column cimport Column
from obitools3.dms.capi.obiview cimport QUALITY_COLUMN
from obitools3.dms.capi.obitypes cimport OBI_QUAL
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
from obitools3.uri.decode import open_uri
from obitools3.apps.config import logger
from obitools3.libalign._qsassemble import QSolexaReverseAssemble
@ -15,7 +15,9 @@ from obitools3.libalign._solexapairend import buildConsensus, buildJoinedSequenc
from obitools3.dms.obiseq cimport Nuc_Seq
from obitools3.libalign.shifted_ali cimport Kmer_similarity, Ali_shifted
from obitools3.dms.capi.obiview cimport REVERSE_SEQUENCE_COLUMN, REVERSE_QUALITY_COLUMN
from obitools3.utils cimport str2bytes
from io import BufferedWriter
import sys
import os
@ -29,6 +31,7 @@ def addOptions(parser):
addMinimalInputOption(parser)
addMinimalOutputOption(parser)
addNoProgressBarOption(parser)
group = parser.add_argument_group('obi alignpairedend specific options')
@ -39,12 +42,13 @@ def addOptions(parser):
type=str,
help="URI to the reverse reads if they are in a different view than the forward reads")
group.add_argument('--score-min',
action="store", dest="alignpairedend:smin",
metavar="#.###",
default=None,
type=float,
help="Minimum score for keeping alignments")
# group.add_argument('--score-min',
# action="store", dest="alignpairedend:smin",
# metavar="#.###",
# default=None,
# type=float,
# help="Minimum score for keeping alignments. "
# "(for kmer alignment) The score is an approximation of the number of nucleotides matching in the overlap of the alignment.")
# group.add_argument('-A', '--true-ali',
# action="store_true", dest="alignpairedend:trueali",
@ -170,17 +174,28 @@ def run(config):
if output is None:
raise Exception("Could not create output view")
view = output[1]
output_0 = output[0]
o_dms = output[0]
Column.new_column(view, QUALITY_COLUMN, OBI_QUAL) #TODO output URI quality option?
if 'smin' in config['alignpairedend']:
smin = config['alignpairedend']['smin']
# stdout output: create temporary view
if type(output_0)==BufferedWriter:
i_dms = forward.dms # using any dms
o_dms = i_dms
i=0
o_view_name = b"temp"
while o_view_name in i_dms: # Making sure view name is unique in input DMS
o_view_name = o_view_name+b"_"+str2bytes(str(i))
i+=1
o_view = View_NUC_SEQS.new(o_dms, o_view_name, quality=True)
else:
smin = 0
o_view = output[1]
Column.new_column(o_view, QUALITY_COLUMN, OBI_QUAL)
# Initialize the progress bar
pb = ProgressBar(entries_len, config, seconde=5)
if config['obi']['noprogressbar'] == False:
pb = ProgressBar(entries_len, config)
else:
pb = None
#if config['alignpairedend']['trueali']:
# kmer_ali = False
@ -206,18 +221,19 @@ def run(config):
i = 0
for ali in ba:
if pb is not None:
pb(i)
PyErr_CheckSignals()
consensus = view[i]
consensus = o_view[i]
if not two_views:
seqF = entries[i]
else:
seqF = forward[i]
if ali.score > smin and ali.consensus_len > 0 :
if ali.overlap_len > 0 :
buildConsensus(ali, consensus, seqF)
else:
if not two_views:
@ -226,13 +242,12 @@ def run(config):
seqR = reverse[i]
buildJoinedSequence(ali, seqR, consensus, forward=seqF)
consensus[b"smin"] = smin
if kmer_ali :
ali.free()
i+=1
if pb is not None:
pb(i, force=True)
print("", file=sys.stderr)
@ -241,16 +256,28 @@ def run(config):
# Save command config in View and DMS comments
command_line = " ".join(sys.argv[1:])
view.write_config(config, "alignpairedend", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
output[0].record_command_line(command_line)
o_view.write_config(config, "alignpairedend", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
o_dms.record_command_line(command_line)
#print("\n\nOutput view:\n````````````", file=sys.stderr)
#print(repr(view), file=sys.stderr)
# stdout output: write to buffer
if type(output_0)==BufferedWriter:
logger("info", "Printing to output...")
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
o_view.close()
# If stdout output, delete the temporary imported view used to create the final file
if type(output_0)==BufferedWriter:
View_NUC_SEQS.delete_view(o_dms, o_view_name)
output_0.close()
# Close all DMS
input[0].close(force=True)
if two_views:
rinput[0].close(force=True)
output[0].close(force=True)
o_dms.close(force=True)
logger("info", "Done.")

View File

@ -4,11 +4,12 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.dms import DMS
from obitools3.dms.view.view cimport View, Line_selection
from obitools3.uri.decode import open_uri
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption, addNoProgressBarOption
from obitools3.dms.view import RollbackException
from functools import reduce
from obitools3.apps.config import logger
from obitools3.utils cimport tobytes, str2bytes
from io import BufferedWriter
from obitools3.dms.capi.obiview cimport NUC_SEQUENCE_COLUMN, \
ID_COLUMN, \
DEFINITION_COLUMN, \
@ -34,6 +35,7 @@ def addOptions(parser):
addMinimalInputOption(parser)
addTaxonomyOption(parser)
addMinimalOutputOption(parser)
addNoProgressBarOption(parser)
group=parser.add_argument_group('obi annotate specific options')
@ -278,8 +280,19 @@ def run(config):
if output is None:
raise Exception("Could not create output view")
o_dms = output[0]
output_0 = output[0]
o_view_name = output[1]
# stdout output: create temporary view
if type(output_0)==BufferedWriter:
o_dms = i_dms
i=0
o_view_name = b"temp"
while o_view_name in i_dms: # Making sure view name is unique in output DMS
o_view_name = o_view_name+b"_"+str2bytes(str(i))
i+=1
imported_view_name = o_view_name
# If the input and output DMS are not the same, import the input view in the output DMS before cloning it to modify it
# (could be the other way around: clone and modify in the input DMS then import the new view in the output DMS)
if i_dms != o_dms:
@ -307,7 +320,10 @@ def run(config):
taxo = None
# Initialize the progress bar
pb = ProgressBar(len(o_view), config, seconde=5)
if config['obi']['noprogressbar'] == False:
pb = ProgressBar(len(o_view), config)
else:
pb = None
try:
@ -346,12 +362,14 @@ def run(config):
sequenceTagger = sequenceTaggerGenerator(config, taxo=taxo)
for i in range(len(o_view)):
PyErr_CheckSignals()
if pb is not None:
pb(i)
sequenceTagger(o_view[i])
except Exception, e:
raise RollbackException("obi annotate error, rollbacking view: "+str(e), o_view)
if pb is not None:
pb(i, force=True)
print("", file=sys.stderr)
@ -363,13 +381,19 @@ def run(config):
input_dms_name.append(config['obi']['taxoURI'].split("/")[-3])
input_view_name.append("taxonomy/"+config['obi']['taxoURI'].split("/")[-1])
o_view.write_config(config, "annotate", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
output[0].record_command_line(command_line)
o_dms.record_command_line(command_line)
#print("\n\nOutput view:\n````````````", file=sys.stderr)
#print(repr(o_view), file=sys.stderr)
# If the input and the output DMS are different, delete the temporary imported view used to create the final view
if i_dms != o_dms:
# stdout output: write to buffer
if type(output_0)==BufferedWriter:
logger("info", "Printing to output...")
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
o_view.close()
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
if i_dms != o_dms or type(output_0)==BufferedWriter:
View.delete_view(o_dms, imported_view_name)
o_dms.close(force=True)
i_dms.close(force=True)

View File

@ -4,14 +4,16 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.dms.dms cimport DMS
from obitools3.dms.view import RollbackException
from obitools3.dms.capi.build_reference_db cimport build_reference_db
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption, addNoProgressBarOption
from obitools3.uri.decode import open_uri
from obitools3.apps.config import logger
from obitools3.utils cimport tobytes, str2bytes
from obitools3.dms.view.view cimport View
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
from io import BufferedWriter
import sys
from cpython.exc cimport PyErr_CheckSignals
__title__="Tag a set of sequences for PCR and sequencing errors identification"
@ -22,6 +24,7 @@ def addOptions(parser):
addMinimalInputOption(parser)
addTaxonomyOption(parser)
addMinimalOutputOption(parser)
addNoProgressBarOption(parser)
group = parser.add_argument_group('obi build_ref_db specific options')
@ -56,17 +59,20 @@ def run(config):
if output is None:
raise Exception("Could not create output")
o_dms = output[0]
output_0 = output[0]
final_o_view_name = output[1]
# If the input and output DMS are not the same, build the database creating a temporary view that will be exported to
# If stdout output or the input and output DMS are not the same, build the database creating a temporary view that will be exported to
# the right DMS and deleted in the other afterwards.
if i_dms != o_dms:
temporary_view_name = final_o_view_name
if i_dms != o_dms or type(output_0)==BufferedWriter:
temporary_view_name = b"temp"
i=0
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
temporary_view_name = final_o_view_name+b"_"+str2bytes(str(i))
temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
i+=1
o_view_name = temporary_view_name
if type(output_0)==BufferedWriter:
o_dms = i_dms
else:
o_view_name = final_o_view_name
@ -88,14 +94,21 @@ def run(config):
if i_dms != o_dms:
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)
# stdout output: write to buffer
if type(output_0)==BufferedWriter:
logger("info", "Printing to output...")
o_view = o_dms[o_view_name]
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
o_view.close()
# Save command config in DMS comments
o_dms.record_command_line(command_line)
#print("\n\nOutput view:\n````````````", file=sys.stderr)
#print(repr(o_dms[final_o_view_name]), file=sys.stderr)
# If the input and the output DMS are different, delete the temporary result view in the input DMS
if i_dms != o_dms:
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
if i_dms != o_dms or type(output_0)==BufferedWriter:
View.delete_view(i_dms, o_view_name)
o_dms.close(force=True)

View File

@ -15,6 +15,7 @@ from obitools3.dms.capi.obiview cimport NUC_SEQUENCE_COLUMN, REVERSE_SEQUENCE_CO
from obitools3.dms.capi.obitypes cimport OBI_SEQ, OBI_QUAL
from obitools3.dms.column.column cimport Column
from io import BufferedWriter
import time
import sys
@ -76,9 +77,15 @@ def run(config):
if output is None:
raise Exception("Could not create output view")
o_dms = output[0]
output_0 = output[0]
o_view = output[1]
# stdout output
if type(output_0)==BufferedWriter:
o_dms = i_dms
# Initialize quality columns and their associated sequence columns if needed
if type(output_0) != BufferedWriter:
if not remove_qual:
if NUC_SEQUENCE_COLUMN not in o_view:
Column.new_column(o_view, NUC_SEQUENCE_COLUMN, OBI_SEQ)
@ -88,6 +95,7 @@ def run(config):
Column.new_column(o_view, REVERSE_QUALITY_COLUMN, OBI_QUAL, associated_column_name=REVERSE_SEQUENCE_COLUMN, associated_column_version=o_view[REVERSE_SEQUENCE_COLUMN].version)
# Initialize multiple elements columns
if type(output_0)==BufferedWriter:
dict_cols = {}
for v in iview_list:
for coln in v.keys():
@ -105,22 +113,32 @@ def run(config):
nb_elements_per_line=dict_cols[coln]['nbelts'], elements_names=list(dict_cols[coln]['eltnames']))
# Initialize the progress bar
pb = ProgressBar(total_len, config, seconde=5)
if not config['obi']['noprogressbar']:
pb = ProgressBar(total_len, config)
else:
pb = None
i = 0
for v in iview_list:
for l in v:
for entry in v:
PyErr_CheckSignals()
if pb is not None:
pb(i)
o_view[i] = l
if type(output_0)==BufferedWriter:
rep = repr(entry)
output_0.write(str2bytes(rep)+b"\n")
else:
o_view[i] = entry
i+=1
# Deletes quality columns if needed
if type(output_0)!=BufferedWriter:
if QUALITY_COLUMN in o_view and remove_qual :
o_view.delete_column(QUALITY_COLUMN)
if REVERSE_QUALITY_COLUMN in o_view and remove_rev_qual :
o_view.delete_column(REVERSE_QUALITY_COLUMN)
if pb is not None:
pb(i, force=True)
print("", file=sys.stderr)

View File

@ -4,13 +4,14 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.dms.dms cimport DMS
from obitools3.dms.view import RollbackException
from obitools3.dms.capi.obiclean cimport obi_clean
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
from obitools3.uri.decode import open_uri
from obitools3.apps.config import logger
from obitools3.utils cimport tobytes, str2bytes
from obitools3.dms.view.view cimport View
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
from io import BufferedWriter
import sys
@ -21,6 +22,7 @@ def addOptions(parser):
addMinimalInputOption(parser)
addMinimalOutputOption(parser)
addNoProgressBarOption(parser)
group = parser.add_argument_group('obi clean specific options')
@ -36,8 +38,7 @@ def addOptions(parser):
dest="clean:sample-tag-name",
metavar="<SAMPLE TAG NAME>",
type=str,
default="merged_sample",
help="Name of the tag where sample counts are kept.")
help="Name of the tag where merged sample count informations are kept (typically generated by obi uniq, usually MERGED_sample, default: None).")
group.add_argument('--ratio', '-r',
action="store", dest="clean:ratio",
@ -89,17 +90,20 @@ def run(config):
if output is None:
raise Exception("Could not create output")
o_dms = output[0]
output_0 = output[0]
final_o_view_name = output[1]
# If the input and output DMS are not the same, run obiclean creating a temporary view that will be exported to
# If stdout output or the input and output DMS are not the same, create a temporary view that will be exported to
# the right DMS and deleted in the other afterwards.
if i_dms != o_dms:
temporary_view_name = final_o_view_name
if i_dms != o_dms or type(output_0)==BufferedWriter:
temporary_view_name = b"temp"
i=0
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
temporary_view_name = final_o_view_name+b"_"+str2bytes(str(i))
temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
i+=1
o_view_name = temporary_view_name
if type(output_0)==BufferedWriter:
o_dms = i_dms
else:
o_view_name = final_o_view_name
@ -107,6 +111,9 @@ def run(config):
command_line = " ".join(sys.argv[1:])
comments = View.print_config(config, "clean", command_line, input_dms_name=[i_dms_name], input_view_name=[i_view_name])
if 'sample-tag-name' not in config['clean']:
config['clean']['sample-tag-name'] = ""
if obi_clean(i_dms.name_with_full_path, tobytes(i_view_name), tobytes(config['clean']['sample-tag-name']), tobytes(o_view_name), comments, \
config['clean']['distance'], config['clean']['ratio'], config['clean']['heads-only'], config['clean']['thread-count']) < 0:
raise Exception("Error running obiclean")
@ -115,14 +122,21 @@ def run(config):
if i_dms != o_dms:
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)
# stdout output: write to buffer
if type(output_0)==BufferedWriter:
logger("info", "Printing to output...")
o_view = o_dms[o_view_name]
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
o_view.close()
# Save command config in DMS comments
o_dms.record_command_line(command_line)
#print("\n\nOutput view:\n````````````", file=sys.stderr)
#print(repr(o_dms[final_o_view_name]), file=sys.stderr)
# If the input and the output DMS are different, delete the temporary result view in the input DMS
if i_dms != o_dms:
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
if i_dms != o_dms or type(output_0)==BufferedWriter:
View.delete_view(i_dms, o_view_name)
o_dms.close(force=True)

View File

@ -22,7 +22,7 @@ def addOptions(parser):
group.add_argument('-s','--sequence',
action="store_true", dest="count:sequence",
default=False,
help="Prints only the number of sequence records.")
help="Prints only the number of sequence records (much faster, default: False).")
group.add_argument('-a','--all',
action="store_true", dest="count:all",

View File

@ -5,10 +5,10 @@ from obitools3.dms.dms cimport DMS
from obitools3.dms.capi.obidms cimport OBIDMS_p
from obitools3.dms.view import RollbackException
from obitools3.dms.capi.obiecopcr cimport obi_ecopcr
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addTaxonomyOption
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addTaxonomyOption, addNoProgressBarOption
from obitools3.uri.decode import open_uri
from obitools3.apps.config import logger
from obitools3.utils cimport tobytes
from obitools3.utils cimport tobytes, str2bytes
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
from obitools3.dms.view import View
@ -16,6 +16,7 @@ from libc.stdlib cimport malloc, free
from libc.stdint cimport int32_t
import sys
from io import BufferedWriter
__title__="in silico PCR"
@ -27,6 +28,7 @@ def addOptions(parser):
addMinimalInputOption(parser)
addTaxonomyOption(parser)
addMinimalOutputOption(parser)
addNoProgressBarOption(parser)
group = parser.add_argument_group('obi ecopcr specific options')
@ -35,12 +37,14 @@ def addOptions(parser):
action="store", dest="ecopcr:primer1",
metavar='<PRIMER>',
type=str,
required=True,
help="Forward primer, length must be less than or equal to 32")
group.add_argument('--primer2', '-R',
action="store", dest="ecopcr:primer2",
metavar='<PRIMER>',
type=str,
required=True,
help="Reverse primer, length must be less than or equal to 32")
group.add_argument('--error', '-e',
@ -167,12 +171,21 @@ def run(config):
if output is None:
raise Exception("Could not create output")
o_dms = output[0]
output_0 = output[0]
o_dms_name = output[0].name
o_view_name = output[1]
# Read taxonomy name
taxonomy_name = config['obi']['taxoURI'].split("/")[-1] # Robust in theory
# If stdout output create a temporary view in the input dms that will be deleted afterwards.
if type(output_0)==BufferedWriter:
o_dms = i_dms
o_view_name = b"temp"
while o_view_name in i_dms: # Making sure view name is unique in input DMS
o_view_name = o_view_name+b"_"+str2bytes(str(i))
i+=1
# Save command config in View comments
command_line = " ".join(sys.argv[1:])
input_dms_name=[i_dms_name]
@ -200,9 +213,20 @@ def run(config):
free(restrict_to_taxids_p)
free(ignore_taxids_p)
# stdout output: write to buffer
if type(output_0)==BufferedWriter:
logger("info", "Printing to output...")
o_view = o_dms[o_view_name]
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
o_view.close()
#print("\n\nOutput view:\n````````````", file=sys.stderr)
#print(repr(o_dms[o_view_name]), file=sys.stderr)
# If stdout output, delete the temporary result view in the input DMS
if type(output_0)==BufferedWriter:
View.delete_view(i_dms, o_view_name)
i_dms.close(force=True)
o_dms.close(force=True)

View File

@ -4,7 +4,7 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.dms.dms cimport DMS
from obitools3.dms.view import RollbackException
from obitools3.dms.capi.obiecotag cimport obi_ecotag
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption, addNoProgressBarOption
from obitools3.uri.decode import open_uri
from obitools3.apps.config import logger
from obitools3.utils cimport tobytes, str2bytes
@ -12,6 +12,7 @@ from obitools3.dms.view.view cimport View
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
import sys
from io import BufferedWriter
__title__="Taxonomic assignment of sequences"
@ -22,6 +23,7 @@ def addOptions(parser):
addMinimalInputOption(parser)
addTaxonomyOption(parser)
addMinimalOutputOption(parser)
addNoProgressBarOption(parser)
group = parser.add_argument_group('obi ecotag specific options')
@ -75,17 +77,19 @@ def run(config):
if output is None:
raise Exception("Could not create output")
o_dms = output[0]
output_0 = output[0]
final_o_view_name = output[1]
# If the input and output DMS are not the same, run ecotag creating a temporary view that will be exported to
# the right DMS and deleted in the other afterwards.
if i_dms != o_dms:
temporary_view_name = final_o_view_name
# If stdout output or the input and output DMS are not the same, create a temporary view that will be exported and deleted.
if i_dms != o_dms or type(output_0)==BufferedWriter:
temporary_view_name = b"temp"
i=0
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
temporary_view_name = final_o_view_name+b"_"+str2bytes(str(i))
temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
i+=1
o_view_name = temporary_view_name
if type(output_0)==BufferedWriter:
o_dms = i_dms
else:
o_view_name = final_o_view_name
@ -120,11 +124,18 @@ def run(config):
# Save command config in DMS comments
o_dms.record_command_line(command_line)
# stdout output: write to buffer
if type(output_0)==BufferedWriter:
logger("info", "Printing to output...")
o_view = o_dms[o_view_name]
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
o_view.close()
#print("\n\nOutput view:\n````````````", file=sys.stderr)
#print(repr(o_dms[final_o_view_name]), file=sys.stderr)
# If the input and the output DMS are different, delete the temporary result view in the input DMS
if i_dms != o_dms:
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
if i_dms != o_dms or type(output_0)==BufferedWriter:
View.delete_view(i_dms, o_view_name)
o_dms.close(force=True)

View File

@ -74,7 +74,7 @@ def run(config):
if config['obi']['noprogressbar']:
pb = None
else:
pb = ProgressBar(withoutskip - skip, config, seconde=5)
pb = ProgressBar(withoutskip - skip, config)
i=0
for seq in iview :

View File

@ -4,7 +4,7 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.dms import DMS
from obitools3.dms.view.view cimport View, Line_selection
from obitools3.uri.decode import open_uri
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption, addNoProgressBarOption
from obitools3.dms.view import RollbackException
from obitools3.apps.config import logger
from obitools3.utils cimport tobytes, str2bytes
@ -14,6 +14,7 @@ import time
import re
import sys
import ast
from io import BufferedWriter
from cpython.exc cimport PyErr_CheckSignals
@ -28,6 +29,7 @@ def addOptions(parser):
addMinimalInputOption(parser)
addTaxonomyOption(parser)
addMinimalOutputOption(parser)
addNoProgressBarOption(parser)
group=parser.add_argument_group("obi grep specific options")
@ -161,8 +163,7 @@ def obi_eval(compiled_expr, loc_env, line):
return obi_eval_result
def Filter_generator(options, tax_filter):
#taxfilter = taxonomyFilterGenerator(options)
def Filter_generator(options, tax_filter, i_view):
# Initialize conditions
predicates = None
@ -171,6 +172,9 @@ def Filter_generator(options, tax_filter):
attributes = None
if "attributes" in options and len(options["attributes"]) > 0:
attributes = options["attributes"]
for attribute in attributes:
if attribute not in i_view:
return None
lmax = None
if "lmax" in options:
lmax = options["lmax"]
@ -196,6 +200,8 @@ def Filter_generator(options, tax_filter):
if "attribute_patterns" in options and len(options["attribute_patterns"]) > 0:
for p in options["attribute_patterns"]:
attribute, pattern = p.split(":", 1)
if attribute not in i_view:
return None
attribute_patterns[tobytes(attribute)] = re.compile(tobytes(pattern))
def filter(line, loc_env):
@ -300,16 +306,21 @@ def run(config):
if output is None:
raise Exception("Could not create output view")
o_dms = output[0]
o_view_name_final = output[1]
o_view_name = o_view_name_final
output_0 = output[0]
final_o_view_name = output[1]
# If the input and output DMS are not the same, create output view in input DMS first, then export it
# to output DMS, making sure the temporary view name is unique in the input DMS
if i_dms != o_dms:
# If stdout output or the input and output DMS are not the same, create a temporary view that will be exported and deleted afterwards.
if i_dms != o_dms or type(output_0)==BufferedWriter:
temporary_view_name = b"temp"
i=0
while o_view_name in i_dms:
o_view_name = o_view_name_final+b"_"+str2bytes(str(i))
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
i+=1
o_view_name = temporary_view_name
if type(output_0)==BufferedWriter:
o_dms = i_dms
else:
o_view_name = final_o_view_name
if 'taxoURI' in config['obi'] and config['obi']['taxoURI'] is not None:
taxo_uri = open_uri(config["obi"]["taxoURI"])
@ -320,14 +331,27 @@ def run(config):
taxo = None
# Initialize the progress bar
pb = ProgressBar(len(i_view), config, seconde=5)
if config['obi']['noprogressbar'] == False:
pb = ProgressBar(len(i_view), config)
else:
pb = None
# Apply filter
tax_filter = Taxonomy_filter_generator(taxo, config["grep"])
filter = Filter_generator(config["grep"], tax_filter)
filter = Filter_generator(config["grep"], tax_filter, i_view)
selection = Line_selection(i_view)
if filter is None and config["grep"]["invert_selection"]: # all sequences are selected: filter is None if no line will be selected because some columns don't exist
for i in range(len(i_view)):
PyErr_CheckSignals()
if pb is not None:
pb(i)
selection.append(i)
elif filter is not None : # filter is None if no line will be selected because some columns don't exist
for i in range(len(i_view)):
PyErr_CheckSignals()
if pb is not None:
pb(i)
line = i_view[i]
@ -338,7 +362,8 @@ def run(config):
if good :
selection.append(i)
pb(i, force=True)
if pb is not None:
pb(len(i_view), force=True)
print("", file=sys.stderr)
# Create output view with the line selection
@ -347,6 +372,8 @@ def run(config):
except Exception, e:
raise RollbackException("obi grep error, rollbacking view: "+str(e), o_view)
logger("info", "Grepped %d entries" % len(o_view))
# Save command config in View and DMS comments
command_line = " ".join(sys.argv[1:])
input_dms_name=[input[0].name]
@ -361,14 +388,20 @@ def run(config):
# and delete the temporary view in the input DMS
if i_dms != o_dms:
o_view.close()
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, o_view_name_final)
o_view = o_dms[o_view_name_final]
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)
o_view = o_dms[final_o_view_name]
# stdout output: write to buffer
if type(output_0)==BufferedWriter:
logger("info", "Printing to output...")
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
o_view.close()
#print("\n\nOutput view:\n````````````", file=sys.stderr)
#print(repr(o_view), file=sys.stderr)
# If the input and the output DMS are different, delete the temporary imported view used to create the final view
if i_dms != o_dms:
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
if i_dms != o_dms or type(output_0)==BufferedWriter:
View.delete_view(i_dms, o_view_name)
o_dms.close(force=True)
i_dms.close(force=True)

View File

@ -4,13 +4,14 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.dms import DMS
from obitools3.dms.view.view cimport View, Line_selection
from obitools3.uri.decode import open_uri
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
from obitools3.dms.view import RollbackException
from obitools3.apps.config import logger
from obitools3.utils cimport str2bytes
import time
import sys
from io import BufferedWriter
from cpython.exc cimport PyErr_CheckSignals
@ -22,6 +23,7 @@ def addOptions(parser):
addMinimalInputOption(parser)
addMinimalOutputOption(parser)
addNoProgressBarOption(parser)
group=parser.add_argument_group('obi head specific options')
@ -53,29 +55,39 @@ def run(config):
if output is None:
raise Exception("Could not create output view")
o_dms = output[0]
o_view_name_final = output[1]
o_view_name = o_view_name_final
output_0 = output[0]
final_o_view_name = output[1]
# If the input and output DMS are not the same, create output view in input DMS first, then export it
# to output DMS, making sure the temporary view name is unique in the input DMS
if i_dms != o_dms:
# If stdout output or the input and output DMS are not the same, create a temporary view that will be exported and deleted.
if i_dms != o_dms or type(output_0)==BufferedWriter:
temporary_view_name = b"temp"
i=0
while o_view_name in i_dms:
o_view_name = o_view_name_final+b"_"+str2bytes(str(i))
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
i+=1
o_view_name = temporary_view_name
if type(output_0)==BufferedWriter:
o_dms = i_dms
else:
o_view_name = final_o_view_name
n = min(config['head']['count'], len(i_view))
# Initialize the progress bar
pb = ProgressBar(n, config, seconde=5)
if config['obi']['noprogressbar'] == False:
pb = ProgressBar(n, config)
else:
pb = None
selection = Line_selection(i_view)
for i in range(n):
PyErr_CheckSignals()
if pb is not None:
pb(i)
selection.append(i)
if pb is not None:
pb(i, force=True)
print("", file=sys.stderr)
@ -94,14 +106,20 @@ def run(config):
# and delete the temporary view in the input DMS
if i_dms != o_dms:
o_view.close()
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, o_view_name_final)
o_view = o_dms[o_view_name_final]
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)
o_view = o_dms[final_o_view_name]
# stdout output: write to buffer
if type(output_0)==BufferedWriter:
logger("info", "Printing to output...")
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
o_view.close()
#print("\n\nOutput view:\n````````````", file=sys.stderr)
#print(repr(view), file=sys.stderr)
# If the input and the output DMS are different, delete the temporary imported view used to create the final view
if i_dms != o_dms:
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
if i_dms != o_dms or type(output_0)==BufferedWriter:
View.delete_view(i_dms, o_view_name)
o_dms.close(force=True)
i_dms.close(force=True)

View File

@ -47,6 +47,8 @@ from obitools3.apps.config import logger
from cpython.exc cimport PyErr_CheckSignals
from io import BufferedWriter
__title__="Imports sequences from different formats into a DMS"
@ -130,7 +132,7 @@ def run(config):
if entry_count > 0:
logger("info", "Importing %d entries", entry_count)
else:
logger("info", "Importing an unknow number of entries")
logger("info", "Importing an unknown number of entries")
# TODO a bit dirty?
if input[2]==Nuc_Seq or input[2]==View_NUC_SEQS:
@ -178,7 +180,7 @@ def run(config):
return
if entry_count >= 0:
pb = ProgressBar(entry_count, config, seconde=5)
pb = ProgressBar(entry_count, config)
NUC_SEQS_view = False
if isinstance(output[1], View) :
@ -243,7 +245,7 @@ def run(config):
if isinstance(input[0], CompressedFile):
input_is_file = True
if entry_count >= 0:
pb = ProgressBar(entry_count, config, seconde=5)
pb = ProgressBar(entry_count, config)
try:
input[0].close()
except AttributeError:
@ -260,7 +262,6 @@ def run(config):
if entry is None: # error or exception handled at lower level, not raised because Python generators can't resume after any exception is raised
if config['obi']['skiperror']:
i-=1
continue
else:
raise RollbackException("obi import error, rollbacking view", view)

View File

@ -34,9 +34,10 @@ def run(config):
if input[2] == DMS and not config['ls']['longformat']:
dms = input[0]
l = []
for view in input[0]:
l.append(tostr(view) + "\t(Date created: " + str(bytes2str_object(dms[view].comments["Date created"]))+")")
dms[view].close()
for viewname in input[0]:
view = dms[viewname]
l.append(tostr(viewname) + "\t(Date created: " + str(bytes2str_object(view.comments["Date created"]))+")")
view.close()
l.sort()
for v in l:
print(v)

View File

@ -2,10 +2,10 @@
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.dms import DMS
from obitools3.dms.view import RollbackException
from obitools3.dms.view import RollbackException, View
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
from obitools3.dms.column.column cimport Column, Column_line
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
from obitools3.uri.decode import open_uri
from obitools3.apps.config import logger
from obitools3.libalign._freeendgapfm import FreeEndGapFullMatch
@ -14,13 +14,14 @@ from obitools3.dms.obiseq cimport Nuc_Seq
from obitools3.dms.capi.obitypes cimport OBI_SEQ, OBI_QUAL
from obitools3.dms.capi.apat cimport MAX_PATTERN
from obitools3.dms.capi.obiview cimport REVERSE_SEQUENCE_COLUMN, REVERSE_QUALITY_COLUMN
from obitools3.utils cimport tobytes
from obitools3.utils cimport tobytes, str2bytes
from libc.stdint cimport INT32_MAX
from functools import reduce
import math
import sys
from cpython.exc cimport PyErr_CheckSignals
from io import BufferedWriter
#REVERSE_SEQ_COLUMN_NAME = b"REVERSE_SEQUENCE" # used by alignpairedend tool
@ -34,6 +35,7 @@ def addOptions(parser):
addMinimalInputOption(parser)
addMinimalOutputOption(parser)
addNoProgressBarOption(parser)
group = parser.add_argument_group('obi ngsfilter specific options')
@ -42,6 +44,7 @@ def addOptions(parser):
metavar="<URI>",
type=str,
default=None,
required=True,
help="URI to the view containing the samples definition (with tags, primers, sample names,...).\n"
"\nWarning: primer lengths must be less than or equal to 32")
@ -57,7 +60,7 @@ def addOptions(parser):
metavar="<URI>",
type=str,
default=None,
help="URI to the view used to store the sequences unassigned to any sample")
help="URI to the view used to store the sequences unassigned to any sample. Those sequences are untrimmed.")
group.add_argument('--no-tags',
action="store_true", dest="ngsfilter:notags",
@ -478,6 +481,8 @@ cdef tuple annotate(sequences, infos, no_tags, verbose=False):
if not directmatch[0].forward:
sequences[0] = sequences[0].reverse_complement
sequences[0][b'reversed'] = True # used by the alignpairedend tool (in kmer_similarity.c)
else:
sequences[0][b'reversed'] = False # used by the alignpairedend tool (in kmer_similarity.c)
sample=None
if not no_tags:
@ -505,7 +510,7 @@ cdef tuple annotate(sequences, infos, no_tags, verbose=False):
sample=None
if sample is None:
sequences[0][b'error']=b"No tags found"
sequences[0][b'error']=b"No sample with that tag combination"
return False, sequences[0]
sequences[0].update(sample)
@ -536,6 +541,7 @@ def run(config):
raise Exception("Could not open input reads")
if input[2] != View_NUC_SEQS:
raise NotImplementedError('obi ngsfilter only works on NUC_SEQS views')
i_dms = input[0]
if "reverse" in config["ngsfilter"]:
@ -577,8 +583,19 @@ def run(config):
if output is None:
raise Exception("Could not create output view")
o_dms = output[0]
output_0 = output[0]
o_view = output[1]
# If stdout output, create a temporary view in the input dms that will be deleted afterwards.
if type(output_0)==BufferedWriter:
    # Output goes to a buffer (e.g. stdout): build the result in a temporary
    # view of the input DMS; it is printed and then deleted further down.
    o_dms = i_dms
    o_view_name = b"temp"
    # Suffix counter for the temporary view name. It must be initialised
    # before the loop: without it, the first name collision reads an
    # unassigned local.
    i=0
    while o_view_name in i_dms: # Making sure view name is unique in input DMS
        o_view_name = o_view_name+b"_"+str2bytes(str(i))
        i+=1
    o_view = View_NUC_SEQS.new(i_dms, o_view_name)
# Open the view containing the informations about the tags and the primers
info_input = open_uri(config['ngsfilter']['info_view'])
if info_input is None:
@ -599,7 +616,10 @@ def run(config):
unidentified = None
# Initialize the progress bar
pb = ProgressBar(entries_len, config, seconde=5)
if config['obi']['noprogressbar'] == False:
pb = ProgressBar(entries_len, config)
else:
pb = None
# Check and store primers and tags
try:
@ -633,6 +653,7 @@ def run(config):
try:
for i in range(entries_len):
PyErr_CheckSignals()
if pb is not None:
pb(i)
if not_aligned:
modseq = [Nuc_Seq.new_from_stored(forward[i]), Nuc_Seq.new_from_stored(reverse[i])]
@ -643,7 +664,13 @@ def run(config):
o_view[g].set(oseq.id, oseq.seq, definition=oseq.definition, quality=oseq.quality, tags=oseq)
g+=1
elif unidentified is not None:
unidentified[u].set(oseq.id, oseq.seq, definition=oseq.definition, quality=oseq.quality, tags=oseq)
# Untrim sequences (put original back)
if len(modseq) > 1:
oseq[REVERSE_SEQUENCE_COLUMN] = reverse[i].seq
oseq[REVERSE_QUALITY_COLUMN] = reverse[i].quality
unidentified[u].set(oseq.id, forward[i].seq, definition=oseq.definition, quality=forward[i].quality, tags=oseq)
else:
unidentified[u].set(oseq.id, entries[i].seq, definition=oseq.definition, quality=entries[i].quality, tags=oseq)
u+=1
except Exception, e:
if unidentified is not None:
@ -651,6 +678,7 @@ def run(config):
else:
raise RollbackException("obi ngsfilter error, rollbacking view: "+str(e), o_view)
if pb is not None:
pb(i, force=True)
print("", file=sys.stderr)
@ -661,13 +689,23 @@ def run(config):
unidentified.write_config(config, "ngsfilter", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
# Add comment about unidentified seqs
unidentified.comments["info"] = "View containing sequences categorized as unidentified by the ngsfilter command"
output[0].record_command_line(command_line)
o_dms.record_command_line(command_line)
# stdout output: write to buffer
if type(output_0)==BufferedWriter:
logger("info", "Printing to output...")
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
o_view.close()
#print("\n\nOutput view:\n````````````", file=sys.stderr)
#print(repr(o_view), file=sys.stderr)
input[0].close(force=True)
output[0].close(force=True)
# If stdout output, delete the temporary result view in the input DMS
if type(output_0)==BufferedWriter:
View.delete_view(i_dms, o_view_name)
i_dms.close(force=True)
o_dms.close(force=True)
info_input[0].close(force=True)
if unidentified is not None:
unidentified_input[0].close(force=True)

View File

@ -4,7 +4,7 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.dms import DMS
from obitools3.dms.view.view cimport View, Line_selection
from obitools3.uri.decode import open_uri
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
from obitools3.dms.view import RollbackException
from obitools3.apps.config import logger
from obitools3.utils cimport str2bytes
@ -24,6 +24,7 @@ from obitools3.dms.capi.obitypes cimport OBI_BOOL, \
import time
import sys
from cpython.exc cimport PyErr_CheckSignals
from io import BufferedWriter
NULL_VALUE = {OBI_BOOL: OBIBool_NA,
@ -42,6 +43,7 @@ def addOptions(parser):
addMinimalInputOption(parser)
addMinimalOutputOption(parser)
addNoProgressBarOption(parser)
group=parser.add_argument_group('obi sort specific options')
@ -86,19 +88,27 @@ def run(config):
if output is None:
raise Exception("Could not create output view")
o_dms = output[0]
o_view_name_final = output[1]
o_view_name = o_view_name_final
output_0 = output[0]
final_o_view_name = output[1]
# If the input and output DMS are not the same, create output view in input DMS first, then export it
# to output DMS, making sure the temporary view name is unique in the input DMS
if i_dms != o_dms:
# If stdout output or the input and output DMS are not the same, create a temporary view that will be exported and deleted.
if i_dms != o_dms or type(output_0)==BufferedWriter:
temporary_view_name = b"temp"
i=0
while o_view_name in i_dms:
o_view_name = o_view_name_final+b"_"+str2bytes(str(i))
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
i+=1
o_view_name = temporary_view_name
if type(output_0)==BufferedWriter:
o_dms = i_dms
else:
o_view_name = final_o_view_name
# Initialize the progress bar
pb = ProgressBar(len(i_view), config, seconde=5)
if config['obi']['noprogressbar'] == False:
pb = ProgressBar(len(i_view), config)
else:
pb = None
keys = config['sort']['keys']
@ -110,8 +120,12 @@ def run(config):
for k in keys: # TODO order?
PyErr_CheckSignals()
if pb is not None:
selection.sort(key=lambda line_idx: line_cmp(i_view[line_idx], k, pb(line_idx)), reverse=config['sort']['reverse'])
else:
selection.sort(key=lambda line_idx: line_cmp(i_view[line_idx], k, None), reverse=config['sort']['reverse'])
if pb is not None:
pb(len(i_view), force=True)
print("", file=sys.stderr)
@ -132,16 +146,23 @@ def run(config):
# and delete the temporary view in the input DMS
if i_dms != o_dms:
o_view.close()
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, o_view_name_final)
o_view = o_dms[o_view_name_final]
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)
o_view = o_dms[final_o_view_name]
# stdout output: write to buffer
if type(output_0)==BufferedWriter:
logger("info", "Printing to output...")
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
o_view.close()
#print("\n\nOutput view:\n````````````", file=sys.stderr)
#print(repr(o_view), file=sys.stderr)
# If the input and the output DMS are different, delete the temporary imported view used to create the final view
if i_dms != o_dms:
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
if i_dms != o_dms or type(output_0)==BufferedWriter:
View.delete_view(i_dms, o_view_name)
o_dms.close(force=True)
i_dms.close(force=True)
logger("info", "Done.")

View File

@ -162,7 +162,7 @@ def run(config):
lcat=0
# Initialize the progress bar
pb = ProgressBar(len(i_view), config, seconde=5)
pb = ProgressBar(len(i_view), config)
for i in range(len(i_view)):
PyErr_CheckSignals()

View File

@ -4,7 +4,7 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.dms import DMS
from obitools3.dms.view.view cimport View, Line_selection
from obitools3.uri.decode import open_uri
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
from obitools3.dms.view import RollbackException
from obitools3.apps.config import logger
from obitools3.utils cimport str2bytes
@ -12,6 +12,7 @@ from obitools3.utils cimport str2bytes
import time
import sys
from cpython.exc cimport PyErr_CheckSignals
from io import BufferedWriter
__title__="Keep the N last lines of a view."
@ -21,6 +22,7 @@ def addOptions(parser):
addMinimalInputOption(parser)
addMinimalOutputOption(parser)
addNoProgressBarOption(parser)
group=parser.add_argument_group('obi tail specific options')
@ -52,29 +54,39 @@ def run(config):
if output is None:
raise Exception("Could not create output view")
o_dms = output[0]
o_view_name_final = output[1]
o_view_name = o_view_name_final
output_0 = output[0]
final_o_view_name = output[1]
# If the input and output DMS are not the same, create output view in input DMS first, then export it
# to output DMS, making sure the temporary view name is unique in the input DMS
if i_dms != o_dms:
# If stdout output or the input and output DMS are not the same, create a temporary view that will be exported and deleted.
if i_dms != o_dms or type(output_0)==BufferedWriter:
temporary_view_name = b"temp"
i=0
while o_view_name in i_dms:
o_view_name = o_view_name_final+b"_"+str2bytes(str(i))
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
i+=1
o_view_name = temporary_view_name
if type(output_0)==BufferedWriter:
o_dms = i_dms
else:
o_view_name = final_o_view_name
start = max(len(i_view) - config['tail']['count'], 0)
# Initialize the progress bar
pb = ProgressBar(len(i_view) - start, config, seconde=5)
if config['obi']['noprogressbar'] == False:
pb = ProgressBar(len(i_view) - start, config)
else:
pb = None
selection = Line_selection(i_view)
for i in range(start, len(i_view)):
PyErr_CheckSignals()
if pb is not None:
pb(i)
selection.append(i)
if pb is not None:
pb(i, force=True)
print("", file=sys.stderr)
@ -97,14 +109,20 @@ def run(config):
# and delete the temporary view in the input DMS
if i_dms != o_dms:
o_view.close()
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, o_view_name_final)
o_view = o_dms[o_view_name_final]
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)
o_view = o_dms[final_o_view_name]
# stdout output: write to buffer
if type(output_0)==BufferedWriter:
logger("info", "Printing to output...")
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
o_view.close()
#print("\n\nOutput view:\n````````````", file=sys.stderr)
#print(repr(o_view), file=sys.stderr)
# If the input and the output DMS are different, delete the temporary imported view used to create the final view
if i_dms != o_dms:
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
if i_dms != o_dms or type(output_0)==BufferedWriter:
View.delete_view(i_dms, o_view_name)
o_dms.close(force=True)
i_dms.close(force=True)

View File

@ -14,13 +14,15 @@ from obitools3.dms.capi.obitypes cimport OBI_INT, OBI_STR, index_t
from obitools3.apps.optiongroups import addMinimalInputOption, \
addMinimalOutputOption, \
addTaxonomyOption, \
addEltLimitOption
addEltLimitOption, \
addNoProgressBarOption
from obitools3.uri.decode import open_uri
from obitools3.apps.config import logger
from obitools3.utils cimport tobytes, tostr
from obitools3.utils cimport tobytes, tostr, str2bytes
import sys
from cpython.exc cimport PyErr_CheckSignals
from io import BufferedWriter
__title__="Group sequence records together"
@ -32,6 +34,7 @@ def addOptions(parser):
addTaxonomyOption(parser)
addMinimalOutputOption(parser)
addEltLimitOption(parser)
addNoProgressBarOption(parser)
group = parser.add_argument_group('obi uniq specific options')
@ -143,11 +146,15 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy, dict
scientific_name_column = o_view[b"scientific_name"]
# Initialize the progress bar
pb = ProgressBar(len(o_view), config, seconde=5)
if config['obi']['noprogressbar'] == False:
pb = ProgressBar(len(o_view), config)
else:
pb = None
i=0
for seq in o_view:
PyErr_CheckSignals()
if pb is not None:
pb(i)
if MERGED_TAXID_COLUMN in seq :
m_taxids = []
@ -191,6 +198,7 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy, dict
scientific_name_column[i] = taxonomy.get_scientific_name(taxid)
i+=1
if pb is not None:
pb(len(o_view), force=True)
@ -297,6 +305,7 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, di
iter_view = iter(view)
for i_seq in iter_view :
PyErr_CheckSignals()
if pb is not None:
pb(i)
# This can't be done in the same line as the unique_id tuple creation because it generates a bug
@ -307,6 +316,7 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, di
for x in categories :
catl.append(i_seq[x])
#unique_id = tuple(catl) + (i_seq_col[i],)
unique_id = tuple(catl) + (i_seq_col.get_line_idx(i),)
#unique_id = tuple(i_seq[x] for x in categories) + (seq_col.get_line_idx(i),) # The line that cython can't read properly
@ -415,16 +425,21 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, di
if COUNT_COLUMN in view:
i_count_col = view[COUNT_COLUMN]
if pb is not None:
pb(len(view), force=True)
print("")
logger("info", "Second browsing through the input")
# Initialize the progress bar
pb = ProgressBar(len(uniques), seconde=5)
if pb is not None:
pb = ProgressBar(len(view))
o_idx = 0
total_treated = 0
for unique_id in uniques :
PyErr_CheckSignals()
pb(o_idx)
merged_sequences = uniques[unique_id]
@ -453,6 +468,10 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, di
merged_dict[mkey] = {}
for i_idx in merged_sequences:
PyErr_CheckSignals()
if pb is not None:
pb(total_treated)
i_id = i_id_col[i_idx]
i_seq = view[i_idx]
@ -505,6 +524,8 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, di
and key not in merged_dict :
o_seq[key] = None
total_treated += 1
# Write merged dicts
for mkey in merged_dict:
if mkey in str_merged_cols:
@ -526,7 +547,8 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, di
o_count_col[o_idx] = o_count
o_idx += 1
pb(len(uniques), force=True)
if pb is not None:
pb(len(view), force=True)
# Deletes quality columns if there is one because the matching between sequence and quality will be broken (quality set to NA when sequence not)
if QUALITY_COLUMN in view:
@ -574,7 +596,22 @@ def run(config):
if output is None:
raise Exception("Could not create output view")
i_dms = input[0]
entries = input[1]
o_dms = output[0]
output_0 = output[0]
# If stdout output create a temporary view that will be exported and deleted.
if type(output_0)==BufferedWriter:
temporary_view_name = b"temp"
i=0
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
i+=1
o_view_name = temporary_view_name
o_dms = i_dms
o_view = View_NUC_SEQS.new(i_dms, o_view_name)
else:
o_view = output[1]
if 'taxoURI' in config['obi'] and config['obi']['taxoURI'] is not None:
@ -586,13 +623,18 @@ def run(config):
taxo = None
# Initialize the progress bar
pb = ProgressBar(len(entries), config, seconde=5)
if config['obi']['noprogressbar'] == False:
pb = ProgressBar(len(entries), config)
else:
pb = None
if len(entries) > 0:
try:
uniq_sequences(entries, o_view, pb, config, mergedKeys_list=config['uniq']['merge'], taxonomy=taxo, mergeIds=config['uniq']['mergeids'], categories=config['uniq']['categories'], max_elts=config['obi']['maxelts'])
except Exception, e:
raise RollbackException("obi uniq error, rollbacking view: "+str(e), o_view)
if pb is not None:
print("", file=sys.stderr)
# Save command config in View and DMS comments
@ -603,13 +645,23 @@ def run(config):
input_dms_name.append(config['obi']['taxoURI'].split("/")[-3])
input_view_name.append("taxonomy/"+config['obi']['taxoURI'].split("/")[-1])
o_view.write_config(config, "uniq", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
output[0].record_command_line(command_line)
o_dms.record_command_line(command_line)
# stdout output: write to buffer
if type(output_0)==BufferedWriter:
logger("info", "Printing to output...")
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
o_view.close()
#print("\n\nOutput view:\n````````````", file=sys.stderr)
#print(repr(o_view), file=sys.stderr)
input[0].close(force=True)
output[0].close(force=True)
# If stdout output, delete the temporary result view in the input DMS
if type(output_0)==BufferedWriter:
View.delete_view(i_dms, o_view_name)
i_dms.close(force=True)
o_dms.close(force=True)
logger("info", "Done.")

View File

@ -22,6 +22,7 @@ cdef class Column(OBIWrapper) :
cdef inline OBIDMS_column_p pointer(self)
cdef read_elements_names(self)
cpdef list keys(self)
@staticmethod
cdef type get_column_class(obitype_t obitype, bint multi_elts, bint tuples)

View File

@ -323,6 +323,9 @@ cdef class Column(OBIWrapper) :
free(elts_names_b)
return elts_names_list
cpdef list keys(self):
# Returns the column's element names as stored in self._elements_names.
# The attribute itself is returned (no copy is made here).
return self._elements_names
# Column alias property getter and setter
@property

View File

@ -227,7 +227,9 @@ cdef class DMS(OBIWrapper):
cdef str s
s=""
for view_name in self.keys():
s = s + repr(self.get_view(view_name)) + "\n"
view = self.get_view(view_name)
s = s + repr(view) + "\n"
view.close()
return s

View File

@ -40,3 +40,5 @@ cdef class Nuc_Seq_Stored(Seq_Stored) :
cpdef object build_quality_array(self, list quality)
cpdef bytes build_reverse_complement(self)
cpdef str get_str(self)
cpdef repr_bytes(self)

View File

@ -431,9 +431,12 @@ cdef class Nuc_Seq_Stored(Seq_Stored) :
return len(self._view.get_column(NUC_SEQUENCE_COLUMN).get_line(self.index))
def __repr__(self):
# String form of the sequence: the output of repr_bytes() passed through bytes2str.
return bytes2str(self.repr_bytes())
cpdef repr_bytes(self):
if self.quality is None:
formatter = FastaFormat()
else:
formatter = FastqFormat()
return bytes2str(formatter(self))
return formatter(self)

View File

@ -21,6 +21,10 @@ cdef class View(OBIWrapper):
cdef inline Obiview_p pointer(self)
cpdef print_to_output(self,
object output,
bint noprogressbar=*)
cpdef delete_column(self,
object column_name,
bint delete_file=*)
@ -61,6 +65,8 @@ cdef class Line :
cdef index_t _index
cdef View _view
cpdef repr_bytes(self)
cdef register_view_class(bytes view_type_name,
type view_class)

View File

@ -6,6 +6,8 @@ cdef dict __VIEW_CLASS__= {}
from libc.stdlib cimport malloc
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from ..capi.obiview cimport Alias_column_pair_p, \
obi_new_view, \
obi_open_view, \
@ -48,10 +50,13 @@ from ..capi.obidms cimport obi_import_view
from obitools3.format.tab import TabFormat
from cpython.exc cimport PyErr_CheckSignals
import importlib
import inspect
import pkgutil
import json
import sys
cdef class View(OBIWrapper) :
@ -186,6 +191,30 @@ cdef class View(OBIWrapper) :
return s
cpdef print_to_output(self, object output, bint noprogressbar=False):
# Writes every entry of the view to `output`, one entry per write() call,
# each followed by a newline byte.
#   output        : object exposing write(); it receives
#                   entry.repr_bytes()+b"\n" (presumably bytes, so a binary
#                   buffer is expected -- confirm against callers).
#   noprogressbar : when true, no ProgressBar is created or updated.
cdef int i
cdef Line entry
# Check that this view is still active before reading from it.
self.checkIsActive(self)
# Initialize the progress bar
if noprogressbar == False:
pb = ProgressBar(len(self))
else:
pb = None
# i counts the entries written so far; it is only used to update the progress bar.
i=0
for entry in self:
# Give pending signals a chance to be handled between entries.
PyErr_CheckSignals()
if pb is not None:
pb(i)
output.write(entry.repr_bytes()+b"\n")
i+=1
# Force a last progress update at the total count, then emit an empty line on stderr.
if pb is not None:
pb(len(self), force=True)
print("", file=sys.stderr)
def keys(self):
cdef bytes col_alias
@ -533,6 +562,7 @@ cdef class View(OBIWrapper) :
for command in command_list:
s+=b"obi "
s+=command
s+=b"\n"
return s
@ -756,8 +786,12 @@ cdef class Line :
def __repr__(self):
    # Build the bytes rendering of the line first, then decode it to str.
    # bytes2str must wrap the result of repr_bytes(); the previous form
    # passed the Line object itself to bytes2str and then called
    # .repr_bytes() on what came back.
    return bytes2str(self.repr_bytes())
cpdef repr_bytes(self):
formatter = TabFormat(header=False)
return bytes2str(formatter(self))
return formatter(self)
# View property getter

View File

@ -3,7 +3,7 @@
cimport cython
from obitools3.dms.view.view cimport Line
from obitools3.utils cimport bytes2str_object, str2bytes, tobytes
from obitools3.dms.column.column cimport Column_line
from obitools3.dms.column.column cimport Column_line, Column_multi_elts
cdef class TabFormat:
@ -25,18 +25,28 @@ cdef class TabFormat:
for k in self.tags:
if self.header and self.first_line:
value = tobytes(k)
if isinstance(data.view[k], Column_multi_elts):
for k2 in data.view[k].keys():
line.append(tobytes(k)+b':'+tobytes(k2))
else:
line.append(tobytes(k))
else:
value = data[k]
if value is not None:
if type(value) == Column_line:
value = value.bytes()
if isinstance(data.view[k], Column_multi_elts):
if value is None: # all keys at None
for k2 in data.view[k].keys(): # TODO could be much more efficient
line.append(self.NAString)
else:
value = str2bytes(str(bytes2str_object(value))) # genius programming
if value is None:
value = self.NAString
line.append(value)
for k2 in data.view[k].keys(): # TODO could be much more efficient
if value[k2] is not None:
line.append(str2bytes(str(bytes2str_object(value[k2])))) # genius programming
else:
line.append(self.NAString)
else:
if value is not None:
line.append(str2bytes(str(bytes2str_object(value))))
else:
line.append(self.NAString)
if self.first_line:
self.first_line = False

View File

@ -183,12 +183,13 @@ def buildConsensus(ali, seq, ref_tags=None):
# doesn't work because uint8_t* are forced into bytes by cython (nothing read/kept beyond 0 values)
#obi_set_qual_int_with_elt_idx_and_col_p_in_view(view_p, col_p, seq.index, 0, ali.consensus_qual, ali.consensus_len)
seq.set(ref_tags.id+b"_CONS", ali.consensus_seq, quality=ali.consensus_qual)
seq[b'ali_length'] = ali.consensus_len
seq[b'score_norm']=float(ali.score)/ali.consensus_len
seq[b"seq_length"] = ali.consensus_len
seq[b"overlap_length"] = ali.overlap_len
seq[b'score_norm']=round(float(ali.score)/ali.overlap_len, 3)
seq[b'shift']=ali.shift
else:
if len(ali[0])>999: # TODO why?
raise AssertionError,"Too long alignemnt"
raise AssertionError,"Too long alignment"
ic=IterOnConsensus(ali)
@ -250,11 +251,22 @@ def buildJoinedSequence(ali, reverse, seq, forward=None):
quality.extend(reverse.quality)
seq.set(forward.id +b"_PairedEnd", s, definition=forward.definition, quality=quality)
seq[b"score"]=ali.score
if len(ali.direction) > 0:
seq[b"ali_direction"]=ali.direction
else:
seq[b"ali_direction"]=None
seq[b"mode"]=b"joined"
seq[b"pairedend_limit"]=len(forward)
seq[b"seq_length"] = ali.consensus_len
seq[b"overlap_length"] = ali.overlap_len
if ali.consensus_len > 0:
seq[b'score_norm']=round(float(ali.score)/ali.overlap_len, 3)
else:
seq[b"score_norm"]=0.0
for tag in forward:
if tag != REVERSE_SEQUENCE_COLUMN and tag != REVERSE_QUALITY_COLUMN:
if tag != REVERSE_SEQUENCE_COLUMN and tag != REVERSE_QUALITY_COLUMN and \
tag != NUC_SEQUENCE_COLUMN and tag != QUALITY_COLUMN:
seq[tag] = forward[tag]
return seq

View File

@ -210,10 +210,11 @@ def open_uri(uri,
error = None
if scheme==b"dms" or \
if urib != b"-" and \
(scheme==b"dms" or \
(scheme==b"" and \
(((not input) and "outputformat" not in config["obi"]) or \
(input and "inputformat" not in config["obi"]))): # TODO maybe not best way
(input and "inputformat" not in config["obi"])))): # TODO maybe not best way
if default_dms is not None and b"/" not in urip.path: # assuming view to open in default DMS (TODO discuss)
dms=(default_dms, urip.path)
@ -275,10 +276,10 @@ def open_uri(uri,
iseq = urib
objclass = bytes
else: # TODO update uopen to be able to write?
if urip.path:
file = open(urip.path, 'wb')
else:
if urip.path == b'-':
file = sys.stdout.buffer
elif urip.path :
file = open(urip.path, 'wb')
if file is not None:
qualifiers=parse_qs(urip.query)

View File

@ -166,7 +166,9 @@ cdef object bytes2str_object(object value): # Only works if complex types are d
value[k] = bytes2str(v)
if type(k) == bytes:
value[bytes2str(k)] = value.pop(k)
elif isinstance(value, list):
elif isinstance(value, list) or isinstance(value, tuple):
if isinstance(value, tuple):
value = list(value)
for i in range(len(value)):
if isinstance(value[i], list) or isinstance(value[i], dict):
value[i] = bytes2str_object(value[i])

View File

@ -1,5 +1,5 @@
major = 3
minor = 0
serial= '0b21'
serial= '0b28'
version ="%d.%d.%s" % (major,minor,serial)

View File

@ -27,10 +27,11 @@ class Distribution(ori_Distribution):
ori_Distribution.__init__(self, attrs)
self.global_options.insert(0,('cobitools3', None, "intall location of the C library"
self.global_options.insert(0,('cobitools3', None, "install location of the C library"
))
from distutils.command.build import build as build_ori
from setuptools.command.bdist_egg import bdist_egg as bdist_egg_ori
from distutils.core import Command
@ -71,6 +72,12 @@ class build(build_ori):
build_ori.run(self)
class bdist_egg(bdist_egg_ori):
def run(self):
self.run_command('build_clib')
bdist_egg_ori.run(self)
sys.path.append(os.path.abspath("python"))
@ -166,6 +173,7 @@ setup(name=PACKAGE,
ext_modules=xx,
distclass=Distribution,
cmdclass={'build': build,
'bdist_egg': bdist_egg,
'build_clib': build_clib},
cobitools3=get_python_lib(),
packages = findPackage('python'),

View File

@ -413,7 +413,13 @@ Obi_ali_p kmer_similarity(Obiview_p view1, OBIDMS_column_p column1, index_t idx1
return NULL;
}
score = max_common_kmers + kmer_size - 1; // aka the number of nucleotides in the longest stretch of kmers perfectly matching
if (max_common_kmers > 0)
score = max_common_kmers + kmer_size - 1; // aka an approximation of the number of nucleotides matching in the overlap of the alignment.
// It's an approximation because one mismatch produces kmer_size kmer mismatches if in the middle of the overlap,
// and less for mismatches located towards the ends of the overlap. The case where there are the most mismatches is assumed,
// meaning that the score will be often underestimated and never overestimated.
else
score = 0;
abs_shift = abs(best_shift);
// Save result in Obi_ali structure
@ -423,10 +429,15 @@ Obi_ali_p kmer_similarity(Obiview_p view1, OBIDMS_column_p column1, index_t idx1
ali->shift = abs_shift;
ali->consensus_seq = NULL;
ali->consensus_qual = NULL;
if (score == 0)
ali->direction[0] = '\0';
else
{
if (((best_shift <= 0) && (!switched_seqs)) || ((best_shift > 0) && switched_seqs))
strcpy(ali->direction, "left");
else
strcpy(ali->direction, "right");
}
// Build the consensus sequence if asked
if (build_consensus)

View File

@ -27,7 +27,11 @@
* @brief Alignment structure, with informations about the similarity and to rebuild the alignment.
*/
typedef struct Obi_ali {
int score; /**< Alignment score, corresponding to the number of nucleotides in the longest stretch of kmers perfectly matching.
int score; /**< Alignment score, corresponding to an approximation of the number of
* nucleotides matching in the overlap of the alignment.
* It's an approximation because one mismatch produces kmer_size kmer mismatches if in the middle of the overlap,
* and less for mismatches located towards the ends of the overlap. The case where there are the most mismatches is assumed,
* meaning that the score will be often underestimated and never overestimated.
*/
int consensus_length; /**< Length of the final consensus sequence.
*/

View File

@ -246,7 +246,16 @@ int obi_clean(const char* dms_name,
// Open the sample column if there is one
if ((strcmp(sample_column_name, "") == 0) || (sample_column_name == NULL))
sample_column = NULL;
{
fprintf(stderr, "Info: No sample information provided, assuming one sample.\n");
sample_column = obi_view_get_column(i_view, COUNT_COLUMN);
if (sample_column == NULL)
{
obidebug(1, "\nError getting the COUNT column");
return -1;
}
sample_count = 1;
}
else
{
sample_column = obi_view_get_column(i_view, sample_column_name);
@ -255,6 +264,13 @@ int obi_clean(const char* dms_name,
obidebug(1, "\nError getting the sample column");
return -1;
}
sample_count = (sample_column->header)->nb_elements_per_line;
// Check that the sample column is a merged column with all sample informations
if (sample_count == 1)
{
obidebug(1, "\n\nError: If a sample column is provided, it must contain 'merged' sample counts as built by obi uniq with the -m option\n");
return -1;
}
}
// Create the output view, or a temporary one if heads only
@ -279,8 +295,6 @@ int obi_clean(const char* dms_name,
return -1;
}
sample_count = (sample_column->header)->nb_elements_per_line;
// Create the output columns
if (create_output_columns(o_view, sample_column, sample_count) < 0)
{
@ -549,7 +563,7 @@ int obi_clean(const char* dms_name,
if (heads_only)
{
line_selection = malloc((o_view->infos)->line_count * sizeof(index_t));
line_selection = malloc((((o_view->infos)->line_count) + 1) * sizeof(index_t));
if (line_selection == NULL)
{
obi_set_errno(OBI_MALLOC_ERROR);

View File

@ -52,7 +52,8 @@
*
* @param dms A pointer on an OBIDMS.
* @param i_view_name The name of the input view.
* @param sample_column_name The name of the OBI_STR column in the input view where the sample information is kept.
* @param sample_column_name The name of the column in the input view where the sample information is kept.
* Must be merged informations as built by the obi uniq tool (checked by the function).
* NULL or "" (empty string) if there is no sample information.
* @param o_view_name The name of the output view where the results should be written (should not already exist).
* @param o_view_comments The comments that should be associated with the output view.

View File

@ -71,9 +71,12 @@ static int create_output_columns(Obiview_p o_view);
* @param name The assigned scientific name.
* @param assigned_status_column A pointer on the column where the assigned status should be written.
* @param assigned The assigned status (whether the sequence was assigned to a taxon or not).
* @param best_match_column A pointer on the column where the list of ids of the best matches should be written.
* @param best_match_ids_column A pointer on the column where the list of ids of the best matches should be written.
* @param best_match_ids The list of ids of the best matches as an array of the concatenated ids separated by '\0'.
* @param best_match_ids_length The total length of the array of ids of best matches.
* @param best_match_taxids_column A pointer on the column where the list of taxids of the best matches should be written.
* @param best_match_taxids The list of taxids of the best matches as an array of the taxids.
* @param best_match_taxids_length The length of the array of taxids of best matches.
* @param score_column A pointer on the column where the score should be written.
* @param score The similarity score of the sequence with its best match(es).
*
@ -87,7 +90,8 @@ int print_assignment_result(Obiview_p output_view, index_t line,
OBIDMS_column_p assigned_taxid_column, int32_t taxid,
OBIDMS_column_p assigned_name_column, const char* name,
OBIDMS_column_p assigned_status_column, bool assigned,
OBIDMS_column_p best_match_column, const char* best_match_ids, int best_match_ids_length,
OBIDMS_column_p best_match_ids_column, const char* best_match_ids, int best_match_ids_length,
OBIDMS_column_p best_match_taxids_column, const int32_t* best_match_taxids, int best_match_taxids_length,
OBIDMS_column_p score_column, double score);
@ -130,7 +134,14 @@ static int create_output_columns(Obiview_p o_view)
// Column for array of best match ids
if (obi_view_add_column(o_view, ECOTAG_BEST_MATCH_IDS_COLUMN_NAME, -1, NULL, OBI_STR, 0, 1, NULL, false, true, false, NULL, NULL, -1, "{}", true) < 0)
{
obidebug(1, "\nError creating the column for the array of ids of the best match in ecotag");
obidebug(1, "\nError creating the column for the array of ids of best matches in ecotag");
return -1;
}
// Column for array of best match taxids
if (obi_view_add_column(o_view, ECOTAG_BEST_MATCH_TAXIDS_COLUMN_NAME, -1, NULL, OBI_INT, 0, 1, NULL, false, true, false, NULL, NULL, -1, "{}", true) < 0)
{
obidebug(1, "\nError creating the column for the array of taxids of best matches in ecotag");
return -1;
}
@ -142,7 +153,8 @@ int print_assignment_result(Obiview_p output_view, index_t line,
OBIDMS_column_p assigned_taxid_column, int32_t taxid,
OBIDMS_column_p assigned_name_column, const char* name,
OBIDMS_column_p assigned_status_column, bool assigned,
OBIDMS_column_p best_match_column, const char* best_match_ids, int best_match_ids_length,
OBIDMS_column_p best_match_ids_column, const char* best_match_ids, int best_match_ids_length,
OBIDMS_column_p best_match_taxids_column, const int32_t* best_match_taxids, int best_match_taxids_length,
OBIDMS_column_p score_column, double score)
{
// Write the assigned taxid
@ -167,9 +179,16 @@ int print_assignment_result(Obiview_p output_view, index_t line,
}
// Write the best match ids
if (obi_set_array_with_col_p_in_view(output_view, best_match_column, line, best_match_ids, (uint8_t)(sizeof(char)*8), best_match_ids_length) < 0)
if (obi_set_array_with_col_p_in_view(output_view, best_match_ids_column, line, best_match_ids, (uint8_t)(sizeof(char)*8), best_match_ids_length) < 0)
{
obidebug(1, "\nError writing a assignment status in a column when writing ecotag results");
obidebug(1, "\nError writing the array of best match ids in a column when writing ecotag results");
return -1;
}
// Write the best match taxids
if (obi_set_array_with_col_p_in_view(output_view, best_match_taxids_column, line, best_match_taxids, (uint8_t)(sizeof(OBI_INT)*8), best_match_taxids_length) < 0)
{
obidebug(1, "\nError writing the array of best match taxids in a column when writing ecotag results");
return -1;
}
@ -235,6 +254,8 @@ int obi_ecotag(const char* dms_name,
char* best_match_ids;
char* best_match_ids_to_store;
int32_t best_match_ids_length;
int32_t* best_match_taxids;
int32_t* best_match_taxids_to_store;
int best_match_count;
int buffer_size;
int best_match_ids_buffer_size;
@ -263,7 +284,8 @@ int obi_ecotag(const char* dms_name,
OBIDMS_column_p assigned_taxid_column = NULL;
OBIDMS_column_p assigned_name_column = NULL;
OBIDMS_column_p assigned_status_column = NULL;
OBIDMS_column_p best_match_column = NULL;
OBIDMS_column_p best_match_ids_column = NULL;
OBIDMS_column_p best_match_taxids_column = NULL;
OBIDMS_column_p lca_taxid_a_column = NULL;
OBIDMS_column_p score_a_column = NULL;
OBIDMS_column_p ref_taxid_column = NULL;
@ -396,7 +418,8 @@ int obi_ecotag(const char* dms_name,
assigned_taxid_column = obi_view_get_column(output_view, ECOTAG_TAXID_COLUMN_NAME);
assigned_name_column = obi_view_get_column(output_view, ECOTAG_NAME_COLUMN_NAME);
assigned_status_column = obi_view_get_column(output_view, ECOTAG_STATUS_COLUMN_NAME);
best_match_column = obi_view_get_column(output_view, ECOTAG_BEST_MATCH_IDS_COLUMN_NAME);
best_match_ids_column = obi_view_get_column(output_view, ECOTAG_BEST_MATCH_IDS_COLUMN_NAME);
best_match_taxids_column = obi_view_get_column(output_view, ECOTAG_BEST_MATCH_TAXIDS_COLUMN_NAME);
score_column = obi_view_get_column(output_view, ECOTAG_SCORE_COLUMN_NAME);
// Open the used reference columns
@ -453,6 +476,14 @@ int obi_ecotag(const char* dms_name,
return -1;
}
best_match_taxids = (int32_t*) malloc(buffer_size* sizeof(int32_t));
if (best_match_taxids == NULL)
{
obi_set_errno(OBI_MALLOC_ERROR);
obidebug(1, "\nError allocating memory for the best match taxid array in ecotag");
return -1;
}
for (i=0; i < query_count; i++)
{
if (i%1000 == 0)
@ -514,7 +545,7 @@ int obi_ecotag(const char* dms_name,
// Store in best match array
// Grow match array if needed
// Grow match and taxid array if needed
if (best_match_count == buffer_size)
{
buffer_size = buffer_size*2;
@ -525,6 +556,13 @@ int obi_ecotag(const char* dms_name,
obidebug(1, "\nError reallocating match array when assigning");
return -1;
}
best_match_taxids = (int32_t*) realloc(best_match_taxids, buffer_size*sizeof(int32_t));
if (best_match_taxids == NULL)
{
obi_set_errno(OBI_MALLOC_ERROR);
obidebug(1, "\nError reallocating match taxids array when assigning");
return -1;
}
}
id = obi_get_str_with_elt_idx_and_col_p_in_view(ref_view, ref_id_column, j, 0);
@ -545,6 +583,7 @@ int obi_ecotag(const char* dms_name,
// Save match
best_match_array[best_match_count] = j;
best_match_taxids[best_match_count] = obi_get_int_with_elt_idx_and_col_p_in_view(ref_view, ref_taxid_column, j, 0);
best_match_count++;
strcpy(best_match_ids+best_match_ids_length, id);
best_match_ids_length = best_match_ids_length + id_len + 1;
@ -629,6 +668,7 @@ int obi_ecotag(const char* dms_name,
else
lca_name = lca->name;
best_match_ids_to_store = best_match_ids;
best_match_taxids_to_store = best_match_taxids;
}
else
{
@ -636,6 +676,7 @@ int obi_ecotag(const char* dms_name,
lca_name = OBIStr_NA;
lca_taxid = OBIInt_NA;
best_match_ids_to_store = OBITuple_NA;
best_match_taxids_to_store = OBITuple_NA;
score = OBIFloat_NA;
}
@ -644,7 +685,8 @@ int obi_ecotag(const char* dms_name,
assigned_taxid_column, lca_taxid,
assigned_name_column, lca_name,
assigned_status_column, assigned,
best_match_column, best_match_ids_to_store, best_match_ids_length,
best_match_ids_column, best_match_ids_to_store, best_match_ids_length,
best_match_taxids_column, best_match_taxids_to_store, best_match_count,
score_column, best_score
) < 0)
return -1;
@ -652,6 +694,7 @@ int obi_ecotag(const char* dms_name,
free(best_match_array);
free(best_match_ids);
free(best_match_taxids);
obi_close_taxonomy(taxonomy);
obi_save_and_close_view(query_view);

View File

@ -23,7 +23,8 @@
#define ECOTAG_TAXID_COLUMN_NAME "TAXID"
#define ECOTAG_NAME_COLUMN_NAME "SCIENTIFIC_NAME"
#define ECOTAG_STATUS_COLUMN_NAME "ID_STATUS"
#define ECOTAG_BEST_MATCH_IDS_COLUMN_NAME "BEST_MATCH"
#define ECOTAG_BEST_MATCH_IDS_COLUMN_NAME "BEST_MATCH_IDS"
#define ECOTAG_BEST_MATCH_TAXIDS_COLUMN_NAME "BEST_MATCH_TAXIDS"
#define ECOTAG_SCORE_COLUMN_NAME "BEST_IDENTITY"

View File

@ -2259,7 +2259,13 @@ index_t obi_avl_add(OBIDMS_avl_p avl, Obi_blob_p value)
parent = next;
// Compare the crc of the value with the crc of the current node
comp = (current_node->crc64) - crc;
//comp = (current_node->crc64) - crc;
if ((current_node->crc64) == crc)
comp = 0;
else if ((current_node->crc64) > crc)
comp = 1;
else
comp = -1;
if (comp == 0)
{ // check if really same value
@ -2354,7 +2360,13 @@ index_t obi_avl_find(OBIDMS_avl_p avl, Obi_blob_p value)
current_node = (avl->tree)+next;
// Compare the crc of the value with the crc of the current node
comp = (current_node->crc64) - crc;
//comp = (current_node->crc64) - crc;
if ((current_node->crc64) == crc)
comp = 0;
else if ((current_node->crc64) > crc)
comp = 1;
else
comp = -1;
if (comp == 0)
{ // Check if really same value

View File

@ -1496,7 +1496,7 @@ obiversion_t obi_import_column(const char* dms_path_1, const char* dms_path_2, c
memcpy(column_2->data, column_1->data, header_1->data_size);
// Copy the AVL files if there are some (overwriting the automatically created files)
if ((header_1->returned_data_type == OBI_STR) || (header_1->returned_data_type == OBI_SEQ) || (header_1->returned_data_type == OBI_QUAL))
if ((header_1->tuples) || ((header_1->returned_data_type == OBI_STR) || (header_1->returned_data_type == OBI_SEQ) || (header_1->returned_data_type == OBI_QUAL)))
{
avl_name_1 = (char*) malloc((strlen(header_1->indexer_name) + 1) * sizeof(char));
if (avl_name_1 == NULL)

View File

@ -1350,6 +1350,8 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
}
strncpy(header->indexer_name, final_indexer_name, INDEXER_MAX_NAME);
}
else
new_column->indexer = NULL;
// Fill the data with NA values
obi_ini_to_NA_values(new_column, 0, nb_lines);
@ -1558,6 +1560,8 @@ OBIDMS_column_p obi_open_column(OBIDMS_p dms,
return NULL;
}
}
else
column->indexer = NULL;
if (close(column_file_descriptor) < 0)
{
@ -1693,8 +1697,8 @@ int obi_close_column(OBIDMS_column_p column)
if (obi_dms_unlist_column(column->dms, column) < 0)
ret_val = -1;
// If the data type is OBI_STR, OBI_SEQ or OBI_QUAL, the associated indexer is closed
if (((column->header)->returned_data_type == OBI_STR) || ((column->header)->returned_data_type == OBI_SEQ) || ((column->header)->returned_data_type == OBI_QUAL))
// If it's a tuple column or the data type is OBI_STR, OBI_SEQ or OBI_QUAL, the associated indexer is closed
if ((column->indexer) != NULL)
if (obi_close_indexer(column->indexer) < 0)
ret_val = -1;