Compare commits
17 Commits
Author | SHA1 | Date | |
---|---|---|---|
24a737aa55 | |||
8aa455ad8a | |||
46ca693ca9 | |||
9a9afde113 | |||
8dd403a118 | |||
9672f01c6a | |||
ed9549acfb | |||
9ace9989c4 | |||
a3ebe5f118 | |||
9100e14899 | |||
ccda0661ce | |||
aab59f2214 | |||
ade1107b42 | |||
9c7d24406f | |||
03bc9915f2 | |||
24b1dab573 | |||
7593673f3f |
@@ -30,12 +30,12 @@ cdef class ProgressBar:
|
|||||||
off_t maxi,
|
off_t maxi,
|
||||||
dict config={},
|
dict config={},
|
||||||
str head="",
|
str head="",
|
||||||
double seconde=0.1,
|
double seconds=5,
|
||||||
cut=False):
|
cut=False):
|
||||||
|
|
||||||
self.starttime = self.clock()
|
self.starttime = self.clock()
|
||||||
self.lasttime = self.starttime
|
self.lasttime = self.starttime
|
||||||
self.tickcount = <clock_t> (seconde * CLOCKS_PER_SEC)
|
self.tickcount = <clock_t> (seconds * CLOCKS_PER_SEC)
|
||||||
self.freq = 1
|
self.freq = 1
|
||||||
self.cycle = 0
|
self.cycle = 0
|
||||||
self.arrow = 0
|
self.arrow = 0
|
||||||
|
@@ -4,7 +4,7 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
|||||||
from obitools3.dms import DMS
|
from obitools3.dms import DMS
|
||||||
from obitools3.dms.view.view cimport View
|
from obitools3.dms.view.view cimport View
|
||||||
from obitools3.uri.decode import open_uri
|
from obitools3.uri.decode import open_uri
|
||||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
|
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
|
||||||
from obitools3.dms.view import RollbackException
|
from obitools3.dms.view import RollbackException
|
||||||
from obitools3.apps.config import logger
|
from obitools3.apps.config import logger
|
||||||
from obitools3.utils cimport tobytes, str2bytes
|
from obitools3.utils cimport tobytes, str2bytes
|
||||||
@@ -12,6 +12,9 @@ from obitools3.utils cimport tobytes, str2bytes
|
|||||||
from obitools3.dms.capi.obilcsalign cimport obi_lcs_align_one_column, \
|
from obitools3.dms.capi.obilcsalign cimport obi_lcs_align_one_column, \
|
||||||
obi_lcs_align_two_columns
|
obi_lcs_align_two_columns
|
||||||
|
|
||||||
|
from io import BufferedWriter
|
||||||
|
from cpython.exc cimport PyErr_CheckSignals
|
||||||
|
|
||||||
import time
|
import time
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
@@ -23,6 +26,7 @@ def addOptions(parser):
|
|||||||
|
|
||||||
addMinimalInputOption(parser)
|
addMinimalInputOption(parser)
|
||||||
addMinimalOutputOption(parser)
|
addMinimalOutputOption(parser)
|
||||||
|
addNoProgressBarOption(parser)
|
||||||
|
|
||||||
group=parser.add_argument_group('obi align specific options')
|
group=parser.add_argument_group('obi align specific options')
|
||||||
|
|
||||||
@@ -201,20 +205,20 @@ def run(config):
|
|||||||
if output is None:
|
if output is None:
|
||||||
raise Exception("Could not create output")
|
raise Exception("Could not create output")
|
||||||
o_dms = output[0]
|
o_dms = output[0]
|
||||||
|
output_0 = output[0]
|
||||||
o_dms_name = o_dms.name
|
o_dms_name = o_dms.name
|
||||||
final_o_view_name = output[1]
|
final_o_view_name = output[1]
|
||||||
|
o_view_name = final_o_view_name
|
||||||
|
|
||||||
# If the input and output DMS are not the same, align creating a temporary view in the input dms that will be exported to
|
# If stdout output or the input and output DMS are not the same, align creating a temporary view in the input dms that will be exported to
|
||||||
# the right DMS and deleted in the other afterwards.
|
# the right DMS and deleted in the other afterwards.
|
||||||
if i_dms != o_dms:
|
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||||
temporary_view_name = final_o_view_name
|
if type(output_0)==BufferedWriter:
|
||||||
i=0
|
o_dms = i_dms
|
||||||
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
|
o_view_name = b"temp"
|
||||||
temporary_view_name = final_o_view_name+b"_"+str2bytes(str(i))
|
while o_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||||
|
o_view_name = o_view_name+b"_"+str2bytes(str(i))
|
||||||
i+=1
|
i+=1
|
||||||
o_view_name = temporary_view_name
|
|
||||||
else:
|
|
||||||
o_view_name = final_o_view_name
|
|
||||||
|
|
||||||
# Save command config in View comments
|
# Save command config in View comments
|
||||||
command_line = " ".join(sys.argv[1:])
|
command_line = " ".join(sys.argv[1:])
|
||||||
@@ -263,8 +267,15 @@ def run(config):
|
|||||||
View.delete_view(i_dms, i_view_name_2)
|
View.delete_view(i_dms, i_view_name_2)
|
||||||
i_dms_2.close()
|
i_dms_2.close()
|
||||||
|
|
||||||
|
# stdout output: write to buffer
|
||||||
|
if type(output_0)==BufferedWriter:
|
||||||
|
logger("info", "Printing to output...")
|
||||||
|
o_view = o_dms[o_view_name]
|
||||||
|
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||||
|
o_view.close()
|
||||||
|
|
||||||
# If the input and the output DMS are different, delete the temporary result view in the input DMS
|
# If the input and the output DMS are different, delete the temporary result view in the input DMS
|
||||||
if i_dms != o_dms:
|
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||||
View.delete_view(i_dms, o_view_name)
|
View.delete_view(i_dms, o_view_name)
|
||||||
o_dms.close(force=True)
|
o_dms.close(force=True)
|
||||||
|
|
||||||
|
@@ -6,7 +6,7 @@ from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
|||||||
from obitools3.dms.column.column cimport Column
|
from obitools3.dms.column.column cimport Column
|
||||||
from obitools3.dms.capi.obiview cimport QUALITY_COLUMN
|
from obitools3.dms.capi.obiview cimport QUALITY_COLUMN
|
||||||
from obitools3.dms.capi.obitypes cimport OBI_QUAL
|
from obitools3.dms.capi.obitypes cimport OBI_QUAL
|
||||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
|
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
|
||||||
from obitools3.uri.decode import open_uri
|
from obitools3.uri.decode import open_uri
|
||||||
from obitools3.apps.config import logger
|
from obitools3.apps.config import logger
|
||||||
from obitools3.libalign._qsassemble import QSolexaReverseAssemble
|
from obitools3.libalign._qsassemble import QSolexaReverseAssemble
|
||||||
@@ -15,7 +15,9 @@ from obitools3.libalign._solexapairend import buildConsensus, buildJoinedSequenc
|
|||||||
from obitools3.dms.obiseq cimport Nuc_Seq
|
from obitools3.dms.obiseq cimport Nuc_Seq
|
||||||
from obitools3.libalign.shifted_ali cimport Kmer_similarity, Ali_shifted
|
from obitools3.libalign.shifted_ali cimport Kmer_similarity, Ali_shifted
|
||||||
from obitools3.dms.capi.obiview cimport REVERSE_SEQUENCE_COLUMN, REVERSE_QUALITY_COLUMN
|
from obitools3.dms.capi.obiview cimport REVERSE_SEQUENCE_COLUMN, REVERSE_QUALITY_COLUMN
|
||||||
|
from obitools3.utils cimport str2bytes
|
||||||
|
|
||||||
|
from io import BufferedWriter
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
|
|
||||||
@@ -29,6 +31,7 @@ def addOptions(parser):
|
|||||||
|
|
||||||
addMinimalInputOption(parser)
|
addMinimalInputOption(parser)
|
||||||
addMinimalOutputOption(parser)
|
addMinimalOutputOption(parser)
|
||||||
|
addNoProgressBarOption(parser)
|
||||||
|
|
||||||
group = parser.add_argument_group('obi alignpairedend specific options')
|
group = parser.add_argument_group('obi alignpairedend specific options')
|
||||||
|
|
||||||
@@ -39,12 +42,13 @@ def addOptions(parser):
|
|||||||
type=str,
|
type=str,
|
||||||
help="URI to the reverse reads if they are in a different view than the forward reads")
|
help="URI to the reverse reads if they are in a different view than the forward reads")
|
||||||
|
|
||||||
group.add_argument('--score-min',
|
# group.add_argument('--score-min',
|
||||||
action="store", dest="alignpairedend:smin",
|
# action="store", dest="alignpairedend:smin",
|
||||||
metavar="#.###",
|
# metavar="#.###",
|
||||||
default=None,
|
# default=None,
|
||||||
type=float,
|
# type=float,
|
||||||
help="Minimum score for keeping alignments")
|
# help="Minimum score for keeping alignments. "
|
||||||
|
# "(for kmer alignment) The score is an approximation of the number of nucleotides matching in the overlap of the alignment.")
|
||||||
|
|
||||||
# group.add_argument('-A', '--true-ali',
|
# group.add_argument('-A', '--true-ali',
|
||||||
# action="store_true", dest="alignpairedend:trueali",
|
# action="store_true", dest="alignpairedend:trueali",
|
||||||
@@ -170,18 +174,29 @@ def run(config):
|
|||||||
if output is None:
|
if output is None:
|
||||||
raise Exception("Could not create output view")
|
raise Exception("Could not create output view")
|
||||||
|
|
||||||
view = output[1]
|
output_0 = output[0]
|
||||||
|
o_dms = output[0]
|
||||||
|
|
||||||
Column.new_column(view, QUALITY_COLUMN, OBI_QUAL) #TODO output URI quality option?
|
# stdout output: create temporary view
|
||||||
|
if type(output_0)==BufferedWriter:
|
||||||
if 'smin' in config['alignpairedend']:
|
i_dms = forward.dms # using any dms
|
||||||
smin = config['alignpairedend']['smin']
|
o_dms = i_dms
|
||||||
|
i=0
|
||||||
|
o_view_name = b"temp"
|
||||||
|
while o_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||||
|
o_view_name = o_view_name+b"_"+str2bytes(str(i))
|
||||||
|
i+=1
|
||||||
|
o_view = View_NUC_SEQS.new(o_dms, o_view_name, quality=True)
|
||||||
else:
|
else:
|
||||||
smin = 0
|
o_view = output[1]
|
||||||
|
Column.new_column(o_view, QUALITY_COLUMN, OBI_QUAL)
|
||||||
|
|
||||||
# Initialize the progress bar
|
# Initialize the progress bar
|
||||||
pb = ProgressBar(entries_len, config, seconde=5)
|
if config['obi']['noprogressbar'] == False:
|
||||||
|
pb = ProgressBar(entries_len, config)
|
||||||
|
else:
|
||||||
|
pb = None
|
||||||
|
|
||||||
#if config['alignpairedend']['trueali']:
|
#if config['alignpairedend']['trueali']:
|
||||||
# kmer_ali = False
|
# kmer_ali = False
|
||||||
# aligner = buildAlignment
|
# aligner = buildAlignment
|
||||||
@@ -206,18 +221,19 @@ def run(config):
|
|||||||
i = 0
|
i = 0
|
||||||
for ali in ba:
|
for ali in ba:
|
||||||
|
|
||||||
pb(i)
|
if pb is not None:
|
||||||
|
pb(i)
|
||||||
|
|
||||||
PyErr_CheckSignals()
|
PyErr_CheckSignals()
|
||||||
|
|
||||||
consensus = view[i]
|
consensus = o_view[i]
|
||||||
|
|
||||||
if not two_views:
|
if not two_views:
|
||||||
seqF = entries[i]
|
seqF = entries[i]
|
||||||
else:
|
else:
|
||||||
seqF = forward[i]
|
seqF = forward[i]
|
||||||
|
|
||||||
if ali.score > smin and ali.consensus_len > 0 :
|
if ali.overlap_len > 0 :
|
||||||
buildConsensus(ali, consensus, seqF)
|
buildConsensus(ali, consensus, seqF)
|
||||||
else:
|
else:
|
||||||
if not two_views:
|
if not two_views:
|
||||||
@@ -225,32 +241,43 @@ def run(config):
|
|||||||
else:
|
else:
|
||||||
seqR = reverse[i]
|
seqR = reverse[i]
|
||||||
buildJoinedSequence(ali, seqR, consensus, forward=seqF)
|
buildJoinedSequence(ali, seqR, consensus, forward=seqF)
|
||||||
|
|
||||||
consensus[b"smin"] = smin
|
|
||||||
|
|
||||||
if kmer_ali :
|
if kmer_ali :
|
||||||
ali.free()
|
ali.free()
|
||||||
|
|
||||||
i+=1
|
i+=1
|
||||||
|
|
||||||
pb(i, force=True)
|
if pb is not None:
|
||||||
print("", file=sys.stderr)
|
pb(i, force=True)
|
||||||
|
print("", file=sys.stderr)
|
||||||
|
|
||||||
if kmer_ali :
|
if kmer_ali :
|
||||||
aligner.free()
|
aligner.free()
|
||||||
|
|
||||||
# Save command config in View and DMS comments
|
# Save command config in View and DMS comments
|
||||||
command_line = " ".join(sys.argv[1:])
|
command_line = " ".join(sys.argv[1:])
|
||||||
view.write_config(config, "alignpairedend", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
|
o_view.write_config(config, "alignpairedend", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
|
||||||
output[0].record_command_line(command_line)
|
o_dms.record_command_line(command_line)
|
||||||
|
|
||||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||||
#print(repr(view), file=sys.stderr)
|
#print(repr(view), file=sys.stderr)
|
||||||
|
|
||||||
|
# stdout output: write to buffer
|
||||||
|
if type(output_0)==BufferedWriter:
|
||||||
|
logger("info", "Printing to output...")
|
||||||
|
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||||
|
o_view.close()
|
||||||
|
|
||||||
|
# If stdout output, delete the temporary imported view used to create the final file
|
||||||
|
if type(output_0)==BufferedWriter:
|
||||||
|
View_NUC_SEQS.delete_view(o_dms, o_view_name)
|
||||||
|
output_0.close()
|
||||||
|
|
||||||
|
# Close all DMS
|
||||||
input[0].close(force=True)
|
input[0].close(force=True)
|
||||||
if two_views:
|
if two_views:
|
||||||
rinput[0].close(force=True)
|
rinput[0].close(force=True)
|
||||||
output[0].close(force=True)
|
o_dms.close(force=True)
|
||||||
|
|
||||||
logger("info", "Done.")
|
logger("info", "Done.")
|
||||||
|
|
@@ -4,11 +4,12 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
|||||||
from obitools3.dms import DMS
|
from obitools3.dms import DMS
|
||||||
from obitools3.dms.view.view cimport View, Line_selection
|
from obitools3.dms.view.view cimport View, Line_selection
|
||||||
from obitools3.uri.decode import open_uri
|
from obitools3.uri.decode import open_uri
|
||||||
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption
|
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption, addNoProgressBarOption
|
||||||
from obitools3.dms.view import RollbackException
|
from obitools3.dms.view import RollbackException
|
||||||
from functools import reduce
|
from functools import reduce
|
||||||
from obitools3.apps.config import logger
|
from obitools3.apps.config import logger
|
||||||
from obitools3.utils cimport tobytes, str2bytes
|
from obitools3.utils cimport tobytes, str2bytes
|
||||||
|
from io import BufferedWriter
|
||||||
from obitools3.dms.capi.obiview cimport NUC_SEQUENCE_COLUMN, \
|
from obitools3.dms.capi.obiview cimport NUC_SEQUENCE_COLUMN, \
|
||||||
ID_COLUMN, \
|
ID_COLUMN, \
|
||||||
DEFINITION_COLUMN, \
|
DEFINITION_COLUMN, \
|
||||||
@@ -34,6 +35,7 @@ def addOptions(parser):
|
|||||||
addMinimalInputOption(parser)
|
addMinimalInputOption(parser)
|
||||||
addTaxonomyOption(parser)
|
addTaxonomyOption(parser)
|
||||||
addMinimalOutputOption(parser)
|
addMinimalOutputOption(parser)
|
||||||
|
addNoProgressBarOption(parser)
|
||||||
|
|
||||||
group=parser.add_argument_group('obi annotate specific options')
|
group=parser.add_argument_group('obi annotate specific options')
|
||||||
|
|
||||||
@@ -278,8 +280,19 @@ def run(config):
|
|||||||
if output is None:
|
if output is None:
|
||||||
raise Exception("Could not create output view")
|
raise Exception("Could not create output view")
|
||||||
o_dms = output[0]
|
o_dms = output[0]
|
||||||
|
output_0 = output[0]
|
||||||
o_view_name = output[1]
|
o_view_name = output[1]
|
||||||
|
|
||||||
|
# stdout output: create temporary view
|
||||||
|
if type(output_0)==BufferedWriter:
|
||||||
|
o_dms = i_dms
|
||||||
|
i=0
|
||||||
|
o_view_name = b"temp"
|
||||||
|
while o_view_name in i_dms: # Making sure view name is unique in output DMS
|
||||||
|
o_view_name = o_view_name+b"_"+str2bytes(str(i))
|
||||||
|
i+=1
|
||||||
|
imported_view_name = o_view_name
|
||||||
|
|
||||||
# If the input and output DMS are not the same, import the input view in the output DMS before cloning it to modify it
|
# If the input and output DMS are not the same, import the input view in the output DMS before cloning it to modify it
|
||||||
# (could be the other way around: clone and modify in the input DMS then import the new view in the output DMS)
|
# (could be the other way around: clone and modify in the input DMS then import the new view in the output DMS)
|
||||||
if i_dms != o_dms:
|
if i_dms != o_dms:
|
||||||
@@ -290,7 +303,7 @@ def run(config):
|
|||||||
i+=1
|
i+=1
|
||||||
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], i_view_name, imported_view_name)
|
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], i_view_name, imported_view_name)
|
||||||
i_view = o_dms[imported_view_name]
|
i_view = o_dms[imported_view_name]
|
||||||
|
|
||||||
# Clone output view from input view
|
# Clone output view from input view
|
||||||
o_view = i_view.clone(o_view_name)
|
o_view = i_view.clone(o_view_name)
|
||||||
if o_view is None:
|
if o_view is None:
|
||||||
@@ -307,7 +320,10 @@ def run(config):
|
|||||||
taxo = None
|
taxo = None
|
||||||
|
|
||||||
# Initialize the progress bar
|
# Initialize the progress bar
|
||||||
pb = ProgressBar(len(o_view), config, seconde=5)
|
if config['obi']['noprogressbar'] == False:
|
||||||
|
pb = ProgressBar(len(o_view), config)
|
||||||
|
else:
|
||||||
|
pb = None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
||||||
@@ -346,14 +362,16 @@ def run(config):
|
|||||||
sequenceTagger = sequenceTaggerGenerator(config, taxo=taxo)
|
sequenceTagger = sequenceTaggerGenerator(config, taxo=taxo)
|
||||||
for i in range(len(o_view)):
|
for i in range(len(o_view)):
|
||||||
PyErr_CheckSignals()
|
PyErr_CheckSignals()
|
||||||
pb(i)
|
if pb is not None:
|
||||||
|
pb(i)
|
||||||
sequenceTagger(o_view[i])
|
sequenceTagger(o_view[i])
|
||||||
|
|
||||||
except Exception, e:
|
except Exception, e:
|
||||||
raise RollbackException("obi annotate error, rollbacking view: "+str(e), o_view)
|
raise RollbackException("obi annotate error, rollbacking view: "+str(e), o_view)
|
||||||
|
|
||||||
pb(i, force=True)
|
if pb is not None:
|
||||||
print("", file=sys.stderr)
|
pb(i, force=True)
|
||||||
|
print("", file=sys.stderr)
|
||||||
|
|
||||||
# Save command config in View and DMS comments
|
# Save command config in View and DMS comments
|
||||||
command_line = " ".join(sys.argv[1:])
|
command_line = " ".join(sys.argv[1:])
|
||||||
@@ -363,13 +381,19 @@ def run(config):
|
|||||||
input_dms_name.append(config['obi']['taxoURI'].split("/")[-3])
|
input_dms_name.append(config['obi']['taxoURI'].split("/")[-3])
|
||||||
input_view_name.append("taxonomy/"+config['obi']['taxoURI'].split("/")[-1])
|
input_view_name.append("taxonomy/"+config['obi']['taxoURI'].split("/")[-1])
|
||||||
o_view.write_config(config, "annotate", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
|
o_view.write_config(config, "annotate", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
|
||||||
output[0].record_command_line(command_line)
|
o_dms.record_command_line(command_line)
|
||||||
|
|
||||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||||
#print(repr(o_view), file=sys.stderr)
|
#print(repr(o_view), file=sys.stderr)
|
||||||
|
|
||||||
# If the input and the output DMS are different, delete the temporary imported view used to create the final view
|
# stdout output: write to buffer
|
||||||
if i_dms != o_dms:
|
if type(output_0)==BufferedWriter:
|
||||||
|
logger("info", "Printing to output...")
|
||||||
|
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||||
|
o_view.close()
|
||||||
|
|
||||||
|
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
|
||||||
|
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||||
View.delete_view(o_dms, imported_view_name)
|
View.delete_view(o_dms, imported_view_name)
|
||||||
o_dms.close(force=True)
|
o_dms.close(force=True)
|
||||||
i_dms.close(force=True)
|
i_dms.close(force=True)
|
||||||
|
@@ -4,14 +4,16 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
|||||||
from obitools3.dms.dms cimport DMS
|
from obitools3.dms.dms cimport DMS
|
||||||
from obitools3.dms.view import RollbackException
|
from obitools3.dms.view import RollbackException
|
||||||
from obitools3.dms.capi.build_reference_db cimport build_reference_db
|
from obitools3.dms.capi.build_reference_db cimport build_reference_db
|
||||||
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption
|
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption, addNoProgressBarOption
|
||||||
from obitools3.uri.decode import open_uri
|
from obitools3.uri.decode import open_uri
|
||||||
from obitools3.apps.config import logger
|
from obitools3.apps.config import logger
|
||||||
from obitools3.utils cimport tobytes, str2bytes
|
from obitools3.utils cimport tobytes, str2bytes
|
||||||
from obitools3.dms.view.view cimport View
|
from obitools3.dms.view.view cimport View
|
||||||
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
||||||
|
|
||||||
|
from io import BufferedWriter
|
||||||
import sys
|
import sys
|
||||||
|
from cpython.exc cimport PyErr_CheckSignals
|
||||||
|
|
||||||
|
|
||||||
__title__="Tag a set of sequences for PCR and sequencing errors identification"
|
__title__="Tag a set of sequences for PCR and sequencing errors identification"
|
||||||
@@ -22,6 +24,7 @@ def addOptions(parser):
|
|||||||
addMinimalInputOption(parser)
|
addMinimalInputOption(parser)
|
||||||
addTaxonomyOption(parser)
|
addTaxonomyOption(parser)
|
||||||
addMinimalOutputOption(parser)
|
addMinimalOutputOption(parser)
|
||||||
|
addNoProgressBarOption(parser)
|
||||||
|
|
||||||
group = parser.add_argument_group('obi build_ref_db specific options')
|
group = parser.add_argument_group('obi build_ref_db specific options')
|
||||||
|
|
||||||
@@ -56,17 +59,20 @@ def run(config):
|
|||||||
if output is None:
|
if output is None:
|
||||||
raise Exception("Could not create output")
|
raise Exception("Could not create output")
|
||||||
o_dms = output[0]
|
o_dms = output[0]
|
||||||
|
output_0 = output[0]
|
||||||
final_o_view_name = output[1]
|
final_o_view_name = output[1]
|
||||||
|
|
||||||
# If the input and output DMS are not the same, build the database creating a temporary view that will be exported to
|
# If stdout output or the input and output DMS are not the same, build the database creating a temporary view that will be exported to
|
||||||
# the right DMS and deleted in the other afterwards.
|
# the right DMS and deleted in the other afterwards.
|
||||||
if i_dms != o_dms:
|
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||||
temporary_view_name = final_o_view_name
|
temporary_view_name = b"temp"
|
||||||
i=0
|
i=0
|
||||||
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
|
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||||
temporary_view_name = final_o_view_name+b"_"+str2bytes(str(i))
|
temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
|
||||||
i+=1
|
i+=1
|
||||||
o_view_name = temporary_view_name
|
o_view_name = temporary_view_name
|
||||||
|
if type(output_0)==BufferedWriter:
|
||||||
|
o_dms = i_dms
|
||||||
else:
|
else:
|
||||||
o_view_name = final_o_view_name
|
o_view_name = final_o_view_name
|
||||||
|
|
||||||
@@ -80,22 +86,29 @@ def run(config):
|
|||||||
input_dms_name.append(config['obi']['taxoURI'].split("/")[-3])
|
input_dms_name.append(config['obi']['taxoURI'].split("/")[-3])
|
||||||
input_view_name.append("taxonomy/"+config['obi']['taxoURI'].split("/")[-1])
|
input_view_name.append("taxonomy/"+config['obi']['taxoURI'].split("/")[-1])
|
||||||
comments = View.print_config(config, "build_ref_db", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
|
comments = View.print_config(config, "build_ref_db", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
|
||||||
|
|
||||||
if build_reference_db(i_dms.name_with_full_path, tobytes(i_view_name), tobytes(taxonomy_name), tobytes(o_view_name), comments, config['build_ref_db']['threshold']) < 0:
|
if build_reference_db(i_dms.name_with_full_path, tobytes(i_view_name), tobytes(taxonomy_name), tobytes(o_view_name), comments, config['build_ref_db']['threshold']) < 0:
|
||||||
raise Exception("Error building a reference database")
|
raise Exception("Error building a reference database")
|
||||||
|
|
||||||
# If the input and output DMS are not the same, export result view to output DMS
|
# If the input and output DMS are not the same, export result view to output DMS
|
||||||
if i_dms != o_dms:
|
if i_dms != o_dms:
|
||||||
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)
|
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)
|
||||||
|
|
||||||
|
# stdout output: write to buffer
|
||||||
|
if type(output_0)==BufferedWriter:
|
||||||
|
logger("info", "Printing to output...")
|
||||||
|
o_view = o_dms[o_view_name]
|
||||||
|
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||||
|
o_view.close()
|
||||||
|
|
||||||
# Save command config in DMS comments
|
# Save command config in DMS comments
|
||||||
o_dms.record_command_line(command_line)
|
o_dms.record_command_line(command_line)
|
||||||
|
|
||||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||||
#print(repr(o_dms[final_o_view_name]), file=sys.stderr)
|
#print(repr(o_dms[final_o_view_name]), file=sys.stderr)
|
||||||
|
|
||||||
# If the input and the output DMS are different, delete the temporary result view in the input DMS
|
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
|
||||||
if i_dms != o_dms:
|
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||||
View.delete_view(i_dms, o_view_name)
|
View.delete_view(i_dms, o_view_name)
|
||||||
o_dms.close(force=True)
|
o_dms.close(force=True)
|
||||||
|
|
||||||
|
@@ -15,6 +15,7 @@ from obitools3.dms.capi.obiview cimport NUC_SEQUENCE_COLUMN, REVERSE_SEQUENCE_CO
|
|||||||
from obitools3.dms.capi.obitypes cimport OBI_SEQ, OBI_QUAL
|
from obitools3.dms.capi.obitypes cimport OBI_SEQ, OBI_QUAL
|
||||||
from obitools3.dms.column.column cimport Column
|
from obitools3.dms.column.column cimport Column
|
||||||
|
|
||||||
|
from io import BufferedWriter
|
||||||
import time
|
import time
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
@@ -76,53 +77,70 @@ def run(config):
|
|||||||
if output is None:
|
if output is None:
|
||||||
raise Exception("Could not create output view")
|
raise Exception("Could not create output view")
|
||||||
o_dms = output[0]
|
o_dms = output[0]
|
||||||
|
output_0 = output[0]
|
||||||
o_view = output[1]
|
o_view = output[1]
|
||||||
|
|
||||||
|
# stdout output
|
||||||
|
if type(output_0)==BufferedWriter:
|
||||||
|
o_dms = i_dms
|
||||||
|
|
||||||
# Initialize quality columns and their associated sequence columns if needed
|
# Initialize quality columns and their associated sequence columns if needed
|
||||||
if not remove_qual:
|
if type(output_0) != BufferedWriter:
|
||||||
if NUC_SEQUENCE_COLUMN not in o_view:
|
if not remove_qual:
|
||||||
Column.new_column(o_view, NUC_SEQUENCE_COLUMN, OBI_SEQ)
|
if NUC_SEQUENCE_COLUMN not in o_view:
|
||||||
Column.new_column(o_view, QUALITY_COLUMN, OBI_QUAL, associated_column_name=NUC_SEQUENCE_COLUMN, associated_column_version=o_view[NUC_SEQUENCE_COLUMN].version)
|
Column.new_column(o_view, NUC_SEQUENCE_COLUMN, OBI_SEQ)
|
||||||
if not remove_rev_qual:
|
Column.new_column(o_view, QUALITY_COLUMN, OBI_QUAL, associated_column_name=NUC_SEQUENCE_COLUMN, associated_column_version=o_view[NUC_SEQUENCE_COLUMN].version)
|
||||||
Column.new_column(o_view, REVERSE_SEQUENCE_COLUMN, OBI_SEQ)
|
if not remove_rev_qual:
|
||||||
Column.new_column(o_view, REVERSE_QUALITY_COLUMN, OBI_QUAL, associated_column_name=REVERSE_SEQUENCE_COLUMN, associated_column_version=o_view[REVERSE_SEQUENCE_COLUMN].version)
|
Column.new_column(o_view, REVERSE_SEQUENCE_COLUMN, OBI_SEQ)
|
||||||
|
Column.new_column(o_view, REVERSE_QUALITY_COLUMN, OBI_QUAL, associated_column_name=REVERSE_SEQUENCE_COLUMN, associated_column_version=o_view[REVERSE_SEQUENCE_COLUMN].version)
|
||||||
|
|
||||||
# Initialize multiple elements columns
|
# Initialize multiple elements columns
|
||||||
dict_cols = {}
|
if type(output_0)==BufferedWriter:
|
||||||
for v in iview_list:
|
dict_cols = {}
|
||||||
for coln in v.keys():
|
for v in iview_list:
|
||||||
if v[coln].nb_elements_per_line > 1:
|
for coln in v.keys():
|
||||||
if coln not in dict_cols:
|
if v[coln].nb_elements_per_line > 1:
|
||||||
dict_cols[coln] = {}
|
if coln not in dict_cols:
|
||||||
dict_cols[coln]['eltnames'] = set(v[coln].elements_names)
|
dict_cols[coln] = {}
|
||||||
dict_cols[coln]['nbelts'] = v[coln].nb_elements_per_line
|
dict_cols[coln]['eltnames'] = set(v[coln].elements_names)
|
||||||
dict_cols[coln]['obitype'] = v[coln].data_type_int
|
dict_cols[coln]['nbelts'] = v[coln].nb_elements_per_line
|
||||||
else:
|
dict_cols[coln]['obitype'] = v[coln].data_type_int
|
||||||
dict_cols[coln]['eltnames'] = set(v[coln].elements_names + list(dict_cols[coln]['eltnames']))
|
else:
|
||||||
dict_cols[coln]['nbelts'] = len(dict_cols[coln]['eltnames'])
|
dict_cols[coln]['eltnames'] = set(v[coln].elements_names + list(dict_cols[coln]['eltnames']))
|
||||||
for coln in dict_cols:
|
dict_cols[coln]['nbelts'] = len(dict_cols[coln]['eltnames'])
|
||||||
Column.new_column(o_view, coln, dict_cols[coln]['obitype'],
|
for coln in dict_cols:
|
||||||
nb_elements_per_line=dict_cols[coln]['nbelts'], elements_names=list(dict_cols[coln]['eltnames']))
|
Column.new_column(o_view, coln, dict_cols[coln]['obitype'],
|
||||||
|
nb_elements_per_line=dict_cols[coln]['nbelts'], elements_names=list(dict_cols[coln]['eltnames']))
|
||||||
|
|
||||||
# Initialize the progress bar
|
# Initialize the progress bar
|
||||||
pb = ProgressBar(total_len, config, seconde=5)
|
if not config['obi']['noprogressbar']:
|
||||||
|
pb = ProgressBar(total_len, config)
|
||||||
|
else:
|
||||||
|
pb = None
|
||||||
|
|
||||||
i = 0
|
i = 0
|
||||||
for v in iview_list:
|
for v in iview_list:
|
||||||
for l in v:
|
for entry in v:
|
||||||
PyErr_CheckSignals()
|
PyErr_CheckSignals()
|
||||||
pb(i)
|
if pb is not None:
|
||||||
o_view[i] = l
|
pb(i)
|
||||||
|
if type(output_0)==BufferedWriter:
|
||||||
|
rep = repr(entry)
|
||||||
|
output_0.write(str2bytes(rep)+b"\n")
|
||||||
|
else:
|
||||||
|
o_view[i] = entry
|
||||||
i+=1
|
i+=1
|
||||||
|
|
||||||
# Deletes quality columns if needed
|
# Deletes quality columns if needed
|
||||||
if QUALITY_COLUMN in o_view and remove_qual :
|
if type(output_0)!=BufferedWriter:
|
||||||
o_view.delete_column(QUALITY_COLUMN)
|
if QUALITY_COLUMN in o_view and remove_qual :
|
||||||
if REVERSE_QUALITY_COLUMN in o_view and remove_rev_qual :
|
o_view.delete_column(QUALITY_COLUMN)
|
||||||
o_view.delete_column(REVERSE_QUALITY_COLUMN)
|
if REVERSE_QUALITY_COLUMN in o_view and remove_rev_qual :
|
||||||
|
o_view.delete_column(REVERSE_QUALITY_COLUMN)
|
||||||
pb(i, force=True)
|
|
||||||
print("", file=sys.stderr)
|
if pb is not None:
|
||||||
|
pb(i, force=True)
|
||||||
|
print("", file=sys.stderr)
|
||||||
|
|
||||||
# Save command config in DMS comments
|
# Save command config in DMS comments
|
||||||
command_line = " ".join(sys.argv[1:])
|
command_line = " ".join(sys.argv[1:])
|
||||||
|
@@ -4,13 +4,14 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
|||||||
from obitools3.dms.dms cimport DMS
|
from obitools3.dms.dms cimport DMS
|
||||||
from obitools3.dms.view import RollbackException
|
from obitools3.dms.view import RollbackException
|
||||||
from obitools3.dms.capi.obiclean cimport obi_clean
|
from obitools3.dms.capi.obiclean cimport obi_clean
|
||||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
|
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
|
||||||
from obitools3.uri.decode import open_uri
|
from obitools3.uri.decode import open_uri
|
||||||
from obitools3.apps.config import logger
|
from obitools3.apps.config import logger
|
||||||
from obitools3.utils cimport tobytes, str2bytes
|
from obitools3.utils cimport tobytes, str2bytes
|
||||||
from obitools3.dms.view.view cimport View
|
from obitools3.dms.view.view cimport View
|
||||||
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
||||||
|
|
||||||
|
from io import BufferedWriter
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
|
||||||
@ -21,7 +22,8 @@ def addOptions(parser):
|
|||||||
|
|
||||||
addMinimalInputOption(parser)
|
addMinimalInputOption(parser)
|
||||||
addMinimalOutputOption(parser)
|
addMinimalOutputOption(parser)
|
||||||
|
addNoProgressBarOption(parser)
|
||||||
|
|
||||||
group = parser.add_argument_group('obi clean specific options')
|
group = parser.add_argument_group('obi clean specific options')
|
||||||
|
|
||||||
group.add_argument('--distance', '-d',
|
group.add_argument('--distance', '-d',
|
||||||
@ -36,8 +38,7 @@ def addOptions(parser):
|
|||||||
dest="clean:sample-tag-name",
|
dest="clean:sample-tag-name",
|
||||||
metavar="<SAMPLE TAG NAME>",
|
metavar="<SAMPLE TAG NAME>",
|
||||||
type=str,
|
type=str,
|
||||||
default="merged_sample",
|
help="Name of the tag where merged sample count informations are kept (typically generated by obi uniq, usually MERGED_sample, default: None).")
|
||||||
help="Name of the tag where sample counts are kept.")
|
|
||||||
|
|
||||||
group.add_argument('--ratio', '-r',
|
group.add_argument('--ratio', '-r',
|
||||||
action="store", dest="clean:ratio",
|
action="store", dest="clean:ratio",
|
||||||
@ -89,17 +90,20 @@ def run(config):
|
|||||||
if output is None:
|
if output is None:
|
||||||
raise Exception("Could not create output")
|
raise Exception("Could not create output")
|
||||||
o_dms = output[0]
|
o_dms = output[0]
|
||||||
|
output_0 = output[0]
|
||||||
final_o_view_name = output[1]
|
final_o_view_name = output[1]
|
||||||
|
|
||||||
# If the input and output DMS are not the same, run obiclean creating a temporary view that will be exported to
|
# If stdout output or the input and output DMS are not the same, create a temporary view that will be exported to
|
||||||
# the right DMS and deleted in the other afterwards.
|
# the right DMS and deleted in the other afterwards.
|
||||||
if i_dms != o_dms:
|
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||||
temporary_view_name = final_o_view_name
|
temporary_view_name = b"temp"
|
||||||
i=0
|
i=0
|
||||||
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
|
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||||
temporary_view_name = final_o_view_name+b"_"+str2bytes(str(i))
|
temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
|
||||||
i+=1
|
i+=1
|
||||||
o_view_name = temporary_view_name
|
o_view_name = temporary_view_name
|
||||||
|
if type(output_0)==BufferedWriter:
|
||||||
|
o_dms = i_dms
|
||||||
else:
|
else:
|
||||||
o_view_name = final_o_view_name
|
o_view_name = final_o_view_name
|
||||||
|
|
||||||
@ -107,6 +111,9 @@ def run(config):
|
|||||||
command_line = " ".join(sys.argv[1:])
|
command_line = " ".join(sys.argv[1:])
|
||||||
comments = View.print_config(config, "clean", command_line, input_dms_name=[i_dms_name], input_view_name=[i_view_name])
|
comments = View.print_config(config, "clean", command_line, input_dms_name=[i_dms_name], input_view_name=[i_view_name])
|
||||||
|
|
||||||
|
if 'sample-tag-name' not in config['clean']:
|
||||||
|
config['clean']['sample-tag-name'] = ""
|
||||||
|
|
||||||
if obi_clean(i_dms.name_with_full_path, tobytes(i_view_name), tobytes(config['clean']['sample-tag-name']), tobytes(o_view_name), comments, \
|
if obi_clean(i_dms.name_with_full_path, tobytes(i_view_name), tobytes(config['clean']['sample-tag-name']), tobytes(o_view_name), comments, \
|
||||||
config['clean']['distance'], config['clean']['ratio'], config['clean']['heads-only'], config['clean']['thread-count']) < 0:
|
config['clean']['distance'], config['clean']['ratio'], config['clean']['heads-only'], config['clean']['thread-count']) < 0:
|
||||||
raise Exception("Error running obiclean")
|
raise Exception("Error running obiclean")
|
||||||
@ -114,15 +121,22 @@ def run(config):
|
|||||||
# If the input and output DMS are not the same, export result view to output DMS
|
# If the input and output DMS are not the same, export result view to output DMS
|
||||||
if i_dms != o_dms:
|
if i_dms != o_dms:
|
||||||
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)
|
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)
|
||||||
|
|
||||||
|
# stdout output: write to buffer
|
||||||
|
if type(output_0)==BufferedWriter:
|
||||||
|
logger("info", "Printing to output...")
|
||||||
|
o_view = o_dms[o_view_name]
|
||||||
|
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||||
|
o_view.close()
|
||||||
|
|
||||||
# Save command config in DMS comments
|
# Save command config in DMS comments
|
||||||
o_dms.record_command_line(command_line)
|
o_dms.record_command_line(command_line)
|
||||||
|
|
||||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||||
#print(repr(o_dms[final_o_view_name]), file=sys.stderr)
|
#print(repr(o_dms[final_o_view_name]), file=sys.stderr)
|
||||||
|
|
||||||
# If the input and the output DMS are different, delete the temporary result view in the input DMS
|
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
|
||||||
if i_dms != o_dms:
|
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||||
View.delete_view(i_dms, o_view_name)
|
View.delete_view(i_dms, o_view_name)
|
||||||
o_dms.close(force=True)
|
o_dms.close(force=True)
|
||||||
|
|
||||||
|
@ -22,7 +22,7 @@ def addOptions(parser):
|
|||||||
group.add_argument('-s','--sequence',
|
group.add_argument('-s','--sequence',
|
||||||
action="store_true", dest="count:sequence",
|
action="store_true", dest="count:sequence",
|
||||||
default=False,
|
default=False,
|
||||||
help="Prints only the number of sequence records.")
|
help="Prints only the number of sequence records (much faster, default: False).")
|
||||||
|
|
||||||
group.add_argument('-a','--all',
|
group.add_argument('-a','--all',
|
||||||
action="store_true", dest="count:all",
|
action="store_true", dest="count:all",
|
||||||
|
@ -5,10 +5,10 @@ from obitools3.dms.dms cimport DMS
|
|||||||
from obitools3.dms.capi.obidms cimport OBIDMS_p
|
from obitools3.dms.capi.obidms cimport OBIDMS_p
|
||||||
from obitools3.dms.view import RollbackException
|
from obitools3.dms.view import RollbackException
|
||||||
from obitools3.dms.capi.obiecopcr cimport obi_ecopcr
|
from obitools3.dms.capi.obiecopcr cimport obi_ecopcr
|
||||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addTaxonomyOption
|
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addTaxonomyOption, addNoProgressBarOption
|
||||||
from obitools3.uri.decode import open_uri
|
from obitools3.uri.decode import open_uri
|
||||||
from obitools3.apps.config import logger
|
from obitools3.apps.config import logger
|
||||||
from obitools3.utils cimport tobytes
|
from obitools3.utils cimport tobytes, str2bytes
|
||||||
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
||||||
from obitools3.dms.view import View
|
from obitools3.dms.view import View
|
||||||
|
|
||||||
@ -16,6 +16,7 @@ from libc.stdlib cimport malloc, free
|
|||||||
from libc.stdint cimport int32_t
|
from libc.stdint cimport int32_t
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
|
from io import BufferedWriter
|
||||||
|
|
||||||
|
|
||||||
__title__="in silico PCR"
|
__title__="in silico PCR"
|
||||||
@ -27,6 +28,7 @@ def addOptions(parser):
|
|||||||
addMinimalInputOption(parser)
|
addMinimalInputOption(parser)
|
||||||
addTaxonomyOption(parser)
|
addTaxonomyOption(parser)
|
||||||
addMinimalOutputOption(parser)
|
addMinimalOutputOption(parser)
|
||||||
|
addNoProgressBarOption(parser)
|
||||||
|
|
||||||
|
|
||||||
group = parser.add_argument_group('obi ecopcr specific options')
|
group = parser.add_argument_group('obi ecopcr specific options')
|
||||||
@ -169,11 +171,20 @@ def run(config):
|
|||||||
if output is None:
|
if output is None:
|
||||||
raise Exception("Could not create output")
|
raise Exception("Could not create output")
|
||||||
o_dms = output[0]
|
o_dms = output[0]
|
||||||
|
output_0 = output[0]
|
||||||
o_dms_name = output[0].name
|
o_dms_name = output[0].name
|
||||||
o_view_name = output[1]
|
o_view_name = output[1]
|
||||||
|
|
||||||
# Read taxonomy name
|
# Read taxonomy name
|
||||||
taxonomy_name = config['obi']['taxoURI'].split("/")[-1] # Robust in theory
|
taxonomy_name = config['obi']['taxoURI'].split("/")[-1] # Robust in theory
|
||||||
|
|
||||||
|
# If stdout output create a temporary view in the input dms that will be deleted afterwards.
|
||||||
|
if type(output_0)==BufferedWriter:
|
||||||
|
o_dms = i_dms
|
||||||
|
o_view_name = b"temp"
|
||||||
|
while o_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||||
|
o_view_name = o_view_name+b"_"+str2bytes(str(i))
|
||||||
|
i+=1
|
||||||
|
|
||||||
# Save command config in View comments
|
# Save command config in View comments
|
||||||
command_line = " ".join(sys.argv[1:])
|
command_line = " ".join(sys.argv[1:])
|
||||||
@ -201,10 +212,21 @@ def run(config):
|
|||||||
|
|
||||||
free(restrict_to_taxids_p)
|
free(restrict_to_taxids_p)
|
||||||
free(ignore_taxids_p)
|
free(ignore_taxids_p)
|
||||||
|
|
||||||
|
# stdout output: write to buffer
|
||||||
|
if type(output_0)==BufferedWriter:
|
||||||
|
logger("info", "Printing to output...")
|
||||||
|
o_view = o_dms[o_view_name]
|
||||||
|
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||||
|
o_view.close()
|
||||||
|
|
||||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||||
#print(repr(o_dms[o_view_name]), file=sys.stderr)
|
#print(repr(o_dms[o_view_name]), file=sys.stderr)
|
||||||
|
|
||||||
|
# If stdout output, delete the temporary result view in the input DMS
|
||||||
|
if type(output_0)==BufferedWriter:
|
||||||
|
View.delete_view(i_dms, o_view_name)
|
||||||
|
|
||||||
i_dms.close(force=True)
|
i_dms.close(force=True)
|
||||||
o_dms.close(force=True)
|
o_dms.close(force=True)
|
||||||
|
|
||||||
|
@ -4,7 +4,7 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
|||||||
from obitools3.dms.dms cimport DMS
|
from obitools3.dms.dms cimport DMS
|
||||||
from obitools3.dms.view import RollbackException
|
from obitools3.dms.view import RollbackException
|
||||||
from obitools3.dms.capi.obiecotag cimport obi_ecotag
|
from obitools3.dms.capi.obiecotag cimport obi_ecotag
|
||||||
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption
|
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption, addNoProgressBarOption
|
||||||
from obitools3.uri.decode import open_uri
|
from obitools3.uri.decode import open_uri
|
||||||
from obitools3.apps.config import logger
|
from obitools3.apps.config import logger
|
||||||
from obitools3.utils cimport tobytes, str2bytes
|
from obitools3.utils cimport tobytes, str2bytes
|
||||||
@ -12,6 +12,7 @@ from obitools3.dms.view.view cimport View
|
|||||||
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
|
from io import BufferedWriter
|
||||||
|
|
||||||
|
|
||||||
__title__="Taxonomic assignment of sequences"
|
__title__="Taxonomic assignment of sequences"
|
||||||
@ -22,6 +23,7 @@ def addOptions(parser):
|
|||||||
addMinimalInputOption(parser)
|
addMinimalInputOption(parser)
|
||||||
addTaxonomyOption(parser)
|
addTaxonomyOption(parser)
|
||||||
addMinimalOutputOption(parser)
|
addMinimalOutputOption(parser)
|
||||||
|
addNoProgressBarOption(parser)
|
||||||
|
|
||||||
group = parser.add_argument_group('obi ecotag specific options')
|
group = parser.add_argument_group('obi ecotag specific options')
|
||||||
|
|
||||||
@ -75,17 +77,19 @@ def run(config):
|
|||||||
if output is None:
|
if output is None:
|
||||||
raise Exception("Could not create output")
|
raise Exception("Could not create output")
|
||||||
o_dms = output[0]
|
o_dms = output[0]
|
||||||
|
output_0 = output[0]
|
||||||
final_o_view_name = output[1]
|
final_o_view_name = output[1]
|
||||||
|
|
||||||
# If the input and output DMS are not the same, run ecotag creating a temporary view that will be exported to
|
# If stdout output or the input and output DMS are not the same, create a temporary view that will be exported and deleted.
|
||||||
# the right DMS and deleted in the other afterwards.
|
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||||
if i_dms != o_dms:
|
temporary_view_name = b"temp"
|
||||||
temporary_view_name = final_o_view_name
|
|
||||||
i=0
|
i=0
|
||||||
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
|
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||||
temporary_view_name = final_o_view_name+b"_"+str2bytes(str(i))
|
temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
|
||||||
i+=1
|
i+=1
|
||||||
o_view_name = temporary_view_name
|
o_view_name = temporary_view_name
|
||||||
|
if type(output_0)==BufferedWriter:
|
||||||
|
o_dms = i_dms
|
||||||
else:
|
else:
|
||||||
o_view_name = final_o_view_name
|
o_view_name = final_o_view_name
|
||||||
|
|
||||||
@ -120,11 +124,18 @@ def run(config):
|
|||||||
# Save command config in DMS comments
|
# Save command config in DMS comments
|
||||||
o_dms.record_command_line(command_line)
|
o_dms.record_command_line(command_line)
|
||||||
|
|
||||||
|
# stdout output: write to buffer
|
||||||
|
if type(output_0)==BufferedWriter:
|
||||||
|
logger("info", "Printing to output...")
|
||||||
|
o_view = o_dms[o_view_name]
|
||||||
|
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||||
|
o_view.close()
|
||||||
|
|
||||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||||
#print(repr(o_dms[final_o_view_name]), file=sys.stderr)
|
#print(repr(o_dms[final_o_view_name]), file=sys.stderr)
|
||||||
|
|
||||||
# If the input and the output DMS are different, delete the temporary result view in the input DMS
|
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
|
||||||
if i_dms != o_dms:
|
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||||
View.delete_view(i_dms, o_view_name)
|
View.delete_view(i_dms, o_view_name)
|
||||||
o_dms.close(force=True)
|
o_dms.close(force=True)
|
||||||
|
|
||||||
|
@ -74,7 +74,7 @@ def run(config):
|
|||||||
if config['obi']['noprogressbar']:
|
if config['obi']['noprogressbar']:
|
||||||
pb = None
|
pb = None
|
||||||
else:
|
else:
|
||||||
pb = ProgressBar(withoutskip - skip, config, seconde=5)
|
pb = ProgressBar(withoutskip - skip, config)
|
||||||
|
|
||||||
i=0
|
i=0
|
||||||
for seq in iview :
|
for seq in iview :
|
||||||
|
@ -4,7 +4,7 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
|||||||
from obitools3.dms import DMS
|
from obitools3.dms import DMS
|
||||||
from obitools3.dms.view.view cimport View, Line_selection
|
from obitools3.dms.view.view cimport View, Line_selection
|
||||||
from obitools3.uri.decode import open_uri
|
from obitools3.uri.decode import open_uri
|
||||||
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption
|
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption, addNoProgressBarOption
|
||||||
from obitools3.dms.view import RollbackException
|
from obitools3.dms.view import RollbackException
|
||||||
from obitools3.apps.config import logger
|
from obitools3.apps.config import logger
|
||||||
from obitools3.utils cimport tobytes, str2bytes
|
from obitools3.utils cimport tobytes, str2bytes
|
||||||
@ -14,6 +14,7 @@ import time
|
|||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
import ast
|
import ast
|
||||||
|
from io import BufferedWriter
|
||||||
from cpython.exc cimport PyErr_CheckSignals
|
from cpython.exc cimport PyErr_CheckSignals
|
||||||
|
|
||||||
|
|
||||||
@ -28,6 +29,7 @@ def addOptions(parser):
|
|||||||
addMinimalInputOption(parser)
|
addMinimalInputOption(parser)
|
||||||
addTaxonomyOption(parser)
|
addTaxonomyOption(parser)
|
||||||
addMinimalOutputOption(parser)
|
addMinimalOutputOption(parser)
|
||||||
|
addNoProgressBarOption(parser)
|
||||||
|
|
||||||
group=parser.add_argument_group("obi grep specific options")
|
group=parser.add_argument_group("obi grep specific options")
|
||||||
|
|
||||||
@ -304,16 +306,21 @@ def run(config):
|
|||||||
if output is None:
|
if output is None:
|
||||||
raise Exception("Could not create output view")
|
raise Exception("Could not create output view")
|
||||||
o_dms = output[0]
|
o_dms = output[0]
|
||||||
o_view_name_final = output[1]
|
output_0 = output[0]
|
||||||
o_view_name = o_view_name_final
|
final_o_view_name = output[1]
|
||||||
|
|
||||||
# If the input and output DMS are not the same, create output view in input DMS first, then export it
|
# If stdout output or the input and output DMS are not the same, create a temporary view that will be exported and deleted afterwards.
|
||||||
# to output DMS, making sure the temporary view name is unique in the input DMS
|
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||||
if i_dms != o_dms:
|
temporary_view_name = b"temp"
|
||||||
i=0
|
i=0
|
||||||
while o_view_name in i_dms:
|
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||||
o_view_name = o_view_name_final+b"_"+str2bytes(str(i))
|
temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
|
||||||
i+=1
|
i+=1
|
||||||
|
o_view_name = temporary_view_name
|
||||||
|
if type(output_0)==BufferedWriter:
|
||||||
|
o_dms = i_dms
|
||||||
|
else:
|
||||||
|
o_view_name = final_o_view_name
|
||||||
|
|
||||||
if 'taxoURI' in config['obi'] and config['obi']['taxoURI'] is not None:
|
if 'taxoURI' in config['obi'] and config['obi']['taxoURI'] is not None:
|
||||||
taxo_uri = open_uri(config["obi"]["taxoURI"])
|
taxo_uri = open_uri(config["obi"]["taxoURI"])
|
||||||
@ -324,7 +331,10 @@ def run(config):
|
|||||||
taxo = None
|
taxo = None
|
||||||
|
|
||||||
# Initialize the progress bar
|
# Initialize the progress bar
|
||||||
pb = ProgressBar(len(i_view), config, seconde=5)
|
if config['obi']['noprogressbar'] == False:
|
||||||
|
pb = ProgressBar(len(i_view), config)
|
||||||
|
else:
|
||||||
|
pb = None
|
||||||
|
|
||||||
# Apply filter
|
# Apply filter
|
||||||
tax_filter = Taxonomy_filter_generator(taxo, config["grep"])
|
tax_filter = Taxonomy_filter_generator(taxo, config["grep"])
|
||||||
@ -334,31 +344,36 @@ def run(config):
|
|||||||
if filter is None and config["grep"]["invert_selection"]: # all sequences are selected: filter is None if no line will be selected because some columns don't exist
|
if filter is None and config["grep"]["invert_selection"]: # all sequences are selected: filter is None if no line will be selected because some columns don't exist
|
||||||
for i in range(len(i_view)):
|
for i in range(len(i_view)):
|
||||||
PyErr_CheckSignals()
|
PyErr_CheckSignals()
|
||||||
pb(i)
|
if pb is not None:
|
||||||
selection.append(i)
|
pb(i)
|
||||||
|
selection.append(i)
|
||||||
|
|
||||||
elif filter is not None : # filter is None if no line will be selected because some columns don't exist
|
elif filter is not None : # filter is None if no line will be selected because some columns don't exist
|
||||||
for i in range(len(i_view)):
|
for i in range(len(i_view)):
|
||||||
PyErr_CheckSignals()
|
PyErr_CheckSignals()
|
||||||
pb(i)
|
if pb is not None:
|
||||||
|
pb(i)
|
||||||
line = i_view[i]
|
line = i_view[i]
|
||||||
|
|
||||||
loc_env = {"sequence": line, "line": line, "taxonomy": taxo, "obi_eval_result": False}
|
loc_env = {"sequence": line, "line": line, "taxonomy": taxo, "obi_eval_result": False}
|
||||||
|
|
||||||
good = filter(line, loc_env)
|
good = filter(line, loc_env)
|
||||||
|
|
||||||
if good :
|
if good :
|
||||||
selection.append(i)
|
selection.append(i)
|
||||||
|
|
||||||
pb(len(i_view), force=True)
|
if pb is not None:
|
||||||
print("", file=sys.stderr)
|
pb(len(i_view), force=True)
|
||||||
|
print("", file=sys.stderr)
|
||||||
|
|
||||||
# Create output view with the line selection
|
# Create output view with the line selection
|
||||||
try:
|
try:
|
||||||
o_view = selection.materialize(o_view_name)
|
o_view = selection.materialize(o_view_name)
|
||||||
except Exception, e:
|
except Exception, e:
|
||||||
raise RollbackException("obi grep error, rollbacking view: "+str(e), o_view)
|
raise RollbackException("obi grep error, rollbacking view: "+str(e), o_view)
|
||||||
|
|
||||||
|
logger("info", "Grepped %d entries" % len(o_view))
|
||||||
|
|
||||||
# Save command config in View and DMS comments
|
# Save command config in View and DMS comments
|
||||||
command_line = " ".join(sys.argv[1:])
|
command_line = " ".join(sys.argv[1:])
|
||||||
input_dms_name=[input[0].name]
|
input_dms_name=[input[0].name]
|
||||||
@ -373,14 +388,20 @@ def run(config):
|
|||||||
# and delete the temporary view in the input DMS
|
# and delete the temporary view in the input DMS
|
||||||
if i_dms != o_dms:
|
if i_dms != o_dms:
|
||||||
o_view.close()
|
o_view.close()
|
||||||
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, o_view_name_final)
|
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)
|
||||||
o_view = o_dms[o_view_name_final]
|
o_view = o_dms[final_o_view_name]
|
||||||
|
|
||||||
|
# stdout output: write to buffer
|
||||||
|
if type(output_0)==BufferedWriter:
|
||||||
|
logger("info", "Printing to output...")
|
||||||
|
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||||
|
o_view.close()
|
||||||
|
|
||||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||||
#print(repr(o_view), file=sys.stderr)
|
#print(repr(o_view), file=sys.stderr)
|
||||||
|
|
||||||
# If the input and the output DMS are different, delete the temporary imported view used to create the final view
|
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
|
||||||
if i_dms != o_dms:
|
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||||
View.delete_view(i_dms, o_view_name)
|
View.delete_view(i_dms, o_view_name)
|
||||||
o_dms.close(force=True)
|
o_dms.close(force=True)
|
||||||
i_dms.close(force=True)
|
i_dms.close(force=True)
|
||||||
|
@ -4,14 +4,15 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
|||||||
from obitools3.dms import DMS
|
from obitools3.dms import DMS
|
||||||
from obitools3.dms.view.view cimport View, Line_selection
|
from obitools3.dms.view.view cimport View, Line_selection
|
||||||
from obitools3.uri.decode import open_uri
|
from obitools3.uri.decode import open_uri
|
||||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
|
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
|
||||||
from obitools3.dms.view import RollbackException
|
from obitools3.dms.view import RollbackException
|
||||||
from obitools3.apps.config import logger
|
from obitools3.apps.config import logger
|
||||||
from obitools3.utils cimport str2bytes
|
from obitools3.utils cimport str2bytes
|
||||||
|
|
||||||
import time
|
import time
|
||||||
import sys
|
import sys
|
||||||
|
from io import BufferedWriter
|
||||||
|
|
||||||
from cpython.exc cimport PyErr_CheckSignals
|
from cpython.exc cimport PyErr_CheckSignals
|
||||||
|
|
||||||
|
|
||||||
@ -22,6 +23,7 @@ def addOptions(parser):
|
|||||||
|
|
||||||
addMinimalInputOption(parser)
|
addMinimalInputOption(parser)
|
||||||
addMinimalOutputOption(parser)
|
addMinimalOutputOption(parser)
|
||||||
|
addNoProgressBarOption(parser)
|
||||||
|
|
||||||
group=parser.add_argument_group('obi head specific options')
|
group=parser.add_argument_group('obi head specific options')
|
||||||
|
|
||||||
@ -53,31 +55,41 @@ def run(config):
|
|||||||
if output is None:
|
if output is None:
|
||||||
raise Exception("Could not create output view")
|
raise Exception("Could not create output view")
|
||||||
o_dms = output[0]
|
o_dms = output[0]
|
||||||
o_view_name_final = output[1]
|
output_0 = output[0]
|
||||||
o_view_name = o_view_name_final
|
final_o_view_name = output[1]
|
||||||
|
|
||||||
# If the input and output DMS are not the same, create output view in input DMS first, then export it
|
# If stdout output or the input and output DMS are not the same, create a temporary view that will be exported and deleted.
|
||||||
# to output DMS, making sure the temporary view name is unique in the input DMS
|
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||||
if i_dms != o_dms:
|
temporary_view_name = b"temp"
|
||||||
i=0
|
i=0
|
||||||
while o_view_name in i_dms:
|
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||||
o_view_name = o_view_name_final+b"_"+str2bytes(str(i))
|
temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
|
||||||
i+=1
|
i+=1
|
||||||
|
o_view_name = temporary_view_name
|
||||||
|
if type(output_0)==BufferedWriter:
|
||||||
|
o_dms = i_dms
|
||||||
|
else:
|
||||||
|
o_view_name = final_o_view_name
|
||||||
|
|
||||||
n = min(config['head']['count'], len(i_view))
|
n = min(config['head']['count'], len(i_view))
|
||||||
|
|
||||||
# Initialize the progress bar
|
# Initialize the progress bar
|
||||||
pb = ProgressBar(n, config, seconde=5)
|
if config['obi']['noprogressbar'] == False:
|
||||||
|
pb = ProgressBar(n, config)
|
||||||
|
else:
|
||||||
|
pb = None
|
||||||
|
|
||||||
selection = Line_selection(i_view)
|
selection = Line_selection(i_view)
|
||||||
|
|
||||||
for i in range(n):
|
for i in range(n):
|
||||||
PyErr_CheckSignals()
|
PyErr_CheckSignals()
|
||||||
pb(i)
|
if pb is not None:
|
||||||
|
pb(i)
|
||||||
selection.append(i)
|
selection.append(i)
|
||||||
|
|
||||||
pb(i, force=True)
|
if pb is not None:
|
||||||
print("", file=sys.stderr)
|
pb(i, force=True)
|
||||||
|
print("", file=sys.stderr)
|
||||||
|
|
||||||
# Create output view with the line selection
|
# Create output view with the line selection
|
||||||
try:
|
try:
|
||||||
@ -94,14 +106,20 @@ def run(config):
|
|||||||
# and delete the temporary view in the input DMS
|
# and delete the temporary view in the input DMS
|
||||||
if i_dms != o_dms:
|
if i_dms != o_dms:
|
||||||
o_view.close()
|
o_view.close()
|
||||||
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, o_view_name_final)
|
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)
|
||||||
o_view = o_dms[o_view_name_final]
|
o_view = o_dms[final_o_view_name]
|
||||||
|
|
||||||
|
# stdout output: write to buffer
|
||||||
|
if type(output_0)==BufferedWriter:
|
||||||
|
logger("info", "Printing to output...")
|
||||||
|
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||||
|
o_view.close()
|
||||||
|
|
||||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||||
#print(repr(view), file=sys.stderr)
|
#print(repr(view), file=sys.stderr)
|
||||||
|
|
||||||
# If the input and the output DMS are different, delete the temporary imported view used to create the final view
|
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
|
||||||
if i_dms != o_dms:
|
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||||
View.delete_view(i_dms, o_view_name)
|
View.delete_view(i_dms, o_view_name)
|
||||||
o_dms.close(force=True)
|
o_dms.close(force=True)
|
||||||
i_dms.close(force=True)
|
i_dms.close(force=True)
|
||||||
|
@ -47,6 +47,8 @@ from obitools3.apps.config import logger
|
|||||||
|
|
||||||
from cpython.exc cimport PyErr_CheckSignals
|
from cpython.exc cimport PyErr_CheckSignals
|
||||||
|
|
||||||
|
from io import BufferedWriter
|
||||||
|
|
||||||
|
|
||||||
__title__="Imports sequences from different formats into a DMS"
|
__title__="Imports sequences from different formats into a DMS"
|
||||||
|
|
||||||
@ -130,7 +132,7 @@ def run(config):
|
|||||||
if entry_count > 0:
|
if entry_count > 0:
|
||||||
logger("info", "Importing %d entries", entry_count)
|
logger("info", "Importing %d entries", entry_count)
|
||||||
else:
|
else:
|
||||||
logger("info", "Importing an unknow number of entries")
|
logger("info", "Importing an unknown number of entries")
|
||||||
|
|
||||||
# TODO a bit dirty?
|
# TODO a bit dirty?
|
||||||
if input[2]==Nuc_Seq or input[2]==View_NUC_SEQS:
|
if input[2]==Nuc_Seq or input[2]==View_NUC_SEQS:
|
||||||
@ -178,7 +180,7 @@ def run(config):
|
|||||||
return
|
return
|
||||||
|
|
||||||
if entry_count >= 0:
|
if entry_count >= 0:
|
||||||
pb = ProgressBar(entry_count, config, seconde=5)
|
pb = ProgressBar(entry_count, config)
|
||||||
|
|
||||||
NUC_SEQS_view = False
|
NUC_SEQS_view = False
|
||||||
if isinstance(output[1], View) :
|
if isinstance(output[1], View) :
|
||||||
@ -243,7 +245,7 @@ def run(config):
|
|||||||
if isinstance(input[0], CompressedFile):
|
if isinstance(input[0], CompressedFile):
|
||||||
input_is_file = True
|
input_is_file = True
|
||||||
if entry_count >= 0:
|
if entry_count >= 0:
|
||||||
pb = ProgressBar(entry_count, config, seconde=5)
|
pb = ProgressBar(entry_count, config)
|
||||||
try:
|
try:
|
||||||
input[0].close()
|
input[0].close()
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
|
@ -2,10 +2,10 @@
|
|||||||
|
|
||||||
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||||
from obitools3.dms import DMS
|
from obitools3.dms import DMS
|
||||||
from obitools3.dms.view import RollbackException
|
from obitools3.dms.view import RollbackException, View
|
||||||
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
||||||
from obitools3.dms.column.column cimport Column, Column_line
|
from obitools3.dms.column.column cimport Column, Column_line
|
||||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
|
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
|
||||||
from obitools3.uri.decode import open_uri
|
from obitools3.uri.decode import open_uri
|
||||||
from obitools3.apps.config import logger
|
from obitools3.apps.config import logger
|
||||||
from obitools3.libalign._freeendgapfm import FreeEndGapFullMatch
|
from obitools3.libalign._freeendgapfm import FreeEndGapFullMatch
|
||||||
@ -14,13 +14,14 @@ from obitools3.dms.obiseq cimport Nuc_Seq
|
|||||||
from obitools3.dms.capi.obitypes cimport OBI_SEQ, OBI_QUAL
|
from obitools3.dms.capi.obitypes cimport OBI_SEQ, OBI_QUAL
|
||||||
from obitools3.dms.capi.apat cimport MAX_PATTERN
|
from obitools3.dms.capi.apat cimport MAX_PATTERN
|
||||||
from obitools3.dms.capi.obiview cimport REVERSE_SEQUENCE_COLUMN, REVERSE_QUALITY_COLUMN
|
from obitools3.dms.capi.obiview cimport REVERSE_SEQUENCE_COLUMN, REVERSE_QUALITY_COLUMN
|
||||||
from obitools3.utils cimport tobytes
|
from obitools3.utils cimport tobytes, str2bytes
|
||||||
|
|
||||||
from libc.stdint cimport INT32_MAX
|
from libc.stdint cimport INT32_MAX
|
||||||
from functools import reduce
|
from functools import reduce
|
||||||
import math
|
import math
|
||||||
import sys
|
import sys
|
||||||
from cpython.exc cimport PyErr_CheckSignals
|
from cpython.exc cimport PyErr_CheckSignals
|
||||||
|
from io import BufferedWriter
|
||||||
|
|
||||||
|
|
||||||
#REVERSE_SEQ_COLUMN_NAME = b"REVERSE_SEQUENCE" # used by alignpairedend tool
|
#REVERSE_SEQ_COLUMN_NAME = b"REVERSE_SEQUENCE" # used by alignpairedend tool
|
||||||
@ -34,7 +35,8 @@ def addOptions(parser):
|
|||||||
|
|
||||||
addMinimalInputOption(parser)
|
addMinimalInputOption(parser)
|
||||||
addMinimalOutputOption(parser)
|
addMinimalOutputOption(parser)
|
||||||
|
addNoProgressBarOption(parser)
|
||||||
|
|
||||||
group = parser.add_argument_group('obi ngsfilter specific options')
|
group = parser.add_argument_group('obi ngsfilter specific options')
|
||||||
|
|
||||||
group.add_argument('-t','--info-view',
|
group.add_argument('-t','--info-view',
|
||||||
@ -58,7 +60,7 @@ def addOptions(parser):
|
|||||||
metavar="<URI>",
|
metavar="<URI>",
|
||||||
type=str,
|
type=str,
|
||||||
default=None,
|
default=None,
|
||||||
help="URI to the view used to store the sequences unassigned to any sample")
|
help="URI to the view used to store the sequences unassigned to any sample. Those sequences are untrimmed.")
|
||||||
|
|
||||||
group.add_argument('--no-tags',
|
group.add_argument('--no-tags',
|
||||||
action="store_true", dest="ngsfilter:notags",
|
action="store_true", dest="ngsfilter:notags",
|
||||||
@ -479,6 +481,8 @@ cdef tuple annotate(sequences, infos, no_tags, verbose=False):
|
|||||||
if not directmatch[0].forward:
|
if not directmatch[0].forward:
|
||||||
sequences[0] = sequences[0].reverse_complement
|
sequences[0] = sequences[0].reverse_complement
|
||||||
sequences[0][b'reversed'] = True # used by the alignpairedend tool (in kmer_similarity.c)
|
sequences[0][b'reversed'] = True # used by the alignpairedend tool (in kmer_similarity.c)
|
||||||
|
else:
|
||||||
|
sequences[0][b'reversed'] = False # used by the alignpairedend tool (in kmer_similarity.c)
|
||||||
|
|
||||||
sample=None
|
sample=None
|
||||||
if not no_tags:
|
if not no_tags:
|
||||||
@ -537,7 +541,8 @@ def run(config):
|
|||||||
raise Exception("Could not open input reads")
|
raise Exception("Could not open input reads")
|
||||||
if input[2] != View_NUC_SEQS:
|
if input[2] != View_NUC_SEQS:
|
||||||
raise NotImplementedError('obi ngsfilter only works on NUC_SEQS views')
|
raise NotImplementedError('obi ngsfilter only works on NUC_SEQS views')
|
||||||
|
i_dms = input[0]
|
||||||
|
|
||||||
if "reverse" in config["ngsfilter"]:
|
if "reverse" in config["ngsfilter"]:
|
||||||
|
|
||||||
forward = input[1]
|
forward = input[1]
|
||||||
@ -578,8 +583,19 @@ def run(config):
|
|||||||
if output is None:
|
if output is None:
|
||||||
raise Exception("Could not create output view")
|
raise Exception("Could not create output view")
|
||||||
|
|
||||||
|
o_dms = output[0]
|
||||||
|
output_0 = output[0]
|
||||||
o_view = output[1]
|
o_view = output[1]
|
||||||
|
|
||||||
|
# If stdout output, create a temporary view in the input dms that will be deleted afterwards.
|
||||||
|
if type(output_0)==BufferedWriter:
|
||||||
|
o_dms = i_dms
|
||||||
|
o_view_name = b"temp"
|
||||||
|
while o_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||||
|
o_view_name = o_view_name+b"_"+str2bytes(str(i))
|
||||||
|
i+=1
|
||||||
|
o_view = View_NUC_SEQS.new(i_dms, o_view_name)
|
||||||
|
|
||||||
# Open the view containing the informations about the tags and the primers
|
# Open the view containing the informations about the tags and the primers
|
||||||
info_input = open_uri(config['ngsfilter']['info_view'])
|
info_input = open_uri(config['ngsfilter']['info_view'])
|
||||||
if info_input is None:
|
if info_input is None:
|
||||||
@ -600,7 +616,10 @@ def run(config):
|
|||||||
unidentified = None
|
unidentified = None
|
||||||
|
|
||||||
# Initialize the progress bar
|
# Initialize the progress bar
|
||||||
pb = ProgressBar(entries_len, config, seconde=5)
|
if config['obi']['noprogressbar'] == False:
|
||||||
|
pb = ProgressBar(entries_len, config)
|
||||||
|
else:
|
||||||
|
pb = None
|
||||||
|
|
||||||
# Check and store primers and tags
|
# Check and store primers and tags
|
||||||
try:
|
try:
|
||||||
@ -634,7 +653,8 @@ def run(config):
|
|||||||
try:
|
try:
|
||||||
for i in range(entries_len):
|
for i in range(entries_len):
|
||||||
PyErr_CheckSignals()
|
PyErr_CheckSignals()
|
||||||
pb(i)
|
if pb is not None:
|
||||||
|
pb(i)
|
||||||
if not_aligned:
|
if not_aligned:
|
||||||
modseq = [Nuc_Seq.new_from_stored(forward[i]), Nuc_Seq.new_from_stored(reverse[i])]
|
modseq = [Nuc_Seq.new_from_stored(forward[i]), Nuc_Seq.new_from_stored(reverse[i])]
|
||||||
else:
|
else:
|
||||||
@ -644,7 +664,13 @@ def run(config):
|
|||||||
o_view[g].set(oseq.id, oseq.seq, definition=oseq.definition, quality=oseq.quality, tags=oseq)
|
o_view[g].set(oseq.id, oseq.seq, definition=oseq.definition, quality=oseq.quality, tags=oseq)
|
||||||
g+=1
|
g+=1
|
||||||
elif unidentified is not None:
|
elif unidentified is not None:
|
||||||
unidentified[u].set(oseq.id, oseq.seq, definition=oseq.definition, quality=oseq.quality, tags=oseq)
|
# Untrim sequences (put original back)
|
||||||
|
if len(modseq) > 1:
|
||||||
|
oseq[REVERSE_SEQUENCE_COLUMN] = reverse[i].seq
|
||||||
|
oseq[REVERSE_QUALITY_COLUMN] = reverse[i].quality
|
||||||
|
unidentified[u].set(oseq.id, forward[i].seq, definition=oseq.definition, quality=forward[i].quality, tags=oseq)
|
||||||
|
else:
|
||||||
|
unidentified[u].set(oseq.id, entries[i].seq, definition=oseq.definition, quality=entries[i].quality, tags=oseq)
|
||||||
u+=1
|
u+=1
|
||||||
except Exception, e:
|
except Exception, e:
|
||||||
if unidentified is not None:
|
if unidentified is not None:
|
||||||
@ -652,8 +678,9 @@ def run(config):
|
|||||||
else:
|
else:
|
||||||
raise RollbackException("obi ngsfilter error, rollbacking view: "+str(e), o_view)
|
raise RollbackException("obi ngsfilter error, rollbacking view: "+str(e), o_view)
|
||||||
|
|
||||||
pb(i, force=True)
|
if pb is not None:
|
||||||
print("", file=sys.stderr)
|
pb(i, force=True)
|
||||||
|
print("", file=sys.stderr)
|
||||||
|
|
||||||
# Save command config in View and DMS comments
|
# Save command config in View and DMS comments
|
||||||
command_line = " ".join(sys.argv[1:])
|
command_line = " ".join(sys.argv[1:])
|
||||||
@ -662,13 +689,23 @@ def run(config):
|
|||||||
unidentified.write_config(config, "ngsfilter", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
|
unidentified.write_config(config, "ngsfilter", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
|
||||||
# Add comment about unidentified seqs
|
# Add comment about unidentified seqs
|
||||||
unidentified.comments["info"] = "View containing sequences categorized as unidentified by the ngsfilter command"
|
unidentified.comments["info"] = "View containing sequences categorized as unidentified by the ngsfilter command"
|
||||||
output[0].record_command_line(command_line)
|
o_dms.record_command_line(command_line)
|
||||||
|
|
||||||
|
# stdout output: write to buffer
|
||||||
|
if type(output_0)==BufferedWriter:
|
||||||
|
logger("info", "Printing to output...")
|
||||||
|
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||||
|
o_view.close()
|
||||||
|
|
||||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||||
#print(repr(o_view), file=sys.stderr)
|
#print(repr(o_view), file=sys.stderr)
|
||||||
|
|
||||||
input[0].close(force=True)
|
# If stdout output, delete the temporary result view in the input DMS
|
||||||
output[0].close(force=True)
|
if type(output_0)==BufferedWriter:
|
||||||
|
View.delete_view(i_dms, o_view_name)
|
||||||
|
|
||||||
|
i_dms.close(force=True)
|
||||||
|
o_dms.close(force=True)
|
||||||
info_input[0].close(force=True)
|
info_input[0].close(force=True)
|
||||||
if unidentified is not None:
|
if unidentified is not None:
|
||||||
unidentified_input[0].close(force=True)
|
unidentified_input[0].close(force=True)
|
||||||
|
@ -4,7 +4,7 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
|||||||
from obitools3.dms import DMS
|
from obitools3.dms import DMS
|
||||||
from obitools3.dms.view.view cimport View, Line_selection
|
from obitools3.dms.view.view cimport View, Line_selection
|
||||||
from obitools3.uri.decode import open_uri
|
from obitools3.uri.decode import open_uri
|
||||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
|
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
|
||||||
from obitools3.dms.view import RollbackException
|
from obitools3.dms.view import RollbackException
|
||||||
from obitools3.apps.config import logger
|
from obitools3.apps.config import logger
|
||||||
from obitools3.utils cimport str2bytes
|
from obitools3.utils cimport str2bytes
|
||||||
@ -24,6 +24,7 @@ from obitools3.dms.capi.obitypes cimport OBI_BOOL, \
|
|||||||
import time
|
import time
|
||||||
import sys
|
import sys
|
||||||
from cpython.exc cimport PyErr_CheckSignals
|
from cpython.exc cimport PyErr_CheckSignals
|
||||||
|
from io import BufferedWriter
|
||||||
|
|
||||||
|
|
||||||
NULL_VALUE = {OBI_BOOL: OBIBool_NA,
|
NULL_VALUE = {OBI_BOOL: OBIBool_NA,
|
||||||
@ -42,6 +43,7 @@ def addOptions(parser):
|
|||||||
|
|
||||||
addMinimalInputOption(parser)
|
addMinimalInputOption(parser)
|
||||||
addMinimalOutputOption(parser)
|
addMinimalOutputOption(parser)
|
||||||
|
addNoProgressBarOption(parser)
|
||||||
|
|
||||||
group=parser.add_argument_group('obi sort specific options')
|
group=parser.add_argument_group('obi sort specific options')
|
||||||
|
|
||||||
@ -59,7 +61,7 @@ def addOptions(parser):
|
|||||||
|
|
||||||
|
|
||||||
def line_cmp(line, key, pb):
|
def line_cmp(line, key, pb):
|
||||||
pb
|
pb
|
||||||
if line[key] is None:
|
if line[key] is None:
|
||||||
return NULL_VALUE[line.view[key].data_type_int]
|
return NULL_VALUE[line.view[key].data_type_int]
|
||||||
else:
|
else:
|
||||||
@ -86,20 +88,28 @@ def run(config):
|
|||||||
if output is None:
|
if output is None:
|
||||||
raise Exception("Could not create output view")
|
raise Exception("Could not create output view")
|
||||||
o_dms = output[0]
|
o_dms = output[0]
|
||||||
o_view_name_final = output[1]
|
output_0 = output[0]
|
||||||
o_view_name = o_view_name_final
|
final_o_view_name = output[1]
|
||||||
|
|
||||||
# If the input and output DMS are not the same, create output view in input DMS first, then export it
|
# If stdout output or the input and output DMS are not the same, create a temporary view that will be exported and deleted.
|
||||||
# to output DMS, making sure the temporary view name is unique in the input DMS
|
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||||
if i_dms != o_dms:
|
temporary_view_name = b"temp"
|
||||||
i=0
|
i=0
|
||||||
while o_view_name in i_dms:
|
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||||
o_view_name = o_view_name_final+b"_"+str2bytes(str(i))
|
temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
|
||||||
i+=1
|
i+=1
|
||||||
|
o_view_name = temporary_view_name
|
||||||
|
if type(output_0)==BufferedWriter:
|
||||||
|
o_dms = i_dms
|
||||||
|
else:
|
||||||
|
o_view_name = final_o_view_name
|
||||||
|
|
||||||
# Initialize the progress bar
|
# Initialize the progress bar
|
||||||
pb = ProgressBar(len(i_view), config, seconde=5)
|
if config['obi']['noprogressbar'] == False:
|
||||||
|
pb = ProgressBar(len(i_view), config)
|
||||||
|
else:
|
||||||
|
pb = None
|
||||||
|
|
||||||
keys = config['sort']['keys']
|
keys = config['sort']['keys']
|
||||||
|
|
||||||
selection = Line_selection(i_view)
|
selection = Line_selection(i_view)
|
||||||
@ -110,10 +120,14 @@ def run(config):
|
|||||||
|
|
||||||
for k in keys: # TODO order?
|
for k in keys: # TODO order?
|
||||||
PyErr_CheckSignals()
|
PyErr_CheckSignals()
|
||||||
selection.sort(key=lambda line_idx: line_cmp(i_view[line_idx], k, pb(line_idx)), reverse=config['sort']['reverse'])
|
if pb is not None:
|
||||||
|
selection.sort(key=lambda line_idx: line_cmp(i_view[line_idx], k, pb(line_idx)), reverse=config['sort']['reverse'])
|
||||||
pb(len(i_view), force=True)
|
else:
|
||||||
print("", file=sys.stderr)
|
selection.sort(key=lambda line_idx: line_cmp(i_view[line_idx], k, None), reverse=config['sort']['reverse'])
|
||||||
|
|
||||||
|
if pb is not None:
|
||||||
|
pb(len(i_view), force=True)
|
||||||
|
print("", file=sys.stderr)
|
||||||
|
|
||||||
# Create output view with the sorted line selection
|
# Create output view with the sorted line selection
|
||||||
try:
|
try:
|
||||||
@ -132,16 +146,23 @@ def run(config):
|
|||||||
# and delete the temporary view in the input DMS
|
# and delete the temporary view in the input DMS
|
||||||
if i_dms != o_dms:
|
if i_dms != o_dms:
|
||||||
o_view.close()
|
o_view.close()
|
||||||
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, o_view_name_final)
|
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)
|
||||||
o_view = o_dms[o_view_name_final]
|
o_view = o_dms[final_o_view_name]
|
||||||
|
|
||||||
|
# stdout output: write to buffer
|
||||||
|
if type(output_0)==BufferedWriter:
|
||||||
|
logger("info", "Printing to output...")
|
||||||
|
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||||
|
o_view.close()
|
||||||
|
|
||||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||||
#print(repr(o_view), file=sys.stderr)
|
#print(repr(o_view), file=sys.stderr)
|
||||||
|
|
||||||
# If the input and the output DMS are different, delete the temporary imported view used to create the final view
|
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
|
||||||
if i_dms != o_dms:
|
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||||
View.delete_view(i_dms, o_view_name)
|
View.delete_view(i_dms, o_view_name)
|
||||||
o_dms.close(force=True)
|
o_dms.close(force=True)
|
||||||
|
|
||||||
i_dms.close(force=True)
|
i_dms.close(force=True)
|
||||||
|
|
||||||
logger("info", "Done.")
|
logger("info", "Done.")
|
||||||
|
@ -162,7 +162,7 @@ def run(config):
|
|||||||
lcat=0
|
lcat=0
|
||||||
|
|
||||||
# Initialize the progress bar
|
# Initialize the progress bar
|
||||||
pb = ProgressBar(len(i_view), config, seconde=5)
|
pb = ProgressBar(len(i_view), config)
|
||||||
|
|
||||||
for i in range(len(i_view)):
|
for i in range(len(i_view)):
|
||||||
PyErr_CheckSignals()
|
PyErr_CheckSignals()
|
||||||
|
@ -4,7 +4,7 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
|||||||
from obitools3.dms import DMS
|
from obitools3.dms import DMS
|
||||||
from obitools3.dms.view.view cimport View, Line_selection
|
from obitools3.dms.view.view cimport View, Line_selection
|
||||||
from obitools3.uri.decode import open_uri
|
from obitools3.uri.decode import open_uri
|
||||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
|
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
|
||||||
from obitools3.dms.view import RollbackException
|
from obitools3.dms.view import RollbackException
|
||||||
from obitools3.apps.config import logger
|
from obitools3.apps.config import logger
|
||||||
from obitools3.utils cimport str2bytes
|
from obitools3.utils cimport str2bytes
|
||||||
@ -12,6 +12,7 @@ from obitools3.utils cimport str2bytes
|
|||||||
import time
|
import time
|
||||||
import sys
|
import sys
|
||||||
from cpython.exc cimport PyErr_CheckSignals
|
from cpython.exc cimport PyErr_CheckSignals
|
||||||
|
from io import BufferedWriter
|
||||||
|
|
||||||
|
|
||||||
__title__="Keep the N last lines of a view."
|
__title__="Keep the N last lines of a view."
|
||||||
@ -21,6 +22,7 @@ def addOptions(parser):
|
|||||||
|
|
||||||
addMinimalInputOption(parser)
|
addMinimalInputOption(parser)
|
||||||
addMinimalOutputOption(parser)
|
addMinimalOutputOption(parser)
|
||||||
|
addNoProgressBarOption(parser)
|
||||||
|
|
||||||
group=parser.add_argument_group('obi tail specific options')
|
group=parser.add_argument_group('obi tail specific options')
|
||||||
|
|
||||||
@ -52,31 +54,41 @@ def run(config):
|
|||||||
if output is None:
|
if output is None:
|
||||||
raise Exception("Could not create output view")
|
raise Exception("Could not create output view")
|
||||||
o_dms = output[0]
|
o_dms = output[0]
|
||||||
o_view_name_final = output[1]
|
output_0 = output[0]
|
||||||
o_view_name = o_view_name_final
|
final_o_view_name = output[1]
|
||||||
|
|
||||||
# If the input and output DMS are not the same, create output view in input DMS first, then export it
|
# If stdout output or the input and output DMS are not the same, create a temporary view that will be exported and deleted.
|
||||||
# to output DMS, making sure the temporary view name is unique in the input DMS
|
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||||
if i_dms != o_dms:
|
temporary_view_name = b"temp"
|
||||||
i=0
|
i=0
|
||||||
while o_view_name in i_dms:
|
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||||
o_view_name = o_view_name_final+b"_"+str2bytes(str(i))
|
temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
|
||||||
i+=1
|
i+=1
|
||||||
|
o_view_name = temporary_view_name
|
||||||
|
if type(output_0)==BufferedWriter:
|
||||||
|
o_dms = i_dms
|
||||||
|
else:
|
||||||
|
o_view_name = final_o_view_name
|
||||||
|
|
||||||
start = max(len(i_view) - config['tail']['count'], 0)
|
start = max(len(i_view) - config['tail']['count'], 0)
|
||||||
|
|
||||||
# Initialize the progress bar
|
# Initialize the progress bar
|
||||||
pb = ProgressBar(len(i_view) - start, config, seconde=5)
|
if config['obi']['noprogressbar'] == False:
|
||||||
|
pb = ProgressBar(len(i_view) - start, config)
|
||||||
|
else:
|
||||||
|
pb = None
|
||||||
|
|
||||||
selection = Line_selection(i_view)
|
selection = Line_selection(i_view)
|
||||||
|
|
||||||
for i in range(start, len(i_view)):
|
for i in range(start, len(i_view)):
|
||||||
PyErr_CheckSignals()
|
PyErr_CheckSignals()
|
||||||
pb(i)
|
if pb is not None:
|
||||||
|
pb(i)
|
||||||
selection.append(i)
|
selection.append(i)
|
||||||
|
|
||||||
pb(i, force=True)
|
if pb is not None:
|
||||||
print("", file=sys.stderr)
|
pb(i, force=True)
|
||||||
|
print("", file=sys.stderr)
|
||||||
|
|
||||||
# Save command config in View comments
|
# Save command config in View comments
|
||||||
command_line = " ".join(sys.argv[1:])
|
command_line = " ".join(sys.argv[1:])
|
||||||
@ -97,14 +109,20 @@ def run(config):
|
|||||||
# and delete the temporary view in the input DMS
|
# and delete the temporary view in the input DMS
|
||||||
if i_dms != o_dms:
|
if i_dms != o_dms:
|
||||||
o_view.close()
|
o_view.close()
|
||||||
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, o_view_name_final)
|
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)
|
||||||
o_view = o_dms[o_view_name_final]
|
o_view = o_dms[final_o_view_name]
|
||||||
|
|
||||||
|
# stdout output: write to buffer
|
||||||
|
if type(output_0)==BufferedWriter:
|
||||||
|
logger("info", "Printing to output...")
|
||||||
|
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||||
|
o_view.close()
|
||||||
|
|
||||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||||
#print(repr(o_view), file=sys.stderr)
|
#print(repr(o_view), file=sys.stderr)
|
||||||
|
|
||||||
# If the input and the output DMS are different, delete the temporary imported view used to create the final view
|
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
|
||||||
if i_dms != o_dms:
|
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||||
View.delete_view(i_dms, o_view_name)
|
View.delete_view(i_dms, o_view_name)
|
||||||
o_dms.close(force=True)
|
o_dms.close(force=True)
|
||||||
i_dms.close(force=True)
|
i_dms.close(force=True)
|
||||||
|
@ -14,13 +14,15 @@ from obitools3.dms.capi.obitypes cimport OBI_INT, OBI_STR, index_t
|
|||||||
from obitools3.apps.optiongroups import addMinimalInputOption, \
|
from obitools3.apps.optiongroups import addMinimalInputOption, \
|
||||||
addMinimalOutputOption, \
|
addMinimalOutputOption, \
|
||||||
addTaxonomyOption, \
|
addTaxonomyOption, \
|
||||||
addEltLimitOption
|
addEltLimitOption, \
|
||||||
|
addNoProgressBarOption
|
||||||
from obitools3.uri.decode import open_uri
|
from obitools3.uri.decode import open_uri
|
||||||
from obitools3.apps.config import logger
|
from obitools3.apps.config import logger
|
||||||
from obitools3.utils cimport tobytes, tostr
|
from obitools3.utils cimport tobytes, tostr, str2bytes
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
from cpython.exc cimport PyErr_CheckSignals
|
from cpython.exc cimport PyErr_CheckSignals
|
||||||
|
from io import BufferedWriter
|
||||||
|
|
||||||
|
|
||||||
__title__="Group sequence records together"
|
__title__="Group sequence records together"
|
||||||
@ -32,6 +34,7 @@ def addOptions(parser):
|
|||||||
addTaxonomyOption(parser)
|
addTaxonomyOption(parser)
|
||||||
addMinimalOutputOption(parser)
|
addMinimalOutputOption(parser)
|
||||||
addEltLimitOption(parser)
|
addEltLimitOption(parser)
|
||||||
|
addNoProgressBarOption(parser)
|
||||||
|
|
||||||
group = parser.add_argument_group('obi uniq specific options')
|
group = parser.add_argument_group('obi uniq specific options')
|
||||||
|
|
||||||
@ -143,12 +146,16 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy, dict
|
|||||||
scientific_name_column = o_view[b"scientific_name"]
|
scientific_name_column = o_view[b"scientific_name"]
|
||||||
|
|
||||||
# Initialize the progress bar
|
# Initialize the progress bar
|
||||||
pb = ProgressBar(len(o_view), config, seconde=5)
|
if config['obi']['noprogressbar'] == False:
|
||||||
|
pb = ProgressBar(len(o_view), config)
|
||||||
|
else:
|
||||||
|
pb = None
|
||||||
|
|
||||||
i=0
|
i=0
|
||||||
for seq in o_view:
|
for seq in o_view:
|
||||||
PyErr_CheckSignals()
|
PyErr_CheckSignals()
|
||||||
pb(i)
|
if pb is not None:
|
||||||
|
pb(i)
|
||||||
if MERGED_TAXID_COLUMN in seq :
|
if MERGED_TAXID_COLUMN in seq :
|
||||||
m_taxids = []
|
m_taxids = []
|
||||||
m_taxids_dict = seq[MERGED_TAXID_COLUMN]
|
m_taxids_dict = seq[MERGED_TAXID_COLUMN]
|
||||||
@ -191,7 +198,8 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy, dict
|
|||||||
scientific_name_column[i] = taxonomy.get_scientific_name(taxid)
|
scientific_name_column[i] = taxonomy.get_scientific_name(taxid)
|
||||||
i+=1
|
i+=1
|
||||||
|
|
||||||
pb(len(o_view), force=True)
|
if pb is not None:
|
||||||
|
pb(len(o_view), force=True)
|
||||||
|
|
||||||
|
|
||||||
cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, dict config, list mergedKeys_list=None, Taxonomy taxonomy=None, bint mergeIds=False, list categories=None, int max_elts=1000000) :
|
cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, dict config, list mergedKeys_list=None, Taxonomy taxonomy=None, bint mergeIds=False, list categories=None, int max_elts=1000000) :
|
||||||
@ -297,7 +305,8 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, di
|
|||||||
iter_view = iter(view)
|
iter_view = iter(view)
|
||||||
for i_seq in iter_view :
|
for i_seq in iter_view :
|
||||||
PyErr_CheckSignals()
|
PyErr_CheckSignals()
|
||||||
pb(i)
|
if pb is not None:
|
||||||
|
pb(i)
|
||||||
|
|
||||||
# This can't be done in the same line as the unique_id tuple creation because it generates a bug
|
# This can't be done in the same line as the unique_id tuple creation because it generates a bug
|
||||||
# where Cython (version 0.25.2) does not detect the reference to the categs_list variable and deallocates
|
# where Cython (version 0.25.2) does not detect the reference to the categs_list variable and deallocates
|
||||||
@ -307,6 +316,7 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, di
|
|||||||
for x in categories :
|
for x in categories :
|
||||||
catl.append(i_seq[x])
|
catl.append(i_seq[x])
|
||||||
|
|
||||||
|
#unique_id = tuple(catl) + (i_seq_col[i],)
|
||||||
unique_id = tuple(catl) + (i_seq_col.get_line_idx(i),)
|
unique_id = tuple(catl) + (i_seq_col.get_line_idx(i),)
|
||||||
#unique_id = tuple(i_seq[x] for x in categories) + (seq_col.get_line_idx(i),) # The line that cython can't read properly
|
#unique_id = tuple(i_seq[x] for x in categories) + (seq_col.get_line_idx(i),) # The line that cython can't read properly
|
||||||
|
|
||||||
@ -414,12 +424,17 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, di
|
|||||||
o_count_col = o_view[COUNT_COLUMN]
|
o_count_col = o_view[COUNT_COLUMN]
|
||||||
if COUNT_COLUMN in view:
|
if COUNT_COLUMN in view:
|
||||||
i_count_col = view[COUNT_COLUMN]
|
i_count_col = view[COUNT_COLUMN]
|
||||||
|
|
||||||
|
if pb is not None:
|
||||||
|
pb(len(view), force=True)
|
||||||
|
print("")
|
||||||
|
|
||||||
pb(len(view), force=True)
|
|
||||||
print("")
|
|
||||||
logger("info", "Second browsing through the input")
|
logger("info", "Second browsing through the input")
|
||||||
|
|
||||||
# Initialize the progress bar
|
# Initialize the progress bar
|
||||||
pb = ProgressBar(len(view), seconde=5)
|
if pb is not None:
|
||||||
|
pb = ProgressBar(len(view))
|
||||||
|
|
||||||
o_idx = 0
|
o_idx = 0
|
||||||
total_treated = 0
|
total_treated = 0
|
||||||
|
|
||||||
@ -453,7 +468,10 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, di
|
|||||||
merged_dict[mkey] = {}
|
merged_dict[mkey] = {}
|
||||||
|
|
||||||
for i_idx in merged_sequences:
|
for i_idx in merged_sequences:
|
||||||
pb(total_treated)
|
PyErr_CheckSignals()
|
||||||
|
|
||||||
|
if pb is not None:
|
||||||
|
pb(total_treated)
|
||||||
|
|
||||||
i_id = i_id_col[i_idx]
|
i_id = i_id_col[i_idx]
|
||||||
i_seq = view[i_idx]
|
i_seq = view[i_idx]
|
||||||
@ -529,7 +547,8 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, di
|
|||||||
o_count_col[o_idx] = o_count
|
o_count_col[o_idx] = o_count
|
||||||
o_idx += 1
|
o_idx += 1
|
||||||
|
|
||||||
pb(len(view), force=True)
|
if pb is not None:
|
||||||
|
pb(len(view), force=True)
|
||||||
|
|
||||||
# Deletes quality columns if there is one because the matching between sequence and quality will be broken (quality set to NA when sequence not)
|
# Deletes quality columns if there is one because the matching between sequence and quality will be broken (quality set to NA when sequence not)
|
||||||
if QUALITY_COLUMN in view:
|
if QUALITY_COLUMN in view:
|
||||||
@ -577,8 +596,23 @@ def run(config):
|
|||||||
if output is None:
|
if output is None:
|
||||||
raise Exception("Could not create output view")
|
raise Exception("Could not create output view")
|
||||||
|
|
||||||
|
i_dms = input[0]
|
||||||
entries = input[1]
|
entries = input[1]
|
||||||
o_view = output[1]
|
o_dms = output[0]
|
||||||
|
output_0 = output[0]
|
||||||
|
|
||||||
|
# If stdout output create a temporary view that will be exported and deleted.
|
||||||
|
if type(output_0)==BufferedWriter:
|
||||||
|
temporary_view_name = b"temp"
|
||||||
|
i=0
|
||||||
|
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||||
|
temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
|
||||||
|
i+=1
|
||||||
|
o_view_name = temporary_view_name
|
||||||
|
o_dms = i_dms
|
||||||
|
o_view = View_NUC_SEQS.new(i_dms, o_view_name)
|
||||||
|
else:
|
||||||
|
o_view = output[1]
|
||||||
|
|
||||||
if 'taxoURI' in config['obi'] and config['obi']['taxoURI'] is not None:
|
if 'taxoURI' in config['obi'] and config['obi']['taxoURI'] is not None:
|
||||||
taxo_uri = open_uri(config['obi']['taxoURI'])
|
taxo_uri = open_uri(config['obi']['taxoURI'])
|
||||||
@ -589,7 +623,10 @@ def run(config):
|
|||||||
taxo = None
|
taxo = None
|
||||||
|
|
||||||
# Initialize the progress bar
|
# Initialize the progress bar
|
||||||
pb = ProgressBar(len(entries), config, seconde=5)
|
if config['obi']['noprogressbar'] == False:
|
||||||
|
pb = ProgressBar(len(entries), config)
|
||||||
|
else:
|
||||||
|
pb = None
|
||||||
|
|
||||||
if len(entries) > 0:
|
if len(entries) > 0:
|
||||||
try:
|
try:
|
||||||
@ -597,7 +634,8 @@ def run(config):
|
|||||||
except Exception, e:
|
except Exception, e:
|
||||||
raise RollbackException("obi uniq error, rollbacking view: "+str(e), o_view)
|
raise RollbackException("obi uniq error, rollbacking view: "+str(e), o_view)
|
||||||
|
|
||||||
print("", file=sys.stderr)
|
if pb is not None:
|
||||||
|
print("", file=sys.stderr)
|
||||||
|
|
||||||
# Save command config in View and DMS comments
|
# Save command config in View and DMS comments
|
||||||
command_line = " ".join(sys.argv[1:])
|
command_line = " ".join(sys.argv[1:])
|
||||||
@ -607,13 +645,23 @@ def run(config):
|
|||||||
input_dms_name.append(config['obi']['taxoURI'].split("/")[-3])
|
input_dms_name.append(config['obi']['taxoURI'].split("/")[-3])
|
||||||
input_view_name.append("taxonomy/"+config['obi']['taxoURI'].split("/")[-1])
|
input_view_name.append("taxonomy/"+config['obi']['taxoURI'].split("/")[-1])
|
||||||
o_view.write_config(config, "uniq", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
|
o_view.write_config(config, "uniq", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
|
||||||
output[0].record_command_line(command_line)
|
o_dms.record_command_line(command_line)
|
||||||
|
|
||||||
|
# stdout output: write to buffer
|
||||||
|
if type(output_0)==BufferedWriter:
|
||||||
|
logger("info", "Printing to output...")
|
||||||
|
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||||
|
o_view.close()
|
||||||
|
|
||||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||||
#print(repr(o_view), file=sys.stderr)
|
#print(repr(o_view), file=sys.stderr)
|
||||||
|
|
||||||
input[0].close(force=True)
|
# If stdout output, delete the temporary result view in the input DMS
|
||||||
output[0].close(force=True)
|
if type(output_0)==BufferedWriter:
|
||||||
|
View.delete_view(i_dms, o_view_name)
|
||||||
|
|
||||||
|
i_dms.close(force=True)
|
||||||
|
o_dms.close(force=True)
|
||||||
|
|
||||||
logger("info", "Done.")
|
logger("info", "Done.")
|
||||||
|
|
||||||
|
@ -22,6 +22,7 @@ cdef class Column(OBIWrapper) :
|
|||||||
|
|
||||||
cdef inline OBIDMS_column_p pointer(self)
|
cdef inline OBIDMS_column_p pointer(self)
|
||||||
cdef read_elements_names(self)
|
cdef read_elements_names(self)
|
||||||
|
cpdef list keys(self)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
cdef type get_column_class(obitype_t obitype, bint multi_elts, bint tuples)
|
cdef type get_column_class(obitype_t obitype, bint multi_elts, bint tuples)
|
||||||
|
@ -323,7 +323,10 @@ cdef class Column(OBIWrapper) :
|
|||||||
free(elts_names_b)
|
free(elts_names_b)
|
||||||
return elts_names_list
|
return elts_names_list
|
||||||
|
|
||||||
|
cpdef list keys(self):
|
||||||
|
return self._elements_names
|
||||||
|
|
||||||
|
|
||||||
# Column alias property getter and setter
|
# Column alias property getter and setter
|
||||||
@property
|
@property
|
||||||
def name(self):
|
def name(self):
|
||||||
@ -340,7 +343,7 @@ cdef class Column(OBIWrapper) :
|
|||||||
@property
|
@property
|
||||||
def elements_names(self):
|
def elements_names(self):
|
||||||
return self._elements_names
|
return self._elements_names
|
||||||
|
|
||||||
# nb_elements_per_line property getter
|
# nb_elements_per_line property getter
|
||||||
@property
|
@property
|
||||||
def nb_elements_per_line(self):
|
def nb_elements_per_line(self):
|
||||||
|
@ -39,4 +39,6 @@ cdef class Nuc_Seq_Stored(Seq_Stored) :
|
|||||||
cpdef set_quality_char(self, object new_qual, int offset=*)
|
cpdef set_quality_char(self, object new_qual, int offset=*)
|
||||||
cpdef object build_quality_array(self, list quality)
|
cpdef object build_quality_array(self, list quality)
|
||||||
cpdef bytes build_reverse_complement(self)
|
cpdef bytes build_reverse_complement(self)
|
||||||
cpdef str get_str(self)
|
cpdef str get_str(self)
|
||||||
|
cpdef repr_bytes(self)
|
||||||
|
|
@ -431,9 +431,12 @@ cdef class Nuc_Seq_Stored(Seq_Stored) :
|
|||||||
return len(self._view.get_column(NUC_SEQUENCE_COLUMN).get_line(self.index))
|
return len(self._view.get_column(NUC_SEQUENCE_COLUMN).get_line(self.index))
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
|
return bytes2str(self.repr_bytes())
|
||||||
|
|
||||||
|
cpdef repr_bytes(self):
|
||||||
if self.quality is None:
|
if self.quality is None:
|
||||||
formatter = FastaFormat()
|
formatter = FastaFormat()
|
||||||
else:
|
else:
|
||||||
formatter = FastqFormat()
|
formatter = FastqFormat()
|
||||||
return bytes2str(formatter(self))
|
return formatter(self)
|
||||||
|
|
||||||
|
@ -20,6 +20,10 @@ cdef class View(OBIWrapper):
|
|||||||
cdef DMS _dms
|
cdef DMS _dms
|
||||||
|
|
||||||
cdef inline Obiview_p pointer(self)
|
cdef inline Obiview_p pointer(self)
|
||||||
|
|
||||||
|
cpdef print_to_output(self,
|
||||||
|
object output,
|
||||||
|
bint noprogressbar=*)
|
||||||
|
|
||||||
cpdef delete_column(self,
|
cpdef delete_column(self,
|
||||||
object column_name,
|
object column_name,
|
||||||
@ -61,6 +65,8 @@ cdef class Line :
|
|||||||
cdef index_t _index
|
cdef index_t _index
|
||||||
cdef View _view
|
cdef View _view
|
||||||
|
|
||||||
|
cpdef repr_bytes(self)
|
||||||
|
|
||||||
|
|
||||||
cdef register_view_class(bytes view_type_name,
|
cdef register_view_class(bytes view_type_name,
|
||||||
type view_class)
|
type view_class)
|
||||||
|
@ -6,6 +6,8 @@ cdef dict __VIEW_CLASS__= {}
|
|||||||
|
|
||||||
from libc.stdlib cimport malloc
|
from libc.stdlib cimport malloc
|
||||||
|
|
||||||
|
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||||
|
|
||||||
from ..capi.obiview cimport Alias_column_pair_p, \
|
from ..capi.obiview cimport Alias_column_pair_p, \
|
||||||
obi_new_view, \
|
obi_new_view, \
|
||||||
obi_open_view, \
|
obi_open_view, \
|
||||||
@ -48,10 +50,13 @@ from ..capi.obidms cimport obi_import_view
|
|||||||
|
|
||||||
from obitools3.format.tab import TabFormat
|
from obitools3.format.tab import TabFormat
|
||||||
|
|
||||||
|
from cpython.exc cimport PyErr_CheckSignals
|
||||||
|
|
||||||
import importlib
|
import importlib
|
||||||
import inspect
|
import inspect
|
||||||
import pkgutil
|
import pkgutil
|
||||||
import json
|
import json
|
||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
cdef class View(OBIWrapper) :
|
cdef class View(OBIWrapper) :
|
||||||
@ -184,7 +189,31 @@ cdef class View(OBIWrapper) :
|
|||||||
for column_name in self.keys() :
|
for column_name in self.keys() :
|
||||||
s = s + repr(self[column_name]) + '\n'
|
s = s + repr(self[column_name]) + '\n'
|
||||||
return s
|
return s
|
||||||
|
|
||||||
|
|
||||||
|
cpdef print_to_output(self, object output, bint noprogressbar=False):
|
||||||
|
|
||||||
|
cdef int i
|
||||||
|
cdef Line entry
|
||||||
|
|
||||||
|
self.checkIsActive(self)
|
||||||
|
|
||||||
|
# Initialize the progress bar
|
||||||
|
if noprogressbar == False:
|
||||||
|
pb = ProgressBar(len(self))
|
||||||
|
else:
|
||||||
|
pb = None
|
||||||
|
i=0
|
||||||
|
for entry in self:
|
||||||
|
PyErr_CheckSignals()
|
||||||
|
if pb is not None:
|
||||||
|
pb(i)
|
||||||
|
output.write(entry.repr_bytes()+b"\n")
|
||||||
|
i+=1
|
||||||
|
if pb is not None:
|
||||||
|
pb(len(self), force=True)
|
||||||
|
print("", file=sys.stderr)
|
||||||
|
|
||||||
|
|
||||||
def keys(self):
|
def keys(self):
|
||||||
|
|
||||||
@ -757,8 +786,12 @@ cdef class Line :
|
|||||||
|
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
|
return bytes2str(self).repr_bytes()
|
||||||
|
|
||||||
|
|
||||||
|
cpdef repr_bytes(self):
|
||||||
formatter = TabFormat(header=False)
|
formatter = TabFormat(header=False)
|
||||||
return bytes2str(formatter(self))
|
return formatter(self)
|
||||||
|
|
||||||
|
|
||||||
# View property getter
|
# View property getter
|
||||||
|
@ -3,7 +3,7 @@
|
|||||||
cimport cython
|
cimport cython
|
||||||
from obitools3.dms.view.view cimport Line
|
from obitools3.dms.view.view cimport Line
|
||||||
from obitools3.utils cimport bytes2str_object, str2bytes, tobytes
|
from obitools3.utils cimport bytes2str_object, str2bytes, tobytes
|
||||||
from obitools3.dms.column.column cimport Column_line
|
from obitools3.dms.column.column cimport Column_line, Column_multi_elts
|
||||||
|
|
||||||
|
|
||||||
cdef class TabFormat:
|
cdef class TabFormat:
|
||||||
@ -25,19 +25,29 @@ cdef class TabFormat:
|
|||||||
for k in self.tags:
|
for k in self.tags:
|
||||||
|
|
||||||
if self.header and self.first_line:
|
if self.header and self.first_line:
|
||||||
value = tobytes(k)
|
if isinstance(data.view[k], Column_multi_elts):
|
||||||
|
for k2 in data.view[k].keys():
|
||||||
|
line.append(tobytes(k)+b':'+tobytes(k2))
|
||||||
|
else:
|
||||||
|
line.append(tobytes(k))
|
||||||
else:
|
else:
|
||||||
value = data[k]
|
value = data[k]
|
||||||
if value is not None:
|
if isinstance(data.view[k], Column_multi_elts):
|
||||||
if type(value) == Column_line:
|
if value is None: # all keys at None
|
||||||
value = value.bytes()
|
for k2 in data.view[k].keys(): # TODO could be much more efficient
|
||||||
|
line.append(self.NAString)
|
||||||
else:
|
else:
|
||||||
value = str2bytes(str(bytes2str_object(value))) # genius programming
|
for k2 in data.view[k].keys(): # TODO could be much more efficient
|
||||||
if value is None:
|
if value[k2] is not None:
|
||||||
value = self.NAString
|
line.append(str2bytes(str(bytes2str_object(value[k2])))) # genius programming
|
||||||
|
else:
|
||||||
line.append(value)
|
line.append(self.NAString)
|
||||||
|
else:
|
||||||
|
if value is not None:
|
||||||
|
line.append(str2bytes(str(bytes2str_object(value))))
|
||||||
|
else:
|
||||||
|
line.append(self.NAString)
|
||||||
|
|
||||||
if self.first_line:
|
if self.first_line:
|
||||||
self.first_line = False
|
self.first_line = False
|
||||||
|
|
||||||
|
@ -183,8 +183,9 @@ def buildConsensus(ali, seq, ref_tags=None):
|
|||||||
# doesn't work because uint8_t* are forced into bytes by cython (nothing read/kept beyond 0 values)
|
# doesn't work because uint8_t* are forced into bytes by cython (nothing read/kept beyond 0 values)
|
||||||
#obi_set_qual_int_with_elt_idx_and_col_p_in_view(view_p, col_p, seq.index, 0, ali.consensus_qual, ali.consensus_len)
|
#obi_set_qual_int_with_elt_idx_and_col_p_in_view(view_p, col_p, seq.index, 0, ali.consensus_qual, ali.consensus_len)
|
||||||
seq.set(ref_tags.id+b"_CONS", ali.consensus_seq, quality=ali.consensus_qual)
|
seq.set(ref_tags.id+b"_CONS", ali.consensus_seq, quality=ali.consensus_qual)
|
||||||
seq[b'ali_length'] = ali.consensus_len
|
seq[b"seq_length"] = ali.consensus_len
|
||||||
seq[b'score_norm']=float(ali.score)/ali.consensus_len
|
seq[b"overlap_length"] = ali.overlap_len
|
||||||
|
seq[b'score_norm']=round(float(ali.score)/ali.overlap_len, 3)
|
||||||
seq[b'shift']=ali.shift
|
seq[b'shift']=ali.shift
|
||||||
else:
|
else:
|
||||||
if len(ali[0])>999: # TODO why?
|
if len(ali[0])>999: # TODO why?
|
||||||
@ -256,9 +257,10 @@ def buildJoinedSequence(ali, reverse, seq, forward=None):
|
|||||||
seq[b"ali_direction"]=None
|
seq[b"ali_direction"]=None
|
||||||
seq[b"mode"]=b"joined"
|
seq[b"mode"]=b"joined"
|
||||||
seq[b"pairedend_limit"]=len(forward)
|
seq[b"pairedend_limit"]=len(forward)
|
||||||
seq[b"ali_length"] = ali.consensus_len
|
seq[b"seq_length"] = ali.consensus_len
|
||||||
|
seq[b"overlap_length"] = ali.overlap_len
|
||||||
if ali.consensus_len > 0:
|
if ali.consensus_len > 0:
|
||||||
seq[b"score_norm"]=float(ali.score)/ali.consensus_len
|
seq[b'score_norm']=round(float(ali.score)/ali.overlap_len, 3)
|
||||||
else:
|
else:
|
||||||
seq[b"score_norm"]=0.0
|
seq[b"score_norm"]=0.0
|
||||||
|
|
||||||
|
@ -210,10 +210,11 @@ def open_uri(uri,
|
|||||||
|
|
||||||
error = None
|
error = None
|
||||||
|
|
||||||
if scheme==b"dms" or \
|
if urib != b"-" and \
|
||||||
(scheme==b"" and \
|
(scheme==b"dms" or \
|
||||||
(((not input) and "outputformat" not in config["obi"]) or \
|
(scheme==b"" and \
|
||||||
(input and "inputformat" not in config["obi"]))): # TODO maybe not best way
|
(((not input) and "outputformat" not in config["obi"]) or \
|
||||||
|
(input and "inputformat" not in config["obi"])))): # TODO maybe not best way
|
||||||
|
|
||||||
if default_dms is not None and b"/" not in urip.path: # assuming view to open in default DMS (TODO discuss)
|
if default_dms is not None and b"/" not in urip.path: # assuming view to open in default DMS (TODO discuss)
|
||||||
dms=(default_dms, urip.path)
|
dms=(default_dms, urip.path)
|
||||||
@ -275,10 +276,10 @@ def open_uri(uri,
|
|||||||
iseq = urib
|
iseq = urib
|
||||||
objclass = bytes
|
objclass = bytes
|
||||||
else: # TODO update uopen to be able to write?
|
else: # TODO update uopen to be able to write?
|
||||||
if urip.path:
|
if urip.path == b'-':
|
||||||
file = open(urip.path, 'wb')
|
|
||||||
else:
|
|
||||||
file = sys.stdout.buffer
|
file = sys.stdout.buffer
|
||||||
|
elif urip.path :
|
||||||
|
file = open(urip.path, 'wb')
|
||||||
|
|
||||||
if file is not None:
|
if file is not None:
|
||||||
qualifiers=parse_qs(urip.query)
|
qualifiers=parse_qs(urip.query)
|
||||||
|
@ -166,7 +166,9 @@ cdef object bytes2str_object(object value): # Only works if complex types are d
|
|||||||
value[k] = bytes2str(v)
|
value[k] = bytes2str(v)
|
||||||
if type(k) == bytes:
|
if type(k) == bytes:
|
||||||
value[bytes2str(k)] = value.pop(k)
|
value[bytes2str(k)] = value.pop(k)
|
||||||
elif isinstance(value, list):
|
elif isinstance(value, list) or isinstance(value, tuple):
|
||||||
|
if isinstance(value, tuple):
|
||||||
|
value = list(value)
|
||||||
for i in range(len(value)):
|
for i in range(len(value)):
|
||||||
if isinstance(value[i], list) or isinstance(value[i], dict):
|
if isinstance(value[i], list) or isinstance(value[i], dict):
|
||||||
value[i] = bytes2str_object(value[i])
|
value[i] = bytes2str_object(value[i])
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
major = 3
|
major = 3
|
||||||
minor = 0
|
minor = 0
|
||||||
serial= '0b25'
|
serial= '0b28'
|
||||||
|
|
||||||
version ="%d.%d.%s" % (major,minor,serial)
|
version ="%d.%d.%s" % (major,minor,serial)
|
||||||
|
@ -414,7 +414,10 @@ Obi_ali_p kmer_similarity(Obiview_p view1, OBIDMS_column_p column1, index_t idx1
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (max_common_kmers > 0)
|
if (max_common_kmers > 0)
|
||||||
score = max_common_kmers + kmer_size - 1; // aka the number of nucleotides in the longest stretch of kmers perfectly matching
|
score = max_common_kmers + kmer_size - 1; // aka an approximation of the number of nucleotides matching in the overlap of the alignment.
|
||||||
|
// It's an approximation because one mismatch produces kmer_size kmer mismatches if in the middle of the overlap,
|
||||||
|
// and less for mismatches located towards the ends of the overlap. The case where there are the most mismatches is assumed,
|
||||||
|
// meaning that the score will be often underestimated and never overestimated.
|
||||||
else
|
else
|
||||||
score = 0;
|
score = 0;
|
||||||
abs_shift = abs(best_shift);
|
abs_shift = abs(best_shift);
|
||||||
|
@ -27,7 +27,11 @@
|
|||||||
* @brief Alignment structure, with informations about the similarity and to rebuild the alignment.
|
* @brief Alignment structure, with informations about the similarity and to rebuild the alignment.
|
||||||
*/
|
*/
|
||||||
typedef struct Obi_ali {
|
typedef struct Obi_ali {
|
||||||
int score; /**< Alignment score, corresponding to the number of nucleotides in the longest stretch of kmers perfectly matching.
|
int score; /**< Alignment score, corresponding to an approximation of the number of
|
||||||
|
* nucleotides matching in the overlap of the alignment.
|
||||||
|
* It's an approximation because one mismatch produces kmer_size kmer mismatches if in the middle of the overlap,
|
||||||
|
* and less for mismatches located towards the ends of the overlap. The case where there are the most mismatches is assumed,
|
||||||
|
* meaning that the score will be often underestimated and never overestimated.
|
||||||
*/
|
*/
|
||||||
int consensus_length; /**< Length of the final consensus sequence.
|
int consensus_length; /**< Length of the final consensus sequence.
|
||||||
*/
|
*/
|
||||||
|
@ -246,7 +246,16 @@ int obi_clean(const char* dms_name,
|
|||||||
|
|
||||||
// Open the sample column if there is one
|
// Open the sample column if there is one
|
||||||
if ((strcmp(sample_column_name, "") == 0) || (sample_column_name == NULL))
|
if ((strcmp(sample_column_name, "") == 0) || (sample_column_name == NULL))
|
||||||
sample_column = NULL;
|
{
|
||||||
|
fprintf(stderr, "Info: No sample information provided, assuming one sample.\n");
|
||||||
|
sample_column = obi_view_get_column(i_view, COUNT_COLUMN);
|
||||||
|
if (sample_column == NULL)
|
||||||
|
{
|
||||||
|
obidebug(1, "\nError getting the COUNT column");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
sample_count = 1;
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
sample_column = obi_view_get_column(i_view, sample_column_name);
|
sample_column = obi_view_get_column(i_view, sample_column_name);
|
||||||
@ -255,6 +264,13 @@ int obi_clean(const char* dms_name,
|
|||||||
obidebug(1, "\nError getting the sample column");
|
obidebug(1, "\nError getting the sample column");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
sample_count = (sample_column->header)->nb_elements_per_line;
|
||||||
|
// Check that the sample column is a merged column with all sample informations
|
||||||
|
if (sample_count == 1)
|
||||||
|
{
|
||||||
|
obidebug(1, "\n\nError: If a sample column is provided, it must contain 'merged' sample counts as built by obi uniq with the -m option\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create the output view, or a temporary one if heads only
|
// Create the output view, or a temporary one if heads only
|
||||||
@ -279,8 +295,6 @@ int obi_clean(const char* dms_name,
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
sample_count = (sample_column->header)->nb_elements_per_line;
|
|
||||||
|
|
||||||
// Create the output columns
|
// Create the output columns
|
||||||
if (create_output_columns(o_view, sample_column, sample_count) < 0)
|
if (create_output_columns(o_view, sample_column, sample_count) < 0)
|
||||||
{
|
{
|
||||||
@ -549,7 +563,7 @@ int obi_clean(const char* dms_name,
|
|||||||
|
|
||||||
if (heads_only)
|
if (heads_only)
|
||||||
{
|
{
|
||||||
line_selection = malloc((o_view->infos)->line_count * sizeof(index_t));
|
line_selection = malloc((((o_view->infos)->line_count) + 1) * sizeof(index_t));
|
||||||
if (line_selection == NULL)
|
if (line_selection == NULL)
|
||||||
{
|
{
|
||||||
obi_set_errno(OBI_MALLOC_ERROR);
|
obi_set_errno(OBI_MALLOC_ERROR);
|
||||||
|
@ -52,7 +52,8 @@
|
|||||||
*
|
*
|
||||||
* @param dms A pointer on an OBIDMS.
|
* @param dms A pointer on an OBIDMS.
|
||||||
* @param i_view_name The name of the input view.
|
* @param i_view_name The name of the input view.
|
||||||
* @param sample_column_name The name of the OBI_STR column in the input view where the sample information is kept.
|
* @param sample_column_name The name of the column in the input view where the sample information is kept.
|
||||||
|
* Must be merged informations as built by the obi uniq tool (checked by the function).
|
||||||
* NULL or "" (empty string) if there is no sample information.
|
* NULL or "" (empty string) if there is no sample information.
|
||||||
* @param o_view_name The name of the output view where the results should be written (should not already exist).
|
* @param o_view_name The name of the output view where the results should be written (should not already exist).
|
||||||
* @param o_view_comments The comments that should be associated with the output view.
|
* @param o_view_comments The comments that should be associated with the output view.
|
||||||
|
16
src/obiavl.c
16
src/obiavl.c
@ -2259,7 +2259,13 @@ index_t obi_avl_add(OBIDMS_avl_p avl, Obi_blob_p value)
|
|||||||
parent = next;
|
parent = next;
|
||||||
|
|
||||||
// Compare the crc of the value with the crc of the current node
|
// Compare the crc of the value with the crc of the current node
|
||||||
comp = (current_node->crc64) - crc;
|
//comp = (current_node->crc64) - crc;
|
||||||
|
if ((current_node->crc64) == crc)
|
||||||
|
comp = 0;
|
||||||
|
else if ((current_node->crc64) > crc)
|
||||||
|
comp = 1;
|
||||||
|
else
|
||||||
|
comp = -1;
|
||||||
|
|
||||||
if (comp == 0)
|
if (comp == 0)
|
||||||
{ // check if really same value
|
{ // check if really same value
|
||||||
@ -2354,7 +2360,13 @@ index_t obi_avl_find(OBIDMS_avl_p avl, Obi_blob_p value)
|
|||||||
current_node = (avl->tree)+next;
|
current_node = (avl->tree)+next;
|
||||||
|
|
||||||
// Compare the crc of the value with the crc of the current node
|
// Compare the crc of the value with the crc of the current node
|
||||||
comp = (current_node->crc64) - crc;
|
//comp = (current_node->crc64) - crc;
|
||||||
|
if ((current_node->crc64) == crc)
|
||||||
|
comp = 0;
|
||||||
|
else if ((current_node->crc64) > crc)
|
||||||
|
comp = 1;
|
||||||
|
else
|
||||||
|
comp = -1;
|
||||||
|
|
||||||
if (comp == 0)
|
if (comp == 0)
|
||||||
{ // Check if really same value
|
{ // Check if really same value
|
||||||
|
Reference in New Issue
Block a user