Compare commits
24 Commits
Author | SHA1 | Date | |
---|---|---|---|
11a0945a9b | |||
f23c40c905 | |||
f99fc13b75 | |||
1da6aac1b8 | |||
159803b40a | |||
7dcbc34017 | |||
db2202c8b4 | |||
d33ff97846 | |||
1dcdf69f1f | |||
dec114eed6 | |||
f36691053b | |||
f2aa5fcf8b | |||
bccb3e6874 | |||
f5a17bea68 | |||
e28507639a | |||
e6feac93fe | |||
50b292b489 | |||
24a737aa55 | |||
8aa455ad8a | |||
46ca693ca9 | |||
9a9afde113 | |||
8dd403a118 | |||
9672f01c6a | |||
ed9549acfb |
@ -30,12 +30,12 @@ cdef class ProgressBar:
|
||||
off_t maxi,
|
||||
dict config={},
|
||||
str head="",
|
||||
double seconde=0.1,
|
||||
double seconds=5,
|
||||
cut=False):
|
||||
|
||||
self.starttime = self.clock()
|
||||
self.lasttime = self.starttime
|
||||
self.tickcount = <clock_t> (seconde * CLOCKS_PER_SEC)
|
||||
self.tickcount = <clock_t> (seconds * CLOCKS_PER_SEC)
|
||||
self.freq = 1
|
||||
self.cycle = 0
|
||||
self.arrow = 0
|
||||
|
@ -4,7 +4,7 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||
from obitools3.dms import DMS
|
||||
from obitools3.dms.view.view cimport View
|
||||
from obitools3.uri.decode import open_uri
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
|
||||
from obitools3.dms.view import RollbackException
|
||||
from obitools3.apps.config import logger
|
||||
from obitools3.utils cimport tobytes, str2bytes
|
||||
@ -12,6 +12,9 @@ from obitools3.utils cimport tobytes, str2bytes
|
||||
from obitools3.dms.capi.obilcsalign cimport obi_lcs_align_one_column, \
|
||||
obi_lcs_align_two_columns
|
||||
|
||||
from io import BufferedWriter
|
||||
from cpython.exc cimport PyErr_CheckSignals
|
||||
|
||||
import time
|
||||
import sys
|
||||
|
||||
@ -23,6 +26,7 @@ def addOptions(parser):
|
||||
|
||||
addMinimalInputOption(parser)
|
||||
addMinimalOutputOption(parser)
|
||||
addNoProgressBarOption(parser)
|
||||
|
||||
group=parser.add_argument_group('obi align specific options')
|
||||
|
||||
@ -201,20 +205,20 @@ def run(config):
|
||||
if output is None:
|
||||
raise Exception("Could not create output")
|
||||
o_dms = output[0]
|
||||
output_0 = output[0]
|
||||
o_dms_name = o_dms.name
|
||||
final_o_view_name = output[1]
|
||||
o_view_name = final_o_view_name
|
||||
|
||||
# If the input and output DMS are not the same, align creating a temporary view in the input dms that will be exported to
|
||||
# If stdout output or the input and output DMS are not the same, align creating a temporary view in the input dms that will be exported to
|
||||
# the right DMS and deleted in the other afterwards.
|
||||
if i_dms != o_dms:
|
||||
temporary_view_name = final_o_view_name
|
||||
i=0
|
||||
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||
temporary_view_name = final_o_view_name+b"_"+str2bytes(str(i))
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
if type(output_0)==BufferedWriter:
|
||||
o_dms = i_dms
|
||||
o_view_name = b"temp"
|
||||
while o_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||
o_view_name = o_view_name+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
o_view_name = temporary_view_name
|
||||
else:
|
||||
o_view_name = final_o_view_name
|
||||
|
||||
# Save command config in View comments
|
||||
command_line = " ".join(sys.argv[1:])
|
||||
@ -263,8 +267,15 @@ def run(config):
|
||||
View.delete_view(i_dms, i_view_name_2)
|
||||
i_dms_2.close()
|
||||
|
||||
# stdout output: write to buffer
|
||||
if type(output_0)==BufferedWriter:
|
||||
logger("info", "Printing to output...")
|
||||
o_view = o_dms[o_view_name]
|
||||
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||
o_view.close()
|
||||
|
||||
# If the input and the output DMS are different, delete the temporary result view in the input DMS
|
||||
if i_dms != o_dms:
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
View.delete_view(i_dms, o_view_name)
|
||||
o_dms.close(force=True)
|
||||
|
||||
|
@ -6,7 +6,7 @@ from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
||||
from obitools3.dms.column.column cimport Column
|
||||
from obitools3.dms.capi.obiview cimport QUALITY_COLUMN
|
||||
from obitools3.dms.capi.obitypes cimport OBI_QUAL
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
|
||||
from obitools3.uri.decode import open_uri
|
||||
from obitools3.apps.config import logger
|
||||
from obitools3.libalign._qsassemble import QSolexaReverseAssemble
|
||||
@ -15,7 +15,9 @@ from obitools3.libalign._solexapairend import buildConsensus, buildJoinedSequenc
|
||||
from obitools3.dms.obiseq cimport Nuc_Seq
|
||||
from obitools3.libalign.shifted_ali cimport Kmer_similarity, Ali_shifted
|
||||
from obitools3.dms.capi.obiview cimport REVERSE_SEQUENCE_COLUMN, REVERSE_QUALITY_COLUMN
|
||||
from obitools3.utils cimport str2bytes
|
||||
|
||||
from io import BufferedWriter
|
||||
import sys
|
||||
import os
|
||||
|
||||
@ -29,6 +31,7 @@ def addOptions(parser):
|
||||
|
||||
addMinimalInputOption(parser)
|
||||
addMinimalOutputOption(parser)
|
||||
addNoProgressBarOption(parser)
|
||||
|
||||
group = parser.add_argument_group('obi alignpairedend specific options')
|
||||
|
||||
@ -39,12 +42,13 @@ def addOptions(parser):
|
||||
type=str,
|
||||
help="URI to the reverse reads if they are in a different view than the forward reads")
|
||||
|
||||
group.add_argument('--score-min',
|
||||
action="store", dest="alignpairedend:smin",
|
||||
metavar="#.###",
|
||||
default=None,
|
||||
type=float,
|
||||
help="Minimum score for keeping alignments")
|
||||
# group.add_argument('--score-min',
|
||||
# action="store", dest="alignpairedend:smin",
|
||||
# metavar="#.###",
|
||||
# default=None,
|
||||
# type=float,
|
||||
# help="Minimum score for keeping alignments. "
|
||||
# "(for kmer alignment) The score is an approximation of the number of nucleotides matching in the overlap of the alignment.")
|
||||
|
||||
# group.add_argument('-A', '--true-ali',
|
||||
# action="store_true", dest="alignpairedend:trueali",
|
||||
@ -170,18 +174,29 @@ def run(config):
|
||||
if output is None:
|
||||
raise Exception("Could not create output view")
|
||||
|
||||
view = output[1]
|
||||
output_0 = output[0]
|
||||
o_dms = output[0]
|
||||
|
||||
Column.new_column(view, QUALITY_COLUMN, OBI_QUAL) #TODO output URI quality option?
|
||||
|
||||
if 'smin' in config['alignpairedend']:
|
||||
smin = config['alignpairedend']['smin']
|
||||
# stdout output: create temporary view
|
||||
if type(output_0)==BufferedWriter:
|
||||
i_dms = forward.dms # using any dms
|
||||
o_dms = i_dms
|
||||
i=0
|
||||
o_view_name = b"temp"
|
||||
while o_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||
o_view_name = o_view_name+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
o_view = View_NUC_SEQS.new(o_dms, o_view_name, quality=True)
|
||||
else:
|
||||
smin = 0
|
||||
o_view = output[1]
|
||||
Column.new_column(o_view, QUALITY_COLUMN, OBI_QUAL)
|
||||
|
||||
# Initialize the progress bar
|
||||
pb = ProgressBar(entries_len, config, seconde=5)
|
||||
|
||||
if config['obi']['noprogressbar'] == False:
|
||||
pb = ProgressBar(entries_len, config)
|
||||
else:
|
||||
pb = None
|
||||
|
||||
#if config['alignpairedend']['trueali']:
|
||||
# kmer_ali = False
|
||||
# aligner = buildAlignment
|
||||
@ -206,18 +221,19 @@ def run(config):
|
||||
i = 0
|
||||
for ali in ba:
|
||||
|
||||
pb(i)
|
||||
if pb is not None:
|
||||
pb(i)
|
||||
|
||||
PyErr_CheckSignals()
|
||||
|
||||
consensus = view[i]
|
||||
consensus = o_view[i]
|
||||
|
||||
if not two_views:
|
||||
seqF = entries[i]
|
||||
else:
|
||||
seqF = forward[i]
|
||||
|
||||
if ali.score > smin and ali.consensus_len > 0 :
|
||||
if ali.overlap_len > 0 :
|
||||
buildConsensus(ali, consensus, seqF)
|
||||
else:
|
||||
if not two_views:
|
||||
@ -225,32 +241,43 @@ def run(config):
|
||||
else:
|
||||
seqR = reverse[i]
|
||||
buildJoinedSequence(ali, seqR, consensus, forward=seqF)
|
||||
|
||||
consensus[b"smin"] = smin
|
||||
|
||||
|
||||
if kmer_ali :
|
||||
ali.free()
|
||||
|
||||
i+=1
|
||||
|
||||
pb(i, force=True)
|
||||
print("", file=sys.stderr)
|
||||
if pb is not None:
|
||||
pb(i, force=True)
|
||||
print("", file=sys.stderr)
|
||||
|
||||
if kmer_ali :
|
||||
aligner.free()
|
||||
|
||||
# Save command config in View and DMS comments
|
||||
command_line = " ".join(sys.argv[1:])
|
||||
view.write_config(config, "alignpairedend", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
|
||||
output[0].record_command_line(command_line)
|
||||
o_view.write_config(config, "alignpairedend", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
|
||||
o_dms.record_command_line(command_line)
|
||||
|
||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||
#print(repr(view), file=sys.stderr)
|
||||
|
||||
|
||||
# stdout output: write to buffer
|
||||
if type(output_0)==BufferedWriter:
|
||||
logger("info", "Printing to output...")
|
||||
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||
o_view.close()
|
||||
|
||||
# If stdout output, delete the temporary imported view used to create the final file
|
||||
if type(output_0)==BufferedWriter:
|
||||
View_NUC_SEQS.delete_view(o_dms, o_view_name)
|
||||
output_0.close()
|
||||
|
||||
# Close all DMS
|
||||
input[0].close(force=True)
|
||||
if two_views:
|
||||
rinput[0].close(force=True)
|
||||
output[0].close(force=True)
|
||||
o_dms.close(force=True)
|
||||
|
||||
logger("info", "Done.")
|
||||
|
@ -4,11 +4,12 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||
from obitools3.dms import DMS
|
||||
from obitools3.dms.view.view cimport View, Line_selection
|
||||
from obitools3.uri.decode import open_uri
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption, addNoProgressBarOption
|
||||
from obitools3.dms.view import RollbackException
|
||||
from functools import reduce
|
||||
from obitools3.apps.config import logger
|
||||
from obitools3.utils cimport tobytes, str2bytes
|
||||
from io import BufferedWriter
|
||||
from obitools3.dms.capi.obiview cimport NUC_SEQUENCE_COLUMN, \
|
||||
ID_COLUMN, \
|
||||
DEFINITION_COLUMN, \
|
||||
@ -34,6 +35,7 @@ def addOptions(parser):
|
||||
addMinimalInputOption(parser)
|
||||
addTaxonomyOption(parser)
|
||||
addMinimalOutputOption(parser)
|
||||
addNoProgressBarOption(parser)
|
||||
|
||||
group=parser.add_argument_group('obi annotate specific options')
|
||||
|
||||
@ -278,8 +280,19 @@ def run(config):
|
||||
if output is None:
|
||||
raise Exception("Could not create output view")
|
||||
o_dms = output[0]
|
||||
output_0 = output[0]
|
||||
o_view_name = output[1]
|
||||
|
||||
|
||||
# stdout output: create temporary view
|
||||
if type(output_0)==BufferedWriter:
|
||||
o_dms = i_dms
|
||||
i=0
|
||||
o_view_name = b"temp"
|
||||
while o_view_name in i_dms: # Making sure view name is unique in output DMS
|
||||
o_view_name = o_view_name+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
imported_view_name = o_view_name
|
||||
|
||||
# If the input and output DMS are not the same, import the input view in the output DMS before cloning it to modify it
|
||||
# (could be the other way around: clone and modify in the input DMS then import the new view in the output DMS)
|
||||
if i_dms != o_dms:
|
||||
@ -290,7 +303,7 @@ def run(config):
|
||||
i+=1
|
||||
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], i_view_name, imported_view_name)
|
||||
i_view = o_dms[imported_view_name]
|
||||
|
||||
|
||||
# Clone output view from input view
|
||||
o_view = i_view.clone(o_view_name)
|
||||
if o_view is None:
|
||||
@ -307,7 +320,10 @@ def run(config):
|
||||
taxo = None
|
||||
|
||||
# Initialize the progress bar
|
||||
pb = ProgressBar(len(o_view), config, seconde=5)
|
||||
if config['obi']['noprogressbar'] == False:
|
||||
pb = ProgressBar(len(o_view), config)
|
||||
else:
|
||||
pb = None
|
||||
|
||||
try:
|
||||
|
||||
@ -346,14 +362,16 @@ def run(config):
|
||||
sequenceTagger = sequenceTaggerGenerator(config, taxo=taxo)
|
||||
for i in range(len(o_view)):
|
||||
PyErr_CheckSignals()
|
||||
pb(i)
|
||||
if pb is not None:
|
||||
pb(i)
|
||||
sequenceTagger(o_view[i])
|
||||
|
||||
except Exception, e:
|
||||
raise RollbackException("obi annotate error, rollbacking view: "+str(e), o_view)
|
||||
|
||||
pb(i, force=True)
|
||||
print("", file=sys.stderr)
|
||||
if pb is not None:
|
||||
pb(i, force=True)
|
||||
print("", file=sys.stderr)
|
||||
|
||||
# Save command config in View and DMS comments
|
||||
command_line = " ".join(sys.argv[1:])
|
||||
@ -363,13 +381,19 @@ def run(config):
|
||||
input_dms_name.append(config['obi']['taxoURI'].split("/")[-3])
|
||||
input_view_name.append("taxonomy/"+config['obi']['taxoURI'].split("/")[-1])
|
||||
o_view.write_config(config, "annotate", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
|
||||
output[0].record_command_line(command_line)
|
||||
o_dms.record_command_line(command_line)
|
||||
|
||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||
#print(repr(o_view), file=sys.stderr)
|
||||
|
||||
# If the input and the output DMS are different, delete the temporary imported view used to create the final view
|
||||
if i_dms != o_dms:
|
||||
# stdout output: write to buffer
|
||||
if type(output_0)==BufferedWriter:
|
||||
logger("info", "Printing to output...")
|
||||
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||
o_view.close()
|
||||
|
||||
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
View.delete_view(o_dms, imported_view_name)
|
||||
o_dms.close(force=True)
|
||||
i_dms.close(force=True)
|
||||
|
@ -4,14 +4,16 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||
from obitools3.dms.dms cimport DMS
|
||||
from obitools3.dms.view import RollbackException
|
||||
from obitools3.dms.capi.build_reference_db cimport build_reference_db
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption, addNoProgressBarOption
|
||||
from obitools3.uri.decode import open_uri
|
||||
from obitools3.apps.config import logger
|
||||
from obitools3.utils cimport tobytes, str2bytes
|
||||
from obitools3.dms.view.view cimport View
|
||||
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
||||
|
||||
from io import BufferedWriter
|
||||
import sys
|
||||
from cpython.exc cimport PyErr_CheckSignals
|
||||
|
||||
|
||||
__title__="Tag a set of sequences for PCR and sequencing errors identification"
|
||||
@ -22,6 +24,7 @@ def addOptions(parser):
|
||||
addMinimalInputOption(parser)
|
||||
addTaxonomyOption(parser)
|
||||
addMinimalOutputOption(parser)
|
||||
addNoProgressBarOption(parser)
|
||||
|
||||
group = parser.add_argument_group('obi build_ref_db specific options')
|
||||
|
||||
@ -56,17 +59,20 @@ def run(config):
|
||||
if output is None:
|
||||
raise Exception("Could not create output")
|
||||
o_dms = output[0]
|
||||
output_0 = output[0]
|
||||
final_o_view_name = output[1]
|
||||
|
||||
# If the input and output DMS are not the same, build the database creating a temporary view that will be exported to
|
||||
# If stdout output or the input and output DMS are not the same, build the database creating a temporary view that will be exported to
|
||||
# the right DMS and deleted in the other afterwards.
|
||||
if i_dms != o_dms:
|
||||
temporary_view_name = final_o_view_name
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
temporary_view_name = b"temp"
|
||||
i=0
|
||||
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||
temporary_view_name = final_o_view_name+b"_"+str2bytes(str(i))
|
||||
temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
o_view_name = temporary_view_name
|
||||
if type(output_0)==BufferedWriter:
|
||||
o_dms = i_dms
|
||||
else:
|
||||
o_view_name = final_o_view_name
|
||||
|
||||
@ -80,22 +86,29 @@ def run(config):
|
||||
input_dms_name.append(config['obi']['taxoURI'].split("/")[-3])
|
||||
input_view_name.append("taxonomy/"+config['obi']['taxoURI'].split("/")[-1])
|
||||
comments = View.print_config(config, "build_ref_db", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
|
||||
|
||||
|
||||
if build_reference_db(i_dms.name_with_full_path, tobytes(i_view_name), tobytes(taxonomy_name), tobytes(o_view_name), comments, config['build_ref_db']['threshold']) < 0:
|
||||
raise Exception("Error building a reference database")
|
||||
|
||||
# If the input and output DMS are not the same, export result view to output DMS
|
||||
if i_dms != o_dms:
|
||||
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)
|
||||
|
||||
|
||||
# stdout output: write to buffer
|
||||
if type(output_0)==BufferedWriter:
|
||||
logger("info", "Printing to output...")
|
||||
o_view = o_dms[o_view_name]
|
||||
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||
o_view.close()
|
||||
|
||||
# Save command config in DMS comments
|
||||
o_dms.record_command_line(command_line)
|
||||
|
||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||
#print(repr(o_dms[final_o_view_name]), file=sys.stderr)
|
||||
|
||||
# If the input and the output DMS are different, delete the temporary result view in the input DMS
|
||||
if i_dms != o_dms:
|
||||
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
View.delete_view(i_dms, o_view_name)
|
||||
o_dms.close(force=True)
|
||||
|
||||
|
@ -4,7 +4,7 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||
from obitools3.dms import DMS
|
||||
from obitools3.dms.view.view cimport View
|
||||
from obitools3.uri.decode import open_uri
|
||||
from obitools3.apps.optiongroups import addMinimalOutputOption
|
||||
from obitools3.apps.optiongroups import addMinimalOutputOption, addNoProgressBarOption
|
||||
from obitools3.dms.view import RollbackException
|
||||
from obitools3.apps.config import logger
|
||||
from obitools3.utils cimport str2bytes
|
||||
@ -15,6 +15,7 @@ from obitools3.dms.capi.obiview cimport NUC_SEQUENCE_COLUMN, REVERSE_SEQUENCE_CO
|
||||
from obitools3.dms.capi.obitypes cimport OBI_SEQ, OBI_QUAL
|
||||
from obitools3.dms.column.column cimport Column
|
||||
|
||||
from io import BufferedWriter
|
||||
import time
|
||||
import sys
|
||||
|
||||
@ -27,6 +28,7 @@ __title__="Concatenate views."
|
||||
def addOptions(parser):
|
||||
|
||||
addMinimalOutputOption(parser)
|
||||
addNoProgressBarOption(parser)
|
||||
|
||||
group=parser.add_argument_group('obi cat specific options')
|
||||
|
||||
@ -46,9 +48,9 @@ def run(config):
|
||||
|
||||
logger("info", "obi cat")
|
||||
|
||||
# Open the views to concatenate
|
||||
iview_list = []
|
||||
# Check the views to concatenate
|
||||
idms_list = []
|
||||
iview_list = []
|
||||
total_len = 0
|
||||
remove_qual = False
|
||||
remove_rev_qual = False
|
||||
@ -66,8 +68,9 @@ def run(config):
|
||||
if REVERSE_QUALITY_COLUMN not in i_view: # same as above for reverse quality
|
||||
remove_rev_qual = True
|
||||
total_len += len(i_view)
|
||||
iview_list.append(i_view)
|
||||
idms_list.append(i_dms)
|
||||
iview_list.append(i_view.name)
|
||||
i_view.close()
|
||||
|
||||
# Open the output: only the DMS
|
||||
output = open_uri(config['obi']['outputURI'],
|
||||
@ -76,57 +79,79 @@ def run(config):
|
||||
if output is None:
|
||||
raise Exception("Could not create output view")
|
||||
o_dms = output[0]
|
||||
output_0 = output[0]
|
||||
o_view = output[1]
|
||||
|
||||
# stdout output
|
||||
if type(output_0)==BufferedWriter:
|
||||
o_dms = i_dms
|
||||
|
||||
# Initialize quality columns and their associated sequence columns if needed
|
||||
if not remove_qual:
|
||||
if NUC_SEQUENCE_COLUMN not in o_view:
|
||||
Column.new_column(o_view, NUC_SEQUENCE_COLUMN, OBI_SEQ)
|
||||
Column.new_column(o_view, QUALITY_COLUMN, OBI_QUAL, associated_column_name=NUC_SEQUENCE_COLUMN, associated_column_version=o_view[NUC_SEQUENCE_COLUMN].version)
|
||||
if not remove_rev_qual:
|
||||
Column.new_column(o_view, REVERSE_SEQUENCE_COLUMN, OBI_SEQ)
|
||||
Column.new_column(o_view, REVERSE_QUALITY_COLUMN, OBI_QUAL, associated_column_name=REVERSE_SEQUENCE_COLUMN, associated_column_version=o_view[REVERSE_SEQUENCE_COLUMN].version)
|
||||
if type(output_0) != BufferedWriter:
|
||||
if not remove_qual:
|
||||
if NUC_SEQUENCE_COLUMN not in o_view:
|
||||
Column.new_column(o_view, NUC_SEQUENCE_COLUMN, OBI_SEQ)
|
||||
Column.new_column(o_view, QUALITY_COLUMN, OBI_QUAL, associated_column_name=NUC_SEQUENCE_COLUMN, associated_column_version=o_view[NUC_SEQUENCE_COLUMN].version)
|
||||
if not remove_rev_qual:
|
||||
Column.new_column(o_view, REVERSE_SEQUENCE_COLUMN, OBI_SEQ)
|
||||
Column.new_column(o_view, REVERSE_QUALITY_COLUMN, OBI_QUAL, associated_column_name=REVERSE_SEQUENCE_COLUMN, associated_column_version=o_view[REVERSE_SEQUENCE_COLUMN].version)
|
||||
|
||||
# Initialize multiple elements columns
|
||||
dict_cols = {}
|
||||
for v in iview_list:
|
||||
for coln in v.keys():
|
||||
if v[coln].nb_elements_per_line > 1:
|
||||
if coln not in dict_cols:
|
||||
dict_cols[coln] = {}
|
||||
dict_cols[coln]['eltnames'] = set(v[coln].elements_names)
|
||||
dict_cols[coln]['nbelts'] = v[coln].nb_elements_per_line
|
||||
dict_cols[coln]['obitype'] = v[coln].data_type_int
|
||||
else:
|
||||
dict_cols[coln]['eltnames'] = set(v[coln].elements_names + list(dict_cols[coln]['eltnames']))
|
||||
dict_cols[coln]['nbelts'] = len(dict_cols[coln]['eltnames'])
|
||||
for coln in dict_cols:
|
||||
Column.new_column(o_view, coln, dict_cols[coln]['obitype'],
|
||||
nb_elements_per_line=dict_cols[coln]['nbelts'], elements_names=list(dict_cols[coln]['eltnames']))
|
||||
if type(output_0)==BufferedWriter:
|
||||
dict_cols = {}
|
||||
for v_uri in config["cat"]["views_to_cat"]:
|
||||
v = open_uri(v_uri)[1]
|
||||
for coln in v.keys():
|
||||
col = v[coln]
|
||||
if v[coln].nb_elements_per_line > 1:
|
||||
if coln not in dict_cols:
|
||||
dict_cols[coln] = {}
|
||||
dict_cols[coln]['eltnames'] = set(v[coln].elements_names)
|
||||
dict_cols[coln]['nbelts'] = v[coln].nb_elements_per_line
|
||||
dict_cols[coln]['obitype'] = v[coln].data_type_int
|
||||
else:
|
||||
dict_cols[coln]['eltnames'] = set(v[coln].elements_names + list(dict_cols[coln]['eltnames']))
|
||||
dict_cols[coln]['nbelts'] = len(dict_cols[coln]['eltnames'])
|
||||
v.close()
|
||||
for coln in dict_cols:
|
||||
Column.new_column(o_view, coln, dict_cols[coln]['obitype'],
|
||||
nb_elements_per_line=dict_cols[coln]['nbelts'], elements_names=list(dict_cols[coln]['eltnames']))
|
||||
|
||||
# Initialize the progress bar
|
||||
pb = ProgressBar(total_len, config, seconde=5)
|
||||
if not config['obi']['noprogressbar']:
|
||||
pb = ProgressBar(total_len, config)
|
||||
else:
|
||||
pb = None
|
||||
|
||||
i = 0
|
||||
for v in iview_list:
|
||||
for l in v:
|
||||
for v_uri in config["cat"]["views_to_cat"]:
|
||||
v = open_uri(v_uri)[1]
|
||||
for entry in v:
|
||||
PyErr_CheckSignals()
|
||||
pb(i)
|
||||
o_view[i] = l
|
||||
if pb is not None:
|
||||
pb(i)
|
||||
if type(output_0)==BufferedWriter:
|
||||
rep = repr(entry)
|
||||
output_0.write(str2bytes(rep)+b"\n")
|
||||
else:
|
||||
o_view[i] = entry
|
||||
i+=1
|
||||
v.close()
|
||||
|
||||
# Deletes quality columns if needed
|
||||
if QUALITY_COLUMN in o_view and remove_qual :
|
||||
o_view.delete_column(QUALITY_COLUMN)
|
||||
if REVERSE_QUALITY_COLUMN in o_view and remove_rev_qual :
|
||||
o_view.delete_column(REVERSE_QUALITY_COLUMN)
|
||||
|
||||
pb(i, force=True)
|
||||
print("", file=sys.stderr)
|
||||
if type(output_0)!=BufferedWriter:
|
||||
if QUALITY_COLUMN in o_view and remove_qual :
|
||||
o_view.delete_column(QUALITY_COLUMN)
|
||||
if REVERSE_QUALITY_COLUMN in o_view and remove_rev_qual :
|
||||
o_view.delete_column(REVERSE_QUALITY_COLUMN)
|
||||
|
||||
if pb is not None:
|
||||
pb(i, force=True)
|
||||
print("", file=sys.stderr)
|
||||
|
||||
# Save command config in DMS comments
|
||||
command_line = " ".join(sys.argv[1:])
|
||||
o_view.write_config(config, "cat", command_line, input_dms_name=[d.name for d in idms_list], input_view_name=[v.name for v in iview_list])
|
||||
o_view.write_config(config, "cat", command_line, input_dms_name=[d.name for d in idms_list], input_view_name=[vname for vname in iview_list])
|
||||
o_dms.record_command_line(command_line)
|
||||
|
||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||
|
@ -4,13 +4,14 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||
from obitools3.dms.dms cimport DMS
|
||||
from obitools3.dms.view import RollbackException
|
||||
from obitools3.dms.capi.obiclean cimport obi_clean
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
|
||||
from obitools3.uri.decode import open_uri
|
||||
from obitools3.apps.config import logger
|
||||
from obitools3.utils cimport tobytes, str2bytes
|
||||
from obitools3.dms.view.view cimport View
|
||||
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
||||
|
||||
from io import BufferedWriter
|
||||
import sys
|
||||
|
||||
|
||||
@ -21,7 +22,8 @@ def addOptions(parser):
|
||||
|
||||
addMinimalInputOption(parser)
|
||||
addMinimalOutputOption(parser)
|
||||
|
||||
addNoProgressBarOption(parser)
|
||||
|
||||
group = parser.add_argument_group('obi clean specific options')
|
||||
|
||||
group.add_argument('--distance', '-d',
|
||||
@ -88,17 +90,20 @@ def run(config):
|
||||
if output is None:
|
||||
raise Exception("Could not create output")
|
||||
o_dms = output[0]
|
||||
output_0 = output[0]
|
||||
final_o_view_name = output[1]
|
||||
|
||||
# If the input and output DMS are not the same, run obiclean creating a temporary view that will be exported to
|
||||
# If stdout output or the input and output DMS are not the same, create a temporary view that will be exported to
|
||||
# the right DMS and deleted in the other afterwards.
|
||||
if i_dms != o_dms:
|
||||
temporary_view_name = final_o_view_name
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
temporary_view_name = b"temp"
|
||||
i=0
|
||||
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||
temporary_view_name = final_o_view_name+b"_"+str2bytes(str(i))
|
||||
temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
o_view_name = temporary_view_name
|
||||
if type(output_0)==BufferedWriter:
|
||||
o_dms = i_dms
|
||||
else:
|
||||
o_view_name = final_o_view_name
|
||||
|
||||
@ -116,15 +121,22 @@ def run(config):
|
||||
# If the input and output DMS are not the same, export result view to output DMS
|
||||
if i_dms != o_dms:
|
||||
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)
|
||||
|
||||
|
||||
# stdout output: write to buffer
|
||||
if type(output_0)==BufferedWriter:
|
||||
logger("info", "Printing to output...")
|
||||
o_view = o_dms[o_view_name]
|
||||
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||
o_view.close()
|
||||
|
||||
# Save command config in DMS comments
|
||||
o_dms.record_command_line(command_line)
|
||||
|
||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||
#print(repr(o_dms[final_o_view_name]), file=sys.stderr)
|
||||
|
||||
# If the input and the output DMS are different, delete the temporary result view in the input DMS
|
||||
if i_dms != o_dms:
|
||||
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
View.delete_view(i_dms, o_view_name)
|
||||
o_dms.close(force=True)
|
||||
|
||||
|
@ -5,10 +5,10 @@ from obitools3.dms.dms cimport DMS
|
||||
from obitools3.dms.capi.obidms cimport OBIDMS_p
|
||||
from obitools3.dms.view import RollbackException
|
||||
from obitools3.dms.capi.obiecopcr cimport obi_ecopcr
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addTaxonomyOption
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addTaxonomyOption, addNoProgressBarOption
|
||||
from obitools3.uri.decode import open_uri
|
||||
from obitools3.apps.config import logger
|
||||
from obitools3.utils cimport tobytes
|
||||
from obitools3.utils cimport tobytes, str2bytes
|
||||
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
||||
from obitools3.dms.view import View
|
||||
|
||||
@ -16,6 +16,7 @@ from libc.stdlib cimport malloc, free
|
||||
from libc.stdint cimport int32_t
|
||||
|
||||
import sys
|
||||
from io import BufferedWriter
|
||||
|
||||
|
||||
__title__="in silico PCR"
|
||||
@ -27,6 +28,7 @@ def addOptions(parser):
|
||||
addMinimalInputOption(parser)
|
||||
addTaxonomyOption(parser)
|
||||
addMinimalOutputOption(parser)
|
||||
addNoProgressBarOption(parser)
|
||||
|
||||
|
||||
group = parser.add_argument_group('obi ecopcr specific options')
|
||||
@ -169,11 +171,20 @@ def run(config):
|
||||
if output is None:
|
||||
raise Exception("Could not create output")
|
||||
o_dms = output[0]
|
||||
output_0 = output[0]
|
||||
o_dms_name = output[0].name
|
||||
o_view_name = output[1]
|
||||
|
||||
# Read taxonomy name
|
||||
taxonomy_name = config['obi']['taxoURI'].split("/")[-1] # Robust in theory
|
||||
|
||||
# If stdout output create a temporary view in the input dms that will be deleted afterwards.
|
||||
if type(output_0)==BufferedWriter:
|
||||
o_dms = i_dms
|
||||
o_view_name = b"temp"
|
||||
while o_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||
o_view_name = o_view_name+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
|
||||
# Save command config in View comments
|
||||
command_line = " ".join(sys.argv[1:])
|
||||
@ -201,10 +212,21 @@ def run(config):
|
||||
|
||||
free(restrict_to_taxids_p)
|
||||
free(ignore_taxids_p)
|
||||
|
||||
|
||||
# stdout output: write to buffer
|
||||
if type(output_0)==BufferedWriter:
|
||||
logger("info", "Printing to output...")
|
||||
o_view = o_dms[o_view_name]
|
||||
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||
o_view.close()
|
||||
|
||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||
#print(repr(o_dms[o_view_name]), file=sys.stderr)
|
||||
|
||||
# If stdout output, delete the temporary result view in the input DMS
|
||||
if type(output_0)==BufferedWriter:
|
||||
View.delete_view(i_dms, o_view_name)
|
||||
|
||||
i_dms.close(force=True)
|
||||
o_dms.close(force=True)
|
||||
|
||||
|
@ -4,7 +4,7 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||
from obitools3.dms.dms cimport DMS
|
||||
from obitools3.dms.view import RollbackException
|
||||
from obitools3.dms.capi.obiecotag cimport obi_ecotag
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption, addNoProgressBarOption
|
||||
from obitools3.uri.decode import open_uri
|
||||
from obitools3.apps.config import logger
|
||||
from obitools3.utils cimport tobytes, str2bytes
|
||||
@ -12,6 +12,7 @@ from obitools3.dms.view.view cimport View
|
||||
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
||||
|
||||
import sys
|
||||
from io import BufferedWriter
|
||||
|
||||
|
||||
__title__="Taxonomic assignment of sequences"
|
||||
@ -22,6 +23,7 @@ def addOptions(parser):
|
||||
addMinimalInputOption(parser)
|
||||
addTaxonomyOption(parser)
|
||||
addMinimalOutputOption(parser)
|
||||
addNoProgressBarOption(parser)
|
||||
|
||||
group = parser.add_argument_group('obi ecotag specific options')
|
||||
|
||||
@ -75,17 +77,19 @@ def run(config):
|
||||
if output is None:
|
||||
raise Exception("Could not create output")
|
||||
o_dms = output[0]
|
||||
output_0 = output[0]
|
||||
final_o_view_name = output[1]
|
||||
|
||||
# If the input and output DMS are not the same, run ecotag creating a temporary view that will be exported to
|
||||
# the right DMS and deleted in the other afterwards.
|
||||
if i_dms != o_dms:
|
||||
temporary_view_name = final_o_view_name
|
||||
# If stdout output or the input and output DMS are not the same, create a temporary view that will be exported and deleted.
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
temporary_view_name = b"temp"
|
||||
i=0
|
||||
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||
temporary_view_name = final_o_view_name+b"_"+str2bytes(str(i))
|
||||
temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
o_view_name = temporary_view_name
|
||||
if type(output_0)==BufferedWriter:
|
||||
o_dms = i_dms
|
||||
else:
|
||||
o_view_name = final_o_view_name
|
||||
|
||||
@ -120,11 +124,18 @@ def run(config):
|
||||
# Save command config in DMS comments
|
||||
o_dms.record_command_line(command_line)
|
||||
|
||||
# stdout output: write to buffer
|
||||
if type(output_0)==BufferedWriter:
|
||||
logger("info", "Printing to output...")
|
||||
o_view = o_dms[o_view_name]
|
||||
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||
o_view.close()
|
||||
|
||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||
#print(repr(o_dms[final_o_view_name]), file=sys.stderr)
|
||||
|
||||
# If the input and the output DMS are different, delete the temporary result view in the input DMS
|
||||
if i_dms != o_dms:
|
||||
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
View.delete_view(i_dms, o_view_name)
|
||||
o_dms.close(force=True)
|
||||
|
||||
|
@ -74,7 +74,7 @@ def run(config):
|
||||
if config['obi']['noprogressbar']:
|
||||
pb = None
|
||||
else:
|
||||
pb = ProgressBar(withoutskip - skip, config, seconde=5)
|
||||
pb = ProgressBar(withoutskip - skip, config)
|
||||
|
||||
i=0
|
||||
for seq in iview :
|
||||
@ -89,7 +89,7 @@ def run(config):
|
||||
|
||||
if pb is not None:
|
||||
pb(i, force=True)
|
||||
print("", file=sys.stderr)
|
||||
print("", file=sys.stderr)
|
||||
|
||||
# TODO save command in input dms?
|
||||
|
||||
|
@ -4,7 +4,7 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||
from obitools3.dms import DMS
|
||||
from obitools3.dms.view.view cimport View, Line_selection
|
||||
from obitools3.uri.decode import open_uri
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption, addNoProgressBarOption
|
||||
from obitools3.dms.view import RollbackException
|
||||
from obitools3.apps.config import logger
|
||||
from obitools3.utils cimport tobytes, str2bytes
|
||||
@ -14,6 +14,7 @@ import time
|
||||
import re
|
||||
import sys
|
||||
import ast
|
||||
from io import BufferedWriter
|
||||
from cpython.exc cimport PyErr_CheckSignals
|
||||
|
||||
|
||||
@ -28,6 +29,7 @@ def addOptions(parser):
|
||||
addMinimalInputOption(parser)
|
||||
addTaxonomyOption(parser)
|
||||
addMinimalOutputOption(parser)
|
||||
addNoProgressBarOption(parser)
|
||||
|
||||
group=parser.add_argument_group("obi grep specific options")
|
||||
|
||||
@ -304,16 +306,21 @@ def run(config):
|
||||
if output is None:
|
||||
raise Exception("Could not create output view")
|
||||
o_dms = output[0]
|
||||
o_view_name_final = output[1]
|
||||
o_view_name = o_view_name_final
|
||||
|
||||
# If the input and output DMS are not the same, create output view in input DMS first, then export it
|
||||
# to output DMS, making sure the temporary view name is unique in the input DMS
|
||||
if i_dms != o_dms:
|
||||
output_0 = output[0]
|
||||
final_o_view_name = output[1]
|
||||
|
||||
# If stdout output or the input and output DMS are not the same, create a temporary view that will be exported and deleted afterwards.
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
temporary_view_name = b"temp"
|
||||
i=0
|
||||
while o_view_name in i_dms:
|
||||
o_view_name = o_view_name_final+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||
temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
o_view_name = temporary_view_name
|
||||
if type(output_0)==BufferedWriter:
|
||||
o_dms = i_dms
|
||||
else:
|
||||
o_view_name = final_o_view_name
|
||||
|
||||
if 'taxoURI' in config['obi'] and config['obi']['taxoURI'] is not None:
|
||||
taxo_uri = open_uri(config["obi"]["taxoURI"])
|
||||
@ -324,7 +331,10 @@ def run(config):
|
||||
taxo = None
|
||||
|
||||
# Initialize the progress bar
|
||||
pb = ProgressBar(len(i_view), config, seconde=5)
|
||||
if config['obi']['noprogressbar'] == False:
|
||||
pb = ProgressBar(len(i_view), config)
|
||||
else:
|
||||
pb = None
|
||||
|
||||
# Apply filter
|
||||
tax_filter = Taxonomy_filter_generator(taxo, config["grep"])
|
||||
@ -334,31 +344,36 @@ def run(config):
|
||||
if filter is None and config["grep"]["invert_selection"]: # all sequences are selected: filter is None if no line will be selected because some columns don't exist
|
||||
for i in range(len(i_view)):
|
||||
PyErr_CheckSignals()
|
||||
pb(i)
|
||||
selection.append(i)
|
||||
if pb is not None:
|
||||
pb(i)
|
||||
selection.append(i)
|
||||
|
||||
elif filter is not None : # filter is None if no line will be selected because some columns don't exist
|
||||
for i in range(len(i_view)):
|
||||
PyErr_CheckSignals()
|
||||
pb(i)
|
||||
if pb is not None:
|
||||
pb(i)
|
||||
line = i_view[i]
|
||||
|
||||
|
||||
loc_env = {"sequence": line, "line": line, "taxonomy": taxo, "obi_eval_result": False}
|
||||
|
||||
good = filter(line, loc_env)
|
||||
|
||||
if good :
|
||||
selection.append(i)
|
||||
|
||||
pb(len(i_view), force=True)
|
||||
print("", file=sys.stderr)
|
||||
|
||||
if pb is not None:
|
||||
pb(len(i_view), force=True)
|
||||
print("", file=sys.stderr)
|
||||
|
||||
# Create output view with the line selection
|
||||
try:
|
||||
o_view = selection.materialize(o_view_name)
|
||||
except Exception, e:
|
||||
raise RollbackException("obi grep error, rollbacking view: "+str(e), o_view)
|
||||
|
||||
|
||||
logger("info", "Grepped %d entries" % len(o_view))
|
||||
|
||||
# Save command config in View and DMS comments
|
||||
command_line = " ".join(sys.argv[1:])
|
||||
input_dms_name=[input[0].name]
|
||||
@ -373,14 +388,20 @@ def run(config):
|
||||
# and delete the temporary view in the input DMS
|
||||
if i_dms != o_dms:
|
||||
o_view.close()
|
||||
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, o_view_name_final)
|
||||
o_view = o_dms[o_view_name_final]
|
||||
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)
|
||||
o_view = o_dms[final_o_view_name]
|
||||
|
||||
# stdout output: write to buffer
|
||||
if type(output_0)==BufferedWriter:
|
||||
logger("info", "Printing to output...")
|
||||
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||
o_view.close()
|
||||
|
||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||
#print(repr(o_view), file=sys.stderr)
|
||||
|
||||
# If the input and the output DMS are different, delete the temporary imported view used to create the final view
|
||||
if i_dms != o_dms:
|
||||
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
View.delete_view(i_dms, o_view_name)
|
||||
o_dms.close(force=True)
|
||||
i_dms.close(force=True)
|
||||
|
@ -4,14 +4,15 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||
from obitools3.dms import DMS
|
||||
from obitools3.dms.view.view cimport View, Line_selection
|
||||
from obitools3.uri.decode import open_uri
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
|
||||
from obitools3.dms.view import RollbackException
|
||||
from obitools3.apps.config import logger
|
||||
from obitools3.utils cimport str2bytes
|
||||
|
||||
import time
|
||||
import sys
|
||||
|
||||
from io import BufferedWriter
|
||||
|
||||
from cpython.exc cimport PyErr_CheckSignals
|
||||
|
||||
|
||||
@ -22,6 +23,7 @@ def addOptions(parser):
|
||||
|
||||
addMinimalInputOption(parser)
|
||||
addMinimalOutputOption(parser)
|
||||
addNoProgressBarOption(parser)
|
||||
|
||||
group=parser.add_argument_group('obi head specific options')
|
||||
|
||||
@ -53,31 +55,41 @@ def run(config):
|
||||
if output is None:
|
||||
raise Exception("Could not create output view")
|
||||
o_dms = output[0]
|
||||
o_view_name_final = output[1]
|
||||
o_view_name = o_view_name_final
|
||||
output_0 = output[0]
|
||||
final_o_view_name = output[1]
|
||||
|
||||
# If the input and output DMS are not the same, create output view in input DMS first, then export it
|
||||
# to output DMS, making sure the temporary view name is unique in the input DMS
|
||||
if i_dms != o_dms:
|
||||
# If stdout output or the input and output DMS are not the same, create a temporary view that will be exported and deleted.
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
temporary_view_name = b"temp"
|
||||
i=0
|
||||
while o_view_name in i_dms:
|
||||
o_view_name = o_view_name_final+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||
temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
o_view_name = temporary_view_name
|
||||
if type(output_0)==BufferedWriter:
|
||||
o_dms = i_dms
|
||||
else:
|
||||
o_view_name = final_o_view_name
|
||||
|
||||
n = min(config['head']['count'], len(i_view))
|
||||
|
||||
# Initialize the progress bar
|
||||
pb = ProgressBar(n, config, seconde=5)
|
||||
if config['obi']['noprogressbar'] == False:
|
||||
pb = ProgressBar(n, config)
|
||||
else:
|
||||
pb = None
|
||||
|
||||
selection = Line_selection(i_view)
|
||||
|
||||
for i in range(n):
|
||||
PyErr_CheckSignals()
|
||||
pb(i)
|
||||
if pb is not None:
|
||||
pb(i)
|
||||
selection.append(i)
|
||||
|
||||
pb(i, force=True)
|
||||
print("", file=sys.stderr)
|
||||
if pb is not None:
|
||||
pb(i, force=True)
|
||||
print("", file=sys.stderr)
|
||||
|
||||
# Create output view with the line selection
|
||||
try:
|
||||
@ -94,14 +106,20 @@ def run(config):
|
||||
# and delete the temporary view in the input DMS
|
||||
if i_dms != o_dms:
|
||||
o_view.close()
|
||||
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, o_view_name_final)
|
||||
o_view = o_dms[o_view_name_final]
|
||||
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)
|
||||
o_view = o_dms[final_o_view_name]
|
||||
|
||||
# stdout output: write to buffer
|
||||
if type(output_0)==BufferedWriter:
|
||||
logger("info", "Printing to output...")
|
||||
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||
o_view.close()
|
||||
|
||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||
#print(repr(view), file=sys.stderr)
|
||||
|
||||
# If the input and the output DMS are different, delete the temporary imported view used to create the final view
|
||||
if i_dms != o_dms:
|
||||
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
View.delete_view(i_dms, o_view_name)
|
||||
o_dms.close(force=True)
|
||||
i_dms.close(force=True)
|
||||
|
@ -47,6 +47,8 @@ from obitools3.apps.config import logger
|
||||
|
||||
from cpython.exc cimport PyErr_CheckSignals
|
||||
|
||||
from io import BufferedWriter
|
||||
|
||||
|
||||
__title__="Imports sequences from different formats into a DMS"
|
||||
|
||||
@ -75,6 +77,11 @@ def addOptions(parser):
|
||||
help="Do a first readthrough of the dataset if it contains huge dictionaries (more than 100 keys) for "
|
||||
"a much faster import. This option is not recommended and will slow down the import in any other case.")
|
||||
|
||||
group.add_argument('--space-priority',
|
||||
action="store_true", dest="import:space_priority",
|
||||
default=False,
|
||||
help="If importing a view into another DMS, do it by importing each line, saving disk space if the original view "
|
||||
"has a line selection associated.")
|
||||
|
||||
def run(config):
|
||||
|
||||
@ -130,7 +137,7 @@ def run(config):
|
||||
if entry_count > 0:
|
||||
logger("info", "Importing %d entries", entry_count)
|
||||
else:
|
||||
logger("info", "Importing an unknow number of entries")
|
||||
logger("info", "Importing an unknown number of entries")
|
||||
|
||||
# TODO a bit dirty?
|
||||
if input[2]==Nuc_Seq or input[2]==View_NUC_SEQS:
|
||||
@ -140,7 +147,7 @@ def run(config):
|
||||
else:
|
||||
v = None
|
||||
|
||||
if config['obi']['taxdump'] or isinstance(input[1], View):
|
||||
if config['obi']['taxdump'] or (isinstance(input[1], View) and not config['import']['space_priority']):
|
||||
dms_only=True
|
||||
else:
|
||||
dms_only=False
|
||||
@ -168,17 +175,20 @@ def run(config):
|
||||
logger("info", "Done.")
|
||||
return
|
||||
|
||||
# If importing a view between two DMS, use C API
|
||||
if isinstance(input[1], View):
|
||||
# If importing a view between two DMS and not wanting to save space if line selection in original view, use C API
|
||||
if isinstance(input[1], View) and not config['import']['space_priority']:
|
||||
if obi_import_view(input[0].name_with_full_path, o_dms.name_with_full_path, input[1].name, tobytes((config['obi']['outputURI'].split('/'))[-1])) < 0 :
|
||||
input[0].close(force=True)
|
||||
output[0].close(force=True)
|
||||
raise Exception("Error importing a view in a DMS")
|
||||
o_dms.record_command_line(" ".join(sys.argv[1:]))
|
||||
o_dms.close()
|
||||
input[0].close(force=True)
|
||||
output[0].close(force=True)
|
||||
logger("info", "Done.")
|
||||
return
|
||||
|
||||
if entry_count >= 0:
|
||||
pb = ProgressBar(entry_count, config, seconde=5)
|
||||
pb = ProgressBar(entry_count, config)
|
||||
|
||||
NUC_SEQS_view = False
|
||||
if isinstance(output[1], View) :
|
||||
@ -243,7 +253,7 @@ def run(config):
|
||||
if isinstance(input[0], CompressedFile):
|
||||
input_is_file = True
|
||||
if entry_count >= 0:
|
||||
pb = ProgressBar(entry_count, config, seconde=5)
|
||||
pb = ProgressBar(entry_count, config)
|
||||
try:
|
||||
input[0].close()
|
||||
except AttributeError:
|
||||
|
@ -2,10 +2,10 @@
|
||||
|
||||
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||
from obitools3.dms import DMS
|
||||
from obitools3.dms.view import RollbackException
|
||||
from obitools3.dms.view import RollbackException, View
|
||||
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
||||
from obitools3.dms.column.column cimport Column, Column_line
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
|
||||
from obitools3.uri.decode import open_uri
|
||||
from obitools3.apps.config import logger
|
||||
from obitools3.libalign._freeendgapfm import FreeEndGapFullMatch
|
||||
@ -14,13 +14,14 @@ from obitools3.dms.obiseq cimport Nuc_Seq
|
||||
from obitools3.dms.capi.obitypes cimport OBI_SEQ, OBI_QUAL
|
||||
from obitools3.dms.capi.apat cimport MAX_PATTERN
|
||||
from obitools3.dms.capi.obiview cimport REVERSE_SEQUENCE_COLUMN, REVERSE_QUALITY_COLUMN
|
||||
from obitools3.utils cimport tobytes
|
||||
from obitools3.utils cimport tobytes, str2bytes
|
||||
|
||||
from libc.stdint cimport INT32_MAX
|
||||
from functools import reduce
|
||||
import math
|
||||
import sys
|
||||
from cpython.exc cimport PyErr_CheckSignals
|
||||
from io import BufferedWriter
|
||||
|
||||
|
||||
#REVERSE_SEQ_COLUMN_NAME = b"REVERSE_SEQUENCE" # used by alignpairedend tool
|
||||
@ -34,7 +35,8 @@ def addOptions(parser):
|
||||
|
||||
addMinimalInputOption(parser)
|
||||
addMinimalOutputOption(parser)
|
||||
|
||||
addNoProgressBarOption(parser)
|
||||
|
||||
group = parser.add_argument_group('obi ngsfilter specific options')
|
||||
|
||||
group.add_argument('-t','--info-view',
|
||||
@ -58,7 +60,7 @@ def addOptions(parser):
|
||||
metavar="<URI>",
|
||||
type=str,
|
||||
default=None,
|
||||
help="URI to the view used to store the sequences unassigned to any sample")
|
||||
help="URI to the view used to store the sequences unassigned to any sample. Those sequences are untrimmed.")
|
||||
|
||||
group.add_argument('--no-tags',
|
||||
action="store_true", dest="ngsfilter:notags",
|
||||
@ -539,7 +541,8 @@ def run(config):
|
||||
raise Exception("Could not open input reads")
|
||||
if input[2] != View_NUC_SEQS:
|
||||
raise NotImplementedError('obi ngsfilter only works on NUC_SEQS views')
|
||||
|
||||
i_dms = input[0]
|
||||
|
||||
if "reverse" in config["ngsfilter"]:
|
||||
|
||||
forward = input[1]
|
||||
@ -580,8 +583,19 @@ def run(config):
|
||||
if output is None:
|
||||
raise Exception("Could not create output view")
|
||||
|
||||
o_dms = output[0]
|
||||
output_0 = output[0]
|
||||
o_view = output[1]
|
||||
|
||||
# If stdout output, create a temporary view in the input dms that will be deleted afterwards.
|
||||
if type(output_0)==BufferedWriter:
|
||||
o_dms = i_dms
|
||||
o_view_name = b"temp"
|
||||
while o_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||
o_view_name = o_view_name+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
o_view = View_NUC_SEQS.new(i_dms, o_view_name)
|
||||
|
||||
# Open the view containing the informations about the tags and the primers
|
||||
info_input = open_uri(config['ngsfilter']['info_view'])
|
||||
if info_input is None:
|
||||
@ -602,7 +616,10 @@ def run(config):
|
||||
unidentified = None
|
||||
|
||||
# Initialize the progress bar
|
||||
pb = ProgressBar(entries_len, config, seconde=5)
|
||||
if config['obi']['noprogressbar'] == False:
|
||||
pb = ProgressBar(entries_len, config)
|
||||
else:
|
||||
pb = None
|
||||
|
||||
# Check and store primers and tags
|
||||
try:
|
||||
@ -636,7 +653,8 @@ def run(config):
|
||||
try:
|
||||
for i in range(entries_len):
|
||||
PyErr_CheckSignals()
|
||||
pb(i)
|
||||
if pb is not None:
|
||||
pb(i)
|
||||
if not_aligned:
|
||||
modseq = [Nuc_Seq.new_from_stored(forward[i]), Nuc_Seq.new_from_stored(reverse[i])]
|
||||
else:
|
||||
@ -646,7 +664,13 @@ def run(config):
|
||||
o_view[g].set(oseq.id, oseq.seq, definition=oseq.definition, quality=oseq.quality, tags=oseq)
|
||||
g+=1
|
||||
elif unidentified is not None:
|
||||
unidentified[u].set(oseq.id, oseq.seq, definition=oseq.definition, quality=oseq.quality, tags=oseq)
|
||||
# Untrim sequences (put original back)
|
||||
if len(modseq) > 1:
|
||||
oseq[REVERSE_SEQUENCE_COLUMN] = reverse[i].seq
|
||||
oseq[REVERSE_QUALITY_COLUMN] = reverse[i].quality
|
||||
unidentified[u].set(oseq.id, forward[i].seq, definition=oseq.definition, quality=forward[i].quality, tags=oseq)
|
||||
else:
|
||||
unidentified[u].set(oseq.id, entries[i].seq, definition=oseq.definition, quality=entries[i].quality, tags=oseq)
|
||||
u+=1
|
||||
except Exception, e:
|
||||
if unidentified is not None:
|
||||
@ -654,8 +678,9 @@ def run(config):
|
||||
else:
|
||||
raise RollbackException("obi ngsfilter error, rollbacking view: "+str(e), o_view)
|
||||
|
||||
pb(i, force=True)
|
||||
print("", file=sys.stderr)
|
||||
if pb is not None:
|
||||
pb(i, force=True)
|
||||
print("", file=sys.stderr)
|
||||
|
||||
# Save command config in View and DMS comments
|
||||
command_line = " ".join(sys.argv[1:])
|
||||
@ -664,13 +689,23 @@ def run(config):
|
||||
unidentified.write_config(config, "ngsfilter", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
|
||||
# Add comment about unidentified seqs
|
||||
unidentified.comments["info"] = "View containing sequences categorized as unidentified by the ngsfilter command"
|
||||
output[0].record_command_line(command_line)
|
||||
o_dms.record_command_line(command_line)
|
||||
|
||||
# stdout output: write to buffer
|
||||
if type(output_0)==BufferedWriter:
|
||||
logger("info", "Printing to output...")
|
||||
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||
o_view.close()
|
||||
|
||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||
#print(repr(o_view), file=sys.stderr)
|
||||
|
||||
input[0].close(force=True)
|
||||
output[0].close(force=True)
|
||||
# If stdout output, delete the temporary result view in the input DMS
|
||||
if type(output_0)==BufferedWriter:
|
||||
View.delete_view(i_dms, o_view_name)
|
||||
|
||||
i_dms.close(force=True)
|
||||
o_dms.close(force=True)
|
||||
info_input[0].close(force=True)
|
||||
if unidentified is not None:
|
||||
unidentified_input[0].close(force=True)
|
||||
|
@ -4,7 +4,7 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||
from obitools3.dms import DMS
|
||||
from obitools3.dms.view.view cimport View, Line_selection
|
||||
from obitools3.uri.decode import open_uri
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
|
||||
from obitools3.dms.view import RollbackException
|
||||
from obitools3.apps.config import logger
|
||||
from obitools3.utils cimport str2bytes
|
||||
@ -24,6 +24,7 @@ from obitools3.dms.capi.obitypes cimport OBI_BOOL, \
|
||||
import time
|
||||
import sys
|
||||
from cpython.exc cimport PyErr_CheckSignals
|
||||
from io import BufferedWriter
|
||||
|
||||
|
||||
NULL_VALUE = {OBI_BOOL: OBIBool_NA,
|
||||
@ -42,6 +43,7 @@ def addOptions(parser):
|
||||
|
||||
addMinimalInputOption(parser)
|
||||
addMinimalOutputOption(parser)
|
||||
addNoProgressBarOption(parser)
|
||||
|
||||
group=parser.add_argument_group('obi sort specific options')
|
||||
|
||||
@ -59,7 +61,7 @@ def addOptions(parser):
|
||||
|
||||
|
||||
def line_cmp(line, key, pb):
|
||||
pb
|
||||
pb
|
||||
if line[key] is None:
|
||||
return NULL_VALUE[line.view[key].data_type_int]
|
||||
else:
|
||||
@ -86,20 +88,28 @@ def run(config):
|
||||
if output is None:
|
||||
raise Exception("Could not create output view")
|
||||
o_dms = output[0]
|
||||
o_view_name_final = output[1]
|
||||
o_view_name = o_view_name_final
|
||||
output_0 = output[0]
|
||||
final_o_view_name = output[1]
|
||||
|
||||
# If the input and output DMS are not the same, create output view in input DMS first, then export it
|
||||
# to output DMS, making sure the temporary view name is unique in the input DMS
|
||||
if i_dms != o_dms:
|
||||
# If stdout output or the input and output DMS are not the same, create a temporary view that will be exported and deleted.
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
temporary_view_name = b"temp"
|
||||
i=0
|
||||
while o_view_name in i_dms:
|
||||
o_view_name = o_view_name_final+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||
temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
o_view_name = temporary_view_name
|
||||
if type(output_0)==BufferedWriter:
|
||||
o_dms = i_dms
|
||||
else:
|
||||
o_view_name = final_o_view_name
|
||||
|
||||
# Initialize the progress bar
|
||||
pb = ProgressBar(len(i_view), config, seconde=5)
|
||||
|
||||
if config['obi']['noprogressbar'] == False:
|
||||
pb = ProgressBar(len(i_view), config)
|
||||
else:
|
||||
pb = None
|
||||
|
||||
keys = config['sort']['keys']
|
||||
|
||||
selection = Line_selection(i_view)
|
||||
@ -110,10 +120,14 @@ def run(config):
|
||||
|
||||
for k in keys: # TODO order?
|
||||
PyErr_CheckSignals()
|
||||
selection.sort(key=lambda line_idx: line_cmp(i_view[line_idx], k, pb(line_idx)), reverse=config['sort']['reverse'])
|
||||
|
||||
pb(len(i_view), force=True)
|
||||
print("", file=sys.stderr)
|
||||
if pb is not None:
|
||||
selection.sort(key=lambda line_idx: line_cmp(i_view[line_idx], k, pb(line_idx)), reverse=config['sort']['reverse'])
|
||||
else:
|
||||
selection.sort(key=lambda line_idx: line_cmp(i_view[line_idx], k, None), reverse=config['sort']['reverse'])
|
||||
|
||||
if pb is not None:
|
||||
pb(len(i_view), force=True)
|
||||
print("", file=sys.stderr)
|
||||
|
||||
# Create output view with the sorted line selection
|
||||
try:
|
||||
@ -132,16 +146,23 @@ def run(config):
|
||||
# and delete the temporary view in the input DMS
|
||||
if i_dms != o_dms:
|
||||
o_view.close()
|
||||
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, o_view_name_final)
|
||||
o_view = o_dms[o_view_name_final]
|
||||
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)
|
||||
o_view = o_dms[final_o_view_name]
|
||||
|
||||
# stdout output: write to buffer
|
||||
if type(output_0)==BufferedWriter:
|
||||
logger("info", "Printing to output...")
|
||||
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||
o_view.close()
|
||||
|
||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||
#print(repr(o_view), file=sys.stderr)
|
||||
|
||||
# If the input and the output DMS are different, delete the temporary imported view used to create the final view
|
||||
if i_dms != o_dms:
|
||||
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
View.delete_view(i_dms, o_view_name)
|
||||
o_dms.close(force=True)
|
||||
|
||||
i_dms.close(force=True)
|
||||
|
||||
logger("info", "Done.")
|
||||
|
@ -162,7 +162,7 @@ def run(config):
|
||||
lcat=0
|
||||
|
||||
# Initialize the progress bar
|
||||
pb = ProgressBar(len(i_view), config, seconde=5)
|
||||
pb = ProgressBar(len(i_view), config)
|
||||
|
||||
for i in range(len(i_view)):
|
||||
PyErr_CheckSignals()
|
||||
|
@ -4,7 +4,7 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||
from obitools3.dms import DMS
|
||||
from obitools3.dms.view.view cimport View, Line_selection
|
||||
from obitools3.uri.decode import open_uri
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
|
||||
from obitools3.dms.view import RollbackException
|
||||
from obitools3.apps.config import logger
|
||||
from obitools3.utils cimport str2bytes
|
||||
@ -12,6 +12,7 @@ from obitools3.utils cimport str2bytes
|
||||
import time
|
||||
import sys
|
||||
from cpython.exc cimport PyErr_CheckSignals
|
||||
from io import BufferedWriter
|
||||
|
||||
|
||||
__title__="Keep the N last lines of a view."
|
||||
@ -21,6 +22,7 @@ def addOptions(parser):
|
||||
|
||||
addMinimalInputOption(parser)
|
||||
addMinimalOutputOption(parser)
|
||||
addNoProgressBarOption(parser)
|
||||
|
||||
group=parser.add_argument_group('obi tail specific options')
|
||||
|
||||
@ -52,31 +54,41 @@ def run(config):
|
||||
if output is None:
|
||||
raise Exception("Could not create output view")
|
||||
o_dms = output[0]
|
||||
o_view_name_final = output[1]
|
||||
o_view_name = o_view_name_final
|
||||
output_0 = output[0]
|
||||
final_o_view_name = output[1]
|
||||
|
||||
# If the input and output DMS are not the same, create output view in input DMS first, then export it
|
||||
# to output DMS, making sure the temporary view name is unique in the input DMS
|
||||
if i_dms != o_dms:
|
||||
# If stdout output or the input and output DMS are not the same, create a temporary view that will be exported and deleted.
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
temporary_view_name = b"temp"
|
||||
i=0
|
||||
while o_view_name in i_dms:
|
||||
o_view_name = o_view_name_final+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||
temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
o_view_name = temporary_view_name
|
||||
if type(output_0)==BufferedWriter:
|
||||
o_dms = i_dms
|
||||
else:
|
||||
o_view_name = final_o_view_name
|
||||
|
||||
start = max(len(i_view) - config['tail']['count'], 0)
|
||||
|
||||
# Initialize the progress bar
|
||||
pb = ProgressBar(len(i_view) - start, config, seconde=5)
|
||||
if config['obi']['noprogressbar'] == False:
|
||||
pb = ProgressBar(len(i_view) - start, config)
|
||||
else:
|
||||
pb = None
|
||||
|
||||
selection = Line_selection(i_view)
|
||||
|
||||
for i in range(start, len(i_view)):
|
||||
PyErr_CheckSignals()
|
||||
pb(i)
|
||||
if pb is not None:
|
||||
pb(i)
|
||||
selection.append(i)
|
||||
|
||||
pb(i, force=True)
|
||||
print("", file=sys.stderr)
|
||||
if pb is not None:
|
||||
pb(i, force=True)
|
||||
print("", file=sys.stderr)
|
||||
|
||||
# Save command config in View comments
|
||||
command_line = " ".join(sys.argv[1:])
|
||||
@ -97,14 +109,20 @@ def run(config):
|
||||
# and delete the temporary view in the input DMS
|
||||
if i_dms != o_dms:
|
||||
o_view.close()
|
||||
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, o_view_name_final)
|
||||
o_view = o_dms[o_view_name_final]
|
||||
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)
|
||||
o_view = o_dms[final_o_view_name]
|
||||
|
||||
# stdout output: write to buffer
|
||||
if type(output_0)==BufferedWriter:
|
||||
logger("info", "Printing to output...")
|
||||
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||
o_view.close()
|
||||
|
||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||
#print(repr(o_view), file=sys.stderr)
|
||||
|
||||
# If the input and the output DMS are different, delete the temporary imported view used to create the final view
|
||||
if i_dms != o_dms:
|
||||
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
View.delete_view(i_dms, o_view_name)
|
||||
o_dms.close(force=True)
|
||||
i_dms.close(force=True)
|
||||
|
@ -23,6 +23,7 @@ from obitools3.dms.capi.obiview cimport NUC_SEQUENCE_COLUMN, \
|
||||
import shutil
|
||||
import string
|
||||
import random
|
||||
import sys
|
||||
from cpython.exc cimport PyErr_CheckSignals
|
||||
|
||||
|
||||
@ -366,7 +367,7 @@ def random_new_view(config, infos, first=False):
|
||||
infos['view'] = View_NUC_SEQS.new(infos['dms'], random_unique_name(infos), comments=random_comments(config)) # TODO quality column
|
||||
else :
|
||||
infos['view'] = View.new(infos['dms'], random_unique_name(infos), comments=random_comments(config)) # TODO quality column
|
||||
|
||||
infos['view'].write_config(config, "test", infos["command_line"], input_dms_name=[infos['dms'].name], input_view_name=["random"])
|
||||
print_test(config, repr(infos['view']))
|
||||
if v_to_clone is not None :
|
||||
if line_selection is None:
|
||||
@ -441,7 +442,7 @@ def addOptions(parser):
|
||||
default=20,
|
||||
type=int,
|
||||
help="Maximum length of tuples. "
|
||||
"Default: 200")
|
||||
"Default: 50")
|
||||
|
||||
group.add_argument('--max_ini_col_count','-o',
|
||||
action="store", dest="test:maxinicolcount",
|
||||
@ -457,7 +458,7 @@ def addOptions(parser):
|
||||
default=10000,
|
||||
type=int,
|
||||
help="Maximum number of lines in a column. "
|
||||
"Default: 10000")
|
||||
"Default: 1000")
|
||||
|
||||
group.add_argument('--max_elts_per_line','-e',
|
||||
action="store", dest="test:maxelts",
|
||||
@ -497,7 +498,8 @@ def run(config):
|
||||
(b"OBI_SEQ", False): random_seq, (b"OBI_SEQ", True): random_seq_tuples,
|
||||
(b"OBI_STR", False): random_bytes, (b"OBI_STR", True): random_bytes_tuples
|
||||
},
|
||||
'tests': [test_set_and_get, test_add_col, test_delete_col, test_col_alias, test_new_view]
|
||||
'tests': [test_set_and_get, test_add_col, test_delete_col, test_col_alias, test_new_view],
|
||||
'command_line': " ".join(sys.argv[1:])
|
||||
}
|
||||
|
||||
# TODO ???
|
||||
|
@ -14,13 +14,15 @@ from obitools3.dms.capi.obitypes cimport OBI_INT, OBI_STR, index_t
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, \
|
||||
addMinimalOutputOption, \
|
||||
addTaxonomyOption, \
|
||||
addEltLimitOption
|
||||
addEltLimitOption, \
|
||||
addNoProgressBarOption
|
||||
from obitools3.uri.decode import open_uri
|
||||
from obitools3.apps.config import logger
|
||||
from obitools3.utils cimport tobytes, tostr
|
||||
from obitools3.utils cimport tobytes, tostr, str2bytes
|
||||
|
||||
import sys
|
||||
from cpython.exc cimport PyErr_CheckSignals
|
||||
from io import BufferedWriter
|
||||
|
||||
|
||||
__title__="Group sequence records together"
|
||||
@ -32,6 +34,7 @@ def addOptions(parser):
|
||||
addTaxonomyOption(parser)
|
||||
addMinimalOutputOption(parser)
|
||||
addEltLimitOption(parser)
|
||||
addNoProgressBarOption(parser)
|
||||
|
||||
group = parser.add_argument_group('obi uniq specific options')
|
||||
|
||||
@ -143,12 +146,16 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy, dict
|
||||
scientific_name_column = o_view[b"scientific_name"]
|
||||
|
||||
# Initialize the progress bar
|
||||
pb = ProgressBar(len(o_view), config, seconde=5)
|
||||
if config['obi']['noprogressbar'] == False:
|
||||
pb = ProgressBar(len(o_view), config)
|
||||
else:
|
||||
pb = None
|
||||
|
||||
i=0
|
||||
for seq in o_view:
|
||||
PyErr_CheckSignals()
|
||||
pb(i)
|
||||
if pb is not None:
|
||||
pb(i)
|
||||
if MERGED_TAXID_COLUMN in seq :
|
||||
m_taxids = []
|
||||
m_taxids_dict = seq[MERGED_TAXID_COLUMN]
|
||||
@ -191,7 +198,8 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy, dict
|
||||
scientific_name_column[i] = taxonomy.get_scientific_name(taxid)
|
||||
i+=1
|
||||
|
||||
pb(len(o_view), force=True)
|
||||
if pb is not None:
|
||||
pb(len(o_view), force=True)
|
||||
|
||||
|
||||
cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, dict config, list mergedKeys_list=None, Taxonomy taxonomy=None, bint mergeIds=False, list categories=None, int max_elts=1000000) :
|
||||
@ -297,7 +305,8 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, di
|
||||
iter_view = iter(view)
|
||||
for i_seq in iter_view :
|
||||
PyErr_CheckSignals()
|
||||
pb(i)
|
||||
if pb is not None:
|
||||
pb(i)
|
||||
|
||||
# This can't be done in the same line as the unique_id tuple creation because it generates a bug
|
||||
# where Cython (version 0.25.2) does not detect the reference to the categs_list variable and deallocates
|
||||
@ -345,6 +354,9 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, di
|
||||
key = mergedKeys[k]
|
||||
merged_col_name = mergedKeys_m[k]
|
||||
|
||||
if merged_infos[merged_col_name]['nb_elts'] == 1:
|
||||
raise Exception("Can't merge information from a tag with only one element (e.g. one sample ; don't use -m option)")
|
||||
|
||||
if merged_col_name in view:
|
||||
i_col = view[merged_col_name]
|
||||
else:
|
||||
@ -415,12 +427,17 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, di
|
||||
o_count_col = o_view[COUNT_COLUMN]
|
||||
if COUNT_COLUMN in view:
|
||||
i_count_col = view[COUNT_COLUMN]
|
||||
|
||||
if pb is not None:
|
||||
pb(len(view), force=True)
|
||||
print("")
|
||||
|
||||
pb(len(view), force=True)
|
||||
print("")
|
||||
logger("info", "Second browsing through the input")
|
||||
|
||||
# Initialize the progress bar
|
||||
pb = ProgressBar(len(view), seconde=5)
|
||||
if pb is not None:
|
||||
pb = ProgressBar(len(view))
|
||||
|
||||
o_idx = 0
|
||||
total_treated = 0
|
||||
|
||||
@ -455,7 +472,9 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, di
|
||||
|
||||
for i_idx in merged_sequences:
|
||||
PyErr_CheckSignals()
|
||||
pb(total_treated)
|
||||
|
||||
if pb is not None:
|
||||
pb(total_treated)
|
||||
|
||||
i_id = i_id_col[i_idx]
|
||||
i_seq = view[i_idx]
|
||||
@ -531,7 +550,8 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, di
|
||||
o_count_col[o_idx] = o_count
|
||||
o_idx += 1
|
||||
|
||||
pb(len(view), force=True)
|
||||
if pb is not None:
|
||||
pb(len(view), force=True)
|
||||
|
||||
# Deletes quality columns if there is one because the matching between sequence and quality will be broken (quality set to NA when sequence not)
|
||||
if QUALITY_COLUMN in view:
|
||||
@ -579,8 +599,23 @@ def run(config):
|
||||
if output is None:
|
||||
raise Exception("Could not create output view")
|
||||
|
||||
i_dms = input[0]
|
||||
entries = input[1]
|
||||
o_view = output[1]
|
||||
o_dms = output[0]
|
||||
output_0 = output[0]
|
||||
|
||||
# If stdout output create a temporary view that will be exported and deleted.
|
||||
if type(output_0)==BufferedWriter:
|
||||
temporary_view_name = b"temp"
|
||||
i=0
|
||||
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||
temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
o_view_name = temporary_view_name
|
||||
o_dms = i_dms
|
||||
o_view = View_NUC_SEQS.new(i_dms, o_view_name)
|
||||
else:
|
||||
o_view = output[1]
|
||||
|
||||
if 'taxoURI' in config['obi'] and config['obi']['taxoURI'] is not None:
|
||||
taxo_uri = open_uri(config['obi']['taxoURI'])
|
||||
@ -591,7 +626,10 @@ def run(config):
|
||||
taxo = None
|
||||
|
||||
# Initialize the progress bar
|
||||
pb = ProgressBar(len(entries), config, seconde=5)
|
||||
if config['obi']['noprogressbar'] == False:
|
||||
pb = ProgressBar(len(entries), config)
|
||||
else:
|
||||
pb = None
|
||||
|
||||
if len(entries) > 0:
|
||||
try:
|
||||
@ -599,7 +637,8 @@ def run(config):
|
||||
except Exception, e:
|
||||
raise RollbackException("obi uniq error, rollbacking view: "+str(e), o_view)
|
||||
|
||||
print("", file=sys.stderr)
|
||||
if pb is not None:
|
||||
print("", file=sys.stderr)
|
||||
|
||||
# Save command config in View and DMS comments
|
||||
command_line = " ".join(sys.argv[1:])
|
||||
@ -609,13 +648,23 @@ def run(config):
|
||||
input_dms_name.append(config['obi']['taxoURI'].split("/")[-3])
|
||||
input_view_name.append("taxonomy/"+config['obi']['taxoURI'].split("/")[-1])
|
||||
o_view.write_config(config, "uniq", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
|
||||
output[0].record_command_line(command_line)
|
||||
o_dms.record_command_line(command_line)
|
||||
|
||||
# stdout output: write to buffer
|
||||
if type(output_0)==BufferedWriter:
|
||||
logger("info", "Printing to output...")
|
||||
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||
o_view.close()
|
||||
|
||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||
#print(repr(o_view), file=sys.stderr)
|
||||
|
||||
input[0].close(force=True)
|
||||
output[0].close(force=True)
|
||||
|
||||
# If stdout output, delete the temporary result view in the input DMS
|
||||
if type(output_0)==BufferedWriter:
|
||||
View.delete_view(i_dms, o_view_name)
|
||||
|
||||
i_dms.close(force=True)
|
||||
o_dms.close(force=True)
|
||||
|
||||
logger("info", "Done.")
|
||||
|
||||
|
@ -40,7 +40,8 @@ from obitools3.utils cimport tobytes, \
|
||||
from obitools3.dms.column import typed_column
|
||||
|
||||
from libc.stdlib cimport free
|
||||
|
||||
from libc.string cimport strcpy
|
||||
|
||||
import importlib
|
||||
import inspect
|
||||
import pkgutil
|
||||
@ -97,6 +98,7 @@ cdef class Column(OBIWrapper) :
|
||||
object alias=b""):
|
||||
# TODO indexer_name?
|
||||
|
||||
cdef Column column
|
||||
cdef bytes column_name_b = tobytes(column_name)
|
||||
cdef bytes alias_b = tobytes(alias)
|
||||
cdef bytes comments_b = str2bytes(json.dumps(bytes2str_object(comments)))
|
||||
@ -132,13 +134,14 @@ cdef class Column(OBIWrapper) :
|
||||
raise RuntimeError("Cannot create column %s in view %s: trying to create quality column but no NUC_SEQ column to associate it with in the view" % (bytes2str(column_name_b),
|
||||
bytes2str(view.name)))
|
||||
associated_column_name_b = NUC_SEQUENCE_COLUMN
|
||||
associated_column_version = view[NUC_SEQUENCE_COLUMN].version
|
||||
associated_column_version = view[NUC_SEQUENCE_COLUMN].version
|
||||
elif column_name == REVERSE_QUALITY_COLUMN:
|
||||
if REVERSE_SEQUENCE_COLUMN not in view:
|
||||
raise RuntimeError("Cannot create column %s in view %s: trying to create reverse quality column but no REVERSE_SEQUENCE column to associate it with in the view" % (bytes2str(column_name_b),
|
||||
bytes2str(view.name)))
|
||||
associated_column_name_b = REVERSE_SEQUENCE_COLUMN
|
||||
associated_column_version = view[REVERSE_SEQUENCE_COLUMN].version
|
||||
|
||||
|
||||
if (obi_view_add_column(view = view.pointer(),
|
||||
column_name = column_name_b,
|
||||
@ -158,8 +161,19 @@ cdef class Column(OBIWrapper) :
|
||||
create = True)<0):
|
||||
raise RuntimeError("Cannot create column %s in view %s" % (bytes2str(column_name_b),
|
||||
bytes2str(view.name)))
|
||||
|
||||
return Column.open(view, alias_b)
|
||||
|
||||
column = Column.open(view, alias_b)
|
||||
|
||||
# Automatically associate nuc sequence column to quality column if necessary
|
||||
if data_type == OBI_QUAL:
|
||||
if column_name == QUALITY_COLUMN:
|
||||
view[NUC_SEQUENCE_COLUMN].associated_column_name = column.name
|
||||
view[NUC_SEQUENCE_COLUMN].associated_column_version = column.version
|
||||
elif column_name == REVERSE_QUALITY_COLUMN:
|
||||
view[REVERSE_SEQUENCE_COLUMN].associated_column_name = column.name
|
||||
view[REVERSE_SEQUENCE_COLUMN].associated_column_version = column.version
|
||||
|
||||
return column
|
||||
|
||||
|
||||
@staticmethod
|
||||
@ -407,6 +421,31 @@ cdef class Column(OBIWrapper) :
|
||||
raise OBIDeactivatedInstanceError()
|
||||
return obi_format_date(self.pointer().header.creation_date)
|
||||
|
||||
|
||||
# associated_column name property getter and setter
|
||||
@property
|
||||
def associated_column_name(self):
|
||||
if not self.active() :
|
||||
raise OBIDeactivatedInstanceError()
|
||||
return self.pointer().header.associated_column.column_name
|
||||
|
||||
@associated_column_name.setter
|
||||
def associated_column_name(self, object new_name):
|
||||
strcpy(self.pointer().header.associated_column.column_name, tobytes(new_name))
|
||||
|
||||
|
||||
# associated_column version property getter and setter
|
||||
@property
|
||||
def associated_column_version(self):
|
||||
if not self.active() :
|
||||
raise OBIDeactivatedInstanceError()
|
||||
return self.pointer().header.associated_column.version
|
||||
|
||||
@associated_column_version.setter
|
||||
def associated_column_version(self, int new_version):
|
||||
self.pointer().header.associated_column.version = new_version
|
||||
|
||||
|
||||
# comments property getter
|
||||
@property
|
||||
def comments(self):
|
||||
|
@ -39,4 +39,6 @@ cdef class Nuc_Seq_Stored(Seq_Stored) :
|
||||
cpdef set_quality_char(self, object new_qual, int offset=*)
|
||||
cpdef object build_quality_array(self, list quality)
|
||||
cpdef bytes build_reverse_complement(self)
|
||||
cpdef str get_str(self)
|
||||
cpdef str get_str(self)
|
||||
cpdef repr_bytes(self)
|
||||
|
@ -431,9 +431,12 @@ cdef class Nuc_Seq_Stored(Seq_Stored) :
|
||||
return len(self._view.get_column(NUC_SEQUENCE_COLUMN).get_line(self.index))
|
||||
|
||||
def __repr__(self):
|
||||
return bytes2str(self.repr_bytes())
|
||||
|
||||
cpdef repr_bytes(self):
|
||||
if self.quality is None:
|
||||
formatter = FastaFormat()
|
||||
else:
|
||||
formatter = FastqFormat()
|
||||
return bytes2str(formatter(self))
|
||||
return formatter(self)
|
||||
|
||||
|
@ -20,6 +20,10 @@ cdef class View(OBIWrapper):
|
||||
cdef DMS _dms
|
||||
|
||||
cdef inline Obiview_p pointer(self)
|
||||
|
||||
cpdef print_to_output(self,
|
||||
object output,
|
||||
bint noprogressbar=*)
|
||||
|
||||
cpdef delete_column(self,
|
||||
object column_name,
|
||||
@ -61,6 +65,8 @@ cdef class Line :
|
||||
cdef index_t _index
|
||||
cdef View _view
|
||||
|
||||
cpdef repr_bytes(self)
|
||||
|
||||
|
||||
cdef register_view_class(bytes view_type_name,
|
||||
type view_class)
|
||||
|
@ -6,6 +6,9 @@ cdef dict __VIEW_CLASS__= {}
|
||||
|
||||
from libc.stdlib cimport malloc
|
||||
|
||||
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||
from obitools3.version import version
|
||||
|
||||
from ..capi.obiview cimport Alias_column_pair_p, \
|
||||
obi_new_view, \
|
||||
obi_open_view, \
|
||||
@ -48,10 +51,13 @@ from ..capi.obidms cimport obi_import_view
|
||||
|
||||
from obitools3.format.tab import TabFormat
|
||||
|
||||
from cpython.exc cimport PyErr_CheckSignals
|
||||
|
||||
import importlib
|
||||
import inspect
|
||||
import pkgutil
|
||||
import json
|
||||
import sys
|
||||
|
||||
|
||||
cdef class View(OBIWrapper) :
|
||||
@ -178,13 +184,43 @@ cdef class View(OBIWrapper) :
|
||||
|
||||
|
||||
@OBIWrapper.checkIsActive
|
||||
def __repr__(self) :
|
||||
cdef str s = "#View name:\n{name:s}\n#Line count:\n{line_count:d}\n#Columns:\n".format(name = bytes2str(self.name),
|
||||
line_count = self.line_count)
|
||||
def __repr__(self) :
|
||||
cdef str s
|
||||
if self.read_only: # can read date
|
||||
s = "#View name:\n{name:s}\n#Date created:\n{date:s}\n#Line count:\n{line_count:d}\n#Columns:\n".format(name = bytes2str(self.name),
|
||||
line_count = self.line_count,
|
||||
date = str(bytes2str_object(self.comments["Date created"])))
|
||||
else:
|
||||
s = "#View name:\n{name:s}\n#Line count:\n{line_count:d}\n#Columns:\n".format(name = bytes2str(self.name),
|
||||
line_count = self.line_count)
|
||||
for column_name in self.keys() :
|
||||
s = s + repr(self[column_name]) + '\n'
|
||||
return s
|
||||
|
||||
|
||||
|
||||
cpdef print_to_output(self, object output, bint noprogressbar=False):
|
||||
|
||||
cdef int i
|
||||
cdef Line entry
|
||||
|
||||
self.checkIsActive(self)
|
||||
|
||||
# Initialize the progress bar
|
||||
if noprogressbar == False:
|
||||
pb = ProgressBar(len(self))
|
||||
else:
|
||||
pb = None
|
||||
i=0
|
||||
for entry in self:
|
||||
PyErr_CheckSignals()
|
||||
if pb is not None:
|
||||
pb(i)
|
||||
output.write(entry.repr_bytes()+b"\n")
|
||||
i+=1
|
||||
if pb is not None:
|
||||
pb(len(self), force=True)
|
||||
print("", file=sys.stderr)
|
||||
|
||||
|
||||
def keys(self):
|
||||
|
||||
@ -405,6 +441,7 @@ cdef class View(OBIWrapper) :
|
||||
for i in range(len(input_view_name)):
|
||||
input_str.append(tostr(input_dms_name[i])+"/"+tostr(input_view_name[i]))
|
||||
comments["input_str"] = input_str
|
||||
comments["version"] = version
|
||||
return bytes2str_object(comments)
|
||||
|
||||
|
||||
@ -757,8 +794,12 @@ cdef class Line :
|
||||
|
||||
|
||||
def __repr__(self):
|
||||
return bytes2str(self).repr_bytes()
|
||||
|
||||
|
||||
cpdef repr_bytes(self):
|
||||
formatter = TabFormat(header=False)
|
||||
return bytes2str(formatter(self))
|
||||
return formatter(self)
|
||||
|
||||
|
||||
# View property getter
|
||||
|
@ -5,6 +5,7 @@ from obitools3.dms.view.view cimport Line
|
||||
from obitools3.utils cimport bytes2str_object, str2bytes, tobytes
|
||||
from obitools3.dms.column.column cimport Column_line, Column_multi_elts
|
||||
|
||||
import sys
|
||||
|
||||
cdef class TabFormat:
|
||||
|
||||
@ -26,18 +27,22 @@ cdef class TabFormat:
|
||||
|
||||
if self.header and self.first_line:
|
||||
if isinstance(data.view[k], Column_multi_elts):
|
||||
for k2 in data.view[k].keys():
|
||||
keys = data.view[k].keys()
|
||||
keys.sort()
|
||||
for k2 in keys:
|
||||
line.append(tobytes(k)+b':'+tobytes(k2))
|
||||
else:
|
||||
line.append(tobytes(k))
|
||||
else:
|
||||
value = data[k]
|
||||
if isinstance(data.view[k], Column_multi_elts):
|
||||
keys = data.view[k].keys()
|
||||
keys.sort()
|
||||
if value is None: # all keys at None
|
||||
for k2 in data.view[k].keys(): # TODO could be much more efficient
|
||||
for k2 in keys: # TODO could be much more efficient
|
||||
line.append(self.NAString)
|
||||
else:
|
||||
for k2 in data.view[k].keys(): # TODO could be much more efficient
|
||||
for k2 in keys: # TODO could be much more efficient
|
||||
if value[k2] is not None:
|
||||
line.append(str2bytes(str(bytes2str_object(value[k2])))) # genius programming
|
||||
else:
|
||||
|
@ -183,8 +183,9 @@ def buildConsensus(ali, seq, ref_tags=None):
|
||||
# doesn't work because uint8_t* are forced into bytes by cython (nothing read/kept beyond 0 values)
|
||||
#obi_set_qual_int_with_elt_idx_and_col_p_in_view(view_p, col_p, seq.index, 0, ali.consensus_qual, ali.consensus_len)
|
||||
seq.set(ref_tags.id+b"_CONS", ali.consensus_seq, quality=ali.consensus_qual)
|
||||
seq[b'ali_length'] = ali.consensus_len
|
||||
seq[b'score_norm']=float(ali.score)/ali.consensus_len
|
||||
seq[b"seq_length"] = ali.consensus_len
|
||||
seq[b"overlap_length"] = ali.overlap_len
|
||||
seq[b'score_norm']=round(float(ali.score)/ali.overlap_len, 3)
|
||||
seq[b'shift']=ali.shift
|
||||
else:
|
||||
if len(ali[0])>999: # TODO why?
|
||||
@ -256,9 +257,10 @@ def buildJoinedSequence(ali, reverse, seq, forward=None):
|
||||
seq[b"ali_direction"]=None
|
||||
seq[b"mode"]=b"joined"
|
||||
seq[b"pairedend_limit"]=len(forward)
|
||||
seq[b"ali_length"] = ali.consensus_len
|
||||
if ali.consensus_len > 0:
|
||||
seq[b"score_norm"]=float(ali.score)/ali.consensus_len
|
||||
seq[b"seq_length"] = ali.consensus_len
|
||||
seq[b"overlap_length"] = ali.overlap_len
|
||||
if ali.overlap_len > 0:
|
||||
seq[b'score_norm']=round(float(ali.score)/ali.overlap_len, 3)
|
||||
else:
|
||||
seq[b"score_norm"]=0.0
|
||||
|
||||
|
@ -210,10 +210,11 @@ def open_uri(uri,
|
||||
|
||||
error = None
|
||||
|
||||
if scheme==b"dms" or \
|
||||
(scheme==b"" and \
|
||||
(((not input) and "outputformat" not in config["obi"]) or \
|
||||
(input and "inputformat" not in config["obi"]))): # TODO maybe not best way
|
||||
if urib != b"-" and \
|
||||
(scheme==b"dms" or \
|
||||
(scheme==b"" and \
|
||||
(((not input) and "outputformat" not in config["obi"]) or \
|
||||
(input and "inputformat" not in config["obi"])))): # TODO maybe not best way
|
||||
|
||||
if default_dms is not None and b"/" not in urip.path: # assuming view to open in default DMS (TODO discuss)
|
||||
dms=(default_dms, urip.path)
|
||||
@ -275,11 +276,11 @@ def open_uri(uri,
|
||||
iseq = urib
|
||||
objclass = bytes
|
||||
else: # TODO update uopen to be able to write?
|
||||
if urip.path:
|
||||
file = open(urip.path, 'wb')
|
||||
else:
|
||||
if not urip.path or urip.path == b'-':
|
||||
file = sys.stdout.buffer
|
||||
|
||||
else:
|
||||
file = open(urip.path, 'wb')
|
||||
|
||||
if file is not None:
|
||||
qualifiers=parse_qs(urip.query)
|
||||
|
||||
|
@ -2,7 +2,7 @@
|
||||
|
||||
from obitools3.dms.capi.obitypes cimport obitype_t, index_t
|
||||
|
||||
cpdef bytes format_separator(bytes format)
|
||||
cpdef bytes format_uniq_pattern(bytes format)
|
||||
cpdef int count_entries(file, bytes format)
|
||||
|
||||
cdef obi_errno_to_exception(index_t line_nb=*, object elt_id=*, str error_message=*)
|
||||
|
@ -24,11 +24,11 @@ import glob
|
||||
import gzip
|
||||
|
||||
|
||||
cpdef bytes format_separator(bytes format):
|
||||
cpdef bytes format_uniq_pattern(bytes format):
|
||||
if format == b"fasta":
|
||||
return b"\n>"
|
||||
elif format == b"fastq":
|
||||
return b"\n@"
|
||||
return b"\n\+\n"
|
||||
elif format == b"ngsfilter" or format == b"tabular":
|
||||
return b"\n"
|
||||
elif format == b"genbank" or format == b"embl":
|
||||
@ -42,7 +42,7 @@ cpdef bytes format_separator(bytes format):
|
||||
cpdef int count_entries(file, bytes format):
|
||||
|
||||
try:
|
||||
sep = format_separator(format)
|
||||
sep = format_uniq_pattern(format)
|
||||
if sep is None:
|
||||
return -1
|
||||
sep = re.compile(sep)
|
||||
@ -72,7 +72,7 @@ cpdef int count_entries(file, bytes format):
|
||||
return -1
|
||||
mmapped_file = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
|
||||
total_count += len(re.findall(sep, mmapped_file))
|
||||
if format != b"ngsfilter" and format != b"tabular" and format != b"embl" and format != b"genbank":
|
||||
if format != b"ngsfilter" and format != b"tabular" and format != b"embl" and format != b"genbank" and format != b"fastq":
|
||||
total_count += 1 # adding +1 for 1st entry because separators include \n (ngsfilter and tabular already count one more because of last \n)
|
||||
|
||||
except:
|
||||
|
@ -1,5 +1,5 @@
|
||||
major = 3
|
||||
minor = 0
|
||||
serial= '0b27'
|
||||
serial= '0b34'
|
||||
|
||||
version ="%d.%d.%s" % (major,minor,serial)
|
||||
|
@ -414,7 +414,10 @@ Obi_ali_p kmer_similarity(Obiview_p view1, OBIDMS_column_p column1, index_t idx1
|
||||
}
|
||||
|
||||
if (max_common_kmers > 0)
|
||||
score = max_common_kmers + kmer_size - 1; // aka the number of nucleotides in the longest stretch of kmers perfectly matching
|
||||
score = max_common_kmers + kmer_size - 1; // aka an approximation of the number of nucleotides matching in the overlap of the alignment.
|
||||
// It's an approximation because one mismatch produces kmer_size kmer mismatches if in the middle of the overlap,
|
||||
// and less for mismatches located towards the ends of the overlap. The case where there are the most mismatches is assumed,
|
||||
// meaning that the score will be often underestimated and never overestimated.
|
||||
else
|
||||
score = 0;
|
||||
abs_shift = abs(best_shift);
|
||||
|
@ -27,7 +27,11 @@
|
||||
* @brief Alignment structure, with informations about the similarity and to rebuild the alignment.
|
||||
*/
|
||||
typedef struct Obi_ali {
|
||||
int score; /**< Alignment score, corresponding to the number of nucleotides in the longest stretch of kmers perfectly matching.
|
||||
int score; /**< Alignment score, corresponding to an approximation of the number of
|
||||
* nucleotides matching in the overlap of the alignment.
|
||||
* It's an approximation because one mismatch produces kmer_size kmer mismatches if in the middle of the overlap,
|
||||
* and less for mismatches located towards the ends of the overlap. The case where there are the most mismatches is assumed,
|
||||
* meaning that the score will be often underestimated and never overestimated.
|
||||
*/
|
||||
int consensus_length; /**< Length of the final consensus sequence.
|
||||
*/
|
||||
|
@ -1659,6 +1659,12 @@ int obi_import_view(const char* dms_path_1, const char* dms_path_2, const char*
|
||||
else // Non-typed view
|
||||
view_2 = obi_new_view(dms_2, view_name_2, NULL, NULL, (view_1->infos)->comments);
|
||||
|
||||
if (view_2 == NULL)
|
||||
{
|
||||
obidebug(1, "\nError creating the new view to import a view in a DMS");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Import line count
|
||||
view_2->infos->line_count = view_1->infos->line_count;
|
||||
|
||||
|
@ -1312,19 +1312,10 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Store the associated column reference if needed // TODO discuss cases
|
||||
if (data_type == OBI_QUAL)
|
||||
// Store the associated column reference if needed
|
||||
if ((associated_column_name != NULL) && (*associated_column_name != '\0'))
|
||||
{
|
||||
if ((associated_column_name == NULL) || (*associated_column_name == '\0'))
|
||||
{
|
||||
obidebug(1, "\nError: The name of the associated column when creating a new column is NULL");
|
||||
munmap(new_column->header, header_size);
|
||||
close(column_file_descriptor);
|
||||
free(new_column);
|
||||
return NULL;
|
||||
}
|
||||
strcpy((header->associated_column).column_name, associated_column_name);
|
||||
|
||||
if (associated_column_version == -1)
|
||||
{
|
||||
obidebug(1, "\nError: The version of the associated column when creating a new column is not defined");
|
||||
@ -1336,6 +1327,7 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
|
||||
(header->associated_column).version = associated_column_version;
|
||||
}
|
||||
|
||||
|
||||
// If the data type is OBI_STR, OBI_SEQ or OBI_QUAL, the associated obi_indexer is opened or created
|
||||
if ((returned_data_type == OBI_STR) || (returned_data_type == OBI_SEQ) || (returned_data_type == OBI_QUAL) || tuples)
|
||||
{
|
||||
@ -1733,16 +1725,32 @@ int obi_close_column(OBIDMS_column_p column)
|
||||
int obi_clone_column_indexer(OBIDMS_column_p column)
|
||||
{
|
||||
char* new_indexer_name;
|
||||
int i;
|
||||
|
||||
new_indexer_name = obi_build_indexer_name((column->header)->name, (column->header)->version);
|
||||
if (new_indexer_name == NULL)
|
||||
return -1;
|
||||
|
||||
column->indexer = obi_clone_indexer(column->indexer, new_indexer_name); // TODO Need to lock this somehow?
|
||||
if (column->indexer == NULL)
|
||||
i=0;
|
||||
while (true) // find avl name not already used
|
||||
{
|
||||
obidebug(1, "\nError cloning a column's indexer to make it writable");
|
||||
return -1;
|
||||
new_indexer_name = obi_build_indexer_name((column->header)->name, ((column->header)->version)+i);
|
||||
if (new_indexer_name == NULL)
|
||||
return -1;
|
||||
|
||||
column->indexer = obi_clone_indexer(column->indexer, new_indexer_name); // TODO Need to lock this somehow?
|
||||
if (column->indexer == NULL)
|
||||
{
|
||||
if (errno == EEXIST)
|
||||
{
|
||||
free(new_indexer_name);
|
||||
i++;
|
||||
}
|
||||
else
|
||||
{
|
||||
free(new_indexer_name);
|
||||
obidebug(1, "\nError cloning a column's indexer to make it writable");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
strcpy((column->header)->indexer_name, new_indexer_name);
|
||||
@ -2423,16 +2431,20 @@ char* obi_get_formatted_elements_names(OBIDMS_column_p column)
|
||||
}
|
||||
|
||||
|
||||
char* obi_column_formatted_infos(OBIDMS_column_p column)
|
||||
char* obi_column_formatted_infos(OBIDMS_column_p column, bool detailed)
|
||||
{
|
||||
char* column_infos;
|
||||
char* elt_names;
|
||||
|
||||
column_infos = malloc(1024 * sizeof(char));
|
||||
char* column_infos = NULL;
|
||||
char* elt_names = NULL;
|
||||
char* column_name = NULL;
|
||||
// should be in view.c because alias exists in the context of view
|
||||
column_infos = malloc(2048 * sizeof(char)); // TODO
|
||||
|
||||
elt_names = obi_get_formatted_elements_names(column);
|
||||
|
||||
|
||||
// "column_name, data type: OBI_TYPE, element names: [formatted element names](, all comments)"
|
||||
|
||||
|
||||
free(elt_names);
|
||||
return column_infos;
|
||||
}
|
||||
|
@ -254,11 +254,15 @@ static int update_lines(Obiview_p view, index_t line_count);
|
||||
/**
|
||||
* @brief Internal function to clone a column in the context of a view.
|
||||
*
|
||||
* Used to edit a closed column.
|
||||
*
|
||||
* Clones with the right line selection and replaces the cloned columns with the new ones in the view.
|
||||
* If there is a line selection, all columns have to be cloned, otherwise only the column of interest is cloned.
|
||||
*
|
||||
* @param view A pointer on the view.
|
||||
* @param column_name The name of the column in the view that should be cloned.
|
||||
* @param clone_associated Whether to clone the associated column
|
||||
* (should always be true except when calling from the function itself to avoid infinite recursion).
|
||||
*
|
||||
* @returns A pointer on the new column.
|
||||
* @retval NULL if an error occurred.
|
||||
@ -266,7 +270,7 @@ static int update_lines(Obiview_p view, index_t line_count);
|
||||
* @since February 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
static OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name);
|
||||
static OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name, bool clone_associated);
|
||||
|
||||
|
||||
/**
|
||||
@ -845,7 +849,7 @@ static int update_lines(Obiview_p view, index_t line_count)
|
||||
// Clone the column first if needed
|
||||
if (!(column->writable))
|
||||
{
|
||||
column = clone_column_in_view(view, (((view->infos)->column_references)[i]).alias);
|
||||
column = clone_column_in_view(view, (((view->infos)->column_references)[i]).alias, true);
|
||||
if (column == NULL)
|
||||
{
|
||||
obidebug(1, "\nError cloning a column in a view when updating its line count");
|
||||
@ -870,12 +874,14 @@ static int update_lines(Obiview_p view, index_t line_count)
|
||||
}
|
||||
|
||||
|
||||
static OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name)
|
||||
static OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name, bool clone_associated)
|
||||
{
|
||||
int i;
|
||||
int i, j;
|
||||
OBIDMS_column_p column = NULL;
|
||||
OBIDMS_column_p new_column = NULL;
|
||||
OBIDMS_column_p column_buffer;
|
||||
OBIDMS_column_p associated_cloned_column = NULL;
|
||||
char* associated_column_alias = NULL;
|
||||
|
||||
// Check that the view is not read-only
|
||||
if (view->read_only)
|
||||
@ -916,11 +922,62 @@ static OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_n
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Look for associated column to clone and reassociate
|
||||
if ((column_buffer->header->associated_column).column_name[0] != '\0')
|
||||
{
|
||||
// Get the associated column alias
|
||||
j=0;
|
||||
while (((strcmp((((view->infos)->column_references)[j]).column_refs.column_name, (column_buffer->header->associated_column).column_name)) ||
|
||||
((((view->infos)->column_references)[j]).column_refs.version != (column_buffer->header->associated_column).version)) &&
|
||||
j<(view->infos)->column_count) // TODO function for that
|
||||
j++;
|
||||
|
||||
if (j == (view->infos)->column_count) // not found
|
||||
{
|
||||
obi_set_errno(OBIVIEW_ERROR);
|
||||
obidebug(1, "\nCould not find associated column when cloning a column for editing");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// No line selection: only this column is cloned, clone and reassociate the associated column
|
||||
if ((view->line_selection == NULL) && clone_associated)
|
||||
{
|
||||
associated_column_alias = (((view->infos)->column_references)[j]).alias;
|
||||
// Clone the associated column
|
||||
associated_cloned_column = clone_column_in_view(view, associated_column_alias, false);
|
||||
// Reassociate both ways
|
||||
strcpy((associated_cloned_column->header->associated_column).column_name, column->header->name);
|
||||
(associated_cloned_column->header->associated_column).version = column->header->version;
|
||||
strcpy((column->header->associated_column).column_name, associated_cloned_column->header->name);
|
||||
(column->header->associated_column).version = associated_cloned_column->header->version;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Line selection: all columns are cloned, check if associated column has been cloned previously (it precedes this one in the list) to reassociate
|
||||
if (j < i)
|
||||
{
|
||||
// Get pointer to associated column
|
||||
associated_cloned_column = *((OBIDMS_column_p*)ll_get(view->columns, j));
|
||||
if (associated_cloned_column == NULL)
|
||||
{
|
||||
obi_set_errno(OBIVIEW_ERROR);
|
||||
obidebug(1, "\nError getting a column to clone from the linked list of column pointers of a view");
|
||||
return NULL;
|
||||
}
|
||||
// Reassociate both ways
|
||||
strcpy((associated_cloned_column->header->associated_column).column_name, column->header->name);
|
||||
(associated_cloned_column->header->associated_column).version = column->header->version;
|
||||
strcpy((column->header->associated_column).column_name, associated_cloned_column->header->name);
|
||||
(column->header->associated_column).version = associated_cloned_column->header->version;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Close old cloned column
|
||||
obi_close_column(column_buffer);
|
||||
|
||||
if (!strcmp((((view->infos)->column_references)[i]).alias, column_name))
|
||||
// Found the column to return
|
||||
// Get the column to return
|
||||
new_column = column;
|
||||
}
|
||||
}
|
||||
@ -1193,7 +1250,7 @@ static int prepare_to_set_value_in_column(Obiview_p view, OBIDMS_column_p* colum
|
||||
return -1;
|
||||
}
|
||||
|
||||
(*column_pp) = clone_column_in_view(view, column_name);
|
||||
(*column_pp) = clone_column_in_view(view, column_name, true);
|
||||
if ((*column_pp) == NULL)
|
||||
{
|
||||
obidebug(1, "\nError trying to clone a column to modify it");
|
||||
@ -1844,6 +1901,7 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v
|
||||
{
|
||||
Obiview_p view;
|
||||
OBIDMS_column_p associated_nuc_column;
|
||||
OBIDMS_column_p associated_qual_column;
|
||||
int nb_predicates;
|
||||
|
||||
if (view_to_clone != NULL)
|
||||
@ -1896,6 +1954,10 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v
|
||||
obidebug(1, "Error adding an obligatory column in a nucleotide sequences view");
|
||||
return NULL;
|
||||
}
|
||||
// Associating both ways: associating nuc sequences column to quality column
|
||||
associated_qual_column = obi_view_get_column(view, QUALITY_COLUMN);
|
||||
strcpy((associated_nuc_column->header->associated_column).column_name, associated_qual_column->header->name);
|
||||
(associated_nuc_column->header->associated_column).version = associated_qual_column->header->version;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1922,7 +1984,7 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v
|
||||
(view->predicate_functions)[(view->nb_predicates)] = view_has_nuc_sequence_column;
|
||||
(view->predicate_functions)[(view->nb_predicates) + 1] = view_has_id_column;
|
||||
(view->predicate_functions)[(view->nb_predicates) + 2] = view_has_definition_column;
|
||||
// if (quality_column) # TODO discuss. Commented bc for example with obi annotate, clone view so clone predicate, then modify seq, so quality is deleted, and predicate boom
|
||||
// if (quality_column) # TODO fix by triggering predicate deleting if quality deleting. Commented bc for example with obi annotate, clone view so clone predicate, then modify seq, so quality is deleted, and predicate boom
|
||||
// (view->predicate_functions)[(view->nb_predicates) + 3] = view_has_quality_column;
|
||||
|
||||
view->nb_predicates = nb_predicates;
|
||||
@ -2212,7 +2274,7 @@ Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name)
|
||||
|
||||
// TODO return a pointer on the column?
|
||||
int obi_view_add_column(Obiview_p view,
|
||||
char* column_name,
|
||||
char* column_name,
|
||||
obiversion_t version_number,
|
||||
const char* alias,
|
||||
OBIType_t data_type,
|
||||
|
@ -406,7 +406,7 @@ Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name);
|
||||
* @param associated_column_name The name of the associated column if there is one (otherwise NULL or ""), if the column is created.
|
||||
* @param associated_column_version The version of the associated column if there is one (otherwise -1), if the column is created.
|
||||
* @param comments Optional comments associated with the column if it is created (NULL or "" if no comments associated).
|
||||
* @param create Whether the column should be created (create == true) or opened (create == false).
|
||||
* @param create Whether the column should be created (create == true) or already exists (create == false).
|
||||
*
|
||||
* @returns A value indicating the success of the operation.
|
||||
* @retval 0 if the operation was successfully completed.
|
||||
@ -416,7 +416,7 @@ Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name);
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int obi_view_add_column(Obiview_p view,
|
||||
char* column_name,
|
||||
char* column_name,
|
||||
obiversion_t version_number,
|
||||
const char* alias,
|
||||
OBIType_t data_type,
|
||||
|
Reference in New Issue
Block a user