Compare commits
22 Commits
Author | SHA1 | Date | |
---|---|---|---|
bccb3e6874 | |||
f5a17bea68 | |||
e28507639a | |||
e6feac93fe | |||
50b292b489 | |||
24a737aa55 | |||
8aa455ad8a | |||
46ca693ca9 | |||
9a9afde113 | |||
8dd403a118 | |||
9672f01c6a | |||
ed9549acfb | |||
9ace9989c4 | |||
a3ebe5f118 | |||
9100e14899 | |||
ccda0661ce | |||
aab59f2214 | |||
ade1107b42 | |||
9c7d24406f | |||
03bc9915f2 | |||
24b1dab573 | |||
7593673f3f |
@ -30,12 +30,12 @@ cdef class ProgressBar:
|
||||
off_t maxi,
|
||||
dict config={},
|
||||
str head="",
|
||||
double seconde=0.1,
|
||||
double seconds=5,
|
||||
cut=False):
|
||||
|
||||
self.starttime = self.clock()
|
||||
self.lasttime = self.starttime
|
||||
self.tickcount = <clock_t> (seconde * CLOCKS_PER_SEC)
|
||||
self.tickcount = <clock_t> (seconds * CLOCKS_PER_SEC)
|
||||
self.freq = 1
|
||||
self.cycle = 0
|
||||
self.arrow = 0
|
||||
|
@ -4,7 +4,7 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||
from obitools3.dms import DMS
|
||||
from obitools3.dms.view.view cimport View
|
||||
from obitools3.uri.decode import open_uri
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
|
||||
from obitools3.dms.view import RollbackException
|
||||
from obitools3.apps.config import logger
|
||||
from obitools3.utils cimport tobytes, str2bytes
|
||||
@ -12,6 +12,9 @@ from obitools3.utils cimport tobytes, str2bytes
|
||||
from obitools3.dms.capi.obilcsalign cimport obi_lcs_align_one_column, \
|
||||
obi_lcs_align_two_columns
|
||||
|
||||
from io import BufferedWriter
|
||||
from cpython.exc cimport PyErr_CheckSignals
|
||||
|
||||
import time
|
||||
import sys
|
||||
|
||||
@ -23,6 +26,7 @@ def addOptions(parser):
|
||||
|
||||
addMinimalInputOption(parser)
|
||||
addMinimalOutputOption(parser)
|
||||
addNoProgressBarOption(parser)
|
||||
|
||||
group=parser.add_argument_group('obi align specific options')
|
||||
|
||||
@ -201,20 +205,20 @@ def run(config):
|
||||
if output is None:
|
||||
raise Exception("Could not create output")
|
||||
o_dms = output[0]
|
||||
output_0 = output[0]
|
||||
o_dms_name = o_dms.name
|
||||
final_o_view_name = output[1]
|
||||
o_view_name = final_o_view_name
|
||||
|
||||
# If the input and output DMS are not the same, align creating a temporary view in the input dms that will be exported to
|
||||
# If stdout output or the input and output DMS are not the same, align creating a temporary view in the input dms that will be exported to
|
||||
# the right DMS and deleted in the other afterwards.
|
||||
if i_dms != o_dms:
|
||||
temporary_view_name = final_o_view_name
|
||||
i=0
|
||||
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||
temporary_view_name = final_o_view_name+b"_"+str2bytes(str(i))
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
if type(output_0)==BufferedWriter:
|
||||
o_dms = i_dms
|
||||
o_view_name = b"temp"
|
||||
while o_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||
o_view_name = o_view_name+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
o_view_name = temporary_view_name
|
||||
else:
|
||||
o_view_name = final_o_view_name
|
||||
|
||||
# Save command config in View comments
|
||||
command_line = " ".join(sys.argv[1:])
|
||||
@ -263,8 +267,15 @@ def run(config):
|
||||
View.delete_view(i_dms, i_view_name_2)
|
||||
i_dms_2.close()
|
||||
|
||||
# stdout output: write to buffer
|
||||
if type(output_0)==BufferedWriter:
|
||||
logger("info", "Printing to output...")
|
||||
o_view = o_dms[o_view_name]
|
||||
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||
o_view.close()
|
||||
|
||||
# If the input and the output DMS are different, delete the temporary result view in the input DMS
|
||||
if i_dms != o_dms:
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
View.delete_view(i_dms, o_view_name)
|
||||
o_dms.close(force=True)
|
||||
|
||||
|
@ -6,7 +6,7 @@ from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
||||
from obitools3.dms.column.column cimport Column
|
||||
from obitools3.dms.capi.obiview cimport QUALITY_COLUMN
|
||||
from obitools3.dms.capi.obitypes cimport OBI_QUAL
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
|
||||
from obitools3.uri.decode import open_uri
|
||||
from obitools3.apps.config import logger
|
||||
from obitools3.libalign._qsassemble import QSolexaReverseAssemble
|
||||
@ -15,7 +15,9 @@ from obitools3.libalign._solexapairend import buildConsensus, buildJoinedSequenc
|
||||
from obitools3.dms.obiseq cimport Nuc_Seq
|
||||
from obitools3.libalign.shifted_ali cimport Kmer_similarity, Ali_shifted
|
||||
from obitools3.dms.capi.obiview cimport REVERSE_SEQUENCE_COLUMN, REVERSE_QUALITY_COLUMN
|
||||
from obitools3.utils cimport str2bytes
|
||||
|
||||
from io import BufferedWriter
|
||||
import sys
|
||||
import os
|
||||
|
||||
@ -29,6 +31,7 @@ def addOptions(parser):
|
||||
|
||||
addMinimalInputOption(parser)
|
||||
addMinimalOutputOption(parser)
|
||||
addNoProgressBarOption(parser)
|
||||
|
||||
group = parser.add_argument_group('obi alignpairedend specific options')
|
||||
|
||||
@ -39,12 +42,13 @@ def addOptions(parser):
|
||||
type=str,
|
||||
help="URI to the reverse reads if they are in a different view than the forward reads")
|
||||
|
||||
group.add_argument('--score-min',
|
||||
action="store", dest="alignpairedend:smin",
|
||||
metavar="#.###",
|
||||
default=None,
|
||||
type=float,
|
||||
help="Minimum score for keeping alignments")
|
||||
# group.add_argument('--score-min',
|
||||
# action="store", dest="alignpairedend:smin",
|
||||
# metavar="#.###",
|
||||
# default=None,
|
||||
# type=float,
|
||||
# help="Minimum score for keeping alignments. "
|
||||
# "(for kmer alignment) The score is an approximation of the number of nucleotides matching in the overlap of the alignment.")
|
||||
|
||||
# group.add_argument('-A', '--true-ali',
|
||||
# action="store_true", dest="alignpairedend:trueali",
|
||||
@ -170,18 +174,29 @@ def run(config):
|
||||
if output is None:
|
||||
raise Exception("Could not create output view")
|
||||
|
||||
view = output[1]
|
||||
output_0 = output[0]
|
||||
o_dms = output[0]
|
||||
|
||||
Column.new_column(view, QUALITY_COLUMN, OBI_QUAL) #TODO output URI quality option?
|
||||
|
||||
if 'smin' in config['alignpairedend']:
|
||||
smin = config['alignpairedend']['smin']
|
||||
# stdout output: create temporary view
|
||||
if type(output_0)==BufferedWriter:
|
||||
i_dms = forward.dms # using any dms
|
||||
o_dms = i_dms
|
||||
i=0
|
||||
o_view_name = b"temp"
|
||||
while o_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||
o_view_name = o_view_name+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
o_view = View_NUC_SEQS.new(o_dms, o_view_name, quality=True)
|
||||
else:
|
||||
smin = 0
|
||||
o_view = output[1]
|
||||
Column.new_column(o_view, QUALITY_COLUMN, OBI_QUAL)
|
||||
|
||||
# Initialize the progress bar
|
||||
pb = ProgressBar(entries_len, config, seconde=5)
|
||||
|
||||
if config['obi']['noprogressbar'] == False:
|
||||
pb = ProgressBar(entries_len, config)
|
||||
else:
|
||||
pb = None
|
||||
|
||||
#if config['alignpairedend']['trueali']:
|
||||
# kmer_ali = False
|
||||
# aligner = buildAlignment
|
||||
@ -206,18 +221,19 @@ def run(config):
|
||||
i = 0
|
||||
for ali in ba:
|
||||
|
||||
pb(i)
|
||||
if pb is not None:
|
||||
pb(i)
|
||||
|
||||
PyErr_CheckSignals()
|
||||
|
||||
consensus = view[i]
|
||||
consensus = o_view[i]
|
||||
|
||||
if not two_views:
|
||||
seqF = entries[i]
|
||||
else:
|
||||
seqF = forward[i]
|
||||
|
||||
if ali.score > smin and ali.consensus_len > 0 :
|
||||
if ali.overlap_len > 0 :
|
||||
buildConsensus(ali, consensus, seqF)
|
||||
else:
|
||||
if not two_views:
|
||||
@ -225,32 +241,43 @@ def run(config):
|
||||
else:
|
||||
seqR = reverse[i]
|
||||
buildJoinedSequence(ali, seqR, consensus, forward=seqF)
|
||||
|
||||
consensus[b"smin"] = smin
|
||||
|
||||
|
||||
if kmer_ali :
|
||||
ali.free()
|
||||
|
||||
i+=1
|
||||
|
||||
pb(i, force=True)
|
||||
print("", file=sys.stderr)
|
||||
if pb is not None:
|
||||
pb(i, force=True)
|
||||
print("", file=sys.stderr)
|
||||
|
||||
if kmer_ali :
|
||||
aligner.free()
|
||||
|
||||
# Save command config in View and DMS comments
|
||||
command_line = " ".join(sys.argv[1:])
|
||||
view.write_config(config, "alignpairedend", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
|
||||
output[0].record_command_line(command_line)
|
||||
o_view.write_config(config, "alignpairedend", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
|
||||
o_dms.record_command_line(command_line)
|
||||
|
||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||
#print(repr(view), file=sys.stderr)
|
||||
|
||||
|
||||
# stdout output: write to buffer
|
||||
if type(output_0)==BufferedWriter:
|
||||
logger("info", "Printing to output...")
|
||||
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||
o_view.close()
|
||||
|
||||
# If stdout output, delete the temporary imported view used to create the final file
|
||||
if type(output_0)==BufferedWriter:
|
||||
View_NUC_SEQS.delete_view(o_dms, o_view_name)
|
||||
output_0.close()
|
||||
|
||||
# Close all DMS
|
||||
input[0].close(force=True)
|
||||
if two_views:
|
||||
rinput[0].close(force=True)
|
||||
output[0].close(force=True)
|
||||
o_dms.close(force=True)
|
||||
|
||||
logger("info", "Done.")
|
||||
|
@ -4,11 +4,12 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||
from obitools3.dms import DMS
|
||||
from obitools3.dms.view.view cimport View, Line_selection
|
||||
from obitools3.uri.decode import open_uri
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption, addNoProgressBarOption
|
||||
from obitools3.dms.view import RollbackException
|
||||
from functools import reduce
|
||||
from obitools3.apps.config import logger
|
||||
from obitools3.utils cimport tobytes, str2bytes
|
||||
from io import BufferedWriter
|
||||
from obitools3.dms.capi.obiview cimport NUC_SEQUENCE_COLUMN, \
|
||||
ID_COLUMN, \
|
||||
DEFINITION_COLUMN, \
|
||||
@ -34,6 +35,7 @@ def addOptions(parser):
|
||||
addMinimalInputOption(parser)
|
||||
addTaxonomyOption(parser)
|
||||
addMinimalOutputOption(parser)
|
||||
addNoProgressBarOption(parser)
|
||||
|
||||
group=parser.add_argument_group('obi annotate specific options')
|
||||
|
||||
@ -278,8 +280,19 @@ def run(config):
|
||||
if output is None:
|
||||
raise Exception("Could not create output view")
|
||||
o_dms = output[0]
|
||||
output_0 = output[0]
|
||||
o_view_name = output[1]
|
||||
|
||||
|
||||
# stdout output: create temporary view
|
||||
if type(output_0)==BufferedWriter:
|
||||
o_dms = i_dms
|
||||
i=0
|
||||
o_view_name = b"temp"
|
||||
while o_view_name in i_dms: # Making sure view name is unique in output DMS
|
||||
o_view_name = o_view_name+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
imported_view_name = o_view_name
|
||||
|
||||
# If the input and output DMS are not the same, import the input view in the output DMS before cloning it to modify it
|
||||
# (could be the other way around: clone and modify in the input DMS then import the new view in the output DMS)
|
||||
if i_dms != o_dms:
|
||||
@ -290,7 +303,7 @@ def run(config):
|
||||
i+=1
|
||||
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], i_view_name, imported_view_name)
|
||||
i_view = o_dms[imported_view_name]
|
||||
|
||||
|
||||
# Clone output view from input view
|
||||
o_view = i_view.clone(o_view_name)
|
||||
if o_view is None:
|
||||
@ -307,7 +320,10 @@ def run(config):
|
||||
taxo = None
|
||||
|
||||
# Initialize the progress bar
|
||||
pb = ProgressBar(len(o_view), config, seconde=5)
|
||||
if config['obi']['noprogressbar'] == False:
|
||||
pb = ProgressBar(len(o_view), config)
|
||||
else:
|
||||
pb = None
|
||||
|
||||
try:
|
||||
|
||||
@ -346,14 +362,16 @@ def run(config):
|
||||
sequenceTagger = sequenceTaggerGenerator(config, taxo=taxo)
|
||||
for i in range(len(o_view)):
|
||||
PyErr_CheckSignals()
|
||||
pb(i)
|
||||
if pb is not None:
|
||||
pb(i)
|
||||
sequenceTagger(o_view[i])
|
||||
|
||||
except Exception, e:
|
||||
raise RollbackException("obi annotate error, rollbacking view: "+str(e), o_view)
|
||||
|
||||
pb(i, force=True)
|
||||
print("", file=sys.stderr)
|
||||
if pb is not None:
|
||||
pb(i, force=True)
|
||||
print("", file=sys.stderr)
|
||||
|
||||
# Save command config in View and DMS comments
|
||||
command_line = " ".join(sys.argv[1:])
|
||||
@ -363,13 +381,19 @@ def run(config):
|
||||
input_dms_name.append(config['obi']['taxoURI'].split("/")[-3])
|
||||
input_view_name.append("taxonomy/"+config['obi']['taxoURI'].split("/")[-1])
|
||||
o_view.write_config(config, "annotate", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
|
||||
output[0].record_command_line(command_line)
|
||||
o_dms.record_command_line(command_line)
|
||||
|
||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||
#print(repr(o_view), file=sys.stderr)
|
||||
|
||||
# If the input and the output DMS are different, delete the temporary imported view used to create the final view
|
||||
if i_dms != o_dms:
|
||||
# stdout output: write to buffer
|
||||
if type(output_0)==BufferedWriter:
|
||||
logger("info", "Printing to output...")
|
||||
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||
o_view.close()
|
||||
|
||||
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
View.delete_view(o_dms, imported_view_name)
|
||||
o_dms.close(force=True)
|
||||
i_dms.close(force=True)
|
||||
|
@ -4,14 +4,16 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||
from obitools3.dms.dms cimport DMS
|
||||
from obitools3.dms.view import RollbackException
|
||||
from obitools3.dms.capi.build_reference_db cimport build_reference_db
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption, addNoProgressBarOption
|
||||
from obitools3.uri.decode import open_uri
|
||||
from obitools3.apps.config import logger
|
||||
from obitools3.utils cimport tobytes, str2bytes
|
||||
from obitools3.dms.view.view cimport View
|
||||
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
||||
|
||||
from io import BufferedWriter
|
||||
import sys
|
||||
from cpython.exc cimport PyErr_CheckSignals
|
||||
|
||||
|
||||
__title__="Tag a set of sequences for PCR and sequencing errors identification"
|
||||
@ -22,6 +24,7 @@ def addOptions(parser):
|
||||
addMinimalInputOption(parser)
|
||||
addTaxonomyOption(parser)
|
||||
addMinimalOutputOption(parser)
|
||||
addNoProgressBarOption(parser)
|
||||
|
||||
group = parser.add_argument_group('obi build_ref_db specific options')
|
||||
|
||||
@ -56,17 +59,20 @@ def run(config):
|
||||
if output is None:
|
||||
raise Exception("Could not create output")
|
||||
o_dms = output[0]
|
||||
output_0 = output[0]
|
||||
final_o_view_name = output[1]
|
||||
|
||||
# If the input and output DMS are not the same, build the database creating a temporary view that will be exported to
|
||||
# If stdout output or the input and output DMS are not the same, build the database creating a temporary view that will be exported to
|
||||
# the right DMS and deleted in the other afterwards.
|
||||
if i_dms != o_dms:
|
||||
temporary_view_name = final_o_view_name
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
temporary_view_name = b"temp"
|
||||
i=0
|
||||
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||
temporary_view_name = final_o_view_name+b"_"+str2bytes(str(i))
|
||||
temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
o_view_name = temporary_view_name
|
||||
if type(output_0)==BufferedWriter:
|
||||
o_dms = i_dms
|
||||
else:
|
||||
o_view_name = final_o_view_name
|
||||
|
||||
@ -80,22 +86,29 @@ def run(config):
|
||||
input_dms_name.append(config['obi']['taxoURI'].split("/")[-3])
|
||||
input_view_name.append("taxonomy/"+config['obi']['taxoURI'].split("/")[-1])
|
||||
comments = View.print_config(config, "build_ref_db", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
|
||||
|
||||
|
||||
if build_reference_db(i_dms.name_with_full_path, tobytes(i_view_name), tobytes(taxonomy_name), tobytes(o_view_name), comments, config['build_ref_db']['threshold']) < 0:
|
||||
raise Exception("Error building a reference database")
|
||||
|
||||
# If the input and output DMS are not the same, export result view to output DMS
|
||||
if i_dms != o_dms:
|
||||
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)
|
||||
|
||||
|
||||
# stdout output: write to buffer
|
||||
if type(output_0)==BufferedWriter:
|
||||
logger("info", "Printing to output...")
|
||||
o_view = o_dms[o_view_name]
|
||||
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||
o_view.close()
|
||||
|
||||
# Save command config in DMS comments
|
||||
o_dms.record_command_line(command_line)
|
||||
|
||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||
#print(repr(o_dms[final_o_view_name]), file=sys.stderr)
|
||||
|
||||
# If the input and the output DMS are different, delete the temporary result view in the input DMS
|
||||
if i_dms != o_dms:
|
||||
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
View.delete_view(i_dms, o_view_name)
|
||||
o_dms.close(force=True)
|
||||
|
||||
|
@ -15,6 +15,7 @@ from obitools3.dms.capi.obiview cimport NUC_SEQUENCE_COLUMN, REVERSE_SEQUENCE_CO
|
||||
from obitools3.dms.capi.obitypes cimport OBI_SEQ, OBI_QUAL
|
||||
from obitools3.dms.column.column cimport Column
|
||||
|
||||
from io import BufferedWriter
|
||||
import time
|
||||
import sys
|
||||
|
||||
@ -76,53 +77,70 @@ def run(config):
|
||||
if output is None:
|
||||
raise Exception("Could not create output view")
|
||||
o_dms = output[0]
|
||||
output_0 = output[0]
|
||||
o_view = output[1]
|
||||
|
||||
# stdout output
|
||||
if type(output_0)==BufferedWriter:
|
||||
o_dms = i_dms
|
||||
|
||||
# Initialize quality columns and their associated sequence columns if needed
|
||||
if not remove_qual:
|
||||
if NUC_SEQUENCE_COLUMN not in o_view:
|
||||
Column.new_column(o_view, NUC_SEQUENCE_COLUMN, OBI_SEQ)
|
||||
Column.new_column(o_view, QUALITY_COLUMN, OBI_QUAL, associated_column_name=NUC_SEQUENCE_COLUMN, associated_column_version=o_view[NUC_SEQUENCE_COLUMN].version)
|
||||
if not remove_rev_qual:
|
||||
Column.new_column(o_view, REVERSE_SEQUENCE_COLUMN, OBI_SEQ)
|
||||
Column.new_column(o_view, REVERSE_QUALITY_COLUMN, OBI_QUAL, associated_column_name=REVERSE_SEQUENCE_COLUMN, associated_column_version=o_view[REVERSE_SEQUENCE_COLUMN].version)
|
||||
if type(output_0) != BufferedWriter:
|
||||
if not remove_qual:
|
||||
if NUC_SEQUENCE_COLUMN not in o_view:
|
||||
Column.new_column(o_view, NUC_SEQUENCE_COLUMN, OBI_SEQ)
|
||||
Column.new_column(o_view, QUALITY_COLUMN, OBI_QUAL, associated_column_name=NUC_SEQUENCE_COLUMN, associated_column_version=o_view[NUC_SEQUENCE_COLUMN].version)
|
||||
if not remove_rev_qual:
|
||||
Column.new_column(o_view, REVERSE_SEQUENCE_COLUMN, OBI_SEQ)
|
||||
Column.new_column(o_view, REVERSE_QUALITY_COLUMN, OBI_QUAL, associated_column_name=REVERSE_SEQUENCE_COLUMN, associated_column_version=o_view[REVERSE_SEQUENCE_COLUMN].version)
|
||||
|
||||
# Initialize multiple elements columns
|
||||
dict_cols = {}
|
||||
for v in iview_list:
|
||||
for coln in v.keys():
|
||||
if v[coln].nb_elements_per_line > 1:
|
||||
if coln not in dict_cols:
|
||||
dict_cols[coln] = {}
|
||||
dict_cols[coln]['eltnames'] = set(v[coln].elements_names)
|
||||
dict_cols[coln]['nbelts'] = v[coln].nb_elements_per_line
|
||||
dict_cols[coln]['obitype'] = v[coln].data_type_int
|
||||
else:
|
||||
dict_cols[coln]['eltnames'] = set(v[coln].elements_names + list(dict_cols[coln]['eltnames']))
|
||||
dict_cols[coln]['nbelts'] = len(dict_cols[coln]['eltnames'])
|
||||
for coln in dict_cols:
|
||||
Column.new_column(o_view, coln, dict_cols[coln]['obitype'],
|
||||
nb_elements_per_line=dict_cols[coln]['nbelts'], elements_names=list(dict_cols[coln]['eltnames']))
|
||||
if type(output_0)==BufferedWriter:
|
||||
dict_cols = {}
|
||||
for v in iview_list:
|
||||
for coln in v.keys():
|
||||
if v[coln].nb_elements_per_line > 1:
|
||||
if coln not in dict_cols:
|
||||
dict_cols[coln] = {}
|
||||
dict_cols[coln]['eltnames'] = set(v[coln].elements_names)
|
||||
dict_cols[coln]['nbelts'] = v[coln].nb_elements_per_line
|
||||
dict_cols[coln]['obitype'] = v[coln].data_type_int
|
||||
else:
|
||||
dict_cols[coln]['eltnames'] = set(v[coln].elements_names + list(dict_cols[coln]['eltnames']))
|
||||
dict_cols[coln]['nbelts'] = len(dict_cols[coln]['eltnames'])
|
||||
for coln in dict_cols:
|
||||
Column.new_column(o_view, coln, dict_cols[coln]['obitype'],
|
||||
nb_elements_per_line=dict_cols[coln]['nbelts'], elements_names=list(dict_cols[coln]['eltnames']))
|
||||
|
||||
# Initialize the progress bar
|
||||
pb = ProgressBar(total_len, config, seconde=5)
|
||||
if not config['obi']['noprogressbar']:
|
||||
pb = ProgressBar(total_len, config)
|
||||
else:
|
||||
pb = None
|
||||
|
||||
i = 0
|
||||
for v in iview_list:
|
||||
for l in v:
|
||||
for entry in v:
|
||||
PyErr_CheckSignals()
|
||||
pb(i)
|
||||
o_view[i] = l
|
||||
if pb is not None:
|
||||
pb(i)
|
||||
if type(output_0)==BufferedWriter:
|
||||
rep = repr(entry)
|
||||
output_0.write(str2bytes(rep)+b"\n")
|
||||
else:
|
||||
o_view[i] = entry
|
||||
i+=1
|
||||
|
||||
# Deletes quality columns if needed
|
||||
if QUALITY_COLUMN in o_view and remove_qual :
|
||||
o_view.delete_column(QUALITY_COLUMN)
|
||||
if REVERSE_QUALITY_COLUMN in o_view and remove_rev_qual :
|
||||
o_view.delete_column(REVERSE_QUALITY_COLUMN)
|
||||
|
||||
pb(i, force=True)
|
||||
print("", file=sys.stderr)
|
||||
if type(output_0)!=BufferedWriter:
|
||||
if QUALITY_COLUMN in o_view and remove_qual :
|
||||
o_view.delete_column(QUALITY_COLUMN)
|
||||
if REVERSE_QUALITY_COLUMN in o_view and remove_rev_qual :
|
||||
o_view.delete_column(REVERSE_QUALITY_COLUMN)
|
||||
|
||||
if pb is not None:
|
||||
pb(i, force=True)
|
||||
print("", file=sys.stderr)
|
||||
|
||||
# Save command config in DMS comments
|
||||
command_line = " ".join(sys.argv[1:])
|
||||
|
@ -4,13 +4,14 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||
from obitools3.dms.dms cimport DMS
|
||||
from obitools3.dms.view import RollbackException
|
||||
from obitools3.dms.capi.obiclean cimport obi_clean
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
|
||||
from obitools3.uri.decode import open_uri
|
||||
from obitools3.apps.config import logger
|
||||
from obitools3.utils cimport tobytes, str2bytes
|
||||
from obitools3.dms.view.view cimport View
|
||||
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
||||
|
||||
from io import BufferedWriter
|
||||
import sys
|
||||
|
||||
|
||||
@ -21,7 +22,8 @@ def addOptions(parser):
|
||||
|
||||
addMinimalInputOption(parser)
|
||||
addMinimalOutputOption(parser)
|
||||
|
||||
addNoProgressBarOption(parser)
|
||||
|
||||
group = parser.add_argument_group('obi clean specific options')
|
||||
|
||||
group.add_argument('--distance', '-d',
|
||||
@ -36,8 +38,7 @@ def addOptions(parser):
|
||||
dest="clean:sample-tag-name",
|
||||
metavar="<SAMPLE TAG NAME>",
|
||||
type=str,
|
||||
default="merged_sample",
|
||||
help="Name of the tag where sample counts are kept.")
|
||||
help="Name of the tag where merged sample count informations are kept (typically generated by obi uniq, usually MERGED_sample, default: None).")
|
||||
|
||||
group.add_argument('--ratio', '-r',
|
||||
action="store", dest="clean:ratio",
|
||||
@ -89,17 +90,20 @@ def run(config):
|
||||
if output is None:
|
||||
raise Exception("Could not create output")
|
||||
o_dms = output[0]
|
||||
output_0 = output[0]
|
||||
final_o_view_name = output[1]
|
||||
|
||||
# If the input and output DMS are not the same, run obiclean creating a temporary view that will be exported to
|
||||
# If stdout output or the input and output DMS are not the same, create a temporary view that will be exported to
|
||||
# the right DMS and deleted in the other afterwards.
|
||||
if i_dms != o_dms:
|
||||
temporary_view_name = final_o_view_name
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
temporary_view_name = b"temp"
|
||||
i=0
|
||||
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||
temporary_view_name = final_o_view_name+b"_"+str2bytes(str(i))
|
||||
temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
o_view_name = temporary_view_name
|
||||
if type(output_0)==BufferedWriter:
|
||||
o_dms = i_dms
|
||||
else:
|
||||
o_view_name = final_o_view_name
|
||||
|
||||
@ -107,6 +111,9 @@ def run(config):
|
||||
command_line = " ".join(sys.argv[1:])
|
||||
comments = View.print_config(config, "clean", command_line, input_dms_name=[i_dms_name], input_view_name=[i_view_name])
|
||||
|
||||
if 'sample-tag-name' not in config['clean']:
|
||||
config['clean']['sample-tag-name'] = ""
|
||||
|
||||
if obi_clean(i_dms.name_with_full_path, tobytes(i_view_name), tobytes(config['clean']['sample-tag-name']), tobytes(o_view_name), comments, \
|
||||
config['clean']['distance'], config['clean']['ratio'], config['clean']['heads-only'], config['clean']['thread-count']) < 0:
|
||||
raise Exception("Error running obiclean")
|
||||
@ -114,15 +121,22 @@ def run(config):
|
||||
# If the input and output DMS are not the same, export result view to output DMS
|
||||
if i_dms != o_dms:
|
||||
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)
|
||||
|
||||
|
||||
# stdout output: write to buffer
|
||||
if type(output_0)==BufferedWriter:
|
||||
logger("info", "Printing to output...")
|
||||
o_view = o_dms[o_view_name]
|
||||
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||
o_view.close()
|
||||
|
||||
# Save command config in DMS comments
|
||||
o_dms.record_command_line(command_line)
|
||||
|
||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||
#print(repr(o_dms[final_o_view_name]), file=sys.stderr)
|
||||
|
||||
# If the input and the output DMS are different, delete the temporary result view in the input DMS
|
||||
if i_dms != o_dms:
|
||||
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
View.delete_view(i_dms, o_view_name)
|
||||
o_dms.close(force=True)
|
||||
|
||||
|
@ -22,7 +22,7 @@ def addOptions(parser):
|
||||
group.add_argument('-s','--sequence',
|
||||
action="store_true", dest="count:sequence",
|
||||
default=False,
|
||||
help="Prints only the number of sequence records.")
|
||||
help="Prints only the number of sequence records (much faster, default: False).")
|
||||
|
||||
group.add_argument('-a','--all',
|
||||
action="store_true", dest="count:all",
|
||||
|
@ -5,10 +5,10 @@ from obitools3.dms.dms cimport DMS
|
||||
from obitools3.dms.capi.obidms cimport OBIDMS_p
|
||||
from obitools3.dms.view import RollbackException
|
||||
from obitools3.dms.capi.obiecopcr cimport obi_ecopcr
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addTaxonomyOption
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addTaxonomyOption, addNoProgressBarOption
|
||||
from obitools3.uri.decode import open_uri
|
||||
from obitools3.apps.config import logger
|
||||
from obitools3.utils cimport tobytes
|
||||
from obitools3.utils cimport tobytes, str2bytes
|
||||
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
||||
from obitools3.dms.view import View
|
||||
|
||||
@ -16,6 +16,7 @@ from libc.stdlib cimport malloc, free
|
||||
from libc.stdint cimport int32_t
|
||||
|
||||
import sys
|
||||
from io import BufferedWriter
|
||||
|
||||
|
||||
__title__="in silico PCR"
|
||||
@ -27,6 +28,7 @@ def addOptions(parser):
|
||||
addMinimalInputOption(parser)
|
||||
addTaxonomyOption(parser)
|
||||
addMinimalOutputOption(parser)
|
||||
addNoProgressBarOption(parser)
|
||||
|
||||
|
||||
group = parser.add_argument_group('obi ecopcr specific options')
|
||||
@ -169,11 +171,20 @@ def run(config):
|
||||
if output is None:
|
||||
raise Exception("Could not create output")
|
||||
o_dms = output[0]
|
||||
output_0 = output[0]
|
||||
o_dms_name = output[0].name
|
||||
o_view_name = output[1]
|
||||
|
||||
# Read taxonomy name
|
||||
taxonomy_name = config['obi']['taxoURI'].split("/")[-1] # Robust in theory
|
||||
|
||||
# If stdout output create a temporary view in the input dms that will be deleted afterwards.
|
||||
if type(output_0)==BufferedWriter:
|
||||
o_dms = i_dms
|
||||
o_view_name = b"temp"
|
||||
while o_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||
o_view_name = o_view_name+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
|
||||
# Save command config in View comments
|
||||
command_line = " ".join(sys.argv[1:])
|
||||
@ -201,10 +212,21 @@ def run(config):
|
||||
|
||||
free(restrict_to_taxids_p)
|
||||
free(ignore_taxids_p)
|
||||
|
||||
|
||||
# stdout output: write to buffer
|
||||
if type(output_0)==BufferedWriter:
|
||||
logger("info", "Printing to output...")
|
||||
o_view = o_dms[o_view_name]
|
||||
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||
o_view.close()
|
||||
|
||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||
#print(repr(o_dms[o_view_name]), file=sys.stderr)
|
||||
|
||||
# If stdout output, delete the temporary result view in the input DMS
|
||||
if type(output_0)==BufferedWriter:
|
||||
View.delete_view(i_dms, o_view_name)
|
||||
|
||||
i_dms.close(force=True)
|
||||
o_dms.close(force=True)
|
||||
|
||||
|
@ -4,7 +4,7 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||
from obitools3.dms.dms cimport DMS
|
||||
from obitools3.dms.view import RollbackException
|
||||
from obitools3.dms.capi.obiecotag cimport obi_ecotag
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption, addNoProgressBarOption
|
||||
from obitools3.uri.decode import open_uri
|
||||
from obitools3.apps.config import logger
|
||||
from obitools3.utils cimport tobytes, str2bytes
|
||||
@ -12,6 +12,7 @@ from obitools3.dms.view.view cimport View
|
||||
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
||||
|
||||
import sys
|
||||
from io import BufferedWriter
|
||||
|
||||
|
||||
__title__="Taxonomic assignment of sequences"
|
||||
@ -22,6 +23,7 @@ def addOptions(parser):
|
||||
addMinimalInputOption(parser)
|
||||
addTaxonomyOption(parser)
|
||||
addMinimalOutputOption(parser)
|
||||
addNoProgressBarOption(parser)
|
||||
|
||||
group = parser.add_argument_group('obi ecotag specific options')
|
||||
|
||||
@ -75,17 +77,19 @@ def run(config):
|
||||
if output is None:
|
||||
raise Exception("Could not create output")
|
||||
o_dms = output[0]
|
||||
output_0 = output[0]
|
||||
final_o_view_name = output[1]
|
||||
|
||||
# If the input and output DMS are not the same, run ecotag creating a temporary view that will be exported to
|
||||
# the right DMS and deleted in the other afterwards.
|
||||
if i_dms != o_dms:
|
||||
temporary_view_name = final_o_view_name
|
||||
# If stdout output or the input and output DMS are not the same, create a temporary view that will be exported and deleted.
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
temporary_view_name = b"temp"
|
||||
i=0
|
||||
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||
temporary_view_name = final_o_view_name+b"_"+str2bytes(str(i))
|
||||
temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
o_view_name = temporary_view_name
|
||||
if type(output_0)==BufferedWriter:
|
||||
o_dms = i_dms
|
||||
else:
|
||||
o_view_name = final_o_view_name
|
||||
|
||||
@ -120,11 +124,18 @@ def run(config):
|
||||
# Save command config in DMS comments
|
||||
o_dms.record_command_line(command_line)
|
||||
|
||||
# stdout output: write to buffer
|
||||
if type(output_0)==BufferedWriter:
|
||||
logger("info", "Printing to output...")
|
||||
o_view = o_dms[o_view_name]
|
||||
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||
o_view.close()
|
||||
|
||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||
#print(repr(o_dms[final_o_view_name]), file=sys.stderr)
|
||||
|
||||
# If the input and the output DMS are different, delete the temporary result view in the input DMS
|
||||
if i_dms != o_dms:
|
||||
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
View.delete_view(i_dms, o_view_name)
|
||||
o_dms.close(force=True)
|
||||
|
||||
|
@ -74,7 +74,7 @@ def run(config):
|
||||
if config['obi']['noprogressbar']:
|
||||
pb = None
|
||||
else:
|
||||
pb = ProgressBar(withoutskip - skip, config, seconde=5)
|
||||
pb = ProgressBar(withoutskip - skip, config)
|
||||
|
||||
i=0
|
||||
for seq in iview :
|
||||
|
@ -4,7 +4,7 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||
from obitools3.dms import DMS
|
||||
from obitools3.dms.view.view cimport View, Line_selection
|
||||
from obitools3.uri.decode import open_uri
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption, addNoProgressBarOption
|
||||
from obitools3.dms.view import RollbackException
|
||||
from obitools3.apps.config import logger
|
||||
from obitools3.utils cimport tobytes, str2bytes
|
||||
@ -14,6 +14,7 @@ import time
|
||||
import re
|
||||
import sys
|
||||
import ast
|
||||
from io import BufferedWriter
|
||||
from cpython.exc cimport PyErr_CheckSignals
|
||||
|
||||
|
||||
@ -28,6 +29,7 @@ def addOptions(parser):
|
||||
addMinimalInputOption(parser)
|
||||
addTaxonomyOption(parser)
|
||||
addMinimalOutputOption(parser)
|
||||
addNoProgressBarOption(parser)
|
||||
|
||||
group=parser.add_argument_group("obi grep specific options")
|
||||
|
||||
@ -304,16 +306,21 @@ def run(config):
|
||||
if output is None:
|
||||
raise Exception("Could not create output view")
|
||||
o_dms = output[0]
|
||||
o_view_name_final = output[1]
|
||||
o_view_name = o_view_name_final
|
||||
|
||||
# If the input and output DMS are not the same, create output view in input DMS first, then export it
|
||||
# to output DMS, making sure the temporary view name is unique in the input DMS
|
||||
if i_dms != o_dms:
|
||||
output_0 = output[0]
|
||||
final_o_view_name = output[1]
|
||||
|
||||
# If stdout output or the input and output DMS are not the same, create a temporary view that will be exported and deleted afterwards.
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
temporary_view_name = b"temp"
|
||||
i=0
|
||||
while o_view_name in i_dms:
|
||||
o_view_name = o_view_name_final+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||
temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
o_view_name = temporary_view_name
|
||||
if type(output_0)==BufferedWriter:
|
||||
o_dms = i_dms
|
||||
else:
|
||||
o_view_name = final_o_view_name
|
||||
|
||||
if 'taxoURI' in config['obi'] and config['obi']['taxoURI'] is not None:
|
||||
taxo_uri = open_uri(config["obi"]["taxoURI"])
|
||||
@ -324,7 +331,10 @@ def run(config):
|
||||
taxo = None
|
||||
|
||||
# Initialize the progress bar
|
||||
pb = ProgressBar(len(i_view), config, seconde=5)
|
||||
if config['obi']['noprogressbar'] == False:
|
||||
pb = ProgressBar(len(i_view), config)
|
||||
else:
|
||||
pb = None
|
||||
|
||||
# Apply filter
|
||||
tax_filter = Taxonomy_filter_generator(taxo, config["grep"])
|
||||
@ -334,31 +344,36 @@ def run(config):
|
||||
if filter is None and config["grep"]["invert_selection"]: # all sequences are selected: filter is None if no line will be selected because some columns don't exist
|
||||
for i in range(len(i_view)):
|
||||
PyErr_CheckSignals()
|
||||
pb(i)
|
||||
selection.append(i)
|
||||
if pb is not None:
|
||||
pb(i)
|
||||
selection.append(i)
|
||||
|
||||
elif filter is not None : # filter is None if no line will be selected because some columns don't exist
|
||||
for i in range(len(i_view)):
|
||||
PyErr_CheckSignals()
|
||||
pb(i)
|
||||
if pb is not None:
|
||||
pb(i)
|
||||
line = i_view[i]
|
||||
|
||||
|
||||
loc_env = {"sequence": line, "line": line, "taxonomy": taxo, "obi_eval_result": False}
|
||||
|
||||
good = filter(line, loc_env)
|
||||
|
||||
if good :
|
||||
selection.append(i)
|
||||
|
||||
pb(len(i_view), force=True)
|
||||
print("", file=sys.stderr)
|
||||
|
||||
if pb is not None:
|
||||
pb(len(i_view), force=True)
|
||||
print("", file=sys.stderr)
|
||||
|
||||
# Create output view with the line selection
|
||||
try:
|
||||
o_view = selection.materialize(o_view_name)
|
||||
except Exception, e:
|
||||
raise RollbackException("obi grep error, rollbacking view: "+str(e), o_view)
|
||||
|
||||
|
||||
logger("info", "Grepped %d entries" % len(o_view))
|
||||
|
||||
# Save command config in View and DMS comments
|
||||
command_line = " ".join(sys.argv[1:])
|
||||
input_dms_name=[input[0].name]
|
||||
@ -373,14 +388,20 @@ def run(config):
|
||||
# and delete the temporary view in the input DMS
|
||||
if i_dms != o_dms:
|
||||
o_view.close()
|
||||
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, o_view_name_final)
|
||||
o_view = o_dms[o_view_name_final]
|
||||
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)
|
||||
o_view = o_dms[final_o_view_name]
|
||||
|
||||
# stdout output: write to buffer
|
||||
if type(output_0)==BufferedWriter:
|
||||
logger("info", "Printing to output...")
|
||||
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||
o_view.close()
|
||||
|
||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||
#print(repr(o_view), file=sys.stderr)
|
||||
|
||||
# If the input and the output DMS are different, delete the temporary imported view used to create the final view
|
||||
if i_dms != o_dms:
|
||||
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
View.delete_view(i_dms, o_view_name)
|
||||
o_dms.close(force=True)
|
||||
i_dms.close(force=True)
|
||||
|
@ -4,14 +4,15 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||
from obitools3.dms import DMS
|
||||
from obitools3.dms.view.view cimport View, Line_selection
|
||||
from obitools3.uri.decode import open_uri
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
|
||||
from obitools3.dms.view import RollbackException
|
||||
from obitools3.apps.config import logger
|
||||
from obitools3.utils cimport str2bytes
|
||||
|
||||
import time
|
||||
import sys
|
||||
|
||||
from io import BufferedWriter
|
||||
|
||||
from cpython.exc cimport PyErr_CheckSignals
|
||||
|
||||
|
||||
@ -22,6 +23,7 @@ def addOptions(parser):
|
||||
|
||||
addMinimalInputOption(parser)
|
||||
addMinimalOutputOption(parser)
|
||||
addNoProgressBarOption(parser)
|
||||
|
||||
group=parser.add_argument_group('obi head specific options')
|
||||
|
||||
@ -53,31 +55,41 @@ def run(config):
|
||||
if output is None:
|
||||
raise Exception("Could not create output view")
|
||||
o_dms = output[0]
|
||||
o_view_name_final = output[1]
|
||||
o_view_name = o_view_name_final
|
||||
output_0 = output[0]
|
||||
final_o_view_name = output[1]
|
||||
|
||||
# If the input and output DMS are not the same, create output view in input DMS first, then export it
|
||||
# to output DMS, making sure the temporary view name is unique in the input DMS
|
||||
if i_dms != o_dms:
|
||||
# If stdout output or the input and output DMS are not the same, create a temporary view that will be exported and deleted.
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
temporary_view_name = b"temp"
|
||||
i=0
|
||||
while o_view_name in i_dms:
|
||||
o_view_name = o_view_name_final+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||
temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
o_view_name = temporary_view_name
|
||||
if type(output_0)==BufferedWriter:
|
||||
o_dms = i_dms
|
||||
else:
|
||||
o_view_name = final_o_view_name
|
||||
|
||||
n = min(config['head']['count'], len(i_view))
|
||||
|
||||
# Initialize the progress bar
|
||||
pb = ProgressBar(n, config, seconde=5)
|
||||
if config['obi']['noprogressbar'] == False:
|
||||
pb = ProgressBar(n, config)
|
||||
else:
|
||||
pb = None
|
||||
|
||||
selection = Line_selection(i_view)
|
||||
|
||||
for i in range(n):
|
||||
PyErr_CheckSignals()
|
||||
pb(i)
|
||||
if pb is not None:
|
||||
pb(i)
|
||||
selection.append(i)
|
||||
|
||||
pb(i, force=True)
|
||||
print("", file=sys.stderr)
|
||||
if pb is not None:
|
||||
pb(i, force=True)
|
||||
print("", file=sys.stderr)
|
||||
|
||||
# Create output view with the line selection
|
||||
try:
|
||||
@ -94,14 +106,20 @@ def run(config):
|
||||
# and delete the temporary view in the input DMS
|
||||
if i_dms != o_dms:
|
||||
o_view.close()
|
||||
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, o_view_name_final)
|
||||
o_view = o_dms[o_view_name_final]
|
||||
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)
|
||||
o_view = o_dms[final_o_view_name]
|
||||
|
||||
# stdout output: write to buffer
|
||||
if type(output_0)==BufferedWriter:
|
||||
logger("info", "Printing to output...")
|
||||
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||
o_view.close()
|
||||
|
||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||
#print(repr(view), file=sys.stderr)
|
||||
|
||||
# If the input and the output DMS are different, delete the temporary imported view used to create the final view
|
||||
if i_dms != o_dms:
|
||||
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
View.delete_view(i_dms, o_view_name)
|
||||
o_dms.close(force=True)
|
||||
i_dms.close(force=True)
|
||||
|
@ -47,6 +47,8 @@ from obitools3.apps.config import logger
|
||||
|
||||
from cpython.exc cimport PyErr_CheckSignals
|
||||
|
||||
from io import BufferedWriter
|
||||
|
||||
|
||||
__title__="Imports sequences from different formats into a DMS"
|
||||
|
||||
@ -75,6 +77,11 @@ def addOptions(parser):
|
||||
help="Do a first readthrough of the dataset if it contains huge dictionaries (more than 100 keys) for "
|
||||
"a much faster import. This option is not recommended and will slow down the import in any other case.")
|
||||
|
||||
group.add_argument('--space-priority',
|
||||
action="store_true", dest="import:space_priority",
|
||||
default=False,
|
||||
help="If importing a view into another DMS, do it by importing each line, saving disk space if the original view "
|
||||
"has a line selection associated.")
|
||||
|
||||
def run(config):
|
||||
|
||||
@ -130,7 +137,7 @@ def run(config):
|
||||
if entry_count > 0:
|
||||
logger("info", "Importing %d entries", entry_count)
|
||||
else:
|
||||
logger("info", "Importing an unknow number of entries")
|
||||
logger("info", "Importing an unknown number of entries")
|
||||
|
||||
# TODO a bit dirty?
|
||||
if input[2]==Nuc_Seq or input[2]==View_NUC_SEQS:
|
||||
@ -140,7 +147,7 @@ def run(config):
|
||||
else:
|
||||
v = None
|
||||
|
||||
if config['obi']['taxdump'] or isinstance(input[1], View):
|
||||
if config['obi']['taxdump'] or (isinstance(input[1], View) and not config['import']['space_priority']):
|
||||
dms_only=True
|
||||
else:
|
||||
dms_only=False
|
||||
@ -168,17 +175,20 @@ def run(config):
|
||||
logger("info", "Done.")
|
||||
return
|
||||
|
||||
# If importing a view between two DMS, use C API
|
||||
if isinstance(input[1], View):
|
||||
# If importing a view between two DMS and not wanting to save space if line selection in original view, use C API
|
||||
if isinstance(input[1], View) and not config['import']['space_priority']:
|
||||
if obi_import_view(input[0].name_with_full_path, o_dms.name_with_full_path, input[1].name, tobytes((config['obi']['outputURI'].split('/'))[-1])) < 0 :
|
||||
input[0].close(force=True)
|
||||
output[0].close(force=True)
|
||||
raise Exception("Error importing a view in a DMS")
|
||||
o_dms.record_command_line(" ".join(sys.argv[1:]))
|
||||
o_dms.close()
|
||||
input[0].close(force=True)
|
||||
output[0].close(force=True)
|
||||
logger("info", "Done.")
|
||||
return
|
||||
|
||||
if entry_count >= 0:
|
||||
pb = ProgressBar(entry_count, config, seconde=5)
|
||||
pb = ProgressBar(entry_count, config)
|
||||
|
||||
NUC_SEQS_view = False
|
||||
if isinstance(output[1], View) :
|
||||
@ -243,7 +253,7 @@ def run(config):
|
||||
if isinstance(input[0], CompressedFile):
|
||||
input_is_file = True
|
||||
if entry_count >= 0:
|
||||
pb = ProgressBar(entry_count, config, seconde=5)
|
||||
pb = ProgressBar(entry_count, config)
|
||||
try:
|
||||
input[0].close()
|
||||
except AttributeError:
|
||||
|
@ -2,10 +2,10 @@
|
||||
|
||||
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||
from obitools3.dms import DMS
|
||||
from obitools3.dms.view import RollbackException
|
||||
from obitools3.dms.view import RollbackException, View
|
||||
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
||||
from obitools3.dms.column.column cimport Column, Column_line
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
|
||||
from obitools3.uri.decode import open_uri
|
||||
from obitools3.apps.config import logger
|
||||
from obitools3.libalign._freeendgapfm import FreeEndGapFullMatch
|
||||
@ -14,13 +14,14 @@ from obitools3.dms.obiseq cimport Nuc_Seq
|
||||
from obitools3.dms.capi.obitypes cimport OBI_SEQ, OBI_QUAL
|
||||
from obitools3.dms.capi.apat cimport MAX_PATTERN
|
||||
from obitools3.dms.capi.obiview cimport REVERSE_SEQUENCE_COLUMN, REVERSE_QUALITY_COLUMN
|
||||
from obitools3.utils cimport tobytes
|
||||
from obitools3.utils cimport tobytes, str2bytes
|
||||
|
||||
from libc.stdint cimport INT32_MAX
|
||||
from functools import reduce
|
||||
import math
|
||||
import sys
|
||||
from cpython.exc cimport PyErr_CheckSignals
|
||||
from io import BufferedWriter
|
||||
|
||||
|
||||
#REVERSE_SEQ_COLUMN_NAME = b"REVERSE_SEQUENCE" # used by alignpairedend tool
|
||||
@ -34,7 +35,8 @@ def addOptions(parser):
|
||||
|
||||
addMinimalInputOption(parser)
|
||||
addMinimalOutputOption(parser)
|
||||
|
||||
addNoProgressBarOption(parser)
|
||||
|
||||
group = parser.add_argument_group('obi ngsfilter specific options')
|
||||
|
||||
group.add_argument('-t','--info-view',
|
||||
@ -58,7 +60,7 @@ def addOptions(parser):
|
||||
metavar="<URI>",
|
||||
type=str,
|
||||
default=None,
|
||||
help="URI to the view used to store the sequences unassigned to any sample")
|
||||
help="URI to the view used to store the sequences unassigned to any sample. Those sequences are untrimmed.")
|
||||
|
||||
group.add_argument('--no-tags',
|
||||
action="store_true", dest="ngsfilter:notags",
|
||||
@ -479,6 +481,8 @@ cdef tuple annotate(sequences, infos, no_tags, verbose=False):
|
||||
if not directmatch[0].forward:
|
||||
sequences[0] = sequences[0].reverse_complement
|
||||
sequences[0][b'reversed'] = True # used by the alignpairedend tool (in kmer_similarity.c)
|
||||
else:
|
||||
sequences[0][b'reversed'] = False # used by the alignpairedend tool (in kmer_similarity.c)
|
||||
|
||||
sample=None
|
||||
if not no_tags:
|
||||
@ -537,7 +541,8 @@ def run(config):
|
||||
raise Exception("Could not open input reads")
|
||||
if input[2] != View_NUC_SEQS:
|
||||
raise NotImplementedError('obi ngsfilter only works on NUC_SEQS views')
|
||||
|
||||
i_dms = input[0]
|
||||
|
||||
if "reverse" in config["ngsfilter"]:
|
||||
|
||||
forward = input[1]
|
||||
@ -578,8 +583,19 @@ def run(config):
|
||||
if output is None:
|
||||
raise Exception("Could not create output view")
|
||||
|
||||
o_dms = output[0]
|
||||
output_0 = output[0]
|
||||
o_view = output[1]
|
||||
|
||||
# If stdout output, create a temporary view in the input dms that will be deleted afterwards.
|
||||
if type(output_0)==BufferedWriter:
|
||||
o_dms = i_dms
|
||||
o_view_name = b"temp"
|
||||
while o_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||
o_view_name = o_view_name+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
o_view = View_NUC_SEQS.new(i_dms, o_view_name)
|
||||
|
||||
# Open the view containing the informations about the tags and the primers
|
||||
info_input = open_uri(config['ngsfilter']['info_view'])
|
||||
if info_input is None:
|
||||
@ -600,7 +616,10 @@ def run(config):
|
||||
unidentified = None
|
||||
|
||||
# Initialize the progress bar
|
||||
pb = ProgressBar(entries_len, config, seconde=5)
|
||||
if config['obi']['noprogressbar'] == False:
|
||||
pb = ProgressBar(entries_len, config)
|
||||
else:
|
||||
pb = None
|
||||
|
||||
# Check and store primers and tags
|
||||
try:
|
||||
@ -634,7 +653,8 @@ def run(config):
|
||||
try:
|
||||
for i in range(entries_len):
|
||||
PyErr_CheckSignals()
|
||||
pb(i)
|
||||
if pb is not None:
|
||||
pb(i)
|
||||
if not_aligned:
|
||||
modseq = [Nuc_Seq.new_from_stored(forward[i]), Nuc_Seq.new_from_stored(reverse[i])]
|
||||
else:
|
||||
@ -644,7 +664,13 @@ def run(config):
|
||||
o_view[g].set(oseq.id, oseq.seq, definition=oseq.definition, quality=oseq.quality, tags=oseq)
|
||||
g+=1
|
||||
elif unidentified is not None:
|
||||
unidentified[u].set(oseq.id, oseq.seq, definition=oseq.definition, quality=oseq.quality, tags=oseq)
|
||||
# Untrim sequences (put original back)
|
||||
if len(modseq) > 1:
|
||||
oseq[REVERSE_SEQUENCE_COLUMN] = reverse[i].seq
|
||||
oseq[REVERSE_QUALITY_COLUMN] = reverse[i].quality
|
||||
unidentified[u].set(oseq.id, forward[i].seq, definition=oseq.definition, quality=forward[i].quality, tags=oseq)
|
||||
else:
|
||||
unidentified[u].set(oseq.id, entries[i].seq, definition=oseq.definition, quality=entries[i].quality, tags=oseq)
|
||||
u+=1
|
||||
except Exception, e:
|
||||
if unidentified is not None:
|
||||
@ -652,8 +678,9 @@ def run(config):
|
||||
else:
|
||||
raise RollbackException("obi ngsfilter error, rollbacking view: "+str(e), o_view)
|
||||
|
||||
pb(i, force=True)
|
||||
print("", file=sys.stderr)
|
||||
if pb is not None:
|
||||
pb(i, force=True)
|
||||
print("", file=sys.stderr)
|
||||
|
||||
# Save command config in View and DMS comments
|
||||
command_line = " ".join(sys.argv[1:])
|
||||
@ -662,13 +689,23 @@ def run(config):
|
||||
unidentified.write_config(config, "ngsfilter", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
|
||||
# Add comment about unidentified seqs
|
||||
unidentified.comments["info"] = "View containing sequences categorized as unidentified by the ngsfilter command"
|
||||
output[0].record_command_line(command_line)
|
||||
o_dms.record_command_line(command_line)
|
||||
|
||||
# stdout output: write to buffer
|
||||
if type(output_0)==BufferedWriter:
|
||||
logger("info", "Printing to output...")
|
||||
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||
o_view.close()
|
||||
|
||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||
#print(repr(o_view), file=sys.stderr)
|
||||
|
||||
input[0].close(force=True)
|
||||
output[0].close(force=True)
|
||||
# If stdout output, delete the temporary result view in the input DMS
|
||||
if type(output_0)==BufferedWriter:
|
||||
View.delete_view(i_dms, o_view_name)
|
||||
|
||||
i_dms.close(force=True)
|
||||
o_dms.close(force=True)
|
||||
info_input[0].close(force=True)
|
||||
if unidentified is not None:
|
||||
unidentified_input[0].close(force=True)
|
||||
|
@ -4,7 +4,7 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||
from obitools3.dms import DMS
|
||||
from obitools3.dms.view.view cimport View, Line_selection
|
||||
from obitools3.uri.decode import open_uri
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
|
||||
from obitools3.dms.view import RollbackException
|
||||
from obitools3.apps.config import logger
|
||||
from obitools3.utils cimport str2bytes
|
||||
@ -24,6 +24,7 @@ from obitools3.dms.capi.obitypes cimport OBI_BOOL, \
|
||||
import time
|
||||
import sys
|
||||
from cpython.exc cimport PyErr_CheckSignals
|
||||
from io import BufferedWriter
|
||||
|
||||
|
||||
NULL_VALUE = {OBI_BOOL: OBIBool_NA,
|
||||
@ -42,6 +43,7 @@ def addOptions(parser):
|
||||
|
||||
addMinimalInputOption(parser)
|
||||
addMinimalOutputOption(parser)
|
||||
addNoProgressBarOption(parser)
|
||||
|
||||
group=parser.add_argument_group('obi sort specific options')
|
||||
|
||||
@ -59,7 +61,7 @@ def addOptions(parser):
|
||||
|
||||
|
||||
def line_cmp(line, key, pb):
|
||||
pb
|
||||
pb
|
||||
if line[key] is None:
|
||||
return NULL_VALUE[line.view[key].data_type_int]
|
||||
else:
|
||||
@ -86,20 +88,28 @@ def run(config):
|
||||
if output is None:
|
||||
raise Exception("Could not create output view")
|
||||
o_dms = output[0]
|
||||
o_view_name_final = output[1]
|
||||
o_view_name = o_view_name_final
|
||||
output_0 = output[0]
|
||||
final_o_view_name = output[1]
|
||||
|
||||
# If the input and output DMS are not the same, create output view in input DMS first, then export it
|
||||
# to output DMS, making sure the temporary view name is unique in the input DMS
|
||||
if i_dms != o_dms:
|
||||
# If stdout output or the input and output DMS are not the same, create a temporary view that will be exported and deleted.
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
temporary_view_name = b"temp"
|
||||
i=0
|
||||
while o_view_name in i_dms:
|
||||
o_view_name = o_view_name_final+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||
temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
o_view_name = temporary_view_name
|
||||
if type(output_0)==BufferedWriter:
|
||||
o_dms = i_dms
|
||||
else:
|
||||
o_view_name = final_o_view_name
|
||||
|
||||
# Initialize the progress bar
|
||||
pb = ProgressBar(len(i_view), config, seconde=5)
|
||||
|
||||
if config['obi']['noprogressbar'] == False:
|
||||
pb = ProgressBar(len(i_view), config)
|
||||
else:
|
||||
pb = None
|
||||
|
||||
keys = config['sort']['keys']
|
||||
|
||||
selection = Line_selection(i_view)
|
||||
@ -110,10 +120,14 @@ def run(config):
|
||||
|
||||
for k in keys: # TODO order?
|
||||
PyErr_CheckSignals()
|
||||
selection.sort(key=lambda line_idx: line_cmp(i_view[line_idx], k, pb(line_idx)), reverse=config['sort']['reverse'])
|
||||
|
||||
pb(len(i_view), force=True)
|
||||
print("", file=sys.stderr)
|
||||
if pb is not None:
|
||||
selection.sort(key=lambda line_idx: line_cmp(i_view[line_idx], k, pb(line_idx)), reverse=config['sort']['reverse'])
|
||||
else:
|
||||
selection.sort(key=lambda line_idx: line_cmp(i_view[line_idx], k, None), reverse=config['sort']['reverse'])
|
||||
|
||||
if pb is not None:
|
||||
pb(len(i_view), force=True)
|
||||
print("", file=sys.stderr)
|
||||
|
||||
# Create output view with the sorted line selection
|
||||
try:
|
||||
@ -132,16 +146,23 @@ def run(config):
|
||||
# and delete the temporary view in the input DMS
|
||||
if i_dms != o_dms:
|
||||
o_view.close()
|
||||
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, o_view_name_final)
|
||||
o_view = o_dms[o_view_name_final]
|
||||
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)
|
||||
o_view = o_dms[final_o_view_name]
|
||||
|
||||
# stdout output: write to buffer
|
||||
if type(output_0)==BufferedWriter:
|
||||
logger("info", "Printing to output...")
|
||||
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||
o_view.close()
|
||||
|
||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||
#print(repr(o_view), file=sys.stderr)
|
||||
|
||||
# If the input and the output DMS are different, delete the temporary imported view used to create the final view
|
||||
if i_dms != o_dms:
|
||||
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
View.delete_view(i_dms, o_view_name)
|
||||
o_dms.close(force=True)
|
||||
|
||||
i_dms.close(force=True)
|
||||
|
||||
logger("info", "Done.")
|
||||
|
@ -162,7 +162,7 @@ def run(config):
|
||||
lcat=0
|
||||
|
||||
# Initialize the progress bar
|
||||
pb = ProgressBar(len(i_view), config, seconde=5)
|
||||
pb = ProgressBar(len(i_view), config)
|
||||
|
||||
for i in range(len(i_view)):
|
||||
PyErr_CheckSignals()
|
||||
|
@ -4,7 +4,7 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||
from obitools3.dms import DMS
|
||||
from obitools3.dms.view.view cimport View, Line_selection
|
||||
from obitools3.uri.decode import open_uri
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
|
||||
from obitools3.dms.view import RollbackException
|
||||
from obitools3.apps.config import logger
|
||||
from obitools3.utils cimport str2bytes
|
||||
@ -12,6 +12,7 @@ from obitools3.utils cimport str2bytes
|
||||
import time
|
||||
import sys
|
||||
from cpython.exc cimport PyErr_CheckSignals
|
||||
from io import BufferedWriter
|
||||
|
||||
|
||||
__title__="Keep the N last lines of a view."
|
||||
@ -21,6 +22,7 @@ def addOptions(parser):
|
||||
|
||||
addMinimalInputOption(parser)
|
||||
addMinimalOutputOption(parser)
|
||||
addNoProgressBarOption(parser)
|
||||
|
||||
group=parser.add_argument_group('obi tail specific options')
|
||||
|
||||
@ -52,31 +54,41 @@ def run(config):
|
||||
if output is None:
|
||||
raise Exception("Could not create output view")
|
||||
o_dms = output[0]
|
||||
o_view_name_final = output[1]
|
||||
o_view_name = o_view_name_final
|
||||
output_0 = output[0]
|
||||
final_o_view_name = output[1]
|
||||
|
||||
# If the input and output DMS are not the same, create output view in input DMS first, then export it
|
||||
# to output DMS, making sure the temporary view name is unique in the input DMS
|
||||
if i_dms != o_dms:
|
||||
# If stdout output or the input and output DMS are not the same, create a temporary view that will be exported and deleted.
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
temporary_view_name = b"temp"
|
||||
i=0
|
||||
while o_view_name in i_dms:
|
||||
o_view_name = o_view_name_final+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||
temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
o_view_name = temporary_view_name
|
||||
if type(output_0)==BufferedWriter:
|
||||
o_dms = i_dms
|
||||
else:
|
||||
o_view_name = final_o_view_name
|
||||
|
||||
start = max(len(i_view) - config['tail']['count'], 0)
|
||||
|
||||
# Initialize the progress bar
|
||||
pb = ProgressBar(len(i_view) - start, config, seconde=5)
|
||||
if config['obi']['noprogressbar'] == False:
|
||||
pb = ProgressBar(len(i_view) - start, config)
|
||||
else:
|
||||
pb = None
|
||||
|
||||
selection = Line_selection(i_view)
|
||||
|
||||
for i in range(start, len(i_view)):
|
||||
PyErr_CheckSignals()
|
||||
pb(i)
|
||||
if pb is not None:
|
||||
pb(i)
|
||||
selection.append(i)
|
||||
|
||||
pb(i, force=True)
|
||||
print("", file=sys.stderr)
|
||||
if pb is not None:
|
||||
pb(i, force=True)
|
||||
print("", file=sys.stderr)
|
||||
|
||||
# Save command config in View comments
|
||||
command_line = " ".join(sys.argv[1:])
|
||||
@ -97,14 +109,20 @@ def run(config):
|
||||
# and delete the temporary view in the input DMS
|
||||
if i_dms != o_dms:
|
||||
o_view.close()
|
||||
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, o_view_name_final)
|
||||
o_view = o_dms[o_view_name_final]
|
||||
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)
|
||||
o_view = o_dms[final_o_view_name]
|
||||
|
||||
# stdout output: write to buffer
|
||||
if type(output_0)==BufferedWriter:
|
||||
logger("info", "Printing to output...")
|
||||
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||
o_view.close()
|
||||
|
||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||
#print(repr(o_view), file=sys.stderr)
|
||||
|
||||
# If the input and the output DMS are different, delete the temporary imported view used to create the final view
|
||||
if i_dms != o_dms:
|
||||
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
|
||||
if i_dms != o_dms or type(output_0)==BufferedWriter:
|
||||
View.delete_view(i_dms, o_view_name)
|
||||
o_dms.close(force=True)
|
||||
i_dms.close(force=True)
|
||||
|
@ -441,7 +441,7 @@ def addOptions(parser):
|
||||
default=20,
|
||||
type=int,
|
||||
help="Maximum length of tuples. "
|
||||
"Default: 200")
|
||||
"Default: 50")
|
||||
|
||||
group.add_argument('--max_ini_col_count','-o',
|
||||
action="store", dest="test:maxinicolcount",
|
||||
@ -457,7 +457,7 @@ def addOptions(parser):
|
||||
default=10000,
|
||||
type=int,
|
||||
help="Maximum number of lines in a column. "
|
||||
"Default: 10000")
|
||||
"Default: 1000")
|
||||
|
||||
group.add_argument('--max_elts_per_line','-e',
|
||||
action="store", dest="test:maxelts",
|
||||
|
@ -14,13 +14,15 @@ from obitools3.dms.capi.obitypes cimport OBI_INT, OBI_STR, index_t
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, \
|
||||
addMinimalOutputOption, \
|
||||
addTaxonomyOption, \
|
||||
addEltLimitOption
|
||||
addEltLimitOption, \
|
||||
addNoProgressBarOption
|
||||
from obitools3.uri.decode import open_uri
|
||||
from obitools3.apps.config import logger
|
||||
from obitools3.utils cimport tobytes, tostr
|
||||
from obitools3.utils cimport tobytes, tostr, str2bytes
|
||||
|
||||
import sys
|
||||
from cpython.exc cimport PyErr_CheckSignals
|
||||
from io import BufferedWriter
|
||||
|
||||
|
||||
__title__="Group sequence records together"
|
||||
@ -32,6 +34,7 @@ def addOptions(parser):
|
||||
addTaxonomyOption(parser)
|
||||
addMinimalOutputOption(parser)
|
||||
addEltLimitOption(parser)
|
||||
addNoProgressBarOption(parser)
|
||||
|
||||
group = parser.add_argument_group('obi uniq specific options')
|
||||
|
||||
@ -143,12 +146,16 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy, dict
|
||||
scientific_name_column = o_view[b"scientific_name"]
|
||||
|
||||
# Initialize the progress bar
|
||||
pb = ProgressBar(len(o_view), config, seconde=5)
|
||||
if config['obi']['noprogressbar'] == False:
|
||||
pb = ProgressBar(len(o_view), config)
|
||||
else:
|
||||
pb = None
|
||||
|
||||
i=0
|
||||
for seq in o_view:
|
||||
PyErr_CheckSignals()
|
||||
pb(i)
|
||||
if pb is not None:
|
||||
pb(i)
|
||||
if MERGED_TAXID_COLUMN in seq :
|
||||
m_taxids = []
|
||||
m_taxids_dict = seq[MERGED_TAXID_COLUMN]
|
||||
@ -191,7 +198,8 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy, dict
|
||||
scientific_name_column[i] = taxonomy.get_scientific_name(taxid)
|
||||
i+=1
|
||||
|
||||
pb(len(o_view), force=True)
|
||||
if pb is not None:
|
||||
pb(len(o_view), force=True)
|
||||
|
||||
|
||||
cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, dict config, list mergedKeys_list=None, Taxonomy taxonomy=None, bint mergeIds=False, list categories=None, int max_elts=1000000) :
|
||||
@ -297,7 +305,8 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, di
|
||||
iter_view = iter(view)
|
||||
for i_seq in iter_view :
|
||||
PyErr_CheckSignals()
|
||||
pb(i)
|
||||
if pb is not None:
|
||||
pb(i)
|
||||
|
||||
# This can't be done in the same line as the unique_id tuple creation because it generates a bug
|
||||
# where Cython (version 0.25.2) does not detect the reference to the categs_list variable and deallocates
|
||||
@ -307,6 +316,7 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, di
|
||||
for x in categories :
|
||||
catl.append(i_seq[x])
|
||||
|
||||
#unique_id = tuple(catl) + (i_seq_col[i],)
|
||||
unique_id = tuple(catl) + (i_seq_col.get_line_idx(i),)
|
||||
#unique_id = tuple(i_seq[x] for x in categories) + (seq_col.get_line_idx(i),) # The line that cython can't read properly
|
||||
|
||||
@ -414,12 +424,17 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, di
|
||||
o_count_col = o_view[COUNT_COLUMN]
|
||||
if COUNT_COLUMN in view:
|
||||
i_count_col = view[COUNT_COLUMN]
|
||||
|
||||
if pb is not None:
|
||||
pb(len(view), force=True)
|
||||
print("")
|
||||
|
||||
pb(len(view), force=True)
|
||||
print("")
|
||||
logger("info", "Second browsing through the input")
|
||||
|
||||
# Initialize the progress bar
|
||||
pb = ProgressBar(len(view), seconde=5)
|
||||
if pb is not None:
|
||||
pb = ProgressBar(len(view))
|
||||
|
||||
o_idx = 0
|
||||
total_treated = 0
|
||||
|
||||
@ -453,7 +468,10 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, di
|
||||
merged_dict[mkey] = {}
|
||||
|
||||
for i_idx in merged_sequences:
|
||||
pb(total_treated)
|
||||
PyErr_CheckSignals()
|
||||
|
||||
if pb is not None:
|
||||
pb(total_treated)
|
||||
|
||||
i_id = i_id_col[i_idx]
|
||||
i_seq = view[i_idx]
|
||||
@ -529,7 +547,8 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, di
|
||||
o_count_col[o_idx] = o_count
|
||||
o_idx += 1
|
||||
|
||||
pb(len(view), force=True)
|
||||
if pb is not None:
|
||||
pb(len(view), force=True)
|
||||
|
||||
# Deletes quality columns if there is one because the matching between sequence and quality will be broken (quality set to NA when sequence not)
|
||||
if QUALITY_COLUMN in view:
|
||||
@ -577,8 +596,23 @@ def run(config):
|
||||
if output is None:
|
||||
raise Exception("Could not create output view")
|
||||
|
||||
i_dms = input[0]
|
||||
entries = input[1]
|
||||
o_view = output[1]
|
||||
o_dms = output[0]
|
||||
output_0 = output[0]
|
||||
|
||||
# If stdout output create a temporary view that will be exported and deleted.
|
||||
if type(output_0)==BufferedWriter:
|
||||
temporary_view_name = b"temp"
|
||||
i=0
|
||||
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
|
||||
temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
|
||||
i+=1
|
||||
o_view_name = temporary_view_name
|
||||
o_dms = i_dms
|
||||
o_view = View_NUC_SEQS.new(i_dms, o_view_name)
|
||||
else:
|
||||
o_view = output[1]
|
||||
|
||||
if 'taxoURI' in config['obi'] and config['obi']['taxoURI'] is not None:
|
||||
taxo_uri = open_uri(config['obi']['taxoURI'])
|
||||
@ -589,7 +623,10 @@ def run(config):
|
||||
taxo = None
|
||||
|
||||
# Initialize the progress bar
|
||||
pb = ProgressBar(len(entries), config, seconde=5)
|
||||
if config['obi']['noprogressbar'] == False:
|
||||
pb = ProgressBar(len(entries), config)
|
||||
else:
|
||||
pb = None
|
||||
|
||||
if len(entries) > 0:
|
||||
try:
|
||||
@ -597,7 +634,8 @@ def run(config):
|
||||
except Exception, e:
|
||||
raise RollbackException("obi uniq error, rollbacking view: "+str(e), o_view)
|
||||
|
||||
print("", file=sys.stderr)
|
||||
if pb is not None:
|
||||
print("", file=sys.stderr)
|
||||
|
||||
# Save command config in View and DMS comments
|
||||
command_line = " ".join(sys.argv[1:])
|
||||
@ -607,13 +645,23 @@ def run(config):
|
||||
input_dms_name.append(config['obi']['taxoURI'].split("/")[-3])
|
||||
input_view_name.append("taxonomy/"+config['obi']['taxoURI'].split("/")[-1])
|
||||
o_view.write_config(config, "uniq", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
|
||||
output[0].record_command_line(command_line)
|
||||
o_dms.record_command_line(command_line)
|
||||
|
||||
# stdout output: write to buffer
|
||||
if type(output_0)==BufferedWriter:
|
||||
logger("info", "Printing to output...")
|
||||
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
|
||||
o_view.close()
|
||||
|
||||
#print("\n\nOutput view:\n````````````", file=sys.stderr)
|
||||
#print(repr(o_view), file=sys.stderr)
|
||||
|
||||
input[0].close(force=True)
|
||||
output[0].close(force=True)
|
||||
|
||||
# If stdout output, delete the temporary result view in the input DMS
|
||||
if type(output_0)==BufferedWriter:
|
||||
View.delete_view(i_dms, o_view_name)
|
||||
|
||||
i_dms.close(force=True)
|
||||
o_dms.close(force=True)
|
||||
|
||||
logger("info", "Done.")
|
||||
|
||||
|
@ -22,6 +22,7 @@ cdef class Column(OBIWrapper) :
|
||||
|
||||
cdef inline OBIDMS_column_p pointer(self)
|
||||
cdef read_elements_names(self)
|
||||
cpdef list keys(self)
|
||||
|
||||
@staticmethod
|
||||
cdef type get_column_class(obitype_t obitype, bint multi_elts, bint tuples)
|
||||
|
@ -40,7 +40,8 @@ from obitools3.utils cimport tobytes, \
|
||||
from obitools3.dms.column import typed_column
|
||||
|
||||
from libc.stdlib cimport free
|
||||
|
||||
from libc.string cimport strcpy
|
||||
|
||||
import importlib
|
||||
import inspect
|
||||
import pkgutil
|
||||
@ -97,6 +98,7 @@ cdef class Column(OBIWrapper) :
|
||||
object alias=b""):
|
||||
# TODO indexer_name?
|
||||
|
||||
cdef Column column
|
||||
cdef bytes column_name_b = tobytes(column_name)
|
||||
cdef bytes alias_b = tobytes(alias)
|
||||
cdef bytes comments_b = str2bytes(json.dumps(bytes2str_object(comments)))
|
||||
@ -132,13 +134,14 @@ cdef class Column(OBIWrapper) :
|
||||
raise RuntimeError("Cannot create column %s in view %s: trying to create quality column but no NUC_SEQ column to associate it with in the view" % (bytes2str(column_name_b),
|
||||
bytes2str(view.name)))
|
||||
associated_column_name_b = NUC_SEQUENCE_COLUMN
|
||||
associated_column_version = view[NUC_SEQUENCE_COLUMN].version
|
||||
associated_column_version = view[NUC_SEQUENCE_COLUMN].version
|
||||
elif column_name == REVERSE_QUALITY_COLUMN:
|
||||
if REVERSE_SEQUENCE_COLUMN not in view:
|
||||
raise RuntimeError("Cannot create column %s in view %s: trying to create reverse quality column but no REVERSE_SEQUENCE column to associate it with in the view" % (bytes2str(column_name_b),
|
||||
bytes2str(view.name)))
|
||||
associated_column_name_b = REVERSE_SEQUENCE_COLUMN
|
||||
associated_column_version = view[REVERSE_SEQUENCE_COLUMN].version
|
||||
|
||||
|
||||
if (obi_view_add_column(view = view.pointer(),
|
||||
column_name = column_name_b,
|
||||
@ -158,8 +161,19 @@ cdef class Column(OBIWrapper) :
|
||||
create = True)<0):
|
||||
raise RuntimeError("Cannot create column %s in view %s" % (bytes2str(column_name_b),
|
||||
bytes2str(view.name)))
|
||||
|
||||
return Column.open(view, alias_b)
|
||||
|
||||
column = Column.open(view, alias_b)
|
||||
|
||||
# Automatically associate nuc sequence column to quality column if necessary
|
||||
if data_type == OBI_QUAL:
|
||||
if column_name == QUALITY_COLUMN:
|
||||
view[NUC_SEQUENCE_COLUMN].associated_column_name = column.name
|
||||
view[NUC_SEQUENCE_COLUMN].associated_column_version = column.version
|
||||
elif column_name == REVERSE_QUALITY_COLUMN:
|
||||
view[REVERSE_SEQUENCE_COLUMN].associated_column_name = column.name
|
||||
view[REVERSE_SEQUENCE_COLUMN].associated_column_version = column.version
|
||||
|
||||
return column
|
||||
|
||||
|
||||
@staticmethod
|
||||
@ -323,7 +337,10 @@ cdef class Column(OBIWrapper) :
|
||||
free(elts_names_b)
|
||||
return elts_names_list
|
||||
|
||||
|
||||
cpdef list keys(self):
|
||||
return self._elements_names
|
||||
|
||||
|
||||
# Column alias property getter and setter
|
||||
@property
|
||||
def name(self):
|
||||
@ -340,7 +357,7 @@ cdef class Column(OBIWrapper) :
|
||||
@property
|
||||
def elements_names(self):
|
||||
return self._elements_names
|
||||
|
||||
|
||||
# nb_elements_per_line property getter
|
||||
@property
|
||||
def nb_elements_per_line(self):
|
||||
@ -404,6 +421,31 @@ cdef class Column(OBIWrapper) :
|
||||
raise OBIDeactivatedInstanceError()
|
||||
return obi_format_date(self.pointer().header.creation_date)
|
||||
|
||||
|
||||
# associated_column name property getter and setter
|
||||
@property
|
||||
def associated_column_name(self):
|
||||
if not self.active() :
|
||||
raise OBIDeactivatedInstanceError()
|
||||
return self.pointer().header.associated_column.column_name
|
||||
|
||||
@associated_column_name.setter
|
||||
def associated_column_name(self, object new_name):
|
||||
strcpy(self.pointer().header.associated_column.column_name, tobytes(new_name))
|
||||
|
||||
|
||||
# associated_column version property getter and setter
|
||||
@property
|
||||
def associated_column_version(self):
|
||||
if not self.active() :
|
||||
raise OBIDeactivatedInstanceError()
|
||||
return self.pointer().header.associated_column.version
|
||||
|
||||
@associated_column_version.setter
|
||||
def associated_column_version(self, int new_version):
|
||||
self.pointer().header.associated_column.version = new_version
|
||||
|
||||
|
||||
# comments property getter
|
||||
@property
|
||||
def comments(self):
|
||||
|
@ -39,4 +39,6 @@ cdef class Nuc_Seq_Stored(Seq_Stored) :
|
||||
cpdef set_quality_char(self, object new_qual, int offset=*)
|
||||
cpdef object build_quality_array(self, list quality)
|
||||
cpdef bytes build_reverse_complement(self)
|
||||
cpdef str get_str(self)
|
||||
cpdef str get_str(self)
|
||||
cpdef repr_bytes(self)
|
||||
|
@ -431,9 +431,12 @@ cdef class Nuc_Seq_Stored(Seq_Stored) :
|
||||
return len(self._view.get_column(NUC_SEQUENCE_COLUMN).get_line(self.index))
|
||||
|
||||
def __repr__(self):
|
||||
return bytes2str(self.repr_bytes())
|
||||
|
||||
cpdef repr_bytes(self):
|
||||
if self.quality is None:
|
||||
formatter = FastaFormat()
|
||||
else:
|
||||
formatter = FastqFormat()
|
||||
return bytes2str(formatter(self))
|
||||
return formatter(self)
|
||||
|
||||
|
@ -20,6 +20,10 @@ cdef class View(OBIWrapper):
|
||||
cdef DMS _dms
|
||||
|
||||
cdef inline Obiview_p pointer(self)
|
||||
|
||||
cpdef print_to_output(self,
|
||||
object output,
|
||||
bint noprogressbar=*)
|
||||
|
||||
cpdef delete_column(self,
|
||||
object column_name,
|
||||
@ -61,6 +65,8 @@ cdef class Line :
|
||||
cdef index_t _index
|
||||
cdef View _view
|
||||
|
||||
cpdef repr_bytes(self)
|
||||
|
||||
|
||||
cdef register_view_class(bytes view_type_name,
|
||||
type view_class)
|
||||
|
@ -6,6 +6,8 @@ cdef dict __VIEW_CLASS__= {}
|
||||
|
||||
from libc.stdlib cimport malloc
|
||||
|
||||
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||
|
||||
from ..capi.obiview cimport Alias_column_pair_p, \
|
||||
obi_new_view, \
|
||||
obi_open_view, \
|
||||
@ -48,10 +50,13 @@ from ..capi.obidms cimport obi_import_view
|
||||
|
||||
from obitools3.format.tab import TabFormat
|
||||
|
||||
from cpython.exc cimport PyErr_CheckSignals
|
||||
|
||||
import importlib
|
||||
import inspect
|
||||
import pkgutil
|
||||
import json
|
||||
import sys
|
||||
|
||||
|
||||
cdef class View(OBIWrapper) :
|
||||
@ -184,7 +189,31 @@ cdef class View(OBIWrapper) :
|
||||
for column_name in self.keys() :
|
||||
s = s + repr(self[column_name]) + '\n'
|
||||
return s
|
||||
|
||||
|
||||
|
||||
cpdef print_to_output(self, object output, bint noprogressbar=False):
|
||||
|
||||
cdef int i
|
||||
cdef Line entry
|
||||
|
||||
self.checkIsActive(self)
|
||||
|
||||
# Initialize the progress bar
|
||||
if noprogressbar == False:
|
||||
pb = ProgressBar(len(self))
|
||||
else:
|
||||
pb = None
|
||||
i=0
|
||||
for entry in self:
|
||||
PyErr_CheckSignals()
|
||||
if pb is not None:
|
||||
pb(i)
|
||||
output.write(entry.repr_bytes()+b"\n")
|
||||
i+=1
|
||||
if pb is not None:
|
||||
pb(len(self), force=True)
|
||||
print("", file=sys.stderr)
|
||||
|
||||
|
||||
def keys(self):
|
||||
|
||||
@ -757,8 +786,12 @@ cdef class Line :
|
||||
|
||||
|
||||
def __repr__(self):
|
||||
return bytes2str(self).repr_bytes()
|
||||
|
||||
|
||||
cpdef repr_bytes(self):
|
||||
formatter = TabFormat(header=False)
|
||||
return bytes2str(formatter(self))
|
||||
return formatter(self)
|
||||
|
||||
|
||||
# View property getter
|
||||
|
@ -3,7 +3,7 @@
|
||||
cimport cython
|
||||
from obitools3.dms.view.view cimport Line
|
||||
from obitools3.utils cimport bytes2str_object, str2bytes, tobytes
|
||||
from obitools3.dms.column.column cimport Column_line
|
||||
from obitools3.dms.column.column cimport Column_line, Column_multi_elts
|
||||
|
||||
|
||||
cdef class TabFormat:
|
||||
@ -25,19 +25,29 @@ cdef class TabFormat:
|
||||
for k in self.tags:
|
||||
|
||||
if self.header and self.first_line:
|
||||
value = tobytes(k)
|
||||
if isinstance(data.view[k], Column_multi_elts):
|
||||
for k2 in data.view[k].keys():
|
||||
line.append(tobytes(k)+b':'+tobytes(k2))
|
||||
else:
|
||||
line.append(tobytes(k))
|
||||
else:
|
||||
value = data[k]
|
||||
if value is not None:
|
||||
if type(value) == Column_line:
|
||||
value = value.bytes()
|
||||
if isinstance(data.view[k], Column_multi_elts):
|
||||
if value is None: # all keys at None
|
||||
for k2 in data.view[k].keys(): # TODO could be much more efficient
|
||||
line.append(self.NAString)
|
||||
else:
|
||||
value = str2bytes(str(bytes2str_object(value))) # genius programming
|
||||
if value is None:
|
||||
value = self.NAString
|
||||
|
||||
line.append(value)
|
||||
|
||||
for k2 in data.view[k].keys(): # TODO could be much more efficient
|
||||
if value[k2] is not None:
|
||||
line.append(str2bytes(str(bytes2str_object(value[k2])))) # genius programming
|
||||
else:
|
||||
line.append(self.NAString)
|
||||
else:
|
||||
if value is not None:
|
||||
line.append(str2bytes(str(bytes2str_object(value))))
|
||||
else:
|
||||
line.append(self.NAString)
|
||||
|
||||
if self.first_line:
|
||||
self.first_line = False
|
||||
|
||||
|
@ -183,8 +183,9 @@ def buildConsensus(ali, seq, ref_tags=None):
|
||||
# doesn't work because uint8_t* are forced into bytes by cython (nothing read/kept beyond 0 values)
|
||||
#obi_set_qual_int_with_elt_idx_and_col_p_in_view(view_p, col_p, seq.index, 0, ali.consensus_qual, ali.consensus_len)
|
||||
seq.set(ref_tags.id+b"_CONS", ali.consensus_seq, quality=ali.consensus_qual)
|
||||
seq[b'ali_length'] = ali.consensus_len
|
||||
seq[b'score_norm']=float(ali.score)/ali.consensus_len
|
||||
seq[b"seq_length"] = ali.consensus_len
|
||||
seq[b"overlap_length"] = ali.overlap_len
|
||||
seq[b'score_norm']=round(float(ali.score)/ali.overlap_len, 3)
|
||||
seq[b'shift']=ali.shift
|
||||
else:
|
||||
if len(ali[0])>999: # TODO why?
|
||||
@ -256,9 +257,10 @@ def buildJoinedSequence(ali, reverse, seq, forward=None):
|
||||
seq[b"ali_direction"]=None
|
||||
seq[b"mode"]=b"joined"
|
||||
seq[b"pairedend_limit"]=len(forward)
|
||||
seq[b"ali_length"] = ali.consensus_len
|
||||
seq[b"seq_length"] = ali.consensus_len
|
||||
seq[b"overlap_length"] = ali.overlap_len
|
||||
if ali.consensus_len > 0:
|
||||
seq[b"score_norm"]=float(ali.score)/ali.consensus_len
|
||||
seq[b'score_norm']=round(float(ali.score)/ali.overlap_len, 3)
|
||||
else:
|
||||
seq[b"score_norm"]=0.0
|
||||
|
||||
|
@ -210,10 +210,11 @@ def open_uri(uri,
|
||||
|
||||
error = None
|
||||
|
||||
if scheme==b"dms" or \
|
||||
(scheme==b"" and \
|
||||
(((not input) and "outputformat" not in config["obi"]) or \
|
||||
(input and "inputformat" not in config["obi"]))): # TODO maybe not best way
|
||||
if urib != b"-" and \
|
||||
(scheme==b"dms" or \
|
||||
(scheme==b"" and \
|
||||
(((not input) and "outputformat" not in config["obi"]) or \
|
||||
(input and "inputformat" not in config["obi"])))): # TODO maybe not best way
|
||||
|
||||
if default_dms is not None and b"/" not in urip.path: # assuming view to open in default DMS (TODO discuss)
|
||||
dms=(default_dms, urip.path)
|
||||
@ -275,10 +276,10 @@ def open_uri(uri,
|
||||
iseq = urib
|
||||
objclass = bytes
|
||||
else: # TODO update uopen to be able to write?
|
||||
if urip.path:
|
||||
file = open(urip.path, 'wb')
|
||||
else:
|
||||
if urip.path == b'-':
|
||||
file = sys.stdout.buffer
|
||||
elif urip.path :
|
||||
file = open(urip.path, 'wb')
|
||||
|
||||
if file is not None:
|
||||
qualifiers=parse_qs(urip.query)
|
||||
|
@ -166,7 +166,9 @@ cdef object bytes2str_object(object value): # Only works if complex types are d
|
||||
value[k] = bytes2str(v)
|
||||
if type(k) == bytes:
|
||||
value[bytes2str(k)] = value.pop(k)
|
||||
elif isinstance(value, list):
|
||||
elif isinstance(value, list) or isinstance(value, tuple):
|
||||
if isinstance(value, tuple):
|
||||
value = list(value)
|
||||
for i in range(len(value)):
|
||||
if isinstance(value[i], list) or isinstance(value[i], dict):
|
||||
value[i] = bytes2str_object(value[i])
|
||||
|
@ -1,5 +1,5 @@
|
||||
major = 3
|
||||
minor = 0
|
||||
serial= '0b25'
|
||||
serial= '0b29'
|
||||
|
||||
version ="%d.%d.%s" % (major,minor,serial)
|
||||
|
@ -414,7 +414,10 @@ Obi_ali_p kmer_similarity(Obiview_p view1, OBIDMS_column_p column1, index_t idx1
|
||||
}
|
||||
|
||||
if (max_common_kmers > 0)
|
||||
score = max_common_kmers + kmer_size - 1; // aka the number of nucleotides in the longest stretch of kmers perfectly matching
|
||||
score = max_common_kmers + kmer_size - 1; // aka an approximation of the number of nucleotides matching in the overlap of the alignment.
|
||||
// It's an approximation because one mismatch produces kmer_size kmer mismatches if in the middle of the overlap,
|
||||
// and less for mismatches located towards the ends of the overlap. The case where there are the most mismatches is assumed,
|
||||
// meaning that the score will be often underestimated and never overestimated.
|
||||
else
|
||||
score = 0;
|
||||
abs_shift = abs(best_shift);
|
||||
|
@ -27,7 +27,11 @@
|
||||
* @brief Alignment structure, with informations about the similarity and to rebuild the alignment.
|
||||
*/
|
||||
typedef struct Obi_ali {
|
||||
int score; /**< Alignment score, corresponding to the number of nucleotides in the longest stretch of kmers perfectly matching.
|
||||
int score; /**< Alignment score, corresponding to an approximation of the number of
|
||||
* nucleotides matching in the overlap of the alignment.
|
||||
* It's an approximation because one mismatch produces kmer_size kmer mismatches if in the middle of the overlap,
|
||||
* and less for mismatches located towards the ends of the overlap. The case where there are the most mismatches is assumed,
|
||||
* meaning that the score will be often underestimated and never overestimated.
|
||||
*/
|
||||
int consensus_length; /**< Length of the final consensus sequence.
|
||||
*/
|
||||
|
@ -246,7 +246,16 @@ int obi_clean(const char* dms_name,
|
||||
|
||||
// Open the sample column if there is one
|
||||
if ((strcmp(sample_column_name, "") == 0) || (sample_column_name == NULL))
|
||||
sample_column = NULL;
|
||||
{
|
||||
fprintf(stderr, "Info: No sample information provided, assuming one sample.\n");
|
||||
sample_column = obi_view_get_column(i_view, COUNT_COLUMN);
|
||||
if (sample_column == NULL)
|
||||
{
|
||||
obidebug(1, "\nError getting the COUNT column");
|
||||
return -1;
|
||||
}
|
||||
sample_count = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
sample_column = obi_view_get_column(i_view, sample_column_name);
|
||||
@ -255,6 +264,13 @@ int obi_clean(const char* dms_name,
|
||||
obidebug(1, "\nError getting the sample column");
|
||||
return -1;
|
||||
}
|
||||
sample_count = (sample_column->header)->nb_elements_per_line;
|
||||
// Check that the sample column is a merged column with all sample informations
|
||||
if (sample_count == 1)
|
||||
{
|
||||
obidebug(1, "\n\nError: If a sample column is provided, it must contain 'merged' sample counts as built by obi uniq with the -m option\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// Create the output view, or a temporary one if heads only
|
||||
@ -279,8 +295,6 @@ int obi_clean(const char* dms_name,
|
||||
return -1;
|
||||
}
|
||||
|
||||
sample_count = (sample_column->header)->nb_elements_per_line;
|
||||
|
||||
// Create the output columns
|
||||
if (create_output_columns(o_view, sample_column, sample_count) < 0)
|
||||
{
|
||||
@ -549,7 +563,7 @@ int obi_clean(const char* dms_name,
|
||||
|
||||
if (heads_only)
|
||||
{
|
||||
line_selection = malloc((o_view->infos)->line_count * sizeof(index_t));
|
||||
line_selection = malloc((((o_view->infos)->line_count) + 1) * sizeof(index_t));
|
||||
if (line_selection == NULL)
|
||||
{
|
||||
obi_set_errno(OBI_MALLOC_ERROR);
|
||||
|
@ -52,7 +52,8 @@
|
||||
*
|
||||
* @param dms A pointer on an OBIDMS.
|
||||
* @param i_view_name The name of the input view.
|
||||
* @param sample_column_name The name of the OBI_STR column in the input view where the sample information is kept.
|
||||
* @param sample_column_name The name of the column in the input view where the sample information is kept.
|
||||
* Must be merged informations as built by the obi uniq tool (checked by the function).
|
||||
* NULL or "" (empty string) if there is no sample information.
|
||||
* @param o_view_name The name of the output view where the results should be written (should not already exist).
|
||||
* @param o_view_comments The comments that should be associated with the output view.
|
||||
|
16
src/obiavl.c
16
src/obiavl.c
@ -2259,7 +2259,13 @@ index_t obi_avl_add(OBIDMS_avl_p avl, Obi_blob_p value)
|
||||
parent = next;
|
||||
|
||||
// Compare the crc of the value with the crc of the current node
|
||||
comp = (current_node->crc64) - crc;
|
||||
//comp = (current_node->crc64) - crc;
|
||||
if ((current_node->crc64) == crc)
|
||||
comp = 0;
|
||||
else if ((current_node->crc64) > crc)
|
||||
comp = 1;
|
||||
else
|
||||
comp = -1;
|
||||
|
||||
if (comp == 0)
|
||||
{ // check if really same value
|
||||
@ -2354,7 +2360,13 @@ index_t obi_avl_find(OBIDMS_avl_p avl, Obi_blob_p value)
|
||||
current_node = (avl->tree)+next;
|
||||
|
||||
// Compare the crc of the value with the crc of the current node
|
||||
comp = (current_node->crc64) - crc;
|
||||
//comp = (current_node->crc64) - crc;
|
||||
if ((current_node->crc64) == crc)
|
||||
comp = 0;
|
||||
else if ((current_node->crc64) > crc)
|
||||
comp = 1;
|
||||
else
|
||||
comp = -1;
|
||||
|
||||
if (comp == 0)
|
||||
{ // Check if really same value
|
||||
|
@ -1659,6 +1659,12 @@ int obi_import_view(const char* dms_path_1, const char* dms_path_2, const char*
|
||||
else // Non-typed view
|
||||
view_2 = obi_new_view(dms_2, view_name_2, NULL, NULL, (view_1->infos)->comments);
|
||||
|
||||
if (view_2 == NULL)
|
||||
{
|
||||
obidebug(1, "\nError creating the new view to import a view in a DMS");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Import line count
|
||||
view_2->infos->line_count = view_1->infos->line_count;
|
||||
|
||||
|
@ -1312,19 +1312,10 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Store the associated column reference if needed // TODO discuss cases
|
||||
if (data_type == OBI_QUAL)
|
||||
// Store the associated column reference if needed
|
||||
if ((associated_column_name != NULL) && (*associated_column_name != '\0'))
|
||||
{
|
||||
if ((associated_column_name == NULL) || (*associated_column_name == '\0'))
|
||||
{
|
||||
obidebug(1, "\nError: The name of the associated column when creating a new column is NULL");
|
||||
munmap(new_column->header, header_size);
|
||||
close(column_file_descriptor);
|
||||
free(new_column);
|
||||
return NULL;
|
||||
}
|
||||
strcpy((header->associated_column).column_name, associated_column_name);
|
||||
|
||||
if (associated_column_version == -1)
|
||||
{
|
||||
obidebug(1, "\nError: The version of the associated column when creating a new column is not defined");
|
||||
@ -1336,6 +1327,7 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
|
||||
(header->associated_column).version = associated_column_version;
|
||||
}
|
||||
|
||||
|
||||
// If the data type is OBI_STR, OBI_SEQ or OBI_QUAL, the associated obi_indexer is opened or created
|
||||
if ((returned_data_type == OBI_STR) || (returned_data_type == OBI_SEQ) || (returned_data_type == OBI_QUAL) || tuples)
|
||||
{
|
||||
|
@ -254,11 +254,15 @@ static int update_lines(Obiview_p view, index_t line_count);
|
||||
/**
|
||||
* @brief Internal function to clone a column in the context of a view.
|
||||
*
|
||||
* Used to edit a closed column.
|
||||
*
|
||||
* Clones with the right line selection and replaces the cloned columns with the new ones in the view.
|
||||
* If there is a line selection, all columns have to be cloned, otherwise only the column of interest is cloned.
|
||||
*
|
||||
* @param view A pointer on the view.
|
||||
* @param column_name The name of the column in the view that should be cloned.
|
||||
* @param clone_associated Whether to clone the associated column
|
||||
* (should always be true except when calling from the function itself to avoid infinite recursion).
|
||||
*
|
||||
* @returns A pointer on the new column.
|
||||
* @retval NULL if an error occurred.
|
||||
@ -266,7 +270,7 @@ static int update_lines(Obiview_p view, index_t line_count);
|
||||
* @since February 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
static OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name);
|
||||
static OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name, bool clone_associated);
|
||||
|
||||
|
||||
/**
|
||||
@ -845,7 +849,7 @@ static int update_lines(Obiview_p view, index_t line_count)
|
||||
// Clone the column first if needed
|
||||
if (!(column->writable))
|
||||
{
|
||||
column = clone_column_in_view(view, (((view->infos)->column_references)[i]).alias);
|
||||
column = clone_column_in_view(view, (((view->infos)->column_references)[i]).alias, true);
|
||||
if (column == NULL)
|
||||
{
|
||||
obidebug(1, "\nError cloning a column in a view when updating its line count");
|
||||
@ -870,12 +874,14 @@ static int update_lines(Obiview_p view, index_t line_count)
|
||||
}
|
||||
|
||||
|
||||
static OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name)
|
||||
static OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name, bool clone_associated)
|
||||
{
|
||||
int i;
|
||||
int i, j;
|
||||
OBIDMS_column_p column = NULL;
|
||||
OBIDMS_column_p new_column = NULL;
|
||||
OBIDMS_column_p column_buffer;
|
||||
OBIDMS_column_p associated_cloned_column = NULL;
|
||||
char* associated_column_alias = NULL;
|
||||
|
||||
// Check that the view is not read-only
|
||||
if (view->read_only)
|
||||
@ -916,11 +922,62 @@ static OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_n
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Look for associated column to clone and reassociate
|
||||
if ((column_buffer->header->associated_column).column_name[0] != '\0')
|
||||
{
|
||||
// Get the associated column alias
|
||||
j=0;
|
||||
while (((strcmp((((view->infos)->column_references)[j]).column_refs.column_name, (column_buffer->header->associated_column).column_name)) ||
|
||||
((((view->infos)->column_references)[j]).column_refs.version != (column_buffer->header->associated_column).version)) &&
|
||||
j<(view->infos)->column_count) // TODO function for that
|
||||
j++;
|
||||
|
||||
if (j == (view->infos)->column_count) // not found
|
||||
{
|
||||
obi_set_errno(OBIVIEW_ERROR);
|
||||
obidebug(1, "\nCould not find associated column when cloning a column for editing");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// No line selection: only this column is cloned, clone and reassociate the associated column
|
||||
if ((view->line_selection == NULL) && clone_associated)
|
||||
{
|
||||
associated_column_alias = (((view->infos)->column_references)[j]).alias;
|
||||
// Clone the associated column
|
||||
associated_cloned_column = clone_column_in_view(view, associated_column_alias, false);
|
||||
// Reassociate both ways
|
||||
strcpy((associated_cloned_column->header->associated_column).column_name, column->header->name);
|
||||
(associated_cloned_column->header->associated_column).version = column->header->version;
|
||||
strcpy((column->header->associated_column).column_name, associated_cloned_column->header->name);
|
||||
(column->header->associated_column).version = associated_cloned_column->header->version;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Line selection: all columns are cloned, check if associated column has been cloned previously (it precedes this one in the list) to reassociate
|
||||
if (j < i)
|
||||
{
|
||||
// Get pointer to associated column
|
||||
associated_cloned_column = *((OBIDMS_column_p*)ll_get(view->columns, j));
|
||||
if (associated_cloned_column == NULL)
|
||||
{
|
||||
obi_set_errno(OBIVIEW_ERROR);
|
||||
obidebug(1, "\nError getting a column to clone from the linked list of column pointers of a view");
|
||||
return NULL;
|
||||
}
|
||||
// Reassociate both ways
|
||||
strcpy((associated_cloned_column->header->associated_column).column_name, column->header->name);
|
||||
(associated_cloned_column->header->associated_column).version = column->header->version;
|
||||
strcpy((column->header->associated_column).column_name, associated_cloned_column->header->name);
|
||||
(column->header->associated_column).version = associated_cloned_column->header->version;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Close old cloned column
|
||||
obi_close_column(column_buffer);
|
||||
|
||||
if (!strcmp((((view->infos)->column_references)[i]).alias, column_name))
|
||||
// Found the column to return
|
||||
// Get the column to return
|
||||
new_column = column;
|
||||
}
|
||||
}
|
||||
@ -1193,7 +1250,7 @@ static int prepare_to_set_value_in_column(Obiview_p view, OBIDMS_column_p* colum
|
||||
return -1;
|
||||
}
|
||||
|
||||
(*column_pp) = clone_column_in_view(view, column_name);
|
||||
(*column_pp) = clone_column_in_view(view, column_name, true);
|
||||
if ((*column_pp) == NULL)
|
||||
{
|
||||
obidebug(1, "\nError trying to clone a column to modify it");
|
||||
@ -1844,6 +1901,7 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v
|
||||
{
|
||||
Obiview_p view;
|
||||
OBIDMS_column_p associated_nuc_column;
|
||||
OBIDMS_column_p associated_qual_column;
|
||||
int nb_predicates;
|
||||
|
||||
if (view_to_clone != NULL)
|
||||
@ -1896,6 +1954,10 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v
|
||||
obidebug(1, "Error adding an obligatory column in a nucleotide sequences view");
|
||||
return NULL;
|
||||
}
|
||||
// Associating both ways: associating nuc sequences column to quality column
|
||||
associated_qual_column = obi_view_get_column(view, QUALITY_COLUMN);
|
||||
strcpy((associated_nuc_column->header->associated_column).column_name, associated_qual_column->header->name);
|
||||
(associated_nuc_column->header->associated_column).version = associated_qual_column->header->version;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1922,7 +1984,7 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v
|
||||
(view->predicate_functions)[(view->nb_predicates)] = view_has_nuc_sequence_column;
|
||||
(view->predicate_functions)[(view->nb_predicates) + 1] = view_has_id_column;
|
||||
(view->predicate_functions)[(view->nb_predicates) + 2] = view_has_definition_column;
|
||||
// if (quality_column) # TODO discuss. Commented bc for example with obi annotate, clone view so clone predicate, then modify seq, so quality is deleted, and predicate boom
|
||||
// if (quality_column) # TODO fix by triggering predicate deleting if quality deleting. Commented bc for example with obi annotate, clone view so clone predicate, then modify seq, so quality is deleted, and predicate boom
|
||||
// (view->predicate_functions)[(view->nb_predicates) + 3] = view_has_quality_column;
|
||||
|
||||
view->nb_predicates = nb_predicates;
|
||||
@ -2212,7 +2274,7 @@ Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name)
|
||||
|
||||
// TODO return a pointer on the column?
|
||||
int obi_view_add_column(Obiview_p view,
|
||||
char* column_name,
|
||||
char* column_name,
|
||||
obiversion_t version_number,
|
||||
const char* alias,
|
||||
OBIType_t data_type,
|
||||
|
@ -406,7 +406,7 @@ Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name);
|
||||
* @param associated_column_name The name of the associated column if there is one (otherwise NULL or ""), if the column is created.
|
||||
* @param associated_column_version The version of the associated column if there is one (otherwise -1), if the column is created.
|
||||
* @param comments Optional comments associated with the column if it is created (NULL or "" if no comments associated).
|
||||
* @param create Whether the column should be created (create == true) or opened (create == false).
|
||||
* @param create Whether the column should be created (create == true) or already exists (create == false).
|
||||
*
|
||||
* @returns A value indicating the success of the operation.
|
||||
* @retval 0 if the operation was successfully completed.
|
||||
@ -416,7 +416,7 @@ Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name);
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int obi_view_add_column(Obiview_p view,
|
||||
char* column_name,
|
||||
char* column_name,
|
||||
obiversion_t version_number,
|
||||
const char* alias,
|
||||
OBIType_t data_type,
|
||||
|
Reference in New Issue
Block a user