Compare commits

...

23 Commits

Author SHA1 Message Date
f23c40c905 obi cat: fixed a bug introduced in 3.0.0b28 and switch to version
3.0.0b33
2020-08-27 18:38:16 +02:00
f99fc13b75 switch to version 3.0.0b32 2020-08-13 18:17:09 +02:00
1da6aac1b8 C: patch for failed creation of AVL with errno EEXIST 2020-08-12 17:55:08 +02:00
159803b40a export: now automatically sorts dictionary keys alphabetically for
tab/csv output
2020-07-31 16:43:35 +02:00
7dcbc34017 import: fixed entry count estimation when importing fastq files 2020-07-30 16:56:36 +02:00
db2202c8b4 uniq: added a check to make sure that there is more than one element for
one tag when merging its information
2020-07-30 16:14:37 +02:00
d33ff97846 switch to version 3.0.0b31 2020-07-28 09:31:19 +02:00
1dcdf69f1f export: fixed a bug introduced in version 3.0.0b28 2020-07-28 09:31:05 +02:00
dec114eed6 Python: added "date created" information in view representation 2020-07-27 17:38:45 +02:00
f36691053b Python: added the OBITools3 version that generated the view in view
comments
2020-07-27 16:50:00 +02:00
f2aa5fcf8b alignpairedend: fixed division by 0 bug and switch to version 3.0.0b30 2020-07-27 10:15:59 +02:00
bccb3e6874 switch to version 3.0.0b29 2020-07-26 17:40:26 +02:00
f5a17bea68 C: added a missing error check 2020-07-26 17:39:55 +02:00
e28507639a C and Cython: fixed and improved the associated columns system 2020-07-26 17:39:29 +02:00
e6feac93fe obi test: made less heavy to be faster 2020-07-26 17:37:21 +02:00
50b292b489 obi import: added --space-priority option to import a view line by line 2020-07-26 17:36:52 +02:00
24a737aa55 switch to version 3.0.0b28 2020-07-24 16:10:10 +02:00
8aa455ad8a Python: made all commands handle output to buffer object (e.g. stdout) 2020-07-24 16:09:48 +02:00
46ca693ca9 Cython: View: new method to print a view to a buffer (e.g. stdout) 2020-07-24 16:03:23 +02:00
9a9afde113 Cython: progress bar: set default refresh rate to 5 seconds 2020-07-24 15:29:12 +02:00
8dd403a118 grep: now prints the number of entries grepped 2020-07-13 17:08:13 +02:00
9672f01c6a alignpairedend: improved/fixed the output tags for the alignment score
and lengths. Removed minimum score option
2020-07-13 15:59:50 +02:00
ed9549acfb ngsfilter: unidentified sequences are now stored untrimmed 2020-07-13 15:56:40 +02:00
36 changed files with 805 additions and 306 deletions

View File

@ -30,12 +30,12 @@ cdef class ProgressBar:
off_t maxi,
dict config={},
str head="",
double seconde=0.1,
double seconds=5,
cut=False):
self.starttime = self.clock()
self.lasttime = self.starttime
self.tickcount = <clock_t> (seconde * CLOCKS_PER_SEC)
self.tickcount = <clock_t> (seconds * CLOCKS_PER_SEC)
self.freq = 1
self.cycle = 0
self.arrow = 0

View File

@ -4,7 +4,7 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.dms import DMS
from obitools3.dms.view.view cimport View
from obitools3.uri.decode import open_uri
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
from obitools3.dms.view import RollbackException
from obitools3.apps.config import logger
from obitools3.utils cimport tobytes, str2bytes
@ -12,6 +12,9 @@ from obitools3.utils cimport tobytes, str2bytes
from obitools3.dms.capi.obilcsalign cimport obi_lcs_align_one_column, \
obi_lcs_align_two_columns
from io import BufferedWriter
from cpython.exc cimport PyErr_CheckSignals
import time
import sys
@ -23,6 +26,7 @@ def addOptions(parser):
addMinimalInputOption(parser)
addMinimalOutputOption(parser)
addNoProgressBarOption(parser)
group=parser.add_argument_group('obi align specific options')
@ -201,20 +205,20 @@ def run(config):
if output is None:
raise Exception("Could not create output")
o_dms = output[0]
output_0 = output[0]
o_dms_name = o_dms.name
final_o_view_name = output[1]
o_view_name = final_o_view_name
# If the input and output DMS are not the same, align creating a temporary view in the input dms that will be exported to
# If stdout output or the input and output DMS are not the same, align creating a temporary view in the input dms that will be exported to
# the right DMS and deleted in the other afterwards.
if i_dms != o_dms:
temporary_view_name = final_o_view_name
i=0
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
temporary_view_name = final_o_view_name+b"_"+str2bytes(str(i))
if i_dms != o_dms or type(output_0)==BufferedWriter:
if type(output_0)==BufferedWriter:
o_dms = i_dms
o_view_name = b"temp"
while o_view_name in i_dms: # Making sure view name is unique in input DMS
o_view_name = o_view_name+b"_"+str2bytes(str(i))
i+=1
o_view_name = temporary_view_name
else:
o_view_name = final_o_view_name
# Save command config in View comments
command_line = " ".join(sys.argv[1:])
@ -263,8 +267,15 @@ def run(config):
View.delete_view(i_dms, i_view_name_2)
i_dms_2.close()
# stdout output: write to buffer
if type(output_0)==BufferedWriter:
logger("info", "Printing to output...")
o_view = o_dms[o_view_name]
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
o_view.close()
# If the input and the output DMS are different, delete the temporary result view in the input DMS
if i_dms != o_dms:
if i_dms != o_dms or type(output_0)==BufferedWriter:
View.delete_view(i_dms, o_view_name)
o_dms.close(force=True)

View File

@ -6,7 +6,7 @@ from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
from obitools3.dms.column.column cimport Column
from obitools3.dms.capi.obiview cimport QUALITY_COLUMN
from obitools3.dms.capi.obitypes cimport OBI_QUAL
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
from obitools3.uri.decode import open_uri
from obitools3.apps.config import logger
from obitools3.libalign._qsassemble import QSolexaReverseAssemble
@ -15,7 +15,9 @@ from obitools3.libalign._solexapairend import buildConsensus, buildJoinedSequenc
from obitools3.dms.obiseq cimport Nuc_Seq
from obitools3.libalign.shifted_ali cimport Kmer_similarity, Ali_shifted
from obitools3.dms.capi.obiview cimport REVERSE_SEQUENCE_COLUMN, REVERSE_QUALITY_COLUMN
from obitools3.utils cimport str2bytes
from io import BufferedWriter
import sys
import os
@ -29,6 +31,7 @@ def addOptions(parser):
addMinimalInputOption(parser)
addMinimalOutputOption(parser)
addNoProgressBarOption(parser)
group = parser.add_argument_group('obi alignpairedend specific options')
@ -39,12 +42,13 @@ def addOptions(parser):
type=str,
help="URI to the reverse reads if they are in a different view than the forward reads")
group.add_argument('--score-min',
action="store", dest="alignpairedend:smin",
metavar="#.###",
default=None,
type=float,
help="Minimum score for keeping alignments")
# group.add_argument('--score-min',
# action="store", dest="alignpairedend:smin",
# metavar="#.###",
# default=None,
# type=float,
# help="Minimum score for keeping alignments. "
# "(for kmer alignment) The score is an approximation of the number of nucleotides matching in the overlap of the alignment.")
# group.add_argument('-A', '--true-ali',
# action="store_true", dest="alignpairedend:trueali",
@ -170,18 +174,29 @@ def run(config):
if output is None:
raise Exception("Could not create output view")
view = output[1]
output_0 = output[0]
o_dms = output[0]
Column.new_column(view, QUALITY_COLUMN, OBI_QUAL) #TODO output URI quality option?
if 'smin' in config['alignpairedend']:
smin = config['alignpairedend']['smin']
# stdout output: create temporary view
if type(output_0)==BufferedWriter:
i_dms = forward.dms # using any dms
o_dms = i_dms
i=0
o_view_name = b"temp"
while o_view_name in i_dms: # Making sure view name is unique in input DMS
o_view_name = o_view_name+b"_"+str2bytes(str(i))
i+=1
o_view = View_NUC_SEQS.new(o_dms, o_view_name, quality=True)
else:
smin = 0
o_view = output[1]
Column.new_column(o_view, QUALITY_COLUMN, OBI_QUAL)
# Initialize the progress bar
pb = ProgressBar(entries_len, config, seconde=5)
if config['obi']['noprogressbar'] == False:
pb = ProgressBar(entries_len, config)
else:
pb = None
#if config['alignpairedend']['trueali']:
# kmer_ali = False
# aligner = buildAlignment
@ -206,18 +221,19 @@ def run(config):
i = 0
for ali in ba:
pb(i)
if pb is not None:
pb(i)
PyErr_CheckSignals()
consensus = view[i]
consensus = o_view[i]
if not two_views:
seqF = entries[i]
else:
seqF = forward[i]
if ali.score > smin and ali.consensus_len > 0 :
if ali.overlap_len > 0 :
buildConsensus(ali, consensus, seqF)
else:
if not two_views:
@ -225,32 +241,43 @@ def run(config):
else:
seqR = reverse[i]
buildJoinedSequence(ali, seqR, consensus, forward=seqF)
consensus[b"smin"] = smin
if kmer_ali :
ali.free()
i+=1
pb(i, force=True)
print("", file=sys.stderr)
if pb is not None:
pb(i, force=True)
print("", file=sys.stderr)
if kmer_ali :
aligner.free()
# Save command config in View and DMS comments
command_line = " ".join(sys.argv[1:])
view.write_config(config, "alignpairedend", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
output[0].record_command_line(command_line)
o_view.write_config(config, "alignpairedend", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
o_dms.record_command_line(command_line)
#print("\n\nOutput view:\n````````````", file=sys.stderr)
#print(repr(view), file=sys.stderr)
# stdout output: write to buffer
if type(output_0)==BufferedWriter:
logger("info", "Printing to output...")
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
o_view.close()
# If stdout output, delete the temporary imported view used to create the final file
if type(output_0)==BufferedWriter:
View_NUC_SEQS.delete_view(o_dms, o_view_name)
output_0.close()
# Close all DMS
input[0].close(force=True)
if two_views:
rinput[0].close(force=True)
output[0].close(force=True)
o_dms.close(force=True)
logger("info", "Done.")

View File

@ -4,11 +4,12 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.dms import DMS
from obitools3.dms.view.view cimport View, Line_selection
from obitools3.uri.decode import open_uri
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption, addNoProgressBarOption
from obitools3.dms.view import RollbackException
from functools import reduce
from obitools3.apps.config import logger
from obitools3.utils cimport tobytes, str2bytes
from io import BufferedWriter
from obitools3.dms.capi.obiview cimport NUC_SEQUENCE_COLUMN, \
ID_COLUMN, \
DEFINITION_COLUMN, \
@ -34,6 +35,7 @@ def addOptions(parser):
addMinimalInputOption(parser)
addTaxonomyOption(parser)
addMinimalOutputOption(parser)
addNoProgressBarOption(parser)
group=parser.add_argument_group('obi annotate specific options')
@ -278,8 +280,19 @@ def run(config):
if output is None:
raise Exception("Could not create output view")
o_dms = output[0]
output_0 = output[0]
o_view_name = output[1]
# stdout output: create temporary view
if type(output_0)==BufferedWriter:
o_dms = i_dms
i=0
o_view_name = b"temp"
while o_view_name in i_dms: # Making sure view name is unique in output DMS
o_view_name = o_view_name+b"_"+str2bytes(str(i))
i+=1
imported_view_name = o_view_name
# If the input and output DMS are not the same, import the input view in the output DMS before cloning it to modify it
# (could be the other way around: clone and modify in the input DMS then import the new view in the output DMS)
if i_dms != o_dms:
@ -290,7 +303,7 @@ def run(config):
i+=1
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], i_view_name, imported_view_name)
i_view = o_dms[imported_view_name]
# Clone output view from input view
o_view = i_view.clone(o_view_name)
if o_view is None:
@ -307,7 +320,10 @@ def run(config):
taxo = None
# Initialize the progress bar
pb = ProgressBar(len(o_view), config, seconde=5)
if config['obi']['noprogressbar'] == False:
pb = ProgressBar(len(o_view), config)
else:
pb = None
try:
@ -346,14 +362,16 @@ def run(config):
sequenceTagger = sequenceTaggerGenerator(config, taxo=taxo)
for i in range(len(o_view)):
PyErr_CheckSignals()
pb(i)
if pb is not None:
pb(i)
sequenceTagger(o_view[i])
except Exception, e:
raise RollbackException("obi annotate error, rollbacking view: "+str(e), o_view)
pb(i, force=True)
print("", file=sys.stderr)
if pb is not None:
pb(i, force=True)
print("", file=sys.stderr)
# Save command config in View and DMS comments
command_line = " ".join(sys.argv[1:])
@ -363,13 +381,19 @@ def run(config):
input_dms_name.append(config['obi']['taxoURI'].split("/")[-3])
input_view_name.append("taxonomy/"+config['obi']['taxoURI'].split("/")[-1])
o_view.write_config(config, "annotate", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
output[0].record_command_line(command_line)
o_dms.record_command_line(command_line)
#print("\n\nOutput view:\n````````````", file=sys.stderr)
#print(repr(o_view), file=sys.stderr)
# If the input and the output DMS are different, delete the temporary imported view used to create the final view
if i_dms != o_dms:
# stdout output: write to buffer
if type(output_0)==BufferedWriter:
logger("info", "Printing to output...")
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
o_view.close()
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
if i_dms != o_dms or type(output_0)==BufferedWriter:
View.delete_view(o_dms, imported_view_name)
o_dms.close(force=True)
i_dms.close(force=True)

View File

@ -4,14 +4,16 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.dms.dms cimport DMS
from obitools3.dms.view import RollbackException
from obitools3.dms.capi.build_reference_db cimport build_reference_db
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption, addNoProgressBarOption
from obitools3.uri.decode import open_uri
from obitools3.apps.config import logger
from obitools3.utils cimport tobytes, str2bytes
from obitools3.dms.view.view cimport View
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
from io import BufferedWriter
import sys
from cpython.exc cimport PyErr_CheckSignals
__title__="Tag a set of sequences for PCR and sequencing errors identification"
@ -22,6 +24,7 @@ def addOptions(parser):
addMinimalInputOption(parser)
addTaxonomyOption(parser)
addMinimalOutputOption(parser)
addNoProgressBarOption(parser)
group = parser.add_argument_group('obi build_ref_db specific options')
@ -56,17 +59,20 @@ def run(config):
if output is None:
raise Exception("Could not create output")
o_dms = output[0]
output_0 = output[0]
final_o_view_name = output[1]
# If the input and output DMS are not the same, build the database creating a temporary view that will be exported to
# If stdout output or the input and output DMS are not the same, build the database creating a temporary view that will be exported to
# the right DMS and deleted in the other afterwards.
if i_dms != o_dms:
temporary_view_name = final_o_view_name
if i_dms != o_dms or type(output_0)==BufferedWriter:
temporary_view_name = b"temp"
i=0
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
temporary_view_name = final_o_view_name+b"_"+str2bytes(str(i))
temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
i+=1
o_view_name = temporary_view_name
if type(output_0)==BufferedWriter:
o_dms = i_dms
else:
o_view_name = final_o_view_name
@ -80,22 +86,29 @@ def run(config):
input_dms_name.append(config['obi']['taxoURI'].split("/")[-3])
input_view_name.append("taxonomy/"+config['obi']['taxoURI'].split("/")[-1])
comments = View.print_config(config, "build_ref_db", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
if build_reference_db(i_dms.name_with_full_path, tobytes(i_view_name), tobytes(taxonomy_name), tobytes(o_view_name), comments, config['build_ref_db']['threshold']) < 0:
raise Exception("Error building a reference database")
# If the input and output DMS are not the same, export result view to output DMS
if i_dms != o_dms:
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)
# stdout output: write to buffer
if type(output_0)==BufferedWriter:
logger("info", "Printing to output...")
o_view = o_dms[o_view_name]
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
o_view.close()
# Save command config in DMS comments
o_dms.record_command_line(command_line)
#print("\n\nOutput view:\n````````````", file=sys.stderr)
#print(repr(o_dms[final_o_view_name]), file=sys.stderr)
# If the input and the output DMS are different, delete the temporary result view in the input DMS
if i_dms != o_dms:
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
if i_dms != o_dms or type(output_0)==BufferedWriter:
View.delete_view(i_dms, o_view_name)
o_dms.close(force=True)

View File

@ -4,7 +4,7 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.dms import DMS
from obitools3.dms.view.view cimport View
from obitools3.uri.decode import open_uri
from obitools3.apps.optiongroups import addMinimalOutputOption
from obitools3.apps.optiongroups import addMinimalOutputOption, addNoProgressBarOption
from obitools3.dms.view import RollbackException
from obitools3.apps.config import logger
from obitools3.utils cimport str2bytes
@ -15,6 +15,7 @@ from obitools3.dms.capi.obiview cimport NUC_SEQUENCE_COLUMN, REVERSE_SEQUENCE_CO
from obitools3.dms.capi.obitypes cimport OBI_SEQ, OBI_QUAL
from obitools3.dms.column.column cimport Column
from io import BufferedWriter
import time
import sys
@ -27,6 +28,7 @@ __title__="Concatenate views."
def addOptions(parser):
addMinimalOutputOption(parser)
addNoProgressBarOption(parser)
group=parser.add_argument_group('obi cat specific options')
@ -76,53 +78,70 @@ def run(config):
if output is None:
raise Exception("Could not create output view")
o_dms = output[0]
output_0 = output[0]
o_view = output[1]
# stdout output
if type(output_0)==BufferedWriter:
o_dms = i_dms
# Initialize quality columns and their associated sequence columns if needed
if not remove_qual:
if NUC_SEQUENCE_COLUMN not in o_view:
Column.new_column(o_view, NUC_SEQUENCE_COLUMN, OBI_SEQ)
Column.new_column(o_view, QUALITY_COLUMN, OBI_QUAL, associated_column_name=NUC_SEQUENCE_COLUMN, associated_column_version=o_view[NUC_SEQUENCE_COLUMN].version)
if not remove_rev_qual:
Column.new_column(o_view, REVERSE_SEQUENCE_COLUMN, OBI_SEQ)
Column.new_column(o_view, REVERSE_QUALITY_COLUMN, OBI_QUAL, associated_column_name=REVERSE_SEQUENCE_COLUMN, associated_column_version=o_view[REVERSE_SEQUENCE_COLUMN].version)
if type(output_0) != BufferedWriter:
if not remove_qual:
if NUC_SEQUENCE_COLUMN not in o_view:
Column.new_column(o_view, NUC_SEQUENCE_COLUMN, OBI_SEQ)
Column.new_column(o_view, QUALITY_COLUMN, OBI_QUAL, associated_column_name=NUC_SEQUENCE_COLUMN, associated_column_version=o_view[NUC_SEQUENCE_COLUMN].version)
if not remove_rev_qual:
Column.new_column(o_view, REVERSE_SEQUENCE_COLUMN, OBI_SEQ)
Column.new_column(o_view, REVERSE_QUALITY_COLUMN, OBI_QUAL, associated_column_name=REVERSE_SEQUENCE_COLUMN, associated_column_version=o_view[REVERSE_SEQUENCE_COLUMN].version)
# Initialize multiple elements columns
dict_cols = {}
for v in iview_list:
for coln in v.keys():
if v[coln].nb_elements_per_line > 1:
if coln not in dict_cols:
dict_cols[coln] = {}
dict_cols[coln]['eltnames'] = set(v[coln].elements_names)
dict_cols[coln]['nbelts'] = v[coln].nb_elements_per_line
dict_cols[coln]['obitype'] = v[coln].data_type_int
else:
dict_cols[coln]['eltnames'] = set(v[coln].elements_names + list(dict_cols[coln]['eltnames']))
dict_cols[coln]['nbelts'] = len(dict_cols[coln]['eltnames'])
for coln in dict_cols:
Column.new_column(o_view, coln, dict_cols[coln]['obitype'],
nb_elements_per_line=dict_cols[coln]['nbelts'], elements_names=list(dict_cols[coln]['eltnames']))
if type(output_0)==BufferedWriter:
dict_cols = {}
for v in iview_list:
for coln in v.keys():
if v[coln].nb_elements_per_line > 1:
if coln not in dict_cols:
dict_cols[coln] = {}
dict_cols[coln]['eltnames'] = set(v[coln].elements_names)
dict_cols[coln]['nbelts'] = v[coln].nb_elements_per_line
dict_cols[coln]['obitype'] = v[coln].data_type_int
else:
dict_cols[coln]['eltnames'] = set(v[coln].elements_names + list(dict_cols[coln]['eltnames']))
dict_cols[coln]['nbelts'] = len(dict_cols[coln]['eltnames'])
for coln in dict_cols:
Column.new_column(o_view, coln, dict_cols[coln]['obitype'],
nb_elements_per_line=dict_cols[coln]['nbelts'], elements_names=list(dict_cols[coln]['eltnames']))
# Initialize the progress bar
pb = ProgressBar(total_len, config, seconde=5)
if not config['obi']['noprogressbar']:
pb = ProgressBar(total_len, config)
else:
pb = None
i = 0
for v in iview_list:
for l in v:
for entry in v:
PyErr_CheckSignals()
pb(i)
o_view[i] = l
if pb is not None:
pb(i)
if type(output_0)==BufferedWriter:
rep = repr(entry)
output_0.write(str2bytes(rep)+b"\n")
else:
o_view[i] = entry
i+=1
# Deletes quality columns if needed
if QUALITY_COLUMN in o_view and remove_qual :
o_view.delete_column(QUALITY_COLUMN)
if REVERSE_QUALITY_COLUMN in o_view and remove_rev_qual :
o_view.delete_column(REVERSE_QUALITY_COLUMN)
pb(i, force=True)
print("", file=sys.stderr)
if type(output_0)!=BufferedWriter:
if QUALITY_COLUMN in o_view and remove_qual :
o_view.delete_column(QUALITY_COLUMN)
if REVERSE_QUALITY_COLUMN in o_view and remove_rev_qual :
o_view.delete_column(REVERSE_QUALITY_COLUMN)
if pb is not None:
pb(i, force=True)
print("", file=sys.stderr)
# Save command config in DMS comments
command_line = " ".join(sys.argv[1:])

View File

@ -4,13 +4,14 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.dms.dms cimport DMS
from obitools3.dms.view import RollbackException
from obitools3.dms.capi.obiclean cimport obi_clean
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
from obitools3.uri.decode import open_uri
from obitools3.apps.config import logger
from obitools3.utils cimport tobytes, str2bytes
from obitools3.dms.view.view cimport View
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
from io import BufferedWriter
import sys
@ -21,7 +22,8 @@ def addOptions(parser):
addMinimalInputOption(parser)
addMinimalOutputOption(parser)
addNoProgressBarOption(parser)
group = parser.add_argument_group('obi clean specific options')
group.add_argument('--distance', '-d',
@ -88,17 +90,20 @@ def run(config):
if output is None:
raise Exception("Could not create output")
o_dms = output[0]
output_0 = output[0]
final_o_view_name = output[1]
# If the input and output DMS are not the same, run obiclean creating a temporary view that will be exported to
# If stdout output or the input and output DMS are not the same, create a temporary view that will be exported to
# the right DMS and deleted in the other afterwards.
if i_dms != o_dms:
temporary_view_name = final_o_view_name
if i_dms != o_dms or type(output_0)==BufferedWriter:
temporary_view_name = b"temp"
i=0
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
temporary_view_name = final_o_view_name+b"_"+str2bytes(str(i))
temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
i+=1
o_view_name = temporary_view_name
if type(output_0)==BufferedWriter:
o_dms = i_dms
else:
o_view_name = final_o_view_name
@ -116,15 +121,22 @@ def run(config):
# If the input and output DMS are not the same, export result view to output DMS
if i_dms != o_dms:
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)
# stdout output: write to buffer
if type(output_0)==BufferedWriter:
logger("info", "Printing to output...")
o_view = o_dms[o_view_name]
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
o_view.close()
# Save command config in DMS comments
o_dms.record_command_line(command_line)
#print("\n\nOutput view:\n````````````", file=sys.stderr)
#print(repr(o_dms[final_o_view_name]), file=sys.stderr)
# If the input and the output DMS are different, delete the temporary result view in the input DMS
if i_dms != o_dms:
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
if i_dms != o_dms or type(output_0)==BufferedWriter:
View.delete_view(i_dms, o_view_name)
o_dms.close(force=True)

View File

@ -5,10 +5,10 @@ from obitools3.dms.dms cimport DMS
from obitools3.dms.capi.obidms cimport OBIDMS_p
from obitools3.dms.view import RollbackException
from obitools3.dms.capi.obiecopcr cimport obi_ecopcr
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addTaxonomyOption
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addTaxonomyOption, addNoProgressBarOption
from obitools3.uri.decode import open_uri
from obitools3.apps.config import logger
from obitools3.utils cimport tobytes
from obitools3.utils cimport tobytes, str2bytes
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
from obitools3.dms.view import View
@ -16,6 +16,7 @@ from libc.stdlib cimport malloc, free
from libc.stdint cimport int32_t
import sys
from io import BufferedWriter
__title__="in silico PCR"
@ -27,6 +28,7 @@ def addOptions(parser):
addMinimalInputOption(parser)
addTaxonomyOption(parser)
addMinimalOutputOption(parser)
addNoProgressBarOption(parser)
group = parser.add_argument_group('obi ecopcr specific options')
@ -169,11 +171,20 @@ def run(config):
if output is None:
raise Exception("Could not create output")
o_dms = output[0]
output_0 = output[0]
o_dms_name = output[0].name
o_view_name = output[1]
# Read taxonomy name
taxonomy_name = config['obi']['taxoURI'].split("/")[-1] # Robust in theory
# If stdout output create a temporary view in the input dms that will be deleted afterwards.
if type(output_0)==BufferedWriter:
o_dms = i_dms
o_view_name = b"temp"
while o_view_name in i_dms: # Making sure view name is unique in input DMS
o_view_name = o_view_name+b"_"+str2bytes(str(i))
i+=1
# Save command config in View comments
command_line = " ".join(sys.argv[1:])
@ -201,10 +212,21 @@ def run(config):
free(restrict_to_taxids_p)
free(ignore_taxids_p)
# stdout output: write to buffer
if type(output_0)==BufferedWriter:
logger("info", "Printing to output...")
o_view = o_dms[o_view_name]
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
o_view.close()
#print("\n\nOutput view:\n````````````", file=sys.stderr)
#print(repr(o_dms[o_view_name]), file=sys.stderr)
# If stdout output, delete the temporary result view in the input DMS
if type(output_0)==BufferedWriter:
View.delete_view(i_dms, o_view_name)
i_dms.close(force=True)
o_dms.close(force=True)

View File

@ -4,7 +4,7 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.dms.dms cimport DMS
from obitools3.dms.view import RollbackException
from obitools3.dms.capi.obiecotag cimport obi_ecotag
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption, addNoProgressBarOption
from obitools3.uri.decode import open_uri
from obitools3.apps.config import logger
from obitools3.utils cimport tobytes, str2bytes
@ -12,6 +12,7 @@ from obitools3.dms.view.view cimport View
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
import sys
from io import BufferedWriter
__title__="Taxonomic assignment of sequences"
@ -22,6 +23,7 @@ def addOptions(parser):
addMinimalInputOption(parser)
addTaxonomyOption(parser)
addMinimalOutputOption(parser)
addNoProgressBarOption(parser)
group = parser.add_argument_group('obi ecotag specific options')
@ -75,17 +77,19 @@ def run(config):
if output is None:
raise Exception("Could not create output")
o_dms = output[0]
output_0 = output[0]
final_o_view_name = output[1]
# If the input and output DMS are not the same, run ecotag creating a temporary view that will be exported to
# the right DMS and deleted in the other afterwards.
if i_dms != o_dms:
temporary_view_name = final_o_view_name
# If stdout output or the input and output DMS are not the same, create a temporary view that will be exported and deleted.
if i_dms != o_dms or type(output_0)==BufferedWriter:
temporary_view_name = b"temp"
i=0
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
temporary_view_name = final_o_view_name+b"_"+str2bytes(str(i))
temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
i+=1
o_view_name = temporary_view_name
if type(output_0)==BufferedWriter:
o_dms = i_dms
else:
o_view_name = final_o_view_name
@ -120,11 +124,18 @@ def run(config):
# Save command config in DMS comments
o_dms.record_command_line(command_line)
# stdout output: write to buffer
if type(output_0)==BufferedWriter:
logger("info", "Printing to output...")
o_view = o_dms[o_view_name]
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
o_view.close()
#print("\n\nOutput view:\n````````````", file=sys.stderr)
#print(repr(o_dms[final_o_view_name]), file=sys.stderr)
# If the input and the output DMS are different, delete the temporary result view in the input DMS
if i_dms != o_dms:
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
if i_dms != o_dms or type(output_0)==BufferedWriter:
View.delete_view(i_dms, o_view_name)
o_dms.close(force=True)

View File

@ -74,7 +74,7 @@ def run(config):
if config['obi']['noprogressbar']:
pb = None
else:
pb = ProgressBar(withoutskip - skip, config, seconde=5)
pb = ProgressBar(withoutskip - skip, config)
i=0
for seq in iview :
@ -89,7 +89,7 @@ def run(config):
if pb is not None:
pb(i, force=True)
print("", file=sys.stderr)
print("", file=sys.stderr)
# TODO save command in input dms?

View File

@ -4,7 +4,7 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.dms import DMS
from obitools3.dms.view.view cimport View, Line_selection
from obitools3.uri.decode import open_uri
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption, addNoProgressBarOption
from obitools3.dms.view import RollbackException
from obitools3.apps.config import logger
from obitools3.utils cimport tobytes, str2bytes
@ -14,6 +14,7 @@ import time
import re
import sys
import ast
from io import BufferedWriter
from cpython.exc cimport PyErr_CheckSignals
@ -28,6 +29,7 @@ def addOptions(parser):
addMinimalInputOption(parser)
addTaxonomyOption(parser)
addMinimalOutputOption(parser)
addNoProgressBarOption(parser)
group=parser.add_argument_group("obi grep specific options")
@ -304,16 +306,21 @@ def run(config):
if output is None:
raise Exception("Could not create output view")
o_dms = output[0]
o_view_name_final = output[1]
o_view_name = o_view_name_final
# If the input and output DMS are not the same, create output view in input DMS first, then export it
# to output DMS, making sure the temporary view name is unique in the input DMS
if i_dms != o_dms:
output_0 = output[0]
final_o_view_name = output[1]
# If stdout output or the input and output DMS are not the same, create a temporary view that will be exported and deleted afterwards.
if i_dms != o_dms or type(output_0)==BufferedWriter:
temporary_view_name = b"temp"
i=0
while o_view_name in i_dms:
o_view_name = o_view_name_final+b"_"+str2bytes(str(i))
i+=1
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
i+=1
o_view_name = temporary_view_name
if type(output_0)==BufferedWriter:
o_dms = i_dms
else:
o_view_name = final_o_view_name
if 'taxoURI' in config['obi'] and config['obi']['taxoURI'] is not None:
taxo_uri = open_uri(config["obi"]["taxoURI"])
@ -324,7 +331,10 @@ def run(config):
taxo = None
# Initialize the progress bar
pb = ProgressBar(len(i_view), config, seconde=5)
if config['obi']['noprogressbar'] == False:
pb = ProgressBar(len(i_view), config)
else:
pb = None
# Apply filter
tax_filter = Taxonomy_filter_generator(taxo, config["grep"])
@ -334,31 +344,36 @@ def run(config):
if filter is None and config["grep"]["invert_selection"]: # all sequences are selected: filter is None if no line will be selected because some columns don't exist
for i in range(len(i_view)):
PyErr_CheckSignals()
pb(i)
selection.append(i)
if pb is not None:
pb(i)
selection.append(i)
elif filter is not None : # filter is None if no line will be selected because some columns don't exist
for i in range(len(i_view)):
PyErr_CheckSignals()
pb(i)
if pb is not None:
pb(i)
line = i_view[i]
loc_env = {"sequence": line, "line": line, "taxonomy": taxo, "obi_eval_result": False}
good = filter(line, loc_env)
if good :
selection.append(i)
pb(len(i_view), force=True)
print("", file=sys.stderr)
if pb is not None:
pb(len(i_view), force=True)
print("", file=sys.stderr)
# Create output view with the line selection
try:
o_view = selection.materialize(o_view_name)
except Exception, e:
raise RollbackException("obi grep error, rollbacking view: "+str(e), o_view)
logger("info", "Grepped %d entries" % len(o_view))
# Save command config in View and DMS comments
command_line = " ".join(sys.argv[1:])
input_dms_name=[input[0].name]
@ -373,14 +388,20 @@ def run(config):
# and delete the temporary view in the input DMS
if i_dms != o_dms:
o_view.close()
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, o_view_name_final)
o_view = o_dms[o_view_name_final]
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)
o_view = o_dms[final_o_view_name]
# stdout output: write to buffer
if type(output_0)==BufferedWriter:
logger("info", "Printing to output...")
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
o_view.close()
#print("\n\nOutput view:\n````````````", file=sys.stderr)
#print(repr(o_view), file=sys.stderr)
# If the input and the output DMS are different, delete the temporary imported view used to create the final view
if i_dms != o_dms:
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
if i_dms != o_dms or type(output_0)==BufferedWriter:
View.delete_view(i_dms, o_view_name)
o_dms.close(force=True)
i_dms.close(force=True)

View File

@ -4,14 +4,15 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.dms import DMS
from obitools3.dms.view.view cimport View, Line_selection
from obitools3.uri.decode import open_uri
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
from obitools3.dms.view import RollbackException
from obitools3.apps.config import logger
from obitools3.utils cimport str2bytes
import time
import sys
from io import BufferedWriter
from cpython.exc cimport PyErr_CheckSignals
@ -22,6 +23,7 @@ def addOptions(parser):
addMinimalInputOption(parser)
addMinimalOutputOption(parser)
addNoProgressBarOption(parser)
group=parser.add_argument_group('obi head specific options')
@ -53,31 +55,41 @@ def run(config):
if output is None:
raise Exception("Could not create output view")
o_dms = output[0]
o_view_name_final = output[1]
o_view_name = o_view_name_final
output_0 = output[0]
final_o_view_name = output[1]
# If the input and output DMS are not the same, create output view in input DMS first, then export it
# to output DMS, making sure the temporary view name is unique in the input DMS
if i_dms != o_dms:
# If stdout output or the input and output DMS are not the same, create a temporary view that will be exported and deleted.
if i_dms != o_dms or type(output_0)==BufferedWriter:
temporary_view_name = b"temp"
i=0
while o_view_name in i_dms:
o_view_name = o_view_name_final+b"_"+str2bytes(str(i))
i+=1
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
i+=1
o_view_name = temporary_view_name
if type(output_0)==BufferedWriter:
o_dms = i_dms
else:
o_view_name = final_o_view_name
n = min(config['head']['count'], len(i_view))
# Initialize the progress bar
pb = ProgressBar(n, config, seconde=5)
if config['obi']['noprogressbar'] == False:
pb = ProgressBar(n, config)
else:
pb = None
selection = Line_selection(i_view)
for i in range(n):
PyErr_CheckSignals()
pb(i)
if pb is not None:
pb(i)
selection.append(i)
pb(i, force=True)
print("", file=sys.stderr)
if pb is not None:
pb(i, force=True)
print("", file=sys.stderr)
# Create output view with the line selection
try:
@ -94,14 +106,20 @@ def run(config):
# and delete the temporary view in the input DMS
if i_dms != o_dms:
o_view.close()
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, o_view_name_final)
o_view = o_dms[o_view_name_final]
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)
o_view = o_dms[final_o_view_name]
# stdout output: write to buffer
if type(output_0)==BufferedWriter:
logger("info", "Printing to output...")
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
o_view.close()
#print("\n\nOutput view:\n````````````", file=sys.stderr)
#print(repr(view), file=sys.stderr)
# If the input and the output DMS are different, delete the temporary imported view used to create the final view
if i_dms != o_dms:
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
if i_dms != o_dms or type(output_0)==BufferedWriter:
View.delete_view(i_dms, o_view_name)
o_dms.close(force=True)
i_dms.close(force=True)

View File

@ -47,6 +47,8 @@ from obitools3.apps.config import logger
from cpython.exc cimport PyErr_CheckSignals
from io import BufferedWriter
__title__="Imports sequences from different formats into a DMS"
@ -75,6 +77,11 @@ def addOptions(parser):
help="Do a first readthrough of the dataset if it contains huge dictionaries (more than 100 keys) for "
"a much faster import. This option is not recommended and will slow down the import in any other case.")
group.add_argument('--space-priority',
action="store_true", dest="import:space_priority",
default=False,
help="If importing a view into another DMS, do it by importing each line, saving disk space if the original view "
"has a line selection associated.")
def run(config):
@ -130,7 +137,7 @@ def run(config):
if entry_count > 0:
logger("info", "Importing %d entries", entry_count)
else:
logger("info", "Importing an unknow number of entries")
logger("info", "Importing an unknown number of entries")
# TODO a bit dirty?
if input[2]==Nuc_Seq or input[2]==View_NUC_SEQS:
@ -140,7 +147,7 @@ def run(config):
else:
v = None
if config['obi']['taxdump'] or isinstance(input[1], View):
if config['obi']['taxdump'] or (isinstance(input[1], View) and not config['import']['space_priority']):
dms_only=True
else:
dms_only=False
@ -168,17 +175,20 @@ def run(config):
logger("info", "Done.")
return
# If importing a view between two DMS, use C API
if isinstance(input[1], View):
# If importing a view between two DMS and not wanting to save space if line selection in original view, use C API
if isinstance(input[1], View) and not config['import']['space_priority']:
if obi_import_view(input[0].name_with_full_path, o_dms.name_with_full_path, input[1].name, tobytes((config['obi']['outputURI'].split('/'))[-1])) < 0 :
input[0].close(force=True)
output[0].close(force=True)
raise Exception("Error importing a view in a DMS")
o_dms.record_command_line(" ".join(sys.argv[1:]))
o_dms.close()
input[0].close(force=True)
output[0].close(force=True)
logger("info", "Done.")
return
if entry_count >= 0:
pb = ProgressBar(entry_count, config, seconde=5)
pb = ProgressBar(entry_count, config)
NUC_SEQS_view = False
if isinstance(output[1], View) :
@ -243,7 +253,7 @@ def run(config):
if isinstance(input[0], CompressedFile):
input_is_file = True
if entry_count >= 0:
pb = ProgressBar(entry_count, config, seconde=5)
pb = ProgressBar(entry_count, config)
try:
input[0].close()
except AttributeError:

View File

@ -2,10 +2,10 @@
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.dms import DMS
from obitools3.dms.view import RollbackException
from obitools3.dms.view import RollbackException, View
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
from obitools3.dms.column.column cimport Column, Column_line
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
from obitools3.uri.decode import open_uri
from obitools3.apps.config import logger
from obitools3.libalign._freeendgapfm import FreeEndGapFullMatch
@ -14,13 +14,14 @@ from obitools3.dms.obiseq cimport Nuc_Seq
from obitools3.dms.capi.obitypes cimport OBI_SEQ, OBI_QUAL
from obitools3.dms.capi.apat cimport MAX_PATTERN
from obitools3.dms.capi.obiview cimport REVERSE_SEQUENCE_COLUMN, REVERSE_QUALITY_COLUMN
from obitools3.utils cimport tobytes
from obitools3.utils cimport tobytes, str2bytes
from libc.stdint cimport INT32_MAX
from functools import reduce
import math
import sys
from cpython.exc cimport PyErr_CheckSignals
from io import BufferedWriter
#REVERSE_SEQ_COLUMN_NAME = b"REVERSE_SEQUENCE" # used by alignpairedend tool
@ -34,7 +35,8 @@ def addOptions(parser):
addMinimalInputOption(parser)
addMinimalOutputOption(parser)
addNoProgressBarOption(parser)
group = parser.add_argument_group('obi ngsfilter specific options')
group.add_argument('-t','--info-view',
@ -58,7 +60,7 @@ def addOptions(parser):
metavar="<URI>",
type=str,
default=None,
help="URI to the view used to store the sequences unassigned to any sample")
help="URI to the view used to store the sequences unassigned to any sample. Those sequences are untrimmed.")
group.add_argument('--no-tags',
action="store_true", dest="ngsfilter:notags",
@ -539,7 +541,8 @@ def run(config):
raise Exception("Could not open input reads")
if input[2] != View_NUC_SEQS:
raise NotImplementedError('obi ngsfilter only works on NUC_SEQS views')
i_dms = input[0]
if "reverse" in config["ngsfilter"]:
forward = input[1]
@ -580,8 +583,19 @@ def run(config):
if output is None:
raise Exception("Could not create output view")
o_dms = output[0]
output_0 = output[0]
o_view = output[1]
# If stdout output, create a temporary view in the input dms that will be deleted afterwards.
if type(output_0)==BufferedWriter:
o_dms = i_dms
o_view_name = b"temp"
while o_view_name in i_dms: # Making sure view name is unique in input DMS
o_view_name = o_view_name+b"_"+str2bytes(str(i))
i+=1
o_view = View_NUC_SEQS.new(i_dms, o_view_name)
# Open the view containing the informations about the tags and the primers
info_input = open_uri(config['ngsfilter']['info_view'])
if info_input is None:
@ -602,7 +616,10 @@ def run(config):
unidentified = None
# Initialize the progress bar
pb = ProgressBar(entries_len, config, seconde=5)
if config['obi']['noprogressbar'] == False:
pb = ProgressBar(entries_len, config)
else:
pb = None
# Check and store primers and tags
try:
@ -636,7 +653,8 @@ def run(config):
try:
for i in range(entries_len):
PyErr_CheckSignals()
pb(i)
if pb is not None:
pb(i)
if not_aligned:
modseq = [Nuc_Seq.new_from_stored(forward[i]), Nuc_Seq.new_from_stored(reverse[i])]
else:
@ -646,7 +664,13 @@ def run(config):
o_view[g].set(oseq.id, oseq.seq, definition=oseq.definition, quality=oseq.quality, tags=oseq)
g+=1
elif unidentified is not None:
unidentified[u].set(oseq.id, oseq.seq, definition=oseq.definition, quality=oseq.quality, tags=oseq)
# Untrim sequences (put original back)
if len(modseq) > 1:
oseq[REVERSE_SEQUENCE_COLUMN] = reverse[i].seq
oseq[REVERSE_QUALITY_COLUMN] = reverse[i].quality
unidentified[u].set(oseq.id, forward[i].seq, definition=oseq.definition, quality=forward[i].quality, tags=oseq)
else:
unidentified[u].set(oseq.id, entries[i].seq, definition=oseq.definition, quality=entries[i].quality, tags=oseq)
u+=1
except Exception, e:
if unidentified is not None:
@ -654,8 +678,9 @@ def run(config):
else:
raise RollbackException("obi ngsfilter error, rollbacking view: "+str(e), o_view)
pb(i, force=True)
print("", file=sys.stderr)
if pb is not None:
pb(i, force=True)
print("", file=sys.stderr)
# Save command config in View and DMS comments
command_line = " ".join(sys.argv[1:])
@ -664,13 +689,23 @@ def run(config):
unidentified.write_config(config, "ngsfilter", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
# Add comment about unidentified seqs
unidentified.comments["info"] = "View containing sequences categorized as unidentified by the ngsfilter command"
output[0].record_command_line(command_line)
o_dms.record_command_line(command_line)
# stdout output: write to buffer
if type(output_0)==BufferedWriter:
logger("info", "Printing to output...")
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
o_view.close()
#print("\n\nOutput view:\n````````````", file=sys.stderr)
#print(repr(o_view), file=sys.stderr)
input[0].close(force=True)
output[0].close(force=True)
# If stdout output, delete the temporary result view in the input DMS
if type(output_0)==BufferedWriter:
View.delete_view(i_dms, o_view_name)
i_dms.close(force=True)
o_dms.close(force=True)
info_input[0].close(force=True)
if unidentified is not None:
unidentified_input[0].close(force=True)

View File

@ -4,7 +4,7 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.dms import DMS
from obitools3.dms.view.view cimport View, Line_selection
from obitools3.uri.decode import open_uri
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
from obitools3.dms.view import RollbackException
from obitools3.apps.config import logger
from obitools3.utils cimport str2bytes
@ -24,6 +24,7 @@ from obitools3.dms.capi.obitypes cimport OBI_BOOL, \
import time
import sys
from cpython.exc cimport PyErr_CheckSignals
from io import BufferedWriter
NULL_VALUE = {OBI_BOOL: OBIBool_NA,
@ -42,6 +43,7 @@ def addOptions(parser):
addMinimalInputOption(parser)
addMinimalOutputOption(parser)
addNoProgressBarOption(parser)
group=parser.add_argument_group('obi sort specific options')
@ -59,7 +61,7 @@ def addOptions(parser):
def line_cmp(line, key, pb):
pb
pb
if line[key] is None:
return NULL_VALUE[line.view[key].data_type_int]
else:
@ -86,20 +88,28 @@ def run(config):
if output is None:
raise Exception("Could not create output view")
o_dms = output[0]
o_view_name_final = output[1]
o_view_name = o_view_name_final
output_0 = output[0]
final_o_view_name = output[1]
# If the input and output DMS are not the same, create output view in input DMS first, then export it
# to output DMS, making sure the temporary view name is unique in the input DMS
if i_dms != o_dms:
# If stdout output or the input and output DMS are not the same, create a temporary view that will be exported and deleted.
if i_dms != o_dms or type(output_0)==BufferedWriter:
temporary_view_name = b"temp"
i=0
while o_view_name in i_dms:
o_view_name = o_view_name_final+b"_"+str2bytes(str(i))
i+=1
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
i+=1
o_view_name = temporary_view_name
if type(output_0)==BufferedWriter:
o_dms = i_dms
else:
o_view_name = final_o_view_name
# Initialize the progress bar
pb = ProgressBar(len(i_view), config, seconde=5)
if config['obi']['noprogressbar'] == False:
pb = ProgressBar(len(i_view), config)
else:
pb = None
keys = config['sort']['keys']
selection = Line_selection(i_view)
@ -110,10 +120,14 @@ def run(config):
for k in keys: # TODO order?
PyErr_CheckSignals()
selection.sort(key=lambda line_idx: line_cmp(i_view[line_idx], k, pb(line_idx)), reverse=config['sort']['reverse'])
pb(len(i_view), force=True)
print("", file=sys.stderr)
if pb is not None:
selection.sort(key=lambda line_idx: line_cmp(i_view[line_idx], k, pb(line_idx)), reverse=config['sort']['reverse'])
else:
selection.sort(key=lambda line_idx: line_cmp(i_view[line_idx], k, None), reverse=config['sort']['reverse'])
if pb is not None:
pb(len(i_view), force=True)
print("", file=sys.stderr)
# Create output view with the sorted line selection
try:
@ -132,16 +146,23 @@ def run(config):
# and delete the temporary view in the input DMS
if i_dms != o_dms:
o_view.close()
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, o_view_name_final)
o_view = o_dms[o_view_name_final]
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)
o_view = o_dms[final_o_view_name]
# stdout output: write to buffer
if type(output_0)==BufferedWriter:
logger("info", "Printing to output...")
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
o_view.close()
#print("\n\nOutput view:\n````````````", file=sys.stderr)
#print(repr(o_view), file=sys.stderr)
# If the input and the output DMS are different, delete the temporary imported view used to create the final view
if i_dms != o_dms:
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
if i_dms != o_dms or type(output_0)==BufferedWriter:
View.delete_view(i_dms, o_view_name)
o_dms.close(force=True)
i_dms.close(force=True)
logger("info", "Done.")

View File

@ -162,7 +162,7 @@ def run(config):
lcat=0
# Initialize the progress bar
pb = ProgressBar(len(i_view), config, seconde=5)
pb = ProgressBar(len(i_view), config)
for i in range(len(i_view)):
PyErr_CheckSignals()

View File

@ -4,7 +4,7 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.dms import DMS
from obitools3.dms.view.view cimport View, Line_selection
from obitools3.uri.decode import open_uri
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption
from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption, addNoProgressBarOption
from obitools3.dms.view import RollbackException
from obitools3.apps.config import logger
from obitools3.utils cimport str2bytes
@ -12,6 +12,7 @@ from obitools3.utils cimport str2bytes
import time
import sys
from cpython.exc cimport PyErr_CheckSignals
from io import BufferedWriter
__title__="Keep the N last lines of a view."
@ -21,6 +22,7 @@ def addOptions(parser):
addMinimalInputOption(parser)
addMinimalOutputOption(parser)
addNoProgressBarOption(parser)
group=parser.add_argument_group('obi tail specific options')
@ -52,31 +54,41 @@ def run(config):
if output is None:
raise Exception("Could not create output view")
o_dms = output[0]
o_view_name_final = output[1]
o_view_name = o_view_name_final
output_0 = output[0]
final_o_view_name = output[1]
# If the input and output DMS are not the same, create output view in input DMS first, then export it
# to output DMS, making sure the temporary view name is unique in the input DMS
if i_dms != o_dms:
# If stdout output or the input and output DMS are not the same, create a temporary view that will be exported and deleted.
if i_dms != o_dms or type(output_0)==BufferedWriter:
temporary_view_name = b"temp"
i=0
while o_view_name in i_dms:
o_view_name = o_view_name_final+b"_"+str2bytes(str(i))
i+=1
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
i+=1
o_view_name = temporary_view_name
if type(output_0)==BufferedWriter:
o_dms = i_dms
else:
o_view_name = final_o_view_name
start = max(len(i_view) - config['tail']['count'], 0)
# Initialize the progress bar
pb = ProgressBar(len(i_view) - start, config, seconde=5)
if config['obi']['noprogressbar'] == False:
pb = ProgressBar(len(i_view) - start, config)
else:
pb = None
selection = Line_selection(i_view)
for i in range(start, len(i_view)):
PyErr_CheckSignals()
pb(i)
if pb is not None:
pb(i)
selection.append(i)
pb(i, force=True)
print("", file=sys.stderr)
if pb is not None:
pb(i, force=True)
print("", file=sys.stderr)
# Save command config in View comments
command_line = " ".join(sys.argv[1:])
@ -97,14 +109,20 @@ def run(config):
# and delete the temporary view in the input DMS
if i_dms != o_dms:
o_view.close()
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, o_view_name_final)
o_view = o_dms[o_view_name_final]
View.import_view(i_dms.full_path[:-7], o_dms.full_path[:-7], o_view_name, final_o_view_name)
o_view = o_dms[final_o_view_name]
# stdout output: write to buffer
if type(output_0)==BufferedWriter:
logger("info", "Printing to output...")
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
o_view.close()
#print("\n\nOutput view:\n````````````", file=sys.stderr)
#print(repr(o_view), file=sys.stderr)
# If the input and the output DMS are different, delete the temporary imported view used to create the final view
if i_dms != o_dms:
# If the input and the output DMS are different or if stdout output, delete the temporary imported view used to create the final view
if i_dms != o_dms or type(output_0)==BufferedWriter:
View.delete_view(i_dms, o_view_name)
o_dms.close(force=True)
i_dms.close(force=True)

View File

@ -23,6 +23,7 @@ from obitools3.dms.capi.obiview cimport NUC_SEQUENCE_COLUMN, \
import shutil
import string
import random
import sys
from cpython.exc cimport PyErr_CheckSignals
@ -366,7 +367,7 @@ def random_new_view(config, infos, first=False):
infos['view'] = View_NUC_SEQS.new(infos['dms'], random_unique_name(infos), comments=random_comments(config)) # TODO quality column
else :
infos['view'] = View.new(infos['dms'], random_unique_name(infos), comments=random_comments(config)) # TODO quality column
infos['view'].write_config(config, "test", infos["command_line"], input_dms_name=[infos['dms'].name], input_view_name=["random"])
print_test(config, repr(infos['view']))
if v_to_clone is not None :
if line_selection is None:
@ -441,7 +442,7 @@ def addOptions(parser):
default=20,
type=int,
help="Maximum length of tuples. "
"Default: 200")
"Default: 50")
group.add_argument('--max_ini_col_count','-o',
action="store", dest="test:maxinicolcount",
@ -457,7 +458,7 @@ def addOptions(parser):
default=10000,
type=int,
help="Maximum number of lines in a column. "
"Default: 10000")
"Default: 1000")
group.add_argument('--max_elts_per_line','-e',
action="store", dest="test:maxelts",
@ -497,7 +498,8 @@ def run(config):
(b"OBI_SEQ", False): random_seq, (b"OBI_SEQ", True): random_seq_tuples,
(b"OBI_STR", False): random_bytes, (b"OBI_STR", True): random_bytes_tuples
},
'tests': [test_set_and_get, test_add_col, test_delete_col, test_col_alias, test_new_view]
'tests': [test_set_and_get, test_add_col, test_delete_col, test_col_alias, test_new_view],
'command_line': " ".join(sys.argv[1:])
}
# TODO ???

View File

@ -14,13 +14,15 @@ from obitools3.dms.capi.obitypes cimport OBI_INT, OBI_STR, index_t
from obitools3.apps.optiongroups import addMinimalInputOption, \
addMinimalOutputOption, \
addTaxonomyOption, \
addEltLimitOption
addEltLimitOption, \
addNoProgressBarOption
from obitools3.uri.decode import open_uri
from obitools3.apps.config import logger
from obitools3.utils cimport tobytes, tostr
from obitools3.utils cimport tobytes, tostr, str2bytes
import sys
from cpython.exc cimport PyErr_CheckSignals
from io import BufferedWriter
__title__="Group sequence records together"
@ -32,6 +34,7 @@ def addOptions(parser):
addTaxonomyOption(parser)
addMinimalOutputOption(parser)
addEltLimitOption(parser)
addNoProgressBarOption(parser)
group = parser.add_argument_group('obi uniq specific options')
@ -143,12 +146,16 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy, dict
scientific_name_column = o_view[b"scientific_name"]
# Initialize the progress bar
pb = ProgressBar(len(o_view), config, seconde=5)
if config['obi']['noprogressbar'] == False:
pb = ProgressBar(len(o_view), config)
else:
pb = None
i=0
for seq in o_view:
PyErr_CheckSignals()
pb(i)
if pb is not None:
pb(i)
if MERGED_TAXID_COLUMN in seq :
m_taxids = []
m_taxids_dict = seq[MERGED_TAXID_COLUMN]
@ -191,7 +198,8 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy, dict
scientific_name_column[i] = taxonomy.get_scientific_name(taxid)
i+=1
pb(len(o_view), force=True)
if pb is not None:
pb(len(o_view), force=True)
cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, dict config, list mergedKeys_list=None, Taxonomy taxonomy=None, bint mergeIds=False, list categories=None, int max_elts=1000000) :
@ -297,7 +305,8 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, di
iter_view = iter(view)
for i_seq in iter_view :
PyErr_CheckSignals()
pb(i)
if pb is not None:
pb(i)
# This can't be done in the same line as the unique_id tuple creation because it generates a bug
# where Cython (version 0.25.2) does not detect the reference to the categs_list variable and deallocates
@ -345,6 +354,9 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, di
key = mergedKeys[k]
merged_col_name = mergedKeys_m[k]
if merged_infos[merged_col_name]['nb_elts'] == 1:
raise Exception("Can't merge information from a tag with only one element (e.g. one sample ; don't use -m option)")
if merged_col_name in view:
i_col = view[merged_col_name]
else:
@ -415,12 +427,17 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, di
o_count_col = o_view[COUNT_COLUMN]
if COUNT_COLUMN in view:
i_count_col = view[COUNT_COLUMN]
if pb is not None:
pb(len(view), force=True)
print("")
pb(len(view), force=True)
print("")
logger("info", "Second browsing through the input")
# Initialize the progress bar
pb = ProgressBar(len(view), seconde=5)
if pb is not None:
pb = ProgressBar(len(view))
o_idx = 0
total_treated = 0
@ -455,7 +472,9 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, di
for i_idx in merged_sequences:
PyErr_CheckSignals()
pb(total_treated)
if pb is not None:
pb(total_treated)
i_id = i_id_col[i_idx]
i_seq = view[i_idx]
@ -531,7 +550,8 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, di
o_count_col[o_idx] = o_count
o_idx += 1
pb(len(view), force=True)
if pb is not None:
pb(len(view), force=True)
# Deletes quality columns if there is one because the matching between sequence and quality will be broken (quality set to NA when sequence not)
if QUALITY_COLUMN in view:
@ -579,8 +599,23 @@ def run(config):
if output is None:
raise Exception("Could not create output view")
i_dms = input[0]
entries = input[1]
o_view = output[1]
o_dms = output[0]
output_0 = output[0]
# If stdout output create a temporary view that will be exported and deleted.
if type(output_0)==BufferedWriter:
temporary_view_name = b"temp"
i=0
while temporary_view_name in i_dms: # Making sure view name is unique in input DMS
temporary_view_name = temporary_view_name+b"_"+str2bytes(str(i))
i+=1
o_view_name = temporary_view_name
o_dms = i_dms
o_view = View_NUC_SEQS.new(i_dms, o_view_name)
else:
o_view = output[1]
if 'taxoURI' in config['obi'] and config['obi']['taxoURI'] is not None:
taxo_uri = open_uri(config['obi']['taxoURI'])
@ -591,7 +626,10 @@ def run(config):
taxo = None
# Initialize the progress bar
pb = ProgressBar(len(entries), config, seconde=5)
if config['obi']['noprogressbar'] == False:
pb = ProgressBar(len(entries), config)
else:
pb = None
if len(entries) > 0:
try:
@ -599,7 +637,8 @@ def run(config):
except Exception, e:
raise RollbackException("obi uniq error, rollbacking view: "+str(e), o_view)
print("", file=sys.stderr)
if pb is not None:
print("", file=sys.stderr)
# Save command config in View and DMS comments
command_line = " ".join(sys.argv[1:])
@ -609,13 +648,23 @@ def run(config):
input_dms_name.append(config['obi']['taxoURI'].split("/")[-3])
input_view_name.append("taxonomy/"+config['obi']['taxoURI'].split("/")[-1])
o_view.write_config(config, "uniq", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
output[0].record_command_line(command_line)
o_dms.record_command_line(command_line)
# stdout output: write to buffer
if type(output_0)==BufferedWriter:
logger("info", "Printing to output...")
o_view.print_to_output(output_0, noprogressbar=config['obi']['noprogressbar'])
o_view.close()
#print("\n\nOutput view:\n````````````", file=sys.stderr)
#print(repr(o_view), file=sys.stderr)
input[0].close(force=True)
output[0].close(force=True)
# If stdout output, delete the temporary result view in the input DMS
if type(output_0)==BufferedWriter:
View.delete_view(i_dms, o_view_name)
i_dms.close(force=True)
o_dms.close(force=True)
logger("info", "Done.")

View File

@ -40,7 +40,8 @@ from obitools3.utils cimport tobytes, \
from obitools3.dms.column import typed_column
from libc.stdlib cimport free
from libc.string cimport strcpy
import importlib
import inspect
import pkgutil
@ -97,6 +98,7 @@ cdef class Column(OBIWrapper) :
object alias=b""):
# TODO indexer_name?
cdef Column column
cdef bytes column_name_b = tobytes(column_name)
cdef bytes alias_b = tobytes(alias)
cdef bytes comments_b = str2bytes(json.dumps(bytes2str_object(comments)))
@ -132,13 +134,14 @@ cdef class Column(OBIWrapper) :
raise RuntimeError("Cannot create column %s in view %s: trying to create quality column but no NUC_SEQ column to associate it with in the view" % (bytes2str(column_name_b),
bytes2str(view.name)))
associated_column_name_b = NUC_SEQUENCE_COLUMN
associated_column_version = view[NUC_SEQUENCE_COLUMN].version
associated_column_version = view[NUC_SEQUENCE_COLUMN].version
elif column_name == REVERSE_QUALITY_COLUMN:
if REVERSE_SEQUENCE_COLUMN not in view:
raise RuntimeError("Cannot create column %s in view %s: trying to create reverse quality column but no REVERSE_SEQUENCE column to associate it with in the view" % (bytes2str(column_name_b),
bytes2str(view.name)))
associated_column_name_b = REVERSE_SEQUENCE_COLUMN
associated_column_version = view[REVERSE_SEQUENCE_COLUMN].version
if (obi_view_add_column(view = view.pointer(),
column_name = column_name_b,
@ -158,8 +161,19 @@ cdef class Column(OBIWrapper) :
create = True)<0):
raise RuntimeError("Cannot create column %s in view %s" % (bytes2str(column_name_b),
bytes2str(view.name)))
return Column.open(view, alias_b)
column = Column.open(view, alias_b)
# Automatically associate nuc sequence column to quality column if necessary
if data_type == OBI_QUAL:
if column_name == QUALITY_COLUMN:
view[NUC_SEQUENCE_COLUMN].associated_column_name = column.name
view[NUC_SEQUENCE_COLUMN].associated_column_version = column.version
elif column_name == REVERSE_QUALITY_COLUMN:
view[REVERSE_SEQUENCE_COLUMN].associated_column_name = column.name
view[REVERSE_SEQUENCE_COLUMN].associated_column_version = column.version
return column
@staticmethod
@ -407,6 +421,31 @@ cdef class Column(OBIWrapper) :
raise OBIDeactivatedInstanceError()
return obi_format_date(self.pointer().header.creation_date)
# associated_column name property getter and setter
@property
def associated_column_name(self):
if not self.active() :
raise OBIDeactivatedInstanceError()
return self.pointer().header.associated_column.column_name
@associated_column_name.setter
def associated_column_name(self, object new_name):
strcpy(self.pointer().header.associated_column.column_name, tobytes(new_name))
# associated_column version property getter and setter
@property
def associated_column_version(self):
if not self.active() :
raise OBIDeactivatedInstanceError()
return self.pointer().header.associated_column.version
@associated_column_version.setter
def associated_column_version(self, int new_version):
self.pointer().header.associated_column.version = new_version
# comments property getter
@property
def comments(self):

View File

@ -39,4 +39,6 @@ cdef class Nuc_Seq_Stored(Seq_Stored) :
cpdef set_quality_char(self, object new_qual, int offset=*)
cpdef object build_quality_array(self, list quality)
cpdef bytes build_reverse_complement(self)
cpdef str get_str(self)
cpdef str get_str(self)
cpdef repr_bytes(self)

View File

@ -431,9 +431,12 @@ cdef class Nuc_Seq_Stored(Seq_Stored) :
return len(self._view.get_column(NUC_SEQUENCE_COLUMN).get_line(self.index))
def __repr__(self):
return bytes2str(self.repr_bytes())
cpdef repr_bytes(self):
if self.quality is None:
formatter = FastaFormat()
else:
formatter = FastqFormat()
return bytes2str(formatter(self))
return formatter(self)

View File

@ -20,6 +20,10 @@ cdef class View(OBIWrapper):
cdef DMS _dms
cdef inline Obiview_p pointer(self)
cpdef print_to_output(self,
object output,
bint noprogressbar=*)
cpdef delete_column(self,
object column_name,
@ -61,6 +65,8 @@ cdef class Line :
cdef index_t _index
cdef View _view
cpdef repr_bytes(self)
cdef register_view_class(bytes view_type_name,
type view_class)

View File

@ -6,6 +6,9 @@ cdef dict __VIEW_CLASS__= {}
from libc.stdlib cimport malloc
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.version import version
from ..capi.obiview cimport Alias_column_pair_p, \
obi_new_view, \
obi_open_view, \
@ -48,10 +51,13 @@ from ..capi.obidms cimport obi_import_view
from obitools3.format.tab import TabFormat
from cpython.exc cimport PyErr_CheckSignals
import importlib
import inspect
import pkgutil
import json
import sys
cdef class View(OBIWrapper) :
@ -178,13 +184,43 @@ cdef class View(OBIWrapper) :
@OBIWrapper.checkIsActive
def __repr__(self) :
cdef str s = "#View name:\n{name:s}\n#Line count:\n{line_count:d}\n#Columns:\n".format(name = bytes2str(self.name),
line_count = self.line_count)
def __repr__(self) :
cdef str s
if self.read_only: # can read date
s = "#View name:\n{name:s}\n#Date created:\n{date:s}\n#Line count:\n{line_count:d}\n#Columns:\n".format(name = bytes2str(self.name),
line_count = self.line_count,
date = str(bytes2str_object(self.comments["Date created"])))
else:
s = "#View name:\n{name:s}\n#Line count:\n{line_count:d}\n#Columns:\n".format(name = bytes2str(self.name),
line_count = self.line_count)
for column_name in self.keys() :
s = s + repr(self[column_name]) + '\n'
return s
cpdef print_to_output(self, object output, bint noprogressbar=False):
cdef int i
cdef Line entry
self.checkIsActive(self)
# Initialize the progress bar
if noprogressbar == False:
pb = ProgressBar(len(self))
else:
pb = None
i=0
for entry in self:
PyErr_CheckSignals()
if pb is not None:
pb(i)
output.write(entry.repr_bytes()+b"\n")
i+=1
if pb is not None:
pb(len(self), force=True)
print("", file=sys.stderr)
def keys(self):
@ -405,6 +441,7 @@ cdef class View(OBIWrapper) :
for i in range(len(input_view_name)):
input_str.append(tostr(input_dms_name[i])+"/"+tostr(input_view_name[i]))
comments["input_str"] = input_str
comments["version"] = version
return bytes2str_object(comments)
@ -757,8 +794,12 @@ cdef class Line :
def __repr__(self):
return bytes2str(self).repr_bytes()
cpdef repr_bytes(self):
formatter = TabFormat(header=False)
return bytes2str(formatter(self))
return formatter(self)
# View property getter

View File

@ -5,6 +5,7 @@ from obitools3.dms.view.view cimport Line
from obitools3.utils cimport bytes2str_object, str2bytes, tobytes
from obitools3.dms.column.column cimport Column_line, Column_multi_elts
import sys
cdef class TabFormat:
@ -26,18 +27,22 @@ cdef class TabFormat:
if self.header and self.first_line:
if isinstance(data.view[k], Column_multi_elts):
for k2 in data.view[k].keys():
keys = data.view[k].keys()
keys.sort()
for k2 in keys:
line.append(tobytes(k)+b':'+tobytes(k2))
else:
line.append(tobytes(k))
else:
value = data[k]
if isinstance(data.view[k], Column_multi_elts):
keys = data.view[k].keys()
keys.sort()
if value is None: # all keys at None
for k2 in data.view[k].keys(): # TODO could be much more efficient
for k2 in keys: # TODO could be much more efficient
line.append(self.NAString)
else:
for k2 in data.view[k].keys(): # TODO could be much more efficient
for k2 in keys: # TODO could be much more efficient
if value[k2] is not None:
line.append(str2bytes(str(bytes2str_object(value[k2])))) # genius programming
else:

View File

@ -183,8 +183,9 @@ def buildConsensus(ali, seq, ref_tags=None):
# doesn't work because uint8_t* are forced into bytes by cython (nothing read/kept beyond 0 values)
#obi_set_qual_int_with_elt_idx_and_col_p_in_view(view_p, col_p, seq.index, 0, ali.consensus_qual, ali.consensus_len)
seq.set(ref_tags.id+b"_CONS", ali.consensus_seq, quality=ali.consensus_qual)
seq[b'ali_length'] = ali.consensus_len
seq[b'score_norm']=float(ali.score)/ali.consensus_len
seq[b"seq_length"] = ali.consensus_len
seq[b"overlap_length"] = ali.overlap_len
seq[b'score_norm']=round(float(ali.score)/ali.overlap_len, 3)
seq[b'shift']=ali.shift
else:
if len(ali[0])>999: # TODO why?
@ -256,9 +257,10 @@ def buildJoinedSequence(ali, reverse, seq, forward=None):
seq[b"ali_direction"]=None
seq[b"mode"]=b"joined"
seq[b"pairedend_limit"]=len(forward)
seq[b"ali_length"] = ali.consensus_len
if ali.consensus_len > 0:
seq[b"score_norm"]=float(ali.score)/ali.consensus_len
seq[b"seq_length"] = ali.consensus_len
seq[b"overlap_length"] = ali.overlap_len
if ali.overlap_len > 0:
seq[b'score_norm']=round(float(ali.score)/ali.overlap_len, 3)
else:
seq[b"score_norm"]=0.0

View File

@ -210,10 +210,11 @@ def open_uri(uri,
error = None
if scheme==b"dms" or \
(scheme==b"" and \
(((not input) and "outputformat" not in config["obi"]) or \
(input and "inputformat" not in config["obi"]))): # TODO maybe not best way
if urib != b"-" and \
(scheme==b"dms" or \
(scheme==b"" and \
(((not input) and "outputformat" not in config["obi"]) or \
(input and "inputformat" not in config["obi"])))): # TODO maybe not best way
if default_dms is not None and b"/" not in urip.path: # assuming view to open in default DMS (TODO discuss)
dms=(default_dms, urip.path)
@ -275,11 +276,11 @@ def open_uri(uri,
iseq = urib
objclass = bytes
else: # TODO update uopen to be able to write?
if urip.path:
file = open(urip.path, 'wb')
else:
if not urip.path or urip.path == b'-':
file = sys.stdout.buffer
else:
file = open(urip.path, 'wb')
if file is not None:
qualifiers=parse_qs(urip.query)

View File

@ -2,7 +2,7 @@
from obitools3.dms.capi.obitypes cimport obitype_t, index_t
cpdef bytes format_separator(bytes format)
cpdef bytes format_uniq_pattern(bytes format)
cpdef int count_entries(file, bytes format)
cdef obi_errno_to_exception(index_t line_nb=*, object elt_id=*, str error_message=*)

View File

@ -24,11 +24,11 @@ import glob
import gzip
cpdef bytes format_separator(bytes format):
cpdef bytes format_uniq_pattern(bytes format):
if format == b"fasta":
return b"\n>"
elif format == b"fastq":
return b"\n@"
return b"\n\+\n"
elif format == b"ngsfilter" or format == b"tabular":
return b"\n"
elif format == b"genbank" or format == b"embl":
@ -42,7 +42,7 @@ cpdef bytes format_separator(bytes format):
cpdef int count_entries(file, bytes format):
try:
sep = format_separator(format)
sep = format_uniq_pattern(format)
if sep is None:
return -1
sep = re.compile(sep)
@ -72,7 +72,7 @@ cpdef int count_entries(file, bytes format):
return -1
mmapped_file = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
total_count += len(re.findall(sep, mmapped_file))
if format != b"ngsfilter" and format != b"tabular" and format != b"embl" and format != b"genbank":
if format != b"ngsfilter" and format != b"tabular" and format != b"embl" and format != b"genbank" and format != b"fastq":
total_count += 1 # adding +1 for 1st entry because separators include \n (ngsfilter and tabular already count one more because of last \n)
except:

View File

@ -1,5 +1,5 @@
major = 3
minor = 0
serial= '0b27'
serial= '0b33'
version ="%d.%d.%s" % (major,minor,serial)

View File

@ -414,7 +414,10 @@ Obi_ali_p kmer_similarity(Obiview_p view1, OBIDMS_column_p column1, index_t idx1
}
if (max_common_kmers > 0)
score = max_common_kmers + kmer_size - 1; // aka the number of nucleotides in the longest stretch of kmers perfectly matching
score = max_common_kmers + kmer_size - 1; // aka an approximation of the number of nucleotides matching in the overlap of the alignment.
// It's an approximation because one mismatch produces kmer_size kmer mismatches if in the middle of the overlap,
// and less for mismatches located towards the ends of the overlap. The case where there are the most mismatches is assumed,
// meaning that the score will be often underestimated and never overestimated.
else
score = 0;
abs_shift = abs(best_shift);

View File

@ -27,7 +27,11 @@
* @brief Alignment structure, with informations about the similarity and to rebuild the alignment.
*/
typedef struct Obi_ali {
int score; /**< Alignment score, corresponding to the number of nucleotides in the longest stretch of kmers perfectly matching.
int score; /**< Alignment score, corresponding to an approximation of the number of
* nucleotides matching in the overlap of the alignment.
* It's an approximation because one mismatch produces kmer_size kmer mismatches if in the middle of the overlap,
* and less for mismatches located towards the ends of the overlap. The case where there are the most mismatches is assumed,
* meaning that the score will be often underestimated and never overestimated.
*/
int consensus_length; /**< Length of the final consensus sequence.
*/

View File

@ -1659,6 +1659,12 @@ int obi_import_view(const char* dms_path_1, const char* dms_path_2, const char*
else // Non-typed view
view_2 = obi_new_view(dms_2, view_name_2, NULL, NULL, (view_1->infos)->comments);
if (view_2 == NULL)
{
obidebug(1, "\nError creating the new view to import a view in a DMS");
return -1;
}
// Import line count
view_2->infos->line_count = view_1->infos->line_count;

View File

@ -1312,19 +1312,10 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
return NULL;
}
// Store the associated column reference if needed // TODO discuss cases
if (data_type == OBI_QUAL)
// Store the associated column reference if needed
if ((associated_column_name != NULL) && (*associated_column_name != '\0'))
{
if ((associated_column_name == NULL) || (*associated_column_name == '\0'))
{
obidebug(1, "\nError: The name of the associated column when creating a new column is NULL");
munmap(new_column->header, header_size);
close(column_file_descriptor);
free(new_column);
return NULL;
}
strcpy((header->associated_column).column_name, associated_column_name);
if (associated_column_version == -1)
{
obidebug(1, "\nError: The version of the associated column when creating a new column is not defined");
@ -1336,6 +1327,7 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
(header->associated_column).version = associated_column_version;
}
// If the data type is OBI_STR, OBI_SEQ or OBI_QUAL, the associated obi_indexer is opened or created
if ((returned_data_type == OBI_STR) || (returned_data_type == OBI_SEQ) || (returned_data_type == OBI_QUAL) || tuples)
{
@ -1733,16 +1725,32 @@ int obi_close_column(OBIDMS_column_p column)
int obi_clone_column_indexer(OBIDMS_column_p column)
{
char* new_indexer_name;
int i;
new_indexer_name = obi_build_indexer_name((column->header)->name, (column->header)->version);
if (new_indexer_name == NULL)
return -1;
column->indexer = obi_clone_indexer(column->indexer, new_indexer_name); // TODO Need to lock this somehow?
if (column->indexer == NULL)
i=0;
while (true) // find avl name not already used
{
obidebug(1, "\nError cloning a column's indexer to make it writable");
return -1;
new_indexer_name = obi_build_indexer_name((column->header)->name, ((column->header)->version)+i);
if (new_indexer_name == NULL)
return -1;
column->indexer = obi_clone_indexer(column->indexer, new_indexer_name); // TODO Need to lock this somehow?
if (column->indexer == NULL)
{
if (errno == EEXIST)
{
free(new_indexer_name);
i++;
}
else
{
free(new_indexer_name);
obidebug(1, "\nError cloning a column's indexer to make it writable");
return -1;
}
}
else
break;
}
strcpy((column->header)->indexer_name, new_indexer_name);
@ -2423,16 +2431,20 @@ char* obi_get_formatted_elements_names(OBIDMS_column_p column)
}
char* obi_column_formatted_infos(OBIDMS_column_p column)
char* obi_column_formatted_infos(OBIDMS_column_p column, bool detailed)
{
char* column_infos;
char* elt_names;
column_infos = malloc(1024 * sizeof(char));
char* column_infos = NULL;
char* elt_names = NULL;
char* column_name = NULL;
// should be in view.c because alias exists in the context of view
column_infos = malloc(2048 * sizeof(char)); // TODO
elt_names = obi_get_formatted_elements_names(column);
// "column_name, data type: OBI_TYPE, element names: [formatted element names](, all comments)"
free(elt_names);
return column_infos;
}

View File

@ -254,11 +254,15 @@ static int update_lines(Obiview_p view, index_t line_count);
/**
* @brief Internal function to clone a column in the context of a view.
*
* Used to edit a closed column.
*
* Clones with the right line selection and replaces the cloned columns with the new ones in the view.
* If there is a line selection, all columns have to be cloned, otherwise only the column of interest is cloned.
*
* @param view A pointer on the view.
* @param column_name The name of the column in the view that should be cloned.
* @param clone_associated Whether to clone the associated column
* (should always be true except when calling from the function itself to avoid infinite recursion).
*
* @returns A pointer on the new column.
* @retval NULL if an error occurred.
@ -266,7 +270,7 @@ static int update_lines(Obiview_p view, index_t line_count);
* @since February 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
static OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name);
static OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name, bool clone_associated);
/**
@ -845,7 +849,7 @@ static int update_lines(Obiview_p view, index_t line_count)
// Clone the column first if needed
if (!(column->writable))
{
column = clone_column_in_view(view, (((view->infos)->column_references)[i]).alias);
column = clone_column_in_view(view, (((view->infos)->column_references)[i]).alias, true);
if (column == NULL)
{
obidebug(1, "\nError cloning a column in a view when updating its line count");
@ -870,12 +874,14 @@ static int update_lines(Obiview_p view, index_t line_count)
}
static OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name)
static OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name, bool clone_associated)
{
int i;
int i, j;
OBIDMS_column_p column = NULL;
OBIDMS_column_p new_column = NULL;
OBIDMS_column_p column_buffer;
OBIDMS_column_p associated_cloned_column = NULL;
char* associated_column_alias = NULL;
// Check that the view is not read-only
if (view->read_only)
@ -916,11 +922,62 @@ static OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_n
return NULL;
}
// Look for associated column to clone and reassociate
if ((column_buffer->header->associated_column).column_name[0] != '\0')
{
// Get the associated column alias
j=0;
while (((strcmp((((view->infos)->column_references)[j]).column_refs.column_name, (column_buffer->header->associated_column).column_name)) ||
((((view->infos)->column_references)[j]).column_refs.version != (column_buffer->header->associated_column).version)) &&
j<(view->infos)->column_count) // TODO function for that
j++;
if (j == (view->infos)->column_count) // not found
{
obi_set_errno(OBIVIEW_ERROR);
obidebug(1, "\nCould not find associated column when cloning a column for editing");
return NULL;
}
// No line selection: only this column is cloned, clone and reassociate the associated column
if ((view->line_selection == NULL) && clone_associated)
{
associated_column_alias = (((view->infos)->column_references)[j]).alias;
// Clone the associated column
associated_cloned_column = clone_column_in_view(view, associated_column_alias, false);
// Reassociate both ways
strcpy((associated_cloned_column->header->associated_column).column_name, column->header->name);
(associated_cloned_column->header->associated_column).version = column->header->version;
strcpy((column->header->associated_column).column_name, associated_cloned_column->header->name);
(column->header->associated_column).version = associated_cloned_column->header->version;
}
else
{
// Line selection: all columns are cloned, check if associated column has been cloned previously (it precedes this one in the list) to reassociate
if (j < i)
{
// Get pointer to associated column
associated_cloned_column = *((OBIDMS_column_p*)ll_get(view->columns, j));
if (associated_cloned_column == NULL)
{
obi_set_errno(OBIVIEW_ERROR);
obidebug(1, "\nError getting a column to clone from the linked list of column pointers of a view");
return NULL;
}
// Reassociate both ways
strcpy((associated_cloned_column->header->associated_column).column_name, column->header->name);
(associated_cloned_column->header->associated_column).version = column->header->version;
strcpy((column->header->associated_column).column_name, associated_cloned_column->header->name);
(column->header->associated_column).version = associated_cloned_column->header->version;
}
}
}
// Close old cloned column
obi_close_column(column_buffer);
if (!strcmp((((view->infos)->column_references)[i]).alias, column_name))
// Found the column to return
// Get the column to return
new_column = column;
}
}
@ -1193,7 +1250,7 @@ static int prepare_to_set_value_in_column(Obiview_p view, OBIDMS_column_p* colum
return -1;
}
(*column_pp) = clone_column_in_view(view, column_name);
(*column_pp) = clone_column_in_view(view, column_name, true);
if ((*column_pp) == NULL)
{
obidebug(1, "\nError trying to clone a column to modify it");
@ -1844,6 +1901,7 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v
{
Obiview_p view;
OBIDMS_column_p associated_nuc_column;
OBIDMS_column_p associated_qual_column;
int nb_predicates;
if (view_to_clone != NULL)
@ -1896,6 +1954,10 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v
obidebug(1, "Error adding an obligatory column in a nucleotide sequences view");
return NULL;
}
// Associating both ways: associating nuc sequences column to quality column
associated_qual_column = obi_view_get_column(view, QUALITY_COLUMN);
strcpy((associated_nuc_column->header->associated_column).column_name, associated_qual_column->header->name);
(associated_nuc_column->header->associated_column).version = associated_qual_column->header->version;
}
}
@ -1922,7 +1984,7 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v
(view->predicate_functions)[(view->nb_predicates)] = view_has_nuc_sequence_column;
(view->predicate_functions)[(view->nb_predicates) + 1] = view_has_id_column;
(view->predicate_functions)[(view->nb_predicates) + 2] = view_has_definition_column;
// if (quality_column) # TODO discuss. Commented bc for example with obi annotate, clone view so clone predicate, then modify seq, so quality is deleted, and predicate boom
// if (quality_column) # TODO fix by triggering predicate deleting if quality deleting. Commented bc for example with obi annotate, clone view so clone predicate, then modify seq, so quality is deleted, and predicate boom
// (view->predicate_functions)[(view->nb_predicates) + 3] = view_has_quality_column;
view->nb_predicates = nb_predicates;
@ -2212,7 +2274,7 @@ Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name)
// TODO return a pointer on the column?
int obi_view_add_column(Obiview_p view,
char* column_name,
char* column_name,
obiversion_t version_number,
const char* alias,
OBIType_t data_type,

View File

@ -406,7 +406,7 @@ Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name);
* @param associated_column_name The name of the associated column if there is one (otherwise NULL or ""), if the column is created.
* @param associated_column_version The version of the associated column if there is one (otherwise -1), if the column is created.
* @param comments Optional comments associated with the column if it is created (NULL or "" if no comments associated).
* @param create Whether the column should be created (create == true) or opened (create == false).
* @param create Whether the column should be created (create == true) or already exists (create == false).
*
* @returns A value indicating the success of the operation.
* @retval 0 if the operation was successfully completed.
@ -416,7 +416,7 @@ Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name);
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_view_add_column(Obiview_p view,
char* column_name,
char* column_name,
obiversion_t version_number,
const char* alias,
OBIType_t data_type,