Compare commits

..

15 Commits

Author SHA1 Message Date
dc9f897917 switch to version 3.0.0-beta10 2020-02-02 21:15:27 +01:00
bb72682f7d obi import: new option --preread to do a first readthrough of the
dataset if it contains huge dictionaries for a much faster import.
2020-02-02 21:12:34 +01:00
52920c3c71 URI decoding: dirty temp fix for bug where default dms makes a mess when
should guess file
2020-02-02 21:11:05 +01:00
18c22cecf9 switch to version 3.0.0-beta9 2020-02-01 15:48:55 +01:00
1bfb96023c obi import: rewriting a column now deletes the old one to save disk
space
2020-02-01 15:31:14 +01:00
c67d668989 obi import: fixed a bug when the first entry would contain a dictionary
with one key. Switch to beta8
2020-01-29 20:23:39 +01:00
db0ac37d41 switch to version 3.0.0-beta7 2020-01-29 16:18:53 +01:00
d0c21ecd39 Removed an OpenMP clause that was not obligatory and triggered a known
gcc bug involving macros
2020-01-24 16:00:53 +01:00
53212168a2 History: added 'obi' in bash history for practical reasons 2020-01-23 16:51:49 +01:00
b4b2e62195 Cleaner handling of reverse quality columns 2020-01-18 19:28:12 +01:00
ced82c4242 Switching to version 3.0-beta6 2020-01-18 17:29:23 +01:00
a524f8829e New command: obi cat to concatenate views (not optimized yet) 2020-01-18 17:28:31 +01:00
5c9091e9eb C: closing DMS after cleaning it instead of counting on upper layer 2020-01-18 17:27:35 +01:00
822000cb70 Fixes in documentation 2020-01-18 17:26:18 +01:00
b9cd9bee9a C: Changed obibool definitions because of conflict with R 2020-01-06 15:11:31 +01:00
21 changed files with 320 additions and 70 deletions

View File

@ -222,7 +222,7 @@ def __addDMSOutputOption(optionManager):
group.add_argument('--no-create-dms',
action="store_true", dest="obi:nocreatedms",
default=False,
help="Don't create an output DMS it does not already exist")
help="Don't create an output DMS if it does not already exist")
def __addEltLimitOption(optionManager):

View File

@ -14,7 +14,7 @@ from obitools3.libalign._qsrassemble import QSolexaRightReverseAssemble
from obitools3.libalign._solexapairend import buildConsensus, buildJoinedSequence
from obitools3.dms.obiseq cimport Nuc_Seq
from obitools3.libalign.shifted_ali cimport Kmer_similarity, Ali_shifted
from obitools3.commands.ngsfilter import REVERSE_SEQ_COLUMN_NAME, REVERSE_QUALITY_COLUMN_NAME
from obitools3.dms.capi.obiview cimport REVERSE_SEQUENCE_COLUMN, REVERSE_QUALITY_COLUMN
import sys
import os
@ -102,7 +102,7 @@ def alignmentIterator(entries, aligner):
seqR = reverse[i]
else:
seqF = Nuc_Seq.new_from_stored(entries[i])
seqR = Nuc_Seq(seqF.id, seqF[REVERSE_SEQ_COLUMN_NAME], quality=seqF[REVERSE_QUALITY_COLUMN_NAME])
seqR = Nuc_Seq(seqF.id, seqF[REVERSE_SEQUENCE_COLUMN], quality=seqF[REVERSE_QUALITY_COLUMN])
seqR.index = i
ali = aligner(seqF, seqR)
@ -196,8 +196,8 @@ def run(config):
reversed_column=None)
else:
aligner = Kmer_similarity(entries, \
column2=entries[REVERSE_SEQ_COLUMN_NAME], \
qual_column2=entries[REVERSE_QUALITY_COLUMN_NAME], \
column2=entries[REVERSE_SEQUENCE_COLUMN], \
qual_column2=entries[REVERSE_QUALITY_COLUMN], \
kmer_size=config['alignpairedend']['kmersize'], \
reversed_column=entries[b'reversed']) # column created by the ngsfilter tool
@ -221,7 +221,7 @@ def run(config):
buildConsensus(ali, consensus, seqF)
else:
if not two_views:
seqR = Nuc_Seq(seqF.id, seqF[REVERSE_SEQ_COLUMN_NAME], quality = seqF[REVERSE_QUALITY_COLUMN_NAME])
seqR = Nuc_Seq(seqF.id, seqF[REVERSE_SEQUENCE_COLUMN], quality = seqF[REVERSE_QUALITY_COLUMN])
else:
seqR = reverse[i]
buildJoinedSequence(ali, seqR, consensus, forward=seqF)

122
python/obitools3/commands/cat.pyx Executable file
View File

@ -0,0 +1,122 @@
#cython: language_level=3
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.dms import DMS
from obitools3.dms.view.view cimport View
from obitools3.uri.decode import open_uri
from obitools3.apps.optiongroups import addMinimalOutputOption
from obitools3.dms.view import RollbackException
from obitools3.apps.config import logger
from obitools3.utils cimport str2bytes
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
from obitools3.dms.view.view cimport View
from obitools3.dms.capi.obiview cimport NUC_SEQUENCE_COLUMN, REVERSE_SEQUENCE_COLUMN, \
QUALITY_COLUMN, REVERSE_QUALITY_COLUMN
from obitools3.dms.capi.obitypes cimport OBI_SEQ, OBI_QUAL
from obitools3.dms.column.column cimport Column
import time
import sys
from cpython.exc cimport PyErr_CheckSignals
__title__="Concatenate views."
def addOptions(parser):
    """Register the command-line options of the ``obi cat`` command.

    The minimal output options are added first, then a dedicated argument
    group holding the repeatable ``-c`` option. Every ``-c`` value is
    appended to the list stored under the ``cat:views_to_cat`` destination.

    :param parser: the argument parser (or option manager) to extend.
    """
    addMinimalOutputOption(parser)

    cat_options = parser.add_argument_group('obi cat specific options')

    # The help text is assembled from adjacent literals; the resulting
    # string is the one shown to the user by the parser.
    view_help = ("URI of a view to concatenate. (e.g. 'my_dms/my_view'). "
                 "Several -c options can be used on the same command line.")

    cat_options.add_argument("-c",
                             dest="cat:views_to_cat",
                             action="append",
                             default=[],
                             type=str,
                             metavar="<VIEW_NAME>",
                             help=view_help)
def run(config):
    """Concatenate several views into a single output view.

    The views to concatenate are read from ``config["cat"]["views_to_cat"]``
    and the destination from ``config['obi']['outputURI']``.

    * The output view is created as a ``View_NUC_SEQS`` unless at least one
      input view is of another type, in which case a plain ``View`` is used.
    * The quality column (resp. reverse quality column) is kept in the
      output only when every input view contains it.

    :param config: the configuration mapping of the running command.
    :raises Exception: if an input view cannot be read or the output view
        cannot be created.
    """
    DMS.obi_atexit()

    logger("info", "obi cat")

    # --- Open every view to concatenate ---------------------------------
    source_views = []
    source_dms = []
    line_total = 0
    remove_qual = False
    remove_rev_qual = False
    out_view_type = View_NUC_SEQS

    for view_uri in config["cat"]["views_to_cat"]:
        opened = open_uri(view_uri)
        if opened is None:
            raise Exception("Could not read input view")
        current_dms = opened[0]
        current_view = opened[1]
        current_type = opened[2]

        # The output stays a nucleotide-sequence view only if all inputs are
        if current_type != View_NUC_SEQS:
            out_view_type = View

        # A quality column is kept in the output only if all inputs have it
        if QUALITY_COLUMN not in current_view:
            remove_qual = True
        # Same rule for the reverse quality column
        if REVERSE_QUALITY_COLUMN not in current_view:
            remove_rev_qual = True

        line_total += len(current_view)
        source_views.append(current_view)
        source_dms.append(current_dms)

    # --- Open the output --------------------------------------------------
    output = open_uri(config['obi']['outputURI'],
                      input=False,
                      newviewtype=out_view_type)
    if output is None:
        raise Exception("Could not create output view")
    o_dms = output[0]
    o_view = output[1]

    # --- Create the quality columns and the sequence columns they are
    #     associated with, when they have to be kept -----------------------
    if not remove_qual:
        if NUC_SEQUENCE_COLUMN not in o_view:
            Column.new_column(o_view, NUC_SEQUENCE_COLUMN, OBI_SEQ)
        Column.new_column(o_view,
                          QUALITY_COLUMN,
                          OBI_QUAL,
                          associated_column_name=NUC_SEQUENCE_COLUMN,
                          associated_column_version=o_view[NUC_SEQUENCE_COLUMN].version)
    if not remove_rev_qual:
        Column.new_column(o_view, REVERSE_SEQUENCE_COLUMN, OBI_SEQ)
        Column.new_column(o_view,
                          REVERSE_QUALITY_COLUMN,
                          OBI_QUAL,
                          associated_column_name=REVERSE_SEQUENCE_COLUMN,
                          associated_column_version=o_view[REVERSE_SEQUENCE_COLUMN].version)

    # --- Copy every line of every input view, in order --------------------
    pb = ProgressBar(line_total, config, seconde=5)

    out_idx = 0
    for src_view in source_views:
        for line in src_view:
            PyErr_CheckSignals()    # lets pending signals (e.g. Ctrl-C) be handled
            pb(out_idx)
            o_view[out_idx] = line
            out_idx = out_idx + 1

    # --- Drop quality columns that not all inputs provided ----------------
    if QUALITY_COLUMN in o_view:
        if remove_qual:
            o_view.delete_column(QUALITY_COLUMN)
    if REVERSE_QUALITY_COLUMN in o_view:
        if remove_rev_qual:
            o_view.delete_column(REVERSE_QUALITY_COLUMN)

    pb(out_idx, force=True)
    print("", file=sys.stderr)

    # --- Record the command configuration in the DMS comments -------------
    command_line = " ".join(sys.argv[1:])
    input_dms_names = [d.name for d in source_dms]
    input_view_names = [v.name for v in source_views]
    o_view.write_config(config,
                        "cat",
                        command_line,
                        input_dms_name=input_dms_names,
                        input_view_name=input_view_names)
    o_dms.record_command_line(command_line)

    # --- Close the input DMSs, then the output one ------------------------
    for opened_dms in source_dms:
        opened_dms.close()
    o_dms.close()

    logger("info", "Done.")

View File

@ -36,14 +36,13 @@ def addOptions(parser):
metavar="<PREDICATE>",
default=[],
type=str,
help="Warning: use bytes for character strings (b'text' instead of 'text'). "
"Python boolean expression to be evaluated in the "
help="Python boolean expression to be evaluated in the "
"sequence/line context. The attribute name can be "
"used in the expression as a variable name. "
"An extra variable named 'sequence' or 'line' refers "
"to the sequence or line object itself. "
"Several -p options can be used on the same "
"commande line.")
"command line.")
group.add_argument("-S", "--sequence",
action="store", dest="grep:seq_pattern",

View File

@ -11,6 +11,7 @@ from obitools3.dms.column.column cimport Column
from obitools3.dms.obiseq cimport Nuc_Seq
from obitools3.dms import DMS
from obitools3.dms.taxo.taxo cimport Taxonomy
from obitools3.files.uncompress cimport CompressedFile
from obitools3.utils cimport tobytes, \
@ -65,6 +66,14 @@ def addOptions(parser):
addTaxdumpInputOption(parser)
addMinimalOutputOption(parser)
group = parser.add_argument_group('obi import specific options')
group.add_argument('--preread',
action="store_true", dest="import:preread",
default=False,
help="Do a first readthrough of the dataset if it contains huge dictionaries (more than 100 keys) for "
"a much faster import.")
def run(config):
@ -169,8 +178,6 @@ def run(config):
if entry_count >= 0:
pb = ProgressBar(entry_count, config, seconde=5)
entries = input[1]
NUC_SEQS_view = False
if isinstance(output[1], View) :
@ -188,6 +195,60 @@ def run(config):
dcols = {}
# First read through the entries to prepare columns with dictionaries as they are very time-expensive to rewrite
if config['import']['preread']:
logger("info", "First readthrough...")
entries = input[1]
i = 0
dict_dict = {}
for entry in entries:
PyErr_CheckSignals()
if entry is None: # error or exception handled at lower level, not raised because Python generators can't resume after any exception is raised
if config['obi']['skiperror']:
i-=1
continue
else:
raise Exception("obi import error in first readthrough")
if pb is not None:
pb(i)
elif not i%50000:
logger("info", "Read %d entries", i)
for tag in entry :
if type(entry[tag]) == dict :
if tag in dict_dict:
dict_dict[tag][0].update(entry[tag].keys())
else:
dict_dict[tag] = [set(entry[tag].keys()), get_obitype(entry[tag])]
i+=1
if pb is not None:
pb(i, force=True)
print("", file=sys.stderr)
for tag in dict_dict:
dcols[tag] = (Column.new_column(view, tag, dict_dict[tag][1], \
nb_elements_per_line=len(dict_dict[tag][0]), \
elements_names=list(dict_dict[tag][0])), \
value_obitype)
# Reinitialize the input
if isinstance(input[0], CompressedFile):
input_is_file = True
if entry_count >= 0:
pb = ProgressBar(entry_count, config, seconde=5)
try:
input[0].close()
except AttributeError:
pass
input = open_uri(config['obi']['inputURI'], force_file=input_is_file)
if input is None:
raise Exception("Could not open input URI")
entries = input[1]
i = 0
for entry in entries :
@ -247,6 +308,8 @@ def run(config):
dcols[tag] = (Column.new_column(view, tag, value_obitype, nb_elements_per_line=nb_elts, elements_names=elt_names), value_obitype)
# Fill value
if value_type == dict and nb_elts == 1: # special case that makes the OBI3 create a 1 elt/line column which won't read a dict value
value = value[list(value.keys())[0]] # The solution is to transform the value in a simple atomic one acceptable by the column
dcols[tag][0][i] = value
# TODO else log error?
@ -263,6 +326,12 @@ def run(config):
rewrite = True
try:
# Check that it's not the case where the first entry contained a dict of length 1 and now there is a new key
if type(value) == dict and \
dcols[tag][0].nb_elements_per_line == 1 and len(value.keys()) == 1 \
and dcols[tag][0].elements_names[0] != list(value.keys())[0] :
raise IndexError # trigger column rewrite
# Fill value
dcols[tag][0][i] = value

View File

@ -13,6 +13,7 @@ from obitools3.libalign.apat_pattern import Primer_search
from obitools3.dms.obiseq cimport Nuc_Seq
from obitools3.dms.capi.obitypes cimport OBI_SEQ, OBI_QUAL
from obitools3.dms.capi.apat cimport MAX_PATTERN
from obitools3.dms.capi.obiview cimport REVERSE_SEQUENCE_COLUMN, REVERSE_QUALITY_COLUMN
from obitools3.utils cimport tobytes
from libc.stdint cimport INT32_MAX
@ -22,8 +23,8 @@ import sys
from cpython.exc cimport PyErr_CheckSignals
REVERSE_SEQ_COLUMN_NAME = b"REVERSE_SEQUENCE" # used by alignpairedend tool
REVERSE_QUALITY_COLUMN_NAME = b"REVERSE_QUALITY" # used by alignpairedend tool
#REVERSE_SEQ_COLUMN_NAME = b"REVERSE_SEQUENCE" # used by alignpairedend tool
#REVERSE_QUALITY_COLUMN_NAME = b"REVERSE_QUALITY" # used by alignpairedend tool
__title__="Assigns sequence records to the corresponding experiment/sample based on DNA tags and primers"
@ -259,8 +260,8 @@ cdef tuple annotate(sequences, infos, no_tags, verbose=False):
if not_aligned:
sequences[1] = sequences[1].clone()
sequences[0][REVERSE_SEQ_COLUMN_NAME] = sequences[1].seq # used by alignpairedend tool
sequences[0][REVERSE_QUALITY_COLUMN_NAME] = sequences[1].quality # used by alignpairedend tool
sequences[0][REVERSE_SEQUENCE_COLUMN] = sequences[1].seq # used by alignpairedend tool
sequences[0][REVERSE_QUALITY_COLUMN] = sequences[1].quality # used by alignpairedend tool
for seq in sequences:
if hasattr(seq, "quality_array"):
@ -295,8 +296,8 @@ cdef tuple annotate(sequences, infos, no_tags, verbose=False):
if directmatch is None:
if not_aligned:
sequences[0][REVERSE_SEQ_COLUMN_NAME] = sequences[1].seq # used by alignpairedend tool
sequences[0][REVERSE_QUALITY_COLUMN_NAME] = sequences[1].quality # used by alignpairedend tool
sequences[0][REVERSE_SEQUENCE_COLUMN] = sequences[1].seq # used by alignpairedend tool
sequences[0][REVERSE_QUALITY_COLUMN] = sequences[1].quality # used by alignpairedend tool
sequences[0][b'error']=b'No primer match'
return False, sequences[0]
@ -314,8 +315,8 @@ cdef tuple annotate(sequences, infos, no_tags, verbose=False):
sequences[0] = sequences[0][directmatch[1][2]:]
else:
sequences[1] = sequences[1][directmatch[1][2]:]
sequences[0][REVERSE_SEQ_COLUMN_NAME] = sequences[1].seq # used by alignpairedend tool
sequences[0][REVERSE_QUALITY_COLUMN_NAME] = sequences[1].quality # used by alignpairedend tool
sequences[0][REVERSE_SEQUENCE_COLUMN] = sequences[1].seq # used by alignpairedend tool
sequences[0][REVERSE_QUALITY_COLUMN] = sequences[1].quality # used by alignpairedend tool
if directmatch[0].forward:
sequences[0][b'direction']=b'forward'
@ -361,8 +362,8 @@ cdef tuple annotate(sequences, infos, no_tags, verbose=False):
sequences[0] = sequences[0][:r[1]]
else:
sequences[1] = sequences[1][:r[1]]
sequences[0][REVERSE_SEQ_COLUMN_NAME] = sequences[1].seq # used by alignpairedend tool
sequences[0][REVERSE_QUALITY_COLUMN_NAME] = sequences[1].quality # used by alignpairedend tool
sequences[0][REVERSE_SEQUENCE_COLUMN] = sequences[1].seq # used by alignpairedend tool
sequences[0][REVERSE_QUALITY_COLUMN] = sequences[1].quality # used by alignpairedend tool
# do the same on the other seq
if first_match_first_seq:
r = direct_primer.revcomp(sequences[1])
@ -373,8 +374,8 @@ cdef tuple annotate(sequences, infos, no_tags, verbose=False):
sequences[1] = sequences[1][:r[1]]
else:
sequences[0] = sequences[0][:r[1]]
sequences[0][REVERSE_SEQ_COLUMN_NAME] = sequences[1].seq
sequences[0][REVERSE_QUALITY_COLUMN_NAME] = sequences[1].quality
sequences[0][REVERSE_SEQUENCE_COLUMN] = sequences[1].seq
sequences[0][REVERSE_QUALITY_COLUMN] = sequences[1].quality
# Look for other primer in the other direction on the sequence, or
@ -442,8 +443,8 @@ cdef tuple annotate(sequences, infos, no_tags, verbose=False):
sequences[1] = sequences[1][reversematch[1][2]:]
if not directmatch[0].forward:
sequences[1] = sequences[1].reverse_complement
sequences[0][REVERSE_SEQ_COLUMN_NAME] = sequences[1].seq # used by alignpairedend tool
sequences[0][REVERSE_QUALITY_COLUMN_NAME] = sequences[1].quality # used by alignpairedend tool
sequences[0][REVERSE_SEQUENCE_COLUMN] = sequences[1].seq # used by alignpairedend tool
sequences[0][REVERSE_QUALITY_COLUMN] = sequences[1].quality # used by alignpairedend tool
else:
sequences[0] = sequences[0][reversematch[1][2]:]
@ -605,12 +606,12 @@ def run(config):
paired_p.revcomp.aligner = aligner
if not_aligned: # create columns used by alignpairedend tool
Column.new_column(o_view, REVERSE_SEQ_COLUMN_NAME, OBI_SEQ)
Column.new_column(o_view, REVERSE_QUALITY_COLUMN_NAME, OBI_QUAL, associated_column_name=REVERSE_SEQ_COLUMN_NAME, associated_column_version=o_view[REVERSE_SEQ_COLUMN_NAME].version)
Column.new_column(o_view, REVERSE_SEQUENCE_COLUMN, OBI_SEQ)
Column.new_column(o_view, REVERSE_QUALITY_COLUMN, OBI_QUAL, associated_column_name=REVERSE_SEQUENCE_COLUMN, associated_column_version=o_view[REVERSE_SEQUENCE_COLUMN].version)
if unidentified is not None:
Column.new_column(unidentified, REVERSE_SEQ_COLUMN_NAME, OBI_SEQ)
Column.new_column(unidentified, REVERSE_QUALITY_COLUMN_NAME, OBI_QUAL, associated_column_name=REVERSE_SEQ_COLUMN_NAME, associated_column_version=unidentified[REVERSE_SEQ_COLUMN_NAME].version)
Column.new_column(unidentified, REVERSE_SEQUENCE_COLUMN, OBI_SEQ)
Column.new_column(unidentified, REVERSE_QUALITY_COLUMN, OBI_QUAL, associated_column_name=REVERSE_SEQUENCE_COLUMN, associated_column_version=unidentified[REVERSE_SEQUENCE_COLUMN].version)
g = 0
u = 0

View File

@ -8,7 +8,8 @@ from obitools3.dms.view import RollbackException
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
from obitools3.dms.column.column cimport Column, Column_line
from obitools3.dms.capi.obiview cimport QUALITY_COLUMN, COUNT_COLUMN, NUC_SEQUENCE_COLUMN, ID_COLUMN, TAXID_COLUMN, \
TAXID_DIST_COLUMN, MERGED_TAXID_COLUMN, MERGED_COLUMN, MERGED_PREFIX
TAXID_DIST_COLUMN, MERGED_TAXID_COLUMN, MERGED_COLUMN, MERGED_PREFIX, \
REVERSE_QUALITY_COLUMN
from obitools3.dms.capi.obitypes cimport OBI_INT, OBI_STR, index_t
from obitools3.apps.optiongroups import addMinimalInputOption, \
addMinimalOutputOption, \
@ -24,9 +25,6 @@ from cpython.exc cimport PyErr_CheckSignals
__title__="Group sequence records together"
REVERSE_QUALITY_COLUMN_NAME = b"REVERSE_QUALITY" # TODO from ngsfilter, move to C
def addOptions(parser):
@ -496,8 +494,8 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
# Deletes quality columns if there is one because the matching between sequence and quality will be broken (quality set to NA when sequence not)
if QUALITY_COLUMN in view:
o_view.delete_column(QUALITY_COLUMN)
if REVERSE_QUALITY_COLUMN_NAME in view:
o_view.delete_column(REVERSE_QUALITY_COLUMN_NAME)
if REVERSE_QUALITY_COLUMN in view:
o_view.delete_column(REVERSE_QUALITY_COLUMN)
if taxonomy is not None:
print("") # TODO because in the middle of progress bar. Better solution?

View File

@ -24,6 +24,8 @@ cdef extern from "obiview.h" nogil:
extern const_char_p ID_COLUMN
extern const_char_p DEFINITION_COLUMN
extern const_char_p QUALITY_COLUMN
extern const_char_p REVERSE_QUALITY_COLUMN
extern const_char_p REVERSE_SEQUENCE_COLUMN
extern const_char_p COUNT_COLUMN
extern const_char_p TAXID_COLUMN
extern const_char_p MERGED_TAXID_COLUMN
@ -100,7 +102,7 @@ cdef extern from "obiview.h" nogil:
const_char_p comments,
bint create)
int obi_view_delete_column(Obiview_p view, const_char_p column_name)
int obi_view_delete_column(Obiview_p view, const_char_p column_name, bint delete_file)
OBIDMS_column_p obi_view_get_column(Obiview_p view, const_char_p column_name)

View File

@ -21,7 +21,11 @@ from ..capi.obiutils cimport obi_format_date
from ..capi.obiview cimport obi_view_add_column, \
obi_view_get_pointer_on_column_in_view, \
Obiview_p, \
NUC_SEQUENCE_COLUMN
NUC_SEQUENCE_COLUMN, \
QUALITY_COLUMN, \
REVERSE_SEQUENCE_COLUMN, \
REVERSE_QUALITY_COLUMN
from ..object cimport OBIDeactivatedInstanceError
@ -122,11 +126,18 @@ cdef class Column(OBIWrapper) :
if data_type == OBI_QUAL:
if associated_column_name_b == b"":
if NUC_SEQUENCE_COLUMN not in view:
raise RuntimeError("Cannot create column %s in view %s: trying to create quality column but no NUC_SEQ column to associate it with in the view" % (bytes2str(column_name_b),
bytes2str(view.name)))
associated_column_name_b = NUC_SEQUENCE_COLUMN
associated_column_version = view[NUC_SEQUENCE_COLUMN].version
if column_name == QUALITY_COLUMN:
if NUC_SEQUENCE_COLUMN not in view:
raise RuntimeError("Cannot create column %s in view %s: trying to create quality column but no NUC_SEQ column to associate it with in the view" % (bytes2str(column_name_b),
bytes2str(view.name)))
associated_column_name_b = NUC_SEQUENCE_COLUMN
associated_column_version = view[NUC_SEQUENCE_COLUMN].version
elif column_name == REVERSE_QUALITY_COLUMN:
if REVERSE_SEQUENCE_COLUMN not in view:
raise RuntimeError("Cannot create column %s in view %s: trying to create reverse quality column but no REVERSE_SEQUENCE column to associate it with in the view" % (bytes2str(column_name_b),
bytes2str(view.name)))
associated_column_name_b = REVERSE_SEQUENCE_COLUMN
associated_column_version = view[REVERSE_SEQUENCE_COLUMN].version
if (obi_view_add_column(view = view.pointer(),
column_name = column_name_b,

View File

@ -259,7 +259,7 @@ cdef class DMS(OBIWrapper):
for command in self.command_line_history:
s+=b"#"
s+=command[b"time"]
s+=b"\n"
s+=b"\nobi "
s+=command[b"command"]
s+=b"\n"
return s

View File

@ -22,7 +22,8 @@ cdef class View(OBIWrapper):
cdef inline Obiview_p pointer(self)
cpdef delete_column(self,
object column_name)
object column_name,
bint delete_file=*)
cpdef rename_column(self,
object current_name,

View File

@ -227,7 +227,8 @@ cdef class View(OBIWrapper) :
cpdef delete_column(self,
object column_name) :
object column_name,
bint delete_file=False) :
cdef bytes column_name_b = tobytes(column_name)
@ -239,7 +240,7 @@ cdef class View(OBIWrapper) :
col.close()
# Remove the column from the view which closes the C structure
if obi_view_delete_column(self.pointer(), column_name_b) < 0 :
if obi_view_delete_column(self.pointer(), column_name_b, delete_file) < 0 :
raise RollbackException("Problem deleting column %s from a view",
bytes2str(column_name_b), self)
@ -297,11 +298,17 @@ cdef class View(OBIWrapper) :
nb_elements_per_line=new_nb_elements_per_line, elements_names=new_elements_names,
comments=old_column.comments, alias=column_name_b+tobytes('___new___'))
switch_to_dict = old_column.nb_elements_per_line == 1 and new_nb_elements_per_line > 1
ori_key = old_column._elements_names[0]
for i in range(length) :
new_column[i] = old_column[i]
if switch_to_dict :
new_column[i] = {ori_key: old_column[i]}
else:
new_column[i] = old_column[i]
# Remove old column from view
self.delete_column(column_name_b)
self.delete_column(column_name_b, delete_file=True)
# Rename new
new_column.name = column_name_b

View File

@ -6,6 +6,7 @@ from .solexapairend import iterOnAligment
from .shifted_ali cimport Ali_shifted
from obitools3.dms.capi.obiview cimport Obiview_p, QUALITY_COLUMN, NUC_SEQUENCE_COLUMN, \
REVERSE_SEQUENCE_COLUMN, REVERSE_QUALITY_COLUMN, \
obi_set_qual_int_with_elt_idx_and_col_p_in_view, \
obi_set_str_with_elt_idx_and_col_p_in_view
@ -13,7 +14,6 @@ from obitools3.dms.capi.obidmscolumn cimport OBIDMS_column_p
from obitools3.dms.view.view cimport View
from obitools3.dms.column.column cimport Column
from obitools3.commands.ngsfilter import REVERSE_SEQ_COLUMN_NAME, REVERSE_QUALITY_COLUMN_NAME
from math import log10
@ -233,7 +233,7 @@ def buildConsensus(ali, seq, ref_tags=None):
seq[b'mode']=b'alignment'
for tag in ref_tags:
if tag != REVERSE_SEQ_COLUMN_NAME and tag != REVERSE_QUALITY_COLUMN_NAME and \
if tag != REVERSE_SEQUENCE_COLUMN and tag != REVERSE_QUALITY_COLUMN and \
tag != NUC_SEQUENCE_COLUMN and tag != QUALITY_COLUMN:
seq[tag] = ref_tags[tag]
@ -254,7 +254,7 @@ def buildJoinedSequence(ali, reverse, seq, forward=None):
seq[b"mode"]=b"joined"
seq[b"pairedend_limit"]=len(forward)
for tag in forward:
if tag != REVERSE_SEQ_COLUMN_NAME and tag != REVERSE_QUALITY_COLUMN_NAME:
if tag != REVERSE_SEQUENCE_COLUMN and tag != REVERSE_QUALITY_COLUMN:
seq[tag] = forward[tag]
return seq

7
python/obitools3/uri/decode.pyx Executable file → Normal file
View File

@ -171,7 +171,8 @@ Reads an URI and returns a tuple containing:
def open_uri(uri,
bint input=True,
type newviewtype=View,
dms_only=False):
dms_only=False,
force_file=False):
cdef bytes urib = tobytes(uri)
cdef bytes scheme
@ -195,9 +196,9 @@ def open_uri(uri,
if 'obi' not in config:
config['obi']={}
try:
if not force_file and "defaultdms" in config["obi"]:
default_dms=config["obi"]["defaultdms"]
except KeyError:
else:
default_dms=None
try:

View File

@ -1,5 +1,5 @@
major = 3
minor = 0
serial= '0-beta5'
serial= '0-beta10'
version ="%d.%02d.%s" % (major,minor,serial)

View File

@ -409,8 +409,7 @@ int obi_clean(const char* dms_name,
stop = true;
}
#pragma omp parallel default(none) \
shared(thread_count, seq_count, blob_array, complete_sample_count_array, alignment_result_array, \
#pragma omp parallel shared(thread_count, seq_count, blob_array, complete_sample_count_array, alignment_result_array, \
stop, blob1, i, obi_errno, keep_running, stderr, max_ratio, iseq_column, i_view, \
similarity_mode, reference, normalize, threshold, ktable, status_column, o_view, sample_count)
{

View File

@ -696,6 +696,12 @@ int obi_clean_dms(const char* dms_path)
// return -1;
// }
if (obi_close_dms(dms, true) < 0)
{
obidebug(1, "\nError closing a DMS after cleaning");
return -1;
}
return 0;
}

View File

@ -34,8 +34,8 @@
* @brief enum for the boolean OBIType.
*/
typedef enum OBIBool {
FALSE = 0,
TRUE = 1,
OBIFalse = 0,
OBITrue = 1,
OBIBool_NA = 2
} obibool_t, *obibool_p; /**< a boolean true/false value */ // TODO check name convention?

View File

@ -2380,11 +2380,12 @@ int obi_view_add_column(Obiview_p view,
}
int obi_view_delete_column(Obiview_p view, const char* column_name)
int obi_view_delete_column(Obiview_p view, const char* column_name, bool delete_file)
{
int i;
bool found;
OBIDMS_column_p column;
char* col_to_delete_path;
// Check that the view is not read-only
if (view->read_only)
@ -2406,8 +2407,31 @@ int obi_view_delete_column(Obiview_p view, const char* column_name)
obidebug(1, "\nError getting a column from the linked list of column pointers of a view when deleting a column from a view");
return -1;
}
// Keep column path if need to delete the file
if (delete_file)
{
col_to_delete_path = obi_column_full_path(view->dms, column->header->name, column->header->version);
if (col_to_delete_path == NULL)
{
obidebug(1, "\nError getting a column file path when deleting a column");
return -1;
}
}
obi_close_column(column);
// Delete file if needed
if (delete_file)
{
if (remove(col_to_delete_path) < 0)
{
obi_set_errno(OBICOL_UNKNOWN_ERROR);
obidebug(1, "\nError deleting a column file when deleting unfinished columns: file %s", col_to_delete_path);
return -1;
}
free(col_to_delete_path);
}
view->columns = ll_delete(view->columns, i);
// TODO how do we check for error? NULL can be empty list
found = true;
@ -3047,7 +3071,7 @@ int obi_create_auto_id_column(Obiview_p view, const char* prefix)
// Delete old ID column if it exists
if (obi_view_get_column(view, ID_COLUMN) != NULL)
{
if (obi_view_delete_column(view, ID_COLUMN) < 0)
if (obi_view_delete_column(view, ID_COLUMN, false) < 0)
{
obidebug(1, "Error deleting an ID column to replace it in a view");
return -1;

View File

@ -52,6 +52,15 @@
#define QUALITY_COLUMN "QUALITY" /**< The name of the column containing the sequence qualities
* in NUC_SEQS_VIEW views.
*/
#define REVERSE_QUALITY_COLUMN "REVERSE_QUALITY" /**< The name of the column containing the sequence qualities
* of the reverse read (generated by ngsfilter, used by alignpairedend).
*/
#define REVERSE_SEQUENCE_COLUMN "REVERSE_SEQUENCE" /**< The name of the column containing the sequence
* of the reverse read (generated by ngsfilter, used by alignpairedend).
*/
#define QUALITY_COLUMN "QUALITY" /**< The name of the column containing the sequence qualities
* in NUC_SEQS_VIEW views.
*/
#define COUNT_COLUMN "COUNT" /**< The name of the column containing the sequence counts
* in NUC_SEQS_VIEW views.
*/
@ -431,6 +440,7 @@ int obi_view_add_column(Obiview_p view,
*
* @param view A pointer on the view.
* @param column_name The name of the column that should be deleted from the view.
* @param delete_file Whether the column file should be deleted. Use carefully re: dependencies.
*
* @returns A value indicating the success of the operation.
* @retval 0 if the operation was successfully completed.
@ -439,7 +449,7 @@ int obi_view_add_column(Obiview_p view,
* @since February 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_view_delete_column(Obiview_p view, const char* column_name);
int obi_view_delete_column(Obiview_p view, const char* column_name, bool delete_file);
/**

View File

@ -951,15 +951,15 @@ double generic_sse_banded_lcs_align(char* seq1, char* seq2, double threshold, bo
// Put the DNA sequences in the int arrays. Longest sequence must be first argument of sse_align function
if (l2 > l1)
{
putSeqInSeq(iseq1, seq2, l2, TRUE);
putSeqInSeq(iseq2, seq1, l1, FALSE);
putSeqInSeq(iseq1, seq2, l2, true);
putSeqInSeq(iseq2, seq1, l1, false);
// Compute alignment
id = sse_banded_lcs_align(iseq1, iseq2, l2, l1, normalize, reference, similarity_mode, address, LCSmin, lcs_length, ali_length);
}
else
{
putSeqInSeq(iseq1, seq1, l1, TRUE);
putSeqInSeq(iseq2, seq2, l2, FALSE);
putSeqInSeq(iseq1, seq1, l1, true);
putSeqInSeq(iseq2, seq2, l2, false);
// Compute alignment
id = sse_banded_lcs_align(iseq1, iseq2, l1, l2, normalize, reference, similarity_mode, address, LCSmin, lcs_length, ali_length);
}
@ -1054,15 +1054,15 @@ double obiblob_sse_banded_lcs_align(Obi_blob_p seq1, Obi_blob_p seq2, double thr
// Put the DNA sequences in the int arrays. Longest sequence must be first argument of sse_align function
if (l2 > l1)
{
putBlobInSeq(iseq1, seq2, l2, TRUE);
putBlobInSeq(iseq2, seq1, l1, FALSE);
putBlobInSeq(iseq1, seq2, l2, true);
putBlobInSeq(iseq2, seq1, l1, false);
// Compute alignment
id = sse_banded_lcs_align(iseq1, iseq2, l2, l1, normalize, reference, similarity_mode, address, LCSmin, lcs_length, ali_length);
}
else
{
putBlobInSeq(iseq1, seq1, l1, TRUE);
putBlobInSeq(iseq2, seq2, l2, FALSE);
putBlobInSeq(iseq1, seq1, l1, true);
putBlobInSeq(iseq2, seq2, l2, false);
// Compute alignment
id = sse_banded_lcs_align(iseq1, iseq2, l1, l2, normalize, reference, similarity_mode, address, LCSmin, lcs_length, ali_length);
}