diff --git a/python/obitools3/commands/alignpairedend.pyx b/python/obitools3/commands/alignpairedend.pyx index c9af464..2a0699b 100755 --- a/python/obitools3/commands/alignpairedend.pyx +++ b/python/obitools3/commands/alignpairedend.pyx @@ -14,7 +14,7 @@ from obitools3.libalign._qsrassemble import QSolexaRightReverseAssemble from obitools3.libalign._solexapairend import buildConsensus, buildJoinedSequence from obitools3.dms.obiseq cimport Nuc_Seq from obitools3.libalign.shifted_ali cimport Kmer_similarity, Ali_shifted -from obitools3.commands.ngsfilter import REVERSE_SEQ_COLUMN_NAME, REVERSE_QUALITY_COLUMN_NAME +from obitools3.dms.capi.obiview cimport REVERSE_SEQUENCE_COLUMN, REVERSE_QUALITY_COLUMN import sys import os @@ -102,7 +102,7 @@ def alignmentIterator(entries, aligner): seqR = reverse[i] else: seqF = Nuc_Seq.new_from_stored(entries[i]) - seqR = Nuc_Seq(seqF.id, seqF[REVERSE_SEQ_COLUMN_NAME], quality=seqF[REVERSE_QUALITY_COLUMN_NAME]) + seqR = Nuc_Seq(seqF.id, seqF[REVERSE_SEQUENCE_COLUMN], quality=seqF[REVERSE_QUALITY_COLUMN]) seqR.index = i ali = aligner(seqF, seqR) @@ -196,8 +196,8 @@ def run(config): reversed_column=None) else: aligner = Kmer_similarity(entries, \ - column2=entries[REVERSE_SEQ_COLUMN_NAME], \ - qual_column2=entries[REVERSE_QUALITY_COLUMN_NAME], \ + column2=entries[REVERSE_SEQUENCE_COLUMN], \ + qual_column2=entries[REVERSE_QUALITY_COLUMN], \ kmer_size=config['alignpairedend']['kmersize'], \ reversed_column=entries[b'reversed']) # column created by the ngsfilter tool @@ -221,7 +221,7 @@ def run(config): buildConsensus(ali, consensus, seqF) else: if not two_views: - seqR = Nuc_Seq(seqF.id, seqF[REVERSE_SEQ_COLUMN_NAME], quality = seqF[REVERSE_QUALITY_COLUMN_NAME]) + seqR = Nuc_Seq(seqF.id, seqF[REVERSE_SEQUENCE_COLUMN], quality = seqF[REVERSE_QUALITY_COLUMN]) else: seqR = reverse[i] buildJoinedSequence(ali, seqR, consensus, forward=seqF) diff --git a/python/obitools3/commands/cat.pyx b/python/obitools3/commands/cat.pyx index f118e7e..dee708c 100755 --- a/python/obitools3/commands/cat.pyx +++ b/python/obitools3/commands/cat.pyx @@ -2,15 +2,18 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport from obitools3.dms import DMS -from obitools3.dms.view.view cimport View, Line_selection +from obitools3.dms.view.view cimport View from obitools3.uri.decode import open_uri -from obitools3.apps.optiongroups import addMinimalInputOption, addMinimalOutputOption +from obitools3.apps.optiongroups import addMinimalOutputOption from obitools3.dms.view import RollbackException from obitools3.apps.config import logger from obitools3.utils cimport str2bytes from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS -from obitools3.dms.capi.obiview cimport QUALITY_COLUMN -from obitools3.commands.ngsfilter import REVERSE_QUALITY_COLUMN_NAME # TODO should be stored in C +from obitools3.dms.view.view cimport View +from obitools3.dms.capi.obiview cimport NUC_SEQUENCE_COLUMN, REVERSE_SEQUENCE_COLUMN, \ + QUALITY_COLUMN, REVERSE_QUALITY_COLUMN +from obitools3.dms.capi.obitypes cimport OBI_SEQ, OBI_QUAL +from obitools3.dms.column.column cimport Column import time import sys @@ -47,6 +50,8 @@ def run(config): iview_list = [] idms_list = [] total_len = 0 + remove_qual = False + remove_rev_qual = False v_type = View_NUC_SEQS for v_uri in config["cat"]["views_to_cat"]: input = open_uri(v_uri) @@ -56,6 +61,10 @@ def run(config): i_view = input[1] if input[2] != View_NUC_SEQS: # Check view type (output view is nuc_seqs view if all input view are nuc_seqs view) v_type = View + if QUALITY_COLUMN not in i_view: # Check if keep quality column in output view (if all input views have it) + remove_qual = True + if REVERSE_QUALITY_COLUMN not in i_view: # same as above for reverse quality + remove_rev_qual = True total_len += len(i_view) iview_list.append(i_view) idms_list.append(i_dms) @@ -69,6 +78,15 @@ def run(config): o_dms = output[0] o_view = output[1] + # Initialize quality columns and their associated sequence columns if needed + if not remove_qual: + if NUC_SEQUENCE_COLUMN not in o_view: + Column.new_column(o_view, NUC_SEQUENCE_COLUMN, OBI_SEQ) + Column.new_column(o_view, QUALITY_COLUMN, OBI_QUAL, associated_column_name=NUC_SEQUENCE_COLUMN, associated_column_version=o_view[NUC_SEQUENCE_COLUMN].version) + if not remove_rev_qual: + Column.new_column(o_view, REVERSE_SEQUENCE_COLUMN, OBI_SEQ) + Column.new_column(o_view, REVERSE_QUALITY_COLUMN, OBI_QUAL, associated_column_name=REVERSE_SEQUENCE_COLUMN, associated_column_version=o_view[REVERSE_SEQUENCE_COLUMN].version) + # Initialize the progress bar pb = ProgressBar(total_len, config, seconde=5) @@ -80,11 +98,11 @@ def run(config): o_view[i] = l i+=1 - # Deletes quality columns if there are any - if QUALITY_COLUMN in o_view: + # Deletes quality columns if needed + if QUALITY_COLUMN in o_view and remove_qual : o_view.delete_column(QUALITY_COLUMN) - if REVERSE_QUALITY_COLUMN_NAME in o_view: - o_view.delete_column(REVERSE_QUALITY_COLUMN_NAME) + if REVERSE_QUALITY_COLUMN in o_view and remove_rev_qual : + o_view.delete_column(REVERSE_QUALITY_COLUMN) pb(i, force=True) print("", file=sys.stderr) diff --git a/python/obitools3/commands/ngsfilter.pyx b/python/obitools3/commands/ngsfilter.pyx index 92b124a..53d7827 100644 --- a/python/obitools3/commands/ngsfilter.pyx +++ b/python/obitools3/commands/ngsfilter.pyx @@ -13,6 +13,7 @@ from obitools3.libalign.apat_pattern import Primer_search from obitools3.dms.obiseq cimport Nuc_Seq from obitools3.dms.capi.obitypes cimport OBI_SEQ, OBI_QUAL from obitools3.dms.capi.apat cimport MAX_PATTERN +from obitools3.dms.capi.obiview cimport REVERSE_SEQUENCE_COLUMN, REVERSE_QUALITY_COLUMN from obitools3.utils cimport tobytes from libc.stdint cimport INT32_MAX @@ -22,8 +23,8 @@ import sys from cpython.exc cimport PyErr_CheckSignals -REVERSE_SEQ_COLUMN_NAME = b"REVERSE_SEQUENCE" # used by alignpairedend tool -REVERSE_QUALITY_COLUMN_NAME = b"REVERSE_QUALITY" # used by alignpairedend tool +#REVERSE_SEQ_COLUMN_NAME = b"REVERSE_SEQUENCE" # used by alignpairedend tool +#REVERSE_QUALITY_COLUMN_NAME = b"REVERSE_QUALITY" # used by alignpairedend tool __title__="Assigns sequence records to the corresponding experiment/sample based on DNA tags and primers" @@ -259,8 +260,8 @@ cdef tuple annotate(sequences, infos, no_tags, verbose=False): if not_aligned: sequences[1] = sequences[1].clone() - sequences[0][REVERSE_SEQ_COLUMN_NAME] = sequences[1].seq # used by alignpairedend tool - sequences[0][REVERSE_QUALITY_COLUMN_NAME] = sequences[1].quality # used by alignpairedend tool + sequences[0][REVERSE_SEQUENCE_COLUMN] = sequences[1].seq # used by alignpairedend tool + sequences[0][REVERSE_QUALITY_COLUMN] = sequences[1].quality # used by alignpairedend tool for seq in sequences: if hasattr(seq, "quality_array"): @@ -295,8 +296,8 @@ cdef tuple annotate(sequences, infos, no_tags, verbose=False): if directmatch is None: if not_aligned: - sequences[0][REVERSE_SEQ_COLUMN_NAME] = sequences[1].seq # used by alignpairedend tool - sequences[0][REVERSE_QUALITY_COLUMN_NAME] = sequences[1].quality # used by alignpairedend tool + sequences[0][REVERSE_SEQUENCE_COLUMN] = sequences[1].seq # used by alignpairedend tool + sequences[0][REVERSE_QUALITY_COLUMN] = sequences[1].quality # used by alignpairedend tool sequences[0][b'error']=b'No primer match' return False, sequences[0] @@ -314,8 +315,8 @@ cdef tuple annotate(sequences, infos, no_tags, verbose=False): sequences[0] = sequences[0][directmatch[1][2]:] else: sequences[1] = sequences[1][directmatch[1][2]:] - sequences[0][REVERSE_SEQ_COLUMN_NAME] = sequences[1].seq # used by alignpairedend tool - sequences[0][REVERSE_QUALITY_COLUMN_NAME] = sequences[1].quality # used by alignpairedend tool + sequences[0][REVERSE_SEQUENCE_COLUMN] = sequences[1].seq # used by alignpairedend tool + sequences[0][REVERSE_QUALITY_COLUMN] = sequences[1].quality # used by alignpairedend tool if directmatch[0].forward: sequences[0][b'direction']=b'forward' @@ -361,8 +362,8 @@ cdef tuple annotate(sequences, infos, no_tags, verbose=False): sequences[0] = sequences[0][:r[1]] else: sequences[1] = sequences[1][:r[1]] - sequences[0][REVERSE_SEQ_COLUMN_NAME] = sequences[1].seq # used by alignpairedend tool - sequences[0][REVERSE_QUALITY_COLUMN_NAME] = sequences[1].quality # used by alignpairedend tool + sequences[0][REVERSE_SEQUENCE_COLUMN] = sequences[1].seq # used by alignpairedend tool + sequences[0][REVERSE_QUALITY_COLUMN] = sequences[1].quality # used by alignpairedend tool # do the same on the other seq if first_match_first_seq: r = direct_primer.revcomp(sequences[1]) @@ -373,8 +374,8 @@ cdef tuple annotate(sequences, infos, no_tags, verbose=False): sequences[1] = sequences[1][:r[1]] else: sequences[0] = sequences[0][:r[1]] - sequences[0][REVERSE_SEQ_COLUMN_NAME] = sequences[1].seq - sequences[0][REVERSE_QUALITY_COLUMN_NAME] = sequences[1].quality + sequences[0][REVERSE_SEQUENCE_COLUMN] = sequences[1].seq + sequences[0][REVERSE_QUALITY_COLUMN] = sequences[1].quality # Look for other primer in the other direction on the sequence, or @@ -442,8 +443,8 @@ cdef tuple annotate(sequences, infos, no_tags, verbose=False): sequences[1] = sequences[1][reversematch[1][2]:] if not directmatch[0].forward: sequences[1] = sequences[1].reverse_complement - sequences[0][REVERSE_SEQ_COLUMN_NAME] = sequences[1].seq # used by alignpairedend tool - sequences[0][REVERSE_QUALITY_COLUMN_NAME] = sequences[1].quality # used by alignpairedend tool + sequences[0][REVERSE_SEQUENCE_COLUMN] = sequences[1].seq # used by alignpairedend tool + sequences[0][REVERSE_QUALITY_COLUMN] = sequences[1].quality # used by alignpairedend tool else: sequences[0] = sequences[0][reversematch[1][2]:] @@ -605,12 +606,12 @@ def run(config): paired_p.revcomp.aligner = aligner if not_aligned: # create columns used by alignpairedend tool - Column.new_column(o_view, REVERSE_SEQ_COLUMN_NAME, OBI_SEQ) - Column.new_column(o_view, REVERSE_QUALITY_COLUMN_NAME, OBI_QUAL, associated_column_name=REVERSE_SEQ_COLUMN_NAME, associated_column_version=o_view[REVERSE_SEQ_COLUMN_NAME].version) + Column.new_column(o_view, REVERSE_SEQUENCE_COLUMN, OBI_SEQ) + Column.new_column(o_view, REVERSE_QUALITY_COLUMN, OBI_QUAL, associated_column_name=REVERSE_SEQUENCE_COLUMN, associated_column_version=o_view[REVERSE_SEQUENCE_COLUMN].version) if unidentified is not None: - Column.new_column(unidentified, REVERSE_SEQ_COLUMN_NAME, OBI_SEQ) - Column.new_column(unidentified, REVERSE_QUALITY_COLUMN_NAME, OBI_QUAL, associated_column_name=REVERSE_SEQ_COLUMN_NAME, associated_column_version=unidentified[REVERSE_SEQ_COLUMN_NAME].version) + Column.new_column(unidentified, REVERSE_SEQUENCE_COLUMN, OBI_SEQ) + Column.new_column(unidentified, REVERSE_QUALITY_COLUMN, OBI_QUAL, associated_column_name=REVERSE_SEQUENCE_COLUMN, associated_column_version=unidentified[REVERSE_SEQUENCE_COLUMN].version) g = 0 u = 0 diff --git a/python/obitools3/commands/uniq.pyx b/python/obitools3/commands/uniq.pyx index 6ec3e7e..668b6fa 100644 --- a/python/obitools3/commands/uniq.pyx +++ b/python/obitools3/commands/uniq.pyx @@ -8,7 +8,8 @@ from obitools3.dms.view import RollbackException from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS from obitools3.dms.column.column cimport Column, Column_line from obitools3.dms.capi.obiview cimport QUALITY_COLUMN, COUNT_COLUMN, NUC_SEQUENCE_COLUMN, ID_COLUMN, TAXID_COLUMN, \ - TAXID_DIST_COLUMN, MERGED_TAXID_COLUMN, MERGED_COLUMN, MERGED_PREFIX + TAXID_DIST_COLUMN, MERGED_TAXID_COLUMN, MERGED_COLUMN, MERGED_PREFIX, \ + REVERSE_QUALITY_COLUMN from obitools3.dms.capi.obitypes cimport OBI_INT, OBI_STR, index_t from obitools3.apps.optiongroups import addMinimalInputOption, \ addMinimalOutputOption, \ @@ -24,9 +25,6 @@ from cpython.exc cimport PyErr_CheckSignals __title__="Group sequence records together" - -REVERSE_QUALITY_COLUMN_NAME = b"REVERSE_QUALITY" # TODO from ngsfilter, move to C - def addOptions(parser): @@ -496,8 +494,8 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li # Deletes quality columns if there is one because the matching between sequence and quality will be broken (quality set to NA when sequence not) if QUALITY_COLUMN in view: o_view.delete_column(QUALITY_COLUMN) - if REVERSE_QUALITY_COLUMN_NAME in view: - o_view.delete_column(REVERSE_QUALITY_COLUMN_NAME) + if REVERSE_QUALITY_COLUMN in view: + o_view.delete_column(REVERSE_QUALITY_COLUMN) if taxonomy is not None: print("") # TODO because in the middle of progress bar. Better solution? diff --git a/python/obitools3/dms/capi/obiview.pxd b/python/obitools3/dms/capi/obiview.pxd index ad7864e..278f864 100755 --- a/python/obitools3/dms/capi/obiview.pxd +++ b/python/obitools3/dms/capi/obiview.pxd @@ -24,6 +24,8 @@ cdef extern from "obiview.h" nogil: extern const_char_p ID_COLUMN extern const_char_p DEFINITION_COLUMN extern const_char_p QUALITY_COLUMN + extern const_char_p REVERSE_QUALITY_COLUMN + extern const_char_p REVERSE_SEQUENCE_COLUMN extern const_char_p COUNT_COLUMN extern const_char_p TAXID_COLUMN extern const_char_p MERGED_TAXID_COLUMN diff --git a/python/obitools3/dms/column/column.pyx b/python/obitools3/dms/column/column.pyx index bf93dbb..ac3ce23 100755 --- a/python/obitools3/dms/column/column.pyx +++ b/python/obitools3/dms/column/column.pyx @@ -21,7 +21,11 @@ from ..capi.obiutils cimport obi_format_date from ..capi.obiview cimport obi_view_add_column, \ obi_view_get_pointer_on_column_in_view, \ Obiview_p, \ - NUC_SEQUENCE_COLUMN + NUC_SEQUENCE_COLUMN, \ + QUALITY_COLUMN, \ + REVERSE_SEQUENCE_COLUMN, \ + REVERSE_QUALITY_COLUMN + from ..object cimport OBIDeactivatedInstanceError @@ -122,11 +126,18 @@ cdef class Column(OBIWrapper) : if data_type == OBI_QUAL: if associated_column_name_b == b"": - if NUC_SEQUENCE_COLUMN not in view: - raise RuntimeError("Cannot create column %s in view %s: trying to create quality column but no NUC_SEQ column to associate it with in the view" % (bytes2str(column_name_b), - bytes2str(view.name))) - associated_column_name_b = NUC_SEQUENCE_COLUMN - associated_column_version = view[NUC_SEQUENCE_COLUMN].version + if column_name == QUALITY_COLUMN: + if NUC_SEQUENCE_COLUMN not in view: + raise RuntimeError("Cannot create column %s in view %s: trying to create quality column but no NUC_SEQ column to associate it with in the view" % (bytes2str(column_name_b), + bytes2str(view.name))) + associated_column_name_b = NUC_SEQUENCE_COLUMN + associated_column_version = view[NUC_SEQUENCE_COLUMN].version + elif column_name == REVERSE_QUALITY_COLUMN: + if REVERSE_SEQUENCE_COLUMN not in view: + raise RuntimeError("Cannot create column %s in view %s: trying to create reverse quality column but no REVERSE_SEQUENCE column to associate it with in the view" % (bytes2str(column_name_b), + bytes2str(view.name))) + associated_column_name_b = REVERSE_SEQUENCE_COLUMN + associated_column_version = view[REVERSE_SEQUENCE_COLUMN].version if (obi_view_add_column(view = view.pointer(), column_name = column_name_b, diff --git a/python/obitools3/libalign/_solexapairend.pyx b/python/obitools3/libalign/_solexapairend.pyx index 11f96aa..5555271 100755 --- a/python/obitools3/libalign/_solexapairend.pyx +++ b/python/obitools3/libalign/_solexapairend.pyx @@ -6,6 +6,7 @@ from .solexapairend import iterOnAligment from .shifted_ali cimport Ali_shifted from obitools3.dms.capi.obiview cimport Obiview_p, QUALITY_COLUMN, NUC_SEQUENCE_COLUMN, \ + REVERSE_SEQUENCE_COLUMN, REVERSE_QUALITY_COLUMN, \ obi_set_qual_int_with_elt_idx_and_col_p_in_view, \ obi_set_str_with_elt_idx_and_col_p_in_view @@ -13,7 +14,6 @@ from obitools3.dms.capi.obidmscolumn cimport OBIDMS_column_p from obitools3.dms.view.view cimport View from obitools3.dms.column.column cimport Column -from obitools3.commands.ngsfilter import REVERSE_SEQ_COLUMN_NAME, REVERSE_QUALITY_COLUMN_NAME from math import log10 @@ -233,7 +233,7 @@ def buildConsensus(ali, seq, ref_tags=None): seq[b'mode']=b'alignment' for tag in ref_tags: - if tag != REVERSE_SEQ_COLUMN_NAME and tag != REVERSE_QUALITY_COLUMN_NAME and \ + if tag != REVERSE_SEQUENCE_COLUMN and tag != REVERSE_QUALITY_COLUMN and \ tag != NUC_SEQUENCE_COLUMN and tag != QUALITY_COLUMN: seq[tag] = ref_tags[tag] @@ -254,7 +254,7 @@ def buildJoinedSequence(ali, reverse, seq, forward=None): seq[b"mode"]=b"joined" seq[b"pairedend_limit"]=len(forward) for tag in forward: - if tag != REVERSE_SEQ_COLUMN_NAME and tag != REVERSE_QUALITY_COLUMN_NAME: + if tag != REVERSE_SEQUENCE_COLUMN and tag != REVERSE_QUALITY_COLUMN: seq[tag] = forward[tag] return seq diff --git a/src/obiview.h b/src/obiview.h index 6a620fd..4fe8c20 100755 --- a/src/obiview.h +++ b/src/obiview.h @@ -52,6 +52,15 @@ #define QUALITY_COLUMN "QUALITY" /**< The name of the column containing the sequence qualities * in NUC_SEQS_VIEW views. */ +#define REVERSE_QUALITY_COLUMN "REVERSE_QUALITY" /**< The name of the column containing the sequence qualities + * of the reverse read (generated by ngsfilter, used by alignpairedend). + */ +#define REVERSE_SEQUENCE_COLUMN "REVERSE_SEQUENCE" /**< The name of the column containing the sequence + * of the reverse read (generated by ngsfilter, used by alignpairedend). + */ +#define QUALITY_COLUMN "QUALITY" /**< The name of the column containing the sequence qualities + * in NUC_SEQS_VIEW views. + */ #define COUNT_COLUMN "COUNT" /**< The name of the column containing the sequence counts * in NUC_SEQS_VIEW views. */