From 6c018b403c63d2912ff3bfb2a10f88acc06e591b Mon Sep 17 00:00:00 2001 From: Celine Mercier Date: Thu, 26 Dec 2019 20:45:54 +0100 Subject: [PATCH] ecopcr: fixed and improved the options to keep nuclotides around the amplicon --- python/obitools3/commands/ecopcr.pyx | 14 ++++++++++---- python/obitools3/dms/capi/obiecopcr.pxd | 1 + python/obitools3/version.py | 2 +- src/obi_ecopcr.c | 22 +++++++++++++--------- src/obi_ecopcr.h | 5 +++-- 5 files changed, 28 insertions(+), 16 deletions(-) diff --git a/python/obitools3/commands/ecopcr.pyx b/python/obitools3/commands/ecopcr.pyx index d168ed2..8228a1c 100755 --- a/python/obitools3/commands/ecopcr.pyx +++ b/python/obitools3/commands/ecopcr.pyx @@ -107,14 +107,20 @@ def addOptions(parser): help="Defines the method used for estimating the Tm (melting temperature) between the primers and their corresponding " "target sequences. SANTALUCIA: 1, or OWCZARZY: 2. Default: 1.") + group.add_argument('--keep-primers', '-p', + action="store_true", + dest="ecopcr:keep-primers", + default=False, + help="Whether to keep the primers attached to the output sequences (default: the primers are cut out).") + group.add_argument('--keep-nucs', '-D', action="store", dest="ecopcr:keep-nucs", - metavar="", + metavar="", type=int, default=0, - help="Keeps the specified number of nucleotides on each side of the in silico amplified sequences, " - "(already including the amplified DNA fragment plus the two target sequences of the primers).") + help="Keeps N nucleotides on each side of the in silico amplified sequences, " + "not including the primers (implying that primers are automatically kept if N > 0).") group.add_argument('--kingdom-mode', '-k', action="store_true", @@ -185,7 +191,7 @@ def run(config): config['ecopcr']['min-length'], config['ecopcr']['max-length'], \ restrict_to_taxids_p, ignore_taxids_p, \ config['ecopcr']['circular'], config['ecopcr']['salt-concentration'], config['ecopcr']['salt-correction-method'], \ - config['ecopcr']['keep-nucs'], config['ecopcr']['kingdom-mode']) < 0: + config['ecopcr']['keep-nucs'], config['ecopcr']['keep-primers'], config['ecopcr']['kingdom-mode']) < 0: raise Exception("Error running ecopcr") # Save command config in DMS comments diff --git a/python/obitools3/dms/capi/obiecopcr.pxd b/python/obitools3/dms/capi/obiecopcr.pxd index 54ec4aa..400cea0 100755 --- a/python/obitools3/dms/capi/obiecopcr.pxd +++ b/python/obitools3/dms/capi/obiecopcr.pxd @@ -23,6 +23,7 @@ cdef extern from "obi_ecopcr.h" nogil: double salt_concentration, int salt_correction_method, int keep_nucleotides, + bint keep_primers, bint kingdom_mode) diff --git a/python/obitools3/version.py b/python/obitools3/version.py index d2fc1ed..ca73fc9 100755 --- a/python/obitools3/version.py +++ b/python/obitools3/version.py @@ -1,5 +1,5 @@ major = 3 minor = 0 -serial= '0-beta3' +serial= '0-beta4' version ="%d.%02d.%s" % (major,minor,serial) diff --git a/src/obi_ecopcr.c b/src/obi_ecopcr.c index ac5c018..994e2f8 100755 --- a/src/obi_ecopcr.c +++ b/src/obi_ecopcr.c @@ -77,7 +77,8 @@ static int create_output_columns(Obiview_p o_view, bool kingdom_mode); * @param err2 The number of errors in the second primer. * @param strand The DNA strand direction of the amplicon (R(everse) or D(irect)). * @param kingdom_mode Whether the kingdom or the superkingdom informations should be printed to the output. - * @param keep_nucleotides Number of nucleotides kept on each side of the amplicon. + * @param keep_nucleotides Number of nucleotides kept on each side of the amplicon (not including the primers if they are kept). + * @param keep_primers Whether to keep the primers. * @param i_id_column A pointer on the input sequence identifier column. * @param o_id_column A pointer on the output sequence identifier column. * @param o_ori_seq_len_column A pointer on the original sequence length column. @@ -124,6 +125,7 @@ static int print_seq(Obiview_p i_view, Obiview_p o_view, int32_t err1, int32_t err2, char strand, bool kingdom_mode, int keep_nucleotides, + bool keep_primers, OBIDMS_column_p i_id_column, OBIDMS_column_p o_id_column, OBIDMS_column_p o_ori_seq_len_column, OBIDMS_column_p o_amplicon_column, OBIDMS_column_p o_amplicon_length_column, OBIDMS_column_p o_taxid_column, OBIDMS_column_p o_rank_column, OBIDMS_column_p o_name_column, @@ -328,6 +330,7 @@ static int print_seq(Obiview_p i_view, Obiview_p o_view, int32_t err1, int32_t err2, char strand, bool kingdom_mode, int keep_nucleotides, + bool keep_primers, OBIDMS_column_p i_id_column, OBIDMS_column_p o_id_column, OBIDMS_column_p o_ori_seq_len_column, OBIDMS_column_p o_amplicon_column, OBIDMS_column_p o_amplicon_length_column, OBIDMS_column_p o_taxid_column, OBIDMS_column_p o_rank_column, OBIDMS_column_p o_name_column, @@ -382,7 +385,7 @@ static int print_seq(Obiview_p i_view, Obiview_p o_view, oligo2[o1->patlen] = 0; error2 = err1; - if (keep_nucleotides == 0) + if (!keep_primers) amplicon+=o2->patlen; else { @@ -401,7 +404,7 @@ static int print_seq(Obiview_p i_view, Obiview_p o_view, oligo2[o2->patlen] = 0; error2 = err2; - if (keep_nucleotides==0) + if (!keep_primers) amplicon+=o1->patlen; else { @@ -411,16 +414,11 @@ static int print_seq(Obiview_p i_view, Obiview_p o_view, } ecoComplementSequence(oligo2); - if (keep_nucleotides == 0) + if (!keep_primers) amplicon[amplicon_len]=0; else { amplicon_len = ldelta+rdelta+amplicon_len; - for (i=0; i 0) + keep_primers = true; + if (circular) { circular = strlen(primer1); @@ -1076,6 +1078,7 @@ int obi_ecopcr(const char* i_dms_name, erri, errj, 'D', kingdom_mode, keep_nucleotides, + keep_primers, i_id_column, o_id_column, o_ori_seq_len_column, o_amplicon_column, o_amplicon_length_column, o_taxid_column, o_rank_column, o_name_column, @@ -1163,6 +1166,7 @@ int obi_ecopcr(const char* i_dms_name, erri, errj, 'R', kingdom_mode, keep_nucleotides, + keep_primers, i_id_column, o_id_column, o_ori_seq_len_column, o_amplicon_column, o_amplicon_length_column, o_taxid_column, o_rank_column, o_name_column, diff --git a/src/obi_ecopcr.h b/src/obi_ecopcr.h index 62a7b1b..5ba4c6a 100755 --- a/src/obi_ecopcr.h +++ b/src/obi_ecopcr.h @@ -93,8 +93,8 @@ * @param salt_concentration The salt concentration used for estimating the Tm. * @param salt_correction_method The method used for estimating the Tm (melting temperature) between the primers and their corresponding * target sequences. SANTALUCIA: 1, or OWCZARZY: 2. - * @param keep_nucleotides The number of nucleotides to keep on each side of the in silico amplified sequences - * (already including the amplified DNA fragment plus the two target sequences of the primers). + * @param keep_nucleotides The number of nucleotides to keep on each side of the in silico amplified sequences, not including primers (primers automatically entirely kept if > 0). + * @param keep_primers Whether primers are kept attached to the output sequences. * @param kingdom_mode Whether the kingdom or the superkingdom informations should be printed to the output. * * @returns A value indicating the success of the operation. @@ -121,6 +121,7 @@ int obi_ecopcr(const char* i_dms_name, double salt_concentration, int salt_correction_method, int keep_nucleotides, + bool keep_primers, bool kingdom_mode); #endif /* OBI_ECOPCR_H_ */