ngsfilter and ecopcr: now check for primers too long for the apat library to handle (31 bp max), and switch to version 3.0.1b23
This commit is contained in:
Celine Mercier
2023-05-12 17:04:21 +12:00
parent 55b2679b23
commit 1c9a906f5b
3 changed files with 23 additions and 5 deletions

View File

@ -24,6 +24,9 @@ from cpython.exc cimport PyErr_CheckSignals
from io import BufferedWriter from io import BufferedWriter
MAX_PAT_LEN = 31
__title__="Assign sequence records to the corresponding experiment/sample based on DNA tags and primers" __title__="Assign sequence records to the corresponding experiment/sample based on DNA tags and primers"
@ -84,6 +87,8 @@ class Primer:
@type direct: @type direct:
''' '''
assert len(sequence) <= MAX_PAT_LEN, "Primer %s is too long, 31 bp max" % sequence
assert sequence not in Primer.collection \ assert sequence not in Primer.collection \
or Primer.collection[sequence]==taglength, \ or Primer.collection[sequence]==taglength, \
"Primer %s must always be used with tags of the same length" % sequence "Primer %s must always be used with tags of the same length" % sequence
@ -271,7 +276,7 @@ cdef tuple annotate(sequences, infos, no_tags, verbose=False):
if not_aligned: if not_aligned:
sequences[1] = sequences[1].clone() sequences[1] = sequences[1].clone()
sequences[0][REVERSE_SEQUENCE_COLUMN] = sequences[1].seq # used by alignpairedend tool sequences[0][REVERSE_SEQUENCE_COLUMN] = sequences[1].seq # used by alignpairedend tool
sequences[0][REVERSE_QUALITY_COLUMN] = sequences[1].quality # used by alignpairedend tool sequences[0][REVERSE_QUALITY_COLUMN] = sequences[1].quality # used by alignpairedend tool
for seq in sequences: for seq in sequences:
@ -299,7 +304,7 @@ cdef tuple annotate(sequences, infos, no_tags, verbose=False):
directmatch.append((p, p(seq, same_sequence=not new_seq, pattern=pattern), seq, p)) directmatch.append((p, p(seq, same_sequence=not new_seq, pattern=pattern), seq, p))
new_seq = False new_seq = False
pattern+=1 pattern+=1
# Choose match closer to the start of (one of the) sequence(s) # Choose match closer to the start of (one of the) sequence(s)
directmatch = sorted(directmatch, key=sortMatch) directmatch = sorted(directmatch, key=sortMatch)
all_direct_matches = directmatch all_direct_matches = directmatch

View File

@ -1,5 +1,5 @@
major = 3 major = 3
minor = 0 minor = 0
serial= '1b22' serial= '1b23'
version ="%d.%d.%s" % (major,minor,serial) version ="%d.%d.%s" % (major,minor,serial)

View File

@ -18,6 +18,7 @@
#include "libecoPCR/ecoPCR.h" #include "libecoPCR/ecoPCR.h"
#include "libecoPCR/libthermo/nnparams.h" #include "libecoPCR/libthermo/nnparams.h"
#include "libecoPCR/libapat/apat.h"
#include "obi_ecopcr.h" #include "obi_ecopcr.h"
#include "obidms.h" #include "obidms.h"
@ -365,8 +366,6 @@ static int print_seq(Obiview_p i_view, Obiview_p o_view,
int32_t i; int32_t i;
// TODO add check for primer longer than MAX_PAT_LEN (32)
// Get sequence id // Get sequence id
seq_id = obi_get_str_with_elt_idx_and_col_p_in_view(i_view, i_id_column, i_idx, 0); seq_id = obi_get_str_with_elt_idx_and_col_p_in_view(i_view, i_id_column, i_idx, 0);
@ -751,6 +750,20 @@ int obi_ecopcr(const char* i_dms_name,
o1c = complementPattern(o1); o1c = complementPattern(o1);
o2c = complementPattern(o2); o2c = complementPattern(o2);
// check for primers equal or longer than MAX_PAT_LEN (32)
if (strlen(primer1) >= MAX_PAT_LEN)
{
obi_set_errno(OBI_ECOPCR_ERROR);
obidebug(1, "\nError: first primer is too long, needs to be < 32bp (%s)", primer1);
return -1;
}
if (strlen(primer2) >= MAX_PAT_LEN)
{
obi_set_errno(OBI_ECOPCR_ERROR);
obidebug(1, "\nError: second primer is too long, needs to be < 32bp (%s)", primer2);
return -1;
}
// Open input DMS // Open input DMS
i_dms = obi_open_dms(i_dms_name, false); i_dms = obi_open_dms(i_dms_name, false);
if (i_dms == NULL) if (i_dms == NULL)