ngsfilter: made more robust and practical to use with empty tags

This commit is contained in:
Celine Mercier
2019-11-29 15:21:08 +01:00
parent 8bc249b2f4
commit 53f18316b0
3 changed files with 52 additions and 40 deletions

View File

@ -55,7 +55,7 @@ def __addImportInputOption(optionManager):
action="store_const", dest="obi:inputformat",
default=None,
const=b'ngsfilter',
help="Input file is an ngsfilter file")
help="Input file is an ngsfilter file. If not using tags, use ':' or 'None:None' or '-:-' or any combination")
group.add_argument('--ecopcr-result-input',
action="store_const", dest="obi:inputformat",

View File

@ -57,6 +57,11 @@ def addOptions(parser):
default=None,
help="URI to the view used to store the sequences unassigned to any sample")
group.add_argument('--no-tags',
action="store_true", dest="ngsfilter:notags",
default=False,
help="Use this option if your experiment does not use tags to identify samples")
group.add_argument('-e','--error',
action="store", dest="ngsfilter:error",
metavar="###",
@ -167,7 +172,7 @@ cdef read_info_view(info_view, max_errors=2, verbose=False, not_aligned=False):
i=0
for p in info_view:
forward=Primer(p[b'forward_primer'],
len(p[b'forward_tag']) if p[b'forward_tag']!=b'-' else None,
len(p[b'forward_tag']) if (b'forward_tag' in p and p[b'forward_tag']!=None) else None,
True,
max_errors=max_errors,
verbose=verbose,
@ -178,7 +183,7 @@ cdef read_info_view(info_view, max_errors=2, verbose=False, not_aligned=False):
infos[forward]=fp
reverse=Primer(p[b'reverse_primer'],
len(p[b'reverse_tag']) if p[b'reverse_tag']!=b'-' else None,
len(p[b'reverse_tag']) if (b'reverse_tag' in p and p[b'reverse_tag']!=None) else None,
False,
max_errors=max_errors,
verbose=verbose,
@ -213,9 +218,10 @@ cdef read_info_view(info_view, max_errors=2, verbose=False, not_aligned=False):
rpp=rp.get(cf,{})
rp[cf]=rpp
tags = (p[b'forward_tag'] if p[b'forward_tag']!=b'-' else None,
p[b'reverse_tag'] if p[b'reverse_tag']!=b'-' else None)
tags = (p[b'forward_tag'] if (b'forward_tag' in p and p[b'forward_tag']!=None) else None,
p[b'reverse_tag'] if (b'reverse_tag' in p and p[b'reverse_tag']!=None) else None)
if tags != (None, None):
assert tags not in dpp, \
"Tag pair %s is already used with primer pairs: (%s,%s)" % (str(tags),forward,reverse)
@ -234,7 +240,7 @@ cdef read_info_view(info_view, max_errors=2, verbose=False, not_aligned=False):
return infos, primer_list
cdef tuple annotate(sequences, infos, verbose=False):
cdef tuple annotate(sequences, infos, no_tags, verbose=False):
def sortMatch(match):
if match[1] is None:
@ -430,7 +436,7 @@ cdef tuple annotate(sequences, infos, verbose=False):
final_sequence[b'reversed'] = True # used by the alignpairedend tool (in kmer_similarity.c)
sample=None
if not no_tags:
if tags[0] is not None: # Direct tag known
if tags[1] is not None: # Reverse tag known
sample = samples.get(tags, None)
@ -572,6 +578,7 @@ def run(config):
g = 0
u = 0
no_tags = config['ngsfilter']['notags']
try:
for i in range(entries_len):
PyErr_CheckSignals()
@ -580,7 +587,7 @@ def run(config):
modseq = [Nuc_Seq.new_from_stored(forward[i]), Nuc_Seq.new_from_stored(reverse[i])]
else:
modseq = [Nuc_Seq.new_from_stored(entries[i])]
good, oseq = annotate(modseq, infos)
good, oseq = annotate(modseq, infos, no_tags)
if good:
o_view[g].set(oseq.id, oseq.seq, definition=oseq.definition, quality=oseq.quality, tags=oseq)
g+=1
@ -596,6 +603,7 @@ def run(config):
# Save command config in View and DMS comments
command_line = " ".join(sys.argv[1:])
o_view.write_config(config, "ngsfilter", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
if unidentified is not None:
unidentified.write_config(config, "ngsfilter", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
# Add comment about unidentified seqs
unidentified.comments["info"] = "View containing sequences categorized as unidentified by the ngsfilter command"
@ -607,6 +615,7 @@ def run(config):
input[0].close()
output[0].close()
info_input[0].close()
if unidentified is not None:
unidentified_input[0].close()
aligner.free()

3
python/obitools3/parsers/ngsfilter.pyx Executable file → Normal file
View File

@ -57,6 +57,9 @@ def ngsfilterIterator(lineiterator,
split_line = line.split()
tags = split_line.pop(2)
tags = tags.split(b":")
# Normalise every "empty tag" spelling ('-', 'None' or '') to the NA marker.
# Iterate over the tags actually present: when the tag column holds a single
# tag without ':', split() returns a one-element list, so indexing tags[1]
# here would raise IndexError.
for t_idx in range(len(tags)):
    if tags[t_idx] == b"-" or tags[t_idx] == b"None" or tags[t_idx] == b"":
        tags[t_idx] = nastring
if len(tags) == 1: # Forward and reverse tags are the same
tags.append(tags[0])
split_line.insert(2, tags[0])