Compare commits
2 Commits
v3.0.0-beta1
...
v3.0.0-beta2
Author | SHA1 | Date | |
---|---|---|---|
35ce37c0f7 | |||
53f18316b0 |
@ -55,7 +55,7 @@ def __addImportInputOption(optionManager):
|
|||||||
action="store_const", dest="obi:inputformat",
|
action="store_const", dest="obi:inputformat",
|
||||||
default=None,
|
default=None,
|
||||||
const=b'ngsfilter',
|
const=b'ngsfilter',
|
||||||
help="Input file is an ngsfilter file")
|
help="Input file is an ngsfilter file. If not using tags, use ':' or 'None:None' or '-:-' or any combination")
|
||||||
|
|
||||||
group.add_argument('--ecopcr-result-input',
|
group.add_argument('--ecopcr-result-input',
|
||||||
action="store_const", dest="obi:inputformat",
|
action="store_const", dest="obi:inputformat",
|
||||||
|
36
python/obitools3/commands/ngsfilter.pyx
Executable file → Normal file
36
python/obitools3/commands/ngsfilter.pyx
Executable file → Normal file
@ -57,6 +57,11 @@ def addOptions(parser):
|
|||||||
default=None,
|
default=None,
|
||||||
help="URI to the view used to store the sequences unassigned to any sample")
|
help="URI to the view used to store the sequences unassigned to any sample")
|
||||||
|
|
||||||
|
group.add_argument('--no-tags',
|
||||||
|
action="store_true", dest="ngsfilter:notags",
|
||||||
|
default=False,
|
||||||
|
help="Use this option if your experiment does not use tags to identify samples")
|
||||||
|
|
||||||
group.add_argument('-e','--error',
|
group.add_argument('-e','--error',
|
||||||
action="store", dest="ngsfilter:error",
|
action="store", dest="ngsfilter:error",
|
||||||
metavar="###",
|
metavar="###",
|
||||||
@ -167,7 +172,7 @@ cdef read_info_view(info_view, max_errors=2, verbose=False, not_aligned=False):
|
|||||||
i=0
|
i=0
|
||||||
for p in info_view:
|
for p in info_view:
|
||||||
forward=Primer(p[b'forward_primer'],
|
forward=Primer(p[b'forward_primer'],
|
||||||
len(p[b'forward_tag']) if p[b'forward_tag']!=b'-' else None,
|
len(p[b'forward_tag']) if (b'forward_tag' in p and p[b'forward_tag']!=None) else None,
|
||||||
True,
|
True,
|
||||||
max_errors=max_errors,
|
max_errors=max_errors,
|
||||||
verbose=verbose,
|
verbose=verbose,
|
||||||
@ -178,7 +183,7 @@ cdef read_info_view(info_view, max_errors=2, verbose=False, not_aligned=False):
|
|||||||
infos[forward]=fp
|
infos[forward]=fp
|
||||||
|
|
||||||
reverse=Primer(p[b'reverse_primer'],
|
reverse=Primer(p[b'reverse_primer'],
|
||||||
len(p[b'reverse_tag']) if p[b'reverse_tag']!=b'-' else None,
|
len(p[b'reverse_tag']) if (b'reverse_tag' in p and p[b'reverse_tag']!=None) else None,
|
||||||
False,
|
False,
|
||||||
max_errors=max_errors,
|
max_errors=max_errors,
|
||||||
verbose=verbose,
|
verbose=verbose,
|
||||||
@ -213,9 +218,10 @@ cdef read_info_view(info_view, max_errors=2, verbose=False, not_aligned=False):
|
|||||||
rpp=rp.get(cf,{})
|
rpp=rp.get(cf,{})
|
||||||
rp[cf]=rpp
|
rp[cf]=rpp
|
||||||
|
|
||||||
tags = (p[b'forward_tag'] if p[b'forward_tag']!=b'-' else None,
|
tags = (p[b'forward_tag'] if (b'forward_tag' in p and p[b'forward_tag']!=None) else None,
|
||||||
p[b'reverse_tag'] if p[b'reverse_tag']!=b'-' else None)
|
p[b'reverse_tag'] if (b'reverse_tag' in p and p[b'reverse_tag']!=None) else None)
|
||||||
|
|
||||||
|
if tags != (None, None):
|
||||||
assert tags not in dpp, \
|
assert tags not in dpp, \
|
||||||
"Tag pair %s is already used with primer pairs: (%s,%s)" % (str(tags),forward,reverse)
|
"Tag pair %s is already used with primer pairs: (%s,%s)" % (str(tags),forward,reverse)
|
||||||
|
|
||||||
@ -234,7 +240,7 @@ cdef read_info_view(info_view, max_errors=2, verbose=False, not_aligned=False):
|
|||||||
return infos, primer_list
|
return infos, primer_list
|
||||||
|
|
||||||
|
|
||||||
cdef tuple annotate(sequences, infos, verbose=False):
|
cdef tuple annotate(sequences, infos, no_tags, verbose=False):
|
||||||
|
|
||||||
def sortMatch(match):
|
def sortMatch(match):
|
||||||
if match[1] is None:
|
if match[1] is None:
|
||||||
@ -330,12 +336,17 @@ cdef tuple annotate(sequences, infos, verbose=False):
|
|||||||
|
|
||||||
# Keep only paired reverse primer
|
# Keep only paired reverse primer
|
||||||
infos = infos[directmatch[0]]
|
infos = infos[directmatch[0]]
|
||||||
|
rev_prim = list(infos.keys())[0]
|
||||||
|
|
||||||
# If not aligned, look for other match in already computed matches (choose the one that makes the biggest amplicon)
|
# If not aligned, look for other match in already computed matches (choose the one that makes the biggest amplicon)
|
||||||
if not_aligned:
|
if not_aligned:
|
||||||
i=1
|
i=1
|
||||||
# TODO comment
|
# TODO comment
|
||||||
while i<len(all_direct_matches) and (all_direct_matches[i][1] is None or all_direct_matches[i][0].forward == directmatch[0].forward or all_direct_matches[i][0] == directmatch[0]):
|
while i<len(all_direct_matches) and \
|
||||||
|
(all_direct_matches[i][1] is None or \
|
||||||
|
all_direct_matches[i][0].forward == directmatch[0].forward or \
|
||||||
|
all_direct_matches[i][0] == directmatch[0] or \
|
||||||
|
rev_prim != all_direct_matches[i][0]) :
|
||||||
i+=1
|
i+=1
|
||||||
if i < len(all_direct_matches):
|
if i < len(all_direct_matches):
|
||||||
reversematch = all_direct_matches[i]
|
reversematch = all_direct_matches[i]
|
||||||
@ -430,7 +441,7 @@ cdef tuple annotate(sequences, infos, verbose=False):
|
|||||||
final_sequence[b'reversed'] = True # used by the alignpairedend tool (in kmer_similarity.c)
|
final_sequence[b'reversed'] = True # used by the alignpairedend tool (in kmer_similarity.c)
|
||||||
|
|
||||||
sample=None
|
sample=None
|
||||||
|
if not no_tags:
|
||||||
if tags[0] is not None: # Direct tag known
|
if tags[0] is not None: # Direct tag known
|
||||||
if tags[1] is not None: # Reverse tag known
|
if tags[1] is not None: # Reverse tag known
|
||||||
sample = samples.get(tags, None)
|
sample = samples.get(tags, None)
|
||||||
@ -439,7 +450,7 @@ cdef tuple annotate(sequences, infos, verbose=False):
|
|||||||
if len(s)==1:
|
if len(s)==1:
|
||||||
sample=s[0]
|
sample=s[0]
|
||||||
elif len(s)>1:
|
elif len(s)>1:
|
||||||
final_sequence[b'error']=b'multiple samples match tags'
|
final_sequence[b'error']=b'Did not found reverse tag'
|
||||||
return False, final_sequence
|
return False, final_sequence
|
||||||
else:
|
else:
|
||||||
sample=None
|
sample=None
|
||||||
@ -449,13 +460,13 @@ cdef tuple annotate(sequences, infos, verbose=False):
|
|||||||
if len(s)==1:
|
if len(s)==1:
|
||||||
sample=s[0]
|
sample=s[0]
|
||||||
elif len(s)>1:
|
elif len(s)>1:
|
||||||
final_sequence[b'error']=b'multiple samples match tags'
|
final_sequence[b'error']=b'Did not found forward tag'
|
||||||
return False, final_sequence
|
return False, final_sequence
|
||||||
else:
|
else:
|
||||||
sample=None
|
sample=None
|
||||||
|
|
||||||
if sample is None:
|
if sample is None:
|
||||||
final_sequence[b'error']=b"Cannot assign sequence to a sample"
|
final_sequence[b'error']=b"No tags found"
|
||||||
return False, final_sequence
|
return False, final_sequence
|
||||||
|
|
||||||
final_sequence.update(sample)
|
final_sequence.update(sample)
|
||||||
@ -572,6 +583,7 @@ def run(config):
|
|||||||
|
|
||||||
g = 0
|
g = 0
|
||||||
u = 0
|
u = 0
|
||||||
|
no_tags = config['ngsfilter']['notags']
|
||||||
try:
|
try:
|
||||||
for i in range(entries_len):
|
for i in range(entries_len):
|
||||||
PyErr_CheckSignals()
|
PyErr_CheckSignals()
|
||||||
@ -580,7 +592,7 @@ def run(config):
|
|||||||
modseq = [Nuc_Seq.new_from_stored(forward[i]), Nuc_Seq.new_from_stored(reverse[i])]
|
modseq = [Nuc_Seq.new_from_stored(forward[i]), Nuc_Seq.new_from_stored(reverse[i])]
|
||||||
else:
|
else:
|
||||||
modseq = [Nuc_Seq.new_from_stored(entries[i])]
|
modseq = [Nuc_Seq.new_from_stored(entries[i])]
|
||||||
good, oseq = annotate(modseq, infos)
|
good, oseq = annotate(modseq, infos, no_tags)
|
||||||
if good:
|
if good:
|
||||||
o_view[g].set(oseq.id, oseq.seq, definition=oseq.definition, quality=oseq.quality, tags=oseq)
|
o_view[g].set(oseq.id, oseq.seq, definition=oseq.definition, quality=oseq.quality, tags=oseq)
|
||||||
g+=1
|
g+=1
|
||||||
@ -596,6 +608,7 @@ def run(config):
|
|||||||
# Save command config in View and DMS comments
|
# Save command config in View and DMS comments
|
||||||
command_line = " ".join(sys.argv[1:])
|
command_line = " ".join(sys.argv[1:])
|
||||||
o_view.write_config(config, "ngsfilter", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
|
o_view.write_config(config, "ngsfilter", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
|
||||||
|
if unidentified is not None:
|
||||||
unidentified.write_config(config, "ngsfilter", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
|
unidentified.write_config(config, "ngsfilter", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
|
||||||
# Add comment about unidentified seqs
|
# Add comment about unidentified seqs
|
||||||
unidentified.comments["info"] = "View containing sequences categorized as unidentified by the ngsfilter command"
|
unidentified.comments["info"] = "View containing sequences categorized as unidentified by the ngsfilter command"
|
||||||
@ -607,6 +620,7 @@ def run(config):
|
|||||||
input[0].close()
|
input[0].close()
|
||||||
output[0].close()
|
output[0].close()
|
||||||
info_input[0].close()
|
info_input[0].close()
|
||||||
|
if unidentified is not None:
|
||||||
unidentified_input[0].close()
|
unidentified_input[0].close()
|
||||||
aligner.free()
|
aligner.free()
|
||||||
|
|
||||||
|
3
python/obitools3/parsers/ngsfilter.pyx
Executable file → Normal file
3
python/obitools3/parsers/ngsfilter.pyx
Executable file → Normal file
@ -57,6 +57,9 @@ def ngsfilterIterator(lineiterator,
|
|||||||
split_line = line.split()
|
split_line = line.split()
|
||||||
tags = split_line.pop(2)
|
tags = split_line.pop(2)
|
||||||
tags = tags.split(b":")
|
tags = tags.split(b":")
|
||||||
|
for t_idx in range(2):
|
||||||
|
if tags[t_idx]==b"-" or tags[t_idx]==b"None" or tags[t_idx]==b"":
|
||||||
|
tags[t_idx] = nastring
|
||||||
if len(tags) == 1: # Forward and reverse tags are the same
|
if len(tags) == 1: # Forward and reverse tags are the same
|
||||||
tags.append(tags[0])
|
tags.append(tags[0])
|
||||||
split_line.insert(2, tags[0])
|
split_line.insert(2, tags[0])
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
major = 3
|
major = 3
|
||||||
minor = 0
|
minor = 0
|
||||||
serial= '0-beta1'
|
serial= '0-beta2'
|
||||||
|
|
||||||
version ="%d.%02d.%s" % (major,minor,serial)
|
version ="%d.%02d.%s" % (major,minor,serial)
|
||||||
|
2
setup.py
2
setup.py
@ -83,7 +83,7 @@ def findPackage(root,base=None):
|
|||||||
|
|
||||||
|
|
||||||
PACKAGE = "OBITools3"
|
PACKAGE = "OBITools3"
|
||||||
VERSION = "3.0.0-beta1"
|
VERSION = "3.0.0-beta2"
|
||||||
AUTHOR = 'Celine Mercier'
|
AUTHOR = 'Celine Mercier'
|
||||||
EMAIL = 'celine.mercier@metabarcoding.org'
|
EMAIL = 'celine.mercier@metabarcoding.org'
|
||||||
URL = "http://metabarcoding.org/obitools3"
|
URL = "http://metabarcoding.org/obitools3"
|
||||||
|
Reference in New Issue
Block a user