diff --git a/python/obitools3/apps/optiongroups/__init__.py b/python/obitools3/apps/optiongroups/__init__.py index 94171e3..2e4cf69 100755 --- a/python/obitools3/apps/optiongroups/__init__.py +++ b/python/obitools3/apps/optiongroups/__init__.py @@ -55,7 +55,7 @@ def __addImportInputOption(optionManager): action="store_const", dest="obi:inputformat", default=None, const=b'ngsfilter', - help="Input file is an ngsfilter file") + help="Input file is an ngsfilter file. If not using tags, use ':' or 'None:None' or '-:-' or any combination") group.add_argument('--ecopcr-result-input', action="store_const", dest="obi:inputformat", diff --git a/python/obitools3/commands/ngsfilter.pyx b/python/obitools3/commands/ngsfilter.pyx index 64137dd..d09eae9 100755 --- a/python/obitools3/commands/ngsfilter.pyx +++ b/python/obitools3/commands/ngsfilter.pyx @@ -56,6 +56,11 @@ def addOptions(parser): type=str, default=None, help="URI to the view used to store the sequences unassigned to any sample") + + group.add_argument('--no-tags', + action="store_true", dest="ngsfilter:notags", + default=False, + help="Use this option if your experiment does not use tags to identify samples") group.add_argument('-e','--error', action="store", dest="ngsfilter:error", @@ -167,7 +172,7 @@ cdef read_info_view(info_view, max_errors=2, verbose=False, not_aligned=False): i=0 for p in info_view: forward=Primer(p[b'forward_primer'], - len(p[b'forward_tag']) if p[b'forward_tag']!=b'-' else None, + len(p[b'forward_tag']) if (b'forward_tag' in p and p[b'forward_tag']!=None) else None, True, max_errors=max_errors, verbose=verbose, @@ -178,7 +183,7 @@ cdef read_info_view(info_view, max_errors=2, verbose=False, not_aligned=False): infos[forward]=fp reverse=Primer(p[b'reverse_primer'], - len(p[b'reverse_tag']) if p[b'reverse_tag']!=b'-' else None, + len(p[b'reverse_tag']) if (b'reverse_tag' in p and p[b'reverse_tag']!=None) else None, False, max_errors=max_errors, verbose=verbose, @@ -213,10 +218,11 @@ cdef 
read_info_view(info_view, max_errors=2, verbose=False, not_aligned=False): rpp=rp.get(cf,{}) rp[cf]=rpp - tags = (p[b'forward_tag'] if p[b'forward_tag']!=b'-' else None, - p[b'reverse_tag'] if p[b'reverse_tag']!=b'-' else None) + tags = (p[b'forward_tag'] if (b'forward_tag' in p and p[b'forward_tag']!=None) else None, + p[b'reverse_tag'] if (b'reverse_tag' in p and p[b'reverse_tag']!=None) else None) - assert tags not in dpp, \ + if tags != (None, None): + assert tags not in dpp, \ "Tag pair %s is already used with primer pairs: (%s,%s)" % (str(tags),forward,reverse) # Save additional data @@ -234,7 +240,7 @@ cdef read_info_view(info_view, max_errors=2, verbose=False, not_aligned=False): return infos, primer_list -cdef tuple annotate(sequences, infos, verbose=False): +cdef tuple annotate(sequences, infos, no_tags, verbose=False): def sortMatch(match): if match[1] is None: @@ -430,35 +436,35 @@ cdef tuple annotate(sequences, infos, verbose=False): final_sequence[b'reversed'] = True # used by the alignpairedend tool (in kmer_similarity.c) sample=None - - if tags[0] is not None: # Direct tag known - if tags[1] is not None: # Reverse tag known - sample = samples.get(tags, None) - else: # Only direct tag known - s=[samples[x] for x in samples if x[0]==tags[0]] - if len(s)==1: - sample=s[0] - elif len(s)>1: - final_sequence[b'error']=b'multiple samples match tags' - return False, final_sequence - else: - sample=None - else: - if tags[1] is not None: # Only reverse tag known - s=[samples[x] for x in samples if x[1]==tags[1]] - if len(s)==1: - sample=s[0] - elif len(s)>1: - final_sequence[b'error']=b'multiple samples match tags' - return False, final_sequence - else: - sample=None + if not no_tags: + if tags[0] is not None: # Direct tag known + if tags[1] is not None: # Reverse tag known + sample = samples.get(tags, None) + else: # Only direct tag known + s=[samples[x] for x in samples if x[0]==tags[0]] + if len(s)==1: + sample=s[0] + elif len(s)>1: + 
final_sequence[b'error']=b'multiple samples match tags' + return False, final_sequence + else: + sample=None + else: + if tags[1] is not None: # Only reverse tag known + s=[samples[x] for x in samples if x[1]==tags[1]] + if len(s)==1: + sample=s[0] + elif len(s)>1: + final_sequence[b'error']=b'multiple samples match tags' + return False, final_sequence + else: + sample=None + + if sample is None: + final_sequence[b'error']=b"Cannot assign sequence to a sample" + return False, final_sequence - if sample is None: - final_sequence[b'error']=b"Cannot assign sequence to a sample" - return False, final_sequence - - final_sequence.update(sample) + final_sequence.update(sample) if not not_aligned: final_sequence[b'seq_length']=len(final_sequence) @@ -572,6 +578,7 @@ def run(config): g = 0 u = 0 + no_tags = config['ngsfilter']['notags'] try: for i in range(entries_len): PyErr_CheckSignals() @@ -580,7 +587,7 @@ def run(config): modseq = [Nuc_Seq.new_from_stored(forward[i]), Nuc_Seq.new_from_stored(reverse[i])] else: modseq = [Nuc_Seq.new_from_stored(entries[i])] - good, oseq = annotate(modseq, infos) + good, oseq = annotate(modseq, infos, no_tags) if good: o_view[g].set(oseq.id, oseq.seq, definition=oseq.definition, quality=oseq.quality, tags=oseq) g+=1 @@ -596,9 +603,10 @@ def run(config): # Save command config in View and DMS comments command_line = " ".join(sys.argv[1:]) o_view.write_config(config, "ngsfilter", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name) - unidentified.write_config(config, "ngsfilter", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name) - # Add comment about unidentified seqs - unidentified.comments["info"] = "View containing sequences categorized as unidentified by the ngsfilter command" + if unidentified is not None: + unidentified.write_config(config, "ngsfilter", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name) + # Add comment about unidentified seqs + 
unidentified.comments["info"] = "View containing sequences categorized as unidentified by the ngsfilter command" output[0].record_command_line(command_line) #print("\n\nOutput view:\n````````````", file=sys.stderr) @@ -607,7 +615,8 @@ def run(config): input[0].close() output[0].close() info_input[0].close() - unidentified_input[0].close() + if unidentified is not None: + unidentified_input[0].close() aligner.free() logger("info", "Done.") diff --git a/python/obitools3/parsers/ngsfilter.pyx b/python/obitools3/parsers/ngsfilter.pyx old mode 100755 new mode 100644 index fbe3e89..023c11c --- a/python/obitools3/parsers/ngsfilter.pyx +++ b/python/obitools3/parsers/ngsfilter.pyx @@ -57,6 +57,9 @@ def ngsfilterIterator(lineiterator, split_line = line.split() tags = split_line.pop(2) tags = tags.split(b":") + for t_idx in range(len(tags)): # a lone tag (no ':') yields a one-element list + if tags[t_idx]==b"-" or tags[t_idx]==b"None" or tags[t_idx]==b"": + tags[t_idx] = nastring if len(tags) == 1: # Forward and reverse tags are the same tags.append(tags[0]) split_line.insert(2, tags[0])