From 067c5cdf74daf7257faa05a98d511b25d0114535 Mon Sep 17 00:00:00 2001
From: Eric Coissac
Date: Mon, 12 Oct 2009 15:25:26 +0000
Subject: [PATCH] add option to specify input format in fastaGrep

---
Review notes (not part of the commit message):
 - removed type="string" from the three store_const options; optparse
   refuses a type on that action
 - addInputFormatOption is now passed to getOptionManager, otherwise the
   flags are never registered and options.seqinformat is never set
 - the if/elif reader selection became a lookup table
 - line breaks and indentation were rebuilt from a flattened copy of the
   patch; the index hashes below are those of the original submission

 src/fastaGrep.py | 53 ++++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 44 insertions(+), 9 deletions(-)

diff --git a/src/fastaGrep.py b/src/fastaGrep.py
index 63b2fb6..4a1f2f6 100644
--- a/src/fastaGrep.py
+++ b/src/fastaGrep.py
@@ -1,24 +1,59 @@
 #!/usr/local/bin/python
 
-import re
-import sys
-
 from obitools.fasta import fastaIterator,formatFasta
+from obitools.format.sequence.embl import emblIterator
+from obitools.format.sequence.genbank import genbankIterator
+from obitools.format.sequence.fnaqual import fnaFastaIterator
 
 from obitools.options import getOptionManager
 from obitools.options.bioseqfilter import addSequenceFilteringOptions
 from obitools.options.bioseqfilter import sequenceFilterIteratorGenerator
 
-
-
+def addInputFormatOption(optionManager):
+    """
+    Register the --genbank, --embl and --fna flags on optionManager.
+
+    Each flag stores its format name in options.seqinformat; the
+    declared default is 'fasta'.
+    """
+    # No type= keyword below: optparse refuses one on store_const
+    # options (optionManager is presumably an optparse parser).
+    optionManager.add_option('--genbank',
+                             action="store_const", dest="seqinformat",
+                             default='fasta',
+                             const='genbank',
+                             help="input file is in genbank format")
+
+    optionManager.add_option('--embl',
+                             action="store_const", dest="seqinformat",
+                             default='fasta',
+                             const='embl',
+                             help="input file is in embl format")
+
+    optionManager.add_option('--fna',
+                             action="store_const", dest="seqinformat",
+                             default='fasta',
+                             const='fna',
+                             help="input file is in fasta nucleic format produced by 454 sequencer pipeline")
+
+
 if __name__=='__main__':
 
-    optionParser = getOptionManager([addSequenceFilteringOptions],
-                                    entryIterator=fastaIterator)
+    optionParser = getOptionManager([addSequenceFilteringOptions,
+                                     addInputFormatOption])
 
     (options, entries) = optionParser()
 
-    goodFasta = sequenceFilterIteratorGenerator(options)
+    # one reader per value the format flags can store in seqinformat
+    readers = {'fasta': fastaIterator,
+               'genbank': genbankIterator,
+               'embl': emblIterator,
+               'fna': fnaFastaIterator}
+    reader = readers[options.seqinformat]
+
+    entries = reader(entries)
 
-    for seq in goodFasta(entries):
+    goodSeq = sequenceFilterIteratorGenerator(options)
+
+    for seq in goodSeq(entries):
         print formatFasta(seq)
\ No newline at end of file