Add automanagement of input and output format
This commit is contained in:
@@ -14,32 +14,33 @@ from obitools.fnaqual.quality import qualityIterator
|
||||
from obitools.fasta import formatFasta
|
||||
from obitools.fastq import formatFastq
|
||||
|
||||
|
||||
import sys
|
||||
from array import array
|
||||
from itertools import chain
|
||||
|
||||
|
||||
|
||||
def addInputFormatOption(optionManager):
|
||||
optionManager.add_option('--genbank',
|
||||
action="store_const", dest="seqinformat",
|
||||
default='fasta',
|
||||
default=None,
|
||||
const='genbank',
|
||||
help="input file is in genbank format")
|
||||
optionManager.add_option('--embl',
|
||||
action="store_const", dest="seqinformat",
|
||||
default='fasta',
|
||||
default=None,
|
||||
const='embl',
|
||||
help="input file is in embl format")
|
||||
|
||||
optionManager.add_option('--fasta',
|
||||
action="store_const", dest="seqinformat",
|
||||
default='fasta',
|
||||
default=None,
|
||||
const='fasta',
|
||||
help="input file is in fasta nucleic format (including obitools fasta extentions)")
|
||||
|
||||
optionManager.add_option('--fna',
|
||||
action="store_const", dest="seqinformat",
|
||||
default='fasta',
|
||||
default=None,
|
||||
const='fna',
|
||||
help="input file is in fasta nucleic format produced by 454 sequencer pipeline")
|
||||
|
||||
@@ -51,19 +52,19 @@ def addInputFormatOption(optionManager):
|
||||
|
||||
optionManager.add_option('--sanger',
|
||||
action="store_const", dest="seqinformat",
|
||||
default='fasta',
|
||||
default=None,
|
||||
const='sanger',
|
||||
help="input file is in sanger fastq nucleic format (standard fastq)")
|
||||
|
||||
optionManager.add_option('--solexa',
|
||||
action="store_const", dest="seqinformat",
|
||||
default='fasta',
|
||||
default=None,
|
||||
const='solexa',
|
||||
help="input file is in fastq nucleic format produced by solexa sequencer")
|
||||
|
||||
optionManager.add_option('--illumina',
|
||||
action="store_const", dest="seqinformat",
|
||||
default='fasta',
|
||||
default=None,
|
||||
const='illumina',
|
||||
help="input file is in fastq nucleic format produced by old solexa sequencer")
|
||||
|
||||
@@ -79,20 +80,94 @@ def addInputFormatOption(optionManager):
|
||||
help="input file is protein sequences")
|
||||
|
||||
|
||||
def addOutputFormatOption(optionManager):
    """Register the output-format switches on *optionManager*.

    Both switches store a formatter function into ``options.output``:

    * ``--fastq-output`` stores ``formatFastq``
    * ``--fasta-output`` stores ``formatFasta``

    The default is ``None`` for both, so ``options.output`` stays ``None``
    when neither switch is given; ``printOutput`` tests for that and falls
    back to another formatter.

    optionManager -- object exposing an optparse-style ``add_option`` method.
    """
    optionManager.add_option('--fastq-output',
                             action="store_const", dest="output",
                             default=None,
                             const=formatFastq,
                             help="output sequences in sanger fastq format")

    optionManager.add_option('--fasta-output',
                             action="store_const", dest="output",
                             default=None,
                             const=formatFasta,
                             help="output sequences in obitools fasta format")


# Name used before the rename; kept as an alias so existing callers keep working.
addOutputOption = addOutputFormatOption
|
||||
|
||||
|
||||
def entriesIterator(options):
|
||||
|
||||
def addInOutputOption(optionManager):
    """Register the input-format options, then the output-format options.

    optionManager -- option manager handed unchanged to
                     ``addInputFormatOption`` and ``addOutputFormatOption``.
    """
    for register in (addInputFormatOption, addOutputFormatOption):
        register(optionManager)
|
||||
|
||||
|
||||
def printOutput(options,seq,output=sys.stdout):
    """Format *seq* and write it to *output*, followed by a newline.

    The formatter is the first of these that is not ``None``:

    1. ``options.output``
    2. ``options.outputFormater``
    3. ``formatFasta``

    Either attribute may be missing from *options*; a missing attribute is
    treated like ``None``.

    options -- object carrying the optional ``output`` / ``outputFormater``
               formatter callables.
    seq     -- the object handed to the formatter.
    output  -- writable file-like object (defaults to ``sys.stdout``).

    An IOError raised while writing ends the process with exit status 0.
    """
    formatter = getattr(options, 'output', None)
    if formatter is None:
        formatter = getattr(options, 'outputFormater', None)
    if formatter is None:
        formatter = formatFasta

    r = formatter(seq)

    try:
        # write() instead of the ``print >>`` chevron form, which only
        # behaves as intended on Python 2.
        output.write('%s\n' % (r,))
    except IOError:
        # presumably the consumer closed the stream (e.g. a pipe) -- leave quietly
        sys.exit(0)
|
||||
|
||||
|
||||
|
||||
def autoEntriesIterator(options):
|
||||
options.outputFormater=formatFasta
|
||||
|
||||
def annotatedIterator(formatIterator):
    """Wrap *formatIterator* so every entry it yields gets ``extractTaxon()`` called.

    Calling this function also sets ``options.outputFormater`` to
    ``formatFasta`` (``options`` comes from the enclosing scope).

    Returns a generator function taking the line iterator.
    """
    options.outputFormater = formatFasta

    def iterator(lineiterator):
        for entry in formatIterator(lineiterator):
            entry.extractTaxon()
            yield entry

    return iterator
|
||||
|
||||
def withQualIterator(qualityfile):
    """Build a reader that pairs ``fnaFastaIterator`` entries with quality records.

    Calling this function sets ``options.outputFormater`` to ``formatFastq``
    (``options`` comes from the enclosing scope).

    For each sequence, the next record is pulled from *qualityfile* and each
    value x in it is stored as 10**(-x/10) in a double array on
    ``s.quality``.
    NOTE(review): presumably x is a Phred-scaled score and the result an
    error probability -- confirm against qualityIterator.

    Returns a generator function taking the line iterator.
    """
    options.outputFormater = formatFastq

    def iterator(lineiterator):
        for record in fnaFastaIterator(lineiterator):
            scores = qualityfile.next()
            record.quality = array('d', [10.**(-x/10.) for x in scores])
            yield record

    return iterator
|
||||
|
||||
def autoSequenceIterator(lineiterator):
    """Pick a reader from the first input line and return its entry iterator.

    The first line of *lineiterator* is consumed to detect the format:

    * ``>``       fasta; when ``options.withqualfile`` is set the entries are
                  paired with that quality file, otherwise ``options.moltype``
                  selects the nucleic, protein or generic fasta reader
    * ``@``       sanger fastq
    * ``ID ``     embl
    * ``LOCUS ``  genbank

    ``options.outputFormater`` is set to ``formatFastq`` when quality data is
    available (fastq input, or fasta plus quality file) and to ``formatFasta``
    otherwise. ``options`` comes from the enclosing scope.

    Raises AssertionError when the first line matches none of the formats.
    """
    options.outputFormater=formatFasta

    first = lineiterator.next()

    # startswith() rather than first[0], so an empty first line is reported
    # as an unknown format instead of raising IndexError.
    if first.startswith('>'):
        if options.withqualfile is not None:
            qualfile=qualityIterator(options.withqualfile)
            reader=withQualIterator(qualfile)
            options.outputFormater=formatFastq
        elif options.moltype=='nuc':
            reader=fastaNucIterator
        elif options.moltype=='pep':
            reader=fastaAAIterator
        else:
            reader=fastaIterator
    elif first.startswith('@'):
        reader=fastqSangerIterator
        options.outputFormater=formatFastq
    elif first.startswith('ID '):
        # NOTE(review): the explicit --embl/--genbank paths wrap the reader in
        # annotatedIterator; the auto-detected paths do not -- confirm intended.
        reader=emblIterator
    elif first.startswith('LOCUS '):
        reader=genbankIterator
    else:
        raise AssertionError('file is not in fasta, fastq, embl, or genbank format')

    # Put the sniffed line back in front of the remaining lines.
    entries = reader(chain([first],lineiterator))

    return entries
|
||||
|
||||
if options.seqinformat is None:
|
||||
reader = autoSequenceIterator
|
||||
else:
|
||||
if options.seqinformat=='fasta':
|
||||
if options.moltype=='nuc':
|
||||
reader=fastaNucIterator
|
||||
@@ -101,33 +176,27 @@ def entriesIterator(options):
|
||||
else:
|
||||
reader=fastaIterator
|
||||
elif options.seqinformat=='genbank':
|
||||
reader=genbankIterator
|
||||
reader=annotatedIterator(genbankIterator)
|
||||
elif options.seqinformat=='embl':
|
||||
reader=emblIterator
|
||||
reader=annotatedIterator(emblIterator)
|
||||
elif options.seqinformat=='fna':
|
||||
reader=fnaFastaIterator
|
||||
elif options.seqinformat=='sanger':
|
||||
options.outputFormater=formatFastq
|
||||
reader=fastqSangerIterator
|
||||
elif options.seqinformat=='solexa':
|
||||
options.outputFormater=formatFastq
|
||||
reader=fastqSolexaIterator
|
||||
elif options.seqinformat=='illumina':
|
||||
options.outputFormater=formatFastq
|
||||
reader=fastqIlluminaIterator
|
||||
|
||||
if options.seqinformat=='fna' and options.withqualfile is not None:
|
||||
qualfile=qualityIterator(options.withqualfile)
|
||||
else:
|
||||
qualfile=None
|
||||
reader=withQualIterator(qualfile)
|
||||
options.outputFormater=formatFastq
|
||||
|
||||
def iterator(lineiterator):
|
||||
for s in reader(lineiterator):
|
||||
if qualfile is not None:
|
||||
q = qualfile.next()
|
||||
quality = array('d',(10.**(-x/10.) for x in q))
|
||||
s.quality=quality
|
||||
s.extractTaxon()
|
||||
yield s
|
||||
|
||||
return iterator
|
||||
return reader
|
||||
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user