Patch several bugs or inconsistencies following the tutorial at Anthony's lab
This commit is contained in:
@ -1,11 +1,9 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<?eclipse-pydev version="1.0"?>
|
||||
|
||||
<pydev_project>
|
||||
<?eclipse-pydev version="1.0"?><pydev_project>
|
||||
<pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
|
||||
<path>/OBITools-1.0/src</path>
|
||||
<path>/OBITools-1.0/textwrangler</path>
|
||||
</pydev_pathproperty>
|
||||
<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.7</pydev_property>
|
||||
<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Python2.7</pydev_property>
|
||||
<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Python-2.7</pydev_property>
|
||||
</pydev_project>
|
||||
|
@ -17,7 +17,7 @@ Standard output format
|
||||
Generating an ecoPCR database
|
||||
.............................
|
||||
|
||||
.. cmdoption:: --ecopcrDB-output=<PREFIX_FILENAME>
|
||||
.. cmdoption:: --ecopcrdb-output=<PREFIX_FILENAME>
|
||||
|
||||
Creates an ecoPCR database from sequence records results
|
||||
|
||||
|
31
setup.py
31
setup.py
@ -6,9 +6,16 @@
|
||||
|
||||
|
||||
try:
|
||||
from setuptools.core import setup
|
||||
from setuptools import setup
|
||||
except ImportError:
|
||||
from distutils.core import setup
|
||||
import ez_setup
|
||||
ez_setup.use_setuptools()
|
||||
from setuptools import setup
|
||||
|
||||
# try:
|
||||
# from setuptools.core import setup
|
||||
# except ImportError:
|
||||
# from distutils.core import setup
|
||||
from distutils.extension import Extension
|
||||
from distutils.util import convert_path
|
||||
from distutils import log
|
||||
@ -39,6 +46,10 @@ import glob
|
||||
|
||||
from os import path
|
||||
|
||||
# requires = ['Cython>=0.20', 'Sphinx>=1.2']
|
||||
requires = ['Cython>=0.20']
|
||||
|
||||
|
||||
class install_scripts(ori_install_scripts):
|
||||
|
||||
def remove_deprecated_script(self):
|
||||
@ -209,7 +220,7 @@ def findC(root,base=None,pyrexs=None):
|
||||
#from obitools.version import version as obiversion
|
||||
#sys.path.pop(0)
|
||||
|
||||
VERSION = "1.0.beta"
|
||||
VERSION = "1.0.beta.2"
|
||||
AUTHOR = 'Eric Coissac'
|
||||
EMAIL = 'eric@coissac.eu'
|
||||
URL = 'www.grenoble.prabi.fr/trac/OBITools'
|
||||
@ -242,6 +253,18 @@ else:
|
||||
|
||||
setup(name="OBITools",
|
||||
description="Scripts and library for sequence analysis",
|
||||
classifiers=[
|
||||
'Development Status :: 5 - Production/Stable',
|
||||
'Environment :: Console',
|
||||
'Intended Audience :: Research',
|
||||
'License :: CeCILL-V2',
|
||||
'Operating System :: Unix like',
|
||||
'Programming Language :: Python',
|
||||
'Programming Language :: Python :: 2',
|
||||
'Topic :: NGS Data processing',
|
||||
'Topic :: DNA metabarcoding',
|
||||
'Topic :: Utilities',
|
||||
],
|
||||
version=VERSION,
|
||||
author=AUTHOR,
|
||||
author_email=EMAIL,
|
||||
@ -251,7 +274,7 @@ setup(name="OBITools",
|
||||
package_dir = {'': SRC},
|
||||
packages=findPackage(SRC),
|
||||
cmdclass = {'build_ext': build_ext,'build_scripts':build_scripts, 'install_scripts':install_scripts},
|
||||
requires=['Cython (>=0.16)'],
|
||||
install_requires=requires,
|
||||
zip_safe = False,
|
||||
ext_modules=EXTENTION)
|
||||
|
||||
|
@ -202,8 +202,8 @@ if __name__ == '__main__':
|
||||
|
||||
localdata=False
|
||||
|
||||
if options.write != '' :
|
||||
options.write = open(options.write, 'w')
|
||||
# if options.write != '' :
|
||||
# options.write = open(options.write, 'w')
|
||||
|
||||
for t in options.newtaxon:
|
||||
tx = t.split(':')
|
||||
|
@ -1,296 +1,69 @@
|
||||
from obitools import utils
|
||||
from obitools import NucSequence
|
||||
from obitools.utils import universalOpen, universalTell, fileSize, progressBar
|
||||
import struct
|
||||
import sys
|
||||
|
||||
import time
|
||||
import re
|
||||
import shelve
|
||||
|
||||
from threading import Lock
|
||||
from logging import warning
|
||||
import urllib2
|
||||
|
||||
from obitools.gzip import GzipFile
|
||||
from obitools.zipfile import ZipFile
|
||||
import os.path
|
||||
|
||||
from _utils import FakeFile
|
||||
from _utils import progressBar
|
||||
|
||||
try:
|
||||
from collections import Counter
|
||||
except ImportError:
|
||||
from obitools.collections import Counter
|
||||
class EcoPCRFile(utils.ColumnFile):
|
||||
def __init__(self,stream):
|
||||
utils.ColumnFile.__init__(self,
|
||||
stream, '|', True,
|
||||
(str,int,int,
|
||||
str,int,str,
|
||||
int,str,int,
|
||||
str,int,str,
|
||||
str,str,int,float,
|
||||
str,int,float,
|
||||
int,
|
||||
str,str), "#")
|
||||
|
||||
|
||||
class FileFormatError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
|
||||
def universalOpen(file,*options):
|
||||
'''
|
||||
Open a file, gzipped or not.
|
||||
|
||||
If file is a C{str} instance, file is
|
||||
considered as a file name. In this case
|
||||
the C{.gz} suffix is tested to possibly
|
||||
open it as a gzipped file.
|
||||
|
||||
If file is another kind of object, it is assumed
|
||||
that this object follows the C{file} interface
|
||||
and it is returned as is.
|
||||
|
||||
@param file: the file to open
|
||||
@type file: C{str} or a file like object
|
||||
|
||||
@return: an iterator on text lines.
|
||||
'''
|
||||
if isinstance(file,str):
|
||||
try:
|
||||
if urllib2.urlparse.urlparse(file)[0]=='':
|
||||
rep = open(file,*options)
|
||||
else:
|
||||
rep = urllib2.urlopen(file,timeout=15)
|
||||
|
||||
if file[-3:] == '.gz':
|
||||
rep = GzipFile(fileobj=rep)
|
||||
if file[-4:] == '.zip':
|
||||
zip = ZipFile(file=rep)
|
||||
data = zip.infolist()
|
||||
assert len(data)==1,'Only zipped file containning a single file can be open'
|
||||
name = data[0].filename
|
||||
rep = zip.open(name)
|
||||
except Exception as e:
|
||||
print>>sys.stderr, e
|
||||
sys.exit();
|
||||
else:
|
||||
rep = file
|
||||
return rep
|
||||
|
||||
def universalTell(file):
|
||||
'''
|
||||
Return the position in the file even if
|
||||
it is a gzipped one.
|
||||
|
||||
@param file: the file to check
|
||||
@type file: a C{file} like instance
|
||||
|
||||
@return: position in the file
|
||||
@rtype: C{int}
|
||||
'''
|
||||
if isinstance(file, GzipFile):
|
||||
file=file.myfileobj
|
||||
return file.tell()
|
||||
|
||||
def fileSize(file):
|
||||
'''
|
||||
Return the file size even if it is a
|
||||
gzipped one.
|
||||
|
||||
@param file: the file to check
|
||||
@type file: a C{file} like instance
|
||||
|
||||
@return: the size of the file
|
||||
@rtype: C{int}
|
||||
'''
|
||||
if isinstance(file, GzipFile):
|
||||
file=file.myfileobj
|
||||
pos = file.tell()
|
||||
file.seek(0,2)
|
||||
length = file.tell()
|
||||
file.seek(pos,0)
|
||||
return length
|
||||
|
||||
|
||||
def endLessIterator(endedlist):
|
||||
for x in endedlist:
|
||||
yield x
|
||||
while(1):
|
||||
yield endedlist[-1]
|
||||
|
||||
|
||||
def multiLineWrapper(lineiterator):
|
||||
'''
|
||||
Aggregator of strings.
|
||||
|
||||
@param lineiterator: a stream of strings from an opened OBO file.
|
||||
@type lineiterator: a stream of strings.
|
||||
|
||||
@return: an aggregated stanza.
|
||||
@rtype: an iterator on str
|
||||
|
||||
@note: The aggregator aggregates strings from an opened OBO file.
|
||||
When the length of a string is < 2, the current stanza is over.
|
||||
'''
|
||||
|
||||
for line in lineiterator:
|
||||
rep = [line]
|
||||
while len(line)>=2 and line[-2]=='\\':
|
||||
rep[-1]=rep[-1][0:-2]
|
||||
try:
|
||||
line = lineiterator.next()
|
||||
except StopIteration:
|
||||
raise FileFormatError
|
||||
rep.append(line)
|
||||
yield ''.join(rep)
|
||||
|
||||
|
||||
def skipWhiteLineIterator(lineiterator):
|
||||
'''
|
||||
Curator of stanza.
|
||||
|
||||
@param lineiterator: a stream of strings from an opened OBO file.
|
||||
@type lineiterator: a stream of strings.
|
||||
|
||||
@return: a stream of strings without blank strings.
|
||||
@rtype: a stream strings
|
||||
|
||||
@note: The curator skips white lines of the current stanza.
|
||||
'''
|
||||
|
||||
for line in lineiterator:
|
||||
cleanline = line.strip()
|
||||
if cleanline:
|
||||
yield line
|
||||
else:
|
||||
print 'skipped'
|
||||
|
||||
|
||||
class ColumnFile(object):
|
||||
|
||||
def __init__(self,stream,sep=None,strip=True,
|
||||
types=None,skip=None,head=None,
|
||||
extra=None,
|
||||
extraformat='([a-zA-Z]\w*) *= *([^;]+);'):
|
||||
self._stream = universalOpen(stream)
|
||||
self._delimiter=sep
|
||||
self._strip=strip
|
||||
self._extra=extra
|
||||
self._extraformat = re.compile(extraformat)
|
||||
|
||||
if types:
|
||||
self._types=[x for x in types]
|
||||
for i in xrange(len(self._types)):
|
||||
if self._types[i] is bool:
|
||||
self._types[i]=ColumnFile.str2bool
|
||||
else:
|
||||
self._types=None
|
||||
|
||||
self._skip = skip
|
||||
if skip is not None:
|
||||
self._lskip= len(skip)
|
||||
else:
|
||||
self._lskip= 0
|
||||
self._head=head
|
||||
|
||||
def str2bool(x):
|
||||
return bool(eval(x.strip()[0].upper(),{'T':True,'V':True,'F':False}))
|
||||
|
||||
str2bool = staticmethod(str2bool)
|
||||
|
||||
|
||||
def __iter__(self):
|
||||
return self
|
||||
|
||||
def next(self):
|
||||
|
||||
def cast(txt,type):
|
||||
try:
|
||||
v = type(txt)
|
||||
except:
|
||||
v=None
|
||||
return v
|
||||
ligne = self._stream.next()
|
||||
if self._skip is not None:
|
||||
while ligne[0:self._lskip]==self._skip:
|
||||
ligne = self._stream.next()
|
||||
if self._extra is not None:
|
||||
try:
|
||||
(ligne,extra) = ligne.rsplit(self._extra,1)
|
||||
extra = dict(self._extraformat.findall(extra))
|
||||
except ValueError:
|
||||
extra=None
|
||||
else:
|
||||
extra = None
|
||||
data = ligne.split(self._delimiter)
|
||||
if self._strip or self._types:
|
||||
data = [x.strip() for x in data]
|
||||
if self._types:
|
||||
it = endLessIterator(self._types)
|
||||
data = [cast(*x) for x in ((y,it.next()) for y in data)]
|
||||
if self._head is not None:
|
||||
data=dict(map(None, self._head,data))
|
||||
if extra is not None:
|
||||
data['__extra__']=extra
|
||||
else:
|
||||
if extra is not None:
|
||||
data.append(extra)
|
||||
return data
|
||||
|
||||
def tell(self):
|
||||
return universalTell(self._stream)
|
||||
|
||||
|
||||
class CachedDB(object):
|
||||
|
||||
def __init__(self,cachefile,masterdb):
|
||||
self._cache = shelve.open(cachefile,'c')
|
||||
self._db = masterdb
|
||||
self._lock=Lock()
|
||||
|
||||
def _cacheSeq(self,seq):
|
||||
self._lock.acquire()
|
||||
self._cache[seq.id]=seq
|
||||
self._lock.release()
|
||||
data = utils.ColumnFile.next(self)
|
||||
seq = NucSequence(data[0],data[20],data[21])
|
||||
seq['seq_length_ori']=data[1]
|
||||
seq['taxid']=data[2]
|
||||
seq['rank']=data[3]
|
||||
seq['species']=data[4]
|
||||
seq['species_name']=data[5]
|
||||
seq['genus']=data[6]
|
||||
seq['genus_name']=data[7]
|
||||
seq['family']=data[8]
|
||||
seq['family_name']=data[9]
|
||||
seq['strand']=data[12]
|
||||
seq['forward_match']=data[13]
|
||||
seq['forward_error']=data[14]
|
||||
seq['forward_tm']=data[15]
|
||||
seq['reverse_match']=data[16]
|
||||
seq['reverse_error']=data[17]
|
||||
seq['reverse_tm']=data[18]
|
||||
|
||||
return seq
|
||||
|
||||
|
||||
def __getitem__(self,ac):
|
||||
if isinstance(ac,str):
|
||||
self._lock.acquire()
|
||||
if ac in self._cache:
|
||||
# print >>sys.stderr,"Use cache for %s" % ac
|
||||
data = self._cache[ac]
|
||||
self._lock.release()
|
||||
|
||||
else:
|
||||
self._lock.release()
|
||||
data = self._db[ac]
|
||||
self._cacheSeq(data)
|
||||
return data
|
||||
|
||||
class EcoPCRDBFile(object):
|
||||
|
||||
def _ecoRecordIterator(self,file,noError=False):
|
||||
file = universalOpen(file,noError)
|
||||
(recordCount,) = struct.unpack('> I',file.read(4))
|
||||
self._recover=False
|
||||
|
||||
if recordCount:
|
||||
for i in xrange(recordCount):
|
||||
(recordSize,)=struct.unpack('>I',file.read(4))
|
||||
record = file.read(recordSize)
|
||||
yield record
|
||||
else:
|
||||
self._lock.acquire()
|
||||
acs = [[x,self._cache.get(x,None)] for x in ac]
|
||||
self._lock.release()
|
||||
newacs = [ac for ac,cached in acs if cached is None]
|
||||
if newacs:
|
||||
newseqs = self._db[newacs]
|
||||
else:
|
||||
newseqs = iter([])
|
||||
for r in acs:
|
||||
if r[1] is None:
|
||||
r[1]=self._cacheSeq(newseqs.next())
|
||||
# else:
|
||||
# print >>sys.stderr,"Use cache for %s" % r[0]
|
||||
return (x[1] for x in acs)
|
||||
|
||||
|
||||
def moduleInDevelopment(name):
|
||||
Warning('This module %s is under development : use it with caution' % name)
|
||||
|
||||
|
||||
def deprecatedScript(newscript):
|
||||
current = sys.argv[0]
|
||||
print >>sys.stderr," "
|
||||
print >>sys.stderr," "
|
||||
print >>sys.stderr," "
|
||||
print >>sys.stderr,"#########################################################"
|
||||
print >>sys.stderr,"# #"
|
||||
print >>sys.stderr," W A R N I N G :"
|
||||
print >>sys.stderr," %s is a deprecated script " % os.path.split(current)[1]
|
||||
print >>sys.stderr," it will disappear in the next obitools version"
|
||||
print >>sys.stderr," "
|
||||
print >>sys.stderr," The new corresponding command is %s " % newscript
|
||||
print >>sys.stderr,"# #"
|
||||
print >>sys.stderr,"#########################################################"
|
||||
print >>sys.stderr," "
|
||||
print >>sys.stderr," "
|
||||
print >>sys.stderr," "
|
||||
print >> sys.stderr,"\n\n WARNING : EcoPCRDB reading set into recover data mode\n"
|
||||
self._recover=True
|
||||
ok=True
|
||||
while(ok):
|
||||
try:
|
||||
(recordSize,)=struct.unpack('>I',file.read(4))
|
||||
record = file.read(recordSize)
|
||||
yield record
|
||||
except:
|
||||
ok=False
|
||||
|
@ -70,10 +70,9 @@ def loadTaxonomyDatabase(options):
|
||||
|
||||
if isinstance(options.taxonomy, Taxonomy):
|
||||
return options.taxonomy
|
||||
#taxonomy = ecobarcodeDatabaseConnection(options)
|
||||
|
||||
taxonomy = None
|
||||
if (taxonomy is not None or
|
||||
options.taxonomy is not None or
|
||||
if (options.taxonomy is not None or
|
||||
options.taxdump is not None):
|
||||
if options.taxdump is not None:
|
||||
taxonomy = TaxonomyDump(options.taxdump)
|
||||
|
@ -1,12 +1,14 @@
|
||||
from obitools import NucSequence
|
||||
from obitools.ecopcr import EcoPCRDBFile
|
||||
from obitools.ecopcr.taxonomy import EcoTaxonomyDB, ecoTaxonomyWriter
|
||||
from obitools.ecopcr.options import loadTaxonomyDatabase
|
||||
from obitools.ecopcr.annotation import EcoPCRDBAnnotationWriter
|
||||
from obitools.utils import universalOpen
|
||||
from glob import glob
|
||||
import struct
|
||||
import gzip
|
||||
import sys
|
||||
import re
|
||||
|
||||
|
||||
class EcoPCRDBSequenceIterator(EcoPCRDBFile):
|
||||
@ -40,11 +42,11 @@ class EcoPCRDBSequenceIterator(EcoPCRDBFile):
|
||||
for record in self._ecoRecordIterator(file):
|
||||
lrecord = len(record)
|
||||
lnames = lrecord - (4*4+20)
|
||||
(taxid,seqid,deflength,seqlength,cptseqlength,string)=struct.unpack('> I 20s I I I %ds' % lnames, record)
|
||||
(taxid,seqid,deflength,seqlength,cptseqlength,string)=struct.unpack('> I 20s I I I %ds' % lnames, record) # @UnusedVariable
|
||||
seqid=seqid.strip('\x00')
|
||||
de = string[:deflength]
|
||||
seq = gzip.zlib.decompress(string[deflength:])
|
||||
bioseq = NucSequence(seqid,seq,de,taxidx=taxid,taxid=self._taxonomy._taxonomy[taxid][0])
|
||||
bioseq = NucSequence(seqid,seq,de,taxid=self._taxonomy._taxonomy[taxid][0])
|
||||
yield bioseq
|
||||
|
||||
def __iter__(self):
|
||||
@ -54,8 +56,26 @@ class EcoPCRDBSequenceIterator(EcoPCRDBFile):
|
||||
|
||||
class EcoPCRDBSequenceWriter(object):
|
||||
|
||||
def __init__(self,dbname,fileidx=1,taxonomy=None,ftid=None,type=None,definition=None,append=False):
|
||||
self._taxonomy=taxonomy
|
||||
def __init__(self,options,fileidx=None,ftid=None,type=None,definition=None,append=False):
|
||||
|
||||
# Take care of the taxonomy associated to the database
|
||||
|
||||
self._taxonomy= loadTaxonomyDatabase(options)
|
||||
dbname=options.ecopcroutput
|
||||
|
||||
if (self._taxonomy is not None
|
||||
and (not hasattr(options,'ecodb') or options.ecodb!=dbname)):
|
||||
print >> sys.stderr,"Writing the taxonomy file...",
|
||||
ecoTaxonomyWriter(dbname,self._taxonomy)
|
||||
print >> sys.stderr,"Ok"
|
||||
|
||||
# Identify the next sequence file number
|
||||
if fileidx is None:
|
||||
p = re.compile(r'([0-9]{3})\.sdx')
|
||||
fileidx = max(list(int(p.search(i).group(1))
|
||||
for i in glob('%s_[0-9][0-9][0-9].sdx' % dbname))+[0]
|
||||
) +1
|
||||
|
||||
self._filename="%s_%03d.sdx" % (dbname,fileidx)
|
||||
if append:
|
||||
mode ='r+b'
|
||||
@ -72,12 +92,7 @@ class EcoPCRDBSequenceWriter(object):
|
||||
mode = 'wb'
|
||||
self._file = open(self._filename,mode)
|
||||
self._file.write(struct.pack('> I',self._sequenceCount))
|
||||
|
||||
if self._taxonomy is not None:
|
||||
print >> sys.stderr,"Writing the taxonomy file...",
|
||||
ecoTaxonomyWriter(dbname,self._taxonomy)
|
||||
print >> sys.stderr,"Ok"
|
||||
|
||||
|
||||
if type is not None:
|
||||
assert ftid is not None,"You must specify an id attribute for features"
|
||||
self._annotation = EcoPCRDBAnnotationWriter(dbname, ftid, fileidx, type, definition)
|
||||
|
@ -329,10 +329,10 @@ class EcoTaxonomyDB(Taxonomy,EcoPCRDBFile):
|
||||
|
||||
try :
|
||||
lt=0
|
||||
for record in self._ecoRecordIterator(self._localTaxonFile):
|
||||
for record in self._ecoRecordIterator(self._localTaxonFile,noError=True):
|
||||
lrecord = len(record)
|
||||
lnames = lrecord - 16
|
||||
(taxid,rankid,parentidx,nameLength,name)=struct.unpack('> I I I I %ds' % lnames, record)
|
||||
(taxid,rankid,parentidx,nameLength,name)=struct.unpack('> I I I I %ds' % lnames, record) # @UnusedVariable
|
||||
lt+=1
|
||||
yield (taxid,rankid,parentidx,name,'local')
|
||||
print >> sys.stderr, " [INFO : Local taxon file found] : %d added taxa" % lt
|
||||
@ -344,7 +344,7 @@ class EcoTaxonomyDB(Taxonomy,EcoPCRDBFile):
|
||||
yield record
|
||||
|
||||
def __ecoAliasIterator(self):
|
||||
for record in self._ecoRecordIterator(self._aliasFile):
|
||||
for record in self._ecoRecordIterator(self._aliasFile,noError=True):
|
||||
(taxid,index) = struct.unpack('> I i',record)
|
||||
yield taxid,index
|
||||
|
||||
@ -402,7 +402,7 @@ class EcoTaxonomyDB(Taxonomy,EcoPCRDBFile):
|
||||
|
||||
try :
|
||||
self._preferedName = [(x[0],'obi',x[2])
|
||||
for x in self.__ecoNameIterator(self._preferedNamesFile)]
|
||||
for x in self.__ecoNameIterator(self._preferedNamesFile,noError=True)]
|
||||
print >> sys.stderr, " [INFO : Preferred taxon name file found] : %d added taxa" % len(self._preferedName)
|
||||
except:
|
||||
print >> sys.stderr, " [INFO : Preferred taxon name file not found]"
|
||||
|
@ -2,7 +2,7 @@
|
||||
|
||||
import sys
|
||||
from obitools.fasta import formatFasta
|
||||
from obitools.ecopcr.sequence import EcoPCRDBSequenceWriter
|
||||
#from obitools.ecopcr.sequence import EcoPCRDBSequenceWriter
|
||||
|
||||
cpdef printOutput(options,seq,output=sys.stdout):
|
||||
if options.output is not None:
|
||||
|
@ -18,7 +18,6 @@ from obitools.fasta import formatFasta, rawFastaIterator,\
|
||||
from obitools.fastq import formatFastq
|
||||
|
||||
from obitools.ecopcr.sequence import EcoPCRDBSequenceWriter
|
||||
from obitools.ecopcr.options import loadTaxonomyDatabase
|
||||
|
||||
from cPickle import dump,load,UnpicklingError
|
||||
|
||||
@ -34,7 +33,7 @@ from obitools.format.sequence import skipOnErrorIterator
|
||||
from obitools import BioSequence
|
||||
from obitools.utils import FakeFile
|
||||
|
||||
|
||||
from glob import glob
|
||||
|
||||
|
||||
def binarySequenceIterator(lineiterator):
|
||||
@ -168,7 +167,7 @@ def addOutputFormatOption(optionManager):
|
||||
# help="Output sequences in sap fasta format "
|
||||
# "(Sequence must have a taxid and a taxonomy has to be loaded)")
|
||||
|
||||
group.add_option('--ecopcrDB-output',
|
||||
group.add_option('--ecopcrdb-output',
|
||||
action="store", dest="ecopcroutput",
|
||||
default=None,
|
||||
help="Output sequences in ecopcr database format "
|
||||
@ -313,6 +312,10 @@ def sequenceWriterGenerator(options,output=sys.stdout):
|
||||
self._format=formatSAPFastaGenerator(options)
|
||||
elif options.outputFormater is not None:
|
||||
self._format=options.outputFormater
|
||||
|
||||
if hasattr(seq,'_hasTaxid') and seq._hasTaxid:
|
||||
seq.extractTaxon()
|
||||
|
||||
s = self._format(seq,upper=self._upper)
|
||||
try:
|
||||
self._file.write(s)
|
||||
@ -336,8 +339,7 @@ def sequenceWriterGenerator(options,output=sys.stdout):
|
||||
|
||||
|
||||
if options.ecopcroutput is not None:
|
||||
taxo = loadTaxonomyDatabase(options)
|
||||
writer=EcoPCRDBSequenceWriter(options.ecopcroutput,taxonomy=taxo)
|
||||
writer=EcoPCRDBSequenceWriter(options)
|
||||
elif options.output==dump:
|
||||
writer=BinaryWriter(options,output)
|
||||
else:
|
||||
|
@ -6,7 +6,7 @@ from obitools.utils import universalOpen
|
||||
from obitools.utils import universalTell
|
||||
from obitools.utils import fileSize
|
||||
from obitools.ecopcr.sequence import EcoPCRDBSequenceIterator
|
||||
|
||||
from glob import glob
|
||||
from logging import debug
|
||||
import sys
|
||||
|
||||
@ -68,24 +68,31 @@ def allEntryIterator(files,entryIterator,with_progress=False,histo_step=102):
|
||||
|
||||
if files :
|
||||
for f in files:
|
||||
if (entryIterator != EcoPCRDBSequenceIterator) :
|
||||
if (entryIterator != EcoPCRDBSequenceIterator) :
|
||||
|
||||
cfs.currentInputFileName=f
|
||||
f = universalOpen(f)
|
||||
cfs.currentFile=f
|
||||
cfs.currentFileSize=fileSize(cfs.currentFile)
|
||||
debug(f)
|
||||
|
||||
if with_progress:
|
||||
f=fileWithProgressBar(f,step=histo_step)
|
||||
|
||||
if entryIterator is None:
|
||||
for line in f:
|
||||
yield line
|
||||
try:
|
||||
f = universalOpen(f,noError=True)
|
||||
except Exception as e:
|
||||
if glob('%s_[0-9][0-9][0-9].sdx' % f):
|
||||
entryIterator=EcoPCRDBSequenceIterator
|
||||
else:
|
||||
print >>sys.stderr, e
|
||||
sys.exit();
|
||||
else:
|
||||
for entry in entryIterator(f):
|
||||
yield entry
|
||||
else :
|
||||
yield EcoPCRDBSequenceIterator(f)
|
||||
cfs.currentFile=f
|
||||
cfs.currentFileSize=fileSize(cfs.currentFile)
|
||||
debug(f)
|
||||
|
||||
if with_progress:
|
||||
f=fileWithProgressBar(f,step=histo_step)
|
||||
|
||||
if entryIterator is None:
|
||||
for line in f:
|
||||
yield line
|
||||
else:
|
||||
for entry in entryIterator(f):
|
||||
yield entry
|
||||
|
||||
else:
|
||||
if entryIterator is None:
|
||||
|
@ -26,7 +26,7 @@ class FileFormatError(Exception):
|
||||
|
||||
|
||||
|
||||
def universalOpen(file,*options):
|
||||
def universalOpen(file,noError=False):
|
||||
'''
|
||||
Open a file, gzipped or not.
|
||||
|
||||
@ -47,7 +47,7 @@ def universalOpen(file,*options):
|
||||
if isinstance(file,str):
|
||||
try:
|
||||
if urllib2.urlparse.urlparse(file)[0]=='':
|
||||
rep = open(file,*options)
|
||||
rep = open(file)
|
||||
else:
|
||||
rep = urllib2.urlopen(file,timeout=15)
|
||||
|
||||
@ -60,8 +60,11 @@ def universalOpen(file,*options):
|
||||
name = data[0].filename
|
||||
rep = zip.open(name)
|
||||
except Exception as e:
|
||||
print>>sys.stderr, e
|
||||
sys.exit();
|
||||
if not noError:
|
||||
print >>sys.stderr, e
|
||||
sys.exit();
|
||||
else:
|
||||
raise e
|
||||
else:
|
||||
rep = file
|
||||
return rep
|
||||
|
Reference in New Issue
Block a user