Change the way ecoPCRDB databases are written by obitools. If an obitools command is
called with several sequence files as input and ecoPCRDB is requested as the output format, the sequences are split across several sdx files.
This commit is contained in:
@ -64,8 +64,10 @@ class EcoPCRDBSequenceIterator(EcoPCRDBFile):
|
|||||||
class EcoPCRDBSequenceWriter(object):
|
class EcoPCRDBSequenceWriter(object):
|
||||||
|
|
||||||
def __init__(self,options,fileidx=None,ftid=None,type=None,definition=None,append=False):
|
def __init__(self,options,fileidx=None,ftid=None,type=None,definition=None,append=False):
|
||||||
|
from obitools.options import currentInputFileName
|
||||||
|
self.currentInputFileName=currentInputFileName
|
||||||
# Take care of the taxonomy associated to the database
|
# Take care of the taxonomy associated to the database
|
||||||
|
self._currentfile=None
|
||||||
|
|
||||||
self._taxonomy= loadTaxonomyDatabase(options)
|
self._taxonomy= loadTaxonomyDatabase(options)
|
||||||
dbname = options.ecopcroutput
|
dbname = options.ecopcroutput
|
||||||
@ -83,22 +85,24 @@ class EcoPCRDBSequenceWriter(object):
|
|||||||
for i in glob('%s_[0-9][0-9][0-9].sdx' % dbname))+[0]
|
for i in glob('%s_[0-9][0-9][0-9].sdx' % dbname))+[0]
|
||||||
) +1
|
) +1
|
||||||
|
|
||||||
|
self._fileidx=fileidx
|
||||||
|
self._dbname=dbname
|
||||||
|
|
||||||
|
|
||||||
self._filename="%s_%03d.sdx" % (dbname,fileidx)
|
self._filename="%s_%03d.sdx" % (dbname,fileidx)
|
||||||
if append:
|
if append:
|
||||||
mode ='r+b'
|
|
||||||
f = universalOpen(self._filename)
|
f = universalOpen(self._filename)
|
||||||
(recordCount,) = struct.unpack('> I',f.read(4))
|
(recordCount,) = struct.unpack('> I',f.read(4))
|
||||||
self._sequenceCount=recordCount
|
self._sequenceCount=recordCount
|
||||||
|
self._sequenceFileCount=recordCount
|
||||||
del f
|
del f
|
||||||
self._file = open(self._filename,mode)
|
self.open('r+b')
|
||||||
self._file.seek(0,0)
|
|
||||||
self._file.write(struct.pack('> I',0))
|
|
||||||
self._file.seek(0,2)
|
self._file.seek(0,2)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
self._sequenceCount=0
|
self._sequenceCount=0
|
||||||
mode = 'wb'
|
self._sequenceFileCount=0
|
||||||
self._file = open(self._filename,mode)
|
self.open("wb")
|
||||||
self._file.write(struct.pack('> I',self._sequenceCount))
|
|
||||||
|
|
||||||
if type is not None:
|
if type is not None:
|
||||||
assert ftid is not None,"You must specify an id attribute for features"
|
assert ftid is not None,"You must specify an id attribute for features"
|
||||||
@ -141,7 +145,28 @@ class EcoPCRDBSequenceWriter(object):
|
|||||||
return packed
|
return packed
|
||||||
|
|
||||||
|
|
||||||
|
def close(self):
|
||||||
|
self._file.seek(0,0)
|
||||||
|
self._file.write(struct.pack('> I',self._sequenceFileCount))
|
||||||
|
self._file.close()
|
||||||
|
|
||||||
|
def open(self,mode):
|
||||||
|
self._filename="%s_%03d.sdx" % (self._dbname,self._fileidx)
|
||||||
|
self._file=open(self._filename,mode)
|
||||||
|
self._sequenceFileCount=0
|
||||||
|
self._file.write(struct.pack('> I',self._sequenceFileCount))
|
||||||
|
|
||||||
def put(self,sequence):
|
def put(self,sequence):
|
||||||
|
if self._currentfile is None:
|
||||||
|
self._currentfile=self.currentInputFileName()
|
||||||
|
if self.currentInputFileName() != self._currentfile:
|
||||||
|
self._currentfile=self.currentInputFileName()
|
||||||
|
|
||||||
|
self.close()
|
||||||
|
|
||||||
|
self._fileidx+=1
|
||||||
|
self.open('wb')
|
||||||
|
|
||||||
if self._taxonomy is not None:
|
if self._taxonomy is not None:
|
||||||
if 'taxid' not in sequence and hasattr(sequence, 'extractTaxon'):
|
if 'taxid' not in sequence and hasattr(sequence, 'extractTaxon'):
|
||||||
sequence.extractTaxon()
|
sequence.extractTaxon()
|
||||||
@ -149,11 +174,10 @@ class EcoPCRDBSequenceWriter(object):
|
|||||||
if self._annotation is not None:
|
if self._annotation is not None:
|
||||||
self._annotation.put(sequence, self._sequenceCount)
|
self._annotation.put(sequence, self._sequenceCount)
|
||||||
self._sequenceCount+=1
|
self._sequenceCount+=1
|
||||||
|
self._sequenceFileCount+=1
|
||||||
|
|
||||||
def __del__(self):
|
def __del__(self):
|
||||||
self._file.seek(0,0)
|
self.close()
|
||||||
self._file.write(struct.pack('> I',self._sequenceCount))
|
|
||||||
self._file.close()
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -93,7 +93,7 @@ def allEntryIterator(files,entryIterator,with_progress=False,histo_step=102,opti
|
|||||||
else:
|
else:
|
||||||
|
|
||||||
if entryIterator == EcoPCRDBSequenceIterator and options is not None:
|
if entryIterator == EcoPCRDBSequenceIterator and options is not None:
|
||||||
if options.ecodb==f:
|
if hasattr(options,'ecodb') and options.ecodb==f:
|
||||||
iterator = entryIterator(f,options.taxonomy)
|
iterator = entryIterator(f,options.taxonomy)
|
||||||
else:
|
else:
|
||||||
iterator = entryIterator(f)
|
iterator = entryIterator(f)
|
||||||
|
Reference in New Issue
Block a user