# Patch overview (reviewer commentary; this text sits before the first
# "diff --git" header and is not part of any hunk).
#
# NOTE(review): in this copy of the patch the line breaks have been collapsed,
# so each hunk is run together on one long line. The original line structure
# has to be restored before the patch can be applied.
#
# src/obitools/ecopcr/sequence.py -- class EcoPCRDBSequenceWriter
#   * __init__ imports currentInputFileName from obitools.options, keeps it on
#     the instance, and initialises self._currentfile to None.
#   * __init__ now stores self._fileidx and self._dbname, and keeps a second
#     counter, self._sequenceFileCount, next to self._sequenceCount.
#   * The inline open()/write() calls in both the append and the non-append
#     branch are replaced by calls to the new self.open(mode) method.
#   * New close(): seeks to offset 0, writes self._sequenceFileCount packed
#     with the '> I' struct format, then closes self._file.
#   * New open(mode): rebuilds self._filename from self._dbname and
#     self._fileidx, opens it, sets self._sequenceFileCount to 0 and writes
#     that value packed with '> I'.
#   * put(): remembers the value returned by currentInputFileName(); when a
#     later call returns a different value it calls close(), increments
#     self._fileidx and calls open('wb') before handling the sequence. It also
#     increments self._sequenceFileCount together with self._sequenceCount.
#   * __del__ now only delegates to close().
#
# src/obitools/options/_options.pyx -- allEntryIterator
#   * The options.ecodb comparison is guarded with hasattr(options,'ecodb').
#
# NOTE(review): in the append branch __init__ assigns
# self._sequenceFileCount=recordCount and then calls self.open('r+b'), but
# open() unconditionally resets self._sequenceFileCount to 0. close() writes
# that counter into the header, so after an append the header looks like it
# will hold only the number of newly written records -- confirm whether the
# pre-existing recordCount is meant to be preserved.
# NOTE(review): __del__ calls close(), which seeks and writes on self._file.
# If close() was already called explicitly, or __init__ failed before
# self._file was assigned, this presumably raises inside __del__ -- confirm
# whether a guard is wanted.
# NOTE(review): __del__ previously wrote self._sequenceCount into the header;
# close() writes self._sequenceFileCount instead -- TODO confirm that readers
# of the .sdx files expect a per-file count.
diff --git a/src/obitools/ecopcr/sequence.py b/src/obitools/ecopcr/sequence.py index 89e1a38..e1a5627 100644 --- a/src/obitools/ecopcr/sequence.py +++ b/src/obitools/ecopcr/sequence.py @@ -64,12 +64,14 @@ class EcoPCRDBSequenceIterator(EcoPCRDBFile): class EcoPCRDBSequenceWriter(object): def __init__(self,options,fileidx=None,ftid=None,type=None,definition=None,append=False): - + from obitools.options import currentInputFileName + self.currentInputFileName=currentInputFileName # Take care of the taxonomy associated to the database + self._currentfile=None self._taxonomy= loadTaxonomyDatabase(options) dbname = options.ecopcroutput - + if (self._taxonomy is not None and (not hasattr(options,'ecodb') or options.ecodb!=dbname)): print >> sys.stderr,"Writing the taxonomy file...", @@ -82,23 +84,25 @@ class EcoPCRDBSequenceWriter(object): fileidx = max(list(int(p.search(i).group(1)) for i in glob('%s_[0-9][0-9][0-9].sdx' % dbname))+[0] ) +1 + + self._fileidx=fileidx + self._dbname=dbname + self._filename="%s_%03d.sdx" % (dbname,fileidx) if append: - mode ='r+b' f = universalOpen(self._filename) (recordCount,) = struct.unpack('> I',f.read(4)) self._sequenceCount=recordCount + self._sequenceFileCount=recordCount del f - self._file = open(self._filename,mode) - self._file.seek(0,0) - self._file.write(struct.pack('> I',0)) + self.open('r+b') self._file.seek(0,2) + else: self._sequenceCount=0 - mode = 'wb' - self._file = open(self._filename,mode) - self._file.write(struct.pack('> I',self._sequenceCount)) + self._sequenceFileCount=0 + self.open("wb") if type is not None: assert ftid is not None,"You must specify an id attribute for features" @@ -141,7 +145,28 @@ class EcoPCRDBSequenceWriter(object): return packed + def close(self): + self._file.seek(0,0) + self._file.write(struct.pack('> I',self._sequenceFileCount)) + self._file.close() + + def open(self,mode): + self._filename="%s_%03d.sdx" % (self._dbname,self._fileidx) + self._file=open(self._filename,mode) + 
self._sequenceFileCount=0 + self._file.write(struct.pack('> I',self._sequenceFileCount)) + def put(self,sequence): + if self._currentfile is None: + self._currentfile=self.currentInputFileName() + if self.currentInputFileName() != self._currentfile: + self._currentfile=self.currentInputFileName() + + self.close() + + self._fileidx+=1 + self.open('wb') + if self._taxonomy is not None: if 'taxid' not in sequence and hasattr(sequence, 'extractTaxon'): sequence.extractTaxon() @@ -149,11 +174,10 @@ class EcoPCRDBSequenceWriter(object): if self._annotation is not None: self._annotation.put(sequence, self._sequenceCount) self._sequenceCount+=1 + self._sequenceFileCount+=1 def __del__(self): - self._file.seek(0,0) - self._file.write(struct.pack('> I',self._sequenceCount)) - self._file.close() + self.close() diff --git a/src/obitools/options/_options.pyx b/src/obitools/options/_options.pyx index f9b8250..c972215 100644 --- a/src/obitools/options/_options.pyx +++ b/src/obitools/options/_options.pyx @@ -93,7 +93,7 @@ def allEntryIterator(files,entryIterator,with_progress=False,histo_step=102,opti else: if entryIterator == EcoPCRDBSequenceIterator and options is not None: - if options.ecodb==f: + if hasattr(options,'ecodb') and options.ecodb==f: iterator = entryIterator(f,options.taxonomy) else: iterator = entryIterator(f)