Change the way ecoPCRDB files are written by obitools. If an obitools command is

called with several sequence files as input and ecoPCRDB is requested as the output format, the sequences are split across several sdx files, one per input file
2015-10-02 16:36:25 +02:00
parent 343d9ec6df
commit fdeaf5956c
2 changed files with 37 additions and 13 deletions
--- a/src/obitools/ecopcr/sequence.py
+++ b/src/obitools/ecopcr/sequence.py
@ -64,12 +64,14 @@ class EcoPCRDBSequenceIterator(EcoPCRDBFile):
 class EcoPCRDBSequenceWriter(object):
    
    def __init__(self,options,fileidx=None,ftid=None,type=None,definition=None,append=False):
-
+        from obitools.options import currentInputFileName
+        self.currentInputFileName=currentInputFileName
        # Take care of the taxonomy associated to the database
+        self._currentfile=None
        
        self._taxonomy= loadTaxonomyDatabase(options)
        dbname = options.ecopcroutput
-
+        
        if (self._taxonomy is not None
            and (not hasattr(options,'ecodb') or options.ecodb!=dbname)):
            print >> sys.stderr,"Writing the taxonomy file...",
@ -82,23 +84,25 @@ class EcoPCRDBSequenceWriter(object):
            fileidx = max(list(int(p.search(i).group(1)) 
                               for i in glob('%s_[0-9][0-9][0-9].sdx' % dbname))+[0]
                          ) +1
+                          
+        self._fileidx=fileidx
+        self._dbname=dbname
+        
        
        self._filename="%s_%03d.sdx" % (dbname,fileidx)
        if append:
-            mode ='r+b'
            f = universalOpen(self._filename)
            (recordCount,) = struct.unpack('> I',f.read(4))
            self._sequenceCount=recordCount
+            self._sequenceFileCount=recordCount
            del f
-            self._file = open(self._filename,mode)
-            self._file.seek(0,0)
-            self._file.write(struct.pack('> I',0))
+            self.open('r+b')
            self._file.seek(0,2)
+
        else:
            self._sequenceCount=0
-            mode = 'wb'
-            self._file = open(self._filename,mode)
-            self._file.write(struct.pack('> I',self._sequenceCount))
+            self._sequenceFileCount=0
+            self.open("wb")
                
        if type is not None:
            assert ftid is not None,"You must specify an id attribute for features"
@ -141,7 +145,28 @@ class EcoPCRDBSequenceWriter(object):
        return packed

        
+    def close(self):
+        self._file.seek(0,0)
+        self._file.write(struct.pack('> I',self._sequenceFileCount))
+        self._file.close()
+            
+    def open(self,mode):
+            self._filename="%s_%03d.sdx" % (self._dbname,self._fileidx)
+            self._file=open(self._filename,mode)
+            self._sequenceFileCount=0
+            self._file.write(struct.pack('> I',self._sequenceFileCount))
+        
    def put(self,sequence):
+        if self._currentfile is None:
+            self._currentfile=self.currentInputFileName()
+        if self.currentInputFileName() != self._currentfile:
+            self._currentfile=self.currentInputFileName()
+
+            self.close()
+
+            self._fileidx+=1
+            self.open('wb')
+            
        if self._taxonomy is not None:
            if 'taxid' not in sequence and hasattr(sequence, 'extractTaxon'):
                sequence.extractTaxon()
@ -149,11 +174,10 @@ class EcoPCRDBSequenceWriter(object):
        if self._annotation is not None:
            self._annotation.put(sequence, self._sequenceCount)
        self._sequenceCount+=1
+        self._sequenceFileCount+=1
        
    def __del__(self):
-        self._file.seek(0,0)
-        self._file.write(struct.pack('> I',self._sequenceCount))
-        self._file.close()
+        self.close()
            
        
    
--- a/src/obitools/options/_options.pyx
+++ b/src/obitools/options/_options.pyx
@ -93,7 +93,7 @@ def allEntryIterator(files,entryIterator,with_progress=False,histo_step=102,opti
            else:
                
                if entryIterator == EcoPCRDBSequenceIterator and options is not None:
-                    if options.ecodb==f:
+                    if hasattr(options,'ecodb') and options.ecodb==f:
                        iterator = entryIterator(f,options.taxonomy)
                    else:
                        iterator = entryIterator(f)