diff --git a/python/obitools3/commands/import.pyx b/python/obitools3/commands/import.pyx index 91b0696..895df19 100755 --- a/python/obitools3/commands/import.pyx +++ b/python/obitools3/commands/import.pyx @@ -99,6 +99,7 @@ def run(config): logger("info", "obi import: imports an object (file(s), obiview, taxonomy...) into a DMS") entry_count = -1 + pb = None if not config['obi']['taxdump']: input = open_uri(config['obi']['inputURI']) @@ -110,7 +111,10 @@ def run(config): else: entry_count = input[4] - logger("info", "Importing %d entries", entry_count) + if entry_count > 0: + logger("info", "Importing %d entries", entry_count) + else: + logger("info", "Importing an unknow number of entries") # TODO a bit dirty? if input[2]==Nuc_Seq or input[2]==View_NUC_SEQS: @@ -137,8 +141,6 @@ def run(config): if entry_count >= 0: pb = ProgressBar(entry_count, config, seconde=5) - else: - pb = None entries = input[1] @@ -170,6 +172,9 @@ def run(config): if pb is not None: pb(i) + elif not i%50000: + logger("info", "Imported %d entries", i) + if NUC_SEQS_view: id_col[i] = entry.id diff --git a/python/obitools3/files/uncompress.pxd b/python/obitools3/files/uncompress.pxd index e5064a3..c0f976e 100755 --- a/python/obitools3/files/uncompress.pxd +++ b/python/obitools3/files/uncompress.pxd @@ -15,4 +15,5 @@ cdef class MagicKeyFile: cdef class CompressedFile: cdef object accessor + cdef bint compressed \ No newline at end of file diff --git a/python/obitools3/files/uncompress.pyx b/python/obitools3/files/uncompress.pyx index 59a1fd2..55f755c 100755 --- a/python/obitools3/files/uncompress.pyx +++ b/python/obitools3/files/uncompress.pyx @@ -74,8 +74,7 @@ cdef class MagicKeyFile: cdef class CompressedFile: - - + def __init__(self,stream): cdef int keylength cdef MagicKeyFile magic @@ -92,11 +91,13 @@ cdef class CompressedFile: magic=MagicKeyFile(stream,keylength) self.accessor = None - + self.compressed = False + for compressor in compress: k,c = compress[compressor] if magic.key.startswith(k): self.accessor = c(magic) + self.compressed = True if self.accessor is None: if 'b' in magic.stream_mode: @@ -110,7 +111,17 @@ cdef class CompressedFile: 'b' not in magic.stream_mode): self.accessor = io.TextIOWrapper(self.accessor) - + + # compressed property getter + @property + def compressed(self) : + ''' + Returns a boolean indicating whether the file is compressed + + @rtype: bint + ''' + return self.compressed + def __getattr__(self,name): return getattr(self.accessor, name) diff --git a/python/obitools3/parsers/genbank.pyx b/python/obitools3/parsers/genbank.pyx index dcce178..8ddcc98 100755 --- a/python/obitools3/parsers/genbank.pyx +++ b/python/obitools3/parsers/genbank.pyx @@ -166,9 +166,12 @@ def genbankIterator_dir(dir_path, ): path = dir_path read = 0 - for filename in glob.glob(os.path.join(path, b'*.gbff*')): + read_files = 0 + files = [filename for filename in glob.glob(os.path.join(path, b'*.gbff*'))] + for filename in files: if read==only: return + print("Parsing file %s (%d/%d)" % (tostr(filename), read_files, len(files))) f = uopen(filename) if only is not None: only_f = only-read @@ -177,7 +180,8 @@ def genbankIterator_dir(dir_path, for seq in genbankIterator_file(f, skip=skip, only=only_f, buffersize=buffersize): yield seq read+=1 - + read_files+=1 + def genbankIterator(obj, int skip=0, diff --git a/python/obitools3/utils.pyx b/python/obitools3/utils.pyx index d3a1526..45fafdb 100755 --- a/python/obitools3/utils.pyx +++ b/python/obitools3/utils.pyx @@ -15,6 +15,8 @@ from obitools3.dms.capi.obierrno cimport OBI_LINE_IDX_ERROR, \ OBI_ELT_IDX_ERROR, \ obi_errno +from obitools3.files.uncompress cimport CompressedFile + import re import mmap import os @@ -45,7 +47,7 @@ cpdef int count_entries(file, bytes format): return -1 sep = re.compile(sep) - if type(file) and (format == b'genbank' or format == b'embl'): # file is actually a directory with multiple files + if type(file) == bytes and (format == b'genbank' or format == b'embl'): # file is actually a directory with multiple files files = [] if format == b'embl': extensions = [b"*.dat"] @@ -66,6 +68,8 @@ cpdef int count_entries(file, bytes format): total_count = 0 for f in files: + if type(f) == CompressedFile and f.compressed: + return -1 mmapped_file = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) total_count += len(re.findall(sep, mmapped_file))