Improved progress display when importing files in a DMS
This commit is contained in:
@ -99,6 +99,7 @@ def run(config):
|
||||
logger("info", "obi import: imports an object (file(s), obiview, taxonomy...) into a DMS")
|
||||
|
||||
entry_count = -1
|
||||
pb = None
|
||||
|
||||
if not config['obi']['taxdump']:
|
||||
input = open_uri(config['obi']['inputURI'])
|
||||
@ -110,7 +111,10 @@ def run(config):
|
||||
else:
|
||||
entry_count = input[4]
|
||||
|
||||
logger("info", "Importing %d entries", entry_count)
|
||||
if entry_count > 0:
|
||||
logger("info", "Importing %d entries", entry_count)
|
||||
else:
|
||||
logger("info", "Importing an unknow number of entries")
|
||||
|
||||
# TODO a bit dirty?
|
||||
if input[2]==Nuc_Seq or input[2]==View_NUC_SEQS:
|
||||
@ -137,8 +141,6 @@ def run(config):
|
||||
|
||||
if entry_count >= 0:
|
||||
pb = ProgressBar(entry_count, config, seconde=5)
|
||||
else:
|
||||
pb = None
|
||||
|
||||
entries = input[1]
|
||||
|
||||
@ -170,6 +172,9 @@ def run(config):
|
||||
|
||||
if pb is not None:
|
||||
pb(i)
|
||||
elif not i%50000:
|
||||
logger("info", "Imported %d entries", i)
|
||||
|
||||
|
||||
if NUC_SEQS_view:
|
||||
id_col[i] = entry.id
|
||||
|
@ -15,4 +15,5 @@ cdef class MagicKeyFile:
|
||||
|
||||
cdef class CompressedFile:
|
||||
cdef object accessor
|
||||
cdef bint compressed
|
||||
|
@ -74,8 +74,7 @@ cdef class MagicKeyFile:
|
||||
|
||||
|
||||
cdef class CompressedFile:
|
||||
|
||||
|
||||
|
||||
def __init__(self,stream):
|
||||
cdef int keylength
|
||||
cdef MagicKeyFile magic
|
||||
@ -92,11 +91,13 @@ cdef class CompressedFile:
|
||||
magic=MagicKeyFile(stream,keylength)
|
||||
|
||||
self.accessor = None
|
||||
|
||||
self.compressed = False
|
||||
|
||||
for compressor in compress:
|
||||
k,c = compress[compressor]
|
||||
if magic.key.startswith(k):
|
||||
self.accessor = c(magic)
|
||||
self.compressed = True
|
||||
|
||||
if self.accessor is None:
|
||||
if 'b' in magic.stream_mode:
|
||||
@ -110,7 +111,17 @@ cdef class CompressedFile:
|
||||
'b' not in magic.stream_mode):
|
||||
self.accessor = io.TextIOWrapper(self.accessor)
|
||||
|
||||
|
||||
|
||||
# compressed property getter
|
||||
@property
|
||||
def compressed(self) :
|
||||
'''
|
||||
Returns a boolean indicating whether the file is compressed
|
||||
|
||||
@rtype: bint
|
||||
'''
|
||||
return self.compressed
|
||||
|
||||
def __getattr__(self,name):
|
||||
return getattr(self.accessor, name)
|
||||
|
||||
|
@ -166,9 +166,12 @@ def genbankIterator_dir(dir_path,
|
||||
):
|
||||
path = dir_path
|
||||
read = 0
|
||||
for filename in glob.glob(os.path.join(path, b'*.gbff*')):
|
||||
read_files = 0
|
||||
files = [filename for filename in glob.glob(os.path.join(path, b'*.gbff*'))]
|
||||
for filename in files:
|
||||
if read==only:
|
||||
return
|
||||
print("Parsing file %s (%d/%d)" % (tostr(filename), read_files, len(files)))
|
||||
f = uopen(filename)
|
||||
if only is not None:
|
||||
only_f = only-read
|
||||
@ -177,7 +180,8 @@ def genbankIterator_dir(dir_path,
|
||||
for seq in genbankIterator_file(f, skip=skip, only=only_f, buffersize=buffersize):
|
||||
yield seq
|
||||
read+=1
|
||||
|
||||
read_files+=1
|
||||
|
||||
|
||||
def genbankIterator(obj,
|
||||
int skip=0,
|
||||
|
@ -15,6 +15,8 @@ from obitools3.dms.capi.obierrno cimport OBI_LINE_IDX_ERROR, \
|
||||
OBI_ELT_IDX_ERROR, \
|
||||
obi_errno
|
||||
|
||||
from obitools3.files.uncompress cimport CompressedFile
|
||||
|
||||
import re
|
||||
import mmap
|
||||
import os
|
||||
@ -45,7 +47,7 @@ cpdef int count_entries(file, bytes format):
|
||||
return -1
|
||||
sep = re.compile(sep)
|
||||
|
||||
if type(file) and (format == b'genbank' or format == b'embl'): # file is actually a directory with multiple files
|
||||
if type(file) == bytes and (format == b'genbank' or format == b'embl'): # file is actually a directory with multiple files
|
||||
files = []
|
||||
if format == b'embl':
|
||||
extensions = [b"*.dat"]
|
||||
@ -66,6 +68,8 @@ cpdef int count_entries(file, bytes format):
|
||||
|
||||
total_count = 0
|
||||
for f in files:
|
||||
if type(f) == CompressedFile and f.compressed:
|
||||
return -1
|
||||
mmapped_file = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
|
||||
total_count += len(re.findall(sep, mmapped_file))
|
||||
|
||||
|
Reference in New Issue
Block a user