Improved progress display when importing files in a DMS
This commit is contained in:
@ -99,6 +99,7 @@ def run(config):
|
|||||||
logger("info", "obi import: imports an object (file(s), obiview, taxonomy...) into a DMS")
|
logger("info", "obi import: imports an object (file(s), obiview, taxonomy...) into a DMS")
|
||||||
|
|
||||||
entry_count = -1
|
entry_count = -1
|
||||||
|
pb = None
|
||||||
|
|
||||||
if not config['obi']['taxdump']:
|
if not config['obi']['taxdump']:
|
||||||
input = open_uri(config['obi']['inputURI'])
|
input = open_uri(config['obi']['inputURI'])
|
||||||
@ -110,7 +111,10 @@ def run(config):
|
|||||||
else:
|
else:
|
||||||
entry_count = input[4]
|
entry_count = input[4]
|
||||||
|
|
||||||
logger("info", "Importing %d entries", entry_count)
|
if entry_count > 0:
|
||||||
|
logger("info", "Importing %d entries", entry_count)
|
||||||
|
else:
|
||||||
|
logger("info", "Importing an unknow number of entries")
|
||||||
|
|
||||||
# TODO a bit dirty?
|
# TODO a bit dirty?
|
||||||
if input[2]==Nuc_Seq or input[2]==View_NUC_SEQS:
|
if input[2]==Nuc_Seq or input[2]==View_NUC_SEQS:
|
||||||
@ -137,8 +141,6 @@ def run(config):
|
|||||||
|
|
||||||
if entry_count >= 0:
|
if entry_count >= 0:
|
||||||
pb = ProgressBar(entry_count, config, seconde=5)
|
pb = ProgressBar(entry_count, config, seconde=5)
|
||||||
else:
|
|
||||||
pb = None
|
|
||||||
|
|
||||||
entries = input[1]
|
entries = input[1]
|
||||||
|
|
||||||
@ -170,6 +172,9 @@ def run(config):
|
|||||||
|
|
||||||
if pb is not None:
|
if pb is not None:
|
||||||
pb(i)
|
pb(i)
|
||||||
|
elif not i%50000:
|
||||||
|
logger("info", "Imported %d entries", i)
|
||||||
|
|
||||||
|
|
||||||
if NUC_SEQS_view:
|
if NUC_SEQS_view:
|
||||||
id_col[i] = entry.id
|
id_col[i] = entry.id
|
||||||
|
@ -15,4 +15,5 @@ cdef class MagicKeyFile:
|
|||||||
|
|
||||||
cdef class CompressedFile:
|
cdef class CompressedFile:
|
||||||
cdef object accessor
|
cdef object accessor
|
||||||
|
cdef bint compressed
|
||||||
|
|
@ -74,8 +74,7 @@ cdef class MagicKeyFile:
|
|||||||
|
|
||||||
|
|
||||||
cdef class CompressedFile:
|
cdef class CompressedFile:
|
||||||
|
|
||||||
|
|
||||||
def __init__(self,stream):
|
def __init__(self,stream):
|
||||||
cdef int keylength
|
cdef int keylength
|
||||||
cdef MagicKeyFile magic
|
cdef MagicKeyFile magic
|
||||||
@ -92,11 +91,13 @@ cdef class CompressedFile:
|
|||||||
magic=MagicKeyFile(stream,keylength)
|
magic=MagicKeyFile(stream,keylength)
|
||||||
|
|
||||||
self.accessor = None
|
self.accessor = None
|
||||||
|
self.compressed = False
|
||||||
|
|
||||||
for compressor in compress:
|
for compressor in compress:
|
||||||
k,c = compress[compressor]
|
k,c = compress[compressor]
|
||||||
if magic.key.startswith(k):
|
if magic.key.startswith(k):
|
||||||
self.accessor = c(magic)
|
self.accessor = c(magic)
|
||||||
|
self.compressed = True
|
||||||
|
|
||||||
if self.accessor is None:
|
if self.accessor is None:
|
||||||
if 'b' in magic.stream_mode:
|
if 'b' in magic.stream_mode:
|
||||||
@ -110,7 +111,17 @@ cdef class CompressedFile:
|
|||||||
'b' not in magic.stream_mode):
|
'b' not in magic.stream_mode):
|
||||||
self.accessor = io.TextIOWrapper(self.accessor)
|
self.accessor = io.TextIOWrapper(self.accessor)
|
||||||
|
|
||||||
|
|
||||||
|
# compressed property getter
|
||||||
|
@property
|
||||||
|
def compressed(self) :
|
||||||
|
'''
|
||||||
|
Returns a boolean indicating whether the file is compressed
|
||||||
|
|
||||||
|
@rtype: bint
|
||||||
|
'''
|
||||||
|
return self.compressed
|
||||||
|
|
||||||
def __getattr__(self,name):
|
def __getattr__(self,name):
|
||||||
return getattr(self.accessor, name)
|
return getattr(self.accessor, name)
|
||||||
|
|
||||||
|
@ -166,9 +166,12 @@ def genbankIterator_dir(dir_path,
|
|||||||
):
|
):
|
||||||
path = dir_path
|
path = dir_path
|
||||||
read = 0
|
read = 0
|
||||||
for filename in glob.glob(os.path.join(path, b'*.gbff*')):
|
read_files = 0
|
||||||
|
files = [filename for filename in glob.glob(os.path.join(path, b'*.gbff*'))]
|
||||||
|
for filename in files:
|
||||||
if read==only:
|
if read==only:
|
||||||
return
|
return
|
||||||
|
print("Parsing file %s (%d/%d)" % (tostr(filename), read_files, len(files)))
|
||||||
f = uopen(filename)
|
f = uopen(filename)
|
||||||
if only is not None:
|
if only is not None:
|
||||||
only_f = only-read
|
only_f = only-read
|
||||||
@ -177,7 +180,8 @@ def genbankIterator_dir(dir_path,
|
|||||||
for seq in genbankIterator_file(f, skip=skip, only=only_f, buffersize=buffersize):
|
for seq in genbankIterator_file(f, skip=skip, only=only_f, buffersize=buffersize):
|
||||||
yield seq
|
yield seq
|
||||||
read+=1
|
read+=1
|
||||||
|
read_files+=1
|
||||||
|
|
||||||
|
|
||||||
def genbankIterator(obj,
|
def genbankIterator(obj,
|
||||||
int skip=0,
|
int skip=0,
|
||||||
|
@ -15,6 +15,8 @@ from obitools3.dms.capi.obierrno cimport OBI_LINE_IDX_ERROR, \
|
|||||||
OBI_ELT_IDX_ERROR, \
|
OBI_ELT_IDX_ERROR, \
|
||||||
obi_errno
|
obi_errno
|
||||||
|
|
||||||
|
from obitools3.files.uncompress cimport CompressedFile
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import mmap
|
import mmap
|
||||||
import os
|
import os
|
||||||
@ -45,7 +47,7 @@ cpdef int count_entries(file, bytes format):
|
|||||||
return -1
|
return -1
|
||||||
sep = re.compile(sep)
|
sep = re.compile(sep)
|
||||||
|
|
||||||
if type(file) and (format == b'genbank' or format == b'embl'): # file is actually a directory with multiple files
|
if type(file) == bytes and (format == b'genbank' or format == b'embl'): # file is actually a directory with multiple files
|
||||||
files = []
|
files = []
|
||||||
if format == b'embl':
|
if format == b'embl':
|
||||||
extensions = [b"*.dat"]
|
extensions = [b"*.dat"]
|
||||||
@ -66,6 +68,8 @@ cpdef int count_entries(file, bytes format):
|
|||||||
|
|
||||||
total_count = 0
|
total_count = 0
|
||||||
for f in files:
|
for f in files:
|
||||||
|
if type(f) == CompressedFile and f.compressed:
|
||||||
|
return -1
|
||||||
mmapped_file = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
|
mmapped_file = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
|
||||||
total_count += len(re.findall(sep, mmapped_file))
|
total_count += len(re.findall(sep, mmapped_file))
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user