data import: entries now counted if there are multiple files
This commit is contained in:
@ -131,7 +131,10 @@ def run(config):
|
||||
output[0].close()
|
||||
return
|
||||
|
||||
pb = ProgressBar(entry_count, config, seconde=5)
|
||||
if entry_count >= 0:
|
||||
pb = ProgressBar(entry_count, config, seconde=5)
|
||||
else:
|
||||
pb = None
|
||||
|
||||
entries = input[1]
|
||||
|
||||
@ -161,7 +164,8 @@ def run(config):
|
||||
else:
|
||||
raise RollbackException("obi import error, rollbacking view", view)
|
||||
|
||||
pb(i)
|
||||
if pb is not None:
|
||||
pb(i)
|
||||
|
||||
if NUC_SEQS_view:
|
||||
id_col[i] = entry.id
|
||||
@ -271,10 +275,11 @@ def run(config):
|
||||
# Fill value
|
||||
dcols[tag][0][i] = value
|
||||
|
||||
i+=1 # TODO Not if None sequence
|
||||
i+=1
|
||||
|
||||
pb(i, force=True)
|
||||
print("", file=sys.stderr)
|
||||
if pb is not None:
|
||||
pb(i, force=True)
|
||||
print("", file=sys.stderr)
|
||||
|
||||
# Save command config in View and DMS comments
|
||||
command_line = " ".join(sys.argv[1:])
|
||||
|
@ -13,10 +13,13 @@ from obitools3.dms.capi.obitypes cimport is_a_DNA_seq, \
|
||||
|
||||
from obitools3.dms.capi.obierrno cimport OBI_LINE_IDX_ERROR, \
|
||||
OBI_ELT_IDX_ERROR
|
||||
#obi_errno
|
||||
#obi_errno # TODO
|
||||
|
||||
import re
|
||||
import mmap
|
||||
import os
|
||||
import glob
|
||||
import gzip
|
||||
|
||||
|
||||
cpdef bytes format_separator(bytes format):
|
||||
@ -35,16 +38,49 @@ cpdef bytes format_separator(bytes format):
|
||||
|
||||
|
||||
cpdef int count_entries(file, bytes format):
    """
    Count the entries of an input by counting the matches of the entry
    separator associated with `format`.

    @param file: either an open binary file object exposing fileno(), or,
                 for the b'genbank' and b'embl' formats, the path (bytes or
                 str) of a directory whose '*.gbff' (genbank) or '*.dat'
                 (embl) files are all counted together
    @param format: the format name, passed to format_separator() to obtain
                   the separator regular expression (assumed to be a bytes
                   pattern or None -- TODO confirm against format_separator)

    @return: the total number of separator matches over all the files, or
             -1 when the count can not be computed (no separator known for
             the format, no matching file in the directory, or any error
             while opening or mapping a file)
    """

    # Files opened by this function: they are the only ones closed here.
    # A file object given by the caller is left open for the caller to use.
    opened = []

    try:
        sep = format_separator(format)
        if sep is None:
            return -1
        sep = re.compile(sep)

        # Only a path can designate a directory with multiple files; a file
        # object is always counted directly, whatever the format
        if isinstance(file, (bytes, str)) and format in (b'genbank', b'embl'):
            if format == b'embl':
                extensions = [b"*.dat"]
            else:
                extensions = [b"*.gbff"]

            # glob patterns are bytes, so the directory must be bytes too
            directory = os.fsencode(file)
            for ext in extensions:
                for filename in glob.glob(os.path.join(directory, ext)):
                    # TODO gzipped files are not handled (they can not be
                    # memory-mapped as they are)
                    opened.append(open(filename, "rb"))
            files = opened
        else:
            files = [file]

        if len(files) == 0:
            return -1

        total_count = 0
        for f in files:
            # The mapping is released as soon as the file has been scanned
            with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mmapped_file:
                total_count += len(sep.findall(mmapped_file))

        return total_count

    except Exception:
        # Counting is best effort: callers treat a negative count as
        # "unknown number of entries"
        return -1

    finally:
        for f in opened:
            f.close()
|
||||
|
||||
|
||||
# TODO RollbackException?
|
||||
cdef obi_errno_to_exception(int obi_errno, index_t line_nb=-1, object elt_id=None, str error_message=None) :
|
||||
|
Reference in New Issue
Block a user