data import: entries are now counted when the input is a directory containing multiple files
This commit is contained in:
@ -130,8 +130,11 @@ def run(config):
|
|||||||
output[0].record_command_line(" ".join(sys.argv[1:]))
|
output[0].record_command_line(" ".join(sys.argv[1:]))
|
||||||
output[0].close()
|
output[0].close()
|
||||||
return
|
return
|
||||||
|
|
||||||
pb = ProgressBar(entry_count, config, seconde=5)
|
if entry_count >= 0:
|
||||||
|
pb = ProgressBar(entry_count, config, seconde=5)
|
||||||
|
else:
|
||||||
|
pb = None
|
||||||
|
|
||||||
entries = input[1]
|
entries = input[1]
|
||||||
|
|
||||||
@ -161,7 +164,8 @@ def run(config):
|
|||||||
else:
|
else:
|
||||||
raise RollbackException("obi import error, rollbacking view", view)
|
raise RollbackException("obi import error, rollbacking view", view)
|
||||||
|
|
||||||
pb(i)
|
if pb is not None:
|
||||||
|
pb(i)
|
||||||
|
|
||||||
if NUC_SEQS_view:
|
if NUC_SEQS_view:
|
||||||
id_col[i] = entry.id
|
id_col[i] = entry.id
|
||||||
@ -271,10 +275,11 @@ def run(config):
|
|||||||
# Fill value
|
# Fill value
|
||||||
dcols[tag][0][i] = value
|
dcols[tag][0][i] = value
|
||||||
|
|
||||||
i+=1 # TODO Not if None sequence
|
i+=1
|
||||||
|
|
||||||
pb(i, force=True)
|
if pb is not None:
|
||||||
print("", file=sys.stderr)
|
pb(i, force=True)
|
||||||
|
print("", file=sys.stderr)
|
||||||
|
|
||||||
# Save command config in View and DMS comments
|
# Save command config in View and DMS comments
|
||||||
command_line = " ".join(sys.argv[1:])
|
command_line = " ".join(sys.argv[1:])
|
||||||
|
@ -13,10 +13,13 @@ from obitools3.dms.capi.obitypes cimport is_a_DNA_seq, \
|
|||||||
|
|
||||||
from obitools3.dms.capi.obierrno cimport OBI_LINE_IDX_ERROR, \
|
from obitools3.dms.capi.obierrno cimport OBI_LINE_IDX_ERROR, \
|
||||||
OBI_ELT_IDX_ERROR
|
OBI_ELT_IDX_ERROR
|
||||||
#obi_errno
|
#obi_errno # TODO
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import mmap
|
import mmap
|
||||||
|
import os
|
||||||
|
import glob
|
||||||
|
import gzip
|
||||||
|
|
||||||
|
|
||||||
cpdef bytes format_separator(bytes format):
|
cpdef bytes format_separator(bytes format):
|
||||||
@ -35,16 +38,49 @@ cpdef bytes format_separator(bytes format):
|
|||||||
|
|
||||||
|
|
||||||
cpdef int count_entries(file, bytes format):
|
cpdef int count_entries(file, bytes format):
|
||||||
|
|
||||||
try:
|
try:
|
||||||
sep = format_separator(format)
|
sep = format_separator(format)
|
||||||
if sep is None:
|
if sep is None:
|
||||||
return -1
|
return -1
|
||||||
sep = re.compile(sep)
|
sep = re.compile(sep)
|
||||||
mmapped_file = mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ)
|
|
||||||
return len(re.findall(sep, mmapped_file))
|
if type(file) and (format == b'genbank' or format == b'embl'): # file is actually a directory with multiple files
|
||||||
|
files = []
|
||||||
|
if format == b'embl':
|
||||||
|
extensions = [b"*.dat"]
|
||||||
|
elif format == b"genbank":
|
||||||
|
extensions = [b"*.gbff"]
|
||||||
|
|
||||||
|
for ext in extensions:
|
||||||
|
for filename in glob.glob(os.path.join(file, ext)):
|
||||||
|
#if filename[:-3] == ".gz":
|
||||||
|
# files.append(gzip.open(filename, "rb"))
|
||||||
|
#else:
|
||||||
|
files.append(open(filename, "rb"))
|
||||||
|
else:
|
||||||
|
files = [file]
|
||||||
|
|
||||||
|
if len(files)==0:
|
||||||
|
return -1
|
||||||
|
|
||||||
|
total_count = 0
|
||||||
|
for f in files:
|
||||||
|
mmapped_file = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
|
||||||
|
total_count += len(re.findall(sep, mmapped_file))
|
||||||
|
|
||||||
except:
|
except:
|
||||||
|
if len(files) > 1:
|
||||||
|
for file in files:
|
||||||
|
file.close()
|
||||||
return -1
|
return -1
|
||||||
|
|
||||||
|
if len(files) > 1:
|
||||||
|
for f in files:
|
||||||
|
f.close()
|
||||||
|
|
||||||
|
return total_count
|
||||||
|
|
||||||
|
|
||||||
# TODO RollbackException?
|
# TODO RollbackException?
|
||||||
cdef obi_errno_to_exception(int obi_errno, index_t line_nb=-1, object elt_id=None, str error_message=None) :
|
cdef obi_errno_to_exception(int obi_errno, index_t line_nb=-1, object elt_id=None, str error_message=None) :
|
||||||
|
Reference in New Issue
Block a user