import: fixed entry count estimation for tabular files with a header line
This commit is contained in:
@ -389,10 +389,7 @@ def open_uri(uri,
|
|||||||
sep = tobytes(qualifiers[b"sep"][0][0])
|
sep = tobytes(qualifiers[b"sep"][0][0])
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
sep = config["obi"]["sep"]
|
sep = tobytes(config["obi"]["sep"])
|
||||||
if sep == '\\t': # dirty workaround for flake8(?) issue that reads '\t' as '\'+'t' when parsing the option value
|
|
||||||
sep = '\t'
|
|
||||||
sep = tobytes(sep)
|
|
||||||
except KeyError:
|
except KeyError:
|
||||||
sep=None
|
sep=None
|
||||||
|
|
||||||
@ -568,6 +565,6 @@ def open_uri(uri,
|
|||||||
|
|
||||||
entry_count = -1
|
entry_count = -1
|
||||||
if input:
|
if input:
|
||||||
entry_count = count_entries(file, format)
|
entry_count = count_entries(file, format, header)
|
||||||
|
|
||||||
return (file, iseq, objclass, urib, entry_count)
|
return (file, iseq, objclass, urib, entry_count)
|
||||||
|
@ -3,7 +3,7 @@
|
|||||||
from obitools3.dms.capi.obitypes cimport obitype_t, index_t
|
from obitools3.dms.capi.obitypes cimport obitype_t, index_t
|
||||||
|
|
||||||
cpdef bytes format_uniq_pattern(bytes format)
|
cpdef bytes format_uniq_pattern(bytes format)
|
||||||
cpdef int count_entries(file, bytes format)
|
cpdef int count_entries(file, bytes format, bint header)
|
||||||
|
|
||||||
cdef obi_errno_to_exception(index_t line_nb=*, object elt_id=*, str error_message=*)
|
cdef obi_errno_to_exception(index_t line_nb=*, object elt_id=*, str error_message=*)
|
||||||
|
|
||||||
|
@ -40,7 +40,7 @@ cpdef bytes format_uniq_pattern(bytes format):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
cpdef int count_entries(file, bytes format):
|
cpdef int count_entries(file, bytes format, bint header):
|
||||||
|
|
||||||
try:
|
try:
|
||||||
sep = format_uniq_pattern(format)
|
sep = format_uniq_pattern(format)
|
||||||
@ -75,6 +75,8 @@ cpdef int count_entries(file, bytes format):
|
|||||||
total_count += len(re.findall(sep, mmapped_file))
|
total_count += len(re.findall(sep, mmapped_file))
|
||||||
if format != b"ngsfilter" and format != b"tabular" and format != b"embl" and format != b"genbank" and format != b"fastq":
|
if format != b"ngsfilter" and format != b"tabular" and format != b"embl" and format != b"genbank" and format != b"fastq":
|
||||||
total_count += 1 # adding +1 for 1st entry because separators include \n (ngsfilter and tabular already count one more because of last \n)
|
total_count += 1 # adding +1 for 1st entry because separators include \n (ngsfilter and tabular already count one more because of last \n)
|
||||||
|
if format == b"tabular" and header: # not counting header as an entry
|
||||||
|
total_count -= 1
|
||||||
|
|
||||||
except:
|
except:
|
||||||
if len(files) > 1:
|
if len(files) > 1:
|
||||||
|
Reference in New Issue
Block a user