Cython API: when importing a file in a DMS, its length is computed beforehand for the progress bar
This commit is contained in:
Celine Mercier
2019-03-13 18:35:32 +01:00
parent 50e7cd61a6
commit d88390c6d8
5 changed files with 97 additions and 48 deletions

View File

@ -23,20 +23,20 @@ def is_ngsfilter_line(line): # TODO doesn't work?
return False
def entryIteratorFactory(lineiterator,
int skip=0,
only=None,
bytes seqtype=b'nuc',
int offset=-1,
bint noquality=False,
bint skiperror=True,
bint header=False,
bytes sep=None,
bytes dec=b'.',
bytes nastring=b"NA",
bint stripwhite=True,
bint blanklineskip=True,
bytes commentchar=b"#",
int buffersize=100000000):
int skip=0,
only=None,
bytes seqtype=b'nuc',
int offset=-1,
bint noquality=False,
bint skiperror=True,
bint header=False,
bytes sep=None,
bytes dec=b'.',
bytes nastring=b"NA",
bint stripwhite=True,
bint blanklineskip=True,
bytes commentchar=b"#",
int buffersize=100000000):
if isinstance(lineiterator, (str, bytes)):
lineiterator=uopen(lineiterator)
@ -65,7 +65,7 @@ def entryIteratorFactory(lineiterator,
format=b"embl"
elif first[0:6]==b'LOCUS ':
format=b"genbank"
elif first[0:11]==b'#@ecopcr-v2': # TODO v2????
elif first[0:8]==b'#@ecopcr':
format=b"ecopcrfile"
elif is_ngsfilter_line(first):
format=b"ngsfilter"
@ -83,7 +83,8 @@ def entryIteratorFactory(lineiterator,
firstline=first,
buffersize=buffersize,
nastring=nastring),
Nuc_Seq)
Nuc_Seq,
format)
else:
raise NotImplementedError()
elif format==b'fastq':
@ -94,7 +95,8 @@ def entryIteratorFactory(lineiterator,
firstline=first,
buffersize=buffersize,
nastring=nastring),
Nuc_Seq)
Nuc_Seq,
format)
elif format==b'tabular':
return (tabIterator(lineiterator,
header = header,
@ -108,7 +110,8 @@ def entryIteratorFactory(lineiterator,
only = only,
firstline=first,
buffersize=buffersize),
dict)
dict,
format)
elif format==b'ngsfilter':
return (ngsfilterIterator(lineiterator,
sep = sep,
@ -121,7 +124,8 @@ def entryIteratorFactory(lineiterator,
only = only,
firstline=first,
buffersize=buffersize),
dict)
dict,
format)
elif format==b'embl':
return (emblIterator(lineiterator,
@ -129,7 +133,8 @@ def entryIteratorFactory(lineiterator,
only=only,
firstline=first,
buffersize=buffersize),
dict)
dict,
format)
raise NotImplementedError('File format not yet implemented')
raise NotImplementedError('File format iterator not implemented yet')