Cython API: added an EMBL parser; files to import are now read in binary
mode
This commit is contained in:
@ -8,13 +8,13 @@ Created on 30 mars 2016
|
|||||||
|
|
||||||
cdef class LineBuffer:
|
cdef class LineBuffer:
|
||||||
|
|
||||||
def __init__(self,object fileobj,int size=100000000):
|
def __init__(self, object fileobj, int size=100000000):
|
||||||
self.fileobj=fileobj
|
self.fileobj=fileobj
|
||||||
self.size=size
|
self.size=size
|
||||||
|
|
||||||
def __iter__(self):
|
def __iter__(self):
|
||||||
cdef list buff = self.fileobj.readlines(self.size)
|
cdef list buff = self.fileobj.readlines(self.size)
|
||||||
cdef str l
|
cdef object l # Can be str or bytes
|
||||||
|
|
||||||
while buff:
|
while buff:
|
||||||
for l in buff:
|
for l in buff:
|
||||||
|
@ -2,4 +2,4 @@
|
|||||||
|
|
||||||
from .uncompress cimport CompressedFile
|
from .uncompress cimport CompressedFile
|
||||||
|
|
||||||
cpdef CompressedFile uopen(str name, mode=?)
|
cpdef CompressedFile uopen(object name, mode=?)
|
@ -7,15 +7,16 @@ Created on 25 mars 2016
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
from urllib.request import urlopen
|
from urllib.request import urlopen
|
||||||
|
from obitools3.utils cimport tostr
|
||||||
|
|
||||||
|
|
||||||
cpdef CompressedFile uopen(str name, mode='r'):
|
cpdef CompressedFile uopen(object name, mode='rb'):
|
||||||
cdef CompressedFile c
|
cdef CompressedFile c
|
||||||
|
|
||||||
try:
|
try:
|
||||||
f = urlopen(name)
|
f = urlopen(tostr(name))
|
||||||
except:
|
except:
|
||||||
f = open(name,mode)
|
f = open(tostr(name),mode)
|
||||||
|
|
||||||
c = CompressedFile(f)
|
c = CompressedFile(f)
|
||||||
|
|
||||||
|
@ -11,89 +11,82 @@ import types
|
|||||||
from obitools3.dms.obiseq cimport Nuc_Seq
|
from obitools3.dms.obiseq cimport Nuc_Seq
|
||||||
|
|
||||||
|
|
||||||
def fastaIterator(lineiterator,
|
# def fastaIterator(lineiterator,
|
||||||
int skip=0,
|
# int skip=0,
|
||||||
only=None,
|
# only=None,
|
||||||
firstline=None,
|
# firstline=None,
|
||||||
int buffersize=100000000
|
# int buffersize=100000000
|
||||||
):
|
# ):
|
||||||
cdef str ident
|
# cdef str ident
|
||||||
cdef str definition
|
# cdef str definition
|
||||||
cdef dict tags
|
# cdef dict tags
|
||||||
cdef list s
|
# cdef list s
|
||||||
cdef bytes sequence
|
# cdef bytes sequence
|
||||||
cdef int skipped, ionly, read
|
# cdef int skipped, ionly, read
|
||||||
# cdef OBI_Seq seq
|
#
|
||||||
|
# if only is None:
|
||||||
if only is None:
|
# ionly=-1
|
||||||
ionly=-1
|
# else:
|
||||||
else:
|
# ionly=int(only)
|
||||||
ionly=int(only)
|
#
|
||||||
|
# if isinstance(lineiterator, (str, bytes)):
|
||||||
if isinstance(lineiterator, (str, bytes)):
|
# lineiterator=uopen(lineiterator)
|
||||||
lineiterator=uopen(lineiterator)
|
# if isinstance(lineiterator, LineBuffer):
|
||||||
if isinstance(lineiterator, LineBuffer):
|
# iterator = iter(lineiterator)
|
||||||
iterator = iter(lineiterator)
|
# else:
|
||||||
else:
|
# if hasattr(lineiterator, "readlines"):
|
||||||
if hasattr(lineiterator, "readlines"):
|
# iterator = iter(LineBuffer(lineiterator, buffersize))
|
||||||
iterator = iter(LineBuffer(lineiterator, buffersize))
|
# elif hasattr(lineiterator, '__next__'):
|
||||||
elif hasattr(lineiterator, '__next__'):
|
# iterator = lineiterator
|
||||||
iterator = lineiterator
|
# else:
|
||||||
else:
|
# raise Exception("Invalid line iterator")
|
||||||
raise Exception("Invalid line iterator")
|
#
|
||||||
|
# skipped = 0
|
||||||
skipped = 0
|
# i = iterator
|
||||||
i = iterator
|
#
|
||||||
|
# if firstline is None:
|
||||||
if firstline is None:
|
# line = next(i)
|
||||||
line = next(i)
|
# else:
|
||||||
else:
|
# line = firstline
|
||||||
line = firstline
|
#
|
||||||
|
# while True:
|
||||||
while True:
|
#
|
||||||
|
# if ionly >= 0 and read >= ionly:
|
||||||
if ionly >= 0 and read >= ionly:
|
# break
|
||||||
break
|
#
|
||||||
|
# while skipped < skip :
|
||||||
while skipped < skip :
|
# line = next(i)
|
||||||
line = next(i)
|
# try:
|
||||||
try:
|
# while line[0]!='>':
|
||||||
while line[0]!='>':
|
# line = next(i)
|
||||||
line = next(i)
|
# except StopIteration:
|
||||||
except StopIteration:
|
# pass
|
||||||
pass
|
# skipped += 1
|
||||||
skipped += 1
|
#
|
||||||
|
# ident,tags,definition = parseHeader(line)
|
||||||
ident,tags,definition = parseHeader(line)
|
# s = []
|
||||||
s = []
|
# line = next(i)
|
||||||
line = next(i)
|
#
|
||||||
|
# try:
|
||||||
try:
|
# while line[0]!='>':
|
||||||
while line[0]!='>':
|
# s.append(str2bytes(line)[0:-1])
|
||||||
s.append(str2bytes(line)[0:-1])
|
# line = next(i)
|
||||||
line = next(i)
|
#
|
||||||
|
# except StopIteration:
|
||||||
except StopIteration:
|
# pass
|
||||||
pass
|
#
|
||||||
|
# sequence = b"".join(s)
|
||||||
sequence = b"".join(s)
|
#
|
||||||
|
# yield { "id" : ident,
|
||||||
# seq = OBI_Seq(id,
|
# "definition" : definition,
|
||||||
# sequence,
|
# "sequence" : sequence,
|
||||||
# definition,
|
# "quality" : None,
|
||||||
# tags=tags,
|
# "offset" : None,
|
||||||
# )
|
# "tags" : tags,
|
||||||
# TODO Seq object
|
# "annotation" : {}
|
||||||
yield { "id" : ident,
|
# }
|
||||||
"definition" : definition,
|
#
|
||||||
"sequence" : sequence,
|
# read+=1
|
||||||
"quality" : None,
|
|
||||||
"offset" : None,
|
|
||||||
"tags" : tags,
|
|
||||||
"annotation" : {}
|
|
||||||
}
|
|
||||||
|
|
||||||
read+=1
|
|
||||||
|
|
||||||
|
|
||||||
def fastaNucIterator(lineiterator,
|
def fastaNucIterator(lineiterator,
|
||||||
@ -102,8 +95,9 @@ def fastaNucIterator(lineiterator,
|
|||||||
firstline=None,
|
firstline=None,
|
||||||
int buffersize=100000000
|
int buffersize=100000000
|
||||||
):
|
):
|
||||||
cdef str ident
|
|
||||||
cdef str definition
|
cdef bytes ident
|
||||||
|
cdef bytes definition
|
||||||
cdef dict tags
|
cdef dict tags
|
||||||
cdef list s
|
cdef list s
|
||||||
cdef bytes sequence
|
cdef bytes sequence
|
||||||
@ -143,7 +137,7 @@ def fastaNucIterator(lineiterator,
|
|||||||
while skipped < skip :
|
while skipped < skip :
|
||||||
line = next(iterator)
|
line = next(iterator)
|
||||||
try:
|
try:
|
||||||
while line[0]!='>':
|
while line[:1]!=b'>':
|
||||||
line = next(iterator)
|
line = next(iterator)
|
||||||
except StopIteration:
|
except StopIteration:
|
||||||
pass
|
pass
|
||||||
@ -154,8 +148,8 @@ def fastaNucIterator(lineiterator,
|
|||||||
line = next(iterator)
|
line = next(iterator)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
while line[0]!='>':
|
while line[:1]!=b'>':
|
||||||
s.append(str2bytes(line)[0:-1])
|
s.append(line[0:-1])
|
||||||
line = next(iterator)
|
line = next(iterator)
|
||||||
except StopIteration:
|
except StopIteration:
|
||||||
pass
|
pass
|
||||||
@ -171,17 +165,6 @@ def fastaNucIterator(lineiterator,
|
|||||||
|
|
||||||
yield seq
|
yield seq
|
||||||
|
|
||||||
# yield { "id" : ident,
|
|
||||||
# "definition" : definition,
|
|
||||||
# "sequence" : sequence,
|
|
||||||
# "quality" : None,
|
|
||||||
# "offset" : None,
|
|
||||||
# "tags" : tags,
|
|
||||||
# "annotation" : {}
|
|
||||||
# }
|
|
||||||
|
|
||||||
read+=1
|
read+=1
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -28,7 +28,8 @@ def fastqIterator(lineiterator,
|
|||||||
offset,
|
offset,
|
||||||
firstline,
|
firstline,
|
||||||
buffersize)
|
buffersize)
|
||||||
|
|
||||||
|
|
||||||
def fastqWithQualityIterator(lineiterator,
|
def fastqWithQualityIterator(lineiterator,
|
||||||
int skip=0,
|
int skip=0,
|
||||||
only=None,
|
only=None,
|
||||||
@ -36,14 +37,14 @@ def fastqWithQualityIterator(lineiterator,
|
|||||||
firstline=None,
|
firstline=None,
|
||||||
int buffersize=100000000
|
int buffersize=100000000
|
||||||
):
|
):
|
||||||
|
|
||||||
cdef LineBuffer lb
|
cdef LineBuffer lb
|
||||||
cdef str ident
|
cdef bytes ident
|
||||||
cdef str definition
|
cdef bytes definition
|
||||||
cdef dict tags
|
cdef dict tags
|
||||||
cdef bytes sequence
|
cdef bytes sequence
|
||||||
cdef bytes quality
|
cdef bytes quality
|
||||||
cdef int skipped, lines_to_skip, ionly, read
|
cdef int skipped, lines_to_skip, ionly, read, j
|
||||||
cdef int j
|
|
||||||
|
|
||||||
if only is None:
|
if only is None:
|
||||||
ionly=-1
|
ionly=-1
|
||||||
@ -84,9 +85,9 @@ def fastqWithQualityIterator(lineiterator,
|
|||||||
break
|
break
|
||||||
|
|
||||||
ident,tags,definition = parseHeader(hline)
|
ident,tags,definition = parseHeader(hline)
|
||||||
sequence = str2bytes(line[0:-1])
|
sequence = line[0:-1]
|
||||||
next(i)
|
next(i)
|
||||||
quality = str2bytes(next(i)[0:-1])
|
quality = next(i)[0:-1]
|
||||||
|
|
||||||
seq = Nuc_Seq(ident,
|
seq = Nuc_Seq(ident,
|
||||||
sequence,
|
sequence,
|
||||||
@ -97,15 +98,6 @@ def fastqWithQualityIterator(lineiterator,
|
|||||||
|
|
||||||
yield seq
|
yield seq
|
||||||
|
|
||||||
# yield { "id" : ident,
|
|
||||||
# "definition" : definition,
|
|
||||||
# "sequence" : sequence,
|
|
||||||
# "quality" : quality,
|
|
||||||
# "offset" : offset,
|
|
||||||
# "tags" : tags,
|
|
||||||
# "annotation" : {}
|
|
||||||
# }
|
|
||||||
|
|
||||||
read+=1
|
read+=1
|
||||||
hline = next(i)
|
hline = next(i)
|
||||||
|
|
||||||
@ -116,8 +108,8 @@ def fastqWithoutQualityIterator(lineiterator,
|
|||||||
firstline=None,
|
firstline=None,
|
||||||
int buffersize=100000000
|
int buffersize=100000000
|
||||||
):
|
):
|
||||||
cdef str ident
|
cdef bytes ident
|
||||||
cdef str definition
|
cdef bytes definition
|
||||||
cdef dict tags
|
cdef dict tags
|
||||||
cdef bytes sequence
|
cdef bytes sequence
|
||||||
cdef bytes quality
|
cdef bytes quality
|
||||||
@ -163,7 +155,7 @@ def fastqWithoutQualityIterator(lineiterator,
|
|||||||
break
|
break
|
||||||
|
|
||||||
ident,tags,definition = parseHeader(hline)
|
ident,tags,definition = parseHeader(hline)
|
||||||
sequence = str2bytes(line[0:-1])
|
sequence = line[0:-1]
|
||||||
next(i)
|
next(i)
|
||||||
next(i)
|
next(i)
|
||||||
|
|
||||||
@ -175,15 +167,6 @@ def fastqWithoutQualityIterator(lineiterator,
|
|||||||
tags=tags)
|
tags=tags)
|
||||||
|
|
||||||
yield seq
|
yield seq
|
||||||
|
|
||||||
# yield { "id" : ident,
|
|
||||||
# "definition" : definition,
|
|
||||||
# "sequence" : sequence,
|
|
||||||
# "quality" : None,
|
|
||||||
# "offset" : None,
|
|
||||||
# "tags" : tags,
|
|
||||||
# "annotation" : {}
|
|
||||||
# }
|
|
||||||
|
|
||||||
read+=1
|
read+=1
|
||||||
hline = next(i)
|
hline = next(i)
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
#cython: language_level=3
|
#cython: language_level=3
|
||||||
|
|
||||||
|
|
||||||
cpdef tuple parseHeader(str header)
|
cpdef tuple parseHeader(bytes header)
|
||||||
|
@ -10,25 +10,25 @@ from obitools3.utils cimport __etag__
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
|
|
||||||
__ret__ = re.compile('''(([^ ]+)=('[^']*'|"[^"]*"|[^;]+); *)+?''')
|
__ret__ = re.compile(b'''(([^ ]+)=('[^']*'|"[^"]*"|[^;]+); *)+?''')
|
||||||
|
|
||||||
|
|
||||||
cpdef tuple parseHeader(str header):
|
cpdef tuple parseHeader(bytes header):
|
||||||
cdef list m
|
cdef list m
|
||||||
cdef dict tags
|
cdef dict tags
|
||||||
cdef str definition
|
cdef bytes definition
|
||||||
cdef str ident
|
cdef bytes ident
|
||||||
cdef str second
|
cdef bytes second
|
||||||
|
|
||||||
m=header[1:-1].split(maxsplit=1)
|
m=header[1:-1].split(maxsplit=1)
|
||||||
|
|
||||||
ident=m[0]
|
ident=m[0]
|
||||||
if ident[-1] == ';':
|
if len(ident)>1 and ident[-2:-1] == b';':
|
||||||
ident = ident[:-1]
|
ident = ident[:-1]
|
||||||
|
|
||||||
if len(m)==1:
|
if len(m)==1:
|
||||||
tags={}
|
tags={}
|
||||||
definition=''
|
definition=b''
|
||||||
else:
|
else:
|
||||||
second=m[1]
|
second=m[1]
|
||||||
m = __ret__.findall(second)
|
m = __ret__.findall(second)
|
||||||
|
@ -7,7 +7,6 @@ Created on march 8th 2018
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
from .tab import tabIterator
|
from .tab import tabIterator
|
||||||
from obitools3.utils cimport bytes2str
|
|
||||||
import types
|
import types
|
||||||
|
|
||||||
|
|
||||||
@ -24,18 +23,10 @@ def ngsfilterIterator(lineiterator,
|
|||||||
):
|
):
|
||||||
|
|
||||||
cdef list all_lines
|
cdef list all_lines
|
||||||
cdef str header
|
cdef bytes header
|
||||||
cdef str sep_str
|
|
||||||
cdef bytes out_sep
|
cdef bytes out_sep
|
||||||
cdef str out_sep_str
|
|
||||||
|
|
||||||
out_sep = b"\t"
|
out_sep = b"\t"
|
||||||
out_sep_str = "\t"
|
|
||||||
|
|
||||||
if sep is not None:
|
|
||||||
sep_str = bytes2str(sep)
|
|
||||||
else:
|
|
||||||
sep_str = None
|
|
||||||
|
|
||||||
if isinstance(lineiterator, (str, bytes)):
|
if isinstance(lineiterator, (str, bytes)):
|
||||||
lineiterator=uopen(lineiterator)
|
lineiterator=uopen(lineiterator)
|
||||||
@ -56,20 +47,20 @@ def ngsfilterIterator(lineiterator,
|
|||||||
all_lines.insert(0, firstline)
|
all_lines.insert(0, firstline)
|
||||||
|
|
||||||
# Insert header for column names
|
# Insert header for column names
|
||||||
column_names = ["experiment", "sample", "forward_tag", "reverse_tag", "forward_primer", "reverse_primer"]
|
column_names = [b"experiment", b"sample", b"forward_tag", b"reverse_tag", b"forward_primer", b"reverse_primer"]
|
||||||
header = out_sep_str.join(column_names)
|
header = out_sep.join(column_names)
|
||||||
|
|
||||||
new_lines.append(header)
|
new_lines.append(header)
|
||||||
|
|
||||||
for line in all_lines:
|
for line in all_lines:
|
||||||
split_line = line.split(sep_str)
|
split_line = line.split(sep)
|
||||||
tags = split_line.pop(2)
|
tags = split_line.pop(2)
|
||||||
tags = tags.split(":")
|
tags = tags.split(b":")
|
||||||
if len(tags) == 1: # Forward and reverse tags are the same
|
if len(tags) == 1: # Forward and reverse tags are the same
|
||||||
tags.append(tags[0])
|
tags.append(tags[0])
|
||||||
split_line.insert(2, tags[0])
|
split_line.insert(2, tags[0])
|
||||||
split_line.insert(3, tags[1])
|
split_line.insert(3, tags[1])
|
||||||
new_lines.append(out_sep_str.join(split_line[0:6]))
|
new_lines.append(out_sep.join(split_line[0:6]))
|
||||||
|
|
||||||
return tabIterator(iter(new_lines),
|
return tabIterator(iter(new_lines),
|
||||||
header = True,
|
header = True,
|
||||||
|
@ -7,7 +7,6 @@ Created on feb 20th 2018
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
import types
|
import types
|
||||||
from obitools3.utils cimport bytes2str, tobytes
|
|
||||||
from obitools3.utils cimport __etag__
|
from obitools3.utils cimport __etag__
|
||||||
|
|
||||||
|
|
||||||
@ -28,17 +27,9 @@ def tabIterator(lineiterator,
|
|||||||
cdef int lines_to_skip, ionly, read
|
cdef int lines_to_skip, ionly, read
|
||||||
cdef list data
|
cdef list data
|
||||||
cdef dict view_line
|
cdef dict view_line
|
||||||
cdef str sep_str # TODO can't we read file lines as bytes?
|
|
||||||
cdef list keys
|
cdef list keys
|
||||||
cdef list key_types
|
cdef list key_types
|
||||||
|
|
||||||
if sep is not None:
|
|
||||||
sep_str = bytes2str(sep)
|
|
||||||
else:
|
|
||||||
sep_str = None
|
|
||||||
|
|
||||||
commentchar_str = bytes2str(commentchar)
|
|
||||||
|
|
||||||
keys = []
|
keys = []
|
||||||
key_types = []
|
key_types = []
|
||||||
skipped = 0
|
skipped = 0
|
||||||
@ -68,7 +59,7 @@ def tabIterator(lineiterator,
|
|||||||
|
|
||||||
while True:
|
while True:
|
||||||
|
|
||||||
if (not line.strip() and blanklineskip) or line[0] == commentchar_str:
|
if (not line.strip() and blanklineskip) or line[:1] == commentchar:
|
||||||
line = next(iterator)
|
line = next(iterator)
|
||||||
|
|
||||||
if ionly >= 0 and read >= ionly:
|
if ionly >= 0 and read >= ionly:
|
||||||
@ -77,13 +68,13 @@ def tabIterator(lineiterator,
|
|||||||
if not keys:
|
if not keys:
|
||||||
if header:
|
if header:
|
||||||
# TODO read types eventually
|
# TODO read types eventually
|
||||||
keys = line.split(sep_str)
|
keys = line.split(sep)
|
||||||
keys = [tobytes(x.strip()) for x in keys]
|
keys = [x.strip() for x in keys]
|
||||||
line = next(iterator)
|
line = next(iterator)
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
# TODO ??? default column names? like R?
|
# TODO ??? default column names? like R?
|
||||||
keys = [str(i) for i in range(len(line.split(sep_str)))]
|
keys = [i for i in range(len(line.split(sep)))]
|
||||||
|
|
||||||
while skipped < skip :
|
while skipped < skip :
|
||||||
line = next(iterator)
|
line = next(iterator)
|
||||||
@ -92,7 +83,7 @@ def tabIterator(lineiterator,
|
|||||||
view_line = {}
|
view_line = {}
|
||||||
|
|
||||||
# Parse
|
# Parse
|
||||||
data = line.split(sep_str)
|
data = line.split(sep)
|
||||||
|
|
||||||
if stripwhite or key_types:
|
if stripwhite or key_types:
|
||||||
data = [x.strip() for x in data]
|
data = [x.strip() for x in data]
|
||||||
|
@ -5,10 +5,11 @@ from obitools3.parsers.fasta import fastaNucIterator
|
|||||||
from obitools3.parsers.fastq import fastqIterator
|
from obitools3.parsers.fastq import fastqIterator
|
||||||
from obitools3.parsers.tab import tabIterator
|
from obitools3.parsers.tab import tabIterator
|
||||||
from obitools3.parsers.ngsfilter import ngsfilterIterator
|
from obitools3.parsers.ngsfilter import ngsfilterIterator
|
||||||
|
from obitools3.parsers.embl import emblIterator
|
||||||
|
|
||||||
|
|
||||||
oligore = re.compile("^[ACGTRYSWKMBDHVN]+$",re.I)
|
oligore = re.compile(b"^[ACGTRYSWKMBDHVN]+$",re.I)
|
||||||
tagre = re.compile("^([ACGTRYSWKMBDHVN]+|-)(:([ACGTRYSWKMBDHVN]+)|-)?$",re.I)
|
tagre = re.compile(b"^([ACGTRYSWKMBDHVN]+|-)(:([ACGTRYSWKMBDHVN]+)|-)?$",re.I)
|
||||||
|
|
||||||
def is_ngsfilter_line(line): # TODO doesn't work?
|
def is_ngsfilter_line(line): # TODO doesn't work?
|
||||||
try:
|
try:
|
||||||
@ -16,8 +17,8 @@ def is_ngsfilter_line(line): # TODO doesn't work?
|
|||||||
ok = tagre.match(parts[2])
|
ok = tagre.match(parts[2])
|
||||||
ok&= oligore.match(parts[3])
|
ok&= oligore.match(parts[3])
|
||||||
ok&= oligore.match(parts[4])
|
ok&= oligore.match(parts[4])
|
||||||
ok&= parts[5]=="F" | parts[5]=="T"
|
ok&= parts[5]==b"F" | parts[5]==b"T"
|
||||||
return ok
|
return ok
|
||||||
except:
|
except:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@ -55,19 +56,22 @@ def entryIteratorFactory(lineiterator,
|
|||||||
|
|
||||||
format=b"tabular"
|
format=b"tabular"
|
||||||
|
|
||||||
if first[0]==">":
|
try:
|
||||||
format=b"fasta"
|
if first[:1]==b">":
|
||||||
if first[0]=="@":
|
format=b"fasta"
|
||||||
format=b"fastq"
|
if first[:1]==b"@":
|
||||||
elif first[0:3]=='ID ':
|
format=b"fastq"
|
||||||
format=b"embl"
|
elif first[0:3]==b'ID ':
|
||||||
elif first[0:6]=='LOCUS ':
|
format=b"embl"
|
||||||
format=b"genbank"
|
elif first[0:6]==b'LOCUS ':
|
||||||
elif first[0:11]=='#@ecopcr-v2': # TODO v2????
|
format=b"genbank"
|
||||||
format=b"ecopcrfile"
|
elif first[0:11]==b'#@ecopcr-v2': # TODO v2????
|
||||||
elif is_ngsfilter_line(first):
|
format=b"ecopcrfile"
|
||||||
format=b"ngsfilter"
|
elif is_ngsfilter_line(first):
|
||||||
|
format=b"ngsfilter"
|
||||||
|
except IndexError:
|
||||||
|
pass
|
||||||
|
|
||||||
# TODO Temporary fix
|
# TODO Temporary fix
|
||||||
first=None
|
first=None
|
||||||
lineiterator.seek(0)
|
lineiterator.seek(0)
|
||||||
@ -114,6 +118,14 @@ def entryIteratorFactory(lineiterator,
|
|||||||
firstline=first,
|
firstline=first,
|
||||||
buffersize=buffersize),
|
buffersize=buffersize),
|
||||||
dict)
|
dict)
|
||||||
|
|
||||||
|
elif format==b'embl':
|
||||||
|
return (emblIterator(lineiterator,
|
||||||
|
skip=skip,
|
||||||
|
only=only,
|
||||||
|
firstline=first,
|
||||||
|
buffersize=buffersize),
|
||||||
|
dict)
|
||||||
|
|
||||||
raise NotImplementedError('File format not yet implemented')
|
raise NotImplementedError('File format not yet implemented')
|
||||||
|
|
||||||
|
@ -14,4 +14,4 @@ cdef obitype_t update_obitype(obitype_t obitype, object new_value)
|
|||||||
cdef obitype_t get_obitype_iterable_value(object value)
|
cdef obitype_t get_obitype_iterable_value(object value)
|
||||||
cdef obitype_t get_obitype(object value)
|
cdef obitype_t get_obitype(object value)
|
||||||
|
|
||||||
cdef object __etag__(str x)
|
cdef object __etag__(bytes x)
|
||||||
|
@ -160,10 +160,10 @@ cdef obitype_t get_obitype(object value) :
|
|||||||
return get_obitype_single_value(value)
|
return get_obitype_single_value(value)
|
||||||
|
|
||||||
|
|
||||||
__re_int__ = re.compile("^[+-]?[0-9]+$")
|
__re_int__ = re.compile(b"^[+-]?[0-9]+$")
|
||||||
__re_float__ = re.compile("^[+-]?[0-9]+(\.[0-9]*)?([eE][+-]?[0-9]+)?$")
|
__re_float__ = re.compile(b"^[+-]?[0-9]+(\.[0-9]*)?([eE][+-]?[0-9]+)?$")
|
||||||
__re_str__ = re.compile("""^"[^"]*"|'[^']*'$""")
|
__re_str__ = re.compile(b"""^"[^"]*"|'[^']*'$""")
|
||||||
__re_dict__ = re.compile("""^\{\ *
|
__re_dict__ = re.compile(b"""^\{\ *
|
||||||
(
|
(
|
||||||
("[^"]*"|'[^']*')
|
("[^"]*"|'[^']*')
|
||||||
\ *:\ *
|
\ *:\ *
|
||||||
@ -181,9 +181,9 @@ __re_dict__ = re.compile("""^\{\ *
|
|||||||
)
|
)
|
||||||
)*\ *\}$""", re.VERBOSE)
|
)*\ *\}$""", re.VERBOSE)
|
||||||
|
|
||||||
__re_val__ = re.compile("""(("[^"]*"|'[^']*') *: *([^,}]+|"[^"]*"|'[^']*') *[,}] *)""")
|
__re_val__ = re.compile(b"""(("[^"]*"|'[^']*') *: *([^,}]+|"[^"]*"|'[^']*') *[,}] *)""")
|
||||||
|
|
||||||
cdef object __etag__(str x):
|
cdef object __etag__(bytes x):
|
||||||
cdef list elements
|
cdef list elements
|
||||||
cdef tuple i
|
cdef tuple i
|
||||||
|
|
||||||
@ -193,11 +193,11 @@ cdef object __etag__(str x):
|
|||||||
v=float(x)
|
v=float(x)
|
||||||
elif __re_str__.match(x):
|
elif __re_str__.match(x):
|
||||||
v=x[1:-1]
|
v=x[1:-1]
|
||||||
elif x=='None':
|
elif x==b'None':
|
||||||
v=None
|
v=None
|
||||||
elif x=='False':
|
elif x==b'False':
|
||||||
v=False
|
v=False
|
||||||
elif x=='True':
|
elif x==b'True':
|
||||||
v=True
|
v=True
|
||||||
elif __re_dict__.match(x):
|
elif __re_dict__.match(x):
|
||||||
elements=__re_val__.findall(x)
|
elements=__re_val__.findall(x)
|
||||||
|
Reference in New Issue
Block a user