firt version of a fastq parser
This commit is contained in:
0
python/obitools3/parsers/__init__.py
Normal file
0
python/obitools3/parsers/__init__.py
Normal file
8
python/obitools3/parsers/fastq.pxd
Normal file
8
python/obitools3/parsers/fastq.pxd
Normal file
@ -0,0 +1,8 @@
|
||||
#cython: language_level=3
|
||||
|
||||
from .header cimport parseHeader
|
||||
from ..files.universalopener cimport uopen
|
||||
from ..files.linebuffer cimport LineBuffer
|
||||
|
||||
|
||||
|
41
python/obitools3/parsers/fastq.pyx
Normal file
41
python/obitools3/parsers/fastq.pyx
Normal file
@ -0,0 +1,41 @@
|
||||
#cython: language_level=3
|
||||
|
||||
'''
|
||||
Created on 30 mars 2016
|
||||
|
||||
@author: coissac
|
||||
'''
|
||||
|
||||
|
||||
|
||||
def fastqIterator(lineiterator, int buffersize=100000000):
|
||||
cdef LineBuffer lb
|
||||
cdef str ident
|
||||
cdef str definition
|
||||
cdef dict tags
|
||||
|
||||
if isinstance(lineiterator,(str,bytes)):
|
||||
lineiterator=uopen(lineiterator)
|
||||
|
||||
if isinstance(lineiterator, LineBuffer):
|
||||
lb=lineiterator
|
||||
else:
|
||||
lb=LineBuffer(lineiterator,buffersize)
|
||||
|
||||
i = iter(lb)
|
||||
for line in i:
|
||||
ident,tags,definition = parseHeader(line)
|
||||
sequence = next(i)[0:-1]
|
||||
next(i)
|
||||
quality = next(i)[0:-1]
|
||||
|
||||
yield { "id" : ident,
|
||||
"definition" : definition,
|
||||
"sequence" : sequence,
|
||||
"quality" : quality,
|
||||
"tags" : tags,
|
||||
"annotation" : {}
|
||||
}
|
||||
|
||||
|
||||
|
5
python/obitools3/parsers/header.pxd
Normal file
5
python/obitools3/parsers/header.pxd
Normal file
@ -0,0 +1,5 @@
|
||||
#cython: language_level=3
|
||||
|
||||
cdef object __etag__(str x)
|
||||
|
||||
cpdef tuple parseHeader(str header)
|
47
python/obitools3/parsers/header.pyx
Normal file
47
python/obitools3/parsers/header.pyx
Normal file
@ -0,0 +1,47 @@
|
||||
#cython: language_level=3
|
||||
|
||||
'''
|
||||
Created on 25 mars 2016
|
||||
|
||||
@author: coissac
|
||||
'''
|
||||
|
||||
import re
|
||||
|
||||
__ret__ = re.compile('''(([^ ]+)=('[^']*'|"[^"]*"|[^;]+); *)+?''')
|
||||
|
||||
cdef object __etag__(str x):
|
||||
try:
|
||||
v = eval(x,{},{})
|
||||
except:
|
||||
v = x
|
||||
return v
|
||||
|
||||
cpdef tuple parseHeader(str header):
|
||||
cdef list m
|
||||
cdef dict tags
|
||||
cdef str definition
|
||||
cdef str ident
|
||||
cdef str second
|
||||
|
||||
m=header[1:-1].split(maxsplit=1)
|
||||
|
||||
ident=m[0]
|
||||
|
||||
if len(m)==1:
|
||||
tags={}
|
||||
definition=''
|
||||
else:
|
||||
second=m[1]
|
||||
m = __ret__.findall(second)
|
||||
|
||||
if m:
|
||||
tags = dict([(a[1],__etag__(a[2])) for a in m])
|
||||
definition = second.split(m[-1][0],1)[1].strip()
|
||||
else:
|
||||
tags = {}
|
||||
definition = second.strip()
|
||||
|
||||
return ident,tags,definition
|
||||
|
||||
|
Reference in New Issue
Block a user