Cython: added tab formatter and parser (for obi export)

This commit is contained in:
Celine Mercier
2019-03-31 15:38:34 +02:00
parent 6841d879aa
commit e39c1a7fbf
4 changed files with 104 additions and 0 deletions

View File

@ -0,0 +1,8 @@
#cython: language_level=3
cdef class TabFormat:
    # C-level attribute declarations for the tabular formatter.
    cdef:
        bint header        # when true, the first formatted line holds the column names
        bint first_line    # true until the first line has been formatted
        bytes NAString     # placeholder written for values that are None
        list tags          # column keys, captured from the first record formatted
        bytes sep          # separator joined between the fields of a line

44
python/obitools3/format/tab.pyx Executable file
View File

@ -0,0 +1,44 @@
#cython: language_level=3
cimport cython
from obitools3.dms.view.view cimport Line
from obitools3.utils cimport bytes2str_object, str2bytes, tobytes
from obitools3.dms.column.column cimport Column_line
cdef class TabFormat:
    '''
    Callable formatter turning mapping-like records into delimited byte lines.

    The column order is taken from the keys of the first record formatted
    and reused for every following record.
    '''

    def __init__(self, header=True, bytes NAString=b"NA", bytes sep=b"\t"):
        '''
        @param header: if true, the first call returns a line of column names
        @param NAString: bytes written in place of values that are None
        @param sep: bytes inserted between the fields of a line
        '''
        # Honour the caller's choice (this used to be forced to True).
        self.header = header
        self.first_line = True
        self.NAString = NAString
        self.sep = sep

    @cython.boundscheck(False)
    def __call__(self, object data):
        '''
        Format one record.

        @param data: object exposing keys() and item access by key
        @return: the fields joined with the separator, without a trailing
                 newline

        NOTE: when the header is enabled, the first call returns the column
        names only; the values of that first record are not part of the
        returned line.
        '''
        line = []

        if self.first_line:
            # Freeze the column order on the first record.
            self.tags = list(data.keys())

        emit_header = self.header and self.first_line

        for k in self.tags:
            if emit_header:
                value = tobytes(k)
            else:
                value = data[k]
                if value is not None:
                    if type(value) == Column_line:
                        value = value.bytes()
                    else:
                        # Round trip through str to obtain a bytes rendering;
                        # presumably bytes2str_object decodes bytes nested in
                        # the value first -- TODO confirm against
                        # obitools3.utils.
                        value = str2bytes(str(bytes2str_object(value)))
                # Checked after the conversion so that a conversion yielding
                # None is also replaced by the placeholder.
                if value is None:
                    value = self.NAString
            line.append(value)

        self.first_line = False

        return self.sep.join(line)

View File

@ -0,0 +1,9 @@
#cython: language_level=3
cdef class TabWriter:
    # C-level attribute declarations for the tabular writer.
    cdef:
        object formatter   # callable turning a record into the bytes of one line
        object output      # object whose write() receives the formatted bytes
        int only           # maximum number of lines to write, -1 for no limit
        int skip           # number of leading calls to drop before writing
        int skipped        # number of calls dropped so far
        int read           # number of lines written so far

View File

@ -0,0 +1,43 @@
#cython: language_level=3
'''
Created on oct 12th 2018
@author: celine.mercier.bioinfo@gmail.com
'''
cdef class TabWriter:
    '''
    Callable writer sending formatted records to an output object, one line
    per call, with optional skipping of leading records and an optional
    limit on the number of lines written.
    '''

    def __init__(self,
                 object formatter,
                 object output_object,
                 int skip=0,
                 only=None,
                 header=True):
        '''
        @param formatter: callable returning the bytes of a line for a record
        @param output_object: object with a write() method accepting bytes
        @param skip: number of leading calls to drop
        @param only: maximum number of records to write, None for no limit
        @param header: whether a header line is expected in the output
        '''
        self.formatter = formatter
        self.output = output_object
        self.skipped = 0
        self.read = 0

        if only is None:
            # -1 is the "no limit" sentinel; it must not be shifted by the
            # header adjustment, otherwise it becomes 0 and the very first
            # call raises StopIteration.
            self.only = -1
        else:
            self.only = int(only)
            if header:
                # Allow one extra written line for the header.
                self.only += 1

        self.skip = skip
        if header:
            # One fewer call is dropped when a header is expected
            # (presumably because the formatter's first call yields the
            # header line -- TODO confirm against the formatter in use).
            self.skip -= 1

    def __call__(self, object seq):
        '''
        Write one record, or drop it while leading records are being skipped.

        @param seq: the record handed to the formatter
        @raise StopIteration: once the line limit has been reached
        '''
        if self.only > -1 and self.read == self.only:
            raise StopIteration

        if self.skip > 0 and self.skipped < self.skip:
            self.skipped += 1
            return

        self.output.write(self.formatter(seq))
        self.output.write(b"\n")
        self.read += 1