From e39c1a7fbfd36304b8432d3a3f7d5d46e206fe9e Mon Sep 17 00:00:00 2001
From: Celine Mercier
Date: Sun, 31 Mar 2019 15:38:34 +0200
Subject: [PATCH] Cython: added tab formatter and parser (for obi export)

---
Review notes: TabFormat now honours its `header` argument and returns the
header together with the first data row instead of replacing that row.
TabWriter therefore no longer offsets `skip`/`only` for the header. The blob
ids on the `index` lines of the two .pyx files predate these edits.

 python/obitools3/format/tab.pxd  |  8 ++++++
 python/obitools3/format/tab.pyx  | 60 ++++++++++++++++++++++++++++++++++++++++++++
 python/obitools3/writers/tab.pxd |  9 +++++++
 python/obitools3/writers/tab.pyx | 52 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 129 insertions(+)
 create mode 100755 python/obitools3/format/tab.pxd
 create mode 100755 python/obitools3/format/tab.pyx
 create mode 100755 python/obitools3/writers/tab.pxd
 create mode 100755 python/obitools3/writers/tab.pyx

diff --git a/python/obitools3/format/tab.pxd b/python/obitools3/format/tab.pxd
new file mode 100755
index 0000000..a434faa
--- /dev/null
+++ b/python/obitools3/format/tab.pxd
@@ -0,0 +1,8 @@
+#cython: language_level=3
+
+cdef class TabFormat:
+    cdef bint header
+    cdef bint first_line
+    cdef bytes NAString
+    cdef list tags
+    cdef bytes sep
\ No newline at end of file
diff --git a/python/obitools3/format/tab.pyx b/python/obitools3/format/tab.pyx
new file mode 100755
index 0000000..052b2bd
--- /dev/null
+++ b/python/obitools3/format/tab.pyx
@@ -0,0 +1,60 @@
+#cython: language_level=3
+
+cimport cython
+from obitools3.dms.view.view cimport Line
+from obitools3.utils cimport bytes2str_object, str2bytes, tobytes
+from obitools3.dms.column.column cimport Column_line
+
+
+cdef class TabFormat:
+    """Format view lines as separator-delimited rows of bytes.
+
+    The column order is taken from the keys of the first line formatted.
+    """
+
+    def __init__(self, header=True, bytes NAString=b"NA", bytes sep=b"\t"):
+        """Store the formatting options.
+
+        header: emit a row of column names ahead of the first data row.
+        NAString: bytes written in place of a None value.
+        sep: bytes placed between the fields of a row.
+        """
+        # Honour the argument; it was previously hard-coded to True.
+        self.header = header
+        self.first_line = True
+        self.NAString = NAString
+        self.sep = sep
+
+    @cython.boundscheck(False)
+    def __call__(self, object data):
+        """Return the row for data as bytes, without a trailing newline.
+
+        On the first call with the header enabled, the result is the header
+        row, a newline, then the first data row, so the first record is
+        written instead of being replaced by the header.
+        """
+        if self.first_line:
+            self.tags = list(data.keys())
+
+        fields = []
+        for k in self.tags:
+            value = data[k]
+            if value is not None:
+                if isinstance(value, Column_line):
+                    value = value.bytes()
+                else:
+                    # Normalise through str before encoding to bytes.
+                    value = str2bytes(str(bytes2str_object(value)))
+            if value is None:
+                value = self.NAString
+            fields.append(value)
+
+        row = self.sep.join(fields)
+
+        if self.first_line:
+            self.first_line = False
+            if self.header:
+                names = self.sep.join([tobytes(k) for k in self.tags])
+                return names + b"\n" + row
+
+        return row
diff --git a/python/obitools3/writers/tab.pxd b/python/obitools3/writers/tab.pxd
new file mode 100755
index 0000000..0891c64
--- /dev/null
+++ b/python/obitools3/writers/tab.pxd
@@ -0,0 +1,9 @@
+#cython: language_level=3
+
+cdef class TabWriter:
+    cdef object formatter
+    cdef object output
+    cdef int only
+    cdef int skip
+    cdef int skipped
+    cdef int read
\ No newline at end of file
diff --git a/python/obitools3/writers/tab.pyx b/python/obitools3/writers/tab.pyx
new file mode 100755
index 0000000..f644910
--- /dev/null
+++ b/python/obitools3/writers/tab.pyx
@@ -0,0 +1,52 @@
+#cython: language_level=3
+
+'''
+Created on oct 12th 2018
+
+@author: celine.mercier.bioinfo@gmail.com
+'''
+
+
+cdef class TabWriter:
+    """Write formatted lines to an output object, one row per line."""
+
+    def __init__(self,
+                 object formatter,
+                 object output_object,
+                 int skip=0,
+                 only=None,
+                 header=True):
+        """Set up the writer.
+
+        formatter: callable returning the bytes to write for one line.
+        output_object: object whose write() method receives those bytes.
+        skip: number of leading lines to drop before writing.
+        only: maximum number of lines to write, or None for no limit.
+        header: accepted for backward compatibility and ignored here; the
+            TabFormat formatter returns its header together with the first
+            row, so the skip and only counts need no adjustment.
+        """
+        if only is None:
+            self.only = -1  # -1 means no limit
+        else:
+            self.only = int(only)
+
+        self.formatter = formatter
+        self.output = output_object
+        self.skip = skip
+        self.skipped = 0
+        self.read = 0
+
+    def __call__(self, object seq):
+        """Write seq unless it is still being skipped.
+
+        Raises StopIteration once the only limit has been reached.
+        """
+        if self.only > -1 and self.read == self.only:
+            raise StopIteration
+        if self.skip > 0 and self.skipped < self.skip:
+            self.skipped += 1
+            return
+        self.output.write(self.formatter(seq))
+        self.output.write(b"\n")   # TODO is that clean?
+        self.read += 1