From 61b00d6013584a4913b3c58aa4778dad76ab718e Mon Sep 17 00:00:00 2001
From: Celine Mercier
Date: Tue, 9 Oct 2018 16:41:14 +0200
Subject: [PATCH] Cython: fastq formatter

---
 python/obitools3/format/fastq.pxd | 10 ++++++++
 python/obitools3/format/fastq.pyx | 42 +++++++++++++++++++++++++++++++
 2 files changed, 52 insertions(+)
 create mode 100644 python/obitools3/format/fastq.pxd
 create mode 100644 python/obitools3/format/fastq.pyx

diff --git a/python/obitools3/format/fastq.pxd b/python/obitools3/format/fastq.pxd
new file mode 100644
index 0000000..a32fa77
--- /dev/null
+++ b/python/obitools3/format/fastq.pxd
@@ -0,0 +1,10 @@
+from ..utils cimport bytes2str
+from .header cimport HeaderFormat
+from cython.view cimport array as cvarray
+
+cdef class FastqFormat:
+
+    cdef HeaderFormat headerFormatter
+
+    cdef size_t sequenceBufferLength
+    cdef char* sequenceBuffer
diff --git a/python/obitools3/format/fastq.pyx b/python/obitools3/format/fastq.pyx
new file mode 100644
index 0000000..4bda2dc
--- /dev/null
+++ b/python/obitools3/format/fastq.pyx
@@ -0,0 +1,42 @@
+#cython: language_level=3
+
+cimport cython
+from obitools3.dms.capi.obiview cimport NUC_SEQUENCE_COLUMN
+from obitools3.utils cimport bytes2str, str2bytes, tobytes
+
+
+# TODO quality offset option
+cdef class FastqFormat:
+    """Callable that renders one record as a four-part fastq entry."""
+
+    def __init__(self, list tags=None, bint printNAKeys=False):
+        # A literal [] default is built once and shared by every instance
+        # created without tags; build a fresh list per call instead.
+        if tags is None:
+            tags = []
+        self.headerFormatter = HeaderFormat("fastq",
+                                            tags,
+                                            printNAKeys)
+
+    @cython.boundscheck(False)
+    def __call__(self, object data):
+        """Return the fastq text for data, converted with bytes2str.
+
+        Raises AttributeError if data has neither quality_str nor quality.
+        """
+
+        cdef bytes quality
+
+        if hasattr(data, "quality_str"):
+            quality = str2bytes(data.quality_str)  # TODO quality_bytes property
+        elif hasattr(data, "quality"):
+            quality = tobytes(data.quality)
+        else:
+            raise AttributeError("No quality when exporting to fastq")  # TODO discuss
+
+        # Layout: header, sequence column, "+" separator, quality.
+        return bytes2str(self.headerFormatter(data) +
+                         b"\n" +
+                         data[NUC_SEQUENCE_COLUMN] +
+                         b"\n+\n" +
+                         quality)