From bac7ce7184b9db310835f504e54b63242a6c22d3 Mon Sep 17 00:00:00 2001
From: Eric Coissac
Date: Thu, 2 Jun 2016 19:10:33 +0200
Subject: [PATCH] Start of the implementation of the export methods

---
Review notes (ignored by git am):
  * header.pxd now declares every attribute that header.pyx assigns.
  * Mutable `[]` default arguments replaced by None.
  * fasta.pyx no longer binds read-only bytes to a writable memoryview.
  * Buffer size uses floor division.
  * The index lines below were not recomputed for the revised content.

 python/obitools3/format/__init__.py |  0
 python/obitools3/format/fasta.pxd   |  9 ++++
 python/obitools3/format/fasta.pyx   | 78 +++++++++++++++++++++++++++++
 python/obitools3/format/header.pxd  |  7 +++
 python/obitools3/format/header.pyx  | 44 ++++++++++++++++
 5 files changed, 138 insertions(+)
 create mode 100644 python/obitools3/format/__init__.py
 create mode 100644 python/obitools3/format/fasta.pxd
 create mode 100644 python/obitools3/format/fasta.pyx
 create mode 100644 python/obitools3/format/header.pxd
 create mode 100644 python/obitools3/format/header.pyx

diff --git a/python/obitools3/format/__init__.py b/python/obitools3/format/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/python/obitools3/format/fasta.pxd b/python/obitools3/format/fasta.pxd
new file mode 100644
index 0000000..b2d2c2d
--- /dev/null
+++ b/python/obitools3/format/fasta.pxd
@@ -0,0 +1,9 @@
+from .header cimport HeaderFormat
+from cython.view cimport array as cvarray
+
+cdef class FastaFormat:
+
+    cdef HeaderFormat headerFormater
+
+    cdef size_t sequenceBufferLength
+    cdef cvarray sequenceBuffer
diff --git a/python/obitools3/format/fasta.pyx b/python/obitools3/format/fasta.pyx
new file mode 100644
index 0000000..2460e11
--- /dev/null
+++ b/python/obitools3/format/fasta.pyx
@@ -0,0 +1,78 @@
+cimport cython
+from libc.string cimport memcpy
+
+cdef class FastaFormat:
+    """
+    Lay out the 'sequence' entry of a record in 60-column lines.
+
+    The wrapped text is written into `sequenceBuffer`, a reusable char
+    buffer that is reallocated only when a record does not fit.
+    """
+
+    def __init__(self, list tags=None, bint printNAKeys=False):
+        """
+        :param tags: forwarded to HeaderFormat; None means an empty list
+        :param printNAKeys: forwarded to HeaderFormat
+        """
+        # A `[]` default would be one list object shared by every instance.
+        if tags is None:
+            tags = []
+
+        self.headerFormater = HeaderFormat(True,
+                                           tags,
+                                           printNAKeys)
+
+        self.sequenceBufferLength=1000
+        self.sequenceBuffer = cvarray(shape=(1000,),
+                                      itemsize=sizeof(char),
+                                      format="c",
+                                      mode="c",
+                                      allocate_buffer=True)
+
+    @cython.boundscheck(False)
+    def __call__(self, dict data):
+        """
+        Copy data['sequence'] into the internal buffer, adding a newline
+        after every complete 60-character line.
+
+        :param data: record dictionary; data['sequence'] must be bytes
+
+        NOTE(review): nothing is returned and headerFormater is not called
+        yet -- presumably the rest of the export is still to be written.
+        """
+        cdef bytes brawseq = data['sequence']
+        cdef size_t lseq = len(brawseq)
+        cdef size_t needed_size = lseq + 1
+        # Borrowed pointer: bytes objects are read-only, so they cannot be
+        # viewed through a writable char[:] memoryview.
+        cdef char* seq = brawseq
+        cdef char[::1] fasta
+        cdef size_t i=0
+        cdef size_t k=0
+        cdef size_t remaining
+
+        # Floor division keeps the size integral; one spare byte per 50
+        # characters is more than the one newline per 60 actually written.
+        needed_size += needed_size // 50
+
+        if needed_size > self.sequenceBufferLength:
+            self.sequenceBufferLength=needed_size
+            self.sequenceBuffer = cvarray(shape=(needed_size,),
+                                          itemsize=sizeof(char),
+                                          format="c",
+                                          mode="c",
+                                          allocate_buffer=True)
+
+        fasta = self.sequenceBuffer
+
+        while i < lseq:
+            remaining = lseq - i
+            if remaining >= 60:
+                memcpy(&fasta[k], seq + i, 60)
+                fasta[k+60] = b'\n'
+                k+=61
+                i+=60
+            else:
+                memcpy(&fasta[k], seq + i, remaining)
+                k+=remaining
+                i+=remaining
diff --git a/python/obitools3/format/header.pxd b/python/obitools3/format/header.pxd
new file mode 100644
index 0000000..92539f2
--- /dev/null
+++ b/python/obitools3/format/header.pxd
@@ -0,0 +1,7 @@
+cdef class HeaderFormat:
+
+    cdef str start
+    cdef list tags
+    cdef bint printNaKeys
+    cdef size_t headerBufferLength
+    cdef list headerBuffer
diff --git a/python/obitools3/format/header.pyx b/python/obitools3/format/header.pyx
new file mode 100644
index 0000000..9431c3a
--- /dev/null
+++ b/python/obitools3/format/header.pyx
@@ -0,0 +1,44 @@
+
+cdef class HeaderFormat:
+    """
+    Build the title line of a sequence record.
+
+    The line starts with '>' when fastaHeader is true and with '@'
+    otherwise.
+    """
+
+    def __init__(self, bint fastaHeader=True, list tags=None, bint printNAKeys=False):
+        """
+        :param fastaHeader: select the '>' prefix (True) or '@' (False)
+        :param tags: stored as-is; None means an empty list
+        :param printNAKeys: stored as-is
+        """
+        # A `[]` default would be one list object shared by every instance.
+        if tags is None:
+            tags = []
+
+        self.tags = tags
+        self.printNaKeys = printNAKeys
+
+        if fastaHeader:
+            self.start=">"
+        else:
+            self.start="@"
+
+        self.headerBufferLength = 1000
+        self.headerBuffer = []
+
+    def __call__(self, dict data):
+        """
+        Return the prefix followed by data['id'] and, when
+        data['definition'] is not None, a space and the definition.
+        """
+        cdef str header
+
+        if data['definition'] is not None:
+            header = "%s%s %s" % (self.start,data['id'],
+                                             data['definition'])
+        else:
+            header = "%s%s" % (self.start,data['id'])
+
+        return header