From 85395dfc1a0ec7e425f95933473595384770621a Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Sun, 29 May 2016 13:53:32 +0200 Subject: [PATCH 1/6] value returned for sequence is now bytes and no more str --- python/obitools3/obidms/_obidmscolumn_seq.pyx | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/python/obitools3/obidms/_obidmscolumn_seq.pyx b/python/obitools3/obidms/_obidmscolumn_seq.pyx index 0819d82..7e96eb8 100644 --- a/python/obitools3/obidms/_obidmscolumn_seq.pyx +++ b/python/obitools3/obidms/_obidmscolumn_seq.pyx @@ -27,7 +27,7 @@ cdef class OBIDMS_column_seq(OBIDMS_column): if strcmp(value, OBISeq_NA) == 0 : result = None else : - result = bytes2str(value) + result = value free(value) return result @@ -64,10 +64,12 @@ cdef class OBIDMS_column_multi_elts_seq(OBIDMS_column_multi_elts): if strcmp(value, OBISeq_NA) == 0 : result = None else : - result = bytes2str(value) + result = value + # Be careful, we have to be sure that the cast copy the data in the python structure free(value) return result + cpdef object get_line(self, index_t line_nb) : cdef char* value cdef object value_in_result @@ -83,7 +85,7 @@ cdef class OBIDMS_column_multi_elts_seq(OBIDMS_column_multi_elts): if strcmp(value, OBISeq_NA) == 0 : value_in_result = None else : - value_in_result = bytes2str(value) + value_in_result = value free(value) result[self.elements_names[i]] = value_in_result if all_NA and (value_in_result is not None) : From f18639566105c5122dd066fa501906d3bbcb1648 Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Sun, 29 May 2016 21:18:20 +0200 Subject: [PATCH 2/6] Trap potential exception generated by char* to bytes casts --- python/obitools3/obidms/_obidmscolumn_seq.pyx | 27 +++++++++++++------ 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/python/obitools3/obidms/_obidmscolumn_seq.pyx b/python/obitools3/obidms/_obidmscolumn_seq.pyx index 7e96eb8..5697619 100644 --- a/python/obitools3/obidms/_obidmscolumn_seq.pyx +++ b/python/obitools3/obidms/_obidmscolumn_seq.pyx @@ -27,16 +27,24 @@ cdef class OBIDMS_column_seq(OBIDMS_column): if strcmp(value, OBISeq_NA) == 0 : result = None else : - result = value - free(value) + try: + result = value + finally: + free(value) return result cpdef set_line(self, index_t line_nb, object value): cdef bytes value_b + if value is None : value_b = OBISeq_NA - else : + elif isinstance(value, bytes) : + value_b = value + elif isinstance(value, str) : value_b = str2bytes(value) + else: + raise TypeError('Sequence value must be of type Bytes, Str or None') + if obi_column_set_obiseq_with_elt_idx_in_view(self.view.pointer, (self.pointer)[0], line_nb, 0, value_b) < 0: raise Exception("Problem setting a value in a column") @@ -64,9 +72,10 @@ cdef class OBIDMS_column_multi_elts_seq(OBIDMS_column_multi_elts): if strcmp(value, OBISeq_NA) == 0 : result = None else : - result = value - # Be careful, we have to be sure that the cast copy the data in the python structure - free(value) + try: + result = value + finally: + free(value) return result @@ -85,8 +94,10 @@ cdef class OBIDMS_column_multi_elts_seq(OBIDMS_column_multi_elts): if strcmp(value, OBISeq_NA) == 0 : value_in_result = None else : - value_in_result = value - free(value) + try: + value_in_result = value + finally: + free(value) result[self.elements_names[i]] = value_in_result if all_NA and (value_in_result is not None) : all_NA = False From bac7ce7184b9db310835f504e54b63242a6c22d3 Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Thu, 2 Jun 2016 19:10:33 +0200 Subject: [PATCH 3/6] Start of the implementation of the export methods --- python/obitools3/format/__init__.py | 0 python/obitools3/format/fasta.pxd | 9 +++++ python/obitools3/format/fasta.pyx | 52 +++++++++++++++++++++++++++++ python/obitools3/format/header.pxd | 5 +++ python/obitools3/format/header.pyx | 27 +++++++++++++++ 5 files changed, 93 insertions(+) create mode 100644 python/obitools3/format/__init__.py create mode 100644 python/obitools3/format/fasta.pxd create mode 100644 python/obitools3/format/fasta.pyx create mode 100644 python/obitools3/format/header.pxd create mode 100644 python/obitools3/format/header.pyx diff --git a/python/obitools3/format/__init__.py b/python/obitools3/format/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/python/obitools3/format/fasta.pxd b/python/obitools3/format/fasta.pxd new file mode 100644 index 0000000..b2d2c2d --- /dev/null +++ b/python/obitools3/format/fasta.pxd @@ -0,0 +1,9 @@ +from .header cimport HeaderFormat +from cython.view cimport array as cvarray + +cdef class FastaFormat: + + cdef HeaderFormat headerFormater + + cdef size_t sequenceBufferLength + cdef cvarray sequenceBuffer diff --git a/python/obitools3/format/fasta.pyx b/python/obitools3/format/fasta.pyx new file mode 100644 index 0000000..2460e11 --- /dev/null +++ b/python/obitools3/format/fasta.pyx @@ -0,0 +1,52 @@ +cimport cython + +cdef class FastaFormat: + + def __init__(self, list tags=[], bint printNAKeys=False): + self.headerFormater = HeaderFormat(True, + tags, + printNAKeys) + + self.sequenceBufferLength=1000 + self.sequenceBuffer = cvarray(shape=(1000,), + itemsize=sizeof(char), + format="c", + mode="c", + allocate_buffer=True) + @cython.boundscheck(False) + def __call__(self, dict data): + cdef bytes brawseq = data['sequence'] + cdef size_t lseq = len(brawseq) + cdef size_t needed_size = lseq + 1 + cdef char[:] seq + cdef char[:] fasta + cdef size_t k=0 + needed_size += needed_size/ 50 + + if needed_size > self.sequenceBufferLength: + self.sequenceBufferLength=needed_size + self.sequenceBuffer = cvarray(shape=(needed_size,), + itemsize=sizeof(char), + format="c", + mode="c", + allocate_buffer=True) + + seq = brawseq + fasta = self.sequenceBuffer + + for i in range(0,lseq,60): + if i+60 <= lseq: + fasta[k:(k+60)]=seq[i:(i+60)] + fasta[k+60]='\n' + k+=61 + else: + fasta[k:(k+lseq-i)]=seq[i:lseq] + k+=lseq-i + + + + + + + + \ No newline at end of file diff --git a/python/obitools3/format/header.pxd b/python/obitools3/format/header.pxd new file mode 100644 index 0000000..92539f2 --- /dev/null +++ b/python/obitools3/format/header.pxd @@ -0,0 +1,5 @@ +cdef class HeaderFormat: + + cdef str start + cdef size_t headerBufferLength + \ No newline at end of file diff --git a/python/obitools3/format/header.pyx b/python/obitools3/format/header.pyx new file mode 100644 index 0000000..9431c3a --- /dev/null +++ b/python/obitools3/format/header.pyx @@ -0,0 +1,27 @@ + +cdef class HeaderFormat: + + def __init__(self, bint fastaHeader=True, list tags=[], bint printNAKeys=False): + + self.tags = tags + self.printNaKeys = printNAKeys + + if fastaHeader: + self.start=">" + else: + self.start="@" + + self.headerBufferLength = 1000 + self.headerBuffer = [] + + def __call__(self, dict data): + cdef str header + + if data['definition'] is not None: + header = "%s%s %s" % (self.start,data['id'], + data['definition']) + else: + header = "%s%s" % (self.start,data['id']) + + return header + From 6af62d8124281874242e0b13aed12dbab0562912 Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Sun, 3 Jul 2016 08:25:06 +0200 Subject: [PATCH 4/6] Change a fprintf without argument to a fputs to comply with the new default parameter on ubuntu --- python/obitools3/apps/progress.pxd | 1 + python/obitools3/apps/progress.pyx | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/python/obitools3/apps/progress.pxd b/python/obitools3/apps/progress.pxd index 979ab80..cbf7846 100644 --- a/python/obitools3/apps/progress.pxd +++ b/python/obitools3/apps/progress.pxd @@ -5,6 +5,7 @@ from ..utils cimport str2bytes cdef extern from "stdio.h": struct FILE int fprintf(FILE *stream, char *format, ...) + int fputs(char *string, FILE *stream) FILE* stderr ctypedef unsigned int off_t "unsigned long long" diff --git a/python/obitools3/apps/progress.pyx b/python/obitools3/apps/progress.pyx index 48bffe4..e6d2a71 100644 --- a/python/obitools3/apps/progress.pyx +++ b/python/obitools3/apps/progress.pyx @@ -126,7 +126,7 @@ cdef class ProgressBar: if twentyth != self.lastlog: if self.ontty: - fprintf(stderr,b'\n') + fputs(b'\n',stderr) self.logger.info('%s %5.1f %% remain : %02d:%02d:%02d' % ( bytes2str(self.head), From 448fa8d32586ab5f3fe201f8f36592b194f61439 Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Sun, 3 Jul 2016 09:18:52 +0200 Subject: [PATCH 5/6] first trial for a fasta formater --- python/obitools3/format/fasta.pxd | 2 +- python/obitools3/format/fasta.pyx | 28 ++++++++++++++-------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/python/obitools3/format/fasta.pxd b/python/obitools3/format/fasta.pxd index b2d2c2d..e76e717 100644 --- a/python/obitools3/format/fasta.pxd +++ b/python/obitools3/format/fasta.pxd @@ -6,4 +6,4 @@ cdef class FastaFormat: cdef HeaderFormat headerFormater cdef size_t sequenceBufferLength - cdef cvarray sequenceBuffer + cdef char* sequenceBuffer diff --git a/python/obitools3/format/fasta.pyx b/python/obitools3/format/fasta.pyx index 2460e11..e88e6aa 100644 --- a/python/obitools3/format/fasta.pyx +++ b/python/obitools3/format/fasta.pyx @@ -1,4 +1,6 @@ cimport cython +from libc.stdlib cimport malloc, free, realloc +from libc.string cimport strncpy cdef class FastaFormat: @@ -7,32 +9,30 @@ cdef class FastaFormat: tags, printNAKeys) + + def __cinit(self): self.sequenceBufferLength=1000 - self.sequenceBuffer = cvarray(shape=(1000,), - itemsize=sizeof(char), - format="c", - mode="c", - allocate_buffer=True) + self.sequenceBuffer = malloc(self.sequenceBufferLength) + + def __dealloc__(self) + free(self.sequenceBuffer) + @cython.boundscheck(False) def __call__(self, dict data): cdef bytes brawseq = data['sequence'] + cdef char* crowseq = brawseq cdef size_t lseq = len(brawseq) cdef size_t needed_size = lseq + 1 - cdef char[:] seq - cdef char[:] fasta cdef size_t k=0 + needed_size += needed_size/ 50 if needed_size > self.sequenceBufferLength: self.sequenceBufferLength=needed_size - self.sequenceBuffer = cvarray(shape=(needed_size,), - itemsize=sizeof(char), - format="c", - mode="c", - allocate_buffer=True) + self.sequenceBuffer = realloc(self.sequenceBuffer, + self.sequenceBufferLength + ) - seq = brawseq - fasta = self.sequenceBuffer for i in range(0,lseq,60): if i+60 <= lseq: From d1d26b902856191e33e0f8b3eda006acec3d3572 Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Thu, 4 Aug 2016 08:00:54 +0200 Subject: [PATCH 6/6] Simplify the code --- python/obitools3/format/fasta.pyx | 31 +------------------------------ 1 file changed, 1 insertion(+), 30 deletions(-) diff --git a/python/obitools3/format/fasta.pyx b/python/obitools3/format/fasta.pyx index 9109707..259e368 100644 --- a/python/obitools3/format/fasta.pyx +++ b/python/obitools3/format/fasta.pyx @@ -8,14 +8,6 @@ cdef class FastaFormat: self.headerFormater = HeaderFormat(True, tags, printNAKeys) - - -# def __cinit(self): -# self.sequenceBufferLength=1000 -# self.sequenceBuffer = malloc(self.sequenceBufferLength) -# -# def __dealloc__(self) -# free(self.sequenceBuffer) @cython.boundscheck(False) def __call__(self, dict data): @@ -30,28 +22,7 @@ cdef class FastaFormat: brawseq = b'\n'.join(lines) return "%s\n%s" % (self.headerFormater(data),bytes2str(brawseq)) - -# cdef char* crowseq = brawseq -# cdef size_t needed_size = lseq + 1 -# -# needed_size += needed_size/ 50 -# -# if needed_size > self.sequenceBufferLength: -# self.sequenceBufferLength=needed_size -# self.sequenceBuffer = realloc(self.sequenceBuffer, -# self.sequenceBufferLength -# ) -# -# -# for i in range(0,lseq,60): -# if i+60 <= lseq: -# fasta[k:(k+60)]=seq[i:(i+60)] -# fasta[k+60]='\n' -# k+=61 -# else: -# fasta[k:(k+lseq-i)]=seq[i:lseq] -# k+=lseq-i - +