NOTE(review): reflowed from a whitespace-collapsed copy of this patch. Record
boundaries follow the diff markers and agree with every hunk header's line
counts except the last hunk shown, which is one context line short and
presumably continues past this excerpt. Indentation inside hunks and the
position of blank context lines are reconstructed (4 spaces per level
assumed), not recovered: apply with whitespace-insensitive matching
(e.g. git apply --ignore-whitespace) and confirm against the repository.
NOTE(review): the three "... = value" assignments under "try:" may have lost a
cast token in extraction; verify before relying on them.
diff --git a/python/obitools3/format/__init__.py b/python/obitools3/format/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/python/obitools3/format/fasta.pxd b/python/obitools3/format/fasta.pxd
new file mode 100644
index 0000000..f2bbe5b
--- /dev/null
+++ b/python/obitools3/format/fasta.pxd
@@ -0,0 +1,10 @@
+from ..utils cimport bytes2str
+from .header cimport HeaderFormat
+from cython.view cimport array as cvarray
+
+cdef class FastaFormat:
+
+    cdef HeaderFormat headerFormater
+
+    cdef size_t sequenceBufferLength
+    cdef char* sequenceBuffer
diff --git a/python/obitools3/format/fasta.pyx b/python/obitools3/format/fasta.pyx
new file mode 100644
index 0000000..259e368
--- /dev/null
+++ b/python/obitools3/format/fasta.pyx
@@ -0,0 +1,32 @@
+cimport cython
+from libc.stdlib cimport malloc, free, realloc
+from libc.string cimport strncpy
+
+cdef class FastaFormat:
+
+    def __init__(self, list tags=[], bint printNAKeys=False):
+        self.headerFormater = HeaderFormat(True,
+                                           tags,
+                                           printNAKeys)
+
+    @cython.boundscheck(False)
+    def __call__(self, dict data):
+        cdef bytes brawseq = data['sequence']
+        cdef size_t lseq = len(brawseq)
+        cdef size_t k=0
+        cdef list lines = []
+
+        for k in range(0,lseq,60):
+            lines.append(brawseq[k:(k+60)])
+
+        brawseq = b'\n'.join(lines)
+
+        return "%s\n%s" % (self.headerFormater(data),bytes2str(brawseq))
+
+
+
+
+
+
+
+    
\ No newline at end of file
diff --git a/python/obitools3/format/header.pxd b/python/obitools3/format/header.pxd
new file mode 100644
index 0000000..8e407e7
--- /dev/null
+++ b/python/obitools3/format/header.pxd
@@ -0,0 +1,7 @@
+cdef class HeaderFormat:
+
+    cdef str start
+    cdef set tags
+    cdef bint printNaKeys
+    cdef size_t headerBufferLength
+    
\ No newline at end of file
diff --git a/python/obitools3/format/header.pyx b/python/obitools3/format/header.pyx
new file mode 100644
index 0000000..229f723
--- /dev/null
+++ b/python/obitools3/format/header.pyx
@@ -0,0 +1,60 @@
+
+cdef class HeaderFormat:
+
+    def __init__(self, bint fastaHeader=True, list tags=[], bint printNAKeys=False):
+        '''
+
+        @param fastaHeader:
+        @type fastaHeader: `bool`
+
+        @param tags:
+        @type tags: `list` of `bytes`
+
+        @param printNAKeys:
+        @type printNAKeys: `bool`
+        '''
+
+        self.tags = set(tags)
+        self.printNaKeys = printNAKeys
+
+        if fastaHeader:
+            self.start=">"
+        else:
+            self.start="@"
+
+        self.headerBufferLength = 1000
+        #self.headerBuffer = []
+
+    def __call__(self, dict data):
+        cdef str header
+        cdef dict tags = data['tags']
+        cdef set ktags
+        cdef list lines = [""]
+        cdef str tagline
+
+        if self.tags is not None and self.tags:
+            ktags = self.tags
+        else:
+            ktags = set(tags.keys())
+
+        for k in ktags:
+            if k in tags:
+                value = tags[k]
+                if value is not None or self.printNaKeys:
+                    lines.append("%s=%s;" % (k,tags[k]))
+
+        if len(lines) > 1:
+            tagline=" ".join(lines)
+        else:
+            tagline=""
+
+        if data['definition'] is not None:
+            header = "%s%s%s %s" % (self.start,data['id'],
+                                    tagline,
+                                    data['definition'])
+        else:
+            header = "%s%s%s" % (self.start,data['id'],
+                                 tagline)
+
+        return header
+
diff --git a/python/obitools3/obidms/_obidmscolumn_seq.pyx b/python/obitools3/obidms/_obidmscolumn_seq.pyx
index 134289c..d49af3e 100644
--- a/python/obitools3/obidms/_obidmscolumn_seq.pyx
+++ b/python/obitools3/obidms/_obidmscolumn_seq.pyx
@@ -26,17 +26,26 @@ cdef class OBIDMS_column_seq(OBIDMS_column):
         if value == OBISeq_NA :
             result = None
         else :
-            result = bytes2str(value)
-            free(value)
+            try:
+                result = value
+            finally:
+                free(value)
         return result
 
     cpdef set_line(self, index_t line_nb, object value):
+        cdef bytes value_b
+
         if value is None :
-            if obi_set_seq_with_elt_idx_and_col_p_in_view(self.view.pointer, (self.pointer)[0], line_nb, 0, OBISeq_NA) < 0:
-                raise Exception("Problem setting a value in a column")
-        else :
-            if obi_set_seq_with_elt_idx_and_col_p_in_view(self.view.pointer, (self.pointer)[0], line_nb, 0, str2bytes(value)) < 0:
-                raise Exception("Problem setting a value in a column")
+            value_b = OBISeq_NA
+        elif isinstance(value, bytes) :
+            value_b = value
+        elif isinstance(value, str) :
+            value_b = str2bytes(value)
+        else:
+            raise TypeError('Sequence value must be of type Bytes, Str or None')
+
+        if obi_set_seq_with_elt_idx_and_col_p_in_view(self.view.pointer, (self.pointer)[0], line_nb, 0, value_b) < 0:
+            raise Exception("Problem setting a value in a column")
 
     # TODO choose alignment type (lcs or other) with supplementary argument
     cpdef align(self,
@@ -62,10 +71,13 @@ cdef class OBIDMS_column_multi_elts_seq(OBIDMS_column_multi_elts):
         if value == OBISeq_NA :
             result = None
         else :
-            result = bytes2str(value)
-            free(value)
+            try:
+                result = value
+            finally:
+                free(value)
         return result
 
+
     cpdef object get_line(self, index_t line_nb) :
         cdef char* value
         cdef object value_in_result
@@ -81,8 +93,10 @@ cdef class OBIDMS_column_multi_elts_seq(OBIDMS_column_multi_elts):
             if value == OBISeq_NA :
                 value_in_result = None
             else :
-                value_in_result = bytes2str(value)
-                free(value)
+                try:
+                    value_in_result = value
+                finally:
+                    free(value)
             result[self.elements_names[i]] = value_in_result
             if all_NA and (value_in_result is not None) :
                 all_NA = False
diff --git a/python/obitools3/parsers/fasta.pxd b/python/obitools3/parsers/fasta.pxd
index d1546d1..8962d37 100644
--- a/python/obitools3/parsers/fasta.pxd
+++ b/python/obitools3/parsers/fasta.pxd
@@ -1,5 +1,6 @@
 #cython: language_level=3
 
+from ..utils cimport str2bytes
 from .header cimport parseHeader
 from ..files.universalopener cimport uopen
 from ..files.linebuffer cimport LineBuffer
diff --git a/python/obitools3/parsers/fasta.pyx b/python/obitools3/parsers/fasta.pyx
index 44be6b9..c788414 100644
--- a/python/obitools3/parsers/fasta.pyx
+++ b/python/obitools3/parsers/fasta.pyx
@@ -6,12 +6,15 @@ Created on 30 mars 2016
 @author: coissac
 '''
 
+
 def fastaIterator(lineiterator, int buffersize=100000000):
     cdef LineBuffer lb
     cdef str ident
     cdef str definition
     cdef dict tags
     cdef list s
+    cdef bytes sequence
+    cdef bytes quality
 
     if isinstance(lineiterator,(str,bytes)):
         lineiterator=uopen(lineiterator)
@@ -31,12 +34,12 @@ def fastaIterator(lineiterator, int buffersize=100000000):
 
         try:
             while line[0]!='>':
-                s.append(line[0:-1])
+                s.append(str2bytes(line)[0:-1])
                 line = next(i)
         except StopIteration:
             pass
 
-        sequence = "".join(s)
+        sequence = b"".join(s)
         quality = None
 
         yield { "id" : ident,
diff --git a/python/obitools3/parsers/fastq.pxd b/python/obitools3/parsers/fastq.pxd
index d1546d1..f2c5d6f 100644
--- a/python/obitools3/parsers/fastq.pxd
+++ b/python/obitools3/parsers/fastq.pxd
@@ -1,5 +1,7 @@
 #cython: language_level=3
 
+from ..utils cimport str2bytes
+
 from .header cimport parseHeader
 from ..files.universalopener cimport uopen
 from ..files.linebuffer cimport LineBuffer
diff --git a/python/obitools3/parsers/fastq.pyx b/python/obitools3/parsers/fastq.pyx
index d57be75..2c600e4 100644
--- a/python/obitools3/parsers/fastq.pyx
+++ b/python/obitools3/parsers/fastq.pyx
@@ -6,15 +6,13 @@ Created on 30 mars 2016
 @author: coissac
 '''
 
-
-
-
-
 def fastqIterator(lineiterator, int buffersize=100000000):
     cdef LineBuffer lb
     cdef str ident
     cdef str definition
     cdef dict tags
+    cdef bytes sequence
+    cdef bytes quality
 
     if isinstance(lineiterator,(str,bytes)):
         lineiterator=uopen(lineiterator)
@@ -27,9 +25,9 @@ def fastqIterator(lineiterator, int buffersize=100000000):
     i = iter(lb)
     for line in i:
         ident,tags,definition = parseHeader(line)
-        sequence = next(i)[0:-1]
+        sequence = str2bytes(next(i)[0:-1])
         next(i)
-        quality = next(i)[0:-1]
+        quality = str2bytes(next(i)[0:-1])
         yield { "id" : ident,
                 "definition" : definition,