Merge branch 'Eric_version_for_sequence'

Conflicts:
	python/obitools3/obidms/_obidmscolumn_seq.pyx
This commit is contained in:
2016-08-04 09:42:42 +02:00
10 changed files with 146 additions and 19 deletions

View File

View File

@ -0,0 +1,10 @@
from ..utils cimport bytes2str
from .header cimport HeaderFormat
from cython.view cimport array as cvarray
cdef class FastaFormat:
    # C-level attribute declarations of the FASTA formatter extension type.

    # HeaderFormat instance stored on the formatter; the implementation
    # builds it in the constructor and calls it to produce the header line.
    cdef HeaderFormat headerFormater
    # NOTE(review): presumably the size of `sequenceBuffer`; it is not
    # referenced by the visible implementation -- confirm before relying on it.
    cdef size_t sequenceBufferLength
    # NOTE(review): raw C character buffer, not referenced by the visible
    # implementation; allocation and ownership cannot be told from here.
    cdef char* sequenceBuffer

View File

@ -0,0 +1,32 @@
cimport cython
from libc.stdlib cimport malloc, free, realloc
from libc.string cimport strncpy
cdef class FastaFormat:
    '''
    Callable formatter turning a sequence record into FASTA text.

    The header line is delegated to a `HeaderFormat` instance configured
    for FASTA; the sequence is wrapped in lines of 60 characters.
    '''

    def __init__(self, list tags=None, bint printNAKeys=False):
        '''
        @param tags: tag names forwarded to the header formatter;
                     `None` (the default) is treated like an empty list
        @type tags: `list` or `None`

        @param printNAKeys: forwarded unchanged to the header formatter
        @type printNAKeys: `bool`
        '''
        # A None default replaces the former shared mutable `[]` default,
        # and an explicit None is normalised here so that the header
        # formatter always receives a real list.
        if tags is None:
            tags = []

        self.headerFormater = HeaderFormat(True,
                                           tags,
                                           printNAKeys)

    @cython.boundscheck(False)
    def __call__(self, dict data):
        '''
        Format one record.

        @param data: record holding at least the key 'sequence' (a `bytes`
                     value) plus whatever keys the header formatter reads
        @type data: `dict`

        @return: the header line, a newline, then the wrapped sequence
        @rtype: `str`
        '''
        cdef bytes brawseq = data['sequence']
        cdef size_t lseq = len(brawseq)
        cdef size_t k = 0
        cdef list lines = []

        # Cut the raw sequence into chunks of at most 60 bytes.
        for k in range(0, lseq, 60):
            lines.append(brawseq[k:(k + 60)])

        brawseq = b'\n'.join(lines)

        return "%s\n%s" % (self.headerFormater(data), bytes2str(brawseq))

View File

@ -0,0 +1,7 @@
cdef class HeaderFormat:
    # C-level attribute declarations of the header formatter extension type.

    # Prefix written at the very beginning of a header; the implementation
    # sets it to ">" or "@" in the constructor.
    cdef str start
    # Tag names selected at construction time (built from the `tags` list).
    cdef set tags
    # When true, tags whose value is None are written as well.
    cdef bint printNaKeys
    # Set to 1000 by the constructor; not read anywhere in the visible code.
    # NOTE(review): presumably reserved for a future C buffer -- confirm.
    cdef size_t headerBufferLength

View File

@ -0,0 +1,60 @@
cdef class HeaderFormat:
    '''
    Callable formatter building the header line of a sequence record.

    The header is made of a start character, the record id, the selected
    tags written as `key=value;` items and, when present, the definition.
    '''

    def __init__(self, bint fastaHeader=True, list tags=None, bint printNAKeys=False):
        '''
        @param fastaHeader: if true the header starts with ">",
                            otherwise with "@"
        @type fastaHeader: `bool`

        @param tags: names of the tags to print; when empty or `None`
                     every tag of the record is printed
        @type tags: `list` of `bytes`

        @param printNAKeys: if true, tags whose value is `None` are
                            printed too
        @type printNAKeys: `bool`
        '''
        # None replaces the former shared mutable `[]` default; an explicit
        # None no longer fails inside set().
        self.tags = set(tags) if tags is not None else set()
        self.printNaKeys = printNAKeys
        self.start = ">" if fastaHeader else "@"
        self.headerBufferLength = 1000

    def __call__(self, dict data):
        '''
        Build the header of one record.

        @param data: record holding the keys 'id', 'tags' (a `dict`)
                     and 'definition' (may be `None`)
        @type data: `dict`

        @return: the formatted header, without trailing newline
        @rtype: `str`
        '''
        cdef str header
        cdef dict tags = data['tags']
        cdef set ktags
        # The leading empty item makes the joined tag line start with a
        # space, separating it from the record id.
        cdef list lines = [""]
        cdef str tagline

        # Without an explicit selection, every tag of the record is kept.
        if self.tags:
            ktags = self.tags
        else:
            ktags = set(tags)

        for k in ktags:
            if k in tags:
                value = tags[k]
                if value is not None or self.printNaKeys:
                    lines.append("%s=%s;" % (k, value))

        if len(lines) > 1:
            tagline = " ".join(lines)
        else:
            tagline = ""

        if data['definition'] is not None:
            header = "%s%s%s %s" % (self.start, data['id'],
                                    tagline,
                                    data['definition'])
        else:
            header = "%s%s%s" % (self.start, data['id'],
                                 tagline)

        return header

View File

@ -26,17 +26,26 @@ cdef class OBIDMS_column_seq(OBIDMS_column):
if value == OBISeq_NA :
result = None
else :
result = bytes2str(value)
free(value)
try:
result = <bytes> value
finally:
free(value)
return result
cpdef set_line(self, index_t line_nb, object value):
cdef bytes value_b
if value is None :
if obi_set_seq_with_elt_idx_and_col_p_in_view(self.view.pointer, (self.pointer)[0], line_nb, 0, OBISeq_NA) < 0:
raise Exception("Problem setting a value in a column")
else :
if obi_set_seq_with_elt_idx_and_col_p_in_view(self.view.pointer, (self.pointer)[0], line_nb, 0, str2bytes(value)) < 0:
raise Exception("Problem setting a value in a column")
value_b = OBISeq_NA
elif isinstance(value, bytes) :
value_b = value
elif isinstance(value, str) :
value_b = str2bytes(value)
else:
raise TypeError('Sequence value must be of type Bytes, Str or None')
if obi_set_seq_with_elt_idx_and_col_p_in_view(self.view.pointer, (self.pointer)[0], line_nb, 0, value_b) < 0:
raise Exception("Problem setting a value in a column")
# TODO choose alignment type (lcs or other) with supplementary argument
cpdef align(self,
@ -62,10 +71,13 @@ cdef class OBIDMS_column_multi_elts_seq(OBIDMS_column_multi_elts):
if value == OBISeq_NA :
result = None
else :
result = bytes2str(value)
free(value)
try:
result = <bytes> value
finally:
free(value)
return result
cpdef object get_line(self, index_t line_nb) :
cdef char* value
cdef object value_in_result
@ -81,8 +93,10 @@ cdef class OBIDMS_column_multi_elts_seq(OBIDMS_column_multi_elts):
if value == OBISeq_NA :
value_in_result = None
else :
value_in_result = bytes2str(value)
free(value)
try:
value_in_result = <bytes> value
finally:
free(value)
result[self.elements_names[i]] = value_in_result
if all_NA and (value_in_result is not None) :
all_NA = False

View File

@ -1,5 +1,6 @@
#cython: language_level=3
from ..utils cimport str2bytes
from .header cimport parseHeader
from ..files.universalopener cimport uopen
from ..files.linebuffer cimport LineBuffer

View File

@ -6,12 +6,15 @@ Created on 30 mars 2016
@author: coissac
'''
def fastaIterator(lineiterator, int buffersize=100000000):
cdef LineBuffer lb
cdef str ident
cdef str definition
cdef dict tags
cdef list s
cdef bytes sequence
cdef bytes quality
if isinstance(lineiterator,(str,bytes)):
lineiterator=uopen(lineiterator)
@ -31,12 +34,12 @@ def fastaIterator(lineiterator, int buffersize=100000000):
try:
while line[0]!='>':
s.append(line[0:-1])
s.append(str2bytes(line)[0:-1])
line = next(i)
except StopIteration:
pass
sequence = "".join(s)
sequence = b"".join(s)
quality = None
yield { "id" : ident,

View File

@ -1,5 +1,7 @@
#cython: language_level=3
from ..utils cimport str2bytes
from .header cimport parseHeader
from ..files.universalopener cimport uopen
from ..files.linebuffer cimport LineBuffer

View File

@ -6,15 +6,13 @@ Created on 30 mars 2016
@author: coissac
'''
def fastqIterator(lineiterator, int buffersize=100000000):
cdef LineBuffer lb
cdef str ident
cdef str definition
cdef dict tags
cdef bytes sequence
cdef bytes quality
if isinstance(lineiterator,(str,bytes)):
lineiterator=uopen(lineiterator)
@ -27,9 +25,9 @@ def fastqIterator(lineiterator, int buffersize=100000000):
i = iter(lb)
for line in i:
ident,tags,definition = parseHeader(line)
sequence = next(i)[0:-1]
sequence = str2bytes(next(i)[0:-1])
next(i)
quality = next(i)[0:-1]
quality = str2bytes(next(i)[0:-1])
yield { "id" : ident,
"definition" : definition,