Merge branch 'Eric_version_for_sequence'
Conflicts: python/obitools3/obidms/_obidmscolumn_seq.pyx
This commit is contained in:
0
python/obitools3/format/__init__.py
Normal file
0
python/obitools3/format/__init__.py
Normal file
10
python/obitools3/format/fasta.pxd
Normal file
10
python/obitools3/format/fasta.pxd
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
from ..utils cimport bytes2str
|
||||||
|
from .header cimport HeaderFormat
|
||||||
|
from cython.view cimport array as cvarray
|
||||||
|
|
||||||
|
cdef class FastaFormat:
    # C-level attributes of the FASTA formatter (declaration only).
    cdef:
        # Formatter used to render the header line of each record.
        HeaderFormat headerFormater
        # Length and storage of a raw C sequence buffer.
        # NOTE(review): nothing in this declaration shows who allocates or
        # frees the buffer -- confirm against the implementation.
        size_t sequenceBufferLength
        char* sequenceBuffer
|
32
python/obitools3/format/fasta.pyx
Normal file
32
python/obitools3/format/fasta.pyx
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
cimport cython
|
||||||
|
from libc.stdlib cimport malloc, free, realloc
|
||||||
|
from libc.string cimport strncpy
|
||||||
|
|
||||||
|
cdef class FastaFormat:
    '''
    Callable formatter rendering a sequence record as FASTA text.

    The header line is produced by a `HeaderFormat` instance and the raw
    sequence is wrapped into lines of at most 60 characters.
    '''

    def __init__(self, list tags=None, bint printNAKeys=False):
        '''
        @param tags: tag names forwarded to the header formatter; `None`
                     (the default) is forwarded as an empty list
        @type tags: `list` or `None`

        @param printNAKeys: forwarded unchanged to the header formatter
        @type printNAKeys: `bool`
        '''
        # A fresh list is created on every call instead of sharing a single
        # mutable `[]` default between all instances; it also keeps an
        # explicit `None` from reaching the header formatter.
        self.headerFormater = HeaderFormat(True,
                                           [] if tags is None else tags,
                                           printNAKeys)

    @cython.boundscheck(False)
    def __call__(self, dict data):
        '''
        Format one record.

        @param data: record holding the raw sequence as `bytes` under the
                     key 'sequence'; the whole dict is also passed to the
                     header formatter
        @type data: `dict`

        @return: the header line, a newline, then the wrapped sequence
                 (no trailing newline is appended)
        @rtype: `str`
        '''
        cdef bytes brawseq = data['sequence']
        cdef size_t lseq = len(brawseq)
        cdef size_t width = 60
        cdef size_t k = 0
        cdef list lines = []

        # Cut the sequence into fixed-width chunks; the last chunk may be
        # shorter, and an empty sequence yields no chunk at all.
        for k in range(0, lseq, width):
            lines.append(brawseq[k:(k + width)])

        return "%s\n%s" % (self.headerFormater(data),
                           bytes2str(b'\n'.join(lines)))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
7
python/obitools3/format/header.pxd
Normal file
7
python/obitools3/format/header.pxd
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
cdef class HeaderFormat:
    # C-level attributes of the header formatter (declaration only).
    cdef:
        # Character(s) placed at the very beginning of the header line.
        str start
        # Names of the tags selected for printing.
        set tags
        # Flag controlling whether tags without a value are printed.
        bint printNaKeys
        # Size associated with a header buffer.
        # NOTE(review): no buffer attribute is declared here -- confirm
        # whether this field is still needed.
        size_t headerBufferLength
|
||||||
|
|
60
python/obitools3/format/header.pyx
Normal file
60
python/obitools3/format/header.pyx
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
|
||||||
|
cdef class HeaderFormat:
    '''
    Callable formatter producing the header line of a sequence record:
    a start character, the identifier, the `key=value;` tags and the
    optional definition.
    '''

    def __init__(self, bint fastaHeader=True, list tags=None, bint printNAKeys=False):
        '''
        @param fastaHeader: if true the header starts with '>',
                            otherwise with '@'
        @type fastaHeader: `bool`

        @param tags: names of the tags to print; when empty or `None`
                     every tag of the record is printed. The names are
                     looked up in the record's tag dict, so they must be
                     of the same type as its keys
        @type tags: `list` or `None`

        @param printNAKeys: if true, tags whose value is `None` are
                            printed as well
        @type printNAKeys: `bool`
        '''
        # `None` replaces the former shared mutable `[]` default and is
        # treated as "no selection".
        self.tags = set() if tags is None else set(tags)
        self.printNaKeys = printNAKeys
        self.start = ">" if fastaHeader else "@"

        # Not read by any method of this class; kept because the
        # attribute is part of the declared class layout.
        self.headerBufferLength = 1000

    def __call__(self, dict data):
        '''
        Build the header line of one record.

        @param data: record providing the keys 'id', 'tags' (a `dict`
                     mapping tag names to values) and 'definition'
        @type data: `dict`

        @return: the header line, without trailing newline
        @rtype: `str`
        '''
        cdef str header
        cdef str tagline
        cdef dict tags = data['tags']
        # The leading empty string makes the joined tag line start with
        # the space that separates it from the identifier.
        cdef list fields = [""]
        cdef object ktags
        cdef object k
        cdef object value

        # With an explicit selection only those names are considered (a
        # set, so their order is arbitrary). Without one, the record's
        # dict is walked directly so the tags keep the record's own order
        # instead of being shuffled through a temporary set.
        ktags = self.tags if self.tags else tags

        for k in ktags:
            if k in tags:
                value = tags[k]
                if value is not None or self.printNaKeys:
                    fields.append("%s=%s;" % (k, value))

        tagline = " ".join(fields) if len(fields) > 1 else ""

        if data['definition'] is not None:
            header = "%s%s%s %s" % (self.start, data['id'],
                                    tagline,
                                    data['definition'])
        else:
            header = "%s%s%s" % (self.start, data['id'],
                                 tagline)

        return header
|
||||||
|
|
@ -26,17 +26,26 @@ cdef class OBIDMS_column_seq(OBIDMS_column):
|
|||||||
if value == OBISeq_NA :
|
if value == OBISeq_NA :
|
||||||
result = None
|
result = None
|
||||||
else :
|
else :
|
||||||
result = bytes2str(value)
|
try:
|
||||||
free(value)
|
result = <bytes> value
|
||||||
|
finally:
|
||||||
|
free(value)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
cpdef set_line(self, index_t line_nb, object value):
|
cpdef set_line(self, index_t line_nb, object value):
|
||||||
|
cdef bytes value_b
|
||||||
|
|
||||||
if value is None :
|
if value is None :
|
||||||
if obi_set_seq_with_elt_idx_and_col_p_in_view(self.view.pointer, (self.pointer)[0], line_nb, 0, OBISeq_NA) < 0:
|
value_b = OBISeq_NA
|
||||||
raise Exception("Problem setting a value in a column")
|
elif isinstance(value, bytes) :
|
||||||
else :
|
value_b = value
|
||||||
if obi_set_seq_with_elt_idx_and_col_p_in_view(self.view.pointer, (self.pointer)[0], line_nb, 0, str2bytes(value)) < 0:
|
elif isinstance(value, str) :
|
||||||
raise Exception("Problem setting a value in a column")
|
value_b = str2bytes(value)
|
||||||
|
else:
|
||||||
|
raise TypeError('Sequence value must be of type Bytes, Str or None')
|
||||||
|
|
||||||
|
if obi_set_seq_with_elt_idx_and_col_p_in_view(self.view.pointer, (self.pointer)[0], line_nb, 0, value_b) < 0:
|
||||||
|
raise Exception("Problem setting a value in a column")
|
||||||
|
|
||||||
# TODO choose alignment type (lcs or other) with supplementary argument
|
# TODO choose alignment type (lcs or other) with supplementary argument
|
||||||
cpdef align(self,
|
cpdef align(self,
|
||||||
@ -62,10 +71,13 @@ cdef class OBIDMS_column_multi_elts_seq(OBIDMS_column_multi_elts):
|
|||||||
if value == OBISeq_NA :
|
if value == OBISeq_NA :
|
||||||
result = None
|
result = None
|
||||||
else :
|
else :
|
||||||
result = bytes2str(value)
|
try:
|
||||||
free(value)
|
result = <bytes> value
|
||||||
|
finally:
|
||||||
|
free(value)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
cpdef object get_line(self, index_t line_nb) :
|
cpdef object get_line(self, index_t line_nb) :
|
||||||
cdef char* value
|
cdef char* value
|
||||||
cdef object value_in_result
|
cdef object value_in_result
|
||||||
@ -81,8 +93,10 @@ cdef class OBIDMS_column_multi_elts_seq(OBIDMS_column_multi_elts):
|
|||||||
if value == OBISeq_NA :
|
if value == OBISeq_NA :
|
||||||
value_in_result = None
|
value_in_result = None
|
||||||
else :
|
else :
|
||||||
value_in_result = bytes2str(value)
|
try:
|
||||||
free(value)
|
value_in_result = <bytes> value
|
||||||
|
finally:
|
||||||
|
free(value)
|
||||||
result[self.elements_names[i]] = value_in_result
|
result[self.elements_names[i]] = value_in_result
|
||||||
if all_NA and (value_in_result is not None) :
|
if all_NA and (value_in_result is not None) :
|
||||||
all_NA = False
|
all_NA = False
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
#cython: language_level=3
|
#cython: language_level=3
|
||||||
|
|
||||||
|
from ..utils cimport str2bytes
|
||||||
from .header cimport parseHeader
|
from .header cimport parseHeader
|
||||||
from ..files.universalopener cimport uopen
|
from ..files.universalopener cimport uopen
|
||||||
from ..files.linebuffer cimport LineBuffer
|
from ..files.linebuffer cimport LineBuffer
|
||||||
|
@ -6,12 +6,15 @@ Created on 30 mars 2016
|
|||||||
@author: coissac
|
@author: coissac
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
|
||||||
def fastaIterator(lineiterator, int buffersize=100000000):
|
def fastaIterator(lineiterator, int buffersize=100000000):
|
||||||
cdef LineBuffer lb
|
cdef LineBuffer lb
|
||||||
cdef str ident
|
cdef str ident
|
||||||
cdef str definition
|
cdef str definition
|
||||||
cdef dict tags
|
cdef dict tags
|
||||||
cdef list s
|
cdef list s
|
||||||
|
cdef bytes sequence
|
||||||
|
cdef bytes quality
|
||||||
|
|
||||||
if isinstance(lineiterator,(str,bytes)):
|
if isinstance(lineiterator,(str,bytes)):
|
||||||
lineiterator=uopen(lineiterator)
|
lineiterator=uopen(lineiterator)
|
||||||
@ -31,12 +34,12 @@ def fastaIterator(lineiterator, int buffersize=100000000):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
while line[0]!='>':
|
while line[0]!='>':
|
||||||
s.append(line[0:-1])
|
s.append(str2bytes(line)[0:-1])
|
||||||
line = next(i)
|
line = next(i)
|
||||||
except StopIteration:
|
except StopIteration:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
sequence = "".join(s)
|
sequence = b"".join(s)
|
||||||
quality = None
|
quality = None
|
||||||
|
|
||||||
yield { "id" : ident,
|
yield { "id" : ident,
|
||||||
|
@ -1,5 +1,7 @@
|
|||||||
#cython: language_level=3
|
#cython: language_level=3
|
||||||
|
|
||||||
|
from ..utils cimport str2bytes
|
||||||
|
|
||||||
from .header cimport parseHeader
|
from .header cimport parseHeader
|
||||||
from ..files.universalopener cimport uopen
|
from ..files.universalopener cimport uopen
|
||||||
from ..files.linebuffer cimport LineBuffer
|
from ..files.linebuffer cimport LineBuffer
|
||||||
|
@ -6,15 +6,13 @@ Created on 30 mars 2016
|
|||||||
@author: coissac
|
@author: coissac
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def fastqIterator(lineiterator, int buffersize=100000000):
|
def fastqIterator(lineiterator, int buffersize=100000000):
|
||||||
cdef LineBuffer lb
|
cdef LineBuffer lb
|
||||||
cdef str ident
|
cdef str ident
|
||||||
cdef str definition
|
cdef str definition
|
||||||
cdef dict tags
|
cdef dict tags
|
||||||
|
cdef bytes sequence
|
||||||
|
cdef bytes quality
|
||||||
|
|
||||||
if isinstance(lineiterator,(str,bytes)):
|
if isinstance(lineiterator,(str,bytes)):
|
||||||
lineiterator=uopen(lineiterator)
|
lineiterator=uopen(lineiterator)
|
||||||
@ -27,9 +25,9 @@ def fastqIterator(lineiterator, int buffersize=100000000):
|
|||||||
i = iter(lb)
|
i = iter(lb)
|
||||||
for line in i:
|
for line in i:
|
||||||
ident,tags,definition = parseHeader(line)
|
ident,tags,definition = parseHeader(line)
|
||||||
sequence = next(i)[0:-1]
|
sequence = str2bytes(next(i)[0:-1])
|
||||||
next(i)
|
next(i)
|
||||||
quality = next(i)[0:-1]
|
quality = str2bytes(next(i)[0:-1])
|
||||||
|
|
||||||
yield { "id" : ident,
|
yield { "id" : ident,
|
||||||
"definition" : definition,
|
"definition" : definition,
|
||||||
|
Reference in New Issue
Block a user