Merge branch 'master' into Eric_version_for_sequence

Conflicts:
	python/obitools3/obidms/_obidmscolumn_seq.pyx
This commit is contained in:
2016-08-03 10:09:20 +02:00
31 changed files with 1142 additions and 953 deletions

View File

@ -1,3 +1,4 @@
from ..utils cimport bytes2str
from .header cimport HeaderFormat
from cython.view cimport array as cvarray

View File

@ -10,38 +10,47 @@ cdef class FastaFormat:
printNAKeys)
def __cinit(self):
self.sequenceBufferLength=1000
self.sequenceBuffer = malloc(self.sequenceBufferLength)
def __dealloc__(self)
free(self.sequenceBuffer)
# def __cinit(self):
# self.sequenceBufferLength=1000
# self.sequenceBuffer = malloc(self.sequenceBufferLength)
#
# def __dealloc__(self)
# free(self.sequenceBuffer)
@cython.boundscheck(False)
def __call__(self, dict data):
cdef bytes brawseq = <bytes> data['sequence']
cdef char* crowseq = <char*> brawseq
cdef bytes brawseq = data['sequence']
cdef size_t lseq = len(brawseq)
cdef size_t needed_size = lseq + 1
cdef size_t k=0
needed_size += needed_size/ 50
cdef list lines = []
if needed_size > self.sequenceBufferLength:
self.sequenceBufferLength=needed_size
self.sequenceBuffer = realloc(self.sequenceBuffer,
self.sequenceBufferLength
)
for k in range(0,lseq,60):
lines.append(brawseq[k:(k+60)])
brawseq = b'\n'.join(lines)
for i in range(0,lseq,60):
if i+60 <= lseq:
fasta[k:(k+60)]=seq[i:(i+60)]
fasta[k+60]='\n'
k+=61
else:
fasta[k:(k+lseq-i)]=seq[i:lseq]
k+=lseq-i
return "%s\n%s" % (self.headerFormater(data),bytes2str(brawseq))
# cdef char* crowseq = <char*> brawseq
# cdef size_t needed_size = lseq + 1
#
# needed_size += needed_size/ 50
#
# if needed_size > self.sequenceBufferLength:
# self.sequenceBufferLength=needed_size
# self.sequenceBuffer = realloc(self.sequenceBuffer,
# self.sequenceBufferLength
# )
#
#
# for i in range(0,lseq,60):
# if i+60 <= lseq:
# fasta[k:(k+60)]=seq[i:(i+60)]
# fasta[k+60]='\n'
# k+=61
# else:
# fasta[k:(k+lseq-i)]=seq[i:lseq]
# k+=lseq-i

View File

@ -1,5 +1,7 @@
cdef class HeaderFormat:
cdef str start
cdef set tags
cdef bint printNaKeys
cdef size_t headerBufferLength

View File

@ -2,8 +2,19 @@
cdef class HeaderFormat:
def __init__(self, bint fastaHeader=True, list tags=[], bint printNAKeys=False):
'''
self.tags = tags
@param fastaHeader:
@type fastaHeader: `bool`
@param tags:
@type tags: `list` of `bytes`
@param printNAKeys:
@type printNAKeys: `bool`
'''
self.tags = set(tags)
self.printNaKeys = printNAKeys
if fastaHeader:
@ -12,16 +23,38 @@ cdef class HeaderFormat:
self.start="@"
self.headerBufferLength = 1000
self.headerBuffer = []
#self.headerBuffer = []
def __call__(self, dict data):
cdef str header
cdef dict tags = data['tags']
cdef set ktags
cdef list lines = [""]
cdef str tagline
if data['definition'] is not None:
header = "%s%s %s" % (self.start,data['id'],
data['definition'])
if self.tags is not None and self.tags:
ktags = self.tags
else:
header = "%s%s" % (self.start,data['id'])
ktags = set(tags.keys())
for k in ktags:
if k in tags:
value = tags[k]
if value is not None or self.printNaKeys:
lines.append("%s=%s;" % (k,tags[k]))
if len(lines) > 1:
tagline=" ".join(lines)
else:
tagline=""
if data['definition'] is not None:
header = "%s%s%s %s" % (self.start,data['id'],
tagline,
data['definition'])
else:
header = "%s%s%s" % (self.start,data['id'],
tagline)
return header