diff --git a/python/obitools3/parsers/fasta.pyx b/python/obitools3/parsers/fasta.pyx index b2f0b43..c619d03 100644 --- a/python/obitools3/parsers/fasta.pyx +++ b/python/obitools3/parsers/fasta.pyx @@ -6,7 +6,9 @@ Created on 30 mars 2016 @author: coissac ''' -from obitools3.dms.obiseq import Nuc_Seq +import types + +from obitools3.dms.obiseq cimport Nuc_Seq def fastaIterator(lineiterator, @@ -48,7 +50,7 @@ def fastaIterator(lineiterator, while True: - if read >= ionly: + if ionly >= 0 and read >= ionly: break while skipped < skip : @@ -79,7 +81,7 @@ def fastaIterator(lineiterator, # definition, # tags=tags, # ) - + # TODO yield { "id" : ident, "definition" : definition, "sequence" : sequence, @@ -105,65 +107,65 @@ def fastaNucIterator(lineiterator, cdef list s cdef bytes sequence cdef int lines_to_skip, ionly, read -# cdef OBI_Seq seq + cdef Nuc_Seq seq if only is None: - ionly=-1 + ionly = -1 else: - ionly=int(only) + ionly = int(only) - if isinstance(lineiterator,(str,bytes)): + if isinstance(lineiterator, (str, bytes)): lineiterator=uopen(lineiterator) + if isinstance(lineiterator, types.GeneratorType): + iterator = lineiterator if isinstance(lineiterator, LineBuffer): - lb=lineiterator + iterator = iter(lineiterator) else: - lb=LineBuffer(lineiterator,buffersize) - + iterator = iter(LineBuffer(lineiterator, buffersize)) skipped = 0 read = 0 - i = iter(lb) if firstline is None: - line = next(i) + line = next(iterator) else: - line = firstline - + line = firstline + while True: - - if read >= ionly: + + if ionly >= 0 and read >= ionly: break - + while skipped < skip : - line = next(i) + line = next(iterator) try: while line[0]!='>': - line = next(i) + line = next(iterator) except StopIteration: pass skipped += 1 ident,tags,definition = parseHeader(line) s = [] - line = next(i) - + line = next(iterator) + try: while line[0]!='>': s.append(str2bytes(line)[0:-1]) - line = next(i) + line = next(iterator) except StopIteration: pass sequence = b"".join(s) -# seq = seq = Nuc_Seq(ident, sequence, - definition, - None,-1, - tags) - + definition=definition, + quality=None, + offset=-1, + tags=tags) + yield seq # yield { "id" : ident, diff --git a/python/obitools3/parsers/fastq.pyx b/python/obitools3/parsers/fastq.pyx index 18bc2c8..83ad7c4 100644 --- a/python/obitools3/parsers/fastq.pyx +++ b/python/obitools3/parsers/fastq.pyx @@ -6,7 +6,7 @@ Created on 30 mars 2016 @author: coissac ''' -from obitools3.dms.obiseq import Nuc_Seq +from obitools3.dms.obiseq cimport Nuc_Seq def fastqIterator(lineiterator, @@ -74,12 +74,11 @@ def fastqWithQualityIterator(lineiterator, else: hline = firstline - for line in i: - if read >= ionly: + if ionly >= 0 and read >= ionly: break - + ident,tags,definition = parseHeader(hline) sequence = str2bytes(line[0:-1]) next(i) @@ -87,9 +86,10 @@ def fastqWithQualityIterator(lineiterator, seq = Nuc_Seq(ident, sequence, - definition, - quality,qualityoffset, - tags) + definition=definition, + quality=quality, + offset=qualityoffset, + tags=tags) yield seq @@ -149,22 +149,23 @@ def fastqWithoutQualityIterator(lineiterator, hline = next(i) else: hline = firstline - + for line in i: - - if read >= ionly: + + if ionly >= 0 and read >= ionly: break ident,tags,definition = parseHeader(hline) sequence = str2bytes(line[0:-1]) next(i) next(i) - + seq = Nuc_Seq(ident, sequence, - definition, - None,-1, - tags) + definition=definition, + quality=None, + offset=-1, + tags=tags) yield seq diff --git a/python/obitools3/parsers/universal.pyx b/python/obitools3/parsers/universal.pyx index 8d3d0ab..2aa0bb5 100644 --- a/python/obitools3/parsers/universal.pyx +++ b/python/obitools3/parsers/universal.pyx @@ -41,11 +41,11 @@ def entryIteratorFactory(lineiterator, if isinstance(lineiterator, LineBuffer): lb=lineiterator else: - lb=LineBuffer(lineiterator,buffersize) - + lb=LineBuffer(lineiterator, buffersize) + i = iter(lb) - first=next(i) + first=next(i) format=b"tabular" @@ -61,26 +61,29 @@ def entryIteratorFactory(lineiterator, format=b"ecopcrfile" elif is_ngsfilter_line(first): format=b"ngsfilter" - + + # TODO Temporary fix + first=None + lineiterator.seek(0) + if format==b'fasta': if seqtype == b'nuc': return (fastaNucIterator(lineiterator, - skip,only, - first), + skip=skip,only=only, + firstline=first, + buffersize=buffersize), Nuc_Seq) else: raise NotImplementedError() elif format==b'fastq': return (fastqIterator(lineiterator, - skip,only, - qualityoffset, - first), + skip=skip,only=only, + qualityoffset=qualityoffset, + noquality=noquality, + firstline=first, + buffersize=buffersize), Nuc_Seq) raise NotImplementedError('File format not yet implemented') - - - - diff --git a/python/obitools3/uri/decode.pyx b/python/obitools3/uri/decode.pyx index b3b254a..4a3d9cf 100644 --- a/python/obitools3/uri/decode.pyx +++ b/python/obitools3/uri/decode.pyx @@ -364,19 +364,22 @@ def open_uri(uri, if qualifiers[b"seqtype"]==b"nuc": objclass = Nuc_Seq if format==b"fasta": - iseq = fastaNucIterator(file,skip,only) + iseq = fastaNucIterator(file, + skip=skip, + only=only) elif format==b"fastq": iseq = fastqIterator(file, - skip,only, - offset, - noquality) + skip=skip, + only=only, + offset=offset, + noquality=noquality) else: raise NotImplementedError('Sequence file format not implemented') elif qualifiers[b"seqtype"]==b"prot": raise NotImplementedError() else: iseq,objclass = entryIteratorFactory(file, - skip,only, + skip, only, seqtype, offset, noquality, @@ -388,13 +391,12 @@ def open_uri(uri, stripwhite, blanklineskip, commentchar) - - tmpdms = get_temp_dms() - - return (file,iseq,objclass,urib) - - + #tmpdms = get_temp_dms() + + return (file, iseq, objclass, urib) + + + + - -