Cython API: added ngsfilter file parser

This commit is contained in:
Celine Mercier
2018-03-21 16:41:25 +01:00
parent 49c17ab7b4
commit 6825fc13ab
2 changed files with 92 additions and 0 deletions

View File

@ -0,0 +1,8 @@
#cython: language_level=3
from ..utils cimport str2bytes
from ..files.universalopener cimport uopen
from ..files.linebuffer cimport LineBuffer

View File

@ -0,0 +1,84 @@
#cython: language_level=3
'''
Created on March 8th 2018
@author: cmercier
'''
from .tab import tabIterator
from obitools3.utils cimport bytes2str
import types
def ngsfilterIterator(lineiterator,
                      bytes sep = None,
                      bytes dec = b".",
                      bint stripwhite=True,
                      bint blanklineskip=True,
                      bytes commentchar=b"#",
                      int skip=0,
                      only=None,
                      firstline=None,
                      int buffersize=100000000
                      ):
    '''
    Parse an ngsfilter file and return a tabIterator over its rewritten lines.

    Every data line is split on `sep`, its third column is split on ":" into
    a forward and a reverse tag (a single tag is used for both), and the first
    six resulting columns are re-joined with tabulations. A header line naming
    the six columns is put in front of the rewritten lines, and the whole is
    handed to tabIterator with `header = True` and a tabulation separator.

    @param lineiterator: a file name (str or bytes, opened with uopen), a
                         LineBuffer, an object with a `readlines` attribute
                         (wrapped in a LineBuffer), or any object with a
                         `__next__` attribute. Lines are handled with str
                         operations, so they are assumed to be str.
    @param sep: column separator of the input lines; None splits on any run
                of whitespace.
    @param dec: forwarded unchanged to tabIterator.
    @param stripwhite: forwarded unchanged to tabIterator.
    @param blanklineskip: forwarded unchanged to tabIterator.
    @param commentchar: forwarded unchanged to tabIterator; also used here to
                        recognise comment lines that must not be rewritten.
    @param skip: forwarded unchanged to tabIterator.
    @param only: forwarded unchanged to tabIterator.
    @param firstline: if not None, a line inserted before the lines read from
                      the iterator.
    @param buffersize: buffer size given to LineBuffer when one is created.

    @return: the object returned by tabIterator.

    @raise Exception: if `lineiterator` is not one of the accepted kinds.
    @raise IndexError: if a data line has fewer than 3 columns.
    '''

    cdef list all_lines
    cdef list new_lines
    cdef str header
    cdef str sep_str
    cdef str comment_str
    cdef bytes out_sep
    cdef str out_sep_str

    out_sep = b"\t"
    out_sep_str = "\t"

    if sep is not None:
        sep_str = bytes2str(sep)
    else:
        sep_str = None

    # An empty comment character would match every line, so treat it as unset
    if commentchar:
        comment_str = bytes2str(commentchar)
    else:
        comment_str = None

    if isinstance(lineiterator, (str, bytes)):
        lineiterator = uopen(lineiterator)
    if isinstance(lineiterator, LineBuffer):
        iterator = iter(lineiterator)
    else:
        if hasattr(lineiterator, "readlines"):
            iterator = iter(LineBuffer(lineiterator, buffersize))
        elif hasattr(lineiterator, '__next__'):
            iterator = lineiterator
        else:
            raise Exception("Invalid line iterator")

    all_lines = list(iterator)
    new_lines = []

    if firstline is not None:
        all_lines.insert(0, firstline)

    # Insert header for column names
    column_names = ["experiment", "sample", "forward_tag", "reverse_tag", "forward_primer", "reverse_primer"]
    header = out_sep_str.join(column_names)
    new_lines.append(header)

    for line in all_lines:
        stripped = line.strip()

        # Blank and comment lines carry no tag column: rewriting them would
        # either fail or corrupt them. They are passed through untouched so
        # that tabIterator applies blanklineskip / commentchar itself.
        # NOTE(review): assumes tabIterator recognises comment lines in a
        # compatible way - confirm against its implementation.
        if not stripped or (comment_str is not None and stripped.startswith(comment_str)):
            new_lines.append(line)
            continue

        split_line = line.split(sep_str)
        if len(split_line) < 3:
            raise IndexError("Invalid ngsfilter line (fewer than 3 columns): %r" % (line,))

        tags = split_line.pop(2).split(":")
        if len(tags) == 1:   # Forward and reverse tags are the same
            tags.append(tags[0])
        split_line.insert(2, tags[0])
        split_line.insert(3, tags[1])

        # Only the six named columns are kept
        new_lines.append(out_sep_str.join(split_line[0:6]))

    return tabIterator(iter(new_lines),
                       header = True,
                       sep = out_sep,
                       dec = dec,
                       stripwhite = stripwhite,
                       blanklineskip = blanklineskip,
                       commentchar = commentchar,
                       skip = skip,
                       only = only,
                       firstline = None)