Cython API: added ngsfilter file parser
This commit is contained in:
8
python/obitools3/parsers/ngsfilter.pxd
Normal file
8
python/obitools3/parsers/ngsfilter.pxd
Normal file
@ -0,0 +1,8 @@
|
||||
#cython: language_level=3
|
||||
|
||||
from ..utils cimport str2bytes
|
||||
from ..files.universalopener cimport uopen
|
||||
from ..files.linebuffer cimport LineBuffer
|
||||
|
||||
|
||||
|
84
python/obitools3/parsers/ngsfilter.pyx
Normal file
84
python/obitools3/parsers/ngsfilter.pyx
Normal file
@ -0,0 +1,84 @@
|
||||
#cython: language_level=3
|
||||
|
||||
'''
|
||||
Created on March 8th 2018
|
||||
|
||||
@author: cmercier
|
||||
'''
|
||||
|
||||
from .tab import tabIterator
|
||||
from obitools3.utils cimport bytes2str
|
||||
import types
|
||||
|
||||
|
||||
def ngsfilterIterator(lineiterator,
                      bytes sep = None,
                      bytes dec = b".",
                      bint stripwhite=True,
                      bint blanklineskip=True,
                      bytes commentchar=b"#",
                      int skip=0,
                      only=None,
                      firstline=None,
                      int buffersize=100000000
                      ):
    '''
    Read an ngsfilter file and return a tabIterator over its reformatted lines.

    Every input line is split into fields; the third field holds the tag
    specification, written either as ``forward:reverse`` or as a single tag
    that is then used for both directions. The line is rewritten as six
    tab-separated columns (experiment, sample, forward_tag, reverse_tag,
    forward_primer, reverse_primer); any further fields are dropped. A header
    line with those column names is put in front, and the result is handed to
    ``tabIterator`` with ``header = True`` and a tab separator.

    The whole input is loaded in memory before being reformatted.

    @param lineiterator: a file name (str or bytes, opened with ``uopen``),
                         a ``LineBuffer``, an object with a ``readlines``
                         attribute (wrapped in a ``LineBuffer``), or any
                         object with a ``__next__`` method.
                         Lines are assumed to be ``str`` since they are split
                         on ``str`` separators -- TODO confirm against the
                         line sources actually used.
    @param sep: field separator of the input file; ``None`` splits on runs
                of whitespace.
    @param dec: forwarded unchanged to ``tabIterator``.
    @param stripwhite: forwarded unchanged to ``tabIterator``.
    @param blanklineskip: when true, empty or whitespace-only input lines are
                          ignored here; also forwarded to ``tabIterator``.
    @param commentchar: forwarded unchanged to ``tabIterator``.
    @param skip: forwarded unchanged to ``tabIterator``.
    @param only: forwarded unchanged to ``tabIterator``.
    @param firstline: if not ``None``, a line processed before the ones
                      coming from ``lineiterator``.
    @param buffersize: size given to the ``LineBuffer`` created when
                       ``lineiterator`` only offers ``readlines``.

    @return: the object returned by ``tabIterator``.

    @raise Exception: if ``lineiterator`` is none of the supported kinds.
    @raise IndexError: if a processed line has fewer than three fields.
    '''

    cdef list  all_lines
    cdef str   header
    cdef str   sep_str
    cdef bytes out_sep
    cdef str   out_sep_str

    # The reformatted lines are always tab separated, whatever the input was.
    out_sep = b"\t"
    out_sep_str = "\t"

    # Lines are handled as str, so the bytes separator must be decoded.
    if sep is not None:
        sep_str = bytes2str(sep)
    else:
        sep_str = None

    if isinstance(lineiterator, (str, bytes)):
        lineiterator = uopen(lineiterator)
    if isinstance(lineiterator, LineBuffer):
        iterator = iter(lineiterator)
    else:
        if hasattr(lineiterator, "readlines"):
            iterator = iter(LineBuffer(lineiterator, buffersize))
        elif hasattr(lineiterator, '__next__'):
            iterator = lineiterator
        else:
            raise Exception("Invalid line iterator")

    all_lines = list(iterator)
    new_lines = []

    if firstline is not None:
        all_lines.insert(0, firstline)

    # Insert header for column names
    column_names = ["experiment", "sample", "forward_tag", "reverse_tag", "forward_primer", "reverse_primer"]
    header = out_sep_str.join(column_names)

    new_lines.append(header)

    for line in all_lines:
        # A blank line has no third field: without this guard it would make
        # pop(2) raise IndexError before tabIterator could ever skip it.
        if blanklineskip and not line.strip():
            continue

        split_line = line.split(sep_str)
        tags = split_line.pop(2)
        tags = tags.split(":")
        if len(tags) == 1:  # Forward and reverse tags are the same
            tags.append(tags[0])
        split_line.insert(2, tags[0])
        split_line.insert(3, tags[1])
        # Only the six documented columns are kept
        new_lines.append(out_sep_str.join(split_line[0:6]))

    # firstline has already been merged into the lines above, so it must not
    # be passed a second time.
    return tabIterator(iter(new_lines),
                       header = True,
                       sep = out_sep,
                       dec = dec,
                       stripwhite = stripwhite,
                       blanklineskip = blanklineskip,
                       commentchar = commentchar,
                       skip = skip,
                       only = only,
                       firstline = None)
|
||||
|
Reference in New Issue
Block a user