Cython API: obi import can now import ngsfilter files and tabular files

This commit is contained in:
Celine Mercier
2018-03-12 18:10:43 +01:00
parent 8a0b95c1d6
commit 15e43bb9a1
9 changed files with 168 additions and 142 deletions

View File

@ -7,11 +7,15 @@ from obitools3.dms.dms import DMS
from obitools3.parsers.fasta import fastaNucIterator
from obitools3.parsers.fastq import fastqIterator
from obitools3.parsers.tab import tabIterator
from obitools3.parsers.ngsfilter import ngsfilterIterator
from obitools3.parsers.universal import entryIteratorFactory
from obitools3.dms.obiseq import Nuc_Seq
from obitools3.apps.config import getConfiguration,logger
from obitools3.apps.temp import get_temp_dms
from obitools3.utils cimport tobytes # TODO because can't read options as bytes
# Exception signalling a URI that cannot be interpreted.
# Subclasses RuntimeError and adds no behaviour of its own; in this file it
# is raised when a URI qualifier is malformed (e.g. the 'header' and
# 'blanklineskip' arguments).
class MalformedURIException(RuntimeError):
pass
@ -210,22 +214,24 @@ def open_uri(uri,
if file is not None:
qualifiers=parse_qs(urip.query)
if b'format' in qualifiers:
format = qualifiers[b'format'][0]
else:
try:
format=config["obi"]["fileformat"]
format=config["obi"]["format"]
except KeyError:
format=None
if b'seqtype' in qualifiers:
seqtype=qualifiers[b'seqtype'][0]
else:
try:
seqtype=config["obi"]["seqtype"]
except KeyError:
seqtype=b'nuc'
if format == b"ngsfilter": # TODO discuss
seqtype=None
else:
try:
seqtype=config["obi"]["seqtype"]
except KeyError:
seqtype=b"nuc"
if b'skip' in qualifiers:
skip=int(qualifiers[b"skip"][0])
@ -286,7 +292,7 @@ def open_uri(uri,
offset=33
elif config["obi"]["qualityformat"][0]=="solexa":
offset=64
#offset=config["obi"]["qualityoffset"] # TODO discuss
#offset=config["obi"]["offset"] # TODO discuss
except KeyError:
offset=33
@ -304,10 +310,10 @@ def open_uri(uri,
raise MalformedURIException('Malformed header argument in URI')
if b"sep" in qualifiers:
sep=qualifiers[b"sep"][0][0]
sep=tobytes(qualifiers[b"sep"][0][0])
else:
try:
sep=config["obi"]["sep"]
sep=tobytes(config["obi"]["sep"])
except KeyError:
sep=None
@ -315,18 +321,18 @@ def open_uri(uri,
# pass
if b"dec" in qualifiers:
dec=qualifiers[b"dec"][0][0]
dec=tobytes(qualifiers[b"dec"][0][0])
else:
try:
dec=config["obi"]["dec"]
dec=tobytes(config["obi"]["dec"])
except KeyError:
dec=b"."
if b"nastring" in qualifiers:
nastring=qualifiers[b"nastring"][0]
nastring=tobytes(qualifiers[b"nastring"][0])
else:
try:
nastring=config["obi"]["nastring"]
nastring=tobytes(config["obi"]["nastring"])
except KeyError:
nastring=b'NA'
@ -357,15 +363,15 @@ def open_uri(uri,
raise MalformedURIException('Malformed blanklineskip argument in URI')
if b"commentchar" in qualifiers:
commentchar=qualifiers[b"commentchar"][0][0]
commentchar=tobytes(qualifiers[b"commentchar"][0][0])
else:
try:
commentchar=config["obi"]["commentchar"]
commentchar=tobytes(config["obi"]["commentchar"])
except KeyError:
commentchar=b'#'
if format is not None:
if qualifiers[b"seqtype"]==b"nuc":
if seqtype==b"nuc":
objclass = Nuc_Seq
if format==b"fasta":
iseq = fastaNucIterator(file,
@ -379,8 +385,29 @@ def open_uri(uri,
noquality=noquality)
else:
raise NotImplementedError('Sequence file format not implemented')
elif qualifiers[b"seqtype"]==b"prot":
elif seqtype==b"prot":
raise NotImplementedError()
elif format==b"tabular":
objclass = dict
iseq = tabIterator(file,
header = header,
sep = sep,
dec = dec,
stripwhite = stripwhite,
blanklineskip = blanklineskip,
commentchar = commentchar,
skip = skip,
only = only)
elif format==b"ngsfilter":
objclass = dict
iseq = ngsfilterIterator(file,
sep = sep,
dec = dec,
stripwhite = stripwhite,
blanklineskip = blanklineskip,
commentchar = commentchar,
skip = skip,
only = only)
else:
iseq,objclass = entryIteratorFactory(file,
skip, only,