Cython API: obi import can now import ngsfilter files and tabular files
This commit is contained in:
@ -7,11 +7,15 @@ from obitools3.dms.dms import DMS
|
||||
|
||||
from obitools3.parsers.fasta import fastaNucIterator
|
||||
from obitools3.parsers.fastq import fastqIterator
|
||||
from obitools3.parsers.tab import tabIterator
|
||||
from obitools3.parsers.ngsfilter import ngsfilterIterator
|
||||
from obitools3.parsers.universal import entryIteratorFactory
|
||||
|
||||
from obitools3.dms.obiseq import Nuc_Seq
|
||||
from obitools3.apps.config import getConfiguration,logger
|
||||
from obitools3.apps.temp import get_temp_dms
|
||||
from obitools3.utils cimport tobytes # TODO because can't read options as bytes
|
||||
|
||||
|
||||
class MalformedURIException(RuntimeError):
    """Raised when a URI, or one of its query arguments, cannot be interpreted.

    The URI-opening code in this module raises it with a message naming the
    offending argument, e.g. ``'Malformed header argument in URI'``.
    It derives from ``RuntimeError`` so existing ``except RuntimeError``
    handlers keep catching it.
    """
|
||||
@ -210,22 +214,24 @@ def open_uri(uri,
|
||||
if file is not None:
|
||||
qualifiers=parse_qs(urip.query)
|
||||
|
||||
|
||||
if b'format' in qualifiers:
|
||||
format = qualifiers[b'format'][0]
|
||||
else:
|
||||
try:
|
||||
format=config["obi"]["fileformat"]
|
||||
format=config["obi"]["format"]
|
||||
except KeyError:
|
||||
format=None
|
||||
|
||||
if b'seqtype' in qualifiers:
|
||||
seqtype=qualifiers[b'seqtype'][0]
|
||||
else:
|
||||
try:
|
||||
seqtype=config["obi"]["seqtype"]
|
||||
except KeyError:
|
||||
seqtype=b'nuc'
|
||||
if format == b"ngsfilter": # TODO discuss
|
||||
seqtype=None
|
||||
else:
|
||||
try:
|
||||
seqtype=config["obi"]["seqtype"]
|
||||
except KeyError:
|
||||
seqtype=b"nuc"
|
||||
|
||||
if b'skip' in qualifiers:
|
||||
skip=int(qualifiers[b"skip"][0])
|
||||
@ -286,7 +292,7 @@ def open_uri(uri,
|
||||
offset=33
|
||||
elif config["obi"]["qualityformat"][0]=="solexa":
|
||||
offset=64
|
||||
#offset=config["obi"]["qualityoffset"] # TODO discuss
|
||||
#offset=config["obi"]["offset"] # TODO discuss
|
||||
except KeyError:
|
||||
offset=33
|
||||
|
||||
@ -304,10 +310,10 @@ def open_uri(uri,
|
||||
raise MalformedURIException('Malformed header argument in URI')
|
||||
|
||||
if b"sep" in qualifiers:
|
||||
sep=qualifiers[b"sep"][0][0]
|
||||
sep=tobytes(qualifiers[b"sep"][0][0])
|
||||
else:
|
||||
try:
|
||||
sep=config["obi"]["sep"]
|
||||
sep=tobytes(config["obi"]["sep"])
|
||||
except KeyError:
|
||||
sep=None
|
||||
|
||||
@ -315,18 +321,18 @@ def open_uri(uri,
|
||||
# pass
|
||||
|
||||
if b"dec" in qualifiers:
|
||||
dec=qualifiers[b"dec"][0][0]
|
||||
dec=tobytes(qualifiers[b"dec"][0][0])
|
||||
else:
|
||||
try:
|
||||
dec=config["obi"]["dec"]
|
||||
dec=tobytes(config["obi"]["dec"])
|
||||
except KeyError:
|
||||
dec=b"."
|
||||
|
||||
if b"nastring" in qualifiers:
|
||||
nastring=qualifiers[b"nastring"][0]
|
||||
nastring=tobytes(qualifiers[b"nastring"][0])
|
||||
else:
|
||||
try:
|
||||
nastring=config["obi"]["nastring"]
|
||||
nastring=tobytes(config["obi"]["nastring"])
|
||||
except KeyError:
|
||||
nastring=b'NA'
|
||||
|
||||
@ -357,15 +363,15 @@ def open_uri(uri,
|
||||
raise MalformedURIException('Malformed blanklineskip argument in URI')
|
||||
|
||||
if b"commentchar" in qualifiers:
|
||||
commentchar=qualifiers[b"commentchar"][0][0]
|
||||
commentchar=tobytes(qualifiers[b"commentchar"][0][0])
|
||||
else:
|
||||
try:
|
||||
commentchar=config["obi"]["commentchar"]
|
||||
commentchar=tobytes(config["obi"]["commentchar"])
|
||||
except KeyError:
|
||||
commentchar=b'#'
|
||||
|
||||
if format is not None:
|
||||
if qualifiers[b"seqtype"]==b"nuc":
|
||||
if seqtype==b"nuc":
|
||||
objclass = Nuc_Seq
|
||||
if format==b"fasta":
|
||||
iseq = fastaNucIterator(file,
|
||||
@ -379,8 +385,29 @@ def open_uri(uri,
|
||||
noquality=noquality)
|
||||
else:
|
||||
raise NotImplementedError('Sequence file format not implemented')
|
||||
elif qualifiers[b"seqtype"]==b"prot":
|
||||
elif seqtype==b"prot":
|
||||
raise NotImplementedError()
|
||||
elif format==b"tabular":
|
||||
objclass = dict
|
||||
iseq = tabIterator(file,
|
||||
header = header,
|
||||
sep = sep,
|
||||
dec = dec,
|
||||
stripwhite = stripwhite,
|
||||
blanklineskip = blanklineskip,
|
||||
commentchar = commentchar,
|
||||
skip = skip,
|
||||
only = only)
|
||||
elif format==b"ngsfilter":
|
||||
objclass = dict
|
||||
iseq = ngsfilterIterator(file,
|
||||
sep = sep,
|
||||
dec = dec,
|
||||
stripwhite = stripwhite,
|
||||
blanklineskip = blanklineskip,
|
||||
commentchar = commentchar,
|
||||
skip = skip,
|
||||
only = only)
|
||||
else:
|
||||
iseq,objclass = entryIteratorFactory(file,
|
||||
skip, only,
|
||||
|
Reference in New Issue
Block a user