Cython: updated the URI decoding to handle outputs other than DMS
This commit is contained in:
@ -12,6 +12,11 @@ from obitools3.parsers.ngsfilter import ngsfilterIterator
|
|||||||
from obitools3.parsers.embl import emblIterator
|
from obitools3.parsers.embl import emblIterator
|
||||||
from obitools3.parsers.universal import entryIteratorFactory
|
from obitools3.parsers.universal import entryIteratorFactory
|
||||||
|
|
||||||
|
from obitools3.writers.fasta import FastaNucWriter
|
||||||
|
from obitools3.writers.fastq import FastqWriter
|
||||||
|
from obitools3.format.fasta import FastaFormat
|
||||||
|
from obitools3.format.fastq import FastqFormat
|
||||||
|
|
||||||
from obitools3.dms.obiseq import Nuc_Seq
|
from obitools3.dms.obiseq import Nuc_Seq
|
||||||
from obitools3.apps.config import getConfiguration,logger
|
from obitools3.apps.config import getConfiguration,logger
|
||||||
from obitools3.apps.temp import get_temp_dms
|
from obitools3.apps.temp import get_temp_dms
|
||||||
@ -56,7 +61,9 @@ cdef open_dms(bytes path, bint create=False):
|
|||||||
pos=pos+1
|
pos=pos+1
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def open_dms_element(DMS dms, bytes path,
|
|
||||||
|
def open_dms_element(DMS dms,
|
||||||
|
bytes path,
|
||||||
bint create=False,
|
bint create=False,
|
||||||
type newviewtype=View):
|
type newviewtype=View):
|
||||||
"""
|
"""
|
||||||
@ -139,12 +146,21 @@ def open_dms_element(DMS dms, bytes path,
|
|||||||
if len(path_parts) > 4:
|
if len(path_parts) > 4:
|
||||||
raise MalformedURIException('Malformed View URI')
|
raise MalformedURIException('Malformed View URI')
|
||||||
|
|
||||||
return (dms,subsubpart)
|
return (dms, subsubpart)
|
||||||
|
|
||||||
|
|
||||||
|
'''
|
||||||
|
#TODO discuss returned object. Return a dict? or some class instance?
|
||||||
|
Reads an URI and returns a tuple containing:
|
||||||
|
(1) The opened file or DMS, or the URI itself if nothing could be opened by the function
|
||||||
|
(2) The opened view or iterator on the opened file or writer
|
||||||
|
(3) The class of object returned or handled by (2)
|
||||||
|
(4) The original URI in bytes
|
||||||
|
'''
|
||||||
def open_uri(uri,
|
def open_uri(uri,
|
||||||
bint input=True,
|
bint input=True,
|
||||||
type newviewtype=View):
|
type newviewtype=View):
|
||||||
|
|
||||||
cdef bytes urib = tobytes(uri)
|
cdef bytes urib = tobytes(uri)
|
||||||
cdef bytes scheme
|
cdef bytes scheme
|
||||||
cdef tuple dms
|
cdef tuple dms
|
||||||
@ -171,17 +187,21 @@ def open_uri(uri,
|
|||||||
|
|
||||||
error = None
|
error = None
|
||||||
|
|
||||||
if scheme==b"" or scheme==b"dms" :
|
if scheme==b"dms" or \
|
||||||
dms = open_dms(urip.path,create)
|
(scheme==b"" and \
|
||||||
|
(((not input) and "outputformat" not in config["obi"]) or \
|
||||||
|
(input and "inputformat" not in config["obi"]))): # TODO maybe not best way
|
||||||
|
|
||||||
|
dms = open_dms(urip.path, create)
|
||||||
if dms is None and default_dms is not None:
|
if dms is None and default_dms is not None:
|
||||||
dms=(default_dms, urip.path)
|
dms=(default_dms, urip.path)
|
||||||
|
|
||||||
if dms is not None:
|
if dms is not None:
|
||||||
try:
|
try:
|
||||||
resource=open_dms_element(dms[0],dms[1],
|
resource=open_dms_element(dms[0],
|
||||||
|
dms[1],
|
||||||
create,
|
create,
|
||||||
newviewtype
|
newviewtype)
|
||||||
)
|
|
||||||
|
|
||||||
scheme=b"dms"
|
scheme=b"dms"
|
||||||
urip = ParseResultBytes(scheme=b"dms",
|
urip = ParseResultBytes(scheme=b"dms",
|
||||||
@ -205,27 +225,37 @@ def open_uri(uri,
|
|||||||
logger('Error','cannot open DMS: %s', uri)
|
logger('Error','cannot open DMS: %s', uri)
|
||||||
raise FileNotFoundError('uri')
|
raise FileNotFoundError('uri')
|
||||||
|
|
||||||
#if not urip.scheme: # TODO not sure what it was supposed to do but not working as intended
|
if not urip.scheme:
|
||||||
# urib=b"file:"+urib
|
urib=b"file:"+urib
|
||||||
|
|
||||||
try:
|
if input:
|
||||||
file = uopen(urib)
|
try:
|
||||||
logger('info','Opened file: %s', tostr(urib))
|
file = uopen(urip.path, mode='rb')
|
||||||
except Exception as e: # TODO discuss: if can't open file, return the character string itself
|
logger('info','Opened file: %s', urip.path)
|
||||||
file = urib
|
except Exception as e: # TODO discuss: if can't open file, return the character string itself
|
||||||
iseq = urib
|
file = tobytes(uri)
|
||||||
objclass = bytes
|
iseq = urib
|
||||||
|
objclass = bytes
|
||||||
|
else: # TODO update uopen to be able to write?
|
||||||
|
file = open(urip.path, 'wb')
|
||||||
|
|
||||||
if file is not None:
|
if file is not None:
|
||||||
qualifiers=parse_qs(urip.query)
|
qualifiers=parse_qs(urip.query)
|
||||||
|
|
||||||
if b'format' in qualifiers:
|
if input and b'format' in qualifiers:
|
||||||
format = qualifiers[b'format'][0]
|
format = qualifiers[b'format'][0]
|
||||||
else:
|
else: # TODO discuss priorities
|
||||||
try:
|
if urip.scheme:
|
||||||
format=config["obi"]["format"]
|
format = urip.scheme
|
||||||
except KeyError:
|
else:
|
||||||
format=None
|
try:
|
||||||
|
if input:
|
||||||
|
formatkey = "inputformat"
|
||||||
|
else:
|
||||||
|
formatkey = "outputformat"
|
||||||
|
format=config["obi"][formatkey]
|
||||||
|
except KeyError:
|
||||||
|
format=None
|
||||||
|
|
||||||
if b'seqtype' in qualifiers:
|
if b'seqtype' in qualifiers:
|
||||||
seqtype=qualifiers[b'seqtype'][0]
|
seqtype=qualifiers[b'seqtype'][0]
|
||||||
@ -248,7 +278,6 @@ def open_uri(uri,
|
|||||||
if skip < 0:
|
if skip < 0:
|
||||||
raise MalformedURIException('Malformed skip argument in URI')
|
raise MalformedURIException('Malformed skip argument in URI')
|
||||||
|
|
||||||
|
|
||||||
if b'only' in qualifiers:
|
if b'only' in qualifiers:
|
||||||
only=int(qualifiers[b"only"][0])
|
only=int(qualifiers[b"only"][0])
|
||||||
else:
|
else:
|
||||||
@ -259,7 +288,6 @@ def open_uri(uri,
|
|||||||
if only is not None and only <= 0:
|
if only is not None and only <= 0:
|
||||||
raise MalformedURIException('Malformed only argument in URI')
|
raise MalformedURIException('Malformed only argument in URI')
|
||||||
|
|
||||||
|
|
||||||
if b"skiperror" in qualifiers:
|
if b"skiperror" in qualifiers:
|
||||||
try:
|
try:
|
||||||
skiperror=eval(qualifiers[b"skiperror"][0])
|
skiperror=eval(qualifiers[b"skiperror"][0])
|
||||||
@ -333,11 +361,26 @@ def open_uri(uri,
|
|||||||
except KeyError:
|
except KeyError:
|
||||||
dec=b"."
|
dec=b"."
|
||||||
|
|
||||||
|
if b"printna" in qualifiers:
|
||||||
|
try:
|
||||||
|
printna=eval(qualifiers[b"printna"][0])
|
||||||
|
except Exception as e:
|
||||||
|
raise MalformedURIException("Malformed 'print NA' argument in URI")
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
printna=config["obi"]["printna"]
|
||||||
|
except KeyError:
|
||||||
|
printna=False
|
||||||
|
|
||||||
if b"nastring" in qualifiers:
|
if b"nastring" in qualifiers:
|
||||||
nastring=tobytes(qualifiers[b"nastring"][0])
|
nastring=tobytes(qualifiers[b"nastring"][0])
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
nastring=tobytes(config["obi"]["nastring"])
|
if input:
|
||||||
|
nakey = "inputnastring"
|
||||||
|
else:
|
||||||
|
nakey = "outputnastring"
|
||||||
|
nastring=tobytes(config["obi"][nakey])
|
||||||
except KeyError:
|
except KeyError:
|
||||||
nastring=b'NA'
|
nastring=b'NA'
|
||||||
|
|
||||||
@ -377,60 +420,88 @@ def open_uri(uri,
|
|||||||
|
|
||||||
if format is not None:
|
if format is not None:
|
||||||
if seqtype==b"nuc":
|
if seqtype==b"nuc":
|
||||||
objclass = Nuc_Seq
|
objclass = Nuc_Seq # Nuc_Seq_Stored? TODO
|
||||||
if format==b"fasta":
|
if format==b"fasta":
|
||||||
iseq = fastaNucIterator(file,
|
if input:
|
||||||
|
iseq = fastaNucIterator(file,
|
||||||
|
skip=skip,
|
||||||
|
only=only)
|
||||||
|
else:
|
||||||
|
iseq = FastaNucWriter(FastaFormat(printNAKeys=printna, NAString=nastring),
|
||||||
|
file,
|
||||||
|
skip=skip,
|
||||||
|
only=only)
|
||||||
|
elif format==b"fastq":
|
||||||
|
if input:
|
||||||
|
iseq = fastqIterator(file,
|
||||||
|
skip=skip,
|
||||||
|
only=only,
|
||||||
|
offset=offset,
|
||||||
|
noquality=noquality)
|
||||||
|
else:
|
||||||
|
iseq = FastqWriter(FastqFormat(printNAKeys=printna, NAString=nastring),
|
||||||
|
file,
|
||||||
|
skip=skip,
|
||||||
|
only=only)
|
||||||
|
elif format==b"embl":
|
||||||
|
if input:
|
||||||
|
iseq = emblIterator(file,
|
||||||
skip=skip,
|
skip=skip,
|
||||||
only=only)
|
only=only)
|
||||||
elif format==b"fastq":
|
else:
|
||||||
iseq = fastqIterator(file,
|
raise NotImplementedError('Output sequence file format not implemented')
|
||||||
skip=skip,
|
|
||||||
only=only,
|
|
||||||
offset=offset,
|
|
||||||
noquality=noquality)
|
|
||||||
elif format==b"embl":
|
|
||||||
iseq = emblIterator(file,
|
|
||||||
skip=skip,
|
|
||||||
only=only)
|
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError('Sequence file format not implemented')
|
raise NotImplementedError('Sequence file format not implemented')
|
||||||
elif seqtype==b"prot":
|
elif seqtype==b"prot":
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
elif format==b"tabular":
|
elif format==b"tabular":
|
||||||
objclass = dict
|
objclass = dict
|
||||||
iseq = tabIterator(file,
|
if input:
|
||||||
header = header,
|
iseq = tabIterator(file,
|
||||||
sep = sep,
|
header = header,
|
||||||
dec = dec,
|
sep = sep,
|
||||||
stripwhite = stripwhite,
|
dec = dec,
|
||||||
blanklineskip = blanklineskip,
|
stripwhite = stripwhite,
|
||||||
commentchar = commentchar,
|
blanklineskip = blanklineskip,
|
||||||
skip = skip,
|
commentchar = commentchar,
|
||||||
only = only)
|
skip = skip,
|
||||||
|
only = only)
|
||||||
|
else:
|
||||||
|
raise NotImplementedError('Output sequence file format not implemented')
|
||||||
elif format==b"ngsfilter":
|
elif format==b"ngsfilter":
|
||||||
objclass = dict
|
objclass = dict
|
||||||
iseq = ngsfilterIterator(file,
|
if input:
|
||||||
sep = sep,
|
iseq = ngsfilterIterator(file,
|
||||||
dec = dec,
|
sep = sep,
|
||||||
stripwhite = stripwhite,
|
dec = dec,
|
||||||
blanklineskip = blanklineskip,
|
stripwhite = stripwhite,
|
||||||
commentchar = commentchar,
|
blanklineskip = blanklineskip,
|
||||||
skip = skip,
|
commentchar = commentchar,
|
||||||
only = only)
|
skip = skip,
|
||||||
|
only = only)
|
||||||
|
else:
|
||||||
|
raise NotImplementedError('Output sequence file format not implemented')
|
||||||
else:
|
else:
|
||||||
iseq, objclass = entryIteratorFactory(file,
|
if input:
|
||||||
skip, only,
|
iseq, objclass = entryIteratorFactory(file,
|
||||||
seqtype,
|
skip, only,
|
||||||
offset,
|
seqtype,
|
||||||
noquality,
|
offset,
|
||||||
skiperror,
|
noquality,
|
||||||
header,
|
skiperror,
|
||||||
sep,
|
header,
|
||||||
dec,
|
sep,
|
||||||
nastring,
|
dec,
|
||||||
stripwhite,
|
nastring,
|
||||||
blanklineskip,
|
stripwhite,
|
||||||
commentchar)
|
blanklineskip,
|
||||||
|
commentchar)
|
||||||
|
else: # default export is in fasta? or tab? TODO
|
||||||
|
objclass = Nuc_Seq # Nuc_Seq_Stored? TODO
|
||||||
|
iseq = FastaNucWriter(FastaFormat(printNAKeys=printna, NAString=nastring),
|
||||||
|
file,
|
||||||
|
skip=skip,
|
||||||
|
only=only)
|
||||||
|
|
||||||
#tmpdms = get_temp_dms()
|
#tmpdms = get_temp_dms()
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user